I'm trying to get info from pages 1, 2, 3, ... of https://myanimelist.net/topanime.php?limit=0 (instead of page=1, page=2 and so on, the URL uses limit=0, limit=50, limit=100, ...). The problem is that when the code loops through the number of pages I want, it fetches the info from all of the pages, but only the info from the last page is saved in the new CSV file.
def main(number):
    """Scrape the top-anime ranking pages and save every row to one CSV file.

    Pages are addressed by a ``limit`` offset in the URL that advances in
    steps of 50 (``limit=0``, ``limit=50``, ``limit=100``, ...).

    Args:
        number: How many ranking pages to scrape.

    Returns:
        For ``number <= 1`` the URL of the first page is returned and nothing
        is scraped or written (behaviour kept from the original code).
        Otherwise ``None``; the rows of all ``int(number)`` pages are written
        to ``MAL_topanime.csv`` in the current working directory.
    """
    url = 'https://myanimelist.net/topanime.php?limit={}'
    if number <= 1:
        # Return before a browser is started so no Chrome process is leaked.
        return url.format(0)

    driver = webdriver.Chrome()
    try:
        # Open the file ONCE, outside the page loop. Opening it with mode 'w'
        # for every page truncates it each time, which is why only the last
        # page used to end up in the CSV.
        with open('MAL_topanime.csv', 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Anime', 'Date', 'No_eps', 'Ranking', 'Score'])

            for limit in range(0, int(number) * 50, 50):
                driver.get(url.format(limit))
                soup = BeautifulSoup(driver.page_source, 'html.parser')

                for result in soup.find_all('tr', class_='ranking-list'):
                    anime = result.find(
                        'h3',
                        class_='hoverinfo_trigger fl-l fs14 fw-b anime_ranking_h3',
                    ).text.replace('\n', '')
                    # NOTE(review): 'Date' and 'No_eps' are both read from the
                    # same element, so the two columns hold identical text.
                    # Confirm how the page lays out this div before splitting.
                    information = result.find(
                        'div', class_='information di-ib mt4'
                    ).text.replace('\n', '')
                    date = information
                    no_eps = information
                    ranking = result.find(
                        'td', class_='rank ac'
                    ).text.replace('\n', '')
                    score = result.find(
                        'div', class_='js-top-ranking-score-col di-ib al'
                    ).text.replace('\n', '')
                    writer.writerow([anime, date, no_eps, ranking, score])
    finally:
        # Always shut the browser down, even if a page fails to parse.
        driver.quit()
CodePudding user response:
You can try the following example, which collects the rows from every page into a single list before building the DataFrame:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Collect the rows of every page in ONE list that lives outside the page
# loop, so that later pages add to the earlier ones instead of replacing them.
data = []
for limit in range(0, 150, 50):
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(
        f'https://myanimelist.net/topanime.php?limit={limit}',
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of silently parsing an error page
    # that contains no ranking rows.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'html.parser')
    results = soup.find_all('tr', class_='ranking-list')
    for result in results:
        Anime = result.find(
            'h3', class_='hoverinfo_trigger fl-l fs14 fw-b anime_ranking_h3'
        ).text.replace('\n', '')
        # NOTE(review): 'Date' and 'No_eps' are both read from the same
        # element, so the two columns hold identical text. Confirm how the
        # page lays out this div before splitting it.
        Date = result.find(
            'div', class_='information di-ib mt4'
        ).text.replace('\n', '')
        No_eps = result.find(
            'div', class_='information di-ib mt4'
        ).text.replace('\n', '')
        Ranking = result.find('td', class_='rank ac').text.replace('\n', '')
        Score = result.find(
            'div', class_='js-top-ranking-score-col di-ib al'
        ).text.replace('\n', '')
        data.append({
            'Anime': Anime,
            'Date': Date,
            'No_eps': No_eps,
            'Ranking': Ranking,
            'Score': Score,
        })

# Build the frame once, after all pages have been scraped.
df = pd.DataFrame(data)
print(df)
Output:
Anime ... Score
0 Fullmetal Alchemist: Brotherhood ... 9.12
1 Bleach: Sennen Kessen-hen ... 9.11
2 Kaguya-sama wa Kokurasetai: Ultra Romantic ... 9.10
3 Gintama° ... 9.08
4 Steins;Gate ... 9.08
.. ... ... ...
145 Mushishi Zoku Shou: Odoro no Michi ... 8.44
146 Saenai Heroine no Sodatekata Fine ... 8.44
147 Wu Liuqi Zhi Xuanwu Guo Pian ... 8.44
148 JoJo no Kimyou na Bouken Part 3: Stardust Crus... ... 8.44
149 Gintama: Yorinuki Gintama-san on Theater 2D ... 8.43
[150 rows x 5 columns]