I'm new to Python and practicing web scraping. I was writing code to get all the movie names and the corresponding release year of each movie. I got the result into a DataFrame, but when I export it to Excel, the cells show HTML code instead of the plain text. Thanks in advance.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import os
# Scrape movie titles and release years from the YTS browse pages and
# export them to an Excel workbook in the current working directory.
pages = np.arange(1, 2, 1)
pwd = os.getcwd()

# Rows are collected as plain dicts and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
rows = []

# looping through different pages
for page in pages:
    url = 'https://yts.mx/browse-movies?page=' + str(page)
    # requests has no default timeout; fail loudly on HTTP errors instead of
    # parsing an error page as if it were a listing.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # looping through all the movie names and years
    for row in soup.find_all('div', class_='browse-movie-bottom'):
        title = row.find('a', class_='browse-movie-title')
        years = row.find('div', class_='browse-movie-year')
        if title is None or years is None:
            # Entry without the expected markup; nothing to record.
            continue
        # Store the visible text, not the Tag objects themselves; a Tag
        # written to Excel is rendered as its HTML markup.
        rows.append({
            'Title': title.text.strip(),
            'Year': years.text.strip(),
        })

yify = pd.DataFrame(rows, columns=['Title', 'Year'])
# Preview of the first rows; only displayed in an interactive session.
yify.head()
# os.path.join keeps the output path portable instead of hard-coding '\\'.
yify.to_excel(os.path.join(pwd, 'Yify_Test_Output.xlsx'), index=False)
CodePudding user response:
Try:
for row in soup.find_all('div', class_='browse-movie-bottom'):
    # .text extracts the element's visible text; without it the Tag object
    # itself is stored and shows up as HTML markup when exported.
    title = row.find('a', class_='browse-movie-title').text.strip()
    years = row.find('div', class_='browse-movie-year').text.strip()