I want to scrape a video game's release information. The structure of all the relevant tags is shown in the picture below (this specific case is from this link: https://www.mobygames.com/game/ps2/007-nightfire/release-info).
A screen shot from the above website link
What I want to do is to scrape all release information and store it in a dataframe.
The code I have so far is as follows. However, I don't know how to finish it, as many things are undetermined. Is there a way to write such a web scraper with a for-loop?
Here is my current code
def get_releases(url):
    """Download a game's release-info page and parse it.

    Args:
        url: Base URL of the game page; '/release-info' is appended to it.

    Returns:
        A BeautifulSoup object built from the response text with the
        "lxml" parser.

    Raises:
        AssertionError: If the response status code is not 200.
    """
    # Join with an explicit `+` (a bare `url '/release-info'` is a syntax
    # error) and drop a trailing slash so the result never contains '//'.
    release_url = url.rstrip('/') + '/release-info'
    response = requests.get(release_url, headers={"User-Agent": "Mozilla/5.0"})
    # Raise explicitly rather than via `assert` so the check is not stripped
    # under `python -O`; the exception type is kept as AssertionError.
    if response.status_code != 200:
        raise AssertionError(
            "Problem with url request! %s throws %s" % (url, response.status_code)
        )
    return BeautifulSoup(response.text, "lxml")
def get_releases_info(release_soup):
    """Collect title, console and labelled release fields from a parsed page.

    Args:
        release_soup: Parsed release-info page (as returned by
            ``get_releases``).

    Returns:
        A dict with the keys 'title' and 'console', plus one key per
        ``floatholder`` div found after the first ``<h2>``: the lower-cased
        text of the div's first nested ``<div>`` mapped to the text of its
        first ``<a>``.
    """
    game_releases_info = {}
    game_releases_info['title'] = release_soup.find('h1').findNext('a').text

    console_header = release_soup.find('h2')
    game_releases_info['console'] = console_header.text

    for tag in console_header.findNextSiblings('div'):
        # Membership test instead of comparing the whole attrs dict, so a div
        # that carries an extra class or attribute is still recognised.
        if 'floatholder' not in (tag.get('class') or []):
            # TODO(review): the handling of the other sibling divs was never
            # written; they are skipped here until it is specified.
            continue
        field = tag.div.text.lower()
        value = tag.a.text
        # NOTE(review): a label that occurs more than once overwrites the
        # earlier value -- confirm whether every occurrence must be kept.
        game_releases_info[field] = value

    return game_releases_info
CodePudding user response:
To get the sibling elements, you can follow the example below. It produces the desired output.
import requests
from bs4 import BeautifulSoup
# Fetch the release-info page, then collect every `div.floatholder` as a
# one-entry {label: value} dict, alongside the game title and console name.
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(
    "https://www.mobygames.com/game/ps2/007-nightfire/release-info",
    headers=headers,
)
soup = BeautifulSoup(response.text, 'lxml')

data = []
for d in soup.select('div.floatholder'):
    p = list(d.stripped_strings)
    if not p:
        # A div with no text has neither a label nor a value to record.
        continue
    # First string is the label; the remaining strings are joined as the value.
    # Append to `data` (the list defined above); `data2` was never defined.
    data.append({p[0]: ''.join(p[1:])})

final_data = {
    'title': soup.select_one('.niceHeaderTitle > a').get_text(strip=True),
    'console': soup.find('h2').text,
    'release': data,
}
print(final_data)
Output:
{'title': '007: Nightfire', 'console': 'PlayStation 2', 'release': [{'Published by': 'Electronic Arts, Inc.'}, {'Developed by': 'Eurocom Developments Ltd,Savage Entertainment, LLC'}, {'Publishing label': 'EA Games'}, {'Cutscenes by': '1K|Studios,Dan Krech Productions, Inc.,Mondo Media, Inc.'}, {'Additional Sound by': 'Soundelux Design Music Group,SPG Studios'}, {'Voice Recording by': 'Open Door Productions'}, {'Testing by': 'EA Tiburon'}, {'Licensed by': 'Danjaq, LLC,United Artists Corporation'}, {'Countries': 'United States,Canada'}, {'Release Date': 'Nov 19, 2002'}, {'UPC-A': '0\xa014633\xa014592\xa02'}, {'Sony PN': 'SLUS-20579'}, {'Published by':
'Electronic Arts, Inc.'}, {'Developed by': 'Eurocom Developments Ltd,Savage Entertainment, LLC'}, {'Publishing label': 'EA Games'}, {'Distributed by': 'Electronic Arts France'}, {'Cutscenes by': '1K|Studios,Dan Krech Productions, Inc.,Mondo Media, Inc.'}, {'Additional Sound by': 'Soundelux Design Music Group,SPG Studios'}, {'Testing by': 'EA Tiburon'}, {'Country': 'France'}, {'Release Date': 'Nov 28, 2002'}, {'EAN-13': '5\xa0030931\xa0032394'}, {'Published by': 'Electronic Arts, Inc.'}, {'Developed by': 'Eurocom Developments Ltd,Savage Entertainment, LLC'},
{'Publishing label': 'EA Games'}, {'Localized by': 'Effective Media GmbH'}, {'Cutscenes by': '1K|Studios,Dan Krech Productions, Inc.,Mondo Media, Inc.'}, {'Additional Sound by': 'Soundelux Design Music Group,SPG Studios'}, {'Voice Recording by': 'Open Door Productions'}, {'Testing
by': 'EA Tiburon'}, {'Licensed by': 'Danjaq, LLC,United Artists Corporation'}, {'Country': 'United Kingdom'}, {'Release Date': 'Nov 29, 2002'}, {'EAN-13': '5\xa0030930\xa0032395'}, {'Sony
PN': 'SLES-51258'}, {'Published by': 'Electronic Arts, Inc.'}, {'Developed by': 'Eurocom Developments Ltd,Savage Entertainment, LLC'}, {'Publishing label': 'EA Games'}, {'Distributed by':
'Electronic Arts Deutschland GmbH'}, {'Cutscenes by': '1K|Studios,Dan Krech Productions, Inc.,Mondo Media, Inc.'}, {'Additional Sound by': 'Soundelux Design Music Group,SPG Studios'}, {'Testing by': 'EA Tiburon'}, {'Country': 'Germany'}, {'Release Date': 'Nov 29, 2002'}, {'EAN-13': '5\xa0030932\xa0032393'}, {'Sony PN': 'SLES-51260'}, {'Published by': 'Electronic Arts, Inc.'}, {'Developed by': 'Eurocom Developments Ltd,Savage Entertainment, LLC'}, {'Publishing label': 'EA Games'}, {'Cutscenes by': '1K|Studios,Dan Krech Productions, Inc.,Mondo Media, Inc.'}, {'Additional Sound by': 'Soundelux Design Music Group,SPG Studios'}, {'Testing by': 'EA Tiburon'}, {'Country': 'Japan'}, {'Release Date': 'Jan 30, 2003'}, {'EAN-13': '4\xa0938833\xa0005830'}, {'Sony PN': 'SLPS-25203'}, {'Country': 'Japan'}, {'Release Date': 'Feb 11, 2004'}, {'Comments': 'EA Best Hits release'}, {'EAN-13': '4\xa0938833\xa0006257'}, {'Sony PN': 'SLPM-65538'}]}