I'm trying to webscrape a list of players and save it as csv.
This is the result I'm looking for: a list of the first x players (4 in this example) for each of y teams (2 in this example, so 8 players in total).
Here is the code:
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Download the MLS lineups page and parse it.
html_text = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS').text
soup = BeautifulSoup(html_text, 'html.parser')
lineups = soup.find_all('div', class_='lineup is-soccer')

# Pick the first two lineup boxes by index.
j = range(2)
selections = [lineups[index1] for index1 in j]

for selection in selections:
    home_squad = selection.find('ul', class_='lineup__list is-home')
    home_players = home_squad.find_all('li', class_='lineup__player')

    # Collect every home player's name, then keep the first four by index.
    list_home = [home_player.find('a').text for home_player in home_players]
    start_11 = [list_home[i] for i in range(4)]

    # 'home.csv' is rewritten on each pass of this loop, so after the loop
    # it holds only the four names of the last selection processed.
    df_h = pd.DataFrame(start_11)
    df_h.to_csv('home.csv', index=False, header=False)
Unfortunately I'm able to save only the last occurrence of the inner loop, thus just the last 4 players. I'm stuck, since if I try to append inside the inner loop I get the letters of the names, or "out of range", if I go to the outermost loop I get only the last player in the last team.
What am I missing? Thanks for the help.
CodePudding user response:
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Browser-style User-Agent header sent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17'}

r = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS', headers=headers)
soup = BeautifulSoup(r.text, 'html.parser')

# Every home lineup list on the page; only the first two are used below.
players_lists = soup.select('ul.lineup__list.is-home')

top_4_players = []
for home_list in players_lists[:2]:
    # First four player entries of this list, reduced to the link text.
    first_four = home_list.select('li.lineup__player')[:4]
    top_4_players.extend(entry.select_one('a').text for entry in first_four)

df = pd.DataFrame(top_4_players, columns=['Top 4 Players'])
df.to_csv('home.csv', index=False, header=False)
# Bare expression: shows the frame when run in an interactive session.
df
This returns (for only 2 home lineup lists):
Top 4 Players
0 Sean Johnson
1 Malte Amundsen
2 T. Martins
3 A. Callens
4 Q. Westberg
5 D. Criscito
6 S. O'Neill
7 L. MacNaughton
CodePudding user response:
I had a similar script and changed it a bit for you. It will show much more than just the names: you can find out whether the player has an injury, his position, and his name.
from bs4 import BeautifulSoup
import requests
url = 'https://www.rotowire.com/soccer/lineups.php?league=MLS'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')

# All lineup boxes except the last one, which the script deliberately skips
# (presumably it is not a real match box -- TODO confirm against the page).
boxes = soup.find_all('div', class_='lineup__box')[:-1]

# Maps the team label (text of the team <div>) to a list of player dicts
# with the keys 'Name', 'Position', 'Link' and 'Injury'.
data = {}
for place in ['is-home', 'is-visit']:
    for box in boxes:
        # NOTE(review): Beautiful Soup treats a list of class values as
        # "match any of these", not "match all of these". These lookups may
        # therefore return the same first team/list for both values of
        # `place` -- verify; a CSS selector such as
        # box.select_one('div.lineup__team.' + place) would require both.
        team = box.find('div', {'class': ['lineup__team', place]}).get_text(strip=True)

        players = []
        for player in box.find('ul', {'class': ['lineup__list', place]}).find_all('li', class_='lineup__player'):
            anchor = player.find('a')
            # The injury marker is optional; 'Injury' stays None without it.
            injury_tag = player.find('span', class_='lineup__inj')
            players.append({
                'Name': anchor.get_text(strip=True),
                'Position': player.find('div', class_='lineup__pos').get_text(strip=True),
                'Link': anchor.get('href'),
                'Injury': injury_tag.get_text(strip=True) if injury_tag is not None else None,
            })
        data[team] = players

# Print each team followed by the names of its first four players.
for team in data:
    print('Team:' + team)
    for player in data[team][:4]:
        print(player['Name'])
OUTPUT:
Team:NYC
Sean Johnson
Malte Amundsen
T. Martins
A. Callens
Team:TOR
Q. Westberg
D. Criscito
S. O'Neill
L. MacNaughton
Team:ORL
P. Gallese
Kyle Smith
R. Jansson
Rodrigo Schlegel
Team:CLB
Eloy Room
J. Mensah
J. Williams
J. Anibaba
Team:CIN
R. Celentano
G. Cameron
T. Blackett
N. Hagglund
Team:DCU
Rafael Romo
Sami Guediri
B. Hines-Ike
S. Birnbaum
Team:SKC
J. Pulskamp
Logan Ndenbe
A. Fontas
N. Isimat-Mirin
Team:HOU
Steve Clark
A. Lundqvist
T. Hadebe
Tim Parker
Team:WHI
I. Boehmer
R. Veselinovic
T. Blackmon
J. Nerwinski
Team:SOU
Stefan Frei
Nouhou Tolo
X. Arreaga
Yeimar Gomez
Team:RSL
Zac MacMath
Andrew Brody
Justen Glad
M. Silva
Team:POR
Aljaz Ivacic
C. Bravo
L. Mabiala
D. Zuparic
Team:ATX
Brad Stuver
J. Gallagher
R. Gabrielsen
J. Cascante
Team:GAL
J. Bond
R. Edwards
S. Coulibaly
D. Williams
Team:LAF
J. Gaines
D. Musovski
B. Rodriguez
Erik Duenas
Full output:
Team:NYC
{'Name': 'Sean Johnson', 'Position': 'GK', 'Link': '/soccer/player/sean-johnson-17054', 'Injury': None}
{'Name': 'Malte Amundsen', 'Position': 'DL', 'Link': '/soccer/player/malte-amundsen-34461', 'Injury': None}
{'Name': 'T. Martins', 'Position': 'DC', 'Link': '/soccer/player/thiago-martins-24061', 'Injury': None}
{'Name': 'A. Callens', 'Position': 'DC', 'Link': '/soccer/player/alexander-callens-23428', 'Injury': None}
Team:TOR
{'Name': 'Q. Westberg', 'Position': 'GK', 'Link': '/soccer/player/quentin-westberg-27608', 'Injury': None}
{'Name': 'D. Criscito', 'Position': 'DL', 'Link': '/soccer/player/domenico-criscito-16059', 'Injury': None}
{'Name': "S. O'Neill", 'Position': 'DC', 'Link': '/soccer/player/shane-oneill-18957', 'Injury': None}
{'Name': 'L. MacNaughton', 'Position': 'DC', 'Link': '/soccer/player/lukas-macnaughton-36348', 'Injury': None}
Team:ORL
{'Name': 'P. Gallese', 'Position': 'GK', 'Link': '/soccer/player/pedro-gallese-5303', 'Injury': None}
{'Name': 'Kyle Smith', 'Position': 'DL', 'Link': '/soccer/player/kyle-smith-27215', 'Injury': None}
{'Name': 'R. Jansson', 'Position': 'DC', 'Link': '/soccer/player/robin-jansson-27664', 'Injury': None}
{'Name': 'Rodrigo Schlegel', 'Position': 'DC', 'Link': '/soccer/player/rodrigo-schlegel-30948', 'Injury': None}
Team:CLB
{'Name': 'Eloy Room', 'Position': 'GK', 'Link': '/soccer/player/eloy-room-26699', 'Injury': None}
{'Name': 'J. Mensah', 'Position': 'DC', 'Link': '/soccer/player/jonathan-mensah-15891', 'Injury': None}
{'Name': 'J. Williams', 'Position': 'DC', 'Link': '/soccer/player/josh-williams-17090', 'Injury': None}
{'Name': 'J. Anibaba', 'Position': 'DC', 'Link': '/soccer/player/jalil-anibaba-17048', 'Injury': None}
Team:CIN
{'Name': 'R. Celentano', 'Position': 'GK', 'Link': '/soccer/player/roman-celentano-36502', 'Injury': None}
{'Name': 'G. Cameron', 'Position': 'DC', 'Link': '/soccer/player/geoff-cameron-16320', 'Injury': None}
{'Name': 'T. Blackett', 'Position': 'DC', 'Link': '/soccer/player/tyler-blackett-17942', 'Injury': None}
{'Name': 'N. Hagglund', 'Position': 'DC', 'Link': '/soccer/player/nick-hagglund-18854', 'Injury': None}
Team:DCU
{'Name': 'Rafael Romo', 'Position': 'GK', 'Link': '/soccer/player/rafael-romo-16116', 'Injury': None}
{'Name': 'Sami Guediri', 'Position': 'DL', 'Link': '/soccer/player/sami-guediri-34637', 'Injury': None}
{'Name': 'B. Hines-Ike', 'Position': 'DC', 'Link': '/soccer/player/brendan-hines-ike-34544', 'Injury': None}
{'Name': 'S. Birnbaum', 'Position': 'DC', 'Link': '/soccer/player/steve-birnbaum-18636', 'Injury': None}
Team:SKC
{'Name': 'J. Pulskamp', 'Position': 'GK', 'Link': '/soccer/player/john-pulskamp-33317', 'Injury': None}
{'Name': 'Logan Ndenbe', 'Position': 'DC', 'Link': '/soccer/player/logan-ndenbe-36246', 'Injury': None}
{'Name': 'A. Fontas', 'Position': 'DC', 'Link': '/soccer/player/andreu-fontas-20565', 'Injury': None}
{'Name': 'N. Isimat-Mirin', 'Position': 'DC', 'Link': '/soccer/player/nicolas-isimat-mirin-19443', 'Injury': None}
Team:HOU
{'Name': 'Steve Clark', 'Position': 'GK', 'Link': '/soccer/player/steve-clark-18721', 'Injury': None}
{'Name': 'A. Lundqvist', 'Position': 'DL', 'Link': '/soccer/player/adam-lundqvist-25811', 'Injury': None}
{'Name': 'T. Hadebe', 'Position': 'DC', 'Link': '/soccer/player/teenage-hadebe-31664', 'Injury': None}
{'Name': 'Tim Parker', 'Position': 'DC', 'Link': '/soccer/player/tim-parker-18932', 'Injury': None}
Team:WHI
{'Name': 'I. Boehmer', 'Position': 'GK', 'Link': '/soccer/player/isaac-boehmer-33098', 'Injury': None}
{'Name': 'R. Veselinovic', 'Position': 'DC', 'Link': '/soccer/player/ranko-veselinovic-30989', 'Injury': None}
{'Name': 'T. Blackmon', 'Position': 'DC', 'Link': '/soccer/player/tristan-blackmon-25566', 'Injury': None}
{'Name': 'J. Nerwinski', 'Position': 'DC', 'Link': '/soccer/player/jake-nerwinski-23510', 'Injury': None}
Team:SOU
{'Name': 'Stefan Frei', 'Position': 'GK', 'Link': '/soccer/player/stefan-frei-18658', 'Injury': 'QUES'}
{'Name': 'Nouhou Tolo', 'Position': 'DL', 'Link': '/soccer/player/nouhou-tolo-23430', 'Injury': None}
{'Name': 'X. Arreaga', 'Position': 'DC', 'Link': '/soccer/player/xavier-arreaga-27919', 'Injury': None}
{'Name': 'Yeimar Gomez', 'Position': 'DC', 'Link': '/soccer/player/yeimar-gomez-30976', 'Injury': None}
Team:RSL
{'Name': 'Zac MacMath', 'Position': 'GK', 'Link': '/soccer/player/zac-macmath-18844', 'Injury': None}
{'Name': 'Andrew Brody', 'Position': 'DL', 'Link': '/soccer/player/andrew-brody-34542', 'Injury': None}
{'Name': 'Justen Glad', 'Position': 'DC', 'Link': '/soccer/player/justen-glad-18802', 'Injury': None}
{'Name': 'M. Silva', 'Position': 'DC', 'Link': '/soccer/player/marcelo-silva-12977', 'Injury': None}
Team:POR
{'Name': 'Aljaz Ivacic', 'Position': 'GK', 'Link': '/soccer/player/aljaz-ivacic-27224', 'Injury': None}
{'Name': 'C. Bravo', 'Position': 'DL', 'Link': '/soccer/player/claudio-nicolas-bravo-34541', 'Injury': None}
{'Name': 'L. Mabiala', 'Position': 'DC', 'Link': '/soccer/player/larrys-mabiala-24574', 'Injury': None}
{'Name': 'D. Zuparic', 'Position': 'DC', 'Link': '/soccer/player/dario-zuparic-21718', 'Injury': None}
Team:ATX
{'Name': 'Brad Stuver', 'Position': 'GK', 'Link': '/soccer/player/brad-stuver-18866', 'Injury': None}
{'Name': 'J. Gallagher', 'Position': 'DL', 'Link': '/soccer/player/jon-gallagher-25490', 'Injury': None}
{'Name': 'R. Gabrielsen', 'Position': 'DC', 'Link': '/soccer/player/ruben-gabrielsen-30690', 'Injury': None}
{'Name': 'J. Cascante', 'Position': 'DC', 'Link': '/soccer/player/julio-cascante-25531', 'Injury': None}
Team:GAL
{'Name': 'J. Bond', 'Position': 'GK', 'Link': '/soccer/player/jonathan-bond-19111', 'Injury': None}
{'Name': 'R. Edwards', 'Position': 'DL', 'Link': '/soccer/player/raheem-edwards-21458', 'Injury': None}
{'Name': 'S. Coulibaly', 'Position': 'DC', 'Link': '/soccer/player/sega-coulibaly-34663', 'Injury': None}
{'Name': 'D. Williams', 'Position': 'DC', 'Link': '/soccer/player/derrick-williams-31084', 'Injury': None}
Team:LAF
{'Name': 'J. Gaines', 'Position': 'D', 'Link': '/soccer/player/julian-gaines-35748', 'Injury': 'QUES'}
{'Name': 'D. Musovski', 'Position': 'F', 'Link': '/soccer/player/danny-musovski-30911', 'Injury': 'QUES'}
{'Name': 'B. Rodriguez', 'Position': 'F', 'Link': '/soccer/player/brian-rodriguez-29638', 'Injury': 'QUES'}
{'Name': 'Erik Duenas', 'Position': 'M/D', 'Link': '/soccer/player/erik-duenas-33112', 'Injury': 'OUT'}
CodePudding user response:
Don't write to the file inside the loop; write once, after all the loops have finished.
You should also append to a list that is created before all the loops.
Finally, use the slices `[:4]` and `[:2]` instead of `range()` and an extra `for` loop.
from bs4 import BeautifulSoup
import requests
import pandas as pd
html_text = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS').text
soup = BeautifulSoup(html_text, 'html.parser')
lineups = soup.find_all('div', class_='lineup is-soccer')

# --- before loop ---

# Single list shared by every iteration; created once, before looping.
all_results = []

# --- loop ---

for selection in lineups[:2]:  # <-- slice `[:2]` takes the first two lineups
    home_squad = selection.find('ul', class_='lineup__list is-home')
    home_players = home_squad.find_all('li', class_='lineup__player')
    # <-- slice `[:4]` takes the first four players; their names are
    #     added to the shared list
    all_results.extend(
        home_player.find('a').text for home_player in home_players[:4]
    )

# --- after loop ---

# Write once, when all names have been collected.
df_h = pd.DataFrame(all_results)
df_h.to_csv('home.csv', index=False, header=False)
And if you want to write inside the loop, then you have to use append mode: `to_csv(..., mode="a")`.
from bs4 import BeautifulSoup
import requests
import pandas as pd
html_text = requests.get('https://www.rotowire.com/soccer/lineups.php?league=MLS').text
soup = BeautifulSoup(html_text, 'html.parser')
lineups = soup.find_all('div', class_='lineup is-soccer')

# --- before loop ---

# --- loop ---

for position, selection in enumerate(lineups[:2]):
    home_squad = selection.find('ul', class_='lineup__list is-home')
    home_players = home_squad.find_all('li', class_='lineup__player')

    # Names of the first four home players of this lineup only.
    partial_results = [
        home_player.find('a').text for home_player in home_players[:4]
    ]

    df_h = pd.DataFrame(partial_results)
    # The first write truncates the file so rows left over from an earlier
    # run do not pile up; every later write appends to it.
    write_mode = 'w' if position == 0 else 'a'
    df_h.to_csv('home.csv', index=False, header=False, mode=write_mode)

# --- after loop ---
CodePudding user response:
Is this what you are looking for?
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 22 13:16:32 2022
@author:
"""
from bs4 import BeautifulSoup
import requests
import pandas as pd
html_text = requests.get("https://www.rotowire.com/soccer/lineups.php?league=MLS").text
soup = BeautifulSoup(html_text, "html.parser")
lineups = soup.find_all("div", class_="lineup is-soccer")
# print(lineups)

# First two lineup boxes; a slice does not raise when the page has fewer.
selections = lineups[:2]

# Created once, before the loops, so names from every team accumulate here.
list_home = []

for selection in selections:
    home_squad = selection.find("ul", class_="lineup__list is-home")
    home_players = home_squad.find_all("li", class_="lineup__player")
    # Only the first four players of each home squad are kept.
    for home_player in home_players[0:4]:
        h_player_name = home_player.find("a").text
        print(h_player_name)
        list_home.append(h_player_name)

# Write a single time, after all teams have been processed.
df_h = pd.DataFrame(list_home)
df_h.to_csv("home.csv", index=False, header=False)