Home > Net >  Automating a web scraping process in R
Automating a web scraping process in R

Time:10-11

I'm trying to automate a web scrape from transfermarkt.com. So far I have scraped basic details for the Premier League (the English top division). I want to automate this so that it produces the same details for the top 25 leagues in Europe (ranked by market value at "https://www.transfermarkt.com/wettbewerbe/europa").

Here is my code so far to get the data I wanted for premier league

library(tidyverse)
library(magrittr) # better handling of pipes
library(purrr) # to work with lists and map functions
library(glue) # to paste strings
library(stringr) # to hand strings

library(rvest) # rvest makes scraping easier
library(polite) # polite is the "polite" version of rvest
library(xml2) # makes it easier to work with HTML and XML from R

# League overview page for the Premier League (competition id GB1).
# The address is assembled with paste0() so it stays one unbroken string:
# a line break inside the literal would embed whitespace in the URL.
epl_url <- paste0(
  "http://www.transfermarkt.com/",
  "premier-league/startseite/wettbewerb/GB1"
)
page_link <- epl_url %>% read_html()

# Quick interactive inspection of the parsed document.
page_link %>% typeof()
page_link %>% glimpse()
page_link %>% html_structure()

# CSS selectors for the three fields pulled from the league table.
name_selector <- '#yw1 .no-border-links a:nth-child(1)'
market_value_selector <- 'td.rechts a'
avg_age_selector <- '#yw1 tbody .zentriert:nth-child(4)'

club_name <- page_link %>%
  html_elements(name_selector) %>%
  html_text2()

market_value <- page_link %>%
  html_elements(market_value_selector) %>%
  html_text

average_age <- page_link %>%
  html_elements(avg_age_selector) %>%
  html_text

# Print the scraped vectors to check them by eye.
club_name
market_value
average_age

# Extract the club names from an already-parsed league page.
#
# page_link: HTML document to search (e.g. the result of read_html()).
# Returns the text of every node matched by the global `name_selector`.
get_club_name <- function(page_link) {
  name_nodes <- html_elements(page_link, name_selector)
  html_text(name_nodes)
}

# Extract the average-age column from an already-parsed league page.
#
# page_link: HTML document to search (e.g. the result of read_html()).
# Returns the text of every node matched by the global `avg_age_selector`;
# the values are character strings, not numbers.
get_avg_age <- function(page_link) {
  age_nodes <- html_elements(page_link, avg_age_selector)
  html_text(age_nodes)
}

# Extract the market-value column from an already-parsed league page.
#
# page_link: HTML document to search (e.g. the result of read_html()).
# Returns the text of every node matched by the global
# `market_value_selector`.
get_market_val <- function(page_link) {
  value_nodes <- html_elements(page_link, market_value_selector)
  html_text(value_nodes)
}

# Functions combined into one, create a dataframe.

# Download a league page and collect its club table into a tibble.
#
# link: URL (or anything read_html() accepts) of a league overview page.
# Returns a tibble with character columns Club, Average_Age and Market_Value,
# filled from the global selectors name_selector, avg_age_selector and
# market_value_selector.
get_club_details <- function(link) {
  league_page <- read_html(link)

  # Text of every node on the page that matches one CSS selector.
  text_of <- function(selector) {
    html_text(html_elements(league_page, selector))
  }

  tibble(
    Club = text_of(name_selector),
    Average_Age = text_of(avg_age_selector),
    Market_Value = text_of(market_value_selector)
  )
}

 # Testing that the above function works:

 get_club_details(epl_url)

 # Wrangling the data

 epl_table_df <- get_club_details(epl_url)
 epl_table_df %>% glimpse()

 # Average_Age holds text, so it is converted to numeric before it is
 # compared with the thresholds; comparing a number with a string would
 # fall back to string comparison. The column itself is left unchanged.
 # Ages outside (23, 28], or values that are not numeric, give Tier = NA.
 epl_table_df %<>%
   mutate(Tier = {
     age <- as.numeric(Average_Age)
     case_when(
       27 < age & age <= 28 ~ "Experienced",
       25 < age & age <= 27 ~ "Moderately Experienced ",
       23 < age & age <= 25 ~ "Inexperienced"
     )
   })
                     
 # Scrape one league overview page and label every club by squad age.
 #
 # epl_url: URL of a league page, passed straight to get_club_details().
 # Returns the tibble from get_club_details() with an extra character
 # column `Tier`. Clubs whose average age lies outside (23, 28], or is not
 # numeric, get Tier = NA.
 generate_club_details_df <- function(epl_url) {
   epl_table_df <- get_club_details(epl_url)

   epl_table_df %>%
     mutate(Tier = {
       # Average_Age is scraped as text; convert it so the thresholds are
       # compared numerically rather than as strings. The column itself is
       # left unchanged.
       age <- as.numeric(Average_Age)
       case_when(
         27 < age & age <= 28 ~ "Experienced",
         25 < age & age <= 27 ~ "Moderately Experienced ",
         23 < age & age <= 25 ~ "Inexperienced"
       )
     })
 }

 generate_club_details_df(epl_url)

CodePudding user response:

You can consider the following approach:

library(rvest)
library(RSelenium)

# Start a Selenium-driven Chrome session on a randomised port (4444 plus a
# Poisson draw), presumably to avoid clashing with a port still in use.
port <- as.integer(4444L + rpois(lambda = 1000, 1))
rd <- rsDriver(chromever = "105.0.5195.52", browser = "chrome", port = port)
remDr <- rd$client
remDr$open()

url <- "https://www.transfermarkt.com/wettbewerbe/europa"
remDr$navigate(url)

# The CSS selectors are identical for every league page, so define them once
# instead of on every loop iteration.
name_selector <- '#yw1 .no-border-links a:nth-child(1)'
market_value_selector <- 'td.rechts a'
avg_age_selector <- '#yw1 tbody .zentriert:nth-child(4)'

n_leagues <- 25L
list_Url <- vector("list", n_leagues)
list_Info <- vector("list", n_leagues)

for (i in seq_len(n_leagues)) {
  print(i)

  # The link for league i is read from table row i + 1
  # (presumably row 1 is a header/separator row -- verify against the page).
  xpath <- paste0(
    '//*[@id="yw1"]/table/tbody/tr[', i + 1,
    ']/td[1]/table/tbody/tr/td[2]/a'
  )
  web_Obj <- remDr$findElement("xpath", xpath)
  list_Url[[i]] <- web_Obj$getElementAttribute("href")[[1]]

  # The league page itself is fetched with rvest, not through the browser.
  page_link <- list_Url[[i]] %>% read_html()

  club_name <- page_link %>%
    html_elements(name_selector) %>%
    html_text2()
  market_value <- page_link %>%
    html_elements(market_value_selector) %>%
    html_text2()
  average_age <- page_link %>%
    html_elements(avg_age_selector) %>%
    html_text2()

  list_Info[[i]] <- list(
    club_name = club_name,
    market_value = market_value,
    average_age = average_age
  )
}

# Release the browser session and the Selenium server once scraping is done.
remDr$close()
rd$server$stop()

list_Info

[[1]]
[[1]]$club_name
 [1] "Manchester City"         "Liverpool FC"            "Chelsea FC"              "Manchester United"       "Tottenham Hotspur"      
 [6] "Arsenal FC"              "Aston Villa"             "West Ham United"         "Leicester City"          "Everton FC"             
[11] "Newcastle United"        "Wolverhampton Wanderers" "Southampton FC"          "Brentford FC"            "Nottingham Forest"      
[16] "Leeds United"            "Crystal Palace"          "Brighton & Hove Albion"  "Fulham FC"               "AFC Bournemouth"        

[[1]]$market_value
 [1] "€1.06bn"  "€925.00m" "€861.50m" "€792.30m" "€685.30m" "€665.50m" "€505.60m" "€471.90m" "€454.10m" "€431.80m" "€422.10m" "€386.25m"
[13] "€316.05m" "€299.90m" "€298.15m" "€286.30m" "€280.70m" "€278.60m" "€232.30m" "€175.90m"

[[1]]$average_age
 [1] "27.2" "26.8" "26.6" "26.2" "26.2" "24.4" "27.2" "27.8" "27.3" "27.4" "27.5" "25.7" "24.9" "25.8" "26.3" "24.9" "26.9" "25.0" "27.6"
[20] "26.1"


[[2]]
[[2]]$club_name
 [1] "Real Madrid"            "FC Barcelona"           "Atlético de Madrid"     "Real Sociedad"          "Villarreal CF"         
 [6] "Sevilla FC"             "Real Betis Balompié"    "Valencia CF"            "Athletic Bilbao"        "Getafe CF"             
[11] "Celta de Vigo"          "CA Osasuna"             "RCD Espanyol Barcelona" "Girona FC"              "UD Almería"            
[16] "Rayo Vallecano"         "Elche CF"               "RCD Mallorca"           "Cádiz CF"               "Real Valladolid CF"    

[[2]]$market_value
 [1] "€837.50m" "€813.70m" "€611.50m" "€371.30m" "€360.20m" "€276.50m" "€260.00m" "€237.20m" "€237.00m" "€144.30m" "€128.70m" "€124.00m"
[13] "€92.20m"  "€91.60m"  "€87.00m"  "€82.50m"  "€72.20m"  "€71.60m"  "€65.00m"  "€65.00m" 

[[2]]$average_age
 [1] "27.4" "25.4" "27.8" "24.8" "27.8" "28.5" "28.8" "24.4" "27.2" "27.2" "26.0" "27.6" "26.5" "25.8" "25.3" "28.0" "28.0" "27.4" "28.6"
[20] "27.9"


[[3]]
[[3]]$club_name
 [1] "Inter Milan"         "AC Milan"            "Juventus FC"         "SSC Napoli"          "AS Roma"             "Atalanta BC"        
 [7] "ACF Fiorentina"      "SS Lazio"            "US Sassuolo"         "Torino FC"           "Bologna FC 1909"     "AC Monza"           
[13] "Udinese Calcio"      "Hellas Verona"       "US Salernitana 1919" "FC Empoli"           "UC Sampdoria"        "US Cremonese"       
[19] "Spezia Calcio"       "US Lecce"           

[[3]]$market_value
 [1] "€592.95m" "€550.55m" "€502.40m" "€480.55m" "€393.35m" "€346.00m" "€260.15m" "€254.85m" "€205.60m" "€151.66m" "€140.05m" "€124.85m"
[13] "€119.95m" "€100.85m" "€99.95m"  "€91.75m"  "€82.23m"  "€76.88m"  "€74.35m"  "€64.85m" 

[[3]]$average_age
 [1] "27.5" "26.2" "26.7" "25.6" "25.6" "26.1" "25.6" "26.5" "25.0" "24.1" "24.9" "26.5" "25.1" "25.5" "26.3" "24.3" "26.5" "25.0" "24.7"
[20] "23.5"


[[4]]
[[4]]$club_name
 [1] "Bayern Munich"            "RB Leipzig"               "Borussia Dortmund"        "Bayer 04 Leverkusen"     
 [5] "Eintracht Frankfurt"      "VfL Wolfsburg"            "Borussia Mönchengladbach" "TSG 1899 Hoffenheim"     
 [9] "SC Freiburg"              "VfB Stuttgart"            "1.FSV Mainz 05"           "1.FC Union Berlin"       
[13] "Hertha BSC"               "FC Augsburg"              "1. FC Köln"               "SV Werder Bremen"        
[17] "FC Schalke 04"            "VfL Bochum"              

[[4]]$market_value
 [1] "€878.70m" "€485.08m" "€483.15m" "€469.85m" "€227.55m" "€213.40m" "€206.60m" "€178.40m" "€162.80m" "€128.68m" "€104.95m" "€104.30m"
[13] "€104.10m" "€90.60m"  "€90.58m"  "€62.23m"  "€54.70m"  "€45.40m" 

[[4]]$average_age
 [1] "25.5" "25.2" "24.4" "24.4" "24.8" "24.6" "25.5" "25.5" "25.5" "22.8" "24.4" "26.6" "25.2" "26.0" "24.8" "24.4" "26.7" "27.3"


[[5]]
[[5]]$club_name
 [1] "Paris Saint-Germain"  "AS Monaco"            "Stade Rennais FC"     "Olympique Lyon"       "Olympique Marseille" 
 [6] "OGC Nice"             "LOSC Lille"           "RC Lens"              "FC Nantes"            "Montpellier HSC"     
[11] "RC Strasbourg Alsace" "Stade Reims"          "FC Lorient"           "Stade Brestois 29"    "ESTAC Troyes"        
[16] "Angers SCO"           "FC Toulouse"          "AJ Auxerre"           "Clermont Foot 63"     "AC Ajaccio"          

[[5]]$market_value
 [1] "€890.35m" "€347.60m" "€290.05m" "€266.75m" "€255.00m" "€234.75m" "€188.80m" "€141.25m" "€112.55m" "€111.05m" "€109.40m" "€83.20m" 
[13] "€74.05m"  "€71.35m"  "€69.80m"  "€65.90m"  "€51.10m"  "€36.30m"  "€33.40m"  "€25.05m" 

[[5]]$average_age
 [1] "25.3" "23.7" "23.5" "25.0" "24.6" "24.9" "24.8" "25.4" "25.8" "24.9" "26.3" "24.1" "24.0" "25.3" "25.0" "24.5" "24.0" "26.3" "25.9"
[20] "27.4"


[[6]]
[[6]]$club_name
 [1] "SL Benfica"           "FC Porto"             "Sporting CP"          "SC Braga"             "FC Famalicão"        
 [6] "Vitória Guimarães SC" "Gil Vicente FC"       "FC Paços de Ferreira" "GD Estoril Praia"     "CD Santa Clara"      
[11] "Boavista FC"          "Portimonense SC"      "Rio Ave FC"           "CS Marítimo"          "FC Vizela"           
[16] "Casa Pia AC"          "FC Arouca"            "GD Chaves"           

[[6]]$market_value
 [1] "€260.30m" "€257.00m" "€229.80m" "€114.70m" "€40.80m"  "€31.50m"  "€27.20m"  "€27.20m"  "€25.40m"  "€25.10m"  "€19.25m"  "€17.08m" 
[13] "€15.63m"  "€15.35m"  "€14.68m"  "€14.55m"  "€12.33m"  "€9.78m"  

[[6]]$average_age
 [1] "25.5" "26.1" "24.4" "24.5" "24.1" "23.7" "25.3" "24.8" "23.6" "24.6" "24.7" "24.0" "25.6" "25.1" "24.3" "26.6" "25.7" "25.8"


[[7]]
[[7]]$club_name
 [1] "Fenerbahce"       "Galatasaray"      "Trabzonspor"      "Besiktas JK"      "Basaksehir FK"    "Adana Demirspor"  "Konyaspor"       
 [8] "Alanyaspor"       "Antalyaspor"      "MKE Ankaragücü"   "Sivasspor"        "Fatih Karagümrük" "Kayserispor"      "Kasimpasa"       
[15] "Gaziantep FK"     "Hatayspor"        "Giresunspor"      "Istanbulspor"     "Ümraniyespor"    

[[7]]$market_value
 [1] "€178.58m" "€153.35m" "€152.03m" "€123.68m" "€55.70m"  "€45.13m"  "€42.78m"  "€41.70m"  "€37.18m"  "€36.35m"  "€31.98m"  "€30.15m" 
[13] "€29.85m"  "€29.08m"  "€28.80m"  "€28.13m"  "€17.33m"  "€12.03m"  "€10.03m" 

[[7]]$average_age
 [1] "25.8" "25.6" "25.7" "26.7" "26.7" "27.2" "26.7" "24.8" "27.4" "27.1" "28.3" "25.9" "25.8" "26.8" "26.0" "26.5" "25.1" "24.7" "27.4"


[[8]]
[[8]]$club_name
 [1] "Ajax Amsterdam"        "PSV Eindhoven"         "Feyenoord Rotterdam"   "AZ Alkmaar"            "Twente Enschede FC"   
 [6] "Vitesse Arnhem"        "FC Utrecht"            "FC Groningen"          "SC Heerenveen"         "Fortuna Sittard"      
[11] "NEC Nijmegen"          "SC Cambuur-Leeuwarden" "RKC Waalwijk"          "FC Emmen"              "Sparta Rotterdam"     
[16] "FC Volendam"           "Go Ahead Eagles"       "Excelsior Rotterdam"  

[[8]]$market_value
 [1] "€295.80m" "€218.00m" "€136.35m" "€71.80m"  "€42.95m"  "€42.33m"  "€41.40m"  "€29.93m"  "€23.00m"  "€20.43m"  "€19.53m"  "€16.56m" 
[13] "€11.75m"  "€11.68m"  "€11.55m"  "€11.55m"  "€11.33m"  "€10.13m" 

[[8]]$average_age
 [1] "24.7" "24.1" "23.1" "22.6" "24.2" "22.4" "25.0" "22.9" "23.8" "25.4" "24.0" "24.5" "25.5" "24.7" "26.2" "22.4" "23.7" "23.1"


[[9]]
[[9]]$club_name
 [1] "Club Brugge KV"              "RSC Anderlecht"              "KRC Genk"                    "Royal Antwerp FC"           
 [5] "KAA Gent"                    "Standard Liège"              "Royale Union Saint Gilloise" "RSC Charleroi"              
 [9] "Cercle Brugge"               "KV Mechelen"                 "KVC Westerlo"                "Oud-Heverlee Leuven"        
[13] "KV Kortrijk"                 "KV Oostende"                 "KAS Eupen"                   "Sint-Truidense VV"          
[17] "SV Zulte Waregem"            "RFC Seraing"                

[[9]]$market_value
 [1] "€161.05m" "€99.35m"  "€94.15m"  "€77.10m"  "€69.35m"  "€47.30m"  "€37.30m"  "€36.38m"  "€30.05m"  "€29.85m"  "€29.40m"  "€29.15m" 
[13] "€24.43m"  "€20.98m"  "€18.08m"  "€16.80m"  "€14.10m"  "€13.10m" 

[[9]]$average_age
 [1] "24.2" "23.2" "22.6" "25.2" "25.7" "23.6" "24.8" "23.5" "23.2" "24.9" "25.6" "24.9" "25.1" "23.3" "22.8" "26.6" "24.2" "23.6"


[[10]]
[[10]]$club_name
 [1] "Zenit St. Petersburg"     "Dynamo Moscow"            "Spartak Moscow"           "Lokomotiv Moscow"        
 [5] "CSKA Moscow"              "FK Krasnodar"             "FK Rostov"                "Krylya Sovetov Samara"   
 [9] "FC Sochi"                 "Akhmat Grozny"            "Ural Yekaterinburg"       "FK Khimki"               
[13] "FC Pari Nizhniy Novgorod" "Torpedo Moscow"           "FK Orenburg"              "Fakel Voronezh"          

[[10]]$market_value
 [1] "€156.80m" "€87.70m"  "€79.48m"  "€73.25m"  "€71.65m"  "€68.48m"  "€37.90m"  "€31.13m"  "€27.05m"  "€26.33m"  "€21.65m"  "€20.68m" 
[13] "€18.80m"  "€16.60m"  "€15.43m"  "€9.53m"  

[[10]]$average_age
 [1] "26.3" "24.5" "24.7" "23.3" "24.1" "23.4" "24.1" "23.8" "27.1" "26.1" "26.5" "26.0" "24.7" "27.0" "24.9" "27.3"


[[11]]
[[11]]$club_name
 [1] "Olympiacos Piraeus"   "AEK Athens"           "PAOK Thessaloniki"    "Panathinaikos Athens" "Aris Thessaloniki"   
 [6] "OFI Crete FC"         "Volos NPS"            "APO Levadiakos"       "Atromitos Athen"      "Panetolikos GFS"     
[11] "Ionikos Nikeas"       "Asteras Tripolis"     "PAS Giannina"         "PAS Lamia 1964"      

[[11]]$market_value
 [1] "€138.90m" "€54.55m"  "€48.85m"  "€39.00m"  "€27.88m"  "€15.75m"  "€12.10m"  "€11.63m"  "€11.30m"  "€11.15m"  "€10.83m"  "€10.45m" 
[13] "€9.63m"   "€8.23m"  

[[11]]$average_age
 [1] "27.6" "27.8" "25.3" "26.8" "25.6" "26.2" "25.5" "27.1" "25.2" "25.5" "27.4" "27.0" "24.4" "27.8"


[[12]]
[[12]]$club_name
 [1] "Red Bull Salzburg"     "SK Sturm Graz"         "LASK"                  "Rapid Vienna"          "Austria Vienna"       
 [6] "Wolfsberger AC"        "SK Austria Klagenfurt" "SCR Altach"            "TSV Hartberg"          "WSG Tirol"            
[11] "SV Ried"               "SC Austria Lustenau"  

[[12]]$market_value
 [1] "€203.93m" "€36.38m"  "€25.53m"  "€23.58m"  "€20.13m"  "€14.73m"  "€10.55m"  "€10.30m"  "€9.75m"   "€9.25m"   "€8.55m"   "€8.20m"  

[[12]]$average_age
 [1] "22.1" "23.2" "26.3" "24.1" "23.9" "24.0" "24.9" "24.1" "25.9" "23.1" "24.3" "22.8"


[[13]]
[[13]]$club_name
 [1] "Rangers FC"             "Celtic FC"              "Heart of Midlothian FC" "Hibernian FC"           "Aberdeen FC"           
 [6] "Dundee United FC"       "St. Johnstone FC"       "Kilmarnock FC"          "Motherwell FC"          "Livingston FC"         
[11] "St. Mirren FC"          "Ross County FC"        

[[13]]$market_value
 [1] "€127.33m" "€107.90m" "€15.43m"  "€15.25m"  "€10.18m"  "€10.18m"  "€9.48m"   "€9.30m"   "€8.70m"   "€8.60m"   "€7.93m"   "€7.80m"  

[[13]]$average_age
 [1] "26.2" "25.8" "26.5" "24.8" "24.5" "25.3" "27.1" "25.9" "26.0" "26.8" "26.5" "24.8"


[[14]]
[[14]]$club_name
 [1] "Dynamo Kyiv"           "Shakhtar Donetsk"      "SK Dnipro-1"           "Vorskla Poltava"       "Zorya Lugansk"        
 [6] "FK Oleksandriya"       "Kolos Kovalivka"       "Kryvbas Kryvyi Rig"    "Chornomorets Odessa"   "Ingulets Petrove"     
[11] "Metalist 1925 Kharkiv" "NK Veres Rivne"        "Rukh Lviv"             "Metalist Kharkiv"      "PFK Lviv"             
[16] "FK Minaj"             

[[14]]$market_value
 [1] "€110.40m" "€80.30m"  "€23.85m"  "€12.45m"  "€11.90m"  "€9.53m"   "€9.20m"   "€6.78m"   "€6.65m"   "€5.58m"   "€5.55m"   "€4.90m"  
[13] "€4.90m"   "€4.63m"   "€4.40m"   "€4.05m"  

[[14]]$average_age
 [1] "24.7" "24.7" "25.6" "25.6" "23.7" "23.8" "26.9" "25.5" "27.6" "26.4" "25.3" "27.7" "23.2" "26.3" "24.2" "25.5"


[[15]]
[[15]]$club_name
 [1] "GNK Dinamo Zagreb"        "HNK Hajduk Split"         "NK Osijek"                "HNK Rijeka"              
 [5] "NK Lokomotiva Zagreb"     "HNK Gorica"               "NK Varazdin"              "NK Istra 1961"           
 [9] "HNK Sibenik"              "Slaven Belupo Koprivnica"

[[15]]$market_value
 [1] "€95.95m" "€46.80m" "€36.45m" "€23.49m" "€20.13m" "€16.20m" "€9.74m"  "€9.03m"  "€8.55m"  "€6.75m" 

[[15]]$average_age
 [1] "25.4" "25.3" "25.9" "23.1" "23.2" "22.8" "23.7" "23.4" "24.4" "25.3"


[[16]]
[[16]]$club_name
 [1] "FC Copenhagen"    "FC Midtjylland"   "Bröndby IF"       "FC Nordsjaelland" "Aarhus GF"        "Randers FC"       "Aalborg BK"      
 [8] "Odense Boldklub"  "Silkeborg IF"     "Lyngby BK"        "Viborg FF"        "AC Horsens"      

[[16]]$market_value
 [1] "€68.30m" "€48.60m" "€34.60m" "€21.90m" "€13.60m" "€13.50m" "€13.25m" "€12.78m" "€11.35m" "€8.85m"  "€8.50m"  "€7.05m" 

[[16]]$average_age
 [1] "24.8" "24.1" "24.1" "20.9" "25.2" "25.4" "23.2" "23.4" "24.1" "24.0" "23.4" "25.6"


[[17]]
[[17]]$club_name
 [1] "BSC Young Boys"          "FC Basel 1893"           "FC Zürich"               "FC St. Gallen 1879"      "Grasshopper Club Zurich"
 [6] "FC Lugano"               "Servette FC"             "FC Sion"                 "FC Luzern"               "FC Winterthur"          

[[17]]$market_value
 [1] "€54.40m" "€38.85m" "€32.85m" "€25.28m" "€20.93m" "€20.15m" "€20.13m" "€20.05m" "€15.80m" "€8.68m" 

[[17]]$average_age
 [1] "24.4" "23.1" "25.3" "23.1" "24.0" "24.2" "24.9" "26.2" "24.3" "24.4"


[[18]]
[[18]]$club_name
 [1] "Rakow Czestochowa"     "Lech Poznan"           "Pogon Szczecin"        "Legia Warszawa"        "Cracovia"             
 [6] "Piast Gliwice"         "Wisla Plock"           "Zaglebie Lubin"        "Jagiellonia Bialystok" "Lechia Gdansk"        
[11] "Górnik Zabrze"         "Slask Wroclaw"         "Miedz Legnica"         "Widzew Lodz"           "Warta Poznan"         
[16] "Radomiak Radom"        "Korona Kielce"         "Stal Mielec"          

[[18]]$market_value
 [1] "€33.43m" "€32.33m" "€19.88m" "€19.00m" "€13.75m" "€12.95m" "€11.63m" "€11.13m" "€10.98m" "€10.50m" "€9.80m"  "€9.78m"  "€9.28m" 
[14] "€9.23m"  "€8.70m"  "€8.05m"  "€6.85m"  "€6.45m" 

[[18]]$average_age
 [1] "25.0" "24.9" "25.3" "25.3" "24.9" "25.6" "24.2" "23.8" "24.1" "25.9" "25.3" "23.3" "23.9" "25.5" "24.2" "24.2" "25.4" "24.7"


[[19]]
[[19]]$club_name
 [1] "SK Slavia Prague"           "AC Sparta Prague"           "FC Viktoria Plzen"          "FC Banik Ostrava"          
 [5] "1.FC Slovacko"              "FK Jablonec"                "FC Slovan Liberec"          "Bohemians Prague 1905"     
 [9] "SK Sigma Olomouc"           "FC Zbrojovka Brno"          "FK Pardubice"               "FK Mlada Boleslav"         
[13] "FC Hradec Kralove"          "SK Dynamo Ceske Budejovice" "FC Trinity Zlin"            "FK Teplice"                

[[19]]$market_value
 [1] "€58.78m" "€41.10m" "€28.50m" "€11.45m" "€10.05m" "€9.60m"  "€8.68m"  "€7.60m"  "€7.48m"  "€7.40m"  "€6.79m"  "€6.53m"  "€6.45m" 
[14] "€6.38m"  "€5.69m"  "€5.60m" 

[[19]]$average_age
 [1] "26.3" "25.8" "27.8" "25.4" "27.6" "25.8" "24.0" "27.5" "26.2" "27.1" "24.4" "25.6" "25.0" "25.4" "26.6" "25.9"


[[20]]
[[20]]$club_name
 [1] "FCSB"                   "CFR Cluj"               "Universitatea Craiova"  "FC Rapid 1923"          "Sepsi OSK Sf. Gheorghe"
 [6] "FCV Farul Constanta"    "FC Voluntari"           "FC U Craiova 1948"      "UTA Arad"               "FC Universitatea Cluj" 
[11] "ACSC FC Arges"          "Petrolul Ploiesti"      "FC Botosani"            "AFC Chindia Targoviste" "FC Hermannstadt"       
[16] "CS Mioveni"            

[[20]]$market_value
 [1] "€35.80m" "€29.18m" "€18.90m" "€17.52m" "€13.24m" "€12.13m" "€10.33m" "€9.68m"  "€9.60m"  "€9.35m"  "€9.10m"  "€8.80m"  "€8.58m" 
[14] "€7.78m"  "€6.68m"  "€6.20m" 

[[20]]$average_age
 [1] "24.5" "26.7" "27.3" "25.7" "25.1" "23.2" "25.9" "24.7" "25.5" "25.3" "27.0" "26.7" "26.2" "24.7" "25.5" "27.1"


[[21]]
[[21]]$club_name
 [1] "Malmö FF"        "Hammarby IF"     "Djurgårdens IF"  "AIK Solna"       "IF Elfsborg"     "BK Häcken"       "IFK Norrköping" 
 [8] "IFK Göteborg"    "Mjällby AIF"     "Kalmar FF"       "IK Sirius"       "Helsingborgs IF" "Degerfors IF"    "IFK Värnamo"    
[15] "Varbergs BoIS"   "GIF Sundsvall"  

[[21]]$market_value
 [1] "€26.55m" "€21.45m" "€18.13m" "€18.05m" "€16.83m" "€16.38m" "€15.50m" "€12.55m" "€9.65m"  "€9.30m"  "€8.50m"  "€8.43m"  "€7.48m" 
[14] "€7.30m"  "€7.05m"  "€5.70m" 

[[21]]$average_age
 [1] "27.5" "25.4" "26.5" "25.4" "24.0" "25.4" "24.7" "25.0" "23.4" "25.3" "24.7" "25.8" "25.8" "24.0" "24.4" "25.7"


[[22]]
[[22]]$club_name
 [1] "Red Star Belgrade"           "FK Partizan Belgrade"        "FK Cukaricki"                "FK Vojvodina Novi Sad"      
 [5] "FK TSC Backa Topola"         "FK Mladost GAT Novi Sad"     "FK Radnik Surdulica"         "FK Radnicki Nis"            
 [9] "FK Novi Pazar"               "FK Vozdovac"                 "FK Spartak Subotica"         "FK Radnicki 1923 Kragujevac"
[13] "FK Napredak Krusevac"        "FK Javor-Matis Ivanjica"     "FK Kolubara Lazarevac"       "FK Mladost Lucani"          

[[22]]$market_value
 [1] "€56.35m" "€36.45m" "€13.45m" "€12.78m" "€9.05m"  "€7.43m"  "€7.08m"  "€7.03m"  "€6.80m"  "€6.58m"  "€6.45m"  "€5.95m"  "€5.83m" 
[14] "€5.68m"  "€5.18m"  "€5.05m" 

[[22]]$average_age
 [1] "26.5" "25.4" "23.4" "23.8" "24.6" "26.3" "25.6" "24.7" "23.9" "21.4" "23.3" "26.4" "24.9" "24.5" "24.9" "24.9"


[[23]]
[[23]]$club_name
 [1] "FK Bodø/Glimt"      "Molde FK"           "Lillestrøm SK"      "Rosenborg BK"       "Vålerenga Fotball"  "Viking FK"         
 [7] "Aalesunds FK"       "Sarpsborg 08 FF"    "Strømsgodset IF"    "Hamarkameratene"    "Kristiansund BK"    "FK Haugesund"      
[13] "Odds BK"            "Sandefjord Fotball" "Tromsø IL"          "FK Jerv"           

[[23]]$market_value
 [1] "€21.80m" "€17.40m" "€12.50m" "€11.93m" "€11.58m" "€9.75m"  "€8.75m"  "€8.25m"  "€8.05m"  "€7.95m"  "€7.35m"  "€7.13m"  "€7.10m" 
[14] "€6.70m"  "€6.23m"  "€6.00m" 

[[23]]$average_age
 [1] "24.1" "24.6" "23.1" "22.3" "23.5" "24.1" "25.5" "25.3" "23.3" "24.5" "24.8" "23.8" "24.3" "24.2" "23.5" "24.5"


[[24]]
[[24]]$club_name
 [1] "Omonia Nicosia"              "Pafos FC"                    "APOEL Nicosia"               "Apollon Limassol"           
 [5] "Aris Limassol"               "Anorthosis Famagusta"        "AEK Larnaca"                 "AEL Limassol"               
 [9] "Karmiotissa Pano Polemidion" "Nea Salamina Famagusta"      "Olympiakos Nicosia"          "Doxa Katokopias"            
[13] "Akritas Chlorakas"           "Enosis Neon Paralimniou"    

[[24]]$market_value
 [1] "€18.80m" "€16.93m" "€16.03m" "€15.75m" "€14.75m" "€14.30m" "€13.53m" "€11.08m" "€8.30m"  "€7.08m"  "€6.73m"  "€5.59m"  "€3.93m" 
[14] "€3.41m" 

[[24]]$average_age
 [1] "26.7" "25.7" "25.7" "25.9" "24.7" "28.4" "25.9" "25.3" "28.8" "28.8" "25.0" "25.3" "22.5" "23.9"


[[25]]
[[25]]$club_name
 [1] "Maccabi Tel Aviv"    "Maccabi Haifa"       "Hapoel Beer Sheva"   "Maccabi Netanya"     "FC Ashdod"           "Beitar Jerusalem"   
 [7] "Hapoel Haifa"        "Hapoel Tel Aviv"     "Ironi Kiryat Shmona" "Hapoel Jerusalem"    "Ihud Bnei Sakhnin"   "Maccabi Bnei Reineh"
[13] "Hapoel Hadera"       "Sekzia Ness Ziona"  

[[25]]$market_value
 [1] "€24.55m" "€20.60m" "€16.60m" "€11.75m" "€9.23m"  "€8.20m"  "€7.80m"  "€6.83m"  "€6.15m"  "€6.10m"  "€5.30m"  "€4.98m"  "€4.63m" 
[14] "€4.50m" 

[[25]]$average_age
 [1] "25.8" "27.4" "27.4" "24.5" "23.4" "23.8" "27.1" "25.3" "23.5" "22.8" "27.1" "26.7" "23.9" "26.2"

CodePudding user response:

Here is another approach that is based on rvest only:

library(rvest)
library(stringr)
# Collect the league links from the European competitions overview page,
# then scrape club name, market value and average age from each league page.
base_url <- "https://www.transfermarkt.com/wettbewerbe/europa"
page <- read_html(base_url)
all_Href <- page %>% html_elements("a") %>% html_attr("href")

# Anchors without an href yield NA; treat those as non-matches so that NA
# entries are not carried into the list of league links.
bool_Href <- !is.na(all_Href) &
  stringr::str_detect(all_Href, "/startseite/wettbewerb/[^ ]*1")
needed_Href <- unique(all_Href[bool_Href])

# The CSS selectors are identical for every league page, so define them once.
name_selector <- '#yw1 .no-border-links a:nth-child(1)'
market_value_selector <- 'td.rechts a'
avg_age_selector <- '#yw1 tbody .zentriert:nth-child(4)'

# Scrape at most 25 leagues, and never index past the links actually found.
n_leagues <- min(25L, length(needed_Href))
list_Info <- vector("list", n_leagues)

for (i in seq_len(n_leagues)) {
  print(i)

  # Resolve the href against the page it was found on, so both relative and
  # absolute links give a valid address, and read that address.
  url <- xml2::url_absolute(needed_Href[i], base_url)
  page_link <- url %>% read_html()

  club_name <- page_link %>%
    html_elements(name_selector) %>%
    html_text2()
  market_value <- page_link %>%
    html_elements(market_value_selector) %>%
    html_text2()
  average_age <- page_link %>%
    html_elements(avg_age_selector) %>%
    html_text2()

  list_Info[[i]] <- list(
    club_name = club_name,
    market_value = market_value,
    average_age = average_age
  )
}
  • Related