I want to scrape this node. It works well for one page, but I need the other 342 pages as well. Between pages, only the final number in the URL changes: 1, 2, 3, ... up to 342.
library(rvest)
library(xml2)
library(httr)
# For page 1
# The URL has to be one unbroken string: a line break inside the literal
# would become part of the address that is requested.
website <- 'https://cgspace.cgiar.org/discover?rpp=10&etal=0&query=cassava&scope=10568/35697&group_by=none&page=1'
# Download and parse the page a single time instead of once per result.
page <- read_html(website)
# One XPath query selects the title anchor of the first 10 result blocks
# (the same nodes as asking for div[1] ... div[10] one at a time).
link <- page %>%
  html_nodes(xpath = paste0(
    '//*[@id="aspect_discovery_SimpleSearch_div_search-results"]',
    '/div[position() <= 10]/div[2]/div/div[1]/a'
  )) %>%
  html_attr('href')
pag <- data.frame(link)
# The hrefs are site-relative (e.g. /handle/10568/71370), so prefix the host
# to obtain absolute URLs.
pag$link2 <- paste0('https://cgspace.cgiar.org', pag$link)
pag
# link link2
# 1 /handle/10568/71370 https://cgspace.cgiar.org/handle/10568/71370
# 2 /handle/10568/43831 https://cgspace.cgiar.org/handle/10568/43831
# 3 /handle/10568/56285 https://cgspace.cgiar.org/handle/10568/56285
# For page 2
# Same search as page 1; only the trailing page number differs. The URL is
# kept on one line so that no line break ends up inside the address.
website <- 'https://cgspace.cgiar.org/discover?rpp=10&etal=0&query=cassava&scope=10568/35697&group_by=none&page=2'
# Fetch and parse the page once, then query the parsed document.
page <- read_html(website)
# Title anchors of the first 10 result blocks, collected in a single query
# rather than one request per result.
link <- page %>%
  html_nodes(xpath = paste0(
    '//*[@id="aspect_discovery_SimpleSearch_div_search-results"]',
    '/div[position() <= 10]/div[2]/div/div[1]/a'
  )) %>%
  html_attr('href')
pag2 <- data.frame(link)
# Turn the site-relative hrefs into absolute URLs.
pag2$link2 <- paste0('https://cgspace.cgiar.org', pag2$link)
pag2
# link link2
# 1 /handle/10568/90626 https://cgspace.cgiar.org/handle/10568/90626
# 2 /handle/10568/71796 https://cgspace.cgiar.org/handle/10568/71796
# 3 /handle/10568/68788 https://cgspace.cgiar.org/handle/10568/68788
The idea is to do this in a single loop and end up with one data frame.
CodePudding user response:
Loop over the page numbers and combine the per-page results into one data frame:
library(rvest)
library(xml2)
library(httr)
# Host used to turn the site-relative hrefs into absolute URLs, and the search
# URL without its trailing page number.
base_url <- 'https://cgspace.cgiar.org'
search_url <- paste0(
  base_url,
  '/discover?rpp=10&etal=0&query=cassava&scope=10568/35697&group_by=none&page='
)
# XPath for the title anchor of the first 10 result blocks on a page.
result_xpath <- paste0(
  '//*[@id="aspect_discovery_SimpleSearch_div_search-results"]',
  '/div[position() <= 10]/div[2]/div/div[1]/a'
)

# Scrape one results page.
#
# page_number: the value appended to the `page=` query parameter.
# Returns a data frame with the relative link (`link`) and the absolute URL
# (`link2`) of every result found on that page.
scrape_page <- function(page_number) {
  # The page is downloaded once; all links come from the same parsed document.
  page <- read_html(paste0(search_url, page_number))
  links <- html_attr(html_nodes(page, xpath = result_xpath), 'href')
  pag <- data.frame(link = links)
  # sprintf() yields a zero-length vector when `links` is empty, so a page
  # without results gives a 0-row data frame instead of an error.
  pag$link2 <- sprintf('%s%s', base_url, links)
  pag
}

# Collect every page first and bind once, rather than growing the data frame
# on each iteration.
all_pags <- do.call(rbind, lapply(seq_len(342), scrape_page))
all_pags