The code below used to work, but the website I'm trying to download files from has added a user validation step. I've tried a couple of things, including making the code sleep inside the loop, but nothing has worked so far. Any advice?
library(tidyverse)
library(rvest)
# Fetch and parse the document listing page.
page <- read_html(
  "https://burnsville.civicweb.net/filepro/documents/25657/"
)
# One row per ".document-link" element on the page:
#   names1 - the link text with "\r" removed and whitespace squished
#   links  - the href attribute prefixed with the site root
df <- tibble(
  names1 = str_squish(
    str_remove_all(html_text2(html_nodes(page, ".document-link")), "\r")
  ),
  links = paste0(
    "https://burnsville.civicweb.net",
    html_attr(html_nodes(page, ".document-link"), "href")
  )
)
# Kept from the original script; the download step below does not use it.
destfile <- "destination.pdf"

# Download every document, pairing each link with its own display name.
# map() over a tibble visits its columns, so the previous call handed the
# entire `links` and `names1` vectors to download.file() at once. walk2()
# iterates the two columns in parallel, passing one URL and one destination
# per call, and is meant for side effects (nothing is printed).
# mode = "wb" writes the file in binary mode so PDFs are not corrupted.
# NOTE(review): names1 is scraped link text; confirm it never contains
# characters that are invalid in file names (e.g. "/").
walk2(
  df$links,
  df$names1,
  function(link, name) {
    download.file(link, destfile = paste0(name, ".pdf"), mode = "wb")
  }
)
# Loop through the scraped links and download each PDF into the working
# directory, naming each file after the last path component of its URL.
# The loop variable is used directly: the previous body referred to `url`,
# which is the base R connection function, so every call failed and the
# empty error handler hid it.
for (link in df$links) {
  tryCatch(
    {
      download.file(
        link,
        basename(link),
        mode = "wb",
        quiet = TRUE
      )
    },
    # Keep going after a failed download, but report which link failed and why
    # instead of discarding the error.
    error = function(e) {
      warning(
        "Failed to download ", link, ": ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
}
Thanks in advance!
CodePudding user response:
library(tidyverse)
library(rvest)
# Fetch and parse the document listing page.
page <- read_html(
  "https://burnsville.civicweb.net/filepro/documents/25657/"
)
# One row per ".document-link" element on the page:
#   names - the link text with "\r" removed and whitespace squished
#   links - the href attribute prefixed with the site root
#   file  - whatever follows the last "/" in the link
docs <- tibble(
  names = str_squish(
    str_remove_all(html_text2(html_nodes(page, ".document-link")), "\r")
  ),
  links = paste0(
    "https://burnsville.civicweb.net",
    html_attr(html_nodes(page, ".document-link"), "href")
  ),
  file = str_extract(links, "[^/]*$")
)
# Fetch each link in binary mode and save it as "<file>.pdf".
map2(
  .x = docs$links,
  .y = docs$file,
  .f = function(link, stem) {
    download.file(url = link, destfile = str_c(stem, ".pdf"), mode = "wb")
  }
)