Home > database >  R download.file() returning 403 Forbidden error
R download.file() returning 403 Forbidden error

Time:01-02

The code below used to work, but the website I'm trying to download files from has added a user-validation step. I've tried a couple of things, including asking the code to sleep inside the loop, but nothing has worked so far. Any advice?

library(tidyverse)
library(rvest)


# Fetch the document-listing page once and reuse the parsed HTML.
page <- read_html("https://burnsville.civicweb.net/filepro/documents/25657/")

# All anchor nodes for individual documents on the listing page.
doc_nodes <- html_nodes(page, ".document-link")

# One row per linked document: cleaned display name + absolute URL.
df <- tibble(
  names1 = doc_nodes %>%
    html_text2() %>%
    str_remove_all("\r") %>%
    str_squish(),
  links = paste0(
    "https://burnsville.civicweb.net",
    html_attr(doc_nodes, "href")
  )
)

# Download each PDF, pairing every link with its document name.
# Fixes vs. the original:
#  * map() over a data frame iterates its *columns*, so the old call ran
#    twice and ignored `.x` entirely — map2() over the two vectors is
#    what was intended (download.file() is not vectorized over URLs).
#  * the unused `destfile` variable is gone; each file gets its own name.
#  * a browser-like User-Agent header is sent so the site's new
#    user-validation step does not answer 403 Forbidden.
map2(
  df$links, df$names1,
  ~ download.file(
      url      = .x,
      destfile = paste0(.y, ".pdf"),
      mode     = "wb",  # binary mode so PDFs are not corrupted on Windows
      headers  = c("User-Agent" = "Mozilla/5.0")
    )
)

#loop through and download PDFs
# Loop through and download PDFs.
# Fixes vs. the original:
#  * the loop variable is `i`, but the body referenced an undefined
#    `url` object — every iteration would error (and be swallowed).
#  * failures are reported via message() instead of being silently
#    discarded by an empty error handler.
#  * a User-Agent header is sent to avoid the site's 403 Forbidden.
for (i in df$links) {
  tryCatch(
    download.file(i,
                  destfile = basename(i),
                  mode = "wb",
                  quiet = TRUE,
                  headers = c("User-Agent" = "Mozilla/5.0")),
    error = function(e) {
      message("Failed to download ", i, ": ", conditionMessage(e))
    }
  )
}

Thanks in advance!

CodePudding user response:

library(tidyverse)
library(rvest)

# Parse the document-listing page once.
page <- read_html("https://burnsville.civicweb.net/filepro/documents/25657/")

# Anchor nodes for each document, and their absolute URLs.
link_nodes <- html_nodes(page, ".document-link")
link_urls <- paste0(
  "https://burnsville.civicweb.net",
  html_attr(link_nodes, "href")
)

# One row per document: cleaned name, absolute link, and the last
# path segment of the URL to use as the output file stem.
docs <- tibble(
  names = link_nodes %>%
    html_text2() %>%
    str_remove_all("\r") %>%
    str_squish(),
  links = link_urls,
  file = str_extract(link_urls, "[^/]*$")
)

# Download every document as "<url tail>.pdf" in binary mode.
# The question's actual problem is a 403 Forbidden from the site's new
# user-validation step; sending a browser-like User-Agent header with
# each request (download.file(headers = ...), R >= 3.6.0) is what gets
# past it — without this the answer hits the same 403 as the question.
map2(docs$links, docs$file,
     ~ download.file(url = .x,
                     destfile = str_c(.y, ".pdf"),
                     mode = "wb",
                     headers = c("User-Agent" = "Mozilla/5.0")))
  • Related