I'd like to display a line graph using R shiny, with data from web-scrapping. I kind of succeed in scrapping with one day, but fail with a date range.
The following is my code for one day. I select the date by hard-coding the digits in R console (i.e. 20221018) since I fail to do so in ui:
library(dplyr)
library(tidyverse)
library(purrr)
library(shiny)
rows <-
read_html("https://www.immd.gov.hk/eng/stat_20221018.html") %>% html_elements(".table-passengerTrafficStat tbody tr")
prefixes <- c("arr", "dep")
cols <-
c("Hong Kong Residents",
"Mainland Visitors",
"Other Visitors",
"Total")
headers <-
c(
"Control_Point",
crossing(prefixes, cols) %>% unite("headers", 1:2, remove = T) %>% unlist() %>% unname()
)
df <- map_dfr(rows,
function(x) {
x %>%
html_elements("td[headers]") %>%
set_names(headers) %>%
html_text()
}) %>%
filter(Control_Point %in% c("Airport")) %>%
mutate(across(c(-1), ~ str_replace(.x, ",", "") %>% as.integer())) %>%
mutate(date = "2022-10-18")
ui <- fluidPage(dataTableOutput("T"))
server <- function(input, output) {
output$T <- renderDataTable({
df
})
}
shinyApp(ui = ui, server = server)
The following is my attempt to expand to a date range. I expect the result will be a data frame:
library(rvest)
library(dplyr)
library(tidyverse)
library(purrr)
library(shiny)
ui <- fluidPage(
textInput("choice_company", "Enter name of a company"),
dateRangeInput(
"daterange",
"Date range:",
start = "2022-10-01",
end = Sys.Date() - 1,
min = "2022-10-01",
max = Sys.Date() - 1,
format = "yyyymmdd",
separator = "/"
),
textOutput("ShowUrl"),
hr(),
textOutput("ShowHtml"),
dataTableOutput("T")
)
server <- function(input, output) {
prefixes <- c("arr", "dep")
cols <-
c("Hong Kong Residents",
"Mainland Visitors",
"Other Visitors",
"Total")
headers <-
c(
"Control_Point",
crossing(prefixes, cols) %>% unite("headers", 1:2, remove = T) %>% unlist() %>% unname()
)
theDate <- input$daterange[1]
answer <- list() #empty list
while (input$theDate <= end) {
URL <- reactive({
paste0("https://www.immd.gov.hk/eng/stat_",
input$theDate,
".html")
})
rows <-
read_html(url_data) %>% html_elements(".table-passengerTrafficStat tbody tr")
df <- map_dfr(rows,
function(x) {
x %>%
html_elements("td[headers]") %>%
set_names(headers) %>%
html_text()
}) %>%
filter(Control_Point %in% c("Airport")) %>%
mutate(across(c(-1), ~ str_replace(.x, ",", "") %>% as.integer())) %>%
mutate(date = input$daterange[1])
answer[[input$daterange[1]]] <- df
input$daterange[1] <- input$daterange[1] 1
Sys.sleep(1)
output$T <- renderDataTable({
URL
})
}
}
shinyApp(ui = ui, server = server)
This is the complaint message:
Warning: Error in $: Can't access reactive value 'daterange' outside of reactive consumer. i Do you need to wrap inside reactive() or observer()? 53: Error in input$daterange : Can't access reactive value 'daterange' outside of reactive consumer. i Do you need to wrap inside reactive() or observer()?
1. May I know what the complaint means?
2. How to fix the error?
3. If possible, how to translate the data to a line graph ?
Thank you so much in advance.
CodePudding user response:
I significatively modified the code in order to make it work.
- all the code that displays the table must run inside the
renderDataTable({...})
in order to change every time you selected dates. - row names does not match column count from the web
- definitively the
while
block does not iterate over the dates. - also date format must match the url requeriments.
Further refinement should be made.
I hope will be helpful.
library(rvest)
library(dplyr)
library(tidyverse)
library(purrr)
library(shiny)
ui <- fluidPage(
textInput("choice_company", "Enter name of a company"),
dateRangeInput(
"daterange",
"Date range:",
start = "2022-10-16",
end = Sys.Date() - 1,
min = "2022-10-01",
max = Sys.Date() - 1,
format = "yyyymmdd",
separator = "/"
),
textOutput("ShowUrl"),
hr(),
textOutput("ShowHtml"),
dataTableOutput("T")
)
server <- function(input, output) {
prefixes <- c("arr", "dep")
cols <-
c("Hong Kong Residents",
"Mainland Visitors",
"Other Visitors",
"Total")
headers <-
c(
"Control_Point",
crossing(prefixes, cols) %>% unite("headers", 1:2, remove = T) %>% unlist() %>% unname()
)
output$T <- renderDataTable({
date_seq<- seq(input$daterange[1], input$daterange[2], by = "1 day")
rows <- map(date_seq, ~ {
URL <-
paste0("https://www.immd.gov.hk/eng/stat_", format(., "%Y%m%d"),
".html")
message(URL)
rows <-
read_html(URL) %>% html_elements(".table-passengerTrafficStat tbody tr")})
df <- map_dfr(rows,
function(x) {
x %>%
html_elements("td[headers]") %>%
set_names(headers[seq_len(NROW(.))]) %>%
html_text()
}) %>%
filter(Control_Point %in% c("Airport")) %>%
mutate(across(c(-1), ~ str_replace(.x, ",", "") %>% as.integer())) %>%
mutate(date = input$daterange[1])
# answer[[input$daterange[1]]] <- df
# Sys.sleep(1)
df
})
}
shiny::shinyApp(ui,server)
CodePudding user response:
I somehow fix the question by narrowing the selection in read_html()
. This is not the best solution, but I think it's better to end the puzzle and move forward to learn dplyr and shiny thoroughly.
library(rvest)
library(dplyr)
library(tidyverse)
library(purrr)
library(shiny)
ui <- fluidPage(
dateRangeInput(
"daterange",
"Date range:",
start = "2022-10-16",
end = Sys.Date() - 1,
min = "2022-10-01",
max = Sys.Date() - 1,
format = "yyyymmdd",
separator = "/"
),
textOutput("ShowUrl"),
hr(),
textOutput("ShowHtml"),
dataTableOutput("T")
)
server <- function(input, output) {
prefixes <- c("arr", "dep")
cols <-
c("Hong Kong Residents",
"Mainland Visitors",
"Other Visitors",
"Total")
headers <-
c(
"Control_Point",
crossing(prefixes, cols) %>% unite("headers", 1:2, remove = T) %>% unlist() %>% unname()
)
output$T <- renderDataTable({
date_seq<- seq(input$daterange[1], input$daterange[2], by = "1 day")
rows <- map(date_seq, ~ {
URL <-
paste0("https://www.immd.gov.hk/eng/stat_", format(., "%Y%m%d"),
".html")
message(URL)
rows <-
read_html(URL) %>% html_elements(".table-passengerTrafficStat tbody tr.a")}) #the key here: select only the airport data.
df <- map_dfr(rows,
function(x) {
x %>%
html_elements("td[headers]") %>%
set_names(headers[seq_len(NROW(.))]) %>%
html_text()
}) %>%
mutate(across(c(-1), ~ str_replace(.x, ",", "") %>% as.integer())) %>%
mutate(date = date_seq)
df
})
}
shiny::shinyApp(ui,server)
````
[![enter image description here][1]][1]
[1]: https://i.stack.imgur.com/Z77kC.jpg