I want to load the data from this page, https://ourworldindata.org/grapher/cumulative-deaths-and-cases-covid-19, directly into R.
However, the download button does not have a URL that I can copy into the read.csv()
command.
How do I do this?
CodePudding user response:
You cannot open the download button links with read.csv() because the download button links to image files, not the underlying data. The webpage does store the data needed to create the graph being displayed but this is not the underlying data set.
If you want to scrape the data set used to create the graph from this page, you can do so using the code below. However, you may prefer to access the underlying data set from its original source (there is a link to the sources on the page).
library(curl)
library(jsonlite)
library(stringr)
# Download the chart page; the data model behind the graph is embedded in it
res <- curl_fetch_memory("https://ourworldindata.org/grapher/cumulative-deaths-and-cases-covid-19")
# curl_fetch_memory() does not raise on HTTP error statuses, so check explicitly
if (res$status_code >= 400L) {
  stop("Request failed with HTTP status ", res$status_code, call. = FALSE)
}
# Convert the content of the response from raw bytes to text characters
txt <- rawToChar(res$content)
# rawToChar() leaves the encoding unmarked; mark it so non-ASCII text is not
# reinterpreted in the native locale (assumes the page is UTF-8 -- TODO confirm)
Encoding(txt) <- "UTF-8"
# Pull out the embedded json: the piece of text that follows the first marker
parts <- stringr::str_split(txt, "//EMBEDDED_JSON", simplify = TRUE)
if (length(parts) < 2L) {
  # Without the marker, parts[2] would be NA and fromJSON() would fail with
  # an unrelated-looking message
  stop(
    "Marker '//EMBEDDED_JSON' not found; the page layout may have changed",
    call. = FALSE
  )
}
txt <- parts[2]
# Convert the json text string to a list
json <- jsonlite::fromJSON(txt = txt)
# The result is the data model needed to create the graph
json
# $id
# [1] 4048
#
# $map
# $map$time
# [1] "latest"
#
# $map$colorScale
# $map$colorScale$equalSizeBins
# [1] TRUE
#
# $map$colorScale$baseColorScheme
# [1] "Reds"
#
# $map$colorScale$binningStrategy
# [1] "manual"
#
# $map$colorScale$customNumericColors
# [1] NA NA NA NA
#
# $map$colorScale$customNumericValues
# [1] 10 50 100 500 5000 50000 100000 500000 1000000
#
#
# $map$variableId
# [1] 142582
#
# $map$timeTolerance
# [1] 2
#
#
# $data
# $data$availableEntities
# [1] "Asia" "Upper middle income" "World"
# [4] "China" "Lower middle income" "World excl. China"
# [7] "World excl. China and South Korea" "World excl. China, South Korea, Japan and Singapore" "Asia excl. China"
# [10] "Philippines" "Hong Kong" "High income"
# [13] "Japan" "France" "Europe"
# [16] "European Union" "Taiwan" "Iran"
# [19] "South Korea" "International" "Italy"
# [22] "United States" "North America" "Oceania"
# [25] "Thailand" "Australia" "San Marino"
# [28] "Spain" "Iraq" "Switzerland"
# [31] "United Kingdom" "Netherlands" "Peru"
# [34] "South America" "Argentina" "Egypt"
# [37] "Africa" "Canada" "Germany"
# [40] "Sweden" "Morocco" "Lebanon"
# [43] "Indonesia" "India" "Greece"
# [46] "Panama" "Albania" "Bulgaria"
# [49] "Ireland" "Belgium" "Austria"
# [52] "Poland" "Algeria" "Guyana"
# [55] "Low income" "Azerbaijan" "Ukraine"
# [58] "Sudan" "Norway" "Slovenia"
# [61] "Denmark" "Ecuador" "Luxembourg"
# [64] "Hungary" "Bahrain" "Guatemala"
# [67] "Dominican Republic" "Portugal" "Turkey"
# [70] "Brazil" "Malaysia" "Pakistan"
# [73] "Moldova" "Bangladesh" "Burkina Faso"
# [76] "Cuba" "Jamaica" "Croatia"
# [79] "Costa Rica" "Mexico" "Tunisia"
# [82] "Russia" "Serbia" "United Arab Emirates"
# [85] "Israel" "Gabon" "Ghana"
# [88] "Finland" "Democratic Republic of Congo" "Iceland"
# [91] "Mauritius" "Singapore" "Paraguay"
# [94] "Bosnia and Herzegovina" "Afghanistan" "Andorra"
# [97] "North Macedonia" "Romania" "Czechia"
# [100] "Cyprus" "Colombia" "Chile"
# [103] "Gambia" "Kosovo" "Montenegro"
# [106] "Zimbabwe" "Nigeria" "Saudi Arabia"
# [109] "Cape Verde" "Lithuania" "Estonia"
# [112] "Niger" "Trinidad and Tobago" "Cameroon"
# [115] "Armenia" "Kenya" "Kazakhstan"
# [118] "Honduras" "Palestine" "Jordan"
# [121] "Venezuela" "Nicaragua" "Togo"
# [124] "South Africa" "Uzbekistan" "Brunei"
# [127] "Uruguay" "Sri Lanka" "Qatar"
# [130] "Monaco" "Cote d'Ivoire" "Bolivia"
# [133] "Angola" "New Zealand" "Mali"
# [136] "Syria" "Mauritania" "Oman"
# [139] "El Salvador" "Myanmar" "Belarus"
# [142] "Botswana" "Tanzania" "Slovakia"
# [145] "Senegal" "Bahamas" "Libya"
# [148] "Congo" "Zambia" "Latvia"
# [151] "Kyrgyzstan" "Suriname" "Georgia"
# [154] "Liberia" "Kuwait" "Liechtenstein"
# [157] "Barbados" "Ethiopia" "Belize"
# [160] "Benin" "Antigua and Barbuda" "Haiti"
# [163] "Malawi" "Malta" "Somalia"
# [166] "Djibouti" "Burundi" "Guinea"
# [169] "Eswatini" "Equatorial Guinea" "Sierra Leone"
# [172] "Guinea-Bissau" "Chad" "Maldives"
# [175] "Yemen" "Sao Tome and Principe" "Tajikistan"
# [178] "Comoros" "South Sudan" "Nepal"
# [181] "Madagascar" "Central African Republic" "Mozambique"
# [184] "Rwanda" "Lesotho" "Namibia"
# [187] "Uganda" "Papua New Guinea" "Fiji"
# [190] "Vietnam" "Saint Lucia" "Eritrea"
# [193] "Mongolia" "Grenada" "Seychelles"
# [196] "Bhutan" "Saint Vincent and the Grenadines" "Cambodia"
# [199] "Timor" "Vanuatu" "Laos"
# [202] "Saint Kitts and Nevis" "Vatican" "Dominica"
# [205] "Solomon Islands" "Marshall Islands" "Samoa"
# [208] "Micronesia (country)" "Kiribati" "2020 Summer Olympics athletes & staff"
#
#
# $slug
# [1] "cumulative-deaths-and-cases-covid-19"
#
# $title
# [1] "Cumulative confirmed COVID-19 cases and deaths"
#
# $yAxis
# $yAxis$min
# [1] 0
#
# $yAxis$canChangeScaleType
# [1] TRUE
#
#
# $minTime
# [1] 1
#
# $version
# [1] 1417
#
# $subtitle
# [1] "Limited testing and challenges in the attribution of the cause of death means that the number of confirmed cases and deaths may not be an accurate count of the true totals."
#
# $hasMapTab
# [1] TRUE
#
# $originUrl
# [1] "https://ourworldindata.org/coronavirus"
#
# $dimensions
# display.name display.color display.includeInTable display.numDecimalPlaces property variableId
# 1 Total confirmed deaths #d95f02 TRUE 0 y 145451
# 2 Total confirmed cases #7570b3 TRUE 0 y 145450
#
# $isPublished
# [1] TRUE
#
# $selectedData
# color index entityId
# 1 #932834 0 355
# 2 #a652ba 1 355
#
# $addCountryMode
# [1] "change-country"
#
# $selectedEntityIds
# [1] 355
#
# $hideTitleAnnotation
# [1] TRUE
#
# $selectedEntityNames
# [1] "World"
CodePudding user response:
If you want to download the CSV file, you can use RSelenium:
# Drive a Chrome session through RSelenium so that the page's own download
# button can be clicked
library(RSelenium)
# Chrome preferences: directory in which downloaded files are saved
eCaps <- list(
  chromeOptions = list(
    prefs = list("download.default_directory" = "D:\\mywork")
  )
)
driver <- rsDriver(browser = "chrome", extraCapabilities = eCaps)
remDr <- driver[["client"]]
# Navigate to webpage
url <- "https://ourworldindata.org/grapher/cumulative-deaths-and-cases-covid-19"
remDr$navigate(url)
# Click the download section
# NOTE: the absolute XPaths below are tied to the page's layout at the time of
# writing and need updating if the page structure changes
click <- remDr$findElement(
  using = "xpath",
  value = "/html/body/main/figure/div/div[4]/div[2]/nav/ul/li[5]/a"
)
click$clickElement()
# Click the download button for the csv file
click1 <- remDr$findElement(
  using = "xpath",
  value = "/html/body/main/figure/div/div[5]/div[2]/a"
)
click1$clickElement()