I have a dataframe from almost all zipcodes of Germany.
# German Zip
Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")
head(Germany)
id loc_id zipcode name lat lon
1 1 14308 19348 Berge bei Perleberg 53.23746 11.870770
2 2 22537 85309 Pörnbach 48.61670 11.466700
3 3 106968 24790 Osterrönfeld Heidkrug, Gemeinde Osterrönfeld 54.27536 9.737535
4 4 18324 98646 Hildburghausen 50.43950 10.723922
5 5 16590 27336 Frankenfeld, Aller 52.76951 9.430780
6 6 19092 19294 Karenz 53.23012 11.343840
and a dataframe of particular places/locations in Germany, e.g. blood donation center, both with their respective longitude and latitude information:
# German Blood Donation
Blooddonation <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/95cc459b81f2bc6bec2f2b46d1f6273a/raw/2b1c77fe5cf1203ca105b7f61019bb390335db8e/LocationsUpdate.csv", sep=",")
head(Blooddonation)
title zip lat lon
1 Haema Blutspendezentrum Dresden-World Trade Center 01067 51.04807 13.7238
2 Octapharma Plasmaspende Dresden 01067 51.04932 13.73557
3 Haema Dresden Elbepark 01139 51.08232 13.696
4 DRK-Blutspendedienst Dresden 01307 51.05294 13.78027
5 Haema Blutspendezentrum Dresden-Fetscherplatz 01307 51.04654 13.77047
6 Haema Blutspendezentrum Görlitz 02826 51.15275 14.98878
How can I find the number of neighbour locations (blood donation centers) within a radius of e.g. 10km, 20km from each zipcode in Germany and store the result as a variable in my Germany
dataframe.
Is there a tidyverse
(tidy) solution such that the results are stored as variable in a dataframe?
CodePudding user response:
An answer hint might be :
library(geosphere)
withinKM=10
Germany$within10KM=0
for (i in 1:300) # test only the first 300 zipcode
{
count=0
for (k in 1:nrow(Blooddonation))
{
dis=distm(c(Germany[i,'lon'], Germany[i,'lat']),
c( as.numeric(Blooddonation[k,'lon']), as.numeric(Blooddonation[k,'lat'])), fun = distHaversine)/1000
if (dis<withinKM) count=count 1
}
Germany$within10KM=count
}
CodePudding user response:
With sf and distance matrix:
library(dplyr)
library(sf)
ger_sf <- st_as_sf(Germany, coords = c("lon", "lat"), crs = "WGS84")
bd_sf <- st_as_sf(Blooddonation, coords = c("lon", "lat"), crs = "WGS84")
# distance matrix in km with units dropped
# rows: locations from Germany
# cols: locations from Blooddonation
distm_km <- st_distance(ger_sf, bd_sf) %>%
units::set_units("km") %>%
units::drop_units()
distm_km[1:8, 1:8]
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
#> [1,] 274.3081 274.5550 270.0329 275.6236 275.9407 314.4278 313.1224 234.2907
#> [2,] 315.0837 315.6358 317.3197 317.6389 316.6712 378.3292 378.1703 404.2936
#> [3,] 448.2405 448.6026 444.0099 450.0901 450.2696 495.4185 494.1030 415.7224
#> [4,] 221.6435 222.4717 220.9114 225.5819 224.7243 310.0170 309.1489 291.4043
#> [5,] 351.1441 351.7419 347.3852 354.0903 353.9269 420.9728 419.6516 352.0349
#> [6,] 291.9424 292.2731 287.6897 293.6504 293.8668 339.1699 337.8463 260.3578
#> [7,] 272.3777 272.5296 268.1349 273.2452 273.6690 303.6681 302.3973 222.6707
#> [8,] 158.5451 159.3540 156.3434 162.4375 161.8107 246.2483 245.1566 210.5743
dim(distm_km)
#> [1] 17367 248
# rowSums() to count values matching condition across each row in the matrix
Germany <- Germany %>%
mutate(within10km = rowSums(distm_km <= 10),
within20km = rowSums(distm_km <= 20))
Results :
as_tibble(Germany)
#> # A tibble: 17,367 × 8
#> id loc_id zipcode name lat lon withi…¹ withi…²
#> <int> <int> <int> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 14308 19348 Berge bei Perleberg 53.2 11.9 0 0
#> 2 2 22537 85309 Pörnbach 48.6 11.5 0 1
#> 3 3 106968 24790 Osterrönfeld Heidkrug, Geme… 54.3 9.74 0 0
#> 4 4 18324 98646 Hildburghausen 50.4 10.7 0 1
#> 5 5 16590 27336 Frankenfeld, Aller 52.8 9.43 0 0
#> 6 6 19092 19294 Karenz 53.2 11.3 0 1
#> 7 7 144118 19395 Wendisch Priborn Tönchow 53.3 12.3 0 0
#> 8 8 16355 99628 Eßleben-Teutleben 51.1 11.5 0 0
#> 9 9 25953 38486 Wenze 52.6 11.1 0 0
#> 10 10 21836 72622 Nürtingen 48.6 9.35 0 0
#> # … with 17,357 more rows, and abbreviated variable names ¹within10km,
#> # ²within20km
Inuput:
library(httr)
library(stringr)
Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")
Blooddonation <- GET('https://www.blutspenden.de/blutspendedienste/#') %>%
content(as = "text") %>%
str_match("var instituionsmap_data = '(.*)'") %>%
.[, 2] %>%
jsonlite::parse_json(simplifyVector = T) %>%
select(title, street, number, zip, city, lat, lon)
Created on 2023-01-15 with reprex v2.0.2