Home > other >  Number of locations within radius based on longitude and latitude in dataframe format in R
Number of locations within radius based on longitude and latitude in dataframe format in R

Time:01-16

I have a dataframe from almost all zipcodes of Germany.

# German Zip 
Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")

head(Germany) 
id loc_id zipcode                                         name      lat       lon
1  1  14308   19348                          Berge bei Perleberg 53.23746 11.870770
2  2  22537   85309                                     Pörnbach 48.61670 11.466700
3  3 106968   24790 Osterrönfeld Heidkrug, Gemeinde Osterrönfeld 54.27536  9.737535
4  4  18324   98646                               Hildburghausen 50.43950 10.723922
5  5  16590   27336                           Frankenfeld, Aller 52.76951  9.430780
6  6  19092   19294                                       Karenz 53.23012 11.343840

and a dataframe of particular places/locations in Germany, e.g. blood donation center, both with their respective longitude and latitude information:

# German Blood Donation 
Blooddonation <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/95cc459b81f2bc6bec2f2b46d1f6273a/raw/2b1c77fe5cf1203ca105b7f61019bb390335db8e/LocationsUpdate.csv", sep=",")
head(Blooddonation)

                                             title   zip      lat      lon
1 Haema Blutspendezentrum Dresden-World Trade Center 01067 51.04807  13.7238
2                    Octapharma Plasmaspende Dresden 01067 51.04932 13.73557
3                             Haema Dresden Elbepark 01139 51.08232   13.696
4                       DRK-Blutspendedienst Dresden 01307 51.05294 13.78027
5      Haema Blutspendezentrum Dresden-Fetscherplatz 01307 51.04654 13.77047
6                    Haema Blutspendezentrum Görlitz 02826 51.15275 14.98878

How can I find the number of neighbour locations (blood donation centers) within a radius of e.g. 10km, 20km from each zipcode in Germany and store the result as a variable in my Germany dataframe.

Is there a tidyverse (tidy) solution such that the results are stored as variable in a dataframe?

CodePudding user response:

An answer hint might be :

library(geosphere)
withinKM=10
Germany$within10KM=0
for (i in 1:300) # test only the first 300 zipcode
{
  count=0
  for (k in 1:nrow(Blooddonation))
  {
    dis=distm(c(Germany[i,'lon'], Germany[i,'lat']), 
      c( as.numeric(Blooddonation[k,'lon']), as.numeric(Blooddonation[k,'lat'])), fun = distHaversine)/1000
    if (dis<withinKM) count=count 1
  }
  Germany$within10KM=count
}

CodePudding user response:

With sf and distance matrix:

library(dplyr)
library(sf)

ger_sf <- st_as_sf(Germany, coords = c("lon", "lat"), crs = "WGS84")
bd_sf <- st_as_sf(Blooddonation, coords = c("lon", "lat"), crs = "WGS84")

# distance matrix in km with units dropped
# rows: locations from Germany
# cols: locations from Blooddonation
distm_km <- st_distance(ger_sf, bd_sf) %>% 
  units::set_units("km") %>% 
  units::drop_units()

distm_km[1:8, 1:8]
#>          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
#> [1,] 274.3081 274.5550 270.0329 275.6236 275.9407 314.4278 313.1224 234.2907
#> [2,] 315.0837 315.6358 317.3197 317.6389 316.6712 378.3292 378.1703 404.2936
#> [3,] 448.2405 448.6026 444.0099 450.0901 450.2696 495.4185 494.1030 415.7224
#> [4,] 221.6435 222.4717 220.9114 225.5819 224.7243 310.0170 309.1489 291.4043
#> [5,] 351.1441 351.7419 347.3852 354.0903 353.9269 420.9728 419.6516 352.0349
#> [6,] 291.9424 292.2731 287.6897 293.6504 293.8668 339.1699 337.8463 260.3578
#> [7,] 272.3777 272.5296 268.1349 273.2452 273.6690 303.6681 302.3973 222.6707
#> [8,] 158.5451 159.3540 156.3434 162.4375 161.8107 246.2483 245.1566 210.5743
dim(distm_km)
#> [1] 17367   248

# rowSums() to count values matching condition across each row in the matrix
Germany <- Germany %>% 
  mutate(within10km = rowSums(distm_km <= 10),
         within20km = rowSums(distm_km <= 20))

Results :

as_tibble(Germany)
#> # A tibble: 17,367 × 8
#>       id loc_id zipcode name                           lat   lon withi…¹ withi…²
#>    <int>  <int>   <int> <chr>                        <dbl> <dbl>   <dbl>   <dbl>
#>  1     1  14308   19348 Berge bei Perleberg           53.2 11.9        0       0
#>  2     2  22537   85309 Pörnbach                      48.6 11.5        0       1
#>  3     3 106968   24790 Osterrönfeld Heidkrug, Geme…  54.3  9.74       0       0
#>  4     4  18324   98646 Hildburghausen                50.4 10.7        0       1
#>  5     5  16590   27336 Frankenfeld, Aller            52.8  9.43       0       0
#>  6     6  19092   19294 Karenz                        53.2 11.3        0       1
#>  7     7 144118   19395 Wendisch Priborn Tönchow      53.3 12.3        0       0
#>  8     8  16355   99628 Eßleben-Teutleben             51.1 11.5        0       0
#>  9     9  25953   38486 Wenze                         52.6 11.1        0       0
#> 10    10  21836   72622 Nürtingen                     48.6  9.35       0       0
#> # … with 17,357 more rows, and abbreviated variable names ¹​within10km,
#> #   ²​within20km

Inuput:

library(httr)
library(stringr)

Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")
Blooddonation <- GET('https://www.blutspenden.de/blutspendedienste/#') %>% 
  content(as = "text") %>% 
  str_match("var instituionsmap_data = '(.*)'") %>% 
  .[, 2] %>% 
  jsonlite::parse_json(simplifyVector = T) %>% 
  select(title, street, number, zip, city, lat, lon)

Created on 2023-01-15 with reprex v2.0.2

  • Related