Home > OS >  Combine coordinates with same custom key in R
Combine coordinates with same custom key in R

Time:06-10

I web-scrapped multiple store locators, and now I want to find all the brands that each store has. As every brand has slightly different coordinates, I combined the data by creating a custom key (rounding lat and long to 4 decimals and adding the first 5 letters of the store name) but this has obvious limitations and is not good enough for my case.

My next approach should be perfect, but I am stuck. I thought to:

  1. Create circles of 100m around each point (st_transform and st_buffer based on this answer)
  2. Check if any of the circles with the same first 5 letters intersect (... group_by(name_key) %>% st_overlaps) and somehow get a unique group_id.
  3. Combine the data to using the group_id as key

Hope somebody can give me a hand because I am totally stuck.

tt <- tibble(lat = c(41.436921, 41.4329208, 41.3829246, 41.398841, 41.3908955, 41.3790881, 41.3876734, 41.4091834, 41.3819518, 41.3878469, 41.3941022, 41.390988469863, 41.3917335, 41.3987172, 41.43623025, 41.3868139, 41.4329513, 41.387896, 41.3913932, 41.3876928, 41.401127, 41.3826847, 41.385063, 41.408131, 41.38855346, 41.39145716, 41.3883035, 41.387676, 41.3856535, 41.39719374, 41.4408444, 41.432961, 41.4331833, 41.4083721, 41.37511991, 41.3925928, 41.4361573, 41.401058, 41.40464, 41.3878013, 41.4136467, 41.390564, 41.3868788, 41.3865356, 41.4125671, 41.3896482, 41.3850639, 41.3919671, 41.3999835, 41.386993, 41.3878405, 41.3917335, 41.3987172, 41.382774, 41.388265, 41.4091834, 41.3828898, 41.386879, 41.3917335, 41.3987302, 41.409183, 41.432923, 41.4344164, 41.3869842, 41.40922657),
       long = c(2.1806423, 2.1893701, 2.1274556, 2.152954, 2.1440196, 2.1278398, 2.1668985, 2.2021878, 2.1757278, 2.127962, 2.1418273, 2.1391142945205, 2.1436192, 2.1528498, 2.1814508559036, 2.1560665191209, 2.1893589, 2.1666726, 2.1704746, 2.171124, 2.203366, 2.1731507, 2.173404, 2.188301, 2.17087001, 2.17049986, 2.1677237, 2.155939, 2.1539378, 2.16165528, 2.19848871, 2.189327, 2.189431, 2.1644419, 2.13273168, 2.1464824, 2.1817558, 2.151513, 2.154447, 2.1280387, 2.1385689, 2.1438591, 2.1560561, 2.1714268, 2.2113698, 2.1659005, 2.1734034999999, 2.1757278, 2.1487078, 2.16995, 2.16675529999997, 2.1436192, 2.1528498, 2.12765960000001, 2.127915, 2.2021878, 2.1274947, 2.156056, 2.1436192, 2.1528875, 2.202188, 2.189361, 2.1544574, 2.1208967, 2.20219468),
       name = c("El Corte Inglés", "Babis", "Experiencia bebe - Barcelona", "ALI-BEY nens", "Bitti", "El Corte Inglés", "Abitare Kids Barruguet", "Noari kids", "El Corte Inglés", "El Corte Inglés", "Pops And Co", "El Corte Inglés", "BITTI", "Ali Bey", "El Corte Inglés", "Tienda bebés Barcelona - Nonetes & Nou mesos", "Babis", "Abitare Kids - Barruguet", "Palacio del bebe", "El Corte Inglés", "Noari Kids", "El Món D'en Dadà", "Els Tresors de la Panera", "Paloma", "Prenatal Barcelona", "Palacio del Bebé", "El Corte Inglés Barcelona", "Rabasa", "Nonetes", "Prenatal Barcelona", "Prenatal Barcelona", "Babi's", "Nou Mesos", "BCN Bebé", "Nadons", "El Corte Inglés Barcelona", "El Corte Inglés Barcelona", "La Mama Vaca", "Kangura Portabebes ", "El Corte Inglés Barcelona", "Bitti", "Corte Ingles", "Nonetes", "Corte Ingles", "Corte Ingles", "MAINADA KIDS", "LES 4 LLUNES", "EL PALACIO DEL BEBE", "CASTELLBELL", "EL CORTE INGLÉS PLAZA CATALUñA (002)", "ABITARE KIDS BARRUGUET BARCELONA", "BITTI", "ALÍ BEY NENS", "EXPERIENCIA BEBÉ BARCELONA", "EL CORTE INGLÉS DIAGONAL (007)", "NOARI KIDS- BARCELONA", "Experiencia Bebé (Barcelona)", "Nonetes", "Bitti", "Ali Bey Nens", "Noari Kids", "Babis", "Nonetes & Nou mesos", "Nonetes & Nou mesos", "Noari Kids Barcelona"),
       brand = c("b", "b", "b", "b", "b", "b", "b", "b", "b", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "m", "m", "m", "m", "m", "sc", "sc", "sc", "sc", "s", "s", "s", "s", "s", "s", "s", "e", "e", "e", "e", "e", "e", "n", "n", "n") ) %>% 
        mutate(name_key = tolower(name),
               name_key = iconv(name_key, from="UTF-8",to="ASCII//TRANSLIT"),
               name_key = str_remove_all(name_key, '[[:digit:]]'),
               name_key = str_remove_all(name_key, '[:punct:]'), 
               name_key = str_remove_all(name_key, '[:space:]'),
               # Remove the common company types from the name
               name_key = str_remove_all(name_key, 'ltda'),
               name_key = str_remove_all(name_key, 'ltd'),
               # Only get the first 5 letters
               name_key = str_sub(name_key, end = 5L))

leaflet() %>%
  # add different provider tiles
  addProviderTiles(
    "OpenStreetMap",
    # give the layer a name
    group = "OpenStreetMap") %>% 
  addCircleMarkers(data = tt,
                   radius = 4,
                   opacity = 0.7,
                   label = paste(
                     "Store name: ", tt$name, "<br>",
                     "name_id: ", tt$name_key,"<br>",
                     "Brand: ", tt$brand) %>%
                     lapply(htmltools::HTML) ) 

CodePudding user response:

This seems to do the trick. With this, you can do all the data wrangling and group_by() summarization you want

library(sf)
library(tidyverse)

tt <- tibble(lat = c(41.436921, 41.4329208, 41.3829246, 41.398841, 41.3908955, 41.3790881, 41.3876734, 41.4091834, 41.3819518, 41.3878469, 41.3941022, 41.390988469863, 41.3917335, 41.3987172, 41.43623025, 41.3868139, 41.4329513, 41.387896, 41.3913932, 41.3876928, 41.401127, 41.3826847, 41.385063, 41.408131, 41.38855346, 41.39145716, 41.3883035, 41.387676, 41.3856535, 41.39719374, 41.4408444, 41.432961, 41.4331833, 41.4083721, 41.37511991, 41.3925928, 41.4361573, 41.401058, 41.40464, 41.3878013, 41.4136467, 41.390564, 41.3868788, 41.3865356, 41.4125671, 41.3896482, 41.3850639, 41.3919671, 41.3999835, 41.386993, 41.3878405, 41.3917335, 41.3987172, 41.382774, 41.388265, 41.4091834, 41.3828898, 41.386879, 41.3917335, 41.3987302, 41.409183, 41.432923, 41.4344164, 41.3869842, 41.40922657),
             long = c(2.1806423, 2.1893701, 2.1274556, 2.152954, 2.1440196, 2.1278398, 2.1668985, 2.2021878, 2.1757278, 2.127962, 2.1418273, 2.1391142945205, 2.1436192, 2.1528498, 2.1814508559036, 2.1560665191209, 2.1893589, 2.1666726, 2.1704746, 2.171124, 2.203366, 2.1731507, 2.173404, 2.188301, 2.17087001, 2.17049986, 2.1677237, 2.155939, 2.1539378, 2.16165528, 2.19848871, 2.189327, 2.189431, 2.1644419, 2.13273168, 2.1464824, 2.1817558, 2.151513, 2.154447, 2.1280387, 2.1385689, 2.1438591, 2.1560561, 2.1714268, 2.2113698, 2.1659005, 2.1734034999999, 2.1757278, 2.1487078, 2.16995, 2.16675529999997, 2.1436192, 2.1528498, 2.12765960000001, 2.127915, 2.2021878, 2.1274947, 2.156056, 2.1436192, 2.1528875, 2.202188, 2.189361, 2.1544574, 2.1208967, 2.20219468),
             name = c("El Corte Inglés", "Babis", "Experiencia bebe - Barcelona", "ALI-BEY nens", "Bitti", "El Corte Inglés", "Abitare Kids Barruguet", "Noari kids", "El Corte Inglés", "El Corte Inglés", "Pops And Co", "El Corte Inglés", "BITTI", "Ali Bey", "El Corte Inglés", "Tienda bebés Barcelona - Nonetes & Nou mesos", "Babis", "Abitare Kids - Barruguet", "Palacio del bebe", "El Corte Inglés", "Noari Kids", "El Món D'en Dadà", "Els Tresors de la Panera", "Paloma", "Prenatal Barcelona", "Palacio del Bebé", "El Corte Inglés Barcelona", "Rabasa", "Nonetes", "Prenatal Barcelona", "Prenatal Barcelona", "Babi's", "Nou Mesos", "BCN Bebé", "Nadons", "El Corte Inglés Barcelona", "El Corte Inglés Barcelona", "La Mama Vaca", "Kangura Portabebes ", "El Corte Inglés Barcelona", "Bitti", "Corte Ingles", "Nonetes", "Corte Ingles", "Corte Ingles", "MAINADA KIDS", "LES 4 LLUNES", "EL PALACIO DEL BEBE", "CASTELLBELL", "EL CORTE INGLÉS PLAZA CATALUñA (002)", "ABITARE KIDS BARRUGUET BARCELONA", "BITTI", "ALÍ BEY NENS", "EXPERIENCIA BEBÉ BARCELONA", "EL CORTE INGLÉS DIAGONAL (007)", "NOARI KIDS- BARCELONA", "Experiencia Bebé (Barcelona)", "Nonetes", "Bitti", "Ali Bey Nens", "Noari Kids", "Babis", "Nonetes & Nou mesos", "Nonetes & Nou mesos", "Noari Kids Barcelona"),
             brand = c("b", "b", "b", "b", "b", "b", "b", "b", "b", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "m", "m", "m", "m", "m", "sc", "sc", "sc", "sc", "s", "s", "s", "s", "s", "s", "s", "e", "e", "e", "e", "e", "e", "n", "n", "n") ) %>% 
  mutate(name_key = tolower(name),
         name_key = iconv(name_key, from="UTF-8",to="ASCII//TRANSLIT"),
         name_key = str_remove_all(name_key, '[[:digit:]]'),
         name_key = str_remove_all(name_key, '[:punct:]'), 
         name_key = str_remove_all(name_key, '[:space:]'),
         # Remove the common company types from the name
         name_key = str_remove_all(name_key, 'ltda'),
         name_key = str_remove_all(name_key, 'ltd'),
         # Only get the first 5 letters
         name_key = str_sub(name_key, end = 5L))


tt <- tt %>% 
  st_as_sf(coords = c("long", "lat"),
           crs = 4326) %>% 
  mutate(id = row_number())

tt

tt_buffer <- tt %>% 
  st_buffer(100)


tt <- tt %>% 
  mutate(intersects = st_intersects(.,
                                    tt_buffer))


tt_long <- unnest(tt, intersects) %>% 
  arrange(name, intersects)

tt <- tt %>% 
  st_drop_geometry() %>% 
  select(-intersects)

tt_long <- tt_long %>% 
  tidylog::left_join(.,
                     tt,
                     by = c("intersects" = "id"))

CodePudding user response:

Convert your tibble to a sf object before passing to leaflet:

tt_sf <- st_as_sf(tt, coords = c("long", "lat"), crs=4326)

leaflet() %>%
  # add different provider tiles
  addProviderTiles(
    "OpenStreetMap",
    # give the layer a name
    group = "OpenStreetMap") %>% 
  addCircleMarkers(data=tt_sf, 
                   radius = 4,
                   opacity = 0.7,
                   label = paste(
                     "Store name: ", tt$name, "<br>",
                     "name_id: ", tt$name_key,"<br>",
                     "Brand: ", tt$brand) %>%
                     lapply(htmltools::HTML))
  • Related