I have two sf points datasets (let's say DtA and DtB). I want to use the function st_distance to measure the straight line distance between each point in DtA and each point in DtB. For example, DtA have 3 points and DtB have 2 points. It should get 6 pairs of distance results. But I know st_distance could only measure one point in DtA each time.
Does anyone know how to use mapply/sapply/lapply to measure all the points in DtA one time with st_distance? Many thanks!
I have big data. Using the loop is very slow. My loop is as below:
df <- c()
for (i in 1:nrow(DtA)) {
dist = st_distance(DtA[i,], DtB, by_element=TRUE)
dist <- as.numeric(as.character(dist))
df <- rbind(df, dist)
}
I want to change the above loop into an apply function. Many thanks!
CodePudding user response:
Update
If you need to stick to loop/apply approach you can do the following. That would be in fact a problem of looping, not a specific spatial one:
library(sf)
# Fake data
library(giscoR)
points <- gisco_get_nuts(spatialtype = "LB")
DtA <- points[sample(seq_len(nrow(points)), 132), ]
DtB <- points[sample(seq_len(nrow(points)), 921), ]
# End of fake data
# lapply approach
distend <- lapply(seq_len(nrow(DtA)), function(x){
dist = st_distance(DtA[x,], DtB, by_element=TRUE)
dist <- as.numeric(as.character(dist))
return(dist)
})
# List to matrix
distend2 <- do.call("rbind", distend)
dim(distend2)
#> [1] 132 921
Created on 2022-03-24 by the reprex package (v2.0.1)
Initial answer
Expanding the comment of @det, you can do it with st_distance()
:
library(sf)
# Fake data
library(giscoR)
points <- gisco_get_nuts(spatialtype = "LB")
DtA <- points[sample(seq_len(nrow(points)), 132), ]
DtB <- points[sample(seq_len(nrow(points)), 921), ]
# End of fake data
# Distances from DtA to DtB
dist <- st_distance(DtA, DtB)
dim(dist)
#> [1] 132 921
# A matrix with the right dimensions
As an extra, you can manipulate this a bit to get a long version of the matrix using the tidyverse
approach:
# If you need to manipulate with tidyverse...
library(tidyverse)
tib <- as_tibble(dist)
# Ids of DtB as colnames
names(tib) <- DtB$NUTS_ID
# Ids of DtA on the first column
finaltib <- DtA %>%
st_drop_geometry() %>%
mutate(ids_DtA = NUTS_ID) %>%
select(ids_DtA) %>%
bind_cols(tib) %>%
as_tibble()
finaltib
#> # A tibble: 132 x 922
#> ids_DtA DE25C CH065 FRC13 UKJ44 ITH10 DE93 ES113 HU32 DE277 ITH2
#> <chr> [m] [m] [m] [m] [m] [m] [m] [m] [m] [m]
#> 1 RO421 8.68e5 1.02e6 1.32e6 1.60e6 7.92e5 1.13e6 2.37e6 1.50e5 8.82e5 8.38e5
#> 2 ES431 1.78e6 1.50e6 1.25e6 1.51e6 1.69e6 2.02e6 4.18e5 2.42e6 1.73e6 1.62e6
#> 3 BE334 4.48e5 4.77e5 4.50e5 2.94e5 6.34e5 4.21e5 1.36e6 1.22e6 4.48e5 6.55e5
#> 4 BE241 5.14e5 5.23e5 4.58e5 2.26e5 6.92e5 4.68e5 1.32e6 1.29e6 5.12e5 7.08e5
#> 5 HU222 4.79e5 6.29e5 9.27e5 1.22e6 4.06e5 8.06e5 1.99e6 3.51e5 4.89e5 4.62e5
#> 6 FI19 1.68e6 1.96e6 2.12e6 1.84e6 1.90e6 1.31e6 3.04e6 1.65e6 1.73e6 1.98e6
#> 7 TR90 2.42e6 2.58e6 2.87e6 3.15e6 2.35e6 2.60e6 3.89e6 1.64e6 2.44e6 2.39e6
#> 8 DEE0C 3.18e5 5.93e5 7.75e5 7.34e5 5.72e5 1.68e5 1.80e6 8.42e5 3.72e5 6.38e5
#> 9 PT187 1.89e6 1.62e6 1.35e6 1.56e6 1.81e6 2.11e6 3.92e5 2.55e6 1.84e6 1.74e6
#> 10 DED5 2.88e5 5.80e5 7.91e5 8.05e5 5.25e5 2.55e5 1.84e6 7.54e5 3.45e5 5.96e5
#> # ... with 122 more rows, and 911 more variables: DE114 [m], SE231 [m],
#> # DE40A [m], TRA11 [m], DE932 [m], ES613 [m], DEA59 [m], UKD62 [m],
#> # SE213 [m], HR04A [m], HU120 [m], DE256 [m], ITI32 [m], TR905 [m],
#> # UKJ11 [m], BE24 [m], PL715 [m], ITF43 [m], BG342 [m], ITG17 [m], DE118 [m],
#> # LT01 [m], SE22 [m], DE25B [m], MK006 [m], DEA38 [m], TR621 [m], UKJ27 [m],
#> # UKJ41 [m], RO313 [m], RO123 [m], ES7 [m], DEB3J [m], FRY4 [m], AT130 [m],
#> # DEG0H [m], RS [m], ITC15 [m], DE269 [m], ES63 [m], PT186 [m], ...
# Make it longer
finaltib_long <- finaltib %>%
pivot_longer(cols = -(ids_DtA), names_to = "ids_DtB")
finaltib_long
#> # A tibble: 121,572 x 3
#> ids_DtA ids_DtB value
#> <chr> <chr> [m]
#> 1 BE353 ES5 944805.
#> 2 BE353 UKG1 521480.
#> 3 BE353 FRJ28 719087.
#> 4 BE353 FI1C 1793053.
#> 5 BE353 DK05 866304.
#> 6 BE353 CH01 488356.
#> 7 BE353 MT001 1774618.
#> 8 BE353 RS223 1508833.
#> 9 BE353 NL226 214993.
#> 10 BE353 FRG04 396129.
#> # ... with 121,562 more rows
Created on 2022-03-24 by the reprex package (v2.0.1)
CodePudding user response:
UPDATED
I have created some dummy data trying to resemble yours :
library(sp)
library(sf)
# dummy big data 10k points in Dta, 3 points in DtB
df = data.frame(x = rep(13.637398,10*1000), y = rep(41.30736, 10*100))
DtA = SpatialPoints(cbind(df$x, df$y), proj4string = CRS(" proj=longlat datum=WGS84"))
DtA = st_as_sf(DtA)
DtB = SpatialPoints(cbind(df$x[c(1:3)], df$y[c(1:3)]),proj4string = CRS(" proj=longlat datum=WGS84"))
DtB = st_as_sf(DtB)
dists <- list()
for (i in 1:length(DtB$geometry)) {
dists[[i]] <- st_distance(DtA, DtB[i,], by_element=TRUE)
}
To retrieve the distance of DtA point to DtB point:
n_DtA_point = 500
n_DtB_point = 2
dists[[n_DtB_point]][n_DtA_point]
output:
0 [m]