I have this function:
# Build one LINESTRING per row, running from the point in row i of `b` to the
# point in row i of `e`, and store the result as the `geometry` column of
# `lines`.
lines$geometry <- do.call(
  "c",
  # seq_len() gives an empty sequence when `b` has zero rows, whereas
  # seq(nrow(b)) would give c(1, 0) and index rows that do not exist.
  lapply(seq_len(nrow(b)), function(i) {
    st_sfc(
      # Stack the two coordinate rows into a 2-row matrix (first `b`, then `e`).
      st_linestring(as.matrix(rbind(b[i, ], e[i, ]))),
      crs = 4326
    )
  })
)
It takes point coordinates in data frame b
and data frame e
and adds them as line geometry to data frame lines
and works just fine.
I now want to use this function on lists of several data frames. More specifically, lines_list, b_list
and e_list
which each contain eight data frames, called
"2005" "2006" "2007" "2008" "2009" "2010" "2012" "2011"
.
I tried using:
# Names of the per-year data frames held in each of the three lists.
wave_years <- c("2005", "2006", "2007", "2008", "2009", "2010", "2012", "2011")
# Attempt 1: repeat the line construction for every year in `wave_years`,
# reading the coordinate rows from `b_list` and `e_list` and writing the
# result into the matching element of `lines_list`.
# NOTE: this version is the one that raises the tibble error quoted below;
# the value is assigned through `[, "geometry"] =`.
for(x in wave_years) {
lines_list[[as.character(x)]][,"geometry"] = do.call(
"c",
lapply(seq(nrow(b_list[[as.character(x)]])), function(i) {
st_sfc(
st_linestring(
as.matrix(
rbind(b_list[[as.character(x)]][i, ], e_list[[as.character(x)]][i, ])
)
),
crs = 4326
)
}))
}
but this code gives the error
Error:
! Assigned data `do.call(...)` must be compatible with existing data.
✖ Existing data has 21797 rows.
✖ Element 1 of assigned data has 2 rows.
ℹ Only vectors of size 1 are recycled.
Backtrace:
1. base::`[<-`(`*tmp*`, , "geometry", value = `<LINESTRING [°]>`)
12. tibble (local) `<fn>`(`<vctrs___>`)
The first data frame ("2005"
) of each of the lists contains 21797 rows, but I do not know what the 2 rows in element 1 are.
I tried an approach with mapply
which looks like
# Attempt 2: walk the three lists in parallel with mapply() and add the
# `geometry` column with mutate(). SIMPLIFY = FALSE keeps the result a list.
# NOTE: this version is the one that raises the `match.fun()` error quoted
# below.
lines_list <- mapply(function(x, b, e){
x %>% mutate(geometry = do.call(
"c",
# The closing parenthesis of `seq(` sits after the anonymous function, so the
# function is handed to seq() and lapply() receives no FUN argument.
lapply(seq(nrow(b), function(i) {
st_sfc(
st_linestring(
as.matrix(
rbind(b[i, ], e[i, ])
)
),
crs = 4326
)}
))))
}, x = lines_list, b = b_list, e = e_list, SIMPLIFY = FALSE)
which gives the error:
Error in `mutate()`:
! Problem while computing `geometry = do.call(...)`.
Caused by error in `match.fun()`:
! argument "FUN" is missing, with no default
Run `rlang::last_error()` to see where the error occurred.
I hope the provided details are sufficient to address my problem.
Example
To create my datasets lines_list, b_list
and e_list
, I start at my dataset lines
which looks like:
structure(list(ID = 15131:15180, long.x = c(19.100531, 19.100531,
19.100531, 19.100531, 18.706671, 18.706671, 18.706671, 18.706671,
18.570601, 18.570601, 18.570601, 18.570601, 18.609079, 18.609079,
18.609079, 18.609079, 20.58342, 20.58342, 20.58342, 20.58342,
18.89217, 18.89217, 18.89217, 18.89217, 18.50174, 18.50174, 18.50174,
18.50174, 18.497459, 18.497459, 18.497459, 18.497459, 20.05595,
20.05595, 20.05595, 20.05595, 18.47617, 18.47617, 18.47617, 18.47617,
18.542721, 18.542721, 18.542721, 18.542721, 18.56119, 18.56119,
18.56119, 18.56119, 17.944201, 17.944201), lat.x = c(-33.892639,
-33.892639, -33.892639, -33.892639, -33.463631, -33.463631, -33.463631,
-33.463631, -33.983509, -33.983509, -33.983509, -33.983509, -33.92136,
-33.92136, -33.92136, -33.92136, -33.231041, -33.231041, -33.231041,
-33.231041, -32.181728, -32.181728, -32.181728, -32.181728, -33.91544,
-33.91544, -33.91544, -33.91544, -33.98735, -33.98735, -33.98735,
-33.98735, -33.83136, -33.83136, -33.83136, -33.83136, -34.055149,
-34.055149, -34.055149, -34.055149, -34.008751, -34.008751, -34.008751,
-34.008751, -34.073101, -34.073101, -34.073101, -34.073101, -33.011669,
-33.011669), nn.idx = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), nn.dists = c(2.87876203333951,
2.87876203333951, 2.87876203333951, 2.87876203333951, 2.33190509250205,
2.33190509250205, 2.33190509250205, 2.33190509250205, 2.79021190550413,
2.79021190550413, 2.79021190550413, 2.79021190550413, 2.7395316966416,
2.7395316966416, 2.7395316966416, 2.7395316966416, 1.82980100932888,
1.82980100932888, 1.82980100932888, 1.82980100932888, 1.34113063086822,
1.34113063086822, 1.34113063086822, 1.34113063086822, 2.70815622290683,
2.70815622290683, 2.70815622290683, 2.70815622290683, 2.77742816726626,
2.77742816726626, 2.77742816726626, 2.77742816726626, 2.11948529933567,
2.11948529933567, 2.11948529933567, 2.11948529933567, 2.83934545175644,
2.83934545175644, 2.83934545175644, 2.83934545175644, 2.80827041913292,
2.80827041913292, 2.80827041913292, 2.80827041913292, 2.87513519685771,
2.87513519685771, 2.87513519685771, 2.87513519685771, 1.73827934386191,
1.73827934386191), long.y = c(17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 22.14605, 22.14605, 22.14605, 22.14605, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 22.14605,
22.14605, 22.14605, 22.14605, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077), lat.y = c(-31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -34.18307, -34.18307, -34.18307,
-34.18307, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -34.18307, -34.18307, -34.18307, -34.18307, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383)), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
> dput(lines2[1:50,])
structure(list(ID = 15131:15180, long.x = c(19.100531, 19.100531,
19.100531, 19.100531, 18.706671, 18.706671, 18.706671, 18.706671,
18.570601, 18.570601, 18.570601, 18.570601, 18.609079, 18.609079,
18.609079, 18.609079, 20.58342, 20.58342, 20.58342, 20.58342,
18.89217, 18.89217, 18.89217, 18.89217, 18.50174, 18.50174, 18.50174,
18.50174, 18.497459, 18.497459, 18.497459, 18.497459, 20.05595,
20.05595, 20.05595, 20.05595, 18.47617, 18.47617, 18.47617, 18.47617,
18.542721, 18.542721, 18.542721, 18.542721, 18.56119, 18.56119,
18.56119, 18.56119, 17.944201, 17.944201), lat.x = c(-33.892639,
-33.892639, -33.892639, -33.892639, -33.463631, -33.463631, -33.463631,
-33.463631, -33.983509, -33.983509, -33.983509, -33.983509, -33.92136,
-33.92136, -33.92136, -33.92136, -33.231041, -33.231041, -33.231041,
-33.231041, -32.181728, -32.181728, -32.181728, -32.181728, -33.91544,
-33.91544, -33.91544, -33.91544, -33.98735, -33.98735, -33.98735,
-33.98735, -33.83136, -33.83136, -33.83136, -33.83136, -34.055149,
-34.055149, -34.055149, -34.055149, -34.008751, -34.008751, -34.008751,
-34.008751, -34.073101, -34.073101, -34.073101, -34.073101, -33.011669,
-33.011669), nn.idx = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), nn.dists = c(2.87876203333951,
2.87876203333951, 2.87876203333951, 2.87876203333951, 2.33190509250205,
2.33190509250205, 2.33190509250205, 2.33190509250205, 2.79021190550413,
2.79021190550413, 2.79021190550413, 2.79021190550413, 2.7395316966416,
2.7395316966416, 2.7395316966416, 2.7395316966416, 1.82980100932888,
1.82980100932888, 1.82980100932888, 1.82980100932888, 1.34113063086822,
1.34113063086822, 1.34113063086822, 1.34113063086822, 2.70815622290683,
2.70815622290683, 2.70815622290683, 2.70815622290683, 2.77742816726626,
2.77742816726626, 2.77742816726626, 2.77742816726626, 2.11948529933567,
2.11948529933567, 2.11948529933567, 2.11948529933567, 2.83934545175644,
2.83934545175644, 2.83934545175644, 2.83934545175644, 2.80827041913292,
2.80827041913292, 2.80827041913292, 2.80827041913292, 2.87513519685771,
2.87513519685771, 2.87513519685771, 2.87513519685771, 1.73827934386191,
1.73827934386191), long.y = c(17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 22.14605, 22.14605, 22.14605, 22.14605, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 22.14605,
22.14605, 22.14605, 22.14605, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077), lat.y = c(-31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -34.18307, -34.18307, -34.18307,
-34.18307, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -34.18307, -34.18307, -34.18307, -34.18307, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383), year = c(2007, 2012, 2009, 2005, 2009, 2011, 2006,
2008, 2012, 2012, 2012, 2008, 2011, 2005, 2006, 2009, 2010, 2007,
2008, 2012, 2011, 2010, 2011, 2007, 2005, 2007, 2011, 2005, 2008,
2006, 2010, 2006, 2008, 2005, 2006, 2010, 2007, 2008, 2005, 2010,
2010, 2007, 2009, 2010, 2011, 2005, 2010, 2007, 2008, 2005)), row.names = c(NA,
-50L), class = c("tbl_df", "tbl", "data.frame"))
I create lines_list
:
# One data frame per distinct value of `year`.
lines_list <- split(x = lines, f = lines[["year"]])
and b_list
and e_list
:
# Pre-allocate `b_list` and `e_list`: one empty slot per entry of
# `wave_years`, named after that entry.
b_list <- setNames(
  vector(mode = "list", length = length(wave_years)),
  wave_years
)
e_list <- setNames(
  vector(mode = "list", length = length(wave_years)),
  wave_years
)
and fill them with information
# Fill each year's slot: `b_list` takes the `*.x` coordinate columns and
# `e_list` the `*.y` ones, both renamed to a common `long`/`lat` pair.
for (yr in wave_years) {
  key <- as.character(yr)
  yr_lines <- lines_list[[key]]
  b_list[[key]] <- setNames(yr_lines[, c("long.x", "lat.x")], c("long", "lat"))
  e_list[[key]] <- setNames(yr_lines[, c("long.y", "lat.y")], c("long", "lat"))
}
This leaves me with the 3 datasets I am trying to operate on.
This is the original post I have my code from. Connecting two sets of coordinates to create lines using sf/mapview
CodePudding user response:
This is happening because of how the single square bracket [
selector works with objects of class tbl_df
. Unlike with data.frame
objects, but consistent with other lists, using [
returns an object of the same class (i.e. tbl_df
) and using [[
returns a vector. From R for Data Science:
[ extracts a sub-list. The result will always be a list.
[[ extracts a single component from a list. It removes a level of hierarchy from the list.
In this case, you have done the difficult part correctly. However you then try to assign the result back to a tbl_df
in the statement lines_list[[as.character(x)]][,"geometry"] = do.call(...)
.
You can replace it with the [[
operator:
# For every year, build one LINESTRING per row (from row i of the year's
# `b_list` entry to row i of its `e_list` entry) and store the result as the
# `geometry` column of the matching data frame in `lines_list`.
for (x in as.character(wave_years)) {
  # Use the `[[` operator here
  lines_list[[x]][["geometry"]] <- st_sfc(
    # seq_len() gives an empty sequence for a zero-row data frame, whereas
    # seq(nrow(...)) would give c(1, 0) and index rows that do not exist.
    lapply(seq_len(nrow(b_list[[x]])), function(i) {
      st_linestring(as.matrix(rbind(b_list[[x]][i, ], e_list[[x]][i, ])))
    }),
    # One st_sfc() call over the list of linestrings sets the CRS once,
    # instead of creating and concatenating one `sfc` object per row.
    crs = 4326
  )
}
Output:
head(lines_list[[1]])
# # A tibble: 6 x 9
# ID long.x lat.x nn.idx nn.dists long.y lat.y year geometry
# <int> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <LINESTRING [°]>
# 1 15134 19.1 -33.9 5 2.88 17.9 -31.3 2005 (19.10053 -33.89264, 17.90508 -31.27383)
# 2 15144 18.6 -33.9 5 2.74 17.9 -31.3 2005 (18.60908 -33.92136, 17.90508 -31.27383)
# 3 15155 18.5 -33.9 5 2.71 17.9 -31.3 2005 (18.50174 -33.91544, 17.90508 -31.27383)
# 4 15158 18.5 -33.9 5 2.71 17.9 -31.3 2005 (18.50174 -33.91544, 17.90508 -31.27383)
# 5 15164 20.1 -33.8 7 2.12 22.1 -34.2 2005 (20.05595 -33.83136, 22.14605 -34.18307)
# 6 15169 18.5 -34.1 5 2.84 17.9 -31.3 2005 (18.47617 -34.05515, 17.90508 -31.27383)
CodePudding user response:
OK, it was not easy as I told you.
First of all, the definition of
lines_list
was not clear, because you copy-pasted too many lines in your reproducible example (two objects instead of one), so I had to guess that the definition of lines_list
was the second one. Then, the elements of
wave_years
were defined in a different order than the names of lines_list
(see the code below). The error in the
mapply
was caused by (1), but also by a badly placed closing parenthesis.
Moral: Try to avoid nesting too many functions in that way; it is better to define a function separately, which makes debugging easier (for you and for others) and keeps your code better organized. Remember, the worst errors in R are those that do not show any error message. Be very careful, because such errors tend to surface only at the end of your analysis or (even worse) when someone else asks you to check your procedures.
The reproducible example with the issues and corrections:
# Input data
# library() stops with an error if dplyr is not installed; require() would
# only warn and return FALSE, and `%>%`/mutate() would then fail further down.
library(dplyr)
lines <- structure(list(ID = 15131:15180,
long.x = c(19.100531, 19.100531,
19.100531, 19.100531, 18.706671, 18.706671, 18.706671, 18.706671,
18.570601, 18.570601, 18.570601, 18.570601, 18.609079, 18.609079,
18.609079, 18.609079, 20.58342, 20.58342, 20.58342, 20.58342,
18.89217, 18.89217, 18.89217, 18.89217, 18.50174, 18.50174, 18.50174,
18.50174, 18.497459, 18.497459, 18.497459, 18.497459, 20.05595,
20.05595, 20.05595, 20.05595, 18.47617, 18.47617, 18.47617, 18.47617,
18.542721, 18.542721, 18.542721, 18.542721, 18.56119, 18.56119,
18.56119, 18.56119, 17.944201, 17.944201),
lat.x = c(-33.892639,
-33.892639, -33.892639, -33.892639, -33.463631, -33.463631, -33.463631,
-33.463631, -33.983509, -33.983509, -33.983509, -33.983509, -33.92136,
-33.92136, -33.92136, -33.92136, -33.231041, -33.231041, -33.231041,
-33.231041, -32.181728, -32.181728, -32.181728, -32.181728, -33.91544,
-33.91544, -33.91544, -33.91544, -33.98735, -33.98735, -33.98735,
-33.98735, -33.83136, -33.83136, -33.83136, -33.83136, -34.055149,
-34.055149, -34.055149, -34.055149, -34.008751, -34.008751, -34.008751,
-34.008751, -34.073101, -34.073101, -34.073101, -34.073101, -33.011669,
-33.011669),
nn.idx = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
nn.dists = c(2.87876203333951,
2.87876203333951, 2.87876203333951, 2.87876203333951, 2.33190509250205,
2.33190509250205, 2.33190509250205, 2.33190509250205, 2.79021190550413,
2.79021190550413, 2.79021190550413, 2.79021190550413, 2.7395316966416,
2.7395316966416, 2.7395316966416, 2.7395316966416, 1.82980100932888,
1.82980100932888, 1.82980100932888, 1.82980100932888, 1.34113063086822,
1.34113063086822, 1.34113063086822, 1.34113063086822, 2.70815622290683,
2.70815622290683, 2.70815622290683, 2.70815622290683, 2.77742816726626,
2.77742816726626, 2.77742816726626, 2.77742816726626, 2.11948529933567,
2.11948529933567, 2.11948529933567, 2.11948529933567, 2.83934545175644,
2.83934545175644, 2.83934545175644, 2.83934545175644, 2.80827041913292,
2.80827041913292, 2.80827041913292, 2.80827041913292, 2.87513519685771,
2.87513519685771, 2.87513519685771, 2.87513519685771, 1.73827934386191,
1.73827934386191),
long.y = c(17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 22.14605, 22.14605, 22.14605, 22.14605, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 22.14605,
22.14605, 22.14605, 22.14605, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077,
17.905077, 17.905077, 17.905077, 17.905077, 17.905077),
lat.y = c(-31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -34.18307, -34.18307, -34.18307,
-34.18307, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -34.18307, -34.18307, -34.18307, -34.18307, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383,
-31.27383),
year = c(2007, 2012, 2009, 2005, 2009, 2011, 2006,
2008, 2012, 2012, 2012, 2008, 2011, 2005, 2006, 2009, 2010, 2007,
2008, 2012, 2011, 2010, 2011, 2007, 2005, 2007, 2011, 2005, 2008,
2006, 2010, 2006, 2008, 2005, 2006, 2010, 2007, 2008, 2005, 2010,
2010, 2007, 2009, 2010, 2011, 2005, 2010, 2007, 2008, 2005)),
row.names = c(NA,
-50L),
class = c("tbl_df", "tbl", "data.frame"))
# Year labels exactly as written in the question ("2012" comes before "2011").
wave_years <- c("2005", "2006", "2007", "2008", "2009", "2010", "2012", "2011")
lines_list <- split(x = lines, f = lines[["year"]])

# [Issue 1]: `wave_years` as defined above is not in the same order as the
# names of `lines_list`. Print both to confirm the mismatch.
print(names(lines_list))
print(wave_years)

# [Issue 1]: Fix -- take the year labels from `lines_list` itself.
wave_years <- names(lines_list)
# Per-year coordinate tables, named after `wave_years`: `b_list` holds the
# `*.x` columns and `e_list` the `*.y` columns, both renamed to `long`/`lat`.
b_list <- lapply(lines_list[wave_years], function(yr_lines) {
  setNames(yr_lines[, c("long.x", "lat.x")], c("long", "lat"))
})
e_list <- lapply(lines_list[wave_years], function(yr_lines) {
  setNames(yr_lines[, c("long.y", "lat.y")], c("long", "lat"))
})
# [Issue 1]: The per-year row counts (and their names/order) of the three
# lists must be exactly the same. Print them, then stop if they are not.
row_counts <- lapply(
  list(lines_list = lines_list, b_list = b_list, e_list = e_list),
  # vapply() always returns an integer vector, unlike sapply().
  function(l) vapply(l, nrow, integer(1))
)
print(row_counts)
stopifnot(
  identical(row_counts$lines_list, row_counts$b_list),
  identical(row_counts$lines_list, row_counts$e_list)
)
# Load the sf package (don't forget to mention it in the question).
# library() stops with an error if sf is missing; require() would only warn.
library(sf)
# [Issue 2]: This call is malformed and is shown only to illustrate the
# problem. The closing parenthesis of `seq(` is misplaced: the anonymous
# function ends up as an argument of seq(), so lapply() receives no FUN.
lines_list <- mapply(function(x, b, e){
x %>% mutate(geometry = do.call(
"c",
lapply(seq(nrow(b), function(i) {
st_sfc(
st_linestring(
as.matrix(
rbind(b[i, ], e[i, ])
)
),
crs = 4326
)}
))))
}, x = lines_list, b = b_list, e = e_list, SIMPLIFY = FALSE)
# [Issue 2]: Fix -- close the row-index call before the anonymous function so
# that the function is passed to lapply().
# Walks `lines_list`, `b_list` and `e_list` in parallel (paired by position)
# and returns a list of data frames, each with an added `geometry` column.
lines_list <- mapply(function(x, b, e) {
  # One LINESTRING per row, from row i of `b` to row i of `e`.
  # seq_len() gives an empty sequence when `b` has zero rows, whereas
  # seq(nrow(b)) would give c(1, 0) and index rows that do not exist.
  line_geoms <- lapply(seq_len(nrow(b)), function(i) {
    st_linestring(x = as.matrix(rbind(b[i, ], e[i, ])))
  })
  # A single st_sfc() call over the list sets the CRS once, instead of
  # creating and concatenating one `sfc` object per row.
  x %>%
    mutate(geometry = st_sfc(line_geoms, crs = 4326))
}, x = lines_list, b = b_list, e = e_list, SIMPLIFY = FALSE)