Home > Net >  Translate function from df level to list level
Translate function from df level to list level

Time:07-26

I have this function:

# Build one LINESTRING per row by joining row i of `b` to row i of `e`,
# and store the combined result in the `geometry` column of `lines`.
lines$geometry = do.call(
  "c", 
  # each iteration returns a one-element sfc; c() concatenates them
  lapply(seq(nrow(b)), function(i) {
    st_sfc(
      st_linestring(
        # two-row coordinate matrix: first row from b, second row from e
        as.matrix(
          rbind(b[i, ], e[i, ])
        )
      ),
      # coordinate reference system given as a numeric (EPSG) code
      crs = 4326
    )
  }))

It takes point coordinates in data frame b and data frame e and adds them as line geometry to data frame lines and works just fine.

I now want to use this function on lists of several data frames. More specifically, on lines_list, b_list and e_list, which each contain eight data frames, named "2005", "2006", "2007", "2008", "2009", "2010", "2012" and "2011".

I tried using:

# Year labels used to index the lists (order as posted: "2012" before "2011").
wave_years <- c("2005", "2006", "2007", "2008", "2009", "2010", "2012", "2011")

# Attempt: repeat the single-data-frame computation for every entry of
# wave_years and write each result into the matching element of lines_list.
for(x in wave_years) {
# NOTE: the `[ , "geometry"]` assignment on the next line is the `[<-` call
# that shows up in the backtrace of the error reported for this attempt.
lines_list[[as.character(x)]][,"geometry"] = do.call(
  "c", 
  lapply(seq(nrow(b_list[[as.character(x)]])), function(i) {
    st_sfc(
      st_linestring(
        # two-row coordinate matrix: row i of this year's b and e tables
        as.matrix(
          rbind(b_list[[as.character(x)]][i, ], e_list[[as.character(x)]][i, ])
        )
      ),
      crs = 4326
    )
  }))
}

but this code gives the error

Error:
! Assigned data `do.call(...)` must be compatible with existing data.
✖ Existing data has 21797 rows.
✖ Element 1 of assigned data has 2 rows.
ℹ Only vectors of size 1 are recycled.
Backtrace:
  1. base::`[<-`(`*tmp*`, , "geometry", value = `<LINESTRING [°]>`)
 12. tibble (local) `<fn>`(`<vctrs___>`)

The first data frame ("2005") of each of the lists contains 21797 rows, but I do not know what the 2 rows in element 1 are.

I tried an approach with mapply which looks like

# Attempt: walk lines_list, b_list and e_list in parallel and add the
# geometry column with mutate().
lines_list <- mapply(function(x, b, e){
  x %>% mutate(geometry = do.call(
  "c", 
  # NOTE: `seq(` is not closed after `nrow(b)`, so the anonymous function is
  # passed to seq() and lapply() receives no FUN argument.
  lapply(seq(nrow(b), function(i) {
    st_sfc(
      st_linestring(
        as.matrix(
          rbind(b[i, ], e[i, ])
        )
      ),
      crs = 4326
    )}
    ))))
}, x = lines_list, b = b_list, e = e_list, SIMPLIFY = FALSE)

which gives the error:

Error in `mutate()`:
! Problem while computing `geometry = do.call(...)`.
Caused by error in `match.fun()`:
! argument "FUN" is missing, with no default
Run `rlang::last_error()` to see where the error occurred.

I hope the provided details are sufficient to address my problem.

Example To create my datasets lines_list, b_list and e_list, I start at my dataset lines which looks like:

structure(list(ID = 15131:15180, long.x = c(19.100531, 19.100531, 
19.100531, 19.100531, 18.706671, 18.706671, 18.706671, 18.706671, 
18.570601, 18.570601, 18.570601, 18.570601, 18.609079, 18.609079, 
18.609079, 18.609079, 20.58342, 20.58342, 20.58342, 20.58342, 
18.89217, 18.89217, 18.89217, 18.89217, 18.50174, 18.50174, 18.50174, 
18.50174, 18.497459, 18.497459, 18.497459, 18.497459, 20.05595, 
20.05595, 20.05595, 20.05595, 18.47617, 18.47617, 18.47617, 18.47617, 
18.542721, 18.542721, 18.542721, 18.542721, 18.56119, 18.56119, 
18.56119, 18.56119, 17.944201, 17.944201), lat.x = c(-33.892639, 
-33.892639, -33.892639, -33.892639, -33.463631, -33.463631, -33.463631, 
-33.463631, -33.983509, -33.983509, -33.983509, -33.983509, -33.92136, 
-33.92136, -33.92136, -33.92136, -33.231041, -33.231041, -33.231041, 
-33.231041, -32.181728, -32.181728, -32.181728, -32.181728, -33.91544, 
-33.91544, -33.91544, -33.91544, -33.98735, -33.98735, -33.98735, 
-33.98735, -33.83136, -33.83136, -33.83136, -33.83136, -34.055149, 
-34.055149, -34.055149, -34.055149, -34.008751, -34.008751, -34.008751, 
-34.008751, -34.073101, -34.073101, -34.073101, -34.073101, -33.011669, 
-33.011669), nn.idx = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), nn.dists = c(2.87876203333951, 
2.87876203333951, 2.87876203333951, 2.87876203333951, 2.33190509250205, 
2.33190509250205, 2.33190509250205, 2.33190509250205, 2.79021190550413, 
2.79021190550413, 2.79021190550413, 2.79021190550413, 2.7395316966416, 
2.7395316966416, 2.7395316966416, 2.7395316966416, 1.82980100932888, 
1.82980100932888, 1.82980100932888, 1.82980100932888, 1.34113063086822, 
1.34113063086822, 1.34113063086822, 1.34113063086822, 2.70815622290683, 
2.70815622290683, 2.70815622290683, 2.70815622290683, 2.77742816726626, 
2.77742816726626, 2.77742816726626, 2.77742816726626, 2.11948529933567, 
2.11948529933567, 2.11948529933567, 2.11948529933567, 2.83934545175644, 
2.83934545175644, 2.83934545175644, 2.83934545175644, 2.80827041913292, 
2.80827041913292, 2.80827041913292, 2.80827041913292, 2.87513519685771, 
2.87513519685771, 2.87513519685771, 2.87513519685771, 1.73827934386191, 
1.73827934386191), long.y = c(17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 22.14605, 22.14605, 22.14605, 22.14605, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 22.14605, 
22.14605, 22.14605, 22.14605, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077), lat.y = c(-31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -34.18307, -34.18307, -34.18307, 
-34.18307, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -34.18307, -34.18307, -34.18307, -34.18307, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", 
"data.frame"))
> dput(lines2[1:50,])
structure(list(ID = 15131:15180, long.x = c(19.100531, 19.100531, 
19.100531, 19.100531, 18.706671, 18.706671, 18.706671, 18.706671, 
18.570601, 18.570601, 18.570601, 18.570601, 18.609079, 18.609079, 
18.609079, 18.609079, 20.58342, 20.58342, 20.58342, 20.58342, 
18.89217, 18.89217, 18.89217, 18.89217, 18.50174, 18.50174, 18.50174, 
18.50174, 18.497459, 18.497459, 18.497459, 18.497459, 20.05595, 
20.05595, 20.05595, 20.05595, 18.47617, 18.47617, 18.47617, 18.47617, 
18.542721, 18.542721, 18.542721, 18.542721, 18.56119, 18.56119, 
18.56119, 18.56119, 17.944201, 17.944201), lat.x = c(-33.892639, 
-33.892639, -33.892639, -33.892639, -33.463631, -33.463631, -33.463631, 
-33.463631, -33.983509, -33.983509, -33.983509, -33.983509, -33.92136, 
-33.92136, -33.92136, -33.92136, -33.231041, -33.231041, -33.231041, 
-33.231041, -32.181728, -32.181728, -32.181728, -32.181728, -33.91544, 
-33.91544, -33.91544, -33.91544, -33.98735, -33.98735, -33.98735, 
-33.98735, -33.83136, -33.83136, -33.83136, -33.83136, -34.055149, 
-34.055149, -34.055149, -34.055149, -34.008751, -34.008751, -34.008751, 
-34.008751, -34.073101, -34.073101, -34.073101, -34.073101, -33.011669, 
-33.011669), nn.idx = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), nn.dists = c(2.87876203333951, 
2.87876203333951, 2.87876203333951, 2.87876203333951, 2.33190509250205, 
2.33190509250205, 2.33190509250205, 2.33190509250205, 2.79021190550413, 
2.79021190550413, 2.79021190550413, 2.79021190550413, 2.7395316966416, 
2.7395316966416, 2.7395316966416, 2.7395316966416, 1.82980100932888, 
1.82980100932888, 1.82980100932888, 1.82980100932888, 1.34113063086822, 
1.34113063086822, 1.34113063086822, 1.34113063086822, 2.70815622290683, 
2.70815622290683, 2.70815622290683, 2.70815622290683, 2.77742816726626, 
2.77742816726626, 2.77742816726626, 2.77742816726626, 2.11948529933567, 
2.11948529933567, 2.11948529933567, 2.11948529933567, 2.83934545175644, 
2.83934545175644, 2.83934545175644, 2.83934545175644, 2.80827041913292, 
2.80827041913292, 2.80827041913292, 2.80827041913292, 2.87513519685771, 
2.87513519685771, 2.87513519685771, 2.87513519685771, 1.73827934386191, 
1.73827934386191), long.y = c(17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 22.14605, 22.14605, 22.14605, 22.14605, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 22.14605, 
22.14605, 22.14605, 22.14605, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
17.905077, 17.905077, 17.905077, 17.905077, 17.905077), lat.y = c(-31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -34.18307, -34.18307, -34.18307, 
-34.18307, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -34.18307, -34.18307, -34.18307, -34.18307, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
-31.27383), year = c(2007, 2012, 2009, 2005, 2009, 2011, 2006, 
2008, 2012, 2012, 2012, 2008, 2011, 2005, 2006, 2009, 2010, 2007, 
2008, 2012, 2011, 2010, 2011, 2007, 2005, 2007, 2011, 2005, 2008, 
2006, 2010, 2006, 2008, 2005, 2006, 2010, 2007, 2008, 2005, 2010, 
2010, 2007, 2009, 2010, 2011, 2005, 2010, 2007, 2008, 2005)), row.names = c(NA, 
-50L), class = c("tbl_df", "tbl", "data.frame"))

I create lines_list:

# Split `lines` into a list with one element per distinct value of `year`.
lines_list <- split(lines, f = lines$year)

and b_list and e_list:

# Empty containers with one slot per entry of wave_years, named after it.
b_list <- setNames(
  vector(mode = "list", length = length(wave_years)),
  wave_years
)
e_list <- b_list

and fill them with information

# For every year, copy the ".x" coordinate pair into b_list and the ".y"
# pair into e_list, renaming both to the common columns "long" and "lat".
for (x in wave_years) {
  b_list[[as.character(x)]] <- setNames(
    lines_list[[as.character(x)]][, c("long.x", "lat.x")],
    c("long", "lat")
  )
  e_list[[as.character(x)]] <- setNames(
    lines_list[[as.character(x)]][, c("long.y", "lat.y")],
    c("long", "lat")
  )
}

This leaves me with the 3 datasets I am trying to operate on.

This is the original post I have my code from. Connecting two sets of coordinates to create lines using sf/mapview

CodePudding user response:

This is happening because of how the single square bracket [ selector works with objects of class tbl_df. Unlike with data.frame objects, but consistent with other lists, using [ returns an object of the same class (i.e. tbl_df) and using [[ returns a vector. From R for Data Science:

[ extracts a sub-list. The result will always be a list.

[[ extracts a single component from a list. It removes a level of hierarchy from the list.

In this case, you have done the difficult part correctly. However you then try to assign the result back to a tbl_df in the statement lines_list[[as.character(x)]][,"geometry"] = do.call(...).

You can replace it with the [[ operator:

# Add a LINESTRING geometry column to every data frame in lines_list.
# Row i of the geometry joins row i of the year's b table to row i of its
# e table.
for (x in wave_years) {
  # Look the year's coordinate tables up once instead of once per row.
  start_pts <- b_list[[as.character(x)]]
  end_pts <- e_list[[as.character(x)]]

  # seq_len() (not seq()) so that a zero-row table produces no iterations
  # instead of iterating over c(1, 0).
  segments <- lapply(seq_len(nrow(start_pts)), function(i) {
    st_linestring(as.matrix(rbind(start_pts[i, ], end_pts[i, ])))
  })

  # Use the `[[` operator here
  # A single st_sfc() call builds the whole column and sets the CRS once.
  lines_list[[as.character(x)]][["geometry"]] <- st_sfc(segments, crs = 4326)
}

Output:

head(lines_list[[1]])
# # A tibble: 6 x 9
#      ID long.x lat.x nn.idx nn.dists long.y lat.y  year                                 geometry
#   <int>  <dbl> <dbl>  <int>    <dbl>  <dbl> <dbl> <dbl>                         <LINESTRING [°]>
# 1 15134   19.1 -33.9      5     2.88   17.9 -31.3  2005 (19.10053 -33.89264, 17.90508 -31.27383)
# 2 15144   18.6 -33.9      5     2.74   17.9 -31.3  2005 (18.60908 -33.92136, 17.90508 -31.27383)
# 3 15155   18.5 -33.9      5     2.71   17.9 -31.3  2005 (18.50174 -33.91544, 17.90508 -31.27383)
# 4 15158   18.5 -33.9      5     2.71   17.9 -31.3  2005 (18.50174 -33.91544, 17.90508 -31.27383)
# 5 15164   20.1 -33.8      7     2.12   22.1 -34.2  2005 (20.05595 -33.83136, 22.14605 -34.18307)
# 6 15169   18.5 -34.1      5     2.84   17.9 -31.3  2005 (18.47617 -34.05515, 17.90508 -31.27383)

CodePudding user response:

OK, as I told you, this was not easy.

  1. First of all, the definition of lines_list was not clear, because you copied and pasted too many lines into your reproducible example (two objects instead of one), so I had to guess that the second object was the one used to define lines_list.

  2. Then, the elements of wave_years were defined in a different order than the names of lines_list (see the code below).

  3. The error in the mapply call was caused partly by (1), but also by a misplaced closing parenthesis.

Moral: Try to avoid nesting too many functions in this way. It is better to define a function separately, which makes debugging easier (for you and for others) and keeps your code better organized. Remember, the worst errors in R are those that do not show any error message. Be very careful, because such errors tend to surface only at the end of your analysis or (even worse) when someone else asks you to check your procedures.

The reproducible example with the issues and corrections:

# Input data
require(dplyr)

lines <- structure(list(ID = 15131:15180, 
                        long.x = c(19.100531, 19.100531, 
                                   19.100531, 19.100531, 18.706671, 18.706671, 18.706671, 18.706671, 
                                   18.570601, 18.570601, 18.570601, 18.570601, 18.609079, 18.609079, 
                                   18.609079, 18.609079, 20.58342, 20.58342, 20.58342, 20.58342, 
                                   18.89217, 18.89217, 18.89217, 18.89217, 18.50174, 18.50174, 18.50174, 
                                   18.50174, 18.497459, 18.497459, 18.497459, 18.497459, 20.05595, 
                                   20.05595, 20.05595, 20.05595, 18.47617, 18.47617, 18.47617, 18.47617, 
                                   18.542721, 18.542721, 18.542721, 18.542721, 18.56119, 18.56119, 
                                   18.56119, 18.56119, 17.944201, 17.944201), 
                        lat.x = c(-33.892639, 
                                  -33.892639, -33.892639, -33.892639, -33.463631, -33.463631, -33.463631, 
                                  -33.463631, -33.983509, -33.983509, -33.983509, -33.983509, -33.92136, 
                                  -33.92136, -33.92136, -33.92136, -33.231041, -33.231041, -33.231041, 
                                  -33.231041, -32.181728, -32.181728, -32.181728, -32.181728, -33.91544, 
                                  -33.91544, -33.91544, -33.91544, -33.98735, -33.98735, -33.98735, 
                                  -33.98735, -33.83136, -33.83136, -33.83136, -33.83136, -34.055149, 
                                  -34.055149, -34.055149, -34.055149, -34.008751, -34.008751, -34.008751, 
                                  -34.008751, -34.073101, -34.073101, -34.073101, -34.073101, -33.011669, 
                                  -33.011669), 
                        nn.idx = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
                                   5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 
                                   5L, 5L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 
                                   5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), 
                        nn.dists = c(2.87876203333951, 
                                     2.87876203333951, 2.87876203333951, 2.87876203333951, 2.33190509250205, 
                                     2.33190509250205, 2.33190509250205, 2.33190509250205, 2.79021190550413, 
                                     2.79021190550413, 2.79021190550413, 2.79021190550413, 2.7395316966416, 
                                     2.7395316966416, 2.7395316966416, 2.7395316966416, 1.82980100932888, 
                                     1.82980100932888, 1.82980100932888, 1.82980100932888, 1.34113063086822, 
                                     1.34113063086822, 1.34113063086822, 1.34113063086822, 2.70815622290683, 
                                     2.70815622290683, 2.70815622290683, 2.70815622290683, 2.77742816726626, 
                                     2.77742816726626, 2.77742816726626, 2.77742816726626, 2.11948529933567, 
                                     2.11948529933567, 2.11948529933567, 2.11948529933567, 2.83934545175644, 
                                     2.83934545175644, 2.83934545175644, 2.83934545175644, 2.80827041913292, 
                                     2.80827041913292, 2.80827041913292, 2.80827041913292, 2.87513519685771, 
                                     2.87513519685771, 2.87513519685771, 2.87513519685771, 1.73827934386191, 
                                     1.73827934386191), 
                        long.y = c(17.905077, 17.905077, 17.905077, 
                                   17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
                                   17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
                                   17.905077, 22.14605, 22.14605, 22.14605, 22.14605, 17.905077, 
                                   17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
                                   17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 22.14605, 
                                   22.14605, 22.14605, 22.14605, 17.905077, 17.905077, 17.905077, 
                                   17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 17.905077, 
                                   17.905077, 17.905077, 17.905077, 17.905077, 17.905077), 
                        lat.y = c(-31.27383, 
                                  -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
                                  -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
                                  -31.27383, -31.27383, -31.27383, -34.18307, -34.18307, -34.18307, 
                                  -34.18307, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
                                  -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
                                  -31.27383, -34.18307, -34.18307, -34.18307, -34.18307, -31.27383, 
                                  -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
                                  -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, -31.27383, 
                                  -31.27383), 
                        year = c(2007, 2012, 2009, 2005, 2009, 2011, 2006, 
                                 2008, 2012, 2012, 2012, 2008, 2011, 2005, 2006, 2009, 2010, 2007, 
                                 2008, 2012, 2011, 2010, 2011, 2007, 2005, 2007, 2011, 2005, 2008, 
                                 2006, 2010, 2006, 2008, 2005, 2006, 2010, 2007, 2008, 2005, 2010, 
                                 2010, 2007, 2009, 2010, 2011, 2005, 2010, 2007, 2008, 2005)), 
                   row.names = c(NA, 
                                 -50L), 
                   class = c("tbl_df", "tbl", "data.frame"))


# Year labels as given in the question (note that "2012" precedes "2011").
wave_years <- c("2005", "2006", "2007", "2008", "2009", "2010", "2012", "2011")

# One list element per distinct value of `year`.
lines_list <- split(lines, f = lines$year)

# [Issue 1]: the "wave_years" vector defined above is ordered differently
# from the names of lines_list.
# [Issue 1]: print both to confirm the mismatch:
print(names(lines_list))
print(wave_years)

# [Issue 1]: Fix - take the year labels directly from lines_list so that
# wave_years follows the order of the list elements.
wave_years <- names(lines_list)

# Derive the two coordinate tables for every year directly from lines_list.
# lapply() keeps the names and order of lines_list, so b_list and e_list
# line up with it element by element without relying on a separately
# maintained vector of year labels.
b_list <- lapply(lines_list, function(d) {
  setNames(d[, c("long.x", "lat.x")], c("long", "lat"))
})
e_list <- lapply(lines_list, function(d) {
  setNames(d[, c("long.y", "lat.y")], c("long", "lat"))
})

# [Issue 1]: these three vectors must be exactly the same.
# vapply() is used so that a missing (NULL) element raises an error instead
# of silently changing the type of the result.
vapply(lines_list, nrow, integer(1))
vapply(b_list, nrow, integer(1))
vapply(e_list, nrow, integer(1))

# Loading sf package (don't forget to indicate)
require(sf)

# [Issue 2]: This call is not written properly: the closing parenthesis of
# 'seq' is misplaced, so the anonymous function becomes an argument of seq()
# and lapply() is left without its FUN argument. It is reproduced here as
# posted and raises the error `argument "FUN" is missing, with no default`.
lines_list <- mapply(function(x, b, e){
  x %>% mutate(geometry = do.call(
    "c", 
    lapply(seq(nrow(b), function(i) {
      st_sfc(
        st_linestring(
          as.matrix(
            rbind(b[i, ], e[i, ])
          )
        ),
        crs = 4326
      )}
    ))))
}, x = lines_list, b = b_list, e = e_list, SIMPLIFY = FALSE)

# [Issue 2]: Solving
# Walk lines_list, b_list and e_list in parallel and add a LINESTRING
# geometry column to each data frame. Returns a list with the same names as
# lines_list (SIMPLIFY = FALSE keeps the result a list).
lines_list <- mapply(function(x, b, e) {
  # One LINESTRING per row: row i of `b` joined to row i of `e`.
  # seq_len() (not seq()) so that a zero-row table produces no iterations
  # instead of iterating over c(1, 0).
  segments <- lapply(seq_len(nrow(b)), function(i) {
    st_linestring(x = as.matrix(rbind(b[i, ], e[i, ])))
  })

  # A single st_sfc() call builds the whole column and sets the CRS once.
  x %>%
    mutate(geometry = st_sfc(segments, crs = 4326))
}, x = lines_list, b = b_list, e = e_list, SIMPLIFY = FALSE)
  • Related