Home > database >  Force R to write loop output to list after each iteration
Force R to write loop output to list after each iteration

Time:12-01

I am looping over a large list of shape files (xy-coordinates) that I want to store in another list in R after applying some functions to the shape.

list_adj <- list()
for(ii in 1:length(data_list)) {

  ij <- names(data_list)[ii]
  M <- data_list[[ii]]

  fancy_obj <- function(){do_fancy_things}

  list_adj[[ij]] <- fancy_obj

  Sys.sleep(2)
}

The problem I am facing is that R keeps looping over all shape files without writing them to list_adj variable after each iteration. Instead, R tries to write all 1600 shape files to the list after all iterations are completed. This causes my for loop to crash all the time. Is there a way to force R to write to list after each iteration?

CodePudding user response:

R targets allows you to read files and apply R functions on them. It will process one file by the other and does not load everything into one big R list which might lead to out of memory errors:

  1. Create a file _targets.R:
# load all libraries needed to read and process the shapefiles
library(targets)
library(sf)

# do fancy things
modify_shape <- function(x) {
  x
}

list(
  tar_target(
    shape_files, {
      list.files(pattern = ".shp$", recursive = TRUE, full.names = TRUE)
    },
    format = "file"
  ),
  tar_target(
    raw_shapes, {
      read_sf(shape_files)
    },
     pattern = map(shape_files)
  ),
  tar_target(
    processed_shapes, {
      modify_shape(shape_files)
    },
     pattern = map(raw_shapes)
  ),
)
  1. Run the pipeline
tar_make()
  1. load the result
tar_load(processed_shapes)
processed_shapes

CodePudding user response:

You can use purrr::map function instead of loop:

Data:

data_list <- list(sf1 = structure(list(geometry = structure(list(structure(c(2.3111662, 
48.840346), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 2.3111662, ymin = 48.840346, 
xmax = 2.3111662, ymax = 48.840346), class = "bbox"), crs = structure(list(
    input = "WGS84", wkt = "GEOGCRS[\"WGS 84\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
-1L), sf_column = "geometry", agr = structure(integer(0), .Label = c("constant", 
"aggregate", "identity"), class = "factor", .Names = character(0)), class = c("sf", 
"tbl_df", "tbl", "data.frame")), sf2 = structure(list(geometry = structure(list(
    structure(c(2.36571460340002, 2.3655857662182, 2.36547329046861, 
    2.36542516193229, 2.36528545258358, 2.36512653636567, 2.3648060350023, 
    2.36424260145439, 2.36387560297359, 2.36366713159424, 2.36334443947327, 
    2.36322695208547, 2.36317764205549, 2.36318437079076, 2.36321528700506, 
    2.36326729012258, 2.36331020083956, 48.8776400092826, 48.8774711513306, 
    48.8773023799338, 48.8772238828371, 48.8770522688551, 48.8768904454866, 
    48.8765622871995, 48.8759189703608, 48.8754916354644, 48.8752575997409, 
    48.8748871565497, 48.874683281842, 48.8744788703593, 48.8742630655848, 
    48.8740761681905, 48.8739487391069, 48.8738959065238), .Dim = c(17L, 
    2L), class = c("XY", "LINESTRING", "sfg"))), class = c("sfc_LINESTRING", 
"sfc"), precision = 0, bbox = structure(c(xmin = 2.36317764205549, 
ymin = 48.8738959065238, xmax = 2.36571460340002, ymax = 48.8776400092826
), class = "bbox"), crs = structure(list(input = "WGS84", wkt = "GEOGCRS[\"WGS 84\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = 1L, class = c("sf", 
"data.frame"), sf_column = "geometry", agr = structure(integer(0), .Label = c("constant", 
"aggregate", "identity"), class = "factor", .Names = character(0))))

Code:

library(tidyverse)
list_adj <- map(1:length(data_list), function(x) data_list[[x]] %>%
                  st_crs()) # Replace st_crs() by anything, that is for the example.

names(list_adj) <- map(1:length(data_list), function(x) names(data_list)[[x]])

Output:

$sf1
Coordinate Reference System:
  User input: WGS84 
  wkt:
GEOGCRS["WGS 84",
    DATUM["World Geodetic System 1984",
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4326]]

$sf2
Coordinate Reference System:
  User input: WGS84 
  wkt:
GEOGCRS["WGS 84",
    DATUM["World Geodetic System 1984",
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4326]]
  • Related