With the help of NLCD land cover I am assigning a land use to some coordinates of species occurrences. However, the output changes depending on the length of the dataset. The function seems to reorder the dataset in a way that I still ignore. So I am interested in knowing why this happens and how this can be solved? Below, I have a minimal reproducible example that illustrates my issue.
Note: A similar code (https://www.r-bloggers.com/2014/11/spatial-data-extraction-around-buffered-points-in-r/) with older versions of the libraries were running smoothly without this issue.
#Load libraries
library(tidyverse) #version 1.3.2
library(raster) #version 3.6-3
#Code adapted originally from:
#https://www.r-bloggers.com/2014/11/spatial-data-extraction-around-buffered-points-in-r/
#Link of land cover data
#https://www.mrlc.gov/data/nlcd-2006-land-cover-conus
#Create example data with 5 records
data1 = tibble(species = c("sp1", "sp2", "sp2", "sp1", "sp3"),
long = as.numeric(c("-77.7270", "-76.4044", "-77.6611", "-70.9570", "-72.3198")),
lat = as.numeric(c("39.37850", "38.38720", "39.50530", "42.78245", "41.70597")))
#Extract coordinates
coords1 <- data1[, c("long", "lat")]
#convert lat/lon to appropriate projection
names(coords1) <- c("x", "y")
coordinates(coords1) <- ~x y
proj4string(coords1) <- CRS(" proj=longlat ellps=WGS84 datum=WGS84")
crs_args <- NLCD@crs@projargs
Datos_transformed1 <- spTransform(coords1, CRS(crs_args))
#extract land cover data for each point
data1$Cover.names1 <- raster::extract(NLCD, Datos_transformed1)
data1$Cover.names1
#Create same dataset with extra 5 records
data2 = tibble(species = c("sp2", "sp4", "sp1", "sp1", "sp5"),
long = as.numeric(c("-71.12069", "-71.13031", "-71.17280", "-72.97187", "-74.67561")),
lat = as.numeric(c("42.40300", "42.41218", "42.27502", "44.47191", "40.59236")))
#Bind new data with dataset 1
data2 = bind_rows(data1, data2)
#Extract coordinates
coords2 <- data2[, c("long", "lat")]
#convert lat/lon to appropriate projection
names(coords2) <- c("x", "y")
coordinates(coords2) <- ~x y
proj4string(coords2) <- CRS(" proj=longlat ellps=WGS84 datum=WGS84")
crs_args <- NLCD@crs@projargs
Datos_transformed2 <- spTransform(coords2, CRS(crs_args))
#Extract land cover data for each point
data2$Cover.names2 <- raster::extract(NLCD, Datos_transformed2)
data2$Cover.names2
#Compare first 5 records in both datasets
data$Cover.names1[1:5] == data$Cover.names2[1:5]
data1$long[1:5] == data2$long[1:5]
data1$lat[1:5] == data2$lat[1:5]
CodePudding user response:
This is a bug introduced in the last version of "raster" on CRAN. This version has many changes under the hood, as the "rgdal" package is no longer used (because it will be retired soon); instead functions from the "terra" package are used. This has affected the way values are extracted from categorical rasters. The bug was fixed in raster version 3.6-6. You can install it with
install.packages('terra', repos='https://rspatial.r-universe.dev')
install.packages('raster', repos='https://rspatial.r-universe.dev')
However, I would suggest moving away from "raster" as it has been replaced by "terra".
Get the data:
url <- "https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2006_land_cover_l48_20210604.zip"
fz <- basename(url)
if (!file.exists(fz)) {
download.file(url, fz, mode="wb")
unzip(fz)
}
f <- gsub("zip$", "img", fz)
data1 = data.frame(species = c("sp1", "sp2", "sp2", "sp1", "sp3"),
long = as.numeric(c("-77.7270", "-76.4044", "-77.6611", "-70.9570", "-72.3198")),
lat = as.numeric(c("39.37850", "38.38720", "39.50530", "42.78245", "41.70597")))
data2 = data.frame(species = c("sp2", "sp4", "sp1", "sp1", "sp5"),
long = as.numeric(c("-71.12069", "-71.13031", "-71.17280", "-72.97187", "-74.67561")),
lat = as.numeric(c("42.40300", "42.41218", "42.27502", "44.47191", "40.59236")))
data = rbind(data1, data2)
Use "terra" for extraction of values
library(terra)
NLCD <- rast(f)
v <- vect(data, c("long", "lat"), crs=" proj=longlat") |> project(NLCD)
Cov1 <- extract(NLCD, v[1:5,], ID=FALSE)
Cov2 <- extract(NLCD, v, ID=FALSE)
cbind(Cov1, Cov2[1:5,,drop=F])
# NLCD Land Cover Class NLCD Land Cover Class
#1 Developed, Open Space Developed, Open Space
#2 Deciduous Forest Deciduous Forest
#3 Hay/Pasture Hay/Pasture
#4 Mixed Forest Mixed Forest
#5 Deciduous Forest Deciduous Forest
Or with the fixed "raster"
library(raster)
rNLCD <- raster(f)
sp <- data
coordinates(sp) <- ~long lat
crs(sp) <- " proj=longlat datum=WGS84"
sp <- spTransform(sp, crs(NLCD))
Cover1 <- extract(rNLCD, sp[1:5, ])
Cover2 <- extract(rNLCD, sp)
cbind(cov1=Cover1, cov2=Cover2[1:5])
# cov1 cov2
#[1,] 21 21
#[2,] 41 41
#[3,] 81 81
#[4,] 43 43
#[5,] 41 41
And note that raster::extract does not return the class labels, just the raw values.