I have created a data frame, in the data frame there are 3 sites and I have created a nested for loop to create my desired matrices. THe overall objective is find a more efficient way to do this for each of the 3 sites instead of just the one.
The outputs from the nested for loop (EDmatrix
and timelags
) are the expected results for the other two sites. I would like to find a more efficient way of obtaining these matrices as well as be able to do it for all site
instead of just the one in this example.
set.seed(123)
d1 = sample.int(50, 27)
d2 = sample.int(50, 27)
d3 = sample.int(50, 27)
year <- c(1990:1998)
site <- c(rep("a", 9), rep("b", 9), rep("c", 9))
ED = function(x,y){
#x and y are vectors of spp abundances
#they must be the same length!
if(length(x)!=length(y)) stop("Bad abundances!")
out = sqrt(sum((x-y)^2))
out
}
df <- data.frame(site, year, d1 = d1, d2 = d2, d3 = d3)
Here is the code to get the expected output for only a single site, but I would like to be able to do this for all of the sites in the data frame df
.
subdf = subset(df,site=="a") # subset data for one site
EDmatrix = matrix(NA,dim(subdf)[1],dim(subdf)[1]) # create a place to store the dissimilarity values
timeLags = matrix(NA,dim(subdf)[1],dim(subdf)[1]) # create a place to store the time lags
# First loop through all "j" years from 1 to the total number of years
# Now loop through all "k" years from 1 to the total number of years
for(j in 1: length(subdf$year)){
for(k in 1: length(subdf$year)){
# grab density data for year "j"
jdensity <- subdf[j,-c(1:2)]
# grab density data for year "k"
kdensity <- subdf[k,-c(1:2)]
# calculate and store (in the EDmatrix) the ED value based on the data for year j and k
EDmatrix[j,k] <- ED(jdensity, kdensity)
# calculate and store (in timeLags) the time lag (the absolute value of the difference
# in time between year j and k
timeLags[j,k] <- abs(subdf[j, 2] - subdf[k, 2])
}# exit k loop
}# exit j loop
EDmatrix[lower.tri(EDmatrix, diag=T)]=NA # set duplicate entries to NA
timeLags[lower.tri(timeLags, diag=T)]=NA # set duplicate entries to NA
y = as.vector(EDmatrix) # turn the matrix into a vector
x = as.vector(timeLags)
CodePudding user response:
We may use outer
for this operation
library(dplyr)
library(tidyr)
library(purrr)
f1 <- function(dat, i, j) {
subdat <- dat %>%
select(starts_with('d'))
jdensity <- subdat[i, ]
kdensity <- subdat[j,]
EDtmp <- ED(jdensity, kdensity)
timetmp <- abs(dat$year[i] - dat$year[j])
tibble(EDtmp, timetmp)
}
f2 <- function(dat, s1, s2) {
mat <- outer(s1, s2, Vectorize(\(i, j) list(f1(dat, i, j))))
EDmatrix <- matrix(map_dbl(mat, ~ .x$EDtmp), length(s1), length(s1))
timeLags <- matrix(map_dbl(mat, ~ .x$timetmp), length(s1), length(s1))
EDmatrix[lower.tri(EDmatrix, diag=TRUE)]=NA
timeLags[lower.tri(timeLags, diag=TRUE)]=NA
y = as.vector(EDmatrix)
x = as.vector(timeLags)
tibble(y, x)
}
out1 <- df %>%
group_by(site) %>%
summarise(out = f2(cur_data(), row_number(), row_number()),
.groups = 'drop') %>%
unnest(out)
-checking with OP's output
> out1$x[out1$site == "a"]
[1] NA NA NA NA NA NA NA NA NA 1 NA NA NA NA NA NA NA NA 2 1 NA NA NA NA NA NA NA 3 2 1 NA NA NA NA NA NA 4 3 2 1 NA NA NA NA NA 5 4 3
[49] 2 1 NA NA NA NA 6 5 4 3 2 1 NA NA NA 7 6 5 4 3 2 1 NA NA 8 7 6 5 4 3 2 1 NA
> x
[1] NA NA NA NA NA NA NA NA NA 1 NA NA NA NA NA NA NA NA 2 1 NA NA NA NA NA NA NA 3 2 1 NA NA NA NA NA NA 4 3 2 1 NA NA NA NA NA 5 4 3
[49] 2 1 NA NA NA NA 6 5 4 3 2 1 NA NA NA 7 6 5 4 3 2 1 NA NA 8 7 6 5 4 3 2 1 NA
> out1$y[out1$site == "a"]
[1] NA NA NA NA NA NA NA NA NA 30.675723 NA NA NA NA
[15] NA NA NA NA 41.388404 18.055470 NA NA NA NA NA NA NA 42.485292
[29] 33.136083 25.729361 NA NA NA NA NA NA 38.288379 41.581246 34.770677 39.433488 NA NA
[43] NA NA NA 13.038405 38.379682 49.264592 54.083269 40.865633 NA NA NA NA 16.431677 25.317978
[57] 36.701499 47.549974 36.359318 15.362291 NA NA NA 34.799425 54.680892 54.018515 49.254441 26.019224 35.791060 41.484937
[71] NA NA 9.433981 34.842503 46.108568 42.801869 45.199558 19.924859 25.079872 38.652296 NA
> y
[1] NA NA NA NA NA NA NA NA NA 30.675723 NA NA NA NA
[15] NA NA NA NA 41.388404 18.055470 NA NA NA NA NA NA NA 42.485292
[29] 33.136083 25.729361 NA NA NA NA NA NA 38.288379 41.581246 34.770677 39.433488 NA NA
[43] NA NA NA 13.038405 38.379682 49.264592 54.083269 40.865633 NA NA NA NA 16.431677 25.317978
[57] 36.701499 47.549974 36.359318 15.362291 NA NA NA 34.799425 54.680892 54.018515 49.254441 26.019224 35.791060 41.484937
[71] NA NA 9.433981 34.842503 46.108568 42.801869 45.199558 19.924859 25.079872 38.652296 NA