Home > Software design >  Applying a for-loop to different levels of a variable
Applying a for-loop to different levels of a variable

Time:10-16

I have created a data frame, in the data frame there are 3 sites and I have created a nested for loop to create my desired matrices. THe overall objective is find a more efficient way to do this for each of the 3 sites instead of just the one.

The outputs from the nested for loop (EDmatrix and timelags) are the expected results for the other two sites. I would like to find a more efficient way of obtaining these matrices as well as be able to do it for all site instead of just the one in this example.

set.seed(123)


d1 = sample.int(50, 27)
d2 = sample.int(50, 27)
d3 = sample.int(50, 27)
year <- c(1990:1998)
site <- c(rep("a", 9), rep("b", 9), rep("c", 9))


ED = function(x,y){
  #x and y are vectors of spp abundances
  #they must be the same length!
  if(length(x)!=length(y)) stop("Bad abundances!")
  out =  sqrt(sum((x-y)^2))
  out
}

df <- data.frame(site, year, d1 = d1, d2 = d2, d3 = d3)

Here is the code to get the expected output for only a single site, but I would like to be able to do this for all of the sites in the data frame df.

subdf = subset(df,site=="a")   # subset data for one site
EDmatrix = matrix(NA,dim(subdf)[1],dim(subdf)[1])   # create a place to store the dissimilarity values

timeLags = matrix(NA,dim(subdf)[1],dim(subdf)[1])   # create a place to store the time lags


# First loop through all "j" years from 1 to the total number of years

# Now loop through all "k" years from 1 to the total number of years
for(j in 1: length(subdf$year)){
  for(k in 1: length(subdf$year)){
    # grab density data for year "j"
    jdensity <- subdf[j,-c(1:2)]
    # grab density data for year "k"
    kdensity <- subdf[k,-c(1:2)]
    # calculate and store (in the EDmatrix) the ED value based on the data for year j and k
    EDmatrix[j,k] <- ED(jdensity, kdensity)
    # calculate and store (in timeLags) the time lag (the absolute value of the difference 
    # in time between year j and k
    timeLags[j,k] <- abs(subdf[j, 2] - subdf[k, 2])
    
  }# exit  k loop
}# exit j loop

EDmatrix[lower.tri(EDmatrix, diag=T)]=NA    # set duplicate entries to NA
timeLags[lower.tri(timeLags, diag=T)]=NA     # set duplicate entries to NA
y = as.vector(EDmatrix)  # turn the matrix into a vector
x = as.vector(timeLags)

CodePudding user response:

We may use outer for this operation

library(dplyr)
library(tidyr)
library(purrr)
f1 <- function(dat, i, j) {
        subdat <- dat %>% 
                  select(starts_with('d'))
        jdensity <- subdat[i, ]
        kdensity <- subdat[j,]
        EDtmp <- ED(jdensity, kdensity)
        timetmp <- abs(dat$year[i] - dat$year[j])
        
        tibble(EDtmp, timetmp)
  
  }
  
f2 <- function(dat, s1, s2) {
        mat <- outer(s1, s2, Vectorize(\(i, j) list(f1(dat, i, j))))
        EDmatrix <- matrix(map_dbl(mat, ~ .x$EDtmp), length(s1), length(s1))
        timeLags <- matrix(map_dbl(mat, ~ .x$timetmp), length(s1), length(s1))
        EDmatrix[lower.tri(EDmatrix, diag=TRUE)]=NA    
        timeLags[lower.tri(timeLags, diag=TRUE)]=NA  
        y = as.vector(EDmatrix) 
        x = as.vector(timeLags)
        tibble(y, x)
}

out1 <-  df %>% 
              group_by(site) %>% 
               summarise(out = f2(cur_data(), row_number(), row_number()), 
         .groups = 'drop') %>%
               unnest(out)

-checking with OP's output

> out1$x[out1$site == "a"]
 [1] NA NA NA NA NA NA NA NA NA  1 NA NA NA NA NA NA NA NA  2  1 NA NA NA NA NA NA NA  3  2  1 NA NA NA NA NA NA  4  3  2  1 NA NA NA NA NA  5  4  3
[49]  2  1 NA NA NA NA  6  5  4  3  2  1 NA NA NA  7  6  5  4  3  2  1 NA NA  8  7  6  5  4  3  2  1 NA
> x
 [1] NA NA NA NA NA NA NA NA NA  1 NA NA NA NA NA NA NA NA  2  1 NA NA NA NA NA NA NA  3  2  1 NA NA NA NA NA NA  4  3  2  1 NA NA NA NA NA  5  4  3
[49]  2  1 NA NA NA NA  6  5  4  3  2  1 NA NA NA  7  6  5  4  3  2  1 NA NA  8  7  6  5  4  3  2  1 NA
> out1$y[out1$site == "a"]
 [1]        NA        NA        NA        NA        NA        NA        NA        NA        NA 30.675723        NA        NA        NA        NA
[15]        NA        NA        NA        NA 41.388404 18.055470        NA        NA        NA        NA        NA        NA        NA 42.485292
[29] 33.136083 25.729361        NA        NA        NA        NA        NA        NA 38.288379 41.581246 34.770677 39.433488        NA        NA
[43]        NA        NA        NA 13.038405 38.379682 49.264592 54.083269 40.865633        NA        NA        NA        NA 16.431677 25.317978
[57] 36.701499 47.549974 36.359318 15.362291        NA        NA        NA 34.799425 54.680892 54.018515 49.254441 26.019224 35.791060 41.484937
[71]        NA        NA  9.433981 34.842503 46.108568 42.801869 45.199558 19.924859 25.079872 38.652296        NA
> y
 [1]        NA        NA        NA        NA        NA        NA        NA        NA        NA 30.675723        NA        NA        NA        NA
[15]        NA        NA        NA        NA 41.388404 18.055470        NA        NA        NA        NA        NA        NA        NA 42.485292
[29] 33.136083 25.729361        NA        NA        NA        NA        NA        NA 38.288379 41.581246 34.770677 39.433488        NA        NA
[43]        NA        NA        NA 13.038405 38.379682 49.264592 54.083269 40.865633        NA        NA        NA        NA 16.431677 25.317978
[57] 36.701499 47.549974 36.359318 15.362291        NA        NA        NA 34.799425 54.680892 54.018515 49.254441 26.019224 35.791060 41.484937
[71]        NA        NA  9.433981 34.842503 46.108568 42.801869 45.199558 19.924859 25.079872 38.652296        NA
  • Related