Perform linear mixed model on several subsets of data in R-CodePudding

Consider the sleepstudy data in the lme4 package as shown below. The contains 18 subjects with repeated measurements of Reaction taken on different days. Suppose there is an additional variable Days2 as follows:

library("lme4")
sleepstudy$Days2<- rep(10:18, rep(20,9)) 
sleepstudy
  Reaction Days Subject Age Days2
1 249.5600    0     308  20    10
2 258.7047    1     308  20    10
3 250.8006    2     308  20    10
...
178 343.2199    7     372  28    18
179 369.1417    8     372  28    18
180 364.1236    9     372  28    18

This data has 9 unique Days2 i.e 10,11,...,18. Make 9 subsets of data as follows:

Subset data 1 will have all people with Days2 as 10 and above which is simply the whole dataset
Subset data 2 will have all people with Days2 as 11 and above
...
Subset 9 will have all people with Days2 as 18 and above.

tt <- sort(unique(sleepstudy$Days2))
Subset1 <- sleepstudy[sleepstudy$Days2>=tt[1], ]
Subset2<-sleepstudy[sleepstudy$Days2>=tt[2], ]
...
Subset9<-sleepstudy[sleepstudy$Days2>=tt[length(tt)], ]

Perform separate linear mixed models with a random intercept on the 9 subsets of data and then perform predictions at Days=tt[i] as follows:

fit1 = lmer(Reaction ~ Days   (1 | Subject), data = Subset1)
newSubset1 <- data.frame(   Days = tt[1], Subject = unique(Subset1$Subject))
newSubset1$Predicted_Response <- predict(fit1, newdata = newSubset1)

fit2 = lmer(Reaction ~ Days   (1 | Subject), data = Subset2)
newSubset2 <- data.frame(   Days = tt[2], Subject = unique(Subset2$Subject))
newSubset2$Predicted_Response <- predict(fit2, newdata = newSubset2)
...
fit9 = lmer(Reaction ~ Days   (1 | Subject), data = Subset9)
newSubset9 <- data.frame(  Days = tt[9], Subject = unique(Subset9$Subject) )
newSubset9$Predicted_Response <- predict(fit9, newdata = newSubset9)

Final step of merging output into one dataset

FinalOutput<-rbind( newSubset1,newSubset2,...,newSubset9 )
FinalOutput
  Days Subject Predicted_Response
   10     308           396.8617
   10     309           278.2284
   10     310           292.9694
   ...
   11     372           383.3525
   ...
   18     371           434.8685
   18     372           454.5697

The above steps are manual. How could I obtain the final output in R using a generalization of the steps? Maybe something like:

for(i in length(tt)){
Subset[i]<-sleepstudy[sleepstudy$Days2>=tt[i], ] 

fit[i]= lmer(Reaction ~ Days   (1 | Subject), data = Subset[i])
newSubset[i] <- data.frame(  Days = tt[i], Subject = unique(Subset[i]$Subject) )
newSubset[i]$Predicted_Response <- predict(fit[i], newdata = newSubset[i])
...

CodePudding user response：

purrr::map_dfr() (a) takes your subset-specific function, (b) applies the function to each subset, and (c) combines all the new subsets into a single data.frame

library("lme4")
ds_sample <-
  lme4::sleepstudy |> 
  dplyr::mutate(
    Days2 = rep(10:18, rep(20,9)) 
  )

predict_reaction <- function (.days_2) {
  ds_subset <- ds_sample[.days_2 <= ds_sample$Days2, ]

  fit = lmer(Reaction ~ Days   (1 | Subject), data = ds_subset)
  newSubset1 <- data.frame(   Days = .days_2, Subject = unique(ds_subset$Subject))
  newSubset1$Predicted_Response <- predict(fit, newdata = newSubset1)
  newSubset1
}

sort(unique(ds_sample$Days2)) |> 
  purrr::map_dfr(predict_reaction)

Output:

   Days Subject Predicted_Response
1    10     308           396.8617
2    10     309           278.2284
3    10     310           292.9694
4    10     330           360.4844
5    10     331           366.2942
6    10     332           364.2992
7    10     333           372.5785
8    10     334           353.0810
9    10     335           310.7958
...
87   17     371           453.4433
88   17     372           468.7271
89   18     371           434.8685
90   18     372           454.5697