Home > Net >  Specify random effect with different variance across groups in nlme
Specify random effect with different variance across groups in nlme

Time:11-06

I have a dataset with measures for individuals, where each measure represents a specific type of measure (say '1' or '2') and each individual belongs to specific group (say 'A' or 'B'). For a subset of individuals, I have observed both measures '1' and '2'. In this data, the different measures have different variances, and there is a subject-level random effect that has very different variances in the two groups. How would I go about fitting this model in the right way?

Here is an example:

dat <- structure(list(subject = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
                       12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
                       28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 
                       44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 
                       60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 
                       76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 
                       92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 101, 102, 102, 103, 
                       103, 104, 104, 105, 105, 106, 106, 107, 107, 108, 108, 109, 109, 
                       110, 110, 111, 111, 112, 112, 113, 113, 114, 114, 115, 115, 116, 
                       116, 117, 117, 118, 118, 119, 119, 120, 120, 121, 121, 122, 122, 
                       123, 123, 124, 124, 125, 125, 126, 126, 127, 127, 128, 128, 129, 
                       129, 130, 130), 
           group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                               2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
                               2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("A", "B"), class = "factor"), 
           measure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
                                 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                                 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
                                 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
                                 2L), .Label = c("1", "2"), class = "factor"), 
           y = c(-1.71, 
                 121.74, -1.57, 109.96, -0.64, 101.67, -0.13, 120.64, 1.47, 
                 101.99, -4.51, 133.18, -2.9, 117.95, -0.97, 126.94, -1.44, 
                 105.1, -1.52, 122.2, -2.29, 130.17, -0.35, 133.14, -0.94, 
                 112.68, -0.89, 105.37, -2.49, 126.75, -2.61, 139.25, -2.13, 
                 113.43, 0.61, 140.76, -0.75, 129.17, 1.94, 139.4, -0.49, 
                 119.03, -2.09, 89.97, -2.76, 107.85, 1.61, 136.31, -0.55, 
                 128.6, 0.41, 86.66, 0.54, 100.03, 2.46, 115.37, 6.94, 109.34, 
                 3.78, 102.34, -4.46, 104.06, 1.48, 105.06, 3.98, 85.21, 1.31, 
                 103.17, -3.35, 110.83, 2.75, 98.38, -2.43, 101.57, 2.2, 120.45, 
                 -4.06, 101.25, 3.85, 99.38, 2.17, 108, 9.27, 100.76, 3.27, 
                 110.3, 1.22, 98.91, 1.62, 105.65, 4.64, 113.07, 8.14, 108.75, 
                 6.84, 73.08, 1.42, 99.41, -0.5, 95.25, 1.42, 3.76, 102.95, 
                 85.45, -2.71, -0.48, 137.34, 114.61, -0.42, 1.71, 98.82, 
                 83.06, -3.51, -0.32, 109.66, 91.99, -0.46, -1.35, 113.88, 
                 97.32, -0.93, 1.17, 111.26, 103.9, -4.11, 6.78, 106.36, 88.22, 
                 -0.85, -6.56, 137.39, 112.19, -0.91, 3.26, 122.53, 105.18, 
                 -0.61, 4.25, 111.01, 95.85, -2.68, 3.1, 142.26, 114.44, -0.31, 
                 3.76, 127.61, 102.26, -1.82, 4.01, 116.61, 97.1, -3.61, 0.9, 
                 107.73, 90.6, -0.13, 3.78, 108.73, 95.12)), 
      row.names = c(NA, -160L), class = "data.frame")

Plot of values for the two measures across groups

I can fit a mixed-effects model with nlme:

init <- c(-1.2, 120, 2, 100)

model1 <- nlme(y ~ a,
               data = dat,
               fixed = list(a ~ group : measure   0),
               random = a ~ 1,
               groups = ~ subject,
               start = init,
               weights = varIdent(form = ~ 1 | measure))

Is there a way to fit the model such that the random effect has different variances across groups? I have a feeling that this should be achievable by using a correlation structure, but so far I have been unsuccessful.

In reality, my model is nonlinear and more complex than the above, so the problem can unfortunately not be solved by crossed random effects with lmer (but maybe a crossed random effects hack for nlme?)

CodePudding user response:

This may be tough. I can get it to work with lme, and I think I have the syntax right with nlme, but I'm struggling.

The basic trick is to set up a numeric dummy variable that is 0 for your baseline group and 1 for the group with the greater among-subject variance — this only works if you know that a priori (if you want to do it for lots of groups you need to identify the group with the smallest among-subject variance, and set up a whole bunch of group-level dummies ...)

dat$groupdummy <- as.numeric(dat$group) - 1

Fitting the model with a diagonal RE covariance structure so it doesn't try to estimate the correlation between the baseline (group-A) among-subject RE and the 'extra' (group-B) among-subject RE, which is unidentifiable anyway ...

model1 <- lme(y ~ group:measure   0,
              data = dat,
              random = list(subject = pdDiag(~ groupdummy   1)),
              weights = varIdent(form = ~ 1 | measure))

The answers are at least reasonable:

Random effects:
 Formula: ~groupdummy   1 | subject
 Structure: Diagonal
         (Intercept) groupdummy Residual
StdDev: 0.0002643621   11.95069 1.578527

Variance function:
 Structure: Different standard deviations per stratum
 Formula: ~1 | measure 
 Parameter estimates:
       1        2 
1.000000 2.312402 

I should warn you that you have a pretty sketchy data set as far as estimating among-subject variances goes (only 30/130 individuals have repeated measurements), and everything is going to get harder when fitting nonlinear models ...

The bad news is that I couldn't make the equivalent nlme model work, even fussing with control parameters. (I think this is the correct equivalent syntax ... although it can be hard to tell the difference between real numerical problems and incorrectly specifying the model ...)

re <- ranef(model1)
names(re) <- paste0("a.",names(re))
model2 <- nlme(y ~ a,
               data = dat,
               fixed = list(a ~ group : measure   0),
               random = pdDiag(a ~ groupdummy   1),
               groups = ~ subject,
               start = list(fixed = fixef(model1),
                            random = re),
               control = nlmeControl(minscale = 1e-6,
                                     pnlsTol = 1e-2))

CodePudding user response:

Following, Ben Bolker's suggestion in the comment, I ended up implementing the model in the Template Model Builder framework (Thanks for pointing me in this direction Ben!) The model did not converge for the given simulated dataset, but it did for other simulations and fitted the correct model with substantially improved likelihood values (identical to the nlme model when removing the extra variance parameter).

To achieve this, I first made the model code in mixed.cpp:

#include <TMB.hpp>

template<class Type>
Type objective_function<Type>::operator() ()
{
  // Data and design variables
  DATA_VECTOR(y); // Outcome
  DATA_ARRAY(X_mean); // Covariates
  DATA_ARRAY(X_subj); // Subject design matrix
  DATA_ARRAY(X_var); // Variance design matrix  
  DATA_ARRAY(X_var_subj); // Subject-level random-effect variance design matrix
  
  
  int n = y.size(); // Number of observations
  int n_mean = X_mean.dim[1]; // Number of observations
  int n_subj = X_subj.dim[1]; // Number of subjects
  
  
  // Parameters
  PARAMETER_VECTOR(a);  
  PARAMETER_VECTOR(a_subj);
  PARAMETER(log_sigma_measure1);
  PARAMETER(log_sigma_measure2);
  PARAMETER(log_sigma_subj_group1);
  PARAMETER(log_sigma_subj_group2);
  
  // Variance parameters
  Type sigma_measure1 = exp(log_sigma_measure1);
  Type sigma_measure2 = exp(log_sigma_measure2);
  Type sigma_subj_group1 = exp(log_sigma_subj_group1);
  Type sigma_subj_group2 = exp(log_sigma_subj_group2);
  
  // Negative log likelihood
  Type nll = 0.0;
  vector<Type> yfit(n);
  vector<Type> random(n);
  
  for (int i = 0; i < n; i  ) { // Loop over observations
    random(i) = 0.0;
    for (int j = 0; j < n_subj; j  ) { // Subject random effect for observation i
      random(i)  = X_subj(i, j) * a_subj(j);
    }
    
    yfit(i) = 0.0;
    for (int j = 0; j < n_mean; j  ) { // Mean structure for observation i
      yfit(i)  = X_mean(i, j) * a(j)   random(i);
    }
    
    nll  = -dnorm(y(i), 
                  yfit(i),
                  X_var(i, 0) * sigma_measure1  
                    X_var(i, 1) * sigma_measure2, true);
  }
  
  for (int j = 0; j < n_subj; j  ) { // Loop over subjects for random effect contribution
    nll  = -dnorm(a_subj[j], 
                  Type(0.0), 
                  X_var_subj(j, 0) * sigma_subj_group1  
                    X_var_subj(j, 1) * sigma_subj_group2, true);
  }
  
  return nll;
}

and then fitted the model from R:

library(TMB)

compile('mixed.cpp')
dyn.load(dynlib('mixed'))

fit_dat <- list(y = dat$y, 
                X_mean = model.matrix(~ group : measure   0, data = dat),
                X_subj = model.matrix(~ subject   0, data = dat),
                X_var = model.matrix(~ measure   0, data = dat),
                X_var_subj = model.matrix(~ group   0, data = dat))

parameters <- list(a = c(-2, 120, 2, 100),
                   a_subj = rep(0, ncol(fit_dat$X_subj)),
                   log_sigma_measure1 = 0,
                   log_sigma_measure2 = 1,
                   log_sigma_subj_group1 = 2,
                   log_sigma_subj_group2 = 2) 

model <- MakeADFun(data = fit_dat, 
                   parameters = parameters,
                   DLL = 'mixed',
                   random = 'a_subj')

fit <- nlminb(start = model$par, 
              objective = model$fn, 
              gradient = model$gr)

So far, I am very impressed with the robustness of the TMB package, but I am struggling to understand differences in the scale of variance parameters of random effects between TMB and nlme (despite of models being identical with otherwise identical fits).

  • Related