Resampling groups of rows in R-CodePudding

Do you have a faster way to resample (with replacement) groups in a dataset using R?

Edit: Note that I would like to resample groups of rows, not individual rows.

# Toy dataset: three groups ("a", "b", "c"), each with two replicate rows
# (rep = 1, 2), and a running integer `value` from 1 to 6.
toydata <- data.frame(
  group = rep(c("a", "b", "c"), each = 2L),
  rep   = rep(c(1L, 2L), times = 3L),
  value = seq_len(6L)
)

# Show the full table.
print(toydata)

  group rep value
1     a   1     1
2     a   2     2
3     b   1     3
4     b   2     4
5     c   1     5
6     c   2     6

library(dplyr)  # provides n_distinct() and left_join() used below

# Resample whole groups (not individual rows) with replacement.
#
# This relies on a balanced layout: nreps is computed as
# nrow(toydata) / ngroups, and the join keys below are built as
# rep = 1..nreps for every sampled group.
ngroups <- n_distinct(toydata$group)
nreps   <- nrow(toydata) / ngroups

s <- sample(unique(toydata$group), replace = TRUE)  # resampling groups with replacement

# Build one (group, rep) key row per resampled row, then pull the matching
# rows of toydata; a group drawn k times therefore appears k times.
toydata_resampled <- left_join(
  x  = data.frame(
    group = rep(s, each = nreps),
    rep   = rep(1:nreps, ngroups)
  ),
  y  = toydata,
  by = c("group", "rep"))

One possible output (the result is random, so the groups drawn will vary from run to run):

> print(toydata_resampled)
  group rep value
1     a   1     1
2     a   2     2
3     a   1     1
4     a   2     2
5     c   1     5
6     c   2     6

CodePudding user response：

Split your data frame by group, sample the resulting list with replacement, and bind the sampled pieces back into a single data.frame.

set.seed(1)  # fix the RNG state so the draw below is reproducible

# split() produces one data frame per group; sample() then draws elements of
# that list with replacement (as many draws as there are groups), and
# do.call(rbind, ...) stacks the drawn pieces into a single data frame.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
do.call(rbind, sample(split(toydata, toydata$group), replace = TRUE))

output

     group rep value
a.1      a   1     1
a.2      a   2     2
c.5      c   1     5
c.6      c   2     6
a.11     a   1     1
a.21     a   2     2

CodePudding user response：

Is this faster?

# Draw `iter` groups with replacement and stack their rows in draw order.
iter <- 10
uq <- unique(toydata$group)

# Draw all group labels in a single call instead of one sample() per
# iteration; this also reuses the hoisted `uq` rather than recomputing
# unique() every time.
picked <- sample(uq, size = iter, replace = TRUE)

# Row positions of each drawn group, concatenated in draw order.
row_idx <- unlist(lapply(picked, function(g) which(toydata$group == g)))

# A single indexing step replaces the repeated rbind(), which copied the
# growing result on every iteration. With iter = 0 this gives a zero-row
# data frame, whereas looping over 1:iter would have run for i = 1 and i = 0.
# Row names of rows selected more than once are made unique by R, so they
# may differ from those produced by repeated rbind().
output <- toydata[row_idx, , drop = FALSE]

# With iter = 10 and two rows per group, `output` has 20 rows.
# NOTE(review): the sample listing that follows shows 26 rows, so it was
# presumably produced with a different `iter` -- confirm.
output

> output
    group rep value
 1:     a   1     1
 2:     a   2     2
 3:     b   1     3
 4:     b   2     4
 5:     c   1     5
 6:     c   2     6
 7:     c   1     5
 8:     c   2     6
 9:     a   1     1
10:     a   2     2
11:     a   1     1
12:     a   2     2
13:     c   1     5
14:     c   2     6
15:     b   1     3
16:     b   2     4
17:     c   1     5
18:     c   2     6
19:     a   1     1
20:     a   2     2
21:     c   1     5
22:     c   2     6
23:     b   1     3
24:     b   2     4
25:     a   1     1
26:     a   2     2
    group rep value