Add new blank rows into dataset by group (in R)-CodePudding

I use R. I have a data frame like this:

# Example input: one row per observed (group, horizon) pair. Some horizons
# between 1 and 10 are absent within each group.
dat <- data.frame(
  group = c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
  horizon = c(1, 3, 5, 6, 7, 10, 1, 3, 5, 9, 10),
  value = c(1.0, 0.9, 0.8, 0.6, 0.3, 0.0, 0.5, 0.6, 0.8, 0.9, 0.8),
  # Quoted so the labels are character data rather than undefined symbols.
  other = c("a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b")
)

I would like to add a row for every horizon that is missing within each group (2, 4, 8 and 9 for the first group; 2, 4, 6, 7 and 8 for the second group). The value (`value`) for each missing horizon should be blank.

I would like to get something like this:

# Desired result: every group has all horizons 1..10. Rows that were missing
# from the input carry NA in `value` and keep the group's `other` label.
datx <- data.frame(
  group = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
  horizon = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  # NA (not the string "na") keeps the column numeric.
  value = c(
    1.0, NA, 0.9, NA, 0.8, 0.6, 0.3, NA, NA, 0.0,
    0.5, NA, 0.6, NA, 0.8, NA, NA, NA, 0.9, 0.8
  ),
  other = c(
    "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
    "b", "b", "b", "b", "b", "b", "b", "b", "b", "b"
  )
)

That is, an enlarged dataset with the new horizons, blank or "na" entries in the `value` variable, and the `other` variable retained.

This is just an example. I am actually working with a much larger dataset.

Without the groups, the problem would be much easier to solve; I would use something like this:

# Outer-join dat onto the full 1..10 horizon sequence (merged on the shared
# `horizon` column), then put the rows in ascending horizon order.
newdat <- merge(
  x = data.frame(horizon = seq(1, 10, 1)),
  y = dat,
  all = TRUE
)
newdat <- newdat[order(newdat[["horizon"]]), ]

Thanks for help!

CodePudding user response：

I'll assume that the values in the variable `other` are the characters "a" or "b", and that this variable is completely redundant with your variable `group`. If this is the case, you could accomplish this with `full_join` from the dplyr package.

# Labels referenced by name when building the `other` column below.
a <- "a"
b <- "b"

# Sample data from the question: 6 observed horizons for group 1 and
# 5 for group 2.
dat <- data.frame(
  group = rep(c(1, 2), times = c(6, 5)),
  horizon = c(1, 3, 5, 6, 7, 10, 1, 3, 5, 9, 10),
  value = c(1.0, 0.9, 0.8, 0.6, 0.3, 0.0, 0.5, 0.6, 0.8, 0.9, 0.8),
  other = rep(c(a, b), times = c(6, 5))
)

# `%>%` is only available once dplyr (which re-exports it) is attached.
library(dplyr)

# One `other` label per group, taken from the data instead of being
# hard-coded. This relies on `other` being constant within each group.
group_labels <- dplyr::distinct(dat, group, other)

# Every group x horizon combination across the observed horizon range,
# with the group's `other` label attached.
groups <- expand.grid(
  group = unique(dat$group),
  horizon = seq(min(dat$horizon), max(dat$horizon))
)
groups <- groups %>% dplyr::left_join(group_labels, by = "group")

# The full join keeps the observed rows and adds the missing combinations;
# `value` is NA for the added rows.
dat %>%
  dplyr::full_join(groups, by = c("group", "horizon", "other")) %>%
  dplyr::arrange(group, horizon)

CodePudding user response：

Using data.table:

library(data.table)

# Convert dat to a data.table in place.
setDT(dat)

# Columns whose per-group value is copied into the newly created rows.
fill <- "other"

# Join dat onto every group x horizon combination (horizons spanning the
# overall min..max); combinations absent from dat come back with NA.
RES <- dat[
  CJ(group = group, horizon = min(horizon):max(horizon), unique = TRUE),
  on = c("group", "horizon")
]

# Within each group, overwrite each fill column with its first non-NA value.
RES[
  ,
  (fill) := lapply(.SD, function(col) col[which.min(is.na(col))]),
  by = group,
  .SDcols = fill
]

RES[]
#     group horizon value  other
#     <num>   <int> <num> <char>
#  1:     1       1   1.0      a
#  2:     1       2    NA      a
#  3:     1       3   0.9      a
#  4:     1       4    NA      a
#  5:     1       5   0.8      a
#  6:     1       6   0.6      a
#  7:     1       7   0.3      a
#  8:     1       8    NA      a
#  9:     1       9    NA      a
# 10:     1      10   0.0      a
# 11:     2       1   0.5      b
# 12:     2       2    NA      b
# 13:     2       3   0.6      b
# 14:     2       4    NA      b
# 15:     2       5   0.8      b
# 16:     2       6    NA      b
# 17:     2       7    NA      b
# 18:     2       8    NA      b
# 19:     2       9   0.9      b
# 20:     2      10   0.8      b
#     group horizon value  other