I am looking for a data.table
solution to a simply problem. I have data like so
library(data.table)
data <- data.table(
id = seq(1:20),
match_id = c( rep(1,5), rep(2,5), rep(3,5), rep(4,5) ),
exp = c( 1, rep(0,4), 1 , rep(0,4), 1 , rep(0,4), 1 , rep(0,4) )
)
data[, var := ifelse(exp==1, runif(1,min=0,max=100), 0), by = id]
I want, for each unique match_id where exp==0 to fill var
based on the value of var
for exp==1. Data to end up with:
data_want <- data.table(
id = seq(1:20),
match_id = c( rep(1,5), rep(2,5), rep(3,5), rep(4,5) ),
exp = c( 1, rep(0,4), 1 , rep(0,4), 1 , rep(0,4), 1 , rep(0,4) ),
var = c(rep(data[1]$var,5), rep(data[6]$var,5), rep(data[11]$var,5), rep(data[16]$var,5))
)
CodePudding user response:
data[, var:=var[which(exp==1)], by=match_id]
id match_id exp var
<int> <num> <num> <num>
1: 1 1 1 75.342705
2: 2 1 0 75.342705
3: 3 1 0 75.342705
4: 4 1 0 75.342705
5: 5 1 0 75.342705
6: 6 2 1 81.822968
7: 7 2 0 81.822968
8: 8 2 0 81.822968
9: 9 2 0 81.822968
10: 10 2 0 81.822968
11: 11 3 1 3.309884
12: 12 3 0 3.309884
13: 13 3 0 3.309884
14: 14 3 0 3.309884
15: 15 3 0 3.309884
16: 16 4 1 2.047301
17: 17 4 0 2.047301
18: 18 4 0 2.047301
19: 19 4 0 2.047301
20: 20 4 0 2.047301
id match_id exp var
CodePudding user response:
Convert 0 to NA
with na_if
(from dplyr
) and then use nafill
library(data.table)
library(dplyr)
data[, var := nafill(na_if(var, 0), type = "locf")]
-output
> data
id match_id exp var
<int> <num> <num> <num>
1: 1 1 1 42.81752
2: 2 1 0 42.81752
3: 3 1 0 42.81752
4: 4 1 0 42.81752
5: 5 1 0 42.81752
6: 6 2 1 28.05760
7: 7 2 0 28.05760
8: 8 2 0 28.05760
9: 9 2 0 28.05760
10: 10 2 0 28.05760
11: 11 3 1 76.53963
12: 12 3 0 76.53963
13: 13 3 0 76.53963
14: 14 3 0 76.53963
15: 15 3 0 76.53963
16: 16 4 1 50.95047
17: 17 4 0 50.95047
18: 18 4 0 50.95047
19: 19 4 0 50.95047
20: 20 4 0 50.95047
id match_id exp var
Or another option is
data[, var := var[var!=0][cumsum(exp)]]
CodePudding user response:
library(data.table)
data <- data.table(
id = seq(1:20),
match_id = c(rep(1, 5), rep(2, 5), rep(3, 5), rep(4, 5)),
exp = c(1, rep(0, 4), 1 , rep(0, 4), 1 , rep(0, 4), 1 , rep(0, 4))
)
data[, var := ifelse(exp==1, runif(1,min=0,max=100), 0), by = id]
data[, var := max(var), by = list(match_id)][]
#> id match_id exp var
#> 1: 1 1 1 40.474630
#> 2: 2 1 0 40.474630
#> 3: 3 1 0 40.474630
#> 4: 4 1 0 40.474630
#> 5: 5 1 0 40.474630
#> 6: 6 2 1 91.434384
#> 7: 7 2 0 91.434384
#> 8: 8 2 0 91.434384
#> 9: 9 2 0 91.434384
#> 10: 10 2 0 91.434384
#> 11: 11 3 1 61.503224
#> 12: 12 3 0 61.503224
#> 13: 13 3 0 61.503224
#> 14: 14 3 0 61.503224
#> 15: 15 3 0 61.503224
#> 16: 16 4 1 8.480375
#> 17: 17 4 0 8.480375
#> 18: 18 4 0 8.480375
#> 19: 19 4 0 8.480375
#> 20: 20 4 0 8.480375
Created on 2022-03-23 by the reprex package (v2.0.1)