Home > OS >  Product Combination of variables by loop in R with data table
Product Combination of variables by loop in R with data table

Time:02-20

I have an initial table in R including 7 variables as shown below:

library(data.table)
Data<-data.frame(
  ID=c(1,1,1,2,2,2,3,3,3,4,4,4),
  CP1 =c(1,0,0,1,0,0,1,0,0,1,0,0),
  CP2 =c(0,1,1,0,0,1,0,1,0,0,0,0),
  CP3 =c(0,0,0,0,0,0,0,0,0,0,0,1),
  PR1 =c(1,1,0,0,0,0,0,0,0,0,0,0),
  PR2=c(0,0,1,0,0,0,0,0,0,0,0,0),
  PR3=c(0,0,0,0,1,0,0,0,0,0,0,1)
  
  )
Data

> Data
   ID CP1 CP2 CP3 PR1 PR2 PR3
1   1   1   0   0   1   0   0
2   1   0   1   0   1   0   0
3   1   0   1   0   0   1   0
4   2   1   0   0   0   0   0
5   2   0   0   0   0   0   1
6   2   0   1   0   0   0   0
7   3   1   0   0   0   0   0
8   3   0   1   0   0   0   0
9   3   0   0   0   0   0   0
10  4   1   0   0   0   0   0
11  4   0   0   0   0   0   0
12  4   0   0   1   0   0   1

I want to create all the product combination of CP1, CP2 and CP3 with the PR1, PR2 and PR3 variables, with the following names CP1_PR1, CP1_PR2, CP1_PR3, CP2_PR1, CP2_PR2, CP2_PR3, CP3_PR1, CP3_PR2 and CP3_PR3.

But I want to use a condition for doing this product. When both CP and PR variable are equal to 1 I want to create the CP_PR variable which will be equal to 1 and also to make zero the initial CP variable.

I make a vector with the names of the CP variables and a vector of the PR variables:

ListCP<-colnames(Data)[2:4]
ListPr<-colnames(Data)[5:7]

And then I use a double for loop in order to create the needed product combination Variables which correct creates the combinations I want:

for (i in ListPr) {
  
  for (j in ListCP) {
    
    Data<-Data[,paste0(j,"_",i) := ifelse(get(i)==1 & get(j)==1,1,0)]

    
  }
  
}

> Data
> Data
    ID CP1 CP2 CP3 Pr1 Pr2 Pr3 CP1_Pr1 CP2_Pr1 CP3_Pr1 CP1_Pr2 CP2_Pr2 CP3_Pr2 CP1_Pr3 CP2_Pr3 CP3_Pr3
 1:  1   1   0   0   1   0   0       1       0       0       0       0       0       0       0       0
 2:  1   0   1   0   1   0   0       0       1       0       0       0       0       0       0       0
 3:  1   0   1   0   0   1   0       0       0       0       0       1       0       0       0       0
 4:  2   1   0   0   0   0   0       0       0       0       0       0       0       0       0       0
 5:  2   0   0   0   0   0   1       0       0       0       0       0       0       0       0       0
 6:  2   0   1   0   0   0   0       0       0       0       0       0       0       0       0       0
 7:  3   1   0   0   0   0   0       0       0       0       0       0       0       0       0       0
 8:  3   0   1   0   0   0   0       0       0       0       0       0       0       0       0       0
 9:  3   0   0   0   0   0   0       0       0       0       0       0       0       0       0       0
10:  4   1   0   0   0   0   0       0       0       0       0       0       0       0       0       0
11:  4   0   0   0   0   0   0       0       0       0       0       0       0       0       0       0
12:  4   0   0   1   0   0   1       0       0       0       0       0       0       0       0       1

Then when I am trying to make zero the initial CP vars in case that I said above I am getting an error.

> for (i in ListCP) {
    
    for (j in ListPr) {
      
      Data<-Data[paste0(j,"_",i)==1,.(j) := 0]
      
      
    }
    
  }
Error in `[.data.table`(Data, paste0(j, "_", i) == 1, `:=`(.(j), 0)) : 
  LHS of := must be a symbol, or an atomic vector (column names or positions).

My question here is if I can combine both double for loops into one without getting an error. Also, because my dataset is bigger, any other faster implementation would be greatly appreciated.

Thank you.

CodePudding user response:

The following solution uses the variables .SD and its accompanying .SDcols and only one for loop is needed. This seems to be a more natural way of doing it.

library(data.table)

Data<-data.frame(
  ID=c(1,1,1,2,2,2,3,3,3,4,4,4),
  CP1 =c(1,0,0,1,0,0,1,0,0,1,0,0),
  CP2 =c(0,1,1,0,0,1,0,1,0,0,0,0),
  CP3 =c(0,0,0,0,0,0,0,0,0,0,0,1),
  PR1 =c(1,1,0,0,0,0,0,0,0,0,0,0),
  PR2=c(0,0,1,0,0,0,0,0,0,0,0,0),
  PR3=c(0,0,0,0,1,0,0,0,0,0,0,1)
)
Data2 <- as.data.table(Data)
Data <- as.data.table(Data)
                                              
ListCP<-colnames(Data)[2:4]
ListPr<-colnames(Data)[5:7]

for (i in ListPr) {
  for (j in ListCP) {
    Data<-Data[,paste0(j,"_",i) := ifelse(get(i)==1 & get(j)==1,1,0)]
  }
}

for(j in ListPr) {
  new_cols <- paste0(ListCP, "_", j)
  j_val <- Data2[[j]]
  Data2[, (new_cols) := lapply(.SD, \(i)  (i & j_val)), .SDcols = ListCP]
}

all.equal(Data, Data2)
#> [1] TRUE

Created on 2022-02-19 by the reprex package (v2.0.1)

CodePudding user response:

Here is another approach - this only gets the combination variables so far, but I'll update it when you clarify (see my comment) which product will determine whether or not CP gets replaced from 1 to 0.

setDT(Data)

df =  melt(Data[, id:=.I], id="id",measure =patterns("CP", "PR"), value.name = c("CP", "PR"))
result = do.call(cbind, lapply(unique(df$variable), function(i) {
  do.call(cbind, lapply(unique(df$variable), function(j) {
    result = merge(df[variable==i], df[variable==j], by="id")[,`:=`(p=CP.x*PR.y)]
    setnames(result[, .(p)], new=paste0("CP",i,"_PR",j))
  }))
}))
cbind(Data, result)[,id:=NULL][]

Output:

    ID CP1 CP2 CP3 PR1 PR2 PR3 CP1_PR1 CP1_PR2 CP1_PR3 CP2_PR1 CP2_PR2 CP2_PR3 CP3_PR1 CP3_PR2 CP3_PR3
 1:  1   1   0   0   1   0   0       1       0       0       0       0       0       0       0       0
 2:  1   0   1   0   1   0   0       0       0       0       1       0       0       0       0       0
 3:  1   0   1   0   0   1   0       0       0       0       0       1       0       0       0       0
 4:  2   1   0   0   0   0   0       0       0       0       0       0       0       0       0       0
 5:  2   0   0   0   0   0   1       0       0       0       0       0       0       0       0       0
 6:  2   0   1   0   0   0   0       0       0       0       0       0       0       0       0       0
 7:  3   1   0   0   0   0   0       0       0       0       0       0       0       0       0       0
 8:  3   0   1   0   0   0   0       0       0       0       0       0       0       0       0       0
 9:  3   0   0   0   0   0   0       0       0       0       0       0       0       0       0       0
10:  4   1   0   0   0   0   0       0       0       0       0       0       0       0       0       0
11:  4   0   0   0   0   0   0       0       0       0       0       0       0       0       0       0
12:  4   0   0   1   0   0   1       0       0       0       0       0       0       0       0       1```
  • Related