Create a new column that identifies matches for each group - CodePudding

Let's say I have a data frame like this:

# Sample data: each ID/item combination has one "pre" and one "post" answer.
dat <- data.frame(
  ID = rep(c("A", "B"), each = 4),
  test = rep(c("pre", "post"), times = 4),
  item = rep(rep(c("item1", "item2"), each = 2), times = 2),
  answer = c(
    "1_2_3_4", "1_2_3_4", "2_4_3_1", "4_3_2_1",
    "2_4_3_1", "2_4_3_1", "4_3_2_1", "4_3_2_1"
  )
)

For each combination of ID and item, I want to determine whether the values of answer match, i.e. whether the "pre" and "post" answers are identical.

The result data frame would look like this:

# Expected result: one row per ID/item combination. `match` is a logical
# column (not the strings "TRUE"/"FALSE") so it can be used directly as a
# condition and compared against a computed logical result.
res <- data.frame(
  ID = c("A", "A", "B", "B"),
  item = c("item1", "item2", "item1", "item2"),
  match = c(TRUE, FALSE, TRUE, TRUE)
)

CodePudding user response：

# Sample data: each ID/item combination has one "pre" and one "post" answer.
dat <- data.frame(
  ID = c("A", "A", "A", "A", "B", "B", "B", "B"),
  test = rep(c("pre", "post"), 4),
  item = rep(c("item1", "item2", "item1", "item2"), each = 2),
  answer = c(
    "1_2_3_4", "1_2_3_4", "2_4_3_1", "4_3_2_1",
    "2_4_3_1", "2_4_3_1", "4_3_2_1", "4_3_2_1"
  )
)

library(data.table)
# Turn `dat` into a data.table in place.
setDT(dat)

# For every ID/item group, check that all answers equal the group's first one.
dat[, list(match = all(answer == answer[1L])), by = list(ID, item)]
#>        ID   item  match
#>    <char> <char> <lgcl>
#> 1:      A  item1   TRUE
#> 2:      A  item2  FALSE
#> 3:      B  item1   TRUE
#> 4:      B  item2   TRUE

Created on 2022-05-04 by the reprex package (v2.0.1)

CodePudding user response：

With dplyr, we can use `group_by()` and `summarize()` to check whether the `answer` value is the same for the "pre" and "post" rows within each combination of `ID` and `item`.

library(dplyr)

# Sample data: each ID/item combination has one "pre" and one "post" answer.
dat <- data.frame(
  ID = c("A", "A", "A", "A", "B", "B", "B", "B"),
  test = rep(c("pre", "post"), 4),
  item = c(rep("item1", 2), rep("item2", 2), rep("item1", 2), rep("item2", 2)),
  answer = c(
    "1_2_3_4", "1_2_3_4", "2_4_3_1", "4_3_2_1",
    "2_4_3_1", "2_4_3_1", "4_3_2_1", "4_3_2_1"
  )
)

# Compare the "pre" answer with the "post" answer inside each ID/item group.
# The comparison gives exactly one value per group because every group in
# `dat` has one "pre" row and one "post" row.
# `.groups = "drop"` returns an ungrouped tibble; without it the result stays
# grouped by ID and summarize() prints a message about the leftover grouping.
dat %>%
  group_by(ID, item) %>%
  summarize(
    match = answer[test == "pre"] == answer[test == "post"],
    .groups = "drop"
  )

#> # A tibble: 4 × 3
#>   ID    item  match
#>   <chr> <chr> <lgl>
#> 1 A     item1 TRUE 
#> 2 A     item2 FALSE
#> 3 B     item1 TRUE 
#> 4 B     item2 TRUE

Created on 2022-05-05 by the reprex package (v2.0.1)