Home > front end >  Gathering separate rows into one based on three grouping variables R
Gathering separate rows into one based on three grouping variables R

Time:02-08

I'm struggling with a data-wrangling problem. Below is a minimal working example. For every unique participant*item*cond combination, I need to go from the first row to the last row of that group and paste together each value in thresh, storing the result in a new column (it should look like what_I_want). Does anyone have any tips?

# Minimal example data: 32 rows made of 8 participant/item/cond combinations
# with 4 consecutive rows each. `what_I_want` is the target column: the four
# `thresh` values of a combination pasted together and repeated on each of
# that combination's rows.
my_df <- data.frame(
  participant = rep(c("1", "2"), each = 16),
  item = rep(c("101", "102", "101", "103"), each = 8),
  cond = rep(rep(c("a", "b"), each = 4), times = 4),
  # One line per participant/item/cond combination.
  thresh = c(
    1, 0, 1, 1,
    1, 1, 0, 1,
    1, 0, 0, 1,
    1, 1, 1, 1,
    0, 0, 1, 1,
    0, 0, 0, 0,
    1, 0, 1, 0,
    1, 0, 1, 1
  ),
  what_I_want = rep(
    c("1011", "1101", "1001", "1111", "0011", "0000", "1010", "1011"),
    each = 4
  )
)

CodePudding user response:

You can group by the three columns (i.e., participant, item, cond), then use the collapse argument of paste0 to combine all thresh values within each group into a single string.

library(tidyverse)

# For each participant/item/cond combination, paste all `thresh` values of the
# group into one string and store it on every row of that group as `result`.
my_df %>%
  group_by(participant, item, cond) %>%
  # `collapse = ""` joins the group's values into a single string, which
  # mutate() then recycles across the rows of the group.
  mutate(result = paste0(thresh, collapse = "")) %>%
  # Drop the grouping so later verbs operate on the whole table again.
  ungroup()

base R option:

# Base R equivalent: build one row per participant/item/cond combination with
# the pasted `thresh` values, then join that summary back onto the original
# rows. Note that merge() sorts the result on the `by` columns by default.
merge(
  my_df,
  setNames(
    # The grouping terms on the right-hand side of the formula must be joined
    # with `+`; `collapse = ""` is forwarded to paste() for each group.
    aggregate(thresh ~ participant + item + cond, my_df, paste, collapse = ""),
    c("participant", "item", "cond", "result")
  ),
  by = c("participant", "item", "cond")
)

Output

   participant item  cond  thresh what_I_want result
   <chr>       <chr> <chr>  <dbl> <chr>       <chr> 
 1 1           101   a          1 1011        1011  
 2 1           101   a          0 1011        1011  
 3 1           101   a          1 1011        1011  
 4 1           101   a          1 1011        1011  
 5 1           101   b          1 1101        1101  
 6 1           101   b          1 1101        1101  
 7 1           101   b          0 1101        1101  
 8 1           101   b          1 1101        1101  
 9 1           102   a          1 1001        1001  
10 1           102   a          0 1001        1001  
11 1           102   a          0 1001        1001  
12 1           102   a          1 1001        1001  
13 1           102   b          1 1111        1111  
14 1           102   b          1 1111        1111  
15 1           102   b          1 1111        1111  
16 1           102   b          1 1111        1111  
17 2           101   a          0 0011        0011  
18 2           101   a          0 0011        0011  
19 2           101   a          1 0011        0011  
20 2           101   a          1 0011        0011  
21 2           101   b          0 0000        0000  
22 2           101   b          0 0000        0000  
23 2           101   b          0 0000        0000  
24 2           101   b          0 0000        0000  
25 2           103   a          1 1010        1010  
26 2           103   a          0 1010        1010  
27 2           103   a          1 1010        1010  
28 2           103   a          0 1010        1010  
29 2           103   b          1 1011        1011  
30 2           103   b          0 1011        1011  
31 2           103   b          1 1011        1011  
32 2           103   b          1 1011        1011  
  •  Tags:  
  • Related