How to merge datasets with repeated measures-CodePudding

I have two datasets that I want to MERGE/JOIN.

df1

      ID     Grup   pretest
 1     1     A      2  
 2     1     A      1   
 3     1     A      3   
 4     2     B      NA  
 5     2     B      1   
 6     2     B      3  
 7     3     A      2   
 8     3     A      1  
 9     3     A      NA   
 10    4     B      2   
 11    4     B      1   
 12    4     B      3

df2

      ID     Grup   posttest  
 1     1     A      NA 
 2     1     A      5   
 3     1     A      4  
 4     2     B      2   
 5     2     B      4  
 6     2     B      3  
 7     3     A      5 
 8     3     A      6
 9     3     A      3   
 10    4     B      4 
 11    4     B      2  
 12    4     B      NA

I’ve been trying merge() and full_join() but both end up creating duplicates that I don’t want.

It won’t recognize the ID as an independent value, it’s creating 9 IDs for every ID value.

New <- merge(df1, df2, by= 'ID')  

New <- full_join(df1, df2, By = "ID")

Setting all = TRUE doesn’t help.

I need the dataset to look like this

     ID     Grup   pretest posttest 
 1     1     A      2        NA
 2     1     A      1         5
 3     1     A      3         4
 4     2     B      NA        2
 5     2     B      1         4
 6     2     B      3         3
 7     3     A      2         5
 8     3     A      1         6
 9     3     A      NA        3
 10    4     B      2         4
 11    4     B      1         2
 12    4     B      3         NA

CodePudding user response：

Since you are relying on the order of the frames, you can simply use cbind()

cbind(df1,df2[,3,F])

Output:

   ID Grup pretest posttest
1   1    A       2       NA
2   1    A       1        5
3   1    A       3        4
4   2    B      NA        2
5   2    B       1        4
6   2    B       3        3
7   3    A       2        5
8   3    A       1        6
9   3    A      NA        3
10  4    B       2        4
11  4    B       1        2
12  4    B       3       NA

CodePudding user response：

You can add a helper column iid to separate the entries.

df1 <- cbind(iid = 1:nrow(df1), df1)
df2 <- cbind(iid = 1:nrow(df2), df2)

With dplyr

library(dplyr)

left_join(df1, df2, c("iid", "ID", "Grup"))[,-1]
   ID Grup pretest posttest
1   1    A       2       NA
2   1    A       1        5
3   1    A       3        4
4   2    B      NA        2
5   2    B       1        4
6   2    B       3        3
7   3    A       2        5
8   3    A       1        6
9   3    A      NA        3
10  4    B       2        4
11  4    B       1        2
12  4    B       3       NA

With base R merge

merge(df1, df2, c("iid", "ID", "Grup"))[,-1]
   ID Grup pretest posttest
1   1    A       2       NA
2   4    B       2        4
3   4    B       1        2
4   4    B       3       NA
5   1    A       1        5
6   1    A       3        4
7   2    B      NA        2
8   2    B       1        4
9   2    B       3        3
10  3    A       2        5
11  3    A       1        6
12  3    A      NA        3

Data

df1 <- structure(list(ID = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L), Grup = c("A", "A", "A", "B", "B", "B", "A", "A", "A",
"B", "B", "B"), pretest = c(2L, 1L, 3L, NA, 1L, 3L, 2L, 1L, NA,
2L, 1L, 3L)), class = "data.frame", row.names = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12"))

df2 <- structure(list(ID = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L), Grup = c("A", "A", "A", "B", "B", "B", "A", "A", "A",
"B", "B", "B"), posttest = c(NA, 5L, 4L, 2L, 4L, 3L, 5L, 6L,
3L, 4L, 2L, NA)), class = "data.frame", row.names = c("1", "2",
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))

CodePudding user response：

Another option is joining by rownames, eg. row numbers:

library(tibble)
library(dplyr)

left_join(rownames_to_column(df1), df2 %>% rownames_to_column() , by="rowname") %>% 
  select(ID = ID.x, Grup = Grup.x, pretest, posttest)

   ID Grup pretest posttest
1   1    A       2       NA
2   1    A       1        5
3   1    A       3        4
4   2    B      NA        2
5   2    B       1        4
6   2    B       3        3
7   3    A       2        5
8   3    A       1        6
9   3    A      NA        3
10  4    B       2        4
11  4    B       1        2
12  4    B       3       NA