Assiging the values of a column correlated by the same IDs to a longer data in r-CodePudding

I have data like this in which participants have their ids and are categorized in the "group" column:

df1<-structure(list(id = 1:8, group = c("a", "b", "a", "c", "a", "a", 
"b", "d")), class = "data.frame", row.names = c(NA, -8L))

In another longitudinal data set (df2), the ids of df1 repeat many times and randomly, but I do not have the group column. How can I create a "new_group" column in df1 with the values of the "group" column?

The desire data is like this :

df2<-structure(list(id = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 7L, 7L, 8L, 8L, 8L, 8L
), new_group = c("a", "a", "a", "a", "b", "b", "b", "b", "a", 
"a", "c", "c", "c", "c", "c", "c", "a", "a", "a", "a", "b", "b", 
"d", "d", "d", "d")), class = "data.frame", row.names = c(NA, 
-26L))

CodePudding user response：

There are several different ways to create 'new_group', e.g.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

df1<-structure(list(id = 1:8, group = c("a", "b", "a", "c", "a", "a", 
                                        "b", "d")), class = "data.frame", row.names = c(NA, -8L))
df1
#>   id group
#> 1  1     a
#> 2  2     b
#> 3  3     a
#> 4  4     c
#> 5  5     a
#> 6  6     a
#> 7  7     b
#> 8  8     d

df2<-structure(list(id = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 
                           4L, 4L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 
                           7L, 7L, 8L, 8L, 8L, 8L)),
               class = "data.frame", row.names = c(NA, -26L))
df2
#>    id
#> 1   1
#> 2   1
#> 3   1
#> 4   1
#> 5   2
#> 6   2
#> 7   2
#> 8   2
#> 9   3
#> 10  3
#> 11  4
#> 12  4
#> 13  4
#> 14  4
#> 15  4
#> 16  4
#> 17  5
#> 18  6
#> 19  6
#> 20  6
#> 21  7
#> 22  7
#> 23  8
#> 24  8
#> 25  8
#> 26  8

lookup <- setNames(df1$group, df1$id)

df2 %>% 
  mutate(new_group = coalesce(lookup[id], as.character(id)))
#>    id new_group
#> 1   1         a
#> 2   1         a
#> 3   1         a
#> 4   1         a
#> 5   2         b
#> 6   2         b
#> 7   2         b
#> 8   2         b
#> 9   3         a
#> 10  3         a
#> 11  4         c
#> 12  4         c
#> 13  4         c
#> 14  4         c
#> 15  4         c
#> 16  4         c
#> 17  5         a
#> 18  6         a
#> 19  6         a
#> 20  6         a
#> 21  7         b
#> 22  7         b
#> 23  8         d
#> 24  8         d
#> 25  8         d
#> 26  8         d

# Or, using recode()
df2 %>%
  mutate(new_group = recode(id, !!!lookup))
#>    id new_group
#> 1   1         a
#> 2   1         a
#> 3   1         a
#> 4   1         a
#> 5   2         b
#> 6   2         b
#> 7   2         b
#> 8   2         b
#> 9   3         a
#> 10  3         a
#> 11  4         c
#> 12  4         c
#> 13  4         c
#> 14  4         c
#> 15  4         c
#> 16  4         c
#> 17  5         a
#> 18  6         a
#> 19  6         a
#> 20  6         a
#> 21  7         b
#> 22  7         b
#> 23  8         d
#> 24  8         d
#> 25  8         d
#> 26  8         d

# Or using a for loop in base R
for (i in 1:length(lookup)) {
    df2$new_group[df2$id == df1$id[i]] = as.character(df1$group[i])
}
df2
#>    id new_group
#> 1   1         a
#> 2   1         a
#> 3   1         a
#> 4   1         a
#> 5   2         b
#> 6   2         b
#> 7   2         b
#> 8   2         b
#> 9   3         a
#> 10  3         a
#> 11  4         c
#> 12  4         c
#> 13  4         c
#> 14  4         c
#> 15  4         c
#> 16  4         c
#> 17  5         a
#> 18  6         a
#> 19  6         a
#> 20  6         a
#> 21  7         b
#> 22  7         b
#> 23  8         d
#> 24  8         d
#> 25  8         d
#> 26  8         d

^{Created on 2022-07-19 by the reprex package (v2.0.1)}