I have data like this in which participants have their ids and are categorized in the "group" column:
df1<-structure(list(id = 1:8, group = c("a", "b", "a", "c", "a", "a",
"b", "d")), class = "data.frame", row.names = c(NA, -8L))
In another longitudinal data set (df2), the ids of df1 repeat many times and randomly, but I do not have the group column. How can I create a "new_group" column in df1 with the values of the "group" column?
The desire data is like this :
df2<-structure(list(id = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 7L, 7L, 8L, 8L, 8L, 8L
), new_group = c("a", "a", "a", "a", "b", "b", "b", "b", "a",
"a", "c", "c", "c", "c", "c", "c", "a", "a", "a", "a", "b", "b",
"d", "d", "d", "d")), class = "data.frame", row.names = c(NA,
-26L))
CodePudding user response:
There are several different ways to create 'new_group', e.g.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df1<-structure(list(id = 1:8, group = c("a", "b", "a", "c", "a", "a",
"b", "d")), class = "data.frame", row.names = c(NA, -8L))
df1
#> id group
#> 1 1 a
#> 2 2 b
#> 3 3 a
#> 4 4 c
#> 5 5 a
#> 6 6 a
#> 7 7 b
#> 8 8 d
df2<-structure(list(id = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L,
7L, 7L, 8L, 8L, 8L, 8L)),
class = "data.frame", row.names = c(NA, -26L))
df2
#> id
#> 1 1
#> 2 1
#> 3 1
#> 4 1
#> 5 2
#> 6 2
#> 7 2
#> 8 2
#> 9 3
#> 10 3
#> 11 4
#> 12 4
#> 13 4
#> 14 4
#> 15 4
#> 16 4
#> 17 5
#> 18 6
#> 19 6
#> 20 6
#> 21 7
#> 22 7
#> 23 8
#> 24 8
#> 25 8
#> 26 8
lookup <- setNames(df1$group, df1$id)
df2 %>%
mutate(new_group = coalesce(lookup[id], as.character(id)))
#> id new_group
#> 1 1 a
#> 2 1 a
#> 3 1 a
#> 4 1 a
#> 5 2 b
#> 6 2 b
#> 7 2 b
#> 8 2 b
#> 9 3 a
#> 10 3 a
#> 11 4 c
#> 12 4 c
#> 13 4 c
#> 14 4 c
#> 15 4 c
#> 16 4 c
#> 17 5 a
#> 18 6 a
#> 19 6 a
#> 20 6 a
#> 21 7 b
#> 22 7 b
#> 23 8 d
#> 24 8 d
#> 25 8 d
#> 26 8 d
# Or, using recode()
df2 %>%
mutate(new_group = recode(id, !!!lookup))
#> id new_group
#> 1 1 a
#> 2 1 a
#> 3 1 a
#> 4 1 a
#> 5 2 b
#> 6 2 b
#> 7 2 b
#> 8 2 b
#> 9 3 a
#> 10 3 a
#> 11 4 c
#> 12 4 c
#> 13 4 c
#> 14 4 c
#> 15 4 c
#> 16 4 c
#> 17 5 a
#> 18 6 a
#> 19 6 a
#> 20 6 a
#> 21 7 b
#> 22 7 b
#> 23 8 d
#> 24 8 d
#> 25 8 d
#> 26 8 d
# Or using a for loop in base R
for (i in 1:length(lookup)) {
df2$new_group[df2$id == df1$id[i]] = as.character(df1$group[i])
}
df2
#> id new_group
#> 1 1 a
#> 2 1 a
#> 3 1 a
#> 4 1 a
#> 5 2 b
#> 6 2 b
#> 7 2 b
#> 8 2 b
#> 9 3 a
#> 10 3 a
#> 11 4 c
#> 12 4 c
#> 13 4 c
#> 14 4 c
#> 15 4 c
#> 16 4 c
#> 17 5 a
#> 18 6 a
#> 19 6 a
#> 20 6 a
#> 21 7 b
#> 22 7 b
#> 23 8 d
#> 24 8 d
#> 25 8 d
#> 26 8 d
Created on 2022-07-19 by the reprex package (v2.0.1)