I need to add a factor column labeled Cohort and populate it with either a 1 or 2 depending on the first letter of the Id. If A then 1, if B then 2. How can I accomplish this with dplyr? Thanks
# Desired shape of the call: the right-hand side of `Cohort =` (the `?`) is
# the part being asked about.
collars <- collars %>%
mutate(Cohort = ?)
# dput() output for a 10-row sample of the data. `Id` is a factor: every row
# shown here is the first level ("A628"), while the level set lists all the
# A- and B-prefixed ids. `DateTime` is POSIXct with tzone "CST6CDT".
structure(list(Id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("A628", "A629", "A630", "A631", "A632", "A633",
"A634", "A635", "A636", "A637", "A82117", "A82118", "A82119",
"A82120", "A82121", "A82122", "A82123", "A82124", "A82125", "A82126",
"A82127", "A82129", "A82130", "A82131", "A82132", "A82133", "A82134",
"A82135", "A82136", "A82137", "A82138", "A82139", "A82140", "A82141",
"A82142", "A82143", "A82144", "A82145", "A82146", "B628", "B629",
"B630", "B631", "B632", "B633", "B634", "B635", "B636", "B637",
"B82117", "B82118", "B82119", "B82120", "B82121", "B82122", "B82123",
"B82126", "B82127", "B82128", "B82129", "B82130", "B82131", "B82132",
"B82133", "B82135", "B82136", "B82137", "B82138", "B82139", "B82140",
"B82141", "B82143", "B82145"), class = "factor"), DateTime = structure(c(1557401400,
1557403200, 1557405000, 1557406800, 1557408600, 1557410400, 1557417600,
1557419400, 1557421200, 1557423000), class = c("POSIXct", "POSIXt"
), tzone = "CST6CDT")), row.names = c(NA, 10L), class = "data.frame")
CodePudding user response:
I think the following solution may help you:
library(dplyr)

# Derive the cohort from the first letter of Id: "A" -> 1, "B" -> 2,
# anything else -> NA. `collars` is the data frame from the question.
# substr() and case_when() are vectorised, so no rowwise() is needed; leaving
# it out also avoids returning a row-grouped tibble.
# The codes are wrapped in factor() because a factor column was requested.
collars %>%
  mutate(cohort = factor(
    case_when(
      substr(Id, 1, 1) == "A" ~ 1,
      substr(Id, 1, 1) == "B" ~ 2,
      TRUE ~ NA_real_
    ),
    levels = c(1, 2)
  ))
Id DateTime cohort
1 A628 2019-05-09 06:30:00 1
2 A628 2019-05-09 07:00:00 1
3 A628 2019-05-09 07:30:00 1
4 A628 2019-05-09 08:00:00 1
5 A628 2019-05-09 08:30:00 1
6 A628 2019-05-09 09:00:00 1
7 A628 2019-05-09 11:00:00 1
8 A628 2019-05-09 11:30:00 1
9 A628 2019-05-09 12:00:00 1
10 A628 2019-05-09 12:30:00 1
CodePudding user response:
Here is a solution with `stringr::str_detect`.
library(dplyr)
library(stringr)

# Cohort follows the leading letter of Id: ids matching "^A" get 1, ids
# matching "^B" get 2, and anything else becomes NA.
# factor() converts the integer codes into the factor column that was asked
# for; levels are fixed to 1 and 2 so both cohorts exist even if one is absent.
collars %>%
  mutate(Cohort = factor(
    case_when(
      str_detect(Id, "^A") ~ 1L,
      str_detect(Id, "^B") ~ 2L,
      TRUE ~ NA_integer_
    ),
    levels = 1:2
  ))
#> Id DateTime Cohort
#> 1 A628 2019-05-09 06:30:00 1
#> 2 A628 2019-05-09 07:00:00 1
#> 3 A628 2019-05-09 07:30:00 1
#> 4 A628 2019-05-09 08:00:00 1
#> 5 A628 2019-05-09 08:30:00 1
#> 6 A628 2019-05-09 09:00:00 1
#> 7 A628 2019-05-09 11:00:00 1
#> 8 A628 2019-05-09 11:30:00 1
#> 9 A628 2019-05-09 12:00:00 1
#> 10 A628 2019-05-09 12:30:00 1
Created on 2022-03-01 by the reprex package (v2.0.1)