How can I populate a column based on the first character of values in another column?

I need to add a factor column named Cohort and populate it with either 1 or 2 depending on the first letter of Id: 1 if the Id starts with A, 2 if it starts with B. How can I accomplish this with dplyr? Thanks.

collars <- collars %>%
    mutate(Cohort = ?)

# Sample data: a 10-row data.frame literal (looks like dput() output).
#   Id       - factor; all 10 rows carry level code 1 ("A628"), while the full
#              set of "A..." and "B..." levels is retained in .Label.
#   DateTime - POSIXct timestamps with tzone "CST6CDT".
# The value is not assigned to a name here; assign it (e.g. to `collars`)
# before running the snippets that reference that object.
structure(list(Id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = c("A628", "A629", "A630", "A631", "A632", "A633", 
"A634", "A635", "A636", "A637", "A82117", "A82118", "A82119", 
"A82120", "A82121", "A82122", "A82123", "A82124", "A82125", "A82126", 
"A82127", "A82129", "A82130", "A82131", "A82132", "A82133", "A82134", 
"A82135", "A82136", "A82137", "A82138", "A82139", "A82140", "A82141", 
"A82142", "A82143", "A82144", "A82145", "A82146", "B628", "B629", 
"B630", "B631", "B632", "B633", "B634", "B635", "B636", "B637", 
"B82117", "B82118", "B82119", "B82120", "B82121", "B82122", "B82123", 
"B82126", "B82127", "B82128", "B82129", "B82130", "B82131", "B82132", 
"B82133", "B82135", "B82136", "B82137", "B82138", "B82139", "B82140", 
"B82141", "B82143", "B82145"), class = "factor"), DateTime = structure(c(1557401400, 
1557403200, 1557405000, 1557406800, 1557408600, 1557410400, 1557417600, 
1557419400, 1557421200, 1557423000), class = c("POSIXct", "POSIXt"
), tzone = "CST6CDT")), row.names = c(NA, 10L), class = "data.frame")

CodePudding user response:

I think the following solution may help you:

library(dplyr)

# Derive the cohort from the first character of Id:
#   "A" -> 1, "B" -> 2, anything else -> NA.
# substr() and case_when() are both vectorised, so rowwise() is not needed;
# it would only slow the pipeline down and leave the result row-grouped.
# factor() gives the factor column the question asks for, and the explicit
# levels keep both cohorts even when only one of them occurs in the data.
collars %>%
  mutate(cohort = factor(
    case_when(
      substr(Id, 1, 1) == "A" ~ 1,
      substr(Id, 1, 1) == "B" ~ 2,
      TRUE ~ NA_real_
    ),
    levels = c(1, 2)
  ))

     Id            DateTime cohort
1  A628 2019-05-09 06:30:00      1
2  A628 2019-05-09 07:00:00      1
3  A628 2019-05-09 07:30:00      1
4  A628 2019-05-09 08:00:00      1
5  A628 2019-05-09 08:30:00      1
6  A628 2019-05-09 09:00:00      1
7  A628 2019-05-09 11:00:00      1
8  A628 2019-05-09 11:30:00      1
9  A628 2019-05-09 12:00:00      1
10 A628 2019-05-09 12:30:00      1

CodePudding user response:

Here is a solution with stringr::str_detect.

library(dplyr)
library(stringr)

# Cohort is 1 for Ids starting with "A", 2 for Ids starting with "B", and NA
# for anything else. The anchored patterns ("^A", "^B") only match at the
# start of the string. factor() turns the integer codes into the factor
# column the question asks for; explicit levels keep both cohorts even when
# only one of them occurs in the data.
collars %>%
  mutate(Cohort = factor(
    case_when(
      str_detect(Id, "^A") ~ 1L,
      str_detect(Id, "^B") ~ 2L,
      TRUE ~ NA_integer_
    ),
    levels = 1:2
  ))
#>      Id            DateTime Cohort
#> 1  A628 2019-05-09 06:30:00      1
#> 2  A628 2019-05-09 07:00:00      1
#> 3  A628 2019-05-09 07:30:00      1
#> 4  A628 2019-05-09 08:00:00      1
#> 5  A628 2019-05-09 08:30:00      1
#> 6  A628 2019-05-09 09:00:00      1
#> 7  A628 2019-05-09 11:00:00      1
#> 8  A628 2019-05-09 11:30:00      1
#> 9  A628 2019-05-09 12:00:00      1
#> 10 A628 2019-05-09 12:30:00      1

Created on 2022-03-01 by the reprex package (v2.0.1)