I have data like these:
ID color_1 color_2 color_3 color_4
1 blue NA NA NA
5 green blue yellow NA
9 None NA NA NA
2 blue pink NA NA
11 green NA NA NA
I want to add a new column that counts the number of colors (None, 1, or 2 or more):
ID color_1 color_2 color_3 color_4 colors
1 blue NA NA NA 1
5 green blue yellow NA 2 or more
9 None NA NA NA None
2 blue pink NA NA 2 or more
11 green NA NA NA 1
I am thinking I can do something like this:
library(tidyverse)
data2 <- data %>%
mutate(colors = case_when(
color_1 == "None" ~ "None",
But I can't figure out how to do the counting part. Any help is appreciated!
CodePudding user response:
You can count using c_across
and rowwise
:
library(tidyverse)
df %>%
na_if("None") %>%
rowwise() %>%
mutate(count = sum(!is.na(c_across(color_1:color_4))),
colors = case_when(count >= 2 ~ "2 or more",
count == 0 ~ "None",
T ~ as.character(count))) %>%
select(-count)
Output
ID color_1 color_2 color_3 color_4 colors
<int> <chr> <chr> <chr> <lgl> <chr>
1 1 blue NA NA NA 1
2 5 green blue yellow NA 2 or more
3 9 NA NA NA NA None
4 2 blue pink NA NA 2 or more
5 11 green NA NA NA 1
CodePudding user response:
the approach is similar to @Maël, but using rowSums
df <- data.frame(
stringsAsFactors = FALSE,
ID = c(1L, 5L, 9L, 2L, 11L),
color_1 = c("blue", "green", "None", "blue", "green"),
color_2 = c(NA, "blue", NA, "pink", NA),
color_3 = c(NA, "yellow", NA, NA, NA),
color_4 = c(NA, NA, NA, NA, NA)
)
library(tidyverse)
library(data.table)
df %>%
na_if("None") %>%
mutate(
count = rowSums(across(starts_with("color"), ~ !is.na(.x))),
colors = case_when(count >= 2 ~ "2 or more",
count == 0 ~ "None",
T ~ as.character(count))
) %>%
select(-count)
#> ID color_1 color_2 color_3 color_4 colors
#> 1 1 blue <NA> <NA> NA 1
#> 2 5 green blue yellow NA 2 or more
#> 3 9 <NA> <NA> <NA> NA None
#> 4 2 blue pink <NA> NA 2 or more
#> 5 11 green <NA> <NA> NA 1
# data.table
COLS <- grepl("^color", names(df))
setDT(df) %>%
na_if("None") %>%
.[, count := rowSums(!is.na(.SD)), .SDcols = COLS] %>%
.[, colors := fcase(count >= 2, "2 or more",
count == 0, "None",
default = "1")] %>%
.[, count := NULL] %>%
.[]
#> ID color_1 color_2 color_3 color_4 colors
#> 1: 1 blue <NA> <NA> NA 1
#> 2: 5 green blue yellow NA 2 or more
#> 3: 9 <NA> <NA> <NA> NA None
#> 4: 2 blue pink <NA> NA 2 or more
#> 5: 11 green <NA> <NA> NA 1
Created on 2022-01-31 by the reprex package (v2.0.1)