I have this dataframe:
# Example data: one row per `id`, grouped by `id`, with two logical flag columns.
df <- tibble(
  id = c(1, 2, 3),
  c_1 = c(TRUE, TRUE, FALSE),
  c_2 = c(FALSE, FALSE, TRUE)
) %>%
  group_by(id)
# A tibble: 3 x 3
id c_1 c_2
<dbl> <lgl> <lgl>
1 1 TRUE FALSE
2 2 TRUE FALSE
3 3 FALSE TRUE
I now want to compute the row-wise logical OR over the columns starting with `c_`.
I tried
# Fails: `select()` receives no data frame here, so `matches()` is evaluated
# outside of a selecting context (the error shown below).
df %>% mutate(valid = sum(select(matches("^c_")) == 0))
However I get
`matches()` must be used within a *selecting* function.
How can I solve this?
CodePudding user response:
library(dplyr)

# Example data (ungrouped).
df <- tibble(
  id = c(1, 2, 3),
  c_1 = c(TRUE, TRUE, FALSE),
  c_2 = c(FALSE, FALSE, TRUE)
)

# rowwise() makes mutate() evaluate once per row, so c_across() yields that
# row's `c_` values and any() reduces them to a single logical OR.
# ungroup() removes the row-wise grouping again.
df %>%
  rowwise() %>%
  mutate(valid = any(c_across(starts_with("c_")))) %>%
  ungroup()
#> # A tibble: 3 × 4
#> id c_1 c_2 valid
#> <dbl> <lgl> <lgl> <lgl>
#> 1 1 TRUE FALSE TRUE
#> 2 2 TRUE FALSE TRUE
#> 3 3 FALSE TRUE TRUE
Created on 2022-07-11 by the reprex package (v2.0.1)
CodePudding user response:
Base R option using `grepl()`:
library(dplyr) # For tibble() and group_by()

# Example data, grouped by `id` as in the question.
df <- tibble(
  id = c(1, 2, 3),
  c_1 = c(TRUE, TRUE, FALSE),
  c_2 = c(FALSE, FALSE, TRUE)
) %>%
  group_by(id)

# Select the columns whose names start with "c_" and OR them row by row:
# a row is valid when at least one flag is TRUE, i.e. when the row sum of the
# logical columns is positive.
flag_cols <- grepl("^c_", names(df))
df$valid <- rowSums(df[flag_cols]) > 0
df
#> # A tibble: 3 × 4
#> # Groups: id [3]
#> id c_1 c_2 valid
#> <dbl> <lgl> <lgl> <lgl>
#> 1 1 TRUE FALSE TRUE
#> 2 2 TRUE FALSE TRUE
#> 3 3 FALSE TRUE TRUE
Created on 2022-07-11 by the reprex package (v2.0.1)
CodePudding user response:
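Update: why `tibble()` is needed.
`df` is grouped by `id`, so `mutate()` evaluates the expression once per group (each group has a single row), while `.` still refers to the whole 3-row table.
Without first converting with `as_tibble()`, `tibble()` or `data.frame()` (each of which drops the grouping), it won't work: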
Your table:
> class(df)
[1] "grouped_df" "tbl_df" "tbl" "data.frame"
- Without `as_tibble()`, `tibble()` or `data.frame()` -> won't work:
>df %>%
mutate(valid = ifelse(rowSums(select(., contains("c_")))==1, TRUE, FALSE))
Adding missing grouping variables: `id`
Error in `mutate()`:
! Problem while computing `valid = ifelse(rowSums(select(.,
contains("c_"))) == 1, TRUE, FALSE)`.
x `valid` must be size 1, not 3.
i The error occurred in group 1: id = 1.
- With `as_tibble()`, `tibble()` or `data.frame()` -> it will work:
# `df` is grouped by `id`; converting it to a plain data frame drops the
# grouping, so `.` and mutate() both operate on all rows at once.
# Row-wise OR: TRUE when at least one `c_` column is TRUE.
df %>%
  data.frame() %>%
  mutate(valid = rowSums(select(., starts_with("c_"))) > 0)

# or, equivalently, via tibble()
df %>%
  tibble() %>%
  mutate(valid = rowSums(select(., starts_with("c_"))) > 0)
First answer: if we want to do it with `select()`, here is an out-of-the-box approach:
library(tibble)
library(dplyr)

# as_tibble() drops the grouping by `id`; columns 2 and 3 are `c_1` and `c_2`.
# Row-wise OR: TRUE when at least one of the two flags is TRUE.
df %>%
  as_tibble() %>%
  mutate(valid = rowSums(.[2:3]) > 0)
or
library(tibble)
library(dplyr)

# as_tibble() drops the grouping by `id`, so `.` and mutate() see all rows.
# Row-wise OR over every column whose name starts with "c_": TRUE when at
# least one of them is TRUE.
df %>%
  as_tibble() %>%
  mutate(valid = rowSums(select(., starts_with("c_"))) > 0)
# A tibble: 3 x 4
id c_1 c_2 valid
<dbl> <lgl> <lgl> <lgl>
1 1 TRUE FALSE TRUE
2 2 TRUE FALSE TRUE
3 3 FALSE TRUE TRUE
CodePudding user response:
We may use `if_any()` directly, without `rowwise()`:
library(dplyr)

# if_any() combines the selected logical columns with OR, giving one
# TRUE/FALSE per row.
mutate(df, valid = if_any(starts_with("c_")))
# A tibble: 3 × 4
id c_1 c_2 valid
<dbl> <lgl> <lgl> <lgl>
1 1 TRUE FALSE TRUE
2 2 TRUE FALSE TRUE
3 3 FALSE TRUE TRUE
data
# Example data used above: three ids with two logical flag columns (ungrouped).
df <- tibble(
  id = c(1, 2, 3),
  c_1 = c(TRUE, TRUE, FALSE),
  c_2 = c(FALSE, FALSE, TRUE)
)