Create new variable based on the condition if two of five variables is not NA and one other variable-CodePudding

I have the following variables: comorb_uveitis, comorb_ibd, comorb_psoriasis, asas_criteria_3_enthesitis, asas_criteria_2_arthritis, basdai_baseline

I wish to create a new variable called "EAM_2" (=extra articular manifestations present N=2) based on the following conditions:

If at least two of the five variables (uveitis, ibd, psoriasis, enthesitis, arthritis) are present (value 1)
AND basdai_baseline is present (not NA),
return 1; otherwise return 0.

Sample of data

# Sample data (looks like dput() output): 20 rows with five integer indicator
# columns holding 0, 1 or NA, plus the numeric basdai_baseline column; the row
# names are character IDs.
# NOTE(review): the expression is not assigned to a name here -- presumably it
# is meant to be stored as `df`, which the code further down operates on.
structure(list(comorb_uveitis = c(0L, 0L, 0L, 0L, 1L, NA, 0L, 
0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L, 0L, 0L, 0L), comorb_ibd = c(0L, 
1L, 0L, 0L, 0L, NA, 0L, 0L, NA, 1L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 
0L, 0L, 0L), comorb_psoriasis = c(0L, 0L, 0L, 0L, 0L, NA, 0L, 
0L, NA, 0L, 0L, 0L, 0L, NA, 1L, 0L, 0L, 0L, 0L, 0L), asas_criteria_3_enthesitis = c(1L, 
0L, 1L, 0L, NA, NA, 0L, 1L, NA, 1L, NA, NA, 1L, 1L, 1L, 0L, NA, 
NA, 1L, NA), asas_criteria_2_arthritis = c(1L, 1L, 0L, 1L, 0L, 
1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L), 
    basdai_baseline = c(70, NA, 46, 36, 29, 40, NA, NA, NA, 52, 
    30, NA, 47, 98, 56, 57, NA, NA, 72, NA)), row.names = c("76271", 
"46010", "76621", "46310", "76761", "76831", "76841", "76871", 
"76951", "76961", "77011", "77041", "77091", "77281", "77291", 
"77561", "77691", "77711", "77791", "77871"), class = "data.frame")

I managed to get this far, but this only checks whether at least one EAM is present:

# Asker's attempt, kept exactly as posted: adds EAM_2 with mutate() + if_else()
# and assigns the result to both df2 and df.
# NOTE(review): each `x == 1 | x == 0` pair only tests that x holds a recorded
# value (0 or 1), and the pairs are joined with `|`, so a single recorded
# variable is enough -- nothing here counts how many variables equal 1.
# NOTE(review): `&` binds tighter than `|` in R, so the basdai_baseline test
# combines only with the final `asas_criteria_2_arthritis == 0` comparison.
# NOTE(review): `basdai_baseline != is.na(basdai_baseline)` compares the value
# with a logical; `!is.na(basdai_baseline)` was presumably intended.
df <- df2 <- mutate(df, EAM_2 = if_else (comorb_uveitis == 1 | comorb_uveitis == 0 | comorb_ibd == 1 | comorb_ibd == 0
                                                           | comorb_psoriasis == 1 | comorb_psoriasis == 0 | asas_criteria_3_enthesitis == 1 | asas_criteria_3_enthesitis == 0
                                                           | asas_criteria_2_arthritis == 1 | asas_criteria_2_arthritis == 0 & basdai_baseline != is.na(basdai_baseline), 1, 0))

EDIT: When trying the code proposed by @Iago, I get the following output:

       EAM_2 comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline
76271      1              0          0                0                          1                         1              70
46010      0              0          1                0                          0                         1              NA
76621      0              0          0                0                          1                         0              46
46310      0              0          0                0                          0                         1              36
76761      0              1          0                0                         NA                         0              29
76831      0             NA         NA               NA                         NA                         1              40
76841      0              0          0                0                          0                         0              NA
76871      0              0          0                0                          1                         1              NA
76951      0             NA         NA               NA                         NA                         0              NA
76961      1              0          1                0                          1                         0              52
77011      0              1          0                0                         NA                         0              30
77041      0              0          0                0                         NA                         1              NA
77091      1              1          0                0                          1                         1              47
77281      0             NA         NA               NA                          1                         1              98
81061      0             NA         NA               NA                          1                         1              84

The last two rows are wrong: two manifestations are present and basdai_baseline is not NA, so EAM_2 should be 1, but 0 is returned.

CodePudding user response：

library(dplyr)
# Flag rows where basdai_baseline is not missing and at least two of the five
# manifestation columns equal 1. Each column is first turned into a logical
# (value == 1L); rowSums(..., na.rm = TRUE) then counts the TRUEs per row and
# skips NA entries, so the count itself is never NA.
df %>%
  mutate(
    EAM_2 = case_when(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis, comorb_ibd, comorb_psoriasis,
              asas_criteria_3_enthesitis, asas_criteria_2_arthritis
            ),
            ~ .x == 1L
          ),
          na.rm = TRUE
        ) >= 2 ~ 1,
      # every other row gets 0
      TRUE ~ 0
    )
  )
      comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline EAM_2
76271              0          0                0                          1                         1              70     1
46010              0          1                0                          0                         1              NA     0
76621              0          0                0                          1                         0              46     0
46310              0          0                0                          0                         1              36     0
76761              1          0                0                         NA                         0              29     0
76831             NA         NA               NA                         NA                         1              40     0
76841              0          0                0                          0                         0              NA     0
76871              0          0                0                          1                         1              NA     0
76951             NA         NA               NA                         NA                         0              NA     0
76961              0          1                0                          1                         0              52     1
77011              1          0                0                         NA                         0              30     0
77041              0          0                0                         NA                         1              NA     0
77091              1          0                0                          1                         1              47     1
77281             NA         NA               NA                          1                         1              98     1
77291              0          0                1                          1                         1              56     1
77561              0          0                0                          0                         1              57     0
77691              0          0                0                         NA                         0              NA     0
77711              0          0                0                         NA                         0              NA     0
77791              0          0                0                          1                         1              72     1
77871              0          0                0                         NA                         1              NA     0

Of course, you can also use if_else or ifelse instead of case_when, and an anonymous function instead of the ~ shorthand:

# Set EAM_2 to 1 when basdai_baseline is not missing and at least two of the
# five manifestation columns equal 1 (NA entries are skipped by the row sum);
# otherwise set it to 0. Written with if_else() and an ordinary anonymous
# function.
df %>%
  mutate(
    EAM_2 = if_else(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis, comorb_ibd, comorb_psoriasis,
              asas_criteria_3_enthesitis, asas_criteria_2_arthritis
            ),
            function(col) col == 1L
          ),
          na.rm = TRUE
        ) >= 2,
      1,
      0
    )
  )

CodePudding user response：

Simple:

# TRUE when basdai_baseline is not missing and the five manifestation columns
# sum to at least 2 in that row (assuming "at least 2" is what is wanted).
# na.rm = TRUE makes the row sum skip NA entries. The columns are summed as
# they are, so this presumably relies on them being coded 0/1.
df$EAM_2 <- !is.na(df$basdai_baseline) &
  rowSums(
    df[c(
      "comorb_uveitis", "comorb_ibd", "comorb_psoriasis",
      "asas_criteria_3_enthesitis", "asas_criteria_2_arthritis"
    )],
    na.rm = TRUE
  ) >= 2

# Store the flag as 0/1 rather than TRUE/FALSE.
df$EAM_2 <- as.numeric(df$EAM_2)