Home > Net >  Create new variable based on the condition if two of five variables is not NA and one other variable
Create new variable based on the condition if two of five variables is not NA and one other variable

Time:12-29

I have the following variables: comorb_uveitis, comorb_ibd, comorb_psoriasis, asas_criteria_3_enthesitis, asas_criteria_2_arthritis, basdai_baseline

I wish to create a new variable called "EAM_2" (=extra articular manifestations present N=2) based on the following conditions:

  • If two (or more) of the five variables (uveitis, ibd, psoriasis, enthesitis, arthritis) are present (i.e. equal to 1)
  • AND if basdai_baseline is present (i.e. not NA)
  • Return 1, otherwise return 0

Sample of data

# Sample data (looks like dput() output): a data.frame with 20 rows, five
# integer indicator columns holding 0L/1L/NA, and a numeric basdai_baseline
# column that also contains NA. Row names are character IDs.
# NOTE: the expression is not assigned to a name here; wrap it as
# `df <- structure(...)` before running the snippets that refer to `df`.
structure(list(comorb_uveitis = c(0L, 0L, 0L, 0L, 1L, NA, 0L, 
0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L, 0L, 0L, 0L), comorb_ibd = c(0L, 
1L, 0L, 0L, 0L, NA, 0L, 0L, NA, 1L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 
0L, 0L, 0L), comorb_psoriasis = c(0L, 0L, 0L, 0L, 0L, NA, 0L, 
0L, NA, 0L, 0L, 0L, 0L, NA, 1L, 0L, 0L, 0L, 0L, 0L), asas_criteria_3_enthesitis = c(1L, 
0L, 1L, 0L, NA, NA, 0L, 1L, NA, 1L, NA, NA, 1L, 1L, 1L, 0L, NA, 
NA, 1L, NA), asas_criteria_2_arthritis = c(1L, 1L, 0L, 1L, 0L, 
1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L), 
    basdai_baseline = c(70, NA, 46, 36, 29, 40, NA, NA, NA, 52, 
    30, NA, 47, 98, 56, 57, NA, NA, 72, NA)), row.names = c("76271", 
"46010", "76621", "46310", "76761", "76831", "76841", "76871", 
"76951", "76961", "77011", "77041", "77091", "77281", "77291", 
"77561", "77691", "77711", "77791", "77871"), class = "data.frame")

I managed to get this far, but this only checks whether at least one EAM is present:

# Asker's attempt -- kept as posted; it does NOT implement the stated rule:
# - For a 0/1-coded column, `x == 1 | x == 0` is TRUE whenever x is not NA, so
#   the chain of `|` tests "some indicator is recorded", not "two indicators
#   equal 1".
# - `&` binds tighter than `|`, so the basdai_baseline condition is combined
#   only with the final `asas_criteria_2_arthritis == 0` term, not with the
#   whole chain.
# - `basdai_baseline != is.na(basdai_baseline)` compares the value with a
#   logical; it is not a missingness test (that would be
#   `!is.na(basdai_baseline)`).
# The result is assigned to both `df` and `df2`.
df <- df2 <- mutate(df, EAM_2 = if_else (comorb_uveitis == 1 | comorb_uveitis == 0 | comorb_ibd == 1 | comorb_ibd == 0
                                                           | comorb_psoriasis == 1 | comorb_psoriasis == 0 | asas_criteria_3_enthesitis == 1 | asas_criteria_3_enthesitis == 0
                                                           | asas_criteria_2_arthritis == 1 | asas_criteria_2_arthritis == 0 & basdai_baseline != is.na(basdai_baseline), 1, 0))

EDIT: When trying the code proposed by @Iago, I get the following output:

       EAM_2 comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline
76271      1              0          0                0                          1                         1              70
46010      0              0          1                0                          0                         1              NA
76621      0              0          0                0                          1                         0              46
46310      0              0          0                0                          0                         1              36
76761      0              1          0                0                         NA                         0              29
76831      0             NA         NA               NA                         NA                         1              40
76841      0              0          0                0                          0                         0              NA
76871      0              0          0                0                          1                         1              NA
76951      0             NA         NA               NA                         NA                         0              NA
76961      1              0          1                0                          1                         0              52
77011      0              1          0                0                         NA                         0              30
77041      0              0          0                0                         NA                         1              NA
77091      1              1          0                0                          1                         1              47
77281      0             NA         NA               NA                          1                         1              98
81061      0             NA         NA               NA                          1                         1              84

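The last two rows are wrong: both have two EAMs present (enthesitis and arthritis) and a non-missing basdai_baseline, so EAM_2 should be 1.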

CodePudding user response:

library(dplyr)

# EAM_2 is 1 when basdai_baseline is not missing and at least two of the five
# indicator columns equal 1; every other row gets 0. NA indicators are skipped
# by rowSums() because of na.rm = TRUE, so they never count as "present".
df %>%
  mutate(
    EAM_2 = case_when(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis, comorb_ibd, comorb_psoriasis,
              asas_criteria_3_enthesitis, asas_criteria_2_arthritis
            ),
            ~ .x == 1L
          ),
          na.rm = TRUE
        ) >= 2 ~ 1,
      TRUE ~ 0
    )
  )
      comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline EAM_2
76271              0          0                0                          1                         1              70     1
46010              0          1                0                          0                         1              NA     0
76621              0          0                0                          1                         0              46     0
46310              0          0                0                          0                         1              36     0
76761              1          0                0                         NA                         0              29     0
76831             NA         NA               NA                         NA                         1              40     0
76841              0          0                0                          0                         0              NA     0
76871              0          0                0                          1                         1              NA     0
76951             NA         NA               NA                         NA                         0              NA     0
76961              0          1                0                          1                         0              52     1
77011              1          0                0                         NA                         0              30     0
77041              0          0                0                         NA                         1              NA     0
77091              1          0                0                          1                         1              47     1
77281             NA         NA               NA                          1                         1              98     1
77291              0          0                1                          1                         1              56     1
77561              0          0                0                          0                         1              57     0
77691              0          0                0                         NA                         0              NA     0
77711              0          0                0                         NA                         0              NA     0
77791              0          0                0                          1                         1              72     1
77871              0          0                0                         NA                         1              NA     0

Of course, you may also use if_else or ifelse instead of case_when, and an anonymous function instead of the ~ shorthand:

# Same rule as the case_when() version, written with if_else() and an explicit
# anonymous function: 1 when basdai_baseline is not NA and at least two of the
# five indicators equal 1 (NA indicators are ignored), otherwise 0.
df %>%
  mutate(
    EAM_2 = if_else(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis, comorb_ibd, comorb_psoriasis,
              asas_criteria_3_enthesitis, asas_criteria_2_arthritis
            ),
            function(indicator) indicator == 1L
          ),
          na.rm = TRUE
        ) >= 2,
      1,
      0
    )
  )

CodePudding user response:

Simple:

# Base R version. Sums the five 0/1 indicator columns per row (NAs ignored)
# and reads the rule as "at least 2" manifestations; the row must also have a
# non-missing basdai_baseline. as.numeric() stores the flag as 0/1 rather than
# FALSE/TRUE.
df$EAM_2 <- as.numeric(
  rowSums(
    df[c(
      "comorb_uveitis", "comorb_ibd", "comorb_psoriasis",
      "asas_criteria_3_enthesitis", "asas_criteria_2_arthritis"
    )],
    na.rm = TRUE
  ) >= 2 &
    !is.na(df$basdai_baseline)
)
  • Related