I have the following variables: comorb_uveitis, comorb_ibd, comorb_psoriasis, asas_criteria_3_enthesitis, asas_criteria_2_arthritis, basdai_baseline
I wish to create a new variable called "EAM_2" (=extra articular manifestations present N=2) based on the following conditions:
- If two of the five variables (uveitis, ibd, psoriasis, enthesitis, arthritis) are present
- AND if the basdai_baseline is present
- Return 1, otherwise return 0
Sample of data
# Sample data: a data.frame literal with 20 rows and six columns (NA marks a
# missing value); assign it to `df` to run the code that follows.
structure(list(comorb_uveitis = c(0L, 0L, 0L, 0L, 1L, NA, 0L,
0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L, 0L, 0L, 0L), comorb_ibd = c(0L,
1L, 0L, 0L, 0L, NA, 0L, 0L, NA, 1L, 0L, 0L, 0L, NA, 0L, 0L, 0L,
0L, 0L, 0L), comorb_psoriasis = c(0L, 0L, 0L, 0L, 0L, NA, 0L,
0L, NA, 0L, 0L, 0L, 0L, NA, 1L, 0L, 0L, 0L, 0L, 0L), asas_criteria_3_enthesitis = c(1L,
0L, 1L, 0L, NA, NA, 0L, 1L, NA, 1L, NA, NA, 1L, 1L, 1L, 0L, NA,
NA, 1L, NA), asas_criteria_2_arthritis = c(1L, 1L, 0L, 1L, 0L,
1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L),
basdai_baseline = c(70, NA, 46, 36, 29, 40, NA, NA, NA, 52,
30, NA, 47, 98, 56, 57, NA, NA, 72, NA)), row.names = c("76271",
"46010", "76621", "46310", "76761", "76831", "76841", "76871",
"76951", "76961", "77011", "77041", "77091", "77281", "77291",
"77561", "77691", "77711", "77791", "77871"), class = "data.frame")
I managed to get this far, but this only checks whether at least "one" EAM is present:
# Attempt from the question; it does not implement the "two of five" rule:
# - `x == 1 | x == 0` is TRUE for every non-missing 0/1 value, so each pair
#   tests that the column is recorded, not that the manifestation is present.
# - `&` binds tighter than `|` in R, so the basdai_baseline condition is only
#   combined with the final `asas_criteria_2_arthritis == 0` term.
# - `basdai_baseline != is.na(basdai_baseline)` compares the score with a
#   logical; `!is.na(basdai_baseline)` is the usual test for a present value.
# Note that the result is assigned to both `df` and `df2`.
df <- df2 <- mutate(df, EAM_2 = if_else (comorb_uveitis == 1 | comorb_uveitis == 0 | comorb_ibd == 1 | comorb_ibd == 0
| comorb_psoriasis == 1 | comorb_psoriasis == 0 | asas_criteria_3_enthesitis == 1 | asas_criteria_3_enthesitis == 0
| asas_criteria_2_arthritis == 1 | asas_criteria_2_arthritis == 0 & basdai_baseline != is.na(basdai_baseline), 1, 0))
EDIT: When trying the code as proposed by @Iago, I get the following result:
EAM_2 comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline
76271 1 0 0 0 1 1 70
46010 0 0 1 0 0 1 NA
76621 0 0 0 0 1 0 46
46310 0 0 0 0 0 1 36
76761 0 1 0 0 NA 0 29
76831 0 NA NA NA NA 1 40
76841 0 0 0 0 0 0 NA
76871 0 0 0 0 1 1 NA
76951 0 NA NA NA NA 0 NA
76961 1 0 1 0 1 0 52
77011 0 1 0 0 NA 0 30
77041 0 0 0 0 NA 1 NA
77091 1 1 0 0 1 1 47
77281 0 NA NA NA 1 1 98
81061 0 NA NA NA 1 1 84
Errors present in last two rows.
CodePudding user response:
library(dplyr)

# EAM_2 is 1 when at least two of the five manifestation columns equal 1 and
# basdai_baseline is not NA; every other row gets 0.
df %>%
  mutate(
    EAM_2 = case_when(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis, comorb_ibd, comorb_psoriasis,
              asas_criteria_3_enthesitis, asas_criteria_2_arthritis
            ),
            ~ .x == 1L
          ),
          # NA comparisons are dropped, so a missing manifestation counts as
          # absent instead of turning the whole row sum into NA.
          na.rm = TRUE
        ) >= 2 ~ 1,
      TRUE ~ 0
    )
  )
comorb_uveitis comorb_ibd comorb_psoriasis asas_criteria_3_enthesitis asas_criteria_2_arthritis basdai_baseline EAM_2
76271 0 0 0 1 1 70 1
46010 0 1 0 0 1 NA 0
76621 0 0 0 1 0 46 0
46310 0 0 0 0 1 36 0
76761 1 0 0 NA 0 29 0
76831 NA NA NA NA 1 40 0
76841 0 0 0 0 0 NA 0
76871 0 0 0 1 1 NA 0
76951 NA NA NA NA 0 NA 0
76961 0 1 0 1 0 52 1
77011 1 0 0 NA 0 30 0
77041 0 0 0 NA 1 NA 0
77091 1 0 0 1 1 47 1
77281 NA NA NA 1 1 98 0
77291 0 0 1 1 1 56 1
77561 0 0 0 0 1 57 0
77691 0 0 0 NA 0 NA 0
77711 0 0 0 NA 0 NA 0
77791 0 0 0 1 1 72 1
77871 0 0 0 NA 1 NA 0
Of course, you may also use `if_else` or `ifelse` instead of `case_when`, and an anonymous function instead of `~`:
# EAM_2 is 1 when at least two of the five manifestation columns equal 1 and
# basdai_baseline is not NA, otherwise 0.
df %>%
  mutate(
    EAM_2 = if_else(
      !is.na(basdai_baseline) &
        rowSums(
          across(
            c(
              comorb_uveitis, comorb_ibd, comorb_psoriasis,
              asas_criteria_3_enthesitis, asas_criteria_2_arthritis
            ),
            function(x) x == 1L
          ),
          # Drop NA comparisons so missing manifestations count as absent.
          na.rm = TRUE
        ) >= 2,
      1,
      0
    )
  )
CodePudding user response:
Simple:
# Assumes "at least 2" manifestations are required.
# Sums the five 0/1 manifestation columns per row (NA ignored), requires a
# non-missing basdai_baseline, and stores the flag as numeric 0/1 rather than
# TRUE/FALSE.
df$EAM_2 <- as.numeric(
  !is.na(df$basdai_baseline) &
    rowSums(
      df[c(
        "comorb_uveitis", "comorb_ibd", "comorb_psoriasis",
        "asas_criteria_3_enthesitis", "asas_criteria_2_arthritis"
      )],
      na.rm = TRUE
    ) >= 2
)