I have a dataset named trainset that has 50 variables. For each row, I need to sum up the points derived from the values in the columns Systolic.Blood.Pressure, Blood.Urea.Nitrogen, Blood.Sodium, Age, heart.rate and COPD. I have already defined a function to compute this sum:
# Convert six clinical measurements into a points total and classify it.
#
# Each measurement is mapped to the points of the band it falls in, the six
# point values are added up, and the total is compared with the cutoff of 79.
# (Judging by the name this is presumably the GWTG risk score -- confirm the
# point tables against the published score before relying on them.)
#
# All arguments may be single numbers or equal-length vectors, so the function
# can be called once on whole data-frame columns instead of row by row.
#
# Arguments:
#   Systolic.BP  systolic blood pressure; values below 50 have no band
#   BUN          blood urea nitrogen
#   Sodium       blood sodium
#   Age          age
#   HR           heart rate
#   COPD         1 when COPD is present, anything else counts as absent
#
# Returns a numeric vector: 0 when the total is below 79, 1 otherwise, and NA
# when any input is NA or Systolic.BP is below 50.
m.gwtg <- function(Systolic.BP, BUN, Sodium, Age, HR, COPD) {
  # `breaks` holds the ascending lower bounds of the bands. `points` has one
  # more entry than `breaks`: its first entry applies below the lowest bound.
  # findInterval() returns 0 below the first break, hence the `+ 1L`.
  band_points <- function(x, breaks, points) {
    points[findInterval(x, breaks) + 1L]
  }

  pt.sbp <- band_points(
    Systolic.BP,
    breaks = seq(50, 200, by = 10),
    # No points are defined for a systolic pressure below 50, so use NA.
    points = c(NA, 28, 26, 24, 23, 21, 19, 17, 15, 13, 11, 9, 8, 6, 4, 2, 0)
  )
  pt.bun <- band_points(
    BUN,
    breaks = seq(10, 150, by = 10),
    points = c(0, 2, 4, 6, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 28)
  )
  pt.sodium <- band_points(
    Sodium,
    breaks = c(131, 134, 137, 139),
    points = c(4, 3, 2, 1, 0)
  )
  pt.age <- band_points(
    Age,
    breaks = seq(20, 110, by = 10),
    points = c(0, 3, 6, 8, 11, 14, 17, 19, 22, 25, 28)
  )
  pt.hr <- band_points(
    HR,
    breaks = c(80, 85, 90, 95, 100, 105),
    points = c(0, 1, 3, 4, 5, 6, 8)
  )
  pt.copd <- ifelse(COPD == 1, 2, 0)

  total <- pt.sbp + pt.bun + pt.sodium + pt.age + pt.hr + pt.copd

  # 0 for a total below 79, 1 for a total of 79 or more.
  as.numeric(total >= 79)
}
I am having trouble writing the for loop that goes through trainset and applies the function defined above to each row. I tried to code the loop like this:
# Apply m.gwtg() to every row of trainset and collect the 0/1 outcomes.
# The result vector is created up front so it exists before the first
# assignment and is not regrown on every iteration.
outcome.gwtg.trainset <- numeric(nrow(trainset))

# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(trainset) would iterate over c(1, 0).
for (i in seq_len(nrow(trainset))) {
  # m.gwtg() takes the six raw measurements as separate arguments and does the
  # scoring and summing itself, so the row's values are passed in directly
  # rather than being added together first.
  outcome.gwtg.trainset[i] <- m.gwtg(
    Systolic.BP = trainset$Systolic.blood.pressure[i],
    BUN = trainset$Urea.nitrogen[i],
    Sodium = trainset$Blood.sodium[i],
    Age = trainset$age[i],
    HR = trainset$heart.rate[i],
    COPD = trainset$COPD[i]
  )
}
I got quite confused about how to write the for loop. Thanks for any answers!
CodePudding user response:
This is a more concise way to calculate the outcome:
library(tidyverse)
# Sum the points for systolic blood pressure, BUN and sodium.
#
# Vectorised: every argument may be a numeric vector, and the result has one
# total per element. Only the top few bands of each table are listed here, so
# a value below the lowest listed band matches no case and yields NA for that
# component (and therefore for the sum).
calc_score <- function(systolic_bp, bun, sodium) {
  systolic_bp_score <- dplyr::case_when(
    systolic_bp >= 200 ~ 0,
    systolic_bp >= 190 ~ 2,
    systolic_bp >= 180 ~ 4
  )
  bun_score <- dplyr::case_when(
    bun >= 150 ~ 28,
    bun >= 140 ~ 27
  )
  sodium_score <- dplyr::case_when(
    sodium >= 139 ~ 0,
    sodium >= 137 ~ 1
  )
  systolic_bp_score + bun_score + sodium_score
}
# example data
trainset <- tibble(
  systolic_bp = c(180, 195),
  bun = c(145, 180),
  sodium = c(138, 140)
)

# calc_score() is built on case_when(), which is already vectorised, so it can
# be given the columns directly; no row-wise pmap is needed.
trainset %>%
  mutate(
    score = calc_score(systolic_bp, bun, sodium),
    # Same rule as the question's function: a total below 79 gives 0,
    # otherwise 1.
    outcome = as.numeric(score >= 79)
  )
#> # A tibble: 2 × 5
#> systolic_bp bun sodium score outcome
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 180 145 138 32 0
#> 2 195 180 140 30 0
Created on 2022-03-25 by the reprex package (v2.0.0)
CodePudding user response:
You should avoid loops and similar functions (e.g., *apply() and purrr::map()) whenever possible in R. R is designed to work with vectors, and loops are much, much slower.
Instead of this loop operation, do the following:
- Recode each column into its transformed values
- Add up the transformed values
- Recode the transformed values into the 0/1 outcome
These operations should be done vector-wise to be efficient (and to avoid the tricky indexing problems you are encountering).
For example:
# Add per-measurement points, their total and a 0/1 outcome to a data frame.
#
# `data` must contain the numeric columns Systolic.BP, BUN, Sodium, Age, HR and
# COPD. Each column is recoded into points with a vectorised case_when(), the
# points are summed into `total`, and `outcome` is 0 when the total is below 79
# and 1 otherwise.
#
# Returns `data` with the added columns pt.sbp, pt.bun, pt.sodium, pt.age,
# pt.hr, pt.copd, total and outcome. A value that matches no band (including
# NA) gives NA points, which carries through to `total` and `outcome`.
m.gwtg <- function(data) {
  data <- dplyr::mutate(data,
    # case_when() uses the first condition that is TRUE, so within each table
    # the bands must be listed from the highest lower bound down.
    pt.sbp = dplyr::case_when(
      Systolic.BP >= 200 ~ 0,
      Systolic.BP >= 190 ~ 2,
      Systolic.BP >= 180 ~ 4,
      Systolic.BP >= 170 ~ 6,
      Systolic.BP >= 160 ~ 8,
      Systolic.BP >= 150 ~ 9,
      Systolic.BP >= 140 ~ 11,
      Systolic.BP >= 130 ~ 13,
      Systolic.BP >= 120 ~ 15,
      Systolic.BP >= 110 ~ 17,
      Systolic.BP >= 100 ~ 19,
      Systolic.BP >= 90 ~ 21,
      Systolic.BP >= 80 ~ 23,
      Systolic.BP >= 70 ~ 24,
      Systolic.BP >= 60 ~ 26,
      Systolic.BP >= 50 ~ 28, # should Systolic.BP < 50 be NA or 28?
      # else
      TRUE ~ NA_real_
    ),
    pt.bun = dplyr::case_when(
      BUN >= 150 ~ 28,
      BUN >= 140 ~ 27,
      BUN >= 130 ~ 25,
      BUN >= 120 ~ 23,
      BUN >= 110 ~ 21,
      BUN >= 100 ~ 19,
      BUN >= 90 ~ 17,
      BUN >= 80 ~ 15,
      BUN >= 70 ~ 13,
      BUN >= 60 ~ 11,
      BUN >= 50 ~ 9,
      BUN >= 40 ~ 8,
      BUN >= 30 ~ 6,
      BUN >= 20 ~ 4,
      BUN >= 10 ~ 2,
      BUN < 10 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    pt.sodium = dplyr::case_when(
      Sodium >= 139 ~ 0,
      Sodium >= 137 ~ 1,
      Sodium >= 134 ~ 2,
      Sodium >= 131 ~ 3,
      Sodium < 131 ~ 4,
      # else
      TRUE ~ NA_real_
    ),
    pt.age = dplyr::case_when(
      Age >= 110 ~ 28,
      Age >= 100 ~ 25,
      Age >= 90 ~ 22,
      Age >= 80 ~ 19,
      Age >= 70 ~ 17,
      Age >= 60 ~ 14,
      Age >= 50 ~ 11,
      Age >= 40 ~ 8,
      Age >= 30 ~ 6,
      Age >= 20 ~ 3,
      Age < 20 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    pt.hr = dplyr::case_when(
      HR >= 105 ~ 8,
      HR >= 100 ~ 6,
      HR >= 95 ~ 5,
      HR >= 90 ~ 4,
      HR >= 85 ~ 3,
      HR >= 80 ~ 1,
      HR < 80 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    pt.copd = dplyr::case_when(
      COPD == 1 ~ 2,
      COPD == 0 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    total = pt.sbp + pt.bun + pt.sodium + pt.age + pt.hr + pt.copd,
    outcome = dplyr::if_else(total < 79, 0, 1)
  )
  return(data)
}

example_data <- data.frame(
  Systolic.BP = c(170, 160, 200),
  BUN = c(60, 150, 10),
  Sodium = c(134, 131, 139),
  Age = c(40, 80, 20),
  HR = c(90, 105, 80),
  COPD = c(1, 0, 0)
)
m.gwtg(example_data)
#>   Systolic.BP BUN Sodium Age  HR COPD pt.sbp pt.bun pt.sodium pt.age pt.hr
#> 1         170  60    134  40  90    1      6     11         2      8     4
#> 2         160 150    131  80 105    0      8     28         3     19     8
#> 3         200  10    139  20  80    0      0      2         0      3     1
#>   pt.copd total outcome
#> 1       2    33       0
#> 2       0    66       0
#> 3       0     6       0
Created on 2022-03-25 by the reprex package (v2.0.1)