Home > Mobile >  for loop in R: How to apply a defined function to loop through rows and columns in R?
for loop in R: How to apply a defined function to loop through rows and columns in R?

Time:03-25

I have a dataset named trainset that has 50 variables. For each row, I need to sum up the points for the values in the columns called Systolic.Blood.Pressure, Blood.Urea.Nitrogen, Blood.Sodium, Age, heart.rate and COPD. I already defined a function to sum these up:

#' Score patients and flag those whose total reaches the cut-off
#'
#' Each measurement is converted to points using the band tables below, the
#' six point values are added, and the sum is dichotomised: totals below 79
#' give 0, totals of 79 or more give 1.
#'
#' The function is vectorised: it accepts single values (one patient) or
#' whole columns of equal length, e.g.
#' `m.gwtg(trainset$Systolic.blood.pressure, trainset$Urea.nitrogen, ...)`,
#' so no explicit loop over rows is required.
#'
#' @param Systolic.BP Numeric vector of systolic blood pressure values.
#'   Values below 50 are not covered by the table and yield `NA`.
#' @param BUN Numeric vector of blood urea nitrogen values.
#' @param Sodium Numeric vector of blood sodium values.
#' @param Age Numeric vector of ages.
#' @param HR Numeric vector of heart rates.
#' @param COPD Vector with 1 for patients with COPD; any other non-missing
#'   value scores 0 points.
#' @return Numeric vector of 0/1 outcomes, `NA` where an input is missing or
#'   `Systolic.BP` is below 50.
m.gwtg <- function(Systolic.BP, BUN, Sodium, Age, HR, COPD) {
  # Look up the points of the band each value falls into. `lower` holds the
  # ascending lower bounds of the bands (a band runs from its bound up to,
  # but not including, the next bound). Values below the first bound get NA
  # instead of being silently dropped by zero indexing.
  band.points <- function(x, lower, points) {
    band <- findInterval(x, lower)
    band[which(band == 0L)] <- NA_integer_
    points[band]
  }

  pt.sbp <- band.points(
    Systolic.BP,
    lower  = c(50, 60, 70, 80, 90, 100, 110, 120,
               130, 140, 150, 160, 170, 180, 190, 200),
    points = c(28, 26, 24, 23, 21, 19, 17, 15,
               13, 11, 9, 8, 6, 4, 2, 0)
  )

  # -Inf as the first bound makes the lowest band open-ended, so fractional
  # values such as 9.5 are scored instead of falling between two conditions.
  pt.bun <- band.points(
    BUN,
    lower  = c(-Inf, 10, 20, 30, 40, 50, 60, 70,
               80, 90, 100, 110, 120, 130, 140, 150),
    points = c(0, 2, 4, 6, 8, 9, 11, 13,
               15, 17, 19, 21, 23, 25, 27, 28)
  )

  pt.sodium <- band.points(
    Sodium,
    lower  = c(-Inf, 131, 134, 137, 139),
    points = c(4, 3, 2, 1, 0)
  )

  pt.age <- band.points(
    Age,
    lower  = c(-Inf, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110),
    points = c(0, 3, 6, 8, 11, 14, 17, 19, 22, 25, 28)
  )

  pt.hr <- band.points(
    HR,
    lower  = c(-Inf, 80, 85, 90, 95, 100, 105),
    points = c(0, 1, 3, 4, 5, 6, 8)
  )

  pt.copd <- ifelse(COPD == 1, 2, 0)

  total <- pt.sbp + pt.bun + pt.sodium + pt.age + pt.hr + pt.copd

  # 0 when total < 79, 1 otherwise
  as.numeric(total >= 79)
}

I have a problem coding the for loop that goes through trainset and applies the function defined above to each row. I tried to code the loop like this:

# Score every row of `trainset` with m.gwtg(), one patient at a time.
# m.gwtg() takes the six raw measurements as separate arguments, so the
# column values of row i are passed by name instead of being combined first.
n.rows <- nrow(trainset)
outcome.gwtg.trainset <- numeric(n.rows)  # preallocate the result vector
for (i in seq_len(n.rows)) {  # seq_len() also copes with a 0-row trainset
  outcome.gwtg.trainset[i] <- m.gwtg(
    Systolic.BP = trainset$Systolic.blood.pressure[i],
    BUN = trainset$Urea.nitrogen[i],
    Sodium = trainset$Blood.sodium[i],
    Age = trainset$age[i],
    HR = trainset$heart.rate[i],
    COPD = trainset$COPD[i]
  )
}

I got quite confused about how to code the for loop. Thanks for any answers!

CodePudding user response:

This is a more concise way to calculate the outcome:

library(tidyverse)

# Convert each measurement to points and add them up.
# Only the first few bands of each table are shown to keep the example
# short; case_when() returns NA for values that match none of the listed
# bands. All three inputs may be vectors of equal length.
calc_score <- function(systolic_bp, bun, sodium) {
  systolic_bp_score <- case_when(
    systolic_bp >= 200 ~ 0,
    systolic_bp >= 190 ~ 2,
    systolic_bp >= 180 ~ 4
  )
  bun_score <- case_when(
    bun >= 150 ~ 28,
    bun >= 140 ~ 27
  )
  sodium_score <- case_when(
    sodium >= 139 ~ 0,
    sodium >= 137 ~ 1
  )
  systolic_bp_score + bun_score + sodium_score
}

# example data
trainset <- tibble(
  systolic_bp = c(180, 195),
  bun = c(145, 180),
  sodium = c(138, 140)
)

# calc_score() is built from case_when(), which is vectorised, so it can be
# applied to whole columns directly without mapping over the rows.
trainset %>%
  mutate(
    score = calc_score(systolic_bp, bun, sodium),
    # same rule as in the question: totals below 79 give 0, otherwise 1
    outcome = as.numeric(score >= 79)
  )
#> # A tibble: 2 × 5
#>   systolic_bp   bun sodium score outcome
#>         <dbl> <dbl>  <dbl> <dbl>   <dbl>
#> 1         180   145    138    32       0
#> 2         195   180    140    30       0

Created on 2022-03-25 by the reprex package (v2.0.0)

CodePudding user response:

You should avoid loops and similar functions (eg, *apply() and purrr::map()) whenever possible in R. R is designed to work with vectors, and loops are much much slower.

Instead of this loop operation, do the following:

  1. Recode each column into its transformed values
  2. Add up the transformed values
  3. Recode the transformed values into the 0/1 outcome

These operations should be done vector-wise to be efficient (and to avoid the tricky indexing problems you are encountering).

For example:

#' Add point columns, their total and the 0/1 outcome to a data frame
#'
#' Every measurement column is recoded into points with a vectorised
#' `dplyr::case_when()` ladder (conditions are checked top to bottom and the
#' first match wins), the points are summed into `total`, and `outcome` is 0
#' for totals below 79 and 1 otherwise.
#'
#' @param data A data frame with the columns `Systolic.BP`, `BUN`, `Sodium`,
#'   `Age`, `HR` and `COPD`.
#' @return `data` with the added columns `pt.sbp`, `pt.bun`, `pt.sodium`,
#'   `pt.age`, `pt.hr`, `pt.copd`, `total` and `outcome`. Values not covered
#'   by a ladder (including missing values) give `NA`.
m.gwtg <- function(data) {
  dplyr::mutate(data,
    pt.sbp = dplyr::case_when(
      Systolic.BP >= 200 ~ 0,
      Systolic.BP >= 190 ~ 2,
      Systolic.BP >= 180 ~ 4,
      Systolic.BP >= 170 ~ 6,
      Systolic.BP >= 160 ~ 8,
      Systolic.BP >= 150 ~ 9,
      Systolic.BP >= 140 ~ 11,
      Systolic.BP >= 130 ~ 13,
      Systolic.BP >= 120 ~ 15,
      Systolic.BP >= 110 ~ 17,
      Systolic.BP >= 100 ~ 19,
      Systolic.BP >=  90 ~ 21,
      Systolic.BP >=  80 ~ 23,
      Systolic.BP >=  70 ~ 24,
      Systolic.BP >=  60 ~ 26,
      Systolic.BP >=  50 ~ 28, # should Systolic.BP < 50 be NA or 28?
      # else
      TRUE ~ NA_real_
    ),
    pt.bun = dplyr::case_when(
      BUN >= 150 ~ 28,
      BUN >= 140 ~ 27,
      BUN >= 130 ~ 25,
      BUN >= 120 ~ 23,
      BUN >= 110 ~ 21,
      BUN >= 100 ~ 19,
      BUN >=  90 ~ 17,
      BUN >=  80 ~ 15,
      BUN >=  70 ~ 13,
      BUN >=  60 ~ 11,
      BUN >=  50 ~  9,
      BUN >=  40 ~  8,
      BUN >=  30 ~  6,
      BUN >=  20 ~  4,
      BUN >=  10 ~  2,
      BUN  <  10 ~  0,
      # else
      TRUE ~ NA_real_
    ),
    pt.sodium = dplyr::case_when(
      Sodium >= 139 ~ 0,
      Sodium >= 137 ~ 1,
      Sodium >= 134 ~ 2,
      Sodium >= 131 ~ 3,
      Sodium  < 131 ~ 4,
      # else
      TRUE ~ NA_real_
    ),
    # Every step of the descending ladder must use >=; a <= step would
    # swallow all lower ages and make the bands below it unreachable.
    pt.age = dplyr::case_when(
      Age >= 110 ~ 28,
      Age >= 100 ~ 25,
      Age >=  90 ~ 22,
      Age >=  80 ~ 19,
      Age >=  70 ~ 17,
      Age >=  60 ~ 14,
      Age >=  50 ~ 11,
      Age >=  40 ~  8,
      Age >=  30 ~  6,
      Age >=  20 ~  3,
      Age  <  20 ~  0,
      # else
      TRUE ~ NA_real_
    ),
    pt.hr = dplyr::case_when(
      HR >= 105 ~ 8,
      HR >= 100 ~ 6,
      HR >=  95 ~ 5,
      HR >=  90 ~ 4,
      HR >=  85 ~ 3,
      HR >=  80 ~ 1,
      HR  <  80 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    pt.copd = dplyr::case_when(
      COPD == 1 ~ 2,
      COPD == 0 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    total = pt.sbp + pt.bun + pt.sodium + pt.age + pt.hr + pt.copd,
    outcome = dplyr::if_else(total < 79, 0, 1)
  )
}

example_data <- data.frame(
  Systolic.BP = c(170, 160, 200),
  BUN = c(60, 150, 10),
  Sodium = c(134, 131, 139),
  Age = c(40, 80, 20),
  HR = c(90, 105, 80),
  COPD = c(1, 0, 0)
)

m.gwtg(example_data)
#>   Systolic.BP BUN Sodium Age  HR COPD pt.sbp pt.bun pt.sodium pt.age pt.hr
#> 1         170  60    134  40  90    1      6     11         2      8     4
#> 2         160 150    131  80 105    0      8     28         3     19     8
#> 3         200  10    139  20  80    0      0      2         0      3     1
#>   pt.copd total outcome
#> 1       2    33       0
#> 2       0    66       0
#> 3       0     6       0

Created on 2022-03-25 by the reprex package (v2.0.1)

  • Related