Home > database >  Referring to a dynamic name inside a for loop
Referring to a dynamic name inside a for loop

Time:11-25

I'm having trouble referring to a dynamically generated column name inside a for loop. I have the following data frame:

library("tidyverse")
# Reproducible toy data: three groups (A, B, C) with 100 rows each,
# two uniformly distributed measures and a rare binary risk indicator.
set.seed(10)
df <- data.frame(
  group = rep(LETTERS[1:3], each = 100),
  measure1 = runif(300, min = 20, max = 30),
  measure2 = runif(300, min = 10, max = 20),
  risk = rbinom(n = 300, size = 1, prob = 0.05)
)

# Blank out three values in each of measure1, measure2 and risk.
df$measure1[c(20, 21, 103)] <- NA
df$measure2[c(44, 80, 201)] <- NA
df$risk[c(61, 98, 207)] <- NA

For this data frame I calculate per-group limits:

# Per-group limits, estimated from the rows with risk == 0 only:
#   LLm1 = 5% quantile of measure1, ULm2 = 95% quantile of measure2.
# Each limit is repeated on every row of its group.
df <- df %>%
  group_by(group) %>%
  mutate(
    LLm1 = quantile(measure1[risk == 0], probs = 0.05, na.rm = TRUE),
    ULm2 = quantile(measure2[risk == 0], probs = 0.95, na.rm = TRUE)
  ) %>%
  ungroup()

Now I would like to determine which rows fall outside a certain interval, and I would like to vary this interval by some predefined additions or subtractions:

# For every combination of offsets (k subtracted from LLm1, i subtracted from
# ULm2) add one flag column named "new<i>var<k>":
#   1  when measure1 < LLm1 - k and measure2 >= ULm2 - i,
#   NA when measure1 or measure2 is missing,
#   0  otherwise.
for (k in seq(-2, 2, length.out = 5)) {
  for (i in seq(0.7, 1.0, length.out = 4)) {
    df <- df %>%
      group_by(group) %>%
      mutate(
        !!paste0("new", format(i, nsmall = 1), "var", format(k, nsmall = 0)) :=
          ifelse(
            measure1 < (LLm1 - k) & measure2 >= (ULm2 - i),
            1,
            ifelse(is.na(measure1) | is.na(measure2), NA, 0)
          )
      ) %>%
      ungroup()
  }
}

So far this works fine. However, I'd like to create another variable based on the dynamic variables I'm making, like this:

# Nested loop over the offsets k and i that creates two columns per pair:
#   "var<i>_<k>" - the interval flag (1 / 0 / NA), and
#   "new<i>_<k>" - intended to be 1 when that flag is 1 and risk == 1.
# This is the version the question is about; it does not give the intended
# result for the "new" columns.
for (k in seq(-2, 2, length.out = 5)){
  for (i in seq(0.7, 1.0, length.out = 4)){
    df %>%
      group_by(group) %>%
      mutate(
        # Flag column: the dynamic name on the left-hand side of `:=` works as intended.
        !!paste0("var", format(i, nsmall=1), "_", format(k, nsmall=0)) := ifelse(measure1 < (LLm1 - k) & measure2 >= (ULm2 - i), 1, ifelse(is.na(measure1) | is.na(measure2), NA, 0)),
        # NOTE: inside ifelse() the `!!paste0(...)` injects the name as a
        # character string, not as a reference to the column, so `== 1` is
        # compared against the text of the name and the condition is never TRUE.
        !!paste0("new", format(i, nsmall=1), "_", format(k, nsmall=0)) := ifelse((!!paste0("var", format(i, nsmall=1), "_", format(k, nsmall=0))) == 1 & risk == 1, 1, ifelse(is.na(measure1) | is.na(measure2), NA, 0)),
        ) %>% ungroup -> df
  }
}

Unfortunately I'm not getting the desired results: a row that has a 1 in the new variable 'var' and a 1 in 'risk' does not get a 1 but a 0. I've tried some alternatives with brackets and eval(), but the result stays the same. Can anyone show me where I'm going wrong in the syntax, or explain how to refer to a dynamic name inside the for loop?

CodePudding user response:

You have to wrap the string with the variable name after the double-bang operator !! in sym() to make sure it is treated as a name.

Further, as I pointed out in my comment, the condition risk == 1 in ifelse is never met, so it seems like it's not working. For the example at hand, I therefore dropped that condition.

# For each offset pair (k, i) create two columns:
#   "var<i>_<k>" - the interval flag (1 / 0, NA when a measure is missing);
#   "new<i>_<k>" - derived from that flag column. The flag column is looked up
#                  by wrapping its name in sym(), so `!!` injects a column
#                  reference rather than a plain string.
for (k in seq(-2, 2, length.out = 5)) {
  for (i in seq(0.7, 1.0, length.out = 4)) {
    df <- df %>%
      group_by(group) %>%
      mutate(
        !!paste0("var", format(i, nsmall = 1), "_", format(k, nsmall = 0)) :=
          ifelse(
            measure1 < (LLm1 - k) & measure2 >= (ULm2 - i),
            1,
            ifelse(is.na(measure1) | is.na(measure2), NA, 0)
          ),
        !!paste0("new", format(i, nsmall = 1), "_", format(k, nsmall = 0)) :=
          ifelse(
            (!!sym(
              paste0("var", format(i, nsmall = 1), "_", format(k, nsmall = 0))
            )) == 1,
            1,
            ifelse(is.na(measure1) | is.na(measure2), NA, 0)
          )
      ) %>%
      ungroup()
  }
}

# Keep only the rows in which at least one "new*" column differs from 0.
df %>%
  filter(
    if_any(starts_with("new"), function(flag) flag != 0)
  )
#> # A tibble: 23 x 46
#>    group measure1 measure2  risk  LLm1  ULm2 `var0.7_-2` `new0.7_-2` `var0.8_-2`
#>    <chr>    <dbl>    <dbl> <int> <dbl> <dbl>       <dbl>       <dbl>       <dbl>
#>  1 A         22.6     19.7     0  20.6  19.5           1           1           1
#>  2 A         21.0     19.3     0  20.6  19.5           1           1           1
#>  3 A         21.8     19.8     0  20.6  19.5           1           1           1
#>  4 A         21.9     19.0     0  20.6  19.5           1           1           1
#>  5 A         22.2     18.9     1  20.6  19.5           1           1           1
#>  6 A         21.7     19.5     0  20.6  19.5           1           1           1
#>  7 B         22.1     19.4     0  20.6  19.4           1           1           1
#>  8 B         22.1     18.6     0  20.6  19.4           0           0           0
#>  9 B         20.6     19.6     0  20.6  19.4           1           1           1
#> 10 B         22.5     18.7     0  20.6  19.4           0           0           1
#> # ... with 13 more rows, and 37 more variables: new0.8_-2 <dbl>,
#> #   var0.9_-2 <dbl>, new0.9_-2 <dbl>, var1.0_-2 <dbl>, new1.0_-2 <dbl>,
#> #   var0.7_-1 <dbl>, new0.7_-1 <dbl>, var0.8_-1 <dbl>, new0.8_-1 <dbl>,
#> #   var0.9_-1 <dbl>, new0.9_-1 <dbl>, var1.0_-1 <dbl>, new1.0_-1 <dbl>,
#> #   var0.7_0 <dbl>, new0.7_0 <dbl>, var0.8_0 <dbl>, new0.8_0 <dbl>,
#> #   var0.9_0 <dbl>, new0.9_0 <dbl>, var1.0_0 <dbl>, new1.0_0 <dbl>,
#> #   var0.7_1 <dbl>, new0.7_1 <dbl>, var0.8_1 <dbl>, new0.8_1 <dbl>, ...

Another way to approach the problem is to use the dplyover package (disclaimer: I'm the maintainer), specifically the function dplyover::over2x(), which generates columns in a nested-loop style based on the input objects.

After dplyover::over2x() we can just use a regular call to across() and target all variables selected by starts_with("var").

library(dplyover)

# Loop-free alternative: create every "var..." flag column in a single
# mutate() call with over2x(), then derive a matching "new..." column from
# each of them with across(). The result is printed, not assigned back to df.
df %>%
  group_by(group) %>%
  mutate(
    # over2x() applies the formula to each combination of the two sequences:
    # .x comes from the first sequence (offset subtracted from LLm1) and .y from
    # the second (offset subtracted from ULm2). Columns are named "var{y}_{x}";
    # per the printed output, 1.0 is rendered as "1" here (e.g. "var1_-2").
    over2x(seq(-2, 2, length.out = 5),
           seq(0.7, 1.0, length.out = 4),
           ~ ifelse(measure1 < (LLm1 - .x) & measure2 >= (ULm2 - .y), 1, ifelse(is.na(measure1) | is.na(measure2), NA, 0)),
           .names = "var{y}_{x}"
    ),
    # For every column whose name starts with "var": 1 where the flag is 1,
    # NA where measure1 or measure2 is missing, 0 otherwise. The .names spec
    # replaces "var" with "new" in the source column name.
    across(starts_with("var"),
           ~ ifelse(.x == 1, 1,
                    ifelse(is.na(measure1) | is.na(measure2),
                           NA, 0)),
           .names = "{gsub('var', 'new', {.col})}")
  ) %>% 
  ungroup()

#> # A tibble: 300 x 46
#>    group measure1 measure2  risk  LLm1  ULm2 `var0.7_-2` `var0.8_-2` `var0.9_-2`
#>    <chr>    <dbl>    <dbl> <int> <dbl> <dbl>       <dbl>       <dbl>       <dbl>
#>  1 A         27.2     12.6     0  20.8  19.3           0           0           0
#>  2 A         29.0     14.1     0  20.8  19.3           0           0           0
#>  3 A         29.3     14.6     0  20.8  19.3           0           0           0
#>  4 A         25.4     14.4     0  20.8  19.3           0           0           0
#>  5 A         25.2     12.3     0  20.8  19.3           0           0           0
#>  6 A         25.6     10.0     0  20.8  19.3           0           0           0
#>  7 A         28.2     10.3     0  20.8  19.3           0           0           0
#>  8 A         27.4     19.7     0  20.8  19.3           0           0           0
#>  9 A         24.8     19.3     0  20.8  19.3           0           0           0
#> 10 A         22.7     18.0     0  20.8  19.3           0           0           0
#> # ... with 290 more rows, and 37 more variables: var1_-2 <dbl>,
#> #   var0.7_-1 <dbl>, var0.8_-1 <dbl>, var0.9_-1 <dbl>, var1_-1 <dbl>,
#> #   var0.7_0 <dbl>, var0.8_0 <dbl>, var0.9_0 <dbl>, var1_0 <dbl>,
#> #   var0.7_1 <dbl>, var0.8_1 <dbl>, var0.9_1 <dbl>, var1_1 <dbl>,
#> #   var0.7_2 <dbl>, var0.8_2 <dbl>, var0.9_2 <dbl>, var1_2 <dbl>,
#> #   new0.7_-2 <dbl>, new0.8_-2 <dbl>, new0.9_-2 <dbl>, new1_-2 <dbl>,
#> #   new0.7_-1 <dbl>, new0.8_-1 <dbl>, new0.9_-1 <dbl>, new1_-1 <dbl>, ...

The data

# Example data: 100 rows for each of the groups A, B and C.
# No seed is set here, so the random values differ between runs.
df <- data.frame(
  group = rep(LETTERS[1:3], each = 100),
  measure1 = runif(300, min = 20, max = 30),
  measure2 = runif(300, min = 10, max = 20),
  risk = rbinom(n = 300, size = 1, prob = 0.05)
)

# Introduce a few missing values in each of the three numeric columns.
df[c(20, 21, 103), "measure1"] <- NA
df[c(44, 80, 201), "measure2"] <- NA
df[c(61, 98, 207), "risk"] <- NA

library(dplyr)

# Add the per-group limits used by the flag columns. Both are computed from
# the rows with risk == 0 only: LLm1 is the 5% quantile of measure1 and
# ULm2 is the 95% quantile of measure2.
df <- df %>%
  group_by(group) %>%
  mutate(
    LLm1 = quantile(measure1[risk == 0], probs = 0.05, na.rm = TRUE),
    ULm2 = quantile(measure2[risk == 0], probs = 0.95, na.rm = TRUE)
  ) %>%
  ungroup()

Created on 2022-11-25 by the reprex package (v2.0.1)

  • Related