Loop over months and apply a function-CodePudding

Hi, I would like to loop over months and, for each monthly subset, apply the following function:

1 - (1 - se * p2)^df$n

Do you have an alternative to a for loop, or would you suggest a better way to code it with a loop? This is just a toy example: the real dataset is much larger, and it is tedious to loop over 12 months by hand. The column to group by is `month`.

Thanks in advance

# Question's reprex, part 1: reset the session, load packages, and simulate
# the inputs used by the per-month calculation further down.

# Removes every object from the global environment.
# NOTE(review): generally discouraged in shared scripts; it does not unload
# packages or reset options.
rm(list = ls())

# Tries to make the folder of the active RStudio document the working
# directory. Under reprex there is no RStudio session, hence the error below.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#> Error: RStudio not running
getwd()
#> [1] "C:/Users/Angela/AppData/Local/Temp/Rtmp21Zbwk/reprex-19b06a781308-waspy-bunny"

# Load required packages.
# mc2d is needed for rpert() below; gplots, RColorBrewer and reprex are not
# used in the visible part of this script.
library(mc2d)
#> Loading required package: mvtnorm
#> 
#> Attaching package: 'mc2d'
#> The following objects are masked from 'package:base':
#> 
#>     pmax, pmin
library(gplots)
#> 
#> Attaching package: 'gplots'
#> The following object is masked from 'package:stats':
#> 
#>     lowess
library(RColorBrewer)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(reprex)
library(tidyverse)
# Fix the RNG seed so the simulated values are reproducible.
set.seed(99)
# Number of random draws generated for `se` and `epi.a` below.
iters<-1000

# Toy data: 30 rows with an id, a month in 1..6, and a Poisson(5) count `n`.
df<-data.frame(id=c(1:30),month=c(1:6,1,6,4,1,5,2,3,2,5,4,6,3:6,4:6,1:5,5),n=rpois(30,5))

# Replace zero counts with 3.
# NOTE(review): `n` is numeric but is compared with the string "0"; this works
# only through implicit coercion.
df$n[df$n == "0"] <- 3
# `iters` draws from a Beta(96, 6) distribution.
se<-rbeta(iters,96,6)
# `iters` draws from a PERT distribution (min 1.5, mode 2, max 3).
epi.a<-rpert(iters,min=1.5, mode=2, max=3)
p=0.2
# Element-wise product, so `p2` has length `iters` like `se`.
p2=epi.a*p

## My try: the idea is to loop over the months.

# Pre-allocate a data frame with `iters` rows and six zero-filled columns,
# named m_1 ... m_6 (one per month).
results<-data.frame(m1=numeric(iters))
results<-cbind(results,rep(results[1],5))
colnames(results)<-paste("m", sep = "_", 1:6)

# NOTE(review): the outer index `j` is never used in the body, so the inner
# loop simply runs six times with identical effect.
# NOTE(review): `i` runs from 1 to `iters` (1000), but `df` has only 30 rows.
# For i > 30, df$month[i] is NA, the comparison yields NA, and `if` stops with
# "missing value where TRUE/FALSE needed" -- the error captured below.
# NOTE(review): `month` is numeric but is compared with the strings "1".."6";
# this relies on implicit coercion.
for (j in 1:6) {
  for (i in 1:iters) {
    if (df$month[i]== "1")results$m_1[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if (df$month[i]== "2")results$m_2[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "3")results$m_3[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "4")results$m_4[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "5")results$m_5[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
    else if(df$month[i]== "6")results$m_6[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
  }
  
}
#> Error in if (df$month[i] == "1") results$m_1[i] <- 1 - (1 - se[i] * p2[i])^df$n[i] else if (df$month[i] == : missing value where TRUE/FALSE needed

^{Created on 2022-05-04 by the reprex package (v2.0.1)}

CodePudding user response：

A nested for loop like this is slow in R because the work is done one element at a time instead of being vectorized. Instead, you can create a table of all possible combinations and calculate the result in a new column:

library(tidyverse)

# Build every (j, i) combination once, then compute the result for each row in
# a new column instead of nesting two for loops.
expand_grid(
  j = seq_len(6),
  i = seq_len(10)
) %>%
  mutate(
    # Stand-in for any two-argument function; here res = j + i^2, which matches
    # the printed output below (1 + 1^2 = 2, 1 + 2^2 = 5, ...).
    res = map2_dbl(j, i, ~ .x + .y^2)
  )
#> # A tibble: 60 × 3
#>        j     i   res
#>    <int> <int> <dbl>
#>  1     1     1     2
#>  2     1     2     5
#>  3     1     3    10
#>  4     1     4    17
#>  5     1     5    26
#>  6     1     6    37
#>  7     1     7    50
#>  8     1     8    65
#>  9     1     9    82
#> 10     1    10   101
#> # … with 50 more rows

^{Created on 2022-05-04 by the reprex package (v2.0.0)}

CodePudding user response：

Here is a way that avoids for loops. The calculations of p2 and m are vectorized and the results are output in a tidyverse pipe that reshapes those values, m, to wide format.

# Vectorized per-row calculation of m = 1 - (1 - se * p2)^n, reshaped so that
# each month becomes its own column (m_1 ... m_6).
suppressPackageStartupMessages({
  library(mc2d)
  library(tidyverse)
})
set.seed(99)

# Toy data: 30 rows with an id, a month in 1..6, and a Poisson(5) count `n`.
df <- data.frame(
  id = 1:30,
  month = c(1:6, 1, 6, 4, 1, 5, 2, 3, 2, 5, 4, 6, 3:6, 4:6, 1:5, 5),
  n = rpois(30, 5)
)
# One random draw per row, so `se`, `p2` and `df$n` all have the same length
# and the formula below needs no recycling.
iters <- nrow(df)

# Replace zero counts with 3 (numeric comparison; `n` is not a string).
df$n[df$n == 0] <- 3
se <- rbeta(iters, 96, 6)
epi.a <- rpert(iters, min = 1.5, mode = 2, max = 3)
p <- 0.2
p2 <- epi.a * p

# Element-wise over all rows at once; no loop needed.
m <- 1 - (1 - se * p2)^df$n
results <- data.frame(month = df$month, m)

results %>%
  arrange(month) %>%
  group_by(month) %>%
  # Within-month running index; it becomes the row id of the wide table.
  # mutate() has no `.groups` argument (it would just create a column called
  # `.groups`), so the grouping is dropped explicitly with ungroup().
  mutate(n = row_number()) %>%
  ungroup() %>%
  pivot_wider(
    id_cols = n,
    names_from = month,
    names_prefix = "m_",
    values_from = m
  ) %>%
  # Months with fewer observations are padded with NA; drop the helper index.
  select(-n)
#> # A tibble: 7 × 6
#>      m_1    m_2    m_3    m_4   m_5    m_6
#>    <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
#> 1  0.970  0.623  0.905  0.998 0.929  0.980
#> 2  0.912  0.892  0.736  0.830 0.890  0.862
#> 3  0.795  0.932  0.553  0.958 0.931  0.798
#> 4  0.950  0.892  0.732  0.649 0.777  0.743
#> 5 NA     NA     NA      0.657 0.980  0.945
#> 6 NA     NA     NA      0.976 0.836 NA    
#> 7 NA     NA     NA     NA     0.740 NA

^{Created on 2022-05-04 by the reprex package (v2.0.1)}