Hi I would like to loop over months and for each subset apply a function,
1 - (1 - se * p2)^df$n
Do you have any alternative to a for loop? Or would you suggest a better way to code it with a loop? This is just a fake example as the real database is quite huge and it is annoying to loop over 12 months The column month is
Thanks in advance
rm(list = ls())
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#> Error: RStudio not running
getwd()
#> [1] "C:/Users/Angela/AppData/Local/Temp/Rtmp21Zbwk/reprex-19b06a781308-waspy-bunny"
#load required packages
library(mc2d)
#> Loading required package: mvtnorm
#>
#> Attaching package: 'mc2d'
#> The following objects are masked from 'package:base':
#>
#> pmax, pmin
library(gplots)
#>
#> Attaching package: 'gplots'
#> The following object is masked from 'package:stats':
#>
#> lowess
library(RColorBrewer)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(reprex)
library(tidyverse)
set.seed(99)
iters<-1000
df<-data.frame(id=c(1:30),month=c(1:6,1,6,4,1,5,2,3,2,5,4,6,3:6,4:6,1:5,5),n=rpois(30,5))
df$n[df$n == "0"] <- 3
se<-rbeta(iters,96,6)
epi.a<-rpert(iters,min=1.5, mode=2, max=3)
p=0.2
p2=epi.a*p
##my try the idea is to loop over the months
results<-data.frame(m1=numeric(iters))
results<-cbind(results,rep(results[1],5))
colnames(results)<-paste("m", sep = "_", 1:6)
for (j in 1:6) {
for (i in 1:iters) {
if (df$month[i]== "1")results$m_1[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
else if (df$month[i]== "2")results$m_2[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
else if(df$month[i]== "3")results$m_3[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
else if(df$month[i]== "4")results$m_4[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
else if(df$month[i]== "5")results$m_5[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
else if(df$month[i]== "6")results$m_6[i]<- 1 - (1 - se[i] * p2[i])^df$n[i]
}
}
#> Error in if (df$month[i] == "1") results$m_1[i] <- 1 - (1 - se[i] * p2[i])^df$n[i] else if (df$month[i] == : missing value where TRUE/FALSE needed
Created on 2022-05-04 by the reprex package (v2.0.1)
CodePudding user response:
Using a for loop in R is very slow, because the interpreter can not use vectorization here. However, you can create a table of all possible combinations and calculate the result in a new column:
library(tidyverse)
expand_grid(
j = seq(6),
i = seq(10)
) %>%
mutate(
res = map2_dbl(j, i, ~ .x .y**2)
)
#> # A tibble: 60 × 3
#> j i res
#> <int> <int> <dbl>
#> 1 1 1 2
#> 2 1 2 5
#> 3 1 3 10
#> 4 1 4 17
#> 5 1 5 26
#> 6 1 6 37
#> 7 1 7 50
#> 8 1 8 65
#> 9 1 9 82
#> 10 1 10 101
#> # … with 50 more rows
Created on 2022-05-04 by the reprex package (v2.0.0)
CodePudding user response:
Here is a way that avoids for
loops. The calculations of p2
and m
are vectorized and the results
are output in a tidyverse
pipe that reshapes those values, m
, to wide format.
suppressPackageStartupMessages({
library(mc2d)
library(tidyverse)
})
set.seed(99)
df <- data.frame(
id = 1:30,
month = c(1:6, 1, 6, 4, 1, 5, 2, 3, 2, 5, 4, 6, 3:6, 4:6, 1:5, 5),
n = rpois(30, 5)
)
iters <- nrow(df)
df$n[df$n == "0"] <- 3
se <- rbeta(iters, 96, 6)
epi.a <- rpert(iters, min = 1.5, mode = 2, max = 3)
p <- 0.2
p2 <- epi.a*p
m <- 1 - (1 - se * p2)^df$n
results <- data.frame(month = df$month, m)
results %>%
arrange(month) %>%
group_by(month) %>%
mutate(n = row_number(), .groups = "drop") %>%
pivot_wider(
id_cols = n,
names_from = month,
names_glue = "m_{.name}",
values_from = m
) %>%
select(-n)
#> # A tibble: 7 × 6
#> m_1 m_2 m_3 m_4 m_5 m_6
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.970 0.623 0.905 0.998 0.929 0.980
#> 2 0.912 0.892 0.736 0.830 0.890 0.862
#> 3 0.795 0.932 0.553 0.958 0.931 0.798
#> 4 0.950 0.892 0.732 0.649 0.777 0.743
#> 5 NA NA NA 0.657 0.980 0.945
#> 6 NA NA NA 0.976 0.836 NA
#> 7 NA NA NA NA 0.740 NA
Created on 2022-05-04 by the reprex package (v2.0.1)