Home > front end >  loop to run through multiple datasets and save each output in R
loop to run through multiple datasets and save each output in R

Time:04-06

We are trying to run a loop through eight different datasets and then save the output in a standardized format afterwards.

Dataframes are called df1, df2, df3, etc.

I can't share the data (here is a sample of the data), but every dataset is a subset of df1 - so they all have the same columns throughout.

df1 would look like:

age   wt   sex
10    200  F
15    250  F
20    300  F
12    200  M
13    250  M
25    300  M

And the subsetted dfs would be, for example, df2<-df1%>%filter(sex=="F"), df3<-df1%>%filter(sex=="M"), and so on for different conditions.

Here is a small example of the code we want to run for each dataframe.

# First growth-curve fit (nls.mon) on df1
nls.mon <- nls(
  wt ~ A * (1 - exp(k * (t0 - age))),
  data = df1,
  start = list(A = 253.6, k = .03348, t0 = 32.02158)
)

# Coefficient table from tidy(); the estimates are picked out by row position
aad_mon_est <- data.frame(tidy(nls.mon))
mon_A_est <- as.numeric(aad_mon_est[["estimate"]][1])
mon_k_est <- as.numeric(aad_mon_est[["estimate"]][2])
mon_t0_est <- as.numeric(aad_mon_est[["estimate"]][3])

# Second growth-curve fit (nls.von) on df1, using the same starting values
nls.von <- nls(
  wt ~ A * (1 - (1 / 3) * exp(k * (t0 - age)))^3,
  data = df1,
  start = list(A = 253.6, k = .03348, t0 = 32.02158)
)

# Same extraction for the second model
aad_von_est <- data.frame(tidy(nls.von))
von_A_est <- as.numeric(aad_von_est[["estimate"]][1])
von_k_est <- as.numeric(aad_von_est[["estimate"]][2])
von_t0_est <- as.numeric(aad_von_est[["estimate"]][3])

Is there a way to tell the loop to run through each dataframe (df1, df2, df3, etc.) and then save aad_arc_B_est, aad_arc_k_est, and aad_arc_mx_est afterwards?

We are hoping to have an output that would look like:

dataframe  model     A_est   k_est  t0_est
df1        nls.mon   250     10     0.14   
df1        nls.von   350     12     0.13   
df2        nls.mon   150     11     0.15   
df2        nls.von   240     14     0.16
df3        nls.mon   220     11     0.11   
df3        nls.von   450     15     0.10                 

We are thinking of using an index - something like for (i in dataframe) to have it run through each dataframe, and
dataframe[i,] <- row_i to append each row after?

But, maybe there is a better way?

CodePudding user response:

Have you considered changing your code to a function? You can store all data.frames in a named list, then apply the function on each data.frame in the list and then collect the results.

library(tidyverse)
library(broom)

# changing your code to a function
# Fit both growth models to one data frame and return a tibble with one row
# per model: a `model` column plus one column per estimated parameter.
#
# .df: data frame with `wt` and `age` columns.
my_function <- function(.df) {
  # Both fits start from the same parameter guesses
  start_vals <- list(A = 253.6, k = .03348, t0 = 32.02158)

  # The list names become the values of the `model` column below
  fits <- list(
    nls.mon = nls(
      wt ~ A * (1 - exp(k * (t0 - age))),
      data = .df,
      start = start_vals
    ),
    nls.von = nls(
      wt ~ A * (1 - (1 / 3) * exp(k * (t0 - age)))^3,
      data = .df,
      start = start_vals
    )
  )

  # Stack the tidy coefficient tables, keep the estimates only, and spread
  # the terms into columns
  lapply(fits, tidy) %>%
    bind_rows(.id = "model") %>%
    select(model, term, estimate) %>%
    pivot_wider(names_from = "term", values_from = "estimate")
}

# reading in data, the path to the data needs to be changed
# Load the sample data (change the path to wherever your copy lives) and
# drop the first column
df1 <- select(read_csv(r"{C:\Users\novot\Downloads\sample.csv}"), -1)

# Subset of df1 with only the rows where sex is "M"
df2 <- filter(df1, sex == "M")

# Apply my_function to every data frame in the named list and stack the
# results; the list names are stored in the `dataframe` column
df_out <- bind_rows(
  map(list(df1 = df1, df2 = df2), my_function),
  .id = "dataframe"
)

df_out
#> # A tibble: 4 x 5
#>   dataframe model       A      k    t0
#>   <chr>     <chr>   <dbl>  <dbl> <dbl>
#> 1 df1       nls.mon  248. 0.0135  2.09
#> 2 df1       nls.von  246. 0.0222 32.9 
#> 3 df2       nls.mon  248. 0.0135  2.09
#> 4 df2       nls.von  246. 0.0222 32.9

CodePudding user response:

You can wrap your code in a function, and apply the function to a list of your dataframes

# Fit the arctangent model wt ~ B * atan(k * (age - mx)) + my to one data
# frame and return its B, k and mx estimates as a one-row data frame.
#
# n:   name (or index) of the element of `dfs` to fit; it is stored in the
#      `dataframe` column of the result.
# dfs: list of data frames, each with `wt` and `age` columns.
#
# Returns a data.frame with the columns dataframe, aad_arc_B_est,
# aad_arc_k_est and aad_arc_mx_est. Errors raised by nls() (for example
# failure to converge) are not caught and propagate to the caller.
f <- function(n, dfs) {
  nls.aad_arc <- nls(
    wt ~ B * atan(k * (age - mx)) + my,
    data = dfs[[n]],
    start = list(B = 120, k = .02, mx = 30, my = 82.06)
  )

  # Look the estimates up by parameter name rather than by row position
  est <- coef(nls.aad_arc)

  # Columns are named with `=`; using `<-` inside data.frame() would create
  # stray local variables and leave the columns with mangled names
  data.frame(
    dataframe = n,
    aad_arc_B_est = est[["B"]],
    aad_arc_k_est = est[["k"]],
    aad_arc_mx_est = est[["mx"]]
  )
}

# Named list of the data frames to fit; the names identify each result row
dfs <- list(df1 = df1, df2 = df2, df3 = df3)

# Run f once per list name and row-bind the one-row results
do.call(rbind, lapply(names(dfs), f, dfs = dfs))
  • Related