Home > Software design >  for loop that lets you pipe in different column names to dplyr pipe
for loop that lets you pipe in different column names to dplyr pipe

Time:11-20

I want to run four linear regressions from four metrics from two sites.

Site DCNSUB is the response variable and DCSSUB is the predictor in the regression. I only want to regress where I have a complete pair of data for an event. I do this for one metric at a time using the following dplyr pipe:

mV<-Tile%>% # model V for Volume
  select(Event, Site, Volume)%>%
  group_by(Event)%>%
  filter(!any(is.na(all_of(Volume))))%>%  # group by event and remove pairs that are missing volume
  ungroup()%>%
  mutate(Volume = log(Volume))%>% # take log
  pivot_wider(names_from = Site,values_from = Volume) %>% # get responsive and predictor variable data into columns
  as.data.frame(.)%>%
  lm(DCNSUB~DCSSUB, data = .)

How can incorporate this into a for loop, where each iteration puts a different metric where 'Volume' is in the pipe? Here is my attempt:

for (i in names(Tile[-c(1,2)])){
  mX<-Tile%>%
    select(Event, Site, i)%>%
    group_by(Event)%>%
    filter(!any(is.na(all_of(i))))%>%  # remove pairs that are missing i, note group by event helps removes the pairs
    ungroup()%>%
    mutate(i = log(i))%>% # take log
    pivot_wider(names_from = Site,values_from = i) %>%# get responsive and predictor variable data into columns
    as.data.frame(.)%>%
    lm(DCNSUB~DCSSUB, data = .)
}

There have been other posts that use column indexing to call columns, but this doesn't work when trying to mix it with the column I want to remain constant in each loop. Also, those solution are for much less complicated pipes. Any help is appreciated, thanks.

data:

Tile<-structure(list(Event = c("10/17/2019", "10/17/2019", "10/23/2019", 
"10/23/2019", "10/27/2019", "10/27/2019", "10/31/2019", "10/31/2019", 
"11/24/2019", "11/24/2019", "11/28/2019", "11/28/2019", "12/10/2019", 
"12/10/2019", "12/15/2019", "12/15/2019", "12/28/2019", "12/28/2019", 
"12/30/2019", "12/30/2019", "1/3/2020", "1/3/2020", "1/12/2020", 
"1/12/2020", "1/26/2020", "1/26/2020", "3/3/2020", "3/3/2020", 
"3/8/2020", "3/8/2020", "3/13/2020", "3/13/2020", "5/12/2020", 
"5/12/2020", "8/5/2020", "8/5/2020", "9/30/2020", "9/30/2020", 
"12/1/2020", "12/1/2020", "12/25/2020", "12/25/2020", "1/17/2021", 
"1/17/2021", "3/11/2021", "3/11/2021", "4/16/2021", "4/16/2021", 
"4/22/2021", "4/22/2021", "4/30/2021", "4/30/2021", "5/6/2021", 
"5/6/2021", "7/2/2021", "7/2/2021", "7/3/2021", "7/3/2021", "7/9/2021", 
"7/9/2021", "7/13/2021", "7/13/2021", "7/14/2021", "7/14/2021", 
"7/18/2021", "7/18/2021", "7/19/2021", "7/19/2021", "7/21/2021", 
"7/21/2021", "7/31/2021", "7/31/2021", "8/2/2021", "8/2/2021", 
"8/20/2021", "8/20/2021", "9/9/2021", "9/9/2021", "9/24/2021", 
"9/24/2021", "10/17/2021", "10/17/2021", "10/22/2021", "10/22/2021", 
"10/25/2021", "10/25/2021", "10/27/2021", "10/27/2021", "11/1/2021", 
"11/1/2021"), Site = structure(c(3L, 1L, 3L, 1L, 3L, 1L, 3L, 
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 
1L, 3L, 1L), .Label = c("DCNSUB", "DCNSUR", "DCSSUB", "DCSSUR"
), class = "factor"), TP.conc = c(1550, 2770, NA, NA, 1650, NA, 
1810, NA, 666, 468, 1190, 1120, 574, 538, 487, 580, NA, 238, 
610, 378, 398, 306, 744, 766, 447, 468, 504, 413, 384, 377, 714, 
542, 927.2, 1000.1, 265.4, 285.1, 1527, 1764.5, NA, 460.9, NA, 
469.8, NA, 172.8, 454, 432.8, 524.4, 476.4, 300, 303.6, 588.7, 
598.1, 852.5, 797.6, 144.4, 122.1, 170.6, 110.1, 301.8, 328.8, 
363.3, 498.5, 283.7, 104.9, 314.1, 327.6, 436.6, 262.1, 398.1, 
358, 312, 251, 598, 831, 348, 456, 345, 240, 648, 949, 852, 1260, 
643, 549, 712, 999, 982, 1100, 1240, 1555), TP.load = c(180.4, 
NA, NA, NA, 67.6, NA, 201.5, NA, NA, NA, 53.3, 131.7, 12.1, 38.1, 
7, 21.6, NA, NA, 21.2, 44.4, 6.1, 10.9, 79.7, 189.9, 10.5, 27.9, 
84.2, 178.7, 13.6, 46.3, 14.4, 26.2, 11.4, 35, 4.2, 10.1, 4.6, 
18.9, NA, 58.3, NA, 140.9, NA, 7.6, 181.8, 238.2, 72.5, 97.7, 
18.5, 30.6, 121.4, 177.7, 114.1, 166.3, 22.8, 21.9, 15.1, 9.2, 
25.8, 29.4, 7.6, 12.3, 3, 1.7, 58.5, 71, 23.3, 15.7, 32.7, 39.7, 
3.1, 3.4, 67.2, 126, 49.1, 79.1, 8.6, 5.8, 8.7, 15.3, 38.62755, 
62.40857143, NA, NA, NA, NA, NA, NA, NA, NA), SRP.conc = c(NA, 
NA, NA, NA, 403, NA, NA, NA, NA, NA, NA, NA, 245, 234, 238, 197, 
NA, 118, NA, NA, NA, NA, NA, 270, 121, 135, NA, NA, NA, NA, NA, 
NA, 596.7, 635.6, 48, 85.9, 514.8, 572.7, NA, 161.5, NA, 163.3, 
NA, 46.4, 96.9, 127, 83.1, 92.3, 53.5, 60.7, 111.7, 133.7, 132.2, 
164.1, 50.1, 49.1, 54, 42.5, 122.5, 131.9, 104.2, 194.5, 84.6, 
34.8, 90.2, 106.6, NA, NA, 129.9, 118.2, 62.2, 84.7, 105, 152, 
92.6, 66, 45.9, 50.5, 66.2, 167, 264, 412, 203, 175, 352, 560, 
503, 625, 621, 836), SRP.load = c(NA, NA, NA, NA, 16.5, NA, NA, 
NA, NA, NA, NA, NA, 5.2, 16.6, 3.4, 7.3, NA, NA, NA, NA, NA, 
NA, NA, 66.9, 2.8, 8, NA, NA, NA, NA, NA, NA, 7.3, 22.2, 0.8, 
3.1, 1.6, 6.1, NA, 20.4, NA, 49, NA, 2, 38.8, 69.9, 11.5, 18.9, 
3.3, 6.1, 23, 39.7, 17.7, 34.2, 7.9, 8.8, 4.8, 3.6, 10.5, 11.8, 
2.2, 4.8, 0.9, 0.6, 16.8, 23.1, NA, NA, 10.7, 13.1, 0.6, 1.2, 
11.8, 23, 13.1, 11.4, 1.1, 1.2, 0.9, 2.7, 12, 20.4, NA, NA, NA, 
NA, NA, NA, NA, NA), Volume = c(11.64, NA, 1.87, 4.5, 4.1, 9.69, 
11.13, 34, NA, NA, 4.48, 11.76, 2.1, 7.08, 1.45, 3.73, NA, NA, 
3.47, 11.74, 1.52, 3.56, 10.71, 24.79, 2.34, 5.96, 16.71, 43.28, 
3.54, 12.29, 2.02, 4.84, 1.22, 3.5, 1.59, 3.56, 0.3, 1.07, NA, 
12.66, NA, 29.99, NA, 4.37, 40.04, 55.03, 13.82, 20.51, 6.18, 
10.07, 20.62, 29.72, 13.38, 20.85, 15.76, 17.96, 8.82, 8.36, 
8.56, 8.94, 2.1, 2.46, 1.07, 1.58, 18.64, 21.67, 5.33, 6, 8.22, 
11.09, 0.99, 1.36, 11.23, 15.16, 14.11, 17.34, 2.48, 2.4, 1.34, 
1.61, 4.53, 4.95, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(1L, 
4L, 5L, 8L, 9L, 12L, 13L, 16L, 17L, 20L, 21L, 24L, 25L, 28L, 
29L, 32L, 33L, 36L, 37L, 40L, 41L, 44L, 45L, 48L, 49L, 52L, 53L, 
56L, 57L, 60L, 61L, 64L, 65L, 68L, 69L, 72L, 73L, 76L, 77L, 80L, 
81L, 84L, 85L, 88L, 89L, 92L, 93L, 96L, 97L, 100L, 101L, 104L, 
105L, 108L, 109L, 112L, 113L, 116L, 117L, 120L, 121L, 124L, 125L, 
128L, 129L, 132L, 133L, 136L, 137L, 140L, 141L, 144L, 145L, 148L, 
149L, 152L, 153L, 156L, 157L, 160L, 161L, 164L, 165L, 168L, 169L, 
172L, 173L, 176L, 177L, 180L), class = "data.frame")

CodePudding user response:

We may use map/lapply to loop as with for loop it needs a list to store the output which can be created of course, however, the output from map/lapply is itself a list

library(purrr)
library(dplyr)
library(tidyr)
# // loop over the names
out <- map(names(Tile)[-(1:2)], ~ Tile %>%
     # // select the columns of interest along with looped column names
     select(Event, Site, all_of(.x))%>%
     # // grouped by Event and remove groups based on the NA in the looped column
     group_by(Event)%>%
    filter(!any(is.na(.data[[.x]])))%>%
    ungroup()%>%
    # // convert the column looped to its `log`
    mutate(!! .x := log(.data[[.x]]))%>% 
    # // reshape from long to wide
    pivot_wider(names_from = Site,values_from = all_of(.x)) %>%   
    # // build the linear model 
    lm(DCNSUB~DCSSUB, data = .)
)

-output

> out
[[1]]

Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)

Coefficients:
(Intercept)       DCSSUB  
     -1.475        1.231  


[[2]]

Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)

Coefficients:
(Intercept)       DCSSUB  
     0.5418       0.9812  


[[3]]

Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)

Coefficients:
(Intercept)       DCSSUB  
    0.09282      1.00866  


[[4]]

Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)

Coefficients:
(Intercept)       DCSSUB  
     0.7099       0.9064  


[[5]]

Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)

Coefficients:
(Intercept)       DCSSUB  
     0.8000       0.8535  

From the list output, either use tidy (from broom) to convert to a tibble output or extract the components separately by looping (It can be done in the same map looped earlier though)

 map_dfr(out, ~ {
     v1 <- summary(.x)
      tibble(pval = v1$coefficients[,4][2], MSE = v1$sigma^2)})
# A tibble: 5 × 2
      pval    MSE
     <dbl>  <dbl>
1 7.23e-17 0.0773
2 5.81e-13 0.267 
3 3.49e-12 0.120 
4 3.77e-10 0.238 
5 2.10e-15 0.156 
  • Related