I want to run four linear regressions from four metrics from two sites.
Site DCNSUB is the response variable and DCSSUB is the predictor in the regression. I only want to regress where I have a complete pair of data for an event. I do this for one metric at a time using the following dplyr pipe:
mV<-Tile%>% # model V for Volume
select(Event, Site, Volume)%>%
group_by(Event)%>%
filter(!any(is.na(all_of(Volume))))%>% # group by event and remove pairs that are missing volume
ungroup()%>%
mutate(Volume = log(Volume))%>% # take log
pivot_wider(names_from = Site,values_from = Volume) %>% # get responsive and predictor variable data into columns
as.data.frame(.)%>%
lm(DCNSUB~DCSSUB, data = .)
How can incorporate this into a for loop, where each iteration puts a different metric where 'Volume' is in the pipe? Here is my attempt:
for (i in names(Tile[-c(1,2)])){
mX<-Tile%>%
select(Event, Site, i)%>%
group_by(Event)%>%
filter(!any(is.na(all_of(i))))%>% # remove pairs that are missing i, note group by event helps removes the pairs
ungroup()%>%
mutate(i = log(i))%>% # take log
pivot_wider(names_from = Site,values_from = i) %>%# get responsive and predictor variable data into columns
as.data.frame(.)%>%
lm(DCNSUB~DCSSUB, data = .)
}
There have been other posts that use column indexing to call columns, but this doesn't work when trying to mix it with the column I want to remain constant in each loop. Also, those solution are for much less complicated pipes. Any help is appreciated, thanks.
data:
Tile<-structure(list(Event = c("10/17/2019", "10/17/2019", "10/23/2019",
"10/23/2019", "10/27/2019", "10/27/2019", "10/31/2019", "10/31/2019",
"11/24/2019", "11/24/2019", "11/28/2019", "11/28/2019", "12/10/2019",
"12/10/2019", "12/15/2019", "12/15/2019", "12/28/2019", "12/28/2019",
"12/30/2019", "12/30/2019", "1/3/2020", "1/3/2020", "1/12/2020",
"1/12/2020", "1/26/2020", "1/26/2020", "3/3/2020", "3/3/2020",
"3/8/2020", "3/8/2020", "3/13/2020", "3/13/2020", "5/12/2020",
"5/12/2020", "8/5/2020", "8/5/2020", "9/30/2020", "9/30/2020",
"12/1/2020", "12/1/2020", "12/25/2020", "12/25/2020", "1/17/2021",
"1/17/2021", "3/11/2021", "3/11/2021", "4/16/2021", "4/16/2021",
"4/22/2021", "4/22/2021", "4/30/2021", "4/30/2021", "5/6/2021",
"5/6/2021", "7/2/2021", "7/2/2021", "7/3/2021", "7/3/2021", "7/9/2021",
"7/9/2021", "7/13/2021", "7/13/2021", "7/14/2021", "7/14/2021",
"7/18/2021", "7/18/2021", "7/19/2021", "7/19/2021", "7/21/2021",
"7/21/2021", "7/31/2021", "7/31/2021", "8/2/2021", "8/2/2021",
"8/20/2021", "8/20/2021", "9/9/2021", "9/9/2021", "9/24/2021",
"9/24/2021", "10/17/2021", "10/17/2021", "10/22/2021", "10/22/2021",
"10/25/2021", "10/25/2021", "10/27/2021", "10/27/2021", "11/1/2021",
"11/1/2021"), Site = structure(c(3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L,
1L, 3L, 1L), .Label = c("DCNSUB", "DCNSUR", "DCSSUB", "DCSSUR"
), class = "factor"), TP.conc = c(1550, 2770, NA, NA, 1650, NA,
1810, NA, 666, 468, 1190, 1120, 574, 538, 487, 580, NA, 238,
610, 378, 398, 306, 744, 766, 447, 468, 504, 413, 384, 377, 714,
542, 927.2, 1000.1, 265.4, 285.1, 1527, 1764.5, NA, 460.9, NA,
469.8, NA, 172.8, 454, 432.8, 524.4, 476.4, 300, 303.6, 588.7,
598.1, 852.5, 797.6, 144.4, 122.1, 170.6, 110.1, 301.8, 328.8,
363.3, 498.5, 283.7, 104.9, 314.1, 327.6, 436.6, 262.1, 398.1,
358, 312, 251, 598, 831, 348, 456, 345, 240, 648, 949, 852, 1260,
643, 549, 712, 999, 982, 1100, 1240, 1555), TP.load = c(180.4,
NA, NA, NA, 67.6, NA, 201.5, NA, NA, NA, 53.3, 131.7, 12.1, 38.1,
7, 21.6, NA, NA, 21.2, 44.4, 6.1, 10.9, 79.7, 189.9, 10.5, 27.9,
84.2, 178.7, 13.6, 46.3, 14.4, 26.2, 11.4, 35, 4.2, 10.1, 4.6,
18.9, NA, 58.3, NA, 140.9, NA, 7.6, 181.8, 238.2, 72.5, 97.7,
18.5, 30.6, 121.4, 177.7, 114.1, 166.3, 22.8, 21.9, 15.1, 9.2,
25.8, 29.4, 7.6, 12.3, 3, 1.7, 58.5, 71, 23.3, 15.7, 32.7, 39.7,
3.1, 3.4, 67.2, 126, 49.1, 79.1, 8.6, 5.8, 8.7, 15.3, 38.62755,
62.40857143, NA, NA, NA, NA, NA, NA, NA, NA), SRP.conc = c(NA,
NA, NA, NA, 403, NA, NA, NA, NA, NA, NA, NA, 245, 234, 238, 197,
NA, 118, NA, NA, NA, NA, NA, 270, 121, 135, NA, NA, NA, NA, NA,
NA, 596.7, 635.6, 48, 85.9, 514.8, 572.7, NA, 161.5, NA, 163.3,
NA, 46.4, 96.9, 127, 83.1, 92.3, 53.5, 60.7, 111.7, 133.7, 132.2,
164.1, 50.1, 49.1, 54, 42.5, 122.5, 131.9, 104.2, 194.5, 84.6,
34.8, 90.2, 106.6, NA, NA, 129.9, 118.2, 62.2, 84.7, 105, 152,
92.6, 66, 45.9, 50.5, 66.2, 167, 264, 412, 203, 175, 352, 560,
503, 625, 621, 836), SRP.load = c(NA, NA, NA, NA, 16.5, NA, NA,
NA, NA, NA, NA, NA, 5.2, 16.6, 3.4, 7.3, NA, NA, NA, NA, NA,
NA, NA, 66.9, 2.8, 8, NA, NA, NA, NA, NA, NA, 7.3, 22.2, 0.8,
3.1, 1.6, 6.1, NA, 20.4, NA, 49, NA, 2, 38.8, 69.9, 11.5, 18.9,
3.3, 6.1, 23, 39.7, 17.7, 34.2, 7.9, 8.8, 4.8, 3.6, 10.5, 11.8,
2.2, 4.8, 0.9, 0.6, 16.8, 23.1, NA, NA, 10.7, 13.1, 0.6, 1.2,
11.8, 23, 13.1, 11.4, 1.1, 1.2, 0.9, 2.7, 12, 20.4, NA, NA, NA,
NA, NA, NA, NA, NA), Volume = c(11.64, NA, 1.87, 4.5, 4.1, 9.69,
11.13, 34, NA, NA, 4.48, 11.76, 2.1, 7.08, 1.45, 3.73, NA, NA,
3.47, 11.74, 1.52, 3.56, 10.71, 24.79, 2.34, 5.96, 16.71, 43.28,
3.54, 12.29, 2.02, 4.84, 1.22, 3.5, 1.59, 3.56, 0.3, 1.07, NA,
12.66, NA, 29.99, NA, 4.37, 40.04, 55.03, 13.82, 20.51, 6.18,
10.07, 20.62, 29.72, 13.38, 20.85, 15.76, 17.96, 8.82, 8.36,
8.56, 8.94, 2.1, 2.46, 1.07, 1.58, 18.64, 21.67, 5.33, 6, 8.22,
11.09, 0.99, 1.36, 11.23, 15.16, 14.11, 17.34, 2.48, 2.4, 1.34,
1.61, 4.53, 4.95, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(1L,
4L, 5L, 8L, 9L, 12L, 13L, 16L, 17L, 20L, 21L, 24L, 25L, 28L,
29L, 32L, 33L, 36L, 37L, 40L, 41L, 44L, 45L, 48L, 49L, 52L, 53L,
56L, 57L, 60L, 61L, 64L, 65L, 68L, 69L, 72L, 73L, 76L, 77L, 80L,
81L, 84L, 85L, 88L, 89L, 92L, 93L, 96L, 97L, 100L, 101L, 104L,
105L, 108L, 109L, 112L, 113L, 116L, 117L, 120L, 121L, 124L, 125L,
128L, 129L, 132L, 133L, 136L, 137L, 140L, 141L, 144L, 145L, 148L,
149L, 152L, 153L, 156L, 157L, 160L, 161L, 164L, 165L, 168L, 169L,
172L, 173L, 176L, 177L, 180L), class = "data.frame")
CodePudding user response:
We may use map/lapply
to loop as with for
loop it needs a list
to store the output which can be created of course, however, the output from map/lapply
is itself a list
library(purrr)
library(dplyr)
library(tidyr)
# // loop over the names
out <- map(names(Tile)[-(1:2)], ~ Tile %>%
# // select the columns of interest along with looped column names
select(Event, Site, all_of(.x))%>%
# // grouped by Event and remove groups based on the NA in the looped column
group_by(Event)%>%
filter(!any(is.na(.data[[.x]])))%>%
ungroup()%>%
# // convert the column looped to its `log`
mutate(!! .x := log(.data[[.x]]))%>%
# // reshape from long to wide
pivot_wider(names_from = Site,values_from = all_of(.x)) %>%
# // build the linear model
lm(DCNSUB~DCSSUB, data = .)
)
-output
> out
[[1]]
Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)
Coefficients:
(Intercept) DCSSUB
-1.475 1.231
[[2]]
Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)
Coefficients:
(Intercept) DCSSUB
0.5418 0.9812
[[3]]
Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)
Coefficients:
(Intercept) DCSSUB
0.09282 1.00866
[[4]]
Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)
Coefficients:
(Intercept) DCSSUB
0.7099 0.9064
[[5]]
Call:
lm(formula = DCNSUB ~ DCSSUB, data = .)
Coefficients:
(Intercept) DCSSUB
0.8000 0.8535
From the list
output, either use tidy
(from broom
) to convert to a tibble output or extract the components separately by looping (It can be done in the same map
looped earlier though)
map_dfr(out, ~ {
v1 <- summary(.x)
tibble(pval = v1$coefficients[,4][2], MSE = v1$sigma^2)})
# A tibble: 5 × 2
pval MSE
<dbl> <dbl>
1 7.23e-17 0.0773
2 5.81e-13 0.267
3 3.49e-12 0.120
4 3.77e-10 0.238
5 2.10e-15 0.156