I run a linear regression model with looping over the categorical levels of one column of my data. I want to save some of the model summary output.
I achieved to save some of them include coefficients, R2 and Pvalue. However, when I save the fitted and observed values in a data.frame called obsFit
, it returns the fitted and observed values of only one categorical level as I need 7
levels.
Any ideas and thoughts?
So sorry for hardcoding R, I am still a novice who is in the learning process.
Here is the chuck of code
I used.
# Wilayah 1 vs MADA4 model
> Y1 <- yield[ ,2:5]
> mada4 <- stns[["S1"]][["Mada4_sum"]]
> unique(mada4$aggre_per)
[1] "3" "4" "5" "6" "7" "mamjj"
[7] "amj"
>
> # create an empty matrix to fill results
> df <- matrix(rep(NA, 9))
>
> for (agg.per in unique(mada4$aggre_per)) {
subdata <- subset(mada4, aggre_per == agg.per)
subdata <- cbind(subdata, Y1$I_S1)
names(subdata)[4] <- "I_S1"
#save model summary
mod <- lm('I_S1 ~ value', subdata)
slope <- summary(mod)[[4]][2,1]
intercept <- summary(mod)[[4]][1,1]
r2 <- summary(mod)[[8]]
adjr2 <- summary(mod)[[9]]
fstat <- summary(mod)[[10]][[1]]
pval <- summary(mod)[[4]][2,4]
pearson <- cor(subdata$I_S1,subdata$value )
res <- c("Mada4_sum", agg.per, slope, intercept, r2, adjr2, fstat, pval, pearson)
df <- cbind(df, res)
#save the fitted and observed yields as data.frame
obsFit <- data.frame(cbind("Mada4_sum", agg.per,subdata$I_S1, fitted(mod)))
}
>
> df <- data.frame(t(df[,-1]))
> df[,-2:-1] <- apply (df[,-2:-1], 2, as.numeric)
> df[,-2:-1] <- apply (df[,-2:-1], 2, round, digit =2)
> rownames(df) <- 1:nrow(df)
> names(df) <-c( "WI",
"aggre_per",
"slope",
"intercept",
"R2",
"adjustedR2",
"F-stat",
"p-value",
"correlation")
> head(df)
WI aggre_per slope intercept R2 adjustedR2
1 Mada4_sum 3 -1.17 6230.91 0.09 0.03
2 Mada4_sum 4 -0.27 6128.81 0.00 -0.07
3 Mada4_sum 5 -1.04 6240.46 0.02 -0.05
4 Mada4_sum 6 -2.88 6507.82 0.14 0.08
5 Mada4_sum 7 -1.12 6296.72 0.05 -0.02
6 Mada4_sum mamjj -0.89 6760.92 0.18 0.12
F-stat p-value correlation
1 1.40 0.26 -0.30
2 0.07 0.80 -0.07
3 0.23 0.64 -0.13
4 2.26 0.15 -0.37
5 0.76 0.40 -0.23
6 3.12 0.10 -0.43
> #check levels of `aggre_per`
> unique(df$aggre_per)
[1] "3" "4" "5" "6" "7" "mamjj"
[7] "amj"
>
> names(obsFit) <- c("WI", "Aggr.per", "Observed", "Fitted")
> print(obsFit)
WI Aggr.per Observed Fitted
103 Mada4_sum amj 5204 6019.05130014577
104 Mada4_sum amj 5824 6061.66361454587
105 Mada4_sum amj 5481 6129.49546195825
106 Mada4_sum amj 5587 5962.52476063545
107 Mada4_sum amj 6260 6124.27762754192
108 Mada4_sum amj 5771 6017.312022007
109 Mada4_sum amj 5899 5888.60543973734
110 Mada4_sum amj 6338 6189.50055774613
111 Mada4_sum amj 6340 6023.39949549272
112 Mada4_sum amj 6013 6146.01860427665
113 Mada4_sum amj 6075 6221.67720331355
114 Mada4_sum amj 6484 5959.91584342728
115 Mada4_sum amj 6771 5998.17996248043
116 Mada4_sum amj 6493 6249.50565353401
117 Mada4_sum amj 6386 6290.37868979533
118 Mada4_sum amj 6465 6109.49376336229
>
Hereis the dput
of my data?
> dput(mada4)
structure(list(year = c(2001, 2002, 2003, 2004, 2005, 2006, 2007,
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2001, 2002,
2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016), aggre_per = c("3", "3",
"3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
"4", "4", "4", "4", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "5", "6", "6", "6", "6", "6", "6",
"6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "7", "7", "7",
"7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7", "7",
"mamjj", "mamjj", "mamjj", "mamjj", "mamjj", "mamjj", "mamjj",
"mamjj", "mamjj", "mamjj", "mamjj", "mamjj", "mamjj", "mamjj",
"mamjj", "mamjj", "amj", "amj", "amj", "amj", "amj", "amj", "amj",
"amj", "amj", "amj", "amj", "amj", "amj", "amj", "amj", "amj"
), value = c(189, 6, 212, 81, 101, 84, 163, 198, 143, 128, 456,
93, 37, 23, 58, 0, 210, 196, 70, 122, 132, 184, 387, 57, 232,
153, 11, 325, 228, 75, 41, 31, 141, 95, 219, 217, 178, 154, 139,
112, 164, 62, 106, 202, 101, 123, 108, 252, 175, 186, 110, 252,
95, 190, 150, 161, 125, 165, 176, 67, 221, 63, 65, 139, 100,
267, 207, 202, 48, 322, 240, 250, 270, 298, 159, 164, 178, 29,
94, 164, 815, 750, 818, 874, 554, 934, 1079, 778, 934, 806, 908,
851, 765, 313, 366, 586, 526, 477, 399, 591, 405, 528, 676, 330,
521, 380, 293, 594, 550, 261, 214, 422)), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 86L,
87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L,
100L, 101L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L,
112L, 113L, 114L, 115L, 116L, 117L, 118L), class = "data.frame")
>
> dput(yield)
structure(list(date = structure(c(11323, 11688, 12053, 12418,
12784, 13149, 13514, 13879, 14245, 14610, 14975, 15340, 15706,
16071, 16436, 16801), class = "Date"), I_S1 = c(5204, 5824, 5481,
5587, 6260, 5771, 5899, 6338, 6340, 6013, 6075, 6484, 6771, 6493,
6386, 6465), II_S1 = c(5081, 5674, 5589, 5809, 6055, 6231, 5728,
5971, 6268, 5999, 6015, 6624, 6827, 6453, 6382, 6340), III_S1 = c(5071,
5349, 5703, 5451, 5764, 5528, 5629, 5395, 5797, 5785, 5887, 5581,
5899, 6179, 6086, 5734), IV_S1 = c(5515, 5491, 5969, 5971, 6265,
6242, 6153, 6188, 6311, 6447, 6213, 6410, 6744, 6754, 6247, 6415
), I_S2 = c(4735, 5227, 5415, 5894, 3294, 5113, 6056, 6216, 5752,
5758, 5540, 5418, 6461, 5346, 6437, 5633), II_S2 = c(4837, 5209,
5111, 5482, 3203, 5389, 5209, 6041, 5617, 5927, 4993, 5342, 6380,
5122, 6320, 5470), III_S2 = c(4720, 4986, 4846, 4720, 3968, 4912,
4437, 5675, 5368, 5638, 4892, 4849, 6117, 4996, 5586, 4666),
IV_S2 = c(5054, 5584, 5546, 5666, 5116, 6288, 6089, 6529,
5880, 6156, 5263, 5358, 6804, 5212, 6174, 5560)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -16L))
CodePudding user response:
You could do this in a by
which splits the data by condition and applies a function, later rbind
the resulting list. The obsFit
you could pass through as an attr
ibute.
res <- by(mada4, mada4$aggre_per, \(x) {
x <- cbind(x, yield[ ,2:5])
sm <- summary(mod <- lm(I_S1 ~ value, x))
o <- setNames(as.vector(sm$coefficients[, c(1, 4)])[c(1, 2, 4)],
c('Intercept', 'slope', 'pval'))
o <- c(o, sapply(sm[c('r.squared', 'adj.r.squared', 'fstatistic')], `[`, 1))
o <- c(o, pearson=with(x, cor(I_S1, value)))[c(2, 1, 4, 5, 6, 3, 7)]
ft <- cbind(x, x$I_S1, fitted(mod))
return(`attr<-`(o, 'obsFit', ft))
})
Results
do.call(rbind, res) |> signif(4)
# slope Intercept r.squared adj.r.squared fstatistic.value pval pearson
# 3 -1.1680 6231 0.09105 0.026130 1.40200 0.25600 -0.30170
# 4 -0.2730 6129 0.00464 -0.066460 0.06526 0.80210 -0.06812
# 5 -1.0350 6240 0.01636 -0.053900 0.23290 0.63680 -0.12790
# 6 -2.8780 6508 0.13910 0.077660 2.26300 0.15470 -0.37300
# 7 -1.1220 6297 0.05134 -0.016420 0.75760 0.39880 -0.22660
# amj -0.8696 6476 0.07058 0.004188 1.06300 0.32000 -0.26570
# mamjj -0.8889 6761 0.18210 0.123700 3.11700 0.09926 -0.42670
obsFits
obsFits <- lapply(res, attr, 'obsFit')
obsFits$`3` ## first aggre_per `3`
# year aggre_per value I_S1 II_S1 III_S1 IV_S1 x$I_S1 fitted(mod)
# 1 2001 3 189 5204 5081 5071 5515 5204 6010.131
# 2 2002 3 6 5824 5674 5349 5491 5824 6223.904
# 3 2003 3 212 5481 5589 5703 5969 5481 5983.264
# 4 2004 3 81 5587 5809 5451 5971 5587 6136.292
# 5 2005 3 101 6260 6055 5764 6265 6260 6112.929
# 6 2006 3 84 5771 6231 5528 6242 5771 6132.788
# 7 2007 3 163 5899 5728 5629 6153 5899 6040.503
# 8 2008 3 198 6338 5971 5395 6188 6338 5999.618
# 9 2009 3 143 6340 6268 5797 6311 6340 6063.866
# 10 2010 3 128 6013 5999 5785 6447 6013 6081.389
# 11 2011 3 456 6075 6015 5887 6213 6075 5698.234
# 12 2012 3 93 6484 6624 5581 6410 6484 6122.274
# 13 2013 3 37 6771 6827 5899 6744 6771 6187.691
# 14 2014 3 23 6493 6453 6179 6754 6493 6204.045
# 15 2015 3 58 6386 6382 6086 6247 6386 6163.160
# 16 2016 3 0 6465 6340 5734 6415 6465 6230.913
Note: R >= 4.1 used.