Home > Mobile >  calculating adjusted R-Squared in R
calculating adjusted R-Squared in R

Time:11-17

I have the following dataset and I would like to calculate the adjusted r-squared based on this dataset. I have the formula for adjusted R-Squared "Adjusted R2 = 1 – [(1-R2)*(n-1)/(n-k-1)]".

where:

R2: The R-Squared

n: is the number of observations, in this case, "DV.obs"

k: is the number of predictor variables, in this case, "nParam" (where its either 0,1,2,3)

the R code to calculate it is the following, where it is grouped by "ITER", iterations, we have 4 iterations.So the idea is to calculate adjusted R-Squared based on the iterations(4) iteration 1, the nParam should only be 0, iteration 2, the nParam should only be 1, etc, instead of choosing every nParam in the dataset, since the nParam is exactly the same for each iteration.

The output should be only 4 rows ( for every iteration, as its grouped by(ITER)) and 2 columns (R2, and adjusted R-Squared) and not for every row in the data. i hope i have explained myself well.

library(dplyr)
ff <- df %>%
  group_by(ITER) %>%
  summarise(
    Rsq = cor(x= DV.obs, y = DV.sim)^2,
    
    adjRsq =  1 - ((1-Rsq)*(length(DV.obs)-1)/(length(DV.obs)- nParam  - 1 )) 
    
  )


ff

however, this formula will go through every predictor variable(nParam),

df<-structure(list(CASE = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L), ITER = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L), nParam = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), DV.obs = c(0.101483807, 0.069196694, 0.053869542, 0.043831971, 
0.030330271, 0.023612088, 0.01978679, 0.014310351, 0.01164389, 
0.007267871, 0.004536453, 0.002873573, 0.002408037, 0.001417053, 
0.001136154, 0.101483807, 0.069196694, 0.053869542, 0.043831971, 
0.030330271, 0.023612088, 0.01978679, 0.014310351, 0.01164389, 
0.007267871, 0.004536453, 0.002873573, 0.002408037, 0.001417053, 
0.001136154, 0.101483807, 0.069196694, 0.053869542, 0.043831971, 
0.030330271, 0.023612088, 0.01978679, 0.014310351, 0.01164389, 
0.007267871, 0.004536453, 0.002873573, 0.002408037, 0.001417053, 
0.001136154, 0.101483807, 0.069196694, 0.053869542, 0.043831971, 
0.030330271, 0.023612088, 0.01978679, 0.014310351, 0.01164389, 
0.007267871, 0.004536453, 0.002873573, 0.002408037, 0.001417053, 
0.001136154, 0.000116054, 0.003829787, 0.01206963, 0.02088975, 
0.027388781, 0.03423598, 0.037833661, 0.037369438, 0.035164408, 
0.034584139, 0.02947776, 0.023210831, 0.014622821, 0.009632495, 
0.006731141, 0.0027853, 0.000116054, 0.003829787, 0.01206963, 
0.02088975, 0.027388781, 0.03423598, 0.037833661, 0.037369438, 
0.035164408, 0.034584139, 0.02947776, 0.023210831, 0.014622821, 
0.009632495, 0.006731141, 0.0027853, 0.000116054, 0.003829787, 
0.01206963, 0.02088975, 0.027388781, 0.03423598, 0.037833661, 
0.037369438, 0.035164408, 0.034584139, 0.02947776, 0.023210831, 
0.014622821, 0.009632495, 0.006731141, 0.0027853, 0.000116054, 
0.003829787, 0.01206963, 0.02088975, 0.027388781, 0.03423598, 
0.037833661, 0.037369438, 0.035164408, 0.034584139, 0.02947776, 
0.023210831, 0.014622821, 0.009632495, 0.006731141, 0.0027853
), DV.sim = c(0, 0.0889808909410658, 0.0947484349571132, 0.0798169790285827, 
0.0574006922793388, 0.0505799935506284, 0.0468774569150804, 0.0417447990739346, 
0.0375742405164242, 0.0306761993989349, 0.0251120797996223, 0.0205737193532288, 
0.0168649279846251, 0.0138327510148287, 0.0113531698574871, 0, 
0.0829660195227578, 0.0876380159497916, 0.0723450386112931, 0.0464863987773657, 
0.0380595525625348, 0.0343245102453232, 0.0307144539731741, 0.0283392784461379, 
0.0245820489723981, 0.0214487023548782, 0.0187365858632326, 0.0163729577744008, 
0.0143107050991059, 0.0125108672587574, 0, 0.0762191578459362, 
0.0737615750578683, 0.0549565160764756, 0.0280085518714786, 0.0206076781625301, 
0.0172540310333669, 0.0134899928846955, 0.0108952926749736, 0.00728254194885496, 
0.00491441482789815, 0.00332488210681827, 0.00225250494349749, 
0.00152820673925803, 0.00103880306820386, 0, 0.0329456788891303, 
0.0365534415712808, 0.03318406650424, 0.0278133129626513, 0.0238151342895627, 
0.0205330317793787, 0.0155563822799921, 0.0119589968463779, 0.0072024345056713, 
0.00437676923945547, 0.00266755578568207, 0.00162810577310623, 
0.000994532813206324, 0.000607859854716811, 0, 0.00238890872602278, 
0.02000716184065, 0.0509446502289174, 0.0907202677155637, 0.173563302880525, 
0.223891823887825, 0.2226231635499, 0.19175603264451, 0.168494781267643, 
0.150974664176703, 0.136206244819164, 0.111464575245381, 0.0913691590994598, 
0.0749306779146197, 0.0504548476848009, 0, 0.00141190656836649, 
0.0124264488774641, 0.0328390336436031, 0.0603613019163447, 0.123470497330427, 
0.172404586815834, 0.178024356626272, 0.151606226187945, 0.130227694458962, 
0.117105708281994, 0.107832603356838, 0.0935153502613309, 0.081651206263304, 
0.0713645335614684, 0.0545446672743561, 0, 0.00122455342249632, 
0.00957195676775054, 0.0233009280455857, 0.0398901057214595, 
0.069490838356018, 0.0753487069702148, 0.0619427798080445, 0.0388082119899989, 
0.0282194718351961, 0.0223033058814705, 0.0181158699408174, 0.012206885059923, 
0.00828045272134247, 0.00562572468560191, 0.00260434861259537, 
0, 0.00337575118759914, 0.0123247819279197, 0.0212808990854769, 
0.0292664165479362, 0.0407316533482074, 0.0457373328155279, 0.0440263413557409, 
0.0350818961969019, 0.0268987657874823, 0.0206920115460456, 0.0160182394650579, 
0.00970028643496338, 0.00590740063816313, 0.00360522091817113, 
0.00134665597468616)), row.names = c(NA, 124L), class = "data.frame")

CodePudding user response:

You could add distinct(ITER, .keep_all = TRUE)

library(tidyverse)

df %>%
  group_by(ITER) %>% 
  summarise(
    Rsq = cor(x = DV.obs, y = DV.sim)^2,
    adjRsq = 1 - ((1 - Rsq) * (length(DV.obs) - 1) / (length(DV.obs) - nParam - 1))
  ) %>% 
  distinct(ITER, .keep_all = T)
#> `summarise()` has grouped output by 'ITER'. You can override using the
#> `.groups` argument.
#> # A tibble: 4 × 3
#> # Groups:   ITER [4]
#>    ITER   Rsq adjRsq
#>   <int> <dbl>  <dbl>
#> 1     1 0.113 0.113 
#> 2     2 0.116 0.0858
#> 3     3 0.334 0.286 
#> 4     4 0.268 0.187

CodePudding user response:

The issue is that you get a value per row as your are using the nParam column to compute the adjusted R^2 without any aggregating operation. This could be fixed by using unique(nParam) to "aggregate" nParam to just one value per group:

library(dplyr)

df %>%
  group_by(ITER) %>%
  summarise(
    Rsq = cor(x = DV.obs, y = DV.sim)^2,
    adjRsq = 1 - ((1 - Rsq) * (n() - 1) / (n() - unique(nParam) - 1))
  )
#> # A tibble: 4 × 3
#>    ITER   Rsq adjRsq
#>   <int> <dbl>  <dbl>
#> 1     1 0.113 0.113 
#> 2     2 0.116 0.0858
#> 3     3 0.334 0.286 
#> 4     4 0.268 0.187
  • Related