I would like to calculate growth rates across rows by groups (here groups are "country" and "year") in a dataset that is in the long format. Since the growth rate should be relative to the same value (i.e., same "country" and same "year") under the "Baseline" scenario (not relative to the previous row), I am doing this by changing the data format to wide format, as follows:
df <- spread(df, scenario, value) %>%
mutate(NDC_growth=((NDC/Baseline)-1)*100,
`Partial BCA_growth`=((`Partial BCA`/Baseline)-1)*100,
BCA_growth=((BCA/Baseline)-1)*100,
`Full BCA_growth`=((`Full BCA`/Baseline)-1)*100 )
Is there a way to do it in the long format?
Here is the data:
df<- structure(list(country = c("CAN", "CAN", "CAN", "CAN", "CAN",
"CAN", "CAN", "CAN", "CAN", "CAN", "CAN", "CAN", "CAN", "CAN",
"CAN", "NCOA", "NCOA", "NCOA", "NCOA", "NCOA"), year = c("2020",
"2020", "2020", "2020", "2020", "2025", "2025", "2025", "2025",
"2025", "2030", "2030", "2030", "2030", "2030", "2020", "2020",
"2020", "2020", "2020"), scenario = c("Baseline", "BCA", "Full BCA",
"NDC", "Partial BCA", "Baseline", "BCA", "Full BCA", "NDC", "Partial BCA",
"Baseline", "BCA", "Full BCA", "NDC", "Partial BCA", "Baseline",
"BCA", "Full BCA", "NDC", "Partial BCA"), value = c(50527.8708215592,
50487.4619290311, 50485.0924261504, 50489.4453487844, 50486.1975947164,
55845.9708589775, 55070.2745559464, 55133.107605613, 55153.4525662034,
55065.0036253937, 61463.2383809614, 59893.8712077455, 59971.8726308887,
59936.72156767, 59875.7762254252, 338418.917408225, 338420.617142445,
338428.007621131, 338419.514027857, 338427.263672463)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
To compute based on the first in each group use, well, first
. This is valid if Baseline
is always the first in each group.
df<- structure(list(
country = c("CAN", "CAN", "CAN", "CAN", "CAN",
"CAN", "CAN", "CAN", "CAN", "CAN", "CAN", "CAN", "CAN", "CAN",
"CAN", "NCOA", "NCOA", "NCOA", "NCOA", "NCOA"),
year = c("2020", "2020", "2020", "2020", "2020", "2025", "2025",
"2025", "2025", "2025", "2030", "2030", "2030", "2030", "2030",
"2020", "2020", "2020", "2020", "2020"),
scenario = c("Baseline", "BCA", "Full BCA", "NDC", "Partial BCA",
"Baseline", "BCA", "Full BCA", "NDC", "Partial BCA",
"Baseline", "BCA", "Full BCA", "NDC", "Partial BCA", "Baseline",
"BCA", "Full BCA", "NDC", "Partial BCA"),
value = c(50527.8708215592, 50487.4619290311, 50485.0924261504,
50489.4453487844, 50486.1975947164, 55845.9708589775, 55070.2745559464,
55133.107605613, 55153.4525662034, 55065.0036253937, 61463.2383809614,
59893.8712077455, 59971.8726308887, 59936.72156767, 59875.7762254252,
338418.917408225, 338420.617142445, 338428.007621131, 338419.514027857,
338427.263672463)), row.names = c(NA, -20L),
class = c("tbl_df", "tbl", "data.frame"))
suppressPackageStartupMessages(library(dplyr))
df %>%
group_by(country, year) %>%
mutate(growth = (value/first(value) - 1)*100)
#> # A tibble: 20 × 5
#> # Groups: country, year [4]
#> country year scenario value growth
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 CAN 2020 Baseline 50528. 0
#> 2 CAN 2020 BCA 50487. -0.0800
#> 3 CAN 2020 Full BCA 50485. -0.0847
#> 4 CAN 2020 NDC 50489. -0.0760
#> 5 CAN 2020 Partial BCA 50486. -0.0825
#> 6 CAN 2025 Baseline 55846. 0
#> 7 CAN 2025 BCA 55070. -1.39
#> 8 CAN 2025 Full BCA 55133. -1.28
#> 9 CAN 2025 NDC 55153. -1.24
#> 10 CAN 2025 Partial BCA 55065. -1.40
#> 11 CAN 2030 Baseline 61463. 0
#> 12 CAN 2030 BCA 59894. -2.55
#> 13 CAN 2030 Full BCA 59972. -2.43
#> 14 CAN 2030 NDC 59937. -2.48
#> 15 CAN 2030 Partial BCA 59876. -2.58
#> 16 NCOA 2020 Baseline 338419. 0
#> 17 NCOA 2020 BCA 338421. 0.000502
#> 18 NCOA 2020 Full BCA 338428. 0.00269
#> 19 NCOA 2020 NDC 338420. 0.000176
#> 20 NCOA 2020 Partial BCA 338427. 0.00247
Created on 2022-05-08 by the reprex package (v2.0.1)
CodePudding user response:
You could do:
df %>%
group_by(country, year) %>%
mutate(growth = (value / value[scenario == "Baseline"] - 1) * 100)
#> # A tibble: 20 x 5
#> # Groups: country, year [4]
#> country year scenario value growth
#> <chr> <chr> <chr> <dbl> <dbl>
#> 1 CAN 2020 Baseline 50528. 0
#> 2 CAN 2020 BCA 50487. -0.0800
#> 3 CAN 2020 Full BCA 50485. -0.0847
#> 4 CAN 2020 NDC 50489. -0.0760
#> 5 CAN 2020 Partial BCA 50486. -0.0825
#> 6 CAN 2025 Baseline 55846. 0
#> 7 CAN 2025 BCA 55070. -1.39
#> 8 CAN 2025 Full BCA 55133. -1.28
#> 9 CAN 2025 NDC 55153. -1.24
#> 10 CAN 2025 Partial BCA 55065. -1.40
#> 11 CAN 2030 Baseline 61463. 0
#> 12 CAN 2030 BCA 59894. -2.55
#> 13 CAN 2030 Full BCA 59972. -2.43
#> 14 CAN 2030 NDC 59937. -2.48
#> 15 CAN 2030 Partial BCA 59876. -2.58
#> 16 NCOA 2020 Baseline 338419. 0
#> 17 NCOA 2020 BCA 338421. 0.000502
#> 18 NCOA 2020 Full BCA 338428. 0.00269
#> 19 NCOA 2020 NDC 338420. 0.000176
#> 20 NCOA 2020 Partial BCA 338427. 0.00247