I have a dataframe output of my first 200 rows as follows:
structure(list(week_id = c("2021100301", "2021092601", "2021091901",
"2021091201", "2021082901", "2021082201", "2021081501", "2021080801",
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401",
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001",
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501",
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101",
"2021031401", "2021030701", "2021022101", "2021021401", "2021020701",
"2021013101", "2021012401", "2021011701", "2021011001", "2021010301",
"2020122701", "2020122001", "2020121301", "2020120601", "2020112901",
"2020112201", "2020111501", "2020110801", "2020110101", "2020102501",
"2020101801", "2020101101", "2020100401", "2020092701", "2020092001",
"2020091301", "2020090601", "2020083001", "2020082301", "2020081601",
"2020080901", "2020080201", "2020072601", "2020071901", "2020071201",
"2020070501", "2020062801", "2020062101", "2020061401", "2020060701",
"2020053101", "2020052401", "2020051701", "2020051001", "2020050301",
"2020042601", "2020041901", "2020041201", "2020040501", "2020032901",
"2020032201", "2020031501", "2020030101", "2020022301", "2020021601",
"2020020901", "2020020201", "2020012601", "2020011901", "2020011201",
"2020010501", "2021100301", "2021092601", "2021091901", "2021091201",
"2021090501", "2021082901", "2021082201", "2021081501", "2021080801",
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401",
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001",
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501",
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101",
"2021031401", "2021030701", "2021022801", "2021022101", "2021021401",
"2021020701", "2021013101", "2021012401", "2021011701", "2021011001",
"2021010301", "2020122701", "2020122001", "2020121301", "2020120601",
"2020112901", "2020112201", "2020111501", "2020110801", "2020110101",
"2020102501", "2020101801", "2020101101", "2020100401", "2020092701",
"2020092001", "2020091301", "2020090601", "2020083001", "2020082301",
"2020081601", "2020080901", "2020080201", "2020072601", "2020071901",
"2020071201", "2020070501", "2020062801", "2020062101", "2020061401",
"2020060701", "2020053101", "2020052401", "2020051701", "2020051001",
"2020050301", "2020042601", "2020041901", "2020041201", "2020040501",
"2020032901", "2020032201", "2020031501", "2020030801", "2020030101",
"2020022301", "2020021601", "2020020901", "2020020201", "2020012601",
"2020011901", "2020011201", "2020010501", "2021100301", "2021092601",
"2021091901", "2021091201", "2021090501", "2021082901", "2021082201",
"2021081501", "2021080801", "2021080101", "2021072501", "2021071801",
"2021071101", "2021070401", "2021062701", "2021062001", "2021061301",
"2021060601", "2021053001"), retailer_id = c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L), store_id = c(167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5515L, 5515L,
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L,
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L), dollars = c(121817.89,
123837.04, 118670.04, 125059.625, 108753.24, 103086.164, 93883.47,
97233.34, 104718.26, 106884.21, 105388.664, 103774.02, 105210.89,
110289.805, 117055.945, 113839.07, 112145.44, 120899.42, 123817.1,
115896.3, 125034.63, 121903.98, 114903.13, 117721.086, 105937.99,
124112.63, 112720.3, 120637.18, 120471.27, 110500.37, 110503.08,
112969.96, 115869.27, 118878.375, 105458.195, 106309.5, 110294.62,
108330.82, 146878.53, 126304.055, 116068.125, 107469.35, 136214.23,
113220.75, 112149.66, 100493.305, 92780.78, 97302.18, 93464.86,
96070.89, 95837.6, 94597.96, 98911.57, 85228.016, 94473.36, 93500.97,
87337.375, 88164.125, 87065.74, 66466, 90029.71, 97007.35, 102726.36,
91905.15, 104088.82, 110672.805, 103954.48, 109547.445, 105544.875,
120075.03, 118728, 134109.06, 123252.85, 130008.92, 114023.5,
144025.67, 125673.97, 109288.62, 145974.9, 162897.97, 99991.97,
98035.12, 95207.695, 95602.53, 91691.84, 95935.414, 96264.945,
83052.78, 81082.19, 308727.38, 316221.84, 307510.53, 331655.47,
325742.12, 313349.8, 314485.25, 296090.7, 294508.4, 289239.16,
291942.7, 305845.53, 319315.16, 326085.72, 337905.4, 344706.47,
327700.53, 371220.06, 359514.38, 337943.56, 373691.47, 349582.7,
334601.75, 336816.66, 337005.75, 347109.06, 326315.97, 311354.84,
322053.34, 336316.03, 338193.53, 313785.03, 344732.66, 342939.25,
346253.62, 324986.56, 326116.2, 338265.66, 268262.62, 386346.6,
626153.6, 365243.03, 340560.28, 293116.5, 465687.8, 353180.1,
351834.12, 320955.12, 334259.28, 325735.22, 333862.25, 324085.34,
334968.44, 315079.22, 317985.3, 330642.66, 326725.25, 318800.8,
326554, 333438.25, 330099.03, 321774.66, 330337.6, 341311.16,
315433.25, 385807.25, 386330.25, 374521.12, 376984.34, 387566.56,
382244.22, 380587.84, 417656.53, 398990, 393197.03, 385053.06,
391217.75, 416653.6, 360383.8, 296253.6, 479710.7, 346368.38,
277086.44, 286023.53, 270573, 307881.22, 280272.6, 288636.3,
265937.72, 277954.03, 273006.03, 281253.47, 851667.25, 820867.3,
828810.06, 827808.7, 728991.94, 713789.44, 685840.06, 701648.6,
697648.25, 719623.3, 709753.9, 731245.5, 751857.94, 723741.4,
788438.6, 816226.7, 843330.25, 879954.7, 870035.94), fill = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), spins_week = c("20211040", "20211039",
"20211038", "20211037", "20210935", "20210934", "20210933", "20210832",
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726",
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520",
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414",
"20210413", "20210312", "20210311", "20210310", "20210208", "20210207",
"20210206", "20210205", "20210104", "20210103", "20210102", "20210101",
"20201352", "20201351", "20201350", "20201349", "20201248", "20201247",
"20201246", "20201245", "20201144", "20201143", "20201142", "20201141",
"20201040", "20201039", "20201038", "20201037", "20200936", "20200935",
"20200934", "20200933", "20200832", "20200831", "20200830", "20200829",
"20200728", "20200727", "20200726", "20200725", "20200624", "20200623",
"20200622", "20200621", "20200520", "20200519", "20200518", "20200517",
"20200416", "20200415", "20200414", "20200413", "20200312", "20200311",
"20200309", "20200208", "20200207", "20200206", "20200205", "20200104",
"20200103", "20200102", "20200101", "20211040", "20211039", "20211038",
"20211037", "20210936", "20210935", "20210934", "20210933", "20210832",
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726",
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520",
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414",
"20210413", "20210312", "20210311", "20210310", "20210309", "20210208",
"20210207", "20210206", "20210205", "20210104", "20210103", "20210102",
"20210101", "20201352", "20201351", "20201350", "20201349", "20201248",
"20201247", "20201246", "20201245", "20201144", "20201143", "20201142",
"20201141", "20201040", "20201039", "20201038", "20201037", "20200936",
"20200935", "20200934", "20200933", "20200832", "20200831", "20200830",
"20200829", "20200728", "20200727", "20200726", "20200725", "20200624",
"20200623", "20200622", "20200621", "20200520", "20200519", "20200518",
"20200517", "20200416", "20200415", "20200414", "20200413", "20200312",
"20200311", "20200310", "20200309", "20200208", "20200207", "20200206",
"20200205", "20200104", "20200103", "20200102", "20200101", "20211040",
"20211039", "20211038", "20211037", "20210936", "20210935", "20210934",
"20210933", "20210832", "20210831", "20210830", "20210829", "20210728",
"20210727", "20210726", "20210725", "20210624", "20210623", "20210622"
), id = c("1040", "1039", "1038", "1037", "0935", "0934", "0933",
"0832", "0831", "0830", "0829", "0728", "0727", "0726", "0725",
"0624", "0623", "0622", "0621", "0520", "0519", "0518", "0517",
"0416", "0415", "0414", "0413", "0312", "0311", "0310", "0208",
"0207", "0206", "0205", "0104", "0103", "0102", "0101", "1352",
"1351", "1350", "1349", "1248", "1247", "1246", "1245", "1144",
"1143", "1142", "1141", "1040", "1039", "1038", "1037", "0936",
"0935", "0934", "0933", "0832", "0831", "0830", "0829", "0728",
"0727", "0726", "0725", "0624", "0623", "0622", "0621", "0520",
"0519", "0518", "0517", "0416", "0415", "0414", "0413", "0312",
"0311", "0309", "0208", "0207", "0206", "0205", "0104", "0103",
"0102", "0101", "1040", "1039", "1038", "1037", "0936", "0935",
"0934", "0933", "0832", "0831", "0830", "0829", "0728", "0727",
"0726", "0725", "0624", "0623", "0622", "0621", "0520", "0519",
"0518", "0517", "0416", "0415", "0414", "0413", "0312", "0311",
"0310", "0309", "0208", "0207", "0206", "0205", "0104", "0103",
"0102", "0101", "1352", "1351", "1350", "1349", "1248", "1247",
"1246", "1245", "1144", "1143", "1142", "1141", "1040", "1039",
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831",
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623",
"0622", "0621", "0520", "0519", "0518", "0517", "0416", "0415",
"0414", "0413", "0312", "0311", "0310", "0309", "0208", "0207",
"0206", "0205", "0104", "0103", "0102", "0101", "1040", "1039",
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831",
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623",
"0622")), row.names = c(NA, -200L), groups = structure(list(retailer_id = c(2L,
4L, 5L), store_id = c(167L, 5987L, 5515L), .rows = structure(list(
1:89, 90:181, 182:200), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
I would like to take the first 4 values in the id column within each store group and find the percent change in dollars between those and the value that matches it within the group.
For example for retailer_id = 2 and store_id = 167, the first 4 "id" values are 1040, 1039, 1038, and 1037. These values occur twice within each grouping but correspond to different dollar amounts. As you can see the id = 1040 (for retailer_id = 2 and store_id = 167) corresponds to dollar amounts 121817.89 and 95837.60. I would like to calculate the percent difference from the second value to the first (121817.89 - 95837.60 / 95837.60).
I have achieved the current output above by doing:
A = dollars.weeks %>%
select(-c(retail_chain, retail_tag, store_name, store_tag)) %>%
group_by(retailer_id, store_id) %>%
arrange(retailer_id, store_id, desc(spins_week)) %>%
mutate(id = substr(spins_week,5,8))
After grouping by retailer_id, store_id and then arranging by descending spins_week, I have shortened the spins_week values to the last 4 digits to make the id column.
I would like to match the top four occurring id values to their corresponding values down the column (keeping the retailer and store group as I would like to calculate per group), but I am not sure how to add onto my current code to do so. Any help would be great thanks!
CodePudding user response:
Based on the description, we may add 'id' as grouping variable and then calculate the 'percecnt_change' by dividing the deviation of dollars (from the max
value of 'dollars') with the dollars column
library(dplyr)
A %>%
group_by(id, .add = TRUE) %>%
mutate(percent_change = (max(dollars) - dollars)/dollars)
Or may be
A %>%
group_by(id, .add = TRUE) %>%
mutate(percent_change = if(n() == 2)
(first(dollars) - last(dollars))/last(dollars) else NA)
-output
# A tibble: 200 × 8
# Groups: retailer_id, store_id, id [123]
week_id retailer_id store_id dollars fill spins_week id percent_change
<chr> <int> <int> <dbl> <dbl> <chr> <chr> <dbl>
1 2021100301 2 167 121818. 0 20211040 1040 0.271
2 2021092601 2 167 123837. 0 20211039 1039 0.309
3 2021091901 2 167 118670. 0 20211038 1038 0.200
4 2021091201 2 167 125060. 0 20211037 1037 0.467
5 2021082901 2 167 108753. 0 20210935 0935 0.163
6 2021082201 2 167 103086. 0 20210934 0934 0.180
7 2021081501 2 167 93883. 0 20210933 0933 0.0649
8 2021080801 2 167 97233. 0 20210832 0832 0.117
9 2021080101 2 167 104718. 0 20210831 0831 0.576
10 2021072501 2 167 106884. 0 20210830 0830 0.187
# … with 190 more rows
Or as @Greg mentioned in the comments, if we want the first 4 values
A %>%
mutate(r_num = row_number()) %>%
group_by(id, .add = TRUE) %>%
mutate(percent_change = if(n() == 2)
(first(dollars) - last(dollars))/last(dollars) else NA) %>%
filter(r_num <= 4)