Home > OS >  matching values within group to calculate percent change R
matching values within group to calculate percent change R

Time:10-12

I have a dataframe output of my first 200 rows as follows:

structure(list(week_id = c("2021100301", "2021092601", "2021091901", 
"2021091201", "2021082901", "2021082201", "2021081501", "2021080801", 
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401", 
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001", 
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501", 
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101", 
"2021031401", "2021030701", "2021022101", "2021021401", "2021020701", 
"2021013101", "2021012401", "2021011701", "2021011001", "2021010301", 
"2020122701", "2020122001", "2020121301", "2020120601", "2020112901", 
"2020112201", "2020111501", "2020110801", "2020110101", "2020102501", 
"2020101801", "2020101101", "2020100401", "2020092701", "2020092001", 
"2020091301", "2020090601", "2020083001", "2020082301", "2020081601", 
"2020080901", "2020080201", "2020072601", "2020071901", "2020071201", 
"2020070501", "2020062801", "2020062101", "2020061401", "2020060701", 
"2020053101", "2020052401", "2020051701", "2020051001", "2020050301", 
"2020042601", "2020041901", "2020041201", "2020040501", "2020032901", 
"2020032201", "2020031501", "2020030101", "2020022301", "2020021601", 
"2020020901", "2020020201", "2020012601", "2020011901", "2020011201", 
"2020010501", "2021100301", "2021092601", "2021091901", "2021091201", 
"2021090501", "2021082901", "2021082201", "2021081501", "2021080801", 
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401", 
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001", 
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501", 
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101", 
"2021031401", "2021030701", "2021022801", "2021022101", "2021021401", 
"2021020701", "2021013101", "2021012401", "2021011701", "2021011001", 
"2021010301", "2020122701", "2020122001", "2020121301", "2020120601", 
"2020112901", "2020112201", "2020111501", "2020110801", "2020110101", 
"2020102501", "2020101801", "2020101101", "2020100401", "2020092701", 
"2020092001", "2020091301", "2020090601", "2020083001", "2020082301", 
"2020081601", "2020080901", "2020080201", "2020072601", "2020071901", 
"2020071201", "2020070501", "2020062801", "2020062101", "2020061401", 
"2020060701", "2020053101", "2020052401", "2020051701", "2020051001", 
"2020050301", "2020042601", "2020041901", "2020041201", "2020040501", 
"2020032901", "2020032201", "2020031501", "2020030801", "2020030101", 
"2020022301", "2020021601", "2020020901", "2020020201", "2020012601", 
"2020011901", "2020011201", "2020010501", "2021100301", "2021092601", 
"2021091901", "2021091201", "2021090501", "2021082901", "2021082201", 
"2021081501", "2021080801", "2021080101", "2021072501", "2021071801", 
"2021071101", "2021070401", "2021062701", "2021062001", "2021061301", 
"2021060601", "2021053001"), retailer_id = c(2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L), store_id = c(167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5515L, 5515L, 
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L), dollars = c(121817.89, 
123837.04, 118670.04, 125059.625, 108753.24, 103086.164, 93883.47, 
97233.34, 104718.26, 106884.21, 105388.664, 103774.02, 105210.89, 
110289.805, 117055.945, 113839.07, 112145.44, 120899.42, 123817.1, 
115896.3, 125034.63, 121903.98, 114903.13, 117721.086, 105937.99, 
124112.63, 112720.3, 120637.18, 120471.27, 110500.37, 110503.08, 
112969.96, 115869.27, 118878.375, 105458.195, 106309.5, 110294.62, 
108330.82, 146878.53, 126304.055, 116068.125, 107469.35, 136214.23, 
113220.75, 112149.66, 100493.305, 92780.78, 97302.18, 93464.86, 
96070.89, 95837.6, 94597.96, 98911.57, 85228.016, 94473.36, 93500.97, 
87337.375, 88164.125, 87065.74, 66466, 90029.71, 97007.35, 102726.36, 
91905.15, 104088.82, 110672.805, 103954.48, 109547.445, 105544.875, 
120075.03, 118728, 134109.06, 123252.85, 130008.92, 114023.5, 
144025.67, 125673.97, 109288.62, 145974.9, 162897.97, 99991.97, 
98035.12, 95207.695, 95602.53, 91691.84, 95935.414, 96264.945, 
83052.78, 81082.19, 308727.38, 316221.84, 307510.53, 331655.47, 
325742.12, 313349.8, 314485.25, 296090.7, 294508.4, 289239.16, 
291942.7, 305845.53, 319315.16, 326085.72, 337905.4, 344706.47, 
327700.53, 371220.06, 359514.38, 337943.56, 373691.47, 349582.7, 
334601.75, 336816.66, 337005.75, 347109.06, 326315.97, 311354.84, 
322053.34, 336316.03, 338193.53, 313785.03, 344732.66, 342939.25, 
346253.62, 324986.56, 326116.2, 338265.66, 268262.62, 386346.6, 
626153.6, 365243.03, 340560.28, 293116.5, 465687.8, 353180.1, 
351834.12, 320955.12, 334259.28, 325735.22, 333862.25, 324085.34, 
334968.44, 315079.22, 317985.3, 330642.66, 326725.25, 318800.8, 
326554, 333438.25, 330099.03, 321774.66, 330337.6, 341311.16, 
315433.25, 385807.25, 386330.25, 374521.12, 376984.34, 387566.56, 
382244.22, 380587.84, 417656.53, 398990, 393197.03, 385053.06, 
391217.75, 416653.6, 360383.8, 296253.6, 479710.7, 346368.38, 
277086.44, 286023.53, 270573, 307881.22, 280272.6, 288636.3, 
265937.72, 277954.03, 273006.03, 281253.47, 851667.25, 820867.3, 
828810.06, 827808.7, 728991.94, 713789.44, 685840.06, 701648.6, 
697648.25, 719623.3, 709753.9, 731245.5, 751857.94, 723741.4, 
788438.6, 816226.7, 843330.25, 879954.7, 870035.94), fill = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), spins_week = c("20211040", "20211039", 
"20211038", "20211037", "20210935", "20210934", "20210933", "20210832", 
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726", 
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520", 
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414", 
"20210413", "20210312", "20210311", "20210310", "20210208", "20210207", 
"20210206", "20210205", "20210104", "20210103", "20210102", "20210101", 
"20201352", "20201351", "20201350", "20201349", "20201248", "20201247", 
"20201246", "20201245", "20201144", "20201143", "20201142", "20201141", 
"20201040", "20201039", "20201038", "20201037", "20200936", "20200935", 
"20200934", "20200933", "20200832", "20200831", "20200830", "20200829", 
"20200728", "20200727", "20200726", "20200725", "20200624", "20200623", 
"20200622", "20200621", "20200520", "20200519", "20200518", "20200517", 
"20200416", "20200415", "20200414", "20200413", "20200312", "20200311", 
"20200309", "20200208", "20200207", "20200206", "20200205", "20200104", 
"20200103", "20200102", "20200101", "20211040", "20211039", "20211038", 
"20211037", "20210936", "20210935", "20210934", "20210933", "20210832", 
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726", 
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520", 
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414", 
"20210413", "20210312", "20210311", "20210310", "20210309", "20210208", 
"20210207", "20210206", "20210205", "20210104", "20210103", "20210102", 
"20210101", "20201352", "20201351", "20201350", "20201349", "20201248", 
"20201247", "20201246", "20201245", "20201144", "20201143", "20201142", 
"20201141", "20201040", "20201039", "20201038", "20201037", "20200936", 
"20200935", "20200934", "20200933", "20200832", "20200831", "20200830", 
"20200829", "20200728", "20200727", "20200726", "20200725", "20200624", 
"20200623", "20200622", "20200621", "20200520", "20200519", "20200518", 
"20200517", "20200416", "20200415", "20200414", "20200413", "20200312", 
"20200311", "20200310", "20200309", "20200208", "20200207", "20200206", 
"20200205", "20200104", "20200103", "20200102", "20200101", "20211040", 
"20211039", "20211038", "20211037", "20210936", "20210935", "20210934", 
"20210933", "20210832", "20210831", "20210830", "20210829", "20210728", 
"20210727", "20210726", "20210725", "20210624", "20210623", "20210622"
), id = c("1040", "1039", "1038", "1037", "0935", "0934", "0933", 
"0832", "0831", "0830", "0829", "0728", "0727", "0726", "0725", 
"0624", "0623", "0622", "0621", "0520", "0519", "0518", "0517", 
"0416", "0415", "0414", "0413", "0312", "0311", "0310", "0208", 
"0207", "0206", "0205", "0104", "0103", "0102", "0101", "1352", 
"1351", "1350", "1349", "1248", "1247", "1246", "1245", "1144", 
"1143", "1142", "1141", "1040", "1039", "1038", "1037", "0936", 
"0935", "0934", "0933", "0832", "0831", "0830", "0829", "0728", 
"0727", "0726", "0725", "0624", "0623", "0622", "0621", "0520", 
"0519", "0518", "0517", "0416", "0415", "0414", "0413", "0312", 
"0311", "0309", "0208", "0207", "0206", "0205", "0104", "0103", 
"0102", "0101", "1040", "1039", "1038", "1037", "0936", "0935", 
"0934", "0933", "0832", "0831", "0830", "0829", "0728", "0727", 
"0726", "0725", "0624", "0623", "0622", "0621", "0520", "0519", 
"0518", "0517", "0416", "0415", "0414", "0413", "0312", "0311", 
"0310", "0309", "0208", "0207", "0206", "0205", "0104", "0103", 
"0102", "0101", "1352", "1351", "1350", "1349", "1248", "1247", 
"1246", "1245", "1144", "1143", "1142", "1141", "1040", "1039", 
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831", 
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623", 
"0622", "0621", "0520", "0519", "0518", "0517", "0416", "0415", 
"0414", "0413", "0312", "0311", "0310", "0309", "0208", "0207", 
"0206", "0205", "0104", "0103", "0102", "0101", "1040", "1039", 
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831", 
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623", 
"0622")), row.names = c(NA, -200L), groups = structure(list(retailer_id = c(2L, 
4L, 5L), store_id = c(167L, 5987L, 5515L), .rows = structure(list(
    1:89, 90:181, 182:200), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

I would like to take the first 4 values in the id column within each store group and find the percent change in dollars between those and the value that matches it within the group.

For example for retailer_id = 2 and store_id = 167, the first 4 "id" values are 1040, 1039, 1038, and 1037. These values occur twice within each grouping but correspond to different dollar amounts. As you can see the id = 1040 (for retailer_id = 2 and store_id = 167) corresponds to dollar amounts 121817.89 and 95837.60. I would like to calculate the percent difference from the second value to the first (121817.89 - 95837.60 / 95837.60).

I have achieved the current output above by doing:

A = dollars.weeks %>%
  select(-c(retail_chain, retail_tag, store_name, store_tag)) %>%
  group_by(retailer_id, store_id) %>%
  arrange(retailer_id, store_id, desc(spins_week)) %>%
  mutate(id = substr(spins_week,5,8))

After grouping by retailer_id, store_id and then arranging by descending spins_week, I have shortened the spins_week values to the last 4 digits to make the id column.

I would like to match the top four occurring id values to their corresponding values down the column (keeping the retailer and store group as I would like to calculate per group), but I am not sure how to add onto my current code to do so. Any help would be great thanks!

CodePudding user response:

Based on the description, we may add 'id' as grouping variable and then calculate the 'percecnt_change' by dividing the deviation of dollars (from the max value of 'dollars') with the dollars column

library(dplyr)
A %>%
    group_by(id, .add = TRUE) %>% 
    mutate(percent_change = (max(dollars) - dollars)/dollars)

Or may be

A %>%
    group_by(id, .add = TRUE) %>% 
    mutate(percent_change = if(n() == 2) 
        (first(dollars) - last(dollars))/last(dollars) else NA)

-output

# A tibble: 200 × 8
# Groups:   retailer_id, store_id, id [123]
   week_id    retailer_id store_id dollars  fill spins_week id    percent_change
   <chr>            <int>    <int>   <dbl> <dbl> <chr>      <chr>          <dbl>
 1 2021100301           2      167 121818.     0 20211040   1040          0.271 
 2 2021092601           2      167 123837.     0 20211039   1039          0.309 
 3 2021091901           2      167 118670.     0 20211038   1038          0.200 
 4 2021091201           2      167 125060.     0 20211037   1037          0.467 
 5 2021082901           2      167 108753.     0 20210935   0935          0.163 
 6 2021082201           2      167 103086.     0 20210934   0934          0.180 
 7 2021081501           2      167  93883.     0 20210933   0933          0.0649
 8 2021080801           2      167  97233.     0 20210832   0832          0.117 
 9 2021080101           2      167 104718.     0 20210831   0831          0.576 
10 2021072501           2      167 106884.     0 20210830   0830          0.187 
# … with 190 more rows

Or as @Greg mentioned in the comments, if we want the first 4 values

A %>% 
   mutate(r_num = row_number()) %>%
   group_by(id, .add = TRUE) %>% 
   mutate(percent_change = if(n() == 2) 
        (first(dollars) - last(dollars))/last(dollars) else NA) %>% 
   filter(r_num <= 4)
  • Related