Home > Net >  How to perform regression analysis by groups and get the estimated coefficients for each group separ
How to perform regression analysis by groups and get the estimated coefficients for each group separ

Time:11-28

i have such data (the data is given as an example, so both groups have the same values)

    dat=structure(list(sku = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), period = c("30.09.2021", 
    "14.03.2019", "01.04.2022", "18.02.2022", "07.07.2021", "09.10.2020", 
    "17.01.2019", "10.11.2020", "14.07.2021", "10.09.2019", "31.01.2019", 
    "01.07.2021", "30.09.2021", "14.03.2019", "01.04.2022", "18.02.2022", 
    "07.07.2021", "09.10.2020", "17.01.2019", "10.11.2020", "14.07.2021", 
    "10.09.2019", "31.01.2019", "01.07.2021"), hist.prices = c(3728.16, 
    34899.84, 6126, 1789.44, 18098.4, 15633.6, 26174.88, 2401.56, 
    12668.88, 239500.8, 26174.88, 5429.52, 3728.16, 34899.84, 6126, 
    1789.44, 18098.4, 15633.6, 26174.88, 2401.56, 12668.88, 239500.8, 
    26174.88, 5429.52), hist.revenue = c(178951.68, 20102307.84, 
    367560, 42946.56, 4343616, 3752064, 11307548.16, 86456.16, 2128371.84, 
    965667225.6, 11307548.16, 390925.44, 178951.68, 20102307.84, 
    367560, 42946.56, 4343616, 3752064, 11307548.16, 86456.16, 2128371.84, 
    965667225.6, 11307548.16, 390925.44), hist.demand = c(254L, 276L, 
    272L, 250L, 299L, 297L, 291L, 260L, 270L, 275L, 295L, 279L, 254L, 
    276L, 272L, 250L, 299L, 297L, 291L, 260L, 270L, 275L, 295L, 279L
    ), hist.cost = c(12572.6698, 10498.9848, 14949.392, 13160.5, 
    14557.9512, 12443.3199, 10692.3294, 10893.116, 13145.976, 10222.6025, 
    10982.9975, 13584.1752, 12572.6698, 10498.9848, 14949.392, 13160.5, 
    14557.9512, 12443.3199, 10692.3294, 10893.116, 13145.976, 10222.6025, 
    10982.9975, 13584.1752), unity.cost = c(49.4987, 38.0398, 54.961, 
    52.642, 48.6888, 41.8967, 36.7434, 41.8966, 48.6888, 37.1731, 
    37.2305, 48.6888, 49.4987, 38.0398, 54.961, 52.642, 48.6888, 
    41.8967, 36.7434, 41.8966, 48.6888, 37.1731, 37.2305, 48.6888
    ), hist.profit = c(1336L, 1592L, 1128L, 1882L, 1387L, 1818L, 
    1357L, 1087L, 1253L, 1009L, 1092L, 1804L, 1336L, 1592L, 1128L, 
    1882L, 1387L, 1818L, 1357L, 1087L, 1253L, 1009L, 1092L, 1804L
    )), class = "data.frame", row.names = c(NA, -24L))

I need to do a regression analysis and calculate the coefficients for each sku(group variable) separately. The demand function is the same for all sku. Then i perform regression:

    # example of linear demand curve (first equation) 
    demand = function(p, alpha = -40, beta = 500, sd = 10) {
      error = rnorm(length(p), sd = sd)
      q = p*alpha   beta   error
      return(q)
    }

in this example, this is only for one sku, but it is necessary for all that are available.

    library(stargazer)
    model.fit = lm(hist.demand ~ hist.prices)
    stargazer(model.fit, type = 'html', header = FALSE) # output
    # estimated parameters
    beta = model.fit$coefficients[1]
    alpha = model.fit$coefficients[2]  
    p.revenue = -beta/(2*alpha) # estimated price for revenue
    p.profit = (alpha*unity.cost - beta)/(2*alpha) # estimated price for profit
    
    true.revenue = function(p) p*(-40*p   500) # Revenue with true parameters (chunck demand)
    true.profit = function(p) (p - unity.cost)*(-40*p   500) # price with true parameters
    # estimated curves
    estimated.revenue = function(p) p*(model.fit$coefficients[2]*p   model.fit$coefficients[1])
    estimated.profit = function(p) (p - unity.cost)*(model.fit$coefficients[2]*p   model.fit$coefficients[1])
    opt.revenue = true.revenue(p.revenue) # Revenue with estimated optimum price
    opt.profit = true.profit(p.profit) # Profit with estimated optimum price

how to execute this code for all sku separately, so that the desired output is something like this

    sku opt.profit  opt.revenue
    1   722.0413    1562.041
    2   722.0413    1562.041

thanks for any of your valuable help

CodePudding user response:

We could get the estimates from lm by group in this way:

library(tidyverse)
library(broom)
dat %>%
  group_split(sku) %>% 
  map_dfr(.f = function(df){
    lm(hist.demand ~ hist.prices, data = df) %>% 
      tidy() %>% 
      add_column(group = unique(df$sku), .before=1)
  })
  group term           estimate std.error statistic  p.value
  <int> <chr>             <dbl>     <dbl>     <dbl>    <dbl>
1     1 (Intercept) 276.        5.64         48.9   3.09e-13
2     1 hist.prices   0.0000198 0.0000793     0.249 8.08e- 1
3     2 (Intercept) 276.        5.64         48.9   3.09e-13
4     2 hist.prices   0.0000198 0.0000793     0.249 8.08e- 1

CodePudding user response:

If we want to do a group by approach, an option is to nest and then either loop over the nested data with map or use nest_ functions from nplyr

library(dplyr)
library(nplyr)
library(tidyr)
dat %>% 
  nest(data = -sku) %>% 
  nest_summarise(data, 
   model.fit = list(lm(hist.demand ~ hist.prices)), 
   beta = model.fit[[1]]$coefficients[1], 
   alpha = model.fit[[1]]$coefficients[2],
   p.revenue = -beta/(2*alpha),
   p.profit = (alpha*unity.cost - beta)/(2*alpha),
   opt.revenue = true.revenue(p.revenue), 
   opt.profit = true.profit(p.profit)) %>% 
  nest_select(data, opt.revenue, opt.profit) %>%
  unnest(data)
  • Related