How can I check if the sum of some brands is smaller than the total of that brand and return those w-CodePudding

I have a data like this and I need to sum the Value of the "Brand" rows and check if it is smaller than the "Total" rows Value for each Sub_Category and to return a table with the results and if it's easier only with those who fail the check, something like this , in R.

So far I've started with this:

result<- with(df, sum(df$Value[df$Brand_Type == 'Brand']))

but it only returns the sum for all of them so I've also tried a group_by but I could not get anywhere close to the wanted result so I really need some help here.

CodePudding user response：

With dplyr you can do this

library(dplyr)

df %>% 
  group_by(Sub_Category, Brand_Type) %>% 
  mutate(Sum = sum(Value)) %>% 
  group_by(Sub_Category) %>% 
  mutate(Total = Value[Brand_Type == "Total"], Change = Total - Sum, Value = NULL) %>% 
  filter(Sum > lag(Sum)) %>% 
  ungroup()
# A tibble: 2 × 6
  Category Sub_Category Brand_Type   Sum Total Change
  <chr>    <chr>        <chr>      <dbl> <dbl>  <dbl>
1 Puma     Puma         Brand         23    19     -4
2 Nike     Nike Air     Brand         18    16     -2

With base R

df_sum <- aggregate(Value ~ ., df, sum)

df_t <- cbind(df_sum, Total = df_sum$Value[df_sum$Brand_Type == "Total"])

cbind(df_t[df_t$Value > df_t$Total,], 
  Change = df_t[df_t$Value > df_t$Total,"Total"] - 
             df_t[df_t $Value > df_t$Total,"Value"])
  Category Sub_Category Brand_Type Value Total Change
3     Nike     Nike Air      Brand    18    16     -2
4     Puma         Puma      Brand    23    19     -4

Data

df <- structure(list(Category = c("Adidas", "Adidas", "Adidas", "Puma", 
"Puma", "Puma", "Nike", "Nike", "Nike", "Nike", "Nike", "Nike"
), Sub_Category = c("Adidas", "Adidas", "Adidas", "Puma", "Puma", 
"Puma", "Nike", "Nike", "Nike", "Nike Air", "Nike Air", "Nike Air"
), Brand_Type = c("Total", "Brand", "Brand", "Total", "Brand", 
"Brand", "Total", "Brand", "Brand", "Total", "Brand", "Brand"
), Value = c(12, 3, 4, 19, 12, 11, 25.8, 11, 5, 16, 9, 9)), class = "data.frame", row.names = c(NA, 
-12L))

Edit

mod Data

df
   Category Sub_Category   Brand_Type Value
1    Adidas       Adidas        Total  12.0
2    Adidas       Adidas        Brand   3.0
3    Adidas       Adidas        Brand   4.0
4      Puma         Puma        Total  19.0
5      Puma         Puma Manufacturer  12.0
6      Puma         Puma Manufacturer  11.0
7      Nike         Nike        Total  25.8
8      Nike         Nike        Brand  11.0
9      Nike         Nike        Brand   5.0
10     Nike     Nike Air        Total  16.0
11     Nike     Nike Air        Brand   9.0
12     Nike     Nike Air        Brand   9.0

# ----------

df %>% 
  group_by(Sub_Category, Brand_Type) %>% 
  mutate(Sum = sum(Value)) %>% 
  group_by(Sub_Category) %>% 
  mutate(Total = Value[Brand_Type == "Total"], Change = Total - Sum, Value = NULL) %>% 
  filter(Sum > lag(Sum)) %>% 
  ungroup()
# A tibble: 2 × 6
  Category Sub_Category Brand_Type     Sum Total Change
  <chr>    <chr>        <chr>        <dbl> <dbl>  <dbl>
1 Puma     Puma         Manufacturer    23    19     -4
2 Nike     Nike Air     Brand           18    16     -2

# ----------

df_sum <- aggregate(Value ~ ., df, sum)
 
df_t <- cbind(df_sum, Total = df_sum$Value[df_sum$Brand_Type == "Total"])

cbind(df_t[df_t$Value > df_t$Total,], 
  Change = df_t[df_t$Value > df_t$Total,"Total"] - 
             df_t[df_t $Value > df_t$Total,"Value"])
  Category Sub_Category   Brand_Type Value Total Change
3     Nike     Nike Air        Brand    18    16     -2
4     Puma         Puma Manufacturer    23    19     -4