Finding percentage difference using dplyr when there are multiple groups-CodePudding

Here's the data

structure(list(name = c("A", "A", "A", "A", "A", "A", "B", "B", 
"B", "B", "B", "B", "C", "C", "C", "C", "C", "C"), value = c(1.19836644827586, 
2.46856144477028, 0.856237188191882, 0.778078289325843, 0.59811550273224, 
0.787017526104418, 0.473075959100205, 1.11257028100264, 3.53950002293968, 
3.25319619936034, 0.514323313099042, 0.58826350129199, 3.38210735688006, 
3.78537735596708, 0.917653452784504, 0.753012044982699, 6.84112906311637, 
6.27268644079398), exp = c("control", "experiment", "control", 
"experiment", "control", "experiment", "control", "experiment", 
"control", "experiment", "control", "experiment", "control", 
"experiment", "control", "experiment", "control", "experiment"
), type = c("typeA", "typeA", "typeB", "typeB", "typeC", "typeC", 
"typeA", "typeA", "typeB", "typeB", "typeC", "typeC", "typeA", 
"typeA", "typeB", "typeB", "typeC", "typeC")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -18L))

For each type and for each name I want to find the percentage difference of experiment over control.

Expected output:

  name  type  percentage_change
  <chr> <chr>             <dbl>
1 A     typeA            106.  
2 A     typeB             -9.13
3 A     typeC             31.6 
4 B     typeA            135.  
5 B     typeB             -8.09
6 B     typeC             14.4 
7 C     typeA             11.9 
8 C     typeB            -17.9 
9 C     typeC             -8.31

I have tried many things, but I couldn't able to wrap my head around this.

Thank you.

CodePudding user response：

try this

library(dplyr)

df |> group_by(name , type) |> 
      summarise(percentage_change = (value[2] - value[1]) / value[1] * 100)

output

# A tibble: 9 × 3
# Groups:   name [3]
  name  type  percentage_change
  <chr> <chr>             <dbl>
1 A     typeA            106.  
2 A     typeB             -9.13
3 A     typeC             31.6 
4 B     typeA            135.  
5 B     typeB             -8.09
6 B     typeC             14.4 
7 C     typeA             11.9 
8 C     typeB            -17.9 
9 C     typeC             -8.31

CodePudding user response：

With dplyr you can do

dd %>% 
  group_by(name, type) %>% 
  summarize(pct_change=(value[exp=="experiment"]-value[exp=="control"])/value[exp=="control"]*100)

of if you wanted to clean it up a bit

dd %>% 
  group_by(name, type) %>% 
  summarize(
    experiment = value[exp=="experiment"], 
    control = value[exp=="control"], .groups="keep") %>% 
  transmute(pct_change = (experiment-control)/control*100)

CodePudding user response：

You could pivot_wider to make the calculation simpler:

library(tidyr)
library(dplyr)
df %>% 
    pivot_wider(names_from="exp") %>% 
    group_by(name, type) %>% 
    summarise(percentage_change = ((experiment - control) / control) * 100)

# A tibble: 9 × 3
# Groups:   name [3]
  name  type  percentage_change
  <chr> <chr>             <dbl>
1 A     typeA            106.  
2 A     typeB             -9.13
3 A     typeC             31.6 
4 B     typeA            135.  
5 B     typeB             -8.09
6 B     typeC             14.4 
7 C     typeA             11.9 
8 C     typeB            -17.9 
9 C     typeC             -8.31

CodePudding user response：

With dplyr:

df %>% 
  pivot_wider(names_from = exp, values_from = value) %>% 
  mutate(percentage_change = (experiment/control - 1) * 100)

# A tibble: 9 × 5
  name  type  control experiment percentage_change
  <chr> <chr>   <dbl>      <dbl>             <dbl>
1 A     typeA   1.20       2.47             106.  
2 A     typeB   0.856      0.778             -9.13
3 A     typeC   0.598      0.787             31.6 
4 B     typeA   0.473      1.11             135.  
5 B     typeB   3.54       3.25              -8.09
6 B     typeC   0.514      0.588             14.4 
7 C     typeA   3.38       3.79              11.9 
8 C     typeB   0.918      0.753            -17.9 
9 C     typeC   6.84       6.27              -8.31

CodePudding user response：

df %>%
  group_by(type, name) %>%
  summarise(percentage_change = nth(value,2)/nth(value,1)*100 - 100)
# A tibble: 9 × 3
# Groups:   type [3]
  type  name  percentage_change
  <chr> <chr>             <dbl>
1 typeA A                106.  
2 typeA B                135.  
3 typeA C                 11.9 
4 typeB A                 -9.13
5 typeB B                 -8.09
6 typeB C                -17.9 
7 typeC A                 31.6 
8 typeC B                 14.4 
9 typeC C                 -8.31