Here's the data
structure(list(name = c("A", "A", "A", "A", "A", "A", "B", "B",
"B", "B", "B", "B", "C", "C", "C", "C", "C", "C"), value = c(1.19836644827586,
2.46856144477028, 0.856237188191882, 0.778078289325843, 0.59811550273224,
0.787017526104418, 0.473075959100205, 1.11257028100264, 3.53950002293968,
3.25319619936034, 0.514323313099042, 0.58826350129199, 3.38210735688006,
3.78537735596708, 0.917653452784504, 0.753012044982699, 6.84112906311637,
6.27268644079398), exp = c("control", "experiment", "control",
"experiment", "control", "experiment", "control", "experiment",
"control", "experiment", "control", "experiment", "control",
"experiment", "control", "experiment", "control", "experiment"
), type = c("typeA", "typeA", "typeB", "typeB", "typeC", "typeC",
"typeA", "typeA", "typeB", "typeB", "typeC", "typeC", "typeA",
"typeA", "typeB", "typeB", "typeC", "typeC")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -18L))
For each type and for each name I want to find the percentage difference of experiment over control.
Expected output:
name type percentage_change
<chr> <chr> <dbl>
1 A typeA 106.
2 A typeB -9.13
3 A typeC 31.6
4 B typeA 135.
5 B typeB -8.09
6 B typeC 14.4
7 C typeA 11.9
8 C typeB -17.9
9 C typeC -8.31
I have tried many things, but I couldn't able to wrap my head around this.
Thank you.
CodePudding user response:
- try this
library(dplyr)
df |> group_by(name , type) |>
summarise(percentage_change = (value[2] - value[1]) / value[1] * 100)
- output
# A tibble: 9 × 3
# Groups: name [3]
name type percentage_change
<chr> <chr> <dbl>
1 A typeA 106.
2 A typeB -9.13
3 A typeC 31.6
4 B typeA 135.
5 B typeB -8.09
6 B typeC 14.4
7 C typeA 11.9
8 C typeB -17.9
9 C typeC -8.31
CodePudding user response:
With dplyr
you can do
dd %>%
group_by(name, type) %>%
summarize(pct_change=(value[exp=="experiment"]-value[exp=="control"])/value[exp=="control"]*100)
of if you wanted to clean it up a bit
dd %>%
group_by(name, type) %>%
summarize(
experiment = value[exp=="experiment"],
control = value[exp=="control"], .groups="keep") %>%
transmute(pct_change = (experiment-control)/control*100)
CodePudding user response:
You could pivot_wider
to make the calculation simpler:
library(tidyr)
library(dplyr)
df %>%
pivot_wider(names_from="exp") %>%
group_by(name, type) %>%
summarise(percentage_change = ((experiment - control) / control) * 100)
# A tibble: 9 × 3
# Groups: name [3]
name type percentage_change
<chr> <chr> <dbl>
1 A typeA 106.
2 A typeB -9.13
3 A typeC 31.6
4 B typeA 135.
5 B typeB -8.09
6 B typeC 14.4
7 C typeA 11.9
8 C typeB -17.9
9 C typeC -8.31
CodePudding user response:
With dplyr:
df %>%
pivot_wider(names_from = exp, values_from = value) %>%
mutate(percentage_change = (experiment/control - 1) * 100)
# A tibble: 9 × 5
name type control experiment percentage_change
<chr> <chr> <dbl> <dbl> <dbl>
1 A typeA 1.20 2.47 106.
2 A typeB 0.856 0.778 -9.13
3 A typeC 0.598 0.787 31.6
4 B typeA 0.473 1.11 135.
5 B typeB 3.54 3.25 -8.09
6 B typeC 0.514 0.588 14.4
7 C typeA 3.38 3.79 11.9
8 C typeB 0.918 0.753 -17.9
9 C typeC 6.84 6.27 -8.31
CodePudding user response:
df %>%
group_by(type, name) %>%
summarise(percentage_change = nth(value,2)/nth(value,1)*100 - 100)
# A tibble: 9 × 3
# Groups: type [3]
type name percentage_change
<chr> <chr> <dbl>
1 typeA A 106.
2 typeA B 135.
3 typeA C 11.9
4 typeB A -9.13
5 typeB B -8.09
6 typeB C -17.9
7 typeC A 31.6
8 typeC B 14.4
9 typeC C -8.31