I'm looking to do something that's out of my coding league. I have included 2 snippets of real data below.
Basically, I want to mutate/add a column (or even create a new data frame, if that is easier) containing the % difference in a measurement, "median.fsc", between rows that share the same "strain" and the same experiment "run" but differ in "starvation" state.
So for example
- I want the % difference of median.fsc of 381-starved / 381-enriched for run 1
I presume I would be using dplyr, but I have no clue where to start!
There is some further manipulation that I also want to do, but I am guessing I can adapt whatever code I end up using:
- Strain comparison (median.fsc) expressed as a %, i.e. 381.dwecA (enriched) / 381 (enriched) in run 1
# dput() output of the first data snippet: a 9-row tibble in which every row
# has starvation = "enriched" (level code 2), live.dead = "live" and run = "1".
# Strain level 9 ("381.dE.dG.cE.cG.c1") is declared in the factor levels but
# does not occur in the rows. The expression is not assigned to a name here.
structure(list(strain = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 10L), .Label = c("381", "381.dwecA", "381.dwecA.cwecA", "381.dwecE",
"381.dwecE.cwecE", "381.dwaaG", "381.dwaaG.cwaaG", "381.dE.dG",
"381.dE.dG.cE.cG.c1", "381.dE.dG.cE.cG.c2"), class = "factor"),
starvation = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("starved", "enriched"), class = "factor"),
live.dead = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("live", "mixed"), class = "factor"), geomean.fsc = c(9283.6,
8012.8, 8030.1, 9464.1, 8286, 7903.6, 10600.7, 8539.2, 8935.1
), mean.fsc = c(11002.9, 9551.4, 9606.6, 10747.4, 9824, 9277,
11874.9, 9645.4, 10537.6), median.fsc = c(10581.7, 8420.3,
8393.4, 10050.1, 8771.6, 7979.3, 11526, 8371.3, 9701.5),
std.dv = c(5808.4, 5482.6, 5533, 5264, 5481.1, 5250, 5202.3,
4816.8, 5654.1), run = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor")), row.names = c(NA,
-9L), class = c("tbl_df", "tbl", "data.frame"))
# dput() output of the second data snippet: a 9-row tibble in which every row
# has starvation = "starved" (level code 1), live.dead = "live" and run = "1".
# std.dv is NA for every row. Strain level 9 ("381.dE.dG.cE.cG.c1") is declared
# in the factor levels but does not occur in the rows. The expression is not
# assigned to a name here.
structure(list(strain = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 10L), .Label = c("381", "381.dwecA", "381.dwecA.cwecA", "381.dwecE",
"381.dwecE.cwecE", "381.dwaaG", "381.dwaaG.cwaaG", "381.dE.dG",
"381.dE.dG.cE.cG.c1", "381.dE.dG.cE.cG.c2"), class = "factor"),
starvation = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("starved", "enriched"), class = "factor"),
live.dead = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("live", "mixed"), class = "factor"), geomean.fsc = c(2123.8,
2426.4, 2056.6, 2556.5, 2452, 2703.3, 2065, 3875, 2362.6),
mean.fsc = c(2482.7, 2867, 2321, 2757.6, 2651.4, 3107.2,
2368.9, 4308.2, 2571.1), median.fsc = c(1924.9, 2177, 1896,
2512.2, 2378.8, 2442.8, 1883.6, 3882.8, 2263.4), std.dv = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), run = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor")), row.names = c(NA,
-9L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
Let's assume your datasets are called `df_starved` and `df_enriched`:
library(dplyr)

# Pair each starved measurement with the enriched measurement of the same
# strain in the same run. Joining on the keys (instead of dividing the two
# columns position by position) keeps the ratios correct even when the rows of
# the two tables are ordered differently or a strain is missing from one table.
df_new <- df_starved %>%
  inner_join(
    df_enriched,
    by = c("strain", "run"),
    suffix = c("_starved", "_enriched")
  ) %>%
  # The strain comparison is made against the reference strain of the same run.
  group_by(run) %>%
  mutate(
    # Starved median expressed as a percentage of the enriched median.
    diff_median = median.fsc_starved / median.fsc_enriched * 100,
    # Enriched median as a percentage of the reference strain "381".
    # match() looks the reference up by name rather than assuming it is row 1,
    # and yields NA (not an error) if "381" is absent from a run.
    strain_comp_enriched =
      median.fsc_enriched / median.fsc_enriched[match("381", strain)] * 100
  ) %>%
  ungroup() %>%
  select(strain, diff_median, strain_comp_enriched, run)
df_new
CodePudding user response:
Would you consider joining the two tables on their matching columns and then performing the calculations on the joined result?
library("tidyverse")

# Join the two tables on strain and run, then compare the measurements of the
# paired rows whose starvation state differs.
full_join(
  # Convert the join keys to plain types. `run` is a factor, so it goes through
  # as.character() first: as.integer() applied directly to a factor returns the
  # internal level codes, which match the labels only by coincidence.
  mutate(df_A, strain = as.character(strain),
         run = as.integer(as.character(run))),
  mutate(df_B, strain = as.character(strain),
         run = as.integer(as.character(run))),
  by = c("strain", "run"),
  suffix = c("_dtaA", "_dtaB")
) %>%
  # Inspect the joined table before filtering.
  glimpse() %>%
  # Keep only pairs with differing starvation states; rows left unmatched by
  # the full join have NA on one side and are dropped here as well.
  filter(starvation_dtaA != starvation_dtaB) %>%
  # Placeholder calculation: difference of the medians (dtaA minus dtaB).
  mutate(any_calcs = median.fsc_dtaA - median.fsc_dtaB) %>%
  select(
    starts_with("strain"), starts_with("run"), contains("any_calcs"),
    starts_with("starvation")
  ) %>%
  glimpse()
Results
# Rows: 9
# Columns: 5
# $ strain <chr> "381", "381.dwecA", "381.dwecA.cwecA", "381.dwecE", "381.dwecE.cwecE", "381.dwaa…
# $ run <int> 1, 1, 1, 1, 1, 1, 1, 1, 1
# $ any_calcs <dbl> 8656.8, 6243.3, 6497.4, 7537.9, 6392.8, 5536.5, 9642.4, 4488.5, 7438.1
# $ starvation_dtaA <fct> enriched, enriched, enriched, enriched, enriched, enriched, enriched, enriched, …
# $ starvation_dtaB <fct> starved, starved, starved, starved, starved, starved, starved, starved, starved