Data
# Example data in long format (dput() output): 21 rows with columns
# `sample`, `compound`, `conc` and `code`. The object is a grouped tibble
# (grouped by `sample` and `compound`, one row per group). "Linalool" has a
# value for all 12 samples; "Acetic Acid" has none for samples 2-3, 3-1, 3-3.
pcadata <- structure(list(sample = c("1-1", "1-2", "1-3", "2-1", "2-2",
"2-3", "3-1", "3-2", "3-3", "4-1", "4-2", "4-3", "1-1", "1-2",
"1-3", "2-1", "2-2", "3-2", "4-1", "4-2", "4-3"), compound = c("Linalool",
"Linalool", "Linalool", "Linalool", "Linalool", "Linalool", "Linalool",
"Linalool", "Linalool", "Linalool", "Linalool", "Linalool", "Acetic Acid",
"Acetic Acid", "Acetic Acid", "Acetic Acid", "Acetic Acid", "Acetic Acid",
"Acetic Acid", "Acetic Acid", "Acetic Acid"), conc = c(82855,
74398, 59563, 117635, 118724, 75271, 95219, 50870, 67546, 58063,
86610, 88594, 263774, 99287, 79800, 529503, 666771, 117253, 101193,
65006, 221687), code = c("1", "1", "1", "2", "2", "2", "3", "3",
"3", "4", "4", "4", "1", "1", "1", "2", "2", "3", "4", "4", "4"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-21L), groups = structure(list(sample = c("1-1", "1-1", "1-2",
"1-2", "1-3", "1-3", "2-1", "2-1", "2-2", "2-2", "2-3", "3-1",
"3-2", "3-2", "3-3", "4-1", "4-1", "4-2", "4-2", "4-3", "4-3"
), compound = c("Acetic Acid", "Linalool", "Acetic Acid", "Linalool",
"Acetic Acid", "Linalool", "Acetic Acid", "Linalool", "Acetic Acid",
"Linalool", "Linalool", "Linalool", "Acetic Acid", "Linalool",
"Linalool", "Acetic Acid", "Linalool", "Acetic Acid", "Linalool",
"Acetic Acid", "Linalool"), .rows = structure(list(13L, 1L, 14L,
2L, 15L, 3L, 16L, 4L, 17L, 5L, 6L, 7L, 18L, 8L, 9L, 19L,
10L, 20L, 11L, 21L, 12L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -21L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
Code
pacman::p_load(tidyverse)

# Display labels for the raw codes "1".."5", in code order.
codes_vector <- c("code1", "code2", "code3", "code4", "code5")
# Colour palette; colors_vector[i] is the colour for codes_vector[i].
# It holds more colours than there are labels; the extras are unused.
colors_vector <- c(
  "#1B9E77", "#D95F02", "#7570B3", "#E7298A",
  "#66A61E", "#E6AB02", "#A6761D", "#666666"
)

# Build a wide, row-named data frame for codes 1 and 2: one row per sample,
# one column per compound, plus a label column (`code`) and a colour factor.
analysis1 <- pcadata %>%
  filter(code %in% c(1, 2)) %>%
  # Sort by code only; the former second argument was a bare constant (4),
  # which cannot influence the ordering.
  arrange(code) %>%
  group_by(sample, compound) %>%
  # Compounds missing for a sample become 0 rather than NA.
  pivot_wider(
    names_from = compound,
    values_from = conc,
    values_fill = 0
  ) %>%
  ungroup() %>%
  column_to_rownames(var = "sample") %>%
  # Raw code -> label, spelled out one pair at a time.
  mutate(code = recode(code,
    `1` = codes_vector[1],
    `2` = codes_vector[2],
    `3` = codes_vector[3],
    `4` = codes_vector[4],
    `5` = codes_vector[5]
  )) %>%
  # Label -> colour. Only indices that exist in codes_vector are listed:
  # indexing past its end yields NA, and a comparison with NA never matches.
  mutate(color = case_when(
    code == codes_vector[1] ~ colors_vector[1],
    code == codes_vector[2] ~ colors_vector[2],
    code == codes_vector[3] ~ colors_vector[3],
    code == codes_vector[4] ~ colors_vector[4],
    code == codes_vector[5] ~ colors_vector[5]
  )) %>%
  mutate(color = as.factor(color)) %>%
  relocate(color, .after = code)
Question
Is there a way for me to replicate this case_when() work without so much copying and pasting? How can I iterate these operations more concisely or programmatically? Since the codes and colors follow a regular sequence, I expect there is a way.
Something like this pseudo code:
mutate(code = recode(code[i] = codes_vector[i]))
and
mutate(color = case_when(code == codes_vector[i] ~ colors_vector[i]))
Desired output
# Desired result (dput() output): a base data.frame with sample IDs as row
# names, a character `code` label, a two-level factor `color`, and one numeric
# column per compound.
# NOTE(review): the labels "hybrid"/"plant based" and the `Nonanal` column do
# not occur in the sample `pcadata`/`codes_vector` shown in the question;
# presumably they come from the asker's full data set.
structure(list(code = c("hybrid", "hybrid", "hybrid", "plant based",
"plant based", "plant based"), color = structure(c(1L, 1L, 1L,
2L, 2L, 2L), levels = c("#1B9E77", "#D95F02"), class = "factor"),
Linalool = c(82855, 74398, 59563, 117635, 118724, 75271),
Nonanal = c(45433, 27520, 28883, 0, 0, 52454), `Acetic Acid` = c(263774,
99287, 79800, 529503, 666771, 0)), row.names = c("1-1", "1-2",
"1-3", "2-1", "2-2", "2-3"), class = "data.frame")
CodePudding user response:
We can simplify this by converting the `code` column to integer and using it as an index to pick the replacement values from `codes_vector`. The `color` column is then created by looking up the new `code` values in a named vector built from `colors_vector` and `codes_vector`:
library(dplyr)
library(tidyr)
# Relabel codes by position, attach the matching colour via a named lookup
# vector (label -> colour), then spread compounds into columns.
pcadata %>%
  ungroup() %>%
  mutate(
    # "1", "2", ... -> codes_vector[1], codes_vector[2], ...
    code = codes_vector[as.integer(code)],
    # Name only as many colours as there are labels, so no NA names are
    # created when the palette is longer than codes_vector. unname() keeps
    # the lookup names from being carried along in the resulting column.
    color = unname(
      setNames(colors_vector[seq_along(codes_vector)], codes_vector)[code]
    )
  ) %>%
  # Compounds missing for a sample become 0 rather than NA.
  pivot_wider(names_from = compound, values_from = conc, values_fill = 0)
-output
# A tibble: 12 × 5
sample code color Linalool `Acetic Acid`
<chr> <chr> <chr> <dbl> <dbl>
1 1-1 code1 #1B9E77 82855 263774
2 1-2 code1 #1B9E77 74398 99287
3 1-3 code1 #1B9E77 59563 79800
4 2-1 code2 #D95F02 117635 529503
5 2-2 code2 #D95F02 118724 666771
6 2-3 code2 #D95F02 75271 0
7 3-1 code3 #7570B3 95219 0
8 3-2 code3 #7570B3 50870 117253
9 3-3 code3 #7570B3 67546 0
10 4-1 code4 #E7298A 58063 101193
11 4-2 code4 #E7298A 86610 65006
12 4-3 code4 #E7298A 88594 221687
CodePudding user response:
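In general, we can use the function `match()`, which returns the position of each value in a lookup vector; that position can then be used to index a second vector: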
# Same pipeline as in the question, with the recode()/case_when() chains
# replaced by positional lookups via match().
pcadata %>%
  filter(code %in% c(1, 2)) %>%
  # Sort by code only; the former second argument was a bare constant (4),
  # which cannot influence the ordering.
  arrange(code) %>%
  group_by(sample, compound) %>%
  # Compounds missing for a sample become 0 rather than NA.
  pivot_wider(
    names_from = compound,
    values_from = conc,
    values_fill = 0
  ) %>%
  ungroup() %>%
  column_to_rownames(var = "sample") %>%
  mutate(
    # Raw code -> label. seq_along() tracks the length of codes_vector, so
    # adding labels needs no further edits here.
    code = codes_vector[match(code, seq_along(codes_vector))],
    # Label -> colour at the same position; unmatched labels give NA.
    color = factor(colors_vector[match(code, codes_vector)])
  ) %>%
  relocate(color, .after = code)
#> code color Linalool Acetic Acid
#> 1-1 code1 #1B9E77 82855 263774
#> 1-2 code1 #1B9E77 74398 99287
#> 1-3 code1 #1B9E77 59563 79800
#> 2-1 code2 #D95F02 117635 529503
#> 2-2 code2 #D95F02 118724 666771
#> 2-3 code2 #D95F02 75271 0