Home > Blockchain >  Reduce repetition within my case_when and recode statements?
Reduce repetition within my case_when and recode statements?

Time:11-30

Data

# Example input in long format: 21 rows, one per sample/compound measurement.
# Columns: sample (chr), compound (chr), conc (dbl), code (chr, "1" to "4").
# The object is a grouped tibble (class "grouped_df"), grouped by sample and
# compound, so downstream code has to ungroup() it or account for the groups.
pcadata <- structure(list(sample = c("1-1", "1-2", "1-3", "2-1", "2-2", 
"2-3", "3-1", "3-2", "3-3", "4-1", "4-2", "4-3", "1-1", "1-2", 
"1-3", "2-1", "2-2", "3-2", "4-1", "4-2", "4-3"), compound = c("Linalool", 
"Linalool", "Linalool", "Linalool", "Linalool", "Linalool", "Linalool", 
"Linalool", "Linalool", "Linalool", "Linalool", "Linalool", "Acetic Acid", 
"Acetic Acid", "Acetic Acid", "Acetic Acid", "Acetic Acid", "Acetic Acid", 
"Acetic Acid", "Acetic Acid", "Acetic Acid"), conc = c(82855, 
74398, 59563, 117635, 118724, 75271, 95219, 50870, 67546, 58063, 
86610, 88594, 263774, 99287, 79800, 529503, 666771, 117253, 101193, 
65006, 221687), code = c("1", "1", "1", "2", "2", "2", "3", "3", 
"3", "4", "4", "4", "1", "1", "1", "2", "2", "3", "4", "4", "4"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-21L), groups = structure(list(sample = c("1-1", "1-1", "1-2", 
"1-2", "1-3", "1-3", "2-1", "2-1", "2-2", "2-2", "2-3", "3-1", 
"3-2", "3-2", "3-3", "4-1", "4-1", "4-2", "4-2", "4-3", "4-3"
), compound = c("Acetic Acid", "Linalool", "Acetic Acid", "Linalool", 
"Acetic Acid", "Linalool", "Acetic Acid", "Linalool", "Acetic Acid", 
"Linalool", "Linalool", "Linalool", "Acetic Acid", "Linalool", 
"Linalool", "Acetic Acid", "Linalool", "Acetic Acid", "Linalool", 
"Acetic Acid", "Linalool"), .rows = structure(list(13L, 1L, 14L, 
    2L, 15L, 3L, 16L, 4L, 17L, 5L, 6L, 7L, 18L, 8L, 9L, 19L, 
    10L, 20L, 11L, 21L, 12L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -21L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

Code

# Attach the tidyverse through pacman.
pacman::p_load(tidyverse)
# Display labels for the raw codes; position i is the label for code i.
codes_vector <- c("code1", "code2", "code3", "code4", "code5")
# Colour palette; position i is the colour intended for codes_vector[i].
# There are 8 colours but only 5 labels, so the last three have no label.
colors_vector <- c("#1B9E77","#D95F02","#7570B3","#E7298A","#66A61E","#E6AB02","#A6761D", "#666666")

# Build a wide table (one row per sample, one column per compound) for codes
# 1 and 2, with the sample id as row name and a label and colour per row.
analysis1 <- pcadata %>% 
  # Keep only the rows whose code is 1 or 2.
  filter(code %in% c(1, 2)) %>% 
  # NOTE(review): the literal 4 is evaluated as a constant, not as a column
  # position, so it presumably adds nothing to the ordering - confirm intent.
  arrange(code, 4) %>%
  group_by(sample, compound) %>% 
  # One column per compound; sample/compound pairs with no measurement get 0.
  pivot_wider(names_from = compound,
              values_from = conc,
              values_fill = 0) %>% 
  ungroup() %>% 
  # Moves `sample` into the row names.
  column_to_rownames(var = "sample") %>% 
  # Translate each raw code into its label, one hand-written branch per level.
  mutate(code = recode(code,
                       `1` = codes_vector[1],
                       `2` = codes_vector[2],
                       `3` = codes_vector[3],
                       `4` = codes_vector[4],
                       `5` = codes_vector[5])) %>% 
  # Assign a colour per label, again one branch per level.
  # NOTE(review): codes_vector has 5 elements, so codes_vector[6] to [8] are
  # NA and the last three branches can never match.
  mutate(color = case_when(code == codes_vector[1] ~ "#1B9E77",
                           code == codes_vector[2] ~ "#D95F02",
                           code == codes_vector[3] ~ "#7570B3",
                           code == codes_vector[4] ~ "#E7298A",
                           code == codes_vector[5] ~ "#66A61E",
                           code == codes_vector[6] ~ "#E6AB02",
                           code == codes_vector[7] ~ "#A6761D",
                           code == codes_vector[8] ~ "#666666")) %>% 
  mutate(color=as.factor(color)) %>% 
  # Place the colour column directly after the label column.
  relocate(color, .after = code)

Question: Is there a way to reproduce this recode() and case_when() work without so much copying and pasting? How can I perform these operations more concisely or programmatically? Given that everything is indexed sequentially, I expect there is a way.

Something like this pseudo code:

mutate(code = recode(code[i] = codes_vector[i]))

and

mutate(color = case_when(code == codes_vector[i] ~ colors_vector[i]))

Desired output

# Target shape: a plain data.frame with the sample ids as row names, a
# character `code` label, a factor `color`, and one numeric column per
# compound.
# NOTE(review): the labels "hybrid"/"plant based" and the Nonanal column do not
# occur in the example data or codes_vector above; presumably they come from
# the asker's full data set - confirm.
structure(list(code = c("hybrid", "hybrid", "hybrid", "plant based", 
"plant based", "plant based"), color = structure(c(1L, 1L, 1L, 
2L, 2L, 2L), levels = c("#1B9E77", "#D95F02"), class = "factor"), 
    Linalool = c(82855, 74398, 59563, 117635, 118724, 75271), 
    Nonanal = c(45433, 27520, 28883, 0, 0, 52454), `Acetic Acid` = c(263774, 
    99287, 79800, 529503, 666771, 0)), row.names = c("1-1", "1-2", 
"1-3", "2-1", "2-2", "2-3"), class = "data.frame")

CodePudding user response:

We can simplify this by converting `code` to an integer and using it as an index into `codes_vector`, then creating `color` by looking up the new `code` values in a named vector built from `colors_vector` and `codes_vector`.

library(dplyr)
library(tidyr)

# Label each row and attach its colour by lookup, instead of writing one
# recode()/case_when() branch per level.
pcadata %>%
  ungroup() %>%
  mutate(
    # `code` holds "1", "2", ... so its integer value indexes codes_vector.
    code = codes_vector[as.integer(code)],
    # Pair only as many colours as there are labels (setNames() would give the
    # surplus palette entries NA names), then look each label up by name.
    # unname() keeps the lookup's names from being carried into the column.
    color = unname(
      setNames(colors_vector[seq_along(codes_vector)], codes_vector)[code]
    )
  ) %>%
  pivot_wider(names_from = compound, values_from = conc, values_fill = 0)

-output

# A tibble: 12 × 5
   sample code  color   Linalool `Acetic Acid`
   <chr>  <chr> <chr>      <dbl>         <dbl>
 1 1-1    code1 #1B9E77    82855        263774
 2 1-2    code1 #1B9E77    74398         99287
 3 1-3    code1 #1B9E77    59563         79800
 4 2-1    code2 #D95F02   117635        529503
 5 2-2    code2 #D95F02   118724        666771
 6 2-3    code2 #D95F02    75271             0
 7 3-1    code3 #7570B3    95219             0
 8 3-2    code3 #7570B3    50870        117253
 9 3-3    code3 #7570B3    67546             0
10 4-1    code4 #E7298A    58063        101193
11 4-2    code4 #E7298A    86610         65006
12 4-3    code4 #E7298A    88594        221687

CodePudding user response:

More generally, we can use `match()` to find each value's position in one vector and use that position to index another:

# Same result as the question's pipeline, but the per-level recode() and
# case_when() branches are replaced by positional lookups with match().
pcadata %>%
  # Drop the stored grouping up front; no step below needs it.
  ungroup() %>%
  # `code` is character, so compare against character values explicitly.
  filter(code %in% c("1", "2")) %>%
  arrange(code) %>%
  # One column per compound; sample/compound pairs with no measurement get 0.
  pivot_wider(names_from = compound,
              values_from = conc,
              values_fill = 0) %>%
  column_to_rownames(var = "sample") %>%
  mutate(
    # The position of each raw code among 1..length(codes_vector) selects its
    # label, so extending codes_vector needs no change here.
    code = codes_vector[match(code, seq_along(codes_vector))],
    # The label's position in codes_vector selects the colour at that position.
    color = as.factor(colors_vector[match(code, codes_vector)])
  ) %>%
  relocate(color, .after = code)
#>      code   color Linalool Acetic Acid
#> 1-1 code1 #1B9E77    82855      263774
#> 1-2 code1 #1B9E77    74398       99287
#> 1-3 code1 #1B9E77    59563       79800
#> 2-1 code2 #D95F02   117635      529503
#> 2-2 code2 #D95F02   118724      666771
#> 2-3 code2 #D95F02    75271           0
  • Related