I have the following data frame:
`1_X94` <- c("apple", "lemon", "orange")
`2_X94` <- c("apple", "strawberry", "lemon")
`1_X09` <- c(1, 2, 3)
`2_X09` <- c(4, 5, 6)
`1_X38` <- c("red", "yellow", "orange")
`2_X38` <- c("red", "red", "yellow")
df <- data.frame(`1_X94`, `2_X94`, `1_X09`, `2_X09`, `1_X38`, `2_X38`)
And I have a second data frame:
fruit <- c("apple", "watermelon")
fruit_list <- data.frame(fruit)
What I would like to accomplish is, whenever there is a column name with the regex pattern of ^\d _X94?
with a value that matches the fruit_list
data frame, it replaces the column name with the regex pattern of ^\d _X38
with the word "green."
I currently have the following code, but I want to add some of the automated aspects so I don't have to list all the fruits in the str_detect()
and create multiple mutate
commands for X1, X2, etc.
library(tidyverse)
library(stringr)
df <- df %>%
mutate(
X1_X38 = case_when(
str_detect(X1_X94, "apple|watermelon") ~ "green",
TRUE ~ .$X1_X38
)
) %>%
mutate(
X2_X38 = case_when(
str_detect(X2_X94, "apple|watermelon") ~ "green",
TRUE ~ .$X2_X38
)
)
Any guidance would be appreciated.
CodePudding user response:
This does not feel like the most efficient way, but here is an option:
library(tidyverse)
df|>
mutate(row = row_number()) |>
pivot_longer(names_pattern = "(X\\d)_(X\\d )",
names_to = c("X1", "X2"),
values_transform = as.character,
cols = -row)|>
pivot_wider(names_from = X2, values_from = value) |>
mutate(X38 = ifelse(X94 %in% fruit_list$fruit, "green", X38)) |>
pivot_longer(c(X38,X09, X94)) |>
pivot_wider(names_from = c(X1, name),
names_glue = "{X1}_{name}",
values_from = value)
#> # A tibble: 3 x 7
#> row X1_X38 X1_X09 X1_X94 X2_X38 X2_X09 X2_X94
#> <int> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 1 green 1 apple green 4 apple
#> 2 2 yellow 2 lemon red 5 strawberry
#> 3 3 orange 3 orange yellow 6 lemon
EDIT
This feels a little cleaner:
library(tidyverse)
#helper
col_split <- function(dat){
list(
dat[,grepl("X1_", colnames(dat))],
dat[,grepl("X2_", colnames(dat))]
)
}
df |>
col_split() |>
map_dfc(\(x) mutate(x, across(ends_with("X38"),
\(y) ifelse(x[,grepl("X94", colnames(x))] %in% fruit_list$fruit,
"green", y))))
#> X1_X94 X1_X09 X1_X38 X2_X94 X2_X09 X2_X38
#> 1 apple 1 green apple 4 green
#> 2 lemon 2 yellow strawberry 5 red
#> 3 orange 3 orange lemon 6 yellow
CodePudding user response:
We can use across
library(dplyr)
library(stringr)
df %>%
mutate(across(ends_with('_X38'),
~ case_when(get(str_replace(cur_column(), "_X38$", "_X94")) %in%
fruit ~ "green", TRUE ~ .x)))
-output
X1_X94 X2_X94 X1_X09 X2_X09 X1_X38 X2_X38
1 apple apple 1 4 green green
2 lemon strawberry 2 5 yellow red
3 orange lemon 3 6 orange yellow