My test data:
df = structure(list(field_name = c("fruit1", "fruit2"), field_label = c("Yellow fruits",
"Green fruits"), value = c("1, Pineapple | 2, Mango | 3, Peach",
"1, Lemon | 2, Apple")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L))
I need to separate the value
column based on the pipe delimiter and create multiple lines (pivot_longer
) but considering that the each value has a number that i would like to append to the field_name
My desired output:
df_out=structure(list(field_name = c("fruit1_1", "fruit1_2", "fruit1_3",
"fruit2_1", "fruit2_2"), field_label = c("Yellow fruits", "Yellow fruits",
"Yellow fruits", "Green fruits", "Green fruits"), value = c("Pineapple",
"Mango", "Peach", "Lemon", "Apple")), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -5L))
CodePudding user response:
You can first separate rows using tidyr::separate_rows
only on the pipe and then use extract
to create two columns - number and fruit, then mutate
to add the number to the field_name
library(tidyr)
library(dplyr)
df %>%
separate_rows("value", sep = " \\| ") %>%
extract(value, into = c("first", "value"), regex = "(\\d ), (.*)") %>%
mutate(field_name = paste(field_name, first, sep = "_")) %>%
select(-first)
# A tibble: 5 x 3
# field_name field_label value
# <chr> <chr> <chr>
# fruit1_1 Yellow fruits Pineapple
# fruit1_2 Yellow fruits Mango
# fruit1_3 Yellow fruits Peach
# fruit2_1 Green fruits Lemon
# fruit2_2 Green fruits Apple
CodePudding user response:
Here is an alternative approach:
- Using
separate_rows
fromtidyr
package we could separatevalue
- Use lead to bring
value
on the same line - with
filter
we keep all even row numbers - paste for
field_name
- tweak with
select
library(dplyr)
library(tidyr)
df %>%
separate_rows(value) %>%
mutate(value1 = lead(value)) %>%
filter(row_number() %% 2 == 1) %>%
mutate(field_name = paste(field_name, value, sep = "_")) %>%
select(-value, value = value1)
field_name field_label value
<chr> <chr> <chr>
1 fruit1_1 Yellow fruits Pineapple
2 fruit1_2 Yellow fruits Mango
3 fruit1_3 Yellow fruits Peach
4 fruit2_1 Green fruits Lemon
5 fruit2_2 Green fruits Apple
CodePudding user response:
Using by
.
do.call(rbind, c(by(df, df$field_name, \(x) {
s <- do.call(rbind, strsplit(el(strsplit(x$value, " \\| ")), ", "))
r <- Reduce(\(x, y) paste0(x, "_", y), data.frame(x$field_name, s[, 1]))
setNames(cbind.data.frame(r, x$field_label, s[, 2]), names(x))
}), make.row.names=FALSE))
# field_name field_label value
# 1 fruit1_1 Yellow fruits Pineapple
# 2 fruit1_2 Yellow fruits Mango
# 3 fruit1_3 Yellow fruits Peach
# 4 fruit2_1 Green fruits Lemon
# 5 fruit2_2 Green fruits Apple
CodePudding user response:
Here is a tidyverse
way. Use str_extract_all
to get what are not numbers, unlist the result and finally, put the output values in the required formats.
df = structure(list(field_name = c("fruit1", "fruit2"),
field_label = c("Yellow fruits",
"Green fruits"),
value = c("1, Pineapple | 2, Mango | 3, Peach",
"1, Lemon | 2, Apple")),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -2L))
suppressPackageStartupMessages(library(dplyr))
library(tidyr)
df %>%
mutate(value = stringr::str_extract_all(value, pattern = "\\D ")) %>%
unnest(cols = value) %>%
group_by(field_label) %>%
mutate(field_name = paste(sub("\\d$", "", field_name), row_number(), sep = "_"),
value = gsub(", | \\| ", "", value))
#> # A tibble: 5 x 3
#> # Groups: field_label [2]
#> field_name field_label value
#> <chr> <chr> <chr>
#> 1 fruit_1 Yellow fruits Pineapple
#> 2 fruit_2 Yellow fruits Mango
#> 3 fruit_3 Yellow fruits Peach
#> 4 fruit_1 Green fruits Lemon
#> 5 fruit_2 Green fruits Apple
Created on 2022-03-28 by the reprex package (v2.0.1)