I have two dataframe like this
```
v1. v2
1 a,b,c 1,2,3
2 d,e,f,g 4,6
3 h,k,v,x 9,0
```
```
v1 v2
1 a AA
2 c CC
3 d DD
```
after combine
```
v1 v2 v3
1 a,b,c 1,2,3 AA,CC
2. d,e,f,g 4,6 DD
3 h,k,v,x 9,0
```
I dont know how to perform like this , any comment would be appreciated
CodePudding user response:
library(tidyverse)
library(fuzzyjoin)
df1 %>%
regex_left_join(., df2, by = c(v1 = "v1")) %>%
group_by(v1 = v1.x, v2 = v2.x) %>%
summarise(v3 = paste0(v2.y, collapse = ","))
# v1 v2 v3
# <chr> <chr> <chr>
# 1 a,b,c 1,2,3 AA,CC
# 2 d,e,f,g 4,6 DD
# 3 h,k,v,x 9,0 NA
Sample data used
df1 <- read.table(text = "v1 v2
a,b,c 1,2,3
d,e,f,g 4,6
h,k,v,x 9,0", header = TRUE)
df2 <- read.table(text = "v1 v2
a AA
c CC
d DD", header = TRUE)
CodePudding user response:
Here is a very long tidyverse
pipe. There should be simpler solutions.
library(dplyr)
library(tidyr)
df1 %>%
mutate(id = row_number()) %>%
separate(v1, into = c("v1a", "v1b", "v1c", "v1d"), fill = "right") %>%
pivot_longer(
cols = starts_with("v1"),
names_to = "v1_col",
values_to = "v1"
) %>%
na.omit() %>%
separate(v2, into = c("v2a", "v2b", "v2c"), fill = "right") %>%
pivot_longer(
cols = starts_with("v2"),
names_to = "v2_col",
values_to = "v2_value"
) %>%
na.omit() %>%
select(-ends_with("col")) %>%
left_join(df2, by = "v1") %>%
group_by(id, v1, v2) %>%
summarise(v2_value = paste(v2_value, collapse = ","),
.groups = "drop") %>%
group_by(id, v2_value) %>%
summarise(v1 = paste(v1, collapse = ","),
v3 = paste(na.omit(v2), collapse = ","),
.groups = "drop") %>%
ungroup() %>%
select(-id) %>%
rename(v2 = v2_value) %>%
relocate(v2, .after = v1)
## A tibble: 3 x 3
# v1 v2 v3
# <chr> <chr> <chr>
#1 a,b,c 1,2,3 "AA,CC"
#2 d,e,f,g 4,6 "DD"
#3 h,k,v,x 9,0 ""