I have a dataset(df1) that looks like this
ID |New York Athletes
Base001 Aaron Judge
Bask001 Kevin Durant
Bask002 Julius Randle
Base002 Max Scherzer
I want to merge in two other datasets at the same time while not adding extra columns
ID |TEAM
Bask001 Nets
Bask002 Knicks
ID |TEAM
Base001 Yankees
Base002 Mets
df1<- df1 %>%
mutate(merge(df1,Base,by="ID",all.x = TRUE))%>%
mutate(merge(.,Base,by="ID",all.x = TRUE))
However when i do this i get
ID |New York Athletes|Teams |Teams.x|Team.y
Base001 Aaron Judge Yankees Yankees
Bask001 Kevin Durant Nets
Bask002 Julius Randle Knicks
Base002 Max Scherzer Mets Mets
I would like to get something like this
ID |New York Athletes|Teams
Base001 Aaron Judge Yankees
Bask001 Kevin Durant Nets
Bask002 Julius Randle Knicks
Base002 Max Scherzer Mets
CodePudding user response:
In base R
merge(df1, rbind(df2, df3))
ID New.York.Athletes TEAM
1 Base001 Aaron Judge Yankees
2 Base002 Max Scherzer Mets
3 Bask001 Kevin Durant Nets
4 Bask002 Julius Randle Knicks
CodePudding user response:
I think @KU99's answer is probably the simplest, but here's another option that uses coalesce
.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df1 <- tibble::tribble(
~ID, ~`New York Athletes`,
"Base001", "Aaron Judge",
"Bask001", "Kevin Durant",
"Bask002", "Julius Randle",
"Base002", "Max Scherzer")
df2 <- tibble::tibble(ID = c("Bask001", "Bask002"),
TEAM = c("Nets", "Knicks"))
df3 <- tibble::tibble(ID = c("Base001", "Base002"),
TEAM = c("Yankees", "Mets"))
df1 <- df1 %>%
mutate(merge(.,df2,by="ID",all.x = TRUE))%>%
mutate(merge(.,df3,by="ID",all.x = TRUE)) %>%
select(-TEAM) %>%
mutate(TEAM = coalesce(TEAM.x,TEAM.y )) %>%
select(-c(TEAM.x, TEAM.y))
df1
#> # A tibble: 4 × 3
#> ID `New York Athletes` TEAM
#> <chr> <chr> <chr>
#> 1 Base001 Aaron Judge Yankees
#> 2 Base002 Max Scherzer Mets
#> 3 Bask001 Kevin Durant Nets
#> 4 Bask002 Julius Randle Knicks
Created on 2022-04-15 by the reprex package (v2.0.1)
CodePudding user response:
Edited answer: You actually need to do two separate joins for this. Leaving first attempt if you want to join multiple dataframes at once, for future reference.
library(tidyverse)
df1 <- tribble(~ID, ~`New York Athletes`,
"Base001", "Aaron Judge",
"Bask001", "Kevin Durant",
"Bask002", "Julius Randle",
"Base002", "Max Scherzer")
df2 <- tribble(~ID,~TEAM,
"Bask001", "Nets",
"Bask002", "Knicks")
df3 <- tribble(~ID, ~TEAM,
"Base001", "Yankees",
"Base002", "Mets")
df1_1 <- full_join(df2, df3, by = c("ID", "TEAM"))
final_df <- left_join(df1, df1_1, by = "ID"); final_df
#> # A tibble: 4 × 3
#> ID `New York Athletes` TEAM
#> <chr> <chr> <chr>
#> 1 Base001 Aaron Judge Yankees
#> 2 Bask001 Kevin Durant Nets
#> 3 Bask002 Julius Randle Knicks
#> 4 Base002 Max Scherzer Mets
Created on 2022-04-15 by the reprex package (v2.0.1)
Yes you can use the purrr
function to do multiple joins at once as so:
library(tidyverse)
df1 <- tibble(x = c("A1", "A2", "B1", "B2"), y = c(1, 2, 3, 4))
df2 <- tibble(x = c("A1", "A2", "B1", "B2"), z = c(4, 5, 6, 7))
df3 <- tibble(x = c("A1", "A2", "B1", "B2"), delta = c(8, 9, 10, 11))
list_of_dataframes <- list(df1, df2, df3)
purrr::reduce(list_of_dataframes, left_join, by = "x")
#> # A tibble: 4 × 4
#> x y z delta
#> <chr> <dbl> <dbl> <dbl>
#> 1 A1 1 4 8
#> 2 A2 2 5 9
#> 3 B1 3 6 10
#> 4 B2 4 7 11
Created on 2022-04-15 by the reprex package (v2.0.1)