I have the following two dataframes. I'm trying to combine the two using a left_join
by Key
, however I'm getting an error
df <- structure(list(Key = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L), levels = c("GC23", "GC24", "GC25",
"GC26", "GC27", "GC28", "GC30", "GC35", "GC45", "GC48", "GC50"
), class = "factor"), Quartile = structure(c(1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), levels = c("1", "2", "3", "4"
), class = "factor"), min = c(0.800000000000001, 70.2, 102.9,
124.4, -108.1, -0.200000000000067, 63.2, 124.4, -70.9999999999999,
5.29999999999999, 67.0999999999999, 144.3, -52.1000000000001,
16, 37.1, 51.6999999999999, -19.2, 75.0999999999999, 92.7999999999999,
161.8, -45.5, -3.80000000000003, 12.7000000000001, 31.3000000000001,
3.30000000000013, 107.9, 120.2, 143.4, 29.7000000000001, 102.1,
138.3, 172.3, 83.9, 183.6, 216.6, 240.3, 202.1, 258.6, 290.9,
321.9, 107.5, 201.1, 247.1, 290.1), max = c(70.1, 102.8, 124.3,
342.6, -0.200000000000067, 63.1, 124.4, 190.2, 4.79999999999992,
67.0999999999999, 144.2, 209.7, 16, 37.1, 51.3999999999999, 131.7,
75, 92.7000000000001, 161.3, 250.3, -4.70000000000006, 12.5999999999999,
30.1, 62.9, 107.8, 119.8, 143.2, 192.3, 102, 138.2, 172.3, 258,
183.5, 216.6, 240.3, 349.4, 258.5, 290.9, 321.9, 374.5, 201.1,
247, 289.6, 400.9)), row.names = c(NA, -44L), class = c("tbl_df",
"tbl", "data.frame"))
df2 <- structure(list(Key = structure(1:11, levels = c("GC23", "GC24",
"GC25", "GC26", "GC27", "GC28", "GC30", "GC35", "GC45", "GC48",
"GC50"), class = "factor"), Quartile = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = "Today", class = "factor"),
min = c(131.8, -47.2, -12.2, 36.1000000000001, 67.3, 27.4999999999999,
119, 133, 235.3, 287.6, 303.3), max = c(131.8, -47.2, -12.2,
36.1000000000001, 67.3, 27.4999999999999, 119, 133, 235.3,
287.6, 303.3)), row.names = c(NA, -11L), class = c("tbl_df",
"tbl", "data.frame"))
left_join(df,df2, by = Key)
Error in standardise_join_by(by, x_names = x_names, y_names = y_names, :
object 'Key' not found
Is there any way around this issue?
CodePudding user response:
Using base merge
. Since df
and df2
share Key
there's no need to specify it. all=TRUE
means, we want all rows of df
and all of df2
.
res <- merge(df, df2, all=TRUE)
head(res)
# Key Quartile min max
# 1 GC23 1 0.8 70.1
# 2 GC23 2 70.2 102.8
# 3 GC23 3 102.9 124.3
# 4 GC23 4 124.4 342.6
# 5 GC23 Today 131.8 131.8
# 6 GC24 1 -108.1 -0.2
CodePudding user response:
It should be in quotes i.e. a character
vector
library(dplyr)
left_join(df,df2, by = "Key")
-output
# A tibble: 44 × 7
Key Quartile.x min.x max.x Quartile.y min.y max.y
<fct> <fct> <dbl> <dbl> <fct> <dbl> <dbl>
1 GC23 1 0.800 70.1 Today 132. 132.
2 GC23 2 70.2 103. Today 132. 132.
3 GC23 3 103. 124. Today 132. 132.
4 GC23 4 124. 343. Today 132. 132.
5 GC24 1 -108. -0.200 Today -47.2 -47.2
6 GC24 2 -0.200 63.1 Today -47.2 -47.2
7 GC24 3 63.2 124. Today -47.2 -47.2
8 GC24 4 124. 190. Today -47.2 -47.2
9 GC25 1 -71.0 4.80 Today -12.2 -12.2
10 GC25 2 5.30 67.1 Today -12.2 -12.2
# … with 34 more rows
Update
If we want to add new rows, use bind_rows
> bind_rows(df, df2)
# A tibble: 55 × 4
Key Quartile min max
<fct> <fct> <dbl> <dbl>
1 GC23 1 0.800 70.1
2 GC23 2 70.2 103.
3 GC23 3 103. 124.
4 GC23 4 124. 343.
5 GC24 1 -108. -0.200
6 GC24 2 -0.200 63.1
7 GC24 3 63.2 124.
8 GC24 4 124. 190.
9 GC25 1 -71.0 4.80
10 GC25 2 5.30 67.1
# … with 45 more rows
According to ?left_join
by - A character vector of variables to join by or a join specification created with
In case, if we need to update the min/max
based on the new dataset
rows_update(df, df2[-2], by = "Key")
# A tibble: 44 × 4
Key Quartile min max
<fct> <fct> <dbl> <dbl>
1 GC23 1 132. 132.
2 GC23 2 132. 132.
3 GC23 3 132. 132.
4 GC23 4 132. 132.
5 GC24 1 -47.2 -47.2
6 GC24 2 -47.2 -47.2
7 GC24 3 -47.2 -47.2
8 GC24 4 -47.2 -47.2
9 GC25 1 -12.2 -12.2
10 GC25 2 -12.2 -12.2
# … with 34 more rows