I'm trying to merge two data.frames in R so that the values in test_1 are overwritten if they exist in test_2
Every time I try and join/merge them I end up with a bunch of NAs.
I can't work out a simple logic for if.na then use test_2. Is that what I'd need to do, or is there an easier way?
test_1 = structure(list(rn = c("Red", "Blue",
"Green", "Yellow", "Pink", "Gold"
), X2022.08.01 = c(0, 0, 0, 0, 0, 0), X2022.08.02 = c(0, 0, 0,
0, 0, 0), X2022.08.03 = c(0, 0, 0, 0, 0, 0), X2022.08.04 = c(0,
0, 0, 0, 0, 0), X2022.08.05 = c(0, 0, 0, 0, 0, 0), X2022.08.08 = c(0,
0, 0, 0, 0, 0), X2022.08.09 = c(0, 0, 0, 0, 0, 0), X2022.08.10 = c(0,
0, 0, 0, 0, 0), X2022.08.11 = c(0, 0, 0, 0, 0, 0), X2022.08.12 = c(0,
0, 0, 0, 0, 0), X2022.08.15 = c(0, 0, 0, 0, 0, 0), X2022.08.16 = c(0,
0, 0, 0, 0, 0), X2022.08.17 = c(0, 0, 0, 0, 0, 0), X2022.08.18 = c(0,
0, 0, 0, 0, 0), X2022.08.19 = c(0, 0, 0, 0, 0, 0), X2022.08.22 = c(0,
0, 0, 0, 0, 0), X2022.08.23 = c(0, 0, 0, 0, 0, 0), X2022.08.24 = c(0,
0, 0, 0, 0, 0), X2022.08.25 = c(0, 0, 0, 0, 0, 0), X2022.08.26 = c(0,
0, 0, 0, 0, 0), X2022.08.29 = c(0, 0, 0, 0, 0, 0), X2022.08.30 = c(0,
0, 0, 0, 0, 0), X2022.08.31 = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
test_2 = structure(list(rn = c("Blue", "Pink",
"Red", "Yellow", "Green", "Gold"
), X2022.08.01 = c(10, 10, 10, 10, 10, 10), X2022.08.03 = c(10, 10, 10,
10, 10, 10), X2022.08.04 = c(10, 10, 10, 10, 10, 10), X2022.08.05 = c(10,
10, 10, 10, 10, 10), X2022.08.26 = c(10, 10, 10, 10, 10, 10)), row.names = c(NA,
6L), class = "data.frame")
the desired output would look like this:
test_output = structure(list(rn = c("Red", "Blue",
"Green", "Yellow", "Pink", "Gold"
), X2022.08.01 = c(10, 10, 10, 10, 10, 10), X2022.08.02 = c(0, 0, 0,
0, 0, 0), X2022.08.03 = c(10, 10, 10, 10, 10, 10), X2022.08.04 = c(10,
10, 10, 10, 10, 10), X2022.08.05 = c(10, 10, 10, 10, 10, 10), X2022.08.08 = c(0,
0, 0, 0, 0, 0), X2022.08.09 = c(0, 0, 0, 0, 0, 0), X2022.08.10 = c(0,
0, 0, 0, 0, 0), X2022.08.11 = c(0, 0, 0, 0, 0, 0), X2022.08.12 = c(0,
0, 0, 0, 0, 0), X2022.08.15 = c(0, 0, 0, 0, 0, 0), X2022.08.16 = c(0,
0, 0, 0, 0, 0), X2022.08.17 = c(0, 0, 0, 0, 0, 0), X2022.08.18 = c(0,
0, 0, 0, 0, 0), X2022.08.19 = c(0, 0, 0, 0, 0, 0), X2022.08.22 = c(0,
0, 0, 0, 0, 0), X2022.08.23 = c(0, 0, 0, 0, 0, 0), X2022.08.24 = c(0,
0, 0, 0, 0, 0), X2022.08.25 = c(0, 0, 0, 0, 0, 0), X2022.08.26 = c(10,
10, 10, 10, 10, 10), X2022.08.29 = c(0, 0, 0, 0, 0, 0), X2022.08.30 = c(0,
0, 0, 0, 0, 0), X2022.08.31 = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
CodePudding user response:
If you use the column names of the source as indices on both sides of the assignment, you can get replacement with
test_1[ ,colnames(test_2)] <- test_2[ , colnames(test_2)]
test_1
rn X2022.08.01 X2022.08.02 X2022.08.03 X2022.08.04 X2022.08.05 X2022.08.08 X2022.08.09
1 Blue 10 0 10 10 10 0 0
2 Pink 10 0 10 10 10 0 0
3 Red 10 0 10 10 10 0 0
4 Yellow 10 0 10 10 10 0 0
5 Green 10 0 10 10 10 0 0
6 Gold 10 0 10 10 10 0 0
X2022.08.10 X2022.08.11 X2022.08.12 X2022.08.15 X2022.08.16 X2022.08.17 X2022.08.18 X2022.08.19
1 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0
X2022.08.22 X2022.08.23 X2022.08.24 X2022.08.25 X2022.08.26 X2022.08.29 X2022.08.30 X2022.08.31
1 0 0 0 0 10 0 0 0
2 0 0 0 0 10 0 0 0
3 0 0 0 0 10 0 0 0
4 0 0 0 0 10 0 0 0
5 0 0 0 0 10 0 0 0
6 0 0 0 0 10 0 0 0
I doubt there could be a more simple method.. It could even work with a more limited number of rows as long as the indices area proper subset of the column and row names of the target matrix or dataframe. Note: I'm not understanding the issue with NA's. There were not NA's in either structure.
CodePudding user response:
There is probably a more elegant way of achieving this, but is this producing what you expect?
library(tidyverse)
# transpose both tables
test_1 <- test_1[-1] %>%
t() %>%
as.data.frame() %>%
setNames(test_1[,1]) %>%
rownames_to_column("rn")
test_2 <- test_2[-1] %>%
t() %>%
as.data.frame() %>%
setNames(test_2[,1]) %>%
rownames_to_column("rn")
# join the antijoin to test_2 to "replace" the values
test_1<- test_2 %>%
full_join(test_1 %>%
anti_join(test_2, by = "rn"))
# back to the original layout
test_1 <- test_1[-1] %>%
t() %>%
as.data.frame() %>%
setNames(test_1[,1]) %>%
rownames_to_column("rn")
CodePudding user response:
One option is to use the powerjoin package and specify how to handle conflicts between tables:
library(dplyr)
library(powerjoin)
test_1 = structure(list(rn = c("Red", "Blue",
"Green", "Yellow", "Pink", "Gold"
), X2022.08.01 = c(0, 0, 0, 0, 0, 0), X2022.08.02 = c(0, 0, 0,
0, 0, 0), X2022.08.03 = c(0, 0, 0, 0, 0, 0), X2022.08.04 = c(0,
0, 0, 0, 0, 0), X2022.08.05 = c(0, 0, 0, 0, 0, 0), X2022.08.08 = c(0,
0, 0, 0, 0, 0), X2022.08.09 = c(0, 0, 0, 0, 0, 0), X2022.08.10 = c(0,
0, 0, 0, 0, 0), X2022.08.11 = c(0, 0, 0, 0, 0, 0), X2022.08.12 = c(0,
0, 0, 0, 0, 0), X2022.08.15 = c(0, 0, 0, 0, 0, 0), X2022.08.16 = c(0,
0, 0, 0, 0, 0), X2022.08.17 = c(0, 0, 0, 0, 0, 0), X2022.08.18 = c(0,
0, 0, 0, 0, 0), X2022.08.19 = c(0, 0, 0, 0, 0, 0), X2022.08.22 = c(0,
0, 0, 0, 0, 0), X2022.08.23 = c(0, 0, 0, 0, 0, 0), X2022.08.24 = c(0,
0, 0, 0, 0, 0), X2022.08.25 = c(0, 0, 0, 0, 0, 0), X2022.08.26 = c(0,
0, 0, 0, 0, 0), X2022.08.29 = c(0, 0, 0, 0, 0, 0), X2022.08.30 = c(0,
0, 0, 0, 0, 0), X2022.08.31 = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
test_2 = structure(list(rn = c("Blue", "Pink",
"Red", "Yellow", "Green", "Gold"
), X2022.08.01 = c(10, 10, 10, 10, 10, 10), X2022.08.03 = c(10, 10, 10,
10, 10, 10), X2022.08.04 = c(10, 10, 10, 10, 10, 10), X2022.08.05 = c(10,
10, 10, 10, 10, 10), X2022.08.26 = c(10, 10, 10, 10, 10, 10)), row.names = c(NA,
6L), class = "data.frame")
test_output = structure(list(rn = c("Red", "Blue",
"Green", "Yellow", "Pink", "Gold"
), X2022.08.01 = c(10, 10, 10, 10, 10, 10), X2022.08.02 = c(0, 0, 0,
0, 0, 0), X2022.08.03 = c(10, 10, 10, 10, 10, 10), X2022.08.04 = c(10,
10, 10, 10, 10, 10), X2022.08.05 = c(10, 10, 10, 10, 10, 10), X2022.08.08 = c(0,
0, 0, 0, 0, 0), X2022.08.09 = c(0, 0, 0, 0, 0, 0), X2022.08.10 = c(0,
0, 0, 0, 0, 0), X2022.08.11 = c(0, 0, 0, 0, 0, 0), X2022.08.12 = c(0,
0, 0, 0, 0, 0), X2022.08.15 = c(0, 0, 0, 0, 0, 0), X2022.08.16 = c(0,
0, 0, 0, 0, 0), X2022.08.17 = c(0, 0, 0, 0, 0, 0), X2022.08.18 = c(0,
0, 0, 0, 0, 0), X2022.08.19 = c(0, 0, 0, 0, 0, 0), X2022.08.22 = c(0,
0, 0, 0, 0, 0), X2022.08.23 = c(0, 0, 0, 0, 0, 0), X2022.08.24 = c(0,
0, 0, 0, 0, 0), X2022.08.25 = c(0, 0, 0, 0, 0, 0), X2022.08.26 = c(10,
10, 10, 10, 10, 10), X2022.08.29 = c(0, 0, 0, 0, 0, 0), X2022.08.30 = c(0,
0, 0, 0, 0, 0), X2022.08.31 = c(0, 0, 0, 0, 0, 0)), row.names = c(NA,
6L), class = "data.frame")
output <- power_left_join(test_2, test_1, by = "rn", conflict = coalesce_xy)
all_equal(test_output, output)
#> [1] TRUE
Created on 2022-10-24 by the reprex package (v2.0.1)