I have the dataframe below:
rp<-structure(list(Subarea = c("SA_16", "SA_5", "SA_5", "SA_15",
"SA_15", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6",
"SA_6", "SA_17", "SA_6", "SA_6", "SA_22", "SA_22", "SA_22", "SA_22",
"SA_22", "SA_6", "SA_6", "SA_6", "SA_15", "SA_6", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_11", "SA_11", "SA_11", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_10", "SA_10"), Country_1 = c(NA,
NA, NA, NA, NA, 92.6961421759861, NA, NA, NA, NA, 78.3001808318264,
NA, NA, NA, 106.832963501416, 0.613496932515337, 104.21011973735,
NA, NA, NA, NA, NA, 99.5238622522423, NA, NA, NA, NA, NA, NA,
89.0343347639485, NA, NA, NA, NA, NA, NA, 101.231684009344, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Country_2 = c(NA,
NA, 161.55950752394, NA, NA, NA, NA, NA, 59.1346153846154, NA,
NA, NA, NA, 128.113063407181, 93.3812839543959, NA, NA, NA, 137.724550898204,
NA, NA, NA, NA, 90.1602849510241, 37.3939722071828, NA, NA, 40.9756097560976,
NA, NA, NA, 87.0095902353967, NA, NA, NA, 50.4591590140164, 92.1639413888299,
44.7601588756493, NA, NA, NA, NA, NA, 100.053835800808, NA, NA,
NA, NA, 136.420722135008, NA)), row.names = c(NA, 50L), class = "data.frame")
and I want to replace the NAs
with the values of a second dataframe rp2
(if exist) by same Country_
and Subarea
. For example if we see that Country_1
for Subarea SA16
is NA
in rp
we go to rp2
and replace it with 108.35096
rp2<-structure(list(Subarea = c("SA_1", "SA_2", "SA_3", "SA_4", "SA_5",
"SA_6", "SA_7", "SA_8", "SA_10", "SA_9", "SA_11", "SA_12", "SA_13",
"SA_14", "SA_15", "SA_16", "SA_17", "SA_18", "SA_19", "SA_20",
"SA_21", "SA_22", "SA_23", "SA_24", "SA_25"), Country_1 = c(101.37519256645,
105.268942332558, 100.49933368058, 104.531597221684, NA, 83.4404308144341,
86.2833044714836, 81.808967345926, 79.6786979951661, 77.6863475527052,
NA, 78.3001808318264, 112.499238782021, 113.526674294436, NA,
108.350959378962, NA, NA, 102.243471199266, NA, 104.323270355678,
NA, NA, NA, 100), Country_2 = c(98.7267717862572, 83.9572019653478,
97.164068306148, 103.654771613923, 161.55950752394, 75.4091957339533,
96.5255996196344, 99.8317785594128, 88.1477193135348, NA, NA,
NA, 151.411687458963, 107.652477161141, NA, NA, NA, NA, 92.4695215620261,
NA, 88.9890571623243, 39.1847909816402, 87.0642912470953, NA,
NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-25L))
CodePudding user response:
You can merge them by Subarea
and use coalesce()
to find the first non-missing Country_*
value.
library(dplyr)
left_join(rp, rp2, by = "Subarea", suffix = c("", ".y")) %>%
mutate(Country_1 = coalesce(Country_1, Country_1.y),
Country_2 = coalesce(Country_2, Country_2.y)) %>%
select(!ends_with(".y"))
A compact version if you have more Country_*
to manipulate:
left_join(rp, rp2, by = "Subarea", suffix = c("", ".y")) %>%
mutate(across(matches("^Country_\\d $"),
~ coalesce(.x, cur_data()[[paste0(cur_column(), ".y")]]) )) %>%
select(!ends_with(".y"))