I have 2 dataframes. The RP
and the RP2
. My goal is to fill the NAs
of RP
columns Country_1
and Country_2
based on the conditions below:
1.If a cell of RP
has already value leave it as it is.
2.If it has NA
then see the TreatmenArea
and Subarea
of that cell and find the same pair of TreatmenArea
, Subarea
in RP2
then:
a.multiply this value with the PFaverage
column of RP2
and divide by 100.
b.if the relative cell in RP2
is NA
as well then leave the value in RP
as NA
RP<-structure(list(ProductFamily = c("PF_1", "PF_10", "PF_10", "PF_100",
"PF_100", "PF_101", "PF_102", "PF_102", "PF_102", "PF_102", "PF_103",
"PF_103", "PF_104", "PF_105", "PF_106", "PF_106", "PF_106", "PF_106",
"PF_107", "PF_108", "PF_109", "PF_11", "PF_110", "PF_110", "PF_111",
"PF_111", "PF_111", "PF_112", "PF_112", "PF_113", "PF_113", "PF_113",
"PF_114", "PF_115", "PF_115", "PF_116", "PF_117", "PF_118", "PF_119",
"PF_12", "PF_12", "PF_12", "PF_120", "PF_120", "PF_120", "PF_120",
"PF_120", "PF_120", "PF_121", "PF_122"), TreatmenArea = c("TA_7",
"TA_2", "TA_2", "TA_6", "TA_6", "TA_2", "TA_2", "TA_2", "TA_2",
"TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2",
"TA_2", "TA_2", "TA_2", "TA_2", "TA_7", "TA_2", "TA_2", "TA_7",
"TA_7", "TA_7", "TA_7", "TA_7", "TA_2", "TA_2", "TA_2", "TA_6",
"TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_5", "TA_5",
"TA_5", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_4",
"TA_4"), Subarea = c("SA_16", "SA_5", "SA_5", "SA_15", "SA_15",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6",
"SA_17", "SA_6", "SA_6", "SA_22", "SA_22", "SA_22", "SA_22",
"SA_22", "SA_6", "SA_6", "SA_6", "SA_15", "SA_6", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_11", "SA_11", "SA_11", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_10", "SA_10"), Country_1 = c(NA,
NA, NA, NA, NA, 92.6961421759861, NA, NA, NA, NA, 78.3001808318264,NA, NA, NA, 106.832963501416, 0.613496932515337, 104.21011973735,
NA, NA, NA, NA, NA, 99.5238622522423, NA, NA, NA, NA, NA, NA,
89.0343347639485, NA, NA, NA, NA, NA, NA, 101.231684009344, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Country_2 = c(NA,
NA, 161.55950752394, NA, NA, NA, NA, NA, 59.1346153846154, NA,
NA, NA, NA, 128.113063407181, 93.3812839543959, NA, NA, NA, 137.724550898204,
NA, NA, NA, NA, 90.1602849510241, 37.3939722071828, NA, NA, 40.9756097560976,
NA, NA, NA, 87.0095902353967, NA, NA, NA, 50.4591590140164, 92.1639413888299,
44.7601588756493, NA, NA, NA, NA, NA, 100.053835800808, NA, NA,
NA, NA, 136.420722135008, NA)), row.names = c(NA, 50L), class = "data.frame")
and the RP2
RP2<-structure(list(TreatmenArea = c("TA_1", "TA_1", "TA_1", "TA_1",
"TA_2", "TA_2", "TA_2", "TA_3", "TA_4", "TA_4", "TA_5", "TA_5",
"TA_5", "TA_6", "TA_6", "TA_7", "TA_7", "TA_7", "TA_7", "TA_7",
"TA_7", "TA_7", "TA_7", "TA_8", "TA_9"), Subarea = c("SA_1",
"SA_2", "SA_3", "SA_4", "SA_5", "SA_6", "SA_7", "SA_8", "SA_10",
"SA_9", "SA_11", "SA_12", "SA_13", "SA_14", "SA_15", "SA_16",
"SA_17", "SA_18", "SA_19", "SA_20", "SA_21", "SA_22", "SA_23",
"SA_24", "SA_25"), Country_1 = c(101.37519256645, 105.268942332558,
100.49933368058, 104.531597221684, NaN, 83.4404308144341, 86.2833044714836,
81.808967345926, 79.6786979951661, 77.6863475527052, NaN, 78.3001808318264,
112.499238782021, 113.526674294436, NaN, 108.350959378962, NaN,
NaN, 102.243471199266, NaN, 104.323270355678, NaN, NaN, NaN,
100), Country_2 = c(98.7267717862572, 83.9572019653478, 97.164068306148,
103.654771613923, 161.55950752394, 75.4091957339533, 96.5255996196344,
99.8317785594128, 88.1477193135348, NaN, NaN, NaN, 151.411687458963,
107.652477161141, NaN, NaN, NaN, NaN, 92.4695215620261, NaN,
88.9890571623243, 39.1847909816402, 87.0642912470953, NaN, NaN
), PFaverage = c(9.09293100169062, 16.2821052631579, 9.76688333333333,
13.4754047619048, 17.8741666666667, 9.42546567085954, 12.6145188492064,
12.4536666666667, 7.36169471153846, 13.1581818181818, 21.0866666666667,
9.58, 16.2525049019608, 11.552822039072, 12.6908333333333, 20.5489611111111,
11.99, 7.19134920634921, 8.51728472222222, 5.97, 12.2264183501684,6.37925, 16.1375, 11.9384615384615, 15.9185714285714)), class = c("data.frame"), row.names = c(NA, -25L))
If for example we check first row of Country_1
in RP we see that its NA
and its Subarea
is SA_16
so we move to RP2
and check that the relative value for Country_1
and Subarea SA_16
is 108.35
. So the calculation is 108.35*20.54(PFaverage)/100
.
CodePudding user response:
library(dplyr)
RP2 %>%
mutate_at(c("Country_1", "Country_2"),
list(PFa = ~. * PFaverage / 100)) %>%
select(TreatmenArea, Subarea, contains("PFa")) %>%
right_join(RP, . , by = c("TreatmenArea" , "Subarea")) %>%
mutate(Country_1 = coalesce(Country_1, Country_1_PFa),
Country_2 = coalesce(Country_2, Country_2_PFa)) %>%
select(-contains("PFa"))
#> ProductFamily TreatmenArea Subarea Country_1 Country_2
#> 1 PF_1 TA_7 SA_16 22.2649965 NaN
#> 2 PF_10 TA_2 SA_5 NaN 28.877416
#> 3 PF_10 TA_2 SA_5 NaN 161.559508
#> 4 PF_100 TA_6 SA_15 NaN NaN
#> 5 PF_100 TA_6 SA_15 NaN NaN
#> 6 PF_101 TA_2 SA_6 92.6961422 7.107668
#> 7 PF_102 TA_2 SA_6 7.8646492 7.107668
#> 8 PF_102 TA_2 SA_6 7.8646492 7.107668
#> 9 PF_102 TA_2 SA_6 7.8646492 59.134615
#> 10 PF_102 TA_2 SA_6 7.8646492 7.107668
...
Created on 2022-04-06 by the reprex package (v2.0.0)