I have 2 dataframes - the first dataframe (df1
) has columns with values at different times of the year; these columns are ones that don't have stationary
or Air
in the column name. I used a linear model to predict the rest of the values for the year - which I created into a second data frame (df2
).
df1 = df = structure(list(Date_Time_GMT_3 =
structure(c(1622552400, 1622553300,1622554200, 1622555100, 1622556000, 1622556900),
class = c("POSIXct","POSIXt"),
tzone = "EST"),
X20819830_R1AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 16.808, 16.713, 17.753),
X20819742_R1AR_S_Stationary = c(16.903, 16.828, 16.808, NA_real_, NA_real_, NA_real_),
X20822215_R3AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 13.942, 13.942, 13.846),
X20822215_R3AR_S_Stationary = c(13.942, 13.972, 13.842, NA_real_, NA_real_, NA_real_),
X20874235_R4AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 14.134, 14.534, 14.404),
X20874235_R4AR_S_Stationary = c(14.23, 14.23, 14.134, NA_real_, NA_real_, NA_real_),
X20874311_F1AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 15.187, 15.327, 15.567),
X20874311_F1AR_S_Stationary = c(15.282, 15.387, 15.587, NA_real_, NA_real_, NA_real_),
X20817727_F8AR_U = c(15.421, 14.441, 14.631, 14.781, 15.521, 15.821),
X20819742_X1AR_U = c(14.996, 15.996, 14.776, 14.920, 14.870, 14.235),
X20819742_R2AR_U = c(14.781, 15.521, 15.821, NA_real_, NA_real_, NA_real_),
X20817727_R5AR_U = c(NA_real_, NA_real_, NA_real_, 13.942, 13.942, 13.846),
X20817727_R7AR = c(14.23, 14.23, 14.134, NA_real_, NA_real_, NA_real_)),
row.names = c(NA, 6L), class = "data.frame")
df2 = structure(list(Date_Time_GMT_3 =
structure(c(1622552400, 1622553300,1622554200, 1622555100, 1622556000, 1622556900),
class = c("POSIXct","POSIXt"),
tzone = "EST"),
Predicted_X20817727_F8AR_U = c(17.421, 15.441, 17.631, 15.781, 15.001, 16.821),
Predicted_X20819742_X1AR_U = c(15.596, 17.996, 13.676, 13.620, 12.860, 13.245),
Predicted_X20819742_R2AR_U = c(14.781, 15.521, 15.821, 17.421, 15.441, 17.631),
Predicted_X20817727_R5AR_U = c(15.596, 17.996, 13.676, 13.620, 12.860, 13.245),
Predicted_X20817727_R7AR = c(13.942, 13.942, 13.846, 17.421, 15.441, 17.631)),
row.names = c(NA, 6L), class = "data.frame")
I am trying to add the column values from df2
to matching column names and matching Date_Time
into df1
where there are no values (i.e. NA
), and maintain the original values that are already recorded in the df1
. The code I'm using now works, but I was wondering if there was a more efficient way of doing it, without having to type out each column name. Here is the code I've been using
###cOMBINE the predicted columns to the mobile loggers so that values in mobile loggers are preserved
df1$Predicted_F8AR = df2$Predicted_X20817727_F8AR_U
df1$Predicted_R2AR = df2$Predicted_X20819742_R2AR_U
df1$Predicted_R5AR = df2$Predicted_X20817727_R5AR_U
df1$Predicted_X1AR = df2$Predicted_X20819742_X1AR_U
###cOMBINE the predicted columns to the mobile loggers so that values in mobile loggers are preserved
F8AR_U = df1 %>%
mutate(F8AR_U = coalesce(X20817727_F8AR_U,Predicted_F8AR)) %>%
select(X20817727_F8AR_U, F8AR_U)
df1$X20817727_F8AR_U = F8AR_U$F8AR_U
R2AR_U = df1 %>%
mutate(R2AR_U = coalesce(X20819742_R2AR_U,Predicted_R2AR)) %>%
select(X20819742_R2AR_U, R2AR_U)
df1$X20819742_R2AR_U = R2AR_U$R2AR_U
R5AR_U = df1 %>%
mutate(R5AR_U = coalesce(X20817727_R5AR_U,Predicted_R5AR)) %>%
select(X20817727_R5AR_U, R5AR_U)
df1$X20817727_R5AR_U = R5AR_U$R5AR_U
X1AR_U = df1 %>%
mutate(X1AR_U = coalesce(X20819742_X1AR_U,Predicted_X1AR)) %>%
select(X20819742_X1AR_U, X1AR_U)
df1$`X20819742_X1AR_U` = X1AR_U$X1AR_U
#gET RID OF PREDICTED COLUMNS FOR FINAL TABLE Export
df1 = df1[,c(1:13)]
any ideas?
CodePudding user response:
consider:
df1 %>%
pivot_longer(-Date_Time_GMT_3, names_to = c('name1', 'name', 'name2'),
names_pattern = '(.*?)_([^_] )_(.*)') %>%
left_join(df2 %>%
pivot_longer(-Date_Time_GMT_3,names_pattern = 'Predicted_[^_] _([^_] )') ,
by = c('Date_Time_GMT_3', 'name'))%>%
mutate(value=coalesce(value.x, value.y)) %>%
pivot_wider(Date_Time_GMT_3, names_from = c(name1, name, name2))