I have dataframe as below:
df <- data.frame(Resource_Code = c("B-GA100","B-GA100","B-
GA100"),
"2021-09-01"= c("", "", ""),
"2021-09-06"= c(125, "", ""),
"2021-09-20" = c("", "", ""),
"2021-12-06" = c("", "", ""),
"2021-12-12" = c(125, "", ""),
"2021-12-13" = c("", 200, ""),
"2021-12-19" = c("", "", ""),
"2021-12-13" = c("", 200, ""),
"2022-08-08" = c("", "", 350),
"2022-08-20" = c("", "", ""),
"2022-08-28" = c("", "", 350))
The desired output required:
df1 <- data.frame(Resource_Code = c("B-GA100","B-
GA100","B-GA100"),
"2021-09-01"= c("", "", ""),
"2021-09-06"= c(125, "", ""),
"2021-09-20" = c(125, "", ""),
"2021-12-06" = c(125, "", ""),
"2021-12-12" = c(125, "", ""),
"2021-12-13" = c("", 200, ""),
"2021-12-19" = c("", 200, ""),
"2021-12-13" = c("", 200, ""),
"2022-08-08" = c("", "", 350),
"2022-08-20" = c("", "", 350),
"2022-08-28" = c("", "", 350))
My code is as follows- which not giving desired output: Please help !!!
for (i in 2:ncol(df))
df[,i] = ifelse(is.na(df[,i]), df[,i-1],df[,i])
CodePudding user response:
If you want to do it in a loop this will work.
for (i in 1:nrow(df)){
temp_locs = which(df[i,-1] != "") 1 # find the columns where the row is not empty
df[i, seq(from = temp_locs[1], to = temp_locs[2], by = 1)] = df[i, temp_locs[1]] #fill in the gaps between
}
CodePudding user response:
Here is a data.table approach
library(data.table)
# Make data.table
setDT(df)
# Add rownumbers to avoid problems with duplicates
df[, rownum := .I]
#replace the emtpy values with NA
df[df == ""] <- NA_character_
# Melt to long and split
L <- split(melt(df, id.vars = c("rownum", "Resource_Code")),
by = c("rownum", "Resource_Code"),
keep.by = TRUE)
#fill in values to new column 'newval'
L <- lapply(L, function(x) {
# create two temporary columns based on previous and next non-na value
x[, locf := nafill(as.numeric(x$value), "locf")]
x[, nocb := nafill(as.numeric(x$value), "nocb")]
#if locf value is the same as nocb value, then take it
x[locf == nocb, newval := locf]
return(x)
})
# create final output, drop the rownum-column
ans <- dcast(rbindlist(L), rownum Resource_Code ~variable, value.var = "newval")[, rownum := NULL]
# raplce NA with "" again (if desired)
ans[is.na(ans)] <- ""
# Resource_Code X2021.09.06 X2021.09.20 X2021.12.06 X2021.12.12 X2021.12.13 X2021.12.19 X2021.12.13.1 X2022.08.08 X2022.08.20 X2022.08.28
# 1: B-GA100 125 125 125 125
# 2: B-GA100 200 200 200
# 3: B-GA100 350 350 350
CodePudding user response:
Alternatively here's a dplyr
solution
df %>%
mutate(
row1 = row_number()
) %>%
pivot_longer(starts_with("x")) %>%
mutate(
tt1 = cumsum(value !=""),
value = as.numeric(value)
) %>%
group_by(tt1) %>%
mutate(
value = if_else(is.na(value) & tt1%%2 == 1, max(value, na.rm=T), value),
tt1 = NA
) %>%
dplyr::select(-tt1) %>% #View()
pivot_wider() %>%
select(-row1)
CodePudding user response:
Here's a tidyverse
answer assuming you want to fill values in every row for the respective months.
library(dplyr)
library(tidyr)
df %>%
mutate(row = row_number()) %>%
pivot_longer(cols = -c(Resource_Code, row)) %>%
mutate(name = lubridate::ymd(sub('^X', '', name)),
value = as.numeric(na_if(value, '')),
month = format(name, '%b-%Y')) %>%
group_by(row, month) %>%
fill(value) %>%
ungroup %>% select(-month) %>%
pivot_wider(names_from = name, values_from = value) %>%
select(-row)
# Resource_Code 2021-09-01 2021-09-06 2021-09-20 2021-12-06 2021-12-12
#1 B-GA100 NA 125 125 NA 125
#2 B-GA100 NA NA NA NA NA
#3 B-GA100 NA NA NA NA NA
# 2021-12-13 2021-12-19 2021-12-31 2022-08-08 2022-08-20 2022-08-28
#1 125 125 125 NA NA NA
#2 200 200 200 NA NA NA
#3 NA NA NA 350 350 350
If you want to have blanks instead of NA
add values_fill = ""
in pivot_wider
.