Home > Software design >  Fill selected columns from immediate previous one but not entire row
Fill selected columns from immediate previous one but not entire row

Time:10-05

I have dataframe as below:

             df <- data.frame(Resource_Code = c("B-GA100","B-GA100","B- 
                GA100"),
             "2021-09-01"= c("", "", ""),
             "2021-09-06"= c(125, "", ""),
             "2021-09-20" = c("", "", ""),
             "2021-12-06" = c("", "", ""),
             "2021-12-12" = c(125, "", ""),
             "2021-12-13" = c("", 200, ""),
             "2021-12-19" = c("", "", ""),
             "2021-12-13" = c("", 200, ""),
             "2022-08-08" = c("", "", 350),
             "2022-08-20" = c("", "", ""),
             "2022-08-28" = c("", "", 350))

The desired output required:

              df1 <- data.frame(Resource_Code = c("B-GA100","B- 
              GA100","B-GA100"),
              "2021-09-01"= c("", "", ""),
             "2021-09-06"= c(125, "", ""),
             "2021-09-20" = c(125, "", ""),
             "2021-12-06" = c(125, "", ""),
             "2021-12-12" = c(125, "", ""),
             "2021-12-13" = c("", 200, ""),
             "2021-12-19" = c("", 200, ""),
             "2021-12-13" = c("", 200, ""),
             "2022-08-08" = c("", "", 350),
             "2022-08-20" = c("", "", 350),
             "2022-08-28" = c("", "", 350))

My code is as follows- which not giving desired output: Please help !!!

                for (i in 2:ncol(df))
                df[,i] = ifelse(is.na(df[,i]), df[,i-1],df[,i])
     
 

CodePudding user response:

If you want to do it in a loop this will work.

for (i in 1:nrow(df)){
  temp_locs = which(df[i,-1] != "")   1 # find the columns where the row is not empty
  df[i, seq(from = temp_locs[1], to = temp_locs[2], by = 1)] = df[i, temp_locs[1]] #fill in the gaps between
}

CodePudding user response:

Here is a data.table approach

library(data.table)
# Make data.table
setDT(df)
# Add rownumbers to avoid problems with duplicates
df[, rownum := .I]
#replace the emtpy values with NA
df[df == ""] <- NA_character_
# Melt to long and split
L <- split(melt(df, id.vars = c("rownum", "Resource_Code")), 
           by = c("rownum", "Resource_Code"),
           keep.by = TRUE)
#fill in values to new column 'newval'
L <- lapply(L, function(x) {
  # create two temporary columns based on previous and next non-na value
  x[, locf := nafill(as.numeric(x$value), "locf")]
  x[, nocb := nafill(as.numeric(x$value), "nocb")]
  #if locf value is the same as nocb value, then take it
  x[locf == nocb, newval := locf]
  return(x)
})
# create final output, drop the rownum-column
ans <- dcast(rbindlist(L), rownum   Resource_Code ~variable, value.var = "newval")[, rownum := NULL]
# raplce NA with "" again (if desired)
ans[is.na(ans)] <- ""
#    Resource_Code X2021.09.06 X2021.09.20 X2021.12.06 X2021.12.12 X2021.12.13 X2021.12.19 X2021.12.13.1 X2022.08.08 X2022.08.20 X2022.08.28
# 1:       B-GA100         125         125         125         125                                                                          
# 2:       B-GA100                                                         200         200           200                                    
# 3:       B-GA100                                                                                               350         350         350

CodePudding user response:

Alternatively here's a dplyr solution

df %>%
  mutate(
    row1 = row_number()
  ) %>%
  pivot_longer(starts_with("x")) %>%
  mutate(
    tt1 = cumsum(value !=""),
    value = as.numeric(value)
  ) %>%
  group_by(tt1) %>%
  mutate(
    value = if_else(is.na(value) & tt1%%2 == 1, max(value, na.rm=T), value),
    tt1 = NA
  ) %>% 
  dplyr::select(-tt1) %>% #View()
  pivot_wider() %>%
  select(-row1)

CodePudding user response:

Here's a tidyverse answer assuming you want to fill values in every row for the respective months.

library(dplyr)
library(tidyr)

df %>%
  mutate(row = row_number()) %>%
  pivot_longer(cols = -c(Resource_Code, row)) %>%
  mutate(name = lubridate::ymd(sub('^X', '', name)), 
         value = as.numeric(na_if(value, '')), 
         month = format(name, '%b-%Y')) %>%
  group_by(row, month) %>%
  fill(value) %>%
  ungroup %>% select(-month) %>%
  pivot_wider(names_from = name, values_from = value) %>% 
  select(-row) 

#  Resource_Code 2021-09-01 2021-09-06 2021-09-20 2021-12-06 2021-12-12
#1       B-GA100         NA        125        125         NA        125
#2       B-GA100         NA         NA         NA         NA         NA
#3       B-GA100         NA         NA         NA         NA         NA

#  2021-12-13 2021-12-19 2021-12-31 2022-08-08 2022-08-20 2022-08-28
#1        125        125        125         NA         NA         NA
#2        200        200        200         NA         NA         NA
#3         NA         NA         NA        350        350        350

If you want to have blanks instead of NA add values_fill = "" in pivot_wider.

  •  Tags:  
  • r
  • Related