Home > Software engineering >  Copy value from one column to the next given a condition
Copy value from one column to the next given a condition

Time:07-05

I have binary data as below:

ID <- c("A", "B", "C", "D", "E", "F")
Q0 <- c(0, 0, 0, 0, 0, 0)
Q1 <- c(0, 1, 0, 0, NA, 1) 
Q2 <- c(0, NA, 1, 0, NA, 1) 
Q3 <- c(0, NA, NA, 1, NA, 1) 
Q4 <- c(0, NA, NA, 1, NA, 1)

dta <- data.frame(ID, Q0, Q1, Q2, Q3, Q4)

If there is 1 for a row in one of the columns, all the subsequent columns should be 1 as well. If there is 0 or NA, the next column should stay as is. I have written the code below:

dta2  <- dta %>% 
  mutate(Q2 = case_when(Q1 == 1 ~ 1, 
                        TRUE ~ Q2))

dta3  <- dta2 %>% 
  mutate(Q3 = case_when(Q2 == 1 ~ 1, 
                        TRUE ~ Q3))

dta4  <- dta3 %>% 
  mutate(Q4 = case_when(Q3 == 1 ~ 1, 
                        TRUE ~ Q4))

It works fine, and the output looks as intended:

ID    Q0    Q1    Q2    Q3    Q4
A     0     0     0     0     0
B     0     1     1     1     1
C     0     0     1     1     1
D     0     0     0     1     1
E     0     NA    NA    NA    NA
F     0     1     1     1     1

My question is: is there a more elegant way to do this? Perhaps using apply or even a for loop?

CodePudding user response:

Here is a base R way with apply.

dta[-1] <- t(apply(dta[-1], 1, \(x) {
  y <- x
  y[is.na(y)] <- 0
  y <- as.integer(cumsum(y) > 0)
  is.na(y) <- is.na(x) & y == 0
  y
}))
dta
#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

Created on 2022-07-04 by the reprex package (v2.0.1)

CodePudding user response:

First you can create all the variables in the same mutate

dta %>% 
  mutate(
  Q2 = case_when(Q1 == 1 ~ 1, TRUE ~ Q2), 
  Q3 = case_when(Q2 == 1 ~ 1, TRUE ~ Q3),
  Q4 = case_when(Q3 == 1 ~ 1, TRUE ~ Q4))

  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1

Then, I don't know if it's possible to do this more programmatically

CodePudding user response:

Another possible solution:

library(dplyr)

dta %>% 
  mutate(t(apply(.[-1], 1, \(x) {if (max(x, na.rm = T) == 1) 
    x[which.max(x):length(x)] <- 1 else x; x})) %>% as_tibble)

#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

CodePudding user response:

An option with na.locf

library(zoo)
i1 <- do.call(pmax, c(dta[-1], na.rm = TRUE))!= 0
dta[-1][i1,] <- t(na.locf(as.data.frame(t(dta[-1][i1,]))))

-output

> dta
  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1

CodePudding user response:

I found one more with pivoting:

library(tidyr)
library(dplyr)

dta %>% 
  pivot_longer(-ID) %>% 
  group_by(ID) %>% 
  mutate(value2 = value) %>% 
  fill(value2) %>% 
  mutate(value = ifelse(value2 == 0, value, value2)) %>% 
  select(-value2) %>% 
  pivot_wider(names_from = name, values_from = value)
  ID       Q0    Q1    Q2    Q3    Q4
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A         0     0     0     0     0
2 B         0     1     1     1     1
3 C         0     0     1     1     1
4 D         0     0     0     1     1
5 E         0    NA    NA    NA    NA
6 F         0     1     1     1     1

CodePudding user response:

Keeping things simple with a loop:

for (i in 3:ncol(dta)) dta[[i]][dta[[i-1]] == 1] <- 1
    #   ID Q0 Q1 Q2 Q3 Q4
    # 1  A  0  0  0  0  0
    # 2  B  0  1  1  1  1
    # 3  C  0  0  1  1  1
    # 4  D  0  0  0  1  1
    # 5  E  0 NA NA NA NA
    # 6  F  0  1  1  1  1

With dplyr data.table inspired by yuriy:

library(dplyr)
library(data.table)
setDT(dta)

dta[, (names(dta)[-1]) := as.list(cumany(.SD == 1)), by = ID]

CodePudding user response:

Yet another dplyr purrr option could be:

dta %>%
 mutate(pmap_dfr(across(-ID), ~ `[<-`(c(...), seq_along(c(...)) > match(1, c(...)), 1)))

  ID Q0 Q1 Q2 Q3 Q4
1  A  0  0  0  0  0
2  B  0  1  1  1  1
3  C  0  0  1  1  1
4  D  0  0  0  1  1
5  E  0 NA NA NA NA
6  F  0  1  1  1  1

CodePudding user response:

ID <- c("A", "B", "C", "D", "E", "F")
Q0 <- c(0, 0, 0, 0, 0, 0)
Q1 <- c(0, 1, 0, 0, NA, 1) 
Q2 <- c(0, NA, 1, 0, NA, 1) 
Q3 <- c(0, NA, NA, 1, NA, 1) 
Q4 <- c(0, NA, NA, 1, NA, 1)

df <- data.frame(ID, Q0, Q1, Q2, Q3, Q4)

df[-1] <- t(apply(df[-1], 1, function(x)  (dplyr::cumany(x == 1))))
df
#>   ID Q0 Q1 Q2 Q3 Q4
#> 1  A  0  0  0  0  0
#> 2  B  0  1  1  1  1
#> 3  C  0  0  1  1  1
#> 4  D  0  0  0  1  1
#> 5  E  0 NA NA NA NA
#> 6  F  0  1  1  1  1

Created on 2022-07-04 by the reprex package (v2.0.1)

  •  Tags:  
  • r
  • Related