I have binary data as below:
ID <- c("A", "B", "C", "D", "E", "F")
Q0 <- c(0, 0, 0, 0, 0, 0)
Q1 <- c(0, 1, 0, 0, NA, 1)
Q2 <- c(0, NA, 1, 0, NA, 1)
Q3 <- c(0, NA, NA, 1, NA, 1)
Q4 <- c(0, NA, NA, 1, NA, 1)
dta <- data.frame(ID, Q0, Q1, Q2, Q3, Q4)
If there is 1 for a row in one of the columns, all the subsequent columns should be 1 as well. If there is 0 or NA, the next column should stay as is. I have written the code below:
dta2 <- dta %>%
mutate(Q2 = case_when(Q1 == 1 ~ 1,
TRUE ~ Q2))
dta3 <- dta2 %>%
mutate(Q3 = case_when(Q2 == 1 ~ 1,
TRUE ~ Q3))
dta4 <- dta3 %>%
mutate(Q4 = case_when(Q3 == 1 ~ 1,
TRUE ~ Q4))
It works fine, and the output looks as intended:
ID Q0 Q1 Q2 Q3 Q4
A 0 0 0 0 0
B 0 1 1 1 1
C 0 0 1 1 1
D 0 0 0 1 1
E 0 NA NA NA NA
F 0 1 1 1 1
My question is: is there a more elegant way to do this? Perhaps using apply
or even a for loop?
CodePudding user response:
Here is a base R way with apply
.
dta[-1] <- t(apply(dta[-1], 1, \(x) {
y <- x
y[is.na(y)] <- 0
y <- as.integer(cumsum(y) > 0)
is.na(y) <- is.na(x) & y == 0
y
}))
dta
#> ID Q0 Q1 Q2 Q3 Q4
#> 1 A 0 0 0 0 0
#> 2 B 0 1 1 1 1
#> 3 C 0 0 1 1 1
#> 4 D 0 0 0 1 1
#> 5 E 0 NA NA NA NA
#> 6 F 0 1 1 1 1
Created on 2022-07-04 by the reprex package (v2.0.1)
CodePudding user response:
First you can create all the variables in the same mutate
dta %>%
mutate(
Q2 = case_when(Q1 == 1 ~ 1, TRUE ~ Q2),
Q3 = case_when(Q2 == 1 ~ 1, TRUE ~ Q3),
Q4 = case_when(Q3 == 1 ~ 1, TRUE ~ Q4))
ID Q0 Q1 Q2 Q3 Q4
1 A 0 0 0 0 0
2 B 0 1 1 1 1
3 C 0 0 1 1 1
4 D 0 0 0 1 1
5 E 0 NA NA NA NA
6 F 0 1 1 1 1
Then, I don't know if it's possible to do this more programmatically
CodePudding user response:
Another possible solution:
library(dplyr)
dta %>%
mutate(t(apply(.[-1], 1, \(x) {if (max(x, na.rm = T) == 1)
x[which.max(x):length(x)] <- 1 else x; x})) %>% as_tibble)
#> ID Q0 Q1 Q2 Q3 Q4
#> 1 A 0 0 0 0 0
#> 2 B 0 1 1 1 1
#> 3 C 0 0 1 1 1
#> 4 D 0 0 0 1 1
#> 5 E 0 NA NA NA NA
#> 6 F 0 1 1 1 1
CodePudding user response:
An option with na.locf
library(zoo)
i1 <- do.call(pmax, c(dta[-1], na.rm = TRUE))!= 0
dta[-1][i1,] <- t(na.locf(as.data.frame(t(dta[-1][i1,]))))
-output
> dta
ID Q0 Q1 Q2 Q3 Q4
1 A 0 0 0 0 0
2 B 0 1 1 1 1
3 C 0 0 1 1 1
4 D 0 0 0 1 1
5 E 0 NA NA NA NA
6 F 0 1 1 1 1
CodePudding user response:
I found one more with pivoting:
library(tidyr)
library(dplyr)
dta %>%
pivot_longer(-ID) %>%
group_by(ID) %>%
mutate(value2 = value) %>%
fill(value2) %>%
mutate(value = ifelse(value2 == 0, value, value2)) %>%
select(-value2) %>%
pivot_wider(names_from = name, values_from = value)
ID Q0 Q1 Q2 Q3 Q4
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A 0 0 0 0 0
2 B 0 1 1 1 1
3 C 0 0 1 1 1
4 D 0 0 0 1 1
5 E 0 NA NA NA NA
6 F 0 1 1 1 1
CodePudding user response:
Keeping things simple with a loop:
for (i in 3:ncol(dta)) dta[[i]][dta[[i-1]] == 1] <- 1
# ID Q0 Q1 Q2 Q3 Q4
# 1 A 0 0 0 0 0
# 2 B 0 1 1 1 1
# 3 C 0 0 1 1 1
# 4 D 0 0 0 1 1
# 5 E 0 NA NA NA NA
# 6 F 0 1 1 1 1
With dplyr
data.table
inspired by yuriy:
library(dplyr)
library(data.table)
setDT(dta)
dta[, (names(dta)[-1]) := as.list(cumany(.SD == 1)), by = ID]
CodePudding user response:
Yet another dplyr
purrr
option could be:
dta %>%
mutate(pmap_dfr(across(-ID), ~ `[<-`(c(...), seq_along(c(...)) > match(1, c(...)), 1)))
ID Q0 Q1 Q2 Q3 Q4
1 A 0 0 0 0 0
2 B 0 1 1 1 1
3 C 0 0 1 1 1
4 D 0 0 0 1 1
5 E 0 NA NA NA NA
6 F 0 1 1 1 1
CodePudding user response:
ID <- c("A", "B", "C", "D", "E", "F")
Q0 <- c(0, 0, 0, 0, 0, 0)
Q1 <- c(0, 1, 0, 0, NA, 1)
Q2 <- c(0, NA, 1, 0, NA, 1)
Q3 <- c(0, NA, NA, 1, NA, 1)
Q4 <- c(0, NA, NA, 1, NA, 1)
df <- data.frame(ID, Q0, Q1, Q2, Q3, Q4)
df[-1] <- t(apply(df[-1], 1, function(x) (dplyr::cumany(x == 1))))
df
#> ID Q0 Q1 Q2 Q3 Q4
#> 1 A 0 0 0 0 0
#> 2 B 0 1 1 1 1
#> 3 C 0 0 1 1 1
#> 4 D 0 0 0 1 1
#> 5 E 0 NA NA NA NA
#> 6 F 0 1 1 1 1
Created on 2022-07-04 by the reprex package (v2.0.1)