I have this dataframe:
df <- structure(list(lg0 = c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE,
TRUE), lg2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE
), lg4 = c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), ld0 = c(TRUE,
TRUE, TRUE, FALSE, FALSE, TRUE, FALSE), ld1 = c(FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, TRUE), ld2 = c(FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE), ld4 = c(FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE)), class = "data.frame", row.names = c(NA,
-7L))
lg0 lg2 lg4 ld0 ld1 ld2 ld4
1 FALSE FALSE TRUE TRUE FALSE FALSE FALSE
2 FALSE TRUE FALSE TRUE FALSE FALSE FALSE
3 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE TRUE FALSE
6 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
How can I add missing columns lg1
, lg3
, ld3
filled with FALSE
.
I would like to 'quasi' complete / fill / expand missing COLUMNS to have this:
This should be independent on which of the columns are not present, this can change.
Desired Output:
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
So far I have tried:
library(tidyr)
library(dplyr)
library(readr)
df2 <- df1 %>%
pivot_longer(
everything()
) %>%
mutate(id = parse_number(name))
expand(df2, id, name)
CodePudding user response:
In base R
, we can construct a new dataset of FALSE
and assign the 'df' back to it based on the matching column names
nm1 <- paste0(rep(c("lg", "ld"), each = 5), 0:4)
df2 <- as.data.frame(matrix(FALSE, nrow = nrow(df),
ncol = length(nm1), dimnames = list(NULL, nm1)))
df2[names(df)] <- df
-output
> df2
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
If we want to automatically construct the nm1
d1 <- as.numeric(sub("\\D ", "", names(df)))
nm1 <- c(t(outer(unique(sub("\\d ", "", names(df))),
min(d1):max(d1), FUN = paste0)))
df[setdiff(nm1, names(df))] <- FALSE
df[nm1]
CodePudding user response:
I think you can use the following solution:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(everything()) %>%
extract(name, c("base", "pre"), regex = "(\\D )(\\d )") %>%
mutate(id = rep(1:ncol(df), each = ncol(df))) %>%
group_by(id, base) %>%
mutate(pre = as.integer(pre)) %>%
complete(pre = seq(min(pre), max(pre), 1), fill = list(value = FALSE)) %>%
unite("New_Name", c(base, pre), sep = "") %>%
pivot_wider(names_from = New_Name, values_from = value) %>%
ungroup() %>%
select(-id) %>%
relocate(starts_with("lg"))
# A tibble: 7 x 10
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
<lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE