Suppose I have a data set like this:
dat <- tibble(id = 1:4,
col1 = c(0, 1, 1, 0),
col2 = c(1, 0, 1, 0),
col3 = c(1, 1, 0, 1))
> dat
# A tibble: 4 × 4
id col1 col2 col3
<int> <dbl> <dbl> <dbl>
1 1 0 1 1
2 2 1 0 1
3 3 1 1 0
4 4 0 0 1
I'd like to separate, for every unique id, the multiple 1s into multiple rows, i.e. the expected output is:
# A tibble: 7 × 4
id col1 col2 col3
<dbl> <dbl> <dbl> <dbl>
1 1 0 1 0
2 1 0 0 1
3 2 1 0 0
4 2 0 0 1
5 3 1 0 0
6 3 0 1 0
7 4 0 0 1
For the first id (id = 1), col2 and col3 are both 1, so I would like a separate row for each of them. It kinda is like one-hot encoding for rows.
CodePudding user response:
With help from Ritchie Sacramento and RobertoT
library(tidyverse)
dat <- tibble(id = 1:4,
col1 = c(0, 1, 1, 0),
col2 = c(1, 0, 1, 0),
col3 = c(1, 1, 0, 1))
dat %>%
pivot_longer(-id) %>%
filter(value != 0) %>%
mutate(rows = 1:nrow(.)) %>%
pivot_wider(values_fill = 0,
names_sort = TRUE) %>%
select(-rows)
# A tibble: 7 × 4
id col1 col2 col3
<int> <dbl> <dbl> <dbl>
1 1 0 1 0
2 1 0 0 1
3 2 1 0 0
4 2 0 0 1
5 3 1 0 0
6 3 0 1 0
7 4 0 0 1
CodePudding user response:
Here is an alternative approach using model.matrix()
:
From the documenation: model.matrix
creates a design (or model) matrix, e.g., by expanding factors to a set of dummy variables (depending on the contrasts) and expanding interactions similarly.
library(dplyr)
library(tidyr)
dat %>%
pivot_longer(-id) %>%
filter(value == 1) %>%
cbind((model.matrix(~ name 0, .) == 1)*1)
id name value namecol1 namecol2 namecol3
1 1 col2 1 0 1 0
2 1 col3 1 0 0 1
3 2 col1 1 1 0 0
4 2 col3 1 0 0 1
5 3 col1 1 1 0 0
6 3 col2 1 0 1 0
7 4 col3 1 0 0 1
CodePudding user response:
You could do
arrange(bind_rows(lapply(2:4, function(x) {
d <- dat[dat[[x]] == 1,]
d[-c(1, x)] <- 0
d})), id)
#> # A tibble: 7 x 4
#> id col1 col2 col3
#> <int> <dbl> <dbl> <dbl>
#> 1 1 0 1 0
#> 2 1 0 0 1
#> 3 2 1 0 0
#> 4 2 0 0 1
#> 5 3 1 0 0
#> 6 3 0 1 0
#> 7 4 0 0 1
Created on 2022-07-14 by the reprex package (v2.0.1)
CodePudding user response:
Using explicit loops:
nullrow <- rep(0, ncol(dat)-1)
data <- dat[,-1]
rowsums <- apply(data, 1, sum)
res <- data[0,]
ids <- c()
for(i in 1:nrow(data)) {
if(rowsums[i]>0) {
for(j in 1:rowsums[i]) {
thisrow <- nullrow
thiscolumn <- which(data[i,]==1)[j]
thisrow[thiscolumn] <- 1
res <- rbind(res, thisrow)
}
ids <- c(ids, rep(dat$id[i], rowsums[i]))
}
}
names(res) <- colnames(data)
res$id <- ids
> res
col1 col2 col3 id
1 0 1 0 1
2 0 0 1 1
3 1 0 0 2
4 0 0 1 2
5 1 0 0 3
6 0 1 0 3
7 0 0 1 4
CodePudding user response:
A possible solution, based on Matrix::sparseMatrix
:
- First, it gets the indexes where there are 1 (with
which
). - Second, it adjusts the row indexes, to force one 1 per row.
- Third, it creates a sparse matrix, putting the 1 where the adjusted indexes specify.
library(tidyverse)
library(Matrix)
which(dat[-1] == 1, arr.ind = T) %>%
as.data.frame %>%
arrange(row) %>%
mutate(id = dat[row,"id"], row = 1:n()) %>%
{data.frame(id = .$id, as.matrix( sparseMatrix(i = .$row, j= .$col, x= 1)))}
#> id X1 X2 X3
#> 1 1 0 1 0
#> 2 1 0 0 1
#> 3 2 1 0 0
#> 4 2 0 0 1
#> 5 3 1 0 0
#> 6 3 0 1 0
#> 7 4 0 0 1
Another possible solution:
library(tidyverse)
f <- function(df)
{
got <- 0
for (i in 1:nrow(df))
{
got <- which.max(df[i, (got 1):ncol(df)]) got
df[i, -got] <- 0
}
df
}
dat %>%
slice(map(1:nrow(dat), ~ rep(.x, rowSums(dat[-1])[.x])) %>% unlist) %>%
group_by(id) %>%
group_modify(~ f(.)) %>%
ungroup
#> # A tibble: 7 × 4
#> id col1 col2 col3
#> <int> <dbl> <dbl> <dbl>
#> 1 1 0 1 0
#> 2 1 0 0 1
#> 3 2 1 0 0
#> 4 2 0 0 1
#> 5 3 1 0 0
#> 6 3 0 1 0
#> 7 4 0 0 1