I have a database of connection pipes with the inflow (letters) and outflow as numbers.
df=data.frame(id=c(1,2,3,4,5), A-Y=c(0,1,0,1,0), B-Z=c(1,1,1,0,0), C-W=c(1,1,0,0,0))
df
df
id A.Y B.Z C.W
1 1 0 1 1
2 2 1 1 1
3 3 0 1 0
4 4 1 0 0
5 5 0 0 0
i want to convert it to long format as follows
df.out<-data.frame(id=c(1,1,2,2,2,3,4,5), inflow=c("B", "C", "A", "B", "C", "B", "A",""), outflow=c("Z", "W", "Y", "Z", "W", "Z","Y",""))
df.out
id inflow outflow
1 B Z
1 C W
2 A Y
2 B Z
2 C W
3 B Z
4 A Y
5
I think I should use pivot_longer
but I am not sure how to define the split.
df %>% pivot_longer(cols=A.Y:C.W, names_to=c("Inflow", "Outflow"), names_pattern = ".-.", values_to = status)
Error: `regex` should define 2 groups; found.
does not seem to work
Any ideas?
CodePudding user response:
Does this work:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(-id) %>% separate(name, into = c('inflow','outflow'), sep = '\\.') %>%
filter(value != 0) %>% select(-value)
# A tibble: 7 x 3
id inflow outflow
<dbl> <chr> <chr>
1 1 B Z
2 1 C W
3 2 A Y
4 2 B Z
5 2 C W
6 3 B Z
7 4 A Y
CodePudding user response:
You need to provide capture groups:
df <- tibble(
id = c(1, 2, 3, 4, 5),
A_Y = c(0, 1, 0, 1, 0),
B_Z = c(1, 1, 1, 0, 0),
C_W = c(1, 1, 0, 0, 0)
)
df
df %>%
pivot_longer(
cols = -id,
names_to = c("Inflow", "Outflow"),
names_pattern = "(.)_(.)",
values_to = "status"
) %>%
filter(status == 1) %>%
select(-status)
CodePudding user response:
You may use names_sep
.
tidyr::pivot_longer(df, cols = -id,
names_to = c('inflow', 'outflow'),
names_sep = '\\.') %>%
filter(value == 1) %>%
select(-value)
# id inflow outflow
# <dbl> <chr> <chr>
#1 1 B Z
#2 1 C W
#3 2 A Y
#4 2 B Z
#5 2 C W
#6 3 B Z
#7 4 A Y
data
df=data.frame(id=c(1,2,3,4,5), A.Y=c(0,1,0,1,0),
B.Z=c(1,1,1,0,0), C.W=c(1,1,0,0,0))