I have this kind of data:
Break | Start | Stop |
---|---|---|
1 | 1 | 0 |
1 | 0 | 0 |
1 | 0 | 0 |
1 | 0 | 0 |
1 | 0 | 1 |
0 | 0 | 0 |
0 | 0 | 0 |
0 | 0 | 0 |
0 | 0 | 0 |
I'm trying to create a new column that marks the start and the end of a run, but only if there are five 1s in a row (i.e. Break == 1 for five consecutive rows), like this:
Break | Start | Stop | NewCol |
---|---|---|---|
1 | 1 | 0 | Start |
1 | 0 | 0 | |
1 | 0 | 0 | |
1 | 0 | 0 | |
1 | 0 | 1 | Stop |
0 | 0 | 0 | |
0 | 0 | 0 | |
0 | 0 | 0 | |
0 | 0 | 0 | |
CodePudding user response:
I've added some more rows to your data
# Example data: 31 rows, three integer 0/1 indicator columns.
df <- data.frame(
  # Break is 1 on rows 1-6 and 14-31, and 0 on rows 7-13.
  Break = rep(c(1L, 0L, 1L), times = c(6L, 7L, 18L)),
  # Start is flagged on rows 1, 7, 14 and 25.
  Start = replace(integer(31L), c(1L, 7L, 14L, 25L), 1L),
  # Stop is flagged on rows 5, 12, 17 and 31.
  Stop = replace(integer(31L), c(5L, 12L, 17L, 31L), 1L)
)
using a loop
# Give every row a label: "A" for a Start row, "B" for a Stop row ("AB" if a
# row is both) and "" otherwise, then collapse equal neighbours into runs.
tmp <- rle(paste0(
  ifelse(df$Start == 1, "A", ""),
  ifelse(df$Stop == 1, "B", "")
))
n_runs <- length(tmp$lengths)
# Only interior runs have a neighbour on both sides. Dropping the first and
# last index this way gives an empty loop when there are fewer than three
# runs, whereas seq(2, n_runs - 1) would count backwards in that case.
for (i in seq_len(n_runs)[-c(1L, n_runs)]) {
  # Keep a pair only when an "A" run is followed by at least three unflagged
  # rows and then by a "B" run.
  if (tmp$values[i - 1] == "A" && tmp$values[i] == "" &&
        tmp$lengths[i] >= 3 && tmp$values[i + 1] == "B") {
    tmp$values[i - 1] <- "Start"
    tmp$values[i + 1] <- "Stop"
  }
}
# Expand the run labels back to one value per row.
df$NewCol <- rep(tmp$values, tmp$lengths)
# Blank every row that is not a Start/Stop label with Break == 1.
df$NewCol[!(df$NewCol %in% c("Start", "Stop") & df$Break == 1)] <- ""
and the result
Break Start Stop NewCol
1 1 1 0 Start
2 1 0 0
3 1 0 0
4 1 0 0
5 1 0 1 Stop
6 1 0 0
7 0 1 0
8 0 0 0
9 0 0 0
10 0 0 0
11 0 0 0
12 0 0 1
13 0 0 0
14 1 1 0
15 1 0 0
16 1 0 0
17 1 0 1
18 1 0 0
19 1 0 0
20 1 0 0
21 1 0 0
22 1 0 0
23 1 0 0
24 1 0 0
25 1 1 0 Start
26 1 0 0
27 1 0 0
28 1 0 0
29 1 0 0
30 1 0 0
31 1 0 1 Stop
CodePudding user response:
Try this:
# Flag, for a 0/1 indicator vector, the first and last row of every complete
# group of `size` consecutive 1s. Each run of 1s is handled on its own, so a
# group never spans a gap of 0s, and rows left over at the end of a run that
# do not fill a whole group stay unflagged.
#
# flag: numeric or integer vector of 0/1 values (NA is treated as "not 1").
# size: number of consecutive 1s that make up one group.
# Returns a list with two logical vectors, `start` and `stop`, each as long
# as `flag`.
mark_full_groups <- function(flag, size = 5L) {
  is_one <- !is.na(flag) & flag == 1
  runs <- rle(is_one)
  pos <- sequence(runs$lengths)               # 1-based position inside a run
  run_len <- rep(runs$lengths, runs$lengths)  # length of the enclosing run
  list(
    # First row of a group, provided the whole group still fits in the run.
    start = is_one & (pos - 1L) %% size == 0L & pos + size - 1L <= run_len,
    # Last row of a group: every `size`-th row of a run of 1s.
    stop = is_one & pos %% size == 0L
  )
}

df <- data.frame(Break = c(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0))
df$Start <- 0
df$Stop <- 0
groups <- mark_full_groups(df$Break)
df$Start[groups$start] <- 1
df$Stop[groups$stop] <- 1
CodePudding user response:
As far as I understand it, Start and Stop can be ignored? Either way, this solution doesn't need them, so they can be added back if desired. I extended the example to show the edge cases where a run contains more or fewer than five 1s.
library(dplyr)

# Label every complete group of five consecutive rows with Break == 1: the
# first row of the group gets "Start", the fifth gets "Stop", all others "".
df %>%
  # grp grows by one each time Break changes value, so every run of identical
  # Break values becomes its own group. TRUE is spelled out because T is an
  # ordinary variable that can be reassigned.
  group_by(grp = cumsum(lag(Break, default = TRUE) != Break)) %>%
  mutate(
    # Position inside the run modulo 5 (1, 2, 3, 4, 0, 1, ...) for runs of 1s;
    # always 0 for runs of 0s.
    is = cumsum(Break) %% 5,
    newcol = case_when(
      is == 0 & Break == 1 ~ "Stop",
      # A row opens a group only if the row four positions later in the same
      # run closes it. lead() yields NA past the end of the run, and
      # case_when() treats an NA condition as no match.
      is == 1 & Break == 1 & lead(is, 4) == 0 & lead(Break, 4) == 1 ~ "Start",
      TRUE ~ ""
    ),
    is = NULL  # drop the helper column again
  ) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 18 × 2
Break newcol
<dbl> <chr>
1 1 "Start"
2 1 ""
3 1 ""
4 1 ""
5 1 "Stop"
6 0 ""
7 0 ""
8 1 "Start"
9 1 ""
10 1 ""
11 1 ""
12 1 "Stop"
13 1 "Start"
14 1 ""
15 1 ""
16 1 ""
17 1 "Stop"
18 1 ""
Data
# Example data: five 1s, two 0s, then a run of eleven 1s (18 rows in total).
df <- data.frame(Break = rep(c(1, 0, 1), times = c(5, 2, 11)))
CodePudding user response:
This may be a solution too:
# install.packages("data.table")
library(data.table)

# Example data: three runs of five Break rows; only the first and third run
# carry a Stop flag on their fifth row.
df <- structure(list(
  Break = c(
    1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
    1L, 1L, 1L, 1L, 1L
  ),
  Start = c(
    1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
    1L, 0L, 0L, 0L, 0L
  ),
  Stop = c(
    0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
    0L, 0L, 0L, 0L, 1L
  )
), class = "data.frame", row.names = c(NA, -21L))

dt <- as.data.table(df)

# Number the runs of identical Break values, then cut every run into
# consecutive chunks of five rows, so that a batch can never span a gap of 0s
# (batching all Break == 1 rows in fives would do so whenever a run is not a
# multiple of five long).
dt[, run := rleid(Break)]
dt[, chunk := (seq_len(.N) - 1L) %/% 5L, by = run]
# Every (run, chunk) pair of Break == 1 rows gets its own batch id, counted in
# order of appearance; rows with Break == 0 are not assigned and stay NA.
dt[Break == 1, dbatch := .GRP, by = .(run, chunk)]
dt[, c("run", "chunk") := NULL]

dt[, NewCol := {
  # A batch qualifies only if it holds five Break rows, opens with a Start
  # flag and has a Stop flag on its fifth row. Computed once per batch.
  complete <- sum(Break) == 5 & Start[1] == 1 & Stop[5] == 1
  fcase(
    complete & Start == 1, "Start",
    complete & Stop == 1, "Stop",
    default = ""
  )
}, by = dbatch]
Break Start Stop dbatch NewCol
1: 1 1 0 1 Start
2: 1 0 0 1
3: 1 0 0 1
4: 1 0 0 1
5: 1 0 1 1 Stop
6: 0 0 0 NA
7: 0 0 0 NA
8: 0 0 0 NA
9: 0 0 0 NA
10: 1 1 0 2
11: 1 0 0 2
12: 1 0 0 2
13: 1 0 1 2
14: 1 0 0 2
15: 0 0 0 NA
16: 0 0 0 NA
17: 1 1 0 3 Start
18: 1 0 0 3
19: 1 0 0 3
20: 1 0 0 3
21: 1 0 1 3 Stop
Break Start Stop dbatch NewCol
CodePudding user response:
using rle
library(purrr)
library(dplyr)

# Mark the first and last row of every run of at least five consecutive
# Break == 1 rows with "Start" and "Stop"; every other row gets "".
df |>
  mutate(
    NewCol = with(
      rle(Break),
      # One character vector per run, as long as the run itself.
      map2(lengths, values, \(len, val) {
        # isTRUE() keeps an NA run value from breaking the condition.
        if (len >= 5 && isTRUE(val == 1)) {
          c("Start", rep("", len - 2), "Stop")
        } else {
          rep("", len)
        }
      }) |>
        # Concatenate into one plain character vector; flatten() would return
        # a list and turn NewCol into a list column.
        unlist() |>
        as.character()
    )
  )
##> Break Start Stop NewCol
##> 1 1 1 0 Start
##> 2 1 0 0
##> 3 1 0 0
##> 4 1 0 0
##> 5 1 0 1 Stop
##> 6 0 0 0
##> 7 0 0 0
##> 8 0 0 0
##> 9 0 0 0