How do you know if the value is the same for the following rows in R?-CodePudding

I have this kind of data:

Break	Start	Stop
1	1	0
1	0	0
1	0	0
1	0	0
1	0	1
0	0	0
0	0	0
0	0	0
0	0	0

I'm trying to create a new column that marks the start and the end of a run, but only when there are five 1s in a row with Break == 1, like this:

Break	Start	Stop	NewCol
1	1	0	Start
1	0	0
1	0	0
1	0	0
1	0	1	Stop
0	0	0
0	0	0
0	0	0
0	0	0

CodePudding user response：

I've added some more rows to your data

# Example data: 31 rows. Break is six 1s, seven 0s, then eighteen 1s;
# Start and Stop are 0 everywhere except at the listed row numbers.
df <- data.frame(
  Break = rep(c(1L, 0L, 1L), times = c(6L, 7L, 18L)),
  Start = replace(integer(31L), c(1L, 7L, 14L, 25L), 1L),
  Stop = replace(integer(31L), c(5L, 12L, 17L, 31L), 1L)
)

using a loop

# Encode each row as "A" (Start), "B" (Stop), "AB" (both) or "" (neither),
# then collapse consecutive identical codes into runs.
tmp <- rle(paste0(ifelse(df$Start == 1, "A", ""), ifelse(df$Stop == 1, "B", "")))

# Visit every interior run i. A gap of at least three unmarked rows that is
# directly preceded by a Start run and directly followed by a Stop run means
# the Start..Stop stretch covers at least five rows, so both ends are labelled.
# seq_len() keeps the loop empty when there are fewer than three runs, where
# seq(2, n - 1) would count downwards and index out of range.
for (i in seq_len(max(length(tmp$lengths) - 2L, 0L)) + 1L) {
  if (tmp$values[i - 1] == "A" && tmp$values[i] == "" &&
      tmp$lengths[i] >= 3 && tmp$values[i + 1] == "B") {
    tmp$values[i - 1] <- "Start"
    tmp$values[i + 1] <- "Stop"
  }
}

# Expand the runs back to one label per row, then blank out every row that is
# not a Start/Stop label on a Break == 1 row.
df$NewCol <- rep(tmp$values, tmp$lengths)
df$NewCol[!(df$NewCol %in% c("Start", "Stop") & df$Break == 1)] <- ""

and the result

   Break Start Stop NewCol
1      1     1    0  Start
2      1     0    0       
3      1     0    0       
4      1     0    0       
5      1     0    1   Stop
6      1     0    0       
7      0     1    0       
8      0     0    0       
9      0     0    0       
10     0     0    0       
11     0     0    0       
12     0     0    1       
13     0     0    0       
14     1     1    0       
15     1     0    0       
16     1     0    0       
17     1     0    1       
18     1     0    0       
19     1     0    0       
20     1     0    0       
21     1     0    0       
22     1     0    0       
23     1     0    0       
24     1     0    0       
25     1     1    0  Start
26     1     0    0       
27     1     0    0       
28     1     0    0       
29     1     0    0       
30     1     0    0       
31     1     0    1   Stop

CodePudding user response：

Try this:

df <- data.frame(Break = c(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0))
df$Start <- 0
df$Stop <- 0

# Number of consecutive 1s that make up one Start..Stop block.
block_size <- 5L

# Look at the Break column only and respect run boundaries: describe every row
# by the run of identical values it belongs to.
runs <- rle(df$Break == 1)
in_run <- rep(runs$values, runs$lengths)    # TRUE when the row is in a run of 1s
run_len <- rep(runs$lengths, runs$lengths)  # length of the row's run
pos <- sequence(runs$lengths)               # 1-based position inside the run

# A block closes on every block_size-th row of a run of 1s. It opens
# block_size - 1 rows earlier, and only if the run is long enough to close it,
# so an incomplete tail of a run gets neither mark and cannot shift later runs.
closes <- in_run & pos %% block_size == 0L
opens <- in_run & (pos - 1L) %% block_size == 0L &
  pos + block_size - 1L <= run_len

df$Start[which(opens)] <- 1
df$Stop[which(closes)] <- 1

CodePudding user response：

As far as I understand it, Start and Stop can be ignored? Either way, this solution doesn't need them, so they can be added back if desired. I extended the example to show the edge cases where a run contains more or fewer than five 1s.

library(dplyr)

# Label complete blocks of five consecutive Break == 1 rows.
# - grp starts a new group whenever Break changes, so each group is one run of
#   identical values.
# - Within a run of 1s, `is` cycles 1, 2, 3, 4, 0, ...: a 0 closes a block of
#   five, and a 1 opens a block only when the row four steps ahead (in the same
#   run) closes it. Incomplete tails therefore get no label.
# - Both helper columns (`is`, `grp`) are dropped again before returning.
df %>%
  group_by(grp = cumsum(lag(Break, default = TRUE) != Break)) %>%
  mutate(is = cumsum(Break) %% 5,
         newcol = case_when(is == 0 & Break == 1 ~ "Stop",
                    is == 1 & Break == 1 & lead(is, 4) == 0 &
                      lead(Break, 4) == 1 ~ "Start",
                    TRUE ~ ""),
    is = NULL) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 18 × 2
   Break newcol 
   <dbl> <chr>  
 1     1 "Start"
 2     1 ""     
 3     1 ""     
 4     1 ""     
 5     1 "Stop" 
 6     0 ""     
 7     0 ""     
 8     1 "Start"
 9     1 ""     
10     1 ""     
11     1 ""     
12     1 "Stop" 
13     1 "Start"
14     1 ""     
15     1 ""     
16     1 ""     
17     1 "Stop" 
18     1 ""

Data

# Example data: a run of five 1s, two 0s, then a run of eleven 1s (18 rows).
df <- data.frame(Break = rep(c(1, 0, 1), times = c(5, 2, 11)))

CodePudding user response：

This may be a solution too:

# install.packages("data.table")
library(data.table)
    # Example data: 21 rows. Break has three runs of five 1s separated by four
    # and two 0s; Start and Stop are 0 except at the listed row numbers.
    df <- data.frame(
      Break = rep(c(1L, 0L, 1L, 0L, 1L), times = c(5L, 4L, 5L, 2L, 5L)),
      Start = replace(integer(21L), c(1L, 10L, 17L), 1L),
      Stop = replace(integer(21L), c(5L, 13L, 21L), 1L)
    )
    # Convert to a data.table so the `:=` / `by` syntax below can be used.
    dt <- as.data.table(df)
    # Number the Break == 1 rows in consecutive batches of five
    # (1, 1, 1, 1, 1, 2, 2, ...); rows with Break != 1 are not assigned and end
    # up with dbatch = NA.
    # NOTE(review): batches are counted over all Break == 1 rows rather than per
    # contiguous run, so a run whose length is not a multiple of five presumably
    # shifts every later batch — confirm this is acceptable for the real data.
    dt[Break == 1, dbatch := {
      nbatch = ceiling(.N / 5)
      head(rep(seq(nbatch), each = 5), .N)
      }]
    # Per batch: label the Start == 1 row and the Stop == 1 row only when the
    # batch's Break values sum to five, its first row has Start == 1 and its
    # fifth row has Stop == 1; every other row gets "".
    dt[, NewCol := fcase(
      sum(Break) == 5 & .SD[, Start[1] == 1 & Stop[5] == 1] & Start == 1, "Start",
      sum(Break) == 5 & .SD[, Start[1] == 1 & Stop[5] == 1] & Stop == 1, "Stop",
      default = ""
    ), by = dbatch]

    Break Start Stop dbatch NewCol
 1:     1     1    0      1  Start
 2:     1     0    0      1       
 3:     1     0    0      1       
 4:     1     0    0      1       
 5:     1     0    1      1   Stop
 6:     0     0    0     NA       
 7:     0     0    0     NA       
 8:     0     0    0     NA       
 9:     0     0    0     NA       
10:     1     1    0      2       
11:     1     0    0      2       
12:     1     0    0      2       
13:     1     0    1      2       
14:     1     0    0      2       
15:     0     0    0     NA       
16:     0     0    0     NA       
17:     1     1    0      3  Start
18:     1     0    0      3       
19:     1     0    0      3       
20:     1     0    0      3       
21:     1     0    1      3   Stop
    Break Start Stop dbatch NewCol

CodePudding user response：

using rle

library(purrr)
library(dplyr)

# Label the first and last row of every run of at least five consecutive
# Break == 1 rows; all other rows get "".
# rle(Break) yields one (length, value) pair per run; each pair is expanded to
# one label per row, and unlist() concatenates the pieces into a plain
# character vector (flatten() is deprecated and would make NewCol a list
# column).
df |>
    mutate(NewCol = with(rle(Break),
                map2(lengths, values, \(len, val) {
                    if (len >= 5 && val == 1)
                        c("Start", rep("", len - 2), "Stop")
                    else
                        rep("", len)
                }) |>
                unlist()))

##>   Break Start Stop NewCol
##> 1     1     1    0  Start
##> 2     1     0    0       
##> 3     1     0    0       
##> 4     1     0    0       
##> 5     1     0    1   Stop
##> 6     0     0    0       
##> 7     0     0    0       
##> 8     0     0    0       
##> 9     0     0    0