Home > Back-end >  How do you know if the value is the same for the following rows in R?
How do you know if the value is the same for the following rows in R?

Time:12-22

I have this kind of data:

Break Start Stop
1 1 0
1 0 0
1 0 0
1 0 0
1 0 1
0 0 0
0 0 0
0 0 0
0 0 0

I'm trying to create a new column that marks the start and the end of a run, but only when there are five 1s in a row (i.e. Break == 1 for five consecutive rows), like this:

Break Start Stop NewCol
1 1 0 Start
1 0 0
1 0 0
1 0 0
1 0 1 Stop
0 0 0
0 0 0
0 0 0
0 0 0

CodePudding user response:

I've added some more rows to your data

# Example data: 31 rows of integer 0/1 flags.
#   Break - 6 ones, then 7 zeros, then 18 ones
#   Start - 1 at rows 1, 7, 14 and 25, otherwise 0
#   Stop  - 1 at rows 5, 12, 17 and 31, otherwise 0
df <- structure(
  list(
    Break = rep(c(1L, 0L, 1L), times = c(6L, 7L, 18L)),
    Start = replace(integer(31L), c(1L, 7L, 14L, 25L), 1L),
    Stop = replace(integer(31L), c(5L, 12L, 17L, 31L), 1L)
  ),
  class = "data.frame",
  row.names = c(NA, -31L)
)

using a loop

# Encode every row as "A" (Start flag set), "B" (Stop flag set), "AB" (both)
# or "" (neither), then run-length encode that sequence of codes.
tmp <- rle(paste0(
  ifelse(df$Start == 1, "A", ""),
  ifelse(df$Stop == 1, "B", "")
))

# Look at every interior run i and relabel its neighbours when the pattern is
#   "A" run, then a run of at least 3 unflagged rows, then "B" run
# i.e. a Start and a Stop that are at least five rows apart (inclusive).
# Only interior runs have both neighbours; with fewer than three runs the
# index vector is empty and the loop is skipped instead of indexing out of
# range.
n_runs <- length(tmp$lengths)
for (i in seq_len(max(n_runs - 2L, 0L)) + 1L) {
  if (tmp$values[i - 1] == "A" && tmp$values[i] == "" &&
      tmp$lengths[i] >= 3 && tmp$values[i + 1] == "B") {
    tmp$values[i - 1] <- "Start"
    tmp$values[i + 1] <- "Stop"
  }
}

# Expand the runs back to one label per row, then blank out everything that
# is not a confirmed "Start"/"Stop" lying on a Break == 1 row.
df$NewCol <- rep(tmp$values, tmp$lengths)
df$NewCol[!(df$NewCol %in% c("Start", "Stop") & df$Break == 1)] <- ""

and the result

   Break Start Stop NewCol
1      1     1    0  Start
2      1     0    0       
3      1     0    0       
4      1     0    0       
5      1     0    1   Stop
6      1     0    0       
7      0     1    0       
8      0     0    0       
9      0     0    0       
10     0     0    0       
11     0     0    0       
12     0     0    1       
13     0     0    0       
14     1     1    0       
15     1     0    0       
16     1     0    0       
17     1     0    1       
18     1     0    0       
19     1     0    0       
20     1     0    0       
21     1     0    0       
22     1     0    0       
23     1     0    0       
24     1     0    0       
25     1     1    0  Start
26     1     0    0       
27     1     0    0       
28     1     0    0       
29     1     0    0       
30     1     0    0       
31     1     0    1   Stop

CodePudding user response:

Try this:

# Mark the first row (Start = 1) and the last row (Stop = 1) of every complete
# batch of length(v) consecutive rows where Break == 1.
df <- data.frame(Break = c(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0))
df$Start <- 0
df$Stop <- 0

# Pattern to look for: five 1s in a row. Only its length is used below.
v <- c(1, 1, 1, 1, 1)
batch_len <- length(v)

# Work per contiguous run of Break instead of matching individual 1s, so a
# run that is shorter than (or not a multiple of) batch_len cannot shift the
# marks of the runs that follow it.
runs <- rle(df$Break)
run_first <- cumsum(runs$lengths) - runs$lengths + 1L
# Number of complete batches in each run; zero for runs whose value is not 1
# (%in% also treats NA as "not 1").
n_batches <- (runs$lengths %/% batch_len) * (runs$values %in% 1)

# Row index of the first row of every complete batch (NULL when there is none,
# in which case the two assignments below change nothing).
x <- unlist(Map(
  function(first, k) first + (seq_len(k) - 1L) * batch_len,
  run_first, n_batches
))

df$Start[x] <- 1
df$Stop[x + batch_len - 1L] <- 1

CodePudding user response:

As far as I understand it, Start and Stop can be ignored? Either way, this solution doesn't need them, so they can be added back if desired. I extended the example to show the edge cases where a run contains more or fewer than five 1s.

library(dplyr)

# Label each complete batch of five consecutive Break == 1 rows: the first row
# of the batch gets "Start", the fifth gets "Stop", everything else "".
df %>%
  # A new group begins whenever Break differs from the previous row, so every
  # group is one contiguous run of identical Break values.
  group_by(grp = cumsum(lag(Break, default = TRUE) != Break)) %>%
  mutate(
    # Position within the run modulo 5: 1 = first row of a batch, 0 = fifth.
    pos = cumsum(Break) %% 5,
    newcol = case_when(
      pos == 0 & Break == 1 ~ "Stop",
      # A row only counts as a Start if its batch is completed four rows
      # later inside the same run.
      pos == 1 & Break == 1 &
        lead(pos, 4) == 0 & lead(Break, 4) == 1 ~ "Start",
      TRUE ~ ""
    ),
    # Drop the helper column again.
    pos = NULL
  ) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 18 × 2
   Break newcol 
   <dbl> <chr>  
 1     1 "Start"
 2     1 ""     
 3     1 ""     
 4     1 ""     
 5     1 "Stop" 
 6     0 ""     
 7     0 ""     
 8     1 "Start"
 9     1 ""     
10     1 ""     
11     1 ""     
12     1 "Stop" 
13     1 "Start"
14     1 ""     
15     1 ""     
16     1 ""     
17     1 "Stop" 
18     1 ""

Data

# Example data: one run of five 1s, two 0s, then a run of eleven 1s (18 rows).
df <- data.frame(Break = rep(c(1, 0, 1), times = c(5, 2, 11)))

CodePudding user response:

This may be a solution too:

# install.packages("data.table")
library(data.table)
    # Example data: 21 rows of integer 0/1 flags.
    #   Break - runs of 5 ones, 4 zeros, 5 ones, 2 zeros, 5 ones
    #   Start - 1 at rows 1, 10 and 17, otherwise 0
    #   Stop  - 1 at rows 5, 13 and 21, otherwise 0
    df <- data.frame(
      Break = rep(c(1L, 0L, 1L, 0L, 1L), times = c(5L, 4L, 5L, 2L, 5L)),
      Start = replace(integer(21L), c(1L, 10L, 17L), 1L),
      Stop = replace(integer(21L), c(5L, 13L, 21L), 1L)
    )
    # Work on a data.table copy of the example data.
    dt <- as.data.table(df)
    # Number the Break == 1 rows in batches of five, in table order: the first
    # five such rows get dbatch 1, the next five get 2, and so on (the last
    # batch may be shorter). Rows with Break != 1 are not assigned and keep
    # dbatch = NA.
    # NOTE(review): batches are counted over all Break == 1 rows together, not
    # per contiguous run, so a run whose length is not a multiple of five would
    # make later batches straddle two runs - confirm this is acceptable for
    # the real data.
    dt[Break == 1, dbatch := {
      # .N is the number of selected (Break == 1) rows.
      nbatch = ceiling(.N / 5)
      # Repeat each batch id five times and cut back to exactly .N values.
      head(rep(seq(nbatch), each = 5), .N)
      }]
    # Per batch, label rows only when the batch is "valid": it sums to five
    # Break rows, its first row has Start == 1 and its fifth row has Stop == 1.
    # In a valid batch, rows with Start == 1 become "Start" and rows with
    # Stop == 1 become "Stop"; every other row (including the dbatch = NA
    # group) falls through to the empty-string default.
    dt[, NewCol := fcase(
      sum(Break) == 5 & .SD[, Start[1] == 1 & Stop[5] == 1] & Start == 1, "Start",
      sum(Break) == 5 & .SD[, Start[1] == 1 & Stop[5] == 1] & Stop == 1, "Stop",
      default = ""
    ), by = dbatch]

    Break Start Stop dbatch NewCol
 1:     1     1    0      1  Start
 2:     1     0    0      1       
 3:     1     0    0      1       
 4:     1     0    0      1       
 5:     1     0    1      1   Stop
 6:     0     0    0     NA       
 7:     0     0    0     NA       
 8:     0     0    0     NA       
 9:     0     0    0     NA       
10:     1     1    0      2       
11:     1     0    0      2       
12:     1     0    0      2       
13:     1     0    1      2       
14:     1     0    0      2       
15:     0     0    0     NA       
16:     0     0    0     NA       
17:     1     1    0      3  Start
18:     1     0    0      3       
19:     1     0    0      3       
20:     1     0    0      3       
21:     1     0    1      3   Stop
    Break Start Stop dbatch NewCol

CodePudding user response:

using rle

library(purrr)
library(dplyr)

# Add NewCol: for every run of at least five consecutive Break == 1 rows, the
# first row of the run is labelled "Start" and the last row "Stop"; all other
# rows get "".
df |>
  mutate(
    NewCol = with(rle(Break), {
      # One character vector of labels per run, each as long as its run.
      run_labels <- map2(lengths, values, \(len, val) {
        # isTRUE() keeps an NA in Break from aborting the `if`.
        if (len >= 5 && isTRUE(val == 1)) {
          c("Start", rep("", len - 2), "Stop")
        } else {
          rep("", len)
        }
      })
      # Concatenate into a plain character vector (one label per row) so that
      # NewCol is an ordinary character column rather than a list column;
      # as.character() keeps the type right when there are no runs at all.
      as.character(unlist(run_labels))
    })
  )

##>   Break Start Stop NewCol
##> 1     1     1    0  Start
##> 2     1     0    0       
##> 3     1     0    0       
##> 4     1     0    0       
##> 5     1     0    1   Stop
##> 6     0     0    0       
##> 7     0     0    0       
##> 8     0     0    0       
##> 9     0     0    0       

  • Related