Substitute certain values in vector conditionally, based on run-length-encoding [R]-CodePudding

I have some numeric vectors of various lengths. Each of them may contain three types of values: 0s, 1s and -1s, but mostly 0s. I would like to replace 0s with neighboring values based on 2 conditions (both of them must be met for replacement): (I) if there are less than three 0s in the row (one by one), and (II) this string is surrounded on both sides by the same non-zero values.

For instance, if there would be: 1,1,1,1,0,1,1, I would like to replace the 0 for 1. On the other hand, if there would be: 1,1,-1,1,0,-1,-1, I would like to leave it unchanged.

I wrote a function for doing this, although this is not an elegant one. I tried to manage to handle both conditions at once - unfortunately R threw errors while I attempted to do so.

Here are some dummy vectors:

x <- c(1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,-1,0,0,0,0,1,1,0,0,0,1)
y <- c(0,0,-1,0,-1,0,-1,-1,-1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0)
z <- c(0,0,0,0,1,0,1,0,1,0,1,0,-1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,-1,0,1,0,-1,0,1,0,0,0,0,0,0,0,0,0)
a <- c(0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,-1,0,0,0,0,0,0)

Here are desired outputs:

x_desired <- c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, -1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1)
y_desired <- c(0, 0, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0)
z_desired <- c(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, -1, 0, 1, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
a_desired <- c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0)

And here is my function:

substitute_plus_and_minus <- function(x){
    
  # create the run length encoding
  mod_rle <- rle(x)
  # create an index of 0s to be changed for 1s
  one_substitute <- mod_rle$lengths <3 &
    mod_rle$values == 0 &
    c(utils::tail(mod_rle$values, -1) == 1, FALSE) &
    c(FALSE, utils::head(mod_rle$values, -1) == 1)
  # set the values to 1
  mod_rle$values[one_substitute] <- 1
  # recreate the original vector
  x <- inverse.rle(mod_rle)
  
  # create the run length encoding
  mod_rle <- rle(x)
  # create an index of 0s to be changed for -1s
  minus_one_substitute <- mod_rle$lengths <3 &
    mod_rle$values == 0 &
    c(utils::tail(mod_rle$values, -1) == -1, FALSE) &
    c(FALSE, utils::head(mod_rle$values, -1) == -1)
  # set the values to -1
  mod_rle$values[minus_one_substitute] <- -1
  # recreate the original vector
  x <- inverse.rle(mod_rle)
  
  return(x)
  
}

I am looking for more elegant and compact solution (preferably base R approach), so there would be no need to iterate the data twice.

CodePudding user response：

Try this:

fun <- function(z) {
  r <- rle(z)
  ind <- r$lengths < 3 & r$values == 0 & c(Inf, r$values[-length(r$values)]) == c(r$values[-1], Inf)
  if (any(ind)) r$values[ind] <- r$values[which(ind)-1]
  inverse.rle(r)
}

### in a list here just to gather them into one place,
### vectors work fine too
vecs <- list(x = c(1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, -1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1), y = c(0, 0, -1, 0, -1, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0), z = c(0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, -1, 0, 1, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), a = c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0))
desired <- list(x = c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, -1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1), y = c(0, 0, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0), z = c(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, -1, 0, 1, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), a = c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0))

### run once on one vector
fun(vecs$x)
#  [1]  1  1  1  1  1  1  0  0  0  0  0  0  0  0  1  1  1  1  0 -1  0  0  0  0  1  1  0  0  0  1
identical(fun(vecs$x), desired$x)
# [1] TRUE

### all at once
lapply(vecs, fun)
# $x
#  [1]  1  1  1  1  1  1  0  0  0  0  0  0  0  0  1  1  1  1  0 -1  0  0  0  0  1  1  0  0  0  1
# $y
#  [1]  0  0 -1 -1 -1 -1 -1 -1 -1  0  0  0  0  0  0  0  0  0  0  0  1  1  1  1  0  0  0  0  1  1  1  1  0  0  0
# $z
#  [1]  0  0  0  0  1  1  1  1  1  1  1  0 -1  0  1  0  0  0  0  0  0  0  0  0  0  1  0 -1  0  1  0 -1  0  1  0  0  0  0  0  0  0  0  0
# $a
#  [1]  0  0  0  0  0  0  1  1  0  0  0  1  0  0  0  0  0  0  1  1  0  0  0 -1  0  0  0  0  0  0
identical(lapply(vecs, fun), desired)
# [1] TRUE

The use of Inf in the determination of ind is purely to have something non-match (and NA does not work here).

CodePudding user response：

How about a loop with conditions:

substitute_plus_and_minus <- function(x) {
  for (i in 2:(length(x)-1))
    if (x[i] == 0)
      if ((x[i-1] == x[i 1] & x[i-1] != 0) | (x[i 1] == 0 & x[i-1] == x[i 2] & i < length(x)-1)) 
        x[i] <- x[i-1]
  return(x)
}

Check:

identical(x_desired, substitute_plus_and_minus(x))
[1] TRUE
identical(y_desired, substitute_plus_and_minus(y))
[1] TRUE
identical(z_desired, substitute_plus_and_minus(z))
[1] TRUE
identical(a_desired, substitute_plus_and_minus(a))
[1] TRUE

Updated with further conditions to account for 0 0 situation.