Interpolate between each row of a data.frame-CodePudding

I am looking to resample and interpolate between each row of a data.frame in a fast way. I don't mind working with data.table or other data structures if necessary. Here is a reproducible example :

# Reproducible example: three (x, y) rows to interpolate between.
df <- data.frame(
  x = c(0, 2, 10),
  y = c(10, 12, 0)
)

Desired output: a function f(df, n), where n is the number of values to interpolate between each pair of consecutive rows, so that:

df_int <- f(df, 1)

# That would produce :
# df_int <- data.frame(x = c(0, 1, 2, 6, 10),
#                      y = c(10, 11, 12, 6, 0))

df_int <- f(df, 3)

# That would produce :
# df_int <- data.frame(x = c(0, 0.5, 1, 1.5, 2, 4, 6, 8, 10),
#                      y = c(10, 10.5, 11, 11.5, 12, 9, 6, 3, 0))

Some solutions were proposed using approx but that doesn't work in my case.

CodePudding user response：

Without consideration of speed

# Linearly interpolate between consecutive elements of a vector.
#
# x: numeric vector of values to interpolate between.
# n: number of equally spaced values to insert between each pair of
#    consecutive elements of x.
#
# Returns a numeric vector of length (length(x) - 1) * (n + 1) + 1 that
# contains every original element of x, in order, with n interpolated
# values between each pair. When x has fewer than two elements there is
# nothing to interpolate and x is returned unchanged.
interpolate_vector <- function(x, n) {
  if (length(x) < 2L) {
    return(x)
  }
  Reduce(
    function(acc, next_value) {
      # The last accumulated value is the start of the next segment; drop it
      # before appending so the shared endpoint appears only once.
      c(head(acc, -1), seq(tail(acc, 1), next_value, length.out = n + 2))
    },
    x = tail(x, -1),
    init = x[1]
  )
}

# Interpolate every column of df with interpolate_vector() and reassemble
# the interpolated columns into a data.frame.
f <- function(df, n) {
  columns <- lapply(df, function(column) interpolate_vector(column, n))
  as.data.frame(columns)
}

f(df, 1)

f(df, 3)

     x    y
1  0.0 10.0
2  0.5 10.5
3  1.0 11.0
4  1.5 11.5
5  2.0 12.0
6  4.0  9.0
7  6.0  6.0
8  8.0  3.0
9 10.0  0.0

Without Reduce and growing vectors:

# Linearly interpolate between consecutive elements of a vector, writing
# each segment into a preallocated result instead of growing a vector.
#
# x: numeric vector of values to interpolate between.
# n: number of equally spaced values to insert between each pair of
#    consecutive elements of x.
#
# Returns a numeric vector of length (length(x) - 1) * (n + 1) + 1. When x
# has fewer than two elements there is nothing to interpolate and x is
# returned unchanged.
interpolate_vector_2 <- function(x, n) {
  if (length(x) < 2L) {
    return(x)
  }
  res <- numeric(length = (length(x) - 1) * (n + 1) + 1)
  for (i in head(seq_along(x), -1)) {
    # Segment i fills n + 2 slots starting at 1 + (i - 1) * (n + 1). Its
    # final slot is rewritten by the first value of segment i + 1, which is
    # the same element x[i + 1].
    res[(i + (i - 1) * n):(i + i * n + 1)] <-
      seq(x[i], x[i + 1], length.out = n + 2)
  }
  res
}

# Interpolate every column of df with interpolate_vector_2() and reassemble
# the interpolated columns into a data.frame.
f_2 <- function(df, n) {
  interpolated <- lapply(df, interpolate_vector_2, n = n)
  as.data.frame(interpolated)
}

Benchmark template (including @Maël's answers):

# Benchmark the candidate implementations over a grid of input sizes
# (`rows`) and interpolation counts (`n`); the result is stored in `res`.
# NOTE(review): f_3 and f_4 are not defined in the snippets shown here;
# judging by their labels they are presumably the zoo::na.approx and approx
# based answers under different names -- define them before running.
res <- bench::press(
  rows = c(1e2, 1e3),
  n = c(1, 3, 10),
  {
    # Fresh random two-column input with `rows` rows.
    df <- data.frame(
      x = runif(rows),
      y = runif(rows)
    )
    # Time each implementation on the same data frame.
    bench::mark(
      zoo = f_3(df, n),
      loop = f_2(df, n),
      reduce = f(df, n),
      approx = f_4(df, n)
    )
  }
)

CodePudding user response：

Using approx:

# Linearly interpolate between consecutive elements of a vector using
# stats::approx() on each pair of neighbours.
#
# x: numeric vector of values to interpolate between.
# n: number of equally spaced values to insert between each pair of
#    consecutive elements of x.
#
# Returns a numeric vector of length (length(x) - 1) * (n + 1) + 1. When x
# has fewer than two elements there is nothing to interpolate and x is
# returned unchanged.
interp <- function(x, n) {
  if (length(x) < 2L) {
    return(x)
  }
  segments <- lapply(seq_len(length(x) - 1L), function(i) {
    # approx() on a single vector uses its indices (1, 2) as the x axis, so
    # this yields n + 2 evenly spaced values from x[i] to x[i + 1].
    ys <- approx(c(x[i], x[i + 1L]), n = n + 2L)$y
    # Every segment after the first starts on the previous segment's end
    # point; drop that one shared value by position. Removing duplicates by
    # value would also discard legitimately repeated data such as the
    # second 0 in c(0, 1, 0).
    if (i == 1L) ys else ys[-1L]
  })
  unlist(segments, use.names = FALSE)
}

# Interpolate every column of df with interp() and reassemble the
# interpolated columns into a data.frame.
f <- function(df, n) {
  interpolated <- lapply(df, function(column) interp(column, n))
  as.data.frame(interpolated)
}

examples

f(df, 1)
#    x  y
# 1  0 10
# 2  1 11
# 3  2 12
# 4  6  6
# 5 10  0

f(df, 3)
#      x    y
# 1  0.0 10.0
# 2  0.5 10.5
# 3  1.0 11.0
# 4  1.5 11.5
# 5  2.0 12.0
# 6  4.0  9.0
# 7  6.0  6.0
# 8  8.0  3.0
# 9 10.0  0.0

CodePudding user response：

Another possibility with zoo::na.approx. The idea is to create a vector with n NA between the elements of the vectors, and then use na.approx. This solution is supposedly the fastest (see benchmark).

library(zoo)
# Linearly interpolate between consecutive elements of a vector by padding
# it with NA placeholders and letting zoo::na.approx() fill them in.
#
# v: numeric vector of values to interpolate between.
# n: number of values to insert between each pair of consecutive elements
#    of v.
#
# Returns a numeric vector of length (length(v) - 1) * (n + 1) + 1. When v
# has fewer than two elements there is nothing to interpolate and v is
# returned unchanged.
interp <- function(v, n) {
  if (length(v) < 2L) {
    return(v)
  }
  total <- (length(v) - 1) * (n + 1) + 1
  # Place the original values every n + 1 slots, leaving n NA gaps between
  # neighbours and no trailing NAs after the last value.
  na_vec <- rep(NA_real_, total)
  na_vec[seq(1, total, by = n + 1)] <- v
  zoo::na.approx(na_vec)
}

# Run interp() over each column of df and rebuild a data.frame from the
# interpolated columns.
f <- function(df, n) {
  columns <- lapply(df, interp, n = n)
  as.data.frame(columns)
}

examples

f(df, 1)
#    x  y
# 1  0 10
# 2  1 11
# 3  2 12
# 4  6  6
# 5 10  0

f(df, 3)
#      x    y
# 1  0.0 10.0
# 2  0.5 10.5
# 3  1.0 11.0
# 4  1.5 11.5
# 5  2.0 12.0
# 6  4.0  9.0
# 7  6.0  6.0
# 8  8.0  3.0
# 9 10.0  0.0