How to find "next" value along 0th axis from 2d boolean mask in tensorflow-CodePudding

Suppose I have a boolean tensor that tells me whether the value at the given coordinate is "of interest":

# Boolean mask: True where the value at that coordinate is "of interest".
# dtype=bool makes this a real boolean array rather than an integer 0/1 array,
# so it can be used directly as a condition by np.where / tf.where.
is_value_of_interest = np.array(
    [[0, 0, 0],
     [0, 1, 1],
     [1, 0, 0],
     [0, 0, 0],
     [0, 1, 1]],
    dtype=bool,
)

# TensorFlow counterpart of the NumPy mask defined above.
is_value_of_interest_tf = tf.constant(is_value_of_interest)

Now I have another array/tensor containing the actual values themselves:

# Random values with the same (5, 3) shape as the mask.
values = np.random.rand(5, 3)
# TensorFlow counterpart of the same values.
values_tf = tf.constant(values)

What I want to do is build a tensor that returns the next value of interest along the 0th axis. In numpy/pandas this would be:

# Keep the values of interest and replace everything else with NaN.
values_of_interest = np.where(is_value_of_interest, values, np.nan)
# bfill replaces each NaN with the next non-NaN value further down its column.
df = pandas.DataFrame(values_of_interest).bfill()

And the result:

How can I build a tensor operation to achieve the same result, such that (df.values == my_tensor.numpy()).all()?

CodePudding user response：

This was nontrivial because TensorFlow has no equivalent of pandas' ffill/bfill methods, so I began by implementing an ffill function for TensorFlow for the specific case of a 2d input filled along the 0th dimension.

I'm not happy with the result as it's long and messy, so if anyone can improve it please feel free to suggest a better answer:

def tf_ffill(data: tf.Tensor) -> tf.Tensor:
    """
    2d forward-fill along 0th dimension

    Each NaN entry is replaced by the closest non-NaN value above it in the
    same column (the TensorFlow analogue of ``pandas.DataFrame.ffill``).
    Entries that have no non-NaN value above them stay NaN.

    Args:
        data: Floating-point tensor of rank 2 in which NaN marks the entries
            to be filled.

    Returns:
        A tensor with the same shape and dtype as ``data``.
    """
    is_value_of_interest = ~tf.math.is_nan(data)

    # Use the dynamic shape so the function does not depend on the static
    # shape of `data` being fully known.
    dynamic_shape = tf.shape(data)
    row_ids = tf.range(dynamic_shape[0])[:, tf.newaxis]

    # Row index at every valid cell, -1 at every NaN cell (broadcast over
    # the columns).
    candidate_rows = tf.where(is_value_of_interest, row_ids, -1)

    # Running maximum down each column: the row index of the most recent
    # valid value at or above each cell, or -1 if there is none yet. The
    # explicit initializer keeps this well-defined for an input with 0 rows.
    last_valid_row = tf.scan(
        tf.maximum,
        candidate_rows,
        initializer=tf.fill(dynamic_shape[1:], -1),
    )
    defined_values_mask = last_valid_row >= 0

    # Clamp the "no source" marker to row 0 so the gather stays in bounds;
    # those cells are overwritten with NaN below.
    res = tf.experimental.numpy.take_along_axis(
        data, tf.maximum(last_valid_row, 0), axis=0
    )
    return tf.where(
        defined_values_mask, res, tf.constant(float("nan"), dtype=data.dtype)
    )


def tf_bfill(data: tf.Tensor) -> tf.Tensor:
    """
    2d backward-fill along 0th dimension

    Implemented by flipping the rows, forward-filling with tf_ffill, and
    flipping the rows back.
    """
    row_axis = [0]
    flipped = tf.reverse(data, axis=row_axis)
    filled = tf_ffill(flipped)
    return tf.reverse(filled, axis=row_axis)

Once you have tf_bfill, getting to the answer is trivial:

# The three-argument form of tf.where expects a boolean condition, so use the
# TensorFlow mask and cast it explicitly (a no-op if it is already boolean).
data = tf.where(tf.cast(is_value_of_interest_tf, tf.bool), values_tf, np.nan)
# Bind the result to the name used in the question's check
# (df.values == my_tensor.numpy()).all(), then display it.
my_tensor = tf_bfill(data)
my_tensor

This whole question could be rephrased as "how do you do a forward fill in tensorflow?"