Suppose I have a boolean tensor that tells me whether the value at the given coordinate is "of interest":
# Mask marking which coordinates are "of interest". Stored with a boolean
# dtype (rather than 0/1 integers) so it can be used directly as the
# condition of np.where / tf.where.
is_value_of_interest = np.array(
    [[0, 0, 0],
     [0, 1, 1],
     [1, 0, 0],
     [0, 0, 0],
     [0, 1, 1]],
    dtype=bool,
)
# TensorFlow constant built from the NumPy mask above.
is_value_of_interest_tf = tf.constant(is_value_of_interest)
Now I have another array/tensor containing the actual values themselves:
# Random sample data with the same (5, 3) shape as the mask.
values = np.random.rand(5, 3)
# The same data as a TensorFlow constant.
values_tf = tf.constant(values)
What I want to do is build a tensor that will return the next value of interest along the 0th axis. So in numpy/pandas this would be:
# Keep the values where the mask is set; put NaN everywhere else.
values_of_interest = np.where(is_value_of_interest, values, np.nan)
# Back-fill: each NaN takes the next non-NaN value further down its column.
df = pandas.DataFrame(values_of_interest).bfill()
And the result:
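How can I build a tensor operation to achieve the same result, such that np.array_equal(df.values, my_tensor.numpy(), equal_nan=True) holds? (A plain (df.values == my_tensor.numpy()).all() check cannot succeed here, because the back-filled result still contains NaN wherever no later value of interest exists, and NaN never compares equal to NaN.)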
CodePudding user response:
This was nontrivial due to the lack of any equivalent of pandas' ffill/bfill methods, so I began by implementing an ffill function for TensorFlow in the specific case of a 2d input filled along the 0th dimension.
I'm not happy with the result as it's long and messy, so if anyone can improve it please feel free to suggest a better answer:
def tf_ffill(data: tf.Tensor) -> tf.Tensor:
    """
    2d forward-fill along 0th dimension.

    Every NaN is replaced by the closest non-NaN value above it in the same
    column. NaNs that have no non-NaN value above them are left as NaN.

    Args:
        data: rank-2 floating-point tensor laid out as (rows, columns).

    Returns:
        A tensor with the same shape and dtype as ``data``.
    """
    data = tf.convert_to_tensor(data)

    # Work on the transpose so that, once flattened by boolean_mask, the
    # valid values of each column sit next to each other in row order.
    data_t = tf.transpose(data)
    valid_t = ~tf.math.is_nan(data_t)
    valid_count = tf.cast(valid_t, tf.int32)

    # rank[c, r]: how many valid values column c holds at or above row r.
    # A rank of 0 means there is nothing to fill from yet.
    rank = tf.cumsum(valid_count, axis=1)

    # col_offset[c]: position of column c's first valid value within the
    # flattened list of all valid values.
    col_offset = tf.cumsum(tf.reduce_sum(valid_count, axis=1), exclusive=True)

    # Slot 0 of the lookup table is a NaN placeholder; the valid values
    # follow from slot 1. Cells with rank 0 point at slot 0, so they come
    # out as NaN without a separate masking step, and the table is never
    # empty even when the input contains no valid value at all.
    nan_slot = tf.constant([float("nan")], dtype=data.dtype)
    lookup_table = tf.concat(
        [nan_slot, tf.boolean_mask(data_t, valid_t)], axis=0
    )

    # (rank - 1) selects the most recent valid value within the column,
    # col_offset moves to that column's section, and +1 skips the NaN slot.
    lookup = tf.where(
        rank > 0,
        rank + col_offset[:, tf.newaxis],
        tf.zeros_like(rank),
    )
    return tf.transpose(tf.gather(lookup_table, lookup))
def tf_bfill(data: tf.Tensor) -> tf.Tensor:
    """
    2d backward-fill along 0th dimension.

    Implemented as a forward-fill on the row-reversed input, with the
    result reversed back into the original row order.
    """
    row_axis = [0]
    flipped = tf.reverse(data, axis=row_axis)
    filled = tf_ffill(flipped)
    return tf.reverse(filled, axis=row_axis)
Once you have tf_bfill, getting to the answer is trivial:
# tf.where expects a boolean condition, so cast the mask explicitly; this
# also covers a mask that was defined with 0/1 integers.
data = tf.where(tf.cast(is_value_of_interest, tf.bool), values_tf, np.nan)
tf_bfill(data)
This whole question could be rephrased as "how do you do a forward fill in tensorflow?"