How do I find the max value in a tensorflow dataset batch across a specific number of columns?-CodePudding

Suppose the following code below:

import tensorflow as tf
import numpy as np
 
# Toy time series: 12 time steps (rows) with 5 columns each.
simple_data_samples = np.array(
    [
        [1, 1, 1, 7, -1],
        [2, -2, 2, -2, -2],
        [3, 3, 3, -3, -3],
        [-4, 4, 4, -4, -4],
        [5, 5, 5, -5, -5],
        [6, 6, 6, -4, -6],
        [7, 7, 8, -7, -7],
        [8, 8, 8, -8, -8],
        [9, 4, 9, -9, -9],
        [10, 10, 10, -10, -10],
        [11, 5, 11, -11, -11],
        [12, 12, 12, -12, -12],
    ]
)


def print_dataset(ds):
    """Print every (feature, label) batch of ``ds``.

    Each element of ``ds`` must unpack into two objects exposing a
    ``.numpy()`` method; the converted values are printed under the
    ``Feature:`` and ``Label:`` headings, followed by a blank line.
    """
    for batch in ds:
        feature_batch, label_batch = batch
        print("---Batch---")
        print("Feature:", feature_batch.numpy())
        print("Label:", label_batch.numpy())
        print("")
 
    
def timeseries_dataset_multistep_combined(features, label_slice, input_sequence_length, output_sequence_length, sequence_stride, batch_size):
    """Build a windowed (feature, label) dataset offset by per-column window maxima.

    ``features`` is cut into sliding windows of
    ``input_sequence_length + output_sequence_length`` time steps. For each
    window the maximum of every column over the whole window is computed and
    added to every value of that column. The first ``input_sequence_length``
    steps become the feature tensor; the remaining steps, restricted to
    ``label_slice`` on the column axis, become the label tensor.

    Args:
        features: 2-D array of time steps (rows) by columns.
        label_slice: Column selector applied to the label part of each window.
        input_sequence_length: Number of leading time steps used as features.
        output_sequence_length: Number of trailing time steps used as labels.
        sequence_stride: Step between the starts of consecutive windows.
        batch_size: Number of windows per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` tuples.
    """
    window_length = input_sequence_length + output_sequence_length
    feature_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        features,
        None,
        sequence_length=window_length,
        sequence_stride=sequence_stride,
        batch_size=batch_size,
        shuffle=False,
    )

    def split_feature_label(x):
        # x is indexed as [batch, time step, column]. keepdims=True keeps the
        # time axis (size 1) so the maxima broadcast per window for any batch
        # size, not just batch_size=1.
        column_max = tf.reduce_max(x, axis=1, keepdims=True)
        inputs = x[:, :input_sequence_length, :] + column_max
        # Select the same columns from the maxima as from the labels so each
        # label column is offset by its own column's maximum.
        labels = x[:, input_sequence_length:, label_slice] + column_max[:, :, label_slice]
        return inputs, labels

    return feature_ds.map(split_feature_label)
 
# Windows of 4 feature steps followed by 2 label steps, advancing 2 steps at a
# time, one window per batch; slice(None) keeps every column in the labels.
ds = timeseries_dataset_multistep_combined(
    simple_data_samples,
    slice(None),
    input_sequence_length=4,
    output_sequence_length=2,
    sequence_stride=2,
    batch_size=1,
)
print_dataset(ds)

Let me explain what the above code does. It creates many features and labels. Then it takes the maximum value from each column and adds it to the individual values in that column. For instance, this feature and its corresponding label:

Feature: [[[ 1  1  1  7 -1]
  [ 2 -2  2 -2 -2]
  [ 3  3  3 -3 -3]
  [-4  4  4 -4 -4]]]
Label: [[[ 5  5  5 -5 -5]
  [ 6  6  6 -4 -6]]]

have the following max values in each column:

6,6,6,7,-1

The max-values are then added to the corresponding column and you get the final output:

Feature: [[[ 7  7  7 14 -2]
  [ 8  4  8  5 -3]
  [ 9  9  9  4 -4]
  [ 2 10 10  3 -5]]]
Label: [[[11 11 11  2 -6]
  [12 12 12  3 -7]]]

Instead of extracting the maximum value from each column, I want to extract the maximum value from the first three columns and the last two columns of each feature and its corresponding label. After the extraction, I want to add the max value to each value in the corresponding column. For instance, in the above example, the max value would be 6 for the first three columns and 7 for the last two columns. After that, 6 would be added to each value in the first three columns and 7 to each value in the last 2 columns. The final output for the first batch would look like this:

Feature: [[[ 7  7  7  14 6]
  [ 8 4  8 5 5]
  [ 9  9  9 4 4]
  [ 2  10  10 3 3]]]
Label: [[[ 11  11  11 2 2]
  [ 12  12 12 3 1]]]

Has anyone got an idea how to extract the max value from the first three columns and the last two columns in each batch?

CodePudding user response：

Does using tf.tile with tf.reduce_max like this work for you:

import tensorflow as tf
import numpy as np
 
# Sample series used by the example: 12 rows (time steps) x 5 columns.
simple_data_samples = np.array(
    [
        [1, 1, 1, 7, -1],
        [2, -2, 2, -2, -2],
        [3, 3, 3, -3, -3],
        [-4, 4, 4, -4, -4],
        [5, 5, 5, -5, -5],
        [6, 6, 6, -4, -6],
        [7, 7, 8, -7, -7],
        [8, 8, 8, -8, -8],
        [9, 4, 9, -9, -9],
        [10, 10, 10, -10, -10],
        [11, 5, 11, -11, -11],
        [12, 12, 12, -12, -12],
    ]
)


def print_dataset(ds):
    """Dump each batch of ``ds`` to stdout.

    ``ds`` is iterated as pairs; both members of a pair are converted with
    their ``.numpy()`` method and printed as ``Feature:`` / ``Label:``
    under a ``---Batch---`` header, with a blank line after each batch.
    """
    for pair in ds:
        batch_inputs, batch_targets = pair
        print("---Batch---")
        print("Feature:", batch_inputs.numpy())
        print("Label:", batch_targets.numpy())
        print("")
 
    
def timeseries_dataset_multistep_combined(features, label_slice, input_sequence_length, output_sequence_length, sequence_stride, batch_size):
    """Build a windowed (feature, label) dataset offset by column-group maxima.

    ``features`` is cut into sliding windows of
    ``input_sequence_length + output_sequence_length`` time steps. For each
    window, one maximum is taken over the first three columns and another
    over the remaining columns (the 3 + 2 split is hard-coded for the
    five-column sample data). Each group's maximum is added to every value in
    that group's columns. The first ``input_sequence_length`` steps become the
    feature tensor; the remaining steps, restricted to ``label_slice`` on the
    column axis, become the label tensor.

    Args:
        features: 2-D array of time steps (rows) by five columns.
        label_slice: Column selector applied to the label part of each window.
        input_sequence_length: Number of leading time steps used as features.
        output_sequence_length: Number of trailing time steps used as labels.
        sequence_stride: Step between the starts of consecutive windows.
        batch_size: Number of windows per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` tuples.
    """
    window_length = input_sequence_length + output_sequence_length
    feature_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        features,
        None,
        sequence_length=window_length,
        sequence_stride=sequence_stride,
        batch_size=batch_size,
        shuffle=False,
    )

    def split_feature_label(x):
        # x is indexed as [batch, time step, column]. Reducing over the time
        # and column axes with keepdims=True gives one maximum per window with
        # shape (batch, 1, 1), which broadcasts correctly for any batch size.
        first_group_max = tf.reduce_max(x[:, :, :3], axis=[1, 2], keepdims=True)
        last_group_max = tf.reduce_max(x[:, :, 3:], axis=[1, 2], keepdims=True)

        # Repeat each group's maximum across its columns -> (batch, 1, 5).
        reduced_x = tf.concat(
            [
                tf.tile(first_group_max, [1, 1, 3]),
                tf.tile(last_group_max, [1, 1, 2]),
            ],
            axis=-1,
        )

        inputs = x[:, :input_sequence_length, :] + reduced_x
        # Select the same columns from the offsets as from the labels so each
        # label column receives its own group's maximum.
        labels = x[:, input_sequence_length:, label_slice] + reduced_x[:, :, label_slice]
        return inputs, labels

    return feature_ds.map(split_feature_label)
 
# Build windows of 4 feature steps + 2 label steps with a stride of 2 and one
# window per batch; slice(None) selects all columns for the labels.
ds = timeseries_dataset_multistep_combined(
    simple_data_samples,
    slice(None),
    input_sequence_length=4,
    output_sequence_length=2,
    sequence_stride=2,
    batch_size=1,
)
print_dataset(ds)

---Batch---
Feature: [[[ 7  7  7 14  6]
  [ 8  4  8  5  5]
  [ 9  9  9  4  4]
  [ 2 10 10  3  3]]]
Label: [[[11 11 11  2  2]
  [12 12 12  3  1]]]

---Batch---
Feature: [[[11 11 11 -6 -6]
  [ 4 12 12 -7 -7]
  [13 13 13 -8 -8]
  [14 14 14 -7 -9]]]
Label: [[[ 15  15  16 -10 -10]
  [ 16  16  16 -11 -11]]]
...