How to use layer output as argument to subsequent layer-CodePudding

I need to add a Cropping2D layer where the left and right crop arguments are determined dynamically by the output of previous layers, i.e., the left_crop and right_crop arguments are not known when the code is written. However, I seem unable to access the value of a previous tensor in the model. Here is my code:

# Model inputs: the image plus two scalar int32 inputs named 'x0' and 'x1'.
input1 = Input(name='dirty', shape=(IMG_HEIGHT, None, 1), dtype='float32')
input2 = Input(name='x0', shape=(), dtype='int32')
input3 = Input(name='x1', shape=(), dtype='int32')

# Encoder: conv -> 2x2 max-pool -> conv.
conv1 = Conv2D(48, kernel_size=(3, 3), activation='relu', padding='same', name='conv1')(input1)
pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(conv1)
conv2 = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', name='conv2')(pool1)

# Decoder: transposed conv -> 2x upsample -> single-channel transposed conv.
deconv2 = Conv2DTranspose(48, kernel_size=(3, 3), activation='relu', padding='same', name='deconv2')(conv2)
depool1 = UpSampling2D(size=(2, 2), name='depool1')(deconv2)
output1 = Conv2DTranspose(1, kernel_size=(3, 3), activation='relu', padding='same', name='clean')(depool1)

# Attempt to derive the crop amounts from the x0/x1 inputs.
# NOTE: K.eval() is applied to symbolic Input tensors here; `left = K.eval(input2)` is the
# call that fails with "AttributeError: 'KerasTensor' object has no attribute 'numpy'".
# NOTE(review): the width axis is declared as None in input1's shape, so `width` is
# presumably None as well -- confirm.
_, _, width, _ = K.int_shape(output1)
left = K.eval(input2)
right = width - K.eval(input3)
output2 = Cropping2D(name='clean_snippet', cropping=((0, 0), (left, right)))(output1)

That produces the following error:

Traceback (most recent call last):
  File "test.py", line 81, in <module>
    left = K.eval(input2)
  File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/keras/backend.py", line 1632, in eval
    return get_value(to_dense(x))
  File "/Users/garnet/Library/Python/3.8/lib/python/site-packages/keras/backend.py", line 4208, in get_value
    return x.numpy()
AttributeError: 'KerasTensor' object has no attribute 'numpy'

I'm using TF 2.10.0 with Keras 2.10.0. I've tried both with and without eager mode enabled. My question is specifically about the four lines after the "HERE'S THE AREA IN QUESTION..." comment in the full code below. How can I access previous layer values (not the input layer) to use them as an argument to Cropping2D()? Any ideas?

For context, here's my entire code:

import tensorflow as tf
import cv2
import random
import os
import numpy as np

from tensorflow.keras import backend as K

from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, Cropping2D, UpSampling2D, Input
from tensorflow.keras import losses

# Width, in pixels, of the randomly placed column span that get_data blanks out of each image.
SNIPPET_WIDTH = 40
# Image height used in the model's Input shape and in the dataset TensorSpecs.
IMG_HEIGHT = 60


def get_data(paths):
    """Yield one (features, targets) training pair per image path.

    For each path the image is read as grayscale, a randomly blurred copy is
    made, and a random SNIPPET_WIDTH-wide column span of the blurred copy is
    zeroed out.

    Args:
        paths: iterable of bytes paths (each one is decoded as UTF-8).

    Yields:
        A tuple ``(features, targets)`` where ``features`` has keys
        ``'dirty'``, ``'x0'``, ``'x1'`` and ``targets`` has keys ``'clean'``
        and ``'clean_snippet'``. Images are float32 tensors with a trailing
        channel axis; ``x0``/``x1`` are the snippet's column bounds.

    Raises:
        ValueError: from ``random.randint`` if an image is narrower than
            SNIPPET_WIDTH.
    """
    for path in paths:
        clean = cv2.imread(path.decode('utf-8'), cv2.IMREAD_GRAYSCALE)
        h, w = clean.shape
        dirty = cv2.blur(clean, (random.randint(1, 5), random.randint(1, 5)))
        x0 = random.randint(0, w - SNIPPET_WIDTH)
        x1 = x0 + SNIPPET_WIDTH
        # Slice every row: the previous `0:h - 1` bound dropped the last row, which
        # made clean_snippet one row shorter than the (IMG_HEIGHT, None, 1) spec.
        clean_snippet = clean[:, x0:x1]
        dirty[:, x0:x1] = 0  # masked out region
        yield (
            {
                'dirty': tf.convert_to_tensor(_to_unit_float(dirty)),
                'x0': tf.convert_to_tensor(x0),
                'x1': tf.convert_to_tensor(x1),
            },
            {
                'clean': tf.convert_to_tensor(_to_unit_float(clean)),
                'clean_snippet': tf.convert_to_tensor(_to_unit_float(clean_snippet)),
            },
        )


def _to_unit_float(img):
    """Return ``(256 - img) / 255`` as float32 with a channel axis added at position 2."""
    return np.expand_dims((256. - img.astype(np.float32)) / 255., axis=2)


# Build the list of training image paths from every entry in the training directory.
train_directory = 'data/training/'
files = os.listdir(train_directory)
paths = [os.path.join(train_directory, os.fsdecode(f)) for f in files]

# Wrap the generator in a tf.data pipeline. The signature mirrors the two dicts
# yielded by get_data; the width axis is None because image widths vary.
train_ds = tf.data.Dataset.from_generator(get_data, args=[paths], output_signature=(
    {
        'dirty': tf.TensorSpec(shape=(IMG_HEIGHT, None, 1), dtype=tf.float32),
        'x0': tf.TensorSpec(shape=(), dtype=tf.int32),
        'x1': tf.TensorSpec(shape=(), dtype=tf.int32)
    },
    {
        'clean': tf.TensorSpec(shape=(IMG_HEIGHT, None, 1), dtype=tf.float32),
        'clean_snippet': tf.TensorSpec(shape=(IMG_HEIGHT, None, 1), dtype=tf.float32)
    }
))

# Batch images of similar width together. bucket_batch_sizes needs one more entry
# than bucket_boundaries (the extra one covers widths past the last boundary).
bucket_sizes = [400, 500, 600, 700, 800]
bucket_batch_sizes = [16, 16, 16, 16, 16, 16]

train_ds = train_ds.bucket_by_sequence_length(element_length_func=lambda x, y: tf.shape(y['clean'])[1],
                                              bucket_boundaries=bucket_sizes, bucket_batch_sizes=bucket_batch_sizes)


# Model inputs, named after the feature keys yielded by get_data ('dirty', 'x0', 'x1').
input1 = Input(name='dirty', shape=(IMG_HEIGHT, None, 1), dtype='float32')
input2 = Input(name='x0', shape=(), dtype='int32')
input3 = Input(name='x1', shape=(), dtype='int32')

# Encoder: conv -> 2x2 max-pool -> conv.
conv1 = Conv2D(48, kernel_size=(3, 3), activation='relu', padding='same', name='conv1')(input1)
pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(conv1)
conv2 = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', name='conv2')(pool1)

# Decoder: transposed conv -> 2x upsample -> single-channel transposed conv.
deconv2 = Conv2DTranspose(48, kernel_size=(3, 3), activation='relu', padding='same', name='deconv2')(conv2)
depool1 = UpSampling2D(size=(2, 2), name='depool1')(deconv2)
output1 = Conv2DTranspose(1, kernel_size=(3, 3), activation='relu', padding='same', name='clean')(depool1)

# HERE'S THE AREA IN QUESTION...
# NOTE: K.eval() is applied to symbolic Input tensors here; `left = K.eval(input2)` is the
# call that fails with "AttributeError: 'KerasTensor' object has no attribute 'numpy'".
# NOTE(review): the width axis is declared as None in input1's shape, so `width` is
# presumably None as well -- confirm.
_, _, width, _ = K.int_shape(output1)
left = K.eval(input2)
right = width - K.eval(input3)
output2 = Cropping2D(name='clean_snippet', cropping=((0, 0), (left, right)))(output1)
# ...END AREA IN QUESTION

# Two-output model: the full reconstruction ('clean') and the cropped region ('clean_snippet').
model = Model(inputs=[input1, input2, input3], outputs=[output1, output2])
optimizer = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
# NOTE: loss_fcns is built here but not passed to compile(); compile() below receives a
# single MeanAbsoluteError instance instead.
loss_fcns = {'clean': losses.MeanAbsoluteError(), 'clean_snippet': losses.MeanAbsoluteError()}

model.compile(loss=losses.MeanAbsoluteError(), optimizer=optimizer, metrics=['acc'])
model.fit(x=train_ds, y=None, epochs=1000, shuffle=True, verbose=1)

CodePudding user response：

This is a classic bug that pops up because of graph mode. When you run this code, it's not really running the code, but Tensorflow introspects the python code and compiles it to a graph that runs well on GPU. Some of the things you think you can do in Python, you can't do when it's compiled.

In this case, tensor shapes must be fixed during execution, so you can't have dynamic output shapes during training.

Instead of cropping in the model, I'd just zero out the pixels you would have cropped. And in your dataset of training images, instead of producing dynamically sized target images, crop dynamically and then pad with zeros so the target matches the full image size (and the location of the excerpt). The MAE between those zero pixels in the ground truth and the hard-coded zeros in the model output will be zero.

And drop the K.eval. You won't need it anymore - you can build masks from input2 and input3 directly using tf ops. Note that tf ops operate on the full batch, unlike Keras layers, and you can't loop, so you'll need to vectorize it. You can do that with tf.sequence_mask.

CodePudding user response：

Here's the working solution inspired by @Yaoshiang's comment:

import tensorflow as tf
import cv2
import random
import os
import numpy as np

from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D, Cropping2D, UpSampling2D, Input, Multiply
from tensorflow.keras import losses

# Width, in pixels, of the randomly placed column span that get_data masks in each image.
SNIPPET_WIDTH = 40
# Image height used in the model's Input shapes and in the dataset TensorSpecs.
IMG_HEIGHT = 60


def normalize(img):
    """Convert an image array to float32, map it through (256 - x) / 255, and add a channel axis.

    Args:
        img: array-like image with an ``astype`` method; get_data passes the
            2-D grayscale arrays returned by cv2.

    Returns:
        A float32 array with a new length-1 axis inserted at position 2.
    """
    as_float = img.astype(np.float32)
    scaled = (256. - as_float) / 255.
    return np.expand_dims(scaled, axis=2)


def get_data(paths):
    """Yield one (features, targets) training pair per image path.

    For each path the image is read as grayscale, a randomly blurred copy is
    made, and a random SNIPPET_WIDTH-wide column span of the blurred copy is
    zeroed out. A 0/1 mask marking that span is emitted as a feature, and the
    clean image multiplied by the mask is emitted as the snippet target, so
    every tensor keeps the full image size.

    Args:
        paths: iterable of bytes paths (each one is decoded as UTF-8).

    Yields:
        A tuple ``(features, targets)`` where ``features`` has keys
        ``'dirty'`` and ``'mask'`` and ``targets`` has keys ``'clean'`` and
        ``'clean_snippet'``; all four are float32 tensors of shape (h, w, 1).

    Raises:
        ValueError: from ``random.randint`` if an image is narrower than
            SNIPPET_WIDTH.
    """
    for path in paths:
        clean = cv2.imread(path.decode('utf-8'), cv2.IMREAD_GRAYSCALE)
        h, w = clean.shape
        dirty = cv2.blur(clean, (random.randint(1, 5), random.randint(1, 5)))
        x0 = random.randint(0, w - SNIPPET_WIDTH)
        x1 = x0 + SNIPPET_WIDTH
        # Zero every row of the span so it agrees with the full-height mask below;
        # the previous `0:h - 1` bound left the last row of the span unmasked.
        dirty[:, x0:x1] = 0  # masked out region
        dirty = normalize(dirty)
        clean = normalize(clean)
        mask = np.zeros((h, w, 1), dtype=np.float32)
        mask[:, x0:x1, :] = 1.0
        clean_snippet = clean * mask

        yield (
            {
                'dirty': tf.convert_to_tensor(dirty),
                'mask': tf.convert_to_tensor(mask),
            },
            {
                'clean': tf.convert_to_tensor(clean),
                'clean_snippet': tf.convert_to_tensor(clean_snippet),
            },
        )


# Build the list of training image paths from every entry in the training directory.
train_directory = 'data/training/'
files = os.listdir(train_directory)
paths = [os.path.join(train_directory, os.fsdecode(f)) for f in files]

# Wrap the generator in a tf.data pipeline. The signature mirrors the two dicts
# yielded by get_data; the width axis is None because image widths vary.
train_ds = tf.data.Dataset.from_generator(get_data, args=[paths], output_signature=(
    {
        'dirty': tf.TensorSpec(shape=(IMG_HEIGHT, None, 1), dtype=tf.float32),
        'mask': tf.TensorSpec(shape=(IMG_HEIGHT, None, 1), dtype=tf.float32)
    },
    {
        'clean': tf.TensorSpec(shape=(IMG_HEIGHT, None, 1), dtype=tf.float32),
        'clean_snippet': tf.TensorSpec(shape=(IMG_HEIGHT, None, 1), dtype=tf.float32)
    }
))

# Batch images of similar width together. bucket_batch_sizes needs one more entry
# than bucket_boundaries (the extra one covers widths past the last boundary).
bucket_sizes = [400, 500, 600, 700, 800]
bucket_batch_sizes = [16, 16, 16, 16, 16, 16]

train_ds = train_ds.bucket_by_sequence_length(element_length_func=lambda x, y: tf.shape(y['clean'])[1],
                                              bucket_boundaries=bucket_sizes, bucket_batch_sizes=bucket_batch_sizes)

# Model inputs, named after the feature keys yielded by get_data ('dirty', 'mask').
input1 = Input(name='dirty', shape=(IMG_HEIGHT, None, 1), dtype='float32')
input2 = Input(name='mask', shape=(IMG_HEIGHT, None, 1), dtype='float32')

# Encoder: conv -> 2x2 max-pool -> conv.
conv1 = Conv2D(48, kernel_size=(3, 3), activation='relu', padding='same', name='conv1')(input1)
pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(conv1)
conv2 = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', name='conv2')(pool1)

# Decoder: transposed conv -> 2x upsample -> single-channel transposed conv.
# NOTE(review): pool1 halves the width rounding down and depool1 doubles it, so for an
# odd-width batch output1 would be one column narrower than the mask -- confirm that
# batch widths are always even.
deconv2 = Conv2DTranspose(48, kernel_size=(3, 3), activation='relu', padding='same', name='deconv2')(conv2)
depool1 = UpSampling2D(size=(2, 2), name='depool1')(deconv2)
output1 = Conv2DTranspose(1, kernel_size=(3, 3), activation='relu', padding='same', name='clean')(depool1)
# Instead of cropping to a data-dependent size, multiply by the mask input: the output
# keeps the full image shape and is zero wherever the mask is zero.
output2 = Multiply(name='clean_snippet')([output1, input2])

model = Model(inputs=[input1, input2], outputs=[output1, output2])
# `learning_rate` replaces the deprecated `lr` alias.
optimizer = SGD(learning_rate=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
# One loss per named output; the keys match the output layer names.
loss_fcns = {'clean': losses.MeanAbsoluteError(), 'clean_snippet': losses.MeanAbsoluteError()}

model.compile(loss=loss_fcns, optimizer=optimizer, metrics=['acc'])
# NOTE: per the Keras Model.fit documentation, `shuffle` is ignored when x is a
# tf.data.Dataset; shuffle the dataset itself if a random order is required.
model.fit(x=train_ds, y=None, epochs=1000, shuffle=True, verbose=1)