I'm following the TensorFlow Federated tutorial custom_federated_algorithms_2. Everything works when I simply copy and run the tutorial's code, so I wanted to modify the code myself to become more familiar with TFF. That's when a bug appeared.
My runtime environment:
python: 3.8.12
tensorflow: 2.5.0
tensorflow_federated: 0.19.0
Below is the original code for the test model in the tutorial:
MODEL_SPEC = collections.OrderedDict(
    weights=tf.TensorSpec(shape=[784, 10], dtype=tf.float32),
    bias=tf.TensorSpec(shape=[10], dtype=tf.float32))
MODEL_TYPE = tff.to_type(MODEL_SPEC)
print(MODEL_TYPE) # <weights=float32[784,10],bias=float32[10]>
BATCH_SPEC = collections.OrderedDict(
    x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
    y=tf.TensorSpec(shape=[None], dtype=tf.int32))
BATCH_TYPE = tff.to_type(BATCH_SPEC)
print(BATCH_TYPE) # <x=float32[?,784],y=int32[?]>
And I changed the MODEL_TYPE into:
MODEL_SPEC = collections.OrderedDict(
    fc1=tf.TensorSpec(shape=[784, 256], dtype=tf.float32),
    b1=tf.TensorSpec(shape=[256], dtype=tf.float32),
    fc2=tf.TensorSpec(shape=[256, 128], dtype=tf.float32),
    b2=tf.TensorSpec(shape=[128], dtype=tf.float32),
    fc3=tf.TensorSpec(shape=[128, 10], dtype=tf.float32),
    b3=tf.TensorSpec(shape=[10], dtype=tf.float32))
MODEL_TYPE = tff.to_type(MODEL_SPEC)
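Printing the new type should show the expanded structure, analogous to the print above:
print(MODEL_TYPE)
# <fc1=float32[784,256],b1=float32[256],fc2=float32[256,128],b2=float32[128],fc3=float32[128,10],b3=float32[10]>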
Since the model's structure changed, the forward pass needs to change too:
# original
@tf.function
def forward_pass(model, batch):
  predicted_y = tf.nn.softmax(
      tf.matmul(batch['x'], model['weights']) + model['bias'])
  return -tf.reduce_mean(
      tf.reduce_sum(
          tf.one_hot(batch['y'], 10) * tf.math.log(predicted_y), axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
  return forward_pass(model, batch)
# new
@tf.function
def forward(model, batch):
  logits = batch["x"] @ model["fc1"] + model["b1"]
  logits = logits @ model["fc2"] + model["b2"]
  logits = logits @ model["fc3"] + model["b3"]
  probs = tf.nn.softmax(logits, axis=-1)
  one_hot_y = tf.one_hot(batch["y"], depth=10)
  return -tf.reduce_mean(
      tf.reduce_sum(tf.math.log(probs) * one_hot_y, axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
  return forward(model, batch)
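As a quick sanity check of the new batch_loss (a minimal sketch; zero_model, sample_batch, and the assumed import numpy as np are mine, not from the tutorial), an all-zeros model should give a loss of ln(10) ≈ 2.3026, since softmax over all-zero logits assigns probability 0.1 to each of the 10 classes:
zero_model = collections.OrderedDict(
    fc1=tf.zeros([784, 256]), b1=tf.zeros([256]),
    fc2=tf.zeros([256, 128]), b2=tf.zeros([128]),
    fc3=tf.zeros([128, 10]), b3=tf.zeros([10]))
sample_batch = collections.OrderedDict(
    x=np.ones([32, 784], dtype=np.float32),  # any float batch of shape [batch, 784]
    y=np.zeros([32], dtype=np.int32))        # any int labels in [0, 10)
print(batch_loss(zero_model, sample_batch))  # ~2.3026 == ln(10)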
I left the batch_train() code essentially unchanged (it just calls the renamed forward now):
@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
  # Define a group of model variables and set them to `initial_model`. Must
  # be defined outside the @tf.function.
  model_vars = collections.OrderedDict([
      (name, tf.Variable(name=name, initial_value=value))
      for name, value in initial_model.items()
  ])
  optimizer = tf.keras.optimizers.SGD(learning_rate)

  @tf.function
  def _train_on_batch(model_vars, batch):
    # Perform one step of gradient descent using the loss from the
    # (renamed) forward pass above.
    with tf.GradientTape() as tape:
      loss = forward(model_vars, batch)
    grads = tape.gradient(loss, model_vars)
    optimizer.apply_gradients(
        zip(tf.nest.flatten(grads), tf.nest.flatten(model_vars)))
    return model_vars

  return _train_on_batch(model_vars, batch)
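To confirm batch_train still works with the new model shape, one training step can be checked the same way the tutorial does (again a sketch, reusing the hypothetical zero_model and sample_batch from above):
model = batch_train(zero_model, sample_batch, 0.1)
print(batch_loss(model, sample_batch))  # should drop below the initial ~2.3026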
And it works fine so far. But when implementing the local_train() section, errors appeared even though I was just using the original code:
initial_model = collections.OrderedDict(
    fc1=tf.zeros([784, 256]),
    b1=tf.zeros([256]),
    fc2=tf.zeros([256, 128]),
    b2=tf.zeros([128]),
    fc3=tf.zeros([128, 10]),
    b3=tf.zeros([10]))
LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)

@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):

  @tff.tf_computation(LOCAL_DATA_TYPE, tf.float32)
  def _insert_learning_rate_to_sequence(dataset, learning_rate):
    return dataset.map(lambda x: (x, learning_rate))

  batches_with_learning_rate = _insert_learning_rate_to_sequence(
      all_batches, learning_rate)

  # Mapping function to apply to each batch.
  @tff.federated_computation(MODEL_TYPE,
                             batches_with_learning_rate.type_signature.element)
  def batch_fn(model, batch_with_lr):
    batch, lr = batch_with_lr
    return batch_train(model, batch, lr)

  return tff.sequence_reduce(batches_with_learning_rate, initial_model, batch_fn)
locally_trained_model = local_train(initial_model, 1e-1, mnist_train_dataset[5])
# ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef
CodePudding user response:
One issue I noticed on a quick skim (I did not sift through all of the pasted code) is this line:
return batch_train(model, batch, lr)
To invoke a tff.tf_computation from within the context of a tff.federated_computation, you need to use the tff.federated_map operator. So it could look like:
return tff.federated_map(batch_train, (model, batch, lr))
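In context, batch_fn would then look something like this (a sketch of the suggested change, not a tested solution for this exact setup):
@tff.federated_computation(MODEL_TYPE, batches_with_learning_rate.type_signature.element)
def batch_fn(model, batch_with_lr):
  batch, lr = batch_with_lr
  return tff.federated_map(batch_train, (model, batch, lr))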
CodePudding user response:
In the end, I found that I had made a basic mistake.