I want to make an AI that plays my custom environment. Unfortunately, when I run my code, the following error occurs:
File "C:\Program Files\JetBrains\PyCharm Community Edition 2021.2\plugins\python-ce\helpers\pydev\_pydev_bundle\pydev_umd.py", line 198, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2021.2\plugins\python-ce\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents "\n", file, 'exec'), glob, loc)
File "D:/PycharmProjects/Custom Enviroment AI/Enviroment.py", line 88, in <module>
DQN = buildAgent(model, actions)
File "D:/PycharmProjects/Custom Enviroment AI/Enviroment.py", line 82, in buildAgent
dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
File "D:\PycharmProjects\Custom Enviroment AI\venv\lib\site-packages\rl\agents\dqn.py", line 108, in __init__
if hasattr(model.output, '__len__') and len(model.output) > 1:
File "D:\PycharmProjects\Custom Enviroment AI\venv\lib\site-packages\keras\engine\keras_tensor.py", line 221, in __len__
raise TypeError('Keras symbolic inputs/outputs do not '
TypeError: Keras symbolic inputs/outputs do not implement `__len__`. You may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model. This error will also get raised if you try asserting a symbolic input/output directly.
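From what I can tell, the same TypeError can be reproduced with a few lines, independent of my environment (this is only to illustrate the failing check, not my actual code):

import tensorflow as tf

out = tf.keras.layers.Dense(4)(tf.keras.Input(shape=(2,)))
print(out.shape)  # (None, 4) -- .shape works on a symbolic tensor
print(len(out))   # raises the TypeError shown above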
The error says that you shouldn't use len() and should use .shape instead; unfortunately, this seems to be an error inside TensorFlow rather than in my own code. My full code is:
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
from rl.agents.dqn import DQNAgent
from keras.layers import Dense
import tensorflow as tf
import numpy as np
import random
import pygame
import gym
class Env(gym.Env):
    def __init__(self):
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = gym.spaces.MultiDiscrete([39, 27])
        self.screen = pygame.display.set_mode((800, 600))
        self.PlayerX = 0
        self.PlayerY = 0
        self.FoodX = 0
        self.FoodY = 0
        self.state = [self.FoodX - self.PlayerX + 19, self.FoodY - self.PlayerY + 14]
        self.timeLimit = 1000

    def render(self, mode="human"):
        self.screen.fill((0, 0, 0))
        pygame.draw.rect(self.screen, (255, 255, 255), pygame.Rect(self.PlayerX * 40, self.PlayerY * 40, 40, 40))
        pygame.draw.rect(self.screen, (255, 0, 0), pygame.Rect(self.FoodX * 40, self.FoodY * 40, 40, 40))
        pygame.display.update()

    def reset(self):
        self.FoodX = random.randint(1, 19)
        self.FoodY = random.randint(1, 14)
        self.PlayerX = 0
        self.PlayerY = 0
        self.timeLimit = 1000
        return self.state

    def step(self, action):
        self.timeLimit -= 1
        reward = -1
        if action == 0 and self.PlayerY > 0:
            self.PlayerY -= 1
        if action == 1 and self.PlayerX > 0:
            self.PlayerX -= 1
        if action == 2 and self.PlayerY < 14:
            self.PlayerY += 1
        if action == 3 and self.PlayerX < 19:
            self.PlayerX += 1
        if self.PlayerX == self.FoodX and self.PlayerY == self.FoodY:
            reward = 30
            self.FoodX = random.randint(1, 19)
            self.FoodY = random.randint(1, 14)
        if self.timeLimit <= 0:
            done = True
        else:
            done = False
        self.state = [self.FoodX - self.PlayerX, self.FoodY - self.PlayerY]
        return self.state, reward, done

env = Env()
states = env.observation_space.shape
actions = env.action_space.n

def build_model(states, actions):
    model = tf.keras.Sequential()
    model.add(Dense(2, activation='relu', input_shape=states))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

def buildAgent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn

model = build_model(states, actions)
DQN = buildAgent(model, actions)
DQN.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
DQN.fit(env, nb_steps=50000, visualize=False, verbose=1)

scores = DQN.test(env, nb_episodes=100, visualize=True)
print(np.mean(scores.history['episode_reward']))

pygame.quit()
model.save('model.h5')
I use TensorFlow 2.8.0. This seems to be an error in TensorFlow's code, but I have no idea what to do.
CodePudding user response:
As mentioned here, you need to install a newer version of keras-rl:

!pip install keras-rl2
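If the old keras-rl package is still installed it can shadow keras-rl2 (both import as the rl module), so it may be worth checking which one Python actually picks up, e.g.:

pip show keras-rl keras-rl2
python -c "import rl; print(rl.__file__)"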
You also need to add an extra dimension to your input shape and a Flatten layer at the end, since keras-rl's DQN agent feeds observations with a leading window_length dimension and expects a flat vector of Q-values as output:
def build_model(states, actions):
    model = tf.keras.Sequential()
    model.add(Dense(2, activation='relu', input_shape=(1, states[0])))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    model.add(Flatten())
    return model
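If you want to double-check the shapes (assuming states is (2,) and actions is 4, as in this environment), the new model maps a (1, 2) observation window to one Q-value per action:

m = build_model((2,), 4)
print(m.output_shape)  # (None, 4): the Flatten layer collapses the window dimension
m.summary()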
Lastly, your step method in your custom environment must also return an info dictionary (I just created an empty one):
def step(self, action):
    self.timeLimit -= 1
    reward = -1
    if action == 0 and self.PlayerY > 0:
        self.PlayerY -= 1
    if action == 1 and self.PlayerX > 0:
        self.PlayerX -= 1
    if action == 2 and self.PlayerY < 14:
        self.PlayerY += 1
    if action == 3 and self.PlayerX < 19:
        self.PlayerX += 1
    if self.PlayerX == self.FoodX and self.PlayerY == self.FoodY:
        reward = 30
        self.FoodX = random.randint(1, 19)
        self.FoodY = random.randint(1, 14)
    if self.timeLimit <= 0:
        done = True
    else:
        done = False
    self.state = [self.FoodX - self.PlayerX, self.FoodY - self.PlayerY]
    return self.state, reward, done, {}
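keras-rl unpacks this return value as observation, reward, done, info, so a three-element tuple would break during training. A quick, optional way to sanity-check the interface before training is a short random-action loop:

env = Env()
obs = env.reset()
for _ in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    print(obs, reward, done, info)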
If you make these changes, it should work fine. Here is the full working code:
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
from rl.agents.dqn import DQNAgent
from keras.layers import Dense, Flatten
import tensorflow as tf
import numpy as np
import random
import pygame
import gym
class Env(gym.Env):
    def __init__(self):
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = gym.spaces.MultiDiscrete([39, 27])
        self.screen = pygame.display.set_mode((800, 600))
        self.PlayerX = 0
        self.PlayerY = 0
        self.FoodX = 0
        self.FoodY = 0
        self.state = [self.FoodX - self.PlayerX + 19, self.FoodY - self.PlayerY + 14]
        self.timeLimit = 1000

    def render(self, mode="human"):
        self.screen.fill((0, 0, 0))
        pygame.draw.rect(self.screen, (255, 255, 255), pygame.Rect(self.PlayerX * 40, self.PlayerY * 40, 40, 40))
        pygame.draw.rect(self.screen, (255, 0, 0), pygame.Rect(self.FoodX * 40, self.FoodY * 40, 40, 40))
        pygame.display.update()

    def reset(self):
        self.FoodX = random.randint(1, 19)
        self.FoodY = random.randint(1, 14)
        self.PlayerX = 0
        self.PlayerY = 0
        self.timeLimit = 1000
        return self.state

    def step(self, action):
        self.timeLimit -= 1
        reward = -1
        if action == 0 and self.PlayerY > 0:
            self.PlayerY -= 1
        if action == 1 and self.PlayerX > 0:
            self.PlayerX -= 1
        if action == 2 and self.PlayerY < 14:
            self.PlayerY += 1
        if action == 3 and self.PlayerX < 19:
            self.PlayerX += 1
        if self.PlayerX == self.FoodX and self.PlayerY == self.FoodY:
            reward = 30
            self.FoodX = random.randint(1, 19)
            self.FoodY = random.randint(1, 14)
        if self.timeLimit <= 0:
            done = True
        else:
            done = False
        self.state = [self.FoodX - self.PlayerX, self.FoodY - self.PlayerY]
        return self.state, reward, done, {}

env = Env()
states = env.observation_space.shape
actions = env.action_space.n

def build_model(states, actions):
    model = tf.keras.Sequential()
    model.add(Dense(2, activation='relu', input_shape=(1, states[0])))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    model.add(Flatten())
    return model

def buildAgent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn

model = build_model(states, actions)
DQN = buildAgent(model, actions)
DQN.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
DQN.fit(env, nb_steps=50000, visualize=False, verbose=1)

scores = DQN.test(env, nb_episodes=100, visualize=True)
print(np.mean(scores.history['episode_reward']))

pygame.quit()
model.save('model.h5')
For more information, see the docs.
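Since model.save('model.h5') stores the underlying Keras network, you can also reload and query it later without keras-rl, for example:

trained = tf.keras.models.load_model('model.h5')
print(trained.predict(np.array([[[5, -3]]])))  # Q-values for one observation, including the extra window dimension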