Edit: Problem Solved. Solution below.
Attempting to build a RL model to handle a task. There are two inputs: x and y, both are measured on an int scale of 1 to 100. Based on these two inputs there should be an output (action to take on, discrete(5)) and confidence.
Also, I'm very new to this territory. Please, feel free to ask me anything or correct me on something that seems downright dumb/wrong.
Here's my program (imports haven't been cleaned up....):
from abc import ABC
import gym
from tensorflow import keras
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, losses, metrics
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.optimizers import Adam
import os
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
steps = 10000
episodes = 100
score_requirement = 1000
class PlantEnv(Env, ABC):
def __init__(self):
# Actions = water: 0=(none), 1=(3 seconds), 2=(4 seconds), 3=(5 seconds), 4=(6 seconds)
self.action_space = Discrete(5)
# Starting Moisture
moisture = 20 random.randint(-10, 10)
# Starting Chance of Rain
chance_of_rain = 50 random.randint(-50, 50)
# Observations
self.observation_space = Box(low=np.array([0, 0]), high=np.array([100, 100]), dtype=np.int)
self.state = moisture, chance_of_rain
# Number of water steps left
self.water_length = steps
def step(self, action):
# Action section
water = 0
if action == 1:
water = 2
elif action == 2:
water = 3
elif action == 3:
water = 4
elif action == 4:
water = 5
moisture, chance_of_rain = self.state
moisture = (water * 5)
self.water_length -= 1
# Reward Section
reward = 0
if 40 <= moisture <= 60:
reward = 2
# If moisture is dry or wet
elif 60 < moisture <= 80 or 20 <= moisture < 40:
reward = 0.5
# If moisture is really dry or really wet
elif 80 < moisture <= 100 or 0 <= moisture < 20:
reward = -1
# If moisture is really dry or really wet
elif 100 < moisture or moisture < 0:
reward = -2
# Check if shower is done
if self.water_length <= 0:
done = True
else:
done = False
moistureLoss = random.randint(15, 25)
moisture -= moistureLoss
chance_of_rain = 50 random.randint(-50, 50)
xfactor = chance_of_rain random.randint(-50, 50)
if xfactor > 100:
moisture = (10 random.randint(0, 15))
# Set placeholder for info
info = {}
# Save current state
self.state = moisture, chance_of_rain
# Return step information
return self.state, reward, done, info
def reset(self):
# Reset test environment
# Set starting moisture
moisture = 50 random.randint(-10, 10)
# Set starting chance of rain array
chance_of_rain = 50 random.randint(-50, 50)
self.state = moisture, chance_of_rain
# Reset Test time
self.water_length = steps
return self.state
def build_model():
model = Sequential()
model.add(Flatten(input_shape=(1, 4)))
model.add(Dense(24, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(2, activation='linear'))
return model
def build_agent(model):
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=2,
nb_steps_warmup=10, target_model_update=1e-2)
return dqn
# Create environment
env = PlantEnv()
accepted_scores = []
training_data = []
scores = []
good_episodes = 0
# Create episodes and initiate simulation
for episode in range(1, episodes 1):
observation = env.reset()
done = False
score = 0
history = []
prev_observation = []
while not done:
action = env.action_space.sample()
if observation[0] > 100:
action = 0
elif observation[0] < 0:
action = 4
observation, reward, done, info = env.step(action)
score = reward
if len(prev_observation) > 0:
history.append([prev_observation, action])
prev_observation = observation
if score >= score_requirement:
good_episodes = 1
accepted_scores.append(score)
for data in history:
if data[1] == 1:
output = [1]
else:
output = [0]
training_data.append([data[0], output])
scores.append(score)
if len(accepted_scores) > 0:
print("Average accepted score: ", np.mean(accepted_scores))
print("Median accepted score : ", np.median(accepted_scores))
print("Episodes above accepted score of {}: {}/{}\n".format(score_requirement, good_episodes, episodes))
model = build_model()
model.summary()
dqn = build_agent(model)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
The first model gives this error when trying to dqn.fit: ValueError: Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 2)
The second model gives this error when trying to build_agent: AttributeError: 'list' object has no attribute 'shape'
Any ideas as to what I'm doing wrong or how to go about correcting it would be a massive help. I feel 95% confident that I have my environment setup correctly.
I initially went with the first model just to see if I could get the program to compile and work. Then, after further research, I built the second model because I understood that it was capable of giving me an action with a confidence rating. Getting errors at both turns.
CodePudding user response:
Your model expects a 2D input but you defined it as 1D. Here is a working example:
from abc import ABC
import gym
from tensorflow import keras
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, losses, metrics
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.optimizers import Adam
import os
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
steps = 10000
episodes = 100
score_requirement = 1000
class PlantEnv(Env, ABC):
def __init__(self):
# Actions = water: 0=(none), 1=(3 seconds), 2=(4 seconds), 3=(5 seconds), 4=(6 seconds)
self.action_space = Discrete(5)
# Starting Moisture
moisture = 20 random.randint(-10, 10)
# Starting Chance of Rain
chance_of_rain = 50 random.randint(-50, 50)
# Observations
self.observation_space = Box(low=np.array([0, 0]), high=np.array([100, 100]), dtype=np.int)
self.state = moisture, chance_of_rain
# Number of water steps left
self.water_length = steps
def step(self, action):
# Action section
water = 0
if action == 1:
water = 2
elif action == 2:
water = 3
elif action == 3:
water = 4
elif action == 4:
water = 5
moisture, chance_of_rain = self.state
moisture = (water * 5)
self.water_length -= 1
# Reward Section
reward = 0
if 40 <= moisture <= 60:
reward = 2
# If moisture is dry or wet
elif 60 < moisture <= 80 or 20 <= moisture < 40:
reward = 0.5
# If moisture is really dry or really wet
elif 80 < moisture <= 100 or 0 <= moisture < 20:
reward = -1
# If moisture is really dry or really wet
elif 100 < moisture or moisture < 0:
reward = -2
# Check if shower is done
if self.water_length <= 0:
done = True
else:
done = False
moistureLoss = random.randint(15, 25)
moisture -= moistureLoss
chance_of_rain = 50 random.randint(-50, 50)
xfactor = chance_of_rain random.randint(-50, 50)
if xfactor > 100:
moisture = (10 random.randint(0, 15))
# Set placeholder for info
info = {}
# Save current state
self.state = moisture, chance_of_rain
# Return step information
return self.state, reward, done, info
def reset(self):
# Reset test environment
# Set starting moisture
moisture = 50 random.randint(-10, 10)
# Set starting chance of rain array
chance_of_rain = 50 random.randint(-50, 50)
self.state = moisture, chance_of_rain
# Reset Test time
self.water_length = steps
return self.state
def build_model():
model = Sequential()
model.add(Flatten(input_shape=(1, 2)))
model.add(Dense(24, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(5, activation='linear'))
return model
def build_agent(model):
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=5,
nb_steps_warmup=10, target_model_update=1e-2)
return dqn
# Create environment
env = PlantEnv()
accepted_scores = []
training_data = []
scores = []
good_episodes = 0
# Create episodes and initiate simulation
for episode in range(1, episodes 1):
observation = env.reset()
done = False
score = 0
history = []
prev_observation = []
while not done:
action = env.action_space.sample()
if observation[0] > 100:
action = 0
elif observation[0] < 0:
action = 4
observation, reward, done, info = env.step(action)
score = reward
if len(prev_observation) > 0:
history.append([prev_observation, action])
prev_observation = observation
if score >= score_requirement:
good_episodes = 1
accepted_scores.append(score)
for data in history:
if data[1] == 1:
output = [1]
else:
output = [0]
training_data.append([data[0], output])
scores.append(score)
if len(accepted_scores) > 0:
print("Average accepted score: ", np.mean(accepted_scores))
print("Median accepted score : ", np.median(accepted_scores))
print("Episodes above accepted score of {}: {}/{}\n".format(score_requirement, good_episodes, episodes))
model = build_model()
model.summary()
dqn = build_agent(model)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
Note that the input_shape
4 equals the number of states
and the output nodes (2) equals the number of actions
. You will have to change these parameters according to your dataset.