Hi, can someone please help a bewildered and optimistic hobby programmer?
I am using this code, which I have modified to work with a car (0 = Left, 1 = Straight, 2 = Right). I would like to add some observations, such as Destination (X, Y), Car Location (X, Y), bearing (angle), distance_to_destination and bearing_of_destination, in the hope that the car can find its way to the destination.
I have spent most of the day trying to get this to work, but failed, and failed in many different ways, too many to go through. The crux of the problem seems to be getting the input shape to match.
I think the closest I have got is this:
low = np.array([-5, -5, -5, -5, -5])
high = -np.array([ 5, 5, 5, 5, 5])
self.observation_space = gym.spaces.Box(low, high, dtype=np.float32)
self.action_space = gym.spaces.Box(low, high, dtype=np.float32)
def reset(self):
    self.state = Myarray  # [[1,2], [1,2], [1,2], [1,2], [1,2]]  # result.BearingToDest
    self.shower_length = 60000
    return self.state
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(units=24, activation='relu', input_shape=[2]))
    model.add(Dense(units=24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model
When I run it, the model loads and then, I think, runs, but the error message is:
ValueError: Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 1, 2)
Training for 1000000 steps ... Resetting ML Interval 1 (0 steps performed)
Any pointers would be fantastic.
CodePudding user response:
First, just to make sure, the state you return in reset should look something like:
self.state = np.array([x1, y1, x2, y2, angle])
I don't see the step function in your code, but I assume you also modified it to return self.state?
Also, your action space is the same as your observation space, which is not normal, is it? Given what you said, there are 3 actions, so it should be:
self.action_space = Discrete(3)
Without the full code, it is not really possible to find the cause of your problem. Could you show it?
I also noticed a minus sign which I find strange (although it seems unrelated to your main problem):
high = -np.array([ 5, 5, 5, 5, 5])
       ^
       HERE
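For what it's worth, a minimal consistent setup might look something like this (just a sketch, assuming five observation values in [-5, 5] and your three steering actions; adapt the bounds and names to your car):

import gym
import numpy as np
from gym.spaces import Discrete, Box

class CarEnv(gym.Env):
    def __init__(self):
        # five scalars: car x, car y, destination x, destination y, bearing
        low = np.array([-5, -5, -5, -5, -5], dtype=np.float32)
        high = np.array([5, 5, 5, 5, 5], dtype=np.float32)  # note: no minus sign
        self.observation_space = Box(low, high, dtype=np.float32)
        self.action_space = Discrete(3)  # 0 = left, 1 = straight, 2 = right
        self.state = np.zeros(5, dtype=np.float32)

    def reset(self):
        self.state = np.zeros(5, dtype=np.float32)
        return self.state  # shape (5,), matching observation_space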
CodePudding user response:
The code that ran for me is below. It will take some tweaks to get everything learning the way I want, but at least it's running :)
import gym
from gym import Env
import numpy as np
from gym.spaces import Discrete, Box, Dict
import random
Myarray = np.array([[3, 2]])
x1 = y1 = x2 = y2 = angle = 1
# create a custom gym environment class
class ShowerEnv(Env):
    def __init__(self, size=5):
        self.size = size
        high = np.array([[600, 600, 600, 600, 360]])
        low = np.array([[-1, -1, -1, -1, -360]])
        self.state = np.zeros((1, 5), dtype=np.float32)
        self.x1, self.y1, self.x2, self.y2, self.angle = 1, 1, 1, 1, 1
        self.action_space = Discrete(3)
        self.observation_space = gym.spaces.Box(low, high, dtype=np.float32, shape=(1, 5))
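        # as far as I can tell, declaring the space as (1, 5) is what makes the
        # shapes line up: keras-rl builds the net from observation_space.shape,
        # and with window_length=1 each 5-value observation reaches the model
        # as (batch, 1, 5), matching input_shape=(1, 5) in build_model below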
        self.shower_length = 60  # episode step budget (name kept from the shower example this started as)
    def step(self, shower_action):
        self.shower_length -= 1
        # send a protobuf command to the car program and get a response,
        # i.e. the true environment
        result = client.ChangeCoarse(shower_action - 1, True)
        self.state = np.array([result.X, result.Y, result.DesX, result.DesY, result.BearingToDest])
        if result.ResetML:
            self.reset()
            # x1 = y1 = x2 = y2 = angle = 1
            # self.state = np.array([x1, y1, x2, y2, angle])
        # the reward also comes from the protobuf response
        reward = result.BearingToDest
        # reward = 1  # just put in to make the code run
        if self.shower_length <= 0:
            done = True
        else:
            done = False
        info = {}
        return self.state, reward, done, info
    def render(self):
        pass
    def reset(self):
        result = client.ChangeResetDest()
        self.shower_length = 60000
        self.state = np.array([result.X, result.Y, result.DesX, result.DesY, result.BearingToDest])
        print("Resetting ML")
        return self.state
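        # note: reset() sets shower_length to 60000 while __init__ uses 60, so
        # the first episode is far shorter than later ones; worth unifying if
        # that is not intended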
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
env = ShowerEnv()
states = env.observation_space.shape
actions = env.action_space.n
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(units=24, activation='relu', input_shape=states))
    model.add(Dense(units=24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
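    # Flatten last: Dense applied to the (1, 5) input gives (batch, 1, n)
    # outputs, and Flatten collapses that to the (batch, n_actions) shape
    # that keras-rl's DQNAgent expects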
    model.add(Flatten())
    return model
# model = build_model(states, actions)
# model.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])
# del model
# print(model.summary())
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import grpc
import Message_pb2_grpc as pb2_grpc, Message_pb2 as pb2
class UnaryClient(object):
    """
    Client for gRPC functionality
    """

    def __init__(self):
        self.host = 'localhost'
        self.server_port = 50052
        # instantiate a channel
        self.channel = grpc.insecure_channel(
            '{}:{}'.format(self.host, self.server_port))
        # bind the client and the server
        self.stub = pb2_grpc.UnaryStub(self.channel)
    def ChangeCoarse(self, val, TF):
        """
        Client function to call the rpc for GetServerResponse
        """
        message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, BoatDelta=val)
        # (message="message", Val=9, MoveBoat=True, MoveBoatStep=True, SailAngle=4, BoatAngle=5.79878, SailDelta=0, BoatDelta=-1)
        # print(f'{message}')
        return self.stub.GetServerResponse(message)

    def ChangeSail(self, val, TF):
        """
        Client function to call the rpc for GetServerResponse
        """
        message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, SailDelta=val)
        # print(f'{message}')
        return self.stub.GetServerResponse(message)

    def ChangeWindDirection(self, val, TF):
        """
        Client function to call the rpc for GetServerResponse
        """
        message = pb2.MessageTo(MoveBoat=True, MoveBoatStep=TF, WindDelta=val)
        # print(f'{message}')
        return self.stub.GetServerResponse(message)

    def ChangeResetDest(self):
        """
        Client function to call the rpc for GetServerResponse
        """
        message = pb2.MessageTo(MoveBoat=True, ResetTarget=True)
        # print(f'{message}')
        return self.stub.GetServerResponse(message)
client = UnaryClient()
result = client.ChangeCoarse(90, True)
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=900000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions,
                   nb_steps_warmup=100, target_model_update=1e-2)
    return dqn
dqn = build_agent(build_model(states, actions), actions)
dqn.compile(optimizer=Adam(learning_rate=1e-5), metrics=['mae'])
dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)
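In case it helps anyone else, once training finishes I believe keras-rl can test and save the agent like this (the weights filename is just my placeholder):

# run a few evaluation episodes against the same environment
scores = dqn.test(env, nb_episodes=5, visualize=False)
print(np.mean(scores.history['episode_reward']))

# save the trained weights so the car does not have to relearn every run
dqn.save_weights('dqn_car_weights.h5f', overwrite=True)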