I'm having problems adding an observation space to a custom Gym environment-CodePudding

Hi, can someone please help a bewildered and optimistic hobby programmer?

I am using this code, which I have modified to work with a car (0 Left, 1 Straight, 2 Right). I would like to add some observations, such as Destination (XY), Car Location (XY), bearing (angle), distance_to_destination and bearing_of_destination, in the hope that the car can find its way to the destination.

I have spent most of the day trying to get this to work, but I have failed, and failed in many different ways, too many to go through. The crux of the problem seems to be getting the input shape to match.

I think the closest that I have got is this:


        low = np.array([-5, -5, -5, -5, -5])
        high = -np.array([ 5,  5,  5,  5,  5])
        self.observation_space = gym.spaces.Box(low, high, dtype=np.float32)
        self.action_space = gym.spaces.Box(low, high, dtype=np.float32)

def reset(self):
    """Start a new episode and return the initial state.

    Assigns ``Myarray`` to ``self.state``, sets ``self.shower_length`` to
    60000 and returns ``self.state``.

    NOTE(review): ``Myarray`` is not defined in this snippet; presumably it
    is a module-level array -- confirm its shape matches the observation
    space.
    """
    
    self.state =Myarray# [[1,2], [1,2],[1,2],[1,2],[1,2]]#result.BearingToDest
    self.shower_length = 60000
    return  self.state

def build_model(states, actions):
    """Build a small fully connected network with one output per action.

    Args:
        states: Currently unused; the input shape is hard-coded to ``[2]``.
        actions: Number of units in the linear output layer.

    Returns:
        An uncompiled ``Sequential`` model with two 24-unit ReLU layers
        followed by a linear layer of ``actions`` units.
    """
    model = Sequential()
    # The posted snippet had markdown escape backslashes around the brackets
    # (``\[ 2\]``), which is not valid Python; the intended literal is ``[2]``.
    model.add(Dense(units=24, activation='relu', input_shape=[2]))
    model.add(Dense(units=24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

When I run it, the model loads and then, I think, runs, but the error message is:

ValueError: Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 1, 2)

Training for 1000000 steps ... Resetting ML Interval 1 (0 steps performed)


Any pointers would be fantastic.

CodePudding user response：

First, just to make sure, the state you return in reset should look something like:

self.state = np.array([x1, y1, x2, y2, angle])

I don't see the step function in your code, but I assume you also modified it to return self.state?

Also, your action space is the same as the observation space, which is unusual, isn't it? Given what you said, there are 3 actions, so it should be:

self.action_space = Discrete(3)

Without the full code, it is not really possible to find the cause of your problem. Could you show it?

I also noticed a minus sign which I find strange (although it seems unrelated to your main problem):

high = -np.array([ 5,  5,  5,  5,  5])
       ^
      HERE

CodePudding user response：

The code that ran for me is below. It will take some tweaks to get everything learning in the way that I want, but at least it's running :)

import gym
from gym import Env
import numpy as np
from gym.spaces import Discrete, Box, Dict
import random

# Placeholder observation value. It is a plain module-level list; it is no
# longer also attached to the numpy module as ``np.Myarray``, which was an
# accidental monkey-patch of a third-party module.
Myarray = [[3, 2]]
# Placeholder car/destination coordinates and heading, all initialised to 1.
x1 = y1 = x2 = y2 = angle = 1


# create a custom class
class ShowerEnv(Env):
    """Gym environment backed by the external simulation reached via ``client``.

    Every ``step``/``reset`` sends a request through the module-level
    ``client`` object and turns the response into an observation of the form
    ``[X, Y, DesX, DesY, BearingToDest]``.

    The action space is ``Discrete(3)``; ``action - 1`` (i.e. -1, 0 or 1) is
    what is sent to the simulation as the course change.

    NOTE(review): the observation space is declared with shape ``(1, 5)``
    while ``step``/``reset`` build the observation from five response fields
    (presumably scalars, giving shape ``(5,)``). The declared shape is kept
    because the model's input shape is derived from it -- confirm before
    changing either side.
    """

    def __init__(self, size=5):
        """Set up the action/observation spaces and the initial state.

        Args:
            size: Stored on the instance as ``self.size``; not otherwise
                used by this class.
        """
        self.size = size
        # Bounds are created as float32 so they match the dtype of the Box.
        high = np.array([[600, 600, 600, 600, 360]], dtype=np.float32)
        low = np.array([[-1, -1, -1, -1, -360]], dtype=np.float32)
        self.state = np.zeros((1, 5), dtype=np.float32)
        self.x1, self.y1, self.x2, self.y2, self.angle = 1, 1, 1, 1, 1
        self.action_space = Discrete(3)
        self.observation_space = gym.spaces.Box(low, high, dtype=np.float32, shape=(1, 5))
        # Remaining steps in the episode.
        # NOTE(review): reset() sets this to 60000, not 60 -- confirm which
        # value is intended.
        self.shower_length = 60

    @staticmethod
    def _observation(result):
        """Convert a server response into the float32 observation array."""
        return np.array(
            [result.X, result.Y, result.DesX, result.DesY, result.BearingToDest],
            dtype=np.float32,
        )

    def step(self, shower_action):
        """Apply one action and return ``(state, reward, done, info)``.

        Args:
            shower_action: Integer action from ``self.action_space``.

        Returns:
            A 4-tuple of the current observation, the reward (the response's
            ``BearingToDest``), a flag that is True once the step budget is
            used up, and an empty info dict.
        """
        self.shower_length -= 1
        # Send the course change to the simulation and read back the new
        # state of the real environment.
        result = client.ChangeCoarse(shower_action - 1, True)
        self.state = self._observation(result)

        # The simulation can request a reset; reset() replaces self.state
        # and restores the step budget.
        if result.ResetML:
            self.reset()

        # The reward comes straight from the server response.
        reward = result.BearingToDest
        done = self.shower_length <= 0
        info = {}

        return self.state, reward, done, info

    def render(self, mode="human"):
        """Do nothing; ``mode`` is accepted for gym API compatibility."""
        pass

    def reset(self):
        """Ask the simulation for a new target and return the first observation."""
        result = client.ChangeResetDest()
        self.shower_length = 60000
        self.state = self._observation(result)
        print("Resetting ML")
        return self.state


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# Instantiate the environment and read from it the two sizes the network
# needs: the observation shape and the number of discrete actions.
env = ShowerEnv()
states, actions = env.observation_space.shape, env.action_space.n


def build_model(states, actions):
    """Return the uncompiled Q-network.

    Args:
        states: Input shape passed to the first Dense layer.
        actions: Number of units in the linear output layer.

    Returns:
        A ``Sequential`` model: two 24-unit ReLU Dense layers, a linear Dense
        layer with ``actions`` units, and a final Flatten layer.
    """
    layers = [
        Dense(units=24, activation='relu', input_shape=states),
        Dense(units=24, activation='relu'),
        Dense(actions, activation='linear'),
        Flatten(),
    ]
    return Sequential(layers)


# model =build_model(states,actions)
# model.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])
# del model
#print(model.summary())
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import grpc
import Message_pb2_grpc as pb2_grpc, Message_pb2 as pb2


class UnaryClient(object):
    """
    Client for gRPC functionality

    Every public ``Change*`` method builds a ``pb2.MessageTo`` request with
    ``MoveBoat=True`` plus the fields specific to that call, sends it through
    the ``GetServerResponse`` RPC and returns the server's response.
    """

    def __init__(self, host='localhost', server_port=50052):
        """Open an insecure channel to ``host:server_port`` and bind the stub.

        Args:
            host: Server host name; defaults to the previously hard-coded
                ``'localhost'``.
            server_port: Server port; defaults to the previously hard-coded
                ``50052``.
        """
        self.host = host
        self.server_port = server_port

        # instantiate a channel
        self.channel = grpc.insecure_channel(f'{self.host}:{self.server_port}')

        # bind the client and the server
        self.stub = pb2_grpc.UnaryStub(self.channel)

    def _send(self, **fields):
        """Send a ``MessageTo`` with ``MoveBoat=True`` and the given fields."""
        # Example of a more fully populated request, from an earlier experiment:
        # (message="message", Val=9, MoveBoat=True, MoveBoatStep=True,
        #  SailAngle=4, BoatAngle=5.79878, SailDelta=0, BoatDelta=-1)
        message = pb2.MessageTo(MoveBoat=True, **fields)
        return self.stub.GetServerResponse(message)

    def ChangeCoarse(self, val, TF):
        """
        Send a request with ``BoatDelta=val`` and ``MoveBoatStep=TF``.
        """
        return self._send(MoveBoatStep=TF, BoatDelta=val)

    def ChangeSail(self, val, TF):
        """
        Send a request with ``SailDelta=val`` and ``MoveBoatStep=TF``.
        """
        return self._send(MoveBoatStep=TF, SailDelta=val)

    def ChangeWindDirection(self, val, TF):
        """
        Send a request with ``WindDelta=val`` and ``MoveBoatStep=TF``.
        """
        return self._send(MoveBoatStep=TF, WindDelta=val)

    def ChangeResetDest(self):
        """
        Send a request with ``ResetTarget=True``.
        """
        return self._send(ResetTarget=True)

    def close(self):
        """Close the underlying gRPC channel."""
        self.channel.close()


# Shared gRPC client. ShowerEnv.step() and ShowerEnv.reset() look up this
# module-level name when they are called, so it must exist before training.
client = UnaryClient()
# Send one course-change request (value 90) as soon as the module is run and
# keep the server's response in `result`.
# NOTE(review): step() only ever sends -1, 0 or 1 -- confirm 90 is intended.
result = client.ChangeCoarse(90, True)


# if result.
def build_agent(model, actions):
    """Wrap ``model`` in a DQN agent.

    Args:
        model: The Q-network handed to ``DQNAgent``.
        actions: Number of actions, passed as ``nb_actions``.

    Returns:
        A ``DQNAgent`` using a Boltzmann Q policy, a sequential replay memory
        (limit 900000, window length 1), 100 warm-up steps and a target model
        update value of 1e-2.
    """
    exploration = BoltzmannQPolicy()
    replay = SequentialMemory(limit=900000, window_length=1)
    return DQNAgent(
        model=model,
        nb_actions=actions,
        memory=replay,
        policy=exploration,
        nb_steps_warmup=100,
        target_model_update=1e-2,
    )


# Build the Q-network, wrap it in a DQN agent, compile it and train it
# against the environment.
dqn = build_agent(model=build_model(states=states, actions=actions), actions=actions)
dqn.compile(
    optimizer=Adam(learning_rate=1e-5),
    metrics=['mae'],
)
dqn.fit(
    env,
    nb_steps=1000000,
    visualize=False,
    verbose=1,
)