My agent's objective is to control the speed of a motor. All state values are the motor's rpm, and the actions are defined as 0 (decrease rpm by 1), 1 (no change), and 2 (increase rpm by 1). I am using Q-learning.
class SpeedControlEnv(Env):  # inherit from the gym Env class
    def __init__(self):  # initialize actions, observations, spaces
        # Actions we can take: decrease speed, no change, increase speed
        self.action_space = Discrete(3)
        # Observation space holds the current speed so our agent can take the necessary action
        #self.observation_space = Box(low=np.array([0]), high=np.array([100]))  # Box is used for a continuous state space
        self.observation_space = Discrete(100)  # discrete observation space
        # Set the start speed; this is the start state for my agent
        self.state = 40 + random.randint(-30, 40)
        # Set the time my agent has to complete the task before my motor blows
        self.control_length = 60  # in seconds: my agent has n seconds to bring the speed to a normal state
    def step(self, action):  # what happens at each step the agent takes
        # Map the action (0, 1, 2) to a change in speed:
        # action 0: 0 - 1 = -1  -> decrease speed by 1
        # action 1: 1 - 1 =  0  -> no change
        # action 2: 2 - 1 = +1  -> increase speed by 1
        self.state += action - 1
        # With each action, reduce the time my agent has by 1
        self.control_length -= 1
        # Assign the reward
        if self.state >= 40 and self.state <= 45:
            reward = 1
        else:
            reward = -1
        # Check whether the episode is done
        if self.control_length <= 0:
            done = True
        else:
            done = False
        # Apply random noise
        #self.state += random.randint(-3, 3)
        # Set a placeholder for info, required by OpenAI Gym
        info = {}
        # Return the step information
        return self.state, reward, done, info
    def render(self):  # visualization
        pass

    def reset(self):  # reset at the start of each episode
        # Reset the speed, i.e. the state
        self.state = 40 + random.randint(-30, 40)
        # Reset the control time
        self.control_length = 60
        return self.state
I initialized all my hyperparameters as follows:

num_episodes = 50000  # number of episodes the agent plays
#max_steps_per_episode = 60  # max steps the agent can take in one episode
learning_rate = 0.1  # alpha
discount_rate = 0.99  # gamma
exploration_rate = 1  # epsilon
max_exploration_rate = 1  # max epsilon
min_exploration_rate = 0.01  # min epsilon
exploration_decay_rate = 0.01  # decay rate of exploration
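Not shown above: before the loop I also create the environment, the Q-table, and the lists that collect results, roughly like this (the table is sized from the observation and action spaces, i.e. 100 rows x 3 columns):

import numpy as np
import random

env = SpeedControlEnv()
# one row per state, one column per action
q_table = np.zeros((env.observation_space.n, env.action_space.n))
# rewards and final states collected across episodes
reward_from_all_episodes = []
each_state = []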
My Q-learning code is as follows:
for episode in range(num_episodes):
    state = env.reset()
    done = False
    reward_current_episode = 0
    for step in range(env.control_length):
        # Exploration-exploitation trade-off
        exploration_rate_threshold = random.uniform(0, 1)
        if exploration_rate_threshold > exploration_rate:
            action = np.argmax(q_table[state, :])  # exploit the best known action
        else:
            action = env.action_space.sample()  # explore a random action
        new_state, reward, done, info = env.step(action)
        # Update the Q-table
        q_table[state, action] = q_table[state, action] * (1 - learning_rate) + learning_rate * (reward + discount_rate * np.max(q_table[new_state, :]))
        state = new_state
        reward_current_episode += reward
        if done:
            break
    # Decay the exploration rate
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * np.exp(-exploration_decay_rate * episode)
    # Append the reward from the current episode to the list of rewards from all episodes
    reward_from_all_episodes.append(reward_current_episode)
    each_state.append(state)
The error I get is:
IndexError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_6020/777037272.py in <module>
     14
     15         #Update Q table
---> 16         q_table[state, action] = q_table[state, action] * (1 - learning_rate) + learning_rate * (reward + discount_rate * np.max(q_table[new_state, :]))
     17
     18         state = new_state

IndexError: index 100 is out of bounds for axis 0 with size 100
It would be great if someone could explain why I am getting this error. I am new to programming and machine learning.
CodePudding user response:
It looks like you're trying to index a NumPy array with an index that is too large. Arrays, and just about everything in Python and in programming in general, are 0-indexed: indices start at 0, so the maximum valid index in an array with 100 rows is 99. Your observation space is Discrete(100), which means the valid states (and Q-table rows) are 0 through 99. But step() applies self.state += action - 1 without checking the bounds, so after enough "increase" actions the state reaches 100, and q_table[state, action] then asks for a row that doesn't exist.
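You can reproduce the error in isolation:

import numpy as np

q_table = np.zeros((100, 3))  # same shape as your Q-table
q_table[99, 0]   # fine: row 99 is the last valid row
q_table[100, 0]  # IndexError: index 100 is out of bounds for axis 0 with size 100

One way to fix it is to clamp the state inside step() so it always stays within the observation space. A minimal sketch, with the rest of step() unchanged:

self.state += action - 1
# keep the state inside Discrete(100): valid indices are 0..99
self.state = int(np.clip(self.state, 0, 99))

The lower bound matters too: a negative state would not raise an error but would silently index from the end of the array, which is also not what you want.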