I'm fairly new to reinforcement learning and I've built an agent that feeds two inputs to its neural network: the first input is a tuple with two numbers representing the agent's current position, and the second is an array of numbers ranging from 0 to 3 representing the types of requests the agent receives from the environment. The network outputs which movement is best (move forwards, backwards, sideways, etc.).
Each episode has 300 steps. The for loop inside train_pos_nn() takes about 5 s (each call to predict() takes about 20 ms and each call to fit() takes about 7 ms), which amounts to roughly 25 minutes per episode, which is too much time (about 17 days to finish the 1000 episodes required to converge). It takes the same amount of time on Google Colab (Edit: even when using the GPU option), and a GPU cannot be set up on my local machine.
Is there any way I can reduce the amount of time it takes the agent to train?
n_possible_movements = 9
MINIBATCH_SIZE = 32

class DQNAgent(object):
    def __init__(self):
        #self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.8
        self.epsilon_min = 0.1
        self.learning_rate = 10e-4
        self.tau = 1e-3
        # Main models
        self.model_uav_pos = self._build_pos_model()
        # Target networks
        self.target_model_uav_pos = self._build_pos_model()
        # Copy weights
        self.target_model_uav_pos.set_weights(self.model_uav_pos.get_weights())
        # An array with last n steps for training
        self.replay_memory_pos_nn = deque(maxlen=REPLAY_MEMORY_SIZE)
    def _build_pos_model(self): # compile the DNN
        # create the DNN model
        dnn = self.create_pos_dnn()
        opt = Adam(learning_rate=self.learning_rate) #, decay=self.epsilon_decay)
        dnn.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
        return dnn
    def create_pos_dnn(self):
        # initialize the input shapes (the shape of an array is the number of elements in each dimension)
        pos_input_shape = (2,)
        requests_input_shape = (len(env.ues),)
        # How many possible outputs we can have
        output_nodes = n_possible_movements
        # Initialize the inputs
        uav_current_position = Input(shape=pos_input_shape, name='pos')
        ues_requests = Input(shape=requests_input_shape, name='requests')
        # Put them in a list
        list_inputs = [uav_current_position, ues_requests]
        # Merge all input features into a single large vector
        x = layers.concatenate(list_inputs)
        # Add a 1st Hidden (Dense) Layer
        dense_layer_1 = Dense(512, activation="relu")(x)
        # Add a 2nd Hidden (Dense) Layer
        dense_layer_2 = Dense(512, activation="relu")(dense_layer_1)
        # Add a 3rd Hidden (Dense) Layer
        dense_layer_3 = Dense(256, activation="relu")(dense_layer_2)
        # Output layer
        output_layer = Dense(output_nodes, activation="softmax")(dense_layer_3)
        model = Model(inputs=list_inputs, outputs=output_layer)
        # return the DNN
        return model
    def remember_pos_nn(self, state, action, reward, next_state, done):
        self.replay_memory_pos_nn.append((state, action, reward, next_state, done))

    def act_upon_choosing_a_new_position(self, state): # state is a tuple (uav_position, requests_array)
        if np.random.rand() <= self.epsilon: # if acting randomly, take random action
            return random.randrange(n_possible_movements)
        pos = np.array([state[0]])
        reqs = np.array([state[1]])
        act_values = self.model_uav_pos.predict(x=[pos, reqs]) # if not acting randomly, predict reward value based on current state
        return np.argmax(act_values[0])
    def train_pos_nn(self):
        print("In Training..")
        # Start training only if certain number of samples is already saved
        if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
            print("Exiting Training: Replay Memory Not Full Enough...")
            return
        # Get a minibatch of random samples from memory replay table
        minibatch = random.sample(self.replay_memory_pos_nn, MINIBATCH_SIZE)
        start_time = time.time()
        # Enumerate our batches
        for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
            print('...Starting Training...')
            target = 0
            pos = np.array([current_state[0]])
            reqs = np.array([current_state[1]])
            pos_next = np.array([new_current_state[0]])
            reqs_next = np.array([new_current_state[1]])
            if not done:
                target = reward + DISCOUNT * np.amax(self.target_model_uav_pos.predict(x=[pos_next, reqs_next]))
            else:
                target = reward
            # Update Q value for given state
            target_f = self.model_uav_pos.predict(x=[pos, reqs])
            target_f[0][action] = target
            self.model_uav_pos.fit([pos, reqs],
                                   target_f,
                                   verbose=2,
                                   shuffle=False,
                                   callbacks=None,
                                   epochs=1)
        end_time = time.time()
        print("Time", end_time - start_time)
        # Update target network counter every episode
        self.target_train()
    def target_train(self):
        weights = self.model_uav_pos.get_weights()
        target_weights = self.target_model_uav_pos.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
        self.target_model_uav_pos.set_weights(target_weights)
# Main
SIZE = 100 # size of the grid the agent is in
for episode in tqdm(range(1, n_episodes + 1), ascii=True, unit='episodes'):
    # Reset environment and get initial state
    current_state = env.reset(SIZE)
    # Reset flag and start iterating until episode ends
    done = False
    steps_n = 300
    for t in range(steps_n):
        # Normalize the input (the current state)
        current_state_normalized = normalize_pos_state(current_state)
        # Get new position for the agent
        action_pos = agent_dqn.act_upon_choosing_a_new_position(current_state_normalized)
        new_state, reward, done, _ = env.step(action_pos)
        agent_dqn.remember_pos_nn(current_state_normalized, action_pos, reward, normalize_pos_state(new_state), done)
        current_state = new_state # not normalized
        agent_dqn.train_pos_nn()
    # Decay epsilon
    if episode % 50 == 0:
        if agent_dqn.epsilon > agent_dqn.epsilon_min:
            agent_dqn.epsilon *= agent_dqn.epsilon_decay
            agent_dqn.epsilon = max(agent_dqn.epsilon, agent_dqn.epsilon_min)
CodePudding user response:
One performance optimization in your training loop is to use the call method of a model instead of calling predict, and to wrap it with tf.function. predict is good for batch inference, but there is some overhead, and for single samples call will likely be faster. Some more details about this difference can be found here. For your purposes, it might be modified like this:
class DQNAgent(object):
    def _build_pos_model(self): # compile the DNN
        # create the DNN model
        dnn = self.create_pos_dnn()
        opt = Adam(learning_rate=self.learning_rate) #, decay=self.epsilon_decay)
        dnn.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
        dnn.call = tf.function(dnn.call)
        return dnn
Then change every call of self.model_uav_pos.predict(...) and self.target_model_uav_pos.predict(...) to self.model_uav_pos(...) and self.target_model_uav_pos(...), respectively.
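For example, here is a minimal sketch of how the act method from the question could look after that change (assuming the inputs keep the same shapes; training=False is optional but makes the intent explicit, and calling the model directly returns a tensor rather than a NumPy array):

    def act_upon_choosing_a_new_position(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(n_possible_movements)
        pos = np.array([state[0]])
        reqs = np.array([state[1]])
        # Direct call instead of predict(); returns a tensor of shape (1, n_possible_movements)
        act_values = self.model_uav_pos([pos, reqs], training=False)
        return np.argmax(act_values[0])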
A further potential optimization is to JIT-compile the TF function by supplying jit_compile=True to the tf.function wrapper, e.g.:
dnn.call = tf.function(dnn.call, jit_compile=True)
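Note that jit_compile=True uses XLA, which compiles the function for each new input shape it sees, so the first call will be slower; since your inputs always have the same shapes, that cost should only be paid once, but whether XLA actually speeds things up depends on your hardware and TensorFlow build.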
CodePudding user response:
Utilizing a GPU (Graphics Processing Unit) will usually make model training faster. You can follow these steps to train your model on a GPU:
How to Finally Install TensorFlow 2 GPU on Windows 10 in 2022:
- Step 1: Find out the TF version and its drivers.
- Step 2: Install Microsoft Visual Studio
- Step 3: Install the NVIDIA CUDA toolkit
- Step 4: Install cuDNN
- Step 5: Extract the ZIP folder and copy core directories
- Step 6: Add CUDA toolkit to PATH
- Step 7: Install TensorFlow inside a virtual environment with Jupyter Lab
(Detailed instruction in the link above)
Alternatively, you can use Google Colab, as it has a GPU option that doesn't require you to do any installations. You can change the accelerator in Colab settings: Runtime -> Change runtime type -> None/GPU/TPU.
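Whichever option you choose, it's worth verifying that TensorFlow can actually see a GPU before launching a long training run; a quick check using the standard TensorFlow API is:

import tensorflow as tf

# Prints the GPUs visible to TensorFlow; an empty list means training will run on the CPU
print(tf.config.list_physical_devices('GPU'))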