I've been working on a deep Q-learning snake game in my free time, with plans to add genetic algorithm components to it. To that end, I was setting up loops that would allow me to create a given population of snakes that would each run for some number of episodes, for a total of some number of generations.
It should be simple. Just some nested for loops. Only, I've been getting some pretty wild results from my for loops.
This is the code in question:
def run(population_size=1, max_episodes=10, max_generations=50):
    """Train a population of agents on the snake game over several generations.

    Args:
        population_size: Number of ``Agent`` instances to create.
        max_episodes: Upper bound of the per-agent ``cur_episode`` counter
            (the counter runs from 1 to ``max_episodes`` inclusive).
        max_generations: Number of passes made over the whole population.

    Each iteration of the innermost loop performs exactly one
    ``game.play_step`` call, so ``cur_episode`` advances once per game
    step rather than once per finished game.
    """
    total_score = 0
    agents = [Agent() for _ in range(population_size)]
    game = SnakeGameAI()

    for cur_gen in range(max_generations):
        game.generation = cur_gen

        for agent_num, agent in enumerate(agents):
            # Set colors
            game.color1 = agent.color1
            game.color2 = agent.color2
            # Set agent number
            game.agent_num = agent_num

            for cur_episode in range(1, max_episodes + 1):
                # Get old state
                state_old = agent.get_state(game)
                # Get move
                final_move = agent.get_action(state_old)
                # Perform move and get new state
                reward, done, score = game.play_step(final_move)
                state_new = agent.get_state(game)
                # Train short memory
                agent.train_short_memory(state_old, final_move, reward, state_new, done)
                # Remember
                agent.remember(state_old, final_move, reward, state_new, done)

                # Snake died
                if done:
                    # Train long memory, report result
                    game.reset()
                    agent.episode = cur_episode
                    game.agent_episode = cur_episode
                    agent.train_long_memory()

                    if score > game.top_score:
                        game.top_score = score
                        agent.model.save()

                    # Running sum of final scores; the mean is taken over
                    # the current episode counter.
                    total_score += score
                    game.mean_score = np.round((total_score / cur_episode), 3)

                    print(f"Agent{game.agent_num}")
                    print(f"Episode: {cur_episode}")
                    print(f"Generation: {cur_gen}")
                    print(f"Score: {score}")
                    print(f"Top Score: {game.top_score}")
                    print(f"Mean: {game.mean_score}\n")
And this is the output it gives:
Agent0
Episode: 3
Generation: 7
Score: 0
Top Score: 0
Mean: 0.0
Agent0
Episode: 3
Generation: 14
Score: 0
Top Score: 0
Mean: 0.0
Agent0
Episode: 7
Generation: 20
Score: 1
Top Score: 1
Mean: 0.143
Agent0
Episode: 10
Generation: 26
Score: 0
Top Score: 1
Mean: 0.1
Agent0
Episode: 6
Generation: 28
Score: 1
Top Score: 1
Mean: 0.333
Agent0
Episode: 5
Generation: 37
Score: 0
Top Score: 1
Mean: 0.4
Agent0
Episode: 3
Generation: 43
Score: 0
Top Score: 1
Mean: 0.667
Agent0
Episode: 1
Generation: 45
Score: 1
Top Score: 1
Mean: 3.0
Agent0
Episode: 2
Generation: 49
Score: 0
Top Score: 1
Mean: 1.5
The generation number steadily ticks up every second until it hits 49 and ends the loop, while the episode number randomly changes every time the snake dies. It's bizarre. I've never seen anything like this and have no idea what in my code could possibly cause it.
CodePudding user response:
Answer:
For everyone who doesn't want to go through the comments where Eli Harold helped me work this out, the problem was that I had my code treating each episode like a frame of the game. So instead of an episode being the full lifespan of a snake (an entire game), every time the snake took an action was an episode.
Here's what my code looks like now. I added a run loop, which fixed the issue.
def run(population_size=1, max_episodes=10, max_generations=50):
    """Train a population of agents on the snake game over several generations.

    Args:
        population_size: Number of ``Agent`` instances to create.
        max_episodes: Number of complete games each agent plays per
            generation.
        max_generations: Number of passes made over the whole population.

    An episode here is one full game: the inner ``while`` loop keeps
    stepping the game until ``game.play_step`` reports ``done``, and only
    then does the episode counter advance.
    """
    total_score = 0
    agents = [Agent() for _ in range(population_size)]
    game = SnakeGameAI()

    for cur_gen in range(max_generations):
        game.generation = cur_gen

        for agent_num, agent in enumerate(agents):
            # Set colors
            game.color1 = agent.color1
            game.color2 = agent.color2
            # Set agent number
            game.agent_num = agent_num

            for cur_episode in range(1, max_episodes + 1):
                # Step the game until the snake dies; one pass through this
                # while loop is a single frame, the whole loop is one episode.
                run = True
                while run:
                    # Get old state
                    state_old = agent.get_state(game)
                    # Get move
                    final_move = agent.get_action(state_old)
                    # Perform move and get new state
                    reward, done, score = game.play_step(final_move)
                    state_new = agent.get_state(game)
                    # Train short memory
                    agent.train_short_memory(state_old, final_move, reward, state_new, done)
                    # Remember
                    agent.remember(state_old, final_move, reward, state_new, done)

                    # Snake died
                    if done:
                        run = False
                        # Train long memory, report result
                        game.reset()
                        agent.episode = cur_episode
                        game.agent_episode = cur_episode
                        agent.train_long_memory()

                        if score > game.top_score:
                            game.top_score = score
                            agent.model.save()

                        # NOTE(review): total_score is never reset, while
                        # cur_episode restarts at 1 for every agent and
                        # generation, so this mean mixes scores across
                        # agents/generations. Confirm that is intended.
                        total_score += score
                        game.mean_score = np.round((total_score / cur_episode), 3)

                        print(f"Agent{game.agent_num}")
                        print(f"Episode: {cur_episode}")
                        print(f"Generation: {cur_gen}")
                        print(f"Score: {score}")
                        print(f"Top Score: {game.top_score}")
                        print(f"Mean: {game.mean_score}\n")