Why does my model not learn? Very high loss-CodePudding

I built a simulation model where trucks collect garbage containers based on their fill level. I used OpenAi Gym and Tensorflow/keras to create my Deep Reinforcement Learning model... But my training has a very high loss... Where did I go wrong? Thanks in advance

this is the Env

class Marltf(Env):
    def __init__(self):
       
        self.i= 0
        self.containers1 = Container(3,3)
        self.containers2 = Container(1,3)
        self.containers3 = Container(3,1)
        self.containers4 = Container(5,6)
        self.containers5 = Container(8,6)
        self.containers6 = Container(10,10)
        self.containers7 = Container(11,11)
        self.containers8 = Container(7,12) 
        self.passo = 0
        self.containers2.lv = 2
        self.containers3.lv = 4
        self.containers5.lv = 4
        self.containers6.lv = 1
        self.containers8.lv = 2
        self.shower_length= 300
        
        self.containers = [self.containers1,self.containers2,self.containers3,self.containers4, self.containers5, self.containers6, self.containers7, self.containers8]
        self.positions ={}
        self.capacities ={}
        self.camions= []
        b = 0
        for cont in self.containers:
            b  = cont.lv
        reward = 0
        nCamionFloat = 0
        while b > 6:
          b  =-10
          nCamionFloat  =1
        nCamionInt = int(nCamionFloat)
       
        for ic in range(nCamionInt):
          self.camions.append(Camion(1,1,None,ic))


        for cam in self.camions:
          
          self.positions[cam.name] = cam.position  
          self.capacities[cam.name] = 10
        
        
        self.frames = []
        self.cnt=0  


        self.mapp = Map(15,15,self.camions,self.containers)

        self.state = (15*15)/5
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = Box(low = np.array([0]), high= np.array([51]))

    def step(self, action):
      
        moves = {0: (-1, 0),1: (1, 0),2: (0, -1),3: (0, 1)}
        
        done = False
       
        ic = 0   
        for cam in self.camions: 
            cam.position = (self.positions[ic][0],self.positions[ic][1])            
            cam.capacity = self.capacities[ic] 
            
            self.state  = -5
            

        mossa = moves[action]
        x=self.camions[self.i].position
        reward = 0
        nuovaposizione = [mossa[0]   x[0],mossa[1]  x[1]]
        self.shower_length -= 1 
        if self.mapp.mapp[nuovaposizione[0],nuovaposizione[1]] == -1:
          reward  = -5
          self.state  = -5
        
        else:
            self.mapp.mapp[x[0],x[1]] = 0
            self.camions[self.i].position=nuovaposizione
            self.mapp.mapp[nuovaposizione[0],nuovaposizione[1]] = 9
            self.positions.update({self.camions[self.i].name : nuovaposizione})
           
            
            
            
            reward  = -1
            self.state = -2
            

        for contain in self.containers:
                  if self.camions[self.i].position[0] == contain.position[0] and camion.position[1] == contain.position[1] :
                        
                        if contain.lv ==3 and self.camions[self.i].capacity >=3:
                            self.camions[self.i].reward  = 100
                            self.camions[self.i].capacity  = -3
                            self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
                            reward  =20
                            
                            self.state  =20
                         
                            contain.lv=0

                        elif contain.lv == 2 and self.camions[self.i].capacity >=2:
                            self.camions[self.i].reward  = 50
                            self.camions[self.i].capacity  = -2
                            self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
                            self.state  =10
                          
                            reward  = 50
                            
                            contain.lv=0

                        elif contain.lv == 1 and self.camions[self.i].capacity >=1:
                            
                            reward  = 10
                            self.camions[self.i].reward  =5
                            self.camions[self.i].capacity  = -1
                            self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
                            contain.lv=0
                            self.state =1
                           
                        elif contain.lv==4 and self.camions[self.i].capacity >=4:
                            reward  =50
                            self.camions[self.i].reward  =50
                            self.camions[self.i].capacity  = -4
                            self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})
                            self.state  =50
                            contain.lv=0
                           
                          
                        elif contain.lv==0 and self.camions[self.i].capacity >=4:
                            reward  = -20
                            self.camions[self.i].reward  =-20
                            self.camions[self.i].capacity  = 0
                            self.state  = -20
                            contain.lv=0
                         
                        
                  if self.camions[self.i].capacity <=2:
                              self.camions[self.i].positions=(1,1)
                              self.positions.update({self.camions[self.i].name : (1,1)})

                              self.camions[self.i].capacity = 10
                              self.capacities.update({self.camions[self.i].name : self.camions[self.i].capacity})

                  
                  

                          

        if self.i ==1:
                      self.i= 0              
                      self.i = 0
                      self.i = 0
        elif self.i ==0:
                      self.i= 1


        if self.shower_length <= 0: 
            done = True
        else:
            done = False
        

        self.passo  =1
       
        
        
        
        

        info = {}
        
        return self.state,reward,done,info



    def render(self, mode="human"):
           
            BLACK = (0, 0, 0)
            WHITE = (200, 200, 200)
            
            WINDOW_HEIGHT = len(self.mapp.mapp[0]) *50
            WINDOW_WIDTH = len(self.mapp.mapp[0]) *50
           
            whiteC=pygame.image.load('white.jpg')
            whiteC=pygame.transform.scale(whiteC,(50, 50))
           
            greenC=pygame.image.load('green.jpg')
            greenC=pygame.transform.scale(greenC,(50, 50))
            
            yellowC=pygame.image.load('yellow.jpg')
            yellowC=pygame.transform.scale(yellowC,(50, 50))

            orangeC=pygame.image.load('orange.jpg')
            orangeC=pygame.transform.scale(orangeC,(50, 50))

            redC=pygame.image.load('red.jpg')
            redC=pygame.transform.scale(redC,(50, 50))

            
            gT=pygame.image.load('greenCamion.jpg')
            gT=pygame.transform.scale(gT,(50, 50))

            yT=pygame.image.load('yellowCamion.jpg')
            yT=pygame.transform.scale(yT,(50, 50))

            rT=pygame.image.load('redCamion.jpg')
            rT=pygame.transform.scale(rT,(50, 50))
            
           
            
            
            global SCREEN, CLOCK
            pygame.init()
            SCREEN = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
            CLOCK = pygame.time.Clock()
            SCREEN.fill(BLACK)
            
            pygame.draw.rect(SCREEN, WHITE, pygame.Rect( 10, 0, 50, 50))
            blockSize = 50 #Set the size of the grid block
            
            for i in range(0,len(self.mapp.mapp[0])):
              for j in range(0,len(self.mapp.mapp[0])):
                      a=i*50
                      b=j*50
                    
                      if self.mapp.mapp[i][j] == -1:
                        pygame.draw.rect(SCREEN, WHITE, pygame.Rect( a, b, 50, 50))

            for c in self.camions :
              if c.capacity > 6:
                SCREEN.blit(gT, (c.position[0]*50, c.position[1]*50))
              
              if c.capacity > 3 and c.capacity <= 6:
                SCREEN.blit(yT, (c.position[0]*50, c.position[1]*50))     
              
              if c.capacity <= 3:
                SCREEN.blit(rT, (c.position[0]*50, c.position[1]*50))
            
            
            for contain in self.containers :
              if contain.lv == 0:
                 SCREEN.blit(whiteC,(contain.position[0]*50 , contain.position[1]*50))
              
              elif contain.lv == 1:
                  SCREEN.blit(greenC,(contain.position[0]*50 , contain.position[1]*50))
              
              elif contain.lv == 2:
                  SCREEN.blit(yellowC,(contain.position[0]*50 , contain.position[1]*50))
              
              elif contain.lv == 3:
                 SCREEN.blit(orangeC,(contain.position[0]*50 , contain.position[1]*50))
              
              if contain.lv == 4:
                 SCREEN.blit(redC,(contain.position[0]*50 , contain.position[1]*50))
              
                
            
            for x in range(0, WINDOW_WIDTH, blockSize):
                for y in range(0, WINDOW_HEIGHT, blockSize):
                    rect = pygame.Rect(x, y, blockSize, blockSize)
                    pygame.draw.rect(SCREEN, WHITE, rect, 1)

            pygame.display.flip()

            view = pygame.surfarray.array3d(SCREEN)
            view = view.transpose([1, 0, 2])

            img_bgr = cv2.cvtColor(view, cv2.COLOR_RGB2BGR)
           
            
            
            
            
            pygame.image.save(SCREEN, f"screenshot{self.cnt}.png")
            self.cnt  =1
            pygame.event.get()
                    


       
    def reset(self):
        self.state = (15*15)/4
        self.shower_length = 300
        
        self.containers1.lv=3
        self.containers2.lv=1
        self.containers7.lv = 2 
        self.containers3.lv = 4
        self.containers5.lv = 4
        self.containers6.lv = 1
        self.containers8.lv = 2
        self.passo = 0
        self.positions ={}
        self.capacities ={}
        self.camions= []
        b = 0
        for cont in self.containers:
            b  = cont.lv
        reward = 0
        nCamionFloat = 0
        while b > 6:
          b  =-10
          nCamionFloat  =1
        nCamionInt = int(nCamionFloat)
      
        for ic in range(nCamionInt):
          self.camions.append(Camion(1,1,None,ic))


        for cam in self.camions:
          
          self.positions[cam.name] = cam.position  
          self.capacities[cam.name] = 10
        
        self.shower_length =60
        self.cnt=0  
        self.i = 0
            
            
        




        containers = [    containers1,    containers2,    containers3,    containers4]
        containers.append(    containers1)

states = env.observation_space.shape
actions = env.action_space.n
b = env.action_space.sample()

My model

def build_model(states,actions):
  model = tf.keras.Sequential([
      keras.layers.Dense(64, input_shape=states),
      keras.layers.LeakyReLU(0.24,),
      keras.layers.Dense(64),
      keras.layers.LeakyReLU(0.24,),
      keras.layers.Dense(32),
      keras.layers.LeakyReLU(0.24,),
  
      keras.layers.Dense(16),
      keras.layers.LeakyReLU(0.24,),
      keras.layers.Dense(8),
      keras.layers.LeakyReLU(0.24,),
      
      keras.layers.Dense(actions, activation='linear'),
      
])
  return model



model = build_model(states, actions)
model.compile(loss='mse',  metrics=['accuracy'])

def build_agent(model, actions):
      policy = GreedyQPolicy()
      memory = SequentialMemory(limit=10000, window_length=1)
      dqn = DQNAgent(model=model, memory=memory, policy=policy,nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
      
      return dqn



dqn = build_agent(model, actions)
dqn.compile(tf.keras.optimizers.Adadelta(
    learning_rate=0.1, rho=0.95, epsilon=1e-07, name='Adadelta'), metrics= ["accuracy"] 
)

a =dqn.fit(env, nb_steps=5000, visualize=True, verbose=2,)

the loss starts from 50 and reaches 200

CodePudding user response：

loss does not really matter in RL. Very high loss is actually normal. In RL we care the reward most.

CodePudding user response：

In reinforcement learning you usually don't care about loss, but rewards. From the class name, it looks like it also is a multi agent reinforcement learning problem, which are usually more difficult to deal with w.r.t single agent problems.

The first thing that I would try to change is the number of steps: 5000 is very low. Try to define, if it is not already defined, an episode, then plot the cumulative reward at the end of the episode, and check if the cumulative reward increases as the number of episodes increase.

This is the cleanest way to check if the reward is actually increasing and the agent is learning something.