How to solve: TypeError: get_next_s() missing 1 required positional argument: 'pi_0'

Time:12-14

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# draw the maze in its initial state
fig = plt.figure(figsize=(5, 5))
ax = plt.gca()

# red lines are the walls
plt.plot([1, 1], [0, 1], color='red', linewidth=2)
plt.plot([1, 2], [2, 2], color='red', linewidth=2)
plt.plot([2, 2], [2, 1], color='red', linewidth=2)
plt.plot([2, 3], [1, 1], color='red', linewidth=2)

# label the states S0 to S8
plt.text(0.5, 2.5, 'S0', size=14, ha='center')
plt.text(1.5, 2.5, 'S1', size=14, ha='center')
plt.text(2.5, 2.5, 'S2', size=14, ha='center')
plt.text(0.5, 1.5, 'S3', size=14, ha='center')
plt.text(1.5, 1.5, 'S4', size=14, ha='center')
plt.text(2.5, 1.5, 'S5', size=14, ha='center')
plt.text(0.5, 0.5, 'S6', size=14, ha='center')
plt.text(1.5, 0.5, 'S7', size=14, ha='center')
plt.text(2.5, 0.5, 'S8', size=14, ha='center')
plt.text(0.5, 2.3, 'START', ha='center')
plt.text(2.5, 0.3, 'GOAL', ha='center')

# set the drawing range and hide the tick marks
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
plt.tick_params(axis='both', which='both', bottom=False, top=False,
                labelbottom=False, right=False, left=False, labelleft=False)

# green circle marks the agent at the start position S0
line, = ax.plot([0.5], [2.5], marker="o", color='g', markersize=60)


# initial parameter theta_0: rows are states s0-s7, columns are the moves
# [up, right, down, left]; np.nan marks a direction blocked by a wall
theta_0 = np.array([[np.nan, 1, 1, np.nan],        # s0
                    [np.nan, 1, np.nan, 1],        # s1
                    [np.nan, np.nan, 1, 1],        # s2
                    [1, 1, 1, np.nan],             # s3
                    [np.nan, np.nan, 1, 1],        # s4
                    [1, np.nan, np.nan, np.nan],   # s5
                    [1, np.nan, np.nan, np.nan],   # s6
                    [1, 1, np.nan, np.nan],        # s7
                    ])

# set the initial action-value function
[a, b] = theta_0.shape  # a, b: number of rows and columns of theta_0
Q = np.random.rand(a, b) * theta_0  # a x b matrix of random values in [0, 1); wall entries stay nan

# convert the initial parameter theta_0 into a random policy
def simple_convert_into_pi_from_theta(theta):
    [m, n] = theta.shape
    pi = np.zeros((m, n))
    for i in range(0, m):
        pi[i, :] = theta[i, :] / np.nansum(theta[i, :])  # normalize each row
    pi = np.nan_to_num(pi)  # convert nan to 0
    return pi


pi_0 = simple_convert_into_pi_from_theta(theta_0)
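
Each row of pi_0 is a probability distribution over the four moves, so a quick check (assuming the definitions above) is that every row sums to 1:

print(pi_0)
print(pi_0.sum(axis=1))  # each of the eight rows should sum to 1.0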

def get_action(s, Q, epsilon, pi_0):
    direction = ["up", "right", "down", "left"]

    if np.random.rand() < epsilon:
        # explore: choose the next move at random according to the policy pi_0
        next_direction = np.random.choice(direction, p=pi_0[s, :])
    else:
        # exploit: choose the action with the largest Q value
        next_direction = direction[np.nanargmax(Q[s, :])]

    if next_direction == "up":
        action = 0
    elif next_direction == "right":
        action = 1
    elif next_direction == "down":
        action = 2
    elif next_direction == "left":
        action = 3

    return action

def get_next_s(s, a, Q, epsilon, pi_0):
    direction = ["up", "right", "down", "left"]
    next_direction = direction[a]

    if next_direction == "up":
        next_s = s - 3
    elif next_direction == "right":
        next_s = s + 1
    elif next_direction == "down":
        next_s = s + 3
    elif next_direction == "left":
        next_s = s - 1

    return next_s
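
The states are numbered row by row on the 3x3 grid, so moving up or down changes the index by 3 and moving left or right by 1. A quick check using the function above (Q, epsilon and pi_0 are accepted but not used by it):

print(get_next_s(0, 2, Q, 0.5, pi_0))  # action 2 is "down", so from S0 this prints 3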

# update the action-value function with the Sarsa rule
def sarsa(s, a, r, next_s, next_a, Q, eta, gamma):
    if next_s == 8:  # the goal state has no successor action
        Q[s, a] = Q[s, a] + eta * (r - Q[s, a])
    else:
        Q[s, a] = Q[s, a] + eta * (r + gamma * Q[next_s, next_a] - Q[s, a])
    return Q
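
For reference, the else branch above is the standard Sarsa update Q[s, a] += eta * (r + gamma * Q[next_s, next_a] - Q[s, a]); when next_s is the goal there is no successor action, so the gamma term is dropped. A toy illustration with made-up numbers:

# Sarsa update on made-up values, eta = 0.1, gamma = 0.9
q_sa = 0.5                # current estimate Q[s, a]
r, q_next = 0.0, 1.0      # reward and Q[next_s, next_a]
q_sa = q_sa + 0.1 * (r + 0.9 * q_next - q_sa)
print(q_sa)               # approximately 0.54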

# run one episode with Sarsa; return the visited (state, action) history and the final Q
def goal_maze_ret_s_a_Q(Q, epsilon, eta, gamma, pi):
    s = 0  # start state / start location
    a = next_a = get_action(s, Q, epsilon, pi)
    s_a_history = [[0, np.nan]]  # records the agent's moves as (state, action) pairs

    while (1):
        a = next_a
        s_a_history[-1][1] = a  # store the action taken in the current state
        next_s = get_next_s(s, a, Q, epsilon, pi)
        s_a_history.append([next_s, np.nan])  # record the state reached after each action

        if next_s == 8:  # reached the goal location
            r = 1        # reward for reaching the goal
            next_a = np.nan
        else:
            r = 0
            next_a = get_action(next_s, Q, epsilon, pi)

        Q = sarsa(s, a, r, next_s, next_a, Q, eta, gamma)

        if next_s == 8:
            break
        else:
            s = next_s

    return [s_a_history, Q]

eta = 0.1      # learning rate
gamma = 0.9    # discount factor
epsilon = 0.5  # initial exploration rate of the epsilon-greedy policy
v = np.nanmax(Q, axis=1)  # state values: best action value in each state
is_continue = True
episode = 1

while is_continue:  # termination is checked at the bottom of the loop
    print('Current episode: ' + str(episode))
    epsilon = epsilon / 2  # decay exploration every episode
    [s_a_history, Q] = goal_maze_ret_s_a_Q(Q, epsilon, eta, gamma, pi_0)
    new_v = np.nanmax(Q, axis=1)
    print('Change in state values: ' + str(np.sum(np.abs(new_v - v))))
    v = new_v
    print(s_a_history)
    print('The planned path took ' + str(len(s_a_history) - 1) + ' steps')

    episode = episode + 1
    if episode > 100:
        break
Current episode: 1
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)

      9     print('Current episode: ' + str(episode))
     10     epsilon = epsilon / 2
---> 11     [s_a_history, Q] = goal_maze_ret_s_a_Q(Q, epsilon, eta, gamma, pi_0)
     12     new_v = np.nanmax(Q, axis=1)
     13     print('Change in state values: ' + str(np.sum(np.abs(new_v - v))))

      6         a = next_a
      7         s_a_history[-1][1] = a
----> 8         s = get_next_s(s, a, Q, epsilon)

TypeError: get_next_s() missing 1 required positional argument: 'pi_0'
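
Judging from the traceback, the version of goal_maze_ret_s_a_Q that was actually executed calls get_next_s(s, a, Q, epsilon) with only four arguments, while the function is defined with five parameters (s, a, Q, epsilon, pi_0). Passing the policy through, as in the listing above, should resolve the TypeError; a minimal sketch of the corrected line inside the loop:

# inside goal_maze_ret_s_a_Q: pass the policy so the call matches the
# five-parameter definition get_next_s(s, a, Q, epsilon, pi_0)
next_s = get_next_s(s, a, Q, epsilon, pi)

Since get_next_s never actually uses Q, epsilon, or pi_0, an alternative is to define it as get_next_s(s, a) and update every call site accordingly.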