My goal is to train an agent (ship) that takes two actions for now. 1. Choosing it's heading angle (where to go next) and 2. Choosing it's acceleration (if it will change its speed or not).
However, it seems like that I cannot undestand how to properly construct my action space and state space. I keep getting an error which I do not know how to fix. I have been trying to make it work using the Space wrapper.
I use the following code.
#Packages used
using ReinforcementLearning
using Flux #Needed for all the Neural Networks functionalities
using Plots
using DelimitedFiles #Needed to read all the txt files
using PolygonOps
using Random
using Intervals #not being used
#GeoBoundariesManipulation
include(joinpath(pwd(),"GeoBoundariesManipulation.jl"));
using .GeoBoundariesManipulation
#My problem's parameters
struct ShippingEnvParams
gridworld_dims::Tuple{Int64,Int64} #Gridworld dimensions
velocities::Vector{Int64} #available velocities from 6 knots to 20 knots
acceleration::Vector{Int64} #available acceleration per step: -2, 0, 2
heading::Vector{CartesianIndex{2}} #all heading manoeuvers
punishment::Int64 #punishment per ordinary step
out_of_grid_punishment::Int64 #punishment for going towards an island or out of grid bounds
StartingPoint::CartesianIndex{2}
GoalPoint::CartesianIndex{2}
all_polygons::Vector{Vector{Tuple{Float64,Float64}}} #all the boundaries
end
function ShippingEnvParams(;
gridworld_dims = (50,50),
velocities = Vector((6:2:20)),
acceleration = Vector((-2:2:2)),
heading = [CartesianIndex(0,1);CartesianIndex(0,-1);CartesianIndex(-1,0);CartesianIndex(-1,1);CartesianIndex(-1,-1);CartesianIndex(1,-1);CartesianIndex(1,1);CartesianIndex(1,0)],
punishment = -5,
out_of_grid_punishment = -100,
StartingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex((-6.733535,61.997345),gridworld_dims[1],gridworld_dims[2]),
EndingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex((-6.691500,61.535580),gridworld_dims[1],gridworld_dims[2]),
AllPolygons = GeoBoundariesManipulation.load_files("finalboundaries")
)
ShippingEnvParams(
gridworld_dims,
velocities,
acceleration,
heading,
punishment,
out_of_grid_punishment,
StartingPoint,
EndingPoint,
AllPolygons
)
end
###ENVIRONMENT CONSTRUCTION
#Instance
mutable struct ShippingEnv <: AbstractEnv
params::ShippingEnvParams
action_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}}
observation_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}} #state_space
state::Space{Tuple{Int64,Int64}} #state: (position,velocity)
action::Space{Tuple{Int64,Int64}} #action: (heading_angle,acceleration)
done::Bool #checks if agent has reached its goal
position::CartesianIndex{2}
time::Float64
velocity::Int64
distance::Float64
reward::Union{Nothing,Float64}
end
function ShippingEnv()
params1 = ShippingEnvParams()
env = ShippingEnv(
params1,
#Base.OneTo(length(params.heading)*length(params.velocities)),
Space((1:length(params1.heading),1:length(params1.acceleration))), #Space: (1-number of heading options, 1-number of acceleration options)
#Space([1..params.gridworld_dims[1]*params.gridworld_dims[2],minimum(params.velocities)..maximum(params.velocities)]),
Space((1:(params1.gridworld_dims[1]*params1.gridworld_dims[2]),(1:length(params1.velocities)))), #(1-number of grid tiles, 1-number of velocity options)
Space((LinearIndices((params1.gridworld_dims[1],params1.gridworld_dims[2]))[params1.StartingPoint],6)),
Space((1,1)),
false,
params1.StartingPoint,
0.0,
params1.velocities[1],
0.0,
0.0
)
reset!(env)
env
end
#Minimal interfaces implemented
RLBase.action_space(m::ShippingEnv) = m.action_space
RLBase.state_space(m::ShippingEnv) = m.observation_space
RLBase.reward(m::ShippingEnv) = m.done ? 0.0 : -1.0
RLBase.is_terminated(m::ShippingEnv) = m.done
RLBase.state(m::ShippingEnv) = m.state
#Random.seed!(m::ShippingEnv,seed) = Random.seed!(m.rng,seed)
function RLBase.reset!(m::ShippingEnv)
m.position = m.params.StartingPoint
m.velocity = m.params.velocities[1]
m.done = false
m.time = 0
m.distance = 0
nothing
end
#Function defining what happens every time an action is made
function (m::ShippingEnv)(a::Vector{Int64})
nextstep(m,a[1],a[2])
end
function nextstep(m::ShippingEnv, head_action, acceleration)
heading = m.params.heading[head_action]
r = m.params.punishment #initialized punishment if everything's okay
m.position = heading
dist_covered = sqrt(heading[1]^2 heading[2]^2)
m.distance = dist_covered
next_state_norm = (m.position[1]/m.params.gridworld_dims[1],m.position[2]/m.params.gridworld_dims[2])
#Check if next state is out of bounds and assign appropriate punishment
if m.position[1]<1 || m.position[1]>m.params.gridworld_dims[1] || m.position[2]<1 || m.position[2]>m.params.gridworld_dims[2] || inanypolygon(next_state_norm, m.params.all_polygons)
r = m.params.out_of_grid_punishment #replace punishment
m.position -= heading
m.distance -= dist_covered
end
#Checking if velocity acceleration is out of velocities' bounds
if (m.velocity acceleration > minimum(m.params.velocities)) && (m.velocity acceleration < maximum(m.params.velocities))
m.velocity = acceleration
end
m.time = dist_covered/m.velocity
m.reward = r -m.time
m.state[1] = LinearIndices((m.params.gridworld_dims[1],m.params.gridworld_dims[2]))[m.position]
m.state[2] = m.velocity
end
env = ShippingEnv()
RLBase.test_runnable!(env)
This is the stacktrace I've been getting after running test_runnable!(env).
Error During Test at C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266
Got exception outside of a @test
method not implemented
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:33
[2] (::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer) (repeats 2 times)
@ ReinforcementLearningBase .\none:0
[3] macro expansion
@ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:281 [inlined]
[4] macro expansion
@ C:\Users\kwstas\AppData\Local\Programs\Julia-1.7.1\share\julia\stdlib\v1.7\Test\src\Test.jl:1283 [inlined]
[5] test_runnable!(env::ShippingEnv, n::Int64; rng::Random._GLOBAL_RNG)
@ ReinforcementLearningBase C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:267
[6] test_runnable! (repeats 2 times)
@ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266 [inlined]
[7] top-level scope
@ c:\Users\kwstas\Desktop\ThesisDir\RL-New-Env.jl:138
[8] eval
@ .\boot.jl:373 [inlined]
[9] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base .\loading.jl:1196
[10] invokelatest(::Any, ::Any, ::Vararg{Any}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base .\essentials.jl:716
[11] invokelatest(::Any, ::Any, ::Vararg{Any})
@ Base .\essentials.jl:714
[12] inlineeval(m::Module, code::String, code_line::Int64, code_column::Int64, file::String; softscope::Bool)
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:211
[13] (::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:155
[14] withpath(f::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams}, path::String)
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:184
[15] (::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:153
[16] hideprompt(f::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:36
[17] (::VSCodeServer.var"#63#67"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:124
[18] with_logstate(f::Function, logstate::Any)
@ Base.CoreLogging .\logging.jl:511
[19] with_logger
@ .\logging.jl:623 [inlined]
[20] (::VSCodeServer.var"#62#66"{VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:201
[21] #invokelatest#2
@ .\essentials.jl:716 [inlined]
[22] invokelatest(::Any)
@ Base .\essentials.jl:714
[23] macro expansion
@ c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:34 [inlined]
[24] (::VSCodeServer.var"#60#61")()
@ VSCodeServer .\task.jl:423
Test Summary: | Pass Error Total
random policy with ShippingEnv | 2 1 3
ERROR: Some tests did not pass: 2 passed, 0 failed, 1 errored, 0 broken.
CodePudding user response:
I think the error message already explained it clearly.
(::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer)
This means that this method is not found. And it is the fallback of (::ShippingEnv)(action::Tuple{Int, Int})
While the one you implemented is of (m::ShippingEnv)(a::Vector{Int64})
. So here are two choices, either to define (m::ShippingEnv)(a::Tuple{Int64, Int64})
instead, or define the action space of type Space([1:length(params1.heading),1:length(params1.acceleration)])
. Note the difference between tuple
and a vector
.