Multidimensional Action Space in Reinforcement Learning-CodePudding

My goal is to train an agent (ship) that takes two actions for now. 1. Choosing it's heading angle (where to go next) and 2. Choosing it's acceleration (if it will change its speed or not).

However, it seems like that I cannot undestand how to properly construct my action space and state space. I keep getting an error which I do not know how to fix. I have been trying to make it work using the Space wrapper.

I use the following code.

#Packages used
using ReinforcementLearning
using Flux #Needed for all the Neural Networks functionalities
using Plots
using DelimitedFiles #Needed to read all the txt files
using PolygonOps
using Random
using Intervals #not being used

#GeoBoundariesManipulation
include(joinpath(pwd(),"GeoBoundariesManipulation.jl"));
using .GeoBoundariesManipulation

#My problem's parameters
struct ShippingEnvParams
    gridworld_dims::Tuple{Int64,Int64} #Gridworld dimensions
    velocities::Vector{Int64} #available velocities from 6 knots to 20 knots
    acceleration::Vector{Int64} #available acceleration per step: -2, 0, 2
    heading::Vector{CartesianIndex{2}} #all heading manoeuvers
    punishment::Int64 #punishment per ordinary step
    out_of_grid_punishment::Int64 #punishment for going towards an island or out of grid bounds
    StartingPoint::CartesianIndex{2}
    GoalPoint::CartesianIndex{2}
    all_polygons::Vector{Vector{Tuple{Float64,Float64}}} #all the boundaries
end

function ShippingEnvParams(;
    gridworld_dims = (50,50),
    velocities = Vector((6:2:20)), 
    acceleration = Vector((-2:2:2)), 
    heading = [CartesianIndex(0,1);CartesianIndex(0,-1);CartesianIndex(-1,0);CartesianIndex(-1,1);CartesianIndex(-1,-1);CartesianIndex(1,-1);CartesianIndex(1,1);CartesianIndex(1,0)], 
    punishment = -5, 
    out_of_grid_punishment = -100, 
    StartingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex((-6.733535,61.997345),gridworld_dims[1],gridworld_dims[2]),
    EndingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex((-6.691500,61.535580),gridworld_dims[1],gridworld_dims[2]),
    AllPolygons = GeoBoundariesManipulation.load_files("finalboundaries") 
    )
    ShippingEnvParams(
        gridworld_dims,
        velocities,
        acceleration,
        heading,
        punishment,
        out_of_grid_punishment,
        StartingPoint,
        EndingPoint,
        AllPolygons
    )
end

###ENVIRONMENT CONSTRUCTION
#Instance
mutable struct ShippingEnv <: AbstractEnv
    params::ShippingEnvParams
    action_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}}
    observation_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}} #state_space
    state::Space{Tuple{Int64,Int64}} #state: (position,velocity)
    action::Space{Tuple{Int64,Int64}} #action: (heading_angle,acceleration)
    done::Bool #checks if agent has reached its goal
    position::CartesianIndex{2}
    time::Float64
    velocity::Int64
    distance::Float64
    reward::Union{Nothing,Float64} 
end

function ShippingEnv()
    params1 = ShippingEnvParams()
    env = ShippingEnv(
        params1,
        #Base.OneTo(length(params.heading)*length(params.velocities)),
        Space((1:length(params1.heading),1:length(params1.acceleration))), #Space: (1-number of heading options, 1-number of acceleration options)
        #Space([1..params.gridworld_dims[1]*params.gridworld_dims[2],minimum(params.velocities)..maximum(params.velocities)]),
        Space((1:(params1.gridworld_dims[1]*params1.gridworld_dims[2]),(1:length(params1.velocities)))), #(1-number of grid tiles, 1-number of velocity options)
        Space((LinearIndices((params1.gridworld_dims[1],params1.gridworld_dims[2]))[params1.StartingPoint],6)),
        Space((1,1)),
        false,
        params1.StartingPoint,
        0.0,
        params1.velocities[1],
        0.0,
        0.0
    )
    reset!(env)
    env
end


#Minimal interfaces implemented
RLBase.action_space(m::ShippingEnv) = m.action_space
RLBase.state_space(m::ShippingEnv) = m.observation_space
RLBase.reward(m::ShippingEnv) = m.done ? 0.0 : -1.0
RLBase.is_terminated(m::ShippingEnv) = m.done
RLBase.state(m::ShippingEnv) = m.state
#Random.seed!(m::ShippingEnv,seed) = Random.seed!(m.rng,seed)

function RLBase.reset!(m::ShippingEnv)
    m.position = m.params.StartingPoint
    m.velocity = m.params.velocities[1]
    m.done = false
    m.time = 0
    m.distance = 0
    nothing
end

#Function defining what happens every time an action is made
function (m::ShippingEnv)(a::Vector{Int64})
    nextstep(m,a[1],a[2])
end

function nextstep(m::ShippingEnv, head_action, acceleration)
    heading = m.params.heading[head_action]
    r = m.params.punishment #initialized punishment if everything's okay
    m.position  = heading
    dist_covered = sqrt(heading[1]^2   heading[2]^2)
    m.distance  = dist_covered
    next_state_norm = (m.position[1]/m.params.gridworld_dims[1],m.position[2]/m.params.gridworld_dims[2])
    #Check if next state is out of bounds and assign appropriate punishment
    if m.position[1]<1 || m.position[1]>m.params.gridworld_dims[1] || m.position[2]<1 || m.position[2]>m.params.gridworld_dims[2] || inanypolygon(next_state_norm, m.params.all_polygons)
        r = m.params.out_of_grid_punishment #replace punishment
        m.position -= heading
        m.distance -= dist_covered
    end

    #Checking if velocity acceleration is out of velocities' bounds
    if (m.velocity   acceleration > minimum(m.params.velocities)) && (m.velocity   acceleration < maximum(m.params.velocities))
        m.velocity  = acceleration
    end
    
    m.time = dist_covered/m.velocity
    m.reward = r -m.time

    m.state[1] = LinearIndices((m.params.gridworld_dims[1],m.params.gridworld_dims[2]))[m.position]
    m.state[2] = m.velocity
end

env = ShippingEnv()
RLBase.test_runnable!(env)

This is the stacktrace I've been getting after running test_runnable!(env).

Error During Test at C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266
  Got exception outside of a @test
  method not implemented
  Stacktrace:
    [1] error(s::String)
      @ Base .\error.jl:33
    [2] (::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer) (repeats 2 times)
      @ ReinforcementLearningBase .\none:0
    [3] macro expansion
      @ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:281 [inlined]
    [4] macro expansion
      @ C:\Users\kwstas\AppData\Local\Programs\Julia-1.7.1\share\julia\stdlib\v1.7\Test\src\Test.jl:1283 [inlined]
    [5] test_runnable!(env::ShippingEnv, n::Int64; rng::Random._GLOBAL_RNG)
      @ ReinforcementLearningBase C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:267
    [6] test_runnable! (repeats 2 times)
      @ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266 [inlined]
    [7] top-level scope
      @ c:\Users\kwstas\Desktop\ThesisDir\RL-New-Env.jl:138
    [8] eval
      @ .\boot.jl:373 [inlined]
    [9] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
      @ Base .\loading.jl:1196
   [10] invokelatest(::Any, ::Any, ::Vararg{Any}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
      @ Base .\essentials.jl:716
   [11] invokelatest(::Any, ::Any, ::Vararg{Any})
      @ Base .\essentials.jl:714
   [12] inlineeval(m::Module, code::String, code_line::Int64, code_column::Int64, file::String; softscope::Bool)
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:211
   [13] (::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:155
   [14] withpath(f::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams}, path::String)
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:184
   [15] (::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:153
   [16] hideprompt(f::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:36
   [17] (::VSCodeServer.var"#63#67"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:124
   [18] with_logstate(f::Function, logstate::Any)
      @ Base.CoreLogging .\logging.jl:511
   [19] with_logger
      @ .\logging.jl:623 [inlined]
   [20] (::VSCodeServer.var"#62#66"{VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:201
   [21] #invokelatest#2
      @ .\essentials.jl:716 [inlined]
   [22] invokelatest(::Any)
      @ Base .\essentials.jl:714
   [23] macro expansion
      @ c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:34 [inlined]
   [24] (::VSCodeServer.var"#60#61")()
      @ VSCodeServer .\task.jl:423
Test Summary:                  | Pass  Error  Total
random policy with ShippingEnv |    2      1      3
ERROR: Some tests did not pass: 2 passed, 0 failed, 1 errored, 0 broken.

CodePudding user response：

I think the error message already explained it clearly.

(::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer)

This means that this method is not found. And it is the fallback of (::ShippingEnv)(action::Tuple{Int, Int})

While the one you implemented is of (m::ShippingEnv)(a::Vector{Int64}). So here are two choices, either to define (m::ShippingEnv)(a::Tuple{Int64, Int64}) instead, or define the action space of type Space([1:length(params1.heading),1:length(params1.acceleration)]). Note the difference between tuple and a vector.