|
| 1 | +using Test |
| 2 | +using Distributions: Categorical |
| 3 | +using ReinforcementLearningCore: EpsilonGreedyExplorer, GreedyExplorer, get_ϵ |
| 4 | +using Random |
| 5 | + |
@testset "EpsilonGreedyExplorer" begin
    # Schedule hyper-parameters shared by every explorer built in this testset.
    # Keeping them in one place prevents the per-line copy/paste slips that are
    # easy to make when the full constructor call is repeated on every `@test`.
    schedule = (ϵ_init=0.9, ϵ_stable=0.1, warmup_steps=100, decay_steps=100)

    @testset "get_ϵ for linear kind" begin
        explorer = EpsilonGreedyExplorer(; kind=:linear, schedule...)
        # Expected: ϵ_init up to and including the last warmup step, then the
        # midpoint halfway through the decay, then ϵ_stable at its end.
        @test get_ϵ(explorer, 50) ≈ 0.9
        @test get_ϵ(explorer, 100) ≈ 0.9
        @test get_ϵ(explorer, 150) ≈ 0.5
        @test get_ϵ(explorer, 200) ≈ 0.1
    end

    @testset "get_ϵ for exp kind" begin
        # Every assertion here must use the `:exp` explorer; the warmup-boundary
        # check (step 100) previously built a `:linear` explorer by mistake.
        explorer = EpsilonGreedyExplorer(; kind=:exp, schedule...)
        @test get_ϵ(explorer, 50) ≈ 0.9
        @test get_ϵ(explorer, 100) ≈ 0.9
        # 0.1 + 0.8 * exp(-0.5)
        @test get_ϵ(explorer, 150) ≈ 0.5852245277701068
        # Far past the decay window the value is only required to be close to
        # ϵ_stable, hence the absolute tolerance.
        @test get_ϵ(explorer, 2000) ≈ 0.1 atol=1e-2
    end

    @testset "plan! for is_break_tie=true" begin
        # `Random.default_rng` does not accept a seed, so an explicitly seeded
        # generator is used to keep the sampled actions reproducible.
        rng = Random.MersenneTwister(123)
        explorer = EpsilonGreedyExplorer(;
            kind=:linear, schedule..., is_break_tie=true, rng=rng
        )
        values = [0.1, 0.5, 0.5, 0.3]
        actions = [RLBase.plan!(explorer, values) for _ in 1:300]
        # Over 300 plans every one of the four actions is expected to show up.
        @test length(unique(actions)) == 4
    end

    @testset "plan! for is_break_tie=false" begin
        # Seeded for the same reason as in the is_break_tie=true case.
        rng = Random.MersenneTwister(123)
        explorer = EpsilonGreedyExplorer(;
            kind=:linear, schedule..., is_break_tie=false, rng=rng
        )
        values = [0.1, 0.5, 0.5, 0.3]
        actions = [RLBase.plan!(explorer, values) for _ in 1:300]
        @test length(unique(actions)) == 4
    end

    @testset "prob for is_break_tie=true" begin
        explorer = EpsilonGreedyExplorer(; kind=:linear, schedule..., is_break_tie=true)
        values = [0.1, 0.5, 0.5, 0.3]
        # The expected numbers correspond to 0.9 / 4 = 0.225 for every action,
        # with the remaining 0.1 shared equally by the two tied maxima.
        @test RLBase.prob(explorer, values) ≈ Categorical([0.225, 0.275, 0.275, 0.225])
        @test RLBase.prob(explorer, values, 2) ≈ 0.275
    end

    @testset "prob for is_break_tie=false" begin
        explorer = EpsilonGreedyExplorer(; kind=:linear, schedule..., is_break_tie=false)
        values = [0.1, 0.5, 0.5, 0.3]
        # Without tie breaking the expected numbers give the whole remaining 0.1
        # to the first maximal action (index 2).
        @test RLBase.prob(explorer, values) ≈ Categorical([0.225, 0.325, 0.225, 0.225])
        @test RLBase.prob(explorer, values, 2) ≈ 0.325
    end
end
| 59 | + |
@testset "GreedyExplorer" begin
    @testset "plan!" begin
        # Indices 2 and 3 hold the same maximal value; the test expects the
        # explorer to pick index 2.
        explorer = GreedyExplorer()
        action_values = [0.1, 0.5, 0.5, 0.3]
        chosen = RLBase.plan!(explorer, action_values)
        @test chosen == 2
    end

    @testset "prob" begin
        explorer = GreedyExplorer()
        action_values = [0.1, 0.5, 0.5, 0.3]

        # The full distribution is expected to put all mass on index 2.
        expected = Categorical([0.0, 1.0, 0.0, 0.0])
        @test isapprox(RLBase.prob(explorer, action_values), expected)

        # Querying that single action must return exactly one.
        @test RLBase.prob(explorer, action_values, 2) == 1.0
    end
end
0 commit comments