import numpy as np
import pytest
from mlagents.torch_utils import torch
from mlagents.trainers.torch.components.reward_providers import (
    RNDRewardProvider,
    create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.settings import RNDSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
    create_agent_buffer,
)

SEED = [42]

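# Construction test: the strength set on the settings object should be surfaced
# on the provider, and the provider should identify itself as "RND".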
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
    ],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
    rnd_settings = RNDSettings(32, 0.01)
    rnd_settings.strength = 0.1
    rnd_rp = RNDRewardProvider(behavior_spec, rnd_settings)
    assert rnd_rp.strength == 0.1
    assert rnd_rp.name == "RND"

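# Factory test: create_reward_provider should dispatch RewardSignalType.RND to
# an RNDRewardProvider across mixed observation shapes and action types.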
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,), (64, 66, 1)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
    rnd_settings = RNDSettings(32, 0.01)
    rnd_rp = create_reward_provider(
        RewardSignalType.RND, behavior_spec, rnd_settings
    )
    assert rnd_rp.name == "RND"

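# Training test: repeatedly updating the provider on the same buffer should
# shrink the intrinsic (prediction-error) reward for those observations.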
@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
    "behavior_spec",
    [
        BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ActionType.CONTINUOUS, 5),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2, 3)),
        BehaviorSpec([(10,)], ActionType.DISCRETE, (2,)),
    ],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    rnd_settings = RNDSettings(32, 0.01)
    rnd_rp = RNDRewardProvider(behavior_spec, rnd_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    rnd_rp.update(buffer)
    reward_old = rnd_rp.evaluate(buffer)[0]
    for _ in range(100):
        rnd_rp.update(buffer)
    reward_new = rnd_rp.evaluate(buffer)[0]
    assert reward_new < reward_old
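

# The sketch below is not part of the original file and does not use ML-Agents'
# internals; it restates the RND mechanism that test_reward_decreases exercises,
# with plain torch modules. Network shapes, the optimizer, and the learning rate
# are assumptions for illustration. RND keeps a frozen, randomly initialized
# "target" network and trains a "predictor" to match it; the squared prediction
# error is the intrinsic reward, so fitting the predictor on a fixed batch of
# observations must drive the reward for that batch down.
def test_rnd_mechanism_sketch() -> None:
    torch.manual_seed(42)
    obs_size, encoding_size = 10, 32
    target = torch.nn.Linear(obs_size, encoding_size)  # frozen at random init
    predictor = torch.nn.Linear(obs_size, encoding_size)
    optimizer = torch.optim.Adam(predictor.parameters(), lr=0.01)
    obs = torch.randn(5, obs_size)  # stands in for a replayed agent buffer
    with torch.no_grad():
        target_out = target(obs)
    # Intrinsic reward before training: mean squared prediction error.
    reward_old = (predictor(obs) - target_out).pow(2).mean().item()
    for _ in range(100):
        loss = (predictor(obs) - target_out).pow(2).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    reward_new = (predictor(obs) - target_out).pow(2).mean().item()
    assert reward_new < reward_old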