diff --git a/config/imitation/CrawlerStatic.yaml b/config/imitation/CrawlerStatic.yaml
index c69ed5571f..6bda49e1ef 100644
--- a/config/imitation/CrawlerStatic.yaml
+++ b/config/imitation/CrawlerStatic.yaml
@@ -24,7 +24,6 @@ behaviors:
         use_actions: false
         use_vail: false
         demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 1000
diff --git a/config/imitation/FoodCollector.yaml b/config/imitation/FoodCollector.yaml
index 3d6328269e..614772331c 100644
--- a/config/imitation/FoodCollector.yaml
+++ b/config/imitation/FoodCollector.yaml
@@ -24,7 +24,6 @@ behaviors:
         use_actions: false
         use_vail: false
         demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
-    output_path: default
     keep_checkpoints: 5
     max_steps: 2000000
     time_horizon: 64
diff --git a/config/imitation/Hallway.yaml b/config/imitation/Hallway.yaml
index 27baeacdd4..709bd33d7b 100644
--- a/config/imitation/Hallway.yaml
+++ b/config/imitation/Hallway.yaml
@@ -30,7 +30,6 @@ behaviors:
         use_actions: false
         use_vail: false
         demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 64
diff --git a/config/imitation/PushBlock.yaml b/config/imitation/PushBlock.yaml
index ffddc01278..693407ad71 100644
--- a/config/imitation/PushBlock.yaml
+++ b/config/imitation/PushBlock.yaml
@@ -24,7 +24,6 @@ behaviors:
         use_actions: false
         use_vail: false
         demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
-    output_path: default
     keep_checkpoints: 5
     max_steps: 15000000
     time_horizon: 64
diff --git a/config/ppo/3DBall.yaml b/config/ppo/3DBall.yaml
index a76e60656a..2cccd57763 100644
--- a/config/ppo/3DBall.yaml
+++ b/config/ppo/3DBall.yaml
@@ -19,7 +19,6 @@ behaviors:
       extrinsic:
         gamma: 0.99
         strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 1000
diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml
index d4155223dd..6dc83100f2 100644
--- a/config/ppo/3DBallHard.yaml
+++ b/config/ppo/3DBallHard.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 5000000
     time_horizon: 1000
diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml
index aee0a3b1e2..b3c6c13f21 100644
--- a/config/ppo/3DBall_randomize.yaml
+++ b/config/ppo/3DBall_randomize.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 1000
diff --git a/config/ppo/Basic.yaml b/config/ppo/Basic.yaml
index 5b054a6612..8d8424e496 100644
--- a/config/ppo/Basic.yaml
+++ b/config/ppo/Basic.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.9
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 3
diff --git a/config/ppo/Bouncer.yaml b/config/ppo/Bouncer.yaml
index 56629e0a10..02d43efa18 100644
--- a/config/ppo/Bouncer.yaml
+++ b/config/ppo/Bouncer.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 4000000
     time_horizon: 64
diff --git a/config/ppo/CrawlerDynamic.yaml b/config/ppo/CrawlerDynamic.yaml
index 82268ac509..7f278f2a57 100644
--- a/config/ppo/CrawlerDynamic.yaml
+++ b/config/ppo/CrawlerDynamic.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 1000
diff --git a/config/ppo/CrawlerStatic.yaml b/config/ppo/CrawlerStatic.yaml
index 1d38e26623..5fc686f4af 100644
--- a/config/ppo/CrawlerStatic.yaml
+++ b/config/ppo/CrawlerStatic.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 1000
diff --git a/config/ppo/FoodCollector.yaml b/config/ppo/FoodCollector.yaml
index 95c9d10703..07f777bb9c 100644
--- a/config/ppo/FoodCollector.yaml
+++ b/config/ppo/FoodCollector.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 2000000
     time_horizon: 64
diff --git a/config/ppo/GridWorld.yaml b/config/ppo/GridWorld.yaml
index b124fb46c1..251fdb1d82 100644
--- a/config/ppo/GridWorld.yaml
+++ b/config/ppo/GridWorld.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.9
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 5
diff --git a/config/ppo/Hallway.yaml b/config/ppo/Hallway.yaml
index 38a941689a..241bf56774 100644
--- a/config/ppo/Hallway.yaml
+++ b/config/ppo/Hallway.yaml
@@ -22,7 +22,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 64
diff --git a/config/ppo/PushBlock.yaml b/config/ppo/PushBlock.yaml
index 062f8fd364..a89bf831e9 100644
--- a/config/ppo/PushBlock.yaml
+++ b/config/ppo/PushBlock.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 2000000
     time_horizon: 64
diff --git a/config/ppo/Pyramids.yaml b/config/ppo/Pyramids.yaml
index 85317963bf..a68116cea4 100644
--- a/config/ppo/Pyramids.yaml
+++ b/config/ppo/Pyramids.yaml
@@ -24,7 +24,6 @@ behaviors:
        strength: 0.02
        encoding_size: 256
        learning_rate: 0.0003
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 128
diff --git a/config/ppo/Reacher.yaml b/config/ppo/Reacher.yaml
index 2b17116430..69c821cdd6 100644
--- a/config/ppo/Reacher.yaml
+++ b/config/ppo/Reacher.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 20000000
     time_horizon: 1000
diff --git a/config/ppo/SoccerTwos.yaml b/config/ppo/SoccerTwos.yaml
index 9fe5ca73de..f8a3ad9df6 100644
--- a/config/ppo/SoccerTwos.yaml
+++ b/config/ppo/SoccerTwos.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 50000000
     time_horizon: 1000
diff --git a/config/ppo/StrikersVsGoalie.yaml b/config/ppo/StrikersVsGoalie.yaml
index 4199377faf..44d95918ed 100644
--- a/config/ppo/StrikersVsGoalie.yaml
+++ b/config/ppo/StrikersVsGoalie.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 50000000
     time_horizon: 1000
@@ -52,7 +51,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 50000000
     time_horizon: 1000
diff --git a/config/ppo/Tennis.yaml b/config/ppo/Tennis.yaml
index 473ab7deff..ed73a6d83a 100644
--- a/config/ppo/Tennis.yaml
+++ b/config/ppo/Tennis.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 50000000
     time_horizon: 1000
diff --git a/config/ppo/VisualHallway.yaml b/config/ppo/VisualHallway.yaml
index bf953bba40..421b3ad5b0 100644
--- a/config/ppo/VisualHallway.yaml
+++ b/config/ppo/VisualHallway.yaml
@@ -22,7 +22,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 64
diff --git a/config/ppo/VisualPushBlock.yaml b/config/ppo/VisualPushBlock.yaml
index 260a31f3ff..000b3f5074 100644
--- a/config/ppo/VisualPushBlock.yaml
+++ b/config/ppo/VisualPushBlock.yaml
@@ -22,7 +22,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 3000000
     time_horizon: 64
diff --git a/config/ppo/VisualPyramids.yaml b/config/ppo/VisualPyramids.yaml
index 34f1b8843c..48782626ad 100644
--- a/config/ppo/VisualPyramids.yaml
+++ b/config/ppo/VisualPyramids.yaml
@@ -24,7 +24,6 @@ behaviors:
        strength: 0.01
        encoding_size: 256
        learning_rate: 0.0003
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 128
diff --git a/config/ppo/WalkerDynamic.yaml b/config/ppo/WalkerDynamic.yaml
index 4cd30e8782..08a8048f14 100644
--- a/config/ppo/WalkerDynamic.yaml
+++ b/config/ppo/WalkerDynamic.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 25000000
     time_horizon: 1000
diff --git a/config/ppo/WalkerStatic.yaml b/config/ppo/WalkerStatic.yaml
index 56eea37a81..fa3633a418 100644
--- a/config/ppo/WalkerStatic.yaml
+++ b/config/ppo/WalkerStatic.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 20000000
     time_horizon: 1000
diff --git a/config/ppo/WallJump.yaml b/config/ppo/WallJump.yaml
index f150863489..a2c749d813 100644
--- a/config/ppo/WallJump.yaml
+++ b/config/ppo/WallJump.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 20000000
     time_horizon: 128
@@ -45,7 +44,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 5000000
     time_horizon: 128
diff --git a/config/ppo/WallJump_curriculum.yaml b/config/ppo/WallJump_curriculum.yaml
index de13b5efcf..93a8813ae7 100644
--- a/config/ppo/WallJump_curriculum.yaml
+++ b/config/ppo/WallJump_curriculum.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 20000000
     time_horizon: 128
@@ -45,7 +44,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 5000000
     time_horizon: 128
diff --git a/config/ppo/WormDynamic.yaml b/config/ppo/WormDynamic.yaml
index 0c0331209f..319820baf2 100644
--- a/config/ppo/WormDynamic.yaml
+++ b/config/ppo/WormDynamic.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 3500000
     time_horizon: 1000
diff --git a/config/ppo/WormStatic.yaml b/config/ppo/WormStatic.yaml
index 5bbcbc58ea..5fce0b0c76 100644
--- a/config/ppo/WormStatic.yaml
+++ b/config/ppo/WormStatic.yaml
@@ -19,7 +19,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 3500000
     time_horizon: 1000
diff --git a/config/sac/3DBall.yaml b/config/sac/3DBall.yaml
index 3949be48c3..0458c03070 100644
--- a/config/sac/3DBall.yaml
+++ b/config/sac/3DBall.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 1000
diff --git a/config/sac/3DBallHard.yaml b/config/sac/3DBallHard.yaml
index 2ce4183e70..511f375bc2 100644
--- a/config/sac/3DBallHard.yaml
+++ b/config/sac/3DBallHard.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 1000
diff --git a/config/sac/Basic.yaml b/config/sac/Basic.yaml
index 51cab0ebce..e6d53113f3 100644
--- a/config/sac/Basic.yaml
+++ b/config/sac/Basic.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 10
diff --git a/config/sac/Bouncer.yaml b/config/sac/Bouncer.yaml
index 0503a1eb4c..439e902877 100644
--- a/config/sac/Bouncer.yaml
+++ b/config/sac/Bouncer.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 1000000
     time_horizon: 64
diff --git a/config/sac/CrawlerDynamic.yaml b/config/sac/CrawlerDynamic.yaml
index 9324162947..9187328374 100644
--- a/config/sac/CrawlerDynamic.yaml
+++ b/config/sac/CrawlerDynamic.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 5000000
     time_horizon: 1000
diff --git a/config/sac/CrawlerStatic.yaml b/config/sac/CrawlerStatic.yaml
index 5935bb4fb3..255143cb04 100644
--- a/config/sac/CrawlerStatic.yaml
+++ b/config/sac/CrawlerStatic.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 3000000
     time_horizon: 1000
diff --git a/config/sac/FoodCollector.yaml b/config/sac/FoodCollector.yaml
index 1ce4d6cb11..d09e3cc6d0 100644
--- a/config/sac/FoodCollector.yaml
+++ b/config/sac/FoodCollector.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 2000000
     time_horizon: 64
diff --git a/config/sac/GridWorld.yaml b/config/sac/GridWorld.yaml
index 084e821a87..b0d32e668c 100644
--- a/config/sac/GridWorld.yaml
+++ b/config/sac/GridWorld.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.9
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 500000
     time_horizon: 5
diff --git a/config/sac/Hallway.yaml b/config/sac/Hallway.yaml
index 30a507d8dd..583393a11b 100644
--- a/config/sac/Hallway.yaml
+++ b/config/sac/Hallway.yaml
@@ -24,7 +24,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 5000000
     time_horizon: 64
diff --git a/config/sac/PushBlock.yaml b/config/sac/PushBlock.yaml
index bb0319885e..349730a6f8 100644
--- a/config/sac/PushBlock.yaml
+++ b/config/sac/PushBlock.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 2000000
     time_horizon: 64
diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml
index 476273d681..cd764a4d34 100644
--- a/config/sac/Pyramids.yaml
+++ b/config/sac/Pyramids.yaml
@@ -29,7 +29,6 @@ behaviors:
        use_actions: true
        use_vail: false
        demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 128
diff --git a/config/sac/Reacher.yaml b/config/sac/Reacher.yaml
index 91d4e02a59..800b92a146 100644
--- a/config/sac/Reacher.yaml
+++ b/config/sac/Reacher.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 20000000
     time_horizon: 1000
diff --git a/config/sac/Tennis.yaml b/config/sac/Tennis.yaml
index f5e258b655..c8f932187a 100644
--- a/config/sac/Tennis.yaml
+++ b/config/sac/Tennis.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 20000000
     time_horizon: 64
diff --git a/config/sac/VisualHallway.yaml b/config/sac/VisualHallway.yaml
index e27d1a0298..28be1e5c9b 100644
--- a/config/sac/VisualHallway.yaml
+++ b/config/sac/VisualHallway.yaml
@@ -24,7 +24,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 64
diff --git a/config/sac/VisualPushBlock.yaml b/config/sac/VisualPushBlock.yaml
index 315773656b..375d16e9df 100644
--- a/config/sac/VisualPushBlock.yaml
+++ b/config/sac/VisualPushBlock.yaml
@@ -24,7 +24,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 3000000
     time_horizon: 64
diff --git a/config/sac/VisualPyramids.yaml b/config/sac/VisualPyramids.yaml
index ce4af8b017..b840fb7762 100644
--- a/config/sac/VisualPyramids.yaml
+++ b/config/sac/VisualPyramids.yaml
@@ -29,7 +29,6 @@ behaviors:
        use_actions: true
        use_vail: false
        demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
-    output_path: default
     keep_checkpoints: 5
     max_steps: 10000000
     time_horizon: 128
diff --git a/config/sac/WalkerDynamic.yaml b/config/sac/WalkerDynamic.yaml
index 2b2136bf86..097aee7ef1 100644
--- a/config/sac/WalkerDynamic.yaml
+++ b/config/sac/WalkerDynamic.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 15000000
     time_horizon: 1000
diff --git a/config/sac/WalkerStatic.yaml b/config/sac/WalkerStatic.yaml
index 1172964f58..ef61a8c054 100644
--- a/config/sac/WalkerStatic.yaml
+++ b/config/sac/WalkerStatic.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 20000000
     time_horizon: 1000
diff --git a/config/sac/WallJump.yaml b/config/sac/WallJump.yaml
index f40e379a2b..865954c554 100644
--- a/config/sac/WallJump.yaml
+++ b/config/sac/WallJump.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 15000000
     time_horizon: 128
@@ -49,7 +48,6 @@ behaviors:
      extrinsic:
        gamma: 0.99
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 5000000
     time_horizon: 128
diff --git a/config/sac/WormDynamic.yaml b/config/sac/WormDynamic.yaml
index 9af3fe2e66..5966364315 100644
--- a/config/sac/WormDynamic.yaml
+++ b/config/sac/WormDynamic.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 5000000
     time_horizon: 1000
diff --git a/config/sac/WormStatic.yaml b/config/sac/WormStatic.yaml
index 48a688afe8..d40f220d21 100644
--- a/config/sac/WormStatic.yaml
+++ b/config/sac/WormStatic.yaml
@@ -21,7 +21,6 @@ behaviors:
      extrinsic:
        gamma: 0.995
        strength: 1.0
-    output_path: default
     keep_checkpoints: 5
     max_steps: 3000000
     time_horizon: 1000
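All of the YAML changes above are the same one-line deletion: the per-behavior `output_path: default` key is removed. Where artifacts land is now decided outside the behavior config. A rough Python sketch of the resulting layout; the `results/<run-id>/<behavior>` convention is an assumption based on how `mlagents-learn` builds its write path, not something these YAML hunks state:

```python
# Sketch only (not part of the diff): the output location is derived from the
# run id and results directory passed to mlagents-learn, then joined with the
# behavior name inside trainer_util (see initialize_trainer below).
import os

def trainer_artifact_path(results_dir: str, run_id: str, behavior_name: str) -> str:
    # e.g. results/ppo_3DBall/3DBall -- the per-behavior folder the trainer writes to
    return os.path.join(results_dir, run_id, behavior_name)

print(trainer_artifact_path("results", "ppo_3DBall", "3DBall"))
```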
diff --git a/ml-agents/mlagents/trainers/ghost/trainer.py b/ml-agents/mlagents/trainers/ghost/trainer.py
index 48c5a1f76a..5d42ac671c 100644
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py
@@ -45,7 +45,7 @@ def __init__(
         reward_buff_cap,
         trainer_settings,
         training,
-        run_id,
+        artifact_path,
     ):
         """
         Creates a GhostTrainer.
@@ -55,11 +55,11 @@ def __init__(
         :param reward_buff_cap: Max reward history to track in the reward buffer
         :param trainer_settings: The parameters for the trainer.
         :param training: Whether the trainer is set for training.
-        :param run_id: The identifier of the current run
+        :param artifact_path: Path to store artifacts from this trainer.
         """
         super(GhostTrainer, self).__init__(
-            brain_name, trainer_settings, training, run_id, reward_buff_cap
+            brain_name, trainer_settings, training, artifact_path, reward_buff_cap
         )
         self.trainer = trainer
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index 63af614370..55e3fa0d15 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -138,7 +138,6 @@ def run_training(run_seed: int, options: RunOptions) -> None:
     )
     trainer_factory = TrainerFactory(
         options.behaviors,
-        checkpoint_settings.run_id,
         write_path,
         not checkpoint_settings.inference,
         checkpoint_settings.resume,
diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py
index 3b098352ee..a41660591f 100644
--- a/ml-agents/mlagents/trainers/policy/nn_policy.py
+++ b/ml-agents/mlagents/trainers/policy/nn_policy.py
@@ -22,6 +22,7 @@ def __init__(
         brain: BrainParameters,
         trainer_params: TrainerSettings,
         is_training: bool,
+        model_path: str,
         load: bool,
         tanh_squash: bool = False,
         reparameterize: bool = False,
@@ -37,10 +38,11 @@ def __init__(
         :param trainer_params: Defined training parameters.
         :param is_training: Whether the model should be trained.
         :param load: Whether a pre-trained model will be loaded or a new one created.
+        :param model_path: Path where the model should be saved and loaded.
         :param tanh_squash: Whether to use a tanh function on the continuous output, or a clipped output.
         :param reparameterize: Whether we are using the resampling trick to update the policy in continuous output.
         """
-        super().__init__(seed, brain, trainer_params, load)
+        super().__init__(seed, brain, trainer_params, model_path, load)
         self.grads = None
         self.update_batch: Optional[tf.Operation] = None
         num_layers = self.network_settings.num_layers
diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py
index 0b5909a87c..aba930abd3 100644
--- a/ml-agents/mlagents/trainers/policy/tf_policy.py
+++ b/ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -46,6 +46,7 @@ def __init__(
         seed: int,
         brain: BrainParameters,
         trainer_settings: TrainerSettings,
+        model_path: str,
         load: bool = False,
     ):
         """
@@ -53,6 +54,8 @@
         :param seed: Random seed to use for TensorFlow.
         :param brain: The corresponding Brain for this policy.
         :param trainer_settings: The trainer parameters.
+        :param model_path: Where to load/save the model.
+        :param load: If True, load model from model_path. Otherwise, create new model.
         """
         self.m_size = 0
@@ -80,7 +83,7 @@ def __init__(
         self.use_continuous_act = brain.vector_action_space_type == "continuous"
         if self.use_continuous_act:
             self.num_branches = self.brain.vector_action_space_size[0]
-        self.model_path = self.trainer_settings.output_path
+        self.model_path = model_path
         self.initialize_path = self.trainer_settings.init_path
         self.keep_checkpoints = self.trainer_settings.keep_checkpoints
         self.graph = tf.Graph()
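The policy-level change above threads an explicit `model_path` through `TFPolicy`/`NNPolicy` instead of having the policy read `trainer_settings.output_path`. A hedged sketch of what a caller now provides; the brain object is a placeholder supplied by the caller, and only the argument order comes from the diff:

```python
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.settings import TrainerSettings

def build_policy(brain, model_path: str) -> NNPolicy:
    # `brain` is a BrainParameters instance (placeholder here).
    # model_path is the new explicit argument; the policy no longer derives it
    # from trainer_settings.output_path.
    return NNPolicy(0, brain, TrainerSettings(), False, model_path, False)
```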
""" - filename = os.path.join( - self.trainer_settings.output_path, "last_replay_buffer.hdf5" - ) + filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5") logger.info("Loading Experience Replay Buffer from {}".format(filename)) with open(filename, "rb+") as file_object: self.update_buffer.load_from_file(file_object) @@ -196,6 +192,7 @@ def create_policy( brain_parameters, self.trainer_settings, self.is_training, + self.artifact_path, self.load, tanh_squash=True, reparameterize=True, diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index d3ea4205ca..9d171f370f 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -190,7 +190,6 @@ def _set_default_hyperparameters(self): factory=lambda: {RewardSignalType.EXTRINSIC: RewardSignalSettings()} ) init_path: Optional[str] = None - output_path: str = "default" keep_checkpoints: int = 5 checkpoint_interval: int = 500000 max_steps: int = 500000 diff --git a/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py b/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py index 79006ded31..86ca749a49 100644 --- a/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py +++ b/ml-agents/mlagents/trainers/tests/test_barracuda_converter.py @@ -36,9 +36,13 @@ def test_barracuda_converter(): @pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"]) def test_policy_conversion(tmpdir, rnn, visual, discrete): tf.reset_default_graph() - dummy_config = TrainerSettings(output_path=os.path.join(tmpdir, "test")) + dummy_config = TrainerSettings() policy = create_policy_mock( - dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual + dummy_config, + use_rnn=rnn, + model_path=os.path.join(tmpdir, "test"), + use_discrete=discrete, + use_visual=visual, ) policy.save_model(1000) settings = SerializationSettings( diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py index e67318897f..a0b971383d 100644 --- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py @@ -20,7 +20,7 @@ def create_bc_module(mock_brain, bc_settings, use_rnn, tanhresample): NetworkSettings.MemorySettings() if use_rnn else None ) policy = NNPolicy( - 0, mock_brain, trainer_config, False, False, tanhresample, tanhresample + 0, mock_brain, trainer_config, False, "test", False, tanhresample, tanhresample ) with policy.graph.as_default(): bc_module = BCModule( diff --git a/ml-agents/mlagents/trainers/tests/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/test_nn_policy.py index 20462d459b..436454ec61 100644 --- a/ml-agents/mlagents/trainers/tests/test_nn_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_nn_policy.py @@ -29,6 +29,7 @@ def create_policy_mock( use_rnn: bool = False, use_discrete: bool = True, use_visual: bool = False, + model_path: str = "", load: bool = False, seed: int = 0, ) -> NNPolicy: @@ -45,15 +46,15 @@ def create_policy_mock( trainer_settings.network_settings.memory = ( NetworkSettings.MemorySettings() if use_rnn else None ) - policy = NNPolicy(seed, mock_brain, trainer_settings, False, load) + policy = NNPolicy(seed, mock_brain, trainer_settings, False, model_path, load) return policy def test_load_save(tmp_path): path1 = os.path.join(tmp_path, "runid1") path2 = os.path.join(tmp_path, "runid2") - trainer_params = TrainerSettings(output_path=path1) - policy = create_policy_mock(trainer_params) + trainer_params = 
diff --git a/ml-agents/mlagents/trainers/tests/test_policy.py b/ml-agents/mlagents/trainers/tests/test_policy.py
index f5d50646e4..2cebd4b135 100644
--- a/ml-agents/mlagents/trainers/tests/test_policy.py
+++ b/ml-agents/mlagents/trainers/tests/test_policy.py
@@ -24,7 +24,7 @@ def get_trainable_variables(self):
 def test_take_action_returns_empty_with_no_agents():
     test_seed = 3
-    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
+    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
     # Doesn't really matter what this is
     dummy_groupspec = BehaviorSpec([(1,)], "continuous", 1)
     no_agent_step = DecisionSteps.empty(dummy_groupspec)
@@ -34,7 +34,7 @@ def test_take_action_returns_empty_with_no_agents():
 def test_take_action_returns_nones_on_missing_values():
     test_seed = 3
-    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
+    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
     policy.evaluate = MagicMock(return_value={})
     policy.save_memories = MagicMock()
     step_with_agents = DecisionSteps(
@@ -46,7 +46,7 @@ def test_take_action_returns_nones_on_missing_values():
 def test_take_action_returns_action_info_when_available():
     test_seed = 3
-    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
+    policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
     policy_eval_out = {
         "action": np.array([1.0], dtype=np.float32),
         "memory_out": np.array([[2.5]], dtype=np.float32),
diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py
index 11af4549a7..babb0e78c6 100644
--- a/ml-agents/mlagents/trainers/tests/test_ppo.py
+++ b/ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -51,7 +51,7 @@ def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visu
         else None
     )
     policy = NNPolicy(
-        0, mock_brain, trainer_settings, False, False, create_tf_graph=False
+        0, mock_brain, trainer_settings, False, "test", False, create_tf_graph=False
     )
     optimizer = PPOOptimizer(policy, trainer_settings)
     return optimizer
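For readers skimming the updated positional calls in these tests, the following annotated restatement spells out which slot the new `"test"` string fills. It mirrors the call in the test helpers above (names such as `mock_brain` and `trainer_settings` come from those helpers), and introduces nothing new:

```python
from mlagents.trainers.policy.nn_policy import NNPolicy

def make_test_policy(mock_brain, trainer_settings) -> NNPolicy:
    # Same call as in the tests above, with each positional argument labelled.
    return NNPolicy(
        0,                 # seed
        mock_brain,        # BrainParameters test double
        trainer_settings,  # TrainerSettings
        False,             # is_training
        "test",            # model_path -- the argument added by this diff
        False,             # load
        create_tf_graph=False,
    )
```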
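`TrainerFactory` no longer takes `run_id`; the run identifier only influences the output directory that callers pass in. A hedged sketch of a caller under that assumption; the remaining constructor arguments are unchanged by this diff and simply forwarded here:

```python
import os
from mlagents.trainers.trainer_util import TrainerFactory

def make_factory(behavior_configs, results_dir: str, run_id: str, **kwargs) -> TrainerFactory:
    # run_id only shapes the path now; it is not a TrainerFactory argument anymore.
    return TrainerFactory(
        trainer_config=behavior_configs,            # Dict[str, TrainerSettings]
        output_path=os.path.join(results_dir, run_id),
        train_model=True,
        load_model=False,
        **kwargs,                                   # seed, init_path, ... as before
    )
```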
diff --git a/ml-agents/mlagents/trainers/trainer/trainer.py b/ml-agents/mlagents/trainers/trainer/trainer.py
index f243320280..868c4cdc7e 100644
--- a/ml-agents/mlagents/trainers/trainer/trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/trainer.py
@@ -28,19 +28,18 @@ def __init__(
         brain_name: str,
         trainer_settings: TrainerSettings,
         training: bool,
-        run_id: str,
+        artifact_path: str,
         reward_buff_cap: int = 1,
     ):
         """
         Responsible for collecting experiences and training a neural network model.
         :BrainParameters brain: Brain to be trained.
-        :dict trainer_settings: The parameters for the trainer (dictionary).
-        :bool training: Whether the trainer is set for training.
-        :str run_id: The identifier of the current run
-        :int reward_buff_cap:
+        :param trainer_settings: The parameters for the trainer (dictionary).
+        :param training: Whether the trainer is set for training.
+        :param artifact_path: The directory within which to store artifacts from this trainer
+        :param reward_buff_cap:
         """
         self.brain_name = brain_name
-        self.run_id = run_id
         self.trainer_settings = trainer_settings
         self._threaded = trainer_settings.threaded
         self._stats_reporter = StatsReporter(brain_name)
@@ -49,6 +48,7 @@ def __init__(
         self.policy_queues: List[AgentManagerQueue[Policy]] = []
         self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
         self.step: int = 0
+        self.artifact_path = artifact_path
         self.summary_freq = self.trainer_settings.summary_freq
     @property
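The abstract `Trainer` now stores `self.artifact_path`, so subclasses and helpers can derive file locations from it rather than from `trainer_settings.output_path` or a run id. An illustrative helper under that assumption; the file name is made up:

```python
import os
from mlagents.trainers.trainer.trainer import Trainer

def trainer_note_path(trainer: Trainer) -> str:
    # Any per-trainer artifact can hang off the new attribute.
    return os.path.join(trainer.artifact_path, "notes.txt")
```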
diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py
index 15e5955662..450116e9cf 100644
--- a/ml-agents/mlagents/trainers/trainer_util.py
+++ b/ml-agents/mlagents/trainers/trainer_util.py
@@ -20,7 +20,6 @@ class TrainerFactory:
     def __init__(
         self,
         trainer_config: Dict[str, TrainerSettings],
-        run_id: str,
         output_path: str,
         train_model: bool,
         load_model: bool,
@@ -30,7 +29,6 @@ def __init__(
         multi_gpu: bool = False,
     ):
         self.trainer_config = trainer_config
-        self.run_id = run_id
         self.output_path = output_path
         self.init_path = init_path
         self.train_model = train_model
@@ -44,7 +42,6 @@ def generate(self, brain_name: str) -> Trainer:
         return initialize_trainer(
             self.trainer_config[brain_name],
             brain_name,
-            self.run_id,
             self.output_path,
             self.train_model,
             self.load_model,
@@ -59,7 +56,6 @@ def generate(self, brain_name: str) -> Trainer:
 def initialize_trainer(
     trainer_settings: TrainerSettings,
     brain_name: str,
-    run_id: str,
     output_path: str,
     train_model: bool,
     load_model: bool,
@@ -75,7 +71,6 @@ def initialize_trainer(
     :param trainer_settings: Original trainer configuration loaded from YAML
     :param brain_name: Name of the brain to be associated with trainer
-    :param run_id: Run ID to associate with this training run
     :param output_path: Path to save the model and summary statistics
     :param keep_checkpoints: How many model checkpoints to keep
     :param train_model: Whether to train the model (vs. run inference)
@@ -86,7 +81,6 @@ def initialize_trainer(
     :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
     :return:
     """
-    trainer_settings.output_path = os.path.join(output_path, brain_name)
+    trainer_artifact_path = os.path.join(output_path, brain_name)
     if init_path is not None:
         trainer_settings.init_path = os.path.join(init_path, brain_name)
@@ -113,7 +108,7 @@ def initialize_trainer(
             train_model,
             load_model,
             seed,
-            run_id,
+            trainer_artifact_path,
         )
     elif trainer_type == TrainerType.SAC:
         trainer = SACTrainer(
@@ -123,7 +118,7 @@ def initialize_trainer(
             train_model,
             load_model,
             seed,
-            run_id,
+            trainer_artifact_path,
         )
     else:
         raise TrainerConfigError(
@@ -138,7 +133,7 @@ def initialize_trainer(
             min_lesson_length,
             trainer_settings,
             train_model,
-            run_id,
+            trainer_artifact_path,
         )
     return trainer
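`initialize_trainer` now computes a per-behavior artifact directory and passes it to the trainer, instead of mutating `trainer_settings.output_path`. A minimal sketch of that derivation with example values:

```python
import os

output_path = os.path.join("results", "walker_run")  # write_path handed down from learn.py
brain_name = "Walker"

trainer_artifact_path = os.path.join(output_path, brain_name)
print(trainer_artifact_path)  # results/walker_run/Walker -- given to PPO/SAC/GhostTrainer
```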