[refactor] Store and restore state along with checkpoints (#4025)

Ervin T · web-flow · commit 5d02292ad889 · 2020-06-02T18:11:48.000-07:00
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -17,18 +17,20 @@ and this project adheres to
 - `use_visual` and `allow_multiple_visual_obs` in the `UnityToGymWrapper` constructor
 were replaced by `allow_multiple_obs` which allows one or more visual observations and
 vector observations to be used simultaneously. (#3981) Thank you @shakenes !
-### Minor Changes
-#### com.unity.ml-agents (C#)
-- `ObservableAttribute` was added. Adding the attribute to fields or properties on an Agent will allow it to generate
-  observations via reflection. (#3925, #4006)
-#### ml-agents / ml-agents-envs / gym-unity (Python)
 - Curriculum and Parameter Randomization configurations have been merged
   into the main training configuration file. Note that this means training
   configuration files are now environment-specific. (#3791)
 - The format for trainer configuration has changed, and the "default" behavior has been deprecated.
   See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Migrating.md) for more details. (#3936)
 - Training artifacts (trained models, summaries) are now found in the `results/`
   directory. (#3829)
+- When using Curriculum, the current lesson will resume if training is quit and resumed. As such,
+  the `--lesson` CLI option has been removed. (#4025)
+### Minor Changes
+#### com.unity.ml-agents (C#)
+- `ObservableAttribute` was added. Adding the attribute to fields or properties on an Agent will allow it to generate
+  observations via reflection. (#3925, #4006)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
 - Unity Player logs are now written out to the results directory. (#3877)
 - Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
 - When trying to load/resume from a checkpoint created with an earlier verison of ML-Agents,
diff --git a/docs/Migrating.md b/docs/Migrating.md
@@ -28,6 +28,8 @@ double-check that the versions are in the same. The versions can be found in
 - `use_visual` and `allow_multiple_visual_obs` in the `UnityToGymWrapper` constructor
 were replaced by `allow_multiple_obs` which allows one or more visual observations and
 vector observations to be used simultaneously.
+- `--lesson` has been removed from the CLI. Lessons will resume when using `--resume`.
+  To start at a different lesson, modify your Curriculum configuration.
 
 ### Steps to Migrate
 - To upgrade your configuration files, an upgrade script has been provided. Run `python config/update_config.py
diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md
@@ -420,11 +420,9 @@ train agents in the Wall Jump environment with curriculum learning, we can run:
 mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum
 ```
 
-We can then keep track of the current lessons and progresses via TensorBoard.
-
-**Note**: If you are resuming a training session that uses curriculum, please
-pass the number of the last-reached lesson using the `--lesson` flag when
-running `mlagents-learn`.
+We can then keep track of the current lessons and their progress via TensorBoard. If you've
+terminated the run, you can resume it using `--resume`, and lesson progress will pick up where
+it ended.
 
 ### Environment Parameter Randomization
 
diff --git a/ml-agents/README.md b/ml-agents/README.md
@@ -31,5 +31,3 @@ scene with the ML-Agents SDK, check out the main
   cooperative behavior among different agents is not stable.
 - Resuming self-play from a checkpoint resets the reported ELO to the default
   value.
-- Resuming curriculum learning from a checkpoint requires the last lesson be
-  specified using the `--lesson` CLI option
diff --git a/ml-agents/mlagents/trainers/cli_utils.py b/ml-agents/mlagents/trainers/cli_utils.py
@@ -59,13 +59,6 @@ def _create_parser() -> argparse.ArgumentParser:
         help="Path to the Unity executable to train",
         action=DetectDefault,
     )
-    argparser.add_argument(
-        "--lesson",
-        default=0,
-        type=int,
-        help="The lesson to start with when performing curriculum training",
-        action=DetectDefault,
-    )
     argparser.add_argument(
         "--load",
         default=False,
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
@@ -25,6 +25,7 @@
 from mlagents.trainers.sampler_class import SamplerManager
 from mlagents.trainers.exception import SamplerException
 from mlagents.trainers.settings import RunOptions
+from mlagents.trainers.training_status import GlobalTrainingStatus
 from mlagents_envs.base_env import BaseEnv
 from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
 from mlagents_envs.side_channel.side_channel import SideChannel
@@ -38,6 +39,8 @@
 
 logger = logging_util.get_logger(__name__)
 
+TRAINING_STATUS_FILE_NAME = "training_status.json"
+
 
 def get_version_string() -> str:
     # pylint: disable=no-member
@@ -82,6 +85,11 @@ def run_training(run_seed: int, options: RunOptions) -> None:
         )
         # Make run logs directory
         os.makedirs(run_logs_dir, exist_ok=True)
+        # Load any needed states
+        if checkpoint_settings.resume:
+            GlobalTrainingStatus.load_state(
+                os.path.join(run_logs_dir, TRAINING_STATUS_FILE_NAME)
+            )
         # Configure CSV, Tensorboard Writers and StatsReporter
         # We assume reward and episode length are needed in the CSV.
         csv_writer = CSVWriter(
@@ -123,7 +131,7 @@ def run_training(run_seed: int, options: RunOptions) -> None:
             env_factory, engine_config, env_settings.num_envs
         )
         maybe_meta_curriculum = try_create_meta_curriculum(
-            options.curriculum, env_manager, checkpoint_settings.lesson
+            options.curriculum, env_manager, restore=checkpoint_settings.resume
         )
         sampler_manager, resampling_interval = create_sampler_manager(
             options.parameter_randomization, run_seed
@@ -159,6 +167,7 @@ def run_training(run_seed: int, options: RunOptions) -> None:
         env_manager.close()
         write_run_options(write_path, options)
         write_timing_tree(run_logs_dir)
+        write_training_status(run_logs_dir)
 
 
 def write_run_options(output_dir: str, run_options: RunOptions) -> None:
@@ -175,6 +184,10 @@ def write_run_options(output_dir: str, run_options: RunOptions) -> None:
         )
 
 
+def write_training_status(output_dir: str) -> None:
+    GlobalTrainingStatus.save_state(os.path.join(output_dir, TRAINING_STATUS_FILE_NAME))
+
+
 def write_timing_tree(output_dir: str) -> None:
     timing_path = os.path.join(output_dir, "timers.json")
     try:
@@ -209,15 +222,14 @@ def create_sampler_manager(sampler_config, run_seed=None):
 
 
 def try_create_meta_curriculum(
-    curriculum_config: Optional[Dict], env: SubprocessEnvManager, lesson: int
+    curriculum_config: Optional[Dict], env: SubprocessEnvManager, restore: bool = False
 ) -> Optional[MetaCurriculum]:
     if curriculum_config is None or len(curriculum_config) <= 0:
         return None
     else:
         meta_curriculum = MetaCurriculum(curriculum_config)
-        # TODO: Should be able to start learning at different lesson numbers
-        # for each curriculum.
-        meta_curriculum.set_all_curricula_to_lesson_num(lesson)
+        if restore:
+            meta_curriculum.try_restore_all_curriculum()
         return meta_curriculum
 
 
diff --git a/ml-agents/mlagents/trainers/meta_curriculum.py b/ml-agents/mlagents/trainers/meta_curriculum.py
@@ -3,6 +3,7 @@
 from typing import Dict, Set
 from mlagents.trainers.curriculum import Curriculum
 from mlagents.trainers.settings import CurriculumSettings
+from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType
 
 from mlagents_envs.logging_util import get_logger
 
@@ -115,16 +116,22 @@ def increment_lessons(self, measure_vals, reward_buff_sizes=None):
                 )
         return ret
 
-    def set_all_curricula_to_lesson_num(self, lesson_num):
-        """Sets all the curricula in this meta curriculum to a specified
-        lesson number.
-
-        Args:
-            lesson_num (int): The lesson number which all the curricula will
-                be set to.
+    def try_restore_all_curriculum(self):
         """
-        for _, curriculum in self.brains_to_curricula.items():
-            curriculum.lesson_num = lesson_num
+        Restores lessons saved in training_status.json; unsaved curricula reset to 0.
+        """
+
+        for brain_name, curriculum in self.brains_to_curricula.items():
+            lesson_num = GlobalTrainingStatus.get_parameter_state(
+                brain_name, StatusType.LESSON_NUM
+            )
+            if lesson_num is not None:
+                logger.info(
+                    f"Resuming curriculum for {brain_name} at lesson {lesson_num}."
+                )
+                curriculum.lesson_num = lesson_num
+            else:
+                curriculum.lesson_num = 0
 
     def get_config(self):
         """Get the combined configuration of all curricula in this
diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py
@@ -275,7 +275,6 @@ class CheckpointSettings:
     force: bool = parser.get_default("force")
     train_model: bool = parser.get_default("train_model")
     inference: bool = parser.get_default("inference")
-    lesson: int = parser.get_default("lesson")
 
 
 @attr.s(auto_attribs=True)
diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py
@@ -31,7 +31,6 @@ def basic_options(extra_args=None):
         base_port: 4001
         seed: 9870
     checkpoint_settings:
-        lesson: 2
         run_id: uselessrun
         save_freq: 654321
     debug: false
@@ -120,7 +119,6 @@ def test_commandline_args(mock_file):
     assert opt.behaviors == {}
     assert opt.env_settings.env_path is None
     assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 0
     assert opt.checkpoint_settings.resume is False
     assert opt.checkpoint_settings.inference is False
     assert opt.checkpoint_settings.run_id == "ppo"
@@ -135,7 +133,6 @@ def test_commandline_args(mock_file):
     full_args = [
         "mytrainerpath",
         "--env=./myenvfile",
-        "--lesson=3",
         "--resume",
         "--inference",
         "--run-id=myawesomerun",
@@ -152,7 +149,6 @@ def test_commandline_args(mock_file):
     assert opt.behaviors == {}
     assert opt.env_settings.env_path == "./myenvfile"
     assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 3
     assert opt.checkpoint_settings.run_id == "myawesomerun"
     assert opt.checkpoint_settings.save_freq == 123456
     assert opt.env_settings.seed == 7890
@@ -172,7 +168,6 @@ def test_yaml_args(mock_file):
     assert opt.behaviors == {}
     assert opt.env_settings.env_path == "./oldenvfile"
     assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 2
     assert opt.checkpoint_settings.run_id == "uselessrun"
     assert opt.checkpoint_settings.save_freq == 654321
     assert opt.env_settings.seed == 9870
@@ -185,7 +180,6 @@ def test_yaml_args(mock_file):
     full_args = [
         "mytrainerpath",
         "--env=./myenvfile",
-        "--lesson=3",
         "--resume",
         "--inference",
         "--run-id=myawesomerun",
@@ -202,7 +196,6 @@ def test_yaml_args(mock_file):
     assert opt.behaviors == {}
     assert opt.env_settings.env_path == "./myenvfile"
     assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 3
     assert opt.checkpoint_settings.run_id == "myawesomerun"
     assert opt.checkpoint_settings.save_freq == 123456
     assert opt.env_settings.seed == 7890
diff --git a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py b/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
@@ -1,5 +1,5 @@
 import pytest
-from unittest.mock import patch, Mock
+from unittest.mock import patch, Mock, call
 
 from mlagents.trainers.meta_curriculum import MetaCurriculum
 
@@ -11,6 +11,7 @@
 )
 from mlagents.trainers.tests.test_curriculum import dummy_curriculum_config
 from mlagents.trainers.settings import CurriculumSettings
+from mlagents.trainers.training_status import StatusType
 
 
 @pytest.fixture
@@ -77,14 +78,26 @@ def test_increment_lessons_with_reward_buff_sizes(
     curriculum_b.increment_lesson.assert_not_called()
 
 
-def test_set_all_curriculums_to_lesson_num():
+@patch("mlagents.trainers.meta_curriculum.GlobalTrainingStatus")
+def test_restore_curriculums(mock_trainingstatus):
     meta_curriculum = MetaCurriculum(test_meta_curriculum_config)
-
-    meta_curriculum.set_all_curricula_to_lesson_num(2)
-
+    # Test restore to value
+    mock_trainingstatus.get_parameter_state.return_value = 2
+    meta_curriculum.try_restore_all_curriculum()
+    mock_trainingstatus.get_parameter_state.assert_has_calls(
+        [call("Brain1", StatusType.LESSON_NUM), call("Brain2", StatusType.LESSON_NUM)],
+        any_order=True,
+    )
     assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 2
     assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 2
 
+    # Test restore to None
+    mock_trainingstatus.get_parameter_state.return_value = None
+    meta_curriculum.try_restore_all_curriculum()
+
+    assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 0
+    assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 0
+
 
 def test_get_config():
     meta_curriculum = MetaCurriculum(test_meta_curriculum_config)
diff --git a/ml-agents/mlagents/trainers/tests/test_training_status.py b/ml-agents/mlagents/trainers/tests/test_training_status.py
@@ -0,0 +1,60 @@
+import os
+import unittest
+import json
+from enum import Enum
+
+from mlagents.trainers.training_status import (
+    StatusType,
+    StatusMetaData,
+    GlobalTrainingStatus,
+)
+
+
+def test_globaltrainingstatus(tmpdir):
+    path_dir = os.path.join(tmpdir, "test.json")
+
+    GlobalTrainingStatus.set_parameter_state("Category1", StatusType.LESSON_NUM, 3)
+    GlobalTrainingStatus.save_state(path_dir)
+
+    with open(path_dir, "r") as fp:
+        test_json = json.load(fp)
+
+    assert "Category1" in test_json
+    assert StatusType.LESSON_NUM.value in test_json["Category1"]
+    assert test_json["Category1"][StatusType.LESSON_NUM.value] == 3
+    assert "metadata" in test_json
+
+    GlobalTrainingStatus.load_state(path_dir)
+    restored_val = GlobalTrainingStatus.get_parameter_state(
+        "Category1", StatusType.LESSON_NUM
+    )
+    assert restored_val == 3
+
+    # Test unknown categories and status types (keys)
+    unknown_category = GlobalTrainingStatus.get_parameter_state(
+        "Category3", StatusType.LESSON_NUM
+    )
+
+    class FakeStatusType(Enum):
+        NOTAREALKEY = "notarealkey"
+
+    unknown_key = GlobalTrainingStatus.get_parameter_state(
+        "Category1", FakeStatusType.NOTAREALKEY
+    )
+    assert unknown_category is None
+    assert unknown_key is None
+
+
+class StatsMetaDataTest(unittest.TestCase):
+    def test_metadata_compare(self):
+        # Test check_compatibility
+        with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
+            default_metadata = StatusMetaData()
+            version_statsmetadata = StatusMetaData(mlagents_version="test")
+            default_metadata.check_compatibility(version_statsmetadata)
+
+            tf_version_statsmetadata = StatusMetaData(tensorflow_version="test")
+            default_metadata.check_compatibility(tf_version_statsmetadata)
+
+        # Assert that 2 warnings have been logged
+        assert len(cm.output) == 2
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
@@ -30,6 +30,7 @@
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.agent_processor import AgentManager
 from mlagents.trainers.settings import CurriculumSettings
+from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType
 
 
 class TrainerController(object):
@@ -310,6 +311,9 @@ def advance(self, env: EnvManager) -> int:
                     self.trainers[brain_name].stats_reporter.set_stat(
                         "Environment/Lesson", curr.lesson_num
                     )
+                    GlobalTrainingStatus.set_parameter_state(
+                        brain_name, StatusType.LESSON_NUM, curr.lesson_num
+                    )
 
         for trainer in self.trainers.values():
             if not trainer.threaded:
diff --git a/ml-agents/mlagents/trainers/training_status.py b/ml-agents/mlagents/trainers/training_status.py