Fix lesson incrementing (#4279)

awjuliani · awjuliani · commit 549bea201370 · 2020-07-30T14:11:39.000-07:00
diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py
@@ -131,7 +131,7 @@ def update_lessons(
             lesson = settings.curriculum[lesson_num]
             if (
                 lesson.completion_criteria is not None
-                and len(settings.curriculum) > lesson_num
+                and len(settings.curriculum) > lesson_num + 1
             ):
                 behavior_to_consider = lesson.completion_criteria.behavior
                 if behavior_to_consider in trainer_steps:
diff --git a/ml-agents/mlagents/trainers/exception.py b/ml-agents/mlagents/trainers/exception.py
@@ -19,6 +19,14 @@ class TrainerConfigError(Exception):
     pass
 
 
+class TrainerConfigWarning(Warning):
+    """
+    Any warning related to the configuration of trainers in the ML-Agents Toolkit.
+    """
+
+    pass
+
+
 class CurriculumError(TrainerError):
     """
     Any error related to training with a curriculum.
diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py
@@ -1,3 +1,5 @@
+import warnings
+
 import attr
 import cattr
 from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple, Union
@@ -10,8 +12,7 @@
 
 from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser
 from mlagents.trainers.cli_utils import load_config
-from mlagents.trainers.exception import TrainerConfigError
-from mlagents.trainers.models import ScheduleType, EncoderType
+from mlagents.trainers.exception import TrainerConfigError, TrainerConfigWarning
 
 from mlagents_envs import logging_util
 from mlagents_envs.side_channel.environment_parameters_channel import (
@@ -51,6 +52,17 @@ def as_dict(self):
         return cattr.unstructure(self)
 
 
+class EncoderType(Enum):
+    SIMPLE = "simple"
+    NATURE_CNN = "nature_cnn"
+    RESNET = "resnet"
+
+
+class ScheduleType(Enum):
+    CONSTANT = "constant"
+    LINEAR = "linear"
+
+
 @attr.s(auto_attribs=True)
 class NetworkSettings:
     @attr.s
@@ -433,14 +445,20 @@ class EnvironmentParameterSettings:
     def _check_lesson_chain(lessons, parameter_name):
         """
         Ensures that when using curriculum, all non-terminal lessons have a valid
-        CompletionCriteria
+        CompletionCriteria, and warns if the terminal lesson contains a CompletionCriteria (it is ignored).
         """
         num_lessons = len(lessons)
         for index, lesson in enumerate(lessons):
             if index < num_lessons - 1 and lesson.completion_criteria is None:
                 raise TrainerConfigError(
                     f"A non-terminal lesson does not have a completion_criteria for {parameter_name}."
                 )
+            if index == num_lessons - 1 and lesson.completion_criteria is not None:
+                warnings.warn(
+                    f"Your final lesson definition contains completion_criteria for {parameter_name}. "
+                    "It will be ignored.",
+                    TrainerConfigWarning,
+                )
 
     @staticmethod
     def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]:
diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py
@@ -2,7 +2,7 @@
 import yaml
 
 
-from mlagents.trainers.exception import TrainerConfigError
+from mlagents.trainers.exception import TrainerConfigError, TrainerConfigWarning
 from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
 from mlagents.trainers.settings import (
     RunOptions,
@@ -154,13 +154,75 @@ def test_curriculum_conversion():
 """
 
 
+test_bad_curriculum_all_competion_criteria_config_yaml = """
+environment_parameters:
+    param_1:
+      curriculum:
+          - name: Lesson1
+            completion_criteria:
+                measure: reward
+                behavior: fake_behavior
+                threshold: 30
+                min_lesson_length: 100
+                require_reset: true
+            value: 1
+          - name: Lesson2
+            completion_criteria:
+                measure: reward
+                behavior: fake_behavior
+                threshold: 30
+                min_lesson_length: 100
+                require_reset: true
+            value: 2
+          - name: Lesson3
+            completion_criteria:
+                measure: reward
+                behavior: fake_behavior
+                threshold: 30
+                min_lesson_length: 100
+                require_reset: true
+            value:
+                sampler_type: uniform
+                sampler_parameters:
+                    min_value: 1
+                    max_value: 3
+"""
+
+
 def test_curriculum_raises_no_completion_criteria_conversion():
     with pytest.raises(TrainerConfigError):
         RunOptions.from_dict(
             yaml.safe_load(test_bad_curriculum_no_competion_criteria_config_yaml)
         )
 
 
+def test_curriculum_raises_all_completion_criteria_conversion():
+    with pytest.warns(TrainerConfigWarning):
+        run_options = RunOptions.from_dict(
+            yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
+        )
+
+        param_manager = EnvironmentParameterManager(
+            run_options.environment_parameters, 1337, False
+        )
+        assert param_manager.update_lessons(
+            trainer_steps={"fake_behavior": 500},
+            trainer_max_steps={"fake_behavior": 1000},
+            trainer_reward_buffer={"fake_behavior": [1000] * 101},
+        ) == (True, True)
+        assert param_manager.update_lessons(
+            trainer_steps={"fake_behavior": 500},
+            trainer_max_steps={"fake_behavior": 1000},
+            trainer_reward_buffer={"fake_behavior": [1000] * 101},
+        ) == (True, True)
+        assert param_manager.update_lessons(
+            trainer_steps={"fake_behavior": 500},
+            trainer_max_steps={"fake_behavior": 1000},
+            trainer_reward_buffer={"fake_behavior": [1000] * 101},
+        ) == (False, False)
+        assert param_manager.get_current_lesson_number() == {"param_1": 2}
+
+
 test_everything_config_yaml = """
 environment_parameters:
     param_1: