From e2cecce5ca9d0fb7bb9c6a12ca5e28b5037a0a24 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 2 Sep 2020 13:07:51 -0700 Subject: [PATCH 1/7] Enable default settings for TrainerSettings --- ml-agents/mlagents/trainers/settings.py | 43 ++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 2a1f27b391..aaeefb3f57 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -2,13 +2,24 @@ import attr import cattr -from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple, Union +from typing import ( + Dict, + Optional, + List, + Any, + DefaultDict, + Mapping, + Tuple, + Union, + ClassVar, +) from enum import Enum import collections import argparse import abc import numpy as np import math +import copy from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser from mlagents.trainers.cli_utils import load_config @@ -539,6 +550,7 @@ class FrameworkType(Enum): @attr.s(auto_attribs=True) class TrainerSettings(ExportableSettings): + default_override: ClassVar[Optional["TrainerSettings"]] = None trainer_type: TrainerType = TrainerType.PPO hyperparameters: HyperparamSettings = attr.ib() @@ -578,8 +590,8 @@ def _check_batch_size_seq_length(self, attribute, value): @staticmethod def dict_to_defaultdict(d: Dict, t: type) -> DefaultDict: - return collections.defaultdict( - TrainerSettings, cattr.structure(d, Dict[str, TrainerSettings]) + return TrainerSettings.DefaultTrainerDict( + cattr.structure(d, Dict[str, TrainerSettings]) ) @staticmethod @@ -588,9 +600,14 @@ def structure(d: Mapping, t: type) -> Any: Helper method to structure a TrainerSettings class. Meant to be registered with cattr.register_structure_hook() and called with cattr.structure(). 
""" + if not isinstance(d, Mapping): raise TrainerConfigError(f"Unsupported config {d} for {t.__name__}.") + d_copy: Dict[str, Any] = {} + if TrainerSettings.default_override is not None: + d_copy.update(cattr.unstructure(TrainerSettings.default_override)) + d_copy.update(d) for key, val in d_copy.items(): @@ -613,6 +630,16 @@ def structure(d: Mapping, t: type) -> Any: d_copy[key] = check_and_structure(key, val, t) return t(**d_copy) + class DefaultTrainerDict(collections.defaultdict): + def __init__(self, *args): + super().__init__(TrainerSettings, *args) + + def __missing__(self, key: Any) -> "TrainerSettings": + if TrainerSettings.default_override is not None: + return copy.deepcopy(TrainerSettings.default_override) + else: + return TrainerSettings() + # COMMAND LINE ######################################################################### @attr.s(auto_attribs=True) @@ -653,8 +680,9 @@ class EngineSettings: @attr.s(auto_attribs=True) class RunOptions(ExportableSettings): + default_settings: Optional[TrainerSettings] = None behaviors: DefaultDict[str, TrainerSettings] = attr.ib( - factory=lambda: collections.defaultdict(TrainerSettings) + factory=lambda: TrainerSettings.DefaultTrainerDict ) env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings) engine_settings: EngineSettings = attr.ib(factory=EngineSettings) @@ -714,6 +742,13 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": key ) ) + + # If a default settings was specified, set the TrainerSettings override + if "default_settings" in configured_dict.keys(): + TrainerSettings.default_override = cattr.structure( + configured_dict["default_settings"], TrainerSettings + ) + # Override with CLI args # Keep deprecated --load working, TODO: remove argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"] From f6b100efe5030c5aa4d6606daa669bafacdebaa8 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 2 Sep 2020 13:13:24 -0700 Subject: [PATCH 2/7] Improve comments 
--- ml-agents/mlagents/trainers/settings.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index aaeefb3f57..e8e2a0092c 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -605,6 +605,9 @@ def structure(d: Mapping, t: type) -> Any: raise TrainerConfigError(f"Unsupported config {d} for {t.__name__}.") d_copy: Dict[str, Any] = {} + + # Check if a default_settings was specified. If so, use those as the default + # rather than an empty dict. if TrainerSettings.default_override is not None: d_copy.update(cattr.unstructure(TrainerSettings.default_override)) @@ -743,7 +746,7 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": ) ) - # If a default settings was specified, set the TrainerSettings override + # If a default settings was specified, set the TrainerSettings class override if "default_settings" in configured_dict.keys(): TrainerSettings.default_override = cattr.structure( configured_dict["default_settings"], TrainerSettings From 18a2329191f72b70152f896b9de1278f2a3f53ab Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 2 Sep 2020 15:58:33 -0700 Subject: [PATCH 3/7] Fix bugs and add tests --- ml-agents/mlagents/trainers/settings.py | 31 +++++++++++++----- .../mlagents/trainers/tests/test_settings.py | 32 +++++++++++++++++++ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index e8e2a0092c..062e9e9d6e 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -57,6 +57,17 @@ def defaultdict_to_dict(d: DefaultDict) -> Dict: return {key: cattr.unstructure(val) for key, val in d.items()} +def deep_update_dict(d: Dict, update_d: Mapping) -> None: + """ + Similar to dict.update(), but works for nested dicts of dicts as well. 
+ """ + for key, val in update_d.items(): + if key in d and isinstance(d[key], Mapping) and isinstance(val, Mapping): + deep_update_dict(d[key], val) + else: + d[key] = val + + class SerializationSettings: convert_to_barracuda = True convert_to_onnx = True @@ -611,7 +622,7 @@ def structure(d: Mapping, t: type) -> Any: if TrainerSettings.default_override is not None: d_copy.update(cattr.unstructure(TrainerSettings.default_override)) - d_copy.update(d) + deep_update_dict(d_copy, d) for key, val in d_copy.items(): if attr.has(type(val)): @@ -685,7 +696,7 @@ class EngineSettings: class RunOptions(ExportableSettings): default_settings: Optional[TrainerSettings] = None behaviors: DefaultDict[str, TrainerSettings] = attr.ib( - factory=lambda: TrainerSettings.DefaultTrainerDict + factory=TrainerSettings.DefaultTrainerDict ) env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings) engine_settings: EngineSettings = attr.ib(factory=EngineSettings) @@ -709,7 +720,9 @@ class RunOptions(ExportableSettings): cattr.register_unstructure_hook( ParameterRandomizationSettings, ParameterRandomizationSettings.unstructure ) + cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) + cattr.register_structure_hook( DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict ) @@ -746,12 +759,6 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": ) ) - # If a default settings was specified, set the TrainerSettings class override - if "default_settings" in configured_dict.keys(): - TrainerSettings.default_override = cattr.structure( - configured_dict["default_settings"], TrainerSettings - ) - # Override with CLI args # Keep deprecated --load working, TODO: remove argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"] @@ -771,4 +778,12 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": @staticmethod def from_dict(options_dict: Dict[str, Any]) -> "RunOptions": + # If a default settings was specified, 
set the TrainerSettings class override + if ( + "default_settings" in options_dict.keys() + and options_dict["default_settings"] is not None + ): + TrainerSettings.default_override = cattr.structure( + options_dict["default_settings"], TrainerSettings + ) return cattr.structure(options_dict, RunOptions) diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 2a9393c1b3..d4cfb1d1b9 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -1,4 +1,5 @@ import attr +import cattr import pytest import yaml @@ -20,6 +21,7 @@ GaussianSettings, MultiRangeUniformSettings, TrainerType, + deep_update_dict, strict_to_cls, ) from mlagents.trainers.exception import TrainerConfigError @@ -104,6 +106,14 @@ class TestAttrsClass: strict_to_cls("non_dict_input", TestAttrsClass) +def test_deep_update_dict(): + dict1 = {"a": 1, "b": 2, "c": {"d": 3}} + dict2 = {"a": 2, "c": {"d": 4, "e": 5}} + + deep_update_dict(dict1, dict2) + assert dict1 == {"a": 2, "b": 2, "c": {"d": 4, "e": 5}} + + def test_trainersettings_structure(): """ Test structuring method for TrainerSettings @@ -468,3 +478,25 @@ def test_environment_settings(): # Multiple environments with no env_path is an error with pytest.raises(ValueError): EnvironmentSettings(num_envs=2) + + +def test_default_settings(): + # Make default settings, one nested and one not. 
+ default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}} + behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}} + run_options_dict = {"default_settings": default_settings, "behaviors": behaviors} + run_options = RunOptions.from_dict(run_options_dict) + + # Check that a new behavior has the default settings + default_settings_cls = cattr.structure(default_settings, TrainerSettings) + check_if_different(default_settings_cls, run_options.behaviors["test2"]) + + # Check that an existing behavior overrides the defaults in specified fields + test1_settings = run_options.behaviors["test1"] + assert test1_settings.max_steps == 2 + assert test1_settings.network_settings.hidden_units == 2000 + assert test1_settings.network_settings.num_layers == 1000 + # Change the overridden fields back, and check if the rest are equal. + test1_settings.max_steps = 1 + test1_settings.network_settings.hidden_units == default_settings_cls.network_settings.hidden_units + check_if_different(test1_settings, default_settings_cls) From 508a8ed48fedac22d327e54ac0d0035079738e57 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 2 Sep 2020 16:01:07 -0700 Subject: [PATCH 4/7] Remove unnecessary changes --- ml-agents/mlagents/trainers/settings.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 062e9e9d6e..8863944ca5 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -720,9 +720,7 @@ class RunOptions(ExportableSettings): cattr.register_unstructure_hook( ParameterRandomizationSettings, ParameterRandomizationSettings.unstructure ) - cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) - cattr.register_structure_hook( DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict ) @@ -758,7 +756,6 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": key ) ) - # 
Override with CLI args # Keep deprecated --load working, TODO: remove argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"] From 7a057e1729313009fcd6750b6aff170b24ee7ea0 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 2 Sep 2020 16:11:21 -0700 Subject: [PATCH 5/7] Update docs --- docs/Training-ML-Agents.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 5327db5e49..5152a91adc 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -337,6 +337,24 @@ each of these parameters mean and provide guidelines on how to set them. See description of all the configurations listed above, along with their defaults. Unless otherwise specified, omitting a configuration will revert it to its default. +### Default Behavior Settings + +In some cases, you may want to specify a set of default configurations for your Behaviors. +This may be useful, for instance, if your Behavior names are generated procedurally by +the environment and not known before runtime, or if you have many Behaviors with very similar +settings. To specify a default configuration, insert a `default_settings` section in your YAML. +This section should be formatted exactly like a configuration for a Behavior. + +```yaml +default_settings: + # < Same as Behavior configuration > +behaviors: + # < Same as above > +``` + +Behaviors found in the environment that aren't secified in the YAML will now use the `default_settings`, +and unspecified settings in behavior configurations will default to the values in `default_settings` if +specified there. 
### Environment Parameters From ea0b063d6d18b3deeb45badb60862dea557d32f6 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 2 Sep 2020 16:11:28 -0700 Subject: [PATCH 6/7] Update changelog --- com.unity.ml-agents/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 820b9bb428..09c5cd6aa0 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -24,6 +24,8 @@ Note that PyTorch 1.6.0 or greater should be installed to use this feature; see - The minimum supported version of TensorFlow was increased to 1.14.0. (#4411) - A CNN (`vis_encode_type: match3`) for smaller grids, e.g. board games, has been added. (#4434) +- You can now again specify a default configuration for your behaviors. Specify `default_settings` in +your trainer configuration to do so. (#4448) ### Bug Fixes #### com.unity.ml-agents (C#) From a698e00b3a92b73803521b6c1cd87acc66aa19f5 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Thu, 3 Sep 2020 14:48:21 -0700 Subject: [PATCH 7/7] spelling correction --- docs/Training-ML-Agents.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 5152a91adc..bc699fc380 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -352,7 +352,7 @@ behaviors: # < Same as above > ``` -Behaviors found in the environment that aren't secified in the YAML will now use the `default_settings`, +Behaviors found in the environment that aren't specified in the YAML will now use the `default_settings`, and unspecified settings in behavior configurations will default to the values in `default_settings` if specified there.