Skip to content

[refactor] Allow full RunOptions to be specified in trainer configuration YAML #3815

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 45 commits into from
Apr 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a2b4fb7
Move sampler config into main YAML
Apr 14, 2020
94c684c
Make CLI override YAML
Apr 14, 2020
54e9914
Bring back default functionality, curriculum loader
Apr 15, 2020
df4a358
Load curriculum from same YAML
Apr 15, 2020
3a84c13
Example WallJump curriculum
Apr 15, 2020
92c9682
New-format YAML files
Apr 15, 2020
9dcf38d
Fix walljump curriculum
Apr 16, 2020
a926d4c
Commit SAC parameters
Apr 16, 2020
419a156
Delete old configs and add gail
Apr 16, 2020
c80c359
Change some of the documentation
Apr 17, 2020
f020ecc
Merge master into develop-single-config
Apr 17, 2020
0fa8f8b
More doc updates
Apr 17, 2020
72b39f0
Fix Yamato test
Apr 17, 2020
0c89258
Fix learn.py test
Apr 17, 2020
b84396f
More docs updates
Apr 17, 2020
756a75f
Update migrating.md file
Apr 17, 2020
cb97315
Update changelog and improve migrating
Apr 17, 2020
7bb6366
Don't hard break trying to get curriculum out of bad config
Apr 17, 2020
e0b8c9c
Use behavior name instead of brain
Apr 17, 2020
8d37045
Fix yamato_utils
Apr 17, 2020
b20ab5d
Merge branch 'master' of github.com:Unity-Technologies/ml-agents into…
Apr 17, 2020
50eafc2
Delete curricula
Apr 17, 2020
cf920b6
Merge branch 'master' of github.com:Unity-Technologies/ml-agents into…
Apr 17, 2020
eb3df94
Make RunOptions and YAML compatible
Apr 20, 2020
690da4a
Use entire YAML as run options, and override via CLI
Apr 20, 2020
3c26e57
Add test
Apr 21, 2020
4171565
Rename walljump yaml SAC
Apr 21, 2020
4330c02
Fix newline formatting
Apr 21, 2020
41dd3f7
Merge branch 'master' into develop-single-config
Apr 22, 2020
75ad833
Update SAC configurations
Apr 22, 2020
5a75d7f
Edit Changelog
Apr 22, 2020
9ba2ef3
Fix learn.py tests
Apr 22, 2020
36c9591
Update strikers vs goalie and add Worm
Apr 23, 2020
79c8a6c
Merge branch 'master' into develop-single-config
Apr 23, 2020
850be42
Merge branch 'develop-single-config' into develop-default-overrides
Apr 23, 2020
7c0b901
Change name to run_options_dict
Apr 29, 2020
54929d5
use custom action to store config file
Apr 29, 2020
3568c2e
Merge branch 'master' into develop-single-config
Apr 29, 2020
4214745
remove leftover debug
Apr 29, 2020
4d27ed5
Use hard links in Migrating.md
Apr 29, 2020
d3c611a
Merge branch 'develop-single-config' into develop-default-overrides
Apr 29, 2020
27d2f10
Add capture framerate back into CLI options
Apr 29, 2020
d9757b4
Merge branch 'master' into develop-default-overrides
Apr 29, 2020
4ab60ce
Move CLI args back into learn.py
Apr 29, 2020
20b664f
Update CHANGELOG
Apr 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ you will need to change the signature of its `Write()` method. (#3834)
will allow use with python 3.8 using tensorflow 2.2.0rc3.
- `UnityRLCapabilities` was added to help inform users when RL features are mismatched between C# and Python packages. (#3831)
- Unity Player logs are now written out to the results directory. (#3877)
- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)

### Bug Fixes

Expand Down
41 changes: 41 additions & 0 deletions ml-agents/mlagents/trainers/cli_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import Set
import argparse


class DetectDefault(argparse.Action):
    """
    Internal argparse Action that remembers which options were explicitly
    supplied, so they can be told apart from options left at their default.
    """

    # Shared across every parser/instance: the `dest` names of all arguments
    # for which this action has been invoked.
    non_default_args: Set[str] = set()

    def __call__(self, arg_parser, namespace, values, option_string=None):
        """
        Store `values` on the namespace under this action's `dest` and record
        that `dest` as having been set explicitly.
        """
        target = self.dest
        setattr(namespace, target, values)
        DetectDefault.non_default_args.add(target)


class DetectDefaultStoreTrue(DetectDefault):
    """
    Flag-style variant of DetectDefault, used in place of argparse's
    "store_true" action. The option consumes no values by default and
    stores True when it appears.
    """

    def __init__(self, nargs=0, **kwargs):
        # nargs defaults to 0 so the option behaves like a bare flag.
        init_kwargs = dict(kwargs, nargs=nargs)
        super().__init__(**init_kwargs)

    def __call__(self, arg_parser, namespace, values, option_string=None):
        # Whatever argparse passed as `values` is ignored; the flag's presence
        # is what matters, so True is handed to the parent implementation.
        flag_value = True
        super().__call__(arg_parser, namespace, flag_value, option_string)


class StoreConfigFile(argparse.Action):
    """
    Custom Action to store the config file location not as part of the CLI args.
    This is because we want to maintain an equivalence between the config file's
    contents and the args themselves.

    The parsed value is kept on the class (``StoreConfigFile.trainer_config_path``)
    and the corresponding attribute is removed from the argparse namespace.
    """

    # Annotation only: the attribute is not defined until the action has run
    # at least once, so reading it before parsing raises AttributeError.
    trainer_config_path: str

    def __call__(self, arg_parser, namespace, values, option_string=None):
        """
        Remove this action's `dest` from the namespace and remember `values`
        as the config file path.
        """
        # The attribute may already be gone if the action fires more than once
        # on the same namespace (e.g. a repeated optional flag); an
        # unconditional delattr would raise AttributeError in that case.
        if hasattr(namespace, self.dest):
            delattr(namespace, self.dest)
        StoreConfigFile.trainer_config_path = values
74 changes: 52 additions & 22 deletions ml-agents/mlagents/trainers/learn.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
GaugeWriter,
ConsoleWriter,
)
from mlagents.trainers.cli_utils import (
StoreConfigFile,
DetectDefault,
DetectDefaultStoreTrue,
)
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.sampler_class import SamplerManager
from mlagents.trainers.exception import SamplerException, TrainerConfigError
Expand All @@ -48,18 +53,20 @@ def _create_parser():
argparser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
argparser.add_argument("trainer_config_path")
argparser.add_argument("trainer_config_path", action=StoreConfigFile)
argparser.add_argument(
"--env",
default=None,
dest="env_path",
help="Path to the Unity executable to train",
action=DetectDefault,
)
argparser.add_argument(
"--lesson",
default=0,
type=int,
help="The lesson to start with when performing curriculum training",
action=DetectDefault,
)
argparser.add_argument(
"--keep-checkpoints",
Expand All @@ -68,19 +75,20 @@ def _create_parser():
help="The maximum number of model checkpoints to keep. Checkpoints are saved after the"
"number of steps specified by the save-freq option. Once the maximum number of checkpoints"
"has been reached, the oldest checkpoint is deleted when saving a new checkpoint.",
action=DetectDefault,
)
argparser.add_argument(
"--load",
default=False,
dest="load_model",
action="store_true",
action=DetectDefaultStoreTrue,
help=argparse.SUPPRESS, # Deprecated but still usable for now.
)
argparser.add_argument(
"--resume",
default=False,
dest="resume",
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to resume training from a checkpoint. Specify a --run-id to use this option. "
"If set, the training code loads an already trained model to initialize the neural network "
"before resuming training. This option is only valid when the models exist, and have the same "
Expand All @@ -90,7 +98,7 @@ def _create_parser():
"--force",
default=False,
dest="force",
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to force-overwrite this run-id's existing summary and model data. (Without "
"this flag, attempting to train a model with a run-id that has been used before will throw "
"an error.",
Expand All @@ -103,6 +111,7 @@ def _create_parser():
"as the saved model itself. If you use TensorBoard to view the training statistics, "
"always set a unique run-id for each training run. (The statistics for all runs with the "
"same id are combined as if they were produced by a the same session.)",
action=DetectDefault,
)
argparser.add_argument(
"--initialize-from",
Expand All @@ -112,31 +121,34 @@ def _create_parser():
"This can be used, for instance, to fine-tune an existing model on a new environment. "
"Note that the previously saved models must have the same behavior parameters as your "
"current environment.",
action=DetectDefault,
)
argparser.add_argument(
"--save-freq",
default=50000,
type=int,
help="How often (in steps) to save the model during training",
action=DetectDefault,
)
argparser.add_argument(
"--seed",
default=-1,
type=int,
help="A number to use as a seed for the random number generator used by the training code",
action=DetectDefault,
)
argparser.add_argument(
"--train",
default=False,
dest="train_model",
action="store_true",
action=DetectDefaultStoreTrue,
help=argparse.SUPPRESS,
)
argparser.add_argument(
"--inference",
default=False,
dest="inference",
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to run in Python inference mode (i.e. no training). Use with --resume to load "
"a model trained with an existing run ID.",
)
Expand All @@ -149,25 +161,27 @@ def _create_parser():
"will use the port (base_port + worker_id), where the worker_id is sequential IDs given to "
"each instance from 0 to (num_envs - 1). Note that when training using the Editor rather "
"than an executable, the base port will be ignored.",
action=DetectDefault,
)
argparser.add_argument(
"--num-envs",
default=1,
type=int,
help="The number of concurrent Unity environment instances to collect experiences "
"from when training",
action=DetectDefault,
)
argparser.add_argument(
"--no-graphics",
default=False,
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to run the Unity executable in no-graphics mode (i.e. without initializing "
"the graphics driver. Use this only if your agents don't use visual observations.",
)
argparser.add_argument(
"--debug",
default=False,
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to enable debug-level logging for some parts of the code",
)
argparser.add_argument(
Expand All @@ -178,11 +192,12 @@ def _create_parser():
"process these as Unity Command Line Arguments. You should choose different argument names if "
"you want to create environment-specific arguments. All arguments after this flag will be "
"passed to the executable.",
action=DetectDefault,
)
argparser.add_argument(
"--cpu",
default=False,
action="store_true",
action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)

Expand All @@ -195,41 +210,47 @@ def _create_parser():
type=int,
help="The width of the executable window of the environment(s) in pixels "
"(ignored for editor training).",
action=DetectDefault,
)
eng_conf.add_argument(
"--height",
default=84,
type=int,
help="The height of the executable window of the environment(s) in pixels "
"(ignored for editor training)",
action=DetectDefault,
)
eng_conf.add_argument(
"--quality-level",
default=5,
type=int,
help="The quality level of the environment(s). Equivalent to calling "
"QualitySettings.SetQualityLevel in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--time-scale",
default=20,
type=float,
help="The time scale of the Unity environment(s). Equivalent to setting "
"Time.timeScale in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--target-frame-rate",
default=-1,
type=int,
help="The target frame rate of the Unity environment(s). Equivalent to setting "
"Application.targetFrameRate in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--capture-frame-rate",
default=60,
type=int,
help="The capture frame rate of the Unity environment(s). Equivalent to setting "
"Time.captureFramerate in Unity.",
action=DetectDefault,
)
return argparser

Expand Down Expand Up @@ -277,26 +298,35 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions":
configs loaded from files.
"""
argparse_args = vars(args)
config_path = argparse_args["trainer_config_path"]
# Load YAML and apply overrides as needed
run_options_dict = {}
run_options_dict.update(argparse_args)
config_path = StoreConfigFile.trainer_config_path

# Load YAML
yaml_config = load_config(config_path)
try:
argparse_args["behaviors"] = yaml_config["behaviors"]
except KeyError:
# This is the only option that is not optional and has no defaults.
if "behaviors" not in yaml_config:
raise TrainerConfigError(
"Trainer configurations not found. Make sure your YAML file has a section for behaviors."
)
# Use the YAML file values for all values not specified in the CLI.
for key, val in yaml_config.items():
# Detect bad config options
if not hasattr(RunOptions, key):
raise TrainerConfigError(
"The option {} was specified in your YAML file, but is invalid.".format(
key
)
)
if key not in DetectDefault.non_default_args:
run_options_dict[key] = val

argparse_args["parameter_randomization"] = yaml_config.get(
"parameter_randomization", None
)
# Keep deprecated --load working, TODO: remove
argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
# Since argparse accepts file paths in the config options which don't exist in CommandLineOptions,
# these keys will need to be deleted to use the **/splat operator below.
argparse_args.pop("trainer_config_path")
run_options_dict["resume"] = (
run_options_dict["resume"] or run_options_dict["load_model"]
)

return RunOptions(**vars(args))
return RunOptions(**run_options_dict)


def get_version_string() -> str:
Expand Down
Loading