From 8d0244b4cda2a37cc830b1952a29957de40c23ae Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Mon, 1 Jun 2020 11:58:57 -0700 Subject: [PATCH 01/44] passing sampler configs to c# --- .../Examples/3DBall/Scripts/Ball3DAgent.cs | 6 +++++ .../EnvironmentParametersChannel.cs | 27 ++++++++++++++++++- .../environment_parameters_channel.py | 15 +++++++++++ ml-agents/mlagents/trainers/learn.py | 17 ++++++++++++ .../trainers/subprocess_env_manager.py | 5 +++- 5 files changed, 68 insertions(+), 2 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs b/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs index 7487ec9951..77cf89fb50 100644 --- a/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs +++ b/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs @@ -13,6 +13,12 @@ public override void Initialize() { m_BallRb = ball.GetComponent(); m_ResetParams = Academy.Instance.EnvironmentParameters; + var samplerType = m_ResetParams.GetWithDefault("mass-sampler-type", -1.0f); + var min = m_ResetParams.GetWithDefault("mass-min", -1.0f); + var max = m_ResetParams.GetWithDefault("mass-max", -1.0f); + Debug.Log(samplerType); + Debug.Log(min); + Debug.Log(max); SetResetParameters(); } diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 5c9fd509b6..2c98bc347f 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -9,7 +9,8 @@ namespace Unity.MLAgents.SideChannels /// internal enum EnvironmentDataTypes { - Float = 0 + Float = 0, + Sampler = 1 } /// @@ -48,6 +49,16 @@ protected override void OnMessageReceived(IncomingMessage msg) m_RegisteredActions.TryGetValue(key, out action); action?.Invoke(value); } + else if ((int)EnvironmentDataTypes.Sampler == type) + { + var samplerType = msg.ReadFloat32(); + var statOne = msg.ReadFloat32(); + var statTwo = msg.ReadFloat32(); + + m_Parameters[key+"-sampler-type"] = samplerType; + m_Parameters[key+"-min"] = statOne; + m_Parameters[key+"-max"] = statTwo; + } else { Debug.LogWarning("EnvironmentParametersChannel received an unknown data type."); @@ -68,6 +79,20 @@ public float GetWithDefault(string key, float defaultValue) return hasKey ? valueOut : defaultValue; } + /// + /// Returns the parameter value associated with the provided key. Returns the default + /// value if one doesn't exist. + /// + /// Parameter key. + /// Default value to return. + /// + public float GetListWithDefault(string key, float defaultValue) + { + float valueOut; + bool hasKey = m_Parameters.TryGetValue(key, out valueOut); + return hasKey ? valueOut : defaultValue; + } + /// /// Registers a callback for the associated parameter key. Will overwrite any existing /// actions for this parameter key. diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 958364b675..9770dd0b74 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -2,6 +2,7 @@ from mlagents_envs.exception import UnityCommunicationException import uuid from enum import IntEnum +from typing import List class EnvironmentParametersChannel(SideChannel): @@ -13,6 +14,7 @@ class EnvironmentParametersChannel(SideChannel): class EnvironmentDataTypes(IntEnum): FLOAT = 0 + SAMPLER = 1 def __init__(self) -> None: channel_id = uuid.UUID(("534c891e-810f-11ea-a9d0-822485860400")) @@ -35,3 +37,16 @@ def set_float_parameter(self, key: str, value: float) -> None: msg.write_int32(self.EnvironmentDataTypes.FLOAT) msg.write_float32(value) super().queue_message_to_send(msg) + + def set_sampler_parameters(self, key: str, values: List[float]) -> None: + """ + Sets a float environment parameter in the Unity Environment. + :param key: The string identifier of the parameter. + :param value: The float value of the parameter. + """ + msg = OutgoingMessage() + msg.write_string(key) + msg.write_int32(self.EnvironmentDataTypes.SAMPLER) + for value in values: + msg.write_float32(value) + super().queue_message_to_send(msg) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 121ccf5c9c..75029d7946 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -125,9 +125,12 @@ def run_training(run_seed: int, options: RunOptions) -> None: maybe_meta_curriculum = try_create_meta_curriculum( options.curriculum, env_manager, checkpoint_settings.lesson ) + maybe_add_samplers(options.parameter_randomization, env_manager) + sampler_manager, resampling_interval = create_sampler_manager( options.parameter_randomization, run_seed ) + trainer_factory = TrainerFactory( options.behaviors, checkpoint_settings.run_id, @@ -186,6 +189,20 @@ def write_timing_tree(output_dir: str) -> None: ) +def maybe_add_samplers(sampler_config, env): + restructured_sampler_config: Dict[str, List[float]] = {} + if sampler_config is not None: + for v, config in sampler_config.items(): + if v != "resampling-interval": + sampler_type = 0.0 if config["sampler-type"] == "uniform" else 1.0 + restructured_sampler_config[v] = [ + sampler_type, + config["min_value"], + config["max_value"], + ] + env.reset(config=restructured_sampler_config) + + def create_sampler_manager(sampler_config, run_seed=None): resample_interval = None if sampler_config is not None: diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 0687cbdf14..f87df51fce 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -175,7 +175,10 @@ def external_brains(): _send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains()) elif req.cmd == EnvironmentCommand.RESET: for k, v in req.payload.items(): - env_parameters.set_float_parameter(k, v) + if isinstance(v, float): + env_parameters.set_float_parameter(k, v) + elif isinstance(v, list): + env_parameters.set_sampler_parameters(k, v) env.reset() all_step_result = _generate_all_results() _send_response(EnvironmentCommand.RESET, all_step_result) From eb0c495548a84b8b2dc6239e097f3b3f662f1e90 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 3 Jun 2020 11:46:53 -0700 Subject: [PATCH 02/44] ignoring commit checks --- .../Examples/3DBall/Scripts/Ball3DAgent.cs | 11 +-- .../Runtime/EnvironmentParameters.cs | 5 ++ com.unity.ml-agents/Runtime/Sampler.cs | 74 +++++++++++++++++++ com.unity.ml-agents/Runtime/Sampler.cs.meta | 11 +++ .../EnvironmentParametersChannel.cs | 25 +++++-- config/ppo/3DBall_randomize.yaml | 5 -- .../environment_parameters_channel.py | 2 + ml-agents/mlagents/trainers/learn.py | 48 +++--------- ml-agents/mlagents/trainers/sampler_utils.py | 41 ++++++++++ ml-agents/mlagents/trainers/settings.py | 52 +++++++++++++ .../trainers/subprocess_env_manager.py | 1 + .../mlagents/trainers/trainer_controller.py | 26 ++----- 12 files changed, 223 insertions(+), 78 deletions(-) create mode 100644 com.unity.ml-agents/Runtime/Sampler.cs create mode 100644 com.unity.ml-agents/Runtime/Sampler.cs.meta create mode 100644 ml-agents/mlagents/trainers/sampler_utils.py diff --git a/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs b/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs index 77cf89fb50..1e930597aa 100644 --- a/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs +++ b/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs @@ -13,12 +13,6 @@ public override void Initialize() { m_BallRb = ball.GetComponent(); m_ResetParams = Academy.Instance.EnvironmentParameters; - var samplerType = m_ResetParams.GetWithDefault("mass-sampler-type", -1.0f); - var min = m_ResetParams.GetWithDefault("mass-min", -1.0f); - var max = m_ResetParams.GetWithDefault("mass-max", -1.0f); - Debug.Log(samplerType); - Debug.Log(min); - Debug.Log(max); SetResetParameters(); } @@ -80,8 +74,9 @@ public override void Heuristic(float[] actionsOut) public void SetBall() { //Set the attributes of the ball by fetching the information from the academy - m_BallRb.mass = m_ResetParams.GetWithDefault("mass", 1.0f); - var scale = m_ResetParams.GetWithDefault("scale", 1.0f); + //m_BallRb.mass = m_ResetParams.GetWithDefault("mass", 1.0f); + m_BallRb.mass = m_ResetParams.Sample("mass", 1.0f); + var scale = m_ResetParams.Sample("scale", 1.0f); ball.transform.localScale = new Vector3(scale, scale, scale); } diff --git a/com.unity.ml-agents/Runtime/EnvironmentParameters.cs b/com.unity.ml-agents/Runtime/EnvironmentParameters.cs index dbae3cd0be..f864ce4803 100644 --- a/com.unity.ml-agents/Runtime/EnvironmentParameters.cs +++ b/com.unity.ml-agents/Runtime/EnvironmentParameters.cs @@ -41,6 +41,11 @@ public float GetWithDefault(string key, float defaultValue) return m_Channel.GetWithDefault(key, defaultValue); } + public float Sample(string key, float defaultValue) + { + return m_Channel.Sample(key, defaultValue); + } + /// /// Registers a callback action for the provided parameter key. Will overwrite any /// existing action for that parameter. The callback will be called whenever the parameter diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs new file mode 100644 index 0000000000..c8b2e6fdf6 --- /dev/null +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -0,0 +1,74 @@ +using System; +using System.Collections.Generic; +using Unity.MLAgents; +using Unity.MLAgents.Inference.Utils; +using UnityEngine; +using Random=UnityEngine.Random; + +namespace Unity.MLAgents +{ + /// + /// The types of distributions from which to sample reset parameters. + /// + public enum SamplerType + { + /// + /// Samples a reset parameter from a uniform distribution. + /// + Uniform = 0, + + /// + /// Samples a reset parameter from a Gaussian distribution. + /// + Gaussian = 1 + } + + /// + /// Takes a list of floats that encode a sampling distribution and returns the sampling function. + /// + public sealed class SamplerFactory + { + + int m_Seed; + + /// + /// Constructor. + /// + internal SamplerFactory(int seed) + { + m_Seed = seed; + } + + /// + /// Create the sampling distribution described by the encoding. + /// + /// List of floats the describe sampling destribution. + public Func CreateSampler(IList encoding) + { + if ((int)encoding[0] == (int)SamplerType.Uniform) + { + return CreateUniformSampler(encoding[1], encoding[2]); + } + else if ((int)encoding[0] == (int)SamplerType.Gaussian) + { + return CreateGaussianSampler(encoding[1], encoding[2]); + } + else{ + Debug.LogWarning("EnvironmentParametersChannel received an unknown data type."); + return () => 0; + } + + } + + public Func CreateUniformSampler(float min, float max) + { + return () => Random.Range(min, max); + } + + public Func CreateGaussianSampler(float mean, float stddev) + { + RandomNormal distr = new RandomNormal(m_Seed, mean, stddev); + return () => (float)distr.NextDouble(); + } + } +} diff --git a/com.unity.ml-agents/Runtime/Sampler.cs.meta b/com.unity.ml-agents/Runtime/Sampler.cs.meta new file mode 100644 index 0000000000..950e28c5b6 --- /dev/null +++ b/com.unity.ml-agents/Runtime/Sampler.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 39ce0ea5a8b2e47f696f6efc807029f6 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 2c98bc347f..ba099dd84c 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -20,9 +20,12 @@ internal enum EnvironmentDataTypes internal class EnvironmentParametersChannel : SideChannel { Dictionary m_Parameters = new Dictionary(); + Dictionary> m_Samplers = new Dictionary>(); Dictionary> m_RegisteredActions = new Dictionary>(); + SamplerFactory m_SamplerFactory = new SamplerFactory(1); + const string k_EnvParamsId = "534c891e-810f-11ea-a9d0-822485860400"; /// @@ -51,13 +54,14 @@ protected override void OnMessageReceived(IncomingMessage msg) } else if ((int)EnvironmentDataTypes.Sampler == type) { - var samplerType = msg.ReadFloat32(); - var statOne = msg.ReadFloat32(); - var statTwo = msg.ReadFloat32(); - - m_Parameters[key+"-sampler-type"] = samplerType; - m_Parameters[key+"-min"] = statOne; - m_Parameters[key+"-max"] = statTwo; + var encoding = msg.ReadFloatList(); + m_Samplers[key] = m_SamplerFactory.CreateSampler(encoding); + //var samplerType = msg.ReadFloat32(); + //var statOne = msg.ReadFloat32(); + //var statTwo = msg.ReadFloat32(); + //m_Parameters[key+"-sampler-type"] = samplerType; + //m_Parameters[key+"-min"] = statOne; + //m_Parameters[key+"-max"] = statTwo; } else { @@ -79,6 +83,13 @@ public float GetWithDefault(string key, float defaultValue) return hasKey ? valueOut : defaultValue; } + public float Sample(string key, float defaultValue) + { + Func valueOut; + bool hasKey = m_Samplers.TryGetValue(key, out valueOut); + return hasKey ? valueOut() : defaultValue; + } + /// /// Returns the parameter value associated with the provided key. Returns the default /// value if one doesn't exist. diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml index aee0a3b1e2..14407938d5 100644 --- a/config/ppo/3DBall_randomize.yaml +++ b/config/ppo/3DBall_randomize.yaml @@ -27,15 +27,10 @@ behaviors: threaded: true parameter_randomization: - resampling-interval: 5000 mass: sampler-type: uniform min_value: 0.5 max_value: 10 - gravity: - sampler-type: uniform - min_value: 7 - max_value: 12 scale: sampler-type: uniform min_value: 0.75 diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 9770dd0b74..abd05d9d97 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -47,6 +47,8 @@ def set_sampler_parameters(self, key: str, values: List[float]) -> None: msg = OutgoingMessage() msg.write_string(key) msg.write_int32(self.EnvironmentDataTypes.SAMPLER) + # length of list + msg.write_int32(len(values)) for value in values: msg.write_float32(value) super().queue_message_to_send(msg) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 75029d7946..1c55477e07 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -22,8 +22,7 @@ ) from mlagents.trainers.cli_utils import parser from mlagents_envs.environment import UnityEnvironment -from mlagents.trainers.sampler_class import SamplerManager -from mlagents.trainers.exception import SamplerException +from mlagents.trainers.sampler_utils import SamplerUtils from mlagents.trainers.settings import RunOptions from mlagents_envs.base_env import BaseEnv from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager @@ -127,10 +126,6 @@ def run_training(run_seed: int, options: RunOptions) -> None: ) maybe_add_samplers(options.parameter_randomization, env_manager) - sampler_manager, resampling_interval = create_sampler_manager( - options.parameter_randomization, run_seed - ) - trainer_factory = TrainerFactory( options.behaviors, checkpoint_settings.run_id, @@ -151,8 +146,6 @@ def run_training(run_seed: int, options: RunOptions) -> None: maybe_meta_curriculum, not checkpoint_settings.inference, run_seed, - sampler_manager, - resampling_interval, ) # Begin training @@ -191,38 +184,19 @@ def write_timing_tree(output_dir: str) -> None: def maybe_add_samplers(sampler_config, env): restructured_sampler_config: Dict[str, List[float]] = {} - if sampler_config is not None: - for v, config in sampler_config.items(): - if v != "resampling-interval": - sampler_type = 0.0 if config["sampler-type"] == "uniform" else 1.0 - restructured_sampler_config[v] = [ - sampler_type, - config["min_value"], - config["max_value"], - ] - env.reset(config=restructured_sampler_config) - - -def create_sampler_manager(sampler_config, run_seed=None): - resample_interval = None + # TODO send seed if sampler_config is not None: if "resampling-interval" in sampler_config: - # Filter arguments that do not exist in the environment - resample_interval = sampler_config.pop("resampling-interval") - if (resample_interval <= 0) or (not isinstance(resample_interval, int)): - raise SamplerException( - "Specified resampling-interval is not valid. Please provide" - " a positive integer value for resampling-interval" - ) - - else: - raise SamplerException( - "Resampling interval was not specified in the sampler file." - " Please specify it with the 'resampling-interval' key in the sampler config file." + logger.warning( + "The resampling-interval is no longer necessary to specify for parameter randomization and is being ignored." ) - - sampler_manager = SamplerManager(sampler_config, run_seed) - return sampler_manager, resample_interval + sampler_config.pop("resampling-interval") + for param, config in sampler_config.items(): + list_of_config_floats = SamplerUtils.validate_and_structure_config( + param, config + ) + restructured_sampler_config[param] = list_of_config_floats + env.reset(config=restructured_sampler_config) def try_create_meta_curriculum( diff --git a/ml-agents/mlagents/trainers/sampler_utils.py b/ml-agents/mlagents/trainers/sampler_utils.py new file mode 100644 index 0000000000..faf3b56692 --- /dev/null +++ b/ml-agents/mlagents/trainers/sampler_utils.py @@ -0,0 +1,41 @@ +import numpy as np +from enum import Enum +from typing import Dict, List + +from mlagents.trainers.exception import SamplerException + + +class SamplerUtils: + """ + Maintain a directory of available samplers and their configs. + Validates sampler configs are correct. + """ + + NAME_TO_ARGS = { + "uniform": ["min_value", "max_value"], + "gaussian": ["mean", "st_dev"], + "multirangeuniform": ["intervals"], + } + NAME_TO_FLOAT_REPR = {"uniform": 0.0, "gaussian": 1.0, "multirangeuniform": 2.0} + + @staticmethod + def validate_and_structure_config( + param: str, config: Dict[str, List[float]] + ) -> List[float]: + # Config must have a valid type + if ( + "sampler-type" not in config + or config["sampler-type"] not in SamplerUtils.NAME_TO_ARGS + ): + raise SamplerException( + f"The sampler config for environment parameter {param} does not contain a sampler-type or the sampler-type is invalid." + ) + # Check args are correct + sampler_type = config.pop("sampler-type") + if list(config.keys()) != SamplerUtils.NAME_TO_ARGS[sampler_type]: + raise SamplerException( + "The sampler config for environment parameter {} does not contain the correct arguments. Please specify {}.".format( + param, SamplerUtils.NAME_TO_ARGS[config["sampler-type"]] + ) + ) + return [SamplerUtils.NAME_TO_FLOAT_REPR[sampler_type]] + list(config.values()) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 4c89073d19..75fa795b0a 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -151,6 +151,58 @@ class CuriositySettings(RewardSignalSettings): learning_rate: float = 3e-4 +class ParameterRandomizationType(Enum): + UNIFORM: str = "uniform" + GAUSSIAN: str = "gaussian" + MULTIRANGEUNIFORM: str = "multirangeuniform" + + def to_settings(self) -> type: + _mapping = { + ParameterRandomizationType.UNIFORM: UniformSettings, + ParameterRandomizationType.GAUSSIAN: GaussianSettings, + ParameterRandomizationType.MULTIRANGEUNIFORM: MultiRangeUniformSettings, + } + return _mapping[self] + + +@attr.s(auto_attribs=True) +class ParameterRandomizationSettings: + @staticmethod + def structure(d: Mapping, t: type) -> Any: + """ + Helper method to structure a Dict of ParameterRandomizationSettings class. Meant to be registered with + cattr.register_structure_hook() and called with cattr.structure(). This is needed to handle + the special Enum selection of ParameterRandomizationSettings classes. + """ + if not isinstance(d, Mapping): + raise TrainerConfigError( + f"Unsupported parameter randomization configuration {d}." + ) + d_final: Dict[ParameterRandomizationType, ParameterRandomizationSettings] = {} + for key, val in d.items(): + enum_key = ParameterRandomizationType(key) + t = enum_key.to_settings() + d_final[enum_key] = strict_to_cls(val, t) + return d_final + + +@attr.s(auto_attribs=True) +class UniformSettings(ParameterRandomizationSettings): + min_value: float = 1.0 + max_value: float = 1.0 + + +@attr.s(auto_attribs=True) +class GaussianSettings(ParameterRandomizationSettings): + mean: float = 1.0 + st_dev: float = 1.0 + + +@attr.s(auto_attribs=True) +class MultiRangeUniformSettings(ParameterRandomizationSettings): + intervals: List[List[float]] = [[1.0, 1.0]] + + @attr.s(auto_attribs=True) class SelfPlaySettings: save_steps: int = 20000 diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index f87df51fce..b47a2b92a2 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -175,6 +175,7 @@ def external_brains(): _send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains()) elif req.cmd == EnvironmentCommand.RESET: for k, v in req.payload.items(): + print(k, v) if isinstance(v, float): env_parameters.set_float_parameter(k, v) elif isinstance(v, list): diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index b0cbcb2c9e..5e0ddb1f57 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -17,7 +17,6 @@ UnityCommunicationException, UnityCommunicatorStoppedException, ) -from mlagents.trainers.sampler_class import SamplerManager from mlagents_envs.timers import ( hierarchical_timer, timed, @@ -42,8 +41,6 @@ def __init__( meta_curriculum: Optional[MetaCurriculum], train: bool, training_seed: int, - sampler_manager: SamplerManager, - resampling_interval: Optional[int], ): """ :param output_path: Path to save the model. @@ -53,8 +50,6 @@ def __init__( :param meta_curriculum: MetaCurriculum object which stores information about all curricula. :param train: Whether to train model, or only run inference. :param training_seed: Seed to use for Numpy and Tensorflow random number generation. - :param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters. - :param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled. :param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging. """ self.trainers: Dict[str, Trainer] = {} @@ -66,8 +61,6 @@ def __init__( self.save_freq = save_freq self.train_model = train self.meta_curriculum = meta_curriculum - self.sampler_manager = sampler_manager - self.resampling_interval = resampling_interval self.ghost_controller = self.trainer_factory.ghost_controller self.trainer_threads: List[threading.Thread] = [] @@ -144,12 +137,10 @@ def _reset_env(self, env: EnvManager) -> None: A Data structure corresponding to the initial reset state of the environment. """ - sampled_reset_param = self.sampler_manager.sample_all() new_meta_curriculum_config = ( self.meta_curriculum.get_config() if self.meta_curriculum else {} ) - sampled_reset_param.update(new_meta_curriculum_config) - env.reset(config=sampled_reset_param) + env.reset(config=new_meta_curriculum_config) def _should_save_model(self, global_step: int) -> bool: return ( @@ -227,7 +218,7 @@ def start_learning(self, env_manager: EnvManager) -> None: n_steps = self.advance(env_manager) for _ in range(n_steps): global_step += 1 - self.reset_env_if_ready(env_manager, global_step) + self.reset_env_if_ready(env_manager) if self._should_save_model(global_step): self._save_model() # Stop advancing trainers @@ -269,7 +260,7 @@ def end_trainer_episodes( if changed: self.trainers[brain_name].reward_buffer.clear() - def reset_env_if_ready(self, env: EnvManager, steps: int) -> None: + def reset_env_if_ready(self, env: EnvManager) -> None: if self.meta_curriculum: # Get the sizes of the reward buffers. reward_buff_sizes = { @@ -285,16 +276,9 @@ def reset_env_if_ready(self, env: EnvManager, steps: int) -> None: # If any lessons were incremented or the environment is # ready to be reset meta_curriculum_reset = any(lessons_incremented.values()) - # Check if we are performing generalization training and we have finished the - # specified number of steps for the lesson - generalization_reset = ( - not self.sampler_manager.is_empty() - and (steps != 0) - and (self.resampling_interval) - and (steps % self.resampling_interval == 0) - ) + # If ghost trainer swapped teams ghost_controller_reset = self.ghost_controller.should_reset() - if meta_curriculum_reset or generalization_reset or ghost_controller_reset: + if meta_curriculum_reset or ghost_controller_reset: self.end_trainer_episodes(env, lessons_incremented) @timed From e9d8350eb03a527ca2a9a181eb9e96bd6411d355 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 3 Jun 2020 15:02:10 -0700 Subject: [PATCH 03/44] use settings.py to check PR config --- config/ppo/3DBall_randomize.yaml | 12 +- ml-agents/mlagents/trainers/learn.py | 12 +- ml-agents/mlagents/trainers/sampler_class.py | 193 ------------------ ml-agents/mlagents/trainers/sampler_utils.py | 41 ---- ml-agents/mlagents/trainers/settings.py | 34 ++- .../trainers/subprocess_env_manager.py | 1 - 6 files changed, 39 insertions(+), 254 deletions(-) delete mode 100644 ml-agents/mlagents/trainers/sampler_class.py delete mode 100644 ml-agents/mlagents/trainers/sampler_utils.py diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml index 14407938d5..f361c27c25 100644 --- a/config/ppo/3DBall_randomize.yaml +++ b/config/ppo/3DBall_randomize.yaml @@ -28,10 +28,10 @@ behaviors: parameter_randomization: mass: - sampler-type: uniform - min_value: 0.5 - max_value: 10 + uniform: + min_value: 0.5 + max_value: 10 scale: - sampler-type: uniform - min_value: 0.75 - max_value: 3 + uniform: + min_value: 0.75 + max_value: 3 diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 1c55477e07..e066c0e492 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -22,7 +22,6 @@ ) from mlagents.trainers.cli_utils import parser from mlagents_envs.environment import UnityEnvironment -from mlagents.trainers.sampler_utils import SamplerUtils from mlagents.trainers.settings import RunOptions from mlagents_envs.base_env import BaseEnv from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager @@ -182,20 +181,19 @@ def write_timing_tree(output_dir: str) -> None: ) -def maybe_add_samplers(sampler_config, env): +def maybe_add_samplers( + sampler_config: Optional[Dict], env: SubprocessEnvManager +) -> None: restructured_sampler_config: Dict[str, List[float]] = {} # TODO send seed if sampler_config is not None: if "resampling-interval" in sampler_config: logger.warning( - "The resampling-interval is no longer necessary to specify for parameter randomization and is being ignored." + "The resampling-interval is no longer necessary for parameter randomization. It is being ignored." ) sampler_config.pop("resampling-interval") for param, config in sampler_config.items(): - list_of_config_floats = SamplerUtils.validate_and_structure_config( - param, config - ) - restructured_sampler_config[param] = list_of_config_floats + restructured_sampler_config[param] = config env.reset(config=restructured_sampler_config) diff --git a/ml-agents/mlagents/trainers/sampler_class.py b/ml-agents/mlagents/trainers/sampler_class.py deleted file mode 100644 index f1a7c20327..0000000000 --- a/ml-agents/mlagents/trainers/sampler_class.py +++ /dev/null @@ -1,193 +0,0 @@ -import numpy as np -from typing import Union, Optional, Type, List, Dict, Any -from abc import ABC, abstractmethod - -from mlagents.trainers.exception import SamplerException - - -class Sampler(ABC): - @abstractmethod - def sample_parameter(self) -> float: - pass - - -class UniformSampler(Sampler): - """ - Uniformly draws a single sample in the range [min_value, max_value). - """ - - def __init__( - self, - min_value: Union[int, float], - max_value: Union[int, float], - seed: Optional[int] = None, - ): - """ - :param min_value: minimum value of the range to be sampled uniformly from - :param max_value: maximum value of the range to be sampled uniformly from - :param seed: Random seed used for making draws from the uniform sampler - """ - self.min_value = min_value - self.max_value = max_value - # Draw from random state to allow for consistent reset parameter draw for a seed - self.random_state = np.random.RandomState(seed) - - def sample_parameter(self) -> float: - """ - Draws and returns a sample from the specified interval - """ - return self.random_state.uniform(self.min_value, self.max_value) - - -class MultiRangeUniformSampler(Sampler): - """ - Draws a single sample uniformly from the intervals provided. The sampler - first picks an interval based on a weighted selection, with the weights - assigned to an interval based on its range. After picking the range, - it proceeds to pick a value uniformly in that range. - """ - - def __init__( - self, intervals: List[List[Union[int, float]]], seed: Optional[int] = None - ): - """ - :param intervals: List of intervals to draw uniform samples from - :param seed: Random seed used for making uniform draws from the specified intervals - """ - self.intervals = intervals - # Measure the length of the intervals - interval_lengths = [abs(x[1] - x[0]) for x in self.intervals] - cum_interval_length = sum(interval_lengths) - # Assign weights to an interval proportionate to the interval size - self.interval_weights = [x / cum_interval_length for x in interval_lengths] - # Draw from random state to allow for consistent reset parameter draw for a seed - self.random_state = np.random.RandomState(seed) - - def sample_parameter(self) -> float: - """ - Selects an interval to pick and then draws a uniform sample from the picked interval - """ - cur_min, cur_max = self.intervals[ - self.random_state.choice(len(self.intervals), p=self.interval_weights) - ] - return self.random_state.uniform(cur_min, cur_max) - - -class GaussianSampler(Sampler): - """ - Draw a single sample value from a normal (gaussian) distribution. - This sampler is characterized by the mean and the standard deviation. - """ - - def __init__( - self, - mean: Union[float, int], - st_dev: Union[float, int], - seed: Optional[int] = None, - ): - """ - :param mean: Specifies the mean of the gaussian distribution to draw from - :param st_dev: Specifies the standard devation of the gaussian distribution to draw from - :param seed: Random seed used for making gaussian draws from the sample - """ - self.mean = mean - self.st_dev = st_dev - # Draw from random state to allow for consistent reset parameter draw for a seed - self.random_state = np.random.RandomState(seed) - - def sample_parameter(self) -> float: - """ - Returns a draw from the specified Gaussian distribution - """ - return self.random_state.normal(self.mean, self.st_dev) - - -class SamplerFactory: - """ - Maintain a directory of all samplers available. - Add new samplers using the register_sampler method. - """ - - NAME_TO_CLASS = { - "uniform": UniformSampler, - "gaussian": GaussianSampler, - "multirange_uniform": MultiRangeUniformSampler, - } - - @staticmethod - def register_sampler(name: str, sampler_cls: Type[Sampler]) -> None: - """ - Registers the sampe in the Sampler Factory to be used later - :param name: String name to set as key for the sampler_cls in the factory - :param sampler_cls: Sampler object to associate to the name in the factory - """ - SamplerFactory.NAME_TO_CLASS[name] = sampler_cls - - @staticmethod - def init_sampler_class( - name: str, params: Dict[str, Any], seed: Optional[int] = None - ) -> Sampler: - """ - Initializes the sampler class associated with the name with the params - :param name: Name of the sampler in the factory to initialize - :param params: Parameters associated to the sampler attached to the name - :param seed: Random seed to be used to set deterministic random draws for the sampler - """ - if name not in SamplerFactory.NAME_TO_CLASS: - raise SamplerException( - name + " sampler is not registered in the SamplerFactory." - " Use the register_sample method to register the string" - " associated to your sampler in the SamplerFactory." - ) - sampler_cls = SamplerFactory.NAME_TO_CLASS[name] - params["seed"] = seed - try: - return sampler_cls(**params) - except TypeError: - raise SamplerException( - "The sampler class associated to the " + name + " key in the factory " - "was not provided the required arguments. Please ensure that the sampler " - "config file consists of the appropriate keys for this sampler class." - ) - - -class SamplerManager: - def __init__( - self, reset_param_dict: Dict[str, Any], seed: Optional[int] = None - ) -> None: - """ - :param reset_param_dict: Arguments needed for initializing the samplers - :param seed: Random seed to be used for drawing samples from the samplers - """ - self.reset_param_dict = reset_param_dict if reset_param_dict else {} - assert isinstance(self.reset_param_dict, dict) - self.samplers: Dict[str, Sampler] = {} - for param_name, cur_param_dict in self.reset_param_dict.items(): - if "sampler-type" not in cur_param_dict: - raise SamplerException( - "'sampler_type' argument hasn't been supplied for the {0} parameter".format( - param_name - ) - ) - sampler_name = cur_param_dict.pop("sampler-type") - param_sampler = SamplerFactory.init_sampler_class( - sampler_name, cur_param_dict, seed - ) - - self.samplers[param_name] = param_sampler - - def is_empty(self) -> bool: - """ - Check for if sampler_manager is empty. - """ - return not bool(self.samplers) - - def sample_all(self) -> Dict[str, float]: - """ - Loop over all samplers and draw a sample from each one for generating - next set of reset parameter values. - """ - res = {} - for param_name, param_sampler in list(self.samplers.items()): - res[param_name] = param_sampler.sample_parameter() - return res diff --git a/ml-agents/mlagents/trainers/sampler_utils.py b/ml-agents/mlagents/trainers/sampler_utils.py deleted file mode 100644 index faf3b56692..0000000000 --- a/ml-agents/mlagents/trainers/sampler_utils.py +++ /dev/null @@ -1,41 +0,0 @@ -import numpy as np -from enum import Enum -from typing import Dict, List - -from mlagents.trainers.exception import SamplerException - - -class SamplerUtils: - """ - Maintain a directory of available samplers and their configs. - Validates sampler configs are correct. - """ - - NAME_TO_ARGS = { - "uniform": ["min_value", "max_value"], - "gaussian": ["mean", "st_dev"], - "multirangeuniform": ["intervals"], - } - NAME_TO_FLOAT_REPR = {"uniform": 0.0, "gaussian": 1.0, "multirangeuniform": 2.0} - - @staticmethod - def validate_and_structure_config( - param: str, config: Dict[str, List[float]] - ) -> List[float]: - # Config must have a valid type - if ( - "sampler-type" not in config - or config["sampler-type"] not in SamplerUtils.NAME_TO_ARGS - ): - raise SamplerException( - f"The sampler config for environment parameter {param} does not contain a sampler-type or the sampler-type is invalid." - ) - # Check args are correct - sampler_type = config.pop("sampler-type") - if list(config.keys()) != SamplerUtils.NAME_TO_ARGS[sampler_type]: - raise SamplerException( - "The sampler config for environment parameter {} does not contain the correct arguments. Please specify {}.".format( - param, SamplerUtils.NAME_TO_ARGS[config["sampler-type"]] - ) - ) - return [SamplerUtils.NAME_TO_FLOAT_REPR[sampler_type]] + list(config.values()) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 75fa795b0a..7ffc8b03fa 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -164,6 +164,14 @@ def to_settings(self) -> type: } return _mapping[self] + def to_float(self) -> float: + _mapping = { + ParameterRandomizationType.UNIFORM: 0.0, + ParameterRandomizationType.GAUSSIAN: 1.0, + ParameterRandomizationType.MULTIRANGEUNIFORM: 2.0, + } + return _mapping[self] + @attr.s(auto_attribs=True) class ParameterRandomizationSettings: @@ -178,11 +186,12 @@ def structure(d: Mapping, t: type) -> Any: raise TrainerConfigError( f"Unsupported parameter randomization configuration {d}." ) - d_final: Dict[ParameterRandomizationType, ParameterRandomizationSettings] = {} - for key, val in d.items(): - enum_key = ParameterRandomizationType(key) - t = enum_key.to_settings() - d_final[enum_key] = strict_to_cls(val, t) + d_final: Dict[str, List[float]] = {} + for param, param_config in d.items(): + for key, val in param_config.items(): + enum_key = ParameterRandomizationType(key) + t = enum_key.to_settings() + d_final[param] = strict_to_cls(val, t).to_float() return d_final @@ -191,17 +200,26 @@ class UniformSettings(ParameterRandomizationSettings): min_value: float = 1.0 max_value: float = 1.0 + def to_float(self) -> List[float]: + return [0.0, self.min_value, self.max_value] + @attr.s(auto_attribs=True) class GaussianSettings(ParameterRandomizationSettings): mean: float = 1.0 st_dev: float = 1.0 + def to_float(self) -> List[float]: + return [1.0, self.mean, self.st_dev] + @attr.s(auto_attribs=True) class MultiRangeUniformSettings(ParameterRandomizationSettings): intervals: List[List[float]] = [[1.0, 1.0]] + def to_float(self) -> List[float]: + return [2.0] + [val for interval in self.intervals for val in interval] + @attr.s(auto_attribs=True) class SelfPlaySettings: @@ -357,7 +375,7 @@ class RunOptions(ExportableSettings): ) env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings) engine_settings: EngineSettings = attr.ib(factory=EngineSettings) - parameter_randomization: Optional[Dict] = None + parameter_randomization: Optional[Dict[str, ParameterRandomizationSettings]] = None curriculum: Optional[Dict[str, CurriculumSettings]] = None checkpoint_settings: CheckpointSettings = attr.ib(factory=CheckpointSettings) @@ -368,6 +386,10 @@ class RunOptions(ExportableSettings): cattr.register_structure_hook(EnvironmentSettings, strict_to_cls) cattr.register_structure_hook(EngineSettings, strict_to_cls) cattr.register_structure_hook(CheckpointSettings, strict_to_cls) + cattr.register_structure_hook( + Dict[str, ParameterRandomizationSettings], + ParameterRandomizationSettings.structure, + ) cattr.register_structure_hook(CurriculumSettings, strict_to_cls) cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) cattr.register_structure_hook( diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index b47a2b92a2..f87df51fce 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -175,7 +175,6 @@ def external_brains(): _send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains()) elif req.cmd == EnvironmentCommand.RESET: for k, v in req.payload.items(): - print(k, v) if isinstance(v, float): env_parameters.set_float_parameter(k, v) elif isinstance(v, list): From e856f7b9ce0c9c87a321ae2e1c2a16f53d62956e Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 3 Jun 2020 15:58:34 -0700 Subject: [PATCH 04/44] some cleanups/ interval error checking --- .../Examples/3DBall/Scripts/Ball3DAgent.cs | 5 +-- .../Runtime/EnvironmentParameters.cs | 5 --- .../EnvironmentParametersChannel.cs | 38 +++---------------- .../environment_parameters_channel.py | 4 +- ml-agents/mlagents/trainers/settings.py | 18 ++++++++- 5 files changed, 26 insertions(+), 44 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs b/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs index 1e930597aa..7487ec9951 100644 --- a/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs +++ b/Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs @@ -74,9 +74,8 @@ public override void Heuristic(float[] actionsOut) public void SetBall() { //Set the attributes of the ball by fetching the information from the academy - //m_BallRb.mass = m_ResetParams.GetWithDefault("mass", 1.0f); - m_BallRb.mass = m_ResetParams.Sample("mass", 1.0f); - var scale = m_ResetParams.Sample("scale", 1.0f); + m_BallRb.mass = m_ResetParams.GetWithDefault("mass", 1.0f); + var scale = m_ResetParams.GetWithDefault("scale", 1.0f); ball.transform.localScale = new Vector3(scale, scale, scale); } diff --git a/com.unity.ml-agents/Runtime/EnvironmentParameters.cs b/com.unity.ml-agents/Runtime/EnvironmentParameters.cs index f864ce4803..dbae3cd0be 100644 --- a/com.unity.ml-agents/Runtime/EnvironmentParameters.cs +++ b/com.unity.ml-agents/Runtime/EnvironmentParameters.cs @@ -41,11 +41,6 @@ public float GetWithDefault(string key, float defaultValue) return m_Channel.GetWithDefault(key, defaultValue); } - public float Sample(string key, float defaultValue) - { - return m_Channel.Sample(key, defaultValue); - } - /// /// Registers a callback action for the provided parameter key. Will overwrite any /// existing action for that parameter. The callback will be called whenever the parameter diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index ba099dd84c..546dd3d2bf 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -19,8 +19,7 @@ internal enum EnvironmentDataTypes /// internal class EnvironmentParametersChannel : SideChannel { - Dictionary m_Parameters = new Dictionary(); - Dictionary> m_Samplers = new Dictionary>(); + Dictionary> m_Parameters = new Dictionary>(); Dictionary> m_RegisteredActions = new Dictionary>(); @@ -46,7 +45,7 @@ protected override void OnMessageReceived(IncomingMessage msg) { var value = msg.ReadFloat32(); - m_Parameters[key] = value; + m_Parameters[key] = () => value; Action action; m_RegisteredActions.TryGetValue(key, out action); @@ -55,13 +54,7 @@ protected override void OnMessageReceived(IncomingMessage msg) else if ((int)EnvironmentDataTypes.Sampler == type) { var encoding = msg.ReadFloatList(); - m_Samplers[key] = m_SamplerFactory.CreateSampler(encoding); - //var samplerType = msg.ReadFloat32(); - //var statOne = msg.ReadFloat32(); - //var statTwo = msg.ReadFloat32(); - //m_Parameters[key+"-sampler-type"] = samplerType; - //m_Parameters[key+"-min"] = statOne; - //m_Parameters[key+"-max"] = statTwo; + m_Parameters[key] = m_SamplerFactory.CreateSampler(encoding); } else { @@ -77,33 +70,12 @@ protected override void OnMessageReceived(IncomingMessage msg) /// Default value to return. /// public float GetWithDefault(string key, float defaultValue) - { - float valueOut; - bool hasKey = m_Parameters.TryGetValue(key, out valueOut); - return hasKey ? valueOut : defaultValue; - } - - public float Sample(string key, float defaultValue) { Func valueOut; - bool hasKey = m_Samplers.TryGetValue(key, out valueOut); - return hasKey ? valueOut() : defaultValue; - } - - /// - /// Returns the parameter value associated with the provided key. Returns the default - /// value if one doesn't exist. - /// - /// Parameter key. - /// Default value to return. - /// - public float GetListWithDefault(string key, float defaultValue) - { - float valueOut; bool hasKey = m_Parameters.TryGetValue(key, out valueOut); - return hasKey ? valueOut : defaultValue; + return hasKey ? valueOut() : defaultValue; } - + /// /// Registers a callback for the associated parameter key. Will overwrite any existing /// actions for this parameter key. diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index abd05d9d97..4700b3b7eb 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -40,9 +40,9 @@ def set_float_parameter(self, key: str, value: float) -> None: def set_sampler_parameters(self, key: str, values: List[float]) -> None: """ - Sets a float environment parameter in the Unity Environment. + Sets a float encoding of an environment parameter sampelr. :param key: The string identifier of the parameter. - :param value: The float value of the parameter. + :param value: The float encoding of the sampler. """ msg = OutgoingMessage() msg.write_string(key) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 46a5b99e5c..942a60625c 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -201,6 +201,10 @@ class UniformSettings(ParameterRandomizationSettings): max_value: float = 1.0 def to_float(self) -> List[float]: + if self.min_value > self.max_value: + raise TrainerConfigError( + "Minimum value is greater than maximum value in uniform sampler." + ) return [0.0, self.min_value, self.max_value] @@ -218,7 +222,19 @@ class MultiRangeUniformSettings(ParameterRandomizationSettings): intervals: List[List[float]] = [[1.0, 1.0]] def to_float(self) -> List[float]: - return [2.0] + [val for interval in self.intervals for val in interval] + floats: List[float] = [] + for interval in self.intervals: + if len(interval) != 2: + raise TrainerConfigError( + f"The sampling interval {interval} must contain exactly two values." + ) + [min_value, max_value] = interval + if min_value > max_value: + raise TrainerConfigError( + f"Minimum value is greater than maximum value in interval {interval}." + ) + floats += interval + return [2.0] + floats @attr.s(auto_attribs=True) From daa56880756736f56ee4f224a8e5316abd3149fb Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 4 Jun 2020 14:16:28 -0700 Subject: [PATCH 05/44] using validator to check settings --- com.unity.ml-agents/Runtime/Sampler.cs | 7 +++--- ml-agents/mlagents/trainers/settings.py | 29 +++++++++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index c8b2e6fdf6..9c2673f083 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -3,14 +3,13 @@ using Unity.MLAgents; using Unity.MLAgents.Inference.Utils; using UnityEngine; -using Random=UnityEngine.Random; namespace Unity.MLAgents { /// /// The types of distributions from which to sample reset parameters. /// - public enum SamplerType + internal enum SamplerType { /// /// Samples a reset parameter from a uniform distribution. @@ -26,7 +25,7 @@ public enum SamplerType /// /// Takes a list of floats that encode a sampling distribution and returns the sampling function. /// - public sealed class SamplerFactory + internal sealed class SamplerFactory { int m_Seed; @@ -62,7 +61,7 @@ public Func CreateSampler(IList encoding) public Func CreateUniformSampler(float min, float max) { - return () => Random.Range(min, max); + return () => UnityEngine.Random.Range(min, max); } public Func CreateGaussianSampler(float mean, float stddev) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 942a60625c..ffdc86a411 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -191,20 +191,27 @@ def structure(d: Mapping, t: type) -> Any: for key, val in param_config.items(): enum_key = ParameterRandomizationType(key) t = enum_key.to_settings() - d_final[param] = strict_to_cls(val, t).to_float() + d_final[param] = strict_to_cls(val, t).to_float_encoding() return d_final @attr.s(auto_attribs=True) class UniformSettings(ParameterRandomizationSettings): - min_value: float = 1.0 + min_value: float = attr.ib() max_value: float = 1.0 - def to_float(self) -> List[float]: + @min_value.default + def _min_value_default(self): + return 1.0 + + @min_value.validator + def _check_intervals(self, attribute, value): if self.min_value > self.max_value: raise TrainerConfigError( "Minimum value is greater than maximum value in uniform sampler." ) + + def to_float_encoding(self) -> List[float]: return [0.0, self.min_value, self.max_value] @@ -213,16 +220,20 @@ class GaussianSettings(ParameterRandomizationSettings): mean: float = 1.0 st_dev: float = 1.0 - def to_float(self) -> List[float]: + def to_float_encoding(self) -> List[float]: return [1.0, self.mean, self.st_dev] @attr.s(auto_attribs=True) class MultiRangeUniformSettings(ParameterRandomizationSettings): - intervals: List[List[float]] = [[1.0, 1.0]] + intervals: List[List[float]] = attr.ib() - def to_float(self) -> List[float]: - floats: List[float] = [] + @intervals.default + def _intervals_default(self): + return [[1.0, 1.0]] + + @intervals.validator + def _check_intervals(self, attribute, value): for interval in self.intervals: if len(interval) != 2: raise TrainerConfigError( @@ -233,6 +244,10 @@ def to_float(self) -> List[float]: raise TrainerConfigError( f"Minimum value is greater than maximum value in interval {interval}." ) + + def to_float_encoding(self) -> List[float]: + floats: List[float] = [] + for interval in self.intervals: floats += interval return [2.0] + floats From 9756a2c38f31b1d952d3531ec66ad2627127cadb Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 4 Jun 2020 14:23:53 -0700 Subject: [PATCH 06/44] rename min value function check --- ml-agents/mlagents/trainers/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index ffdc86a411..e1c3c19da9 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -205,7 +205,7 @@ def _min_value_default(self): return 1.0 @min_value.validator - def _check_intervals(self, attribute, value): + def _check_min_value(self, attribute, value): if self.min_value > self.max_value: raise TrainerConfigError( "Minimum value is greater than maximum value in uniform sampler." From ec2493fa591d451a45cf54196f4f39645107acb6 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 4 Jun 2020 15:25:33 -0700 Subject: [PATCH 07/44] type checks for parameter randomization settings/enforces float encoding --- .../SideChannels/EnvironmentParametersChannel.cs | 2 +- .../side_channel/environment_parameters_channel.py | 4 ++-- ml-agents/mlagents/trainers/settings.py | 13 +++++++++++-- .../mlagents/trainers/subprocess_env_manager.py | 5 +++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 546dd3d2bf..57ef5d2500 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -75,7 +75,7 @@ public float GetWithDefault(string key, float defaultValue) bool hasKey = m_Parameters.TryGetValue(key, out valueOut); return hasKey ? valueOut() : defaultValue; } - + /// /// Registers a callback for the associated parameter key. Will overwrite any existing /// actions for this parameter key. diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 4700b3b7eb..51c2450969 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -40,9 +40,9 @@ def set_float_parameter(self, key: str, value: float) -> None: def set_sampler_parameters(self, key: str, values: List[float]) -> None: """ - Sets a float encoding of an environment parameter sampelr. + Sets a float encoding of an environment parameter sampler. :param key: The string identifier of the parameter. - :param value: The float encoding of the sampler. + :param values: The float encoding of the sampler. """ msg = OutgoingMessage() msg.write_string(key) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index e1c3c19da9..345becded5 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -4,6 +4,7 @@ from enum import Enum import collections import argparse +import abc from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser from mlagents.trainers.cli_utils import load_config @@ -174,7 +175,7 @@ def to_float(self) -> float: @attr.s(auto_attribs=True) -class ParameterRandomizationSettings: +class ParameterRandomizationSettings(abc.ABC): @staticmethod def structure(d: Mapping, t: type) -> Any: """ @@ -191,9 +192,14 @@ def structure(d: Mapping, t: type) -> Any: for key, val in param_config.items(): enum_key = ParameterRandomizationType(key) t = enum_key.to_settings() - d_final[param] = strict_to_cls(val, t).to_float_encoding() + d_final[param] = strict_to_cls(val, t) return d_final + @abc.abstractmethod + def to_float_encoding(self) -> List[float]: + "Returns the float encoding of the sampler" + pass + @attr.s(auto_attribs=True) class UniformSettings(ParameterRandomizationSettings): @@ -212,6 +218,7 @@ def _check_min_value(self, attribute, value): ) def to_float_encoding(self) -> List[float]: + "Returns the sampler type followed by the min and max values" return [0.0, self.min_value, self.max_value] @@ -221,6 +228,7 @@ class GaussianSettings(ParameterRandomizationSettings): st_dev: float = 1.0 def to_float_encoding(self) -> List[float]: + "Returns the sampler type followed by the mean and standard deviation" return [1.0, self.mean, self.st_dev] @@ -246,6 +254,7 @@ def _check_intervals(self, attribute, value): ) def to_float_encoding(self) -> List[float]: + "Returns the sampler type followed by a flattened list of the interval values" floats: List[float] = [] for interval in self.intervals: floats += interval diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index f87df51fce..1f41c904d7 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -23,6 +23,7 @@ get_timer_root, ) from mlagents.trainers.brain import BrainParameters +from mlagents.trainers.settings import ParameterRandomizationSettings from mlagents.trainers.action_info import ActionInfo from mlagents_envs.side_channel.environment_parameters_channel import ( EnvironmentParametersChannel, @@ -177,8 +178,8 @@ def external_brains(): for k, v in req.payload.items(): if isinstance(v, float): env_parameters.set_float_parameter(k, v) - elif isinstance(v, list): - env_parameters.set_sampler_parameters(k, v) + elif isinstance(v, ParameterRandomizationSettings): + env_parameters.set_sampler_parameters(k, v.to_float_encoding()) env.reset() all_step_result = _generate_all_results() _send_response(EnvironmentCommand.RESET, all_step_result) From aa4ebd94df5388780387d2313ff4fa71c622254d Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 4 Jun 2020 15:58:27 -0700 Subject: [PATCH 08/44] tests for settings --- .../trainers/tests/test_sampler_class.py | 96 ------------------- .../mlagents/trainers/tests/test_settings.py | 63 ++++++++++++ 2 files changed, 63 insertions(+), 96 deletions(-) delete mode 100644 ml-agents/mlagents/trainers/tests/test_sampler_class.py diff --git a/ml-agents/mlagents/trainers/tests/test_sampler_class.py b/ml-agents/mlagents/trainers/tests/test_sampler_class.py deleted file mode 100644 index 29954c6599..0000000000 --- a/ml-agents/mlagents/trainers/tests/test_sampler_class.py +++ /dev/null @@ -1,96 +0,0 @@ -import pytest - -from mlagents.trainers.sampler_class import SamplerManager -from mlagents.trainers.sampler_class import ( - UniformSampler, - MultiRangeUniformSampler, - GaussianSampler, -) -from mlagents.trainers.exception import TrainerError - - -def sampler_config_1(): - return { - "mass": {"sampler-type": "uniform", "min_value": 5, "max_value": 10}, - "gravity": { - "sampler-type": "multirange_uniform", - "intervals": [[8, 11], [15, 20]], - }, - } - - -def check_value_in_intervals(val, intervals): - check_in_bounds = [a <= val <= b for a, b in intervals] - return any(check_in_bounds) - - -def test_sampler_config_1(): - config = sampler_config_1() - sampler = SamplerManager(config) - - assert sampler.is_empty() is False - assert isinstance(sampler.samplers["mass"], UniformSampler) - assert isinstance(sampler.samplers["gravity"], MultiRangeUniformSampler) - - cur_sample = sampler.sample_all() - - # Check uniform sampler for mass - assert sampler.samplers["mass"].min_value == config["mass"]["min_value"] - assert sampler.samplers["mass"].max_value == config["mass"]["max_value"] - assert config["mass"]["min_value"] <= cur_sample["mass"] - assert config["mass"]["max_value"] >= cur_sample["mass"] - - # Check multirange_uniform sampler for gravity - assert sampler.samplers["gravity"].intervals == config["gravity"]["intervals"] - assert check_value_in_intervals( - cur_sample["gravity"], sampler.samplers["gravity"].intervals - ) - - -def sampler_config_2(): - return {"angle": {"sampler-type": "gaussian", "mean": 0, "st_dev": 1}} - - -def test_sampler_config_2(): - config = sampler_config_2() - sampler = SamplerManager(config) - assert sampler.is_empty() is False - assert isinstance(sampler.samplers["angle"], GaussianSampler) - - # Check angle gaussian sampler - assert sampler.samplers["angle"].mean == config["angle"]["mean"] - assert sampler.samplers["angle"].st_dev == config["angle"]["st_dev"] - - -def test_empty_samplers(): - empty_sampler = SamplerManager({}) - assert empty_sampler.is_empty() - empty_cur_sample = empty_sampler.sample_all() - assert empty_cur_sample == {} - - none_sampler = SamplerManager(None) - assert none_sampler.is_empty() - none_cur_sample = none_sampler.sample_all() - assert none_cur_sample == {} - - -def incorrect_uniform_sampler(): - # Do not specify required arguments to uniform sampler - return {"mass": {"sampler-type": "uniform", "min-value": 10}} - - -def incorrect_sampler_config(): - # Do not specify 'sampler-type' key - return {"mass": {"min-value": 2, "max-value": 30}} - - -def test_incorrect_uniform_sampler(): - config = incorrect_uniform_sampler() - with pytest.raises(TrainerError): - SamplerManager(config) - - -def test_incorrect_sampler(): - config = incorrect_sampler_config() - with pytest.raises(TrainerError): - SamplerManager(config) diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 6a8b2b9355..5d19a019de 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -11,6 +11,10 @@ RewardSignalType, RewardSignalSettings, CuriositySettings, + ParameterRandomizationSettings, + UniformSettings, + GaussianSettings, + MultiRangeUniformSettings, TrainerType, strict_to_cls, ) @@ -149,3 +153,62 @@ def test_reward_signal_structure(): RewardSignalSettings.structure( "notadict", Dict[RewardSignalType, RewardSignalSettings] ) + + +def test_parameter_randomization_structure(): + """ + Tests the ParameterRandomizationSettings structure method and all validators. + """ + parameter_randomization_dict = { + "mass": {"uniform": {"min_value": 1.0, "max_value": 2.0}}, + "scale": {"gaussian": {"mean": 1.0, "st_dev": 2.0}}, + "length": {"multirangeuniform": {"intervals": [[1.0, 2.0], [3.0, 4.0]]}}, + } + parameter_randomization_distributions = ParameterRandomizationSettings.structure( + parameter_randomization_dict, Dict[str, ParameterRandomizationSettings] + ) + assert isinstance(parameter_randomization_distributions["mass"], UniformSettings) + assert isinstance(parameter_randomization_distributions["scale"], GaussianSettings) + assert isinstance( + parameter_randomization_distributions["length"], MultiRangeUniformSettings + ) + + # Check invalid distribution type + invalid_distribution_dict = {"mass": {"beta": {"alpha": 1.0, "beta": 2.0}}} + with pytest.raises(ValueError): + ParameterRandomizationSettings.structure( + invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + ) + + # Check min less than max in uniform + invalid_distribution_dict = { + "mass": {"uniform": {"min_value": 2.0, "max_value": 1.0}} + } + with pytest.raises(TrainerConfigError): + ParameterRandomizationSettings.structure( + invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + ) + + # Check min less than max in multirange + invalid_distribution_dict = { + "mass": {"multirangeuniform": {"intervals": [[2.0, 1.0]]}} + } + with pytest.raises(TrainerConfigError): + ParameterRandomizationSettings.structure( + invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + ) + + # Check multirange has valid intervals + invalid_distribution_dict = { + "mass": {"multirangeuniform": {"intervals": [[1.0, 2.0], [3.0]]}} + } + with pytest.raises(TrainerConfigError): + ParameterRandomizationSettings.structure( + invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + ) + + # Check non-Dict input + with pytest.raises(TrainerConfigError): + ParameterRandomizationSettings.structure( + "notadict", Dict[str, ParameterRandomizationSettings] + ) From 460a2eaf0e335d82def912111d720114a05e8750 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 4 Jun 2020 17:44:53 -0700 Subject: [PATCH 09/44] error properly when a keyword is not followed by a valid config in yaml --- ml-agents/mlagents/trainers/learn.py | 10 +--------- ml-agents/mlagents/trainers/settings.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 150bccd5f3..bdb692f2dd 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -197,17 +197,9 @@ def write_timing_tree(output_dir: str) -> None: def maybe_add_samplers( sampler_config: Optional[Dict], env: SubprocessEnvManager ) -> None: - restructured_sampler_config: Dict[str, List[float]] = {} # TODO send seed if sampler_config is not None: - if "resampling-interval" in sampler_config: - logger.warning( - "The resampling-interval is no longer necessary for parameter randomization. It is being ignored." - ) - sampler_config.pop("resampling-interval") - for param, config in sampler_config.items(): - restructured_sampler_config[param] = config - env.reset(config=restructured_sampler_config) + env.reset(config=sampler_config) def try_create_meta_curriculum( diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 345becded5..ec8335d7e7 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -11,6 +11,10 @@ from mlagents.trainers.exception import TrainerConfigError from mlagents.trainers.models import ScheduleType, EncoderType +from mlagents_envs import logging_util + +logger = logging_util.get_logger(__name__) + def check_and_structure(key: str, value: Any, class_type: type) -> Any: attr_fields_dict = attr.fields_dict(class_type) @@ -189,6 +193,15 @@ def structure(d: Mapping, t: type) -> Any: ) d_final: Dict[str, List[float]] = {} for param, param_config in d.items(): + if param == "resampling-interval": + logger.warning( + "The resampling-interval is no longer necessary for parameter randomization. It is being ignored." + ) + continue + if not isinstance(param_config, Mapping): + raise TrainerConfigError( + f"Unsupported distribution configuration {param_config}." + ) for key, val in param_config.items(): enum_key = ParameterRandomizationType(key) t = enum_key.to_settings() From 54b695903b4ad4cb0f737c59503b986b3448e8a5 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 5 Jun 2020 10:18:27 -0700 Subject: [PATCH 10/44] seed each sampler individually --- com.unity.ml-agents/Runtime/Sampler.cs | 21 ++++++++----------- .../EnvironmentParametersChannel.cs | 9 ++++++-- .../environment_parameters_channel.py | 8 ++++--- ml-agents/mlagents/trainers/settings.py | 8 ++++--- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index 9c2673f083..d7e2b490e4 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -27,30 +27,26 @@ internal enum SamplerType /// internal sealed class SamplerFactory { - - int m_Seed; - /// /// Constructor. /// - internal SamplerFactory(int seed) + internal SamplerFactory() { - m_Seed = seed; } /// /// Create the sampling distribution described by the encoding. /// /// List of floats the describe sampling destribution. - public Func CreateSampler(IList encoding) + public Func CreateSampler(IList encoding, int seed) { if ((int)encoding[0] == (int)SamplerType.Uniform) { - return CreateUniformSampler(encoding[1], encoding[2]); + return CreateUniformSampler(encoding[1], encoding[2], seed); } else if ((int)encoding[0] == (int)SamplerType.Gaussian) { - return CreateGaussianSampler(encoding[1], encoding[2]); + return CreateGaussianSampler(encoding[1], encoding[2], seed); } else{ Debug.LogWarning("EnvironmentParametersChannel received an unknown data type."); @@ -59,14 +55,15 @@ public Func CreateSampler(IList encoding) } - public Func CreateUniformSampler(float min, float max) + public Func CreateUniformSampler(float min, float max, int seed) { - return () => UnityEngine.Random.Range(min, max); + System.Random distr = new System.Random(seed); + return () => min + (float)distr.NextDouble() * (max - min); } - public Func CreateGaussianSampler(float mean, float stddev) + public Func CreateGaussianSampler(float mean, float stddev, int seed) { - RandomNormal distr = new RandomNormal(m_Seed, mean, stddev); + RandomNormal distr = new RandomNormal(seed, mean, stddev); return () => (float)distr.NextDouble(); } } diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 57ef5d2500..eb7dc27f30 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -23,7 +23,7 @@ internal class EnvironmentParametersChannel : SideChannel Dictionary> m_RegisteredActions = new Dictionary>(); - SamplerFactory m_SamplerFactory = new SamplerFactory(1); + SamplerFactory m_SamplerFactory = new SamplerFactory(); const string k_EnvParamsId = "534c891e-810f-11ea-a9d0-822485860400"; @@ -53,8 +53,13 @@ protected override void OnMessageReceived(IncomingMessage msg) } else if ((int)EnvironmentDataTypes.Sampler == type) { + int seed = msg.ReadInt32(); + if (seed == -1) + { + seed = UnityEngine.Random.Range(0, 10000); + } var encoding = msg.ReadFloatList(); - m_Parameters[key] = m_SamplerFactory.CreateSampler(encoding); + m_Parameters[key] = m_SamplerFactory.CreateSampler(encoding, seed); } else { diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 51c2450969..b51932d408 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -47,8 +47,10 @@ def set_sampler_parameters(self, key: str, values: List[float]) -> None: msg = OutgoingMessage() msg.write_string(key) msg.write_int32(self.EnvironmentDataTypes.SAMPLER) - # length of list - msg.write_int32(len(values)) - for value in values: + # Write seed + msg.write_int32(int(values[0])) + msg.write_int32(len(values[1:])) + # Sampler encoding + for value in values[1:]: msg.write_float32(value) super().queue_message_to_send(msg) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index ec8335d7e7..23cb3fcf7e 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -180,6 +180,8 @@ def to_float(self) -> float: @attr.s(auto_attribs=True) class ParameterRandomizationSettings(abc.ABC): + seed: int = parser.get_default("seed") + @staticmethod def structure(d: Mapping, t: type) -> Any: """ @@ -232,7 +234,7 @@ def _check_min_value(self, attribute, value): def to_float_encoding(self) -> List[float]: "Returns the sampler type followed by the min and max values" - return [0.0, self.min_value, self.max_value] + return [self.seed, 0.0, self.min_value, self.max_value] @attr.s(auto_attribs=True) @@ -242,7 +244,7 @@ class GaussianSettings(ParameterRandomizationSettings): def to_float_encoding(self) -> List[float]: "Returns the sampler type followed by the mean and standard deviation" - return [1.0, self.mean, self.st_dev] + return [self.seed, 1.0, self.mean, self.st_dev] @attr.s(auto_attribs=True) @@ -271,7 +273,7 @@ def to_float_encoding(self) -> List[float]: floats: List[float] = [] for interval in self.intervals: floats += interval - return [2.0] + floats + return [self.seed, 2.0] + floats @attr.s(auto_attribs=True) From b4469cadde56b4973d25b766296ec41aaa5af299 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 5 Jun 2020 10:54:37 -0700 Subject: [PATCH 11/44] using to_float for encoding --- .../environment_parameters_channel.py | 16 +++++++------ ml-agents/mlagents/trainers/settings.py | 23 +++++++++++-------- .../trainers/subprocess_env_manager.py | 4 +++- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index b51932d408..936508d5e9 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -38,19 +38,21 @@ def set_float_parameter(self, key: str, value: float) -> None: msg.write_float32(value) super().queue_message_to_send(msg) - def set_sampler_parameters(self, key: str, values: List[float]) -> None: + def set_sampler_parameters( + self, key: str, encoding: List[float], seed: int + ) -> None: """ Sets a float encoding of an environment parameter sampler. :param key: The string identifier of the parameter. - :param values: The float encoding of the sampler. + :param encoding: The float encoding of the sampler. + :param seed: The random seed to initialize the sampler. """ msg = OutgoingMessage() msg.write_string(key) msg.write_int32(self.EnvironmentDataTypes.SAMPLER) - # Write seed - msg.write_int32(int(values[0])) - msg.write_int32(len(values[1:])) - # Sampler encoding - for value in values[1:]: + msg.write_int32(seed) + # for read float list in C# + msg.write_int32(len(encoding)) + for value in encoding: msg.write_float32(value) super().queue_message_to_send(msg) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 23cb3fcf7e..660135f6e2 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -169,13 +169,14 @@ def to_settings(self) -> type: } return _mapping[self] - def to_float(self) -> float: - _mapping = { - ParameterRandomizationType.UNIFORM: 0.0, - ParameterRandomizationType.GAUSSIAN: 1.0, - ParameterRandomizationType.MULTIRANGEUNIFORM: 2.0, + @staticmethod + def to_float(t: type) -> float: + _mapping: Dict[type, float] = { + UniformSettings: 0.0, + GaussianSettings: 1.0, + MultiRangeUniformSettings: 2.0, } - return _mapping[self] + return _mapping[t] @attr.s(auto_attribs=True) @@ -234,7 +235,11 @@ def _check_min_value(self, attribute, value): def to_float_encoding(self) -> List[float]: "Returns the sampler type followed by the min and max values" - return [self.seed, 0.0, self.min_value, self.max_value] + return [ + ParameterRandomizationType.to_float(type(self)), + self.min_value, + self.max_value, + ] @attr.s(auto_attribs=True) @@ -244,7 +249,7 @@ class GaussianSettings(ParameterRandomizationSettings): def to_float_encoding(self) -> List[float]: "Returns the sampler type followed by the mean and standard deviation" - return [self.seed, 1.0, self.mean, self.st_dev] + return [ParameterRandomizationType.to_float(type(self)), self.mean, self.st_dev] @attr.s(auto_attribs=True) @@ -273,7 +278,7 @@ def to_float_encoding(self) -> List[float]: floats: List[float] = [] for interval in self.intervals: floats += interval - return [self.seed, 2.0] + floats + return [ParameterRandomizationType.to_float(type(self))] + floats @attr.s(auto_attribs=True) diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 1f41c904d7..ae16ffb1bb 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -179,7 +179,9 @@ def external_brains(): if isinstance(v, float): env_parameters.set_float_parameter(k, v) elif isinstance(v, ParameterRandomizationSettings): - env_parameters.set_sampler_parameters(k, v.to_float_encoding()) + env_parameters.set_sampler_parameters( + k, v.to_float_encoding(), v.seed + ) env.reset() all_step_result = _generate_all_results() _send_response(EnvironmentCommand.RESET, all_step_result) From 3d260470e08dadd7738d8f537427b812f2953931 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 5 Jun 2020 11:59:06 -0700 Subject: [PATCH 12/44] use run_seed if no seed specified in yaml --- .../Runtime/SideChannels/EnvironmentParametersChannel.cs | 4 ---- ml-agents/mlagents/trainers/learn.py | 9 ++++++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index eb7dc27f30..90acd93e61 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -54,10 +54,6 @@ protected override void OnMessageReceived(IncomingMessage msg) else if ((int)EnvironmentDataTypes.Sampler == type) { int seed = msg.ReadInt32(); - if (seed == -1) - { - seed = UnityEngine.Random.Range(0, 10000); - } var encoding = msg.ReadFloatList(); m_Parameters[key] = m_SamplerFactory.CreateSampler(encoding, seed); } diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index bdb692f2dd..91e0745ca9 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -131,7 +131,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: maybe_meta_curriculum = try_create_meta_curriculum( options.curriculum, env_manager, restore=checkpoint_settings.resume ) - maybe_add_samplers(options.parameter_randomization, env_manager) + maybe_add_samplers(options.parameter_randomization, env_manager, run_seed) trainer_factory = TrainerFactory( options.behaviors, @@ -195,10 +195,13 @@ def write_timing_tree(output_dir: str) -> None: def maybe_add_samplers( - sampler_config: Optional[Dict], env: SubprocessEnvManager + sampler_config: Optional[Dict], env: SubprocessEnvManager, run_seed: int ) -> None: - # TODO send seed if sampler_config is not None: + # If the seed is not specified in yaml, this will grab the run seed + for _, v in sampler_config.items(): + if v.seed == -1: + v.seed = run_seed env.reset(config=sampler_config) From 7f116cd9e3be3ef164efb8e01ac57819eb97046d Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 5 Jun 2020 12:47:20 -0700 Subject: [PATCH 13/44] add docstring for maybe_add_samplers --- ml-agents/mlagents/trainers/learn.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index b30b386c6a..56cb7313ce 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -132,7 +132,6 @@ def run_training(run_seed: int, options: RunOptions) -> None: options.curriculum, env_manager, restore=checkpoint_settings.resume ) maybe_add_samplers(options.parameter_randomization, env_manager, run_seed) - trainer_factory = TrainerFactory( options.behaviors, checkpoint_settings.run_id, @@ -196,6 +195,12 @@ def write_timing_tree(output_dir: str) -> None: def maybe_add_samplers( sampler_config: Optional[Dict], env: SubprocessEnvManager, run_seed: int ) -> None: + """ + Adds samplers to env if sampler config provided and sets seed if not configured. + :param sampler_config: validated dict of sampler configs. None if not included. + :param env: env manager to pass samplers via reset + :param run_seed: Random seed used for training. + """ if sampler_config is not None: # If the seed is not specified in yaml, this will grab the run seed for _, v in sampler_config.items(): From 46f6491e8343e2007486f8563444bb8d15fd2ea5 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sun, 7 Jun 2020 10:30:27 -0700 Subject: [PATCH 14/44] added multirange uniform distr --- com.unity.ml-agents/Runtime/Sampler.cs | 61 ++++++++++++++++++- .../EnvironmentParametersChannel.cs | 2 +- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index d7e2b490e4..98ebe38554 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -1,4 +1,5 @@ using System; +using System.Linq; using System.Collections.Generic; using Unity.MLAgents; using Unity.MLAgents.Inference.Utils; @@ -19,7 +20,13 @@ internal enum SamplerType /// /// Samples a reset parameter from a Gaussian distribution. /// - Gaussian = 1 + Gaussian = 1, + + /// + /// Samples a reset parameter from a Gaussian distribution. + /// + MultiRangeUniform = 2 + } /// @@ -48,6 +55,10 @@ public Func CreateSampler(IList encoding, int seed) { return CreateGaussianSampler(encoding[1], encoding[2], seed); } + else if ((int)encoding[0] == (int)SamplerType.MultiRangeUniform) + { + return CreateMultiRangeUniformSampler(encoding, seed); + } else{ Debug.LogWarning("EnvironmentParametersChannel received an unknown data type."); return () => 0; @@ -55,16 +66,60 @@ public Func CreateSampler(IList encoding, int seed) } - public Func CreateUniformSampler(float min, float max, int seed) + internal Func CreateUniformSampler(float min, float max, int seed) { System.Random distr = new System.Random(seed); return () => min + (float)distr.NextDouble() * (max - min); } - public Func CreateGaussianSampler(float mean, float stddev, int seed) + internal Func CreateGaussianSampler(float mean, float stddev, int seed) { RandomNormal distr = new RandomNormal(seed, mean, stddev); return () => (float)distr.NextDouble(); } + + internal Func CreateMultiRangeUniformSampler(IList encoding, int seed) + { + //RNG + System.Random distr = new System.Random(seed); + // Skip type of distribution since already checked to get into this function + var sampler_encoding = encoding.Skip(1); + // Will be used to normalize intervals + float sum_interval_sizes = 0; + //The number of intervals + int num_intervals = (int)(sampler_encoding.Count()/2); + // List that will store interval lengths + float[] interval_sizes = new float[num_intervals]; + // List that will store uniform distributions + IList> intervals = new Func[num_intervals]; + // Collect all intervals and store as uniform distrus + // Collect all interval sizes + for(int i = 0; i < num_intervals; i++) + { + var min = sampler_encoding.ElementAt(2 * i); + var max = sampler_encoding.ElementAt(2 * i + 1); + var interval_size = max - min; + sum_interval_sizes += interval_size; + interval_sizes[i] = interval_size; + intervals[i] = () => min + (float)distr.NextDouble() * interval_size; + } + // Normalize interval lengths + for(int i = 0; i < num_intervals; i++) + { + interval_sizes[i] = interval_sizes[i] / sum_interval_sizes; + } + // Build cmf for intervals + for(int i = 1; i < num_intervals; i++) + { + interval_sizes[i] += interval_sizes[i - 1]; + } + Multinomial intervalDistr = new Multinomial(seed); + float MultiRange() + { + int sampledInterval = intervalDistr.Sample(interval_sizes); + return intervals[sampledInterval].Invoke(); + } + return MultiRange; + } } } diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 90acd93e61..8f39104088 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -74,7 +74,7 @@ public float GetWithDefault(string key, float defaultValue) { Func valueOut; bool hasKey = m_Parameters.TryGetValue(key, out valueOut); - return hasKey ? valueOut() : defaultValue; + return hasKey ? valueOut.Invoke() : defaultValue; } /// From 528667593f7889c915908c53fe8f18b531173aae Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sun, 7 Jun 2020 10:42:56 -0700 Subject: [PATCH 15/44] fix variable name case --- com.unity.ml-agents/Runtime/Sampler.cs | 34 +++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index 98ebe38554..bccc19b746 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -83,40 +83,40 @@ internal Func CreateMultiRangeUniformSampler(IList encoding, int s //RNG System.Random distr = new System.Random(seed); // Skip type of distribution since already checked to get into this function - var sampler_encoding = encoding.Skip(1); + var samplerEncoding = encoding.Skip(1); // Will be used to normalize intervals - float sum_interval_sizes = 0; + float sumIntervalSizes = 0; //The number of intervals - int num_intervals = (int)(sampler_encoding.Count()/2); + int numIntervals = (int)(samplerEncoding.Count()/2); // List that will store interval lengths - float[] interval_sizes = new float[num_intervals]; + float[] intervalSizes = new float[numIntervals]; // List that will store uniform distributions - IList> intervals = new Func[num_intervals]; + IList> intervals = new Func[numIntervals]; // Collect all intervals and store as uniform distrus // Collect all interval sizes - for(int i = 0; i < num_intervals; i++) + for(int i = 0; i < numIntervals; i++) { - var min = sampler_encoding.ElementAt(2 * i); - var max = sampler_encoding.ElementAt(2 * i + 1); - var interval_size = max - min; - sum_interval_sizes += interval_size; - interval_sizes[i] = interval_size; - intervals[i] = () => min + (float)distr.NextDouble() * interval_size; + var min = samplerEncoding.ElementAt(2 * i); + var max = samplerEncoding.ElementAt(2 * i + 1); + var intervalSize = max - min; + sumIntervalSizes += intervalSize; + intervalSizes[i] = intervalSize; + intervals[i] = () => min + (float)distr.NextDouble() * intervalSize; } // Normalize interval lengths - for(int i = 0; i < num_intervals; i++) + for(int i = 0; i < numIntervals; i++) { - interval_sizes[i] = interval_sizes[i] / sum_interval_sizes; + intervalSizes[i] = intervalSizes[i] / sumIntervalSizes; } // Build cmf for intervals - for(int i = 1; i < num_intervals; i++) + for(int i = 1; i < numIntervals; i++) { - interval_sizes[i] += interval_sizes[i - 1]; + intervalSizes[i] += intervalSizes[i - 1]; } Multinomial intervalDistr = new Multinomial(seed); float MultiRange() { - int sampledInterval = intervalDistr.Sample(interval_sizes); + int sampledInterval = intervalDistr.Sample(intervalSizes); return intervals[sampledInterval].Invoke(); } return MultiRange; From 9dbcc4bd9a4b2c796cb98152a6b9902f75af5943 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sun, 7 Jun 2020 12:59:11 -0700 Subject: [PATCH 16/44] from set_sampler_params => set_{samplertype}_params --- com.unity.ml-agents/Runtime/Sampler.cs | 68 +++---------------- .../EnvironmentParametersChannel.cs | 48 ++++++++++++- .../environment_parameters_channel.py | 57 ++++++++++++++-- ml-agents/mlagents/trainers/settings.py | 34 +--------- .../trainers/subprocess_env_manager.py | 18 ++++- 5 files changed, 123 insertions(+), 102 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index bccc19b746..eda67480d9 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -7,27 +7,6 @@ namespace Unity.MLAgents { - /// - /// The types of distributions from which to sample reset parameters. - /// - internal enum SamplerType - { - /// - /// Samples a reset parameter from a uniform distribution. - /// - Uniform = 0, - - /// - /// Samples a reset parameter from a Gaussian distribution. - /// - Gaussian = 1, - - /// - /// Samples a reset parameter from a Gaussian distribution. - /// - MultiRangeUniform = 2 - - } /// /// Takes a list of floats that encode a sampling distribution and returns the sampling function. @@ -41,67 +20,40 @@ internal SamplerFactory() { } - /// - /// Create the sampling distribution described by the encoding. - /// - /// List of floats the describe sampling destribution. - public Func CreateSampler(IList encoding, int seed) - { - if ((int)encoding[0] == (int)SamplerType.Uniform) - { - return CreateUniformSampler(encoding[1], encoding[2], seed); - } - else if ((int)encoding[0] == (int)SamplerType.Gaussian) - { - return CreateGaussianSampler(encoding[1], encoding[2], seed); - } - else if ((int)encoding[0] == (int)SamplerType.MultiRangeUniform) - { - return CreateMultiRangeUniformSampler(encoding, seed); - } - else{ - Debug.LogWarning("EnvironmentParametersChannel received an unknown data type."); - return () => 0; - } - - } - - internal Func CreateUniformSampler(float min, float max, int seed) + public Func CreateUniformSampler(float min, float max, int seed) { System.Random distr = new System.Random(seed); return () => min + (float)distr.NextDouble() * (max - min); } - internal Func CreateGaussianSampler(float mean, float stddev, int seed) + public Func CreateGaussianSampler(float mean, float stddev, int seed) { RandomNormal distr = new RandomNormal(seed, mean, stddev); return () => (float)distr.NextDouble(); } - internal Func CreateMultiRangeUniformSampler(IList encoding, int seed) + public Func CreateMultiRangeUniformSampler(IList intervals, int seed) { //RNG System.Random distr = new System.Random(seed); - // Skip type of distribution since already checked to get into this function - var samplerEncoding = encoding.Skip(1); - // Will be used to normalize intervals + // Will be used to normalize intervalFuncs float sumIntervalSizes = 0; //The number of intervals - int numIntervals = (int)(samplerEncoding.Count()/2); + int numIntervals = (int)(intervals.Count()/2); // List that will store interval lengths float[] intervalSizes = new float[numIntervals]; // List that will store uniform distributions - IList> intervals = new Func[numIntervals]; + IList> intervalFuncs = new Func[numIntervals]; // Collect all intervals and store as uniform distrus // Collect all interval sizes for(int i = 0; i < numIntervals; i++) { - var min = samplerEncoding.ElementAt(2 * i); - var max = samplerEncoding.ElementAt(2 * i + 1); + var min = intervals.ElementAt(2 * i); + var max = intervals.ElementAt(2 * i + 1); var intervalSize = max - min; sumIntervalSizes += intervalSize; intervalSizes[i] = intervalSize; - intervals[i] = () => min + (float)distr.NextDouble() * intervalSize; + intervalFuncs[i] = () => min + (float)distr.NextDouble() * intervalSize; } // Normalize interval lengths for(int i = 0; i < numIntervals; i++) @@ -117,7 +69,7 @@ internal Func CreateMultiRangeUniformSampler(IList encoding, int s float MultiRange() { int sampledInterval = intervalDistr.Sample(intervalSizes); - return intervals[sampledInterval].Invoke(); + return intervalFuncs[sampledInterval].Invoke(); } return MultiRange; } diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 8f39104088..1c1b748ecc 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -13,6 +13,28 @@ internal enum EnvironmentDataTypes Sampler = 1 } + /// + /// The types of distributions from which to sample reset parameters. + /// + internal enum SamplerType + { + /// + /// Samples a reset parameter from a uniform distribution. + /// + Uniform = 0, + + /// + /// Samples a reset parameter from a Gaussian distribution. + /// + Gaussian = 1, + + /// + /// Samples a reset parameter from a Gaussian distribution. + /// + MultiRangeUniform = 2 + + } + /// /// A side channel that manages the environment parameter values from Python. Currently /// limited to parameters of type float. @@ -54,8 +76,30 @@ protected override void OnMessageReceived(IncomingMessage msg) else if ((int)EnvironmentDataTypes.Sampler == type) { int seed = msg.ReadInt32(); - var encoding = msg.ReadFloatList(); - m_Parameters[key] = m_SamplerFactory.CreateSampler(encoding, seed); + int samplerType = msg.ReadInt32(); + Func sampler = () => 0.0f; + if ((int)SamplerType.Uniform == samplerType) + { + float min = msg.ReadFloat32(); + float max = msg.ReadFloat32(); + sampler = m_SamplerFactory.CreateUniformSampler(min, max, seed); + } + else if ((int)SamplerType.Gaussian == samplerType) + { + float mean = msg.ReadFloat32(); + float stddev = msg.ReadFloat32(); + + sampler = m_SamplerFactory.CreateGaussianSampler(mean, stddev, seed); + } + else if ((int)SamplerType.MultiRangeUniform == samplerType) + { + IList intervals = msg.ReadFloatList(); + sampler = m_SamplerFactory.CreateMultiRangeUniformSampler(intervals, seed); + } + else{ + Debug.LogWarning("EnvironmentParametersChannel received an unknown data type."); + } + m_Parameters[key] = sampler; } else { diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 936508d5e9..9a20996ce6 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -16,6 +16,11 @@ class EnvironmentDataTypes(IntEnum): FLOAT = 0 SAMPLER = 1 + class SamplerTypes(IntEnum): + UNIFORM = 0 + GAUSSIAN = 1 + MULTIRANGEUNIFORM = 2 + def __init__(self) -> None: channel_id = uuid.UUID(("534c891e-810f-11ea-a9d0-822485860400")) super().__init__(channel_id) @@ -38,21 +43,59 @@ def set_float_parameter(self, key: str, value: float) -> None: msg.write_float32(value) super().queue_message_to_send(msg) - def set_sampler_parameters( - self, key: str, encoding: List[float], seed: int + def set_uniform_sampler_parameters( + self, key: str, min_value: float, max_value: float, seed: int + ) -> None: + """ + Sets a uniform environment parameter sampler. + :param key: The string identifier of the parameter. + :param min_value: The minimum of the sampling distribution. + :param max_value: The maximum of the sampling distribution. + :param seed: The random seed to initialize the sampler. + """ + msg = OutgoingMessage() + msg.write_string(key) + msg.write_int32(self.EnvironmentDataTypes.SAMPLER) + msg.write_int32(seed) + msg.write_int32(self.SamplerTypes.UNIFORM) + msg.write_float32(min_value) + msg.write_float32(max_value) + super().queue_message_to_send(msg) + + def set_gaussian_sampler_parameters( + self, key: str, mean: float, st_dev: float, seed: int + ) -> None: + """ + Sets a gaussian environment parameter sampler. + :param key: The string identifier of the parameter. + :param mean: The mean of the sampling distribution. + :param st_dev: The standard deviation of the sampling distribution. + :param seed: The random seed to initialize the sampler. + """ + msg = OutgoingMessage() + msg.write_string(key) + msg.write_int32(self.EnvironmentDataTypes.SAMPLER) + msg.write_int32(seed) + msg.write_int32(self.SamplerTypes.GAUSSIAN) + msg.write_float32(mean) + msg.write_float32(st_dev) + super().queue_message_to_send(msg) + + def set_multirangeuniform_sampler_parameters( + self, key: str, intervals: List[float], seed: int ) -> None: """ - Sets a float encoding of an environment parameter sampler. + Sets a gaussian environment parameter sampler. :param key: The string identifier of the parameter. - :param encoding: The float encoding of the sampler. + :param intervals: The min and max that define each uniform distribution. :param seed: The random seed to initialize the sampler. """ msg = OutgoingMessage() msg.write_string(key) msg.write_int32(self.EnvironmentDataTypes.SAMPLER) msg.write_int32(seed) - # for read float list in C# - msg.write_int32(len(encoding)) - for value in encoding: + msg.write_int32(self.SamplerTypes.MULTIRANGEUNIFORM) + msg.write_int32(len(intervals)) + for value in intervals: msg.write_float32(value) super().queue_message_to_send(msg) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 993f46d1be..d99bde400b 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -4,7 +4,6 @@ from enum import Enum import collections import argparse -import abc from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser from mlagents.trainers.cli_utils import load_config @@ -169,18 +168,9 @@ def to_settings(self) -> type: } return _mapping[self] - @staticmethod - def to_float(t: type) -> float: - _mapping: Dict[type, float] = { - UniformSettings: 0.0, - GaussianSettings: 1.0, - MultiRangeUniformSettings: 2.0, - } - return _mapping[t] - @attr.s(auto_attribs=True) -class ParameterRandomizationSettings(abc.ABC): +class ParameterRandomizationSettings: seed: int = parser.get_default("seed") @staticmethod @@ -211,11 +201,6 @@ def structure(d: Mapping, t: type) -> Any: d_final[param] = strict_to_cls(val, t) return d_final - @abc.abstractmethod - def to_float_encoding(self) -> List[float]: - "Returns the float encoding of the sampler" - pass - @attr.s(auto_attribs=True) class UniformSettings(ParameterRandomizationSettings): @@ -233,24 +218,12 @@ def _check_min_value(self, attribute, value): "Minimum value is greater than maximum value in uniform sampler." ) - def to_float_encoding(self) -> List[float]: - "Returns the sampler type followed by the min and max values" - return [ - ParameterRandomizationType.to_float(type(self)), - self.min_value, - self.max_value, - ] - @attr.s(auto_attribs=True) class GaussianSettings(ParameterRandomizationSettings): mean: float = 1.0 st_dev: float = 1.0 - def to_float_encoding(self) -> List[float]: - "Returns the sampler type followed by the mean and standard deviation" - return [ParameterRandomizationType.to_float(type(self)), self.mean, self.st_dev] - @attr.s(auto_attribs=True) class MultiRangeUniformSettings(ParameterRandomizationSettings): @@ -275,10 +248,7 @@ def _check_intervals(self, attribute, value): def to_float_encoding(self) -> List[float]: "Returns the sampler type followed by a flattened list of the interval values" - floats: List[float] = [] - for interval in self.intervals: - floats += interval - return [ParameterRandomizationType.to_float(type(self))] + floats + return [value for interval in self.intervals for value in interval] @attr.s(auto_attribs=True) diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index ae16ffb1bb..ff3d3df016 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -23,7 +23,11 @@ get_timer_root, ) from mlagents.trainers.brain import BrainParameters -from mlagents.trainers.settings import ParameterRandomizationSettings +from mlagents.trainers.settings import ( + UniformSettings, + GaussianSettings, + MultiRangeUniformSettings, +) from mlagents.trainers.action_info import ActionInfo from mlagents_envs.side_channel.environment_parameters_channel import ( EnvironmentParametersChannel, @@ -178,8 +182,16 @@ def external_brains(): for k, v in req.payload.items(): if isinstance(v, float): env_parameters.set_float_parameter(k, v) - elif isinstance(v, ParameterRandomizationSettings): - env_parameters.set_sampler_parameters( + elif isinstance(v, UniformSettings): + env_parameters.set_uniform_sampler_parameters( + k, v.min_value, v.max_value, v.seed + ) + elif isinstance(v, GaussianSettings): + env_parameters.set_gaussian_sampler_parameters( + k, v.mean, v.st_dev, v.seed + ) + elif isinstance(v, MultiRangeUniformSettings): + env_parameters.set_multirangeuniform_sampler_parameters( k, v.to_float_encoding(), v.seed ) env.reset() From d3e0d9c8926e7cf92159c51fe0983a9328350955 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sun, 7 Jun 2020 13:08:12 -0700 Subject: [PATCH 17/44] cleaned up sampler --- com.unity.ml-agents/Runtime/Sampler.cs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index eda67480d9..f02c0e65d4 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -1,9 +1,6 @@ using System; -using System.Linq; using System.Collections.Generic; -using Unity.MLAgents; using Unity.MLAgents.Inference.Utils; -using UnityEngine; namespace Unity.MLAgents { @@ -22,7 +19,7 @@ internal SamplerFactory() public Func CreateUniformSampler(float min, float max, int seed) { - System.Random distr = new System.Random(seed); + Random distr = new Random(seed); return () => min + (float)distr.NextDouble() * (max - min); } @@ -35,11 +32,11 @@ public Func CreateGaussianSampler(float mean, float stddev, int seed) public Func CreateMultiRangeUniformSampler(IList intervals, int seed) { //RNG - System.Random distr = new System.Random(seed); + Random distr = new Random(seed); // Will be used to normalize intervalFuncs float sumIntervalSizes = 0; //The number of intervals - int numIntervals = (int)(intervals.Count()/2); + int numIntervals = (int)(intervals.Count/2); // List that will store interval lengths float[] intervalSizes = new float[numIntervals]; // List that will store uniform distributions @@ -48,8 +45,8 @@ public Func CreateMultiRangeUniformSampler(IList intervals, int se // Collect all interval sizes for(int i = 0; i < numIntervals; i++) { - var min = intervals.ElementAt(2 * i); - var max = intervals.ElementAt(2 * i + 1); + var min = intervals[2 * i]; + var max = intervals[2 * i + 1]; var intervalSize = max - min; sumIntervalSizes += intervalSize; intervalSizes[i] = intervalSize; From 4c111e46581387adeb4e5e63f4766a3aecc5fad4 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sun, 7 Jun 2020 14:22:42 -0700 Subject: [PATCH 18/44] fix tests --- .../mlagents/trainers/tests/test_learn.py | 18 ++++++------------ .../mlagents/trainers/tests/test_simple_rl.py | 3 --- .../trainers/tests/test_trainer_controller.py | 5 ----- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index daacc1c4d3..31eed0e227 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -7,6 +7,7 @@ from mlagents.trainers.cli_utils import DetectDefault from mlagents_envs.exception import UnityEnvironmentException from mlagents.trainers.stats import StatsReporter +from mlagents.trainers.settings import UniformSettings def basic_options(extra_args=None): @@ -37,7 +38,9 @@ def basic_options(extra_args=None): MOCK_SAMPLER_CURRICULUM_YAML = """ parameter_randomization: - sampler1: foo + sampler1: + uniform: + min_value: 0.2 curriculum: behavior1: @@ -53,7 +56,6 @@ def basic_options(extra_args=None): @patch("mlagents.trainers.learn.write_run_options") @patch("mlagents.trainers.learn.handle_existing_directories") @patch("mlagents.trainers.learn.TrainerFactory") -@patch("mlagents.trainers.learn.SamplerManager") @patch("mlagents.trainers.learn.SubprocessEnvManager") @patch("mlagents.trainers.learn.create_environment_factory") @patch("mlagents.trainers.settings.load_config") @@ -61,7 +63,6 @@ def test_run_training( load_config, create_environment_factory, subproc_env_mock, - sampler_manager_mock, trainer_factory_mock, handle_dir_mock, write_run_options_mock, @@ -79,14 +80,7 @@ def test_run_training( options = basic_options() learn.run_training(0, options) mock_init.assert_called_once_with( - trainer_factory_mock.return_value, - "results/ppo", - "ppo", - None, - True, - 0, - sampler_manager_mock.return_value, - None, + trainer_factory_mock.return_value, "results/ppo", "ppo", None, True, 0 ) handle_dir_mock.assert_called_once_with("results/ppo", False, False, None) write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs") @@ -202,7 +196,7 @@ def test_yaml_args(mock_file): @patch("builtins.open", new_callable=mock_open, read_data=MOCK_SAMPLER_CURRICULUM_YAML) def test_sampler_configs(mock_file): opt = parse_command_line(["mytrainerpath"]) - assert opt.parameter_randomization == {"sampler1": "foo"} + assert isinstance(opt.parameter_randomization["sampler1"], UniformSettings) assert len(opt.curriculum.keys()) == 2 diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index 907e27a189..e68adde70f 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -13,7 +13,6 @@ from mlagents.trainers.trainer_controller import TrainerController from mlagents.trainers.trainer_util import TrainerFactory from mlagents.trainers.simple_env_manager import SimpleEnvManager -from mlagents.trainers.sampler_class import SamplerManager from mlagents.trainers.demo_loader import write_demo from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary from mlagents.trainers.settings import ( @@ -139,8 +138,6 @@ def _check_environment_trains( meta_curriculum=meta_curriculum, train=True, training_seed=seed, - sampler_manager=SamplerManager(None), - resampling_interval=None, ) # Begin training diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index cf2f872531..8a0280dc4b 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -4,7 +4,6 @@ from mlagents.tf_utils import tf from mlagents.trainers.trainer_controller import TrainerController from mlagents.trainers.ghost.controller import GhostController -from mlagents.trainers.sampler_class import SamplerManager @pytest.fixture @@ -18,8 +17,6 @@ def basic_trainer_controller(): meta_curriculum=None, train=True, training_seed=99, - sampler_manager=SamplerManager({}), - resampling_interval=None, ) @@ -36,8 +33,6 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): meta_curriculum=None, train=True, training_seed=seed, - sampler_manager=SamplerManager({}), - resampling_interval=None, ) numpy_random_seed.assert_called_with(seed) tensorflow_set_seed.assert_called_with(seed) From 38f48f1573b85d2a02a47bf2ed4df7d037d19854 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sun, 7 Jun 2020 15:04:42 -0700 Subject: [PATCH 19/44] dummy doc update to trigger CI --- docs/Training-ML-Agents.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 2f93938466..3bb6b028b0 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -435,12 +435,11 @@ behaviors: # < Same as above> parameter_randomization: - resampling-interval: 5000 mass: - sampler-type: "uniform" - min_value: 0.5 - max_value: 10 + uniform: + min_value: 0.5 + max_value: 10 gravity: sampler-type: "multirange_uniform" From 988452adc57ff4b239f591e2a1538e2485011ac0 Mon Sep 17 00:00:00 2001 From: Hans Duedal Date: Mon, 8 Jun 2020 20:17:29 +0200 Subject: [PATCH 20/44] Test Circle-CI From cd06ce78f2f11472d269cf660bb7e0f67fad1364 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Mon, 8 Jun 2020 12:00:41 -0700 Subject: [PATCH 21/44] clean up --- ml-agents/mlagents/trainers/learn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 56cb7313ce..8ff4260180 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -203,7 +203,7 @@ def maybe_add_samplers( """ if sampler_config is not None: # If the seed is not specified in yaml, this will grab the run seed - for _, v in sampler_config.items(): + for v in sampler_config.values(): if v.seed == -1: v.seed = run_seed env.reset(config=sampler_config) From 1b9f2d5e0a075f5beb933093d8a65b69674c02ce Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Mon, 8 Jun 2020 12:20:47 -0700 Subject: [PATCH 22/44] remove square brackets --- ml-agents/mlagents/trainers/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index d99bde400b..01ff66da2f 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -240,7 +240,7 @@ def _check_intervals(self, attribute, value): raise TrainerConfigError( f"The sampling interval {interval} must contain exactly two values." ) - [min_value, max_value] = interval + min_value, max_value = interval if min_value > max_value: raise TrainerConfigError( f"Minimum value is greater than maximum value in interval {interval}." From aff9c004987eb3f6ae37e5e5df691343258ce73d Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Mon, 8 Jun 2020 15:46:29 -0700 Subject: [PATCH 23/44] fix side channel helper functions/add offset to seed --- .../side_channel/environment_parameters_channel.py | 4 +--- ml-agents/mlagents/trainers/learn.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 9a20996ce6..dd666ec089 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -95,7 +95,5 @@ def set_multirangeuniform_sampler_parameters( msg.write_int32(self.EnvironmentDataTypes.SAMPLER) msg.write_int32(seed) msg.write_int32(self.SamplerTypes.MULTIRANGEUNIFORM) - msg.write_int32(len(intervals)) - for value in intervals: - msg.write_float32(value) + msg.write_float32_list(intervals) super().queue_message_to_send(msg) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 8ff4260180..96ea552dd4 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -203,9 +203,9 @@ def maybe_add_samplers( """ if sampler_config is not None: # If the seed is not specified in yaml, this will grab the run seed - for v in sampler_config.values(): + for offset, v in enumerate(sampler_config.values()): if v.seed == -1: - v.seed = run_seed + v.seed = run_seed + offset env.reset(config=sampler_config) From 63a24cc220cffee9c7a4a40f9d2040c51a76f8b8 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Tue, 9 Jun 2020 16:00:29 -0700 Subject: [PATCH 24/44] restructure yaml config --- config/ppo/3DBall_randomize.yaml | 6 +++-- .../environment_parameters_channel.py | 2 +- ml-agents/mlagents/trainers/settings.py | 23 ++++++++++++------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml index f361c27c25..6a85a8d7b0 100644 --- a/config/ppo/3DBall_randomize.yaml +++ b/config/ppo/3DBall_randomize.yaml @@ -28,10 +28,12 @@ behaviors: parameter_randomization: mass: - uniform: + sampler_type: uniform + sampler_parameters: min_value: 0.5 max_value: 10 scale: - uniform: + sampler_type: uniform + sampler_parameters: min_value: 0.75 max_value: 3 diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index dd666ec089..af905ee6d4 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -85,7 +85,7 @@ def set_multirangeuniform_sampler_parameters( self, key: str, intervals: List[float], seed: int ) -> None: """ - Sets a gaussian environment parameter sampler. + Sets a multirangeuniform environment parameter sampler. :param key: The string identifier of the parameter. :param intervals: The min and max that define each uniform distribution. :param seed: The random seed to initialize the sampler. diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 01ff66da2f..381ff2ce79 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -185,20 +185,27 @@ def structure(d: Mapping, t: type) -> Any: f"Unsupported parameter randomization configuration {d}." ) d_final: Dict[str, List[float]] = {} - for param, param_config in d.items(): - if param == "resampling-interval": + for environment_parameter, environment_parameter_config in d.items(): + if environment_parameter == "resampling-interval": logger.warning( "The resampling-interval is no longer necessary for parameter randomization. It is being ignored." ) continue - if not isinstance(param_config, Mapping): + if "sampler_type" not in environment_parameter_config: raise TrainerConfigError( - f"Unsupported distribution configuration {param_config}." + f"Sampler configuration for {environment_parameter} does not contain sampler_type." ) - for key, val in param_config.items(): - enum_key = ParameterRandomizationType(key) - t = enum_key.to_settings() - d_final[param] = strict_to_cls(val, t) + if "sampler_parameters" not in environment_parameter_config: + raise TrainerConfigError( + f"Sampler configuration for {environment_parameter} does not contain sampler_parameters." + ) + enum_key = ParameterRandomizationType( + environment_parameter_config["sampler_type"] + ) + t = enum_key.to_settings() + d_final[environment_parameter] = strict_to_cls( + environment_parameter_config["sampler_parameters"], t + ) return d_final From da3cb2d6036f9d583665a20e6db51f5c9a388b80 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Tue, 9 Jun 2020 16:11:56 -0700 Subject: [PATCH 25/44] fix tests and markdown --- docs/Training-ML-Agents.md | 2 +- .../mlagents/trainers/tests/test_learn.py | 3 +- .../mlagents/trainers/tests/test_settings.py | 37 +++++++++++++++---- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 3bb6b028b0..7a16f36092 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -486,7 +486,7 @@ Below is a list of included `sampler-type` as part of the toolkit. - **sub-arguments** - `intervals` The implementation of the samplers can be found in the -[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py). +[Samplers.cs file](../com.unity.ml-agents/Runtime/Sampler.cs). #### Defining a New Sampler Type diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 31eed0e227..6e9ae0ec85 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -39,7 +39,8 @@ def basic_options(extra_args=None): MOCK_SAMPLER_CURRICULUM_YAML = """ parameter_randomization: sampler1: - uniform: + sampler_type: uniform + sampler_parameters: min_value: 0.2 curriculum: diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 5d19a019de..14928599fa 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -160,9 +160,18 @@ def test_parameter_randomization_structure(): Tests the ParameterRandomizationSettings structure method and all validators. """ parameter_randomization_dict = { - "mass": {"uniform": {"min_value": 1.0, "max_value": 2.0}}, - "scale": {"gaussian": {"mean": 1.0, "st_dev": 2.0}}, - "length": {"multirangeuniform": {"intervals": [[1.0, 2.0], [3.0, 4.0]]}}, + "mass": { + "sampler_type": "uniform", + "sampler_parameters": {"min_value": 1.0, "max_value": 2.0}, + }, + "scale": { + "sampler_type": "gaussian", + "sampler_parameters": {"mean": 1.0, "st_dev": 2.0}, + }, + "length": { + "sampler_type": "multirangeuniform", + "sampler_parameters": {"intervals": [[1.0, 2.0], [3.0, 4.0]]}, + }, } parameter_randomization_distributions = ParameterRandomizationSettings.structure( parameter_randomization_dict, Dict[str, ParameterRandomizationSettings] @@ -174,7 +183,12 @@ def test_parameter_randomization_structure(): ) # Check invalid distribution type - invalid_distribution_dict = {"mass": {"beta": {"alpha": 1.0, "beta": 2.0}}} + invalid_distribution_dict = { + "mass": { + "sampler_type": "beta", + "sampler_parameters": {"alpha": 1.0, "beta": 2.0}, + } + } with pytest.raises(ValueError): ParameterRandomizationSettings.structure( invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] @@ -182,7 +196,10 @@ def test_parameter_randomization_structure(): # Check min less than max in uniform invalid_distribution_dict = { - "mass": {"uniform": {"min_value": 2.0, "max_value": 1.0}} + "mass": { + "sampler_type": "uniform", + "sampler_parameters": {"min_value": 2.0, "max_value": 1.0}, + } } with pytest.raises(TrainerConfigError): ParameterRandomizationSettings.structure( @@ -191,7 +208,10 @@ def test_parameter_randomization_structure(): # Check min less than max in multirange invalid_distribution_dict = { - "mass": {"multirangeuniform": {"intervals": [[2.0, 1.0]]}} + "mass": { + "sampler_type": "multirangeuniform", + "sampler_parameters": {"intervals": [[2.0, 1.0]]}, + } } with pytest.raises(TrainerConfigError): ParameterRandomizationSettings.structure( @@ -200,7 +220,10 @@ def test_parameter_randomization_structure(): # Check multirange has valid intervals invalid_distribution_dict = { - "mass": {"multirangeuniform": {"intervals": [[1.0, 2.0], [3.0]]}} + "mass": { + "sampler_type": "multirangeuniform", + "sampler_parameters": {"intervals": [[1.0, 2.0], [3.0]]}, + } } with pytest.raises(TrainerConfigError): ParameterRandomizationSettings.structure( From fae0ca381e1a8c4a373b28e2d8ceaff9fb567414 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Tue, 9 Jun 2020 16:37:48 -0700 Subject: [PATCH 26/44] some doc updates --- docs/Training-ML-Agents.md | 55 +++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 7a16f36092..82b02b08c2 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -437,53 +437,52 @@ behaviors: parameter_randomization: mass: - uniform: + sampler_type: uniform + sampler_parameters: min_value: 0.5 max_value: 10 - gravity: - sampler-type: "multirange_uniform" - intervals: [[7, 10], [15, 20]] + length: + sampler_type: multirange_uniform + sampler_parameters: + intervals: [[7, 10], [15, 20]] scale: - sampler-type: "uniform" - min_value: 0.75 - max_value: 3 + sampler_type: gaussian + sampler_parameters: + mean: 2 + st_dev: .3 ``` -Note that `mass`, `gravity` and `scale` are the names of the environment +Note that `mass`, `length` and `scale` are the names of the environment parameters that will be sampled. If a parameter specified in the file doesn't exist in the environment, then this parameter will be ignored. | **Setting** | **Description** | | :--------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `resampling-interval` | Number of steps for the agent to train under a particular environment configuration before resetting the environment with a new sample of `Environment Parameters`. | -| `sampler-type` | Type of sampler use for this `Environment Parameter`. This is a string that should exist in the `Sampler Factory` (explained below). | -| `sampler-type-sub-arguments` | Specify the sub-arguments depending on the `sampler-type`. In the example above, this would correspond to the `intervals` under the `sampler-type` `multirange_uniform` for the `Environment Parameter` called `gravity`. The key name should match the name of the corresponding argument in the sampler definition (explained) below) | +| `sampler_type` | A string identifier for the type of sampler to use for this `Environment Parameter`. | +| `sampler_parameters` | The parameters for a given `sampler_type`. Samplers of different types can have different `sampler_parameters` | -#### Included Sampler Types +#### Supported Sampler Types -Below is a list of included `sampler-type` as part of the toolkit. +Below is a list of the `sampler_type`s supported by the toolkit. - `uniform` - Uniform sampler - - Uniformly samples a single float value between defined endpoints. The - sub-arguments for this sampler to specify the interval endpoints are as - below. The sampling is done in the range of [`min_value`, `max_value`). - - **sub-arguments** - `min_value`, `max_value` + - Uniformly samples a single float value from a range with a given minimum + and maximum value (inclusive). + - **parameters** - `min_value`, `max_value` - `gaussian` - Gaussian sampler - - Samples a single float value from the distribution characterized by the mean - and standard deviation. The sub-arguments to specify the Gaussian - distribution to use are as below. - - **sub-arguments** - `mean`, `st_dev` + - Samples a single float value from a normal distribution with a given mean + and standard deviation. + - **parameters** - `mean`, `st_dev` - `multirange_uniform` - Multirange uniform sampler - - Uniformly samples a single float value between the specified intervals. - Samples by first performing a weight pick of an interval from the list of - intervals (weighted based on interval width) and samples uniformly from the - selected interval (half-closed interval, same as the uniform sampler). This - sampler can take an arbitrary number of intervals in a list in the following - format: [[`interval_1_min`, `interval_1_max`], [`interval_2_min`, + - First, samples an interval from a set of intervals in proportion to relative + length of the intervals. Then, uniformly samples a single float value from the + sampled interval (inclusive). This sampler can take an arbitrary number of + intervals in a list in the following format: + [[`interval_1_min`, `interval_1_max`], [`interval_2_min`, `interval_2_max`], ...] - - **sub-arguments** - `intervals` + - **parameters** - `intervals` The implementation of the samplers can be found in the [Samplers.cs file](../com.unity.ml-agents/Runtime/Sampler.cs). From 681c7ea2a4e0dca2da98f9cff0e1d471020e6ca8 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 10 Jun 2020 14:33:08 -0700 Subject: [PATCH 27/44] doc updates --- docs/Training-ML-Agents.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 82b02b08c2..5977b1433d 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -443,7 +443,7 @@ parameter_randomization: max_value: 10 length: - sampler_type: multirange_uniform + sampler_type: multirangeuniform sampler_parameters: intervals: [[7, 10], [15, 20]] @@ -465,7 +465,7 @@ exist in the environment, then this parameter will be ignored. #### Supported Sampler Types -Below is a list of the `sampler_type`s supported by the toolkit. +Below is a list of the `sampler_type` values supported by the toolkit. - `uniform` - Uniform sampler - Uniformly samples a single float value from a range with a given minimum From c40c4d060b98c7904d233b54a5f9fdf3fb4a6088 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 10 Jun 2020 16:55:28 -0700 Subject: [PATCH 28/44] remove "adding your own sampler" section from docs --- docs/Training-ML-Agents.md | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 5977b1433d..a353bdf62e 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -487,44 +487,6 @@ Below is a list of the `sampler_type` values supported by the toolkit. The implementation of the samplers can be found in the [Samplers.cs file](../com.unity.ml-agents/Runtime/Sampler.cs). -#### Defining a New Sampler Type - -If you want to define your own sampler type, you must first inherit the -_Sampler_ base class (included in the `sampler_class` file) and preserve the -interface. Once the class for the required method is specified, it must be -registered in the Sampler Factory. - -This can be done by subscribing to the _register_sampler_ method of the -`SamplerFactory`. The command is as follows: - -`SamplerFactory.register_sampler(*custom_sampler_string_key*, *custom_sampler_object*)` - -Once the Sampler Factory reflects the new register, the new sampler type can be -used for sample any `Environment Parameter`. For example, lets say a new sampler -type was implemented as below and we register the `CustomSampler` class with the -string `custom-sampler` in the Sampler Factory. - -```python -class CustomSampler(Sampler): - - def __init__(self, argA, argB, argC): - self.possible_vals = [argA, argB, argC] - - def sample_all(self): - return np.random.choice(self.possible_vals) -``` - -Now we need to specify the new sampler type in the sampler YAML file. For -example, we use this new sampler type for the `Environment Parameter` _mass_. - -```yaml -mass: - sampler-type: "custom-sampler" - argB: 1 - argA: 2 - argC: 3 -``` - #### Training with Environment Parameter Randomization After the sampler configuration is defined, we proceed by launching `mlagents-learn` From 320233b10e6fb0ef57c512a8588e536bd5e74f8f Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 10 Jun 2020 17:07:57 -0700 Subject: [PATCH 29/44] update changelog --- com.unity.ml-agents/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index db3eb10e5a..bd94746488 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to ### Major Changes #### com.unity.ml-agents (C#) #### ml-agents / ml-agents-envs / gym-unity (Python) +- The Parameter Randomization feature has been refactored to enable sampling of new parameters per episode to improve robustness. The + `resampling-interval` parameter has been removed and the config structure updated. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md). (#4065) ### Minor Changes #### com.unity.ml-agents (C#) From 94e1d2089c694e064bc8f887893f6b7942fd3fd2 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 11 Jun 2020 09:33:33 -0700 Subject: [PATCH 30/44] update upgrade_config.py --- ml-agents/mlagents/trainers/upgrade_config.py | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/upgrade_config.py b/ml-agents/mlagents/trainers/upgrade_config.py index 7425708ddb..d4cbcaf7b9 100644 --- a/ml-agents/mlagents/trainers/upgrade_config.py +++ b/ml-agents/mlagents/trainers/upgrade_config.py @@ -82,6 +82,34 @@ def remove_nones(config: Dict[Any, Any]) -> Dict[str, Any]: return new_config +# Take a sampler from the old format and convert to new sampler structure +def convert_samplers(old_sampler_config: Dict[str, Any]) -> Dict[str, Any]: + new_sampler_config: Dict[str, Any] = {} + for parameter, parameter_config in old_sampler_config.items(): + if parameter == "resampling-interval": + print( + "resampling-interval is no longer necessary for parameter randomization and is being ignored." + ) + continue + new_sampler_config[parameter] = {} + new_sampler_config[parameter]["sampler_type"] = parameter_config["sampler-type"] + if parameter_config["sampler-type"] == "uniform": + new_sampler_config[parameter]["sampler_parameters"] = { + "min_value": parameter_config["min_value"], + "max_value": parameter_config["max_value"], + } + elif parameter_config["sampler-type"] == "gaussian": + new_sampler_config[parameter]["sampler_parameters"] = { + "mean": parameter_config["mean"], + "st_dev": parameter_config["st_dev"], + } + elif parameter_config["sampler-type"] == "multirangeuniform": + new_sampler_config[parameter]["sampler_parameters"] = { + "intervals": parameter_config["intervals"] + } + return new_sampler_config + + def parse_args(): argparser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter @@ -124,7 +152,8 @@ def main() -> None: full_config["curriculum"] = curriculum_config_dict if args.sampler is not None: - sampler_config_dict = load_config(args.sampler) + old_sampler_config_dict = load_config(args.sampler) + sampler_config_dict = convert_samplers(old_sampler_config_dict) full_config["parameter_randomization"] = sampler_config_dict # Convert config to dict From 9d18e7ba27807a7a4120af3fc2c3ee9896e3aaa9 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 11 Jun 2020 17:57:40 -0700 Subject: [PATCH 31/44] sampler C# tests --- .../Tests/Editor/SamplerTests.cs | 107 ++++++++++++++++++ .../Tests/Editor/SamplerTests.cs.meta | 11 ++ 2 files changed, 118 insertions(+) create mode 100644 com.unity.ml-agents/Tests/Editor/SamplerTests.cs create mode 100644 com.unity.ml-agents/Tests/Editor/SamplerTests.cs.meta diff --git a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs new file mode 100644 index 0000000000..d1ddafb331 --- /dev/null +++ b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs @@ -0,0 +1,107 @@ +using System; +using NUnit.Framework; +using System.IO; +using System.Collections.Generic; +using UnityEngine; +using Unity.MLAgents.SideChannels; + +namespace Unity.MLAgents.Tests +{ + public class SamplerTests + { + const int k_Seed = 1337; + const double k_Epsilon = 0.0001; + EnvironmentParametersChannel m_Channel = new EnvironmentParametersChannel(); + + public SamplerTests() + { + SideChannelsManager.RegisterSideChannel(m_Channel); + } + + [Test] + public void UniformSamplerTest() + { + float min_value = 1.0f; + float max_value = 2.0f; + string parameter = "parameter1"; + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.0f); + using (var outgoingMsg = new OutgoingMessage()) + { + outgoingMsg.WriteString(parameter); + // 1 indicates this meessage is a Sampler + outgoingMsg.WriteInt32(1); + outgoingMsg.WriteInt32(k_Seed); + outgoingMsg.WriteInt32((int)SamplerType.Uniform); + outgoingMsg.WriteFloat32(min_value); + outgoingMsg.WriteFloat32(max_value); + byte[] message = GetByteMessage(m_Channel, outgoingMsg); + SideChannelsManager.ProcessSideChannelData(message); + } + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.208888f, k_Epsilon); + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.118017f, k_Epsilon); + } + + [Test] + public void GaussianSamplerTest() + { + float mean = 3.0f; + float stddev = 0.2f; + string parameter = "parameter2"; + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.0f); + using (var outgoingMsg = new OutgoingMessage()) + { + outgoingMsg.WriteString(parameter); + // 1 indicates this meessage is a Sampler + outgoingMsg.WriteInt32(1); + outgoingMsg.WriteInt32(k_Seed); + outgoingMsg.WriteInt32((int)SamplerType.Gaussian); + outgoingMsg.WriteFloat32(mean); + outgoingMsg.WriteFloat32(stddev); + byte[] message = GetByteMessage(m_Channel, outgoingMsg); + SideChannelsManager.ProcessSideChannelData(message); + } + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 2.936162f, k_Epsilon); + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 2.951348f, k_Epsilon); + } + + [Test] + public void MultiRangeUniformSamplerTest() + { + float[] intervals = new float[4]; + intervals[0] = 1.8f; + intervals[1] = 2f; + intervals[2] = 3.2f; + intervals[3] = 4.1f; + string parameter = "parameter3"; + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.0f); + using (var outgoingMsg = new OutgoingMessage()) + { + outgoingMsg.WriteString(parameter); + // 1 indicates this meessage is a Sampler + outgoingMsg.WriteInt32(1); + outgoingMsg.WriteInt32(k_Seed); + outgoingMsg.WriteInt32((int)SamplerType.MultiRangeUniform); + outgoingMsg.WriteFloatList(intervals); + byte[] message = GetByteMessage(m_Channel, outgoingMsg); + SideChannelsManager.ProcessSideChannelData(message); + } + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 3.388, k_Epsilon); + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.823603, k_Epsilon); + } + + internal static byte[] GetByteMessage(SideChannel sideChannel, OutgoingMessage msg) + { + byte[] message = msg.ToByteArray(); + using (var memStream = new MemoryStream()) + { + using (var binaryWriter = new BinaryWriter(memStream)) + { + binaryWriter.Write(sideChannel.ChannelId.ToByteArray()); + binaryWriter.Write(message.Length); + binaryWriter.Write(message); + } + return memStream.ToArray(); + } + } + } +} diff --git a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs.meta b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs.meta new file mode 100644 index 0000000000..ef0d54e72a --- /dev/null +++ b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 7e6609c51018d4132beda8ddedd46d91 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From 38e111520af0a1dcd6502720736fb01e5f44ad99 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 11 Jun 2020 18:21:23 -0700 Subject: [PATCH 32/44] flatten intervals just before sending --- com.unity.ml-agents/Runtime/Sampler.cs | 4 +++- com.unity.ml-agents/Tests/Editor/SamplerTests.cs | 6 +++--- .../side_channel/environment_parameters_channel.py | 5 +++-- ml-agents/mlagents/trainers/settings.py | 4 ---- ml-agents/mlagents/trainers/subprocess_env_manager.py | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index f02c0e65d4..7329027270 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -1,6 +1,8 @@ using System; using System.Collections.Generic; using Unity.MLAgents.Inference.Utils; +using UnityEngine; +using Random=System.Random; namespace Unity.MLAgents { @@ -62,7 +64,7 @@ public Func CreateMultiRangeUniformSampler(IList intervals, int se { intervalSizes[i] += intervalSizes[i - 1]; } - Multinomial intervalDistr = new Multinomial(seed); + Multinomial intervalDistr = new Multinomial(seed + 1); float MultiRange() { int sampledInterval = intervalDistr.Sample(intervalSizes); diff --git a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs index d1ddafb331..14acbdeb40 100644 --- a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs +++ b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs @@ -68,7 +68,7 @@ public void GaussianSamplerTest() public void MultiRangeUniformSamplerTest() { float[] intervals = new float[4]; - intervals[0] = 1.8f; + intervals[0] = 1.2f; intervals[1] = 2f; intervals[2] = 3.2f; intervals[3] = 4.1f; @@ -85,8 +85,8 @@ public void MultiRangeUniformSamplerTest() byte[] message = GetByteMessage(m_Channel, outgoingMsg); SideChannelsManager.ProcessSideChannelData(message); } - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 3.388, k_Epsilon); - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.823603, k_Epsilon); + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 3.387999f, k_Epsilon); + Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.294413f, k_Epsilon); } internal static byte[] GetByteMessage(SideChannel sideChannel, OutgoingMessage msg) diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index af905ee6d4..e21e406e64 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -82,7 +82,7 @@ def set_gaussian_sampler_parameters( super().queue_message_to_send(msg) def set_multirangeuniform_sampler_parameters( - self, key: str, intervals: List[float], seed: int + self, key: str, intervals: List[List[float]], seed: int ) -> None: """ Sets a multirangeuniform environment parameter sampler. @@ -95,5 +95,6 @@ def set_multirangeuniform_sampler_parameters( msg.write_int32(self.EnvironmentDataTypes.SAMPLER) msg.write_int32(seed) msg.write_int32(self.SamplerTypes.MULTIRANGEUNIFORM) - msg.write_float32_list(intervals) + flattened_intervals = [value for interval in intervals for value in interval] + msg.write_float32_list(flattened_intervals) super().queue_message_to_send(msg) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index eb3d0e90a4..1a77e01955 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -253,10 +253,6 @@ def _check_intervals(self, attribute, value): f"Minimum value is greater than maximum value in interval {interval}." ) - def to_float_encoding(self) -> List[float]: - "Returns the sampler type followed by a flattened list of the interval values" - return [value for interval in self.intervals for value in interval] - @attr.s(auto_attribs=True) class SelfPlaySettings: diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index ff3d3df016..26ed3c644f 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -192,7 +192,7 @@ def external_brains(): ) elif isinstance(v, MultiRangeUniformSettings): env_parameters.set_multirangeuniform_sampler_parameters( - k, v.to_float_encoding(), v.seed + k, v.intervals, v.seed ) env.reset() all_step_result = _generate_all_results() From de6ba284b0601125d501b0b8119f3b7171f02fda Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 11 Jun 2020 18:22:58 -0700 Subject: [PATCH 33/44] update comment --- .../side_channel/environment_parameters_channel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index e21e406e64..7dc66d9e65 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -87,7 +87,7 @@ def set_multirangeuniform_sampler_parameters( """ Sets a multirangeuniform environment parameter sampler. :param key: The string identifier of the parameter. - :param intervals: The min and max that define each uniform distribution. + :param intervals: The lists of min and max that define each uniform distribution. :param seed: The random seed to initialize the sampler. """ msg = OutgoingMessage() From bf52d2fa41db2db088e706308f5d3e96fcccae1c Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 11 Jun 2020 18:25:21 -0700 Subject: [PATCH 34/44] update settings default --- ml-agents/mlagents/trainers/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 1a77e01955..0a7d6d308a 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -216,7 +216,7 @@ class UniformSettings(ParameterRandomizationSettings): @min_value.default def _min_value_default(self): - return 1.0 + return 0.0 @min_value.validator def _check_min_value(self, attribute, value): @@ -238,7 +238,7 @@ class MultiRangeUniformSettings(ParameterRandomizationSettings): @intervals.default def _intervals_default(self): - return [[1.0, 1.0]] + return [[0.0, 1.0]] @intervals.validator def _check_intervals(self, attribute, value): From 915d102e2058c41cd5aa94f463aeb8e08f4a5735 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 09:07:08 -0700 Subject: [PATCH 35/44] fix conversion script and test --- .../trainers/tests/test_config_conversion.py | 15 ++++++++++++--- ml-agents/mlagents/trainers/upgrade_config.py | 17 +++-------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_config_conversion.py b/ml-agents/mlagents/trainers/tests/test_config_conversion.py index 00bfc42ac0..49a1489f12 100644 --- a/ml-agents/mlagents/trainers/tests/test_config_conversion.py +++ b/ml-agents/mlagents/trainers/tests/test_config_conversion.py @@ -152,12 +152,20 @@ def test_convert_behaviors(trainer_type, use_recurrent): assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals +@mock.patch("mlagents.trainers.upgrade_config.convert_samplers") @mock.patch("mlagents.trainers.upgrade_config.convert_behaviors") @mock.patch("mlagents.trainers.upgrade_config.remove_nones") @mock.patch("mlagents.trainers.upgrade_config.write_to_yaml_file") @mock.patch("mlagents.trainers.upgrade_config.parse_args") @mock.patch("mlagents.trainers.upgrade_config.load_config") -def test_main(mock_load, mock_parse, yaml_write_mock, remove_none_mock, mock_convert): +def test_main( + mock_load, + mock_parse, + yaml_write_mock, + remove_none_mock, + mock_convert_behaviors, + mock_convert_samplers, +): test_output_file = "test.yaml" mock_load.side_effect = [ yaml.safe_load(PPO_CONFIG), @@ -171,7 +179,8 @@ def test_main(mock_load, mock_parse, yaml_write_mock, remove_none_mock, mock_con sampler="test", ) mock_parse.return_value = mock_args - mock_convert.return_value = "test_converted_config" + mock_convert_behaviors.return_value = "test_converted_config" + mock_convert_samplers.return_value = "test_converted_sampler_config" dict_without_nones = mock.Mock(name="nonones") remove_none_mock.return_value = dict_without_nones @@ -181,7 +190,7 @@ def test_main(mock_load, mock_parse, yaml_write_mock, remove_none_mock, mock_con yaml_write_mock.assert_called_with(dict_without_nones, test_output_file) assert saved_dict["behaviors"] == "test_converted_config" assert saved_dict["curriculum"] == "test_curriculum_config" - assert saved_dict["parameter_randomization"] == "test_sampler_config" + assert saved_dict["parameter_randomization"] == "test_converted_sampler_config" def test_remove_nones(): diff --git a/ml-agents/mlagents/trainers/upgrade_config.py b/ml-agents/mlagents/trainers/upgrade_config.py index d4cbcaf7b9..4263e6cf1f 100644 --- a/ml-agents/mlagents/trainers/upgrade_config.py +++ b/ml-agents/mlagents/trainers/upgrade_config.py @@ -93,20 +93,9 @@ def convert_samplers(old_sampler_config: Dict[str, Any]) -> Dict[str, Any]: continue new_sampler_config[parameter] = {} new_sampler_config[parameter]["sampler_type"] = parameter_config["sampler-type"] - if parameter_config["sampler-type"] == "uniform": - new_sampler_config[parameter]["sampler_parameters"] = { - "min_value": parameter_config["min_value"], - "max_value": parameter_config["max_value"], - } - elif parameter_config["sampler-type"] == "gaussian": - new_sampler_config[parameter]["sampler_parameters"] = { - "mean": parameter_config["mean"], - "st_dev": parameter_config["st_dev"], - } - elif parameter_config["sampler-type"] == "multirangeuniform": - new_sampler_config[parameter]["sampler_parameters"] = { - "intervals": parameter_config["intervals"] - } + new_samp_parameters = dict(parameter_config) # Copy dict + new_samp_parameters.pop("sampler-type") + new_sampler_config[parameter]["sampler_parameters"] = new_samp_parameters return new_sampler_config From af11e36af7f8222ea505a4bcea44b17479255040 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 09:20:00 -0700 Subject: [PATCH 36/44] moved sampler type checking from env_manager to env_side_channel --- .../environment_parameters_channel.py | 33 +++++++++++++++++++ .../trainers/subprocess_env_manager.py | 20 ++--------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 7dc66d9e65..3ea23e8bc3 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -1,5 +1,11 @@ from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage from mlagents_envs.exception import UnityCommunicationException +from mlagents.trainers.settings import ( + ParameterRandomizationSettings, + UniformSettings, + GaussianSettings, + MultiRangeUniformSettings, +) import uuid from enum import IntEnum from typing import List @@ -43,6 +49,33 @@ def set_float_parameter(self, key: str, value: float) -> None: msg.write_float32(value) super().queue_message_to_send(msg) + def set_sampler_parameters( + self, key: str, sampler_settings: ParameterRandomizationSettings + ) -> None: + """ + Sets an environment parameter sampler. + :param key: The string identifier of the parameter. + :param sampler_settings: The sampler specific hyperparameters + """ + if isinstance(sampler_settings, UniformSettings): + self.set_uniform_sampler_parameters( + key, + sampler_settings.min_value, + sampler_settings.max_value, + sampler_settings.seed, + ) + elif isinstance(sampler_settings, GaussianSettings): + self.set_gaussian_sampler_parameters( + key, + sampler_settings.mean, + sampler_settings.st_dev, + sampler_settings.seed, + ) + elif isinstance(sampler_settings, MultiRangeUniformSettings): + self.set_multirangeuniform_sampler_parameters( + key, sampler_settings.intervals, sampler_settings.seed + ) + def set_uniform_sampler_parameters( self, key: str, min_value: float, max_value: float, seed: int ) -> None: diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 26ed3c644f..e54655dae9 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -23,11 +23,7 @@ get_timer_root, ) from mlagents.trainers.brain import BrainParameters -from mlagents.trainers.settings import ( - UniformSettings, - GaussianSettings, - MultiRangeUniformSettings, -) +from mlagents.trainers.settings import ParameterRandomizationSettings from mlagents.trainers.action_info import ActionInfo from mlagents_envs.side_channel.environment_parameters_channel import ( EnvironmentParametersChannel, @@ -182,18 +178,8 @@ def external_brains(): for k, v in req.payload.items(): if isinstance(v, float): env_parameters.set_float_parameter(k, v) - elif isinstance(v, UniformSettings): - env_parameters.set_uniform_sampler_parameters( - k, v.min_value, v.max_value, v.seed - ) - elif isinstance(v, GaussianSettings): - env_parameters.set_gaussian_sampler_parameters( - k, v.mean, v.st_dev, v.seed - ) - elif isinstance(v, MultiRangeUniformSettings): - env_parameters.set_multirangeuniform_sampler_parameters( - k, v.intervals, v.seed - ) + elif isinstance(v, ParameterRandomizationSettings): + env_parameters.set_sampler_parameters(k, v) env.reset() all_step_result = _generate_all_results() _send_response(EnvironmentCommand.RESET, all_step_result) From 61e316559260ffa6d051b4049bea5e46fc13bef0 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 09:24:40 -0700 Subject: [PATCH 37/44] update simple env manager --- ml-agents/mlagents/trainers/simple_env_manager.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/simple_env_manager.py b/ml-agents/mlagents/trainers/simple_env_manager.py index b335efd6fc..fc6de8adde 100644 --- a/ml-agents/mlagents/trainers/simple_env_manager.py +++ b/ml-agents/mlagents/trainers/simple_env_manager.py @@ -5,6 +5,7 @@ from mlagents_envs.timers import timed from mlagents.trainers.action_info import ActionInfo from mlagents.trainers.brain import BrainParameters +from mlagents.trainers.settings import ParameterRandomizationSettings from mlagents_envs.side_channel.environment_parameters_channel import ( EnvironmentParametersChannel, ) @@ -44,7 +45,10 @@ def _reset_env( ) -> List[EnvironmentStep]: # type: ignore if config is not None: for k, v in config.items(): - self.env_params.set_float_parameter(k, v) + if isinstance(v, float): + self.env_params.set_float_parameter(k, v) + elif isinstance(v, ParameterRandomizationSettings): + self.env_params.set_sampler_parameters(k, v) self.env.reset() all_step_result = self._generate_all_results() self.previous_step = EnvironmentStep(all_step_result, 0, {}, {}) From cf838d9ed327dea9515e7f63fb032ef24589a3a6 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 10:40:02 -0700 Subject: [PATCH 38/44] fix C# unit test --- .../Tests/Editor/SamplerTests.cs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs index 14acbdeb40..b30a705fe8 100644 --- a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs +++ b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs @@ -11,13 +11,8 @@ public class SamplerTests { const int k_Seed = 1337; const double k_Epsilon = 0.0001; - EnvironmentParametersChannel m_Channel = new EnvironmentParametersChannel(); + EnvironmentParametersChannel m_Channel = SideChannelsManager.GetSideChannel(); - public SamplerTests() - { - SideChannelsManager.RegisterSideChannel(m_Channel); - } - [Test] public void UniformSamplerTest() { @@ -37,8 +32,8 @@ public void UniformSamplerTest() byte[] message = GetByteMessage(m_Channel, outgoingMsg); SideChannelsManager.ProcessSideChannelData(message); } - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.208888f, k_Epsilon); - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.118017f, k_Epsilon); + Assert.AreEqual(1.208888f, m_Channel.GetWithDefault(parameter, 1.0f), k_Epsilon); + Assert.AreEqual(1.118017f, m_Channel.GetWithDefault(parameter, 1.0f), k_Epsilon); } [Test] @@ -60,8 +55,8 @@ public void GaussianSamplerTest() byte[] message = GetByteMessage(m_Channel, outgoingMsg); SideChannelsManager.ProcessSideChannelData(message); } - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 2.936162f, k_Epsilon); - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 2.951348f, k_Epsilon); + Assert.AreEqual(2.936162f, m_Channel.GetWithDefault(parameter, 1.0f), k_Epsilon); + Assert.AreEqual(2.951348f, m_Channel.GetWithDefault(parameter, 1.0f), k_Epsilon); } [Test] @@ -85,8 +80,8 @@ public void MultiRangeUniformSamplerTest() byte[] message = GetByteMessage(m_Channel, outgoingMsg); SideChannelsManager.ProcessSideChannelData(message); } - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 3.387999f, k_Epsilon); - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.294413f, k_Epsilon); + Assert.AreEqual(3.387999f, m_Channel.GetWithDefault(parameter, 1.0f), k_Epsilon); + Assert.AreEqual(1.294413f, m_Channel.GetWithDefault(parameter, 1.0f), k_Epsilon); } internal static byte[] GetByteMessage(SideChannel sideChannel, OutgoingMessage msg) From fd5420f0abce8c5c9431c6123c18065efce73717 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 11:04:31 -0700 Subject: [PATCH 39/44] fix C# tests for cloud --- com.unity.ml-agents/Tests/Editor/SamplerTests.cs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs index b30a705fe8..14307e6733 100644 --- a/com.unity.ml-agents/Tests/Editor/SamplerTests.cs +++ b/com.unity.ml-agents/Tests/Editor/SamplerTests.cs @@ -11,15 +11,24 @@ public class SamplerTests { const int k_Seed = 1337; const double k_Epsilon = 0.0001; - EnvironmentParametersChannel m_Channel = SideChannelsManager.GetSideChannel(); + EnvironmentParametersChannel m_Channel; + public SamplerTests() + { + m_Channel = SideChannelsManager.GetSideChannel(); + // if running test on its own + if (m_Channel == null) + { + m_Channel = new EnvironmentParametersChannel(); + SideChannelsManager.RegisterSideChannel(m_Channel); + } + } [Test] public void UniformSamplerTest() { float min_value = 1.0f; float max_value = 2.0f; string parameter = "parameter1"; - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.0f); using (var outgoingMsg = new OutgoingMessage()) { outgoingMsg.WriteString(parameter); @@ -42,7 +51,6 @@ public void GaussianSamplerTest() float mean = 3.0f; float stddev = 0.2f; string parameter = "parameter2"; - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.0f); using (var outgoingMsg = new OutgoingMessage()) { outgoingMsg.WriteString(parameter); @@ -68,7 +76,6 @@ public void MultiRangeUniformSamplerTest() intervals[2] = 3.2f; intervals[3] = 4.1f; string parameter = "parameter3"; - Assert.AreEqual(m_Channel.GetWithDefault(parameter, 1.0f), 1.0f); using (var outgoingMsg = new OutgoingMessage()) { outgoingMsg.WriteString(parameter); From 4afc7b1f6b2383a39e3160bba5bd4e2291388b2b Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 12:39:32 -0700 Subject: [PATCH 40/44] typing of intervals fix --- .../side_channel/environment_parameters_channel.py | 4 ++-- ml-agents/mlagents/trainers/settings.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 3ea23e8bc3..3002a0aed2 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -8,7 +8,7 @@ ) import uuid from enum import IntEnum -from typing import List +from typing import List, Tuple class EnvironmentParametersChannel(SideChannel): @@ -115,7 +115,7 @@ def set_gaussian_sampler_parameters( super().queue_message_to_send(msg) def set_multirangeuniform_sampler_parameters( - self, key: str, intervals: List[List[float]], seed: int + self, key: str, intervals: List[Tuple[float, float]], seed: int ) -> None: """ Sets a multirangeuniform environment parameter sampler. diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 49522e2863..9cf574c3cf 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -1,6 +1,6 @@ import attr import cattr -from typing import Dict, Optional, List, Any, DefaultDict, Mapping +from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple from enum import Enum import collections import argparse @@ -234,7 +234,7 @@ class GaussianSettings(ParameterRandomizationSettings): @attr.s(auto_attribs=True) class MultiRangeUniformSettings(ParameterRandomizationSettings): - intervals: List[List[float]] = attr.ib() + intervals: List[Tuple[float, float]] = attr.ib() @intervals.default def _intervals_default(self): From 4c6cf57e44b0f87fe945fcfa0049ec872ccb3cce Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 12:50:53 -0700 Subject: [PATCH 41/44] made sampler static class --- com.unity.ml-agents/Runtime/Sampler.cs | 14 ++++---------- .../SideChannels/EnvironmentParametersChannel.cs | 8 +++----- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Sampler.cs b/com.unity.ml-agents/Runtime/Sampler.cs index 7329027270..fc48f3c271 100644 --- a/com.unity.ml-agents/Runtime/Sampler.cs +++ b/com.unity.ml-agents/Runtime/Sampler.cs @@ -10,28 +10,22 @@ namespace Unity.MLAgents /// /// Takes a list of floats that encode a sampling distribution and returns the sampling function. /// - internal sealed class SamplerFactory + internal static class SamplerFactory { - /// - /// Constructor. - /// - internal SamplerFactory() - { - } - public Func CreateUniformSampler(float min, float max, int seed) + public static Func CreateUniformSampler(float min, float max, int seed) { Random distr = new Random(seed); return () => min + (float)distr.NextDouble() * (max - min); } - public Func CreateGaussianSampler(float mean, float stddev, int seed) + public static Func CreateGaussianSampler(float mean, float stddev, int seed) { RandomNormal distr = new RandomNormal(seed, mean, stddev); return () => (float)distr.NextDouble(); } - public Func CreateMultiRangeUniformSampler(IList intervals, int seed) + public static Func CreateMultiRangeUniformSampler(IList intervals, int seed) { //RNG Random distr = new Random(seed); diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 1c1b748ecc..5aa828cd26 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -45,8 +45,6 @@ internal class EnvironmentParametersChannel : SideChannel Dictionary> m_RegisteredActions = new Dictionary>(); - SamplerFactory m_SamplerFactory = new SamplerFactory(); - const string k_EnvParamsId = "534c891e-810f-11ea-a9d0-822485860400"; /// @@ -82,19 +80,19 @@ protected override void OnMessageReceived(IncomingMessage msg) { float min = msg.ReadFloat32(); float max = msg.ReadFloat32(); - sampler = m_SamplerFactory.CreateUniformSampler(min, max, seed); + sampler = SamplerFactory.CreateUniformSampler(min, max, seed); } else if ((int)SamplerType.Gaussian == samplerType) { float mean = msg.ReadFloat32(); float stddev = msg.ReadFloat32(); - sampler = m_SamplerFactory.CreateGaussianSampler(mean, stddev, seed); + sampler = SamplerFactory.CreateGaussianSampler(mean, stddev, seed); } else if ((int)SamplerType.MultiRangeUniform == samplerType) { IList intervals = msg.ReadFloatList(); - sampler = m_SamplerFactory.CreateMultiRangeUniformSampler(intervals, seed); + sampler = SamplerFactory.CreateMultiRangeUniformSampler(intervals, seed); } else{ Debug.LogWarning("EnvironmentParametersChannel received an unknown data type."); From 9088b4c258e27868350e35373aa75389fe143809 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 13:05:46 -0700 Subject: [PATCH 42/44] fixed comment --- .../Runtime/SideChannels/EnvironmentParametersChannel.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs index 5aa828cd26..d28f84d2da 100644 --- a/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs +++ b/com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs @@ -29,7 +29,7 @@ internal enum SamplerType Gaussian = 1, /// - /// Samples a reset parameter from a Gaussian distribution. + /// Samples a reset parameter from a MultiRangeUniform distribution. /// MultiRangeUniform = 2 From c5da1c37104a2fc9c083517b652946fc8577bad2 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 14:42:22 -0700 Subject: [PATCH 43/44] fix docs --- docs/Training-ML-Agents.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index a353bdf62e..b61fc3b24b 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -455,8 +455,8 @@ parameter_randomization: ``` Note that `mass`, `length` and `scale` are the names of the environment -parameters that will be sampled. If a parameter specified in the file doesn't -exist in the environment, then this parameter will be ignored. +parameters that will be sampled. These are used as keys by the `EnvironmentParameter` +class to sample new parameters via the function `GetWithDefault`. | **Setting** | **Description** | | :--------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | From 1b45baba16f04b43b389d65c4abd481a8cbfe309 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Fri, 12 Jun 2020 16:05:17 -0700 Subject: [PATCH 44/44] sampler settings apply themselves to env channel --- .../environment_parameters_channel.py | 33 ------------- ml-agents/mlagents/trainers/settings.py | 49 ++++++++++++++++++- .../mlagents/trainers/simple_env_manager.py | 2 +- .../trainers/subprocess_env_manager.py | 2 +- 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py index 3002a0aed2..2d379cbb3f 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py @@ -1,11 +1,5 @@ from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage from mlagents_envs.exception import UnityCommunicationException -from mlagents.trainers.settings import ( - ParameterRandomizationSettings, - UniformSettings, - GaussianSettings, - MultiRangeUniformSettings, -) import uuid from enum import IntEnum from typing import List, Tuple @@ -49,33 +43,6 @@ def set_float_parameter(self, key: str, value: float) -> None: msg.write_float32(value) super().queue_message_to_send(msg) - def set_sampler_parameters( - self, key: str, sampler_settings: ParameterRandomizationSettings - ) -> None: - """ - Sets an environment parameter sampler. - :param key: The string identifier of the parameter. - :param sampler_settings: The sampler specific hyperparameters - """ - if isinstance(sampler_settings, UniformSettings): - self.set_uniform_sampler_parameters( - key, - sampler_settings.min_value, - sampler_settings.max_value, - sampler_settings.seed, - ) - elif isinstance(sampler_settings, GaussianSettings): - self.set_gaussian_sampler_parameters( - key, - sampler_settings.mean, - sampler_settings.st_dev, - sampler_settings.seed, - ) - elif isinstance(sampler_settings, MultiRangeUniformSettings): - self.set_multirangeuniform_sampler_parameters( - key, sampler_settings.intervals, sampler_settings.seed - ) - def set_uniform_sampler_parameters( self, key: str, min_value: float, max_value: float, seed: int ) -> None: diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 9cf574c3cf..49a8e8036b 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -4,6 +4,7 @@ from enum import Enum import collections import argparse +import abc from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser from mlagents.trainers.cli_utils import load_config @@ -11,6 +12,9 @@ from mlagents.trainers.models import ScheduleType, EncoderType from mlagents_envs import logging_util +from mlagents_envs.side_channel.environment_parameters_channel import ( + EnvironmentParametersChannel, +) logger = logging_util.get_logger(__name__) @@ -170,7 +174,7 @@ def to_settings(self) -> type: @attr.s(auto_attribs=True) -class ParameterRandomizationSettings: +class ParameterRandomizationSettings(abc.ABC): seed: int = parser.get_default("seed") @staticmethod @@ -208,6 +212,16 @@ def structure(d: Mapping, t: type) -> Any: ) return d_final + @abc.abstractmethod + def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: + """ + Helper method to send sampler settings over EnvironmentParametersChannel + Calls the appropriate sampler type set method. + :param key: environment parameter to be sampled + :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment + """ + pass + @attr.s(auto_attribs=True) class UniformSettings(ParameterRandomizationSettings): @@ -225,12 +239,34 @@ def _check_min_value(self, attribute, value): "Minimum value is greater than maximum value in uniform sampler." ) + def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: + """ + Helper method to send sampler settings over EnvironmentParametersChannel + Calls the uniform sampler type set method. + :param key: environment parameter to be sampled + :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment + """ + env_channel.set_uniform_sampler_parameters( + key, self.min_value, self.max_value, self.seed + ) + @attr.s(auto_attribs=True) class GaussianSettings(ParameterRandomizationSettings): mean: float = 1.0 st_dev: float = 1.0 + def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: + """ + Helper method to send sampler settings over EnvironmentParametersChannel + Calls the gaussian sampler type set method. + :param key: environment parameter to be sampled + :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment + """ + env_channel.set_gaussian_sampler_parameters( + key, self.mean, self.st_dev, self.seed + ) + @attr.s(auto_attribs=True) class MultiRangeUniformSettings(ParameterRandomizationSettings): @@ -253,6 +289,17 @@ def _check_intervals(self, attribute, value): f"Minimum value is greater than maximum value in interval {interval}." ) + def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: + """ + Helper method to send sampler settings over EnvironmentParametersChannel + Calls the multirangeuniform sampler type set method. + :param key: environment parameter to be sampled + :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment + """ + env_channel.set_multirangeuniform_sampler_parameters( + key, self.intervals, self.seed + ) + @attr.s(auto_attribs=True) class SelfPlaySettings: diff --git a/ml-agents/mlagents/trainers/simple_env_manager.py b/ml-agents/mlagents/trainers/simple_env_manager.py index fc6de8adde..98cdfbbe99 100644 --- a/ml-agents/mlagents/trainers/simple_env_manager.py +++ b/ml-agents/mlagents/trainers/simple_env_manager.py @@ -48,7 +48,7 @@ def _reset_env( if isinstance(v, float): self.env_params.set_float_parameter(k, v) elif isinstance(v, ParameterRandomizationSettings): - self.env_params.set_sampler_parameters(k, v) + v.apply(k, self.env_params) self.env.reset() all_step_result = self._generate_all_results() self.previous_step = EnvironmentStep(all_step_result, 0, {}, {}) diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index e54655dae9..8bf2e4e771 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -179,7 +179,7 @@ def external_brains(): if isinstance(v, float): env_parameters.set_float_parameter(k, v) elif isinstance(v, ParameterRandomizationSettings): - env_parameters.set_sampler_parameters(k, v) + v.apply(k, env_parameters) env.reset() all_step_result = _generate_all_results() _send_response(EnvironmentCommand.RESET, all_step_result)