diff --git a/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs b/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs index 377d4d7fa2..ef66d7d12e 100644 --- a/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs +++ b/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs @@ -351,9 +351,7 @@ void ConfigureAgent(int config) } else { - var min = m_ResetParams.GetWithDefault("big_wall_min_height", 8); - var max = m_ResetParams.GetWithDefault("big_wall_max_height", 8); - var height = min + Random.value * (max - min); + var height = m_ResetParams.GetWithDefault("big_wall_height", 8); localScale = new Vector3( localScale.x, height, diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 5d2bcaa6f4..e87e782e47 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to #### ml-agents / ml-agents-envs / gym-unity (Python) - The Parameter Randomization feature has been refactored to enable sampling of new parameters per episode to improve robustness. The `resampling-interval` parameter has been removed and the config structure updated. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md). (#4065) +- The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler +in the lesson of a Curriculum. Curriculum has been refactored and is now specified at the level of the parameter, not the +behavior. More information +[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md).(#4160) ### Minor Changes #### com.unity.ml-agents (C#) diff --git a/config/ppo/3DBall_randomize.yaml b/config/ppo/3DBall_randomize.yaml index 2f3608b880..31f472c541 100644 --- a/config/ppo/3DBall_randomize.yaml +++ b/config/ppo/3DBall_randomize.yaml @@ -24,8 +24,7 @@ behaviors: time_horizon: 1000 summary_freq: 12000 threaded: true - -parameter_randomization: +environment_parameters: mass: sampler_type: uniform sampler_parameters: diff --git a/config/ppo/WallJump_curriculum.yaml b/config/ppo/WallJump_curriculum.yaml index 93a8813ae7..9ccc2d33b6 100644 --- a/config/ppo/WallJump_curriculum.yaml +++ b/config/ppo/WallJump_curriculum.yaml @@ -49,20 +49,72 @@ behaviors: time_horizon: 128 summary_freq: 20000 threaded: true - -curriculum: - BigWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - big_wall_min_height: [0.0, 4.0, 6.0, 8.0] - big_wall_max_height: [4.0, 7.0, 8.0, 8.0] - SmallWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - small_wall_height: [1.5, 2.0, 2.5, 4.0] +environment_parameters: + big_wall_height: + curriculum: + - name: Lesson0 # The '-' is important as this is a list + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.1 + value: + sampler_type: uniform + sampler_parameters: + min_value: 0.0 + max_value: 4.0 + - name: Lesson1 # This is the start of the second lesson + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.3 + value: + sampler_type: uniform + sampler_parameters: + min_value: 4.0 + max_value: 7.0 + - name: Lesson2 + completion_criteria: + measure: progress + behavior: BigWallJump + 
signal_smoothing: true + min_lesson_length: 100 + threshold: 0.5 + value: + sampler_type: uniform + sampler_parameters: + min_value: 6.0 + max_value: 8.0 + - name: Lesson3 + value: 8.0 + small_wall_height: + curriculum: + - name: Lesson0 + completion_criteria: + measure: progress + behavior: SmallWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.1 + value: 1.5 + - name: Lesson1 + completion_criteria: + measure: progress + behavior: SmallWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.3 + value: 2.0 + - name: Lesson2 + completion_criteria: + measure: progress + behavior: SmallWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.5 + value: 2.5 + - name: Lesson3 + value: 4.0 diff --git a/docs/Migrating.md b/docs/Migrating.md index f27baef5f8..593d29f5a9 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -14,7 +14,21 @@ double-check that the versions are in the same. The versions can be found in # Migrating -## Migrating from Release 1 to latest +## Migrating from Release 3 to latest + +### Important changes +- The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler +in the lesson of a Curriculum. Curriculum has been refactored and is now specified at the level of the parameter, not the +behavior. More information +[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md). (#4160) + +### Steps to Migrate +- The configuration format for curriculum and parameter randomization has changed. To upgrade your configuration files, +an upgrade script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` to see the script usage. Note that you will need to upgrade to or install the current version of ML-Agents before running the script. To update manually (see the example below): + - If your config file used a `parameter_randomization` section, rename that section to `environment_parameters` + - If your config file used a `curriculum` section, you will need to rewrite your curriculum with this [format](Training-ML-Agents.md#curriculum). 
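For reference, the same upgrade can also be done programmatically. The sketch below is based on the converter exercised by the updated `test_config_conversion.py` in this change; the `convert(behaviors, curriculum, samplers)` call and the keys of its return value are taken from that test, while the file paths are placeholders.

```python
# Sketch: upgrade an old-style config (separate trainer config, curriculum and
# sampler files) into the merged format with an `environment_parameters` section.
# The `convert` helper and its argument order come from test_config_conversion.py;
# the file paths below are placeholders.
import yaml

from mlagents.trainers.upgrade_config import convert

with open("old_trainer_config.yaml") as f:
    old_behaviors = yaml.safe_load(f)
with open("old_curriculum.yaml") as f:
    old_curriculum = yaml.safe_load(f)
with open("old_sampler.yaml") as f:
    old_samplers = yaml.safe_load(f)

new_config = convert(old_behaviors, old_curriculum, old_samplers)

# Curriculum and randomization now live together under `environment_parameters`.
print(new_config["environment_parameters"].keys())

with open("upgraded_config.yaml", "w") as f:
    yaml.dump(new_config, f)
```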
+ +## Migrating from Release 1 to Release 3 ### Important changes - Training artifacts (trained models, summaries) are now found under `results/` diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index b61fc3b24b..e7fc6c1df3 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -9,13 +9,12 @@ - [Loading an Existing Model](#loading-an-existing-model) - [Training Configurations](#training-configurations) - [Behavior Configurations](#behavior-configurations) - - [Curriculum Learning](#curriculum-learning) - - [Specifying Curricula](#specifying-curricula) - - [Training with a Curriculum](#training-with-a-curriculum) - - [Environment Parameter Randomization](#environment-parameter-randomization) - - [Included Sampler Types](#included-sampler-types) - - [Defining a New Sampler Type](#defining-a-new-sampler-type) - - [Training with Environment Parameter Randomization](#training-with-environment-parameter-randomization) + - [Environment Parameters](#environment-parameters) + - [Environment Parameter Randomization](#environment-parameter-randomization) + - [Supported Sampler Types](#supported-sampler-types) + - [Training with Environment Parameter Randomization](#training-with-environment-parameter-randomization) + - [Curriculum Learning](#curriculum) + - [Training with a Curriculum](#training-with-a-curriculum) - [Training Using Concurrent Unity Instances](#training-using-concurrent-unity-instances) For a broad overview of reinforcement learning, imitation learning and all the @@ -137,8 +136,8 @@ More specifically, this section offers a detailed guide on the command-line flags for `mlagents-learn` that control the training configurations: - ``: defines the training hyperparameters for each - Behavior in the scene, and the set-ups for Curriculum Learning and - Environment Parameter Randomization + Behavior in the scene, and the set-ups for the environment parameters + (Curriculum Learning and Environment Parameter Randomization) - `--num-envs`: number of concurrent Unity instances to use during training Reminder that a detailed description of all command-line options can be found by @@ -179,7 +178,8 @@ use during training, and the answers to the above questions will dictate its con The rest of this guide breaks down the different sub-sections of the trainer config file and explains the possible settings for each. -**NOTE:** The configuration file format has been changed from 0.17.0 and onwards. To convert +**NOTE:** The configuration file format has been changed between 0.17.0 and 0.18.0 and +between 0.18.0 and onwards. To convert an old set of configuration files (trainer config, curriculum, and sampler files) to the new format, a script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` in your console to see the script's usage. @@ -194,7 +194,7 @@ below is a sample file that includes all the possible settings if we're using a PPO trainer with all the possible training functionalities enabled (memory, behavioral cloning, curiosity, GAIL and self-play). You will notice that curriculum and environment parameter randomization settings are not part of the `behaviors` -configuration, but their settings live in different sections that we'll cover subsequently. +configuration, but in their own section called `environment_parameters`. ```yaml behaviors: @@ -337,11 +337,13 @@ each of these parameters mean and provide guidelines on how to set them. See description of all the configurations listed above, along with their defaults. 
Unless otherwise specified, omitting a configuration will revert it to its default. -### Curriculum Learning -To enable curriculum learning, you need to add a `curriculum ` sub-section to the trainer -configuration YAML file. Within this sub-section, add an entry for each behavior that defines -the curriculum for thatbehavior. Here is one example: +### Environment Parameters + +In order to control the `EnvironmentParameters` in the Unity simulation during training, +you need to add a section called `environment_parameters`. For example you can set the +value of an `EnvironmentParameter` called `my_environment_parameter` to `3.0` with +the following code : ```yml behaviors: @@ -349,93 +351,30 @@ behaviors: # < Same as above > # Add this section -curriculum: - BehaviorY: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - wall_height: [1.5, 2.0, 2.5, 4.0] -``` - -Each group of Agents under the same `Behavior Name` in an environment can have a -corresponding curriculum. These curricula are held in what we call a -"metacurriculum". A metacurriculum allows different groups of Agents to follow -different curricula within the same environment. - -#### Specifying Curricula - -In order to define the curricula, the first step is to decide which parameters -of the environment will vary. In the case of the Wall Jump environment, the -height of the wall is what varies. Rather than adjusting it by hand, we will -create a configuration which describes the structure of the curricula. Within it, we -can specify which points in the training process our wall height will change, -either based on the percentage of training steps which have taken place, or what -the average reward the agent has received in the recent past is. Below is an -example config for the curricula for the Wall Jump environment. - -```yaml -behaviors: - BigWallJump: - # < Trainer parameters for BigWallJump > - SmallWallJump: - # < Trainer parameters for SmallWallJump > - -curriculum: - BigWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - big_wall_min_height: [0.0, 4.0, 6.0, 8.0] - big_wall_max_height: [4.0, 7.0, 8.0, 8.0] - SmallWallJump: - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - small_wall_height: [1.5, 2.0, 2.5, 4.0] +environment_parameters: + my_environment_parameter: 3.0 ``` -The curriculum for each Behavior has the following parameters: - -| **Setting** | **Description** | -| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `measure` | What to measure learning progress, and advancement in lessons by.

`reward` uses a measure received reward, while `progress` uses the ratio of steps/max_steps. | -| `thresholds` | Points in value of `measure` where lesson should be increased. | -| `min_lesson_length` | The minimum number of episodes that should be completed before the lesson can change. If `measure` is set to `reward`, the average cumulative reward of the last `min_lesson_length` episodes will be used to determine if the lesson should change. Must be nonnegative.

**Important**: the average reward that is compared to the thresholds is different than the mean reward that is logged to the console. For example, if `min_lesson_length` is `100`, the lesson will increment after the average cumulative reward of the last `100` episodes exceeds the current threshold. The mean reward logged to the console is dictated by the `summary_freq` parameter defined above. | -| `signal_smoothing` | Whether to weight the current progress measure by previous values. | -| `parameters` | Corresponds to environment parameters to control. Length of each array should be one greater than number of thresholds. | - -#### Training with a Curriculum - -Once we have specified our metacurriculum and curricula, we can launch -`mlagents-learn` to point to the config file containing -our curricula and PPO will train using Curriculum Learning. For example, to -train agents in the Wall Jump environment with curriculum learning, we can run: +Inside the Unity simulation, you can access your Environment Parameters by doing : -```sh -mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum +```csharp +Academy.Instance.EnvironmentParameters.GetWithDefault("my_environment_parameter", 0.0f); ``` -We can then keep track of the current lessons and progresses via TensorBoard. If you've terminated -the run, you can resume it using `--resume` and lesson progress will start off where it -ended. +#### Environment Parameter Randomization -### Environment Parameter Randomization +To enable environment parameter randomization, you need to edit the `environment_parameters` +section of your training configuration yaml file. Instead of providing a single float value +for your environment parameter, you can specify a sampler instead. Here is an example with +three environment parameters called `mass`, `length` and `scale`: -To enable parameter randomization, you need to add a `parameter-randomization` sub-section -to your trainer config YAML file. Here is one example: - -```yaml +```yml behaviors: - # < Same as above> - -parameter_randomization: + BehaviorY: + # < Same as above > +# Add this section +environment_parameters: mass: sampler_type: uniform sampler_parameters: @@ -454,16 +393,13 @@ parameter_randomization: st_dev: .3 ``` -Note that `mass`, `length` and `scale` are the names of the environment -parameters that will be sampled. These are used as keys by the `EnvironmentParameter` -class to sample new parameters via the function `GetWithDefault`. | **Setting** | **Description** | | :--------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sampler_type` | A string identifier for the type of sampler to use for this `Environment Parameter`. | | `sampler_parameters` | The parameters for a given `sampler_type`. Samplers of different types can have different `sampler_parameters` | -#### Supported Sampler Types +##### Supported Sampler Types Below is a list of the `sampler_type` values supported by the toolkit. @@ -487,12 +423,11 @@ Below is a list of the `sampler_type` values supported by the toolkit. The implementation of the samplers can be found in the [Samplers.cs file](../com.unity.ml-agents/Runtime/Sampler.cs). 
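To show how the `environment_parameters` section described above is consumed on the trainer side, here is a minimal sketch patterned on the new `test_env_param_manager.py` added in this change. The parameter names and values are illustrative; the parsing behavior (a sampler and a bare float each becoming a one-lesson curriculum) is what the new tests assert.

```python
# Sketch: parsing an `environment_parameters` section on the Python side.
# Mirrors the assertions in ml-agents/mlagents/trainers/tests/test_env_param_manager.py;
# the parameter names and values are only examples.
import yaml

from mlagents.trainers.settings import ConstantSettings, RunOptions, UniformSettings

CONFIG = """
environment_parameters:
  mass:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.5
      max_value: 10
  gravity: 9.8
"""

run_options = RunOptions.from_dict(yaml.safe_load(CONFIG))

# A sampler becomes a single-lesson curriculum whose value is the sampler settings.
mass_lesson = run_options.environment_parameters["mass"].curriculum[0]
assert isinstance(mass_lesson.value, UniformSettings)

# A bare float is treated as a constant sampler.
gravity_lesson = run_options.environment_parameters["gravity"].curriculum[0]
assert isinstance(gravity_lesson.value, ConstantSettings)
assert gravity_lesson.value.value == 9.8
```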
-#### Training with Environment Parameter Randomization +##### Training with Environment Parameter Randomization After the sampler configuration is defined, we proceed by launching `mlagents-learn` -and specify trainer configuration with `parameter-randomization` defined. For example, -if we wanted to train the 3D ball agent with parameter randomization using -`Environment Parameters` with sampling setup, we would run +and specify trainer configuration with parameter randomization enabled. For example, +if we wanted to train the 3D ball agent with parameter randomization, we would run ```sh mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize @@ -500,6 +435,85 @@ mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize We can observe progress and metrics via Tensorboard. +#### Curriculum + +To enable curriculum learning, you need to add a `curriculum` sub-section to your environment +parameter. Here is one example with the environment parameter `my_environment_parameter` : + +```yml +behaviors: + BehaviorY: + # < Same as above > + +# Add this section +environment_parameters: + my_environment_parameter: + curriculum: + - name: MyFirstLesson # The '-' is important as this is a list + completion_criteria: + measure: progress + behavior: my_behavior + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.2 + value: 0.0 + - name: MySecondLesson # This is the start of the second lesson + completion_criteria: + measure: progress + behavior: my_behavior + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.6 + require_reset: true + value: + sampler_type: uniform + sampler_parameters: + min_value: 4.0 + max_value: 7.0 + - name: MyLastLesson + value: 8.0 +``` + +Note that this curriculum __only__ applies to `my_environment_parameter`. The `curriculum` section +contains a list of `Lessons`. In the example, the lessons are named `MyFirstLesson`, `MySecondLesson` +and `MyLastLesson`. +Each `Lesson` has 3 fields : + + - `name` which is a user defined name for the lesson (The name of the lesson will be displayed in + the console when the lesson changes) + - `completion_criteria` which determines what needs to happen in the simulation before the lesson + can be considered complete. When that condition is met, the curriculum moves on to the next + `Lesson`. Note that you do not need to specify a `completion_criteria` for the last `Lesson` + - `value` which is the value the environment parameter will take during the lesson. Note that this + can be a float or a sampler. + + There are the different settings of the `completion_criteria` : + + +| **Setting** | **Description** | +| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `measure` | What to measure learning progress, and advancement in lessons by.

`reward` uses a measure of received reward, while `progress` uses the ratio of steps/max_steps. | +| `behavior` | Specifies which behavior is being tracked. There can be multiple behaviors with different names, each at different points of training. This setting allows the curriculum to track only one of them. | +| `threshold` | Determines the value of `measure` at which the lesson should be incremented. | +| `min_lesson_length` | The minimum number of episodes that should be completed before the lesson can change. If `measure` is set to `reward`, the average cumulative reward of the last `min_lesson_length` episodes will be used to determine if the lesson should change. Must be nonnegative.

**Important**: the average reward that is compared to the thresholds is different than the mean reward that is logged to the console. For example, if `min_lesson_length` is `100`, the lesson will increment after the average cumulative reward of the last `100` episodes exceeds the current threshold. The mean reward logged to the console is dictated by the `summary_freq` parameter defined above. | +| `signal_smoothing` | Whether to weight the current progress measure by previous values. | +| `require_reset` | Whether changing lesson requires the environment to reset (default: false) | +##### Training with a Curriculum + +Once we have specified our metacurriculum and curricula, we can launch +`mlagents-learn` to point to the config file containing +our curricula and PPO will train using Curriculum Learning. For example, to +train agents in the Wall Jump environment with curriculum learning, we can run: + +```sh +mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum +``` + +We can then keep track of the current lessons and progresses via TensorBoard. If you've terminated +the run, you can resume it using `--resume` and lesson progress will start off where it +ended. + + ### Training Using Concurrent Unity Instances In order to run concurrent Unity instances during training, set the number of diff --git a/docs/Using-Docker.md b/docs/Using-Docker.md index f7ea5b7a7c..f2922964f8 100644 --- a/docs/Using-Docker.md +++ b/docs/Using-Docker.md @@ -36,7 +36,7 @@ agents using camera-based visual observations might be slower. - Since Docker runs a container in an environment that is isolated from the host machine, a mounted directory in your host machine is used to share data, e.g. - the trainer configuration file, Unity executable, curriculum files and + the trainer configuration file, Unity executable and TensorFlow graph. For convenience, we created an empty `unity-volume` directory at the root of the repository for this purpose, but feel free to use any other directory. The remainder of this guide assumes that the diff --git a/ml-agents/mlagents/trainers/curriculum.py b/ml-agents/mlagents/trainers/curriculum.py deleted file mode 100644 index f81fb26c38..0000000000 --- a/ml-agents/mlagents/trainers/curriculum.py +++ /dev/null @@ -1,91 +0,0 @@ -import math -from typing import Dict, Any - -from mlagents.trainers.exception import CurriculumConfigError - -from mlagents_envs.logging_util import get_logger -from mlagents.trainers.settings import CurriculumSettings - -logger = get_logger(__name__) - - -class Curriculum: - def __init__(self, brain_name: str, settings: CurriculumSettings): - """ - Initializes a Curriculum object. 
- :param brain_name: Name of the brain this Curriculum is associated with - :param config: Dictionary of fields needed to configure the Curriculum - """ - self.max_lesson_num = 0 - self.measure = None - self._lesson_num = 0 - self.brain_name = brain_name - self.settings = settings - - self.smoothing_value = 0.0 - self.measure = self.settings.measure - self.min_lesson_length = self.settings.min_lesson_length - self.max_lesson_num = len(self.settings.thresholds) - - parameters = self.settings.parameters - for key in parameters: - if len(parameters[key]) != self.max_lesson_num + 1: - raise CurriculumConfigError( - f"The parameter {key} in {brain_name}'s curriculum must have {self.max_lesson_num + 1} values " - f"but {len(parameters[key])} were found" - ) - - @property - def lesson_num(self) -> int: - return self._lesson_num - - @lesson_num.setter - def lesson_num(self, lesson_num: int) -> None: - self._lesson_num = max(0, min(lesson_num, self.max_lesson_num)) - - def increment_lesson(self, measure_val: float) -> bool: - """ - Increments the lesson number depending on the progress given. - :param measure_val: Measure of progress (either reward or percentage - steps completed). - :return Whether the lesson was incremented. - """ - if not self.settings or not measure_val or math.isnan(measure_val): - return False - if self.settings.signal_smoothing: - measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val - self.smoothing_value = measure_val - if self.lesson_num < self.max_lesson_num: - if measure_val > self.settings.thresholds[self.lesson_num]: - self.lesson_num += 1 - config = {} - parameters = self.settings.parameters - for key in parameters: - config[key] = parameters[key][self.lesson_num] - logger.info( - "{0} lesson changed. Now in lesson {1}: {2}".format( - self.brain_name, - self.lesson_num, - ", ".join([str(x) + " -> " + str(config[x]) for x in config]), - ) - ) - return True - return False - - def get_config(self, lesson: int = None) -> Dict[str, Any]: - """ - Returns reset parameters which correspond to the lesson. - :param lesson: The lesson you want to get the config of. If None, the - current lesson is returned. - :return: The configuration of the reset parameters. - """ - if not self.settings: - return {} - if lesson is None: - lesson = self.lesson_num - lesson = max(0, min(lesson, self.max_lesson_num)) - config = {} - parameters = self.settings.parameters - for key in parameters: - config[key] = parameters[key][lesson] - return config diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index f58191c18b..c96555cdaf 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -70,7 +70,7 @@ def reset(self, config: Dict = None) -> int: def set_env_parameters(self, config: Dict = None) -> None: """ Sends environment parameter settings to C# via the - EnvironmentParametersSidehannel. + EnvironmentParametersSideChannel. 
:param config: Dict of environment parameter keys and values """ pass diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py new file mode 100644 index 0000000000..232dd0fb83 --- /dev/null +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -0,0 +1,156 @@ +from typing import Dict, List, Tuple, Optional +from mlagents.trainers.settings import ( + EnvironmentParameterSettings, + ParameterRandomizationSettings, +) +from collections import defaultdict +from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType + +from mlagents_envs.logging_util import get_logger + +logger = get_logger(__name__) + + +class EnvironmentParameterManager: + def __init__( + self, + settings: Optional[Dict[str, EnvironmentParameterSettings]] = None, + run_seed: int = -1, + restore: bool = False, + ): + """ + EnvironmentParameterManager manages all the environment parameters of a training + session. It determines when parameters should change and gives access to the + current sampler of each parameter. + :param settings: A dictionary from environment parameter to + EnvironmentParameterSettings. + :param run_seed: When the seed is not provided for an environment parameter, + this seed will be used instead. + :param restore: If true, the EnvironmentParameterManager will use the + GlobalTrainingStatus to try and reload the lesson status of each environment + parameter. + """ + if settings is None: + settings = {} + self._dict_settings = settings + for parameter_name in self._dict_settings.keys(): + initial_lesson = GlobalTrainingStatus.get_parameter_state( + parameter_name, StatusType.LESSON_NUM + ) + if initial_lesson is None or not restore: + GlobalTrainingStatus.set_parameter_state( + parameter_name, StatusType.LESSON_NUM, 0 + ) + self._smoothed_values: Dict[str, float] = defaultdict(float) + for key in self._dict_settings.keys(): + self._smoothed_values[key] = 0.0 + # Update the seeds of the samplers + self._set_sampler_seeds(run_seed) + + def _set_sampler_seeds(self, seed): + """ + Sets the seeds for the samplers (if no seed was already present). Note that + using the provided seed. + """ + offset = 0 + for settings in self._dict_settings.values(): + for lesson in settings.curriculum: + if lesson.value.seed == -1: + lesson.value.seed = seed + offset + offset += 1 + + def get_minimum_reward_buffer_size(self, behavior_name: str) -> int: + """ + Calculates the minimum size of the reward buffer a behavior must use. This + method uses the 'min_lesson_length' sampler_parameter to determine this value. + :param behavior_name: The name of the behavior the minimum reward buffer + size corresponds to. + """ + result = 1 + for settings in self._dict_settings.values(): + for lesson in settings.curriculum: + if lesson.completion_criteria is not None: + if lesson.completion_criteria.behavior == behavior_name: + result = max( + result, lesson.completion_criteria.min_lesson_length + ) + return result + + def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]: + """ + Creates a dictionary from environment parameter name to their corresponding + ParameterRandomizationSettings. If curriculum is used, the + ParameterRandomizationSettings corresponds to the sampler of the current lesson. 
+ """ + samplers: Dict[str, ParameterRandomizationSettings] = {} + for param_name, settings in self._dict_settings.items(): + lesson_num = GlobalTrainingStatus.get_parameter_state( + param_name, StatusType.LESSON_NUM + ) + lesson = settings.curriculum[lesson_num] + samplers[param_name] = lesson.value + return samplers + + def get_current_lesson_number(self) -> Dict[str, int]: + """ + Creates a dictionary from environment parameter to the current lesson number. + If not using curriculum, this number is always 0 for that environment parameter. + """ + result: Dict[str, int] = {} + for parameter_name in self._dict_settings.keys(): + result[parameter_name] = GlobalTrainingStatus.get_parameter_state( + parameter_name, StatusType.LESSON_NUM + ) + return result + + def update_lessons( + self, + trainer_steps: Dict[str, int], + trainer_max_steps: Dict[str, int], + trainer_reward_buffer: Dict[str, List[float]], + ) -> Tuple[bool, bool]: + """ + Given progress metrics, calculates if at least one environment parameter is + in a new lesson and if at least one environment parameter requires the env + to reset. + :param trainer_steps: A dictionary from behavior_name to the number of training + steps this behavior's trainer has performed. + :param trainer_max_steps: A dictionary from behavior_name to the maximum number + of training steps this behavior's trainer has performed. + :param trainer_reward_buffer: A dictionary from behavior_name to the list of + the most recent episode returns for this behavior's trainer. + :returns: A tuple of two booleans : (True if any lesson has changed, True if + environment needs to reset) + """ + must_reset = False + updated = False + for param_name, settings in self._dict_settings.items(): + lesson_num = GlobalTrainingStatus.get_parameter_state( + param_name, StatusType.LESSON_NUM + ) + lesson = settings.curriculum[lesson_num] + if ( + lesson.completion_criteria is not None + and len(settings.curriculum) > lesson_num + ): + behavior_to_consider = lesson.completion_criteria.behavior + if behavior_to_consider in trainer_steps: + must_increment, new_smoothing = lesson.completion_criteria.need_increment( + float(trainer_steps[behavior_to_consider]) + / float(trainer_max_steps[behavior_to_consider]), + trainer_reward_buffer[behavior_to_consider], + self._smoothed_values[param_name], + ) + self._smoothed_values[param_name] = new_smoothing + if must_increment: + GlobalTrainingStatus.set_parameter_state( + param_name, StatusType.LESSON_NUM, lesson_num + 1 + ) + new_lesson_name = settings.curriculum[lesson_num + 1].name + logger.info( + f"Parameter '{param_name}' has changed. 
Now in lesson '{new_lesson_name}'" + ) + updated = True + if lesson.completion_criteria.require_reset: + must_reset = True + return updated, must_reset diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 5cd2d599f7..55525468cf 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -5,13 +5,13 @@ import numpy as np import json -from typing import Callable, Optional, List, Dict +from typing import Callable, Optional, List import mlagents.trainers import mlagents_envs from mlagents import tf_utils from mlagents.trainers.trainer_controller import TrainerController -from mlagents.trainers.meta_curriculum import MetaCurriculum +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories from mlagents.trainers.stats import ( TensorboardWriter, @@ -23,6 +23,7 @@ from mlagents.trainers.cli_utils import parser from mlagents_envs.environment import UnityEnvironment from mlagents.trainers.settings import RunOptions + from mlagents.trainers.training_status import GlobalTrainingStatus from mlagents_envs.base_env import BaseEnv from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager @@ -128,18 +129,18 @@ def run_training(run_seed: int, options: RunOptions) -> None: env_manager = SubprocessEnvManager( env_factory, engine_config, env_settings.num_envs ) - maybe_meta_curriculum = try_create_meta_curriculum( - options.curriculum, env_manager, restore=checkpoint_settings.resume + env_parameter_manager = EnvironmentParameterManager( + options.environment_parameters, run_seed, restore=checkpoint_settings.resume ) - maybe_add_samplers(options.parameter_randomization, env_manager, run_seed) + trainer_factory = TrainerFactory( options.behaviors, write_path, not checkpoint_settings.inference, checkpoint_settings.resume, run_seed, + env_parameter_manager, maybe_init_path, - maybe_meta_curriculum, False, ) # Create controller and begin training. @@ -147,7 +148,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: trainer_factory, write_path, checkpoint_settings.run_id, - maybe_meta_curriculum, + env_parameter_manager, not checkpoint_settings.inference, run_seed, ) @@ -191,35 +192,6 @@ def write_timing_tree(output_dir: str) -> None: ) -def maybe_add_samplers( - sampler_config: Optional[Dict], env: SubprocessEnvManager, run_seed: int -) -> None: - """ - Adds samplers to env if sampler config provided and sets seed if not configured. - :param sampler_config: validated dict of sampler configs. None if not included. - :param env: env manager to pass samplers via reset - :param run_seed: Random seed used for training. 
- """ - if sampler_config is not None: - # If the seed is not specified in yaml, this will grab the run seed - for offset, v in enumerate(sampler_config.values()): - if v.seed == -1: - v.seed = run_seed + offset - env.set_env_parameters(config=sampler_config) - - -def try_create_meta_curriculum( - curriculum_config: Optional[Dict], env: SubprocessEnvManager, restore: bool = False -) -> Optional[MetaCurriculum]: - if curriculum_config is None or len(curriculum_config) <= 0: - return None - else: - meta_curriculum = MetaCurriculum(curriculum_config) - if restore: - meta_curriculum.try_restore_all_curriculum() - return meta_curriculum - - def create_environment_factory( env_path: Optional[str], no_graphics: bool, diff --git a/ml-agents/mlagents/trainers/meta_curriculum.py b/ml-agents/mlagents/trainers/meta_curriculum.py deleted file mode 100644 index 187a9345ff..0000000000 --- a/ml-agents/mlagents/trainers/meta_curriculum.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Contains the MetaCurriculum class.""" - -from typing import Dict, Set -from mlagents.trainers.curriculum import Curriculum -from mlagents.trainers.settings import CurriculumSettings -from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType - -from mlagents_envs.logging_util import get_logger - -logger = get_logger(__name__) - - -class MetaCurriculum: - """A MetaCurriculum holds curricula. Each curriculum is associated to a - particular brain in the environment. - """ - - def __init__(self, curriculum_configs: Dict[str, CurriculumSettings]): - """Initializes a MetaCurriculum object. - - :param curriculum_folder: Dictionary of brain_name to the - Curriculum for each brain. - """ - self._brains_to_curricula: Dict[str, Curriculum] = {} - used_reset_parameters: Set[str] = set() - for brain_name, curriculum_settings in curriculum_configs.items(): - self._brains_to_curricula[brain_name] = Curriculum( - brain_name, curriculum_settings - ) - config_keys: Set[str] = set( - self._brains_to_curricula[brain_name].get_config().keys() - ) - - # Check if any two curricula use the same reset params. - if config_keys & used_reset_parameters: - logger.warning( - "Two or more curricula will " - "attempt to change the same reset " - "parameter. The result will be " - "non-deterministic." - ) - - used_reset_parameters.update(config_keys) - - @property - def brains_to_curricula(self): - """A dict from brain_name to the brain's curriculum.""" - return self._brains_to_curricula - - @property - def lesson_nums(self): - """A dict from brain name to the brain's curriculum's lesson number.""" - lesson_nums = {} - for brain_name, curriculum in self.brains_to_curricula.items(): - lesson_nums[brain_name] = curriculum.lesson_num - - return lesson_nums - - @lesson_nums.setter - def lesson_nums(self, lesson_nums): - for brain_name, lesson in lesson_nums.items(): - self.brains_to_curricula[brain_name].lesson_num = lesson - - def _lesson_ready_to_increment( - self, brain_name: str, reward_buff_size: int - ) -> bool: - """Determines whether the curriculum of a specified brain is ready - to attempt an increment. - - Args: - brain_name (str): The name of the brain whose curriculum will be - checked for readiness. - reward_buff_size (int): The size of the reward buffer of the trainer - that corresponds to the specified brain. - - Returns: - Whether the curriculum of the specified brain should attempt to - increment its lesson. 
- """ - if brain_name not in self.brains_to_curricula: - return False - - return reward_buff_size >= ( - self.brains_to_curricula[brain_name].min_lesson_length - ) - - def increment_lessons(self, measure_vals, reward_buff_sizes=None): - """Attempts to increments all the lessons of all the curricula in this - MetaCurriculum. Note that calling this method does not guarantee the - lesson of a curriculum will increment. The lesson of a curriculum will - only increment if the specified measure threshold defined in the - curriculum has been reached and the minimum number of episodes in the - lesson have been completed. - - Args: - measure_vals (dict): A dict of brain name to measure value. - reward_buff_sizes (dict): A dict of brain names to the size of their - corresponding reward buffers. - - Returns: - A dict from brain name to whether that brain's lesson number was - incremented. - """ - ret = {} - if reward_buff_sizes: - for brain_name, buff_size in reward_buff_sizes.items(): - if self._lesson_ready_to_increment(brain_name, buff_size): - measure_val = measure_vals[brain_name] - ret[brain_name] = self.brains_to_curricula[ - brain_name - ].increment_lesson(measure_val) - else: - for brain_name, measure_val in measure_vals.items(): - ret[brain_name] = self.brains_to_curricula[brain_name].increment_lesson( - measure_val - ) - return ret - - def try_restore_all_curriculum(self): - """ - Tries to restore all the curriculums to what is saved in training_status.json - """ - - for brain_name, curriculum in self.brains_to_curricula.items(): - lesson_num = GlobalTrainingStatus.get_parameter_state( - brain_name, StatusType.LESSON_NUM - ) - if lesson_num is not None: - logger.info( - f"Resuming curriculum for {brain_name} at lesson {lesson_num}." - ) - curriculum.lesson_num = lesson_num - else: - curriculum.lesson_num = 0 - - def get_config(self): - """Get the combined configuration of all curricula in this - MetaCurriculum. - - :return: A dict from parameter to value. 
- """ - config = {} - - for _, curriculum in self.brains_to_curricula.items(): - curr_config = curriculum.get_config() - config.update(curr_config) - - return config diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index b7d8eaf4c5..55ee9b0b63 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -1,10 +1,12 @@ import attr import cattr -from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple +from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple, Union from enum import Enum import collections import argparse import abc +import numpy as np +import math from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser from mlagents.trainers.cli_utils import load_config @@ -119,6 +121,7 @@ def _reward_signal_steps_per_update_default(self): return self.steps_per_update +# INTRINSIC REWARD SIGNALS ############################################################# class RewardSignalType(Enum): EXTRINSIC: str = "extrinsic" GAIL: str = "gail" @@ -170,16 +173,20 @@ class CuriositySettings(RewardSignalSettings): learning_rate: float = 3e-4 +# SAMPLERS ############################################################################# class ParameterRandomizationType(Enum): UNIFORM: str = "uniform" GAUSSIAN: str = "gaussian" MULTIRANGEUNIFORM: str = "multirangeuniform" + CONSTANT: str = "constant" def to_settings(self) -> type: _mapping = { ParameterRandomizationType.UNIFORM: UniformSettings, ParameterRandomizationType.GAUSSIAN: GaussianSettings, ParameterRandomizationType.MULTIRANGEUNIFORM: MultiRangeUniformSettings, + ParameterRandomizationType.CONSTANT: ConstantSettings + # Constant type is handled if a float is provided instead of a config } return _mapping[self] @@ -189,39 +196,50 @@ class ParameterRandomizationSettings(abc.ABC): seed: int = parser.get_default("seed") @staticmethod - def structure(d: Mapping, t: type) -> Any: + def structure( + d: Union[Mapping, float], t: type + ) -> "ParameterRandomizationSettings": """ - Helper method to structure a Dict of ParameterRandomizationSettings class. Meant to be registered with + Helper method to a ParameterRandomizationSettings class. Meant to be registered with cattr.register_structure_hook() and called with cattr.structure(). This is needed to handle the special Enum selection of ParameterRandomizationSettings classes. """ + if isinstance(d, (float, int)): + return ConstantSettings(value=d) if not isinstance(d, Mapping): raise TrainerConfigError( f"Unsupported parameter randomization configuration {d}." ) - d_final: Dict[str, List[float]] = {} - for environment_parameter, environment_parameter_config in d.items(): - if environment_parameter == "resampling-interval": - logger.warning( - "The resampling-interval is no longer necessary for parameter randomization. It is being ignored." - ) - continue - if "sampler_type" not in environment_parameter_config: - raise TrainerConfigError( - f"Sampler configuration for {environment_parameter} does not contain sampler_type." - ) - if "sampler_parameters" not in environment_parameter_config: - raise TrainerConfigError( - f"Sampler configuration for {environment_parameter} does not contain sampler_parameters." - ) - enum_key = ParameterRandomizationType( - environment_parameter_config["sampler_type"] + if "sampler_type" not in d: + raise TrainerConfigError( + f"Sampler configuration does not contain sampler_type : {d}." 
) - t = enum_key.to_settings() - d_final[environment_parameter] = strict_to_cls( - environment_parameter_config["sampler_parameters"], t + if "sampler_parameters" not in d: + raise TrainerConfigError( + f"Sampler configuration does not contain sampler_parameters : {d}." ) - return d_final + enum_key = ParameterRandomizationType(d["sampler_type"]) + t = enum_key.to_settings() + return strict_to_cls(d["sampler_parameters"], t) + + @staticmethod + def unstructure(d: "ParameterRandomizationSettings") -> Mapping: + """ + Helper method to a ParameterRandomizationSettings class. Meant to be registered with + cattr.register_unstructure_hook() and called with cattr.unstructure(). + """ + _reversed_mapping = { + UniformSettings: ParameterRandomizationType.UNIFORM, + GaussianSettings: ParameterRandomizationType.GAUSSIAN, + MultiRangeUniformSettings: ParameterRandomizationType.MULTIRANGEUNIFORM, + ConstantSettings: ParameterRandomizationType.CONSTANT, + } + sampler_type: Optional[str] = None + for t, name in _reversed_mapping.items(): + if isinstance(d, t): + sampler_type = name.value + sampler_parameters = attr.asdict(d) + return {"sampler_type": sampler_type, "sampler_parameters": sampler_parameters} @abc.abstractmethod def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: @@ -234,6 +252,20 @@ def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: pass +@attr.s(auto_attribs=True) +class ConstantSettings(ParameterRandomizationSettings): + value: float = 0.0 + + def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: + """ + Helper method to send sampler settings over EnvironmentParametersChannel + Calls the constant sampler type set method. + :param key: environment parameter to be sampled + :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment + """ + env_channel.set_float_parameter(key, self.value) + + @attr.s(auto_attribs=True) class UniformSettings(ParameterRandomizationSettings): min_value: float = attr.ib() @@ -312,6 +344,144 @@ def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None: ) +# ENVIRONMENT PARAMETERS ############################################################### +@attr.s(auto_attribs=True) +class CompletionCriteriaSettings: + """ + CompletionCriteriaSettings contains the information needed to figure out if the next + lesson must start. + """ + + class MeasureType(Enum): + PROGRESS: str = "progress" + REWARD: str = "reward" + + measure: MeasureType = attr.ib(default=MeasureType.REWARD) + behavior: str = attr.ib(default="") + min_lesson_length: int = 0 + signal_smoothing: bool = True + threshold: float = attr.ib(default=0.0) + require_reset: bool = False + + @threshold.validator + def _check_threshold_value(self, attribute, value): + """ + Verify that the threshold has a value between 0 and 1 when the measure is + PROGRESS + """ + if self.measure == self.MeasureType.PROGRESS: + if self.threshold > 1.0: + raise TrainerConfigError( + "Threshold for next lesson cannot be greater than 1 when the measure is progress." + ) + if self.threshold < 0.0: + raise TrainerConfigError( + "Threshold for next lesson cannot be negative when the measure is progress." + ) + + def need_increment( + self, progress: float, reward_buffer: List[float], smoothing: float + ) -> Tuple[bool, float]: + """ + Given measures, this method returns a boolean indicating if the lesson + needs to change now, and a float corresponding to the new smoothed value. 
+ """ + # Is the min number of episodes reached + if len(reward_buffer) < self.min_lesson_length: + return False, smoothing + if self.measure == CompletionCriteriaSettings.MeasureType.PROGRESS: + if progress > self.threshold: + return True, smoothing + if self.measure == CompletionCriteriaSettings.MeasureType.REWARD: + if len(reward_buffer) < 1: + return False, smoothing + measure = np.mean(reward_buffer) + if math.isnan(measure): + return False, smoothing + if self.signal_smoothing: + measure = 0.25 * smoothing + 0.75 * measure + smoothing = measure + if measure > self.threshold: + return True, smoothing + return False, smoothing + + +@attr.s(auto_attribs=True) +class Lesson: + """ + Gathers the data of one lesson for one environment parameter including its name, + the condition that must be fullfiled for the lesson to be completed and a sampler + for the environment parameter. If the completion_criteria is None, then this is + the last lesson in the curriculum. + """ + + value: ParameterRandomizationSettings + name: str + completion_criteria: Optional[CompletionCriteriaSettings] = attr.ib(default=None) + + +@attr.s(auto_attribs=True) +class EnvironmentParameterSettings: + """ + EnvironmentParameterSettings is an ordered list of lessons for one environment + parameter. + """ + + curriculum: List[Lesson] + + @staticmethod + def _check_lesson_chain(lessons, parameter_name): + """ + Ensures that when using curriculum, all non-terminal lessons have a valid + CompletionCriteria + """ + num_lessons = len(lessons) + for index, lesson in enumerate(lessons): + if index < num_lessons - 1 and lesson.completion_criteria is None: + raise TrainerConfigError( + f"A non-terminal lesson does not have a completion_criteria for {parameter_name}." + ) + + @staticmethod + def structure(d: Mapping, t: type) -> Dict[str, "EnvironmentParameterSettings"]: + """ + Helper method to structure a Dict of EnvironmentParameterSettings class. Meant + to be registered with cattr.register_structure_hook() and called with + cattr.structure(). + """ + if not isinstance(d, Mapping): + raise TrainerConfigError( + f"Unsupported parameter environment parameter settings {d}." 
+ ) + d_final: Dict[str, EnvironmentParameterSettings] = {} + for environment_parameter, environment_parameter_config in d.items(): + if ( + isinstance(environment_parameter_config, Mapping) + and "curriculum" in environment_parameter_config + ): + d_final[environment_parameter] = strict_to_cls( + environment_parameter_config, EnvironmentParameterSettings + ) + EnvironmentParameterSettings._check_lesson_chain( + d_final[environment_parameter].curriculum, environment_parameter + ) + else: + sampler = ParameterRandomizationSettings.structure( + environment_parameter_config, ParameterRandomizationSettings + ) + d_final[environment_parameter] = EnvironmentParameterSettings( + curriculum=[ + Lesson( + completion_criteria=None, + value=sampler, + name=environment_parameter, + ) + ] + ) + return d_final + + +# TRAINERS ############################################################################# @attr.s(auto_attribs=True) class SelfPlaySettings: save_steps: int = 20000 @@ -413,19 +583,7 @@ def structure(d: Mapping, t: type) -> Any: return t(**d_copy) -@attr.s(auto_attribs=True) -class CurriculumSettings: - class MeasureType: - PROGRESS: str = "progress" - REWARD: str = "reward" - - measure: str = attr.ib(default=MeasureType.REWARD) - thresholds: List[float] = attr.ib(factory=list) - min_lesson_length: int = 0 - signal_smoothing: bool = True - parameters: Dict[str, List[float]] = attr.ib(kw_only=True) - - +# COMMAND LINE ######################################################################### @attr.s(auto_attribs=True) class CheckpointSettings: run_id: str = parser.get_default("run_id") @@ -464,8 +622,7 @@ class RunOptions(ExportableSettings): ) env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings) engine_settings: EngineSettings = attr.ib(factory=EngineSettings) - parameter_randomization: Optional[Dict[str, ParameterRandomizationSettings]] = None - curriculum: Optional[Dict[str, CurriculumSettings]] = None + environment_parameters: Optional[Dict[str, EnvironmentParameterSettings]] = None checkpoint_settings: CheckpointSettings = attr.ib(factory=CheckpointSettings) # These are options that are relevant to the run itself, and not the engine or environment. 
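As a quick illustration of the lesson-completion logic introduced above, the sketch below constructs a `CompletionCriteriaSettings` and calls `need_increment` directly. The behavior name and reward values are illustrative only.

```python
# Sketch: how CompletionCriteriaSettings.need_increment gates a lesson change.
# Uses only the class defined above; the numbers are illustrative.
from mlagents.trainers.settings import CompletionCriteriaSettings

criteria = CompletionCriteriaSettings(
    measure=CompletionCriteriaSettings.MeasureType.REWARD,
    behavior="BigWallJump",
    min_lesson_length=3,
    threshold=10.0,
)

# Too few episodes in the reward buffer: the lesson never increments.
changed, smoothing = criteria.need_increment(
    progress=0.1, reward_buffer=[12.0], smoothing=0.0
)
assert not changed

# Enough episodes and a smoothed mean reward above the threshold: move to the next lesson.
changed, smoothing = criteria.need_increment(
    progress=0.2, reward_buffer=[14.0, 15.0, 16.0], smoothing=14.0
)
assert changed
```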
@@ -476,10 +633,15 @@ class RunOptions(ExportableSettings): cattr.register_structure_hook(EngineSettings, strict_to_cls) cattr.register_structure_hook(CheckpointSettings, strict_to_cls) cattr.register_structure_hook( - Dict[str, ParameterRandomizationSettings], - ParameterRandomizationSettings.structure, + Dict[str, EnvironmentParameterSettings], EnvironmentParameterSettings.structure + ) + cattr.register_structure_hook(Lesson, strict_to_cls) + cattr.register_structure_hook( + ParameterRandomizationSettings, ParameterRandomizationSettings.structure + ) + cattr.register_unstructure_hook( + ParameterRandomizationSettings, ParameterRandomizationSettings.unstructure ) - cattr.register_structure_hook(CurriculumSettings, strict_to_cls) cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) cattr.register_structure_hook( DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index db0426a441..08b0c3402f 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -166,9 +166,7 @@ def _generate_all_results() -> AllStepResult: _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs) elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS: for k, v in req.payload.items(): - if isinstance(v, float): - env_parameters.set_float_parameter(k, v) - elif isinstance(v, ParameterRandomizationSettings): + if isinstance(v, ParameterRandomizationSettings): v.apply(k, env_parameters) elif req.cmd == EnvironmentCommand.RESET: env.reset() diff --git a/ml-agents/mlagents/trainers/tests/test_config_conversion.py b/ml-agents/mlagents/trainers/tests/test_config_conversion.py index 49a1489f12..644dbdced3 100644 --- a/ml-agents/mlagents/trainers/tests/test_config_conversion.py +++ b/ml-agents/mlagents/trainers/tests/test_config_conversion.py @@ -1,9 +1,7 @@ import yaml import pytest -from unittest import mock -from argparse import Namespace -from mlagents.trainers.upgrade_config import convert_behaviors, main, remove_nones +from mlagents.trainers.upgrade_config import convert_behaviors, remove_nones, convert from mlagents.trainers.settings import ( TrainerType, PPOSettings, @@ -125,6 +123,41 @@ encoding_size: 128 """ +CURRICULUM = """ + + BigWallJump: + measure: progress + thresholds: [0.1, 0.3, 0.5] + min_lesson_length: 200 + signal_smoothing: true + parameters: + big_wall_min_height: [0.0, 4.0, 6.0, 8.0] + big_wall_max_height: [4.0, 7.0, 8.0, 8.0] + SmallWallJump: + measure: progress + thresholds: [0.1, 0.3, 0.5] + min_lesson_length: 100 + signal_smoothing: true + parameters: + small_wall_height: [1.5, 2.0, 2.5, 4.0] + """ + +RANDOMIZATION = """ + resampling-interval: 5000 + mass: + sampler-type: uniform + min_value: 0.5 + max_value: 10 + gravity: + sampler-type: uniform + min_value: 7 + max_value: 12 + scale: + sampler-type: uniform + min_value: 0.75 + max_value: 3 + """ + @pytest.mark.parametrize("use_recurrent", [True, False]) @pytest.mark.parametrize("trainer_type", [TrainerType.PPO, TrainerType.SAC]) @@ -152,45 +185,31 @@ def test_convert_behaviors(trainer_type, use_recurrent): assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals -@mock.patch("mlagents.trainers.upgrade_config.convert_samplers") -@mock.patch("mlagents.trainers.upgrade_config.convert_behaviors") -@mock.patch("mlagents.trainers.upgrade_config.remove_nones") 
-@mock.patch("mlagents.trainers.upgrade_config.write_to_yaml_file") -@mock.patch("mlagents.trainers.upgrade_config.parse_args") -@mock.patch("mlagents.trainers.upgrade_config.load_config") -def test_main( - mock_load, - mock_parse, - yaml_write_mock, - remove_none_mock, - mock_convert_behaviors, - mock_convert_samplers, -): - test_output_file = "test.yaml" - mock_load.side_effect = [ - yaml.safe_load(PPO_CONFIG), - "test_curriculum_config", - "test_sampler_config", - ] - mock_args = Namespace( - trainer_config_path="mock", - output_config_path=test_output_file, - curriculum="test", - sampler="test", - ) - mock_parse.return_value = mock_args - mock_convert_behaviors.return_value = "test_converted_config" - mock_convert_samplers.return_value = "test_converted_sampler_config" - dict_without_nones = mock.Mock(name="nonones") - remove_none_mock.return_value = dict_without_nones - - main() - saved_dict = remove_none_mock.call_args[0][0] - # Check that the output of the remove_none call is here - yaml_write_mock.assert_called_with(dict_without_nones, test_output_file) - assert saved_dict["behaviors"] == "test_converted_config" - assert saved_dict["curriculum"] == "test_curriculum_config" - assert saved_dict["parameter_randomization"] == "test_converted_sampler_config" +def test_convert(): + old_behaviors = yaml.safe_load(PPO_CONFIG) + old_curriculum = yaml.safe_load(CURRICULUM) + old_sampler = yaml.safe_load(RANDOMIZATION) + config = convert(old_behaviors, old_curriculum, old_sampler) + assert BRAIN_NAME in config["behaviors"] + assert "big_wall_min_height" in config["environment_parameters"] + + curriculum = config["environment_parameters"]["big_wall_min_height"]["curriculum"] + assert len(curriculum) == 4 + for i, expected_value in enumerate([0.0, 4.0, 6.0, 8.0]): + assert curriculum[i][f"Lesson{i}"]["value"] == expected_value + for i, threshold in enumerate([0.1, 0.3, 0.5]): + criteria = curriculum[i][f"Lesson{i}"]["completion_criteria"] + assert criteria["threshold"] == threshold + assert criteria["behavior"] == "BigWallJump" + assert criteria["signal_smoothing"] + assert criteria["min_lesson_length"] == 200 + assert criteria["measure"] == "progress" + + assert "gravity" in config["environment_parameters"] + gravity = config["environment_parameters"]["gravity"] + assert gravity["sampler_type"] == "uniform" + assert gravity["sampler_parameters"]["min_value"] == 7 + assert gravity["sampler_parameters"]["max_value"] == 12 def test_remove_nones(): diff --git a/ml-agents/mlagents/trainers/tests/test_curriculum.py b/ml-agents/mlagents/trainers/tests/test_curriculum.py deleted file mode 100644 index 2740206924..0000000000 --- a/ml-agents/mlagents/trainers/tests/test_curriculum.py +++ /dev/null @@ -1,77 +0,0 @@ -import pytest - -from mlagents.trainers.exception import CurriculumConfigError -from mlagents.trainers.curriculum import Curriculum -from mlagents.trainers.settings import CurriculumSettings - - -dummy_curriculum_config = CurriculumSettings( - measure="reward", - thresholds=[10, 20, 50], - min_lesson_length=3, - signal_smoothing=True, - parameters={ - "param1": [0.7, 0.5, 0.3, 0.1], - "param2": [100, 50, 20, 15], - "param3": [0.2, 0.3, 0.7, 0.9], - }, -) - -bad_curriculum_config = CurriculumSettings( - measure="reward", - thresholds=[10, 20, 50], - min_lesson_length=3, - signal_smoothing=False, - parameters={ - "param1": [0.7, 0.5, 0.3, 0.1], - "param2": [100, 50, 20], - "param3": [0.2, 0.3, 0.7, 0.9], - }, -) - - -@pytest.fixture -def default_reset_parameters(): - return {"param1": 1, 
"param2": 1, "param3": 1} - - -def test_init_curriculum_happy_path(): - curriculum = Curriculum("TestBrain", dummy_curriculum_config) - - assert curriculum.brain_name == "TestBrain" - assert curriculum.lesson_num == 0 - assert curriculum.measure == "reward" - - -def test_increment_lesson(): - curriculum = Curriculum("TestBrain", dummy_curriculum_config) - assert curriculum.lesson_num == 0 - - curriculum.lesson_num = 1 - assert curriculum.lesson_num == 1 - - assert not curriculum.increment_lesson(10) - assert curriculum.lesson_num == 1 - - assert curriculum.increment_lesson(30) - assert curriculum.lesson_num == 2 - - assert not curriculum.increment_lesson(30) - assert curriculum.lesson_num == 2 - - assert curriculum.increment_lesson(10000) - assert curriculum.lesson_num == 3 - - -def test_get_parameters(): - curriculum = Curriculum("TestBrain", dummy_curriculum_config) - assert curriculum.get_config() == {"param1": 0.7, "param2": 100, "param3": 0.2} - - curriculum.lesson_num = 2 - assert curriculum.get_config() == {"param1": 0.3, "param2": 20, "param3": 0.7} - assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2} - - -def test_load_bad_curriculum_file_raises_error(): - with pytest.raises(CurriculumConfigError): - Curriculum("TestBrain", bad_curriculum_config) diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py new file mode 100644 index 0000000000..b8fb92e15e --- /dev/null +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -0,0 +1,256 @@ +import pytest +import yaml + + +from mlagents.trainers.exception import TrainerConfigError +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager +from mlagents.trainers.settings import ( + RunOptions, + UniformSettings, + GaussianSettings, + ConstantSettings, + CompletionCriteriaSettings, +) + + +test_sampler_config_yaml = """ +environment_parameters: + param_1: + sampler_type: uniform + sampler_parameters: + min_value: 0.5 + max_value: 10 +""" + + +def test_sampler_conversion(): + run_options = RunOptions.from_dict(yaml.safe_load(test_sampler_config_yaml)) + assert run_options.environment_parameters is not None + assert "param_1" in run_options.environment_parameters + lessons = run_options.environment_parameters["param_1"].curriculum + assert len(lessons) == 1 + assert lessons[0].completion_criteria is None + assert isinstance(lessons[0].value, UniformSettings) + assert lessons[0].value.min_value == 0.5 + assert lessons[0].value.max_value == 10 + + +test_sampler_and_constant_config_yaml = """ +environment_parameters: + param_1: + sampler_type: gaussian + sampler_parameters: + mean: 4 + st_dev: 5 + param_2: 20 +""" + + +def test_sampler_and_constant_conversion(): + run_options = RunOptions.from_dict( + yaml.safe_load(test_sampler_and_constant_config_yaml) + ) + assert "param_1" in run_options.environment_parameters + assert "param_2" in run_options.environment_parameters + lessons_1 = run_options.environment_parameters["param_1"].curriculum + lessons_2 = run_options.environment_parameters["param_2"].curriculum + # gaussian + assert isinstance(lessons_1[0].value, GaussianSettings) + assert lessons_1[0].value.mean == 4 + assert lessons_1[0].value.st_dev == 5 + # constant + assert isinstance(lessons_2[0].value, ConstantSettings) + assert lessons_2[0].value.value == 20 + + +test_curriculum_config_yaml = """ +environment_parameters: + param_1: + curriculum: + - name: Lesson1 + completion_criteria: + 
measure: reward + behavior: fake_behavior + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - name: Lesson2 + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 60 + min_lesson_length: 100 + require_reset: false + value: 2 + - name: Lesson3 + value: + sampler_type: uniform + sampler_parameters: + min_value: 1 + max_value: 3 +""" + + +def test_curriculum_conversion(): + run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml)) + assert "param_1" in run_options.environment_parameters + lessons = run_options.environment_parameters["param_1"].curriculum + assert len(lessons) == 3 + # First lesson + lesson = lessons[0] + assert lesson.completion_criteria is not None + assert ( + lesson.completion_criteria.measure + == CompletionCriteriaSettings.MeasureType.REWARD + ) + assert lesson.completion_criteria.behavior == "fake_behavior" + assert lesson.completion_criteria.threshold == 30.0 + assert lesson.completion_criteria.min_lesson_length == 100 + assert lesson.completion_criteria.require_reset + assert isinstance(lesson.value, ConstantSettings) + assert lesson.value.value == 1 + # Second lesson + lesson = lessons[1] + assert lesson.completion_criteria is not None + assert ( + lesson.completion_criteria.measure + == CompletionCriteriaSettings.MeasureType.REWARD + ) + assert lesson.completion_criteria.behavior == "fake_behavior" + assert lesson.completion_criteria.threshold == 60.0 + assert lesson.completion_criteria.min_lesson_length == 100 + assert not lesson.completion_criteria.require_reset + assert isinstance(lesson.value, ConstantSettings) + assert lesson.value.value == 2 + # Last lesson + lesson = lessons[2] + assert lesson.completion_criteria is None + assert isinstance(lesson.value, UniformSettings) + assert lesson.value.min_value == 1 + assert lesson.value.max_value == 3 + + +test_bad_curriculum_no_competion_criteria_config_yaml = """ +environment_parameters: + param_1: + curriculum: + - name: Lesson1 + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - name: Lesson2 + value: 2 + - name: Lesson3 + value: + sampler_type: uniform + sampler_parameters: + min_value: 1 + max_value: 3 +""" + + +def test_curriculum_raises_no_completion_criteria_conversion(): + with pytest.raises(TrainerConfigError): + RunOptions.from_dict( + yaml.safe_load(test_bad_curriculum_no_competion_criteria_config_yaml) + ) + + +test_everything_config_yaml = """ +environment_parameters: + param_1: + curriculum: + - name: Lesson1 + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - name: Lesson2 + completion_criteria: + measure: progress + behavior: fake_behavior + threshold: 0.5 + min_lesson_length: 100 + require_reset: false + value: 2 + - name: Lesson3 + value: + sampler_type: uniform + sampler_parameters: + min_value: 1 + max_value: 3 + param_2: + sampler_type: gaussian + sampler_parameters: + mean: 4 + st_dev: 5 + param_3: 20 +""" + + +def test_create_manager(): + run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml)) + param_manager = EnvironmentParameterManager( + run_options.environment_parameters, 1337, False + ) + assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100 + assert param_manager.get_current_lesson_number() == { + "param_1": 0, + "param_2": 0, + "param_3": 0, + } + assert 
param_manager.get_current_samplers() == { + "param_1": ConstantSettings(seed=1337, value=1), + "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5), + "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20), + } + # Not enough episodes completed + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 99}, + ) == (False, False) + # Not enough episodes reward + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1] * 101}, + ) == (False, False) + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 101}, + ) == (True, True) + assert param_manager.get_current_lesson_number() == { + "param_1": 1, + "param_2": 0, + "param_3": 0, + } + param_manager_2 = EnvironmentParameterManager( + run_options.environment_parameters, 1337, restore=True + ) + # The use of global status should make it so that the lesson numbers are maintained + assert param_manager_2.get_current_lesson_number() == { + "param_1": 1, + "param_2": 0, + "param_3": 0, + } + # No reset required + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 700}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [0] * 101}, + ) == (True, False) + assert param_manager.get_current_samplers() == { + "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3), + "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5), + "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20), + } diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 167fe157e3..0df04893eb 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -7,7 +7,7 @@ from mlagents.trainers.cli_utils import DetectDefault from mlagents_envs.exception import UnityEnvironmentException from mlagents.trainers.stats import StatsReporter -from mlagents.trainers.settings import UniformSettings +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager def basic_options(extra_args=None): @@ -44,22 +44,6 @@ def basic_options(extra_args=None): debug: false """ -MOCK_SAMPLER_CURRICULUM_YAML = """ - parameter_randomization: - sampler1: - sampler_type: uniform - sampler_parameters: - min_value: 0.2 - - curriculum: - behavior1: - parameters: - foo: [0.2, 0.5] - behavior2: - parameters: - foo: [0.2, 0.5] - """ - @patch("mlagents.trainers.learn.write_timing_tree") @patch("mlagents.trainers.learn.write_run_options") @@ -82,20 +66,26 @@ def test_run_training( mock_env.academy_name = "TestAcademyName" create_environment_factory.return_value = mock_env load_config.return_value = yaml.safe_load(MOCK_INITIALIZE_YAML) - + mock_param_manager = MagicMock(return_value="mock_param_manager") mock_init = MagicMock(return_value=None) - with patch.object(TrainerController, "__init__", mock_init): - with patch.object(TrainerController, "start_learning", MagicMock()): - options = basic_options() - learn.run_training(0, options) - mock_init.assert_called_once_with( - trainer_factory_mock.return_value, "results/ppo", "ppo", None, True, 0 - ) - handle_dir_mock.assert_called_once_with( - "results/ppo", False, False, "results/notuselessrun" - ) - 
write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs") - write_run_options_mock.assert_called_once_with("results/ppo", options) + with patch.object(EnvironmentParameterManager, "__new__", mock_param_manager): + with patch.object(TrainerController, "__init__", mock_init): + with patch.object(TrainerController, "start_learning", MagicMock()): + options = basic_options() + learn.run_training(0, options) + mock_init.assert_called_once_with( + trainer_factory_mock.return_value, + "results/ppo", + "ppo", + "mock_param_manager", + True, + 0, + ) + handle_dir_mock.assert_called_once_with( + "results/ppo", False, False, "results/notuselessrun" + ) + write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs") + write_run_options_mock.assert_called_once_with("results/ppo", options) StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py @@ -121,7 +111,6 @@ def test_commandline_args(mock_file): opt = parse_command_line(["mytrainerpath"]) assert opt.behaviors == {} assert opt.env_settings.env_path is None - assert opt.parameter_randomization is None assert opt.checkpoint_settings.resume is False assert opt.checkpoint_settings.inference is False assert opt.checkpoint_settings.run_id == "ppo" @@ -151,7 +140,6 @@ def test_commandline_args(mock_file): opt = parse_command_line(full_args) assert opt.behaviors == {} assert opt.env_settings.env_path == "./myenvfile" - assert opt.parameter_randomization is None assert opt.checkpoint_settings.run_id == "myawesomerun" assert opt.checkpoint_settings.initialize_from == "testdir" assert opt.env_settings.seed == 7890 @@ -170,7 +158,6 @@ def test_yaml_args(mock_file): opt = parse_command_line(["mytrainerpath"]) assert opt.behaviors == {} assert opt.env_settings.env_path == "./oldenvfile" - assert opt.parameter_randomization is None assert opt.checkpoint_settings.run_id == "uselessrun" assert opt.checkpoint_settings.initialize_from == "notuselessrun" assert opt.env_settings.seed == 9870 @@ -197,7 +184,6 @@ def test_yaml_args(mock_file): opt = parse_command_line(full_args) assert opt.behaviors == {} assert opt.env_settings.env_path == "./myenvfile" - assert opt.parameter_randomization is None assert opt.checkpoint_settings.run_id == "myawesomerun" assert opt.env_settings.seed == 7890 assert opt.env_settings.base_port == 4004 @@ -208,13 +194,6 @@ def test_yaml_args(mock_file): assert opt.checkpoint_settings.resume is True -@patch("builtins.open", new_callable=mock_open, read_data=MOCK_SAMPLER_CURRICULUM_YAML) -def test_sampler_configs(mock_file): - opt = parse_command_line(["mytrainerpath"]) - assert isinstance(opt.parameter_randomization["sampler1"], UniformSettings) - assert len(opt.curriculum.keys()) == 2 - - @patch("builtins.open", new_callable=mock_open, read_data=MOCK_YAML) def test_env_args(mock_file): full_args = [ diff --git a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py b/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py deleted file mode 100644 index 6bff34f841..0000000000 --- a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py +++ /dev/null @@ -1,136 +0,0 @@ -import pytest -from unittest.mock import patch, Mock, call -import yaml -import cattr - -from mlagents.trainers.meta_curriculum import MetaCurriculum - -from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment -from mlagents.trainers.tests.test_simple_rl import ( - _check_environment_trains, - BRAIN_NAME, - PPO_CONFIG, -) -from mlagents.trainers.tests.test_curriculum import dummy_curriculum_config -from 
mlagents.trainers.settings import CurriculumSettings -from mlagents.trainers.training_status import StatusType - - -@pytest.fixture -def measure_vals(): - return {"Brain1": 0.2, "Brain2": 0.3} - - -@pytest.fixture -def reward_buff_sizes(): - return {"Brain1": 7, "Brain2": 8} - - -def test_convert_from_dict(): - config = yaml.safe_load( - """ - measure: progress - thresholds: [0.1, 0.3, 0.5] - min_lesson_length: 100 - signal_smoothing: true - parameters: - param1: [0.0, 4.0, 6.0, 8.0] - """ - ) - should_be_config = CurriculumSettings( - thresholds=[0.1, 0.3, 0.5], - min_lesson_length=100, - signal_smoothing=True, - measure=CurriculumSettings.MeasureType.PROGRESS, - parameters={"param1": [0.0, 4.0, 6.0, 8.0]}, - ) - assert cattr.structure(config, CurriculumSettings) == should_be_config - - -def test_curriculum_config(param_name="test_param1", min_lesson_length=100): - return CurriculumSettings( - thresholds=[0.1, 0.3, 0.5], - min_lesson_length=min_lesson_length, - parameters={f"{param_name}": [0.0, 4.0, 6.0, 8.0]}, - ) - - -test_meta_curriculum_config = { - "Brain1": test_curriculum_config("test_param1"), - "Brain2": test_curriculum_config("test_param2"), -} - - -def test_set_lesson_nums(): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - meta_curriculum.lesson_nums = {"Brain1": 1, "Brain2": 3} - - assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 1 - assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 3 - - -def test_increment_lessons(measure_vals): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - meta_curriculum.brains_to_curricula["Brain1"] = Mock() - meta_curriculum.brains_to_curricula["Brain2"] = Mock() - - meta_curriculum.increment_lessons(measure_vals) - - meta_curriculum.brains_to_curricula["Brain1"].increment_lesson.assert_called_with( - 0.2 - ) - meta_curriculum.brains_to_curricula["Brain2"].increment_lesson.assert_called_with( - 0.3 - ) - - -@patch("mlagents.trainers.curriculum.Curriculum") -@patch("mlagents.trainers.curriculum.Curriculum") -def test_increment_lessons_with_reward_buff_sizes( - curriculum_a, curriculum_b, measure_vals, reward_buff_sizes -): - curriculum_a.min_lesson_length = 5 - curriculum_b.min_lesson_length = 10 - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - meta_curriculum.brains_to_curricula["Brain1"] = curriculum_a - meta_curriculum.brains_to_curricula["Brain2"] = curriculum_b - - meta_curriculum.increment_lessons(measure_vals, reward_buff_sizes=reward_buff_sizes) - - curriculum_a.increment_lesson.assert_called_with(0.2) - curriculum_b.increment_lesson.assert_not_called() - - -@patch("mlagents.trainers.meta_curriculum.GlobalTrainingStatus") -def test_restore_curriculums(mock_trainingstatus): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - # Test restore to value - mock_trainingstatus.get_parameter_state.return_value = 2 - meta_curriculum.try_restore_all_curriculum() - mock_trainingstatus.get_parameter_state.assert_has_calls( - [call("Brain1", StatusType.LESSON_NUM), call("Brain2", StatusType.LESSON_NUM)], - any_order=True, - ) - assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 2 - assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 2 - - # Test restore to None - mock_trainingstatus.get_parameter_state.return_value = None - meta_curriculum.try_restore_all_curriculum() - - assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 0 - assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 0 - - -def 
test_get_config(): - meta_curriculum = MetaCurriculum(test_meta_curriculum_config) - assert meta_curriculum.get_config() == {"test_param1": 0.0, "test_param2": 0.0} - - -@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"]) -def test_simple_metacurriculum(curriculum_brain_name): - env = SimpleEnvironment([BRAIN_NAME], use_discrete=False) - mc = MetaCurriculum({curriculum_brain_name: dummy_curriculum_config}) - _check_environment_trains( - env, {BRAIN_NAME: PPO_CONFIG}, meta_curriculum=mc, success_threshold=None - ) diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 63e2759779..be93042396 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -13,7 +13,8 @@ RewardSignalType, RewardSignalSettings, CuriositySettings, - ParameterRandomizationSettings, + EnvironmentParameterSettings, + ConstantSettings, UniformSettings, GaussianSettings, MultiRangeUniformSettings, @@ -191,11 +192,11 @@ def test_memory_settings_validation(): NetworkSettings.MemorySettings(sequence_length=128, memory_size=0) -def test_parameter_randomization_structure(): +def test_env_parameter_structure(): """ - Tests the ParameterRandomizationSettings structure method and all validators. + Tests the EnvironmentParameterSettings structure method and all validators. """ - parameter_randomization_dict = { + env_params_dict = { "mass": { "sampler_type": "uniform", "sampler_parameters": {"min_value": 1.0, "max_value": 2.0}, @@ -208,14 +209,35 @@ def test_parameter_randomization_structure(): "sampler_type": "multirangeuniform", "sampler_parameters": {"intervals": [[1.0, 2.0], [3.0, 4.0]]}, }, + "gravity": 1, + "wall_height": { + "curriculum": [ + { + "name": "Lesson1", + "completion_criteria": { + "measure": "reward", + "behavior": "fake_behavior", + "threshold": 10, + }, + "value": 1, + }, + {"value": 4, "name": "Lesson2"}, + ] + }, } - parameter_randomization_distributions = ParameterRandomizationSettings.structure( - parameter_randomization_dict, Dict[str, ParameterRandomizationSettings] + env_param_settings = EnvironmentParameterSettings.structure( + env_params_dict, Dict[str, EnvironmentParameterSettings] + ) + assert isinstance(env_param_settings["mass"].curriculum[0].value, UniformSettings) + assert isinstance(env_param_settings["scale"].curriculum[0].value, GaussianSettings) + assert isinstance( + env_param_settings["length"].curriculum[0].value, MultiRangeUniformSettings ) - assert isinstance(parameter_randomization_distributions["mass"], UniformSettings) - assert isinstance(parameter_randomization_distributions["scale"], GaussianSettings) assert isinstance( - parameter_randomization_distributions["length"], MultiRangeUniformSettings + env_param_settings["wall_height"].curriculum[0].value, ConstantSettings + ) + assert isinstance( + env_param_settings["wall_height"].curriculum[1].value, ConstantSettings ) # Check invalid distribution type @@ -226,8 +248,8 @@ def test_parameter_randomization_structure(): } } with pytest.raises(ValueError): - ParameterRandomizationSettings.structure( - invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check min less than max in uniform @@ -238,8 +260,8 @@ def test_parameter_randomization_structure(): } } with pytest.raises(TrainerConfigError): - ParameterRandomizationSettings.structure( - 
invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check min less than max in multirange @@ -250,8 +272,8 @@ def test_parameter_randomization_structure(): } } with pytest.raises(TrainerConfigError): - ParameterRandomizationSettings.structure( - invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check multirange has valid intervals @@ -262,14 +284,35 @@ def test_parameter_randomization_structure(): } } with pytest.raises(TrainerConfigError): - ParameterRandomizationSettings.structure( - invalid_distribution_dict, Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + invalid_distribution_dict, Dict[str, EnvironmentParameterSettings] ) # Check non-Dict input with pytest.raises(TrainerConfigError): - ParameterRandomizationSettings.structure( - "notadict", Dict[str, ParameterRandomizationSettings] + EnvironmentParameterSettings.structure( + "notadict", Dict[str, EnvironmentParameterSettings] + ) + + invalid_curriculum_dict = { + "wall_height": { + "curriculum": [ + { + "name": "Lesson1", + "completion_criteria": { + "measure": "progress", + "behavior": "fake_behavior", + "threshold": 10, + }, # > 1 is too large + "value": 1, + }, + {"value": 4, "name": "Lesson2"}, + ] + } + } + with pytest.raises(TrainerConfigError): + EnvironmentParameterSettings.structure( + invalid_curriculum_dict, Dict[str, EnvironmentParameterSettings] ) @@ -342,6 +385,51 @@ def test_exportable_settings(use_defaults): train_model: false inference: false debug: true + environment_parameters: + big_wall_height: + curriculum: + - name: Lesson0 + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.1 + value: + sampler_type: uniform + sampler_parameters: + min_value: 0.0 + max_value: 4.0 + - name: Lesson1 + completion_criteria: + measure: reward + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 100 + threshold: 0.2 + value: + sampler_type: gaussian + sampler_parameters: + mean: 4.0 + st_dev: 7.0 + - name: Lesson2 + completion_criteria: + measure: progress + behavior: BigWallJump + signal_smoothing: true + min_lesson_length: 20 + threshold: 0.3 + value: + sampler_type: multirangeuniform + sampler_parameters: + intervals: [[1.0, 2.0],[4.0, 5.0]] + - name: Lesson3 + value: 8.0 + small_wall_height: 42.0 + other_wall_height: + sampler_type: multirangeuniform + sampler_parameters: + intervals: [[1.0, 2.0],[4.0, 5.0]] """ if not use_defaults: loaded_yaml = yaml.safe_load(test_yaml) @@ -351,11 +439,16 @@ def test_exportable_settings(use_defaults): dict_export = run_options.as_dict() if not use_defaults: # Don't need to check if no yaml - check_dict_is_at_least(loaded_yaml, dict_export) - + check_dict_is_at_least( + loaded_yaml, dict_export, exceptions=["environment_parameters"] + ) # Re-import and verify has same elements run_options2 = RunOptions.from_dict(dict_export) second_export = run_options2.as_dict() + check_dict_is_at_least(dict_export, second_export) + # Should be able to use equality instead of back-and-forth once environment_parameters + # is working + check_dict_is_at_least(second_export, dict_export) # Check that the two exports are the same assert dict_export == second_export diff --git 
a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index 67d3c66617..82b6a11c7c 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -26,6 +26,7 @@ TrainerType, RewardSignalType, ) +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.models import EncoderType, ScheduleType from mlagents_envs.side_channel.environment_parameters_channel import ( EnvironmentParametersChannel, @@ -107,10 +108,12 @@ def _check_environment_trains( env, trainer_config, reward_processor=default_reward_processor, - meta_curriculum=None, + env_parameter_manager=None, success_threshold=0.9, env_manager=None, ): + if env_parameter_manager is None: + env_parameter_manager = EnvironmentParameterManager() # Create controller and begin training. with tempfile.TemporaryDirectory() as dir: run_id = "id" @@ -126,7 +129,7 @@ def _check_environment_trains( train_model=True, load_model=False, seed=seed, - meta_curriculum=meta_curriculum, + param_manager=env_parameter_manager, multi_gpu=False, ) @@ -134,7 +137,7 @@ def _check_environment_trains( trainer_factory=trainer_factory, output_path=dir, run_id=run_id, - meta_curriculum=meta_curriculum, + param_manager=env_parameter_manager, train=True, training_seed=seed, ) diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index d4ad4cd043..175ac174b6 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -3,6 +3,7 @@ from mlagents.tf_utils import tf from mlagents.trainers.trainer_controller import TrainerController +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.ghost.controller import GhostController @@ -14,7 +15,7 @@ def basic_trainer_controller(): trainer_factory=trainer_factory_mock, output_path="test_model_path", run_id="test_run_id", - meta_curriculum=None, + param_manager=EnvironmentParameterManager(), train=True, training_seed=99, ) @@ -30,7 +31,7 @@ def test_initialization_seed(numpy_random_seed, tensorflow_set_seed): trainer_factory=trainer_factory_mock, output_path="", run_id="1", - meta_curriculum=None, + param_manager=None, train=True, training_seed=seed, ) diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py index 579b4e7f1f..a39ad7c8db 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_util.py @@ -9,6 +9,7 @@ from mlagents.trainers.exception import TrainerConfigError, UnityTrainerException from mlagents.trainers.settings import RunOptions from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager @pytest.fixture @@ -54,6 +55,7 @@ def mock_constructor( train_model=train_model, load_model=load_model, seed=seed, + param_manager=EnvironmentParameterManager(), ) trainers = {} for brain_name in training_behaviors.keys(): @@ -75,6 +77,7 @@ def test_handles_no_config_provided(): train_model=True, load_model=False, seed=42, + param_manager=EnvironmentParameterManager(), ) trainer_factory.generate(brain_name) diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 
36e18d4dec..51af3c0c13 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -4,7 +4,7 @@ import os import threading -from typing import Dict, Optional, Set, List +from typing import Dict, Set, List from collections import defaultdict import numpy as np @@ -24,12 +24,10 @@ merge_gauges, ) from mlagents.trainers.trainer import Trainer -from mlagents.trainers.meta_curriculum import MetaCurriculum +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.trainer_util import TrainerFactory from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.agent_processor import AgentManager -from mlagents.trainers.settings import CurriculumSettings -from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType class TrainerController(object): @@ -38,7 +36,7 @@ def __init__( trainer_factory: TrainerFactory, output_path: str, run_id: str, - meta_curriculum: Optional[MetaCurriculum], + param_manager: EnvironmentParameterManager, train: bool, training_seed: int, ): @@ -46,7 +44,8 @@ def __init__( :param output_path: Path to save the model. :param summaries_dir: Folder to save training summaries. :param run_id: The sub-directory name for model and summary statistics - :param meta_curriculum: MetaCurriculum object which stores information about all curricula. + :param param_manager: EnvironmentParameterManager object which stores information about all + environment parameters. :param train: Whether to train model, or only run inference. :param training_seed: Seed to use for Numpy and Tensorflow random number generation. :param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging. @@ -58,7 +57,7 @@ def __init__( self.logger = get_logger(__name__) self.run_id = run_id self.train_model = train - self.meta_curriculum = meta_curriculum + self.param_manager = param_manager self.ghost_controller = self.trainer_factory.ghost_controller self.trainer_threads: List[threading.Thread] = [] @@ -66,30 +65,6 @@ def __init__( np.random.seed(training_seed) tf.set_random_seed(training_seed) - def _get_measure_vals(self): - brain_names_to_measure_vals = {} - if self.meta_curriculum: - for ( - brain_name, - curriculum, - ) in self.meta_curriculum.brains_to_curricula.items(): - # Skip brains that are in the metacurriculum but no trainer yet. - if brain_name not in self.trainers: - continue - if curriculum.measure == CurriculumSettings.MeasureType.PROGRESS: - measure_val = self.trainers[brain_name].get_step / float( - self.trainers[brain_name].get_max_steps - ) - brain_names_to_measure_vals[brain_name] = measure_val - elif curriculum.measure == CurriculumSettings.MeasureType.REWARD: - measure_val = np.mean(self.trainers[brain_name].reward_buffer) - brain_names_to_measure_vals[brain_name] = measure_val - else: - for brain_name, trainer in self.trainers.items(): - measure_val = np.mean(trainer.reward_buffer) - brain_names_to_measure_vals[brain_name] = measure_val - return brain_names_to_measure_vals - @timed def _save_model(self): """ @@ -135,10 +110,8 @@ def _reset_env(self, env: EnvManager) -> None: A Data structure corresponding to the initial reset state of the environment. 
""" - new_meta_curriculum_config = ( - self.meta_curriculum.get_config() if self.meta_curriculum else {} - ) - env.reset(config=new_meta_curriculum_config) + new_config = self.param_manager.get_current_samplers() + env.reset(config=new_config) def _not_done_training(self) -> bool: return ( @@ -235,38 +208,32 @@ def start_learning(self, env_manager: EnvManager) -> None: self._save_model() self._export_graph() - def end_trainer_episodes( - self, env: EnvManager, lessons_incremented: Dict[str, bool] - ) -> None: - self._reset_env(env) + def end_trainer_episodes(self) -> None: # Reward buffers reset takes place only for curriculum learning # else no reset. for trainer in self.trainers.values(): trainer.end_episode() - for brain_name, changed in lessons_incremented.items(): - if changed: - self.trainers[brain_name].reward_buffer.clear() def reset_env_if_ready(self, env: EnvManager) -> None: - if self.meta_curriculum: - # Get the sizes of the reward buffers. - reward_buff_sizes = { - k: len(t.reward_buffer) for (k, t) in self.trainers.items() - } - # Attempt to increment the lessons of the brains who - # were ready. - lessons_incremented = self.meta_curriculum.increment_lessons( - self._get_measure_vals(), reward_buff_sizes=reward_buff_sizes - ) - else: - lessons_incremented = {} - # If any lessons were incremented or the environment is - # ready to be reset - meta_curriculum_reset = any(lessons_incremented.values()) + # Get the sizes of the reward buffers. + reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()} + curr_step = {k: int(t.step) for (k, t) in self.trainers.items()} + max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()} + # Attempt to increment the lessons of the brains who + # were ready. + updated, param_must_reset = self.param_manager.update_lessons( + curr_step, max_step, reward_buff + ) + if updated: + for trainer in self.trainers.values(): + trainer.reward_buffer.clear() # If ghost trainer swapped teams ghost_controller_reset = self.ghost_controller.should_reset() - if meta_curriculum_reset or ghost_controller_reset: - self.end_trainer_episodes(env, lessons_incremented) + if param_must_reset or ghost_controller_reset: + self._reset_env(env) # This reset also sends the new config to env + self.end_trainer_episodes() + elif updated: + env.set_env_parameters(self.param_manager.get_current_samplers()) @timed def advance(self, env: EnvManager) -> int: @@ -274,16 +241,15 @@ def advance(self, env: EnvManager) -> int: with hierarchical_timer("env_step"): num_steps = env.advance() - # Report current lesson - if self.meta_curriculum: - for brain_name, curr in self.meta_curriculum.brains_to_curricula.items(): - if brain_name in self.trainers: - self.trainers[brain_name].stats_reporter.set_stat( - "Environment/Lesson", curr.lesson_num - ) - GlobalTrainingStatus.set_parameter_state( - brain_name, StatusType.LESSON_NUM, curr.lesson_num - ) + # Report current lesson for each environment parameter + for ( + param_name, + lesson_number, + ) in self.param_manager.get_current_lesson_number().items(): + for trainer in self.trainers.values(): + trainer.stats_reporter.set_stat( + f"Environment/Lesson/{param_name}", lesson_number + ) for trainer in self.trainers.values(): if not trainer.threaded: diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 450116e9cf..01fe654dbc 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -2,7 +2,7 @@ from typing 
import Dict from mlagents_envs.logging_util import get_logger -from mlagents.trainers.meta_curriculum import MetaCurriculum +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager from mlagents.trainers.exception import TrainerConfigError from mlagents.trainers.trainer import Trainer from mlagents.trainers.exception import UnityTrainerException @@ -24,8 +24,8 @@ def __init__( train_model: bool, load_model: bool, seed: int, + param_manager: EnvironmentParameterManager, init_path: str = None, - meta_curriculum: MetaCurriculum = None, multi_gpu: bool = False, ): self.trainer_config = trainer_config @@ -34,7 +34,7 @@ def __init__( self.train_model = train_model self.load_model = load_model self.seed = seed - self.meta_curriculum = meta_curriculum + self.param_manager = param_manager self.multi_gpu = multi_gpu self.ghost_controller = GhostController() @@ -47,8 +47,8 @@ def generate(self, brain_name: str) -> Trainer: self.load_model, self.ghost_controller, self.seed, + self.param_manager, self.init_path, - self.meta_curriculum, self.multi_gpu, ) @@ -61,8 +61,8 @@ def initialize_trainer( load_model: bool, ghost_controller: GhostController, seed: int, + param_manager: EnvironmentParameterManager, init_path: str = None, - meta_curriculum: MetaCurriculum = None, multi_gpu: bool = False, ) -> Trainer: """ @@ -77,25 +77,15 @@ def initialize_trainer( :param load_model: Whether to load the model or randomly initialize :param ghost_controller: The object that coordinates ghost trainers :param seed: The random seed to use + :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer :param init_path: Path from which to load model, if different from model_path. - :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer :return: """ trainer_artifact_path = os.path.join(output_path, brain_name) if init_path is not None: trainer_settings.init_path = os.path.join(init_path, brain_name) - min_lesson_length = 1 - if meta_curriculum: - if brain_name in meta_curriculum.brains_to_curricula: - min_lesson_length = meta_curriculum.brains_to_curricula[ - brain_name - ].min_lesson_length - else: - logger.warning( - f"Metacurriculum enabled, but no curriculum for brain {brain_name}. " - f"Brains with curricula: {meta_curriculum.brains_to_curricula.keys()}. 
" - ) + min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name) trainer: Trainer = None # type: ignore # will be set to one of these, or raise trainer_type = trainer_settings.trainer_type diff --git a/ml-agents/mlagents/trainers/upgrade_config.py b/ml-agents/mlagents/trainers/upgrade_config.py index 4263e6cf1f..e1c8a05ad7 100644 --- a/ml-agents/mlagents/trainers/upgrade_config.py +++ b/ml-agents/mlagents/trainers/upgrade_config.py @@ -5,7 +5,7 @@ import attr import cattr import yaml -from typing import Dict, Any +from typing import Dict, Any, Optional import argparse from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType from mlagents.trainers.cli_utils import load_config @@ -99,13 +99,72 @@ def convert_samplers(old_sampler_config: Dict[str, Any]) -> Dict[str, Any]: return new_sampler_config +def convert_samplers_and_curriculum( + parameter_dict: Dict[str, Any], curriculum: Dict[str, Any] +) -> Dict[str, Any]: + for key, sampler in parameter_dict.items(): + if "sampler_parameters" not in sampler: + parameter_dict[key]["sampler_parameters"] = {} + for argument in [ + "seed", + "min_value", + "max_value", + "mean", + "st_dev", + "intervals", + ]: + if argument in sampler: + parameter_dict[key]["sampler_parameters"][argument] = sampler[argument] + parameter_dict[key].pop(argument) + param_set = set(parameter_dict.keys()) + for behavior_name, behavior_dict in curriculum.items(): + measure = behavior_dict["measure"] + min_lesson_length = behavior_dict.get("min_lesson_length", 1) + signal_smoothing = behavior_dict.get("signal_smoothing", False) + thresholds = behavior_dict["thresholds"] + num_lessons = len(thresholds) + 1 + parameters = behavior_dict["parameters"] + for param_name in parameters.keys(): + if param_name in param_set: + print( + f"The parameter {param_name} has both a sampler and a curriculum. Will ignore curriculum" + ) + else: + param_set.add(param_name) + parameter_dict[param_name] = {"curriculum": []} + for lesson_index in range(num_lessons - 1): + parameter_dict[param_name]["curriculum"].append( + { + f"Lesson{lesson_index}": { + "completion_criteria": { + "measure": measure, + "behavior": behavior_name, + "signal_smoothing": signal_smoothing, + "min_lesson_length": min_lesson_length, + "threshold": thresholds[lesson_index], + }, + "value": parameters[param_name][lesson_index], + } + } + ) + lesson_index += 1 # This is the last lesson + parameter_dict[param_name]["curriculum"].append( + { + f"Lesson{lesson_index}": { + "value": parameters[param_name][lesson_index] + } + } + ) + return parameter_dict + + def parse_args(): argparser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) argparser.add_argument( "trainer_config_path", - help="Path to old format (<=0.16.X) trainer configuration YAML.", + help="Path to old format (<=0.18.X) trainer configuration YAML.", ) argparser.add_argument( "--curriculum", @@ -124,6 +183,51 @@ def parse_args(): return args +def convert( + config: Dict[str, Any], + old_curriculum: Optional[Dict[str, Any]], + old_param_random: Optional[Dict[str, Any]], +) -> Dict[str, Any]: + if "behaviors" not in config: + print("Config file format version : version <= 0.16.X") + behavior_config_dict = convert_behaviors(config) + full_config = {"behaviors": behavior_config_dict} + + # Convert curriculum and sampler. note that we don't validate these; if it was correct + # before it should be correct now. 
+ if old_curriculum is not None: + full_config["curriculum"] = old_curriculum + + if old_param_random is not None: + sampler_config_dict = convert_samplers(old_param_random) + full_config["parameter_randomization"] = sampler_config_dict + + # Convert config to dict + config = cattr.unstructure(full_config) + if "curriculum" in config or "parameter_randomization" in config: + print("Config file format version : 0.16.X < version <= 0.18.X") + full_config = {"behaviors": config["behaviors"]} + + param_randomization = config.get("parameter_randomization", {}) + if "resampling-interval" in param_randomization: + param_randomization.pop("resampling-interval") + if len(param_randomization) > 0: + # check if we use the old format sampler-type vs sampler_type + if ( + "sampler-type" + in param_randomization[list(param_randomization.keys())[0]] + ): + param_randomization = convert_samplers(param_randomization) + + full_config["environment_parameters"] = convert_samplers_and_curriculum( + param_randomization, config.get("curriculum", {}) + ) + + # Convert config to dict + config = cattr.unstructure(full_config) + return config + + def main() -> None: args = parse_args() print( @@ -131,23 +235,14 @@ def main() -> None: ) old_config = load_config(args.trainer_config_path) - behavior_config_dict = convert_behaviors(old_config) - full_config = {"behaviors": behavior_config_dict} - - # Convert curriculum and sampler. note that we don't validate these; if it was correct - # before it should be correct now. + curriculum_config_dict = None + old_sampler_config_dict = None if args.curriculum is not None: curriculum_config_dict = load_config(args.curriculum) - full_config["curriculum"] = curriculum_config_dict - if args.sampler is not None: old_sampler_config_dict = load_config(args.sampler) - sampler_config_dict = convert_samplers(old_sampler_config_dict) - full_config["parameter_randomization"] = sampler_config_dict - - # Convert config to dict - unstructed_config = cattr.unstructure(full_config) - unstructed_config = remove_nones(unstructed_config) + new_config = convert(old_config, curriculum_config_dict, old_sampler_config_dict) + unstructed_config = remove_nones(new_config) write_to_yaml_file(unstructed_config, args.output_config_path)
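
For reference, a minimal sketch of how the new `environment_parameters` section is consumed end to end, mirroring `test_env_param_manager.py` above: the YAML is structured by `RunOptions.from_dict`, and an `EnvironmentParameterManager` then exposes the current lesson numbers and samplers that `TrainerController` sends to the environment on reset. The parameter names (`wall_height`, `gravity`) and the behavior name `MyBehavior` are illustrative only, and the manager's constructor arguments are passed positionally as in the tests (settings, seed, restore) since their keyword names are not shown in this diff.

# Illustrative sketch, assuming the updated mlagents.trainers package from this change set.
import yaml

from mlagents.trainers.settings import RunOptions
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager

# New-style config: one entry per environment parameter. A parameter can be a plain
# constant, a sampler, or a curriculum whose lessons hold constants or samplers.
CONFIG_YAML = """
environment_parameters:
  wall_height:                  # illustrative parameter name
    curriculum:
      - name: Lesson0
        completion_criteria:
          measure: progress
          behavior: MyBehavior  # illustrative behavior name
          threshold: 0.3
        value: 2.0              # constant value for the first lesson
      - name: Lesson1           # the final lesson needs no completion_criteria
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 4.0
            max_value: 8.0
  gravity: 9.8                  # plain constant, no curriculum
"""

run_options = RunOptions.from_dict(yaml.safe_load(CONFIG_YAML))
# Arguments mirror the tests above: (settings, seed, restore).
manager = EnvironmentParameterManager(run_options.environment_parameters, 1337, False)
print(manager.get_current_lesson_number())  # {'wall_height': 0, 'gravity': 0}
print(manager.get_current_samplers())       # ConstantSettings for both parameters at lesson 0

Old-format files do not need to be rewritten by hand: as exercised by `test_convert` above, `mlagents.trainers.upgrade_config.convert(old_behaviors, old_curriculum, old_sampler)` produces the merged `environment_parameters` layout, and `main()` in the same module wires that conversion to the command-line arguments defined in `parse_args`.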