diff --git a/ml-agents/mlagents/trainers/meta_curriculum.py b/ml-agents/mlagents/trainers/meta_curriculum.py index 80ed282f11..ed30fa3ee5 100644 --- a/ml-agents/mlagents/trainers/meta_curriculum.py +++ b/ml-agents/mlagents/trainers/meta_curriculum.py @@ -32,9 +32,9 @@ def __init__(self, curriculum_folder: str): try: for curriculum_filename in os.listdir(curriculum_folder): # This process requires JSON files - if not curriculum_filename.lower().endswith(".json"): + brain_name, extension = os.path.splitext(curriculum_filename) + if extension.lower() != ".json": continue - brain_name = curriculum_filename.split(".")[0] curriculum_filepath = os.path.join( curriculum_folder, curriculum_filename ) @@ -78,7 +78,9 @@ def lesson_nums(self, lesson_nums): for brain_name, lesson in lesson_nums.items(): self.brains_to_curriculums[brain_name].lesson_num = lesson - def _lesson_ready_to_increment(self, brain_name, reward_buff_size): + def _lesson_ready_to_increment( + self, brain_name: str, reward_buff_size: int + ) -> bool: """Determines whether the curriculum of a specified brain is ready to attempt an increment. @@ -92,6 +94,9 @@ def _lesson_ready_to_increment(self, brain_name, reward_buff_size): Whether the curriculum of the specified brain should attempt to increment its lesson. """ + if brain_name not in self.brains_to_curriculums: + return False + return reward_buff_size >= ( self.brains_to_curriculums[brain_name].min_lesson_length ) diff --git a/ml-agents/mlagents/trainers/tests/test_curriculum.py b/ml-agents/mlagents/trainers/tests/test_curriculum.py index 712d45258f..fcd81cf027 100644 --- a/ml-agents/mlagents/trainers/tests/test_curriculum.py +++ b/ml-agents/mlagents/trainers/tests/test_curriculum.py @@ -6,7 +6,6 @@ from mlagents.trainers.exception import CurriculumConfigError, CurriculumLoadingError from mlagents.trainers.curriculum import Curriculum - dummy_curriculum_json_str = """ { "measure" : "reward", diff --git a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py b/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py index eacde79a0b..8976207043 100644 --- a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py +++ b/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py @@ -1,9 +1,17 @@ import pytest -from unittest.mock import patch, call +from unittest.mock import patch, call, mock_open from mlagents.trainers.meta_curriculum import MetaCurriculum +from mlagents.trainers.curriculum import Curriculum from mlagents.trainers.exception import MetaCurriculumError +from mlagents.trainers.tests.test_simple_rl import ( + Simple1DEnvironment, + _check_environment_trains, + BRAIN_NAME, +) +from mlagents.trainers.tests.test_curriculum import dummy_curriculum_json_str + class MetaCurriculumTest(MetaCurriculum): """This class allows us to test MetaCurriculum objects without calling @@ -36,7 +44,7 @@ def reward_buff_sizes(): @patch("mlagents.trainers.curriculum.Curriculum.get_config", return_value={}) @patch("mlagents.trainers.curriculum.Curriculum.__init__", return_value=None) -@patch("os.listdir", return_value=["Brain1.json", "Brain2.json"]) +@patch("os.listdir", return_value=["Brain1.json", "Brain2.test.json"]) def test_init_meta_curriculum_happy_path( listdir, mock_curriculum_init, mock_curriculum_get_config, default_reset_parameters ): @@ -45,9 +53,9 @@ def test_init_meta_curriculum_happy_path( assert len(meta_curriculum.brains_to_curriculums) == 2 assert "Brain1" in meta_curriculum.brains_to_curriculums - assert "Brain2" in meta_curriculum.brains_to_curriculums + assert "Brain2.test" in meta_curriculum.brains_to_curriculums - calls = [call("test/Brain1.json"), call("test/Brain2.json")] + calls = [call("test/Brain1.json"), call("test/Brain2.test.json")] mock_curriculum_init.assert_has_calls(calls) @@ -133,3 +141,40 @@ def test_get_config( new_reset_parameters.update(more_reset_parameters) assert meta_curriculum.get_config() == new_reset_parameters + + +META_CURRICULUM_CONFIG = """ + default: + trainer: ppo + batch_size: 16 + beta: 5.0e-3 + buffer_size: 64 + epsilon: 0.2 + hidden_units: 128 + lambd: 0.95 + learning_rate: 5.0e-3 + max_steps: 100 + memory_size: 256 + normalize: false + num_epoch: 3 + num_layers: 2 + time_horizon: 64 + sequence_length: 64 + summary_freq: 50 + use_recurrent: false + reward_signals: + extrinsic: + strength: 1.0 + gamma: 0.99 + """ + + +@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"]) +def test_simple_metacurriculum(curriculum_brain_name): + env = Simple1DEnvironment(use_discrete=False) + with patch( + "builtins.open", new_callable=mock_open, read_data=dummy_curriculum_json_str + ): + curriculum = Curriculum("TestBrain.json") + mc = MetaCurriculumTest({curriculum_brain_name: curriculum}) + _check_environment_trains(env, META_CURRICULUM_CONFIG, mc, -100.0) diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index 8d100d2057..a9fb69e032 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -183,7 +183,9 @@ def close(self): """ -def _check_environment_trains(env, config): +def _check_environment_trains( + env, config, meta_curriculum=None, success_threshold=0.99 +): # Create controller and begin training. with tempfile.TemporaryDirectory() as dir: run_id = "id" @@ -201,7 +203,7 @@ def _check_environment_trains(env, config): train_model=True, load_model=False, seed=seed, - meta_curriculum=None, + meta_curriculum=meta_curriculum, multi_gpu=False, ) @@ -210,7 +212,7 @@ def _check_environment_trains(env, config): summaries_dir=dir, model_path=dir, run_id=run_id, - meta_curriculum=None, + meta_curriculum=meta_curriculum, train=True, training_seed=seed, sampler_manager=SamplerManager(None), @@ -223,7 +225,7 @@ def _check_environment_trains(env, config): print(tc._get_measure_vals()) for brain_name, mean_reward in tc._get_measure_vals().items(): assert not math.isnan(mean_reward) - assert mean_reward > 0.99 + assert mean_reward > success_threshold @pytest.mark.parametrize("use_discrete", [True, False]) diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 31a6504af5..d1a7608c5d 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -71,6 +71,9 @@ def _get_measure_vals(self): brain_name, curriculum, ) in self.meta_curriculum.brains_to_curriculums.items(): + # Skip brains that are in the metacurriculum but no trainer yet. + if brain_name not in self.trainers: + continue if curriculum.measure == "progress": measure_val = ( self.trainers[brain_name].get_step @@ -168,7 +171,10 @@ def write_to_tensorboard(self, global_step: int) -> None: for brain_name, trainer in self.trainers.items(): # Write training statistics to Tensorboard. delta_train_start = time() - self.training_start_time - if self.meta_curriculum is not None: + if ( + self.meta_curriculum + and brain_name in self.meta_curriculum.brains_to_curriculums + ): trainer.write_summary( global_step, delta_train_start, diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 64d1e2e0f5..1b4df99c19 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -1,5 +1,6 @@ import yaml from typing import Any, Dict, TextIO +import logging from mlagents.trainers.meta_curriculum import MetaCurriculum from mlagents.trainers.exception import TrainerConfigError @@ -8,6 +9,8 @@ from mlagents.trainers.ppo.trainer import PPOTrainer from mlagents.trainers.sac.trainer import SACTrainer +logger = logging.getLogger("mlagents.trainers") + class TrainerFactory: def __init__( @@ -101,6 +104,18 @@ def initialize_trainer( _brain_key = trainer_config[_brain_key] trainer_parameters.update(trainer_config[_brain_key]) + min_lesson_length = 1 + if meta_curriculum: + if brain_name in meta_curriculum.brains_to_curriculums: + min_lesson_length = meta_curriculum.brains_to_curriculums[ + brain_name + ].min_lesson_length + else: + logger.warning( + f"Metacurriculum enabled, but no curriculum for brain {brain_name}. " + f"Brains with curricula: {meta_curriculum.brains_to_curriculums.keys()}. " + ) + trainer: Trainer = None # type: ignore # will be set to one of these, or raise if "trainer" not in trainer_parameters: raise TrainerConfigError( @@ -117,9 +132,7 @@ def initialize_trainer( elif trainer_type == "ppo": trainer = PPOTrainer( brain_parameters, - meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length - if meta_curriculum - else 1, + min_lesson_length, trainer_parameters, train_model, load_model, @@ -130,9 +143,7 @@ def initialize_trainer( elif trainer_type == "sac": trainer = SACTrainer( brain_parameters, - meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length - if meta_curriculum - else 1, + min_lesson_length, trainer_parameters, train_model, load_model,