11 changes: 8 additions & 3 deletions ml-agents/mlagents/trainers/meta_curriculum.py
@@ -32,9 +32,9 @@ def __init__(self, curriculum_folder: str):
try:
for curriculum_filename in os.listdir(curriculum_folder):
# This process requires JSON files
if not curriculum_filename.lower().endswith(".json"):
brain_name, extension = os.path.splitext(curriculum_filename)
Contributor Author:
So far I don't think this has tripped anybody up. But the old behavior meant that a brain with a . in its name couldn't be matched up with the metacurriculum, because curriculum_filename.split(".")[0] takes only the part of the filename up to the first . rather than up to the last one.
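A minimal sketch of the difference (the filename Brain2.test.json is only illustrative, mirroring the new test case below):

```python
import os

filename = "Brain2.test.json"

# Old behavior: split on the FIRST dot, losing part of the brain name.
old_brain_name = filename.split(".")[0]
assert old_brain_name == "Brain2"

# New behavior: os.path.splitext strips only the final extension.
new_brain_name, extension = os.path.splitext(filename)
assert new_brain_name == "Brain2.test"
assert extension == ".json"
```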

if extension.lower() != ".json":
continue
brain_name = curriculum_filename.split(".")[0]
curriculum_filepath = os.path.join(
curriculum_folder, curriculum_filename
)
@@ -78,7 +78,9 @@ def lesson_nums(self, lesson_nums):
for brain_name, lesson in lesson_nums.items():
self.brains_to_curriculums[brain_name].lesson_num = lesson

def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
def _lesson_ready_to_increment(
self, brain_name: str, reward_buff_size: int
) -> bool:
"""Determines whether the curriculum of a specified brain is ready
to attempt an increment.

@@ -92,6 +94,9 @@ def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
Whether the curriculum of the specified brain should attempt to
increment its lesson.
"""
if brain_name not in self.brains_to_curriculums:
return False

return reward_buff_size >= (
self.brains_to_curriculums[brain_name].min_lesson_length
)
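Below is a minimal sketch of what the new guard buys, assuming the MetaCurriculumTest helper defined in test_meta_curriculum.py (which, as in test_simple_metacurriculum further down, accepts a plain dict of brain names to curricula) and a mocked curriculum object:

```python
from unittest.mock import MagicMock

from mlagents.trainers.tests.test_meta_curriculum import MetaCurriculumTest

# Hypothetical curriculum stub exposing only the attribute the check reads.
curriculum = MagicMock(min_lesson_length=100)
mc = MetaCurriculumTest({"KnownBrain": curriculum})

# Known brain: ready once the reward buffer reaches min_lesson_length.
assert mc._lesson_ready_to_increment("KnownBrain", 150)

# Unknown brain: previously a KeyError, now simply "not ready".
assert not mc._lesson_ready_to_increment("WrongBrainName", 150)
```

An unknown brain name now reports "not ready" instead of raising, which is what lets the WrongBrainName parametrization in the new test pass.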
1 change: 0 additions & 1 deletion ml-agents/mlagents/trainers/tests/test_curriculum.py
@@ -6,7 +6,6 @@
from mlagents.trainers.exception import CurriculumConfigError, CurriculumLoadingError
from mlagents.trainers.curriculum import Curriculum


dummy_curriculum_json_str = """
{
"measure" : "reward",
53 changes: 49 additions & 4 deletions ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
@@ -1,9 +1,17 @@
import pytest
from unittest.mock import patch, call
from unittest.mock import patch, call, mock_open

from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.exception import MetaCurriculumError

from mlagents.trainers.tests.test_simple_rl import (
Simple1DEnvironment,
_check_environment_trains,
BRAIN_NAME,
)
from mlagents.trainers.tests.test_curriculum import dummy_curriculum_json_str


class MetaCurriculumTest(MetaCurriculum):
"""This class allows us to test MetaCurriculum objects without calling
@@ -36,7 +44,7 @@ def reward_buff_sizes():

@patch("mlagents.trainers.curriculum.Curriculum.get_config", return_value={})
@patch("mlagents.trainers.curriculum.Curriculum.__init__", return_value=None)
@patch("os.listdir", return_value=["Brain1.json", "Brain2.json"])
@patch("os.listdir", return_value=["Brain1.json", "Brain2.test.json"])
def test_init_meta_curriculum_happy_path(
listdir, mock_curriculum_init, mock_curriculum_get_config, default_reset_parameters
):
@@ -45,9 +53,9 @@ def test_init_meta_curriculum_happy_path(
assert len(meta_curriculum.brains_to_curriculums) == 2

assert "Brain1" in meta_curriculum.brains_to_curriculums
assert "Brain2" in meta_curriculum.brains_to_curriculums
assert "Brain2.test" in meta_curriculum.brains_to_curriculums

calls = [call("test/Brain1.json"), call("test/Brain2.json")]
calls = [call("test/Brain1.json"), call("test/Brain2.test.json")]

mock_curriculum_init.assert_has_calls(calls)

@@ -133,3 +141,40 @@ def test_get_config(
new_reset_parameters.update(more_reset_parameters)

assert meta_curriculum.get_config() == new_reset_parameters


META_CURRICULUM_CONFIG = """
default:
trainer: ppo
batch_size: 16
beta: 5.0e-3
buffer_size: 64
epsilon: 0.2
hidden_units: 128
lambd: 0.95
learning_rate: 5.0e-3
max_steps: 100
memory_size: 256
normalize: false
num_epoch: 3
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 50
use_recurrent: false
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
"""


@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"])
def test_simple_metacurriculum(curriculum_brain_name):
env = Simple1DEnvironment(use_discrete=False)
with patch(
"builtins.open", new_callable=mock_open, read_data=dummy_curriculum_json_str
):
curriculum = Curriculum("TestBrain.json")
mc = MetaCurriculumTest({curriculum_brain_name: curriculum})
_check_environment_trains(env, META_CURRICULUM_CONFIG, mc, -100.0)
10 changes: 6 additions & 4 deletions ml-agents/mlagents/trainers/tests/test_simple_rl.py
@@ -183,7 +183,9 @@ def close(self):
"""


def _check_environment_trains(env, config):
def _check_environment_trains(
env, config, meta_curriculum=None, success_threshold=0.99
):
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:
run_id = "id"
@@ -201,7 +203,7 @@ def _check_environment_trains(env, config):
train_model=True,
load_model=False,
seed=seed,
meta_curriculum=None,
meta_curriculum=meta_curriculum,
multi_gpu=False,
)

@@ -210,7 +212,7 @@ def _check_environment_trains(env, config):
summaries_dir=dir,
model_path=dir,
run_id=run_id,
meta_curriculum=None,
meta_curriculum=meta_curriculum,
train=True,
training_seed=seed,
sampler_manager=SamplerManager(None),
@@ -223,7 +225,7 @@ def _check_environment_trains(env, config):
print(tc._get_measure_vals())
for brain_name, mean_reward in tc._get_measure_vals().items():
assert not math.isnan(mean_reward)
assert mean_reward > 0.99
assert mean_reward > success_threshold


@pytest.mark.parametrize("use_discrete", [True, False])
8 changes: 7 additions & 1 deletion ml-agents/mlagents/trainers/trainer_controller.py
@@ -71,6 +71,9 @@ def _get_measure_vals(self):
brain_name,
curriculum,
) in self.meta_curriculum.brains_to_curriculums.items():
# Skip brains that are in the metacurriculum but have no trainer yet.
if brain_name not in self.trainers:
continue
if curriculum.measure == "progress":
measure_val = (
self.trainers[brain_name].get_step
@@ -168,7 +171,10 @@ def write_to_tensorboard(self, global_step: int) -> None:
for brain_name, trainer in self.trainers.items():
# Write training statistics to Tensorboard.
delta_train_start = time() - self.training_start_time
if self.meta_curriculum is not None:
if (
self.meta_curriculum
and brain_name in self.meta_curriculum.brains_to_curriculums
):
trainer.write_summary(
global_step,
delta_train_start,
23 changes: 17 additions & 6 deletions ml-agents/mlagents/trainers/trainer_util.py
@@ -1,5 +1,6 @@
import yaml
from typing import Any, Dict, TextIO
import logging

from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import TrainerConfigError
@@ -8,6 +9,8 @@
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.sac.trainer import SACTrainer

logger = logging.getLogger("mlagents.trainers")


class TrainerFactory:
def __init__(
@@ -101,6 +104,18 @@ def initialize_trainer(
_brain_key = trainer_config[_brain_key]
trainer_parameters.update(trainer_config[_brain_key])

min_lesson_length = 1
if meta_curriculum:
if brain_name in meta_curriculum.brains_to_curriculums:
min_lesson_length = meta_curriculum.brains_to_curriculums[
brain_name
].min_lesson_length
else:
logger.warning(
f"Metacurriculum enabled, but no curriculum for brain {brain_name}. "
f"Brains with curricula: {meta_curriculum.brains_to_curriculums.keys()}. "
)

trainer: Trainer = None # type: ignore # will be set to one of these, or raise
if "trainer" not in trainer_parameters:
raise TrainerConfigError(
@@ -117,9 +132,7 @@ def initialize_trainer(
elif trainer_type == "ppo":
trainer = PPOTrainer(
brain_parameters,
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
min_lesson_length,
trainer_parameters,
train_model,
load_model,
@@ -130,9 +143,7 @@ def initialize_trainer(
elif trainer_type == "sac":
trainer = SACTrainer(
brain_parameters,
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
min_lesson_length,
trainer_parameters,
train_model,
load_model,