2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -31,6 +31,8 @@ vector observations to be used simultaneously. (#3981) Thank you @shakenes !
directory. (#3829)
- Unity Player logs are now written out to the results directory. (#3877)
- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
- When trying to load/resume from a checkpoint created with an earlier version of ML-Agents,
a warning is logged. (#4035)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
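Concretely, the new check logs a message of the following shape (version numbers here are illustrative, not taken from a real run):

    The model checkpoint you are loading from was saved with ML-Agents version 0.16.0 but your current ML-Agents version is 0.17.0. Model may not behave properly.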
10 changes: 9 additions & 1 deletion ml-agents/mlagents/model_serialization.py
@@ -45,7 +45,15 @@
)

MODEL_CONSTANTS = frozenset(
["action_output_shape", "is_continuous_control", "memory_size", "version_number"]
[
"action_output_shape",
"is_continuous_control",
"memory_size",
"version_number",
"trainer_major_version",
"trainer_minor_version",
"trainer_patch_version",
]
)
VISUAL_OBSERVATION_PREFIX = "visual_observation_"
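Note: the three trainer_*_version entries added to MODEL_CONSTANTS mirror the names of the tf.Variable objects created in tf_policy.py below, presumably so the trainer version is serialized alongside the other model constants.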

62 changes: 59 additions & 3 deletions ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -1,7 +1,9 @@
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
import abc
import os
import numpy as np
from distutils.version import LooseVersion

from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
@@ -14,11 +16,17 @@
from mlagents.trainers.models import ModelUtils
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers import __version__


logger = get_logger(__name__)


# This is the version number of the inputs and outputs of the model, and
# determines compatibility with inference in Barracuda.
MODEL_FORMAT_VERSION = 2


class UnityPolicyException(UnityException):
"""
Related to errors with the Trainer.
@@ -46,7 +54,7 @@ def __init__(
:param brain: The corresponding Brain for this policy.
:param trainer_settings: The trainer parameters.
"""
self._version_number_ = 2

self.m_size = 0
self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
@@ -114,6 +122,32 @@ def create_tf_graph(self):
"""
pass

@staticmethod
def _convert_version_string(version_string: str) -> Tuple[int, ...]:
"""
Converts the version string into a Tuple of ints (major_ver, minor_ver, patch_ver).
:param version_string: The semantic-versioned version string (X.Y.Z).
:return: A Tuple containing (major_ver, minor_ver, patch_ver).
"""
ver = LooseVersion(version_string)
return tuple(map(int, ver.version[0:3]))
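A standalone sketch of what this helper does (illustrative only): LooseVersion exposes the parsed components as a list via its .version attribute, and taking the first three components drops any trailing ".devN" suffix.

    from distutils.version import LooseVersion

    def convert(version_string):
        # LooseVersion("0.17.0.dev0").version == [0, 17, 0, "dev", 0],
        # so the first three components are (major, minor, patch).
        return tuple(map(int, LooseVersion(version_string).version[0:3]))

    assert convert("0.17.0") == (0, 17, 0)
    assert convert("0.17.0.dev0") == (0, 17, 0)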

def _check_model_version(self, version: str) -> None:
"""
Checks whether the model being loaded was created with the same version of
ML-Agents, and logs a warning if it was not.
"""
if self.version_tensors is not None:
loaded_ver = tuple(
num.eval(session=self.sess) for num in self.version_tensors
)
if loaded_ver != TFPolicy._convert_version_string(version):
logger.warning(
f"The model checkpoint you are loading from was saved with ML-Agents version "
f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents"
f"version is {version}. Model may not behave properly."
)

def _initialize_graph(self):
with self.graph.as_default():
self.saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
@@ -146,6 +180,7 @@ def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None
model_path
)
)
self._check_model_version(__version__)
if reset_global_steps:
self._set_step(0)
logger.info(
@@ -418,6 +453,7 @@ def _initialize_tensorflow_references(self):
self.prev_action: Optional[tf.Tensor] = None
self.memory_in: Optional[tf.Tensor] = None
self.memory_out: Optional[tf.Tensor] = None
self.version_tensors: Optional[Tuple[tf.Tensor, tf.Tensor, tf.Tensor]] = None

def create_input_placeholders(self):
with self.graph.as_default():
@@ -467,8 +503,28 @@ def create_input_placeholders(self):
trainable=False,
dtype=tf.int32,
)
int_version = TFPolicy._convert_version_string(__version__)
major_ver_t = tf.Variable(
int_version[0],
name="trainer_major_version",
trainable=False,
dtype=tf.int32,
)
minor_ver_t = tf.Variable(
int_version[1],
name="trainer_minor_version",
trainable=False,
dtype=tf.int32,
)
patch_ver_t = tf.Variable(
int_version[2],
name="trainer_patch_version",
trainable=False,
dtype=tf.int32,
)
self.version_tensors = (major_ver_t, minor_ver_t, patch_ver_t)
tf.Variable(
self._version_number_,
MODEL_FORMAT_VERSION,
name="version_number",
trainable=False,
dtype=tf.int32,
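The version variables above are created with trainable=False, so the optimizer never updates them, but tf.train.Saver still writes them into checkpoints; that is what lets _check_model_version read them back with tensor.eval(session=...). A minimal sketch of that round trip, assuming a TensorFlow 1.x environment like the one ml-agents targets here:

    import tensorflow as tf  # assumes TF 1.x (tf.Variable, tf.Session)

    graph = tf.Graph()
    with graph.as_default():
        major_ver_t = tf.Variable(
            0, name="trainer_major_version", trainable=False, dtype=tf.int32
        )
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        print(major_ver_t.eval(session=sess))  # -> 0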
21 changes: 21 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_nn_policy.py
@@ -1,5 +1,7 @@
import pytest
import os
import unittest
import tempfile

import numpy as np
from mlagents.tf_utils import tf
@@ -12,6 +14,7 @@
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers import __version__


VECTOR_ACTION_SPACE = [2]
@@ -74,6 +77,24 @@ def test_load_save(tmp_path):
assert policy3.get_current_step() == 0


class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test that loading a model with a mismatched version logs a warning
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
path1 = tempfile.mkdtemp()
trainer_params = TrainerSettings(output_path=path1)
policy = create_policy_mock(trainer_params)
policy.initialize_or_load()
policy._check_model_version(
"0.0.0"
) # A deliberately incorrect version
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
policy._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown with the correct version
assert len(cm.output) == 1
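This test leans on unittest's assertLogs context manager: it captures records emitted under the named logger at or above the given level, and cm.output holds one formatted string per captured record (the assertion fails if nothing is logged inside the block). A self-contained sketch of the pattern, with a hypothetical logger call standing in for the policy code:

    import logging
    import unittest

    class AssertLogsExample(unittest.TestCase):
        def test_captures_warning(self):
            with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
                logging.getLogger("mlagents.trainers").warning("mismatch")
            # Exactly one warning record was captured inside the block.
            self.assertEqual(len(cm.output), 1)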


def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:
"""
Make sure two policies have the same output for the same input.
8 changes: 8 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_policy.py
@@ -61,3 +61,11 @@ def test_take_action_returns_action_info_when_available():
policy_eval_out["action"], policy_eval_out["value"], policy_eval_out, [0]
)
assert result == expected


def test_convert_version_string():
result = TFPolicy._convert_version_string("200.300.100")
assert result == (200, 300, 100)
# Test dev versions
result = TFPolicy._convert_version_string("200.300.100.dev0")
assert result == (200, 300, 100)
16 changes: 11 additions & 5 deletions ml-agents/mlagents/trainers/tests/test_simple_rl.py
@@ -167,9 +167,12 @@ def test_simple_ppo(use_discrete):
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
config = attr.evolve(PPO_CONFIG)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)
config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
_check_environment_trains(env, {BRAIN_NAME: config})
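These tweaks use attr.evolve, which returns a copy of an attrs instance with the named fields replaced, leaving the original config untouched. A sketch with a hypothetical Hyperparameters class standing in for PPO_CONFIG.hyperparameters:

    import attr

    @attr.s(auto_attribs=True)
    class Hyperparameters:
        batch_size: int = 32
        buffer_size: int = 320

    evolved = attr.evolve(Hyperparameters(), batch_size=64, buffer_size=640)
    assert (evolved.batch_size, evolved.buffer_size) == (64, 640)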


@@ -299,14 +302,17 @@ def test_visual_advanced_sac(vis_encode_type, num_visual):

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
step_size = 0.2 if use_discrete else 1.0
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=32),
memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
)
new_hyperparams = attr.evolve(
SAC_CONFIG.hyperparameters,
batch_size=64,
batch_size=128,
learning_rate=1e-3,
buffer_init_steps=500,
steps_per_update=2,