From a475151fac832e5158d88e0cec020e5c93acb447 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Thu, 30 Jul 2020 19:09:57 -0700 Subject: [PATCH 01/14] Add test for some utils and fix bug --- .../trainers/tests/torch/test_utils.py | 72 +++++++++++++++++++ ml-agents/mlagents/trainers/torch/utils.py | 24 ++++--- 2 files changed, 85 insertions(+), 11 deletions(-) create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_utils.py diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py new file mode 100644 index 0000000000..e104ba65cd --- /dev/null +++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py @@ -0,0 +1,72 @@ +import pytest +import torch + +from mlagents.trainers.settings import EncoderType +from mlagents.trainers.torch.utils import ModelUtils +from mlagents.trainers.exception import UnityTrainerException +from mlagents.trainers.torch.encoders import ( + VectorEncoder, + VectorAndUnnormalizedInputEncoder, +) + + +def test_min_visual_size(): + # Make sure each EncoderType has an entry in MIS_RESOLUTION_FOR_ENCODER + assert set(ModelUtils.MIN_RESOLUTION_FOR_ENCODER.keys()) == set(EncoderType) + + for encoder_type in EncoderType: + good_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type] + vis_input = torch.ones((1, 3, good_size, good_size)) + ModelUtils._check_resolution_for_encoder(vis_input, encoder_type) + enc_func = ModelUtils.get_encoder_for_type(encoder_type) + enc = enc_func(good_size, good_size, 3, 1) + enc.forward(vis_input) + + # Anything under the min size should raise an exception. If not, decrease the min size! + with pytest.raises(Exception): + bad_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type] - 1 + vis_input = torch.ones((1, 3, bad_size, bad_size)) + + with pytest.raises(UnityTrainerException): + # Make sure we'd hit a friendly error during model setup time. 
+ ModelUtils._check_resolution_for_encoder(vis_input, encoder_type) + + enc = enc_func(bad_size, bad_size, 3, 1) + enc.forward(vis_input) + + +@pytest.mark.parametrize("unnormalized_inputs", [0, 1]) +@pytest.mark.parametrize("num_visual", [0, 1, 2]) +@pytest.mark.parametrize("num_vector", [0, 1, 2]) +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize("encoder_type", [EncoderType.SIMPLE, EncoderType.NATURE_CNN]) +def test_create_encoders( + encoder_type, normalize, num_vector, num_visual, unnormalized_inputs +): + vec_obs_shape = (5,) + vis_obs_shape = (84, 84, 3) + obs_shapes = [] + for _ in range(num_vector): + obs_shapes.append(vec_obs_shape) + for _ in range(num_visual): + obs_shapes.append(vis_obs_shape) + h_size = 128 + num_layers = 3 + unnormalized_inputs = 1 + vis_enc, vec_enc = ModelUtils.create_encoders( + obs_shapes, h_size, num_layers, encoder_type, unnormalized_inputs, normalize + ) + vec_enc = list(vec_enc) + vis_enc = list(vis_enc) + assert len(vec_enc) == ( + 1 if unnormalized_inputs + num_vector > 0 else 0 + ) # There's always at most one vector encoder. + assert len(vis_enc) == num_visual + + if unnormalized_inputs > 0: + assert isinstance(vec_enc[0], VectorAndUnnormalizedInputEncoder) + elif num_vector > 0: + assert isinstance(vec_enc[0], VectorEncoder) + + for enc in vis_enc: + assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type)) diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py index d628ac3ebf..bfb5217648 100644 --- a/ml-agents/mlagents/trainers/torch/utils.py +++ b/ml-agents/mlagents/trainers/torch/utils.py @@ -42,8 +42,9 @@ def _check_resolution_for_encoder( vis_in: torch.Tensor, vis_encoder_type: EncoderType ) -> None: min_res = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type] - height = vis_in.shape[1] - width = vis_in.shape[2] + # Note: PyTorch uses NCHW not NHWC (TF). 
+ height = vis_in.shape[2] + width = vis_in.shape[3] if height < min_res or width < min_res: raise UnityTrainerException( f"Visual observation resolution ({width}x{height}) is too small for" @@ -90,16 +91,17 @@ def create_encoders( raise UnityTrainerException( f"Unsupported shape of {dimension} for observation {i}" ) - if unnormalized_inputs > 0: - vector_encoders.append( - VectorAndUnnormalizedInputEncoder( - vector_size, h_size, unnormalized_inputs, num_layers, normalize + if vector_size + unnormalized_inputs > 0: + if unnormalized_inputs > 0: + vector_encoders.append( + VectorAndUnnormalizedInputEncoder( + vector_size, h_size, unnormalized_inputs, num_layers, normalize + ) + ) + else: + vector_encoders.append( + VectorEncoder(vector_size, h_size, num_layers, normalize) ) - ) - else: - vector_encoders.append( - VectorEncoder(vector_size, h_size, num_layers, normalize) - ) return nn.ModuleList(visual_encoders), nn.ModuleList(vector_encoders) @staticmethod From 7f5b4a9ed7894f8ea412675ef11e25ab97aae04f Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Thu, 30 Jul 2020 19:10:11 -0700 Subject: [PATCH 02/14] Fix PPO TF test --- ml-agents/mlagents/trainers/tests/test_reward_signals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index d13cb2674b..5ccbfe8836 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -4,7 +4,7 @@ import mlagents.trainers.tests.mock_brain as mb from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.sac.optimizer import SACOptimizer -from mlagents.trainers.ppo.optimizer import PPOOptimizer +from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG from mlagents.trainers.settings import ( GAILSettings, @@ -75,7 +75,7 @@ def 
create_optimizer_mock( if trainer_settings.trainer_type == TrainerType.SAC: optimizer = SACOptimizer(policy, trainer_settings) else: - optimizer = PPOOptimizer(policy, trainer_settings) + optimizer = TFPPOOptimizer(policy, trainer_settings) return optimizer From 86a8a8e1174952838b46824eb0d2dcfd887126c4 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 31 Jul 2020 11:31:56 -0700 Subject: [PATCH 03/14] Added some more utils tests --- .../trainers/tests/torch/test_utils.py | 50 +++++++++++++++++++ ml-agents/mlagents/trainers/torch/utils.py | 8 +++ 2 files changed, 58 insertions(+) diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py index e104ba65cd..9467880b7b 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_utils.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py @@ -1,5 +1,6 @@ import pytest import torch +import numpy as np from mlagents.trainers.settings import EncoderType from mlagents.trainers.torch.utils import ModelUtils @@ -70,3 +71,52 @@ def test_create_encoders( for enc in vis_enc: assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type)) + + +def test_list_to_tensor(): + # Test converting pure list + unconverted_list = [[1, 2], [1, 3], [1, 4]] + tensor = ModelUtils.list_to_tensor(unconverted_list) + # Should be equivalent to torch.tensor conversion + assert torch.equal(tensor, torch.tensor(unconverted_list)) + + # Test converting pure numpy array + np_list = np.asarray(unconverted_list) + tensor = ModelUtils.list_to_tensor(np_list) + # Should be equivalent to torch.tensor conversion + assert torch.equal(tensor, torch.tensor(unconverted_list)) + + # Test converting list of numpy arrays + list_of_np = [np.asarray(_el) for _el in unconverted_list] + tensor = ModelUtils.list_to_tensor(list_of_np) + # Should be equivalent to torch.tensor conversion + assert torch.equal(tensor, torch.tensor(unconverted_list)) + + +def test_break_into_branches(): + # Test 
normal multi-branch case + all_actions = torch.tensor([[1, 2, 3, 4, 5, 6]]) + action_size = [2, 1, 3] + broken_actions = ModelUtils.break_into_branches(all_actions, action_size) + assert len(action_size) == len(broken_actions) + for i, _action in enumerate(broken_actions): + assert _action.shape == (1, action_size[i]) + + # Test 1-branch case + action_size = [6] + broken_actions = ModelUtils.break_into_branches(all_actions, action_size) + assert len(broken_actions) == 1 + assert broken_actions[0].shape == (1, 6) + + +def test_actions_to_onehot(): + all_actions = torch.tensor([[1, 0, 2], [1, 0, 2]]) + action_size = [2, 1, 3] + oh_actions = ModelUtils.actions_to_onehot(all_actions, action_size) + expected_result = [ + torch.tensor([[0, 1], [0, 1]]), + torch.tensor([[1], [1]]), + torch.tensor([[0, 0, 1], [0, 0, 1]]), + ] + for res, exp in zip(oh_actions, expected_result): + assert torch.equal(res, exp) diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py index bfb5217648..27b58b42fc 100644 --- a/ml-agents/mlagents/trainers/torch/utils.py +++ b/ml-agents/mlagents/trainers/torch/utils.py @@ -136,6 +136,14 @@ def break_into_branches( def actions_to_onehot( discrete_actions: torch.Tensor, action_size: List[int] ) -> List[torch.Tensor]: + """ + Takes a tensor of discrete actions and turns it into a List of onehot encoding for each + action, + :param discrete_actions: Actions in integer form. + :param action_size: List of branch sizes. Should be of same size as discrete_actions' + last dimension. + :return: List of one-hot tensors, one representing each branch. 
+ """ onehot_branches = [ torch.nn.functional.one_hot(_act.T, action_size[i]) for i, _act in enumerate(discrete_actions.T) From 3729d0f17323a57c92820e94e15e987a0c3fb1e1 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 31 Jul 2020 11:35:12 -0700 Subject: [PATCH 04/14] Fix typo in docstring --- ml-agents/mlagents/trainers/torch/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py index 27b58b42fc..b741edf19c 100644 --- a/ml-agents/mlagents/trainers/torch/utils.py +++ b/ml-agents/mlagents/trainers/torch/utils.py @@ -138,7 +138,7 @@ def actions_to_onehot( ) -> List[torch.Tensor]: """ Takes a tensor of discrete actions and turns it into a List of onehot encoding for each - action, + action. :param discrete_actions: Actions in integer form. :param action_size: List of branch sizes. Should be of same size as discrete_actions' last dimension. From 1f062803bd8fc657daea02eee8d28ca54c16a714 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 31 Jul 2020 17:23:10 -0700 Subject: [PATCH 05/14] Fix saving mean and std with normalizer --- ml-agents/mlagents/trainers/torch/encoders.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ml-agents/mlagents/trainers/torch/encoders.py b/ml-agents/mlagents/trainers/torch/encoders.py index 0607fbcca5..72f605c29f 100644 --- a/ml-agents/mlagents/trainers/torch/encoders.py +++ b/ml-agents/mlagents/trainers/torch/encoders.py @@ -9,9 +9,9 @@ class Normalizer(nn.Module): def __init__(self, vec_obs_size: int): super().__init__() - self.normalization_steps = torch.tensor(1) - self.running_mean = torch.zeros(vec_obs_size) - self.running_variance = torch.ones(vec_obs_size) + self.register_buffer("normalization_steps", torch.tensor(1)) + self.register_buffer("running_mean", torch.zeros(vec_obs_size)) + self.register_buffer("running_variance", torch.ones(vec_obs_size)) def forward(self, inputs: torch.Tensor) 
-> torch.Tensor: normalized_state = torch.clamp( @@ -33,9 +33,10 @@ def update(self, vector_input: torch.Tensor) -> None: new_variance = self.running_variance + ( input_to_new_mean * input_to_old_mean ).sum(0) - self.running_mean = new_mean - self.running_variance = new_variance - self.normalization_steps = total_new_steps + # Update in-place + self.running_mean.data.copy_(new_mean.data) + self.running_variance.data.copy_(new_variance.data) + self.normalization_steps.data.copy_(total_new_steps.data) def copy_from(self, other_normalizer: "Normalizer") -> None: self.normalization_steps.data.copy_(other_normalizer.normalization_steps.data) From d93680c16e4831f6f9658cbfcdddee55b1d497d0 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 31 Jul 2020 17:23:22 -0700 Subject: [PATCH 06/14] Tests for encoders --- .../trainers/tests/torch/test_encoders.py | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_encoders.py diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py new file mode 100644 index 0000000000..76f48038b1 --- /dev/null +++ b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py @@ -0,0 +1,92 @@ +import torch +from unittest import mock +import pytest + +from mlagents.trainers.torch.encoders import ( + VectorEncoder, + VectorAndUnnormalizedInputEncoder, + Normalizer, +) + + +def compare_models(module_1, module_2): + is_same = True + for key_item_1, key_item_2 in zip( + module_1.state_dict().items(), module_2.state_dict().items() + ): + is_same = torch.equal(key_item_1[1], key_item_2[1]) and is_same + return is_same + + +def test_normalizer(): + input_size = 2 + norm = Normalizer(input_size) + + # These three inputs should mean to 0.5, and variance 2 + # with the steps starting at 1 + vec_input1 = torch.tensor([[1, 1]]) + vec_input2 = torch.tensor([[1, 1]]) + vec_input3 = torch.tensor([[0, 0]]) + 
norm.update(vec_input1) + norm.update(vec_input2) + norm.update(vec_input3) + + # Test normalization + for val in norm(vec_input1)[0]: + assert val == pytest.approx(0.707, abs=0.001) + + # Test copy normalization + norm2 = Normalizer(input_size) + assert not compare_models(norm, norm2) + norm2.copy_from(norm) + assert compare_models(norm, norm2) + for val in norm2(vec_input1)[0]: + assert val == pytest.approx(0.707, abs=0.001) + + +@mock.patch("mlagents.trainers.torch.encoders.Normalizer") +def test_vector_encoder(mock_normalizer): + mock_normalizer_inst = mock.Mock() + mock_normalizer.return_value = mock_normalizer_inst + input_size = 64 + hidden_size = 128 + num_layers = 3 + normalize = False + vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize) + output = vector_encoder(torch.ones((1, input_size))) + assert output.shape == (1, hidden_size) + + normalize = True + vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize) + new_vec = torch.ones((1, input_size)) + vector_encoder.update_normalization(new_vec) + + mock_normalizer.assert_called_with(input_size) + mock_normalizer_inst.update.assert_called_with(new_vec) + + vector_encoder2 = VectorEncoder(input_size, hidden_size, num_layers, normalize) + vector_encoder.copy_normalization(vector_encoder2) + mock_normalizer_inst.copy_from.assert_called_with(mock_normalizer_inst) + + +@mock.patch("mlagents.trainers.torch.encoders.Normalizer") +def test_vector_and_unnormalized_encoder(mock_normalizer): + mock_normalizer_inst = mock.Mock() + mock_normalizer.return_value = mock_normalizer_inst + input_size = 64 + unnormalized_size = 32 + hidden_size = 128 + num_layers = 3 + normalize = True + mock_normalizer_inst.return_value = torch.ones((1, input_size)) + vector_encoder = VectorAndUnnormalizedInputEncoder( + input_size, hidden_size, unnormalized_size, num_layers, normalize + ) + # Make sure normalizer is only called on input_size + mock_normalizer.assert_called_with(input_size) 
+ normal_input = torch.ones((1, input_size)) + + unnormalized_input = torch.ones((1, 32)) + output = vector_encoder(normal_input, unnormalized_input) + mock_normalizer_inst.assert_called_with(normal_input) + assert output.shape == (1, hidden_size) From 59ba0b668fa20fb43d39cea3a7f8bec6f9ac4797 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 31 Jul 2020 18:34:15 -0700 Subject: [PATCH 07/14] Visual encoder test --- .../trainers/tests/torch/test_encoders.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py index 76f48038b1..76253611b0 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py @@ -6,9 +6,13 @@ VectorEncoder, VectorAndUnnormalizedInputEncoder, Normalizer, + SimpleVisualEncoder, + ResNetVisualEncoder, + NatureVisualEncoder, ) +# This test will also reveal issues with states not being saved in the state_dict. 
def compare_models(module_1, module_2): is_same = True for key_item_1, key_item_2 in zip( @@ -90,3 +94,16 @@ def test_vector_and_unnormalized_encoder(mock_normalizer): output = vector_encoder(normal_input, unnormalized_input) mock_normalizer_inst.assert_called_with(normal_input) assert output.shape == (1, hidden_size) + + +@pytest.mark.parametrize("image_size", [(36, 36, 3), (84, 84, 4), (256, 256, 5)]) +@pytest.mark.parametrize( + "vis_class", [SimpleVisualEncoder, ResNetVisualEncoder, NatureVisualEncoder] +) +def test_visual_encoder(vis_class, image_size): + num_outputs = 128 + enc = vis_class(image_size[0], image_size[1], image_size[2], num_outputs) + # Note: NCHW not NHWC + sample_input = torch.ones((1, image_size[2], image_size[0], image_size[1])) + encoding = enc(sample_input) + assert encoding.shape == (1, num_outputs) From ed5c169fb99330fc4795ce4766e26a10d52fd928 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 3 Aug 2020 11:50:05 -0700 Subject: [PATCH 08/14] Add decoder test --- .../trainers/tests/torch/test_decoders.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_decoders.py diff --git a/ml-agents/mlagents/trainers/tests/torch/test_decoders.py b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py new file mode 100644 index 0000000000..00172d0719 --- /dev/null +++ b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py @@ -0,0 +1,31 @@ +import pytest +import torch + +from mlagents.trainers.torch.decoders import ValueHeads + + +def test_valueheads(): + stream_names = [f"reward_signal_{num}" for num in range(5)] + input_size = 5 + batch_size = 4 + + # Test default 1 value per head + value_heads = ValueHeads(stream_names, input_size) + input_data = torch.ones((batch_size, input_size)) + value_out, _ = value_heads(input_data) # Note: mean value will be removed shortly + + for stream_name in stream_names: + assert value_out[stream_name].shape == (batch_size,) + + # Test 
that iinputting the wrong size input will throw an error + with pytest.raises(Exception): + value_out = value_heads(torch.ones((batch_size, input_size + 2))) + + # Test multiple values per head (e.g. discrete Q function) + output_size = 4 + value_heads = ValueHeads(stream_names, input_size, output_size) + input_data = torch.ones((batch_size, input_size)) + value_out, _ = value_heads(input_data) + + for stream_name in stream_names: + assert value_out[stream_name].shape == (batch_size, output_size) From f6135dc3c1f7c12c9bd6443086f7ec7916c267da Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 4 Aug 2020 16:57:31 -0700 Subject: [PATCH 09/14] Add check to create_encoders --- ml-agents/mlagents/trainers/torch/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py index 3bed11ccfa..86e3e15b39 100644 --- a/ml-agents/mlagents/trainers/torch/utils.py +++ b/ml-agents/mlagents/trainers/torch/utils.py @@ -40,12 +40,9 @@ def get_encoder_for_type(encoder_type: EncoderType) -> nn.Module: @staticmethod def _check_resolution_for_encoder( - vis_in: torch.Tensor, vis_encoder_type: EncoderType + height: int, width: int, vis_encoder_type: EncoderType ) -> None: min_res = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type] - # Note: PyTorch uses NCHW not NHWC (TF). 
- height = vis_in.shape[2] - width = vis_in.shape[3] if height < min_res or width < min_res: raise UnityTrainerException( f"Visual observation resolution ({width}x{height}) is too small for" @@ -81,6 +78,9 @@ def create_encoders( vector_size = 0 for i, dimension in enumerate(observation_shapes): if len(dimension) == 3: + ModelUtils._check_resolution_for_encoder( + dimension[0], dimension[1], vis_encode_type + ) visual_encoders.append( visual_encoder_class( dimension[0], dimension[1], dimension[2], h_size From d0bed895c11ddc33be8bcbfcb0222178605dad47 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 4 Aug 2020 18:44:48 -0700 Subject: [PATCH 10/14] Add typing to distributions --- .../mlagents/trainers/torch/distributions.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py index bdca1a0382..c83ae4649e 100644 --- a/ml-agents/mlagents/trainers/torch/distributions.py +++ b/ml-agents/mlagents/trainers/torch/distributions.py @@ -1,4 +1,5 @@ import abc +from typing import List import torch from torch import nn import numpy as np @@ -114,13 +115,12 @@ def entropy(self): class GaussianDistribution(nn.Module): def __init__( self, - hidden_size, - num_outputs, - conditional_sigma=False, - tanh_squash=False, - **kwargs + hidden_size: int, + num_outputs: int, + conditional_sigma: bool = False, + tanh_squash: bool = False, ): - super().__init__(**kwargs) + super().__init__() self.conditional_sigma = conditional_sigma self.mu = nn.Linear(hidden_size, num_outputs) self.tanh_squash = tanh_squash @@ -133,7 +133,7 @@ def __init__( torch.zeros(1, num_outputs, requires_grad=True) ) - def forward(self, inputs): + def forward(self, inputs: torch.Tensor) -> List[DistInstance]: mu = self.mu(inputs) if self.conditional_sigma: log_sigma = torch.clamp(self.log_sigma(inputs), min=-20, max=2) @@ -146,12 +146,12 @@ def forward(self, inputs): class 
MultiCategoricalDistribution(nn.Module): - def __init__(self, hidden_size, act_sizes): + def __init__(self, hidden_size: int, act_sizes: List[int]): super().__init__() self.act_sizes = act_sizes - self.branches = self.create_policy_branches(hidden_size) + self.branches = self._create_policy_branches(hidden_size) - def create_policy_branches(self, hidden_size): + def _create_policy_branches(self, hidden_size: int) -> nn.ModuleList: branches = [] for size in self.act_sizes: branch_output_layer = nn.Linear(hidden_size, size) @@ -159,13 +159,13 @@ def create_policy_branches(self, hidden_size): branches.append(branch_output_layer) return nn.ModuleList(branches) - def mask_branch(self, logits, mask): + def _mask_branch(self, logits: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1).unsqueeze(-1) normalized_logits = torch.log(normalized_probs + EPSILON) return normalized_logits - def split_masks(self, masks): + def _split_masks(self, masks: torch.Tensor) -> List[torch.Tensor]: split_masks = [] for idx, _ in enumerate(self.act_sizes): start = int(np.sum(self.act_sizes[:idx])) @@ -173,13 +173,13 @@ def split_masks(self, masks): split_masks.append(masks[:, start:end]) return split_masks - def forward(self, inputs, masks): + def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> List[DistInstance]: # Todo - Support multiple branches in mask code branch_distributions = [] - masks = self.split_masks(masks) + masks = self._split_masks(masks) for idx, branch in enumerate(self.branches): logits = branch(inputs) - norm_logits = self.mask_branch(logits, masks[idx]) + norm_logits = self._mask_branch(logits, masks[idx]) distribution = CategoricalDistInstance(norm_logits) branch_distributions.append(distribution) return branch_distributions From d228d4b921077f58790167d3669ed7af2c1bec10 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 4 Aug 2020 
18:45:28 -0700 Subject: [PATCH 11/14] Tests for distributions --- .../trainers/tests/torch/test_distribution.py | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_distribution.py diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py new file mode 100644 index 0000000000..273ceeaee8 --- /dev/null +++ b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py @@ -0,0 +1,126 @@ +import pytest +import torch + +from mlagents.trainers.torch.distributions import ( + GaussianDistribution, + MultiCategoricalDistribution, + GaussianDistInstance, + TanhGaussianDistInstance, + CategoricalDistInstance, +) + + +@pytest.mark.parametrize("tanh_squash", [True, False]) +@pytest.mark.parametrize("conditional_sigma", [True, False]) +def test_gaussian_distribution(conditional_sigma, tanh_squash): + hidden_size = 16 + act_size = 4 + sample_embedding = torch.ones((1, 16)) + gauss_dist = GaussianDistribution( + hidden_size, + act_size, + conditional_sigma=conditional_sigma, + tanh_squash=tanh_squash, + ) + + # Make sure backprop works + force_action = torch.zeros((1, act_size)) + optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3) + + for _ in range(50): + dist_inst = gauss_dist(sample_embedding)[0] + if tanh_squash: + assert isinstance(dist_inst, TanhGaussianDistInstance) + else: + assert isinstance(dist_inst, GaussianDistInstance) + log_prob = dist_inst.log_prob(force_action) + loss = torch.nn.functional.mse_loss(log_prob, -2 * torch.ones(log_prob.shape)) + optimizer.zero_grad() + loss.backward() + optimizer.step() + for prob in log_prob.flatten(): + assert prob == pytest.approx(-2, abs=0.1) + + +def test_multi_categorical_distribution(): + hidden_size = 16 + act_size = [3, 3, 4] + sample_embedding = torch.ones((1, 16)) + gauss_dist = MultiCategoricalDistribution(hidden_size, act_size) + + # Make sure backprop works + 
optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3) + + for _ in range(50): + dist_insts = gauss_dist(sample_embedding, masks=torch.ones((1, sum(act_size)))) + loss = 0 + for i, dist_inst in enumerate(dist_insts): + assert isinstance(dist_inst, CategoricalDistInstance) + log_prob = dist_inst.all_log_prob() + test_prob = torch.tensor( + [1.0 - 0.01 * (act_size[i] - 1)] + [0.01] * (act_size[i] - 1) + ) # High prob for first action + test_log_prob = test_prob.log() + loss += torch.nn.functional.mse_loss(log_prob, test_log_prob) + optimizer.zero_grad() + loss.backward() + optimizer.step() + for dist_inst in dist_insts: + assert dist_inst.all_log_prob().flatten()[0] == pytest.approx(0, abs=0.1) + + # Test masks + masks = [] + for branch in act_size: + masks += [0] * (branch - 1) + [1] + masks = torch.tensor([masks]) + dist_insts = gauss_dist(sample_embedding, masks=masks) + for dist_inst in dist_insts: + log_prob = dist_inst.all_log_prob() + assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001) + + +def test_gaussian_dist_instance(): + act_size = 4 + dist_instance = GaussianDistInstance( + torch.zeros(1, act_size), torch.ones(1, act_size) + ) + action = dist_instance.sample() + assert action.shape == (1, act_size) + for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten(): + # Log prob of standard normal at 0 + assert log_prob == pytest.approx(-0.919, abs=0.01) + + for ent in dist_instance.entropy().flatten(): + # entropy of standard normal at 0 + assert ent == pytest.approx(2.83, abs=0.01) + + +def test_tanh_gaussian_dist_instance(): + act_size = 4 + dist_instance = GaussianDistInstance( + torch.zeros(1, act_size), torch.ones(1, act_size) + ) + for _ in range(10): + action = dist_instance.sample() + assert action.shape == (1, act_size) + assert torch.max(action) < 1.0 and torch.min(action) > -1.0 + + +def test_categorical_dist_instance(): + act_size = 4 + test_prob = torch.tensor( + [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size 
- 1) + ) # High prob for first action + dist_instance = CategoricalDistInstance(test_prob) + + for _ in range(10): + action = dist_instance.sample() + assert action.shape == (1,) + assert action < act_size + + # Make sure log_prob of 1st action is high + prob_first_action = dist_instance.log_prob(torch.tensor([0])) + + # Make sure log_prob of other actions is low + for i in range(1, act_size): + assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action From df2a29c23b2e9573b7585e91741d3cd4f0bfbe15 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 5 Aug 2020 10:56:39 -0700 Subject: [PATCH 12/14] Address comments --- .../trainers/tests/torch/test_decoders.py | 2 +- .../trainers/tests/torch/test_distribution.py | 33 ++++++++++++++----- .../trainers/tests/torch/test_encoders.py | 1 + 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/torch/test_decoders.py b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py index 00172d0719..aa417edd05 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_decoders.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py @@ -17,7 +17,7 @@ def test_valueheads(): for stream_name in stream_names: assert value_out[stream_name].shape == (batch_size,) - # Test that iinputting the wrong size input will throw an error + # Test that inputting the wrong size input will throw an error with pytest.raises(Exception): value_out = value_heads(torch.ones((batch_size, input_size + 2))) diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py index 273ceeaee8..6637eb159b 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py @@ -13,6 +13,7 @@ @pytest.mark.parametrize("tanh_squash", [True, False]) @pytest.mark.parametrize("conditional_sigma", [True, False]) def test_gaussian_distribution(conditional_sigma, 
tanh_squash): + torch.manual_seed(0) hidden_size = 16 act_size = 4 sample_embedding = torch.ones((1, 16)) @@ -43,6 +44,7 @@ def test_gaussian_distribution(conditional_sigma, tanh_squash): def test_multi_categorical_distribution(): + torch.manual_seed(0) hidden_size = 16 act_size = [3, 3, 4] sample_embedding = torch.ones((1, 16)) @@ -51,22 +53,33 @@ def test_multi_categorical_distribution(): # Make sure backprop works optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3) - for _ in range(50): + def create_test_prob(size: int) -> torch.Tensor: + test_prob = torch.tensor( + [[1.0 - 0.01 * (size - 1)] + [0.01] * (size - 1)] + ) # High prob for first action + return test_prob.log() + + for _ in range(100): dist_insts = gauss_dist(sample_embedding, masks=torch.ones((1, sum(act_size)))) loss = 0 for i, dist_inst in enumerate(dist_insts): assert isinstance(dist_inst, CategoricalDistInstance) log_prob = dist_inst.all_log_prob() - test_prob = torch.tensor( - [1.0 - 0.01 * (act_size[i] - 1)] + [0.01] * (act_size[i] - 1) - ) # High prob for first action - test_log_prob = test_prob.log() + test_log_prob = create_test_prob(act_size[i]) + # Force log_probs to match the high probability for the first action generated by + # create_test_prob loss += torch.nn.functional.mse_loss(log_prob, test_log_prob) optimizer.zero_grad() loss.backward() optimizer.step() - for dist_inst in dist_insts: - assert dist_inst.all_log_prob().flatten()[0] == pytest.approx(0, abs=0.1) + for dist_inst, size in zip(dist_insts, act_size): + # Check that the log probs are close to the fake ones that we generated. 
+ test_log_probs = create_test_prob(size) + for _prob, _test_prob in zip( + dist_inst.all_log_prob().flatten().tolist(), + test_log_probs.flatten().tolist(), + ): + assert _prob == pytest.approx(_test_prob, abs=0.1) # Test masks masks = [] @@ -80,6 +93,7 @@ def test_multi_categorical_distribution(): def test_gaussian_dist_instance(): + torch.manual_seed(0) act_size = 4 dist_instance = GaussianDistInstance( torch.zeros(1, act_size), torch.ones(1, act_size) @@ -96,6 +110,7 @@ def test_gaussian_dist_instance(): def test_tanh_gaussian_dist_instance(): + torch.manual_seed(0) act_size = 4 dist_instance = GaussianDistInstance( torch.zeros(1, act_size), torch.ones(1, act_size) @@ -107,6 +122,7 @@ def test_tanh_gaussian_dist_instance(): def test_categorical_dist_instance(): + torch.manual_seed(0) act_size = 4 test_prob = torch.tensor( [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1) @@ -118,9 +134,8 @@ def test_categorical_dist_instance(): assert action.shape == (1,) assert action < act_size - # Make sure log_prob of 1st action is high + # Make sure the first action as higher probability than the others. prob_first_action = dist_instance.log_prob(torch.tensor([0])) - # Make sure log_prob of other actions is low for i in range(1, act_size): assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py index 76253611b0..7f77b3d72a 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py @@ -18,6 +18,7 @@ def compare_models(module_1, module_2): for key_item_1, key_item_2 in zip( module_1.state_dict().items(), module_2.state_dict().items() ): + # Compare tensors in state_dict and not the keys. 
is_same = torch.equal(key_item_1[1], key_item_2[1]) and is_same return is_same From 503dd4645a331b85759d40d00d549bc259dbb4e6 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 5 Aug 2020 17:27:25 -0700 Subject: [PATCH 13/14] Add test for get_probs_and_entropy --- .../trainers/tests/torch/test_utils.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py index 9467880b7b..25c7a6c05e 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_utils.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py @@ -9,6 +9,10 @@ VectorEncoder, VectorAndUnnormalizedInputEncoder, ) +from mlagents.trainers.torch.distributions import ( + CategoricalDistInstance, + GaussianDistInstance, +) def test_min_visual_size(): @@ -120,3 +124,43 @@ def test_actions_to_onehot(): ] for res, exp in zip(oh_actions, expected_result): assert torch.equal(res, exp) + + +def test_get_probs_and_entropy(): + # Test continuous + # Add two dists to the list. This isn't done in the code but we'd like to support it. + dist_list = [ + GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))), + GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))), + ] + action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))] + log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy( + action_list, dist_list + ) + assert log_probs.shape == (1, 2, 2) + assert entropies.shape == (1, 2, 2) + assert all_probs is None + + for log_prob in log_probs.flatten(): + # Log prob of standard normal at 0 + assert log_prob == pytest.approx(-0.919, abs=0.01) + + for ent in entropies.flatten(): + # entropy of standard normal at 0 + assert ent == pytest.approx(2.83, abs=0.01) + + # Test discrete + # Add two dists to the list.
+ act_size = 2 + test_prob = torch.tensor( + [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1) + ) # High prob for first action + dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)] + action_list = [torch.tensor([0]), torch.tensor([1])] + log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy( + action_list, dist_list + ) + assert all_probs.shape == (len(dist_list * act_size),) + assert entropies.shape == (len(dist_list),) + # Make sure the first action has a higher probability than the others. + assert log_probs.flatten()[0] > log_probs.flatten()[1] From 594cb8633465216e86be4becef928f3444177d2e Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 5 Aug 2020 17:29:06 -0700 Subject: [PATCH 14/14] Rename test_distribution to test_distributions --- .../tests/torch/{test_distribution.py => test_distributions.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ml-agents/mlagents/trainers/tests/torch/{test_distribution.py => test_distributions.py} (100%) diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py similarity index 100% rename from ml-agents/mlagents/trainers/tests/torch/test_distribution.py rename to ml-agents/mlagents/trainers/tests/torch/test_distributions.py