From a475151fac832e5158d88e0cec020e5c93acb447 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Thu, 30 Jul 2020 19:09:57 -0700
Subject: [PATCH 01/14] Add test for some utils and fix bug

---
 .../trainers/tests/torch/test_utils.py        | 72 +++++++++++++++++++
 ml-agents/mlagents/trainers/torch/utils.py    | 24 ++++---
 2 files changed, 85 insertions(+), 11 deletions(-)
 create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_utils.py

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
new file mode 100644
index 0000000000..e104ba65cd
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -0,0 +1,72 @@
+import pytest
+import torch
+
+from mlagents.trainers.settings import EncoderType
+from mlagents.trainers.torch.utils import ModelUtils
+from mlagents.trainers.exception import UnityTrainerException
+from mlagents.trainers.torch.encoders import (
+    VectorEncoder,
+    VectorAndUnnormalizedInputEncoder,
+)
+
+
+def test_min_visual_size():
+    # Make sure each EncoderType has an entry in MIN_RESOLUTION_FOR_ENCODER
+    assert set(ModelUtils.MIN_RESOLUTION_FOR_ENCODER.keys()) == set(EncoderType)
+
+    for encoder_type in EncoderType:
+        good_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type]
+        vis_input = torch.ones((1, 3, good_size, good_size))
+        ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)
+        enc_func = ModelUtils.get_encoder_for_type(encoder_type)
+        enc = enc_func(good_size, good_size, 3, 1)
+        enc.forward(vis_input)
+
+        # Anything under the min size should raise an exception. If not, decrease the min size!
+        with pytest.raises(Exception):
+            bad_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type] - 1
+            vis_input = torch.ones((1, 3, bad_size, bad_size))
+
+            with pytest.raises(UnityTrainerException):
+                # Make sure we'd hit a friendly error during model setup time.
+                ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)
+
+            enc = enc_func(bad_size, bad_size, 3, 1)
+            enc.forward(vis_input)
+
+
+@pytest.mark.parametrize("unnormalized_inputs", [0, 1])
+@pytest.mark.parametrize("num_visual", [0, 1, 2])
+@pytest.mark.parametrize("num_vector", [0, 1, 2])
+@pytest.mark.parametrize("normalize", [True, False])
+@pytest.mark.parametrize("encoder_type", [EncoderType.SIMPLE, EncoderType.NATURE_CNN])
+def test_create_encoders(
+    encoder_type, normalize, num_vector, num_visual, unnormalized_inputs
+):
+    """Check the number and types of encoders built for every input combination."""
+    vec_obs_shape = (5,)
+    vis_obs_shape = (84, 84, 3)
+    obs_shapes = []
+    for _ in range(num_vector):
+        obs_shapes.append(vec_obs_shape)
+    for _ in range(num_visual):
+        obs_shapes.append(vis_obs_shape)
+    h_size = 128
+    num_layers = 3
+    vis_enc, vec_enc = ModelUtils.create_encoders(
+        obs_shapes, h_size, num_layers, encoder_type, unnormalized_inputs, normalize
+    )
+    vec_enc = list(vec_enc)
+    vis_enc = list(vis_enc)
+    assert len(vec_enc) == (
+        1 if unnormalized_inputs + num_vector > 0 else 0
+    )  # There's always at most one vector encoder.
+    assert len(vis_enc) == num_visual
+
+    if unnormalized_inputs > 0:
+        assert isinstance(vec_enc[0], VectorAndUnnormalizedInputEncoder)
+    elif num_vector > 0:
+        assert isinstance(vec_enc[0], VectorEncoder)
+
+    for enc in vis_enc:
+        assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type))
diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py
index d628ac3ebf..bfb5217648 100644
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
@@ -42,8 +42,9 @@ def _check_resolution_for_encoder(
         vis_in: torch.Tensor, vis_encoder_type: EncoderType
     ) -> None:
         min_res = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
-        height = vis_in.shape[1]
-        width = vis_in.shape[2]
+        # Note: PyTorch uses NCHW not NHWC (TF).
+        height = vis_in.shape[2]
+        width = vis_in.shape[3]
         if height < min_res or width < min_res:
             raise UnityTrainerException(
                 f"Visual observation resolution ({width}x{height}) is too small for"
@@ -90,16 +91,17 @@ def create_encoders(
                 raise UnityTrainerException(
                     f"Unsupported shape of {dimension} for observation {i}"
                 )
-        if unnormalized_inputs > 0:
-            vector_encoders.append(
-                VectorAndUnnormalizedInputEncoder(
-                    vector_size, h_size, unnormalized_inputs, num_layers, normalize
+        if vector_size + unnormalized_inputs > 0:
+            if unnormalized_inputs > 0:
+                vector_encoders.append(
+                    VectorAndUnnormalizedInputEncoder(
+                        vector_size, h_size, unnormalized_inputs, num_layers, normalize
+                    )
+                )
+            else:
+                vector_encoders.append(
+                    VectorEncoder(vector_size, h_size, num_layers, normalize)
                 )
-            )
-        else:
-            vector_encoders.append(
-                VectorEncoder(vector_size, h_size, num_layers, normalize)
-            )
         return nn.ModuleList(visual_encoders), nn.ModuleList(vector_encoders)
 
     @staticmethod

From 7f5b4a9ed7894f8ea412675ef11e25ab97aae04f Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Thu, 30 Jul 2020 19:10:11 -0700
Subject: [PATCH 02/14] Fix PPO TF test

---
 ml-agents/mlagents/trainers/tests/test_reward_signals.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
index d13cb2674b..5ccbfe8836 100644
--- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py
+++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -4,7 +4,7 @@
 import mlagents.trainers.tests.mock_brain as mb
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.sac.optimizer import SACOptimizer
-from mlagents.trainers.ppo.optimizer import PPOOptimizer
+from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
 from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
 from mlagents.trainers.settings import (
     GAILSettings,
@@ -75,7 +75,7 @@ def create_optimizer_mock(
     if trainer_settings.trainer_type == TrainerType.SAC:
         optimizer = SACOptimizer(policy, trainer_settings)
     else:
-        optimizer = PPOOptimizer(policy, trainer_settings)
+        optimizer = TFPPOOptimizer(policy, trainer_settings)
     return optimizer
 
 

From 86a8a8e1174952838b46824eb0d2dcfd887126c4 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 31 Jul 2020 11:31:56 -0700
Subject: [PATCH 03/14] Added some more utils tests

---
 .../trainers/tests/torch/test_utils.py        | 50 +++++++++++++++++++
 ml-agents/mlagents/trainers/torch/utils.py    |  8 +++
 2 files changed, 58 insertions(+)

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
index e104ba65cd..9467880b7b 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_utils.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -1,5 +1,6 @@
 import pytest
 import torch
+import numpy as np
 
 from mlagents.trainers.settings import EncoderType
 from mlagents.trainers.torch.utils import ModelUtils
@@ -70,3 +71,52 @@ def test_create_encoders(
 
     for enc in vis_enc:
         assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type))
+
+
+def test_list_to_tensor():
+    # Test converting pure list
+    unconverted_list = [[1, 2], [1, 3], [1, 4]]
+    tensor = ModelUtils.list_to_tensor(unconverted_list)
+    # Should be equivalent to torch.tensor conversion
+    assert torch.equal(tensor, torch.tensor(unconverted_list))
+
+    # Test converting pure numpy array
+    np_list = np.asarray(unconverted_list)
+    tensor = ModelUtils.list_to_tensor(np_list)
+    # Should be equivalent to torch.tensor conversion
+    assert torch.equal(tensor, torch.tensor(unconverted_list))
+
+    # Test converting list of numpy arrays
+    list_of_np = [np.asarray(_el) for _el in unconverted_list]
+    tensor = ModelUtils.list_to_tensor(list_of_np)
+    # Should be equivalent to torch.tensor conversion
+    assert torch.equal(tensor, torch.tensor(unconverted_list))
+
+
+def test_break_into_branches():
+    # Test normal multi-branch case
+    all_actions = torch.tensor([[1, 2, 3, 4, 5, 6]])
+    action_size = [2, 1, 3]
+    broken_actions = ModelUtils.break_into_branches(all_actions, action_size)
+    assert len(action_size) == len(broken_actions)
+    for i, _action in enumerate(broken_actions):
+        assert _action.shape == (1, action_size[i])
+
+    # Test 1-branch case
+    action_size = [6]
+    broken_actions = ModelUtils.break_into_branches(all_actions, action_size)
+    assert len(broken_actions) == 1
+    assert broken_actions[0].shape == (1, 6)
+
+
+def test_actions_to_onehot():
+    all_actions = torch.tensor([[1, 0, 2], [1, 0, 2]])
+    action_size = [2, 1, 3]
+    oh_actions = ModelUtils.actions_to_onehot(all_actions, action_size)
+    expected_result = [
+        torch.tensor([[0, 1], [0, 1]]),
+        torch.tensor([[1], [1]]),
+        torch.tensor([[0, 0, 1], [0, 0, 1]]),
+    ]
+    for res, exp in zip(oh_actions, expected_result):
+        assert torch.equal(res, exp)
diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py
index bfb5217648..27b58b42fc 100644
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
@@ -136,6 +136,14 @@ def break_into_branches(
     def actions_to_onehot(
         discrete_actions: torch.Tensor, action_size: List[int]
     ) -> List[torch.Tensor]:
+        """
+        Takes a tensor of discrete actions and turns it into a List of onehot encoding for each
+        action,
+        :param discrete_actions: Actions in integer form.
+        :param action_size: List of branch sizes. Should be of same size as discrete_actions'
+        last dimension.
+        :return: List of one-hot tensors, one representing each branch.
+        """
         onehot_branches = [
             torch.nn.functional.one_hot(_act.T, action_size[i])
             for i, _act in enumerate(discrete_actions.T)

From 3729d0f17323a57c92820e94e15e987a0c3fb1e1 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 31 Jul 2020 11:35:12 -0700
Subject: [PATCH 04/14] Fix typo in docstring

---
 ml-agents/mlagents/trainers/torch/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py
index 27b58b42fc..b741edf19c 100644
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
@@ -138,7 +138,7 @@ def actions_to_onehot(
     ) -> List[torch.Tensor]:
         """
         Takes a tensor of discrete actions and turns it into a List of onehot encoding for each
-        action,
+        action.
         :param discrete_actions: Actions in integer form.
         :param action_size: List of branch sizes. Should be of same size as discrete_actions'
         last dimension.

From 1f062803bd8fc657daea02eee8d28ca54c16a714 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 31 Jul 2020 17:23:10 -0700
Subject: [PATCH 05/14] Fix saving mean and std with normalizer

---
 ml-agents/mlagents/trainers/torch/encoders.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/ml-agents/mlagents/trainers/torch/encoders.py b/ml-agents/mlagents/trainers/torch/encoders.py
index 0607fbcca5..72f605c29f 100644
--- a/ml-agents/mlagents/trainers/torch/encoders.py
+++ b/ml-agents/mlagents/trainers/torch/encoders.py
@@ -9,9 +9,9 @@
 class Normalizer(nn.Module):
     def __init__(self, vec_obs_size: int):
         super().__init__()
-        self.normalization_steps = torch.tensor(1)
-        self.running_mean = torch.zeros(vec_obs_size)
-        self.running_variance = torch.ones(vec_obs_size)
+        self.register_buffer("normalization_steps", torch.tensor(1))
+        self.register_buffer("running_mean", torch.zeros(vec_obs_size))
+        self.register_buffer("running_variance", torch.ones(vec_obs_size))
 
     def forward(self, inputs: torch.Tensor) -> torch.Tensor:
         normalized_state = torch.clamp(
@@ -33,9 +33,10 @@ def update(self, vector_input: torch.Tensor) -> None:
         new_variance = self.running_variance + (
             input_to_new_mean * input_to_old_mean
         ).sum(0)
-        self.running_mean = new_mean
-        self.running_variance = new_variance
-        self.normalization_steps = total_new_steps
+        # Update in-place
+        self.running_mean.data.copy_(new_mean.data)
+        self.running_variance.data.copy_(new_variance.data)
+        self.normalization_steps.data.copy_(total_new_steps.data)
 
     def copy_from(self, other_normalizer: "Normalizer") -> None:
         self.normalization_steps.data.copy_(other_normalizer.normalization_steps.data)

From d93680c16e4831f6f9658cbfcdddee55b1d497d0 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 31 Jul 2020 17:23:22 -0700
Subject: [PATCH 06/14] Tests for encoders

---
 .../trainers/tests/torch/test_encoders.py     | 92 +++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_encoders.py

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
new file mode 100644
index 0000000000..76f48038b1
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
@@ -0,0 +1,92 @@
+import torch
+from unittest import mock
+import pytest
+
+from mlagents.trainers.torch.encoders import (
+    VectorEncoder,
+    VectorAndUnnormalizedInputEncoder,
+    Normalizer,
+)
+
+
+def compare_models(module_1, module_2):
+    is_same = True
+    for key_item_1, key_item_2 in zip(
+        module_1.state_dict().items(), module_2.state_dict().items()
+    ):
+        is_same = torch.equal(key_item_1[1], key_item_2[1]) and is_same
+    return is_same
+
+
+def test_normalizer():
+    input_size = 2
+    norm = Normalizer(input_size)
+
+    # After these three updates the running mean should be 0.5 and the
+    # running variance 2, with the step count starting at 1.
+    vec_input1 = torch.tensor([[1, 1]])
+    vec_input2 = torch.tensor([[1, 1]])
+    vec_input3 = torch.tensor([[0, 0]])
+    norm.update(vec_input1)
+    norm.update(vec_input2)
+    norm.update(vec_input3)
+
+    # Test normalization
+    for val in norm(vec_input1)[0]:
+        assert val == pytest.approx(0.707, abs=0.001)
+
+    # Test copy normalization
+    norm2 = Normalizer(input_size)
+    assert not compare_models(norm, norm2)
+    norm2.copy_from(norm)
+    assert compare_models(norm, norm2)
+    for val in norm2(vec_input1)[0]:
+        assert val == pytest.approx(0.707, abs=0.001)
+
+
+@mock.patch("mlagents.trainers.torch.encoders.Normalizer")
+def test_vector_encoder(mock_normalizer):
+    mock_normalizer_inst = mock.Mock()
+    mock_normalizer.return_value = mock_normalizer_inst
+    input_size = 64
+    hidden_size = 128
+    num_layers = 3
+    normalize = False
+    vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    output = vector_encoder(torch.ones((1, input_size)))
+    assert output.shape == (1, hidden_size)
+
+    normalize = True
+    vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    new_vec = torch.ones((1, input_size))
+    vector_encoder.update_normalization(new_vec)
+
+    mock_normalizer.assert_called_with(input_size)
+    mock_normalizer_inst.update.assert_called_with(new_vec)
+
+    vector_encoder2 = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    vector_encoder.copy_normalization(vector_encoder2)
+    mock_normalizer_inst.copy_from.assert_called_with(mock_normalizer_inst)
+
+
+@mock.patch("mlagents.trainers.torch.encoders.Normalizer")
+def test_vector_and_unnormalized_encoder(mock_normalizer):
+    mock_normalizer_inst = mock.Mock()
+    mock_normalizer.return_value = mock_normalizer_inst
+    input_size = 64
+    unnormalized_size = 32
+    hidden_size = 128
+    num_layers = 3
+    normalize = True
+    mock_normalizer_inst.return_value = torch.ones((1, input_size))
+    vector_encoder = VectorAndUnnormalizedInputEncoder(
+        input_size, hidden_size, unnormalized_size, num_layers, normalize
+    )
+    # Make sure normalizer is only called on input_size
+    mock_normalizer.assert_called_with(input_size)
+    normal_input = torch.ones((1, input_size))
+
+    unnormalized_input = torch.ones((1, 32))
+    output = vector_encoder(normal_input, unnormalized_input)
+    mock_normalizer_inst.assert_called_with(normal_input)
+    assert output.shape == (1, hidden_size)

From 59ba0b668fa20fb43d39cea3a7f8bec6f9ac4797 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Fri, 31 Jul 2020 18:34:15 -0700
Subject: [PATCH 07/14] Visual encoder test

---
 .../trainers/tests/torch/test_encoders.py       | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
index 76f48038b1..76253611b0 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
@@ -6,9 +6,13 @@
     VectorEncoder,
     VectorAndUnnormalizedInputEncoder,
     Normalizer,
+    SimpleVisualEncoder,
+    ResNetVisualEncoder,
+    NatureVisualEncoder,
 )
 
 
+# This test will also reveal issues with states not being saved in the state_dict.
 def compare_models(module_1, module_2):
     is_same = True
     for key_item_1, key_item_2 in zip(
@@ -90,3 +94,16 @@ def test_vector_and_unnormalized_encoder(mock_normalizer):
     output = vector_encoder(normal_input, unnormalized_input)
     mock_normalizer_inst.assert_called_with(normal_input)
     assert output.shape == (1, hidden_size)
+
+
+@pytest.mark.parametrize("image_size", [(36, 36, 3), (84, 84, 4), (256, 256, 5)])
+@pytest.mark.parametrize(
+    "vis_class", [SimpleVisualEncoder, ResNetVisualEncoder, NatureVisualEncoder]
+)
+def test_visual_encoder(vis_class, image_size):
+    num_outputs = 128
+    enc = vis_class(image_size[0], image_size[1], image_size[2], num_outputs)
+    # Note: NCHW not NHWC
+    sample_input = torch.ones((1, image_size[2], image_size[0], image_size[1]))
+    encoding = enc(sample_input)
+    assert encoding.shape == (1, num_outputs)

From ed5c169fb99330fc4795ce4766e26a10d52fd928 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Mon, 3 Aug 2020 11:50:05 -0700
Subject: [PATCH 08/14] Add decoder test

---
 .../trainers/tests/torch/test_decoders.py     | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_decoders.py

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_decoders.py b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py
new file mode 100644
index 0000000000..00172d0719
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py
@@ -0,0 +1,31 @@
+import pytest
+import torch
+
+from mlagents.trainers.torch.decoders import ValueHeads
+
+
+def test_valueheads():
+    stream_names = [f"reward_signal_{num}" for num in range(5)]
+    input_size = 5
+    batch_size = 4
+
+    # Test default 1 value per head
+    value_heads = ValueHeads(stream_names, input_size)
+    input_data = torch.ones((batch_size, input_size))
+    value_out, _ = value_heads(input_data)  # Note: mean value will be removed shortly
+
+    for stream_name in stream_names:
+        assert value_out[stream_name].shape == (batch_size,)
+
+    # Test that iinputting the wrong size input will throw an error
+    with pytest.raises(Exception):
+        value_out = value_heads(torch.ones((batch_size, input_size + 2)))
+
+    # Test multiple values per head (e.g. discrete Q function)
+    output_size = 4
+    value_heads = ValueHeads(stream_names, input_size, output_size)
+    input_data = torch.ones((batch_size, input_size))
+    value_out, _ = value_heads(input_data)
+
+    for stream_name in stream_names:
+        assert value_out[stream_name].shape == (batch_size, output_size)

From f6135dc3c1f7c12c9bd6443086f7ec7916c267da Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 4 Aug 2020 16:57:31 -0700
Subject: [PATCH 09/14] Add check to create_encoders

---
 ml-agents/mlagents/trainers/torch/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py
index 3bed11ccfa..86e3e15b39 100644
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
@@ -40,12 +40,9 @@ def get_encoder_for_type(encoder_type: EncoderType) -> nn.Module:
 
     @staticmethod
     def _check_resolution_for_encoder(
-        vis_in: torch.Tensor, vis_encoder_type: EncoderType
+        height: int, width: int, vis_encoder_type: EncoderType
     ) -> None:
         min_res = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
-        # Note: PyTorch uses NCHW not NHWC (TF).
-        height = vis_in.shape[2]
-        width = vis_in.shape[3]
         if height < min_res or width < min_res:
             raise UnityTrainerException(
                 f"Visual observation resolution ({width}x{height}) is too small for"
@@ -81,6 +78,9 @@ def create_encoders(
         vector_size = 0
         for i, dimension in enumerate(observation_shapes):
             if len(dimension) == 3:
+                ModelUtils._check_resolution_for_encoder(
+                    dimension[0], dimension[1], vis_encode_type
+                )
                 visual_encoders.append(
                     visual_encoder_class(
                         dimension[0], dimension[1], dimension[2], h_size

From d0bed895c11ddc33be8bcbfcb0222178605dad47 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 4 Aug 2020 18:44:48 -0700
Subject: [PATCH 10/14] Add typing to distributions

---
 .../mlagents/trainers/torch/distributions.py  | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
index bdca1a0382..c83ae4649e 100644
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -1,4 +1,5 @@
 import abc
+from typing import List
 import torch
 from torch import nn
 import numpy as np
@@ -114,13 +115,12 @@ def entropy(self):
 class GaussianDistribution(nn.Module):
     def __init__(
         self,
-        hidden_size,
-        num_outputs,
-        conditional_sigma=False,
-        tanh_squash=False,
-        **kwargs
+        hidden_size: int,
+        num_outputs: int,
+        conditional_sigma: bool = False,
+        tanh_squash: bool = False,
     ):
-        super().__init__(**kwargs)
+        super().__init__()
         self.conditional_sigma = conditional_sigma
         self.mu = nn.Linear(hidden_size, num_outputs)
         self.tanh_squash = tanh_squash
@@ -133,7 +133,7 @@ def __init__(
                 torch.zeros(1, num_outputs, requires_grad=True)
             )
 
-    def forward(self, inputs):
+    def forward(self, inputs: torch.Tensor) -> List[DistInstance]:
         mu = self.mu(inputs)
         if self.conditional_sigma:
             log_sigma = torch.clamp(self.log_sigma(inputs), min=-20, max=2)
@@ -146,12 +146,12 @@ def forward(self, inputs):
 
 
 class MultiCategoricalDistribution(nn.Module):
-    def __init__(self, hidden_size, act_sizes):
+    def __init__(self, hidden_size: int, act_sizes: List[int]):
         super().__init__()
         self.act_sizes = act_sizes
-        self.branches = self.create_policy_branches(hidden_size)
+        self.branches = self._create_policy_branches(hidden_size)
 
-    def create_policy_branches(self, hidden_size):
+    def _create_policy_branches(self, hidden_size: int) -> nn.ModuleList:
         branches = []
         for size in self.act_sizes:
             branch_output_layer = nn.Linear(hidden_size, size)
@@ -159,13 +159,13 @@ def create_policy_branches(self, hidden_size):
             branches.append(branch_output_layer)
         return nn.ModuleList(branches)
 
-    def mask_branch(self, logits, mask):
+    def _mask_branch(self, logits: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
         raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask
         normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1).unsqueeze(-1)
         normalized_logits = torch.log(normalized_probs + EPSILON)
         return normalized_logits
 
-    def split_masks(self, masks):
+    def _split_masks(self, masks: torch.Tensor) -> List[torch.Tensor]:
         split_masks = []
         for idx, _ in enumerate(self.act_sizes):
             start = int(np.sum(self.act_sizes[:idx]))
@@ -173,13 +173,13 @@ def split_masks(self, masks):
             split_masks.append(masks[:, start:end])
         return split_masks
 
-    def forward(self, inputs, masks):
+    def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> List[DistInstance]:
         # Todo - Support multiple branches in mask code
         branch_distributions = []
-        masks = self.split_masks(masks)
+        masks = self._split_masks(masks)
         for idx, branch in enumerate(self.branches):
             logits = branch(inputs)
-            norm_logits = self.mask_branch(logits, masks[idx])
+            norm_logits = self._mask_branch(logits, masks[idx])
             distribution = CategoricalDistInstance(norm_logits)
             branch_distributions.append(distribution)
         return branch_distributions

From d228d4b921077f58790167d3669ed7af2c1bec10 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Tue, 4 Aug 2020 18:45:28 -0700
Subject: [PATCH 11/14] Tests for distributions

---
 .../trainers/tests/torch/test_distribution.py | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 ml-agents/mlagents/trainers/tests/torch/test_distribution.py

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py
new file mode 100644
index 0000000000..273ceeaee8
--- /dev/null
+++ b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py
@@ -0,0 +1,126 @@
+import pytest
+import torch
+
+from mlagents.trainers.torch.distributions import (
+    GaussianDistribution,
+    MultiCategoricalDistribution,
+    GaussianDistInstance,
+    TanhGaussianDistInstance,
+    CategoricalDistInstance,
+)
+
+
+@pytest.mark.parametrize("tanh_squash", [True, False])
+@pytest.mark.parametrize("conditional_sigma", [True, False])
+def test_gaussian_distribution(conditional_sigma, tanh_squash):
+    hidden_size = 16
+    act_size = 4
+    sample_embedding = torch.ones((1, 16))
+    gauss_dist = GaussianDistribution(
+        hidden_size,
+        act_size,
+        conditional_sigma=conditional_sigma,
+        tanh_squash=tanh_squash,
+    )
+
+    # Make sure backprop works
+    force_action = torch.zeros((1, act_size))
+    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
+
+    for _ in range(50):
+        dist_inst = gauss_dist(sample_embedding)[0]
+        if tanh_squash:
+            assert isinstance(dist_inst, TanhGaussianDistInstance)
+        else:
+            assert isinstance(dist_inst, GaussianDistInstance)
+        log_prob = dist_inst.log_prob(force_action)
+        loss = torch.nn.functional.mse_loss(log_prob, -2 * torch.ones(log_prob.shape))
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+    for prob in log_prob.flatten():
+        assert prob == pytest.approx(-2, abs=0.1)
+
+
+def test_multi_categorical_distribution():
+    hidden_size = 16
+    act_size = [3, 3, 4]
+    sample_embedding = torch.ones((1, 16))
+    gauss_dist = MultiCategoricalDistribution(hidden_size, act_size)
+
+    # Make sure backprop works
+    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
+
+    for _ in range(50):
+        dist_insts = gauss_dist(sample_embedding, masks=torch.ones((1, sum(act_size))))
+        loss = 0
+        for i, dist_inst in enumerate(dist_insts):
+            assert isinstance(dist_inst, CategoricalDistInstance)
+            log_prob = dist_inst.all_log_prob()
+            test_prob = torch.tensor(
+                [1.0 - 0.01 * (act_size[i] - 1)] + [0.01] * (act_size[i] - 1)
+            )  # High prob for first action
+            test_log_prob = test_prob.log()
+            loss += torch.nn.functional.mse_loss(log_prob, test_log_prob)
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+    for dist_inst in dist_insts:
+        assert dist_inst.all_log_prob().flatten()[0] == pytest.approx(0, abs=0.1)
+
+    # Test masks
+    masks = []
+    for branch in act_size:
+        masks += [0] * (branch - 1) + [1]
+    masks = torch.tensor([masks])
+    dist_insts = gauss_dist(sample_embedding, masks=masks)
+    for dist_inst in dist_insts:
+        log_prob = dist_inst.all_log_prob()
+        assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
+
+
+def test_gaussian_dist_instance():
+    act_size = 4
+    dist_instance = GaussianDistInstance(
+        torch.zeros(1, act_size), torch.ones(1, act_size)
+    )
+    action = dist_instance.sample()
+    assert action.shape == (1, act_size)
+    for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten():
+        # Log prob of standard normal at 0
+        assert log_prob == pytest.approx(-0.919, abs=0.01)
+
+    for ent in dist_instance.entropy().flatten():
+        # entropy of standard normal at 0
+        assert ent == pytest.approx(2.83, abs=0.01)
+
+
+def test_tanh_gaussian_dist_instance():
+    act_size = 4
+    dist_instance = GaussianDistInstance(
+        torch.zeros(1, act_size), torch.ones(1, act_size)
+    )
+    for _ in range(10):
+        action = dist_instance.sample()
+        assert action.shape == (1, act_size)
+        assert torch.max(action) < 1.0 and torch.min(action) > -1.0
+
+
+def test_categorical_dist_instance():
+    act_size = 4
+    test_prob = torch.tensor(
+        [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
+    )  # High prob for first action
+    dist_instance = CategoricalDistInstance(test_prob)
+
+    for _ in range(10):
+        action = dist_instance.sample()
+        assert action.shape == (1,)
+        assert action < act_size
+
+    # Make sure log_prob of 1st action is high
+    prob_first_action = dist_instance.log_prob(torch.tensor([0]))
+
+    # Make sure log_prob of other actions is low
+    for i in range(1, act_size):
+        assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action

From df2a29c23b2e9573b7585e91741d3cd4f0bfbe15 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Wed, 5 Aug 2020 10:56:39 -0700
Subject: [PATCH 12/14] Address comments

---
 .../trainers/tests/torch/test_decoders.py     |  2 +-
 .../trainers/tests/torch/test_distribution.py | 33 ++++++++++++++-----
 .../trainers/tests/torch/test_encoders.py     |  1 +
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_decoders.py b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py
index 00172d0719..aa417edd05 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_decoders.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_decoders.py
@@ -17,7 +17,7 @@ def test_valueheads():
     for stream_name in stream_names:
         assert value_out[stream_name].shape == (batch_size,)
 
-    # Test that iinputting the wrong size input will throw an error
+    # Test that inputting the wrong size input will throw an error
     with pytest.raises(Exception):
         value_out = value_heads(torch.ones((batch_size, input_size + 2)))
 
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py
index 273ceeaee8..6637eb159b 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_distribution.py
@@ -13,6 +13,7 @@
 @pytest.mark.parametrize("tanh_squash", [True, False])
 @pytest.mark.parametrize("conditional_sigma", [True, False])
 def test_gaussian_distribution(conditional_sigma, tanh_squash):
+    torch.manual_seed(0)
     hidden_size = 16
     act_size = 4
     sample_embedding = torch.ones((1, 16))
@@ -43,6 +44,7 @@ def test_gaussian_distribution(conditional_sigma, tanh_squash):
 
 
 def test_multi_categorical_distribution():
+    torch.manual_seed(0)
     hidden_size = 16
     act_size = [3, 3, 4]
     sample_embedding = torch.ones((1, 16))
@@ -51,22 +53,33 @@ def test_multi_categorical_distribution():
     # Make sure backprop works
     optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
 
-    for _ in range(50):
+    def create_test_prob(size: int) -> torch.Tensor:
+        test_prob = torch.tensor(
+            [[1.0 - 0.01 * (size - 1)] + [0.01] * (size - 1)]
+        )  # High prob for first action
+        return test_prob.log()
+
+    for _ in range(100):
         dist_insts = gauss_dist(sample_embedding, masks=torch.ones((1, sum(act_size))))
         loss = 0
         for i, dist_inst in enumerate(dist_insts):
             assert isinstance(dist_inst, CategoricalDistInstance)
             log_prob = dist_inst.all_log_prob()
-            test_prob = torch.tensor(
-                [1.0 - 0.01 * (act_size[i] - 1)] + [0.01] * (act_size[i] - 1)
-            )  # High prob for first action
-            test_log_prob = test_prob.log()
+            test_log_prob = create_test_prob(act_size[i])
+            # Force log_probs to match the high probability for the first action generated by
+            # create_test_prob
             loss += torch.nn.functional.mse_loss(log_prob, test_log_prob)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-    for dist_inst in dist_insts:
-        assert dist_inst.all_log_prob().flatten()[0] == pytest.approx(0, abs=0.1)
+    for dist_inst, size in zip(dist_insts, act_size):
+        # Check that the log probs are close to the fake ones that we generated.
+        test_log_probs = create_test_prob(size)
+        for _prob, _test_prob in zip(
+            dist_inst.all_log_prob().flatten().tolist(),
+            test_log_probs.flatten().tolist(),
+        ):
+            assert _prob == pytest.approx(_test_prob, abs=0.1)
 
     # Test masks
     masks = []
@@ -80,6 +93,7 @@ def test_multi_categorical_distribution():
 
 
 def test_gaussian_dist_instance():
+    torch.manual_seed(0)
     act_size = 4
     dist_instance = GaussianDistInstance(
         torch.zeros(1, act_size), torch.ones(1, act_size)
@@ -96,6 +110,7 @@ def test_gaussian_dist_instance():
 
 
 def test_tanh_gaussian_dist_instance():
+    torch.manual_seed(0)
     act_size = 4
     dist_instance = GaussianDistInstance(
         torch.zeros(1, act_size), torch.ones(1, act_size)
@@ -107,6 +122,7 @@ def test_tanh_gaussian_dist_instance():
 
 
 def test_categorical_dist_instance():
+    torch.manual_seed(0)
     act_size = 4
     test_prob = torch.tensor(
         [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
@@ -118,9 +134,8 @@ def test_categorical_dist_instance():
         assert action.shape == (1,)
         assert action < act_size
 
-    # Make sure log_prob of 1st action is high
+    # Make sure the first action has a higher probability than the others.
     prob_first_action = dist_instance.log_prob(torch.tensor([0]))
 
-    # Make sure log_prob of other actions is low
     for i in range(1, act_size):
         assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
index 76253611b0..7f77b3d72a 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_encoders.py
@@ -18,6 +18,7 @@ def compare_models(module_1, module_2):
     for key_item_1, key_item_2 in zip(
         module_1.state_dict().items(), module_2.state_dict().items()
     ):
+        # Compare tensors in state_dict and not the keys.
         is_same = torch.equal(key_item_1[1], key_item_2[1]) and is_same
     return is_same
 

From 503dd4645a331b85759d40d00d549bc259dbb4e6 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Wed, 5 Aug 2020 17:27:25 -0700
Subject: [PATCH 13/14] Add test for get_probs_and_entropy

---
 .../trainers/tests/torch/test_utils.py        | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
index 9467880b7b..25c7a6c05e 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_utils.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -9,6 +9,10 @@
     VectorEncoder,
     VectorAndUnnormalizedInputEncoder,
 )
+from mlagents.trainers.torch.distributions import (
+    CategoricalDistInstance,
+    GaussianDistInstance,
+)
 
 
 def test_min_visual_size():
@@ -120,3 +124,43 @@ def test_actions_to_onehot():
     ]
     for res, exp in zip(oh_actions, expected_result):
         assert torch.equal(res, exp)
+
+
+def test_get_probs_and_entropy():
+    # Test continuous
+    # Add two dists to the list. This isn't done in the code but we'd like to support it.
+    dist_list = [
+        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
+        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
+    ]
+    action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
+    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
+        action_list, dist_list
+    )
+    assert log_probs.shape == (1, 2, 2)
+    assert entropies.shape == (1, 2, 2)
+    assert all_probs is None
+
+    for log_prob in log_probs.flatten():
+        # Log prob of standard normal at 0
+        assert log_prob == pytest.approx(-0.919, abs=0.01)
+
+    for ent in entropies.flatten():
+        # entropy of standard normal at 0
+        assert ent == pytest.approx(2.83, abs=0.01)
+
+    # Test discrete
+    # Add two dists to the list.
+    act_size = 2
+    test_prob = torch.tensor(
+        [1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
+    )  # High prob for first action
+    dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
+    action_list = [torch.tensor([0]), torch.tensor([1])]
+    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
+        action_list, dist_list
+    )
+    assert all_probs.shape == (len(dist_list * act_size),)
+    assert entropies.shape == (len(dist_list),)
+    # Make sure the first action has a higher probability than the others.
+    assert log_probs.flatten()[0] > log_probs.flatten()[1]

From 594cb8633465216e86be4becef928f3444177d2e Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Wed, 5 Aug 2020 17:29:06 -0700
Subject: [PATCH 14/14] Rename test_distribution to test_distributions

---
 .../tests/torch/{test_distribution.py => test_distributions.py}   | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename ml-agents/mlagents/trainers/tests/torch/{test_distribution.py => test_distributions.py} (100%)

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distribution.py b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
similarity index 100%
rename from ml-agents/mlagents/trainers/tests/torch/test_distribution.py
rename to ml-agents/mlagents/trainers/tests/torch/test_distributions.py