From 8bd70f809ae0739843322220424cde2a6be58313 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 15:04:15 -0800 Subject: [PATCH 01/16] use forked mypy, turn on namespace packages --- .pre-commit-config.yaml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7f77c2d7f9..265c24bca1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,23 +9,28 @@ repos: .*_pb2_grpc.py )$ -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.740 +- repo: https://github.com/chriselion/mypy + # This is a fork of mypy from the 0.750 release with two changes: + # * adds its own pre-commit hook definition + # * disables an assert that fires when a file is processed twice + # Disabling the assert is necessary to enable --namespace-packages + # which we need in turn to get some types recognized across files. + rev: 3d0b6164a9487a6c5cf9d144110b86600fd85e25 hooks: - id: mypy name: mypy-ml-agents files: "ml-agents/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - id: mypy name: mypy-ml-agents-envs files: "ml-agents-envs/.*" # Exclude protobuf files and don't follow them when imported exclude: ".*_pb2.py" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - id: mypy name: mypy-gym-unity files: "gym-unity/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 From 5066ec6d474ff8c927d98bb546ebdbbe689fd792 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 15:28:44 -0800 Subject: [PATCH 02/16] fix mypy errors in mlagents --- ml-agents/mlagents/trainers/demo_loader.py | 5 +++-- ml-agents/mlagents/trainers/models.py | 4 ++++ ml-agents/mlagents/trainers/sac/models.py | 6 ++---- ml-agents/mlagents/trainers/trainer.py | 7 ++++++- ml-agents/mlagents/trainers/trainer_util.py | 2 +- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index ad2833986c..46ccadfc1a 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -27,6 +27,7 @@ def make_demo_buffer( sequence_length: int, ) -> AgentBuffer: # Create and populate buffer using experiences + fake_agent_id = "0" demo_process_buffer = ProcessingBuffer() demo_buffer = AgentBuffer() for idx, experience in enumerate(pair_infos): @@ -64,11 +65,11 @@ def make_demo_buffer( demo_process_buffer[0]["prev_action"].append(previous_action) if next_brain_info.local_done[0]: demo_process_buffer.append_to_update_buffer( - demo_buffer, 0, batch_size=None, training_length=sequence_length + demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length ) demo_process_buffer.reset_local_buffers() demo_process_buffer.append_to_update_buffer( - demo_buffer, 0, batch_size=None, training_length=sequence_length + demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length ) return demo_buffer diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 362878a58d..75746ee22c 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -88,6 +88,10 @@ def 
__init__( self.running_variance: Optional[tf.Variable] = None self.update_normalization: Optional[tf.Operation] = None self.value: Optional[tf.Tensor] = None + self.all_log_probs: Optional[tf.Tensor] = None + self.output: Optional[tf.Tensor] = None + self.selected_actions: Optional[tf.Tensor] = None + self.action_holder: Optional[tf.Tensor] = None @staticmethod def create_global_steps(): diff --git a/ml-agents/mlagents/trainers/sac/models.py b/ml-agents/mlagents/trainers/sac/models.py index 1e2911d8e8..4a0adc1fc4 100644 --- a/ml-agents/mlagents/trainers/sac/models.py +++ b/ml-agents/mlagents/trainers/sac/models.py @@ -57,18 +57,16 @@ def __init__( self.q2_memory_in: Optional[tf.Tensor] = None self.q1_memory_out: Optional[tf.Tensor] = None self.q2_memory_out: Optional[tf.Tensor] = None - self.action_holder: Optional[tf.Tensor] = None + #self.action_holder: Optional[tf.Tensor] = None self.prev_action: Optional[tf.Tensor] = None self.action_masks: Optional[tf.Tensor] = None self.external_action_in: Optional[tf.Tensor] = None self.log_sigma_sq: Optional[tf.Tensor] = None self.entropy: Optional[tf.Tensor] = None self.deterministic_output: Optional[tf.Tensor] = None - self.all_log_probs: Optional[tf.Tensor] = None self.normalized_logprobs: Optional[tf.Tensor] = None self.action_probs: Optional[tf.Tensor] = None - self.selected_actions: Optional[tf.Tensor] = None - self.output: Optional[tf.Tensor] = None + #self.selected_actions: Optional[tf.Tensor] = None self.output_oh: Optional[tf.Tensor] = None self.output_pre: Optional[tf.Tensor] = None diff --git a/ml-agents/mlagents/trainers/trainer.py b/ml-agents/mlagents/trainers/trainer.py index 744b06e4fc..88d0c0bd38 100644 --- a/ml-agents/mlagents/trainers/trainer.py +++ b/ml-agents/mlagents/trainers/trainer.py @@ -60,7 +60,7 @@ def __init__( ) self.summary_writer = tf.summary.FileWriter(self.summary_path) self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap) - self.policy: TFPolicy = None + self.policy: TFPolicy = None # type: ignore # this will always get set self.step: int = 0 def check_param_keys(self): @@ -282,3 +282,8 @@ def update_policy(self): Uses demonstration_buffer to update model. """ raise UnityTrainerException("The update_model method was not implemented.") + + def clear_update_buffer(self) -> None: + raise UnityTrainerException( + "The clear_update_buffer method was not implemented." 
+ ) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 7539a8acba..ed2b98985d 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -96,7 +96,7 @@ def initialize_trainer( _brain_key = trainer_config[_brain_key] trainer_parameters.update(trainer_config[_brain_key]) - trainer = None + trainer: Trainer = None # type: ignore # will be set to one of these, or raise if trainer_parameters["trainer"] == "offline_bc": trainer = OfflineBCTrainer( brain_parameters, trainer_parameters, train_model, load_model, seed, run_id From 4a42120aedccbabc393b9e1e0799b0d7fd37b473 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:14:50 -0800 Subject: [PATCH 03/16] fix mypy errors from mlagents-envs --- ml-agents-envs/mlagents/envs/brain.py | 8 ++++---- ml-agents-envs/mlagents/envs/environment.py | 9 +++++---- .../mlagents/envs/mock_communicator.py | 3 ++- .../engine_configuration_channel.py | 2 +- .../side_channel/float_properties_channel.py | 19 +++++++++++++------ .../envs/side_channel/raw_bytes_channel.py | 2 +- .../envs/side_channel/side_channel.py | 2 +- .../mlagents/envs/simple_env_manager.py | 5 +---- 8 files changed, 28 insertions(+), 22 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index 21e349fe28..ebf136c880 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -6,7 +6,7 @@ from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto from mlagents.envs.communicator_objects.observation_pb2 import ObservationProto from mlagents.envs.timers import hierarchical_timer, timed -from typing import Dict, List, NamedTuple, Optional +from typing import Dict, List, NamedTuple, Optional, Collection from PIL import Image logger = logging.getLogger("mlagents.envs") @@ -160,7 +160,7 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: @timed def from_agent_proto( worker_id: int, - agent_info_list: List[AgentInfoProto], + agent_info_list: Collection[AgentInfoProto], brain_params: BrainParameters, ) -> "BrainInfo": """ @@ -202,7 +202,7 @@ def from_agent_proto( @staticmethod def _process_visual_observations( - brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] + brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] ) -> List[np.ndarray]: visual_observation_protos: List[List[ObservationProto]] = [] @@ -231,7 +231,7 @@ def _process_visual_observations( @staticmethod def _process_vector_observations( - brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] + brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] ) -> np.ndarray: if len(agent_info_list) == 0: vector_obs = np.zeros( diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 20954931a1..510887aed6 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -9,7 +9,7 @@ from mlagents.envs.side_channel.side_channel import SideChannel from mlagents.envs.base_unity_environment import BaseUnityEnvironment from mlagents.envs.timers import timed, hierarchical_timer -from .brain import AllBrainInfo, BrainInfo, BrainParameters +from mlagents.envs.brain import AllBrainInfo, BrainInfo, BrainParameters from .exception import ( UnityEnvironmentException, UnityCommunicationException, @@ -17,6 +17,7 @@ UnityTimeOutException, ) 
+from mlagents.envs.communicator_objects.command_pb2 import CommandProto from mlagents.envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto from mlagents.envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto from mlagents.envs.communicator_objects.agent_action_pb2 import AgentActionProto @@ -494,7 +495,7 @@ def _get_state(self, output: UnityRLOutputProto) -> AllBrainInfo: @staticmethod def _parse_side_channel_message( - side_channels: Dict[int, SideChannel], data: bytearray + side_channels: Dict[int, SideChannel], data: bytes ) -> None: offset = 0 while offset < len(data): @@ -566,13 +567,13 @@ def _generate_step_input( if value[b] is not None: action.value = float(value[b][i]) rl_in.agent_actions[b].value.extend([action]) - rl_in.command = 0 + rl_in.command = CommandProto.STEP rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) def _generate_reset_input(self) -> UnityInputProto: rl_in = UnityRLInputProto() - rl_in.command = 1 + rl_in.command = CommandProto.RESET rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) diff --git a/ml-agents-envs/mlagents/envs/mock_communicator.py b/ml-agents-envs/mlagents/envs/mock_communicator.py index 1f614952df..69cb6faedb 100755 --- a/ml-agents-envs/mlagents/envs/mock_communicator.py +++ b/ml-agents-envs/mlagents/envs/mock_communicator.py @@ -13,6 +13,7 @@ NONE as COMPRESSION_TYPE_NONE, PNG as COMPRESSION_TYPE_PNG, ) +from mlagents.envs.communicator_objects.space_type_pb2 import SpaceTypeProto class MockCommunicator(Communicator): @@ -43,7 +44,7 @@ def initialize(self, inputs: UnityInputProto) -> UnityOutputProto: bp = BrainParametersProto( vector_action_size=[2], vector_action_descriptions=["", ""], - vector_action_space_type=int(not self.is_discrete), + vector_action_space_type=SpaceTypeProto.discrete if self.is_discrete else SpaceTypeProto.continuous, brain_name=self.brain_name, is_training=True, ) diff --git a/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py b/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py index c250f4cd0e..49388a6f9d 100644 --- a/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py +++ b/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py @@ -31,7 +31,7 @@ class EngineConfigurationChannel(SideChannel): def channel_type(self) -> int: return SideChannelType.EngineSettings - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. Can be called multiple times per step if multiple messages are meant for that diff --git a/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py b/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py index 4628b631ca..250c5d1dde 100644 --- a/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py +++ b/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py @@ -1,6 +1,6 @@ from mlagents.envs.side_channel.side_channel import SideChannel, SideChannelType import struct -from typing import Tuple, Optional, List +from typing import Dict, Tuple, Optional, List class FloatPropertiesChannel(SideChannel): @@ -10,15 +10,15 @@ class FloatPropertiesChannel(SideChannel): set_property, get_property and list_properties. 
""" - def __init__(self): - self._float_properties = {} + def __init__(self) -> None: + self._float_properties: Dict[str, float] = {} super().__init__() @property def channel_type(self) -> int: return SideChannelType.FloatProperties - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. Can be called multiple times per step if multiple messages are meant for that @@ -52,7 +52,14 @@ def list_properties(self) -> List[str]: Returns a list of all the string identifiers of the properties currently present in the Unity Environment. """ - return self._float_properties.keys() + return list(self._float_properties.keys()) + + def get_property_dict(self) -> Dict[str, float]: + """ + Returns a copy of the float properties. + :return: + """ + return dict(self._float_properties) @staticmethod def serialize_float_prop(key: str, value: float) -> bytearray: @@ -64,7 +71,7 @@ def serialize_float_prop(key: str, value: float) -> bytearray: return result @staticmethod - def deserialize_float_prop(data: bytearray) -> Tuple[str, float]: + def deserialize_float_prop(data: bytes) -> Tuple[str, float]: offset = 0 encoded_key_len = struct.unpack_from(" int: return SideChannelType.RawBytesChannelStart + self._channel_id - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. Can be called multiple times per step if multiple messages are meant for that diff --git a/ml-agents-envs/mlagents/envs/side_channel/side_channel.py b/ml-agents-envs/mlagents/envs/side_channel/side_channel.py index 4a1c611612..d50257b8ed 100644 --- a/ml-agents-envs/mlagents/envs/side_channel/side_channel.py +++ b/ml-agents-envs/mlagents/envs/side_channel/side_channel.py @@ -33,7 +33,7 @@ def queue_message_to_send(self, data: bytearray) -> None: self.message_queue.append(data) @abstractmethod - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. 
Can be called multiple times per step if multiple messages are meant for that diff --git a/ml-agents-envs/mlagents/envs/simple_env_manager.py b/ml-agents-envs/mlagents/envs/simple_env_manager.py index 248416ea59..4d197bd875 100644 --- a/ml-agents-envs/mlagents/envs/simple_env_manager.py +++ b/ml-agents-envs/mlagents/envs/simple_env_manager.py @@ -60,10 +60,7 @@ def external_brains(self) -> Dict[str, BrainParameters]: @property def get_properties(self) -> Dict[str, float]: - reset_params = {} - for k in self.shared_float_properties.list_properties(): - reset_params[k] = self.shared_float_properties.get_property(k) - return reset_params + return self.shared_float_properties.get_property_dict() def close(self): self.env.close() From 593982376228ebdf755bca0d3684ef94d49c0a7a Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:25:26 -0800 Subject: [PATCH 04/16] fix unit tests --- ml-agents/mlagents/trainers/agent_processor.py | 2 +- ml-agents/mlagents/trainers/demo_loader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 49479cf234..fb06d64457 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -28,7 +28,7 @@ def reset_local_buffers(self) -> None: def append_to_update_buffer( self, update_buffer: AgentBuffer, - agent_id: str, + agent_id: int, key_list: List[str] = None, batch_size: int = None, training_length: int = None, diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index 46ccadfc1a..a09c5d686a 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -27,7 +27,7 @@ def make_demo_buffer( sequence_length: int, ) -> AgentBuffer: # Create and populate buffer using experiences - fake_agent_id = "0" + agent_id: int = 0 demo_process_buffer = ProcessingBuffer() demo_buffer = AgentBuffer() for idx, experience in enumerate(pair_infos): From 821639efce1073a39d307e46c457ec4cda00a146 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:25:33 -0800 Subject: [PATCH 05/16] fix unit tests --- ml-agents/mlagents/trainers/demo_loader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index a09c5d686a..ad2833986c 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -27,7 +27,6 @@ def make_demo_buffer( sequence_length: int, ) -> AgentBuffer: # Create and populate buffer using experiences - agent_id: int = 0 demo_process_buffer = ProcessingBuffer() demo_buffer = AgentBuffer() for idx, experience in enumerate(pair_infos): @@ -65,11 +64,11 @@ def make_demo_buffer( demo_process_buffer[0]["prev_action"].append(previous_action) if next_brain_info.local_done[0]: demo_process_buffer.append_to_update_buffer( - demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length + demo_buffer, 0, batch_size=None, training_length=sequence_length ) demo_process_buffer.reset_local_buffers() demo_process_buffer.append_to_update_buffer( - demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length + demo_buffer, 0, batch_size=None, training_length=sequence_length ) return demo_buffer From ea216c713c54c4df3b9235ab63b249eba8ccf2d0 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:53:43 -0800 Subject: [PATCH 
06/16] cleanup --- .pylintrc | 6 +++++- ml-agents-envs/mlagents/envs/environment.py | 6 +++--- ml-agents-envs/mlagents/envs/mock_communicator.py | 4 ++-- ml-agents/mlagents/trainers/sac/models.py | 2 -- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.pylintrc b/.pylintrc index 11d1f0b75c..748cce0088 100644 --- a/.pylintrc +++ b/.pylintrc @@ -40,4 +40,8 @@ disable = # E0401: Unable to import... # E0611: No name '...' in module '...' # need to look into these, probably namespace packages - E0401, E0611 + E0401, E0611, + + # This was causing false positives + # Appears to be https://github.com/PyCQA/pylint/issues/2981 + W0201 diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 510887aed6..4c3bc938be 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -17,7 +17,7 @@ UnityTimeOutException, ) -from mlagents.envs.communicator_objects.command_pb2 import CommandProto +from mlagents.envs.communicator_objects.command_pb2 import STEP, RESET from mlagents.envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto from mlagents.envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto from mlagents.envs.communicator_objects.agent_action_pb2 import AgentActionProto @@ -567,13 +567,13 @@ def _generate_step_input( if value[b] is not None: action.value = float(value[b][i]) rl_in.agent_actions[b].value.extend([action]) - rl_in.command = CommandProto.STEP + rl_in.command = STEP rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) def _generate_reset_input(self) -> UnityInputProto: rl_in = UnityRLInputProto() - rl_in.command = CommandProto.RESET + rl_in.command = RESET rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) diff --git a/ml-agents-envs/mlagents/envs/mock_communicator.py b/ml-agents-envs/mlagents/envs/mock_communicator.py index 69cb6faedb..d0abd9562a 100755 --- a/ml-agents-envs/mlagents/envs/mock_communicator.py +++ b/ml-agents-envs/mlagents/envs/mock_communicator.py @@ -13,7 +13,7 @@ NONE as COMPRESSION_TYPE_NONE, PNG as COMPRESSION_TYPE_PNG, ) -from mlagents.envs.communicator_objects.space_type_pb2 import SpaceTypeProto +from mlagents.envs.communicator_objects.space_type_pb2 import discrete, continuous class MockCommunicator(Communicator): @@ -44,7 +44,7 @@ def initialize(self, inputs: UnityInputProto) -> UnityOutputProto: bp = BrainParametersProto( vector_action_size=[2], vector_action_descriptions=["", ""], - vector_action_space_type=SpaceTypeProto.discrete if self.is_discrete else SpaceTypeProto.continuous, + vector_action_space_type=discrete if self.is_discrete else continuous, brain_name=self.brain_name, is_training=True, ) diff --git a/ml-agents/mlagents/trainers/sac/models.py b/ml-agents/mlagents/trainers/sac/models.py index 4a0adc1fc4..7336b50940 100644 --- a/ml-agents/mlagents/trainers/sac/models.py +++ b/ml-agents/mlagents/trainers/sac/models.py @@ -57,7 +57,6 @@ def __init__( self.q2_memory_in: Optional[tf.Tensor] = None self.q1_memory_out: Optional[tf.Tensor] = None self.q2_memory_out: Optional[tf.Tensor] = None - #self.action_holder: Optional[tf.Tensor] = None self.prev_action: Optional[tf.Tensor] = None self.action_masks: Optional[tf.Tensor] = None self.external_action_in: Optional[tf.Tensor] = None @@ -66,7 +65,6 @@ def __init__( self.deterministic_output: Optional[tf.Tensor] = None 
self.normalized_logprobs: Optional[tf.Tensor] = None self.action_probs: Optional[tf.Tensor] = None - #self.selected_actions: Optional[tf.Tensor] = None self.output_oh: Optional[tf.Tensor] = None self.output_pre: Optional[tf.Tensor] = None From b5ebe677777782af42aa8c594faa837f718bd7d9 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 18:21:42 -0800 Subject: [PATCH 07/16] fix pylint false positives --- .pre-commit-config.yaml | 9 ++------- .pylintrc | 3 ++- ml-agents-envs/mlagents/envs/brain.py | 14 +++++++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 265c24bca1..3d956fe9f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,13 +9,8 @@ repos: .*_pb2_grpc.py )$ -- repo: https://github.com/chriselion/mypy - # This is a fork of mypy from the 0.750 release with two changes: - # * adds its own pre-commit hook definition - # * disables an assert that fires when a file is processed twice - # Disabling the assert is necessary to enable --namespace-packages - # which we need in turn to get some types recognized across files. - rev: 3d0b6164a9487a6c5cf9d144110b86600fd85e25 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.750 hooks: - id: mypy name: mypy-ml-agents diff --git a/.pylintrc b/.pylintrc index 748cce0088..6aa891290a 100644 --- a/.pylintrc +++ b/.pylintrc @@ -44,4 +44,5 @@ disable = # This was causing false positives # Appears to be https://github.com/PyCQA/pylint/issues/2981 - W0201 + W0201, + diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index ebf136c880..394b8c606d 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -160,7 +160,9 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: @timed def from_agent_proto( worker_id: int, - agent_info_list: Collection[AgentInfoProto], + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object brain_params: BrainParameters, ) -> "BrainInfo": """ @@ -202,7 +204,10 @@ def from_agent_proto( @staticmethod def _process_visual_observations( - brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] + brain_params: BrainParameters, + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> List[np.ndarray]: visual_observation_protos: List[List[ObservationProto]] = [] @@ -231,7 +236,10 @@ def _process_visual_observations( @staticmethod def _process_vector_observations( - brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] + brain_params: BrainParameters, + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> np.ndarray: if len(agent_info_list) == 0: vector_obs = np.zeros( From f4af166f7559e4bbbafdae4845ff5416f5656f09 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 9 Dec 2019 15:05:14 -0800 Subject: [PATCH 08/16] Trainer.advance() --- ml-agents-envs/mlagents/envs/subprocess_env_manager.py | 5 +---- ml-agents-envs/mlagents/envs/tests/test_side_channel.py | 3 +++ ml-agents/mlagents/trainers/rl_trainer.py | 6 ++++++ ml-agents/mlagents/trainers/trainer.py | 6 ++---- ml-agents/mlagents/trainers/trainer_controller.py | 5 ++++- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py index 930559b9a7..69cbccd2b4 100644 --- 
a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py +++ b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py @@ -121,10 +121,7 @@ def _send_response(cmd_name, payload): elif cmd.name == "external_brains": _send_response("external_brains", env.external_brains) elif cmd.name == "get_properties": - reset_params = {} - for k in shared_float_properties.list_properties(): - reset_params[k] = shared_float_properties.get_property(k) - + reset_params = shared_float_properties.get_property_dict() _send_response("get_properties", reset_params) elif cmd.name == "reset": for k, v in cmd.payload.items(): diff --git a/ml-agents-envs/mlagents/envs/tests/test_side_channel.py b/ml-agents-envs/mlagents/envs/tests/test_side_channel.py index 19ca3a0e4c..a475d5a0aa 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_side_channel.py +++ b/ml-agents-envs/mlagents/envs/tests/test_side_channel.py @@ -69,6 +69,9 @@ def test_float_properties(): val = sender.get_property("prop1") assert val == 1.0 + assert receiver.get_property_dict() == {"prop1": 1.0, "prop2": 2.0} + assert receiver.get_property_dict() == sender.get_property_dict() + def test_raw_bytes(): sender = RawBytesChannel() diff --git a/ml-agents/mlagents/trainers/rl_trainer.py b/ml-agents/mlagents/trainers/rl_trainer.py index c86a9d2629..88d32f5864 100644 --- a/ml-agents/mlagents/trainers/rl_trainer.py +++ b/ml-agents/mlagents/trainers/rl_trainer.py @@ -264,3 +264,9 @@ def add_rewards_outputs( raise UnityTrainerException( "The add_rewards_outputs method was not implemented." ) + + def advance(self): + """ + Eventually logic from TrainerController.advance() will live here. + """ + self.clear_update_buffer() diff --git a/ml-agents/mlagents/trainers/trainer.py b/ml-agents/mlagents/trainers/trainer.py index 88d0c0bd38..e71860a828 100644 --- a/ml-agents/mlagents/trainers/trainer.py +++ b/ml-agents/mlagents/trainers/trainer.py @@ -283,7 +283,5 @@ def update_policy(self): """ raise UnityTrainerException("The update_model method was not implemented.") - def clear_update_buffer(self) -> None: - raise UnityTrainerException( - "The clear_update_buffer method was not implemented." 
- ) + def advance(self) -> None: + pass diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 572f5d39d1..4994f61f4a 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -295,5 +295,8 @@ def advance(self, env: EnvManager) -> int: env.set_policy(brain_name, trainer.policy) else: # Avoid memory leak during inference - trainer.clear_update_buffer() + # Eventually this whole block will take place in advance() + # But currently this only calls clear_update_buffer() in RLTrainer + # and nothing in the base class + trainer.advance() return len(new_step_infos) From b80ca34136cf198bed91b75ae41ce7edc3cba783 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 9 Dec 2019 15:44:23 -0800 Subject: [PATCH 09/16] fix unit test --- ml-agents/mlagents/trainers/tests/test_trainer_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index ff6ca5b9de..fee2c8fc90 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -213,4 +213,4 @@ def test_take_step_if_not_training(): new_step_info.previous_all_brain_info[brain_name], new_step_info.current_all_brain_info[brain_name], ) - trainer_mock.clear_update_buffer.assert_called_once() + trainer_mock.advance.assert_called_once() From 4e97a7cf3dfa28c73c5a59b7f11000c98415dce2 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 10:51:18 -0800 Subject: [PATCH 10/16] remove --namespace-packages on mlagents, fix envs --- .pre-commit-config.yaml | 2 +- ml-agents-envs/mlagents/envs/base_env.py | 6 +++--- ml-agents-envs/mlagents/envs/environment.py | 4 ++-- ml-agents-envs/mlagents/envs/rpc_utils.py | 16 ++++++++++------ .../mlagents/envs/tests/test_rpc_utils.py | 10 +++++++--- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3d956fe9f3..be30007001 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: mypy name: mypy-ml-agents files: "ml-agents/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] + args: [--ignore-missing-imports, --disallow-incomplete-defs] - id: mypy name: mypy-ml-agents-envs files: "ml-agents-envs/.*" diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index f6c678f80d..dbb880f052 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -18,12 +18,12 @@ """ from abc import ABC, abstractmethod -from typing import List, NamedTuple, Tuple, Optional, Union, Dict, NewType +from typing import List, NamedTuple, Tuple, Optional, Union, Dict import numpy as np from enum import Enum -AgentId = NewType("AgentId", int) -AgentGroup = NewType("AgentGroup", str) +AgentId = int +AgentGroup = str class StepResult(NamedTuple): diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index e06fb810ad..eaa4760169 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -372,8 +372,8 @@ def set_action_for_agent( action = action.astype(expected_type) if agent_group not in self._env_actions: - self._env_actions[agent_group] = self._empty_action( - spec, 
self._env_state[agent_group].n_agents() + self._env_actions[agent_group] = spec.create_empty_action( + self._env_state[agent_group].n_agents() ) try: index = np.where(self._env_state[agent_group].agent_id == agent_id)[0][0] diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index c08f5c3f97..257c83da98 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -5,7 +5,7 @@ import logging import numpy as np import io -from typing import List, Tuple +from typing import cast, List, Tuple, Union, Collection from PIL import Image logger = logging.getLogger("mlagents.envs") @@ -26,9 +26,10 @@ def agent_group_spec_from_proto( if brain_param_proto.vector_action_space_type == 0 else ActionType.CONTINUOUS ) - action_shape = None if action_type == ActionType.CONTINUOUS: - action_shape = brain_param_proto.vector_action_size[0] + action_shape: Union[ + int, Tuple[int, ...] + ] = brain_param_proto.vector_action_size[0] else: action_shape = tuple(brain_param_proto.vector_action_size) return AgentGroupSpec(observation_shape, action_type, action_shape) @@ -57,7 +58,9 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: @timed def _process_visual_observation( - obs_index: int, shape: Tuple[int, int, int], agent_info_list: List[AgentInfoProto] + obs_index: int, + shape: Tuple[int, int, int], + agent_info_list: Collection[AgentInfoProto], ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32) @@ -72,7 +75,7 @@ def _process_visual_observation( @timed def _process_vector_observation( - obs_index: int, shape: Tuple[int, ...], agent_info_list: List[AgentInfoProto] + obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto] ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0]), dtype=np.float32) @@ -104,12 +107,13 @@ def _process_vector_observation( @timed def batched_step_result_from_proto( - agent_info_list: List[AgentInfoProto], group_spec: AgentGroupSpec + agent_info_list: Collection[AgentInfoProto], group_spec: AgentGroupSpec ) -> BatchedStepResult: obs_list: List[np.ndarray] = [] for obs_index, obs_shape in enumerate(group_spec.observation_shapes): is_visual = len(obs_shape) == 3 if is_visual: + obs_shape = cast(Tuple[int, int, int], obs_shape) obs_list += [ _process_visual_observation(obs_index, obs_shape, agent_info_list) ] diff --git a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py index b47fd97401..b9edc06055 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py @@ -1,6 +1,10 @@ from typing import List, Tuple from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto -from mlagents.envs.communicator_objects.observation_pb2 import ObservationProto +from mlagents.envs.communicator_objects.observation_pb2 import ( + ObservationProto, + NONE, + PNG, +) from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto import numpy as np from mlagents.envs.base_env import AgentGroupSpec, ActionType @@ -30,7 +34,7 @@ def generate_list_agent_proto( for obs_index in range(len(shape)): obs_proto = ObservationProto() obs_proto.shape.extend(list(shape[obs_index])) - obs_proto.compression_type = 0 + obs_proto.compression_type = NONE obs_proto.float_data.data.extend([0.1] * np.prod(shape[obs_index])) 
obs_proto_list.append(obs_proto) ap.observations.extend(obs_proto_list) @@ -49,7 +53,7 @@ def generate_compressed_data(in_array: np.ndarray) -> bytes: def generate_compressed_proto_obs(in_array: np.ndarray) -> ObservationProto: obs_proto = ObservationProto() obs_proto.compressed_data = generate_compressed_data(in_array) - obs_proto.compression_type = 1 + obs_proto.compression_type = PNG obs_proto.shape.extend(in_array.shape) return obs_proto From 36e8ec83f312b7e3afed6ec8aea73f7f24b1cb31 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 11:01:07 -0800 Subject: [PATCH 11/16] fix pylint bugs --- ml-agents-envs/mlagents/envs/rpc_utils.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index 257c83da98..105d8bf1a9 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -60,7 +60,9 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: def _process_visual_observation( obs_index: int, shape: Tuple[int, int, int], - agent_info_list: Collection[AgentInfoProto], + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32) @@ -75,7 +77,11 @@ def _process_visual_observation( @timed def _process_vector_observation( - obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto] + obs_index: int, + shape: Tuple[int, ...], + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0]), dtype=np.float32) @@ -107,7 +113,10 @@ def _process_vector_observation( @timed def batched_step_result_from_proto( - agent_info_list: Collection[AgentInfoProto], group_spec: AgentGroupSpec + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object + group_spec: AgentGroupSpec, ) -> BatchedStepResult: obs_list: List[np.ndarray] = [] for obs_index, obs_shape in enumerate(group_spec.observation_shapes): From 901b791a10af284abd2c2841050df2dbed0f4fa0 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 14:13:54 -0800 Subject: [PATCH 12/16] agent_id as str or int --- ml-agents/mlagents/trainers/agent_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index fb06d64457..d2d13644de 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Union from mlagents.trainers.buffer import AgentBuffer, BufferException @@ -28,7 +28,7 @@ def reset_local_buffers(self) -> None: def append_to_update_buffer( self, update_buffer: AgentBuffer, - agent_id: int, + agent_id: Union[int, str], key_list: List[str] = None, batch_size: int = None, training_length: int = None, From 0682eb43a9745c4c3dcc57ed76436d33acfe0cca Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 14:52:10 -0800 Subject: [PATCH 13/16] fix sac and ppo trainers --- ml-agents/mlagents/trainers/demo_loader.py | 2 ++ ml-agents/mlagents/trainers/ppo/trainer.py | 7 ++++--- ml-agents/mlagents/trainers/sac/trainer.py | 10 ++++++---- ml-agents/mlagents/trainers/tf_policy.py | 2 ++ 4 files changed, 14 
insertions(+), 7 deletions(-) diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index fa2b68dfc0..597d010759 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -153,4 +153,6 @@ def load_demonstration( break pos += next_pos obs_decoded += 1 + if not brain_params: + raise RuntimeError(f"No BrainParameters found in demonstration file at {file_path}.") return brain_params, info_action_pairs, total_expected diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 45dd45c6ea..8b7a152408 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -67,13 +67,14 @@ def __init__( self.check_param_keys() if multi_gpu and len(get_devices()) > 1: - self.policy = MultiGpuPPOPolicy( + self.ppo_policy = MultiGpuPPOPolicy( seed, brain, trainer_parameters, self.is_training, load ) else: - self.policy = PPOPolicy( + self.ppo_policy = PPOPolicy( seed, brain, trainer_parameters, self.is_training, load ) + self.policy = self.ppo_policy for _reward_signal in self.policy.reward_signals.keys(): self.collected_rewards[_reward_signal] = {} @@ -104,7 +105,7 @@ def process_experiences( else: bootstrapping_info = next_info idx = l - value_next = self.policy.get_value_estimates( + value_next = self.ppo_policy.get_value_estimates( bootstrapping_info, idx, next_info.local_done[l] and not next_info.max_reached[l], diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 727798c1ea..8cda361a9e 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -81,7 +81,8 @@ def __init__( if "save_replay_buffer" in trainer_parameters else False ) - self.policy = SACPolicy(seed, brain, trainer_parameters, self.is_training, load) + self.sac_policy = SACPolicy(seed, brain, trainer_parameters, self.is_training, load) + self.policy = self.sac_policy # Load the replay buffer if load if load and self.checkpoint_replay_buffer: @@ -293,8 +294,9 @@ def update_sac_policy(self) -> None: for stat, stat_list in batch_update_stats.items(): self.stats[stat].append(np.mean(stat_list)) - if self.policy.bc_module: - update_stats = self.policy.bc_module.update() + bc_module = self.sac_policy.bc_module + if bc_module: + update_stats = bc_module.update() for stat, val in update_stats.items(): self.stats[stat].append(val) @@ -325,7 +327,7 @@ def update_reward_signals(self) -> None: self.trainer_parameters["batch_size"], sequence_length=self.policy.sequence_length, ) - update_stats = self.policy.update_reward_signals( + update_stats = self.sac_policy.update_reward_signals( reward_signal_minibatches, n_sequences ) for stat_name, value in update_stats.items(): diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index 3247c94959..db03908736 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -11,6 +11,7 @@ from tensorflow.python.framework import graph_util from mlagents.trainers import tensorflow_to_barracuda as tf2bc from mlagents.trainers.brain import BrainInfo +from mlagents.trainers.components.reward_signals import RewardSignal logger = logging.getLogger("mlagents.trainers") @@ -57,6 +58,7 @@ def __init__(self, seed, brain, trainer_parameters): self.brain = brain self.use_recurrent = trainer_parameters["use_recurrent"] self.memory_dict: Dict[str, np.ndarray] = 
{} + self.reward_signals: Dict[str, RewardSignal] = {} self.num_branches = len(self.brain.vector_action_space_size) self.previous_action_dict: Dict[str, np.array] = {} self.normalize = trainer_parameters.get("normalize", False) From 1647902942e5679e228a727f194badaea220131a Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 15:01:53 -0800 Subject: [PATCH 14/16] remove Optionals --- .pre-commit-config.yaml | 6 +++++- ml-agents/mlagents/trainers/action_info.py | 4 ++-- ml-agents/mlagents/trainers/demo_loader.py | 4 +++- ml-agents/mlagents/trainers/env_manager.py | 8 ++++---- ml-agents/mlagents/trainers/sac/trainer.py | 4 +++- ml-agents/mlagents/trainers/simple_env_manager.py | 4 ++-- ml-agents/mlagents/trainers/subprocess_env_manager.py | 4 ++-- ml-agents/mlagents/trainers/tf_policy.py | 2 +- 8 files changed, 22 insertions(+), 14 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index be30007001..f3faf63b3a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,11 +11,15 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.750 + # Currently mypy may assert after logging one message. To get all the messages at once, change repo and rev to + # repo: https://github.com/chriselion/mypy + # rev: 3d0b6164a9487a6c5cf9d144110b86600fd85e25 + # This is a fork with the assert disabled, although precommit has trouble installing it sometimes. hooks: - id: mypy name: mypy-ml-agents files: "ml-agents/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - id: mypy name: mypy-ml-agents-envs files: "ml-agents-envs/.*" diff --git a/ml-agents/mlagents/trainers/action_info.py b/ml-agents/mlagents/trainers/action_info.py index 782223648f..694b600689 100644 --- a/ml-agents/mlagents/trainers/action_info.py +++ b/ml-agents/mlagents/trainers/action_info.py @@ -1,6 +1,6 @@ -from typing import NamedTuple, Any, Dict, Optional +from typing import NamedTuple, Any, Dict -ActionInfoOutputs = Optional[Dict[str, Any]] +ActionInfoOutputs = Dict[str, Any] class ActionInfo(NamedTuple): diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index 597d010759..6722503380 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -154,5 +154,7 @@ def load_demonstration( pos += next_pos obs_decoded += 1 if not brain_params: - raise RuntimeError(f"No BrainParameters found in demonstration file at {file_path}.") + raise RuntimeError( + f"No BrainParameters found in demonstration file at {file_path}." 
+ ) return brain_params, info_action_pairs, total_expected diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index 7a28aa2598..5190841e5d 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -1,18 +1,18 @@ from abc import ABC, abstractmethod -from typing import List, Dict, NamedTuple, Optional +from typing import List, Dict, NamedTuple from mlagents.trainers.brain import AllBrainInfo, BrainParameters from mlagents.trainers.policy import Policy from mlagents.trainers.action_info import ActionInfo class EnvironmentStep(NamedTuple): - previous_all_brain_info: Optional[AllBrainInfo] + previous_all_brain_info: AllBrainInfo current_all_brain_info: AllBrainInfo - brain_name_to_action_info: Optional[Dict[str, ActionInfo]] + brain_name_to_action_info: Dict[str, ActionInfo] def has_actions_for_brain(self, brain_name: str) -> bool: return ( - self.brain_name_to_action_info is not None + bool(self.brain_name_to_action_info) and brain_name in self.brain_name_to_action_info and self.brain_name_to_action_info[brain_name].outputs is not None ) diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 8cda361a9e..669aa87c47 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -81,7 +81,9 @@ def __init__( if "save_replay_buffer" in trainer_parameters else False ) - self.sac_policy = SACPolicy(seed, brain, trainer_parameters, self.is_training, load) + self.sac_policy = SACPolicy( + seed, brain, trainer_parameters, self.is_training, load + ) self.policy = self.sac_policy # Load the replay buffer if load diff --git a/ml-agents/mlagents/trainers/simple_env_manager.py b/ml-agents/mlagents/trainers/simple_env_manager.py index aef44c41f7..51c3b3376e 100644 --- a/ml-agents/mlagents/trainers/simple_env_manager.py +++ b/ml-agents/mlagents/trainers/simple_env_manager.py @@ -22,7 +22,7 @@ def __init__(self, env: BaseEnv, float_prop_channel: FloatPropertiesChannel): super().__init__() self.shared_float_properties = float_prop_channel self.env = env - self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None) + self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {}) self.previous_all_action_info: Dict[str, ActionInfo] = {} def step(self) -> List[EnvironmentStep]: @@ -51,7 +51,7 @@ def reset( self.shared_float_properties.set_property(k, v) self.env.reset() all_brain_info = self._generate_all_brain_info() - self.previous_step = EnvironmentStep(None, all_brain_info, None) + self.previous_step = EnvironmentStep({}, all_brain_info, {}) return [self.previous_step] @property diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 9347a823d9..ca93feac08 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -53,7 +53,7 @@ def __init__(self, process: Process, worker_id: int, conn: Connection): self.process = process self.worker_id = worker_id self.conn = conn - self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None) + self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {}) self.previous_all_action_info: Dict[str, ActionInfo] = {} self.waiting = False @@ -253,7 +253,7 @@ def reset(self, config: Optional[Dict] = None) -> List[EnvironmentStep]: ew.send("reset", config) # Next (synchronously) collect the reset observations from each 
worker in sequence for ew in self.env_workers: - ew.previous_step = EnvironmentStep(None, ew.recv().payload, None) + ew.previous_step = EnvironmentStep({}, ew.recv().payload, {}) return list(map(lambda ew: ew.previous_step, self.env_workers)) @property diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index db03908736..7505f8b2cb 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -128,7 +128,7 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo: to be passed to add experiences """ if len(brain_info.agents) == 0: - return ActionInfo([], [], None) + return ActionInfo([], [], {}) agents_done = [ agent From 86e3217d6c44133ac8a1671114f704cf72ebdc24 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 15:15:34 -0800 Subject: [PATCH 15/16] fix unit tests --- ml-agents/mlagents/trainers/tests/test_policy.py | 2 +- ml-agents/mlagents/trainers/tf_policy.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_policy.py b/ml-agents/mlagents/trainers/tests/test_policy.py index f7b63234b4..cd17d8bfca 100644 --- a/ml-agents/mlagents/trainers/tests/test_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_policy.py @@ -20,7 +20,7 @@ def test_take_action_returns_empty_with_no_agents(): policy = TFPolicy(test_seed, basic_mock_brain(), basic_params()) no_agent_brain_info = BrainInfo([], [], [], agents=[]) result = policy.get_action(no_agent_brain_info) - assert result == ActionInfo([], [], None) + assert result == ActionInfo([], [], {}) def test_take_action_returns_nones_on_missing_values(): diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index 7505f8b2cb..aaa2998baa 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -11,7 +11,6 @@ from tensorflow.python.framework import graph_util from mlagents.trainers import tensorflow_to_barracuda as tf2bc from mlagents.trainers.brain import BrainInfo -from mlagents.trainers.components.reward_signals import RewardSignal logger = logging.getLogger("mlagents.trainers") @@ -58,7 +57,7 @@ def __init__(self, seed, brain, trainer_parameters): self.brain = brain self.use_recurrent = trainer_parameters["use_recurrent"] self.memory_dict: Dict[str, np.ndarray] = {} - self.reward_signals: Dict[str, RewardSignal] = {} + self.reward_signals: Dict[str, "RewardSignal"] = {} self.num_branches = len(self.brain.vector_action_space_size) self.previous_action_dict: Dict[str, np.array] = {} self.normalize = trainer_parameters.get("normalize", False) From a02b1ce01bd981479c2110e317eb4ebf98a053f7 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 15:20:21 -0800 Subject: [PATCH 16/16] simplify --- ml-agents/mlagents/trainers/env_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index 5190841e5d..d00dfe07b5 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -12,8 +12,7 @@ class EnvironmentStep(NamedTuple): def has_actions_for_brain(self, brain_name: str) -> bool: return ( - bool(self.brain_name_to_action_info) - and brain_name in self.brain_name_to_action_info + brain_name in self.brain_name_to_action_info and self.brain_name_to_action_info[brain_name].outputs is not None )
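
---

For anyone reviewing the side-channel changes above: a minimal usage sketch of the FloatPropertiesChannel.get_property_dict() helper introduced in this series, driving the channel directly the way the updated test_side_channel.py does (no Unity process involved; the property names below are made up for illustration, not taken from the diffs).

    from mlagents.envs.side_channel.float_properties_channel import FloatPropertiesChannel

    # Exercise the channel locally, without a running Unity environment.
    channel = FloatPropertiesChannel()
    channel.set_property("gravity", -9.81)      # illustrative property names
    channel.set_property("time_scale", 20.0)

    # list_properties() now returns a real List[str] instead of leaking dict_keys,
    # and get_property_dict() hands back a copy, so callers cannot mutate the
    # channel's internal state (this is what simple_env_manager and
    # subprocess_env_manager rely on after this series).
    assert sorted(channel.list_properties()) == ["gravity", "time_scale"]
    snapshot = channel.get_property_dict()
    snapshot["gravity"] = 0.0                   # only the copy changes
    assert channel.get_property("gravity") == -9.81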