From 8bd70f809ae0739843322220424cde2a6be58313 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 15:04:15 -0800 Subject: [PATCH 01/16] use forked mypy, turn on namespace packages --- .pre-commit-config.yaml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7f77c2d7f9..265c24bca1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,23 +9,28 @@ repos: .*_pb2_grpc.py )$ -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.740 +- repo: https://github.com/chriselion/mypy + # This is a fork of mypy from the 0.750 release with two changes: + # * adds its own pre-commit hook definition + # * disables an assert that fires when a file is processed twice + # Disabling the assert is necessary to enable --namespace-packages + # which we need in turn to get some types recognized across files. + rev: 3d0b6164a9487a6c5cf9d144110b86600fd85e25 hooks: - id: mypy name: mypy-ml-agents files: "ml-agents/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - id: mypy name: mypy-ml-agents-envs files: "ml-agents-envs/.*" # Exclude protobuf files and don't follow them when imported exclude: ".*_pb2.py" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - id: mypy name: mypy-gym-unity files: "gym-unity/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 From 5066ec6d474ff8c927d98bb546ebdbbe689fd792 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 15:28:44 -0800 Subject: [PATCH 02/16] fix mypy errors in mlagents --- ml-agents/mlagents/trainers/demo_loader.py | 5 +++-- ml-agents/mlagents/trainers/models.py | 4 ++++ ml-agents/mlagents/trainers/sac/models.py | 6 ++---- ml-agents/mlagents/trainers/trainer.py | 7 ++++++- ml-agents/mlagents/trainers/trainer_util.py | 2 +- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index ad2833986c..46ccadfc1a 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -27,6 +27,7 @@ def make_demo_buffer( sequence_length: int, ) -> AgentBuffer: # Create and populate buffer using experiences + fake_agent_id = "0" demo_process_buffer = ProcessingBuffer() demo_buffer = AgentBuffer() for idx, experience in enumerate(pair_infos): @@ -64,11 +65,11 @@ def make_demo_buffer( demo_process_buffer[0]["prev_action"].append(previous_action) if next_brain_info.local_done[0]: demo_process_buffer.append_to_update_buffer( - demo_buffer, 0, batch_size=None, training_length=sequence_length + demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length ) demo_process_buffer.reset_local_buffers() demo_process_buffer.append_to_update_buffer( - demo_buffer, 0, batch_size=None, training_length=sequence_length + demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length ) return demo_buffer diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index 362878a58d..75746ee22c 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -88,6 +88,10 @@ def 
__init__( self.running_variance: Optional[tf.Variable] = None self.update_normalization: Optional[tf.Operation] = None self.value: Optional[tf.Tensor] = None + self.all_log_probs: Optional[tf.Tensor] = None + self.output: Optional[tf.Tensor] = None + self.selected_actions: Optional[tf.Tensor] = None + self.action_holder: Optional[tf.Tensor] = None @staticmethod def create_global_steps(): diff --git a/ml-agents/mlagents/trainers/sac/models.py b/ml-agents/mlagents/trainers/sac/models.py index 1e2911d8e8..4a0adc1fc4 100644 --- a/ml-agents/mlagents/trainers/sac/models.py +++ b/ml-agents/mlagents/trainers/sac/models.py @@ -57,18 +57,16 @@ def __init__( self.q2_memory_in: Optional[tf.Tensor] = None self.q1_memory_out: Optional[tf.Tensor] = None self.q2_memory_out: Optional[tf.Tensor] = None - self.action_holder: Optional[tf.Tensor] = None + #self.action_holder: Optional[tf.Tensor] = None self.prev_action: Optional[tf.Tensor] = None self.action_masks: Optional[tf.Tensor] = None self.external_action_in: Optional[tf.Tensor] = None self.log_sigma_sq: Optional[tf.Tensor] = None self.entropy: Optional[tf.Tensor] = None self.deterministic_output: Optional[tf.Tensor] = None - self.all_log_probs: Optional[tf.Tensor] = None self.normalized_logprobs: Optional[tf.Tensor] = None self.action_probs: Optional[tf.Tensor] = None - self.selected_actions: Optional[tf.Tensor] = None - self.output: Optional[tf.Tensor] = None + #self.selected_actions: Optional[tf.Tensor] = None self.output_oh: Optional[tf.Tensor] = None self.output_pre: Optional[tf.Tensor] = None diff --git a/ml-agents/mlagents/trainers/trainer.py b/ml-agents/mlagents/trainers/trainer.py index 744b06e4fc..88d0c0bd38 100644 --- a/ml-agents/mlagents/trainers/trainer.py +++ b/ml-agents/mlagents/trainers/trainer.py @@ -60,7 +60,7 @@ def __init__( ) self.summary_writer = tf.summary.FileWriter(self.summary_path) self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap) - self.policy: TFPolicy = None + self.policy: TFPolicy = None # type: ignore # this will always get set self.step: int = 0 def check_param_keys(self): @@ -282,3 +282,8 @@ def update_policy(self): Uses demonstration_buffer to update model. """ raise UnityTrainerException("The update_model method was not implemented.") + + def clear_update_buffer(self) -> None: + raise UnityTrainerException( + "The clear_update_buffer method was not implemented." 
+ ) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 7539a8acba..ed2b98985d 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -96,7 +96,7 @@ def initialize_trainer( _brain_key = trainer_config[_brain_key] trainer_parameters.update(trainer_config[_brain_key]) - trainer = None + trainer: Trainer = None # type: ignore # will be set to one of these, or raise if trainer_parameters["trainer"] == "offline_bc": trainer = OfflineBCTrainer( brain_parameters, trainer_parameters, train_model, load_model, seed, run_id From 4a42120aedccbabc393b9e1e0799b0d7fd37b473 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:14:50 -0800 Subject: [PATCH 03/16] fix mypy errors from mlagents-envs --- ml-agents-envs/mlagents/envs/brain.py | 8 ++++---- ml-agents-envs/mlagents/envs/environment.py | 9 +++++---- .../mlagents/envs/mock_communicator.py | 3 ++- .../engine_configuration_channel.py | 2 +- .../side_channel/float_properties_channel.py | 19 +++++++++++++------ .../envs/side_channel/raw_bytes_channel.py | 2 +- .../envs/side_channel/side_channel.py | 2 +- .../mlagents/envs/simple_env_manager.py | 5 +---- 8 files changed, 28 insertions(+), 22 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index 21e349fe28..ebf136c880 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -6,7 +6,7 @@ from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto from mlagents.envs.communicator_objects.observation_pb2 import ObservationProto from mlagents.envs.timers import hierarchical_timer, timed -from typing import Dict, List, NamedTuple, Optional +from typing import Dict, List, NamedTuple, Optional, Collection from PIL import Image logger = logging.getLogger("mlagents.envs") @@ -160,7 +160,7 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: @timed def from_agent_proto( worker_id: int, - agent_info_list: List[AgentInfoProto], + agent_info_list: Collection[AgentInfoProto], brain_params: BrainParameters, ) -> "BrainInfo": """ @@ -202,7 +202,7 @@ def from_agent_proto( @staticmethod def _process_visual_observations( - brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] + brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] ) -> List[np.ndarray]: visual_observation_protos: List[List[ObservationProto]] = [] @@ -231,7 +231,7 @@ def _process_visual_observations( @staticmethod def _process_vector_observations( - brain_params: BrainParameters, agent_info_list: List[AgentInfoProto] + brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] ) -> np.ndarray: if len(agent_info_list) == 0: vector_obs = np.zeros( diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 20954931a1..510887aed6 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -9,7 +9,7 @@ from mlagents.envs.side_channel.side_channel import SideChannel from mlagents.envs.base_unity_environment import BaseUnityEnvironment from mlagents.envs.timers import timed, hierarchical_timer -from .brain import AllBrainInfo, BrainInfo, BrainParameters +from mlagents.envs.brain import AllBrainInfo, BrainInfo, BrainParameters from .exception import ( UnityEnvironmentException, UnityCommunicationException, @@ -17,6 +17,7 @@ UnityTimeOutException, ) 
+from mlagents.envs.communicator_objects.command_pb2 import CommandProto from mlagents.envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto from mlagents.envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto from mlagents.envs.communicator_objects.agent_action_pb2 import AgentActionProto @@ -494,7 +495,7 @@ def _get_state(self, output: UnityRLOutputProto) -> AllBrainInfo: @staticmethod def _parse_side_channel_message( - side_channels: Dict[int, SideChannel], data: bytearray + side_channels: Dict[int, SideChannel], data: bytes ) -> None: offset = 0 while offset < len(data): @@ -566,13 +567,13 @@ def _generate_step_input( if value[b] is not None: action.value = float(value[b][i]) rl_in.agent_actions[b].value.extend([action]) - rl_in.command = 0 + rl_in.command = CommandProto.STEP rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) def _generate_reset_input(self) -> UnityInputProto: rl_in = UnityRLInputProto() - rl_in.command = 1 + rl_in.command = CommandProto.RESET rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) diff --git a/ml-agents-envs/mlagents/envs/mock_communicator.py b/ml-agents-envs/mlagents/envs/mock_communicator.py index 1f614952df..69cb6faedb 100755 --- a/ml-agents-envs/mlagents/envs/mock_communicator.py +++ b/ml-agents-envs/mlagents/envs/mock_communicator.py @@ -13,6 +13,7 @@ NONE as COMPRESSION_TYPE_NONE, PNG as COMPRESSION_TYPE_PNG, ) +from mlagents.envs.communicator_objects.space_type_pb2 import SpaceTypeProto class MockCommunicator(Communicator): @@ -43,7 +44,7 @@ def initialize(self, inputs: UnityInputProto) -> UnityOutputProto: bp = BrainParametersProto( vector_action_size=[2], vector_action_descriptions=["", ""], - vector_action_space_type=int(not self.is_discrete), + vector_action_space_type=SpaceTypeProto.discrete if self.is_discrete else SpaceTypeProto.continuous, brain_name=self.brain_name, is_training=True, ) diff --git a/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py b/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py index c250f4cd0e..49388a6f9d 100644 --- a/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py +++ b/ml-agents-envs/mlagents/envs/side_channel/engine_configuration_channel.py @@ -31,7 +31,7 @@ class EngineConfigurationChannel(SideChannel): def channel_type(self) -> int: return SideChannelType.EngineSettings - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. Can be called multiple times per step if multiple messages are meant for that diff --git a/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py b/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py index 4628b631ca..250c5d1dde 100644 --- a/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py +++ b/ml-agents-envs/mlagents/envs/side_channel/float_properties_channel.py @@ -1,6 +1,6 @@ from mlagents.envs.side_channel.side_channel import SideChannel, SideChannelType import struct -from typing import Tuple, Optional, List +from typing import Dict, Tuple, Optional, List class FloatPropertiesChannel(SideChannel): @@ -10,15 +10,15 @@ class FloatPropertiesChannel(SideChannel): set_property, get_property and list_properties. 
""" - def __init__(self): - self._float_properties = {} + def __init__(self) -> None: + self._float_properties: Dict[str, float] = {} super().__init__() @property def channel_type(self) -> int: return SideChannelType.FloatProperties - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. Can be called multiple times per step if multiple messages are meant for that @@ -52,7 +52,14 @@ def list_properties(self) -> List[str]: Returns a list of all the string identifiers of the properties currently present in the Unity Environment. """ - return self._float_properties.keys() + return list(self._float_properties.keys()) + + def get_property_dict(self) -> Dict[str, float]: + """ + Returns a copy of the float properties. + :return: + """ + return dict(self._float_properties) @staticmethod def serialize_float_prop(key: str, value: float) -> bytearray: @@ -64,7 +71,7 @@ def serialize_float_prop(key: str, value: float) -> bytearray: return result @staticmethod - def deserialize_float_prop(data: bytearray) -> Tuple[str, float]: + def deserialize_float_prop(data: bytes) -> Tuple[str, float]: offset = 0 encoded_key_len = struct.unpack_from(" int: return SideChannelType.RawBytesChannelStart + self._channel_id - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. Can be called multiple times per step if multiple messages are meant for that diff --git a/ml-agents-envs/mlagents/envs/side_channel/side_channel.py b/ml-agents-envs/mlagents/envs/side_channel/side_channel.py index 4a1c611612..d50257b8ed 100644 --- a/ml-agents-envs/mlagents/envs/side_channel/side_channel.py +++ b/ml-agents-envs/mlagents/envs/side_channel/side_channel.py @@ -33,7 +33,7 @@ def queue_message_to_send(self, data: bytearray) -> None: self.message_queue.append(data) @abstractmethod - def on_message_received(self, data: bytearray) -> None: + def on_message_received(self, data: bytes) -> None: """ Is called by the environment to the side channel. 
Can be called multiple times per step if multiple messages are meant for that diff --git a/ml-agents-envs/mlagents/envs/simple_env_manager.py b/ml-agents-envs/mlagents/envs/simple_env_manager.py index 248416ea59..4d197bd875 100644 --- a/ml-agents-envs/mlagents/envs/simple_env_manager.py +++ b/ml-agents-envs/mlagents/envs/simple_env_manager.py @@ -60,10 +60,7 @@ def external_brains(self) -> Dict[str, BrainParameters]: @property def get_properties(self) -> Dict[str, float]: - reset_params = {} - for k in self.shared_float_properties.list_properties(): - reset_params[k] = self.shared_float_properties.get_property(k) - return reset_params + return self.shared_float_properties.get_property_dict() def close(self): self.env.close() From 593982376228ebdf755bca0d3684ef94d49c0a7a Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:25:26 -0800 Subject: [PATCH 04/16] fix unit tests --- ml-agents/mlagents/trainers/agent_processor.py | 2 +- ml-agents/mlagents/trainers/demo_loader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 49479cf234..fb06d64457 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -28,7 +28,7 @@ def reset_local_buffers(self) -> None: def append_to_update_buffer( self, update_buffer: AgentBuffer, - agent_id: str, + agent_id: int, key_list: List[str] = None, batch_size: int = None, training_length: int = None, diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index 46ccadfc1a..a09c5d686a 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -27,7 +27,7 @@ def make_demo_buffer( sequence_length: int, ) -> AgentBuffer: # Create and populate buffer using experiences - fake_agent_id = "0" + agent_id: int = 0 demo_process_buffer = ProcessingBuffer() demo_buffer = AgentBuffer() for idx, experience in enumerate(pair_infos): From 821639efce1073a39d307e46c457ec4cda00a146 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:25:33 -0800 Subject: [PATCH 05/16] fix unit tests --- ml-agents/mlagents/trainers/demo_loader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index a09c5d686a..ad2833986c 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -27,7 +27,6 @@ def make_demo_buffer( sequence_length: int, ) -> AgentBuffer: # Create and populate buffer using experiences - agent_id: int = 0 demo_process_buffer = ProcessingBuffer() demo_buffer = AgentBuffer() for idx, experience in enumerate(pair_infos): @@ -65,11 +64,11 @@ def make_demo_buffer( demo_process_buffer[0]["prev_action"].append(previous_action) if next_brain_info.local_done[0]: demo_process_buffer.append_to_update_buffer( - demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length + demo_buffer, 0, batch_size=None, training_length=sequence_length ) demo_process_buffer.reset_local_buffers() demo_process_buffer.append_to_update_buffer( - demo_buffer, fake_agent_id, batch_size=None, training_length=sequence_length + demo_buffer, 0, batch_size=None, training_length=sequence_length ) return demo_buffer From ea216c713c54c4df3b9235ab63b249eba8ccf2d0 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 16:53:43 -0800 Subject: [PATCH 
06/16] cleanup --- .pylintrc | 6 +++++- ml-agents-envs/mlagents/envs/environment.py | 6 +++--- ml-agents-envs/mlagents/envs/mock_communicator.py | 4 ++-- ml-agents/mlagents/trainers/sac/models.py | 2 -- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.pylintrc b/.pylintrc index 11d1f0b75c..748cce0088 100644 --- a/.pylintrc +++ b/.pylintrc @@ -40,4 +40,8 @@ disable = # E0401: Unable to import... # E0611: No name '...' in module '...' # need to look into these, probably namespace packages - E0401, E0611 + E0401, E0611, + + # This was causing false positives + # Appears to be https://github.com/PyCQA/pylint/issues/2981 + W0201 diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 510887aed6..4c3bc938be 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -17,7 +17,7 @@ UnityTimeOutException, ) -from mlagents.envs.communicator_objects.command_pb2 import CommandProto +from mlagents.envs.communicator_objects.command_pb2 import STEP, RESET from mlagents.envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto from mlagents.envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto from mlagents.envs.communicator_objects.agent_action_pb2 import AgentActionProto @@ -567,13 +567,13 @@ def _generate_step_input( if value[b] is not None: action.value = float(value[b][i]) rl_in.agent_actions[b].value.extend([action]) - rl_in.command = CommandProto.STEP + rl_in.command = STEP rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) def _generate_reset_input(self) -> UnityInputProto: rl_in = UnityRLInputProto() - rl_in.command = CommandProto.RESET + rl_in.command = RESET rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels)) return self.wrap_unity_input(rl_in) diff --git a/ml-agents-envs/mlagents/envs/mock_communicator.py b/ml-agents-envs/mlagents/envs/mock_communicator.py index 69cb6faedb..d0abd9562a 100755 --- a/ml-agents-envs/mlagents/envs/mock_communicator.py +++ b/ml-agents-envs/mlagents/envs/mock_communicator.py @@ -13,7 +13,7 @@ NONE as COMPRESSION_TYPE_NONE, PNG as COMPRESSION_TYPE_PNG, ) -from mlagents.envs.communicator_objects.space_type_pb2 import SpaceTypeProto +from mlagents.envs.communicator_objects.space_type_pb2 import discrete, continuous class MockCommunicator(Communicator): @@ -44,7 +44,7 @@ def initialize(self, inputs: UnityInputProto) -> UnityOutputProto: bp = BrainParametersProto( vector_action_size=[2], vector_action_descriptions=["", ""], - vector_action_space_type=SpaceTypeProto.discrete if self.is_discrete else SpaceTypeProto.continuous, + vector_action_space_type=discrete if self.is_discrete else continuous, brain_name=self.brain_name, is_training=True, ) diff --git a/ml-agents/mlagents/trainers/sac/models.py b/ml-agents/mlagents/trainers/sac/models.py index 4a0adc1fc4..7336b50940 100644 --- a/ml-agents/mlagents/trainers/sac/models.py +++ b/ml-agents/mlagents/trainers/sac/models.py @@ -57,7 +57,6 @@ def __init__( self.q2_memory_in: Optional[tf.Tensor] = None self.q1_memory_out: Optional[tf.Tensor] = None self.q2_memory_out: Optional[tf.Tensor] = None - #self.action_holder: Optional[tf.Tensor] = None self.prev_action: Optional[tf.Tensor] = None self.action_masks: Optional[tf.Tensor] = None self.external_action_in: Optional[tf.Tensor] = None @@ -66,7 +65,6 @@ def __init__( self.deterministic_output: Optional[tf.Tensor] = None 
self.normalized_logprobs: Optional[tf.Tensor] = None self.action_probs: Optional[tf.Tensor] = None - #self.selected_actions: Optional[tf.Tensor] = None self.output_oh: Optional[tf.Tensor] = None self.output_pre: Optional[tf.Tensor] = None From b5ebe677777782af42aa8c594faa837f718bd7d9 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Sat, 7 Dec 2019 18:21:42 -0800 Subject: [PATCH 07/16] fix pylint false positives --- .pre-commit-config.yaml | 9 ++------- .pylintrc | 3 ++- ml-agents-envs/mlagents/envs/brain.py | 14 +++++++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 265c24bca1..3d956fe9f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,13 +9,8 @@ repos: .*_pb2_grpc.py )$ -- repo: https://github.com/chriselion/mypy - # This is a fork of mypy from the 0.750 release with two changes: - # * adds its own pre-commit hook definition - # * disables an assert that fires when a file is processed twice - # Disabling the assert is necessary to enable --namespace-packages - # which we need in turn to get some types recognized across files. - rev: 3d0b6164a9487a6c5cf9d144110b86600fd85e25 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.750 hooks: - id: mypy name: mypy-ml-agents diff --git a/.pylintrc b/.pylintrc index 748cce0088..6aa891290a 100644 --- a/.pylintrc +++ b/.pylintrc @@ -44,4 +44,5 @@ disable = # This was causing false positives # Appears to be https://github.com/PyCQA/pylint/issues/2981 - W0201 + W0201, + diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index ebf136c880..394b8c606d 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -160,7 +160,9 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: @timed def from_agent_proto( worker_id: int, - agent_info_list: Collection[AgentInfoProto], + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object brain_params: BrainParameters, ) -> "BrainInfo": """ @@ -202,7 +204,10 @@ def from_agent_proto( @staticmethod def _process_visual_observations( - brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] + brain_params: BrainParameters, + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> List[np.ndarray]: visual_observation_protos: List[List[ObservationProto]] = [] @@ -231,7 +236,10 @@ def _process_visual_observations( @staticmethod def _process_vector_observations( - brain_params: BrainParameters, agent_info_list: Collection[AgentInfoProto] + brain_params: BrainParameters, + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> np.ndarray: if len(agent_info_list) == 0: vector_obs = np.zeros( From f4af166f7559e4bbbafdae4845ff5416f5656f09 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 9 Dec 2019 15:05:14 -0800 Subject: [PATCH 08/16] Trainer.advance() --- ml-agents-envs/mlagents/envs/subprocess_env_manager.py | 5 +---- ml-agents-envs/mlagents/envs/tests/test_side_channel.py | 3 +++ ml-agents/mlagents/trainers/rl_trainer.py | 6 ++++++ ml-agents/mlagents/trainers/trainer.py | 6 ++---- ml-agents/mlagents/trainers/trainer_controller.py | 5 ++++- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py index 930559b9a7..69cbccd2b4 100644 --- 
a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py +++ b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py @@ -121,10 +121,7 @@ def _send_response(cmd_name, payload): elif cmd.name == "external_brains": _send_response("external_brains", env.external_brains) elif cmd.name == "get_properties": - reset_params = {} - for k in shared_float_properties.list_properties(): - reset_params[k] = shared_float_properties.get_property(k) - + reset_params = shared_float_properties.get_property_dict() _send_response("get_properties", reset_params) elif cmd.name == "reset": for k, v in cmd.payload.items(): diff --git a/ml-agents-envs/mlagents/envs/tests/test_side_channel.py b/ml-agents-envs/mlagents/envs/tests/test_side_channel.py index 19ca3a0e4c..a475d5a0aa 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_side_channel.py +++ b/ml-agents-envs/mlagents/envs/tests/test_side_channel.py @@ -69,6 +69,9 @@ def test_float_properties(): val = sender.get_property("prop1") assert val == 1.0 + assert receiver.get_property_dict() == {"prop1": 1.0, "prop2": 2.0} + assert receiver.get_property_dict() == sender.get_property_dict() + def test_raw_bytes(): sender = RawBytesChannel() diff --git a/ml-agents/mlagents/trainers/rl_trainer.py b/ml-agents/mlagents/trainers/rl_trainer.py index c86a9d2629..88d32f5864 100644 --- a/ml-agents/mlagents/trainers/rl_trainer.py +++ b/ml-agents/mlagents/trainers/rl_trainer.py @@ -264,3 +264,9 @@ def add_rewards_outputs( raise UnityTrainerException( "The add_rewards_outputs method was not implemented." ) + + def advance(self): + """ + Eventually logic from TrainerController.advance() will live here. + """ + self.clear_update_buffer() diff --git a/ml-agents/mlagents/trainers/trainer.py b/ml-agents/mlagents/trainers/trainer.py index 88d0c0bd38..e71860a828 100644 --- a/ml-agents/mlagents/trainers/trainer.py +++ b/ml-agents/mlagents/trainers/trainer.py @@ -283,7 +283,5 @@ def update_policy(self): """ raise UnityTrainerException("The update_model method was not implemented.") - def clear_update_buffer(self) -> None: - raise UnityTrainerException( - "The clear_update_buffer method was not implemented." 
- ) + def advance(self) -> None: + pass diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 572f5d39d1..4994f61f4a 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -295,5 +295,8 @@ def advance(self, env: EnvManager) -> int: env.set_policy(brain_name, trainer.policy) else: # Avoid memory leak during inference - trainer.clear_update_buffer() + # Eventually this whole block will take place in advance() + # But currently this only calls clear_update_buffer() in RLTrainer + # and nothing in the base class + trainer.advance() return len(new_step_infos) From b80ca34136cf198bed91b75ae41ce7edc3cba783 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 9 Dec 2019 15:44:23 -0800 Subject: [PATCH 09/16] fix unit test --- ml-agents/mlagents/trainers/tests/test_trainer_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index ff6ca5b9de..fee2c8fc90 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -213,4 +213,4 @@ def test_take_step_if_not_training(): new_step_info.previous_all_brain_info[brain_name], new_step_info.current_all_brain_info[brain_name], ) - trainer_mock.clear_update_buffer.assert_called_once() + trainer_mock.advance.assert_called_once() From 4e97a7cf3dfa28c73c5a59b7f11000c98415dce2 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 10:51:18 -0800 Subject: [PATCH 10/16] remove --namespace-packages on mlagents, fix envs --- .pre-commit-config.yaml | 2 +- ml-agents-envs/mlagents/envs/base_env.py | 6 +++--- ml-agents-envs/mlagents/envs/environment.py | 4 ++-- ml-agents-envs/mlagents/envs/rpc_utils.py | 16 ++++++++++------ .../mlagents/envs/tests/test_rpc_utils.py | 10 +++++++--- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3d956fe9f3..be30007001 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: mypy name: mypy-ml-agents files: "ml-agents/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] + args: [--ignore-missing-imports, --disallow-incomplete-defs] - id: mypy name: mypy-ml-agents-envs files: "ml-agents-envs/.*" diff --git a/ml-agents-envs/mlagents/envs/base_env.py b/ml-agents-envs/mlagents/envs/base_env.py index f6c678f80d..dbb880f052 100644 --- a/ml-agents-envs/mlagents/envs/base_env.py +++ b/ml-agents-envs/mlagents/envs/base_env.py @@ -18,12 +18,12 @@ """ from abc import ABC, abstractmethod -from typing import List, NamedTuple, Tuple, Optional, Union, Dict, NewType +from typing import List, NamedTuple, Tuple, Optional, Union, Dict import numpy as np from enum import Enum -AgentId = NewType("AgentId", int) -AgentGroup = NewType("AgentGroup", str) +AgentId = int +AgentGroup = str class StepResult(NamedTuple): diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index e06fb810ad..eaa4760169 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -372,8 +372,8 @@ def set_action_for_agent( action = action.astype(expected_type) if agent_group not in self._env_actions: - self._env_actions[agent_group] = self._empty_action( - spec, 
self._env_state[agent_group].n_agents() + self._env_actions[agent_group] = spec.create_empty_action( + self._env_state[agent_group].n_agents() ) try: index = np.where(self._env_state[agent_group].agent_id == agent_id)[0][0] diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index c08f5c3f97..257c83da98 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -5,7 +5,7 @@ import logging import numpy as np import io -from typing import List, Tuple +from typing import cast, List, Tuple, Union, Collection from PIL import Image logger = logging.getLogger("mlagents.envs") @@ -26,9 +26,10 @@ def agent_group_spec_from_proto( if brain_param_proto.vector_action_space_type == 0 else ActionType.CONTINUOUS ) - action_shape = None if action_type == ActionType.CONTINUOUS: - action_shape = brain_param_proto.vector_action_size[0] + action_shape: Union[ + int, Tuple[int, ...] + ] = brain_param_proto.vector_action_size[0] else: action_shape = tuple(brain_param_proto.vector_action_size) return AgentGroupSpec(observation_shape, action_type, action_shape) @@ -57,7 +58,9 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: @timed def _process_visual_observation( - obs_index: int, shape: Tuple[int, int, int], agent_info_list: List[AgentInfoProto] + obs_index: int, + shape: Tuple[int, int, int], + agent_info_list: Collection[AgentInfoProto], ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32) @@ -72,7 +75,7 @@ def _process_visual_observation( @timed def _process_vector_observation( - obs_index: int, shape: Tuple[int, ...], agent_info_list: List[AgentInfoProto] + obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto] ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0]), dtype=np.float32) @@ -104,12 +107,13 @@ def _process_vector_observation( @timed def batched_step_result_from_proto( - agent_info_list: List[AgentInfoProto], group_spec: AgentGroupSpec + agent_info_list: Collection[AgentInfoProto], group_spec: AgentGroupSpec ) -> BatchedStepResult: obs_list: List[np.ndarray] = [] for obs_index, obs_shape in enumerate(group_spec.observation_shapes): is_visual = len(obs_shape) == 3 if is_visual: + obs_shape = cast(Tuple[int, int, int], obs_shape) obs_list += [ _process_visual_observation(obs_index, obs_shape, agent_info_list) ] diff --git a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py index b47fd97401..b9edc06055 100644 --- a/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/tests/test_rpc_utils.py @@ -1,6 +1,10 @@ from typing import List, Tuple from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto -from mlagents.envs.communicator_objects.observation_pb2 import ObservationProto +from mlagents.envs.communicator_objects.observation_pb2 import ( + ObservationProto, + NONE, + PNG, +) from mlagents.envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto import numpy as np from mlagents.envs.base_env import AgentGroupSpec, ActionType @@ -30,7 +34,7 @@ def generate_list_agent_proto( for obs_index in range(len(shape)): obs_proto = ObservationProto() obs_proto.shape.extend(list(shape[obs_index])) - obs_proto.compression_type = 0 + obs_proto.compression_type = NONE obs_proto.float_data.data.extend([0.1] * np.prod(shape[obs_index])) 
obs_proto_list.append(obs_proto) ap.observations.extend(obs_proto_list) @@ -49,7 +53,7 @@ def generate_compressed_data(in_array: np.ndarray) -> bytes: def generate_compressed_proto_obs(in_array: np.ndarray) -> ObservationProto: obs_proto = ObservationProto() obs_proto.compressed_data = generate_compressed_data(in_array) - obs_proto.compression_type = 1 + obs_proto.compression_type = PNG obs_proto.shape.extend(in_array.shape) return obs_proto From 36e8ec83f312b7e3afed6ec8aea73f7f24b1cb31 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 11:01:07 -0800 Subject: [PATCH 11/16] fix pylint bugs --- ml-agents-envs/mlagents/envs/rpc_utils.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/rpc_utils.py b/ml-agents-envs/mlagents/envs/rpc_utils.py index 257c83da98..105d8bf1a9 100644 --- a/ml-agents-envs/mlagents/envs/rpc_utils.py +++ b/ml-agents-envs/mlagents/envs/rpc_utils.py @@ -60,7 +60,9 @@ def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray: def _process_visual_observation( obs_index: int, shape: Tuple[int, int, int], - agent_info_list: Collection[AgentInfoProto], + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32) @@ -75,7 +77,11 @@ def _process_visual_observation( @timed def _process_vector_observation( - obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto] + obs_index: int, + shape: Tuple[int, ...], + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object ) -> np.ndarray: if len(agent_info_list) == 0: return np.zeros((0, shape[0]), dtype=np.float32) @@ -107,7 +113,10 @@ def _process_vector_observation( @timed def batched_step_result_from_proto( - agent_info_list: Collection[AgentInfoProto], group_spec: AgentGroupSpec + agent_info_list: Collection[ + AgentInfoProto + ], # pylint: disable=unsubscriptable-object + group_spec: AgentGroupSpec, ) -> BatchedStepResult: obs_list: List[np.ndarray] = [] for obs_index, obs_shape in enumerate(group_spec.observation_shapes): From 901b791a10af284abd2c2841050df2dbed0f4fa0 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 14:13:54 -0800 Subject: [PATCH 12/16] agent_id as str or int --- ml-agents/mlagents/trainers/agent_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index fb06d64457..d2d13644de 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Union from mlagents.trainers.buffer import AgentBuffer, BufferException @@ -28,7 +28,7 @@ def reset_local_buffers(self) -> None: def append_to_update_buffer( self, update_buffer: AgentBuffer, - agent_id: int, + agent_id: Union[int, str], key_list: List[str] = None, batch_size: int = None, training_length: int = None, From 0682eb43a9745c4c3dcc57ed76436d33acfe0cca Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 14:52:10 -0800 Subject: [PATCH 13/16] fix sac and ppo trainers --- ml-agents/mlagents/trainers/demo_loader.py | 2 ++ ml-agents/mlagents/trainers/ppo/trainer.py | 7 ++++--- ml-agents/mlagents/trainers/sac/trainer.py | 10 ++++++---- ml-agents/mlagents/trainers/tf_policy.py | 2 ++ 4 files changed, 14 
insertions(+), 7 deletions(-) diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index fa2b68dfc0..597d010759 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -153,4 +153,6 @@ def load_demonstration( break pos += next_pos obs_decoded += 1 + if not brain_params: + raise RuntimeError(f"No BrainParameters found in demonstration file at {file_path}.") return brain_params, info_action_pairs, total_expected diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 45dd45c6ea..8b7a152408 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -67,13 +67,14 @@ def __init__( self.check_param_keys() if multi_gpu and len(get_devices()) > 1: - self.policy = MultiGpuPPOPolicy( + self.ppo_policy = MultiGpuPPOPolicy( seed, brain, trainer_parameters, self.is_training, load ) else: - self.policy = PPOPolicy( + self.ppo_policy = PPOPolicy( seed, brain, trainer_parameters, self.is_training, load ) + self.policy = self.ppo_policy for _reward_signal in self.policy.reward_signals.keys(): self.collected_rewards[_reward_signal] = {} @@ -104,7 +105,7 @@ def process_experiences( else: bootstrapping_info = next_info idx = l - value_next = self.policy.get_value_estimates( + value_next = self.ppo_policy.get_value_estimates( bootstrapping_info, idx, next_info.local_done[l] and not next_info.max_reached[l], diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 727798c1ea..8cda361a9e 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -81,7 +81,8 @@ def __init__( if "save_replay_buffer" in trainer_parameters else False ) - self.policy = SACPolicy(seed, brain, trainer_parameters, self.is_training, load) + self.sac_policy = SACPolicy(seed, brain, trainer_parameters, self.is_training, load) + self.policy = self.sac_policy # Load the replay buffer if load if load and self.checkpoint_replay_buffer: @@ -293,8 +294,9 @@ def update_sac_policy(self) -> None: for stat, stat_list in batch_update_stats.items(): self.stats[stat].append(np.mean(stat_list)) - if self.policy.bc_module: - update_stats = self.policy.bc_module.update() + bc_module = self.sac_policy.bc_module + if bc_module: + update_stats = bc_module.update() for stat, val in update_stats.items(): self.stats[stat].append(val) @@ -325,7 +327,7 @@ def update_reward_signals(self) -> None: self.trainer_parameters["batch_size"], sequence_length=self.policy.sequence_length, ) - update_stats = self.policy.update_reward_signals( + update_stats = self.sac_policy.update_reward_signals( reward_signal_minibatches, n_sequences ) for stat_name, value in update_stats.items(): diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index 3247c94959..db03908736 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -11,6 +11,7 @@ from tensorflow.python.framework import graph_util from mlagents.trainers import tensorflow_to_barracuda as tf2bc from mlagents.trainers.brain import BrainInfo +from mlagents.trainers.components.reward_signals import RewardSignal logger = logging.getLogger("mlagents.trainers") @@ -57,6 +58,7 @@ def __init__(self, seed, brain, trainer_parameters): self.brain = brain self.use_recurrent = trainer_parameters["use_recurrent"] self.memory_dict: Dict[str, np.ndarray] = 
{} + self.reward_signals: Dict[str, RewardSignal] = {} self.num_branches = len(self.brain.vector_action_space_size) self.previous_action_dict: Dict[str, np.array] = {} self.normalize = trainer_parameters.get("normalize", False) From 1647902942e5679e228a727f194badaea220131a Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 15:01:53 -0800 Subject: [PATCH 14/16] remove Optionals --- .pre-commit-config.yaml | 6 +++++- ml-agents/mlagents/trainers/action_info.py | 4 ++-- ml-agents/mlagents/trainers/demo_loader.py | 4 +++- ml-agents/mlagents/trainers/env_manager.py | 8 ++++---- ml-agents/mlagents/trainers/sac/trainer.py | 4 +++- ml-agents/mlagents/trainers/simple_env_manager.py | 4 ++-- ml-agents/mlagents/trainers/subprocess_env_manager.py | 4 ++-- ml-agents/mlagents/trainers/tf_policy.py | 2 +- 8 files changed, 22 insertions(+), 14 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index be30007001..f3faf63b3a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,11 +11,15 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.750 + # Currently mypy may assert after logging one message. To get all the messages at once, change repo and rev to + # repo: https://github.com/chriselion/mypy + # rev: 3d0b6164a9487a6c5cf9d144110b86600fd85e25 + # This is a fork with the assert disabled, although precommit has trouble installing it sometimes. hooks: - id: mypy name: mypy-ml-agents files: "ml-agents/.*" - args: [--ignore-missing-imports, --disallow-incomplete-defs] + args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages] - id: mypy name: mypy-ml-agents-envs files: "ml-agents-envs/.*" diff --git a/ml-agents/mlagents/trainers/action_info.py b/ml-agents/mlagents/trainers/action_info.py index 782223648f..694b600689 100644 --- a/ml-agents/mlagents/trainers/action_info.py +++ b/ml-agents/mlagents/trainers/action_info.py @@ -1,6 +1,6 @@ -from typing import NamedTuple, Any, Dict, Optional +from typing import NamedTuple, Any, Dict -ActionInfoOutputs = Optional[Dict[str, Any]] +ActionInfoOutputs = Dict[str, Any] class ActionInfo(NamedTuple): diff --git a/ml-agents/mlagents/trainers/demo_loader.py b/ml-agents/mlagents/trainers/demo_loader.py index 597d010759..6722503380 100644 --- a/ml-agents/mlagents/trainers/demo_loader.py +++ b/ml-agents/mlagents/trainers/demo_loader.py @@ -154,5 +154,7 @@ def load_demonstration( pos += next_pos obs_decoded += 1 if not brain_params: - raise RuntimeError(f"No BrainParameters found in demonstration file at {file_path}.") + raise RuntimeError( + f"No BrainParameters found in demonstration file at {file_path}." 
+ ) return brain_params, info_action_pairs, total_expected diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index 7a28aa2598..5190841e5d 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -1,18 +1,18 @@ from abc import ABC, abstractmethod -from typing import List, Dict, NamedTuple, Optional +from typing import List, Dict, NamedTuple from mlagents.trainers.brain import AllBrainInfo, BrainParameters from mlagents.trainers.policy import Policy from mlagents.trainers.action_info import ActionInfo class EnvironmentStep(NamedTuple): - previous_all_brain_info: Optional[AllBrainInfo] + previous_all_brain_info: AllBrainInfo current_all_brain_info: AllBrainInfo - brain_name_to_action_info: Optional[Dict[str, ActionInfo]] + brain_name_to_action_info: Dict[str, ActionInfo] def has_actions_for_brain(self, brain_name: str) -> bool: return ( - self.brain_name_to_action_info is not None + bool(self.brain_name_to_action_info) and brain_name in self.brain_name_to_action_info and self.brain_name_to_action_info[brain_name].outputs is not None ) diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 8cda361a9e..669aa87c47 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -81,7 +81,9 @@ def __init__( if "save_replay_buffer" in trainer_parameters else False ) - self.sac_policy = SACPolicy(seed, brain, trainer_parameters, self.is_training, load) + self.sac_policy = SACPolicy( + seed, brain, trainer_parameters, self.is_training, load + ) self.policy = self.sac_policy # Load the replay buffer if load diff --git a/ml-agents/mlagents/trainers/simple_env_manager.py b/ml-agents/mlagents/trainers/simple_env_manager.py index aef44c41f7..51c3b3376e 100644 --- a/ml-agents/mlagents/trainers/simple_env_manager.py +++ b/ml-agents/mlagents/trainers/simple_env_manager.py @@ -22,7 +22,7 @@ def __init__(self, env: BaseEnv, float_prop_channel: FloatPropertiesChannel): super().__init__() self.shared_float_properties = float_prop_channel self.env = env - self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None) + self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {}) self.previous_all_action_info: Dict[str, ActionInfo] = {} def step(self) -> List[EnvironmentStep]: @@ -51,7 +51,7 @@ def reset( self.shared_float_properties.set_property(k, v) self.env.reset() all_brain_info = self._generate_all_brain_info() - self.previous_step = EnvironmentStep(None, all_brain_info, None) + self.previous_step = EnvironmentStep({}, all_brain_info, {}) return [self.previous_step] @property diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 9347a823d9..ca93feac08 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -53,7 +53,7 @@ def __init__(self, process: Process, worker_id: int, conn: Connection): self.process = process self.worker_id = worker_id self.conn = conn - self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None) + self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {}) self.previous_all_action_info: Dict[str, ActionInfo] = {} self.waiting = False @@ -253,7 +253,7 @@ def reset(self, config: Optional[Dict] = None) -> List[EnvironmentStep]: ew.send("reset", config) # Next (synchronously) collect the reset observations from each 
worker in sequence for ew in self.env_workers: - ew.previous_step = EnvironmentStep(None, ew.recv().payload, None) + ew.previous_step = EnvironmentStep({}, ew.recv().payload, {}) return list(map(lambda ew: ew.previous_step, self.env_workers)) @property diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index db03908736..7505f8b2cb 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -128,7 +128,7 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo: to be passed to add experiences """ if len(brain_info.agents) == 0: - return ActionInfo([], [], None) + return ActionInfo([], [], {}) agents_done = [ agent From 86e3217d6c44133ac8a1671114f704cf72ebdc24 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 15:15:34 -0800 Subject: [PATCH 15/16] fix unit tests --- ml-agents/mlagents/trainers/tests/test_policy.py | 2 +- ml-agents/mlagents/trainers/tf_policy.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_policy.py b/ml-agents/mlagents/trainers/tests/test_policy.py index f7b63234b4..cd17d8bfca 100644 --- a/ml-agents/mlagents/trainers/tests/test_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_policy.py @@ -20,7 +20,7 @@ def test_take_action_returns_empty_with_no_agents(): policy = TFPolicy(test_seed, basic_mock_brain(), basic_params()) no_agent_brain_info = BrainInfo([], [], [], agents=[]) result = policy.get_action(no_agent_brain_info) - assert result == ActionInfo([], [], None) + assert result == ActionInfo([], [], {}) def test_take_action_returns_nones_on_missing_values(): diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index 7505f8b2cb..aaa2998baa 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -11,7 +11,6 @@ from tensorflow.python.framework import graph_util from mlagents.trainers import tensorflow_to_barracuda as tf2bc from mlagents.trainers.brain import BrainInfo -from mlagents.trainers.components.reward_signals import RewardSignal logger = logging.getLogger("mlagents.trainers") @@ -58,7 +57,7 @@ def __init__(self, seed, brain, trainer_parameters): self.brain = brain self.use_recurrent = trainer_parameters["use_recurrent"] self.memory_dict: Dict[str, np.ndarray] = {} - self.reward_signals: Dict[str, RewardSignal] = {} + self.reward_signals: Dict[str, "RewardSignal"] = {} self.num_branches = len(self.brain.vector_action_space_size) self.previous_action_dict: Dict[str, np.array] = {} self.normalize = trainer_parameters.get("normalize", False) From a02b1ce01bd981479c2110e317eb4ebf98a053f7 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 11 Dec 2019 15:20:21 -0800 Subject: [PATCH 16/16] simplify --- ml-agents/mlagents/trainers/env_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index 5190841e5d..d00dfe07b5 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -12,8 +12,7 @@ class EnvironmentStep(NamedTuple): def has_actions_for_brain(self, brain_name: str) -> bool: return ( - bool(self.brain_name_to_action_info) - and brain_name in self.brain_name_to_action_info + brain_name in self.brain_name_to_action_info and self.brain_name_to_action_info[brain_name].outputs is not None )
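
---

For anyone reviewing the side-channel changes above: a minimal usage sketch of the FloatPropertiesChannel.get_property_dict() helper introduced in this series, driving the channel directly the way the updated test_side_channel.py does (no Unity process involved; the property names below are made up for illustration, not taken from the diffs).

    from mlagents.envs.side_channel.float_properties_channel import FloatPropertiesChannel

    # Exercise the channel locally, without a running Unity environment.
    channel = FloatPropertiesChannel()
    channel.set_property("gravity", -9.81)      # illustrative property names
    channel.set_property("time_scale", 20.0)

    # list_properties() now returns a real List[str] instead of leaking dict_keys,
    # and get_property_dict() hands back a copy, so callers cannot mutate the
    # channel's internal state (this is what simple_env_manager and
    # subprocess_env_manager rely on after this series).
    assert sorted(channel.list_properties()) == ["gravity", "time_scale"]
    snapshot = channel.get_property_dict()
    snapshot["gravity"] = 0.0                   # only the copy changes
    assert channel.get_property("gravity") == -9.81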