
Replaced get_behavior_names and get_behavior_spec with behavior_specs property #3946

Merged
7 commits merged on May 12, 2020
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to
#### ml-agents / ml-agents-envs / gym-unity (Python)
- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- `beta` and `epsilon` in `PPO` are no longer decayed by default but follow the same schedule as learning rate. (#3940)
- `get_behavior_names()` and `get_behavior_spec()` on UnityEnvironment were replaced by the `behavior_specs` property. (#3946)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
2 changes: 2 additions & 0 deletions docs/Migrating.md
@@ -22,6 +22,7 @@ double-check that the versions are in the same. The versions can be found in
- Trainer configuration, curriculum configuration, and parameter randomization
configuration have all been moved to a single YAML file. (#3791)
- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- On the UnityEnvironment API, the `get_behavior_names()` and `get_behavior_spec()` methods were combined into the `behavior_specs` property, which contains a mapping from behavior names to behavior specs.

### Steps to Migrate
- Before upgrading, copy your `Behavior Name` sections from `trainer_config.yaml` into
@@ -34,6 +35,7 @@ double-check that the versions are in the same. The versions can be found in
the contents of the sampler config to `parameter_randomization` in the main trainer configuration.
- If you are using `UnityEnvironment` directly, replace `max_step` with `interrupted`
in the `TerminalStep` and `TerminalSteps` objects.
- Replace usage of `get_behavior_names()` and `get_behavior_spec()` in UnityEnvironment with the `behavior_specs` property.

## Migrating from 0.15 to Release 1

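For this migration step, a minimal before/after sketch (illustrative only; it reuses the `3DBall` example environment from the Python API docs and assumes a matching `mlagents_envs` release):

```python
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
env.reset()

# Before this change:
#   names = env.get_behavior_names()
#   spec = env.get_behavior_spec(names[0])

# After this change: one read-only mapping from behavior name to BehaviorSpec.
first_name = list(env.behavior_specs.keys())[0]
first_spec = env.behavior_specs[first_name]

env.close()
```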
17 changes: 8 additions & 9 deletions docs/Python-API.md
@@ -69,7 +69,7 @@ from mlagents_envs.environment import UnityEnvironment
env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
# Start interacting with the environment.
env.reset()
behavior_names = env.get_behavior_names()
behavior_names = env.behavior_specs.keys()
...
```
**NOTE:** Please read [Interacting with a Unity Environment](#interacting-with-a-unity-environment)
@@ -109,14 +109,13 @@ A `BaseEnv` has the following methods:
act.
- **Close : `env.close()`** Sends a shutdown signal to the environment and
terminates the communication.
- **Get Behavior Names : `env.get_behavior_names()`** Returns a list of
`BehaviorName`. Note that the number of groups can change over time in the
simulation if new Agent behaviors are created in the simulation.
- **Get Behavior Spec : `env.get_behavior_spec(behavior_name: str)`** Returns
the `BehaviorSpec` corresponding to the behavior_name given as input. A
`BehaviorSpec` contains information such as the observation shapes, the action
type (multi-discrete or continuous) and the action shape. Note that the
`BehaviorSpec` for a specific group is fixed throughout the simulation.
- **Behavior Specs : `env.behavior_specs`** Returns a Mapping of
`BehaviorName` to `BehaviorSpec` objects (read only).
A `BehaviorSpec` contains information such as the observation shapes, the
action type (multi-discrete or continuous) and the action shape. Note that
the `BehaviorSpec` for a specific group is fixed throughout the simulation.
The number of entries in the Mapping can change over time as new Agent
behaviors are created in the simulation.
- **Get Steps : `env.get_steps(behavior_name: str)`** Returns a tuple
`DecisionSteps, TerminalSteps` corresponding to the behavior_name given as
input. The `DecisionSteps` contains information about the state of the agents
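As a usage sketch of the `behavior_specs` property described above (the `3DBall` path and the printed fields follow the `BehaviorSpec` description in this document; treat them as illustrative rather than exhaustive):

```python
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
env.reset()

# behavior_specs is a read-only Mapping, so the usual dict-style reads apply.
for behavior_name, spec in env.behavior_specs.items():
    print(behavior_name)
    print("  observation shapes:", spec.observation_shapes)
    print("  action type:", spec.action_type)  # multi-discrete or continuous
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    print("  agents requesting a decision:", len(decision_steps))

env.close()
```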
8 changes: 4 additions & 4 deletions gym-unity/gym_unity/envs/__init__.py
@@ -49,7 +49,7 @@ def __init__(
self._env = unity_env

# Take a single step so that the brain information will be sent over
if not self._env.get_behavior_names():
if not self._env.behavior_specs:
self._env.step()

self.visual_obs = None
@@ -63,14 +63,14 @@ def __init__(
self._allow_multiple_visual_obs = allow_multiple_visual_obs

# Check brain configuration
if len(self._env.get_behavior_names()) != 1:
if len(self._env.behavior_specs) != 1:
raise UnityGymException(
"There can only be one behavior in a UnityEnvironment "
"if it is wrapped in a gym."
)

self.name = self._env.get_behavior_names()[0]
self.group_spec = self._env.get_behavior_spec(self.name)
self.name = list(self._env.behavior_specs.keys())[0]
self.group_spec = self._env.behavior_specs[self.name]

if use_visual and self._get_n_vis_obs() == 0:
raise UnityGymException(
5 changes: 2 additions & 3 deletions gym-unity/gym_unity/tests/test_gym.py
@@ -9,6 +9,7 @@
ActionType,
DecisionSteps,
TerminalSteps,
BehaviorMapping,
)


@@ -19,7 +20,6 @@ def test_gym_wrapper():
setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)

env = UnityToGymWrapper(mock_env, use_visual=False)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)
@@ -137,6 +137,5 @@ def setup_mock_unityenvironment(mock_env, mock_spec, mock_decision, mock_termina
:Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
:Mock mock_termination: A TerminationSteps object that will be returned at each step and reset.
"""
mock_env.get_behavior_names.return_value = ["MockBrain"]
mock_env.get_behavior_spec.return_value = mock_spec
mock_env.behavior_specs = BehaviorMapping({"MockBrain": mock_spec})
mock_env.get_steps.return_value = (mock_decision, mock_termination)
44 changes: 29 additions & 15 deletions ml-agents-envs/mlagents_envs/base_env.py
@@ -18,7 +18,17 @@

from abc import ABC, abstractmethod
from collections.abc import Mapping
from typing import List, NamedTuple, Tuple, Optional, Union, Dict, Iterator, Any
from typing import (
List,
NamedTuple,
Tuple,
Optional,
Union,
Dict,
Iterator,
Any,
Mapping as MappingType,
)
import numpy as np
from enum import Enum

@@ -308,6 +318,20 @@ def create_empty_action(self, n_agents: int) -> np.ndarray:
return np.zeros((n_agents, self.action_size), dtype=np.float32)


class BehaviorMapping(Mapping):
def __init__(self, specs: Dict[BehaviorName, BehaviorSpec]):
self._dict = specs

def __len__(self) -> int:
return len(self._dict)

def __getitem__(self, behavior: BehaviorName) -> BehaviorSpec:
return self._dict[behavior]

def __iter__(self) -> Iterator[Any]:
yield from self._dict


class BaseEnv(ABC):
@abstractmethod
def step(self) -> None:
@@ -331,17 +355,16 @@ def close(self) -> None:
"""
pass

@property
@abstractmethod
def get_behavior_names(self) -> List[BehaviorName]:
def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
"""
Returns the list of the behavior names present in the environment.
Returns a Mapping from behavior names to behavior specs.
Agents grouped under the same behavior name have the same action and
observation specs, and are expected to behave similarly in the
environment.
This list can grow with time as new policies are instantiated.
:return: the list of agent BehaviorName.
Note that new keys can be added to this mapping as new policies are instantiated.
"""
pass

@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
@@ -386,12 +409,3 @@ def get_steps(
episode terminated last step.
"""
pass

@abstractmethod
def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
"""
Get the BehaviorSpec corresponding to the behavior name
:param behavior_name: The name of the behavior the agents are part of
:return: A BehaviorSpec corresponding to that behavior
"""
pass
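A small sketch of the read-only `Mapping` behaviour that `BehaviorMapping` provides (the `BehaviorSpec` constructor arguments are a toy example and assume the field names documented in this changeset: observation shapes, action type, action shape):

```python
from mlagents_envs.base_env import ActionType, BehaviorMapping, BehaviorSpec

# A toy spec: one vector observation of size 3 and two continuous actions.
toy_spec = BehaviorSpec(
    observation_shapes=[(3,)],
    action_type=ActionType.CONTINUOUS,
    action_shape=2,
)
specs = BehaviorMapping({"Toy": toy_spec})

assert len(specs) == 1 and "Toy" in specs  # __len__, plus __contains__ from Mapping
for name in specs:                         # __iter__ yields behavior names
    print(name, specs[name].action_shape)  # __getitem__ returns the BehaviorSpec

# No __setitem__ is defined, so the mapping is effectively read only:
# specs["Other"] = toy_spec would raise a TypeError.
```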
12 changes: 5 additions & 7 deletions ml-agents-envs/mlagents_envs/environment.py
@@ -4,7 +4,7 @@
import numpy as np
import os
import subprocess
from typing import Dict, List, Optional, Any, Tuple
from typing import Dict, List, Optional, Any, Tuple, Mapping as MappingType

import mlagents_envs

@@ -20,6 +20,7 @@
BehaviorSpec,
BehaviorName,
AgentId,
BehaviorMapping,
)
from mlagents_envs.timers import timed, hierarchical_timer
from mlagents_envs.exception import (
@@ -321,8 +322,9 @@ def step(self) -> None:
self._update_state(rl_output)
self._env_actions.clear()

def get_behavior_names(self):
return list(self._env_specs.keys())
@property
def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
return BehaviorMapping(self._env_specs)

def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:
@@ -389,10 +391,6 @@ def get_steps(
self._assert_behavior_exists(behavior_name)
return self._env_state[behavior_name]

def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
self._assert_behavior_exists(behavior_name)
return self._env_specs[behavior_name]

def close(self):
"""
Sends a shutdown signal to the unity environment, and closes the socket connection.
6 changes: 3 additions & 3 deletions ml-agents-envs/mlagents_envs/tests/test_envs.py
@@ -22,7 +22,7 @@ def test_initialization(mock_communicator, mock_launcher):
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
assert env.get_behavior_names() == ["RealFakeBrain"]
assert list(env.behavior_specs.keys()) == ["RealFakeBrain"]
env.close()


@@ -68,7 +68,7 @@ def test_reset(mock_communicator, mock_launcher):
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
spec = env.get_behavior_spec("RealFakeBrain")
spec = env.behavior_specs["RealFakeBrain"]
env.reset()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
env.close()
@@ -91,7 +91,7 @@ def test_step(mock_communicator, mock_launcher):
discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
spec = env.get_behavior_spec("RealFakeBrain")
spec = env.behavior_specs["RealFakeBrain"]
env.step()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
8 changes: 4 additions & 4 deletions ml-agents/mlagents/trainers/simple_env_manager.py
@@ -53,9 +53,9 @@ def _reset_env(
@property
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
result = {}
for brain_name in self.env.get_behavior_names():
result[brain_name] = behavior_spec_to_brain_parameters(
brain_name, self.env.get_behavior_spec(brain_name)
for behavior_name, behavior_spec in self.env.behavior_specs.items():
result[behavior_name] = behavior_spec_to_brain_parameters(
behavior_name, behavior_spec
)
return result

@@ -74,6 +74,6 @@ def _take_step(self, last_step: EnvironmentStep) -> Dict[BehaviorName, ActionInf

def _generate_all_results(self) -> AllStepResult:
all_step_result: AllStepResult = {}
for brain_name in self.env.get_behavior_names():
for brain_name in self.env.behavior_specs:
all_step_result[brain_name] = self.env.get_steps(brain_name)
return all_step_result
8 changes: 4 additions & 4 deletions ml-agents/mlagents/trainers/subprocess_env_manager.py
@@ -131,15 +131,15 @@ def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:

def _generate_all_results() -> AllStepResult:
all_step_result: AllStepResult = {}
for brain_name in env.get_behavior_names():
for brain_name in env.behavior_specs:
all_step_result[brain_name] = env.get_steps(brain_name)
return all_step_result

def external_brains():
result = {}
for brain_name in env.get_behavior_names():
result[brain_name] = behavior_spec_to_brain_parameters(
brain_name, env.get_behavior_spec(brain_name)
for behavior_name, behavior_specs in env.behavior_specs.items():
result[behavior_name] = behavior_spec_to_brain_parameters(
behavior_name, behavior_specs
)
return result

12 changes: 7 additions & 5 deletions ml-agents/mlagents/trainers/tests/simple_test_envs.py
@@ -8,6 +8,7 @@
DecisionSteps,
TerminalSteps,
ActionType,
BehaviorMapping,
)
from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
@@ -94,11 +95,12 @@ def _make_obs(self, value: float) -> List[np.ndarray]:
obs.append(np.ones((1,) + self.vis_obs_size, dtype=np.float32) * value)
return obs

def get_behavior_names(self):
return self.names

def get_behavior_spec(self, behavior_name):
return self.behavior_spec
@property
def behavior_specs(self):
behavior_dict = {}
for n in self.names:
behavior_dict[n] = self.behavior_spec
return BehaviorMapping(behavior_dict)

def set_action_for_agent(self, behavior_name, agent_id, action):
pass
4 changes: 2 additions & 2 deletions ml-agents/tests/yamato/scripts/run_llapi.py
@@ -25,8 +25,8 @@ def test_run_environment(env_name):
env.reset()

# Set the default brain to work with
group_name = env.get_behavior_names()[0]
group_spec = env.get_behavior_spec(group_name)
group_name = list(env.behavior_specs.keys())[0]
group_spec = env.behavior_specs[group_name]

# Set the time scale of the engine
engine_configuration_channel.set_configuration_parameters(time_scale=3.0)