From 48261d33a610331750234208228475e35d4c55cb Mon Sep 17 00:00:00 2001
From: Ervin T
Date: Wed, 13 May 2020 15:17:48 -0700
Subject: [PATCH 1/6] [bug-fix] Fix issue with initialize not resetting step count (#3962)

---
 com.unity.ml-agents/CHANGELOG.md                    | 1 +
 ml-agents/mlagents/trainers/policy/tf_policy.py     | 1 +
 ml-agents/mlagents/trainers/tests/test_nn_policy.py | 4 ++++
 3 files changed, 6 insertions(+)

diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index c7664fad07..1013eb20e8 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to

 ## [1.0.1-preview] - 2020-05-19
 ### Bug Fixes
+- Fixed an issue where using `--initialize-from` would resume training from the previous run's step count instead of starting at step 0. (#3962)
 #### com.unity.ml-agents (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)

diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py
index 7832d60cd8..3645a0866c 100644
--- a/ml-agents/mlagents/trainers/policy/tf_policy.py
+++ b/ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -137,6 +137,7 @@ def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None
             )
         )
         if reset_global_steps:
+            self._set_step(0)
             logger.info(
                 "Starting training from step 0 and saving to {}.".format(
                     self.model_path
diff --git a/ml-agents/mlagents/trainers/tests/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/test_nn_policy.py
index 1f669e3b45..74fb455b9b 100644
--- a/ml-agents/mlagents/trainers/tests/test_nn_policy.py
+++ b/ml-agents/mlagents/trainers/tests/test_nn_policy.py
@@ -86,6 +86,7 @@ def test_load_save(dummy_config, tmp_path):
     trainer_params["model_path"] = path1
     policy = create_policy_mock(trainer_params)
     policy.initialize_or_load()
+    policy._set_step(2000)
     policy.save_model(2000)

     assert len(os.listdir(tmp_path)) > 0
@@ -94,6 +95,7 @@ def test_load_save(dummy_config, tmp_path):
     policy2 = create_policy_mock(trainer_params, load=True, seed=1)
     policy2.initialize_or_load()
     _compare_two_policies(policy, policy2)
+    assert policy2.get_current_step() == 2000

     # Try initialize from path 1
     trainer_params["model_path"] = path2
@@ -102,6 +104,8 @@ def test_load_save(dummy_config, tmp_path):
     policy3.initialize_or_load()
     _compare_two_policies(policy2, policy3)
+    # Assert that the step count was reset to 0.
+    assert policy3.get_current_step() == 0

 def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:
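
The fix hinges on the policy tracking training progress in a global step counter that `--initialize-from` copies along with the model weights. Below is a minimal standalone sketch of the intended behavior; `SketchPolicy` is illustrative plain Python, not the real `TFPolicy`:

```python
# Minimal sketch of the reset-on-initialize behavior fixed in this patch.
# SketchPolicy is hypothetical; only the method names mirror the diff above.


class SketchPolicy:
    def __init__(self) -> None:
        self._step = 0

    def _set_step(self, step: int) -> int:
        self._step = step
        return self._step

    def get_current_step(self) -> int:
        return self._step

    def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
        # Pretend the loaded checkpoint had trained for 2000 steps.
        self._step = 2000
        if reset_global_steps:
            # Before the fix, this reset never happened, so training
            # silently resumed from the old step count.
            self._set_step(0)


policy = SketchPolicy()
policy._load_graph("./results/run1", reset_global_steps=True)
assert policy.get_current_step() == 0
```
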
From de53137fdd1d7f32ed0e1970cd239cae3b3f6a5d Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES
Date: Wed, 13 May 2020 12:46:03 -0700
Subject: [PATCH 2/6] Develop better error message for #3953 (#3963)

* Making the error for wrong number of agents raise consistently

* Better error message for inputs of wrong dimensions
---
 gym-unity/gym_unity/envs/__init__.py        | 13 +++++--------
 ml-agents-envs/mlagents_envs/environment.py |  6 +++---
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/gym-unity/gym_unity/envs/__init__.py b/gym-unity/gym_unity/envs/__init__.py
index ce5385bd3c..f042c2b65e 100644
--- a/gym-unity/gym_unity/envs/__init__.py
+++ b/gym-unity/gym_unity/envs/__init__.py
@@ -53,7 +53,6 @@ def __init__(
         self._env.step()

         self.visual_obs = None
-        self._n_agents = -1

         # Save the step result from the last time all Agents requested decisions.
         self._previous_decision_step: DecisionSteps = None
@@ -172,6 +171,7 @@ def step(self, action: List[Any]) -> GymStepResult:
         self._env.step()
         decision_step, terminal_step = self._env.get_steps(self.name)
+        self._check_agents(max(len(decision_step), len(terminal_step)))
         if len(terminal_step) != 0:
             # The agent is done
             self.game_over = True
@@ -264,10 +264,11 @@ def seed(self, seed: Any = None) -> None:
         logger.warning("Could not seed environment %s", self.name)
         return

-    def _check_agents(self, n_agents: int) -> None:
-        if self._n_agents > 1:
+    @staticmethod
+    def _check_agents(n_agents: int) -> None:
+        if n_agents > 1:
             raise UnityGymException(
-                "There can only be one Agent in the environment but {n_agents} were detected."
+                f"There can only be one Agent in the environment but {n_agents} were detected."
             )

     @property
@@ -290,10 +291,6 @@ def action_space(self):
     def observation_space(self):
         return self._observation_space

-    @property
-    def number_agents(self):
-        return self._n_agents
-

 class ActionFlattener:
     """
diff --git a/ml-agents-envs/mlagents_envs/environment.py b/ml-agents-envs/mlagents_envs/environment.py
index 9a525f661d..cb30d81056 100644
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
@@ -415,9 +415,9 @@ def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
         expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
         if action.shape != expected_shape:
             raise UnityActionException(
-                "The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
-                    behavior_name, expected_shape, action.shape
-                )
+                "The behavior {0} needs an input of dimension {1} for "
+                "(<number of agents>, <action size>) but received input of "
+                "dimension {2}".format(behavior_name, expected_shape, action.shape)
             )
         if action.dtype != expected_type:
             action = action.astype(expected_type)
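
The improved message spells out that actions are expected as a 2-D array of shape `(number of agents, action size)`. A small self-contained sketch of the same check; the `validate_actions` helper is hypothetical, only the message format comes from the patch:

```python
# Sketch of the dimension check behind the improved error message above.
import numpy as np


class UnityActionException(Exception):
    pass


def validate_actions(
    behavior_name: str, action: np.ndarray, n_agents: int, action_size: int
) -> None:
    # Actions arrive as one row per agent, one column per action dimension.
    expected_shape = (n_agents, action_size)
    if action.shape != expected_shape:
        raise UnityActionException(
            "The behavior {0} needs an input of dimension {1} for "
            "(<number of agents>, <action size>) but received input of "
            "dimension {2}".format(behavior_name, expected_shape, action.shape)
        )


# Three agents, two continuous actions each: passes silently.
validate_actions("RollerBall?team=0", np.zeros((3, 2)), n_agents=3, action_size=2)
```
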
From af0172db50dcd6d0e7b4a2bebf372303ef00a986 Mon Sep 17 00:00:00 2001
From: Chris Goy
Date: Tue, 12 May 2020 16:37:44 -0700
Subject: [PATCH 3/6] Fix #3932, stop the editor from going into a loop when a prefab is selected. (#3949)

---
 com.unity.ml-agents/Editor/BrainParametersDrawer.cs | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/com.unity.ml-agents/Editor/BrainParametersDrawer.cs b/com.unity.ml-agents/Editor/BrainParametersDrawer.cs
index 9b919d8e40..a6e5db5a45 100644
--- a/com.unity.ml-agents/Editor/BrainParametersDrawer.cs
+++ b/com.unity.ml-agents/Editor/BrainParametersDrawer.cs
@@ -124,7 +124,14 @@ static void DrawVectorAction(Rect position, SerializedProperty property)
         static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
         {
             var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
-            vecActionSize.arraySize = 1;
+
+            // This check is here due to:
+            // https://fogbugz.unity3d.com/f/cases/1246524/
+            // If this case has been resolved, please remove this if condition.
+            if (vecActionSize.arraySize != 1)
+            {
+                vecActionSize.arraySize = 1;
+            }

             var continuousActionSize = vecActionSize.GetArrayElementAtIndex(0);
             EditorGUI.PropertyField(
@@ -142,8 +149,17 @@ static void DrawContinuousVectorAction(Rect position, SerializedProperty propert
         static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
         {
             var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
-            vecActionSize.arraySize = EditorGUI.IntField(
+            var newSize = EditorGUI.IntField(
                 position, "Branches Size", vecActionSize.arraySize);
+
+            // This check is here due to:
+            // https://fogbugz.unity3d.com/f/cases/1246524/
+            // If this case has been resolved, please remove this if condition.
+            if (newSize != vecActionSize.arraySize)
+            {
+                vecActionSize.arraySize = newSize;
+            }
+
             position.y += k_LineHeight;
             position.x += 20;
             position.width -= 20;

From 0a05e3c16c553a4299fb5fa43c875b264ab8fbb1 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Fri, 1 May 2020 15:11:04 -0700
Subject: [PATCH 4/6] Minor doc updates to release

---
 docs/Training-ML-Agents.md | 4 ++--
 docs/Using-Tensorboard.md  | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md
index 096ae36380..4afdfa9f3d 100644
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
@@ -445,8 +445,8 @@ Below is a list of included `sampler-type` as part of the toolkit.
     `interval_2_max`], ...]
   - **sub-arguments** - `intervals`

-The implementation of the samplers can be found at
-`ml-agents-envs/mlagents_envs/sampler_class.py`.
+The implementation of the samplers can be found in the
+[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py).

 #### Defining a New Sampler Type

diff --git a/docs/Using-Tensorboard.md b/docs/Using-Tensorboard.md
index 2b86e78961..0cdf682a8a 100644
--- a/docs/Using-Tensorboard.md
+++ b/docs/Using-Tensorboard.md
@@ -21,10 +21,8 @@ session running on port 6006 a new session can be launched on an open port using
 the --port option.

 **Note:** If you don't assign a `run-id` identifier, `mlagents-learn` uses the
-default string, "ppo". All the statistics will be saved to the same sub-folder
-and displayed as one session in TensorBoard. After a few runs, the displays can
-become difficult to interpret in this situation. You can delete the folders
-under the `summaries` directory to clear out old statistics.
+default string, "ppo". You can delete the folders under the `results` directory
+to clear out old statistics.

 On the left side of the TensorBoard window, you can select which of the training
 runs you want to display. You can select multiple run-ids to compare statistics.
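
For readers following the sampler link above, here is a rough sketch of what an intervals-based sampler can look like. This is a guess at the shape of the code; the real implementation in `sampler_class.py` may differ, for example in how it weights the intervals:

```python
# Hypothetical intervals-based sampler in the spirit of the docs above:
# pick one of the configured [min, max] intervals, then sample uniformly
# within it. The class name is illustrative, not from sampler_class.py.
import random
from typing import List


class MultiRangeUniformSketch:
    def __init__(self, intervals: List[List[float]], seed: int = 0) -> None:
        self.intervals = intervals
        self.random = random.Random(seed)

    def sample_parameter(self) -> float:
        low, high = self.random.choice(self.intervals)
        return self.random.uniform(low, high)


# Matches the docs' [[interval_1_min, interval_1_max], ...] layout.
sampler = MultiRangeUniformSketch(intervals=[[7.0, 10.0], [15.0, 20.0]])
print(sampler.sample_parameter())
```
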
From dbda7d0c0bb413b2724a58a2edf9d3d12f337f83 Mon Sep 17 00:00:00 2001
From: Chris Elion
Date: Thu, 7 May 2020 15:32:43 -0700
Subject: [PATCH 5/6] add unit tests and fix exceptions (#3930)

---
 .../Runtime/Communicator/GrpcExtensions.cs | 18 +++++----
 .../Communicator/GrpcExtensionsTests.cs    | 37 +++++++++++++++++++
 .../Tests/Editor/MLAgentsEditModeTest.cs   | 13 ++++---
 3 files changed, 56 insertions(+), 12 deletions(-)
 create mode 100644 com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs

diff --git a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
index 0a21fd38d1..e41353fc44 100644
--- a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
+++ b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
@@ -26,10 +26,11 @@ public static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai)
         {
             var agentInfoProto = ai.ToAgentInfoProto();
-            var agentActionProto = new AgentActionProto
+            var agentActionProto = new AgentActionProto();
+            if (ai.storedVectorActions != null)
             {
-                VectorActions = { ai.storedVectorActions }
-            };
+                agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
+            }

             return new AgentInfoActionPairProto
             {
@@ -95,12 +96,14 @@ public static BrainParametersProto ToProto(this BrainParameters bp, string name,
             var brainParametersProto = new BrainParametersProto
             {
                 VectorActionSize = { bp.VectorActionSize },
-                VectorActionSpaceType =
-                    (SpaceTypeProto)bp.VectorActionSpaceType,
+                VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
                 BrainName = name,
                 IsTraining = isTraining
             };
-            brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
+            if (bp.VectorActionDescriptions != null)
+            {
+                brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
+            }
             return brainParametersProto;
         }
@@ -128,13 +131,14 @@ public static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
         /// </summary>
         public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
         {
+            var demonstrationName = dm.demonstrationName ?? "";
             var demoProto = new DemonstrationMetaProto
             {
                 ApiVersion = DemonstrationMetaData.ApiVersion,
                 MeanReward = dm.meanReward,
                 NumberSteps = dm.numberSteps,
                 NumberEpisodes = dm.numberEpisodes,
-                DemonstrationName = dm.demonstrationName
+                DemonstrationName = demonstrationName
             };
             return demoProto;
         }
diff --git a/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs b/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
new file mode 100644
index 0000000000..adca387790
--- /dev/null
+++ b/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
@@ -0,0 +1,37 @@
+using NUnit.Framework;
+using UnityEngine;
+using Unity.MLAgents.Policies;
+using Unity.MLAgents.Demonstrations;
+using Unity.MLAgents.Sensors;
+
+namespace Unity.MLAgents.Tests
+{
+    [TestFixture]
+    public class GrpcExtensionsTests
+    {
+        [Test]
+        public void TestDefaultBrainParametersToProto()
+        {
+            // Should be able to convert a default instance to proto.
+            var brain = new BrainParameters();
+            brain.ToProto("foo", false);
+        }
+
+        [Test]
+        public void TestDefaultAgentInfoToProto()
+        {
+            // Should be able to convert a default instance to proto.
+            var agentInfo = new AgentInfo();
+            agentInfo.ToInfoActionPairProto();
+            agentInfo.ToAgentInfoProto();
+        }
+
+        [Test]
+        public void TestDefaultDemonstrationMetaDataToProto()
+        {
+            // Should be able to convert a default instance to proto.
+            var demoMetaData = new DemonstrationMetaData();
+            demoMetaData.ToProto();
+        }
+    }
+}
diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
index 1a74313bd1..3b3e6ca1d4 100644
--- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -13,8 +13,10 @@ internal class TestPolicy : IPolicy
     {
         public Action OnRequestDecision;
         ObservationWriter m_ObsWriter = new ObservationWriter();
-        public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
-            foreach(var sensor in sensors){
+        public void RequestDecision(AgentInfo info, List<ISensor> sensors)
+        {
+            foreach (var sensor in sensors)
+            {
                 sensor.GetObservationProto(m_ObsWriter);
             }
             OnRequestDecision?.Invoke();
@@ -517,8 +519,10 @@ public void AssertStackingReset()
         agent1.SetPolicy(policy);

         StackingSensor sensor = null;
-        foreach(ISensor s in agent1.sensors){
-            if (s is StackingSensor){
+        foreach (ISensor s in agent1.sensors)
+        {
+            if (s is StackingSensor)
+            {
                 sensor = s as StackingSensor;
             }
         }
@@ -529,7 +533,6 @@ public void AssertStackingReset()
         {
             agent1.RequestDecision();
             aca.EnvironmentStep();
-
         }
         policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});
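
All three C# fixes follow one pattern: guard optional fields before serializing a default-constructed object. The same idea rendered in Python for readers on the trainer side; the `DemonstrationMeta` dataclass and `to_proto_dict` helper are illustrative, not ML-Agents APIs:

```python
# Sketch of the "guard optional fields before serializing" pattern from the
# patch above. DemonstrationMeta and to_proto_dict are hypothetical names.
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class DemonstrationMeta:
    mean_reward: float = 0.0
    number_steps: int = 0
    demonstration_name: Optional[str] = None  # may be unset, like the C# default


def to_proto_dict(meta: DemonstrationMeta) -> Dict[str, object]:
    return {
        "mean_reward": meta.mean_reward,
        "number_steps": meta.number_steps,
        # Equivalent of `dm.demonstrationName ?? ""` in the patch above:
        # never serialize None where the consumer expects a string.
        "demonstration_name": meta.demonstration_name or "",
    }


# A default-constructed instance serializes cleanly instead of raising.
assert to_proto_dict(DemonstrationMeta())["demonstration_name"] == ""
```
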
From 67607ec9a6ea3ee65318736dff6a422f7ab9197f Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Fri, 15 May 2020 12:46:29 -0700
Subject: [PATCH 6/6] Add memory_size hyperparameter

---
 docs/Learning-Environment-Create-New.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md
index df4deef039..e32a9b9510 100644
--- a/docs/Learning-Environment-Create-New.md
+++ b/docs/Learning-Environment-Create-New.md
@@ -413,6 +413,7 @@ RollerBall:
     learning_rate: 3.0e-4
     learning_rate_schedule: linear
     max_steps: 5.0e4
+    memory_size: 128
     normalize: false
     num_epoch: 3
     num_layers: 2
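
In trainer configs of this vintage, `memory_size` takes effect together with `use_recurrent`, which is why it belongs in the example config. A quick sanity check of such a config; the abridged YAML and the check itself are illustrative, not part of the toolkit:

```python
# Hypothetical sanity check: recurrent training needs memory_size set.
# Requires PyYAML; the RollerBall snippet below is abridged for illustration.
import yaml

config_text = """
RollerBall:
    max_steps: 5.0e4
    memory_size: 128
    use_recurrent: true
"""

config = yaml.safe_load(config_text)["RollerBall"]
if config.get("use_recurrent") and "memory_size" not in config:
    raise ValueError("use_recurrent requires memory_size to be set")
print(config["memory_size"])  # 128
```
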