From 48261d33a610331750234208228475e35d4c55cb Mon Sep 17 00:00:00 2001
From: Ervin T
Date: Wed, 13 May 2020 15:17:48 -0700
Subject: [PATCH 1/6] [bug-fix] Fix issue with initialize not resetting step count (#3962)

---
 com.unity.ml-agents/CHANGELOG.md                    | 1 +
 ml-agents/mlagents/trainers/policy/tf_policy.py     | 1 +
 ml-agents/mlagents/trainers/tests/test_nn_policy.py | 4 ++++
 3 files changed, 6 insertions(+)

diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index c7664fad07..1013eb20e8 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to

 ## [1.0.1-preview] - 2020-05-19
 ### Bug Fixes
+- Fixed an issue where using `--initialize-from` would resume training from the previous run's step count instead of starting at step 0. (#3962)
 #### com.unity.ml-agents (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)

diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py
index 7832d60cd8..3645a0866c 100644
--- a/ml-agents/mlagents/trainers/policy/tf_policy.py
+++ b/ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -137,6 +137,7 @@ def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None
             )
         )
         if reset_global_steps:
+            self._set_step(0)
             logger.info(
                 "Starting training from step 0 and saving to {}.".format(
                     self.model_path
diff --git a/ml-agents/mlagents/trainers/tests/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/test_nn_policy.py
index 1f669e3b45..74fb455b9b 100644
--- a/ml-agents/mlagents/trainers/tests/test_nn_policy.py
+++ b/ml-agents/mlagents/trainers/tests/test_nn_policy.py
@@ -86,6 +86,7 @@ def test_load_save(dummy_config, tmp_path):
     trainer_params["model_path"] = path1
     policy = create_policy_mock(trainer_params)
     policy.initialize_or_load()
+    policy._set_step(2000)
     policy.save_model(2000)

     assert len(os.listdir(tmp_path)) > 0
@@ -94,6 +95,7 @@ def test_load_save(dummy_config, tmp_path):
     policy2 = create_policy_mock(trainer_params, load=True, seed=1)
     policy2.initialize_or_load()
     _compare_two_policies(policy, policy2)
+    assert policy2.get_current_step() == 2000

     # Try initialize from path 1
     trainer_params["model_path"] = path2
@@ -102,6 +104,8 @@ def test_load_save(dummy_config, tmp_path):
     policy3.initialize_or_load()
     _compare_two_policies(policy2, policy3)
+    # Assert that the step count was reset to 0.
+    assert policy3.get_current_step() == 0

 def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:
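
The fix hinges on the policy tracking training progress in a global step counter that `--initialize-from` copies along with the model weights. Below is a minimal standalone sketch of the intended behavior; `SketchPolicy` is illustrative plain Python, not the real `TFPolicy`:

```python
# Minimal sketch of the reset-on-initialize behavior fixed in this patch.
# SketchPolicy is hypothetical; only the method names mirror the diff above.


class SketchPolicy:
    def __init__(self) -> None:
        self._step = 0

    def _set_step(self, step: int) -> int:
        self._step = step
        return self._step

    def get_current_step(self) -> int:
        return self._step

    def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
        # Pretend the loaded checkpoint had trained for 2000 steps.
        self._step = 2000
        if reset_global_steps:
            # Before the fix, this reset never happened, so training
            # silently resumed from the old step count.
            self._set_step(0)


policy = SketchPolicy()
policy._load_graph("./results/run1", reset_global_steps=True)
assert policy.get_current_step() == 0
```
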
From de53137fdd1d7f32ed0e1970cd239cae3b3f6a5d Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES
Date: Wed, 13 May 2020 12:46:03 -0700
Subject: [PATCH 2/6] Develop better error message for #3953 (#3963)

* Making the error for wrong number of agents raise consistently

* Better error message for inputs of wrong dimensions
---
 gym-unity/gym_unity/envs/__init__.py        | 13 +++++--------
 ml-agents-envs/mlagents_envs/environment.py |  6 +++---
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/gym-unity/gym_unity/envs/__init__.py b/gym-unity/gym_unity/envs/__init__.py
index ce5385bd3c..f042c2b65e 100644
--- a/gym-unity/gym_unity/envs/__init__.py
+++ b/gym-unity/gym_unity/envs/__init__.py
@@ -53,7 +53,6 @@ def __init__(
         self._env.step()

         self.visual_obs = None
-        self._n_agents = -1

         # Save the step result from the last time all Agents requested decisions.
         self._previous_decision_step: DecisionSteps = None
@@ -172,6 +171,7 @@ def step(self, action: List[Any]) -> GymStepResult:
         self._env.step()
         decision_step, terminal_step = self._env.get_steps(self.name)
+        self._check_agents(max(len(decision_step), len(terminal_step)))
         if len(terminal_step) != 0:
             # The agent is done
             self.game_over = True
@@ -264,10 +264,11 @@ def seed(self, seed: Any = None) -> None:
         logger.warning("Could not seed environment %s", self.name)
         return

-    def _check_agents(self, n_agents: int) -> None:
-        if self._n_agents > 1:
+    @staticmethod
+    def _check_agents(n_agents: int) -> None:
+        if n_agents > 1:
             raise UnityGymException(
-                "There can only be one Agent in the environment but {n_agents} were detected."
+                f"There can only be one Agent in the environment but {n_agents} were detected."
             )

     @property
@@ -290,10 +291,6 @@ def action_space(self):
     def observation_space(self):
         return self._observation_space

-    @property
-    def number_agents(self):
-        return self._n_agents
-

 class ActionFlattener:
     """
diff --git a/ml-agents-envs/mlagents_envs/environment.py b/ml-agents-envs/mlagents_envs/environment.py
index 9a525f661d..cb30d81056 100644
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
@@ -415,9 +415,9 @@ def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
         expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
         if action.shape != expected_shape:
             raise UnityActionException(
-                "The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
-                    behavior_name, expected_shape, action.shape
-                )
+                "The behavior {0} needs an input of dimension {1} for "
+                "(<number of agents>, <action size>) but received input of "
+                "dimension {2}".format(behavior_name, expected_shape, action.shape)
             )
         if action.dtype != expected_type:
             action = action.astype(expected_type)
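
The improved message spells out that actions are expected as a 2-D array of shape `(number of agents, action size)`. A small self-contained sketch of the same check; the `validate_actions` helper is hypothetical, only the message format comes from the patch:

```python
# Sketch of the dimension check behind the improved error message above.
import numpy as np


class UnityActionException(Exception):
    pass


def validate_actions(
    behavior_name: str, action: np.ndarray, n_agents: int, action_size: int
) -> None:
    # Actions arrive as one row per agent, one column per action dimension.
    expected_shape = (n_agents, action_size)
    if action.shape != expected_shape:
        raise UnityActionException(
            "The behavior {0} needs an input of dimension {1} for "
            "(<number of agents>, <action size>) but received input of "
            "dimension {2}".format(behavior_name, expected_shape, action.shape)
        )


# Three agents, two continuous actions each: passes silently.
validate_actions("RollerBall?team=0", np.zeros((3, 2)), n_agents=3, action_size=2)
```
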
From af0172db50dcd6d0e7b4a2bebf372303ef00a986 Mon Sep 17 00:00:00 2001
From: Chris Goy
Date: Tue, 12 May 2020 16:37:44 -0700
Subject: [PATCH 3/6] Fix #3932, stop the editor from going into a loop when a prefab is selected. (#3949)

---
 com.unity.ml-agents/Editor/BrainParametersDrawer.cs | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/com.unity.ml-agents/Editor/BrainParametersDrawer.cs b/com.unity.ml-agents/Editor/BrainParametersDrawer.cs
index 9b919d8e40..a6e5db5a45 100644
--- a/com.unity.ml-agents/Editor/BrainParametersDrawer.cs
+++ b/com.unity.ml-agents/Editor/BrainParametersDrawer.cs
@@ -124,7 +124,14 @@ static void DrawVectorAction(Rect position, SerializedProperty property)
         static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
         {
             var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
-            vecActionSize.arraySize = 1;
+
+            // This check is here due to:
+            // https://fogbugz.unity3d.com/f/cases/1246524/
+            // If this case has been resolved, please remove this if condition.
+            if (vecActionSize.arraySize != 1)
+            {
+                vecActionSize.arraySize = 1;
+            }

             var continuousActionSize = vecActionSize.GetArrayElementAtIndex(0);
             EditorGUI.PropertyField(
@@ -142,8 +149,17 @@ static void DrawContinuousVectorAction(Rect position, SerializedProperty propert
         static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
         {
             var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
-            vecActionSize.arraySize = EditorGUI.IntField(
+            var newSize = EditorGUI.IntField(
                 position, "Branches Size", vecActionSize.arraySize);
+
+            // This check is here due to:
+            // https://fogbugz.unity3d.com/f/cases/1246524/
+            // If this case has been resolved, please remove this if condition.
+            if (newSize != vecActionSize.arraySize)
+            {
+                vecActionSize.arraySize = newSize;
+            }
+
             position.y += k_LineHeight;
             position.x += 20;
             position.width -= 20;

From 0a05e3c16c553a4299fb5fa43c875b264ab8fbb1 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Fri, 1 May 2020 15:11:04 -0700
Subject: [PATCH 4/6] Minor doc updates to release

---
 docs/Training-ML-Agents.md | 4 ++--
 docs/Using-Tensorboard.md  | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md
index 096ae36380..4afdfa9f3d 100644
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
@@ -445,8 +445,8 @@ Below is a list of included `sampler-type` as part of the toolkit.
     `interval_2_max`], ...]
   - **sub-arguments** - `intervals`

-The implementation of the samplers can be found at
-`ml-agents-envs/mlagents_envs/sampler_class.py`.
+The implementation of the samplers can be found in the
+[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py).

 #### Defining a New Sampler Type

diff --git a/docs/Using-Tensorboard.md b/docs/Using-Tensorboard.md
index 2b86e78961..0cdf682a8a 100644
--- a/docs/Using-Tensorboard.md
+++ b/docs/Using-Tensorboard.md
@@ -21,10 +21,8 @@ session running on port 6006 a new session can be launched on an open port using
 the --port option.

 **Note:** If you don't assign a `run-id` identifier, `mlagents-learn` uses the
-default string, "ppo". All the statistics will be saved to the same sub-folder
-and displayed as one session in TensorBoard. After a few runs, the displays can
-become difficult to interpret in this situation. You can delete the folders
-under the `summaries` directory to clear out old statistics.
+default string, "ppo". You can delete the folders under the `results` directory
+to clear out old statistics.

 On the left side of the TensorBoard window, you can select which of the training
 runs you want to display. You can select multiple run-ids to compare statistics.
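
For readers following the sampler link above, here is a rough sketch of what an intervals-based sampler can look like. This is a guess at the shape of the code; the real implementation in `sampler_class.py` may differ, for example in how it weights the intervals:

```python
# Hypothetical intervals-based sampler in the spirit of the docs above:
# pick one of the configured [min, max] intervals, then sample uniformly
# within it. The class name is illustrative, not from sampler_class.py.
import random
from typing import List


class MultiRangeUniformSketch:
    def __init__(self, intervals: List[List[float]], seed: int = 0) -> None:
        self.intervals = intervals
        self.random = random.Random(seed)

    def sample_parameter(self) -> float:
        low, high = self.random.choice(self.intervals)
        return self.random.uniform(low, high)


# Matches the docs' [[interval_1_min, interval_1_max], ...] layout.
sampler = MultiRangeUniformSketch(intervals=[[7.0, 10.0], [15.0, 20.0]])
print(sampler.sample_parameter())
```
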
From dbda7d0c0bb413b2724a58a2edf9d3d12f337f83 Mon Sep 17 00:00:00 2001
From: Chris Elion
Date: Thu, 7 May 2020 15:32:43 -0700
Subject: [PATCH 5/6] add unit tests and fix exceptions (#3930)

---
 .../Runtime/Communicator/GrpcExtensions.cs | 18 +++++----
 .../Communicator/GrpcExtensionsTests.cs    | 37 +++++++++++++++++++
 .../Tests/Editor/MLAgentsEditModeTest.cs   | 13 ++++---
 3 files changed, 56 insertions(+), 12 deletions(-)
 create mode 100644 com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs

diff --git a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
index 0a21fd38d1..e41353fc44 100644
--- a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
+++ b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
@@ -26,10 +26,11 @@ public static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai)
         {
             var agentInfoProto = ai.ToAgentInfoProto();
-            var agentActionProto = new AgentActionProto
+            var agentActionProto = new AgentActionProto();
+            if (ai.storedVectorActions != null)
             {
-                VectorActions = { ai.storedVectorActions }
-            };
+                agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
+            }

             return new AgentInfoActionPairProto
             {
@@ -95,12 +96,14 @@ public static BrainParametersProto ToProto(this BrainParameters bp, string name,
             var brainParametersProto = new BrainParametersProto
             {
                 VectorActionSize = { bp.VectorActionSize },
-                VectorActionSpaceType =
-                    (SpaceTypeProto)bp.VectorActionSpaceType,
+                VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
                 BrainName = name,
                 IsTraining = isTraining
             };
-            brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
+            if (bp.VectorActionDescriptions != null)
+            {
+                brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
+            }
             return brainParametersProto;
         }
@@ -128,13 +131,14 @@ public static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
         /// </summary>
         public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
         {
+            var demonstrationName = dm.demonstrationName ?? "";
             var demoProto = new DemonstrationMetaProto
             {
                 ApiVersion = DemonstrationMetaData.ApiVersion,
                 MeanReward = dm.meanReward,
                 NumberSteps = dm.numberSteps,
                 NumberEpisodes = dm.numberEpisodes,
-                DemonstrationName = dm.demonstrationName
+                DemonstrationName = demonstrationName
             };
             return demoProto;
         }
diff --git a/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs b/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
new file mode 100644
index 0000000000..adca387790
--- /dev/null
+++ b/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
@@ -0,0 +1,37 @@
+using NUnit.Framework;
+using UnityEngine;
+using Unity.MLAgents.Policies;
+using Unity.MLAgents.Demonstrations;
+using Unity.MLAgents.Sensors;
+
+namespace Unity.MLAgents.Tests
+{
+    [TestFixture]
+    public class GrpcExtensionsTests
+    {
+        [Test]
+        public void TestDefaultBrainParametersToProto()
+        {
+            // Should be able to convert a default instance to proto.
+            var brain = new BrainParameters();
+            brain.ToProto("foo", false);
+        }
+
+        [Test]
+        public void TestDefaultAgentInfoToProto()
+        {
+            // Should be able to convert a default instance to proto.
+            var agentInfo = new AgentInfo();
+            agentInfo.ToInfoActionPairProto();
+            agentInfo.ToAgentInfoProto();
+        }
+
+        [Test]
+        public void TestDefaultDemonstrationMetaDataToProto()
+        {
+            // Should be able to convert a default instance to proto.
+            var demoMetaData = new DemonstrationMetaData();
+            demoMetaData.ToProto();
+        }
+    }
+}
diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
index 1a74313bd1..3b3e6ca1d4 100644
--- a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -13,8 +13,10 @@ internal class TestPolicy : IPolicy
     {
         public Action OnRequestDecision;
         ObservationWriter m_ObsWriter = new ObservationWriter();
-        public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
-            foreach(var sensor in sensors){
+        public void RequestDecision(AgentInfo info, List<ISensor> sensors)
+        {
+            foreach (var sensor in sensors)
+            {
                 sensor.GetObservationProto(m_ObsWriter);
             }
             OnRequestDecision?.Invoke();
@@ -517,8 +519,10 @@ public void AssertStackingReset()
         agent1.SetPolicy(policy);

         StackingSensor sensor = null;
-        foreach(ISensor s in agent1.sensors){
-            if (s is StackingSensor){
+        foreach (ISensor s in agent1.sensors)
+        {
+            if (s is StackingSensor)
+            {
                 sensor = s as StackingSensor;
             }
         }
@@ -529,7 +533,6 @@ public void AssertStackingReset()
         {
             agent1.RequestDecision();
             aca.EnvironmentStep();
-
         }
         policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});
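
All three C# fixes follow one pattern: guard optional fields before serializing a default-constructed object. The same idea rendered in Python for readers on the trainer side; the `DemonstrationMeta` dataclass and `to_proto_dict` helper are illustrative, not ML-Agents APIs:

```python
# Sketch of the "guard optional fields before serializing" pattern from the
# patch above. DemonstrationMeta and to_proto_dict are hypothetical names.
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class DemonstrationMeta:
    mean_reward: float = 0.0
    number_steps: int = 0
    demonstration_name: Optional[str] = None  # may be unset, like the C# default


def to_proto_dict(meta: DemonstrationMeta) -> Dict[str, object]:
    return {
        "mean_reward": meta.mean_reward,
        "number_steps": meta.number_steps,
        # Equivalent of `dm.demonstrationName ?? ""` in the patch above:
        # never serialize None where the consumer expects a string.
        "demonstration_name": meta.demonstration_name or "",
    }


# A default-constructed instance serializes cleanly instead of raising.
assert to_proto_dict(DemonstrationMeta())["demonstration_name"] == ""
```
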
From 67607ec9a6ea3ee65318736dff6a422f7ab9197f Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Fri, 15 May 2020 12:46:29 -0700
Subject: [PATCH 6/6] Add memory_size hyperparameter

---
 docs/Learning-Environment-Create-New.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md
index df4deef039..e32a9b9510 100644
--- a/docs/Learning-Environment-Create-New.md
+++ b/docs/Learning-Environment-Create-New.md
@@ -413,6 +413,7 @@ RollerBall:
     learning_rate: 3.0e-4
     learning_rate_schedule: linear
     max_steps: 5.0e4
+    memory_size: 128
     normalize: false
     num_epoch: 3
     num_layers: 2
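
In trainer configs of this vintage, `memory_size` takes effect together with `use_recurrent`, which is why it belongs in the example config. A quick sanity check of such a config; the abridged YAML and the check itself are illustrative, not part of the toolkit:

```python
# Hypothetical sanity check: recurrent training needs memory_size set.
# Requires PyYAML; the RollerBall snippet below is abridged for illustration.
import yaml

config_text = """
RollerBall:
    max_steps: 5.0e4
    memory_size: 128
    use_recurrent: true
"""

config = yaml.safe_load(config_text)["RollerBall"]
if config.get("use_recurrent") and "memory_size" not in config:
    raise ValueError("use_recurrent requires memory_size to be set")
print(config["memory_size"])  # 128
```
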