diff --git a/Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab b/Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab index 34bf31e657..f64724101b 100644 --- a/Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab +++ b/Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab @@ -2178,17 +2178,18 @@ MonoBehaviour: m_Name: m_EditorClassIdentifier: m_BrainParameters: - vectorObservationSize: 4 - numStackedVectorObservations: 1 - vectorActionSize: 03000000030000000300000002000000 - vectorActionDescriptions: [] - vectorActionSpaceType: 0 + VectorObservationSize: 4 + NumStackedVectorObservations: 1 + VectorActionSize: 03000000030000000300000002000000 + VectorActionDescriptions: [] + VectorActionSpaceType: 0 m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3} m_InferenceDevice: 0 m_BehaviorType: 0 m_BehaviorName: FoodCollector TeamId: 0 m_UseChildSensors: 1 + m_ObservableAttributeHandling: 0 --- !u!114 &114176228333253036 MonoBehaviour: m_ObjectHideFlags: 0 @@ -2204,7 +2205,7 @@ MonoBehaviour: agentParameters: maxStep: 0 hasUpgradedFromAgentParameters: 1 - maxStep: 5000 + MaxStep: 5000 area: {fileID: 1819751139121548} turnSpeed: 300 moveSpeed: 2 @@ -2213,7 +2214,7 @@ MonoBehaviour: goodMaterial: {fileID: 2100000, guid: c67450f290f3e4897bc40276a619e78d, type: 2} frozenMaterial: {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2} myLaser: {fileID: 1081721624670010} - contribute: 0 + contribute: 1 useVectorObs: 1 --- !u!114 &114725457980523372 MonoBehaviour: @@ -2260,7 +2261,6 @@ MonoBehaviour: m_EditorClassIdentifier: DecisionPeriod: 5 TakeActionsBetweenDecisions: 1 - offsetStep: 0 --- !u!114 &1222199865870203693 MonoBehaviour: m_ObjectHideFlags: 0 @@ -2273,6 +2273,7 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3} m_Name: m_EditorClassIdentifier: + debugCommandLineOverride: --- !u!1 &1482701732800114 GameObject: m_ObjectHideFlags: 0 @@ -2517,17 +2518,18 @@ MonoBehaviour: m_Name: m_EditorClassIdentifier: m_BrainParameters: - vectorObservationSize: 4 - numStackedVectorObservations: 1 - vectorActionSize: 03000000030000000300000002000000 - vectorActionDescriptions: [] - vectorActionSpaceType: 0 + VectorObservationSize: 4 + NumStackedVectorObservations: 1 + VectorActionSize: 03000000030000000300000002000000 + VectorActionDescriptions: [] + VectorActionSpaceType: 0 m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3} m_InferenceDevice: 0 m_BehaviorType: 0 m_BehaviorName: FoodCollector TeamId: 0 m_UseChildSensors: 1 + m_ObservableAttributeHandling: 0 --- !u!114 &114711827726849508 MonoBehaviour: m_ObjectHideFlags: 0 @@ -2543,7 +2545,7 @@ MonoBehaviour: agentParameters: maxStep: 0 hasUpgradedFromAgentParameters: 1 - maxStep: 5000 + MaxStep: 5000 area: {fileID: 1819751139121548} turnSpeed: 300 moveSpeed: 2 @@ -2599,7 +2601,6 @@ MonoBehaviour: m_EditorClassIdentifier: DecisionPeriod: 5 TakeActionsBetweenDecisions: 1 - offsetStep: 0 --- !u!1 &1528397385587768 GameObject: m_ObjectHideFlags: 0 @@ -2848,17 +2849,18 @@ MonoBehaviour: m_Name: m_EditorClassIdentifier: m_BrainParameters: - vectorObservationSize: 4 - numStackedVectorObservations: 1 - vectorActionSize: 03000000030000000300000002000000 - vectorActionDescriptions: [] - vectorActionSpaceType: 0 + VectorObservationSize: 4 + NumStackedVectorObservations: 1 + VectorActionSize: 03000000030000000300000002000000 + VectorActionDescriptions: 
[] + VectorActionSpaceType: 0 m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3} m_InferenceDevice: 0 m_BehaviorType: 0 m_BehaviorName: FoodCollector TeamId: 0 m_UseChildSensors: 1 + m_ObservableAttributeHandling: 0 --- !u!114 &114542632553128056 MonoBehaviour: m_ObjectHideFlags: 0 @@ -2874,7 +2876,7 @@ MonoBehaviour: agentParameters: maxStep: 0 hasUpgradedFromAgentParameters: 1 - maxStep: 5000 + MaxStep: 5000 area: {fileID: 1819751139121548} turnSpeed: 300 moveSpeed: 2 @@ -2930,7 +2932,6 @@ MonoBehaviour: m_EditorClassIdentifier: DecisionPeriod: 5 TakeActionsBetweenDecisions: 1 - offsetStep: 0 --- !u!1 &1617924810425504 GameObject: m_ObjectHideFlags: 0 @@ -3442,17 +3443,18 @@ MonoBehaviour: m_Name: m_EditorClassIdentifier: m_BrainParameters: - vectorObservationSize: 4 - numStackedVectorObservations: 1 - vectorActionSize: 03000000030000000300000002000000 - vectorActionDescriptions: [] - vectorActionSpaceType: 0 + VectorObservationSize: 4 + NumStackedVectorObservations: 1 + VectorActionSize: 03000000030000000300000002000000 + VectorActionDescriptions: [] + VectorActionSpaceType: 0 m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3} m_InferenceDevice: 0 m_BehaviorType: 0 m_BehaviorName: FoodCollector TeamId: 0 m_UseChildSensors: 1 + m_ObservableAttributeHandling: 0 --- !u!114 &114189751434580810 MonoBehaviour: m_ObjectHideFlags: 0 @@ -3468,7 +3470,7 @@ MonoBehaviour: agentParameters: maxStep: 0 hasUpgradedFromAgentParameters: 1 - maxStep: 5000 + MaxStep: 5000 area: {fileID: 1819751139121548} turnSpeed: 300 moveSpeed: 2 @@ -3524,7 +3526,6 @@ MonoBehaviour: m_EditorClassIdentifier: DecisionPeriod: 5 TakeActionsBetweenDecisions: 1 - offsetStep: 0 --- !u!1 &1688105343773098 GameObject: m_ObjectHideFlags: 0 @@ -3759,17 +3760,18 @@ MonoBehaviour: m_Name: m_EditorClassIdentifier: m_BrainParameters: - vectorObservationSize: 4 - numStackedVectorObservations: 1 - vectorActionSize: 03000000030000000300000002000000 - vectorActionDescriptions: [] - vectorActionSpaceType: 0 + VectorObservationSize: 4 + NumStackedVectorObservations: 1 + VectorActionSize: 03000000030000000300000002000000 + VectorActionDescriptions: [] + VectorActionSpaceType: 0 m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3} m_InferenceDevice: 0 m_BehaviorType: 0 m_BehaviorName: FoodCollector TeamId: 0 m_UseChildSensors: 1 + m_ObservableAttributeHandling: 0 --- !u!114 &114235147148547996 MonoBehaviour: m_ObjectHideFlags: 0 @@ -3785,7 +3787,7 @@ MonoBehaviour: agentParameters: maxStep: 0 hasUpgradedFromAgentParameters: 1 - maxStep: 5000 + MaxStep: 5000 area: {fileID: 1819751139121548} turnSpeed: 300 moveSpeed: 2 @@ -3841,7 +3843,6 @@ MonoBehaviour: m_EditorClassIdentifier: DecisionPeriod: 5 TakeActionsBetweenDecisions: 1 - offsetStep: 0 --- !u!1 &1729825611722018 GameObject: m_ObjectHideFlags: 0 diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 8f8deb1af6..8bce6addc6 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -9,8 +9,15 @@ and this project adheres to ## [Unreleased] ### Major Changes +#### com.unity.ml-agents (C#) +#### ml-agents / ml-agents-envs / gym-unity (Python) ### Minor Changes +#### com.unity.ml-agents (C#) +#### ml-agents / ml-agents-envs / gym-unity (Python) +- StatsSideChannel now stores multiple values per key. This means that multiple +calls to `StatsRecorder.Add()` with the same key in the same step will no +longer overwrite each other. 
(#4236) - Model checkpoints are now also saved as .nn files during training. (#4127) - Model checkpoint info is saved in TrainingStatus.json after training is concluded (#4127) @@ -20,6 +27,7 @@ and this project adheres to recursively (for example, by an Agent's CollectObservations method). Previously, this would result in an infinite loop and cause the editor to hang. (#4226) +#### ml-agents / ml-agents-envs / gym-unity (Python) ## [1.2.0-preview] - 2020-07-15 diff --git a/com.unity.ml-agents/Runtime/StatsRecorder.cs b/com.unity.ml-agents/Runtime/StatsRecorder.cs index 1ca520050a..96e4e6a29d 100644 --- a/com.unity.ml-agents/Runtime/StatsRecorder.cs +++ b/com.unity.ml-agents/Runtime/StatsRecorder.cs @@ -9,7 +9,6 @@ public enum StatAggregationMethod { /// /// Values within the summary period are averaged before reporting. - /// Note that values from the same C# environment in the same step may replace each other. /// Average = 0, diff --git a/docs/Python-API.md b/docs/Python-API.md index 45efc2f77c..648f9c131b 100644 --- a/docs/Python-API.md +++ b/docs/Python-API.md @@ -67,7 +67,7 @@ in python, run: from mlagents_envs.environment import UnityEnvironment # This is a non-blocking call that only loads the environment. env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[]) -# Start interacting with the evironment. +# Start interacting with the environment. env.reset() behavior_names = env.behavior_specs.keys() ... diff --git a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py index b332e693cd..4d8a481f69 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py @@ -1,7 +1,9 @@ -from mlagents_envs.side_channel import SideChannel, IncomingMessage import uuid -from typing import Dict, Tuple +from typing import Tuple, List, Mapping from enum import Enum +from collections import defaultdict + +from mlagents_envs.side_channel import SideChannel, IncomingMessage # Determines the behavior of how multiple stats within the same summary period are combined. @@ -13,6 +15,10 @@ class StatsAggregationMethod(Enum): MOST_RECENT = 1 +StatList = List[Tuple[float, StatsAggregationMethod]] +EnvironmentStats = Mapping[str, StatList] + + class StatsSideChannel(SideChannel): """ Side channel that receives (string, float) pairs from the environment, so that they can eventually @@ -24,7 +30,7 @@ def __init__(self) -> None: # UUID('a1d8f7b7-cec8-50f9-b78b-d3e165a78520') super().__init__(uuid.UUID("a1d8f7b7-cec8-50f9-b78b-d3e165a78520")) - self.stats: Dict[str, Tuple[float, StatsAggregationMethod]] = {} + self.stats: EnvironmentStats = defaultdict(list) def on_message_received(self, msg: IncomingMessage) -> None: """ @@ -36,13 +42,13 @@ def on_message_received(self, msg: IncomingMessage) -> None: val = msg.read_float32() agg_type = StatsAggregationMethod(msg.read_int32()) - self.stats[key] = (val, agg_type) + self.stats[key].append((val, agg_type)) - def get_and_reset_stats(self) -> Dict[str, Tuple[float, StatsAggregationMethod]]: + def get_and_reset_stats(self) -> EnvironmentStats: """ Returns the current stats, and resets the internal storage of the stats. 
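# --- Illustrative sketch (not part of this patch) ---------------------------
# With the list-valued storage introduced above, repeated stats reported for
# the same key within one summary period are all retained instead of the last
# write winning. "Custom/Reward" is a hypothetical stat key, and appending to
# `channel.stats` directly stands in for messages that would normally arrive
# from the C# StatsRecorder via on_message_received().
from mlagents_envs.side_channel.stats_side_channel import (
    StatsSideChannel,
    StatsAggregationMethod,
)

channel = StatsSideChannel()
channel.stats["Custom/Reward"].append((1.0, StatsAggregationMethod.AVERAGE))
channel.stats["Custom/Reward"].append((2.0, StatsAggregationMethod.AVERAGE))

stats = channel.get_and_reset_stats()
# Both values are preserved rather than overwriting each other:
assert stats["Custom/Reward"] == [
    (1.0, StatsAggregationMethod.AVERAGE),
    (2.0, StatsAggregationMethod.AVERAGE),
]
# get_and_reset_stats() hands back the accumulated mapping and replaces it
# with a fresh defaultdict, so the next summary period starts empty.
# -----------------------------------------------------------------------------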
:return: """ s = self.stats - self.stats = {} + self.stats = defaultdict(list) return s diff --git a/ml-agents-envs/mlagents_envs/tests/test_side_channel.py b/ml-agents-envs/mlagents_envs/tests/test_side_channel.py index 1667bb44cb..14e46d1916 100644 --- a/ml-agents-envs/mlagents_envs/tests/test_side_channel.py +++ b/ml-agents-envs/mlagents_envs/tests/test_side_channel.py @@ -253,6 +253,6 @@ def test_stats_channel(): stats = receiver.get_and_reset_stats() assert len(stats) == 1 - val, method = stats["stats-1"] + val, method = stats["stats-1"][0] assert val - 42.0 < 1e-8 assert method == StatsAggregationMethod.MOST_RECENT diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 05837be730..63f8561bc3 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -9,7 +9,10 @@ TerminalSteps, TerminalStep, ) -from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod +from mlagents_envs.side_channel.stats_side_channel import ( + StatsAggregationMethod, + EnvironmentStats, +) from mlagents.trainers.trajectory import Trajectory, AgentExperience from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.policy import Policy @@ -306,7 +309,7 @@ def __init__( self.publish_trajectory_queue(self.trajectory_queue) def record_environment_stats( - self, env_stats: Dict[str, Tuple[float, StatsAggregationMethod]], worker_id: int + self, env_stats: EnvironmentStats, worker_id: int ) -> None: """ Pass stats from the environment to the StatsReporter. @@ -316,11 +319,12 @@ def record_environment_stats( :param worker_id: :return: """ - for stat_name, (val, agg_type) in env_stats.items(): - if agg_type == StatsAggregationMethod.AVERAGE: - self.stats_reporter.add_stat(stat_name, val) - elif agg_type == StatsAggregationMethod.MOST_RECENT: - # In order to prevent conflicts between multiple environments, - # only stats from the first environment are recorded. - if worker_id == 0: - self.stats_reporter.set_stat(stat_name, val) + for stat_name, value_list in env_stats.items(): + for val, agg_type in value_list: + if agg_type == StatsAggregationMethod.AVERAGE: + self.stats_reporter.add_stat(stat_name, val) + elif agg_type == StatsAggregationMethod.MOST_RECENT: + # In order to prevent conflicts between multiple environments, + # only stats from the first environment are recorded. 
+ if worker_id == 0: + self.stats_reporter.set_stat(stat_name, val) diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index c96555cdaf..e50fd1ce24 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -6,7 +6,7 @@ BehaviorSpec, BehaviorName, ) -from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod +from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue @@ -23,7 +23,7 @@ class EnvironmentStep(NamedTuple): current_all_step_result: AllStepResult worker_id: int brain_name_to_action_info: Dict[BehaviorName, ActionInfo] - environment_stats: Dict[str, Tuple[float, StatsAggregationMethod]] + environment_stats: EnvironmentStats @property def name_behavior_ids(self) -> Iterable[BehaviorName]: diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 08b0c3402f..c688beaa57 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -1,4 +1,4 @@ -from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set, Tuple +from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set import cloudpickle import enum @@ -33,7 +33,7 @@ ) from mlagents_envs.side_channel.stats_side_channel import ( StatsSideChannel, - StatsAggregationMethod, + EnvironmentStats, ) from mlagents_envs.side_channel.side_channel import SideChannel @@ -64,7 +64,7 @@ class EnvironmentResponse(NamedTuple): class StepResponse(NamedTuple): all_step_result: AllStepResult timer_root: Optional[TimerNode] - environment_stats: Dict[str, Tuple[float, StatsAggregationMethod]] + environment_stats: EnvironmentStats class UnityEnvWorker: diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index eff55d82d8..26da308d66 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -246,12 +246,12 @@ def test_agent_manager_stats(): all_env_stats = [ { - "averaged": (1.0, StatsAggregationMethod.AVERAGE), - "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT), + "averaged": [(1.0, StatsAggregationMethod.AVERAGE)], + "most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)], }, { - "averaged": (3.0, StatsAggregationMethod.AVERAGE), - "most_recent": (4.0, StatsAggregationMethod.MOST_RECENT), + "averaged": [(3.0, StatsAggregationMethod.AVERAGE)], + "most_recent": [(4.0, StatsAggregationMethod.MOST_RECENT)], }, ] for env_stats in all_env_stats:
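For reference, a minimal sketch of the consumer side, mirroring the logic that the patched `record_environment_stats` in `agent_processor.py` applies to the new `EnvironmentStats` mapping: every `(value, aggregation)` pair in each per-key list is forwarded, AVERAGE values are accumulated, and MOST_RECENT values are only taken from worker 0 so parallel environments do not overwrite the same gauge. The `reporter` argument and the `forward_environment_stats` name are hypothetical stand-ins (anything exposing `add_stat`/`set_stat`, like the trainer's `stats_reporter`), not part of the patch.

from mlagents_envs.side_channel.stats_side_channel import (
    EnvironmentStats,
    StatsAggregationMethod,
)


def forward_environment_stats(
    env_stats: EnvironmentStats, worker_id: int, reporter
) -> None:
    # Each key now maps to a list of (value, aggregation-method) pairs
    # instead of a single pair.
    for stat_name, value_list in env_stats.items():
        for val, agg_type in value_list:
            if agg_type == StatsAggregationMethod.AVERAGE:
                # Accumulated values, averaged over the summary period.
                reporter.add_stat(stat_name, val)
            elif agg_type == StatsAggregationMethod.MOST_RECENT:
                # Only the first worker records gauges, avoiding conflicts
                # between multiple environments.
                if worker_id == 0:
                    reporter.set_stat(stat_name, val)


# Shape of the data after this change: the per-key lists can now hold more
# than one entry per summary period.
example_stats: EnvironmentStats = {
    "averaged": [
        (1.0, StatsAggregationMethod.AVERAGE),
        (3.0, StatsAggregationMethod.AVERAGE),
    ],
    "most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)],
}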