diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
index 053072d785..c6ac5988a2 100644
--- a/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
+++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
@@ -60,7 +60,7 @@ public void TestStoreInitalize()
                 reward = 1f,
                 actionMasks = new[] { false, true },
                 done = true,
-                id = 5,
+                episodeId = 5,
                 maxStepReached = true,
                 storedVectorActions = new[] { 0f, 1f },
             };
diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
index 6f0dab9c86..6f97ebf210 100644
--- a/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
+++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
@@ -20,10 +20,6 @@ public AgentInfo _Info
             }
         }
 
-        public bool IsDone()
-        {
-            return (bool)typeof(Agent).GetField("m_Done", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
-        }
         public int initializeAgentCalls;
         public int collectObservationsCalls;
         public int agentActionCalls;
@@ -191,8 +187,6 @@ public void TestAgent()
            agentGo2.AddComponent<TestAgent>();
            var agent2 = agentGo2.GetComponent<TestAgent>();
 
-            Assert.AreEqual(false, agent1.IsDone());
-            Assert.AreEqual(false, agent2.IsDone());
            Assert.AreEqual(0, agent1.agentResetCalls);
            Assert.AreEqual(0, agent2.agentResetCalls);
            Assert.AreEqual(0, agent1.initializeAgentCalls);
@@ -206,8 +200,6 @@ public void TestAgent()
            agentEnableMethod?.Invoke(agent2, new object[] { });
            agentEnableMethod?.Invoke(agent1, new object[] { });
 
-            Assert.AreEqual(false, agent1.IsDone());
-            Assert.AreEqual(false, agent2.IsDone());
            // agent1 was not enabled when the academy started
            // The agents have been initialized
            Assert.AreEqual(0, agent1.agentResetCalls);
            Assert.AreEqual(0, agent2.agentResetCalls);
@@ -422,18 +414,14 @@ public void TestAgent()
                 if (i % 11 == 5)
                 {
                     agent1.Done();
+                    numberAgent1Reset += 1;
                 }
                 // Resetting agent 2 regularly
                 if (i % 13 == 3)
                 {
-                    if (!(agent2.IsDone()))
-                    {
-                        // If the agent was already reset before the request decision
-                        // We should not reset again
-                        agent2.Done();
-                        numberAgent2Reset += 1;
-                        agent2StepSinceReset = 0;
-                    }
+                    agent2.Done();
+                    numberAgent2Reset += 1;
+                    agent2StepSinceReset = 0;
                 }
                 // Request a decision for agent 2 regularly
                 if (i % 3 == 2)
@@ -445,16 +433,9 @@ public void TestAgent()
                     // Request an action without decision regularly
                     agent2.RequestAction();
                 }
-                if (agent1.IsDone())
-                {
-                    numberAgent1Reset += 1;
-                }
                 acaStepsSinceReset += 1;
                 agent2StepSinceReset += 1;
 
-                //Agent 1 is only initialized at step 2
-                if (i < 2)
-                { }
                 aca.EnvironmentStep();
             }
         }
@@ -500,19 +481,23 @@ public void TestCumulativeReward()
             var j = 0;
             for (var i = 0; i < 500; i++)
             {
+                if (i % 20 == 0)
+                {
+                    j = 0;
+                }
+                else
+                {
+                    j++;
+                }
                 agent2.RequestAction();
-                Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
+                Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f);
                 Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
 
-                aca.EnvironmentStep();
                 agent1.AddReward(10f);
+                aca.EnvironmentStep();
+
+
-                if ((i % 21 == 0) && (i > 0))
-                {
-                    j = 0;
-                }
-                j++;
             }
         }
     }
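For reference, the rewritten bookkeeping in TestCumulativeReward is just "steps since the last reset": clearing j when i % 20 == 0 and incrementing it otherwise leaves j equal to i % 20, and the tightened assertion folds the old j * 0.1f + j * 10f into j * 10.1f. A quick standalone check of both identities (not part of this diff; the 0.1f step reward plus AddReward(10f) schedule is taken from the test above):

    using System;

    static class CumulativeRewardCheck
    {
        static void Main()
        {
            var j = 0;
            for (var i = 0; i < 200; i++)
            {
                if (i % 20 == 0) { j = 0; } else { j++; }   // new bookkeeping
                if (j != i % 20) throw new Exception("j != i % 20");
                // The new assertion checks the same quantity, folded:
                // j * 0.1f + j * 10f == j * 10.1f
                if (Math.Abs(j * 10.1f - (j * 0.1f + j * 10f)) > 1e-3f)
                    throw new Exception("folded constant mismatch");
            }
            Console.WriteLine("OK: j == i % 20 and j * 10.1f == j * 0.1f + j * 10f");
        }
    }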
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs b/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
index f17f66aab3..08af88c8cb 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
@@ -119,11 +119,6 @@ public bool IsCommunicatorOn
         // in addition to aligning on the step count of the global episode.
         public event System.Action<int> AgentSetStatus;
 
-        // Signals to all the agents at each environment step so they can reset
-        // if their flag has been set to done (assuming the agent has requested a
-        // decision).
-        public event System.Action AgentResetIfDone;
-
         // Signals to all the agents at each environment step so they can send
         // their state to their Policy if they have requested a decision.
         public event System.Action AgentSendState;
@@ -314,7 +309,6 @@ void ResetActions()
             DecideAction = () => { };
             DestroyAction = () => { };
             AgentSetStatus = i => { };
-            AgentResetIfDone = () => { };
             AgentSendState = () => { };
             AgentAct = () => { };
             AgentForceReset = () => { };
@@ -392,10 +386,6 @@ public void EnvironmentStep()
 
             AgentSetStatus?.Invoke(m_StepCount);
 
-            using (TimerStack.Instance.Scoped("AgentResetIfDone"))
-            {
-                AgentResetIfDone?.Invoke();
-            }
 
             using (TimerStack.Instance.Scoped("AgentSendState"))
             {
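With the AgentResetIfDone phase gone, an environment step is left with three agent phases after the status broadcast. A stub sketch of the resulting order (not the real Academy; the DecideAction and AgentAct scopes are assumed to follow the surviving AgentSendState pattern above, and the step-count increment is assumed):

    using System;

    class StepOrderSketch
    {
        event Action<int> AgentSetStatus = _ => { };
        event Action AgentSendState = () => { };
        event Action DecideAction = () => { };
        event Action AgentAct = () => { };
        int m_StepCount;

        void EnvironmentStep()
        {
            AgentSetStatus?.Invoke(m_StepCount); // agents align with the global step
            AgentSendState?.Invoke();            // agents with RequestDecision() send AgentInfo
            DecideAction?.Invoke();              // policies produce actions
            AgentAct?.Invoke();                  // agents run AgentStep()
            m_StepCount += 1;                    // assumed; the real increment lives elsewhere
        }

        static void Main() => new StepOrderSketch().EnvironmentStep();
    }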
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
index 25a6875467..e138eea921 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
@@ -41,10 +41,10 @@ public struct AgentInfo
         public bool maxStepReached;
 
         /// <summary>
-        /// Unique identifier each agent receives at initialization. It is used
+        /// Episode identifier each agent receives at every reset. It is used
         /// to separate between different agents in the environment.
         /// </summary>
-        public int id;
+        public int episodeId;
     }
@@ -148,13 +148,6 @@ public abstract class Agent : MonoBehaviour
         /// Whether or not the agent requests a decision.
         bool m_RequestDecision;
 
-        /// Whether or not the agent has completed the episode. This may be due
-        /// to either reaching a success or fail state, or reaching the maximum
-        /// number of steps (i.e. timing out).
-        bool m_Done;
-
-        /// Whether or not the agent reached the maximum number of steps.
-        bool m_MaxStepReached;
 
         /// Keeps track of the number of steps taken by the agent in this episode.
         /// Note that this value is different for each agent, and may not overlap
@@ -162,9 +155,10 @@ public abstract class Agent : MonoBehaviour
         /// their own experience.
         int m_StepCount;
 
-        /// Unique identifier each agent receives at initialization. It is used
+        /// Episode identifier each agent receives. It is used
         /// to separate between different agents in the environment.
-        int m_Id;
+        /// This Id will be changed every time the Agent resets.
+        int m_EpisodeId;
 
         /// Keeps track of the actions that are masked at each step.
         ActionMasker m_ActionMasker;
@@ -190,7 +184,7 @@ public abstract class Agent : MonoBehaviour
         /// becomes enabled or active.
         void OnEnable()
         {
-            m_Id = gameObject.GetInstanceID();
+            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
             OnEnableHelper();
 
             m_Recorder = GetComponent<DemonstrationRecorder>();
@@ -204,7 +198,6 @@ void OnEnableHelper()
             m_Action = new AgentAction();
             sensors = new List<ISensor>();
 
-            Academy.Instance.AgentResetIfDone += ResetIfDone;
             Academy.Instance.AgentSendState += SendInfo;
             Academy.Instance.DecideAction += DecideAction;
             Academy.Instance.AgentAct += AgentStep;
@@ -224,7 +217,6 @@ void OnDisable()
             // We don't want to even try, because this will lazily create a new Academy!
             if (Academy.IsInitialized)
             {
-                Academy.Instance.AgentResetIfDone -= ResetIfDone;
                 Academy.Instance.AgentSendState -= SendInfo;
                 Academy.Instance.DecideAction -= DecideAction;
                 Academy.Instance.AgentAct -= AgentStep;
@@ -234,12 +226,20 @@ void OnDisable()
             }
             m_Brain?.Dispose();
         }
 
-        void NotifyAgentDone()
+        void NotifyAgentDone(bool maxStepReached = false)
         {
+            m_Info.reward = m_Reward;
             m_Info.done = true;
+            m_Info.maxStepReached = maxStepReached;
             // Request the last decision with no callbacks
-            // We request a decision so Python knows the Agent is disabled
+            // We request a decision so Python knows the Agent is done immediately
             m_Brain?.RequestDecision(m_Info, sensors, (a) => { });
+            // The Agent is done, so we give it a new episode Id
+            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
+            m_Reward = 0f;
+            m_CumulativeReward = 0f;
+            m_RequestAction = false;
+            m_RequestDecision = false;
         }
 
@@ -322,7 +322,9 @@ public float GetCumulativeReward()
         /// </summary>
         public void Done()
         {
-            m_Done = true;
+            NotifyAgentDone();
+            _AgentReset();
+
         }
 
         /// <summary>
@@ -342,28 +344,6 @@ public void RequestAction()
             m_RequestAction = true;
         }
 
-        /// <summary>
-        /// Indicates if the agent has reached his maximum number of steps.
-        /// </summary>
-        /// <returns>
-        /// <c>true</c>, if max step reached was reached, <c>false</c> otherwise.
-        /// </returns>
-        public bool IsMaxStepReached()
-        {
-            return m_MaxStepReached;
-        }
-
-        /// <summary>
-        /// Indicates if the agent is done
-        /// </summary>
-        /// <returns>
-        /// <c>true</c>, if the agent is done, <c>false</c> otherwise.
-        /// </returns>
-        public bool IsDone()
-        {
-            return m_Done;
-        }
-
         /// Helper function that resets all the data structures associated with
         /// the agent. Typically used when the agent is being initialized or reset
         /// at the end of an episode.
@@ -489,9 +469,9 @@ void SendInfoToBrain()
             m_Info.actionMasks = m_ActionMasker.GetMask();
 
             m_Info.reward = m_Reward;
-            m_Info.done = m_Done;
-            m_Info.maxStepReached = m_MaxStepReached;
-            m_Info.id = m_Id;
+            m_Info.done = false;
+            m_Info.maxStepReached = false;
+            m_Info.episodeId = m_EpisodeId;
 
             m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);
@@ -742,31 +722,16 @@ protected float ScaleAction(float rawAction, float min, float max)
 
         }
 
-        /// Signals the agent that it must reset if its done flag is set to true.
-        void ResetIfDone()
-        {
-            if (m_Done)
-            {
-                _AgentReset();
-            }
-        }
-
         /// <summary>
         /// Signals the agent that it must sent its decision to the brain.
         /// </summary>
         void SendInfo()
         {
             // If the Agent is done, it has just reset and thus requires a new decision
-            if (m_RequestDecision || m_Done)
+            if (m_RequestDecision)
             {
                 SendInfoToBrain();
                 m_Reward = 0f;
-                if (m_Done)
-                {
-                    m_CumulativeReward = 0f;
-                }
-                m_Done = false;
-                m_MaxStepReached = false;
                 m_RequestDecision = false;
             }
         }
@@ -774,19 +739,21 @@ void SendInfo()
         /// Used by the brain to make the agent perform a step.
         void AgentStep()
         {
+            if ((m_StepCount >= maxStep - 1) && (maxStep > 0))
+            {
+                NotifyAgentDone(true);
+                _AgentReset();
+
+            }
+            else
+            {
+                m_StepCount += 1;
+            }
             if ((m_RequestAction) && (m_Brain != null))
             {
                 m_RequestAction = false;
                 AgentAction(m_Action.vectorActions);
             }
-
-            if ((m_StepCount >= maxStep) && (maxStep > 0))
-            {
-                m_MaxStepReached = true;
-                Done();
-            }
-
-            m_StepCount += 1;
         }
 
         void DecideAction()
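Done() is now synchronous: NotifyAgentDone ships the terminal AgentInfo (done = true, plus the reward earned so far), a fresh episode id is drawn, rewards and request flags are cleared, and _AgentReset runs in the same call instead of waiting for the next step's AgentResetIfDone pass. A toy trace of that ordering (not PR code; a local counter stands in for EpisodeIdCounter):

    using System;

    static class DoneFlowTrace
    {
        static int s_Counter;
        static int GetEpisodeId() => s_Counter++;

        static void Main()
        {
            var episodeId = GetEpisodeId();   // OnEnable: first id
            var cumulativeReward = 1.5f;

            // Done() == NotifyAgentDone() followed by _AgentReset():
            Console.WriteLine($"terminal info: episodeId={episodeId}, done=true, reward={cumulativeReward}");
            episodeId = GetEpisodeId();       // the agent immediately gets a new episode id
            cumulativeReward = 0f;            // rewards and request flags are cleared
            Console.WriteLine($"new episode:   episodeId={episodeId}, done=false, reward={cumulativeReward}");
        }
    }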
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs
new file mode 100644
index 0000000000..aed7431cd1
--- /dev/null
+++ b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs
@@ -0,0 +1,11 @@
+namespace MLAgents
+{
+    public static class EpisodeIdCounter
+    {
+        private static int Counter;
+        public static int GetEpisodeId()
+        {
+            return Counter++;
+        }
+    }
+}
\ No newline at end of file
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta
new file mode 100644
index 0000000000..c377f5004b
--- /dev/null
+++ b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 847786b7bcf9d4817b3f3879d57517c7
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant:
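EpisodeIdCounter is process-wide, so ids are unique across all agents and all episodes, never reused. Note the backing field is a plain static int: that is presumably fine because OnEnable and NotifyAgentDone only run on Unity's main thread, but it would need Interlocked.Increment to be safe off that thread. Minimal usage:

    using System;
    using MLAgents;

    static class EpisodeIdCounterDemo
    {
        static void Main()
        {
            // Ids are handed out sequentially, one per episode.
            Console.WriteLine(EpisodeIdCounter.GetEpisodeId()); // 0 on a fresh process
            Console.WriteLine(EpisodeIdCounter.GetEpisodeId()); // 1
        }
    }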
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
index f8c84223fe..4df7e18854 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
@@ -43,7 +43,7 @@ public static AgentInfoProto ToAgentInfoProto(this AgentInfo ai)
                 Reward = ai.reward,
                 MaxStepReached = ai.maxStepReached,
                 Done = ai.done,
-                Id = ai.id,
+                Id = ai.episodeId,
             };
 
             if (ai.actionMasks != null)
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs
index 1a1a4e3bbc..7b8717c683 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs
@@ -275,7 +275,7 @@ public void PutObservations(string brainKey, AgentInfo info, List<ISensor> senso
             {
                 m_ActionCallbacks[brainKey] = new List<IdCallbackPair>();
             }
-            m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.id, Callback = action });
+            m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.episodeId, Callback = action });
         }
 
         /// <summary>
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
index b3b2cb4ce3..ac5a903dad 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
@@ -165,9 +165,9 @@ public void Generate(
                 if (info.done)
                 {
-                    m_Memories.Remove(info.id);
+                    m_Memories.Remove(info.episodeId);
                 }
-                if (!m_Memories.TryGetValue(info.id, out memory))
+                if (!m_Memories.TryGetValue(info.episodeId, out memory))
                 {
                     for (var j = 0; j < memorySize; j++)
                     {
@@ -221,9 +221,9 @@ public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
                 List<float> memory;
                 if (info.done)
                 {
-                    m_Memories.Remove(info.id);
+                    m_Memories.Remove(info.episodeId);
                 }
-                if (!m_Memories.TryGetValue(info.id, out memory))
+                if (!m_Memories.TryGetValue(info.episodeId, out memory))
                 {
                     for (var j = 0; j < memorySize; j++)
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs
index 10107c4fde..3a85090b62 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs
@@ -130,7 +130,7 @@ public void PutObservations(AgentInfo info, List<ISensor> sensors, Action<float[]> action)
-            m_ActionCallbacks.Add(new IdCallbackPair { AgentId = info.id, Callback = action });
+            m_ActionCallbacks.Add(new IdCallbackPair { AgentId = info.episodeId, Callback = action });