diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
index 053072d785..c6ac5988a2 100644
--- a/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
+++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
@@ -60,7 +60,7 @@ public void TestStoreInitalize()
                 reward = 1f,
                 actionMasks = new[] { false, true },
                 done = true,
-                id = 5,
+                episodeId = 5,
                 maxStepReached = true,
                 storedVectorActions = new[] { 0f, 1f },
             };
diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
index 6f0dab9c86..6f97ebf210 100644
--- a/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
+++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
@@ -20,10 +20,6 @@ public AgentInfo _Info
             }
         }
 
-        public bool IsDone()
-        {
-            return (bool)typeof(Agent).GetField("m_Done", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
-        }
         public int initializeAgentCalls;
         public int collectObservationsCalls;
         public int agentActionCalls;
@@ -191,8 +187,6 @@ public void TestAgent()
            agentGo2.AddComponent<TestAgent>();
            var agent2 = agentGo2.GetComponent<TestAgent>();
 
-            Assert.AreEqual(false, agent1.IsDone());
-            Assert.AreEqual(false, agent2.IsDone());
            Assert.AreEqual(0, agent1.agentResetCalls);
            Assert.AreEqual(0, agent2.agentResetCalls);
            Assert.AreEqual(0, agent1.initializeAgentCalls);
@@ -206,8 +200,6 @@ public void TestAgent()
            agentEnableMethod?.Invoke(agent2, new object[] { });
            agentEnableMethod?.Invoke(agent1, new object[] { });
 
-            Assert.AreEqual(false, agent1.IsDone());
-            Assert.AreEqual(false, agent2.IsDone());
            // agent1 was not enabled when the academy started
            // The agents have been initialized
            Assert.AreEqual(0, agent1.agentResetCalls);
            Assert.AreEqual(0, agent2.agentResetCalls);
@@ -422,18 +414,14 @@ public void TestAgent()
                 if (i % 11 == 5)
                 {
                     agent1.Done();
+                    numberAgent1Reset += 1;
                 }
                 // Resetting agent 2 regularly
                 if (i % 13 == 3)
                 {
-                    if (!(agent2.IsDone()))
-                    {
-                        // If the agent was already reset before the request decision
-                        // We should not reset again
-                        agent2.Done();
-                        numberAgent2Reset += 1;
-                        agent2StepSinceReset = 0;
-                    }
+                    agent2.Done();
+                    numberAgent2Reset += 1;
+                    agent2StepSinceReset = 0;
                 }
                 // Request a decision for agent 2 regularly
                 if (i % 3 == 2)
@@ -445,16 +433,9 @@ public void TestAgent()
                     // Request an action without decision regularly
                     agent2.RequestAction();
                 }
-                if (agent1.IsDone())
-                {
-                    numberAgent1Reset += 1;
-                }
                 acaStepsSinceReset += 1;
                 agent2StepSinceReset += 1;
 
-                //Agent 1 is only initialized at step 2
-                if (i < 2)
-                { }
                 aca.EnvironmentStep();
             }
         }
@@ -500,19 +481,23 @@ public void TestCumulativeReward()
             var j = 0;
             for (var i = 0; i < 500; i++)
             {
+                if (i % 20 == 0)
+                {
+                    j = 0;
+                }
+                else
+                {
+                    j++;
+                }
                 agent2.RequestAction();
-                Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
+                Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f);
                 Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
 
-                aca.EnvironmentStep();
                 agent1.AddReward(10f);
+                aca.EnvironmentStep();
+
+
-                if ((i % 21 == 0) && (i > 0))
-                {
-                    j = 0;
-                }
-                j++;
             }
         }
     }
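For reference, the rewritten bookkeeping in TestCumulativeReward is just "steps since the last reset": clearing j when i % 20 == 0 and incrementing it otherwise leaves j equal to i % 20, and the tightened assertion folds the old j * 0.1f + j * 10f into j * 10.1f. A quick standalone check of both identities (not part of this diff; the 0.1f step reward plus AddReward(10f) schedule is taken from the test above):

    using System;

    static class CumulativeRewardCheck
    {
        static void Main()
        {
            var j = 0;
            for (var i = 0; i < 200; i++)
            {
                if (i % 20 == 0) { j = 0; } else { j++; }   // new bookkeeping
                if (j != i % 20) throw new Exception("j != i % 20");
                // The new assertion checks the same quantity, folded:
                // j * 0.1f + j * 10f == j * 10.1f
                if (Math.Abs(j * 10.1f - (j * 0.1f + j * 10f)) > 1e-3f)
                    throw new Exception("folded constant mismatch");
            }
            Console.WriteLine("OK: j == i % 20 and j * 10.1f == j * 0.1f + j * 10f");
        }
    }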
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs b/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
index f17f66aab3..08af88c8cb 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
@@ -119,11 +119,6 @@ public bool IsCommunicatorOn
         // in addition to aligning on the step count of the global episode.
         public event System.Action<int> AgentSetStatus;
 
-        // Signals to all the agents at each environment step so they can reset
-        // if their flag has been set to done (assuming the agent has requested a
-        // decision).
-        public event System.Action AgentResetIfDone;
-
         // Signals to all the agents at each environment step so they can send
         // their state to their Policy if they have requested a decision.
         public event System.Action AgentSendState;
@@ -314,7 +309,6 @@ void ResetActions()
             DecideAction = () => { };
             DestroyAction = () => { };
             AgentSetStatus = i => { };
-            AgentResetIfDone = () => { };
             AgentSendState = () => { };
             AgentAct = () => { };
             AgentForceReset = () => { };
@@ -392,10 +386,6 @@ public void EnvironmentStep()
 
             AgentSetStatus?.Invoke(m_StepCount);
 
-            using (TimerStack.Instance.Scoped("AgentResetIfDone"))
-            {
-                AgentResetIfDone?.Invoke();
-            }
 
             using (TimerStack.Instance.Scoped("AgentSendState"))
             {
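With the AgentResetIfDone phase gone, an environment step is left with three agent phases after the status broadcast. A stub sketch of the resulting order (not the real Academy; the DecideAction and AgentAct scopes are assumed to follow the surviving AgentSendState pattern above, and the step-count increment is assumed):

    using System;

    class StepOrderSketch
    {
        event Action<int> AgentSetStatus = _ => { };
        event Action AgentSendState = () => { };
        event Action DecideAction = () => { };
        event Action AgentAct = () => { };
        int m_StepCount;

        void EnvironmentStep()
        {
            AgentSetStatus?.Invoke(m_StepCount); // agents align with the global step
            AgentSendState?.Invoke();            // agents with RequestDecision() send AgentInfo
            DecideAction?.Invoke();              // policies produce actions
            AgentAct?.Invoke();                  // agents run AgentStep()
            m_StepCount += 1;                    // assumed; the real increment lives elsewhere
        }

        static void Main() => new StepOrderSketch().EnvironmentStep();
    }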
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
index 25a6875467..e138eea921 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
@@ -41,10 +41,10 @@ public struct AgentInfo
         public bool maxStepReached;
 
         /// <summary>
-        /// Unique identifier each agent receives at initialization. It is used
+        /// Episode identifier each agent receives at every reset. It is used
         /// to separate between different agents in the environment.
         /// </summary>
-        public int id;
+        public int episodeId;
     }
@@ -148,13 +148,6 @@ public abstract class Agent : MonoBehaviour
         /// Whether or not the agent requests a decision.
         bool m_RequestDecision;
 
-        /// Whether or not the agent has completed the episode. This may be due
-        /// to either reaching a success or fail state, or reaching the maximum
-        /// number of steps (i.e. timing out).
-        bool m_Done;
-
-        /// Whether or not the agent reached the maximum number of steps.
-        bool m_MaxStepReached;
 
         /// Keeps track of the number of steps taken by the agent in this episode.
         /// Note that this value is different for each agent, and may not overlap
@@ -162,9 +155,10 @@ public abstract class Agent : MonoBehaviour
         /// their own experience.
         int m_StepCount;
 
-        /// Unique identifier each agent receives at initialization. It is used
+        /// Episode identifier each agent receives. It is used
         /// to separate between different agents in the environment.
-        int m_Id;
+        /// This Id will be changed every time the Agent resets.
+        int m_EpisodeId;
 
         /// Keeps track of the actions that are masked at each step.
         ActionMasker m_ActionMasker;
@@ -190,7 +184,7 @@ public abstract class Agent : MonoBehaviour
         /// becomes enabled or active.
         void OnEnable()
         {
-            m_Id = gameObject.GetInstanceID();
+            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
             OnEnableHelper();
 
             m_Recorder = GetComponent<DemonstrationRecorder>();
@@ -204,7 +198,6 @@ void OnEnableHelper()
             m_Action = new AgentAction();
             sensors = new List<ISensor>();
 
-            Academy.Instance.AgentResetIfDone += ResetIfDone;
             Academy.Instance.AgentSendState += SendInfo;
             Academy.Instance.DecideAction += DecideAction;
             Academy.Instance.AgentAct += AgentStep;
@@ -224,7 +217,6 @@ void OnDisable()
             // We don't want to even try, because this will lazily create a new Academy!
             if (Academy.IsInitialized)
             {
-                Academy.Instance.AgentResetIfDone -= ResetIfDone;
                 Academy.Instance.AgentSendState -= SendInfo;
                 Academy.Instance.DecideAction -= DecideAction;
                 Academy.Instance.AgentAct -= AgentStep;
@@ -234,12 +226,20 @@ void OnDisable()
             }
             m_Brain?.Dispose();
         }
 
-        void NotifyAgentDone()
+        void NotifyAgentDone(bool maxStepReached = false)
         {
+            m_Info.reward = m_Reward;
             m_Info.done = true;
+            m_Info.maxStepReached = maxStepReached;
             // Request the last decision with no callbacks
-            // We request a decision so Python knows the Agent is disabled
+            // We request a decision so Python knows the Agent is done immediately
             m_Brain?.RequestDecision(m_Info, sensors, (a) => { });
+            // The Agent is done, so we give it a new episode Id
+            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
+            m_Reward = 0f;
+            m_CumulativeReward = 0f;
+            m_RequestAction = false;
+            m_RequestDecision = false;
         }
 
@@ -322,7 +322,9 @@ public float GetCumulativeReward()
         /// </summary>
         public void Done()
         {
-            m_Done = true;
+            NotifyAgentDone();
+            _AgentReset();
+
         }
 
         /// <summary>
@@ -342,28 +344,6 @@ public void RequestAction()
             m_RequestAction = true;
         }
 
-        /// <summary>
-        /// Indicates if the agent has reached his maximum number of steps.
-        /// </summary>
-        /// <returns>
-        /// <c>true</c>, if max step reached was reached, <c>false</c> otherwise.
-        /// </returns>
-        public bool IsMaxStepReached()
-        {
-            return m_MaxStepReached;
-        }
-
-        /// <summary>
-        /// Indicates if the agent is done
-        /// </summary>
-        /// <returns>
-        /// <c>true</c>, if the agent is done, <c>false</c> otherwise.
-        /// </returns>
-        public bool IsDone()
-        {
-            return m_Done;
-        }
-
         /// Helper function that resets all the data structures associated with
         /// the agent. Typically used when the agent is being initialized or reset
         /// at the end of an episode.
@@ -489,9 +469,9 @@ void SendInfoToBrain()
             m_Info.actionMasks = m_ActionMasker.GetMask();
 
             m_Info.reward = m_Reward;
-            m_Info.done = m_Done;
-            m_Info.maxStepReached = m_MaxStepReached;
-            m_Info.id = m_Id;
+            m_Info.done = false;
+            m_Info.maxStepReached = false;
+            m_Info.episodeId = m_EpisodeId;
 
             m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);
@@ -742,31 +722,16 @@ protected float ScaleAction(float rawAction, float min, float max)
 
         }
 
-        /// Signals the agent that it must reset if its done flag is set to true.
-        void ResetIfDone()
-        {
-            if (m_Done)
-            {
-                _AgentReset();
-            }
-        }
-
         /// <summary>
         /// Signals the agent that it must sent its decision to the brain.
         /// </summary>
         void SendInfo()
         {
             // If the Agent is done, it has just reset and thus requires a new decision
-            if (m_RequestDecision || m_Done)
+            if (m_RequestDecision)
             {
                 SendInfoToBrain();
                 m_Reward = 0f;
-                if (m_Done)
-                {
-                    m_CumulativeReward = 0f;
-                }
-                m_Done = false;
-                m_MaxStepReached = false;
                 m_RequestDecision = false;
             }
         }
@@ -774,19 +739,21 @@ void SendInfo()
         /// Used by the brain to make the agent perform a step.
         void AgentStep()
         {
+            if ((m_StepCount >= maxStep - 1) && (maxStep > 0))
+            {
+                NotifyAgentDone(true);
+                _AgentReset();
+
+            }
+            else
+            {
+                m_StepCount += 1;
+            }
             if ((m_RequestAction) && (m_Brain != null))
             {
                 m_RequestAction = false;
                 AgentAction(m_Action.vectorActions);
             }
-
-            if ((m_StepCount >= maxStep) && (maxStep > 0))
-            {
-                m_MaxStepReached = true;
-                Done();
-            }
-
-            m_StepCount += 1;
         }
 
         void DecideAction()
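Done() is now synchronous: NotifyAgentDone ships the terminal AgentInfo (done = true, plus the reward earned so far), a fresh episode id is drawn, rewards and request flags are cleared, and _AgentReset runs in the same call instead of waiting for the next step's AgentResetIfDone pass. A toy trace of that ordering (not PR code; a local counter stands in for EpisodeIdCounter):

    using System;

    static class DoneFlowTrace
    {
        static int s_Counter;
        static int GetEpisodeId() => s_Counter++;

        static void Main()
        {
            var episodeId = GetEpisodeId();   // OnEnable: first id
            var cumulativeReward = 1.5f;

            // Done() == NotifyAgentDone() followed by _AgentReset():
            Console.WriteLine($"terminal info: episodeId={episodeId}, done=true, reward={cumulativeReward}");
            episodeId = GetEpisodeId();       // the agent immediately gets a new episode id
            cumulativeReward = 0f;            // rewards and request flags are cleared
            Console.WriteLine($"new episode:   episodeId={episodeId}, done=false, reward={cumulativeReward}");
        }
    }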
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs
new file mode 100644
index 0000000000..aed7431cd1
--- /dev/null
+++ b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs
@@ -0,0 +1,11 @@
+namespace MLAgents
+{
+    public static class EpisodeIdCounter
+    {
+        private static int Counter;
+        public static int GetEpisodeId()
+        {
+            return Counter++;
+        }
+    }
+}
\ No newline at end of file
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta
new file mode 100644
index 0000000000..c377f5004b
--- /dev/null
+++ b/UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 847786b7bcf9d4817b3f3879d57517c7
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant:
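EpisodeIdCounter is process-wide, so ids are unique across all agents and all episodes, never reused. Note the backing field is a plain static int: that is presumably fine because OnEnable and NotifyAgentDone only run on Unity's main thread, but it would need Interlocked.Increment to be safe off that thread. Minimal usage:

    using System;
    using MLAgents;

    static class EpisodeIdCounterDemo
    {
        static void Main()
        {
            // Ids are handed out sequentially, one per episode.
            Console.WriteLine(EpisodeIdCounter.GetEpisodeId()); // 0 on a fresh process
            Console.WriteLine(EpisodeIdCounter.GetEpisodeId()); // 1
        }
    }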
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
index f8c84223fe..4df7e18854 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
@@ -43,7 +43,7 @@ public static AgentInfoProto ToAgentInfoProto(this AgentInfo ai)
                 Reward = ai.reward,
                 MaxStepReached = ai.maxStepReached,
                 Done = ai.done,
-                Id = ai.id,
+                Id = ai.episodeId,
             };
 
             if (ai.actionMasks != null)
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs
index 1a1a4e3bbc..7b8717c683 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs
@@ -275,7 +275,7 @@ public void PutObservations(string brainKey, AgentInfo info, List<ISensor> senso
             {
                 m_ActionCallbacks[brainKey] = new List<IdCallbackPair>();
             }
-            m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.id, Callback = action });
+            m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.episodeId, Callback = action });
         }
 
         /// <summary>
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
index b3b2cb4ce3..ac5a903dad 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
@@ -165,9 +165,9 @@ public void Generate(
                 if (info.done)
                 {
-                    m_Memories.Remove(info.id);
+                    m_Memories.Remove(info.episodeId);
                 }
-                if (!m_Memories.TryGetValue(info.id, out memory))
+                if (!m_Memories.TryGetValue(info.episodeId, out memory))
                 {
                     for (var j = 0; j < memorySize; j++)
                     {
@@ -221,9 +221,9 @@ public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
                 List<float> memory;
                 if (info.done)
                 {
-                    m_Memories.Remove(info.id);
+                    m_Memories.Remove(info.episodeId);
                 }
-                if (!m_Memories.TryGetValue(info.id, out memory))
+                if (!m_Memories.TryGetValue(info.episodeId, out memory))
                 {
                     for (var j = 0; j < memorySize; j++)
diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs
index 10107c4fde..3a85090b62 100644
--- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs
@@ -130,7 +130,7 @@ public void PutObservations(AgentInfo info, List<ISensor> sensors, Action<float[]> action)
-            m_ActionCallbacks.Add(new IdCallbackPair { AgentId = info.id, Callback = action });
+            m_ActionCallbacks.Add(new IdCallbackPair { AgentId = info.episodeId, Callback = action });