From 6ae29209c0099d976dbfc8487e8ae3ea6a34ca9e Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 3 Sep 2020 16:38:52 -0700 Subject: [PATCH 1/3] allow ending the episode for MaxStepsReached --- com.unity.ml-agents/Runtime/Agent.cs | 35 ++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index 2543888b3a..472d58d395 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -445,7 +445,7 @@ public void LazyInitialize() enum DoneReason { /// - /// The method was called. + /// The episode was ended manually by calling . /// DoneCalled, @@ -691,10 +691,41 @@ void UpdateRewardStats() /// /// Sets the done flag to true and resets the agent. /// + /// + /// This should be used when the episode can no longer continue, such as when the Agent + /// reaches the goal or fails at the task. + /// /// + /// public void EndEpisode() { - NotifyAgentDone(DoneReason.DoneCalled); + EndEpisodeAndReset(DoneReason.DoneCalled); + } + + /// + /// Indicate that the episode has reached a maximum number of steps. + /// This has the same end result as calling , but has a + /// slightly different effect on training. + /// + /// + /// This should be used when the episode could continue, but has gone on for + /// a sufficient number of steps. + /// + /// + /// + public void EpisodeMaxStepReached() + { + EndEpisodeAndReset(DoneReason.MaxStepReached); + } + + + /// + /// Internal method to end the episode and reset the Agent. 
+ /// + /// + void EndEpisodeAndReset(DoneReason reason) + { + NotifyAgentDone(reason); _AgentReset(); } From 44fe5595b337e099d63dd85f3908704997422d6e Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 3 Sep 2020 16:51:45 -0700 Subject: [PATCH 2/3] changelog --- com.unity.ml-agents/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index a07545592a..94f2241a09 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -19,6 +19,9 @@ and this project adheres to - Enabled C# formatting using `dotnet-format`. (#4362) - GridSensor was added to the com.unity.ml-agents.extensions package. Thank you to Jaden Travnik from Eidos Montreal for the contribution! (#4399) +- Added `Agent.EpisodeMaxStepReached()` to reset the agent when it has reached +a user-determined maximum number of steps. This behaves similarly to +`Agent.EndEpsiode()` but has a slightly different effect on training (#4453). #### ml-agents / ml-agents-envs / gym-unity (Python) - Experimental PyTorch support has been added. Use `--torch` when running `mlagents-learn`, or add `framework: pytorch` to your trainer configuration (under the behavior name) to enable it. From 93fec02317bee91ce55270eadfb8a7d219030747 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 9 Sep 2020 09:58:30 -0700 Subject: [PATCH 3/3] rename and update docs --- com.unity.ml-agents/CHANGELOG.md | 6 +++--- com.unity.ml-agents/Runtime/Agent.cs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 94f2241a09..5e9eff9ce0 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -19,9 +19,9 @@ and this project adheres to - Enabled C# formatting using `dotnet-format`. (#4362) - GridSensor was added to the com.unity.ml-agents.extensions package. 
Thank you to Jaden Travnik from Eidos Montreal for the contribution! (#4399) -- Added `Agent.EpisodeMaxStepReached()` to reset the agent when it has reached -a user-determined maximum number of steps. This behaves similarly to -`Agent.EndEpsiode()` but has a slightly different effect on training (#4453). +- Added `Agent.EpisodeInterrupted()`, which can be used to reset the agent when +it has reached a user-determined maximum number of steps. This behaves similarly +to `Agent.EndEpisode()` but has a slightly different effect on training (#4453). #### ml-agents / ml-agents-envs / gym-unity (Python) - Experimental PyTorch support has been added. Use `--torch` when running `mlagents-learn`, or add `framework: pytorch` to your trainer configuration (under the behavior name) to enable it. diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index 472d58d395..af5a7c00f9 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -696,14 +696,14 @@ void UpdateRewardStats() /// reaches the goal or fails at the task. /// /// - /// + /// public void EndEpisode() { EndEpisodeAndReset(DoneReason.DoneCalled); } /// - /// Indicate that the episode has reached a maximum number of steps. + /// Indicate that the episode is over but not due to the "fault" of the Agent. /// This has the same end result as calling , but has a /// slightly different effect on training. /// @@ -713,7 +713,7 @@ public void EndEpisode() /// /// /// - public void EpisodeMaxStepReached() + public void EpisodeInterrupted() { EndEpisodeAndReset(DoneReason.MaxStepReached); }