From 5e5250d8cca3441b77d173f2cc560c98edadf57f Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 8 Jul 2020 21:27:07 -0700 Subject: [PATCH] better logging for NaN rewards --- .../Examples/Crawler/Scripts/CrawlerAgent.cs | 25 ++++++++++++-- .../Examples/Walker/Scripts/WalkerAgent.cs | 33 ++++++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs b/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs index c77db9a0df..e0b7951833 100644 --- a/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs @@ -1,7 +1,9 @@ +using System; using UnityEngine; using Unity.MLAgents; using Unity.MLAgentsExamples; using Unity.MLAgents.Sensors; +using Random = UnityEngine.Random; [RequireComponent(typeof(JointDriveController))] // Required to set joint forces public class CrawlerAgent : Agent @@ -111,7 +113,7 @@ public override void CollectObservations(VectorSensor sensor) { //Add body rotation delta relative to orientation cube sensor.AddObservation(Quaternion.FromToRotation(body.forward, orientationCube.transform.forward)); - + //Add pos of target relative to orientation cube sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position)); @@ -211,7 +213,15 @@ void RewardFunctionMovingTowards() { var movingTowardsDot = Vector3.Dot(orientationCube.transform.forward, Vector3.ClampMagnitude(m_JdController.bodyPartsDict[body].rb.velocity, maximumWalkingSpeed)); - ; + if (float.IsNaN(movingTowardsDot)) + { + throw new ArgumentException( + "NaN in movingTowardsDot.\n" + + $" orientationCube.transform.forward: {orientationCube.transform.forward}\n"+ + $" body.velocity: {m_JdController.bodyPartsDict[body].rb.velocity}\n"+ + $" maximumWalkingSpeed: {maximumWalkingSpeed}" + ); + } AddReward(0.03f * movingTowardsDot); } @@ -220,7 +230,16 @@ void RewardFunctionMovingTowards() 
/// void RewardFunctionFacingTarget() { - AddReward(0.01f * Vector3.Dot(orientationCube.transform.forward, body.forward)); + var facingReward = Vector3.Dot(orientationCube.transform.forward, body.forward); + if (float.IsNaN(facingReward)) + { + throw new ArgumentException( + "NaN in facingReward.\n" + + $" orientationCube.transform.forward: {orientationCube.transform.forward}\n"+ + $" body.forward: {body.forward}" + ); + } + AddReward(0.01f * facingReward); } /// diff --git a/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs b/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs index e6ddb9a71f..d42c946594 100644 --- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs @@ -1,9 +1,11 @@ +using System; using MLAgentsExamples; using UnityEngine; using Unity.MLAgents; using Unity.MLAgentsExamples; using Unity.MLAgents.Sensors; using BodyPart = Unity.MLAgentsExamples.BodyPart; +using Random = UnityEngine.Random; public class WalkerAgent : Agent { @@ -171,11 +173,40 @@ void FixedUpdate() // a. Velocity alignment with goal direction. var moveTowardsTargetReward = Vector3.Dot(cubeForward, Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed)); + if (float.IsNaN(moveTowardsTargetReward)) + { + throw new ArgumentException( + "NaN in moveTowardsTargetReward.\n" + + $" cubeForward: {cubeForward}\n"+ + $" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n"+ + $" maximumWalkingSpeed: {maximumWalkingSpeed}" + ); + } + // b. Rotation alignment with goal direction. var lookAtTargetReward = Vector3.Dot(cubeForward, head.forward); + if (float.IsNaN(lookAtTargetReward)) + { + throw new ArgumentException( + "NaN in lookAtTargetReward.\n" + + $" cubeForward: {cubeForward}\n"+ + $" head.forward: {head.forward}" + ); + } + // c. Encourage head height. 
//Should normalize to ~1 - var headHeightOverFeetReward = + var headHeightOverFeetReward = ((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10); + if (float.IsNaN(headHeightOverFeetReward)) + { + throw new ArgumentException( + "NaN in headHeightOverFeetReward.\n" + + $" head.position: {head.position}\n"+ + $" footL.position: {footL.position}\n"+ + $" footR.position: {footR.position}" + ); + } + AddReward( + 0.02f * moveTowardsTargetReward + 0.02f * lookAtTargetReward