Enable tanh squashing (tanh_squash=True) when creating the policy in the PPO trainer

Ervin Teng · Ervin Teng · commit 7d10fd74f76c · 2020-03-09T18:19:08.000-07:00
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -221,6 +221,7 @@ def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
             self.load,
             condition_sigma_on_obs=False,  # Faster training for PPO
             create_tf_graph=False,  # We will create the TF graph in the Optimizer
+            tanh_squash=True,
         )
 
         return policy

Original file line number	Diff line number	Diff line change
`@@ -221,6 +221,7 @@ def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:`
`221`	`221`	`self.load,`
`222`	`222`	`condition_sigma_on_obs=False, # Faster training for PPO`
`223`	`223`	`create_tf_graph=False, # We will create the TF graph in the Optimizer`
	`224`	`+ tanh_squash=True,`
`224`	`225`	`)`
`225`	`226`
`226`	`227`	`return policy`