Unity-Technologies · andrewcoh · Jul 7, 2020 · Jun 17, 2020 · Jun 17, 2020 · Jun 17, 2020
diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml
@@ -2,7 +2,7 @@ behaviors:
   3DBallHard:
     trainer_type: ppo
     hyperparameters:
-      batch_size: 1200
+      batch_size: 120
       buffer_size: 12000
       learning_rate: 0.0003
       beta: 0.001
@@ -17,10 +17,10 @@ behaviors:
       vis_encode_type: simple
     reward_signals:
       extrinsic:
-        gamma: 0.995
+        gamma: 0.99
         strength: 1.0
     keep_checkpoints: 5
-    max_steps: 5000000
+    max_steps: 500000
     time_horizon: 1000
     summary_freq: 12000
     threaded: true