diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 6dc83100f2..ca921e26f2 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -2,7 +2,7 @@ behaviors: 3DBallHard: trainer_type: ppo hyperparameters: - batch_size: 1200 + batch_size: 120 buffer_size: 12000 learning_rate: 0.0003 beta: 0.001 @@ -17,10 +17,10 @@ behaviors: vis_encode_type: simple reward_signals: extrinsic: - gamma: 0.995 + gamma: 0.99 strength: 1.0 keep_checkpoints: 5 - max_steps: 5000000 + max_steps: 500000 time_horizon: 1000 summary_freq: 12000 threaded: true