diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 4afdfa9f3d..001939ec89 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -204,7 +204,9 @@ BehaviorPPO: normalize: false num_layers: 2 time_horizon: 64 + summary_freq: 10000 vis_encoder_type: simple + init_path: null # PPO-specific configs beta: 5.0e-3 @@ -226,7 +228,6 @@ BehaviorPPO: batch_size: 512 num_epoch: 3 samples_per_update: 0 - init_path: reward_signals: # environment reward @@ -239,7 +240,7 @@ BehaviorPPO: strength: 0.02 gamma: 0.99 encoding_size: 256 - learning_rate: 3e-4 + learning_rate: 3.0e-4 # GAIL gail: @@ -247,7 +248,7 @@ BehaviorPPO: gamma: 0.99 encoding_size: 128 demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo - learning_rate: 3e-4 + learning_rate: 3.0e-4 use_actions: false use_vail: false