Unity-Technologies · ervteng · Apr 30, 2020 · Apr 14, 2020 · Apr 14, 2020 · Apr 15, 2020
diff --git a/Project/Recordings/movie.mp4 b/Project/Recordings/movie.mp4
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -36,7 +36,10 @@ and this project adheres to
   communication between Unity and the Python process.
 - The obsolete `Agent` methods `GiveModel`, `Done`, `InitializeAgent`,
   `AgentAction` and `AgentReset` have been removed.
-- The GhostTrainer has been extended to support asymmetric games and the asymmetric example environment Strikers Vs. Goalie has been added.
+- The GhostTrainer has been extended to support asymmetric games and the asymmetric example environment Strikers Vs. Goalie has been added.
+- Curriculum and Parameter Randomization configurations have been merged
+  into the main training configuration file. Note that this means training
+  configuration files are environment-specific. (#3791)
 
 ### Minor Changes
 

diff --git a/config/3dball_randomize.yaml b/config/3dball_randomize.yaml
diff --git a/config/curricula/soccer.yaml b/config/curricula/soccer.yaml
diff --git a/config/curricula/test.yaml b/config/curricula/test.yaml
diff --git a/config/curricula/wall_jump.yaml b/config/curricula/wall_jump.yaml
diff --git a/config/gail_config.yaml b/config/gail_config.yaml
diff --git a/config/imitation/CrawlerStatic.yaml b/config/imitation/CrawlerStatic.yaml
@@ -0,0 +1,29 @@
+behaviors:  # this file configures a single behavior: CrawlerStatic
+  CrawlerStatic:
+    trainer: ppo
+    batch_size: 2024
+    beta: 0.005
+    buffer_size: 20240  # 10x batch_size
+    epsilon: 0.2
+    hidden_units: 512
+    lambd: 0.95
+    learning_rate: 0.0003
+    max_steps: 1.0e7  # 10 million
+    memory_size: 256  # NOTE(review): presumably unused while use_recurrent is false - confirm
+    normalize: true
+    num_epoch: 3
+    num_layers: 3
+    time_horizon: 1000
+    sequence_length: 64  # NOTE(review): presumably recurrent-only as well - confirm
+    summary_freq: 30000
+    use_recurrent: false
+    reward_signals:  # gail is the only signal listed; there is no extrinsic entry
+      gail:
+        strength: 1.0
+        gamma: 0.99
+        encoding_size: 128
+        demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
+    behavioral_cloning:  # reuses the demo file given to the gail reward signal
+      demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
+      strength: 0.5
+      steps: 50000
diff --git a/config/imitation/FoodCollector.yaml b/config/imitation/FoodCollector.yaml
@@ -0,0 +1,29 @@
+behaviors:  # this file configures a single behavior: FoodCollector
+  FoodCollector:
+    trainer: ppo
+    batch_size: 64
+    beta: 0.005
+    buffer_size: 10240
+    epsilon: 0.2
+    hidden_units: 128
+    lambd: 0.95
+    learning_rate: 0.0003
+    max_steps: 2.0e6  # 2 million
+    memory_size: 256  # NOTE(review): presumably unused while use_recurrent is false - confirm
+    normalize: false
+    num_epoch: 3
+    num_layers: 2
+    time_horizon: 64
+    sequence_length: 32  # NOTE(review): presumably recurrent-only as well - confirm
+    summary_freq: 10000
+    use_recurrent: false
+    reward_signals:  # gail is the only signal listed; there is no extrinsic entry
+      gail:
+        strength: 0.1
+        gamma: 0.99
+        encoding_size: 128
+        demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
+    behavioral_cloning:  # reuses the demo file given to the gail reward signal
+      demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
+      strength: 1.0
+      steps: 0  # NOTE(review): 0 presumably means cloning is never phased out - confirm in trainer docs
diff --git a/config/imitation/Hallway.yaml b/config/imitation/Hallway.yaml
@@ -0,0 +1,28 @@
+behaviors:  # this file configures a single behavior: Hallway
+  Hallway:
+    trainer: ppo
+    batch_size: 128
+    beta: 0.01
+    buffer_size: 1024  # 8x batch_size
+    epsilon: 0.2
+    hidden_units: 128
+    lambd: 0.95
+    learning_rate: 0.0003
+    max_steps: 1.0e7  # 10 million
+    memory_size: 256
+    normalize: false
+    num_epoch: 3
+    num_layers: 2
+    time_horizon: 64
+    sequence_length: 64
+    summary_freq: 10000
+    use_recurrent: true  # NOTE(review): memory_size / sequence_length above presumably take effect here - confirm
+    reward_signals:  # two signals: extrinsic and gail
+      extrinsic:
+        strength: 1.0
+        gamma: 0.99
+      gail:  # no behavioral_cloning section accompanies this demo in this file
+        strength: 0.1
+        gamma: 0.99
+        encoding_size: 128
+        demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
diff --git a/config/imitation/PushBlock.yaml b/config/imitation/PushBlock.yaml
@@ -0,0 +1,25 @@
+behaviors:  # this file configures a single behavior: PushBlock
+  PushBlock:
+    trainer: ppo
+    batch_size: 128
+    beta: 0.01
+    buffer_size: 2048  # 16x batch_size
+    epsilon: 0.2
+    hidden_units: 256
+    lambd: 0.95
+    learning_rate: 0.0003
+    max_steps: 1.5e7  # 15 million
+    memory_size: 256  # NOTE(review): presumably unused while use_recurrent is false - confirm
+    normalize: false
+    num_epoch: 3
+    num_layers: 2
+    time_horizon: 64
+    sequence_length: 64  # NOTE(review): presumably recurrent-only as well - confirm
+    summary_freq: 60000
+    use_recurrent: false
+    reward_signals:  # gail is the only signal listed; no extrinsic entry, no behavioral_cloning section
+      gail:
+        strength: 1.0
+        gamma: 0.99
+        encoding_size: 128
+        demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
diff --git a/config/imitation/Pyramids.yaml b/config/imitation/Pyramids.yaml
@@ -0,0 +1,36 @@
+behaviors:  # this file configures a single behavior: Pyramids
+  Pyramids:
+    trainer: ppo
+    batch_size: 128
+    beta: 0.01
+    buffer_size: 2048  # 16x batch_size
+    epsilon: 0.2
+    hidden_units: 512
+    lambd: 0.95
+    learning_rate: 0.0003
+    max_steps: 1.0e7  # 10 million
+    memory_size: 256  # NOTE(review): presumably unused while use_recurrent is false - confirm
+    normalize: false
+    num_epoch: 3
+    num_layers: 2
+    time_horizon: 128
+    sequence_length: 64  # NOTE(review): presumably recurrent-only as well - confirm
+    summary_freq: 30000
+    use_recurrent: false
+    reward_signals:  # three signals: extrinsic, curiosity and gail
+      extrinsic:
+        strength: 1.0
+        gamma: 0.99
+      curiosity:
+        strength: 0.02
+        gamma: 0.99
+        encoding_size: 256
+      gail:
+        strength: 0.01
+        gamma: 0.99
+        encoding_size: 128
+        demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
+    behavioral_cloning:  # reuses the demo file given to the gail reward signal
+      demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
+      strength: 0.5
+      steps: 150000
diff --git a/config/ppo/3DBall.yaml b/config/ppo/3DBall.yaml
@@ -0,0 +1,25 @@
+behaviors:  # this file configures a single behavior: 3DBall
+  3DBall:
+    trainer: ppo
+    batch_size: 64
+    beta: 0.001
+    buffer_size: 12000
+    epsilon: 0.2
+    hidden_units: 128
+    lambd: 0.99
+    learning_rate: 0.0003
+    learning_rate_schedule: linear
+    max_steps: 5.0e5  # 500 thousand
+    memory_size: 128  # NOTE(review): presumably unused while use_recurrent is false - confirm
+    normalize: true
+    num_epoch: 3
+    num_layers: 2
+    time_horizon: 1000
+    sequence_length: 64  # NOTE(review): presumably recurrent-only as well - confirm
+    summary_freq: 12000  # equal to buffer_size
+    use_recurrent: false
+    vis_encode_type: simple
+    reward_signals:  # extrinsic is the only signal listed
+      extrinsic:
+        strength: 1.0
+        gamma: 0.99
diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml
@@ -0,0 +1,25 @@
+behaviors:  # this file configures a single behavior: 3DBallHard
+  3DBallHard:
+    trainer: ppo
+    batch_size: 1200
+    beta: 0.001
+    buffer_size: 12000  # 10x batch_size
+    epsilon: 0.2
+    hidden_units: 128
+    lambd: 0.95
+    learning_rate: 0.0003
+    learning_rate_schedule: linear
+    max_steps: 5.0e6  # 5 million
+    memory_size: 128  # NOTE(review): presumably unused while use_recurrent is false - confirm
+    normalize: true
+    num_epoch: 3
+    num_layers: 2
+    time_horizon: 1000
+    sequence_length: 64  # NOTE(review): presumably recurrent-only as well - confirm
+    summary_freq: 12000  # equal to buffer_size
+    use_recurrent: false
+    vis_encode_type: simple
+    reward_signals:  # extrinsic is the only signal listed
+      extrinsic:
+        strength: 1.0
+        gamma: 0.995