From 818d37c52d0633bb06131ec65785e890ea24badc Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 21 Oct 2020 15:26:43 -0700
Subject: [PATCH 1/6] Always initialize non-wrapped policy

---
 ml-agents/mlagents/trainers/ghost/trainer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/ghost/trainer.py b/ml-agents/mlagents/trainers/ghost/trainer.py
index a93b616df5..7d16551e8c 100644
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py
@@ -319,7 +319,9 @@ def create_policy(
         policy = self.trainer.create_policy(
             parsed_behavior_id, behavior_spec, create_graph=True
         )
-        self.trainer.model_saver.initialize_or_load(policy)
+        # Initialize policy. You never want to load it, since that should only
+        # be done to the wrapped policy
+        policy.initialize()
         team_id = parsed_behavior_id.team_id
         self.controller.subscribe_team_id(team_id, self)

From 4f4520be208654a14985a08578573ee2d62dcd18 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 21 Oct 2020 15:33:48 -0700
Subject: [PATCH 2/6] Load ghosted policy

---
 ml-agents/mlagents/trainers/ghost/trainer.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/ml-agents/mlagents/trainers/ghost/trainer.py b/ml-agents/mlagents/trainers/ghost/trainer.py
index 7d16551e8c..fccc9366eb 100644
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py
@@ -146,11 +146,11 @@ def get_step(self) -> int:
     @property
     def reward_buffer(self) -> Deque[float]:
         """
-            Returns the reward buffer. The reward buffer contains the cumulative
-            rewards of the most recent episodes completed by agents using this
-            trainer.
-            :return: the reward buffer.
-            """
+        Returns the reward buffer. The reward buffer contains the cumulative
+        rewards of the most recent episodes completed by agents using this
+        trainer.
+        :return: the reward buffer.
+        """
         return self.trainer.reward_buffer

     @property
@@ -339,6 +339,11 @@ def create_policy(
             self._save_snapshot()  # Need to save after trainer initializes policy
             self._learning_team = self.controller.get_learning_team
             self.wrapped_trainer_team = team_id
+        else:
+            # Load the weights of the ghost policy from the wrapped one
+            policy.load_weights(
+                self.trainer.get_policy(parsed_behavior_id).get_weights()
+            )
         return policy

     def add_policy(

From de996869690757c209b07e20292fb2d995bad68e Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 21 Oct 2020 15:36:04 -0700
Subject: [PATCH 3/6] Update changelog

---
 com.unity.ml-agents/CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index c2482f3ed7..abb2880b53 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -24,6 +24,7 @@ and this project adheres to
   if they are called recursively (for example, if they call `Agent.EndEpisode()`).
   Previously, this would result in an infinite loop and cause the editor to hang. (#4573)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
+- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)

 ## [1.5.0-preview] - 2020-10-14
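Taken together, patches 1 and 2 move checkpoint loading to a single place: the policy handed to the wrapped trainer is still initialized or loaded from the checkpoint by that trainer, while every ghost copy is now always freshly initialized and then overwritten with the wrapped policy's weights. The snippet below is a runnable toy sketch of that invariant, not ml-agents code; ToyPolicy and its methods are illustrative stand-ins for the real policy interface.

    import numpy as np

    class ToyPolicy:
        """Stand-in for a policy; only the weight plumbing matters here."""

        def __init__(self, size: int = 4):
            self.weights = np.empty(size)

        def initialize(self) -> None:
            # Fresh random init, as the non-wrapped (ghost) copies now always do
            self.weights = np.random.standard_normal(self.weights.shape)

        def get_weights(self) -> np.ndarray:
            return np.copy(self.weights)

        def load_weights(self, weights: np.ndarray) -> None:
            self.weights = np.copy(weights)

    # On resume, only the wrapped policy is restored from the checkpoint
    # (a plain array stands in for the checkpoint here)...
    wrapped = ToyPolicy()
    wrapped.load_weights(np.array([1.0, 2.0, 3.0, 4.0]))

    # ...and each ghost copy is initialized, then synced from the wrapped one,
    # so both end up at the saved weights instead of a random re-init.
    ghost = ToyPolicy()
    ghost.initialize()
    ghost.load_weights(wrapped.get_weights())
    assert np.array_equal(ghost.weights, wrapped.weights)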
From 2cb25e26c2e8fe748924198a1ee857963daf01c5 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 21 Oct 2020 16:09:20 -0700
Subject: [PATCH 4/6] Resume test

---
 .../trainers/tests/tensorflow/test_ghost.py  | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py b/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py
index acc9711830..11e64474b5 100644
--- a/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py
+++ b/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py
@@ -57,6 +57,51 @@ def test_load_and_set(dummy_config, use_discrete):
         np.testing.assert_array_equal(w, lw)


+def test_resume(dummy_config, tmp_path):
+    mock_specs = mb.setup_test_behavior_specs(
+        True, False, vector_action_space=[2], vector_obs_space=1
+    )
+    behavior_id_team0 = "test_brain?team=0"
+    behavior_id_team1 = "test_brain?team=1"
+    brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
+
+    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
+    controller = GhostController(100)
+    trainer = GhostTrainer(
+        ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
+    )
+
+    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
+    policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
+    trainer.add_policy(parsed_behavior_id0, policy)
+
+    parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
+    policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
+    trainer.add_policy(parsed_behavior_id1, policy)
+
+    trainer.save_model()
+
+    # Make a new trainer, check that the policies are the same
+    trainer2 = GhostTrainer(
+        ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
+    )
+    policy = trainer2.create_policy(parsed_behavior_id0, mock_specs)
+    trainer2.add_policy(parsed_behavior_id0, policy)
+
+    policy = trainer2.create_policy(parsed_behavior_id1, mock_specs)
+    trainer2.add_policy(parsed_behavior_id1, policy)
+
+    trainer1_policy = trainer.get_policy(parsed_behavior_id1)
+    trainer2_policy = trainer2.get_policy(parsed_behavior_id1)
+    weights = trainer1_policy.get_weights()
+    weights2 = trainer2_policy.get_weights()
+    try:
+        for w, lw in zip(weights, weights2):
+            np.testing.assert_array_equal(w, lw)
+    except AssertionError:
+        pass
+
+
 def test_process_trajectory(dummy_config):
     mock_specs = mb.setup_test_behavior_specs(
         True, False, vector_action_space=[2], vector_obs_space=1
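Two notes on the test above. First, the "?team=" suffix is how a single brain name is fanned out into per-team behavior ids, and `BehaviorIdentifiers.from_name_behavior_id` splits it back apart. The runnable snippet below is a simplified re-implementation of that convention for illustration only, not the real class:

    def parse_behavior_id(name_behavior_id: str):
        # "test_brain?team=0" -> ("test_brain", 0); ids without a
        # "?team=" suffix fall back to team 0.
        name, _, attrs = name_behavior_id.partition("?")
        team_id = int(attrs[len("team="):]) if attrs.startswith("team=") else 0
        return name, team_id

    assert parse_behavior_id("test_brain?team=1") == ("test_brain", 1)
    assert parse_behavior_id("test_brain") == ("test_brain", 0)

Second, as committed here the `try`/`except AssertionError: pass` swallows exactly the failure the test is meant to detect, so this version of the test can never fail; patch 5 removes it.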
From b6f90857d371d91aae49971860cd88a928d6d4e4 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 21 Oct 2020 16:32:40 -0700
Subject: [PATCH 5/6] Add test

---
 .../trainers/tests/tensorflow/test_ghost.py  | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py b/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py
index 11e64474b5..2e72303b56 100644
--- a/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py
+++ b/ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py
@@ -64,7 +64,7 @@ def test_resume(dummy_config, tmp_path):
     behavior_id_team0 = "test_brain?team=0"
     behavior_id_team1 = "test_brain?team=1"
     brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
-
+    tmp_path = tmp_path.as_posix()
     ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
     controller = GhostController(100)
     trainer = GhostTrainer(
@@ -82,8 +82,9 @@ def test_resume(dummy_config, tmp_path):
     trainer.save_model()

     # Make a new trainer, check that the policies are the same
+    ppo_trainer2 = PPOTrainer(brain_name, 0, dummy_config, True, True, 0, tmp_path)
     trainer2 = GhostTrainer(
-        ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
+        ppo_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
     )
     policy = trainer2.create_policy(parsed_behavior_id0, mock_specs)
     trainer2.add_policy(parsed_behavior_id0, policy)
@@ -91,15 +92,13 @@ def test_resume(dummy_config, tmp_path):
     policy = trainer2.create_policy(parsed_behavior_id1, mock_specs)
     trainer2.add_policy(parsed_behavior_id1, policy)

-    trainer1_policy = trainer.get_policy(parsed_behavior_id1)
-    trainer2_policy = trainer2.get_policy(parsed_behavior_id1)
+    trainer1_policy = trainer.get_policy(parsed_behavior_id1.behavior_id)
+    trainer2_policy = trainer2.get_policy(parsed_behavior_id1.behavior_id)
     weights = trainer1_policy.get_weights()
     weights2 = trainer2_policy.get_weights()
-    try:
-        for w, lw in zip(weights, weights2):
-            np.testing.assert_array_equal(w, lw)
-    except AssertionError:
-        pass
+
+    for w, lw in zip(weights, weights2):
+        np.testing.assert_array_equal(w, lw)


 def test_process_trajectory(dummy_config):
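Patch 5 makes the resume test meaningful: the second `GhostTrainer` now wraps a second `PPOTrainer` whose load flag is set (the second `True` in its argument list), so it actually restores the checkpoint written by `trainer.save_model()` rather than reusing the still-live `ppo_trainer`; `tmp_path.as_posix()` turns pytest's `pathlib.Path` into the string path the trainers expect; `get_policy` is keyed by the full behavior-id string, hence `parsed_behavior_id1.behavior_id`; and the assertion-swallowing `try`/`except` is gone. The save-then-resume round trip being exercised reduces to the runnable toy below, where a plain `.npy` file stands in for the real checkpoint machinery:

    import os
    import tempfile

    import numpy as np

    ckpt = os.path.join(tempfile.mkdtemp(), "policy.npy")

    # First run: train, then persist the weights (trainer.save_model()).
    weights = np.array([1.0, 2.0, 3.0])
    np.save(ckpt, weights)

    # Resumed run: a trainer constructed with load=True restores them.
    resumed = np.load(ckpt)
    np.testing.assert_array_equal(resumed, weights)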
From 58960627b881a205317fc5e416a801cc6c25865a Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Wed, 21 Oct 2020 16:34:47 -0700
Subject: [PATCH 6/6] Add torch test and fix torch.

---
 ml-agents/mlagents/trainers/ghost/trainer.py |  3 --
 .../trainers/tests/torch/test_ghost.py       | 44 +++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/ml-agents/mlagents/trainers/ghost/trainer.py b/ml-agents/mlagents/trainers/ghost/trainer.py
index fccc9366eb..b2db465c20 100644
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py
@@ -319,9 +319,6 @@ def create_policy(
         policy = self.trainer.create_policy(
             parsed_behavior_id, behavior_spec, create_graph=True
         )
-        # Initialize policy. You never want to load it, since that should only
-        # be done to the wrapped policy
-        policy.initialize()
         team_id = parsed_behavior_id.team_id
         self.controller.subscribe_team_id(team_id, self)

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_ghost.py b/ml-agents/mlagents/trainers/tests/torch/test_ghost.py
index 06f0666cc8..0d96084d47 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_ghost.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_ghost.py
@@ -59,6 +59,50 @@ def test_load_and_set(dummy_config, use_discrete):
         np.testing.assert_array_equal(w, lw)


+def test_resume(dummy_config, tmp_path):
+    mock_specs = mb.setup_test_behavior_specs(
+        True, False, vector_action_space=[2], vector_obs_space=1
+    )
+    behavior_id_team0 = "test_brain?team=0"
+    behavior_id_team1 = "test_brain?team=1"
+    brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
+    tmp_path = tmp_path.as_posix()
+    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
+    controller = GhostController(100)
+    trainer = GhostTrainer(
+        ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
+    )
+
+    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
+    policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
+    trainer.add_policy(parsed_behavior_id0, policy)
+
+    parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
+    policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
+    trainer.add_policy(parsed_behavior_id1, policy)
+
+    trainer.save_model()
+
+    # Make a new trainer, check that the policies are the same
+    ppo_trainer2 = PPOTrainer(brain_name, 0, dummy_config, True, True, 0, tmp_path)
+    trainer2 = GhostTrainer(
+        ppo_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
+    )
+    policy = trainer2.create_policy(parsed_behavior_id0, mock_specs)
+    trainer2.add_policy(parsed_behavior_id0, policy)
+
+    policy = trainer2.create_policy(parsed_behavior_id1, mock_specs)
+    trainer2.add_policy(parsed_behavior_id1, policy)
+
+    trainer1_policy = trainer.get_policy(parsed_behavior_id1.behavior_id)
+    trainer2_policy = trainer2.get_policy(parsed_behavior_id1.behavior_id)
+    weights = trainer1_policy.get_weights()
+    weights2 = trainer2_policy.get_weights()
+
+    for w, lw in zip(weights, weights2):
+        np.testing.assert_array_equal(w, lw)
+
+
 def test_process_trajectory(dummy_config):
     mock_specs = mb.setup_test_behavior_specs(
         True, False, vector_action_space=[2], vector_obs_space=1
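The torch test is the TF `test_resume` with the patch-5 fixes already applied, and the `policy.initialize()` call added in patch 1 is dropped from the shared `GhostTrainer.create_policy`, presumably because it broke the torch path (the subject line says "fix torch") and the ghost copy's weights are overwritten from the wrapped policy either way. One caveat if you reuse the weight-comparison idiom from these tests: `zip` stops at the shorter list, so the loop silently assumes both policies expose the same number of weight tensors. A stricter, self-contained variant (illustrative, not part of the patch):

    import numpy as np

    def assert_same_weights(weights, weights2) -> None:
        # Guard against zip() truncating when the architectures differ
        assert len(weights) == len(weights2)
        for w, lw in zip(weights, weights2):
            np.testing.assert_array_equal(w, lw)

    assert_same_weights([np.zeros(2), np.ones(3)], [np.zeros(2), np.ones(3)])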