From 0d5632c02fdef17dd1932ecce238740d7579275a Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Wed, 1 Jul 2020 17:24:14 -0700
Subject: [PATCH 01/29] Experiment branch for comparing torch

---
 experiment_torch.py                           | 49 +++++++++++++++++++
 ml-agents/mlagents/trainers/learn.py          | 10 ++--
 .../mlagents/trainers/policy/nn_policy.py     |  1 -
 .../mlagents/trainers/policy/torch_policy.py  |  2 +-
 ml-agents/mlagents/trainers/ppo/trainer.py    |  9 +++-
 5 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 experiment_torch.py

diff --git a/experiment_torch.py b/experiment_torch.py
new file mode 100644
index 0000000000..96e6aa484b
--- /dev/null
+++ b/experiment_torch.py
@@ -0,0 +1,49 @@
+
+import json
+import os
+from mlagents.trainers.learn import run_cli, parse_command_line
+from mlagents.trainers.settings import RunOptions
+from mlagents.trainers.stats import StatsReporter
+from mlagents.trainers.ppo.trainer import TestingConfiguration
+from mlagents_envs.timers import _thread_timer_stacks
+
+
+results = {}
+
+def run_experiment(name:str, steps:int, torch:bool):
+	TestingConfiguration.env_name = name
+	TestingConfiguration.max_steps = steps
+	TestingConfiguration.use_torch = torch
+	run_options = parse_command_line([f"config/ppo/{name}.yaml"])
+	run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if torch else "tf")
+	run_options.checkpoint_settings.force = True
+	for trainer_settings in run_options.behaviors.values():
+		trainer_settings.threaded = False
+	timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json")
+	run_cli(run_options)
+	StatsReporter.writers.clear()
+	StatsReporter.stats_dict.clear()
+	_thread_timer_stacks.clear()
+	with open(timers_path) as timers_json_file:
+		timers_json = json.load(timers_json_file)
+		total = timers_json["total"]
+		evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"]
+		update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"]
+	if torch:
+		update = update["TorchPPOOptimizer.update"]["total"]
+		evaluate = evaluate["TorchPolicy.evaluate"]["total"]
+	else:
+		update = update["TFPPOOptimizer.update"]["total"]
+		evaluate = evaluate["NNPolicy.evaluate"]["total"]
+	return total, update, evaluate
+
+results["3DBall Torch"] = run_experiment("3DBall", 20000, True)
+results["3DBall TF"] = run_experiment("3DBall", 20000, False)
+results["GridWorld Torch"] = run_experiment("GridWorld", 20000, True)
+results["GridWorld TF"] = run_experiment("GridWorld", 20000, False)
+print("experiment\t", "total\t", "update\t", "evaluate")
+for key, value in results.items():
+	print(key + "\t", str(value[0])+ "\t", str(value[1])+ "\t", str(value[2])+ "\t")
+
+
+
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index ead4b92cd8..829f076e81 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -35,6 +35,9 @@
 )
 from mlagents_envs import logging_util
 
+from mlagents.trainers.ppo.trainer import TestingConfiguration
+from mlagents_envs.registry import default_registry
+
 logger = logging_util.get_logger(__name__)
 
 TRAINING_STATUS_FILE_NAME = "training_status.json"
@@ -233,9 +236,10 @@ def create_unity_environment(
     ) -> UnityEnvironment:
         # Make sure that each environment gets a different seed
         env_seed = seed + worker_id
-        return UnityEnvironment(
-            file_name=env_path,
-            worker_id=worker_id,
+        return default_registry[TestingConfiguration.env_name].make(
+            # return UnityEnvironment(
+            #     file_name=env_path,
+            #     worker_id=worker_id,
             seed=env_seed,
             no_graphics=no_graphics,
             base_port=start_port,
diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py
index ce08f06f0a..ea550cb652 100644
--- a/ml-agents/mlagents/trainers/policy/nn_policy.py
+++ b/ml-agents/mlagents/trainers/policy/nn_policy.py
@@ -21,7 +21,6 @@ def __init__(
         seed: int,
         brain: BrainParameters,
         trainer_settings: TrainerSettings,
-        is_training: bool,
         model_path: str,
         load: bool,
         tanh_squash: bool = False,
diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index d140a6e1d5..149e064528 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -94,7 +94,7 @@ def __init__(
         self.inference_dict: Dict[str, tf.Tensor] = {}
         self.update_dict: Dict[str, tf.Tensor] = {}
         # TF defaults to 32-bit, so we use the same here.
-        torch.set_default_tensor_type(torch.DoubleTensor)
+        torch.set_default_tensor_type(torch.FloatTensor)
 
         reward_signal_configs = trainer_settings.reward_signals
         reward_signal_names = [key.value for key, _ in reward_signal_configs.items()]
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index 19519aaeb8..bbc8f701b4 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -22,6 +22,12 @@
 logger = get_logger(__name__)
 
 
+class TestingConfiguration:
+    use_torch = False
+    max_steps = 0
+    env_name = ""
+
+
 class PPOTrainer(RLTrainer):
     """The PPOTrainer is an implementation of the PPO algorithm."""
 
@@ -53,7 +59,8 @@ def __init__(
         )
         self.load = load
         self.seed = seed
-        self.framework = "torch"
+        self.framework = "torch" if TestingConfiguration.use_torch else "tf"
+        self.trainer_settings.max_steps = TestingConfiguration.max_steps
         self.policy: Policy = None  # type: ignore
 
     def _process_trajectory(self, trajectory: Trajectory) -> None:

From c7c8df2e71f36eff166f1a835b2c1e72a9e77b42 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Wed, 1 Jul 2020 18:35:49 -0700
Subject: [PATCH 02/29] Updates and merging Ervin's changes

---
 experiment_torch.py                           |  8 ++--
 ml-agents/mlagents/trainers/learn.py          | 31 +++++++++-----
 .../mlagents/trainers/policy/torch_policy.py  | 40 ++++++++++---------
 3 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 96e6aa484b..3fbe7c7d9e 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -39,9 +39,11 @@ def run_experiment(name:str, steps:int, torch:bool):
 
 results["3DBall Torch"] = run_experiment("3DBall", 20000, True)
 results["3DBall TF"] = run_experiment("3DBall", 20000, False)
-results["GridWorld Torch"] = run_experiment("GridWorld", 20000, True)
-results["GridWorld TF"] = run_experiment("GridWorld", 20000, False)
-print("experiment\t", "total\t", "update\t", "evaluate")
+results["GridWorld Torch"] = run_experiment("GridWorld", 2000, True)
+results["GridWorld TF"] = run_experiment("GridWorld", 2000, False)
+results["PushBlock Torch"] = run_experiment("PushBlock", 20000, True)
+results["PushBlock TF"] = run_experiment("PushBlock", 20000, False)
+print("experiment\t", "total\t\t\t\t", "update\t\t\t\t", "evaluate")
 for key, value in results.items():
 	print(key + "\t", str(value[0])+ "\t", str(value[1])+ "\t", str(value[2])+ "\t")
 
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index 829f076e81..b4abd94988 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -236,17 +236,26 @@ def create_unity_environment(
     ) -> UnityEnvironment:
         # Make sure that each environment gets a different seed
         env_seed = seed + worker_id
-        return default_registry[TestingConfiguration.env_name].make(
-            # return UnityEnvironment(
-            #     file_name=env_path,
-            #     worker_id=worker_id,
-            seed=env_seed,
-            no_graphics=no_graphics,
-            base_port=start_port,
-            additional_args=env_args,
-            side_channels=side_channels,
-            log_folder=log_folder,
-        )
+        if TestingConfiguration.env_name == "":
+            return UnityEnvironment(
+                file_name=env_path,
+                worker_id=worker_id,
+                seed=env_seed,
+                no_graphics=no_graphics,
+                base_port=start_port,
+                additional_args=env_args,
+                side_channels=side_channels,
+                log_folder=log_folder,
+            )
+        else:
+            return default_registry[TestingConfiguration.env_name].make(
+                seed=env_seed,
+                no_graphics=no_graphics,
+                base_port=start_port,
+                additional_args=env_args,
+                side_channels=side_channels,
+                log_folder=log_folder,
+            )
 
     return create_unity_environment
 
diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index d3fb2db022..470012d3f0 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -249,24 +249,28 @@ def load_model(self, step=0):
         self.actor_critic.load_state_dict(torch.load(load_path))
 
     def export_model(self, step=0):
-        fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
-        fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
-        fake_masks = torch.ones([1] + self.actor_critic.act_size)
-        # fake_memories = torch.zeros([1] + [self.m_size])
-        export_path = "./model-" + str(step) + ".onnx"
-        output_names = ["action", "action_probs"]
-        input_names = ["vector_observation", "action_mask"]
-        dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
-        onnx.export(
-            self.actor_critic,
-            (fake_vec_obs, fake_vis_obs, fake_masks),
-            export_path,
-            verbose=True,
-            opset_version=12,
-            input_names=input_names,
-            output_names=output_names,
-            dynamic_axes=dynamic_axes,
-        )
+        try:
+            fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
+            fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
+            fake_masks = torch.ones([1] + self.actor_critic.act_size)
+            # fake_memories = torch.zeros([1] + [self.m_size])
+            export_path = "./model-" + str(step) + ".onnx"
+            output_names = ["action", "action_probs"]
+            input_names = ["vector_observation", "action_mask"]
+            dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
+            onnx.export(
+                self.actor_critic,
+                (fake_vec_obs, fake_vis_obs, fake_masks),
+                export_path,
+                verbose=True,
+                opset_version=12,
+                input_names=input_names,
+                output_names=output_names,
+                dynamic_axes=dynamic_axes,
+            )
+        except:
+            print("Could not export torch model")
+            return
 
     @property
     def vis_obs_size(self):

From e0120ec3ddae3d8ea01ad10ea6364bb828596827 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 10:14:44 -0700
Subject: [PATCH 03/29] improvements on experiment_torch.py

---
 experiment_torch.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 3fbe7c7d9e..f2f78c1cc8 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -1,6 +1,7 @@
 
 import json
 import os
+import torch
 from mlagents.trainers.learn import run_cli, parse_command_line
 from mlagents.trainers.settings import RunOptions
 from mlagents.trainers.stats import StatsReporter
@@ -10,11 +11,13 @@
 
 results = {}
 
-def run_experiment(name:str, steps:int, torch:bool):
+def run_experiment(name:str, steps:int, torch:bool, config_name=None):
 	TestingConfiguration.env_name = name
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = torch
-	run_options = parse_command_line([f"config/ppo/{name}.yaml"])
+	if config_name is None:
+		config_name = name
+	run_options = parse_command_line([f"config/ppo/{config_name}.yaml"])
 	run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if torch else "tf")
 	run_options.checkpoint_settings.force = True
 	for trainer_settings in run_options.behaviors.values():
@@ -35,17 +38,34 @@ def run_experiment(name:str, steps:int, torch:bool):
 	else:
 		update = update["TFPPOOptimizer.update"]["total"]
 		evaluate = evaluate["NNPolicy.evaluate"]["total"]
+	# todo: do total / count
 	return total, update, evaluate
 
+
+
 results["3DBall Torch"] = run_experiment("3DBall", 20000, True)
 results["3DBall TF"] = run_experiment("3DBall", 20000, False)
 results["GridWorld Torch"] = run_experiment("GridWorld", 2000, True)
 results["GridWorld TF"] = run_experiment("GridWorld", 2000, False)
 results["PushBlock Torch"] = run_experiment("PushBlock", 20000, True)
 results["PushBlock TF"] = run_experiment("PushBlock", 20000, False)
-print("experiment\t", "total\t\t\t\t", "update\t\t\t\t", "evaluate")
+results["Hallway Torch"] = run_experiment("Hallway", 20000, True)
+results["Hallway TF"] = run_experiment("Hallway", 20000, False)
+results["CrawlerStaticTarget Torch"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic")
+results["CrawlerStaticTarget TF"] = run_experiment("CrawlerStaticTarget", 50000, False, "CrawlerStatic")
+
+torch.set_num_threads(1)
+
+results["3DBall Torch 1 thread"] = run_experiment("3DBall", 20000, True)
+results["GridWorld Torch 1 thread"] = run_experiment("GridWorld", 2000, True)
+results["PushBlock Torch 1 thread"] = run_experiment("PushBlock", 20000, True)
+results["Hallway Torch 1 thread"] = run_experiment("Hallway", 20000, True)
+results["CrawlerStaticTarget Torch 1 thread"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic")
+
+
+print("experiment,\t", "total,\t", "update,\t", "evaluate")
 for key, value in results.items():
-	print(key + "\t", str(value[0])+ "\t", str(value[1])+ "\t", str(value[2])+ "\t")
+	print(key + ",\t", str(value[0])+ ",\t", str(value[1])+ ",\t", str(value[2])+ "\t")
 
 
 

From 4ad81e0e16ca64d85b18f5bf8e14fc0d1c30930b Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 14:31:52 -0700
Subject: [PATCH 04/29] Better printing of results

---
 experiment_torch.py | 67 +++++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index f2f78c1cc8..8662b9a5e0 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -9,20 +9,22 @@
 from mlagents_envs.timers import _thread_timer_stacks
 
 
-results = {}
+results = [("name", "steps", "use_torch", "num_torch_threads", "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")]
 
-def run_experiment(name:str, steps:int, torch:bool, config_name=None):
+def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, config_name=None):
 	TestingConfiguration.env_name = name
 	TestingConfiguration.max_steps = steps
-	TestingConfiguration.use_torch = torch
+	TestingConfiguration.use_torch = use_torch
 	if config_name is None:
 		config_name = name
 	run_options = parse_command_line([f"config/ppo/{config_name}.yaml"])
-	run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if torch else "tf")
+	run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf")
 	run_options.checkpoint_settings.force = True
 	for trainer_settings in run_options.behaviors.values():
 		trainer_settings.threaded = False
 	timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json")
+	if use_torch:
+		torch.set_num_threads(num_torch_threads)
 	run_cli(run_options)
 	StatsReporter.writers.clear()
 	StatsReporter.stats_dict.clear()
@@ -30,42 +32,49 @@ def run_experiment(name:str, steps:int, torch:bool, config_name=None):
 	with open(timers_path) as timers_json_file:
 		timers_json = json.load(timers_json_file)
 		total = timers_json["total"]
+		tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]
 		evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"]
 		update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"]
-	if torch:
-		update = update["TorchPPOOptimizer.update"]["total"]
-		evaluate = evaluate["TorchPolicy.evaluate"]["total"]
+		tc_advance_total = tc_advance["total"]
+		tc_advance_count = tc_advance["count"]
+	if use_torch:
+		update_total = update["TorchPPOOptimizer.update"]["total"]
+		evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
+		update_count = update["TorchPPOOptimizer.update"]["count"]
+		evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
 	else:
-		update = update["TFPPOOptimizer.update"]["total"]
-		evaluate = evaluate["NNPolicy.evaluate"]["total"]
+		update_total = update["TFPPOOptimizer.update"]["total"]
+		evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
+		update_count = update["TFPPOOptimizer.update"]["count"]
+		evaluate_count= evaluate["NNPolicy.evaluate"]["count"]
 	# todo: do total / count
-	return total, update, evaluate
+	return name, steps, use_torch, num_torch_threads, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count
 
 
 
-results["3DBall Torch"] = run_experiment("3DBall", 20000, True)
-results["3DBall TF"] = run_experiment("3DBall", 20000, False)
-results["GridWorld Torch"] = run_experiment("GridWorld", 2000, True)
-results["GridWorld TF"] = run_experiment("GridWorld", 2000, False)
-results["PushBlock Torch"] = run_experiment("PushBlock", 20000, True)
-results["PushBlock TF"] = run_experiment("PushBlock", 20000, False)
-results["Hallway Torch"] = run_experiment("Hallway", 20000, True)
-results["Hallway TF"] = run_experiment("Hallway", 20000, False)
-results["CrawlerStaticTarget Torch"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic")
-results["CrawlerStaticTarget TF"] = run_experiment("CrawlerStaticTarget", 50000, False, "CrawlerStatic")
 
-torch.set_num_threads(1)
+results.append(run_experiment("3DBall", 20000, True, 4))
+results.append(run_experiment("3DBall", 20000, True, 1))
+results.append(run_experiment("3DBall", 20000, False, None))
 
-results["3DBall Torch 1 thread"] = run_experiment("3DBall", 20000, True)
-results["GridWorld Torch 1 thread"] = run_experiment("GridWorld", 2000, True)
-results["PushBlock Torch 1 thread"] = run_experiment("PushBlock", 20000, True)
-results["Hallway Torch 1 thread"] = run_experiment("Hallway", 20000, True)
-results["CrawlerStaticTarget Torch 1 thread"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic")
+results.append(run_experiment("GridWorld", 2000, True, 4))
+results.append(run_experiment("GridWorld", 2000, True, 1))
+results.append(run_experiment("GridWorld", 2000, False, None))
 
+results.append(run_experiment("PushBlock", 20000, True, 4))
+results.append(run_experiment("PushBlock", 20000, True, 1))
+results.append(run_experiment("PushBlock", 20000, False, None))
 
-print("experiment,\t", "total,\t", "update,\t", "evaluate")
-for key, value in results.items():
-	print(key + ",\t", str(value[0])+ ",\t", str(value[1])+ ",\t", str(value[2])+ "\t")
+results.append(run_experiment("Hallway", 20000, True, 4))
+results.append(run_experiment("Hallway", 20000, True, 1))
+results.append(run_experiment("Hallway", 20000, False, None))
 
+results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, "CrawlerStatic"))
+
+
+for r in results:
+	print(*r)
 
 

From b1eb17d3502125d5f9bfe9f72437fabee46b4766 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 14:54:36 -0700
Subject: [PATCH 05/29] Preliminary GPU experiment

---
 experiment_torch.py                           | 39 ++++++++++---------
 .../mlagents/trainers/policy/torch_policy.py  |  6 +++
 ml-agents/mlagents/trainers/ppo/trainer.py    | 12 ++++--
 3 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 8662b9a5e0..bff66d56c1 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -9,12 +9,15 @@
 from mlagents_envs.timers import _thread_timer_stacks
 
 
-results = [("name", "steps", "use_torch", "num_torch_threads", "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")]
+results = [("name", "steps", "use_torch", "num_torch_threads", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")]
 
-def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, config_name=None):
+def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool, config_name=None):
 	TestingConfiguration.env_name = name
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = use_torch
+	TestingConfiguration.use_gpu = use_gpu
+	if (not torch.cuda.is_available() and use_gpu) or (not use_torch and use_gpu):
+		return ("na", )*12
 	if config_name is None:
 		config_name = name
 	run_options = parse_command_line([f"config/ppo/{config_name}.yaml"])
@@ -48,30 +51,30 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, c
 		update_count = update["TFPPOOptimizer.update"]["count"]
 		evaluate_count= evaluate["NNPolicy.evaluate"]["count"]
 	# todo: do total / count
-	return name, steps, use_torch, num_torch_threads, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count
+	return name, steps, use_torch, num_torch_threads, use_gpu, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count
 
 
 
 
-results.append(run_experiment("3DBall", 20000, True, 4))
-results.append(run_experiment("3DBall", 20000, True, 1))
-results.append(run_experiment("3DBall", 20000, False, None))
+results.append(run_experiment("3DBall", 20000, True, 4, False))
+results.append(run_experiment("3DBall", 20000, True, 1, False))
+results.append(run_experiment("3DBall", 20000, False, None, False))
 
-results.append(run_experiment("GridWorld", 2000, True, 4))
-results.append(run_experiment("GridWorld", 2000, True, 1))
-results.append(run_experiment("GridWorld", 2000, False, None))
+results.append(run_experiment("GridWorld", 2000, True, 4, False))
+results.append(run_experiment("GridWorld", 2000, True, 1, False))
+results.append(run_experiment("GridWorld", 2000, False, None, False))
 
-results.append(run_experiment("PushBlock", 20000, True, 4))
-results.append(run_experiment("PushBlock", 20000, True, 1))
-results.append(run_experiment("PushBlock", 20000, False, None))
+results.append(run_experiment("PushBlock", 20000, True, 4, False))
+results.append(run_experiment("PushBlock", 20000, True, 1, False))
+results.append(run_experiment("PushBlock", 20000, False, None, False))
 
-results.append(run_experiment("Hallway", 20000, True, 4))
-results.append(run_experiment("Hallway", 20000, True, 1))
-results.append(run_experiment("Hallway", 20000, False, None))
+results.append(run_experiment("Hallway", 20000, True, 4, False))
+results.append(run_experiment("Hallway", 20000, True, 1, False))
+results.append(run_experiment("Hallway", 20000, False, None, False))
 
-results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, "CrawlerStatic"))
-results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, "CrawlerStatic"))
-results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic"))
 
 
 for r in results:
diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index 470012d3f0..295a89c377 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -17,6 +17,8 @@
 from mlagents.trainers.brain import BrainParameters
 from mlagents.trainers.models_torch import ActorCritic
 
+from mlagents.trainers.ppo.trainer import TestingConfiguration
+
 EPSILON = 1e-7  # Small value to avoid divide by zero
 
 
@@ -117,6 +119,10 @@ def __init__(
             separate_critic=self.use_continuous_act,
         )
 
+        if TestingConfiguration.use_gpu:
+            #move to gpu
+            self.actor_critic.to(torch.device("cuda:0"))
+
     def split_decision_step(self, decision_requests):
         vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
         mask = None
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index bbc8f701b4..cecd9eff3d 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -2,6 +2,14 @@
 # ## ML-Agent Learning (PPO)
 # Contains an implementation of PPO as described in: https://arxiv.org/abs/1707.06347
 
+class TestingConfiguration:
+    use_torch = False
+    max_steps = 0
+    env_name = ""
+    use_gpu = False
+
+
+
 from collections import defaultdict
 from typing import cast
 
@@ -22,10 +30,6 @@
 logger = get_logger(__name__)
 
 
-class TestingConfiguration:
-    use_torch = False
-    max_steps = 0
-    env_name = ""
 
 
 class PPOTrainer(RLTrainer):

From ee1c2a97d7ef1da5b1e35e87f1d5f9c519e906c9 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 17:49:10 -0700
Subject: [PATCH 06/29] Testing GPU

---
 experiment_torch.py                           | 33 ++++++++++---------
 .../mlagents/trainers/policy/torch_policy.py  | 19 ++++++-----
 ml-agents/mlagents/trainers/ppo/trainer.py    |  2 +-
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index bff66d56c1..214448c8b9 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -15,9 +15,9 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 	TestingConfiguration.env_name = name
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = use_torch
-	TestingConfiguration.use_gpu = use_gpu
+	TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
 	if (not torch.cuda.is_available() and use_gpu) or (not use_torch and use_gpu):
-		return ("na", )*12
+		return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na"
 	if config_name is None:
 		config_name = name
 	run_options = parse_command_line([f"config/ppo/{config_name}.yaml"])
@@ -57,24 +57,25 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 
 
 results.append(run_experiment("3DBall", 20000, True, 4, False))
-results.append(run_experiment("3DBall", 20000, True, 1, False))
-results.append(run_experiment("3DBall", 20000, False, None, False))
+results.append(run_experiment("3DBall", 20000, True, 4, True))
+# results.append(run_experiment("3DBall", 20000, True, 1, False))
+# results.append(run_experiment("3DBall", 20000, False, None, False))
 
-results.append(run_experiment("GridWorld", 2000, True, 4, False))
-results.append(run_experiment("GridWorld", 2000, True, 1, False))
-results.append(run_experiment("GridWorld", 2000, False, None, False))
+# results.append(run_experiment("GridWorld", 2000, True, 4, False))
+# results.append(run_experiment("GridWorld", 2000, True, 1, False))
+# results.append(run_experiment("GridWorld", 2000, False, None, False))
 
-results.append(run_experiment("PushBlock", 20000, True, 4, False))
-results.append(run_experiment("PushBlock", 20000, True, 1, False))
-results.append(run_experiment("PushBlock", 20000, False, None, False))
+# results.append(run_experiment("PushBlock", 20000, True, 4, False))
+# results.append(run_experiment("PushBlock", 20000, True, 1, False))
+# results.append(run_experiment("PushBlock", 20000, False, None, False))
 
-results.append(run_experiment("Hallway", 20000, True, 4, False))
-results.append(run_experiment("Hallway", 20000, True, 1, False))
-results.append(run_experiment("Hallway", 20000, False, None, False))
+# results.append(run_experiment("Hallway", 20000, True, 4, False))
+# results.append(run_experiment("Hallway", 20000, True, 1, False))
+# results.append(run_experiment("Hallway", 20000, False, None, False))
 
-results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic"))
-results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic"))
-results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic"))
 
 
 for r in results:
diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index 295a89c377..cabffb0842 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -93,6 +93,9 @@ def __init__(
         self.log_std_min = -20
         self.log_std_max = 2
 
+        if TestingConfiguration.device != "cpu":
+            torch.set_default_tensor_type(torch.cuda.FloatTensor)
+
         self.inference_dict: Dict[str, tf.Tensor] = {}
         self.update_dict: Dict[str, tf.Tensor] = {}
 
@@ -119,9 +122,7 @@ def __init__(
             separate_critic=self.use_continuous_act,
         )
 
-        if TestingConfiguration.use_gpu:
-            #move to gpu
-            self.actor_critic.to(torch.device("cuda:0"))
+        self.actor_critic.to(TestingConfiguration.device)
 
     def split_decision_step(self, decision_requests):
         vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
@@ -196,18 +197,18 @@ def evaluate(
             action, log_probs, entropy, value_heads, memories = self.sample_actions(
                 vec_obs, vis_obs, masks=masks, memories=memories
             )
-        run_out["action"] = action.detach().numpy()
-        run_out["pre_action"] = action.detach().numpy()
+        run_out["action"] = action.detach().to(TestingConfiguration.device).numpy()
+        run_out["pre_action"] = action.detach().to(TestingConfiguration.device).numpy()
         # Todo - make pre_action difference
-        run_out["log_probs"] = log_probs.detach().numpy()
-        run_out["entropy"] = entropy.detach().numpy()
+        run_out["log_probs"] = log_probs.detach().to(TestingConfiguration.device).numpy()
+        run_out["entropy"] = entropy.detach().to(TestingConfiguration.device).numpy()
         run_out["value_heads"] = {
-            name: t.detach().numpy() for name, t in value_heads.items()
+            name: t.detach().to(TestingConfiguration.device).numpy() for name, t in value_heads.items()
         }
         run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
         run_out["learning_rate"] = 0.0
         if self.use_recurrent:
-            run_out["memories"] = memories.detach().numpy()
+            run_out["memories"] = memories.detach().to(TestingConfiguration.device).numpy()
         self.actor_critic.update_normalization(vec_obs)
         return run_out
 
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index cecd9eff3d..9f1ac67f6b 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -6,7 +6,7 @@ class TestingConfiguration:
     use_torch = False
     max_steps = 0
     env_name = ""
-    use_gpu = False
+    device = "cpu"
 
 
 

From a280e3c629b0ddf4fe379f3aeb978999ab11d3e0 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 17:51:41 -0700
Subject: [PATCH 07/29] Prepare to see a lot of commits, because I like my IDE
 and I am testing on a server and I am using git to sync the two

---
 experiment_torch.py                                |  3 ++-
 ml-agents/mlagents/trainers/policy/torch_policy.py | 12 ++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 214448c8b9..ced7d2a5e8 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -56,8 +56,9 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 
 
 
-results.append(run_experiment("3DBall", 20000, True, 4, False))
+
 results.append(run_experiment("3DBall", 20000, True, 4, True))
+results.append(run_experiment("3DBall", 20000, True, 4, False))
 # results.append(run_experiment("3DBall", 20000, True, 1, False))
 # results.append(run_experiment("3DBall", 20000, False, None, False))
 
diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index cabffb0842..9bcaf270cf 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -197,18 +197,18 @@ def evaluate(
             action, log_probs, entropy, value_heads, memories = self.sample_actions(
                 vec_obs, vis_obs, masks=masks, memories=memories
             )
-        run_out["action"] = action.detach().to(TestingConfiguration.device).numpy()
-        run_out["pre_action"] = action.detach().to(TestingConfiguration.device).numpy()
+        run_out["action"] = action.detach().cpu().numpy()
+        run_out["pre_action"] = action.detach().cpu().numpy()
         # Todo - make pre_action difference
-        run_out["log_probs"] = log_probs.detach().to(TestingConfiguration.device).numpy()
-        run_out["entropy"] = entropy.detach().to(TestingConfiguration.device).numpy()
+        run_out["log_probs"] = log_probs.detach().cpu().numpy()
+        run_out["entropy"] = entropy.detach().cpu().numpy()
         run_out["value_heads"] = {
-            name: t.detach().to(TestingConfiguration.device).numpy() for name, t in value_heads.items()
+            name: t.detach().cpu().numpy() for name, t in value_heads.items()
         }
         run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
         run_out["learning_rate"] = 0.0
         if self.use_recurrent:
-            run_out["memories"] = memories.detach().to(TestingConfiguration.device).numpy()
+            run_out["memories"] = memories.detach().cpu().numpy()
         self.actor_critic.update_normalization(vec_obs)
         return run_out
 

From a66a40dfe9489be2d625ded04bd89a3294890bb6 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 17:52:57 -0700
Subject: [PATCH 08/29] Prepare to see a lot of commits, because I like my IDE
 and I am testing on a server and I am using git to sync the two

---
 ml-agents/mlagents/trainers/ppo/optimizer_torch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
index 2e8ece92dc..afe3f91712 100644
--- a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py
@@ -143,8 +143,8 @@ def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
 
         self.optimizer.step()
         update_stats = {
-            "Losses/Policy Loss": abs(policy_loss.detach().numpy()),
-            "Losses/Value Loss": value_loss.detach().numpy(),
+            "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
+            "Losses/Value Loss": value_loss.detach().cpu().numpy(),
         }
 
         return update_stats

From dcde9453df4818aaa7500b8dd22b77549dc248ac Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 17:54:04 -0700
Subject: [PATCH 09/29] Move value estimates to CPU before numpy conversion

---
 ml-agents/mlagents/trainers/optimizer/torch_optimizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
index 762ad00357..80cefaee39 100644
--- a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
+++ b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
@@ -106,8 +106,8 @@ def get_trajectory_value_estimates(
         )
 
         for name, estimate in value_estimates.items():
-            value_estimates[name] = estimate.detach().numpy()
-            next_value_estimate[name] = next_value_estimate[name].detach().numpy()
+            value_estimates[name] = estimate.detach().cpu().numpy()
+            next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()
 
         if done:
             for k in next_value_estimate:

From 92f3194d5df9151f568492937b38a06825adfaee Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 17:56:12 -0700
Subject: [PATCH 10/29] Reset default tensor type to FloatTensor when device is cpu

---
 ml-agents/mlagents/trainers/policy/torch_policy.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index 9bcaf270cf..5ea2f729ee 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -95,6 +95,9 @@ def __init__(
 
         if TestingConfiguration.device != "cpu":
             torch.set_default_tensor_type(torch.cuda.FloatTensor)
+        else:
+            torch.set_default_tensor_type(torch.FloatTensor)
+        
 
         self.inference_dict: Dict[str, tf.Tensor] = {}
         self.update_dict: Dict[str, tf.Tensor] = {}

From 3b7e1e09c5fa69cdd60a23b9025c7aa301d0aa27 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 18:03:48 -0700
Subject: [PATCH 11/29] Re-enable all experiments with a shared step count and add GPU runs

---
 experiment_torch.py | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index ced7d2a5e8..6c7bb847da 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -55,28 +55,33 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 
 
 
+steps = 100000
 
 
-results.append(run_experiment("3DBall", 20000, True, 4, True))
-results.append(run_experiment("3DBall", 20000, True, 4, False))
-# results.append(run_experiment("3DBall", 20000, True, 1, False))
-# results.append(run_experiment("3DBall", 20000, False, None, False))
+results.append(run_experiment("3DBall", steps, True, 4, False))
+results.append(run_experiment("3DBall", steps, True, 1, False))
+results.append(run_experiment("3DBall", steps, True, 1, True))
+results.append(run_experiment("3DBall", steps, False, None, False))
 
-# results.append(run_experiment("GridWorld", 2000, True, 4, False))
-# results.append(run_experiment("GridWorld", 2000, True, 1, False))
-# results.append(run_experiment("GridWorld", 2000, False, None, False))
+results.append(run_experiment("GridWorld", steps, True, 4, False))
+results.append(run_experiment("GridWorld", steps, True, 1, False))
+results.append(run_experiment("GridWorld", steps, True, 1, True))
+results.append(run_experiment("GridWorld", steps, False, None, False))
 
-# results.append(run_experiment("PushBlock", 20000, True, 4, False))
-# results.append(run_experiment("PushBlock", 20000, True, 1, False))
-# results.append(run_experiment("PushBlock", 20000, False, None, False))
+results.append(run_experiment("PushBlock", steps, True, 4, False))
+results.append(run_experiment("PushBlock", steps, True, 1, False))
+results.append(run_experiment("PushBlock", steps, True, 1, True))
+results.append(run_experiment("PushBlock", steps, False, None, False))
 
-# results.append(run_experiment("Hallway", 20000, True, 4, False))
-# results.append(run_experiment("Hallway", 20000, True, 1, False))
-# results.append(run_experiment("Hallway", 20000, False, None, False))
+results.append(run_experiment("Hallway", steps, True, 4, False))
+results.append(run_experiment("Hallway", steps, True, 1, False))
+results.append(run_experiment("Hallway", steps, True, 1, True))
+results.append(run_experiment("Hallway", steps, False, None, False))
 
-# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic"))
+results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic"))
 
 
 for r in results:

From 42e2e73d1004c2d278e75a11195d48b235f6213e Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 18:41:28 -0700
Subject: [PATCH 12/29] Use torch.reshape instead of view when flattening conv output

---
 ml-agents/mlagents/trainers/models_torch.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py
index 434634fbce..a10579ef9b 100644
--- a/ml-agents/mlagents/trainers/models_torch.py
+++ b/ml-agents/mlagents/trainers/models_torch.py
@@ -407,7 +407,8 @@ def __init__(self, height, width, initial_channels, output_size):
     def forward(self, visual_obs):
         conv_1 = torch.relu(self.conv1(visual_obs))
         conv_2 = torch.relu(self.conv2(conv_1))
-        hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
+        # hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
+        hidden = torch.relu(self.dense(torch.reshape(conv_2,(-1, self.final_flat))))
         return hidden
 
 

From 2224fa03f04b3788da6bf949d47c7dbbf14bdbe5 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 19:56:03 -0700
Subject: [PATCH 13/29] Try to make LSTM inputs contiguous (misspelled as continuous())

---
 ml-agents/mlagents/trainers/models_torch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py
index a10579ef9b..80fab6342b 100644
--- a/ml-agents/mlagents/trainers/models_torch.py
+++ b/ml-agents/mlagents/trainers/models_torch.py
@@ -136,7 +136,7 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1):
         if self.use_lstm:
             embedding = embedding.view([sequence_length, -1, self.h_size])
             memories = torch.split(memories, self.m_size // 2, dim=-1)
-            embedding, memories = self.lstm(embedding, memories)
+            embedding, memories = self.lstm(embedding.continuous(), memories.continuous())
             embedding = embedding.view([-1, self.m_size // 2])
             memories = torch.cat(memories, dim=-1)
         return embedding, memories

From f306f2983424e32f21c241f45b6ef2b40c1cebc6 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 19:58:56 -0700
Subject: [PATCH 14/29] Fix typo: continuous() -> contiguous() on LSTM inputs

---
 ml-agents/mlagents/trainers/models_torch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py
index 80fab6342b..6966d69a89 100644
--- a/ml-agents/mlagents/trainers/models_torch.py
+++ b/ml-agents/mlagents/trainers/models_torch.py
@@ -136,7 +136,7 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1):
         if self.use_lstm:
             embedding = embedding.view([sequence_length, -1, self.h_size])
             memories = torch.split(memories, self.m_size // 2, dim=-1)
-            embedding, memories = self.lstm(embedding.continuous(), memories.continuous())
+            embedding, memories = self.lstm(embedding.contiguous(), memories.contiguous())
             embedding = embedding.view([-1, self.m_size // 2])
             memories = torch.cat(memories, dim=-1)
         return embedding, memories

From 2c706ce497d047d2626bac465ff6017496331e67 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 20:00:18 -0700
Subject: [PATCH 15/29] Call contiguous() on memories before splitting

---
 ml-agents/mlagents/trainers/models_torch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py
index 6966d69a89..42bcf6f07d 100644
--- a/ml-agents/mlagents/trainers/models_torch.py
+++ b/ml-agents/mlagents/trainers/models_torch.py
@@ -135,8 +135,8 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1):
 
         if self.use_lstm:
             embedding = embedding.view([sequence_length, -1, self.h_size])
-            memories = torch.split(memories, self.m_size // 2, dim=-1)
-            embedding, memories = self.lstm(embedding.contiguous(), memories.contiguous())
+            memories = torch.split(memories.contiguous(), self.m_size // 2, dim=-1)
+            embedding, memories = self.lstm(embedding.contiguous(), memories)
             embedding = embedding.view([-1, self.m_size // 2])
             memories = torch.cat(memories, dim=-1)
         return embedding, memories

From 3250d73503005851ba4be617d7050df88b099301 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 2 Jul 2020 20:03:15 -0700
Subject: [PATCH 16/29] Make each LSTM memory tensor contiguous after the split

---
 ml-agents/mlagents/trainers/models_torch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py
index 42bcf6f07d..c2e0fc27de 100644
--- a/ml-agents/mlagents/trainers/models_torch.py
+++ b/ml-agents/mlagents/trainers/models_torch.py
@@ -135,8 +135,8 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1):
 
         if self.use_lstm:
             embedding = embedding.view([sequence_length, -1, self.h_size])
-            memories = torch.split(memories.contiguous(), self.m_size // 2, dim=-1)
-            embedding, memories = self.lstm(embedding.contiguous(), memories)
+            memories = torch.split(memories, self.m_size // 2, dim=-1)
+            embedding, memories = self.lstm(embedding.contiguous(), (memories[0].contiguous(), memories[1].contiguous()))
             embedding = embedding.view([-1, self.m_size // 2])
             memories = torch.cat(memories, dim=-1)
         return embedding, memories

From 78022dcf347b260fb8b89d9c9af2fa35458e84af Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 10:40:23 -0700
Subject: [PATCH 17/29] Attempt at gpu on tf. Does not work

---
 experiment_torch.py                           | 54 ++++++++++---------
 .../mlagents/trainers/policy/nn_policy.py     |  8 +++
 2 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 6c7bb847da..c929dcbc48 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -16,7 +16,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = use_torch
 	TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
-	if (not torch.cuda.is_available() and use_gpu) or (not use_torch and use_gpu):
+	if (not torch.cuda.is_available() and use_gpu and use_torch):
 		return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na"
 	if config_name is None:
 		config_name = name
@@ -57,31 +57,33 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 
 steps = 100000
 
-
-results.append(run_experiment("3DBall", steps, True, 4, False))
-results.append(run_experiment("3DBall", steps, True, 1, False))
-results.append(run_experiment("3DBall", steps, True, 1, True))
-results.append(run_experiment("3DBall", steps, False, None, False))
-
-results.append(run_experiment("GridWorld", steps, True, 4, False))
-results.append(run_experiment("GridWorld", steps, True, 1, False))
-results.append(run_experiment("GridWorld", steps, True, 1, True))
-results.append(run_experiment("GridWorld", steps, False, None, False))
-
-results.append(run_experiment("PushBlock", steps, True, 4, False))
-results.append(run_experiment("PushBlock", steps, True, 1, False))
-results.append(run_experiment("PushBlock", steps, True, 1, True))
-results.append(run_experiment("PushBlock", steps, False, None, False))
-
-results.append(run_experiment("Hallway", steps, True, 4, False))
-results.append(run_experiment("Hallway", steps, True, 1, False))
-results.append(run_experiment("Hallway", steps, True, 1, True))
-results.append(run_experiment("Hallway", steps, False, None, False))
-
-results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic"))
-results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic"))
-results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic"))
-results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic"))
+results.append(run_experiment("3DBall", steps, False, 1, True))
+results.append(run_experiment("3DBall", steps, False, 1, False))
+
+# results.append(run_experiment("3DBall", steps, True, 4, False))
+# results.append(run_experiment("3DBall", steps, True, 1, False))
+# results.append(run_experiment("3DBall", steps, True, 1, True))
+# results.append(run_experiment("3DBall", steps, False, None, False))
+
+# results.append(run_experiment("GridWorld", steps, True, 4, False))
+# results.append(run_experiment("GridWorld", steps, True, 1, False))
+# results.append(run_experiment("GridWorld", steps, True, 1, True))
+# results.append(run_experiment("GridWorld", steps, False, None, False))
+
+# results.append(run_experiment("PushBlock", steps, True, 4, False))
+# results.append(run_experiment("PushBlock", steps, True, 1, False))
+# results.append(run_experiment("PushBlock", steps, True, 1, True))
+# results.append(run_experiment("PushBlock", steps, False, None, False))
+
+# results.append(run_experiment("Hallway", steps, True, 4, False))
+# results.append(run_experiment("Hallway", steps, True, 1, False))
+# results.append(run_experiment("Hallway", steps, True, 1, True))
+# results.append(run_experiment("Hallway", steps, False, None, False))
+
+# results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic"))
 
 
 for r in results:
diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py
index ea550cb652..f8ae47c9a1 100644
--- a/ml-agents/mlagents/trainers/policy/nn_policy.py
+++ b/ml-agents/mlagents/trainers/policy/nn_policy.py
@@ -12,6 +12,8 @@
     MultiCategoricalDistribution,
 )
 
+from mlagents.trainers.ppo.trainer import TestingConfiguration
+
 EPSILON = 1e-6  # Small value to avoid divide by zero
 
 
@@ -42,6 +44,12 @@ def __init__(
         :param reparameterize: Whether we are using the resampling trick to update the policy in continuous output.
         """
         super().__init__(seed, brain, trainer_settings, model_path, load)
+        if TestingConfiguration.device == "cuda:0":
+            tf.device("/gpu:0")
+            print("using GPU")
+        else:
+            tf.device("/cpu:0")
+            print("using CPU")
         self.grads = None
         self.update_batch: Optional[tf.Operation] = None
         num_layers = self.network_settings.num_layers

From 35df5f703e0225d2970b1eabc551c0f85694bb99 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 12:40:45 -0700
Subject: [PATCH 18/29] Set CUDA_VISIBLE_DEVICES per run and drop tf.device calls from nn_policy.py

---
 experiment_torch.py                             | 1 +
 ml-agents/mlagents/trainers/policy/nn_policy.py | 6 ------
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index c929dcbc48..b3f9d05124 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -16,6 +16,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = use_torch
 	TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
+	os.environ["CUDA_VISIBLE_DEVICES"] = "2" if use_gpu else "0"
 	if (not torch.cuda.is_available() and use_gpu and use_torch):
 		return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na"
 	if config_name is None:
diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py
index f8ae47c9a1..879065d3bc 100644
--- a/ml-agents/mlagents/trainers/policy/nn_policy.py
+++ b/ml-agents/mlagents/trainers/policy/nn_policy.py
@@ -44,12 +44,6 @@ def __init__(
         :param reparameterize: Whether we are using the resampling trick to update the policy in continuous output.
         """
         super().__init__(seed, brain, trainer_settings, model_path, load)
-        if TestingConfiguration.device == "cuda:0":
-            tf.device("/gpu:0")
-            print("using GPU")
-        else:
-            tf.device("/cpu:0")
-            print("using CPU")
         self.grads = None
         self.update_batch: Optional[tf.Operation] = None
         num_layers = self.network_settings.num_layers

From a28bba2bb185b5960845a016a056c36c981202ff Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 14:06:48 -0700
Subject: [PATCH 19/29] Set CUDA_VISIBLE_DEVICES to "0,1" for GPU runs and "-1" otherwise

---
 experiment_torch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index b3f9d05124..ebafe6d7dc 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -16,7 +16,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = use_torch
 	TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
-	os.environ["CUDA_VISIBLE_DEVICES"] = "2" if use_gpu else "0"
+	os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" if use_gpu else "-1"
 	if (not torch.cuda.is_available() and use_gpu and use_torch):
 		return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na"
 	if config_name is None:

From f306fc99daf2baf0be0ae6edd3ede194d8a88e34 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 14:21:14 -0700
Subject: [PATCH 20/29] Add num_envs parameter to run_experiment and rename steps to n_steps

---
 experiment_torch.py | 52 ++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index ebafe6d7dc..e789e3546f 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -11,12 +11,13 @@
 
 results = [("name", "steps", "use_torch", "num_torch_threads", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")]
 
-def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool, config_name=None):
+def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool,num_envs :int= 1, config_name=None):
 	TestingConfiguration.env_name = name
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = use_torch
 	TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
 	os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" if use_gpu else "-1"
+	import tensorflow as tf
 	if (not torch.cuda.is_available() and use_gpu and use_torch):
 		return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na"
 	if config_name is None:
@@ -24,6 +25,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 	run_options = parse_command_line([f"config/ppo/{config_name}.yaml"])
 	run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf")
 	run_options.checkpoint_settings.force = True
+	run_options.env_settings.num_envs = num_envs
 	for trainer_settings in run_options.behaviors.values():
 		trainer_settings.threaded = False
 	timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json")
@@ -56,35 +58,37 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 
 
 
-steps = 100000
+n_steps = 100000
 
-results.append(run_experiment("3DBall", steps, False, 1, True))
-results.append(run_experiment("3DBall", steps, False, 1, False))
+envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")]
 
-# results.append(run_experiment("3DBall", steps, True, 4, False))
-# results.append(run_experiment("3DBall", steps, True, 1, False))
-# results.append(run_experiment("3DBall", steps, True, 1, True))
-# results.append(run_experiment("3DBall", steps, False, None, False))
+results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None))
+results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None))
 
-# results.append(run_experiment("GridWorld", steps, True, 4, False))
-# results.append(run_experiment("GridWorld", steps, True, 1, False))
-# results.append(run_experiment("GridWorld", steps, True, 1, True))
-# results.append(run_experiment("GridWorld", steps, False, None, False))
+# results.append(run_experiment("3DBall", n_steps, True, 4, False))
+# results.append(run_experiment("3DBall", n_steps, True, 1, False))
+# results.append(run_experiment("3DBall", n_steps, True, 1, True))
+# results.append(run_experiment("3DBall", n_steps, False, None, False))
 
-# results.append(run_experiment("PushBlock", steps, True, 4, False))
-# results.append(run_experiment("PushBlock", steps, True, 1, False))
-# results.append(run_experiment("PushBlock", steps, True, 1, True))
-# results.append(run_experiment("PushBlock", steps, False, None, False))
+# results.append(run_experiment("GridWorld", n_steps, True, 4, False))
+# results.append(run_experiment("GridWorld", n_steps, True, 1, False))
+# results.append(run_experiment("GridWorld", n_steps, True, 1, True))
+# results.append(run_experiment("GridWorld", n_steps, False, None, False))
 
-# results.append(run_experiment("Hallway", steps, True, 4, False))
-# results.append(run_experiment("Hallway", steps, True, 1, False))
-# results.append(run_experiment("Hallway", steps, True, 1, True))
-# results.append(run_experiment("Hallway", steps, False, None, False))
+# results.append(run_experiment("PushBlock", n_steps, True, 4, False))
+# results.append(run_experiment("PushBlock", n_steps, True, 1, False))
+# results.append(run_experiment("PushBlock", n_steps, True, 1, True))
+# results.append(run_experiment("PushBlock", n_steps, False, None, False))
 
-# results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic"))
+# results.append(run_experiment("Hallway", n_steps, True, 4, False))
+# results.append(run_experiment("Hallway", n_steps, True, 1, False))
+# results.append(run_experiment("Hallway", n_steps, True, 1, True))
+# results.append(run_experiment("Hallway", n_steps, False, None, False))
+
+# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 4, False, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, False, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, True, "CrawlerStatic"))
+# results.append(run_experiment("CrawlerStaticTarget", n_steps, False, None, False, "CrawlerStatic"))
 
 
 for r in results:

From aa26a5cd51c47be3f85f82c5c876b5eee4c8401a Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 14:31:22 -0700
Subject: [PATCH 21/29] Import tensorflow at module level and set CUDA_VISIBLE_DEVICES once

---
 experiment_torch.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index e789e3546f..b53137612f 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -2,6 +2,7 @@
 import json
 import os
 import torch
+import tensorflow as tf
 from mlagents.trainers.learn import run_cli, parse_command_line
 from mlagents.trainers.settings import RunOptions
 from mlagents.trainers.stats import StatsReporter
@@ -16,8 +17,10 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 	TestingConfiguration.max_steps = steps
 	TestingConfiguration.use_torch = use_torch
 	TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
-	os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" if use_gpu else "-1"
-	import tensorflow as tf
+	if use_gpu:
+		tf.device("/GPU:0")
+	else:
+		tf.device("/device:CPU:0")
 	if (not torch.cuda.is_available() and use_gpu and use_torch):
 		return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na"
 	if config_name is None:
@@ -57,7 +60,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 	return name, steps, use_torch, num_torch_threads, use_gpu, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count
 
 
-
+os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
 n_steps = 100000
 
 envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")]

From 80b6b83534061df3a201602bade0703195efa180 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 14:32:52 -0700
Subject: [PATCH 22/29] _

---
 experiment_torch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index b53137612f..814ee39f83 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -65,8 +65,8 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 
 envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")]
 
-results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None))
-results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None))
+results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None))
+results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None))
 
 # results.append(run_experiment("3DBall", n_steps, True, 4, False))
 # results.append(run_experiment("3DBall", n_steps, True, 1, False))

From e17e79ca86d4e62bd82971df2f1d51a4887d9bcd Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 15:36:14 -0700
Subject: [PATCH 23/29] _

---
 experiment_torch.py                  | 177 ++++++++++++++-------------
 ml-agents/mlagents/trainers/learn.py |   1 +
 2 files changed, 92 insertions(+), 86 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 814ee39f83..7180352e82 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -3,6 +3,7 @@
 import os
 import torch
 import tensorflow as tf
+import argparse
 from mlagents.trainers.learn import run_cli, parse_command_line
 from mlagents.trainers.settings import RunOptions
 from mlagents.trainers.stats import StatsReporter
@@ -10,91 +11,95 @@
 from mlagents_envs.timers import _thread_timer_stacks
 
 
-results = [("name", "steps", "use_torch", "num_torch_threads", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")]
-
-def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool,num_envs :int= 1, config_name=None):
-	TestingConfiguration.env_name = name
-	TestingConfiguration.max_steps = steps
-	TestingConfiguration.use_torch = use_torch
-	TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
-	if use_gpu:
-		tf.device("/GPU:0")
-	else:
-		tf.device("/device:CPU:0")
-	if (not torch.cuda.is_available() and use_gpu and use_torch):
-		return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na"
-	if config_name is None:
-		config_name = name
-	run_options = parse_command_line([f"config/ppo/{config_name}.yaml"])
-	run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf")
-	run_options.checkpoint_settings.force = True
-	run_options.env_settings.num_envs = num_envs
-	for trainer_settings in run_options.behaviors.values():
-		trainer_settings.threaded = False
-	timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json")
-	if use_torch:
-		torch.set_num_threads(num_torch_threads)
-	run_cli(run_options)
-	StatsReporter.writers.clear()
-	StatsReporter.stats_dict.clear()
-	_thread_timer_stacks.clear()
-	with open(timers_path) as timers_json_file:
-		timers_json = json.load(timers_json_file)
-		total = timers_json["total"]
-		tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]
-		evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"]
-		update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"]
-		tc_advance_total = tc_advance["total"]
-		tc_advance_count = tc_advance["count"]
-	if use_torch:
-		update_total = update["TorchPPOOptimizer.update"]["total"]
-		evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
-		update_count = update["TorchPPOOptimizer.update"]["count"]
-		evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
-	else:
-		update_total = update["TFPPOOptimizer.update"]["total"]
-		evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
-		update_count = update["TFPPOOptimizer.update"]["count"]
-		evaluate_count= evaluate["NNPolicy.evaluate"]["count"]
-	# todo: do total / count
-	return name, steps, use_torch, num_torch_threads, use_gpu, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count
-
-
-os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
-n_steps = 100000
-
-envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")]
-
-results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None))
-results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None))
-
-# results.append(run_experiment("3DBall", n_steps, True, 4, False))
-# results.append(run_experiment("3DBall", n_steps, True, 1, False))
-# results.append(run_experiment("3DBall", n_steps, True, 1, True))
-# results.append(run_experiment("3DBall", n_steps, False, None, False))
-
-# results.append(run_experiment("GridWorld", n_steps, True, 4, False))
-# results.append(run_experiment("GridWorld", n_steps, True, 1, False))
-# results.append(run_experiment("GridWorld", n_steps, True, 1, True))
-# results.append(run_experiment("GridWorld", n_steps, False, None, False))
-
-# results.append(run_experiment("PushBlock", n_steps, True, 4, False))
-# results.append(run_experiment("PushBlock", n_steps, True, 1, False))
-# results.append(run_experiment("PushBlock", n_steps, True, 1, True))
-# results.append(run_experiment("PushBlock", n_steps, False, None, False))
-
-# results.append(run_experiment("Hallway", n_steps, True, 4, False))
-# results.append(run_experiment("Hallway", n_steps, True, 1, False))
-# results.append(run_experiment("Hallway", n_steps, True, 1, True))
-# results.append(run_experiment("Hallway", n_steps, False, None, False))
-
-# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 4, False, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, False, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, True, "CrawlerStatic"))
-# results.append(run_experiment("CrawlerStaticTarget", n_steps, False, None, False, "CrawlerStatic"))
-
-
-for r in results:
-	print(*r)
 
 
+def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool, num_envs :int= 1, config_name=None):
+    TestingConfiguration.env_name = name
+    TestingConfiguration.max_steps = steps
+    TestingConfiguration.use_torch = use_torch
+    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
+    if use_gpu:
+        tf.device("/GPU:0")
+    else:
+        tf.device("/device:CPU:0")
+    if (not torch.cuda.is_available() and use_gpu and use_torch):
+        return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), "na","na","na","na","na","na","na"
+    if config_name is None:
+        config_name = name
+    run_options = parse_command_line([f"config/ppo/{config_name}.yaml", "--num-envs", f"{num_envs}"])
+    run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf")
+    run_options.checkpoint_settings.force = True
+    # run_options.env_settings.num_envs = num_envs
+    for trainer_settings in run_options.behaviors.values():
+        trainer_settings.threaded = False
+    timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json")
+    if use_torch:
+        torch.set_num_threads(num_torch_threads)
+    run_cli(run_options)
+    StatsReporter.writers.clear()
+    StatsReporter.stats_dict.clear()
+    _thread_timer_stacks.clear()
+    with open(timers_path) as timers_json_file:
+        timers_json = json.load(timers_json_file)
+        total = timers_json["total"]
+        tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]
+        evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"]
+        update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"]
+        tc_advance_total = tc_advance["total"]
+        tc_advance_count = tc_advance["count"]
+    if use_torch:
+        update_total = update["TorchPPOOptimizer.update"]["total"]
+        evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
+        update_count = update["TorchPPOOptimizer.update"]["count"]
+        evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
+    else:
+        update_total = update["TFPPOOptimizer.update"]["total"]
+        evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
+        update_count = update["TFPPOOptimizer.update"]["count"]
+        evaluate_count= evaluate["NNPolicy.evaluate"]["count"]
+    # todo: do total / count
+    return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), str(total), str(tc_advance_total), str(tc_advance_count), str(update_total), str(update_count), str(evaluate_total), str(evaluate_count)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
+    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")  
+    args = parser.parse_args()
+
+    envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")]
+    
+
+
+    labels = ("name", "steps", "use_torch", "num_torch_threads", "num_envs", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")
+    
+    results = []
+    results.append(labels)
+    f = open("result_data.txt", "w")
+    f.write(" ".join(labels))
+    
+    for env_config in envs_config_tuples:
+        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
+        results.append(data)
+        f.write(" ".join(data))
+
+        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
+        results.append(data)
+        f.write(" ".join(data))
+
+
+        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=True, num_envs = args.num_envs, config_name=env_config[1])
+        results.append(data)
+        f.write(" ".join(data))
+
+        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
+        results.append(data)
+        f.write(" ".join(data))
+    for r in results:
+        print(*r)
+    f.close()
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index b4abd94988..0e9188e66d 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -252,6 +252,7 @@ def create_unity_environment(
                 seed=env_seed,
                 no_graphics=no_graphics,
                 base_port=start_port,
+                worker_id=worker_id,
                 additional_args=env_args,
                 side_channels=side_channels,
                 log_folder=log_folder,

From 216007734c3489f2e7ef0f69647f2680cbf06192 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 15:37:25 -0700
Subject: [PATCH 24/29] _

---
 experiment_torch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 7180352e82..f084587fd6 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -67,7 +67,7 @@ def main():
     parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")  
     args = parser.parse_args()
 
-    envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")]
+    envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")]
     
 
 

From c5ba857acc96c66782daa8f3fb8c21ea58cf063e Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 15:45:37 -0700
Subject: [PATCH 25/29] _

---
 experiment_torch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index f084587fd6..c0bcee55ef 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -75,7 +75,7 @@ def main():
     
     results = []
     results.append(labels)
-    f = open("result_data.txt", "w")
+    f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}.txt", "w")
     f.write(" ".join(labels))
     
     for env_config in envs_config_tuples:

From 771f2f14011e75341d9f6a42abc42f61f0220f4f Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Tue, 7 Jul 2020 15:55:30 -0700
Subject: [PATCH 26/29] _

---
 experiment_torch.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index c0bcee55ef..75dee08b0f 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -67,7 +67,7 @@ def main():
     parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")  
     args = parser.parse_args()
 
-    envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")]
+    envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")]
     
 
 
@@ -81,20 +81,20 @@ def main():
     for env_config in envs_config_tuples:
         data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
         results.append(data)
-        f.write(" ".join(data))
+        f.write(" ".join(data) + "\n")
 
         data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
         results.append(data)
-        f.write(" ".join(data))
+        f.write(" ".join(data)+ "\n")
 
 
         data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=True, num_envs = args.num_envs, config_name=env_config[1])
         results.append(data)
-        f.write(" ".join(data))
+        f.write(" ".join(data)+ "\n")
 
         data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
         results.append(data)
-        f.write(" ".join(data))
+        f.write(" ".join(data)+ "\n")
     for r in results:
         print(*r)
     f.close()

From c16d87d63233549db8c447669bb44ad3984fff0b Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Wed, 8 Jul 2020 10:01:39 -0700
Subject: [PATCH 27/29] _

---
 experiment_torch.py | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index 75dee08b0f..d3128247d1 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -22,7 +22,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
         tf.device("/GPU:0")
     else:
         tf.device("/device:CPU:0")
-    if (not torch.cuda.is_available() and use_gpu and use_torch):
+    if (not torch.cuda.is_available() and use_gpu):
         return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), "na","na","na","na","na","na","na"
     if config_name is None:
         config_name = name
@@ -64,35 +64,41 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
-    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")  
+    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
+    parser.add_argument("--gpu", default = False, action="store_true", help="If true, will use the GPU")
+    parser.add_argument("--threads", default=False, action="store_true", help="If true, will try both 1 and 8 threads for torch")
+    parser.add_argument("--ball", default=False, action="store_true", help="If true, will only do 3dball")
     args = parser.parse_args()
 
-    envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")]
-    
+    if args.gpu:
+        os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+    else:
+        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+    envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "VisualHallway")]
+    if args.ball:
+        envs_config_tuples=[("3DBall", "3DBall")]
 
 
     labels = ("name", "steps", "use_torch", "num_torch_threads", "num_envs", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")
     
     results = []
     results.append(labels)
-    f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}.txt", "w")
-    f.write(" ".join(labels))
+    f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt", "w")
+    f.write(" ".join(labels)+ "\n")
     
     for env_config in envs_config_tuples:
-        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
+        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=args.gpu, num_envs = args.num_envs, config_name=env_config[1])
         results.append(data)
         f.write(" ".join(data) + "\n")
 
-        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
-        results.append(data)
-        f.write(" ".join(data)+ "\n")
-
+        if args.threads:
+            data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=args.gpu, num_envs = args.num_envs, config_name=env_config[1])
+            results.append(data)
+            f.write(" ".join(data)+ "\n")
 
-        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=True, num_envs = args.num_envs, config_name=env_config[1])
-        results.append(data)
-        f.write(" ".join(data)+ "\n")
 
-        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1])
+        data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=args.gpu, num_envs = args.num_envs, config_name=env_config[1])
         results.append(data)
         f.write(" ".join(data)+ "\n")
     for r in results:

From 7113dd82375216caaeee6cee5303f1a186246232 Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Wed, 8 Jul 2020 10:04:06 -0700
Subject: [PATCH 28/29] _

---
 experiment_torch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experiment_torch.py b/experiment_torch.py
index d3128247d1..8ccb56be2b 100644
--- a/experiment_torch.py
+++ b/experiment_torch.py
@@ -71,7 +71,7 @@ def main():
     args = parser.parse_args()
 
     if args.gpu:
-        os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
     else:
         os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 

From 01d38e00af8bdbdf853666985e5c1bc7155b156e Mon Sep 17 00:00:00 2001
From: vincentpierre <vincentpierre@unity3d.com>
Date: Thu, 9 Jul 2020 17:49:05 -0700
Subject: [PATCH 29/29] Fixing learn.py

---
 ml-agents/mlagents/trainers/ppo/trainer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index 9f1ac67f6b..365a7634de 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -64,7 +64,8 @@ def __init__(
         self.load = load
         self.seed = seed
         self.framework = "torch" if TestingConfiguration.use_torch else "tf"
-        self.trainer_settings.max_steps = TestingConfiguration.max_steps
+        if TestingConfiguration.max_steps > 0:
+            self.trainer_settings.max_steps = TestingConfiguration.max_steps
         self.policy: Policy = None  # type: ignore
 
     def _process_trajectory(self, trajectory: Trajectory) -> None: