From 0d5632c02fdef17dd1932ecce238740d7579275a Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 1 Jul 2020 17:24:14 -0700 Subject: [PATCH 01/29] Experiment branch for comparing torch --- experiment_torch.py | 49 +++++++++++++++++++ ml-agents/mlagents/trainers/learn.py | 10 ++-- .../mlagents/trainers/policy/nn_policy.py | 1 - .../mlagents/trainers/policy/torch_policy.py | 2 +- ml-agents/mlagents/trainers/ppo/trainer.py | 9 +++- 5 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 experiment_torch.py diff --git a/experiment_torch.py b/experiment_torch.py new file mode 100644 index 0000000000..96e6aa484b --- /dev/null +++ b/experiment_torch.py @@ -0,0 +1,49 @@ + +import json +import os +from mlagents.trainers.learn import run_cli, parse_command_line +from mlagents.trainers.settings import RunOptions +from mlagents.trainers.stats import StatsReporter +from mlagents.trainers.ppo.trainer import TestingConfiguration +from mlagents_envs.timers import _thread_timer_stacks + + +results = {} + +def run_experiment(name:str, steps:int, torch:bool): + TestingConfiguration.env_name = name + TestingConfiguration.max_steps = steps + TestingConfiguration.use_torch = torch + run_options = parse_command_line([f"config/ppo/{name}.yaml"]) + run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if torch else "tf") + run_options.checkpoint_settings.force = True + for trainer_settings in run_options.behaviors.values(): + trainer_settings.threaded = False + timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json") + run_cli(run_options) + StatsReporter.writers.clear() + StatsReporter.stats_dict.clear() + _thread_timer_stacks.clear() + with open(timers_path) as timers_json_file: + timers_json = json.load(timers_json_file) + total = timers_json["total"] + evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"] + update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"] + if torch: + update = update["TorchPPOOptimizer.update"]["total"] + evaluate = evaluate["TorchPolicy.evaluate"]["total"] + else: + update = update["TFPPOOptimizer.update"]["total"] + evaluate = evaluate["NNPolicy.evaluate"]["total"] + return total, update, evaluate + +results["3DBall Torch"] = run_experiment("3DBall", 20000, True) +results["3DBall TF"] = run_experiment("3DBall", 20000, False) +results["GridWorld Torch"] = run_experiment("GridWorld", 20000, True) +results["GridWorld TF"] = run_experiment("GridWorld", 20000, False) +print("experiment\t", "total\t", "update\t", "evaluate") +for key, value in results.items(): + print(key + "\t", str(value[0])+ "\t", str(value[1])+ "\t", str(value[2])+ "\t") + + + diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index ead4b92cd8..829f076e81 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -35,6 +35,9 @@ ) from mlagents_envs import logging_util +from mlagents.trainers.ppo.trainer import TestingConfiguration +from mlagents_envs.registry import default_registry + logger = logging_util.get_logger(__name__) TRAINING_STATUS_FILE_NAME = "training_status.json" @@ -233,9 +236,10 @@ def create_unity_environment( ) -> UnityEnvironment: # Make sure that each environment gets a different seed env_seed = seed + worker_id - return UnityEnvironment( - file_name=env_path, - worker_id=worker_id, + return default_registry[TestingConfiguration.env_name].make( + # return UnityEnvironment( + # file_name=env_path, + # worker_id=worker_id, seed=env_seed, no_graphics=no_graphics, base_port=start_port, diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py index ce08f06f0a..ea550cb652 100644 --- a/ml-agents/mlagents/trainers/policy/nn_policy.py +++ b/ml-agents/mlagents/trainers/policy/nn_policy.py @@ -21,7 +21,6 @@ def __init__( seed: int, brain: BrainParameters, trainer_settings: TrainerSettings, - is_training: bool, model_path: str, load: bool, tanh_squash: bool = False, diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index d140a6e1d5..149e064528 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -94,7 +94,7 @@ def __init__( self.inference_dict: Dict[str, tf.Tensor] = {} self.update_dict: Dict[str, tf.Tensor] = {} # TF defaults to 32-bit, so we use the same here. - torch.set_default_tensor_type(torch.DoubleTensor) + torch.set_default_tensor_type(torch.FloatTensor) reward_signal_configs = trainer_settings.reward_signals reward_signal_names = [key.value for key, _ in reward_signal_configs.items()] diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 19519aaeb8..bbc8f701b4 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -22,6 +22,12 @@ logger = get_logger(__name__) +class TestingConfiguration: + use_torch = False + max_steps = 0 + env_name = "" + + class PPOTrainer(RLTrainer): """The PPOTrainer is an implementation of the PPO algorithm.""" @@ -53,7 +59,8 @@ def __init__( ) self.load = load self.seed = seed - self.framework = "torch" + self.framework = "torch" if TestingConfiguration.use_torch else "tf" + self.trainer_settings.max_steps = TestingConfiguration.max_steps self.policy: Policy = None # type: ignore def _process_trajectory(self, trajectory: Trajectory) -> None: From c7c8df2e71f36eff166f1a835b2c1e72a9e77b42 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 1 Jul 2020 18:35:49 -0700 Subject: [PATCH 02/29] Updates and merging ervin changes --- experiment_torch.py | 8 ++-- ml-agents/mlagents/trainers/learn.py | 31 +++++++++----- .../mlagents/trainers/policy/torch_policy.py | 40 ++++++++++--------- 3 files changed, 47 insertions(+), 32 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index 96e6aa484b..3fbe7c7d9e 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -39,9 +39,11 @@ def run_experiment(name:str, steps:int, torch:bool): results["3DBall Torch"] = run_experiment("3DBall", 20000, True) results["3DBall TF"] = run_experiment("3DBall", 20000, False) -results["GridWorld Torch"] = run_experiment("GridWorld", 20000, True) -results["GridWorld TF"] = run_experiment("GridWorld", 20000, False) -print("experiment\t", "total\t", "update\t", "evaluate") +results["GridWorld Torch"] = run_experiment("GridWorld", 2000, True) +results["GridWorld TF"] = run_experiment("GridWorld", 2000, False) +results["PushBlock Torch"] = run_experiment("PushBlock", 20000, True) +results["PushBlock TF"] = run_experiment("PushBlock", 20000, False) +print("experiment\t", "total\t\t\t\t", "update\t\t\t\t", "evaluate") for key, value in results.items(): print(key + "\t", str(value[0])+ "\t", str(value[1])+ "\t", str(value[2])+ "\t") diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 829f076e81..b4abd94988 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -236,17 +236,26 @@ def create_unity_environment( ) -> UnityEnvironment: # Make sure that each environment gets a different seed env_seed = seed + worker_id - return default_registry[TestingConfiguration.env_name].make( - # return UnityEnvironment( - # file_name=env_path, - # worker_id=worker_id, - seed=env_seed, - no_graphics=no_graphics, - base_port=start_port, - additional_args=env_args, - side_channels=side_channels, - log_folder=log_folder, - ) + if TestingConfiguration.env_name == "": + return UnityEnvironment( + file_name=env_path, + worker_id=worker_id, + seed=env_seed, + no_graphics=no_graphics, + base_port=start_port, + additional_args=env_args, + side_channels=side_channels, + log_folder=log_folder, + ) + else: + return default_registry[TestingConfiguration.env_name].make( + seed=env_seed, + no_graphics=no_graphics, + base_port=start_port, + additional_args=env_args, + side_channels=side_channels, + log_folder=log_folder, + ) return create_unity_environment diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index d3fb2db022..470012d3f0 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -249,24 +249,28 @@ def load_model(self, step=0): self.actor_critic.load_state_dict(torch.load(load_path)) def export_model(self, step=0): - fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])] - fake_vis_obs = [torch.zeros([1] + [84, 84, 3])] - fake_masks = torch.ones([1] + self.actor_critic.act_size) - # fake_memories = torch.zeros([1] + [self.m_size]) - export_path = "./model-" + str(step) + ".onnx" - output_names = ["action", "action_probs"] - input_names = ["vector_observation", "action_mask"] - dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]} - onnx.export( - self.actor_critic, - (fake_vec_obs, fake_vis_obs, fake_masks), - export_path, - verbose=True, - opset_version=12, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes, - ) + try: + fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])] + fake_vis_obs = [torch.zeros([1] + [84, 84, 3])] + fake_masks = torch.ones([1] + self.actor_critic.act_size) + # fake_memories = torch.zeros([1] + [self.m_size]) + export_path = "./model-" + str(step) + ".onnx" + output_names = ["action", "action_probs"] + input_names = ["vector_observation", "action_mask"] + dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]} + onnx.export( + self.actor_critic, + (fake_vec_obs, fake_vis_obs, fake_masks), + export_path, + verbose=True, + opset_version=12, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + ) + except: + print("Could not export torch model") + return @property def vis_obs_size(self): From e0120ec3ddae3d8ea01ad10ea6364bb828596827 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 10:14:44 -0700 Subject: [PATCH 03/29] improvements on experiment_torch.py --- experiment_torch.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index 3fbe7c7d9e..f2f78c1cc8 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -1,6 +1,7 @@ import json import os +import torch from mlagents.trainers.learn import run_cli, parse_command_line from mlagents.trainers.settings import RunOptions from mlagents.trainers.stats import StatsReporter @@ -10,11 +11,13 @@ results = {} -def run_experiment(name:str, steps:int, torch:bool): +def run_experiment(name:str, steps:int, torch:bool, config_name=None): TestingConfiguration.env_name = name TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = torch - run_options = parse_command_line([f"config/ppo/{name}.yaml"]) + if config_name is None: + config_name = name + run_options = parse_command_line([f"config/ppo/{config_name}.yaml"]) run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if torch else "tf") run_options.checkpoint_settings.force = True for trainer_settings in run_options.behaviors.values(): @@ -35,17 +38,34 @@ def run_experiment(name:str, steps:int, torch:bool): else: update = update["TFPPOOptimizer.update"]["total"] evaluate = evaluate["NNPolicy.evaluate"]["total"] + # todo: do total / count return total, update, evaluate + + results["3DBall Torch"] = run_experiment("3DBall", 20000, True) results["3DBall TF"] = run_experiment("3DBall", 20000, False) results["GridWorld Torch"] = run_experiment("GridWorld", 2000, True) results["GridWorld TF"] = run_experiment("GridWorld", 2000, False) results["PushBlock Torch"] = run_experiment("PushBlock", 20000, True) results["PushBlock TF"] = run_experiment("PushBlock", 20000, False) -print("experiment\t", "total\t\t\t\t", "update\t\t\t\t", "evaluate") +results["Hallway Torch"] = run_experiment("Hallway", 20000, True) +results["Hallway TF"] = run_experiment("Hallway", 20000, False) +results["CrawlerStaticTarget Torch"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic") +results["CrawlerStaticTarget TF"] = run_experiment("CrawlerStaticTarget", 50000, False, "CrawlerStatic") + +torch.set_num_threads(1) + +results["3DBall Torch 1 thread"] = run_experiment("3DBall", 20000, True) +results["GridWorld Torch 1 thread"] = run_experiment("GridWorld", 2000, True) +results["PushBlock Torch 1 thread"] = run_experiment("PushBlock", 20000, True) +results["Hallway Torch 1 thread"] = run_experiment("Hallway", 20000, True) +results["CrawlerStaticTarget Torch 1 thread"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic") + + +print("experiment,\t", "total,\t", "update,\t", "evaluate") for key, value in results.items(): - print(key + "\t", str(value[0])+ "\t", str(value[1])+ "\t", str(value[2])+ "\t") + print(key + ",\t", str(value[0])+ ",\t", str(value[1])+ ",\t", str(value[2])+ "\t") From 4ad81e0e16ca64d85b18f5bf8e14fc0d1c30930b Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 14:31:52 -0700 Subject: [PATCH 04/29] Better printing of results --- experiment_torch.py | 67 +++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index f2f78c1cc8..8662b9a5e0 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -9,20 +9,22 @@ from mlagents_envs.timers import _thread_timer_stacks -results = {} +results = [("name", "steps", "use_torch", "num_torch_threads", "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")] -def run_experiment(name:str, steps:int, torch:bool, config_name=None): +def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, config_name=None): TestingConfiguration.env_name = name TestingConfiguration.max_steps = steps - TestingConfiguration.use_torch = torch + TestingConfiguration.use_torch = use_torch if config_name is None: config_name = name run_options = parse_command_line([f"config/ppo/{config_name}.yaml"]) - run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if torch else "tf") + run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf") run_options.checkpoint_settings.force = True for trainer_settings in run_options.behaviors.values(): trainer_settings.threaded = False timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json") + if use_torch: + torch.set_num_threads(num_torch_threads) run_cli(run_options) StatsReporter.writers.clear() StatsReporter.stats_dict.clear() @@ -30,42 +32,49 @@ def run_experiment(name:str, steps:int, torch:bool, config_name=None): with open(timers_path) as timers_json_file: timers_json = json.load(timers_json_file) total = timers_json["total"] + tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"] evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"] update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"] - if torch: - update = update["TorchPPOOptimizer.update"]["total"] - evaluate = evaluate["TorchPolicy.evaluate"]["total"] + tc_advance_total = tc_advance["total"] + tc_advance_count = tc_advance["count"] + if use_torch: + update_total = update["TorchPPOOptimizer.update"]["total"] + evaluate_total = evaluate["TorchPolicy.evaluate"]["total"] + update_count = update["TorchPPOOptimizer.update"]["count"] + evaluate_count = evaluate["TorchPolicy.evaluate"]["count"] else: - update = update["TFPPOOptimizer.update"]["total"] - evaluate = evaluate["NNPolicy.evaluate"]["total"] + update_total = update["TFPPOOptimizer.update"]["total"] + evaluate_total = evaluate["NNPolicy.evaluate"]["total"] + update_count = update["TFPPOOptimizer.update"]["count"] + evaluate_count= evaluate["NNPolicy.evaluate"]["count"] # todo: do total / count - return total, update, evaluate + return name, steps, use_torch, num_torch_threads, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count -results["3DBall Torch"] = run_experiment("3DBall", 20000, True) -results["3DBall TF"] = run_experiment("3DBall", 20000, False) -results["GridWorld Torch"] = run_experiment("GridWorld", 2000, True) -results["GridWorld TF"] = run_experiment("GridWorld", 2000, False) -results["PushBlock Torch"] = run_experiment("PushBlock", 20000, True) -results["PushBlock TF"] = run_experiment("PushBlock", 20000, False) -results["Hallway Torch"] = run_experiment("Hallway", 20000, True) -results["Hallway TF"] = run_experiment("Hallway", 20000, False) -results["CrawlerStaticTarget Torch"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic") -results["CrawlerStaticTarget TF"] = run_experiment("CrawlerStaticTarget", 50000, False, "CrawlerStatic") -torch.set_num_threads(1) +results.append(run_experiment("3DBall", 20000, True, 4)) +results.append(run_experiment("3DBall", 20000, True, 1)) +results.append(run_experiment("3DBall", 20000, False, None)) -results["3DBall Torch 1 thread"] = run_experiment("3DBall", 20000, True) -results["GridWorld Torch 1 thread"] = run_experiment("GridWorld", 2000, True) -results["PushBlock Torch 1 thread"] = run_experiment("PushBlock", 20000, True) -results["Hallway Torch 1 thread"] = run_experiment("Hallway", 20000, True) -results["CrawlerStaticTarget Torch 1 thread"] = run_experiment("CrawlerStaticTarget", 50000, True, "CrawlerStatic") +results.append(run_experiment("GridWorld", 2000, True, 4)) +results.append(run_experiment("GridWorld", 2000, True, 1)) +results.append(run_experiment("GridWorld", 2000, False, None)) +results.append(run_experiment("PushBlock", 20000, True, 4)) +results.append(run_experiment("PushBlock", 20000, True, 1)) +results.append(run_experiment("PushBlock", 20000, False, None)) -print("experiment,\t", "total,\t", "update,\t", "evaluate") -for key, value in results.items(): - print(key + ",\t", str(value[0])+ ",\t", str(value[1])+ ",\t", str(value[2])+ "\t") +results.append(run_experiment("Hallway", 20000, True, 4)) +results.append(run_experiment("Hallway", 20000, True, 1)) +results.append(run_experiment("Hallway", 20000, False, None)) +results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, "CrawlerStatic")) + + +for r in results: + print(*r) From b1eb17d3502125d5f9bfe9f72437fabee46b4766 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 14:54:36 -0700 Subject: [PATCH 05/29] preliminary gpu experiment --- experiment_torch.py | 39 ++++++++++--------- .../mlagents/trainers/policy/torch_policy.py | 6 +++ ml-agents/mlagents/trainers/ppo/trainer.py | 12 ++++-- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index 8662b9a5e0..bff66d56c1 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -9,12 +9,15 @@ from mlagents_envs.timers import _thread_timer_stacks -results = [("name", "steps", "use_torch", "num_torch_threads", "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")] +results = [("name", "steps", "use_torch", "num_torch_threads", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")] -def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, config_name=None): +def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool, config_name=None): TestingConfiguration.env_name = name TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = use_torch + TestingConfiguration.use_gpu = use_gpu + if (not torch.cuda.is_available() and use_gpu) or (not use_torch and use_gpu): + return ("na", )*12 if config_name is None: config_name = name run_options = parse_command_line([f"config/ppo/{config_name}.yaml"]) @@ -48,30 +51,30 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, c update_count = update["TFPPOOptimizer.update"]["count"] evaluate_count= evaluate["NNPolicy.evaluate"]["count"] # todo: do total / count - return name, steps, use_torch, num_torch_threads, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count + return name, steps, use_torch, num_torch_threads, use_gpu, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count -results.append(run_experiment("3DBall", 20000, True, 4)) -results.append(run_experiment("3DBall", 20000, True, 1)) -results.append(run_experiment("3DBall", 20000, False, None)) +results.append(run_experiment("3DBall", 20000, True, 4, False)) +results.append(run_experiment("3DBall", 20000, True, 1, False)) +results.append(run_experiment("3DBall", 20000, False, None, False)) -results.append(run_experiment("GridWorld", 2000, True, 4)) -results.append(run_experiment("GridWorld", 2000, True, 1)) -results.append(run_experiment("GridWorld", 2000, False, None)) +results.append(run_experiment("GridWorld", 2000, True, 4, False)) +results.append(run_experiment("GridWorld", 2000, True, 1, False)) +results.append(run_experiment("GridWorld", 2000, False, None, False)) -results.append(run_experiment("PushBlock", 20000, True, 4)) -results.append(run_experiment("PushBlock", 20000, True, 1)) -results.append(run_experiment("PushBlock", 20000, False, None)) +results.append(run_experiment("PushBlock", 20000, True, 4, False)) +results.append(run_experiment("PushBlock", 20000, True, 1, False)) +results.append(run_experiment("PushBlock", 20000, False, None, False)) -results.append(run_experiment("Hallway", 20000, True, 4)) -results.append(run_experiment("Hallway", 20000, True, 1)) -results.append(run_experiment("Hallway", 20000, False, None)) +results.append(run_experiment("Hallway", 20000, True, 4, False)) +results.append(run_experiment("Hallway", 20000, True, 1, False)) +results.append(run_experiment("Hallway", 20000, False, None, False)) -results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, "CrawlerStatic")) -results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, "CrawlerStatic")) -results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic")) for r in results: diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index 470012d3f0..295a89c377 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -17,6 +17,8 @@ from mlagents.trainers.brain import BrainParameters from mlagents.trainers.models_torch import ActorCritic +from mlagents.trainers.ppo.trainer import TestingConfiguration + EPSILON = 1e-7 # Small value to avoid divide by zero @@ -117,6 +119,10 @@ def __init__( separate_critic=self.use_continuous_act, ) + if TestingConfiguration.use_gpu: + #move to gpu + self.actor_critic.to(torch.device("cuda:0")) + def split_decision_step(self, decision_requests): vec_vis_obs = SplitObservations.from_observations(decision_requests.obs) mask = None diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index bbc8f701b4..cecd9eff3d 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -2,6 +2,14 @@ # ## ML-Agent Learning (PPO) # Contains an implementation of PPO as described in: https://arxiv.org/abs/1707.06347 +class TestingConfiguration: + use_torch = False + max_steps = 0 + env_name = "" + use_gpu = False + + + from collections import defaultdict from typing import cast @@ -22,10 +30,6 @@ logger = get_logger(__name__) -class TestingConfiguration: - use_torch = False - max_steps = 0 - env_name = "" class PPOTrainer(RLTrainer): From ee1c2a97d7ef1da5b1e35e87f1d5f9c519e906c9 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 17:49:10 -0700 Subject: [PATCH 06/29] Testing gpu --- experiment_torch.py | 33 ++++++++++--------- .../mlagents/trainers/policy/torch_policy.py | 19 ++++++----- ml-agents/mlagents/trainers/ppo/trainer.py | 2 +- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index bff66d56c1..214448c8b9 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -15,9 +15,9 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u TestingConfiguration.env_name = name TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = use_torch - TestingConfiguration.use_gpu = use_gpu + TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" if (not torch.cuda.is_available() and use_gpu) or (not use_torch and use_gpu): - return ("na", )*12 + return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na" if config_name is None: config_name = name run_options = parse_command_line([f"config/ppo/{config_name}.yaml"]) @@ -57,24 +57,25 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u results.append(run_experiment("3DBall", 20000, True, 4, False)) -results.append(run_experiment("3DBall", 20000, True, 1, False)) -results.append(run_experiment("3DBall", 20000, False, None, False)) +results.append(run_experiment("3DBall", 20000, True, 4, True)) +# results.append(run_experiment("3DBall", 20000, True, 1, False)) +# results.append(run_experiment("3DBall", 20000, False, None, False)) -results.append(run_experiment("GridWorld", 2000, True, 4, False)) -results.append(run_experiment("GridWorld", 2000, True, 1, False)) -results.append(run_experiment("GridWorld", 2000, False, None, False)) +# results.append(run_experiment("GridWorld", 2000, True, 4, False)) +# results.append(run_experiment("GridWorld", 2000, True, 1, False)) +# results.append(run_experiment("GridWorld", 2000, False, None, False)) -results.append(run_experiment("PushBlock", 20000, True, 4, False)) -results.append(run_experiment("PushBlock", 20000, True, 1, False)) -results.append(run_experiment("PushBlock", 20000, False, None, False)) +# results.append(run_experiment("PushBlock", 20000, True, 4, False)) +# results.append(run_experiment("PushBlock", 20000, True, 1, False)) +# results.append(run_experiment("PushBlock", 20000, False, None, False)) -results.append(run_experiment("Hallway", 20000, True, 4, False)) -results.append(run_experiment("Hallway", 20000, True, 1, False)) -results.append(run_experiment("Hallway", 20000, False, None, False)) +# results.append(run_experiment("Hallway", 20000, True, 4, False)) +# results.append(run_experiment("Hallway", 20000, True, 1, False)) +# results.append(run_experiment("Hallway", 20000, False, None, False)) -results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic")) -results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic")) -results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic")) for r in results: diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index 295a89c377..cabffb0842 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -93,6 +93,9 @@ def __init__( self.log_std_min = -20 self.log_std_max = 2 + if TestingConfiguration.device != "cpu": + torch.set_default_tensor_type(torch.cuda.FloatTensor) + self.inference_dict: Dict[str, tf.Tensor] = {} self.update_dict: Dict[str, tf.Tensor] = {} @@ -119,9 +122,7 @@ def __init__( separate_critic=self.use_continuous_act, ) - if TestingConfiguration.use_gpu: - #move to gpu - self.actor_critic.to(torch.device("cuda:0")) + self.actor_critic.to(TestingConfiguration.device) def split_decision_step(self, decision_requests): vec_vis_obs = SplitObservations.from_observations(decision_requests.obs) @@ -196,18 +197,18 @@ def evaluate( action, log_probs, entropy, value_heads, memories = self.sample_actions( vec_obs, vis_obs, masks=masks, memories=memories ) - run_out["action"] = action.detach().numpy() - run_out["pre_action"] = action.detach().numpy() + run_out["action"] = action.detach().to(TestingConfiguration.device).numpy() + run_out["pre_action"] = action.detach().to(TestingConfiguration.device).numpy() # Todo - make pre_action difference - run_out["log_probs"] = log_probs.detach().numpy() - run_out["entropy"] = entropy.detach().numpy() + run_out["log_probs"] = log_probs.detach().to(TestingConfiguration.device).numpy() + run_out["entropy"] = entropy.detach().to(TestingConfiguration.device).numpy() run_out["value_heads"] = { - name: t.detach().numpy() for name, t in value_heads.items() + name: t.detach().to(TestingConfiguration.device).numpy() for name, t in value_heads.items() } run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0) run_out["learning_rate"] = 0.0 if self.use_recurrent: - run_out["memories"] = memories.detach().numpy() + run_out["memories"] = memories.detach().to(TestingConfiguration.device).numpy() self.actor_critic.update_normalization(vec_obs) return run_out diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index cecd9eff3d..9f1ac67f6b 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -6,7 +6,7 @@ class TestingConfiguration: use_torch = False max_steps = 0 env_name = "" - use_gpu = False + device = "cpu" From a280e3c629b0ddf4fe379f3aeb978999ab11d3e0 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 17:51:41 -0700 Subject: [PATCH 07/29] Prepare to see a lot of commits, because I like my IDE and I am testing on a server and I am using git to sync the two --- experiment_torch.py | 3 ++- ml-agents/mlagents/trainers/policy/torch_policy.py | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index 214448c8b9..ced7d2a5e8 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -56,8 +56,9 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u -results.append(run_experiment("3DBall", 20000, True, 4, False)) + results.append(run_experiment("3DBall", 20000, True, 4, True)) +results.append(run_experiment("3DBall", 20000, True, 4, False)) # results.append(run_experiment("3DBall", 20000, True, 1, False)) # results.append(run_experiment("3DBall", 20000, False, None, False)) diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index cabffb0842..9bcaf270cf 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -197,18 +197,18 @@ def evaluate( action, log_probs, entropy, value_heads, memories = self.sample_actions( vec_obs, vis_obs, masks=masks, memories=memories ) - run_out["action"] = action.detach().to(TestingConfiguration.device).numpy() - run_out["pre_action"] = action.detach().to(TestingConfiguration.device).numpy() + run_out["action"] = action.detach().cpu().numpy() + run_out["pre_action"] = action.detach().cpu().numpy() # Todo - make pre_action difference - run_out["log_probs"] = log_probs.detach().to(TestingConfiguration.device).numpy() - run_out["entropy"] = entropy.detach().to(TestingConfiguration.device).numpy() + run_out["log_probs"] = log_probs.detach().cpu().numpy() + run_out["entropy"] = entropy.detach().cpu().numpy() run_out["value_heads"] = { - name: t.detach().to(TestingConfiguration.device).numpy() for name, t in value_heads.items() + name: t.detach().cpu().numpy() for name, t in value_heads.items() } run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0) run_out["learning_rate"] = 0.0 if self.use_recurrent: - run_out["memories"] = memories.detach().to(TestingConfiguration.device).numpy() + run_out["memories"] = memories.detach().cpu().numpy() self.actor_critic.update_normalization(vec_obs) return run_out From a66a40dfe9489be2d625ded04bd89a3294890bb6 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 17:52:57 -0700 Subject: [PATCH 08/29] Prepare to see a lot of commits, because I like my IDE and I am testing on a server and I am using git to sync the two --- ml-agents/mlagents/trainers/ppo/optimizer_torch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py index 2e8ece92dc..afe3f91712 100644 --- a/ml-agents/mlagents/trainers/ppo/optimizer_torch.py +++ b/ml-agents/mlagents/trainers/ppo/optimizer_torch.py @@ -143,8 +143,8 @@ def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]: self.optimizer.step() update_stats = { - "Losses/Policy Loss": abs(policy_loss.detach().numpy()), - "Losses/Value Loss": value_loss.detach().numpy(), + "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()), + "Losses/Value Loss": value_loss.detach().cpu().numpy(), } return update_stats From dcde9453df4818aaa7500b8dd22b77549dc248ac Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 17:54:04 -0700 Subject: [PATCH 09/29] _ --- ml-agents/mlagents/trainers/optimizer/torch_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py index 762ad00357..80cefaee39 100644 --- a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py +++ b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py @@ -106,8 +106,8 @@ def get_trajectory_value_estimates( ) for name, estimate in value_estimates.items(): - value_estimates[name] = estimate.detach().numpy() - next_value_estimate[name] = next_value_estimate[name].detach().numpy() + value_estimates[name] = estimate.detach().cpu().numpy() + next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy() if done: for k in next_value_estimate: From 92f3194d5df9151f568492937b38a06825adfaee Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 17:56:12 -0700 Subject: [PATCH 10/29] _ --- ml-agents/mlagents/trainers/policy/torch_policy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index 9bcaf270cf..5ea2f729ee 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -95,6 +95,9 @@ def __init__( if TestingConfiguration.device != "cpu": torch.set_default_tensor_type(torch.cuda.FloatTensor) + else: + torch.set_default_tensor_type(torch.FloatTensor) + self.inference_dict: Dict[str, tf.Tensor] = {} self.update_dict: Dict[str, tf.Tensor] = {} From 3b7e1e09c5fa69cdd60a23b9025c7aa301d0aa27 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 18:03:48 -0700 Subject: [PATCH 11/29] _ --- experiment_torch.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index ced7d2a5e8..6c7bb847da 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -55,28 +55,33 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u +steps = 100000 -results.append(run_experiment("3DBall", 20000, True, 4, True)) -results.append(run_experiment("3DBall", 20000, True, 4, False)) -# results.append(run_experiment("3DBall", 20000, True, 1, False)) -# results.append(run_experiment("3DBall", 20000, False, None, False)) +results.append(run_experiment("3DBall", steps, True, 4, False)) +results.append(run_experiment("3DBall", steps, True, 1, False)) +results.append(run_experiment("3DBall", steps, True, 1, True)) +results.append(run_experiment("3DBall", steps, False, None, False)) -# results.append(run_experiment("GridWorld", 2000, True, 4, False)) -# results.append(run_experiment("GridWorld", 2000, True, 1, False)) -# results.append(run_experiment("GridWorld", 2000, False, None, False)) +results.append(run_experiment("GridWorld", steps, True, 4, False)) +results.append(run_experiment("GridWorld", steps, True, 1, False)) +results.append(run_experiment("GridWorld", steps, True, 1, True)) +results.append(run_experiment("GridWorld", steps, False, None, False)) -# results.append(run_experiment("PushBlock", 20000, True, 4, False)) -# results.append(run_experiment("PushBlock", 20000, True, 1, False)) -# results.append(run_experiment("PushBlock", 20000, False, None, False)) +results.append(run_experiment("PushBlock", steps, True, 4, False)) +results.append(run_experiment("PushBlock", steps, True, 1, False)) +results.append(run_experiment("PushBlock", steps, True, 1, True)) +results.append(run_experiment("PushBlock", steps, False, None, False)) -# results.append(run_experiment("Hallway", 20000, True, 4, False)) -# results.append(run_experiment("Hallway", 20000, True, 1, False)) -# results.append(run_experiment("Hallway", 20000, False, None, False)) +results.append(run_experiment("Hallway", steps, True, 4, False)) +results.append(run_experiment("Hallway", steps, True, 1, False)) +results.append(run_experiment("Hallway", steps, True, 1, True)) +results.append(run_experiment("Hallway", steps, False, None, False)) -# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 4, False, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", 50000, True, 1, False, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", 50000, False, None, False, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic")) +results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic")) for r in results: From 42e2e73d1004c2d278e75a11195d48b235f6213e Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 18:41:28 -0700 Subject: [PATCH 12/29] _ --- ml-agents/mlagents/trainers/models_torch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py index 434634fbce..a10579ef9b 100644 --- a/ml-agents/mlagents/trainers/models_torch.py +++ b/ml-agents/mlagents/trainers/models_torch.py @@ -407,7 +407,8 @@ def __init__(self, height, width, initial_channels, output_size): def forward(self, visual_obs): conv_1 = torch.relu(self.conv1(visual_obs)) conv_2 = torch.relu(self.conv2(conv_1)) - hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat]))) + # hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat]))) + hidden = torch.relu(self.dense(torch.reshape(conv_2,(-1, self.final_flat)))) return hidden From 2224fa03f04b3788da6bf949d47c7dbbf14bdbe5 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 19:56:03 -0700 Subject: [PATCH 13/29] _ --- ml-agents/mlagents/trainers/models_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py index a10579ef9b..80fab6342b 100644 --- a/ml-agents/mlagents/trainers/models_torch.py +++ b/ml-agents/mlagents/trainers/models_torch.py @@ -136,7 +136,7 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1): if self.use_lstm: embedding = embedding.view([sequence_length, -1, self.h_size]) memories = torch.split(memories, self.m_size // 2, dim=-1) - embedding, memories = self.lstm(embedding, memories) + embedding, memories = self.lstm(embedding.continuous(), memories.continuous()) embedding = embedding.view([-1, self.m_size // 2]) memories = torch.cat(memories, dim=-1) return embedding, memories From f306f2983424e32f21c241f45b6ef2b40c1cebc6 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 19:58:56 -0700 Subject: [PATCH 14/29] _ --- ml-agents/mlagents/trainers/models_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py index 80fab6342b..6966d69a89 100644 --- a/ml-agents/mlagents/trainers/models_torch.py +++ b/ml-agents/mlagents/trainers/models_torch.py @@ -136,7 +136,7 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1): if self.use_lstm: embedding = embedding.view([sequence_length, -1, self.h_size]) memories = torch.split(memories, self.m_size // 2, dim=-1) - embedding, memories = self.lstm(embedding.continuous(), memories.continuous()) + embedding, memories = self.lstm(embedding.contiguous(), memories.contiguous()) embedding = embedding.view([-1, self.m_size // 2]) memories = torch.cat(memories, dim=-1) return embedding, memories From 2c706ce497d047d2626bac465ff6017496331e67 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 20:00:18 -0700 Subject: [PATCH 15/29] _ --- ml-agents/mlagents/trainers/models_torch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py index 6966d69a89..42bcf6f07d 100644 --- a/ml-agents/mlagents/trainers/models_torch.py +++ b/ml-agents/mlagents/trainers/models_torch.py @@ -135,8 +135,8 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1): if self.use_lstm: embedding = embedding.view([sequence_length, -1, self.h_size]) - memories = torch.split(memories, self.m_size // 2, dim=-1) - embedding, memories = self.lstm(embedding.contiguous(), memories.contiguous()) + memories = torch.split(memories.contiguous(), self.m_size // 2, dim=-1) + embedding, memories = self.lstm(embedding.contiguous(), memories) embedding = embedding.view([-1, self.m_size // 2]) memories = torch.cat(memories, dim=-1) return embedding, memories From 3250d73503005851ba4be617d7050df88b099301 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 2 Jul 2020 20:03:15 -0700 Subject: [PATCH 16/29] _ --- ml-agents/mlagents/trainers/models_torch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/models_torch.py b/ml-agents/mlagents/trainers/models_torch.py index 42bcf6f07d..c2e0fc27de 100644 --- a/ml-agents/mlagents/trainers/models_torch.py +++ b/ml-agents/mlagents/trainers/models_torch.py @@ -135,8 +135,8 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1): if self.use_lstm: embedding = embedding.view([sequence_length, -1, self.h_size]) - memories = torch.split(memories.contiguous(), self.m_size // 2, dim=-1) - embedding, memories = self.lstm(embedding.contiguous(), memories) + memories = torch.split(memories, self.m_size // 2, dim=-1) + embedding, memories = self.lstm(embedding.contiguous(), (memories[0].contiguous(), memories[1].contiguous())) embedding = embedding.view([-1, self.m_size // 2]) memories = torch.cat(memories, dim=-1) return embedding, memories From 78022dcf347b260fb8b89d9c9af2fa35458e84af Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 10:40:23 -0700 Subject: [PATCH 17/29] Attempt at gpu on tf. Does not work --- experiment_torch.py | 54 ++++++++++--------- .../mlagents/trainers/policy/nn_policy.py | 8 +++ 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index 6c7bb847da..c929dcbc48 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -16,7 +16,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = use_torch TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" - if (not torch.cuda.is_available() and use_gpu) or (not use_torch and use_gpu): + if (not torch.cuda.is_available() and use_gpu and use_torch): return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na" if config_name is None: config_name = name @@ -57,31 +57,33 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u steps = 100000 - -results.append(run_experiment("3DBall", steps, True, 4, False)) -results.append(run_experiment("3DBall", steps, True, 1, False)) -results.append(run_experiment("3DBall", steps, True, 1, True)) -results.append(run_experiment("3DBall", steps, False, None, False)) - -results.append(run_experiment("GridWorld", steps, True, 4, False)) -results.append(run_experiment("GridWorld", steps, True, 1, False)) -results.append(run_experiment("GridWorld", steps, True, 1, True)) -results.append(run_experiment("GridWorld", steps, False, None, False)) - -results.append(run_experiment("PushBlock", steps, True, 4, False)) -results.append(run_experiment("PushBlock", steps, True, 1, False)) -results.append(run_experiment("PushBlock", steps, True, 1, True)) -results.append(run_experiment("PushBlock", steps, False, None, False)) - -results.append(run_experiment("Hallway", steps, True, 4, False)) -results.append(run_experiment("Hallway", steps, True, 1, False)) -results.append(run_experiment("Hallway", steps, True, 1, True)) -results.append(run_experiment("Hallway", steps, False, None, False)) - -results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic")) -results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic")) -results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic")) -results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic")) +results.append(run_experiment("3DBall", steps, False, 1, True)) +results.append(run_experiment("3DBall", steps, False, 1, False)) + +# results.append(run_experiment("3DBall", steps, True, 4, False)) +# results.append(run_experiment("3DBall", steps, True, 1, False)) +# results.append(run_experiment("3DBall", steps, True, 1, True)) +# results.append(run_experiment("3DBall", steps, False, None, False)) + +# results.append(run_experiment("GridWorld", steps, True, 4, False)) +# results.append(run_experiment("GridWorld", steps, True, 1, False)) +# results.append(run_experiment("GridWorld", steps, True, 1, True)) +# results.append(run_experiment("GridWorld", steps, False, None, False)) + +# results.append(run_experiment("PushBlock", steps, True, 4, False)) +# results.append(run_experiment("PushBlock", steps, True, 1, False)) +# results.append(run_experiment("PushBlock", steps, True, 1, True)) +# results.append(run_experiment("PushBlock", steps, False, None, False)) + +# results.append(run_experiment("Hallway", steps, True, 4, False)) +# results.append(run_experiment("Hallway", steps, True, 1, False)) +# results.append(run_experiment("Hallway", steps, True, 1, True)) +# results.append(run_experiment("Hallway", steps, False, None, False)) + +# results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic")) for r in results: diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py index ea550cb652..f8ae47c9a1 100644 --- a/ml-agents/mlagents/trainers/policy/nn_policy.py +++ b/ml-agents/mlagents/trainers/policy/nn_policy.py @@ -12,6 +12,8 @@ MultiCategoricalDistribution, ) +from mlagents.trainers.ppo.trainer import TestingConfiguration + EPSILON = 1e-6 # Small value to avoid divide by zero @@ -42,6 +44,12 @@ def __init__( :param reparameterize: Whether we are using the resampling trick to update the policy in continuous output. """ super().__init__(seed, brain, trainer_settings, model_path, load) + if TestingConfiguration.device == "cuda:0": + tf.device("/gpu:0") + print("using GPU") + else: + tf.device("/cpu:0") + print("using CPU") self.grads = None self.update_batch: Optional[tf.Operation] = None num_layers = self.network_settings.num_layers From 35df5f703e0225d2970b1eabc551c0f85694bb99 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 12:40:45 -0700 Subject: [PATCH 18/29] _ --- experiment_torch.py | 1 + ml-agents/mlagents/trainers/policy/nn_policy.py | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index c929dcbc48..b3f9d05124 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -16,6 +16,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = use_torch TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" + os.environ["CUDA_VISIBLE_DEVICES"] = "2" if use_gpu else "0" if (not torch.cuda.is_available() and use_gpu and use_torch): return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na" if config_name is None: diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py index f8ae47c9a1..879065d3bc 100644 --- a/ml-agents/mlagents/trainers/policy/nn_policy.py +++ b/ml-agents/mlagents/trainers/policy/nn_policy.py @@ -44,12 +44,6 @@ def __init__( :param reparameterize: Whether we are using the resampling trick to update the policy in continuous output. """ super().__init__(seed, brain, trainer_settings, model_path, load) - if TestingConfiguration.device == "cuda:0": - tf.device("/gpu:0") - print("using GPU") - else: - tf.device("/cpu:0") - print("using CPU") self.grads = None self.update_batch: Optional[tf.Operation] = None num_layers = self.network_settings.num_layers From a28bba2bb185b5960845a016a056c36c981202ff Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 14:06:48 -0700 Subject: [PATCH 19/29] _ --- experiment_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment_torch.py b/experiment_torch.py index b3f9d05124..ebafe6d7dc 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -16,7 +16,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = use_torch TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" - os.environ["CUDA_VISIBLE_DEVICES"] = "2" if use_gpu else "0" + os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" if use_gpu else "-1" if (not torch.cuda.is_available() and use_gpu and use_torch): return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na" if config_name is None: From f306fc99daf2baf0be0ae6edd3ede194d8a88e34 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 14:21:14 -0700 Subject: [PATCH 20/29] _ --- experiment_torch.py | 52 ++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index ebafe6d7dc..e789e3546f 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -11,12 +11,13 @@ results = [("name", "steps", "use_torch", "num_torch_threads", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")] -def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool, config_name=None): +def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool,num_envs :int= 1, config_name=None): TestingConfiguration.env_name = name TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = use_torch TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" if use_gpu else "-1" + import tensorflow as tf if (not torch.cuda.is_available() and use_gpu and use_torch): return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na" if config_name is None: @@ -24,6 +25,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u run_options = parse_command_line([f"config/ppo/{config_name}.yaml"]) run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf") run_options.checkpoint_settings.force = True + run_options.env_settings.num_envs = num_envs for trainer_settings in run_options.behaviors.values(): trainer_settings.threaded = False timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json") @@ -56,35 +58,37 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u -steps = 100000 +n_steps = 100000 -results.append(run_experiment("3DBall", steps, False, 1, True)) -results.append(run_experiment("3DBall", steps, False, 1, False)) +envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")] -# results.append(run_experiment("3DBall", steps, True, 4, False)) -# results.append(run_experiment("3DBall", steps, True, 1, False)) -# results.append(run_experiment("3DBall", steps, True, 1, True)) -# results.append(run_experiment("3DBall", steps, False, None, False)) +results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None)) +results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None)) -# results.append(run_experiment("GridWorld", steps, True, 4, False)) -# results.append(run_experiment("GridWorld", steps, True, 1, False)) -# results.append(run_experiment("GridWorld", steps, True, 1, True)) -# results.append(run_experiment("GridWorld", steps, False, None, False)) +# results.append(run_experiment("3DBall", n_steps, True, 4, False)) +# results.append(run_experiment("3DBall", n_steps, True, 1, False)) +# results.append(run_experiment("3DBall", n_steps, True, 1, True)) +# results.append(run_experiment("3DBall", n_steps, False, None, False)) -# results.append(run_experiment("PushBlock", steps, True, 4, False)) -# results.append(run_experiment("PushBlock", steps, True, 1, False)) -# results.append(run_experiment("PushBlock", steps, True, 1, True)) -# results.append(run_experiment("PushBlock", steps, False, None, False)) +# results.append(run_experiment("GridWorld", n_steps, True, 4, False)) +# results.append(run_experiment("GridWorld", n_steps, True, 1, False)) +# results.append(run_experiment("GridWorld", n_steps, True, 1, True)) +# results.append(run_experiment("GridWorld", n_steps, False, None, False)) -# results.append(run_experiment("Hallway", steps, True, 4, False)) -# results.append(run_experiment("Hallway", steps, True, 1, False)) -# results.append(run_experiment("Hallway", steps, True, 1, True)) -# results.append(run_experiment("Hallway", steps, False, None, False)) +# results.append(run_experiment("PushBlock", n_steps, True, 4, False)) +# results.append(run_experiment("PushBlock", n_steps, True, 1, False)) +# results.append(run_experiment("PushBlock", n_steps, True, 1, True)) +# results.append(run_experiment("PushBlock", n_steps, False, None, False)) -# results.append(run_experiment("CrawlerStaticTarget", steps, True, 4, False, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, False, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", steps, True, 1, True, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", steps, False, None, False, "CrawlerStatic")) +# results.append(run_experiment("Hallway", n_steps, True, 4, False)) +# results.append(run_experiment("Hallway", n_steps, True, 1, False)) +# results.append(run_experiment("Hallway", n_steps, True, 1, True)) +# results.append(run_experiment("Hallway", n_steps, False, None, False)) + +# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 4, False, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, False, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, True, "CrawlerStatic")) +# results.append(run_experiment("CrawlerStaticTarget", n_steps, False, None, False, "CrawlerStatic")) for r in results: From aa26a5cd51c47be3f85f82c5c876b5eee4c8401a Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 14:31:22 -0700 Subject: [PATCH 21/29] _ --- experiment_torch.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index e789e3546f..b53137612f 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -2,6 +2,7 @@ import json import os import torch +import tensorflow as tf from mlagents.trainers.learn import run_cli, parse_command_line from mlagents.trainers.settings import RunOptions from mlagents.trainers.stats import StatsReporter @@ -16,8 +17,10 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u TestingConfiguration.max_steps = steps TestingConfiguration.use_torch = use_torch TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" - os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" if use_gpu else "-1" - import tensorflow as tf + if use_gpu: + tf.device("/GPU:0") + else: + tf.device("/device:CPU:0") if (not torch.cuda.is_available() and use_gpu and use_torch): return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na" if config_name is None: @@ -57,7 +60,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u return name, steps, use_torch, num_torch_threads, use_gpu, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count - +os.environ["CUDA_VISIBLE_DEVICES"]="0,1" n_steps = 100000 envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")] From 80b6b83534061df3a201602bade0703195efa180 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 14:32:52 -0700 Subject: [PATCH 22/29] _ --- experiment_torch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index b53137612f..814ee39f83 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -65,8 +65,8 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")] -results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None)) -results.append(run_experiment(name = "3DBall", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None)) +results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None)) +results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None)) # results.append(run_experiment("3DBall", n_steps, True, 4, False)) # results.append(run_experiment("3DBall", n_steps, True, 1, False)) From e17e79ca86d4e62bd82971df2f1d51a4887d9bcd Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 15:36:14 -0700 Subject: [PATCH 23/29] _ --- experiment_torch.py | 177 ++++++++++++++------------- ml-agents/mlagents/trainers/learn.py | 1 + 2 files changed, 92 insertions(+), 86 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index 814ee39f83..7180352e82 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -3,6 +3,7 @@ import os import torch import tensorflow as tf +import argparse from mlagents.trainers.learn import run_cli, parse_command_line from mlagents.trainers.settings import RunOptions from mlagents.trainers.stats import StatsReporter @@ -10,91 +11,95 @@ from mlagents_envs.timers import _thread_timer_stacks -results = [("name", "steps", "use_torch", "num_torch_threads", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")] - -def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool,num_envs :int= 1, config_name=None): - TestingConfiguration.env_name = name - TestingConfiguration.max_steps = steps - TestingConfiguration.use_torch = use_torch - TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" - if use_gpu: - tf.device("/GPU:0") - else: - tf.device("/device:CPU:0") - if (not torch.cuda.is_available() and use_gpu and use_torch): - return name, steps, use_torch, num_torch_threads, use_gpu, "na","na","na","na","na","na","na" - if config_name is None: - config_name = name - run_options = parse_command_line([f"config/ppo/{config_name}.yaml"]) - run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf") - run_options.checkpoint_settings.force = True - run_options.env_settings.num_envs = num_envs - for trainer_settings in run_options.behaviors.values(): - trainer_settings.threaded = False - timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json") - if use_torch: - torch.set_num_threads(num_torch_threads) - run_cli(run_options) - StatsReporter.writers.clear() - StatsReporter.stats_dict.clear() - _thread_timer_stacks.clear() - with open(timers_path) as timers_json_file: - timers_json = json.load(timers_json_file) - total = timers_json["total"] - tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"] - evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"] - update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"] - tc_advance_total = tc_advance["total"] - tc_advance_count = tc_advance["count"] - if use_torch: - update_total = update["TorchPPOOptimizer.update"]["total"] - evaluate_total = evaluate["TorchPolicy.evaluate"]["total"] - update_count = update["TorchPPOOptimizer.update"]["count"] - evaluate_count = evaluate["TorchPolicy.evaluate"]["count"] - else: - update_total = update["TFPPOOptimizer.update"]["total"] - evaluate_total = evaluate["NNPolicy.evaluate"]["total"] - update_count = update["TFPPOOptimizer.update"]["count"] - evaluate_count= evaluate["NNPolicy.evaluate"]["count"] - # todo: do total / count - return name, steps, use_torch, num_torch_threads, use_gpu, total, tc_advance_total, tc_advance_count, update_total, update_count, evaluate_total, evaluate_count - - -os.environ["CUDA_VISIBLE_DEVICES"]="0,1" -n_steps = 100000 - -envs_config_tuple = [("3DBall","3DBall"), ("GridWorld","GridWorld"), ("PushBlock","PushBlock"),("Hallway","Hallway"), ("CrawlerStaticTarget","CrawlerStatic")] - -results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = 1, config_name=None)) -results.append(run_experiment(name = "GridWorld", steps=n_steps, use_torch=False, num_torch_threads=1, use_gpu=True, num_envs = 1, config_name=None)) - -# results.append(run_experiment("3DBall", n_steps, True, 4, False)) -# results.append(run_experiment("3DBall", n_steps, True, 1, False)) -# results.append(run_experiment("3DBall", n_steps, True, 1, True)) -# results.append(run_experiment("3DBall", n_steps, False, None, False)) - -# results.append(run_experiment("GridWorld", n_steps, True, 4, False)) -# results.append(run_experiment("GridWorld", n_steps, True, 1, False)) -# results.append(run_experiment("GridWorld", n_steps, True, 1, True)) -# results.append(run_experiment("GridWorld", n_steps, False, None, False)) - -# results.append(run_experiment("PushBlock", n_steps, True, 4, False)) -# results.append(run_experiment("PushBlock", n_steps, True, 1, False)) -# results.append(run_experiment("PushBlock", n_steps, True, 1, True)) -# results.append(run_experiment("PushBlock", n_steps, False, None, False)) - -# results.append(run_experiment("Hallway", n_steps, True, 4, False)) -# results.append(run_experiment("Hallway", n_steps, True, 1, False)) -# results.append(run_experiment("Hallway", n_steps, True, 1, True)) -# results.append(run_experiment("Hallway", n_steps, False, None, False)) - -# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 4, False, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, False, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", n_steps, True, 1, True, "CrawlerStatic")) -# results.append(run_experiment("CrawlerStaticTarget", n_steps, False, None, False, "CrawlerStatic")) - - -for r in results: - print(*r) +def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, use_gpu:bool, num_envs :int= 1, config_name=None): + TestingConfiguration.env_name = name + TestingConfiguration.max_steps = steps + TestingConfiguration.use_torch = use_torch + TestingConfiguration.device = "cuda:0" if use_gpu else "cpu" + if use_gpu: + tf.device("/GPU:0") + else: + tf.device("/device:CPU:0") + if (not torch.cuda.is_available() and use_gpu and use_torch): + return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), "na","na","na","na","na","na","na" + if config_name is None: + config_name = name + run_options = parse_command_line([f"config/ppo/{config_name}.yaml", "--num-envs", f"{num_envs}"]) + run_options.checkpoint_settings.run_id = f"{name}_test_" +str(steps) +"_"+("torch" if use_torch else "tf") + run_options.checkpoint_settings.force = True + # run_options.env_settings.num_envs = num_envs + for trainer_settings in run_options.behaviors.values(): + trainer_settings.threaded = False + timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json") + if use_torch: + torch.set_num_threads(num_torch_threads) + run_cli(run_options) + StatsReporter.writers.clear() + StatsReporter.stats_dict.clear() + _thread_timer_stacks.clear() + with open(timers_path) as timers_json_file: + timers_json = json.load(timers_json_file) + total = timers_json["total"] + tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"] + evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"] + update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"] + tc_advance_total = tc_advance["total"] + tc_advance_count = tc_advance["count"] + if use_torch: + update_total = update["TorchPPOOptimizer.update"]["total"] + evaluate_total = evaluate["TorchPolicy.evaluate"]["total"] + update_count = update["TorchPPOOptimizer.update"]["count"] + evaluate_count = evaluate["TorchPolicy.evaluate"]["count"] + else: + update_total = update["TFPPOOptimizer.update"]["total"] + evaluate_total = evaluate["NNPolicy.evaluate"]["total"] + update_count = update["TFPPOOptimizer.update"]["count"] + evaluate_count= evaluate["NNPolicy.evaluate"]["count"] + # todo: do total / count + return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), str(total), str(tc_advance_total), str(tc_advance_count), str(update_total), str(update_count), str(evaluate_total), str(evaluate_count) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--steps", default=25000, type=int, help="The number of steps") + parser.add_argument("--num-envs", default=1, type=int, help="The number of envs") + args = parser.parse_args() + + envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")] + + + + labels = ("name", "steps", "use_torch", "num_torch_threads", "num_envs", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count") + + results = [] + results.append(labels) + f = open("result_data.txt", "w") + f.write(" ".join(labels)) + + for env_config in envs_config_tuples: + data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) + results.append(data) + f.write(" ".join(data)) + + data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) + results.append(data) + f.write(" ".join(data)) + + + data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=True, num_envs = args.num_envs, config_name=env_config[1]) + results.append(data) + f.write(" ".join(data)) + + data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) + results.append(data) + f.write(" ".join(data)) + for r in results: + print(*r) + f.close() + + +if __name__ == "__main__": + main() + diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index b4abd94988..0e9188e66d 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -252,6 +252,7 @@ def create_unity_environment( seed=env_seed, no_graphics=no_graphics, base_port=start_port, + worker_id=worker_id, additional_args=env_args, side_channels=side_channels, log_folder=log_folder, From 216007734c3489f2e7ef0f69647f2680cbf06192 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 15:37:25 -0700 Subject: [PATCH 24/29] _ --- experiment_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment_torch.py b/experiment_torch.py index 7180352e82..f084587fd6 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -67,7 +67,7 @@ def main(): parser.add_argument("--num-envs", default=1, type=int, help="The number of envs") args = parser.parse_args() - envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")] + envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")] From c5ba857acc96c66782daa8f3fb8c21ea58cf063e Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 15:45:37 -0700 Subject: [PATCH 25/29] _ --- experiment_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment_torch.py b/experiment_torch.py index f084587fd6..c0bcee55ef 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -75,7 +75,7 @@ def main(): results = [] results.append(labels) - f = open("result_data.txt", "w") + f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}.txt", "w") f.write(" ".join(labels)) for env_config in envs_config_tuples: From 771f2f14011e75341d9f6a42abc42f61f0220f4f Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Tue, 7 Jul 2020 15:55:30 -0700 Subject: [PATCH 26/29] _ --- experiment_torch.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index c0bcee55ef..75dee08b0f 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -67,7 +67,7 @@ def main(): parser.add_argument("--num-envs", default=1, type=int, help="The number of envs") args = parser.parse_args() - envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")] + envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")] @@ -81,20 +81,20 @@ def main(): for env_config in envs_config_tuples: data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) results.append(data) - f.write(" ".join(data)) + f.write(" ".join(data) + "\n") data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) results.append(data) - f.write(" ".join(data)) + f.write(" ".join(data)+ "\n") data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=True, num_envs = args.num_envs, config_name=env_config[1]) results.append(data) - f.write(" ".join(data)) + f.write(" ".join(data)+ "\n") data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) results.append(data) - f.write(" ".join(data)) + f.write(" ".join(data)+ "\n") for r in results: print(*r) f.close() From c16d87d63233549db8c447669bb44ad3984fff0b Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 8 Jul 2020 10:01:39 -0700 Subject: [PATCH 27/29] _ --- experiment_torch.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/experiment_torch.py b/experiment_torch.py index 75dee08b0f..d3128247d1 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -22,7 +22,7 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u tf.device("/GPU:0") else: tf.device("/device:CPU:0") - if (not torch.cuda.is_available() and use_gpu and use_torch): + if (not torch.cuda.is_available() and use_gpu): return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), "na","na","na","na","na","na","na" if config_name is None: config_name = name @@ -64,35 +64,41 @@ def run_experiment(name:str, steps:int, use_torch:bool, num_torch_threads:int, u def main(): parser = argparse.ArgumentParser() parser.add_argument("--steps", default=25000, type=int, help="The number of steps") - parser.add_argument("--num-envs", default=1, type=int, help="The number of envs") + parser.add_argument("--num-envs", default=1, type=int, help="The number of envs") + parser.add_argument("--gpu", default = False, action="store_true", help="If true, will use the GPU") + parser.add_argument("--threads", default=False, action="store_true", help="If true, will try both 1 and 8 threads for torch") + parser.add_argument("--ball", default=False, action="store_true", help="If true, will only do 3dball") args = parser.parse_args() - envs_config_tuples = [("3DBall", "3DBall")]#, ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "Hallway")] - + if args.gpu: + os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" + else: + os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + + envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "VisualHallway")] + if args.ball: + envs_config_tuples=[("3DBall", "3DBall")] labels = ("name", "steps", "use_torch", "num_torch_threads", "num_envs", "use_gpu" , "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count") results = [] results.append(labels) - f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}.txt", "w") - f.write(" ".join(labels)) + f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt", "w") + f.write(" ".join(labels)+ "\n") for env_config in envs_config_tuples: - data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) + data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=args.gpu, num_envs = args.num_envs, config_name=env_config[1]) results.append(data) f.write(" ".join(data) + "\n") - data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) - results.append(data) - f.write(" ".join(data)+ "\n") - + if args.threads: + data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=args.gpu, num_envs = args.num_envs, config_name=env_config[1]) + results.append(data) + f.write(" ".join(data)+ "\n") - data = run_experiment(name = env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=True, num_envs = args.num_envs, config_name=env_config[1]) - results.append(data) - f.write(" ".join(data)+ "\n") - data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=False, num_envs = args.num_envs, config_name=env_config[1]) + data = run_experiment(name = env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=args.gpu, num_envs = args.num_envs, config_name=env_config[1]) results.append(data) f.write(" ".join(data)+ "\n") for r in results: From 7113dd82375216caaeee6cee5303f1a186246232 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 8 Jul 2020 10:04:06 -0700 Subject: [PATCH 28/29] _ --- experiment_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment_torch.py b/experiment_torch.py index d3128247d1..8ccb56be2b 100644 --- a/experiment_torch.py +++ b/experiment_torch.py @@ -71,7 +71,7 @@ def main(): args = parser.parse_args() if args.gpu: - os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" + os.environ["CUDA_VISIBLE_DEVICES"] = "0" else: os.environ["CUDA_VISIBLE_DEVICES"] = "-1" From 01d38e00af8bdbdf853666985e5c1bc7155b156e Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 9 Jul 2020 17:49:05 -0700 Subject: [PATCH 29/29] Fixing learn.py --- ml-agents/mlagents/trainers/ppo/trainer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 9f1ac67f6b..365a7634de 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -64,7 +64,8 @@ def __init__( self.load = load self.seed = seed self.framework = "torch" if TestingConfiguration.use_torch else "tf" - self.trainer_settings.max_steps = TestingConfiguration.max_steps + if TestingConfiguration.max_steps > 0: + self.trainer_settings.max_steps = TestingConfiguration.max_steps self.policy: Policy = None # type: ignore def _process_trajectory(self, trajectory: Trajectory) -> None: