Develop add fire exp framework #4213

Merged
merged 32 commits into develop-add-fire from develop-add-fire-exp-framework on Jul 10, 2020

32 commits
0d5632c
Experiment branch for comparing torch
vincentpierre Jul 2, 2020
3cc88b0
merging base
vincentpierre Jul 2, 2020
5d9a4d0
Merge branch 'develop-add-fire' into develop-add-fire-exp-framework
vincentpierre Jul 2, 2020
c7c8df2
Updates and merging ervin changes
vincentpierre Jul 2, 2020
e0120ec
improvements on experiment_torch.py
vincentpierre Jul 2, 2020
019c063
Merge branch 'develop-add-fire' into develop-add-fire-exp-framework
vincentpierre Jul 2, 2020
4ad81e0
Better printing of results
vincentpierre Jul 2, 2020
b1eb17d
preliminary gpu experiment
vincentpierre Jul 2, 2020
ee1c2a9
Testing gpu
vincentpierre Jul 3, 2020
a280e3c
Prepare to see a lot of commits, because I like my IDE and I am testi…
vincentpierre Jul 3, 2020
a66a40d
Prepare to see a lot of commits, because I like my IDE and I am testi…
vincentpierre Jul 3, 2020
dcde945
_
vincentpierre Jul 3, 2020
92f3194
_
vincentpierre Jul 3, 2020
3b7e1e0
_
vincentpierre Jul 3, 2020
42e2e73
_
vincentpierre Jul 3, 2020
2224fa0
_
vincentpierre Jul 3, 2020
f306f29
_
vincentpierre Jul 3, 2020
2c706ce
_
vincentpierre Jul 3, 2020
3250d73
_
vincentpierre Jul 3, 2020
78022dc
Attempt at gpu on tf. Does not work
vincentpierre Jul 7, 2020
35df5f7
_
vincentpierre Jul 7, 2020
a28bba2
_
vincentpierre Jul 7, 2020
f306fc9
_
vincentpierre Jul 7, 2020
aa26a5c
_
vincentpierre Jul 7, 2020
80b6b83
_
vincentpierre Jul 7, 2020
e17e79c
_
vincentpierre Jul 7, 2020
2160077
_
vincentpierre Jul 7, 2020
c5ba857
_
vincentpierre Jul 7, 2020
771f2f1
_
vincentpierre Jul 7, 2020
c16d87d
_
vincentpierre Jul 8, 2020
7113dd8
_
vincentpierre Jul 8, 2020
01d38e0
Fixing learn.py
vincentpierre Jul 10, 2020
111 changes: 111 additions & 0 deletions experiment_torch.py
@@ -0,0 +1,111 @@

import json
import os
import torch
import tensorflow as tf
import argparse
from mlagents.trainers.learn import run_cli, parse_command_line
from mlagents.trainers.settings import RunOptions
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.ppo.trainer import TestingConfiguration
from mlagents_envs.timers import _thread_timer_stacks


def run_experiment(name: str, steps: int, use_torch: bool, num_torch_threads: int, use_gpu: bool, num_envs: int = 1, config_name=None):
    TestingConfiguration.env_name = name
    TestingConfiguration.max_steps = steps
    TestingConfiguration.use_torch = use_torch
    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
    # Note: tf.device() returns a context manager; called bare like this it has
    # no effect (see the "Attempt at gpu on tf. Does not work" commit above).
    if use_gpu:
        tf.device("/GPU:0")
    else:
        tf.device("/device:CPU:0")
    if not torch.cuda.is_available() and use_gpu:
        return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), "na", "na", "na", "na", "na", "na", "na"
    if config_name is None:
        config_name = name
    run_options = parse_command_line([f"config/ppo/{config_name}.yaml", "--num-envs", f"{num_envs}"])
    run_options.checkpoint_settings.run_id = f"{name}_test_{steps}_" + ("torch" if use_torch else "tf")
    run_options.checkpoint_settings.force = True
    # run_options.env_settings.num_envs = num_envs
    for trainer_settings in run_options.behaviors.values():
        trainer_settings.threaded = False
    timers_path = os.path.join("results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json")
    if use_torch:
        torch.set_num_threads(num_torch_threads)
    run_cli(run_options)
    StatsReporter.writers.clear()
    StatsReporter.stats_dict.clear()
    _thread_timer_stacks.clear()
    with open(timers_path) as timers_json_file:
        timers_json = json.load(timers_json_file)
        total = timers_json["total"]
        tc_advance = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]
        evaluate = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"]
        update = timers_json["children"]["TrainerController.start_learning"]["children"]["TrainerController.advance"]["children"]["trainer_advance"]["children"]["_update_policy"]["children"]
        tc_advance_total = tc_advance["total"]
        tc_advance_count = tc_advance["count"]
        if use_torch:
            update_total = update["TorchPPOOptimizer.update"]["total"]
            evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
            update_count = update["TorchPPOOptimizer.update"]["count"]
            evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
        else:
            update_total = update["TFPPOOptimizer.update"]["total"]
            evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
            update_count = update["TFPPOOptimizer.update"]["count"]
            evaluate_count = evaluate["NNPolicy.evaluate"]["count"]
    # todo: do total / count
    return name, str(steps), str(use_torch), str(num_torch_threads), str(num_envs), str(use_gpu), str(total), str(tc_advance_total), str(tc_advance_count), str(update_total), str(update_count), str(evaluate_total), str(evaluate_count)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
    parser.add_argument("--gpu", default=False, action="store_true", help="If true, will use the GPU")
    parser.add_argument("--threads", default=False, action="store_true", help="If true, will try both 1 and 8 threads for torch")
    parser.add_argument("--ball", default=False, action="store_true", help="If true, will only do 3dball")
    args = parser.parse_args()

    if args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    envs_config_tuples = [("3DBall", "3DBall"), ("GridWorld", "GridWorld"), ("PushBlock", "PushBlock"), ("Hallway", "Hallway"), ("CrawlerStaticTarget", "CrawlerStatic"), ("VisualHallway", "VisualHallway")]
    if args.ball:
        envs_config_tuples = [("3DBall", "3DBall")]

    labels = ("name", "steps", "use_torch", "num_torch_threads", "num_envs", "use_gpu", "total", "tc_advance_total", "tc_advance_count", "update_total", "update_count", "evaluate_total", "evaluate_count")

    results = []
    results.append(labels)
    f = open(f"result_data_steps_{args.steps}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt", "w")
    f.write(" ".join(labels) + "\n")

    for env_config in envs_config_tuples:
        data = run_experiment(name=env_config[0], steps=args.steps, use_torch=True, num_torch_threads=1, use_gpu=args.gpu, num_envs=args.num_envs, config_name=env_config[1])
        results.append(data)
        f.write(" ".join(data) + "\n")

        if args.threads:
            data = run_experiment(name=env_config[0], steps=args.steps, use_torch=True, num_torch_threads=8, use_gpu=args.gpu, num_envs=args.num_envs, config_name=env_config[1])
            results.append(data)
            f.write(" ".join(data) + "\n")

        data = run_experiment(name=env_config[0], steps=args.steps, use_torch=False, num_torch_threads=1, use_gpu=args.gpu, num_envs=args.num_envs, config_name=env_config[1])
        results.append(data)
        f.write(" ".join(data) + "\n")
    for r in results:
        print(*r)
    f.close()


if __name__ == "__main__":
    main()
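
The "# todo: do total / count" note above is easy to resolve once timers.json is parsed; a minimal sketch (mean_duration is a hypothetical helper, not part of this PR):

def mean_duration(total: float, count: int) -> float:
    # Mean seconds per call for a timer node, guarding against a zero count.
    return total / count if count else 0.0

# e.g. average time of one policy update vs. one evaluate call:
# mean_duration(update_total, update_count), mean_duration(evaluate_total, evaluate_count)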

34 changes: 24 additions & 10 deletions ml-agents/mlagents/trainers/learn.py
@@ -35,6 +35,9 @@
 )
 from mlagents_envs import logging_util

+from mlagents.trainers.ppo.trainer import TestingConfiguration
+from mlagents_envs.registry import default_registry
+
 logger = logging_util.get_logger(__name__)

 TRAINING_STATUS_FILE_NAME = "training_status.json"
@@ -233,16 +236,27 @@ def create_unity_environment(
 ) -> UnityEnvironment:
     # Make sure that each environment gets a different seed
     env_seed = seed + worker_id
-    return UnityEnvironment(
-        file_name=env_path,
-        worker_id=worker_id,
-        seed=env_seed,
-        no_graphics=no_graphics,
-        base_port=start_port,
-        additional_args=env_args,
-        side_channels=side_channels,
-        log_folder=log_folder,
-    )
+    if TestingConfiguration.env_name == "":
+        return UnityEnvironment(
+            file_name=env_path,
+            worker_id=worker_id,
+            seed=env_seed,
+            no_graphics=no_graphics,
+            base_port=start_port,
+            additional_args=env_args,
+            side_channels=side_channels,
+            log_folder=log_folder,
+        )
+    else:
+        return default_registry[TestingConfiguration.env_name].make(
+            seed=env_seed,
+            no_graphics=no_graphics,
+            base_port=start_port,
+            worker_id=worker_id,
+            additional_args=env_args,
+            side_channels=side_channels,
+            log_folder=log_folder,
+        )

 return create_unity_environment

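For context: when TestingConfiguration.env_name is set, the factory above bypasses the local build path and pulls a prebuilt environment from default_registry. A minimal standalone sketch of that API (assuming network access to download the binary):

from mlagents_envs.registry import default_registry

# Launch the registry's prebuilt 3DBall environment headless, step once, close.
env = default_registry["3DBall"].make(seed=0, no_graphics=True)
env.reset()
env.step()  # advances the simulation with default (empty) actions
env.close()
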
5 changes: 3 additions & 2 deletions ml-agents/mlagents/trainers/models_torch.py
@@ -136,7 +136,7 @@ def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1):
         if self.use_lstm:
             embedding = embedding.view([sequence_length, -1, self.h_size])
             memories = torch.split(memories, self.m_size // 2, dim=-1)
-            embedding, memories = self.lstm(embedding, memories)
+            embedding, memories = self.lstm(embedding.contiguous(), (memories[0].contiguous(), memories[1].contiguous()))
             embedding = embedding.view([-1, self.m_size // 2])
             memories = torch.cat(memories, dim=-1)
         return embedding, memories
@@ -407,7 +407,8 @@ def __init__(self, height, width, initial_channels, output_size):
     def forward(self, visual_obs):
         conv_1 = torch.relu(self.conv1(visual_obs))
         conv_2 = torch.relu(self.conv2(conv_1))
-        hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
+        # hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
+        hidden = torch.relu(self.dense(torch.reshape(conv_2, (-1, self.final_flat))))
         return hidden


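Both changes in this file work around the same PyTorch constraint: Tensor.view (and cuDNN's LSTM kernels) require contiguous memory, while torch.reshape silently falls back to a copy. A minimal repro of the difference:

import torch

x = torch.arange(6).reshape(2, 3).t()  # transposing makes the tensor non-contiguous
print(x.is_contiguous())               # False
# x.view(-1)                           # would raise RuntimeError (incompatible view)
print(x.reshape(-1))                   # works: copies because a view is impossible
print(x.contiguous().view(-1))         # the explicit fix applied to the LSTM inputs above
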
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
@@ -106,8 +106,8 @@ def get_trajectory_value_estimates(
         )

         for name, estimate in value_estimates.items():
-            value_estimates[name] = estimate.detach().numpy()
-            next_value_estimate[name] = next_value_estimate[name].detach().numpy()
+            value_estimates[name] = estimate.detach().cpu().numpy()
+            next_value_estimate[name] = next_value_estimate[name].detach().cpu().numpy()

         if done:
             for k in next_value_estimate:
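
The .cpu() calls added here (and in torch_policy.py and optimizer_torch.py below) become necessary once tensors may live on the GPU: numpy arrays share host memory, so Tensor.numpy() raises a TypeError on CUDA tensors, and it also refuses tensors that require grad. A self-contained illustration:

import torch

t = torch.ones(3, requires_grad=True)
if torch.cuda.is_available():
    t = t.to("cuda:0")
# t.numpy() would fail here; detach() drops the autograd graph, then
# cpu() copies to host memory (a no-op when already on the CPU).
arr = t.detach().cpu().numpy()
print(arr)
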
2 changes: 2 additions & 0 deletions ml-agents/mlagents/trainers/policy/nn_policy.py
@@ -12,6 +12,8 @@
     MultiCategoricalDistribution,
 )

+from mlagents.trainers.ppo.trainer import TestingConfiguration
+
 EPSILON = 1e-6  # Small value to avoid divide by zero

62 changes: 38 additions & 24 deletions ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -17,6 +17,8 @@
 from mlagents.trainers.brain import BrainParameters
 from mlagents.trainers.models_torch import ActorCritic

+from mlagents.trainers.ppo.trainer import TestingConfiguration
+
 EPSILON = 1e-7  # Small value to avoid divide by zero

@@ -91,6 +93,12 @@ def __init__(
         self.log_std_min = -20
         self.log_std_max = 2

+        if TestingConfiguration.device != "cpu":
+            torch.set_default_tensor_type(torch.cuda.FloatTensor)
+        else:
+            torch.set_default_tensor_type(torch.FloatTensor)
+
+
         self.inference_dict: Dict[str, tf.Tensor] = {}
         self.update_dict: Dict[str, tf.Tensor] = {}
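
Flipping the global default tensor type is the quick route taken here: every tensor created afterwards is a CUDA tensor. The more conventional pattern is explicit placement, sketched below for a toy model (an illustration, not the PR's code):

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = torch.nn.Linear(8, 2).to(device)  # move parameters once
obs = torch.zeros(1, 8, device=device)    # create inputs on the same device
print(model(obs).shape)                   # torch.Size([1, 2])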

@@ -117,6 +125,8 @@ def __init__(
             separate_critic=self.use_continuous_act,
         )

+        self.actor_critic.to(TestingConfiguration.device)
+
     def split_decision_step(self, decision_requests):
         vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
         mask = None
@@ -190,18 +200,18 @@ def evaluate(
         action, log_probs, entropy, value_heads, memories = self.sample_actions(
             vec_obs, vis_obs, masks=masks, memories=memories
         )
-        run_out["action"] = action.detach().numpy()
-        run_out["pre_action"] = action.detach().numpy()
+        run_out["action"] = action.detach().cpu().numpy()
+        run_out["pre_action"] = action.detach().cpu().numpy()
         # Todo - make pre_action difference
-        run_out["log_probs"] = log_probs.detach().numpy()
-        run_out["entropy"] = entropy.detach().numpy()
+        run_out["log_probs"] = log_probs.detach().cpu().numpy()
+        run_out["entropy"] = entropy.detach().cpu().numpy()
         run_out["value_heads"] = {
-            name: t.detach().numpy() for name, t in value_heads.items()
+            name: t.detach().cpu().numpy() for name, t in value_heads.items()
         }
         run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0)
         run_out["learning_rate"] = 0.0
         if self.use_recurrent:
-            run_out["memories"] = memories.detach().numpy()
+            run_out["memories"] = memories.detach().cpu().numpy()
         self.actor_critic.update_normalization(vec_obs)
         return run_out

@@ -249,24 +259,28 @@ def load_model(self, step=0):
         self.actor_critic.load_state_dict(torch.load(load_path))

     def export_model(self, step=0):
-        fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
-        fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
-        fake_masks = torch.ones([1] + self.actor_critic.act_size)
-        # fake_memories = torch.zeros([1] + [self.m_size])
-        export_path = "./model-" + str(step) + ".onnx"
-        output_names = ["action", "action_probs"]
-        input_names = ["vector_observation", "action_mask"]
-        dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
-        onnx.export(
-            self.actor_critic,
-            (fake_vec_obs, fake_vis_obs, fake_masks),
-            export_path,
-            verbose=True,
-            opset_version=12,
-            input_names=input_names,
-            output_names=output_names,
-            dynamic_axes=dynamic_axes,
-        )
+        try:
+            fake_vec_obs = [torch.zeros([1] + [self.brain.vector_observation_space_size])]
+            fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
+            fake_masks = torch.ones([1] + self.actor_critic.act_size)
+            # fake_memories = torch.zeros([1] + [self.m_size])
+            export_path = "./model-" + str(step) + ".onnx"
+            output_names = ["action", "action_probs"]
+            input_names = ["vector_observation", "action_mask"]
+            dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
+            onnx.export(
+                self.actor_critic,
+                (fake_vec_obs, fake_vis_obs, fake_masks),
+                export_path,
+                verbose=True,
+                opset_version=12,
+                input_names=input_names,
+                output_names=output_names,
+                dynamic_axes=dynamic_axes,
+            )
+        except:
+            print("Could not export torch model")
+            return

     @property
     def vis_obs_size(self):
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/ppo/optimizer_torch.py
@@ -143,8 +143,8 @@ def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

         self.optimizer.step()
         update_stats = {
-            "Losses/Policy Loss": abs(policy_loss.detach().numpy()),
-            "Losses/Value Loss": value_loss.detach().numpy(),
+            "Losses/Policy Loss": abs(policy_loss.detach().cpu().numpy()),
+            "Losses/Value Loss": value_loss.detach().cpu().numpy(),
         }

         return update_stats
14 changes: 13 additions & 1 deletion ml-agents/mlagents/trainers/ppo/trainer.py
@@ -2,6 +2,14 @@
 # ## ML-Agent Learning (PPO)
 # Contains an implementation of PPO as described in: https://arxiv.org/abs/1707.06347

+class TestingConfiguration:
+    use_torch = False
+    max_steps = 0
+    env_name = ""
+    device = "cpu"
+
+
+
 from collections import defaultdict
 from typing import cast
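
These attributes are plain mutable class-level globals; experiment_torch.py configures a run by assigning to them before calling run_cli, roughly (condensed from run_experiment above):

from mlagents.trainers.ppo.trainer import TestingConfiguration

TestingConfiguration.env_name = "3DBall"  # pull the env from default_registry instead of a local build
TestingConfiguration.max_steps = 25000    # override max_steps from the YAML config
TestingConfiguration.use_torch = True     # select the torch implementation of PPO
TestingConfiguration.device = "cpu"       # or "cuda:0" when --gpu is passed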

@@ -22,6 +30,8 @@
 logger = get_logger(__name__)


+
+
 class PPOTrainer(RLTrainer):
     """The PPOTrainer is an implementation of the PPO algorithm."""

@@ -53,7 +63,9 @@ def __init__(
         )
         self.load = load
         self.seed = seed
-        self.framework = "torch"
+        self.framework = "torch" if TestingConfiguration.use_torch else "tf"
+        if TestingConfiguration.max_steps > 0:
+            self.trainer_settings.max_steps = TestingConfiguration.max_steps
         self.policy: Policy = None  # type: ignore

     def _process_trajectory(self, trajectory: Trajectory) -> None: