Skip to content

Yamato inference tests #4066

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jun 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .yamato/training-int-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ test_mac_training_int_{{ editor.version }}:
logs:
paths:
- "artifacts/standalone_build.txt"
- "artifacts/inference.nn.txt"
- "artifacts/inference.onnx.txt"
standalonebuild:
paths:
- "artifacts/testplayer*/**"
- "artifacts/models/**"
{% endfor %}
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@ namespace Unity.MLAgentsExamples
/// Utility class to allow the NNModel file for an agent to be overridden during inference.
/// This is used internally to validate the file after training is done.
/// The behavior name to override and file path are specified on the commandline, e.g.
/// player.exe --mlagents-override-model behavior1 /path/to/model1.nn --mlagents-override-model behavior2 /path/to/model2.nn
/// player.exe --mlagents-override-model-directory /path/to/models
///
/// Additionally, a number of episodes to run can be specified; after this, the application will quit.
/// Note this will only work with example scenes that have 1:1 Agent:Behaviors. More complicated scenes like WallJump
/// probably won't override correctly.
/// </summary>
public class ModelOverrider : MonoBehaviour
{
HashSet<string> k_SupportedExtensions = new HashSet<string>{"nn", "onnx"};
const string k_CommandLineModelOverrideFlag = "--mlagents-override-model";
const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory";
const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";

Expand All @@ -36,6 +38,8 @@ public class ModelOverrider : MonoBehaviour

string m_BehaviorNameOverrideDirectory;

string m_OverrideExtension = "nn";

// Cached loaded NNModels, with the behavior name as the key.
Dictionary<string, NNModel> m_CachedModels = new Dictionary<string, NNModel>();

Expand Down Expand Up @@ -105,6 +109,21 @@ void GetAssetPathFromCommandLine()
{
m_BehaviorNameOverrideDirectory = args[i + 1].Trim();
}
else if (args[i] == k_CommandLineModelOverrideExtensionFlag && i < args.Length-1)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added support to allow overriding with .onnx files, but later found out that we need to make some other changes in the import code to support this. Waiting to hear back from the Barracuda team about the best way to handle this.

{
m_OverrideExtension = args[i + 1].Trim().ToLower();
var isKnownExtension = k_SupportedExtensions.Contains(m_OverrideExtension);
// Not supported yet - need to update the model loading code to support
var isOnnx = m_OverrideExtension.Equals("onnx");
if (!isKnownExtension || isOnnx)
{
Debug.LogError($"loading unsupported format: {m_OverrideExtension}");
Application.Quit(1);
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
}
}
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length-1)
{
Int32.TryParse(args[i + 1], out maxEpisodes);
Expand Down Expand Up @@ -181,7 +200,7 @@ public NNModel GetModelForBehaviorName(string behaviorName)
}
else if(!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
{
assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.nn");
assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.{m_OverrideExtension}");
}

if (string.IsNullOrEmpty(assetPath))
Expand All @@ -203,6 +222,8 @@ public NNModel GetModelForBehaviorName(string behaviorName)
return null;
}

// Note - this approach doesn't work for onnx files. Need to replace with
// the equivalent of ONNXModelImporter.OnImportAsset()
var asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
asset.modelData.Value = model;
Expand All @@ -217,27 +238,47 @@ public NNModel GetModelForBehaviorName(string behaviorName)
/// </summary>
void OverrideModel()
{
    // Track success explicitly so that any failure path (missing model file
    // or an exception from SetModel) can trigger the quit-on-failure logic.
    bool overrideOk = false;
    string overrideError = null;

    m_Agent.LazyInitialize();
    var bp = m_Agent.GetComponent<BehaviorParameters>();
    var behaviorName = bp.BehaviorName;

    var nnModel = GetModelForBehaviorName(behaviorName);
    if (nnModel == null)
    {
        overrideError =
            $"Didn't find a model for behaviorName {behaviorName}. Make " +
            $"sure the behaviorName is set correctly in the commandline " +
            $"and that the model file exists";
    }
    else
    {
        var modelName = nnModel != null ? nnModel.name : "<null>";
        Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}");
        try
        {
            // SetModel can throw if the model is incompatible with the
            // behavior (e.g. failed parameter checks); treat that as a
            // load failure rather than letting the exception propagate.
            m_Agent.SetModel(GetOverrideBehaviorName(behaviorName), nnModel);
            overrideOk = true;
        }
        catch (Exception e)
        {
            overrideError = $"Exception calling Agent.SetModel: {e}";
        }
    }

    if (!overrideOk && m_QuitOnLoadFailure)
    {
        if (!string.IsNullOrEmpty(overrideError))
        {
            Debug.LogWarning(overrideError);
        }
        // Exit with a non-zero code so CI detects the failure. In the
        // editor, Application.Quit is a no-op, so stop play mode instead.
        Application.Quit(1);
#if UNITY_EDITOR
        EditorApplication.isPlaying = false;
#endif
    }
}
}
Expand Down
10 changes: 10 additions & 0 deletions com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,16 @@ public static IEnumerable<string> CheckModel(Model model, BrainParameters brainP
return failedModelChecks;
}

foreach (var constantName in TensorNames.RequiredConstants)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This prevents null-reference exceptions when trying to load models without the expected constants in them (which was happening for onnx files before a fix on master). This can be moved to another PR, but it's a nice to have.

{
var tensor = model.GetTensorByName(constantName);
if (tensor == null)
{
failedModelChecks.Add($"Required constant \"{constantName}\" was not found in the model file.");
return failedModelChecks;
}
}

var modelApiVersion = (int)model.GetTensorByName(TensorNames.VersionNumber)[0];
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0];
Expand Down
5 changes: 5 additions & 0 deletions com.unity.ml-agents/Runtime/Inference/TensorNames.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,10 @@ internal static class TensorNames
public const string IsContinuousControl = "is_continuous_control";
public const string ActionOutputShape = "action_output_shape";
public const string ActionOutput = "action";

// Constant tensors that every supported model file must contain.
// BarracudaModelParamLoader.CheckModel looks each of these up with
// GetTensorByName and reports a failed check if any is missing, instead
// of dereferencing a null tensor later.
public static readonly string[] RequiredConstants =
{
VersionNumber, MemorySize, IsContinuousControl, ActionOutputShape
};
}
}
90 changes: 79 additions & 11 deletions ml-agents/tests/yamato/training_int_tests.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import argparse
import os
import shutil
import sys
import subprocess
import time
from typing import Any

from .yamato_utils import (
find_executables,
get_base_path,
get_base_output_path,
run_standalone_build,
Expand All @@ -16,18 +19,21 @@
)


def run_training(python_version, csharp_version):
def run_training(python_version: str, csharp_version: str) -> bool:
latest = "latest"
run_id = int(time.time() * 1000.0)
print(
f"Running training with python={python_version or latest} and c#={csharp_version or latest}"
)
output_dir = "models" if python_version else "results"
nn_file_expected = f"./{output_dir}/{run_id}/3DBall.nn"
onnx_file_expected = f"./{output_dir}/{run_id}/3DBall.onnx"
frozen_graph_file_expected = f"./{output_dir}/{run_id}/3DBall/frozen_graph_def.pb"

if os.path.exists(nn_file_expected):
# Should never happen - make sure nothing leftover from an old test.
print("Artifacts from previous build found!")
sys.exit(1)
return False

base_path = get_base_path()
print(f"Running in base path {base_path}")
Expand All @@ -50,8 +56,8 @@ def run_training(python_version, csharp_version):
build_returncode = run_standalone_build(base_path)

if build_returncode != 0:
print("Standalone build FAILED!")
sys.exit(build_returncode)
print(f"Standalone build FAILED! with return code {build_returncode}")
return False

# Now rename the newly-built executable, and restore the old one
os.rename(full_player_path, final_player_path)
Expand All @@ -66,7 +72,7 @@ def run_training(python_version, csharp_version):
# and reduce the batch_size and buffer_size enough to ensure an update step happens.
yaml_out = "override.yaml"
if python_version:
overrides = {"max_steps": 100, "batch_size": 10, "buffer_size": 10}
overrides: Any = {"max_steps": 100, "batch_size": 10, "buffer_size": 10}
override_legacy_config_file(
python_version, "config/trainer_config.yaml", yaml_out, **overrides
)
Expand All @@ -77,21 +83,80 @@ def run_training(python_version, csharp_version):
}
override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides)

env_path = os.path.join(get_base_output_path(), standalone_player_path + ".app")
mla_learn_cmd = (
f"mlagents-learn {yaml_out} --force --env="
f"{os.path.join(get_base_output_path(), standalone_player_path)} "
f"mlagents-learn {yaml_out} --force --env={env_path} "
f"--run-id={run_id} --no-graphics --env-args -logFile -"
) # noqa
res = subprocess.run(
f"source {venv_path}/bin/activate; {mla_learn_cmd}", shell=True
)

if res.returncode != 0 or not os.path.exists(nn_file_expected):
# Save models as artifacts (only if we're using latest python and C#)
if csharp_version is None and python_version is None:
model_artifacts_dir = os.path.join(get_base_output_path(), "models")
os.makedirs(model_artifacts_dir, exist_ok=True)
shutil.copy(nn_file_expected, model_artifacts_dir)
shutil.copy(onnx_file_expected, model_artifacts_dir)
shutil.copy(frozen_graph_file_expected, model_artifacts_dir)

if (
res.returncode != 0
or not os.path.exists(nn_file_expected)
or not os.path.exists(onnx_file_expected)
):
print("mlagents-learn run FAILED!")
sys.exit(1)
return False

if csharp_version is None and python_version is None:
# Use abs path so that loading doesn't get confused
model_path = os.path.abspath(os.path.dirname(nn_file_expected))
# Onnx loading for overrides not currently supported, but this is
# where to add it in when it is.
for extension in ["nn"]:
inference_ok = run_inference(env_path, model_path, extension)
if not inference_ok:
return False

print("mlagents-learn run SUCCEEDED!")
sys.exit(0)
return True


def run_inference(env_path: str, output_path: str, model_extension: str) -> bool:
    """
    Run the standalone player in inference mode against previously trained
    model files, and report whether it exited cleanly.

    :param env_path: Directory containing the built standalone player.
    :param output_path: Directory containing the trained model files.
    :param model_extension: Model file extension to load (e.g. "nn").
    :return: True if the player ran and exited with code 0, False otherwise.
    """
    start_time = time.time()

    # The executable name varies between Unity versions, so search for it.
    # There must be exactly one candidate or we can't tell which one to run.
    exes = find_executables(env_path)
    if len(exes) != 1:
        print(f"Can't determine the player executable in {env_path}. Found {exes}.")
        return False

    log_output_path = f"{get_base_output_path()}/inference.{model_extension}.txt"

    exe_path = exes[0]
    args = [
        exe_path,
        "-nographics",
        "-batchmode",
        "-logfile",
        log_output_path,
        # The flags below are parsed by ModelOverrider in the C# project.
        "--mlagents-override-model-directory",
        output_path,
        "--mlagents-quit-on-load-failure",
        "--mlagents-quit-after-episodes",
        "1",
        "--mlagents-override-model-extension",
        model_extension,
    ]
    res = subprocess.run(args)
    elapsed = time.time() - start_time

    if res.returncode != 0:
        print("Error running inference!")
        print("Command line: " + " ".join(args))
        # Dump the player log to aid debugging. Read it in-process instead of
        # shelling out to `cat`: portable, and handles a log that was never
        # written (e.g. the player crashed before opening it).
        if os.path.exists(log_output_path):
            with open(log_output_path) as f:
                print(f.read())
        else:
            print(f"Log file {log_output_path} was not written.")
        return False

    print(f"Inference succeeded! Took {elapsed} seconds")
    return True


def main():
Expand All @@ -101,7 +166,10 @@ def main():
args = parser.parse_args()

try:
run_training(args.python, args.csharp)
ok = run_training(args.python, args.csharp)
if not ok:
sys.exit(1)

finally:
# Cleanup - this gets executed even if we hit sys.exit()
undo_git_checkout()
Expand Down
18 changes: 18 additions & 0 deletions ml-agents/tests/yamato/yamato_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,23 @@ def run_standalone_build(
return res.returncode


def find_executables(root_dir: str) -> List[str]:
    """
    Recursively find executable files under root_dir.

    The name of the player executable varies between Unity versions, so
    instead of looking for a fixed name, return every file that has the
    executable permission bit set and isn't a library/plugin.

    :param root_dir: Directory to search (e.g. the .app bundle on mac).
    :return: Paths of candidate executables; empty list if none are found.
    """
    # Shared libraries and plugin bundles may also have the executable bit
    # set; skip them by extension (case-insensitively, in case a build tool
    # emits e.g. ".DLL").
    ignored_extension = frozenset([".dll", ".dylib", ".bundle"])
    exes = []
    for root, _, files in os.walk(root_dir):
        for filename in files:
            # Only the extension matters here; the basename is unused.
            _, ext = os.path.splitext(filename)
            if ext.lower() in ignored_extension:
                continue
            file_path = os.path.join(root, filename)
            # os.access with X_OK checks the executable permission bit.
            if os.access(file_path, os.X_OK):
                exes.append(file_path)
    return exes


def init_venv(
mlagents_python_version: str = None, extra_packages: Optional[List[str]] = None
) -> str:
Expand All @@ -105,6 +122,7 @@ def init_venv(
"--upgrade setuptools",
# TODO build these and publish to internal pypi
"~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl",
"tf2onnx==1.6.1",
]
if mlagents_python_version:
# install from pypi
Expand Down