Enabling a few more torchbench models with AOT Autograd #127

Merged 1 commit on Apr 7, 2022
19 changes: 13 additions & 6 deletions torchbench.py
@@ -71,19 +71,16 @@
"maml",
# Known issues with training
"demucs", # https://github.com/pytorch/benchmark/pull/639
"densenet121", # https://github.com/pytorch/benchmark/issues/652
"hf_Albert", # https://github.com/pytorch/benchmark/issues/652
"hf_Reformer", # Can only be used in the training phase
# AOT Autograd known issues
"dlrm", # No sparse support
"resnet50_quantized_qat", # Con2DBnRelu
# Known TorchDynamo bug
"hf_GPT2", # Hard to debug stashed tensor issue
"tacotron2", # Model uses Variable
}

# Some models have bad train dataset. We read eval dataset.
ONLY_EVAL_DATASET = {"yolov3"}
# yolov3 - seems to have different number of inputs between eval and train
# densenet121 - OOM for train, using eval for now.
ONLY_EVAL_DATASET = {"yolov3", "densenet121"}

# These models support only train mode. So accuracy checking can't be done in
# eval mode.
@@ -93,6 +90,8 @@
REQUIRE_HIGHER_TOLERANCE = {
    "alexnet",
    "attention_is_all_you_need_pytorch",
    "densenet121",
    "hf_Albert",
    "vgg16",
    "mobilenet_v3_large",
}
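densenet121 and hf_Albert leave the skip list above and instead land here, so their training results are checked against a looser tolerance. The sketch below shows how such a set is typically consumed; the helper name and the concrete threshold values are illustrative assumptions, not values taken from torchbench.py.

def pick_tolerance(model_name, require_higher_tolerance, default=1e-4, loose=1e-3):
    # Models whose training results drift slightly more than the default
    # threshold still pass accuracy checking, just with a looser bound.
    return loose if model_name in require_higher_tolerance else default

tol = pick_tolerance("densenet121", {"alexnet", "densenet121", "hf_Albert", "vgg16"})
print(tol)  # 0.001
# e.g. same(correct_result, new_result, tol=tol)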
@@ -574,6 +573,11 @@ def main():
        action="store_true",
        help="Generates AOT Autograd stats like how many graphs are sent to AOT",
    )
    parser.add_argument(
        "--disable-functionalization",
        action="store_true",
        help="Disables functionalization",
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--coverage", action="store_true", help="(default) " + help(coverage_experiment)
@@ -856,6 +860,9 @@ def main():
    if output_filename:
        output_filename = os.path.join(torchdynamo.config.base_dir, output_filename)

    if args.disable_functionalization:
        torchdynamo.config.normalize_ir = False

    if args.minimum_call_count:
        torchdynamo.config.minimum_call_count = args.minimum_call_count
    if args.only:
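Taken together, the two hunks above add a --disable-functionalization switch and wire it to torchdynamo.config.normalize_ir. Below is a minimal standalone sketch of that wiring, assuming torchdynamo is importable; only the flag and config names are taken from the diff.

import argparse

import torchdynamo

parser = argparse.ArgumentParser()
parser.add_argument(
    "--disable-functionalization",
    action="store_true",
    help="Disables functionalization",
)
args = parser.parse_args(["--disable-functionalization"])

if args.disable_functionalization:
    # With normalize_ir off, the mutation-removal (functionalization) pass is
    # skipped and AOT Autograd sees the captured graph as-is.
    torchdynamo.config.normalize_ir = False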
16 changes: 9 additions & 7 deletions torchdynamo/optimizations/training.py
@@ -2,6 +2,7 @@

import torch

from torchdynamo import config
from torchdynamo.utils import clone_inputs
from torchdynamo.utils import count_calls
from torchdynamo.utils import counters
@@ -27,13 +28,14 @@ def __init__(self, gm: torch.fx.GraphModule, example_inputs):
        counters["aot_autograd"]["total"] += 1
        self.use_fallback = False
        self.original_example_inputs = example_inputs
        try:
            self.gm = normalize_ir(gm, self.example_inputs)
        except Exception:
            log.debug("TorchDynamo unable to remove mutation")
            self.gm = gm
            self.use_fallback = True
            pass
        self.gm = gm
        if config.normalize_ir:
            try:
                self.gm = normalize_ir(gm, self.example_inputs)
            except Exception:
                log.debug("TorchDynamo unable to remove mutation")
                self.use_fallback = True
                pass

        gm_inputs = list(filter(lambda x: x.op == "placeholder", gm.graph.nodes))

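The constructor now follows a best-effort pattern: normalize_ir runs only when config.normalize_ir is set, and a failure keeps the original graph and marks the module for fallback instead of raising. A self-contained sketch of that pattern with hypothetical helper names (not TorchDynamo code):

def try_normalize(gm, example_inputs, normalize_ir, enabled):
    """Return (graph to use, use_fallback)."""
    if not enabled:
        return gm, False
    try:
        return normalize_ir(gm, example_inputs), False
    except Exception:
        # Keep the original graph; the caller can fall back to eager execution.
        return gm, True

def broken_pass(gm, inputs):
    raise RuntimeError("unable to remove mutation")

gm, use_fallback = try_normalize("original-graph", [], broken_pass, enabled=True)
print(gm, use_fallback)  # original-graph True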
4 changes: 4 additions & 0 deletions torchdynamo/testing.py
@@ -84,6 +84,10 @@ def same(a, b, cos_similarity=False, tol=1e-4):
                return False
        return True
    elif isinstance(a, torch.Tensor):
        if a.is_sparse:
            assert b.is_sparse
            a = a.to_dense()
            b = b.to_dense()
        assert isinstance(b, torch.Tensor)
        if cos_similarity:
            # TRT will bring error loss larger than current threshold. Use cosine similarity as replacement
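The new branch lets same() compare sparse tensors by densifying both sides first, which is likely relevant to models such as dlrm that the skip list flags for missing sparse support. A self-contained sketch of the idea; same_dense is a stand-in for illustration, not the real helper.

import torch

def same_dense(a, b, tol=1e-4):
    # Convert sparse COO tensors to dense before comparing, mirroring the
    # branch added to same() above.
    if a.is_sparse:
        assert b.is_sparse
        a = a.to_dense()
        b = b.to_dense()
    return torch.allclose(a, b, rtol=tol, atol=tol)

indices = torch.tensor([[0, 1], [1, 0]])
values = torch.tensor([3.0, 4.0])
s = torch.sparse_coo_tensor(indices, values, (2, 2))
print(same_dense(s, s.clone()))  # True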