Skip to content

Commit 6ae8652

Browse files
author
Anurag Dixit
committed
feat: Added Python accuracy tests using Nox
Signed-off-by: Anurag Dixit <[email protected]>
1 parent 09afccb commit 6ae8652

File tree

3 files changed

+175
-1
lines changed

3 files changed

+175
-1
lines changed

docker/dist-accuracy-test.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
# Prepares the container checkout and runs the default nox sessions
# (Python accuracy + API tests) defined in noxfile.py.

# Abort on unset variables, on any failing command, and on failures inside pipelines.
set -o nounset
set -o errexit
set -o pipefail

# Optional first positional argument; defaults to empty.
# NOTE(review): not referenced anywhere else in this script - confirm whether it is still needed.
post=${1:-""}

# fetch bazel executable
BAZEL_VERSION=4.2.1
ARCH=$(uname -m)
# Bazel release artifacts are named "arm64", while uname reports "aarch64".
if [[ "$ARCH" == "aarch64" ]]; then ARCH="arm64"; fi
wget -q "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH}" -O /usr/bin/bazel
chmod a+x /usr/bin/bazel
# NOTE(review): presumably disables TF32 so accuracy numbers are reproducible - confirm.
export NVIDIA_TF32_OVERRIDE=0

cd /opt/pytorch/torch_tensorrt
# Install the docker-specific WORKSPACE as the active one.
# (A duplicated "cp cp" here previously made this command fail.)
cp /opt/pytorch/torch_tensorrt/docker/WORKSPACE.docker /opt/pytorch/torch_tensorrt/WORKSPACE

pip install --user --upgrade nox
nox

noxfile.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import nox
2+
import os
3+
4+
# Use system installed Python packages. Defaults to the docker container's
# conda site-packages; set the PYT_PATH environment variable to override.
PYT_PATH = os.environ.get('PYT_PATH', '/opt/conda/lib/python3.8/site-packages')

# Root directory for torch_tensorrt. Set according to docker container by default;
# set the TOP_DIR environment variable to point at a different checkout.
TOP_DIR = os.environ.get('TOP_DIR', '/opt/pytorch/torch_tensorrt')
9+
10+
# Download the dataset
11+
@nox.session(python=["3"], reuse_venv=True)
def download_datasets(session):
    """Download the CIFAR-10 binary archive and stage it for the accuracy tests.

    The archive is fetched and unpacked inside the VGG16 training example
    directory, then the extracted ``cifar-10-batches-bin`` folder is copied
    under ``tests/accuracy/datasets/data``.
    """
    vgg16_dir = os.path.join(TOP_DIR, 'examples/int8/training/vgg16')
    dataset_dir = os.path.join(TOP_DIR, 'tests/accuracy/datasets/data')

    session.chdir(vgg16_dir)

    # wget, tar, mkdir and cp are system tools that are not installed into the
    # session virtualenv, so each call is explicitly marked external=True.
    session.run_always('wget',
                       'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz',
                       external=True)
    session.run_always('tar', '-xvzf', 'cifar-10-binary.tar.gz', external=True)
    session.run_always('mkdir', '-p', dataset_dir, external=True)
    session.run_always('cp', '-rpf',
                       os.path.join(vgg16_dir, 'cifar-10-batches-bin'),
                       # Destination name matches the extracted folder
                       # (it was previously misspelled 'cidar-10-batches-bin').
                       os.path.join(dataset_dir, 'cifar-10-batches-bin'),
                       external=True)
22+
23+
# Download the model
24+
@nox.session(python=["3"], reuse_venv=True)
def download_models(session):
    """Install ``timm`` into the session and run ``tests/modules/hub.py``.

    ``hub.py`` is executed with ``PYTHONPATH`` pointing at the system
    site-packages (``PYT_PATH``).
    """
    session.install('timm')
    # Anchor on TOP_DIR, like every other session, so the result does not
    # depend on the directory nox was launched from.
    session.chdir(os.path.join(TOP_DIR, 'tests/modules'))
    session.run_always('python',
                       'hub.py',
                       env={'PYTHONPATH': PYT_PATH})
31+
32+
# Train the model
33+
@nox.session(python=["3"], reuse_venv=True)
def train_model(session):
    """Run the VGG16 example training script, then export its last checkpoint.

    Both scripts are executed from ``examples/int8/training/vgg16`` with
    ``PYTHONPATH`` set to the system site-packages (``PYT_PATH``).
    """
    session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16'))

    training_cmd = [
        'python', 'main.py',
        '--lr', '0.01',
        '--batch-size', '128',
        '--drop-ratio', '0.15',
        '--ckpt-dir', 'vgg16_ckpts',
        '--epochs', '25',
    ]
    session.run_always(*training_cmd, env=dict(PYTHONPATH=PYT_PATH))

    # Export model
    export_cmd = ['python', 'export_ckpt.py', 'vgg16_ckpts/ckpt_epoch25.pth']
    session.run_always(*export_cmd, env=dict(PYTHONPATH=PYT_PATH))
50+
51+
# Finetune the model
52+
@nox.session(python=["3"], reuse_venv=True)
def finetune_model(session):
    """Run the QAT fine-tuning script on the VGG16 example and export the result.

    Installs ``pytorch-quantization`` first, then runs ``finetune_qat.py``
    starting from epoch 25 and exports the epoch-26 checkpoint with
    ``export_qat.py``.
    """
    # pytorch-quantization is pulled from NVIDIA's extra package index.
    session.install('pytorch-quantization', '--extra-index-url', 'https://pypi.ngc.nvidia.com')

    session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16'))

    finetune_cmd = [
        'python', 'finetune_qat.py',
        '--lr', '0.01',
        '--batch-size', '128',
        '--drop-ratio', '0.15',
        '--ckpt-dir', 'vgg16_ckpts',
        '--start-from', '25',
        '--epochs', '26',
    ]
    session.run_always(*finetune_cmd, env=dict(PYTHONPATH=PYT_PATH))

    # Export model
    export_cmd = ['python', 'export_qat.py', 'vgg16_ckpts/ckpt_epoch26.pth']
    session.run_always(*export_cmd, env=dict(PYTHONPATH=PYT_PATH))
73+
74+
# Run PTQ tests
75+
@nox.session(python=["3"], reuse_venv=True)
def ptq_test(session):
    """Copy the exported VGG16 module into ``tests/py`` and run the PTQ test scripts."""
    session.chdir(os.path.join(TOP_DIR, 'tests/py'))

    exported_module = os.path.join(
        TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16.jit.pt')
    session.run_always('cp', '-rf', exported_module, '.', external=True)

    # Each script is run as its own python process, in this order.
    for script in ('test_ptq_dataloader_calibrator.py',
                   'test_ptq_to_backend.py',
                   'test_ptq_trt_calibrator.py'):
        session.run_always('python', script, env=dict(PYTHONPATH=PYT_PATH))
90+
91+
# Run QAT tests
92+
@nox.session(python=["3"], reuse_venv=True)
def qat_test(session):
    """Copy the exported QAT VGG16 module into ``tests/py`` and run the QAT accuracy test."""
    session.chdir(os.path.join(TOP_DIR, 'tests/py'))

    qat_module = os.path.join(
        TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16_qat.jit.pt')
    session.run_always('cp', '-rf', qat_module, '.', external=True)

    test_cmd = ('python', 'test_qat_trt_accuracy.py')
    session.run_always(*test_cmd, env=dict(PYTHONPATH=PYT_PATH))
103+
104+
# Run Python API tests
105+
@nox.session(python=["3"], reuse_venv=True)
def api_test(session):
    """Run the Python API test scripts located in ``tests/py``."""
    session.chdir(os.path.join(TOP_DIR, 'tests/py'))

    # Each script is run as its own python process, in this order.
    for script in ("test_api.py", "test_to_backend_api.py"):
        session.run_always('python', script, env=dict(PYTHONPATH=PYT_PATH))
116+
117+
# Clean up
118+
@nox.session(reuse_venv=True)
def cleanup(session):
    """Delete the artifacts produced by the other sessions.

    Removes exported ``*.jit.pt`` modules, the VGG16 checkpoints, and the
    downloaded dataset files, all resolved relative to ``TOP_DIR``.
    """
    patterns = [
        'examples/int8/training/vgg16/*.jit.pt',
        'examples/int8/training/vgg16/vgg16_ckpts',
        'examples/int8/training/vgg16/cifar-10-*',
        'examples/int8/training/vgg16/data',
        'tests/modules/*.jit.pt',
        'tests/py/*.jit.pt',
    ]

    targets = ' '.join(os.path.join(TOP_DIR, pattern) for pattern in patterns)
    # Run through `bash -c` so the shell expands the glob patterns.
    session.run_always('bash', '-c',
                       'rm -rf ' + targets,
                       external=True)

tests/README.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Tests
22

3-
Right now there are two types of tests. Converter level tests and Module level tests.
3+
Currently, the following tests are supported:
4+
1. Converter level tests
5+
2. Module level tests
6+
3. Accuracy tests
47

58
The goal of Converter tests is to test individual converters against specific subgraphs. The current tests in `core/converters` are good examples of how to write these tests. In general, every converter should have at least 1 test. More may be required if the operation has switches that change the behavior of the op.
69

@@ -20,6 +23,24 @@ bazel test //tests --compilation_mode=dbg --test_output=errors --jobs=4 --runs_p
2023

2124
`--jobs=4` is useful and is sometimes required to prevent too many processes from using GPU memory and causing CUDA out-of-memory issues.
2225

26+
Additionally, accuracy tests are supported for the Python backend using Nox. Please refer to [dist-accuracy-test.sh](../docker/dist-accuracy-test.sh) for an example of how they are run.
27+
```
28+
# To run complete Python accuracy + API tests
29+
nox
30+
31+
# To list the supported nox sessions
nox -l
32+
```
33+
34+
Note: Supported Python tests
35+
```
36+
* download_datasets-3
37+
* download_models-3
38+
* train_model-3
39+
* finetune_model-3
40+
* ptq_test-3
41+
* qat_test-3
* api_test-3
42+
* cleanup
43+
```
2344
### Testing using pre-built Torch-TensorRT library
2445

2546
Currently, the default strategy when we run all the tests (`bazel test //tests`) is to build the testing scripts along with the full Torch-TensorRT library (`libtorchtrt.so`) from scratch. This can lead to increased testing time and might not be needed in case you already have a pre-built Torch-TensorRT library that you want to link against.

0 commit comments

Comments
 (0)