diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh
index bb06df22868ea9..3e400d59934415 100755
--- a/.jenkins/pytorch/build.sh
+++ b/.jenkins/pytorch/build.sh
@@ -33,6 +33,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
   export HCC_AMDGPU_TARGET=gfx900
   export LANG=C.UTF-8
   export LC_ALL=C.UTF-8
+  export PYTORCH_BUILT_WITH_ROCM=1
 
   sudo chown -R jenkins:jenkins /usr/local
   rm -rf "$(dirname "${BASH_SOURCE[0]}")/../../../pytorch_amd/" || true
diff --git a/test/common.py b/test/common.py
index 5debc66905624c..8ee3b2be855157 100644
--- a/test/common.py
+++ b/test/common.py
@@ -73,6 +73,7 @@ def run_tests(argv=UNITTEST_ARGS):
 NO_MULTIPROCESSING_SPAWN = os.environ.get('NO_MULTIPROCESSING_SPAWN', '0') == '1'
 TEST_WITH_ASAN = os.getenv('PYTORCH_TEST_WITH_ASAN', '0') == '1'
 TEST_WITH_UBSAN = os.getenv('PYTORCH_TEST_WITH_UBSAN', '0') == '1'
+BUILT_WITH_ROCM = os.getenv('PYTORCH_BUILT_WITH_ROCM', '0') == '1'
 
 
 def skipIfNoLapack(fn):
diff --git a/test/test_autograd.py b/test/test_autograd.py
index 3b519ea5a86874..88b7954be3d7b0 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -14,7 +14,7 @@
 from torch.autograd.function import once_differentiable
 from torch.autograd.profiler import profile
 from common import TEST_MKL, TestCase, run_tests, skipIfNoLapack, \
-    suppress_warnings, skipIfNoZeroSize
+    suppress_warnings, skipIfNoZeroSize, BUILT_WITH_ROCM
 from torch.autograd import Variable, Function, detect_anomaly
 from torch.autograd.function import InplaceFunction
 from torch.testing import make_non_contiguous, randn_like
@@ -1561,6 +1561,7 @@ def test_pyscalar_conversions(self):
         self._test_pyscalar_conversions(lambda x: x.cuda(), lambda x: long(x))
 
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_pin_memory(self):
         x = torch.randn(2, 2, requires_grad=True)
         self.assertEqual(x, x.pin_memory())
@@ -2359,6 +2360,7 @@ def f3(dt):
             f(dt)
 
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_set_requires_grad_only_for_floats_cuda(self):
         self._test_set_requires_grad_only_for_floats(self, True)
 
@@ -2366,6 +2368,7 @@ def test_set_requires_grad_only_for_floats(self):
         self._test_set_requires_grad_only_for_floats(self, False)
 
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_rnn_backward_to_input_but_not_parameters_cuda(self):
         # this checks whether it is possible to not require
         # weight parameters, but require inputs, see #7722
diff --git a/test/test_dataloader.py b/test/test_dataloader.py
index ef636a1ec77e72..d9a03f3401158d 100644
--- a/test/test_dataloader.py
+++ b/test/test_dataloader.py
@@ -13,7 +13,7 @@
 from torch.utils.data import Dataset, TensorDataset, DataLoader, ConcatDataset
 from torch.utils.data.dataset import random_split
 from torch.utils.data.dataloader import default_collate, ExceptionWrapper, MANAGER_STATUS_CHECK_INTERVAL
-from common import TestCase, run_tests, TEST_NUMPY, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN
+from common import TestCase, run_tests, TEST_NUMPY, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN, BUILT_WITH_ROCM
 
 # We cannot import TEST_CUDA from common_nn here, because if we do that,
 # the TEST_CUDNN line from common_nn will be executed multiple times
@@ -338,12 +338,14 @@ def test_growing_dataset(self):
         self.assertEqual(len(dataloader_shuffle), 5)
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_sequential_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, pin_memory=True)
         for input, target in loader:
             self.assertTrue(input.is_pinned())
             self.assertTrue(target.is_pinned())
 
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_multiple_dataloaders(self):
         loader1_it = iter(DataLoader(self.dataset, num_workers=1))
         loader2_it = iter(DataLoader(self.dataset, num_workers=2))
@@ -444,6 +446,7 @@ def test_batch_sampler(self):
         self._test_batch_sampler(num_workers=4)
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_shuffle_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True)
         for input, target in loader:
@@ -476,6 +479,7 @@ def test_error_workers(self):
 
     @unittest.skipIf(IS_WINDOWS, "FIXME: stuck test")
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_partial_workers(self):
         "check that workers exit even if the iterator is not exhausted"
         loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4, pin_memory=True))
@@ -529,6 +533,7 @@ def _is_process_alive(pid, pname):
                      "spawn start method is not supported in Python 2, \
                      but we need it for creating another process with CUDA")
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_manager_unclean_exit(self):
         '''there might be ConnectionResetError or leaked semaphore warning (due to dirty process exit), \
 but they are all safe to ignore'''
@@ -632,6 +637,7 @@ def setUp(self):
         self.dataset = StringDataset()
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
    def test_shuffle_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True)
         for batch_ndx, (s, n) in enumerate(loader):
@@ -675,6 +681,7 @@ def test_sequential_batch(self):
             self.assertEqual(n[1], idx + 1)
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, pin_memory=True)
         for batch_ndx, sample in enumerate(loader):
@@ -714,6 +721,7 @@ def _run_ind_worker_queue_test(self, batch_size, num_workers):
             if current_worker_idx == num_workers:
                 current_worker_idx = 0
 
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_ind_worker_queue(self):
         for batch_size in (8, 16, 32, 64):
             for num_workers in range(1, 6):
diff --git a/test/test_jit.py b/test/test_jit.py
index 0663b41b67e08f..f93985281522bf 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -9,7 +9,7 @@
 from torch.autograd.function import traceable
 from torch.testing import assert_allclose
 from torch.onnx import OperatorExportTypes
-from common import TestCase, run_tests, IS_WINDOWS, TEST_WITH_UBSAN
+from common import TestCase, run_tests, IS_WINDOWS, TEST_WITH_UBSAN, BUILT_WITH_ROCM
 from textwrap import dedent
 import os
 import io
@@ -344,6 +344,7 @@ def forward(self, x):
     # TODO: Fuser doesn't work at all when inputs require grad. Fix that
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_lstm_fusion_cuda(self):
         inputs = get_lstm_inputs('cuda')
         ge = self.checkTrace(LSTMCellF, inputs)
@@ -367,6 +368,7 @@ def test_lstm_fusion_cpu(self):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_lstm_fusion_concat(self):
         inputs = get_lstm_inputs('cuda')
         ge = self.checkTrace(LSTMCellC, inputs)
@@ -374,6 +376,7 @@ def test_lstm_fusion_concat(self):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_concat_fusion(self):
         hx = torch.randn(3, 20, dtype=torch.float, device='cuda')
         cx = torch.randn(3, 20, dtype=torch.float, device='cuda')
@@ -386,6 +389,7 @@ def foo(hx, cx):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_fusion_distribute(self):
         def f(x, y):
             z1, z2 = (x + y).chunk(2, dim=1)
@@ -407,6 +411,7 @@ def fn_test_comparison_gt_lt(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_comparison_gt_lt(self):
         x = torch.randn(4, 4, dtype=torch.float, device='cuda')
         y = torch.randn(4, 4, dtype=torch.float, device='cuda')
@@ -415,6 +420,7 @@ def test_comparison_gt_lt(self):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_comparison_ge_le(self):
         def f(x, y):
             mask = (x >= 0).type_as(x)
@@ -434,6 +440,7 @@ def fn_test_relu(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_relu(self):
         x = torch.randn(4, 4, dtype=torch.float, device='cuda')
         y = torch.randn(4, 4, dtype=torch.float, device='cuda')
@@ -446,6 +453,7 @@ def fn_test_exp(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_exp(self):
         x = torch.randn(4, 4, dtype=torch.float, device='cuda')
         y = torch.randn(4, 4, dtype=torch.float, device='cuda')
@@ -790,6 +798,7 @@ def doit(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "cpp tests require CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_cpp(self):
         # rather than rebuild assertExpected in cpp,
         # just glob all the cpp outputs into one file for now
@@ -2005,6 +2014,7 @@ def test_tensor_number_math(self):
         self._test_tensor_number_math()
 
     @unittest.skipIf(not RUN_CUDA, "No CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_tensor_number_math_cuda(self):
         self._test_tensor_number_math(device='cuda')
 
diff --git a/test/test_optim.py b/test/test_optim.py
index 57fe9e5da53944..67328919c32df6 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -10,7 +10,7 @@
 from torch.autograd import Variable
 from torch import sparse
 from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
-from common import TestCase, run_tests, TEST_WITH_UBSAN
+from common import TestCase, run_tests, TEST_WITH_UBSAN, BUILT_WITH_ROCM
 
 
 def rosenbrock(tensor):
@@ -437,6 +437,7 @@ def test_asgd(self):
         with self.assertRaisesRegex(ValueError, "Invalid weight_decay value: -0.5"):
             optim.ASGD(None, lr=1e-2, weight_decay=-0.5)
 
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_rprop(self):
         self._test_rosenbrock(
             lambda params: optim.Rprop(params, lr=1e-3),
diff --git a/test/test_utils.py b/test/test_utils.py
index e8c33ca761c7e8..dcafe5103f9c0d 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -19,7 +19,7 @@
 from torch.utils.trainer.plugins.plugin import Plugin
 from torch.autograd._functions.utils import prepare_onnx_paddings
 from torch.autograd._functions.utils import check_onnx_broadcast
-from common import IS_WINDOWS, IS_PPC
+from common import IS_WINDOWS, IS_PPC, BUILT_WITH_ROCM
 
 HAS_CUDA = torch.cuda.is_available()
 
@@ -412,6 +412,7 @@ def test_cpu(self):
 
     @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
     @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_gpu(self):
         create_extension(
             name='gpulib',
@@ -615,6 +616,7 @@ def test_bottleneck_cpu_only(self):
         self._check_cuda(out)
 
     @unittest.skipIf(not HAS_CUDA, 'No CUDA')
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_bottleneck_cuda(self):
         rc, out, err = self._run_bottleneck('bottleneck/test_cuda.py')
         self.assertEqual(rc, 0, 'Run failed with\n{}'.format(err))