diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh
index bb06df22868ea9..3e400d59934415 100755
--- a/.jenkins/pytorch/build.sh
+++ b/.jenkins/pytorch/build.sh
@@ -33,6 +33,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
   export HCC_AMDGPU_TARGET=gfx900
   export LANG=C.UTF-8
   export LC_ALL=C.UTF-8
+  export PYTORCH_BUILT_WITH_ROCM=1
 
   sudo chown -R jenkins:jenkins /usr/local
   rm -rf "$(dirname "${BASH_SOURCE[0]}")/../../../pytorch_amd/" || true
diff --git a/test/common.py b/test/common.py
index 5debc66905624c..8ee3b2be855157 100644
--- a/test/common.py
+++ b/test/common.py
@@ -73,6 +73,7 @@ def run_tests(argv=UNITTEST_ARGS):
 NO_MULTIPROCESSING_SPAWN = os.environ.get('NO_MULTIPROCESSING_SPAWN', '0') == '1'
 TEST_WITH_ASAN = os.getenv('PYTORCH_TEST_WITH_ASAN', '0') == '1'
 TEST_WITH_UBSAN = os.getenv('PYTORCH_TEST_WITH_UBSAN', '0') == '1'
+BUILT_WITH_ROCM = os.getenv('PYTORCH_BUILT_WITH_ROCM', '0') == '1'
 
 
 def skipIfNoLapack(fn):
diff --git a/test/test_autograd.py b/test/test_autograd.py
index 3b519ea5a86874..88b7954be3d7b0 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -14,7 +14,7 @@
 from torch.autograd.function import once_differentiable
 from torch.autograd.profiler import profile
 from common import TEST_MKL, TestCase, run_tests, skipIfNoLapack, \
-    suppress_warnings, skipIfNoZeroSize
+    suppress_warnings, skipIfNoZeroSize, BUILT_WITH_ROCM
 from torch.autograd import Variable, Function, detect_anomaly
 from torch.autograd.function import InplaceFunction
 from torch.testing import make_non_contiguous, randn_like
@@ -1561,6 +1561,7 @@ def test_pyscalar_conversions(self):
         self._test_pyscalar_conversions(lambda x: x.cuda(), lambda x: long(x))
 
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_pin_memory(self):
         x = torch.randn(2, 2, requires_grad=True)
         self.assertEqual(x, x.pin_memory())
@@ -2359,6 +2360,7 @@ def f3(dt):
             f(dt)
 
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_set_requires_grad_only_for_floats_cuda(self):
         self._test_set_requires_grad_only_for_floats(self, True)
 
@@ -2366,6 +2368,7 @@ def test_set_requires_grad_only_for_floats(self):
         self._test_set_requires_grad_only_for_floats(self, False)
 
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_rnn_backward_to_input_but_not_parameters_cuda(self):
         # this checks whether it is possible to not require
         # weight parameters, but require inputs, see #7722
diff --git a/test/test_dataloader.py b/test/test_dataloader.py
index ef636a1ec77e72..d9a03f3401158d 100644
--- a/test/test_dataloader.py
+++ b/test/test_dataloader.py
@@ -13,7 +13,7 @@
 from torch.utils.data import Dataset, TensorDataset, DataLoader, ConcatDataset
 from torch.utils.data.dataset import random_split
 from torch.utils.data.dataloader import default_collate, ExceptionWrapper, MANAGER_STATUS_CHECK_INTERVAL
-from common import TestCase, run_tests, TEST_NUMPY, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN
+from common import TestCase, run_tests, TEST_NUMPY, IS_WINDOWS, NO_MULTIPROCESSING_SPAWN, BUILT_WITH_ROCM
 
 # We cannot import TEST_CUDA from common_nn here, because if we do that,
 # the TEST_CUDNN line from common_nn will be executed multiple times
@@ -338,12 +338,14 @@ def test_growing_dataset(self):
         self.assertEqual(len(dataloader_shuffle), 5)
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_sequential_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, pin_memory=True)
         for input, target in loader:
             self.assertTrue(input.is_pinned())
             self.assertTrue(target.is_pinned())
 
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_multiple_dataloaders(self):
         loader1_it = iter(DataLoader(self.dataset, num_workers=1))
         loader2_it = iter(DataLoader(self.dataset, num_workers=2))
@@ -444,6 +446,7 @@ def test_batch_sampler(self):
         self._test_batch_sampler(num_workers=4)
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_shuffle_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True)
         for input, target in loader:
@@ -476,6 +479,7 @@ def test_error_workers(self):
 
     @unittest.skipIf(IS_WINDOWS, "FIXME: stuck test")
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_partial_workers(self):
         "check that workers exit even if the iterator is not exhausted"
         loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4, pin_memory=True))
@@ -529,6 +533,7 @@ def _is_process_alive(pid, pname):
                      "spawn start method is not supported in Python 2, \
                      but we need it for creating another process with CUDA")
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_manager_unclean_exit(self):
         '''there might be ConnectionResetError or leaked semaphore warning (due to dirty process exit), \
 but they are all safe to ignore'''
@@ -632,6 +637,7 @@ def setUp(self):
         self.dataset = StringDataset()
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
    def test_shuffle_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True)
         for batch_ndx, (s, n) in enumerate(loader):
@@ -675,6 +681,7 @@ def test_sequential_batch(self):
             self.assertEqual(n[1], idx + 1)
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_pin_memory(self):
         loader = DataLoader(self.dataset, batch_size=2, pin_memory=True)
         for batch_ndx, sample in enumerate(loader):
@@ -714,6 +721,7 @@ def _run_ind_worker_queue_test(self, batch_size, num_workers):
             if current_worker_idx == num_workers:
                 current_worker_idx = 0
 
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_ind_worker_queue(self):
         for batch_size in (8, 16, 32, 64):
             for num_workers in range(1, 6):
diff --git a/test/test_jit.py b/test/test_jit.py
index 0663b41b67e08f..f93985281522bf 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -9,7 +9,7 @@
 from torch.autograd.function import traceable
 from torch.testing import assert_allclose
 from torch.onnx import OperatorExportTypes
-from common import TestCase, run_tests, IS_WINDOWS, TEST_WITH_UBSAN
+from common import TestCase, run_tests, IS_WINDOWS, TEST_WITH_UBSAN, BUILT_WITH_ROCM
 from textwrap import dedent
 import os
 import io
@@ -344,6 +344,7 @@ def forward(self, x):
     # TODO: Fuser doesn't work at all when inputs require grad. Fix that
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_lstm_fusion_cuda(self):
         inputs = get_lstm_inputs('cuda')
         ge = self.checkTrace(LSTMCellF, inputs)
@@ -367,6 +368,7 @@ def test_lstm_fusion_cpu(self):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_lstm_fusion_concat(self):
         inputs = get_lstm_inputs('cuda')
         ge = self.checkTrace(LSTMCellC, inputs)
@@ -374,6 +376,7 @@ def test_lstm_fusion_concat(self):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_concat_fusion(self):
         hx = torch.randn(3, 20, dtype=torch.float, device='cuda')
         cx = torch.randn(3, 20, dtype=torch.float, device='cuda')
@@ -386,6 +389,7 @@ def foo(hx, cx):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_fusion_distribute(self):
         def f(x, y):
             z1, z2 = (x + y).chunk(2, dim=1)
@@ -407,6 +411,7 @@ def fn_test_comparison_gt_lt(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_comparison_gt_lt(self):
         x = torch.randn(4, 4, dtype=torch.float, device='cuda')
         y = torch.randn(4, 4, dtype=torch.float, device='cuda')
@@ -415,6 +420,7 @@ def test_comparison_gt_lt(self):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_comparison_ge_le(self):
         def f(x, y):
             mask = (x >= 0).type_as(x)
@@ -434,6 +440,7 @@ def fn_test_relu(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_relu(self):
         x = torch.randn(4, 4, dtype=torch.float, device='cuda')
         y = torch.randn(4, 4, dtype=torch.float, device='cuda')
@@ -446,6 +453,7 @@ def fn_test_exp(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_exp(self):
         x = torch.randn(4, 4, dtype=torch.float, device='cuda')
         y = torch.randn(4, 4, dtype=torch.float, device='cuda')
@@ -790,6 +798,7 @@ def doit(x, y):
 
     @unittest.skipIf(IS_WINDOWS, "NYI: fuser support for Windows")
     @unittest.skipIf(not RUN_CUDA, "cpp tests require CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_cpp(self):
         # rather than rebuild assertExpected in cpp,
         # just glob all the cpp outputs into one file for now
@@ -2005,6 +2014,7 @@ def test_tensor_number_math(self):
         self._test_tensor_number_math()
 
     @unittest.skipIf(not RUN_CUDA, "No CUDA")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_tensor_number_math_cuda(self):
         self._test_tensor_number_math(device='cuda')
 
diff --git a/test/test_optim.py b/test/test_optim.py
index 57fe9e5da53944..67328919c32df6 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -10,7 +10,7 @@
 from torch.autograd import Variable
 from torch import sparse
 from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
-from common import TestCase, run_tests, TEST_WITH_UBSAN
+from common import TestCase, run_tests, TEST_WITH_UBSAN, BUILT_WITH_ROCM
 
 
 def rosenbrock(tensor):
@@ -437,6 +437,7 @@ def test_asgd(self):
         with self.assertRaisesRegex(ValueError, "Invalid weight_decay value: -0.5"):
             optim.ASGD(None, lr=1e-2, weight_decay=-0.5)
 
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_rprop(self):
         self._test_rosenbrock(
             lambda params: optim.Rprop(params, lr=1e-3),
diff --git a/test/test_utils.py b/test/test_utils.py
index e8c33ca761c7e8..dcafe5103f9c0d 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -19,7 +19,7 @@
 from torch.utils.trainer.plugins.plugin import Plugin
 from torch.autograd._functions.utils import prepare_onnx_paddings
 from torch.autograd._functions.utils import check_onnx_broadcast
-from common import IS_WINDOWS, IS_PPC
+from common import IS_WINDOWS, IS_PPC, BUILT_WITH_ROCM
 
 HAS_CUDA = torch.cuda.is_available()
 
@@ -412,6 +412,7 @@ def test_cpu(self):
 
     @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
     @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows")
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_gpu(self):
         create_extension(
             name='gpulib',
@@ -615,6 +616,7 @@ def test_bottleneck_cpu_only(self):
         self._check_cuda(out)
 
     @unittest.skipIf(not HAS_CUDA, 'No CUDA')
+    @unittest.skipIf(BUILT_WITH_ROCM, "test doesn't currently work on the ROCm stack")
     def test_bottleneck_cuda(self):
         rc, out, err = self._run_bottleneck('bottleneck/test_cuda.py')
         self.assertEqual(rc, 0, 'Run failed with\n{}'.format(err))