Commit 58ce20b

Merge pull request #160 from jithunnair-amd/skip_tests

Skip KLDivLoss_cuda tests due to hang

2 parents: 3f93283 + eb81bae

7 files changed: +57 -24 lines

CONTRIBUTING.md

Lines changed: 48 additions & 2 deletions
@@ -269,8 +269,7 @@ than Linux, which are worth keeping in mind when fixing these problems.
 3. If you have a Windows box (we have a few on EC2 which you can request access to) and
    you want to run the build, the easiest way is to just run `.jenkins/pytorch/win-build.sh`.
    If you need to rebuild, run `REBUILD=1 .jenkins/pytorch/win-build.sh` (this will avoid
-   blowing away your Conda environment.) I recommend opening `cmd.exe`, and then running
-   `bash` to work in a bash shell (which will make various Linux commands available.)
+   blowing away your Conda environment.)

 Even if you don't know anything about MSVC, you can use cmake to build simple programs on
 Windows; this can be helpful if you want to learn more about some peculiar linking behavior
@@ -296,6 +295,53 @@ cmake ..
 cmake --build .
 ```

+### Known MSVC (and MSVC with NVCC) bugs
+
+The PyTorch codebase sometimes likes to use exciting C++ features, and
+these exciting features lead to exciting bugs in Windows compilers.
+To add insult to injury, the error messages will often not tell you
+which line of code actually induced the erroring template instantiation.
+
+I've found the most effective way to debug these problems is to
+carefully read over diffs, keeping in mind known bugs in MSVC/NVCC.
+Here are a few well known pitfalls and workarounds:
+
+* This is not actually a bug per se, but in general, code generated by MSVC
+  is more sensitive to memory errors; you may have written some code
+  that does a use-after-free or stack overflows; on Linux the code
+  might work, but on Windows your program will crash. ASAN may not
+  catch all of these problems: stay vigilant to the possibility that
+  your crash is due to a real memory problem.
+
+* (NVCC) `at::optional` does not work when used from device code. Don't use
+  it from kernels. Upstream issue: https://github.com/akrzemi1/Optional/issues/58
+  and our local issue #10329.
+
+* `constexpr` generally works less well on MSVC.
+
+  * The idiom `static_assert(f() == f())` to test if `f` is constexpr
+    does not work; you'll get "error C2131: expression did not evaluate
+    to a constant". Don't use these asserts on Windows.
+    (Example: `aten/src/ATen/core/intrusive_ptr.h`)
+
+* (NVCC) Code you access inside a `static_assert` will eagerly be
+  evaluated as if it were device code, and so you might get an error
+  that the code is "not accessible".
+
+  ```
+  class A {
+    static A singleton_;
+    static constexpr inline A* singleton() {
+      return &singleton_;
+    }
+  };
+  static_assert(std::is_same<A*, decltype(A::singleton())>::value, "hmm");
+  ```
+
+* The compiler will run out of heap if you attempt to compile files that
+  are too large. Splitting such files into separate files helps.
+  (Example: `THTensorMath`, `THTensorMoreMath`, `THTensorEvenMoreMath`.)
+
 ## Caffe2 notes

 In 2018, we merged Caffe2 into the PyTorch source repository. While the
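The `static_assert(f() == f())` idiom called out in the new section is easier to see outside diff context. The sketch below is a minimal, hypothetical illustration (the `squared` function is invented for this note and is not PyTorch code): it compiles on GCC/Clang whenever `squared` is usable in a constant expression, while the new docs warn that MSVC can reject the same assert with C2131.

```cpp
// Minimal sketch of the constexpr-detection idiom mentioned in the new
// CONTRIBUTING.md section; `squared` is a hypothetical example function.
constexpr int squared(int x) { return x * x; }

// Compiles only if squared(3) can be evaluated at compile time; the docs above
// note that MSVC may instead report "error C2131: expression did not evaluate
// to a constant", which is why the idiom is discouraged on Windows.
static_assert(squared(3) == squared(3), "squared should be usable in constant expressions");

int main() { return 0; }
```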

aten/src/ATen/Scalar.cpp

Lines changed: 3 additions & 10 deletions
@@ -13,9 +13,7 @@
 namespace at {

 Tensor Scalar::toTensor() const {
-  if (Tag::HAS_t == tag) {
-    return Tensor(t);
-  } else if (Tag::HAS_d == tag) {
+  if (Tag::HAS_d == tag) {
     return CPU(kDouble).scalarTensor(*this);
   } else {
     assert(Tag::HAS_i == tag);
@@ -24,19 +22,14 @@ Tensor Scalar::toTensor() const {
 }

 Scalar Scalar::local() const {
-  if (Tag::HAS_t != tag) {
-    return *this;
-  }
-  return Tensor(t)._local_scalar();
+  return *this;
 }

 Scalar Scalar::operator-() const {
   if (isFloatingPoint()) {
     return Scalar(-v.d);
-  } else if (isIntegral()) {
-    return Scalar(-v.i);
   } else {
-    return -Tensor(t)._local_scalar();
+    return Scalar(-v.i);
   }
 }

aten/src/ATen/Scalar.h

Lines changed: 2 additions & 9 deletions
@@ -8,7 +8,6 @@

 #include "ATen/core/ATenGeneral.h"
 #include "ATen/core/ScalarType.h"
-#include "ATen/TensorBase.h"
 #include "ATen/core/Half.h"

 namespace at {
@@ -34,9 +33,7 @@ class AT_API Scalar {

 #define DEFINE_ACCESSOR(type,name,member) \
   type to##name () const { \
-    if (Tag::HAS_t == tag) { \
-      return local().to##name(); \
-    } else if (Tag::HAS_d == tag) { \
+    if (Tag::HAS_d == tag) { \
       return checked_convert<type, double>(v.d, #type); \
     } else { \
       return checked_convert<type, int64_t>(v.i, #type); \
@@ -58,20 +55,16 @@ class AT_API Scalar {
   bool isIntegral() const {
     return Tag::HAS_i == tag;
   }
-  bool isBackedByTensor() const {
-    return Tag::HAS_t == tag;
-  }

   Scalar operator-() const;

 private:
-  enum class Tag { HAS_d, HAS_i, HAS_t };
+  enum class Tag { HAS_d, HAS_i };
   Tag tag;
   union {
     double d;
     int64_t i = 0;
   } v;
-  detail::TensorBase t;
   friend struct Type;
 };
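Taken together with the `Scalar.cpp` changes above, the header now describes a plain two-way tagged union. The following is a small, self-contained sketch of that shape, not the real ATen class; `MiniScalar` and its members are invented here purely to illustrate what remains once the tensor-backed `HAS_t` case is gone.

```cpp
// Hypothetical stand-in for the simplified at::Scalar: a value is either a
// double or an int64_t, so every accessor branches on just two tags.
#include <cassert>
#include <cstdint>

struct MiniScalar {
  enum class Tag { HAS_d, HAS_i };
  Tag tag;
  union { double d; int64_t i; } v;

  explicit MiniScalar(double val) : tag(Tag::HAS_d) { v.d = val; }
  explicit MiniScalar(int64_t val) : tag(Tag::HAS_i) { v.i = val; }

  bool isFloatingPoint() const { return tag == Tag::HAS_d; }
  bool isIntegral() const { return tag == Tag::HAS_i; }

  // Mirrors the simplified Scalar::operator-(): only two cases remain.
  MiniScalar operator-() const {
    return isFloatingPoint() ? MiniScalar(-v.d) : MiniScalar(-v.i);
  }
};

int main() {
  MiniScalar s(int64_t{3});
  assert((-s).v.i == -3);
  return 0;
}
```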

aten/src/ATen/templates/Type.cpp

Lines changed: 0 additions & 2 deletions
@@ -91,8 +91,6 @@ Tensor Type::tensorWithAllocator(IntList sizes, IntList strides, Allocator* allo
   return tensor(storage, 0, sizes, strides);
 }
 Tensor Type::scalarTensor(Scalar s) const {
-  if(s.isBackedByTensor())
-    return Tensor(s.t).toType(*this);
   return tensor({}).fill_(s);
 }

aten/src/THC/THCAtomics.cuh

Lines changed: 2 additions & 0 deletions
@@ -138,8 +138,10 @@ static inline __device__ void atomicAdd(double *address, double val) {
   } while (assumed != old);
 }
 #elif !defined(__CUDA_ARCH__) && (CUDA_VERSION < 8000) || defined(__HIP_PLATFORM_HCC__)
+#if defined(__HIP_PLATFORM_HCC__) && __hcc_workweek__ < 18312
 // This needs to be defined for the host side pass
 static inline __device__ void atomicAdd(double *address, double val) { }
 #endif
+#endif

 #endif // THC_ATOMICS_INC
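The nested guards are harder to follow in diff form, so here is the same logic restated with indentation and comments. This is only an illustrative rearrangement, under the assumption that `__hcc_workweek__` is the HCC build-week macro provided by the ROCm toolchain; the committed code is exactly what the diff shows.

```cpp
// Illustrative restatement of the guard above: the empty host-side
// atomicAdd(double*) stub is now emitted only for ROCm/HCC compilers older
// than workweek 18312, rather than for every HIP or pre-CUDA-8.0 build.
#if !defined(__CUDA_ARCH__) && (CUDA_VERSION < 8000) || defined(__HIP_PLATFORM_HCC__)
#  if defined(__HIP_PLATFORM_HCC__) && __hcc_workweek__ < 18312
     // The host-side compilation pass still needs to see a definition on old hcc.
     static inline __device__ void atomicAdd(double *address, double val) { }
#  endif
#endif
```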

test/common_nn.py

Lines changed: 1 addition & 0 deletions
@@ -573,6 +573,7 @@ def ctcloss_reference(log_probs, targets, input_lengths, target_lengths, blank=0
         reference_fn=lambda i, t, m:
             kldivloss_reference(i, t, get_reduction(m)),
         check_sum_reduction=True,
+        test_cuda=(not TEST_WITH_ROCM)
     ),
     dict(
         module_name='MSELoss',

tools/autograd/templates/python_torch_functions.cpp

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ inline Tensor dispatch_arange(Scalar start, Scalar end, Scalar step, const Tenso

 static inline bool allIntegral(std::initializer_list<std::reference_wrapper<Scalar>> l) {
   for (Scalar& s : l) {
-    if (!(s.isIntegral() || (s.isBackedByTensor() && at::isIntegralType(s.toTensor().type().scalarType())))) {
+    if (!s.isIntegral()) {
       return false;
     }
   }
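With tensor-backed scalars removed, `allIntegral` only needs `Scalar::isIntegral()`. Below is a self-contained sketch of the simplified helper, using an invented `DemoScalar` in place of `at::Scalar`, mainly to show how the `initializer_list` of `reference_wrapper`s is consumed.

```cpp
// Standalone sketch of the simplified allIntegral(); DemoScalar is a
// hypothetical stand-in for at::Scalar exposing only isIntegral().
#include <functional>
#include <initializer_list>

struct DemoScalar {
  bool integral;
  bool isIntegral() const { return integral; }
};

static inline bool allIntegral(std::initializer_list<std::reference_wrapper<DemoScalar>> l) {
  for (DemoScalar& s : l) {  // reference_wrapper converts back to DemoScalar&
    if (!s.isIntegral()) {
      return false;
    }
  }
  return true;
}

int main() {
  DemoScalar start{true}, end{true}, step{false};
  // step is not integral, so the check fails, mirroring how the integral
  // arange overload would be skipped for non-integral arguments.
  return allIntegral({start, end, step}) ? 1 : 0;
}
```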
