Skip to content

Commit 6eef9c2

Browse files
authored
Merge pull request #118 from iotamudelta/master
Merge from upstream
2 parents 5e2bc5a + 77f497c commit 6eef9c2

File tree

97 files changed

+3042
-4220
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

97 files changed

+3042
-4220
lines changed

.jenkins/caffe2/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ if [[ -z "$INTEGRATED" ]]; then
217217
218218
else
219219
220-
sudo FULL_CAFFE2=1 python setup.py install
220+
FULL_CAFFE2=1 python setup.py install --user
221221
# TODO: I'm not sure why this is necessary
222222
cp -r torch/lib/tmp_install $INSTALL_PREFIX
223223

CMakeLists.txt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -288,11 +288,6 @@ include_directories(BEFORE ${PROJECT_BINARY_DIR})
288288

289289
include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
290290

291-
# ---[ Old caffe protobuf
292-
if(BUILD_CAFFE2)
293-
add_subdirectory(caffe/proto)
294-
endif()
295-
296291
# ---[ Main build
297292
add_subdirectory(caffe2)
298293

aten/src/ATen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ IF(USE_CUDA AND NOT USE_ROCM)
251251
ENDIF(USE_MAGMA)
252252
IF ($ENV{ATEN_STATIC_CUDA})
253253
list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a")
254+
list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a")
254255
ENDIF($ENV{ATEN_STATIC_CUDA})
255256
ENDIF()
256257

aten/src/ATen/CPUApplyUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ inline void _setup_arrays(Tensor& tensor, Arg* iter) {
5757
for (int64_t i = 0; i < max_dim; i++) {
5858
int64_t size = tensor.size(i);
5959
int64_t stride = tensor.stride(i);
60-
while (i + 1 < max_dim &&
60+
while (tensor.stride(i) > 0 && i + 1 < max_dim &&
6161
(tensor.size(i + 1) == 1 ||
6262
tensor.stride(i) == tensor.size(i + 1) * tensor.stride(i + 1))) {
6363
size = size * tensor.size(i + 1);

aten/src/ATen/Declarations.cwrap

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
- THStorage* source
5959
- long storage_offset
6060
- IntListSize size
61-
- arg: IntListStride stride
61+
- arg: IntList stride
6262
default: {}
6363
]]
6464
[[
@@ -3408,13 +3408,13 @@
34083408
- cname: newWithSize
34093409
arguments:
34103410
- IntListSize size
3411-
- arg: IntListStride stride
3411+
- IntList stride
34123412
- cname: newWithStorage
34133413
arguments:
34143414
- THStorage* storage
34153415
- int64_t storageOffset
34163416
- IntListSize size
3417-
- arg: IntListStride stride
3417+
- arg: IntList stride
34183418
default: {}
34193419
]]
34203420

aten/src/ATen/Device.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,12 @@ Device::Device(const std::string& device_string) : Device(Type::CPU) {
7575
}
7676
}
7777

78-
} // namespace at
79-
8078
std::ostream& operator<<(std::ostream& stream, const at::Device& device) {
8179
stream << device.type();
8280
if (device.has_index()) {
8381
stream << ":" << device.index();
8482
}
8583
return stream;
8684
}
85+
86+
} // namespace at

aten/src/ATen/Device.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,12 @@ struct Device {
111111
DeviceType type_;
112112
int32_t index_ = -1;
113113
};
114-
} // namespace at
115114

116115
AT_API std::ostream& operator<<(std::ostream& stream, const at::Device& device);
117116

117+
} // namespace at
118+
119+
118120
namespace std {
119121
template<> struct hash<at::Device>
120122
{

aten/src/ATen/Layout.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ inline Layout layout_from_backend(Backend backend) {
2020
return Layout::Strided;
2121
}
2222
}
23-
} // namespace at
2423

2524
inline std::ostream& operator<<(std::ostream& stream, at::Layout layout) {
2625
switch (layout) {
@@ -32,3 +31,5 @@ inline std::ostream& operator<<(std::ostream& stream, at::Layout layout) {
3231
AT_ERROR("Unknown layout");
3332
}
3433
}
34+
35+
} // namespace at

aten/src/ATen/THSizeStrideCompat.h

Lines changed: 0 additions & 32 deletions
This file was deleted.

aten/src/ATen/TensorGeometry.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
namespace at {
66

77
bool TensorGeometry::is_contiguous() const {
8+
if (numel_ == 0) {
9+
return true;
10+
}
811
int64_t dim = sizes_.size();
912
int64_t expected_stride = 1;
1013
for (int64_t i = dim - 1; i >= 0; i--) {

aten/src/ATen/TensorGeometry.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ struct AT_API TensorGeometry {
1818
strides_[i] = expected_stride;
1919
expected_stride *= sizes_[i];
2020
}
21+
numel_ = expected_stride;
2122
}
2223

2324
explicit TensorGeometry(const Tensor& t)
2425
: sizes_(t.sizes().vec())
2526
, strides_(t.strides().vec())
26-
, storage_offset_(t.storage_offset()) {}
27+
, storage_offset_(t.storage_offset())
28+
, numel_(t.numel()) {}
2729

2830
// true if the tensor is contiguous
2931
bool is_contiguous() const;
@@ -43,13 +45,7 @@ struct AT_API TensorGeometry {
4345
}
4446
IntList strides() const { return IntList{ strides_ }; }
4547
int64_t storage_offset() const { return storage_offset_; }
46-
int64_t numel() const {
47-
int64_t r = 1;
48-
for (auto s : sizes()) {
49-
r *= s;
50-
}
51-
return r;
52-
}
48+
int64_t numel() const { return numel_; }
5349

5450
TensorGeometry transpose(int64_t dim0, int64_t dim1) {
5551
TensorGeometry r = *this; // copy
@@ -63,6 +59,7 @@ struct AT_API TensorGeometry {
6359
std::vector<int64_t> sizes_;
6460
std::vector<int64_t> strides_;
6561
int64_t storage_offset_;
62+
int64_t numel_;
6663
};
6764

6865
} // namespace at

aten/src/ATen/TensorUtils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ void checkSameGPU(CheckedFrom c, const TensorArg& t1, const TensorArg& t2) {
118118
oss << "Tensor for " << t2 << " is on CPU, ";
119119
}
120120
oss << "but expected " << ((!(t1->is_cuda() || t2->is_cuda())) ? "them" : "it")
121-
<< " to be on GPU (while checking arguments for " << c << ")";
121+
<< " to be on GPU (while checking arguments for " << c << ")";
122122
AT_ERROR(oss.str());
123123
}
124124
AT_CHECK(

aten/src/ATen/core/DeviceType.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ std::string DeviceTypeName(at::DeviceType d, bool lower_case) {
3434
}
3535
}
3636

37-
} // namespace at
38-
3937
std::ostream& operator<<(std::ostream& stream, at::DeviceType type) {
4038
stream << at::DeviceTypeName(type, /* lower case */ true);
4139
return stream;
4240
}
41+
42+
} // namespace at

aten/src/ATen/core/DeviceType.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ AT_CORE_API std::string DeviceTypeName(
2727
at::DeviceType d,
2828
bool lower_case = false);
2929

30-
} // namespace at
31-
3230
AT_CORE_API std::ostream& operator<<(std::ostream& stream, at::DeviceType type);
31+
32+
} // namespace at

aten/src/ATen/cuda/detail/KernelUtils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#pragma once
2+
3+
#include "ATen/ATen.h"
4+
25
// Contents of this file are copied from THCUNN/common.h for the ease of porting
36
// THCUNN functions into ATen.
47

@@ -14,6 +17,7 @@ constexpr int CUDA_NUM_THREADS = 1024;
1417
// CUDA: number of blocks for threads.
1518
inline int GET_BLOCKS(const int N)
1619
{
20+
AT_ASSERTM(N > 0, "CUDA kernel launch blocks must be positive, but got N=", N);
1721
return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
1822
}
1923

aten/src/ATen/cuda/detail/TensorInfo.cuh

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ namespace detail {
1111
// CUDA kernel argument that defines tensor layout
1212
template <typename T, typename IndexType>
1313
struct TensorInfo {
14+
TensorInfo();
1415
TensorInfo(T* p,
1516
int dim,
1617
IndexType sz[MAX_TENSORINFO_DIMS],
@@ -47,6 +48,12 @@ struct TensorInfo {
4748
int dims;
4849
};
4950

51+
template <typename T, typename IndexType>
52+
TensorInfo<T, IndexType>::TensorInfo() {
53+
data = nullptr;
54+
dims = 0;
55+
}
56+
5057
template <typename T, typename IndexType>
5158
TensorInfo<T, IndexType>::TensorInfo(T* p,
5259
int dim,
@@ -73,7 +80,7 @@ template <typename T, typename IndexType>
7380
int
7481
TensorInfo<T, IndexType>::collapseDims(const int excludeDim) {
7582

76-
AT_CHECK(excludeDim >= -1 && excludeDim < dims,
83+
AT_CHECK(excludeDim >= -1 && excludeDim < dims,
7784
"expected excluded dim between -1 and dims - 1");
7885

7986
int stopDim = (excludeDim == -1) ? dims : excludeDim;
@@ -87,20 +94,20 @@ TensorInfo<T, IndexType>::collapseDims(const int excludeDim) {
8794
if (sizes[oldIndex] == 1) {
8895
continue;
8996
}
90-
97+
9198
++newIndex;
9299
sizes[newIndex] = sizes[oldIndex];
93100
strides[newIndex] = strides[oldIndex];
94101
++oldIndex;
95-
break;
102+
break;
96103
}
97104

98105
// Collapses dims
99106
for (; oldIndex < stopDim; ++oldIndex) {
100107
if (sizes[oldIndex] == 1) {
101108
continue;
102109
}
103-
110+
104111
if (strides[newIndex] == sizes[oldIndex] * strides[oldIndex]) {
105112
sizes[newIndex] *= sizes[oldIndex];
106113
strides[newIndex] = strides[oldIndex];
@@ -113,7 +120,7 @@ TensorInfo<T, IndexType>::collapseDims(const int excludeDim) {
113120

114121
// Handles excludeDim being set (oldIndex == excludeDim)
115122
if (oldIndex != dims) {
116-
123+
117124
// Preserves excluded dimension
118125
++newIndex;
119126
sizes[newIndex] = sizes[oldIndex];
@@ -146,7 +153,7 @@ struct IndexToOffset {
146153
static __host__ __device__ IndexType get(
147154
IndexType linearId,
148155
const TensorInfo<T, IndexType>& info) {
149-
156+
150157
IndexType offset = 0;
151158

152159
// Uses static dims

aten/src/ATen/function_wrapper.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,6 @@ def __init__(self, reason):
211211
'THStorage*': 'Storage &',
212212
'THGenerator*': 'Generator *',
213213
'IntListSize': 'IntList',
214-
'IntListStride': 'IntList',
215214
'accreal': 'Scalar',
216215
'real': 'Scalar',
217216
'long': 'int64_t',
@@ -228,7 +227,6 @@ def __init__(self, reason):
228227
'THStorage*': 'Storage',
229228
'THGenerator*': 'Generator*',
230229
'IntListSize': 'IntList',
231-
'IntListStride': 'IntList',
232230
'accreal': 'accreal',
233231
'real': 'real',
234232
'long': 'int64_t',
@@ -297,8 +295,6 @@ def __init__(self, reason):
297295
CodeTemplate(
298296
'check_generator<${Backend}Generator>(${arg_name}, &globalContext().defaultGenerator(backend()))'),
299297
# This is a cast done via direct-construction
300-
'IntListSize': CodeTemplate('at::IntList ${result_name} = get_intlist_size_th(${arg_name});'),
301-
'IntListStride': CodeTemplate('at::IntList ${result_name} = get_intlist_stride_th(${arg_name});'),
302298
'real': CodeTemplate('${arg_name}.to${ScalarName}()'),
303299
'accreal': CodeTemplate('${arg_name}.to${AccScalarName}()'),
304300
'TensorList': CodeTemplate(
@@ -308,8 +304,6 @@ def __init__(self, reason):
308304
'IntList': CodeTemplate('check_intlist<${size}>(${arg_name}, "${arg_name}", ${arg_pos}${,default_init})')
309305
}
310306

311-
DIRECT_CONSTRUCTION_CHECKED_CAST = {'IntListSize', 'IntListStride'}
312-
313307
CHECKED_USE = {
314308
'THTensor*': '{}_->tensor',
315309
'THSTensor*': '{}_->tensor',
@@ -1374,19 +1368,12 @@ def emit_body(env, option):
13741368
if 'default_init' in arg:
13751369
default_init.append(arg['default_init'])
13761370

1377-
if arg['type'] in DIRECT_CONSTRUCTION_CHECKED_CAST:
1378-
body.append(CHECKED_CAST[arg['type']].substitute(
1379-
env, arg_name=arg['name'], arg_pos=count,
1380-
null_okay=null_okay, default_init=default_init,
1381-
size=arg.get('size'),
1382-
result_name=arg['name'] + '_'))
1383-
else:
1384-
check_cast = CHECKED_CAST[arg['type']].substitute(
1385-
env, arg_name=arg['name'], arg_pos=count,
1386-
null_okay=null_okay, default_init=default_init,
1387-
size=arg.get('size'))
1388-
body.append("auto {}_ = {};".format(
1389-
arg['name'], check_cast))
1371+
check_cast = CHECKED_CAST[arg['type']].substitute(
1372+
env, arg_name=arg['name'], arg_pos=count,
1373+
null_okay=null_okay, default_init=default_init,
1374+
size=arg.get('size'))
1375+
body.append("auto {}_ = {};".format(
1376+
arg['name'], check_cast))
13901377
if drop_argument(arg, option) or replace_with_null(arg):
13911378
body.append(
13921379
"(void) {}_; //silence unused warning".format(arg['name']))

0 commit comments

Comments (0)