ROCm · iotamudelta · Aug 24, 2018 · Aug 20, 2018 · Aug 20, 2018 · Aug 20, 2018
diff --git a/.jenkins/caffe2/build.sh b/.jenkins/caffe2/build.sh
@@ -157,6 +157,7 @@ if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
   export HCC_AMDGPU_TARGET=gfx900
 
   ########## HIPIFY Caffe2 operators
+  ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py"
   ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py"
 fi
 
@@ -190,7 +191,6 @@ else
 fi
 
 
-
 ###############################################################################
 # Configure and make
 ###############################################################################

diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh
@@ -30,7 +30,6 @@ cmake --version
 pip install -r requirements.txt || true
 
 if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
-  export MAX_JOBS=4
   # This is necessary in order to cross compile (or else we'll have missing GPU device).
   export HCC_AMDGPU_TARGET=gfx900
 
@@ -48,6 +47,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
   sudo apt-get install libc++abi1
 
   python tools/amd_build/build_pytorch_amd.py
+  python tools/amd_build/build_caffe2_amd.py
   USE_ROCM=1 python setup.py install --user
   exit 0
 fi

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -55,7 +55,8 @@ endif()
 include(CMakeDependentOption)
 option(BUILD_TORCH "Build Torch" OFF)
 option(BUILD_CAFFE2 "Build Caffe2" ON)
-option(BUILD_ATEN "Build ATen" OFF)
+option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
+option(BUILD_ATEN_MOBILE "Build ATen for Android and iOS" OFF)
 option(BUILD_BINARY "Build C++ binaries" ON)
 option(BUILD_DOCS "Build Caffe2 documentation" OFF)
 option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON)
@@ -75,7 +76,6 @@ cmake_dependent_option(
     "BUILD_TEST" OFF)
 option(USE_ACL "Use ARM Compute Library" OFF)
 option(USE_ASAN "Use Address Sanitizer" OFF)
-option(USE_ATEN "Use ATen" OFF)
 option(USE_CUDA "Use CUDA" ON)
 option(USE_ROCM "Use ROCm" OFF)
 option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
@@ -145,8 +145,8 @@ option(USE_DISTRIBUTED_MW "Use THD (distributed) master worker" OFF)
 # Used when building Caffe2 through setup.py
 option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF)
 
-if (USE_ATEN)
-    set(BUILD_ATEN ${USE_ATEN})
+if (ANDROID OR IOS) 
+  set(BUILD_ATEN_MOBILE ON)
 endif()
 
 # ---[ CMake scripts + modules

diff --git a/aten/CMakeLists.txt b/aten/CMakeLists.txt
@@ -1,8 +1,4 @@
-if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
-  if (NOT BUILD_ATEN)
-    return()
-  endif()
-else()
+if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
   cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
   project(ATen CXX C)
   include(CMakeDependentOption)
@@ -14,9 +10,10 @@ else()
       USE_CUDNN "Use cuDNN" ON
       "USE_CUDA" OFF)
   option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
-
-  # Flag for shared dependencies
-  set(BUILD_ATEN ON)
+else()
+  if (BUILD_ATEN_MOBILE)
+    return()
+  endif()
 endif()
 
 # Find modules

diff --git a/aten/src/ATen/ATenGeneral.h b/aten/src/ATen/ATenGeneral.h
@@ -3,4 +3,4 @@
 #include "ATen/core/Macros.h"
 
 // TODO: Merge the *_API macros.
-#define AT_API AT_CORE_API
+#define AT_API AT_CORE_API
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
@@ -13,23 +13,6 @@ IF(NOT MSVC)
   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-absolute-value")
 ENDIF(NOT MSVC)
 
-################################################################################
-# Helper functions
-################################################################################
-
-function(filter_list output input)
-    unset(result)
-    foreach(filename ${${input}})
-        foreach(pattern ${ARGN})
-            if("${filename}" MATCHES "${pattern}")
-                list(APPEND result "${filename}")
-            endif()
-        endforeach()
-    endforeach()
-    set(${output} ${result} PARENT_SCOPE)
-endfunction()
-
-
 # Can be compiled standalone
 IF(NOT AT_INSTALL_BIN_DIR OR NOT AT_INSTALL_LIB_DIR OR NOT AT_INSTALL_INCLUDE_DIR OR NOT AT_INSTALL_SHARE_DIR)
   SET(AT_INSTALL_BIN_DIR "bin" CACHE PATH "AT install binary subdirectory")

diff --git a/aten/src/ATen/cuda/ATenCUDAGeneral.h b/aten/src/ATen/cuda/ATenCUDAGeneral.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #ifdef _WIN32
-# if defined(ATen_cuda_EXPORTS) || defined(caffe2_gpu_EXPORTS)
+# if defined(ATen_cuda_EXPORTS) || defined(caffe2_gpu_EXPORTS) || defined(CAFFE2_BUILD_MAIN_LIB)
 #  define AT_CUDA_API __declspec(dllexport)
 # else
 #  define AT_CUDA_API __declspec(dllimport)

diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp
@@ -127,7 +127,7 @@ auto ConvParams::use_miopen(const at::Tensor& input) const -> bool {
 
 auto ConvParams::use_mkldnn(const at::Tensor& input) const -> bool {
 #if AT_MKLDNN_ENABLED()
-  return input.type().backend() == kCPU &&
+  return input.type().backend() == at::Backend::CPU &&
          input.type().scalarType() == kFloat && // only on CPU Float Tensors
          !is_dilated() && // doesn't support dilation
          !transposed && // or transposed tensors

diff --git a/aten/src/ATen/native/cuda/SpectralOps.cu b/aten/src/ATen/native/cuda/SpectralOps.cu
@@ -29,13 +29,16 @@ using namespace at::native::detail;
 // counting_iterator => index to fill
 struct cnt_to_dst_idx_functor : public thrust::unary_function<int64_t, int64_t>
 {
-  const int64_t last_dim_size;
-  const int64_t last_dim_start_slice;
-  const int64_t last_dim_to_fill_size;
+  int64_t last_dim_size;
+  int64_t last_dim_start_slice;
+  int64_t last_dim_to_fill_size;
 
   cnt_to_dst_idx_functor(int64_t last_dim_size, int64_t last_dim_start_slice) :
     last_dim_size(last_dim_size), last_dim_start_slice(last_dim_start_slice),
     last_dim_to_fill_size(last_dim_size - last_dim_start_slice) {}
+
+  __host__ __device__
+  cnt_to_dst_idx_functor & operator=(const cnt_to_dst_idx_functor&) = default;
 
   __host__ __device__ __forceinline__
   int64_t operator()(const int64_t& i) const

diff --git a/aten/src/ATen/native/miopen/BatchNorm.cpp → ...c/ATen/native/miopen/BatchNorm_miopen.cpp b/aten/src/ATen/native/miopen/BatchNorm.cpp → ...c/ATen/native/miopen/BatchNorm_miopen.cpp
diff --git a/aten/src/ATen/native/miopen/Conv.cpp → aten/src/ATen/native/miopen/Conv_miopen.cpp b/aten/src/ATen/native/miopen/Conv.cpp → aten/src/ATen/native/miopen/Conv_miopen.cpp
diff --git a/aten/src/THC/THCBlas.cu b/aten/src/THC/THCBlas.cu
@@ -514,7 +514,7 @@ void THCudaBlas_Dgetrf(THCState *state, int n, double **a, int lda, int *pivot,
   THCublasCheck(cublasDgetrfBatched(handle, n, a, lda, pivot, info, batchSize));
 }
 
-THC_API void THCudaBlas_Sgetrs(THCState *state, char transa, int n, int nrhs, const float **a, int lda, int *pivot, float **b, int ldb, int *info, int batchSize)
+void THCudaBlas_Sgetrs(THCState *state, char transa, int n, int nrhs, const float **a, int lda, int *pivot, float **b, int ldb, int *info, int batchSize)
 {
   if( (n >= INT_MAX) || (nrhs >= INT_MAX) || (lda >= INT_MAX) || (ldb >= INT_MAX) || (batchSize >= INT_MAX) )
   {
@@ -531,7 +531,7 @@ THC_API void THCudaBlas_Sgetrs(THCState *state, char transa, int n, int nrhs, co
 }
 
 
-THC_API void THCudaBlas_Dgetrs(THCState *state, char transa, int n, int nrhs, const double **a, int lda, int *pivot, double **b, int ldb, int *info, int batchSize)
+void THCudaBlas_Dgetrs(THCState *state, char transa, int n, int nrhs, const double **a, int lda, int *pivot, double **b, int ldb, int *info, int batchSize)
 {
   if( (n >= INT_MAX) || (nrhs >= INT_MAX) || (lda >= INT_MAX) || (ldb >= INT_MAX) || (batchSize >= INT_MAX) )
   {

diff --git a/aten/src/THC/THCGeneral.h.in b/aten/src/THC/THCGeneral.h.in
@@ -23,7 +23,7 @@
 #endif
 
 #ifdef _WIN32
-# if defined(ATen_cuda_EXPORTS) || defined(caffe2_gpu_EXPORTS)
+# if defined(ATen_cuda_EXPORTS) || defined(caffe2_gpu_EXPORTS) || defined(CAFFE2_BUILD_MAIN_LIB)
 #  define THC_API THC_EXTERNC __declspec(dllexport)
 #  define THC_CLASS __declspec(dllexport)
 # else

diff --git a/aten/src/THC/THCSleep.cu b/aten/src/THC/THCSleep.cu
@@ -12,7 +12,7 @@ __global__ void spin_kernel(int64_t cycles)
   }
 }
 
-THC_API void THC_sleep(THCState* state, int64_t cycles)
+void THC_sleep(THCState* state, int64_t cycles)
 {
   dim3 grid(1);
   dim3 block(1);

diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
@@ -7,7 +7,7 @@ include(../cmake/Codegen.cmake)
 add_subdirectory(utils)
 
 # ---[ ATen build
-if(BUILD_ATEN)
+if (NOT BUILD_ATEN_MOBILE)
   set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE})
   set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(AT_LINK_STYLE INTERFACE)
@@ -49,7 +49,7 @@ if(BUILD_ATEN)
 
   IF(USE_ROCM)
     # Set the HIP Variables
-    set(Caffe2_HIP_SRCS ${ATen_CUDA_SRCS})
+    set(Caffe2_HIP_SRCS ${Caffe2_HIP_SRCS} ${ATen_CUDA_SRCS})
     set(Caffe2_HIP_INCLUDES ${Caffe2_HIP_INCLUDES} ${Caffe2_GPU_INCLUDE})
   ENDIF(USE_ROCM)
 else()
@@ -340,6 +340,12 @@ if(USE_CUDA)
       target_compile_options(caffe2_gpu PUBLIC "-DAT_CORE_STATIC_WINDOWS=1")
   endif()
 
+  # NB: This must be target_compile_definitions, not target_compile_options,
+  # as the latter is not respected by nvcc
+  if (MSVC)
+      target_compile_definitions(caffe2_gpu PRIVATE "-DCAFFE2_BUILD_MAIN_LIB")
+  endif()
+
   # Set standard properties on the target
   aten_set_target_props(caffe2_gpu)
 
@@ -351,21 +357,19 @@ endif()
 # ---[ Caffe2 HIP sources.
 if(USE_ROCM)
   # Call again since Caffe2_HIP_INCLUDES is extended with ATen include dirs.
-  if(BUILD_ATEN)
-    # Get Compile Definitions from the directory (FindHIP.cmake bug)
-    get_directory_property(MY_DEFINITIONS COMPILE_DEFINITIONS)
-    if(MY_DEFINITIONS)
-      foreach(_item ${MY_DEFINITIONS})
-        LIST(APPEND HIP_HCC_FLAGS "-D${_item}")
-      endforeach()
-    endif()
-
-    # Call again since Caffe2_HIP_INCLUDES is extended with ATen include dirs.
-    hip_include_directories(${Caffe2_HIP_INCLUDES})
+  # Get Compile Definitions from the directory (FindHIP.CMake bug)
+  get_directory_property(MY_DEFINITIONS COMPILE_DEFINITIONS)
+  if(MY_DEFINITIONS)
+    foreach(_item ${MY_DEFINITIONS})
+      LIST(APPEND HIP_HCC_FLAGS "-D${_item}")
+    endforeach()
   endif()
-  IF(BUILD_CAFFE2)
-     set_source_files_properties(${Caffe2_HIP_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
-  ENDIF()
+
+  # Call again since Caffe2_HIP_INCLUDES is extended with ATen include dirs.
+  hip_include_directories(${Caffe2_HIP_INCLUDES})
+
+  filter_list(__caffe2_hip_srcs_cpp Caffe2_HIP_SRCS "\\.(cc|cpp|cu)$")
+  set_source_files_properties(${__caffe2_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
 
   # FindHIP.CMake checks if the SHARED flag is set and adds extra logic accordingly.
   hip_add_library(caffe2_hip ${Caffe2_HIP_SRCS})
@@ -444,7 +448,7 @@ if(BUILD_CAFFE2)
   set(__aten_test_dir "test/aten")
 endif()
 # Todo - Set up ATen tests for ROCm in an upcoming PR
-if(BUILD_ATEN AND NOT USE_ROCM)
+if(NOT USE_ROCM)
   foreach(test_src ${ATen_CPU_TEST_SRCS})
     get_filename_component(test_name ${test_src} NAME_WE)
     add_executable(${test_name} "${test_src}")

diff --git a/caffe2/README.md b/caffe2/README.md
@@ -6,7 +6,7 @@ Caffe2 is a lightweight, modular, and scalable deep learning framework. Building
 
 ## Questions and Feedback
 
-Please use Github issues (https://github.com/caffe2/caffe2/issues) to ask questions, report bugs, and request new features.
+Please use Github issues (https://github.com/pytorch/pytorch/issues) to ask questions, report bugs, and request new features.
 
 ### Further Resources on [Caffe2.ai](http://caffe2.ai)
 

diff --git a/caffe2/contrib/aten/CMakeLists.txt b/caffe2/contrib/aten/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(BUILD_ATEN)
+if(NOT BUILD_ATEN_MOBILE)
   # Add source generated by Codegen.cmake and pass to parent
   list(APPEND Caffe2_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/aten_op.cc)
   list(APPEND Caffe2_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/aten_op_cuda.cc)

diff --git a/caffe2/core/common.h b/caffe2/core/common.h
@@ -94,19 +94,6 @@ using std::vector;
 #define CAFFE2_NORETURN __attribute__((noreturn))
 #endif
 
-/**
- * Macro for marking functions as having public visibility.
- * Ported from folly/CPortability.h
- */
-#ifndef __GNUC_PREREQ
-#if defined __GNUC__ && defined __GNUC_MINOR__
-#define __GNUC_PREREQ(maj, min) \
-  ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
-#else
-#define __GNUC_PREREQ(maj, min) 0
-#endif
-#endif
-
 // Defines CAFFE2_EXPORT and CAFFE2_IMPORT. On Windows, this corresponds to
 // different declarations (dllexport and dllimport). On Linux/Mac, it just
 // resolves to the same "default visibility" setting.
@@ -120,11 +107,7 @@ using std::vector;
 #endif
 #else
 #if defined(__GNUC__)
-#if __GNUC_PREREQ(4, 9)
-#define CAFFE2_EXPORT [[gnu::visibility("default")]]
-#else
 #define CAFFE2_EXPORT __attribute__((__visibility__("default")))
-#endif
 #else
 #define CAFFE2_EXPORT
 #endif

diff --git a/caffe2/core/graph.h b/caffe2/core/graph.h
@@ -16,7 +16,7 @@ namespace transform {
 /**
  *  Graph representation of an operator.
  */
-struct Node {
+struct CAFFE2_API Node {
  public:
   // Empty constructor for resize
   Node() {}
@@ -45,7 +45,7 @@ struct Node {
 /**
  *  Graph representation of a Netdef.
  */
-struct Graph {
+struct CAFFE2_API Graph {
  public:
   /**
    * Given a subgraph, gets all of the parents of the subgraph, as well as
@@ -155,7 +155,7 @@ struct Graph {
 
 // Adds an operator def to a netdef.
 // Returns the ptr, if you want to add anything extra (such as device_option)
-OperatorDef* AddOp(
+CAFFE2_API OperatorDef* AddOp(
     NetDef* netdef_ptr,
     string op_type,
     std::vector<string> inputs,
@@ -168,12 +168,12 @@ OperatorDef* AddOp(
  * For example, if we wanted to match an operator to Conv or FC, we can give:
  * "Conv|FC" as the type() of that op.
  */
-bool MatchStrings(string p, string s);
+CAFFE2_API bool MatchStrings(string p, string s);
 
 /**
  * This ensures that each named arg that exists in the pattern exists in g_op,
  * is equal in value.
  */
-bool MatchArguments(const OperatorDef& p_op, const OperatorDef& g_op);
+CAFFE2_API bool MatchArguments(const OperatorDef& p_op, const OperatorDef& g_op);
 
 } // namespace caffe2
diff --git a/caffe2/core/net_simple.h b/caffe2/core/net_simple.h
@@ -16,7 +16,7 @@ namespace caffe2 {
 // This is the very basic structure you need to run a network - all it
 // does is simply to run everything in sequence. If you want more fancy control
 // such as a DAG-like execution, check out other better net implementations.
-class SimpleNet : public NetBase {
+class CAFFE2_API SimpleNet : public NetBase {
  public:
   SimpleNet(const std::shared_ptr<const NetDef>& net_def, Workspace* ws);
   bool SupportsAsync() override {

diff --git a/caffe2/core/nomnigraph/Representations/NeuralNet.cc b/caffe2/core/nomnigraph/Representations/NeuralNet.cc
@@ -199,12 +199,13 @@ NNNodeMatchCriteria matchAnyNode() {
       [](NNGraph::NodeRef /* unused */) { return true; }, "matchAnyNode");
 }
 
-NNMatchGraph::NodeRef operatorTree(
+NNMatchGraph::NodeRef operatorSubgraph(
     NNMatchGraph& g,
     const NNNodeMatchCriteria& root,
     const std::vector<NNMatchGraph::NodeRef>& childrenCriteria,
     int count) {
-  return tree(g, matchAnyNode(), {tree(g, root, childrenCriteria)}, count);
+  return subgraph(
+      g, matchAnyNode(), {subgraph(g, root, childrenCriteria)}, count);
 }
 
 } // namespace nn

diff --git a/caffe2/core/nomnigraph/include/nomnigraph/Graph/Graph.h b/caffe2/core/nomnigraph/include/nomnigraph/Graph/Graph.h
@@ -412,6 +412,10 @@ class Graph {
     return result;
   }
 
+  size_t getEdgesCount() const {
+    return (size_t)edges_.size();
+  }
+
  private:
   std::list<Node<T, U...>> nodes_;
   std::list<Edge<T, U...>> edges_;