Skip to content

merge from upstream #16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aten/src/ATen/THLongStorageView.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class THLongStorageView {
storage.scalar_type = at::CTypeToScalarType<th::from_type<int64_t>>::to();
storage.refcount = 0;
storage.flag = 0;
storage.allocator = nullptr;
storage.allocatorVoidPtr = nullptr;
storage.allocatorContext = nullptr;
}
private:
Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/cuda/PinnedMemoryAllocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <ATen/Config.h>

#include <THC/THC.h>
#include <THC/THCGeneral.hpp>

#include <stdexcept>

Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/cuda/detail/CUDAHooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <ATen/detail/CUDAHooksInterface.h>

#include "THC/THC.h"
#include <THC/THCGeneral.hpp>

#if AT_CUDNN_ENABLED()
#include "ATen/cudnn/cudnn-wrapper.h"
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/templates/StorageDerived.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace at {
${Storage}::${Storage}(Context* context):
storage(${THStorage}_new(${state})), context(context) {}

${Storage}::${Storage}(Context* context, ${THStorage}* storage):
${Storage}::${Storage}(Context* context, THStorage* storage):
storage(storage), context(context) {}

${Storage}::${Storage}(Context* context, size_t storage_size)
Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/templates/StorageDerived.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct Allocator;
struct ${Storage} final : public Storage {
public:
explicit ${Storage}(Context* context);
${Storage}(Context* context, ${THStorage} *wrapped);
${Storage}(Context* context, THStorage *wrapped);
${Storage}(Context* context, size_t size);
${Storage}(Context* context, size_t size, Allocator* allocator);
${Storage}(Context* context,
Expand Down Expand Up @@ -50,7 +50,7 @@ struct ${Storage} final : public Storage {

protected:
friend struct ${Type};
${THStorage} *storage;
THStorage *storage;
Context* context;
};

Expand Down
33 changes: 32 additions & 1 deletion aten/src/TH/THStorage.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include <climits>

#include "THStorage.hpp"

#include "generic/THStorage.cpp"
Expand All @@ -13,6 +15,8 @@
#include "THGenerateHalfType.h"

void THStorage_free(THStorage *storage) {
AT_ASSERT(storage->backend == at::kCPU);

if(!storage)
return;

Expand All @@ -21,7 +25,7 @@ void THStorage_free(THStorage *storage) {
if(--storage->refcount == 0)
{
if(storage->flag & TH_STORAGE_FREEMEM) {
storage->allocator->free(storage->allocatorContext, storage->data_ptr);
static_cast<THAllocator*>(storage->allocatorVoidPtr)->free(storage->allocatorContext, storage->data_ptr);
}
if(storage->flag & TH_STORAGE_VIEW) {
THStorage_free(storage->view);
Expand Down Expand Up @@ -65,3 +69,30 @@ THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElemen
}
return copy;
}

// Convenience constructor: builds a storage holding zero elements of
// `scalar_type` by forwarding to THStorage_newWithSize.
THStorage* THStorage_new(at::ScalarType scalar_type)
{
  const ptrdiff_t empty_size = 0;
  return THStorage_newWithSize(scalar_type, empty_size);
}

// Builds a storage of `size` elements of `scalar_type`, using
// THDefaultAllocator with a null allocator context.
THStorage* THStorage_newWithSize(at::ScalarType scalar_type, ptrdiff_t size)
{
  auto* default_allocator = &THDefaultAllocator;
  void* no_context = nullptr;
  return THStorage_newWithAllocator(scalar_type, size, default_allocator, no_context);
}

/* Creates a CPU storage of `size` elements of `scalar_type`.
 *
 * The THStorage header is obtained from THAlloc; the data buffer
 * (at::elementSize(scalar_type) * size bytes) is requested from
 * `allocator->malloc`, which receives `allocatorContext`.
 *
 * The returned storage starts with a reference count of 1 and the flags
 * TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM.
 * `allocator` and `allocatorContext` are recorded in the storage so the
 * buffer can later be released through the same allocator.
 */
THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size,
                                      THAllocator *allocator,
                                      void *allocatorContext)
{
  // The header comes from THAlloc rather than `new`, so no constructor has
  // run: every field has to be given a value explicitly below.
  THStorage *storage = static_cast<THStorage*>(THAlloc(sizeof(THStorage)));
  storage->backend = at::kCPU;
  storage->scalar_type = scalar_type;
  storage->data_ptr = allocator->malloc(allocatorContext, at::elementSize(scalar_type)*size);
  storage->size = size;
  // std::atomic must be constructed in place inside the raw header memory.
  new (&storage->refcount) std::atomic<int>(1);
  storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
  storage->allocatorVoidPtr = allocator;
  storage->allocatorContext = allocatorContext;
  // TH_STORAGE_VIEW is not set, but give `view` a definite value anyway:
  // code that copies the field unconditionally (e.g. THStorage_(swap)) must
  // never read an indeterminate pointer.
  storage->view = nullptr;
  storage->device = INT_MIN; // device is not meaningful on CPU
  return storage;
}
14 changes: 11 additions & 3 deletions aten/src/TH/THStorage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,23 @@

#include "THStorage.h"

#include "ATen/ScalarType.h"
#include "ATen/ScalarTypeUtils.h"
#include <ATen/ScalarType.h>
#include <ATen/ScalarTypeUtils.h>
#include "THTypeConversion.hpp"
#include <atomic>

typedef struct THStorage
{
at::Backend backend; // kCPU or kCUDA only
at::ScalarType scalar_type;
void *data_ptr;
ptrdiff_t size;
std::atomic<int> refcount;
char flag;
THAllocator *allocator;
void *allocatorVoidPtr; // Either THDeviceAllocator or THCDeviceAllocator
void *allocatorContext;
struct THStorage *view;
int device;

template <typename T>
inline T * data() const {
Expand All @@ -36,3 +38,9 @@ typedef struct THStorage
return static_cast<T*>(this->data_ptr);
}
} THStorage;

// Creates a storage of `scalar_type` with size 0 (forwards to
// THStorage_newWithSize).
TH_API THStorage* THStorage_new(at::ScalarType scalar_type);
// Creates a storage of `size` elements of `scalar_type` using
// THDefaultAllocator and a null allocator context.
TH_API THStorage* THStorage_newWithSize(at::ScalarType scalar_type, ptrdiff_t size);
// Creates a CPU storage of `size` elements of `scalar_type` whose data buffer
// is obtained from `allocator->malloc`; `allocatorContext` is passed to the
// allocator and stored in the returned storage. The result starts with
// refcount 1 and is flagged refcounted, resizable and free-on-release.
TH_API THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size,
THAllocator *allocator,
void *allocatorContext);
37 changes: 19 additions & 18 deletions aten/src/TH/generic/THStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,22 @@ size_t THStorage_(elementSize)()

THStorage* THStorage_(new)(void)
{
return THStorage_(newWithSize)(0);
return THStorage_new(at::CTypeToScalarType<th::from_type<real>>::to());
}

THStorage* THStorage_(newWithSize)(ptrdiff_t size)
{
return THStorage_(newWithAllocator)(size, &THDefaultAllocator, NULL);
return THStorage_newWithSize(at::CTypeToScalarType<th::from_type<real>>::to(), size);
}

THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,
THAllocator *allocator,
void *allocatorContext)
{
THStorage *storage = static_cast<THStorage*>(THAlloc(sizeof(THStorage)));
storage->scalar_type = at::CTypeToScalarType<th::from_type<real>>::to();
storage->data_ptr = allocator->malloc(allocatorContext, sizeof(real)*size);
storage->size = size;
new (&storage->refcount) std::atomic<int>(1);
storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
storage->allocator = allocator;
storage->allocatorContext = allocatorContext;
return storage;
return THStorage_newWithAllocator(at::CTypeToScalarType<th::from_type<real>>::to(), size, allocator, allocatorContext);
}


THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags)
{
THMapAllocatorContext *ctx = THMapAllocatorContext_new(filename, flags);
Expand Down Expand Up @@ -142,28 +135,34 @@ THStorage* THStorage_(newWithDataAndAllocator)(real* data, ptrdiff_t size,
THAllocator* allocator,
void* allocatorContext) {
THStorage *storage = static_cast<THStorage*>(THAlloc(sizeof(THStorage)));
storage->backend = at::kCPU;
storage->scalar_type = at::CTypeToScalarType<th::from_type<real>>::to();
storage->data_ptr = data;
storage->size = size;
storage->refcount = 1;
storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
storage->allocator = allocator;
storage->allocatorVoidPtr = allocator;
storage->allocatorContext = allocatorContext;
storage->device = 0;
return storage;
}

void THStorage_(resize)(THStorage *storage, ptrdiff_t size)
{
AT_ASSERT(storage->backend == at::kCPU);

auto* th_allocator = static_cast<THAllocator*>(storage->allocatorVoidPtr);

if(storage->flag & TH_STORAGE_RESIZABLE)
{
if(storage->allocator->realloc == NULL) {
if(th_allocator->realloc == NULL) {
/* case when the allocator does not have a realloc defined */
real *old_data = THStorage_(data)(storage);
ptrdiff_t old_size = storage->size;
if (size == 0) {
storage->data_ptr = NULL;
} else {
storage->data_ptr = storage->allocator->malloc(
storage->data_ptr = th_allocator->malloc(
storage->allocatorContext,
sizeof(real)*size);
}
Expand All @@ -176,10 +175,10 @@ void THStorage_(resize)(THStorage *storage, ptrdiff_t size)
if (copy_size > 0) {
memcpy(THStorage_(data)(storage), old_data, sizeof(real)*copy_size);
}
storage->allocator->free(storage->allocatorContext, old_data);
th_allocator->free(storage->allocatorContext, old_data);
}
} else {
storage->data_ptr = storage->allocator->realloc(
storage->data_ptr = th_allocator->realloc(
storage->allocatorContext,
THStorage_(data)(storage),
sizeof(real)*size);
Expand Down Expand Up @@ -215,17 +214,19 @@ void THStorage_(swap)(THStorage *storage1, THStorage *storage2)
void *data_ptr;
ptrdiff_t size;
char flag;
THAllocator *allocator;
void *allocatorVoidPtr;
void *allocatorContext;
struct THStorage *view;
int device;

SWAP(data_ptr);
SWAP(size);
SWAP(flag);
// don't swap refcount!
SWAP(allocator);
SWAP(allocatorVoidPtr);
SWAP(allocatorContext);
SWAP(view);
SWAP(device);
#undef SWAP
}

Expand Down
1 change: 1 addition & 0 deletions aten/src/THC/THCGeneral.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "THCStream.h"
#include "THCThreadLocal.h"
#include "THCTensorRandom.h"
#include "THCGeneral.hpp"
#include <stdlib.h>
#include <stdint.h>

Expand Down
49 changes: 1 addition & 48 deletions aten/src/THC/THCGeneral.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
struct THCRNGState; /* Random number generator state. */
typedef struct THCStream THCStream;
typedef struct THCState THCState;
struct THCState;

typedef struct _THCDeviceAllocator {
cudaError_t (*malloc)( void*, void**, size_t, cudaStream_t);
Expand All @@ -70,54 +71,6 @@ typedef struct _THCCudaResourcesPerDevice {
size_t scratchSpacePerStream;
} THCCudaResourcesPerDevice;


/* Global state to be held in the cutorch table. */
struct THCState {
struct THCRNGState* rngState;
struct cudaDeviceProp* deviceProperties;
/* Set of all allocated resources. blasHandles and sparseHandles do not have
a default and must be explicitly initialized. We always initialize 1
blasHandle and 1 sparseHandle but we can use more.
*/
THCCudaResourcesPerDevice* resourcesPerDevice;
/* Captured number of devices upon startup; convenience for bounds checking */
int numDevices;
int numUserBlasHandles;
int numUserSparseHandles;

/* Allocator using cudaMallocHost. */
THAllocator* cudaHostAllocator;
THAllocator* cudaUVAAllocator;
THCDeviceAllocator* cudaDeviceAllocator;

/* Index of the current selected BLAS handle. The actual BLAS handle used
depends on the current device. */
THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle;
/* Index of the current selected sparse handle. The actual sparse handle used
depends on the current device. */
THCThreadLocal/*<int>*/ currentPerDeviceSparseHandle;
/* Array of thread locals containing the current stream for each device */
THCThreadLocal* currentStreams;

/* Table of enabled peer-to-peer access between directed pairs of GPUs.
If i accessing allocs on j is enabled, p2pAccess[i][j] is 1; 0 otherwise. */
int** p2pAccessEnabled;

/* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU
copies are allowed via p2p if p2p access is enabled at all for
the pair of GPUs in question, but if this flag is true, then
all cross-GPU access checks are disabled, allowing kernels to
directly access memory on another GPUs.
Note that p2p access must exist and be enabled for the pair of
GPUs in question. */
int p2pKernelAccessEnabled;

void (*cutorchGCFunction)(void *data);
void *cutorchGCData;
ptrdiff_t heapSoftmax;
ptrdiff_t heapDelta;
};

THC_API THCState* THCState_alloc(void);
THC_API void THCState_free(THCState* state);

Expand Down
50 changes: 50 additions & 0 deletions aten/src/THC/THCGeneral.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once

#include "THCGeneral.h"

/* Global state of THC.
   THCGeneral.h only declares `struct THCState`; the full definition lives
   here, so code that accesses the members directly must include this header. */
struct THCState {
/* Random number generator state (THCRNGState is only forward-declared in
   THCGeneral.h). */
struct THCRNGState* rngState;
/* NOTE(review): presumably an array with one cudaDeviceProp per device
   (numDevices entries) -- confirm against THCudaInit. */
struct cudaDeviceProp* deviceProperties;
/* Set of all allocated resources. blasHandles and sparseHandles do not have
a default and must be explicitly initialized. We always initialize 1
blasHandle and 1 sparseHandle but we can use more.
*/
THCCudaResourcesPerDevice* resourcesPerDevice;
/* Captured number of devices upon startup; convenience for bounds checking */
int numDevices;
/* NOTE(review): presumably the number of user-requested BLAS / sparse handles
   per device -- confirm against the code that reserves them. */
int numUserBlasHandles;
int numUserSparseHandles;

/* Allocator using cudaMallocHost. */
THAllocator* cudaHostAllocator;
/* NOTE(review): presumably an allocator for unified (UVA / managed) memory --
   confirm against its definition. */
THAllocator* cudaUVAAllocator;
/* Device-memory allocator; uses the THCDeviceAllocator interface declared in
   THCGeneral.h rather than THAllocator. */
THCDeviceAllocator* cudaDeviceAllocator;

/* Index of the current selected BLAS handle. The actual BLAS handle used
depends on the current device. */
THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle;
/* Index of the current selected sparse handle. The actual sparse handle used
depends on the current device. */
THCThreadLocal/*<int>*/ currentPerDeviceSparseHandle;
/* Array of thread locals containing the current stream for each device */
THCThreadLocal* currentStreams;

/* Table of enabled peer-to-peer access between directed pairs of GPUs.
If i accessing allocs on j is enabled, p2pAccessEnabled[i][j] is 1; 0 otherwise. */
int** p2pAccessEnabled;

/* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU
copies are allowed via p2p if p2p access is enabled at all for
the pair of GPUs in question, but if this flag is true, then
all cross-GPU access checks are disabled, allowing kernels to
directly access memory on another GPU.
Note that p2p access must exist and be enabled for the pair of
GPUs in question. */
int p2pKernelAccessEnabled;

/* Garbage-collection callback and the opaque `data` argument passed to it. */
void (*cutorchGCFunction)(void *data);
void *cutorchGCData;
/* NOTE(review): presumably heapSoftmax is the heap-size threshold that
   triggers cutorchGCFunction and heapDelta the allocation change accumulated
   since the last update -- confirm in THCGeneral.cpp. */
ptrdiff_t heapSoftmax;
ptrdiff_t heapDelta;
};
Loading