diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index 6dd26948dff03e..2194310a46d522 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -282,12 +282,26 @@ PyObject *THPModule_hasDistributed(PyObject *_unused)
 #endif
 }
 
+void DLPack_Capsule_Destructor(PyObject* data) {
+  HANDLE_TH_ERRORS
+  DLManagedTensor * dlMTensor = (DLManagedTensor *)PyCapsule_GetPointer(data, "dltensor");
+  if (dlMTensor) {
+    // the dlMTensor has not been consumed, call deleter ourselves
+    dlMTensor->deleter(const_cast<DLManagedTensor*>(dlMTensor));
+  } else {
+    // the dlMTensor has been consumed
+    // PyCapsule_GetPointer has set an error indicator
+    PyErr_Clear();
+  }
+  END_HANDLE_TH_ERRORS_RET()
+}
+
 PyObject *THPModule_toDLPack(PyObject *_unused, PyObject *data)
 {
   HANDLE_TH_ERRORS
   THPUtils_assert(THPVariable_Check(data), "data must be a Tensor");
   DLManagedTensor* dlMTensor = at::toDLPack(THPVariable_UnpackData(data));
-  return PyCapsule_New(dlMTensor, "dltensor", NULL);
+  return PyCapsule_New(dlMTensor, "dltensor", DLPack_Capsule_Destructor);
   END_HANDLE_TH_ERRORS
 }
 
diff --git a/torch/nn/utils/clip_grad.py b/torch/nn/utils/clip_grad.py
index a81415d75d3f9b..db808adcf70b29 100644
--- a/torch/nn/utils/clip_grad.py
+++ b/torch/nn/utils/clip_grad.py
@@ -29,12 +29,12 @@ def clip_grad_norm_(parameters, max_norm, norm_type=2):
         total_norm = 0
         for p in parameters:
             param_norm = p.grad.data.norm(norm_type)
-            total_norm += param_norm ** norm_type
+            total_norm += param_norm.item() ** norm_type
         total_norm = total_norm ** (1. / norm_type)
     clip_coef = max_norm / (total_norm + 1e-6)
     if clip_coef < 1:
         for p in parameters:
-            p.grad.data.mul_(clip_coef.item())
+            p.grad.data.mul_(clip_coef)
     return total_norm
 
 
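A minimal usage sketch (not part of the patch) illustrating both changes, assuming the standard `torch.utils.dlpack` and `torch.nn.utils` entry points: an unconsumed `dltensor` capsule is now freed by the new capsule destructor instead of leaking, and `clip_grad_norm_` accumulates the norm as a Python float, so `clip_coef` no longer needs `.item()` before `mul_()`.

```python
import torch
from torch.utils import dlpack
from torch.nn.utils import clip_grad_norm_

# DLPack round trip: consuming the capsule hands ownership to the new tensor.
t = torch.randn(3, 3)
capsule = dlpack.to_dlpack(t)        # PyCapsule named "dltensor"
t2 = dlpack.from_dlpack(capsule)     # shares memory with t
assert torch.equal(t, t2)

# An unconsumed capsule previously leaked its DLManagedTensor; with the
# destructor added above, dropping it calls dlMTensor->deleter instead.
orphan = dlpack.to_dlpack(torch.ones(2))
del orphan

# clip_grad_norm_: total_norm is now a plain Python float, so the clipping
# coefficient is applied to the gradients without an extra .item() call.
net = torch.nn.Linear(4, 2)
net(torch.randn(8, 4)).sum().backward()
total_norm = clip_grad_norm_(net.parameters(), max_norm=1.0)
print(total_norm)
```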