Open
Description
🐛 Bug
The application crashes during destruction at program exit (after `main` returns) when using CUDA.
To Reproduce
Steps to reproduce the behavior: build the following program against libtorch with CUDA support and run it.
#include <torch/torch.h>
// Minimal reproduction: runs a single Conv2d forward pass, optionally on CUDA,
// prints a marker line, and returns. According to the message printed below,
// everything up to and including the print succeeds; the crash occurs
// afterwards, during destruction, and only when use_cuda is true.
int main(int, const char**) {
// Toggle for the device under test; with false, the CUDA transfer block below is skipped.
const bool use_cuda = true;
// NOTE(review): Conv2dOptions(3, 3, 1) is presumably (in_channels, out_channels, kernel_size) — confirm against the libtorch API.
torch::nn::Conv2d cnv(torch::nn::Conv2dOptions(3, 3, 1));
// Random input tensor with sizes {1, 3, 50, 50}.
torch::Tensor x = torch::randn({1,3, 50, 50});
if(use_cuda){
// Move both the module and the input to the CUDA device before the forward pass.
cnv->to(at::kCUDA);
x = x.to(at::kCUDA);
}
// Run the convolution; the result replaces the input tensor.
x = cnv->forward(x);
// Marker output: nothing else is executed explicitly after this line, so any later failure happens while objects are being destroyed.
std::cout << "This line will always be executed, but the destructor will cause a crash iff use_cuda==true." << std::endl;
}
Backtrace of all threads at the time of the crash (debugger output, including locals):
Thread 4 (Thread 0x7fffb9416700 (LWP 21522)):
#0 0x00007fffbb042f85 in futex_abstimed_wait_cancelable (private=<optimised out>, abstime=0x7fffb940f930, expected=0, futex_word=0x7fffb0000b48) at ../sysdeps/unix/sysv/linux/futex-internal.h:205
__ret = -516
oldtype = 0
err = <optimised out>
oldtype = <optimised out>
err = <optimised out>
__ret = <optimised out>
resultvar = <optimised out>
__arg6 = <optimised out>
__arg5 = <optimised out>
__arg4 = <optimised out>
__arg3 = <optimised out>
__arg2 = <optimised out>
__arg1 = <optimised out>
_a6 = <optimised out>
_a5 = <optimised out>
_a4 = <optimised out>
_a3 = <optimised out>
_a2 = <optimised out>
_a1 = <optimised out>
#1 __pthread_cond_wait_common (abstime=0x7fffb940f930, mutex=0x5555561fb178, cond=0x7fffb0000b20) at pthread_cond_wait.c:539
spin = 0
buffer = {__routine = 0x7fffbb042690 <__condvar_cleanup_waiting>, __arg = 0x7fffb940f8c0, __canceltype = -1342174448, __prev = 0x0}
cbuffer = {wseq = 8, cond = 0x7fffb0000b20, mutex = 0x5555561fb178, private = 0}
err = <optimised out>
g = 0
flags = <optimised out>
g1_start = <optimised out>
maxspin = 0
signals = <optimised out>
result = 0
wseq = <optimised out>
seq = 4
private = <optimised out>
maxspin = <optimised out>
err = <optimised out>
result = <optimised out>
wseq = <optimised out>
g = <optimised out>
seq = <optimised out>
flags = <optimised out>
private = <optimised out>
signals = <optimised out>
g1_start = <optimised out>
spin = <optimised out>
buffer = <optimised out>
cbuffer = <optimised out>
rt = <optimised out>
s = <optimised out>
#2 __pthread_cond_timedwait (cond=0x7fffb0000b20, mutex=0x5555561fb178, abstime=0x7fffb940f930) at pthread_cond_wait.c:667
No locals.
#3 0x00007ffff6463ce7 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#4 0x00007ffff641c4b7 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#5 0x00007ffff6463110 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#6 0x00007fffbb03c6db in start_thread (arg=0x7fffb9416700) at pthread_create.c:463
pd = 0x7fffb9416700
now = <optimised out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140736301459200, 377580923374187937, 140736301431488, 0, 93825007060912, 140737488342240, -377707152272345695, -377710973598124639}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimised out>
#7 0x00007fffbb9f788f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.
Thread 3 (Thread 0x7fffb9c17700 (LWP 21521)):
#0 0x00007fffbb9eabf9 in __GI___poll (fds=0x7fffac000b80, nfds=8, timeout=100) at ../sysdeps/unix/sysv/linux/poll.c:29
resultvar = 18446744073709551100
sc_cancel_oldtype = 0
sc_ret = <optimised out>
#1 0x00007ffff646169b in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#2 0x00007ffff64c6a6f in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#3 0x00007ffff6463110 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#4 0x00007fffbb03c6db in start_thread (arg=0x7fffb9c17700) at pthread_create.c:463
pd = 0x7fffb9c17700
now = <optimised out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140736309851904, 377580923374187937, 140736309824192, 0, 93825005575072, 140737488342160, -377708239435942495, -377710973598124639}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimised out>
#5 0x00007fffbb9f788f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.
Thread 2 (Thread 0x7fffba418700 (LWP 21520)):
#0 0x00007fffbb9f9237 in accept4 (fd=11, addr=..., addr_len=0x7fffba411908, flags=524288) at ../sysdeps/unix/sysv/linux/accept4.c:32
resultvar = 18446744073709551104
sc_cancel_oldtype = 0
sc_ret = <optimised out>
#1 0x00007ffff64624a6 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#2 0x00007ffff6456a3d in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#3 0x00007ffff6463110 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
No symbol table info available.
#4 0x00007fffbb03c6db in start_thread (arg=0x7fffba418700) at pthread_create.c:463
pd = 0x7fffba418700
now = <optimised out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140736318244608, 377580923374187937, 140736318216896, 0, 93825005446752, 140737488346048, -377709339484441183, -377710973598124639}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimised out>
#5 0x00007fffbb9f788f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.
Thread 1 (Thread 0x7ffff7fbdc40 (LWP 21513)):
#0 0x00007ffff45ebfa7 in ?? () from /usr/local/cuda/lib64/libcudart.so.9.0
No symbol table info available.
#1 0x00007ffff45de11e in ?? () from /usr/local/cuda/lib64/libcudart.so.9.0
No symbol table info available.
#2 0x00007ffff45cac0a in ?? () from /usr/local/cuda/lib64/libcudart.so.9.0
No symbol table info available.
#3 0x00007ffff45f0f5c in cudaDeviceSynchronize () from /usr/local/cuda/lib64/libcudart.so.9.0
No symbol table info available.
#4 0x00007fffc19d4f94 in cudnnDestroy () from /home/martin/libraries/libtorch/lib/libcaffe2_gpu.so
No symbol table info available.
#5 0x00007fffbd896a91 in std::unordered_map<int, at::native::(anonymous namespace)::Handle, std::hash<int>, std::equal_to<int>, std::allocator<std::pair<int const, at::native::(anonymous namespace)::Handle> > >::~unordered_map() () from /home/martin/libraries/libtorch/lib/libcaffe2_gpu.so
No symbol table info available.
#6 0x00007fffbb919615 in __cxa_finalize (d=0x7fffe746f680) at cxa_finalize.c:83
check = 3473
cxafn = <optimised out>
cxaarg = <optimised out>
f = 0x55555598fa20
funcs = 0x55555598f7d0
#7 0x00007fffbd6a4f13 in __do_global_dtors_aux () from /home/martin/libraries/libtorch/lib/libcaffe2_gpu.so
No symbol table info available.
#8 0x00007fffffffe440 in ?? ()
No symbol table info available.
#9 0x00007ffff7de5b73 in _dl_fini () at dl-fini.c:138
array = 0x7ffff7ffd068 <_rtld_global+8>
i = <optimised out>
l = 0x7ffff7fc6000
maps = 0x7fffffffe330
i = <optimised out>
l = <optimised out>
nmaps = <optimised out>
nloaded = <optimised out>
ns = 0
do_audit = <optimised out>
__PRETTY_FUNCTION__ = "_dl_fini"
Backtrace stopped: frame did not save the PC
Expected behavior
No crash: the program should exit cleanly, with the same behavior on CPU and GPU.
Environment
Output of collect_env.py:
Collecting environment information...
PyTorch version: N/A
Is debug build: N/A
CUDA used to build PyTorch: N/A
OS: Ubuntu 18.04.2 LTS
GCC version: (Ubuntu 6.5.0-2ubuntu1~18.04) 6.5.0 20181026
CMake version: version 3.10.2
Python version: 2.7
Is CUDA available: N/A
CUDA runtime version: 9.0.176
GPU models and configuration:
GPU 0: GeForce GTX TITAN X
GPU 1: GeForce GTX TITAN X
Nvidia driver version: 390.77
cuDNN version: Could not collect
Versions of relevant libraries:
[pip] numpy==1.16.0
[pip] torch==1.0.0
[pip] torchvision==0.2.1
[conda] Could not collect
Further version information (CUDA and cuDNN):
$cat /usr/local/cuda/version.txt
CUDA Version 9.0.176
CUDA Patch Version 9.0.176.1
CUDA Patch Version 9.0.176.2
CUDA Patch Version 9.0.176.3
$cat /usr/local/cuda/include/cudnn.h | grep CUDNN_MAJOR -A 2
#define CUDNN_MAJOR 7
#define CUDNN_MINOR 4
#define CUDNN_PATCHLEVEL 2