Skip to content

Commit f98ba59

Browse files
ezyang authored and pytorchmergebot committed
Use CapturedTraceback symbolizer for C++ exceptions from Python library (pytorch#113207)
This is the cheap and cheerful implementation, which is only enabled on TORCH_SHOW_CPP_STACKTRACES, because it *eagerly* symbolizes immediately at exception throw time, even if the exception will end up getting caught. It would be better to do this lazily and only symbolize when we try to print the exception, but that requires a more involved refactor of c10::Error that I don't feel like doing.

Compare the output before:

```
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0x95 (0x7fa21b99d975 in /data/users/ezyang/c/pytorch/torch/lib/libc10.so)
frame #1: c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const + 0x8d (0x7fa21b951269 in /data/users/ezyang/c/pytorch/torch/lib/libc10.so)
frame #2: c10::TensorImpl::sizes_custom() const + 0x9f (0x7fa21b9770df in /data/users/ezyang/c/pytorch/torch/lib/libc10.so)
frame #3: at::meta::structured_mm::meta(at::Tensor const&, at::Tensor const&) + 0x31e (0x7fa20a202a8e in /data/users/ezyang/c/pytorch/torch/lib/libtorch_cpu.so)
frame #4: <unknown function> + 0x29f34de (0x7fa20b5f34de in /data/users/ezyang/c/pytorch/torch/lib/libtorch_cpu.so)
frame #5: <unknown function> + 0x2a1fd8e (0x7fa20b61fd8e in /data/users/ezyang/c/pytorch/torch/lib/libtorch_cpu.so)
frame #6: <unknown function> + 0x6b907b (0x7fa2142b907b in /data/users/ezyang/c/pytorch/torch/lib/libtorch_python.so)
frame #7: <unknown function> + 0x6b6175 (0x7fa2142b6175 in /data/users/ezyang/c/pytorch/torch/lib/libtorch_python.so)
```

and after:

```
#4 c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) from ??:0
#5 c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const from ??:0
#6 c10::TensorImpl::sizes_custom() const [clone .localalias] from TensorImpl.cpp:0
#7 at::meta::structured_mm::meta(at::Tensor const&, at::Tensor const&) from ??:0
#8 at::(anonymous namespace)::wrapper_Meta_mm_out_out(at::Tensor const&, at::Tensor const&, at::Tensor&) from RegisterMeta.cpp:0
#9 c10::impl::make_boxed_from_unboxed_functor<c10::impl::detail::WrapFunctionIntoFunctor_<c10::CompileTimeFunctionPointer<at::Tensor& (at::Tensor const&, at::Tensor const&, at::Tensor&), &at::(anonymous namespace)::wrapper_Meta_mm_out_out>, at::Tensor&, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, at::Tensor&> >, false>::call(c10::OperatorKernel*, c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) from RegisterMeta.cpp:0
```

Signed-off-by: Edward Z. Yang <[email protected]>

Pull Request resolved: pytorch#113207
Approved by: https://github.com/Skylion007
1 parent e6eab49 commit f98ba59

File tree

3 files changed

+57
-1
lines changed

3 files changed

+57
-1
lines changed

torch/csrc/Module.cpp

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,10 @@
8585
#include <torch/csrc/utils/tensor_qschemes.h>
8686
#include <torch/csrc/utils/verbose.h>
8787

88+
#include <c10/util/Logging.h>
89+
#include <torch/csrc/profiler/combined_traceback.h>
90+
#include <sstream>
91+
8892
#ifdef USE_DISTRIBUTED
8993
#ifdef USE_C10D
9094
#include <torch/csrc/distributed/autograd/python_autograd.h>
@@ -141,6 +145,34 @@ static PyObject* THPModule_initExtension(
141145
PyObject* _unused,
142146
PyObject* shm_manager_path) {
143147
HANDLE_TH_ERRORS
148+
#if !defined(FBCODE_CAFFE2)
149+
if (torch::get_cpp_stacktraces_enabled() && !torch::get_disable_addr2line()) {
150+
c10::SetStackTraceFetcher([]() -> std::string {
151+
auto tb = torch::CapturedTraceback::gather(false, false, true);
152+
LOG(WARNING)
153+
<< "symbolizing C++ stack trace for exception; if this hangs, rerun with TORCH_DISABLE_ADDR2LINE=1..."
154+
<< std::endl;
155+
auto s_tbs = torch::symbolize({tb.get()});
156+
std::stringstream oss;
157+
oss << "C++ CapturedTraceback:" << std::endl;
158+
const auto& s_tb = s_tbs.tracebacks.at(0);
159+
for (auto idx : c10::irange(s_tb.size())) {
160+
// Skip the first few frames:
161+
// #1 torch::CapturedTraceback::gather(bool, bool, bool)
162+
// #2 THPModule_initExtension
163+
// #3 THPModule_initExtension(_object*, _object*)::{lambda()#1}
164+
if (idx <= 3) {
165+
continue;
166+
}
167+
auto frame_id = s_tb[idx];
168+
const auto& frame = s_tbs.all_frames.at(frame_id);
169+
oss << "#" << idx << " " << frame.funcname << " from " << frame.filename
170+
<< ":" << frame.lineno << std::endl;
171+
}
172+
return oss.str();
173+
});
174+
}
175+
#endif
144176
if (!THPUtils_checkString(shm_manager_path)) {
145177
THPUtils_setError(
146178
"initialization error - expected bytes/string object as shm_manager_path!");

torch/csrc/utils/cpp_stacktraces.cpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,33 @@ bool compute_cpp_stack_traces_enabled() {
2323
}
2424
return false;
2525
}
26+
27+
// Parses the TORCH_DISABLE_ADDR2LINE environment variable into a bool.
// Returns true only when the variable is exactly "1". Returns false when it
// is unset, is exactly "0", or holds any other value (the latter also emits a
// TORCH_WARN naming the rejected value).
bool compute_disable_addr2line() {
28+
auto envar = std::getenv("TORCH_DISABLE_ADDR2LINE");
29+
// std::getenv yields a null pointer when the variable is not set; that case
// skips straight to the default at the bottom.
if (envar) {
30+
if (strcmp(envar, "0") == 0) {
31+
return false;
32+
}
33+
if (strcmp(envar, "1") == 0) {
34+
return true;
35+
}
36+
// Neither "0" nor "1": warn, then fall through to the default below.
TORCH_WARN(
37+
"ignoring invalid value for TORCH_DISABLE_ADDR2LINE: ",
38+
envar,
39+
" valid values are 0 or 1.");
40+
}
41+
// Default for both the unset and the invalid-value case.
return false;
42+
}
2643
} // namespace
2744

2845
// Returns the result of compute_cpp_stack_traces_enabled(), cached in a
// function-local static: the computation runs on the first call only and
// every later call returns the same stored value.
bool get_cpp_stacktraces_enabled() {
2946
static bool enabled = compute_cpp_stack_traces_enabled();
3047
return enabled;
3148
}
49+
50+
// Returns the result of compute_disable_addr2line(), cached in a
// function-local static: the computation runs on the first call only and
// every later call returns the same stored value.
bool get_disable_addr2line() {
51+
static bool disabled = compute_disable_addr2line();
52+
return disabled;
53+
}
54+
3255
} // namespace torch

torch/csrc/utils/cpp_stacktraces.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,4 +4,5 @@
44

55
namespace torch {
66
TORCH_API bool get_cpp_stacktraces_enabled();
7-
}
7+
TORCH_API bool get_disable_addr2line();
8+
} // namespace torch

0 commit comments

Comments
 (0)