Skip to content

Commit 314a502

Browse files
Revert "Reland "[C10] PG observability hooks. (pytorch#108815)" (pytorch#110907)"
This reverts commit 7678cd2. Reverted pytorch#110907 on behalf of https://github.com/huydhn with the following reason: "Sorry for reverting this, but the macOS job in trunk started failing after this commit: https://hud.pytorch.org/pytorch/pytorch/commit/7678cd22af46c9df4fb47a409d3e8ad71a6127ea" ([comment](pytorch#110907 (comment)))
1 parent 2edc75a commit 314a502

17 files changed

+29
-690
lines changed

build_variables.bzl

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -521,7 +521,6 @@ libtorch_distributed_base_sources = [
521521
"torch/csrc/distributed/c10d/Backend.cpp",
522522
"torch/csrc/distributed/c10d/FileStore.cpp",
523523
"torch/csrc/distributed/c10d/GlooDeviceFactory.cpp",
524-
"torch/csrc/distributed/c10d/Hooks.cpp",
525524
"torch/csrc/distributed/c10d/Ops.cpp",
526525
"torch/csrc/distributed/c10d/ParamCommsUtils.cpp",
527526
"torch/csrc/distributed/c10d/PrefixStore.cpp",

test/distributed/test_hooks.py

Lines changed: 0 additions & 270 deletions
This file was deleted.

torch/_C/_distributed_c10d.pyi

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,6 @@ _DEFAULT_FIRST_BUCKET_BYTES: int
1111
_DEFAULT_NO_TIMEOUT: timedelta
1212
_DEFAULT_PG_TIMEOUT: timedelta
1313

14-
class EventKind(Enum):
15-
START = ...
16-
END = ...
17-
1814
class BuiltinCommHookType(Enum):
1915
ALLREDUCE = ...
2016
FP16_COMPRESS = ...
@@ -24,8 +20,6 @@ def _register_builtin_comm_hook(
2420
reducer: Reducer,
2521
comm_hook_type: BuiltinCommHookType,
2622
): ...
27-
def _dequeue_c10d_event() -> Dict[str, object]: ...
28-
def _enable_event_collection(pipe_fs: int) -> None: ...
2923

3024
class GradBucket:
3125
def index(self) -> int: ...
torch/csrc/distributed/c10d/Backend.cpp

Lines changed: 0 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,9 @@
11
#include <c10/util/Logging.h>
22
#include <fmt/format.h>
33
#include <torch/csrc/distributed/c10d/Backend.hpp>
4-
#include <torch/csrc/distributed/c10d/Hooks.hpp>
5-
#include <torch/csrc/distributed/c10d/logging.h>
64

75
namespace c10d {
86

9-
namespace {
10-
void commonEventinit(
11-
details::EventInfo& evt,
12-
const Backend& backend,
13-
const Work& work) {
14-
evt.timestamp =
15-
std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
16-
evt.pg_name = backend.getGroupName();
17-
evt.backend = backend.getBackendName();
18-
evt.sequence_number = work.getSequencenumber();
19-
evt.operation = c10d::opTypeToString(work.retrieveOpType());
20-
evt.drop_count = 0;
21-
}
22-
} // namespace
23-
247
Backend::Backend(int rank, int size)
258
: rank_(rank), size_(size), dist_debug_level_(debug_level()) {
269
C10_LOG_API_USAGE_ONCE("c10d.backend");
@@ -32,21 +15,4 @@ void Backend::init() {
3215
C10_LOG_API_USAGE_ONCE(fmt::format("c10d.backend_{}", getBackendName()));
3316
}
3417

35-
void Backend::emitCollectiveStart(const Work& work) {
36-
details::EventInfo evt;
37-
commonEventinit(evt, *this, work);
38-
39-
evt.event_kind = ::c10d::EventKind::CollectiveStart;
40-
details::enqueue_c10d_event(std::move(evt));
41-
}
42-
43-
void Backend::emitCollectiveEnd(const Work& work) {
44-
details::EventInfo evt;
45-
commonEventinit(evt, *this, work);
46-
47-
evt.event_kind = ::c10d::EventKind::CollectiveEnd;
48-
evt.duration_ms = work.getDuration();
49-
details::enqueue_c10d_event(std::move(evt));
50-
}
51-
5218
} // namespace c10d

torch/csrc/distributed/c10d/Backend.hpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -366,8 +366,6 @@ class TORCH_API Backend : public torch::CustomClassHolder {
366366
// Implementations of this interface need to call this to setup
367367
// appropriate logging etc.
368368
void init();
369-
void emitCollectiveStart(const Work& work);
370-
void emitCollectiveEnd(const Work& work);
371369

372370
const int rank_;
373371
const int size_;

0 commit comments

Comments
 (0)