
Commit d705344

vreis authored and facebook-github-bot committed
Reduce data logged to tensorboard

Summary: We were logging learning rate and loss for every single step, which makes tensorboard too slow to load in long training runs. Log every 10th step, which should be enough for all cases: we always log at the end of every phase as well.

Differential Revision: D20441202

fbshipit-source-id: 6516dee931dcc114b1ccd3074bf4f0fbbd61719f
Parent: 9f405b2
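The change boils down to a modulo throttle: keep a step counter, record metrics only when the counter is divisible by the period, and still log aggregates at phase end. Below is a minimal, self-contained sketch of that pattern; `ThrottledLogger` is an illustrative name, not part of Classy Vision:

import time
from typing import List


class ThrottledLogger:
    """Records a value every `log_period` calls instead of on every call."""

    def __init__(self, log_period: int = 10) -> None:
        self.log_period = log_period
        self.step_idx = 0
        self.values: List[float] = []
        self.wall_times: List[float] = []

    def on_step(self, value: float) -> None:
        # Keep only every `log_period`-th observation; step 0 is always kept.
        if self.step_idx % self.log_period == 0:
            self.values.append(value)
            self.wall_times.append(time.time())
        self.step_idx += 1


logger = ThrottledLogger(log_period=10)
for step in range(100):
    logger.on_step(value=0.1 * step)
assert len(logger.values) == 10  # steps 0, 10, ..., 90 are recorded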

2 files changed: 13 additions, 7 deletions

classy_vision/hooks/tensorboard_plot_hook.py (12 additions, 7 deletions)

@@ -52,13 +52,15 @@ def __init__(self, tb_writer) -> None:
         self.tb_writer = tb_writer
         self.learning_rates: Optional[List[float]] = None
         self.wall_times: Optional[List[float]] = None
-        self.num_steps_global: Optional[List[int]] = None
+        self.num_updates: Optional[List[int]] = None
+        self.log_period = 10
 
     def on_phase_start(self, task: "tasks.ClassyTask") -> None:
         """Initialize losses and learning_rates."""
         self.learning_rates = []
         self.wall_times = []
-        self.num_steps_global = []
+        self.num_updates = []
+        self.step_idx = 0
 
     def on_step(self, task: "tasks.ClassyTask") -> None:
         """Store the observed learning rates."""

@@ -70,11 +72,14 @@ def on_step(self, task: "tasks.ClassyTask") -> None:
             # Only need to log the average loss during the test phase
             return
 
-        learning_rate_val = task.optimizer.parameters.lr
+        if self.step_idx % self.log_period == 0:
+            learning_rate_val = task.optimizer.parameters.lr
 
-        self.learning_rates.append(learning_rate_val)
-        self.wall_times.append(time.time())
-        self.num_steps_global.append(task.num_updates)
+            self.learning_rates.append(learning_rate_val)
+            self.wall_times.append(time.time())
+            self.num_updates.append(task.num_updates)
+
+        self.step_idx += 1
 
     def on_phase_end(self, task: "tasks.ClassyTask") -> None:
         """Add the losses and learning rates to tensorboard."""

@@ -97,7 +102,7 @@ def on_phase_end(self, task: "tasks.ClassyTask") -> None:
 
         if task.train:
             for loss, learning_rate, global_step, wall_time in zip(
-                task.losses, self.learning_rates, self.num_steps_global, self.wall_times
+                task.losses, self.learning_rates, self.num_updates, self.wall_times
             ):
                 loss /= task.get_batchsize_per_replica()
                 self.tb_writer.add_scalar(
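Because `log_period` is assigned as a plain attribute in `__init__` rather than exposed as a constructor argument, callers can override it after constructing the hook, exactly as the test below does. A hedged usage sketch, assuming the standard `torch.utils.tensorboard` writer and an illustrative log directory:

from torch.utils.tensorboard import SummaryWriter

from classy_vision.hooks import TensorboardPlotHook

# Illustrative writer setup; any SummaryWriter-compatible object works here.
writer = SummaryWriter(log_dir="/tmp/classy_tb")
hook = TensorboardPlotHook(writer)
hook.log_period = 100  # e.g. log every 100th step for very long runs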

test/manual/hooks_tensorboard_plot_hook_test.py (1 addition, 0 deletions)

@@ -146,6 +146,7 @@ def flush(self):
 
         writer = DummySummaryWriter()
         hook = TensorboardPlotHook(writer)
+        hook.log_period = 1
         task.set_hooks([hook])
         task.optimizer.param_schedulers["lr"] = mock_lr_scheduler
 
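Pinning `hook.log_period = 1` in the test restores per-step logging, which presumably keeps the existing per-step assertions valid now that the hook's default period is 10.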
