Skip to content

Commit 884182f

Browse files
committed
Add "status" to replace "running" and "queued" in the output of `exp show`
fix: #7986
1. Add two new flags, `--hide-queued` and `--hide-failed`, to `exp show`.
2. Allow `exp show` to show failed experiments.
3. Add a unit test for showing failed experiments.
4. Add name support for failed experiments.
5. Add an error message to the `exp show` output.
1 parent 7393ebd commit 884182f

File tree

5 files changed

+168
-60
lines changed

5 files changed

+168
-60
lines changed

dvc/commands/experiments/show.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,8 @@ def _collect_rows(
9595

9696
exp = results.get("data", {})
9797

98-
if exp.get("running"):
99-
state = "Running"
100-
elif exp.get("queued"):
101-
state = "Queued"
102-
else:
98+
state = exp.get("status")
99+
if state == "Success":
103100
state = fill_value
104101

105102
is_baseline = rev == "baseline"
@@ -476,6 +473,8 @@ def run(self):
476473
all_branches=self.args.all_branches,
477474
all_tags=self.args.all_tags,
478475
all_commits=self.args.all_commits,
476+
hide_queued=self.args.hide_queued,
477+
hide_failed=self.args.hide_failed,
479478
revs=self.args.rev,
480479
num=self.args.num,
481480
sha_only=self.args.sha,
@@ -594,6 +593,18 @@ def add_parser(experiments_subparsers, parent_parser):
594593
default=False,
595594
help="Always show git commit SHAs instead of branch/tag names.",
596595
)
596+
experiments_show_parser.add_argument(
597+
"--hide-failed",
598+
action="store_true",
599+
default=False,
600+
help="Hide failed experiments in the table.",
601+
)
602+
experiments_show_parser.add_argument(
603+
"--hide-queued",
604+
action="store_true",
605+
default=False,
606+
help="Hide queued experiments in the table.",
607+
)
597608
experiments_show_parser.add_argument(
598609
"--json",
599610
"--show-json",

dvc/repo/experiments/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,8 @@ def get_exact_name(self, rev: str):
427427
pass
428428
if rev in self.stash_revs:
429429
return self.stash_revs[rev].name
430+
if rev in self.celery_queue.failed_stash.stash_revs:
431+
return self.celery_queue.failed_stash.stash_revs[rev].name
430432
return None
431433

432434
def get_running_exps(self, fetch_refs: bool = True) -> Dict[str, Any]:

dvc/repo/experiments/show.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import logging
22
from collections import OrderedDict, defaultdict
33
from datetime import datetime
4+
from enum import Enum
45
from itertools import chain
56
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
67

8+
from dvc.repo.experiments.queue.base import QueueDoneResult
79
from dvc.repo.metrics.show import _gather_metrics
810
from dvc.repo.params.show import _gather_params
911
from dvc.scm import iter_revs
@@ -17,11 +19,18 @@
1719
logger = logging.getLogger(__name__)
1820

1921

22+
class ExpStatus(Enum):
23+
Success = 0
24+
Queued = 1
25+
Running = 2
26+
Failed = 3
27+
28+
2029
@error_handler
2130
def _collect_experiment_commit(
22-
repo,
23-
exp_rev,
24-
stash=False,
31+
repo: "Repo",
32+
exp_rev: str,
33+
status: ExpStatus = ExpStatus.Success,
2534
sha_only=True,
2635
param_deps=False,
2736
running=None,
@@ -67,14 +76,19 @@ def _collect_experiment_commit(
6776
if not (out.is_metric or out.is_plot)
6877
}
6978

70-
res["queued"] = stash
71-
if running is not None and exp_rev in running:
72-
res["running"] = True
79+
res["status"] = status.name
80+
if status == ExpStatus.Running:
7381
res["executor"] = running[exp_rev].get("location")
7482
else:
75-
res["running"] = False
7683
res["executor"] = None
77-
if not stash:
84+
85+
if status == ExpStatus.Failed:
86+
res["error"] = {
87+
"msg": "Experiment run failed.",
88+
"type": "",
89+
}
90+
91+
if status not in {ExpStatus.Queued, ExpStatus.Failed}:
7892
vals = _gather_metrics(
7993
repo, targets=None, rev=rev, recursive=False, onerror=onerror
8094
)
@@ -97,16 +111,28 @@ def _collect_experiment_commit(
97111

98112

99113
def _collect_experiment_branch(
100-
res, repo, branch, baseline, onerror: Optional[Callable] = None, **kwargs
114+
res,
115+
repo,
116+
branch,
117+
baseline,
118+
onerror: Optional[Callable] = None,
119+
running=None,
120+
**kwargs
101121
):
102122
from dvc.scm import resolve_rev
103123

104124
exp_rev = resolve_rev(repo.scm, branch)
105125
prev = None
106126
revs = list(repo.scm.branch_revs(exp_rev, baseline))
107127
for rev in revs:
128+
status = ExpStatus.Running if rev in running else ExpStatus.Success
108129
collected_exp = _collect_experiment_commit(
109-
repo, rev, onerror=onerror, **kwargs
130+
repo,
131+
rev,
132+
onerror=onerror,
133+
status=status,
134+
running=running,
135+
**kwargs
110136
)
111137
if len(revs) > 1:
112138
exp = {"checkpoint_tip": exp_rev}
@@ -135,6 +161,8 @@ def show(
135161
all_tags=False,
136162
revs: Union[List[str], str, None] = None,
137163
all_commits=False,
164+
hide_queued=False,
165+
hide_failed=False,
138166
sha_only=False,
139167
num=1,
140168
param_deps=False,
@@ -163,10 +191,12 @@ def show(
163191
running = repo.experiments.get_running_exps(fetch_refs=fetch_running)
164192

165193
for rev in found_revs:
194+
status = ExpStatus.Running if rev in running else ExpStatus.Success
166195
res[rev]["baseline"] = _collect_experiment_commit(
167196
repo,
168197
rev,
169198
sha_only=sha_only,
199+
status=status,
170200
param_deps=param_deps,
171201
running=running,
172202
onerror=onerror,
@@ -202,7 +232,19 @@ def show(
202232
repo.experiments.tempdir_queue.iter_active(),
203233
repo.experiments.celery_queue.iter_active(),
204234
repo.experiments.celery_queue.iter_queued(),
235+
repo.experiments.celery_queue.iter_failed(),
205236
):
237+
if isinstance(entry, QueueDoneResult):
238+
entry = entry.entry
239+
if hide_failed:
240+
continue
241+
status = ExpStatus.Failed
242+
elif entry.stash_rev in running:
243+
status = ExpStatus.Running
244+
else:
245+
if hide_queued:
246+
continue
247+
status = ExpStatus.Queued
206248
stash_rev = entry.stash_rev
207249
if entry.baseline_rev in found_revs:
208250
if stash_rev not in running or not running[stash_rev].get(
@@ -212,7 +254,7 @@ def show(
212254
repo,
213255
stash_rev,
214256
sha_only=sha_only,
215-
stash=stash_rev not in running,
257+
status=status,
216258
param_deps=param_deps,
217259
running=running,
218260
onerror=onerror,

tests/func/experiments/test_show.py

Lines changed: 83 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ def test_show_simple(tmp_dir, scm, dvc, exp_stage):
6363
"metrics": {"metrics.yaml": {"data": {"foo": 1}}},
6464
"outs": {},
6565
"params": {"params.yaml": {"data": {"foo": 1}}},
66-
"queued": False,
67-
"running": False,
66+
"status": "Success",
6867
"executor": None,
6968
"timestamp": None,
7069
}
@@ -97,8 +96,7 @@ def test_show_experiment(tmp_dir, scm, dvc, exp_stage, workspace):
9796
"metrics": {"metrics.yaml": {"data": {"foo": 1}}},
9897
"outs": {},
9998
"params": {"params.yaml": {"data": {"foo": 1}}},
100-
"queued": False,
101-
"running": False,
99+
"status": "Success",
102100
"executor": None,
103101
"timestamp": timestamp,
104102
"name": "master",
@@ -130,7 +128,7 @@ def test_show_queued(tmp_dir, scm, dvc, exp_stage):
130128
assert len(results) == 2
131129
exp = results[exp_rev]["data"]
132130
assert exp["name"] == "test_name"
133-
assert exp["queued"]
131+
assert exp["status"] == "Queued"
134132
assert exp["params"]["params.yaml"] == {"data": {"foo": 2}}
135133

136134
# test that only queued experiments for the current baseline are returned
@@ -145,10 +143,67 @@ def test_show_queued(tmp_dir, scm, dvc, exp_stage):
145143
results = dvc.experiments.show()[new_rev]
146144
assert len(results) == 2
147145
exp = results[exp_rev]["data"]
148-
assert exp["queued"]
146+
assert exp["status"] == "Queued"
149147
assert exp["params"]["params.yaml"] == {"data": {"foo": 3}}
150148

151149

150+
@pytest.mark.vscode
151+
def test_show_failed_experiment(tmp_dir, scm, dvc, failed_exp_stage):
152+
baseline_rev = scm.get_rev()
153+
timestamp = datetime.fromtimestamp(
154+
scm.gitpython.repo.rev_parse(baseline_rev).committed_date
155+
)
156+
157+
dvc.experiments.run(
158+
failed_exp_stage.addressing, params=["foo=2"], queue=True
159+
)
160+
exp_rev = dvc.experiments.scm.resolve_rev(f"{CELERY_STASH}@{{0}}")
161+
dvc.experiments.run(run_all=True)
162+
experiments = dvc.experiments.show()[baseline_rev]
163+
164+
expected_baseline = {
165+
"data": {
166+
"deps": {
167+
"copy.py": {
168+
"hash": ANY,
169+
"size": ANY,
170+
"nfiles": None,
171+
}
172+
},
173+
"metrics": {},
174+
"outs": {},
175+
"params": {"params.yaml": {"data": {"foo": 1}}},
176+
"status": "Success",
177+
"executor": None,
178+
"timestamp": timestamp,
179+
"name": "master",
180+
}
181+
}
182+
183+
expected_failed = {
184+
"data": {
185+
"timestamp": ANY,
186+
"params": {"params.yaml": {"data": {"foo": 2}}},
187+
"deps": {"copy.py": {"hash": None, "size": None, "nfiles": None}},
188+
"outs": {},
189+
"status": "Failed",
190+
"executor": None,
191+
"error": {
192+
"msg": "Experiment run failed.",
193+
"type": "",
194+
},
195+
}
196+
}
197+
198+
assert len(experiments) == 2
199+
for rev, exp in experiments.items():
200+
if rev == "baseline":
201+
assert exp == expected_baseline
202+
else:
203+
assert rev == exp_rev
204+
assert exp == expected_failed
205+
206+
152207
@pytest.mark.vscode
153208
@pytest.mark.parametrize("workspace", [True, False])
154209
def test_show_checkpoint(
@@ -339,12 +394,8 @@ def test_show_sort(tmp_dir, scm, dvc, exp_stage, caplog):
339394

340395

341396
@pytest.mark.vscode
342-
@pytest.mark.parametrize(
343-
"status, running", [(TaskStatus.RUNNING, True), (TaskStatus.FAILED, False)]
344-
)
345-
def test_show_running_workspace(
346-
tmp_dir, scm, dvc, exp_stage, capsys, status, running
347-
):
397+
@pytest.mark.parametrize("status", [TaskStatus.RUNNING, TaskStatus.FAILED])
398+
def test_show_running_workspace(tmp_dir, scm, dvc, exp_stage, capsys, status):
348399
pid_dir = os.path.join(dvc.tmp_dir, EXEC_TMP_DIR, EXEC_PID_DIR)
349400
info = make_executor_info(
350401
location=BaseExecutor.DEFAULT_LOCATION, status=status
@@ -357,7 +408,8 @@ def test_show_running_workspace(
357408
makedirs(os.path.dirname(pidfile), True)
358409
(tmp_dir / pidfile).dump_json(info.asdict())
359410

360-
print(dvc.experiments.show())
411+
print(dvc.experiments.show().get("workspace"))
412+
361413
assert dvc.experiments.show().get("workspace") == {
362414
"baseline": {
363415
"data": {
@@ -371,17 +423,20 @@ def test_show_running_workspace(
371423
"metrics": {"metrics.yaml": {"data": {"foo": 1}}},
372424
"params": {"params.yaml": {"data": {"foo": 1}}},
373425
"outs": {},
374-
"queued": False,
375-
"running": True if running else False,
376-
"executor": info.location if running else None,
426+
"status": "Running"
427+
if status == TaskStatus.RUNNING
428+
else "Success",
429+
"executor": info.location
430+
if status == TaskStatus.RUNNING
431+
else None,
377432
"timestamp": None,
378433
}
379434
}
380435
}
381436
capsys.readouterr()
382437
assert main(["exp", "show", "--csv"]) == 0
383438
cap = capsys.readouterr()
384-
if running:
439+
if status == TaskStatus.RUNNING:
385440
assert "Running" in cap.out
386441
assert info.location in cap.out
387442

@@ -428,10 +483,10 @@ def test_show_running_tempdir(tmp_dir, scm, dvc, exp_stage, mocker):
428483
[mocker.call(stash_rev, pidfile, True)],
429484
)
430485
exp_data = get_in(results, [baseline_rev, exp_rev, "data"])
431-
assert exp_data["running"]
486+
assert exp_data["status"] == "Running"
432487
assert exp_data["executor"] == info.location
433488

434-
assert not results["workspace"]["baseline"]["data"]["running"]
489+
assert results["workspace"]["baseline"]["data"]["status"] == "Success"
435490

436491

437492
def test_show_running_celery(tmp_dir, scm, dvc, exp_stage, mocker):
@@ -453,11 +508,10 @@ def test_show_running_celery(tmp_dir, scm, dvc, exp_stage, mocker):
453508

454509
results = dvc.experiments.show()
455510
exp_data = get_in(results, [baseline_rev, exp_rev, "data"])
456-
assert not exp_data["queued"]
457-
assert exp_data["running"]
511+
assert exp_data["status"] == "Running"
458512
assert exp_data["executor"] == info.location
459513

460-
assert not results["workspace"]["baseline"]["data"]["running"]
514+
assert results["workspace"]["baseline"]["data"]["status"] == "Success"
461515

462516

463517
def test_show_running_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, mocker):
@@ -479,6 +533,11 @@ def test_show_running_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, mocker):
479533
"iter_active",
480534
return_value=entries,
481535
)
536+
mocker.patch.object(
537+
dvc.experiments.celery_queue,
538+
"iter_failed",
539+
return_value=[],
540+
)
482541
pidfile = queue.get_infofile_path(entries[0].stash_rev)
483542
info = make_executor_info(
484543
git_url="foo.git",
@@ -495,10 +554,10 @@ def test_show_running_checkpoint(tmp_dir, scm, dvc, checkpoint_stage, mocker):
495554
results = dvc.experiments.show()
496555

497556
checkpoint_res = get_in(results, [baseline_rev, checkpoint_rev, "data"])
498-
assert checkpoint_res["running"]
557+
assert checkpoint_res["status"] == "Running"
499558
assert checkpoint_res["executor"] == info.location
500559

501-
assert not results["workspace"]["baseline"]["data"]["running"]
560+
assert results["workspace"]["baseline"]["data"]["status"] == "Success"
502561

503562

504563
def test_show_with_broken_repo(tmp_dir, scm, dvc, exp_stage, caplog):

0 commit comments

Comments
 (0)