From 6fcd1517ebb5b644a39a3c4fff61299341e2521b Mon Sep 17 00:00:00 2001 From: Ioana Grigoropol Date: Mon, 9 Nov 2020 15:00:42 +0000 Subject: [PATCH 1/7] api: add support for simple wildcards Related to #4816. Signed-off-by: Ioana Grigoropol --- dvc/repo/add.py | 14 ++++++-- tests/func/test_add.py | 72 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 187de18941..ab24b8efab 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -1,3 +1,4 @@ +import glob import logging import os @@ -152,12 +153,19 @@ def _find_all_targets(repo, target, recursive): def _create_stages(repo, targets, fname, pbar=None, external=False): from dvc.stage import Stage, create_stage - stages = [] + expanded_targets = [] + + for target in targets: + found_target = glob.glob(target, recursive=True) + if not found_target: + found_target = [target] + expanded_targets.extend(found_target) + stages = [] for out in Tqdm( - targets, + expanded_targets, desc="Creating DVC-files", - disable=len(targets) < LARGE_DIR_SIZE, + disable=len(expanded_targets) < LARGE_DIR_SIZE, unit="file", ): path, wdir, out = resolve_paths(repo, out) diff --git a/tests/func/test_add.py b/tests/func/test_add.py index 017f02d0b1..60ccb560e1 100644 --- a/tests/func/test_add.py +++ b/tests/func/test_add.py @@ -190,6 +190,77 @@ def test_add_file_in_dir(tmp_dir, dvc): assert stage.outs[0].def_path == "subdata" +@pytest.mark.parametrize( + "path_pattern, expected_def_paths, expected_rel_paths", + [ + ( + os.path.join("dir", "subdir", "subdata*"), + ["subdata", "subdata123"], + [ + os.path.join("dir", "subdir", "subdata") + ".dvc", + os.path.join("dir", "subdir", "subdata123") + ".dvc", + ], + ), + ( + os.path.join("dir", "subdir", "?subdata"), + ["esubdata", "isubdata"], + [ + os.path.join("dir", "subdir", "esubdata") + ".dvc", + os.path.join("dir", "subdir", "isubdata") + ".dvc", + ], + ), + ( + os.path.join("dir", "subdir", "[aiou]subdata"), + ["isubdata"], + [os.path.join("dir", "subdir", "isubdata") + ".dvc"], + ), + ( + os.path.join("dir", "**", "subdata*"), + ["subdata", "subdata123", "subdata4", "subdata5"], + [ + os.path.join("dir", "subdir", "subdata") + ".dvc", + os.path.join("dir", "subdir", "subdata123") + ".dvc", + os.path.join("dir", "anotherdir", "subdata4") + ".dvc", + os.path.join("dir", "subdata5") + ".dvc", + ], + ), + ], +) +def test_add_filtered_files_in_dir( + tmp_dir, dvc, path_pattern, expected_def_paths, expected_rel_paths +): + tmp_dir.gen( + { + "dir": { + "subdir": { + "subdata": "subdata content", + "esubdata": "extra subdata content", + "isubdata": "i subdata content", + "subdata123": "subdata content 123", + }, + "anotherdir": { + "subdata4": "subdata 4 content", + "esubdata": "extra 2 subdata content", + }, + "subdata5": "subdata 5 content", + } + } + ) + + stages = dvc.add(path_pattern) + + assert len(stages) == len(expected_def_paths) + for stage in stages: + assert stage is not None + assert len(stage.deps) == 0 + assert len(stage.outs) == 1 + assert stage.relpath in expected_rel_paths + + # Current dir should not be taken into account + assert stage.wdir == os.path.dirname(stage.path) + assert stage.outs[0].def_path in expected_def_paths + + @pytest.mark.parametrize( "workspace, hash_name, hash_value", [ @@ -440,7 +511,6 @@ def is_symlink_true_below_dvc_root(path): with patch.object( System, "is_symlink", side_effect=is_symlink_true_below_dvc_root ): - ret = main(["add", self.DATA]) self.assertEqual(0, ret) From a1d0f879c2a9f8ee4d49bf6a41e07e7504c8d731 Mon Sep 17 00:00:00 2001 From: Ioana Grigoropol Date: Tue, 10 Nov 2020 13:18:59 +0000 Subject: [PATCH 2/7] api: make wildcard interpretation optional Adds a new argument for the add command `glob` that is disabled by default and when enabled it passes the input targets through glob filtering. Related: #4816 Signed-off-by: Ioana Grigoropol --- dvc/command/add.py | 7 +++++++ dvc/repo/add.py | 37 ++++++++++++++++++++++++++----------- tests/func/test_add.py | 7 ++++--- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/dvc/command/add.py b/dvc/command/add.py index e81631d525..fb70ef2792 100644 --- a/dvc/command/add.py +++ b/dvc/command/add.py @@ -20,6 +20,7 @@ def run(self): no_commit=self.args.no_commit, fname=self.args.file, external=self.args.external, + glob=self.args.glob, ) except DvcException: @@ -57,6 +58,12 @@ def add_parser(subparsers, parent_parser): default=False, help="Allow targets that are outside of the DVC repository.", ) + parser.add_argument( + "--glob", + action="store_true", + default=False, + help="Allows targets containing shell-style wildcards.", + ) parser.add_argument( "--file", help="Specify name of the DVC-file this command will generate.", diff --git a/dvc/repo/add.py b/dvc/repo/add.py index ab24b8efab..fce560828c 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -1,4 +1,3 @@ -import glob import logging import os @@ -24,7 +23,13 @@ @locked @scm_context def add( - repo, targets, recursive=False, no_commit=False, fname=None, external=False + repo, + targets, + recursive=False, + no_commit=False, + fname=None, + external=False, + glob=False, ): if recursive and fname: raise RecursiveAddingWhileUsingFilename() @@ -58,7 +63,12 @@ def add( ) stages = _create_stages( - repo, sub_targets, fname, pbar=pbar, external=external + repo, + sub_targets, + fname, + pbar=pbar, + external=external, + enable_glob=glob, ) try: @@ -150,16 +160,21 @@ def _find_all_targets(repo, target, recursive): return [target] -def _create_stages(repo, targets, fname, pbar=None, external=False): - from dvc.stage import Stage, create_stage +def _create_stages( + repo, targets, fname, pbar=None, external=False, enable_glob=False +): + import glob - expanded_targets = [] + from dvc.stage import Stage, create_stage - for target in targets: - found_target = glob.glob(target, recursive=True) - if not found_target: - found_target = [target] - expanded_targets.extend(found_target) + if enable_glob: + expanded_targets = [ + exp_target + for target in targets + for exp_target in glob.iglob(target, recursive=True) + ] + else: + expanded_targets = targets stages = [] for out in Tqdm( diff --git a/tests/func/test_add.py b/tests/func/test_add.py index 60ccb560e1..483cdb8575 100644 --- a/tests/func/test_add.py +++ b/tests/func/test_add.py @@ -191,7 +191,7 @@ def test_add_file_in_dir(tmp_dir, dvc): @pytest.mark.parametrize( - "path_pattern, expected_def_paths, expected_rel_paths", + "target, expected_def_paths, expected_rel_paths", [ ( os.path.join("dir", "subdir", "subdata*"), @@ -227,7 +227,7 @@ def test_add_file_in_dir(tmp_dir, dvc): ], ) def test_add_filtered_files_in_dir( - tmp_dir, dvc, path_pattern, expected_def_paths, expected_rel_paths + tmp_dir, dvc, target, expected_def_paths, expected_rel_paths ): tmp_dir.gen( { @@ -247,7 +247,7 @@ def test_add_filtered_files_in_dir( } ) - stages = dvc.add(path_pattern) + stages = dvc.add(target, glob=True) assert len(stages) == len(expected_def_paths) for stage in stages: @@ -511,6 +511,7 @@ def is_symlink_true_below_dvc_root(path): with patch.object( System, "is_symlink", side_effect=is_symlink_true_below_dvc_root ): + ret = main(["add", self.DATA]) self.assertEqual(0, ret) From 2f89e9f2391988daf78eef7b42e406898e4dbaaa Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 11 Nov 2020 17:50:08 +0200 Subject: [PATCH 3/7] Update dvc/repo/add.py --- dvc/repo/add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index fce560828c..30bd31e8cf 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -161,7 +161,7 @@ def _find_all_targets(repo, target, recursive): def _create_stages( - repo, targets, fname, pbar=None, external=False, enable_glob=False + repo, targets, fname, pbar=None, external=False, glob=False ): import glob From aa9098f14e50ebad68710b50fafc481220eb9079 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 11 Nov 2020 17:50:26 +0200 Subject: [PATCH 4/7] Update dvc/repo/add.py --- dvc/repo/add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 30bd31e8cf..a7385f9d13 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -163,7 +163,7 @@ def _find_all_targets(repo, target, recursive): def _create_stages( repo, targets, fname, pbar=None, external=False, glob=False ): - import glob + from glob import iglob from dvc.stage import Stage, create_stage From f782e8bb1305589c1fc58b17ece1f0c5260eab06 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 11 Nov 2020 17:50:42 +0200 Subject: [PATCH 5/7] Update dvc/repo/add.py --- dvc/repo/add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index a7385f9d13..4535e6936c 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -167,7 +167,7 @@ def _create_stages( from dvc.stage import Stage, create_stage - if enable_glob: + if glob: expanded_targets = [ exp_target for target in targets From 8aba2b7a65559dfab49e9a3c60473fd87244cc8a Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 11 Nov 2020 17:50:57 +0200 Subject: [PATCH 6/7] Update dvc/repo/add.py --- dvc/repo/add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 4535e6936c..90aafece13 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -171,7 +171,7 @@ def _create_stages( expanded_targets = [ exp_target for target in targets - for exp_target in glob.iglob(target, recursive=True) + for exp_target in iglob(target, recursive=True) ] else: expanded_targets = targets From 0a49415ab12082c2fd10bf50c67fb61ed3f85257 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 11 Nov 2020 17:51:12 +0200 Subject: [PATCH 7/7] Update dvc/repo/add.py --- dvc/repo/add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 90aafece13..b78a81a7a8 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -68,7 +68,7 @@ def add( fname, pbar=pbar, external=external, - enable_glob=glob, + glob=glob, ) try: