From f8ef28c4c66feff3e18b7e47015a733b3dffcbf0 Mon Sep 17 00:00:00 2001 From: pared Date: Mon, 25 Feb 2019 17:56:39 +0100 Subject: [PATCH] stage: check if local path contains symlink and, if so, treat added files as relpath --- dvc/stage.py | 25 ++++++++---- dvc/system.py | 7 +++- dvc/utils/fs.py | 22 +++++++++++ tests/test_add.py | 75 ++++++++++++++++++++++++++++++++++++ tests/unit/fs.py | 26 ------------- tests/unit/utils/fs.py | 87 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 206 insertions(+), 36 deletions(-) delete mode 100644 tests/unit/fs.py create mode 100644 tests/unit/utils/fs.py diff --git a/dvc/stage.py b/dvc/stage.py index 89cf7e850c..ce3d69414c 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -4,8 +4,9 @@ import os import yaml -import posixpath import subprocess + +from dvc.utils.fs import contains_symlink_up_to from schema import Schema, SchemaError, Optional, Or, And import dvc.prompt as prompt @@ -306,16 +307,24 @@ def _stage_fname(cls, fname, outs, add): return cls.STAGE_FILE out = outs[0] - if out.scheme == "local": - path = os.path - else: - path = posixpath + path_handler = out.remote.ospath - fname = path.basename(out.path) + cls.STAGE_FILE_SUFFIX + fname = path_handler.basename(out.path) + cls.STAGE_FILE_SUFFIX + + fname = Stage._expand_to_path_on_add_local( + add, fname, out, path_handler + ) - if add and out.is_local: - fname = path.join(path.dirname(out.path), fname) + return fname + @staticmethod + def _expand_to_path_on_add_local(add, fname, out, path_handler): + if ( + add + and out.is_local + and not contains_symlink_up_to(out.path, out.repo.root_dir) + ): + fname = path_handler.join(path_handler.dirname(out.path), fname) return fname @staticmethod diff --git a/dvc/system.py b/dvc/system.py index dd914923b0..fd8befe447 100644 --- a/dvc/system.py +++ b/dvc/system.py @@ -267,8 +267,11 @@ def is_symlink(path): # https://docs.microsoft.com/en-us/windows/desktop/fileio/ # file-attribute-constants FILE_ATTRIBUTE_REPARSE_POINT = 0x400 - info = System.getdirinfo(path) - return info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT + + if os.path.lexists(path): + info = System.getdirinfo(path) + return info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT + return False @staticmethod def is_hardlink(path): diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 86f5e1712a..07efa2d1f7 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -4,6 +4,7 @@ import os import dvc.logger as logger +from dvc.exceptions import DvcException from dvc.system import System from dvc.utils.compat import str @@ -31,3 +32,24 @@ def get_mtime_and_size(path): # State of files handled by dvc is stored in db as TEXT. # We cast results to string for later comparisons with stored values. return str(int(nanotime.timestamp(mtime))), str(size) + + +class BasePathNotInCheckedPathException(DvcException): + def __init__(self, path, base_path): + msg = "Path: {} does not overlap with base path: {}".format( + path, base_path + ) + super(DvcException, self).__init__(msg) + + +def contains_symlink_up_to(path, base_path): + if base_path not in path: + raise BasePathNotInCheckedPathException(path, base_path) + + if path == base_path: + return False + if System.is_symlink(path): + return True + if os.path.dirname(path) == path: + return False + return contains_symlink_up_to(os.path.dirname(path), base_path) diff --git a/tests/test_add.py b/tests/test_add.py index 7e5a0002f9..f97ce8387a 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -7,7 +7,9 @@ import time import shutil import filecmp +import posixpath +from dvc.system import System from mock import patch from dvc.main import main @@ -331,3 +333,76 @@ def test(self): ret = main(["status"]) self.assertEqual(0, ret) self.assertEqual(1, collect_dir_counter.mock.call_count) + + +class SymlinkAddTestBase(TestDvc): + def _get_data_dir(self): + raise NotImplementedError + + def _prepare_external_data(self): + data_dir = self._get_data_dir() + + self.data_file_name = "data_file" + external_data_path = os.path.join(data_dir, self.data_file_name) + with open(external_data_path, "w+") as f: + f.write("data") + + self.link_name = "data_link" + System.symlink(data_dir, self.link_name) + + def _test(self): + self._prepare_external_data() + + ret = main(["add", os.path.join(self.link_name, self.data_file_name)]) + self.assertEqual(0, ret) + + stage_file = self.data_file_name + Stage.STAGE_FILE_SUFFIX + self.assertTrue(os.path.exists(stage_file)) + + with open(stage_file, "r") as fobj: + d = yaml.safe_load(fobj) + relative_data_path = posixpath.join( + self.link_name, self.data_file_name + ) + self.assertEqual(relative_data_path, d["outs"][0]["path"]) + + +class TestShouldAddDataFromExternalSymlink(SymlinkAddTestBase): + def _get_data_dir(self): + return self.mkdtemp() + + def test(self): + self._test() + + +class TestShouldAddDataFromInternalSymlink(SymlinkAddTestBase): + def _get_data_dir(self): + return self.DATA_DIR + + def test(self): + self._test() + + +class TestShouldPlaceStageInDataDirIfRepositoryBelowSymlink(TestDvc): + def test(self): + def is_symlink_true_below_dvc_root(path): + if path == os.path.dirname(self.dvc.root_dir): + return True + return False + + with patch.object( + System, "is_symlink", side_effect=is_symlink_true_below_dvc_root + ): + + ret = main(["add", self.DATA]) + self.assertEqual(0, ret) + + stage_file_path_on_data_below_symlink = ( + os.path.basename(self.DATA) + Stage.STAGE_FILE_SUFFIX + ) + self.assertFalse( + os.path.exists(stage_file_path_on_data_below_symlink) + ) + + stage_file_path = self.DATA + Stage.STAGE_FILE_SUFFIX + self.assertTrue(os.path.exists(stage_file_path)) diff --git a/tests/unit/fs.py b/tests/unit/fs.py deleted file mode 100644 index cb33971293..0000000000 --- a/tests/unit/fs.py +++ /dev/null @@ -1,26 +0,0 @@ -import os - -from dvc.utils.compat import str -from dvc.utils.fs import get_mtime_and_size -from tests.basic_env import TestDir - - -class TestMtimeAndSize(TestDir): - def test(self): - file_time, file_size = get_mtime_and_size(self.DATA) - dir_time, dir_size = get_mtime_and_size(self.DATA_DIR) - - actual_file_size = os.path.getsize(self.DATA) - actual_dir_size = ( - os.path.getsize(self.DATA_DIR) - + os.path.getsize(self.DATA) - + os.path.getsize(self.DATA_SUB_DIR) - + os.path.getsize(self.DATA_SUB) - ) - - self.assertIs(type(file_time), str) - self.assertIs(type(file_size), str) - self.assertEqual(file_size, str(actual_file_size)) - self.assertIs(type(dir_time), str) - self.assertIs(type(dir_size), str) - self.assertEqual(dir_size, str(actual_dir_size)) diff --git a/tests/unit/utils/fs.py b/tests/unit/utils/fs.py new file mode 100644 index 0000000000..79caceb80f --- /dev/null +++ b/tests/unit/utils/fs.py @@ -0,0 +1,87 @@ +import os +from unittest import TestCase + +import dvc +from dvc.system import System +from dvc.utils.compat import str +from dvc.utils.fs import ( + get_mtime_and_size, + contains_symlink_up_to, + BasePathNotInCheckedPathException, +) +from mock import patch +from tests.basic_env import TestDir +from tests.utils import spy + + +class TestMtimeAndSize(TestDir): + def test(self): + file_time, file_size = get_mtime_and_size(self.DATA) + dir_time, dir_size = get_mtime_and_size(self.DATA_DIR) + + actual_file_size = os.path.getsize(self.DATA) + actual_dir_size = ( + os.path.getsize(self.DATA_DIR) + + os.path.getsize(self.DATA) + + os.path.getsize(self.DATA_SUB_DIR) + + os.path.getsize(self.DATA_SUB) + ) + + self.assertIs(type(file_time), str) + self.assertIs(type(file_size), str) + self.assertEqual(file_size, str(actual_file_size)) + self.assertIs(type(dir_time), str) + self.assertIs(type(dir_size), str) + self.assertEqual(dir_size, str(actual_dir_size)) + + +class TestContainsLink(TestCase): + def test_should_raise_exception_on_base_path_not_in_path(self): + with self.assertRaises(BasePathNotInCheckedPathException): + contains_symlink_up_to(os.path.join("foo", "path"), "bar") + + @patch.object(System, "is_symlink", return_value=True) + def test_should_return_true_on_symlink_in_path(self, _): + base_path = "foo" + path = os.path.join(base_path, "bar") + self.assertTrue(contains_symlink_up_to(path, base_path)) + + @patch.object(System, "is_symlink", return_value=False) + def test_should_return_false_on_path_eq_to_base_path(self, _): + path = "path" + self.assertFalse(contains_symlink_up_to(path, path)) + + @patch.object(System, "is_symlink", return_value=False) + @patch.object(os.path, "dirname", side_effect=lambda arg: arg) + def test_should_return_false_on_no_more_dirs_below_path( + self, dirname_patch, _ + ): + self.assertFalse(contains_symlink_up_to("path")) + dirname_patch.assert_called_once() + + @patch.object(System, "is_symlink", return_value=False) + @patch.object(os.path, "dirname", return_value="") + def test_should_call_recursive_on_no_condition_matched(self, _, __): + contains_symlink_spy = spy(contains_symlink_up_to) + with patch.object( + dvc.utils.fs, "contains_symlink_up_to", contains_symlink_spy + ): + + # call from full path to match contains_symlink_spy patch path + self.assertFalse(dvc.utils.fs.contains_symlink_up_to("path")) + self.assertEqual(2, contains_symlink_spy.mock.call_count) + + @patch.object(System, "is_symlink", return_value=True) + def test_should_return_false_when_base_path_is_symlink(self, _): + base_path = "foo" + target_path = os.path.join(base_path, "bar") + + def base_path_is_symlink(path): + if path == base_path: + return True + return False + + with patch.object( + System, "is_symlink", side_effect=base_path_is_symlink + ): + self.assertFalse(contains_symlink_up_to(target_path, base_path))