Skip to content

stage: check if local path contains symlink and, if so, treat added files as relpath #1662

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions dvc/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

import os
import yaml
import posixpath
import subprocess

from dvc.utils.fs import contains_symlink_up_to
from schema import Schema, SchemaError, Optional, Or, And

import dvc.prompt as prompt
Expand Down Expand Up @@ -306,16 +307,24 @@ def _stage_fname(cls, fname, outs, add):
return cls.STAGE_FILE

out = outs[0]
if out.scheme == "local":
path = os.path
else:
path = posixpath
path_handler = out.remote.ospath

fname = path.basename(out.path) + cls.STAGE_FILE_SUFFIX
fname = path_handler.basename(out.path) + cls.STAGE_FILE_SUFFIX

fname = Stage._expand_to_path_on_add_local(
add, fname, out, path_handler
)

if add and out.is_local:
fname = path.join(path.dirname(out.path), fname)
return fname

@staticmethod
def _expand_to_path_on_add_local(add, fname, out, path_handler):
if (
add
and out.is_local
and not contains_symlink_up_to(out.path, out.repo.root_dir)
):
fname = path_handler.join(path_handler.dirname(out.path), fname)
return fname

@staticmethod
Expand Down
7 changes: 5 additions & 2 deletions dvc/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,11 @@ def is_symlink(path):
# https://docs.microsoft.com/en-us/windows/desktop/fileio/
# file-attribute-constants
FILE_ATTRIBUTE_REPARSE_POINT = 0x400
info = System.getdirinfo(path)
return info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT

if os.path.lexists(path):
info = System.getdirinfo(path)
return info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT
return False

@staticmethod
def is_hardlink(path):
Expand Down
22 changes: 22 additions & 0 deletions dvc/utils/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os

import dvc.logger as logger
from dvc.exceptions import DvcException
from dvc.system import System
from dvc.utils.compat import str

Expand Down Expand Up @@ -31,3 +32,24 @@ def get_mtime_and_size(path):
# State of files handled by dvc is stored in db as TEXT.
# We cast results to string for later comparisons with stored values.
return str(int(nanotime.timestamp(mtime))), str(size)


class BasePathNotInCheckedPathException(DvcException):
def __init__(self, path, base_path):
msg = "Path: {} does not overlap with base path: {}".format(
path, base_path
)
super(DvcException, self).__init__(msg)


def contains_symlink_up_to(path, base_path):
if base_path not in path:
raise BasePathNotInCheckedPathException(path, base_path)

if path == base_path:
return False
if System.is_symlink(path):
return True
if os.path.dirname(path) == path:
return False
return contains_symlink_up_to(os.path.dirname(path), base_path)
75 changes: 75 additions & 0 deletions tests/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import time
import shutil
import filecmp
import posixpath

from dvc.system import System
from mock import patch

from dvc.main import main
Expand Down Expand Up @@ -331,3 +333,76 @@ def test(self):
ret = main(["status"])
self.assertEqual(0, ret)
self.assertEqual(1, collect_dir_counter.mock.call_count)


class SymlinkAddTestBase(TestDvc):
def _get_data_dir(self):
raise NotImplementedError

def _prepare_external_data(self):
data_dir = self._get_data_dir()

self.data_file_name = "data_file"
external_data_path = os.path.join(data_dir, self.data_file_name)
with open(external_data_path, "w+") as f:
f.write("data")

self.link_name = "data_link"
System.symlink(data_dir, self.link_name)

def _test(self):
self._prepare_external_data()

ret = main(["add", os.path.join(self.link_name, self.data_file_name)])
self.assertEqual(0, ret)

stage_file = self.data_file_name + Stage.STAGE_FILE_SUFFIX
self.assertTrue(os.path.exists(stage_file))

with open(stage_file, "r") as fobj:
d = yaml.safe_load(fobj)
relative_data_path = posixpath.join(
self.link_name, self.data_file_name
)
self.assertEqual(relative_data_path, d["outs"][0]["path"])


class TestShouldAddDataFromExternalSymlink(SymlinkAddTestBase):
def _get_data_dir(self):
return self.mkdtemp()

def test(self):
self._test()


class TestShouldAddDataFromInternalSymlink(SymlinkAddTestBase):
def _get_data_dir(self):
return self.DATA_DIR

def test(self):
self._test()


class TestShouldPlaceStageInDataDirIfRepositoryBelowSymlink(TestDvc):
def test(self):
def is_symlink_true_below_dvc_root(path):
if path == os.path.dirname(self.dvc.root_dir):
return True
return False

with patch.object(
System, "is_symlink", side_effect=is_symlink_true_below_dvc_root
):

ret = main(["add", self.DATA])
self.assertEqual(0, ret)

stage_file_path_on_data_below_symlink = (
os.path.basename(self.DATA) + Stage.STAGE_FILE_SUFFIX
)
self.assertFalse(
os.path.exists(stage_file_path_on_data_below_symlink)
)

stage_file_path = self.DATA + Stage.STAGE_FILE_SUFFIX
self.assertTrue(os.path.exists(stage_file_path))
26 changes: 0 additions & 26 deletions tests/unit/fs.py

This file was deleted.

87 changes: 87 additions & 0 deletions tests/unit/utils/fs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import os
from unittest import TestCase

import dvc
from dvc.system import System
from dvc.utils.compat import str
from dvc.utils.fs import (
get_mtime_and_size,
contains_symlink_up_to,
BasePathNotInCheckedPathException,
)
from mock import patch
from tests.basic_env import TestDir
from tests.utils import spy


class TestMtimeAndSize(TestDir):
def test(self):
file_time, file_size = get_mtime_and_size(self.DATA)
dir_time, dir_size = get_mtime_and_size(self.DATA_DIR)

actual_file_size = os.path.getsize(self.DATA)
actual_dir_size = (
os.path.getsize(self.DATA_DIR)
+ os.path.getsize(self.DATA)
+ os.path.getsize(self.DATA_SUB_DIR)
+ os.path.getsize(self.DATA_SUB)
)

self.assertIs(type(file_time), str)
self.assertIs(type(file_size), str)
self.assertEqual(file_size, str(actual_file_size))
self.assertIs(type(dir_time), str)
self.assertIs(type(dir_size), str)
self.assertEqual(dir_size, str(actual_dir_size))


class TestContainsLink(TestCase):
def test_should_raise_exception_on_base_path_not_in_path(self):
with self.assertRaises(BasePathNotInCheckedPathException):
contains_symlink_up_to(os.path.join("foo", "path"), "bar")

@patch.object(System, "is_symlink", return_value=True)
def test_should_return_true_on_symlink_in_path(self, _):
base_path = "foo"
path = os.path.join(base_path, "bar")
self.assertTrue(contains_symlink_up_to(path, base_path))

@patch.object(System, "is_symlink", return_value=False)
def test_should_return_false_on_path_eq_to_base_path(self, _):
path = "path"
self.assertFalse(contains_symlink_up_to(path, path))

@patch.object(System, "is_symlink", return_value=False)
@patch.object(os.path, "dirname", side_effect=lambda arg: arg)
def test_should_return_false_on_no_more_dirs_below_path(
self, dirname_patch, _
):
self.assertFalse(contains_symlink_up_to("path"))
dirname_patch.assert_called_once()

@patch.object(System, "is_symlink", return_value=False)
@patch.object(os.path, "dirname", return_value="")
def test_should_call_recursive_on_no_condition_matched(self, _, __):
contains_symlink_spy = spy(contains_symlink_up_to)
with patch.object(
dvc.utils.fs, "contains_symlink_up_to", contains_symlink_spy
):

# call from full path to match contains_symlink_spy patch path
self.assertFalse(dvc.utils.fs.contains_symlink_up_to("path"))
self.assertEqual(2, contains_symlink_spy.mock.call_count)

@patch.object(System, "is_symlink", return_value=True)
def test_should_return_false_when_base_path_is_symlink(self, _):
base_path = "foo"
target_path = os.path.join(base_path, "bar")

def base_path_is_symlink(path):
if path == base_path:
return True
return False

with patch.object(
System, "is_symlink", side_effect=base_path_is_symlink
):
self.assertFalse(contains_symlink_up_to(target_path, base_path))