Skip to content

client: add support for nocopy, raw-leaves to add method #159

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion ipfshttpclient/client/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,12 @@ def add(self, file, *files, **kwargs):
The chunking algorithm to use
pin : bool
Pin this object when adding (Default: ``True``)
raw_leaves : bool
Use raw blocks for leaf nodes. (experimental). (Default: ``True``
when ``nocopy`` is True, or ``False`` otherwise)
nocopy : bool
Add the file using filestore. Implies raw-leaves. (experimental).
(Default: ``False``)

Returns
-------
Expand All @@ -274,11 +280,14 @@ def add(self, file, *files, **kwargs):
#PY2: No support for kw-only parameters after glob parameters
recursive = kwargs.pop("recursive", False)
pattern = kwargs.pop("pattern", "**")
nocopy = kwargs.pop("nocopy", False)
opts = {
"trickle": kwargs.pop("trickle", False),
"only-hash": kwargs.pop("only_hash", False),
"wrap-with-directory": kwargs.pop("wrap_with_directory", False),
"pin": kwargs.pop("pin", True)
"pin": kwargs.pop("pin", True),
"raw-leaves": kwargs.pop("raw_leaves", nocopy),
'nocopy': nocopy
}
if "chunker" in kwargs:
opts["chunker"] = kwargs.pop("chunker")
Expand Down
31 changes: 22 additions & 9 deletions ipfshttpclient/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,15 @@ def _gen_end(self):


class StreamFileMixin(object):
def _gen_file(self, filename, file=None, content_type=None):
def _gen_file(self, filename, file_location=None, file=None, content_type=None):
"""Yields the entire contents of a file.

Parameters
----------
filename : str
Filename of the file being opened and added to the HTTP body
file_location : str
Full path to the file being added, including the filename
file : io.RawIOBase
The binary file-like object whose contents should be streamed

Expand All @@ -218,18 +220,21 @@ def _gen_file(self, filename, file=None, content_type=None):
The Content-Type of the file; if not set a value will be guessed
"""
#PY2: Use `yield from` instead
for chunk in self._gen_file_start(filename, content_type): yield chunk
for chunk in self._gen_file_start(filename, file_location, content_type):
yield chunk
if file:
for chunk in self._gen_file_chunks(file): yield chunk
for chunk in self._gen_file_end(): yield chunk

def _gen_file_start(self, filename, content_type=None):
def _gen_file_start(self, filename, file_location=None, content_type=None):
"""Yields the opening text of a file section in multipart HTTP.

Parameters
----------
filename : str
Filename of the file being opened and added to the HTTP body
file_location : str
Full path to the file being added, including the filename
content_type : str
The Content-Type of the file; if not set a value will be guessed
"""
Expand All @@ -238,6 +243,8 @@ def _gen_file_start(self, filename, content_type=None):

headers = content_disposition_headers(filename.replace(os.sep, "/"), disptype="file")
headers.update(content_type_headers(filename, content_type))
if file_location and os.path.isabs(file_location):
headers.update({"Abspath": file_location})
#PY2: Use `yield from` instead
for chunk in self._gen_headers(headers): yield chunk

Expand Down Expand Up @@ -286,12 +293,15 @@ def _body(self):
for file, need_close in self.files:
try:
try:
filename = os.path.basename(file.name)
file_location = file.name
filename = os.path.basename(file_location)
except AttributeError:
file_location = None
filename = ''

#PY2: Use `yield from` instead
for chunk in self._gen_file(filename, file): yield chunk
for chunk in self._gen_file(filename, file_location, file):
yield chunk
finally:
if need_close:
file.close()
Expand Down Expand Up @@ -347,7 +357,7 @@ def glob_compile(pat):
res = '%s[%s]' % (res, stuff)
else:
res = res + re.escape(c)
return re.compile('^' + res + '\\Z(?ms)' + '$')
return re.compile(r'^' + res + r'\Z$', flags=re.M | re.S)


class DirectoryStream(StreamBase, StreamFileMixin):
Expand Down Expand Up @@ -418,11 +428,14 @@ def _body_directory(self, short_path, visited_directories):
def _body_file(self, short_path, file_location, dir_fd=-1):
try:
if dir_fd >= 0:
file_location = os.open(file_location, os.O_RDONLY | os.O_CLOEXEC, dir_fd=dir_fd)
f_path_or_desc = os.open(file_location, os.O_RDONLY | os.O_CLOEXEC, dir_fd=dir_fd)
else:
f_path_or_desc = file_location
# Stream file to client
with open(file_location, "rb") as file:
with open(f_path_or_desc, "rb") as file:
#PY2: Use `yield from`
for chunk in self._gen_file(short_path, file): yield chunk
for chunk in self._gen_file(short_path, file_location, file):
yield chunk
except OSError as e:
print(e)
# File might have disappeared between `os.walk()` and `open()`
Expand Down
69 changes: 69 additions & 0 deletions test/functional/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@

# Expected ``client.add`` response for the first fake file when added with
# default options (compared against the result in test_add_relative_path).
FAKE_FILE1_HASH = {"Hash": "QmQcCtMgLVwvMQGu6mvsRYLjwqrZJcYtH4mboM9urWW9vX",
"Name": "fsdfgh", "Size": "16"}
# Expected ``client.add`` response for the same file when added with
# ``raw_leaves=True`` (compared in check_add_with_raw_leaves).
FAKE_FILE1_RAW_LEAVES_HASH = {
"Hash": "zb2rhXxZH5PFgCwBAm7xQMoBa6QWqytN8NPvXK7Qc9McDz9zJ",
"Name": "fsdfgh", "Size": "8"
}

FAKE_FILE1_DIR_HASH = [
{"Hash": "QmQcCtMgLVwvMQGu6mvsRYLjwqrZJcYtH4mboM9urWW9vX",
"Name": "fsdfgh", "Size": "16"},
Expand Down Expand Up @@ -103,6 +108,13 @@
]


def calc_path_rel_to_cwd(p):
    """Return *p* expressed relative to the current working directory.

    Parameters
    ----------
    p : str or path-like
        Path to convert; it is passed through ``str()`` first.

    Returns
    -------
    str
        A relative path that resolves to *p* when joined onto
        ``os.getcwd()``.
    """
    p = str(p)  # for Python < 3.5
    # Resolve against the working directory itself.
    # ``os.path.commonprefix`` compares character by character, so using its
    # result as the base yields a path relative to that prefix rather than to
    # the working directory whenever the two differ.
    relpath = os.path.relpath(p, os.getcwd())
    assert not os.path.isabs(relpath)
    return relpath


def test_add_single_from_str_with_dir(client, cleanup_pins):
res = client.add(FAKE_FILE1_PATH, wrap_with_directory=True)
Expand Down Expand Up @@ -132,6 +144,63 @@ def test_add_multiple_from_list(client, cleanup_pins):
assert FAKE_FILES_HASH == res


def test_add_with_raw_leaves(client, cleanup_pins):
    """Add the first fake file with ``raw_leaves=True`` and verify the result."""
    check_add_with_raw_leaves(
        client,
        client.add(FAKE_FILE1_PATH, raw_leaves=True)
    )


def check_add_with_raw_leaves(client, res):
    """Assert that *res* is the expected raw-leaves add result and is pinned.

    Parameters
    ----------
    client
        Client whose ``pin.ls`` is queried for recursive pins
    res : dict
        Response returned by ``client.add`` for the first fake file
    """
    assert FAKE_FILE1_RAW_LEAVES_HASH == res

    added_hash = res["Hash"]
    recursive_pins = client.pin.ls(type="recursive")["Keys"]
    assert added_hash in recursive_pins


def test_add_nocopy_without_raw_leaves(client):
    """``nocopy=True`` combined with ``raw_leaves=False`` must be rejected.

    The call is expected to raise ``ErrorResponse`` with a message that
    mentions ``--raw-leaves``.
    """
    # pytest.raises fails the test by itself when no exception is raised,
    # replacing the manual "capture message, then check for None" dance.
    with pytest.raises(ipfshttpclient.exceptions.ErrorResponse) as excinfo:
        client.add(FAKE_FILE1_PATH, nocopy=True, raw_leaves=False)

    error_msg = excinfo.value.args[0]
    assert error_msg is not None and "--raw-leaves" in error_msg


def test_nocopy_with_raw_leaves_file(client, cleanup_pins):
    """Add the first fake file with both ``nocopy`` and ``raw_leaves`` enabled."""
    check_no_copy(
        client,
        client.add(FAKE_FILE1_PATH, nocopy=True, raw_leaves=True)
    )


def test_nocopy_with_default_raw_leaves_file(client, cleanup_pins):
    """Add the first fake file with ``nocopy`` only, leaving ``raw_leaves`` unset."""
    check_no_copy(
        client,
        client.add(FAKE_FILE1_PATH, nocopy=True)
    )


def check_no_copy(client, res):
    """Verify the result of a ``nocopy`` add.

    Currently this only delegates to ``check_add_with_raw_leaves``; the
    filestore checks below are still pending.
    """
    check_add_with_raw_leaves(client, res)
    # TODO: assert client.filestore.ls(res["Hash"])["Status"] == 0
    # TODO: assert client.filestore.verify(res["Hash"])["Status"] == 0


def test_add_relative_path(client, cleanup_pins):
    """Add the first fake file through a path relative to the working directory."""
    relative_path = calc_path_rel_to_cwd(FAKE_FILE1_PATH)
    res = client.add(relative_path)
    assert FAKE_FILE1_HASH == res

    recursive_pins = client.pin.ls(type="recursive")["Keys"]
    assert res["Hash"] in recursive_pins


@pytest.mark.skip(reason="Requires go-ipfs 77cd41acfef6cc442309eda565c29e36fdd488d9 PR#5937")
def test_add_nocopy_with_relative_path(client):
    """A ``nocopy`` add through a relative path must fail with "missing file path"."""
    # For relative paths, the multipart streaming layer won't append the
    # Abspath header, and the server will report the missing header. Note
    # that currently, the server does not report an error if Abspath is
    # present but is a relative or nonexistent path -- instead, it silently
    # ignores nocopy and adds the file to the blockstore (bug).
    with pytest.raises(ipfshttpclient.exceptions.ErrorResponse) as excinfo:
        client.add(calc_path_rel_to_cwd(FAKE_FILE1_PATH), nocopy=True)

    error_msg = excinfo.value.args[0]
    assert error_msg is not None and "missing file path" in error_msg


def test_add_multiple_from_dirname(client, cleanup_pins):
res = client.add(FAKE_DIR_TEST2_PATH)
assert conftest.sort_by_key(FAKE_DIR_TEST2_HASH) == conftest.sort_by_key(res)
Expand Down
1 change: 1 addition & 0 deletions test/run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def _contextlib_suppress(*exceptions):
subprocess.call(["ipfs", "init"])
subprocess.call(["ipfs", "config", "Addresses.Gateway", ""])
subprocess.call(["ipfs", "config", "Addresses.API", "/ip4/{}/tcp/{}".format(HOST, PORT)])
subprocess.call(["ipfs", "config", "--bool", "Experimental.FilestoreEnabled", "true"])


################
Expand Down
91 changes: 64 additions & 27 deletions test/unit/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@
import os
import re
import unittest
import urllib

import pytest
import six

from six.moves import urllib_parse

import ipfshttpclient.multipart

ENC = "UTF-8"

class TestContentHelpers(unittest.TestCase):
"""Tests the functionality of the three content-oriented helper functions.
Expand Down Expand Up @@ -119,8 +123,8 @@ def test_init_defaults(self):
"""Test the __init__ function for default parameter values."""
name = "test_name"
expected_disposition = 'form-data; filename="test_name"'
expected_type = 'multipart/form-data; boundary="\S*"'
expected_boundary_pattern = '\S*'
expected_type = r'multipart/form-data; boundary="\S*"'
expected_boundary_pattern = r'\S*'
generator = StreamBaseSub(name)
assert generator._headers['Content-Disposition'] == expected_disposition
assert re.search(expected_type, generator.headers()['Content-Type'])
Expand Down Expand Up @@ -174,7 +178,7 @@ def test__gen_end(self):
"""Test the close function against example output."""
name = "fsdfgh"
instance = StreamBaseSub(name)
expected = b'--\S+--\r\n'
expected = b'--\\S+--\r\n'
actual = b''
for i in instance._gen_end():
actual += i
Expand All @@ -192,35 +196,54 @@ class TestStreamFileMixin(unittest.TestCase):
test__gen_file_end -- test the _gen_file_end function against example output
"""

def test__gen_file(self):
def do_test__gen_file(self, name, file_location, abspath):
"""Test the _gen_file function against sample output."""
name = "functional/fake_dir/fsdfgh"
generator = StreamFileMixinSub(name)

file = io.BytesIO()
file.write(b"!234")
file.seek(0)

expected = b'--' + generator._boundary.encode() + b'\r\nContent-Disposition: file; '\
+ b'filename="functional%2Ffake_dir%2Ffsdfgh"\r\nContent-Type: '\
+ b'text/plain\r\n\r\n' \
expected = b'--' + generator._boundary.encode() + b'\r\n'
expected += b'Abspath: ' + name.encode(ENC) + b'\r\n' if abspath else b''
expected += b'Content-Disposition: file; '\
+ b'filename="' + urllib_parse.quote_plus(name).encode(ENC) + b'"\r\n'\
+ b'Content-Type: text/plain\r\n'\
+ b'\r\n' \
+ b'!234\r\n'

headers = b"".join(generator._gen_file(name, file, content_type="text/plain"))
headers = b"".join(generator._gen_file(name, file_location, file,
content_type="text/plain"))
assert headers == expected

def test__gen_file_start(self):
def test__gen_file(self):
self.do_test__gen_file("functional/fake_dir/fsdfgh",
file_location=None, abspath=False)
def test__gen_file_relative(self):
filepath = "functional/fake_dir/fsdfgh"
self.do_test__gen_file(filepath, filepath, abspath=False)
def test__gen_file_absolute(self):
filepath = "/functional/fake_dir/fsdfgh"
self.do_test__gen_file(filepath, filepath, abspath=True)

def do_test__gen_file_start(self, name, file_location, abspath):
"""Test the _gen_file_start function against sample output."""
name = "test_name"
generator = StreamFileMixinSub(name)

expected = b'--' + generator._boundary.encode() + b'\r\nContent-Disposition: file; '\
+ b'filename="test_name"\r\nContent-Type: '\
+ b'application/octet-stream\r\n\r\n'
expected = b'--' + generator._boundary.encode() + b'\r\n'
expected += b'Abspath: ' + file_location.encode(ENC) + b'\r\n' if abspath else b''
expected += b'Content-Disposition: file; filename="' + name.encode(ENC) + b'"\r\n'\
+ b'Content-Type: application/octet-stream\r\n'\
+ b'\r\n'

headers = b"".join(generator._gen_file_start(name))
headers = b"".join(generator._gen_file_start(name, file_location))
assert headers == expected

def test__gen_file_start(self):
self.do_test__gen_file_start("test_name", file_location=None, abspath=False)
def test__gen_file_start_with_filepath(self):
name = "test_name"
self.do_test__gen_file_start(name, os.path.join(os.path.sep, name), abspath=True)

def test__gen_file_chunks(self):
"""Test the _gen_file_chunks function against example output.

Expand Down Expand Up @@ -259,7 +282,7 @@ class TestFilesStream(unittest.TestCase):
test_body -- check file stream body for proper structure
"""

def test_body(self):
def prep_test_body(self):
"""Test the body function against expected output.

Warning: This test depends on the contents of
Expand All @@ -274,15 +297,29 @@ def test_body(self):
for (dirpath, _, filenames) in os.walk(path):
temp_list = [os.path.join(dirpath, name) for name in filenames]
filenames_list.extend(temp_list)
return filenames_list

def test_body_absolute(self):
filenames_list= self.prep_test_body()
instance = ipfshttpclient.multipart.FilesStream(filenames_list)
self.check_test_body(instance, abspath=True)

def test_body_relative(self):
filenames_list= self.prep_test_body()

# Convert absolute paths to relative
relative_paths_list = [os.path.relpath(cur_path, os.getcwd())
for cur_path in filenames_list]

instance = ipfshttpclient.multipart.FilesStream(relative_paths_list)

expected = "(--\S+\r\nContent-Disposition: file; filename=\"\S+\""\
+ "\r\nContent-Type: application/\S+\r\n"\
+ "\r\n(.|\n)*\r\n)+--\S+--\r\n"
self.check_test_body(instance, abspath=False)

def check_test_body(self, instance, abspath):
expected = r"(--\S+\r\n"
expected += r"Abspath: \S+\r\n" if abspath else r""
expected += r"Content-Disposition: file; filename=\"\S+\"\r\n"
expected += r"Content-Type: application/\S+\r\n"
expected += r"\r\n(.|\n)*\r\n)+--\S+--\r\n"
actual = ""
for i in instance.body():
if type(i) is not str and type(i) is not memoryview:
Expand All @@ -308,9 +345,9 @@ def test_body(self):
path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
"functional", "fake_dir")
instance = ipfshttpclient.multipart.DirectoryStream(path)
expected = b"^(--\S+\r\nContent-Disposition: file; filename=\"\S+\""\
+ b"\r\nContent-Type: application/\S+\r\n\r\n(.|\n)*"\
+ b"\r\n)+--\S+--\r\n$"
expected = b"^(--\\S+\r\nContent-Disposition: file; filename=\"\\S+\""\
+ b"\r\nContent-Type: application/\\S+\r\n\r\n(.|\n)*"\
+ b"\r\n)+--\\S+--\r\n$"
actual = b"".join(instance.body())
assert re.search(expected, actual)

Expand All @@ -327,9 +364,9 @@ def test_body(self):
# Get OS-agnostic path to test files
text = b"Here is some text for this test."
instance = ipfshttpclient.multipart.BytesFileStream(text)
expected = b"(--\S+\r\nContent-Disposition: file; filename=\"\S+\""\
+ b"\r\nContent-Type: application/\S+\r\n"\
+ b"\r\n(.|\n)*\r\n)+--\S+--\r\n"
expected = b"(--\\S+\r\nContent-Disposition: file; filename=\"\\S+\""\
+ b"\r\nContent-Type: application/\\S+\r\n"\
+ b"\r\n(.|\n)*\r\n)+--\\S+--\r\n"
actual = b"".join(instance.body())
assert re.search(expected, actual)

Expand Down