Skip to content

Commit 0053a86

Browse files
authored
add unicode regex for paths and fix path encoding (#1420)
1 parent 8bdc152 commit 0053a86

File tree

5 files changed: +107 additions, -47 deletions (lines changed)

5 files changed: +107 additions, -47 deletions (lines changed)

cwltool/command_line_tool.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import shutil
1111
import threading
1212
import urllib
13+
import urllib.parse
1314
from functools import cmp_to_key, partial
1415
from typing import (
1516
Any,
@@ -83,7 +84,9 @@
8384
if TYPE_CHECKING:
8485
from .provenance_profile import ProvenanceProfile # pylint: disable=unused-import
8586

86-
ACCEPTLIST_EN_STRICT_RE = re.compile(r"^[a-zA-Z0-9._+-]+$")
87+
ACCEPTLIST_EN_STRICT_RE = re.compile(
88+
r"^[\w.+\-\u2600-\u26FF\U0001f600-\U0001f64f]+$"
89+
) # accept Unicode word characters, ".", "+", "-", Miscellaneous Symbols (U+2600-U+26FF) and Emoticons (U+1F600-U+1F64F)
8790
ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*") # Accept anything
8891
ACCEPTLIST_RE = ACCEPTLIST_EN_STRICT_RE
8992
DEFAULT_CONTAINER_MSG = """
@@ -1178,7 +1181,10 @@ def collect_output(
11781181
{
11791182
"location": g,
11801183
"path": fs_access.join(
1181-
builder.outdir, g[len(prefix[0]) + 1 :]
1184+
builder.outdir,
1185+
urllib.parse.unquote(
1186+
g[len(prefix[0]) + 1 :]
1187+
),
11821188
),
11831189
"basename": os.path.basename(g),
11841190
"nameroot": os.path.splitext(

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ schema-salad>=7.1,<8
66
prov==1.5.1
77
bagit==1.7.0
88
mypy-extensions
9-
psutil
9+
psutil<5.8.0
1010
typing-extensions
1111
coloredlogs
1212
pydot>=1.4.1

tests/test_path_checks.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# -*- coding: utf-8 -*-
2+
import pytest
3+
from pathlib import Path
4+
5+
from cwltool.main import main
6+
7+
from .util import needs_docker
8+
9+
script = """
10+
#!/usr/bin/env cwl-runner
11+
cwlVersion: v1.0
12+
class: CommandLineTool
13+
inputs:
14+
- id: input
15+
type: File
16+
inputBinding:
17+
position: 0
18+
- id: output
19+
type: string
20+
outputs:
21+
- id: output
22+
type: File
23+
outputBinding:
24+
glob: "$(inputs.output)"
25+
stdout: "$(inputs.output)"
26+
baseCommand: [cat]
27+
"""
28+
29+
30+
@needs_docker
31+
def test_spaces_in_input_files(tmp_path: Path) -> None:
32+
script_name = tmp_path / "script"
33+
spaces = tmp_path / "test with spaces"
34+
spaces.touch()
35+
with script_name.open(mode="w") as script_file:
36+
script_file.write(script)
37+
38+
params = [
39+
"--debug",
40+
"--outdir",
41+
str(tmp_path / "outdir"),
42+
str(script_name),
43+
"--input",
44+
str(spaces),
45+
"--output",
46+
"test.txt",
47+
]
48+
assert main(params) == 1
49+
assert main(["--relax-path-checks"] + params) == 0
50+
51+
52+
@needs_docker
53+
@pytest.mark.parametrize(
54+
"filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"]
55+
)
56+
def test_unicode_in_input_files(tmp_path: Path, filename: str) -> None:
57+
script_name = tmp_path / "script"
58+
inputfile = tmp_path / filename
59+
inputfile.touch()
60+
with script_name.open(mode="w") as script_file:
61+
script_file.write(script)
62+
63+
params = [
64+
"--debug",
65+
"--outdir",
66+
str(tmp_path / "outdir"),
67+
str(script_name),
68+
"--input",
69+
str(inputfile),
70+
"--output",
71+
"test.txt",
72+
]
73+
assert main(params) == 0
74+
75+
76+
@needs_docker
77+
@pytest.mark.parametrize(
78+
"filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"]
79+
)
80+
def test_unicode_in_output_files(tmp_path: Path, filename: str) -> None:
81+
script_name = tmp_path / "script"
82+
inputfile = tmp_path / "test"
83+
inputfile.touch()
84+
with script_name.open(mode="w") as script_file:
85+
script_file.write(script)
86+
87+
params = [
88+
"--debug",
89+
"--outdir",
90+
str(tmp_path / "outdir"),
91+
str(script_name),
92+
"--input",
93+
str(inputfile),
94+
"--output",
95+
filename,
96+
]
97+
assert main(params) == 0

tests/test_relax_path_checks.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ deps =
4444
py{36,37,38,39}-mypy: mypy==0.800
4545

4646
setenv =
47-
py{36,37,38,39}-unit: LC_ALL = C
47+
py{36,37,38,39}-unit: LC_ALL = C.UTF-8
4848

4949
commands =
5050
py{36,37,38,39}-unit: python3 -m pip install -U pip setuptools wheel

0 commit comments

Comments
 (0)