Skip to content

Commit 66c5a1c

Browse files
gegnewzbjornson
authored and committed
feat: Add support for transfers from S3 to FcsFile.create
1 parent 6e94832 commit 66c5a1c

File tree

6 files changed

+162
-18
lines changed

6 files changed

+162
-18
lines changed

.github/workflows/push.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ jobs:
4646
pip install pytest
4747
pip install pytest-cov
4848
python -m pytest --cov=./ --cov-report=xml
49+
env:
50+
S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY}}
51+
S3_SECRET_KEY: ${{ secrets.S3_SECRET_KEY}}
4952
- name: Upload coverage to Codecov
5053
uses: codecov/codecov-action@v1
5154
with:

cellengine/resources/fcs_file.py

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from __future__ import annotations
2+
from cellengine.utils.helpers import is_valid_id
23
from cellengine.utils.parse_fcs_file import parse_fcs_file
34
from cellengine.utils.dataclass_mixin import DataClassMixin, ReadOnly
45
from dataclasses import dataclass, field
@@ -102,25 +103,65 @@ def upload(cls, experiment_id: str, filepath: str) -> FcsFile:
102103
def create(
103104
cls,
104105
experiment_id: str,
105-
fcs_files: List[str],
106+
fcs_files: Union[str, List[str], Dict[str, str]],
106107
filename: str = None,
107108
add_file_number: bool = False,
108109
add_event_number: bool = False,
109110
pre_subsample_n: int = None,
110111
pre_subsample_p: float = None,
111112
seed: int = None,
112113
) -> FcsFile:
113-
"""Creates an FCS file by copying, concatenating and/or subsampling
114-
existing file(s) from this or other experiments.
115-
116-
This endpoint can be used to import files from other experiments.
114+
"""Creates an FCS file by copying, concatenating and/or
115+
subsampling existing file(s) from this or other experiments, or by
116+
importing from an S3-compatible service. This endpoint can be used to
117+
import files from other experiments.
118+
119+
When concatenating and subsampling at the same time, subsampling is
120+
applied to each file before concatenating.
121+
122+
If addFileNumber is true, a file number column (channel) will be added to the
123+
output file indicating which file each event (cell) came from. The values in
124+
this column have a uniform random spread (±0.25 of the integer value) to ease
125+
visualization. While this column can be useful for analysis, it will cause the
126+
experiment to have FCS files with different panels unless all FCS files that
127+
have not been concatenated are deleted.
128+
129+
During concatenation, any FCS header parameters that do not match
130+
between files will be removed, with some exceptions:
131+
132+
- $BTIM (clock time at beginning of acquisition) and $DATE will be
133+
set to the earliest value among the input files.
134+
- $ETIM (clock time at end of acquisition) will be set to the latest value
135+
among the input files.
136+
- $PnR (range for parameter n) will be set to
137+
the highest value among the input files.
138+
139+
All channels present in the first FCS file in the fcs_files parameter
140+
must also be present in the other FCS files.
141+
142+
When importing from an S3-compatible service, be aware of the
143+
following:
144+
145+
- Only a single file can be imported at a time.
146+
- The host property must include the bucket and region as
147+
applicable. For example, for AWS, this would look like
148+
mybucket.s3.us-east-2.amazonaws.com.
149+
- The path property must specify the full path to the object, e.g.
150+
/Study001/Specimen01.fcs.
151+
- Importing private S3 objects requires an accessKey and a
152+
secretKey for a user with appropriate permissions. For AWS,
153+
GetObject is required.
154+
- Importing objects may incur fees from the S3 service provider.
117155
118156
Args:
119157
experiment_id: ID of the experiment to which the file belongs
120158
fcs_files: ID of file or list of IDs of files or objects to process.
121159
If more than one file is provided, they will be concatenated in
122160
order. To import files from other experiments, pass a list of dicts
123-
with _id and experimentId properties.
161+
with _id and experiment_id properties. To import a file from an
162+
S3-compatible service, provide a Dict with keys "host" and
163+
"path"; if the S3 object is private, additionally provide
164+
"access_key" and "secret_key".
124165
filename (optional): Rename the uploaded file.
125166
add_file_number (optional): If
126167
concatenating files, adds a file number channel to the
@@ -142,10 +183,17 @@ def create(
142183
"""
143184

144185
def _parse_fcs_file_args(args):
145-
if type(args) is list:
186+
if type(args) is list and all(is_valid_id(arg) for arg in args):
146187
return args
147-
else:
188+
elif type(args) is dict:
189+
if {"host", "path"} <= args.keys():
190+
return [args]
191+
if {"_id", "experiment_id"} <= args.keys():
192+
return [args]
193+
elif type(args) is str and is_valid_id(args):
148194
return [args]
195+
else:
196+
raise ValueError("Invalid parameters for 'fcs_file'.")
149197

150198
body = {"fcsFiles": _parse_fcs_file_args(fcs_files), "filename": filename}
151199
optional_params = {
@@ -179,7 +227,7 @@ def plot(
179227
population_id: str = None,
180228
**kwargs,
181229
) -> Plot:
182-
"""Buid a plot for an FcsFile.
230+
"""Build a plot for an FcsFile.
183231
184232
See [`Plot.get`][cellengine.resources.plot.Plot.get] for more information.
185233
"""

tests/integration/test_integration.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import pytest
23
import pandas
34

@@ -18,7 +19,9 @@
1819

1920
@pytest.fixture(scope="module")
2021
def client():
21-
return cellengine.APIClient("gegnew", "testpass123")
22+
username = os.environ.get("CELLENGINE_USERNAME", "gegnew")
23+
password = os.environ.get("CELLENGINE_PASSWORD", "testpass1")
24+
return cellengine.APIClient(username=username, password=password)
2225

2326

2427
@pytest.fixture(scope="module")
@@ -255,3 +258,20 @@ def test_experiment_populations(setup_experiment, client):
255258
# DELETE
256259
complex_pop.delete()
257260
assert "complex pop" not in [p.name for p in experiment.populations]
261+
262+
263+
def test_create_new_fcsfile_from_s3(setup_experiment, client):
264+
experiment = client.get_experiment(name="new_experiment")
265+
s3_dict = {
266+
"host": "ce-test-s3-a.s3.us-east-2.amazonaws.com",
267+
"path": "/Specimen_001_A6_A06.fcs",
268+
"access_key": os.environ.get("S3_ACCESS_KEY"),
269+
"secret_key": os.environ.get("S3_SECRET_KEY"),
270+
}
271+
272+
file = FcsFile.create(
273+
experiment._id,
274+
s3_dict,
275+
"new name",
276+
)
277+
assert file.size == 22625

tests/unit/resources/test_fcs_file_parse_args.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,37 @@ def test_should_create_fcs_file(ENDPOINT_BASE, client, fcs_files):
3838

3939

4040
params = [
41+
# (request, expected response)
4142
(FCSFILE_ID, [FCSFILE_ID]),
4243
([FCSFILE_ID], [FCSFILE_ID]),
4344
(
44-
["fcs_file_id_1", "fcs_file_id_2", "fcs_file_id_3"],
45-
["fcs_file_id_1", "fcs_file_id_2", "fcs_file_id_3"],
45+
[
46+
"5d64abe2ca9df61349ed8e7a",
47+
"5d64abe2ca9df61349ed8e7b",
48+
"5d64abe2ca9df61349ed8e7c",
49+
],
50+
[
51+
"5d64abe2ca9df61349ed8e7a",
52+
"5d64abe2ca9df61349ed8e7b",
53+
"5d64abe2ca9df61349ed8e7c",
54+
],
55+
),
56+
(
57+
{"experiment_id": EXP_ID, "_id": FCSFILE_ID},
58+
[{"experimentId": EXP_ID, "_id": FCSFILE_ID}],
59+
),
60+
(
61+
{
62+
"host": "ce-test-s3-a.s3.us-east-2.amazonaws.com",
63+
"path": "/Specimen_001_A6_A06.fcs",
64+
},
65+
[
66+
{
67+
"host": "ce-test-s3-a.s3.us-east-2.amazonaws.com",
68+
"path": "/Specimen_001_A6_A06.fcs",
69+
}
70+
],
4671
),
47-
({EXP_ID: FCSFILE_ID}, [{EXP_ID: FCSFILE_ID}]),
48-
([{EXP_ID: FCSFILE_ID}], [{EXP_ID: FCSFILE_ID}]),
4972
]
5073

5174

tests/unit/resources/test_fcsfile.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from cellengine.resources.fcs_file import FcsFile
99
from cellengine.resources.compensation import Compensation
10+
from cellengine.utils.helpers import to_camel_case
1011

1112

1213
EXP_ID = "5d38a6f79fae87499999a74b"
@@ -159,3 +160,52 @@ def test_get_events_save_kwargs(ENDPOINT_BASE, client, fcs_files):
159160

160161
# Then:
161162
assert file._events_kwargs == {"compensatedQ": False, "seed": 10}
163+
164+
165+
@responses.activate
166+
def test_create_from_s3(ENDPOINT_BASE, client, fcs_files):
167+
responses.add(
168+
responses.POST,
169+
ENDPOINT_BASE + f"/experiments/{EXP_ID}/fcsfiles",
170+
json=fcs_files[1],
171+
)
172+
173+
s3_dict = {
174+
"host": "ce-test-s3-a.s3.us-east-2.amazonaws.com",
175+
"path": "/Specimen_001_A6_A06.fcs",
176+
"access_key": os.environ.get("S3_ACCESS_KEY"),
177+
"secret_key": os.environ.get("S3_SECRET_KEY"),
178+
}
179+
180+
FcsFile.create(
181+
EXP_ID,
182+
s3_dict,
183+
"new name",
184+
)
185+
payload = json.loads(responses.calls[0].request.body)["fcsFiles"][0] # type: ignore
186+
187+
assert {"host", "path", "accessKey", "secretKey"} <= payload.keys()
188+
assert payload == {to_camel_case(k): v for k, v in s3_dict.items()}
189+
190+
191+
@responses.activate
192+
def test_create_from_another_experiment(ENDPOINT_BASE, client, fcs_files):
193+
responses.add(
194+
responses.POST,
195+
ENDPOINT_BASE + f"/experiments/{EXP_ID}/fcsfiles",
196+
json=fcs_files[1],
197+
)
198+
199+
file_dict = {
200+
"_id": fcs_files[1]["_id"],
201+
"experiment_id": EXP_ID,
202+
}
203+
204+
FcsFile.create(
205+
EXP_ID,
206+
file_dict,
207+
"new name",
208+
)
209+
payload = json.loads(responses.calls[0].request.body) # type: ignore
210+
211+
assert {"_id", "experimentId"} <= payload["fcsFiles"][0].keys()

tests/unit/utils/test_helpers.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def test_alter_keys_converts_dict_to_camel_case():
99
"fcsFile": "baz",
1010
}
1111
r = alter_keys(d, to_camel_case)
12-
assert {"_id", "experimentId", "fcsFile"} <= r.keys()
12+
assert {"_id", "experimentId", "fcsFile"} == r.keys()
1313

1414

1515
def test_alter_keys_converts_nested_dict_to_camel_case():
@@ -23,8 +23,8 @@ def test_alter_keys_converts_nested_dict_to_camel_case():
2323
},
2424
}
2525
r = alter_keys(d, to_camel_case)
26-
assert {"_id", "experimentId", "fcsFile"} <= r.keys()
27-
assert {"_id", "experimentId", "fcsFile"} <= r["fcsFile"].keys()
26+
assert {"_id", "experimentId", "fcsFile"} == r.keys()
27+
assert {"_id", "experimentId", "fcsFile"} == r["fcsFile"].keys()
2828

2929

3030
def test_alter_keys_converts_list_of_dicts_to_camel_case():
@@ -41,7 +41,7 @@ def test_alter_keys_converts_list_of_dicts_to_camel_case():
4141
},
4242
]
4343
r = alter_keys(d, to_camel_case)
44-
assert {"_id", "experimentId", "fcsFile"} <= r[0].keys()
44+
assert {"_id", "experimentId", "fcsFile"} == r[0].keys()
4545

4646

4747
params = [

0 commit comments

Comments
 (0)