Skip to content
This repository was archived by the owner on Oct 15, 2020. It is now read-only.

Commit 3748617

Browse files
committed
Use encode_array from sgkit.
Make coverage 100%. Add GH Action to run test and build.
1 parent a014507 commit 3748617

File tree

4 files changed

+22
-8
lines changed

4 files changed

+22
-8
lines changed

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ dask[array]
22
dask[dataframe]
33
fsspec
44
numpy
5+
scipy
56
xarray
67
bgen_reader
7-
git+https://github.com/tomwhite/sgkit@dosages
8+
git+https://github.com/pystatgen/sgkit

sgkit_bgen/bgen_reader.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from xarray import Dataset
1313

1414
from sgkit import create_genotype_dosage_dataset
15+
from sgkit.utils import encode_array
1516

1617
PathType = Union[str, Path]
1718

@@ -88,9 +89,11 @@ def split(allele_row):
8889

8990
def __getitem__(self, idx):
9091
if not isinstance(idx, tuple):
91-
raise IndexError(f"Indexer must be tuple (received {type(idx)})")
92+
raise IndexError( # pragma: no cover
93+
f"Indexer must be tuple (received {type(idx)})"
94+
)
9295
if len(idx) != self.ndim:
93-
raise IndexError(
96+
raise IndexError( # pragma: no cover
9497
f"Indexer must be two-item tuple (received {len(idx)} slices)"
9598
)
9699

@@ -138,9 +141,9 @@ def read_bgen(
138141
path : PathType
139142
Path to BGEN file.
140143
chunks : Union[str, int, tuple], optional
141-
Chunk size for genotype (i.e. `.bed`) data, by default "auto"
144+
Chunk size for genotype data, by default "auto"
142145
lock : bool, optional
143-
Whether or not to synchronize concurrent reads of `.bed`
146+
Whether or not to synchronize concurrent reads of
144147
file blocks, by default False. This is passed through to
145148
[dask.array.from_array](https://docs.dask.org/en/latest/array-api.html#dask.array.from_array).
146149
persist : bool, optional
@@ -152,9 +155,7 @@ def read_bgen(
152155

153156
bgen_reader = BgenReader(path, persist)
154157

155-
variant_contig_names, variant_contig = np.unique(
156-
np.array(bgen_reader.contig, dtype=str), return_inverse=True
157-
)
158+
variant_contig, variant_contig_names = encode_array(bgen_reader.contig.compute())
158159
variant_contig_names = list(variant_contig_names)
159160
variant_contig = variant_contig.astype("int16")
160161

Binary file not shown.

sgkit_bgen/tests/test_bgen_reader.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,15 @@ def test_read_bgen_with_sample_file(shared_datadir):
1616
ds = read_bgen(path)
1717
# Check the sample IDs are the ones from the .sample file
1818
assert ds["sample/id"].values.tolist() == ["s0", "s1", "s2", "s3"]
19+
20+
21+
def test_read_bgen_with_no_samples(shared_datadir):
22+
path = shared_datadir / "complex.23bits.no.samples.bgen"
23+
ds = read_bgen(path)
24+
# Check the sample IDs are generated
25+
assert ds["sample/id"].values.tolist() == [
26+
"sample_0",
27+
"sample_1",
28+
"sample_2",
29+
"sample_3",
30+
]

0 commit comments

Comments
 (0)