Skip to content

Commit a510e12

Browse files
ravwojdyla authored and mergify[bot] committed
Use Hashable instead of string for var names
1 parent d652c34 commit a510e12

File tree

5 files changed

+29
-29
lines changed

5 files changed

+29
-29
lines changed

sgkit/stats/aggregation.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def _swap(dim: Dimension) -> Dimension:
279279
return "samples" if dim == "variants" else "variants"
280280

281281

282-
def call_rate(ds: Dataset, dim: Dimension, call_genotype_mask: str) -> Dataset:
282+
def call_rate(ds: Dataset, dim: Dimension, call_genotype_mask: Hashable) -> Dataset:
283283
odim = _swap(dim)[:-1]
284284
n_called = (~ds[call_genotype_mask].any(dim="ploidy")).sum(dim=dim)
285285
return xr.Dataset(
@@ -288,7 +288,7 @@ def call_rate(ds: Dataset, dim: Dimension, call_genotype_mask: str) -> Dataset:
288288

289289

290290
def genotype_count(
291-
ds: Dataset, dim: Dimension, call_genotype: str, call_genotype_mask: str
291+
ds: Dataset, dim: Dimension, call_genotype: Hashable, call_genotype_mask: Hashable
292292
) -> Dataset:
293293
odim = _swap(dim)[:-1]
294294
M, G = ds[call_genotype_mask].any(dim="ploidy"), ds[call_genotype]
@@ -310,9 +310,9 @@ def genotype_count(
310310

311311
def allele_frequency(
312312
ds: Dataset,
313-
call_genotype: str,
314-
call_genotype_mask: str,
315-
variant_allele_count: Optional[str],
313+
call_genotype: Hashable,
314+
call_genotype_mask: Hashable,
315+
variant_allele_count: Optional[Hashable],
316316
) -> Dataset:
317317
data_vars: Dict[Hashable, Any] = {}
318318
# only compute variant allele count if not already in dataset
@@ -339,9 +339,9 @@ def allele_frequency(
339339
def variant_stats(
340340
ds: Dataset,
341341
*,
342-
call_genotype_mask: str = variables.call_genotype_mask,
343-
call_genotype: str = variables.call_genotype,
344-
variant_allele_count: Optional[str] = None,
342+
call_genotype_mask: Hashable = variables.call_genotype_mask,
343+
call_genotype: Hashable = variables.call_genotype,
344+
variant_allele_count: Optional[Hashable] = None,
345345
merge: bool = True,
346346
) -> Dataset:
347347
"""Compute quality control variant statistics from genotype calls.

sgkit/stats/association.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dataclasses import dataclass
2-
from typing import Optional, Sequence, Union
2+
from typing import Hashable, Optional, Sequence, Union
33

44
import dask.array as da
55
import numpy as np
@@ -105,7 +105,7 @@ def linear_regression(
105105

106106

107107
def _get_loop_covariates(
108-
ds: Dataset, call_genotype: str, dosage: Optional[str] = None
108+
ds: Dataset, call_genotype: Hashable, dosage: Optional[Hashable] = None
109109
) -> Array:
110110
if dosage is None:
111111
# TODO: This should be (probably gwas-specific) allele
@@ -119,11 +119,11 @@ def _get_loop_covariates(
119119
def gwas_linear_regression(
120120
ds: Dataset,
121121
*,
122-
dosage: str,
123-
covariates: Union[str, Sequence[str]],
124-
traits: Union[str, Sequence[str]],
122+
dosage: Hashable,
123+
covariates: Union[Hashable, Sequence[Hashable]],
124+
traits: Union[Hashable, Sequence[Hashable]],
125125
add_intercept: bool = True,
126-
call_genotype: str = variables.call_genotype,
126+
call_genotype: Hashable = variables.call_genotype,
127127
merge: bool = True,
128128
) -> Dataset:
129129
"""Run linear regression to identify continuous trait associations with genetic variants.
@@ -192,9 +192,9 @@ def gwas_linear_regression(
192192
Nature Genetics 47 (3): 284–90.
193193
194194
"""
195-
if isinstance(covariates, str):
195+
if isinstance(covariates, Hashable):
196196
covariates = [covariates]
197-
if isinstance(traits, str):
197+
if isinstance(traits, Hashable):
198198
traits = [traits]
199199

200200
variables.validate(

sgkit/stats/hwe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ def hardy_weinberg_test(
127127
ds: Dataset,
128128
*,
129129
genotype_counts: Optional[Hashable] = None,
130-
call_genotype: str = variables.call_genotype,
131-
call_genotype_mask: str = variables.call_genotype_mask,
130+
call_genotype: Hashable = variables.call_genotype,
131+
call_genotype_mask: Hashable = variables.call_genotype_mask,
132132
merge: bool = True,
133133
) -> Dataset:
134134
"""Exact test for HWE as described in Wigginton et al. 2005 [1].

sgkit/stats/pc_relate.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Tuple
1+
from typing import Hashable, Tuple
22

33
import dask.array as da
44
import xarray as xr
@@ -24,8 +24,8 @@ def _impute_genotype_call_with_variant_mean(
2424

2525
def _collapse_ploidy(
2626
ds: xr.Dataset,
27-
call_genotype: str = variables.call_genotype,
28-
call_genotype_mask: str = variables.call_genotype_mask,
27+
call_genotype: Hashable = variables.call_genotype,
28+
call_genotype_mask: Hashable = variables.call_genotype_mask,
2929
) -> Tuple[xr.DataArray, xr.DataArray]:
3030
call_g_mask = ds[call_genotype_mask].any(dim="ploidy")
3131
call_g = xr.where(call_g_mask, -1, ds[call_genotype].sum(dim="ploidy")) # type: ignore[no-untyped-call]
@@ -36,9 +36,9 @@ def pc_relate(
3636
ds: xr.Dataset,
3737
*,
3838
maf: float = 0.01,
39-
call_genotype: str = variables.call_genotype,
40-
call_genotype_mask: str = variables.call_genotype_mask,
41-
sample_pcs: str = variables.sample_pcs,
39+
call_genotype: Hashable = variables.call_genotype,
40+
call_genotype_mask: Hashable = variables.call_genotype_mask,
41+
sample_pcs: Hashable = variables.sample_pcs,
4242
merge: bool = True
4343
) -> xr.Dataset:
4444
"""Compute PC-Relate as described in Conomos, et al. 2016 [1].

sgkit/stats/regenie.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -729,9 +729,9 @@ def regenie(
729729
ds: Dataset,
730730
*,
731731
dosage: str,
732-
covariates: Union[str, Sequence[str]],
733-
traits: Union[str, Sequence[str]],
734-
variant_contig: str = variables.variant_contig,
732+
covariates: Union[Hashable, Sequence[Hashable]],
733+
traits: Union[Hashable, Sequence[Hashable]],
734+
variant_contig: Hashable = variables.variant_contig,
735735
variant_block_size: Optional[Union[int, Tuple[int, ...]]] = None,
736736
sample_block_size: Optional[Union[int, Tuple[int, ...]]] = None,
737737
alphas: Optional[Sequence[float]] = None,
@@ -857,9 +857,9 @@ def regenie(
857857
858858
[2] - https://glow.readthedocs.io/en/latest/tertiary/whole-genome-regression.html
859859
"""
860-
if isinstance(covariates, str):
860+
if isinstance(covariates, Hashable):
861861
covariates = [covariates]
862-
if isinstance(traits, str):
862+
if isinstance(traits, Hashable):
863863
traits = [traits]
864864

865865
variables.validate(

0 commit comments

Comments
 (0)