Skip to content

Commit 722e50f

Browse files
committed
Add sgkit.variables to the doc
1 parent 54b56f1 commit 722e50f

File tree

5 files changed

+108
-36
lines changed

5 files changed

+108
-36
lines changed

docs/api.rst

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,48 @@ Methods
3434
Tajimas_D
3535
pc_relate
3636

37+
Variables
38+
=========
39+
40+
.. autosummary::
41+
:toctree: generated/
42+
43+
variables.call_genotype
44+
variables.call_genotype_mask
45+
variables.variant_contig
46+
variables.variant_position
47+
variables.variant_allele
48+
variables.sample_id
49+
variables.call_genotype_phased
50+
variables.variant_id
51+
variables.call_dosage
52+
variables.call_dosage_mask
53+
variables.call_genotype_probability
54+
variables.call_genotype_probability_mask
55+
variables.genotype_counts
56+
variables.call_allele_count
57+
variables.variant_allele_count
58+
variables.variant_hwe_p_value
59+
variables.variant_beta
60+
variables.variant_t_value
61+
variables.variant_p_value
62+
variables.covariates
63+
variables.traits
64+
variables.dosage
65+
variables.sample_pcs
66+
variables.pc_relate_phi
67+
variables.base_prediction
68+
variables.meta_prediction
69+
variables.loco_prediction
70+
variables.variant_n_called
71+
variables.variant_call_rate
72+
variables.variant_n_het
73+
variables.variant_n_hom_ref
74+
variables.variant_n_hom_alt
75+
variables.variant_n_non_ref
76+
variables.variant_allele_total
77+
variables.variant_allele_frequency
78+
3779
Utilities
3880
=========
3981

docs/conf.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
# -- Path setup --------------------------------------------------------------
88

9+
import logging as pylogging
10+
911
# If extensions (or modules to document with autodoc) are in another directory,
1012
# add these directories to sys.path here. If the directory is relative to the
1113
# documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -15,6 +17,7 @@
1517
from pathlib import Path
1618

1719
import xarray
20+
from sphinx.util import logging
1821

1922
sys.path.insert(0, os.path.abspath(".."))
2023

@@ -50,6 +53,26 @@
5053
*[p.stem for p in (HERE / "extensions").glob("*.py")],
5154
]
5255

56+
57+
# Workaround https://github.com/agronholm/sphinx-autodoc-typehints/issues/123
58+
# When this https://github.com/agronholm/sphinx-autodoc-typehints/pull/153
59+
# gets merged, we can remove this
60+
class FilterForIssue123(pylogging.Filter):
61+
def filter(self, record: pylogging.LogRecord) -> bool:
62+
msg = record.getMessage()
63+
return not (
64+
msg.startswith("Cannot treat a function")
65+
and any(
66+
s in msg
67+
for s in ["sgkit.variables.Spec", "sgkit.variables.ArrayLikeSpec"]
68+
)
69+
)
70+
71+
72+
logging.getLogger("sphinx_autodoc_typehints").logger.addFilter(FilterForIssue123())
73+
# End of workaround
74+
75+
5376
# Add any paths that contain templates here, relative to this directory.
5477
templates_path = ["_templates"]
5578

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ ignore_missing_imports = True
9292
ignore_missing_imports = True
9393
[mypy-sklearn.*]
9494
ignore_missing_imports = True
95+
[mypy-sphinx.*]
96+
ignore_missing_imports = True
9597
[mypy-sgkit.*]
9698
allow_redefinition = True
9799
[mypy-sgkit.tests.*]

sgkit/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,5 @@
3535
"Fst",
3636
"Tajimas_D",
3737
"pc_relate",
38+
"variables",
3839
]

sgkit/variables.py

Lines changed: 40 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -21,107 +21,111 @@ class ArrayLikeSpec(Spec):
2121

2222
call_genotype = ArrayLikeSpec("call_genotype", kind="i", ndim=3)
2323
"""
24-
Genotype, encoded as allele values (0 for the reference, 1 for
24+
Call genotype. Encoded as allele values (0 for the reference, 1 for
2525
the first allele, 2 for the second allele), or -1 to indicate a
2626
missing value.
2727
"""
2828
call_genotype_mask = ArrayLikeSpec("call_genotype_mask", kind="b", ndim=3)
29+
"""TODO"""
2930
variant_contig = ArrayLikeSpec("variant_contig", kind="i", ndim=1)
30-
"""The (index of the) contig for each variant"""
31+
"""The (index of the) contig for each variant."""
3132
variant_position = ArrayLikeSpec("variant_position", kind="i", ndim=1)
32-
"""The reference position of the variant"""
33+
"""The reference position of the variant."""
3334
variant_allele = ArrayLikeSpec("variant_allele", kind={"S", "O"}, ndim=2)
34-
"""The possible alleles for the variant"""
35+
"""The possible alleles for the variant."""
3536
sample_id = ArrayLikeSpec("sample_id", kind={"U", "O"}, ndim=1)
36-
"""The unique identifier of the sample"""
37+
"""The unique identifier of the sample."""
3738
call_genotype_phased = ArrayLikeSpec("call_genotype_phased", kind="b", ndim=2)
3839
"""
39-
A flag for each call indicating if it is phased or not. If
40-
omitted all calls are unphased.
40+
A flag for each call indicating if it is phased or not. If omitted
41+
all calls are unphased.
4142
"""
4243
variant_id = ArrayLikeSpec("variant_id", kind="U", ndim=1)
43-
"""The unique identifier of the variant"""
44+
"""The unique identifier of the variant."""
4445
call_dosage = ArrayLikeSpec("call_dosage", kind="f", ndim=2)
45-
"""Dosages, encoded as floats, with NaN indicating a missing value"""
46+
"""Dosages, encoded as floats, with NaN indicating a missing value."""
4647
call_dosage_mask = ArrayLikeSpec("call_dosage_mask", kind="b", ndim=2)
48+
"""TODO"""
4749
call_genotype_probability = ArrayLikeSpec("call_genotype_probability", kind="f", ndim=3)
50+
"""TODO"""
4851
call_genotype_probability_mask = ArrayLikeSpec(
4952
"call_genotype_probability_mask", kind="b", ndim=3
5053
)
54+
"""TODO"""
5155
genotype_counts = ArrayLikeSpec("genotype_counts", ndim=2, kind="i")
5256
"""
53-
Genotype counts, must correspond to an (`N`, 3) array where `N` is equal
57+
Genotype counts. Must correspond to an (`N`, 3) array where `N` is equal
5458
to the number of variants and the 3 columns contain heterozygous,
5559
homozygous reference, and homozygous alternate counts (in that order)
5660
across all samples for a variant.
5761
"""
5862
call_allele_count = ArrayLikeSpec("call_allele_count", ndim=3, kind="u")
5963
"""
60-
Allele counts with shape (variants, samples, alleles) and values
64+
Allele counts. Has shape (variants, samples, alleles) and values
6165
corresponding to the number of non-missing occurrences of each allele.
6266
"""
6367
variant_allele_count = ArrayLikeSpec("variant_allele_count", ndim=2, kind="u")
6468
"""
65-
Variant allele counts with shape (variants, alleles) and values
69+
Variant allele counts. Has shape (variants, alleles) and values
6670
corresponding to the number of non-missing occurrences of each allele.
6771
"""
6872
variant_hwe_p_value = ArrayLikeSpec("variant_hwe_p_value", kind="f")
69-
"""P values from HWE test for each variant as float in [0, 1]"""
73+
"""P values from HWE test for each variant as float in [0, 1]."""
7074
variant_beta = ArrayLikeSpec("variant_beta")
71-
"""Beta values associated with each variant and trait"""
75+
"""Beta values associated with each variant and trait."""
7276
variant_t_value = ArrayLikeSpec("variant_t_value")
73-
"""T statistics for each beta"""
77+
"""T statistics for each beta."""
7478
variant_p_value = ArrayLikeSpec("variant_p_value", kind="f")
75-
"""P values as float in [0, 1]"""
79+
"""P values as float in [0, 1]."""
7680
covariates = ArrayLikeSpec("covariates", ndim={1, 2})
7781
"""
78-
Covariate variable names, must correspond to 1 or 2D dataset
82+
Covariate variable names. Must correspond to 1 or 2D dataset
7983
variables of shape (samples[, covariates]). All covariate arrays
8084
will be concatenated along the second axis (columns).
8185
"""
8286
traits = ArrayLikeSpec("traits", ndim={1, 2})
8387
"""
84-
Trait (e.g. phenotype) variable names, must all be continuous and
88+
Trait (for example phenotype) variable names. Must all be continuous and
8589
correspond to 1 or 2D dataset variables of shape (samples[, traits]).
8690
2D trait arrays will be assumed to contain separate traits within columns
8791
and concatenated to any 1D traits along the second axis (columns).
8892
"""
8993
dosage = ArrayLikeSpec("dosage")
9094
"""
91-
Dosage variable name where "dosage" array can contain represent
95+
Dosage variable name. The "dosage" array can represent
9296
one of several possible quantities, e.g.:
93-
- Alternate allele counts
94-
- Recessive or dominant allele encodings
95-
- True dosages as computed from imputed or probabilistic variant calls
96-
- Any other custom encoding in a user-defined variable
97+
- Alternate allele counts
98+
- Recessive or dominant allele encodings
99+
- True dosages as computed from imputed or probabilistic variant calls
100+
- Any other custom encoding in a user-defined variable
97101
"""
98102
sample_pcs = ArrayLikeSpec("sample_pcs", ndim=2, kind="f")
99-
"""Sample PCs. Dimensions: (PCxS)"""
103+
"""Sample PCs (PCxS)."""
100104
pc_relate_phi = ArrayLikeSpec("pc_relate_phi", ndim=2, kind="f")
101-
"""PC Relate kinship coefficient matrix"""
105+
"""PC Relate kinship coefficient matrix."""
102106
base_prediction = ArrayLikeSpec("base_prediction", ndim=4, kind="f")
103107
"""
104-
REGENIE's base prediction: (blocks, alphas, samples, outcomes): Stage 1
108+
REGENIE's base prediction (blocks, alphas, samples, outcomes). Stage 1
105109
predictions from ridge regression reduction.
106110
"""
107111
meta_prediction = ArrayLikeSpec("meta_prediction", ndim=2, kind="f")
108112
"""
109-
REGENIE's meta_prediction: (samples, outcomes): Stage 2 predictions from
113+
REGENIE's meta_prediction (samples, outcomes). Stage 2 predictions from
110114
the best meta estimator trained on the out-of-sample Stage 1 predictions.
111115
"""
112116
loco_prediction = ArrayLikeSpec("loco_prediction", ndim=3, kind="f")
113117
"""
114-
REGENIE's loco_prediction: (contigs, samples, outcomes): LOCO predictions
118+
REGENIE's loco_prediction (contigs, samples, outcomes). LOCO predictions
115119
resulting from Stage 2 predictions ignoring effects for variant blocks on
116120
held out contigs. This will be absent if the data provided does not contain
117121
at least 2 contigs.
118122
"""
119123
variant_n_called = ArrayLikeSpec("variant_n_called", ndim=1, kind="i")
120124
"""The number of samples with called genotypes."""
121125
variant_call_rate = ArrayLikeSpec("variant_call_rate", ndim=1, kind="f")
122-
"""The number of samples with heterozygous calls"""
126+
"""The fraction of samples with called genotypes."""
123127
variant_n_het = ArrayLikeSpec("variant_n_het", ndim=1, kind="i")
124-
"""The number of samples with heterozygous calls"""
128+
"""The number of samples with heterozygous calls."""
125129
variant_n_hom_ref = ArrayLikeSpec("variant_n_hom_ref", ndim=1, kind="i")
126130
"""The number of samples with homozygous reference calls."""
127131
variant_n_hom_alt = ArrayLikeSpec("variant_n_hom_alt", ndim=1, kind="i")
@@ -150,7 +154,7 @@ def register_variable(cls, spec: ArrayLikeSpec) -> None:
150154

151155
@classmethod
152156
@overload
153-
def validate(
157+
def _validate(
154158
cls,
155159
xr_dataset: xr.Dataset,
156160
*specs: Mapping[Hashable, ArrayLikeSpec],
@@ -163,7 +167,7 @@ def validate(
163167

164168
@classmethod
165169
@overload
166-
def validate(cls, xr_dataset: xr.Dataset, *specs: ArrayLikeSpec) -> xr.Dataset:
170+
def _validate(cls, xr_dataset: xr.Dataset, *specs: ArrayLikeSpec) -> xr.Dataset:
167171
"""
168172
Validate that xr_dataset contains array(s) of interest with default
169173
variable name(s).
@@ -172,15 +176,15 @@ def validate(cls, xr_dataset: xr.Dataset, *specs: ArrayLikeSpec) -> xr.Dataset:
172176

173177
@classmethod
174178
@overload
175-
def validate(cls, xr_dataset: xr.Dataset, *specs: Hashable) -> xr.Dataset:
179+
def _validate(cls, xr_dataset: xr.Dataset, *specs: Hashable) -> xr.Dataset:
176180
"""
177181
Validate that xr_dataset contains array(s) of interest with variable
178182
name(s). Variable must be registered in `SgkitVariables.registered_variables`.
179183
"""
180184
...
181185

182186
@classmethod
183-
def validate(
187+
def _validate(
184188
cls,
185189
xr_dataset: xr.Dataset,
186190
*specs: Union[ArrayLikeSpec, Mapping[Hashable, ArrayLikeSpec], Hashable],
@@ -217,5 +221,5 @@ def _check_field(
217221
) from e
218222

219223

220-
validate = SgkitVariables.validate
221-
"""Shorthand for SgkitVariables.validate"""
224+
validate = SgkitVariables._validate
225+
"""Shortcut for SgkitVariables._validate."""

0 commit comments

Comments
 (0)