@@ -21,107 +21,111 @@ class ArrayLikeSpec(Spec):
21
21
22
22
call_genotype = ArrayLikeSpec ("call_genotype" , kind = "i" , ndim = 3 )
23
23
"""
24
- Genotype, encoded as allele values (0 for the reference, 1 for
24
+ Call genotype. Encoded as allele values (0 for the reference, 1 for
25
25
the first allele, 2 for the second allele), or -1 to indicate a
26
26
missing value.
27
27
"""
28
28
call_genotype_mask = ArrayLikeSpec ("call_genotype_mask" , kind = "b" , ndim = 3 )
29
+ """TODO"""
29
30
variant_contig = ArrayLikeSpec ("variant_contig" , kind = "i" , ndim = 1 )
30
- """The (index of the) contig for each variant"""
31
+ """The (index of the) contig for each variant. """
31
32
variant_position = ArrayLikeSpec ("variant_position" , kind = "i" , ndim = 1 )
32
- """The reference position of the variant"""
33
+ """The reference position of the variant. """
33
34
variant_allele = ArrayLikeSpec ("variant_allele" , kind = {"S" , "O" }, ndim = 2 )
34
- """The possible alleles for the variant"""
35
+ """The possible alleles for the variant. """
35
36
sample_id = ArrayLikeSpec ("sample_id" , kind = {"U" , "O" }, ndim = 1 )
36
- """The unique identifier of the sample"""
37
+ """The unique identifier of the sample. """
37
38
call_genotype_phased = ArrayLikeSpec ("call_genotype_phased" , kind = "b" , ndim = 2 )
38
39
"""
39
- A flag for each call indicating if it is phased or not. If
40
- omitted all calls are unphased.
40
+ A flag for each call indicating if it is phased or not. If omitted
41
+ all calls are unphased.
41
42
"""
42
43
variant_id = ArrayLikeSpec ("variant_id" , kind = "U" , ndim = 1 )
43
- """The unique identifier of the variant"""
44
+ """The unique identifier of the variant. """
44
45
call_dosage = ArrayLikeSpec ("call_dosage" , kind = "f" , ndim = 2 )
45
- """Dosages, encoded as floats, with NaN indicating a missing value"""
46
+ """Dosages, encoded as floats, with NaN indicating a missing value. """
46
47
call_dosage_mask = ArrayLikeSpec ("call_dosage_mask" , kind = "b" , ndim = 2 )
48
+ """TODO"""
47
49
call_genotype_probability = ArrayLikeSpec ("call_genotype_probability" , kind = "f" , ndim = 3 )
50
+ """TODO"""
48
51
call_genotype_probability_mask = ArrayLikeSpec (
49
52
"call_genotype_probability_mask" , kind = "b" , ndim = 3
50
53
)
54
+ """TODO"""
51
55
genotype_counts = ArrayLikeSpec ("genotype_counts" , ndim = 2 , kind = "i" )
52
56
"""
53
- Genotype counts, must correspond to an (`N`, 3) array where `N` is equal
57
+ Genotype counts. Must correspond to an (`N`, 3) array where `N` is equal
54
58
to the number of variants and the 3 columns contain heterozygous,
55
59
homozygous reference, and homozygous alternate counts (in that order)
56
60
across all samples for a variant.
57
61
"""
58
62
call_allele_count = ArrayLikeSpec ("call_allele_count" , ndim = 3 , kind = "u" )
59
63
"""
60
- Allele counts with shape (variants, samples, alleles) and values
64
+ Allele counts. With shape (variants, samples, alleles) and values
61
65
corresponding to the number of non-missing occurrences of each allele.
62
66
"""
63
67
variant_allele_count = ArrayLikeSpec ("variant_allele_count" , ndim = 2 , kind = "u" )
64
68
"""
65
- Variant allele counts with shape (variants, alleles) and values
69
+ Variant allele counts. With shape (variants, alleles) and values
66
70
corresponding to the number of non-missing occurrences of each allele.
67
71
"""
68
72
variant_hwe_p_value = ArrayLikeSpec ("variant_hwe_p_value" , kind = "f" )
69
- """P values from HWE test for each variant as float in [0, 1]"""
73
+ """P values from HWE test for each variant as float in [0, 1]. """
70
74
variant_beta = ArrayLikeSpec ("variant_beta" )
71
- """Beta values associated with each variant and trait"""
75
+ """Beta values associated with each variant and trait. """
72
76
variant_t_value = ArrayLikeSpec ("variant_t_value" )
73
- """T statistics for each beta"""
77
+ """T statistics for each beta. """
74
78
variant_p_value = ArrayLikeSpec ("variant_p_value" , kind = "f" )
75
- """P values as float in [0, 1]"""
79
+ """P values as float in [0, 1]. """
76
80
covariates = ArrayLikeSpec ("covariates" , ndim = {1 , 2 })
77
81
"""
78
- Covariate variable names, must correspond to 1 or 2D dataset
82
+ Covariate variable names. Must correspond to 1 or 2D dataset
79
83
variables of shape (samples[, covariates]). All covariate arrays
80
84
will be concatenated along the second axis (columns).
81
85
"""
82
86
traits = ArrayLikeSpec ("traits" , ndim = {1 , 2 })
83
87
"""
84
- Trait (e.g. phenotype) variable names, must all be continuous and
88
+ Trait (for example phenotype) variable names. Must all be continuous and
85
89
correspond to 1 or 2D dataset variables of shape (samples[, traits]).
86
90
2D trait arrays will be assumed to contain separate traits within columns
87
91
and concatenated to any 1D traits along the second axis (columns).
88
92
"""
89
93
dosage = ArrayLikeSpec ("dosage" )
90
94
"""
91
- Dosage variable name where "dosage" array can contain represent
95
+ Dosage variable name. The "dosage" array can represent
92
96
one of several possible quantities, e.g.:
93
- - Alternate allele counts
94
- - Recessive or dominant allele encodings
95
- - True dosages as computed from imputed or probabilistic variant calls
96
- - Any other custom encoding in a user-defined variable
97
+ - Alternate allele counts
98
+ - Recessive or dominant allele encodings
99
+ - True dosages as computed from imputed or probabilistic variant calls
100
+ - Any other custom encoding in a user-defined variable
97
101
"""
98
102
sample_pcs = ArrayLikeSpec ("sample_pcs" , ndim = 2 , kind = "f" )
99
- """Sample PCs. Dimensions: (PCxS)"""
103
+ """Sample PCs (PCxS). """
100
104
pc_relate_phi = ArrayLikeSpec ("pc_relate_phi" , ndim = 2 , kind = "f" )
101
- """PC Relate kinship coefficient matrix"""
105
+ """PC Relate kinship coefficient matrix. """
102
106
base_prediction = ArrayLikeSpec ("base_prediction" , ndim = 4 , kind = "f" )
103
107
"""
104
- REGENIE's base prediction: (blocks, alphas, samples, outcomes): Stage 1
108
+ REGENIE's base prediction (blocks, alphas, samples, outcomes). Stage 1
105
109
predictions from ridge regression reduction.
106
110
"""
107
111
meta_prediction = ArrayLikeSpec ("meta_prediction" , ndim = 2 , kind = "f" )
108
112
"""
109
- REGENIE's meta_prediction: (samples, outcomes): Stage 2 predictions from
113
+ REGENIE's meta_prediction (samples, outcomes). Stage 2 predictions from
110
114
the best meta estimator trained on the out-of-sample Stage 1 predictions.
111
115
"""
112
116
loco_prediction = ArrayLikeSpec ("loco_prediction" , ndim = 3 , kind = "f" )
113
117
"""
114
- REGENIE's loco_prediction: (contigs, samples, outcomes): LOCO predictions
118
+ REGENIE's loco_prediction (contigs, samples, outcomes). LOCO predictions
115
119
resulting from Stage 2 predictions ignoring effects for variant blocks on
116
120
held out contigs. This will be absent if the data provided does not contain
117
121
at least 2 contigs.
118
122
"""
119
123
variant_n_called = ArrayLikeSpec ("variant_n_called" , ndim = 1 , kind = "i" )
120
124
"""The number of samples with called genotypes."""
121
125
variant_call_rate = ArrayLikeSpec ("variant_call_rate" , ndim = 1 , kind = "f" )
122
- """The number of samples with heterozygous calls"""
126
+ """The fraction of samples with called genotypes."""
123
127
variant_n_het = ArrayLikeSpec ("variant_n_het" , ndim = 1 , kind = "i" )
124
- """The number of samples with heterozygous calls"""
128
+ """The number of samples with heterozygous calls. """
125
129
variant_n_hom_ref = ArrayLikeSpec ("variant_n_hom_ref" , ndim = 1 , kind = "i" )
126
130
"""The number of samples with homozygous reference calls."""
127
131
variant_n_hom_alt = ArrayLikeSpec ("variant_n_hom_alt" , ndim = 1 , kind = "i" )
@@ -150,7 +154,7 @@ def register_variable(cls, spec: ArrayLikeSpec) -> None:
150
154
151
155
@classmethod
152
156
@overload
153
- def validate (
157
+ def _validate (
154
158
cls ,
155
159
xr_dataset : xr .Dataset ,
156
160
* specs : Mapping [Hashable , ArrayLikeSpec ],
@@ -163,7 +167,7 @@ def validate(
163
167
164
168
@classmethod
165
169
@overload
166
- def validate (cls , xr_dataset : xr .Dataset , * specs : ArrayLikeSpec ) -> xr .Dataset :
170
+ def _validate (cls , xr_dataset : xr .Dataset , * specs : ArrayLikeSpec ) -> xr .Dataset :
167
171
"""
168
172
Validate that xr_dataset contains array(s) of interest with default
169
173
variable name(s).
@@ -172,15 +176,15 @@ def validate(cls, xr_dataset: xr.Dataset, *specs: ArrayLikeSpec) -> xr.Dataset:
172
176
173
177
@classmethod
174
178
@overload
175
- def validate (cls , xr_dataset : xr .Dataset , * specs : Hashable ) -> xr .Dataset :
179
+ def _validate (cls , xr_dataset : xr .Dataset , * specs : Hashable ) -> xr .Dataset :
176
180
"""
177
181
Validate that xr_dataset contains array(s) of interest with variable
178
182
name(s). Variable must be registered in `SgkitVariables.registered_variables`.
179
183
"""
180
184
...
181
185
182
186
@classmethod
183
- def validate (
187
+ def _validate (
184
188
cls ,
185
189
xr_dataset : xr .Dataset ,
186
190
* specs : Union [ArrayLikeSpec , Mapping [Hashable , ArrayLikeSpec ], Hashable ],
@@ -217,5 +221,5 @@ def _check_field(
217
221
) from e
218
222
219
223
220
- validate = SgkitVariables .validate
221
- """Shorthand for SgkitVariables.validate"""
224
+ validate = SgkitVariables ._validate
225
+ """Shortcut for SgkitVariables._validate."""
0 commit comments