@@ -44,14 +44,17 @@ def count_alleles(g: ArrayLike, _: ArrayLike, out: ArrayLike) -> None:
44
44
out [a ] += 1
45
45
46
46
47
- def count_call_alleles (ds : Dataset ) -> Dataset :
47
+ def count_call_alleles (ds : Dataset , merge : bool = True ) -> Dataset :
48
48
"""Compute per sample allele counts from genotype calls.
49
49
50
50
Parameters
51
51
----------
52
52
ds : Dataset
53
53
Genotype call dataset such as from
54
54
`sgkit.create_genotype_call_dataset`.
55
+ merge : bool
56
+ If True, merge the input dataset and the computed variables into
57
+ a single dataset, otherwise return only the computed variables.
55
58
56
59
Returns
57
60
-------
@@ -91,7 +94,7 @@ def count_call_alleles(ds: Dataset) -> Dataset:
91
94
G = da .asarray (ds ["call_genotype" ])
92
95
shape = (G .chunks [0 ], G .chunks [1 ], n_alleles )
93
96
N = da .empty (n_alleles , dtype = np .uint8 )
94
- return Dataset (
97
+ new_ds = Dataset (
95
98
{
96
99
"call_allele_count" : (
97
100
("variants" , "samples" , "alleles" ),
@@ -101,16 +104,20 @@ def count_call_alleles(ds: Dataset) -> Dataset:
101
104
)
102
105
}
103
106
)
107
+ return ds .merge (new_ds ) if merge else new_ds
104
108
105
109
106
- def count_variant_alleles (ds : Dataset ) -> Dataset :
110
+ def count_variant_alleles (ds : Dataset , merge : bool = True ) -> Dataset :
107
111
"""Compute allele count from genotype calls.
108
112
109
113
Parameters
110
114
----------
111
115
ds : Dataset
112
116
Genotype call dataset such as from
113
117
`sgkit.create_genotype_call_dataset`.
118
+ merge : bool
119
+ If True, merge the input dataset and the computed variables into
120
+ a single dataset, otherwise return only the computed variables.
114
121
115
122
Returns
116
123
-------
@@ -139,11 +146,12 @@ def count_variant_alleles(ds: Dataset) -> Dataset:
139
146
[2, 2],
140
147
[4, 0]], dtype=uint64)
141
148
"""
142
- return Dataset (
149
+ new_ds = Dataset (
143
150
{
144
151
"variant_allele_count" : (
145
152
("variants" , "alleles" ),
146
153
count_call_alleles (ds )["call_allele_count" ].sum (dim = "samples" ),
147
154
)
148
155
}
149
156
)
157
+ return ds .merge (new_ds ) if merge else new_ds
0 commit comments