Skip to content

Commit a7cb326

Browse files
tomwhite authored and mergify[bot] committed
Remove references to renamed window function.
1 parent d9538b0 commit a7cb326

File tree

8 files changed: 37 additions and 42 deletions.

docs/api.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ Utilities
107107
infer_sample_ploidy
108108
infer_variant_ploidy
109109
simulate_genotype_call_dataset
110-
window
111110
window_by_position
112111
window_by_variant
113112

docs/getting_started.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ example shows how to give cohorts names.
279279
.. ipython:: python
280280
:okwarning:
281281
282-
ds = sg.window(ds, size=20)
282+
ds = sg.window_by_variant(ds, size=20)
283283
ds = sg.Fst(ds)
284284
285285
cohort_names = ["Africa", "Asia", "Europe"]
@@ -320,7 +320,7 @@ Xarray and Pandas operations in a single pipeline:
320320
# Apply filter to include variants present across > 80% of samples
321321
.pipe(lambda ds: ds.sel(variants=ds.variant_call_rate > .8))
322322
# Create windows of size 20 variants
323-
.pipe(lambda ds: sg.window(ds, size=20))
323+
.pipe(lambda ds: sg.window_by_variant(ds, size=20))
324324
# Assign a "cohort" variable that splits samples into two groups
325325
.assign(sample_cohort=np.repeat([0, 1], ds.dims['samples'] // 2))
326326
# Compute Fst between the groups

sgkit/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from .stats.preprocessing import filter_partial_calls
4242
from .stats.regenie import regenie
4343
from .testing import simulate_genotype_call_dataset
44-
from .window import window, window_by_position, window_by_variant
44+
from .window import window_by_position, window_by_variant
4545

4646
try:
4747
__version__ = get_distribution(__name__).version
@@ -85,7 +85,6 @@
8585
"variables",
8686
"observed_heterozygosity",
8787
"pca",
88-
"window",
8988
"window_by_position",
9089
"window_by_variant",
9190
"load_dataset",

sgkit/stats/ld.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def ld_matrix(
8888
Parameters
8989
----------
9090
ds
91-
Dataset containing genotype dosages. Must already be windowed with :func:`window`.
91+
Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
9292
dosage
9393
Name of genetic dosage variable.
9494
Defined by :data:`sgkit.variables.dosage_spec`.
@@ -410,7 +410,7 @@ def ld_prune(
410410
Parameters
411411
----------
412412
ds
413-
Dataset containing genotype dosages. Must already be windowed with :func:`window`.
413+
Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
414414
dosage
415415
Name of genetic dosage variable.
416416
Defined by :data:`sgkit.variables.dosage_spec`.
@@ -445,7 +445,7 @@ def ld_prune(
445445
>>> ds["dosage"] = ds["call_genotype"].sum(dim="ploidy")
446446
447447
>>> # Divide into windows of size five (variants)
448-
>>> ds = sg.window(ds, size=5)
448+
>>> ds = sg.window_by_variant(ds, size=5)
449449
450450
>>> pruned_ds = sg.ld_prune(ds)
451451
>>> pruned_ds.dims["variants"]

sgkit/stats/popgen.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def diversity(
3535
"""Compute diversity from cohort allele counts.
3636
3737
By default, values of this statistic are calculated per variant.
38-
To compute values in windows, call :func:`window` before calling
38+
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
3939
this function.
4040
4141
Parameters
@@ -83,7 +83,7 @@ def diversity(
8383
[0.5 , 0.5 ]])
8484
8585
>>> # Divide into windows of size three (variants)
86-
>>> ds = sg.window(ds, size=3)
86+
>>> ds = sg.window_by_variant(ds, size=3)
8787
>>> sg.diversity(ds)["stat_diversity"].values # doctest: +NORMALIZE_WHITESPACE
8888
array([[1.83333333, 1.83333333],
8989
[1. , 1. ]])
@@ -188,7 +188,7 @@ def divergence(
188188
is the diversity for cohort i.
189189
190190
By default, values of this statistic are calculated per variant.
191-
To compute values in windows, call :func:`window` before calling
191+
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
192192
this function.
193193
194194
Parameters
@@ -247,7 +247,7 @@ def divergence(
247247
[0.625 , 0.5 ]]])
248248
249249
>>> # Divide into windows of size three (variants)
250-
>>> ds = sg.window(ds, size=3)
250+
>>> ds = sg.window_by_variant(ds, size=3)
251251
>>> sg.divergence(ds)["stat_divergence"].values # doctest: +NORMALIZE_WHITESPACE
252252
array([[[1.83333333, 1.5 ],
253253
[1.5 , 1.83333333]],
@@ -373,7 +373,7 @@ def Fst(
373373
"""Compute Fst between pairs of cohorts.
374374
375375
By default, values of this statistic are calculated per variant.
376-
To compute values in windows, call :func:`window` before calling
376+
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
377377
this function.
378378
379379
Parameters
@@ -439,7 +439,7 @@ def Fst(
439439
[ 0.2 , nan]]])
440440
441441
>>> # Divide into windows of size three (variants)
442-
>>> ds = sg.window(ds, size=3)
442+
>>> ds = sg.window_by_variant(ds, size=3)
443443
>>> sg.Fst(ds)["stat_Fst"].values # doctest: +NORMALIZE_WHITESPACE
444444
array([[[ nan, -0.22222222],
445445
[-0.22222222, nan]],
@@ -480,7 +480,7 @@ def Tajimas_D(
480480
"""Compute Tajimas' D for a genotype call dataset.
481481
482482
By default, values of this statistic are calculated per variant.
483-
To compute values in windows, call :func:`window` before calling
483+
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
484484
this function.
485485
486486
Parameters
@@ -533,7 +533,7 @@ def Tajimas_D(
533533
[0.88883234, 0.88883234]])
534534
535535
>>> # Divide into windows of size three (variants)
536-
>>> ds = sg.window(ds, size=3)
536+
>>> ds = sg.window_by_variant(ds, size=3)
537537
>>> sg.Tajimas_D(ds)["stat_Tajimas_D"].values # doctest: +NORMALIZE_WHITESPACE
538538
array([[2.40517586, 2.40517586],
539539
[1.10393559, 1.10393559]])
@@ -671,7 +671,7 @@ def pbs(
671671
"""Compute the population branching statistic (PBS) between cohort triples.
672672
673673
By default, values of this statistic are calculated per variant.
674-
To compute values in windows, call :func:`window` before calling
674+
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
675675
this function.
676676
677677
Parameters
@@ -721,7 +721,7 @@ def pbs(
721721
>>> ds = ds.assign_coords({"cohorts_0": cohort_names, "cohorts_1": cohort_names, "cohorts_2": cohort_names})
722722
723723
>>> # Divide into two windows of size three (variants)
724-
>>> ds = sg.window(ds, size=3)
724+
>>> ds = sg.window_by_variant(ds, size=3)
725725
>>> sg.pbs(ds)["stat_pbs"].sel(cohorts_0="co_0", cohorts_1="co_1", cohorts_2="co_2").values # doctest: +NORMALIZE_WHITESPACE
726726
array([ 0. , -0.160898])
727727
"""
@@ -806,7 +806,7 @@ def Garud_H(
806806
of soft sweeps, as defined in Garud et al. (2015).
807807
808808
By default, values of this statistic are calculated across all variants.
809-
To compute values in windows, call :func:`window` before calling
809+
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
810810
this function.
811811
812812
Parameters
@@ -868,7 +868,7 @@ def Garud_H(
868868
>>> ds["sample_cohort"] = xr.DataArray(sample_cohort, dims="samples")
869869
870870
>>> # Divide into windows of size three (variants)
871-
>>> ds = sg.window(ds, size=3, step=3)
871+
>>> ds = sg.window_by_variant(ds, size=3, step=3)
872872
873873
>>> gh = sg.Garud_H(ds)
874874
>>> gh["stat_Garud_h1"].values # doctest: +NORMALIZE_WHITESPACE
@@ -999,7 +999,7 @@ def observed_heterozygosity(
999999
mean.
10001000
10011001
By default, values of this statistic are calculated per variant.
1002-
To compute values in windows, call :func:`window` before calling
1002+
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
10031003
this function.
10041004
10051005
Parameters
@@ -1045,7 +1045,7 @@ def observed_heterozygosity(
10451045
[0.5, 0.5]])
10461046
10471047
>>> # Divide into windows of size three (variants)
1048-
>>> ds = sg.window(ds, size=3)
1048+
>>> ds = sg.window_by_variant(ds, size=3)
10491049
>>> sg.observed_heterozygosity(ds)["stat_observed_heterozygosity"].values # doctest: +NORMALIZE_WHITESPACE
10501050
array([[1.5, 2.5],
10511051
[1. , 1. ]])

sgkit/tests/test_ld.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from hypothesis import strategies as st
1111
from hypothesis.extra.numpy import arrays
1212

13-
from sgkit import variables, window
13+
from sgkit import variables, window_by_variant
1414
from sgkit.stats.ld import (
1515
ld_matrix,
1616
ld_prune,
@@ -66,7 +66,7 @@ def ldm_df(
6666
) -> DataFrame:
6767
ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
6868
ds["dosage"] = (["variants", "samples"], x)
69-
ds = window(ds, size=size, step=step)
69+
ds = window_by_variant(ds, size=size, step=step)
7070
df = ld_matrix(ds, threshold=threshold).compute()
7171
if not diag:
7272
df = df.pipe(lambda df: df[df["i"] != df["j"]])
@@ -156,7 +156,7 @@ def test_vs_skallel(args):
156156

157157
ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
158158
ds["dosage"] = (["variants", "samples"], da.asarray(x).rechunk({0: chunks}))
159-
ds = window(ds, size=size, step=step)
159+
ds = window_by_variant(ds, size=size, step=step)
160160

161161
ldm = ld_matrix(ds, threshold=threshold)
162162
has_duplicates = ldm.compute().duplicated(subset=["i", "j"]).any()
@@ -183,7 +183,7 @@ def test_scores():
183183

184184
ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
185185
ds["dosage"] = (["variants", "samples"], x)
186-
ds = window(ds, size=10)
186+
ds = window_by_variant(ds, size=10)
187187

188188
ldm = ld_matrix(ds, threshold=0.2)
189189
idx_drop_ds = maximal_independent_set(ldm)

sgkit/tests/test_popgen.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
simulate_genotype_call_dataset,
2424
variables,
2525
)
26-
from sgkit.window import window
26+
from sgkit.window import window_by_variant
2727

2828
from .test_aggregation import get_dataset
2929

@@ -135,7 +135,7 @@ def test_diversity__windowed(sample_size):
135135
ts = simulate_ts(sample_size, length=200)
136136
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
137137
ds, subsets = add_cohorts(ds, ts, cohort_key_names=["cohorts"]) # type: ignore[no-untyped-call]
138-
ds = window(ds, size=25)
138+
ds = window_by_variant(ds, size=25)
139139
ds = diversity(ds)
140140
div = ds["stat_diversity"].sel(cohorts="co_0").compute()
141141

@@ -195,7 +195,7 @@ def test_divergence__windowed(sample_size, n_cohorts, chunks):
195195
ts = simulate_ts(sample_size, length=200)
196196
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
197197
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
198-
ds = window(ds, size=25)
198+
ds = window_by_variant(ds, size=25)
199199
ds = divergence(ds)
200200
div = ds["stat_divergence"].values
201201
# test off-diagonal entries, by replacing diagonal with NaNs
@@ -222,7 +222,7 @@ def test_divergence__windowed_scikit_allel_comparison(sample_size, n_cohorts, ch
222222
ts = simulate_ts(sample_size, length=200)
223223
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
224224
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
225-
ds = window(ds, size=25)
225+
ds = window_by_variant(ds, size=25)
226226
ds = divergence(ds)
227227
div = ds["stat_divergence"].values
228228
# test off-diagonal entries, by replacing diagonal with NaNs
@@ -261,7 +261,7 @@ def test_Fst__Hudson(sample_size):
261261
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
262262
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
263263
n_variants = ds.dims["variants"]
264-
ds = window(ds, size=n_variants) # single window
264+
ds = window_by_variant(ds, size=n_variants) # single window
265265
ds = Fst(ds, estimator="Hudson")
266266
fst = ds.stat_Fst.sel(cohorts_0="co_0", cohorts_1="co_1").values
267267

@@ -283,7 +283,7 @@ def test_Fst__Nei(sample_size, n_cohorts):
283283
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
284284
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
285285
n_variants = ds.dims["variants"]
286-
ds = window(ds, size=n_variants) # single window
286+
ds = window_by_variant(ds, size=n_variants) # single window
287287
ds = Fst(ds, estimator="Nei")
288288
fst = ds.stat_Fst.values
289289

@@ -312,7 +312,7 @@ def test_Fst__windowed(sample_size, n_cohorts, chunks):
312312
ts = simulate_ts(sample_size, length=200)
313313
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
314314
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
315-
ds = window(ds, size=25)
315+
ds = window_by_variant(ds, size=25)
316316
fst_ds = Fst(ds, estimator="Nei")
317317
fst = fst_ds["stat_Fst"].values
318318

@@ -354,7 +354,7 @@ def test_Tajimas_D(sample_size):
354354
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
355355
ds, subsets = add_cohorts(ds, ts, cohort_key_names=None) # type: ignore[no-untyped-call]
356356
n_variants = ds.dims["variants"]
357-
ds = window(ds, size=n_variants) # single window
357+
ds = window_by_variant(ds, size=n_variants) # single window
358358
ds = Tajimas_D(ds)
359359
d = ds.stat_Tajimas_D.compute()
360360
ts_d = ts.Tajimas_D()
@@ -382,7 +382,7 @@ def test_pbs(sample_size, n_cohorts):
382382
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
383383
ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]) # type: ignore[no-untyped-call]
384384
n_variants = ds.dims["variants"]
385-
ds = window(ds, size=n_variants) # single window
385+
ds = window_by_variant(ds, size=n_variants) # single window
386386

387387
ds = pbs(ds)
388388

@@ -416,7 +416,7 @@ def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
416416
ts = simulate_ts(sample_size, length=200)
417417
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
418418
ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]) # type: ignore[no-untyped-call]
419-
ds = window(ds, size=25)
419+
ds = window_by_variant(ds, size=25)
420420

421421
ds = pbs(ds, cohorts=cohorts)
422422

@@ -466,7 +466,7 @@ def test_Garud_h(
466466
cohort_names = [f"co_{i}" for i in range(n_cohorts)]
467467
coords = {k: cohort_names for k in ["cohorts"]}
468468
ds = ds.assign_coords(coords) # type: ignore[no-untyped-call]
469-
ds = window(ds, size=3)
469+
ds = window_by_variant(ds, size=3)
470470

471471
gh = Garud_H(ds, cohorts=cohorts)
472472
h1 = gh.stat_Garud_h1.values
@@ -635,7 +635,7 @@ def test_observed_heterozygosity__windowed(chunks, cohorts, expectation):
635635
["samples"],
636636
da.asarray(cohorts).rechunk(chunks[1]),
637637
)
638-
ds = window(ds, size=2)
638+
ds = window_by_variant(ds, size=2)
639639
ho = observed_heterozygosity(ds)["stat_observed_heterozygosity"]
640640
np.testing.assert_almost_equal(
641641
ho,
@@ -662,7 +662,7 @@ def test_observed_heterozygosity__scikit_allel_comparison(
662662
["samples"],
663663
np.zeros(n_sample, int),
664664
)
665-
ds = window(ds, size=window_size)
665+
ds = window_by_variant(ds, size=window_size)
666666
ho_sg = observed_heterozygosity(ds)["stat_observed_heterozygosity"].values
667667
if n_sample % window_size:
668668
# scikit-allel will drop the ragged end

sgkit/window.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,6 @@ def window_by_variant(
9191
return _window_per_contig(ds, variant_contig, merge, _get_windows, size, step)
9292

9393

94-
window = window_by_variant
95-
96-
9794
def window_by_position(
9895
ds: Dataset,
9996
*,

0 commit comments

Comments
 (0)