Remove the `window` alias for `window_by_variant` and update all references to it.

tomwhite · mergify[bot] · commit a7cb3266d501 · 2021-07-05T09:10:25.000Z
diff --git a/docs/api.rst b/docs/api.rst
@@ -107,7 +107,6 @@ Utilities
    infer_sample_ploidy
    infer_variant_ploidy
    simulate_genotype_call_dataset
-   window
    window_by_position
    window_by_variant
 
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
@@ -279,7 +279,7 @@ example shows how to give cohorts names.
 .. ipython:: python
     :okwarning:
 
-    ds = sg.window(ds, size=20)
+    ds = sg.window_by_variant(ds, size=20)
     ds = sg.Fst(ds)
 
     cohort_names = ["Africa", "Asia", "Europe"]
@@ -320,7 +320,7 @@ Xarray and Pandas operations in a single pipeline:
         # Apply filter to include variants present across > 80% of samples
         .pipe(lambda ds: ds.sel(variants=ds.variant_call_rate > .8))
         # Create windows of size 20 variants
-        .pipe(lambda ds: sg.window(ds, size=20))
+        .pipe(lambda ds: sg.window_by_variant(ds, size=20))
         # Assign a "cohort" variable that splits samples into two groups
         .assign(sample_cohort=np.repeat([0, 1], ds.dims['samples'] // 2))
         # Compute Fst between the groups
diff --git a/sgkit/__init__.py b/sgkit/__init__.py
@@ -41,7 +41,7 @@
 from .stats.preprocessing import filter_partial_calls
 from .stats.regenie import regenie
 from .testing import simulate_genotype_call_dataset
-from .window import window, window_by_position, window_by_variant
+from .window import window_by_position, window_by_variant
 
 try:
     __version__ = get_distribution(__name__).version
@@ -85,7 +85,6 @@
     "variables",
     "observed_heterozygosity",
     "pca",
-    "window",
     "window_by_position",
     "window_by_variant",
     "load_dataset",
diff --git a/sgkit/stats/ld.py b/sgkit/stats/ld.py
@@ -88,7 +88,7 @@ def ld_matrix(
     Parameters
     ----------
     ds
-        Dataset containing genotype dosages. Must already be windowed with :func:`window`.
+        Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
     dosage
         Name of genetic dosage variable.
         Defined by :data:`sgkit.variables.dosage_spec`.
@@ -410,7 +410,7 @@ def ld_prune(
     Parameters
     ----------
     ds
-        Dataset containing genotype dosages. Must already be windowed with :func:`window`.
+        Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
     dosage
         Name of genetic dosage variable.
         Defined by :data:`sgkit.variables.dosage_spec`.
@@ -445,7 +445,7 @@ def ld_prune(
     >>> ds["dosage"] = ds["call_genotype"].sum(dim="ploidy")
 
     >>> # Divide into windows of size five (variants)
-    >>> ds = sg.window(ds, size=5)
+    >>> ds = sg.window_by_variant(ds, size=5)
 
     >>> pruned_ds = sg.ld_prune(ds)
     >>> pruned_ds.dims["variants"]
diff --git a/sgkit/stats/popgen.py b/sgkit/stats/popgen.py
@@ -35,7 +35,7 @@ def diversity(
     """Compute diversity from cohort allele counts.
 
     By default, values of this statistic are calculated per variant.
-    To compute values in windows, call :func:`window` before calling
+    To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
     this function.
 
     Parameters
@@ -83,7 +83,7 @@ def diversity(
         [0.5       , 0.5       ]])
 
     >>> # Divide into windows of size three (variants)
-    >>> ds = sg.window(ds, size=3)
+    >>> ds = sg.window_by_variant(ds, size=3)
     >>> sg.diversity(ds)["stat_diversity"].values # doctest: +NORMALIZE_WHITESPACE
     array([[1.83333333, 1.83333333],
         [1.        , 1.        ]])
@@ -188,7 +188,7 @@ def divergence(
     is the diversity for cohort i.
 
     By default, values of this statistic are calculated per variant.
-    To compute values in windows, call :func:`window` before calling
+    To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
     this function.
 
     Parameters
@@ -247,7 +247,7 @@ def divergence(
             [0.625     , 0.5       ]]])
 
     >>> # Divide into windows of size three (variants)
-    >>> ds = sg.window(ds, size=3)
+    >>> ds = sg.window_by_variant(ds, size=3)
     >>> sg.divergence(ds)["stat_divergence"].values # doctest: +NORMALIZE_WHITESPACE
     array([[[1.83333333, 1.5       ],
             [1.5       , 1.83333333]],
@@ -373,7 +373,7 @@ def Fst(
     """Compute Fst between pairs of cohorts.
 
     By default, values of this statistic are calculated per variant.
-    To compute values in windows, call :func:`window` before calling
+    To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
     this function.
 
     Parameters
@@ -439,7 +439,7 @@ def Fst(
             [ 0.2       ,         nan]]])
 
     >>> # Divide into windows of size three (variants)
-    >>> ds = sg.window(ds, size=3)
+    >>> ds = sg.window_by_variant(ds, size=3)
     >>> sg.Fst(ds)["stat_Fst"].values # doctest: +NORMALIZE_WHITESPACE
     array([[[        nan, -0.22222222],
             [-0.22222222,         nan]],
@@ -480,7 +480,7 @@ def Tajimas_D(
     """Compute Tajimas' D for a genotype call dataset.
 
     By default, values of this statistic are calculated per variant.
-    To compute values in windows, call :func:`window` before calling
+    To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
     this function.
 
     Parameters
@@ -533,7 +533,7 @@ def Tajimas_D(
            [0.88883234, 0.88883234]])
 
     >>> # Divide into windows of size three (variants)
-    >>> ds = sg.window(ds, size=3)
+    >>> ds = sg.window_by_variant(ds, size=3)
     >>> sg.Tajimas_D(ds)["stat_Tajimas_D"].values # doctest: +NORMALIZE_WHITESPACE
     array([[2.40517586, 2.40517586],
            [1.10393559, 1.10393559]])
@@ -671,7 +671,7 @@ def pbs(
     """Compute the population branching statistic (PBS) between cohort triples.
 
     By default, values of this statistic are calculated per variant.
-    To compute values in windows, call :func:`window` before calling
+    To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
     this function.
 
     Parameters
@@ -721,7 +721,7 @@ def pbs(
     >>> ds = ds.assign_coords({"cohorts_0": cohort_names, "cohorts_1": cohort_names, "cohorts_2": cohort_names})
 
     >>> # Divide into two windows of size three (variants)
-    >>> ds = sg.window(ds, size=3)
+    >>> ds = sg.window_by_variant(ds, size=3)
     >>> sg.pbs(ds)["stat_pbs"].sel(cohorts_0="co_0", cohorts_1="co_1", cohorts_2="co_2").values # doctest: +NORMALIZE_WHITESPACE
     array([ 0.      , -0.160898])
     """
@@ -806,7 +806,7 @@ def Garud_H(
     of soft sweeps, as defined in Garud et al. (2015).
 
     By default, values of this statistic are calculated across all variants.
-    To compute values in windows, call :func:`window` before calling
+    To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
     this function.
 
     Parameters
@@ -868,7 +868,7 @@ def Garud_H(
     >>> ds["sample_cohort"] = xr.DataArray(sample_cohort, dims="samples")
 
     >>> # Divide into windows of size three (variants)
-    >>> ds = sg.window(ds, size=3, step=3)
+    >>> ds = sg.window_by_variant(ds, size=3, step=3)
 
     >>> gh = sg.Garud_H(ds)
     >>> gh["stat_Garud_h1"].values # doctest: +NORMALIZE_WHITESPACE
@@ -999,7 +999,7 @@ def observed_heterozygosity(
     mean.
 
     By default, values of this statistic are calculated per variant.
-    To compute values in windows, call :func:`window` before calling
+    To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
     this function.
 
     Parameters
@@ -1045,7 +1045,7 @@ def observed_heterozygosity(
         [0.5, 0.5]])
 
     >>> # Divide into windows of size three (variants)
-    >>> ds = sg.window(ds, size=3)
+    >>> ds = sg.window_by_variant(ds, size=3)
     >>> sg.observed_heterozygosity(ds)["stat_observed_heterozygosity"].values # doctest: +NORMALIZE_WHITESPACE
     array([[1.5, 2.5],
         [1. , 1. ]])
diff --git a/sgkit/tests/test_ld.py b/sgkit/tests/test_ld.py
@@ -10,7 +10,7 @@
 from hypothesis import strategies as st
 from hypothesis.extra.numpy import arrays
 
-from sgkit import variables, window
+from sgkit import variables, window_by_variant
 from sgkit.stats.ld import (
     ld_matrix,
     ld_prune,
@@ -66,7 +66,7 @@ def ldm_df(
 ) -> DataFrame:
     ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
     ds["dosage"] = (["variants", "samples"], x)
-    ds = window(ds, size=size, step=step)
+    ds = window_by_variant(ds, size=size, step=step)
     df = ld_matrix(ds, threshold=threshold).compute()
     if not diag:
         df = df.pipe(lambda df: df[df["i"] != df["j"]])
@@ -156,7 +156,7 @@ def test_vs_skallel(args):
 
     ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
     ds["dosage"] = (["variants", "samples"], da.asarray(x).rechunk({0: chunks}))
-    ds = window(ds, size=size, step=step)
+    ds = window_by_variant(ds, size=size, step=step)
 
     ldm = ld_matrix(ds, threshold=threshold)
     has_duplicates = ldm.compute().duplicated(subset=["i", "j"]).any()
@@ -183,7 +183,7 @@ def test_scores():
 
     ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
     ds["dosage"] = (["variants", "samples"], x)
-    ds = window(ds, size=10)
+    ds = window_by_variant(ds, size=10)
 
     ldm = ld_matrix(ds, threshold=0.2)
     idx_drop_ds = maximal_independent_set(ldm)
diff --git a/sgkit/tests/test_popgen.py b/sgkit/tests/test_popgen.py
@@ -23,7 +23,7 @@
     simulate_genotype_call_dataset,
     variables,
 )
-from sgkit.window import window
+from sgkit.window import window_by_variant
 
 from .test_aggregation import get_dataset
 
@@ -135,7 +135,7 @@ def test_diversity__windowed(sample_size):
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, cohort_key_names=["cohorts"])  # type: ignore[no-untyped-call]
-    ds = window(ds, size=25)
+    ds = window_by_variant(ds, size=25)
     ds = diversity(ds)
     div = ds["stat_diversity"].sel(cohorts="co_0").compute()
 
@@ -195,7 +195,7 @@ def test_divergence__windowed(sample_size, n_cohorts, chunks):
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
-    ds = window(ds, size=25)
+    ds = window_by_variant(ds, size=25)
     ds = divergence(ds)
     div = ds["stat_divergence"].values
     # test off-diagonal entries, by replacing diagonal with NaNs
@@ -222,7 +222,7 @@ def test_divergence__windowed_scikit_allel_comparison(sample_size, n_cohorts, ch
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
-    ds = window(ds, size=25)
+    ds = window_by_variant(ds, size=25)
     ds = divergence(ds)
     div = ds["stat_divergence"].values
     # test off-diagonal entries, by replacing diagonal with NaNs
@@ -261,7 +261,7 @@ def test_Fst__Hudson(sample_size):
     ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
     n_variants = ds.dims["variants"]
-    ds = window(ds, size=n_variants)  # single window
+    ds = window_by_variant(ds, size=n_variants)  # single window
     ds = Fst(ds, estimator="Hudson")
     fst = ds.stat_Fst.sel(cohorts_0="co_0", cohorts_1="co_1").values
 
@@ -283,7 +283,7 @@ def test_Fst__Nei(sample_size, n_cohorts):
     ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
     n_variants = ds.dims["variants"]
-    ds = window(ds, size=n_variants)  # single window
+    ds = window_by_variant(ds, size=n_variants)  # single window
     ds = Fst(ds, estimator="Nei")
     fst = ds.stat_Fst.values
 
@@ -312,7 +312,7 @@ def test_Fst__windowed(sample_size, n_cohorts, chunks):
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, n_cohorts)  # type: ignore[no-untyped-call]
-    ds = window(ds, size=25)
+    ds = window_by_variant(ds, size=25)
     fst_ds = Fst(ds, estimator="Nei")
     fst = fst_ds["stat_Fst"].values
 
@@ -354,7 +354,7 @@ def test_Tajimas_D(sample_size):
     ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, cohort_key_names=None)  # type: ignore[no-untyped-call]
     n_variants = ds.dims["variants"]
-    ds = window(ds, size=n_variants)  # single window
+    ds = window_by_variant(ds, size=n_variants)  # single window
     ds = Tajimas_D(ds)
     d = ds.stat_Tajimas_D.compute()
     ts_d = ts.Tajimas_D()
@@ -382,7 +382,7 @@ def test_pbs(sample_size, n_cohorts):
     ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"])  # type: ignore[no-untyped-call]
     n_variants = ds.dims["variants"]
-    ds = window(ds, size=n_variants)  # single window
+    ds = window_by_variant(ds, size=n_variants)  # single window
 
     ds = pbs(ds)
 
@@ -416,7 +416,7 @@ def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
     ts = simulate_ts(sample_size, length=200)
     ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
     ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"])  # type: ignore[no-untyped-call]
-    ds = window(ds, size=25)
+    ds = window_by_variant(ds, size=25)
 
     ds = pbs(ds, cohorts=cohorts)
 
@@ -466,7 +466,7 @@ def test_Garud_h(
     cohort_names = [f"co_{i}" for i in range(n_cohorts)]
     coords = {k: cohort_names for k in ["cohorts"]}
     ds = ds.assign_coords(coords)  # type: ignore[no-untyped-call]
-    ds = window(ds, size=3)
+    ds = window_by_variant(ds, size=3)
 
     gh = Garud_H(ds, cohorts=cohorts)
     h1 = gh.stat_Garud_h1.values
@@ -635,7 +635,7 @@ def test_observed_heterozygosity__windowed(chunks, cohorts, expectation):
         ["samples"],
         da.asarray(cohorts).rechunk(chunks[1]),
     )
-    ds = window(ds, size=2)
+    ds = window_by_variant(ds, size=2)
     ho = observed_heterozygosity(ds)["stat_observed_heterozygosity"]
     np.testing.assert_almost_equal(
         ho,
@@ -662,7 +662,7 @@ def test_observed_heterozygosity__scikit_allel_comparison(
         ["samples"],
         np.zeros(n_sample, int),
     )
-    ds = window(ds, size=window_size)
+    ds = window_by_variant(ds, size=window_size)
     ho_sg = observed_heterozygosity(ds)["stat_observed_heterozygosity"].values
     if n_sample % window_size:
         # scikit-allel will drop the ragged end
diff --git a/sgkit/window.py b/sgkit/window.py
@@ -91,9 +91,6 @@ def window_by_variant(
     return _window_per_contig(ds, variant_contig, merge, _get_windows, size, step)
 
 
-window = window_by_variant
-
-
 def window_by_position(
     ds: Dataset,
     *,