diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 833d85439..c65419916 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.1.0 + rev: v3.2.0 hooks: - id: check-merge-conflict - id: debug-statements @@ -8,11 +8,11 @@ repos: - id: check-case-conflict - id: check-yaml - repo: https://github.com/timothycrosley/isort - rev: 5.1.1 + rev: 5.5.3 hooks: - id: isort - repo: https://github.com/python/black - rev: 19.10b0 + rev: 20.8b1 hooks: - id: black language_version: python3 diff --git a/sgkit/display.py b/sgkit/display.py index 40f84189a..17fbea229 100644 --- a/sgkit/display.py +++ b/sgkit/display.py @@ -179,7 +179,7 @@ def display_genotypes( ds_abbr = truncate( ds_calls, max_sizes={"variants": max_variants, "samples": max_samples} ) - df = ds_abbr.to_dataframe().unstack(level="ploidy") # type: ignore[no-untyped-call] + df = ds_abbr.to_dataframe().unstack(level="ploidy") # Convert each genotype to a string representation def calls_to_str(r: pd.DataFrame) -> str: @@ -193,5 +193,8 @@ def calls_to_str(r: pd.DataFrame) -> str: df = df.apply(calls_to_str, axis=1).unstack("samples") return GenotypeDisplay( - df, (ds.sizes["variants"], ds.sizes["samples"]), max_variants, max_samples, + df, + (ds.sizes["variants"], ds.sizes["samples"]), + max_variants, + max_samples, ) diff --git a/sgkit/stats/hwe.py b/sgkit/stats/hwe.py index 0eb7dbf5e..61cff16d9 100644 --- a/sgkit/stats/hwe.py +++ b/sgkit/stats/hwe.py @@ -123,7 +123,9 @@ def hardy_weinberg_p_value_vec( def hardy_weinberg_test( - ds: Dataset, genotype_counts: Optional[Hashable] = None, merge: bool = True, + ds: Dataset, + genotype_counts: Optional[Hashable] = None, + merge: bool = True, ) -> Dataset: """Exact test for HWE as described in Wigginton et al. 2005 [1]. diff --git a/sgkit/stats/popgen.py b/sgkit/stats/popgen.py index bfe364bf7..416732bb2 100644 --- a/sgkit/stats/popgen.py +++ b/sgkit/stats/popgen.py @@ -9,7 +9,8 @@ def diversity( - ds: Dataset, allele_counts: Hashable = "variant_allele_count", + ds: Dataset, + allele_counts: Hashable = "variant_allele_count", ) -> DataArray: """Compute diversity from allele counts. @@ -49,7 +50,9 @@ def diversity( def divergence( - ds1: Dataset, ds2: Dataset, allele_counts: Hashable = "variant_allele_count", + ds1: Dataset, + ds2: Dataset, + allele_counts: Hashable = "variant_allele_count", ) -> DataArray: """Compute divergence between two genotype call datasets. @@ -87,7 +90,9 @@ def divergence( def Fst( - ds1: Dataset, ds2: Dataset, allele_counts: Hashable = "variant_allele_count", + ds1: Dataset, + ds2: Dataset, + allele_counts: Hashable = "variant_allele_count", ) -> DataArray: """Compute Fst between two genotype call datasets. @@ -112,7 +117,8 @@ def Fst( def Tajimas_D( - ds: Dataset, allele_counts: Hashable = "variant_allele_count", + ds: Dataset, + allele_counts: Hashable = "variant_allele_count", ) -> DataArray: """Compute Tajimas' D for a genotype call dataset. diff --git a/sgkit/stats/utils.py b/sgkit/stats/utils.py index 05776a5cc..63c1ea0d7 100644 --- a/sgkit/stats/utils.py +++ b/sgkit/stats/utils.py @@ -43,7 +43,7 @@ def concat_2d(ds: Dataset, dims: Tuple[Hashable, Hashable]) -> DataArray: # Add concatenation axis arr = arr.expand_dims(dim=dims[1], axis=1) arrs.append(arr) - return xr.concat(arrs, dim=dims[1]) # type: ignore[no-any-return,no-untyped-call] + return xr.concat(arrs, dim=dims[1]) def r2_score(YP: ArrayLike, YT: ArrayLike) -> ArrayLike: diff --git a/sgkit/tests/test_association.py b/sgkit/tests/test_association.py index 78b80ce24..f4031f195 100644 --- a/sgkit/tests/test_association.py +++ b/sgkit/tests/test_association.py @@ -132,7 +132,7 @@ def _get_statistics( ) res = _sm_statistics(ds, i, add_intercept) df_pred.append( - dsr.to_dataframe() # type: ignore[no-untyped-call] + dsr.to_dataframe() .rename(columns=lambda c: c.replace("variant_", "")) .iloc[i] .to_dict() @@ -183,9 +183,9 @@ def run(traits: Sequence[str]) -> Dataset: traits = [f"trait_{i}" for i in range(ds.attrs["n_trait"])] # Run regressions on individual traits and concatenate resulting statistics - dfr_single = xr.concat([run([t]) for t in traits], dim="traits").to_dataframe() # type: ignore[no-untyped-call] + dfr_single = xr.concat([run([t]) for t in traits], dim="traits").to_dataframe() # Run regressions on all traits simulatenously - dfr_multi: DataFrame = run(traits).to_dataframe() # type: ignore[no-untyped-call] + dfr_multi: DataFrame = run(traits).to_dataframe() pd.testing.assert_frame_equal(dfr_single, dfr_multi) diff --git a/sgkit/tests/test_hwe.py b/sgkit/tests/test_hwe.py index 8281d2388..02702d2e3 100644 --- a/sgkit/tests/test_hwe.py +++ b/sgkit/tests/test_hwe.py @@ -137,7 +137,7 @@ def test_hwep_dataset__precomputed_counts(ds_neq: Dataset) -> None: ds = ds_neq ac = ds["call_genotype"].sum(dim="ploidy") cts = [1, 0, 2] # arg order: hets, hom1, hom2 - gtc = xr.concat([(ac == ct).sum(dim="samples") for ct in cts], dim="counts").T # type: ignore[no-untyped-call] + gtc = xr.concat([(ac == ct).sum(dim="samples") for ct in cts], dim="counts").T ds = ds.assign(**{"variant_genotype_counts": gtc}) p = hwep_test(ds, genotype_counts="variant_genotype_counts", merge=False) assert np.all(p < 1e-8) diff --git a/sgkit/tests/test_regenie.py b/sgkit/tests/test_regenie.py index e6f54e9ba..b62dcbb26 100644 --- a/sgkit/tests/test_regenie.py +++ b/sgkit/tests/test_regenie.py @@ -187,7 +187,7 @@ def prepare_stage_3_sgkit_results( ) dsr = dsr.merge(ds[["variant_id"]]) dsr = dsr.assign(outcome=xr.DataArray(df_trait.columns, dims=("outcomes"))) - df = dsr.to_dataframe().reset_index(drop=True) # type: ignore[no-untyped-call] + df = dsr.to_dataframe().reset_index(drop=True) return df diff --git a/sgkit/tests/test_stats_utils.py b/sgkit/tests/test_stats_utils.py index 4b9111d75..4e4b96551 100644 --- a/sgkit/tests/test_stats_utils.py +++ b/sgkit/tests/test_stats_utils.py @@ -76,7 +76,11 @@ def test_concat_2d__values(n: int) -> None: x, y = np.arange(n), np.arange(n * n).reshape(n, n) z = np.copy(y) ds = xr.Dataset( - dict(x=(("dim0"), x), y=(("dim0", "dim1"), y), z=(("dim1", "dim2"), z),) + dict( + x=(("dim0"), x), + y=(("dim0", "dim1"), y), + z=(("dim1", "dim2"), z), + ) ) actual = concat_2d(ds, dims=("dim0", "dim1")) expected = np.concatenate([x.reshape(-1, 1), y], axis=1)