diff --git a/docs/usage/indexing.rst b/docs/usage/indexing.rst index efa75ce3f..a622b7b16 100644 --- a/docs/usage/indexing.rst +++ b/docs/usage/indexing.rst @@ -10,3 +10,23 @@ Boost-histogram specific details -------------------------------- Boost-histogram implements ``bh.loc``, ``builtins.sum``, ``bh.rebin``, ``bh.underflow``, and ``bh.overflow`` from the UHI spec. A ``bh.tag.at`` locator is provided as well, which simulates the Boost.Histogram C++ ``.at()`` indexing using the UHI locator protocol. + +Boost-histogram allows "picking" using lists, similar to NumPy. If you select +with multiple lists, boost-histogram instead selects per-axis, rather than +group-selecting and reducing to a single axis, like NumPy does. You can use +``bh.loc(...)`` inside these lists. + +Example:: + + h = bh.histogram( + bh.axis.Regular(10, 0, 1), + bh.axis.StrCategory(["a", "b", "c"]), + bh.axis.IntCategory([5, 6, 7]), + ) + + minihist = h[:, [bh.loc("a"), bh.loc("c")], [0, 2]] + + # Produces a 3D histgoram with Regular(10, 0, 1) x StrCategory(["a", "c"]) x IntCategory([5, 7]) + + +This feature is considered experimental in boost-histogram 1.1.0. Removed bins are not added to the overflow bin currently. diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index d9c217a3d..fa5ddb585 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -1,3 +1,4 @@ +import collections.abc import copy import logging import threading @@ -55,8 +56,10 @@ CppAxis = NewType("CppAxis", object) -InnerIndexing = Union[SupportsIndex, Callable[[Axis], int], slice, "ellipsis"] -IndexingWithMapping = Union[InnerIndexing, Mapping[int, InnerIndexing]] +SimpleIndexing = Union[SupportsIndex, slice] +InnerIndexing = Union[SimpleIndexing, Callable[[Axis], int], "ellipsis"] +FullInnerIndexing = Union[InnerIndexing, List[InnerIndexing]] +IndexingWithMapping = Union[FullInnerIndexing, Mapping[int, FullInnerIndexing]] IndexingExpr = Union[IndexingWithMapping, Tuple[IndexingWithMapping, ...]] T = TypeVar("T") @@ -582,6 +585,26 @@ def __repr__(self) -> str: ret += f" ({outer} with flow)" return ret + def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: + """ + Converts an expression that contains UHI locators to one that does not. + """ + # Support sum and rebin directly + if index is sum or hasattr(index, "factor"): # type: ignore + index = slice(None, None, index) + + # General locators + # Note that MyPy doesn't like these very much - the fix + # will be to properly set input types + elif callable(index): + index = index(self.axes[axis]) + elif isinstance(index, SupportsIndex): + if abs(int(index)) >= self._hist.axis(axis).size: + raise IndexError("histogram index is out of range") + index %= self._hist.axis(axis).size + + return index # type: ignore + def _compute_commonindex( self, index: IndexingExpr ) -> List[Union[SupportsIndex, slice, Mapping[int, Union[SupportsIndex, slice]]]]: @@ -613,18 +636,11 @@ def _compute_commonindex( # Allow [bh.loc(...)] to work for i in range(len(indexes)): - # Support sum and rebin directly - if indexes[i] is sum or hasattr(indexes[i], "factor"): - indexes[i] = slice(None, None, indexes[i]) - # General locators - # Note that MyPy doesn't like these very much - the fix - # will be to properly set input types - elif callable(indexes[i]): - indexes[i] = indexes[i](self.axes[i]) - elif hasattr(indexes[i], "__index__"): - if abs(indexes[i]) >= hist.axis(i).size: - raise IndexError("histogram index is out of range") - indexes[i] %= hist.axis(i).size + # Support list of UHI indexers + if isinstance(indexes[i], list): + indexes[i] = [self._compute_uhi_index(index, i) for index in indexes[i]] + else: + indexes[i] = self._compute_uhi_index(indexes[i], i) return indexes @@ -729,6 +745,7 @@ def __getitem__( # noqa: C901 integrations: Set[int] = set() slices: List[_core.algorithm.reduce_command] = [] pick_each: Dict[int, int] = dict() + pick_set: Dict[int, List[int]] = dict() # Compute needed slices and projections for i, ind in enumerate(indexes): @@ -737,6 +754,9 @@ def __getitem__( # noqa: C901 1 if self.axes[i].traits.underflow else 0 ) continue + elif isinstance(ind, collections.abc.Sequence): + pick_set[i] = list(ind) + continue elif not isinstance(ind, slice): raise IndexError( "Must be a slice, an integer, or follow the locator protocol." @@ -782,17 +802,45 @@ def __getitem__( # noqa: C901 logger.debug("Reduce with %s", slices) reduced = self._hist.reduce(*slices) + if pick_set: + warnings.warn( + "List indexing selection is experimental. Removed bins are not placed in overflow." + ) + logger.debug("Slices for picking sets: %s", pick_set) + axes = [reduced.axis(i) for i in range(reduced.rank())] + reduced_view = reduced.view(flow=True) + for i in pick_set: + selection = copy.copy(pick_set[i]) + ax = reduced.axis(i) + if ax.traits_ordered: + raise RuntimeError( + f"Axis {i} is not a categorical axis, cannot pick with list" + ) + + if ax.traits_overflow and ax.size not in pick_set[i]: + selection.append(ax.size) + + new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]]) + new_axis.metadata = axes[i].metadata + axes[i] = new_axis + reduced_view = np.take(reduced_view, selection, axis=i) + + logger.debug("Axes: %s", axes) + new_reduced = reduced.__class__(axes) + new_reduced.view(flow=True)[...] = reduced_view + reduced = new_reduced + if pick_each: - my_slice = tuple( + tuple_slice = tuple( pick_each.get(i, slice(None)) for i in range(reduced.rank()) ) - logger.debug("Slices: %s", my_slice) + logger.debug("Slices for pick each: %s", tuple_slice) axes = [ reduced.axis(i) for i in range(reduced.rank()) if i not in pick_each ] logger.debug("Axes: %s", axes) new_reduced = reduced.__class__(axes) - new_reduced.view(flow=True)[...] = reduced.view(flow=True)[my_slice] + new_reduced.view(flow=True)[...] = reduced.view(flow=True)[tuple_slice] reduced = new_reduced integrations = {i - sum(j <= i for j in pick_each) for i in integrations} diff --git a/tests/test_internal_histogram.py b/tests/test_internal_histogram.py index 9bf96dd39..3a1489cf4 100644 --- a/tests/test_internal_histogram.py +++ b/tests/test_internal_histogram.py @@ -1,6 +1,7 @@ import numpy as np import pytest from numpy.testing import assert_allclose, assert_array_equal +from pytest import approx import boost_histogram as bh @@ -258,3 +259,106 @@ def test_int_cat_hist(): with pytest.raises(RuntimeError): h.fill(0.5) + + +@pytest.mark.filterwarnings("ignore:List indexing selection is experimental") +def test_int_cat_hist_pick_several(): + h = bh.Histogram( + bh.axis.IntCategory([1, 2, 7], __dict__={"xval": 5}), storage=bh.storage.Int64() + ) + + h.fill(1, weight=8) + h.fill(2, weight=7) + h.fill(7, weight=6) + + assert h.view() == approx(np.array([8, 7, 6])) + assert h.sum() == 21 + + assert h[[0, 2]].values() == approx(np.array([8, 6])) + assert h[[2, 0]].values() == approx(np.array([6, 8])) + assert h[[1]].values() == approx(np.array([7])) + + assert h[[bh.loc(1), bh.loc(7)]].values() == approx(np.array([8, 6])) + assert h[[bh.loc(7), bh.loc(1)]].values() == approx(np.array([6, 8])) + assert h[[bh.loc(2)]].values() == approx(np.array([7])) + + assert tuple(h[[0, 2]].axes[0]) == (1, 7) + assert tuple(h[[2, 0]].axes[0]) == (7, 1) + assert tuple(h[[1]].axes[0]) == (2,) + + assert h.axes[0].xval == 5 + assert h[[0]].axes[0].xval == 5 + assert h[[0, 1, 2]].axes[0].xval == 5 + + +@pytest.mark.filterwarnings("ignore:List indexing selection is experimental") +def test_str_cat_pick_several(): + h = bh.Histogram(bh.axis.StrCategory(["a", "b", "c"])) + + h.fill(["a", "a", "a", "b", "b", "c"], weight=0.25) + + assert h[[0, 1, 2]].values() == approx(np.array([0.75, 0.5, 0.25])) + assert h[[2, 1, 0]].values() == approx(np.array([0.25, 0.5, 0.75])) + assert h[[1, 0]].values() == approx(np.array([0.5, 0.75])) + + assert h[[bh.loc("a"), bh.loc("b"), bh.loc("c")]].values() == approx( + np.array([0.75, 0.5, 0.25]) + ) + assert h[[bh.loc("c"), bh.loc("b"), bh.loc("a")]].values() == approx( + np.array([0.25, 0.5, 0.75]) + ) + assert h[[bh.loc("b"), bh.loc("a")]].values() == approx(np.array([0.5, 0.75])) + + assert tuple(h[[1, 0]].axes[0]) == ("b", "a") + + +@pytest.mark.filterwarnings("ignore:List indexing selection is experimental") +def test_pick_invalid(): + h = bh.Histogram(bh.axis.Regular(10, 0, 1)) + with pytest.raises(RuntimeError): + h[[0, 1]] + + h = bh.Histogram(bh.axis.Integer(0, 10)) + with pytest.raises(RuntimeError): + h[[0, 1]] + + +@pytest.mark.filterwarnings("ignore:List indexing selection is experimental") +def test_str_cat_pick_dual(): + h = bh.Histogram( + bh.axis.StrCategory(["a", "b", "c"]), bh.axis.StrCategory(["d", "e", "f"]) + ) + vals = np.arange(9).reshape(3, 3) + h.values()[...] = vals + + assert h[[0], [0]].values() == approx(vals[[0]][:, [0]]) + assert h[[1], [2]].values() == approx(vals[[1]][:, [2]]) + assert h[[1], [0, 1]].values() == approx(vals[[1]][:, [0, 1]]) + assert h[[0, 1], [1]].values() == approx(vals[[0, 1]][:, [1]]) + assert h[[0, 1], [0, 1]].values() == approx(vals[[0, 1]][:, [0, 1]]) + assert h[[0, 1], [2, 1]].values() == approx(vals[[0, 1]][:, [2, 1]]) + + +@pytest.mark.filterwarnings("ignore:List indexing selection is experimental") +def test_pick_multiaxis(): + h = bh.Histogram( + bh.axis.StrCategory(["a", "b", "c"]), + bh.axis.IntCategory([-5, 0, 10]), + bh.axis.Regular(5, 0, 1), + bh.axis.StrCategory(["d", "e", "f"]), + storage=bh.storage.Int64(), + ) + + h.fill("b", -5, 0.65, "f") + h.fill("b", -5, 0.65, "e") + + mini = h[[bh.loc("b"), 2], [1, bh.loc(-5)], sum, bh.loc("f")] + + assert mini.ndim == 2 + assert tuple(mini.axes[0]) == ("b", "c") + assert tuple(mini.axes[1]) == (0, -5) + + assert h[[1, 2], [0, 1], sum, bh.loc("f")].sum() == 1 + assert h[[1, 2], [1, 0], sum, bh.loc("f")].sum() == 1 + + assert mini.values() == approx(np.array(((0, 1), (0, 0))))