Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/usage/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,23 @@ Boost-histogram specific details
--------------------------------

Boost-histogram implements ``bh.loc``, ``builtins.sum``, ``bh.rebin``, ``bh.underflow``, and ``bh.overflow`` from the UHI spec. A ``bh.tag.at`` locator is provided as well, which simulates the Boost.Histogram C++ ``.at()`` indexing using the UHI locator protocol.

Boost-histogram allows "picking" using lists, similar to NumPy. If you select
with multiple lists, boost-histogram instead selects per-axis, rather than
group-selecting and reducing to a single axis, like NumPy does. You can use
``bh.loc(...)`` inside these lists.

Example::

h = bh.histogram(
bh.axis.Regular(10, 0, 1),
bh.axis.StrCategory(["a", "b", "c"]),
bh.axis.IntCategory([5, 6, 7]),
)

minihist = h[:, [bh.loc("a"), bh.loc("c")], [0, 2]]

# Produces a 3D histgoram with Regular(10, 0, 1) x StrCategory(["a", "c"]) x IntCategory([5, 7])


This feature is considered experimental in boost-histogram 1.1.0. Removed bins are not added to the overflow bin currently.
82 changes: 65 additions & 17 deletions src/boost_histogram/_internal/hist.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import collections.abc
import copy
import logging
import threading
Expand Down Expand Up @@ -55,8 +56,10 @@

CppAxis = NewType("CppAxis", object)

InnerIndexing = Union[SupportsIndex, Callable[[Axis], int], slice, "ellipsis"]
IndexingWithMapping = Union[InnerIndexing, Mapping[int, InnerIndexing]]
SimpleIndexing = Union[SupportsIndex, slice]
InnerIndexing = Union[SimpleIndexing, Callable[[Axis], int], "ellipsis"]
FullInnerIndexing = Union[InnerIndexing, List[InnerIndexing]]
IndexingWithMapping = Union[FullInnerIndexing, Mapping[int, FullInnerIndexing]]
IndexingExpr = Union[IndexingWithMapping, Tuple[IndexingWithMapping, ...]]

T = TypeVar("T")
Expand Down Expand Up @@ -582,6 +585,26 @@ def __repr__(self) -> str:
ret += f" ({outer} with flow)"
return ret

def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing:
"""
Converts an expression that contains UHI locators to one that does not.
"""
# Support sum and rebin directly
if index is sum or hasattr(index, "factor"): # type: ignore
index = slice(None, None, index)

# General locators
# Note that MyPy doesn't like these very much - the fix
# will be to properly set input types
elif callable(index):
index = index(self.axes[axis])
elif isinstance(index, SupportsIndex):
if abs(int(index)) >= self._hist.axis(axis).size:
raise IndexError("histogram index is out of range")
index %= self._hist.axis(axis).size

return index # type: ignore

def _compute_commonindex(
self, index: IndexingExpr
) -> List[Union[SupportsIndex, slice, Mapping[int, Union[SupportsIndex, slice]]]]:
Expand Down Expand Up @@ -613,18 +636,11 @@ def _compute_commonindex(

# Allow [bh.loc(...)] to work
for i in range(len(indexes)):
# Support sum and rebin directly
if indexes[i] is sum or hasattr(indexes[i], "factor"):
indexes[i] = slice(None, None, indexes[i])
# General locators
# Note that MyPy doesn't like these very much - the fix
# will be to properly set input types
elif callable(indexes[i]):
indexes[i] = indexes[i](self.axes[i])
elif hasattr(indexes[i], "__index__"):
if abs(indexes[i]) >= hist.axis(i).size:
raise IndexError("histogram index is out of range")
indexes[i] %= hist.axis(i).size
# Support list of UHI indexers
if isinstance(indexes[i], list):
indexes[i] = [self._compute_uhi_index(index, i) for index in indexes[i]]
else:
indexes[i] = self._compute_uhi_index(indexes[i], i)

return indexes

Expand Down Expand Up @@ -729,6 +745,7 @@ def __getitem__( # noqa: C901
integrations: Set[int] = set()
slices: List[_core.algorithm.reduce_command] = []
pick_each: Dict[int, int] = dict()
pick_set: Dict[int, List[int]] = dict()

# Compute needed slices and projections
for i, ind in enumerate(indexes):
Expand All @@ -737,6 +754,9 @@ def __getitem__( # noqa: C901
1 if self.axes[i].traits.underflow else 0
)
continue
elif isinstance(ind, collections.abc.Sequence):
pick_set[i] = list(ind)
continue
elif not isinstance(ind, slice):
raise IndexError(
"Must be a slice, an integer, or follow the locator protocol."
Expand Down Expand Up @@ -782,17 +802,45 @@ def __getitem__( # noqa: C901
logger.debug("Reduce with %s", slices)
reduced = self._hist.reduce(*slices)

if pick_set:
warnings.warn(
"List indexing selection is experimental. Removed bins are not placed in overflow."
)
logger.debug("Slices for picking sets: %s", pick_set)
axes = [reduced.axis(i) for i in range(reduced.rank())]
reduced_view = reduced.view(flow=True)
for i in pick_set:
selection = copy.copy(pick_set[i])
ax = reduced.axis(i)
if ax.traits_ordered:
raise RuntimeError(
f"Axis {i} is not a categorical axis, cannot pick with list"
)

if ax.traits_overflow and ax.size not in pick_set[i]:
selection.append(ax.size)

new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]])
new_axis.metadata = axes[i].metadata
axes[i] = new_axis
reduced_view = np.take(reduced_view, selection, axis=i)

logger.debug("Axes: %s", axes)
new_reduced = reduced.__class__(axes)
new_reduced.view(flow=True)[...] = reduced_view
reduced = new_reduced

if pick_each:
my_slice = tuple(
tuple_slice = tuple(
pick_each.get(i, slice(None)) for i in range(reduced.rank())
)
logger.debug("Slices: %s", my_slice)
logger.debug("Slices for pick each: %s", tuple_slice)
axes = [
reduced.axis(i) for i in range(reduced.rank()) if i not in pick_each
]
logger.debug("Axes: %s", axes)
new_reduced = reduced.__class__(axes)
new_reduced.view(flow=True)[...] = reduced.view(flow=True)[my_slice]
new_reduced.view(flow=True)[...] = reduced.view(flow=True)[tuple_slice]
reduced = new_reduced
integrations = {i - sum(j <= i for j in pick_each) for i in integrations}

Expand Down
104 changes: 104 additions & 0 deletions tests/test_internal_histogram.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from pytest import approx

import boost_histogram as bh

Expand Down Expand Up @@ -258,3 +259,106 @@ def test_int_cat_hist():

with pytest.raises(RuntimeError):
h.fill(0.5)


@pytest.mark.filterwarnings("ignore:List indexing selection is experimental")
def test_int_cat_hist_pick_several():
h = bh.Histogram(
bh.axis.IntCategory([1, 2, 7], __dict__={"xval": 5}), storage=bh.storage.Int64()
)

h.fill(1, weight=8)
h.fill(2, weight=7)
h.fill(7, weight=6)

assert h.view() == approx(np.array([8, 7, 6]))
assert h.sum() == 21

assert h[[0, 2]].values() == approx(np.array([8, 6]))
assert h[[2, 0]].values() == approx(np.array([6, 8]))
assert h[[1]].values() == approx(np.array([7]))

assert h[[bh.loc(1), bh.loc(7)]].values() == approx(np.array([8, 6]))
assert h[[bh.loc(7), bh.loc(1)]].values() == approx(np.array([6, 8]))
assert h[[bh.loc(2)]].values() == approx(np.array([7]))

assert tuple(h[[0, 2]].axes[0]) == (1, 7)
assert tuple(h[[2, 0]].axes[0]) == (7, 1)
assert tuple(h[[1]].axes[0]) == (2,)

assert h.axes[0].xval == 5
assert h[[0]].axes[0].xval == 5
assert h[[0, 1, 2]].axes[0].xval == 5


@pytest.mark.filterwarnings("ignore:List indexing selection is experimental")
def test_str_cat_pick_several():
h = bh.Histogram(bh.axis.StrCategory(["a", "b", "c"]))

h.fill(["a", "a", "a", "b", "b", "c"], weight=0.25)

assert h[[0, 1, 2]].values() == approx(np.array([0.75, 0.5, 0.25]))
assert h[[2, 1, 0]].values() == approx(np.array([0.25, 0.5, 0.75]))
assert h[[1, 0]].values() == approx(np.array([0.5, 0.75]))

assert h[[bh.loc("a"), bh.loc("b"), bh.loc("c")]].values() == approx(
np.array([0.75, 0.5, 0.25])
)
assert h[[bh.loc("c"), bh.loc("b"), bh.loc("a")]].values() == approx(
np.array([0.25, 0.5, 0.75])
)
assert h[[bh.loc("b"), bh.loc("a")]].values() == approx(np.array([0.5, 0.75]))

assert tuple(h[[1, 0]].axes[0]) == ("b", "a")


@pytest.mark.filterwarnings("ignore:List indexing selection is experimental")
def test_pick_invalid():
h = bh.Histogram(bh.axis.Regular(10, 0, 1))
with pytest.raises(RuntimeError):
h[[0, 1]]

h = bh.Histogram(bh.axis.Integer(0, 10))
with pytest.raises(RuntimeError):
h[[0, 1]]


@pytest.mark.filterwarnings("ignore:List indexing selection is experimental")
def test_str_cat_pick_dual():
h = bh.Histogram(
bh.axis.StrCategory(["a", "b", "c"]), bh.axis.StrCategory(["d", "e", "f"])
)
vals = np.arange(9).reshape(3, 3)
h.values()[...] = vals

assert h[[0], [0]].values() == approx(vals[[0]][:, [0]])
assert h[[1], [2]].values() == approx(vals[[1]][:, [2]])
assert h[[1], [0, 1]].values() == approx(vals[[1]][:, [0, 1]])
assert h[[0, 1], [1]].values() == approx(vals[[0, 1]][:, [1]])
assert h[[0, 1], [0, 1]].values() == approx(vals[[0, 1]][:, [0, 1]])
assert h[[0, 1], [2, 1]].values() == approx(vals[[0, 1]][:, [2, 1]])


@pytest.mark.filterwarnings("ignore:List indexing selection is experimental")
def test_pick_multiaxis():
h = bh.Histogram(
bh.axis.StrCategory(["a", "b", "c"]),
bh.axis.IntCategory([-5, 0, 10]),
bh.axis.Regular(5, 0, 1),
bh.axis.StrCategory(["d", "e", "f"]),
storage=bh.storage.Int64(),
)

h.fill("b", -5, 0.65, "f")
h.fill("b", -5, 0.65, "e")

mini = h[[bh.loc("b"), 2], [1, bh.loc(-5)], sum, bh.loc("f")]

assert mini.ndim == 2
assert tuple(mini.axes[0]) == ("b", "c")
assert tuple(mini.axes[1]) == (0, -5)

assert h[[1, 2], [0, 1], sum, bh.loc("f")].sum() == 1
assert h[[1, 2], [1, 0], sum, bh.loc("f")].sum() == 1

assert mini.values() == approx(np.array(((0, 1), (0, 0))))