From 79fb89b33011ac3476e2055b061dca29901fe74d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 31 Oct 2022 20:17:08 +0100 Subject: [PATCH 01/23] clean up nodes and add Workflow --- bioimageio/core/resource_io/nodes.py | 65 +++++++++++++++++++--------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/bioimageio/core/resource_io/nodes.py b/bioimageio/core/resource_io/nodes.py index 47e2035f..7d8465ca 100644 --- a/bioimageio/core/resource_io/nodes.py +++ b/bioimageio/core/resource_io/nodes.py @@ -6,10 +6,12 @@ from marshmallow import missing from marshmallow.utils import _Missing +from bioimageio.spec.collection import raw_nodes as collection_raw_nodes +from bioimageio.spec.dataset import raw_nodes as dataset_raw_nodes from bioimageio.spec.model import raw_nodes as model_raw_nodes from bioimageio.spec.rdf import raw_nodes as rdf_raw_nodes -from bioimageio.spec.collection import raw_nodes as collection_raw_nodes from bioimageio.spec.shared import raw_nodes +from bioimageio.spec.workflow import raw_nodes as workflow_raw_nodes @dataclass @@ -48,12 +50,12 @@ class CiteEntry(Node, rdf_raw_nodes.CiteEntry): @dataclass -class Author(Node, model_raw_nodes.Author): +class Author(Node, rdf_raw_nodes.Author): pass @dataclass -class Maintainer(Node, model_raw_nodes.Maintainer): +class Maintainer(Node, rdf_raw_nodes.Maintainer): pass @@ -62,10 +64,19 @@ class Badge(Node, rdf_raw_nodes.Badge): pass +@dataclass +class Attachments(Node, rdf_raw_nodes.Attachments): + files: Union[_Missing, List[Path]] = missing + unknown: Union[_Missing, Dict[str, Any]] = missing + + @dataclass class RDF(rdf_raw_nodes.RDF, ResourceDescription): + authors: Union[_Missing, List[Author]] = missing + attachments: Union[_Missing, Attachments] = missing badges: Union[_Missing, List[Badge]] = missing - covers: Union[_Missing, List[Path]] = missing + cite: Union[_Missing, List[CiteEntry]] = missing + maintainers: Union[_Missing, List[Maintainer]] = missing @dataclass @@ -74,17 +85,22 @@ class 
CollectionEntry(Node, collection_raw_nodes.CollectionEntry): @dataclass -class LinkedDataset(Node, model_raw_nodes.LinkedDataset): +class Collection(collection_raw_nodes.Collection, RDF): + collection: List[CollectionEntry] = missing + + +@dataclass +class Dataset(Node, dataset_raw_nodes.Dataset): pass @dataclass -class ModelParent(Node, model_raw_nodes.ModelParent): +class LinkedDataset(Node, model_raw_nodes.LinkedDataset): pass @dataclass -class Collection(collection_raw_nodes.Collection, RDF): +class ModelParent(Node, model_raw_nodes.ModelParent): pass @@ -106,6 +122,7 @@ class Postprocessing(Node, model_raw_nodes.Postprocessing): @dataclass class InputTensor(Node, model_raw_nodes.InputTensor): axes: Tuple[str, ...] = missing + preprocessing: Union[_Missing, List[Preprocessing]] = missing def __post_init__(self): super().__post_init__() @@ -116,6 +133,7 @@ def __post_init__(self): @dataclass class OutputTensor(Node, model_raw_nodes.OutputTensor): axes: Tuple[str, ...] = missing + postprocessing: Union[_Missing, List[Postprocessing]] = missing def __post_init__(self): super().__post_init__() @@ -132,40 +150,39 @@ def __call__(self, *args, **kwargs): @dataclass -class KerasHdf5WeightsEntry(Node, model_raw_nodes.KerasHdf5WeightsEntry): - source: Path = missing +class WeightsEntryBase(model_raw_nodes.WeightsEntryBase): + dependencies: Union[_Missing, Dependencies] = missing @dataclass -class OnnxWeightsEntry(Node, model_raw_nodes.OnnxWeightsEntry): +class KerasHdf5WeightsEntry(WeightsEntryBase, model_raw_nodes.KerasHdf5WeightsEntry): source: Path = missing @dataclass -class PytorchStateDictWeightsEntry(Node, model_raw_nodes.PytorchStateDictWeightsEntry): +class OnnxWeightsEntry(WeightsEntryBase, model_raw_nodes.OnnxWeightsEntry): source: Path = missing - architecture: Union[_Missing, ImportedSource] = missing @dataclass -class TorchscriptWeightsEntry(Node, model_raw_nodes.TorchscriptWeightsEntry): +class PytorchStateDictWeightsEntry(WeightsEntryBase, 
model_raw_nodes.PytorchStateDictWeightsEntry): source: Path = missing + architecture: Union[_Missing, ImportedSource] = missing @dataclass -class TensorflowJsWeightsEntry(Node, model_raw_nodes.TensorflowJsWeightsEntry): +class TorchscriptWeightsEntry(WeightsEntryBase, model_raw_nodes.TorchscriptWeightsEntry): source: Path = missing @dataclass -class TensorflowSavedModelBundleWeightsEntry(Node, model_raw_nodes.TensorflowSavedModelBundleWeightsEntry): +class TensorflowJsWeightsEntry(WeightsEntryBase, model_raw_nodes.TensorflowJsWeightsEntry): source: Path = missing @dataclass -class Attachments(Node, rdf_raw_nodes.Attachments): - files: Union[_Missing, List[Path]] = missing - unknown: Union[_Missing, Dict[str, Any]] = missing +class TensorflowSavedModelBundleWeightsEntry(WeightsEntryBase, model_raw_nodes.TensorflowSavedModelBundleWeightsEntry): + source: Path = missing WeightsEntry = Union[ @@ -180,8 +197,16 @@ class Attachments(Node, rdf_raw_nodes.Attachments): @dataclass class Model(model_raw_nodes.Model, RDF): - authors: List[Author] = missing - maintainers: Union[_Missing, List[Maintainer]] = missing + inputs: List[InputTensor] = missing + outputs: List[OutputTensor] = missing + parent: Union[_Missing, ModelParent] = missing + run_mode: Union[_Missing, RunMode] = missing test_inputs: List[Path] = missing test_outputs: List[Path] = missing + training_data: Union[_Missing, Dataset, LinkedDataset] = missing weights: Dict[model_raw_nodes.WeightsFormat, WeightsEntry] = missing + + +@dataclass +class Workflow(workflow_raw_nodes.Workflow, RDF): + pass From 6373c886e0f9f32ebaaf686b5f4a06fdf2bb1ebf Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 4 Nov 2022 14:10:45 +0100 Subject: [PATCH 02/23] update workflow nodes --- bioimageio/core/resource_io/nodes.py | 56 +++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/resource_io/nodes.py b/bioimageio/core/resource_io/nodes.py index 7d8465ca..b369a97f 100644 --- 
a/bioimageio/core/resource_io/nodes.py +++ b/bioimageio/core/resource_io/nodes.py @@ -208,5 +208,59 @@ class Model(model_raw_nodes.Model, RDF): @dataclass -class Workflow(workflow_raw_nodes.Workflow, RDF): +class Axis(Node, workflow_raw_nodes.Axis): + pass + + +@dataclass +class BatchAxis(Node, workflow_raw_nodes.BatchAxis): + pass + + +@dataclass +class ChannelAxis(Node, workflow_raw_nodes.ChannelAxis): + pass + + +@dataclass +class IndexAxis(Node, workflow_raw_nodes.IndexAxis): + pass + + +@dataclass +class SpaceAxis(Node, workflow_raw_nodes.SpaceAxis): + pass + + +@dataclass +class TimeAxis(Node, workflow_raw_nodes.TimeAxis): pass + + +@dataclass +class InputSpec(Node, workflow_raw_nodes.InputSpec): + pass + + +@dataclass +class OptionSpec(Node, workflow_raw_nodes.OptionSpec): + pass + + +@dataclass +class OutputSpec(Node, workflow_raw_nodes.OutputSpec): + pass + + +@dataclass +class Step(Node, workflow_raw_nodes.Step): + pass + + +@dataclass +class Workflow(workflow_raw_nodes.Workflow, RDF): + inputs: List[InputSpec] = missing + options: List[OptionSpec] = missing + outputs: List[OutputSpec] = missing + steps: List[Step] = missing + test_steps: List[Step] = missing From d473faa84927db435b1cf40f881840fa6cfa6c8f Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 4 Nov 2022 14:11:40 +0100 Subject: [PATCH 03/23] allow for node defaults (don't init with missing) --- bioimageio/core/resource_io/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bioimageio/core/resource_io/utils.py b/bioimageio/core/resource_io/utils.py index 430d30fc..ffeefbaa 100644 --- a/bioimageio/core/resource_io/utils.py +++ b/bioimageio/core/resource_io/utils.py @@ -6,6 +6,8 @@ import typing from types import ModuleType +from marshmallow import missing + from bioimageio.spec.shared import raw_nodes, resolve_source, source_available from bioimageio.spec.shared.node_transformer import ( GenericRawNode, @@ -85,7 +87,9 @@ def __init__(self, nodes_module: ModuleType): def 
generic_transformer(self, node: GenericRawNode) -> GenericResolvedNode: if isinstance(node, raw_nodes.RawNode): resolved_data = { - field.name: self.transform(getattr(node, field.name)) for field in dataclasses.fields(node) + field.name: self.transform(getattr(node, field.name)) + for field in dataclasses.fields(node) + if getattr(node, field.name) is not missing # exclude missing fields to respect for node defaults } resolved_node_type: typing.Type[GenericResolvedNode] = getattr(self.nodes, node.__class__.__name__) return resolved_node_type(**resolved_data) # type: ignore From 91f7d973f099394eb2e1f810808cf974c6cf18b9 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Fri, 4 Nov 2022 19:32:32 +0100 Subject: [PATCH 04/23] make image_helper agnostic to axis letter vs name --- bioimageio/core/image_helper.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/bioimageio/core/image_helper.py b/bioimageio/core/image_helper.py index 0468b61f..2bbb3229 100644 --- a/bioimageio/core/image_helper.py +++ b/bioimageio/core/image_helper.py @@ -13,7 +13,7 @@ # -def transform_input_image(image: np.ndarray, tensor_axes: str, image_axes: Optional[str] = None): +def transform_input_image(image: np.ndarray, tensor_axes: Sequence[str], image_axes: Optional[str] = None): """Transform input image into output tensor with desired axes. Args: @@ -35,7 +35,16 @@ def transform_input_image(image: np.ndarray, tensor_axes: str, image_axes: Optio image_axes = "bczyx" else: raise ValueError(f"Invalid number of image dimensions: {ndim}") - tensor = DataArray(image, dims=tuple(image_axes)) + + # instead of 'b' we might want 'batch', etc... 
+ axis_letter_map = { + letter: name + for letter, name in {"b": "batch", "c": "channel", "i": "index", "t": "time"} + if name in tensor_axes # only do this mapping if the full name is in the desired tensor_axes + } + image_axes = tuple(axis_letter_map.get(a, a) for a in image_axes) + + tensor = DataArray(image, dims=image_axes) # expand the missing image axes missing_axes = tuple(set(tensor_axes) - set(image_axes)) tensor = tensor.expand_dims(dim=missing_axes) @@ -75,9 +84,10 @@ def transform_output_tensor(tensor: np.ndarray, tensor_axes: str, output_axes: s def to_channel_last(image): - chan_id = image.dims.index("c") + c = "c" if "c" in image.dims else "channel" + chan_id = image.dims.index(c) if chan_id != image.ndim - 1: - target_axes = tuple(ax for ax in image.dims if ax != "c") + ("c",) + target_axes = tuple(ax for ax in image.dims if ax != c) + (c,) image = image.transpose(*target_axes) return image @@ -113,9 +123,9 @@ def save_image(out_path, image): squeeze = {ax: 0 if (ax in "bc" and sh == 1) else slice(None) for ax, sh in zip(image.dims, image.shape)} image = image[squeeze] - if "b" in image.dims: + if "b" in image.dims or "batch" in image.dims: raise RuntimeError(f"Cannot save prediction with batchsize > 1 as {ext}-file") - if "c" in image.dims: # image formats need channel last + if "c" in image.dims or "channel" in image.dims: # image formats need channel last image = to_channel_last(image) save_function = imageio.volsave if is_volume else imageio.imsave From e3371997189fc92d998ffde332af6f047e802b37 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sat, 5 Nov 2022 00:30:26 +0100 Subject: [PATCH 05/23] accept pathlib.Path in save_image --- bioimageio/core/image_helper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/image_helper.py b/bioimageio/core/image_helper.py index 2bbb3229..fdbead71 100644 --- a/bioimageio/core/image_helper.py +++ b/bioimageio/core/image_helper.py @@ -112,10 +112,10 @@ def 
load_tensors(sources, tensor_specs: List[Union[InputTensor, OutputTensor]]) return [load_image(s, sspec.axes) for s, sspec in zip(sources, tensor_specs)] -def save_image(out_path, image): - ext = os.path.splitext(out_path)[1] +def save_image(out_path: os.PathLike, image): + ext = os.path.splitext(str(out_path))[1] if ext == ".npy": - np.save(out_path, image) + np.save(str(out_path), image) else: is_volume = "z" in image.dims From 8a52f600fb2f4eed895fa0195ad3f03312e3ee93 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sat, 5 Nov 2022 00:31:11 +0100 Subject: [PATCH 06/23] fix nodes --- bioimageio/core/resource_io/nodes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bioimageio/core/resource_io/nodes.py b/bioimageio/core/resource_io/nodes.py index b369a97f..1c850795 100644 --- a/bioimageio/core/resource_io/nodes.py +++ b/bioimageio/core/resource_io/nodes.py @@ -8,7 +8,7 @@ from bioimageio.spec.collection import raw_nodes as collection_raw_nodes from bioimageio.spec.dataset import raw_nodes as dataset_raw_nodes -from bioimageio.spec.model import raw_nodes as model_raw_nodes +from bioimageio.spec.model.v0_4 import raw_nodes as model_raw_nodes from bioimageio.spec.rdf import raw_nodes as rdf_raw_nodes from bioimageio.spec.shared import raw_nodes from bioimageio.spec.workflow import raw_nodes as workflow_raw_nodes @@ -150,7 +150,7 @@ def __call__(self, *args, **kwargs): @dataclass -class WeightsEntryBase(model_raw_nodes.WeightsEntryBase): +class WeightsEntryBase(model_raw_nodes._WeightsEntryBase): dependencies: Union[_Missing, Dependencies] = missing @@ -259,8 +259,8 @@ class Step(Node, workflow_raw_nodes.Step): @dataclass class Workflow(workflow_raw_nodes.Workflow, RDF): - inputs: List[InputSpec] = missing - options: List[OptionSpec] = missing - outputs: List[OutputSpec] = missing + inputs_spec: List[InputSpec] = missing + options_spec: List[OptionSpec] = missing + outputs_spec: List[OutputSpec] = missing steps: List[Step] = missing 
test_steps: List[Step] = missing From f028bd28cdd0dfcd4570a95e75e89be607ff7ad1 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sat, 5 Nov 2022 00:33:09 +0100 Subject: [PATCH 07/23] add cli command run --- bioimageio/core/__main__.py | 99 ++++++++++++++++++++++++++++++++++--- 1 file changed, 93 insertions(+), 6 deletions(-) diff --git a/bioimageio/core/__main__.py b/bioimageio/core/__main__.py index a26f43d1..8be385ff 100644 --- a/bioimageio/core/__main__.py +++ b/bioimageio/core/__main__.py @@ -3,19 +3,23 @@ import os import sys import warnings +from argparse import ArgumentParser +from functools import partial from glob import glob - from pathlib import Path -from pprint import pformat, pprint -from typing import List, Optional +from pprint import pformat +from typing import List, Optional, Union import typer -from bioimageio.core import __version__, prediction, commands, resource_tests, load_raw_resource_description +from bioimageio.core import __version__, commands, load_raw_resource_description, prediction, resource_tests from bioimageio.core.common import TestSummary -from bioimageio.core.prediction_pipeline import get_weight_formats +from bioimageio.core.image_helper import load_image, save_image +from bioimageio.core.resource_io import nodes +from bioimageio.core.workflow.operators import run_workflow from bioimageio.spec.__main__ import app, help_version as help_version_spec from bioimageio.spec.model.raw_nodes import WeightsFormat +from bioimageio.spec.workflow.raw_nodes import Workflow try: from typing import get_args @@ -244,7 +248,7 @@ def predict_images( tiling = json.loads(tiling.replace("'", '"')) assert isinstance(tiling, dict) - # this is a weird typer bug: default devices are empty tuple although they should be None + # this is a weird typer bug: default devices are empty tuple, although they should be None if len(devices) == 0: devices = None prediction.predict_images( @@ -310,5 +314,88 @@ def convert_keras_weights_to_tensorflow( ) 
+@app.command(context_settings=dict(allow_extra_args=True, ignore_unknown_options=True), add_help_option=False) +def run( + rdf_source: str = typer.Argument(..., help="BioImage.IO RDF id/url/path."), + *, + output_folder: Path = Path("outputs"), + output_tensor_extension: str = ".npy", + ctx: typer.Context, +): + resource = load_raw_resource_description(rdf_source, update_to_format="latest") + if not isinstance(resource, Workflow): + raise NotImplementedError(f"Non-workflow RDFs not yet supported (got type {resource.type})") + + map_type = dict( + any=str, + boolean=bool, + float=float, + int=int, + list=str, + string=str, + ) + wf = resource + parser = ArgumentParser(description=f"CLI for {wf.name}") + + # replicate typer args to show up in help + parser.add_argument( + metavar="rdf-source", + dest="rdf_source", + help="BioImage.IO RDF id/url/path. The optional arguments below are RDF specific.", + ) + parser.add_argument( + metavar="output-folder", dest="output_folder", help="Folder to save outputs to.", default=Path("outputs") + ) + parser.add_argument( + metavar="output-tensor-extension", + dest="output_tensor_extension", + help="Output tensor extension.", + default=".npy", + ) + + def add_param_args(params): + for param in params: + argument_kwargs = {} + if param.type == "tensor": + argument_kwargs["type"] = partial(load_image, axes=[a.name or a.type for a in param.axes]) + else: + argument_kwargs["type"] = map_type[param.type] + + if param.type == "list": + argument_kwargs["nargs"] = "*" + + argument_kwargs["help"] = param.description or "" + if hasattr(param, "default"): + argument_kwargs["default"] = param.default + else: + argument_kwargs["required"] = True + + argument_kwargs["metavar"] = param.name[0].capitalize() + parser.add_argument("--" + param.name.replace("_", "-"), **argument_kwargs) + + def prepare_parameter(value, param: Union[nodes.InputSpec, nodes.OptionSpec]): + if param.type == "tensor": + return load_image(value, [a.name or a.type for a in 
param.axes]) + else: + return value + + add_param_args(wf.inputs_spec) + add_param_args(wf.options_spec) + args = parser.parse_args([rdf_source, str(output_folder), output_tensor_extension] + list(ctx.args)) + outputs = run_workflow( + rdf_source, + inputs=[prepare_parameter(getattr(args, ipt.name), ipt) for ipt in wf.inputs_spec], + options={opt.name: prepare_parameter(getattr(args, opt.name), opt) for opt in wf.options_spec}, + ) + output_folder.mkdir(parents=True, exist_ok=True) + for out_spec, out in zip(wf.outputs_spec, outputs): + out_path = output_folder / out_spec.name + if out_spec.type == "tensor": + save_image(out_path.with_suffix(output_tensor_extension), out) + else: + with out_path.with_suffix(".json").open("w") as f: + json.dump(out, f) + + if __name__ == "__main__": app() From 7d58fa7a740584e541ccd689e54b6d8398dd54be Mon Sep 17 00:00:00 2001 From: fynnbe Date: Sat, 5 Nov 2022 00:34:38 +0100 Subject: [PATCH 08/23] add workflow operators --- bioimageio/core/workflow/__init__.py | 0 .../core/workflow/operators/__init__.py | 4 + bioimageio/core/workflow/operators/_assert.py | 8 + .../core/workflow/operators/_generate.py | 15 ++ bioimageio/core/workflow/operators/_run.py | 162 ++++++++++++++++++ .../core/workflow/operators/_various.py | 57 ++++++ 6 files changed, 246 insertions(+) create mode 100644 bioimageio/core/workflow/__init__.py create mode 100644 bioimageio/core/workflow/operators/__init__.py create mode 100644 bioimageio/core/workflow/operators/_assert.py create mode 100644 bioimageio/core/workflow/operators/_generate.py create mode 100644 bioimageio/core/workflow/operators/_run.py create mode 100644 bioimageio/core/workflow/operators/_various.py diff --git a/bioimageio/core/workflow/__init__.py b/bioimageio/core/workflow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bioimageio/core/workflow/operators/__init__.py b/bioimageio/core/workflow/operators/__init__.py new file mode 100644 index 00000000..87e447e4 --- /dev/null 
+++ b/bioimageio/core/workflow/operators/__init__.py @@ -0,0 +1,4 @@ +from ._assert import assert_shape +from ._generate import generate_random_uniform_tensor +from ._run import run_model_inference, run_workflow +from ._various import binarize, load_tensors, log, select_outputs diff --git a/bioimageio/core/workflow/operators/_assert.py b/bioimageio/core/workflow/operators/_assert.py new file mode 100644 index 00000000..fdf54302 --- /dev/null +++ b/bioimageio/core/workflow/operators/_assert.py @@ -0,0 +1,8 @@ +from typing import Sequence + +import xarray as xr + + +def assert_shape(tensor: xr.DataArray, shape: Sequence[int]) -> xr.DataArray: + assert tensor.shape == tuple(shape) + return tensor diff --git a/bioimageio/core/workflow/operators/_generate.py b/bioimageio/core/workflow/operators/_generate.py new file mode 100644 index 00000000..c5f09237 --- /dev/null +++ b/bioimageio/core/workflow/operators/_generate.py @@ -0,0 +1,15 @@ +from typing import Sequence, Union + +import numpy as np +import xarray as xr + + +def generate_random_uniform_tensor( + shape: Sequence[Union[int, str]], axes: Sequence[str], *, low: Union[int, float] = 0, high: Union[int, float] = 1 +) -> xr.DataArray: + """generate a tensor with uniformly distributed samples in the interval [low, high) + Returns: + xr.DataArray: random tensor + """ + assert len(shape) == len(axes) + return xr.DataArray(np.random.uniform(low=low, high=high, size=[int(s) for s in shape]), dims=tuple(axes)) diff --git a/bioimageio/core/workflow/operators/_run.py b/bioimageio/core/workflow/operators/_run.py new file mode 100644 index 00000000..1ef173c4 --- /dev/null +++ b/bioimageio/core/workflow/operators/_run.py @@ -0,0 +1,162 @@ +from os import PathLike +from typing import Any, Dict, IO, List, Optional, Sequence, Tuple, Union + +import numpy as np +import xarray as xr +from marshmallow import missing + +from bioimageio.core import load_resource_description +from bioimageio.core.prediction_pipeline import 
create_prediction_pipeline +from bioimageio.core.resource_io import nodes +from bioimageio.spec import load_raw_resource_description +from bioimageio.spec.model import raw_nodes +from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription + + +def run_model_inference( + rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], + *tensors, + enable_preprocessing: bool = True, + enable_postprocessing: bool = True, + devices: Optional[Sequence[str]] = None, +) -> List[xr.DataArray]: + """run model inference + + Returns: + list: model outputs + """ + model = load_raw_resource_description(rdf_source, update_to_format="latest") + assert isinstance(model, raw_nodes.Model) + # remove pre-/postprocessing if specified + if not enable_preprocessing: + for ipt in model.inputs: + if ipt.preprocessing: + ipt.preprocessing = missing + if not enable_postprocessing: + for ipt in model.outputs: + if ipt.postprocessing: + ipt.postprocessing = missing + + with create_prediction_pipeline(model, devices=devices) as pred_pipeline: + return pred_pipeline.forward(*tensors) + + +def run_workflow( + rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], + inputs: Sequence = tuple(), + options: Dict[str, Any] = None, +) -> Sequence: + return _run_workflow(rdf_source, test_steps=False, inputs=inputs, options=options) + + +def run_workflow_test( + rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], +) -> Sequence: + return _run_workflow(rdf_source, test_steps=True) + + +def _run_workflow( + rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], + *, + test_steps: bool, + inputs: Sequence = tuple(), + options: Dict[str, Any] = None, +) -> Tuple: + import bioimageio.core.workflow.operators as ops + + workflow = load_resource_description(rdf_source) + assert isinstance(workflow, nodes.Workflow) + wf_options = {opt.name: 
opt.default for opt in workflow.options_spec} + if test_steps: + assert not inputs + assert not options + wf_inputs = {} + steps = workflow.test_steps + else: + if not len(workflow.inputs_spec) == len(inputs): + raise ValueError(f"Expected {len(workflow.inputs_spec)} inputs, but got {len(inputs)}.") + + wf_inputs = {ipt_spec.name: ipt for ipt_spec, ipt in zip(workflow.inputs_spec, inputs)} + for k, v in options.items(): + if k not in wf_options: + raise ValueError(f"Got unknown option {k}, expected one of {set(wf_options)}.") + + wf_options[k] = v + + steps = workflow.steps + + named_outputs = {} # for later referencing + + def map_ref(value): + assert isinstance(workflow, nodes.Workflow) + if isinstance(value, str) and value.startswith("${{") and value.endswith("}}"): + ref = value[4:-2].strip() + if ref.startswith("self.inputs."): + ref = ref[len("self.inputs.") :] + if ref not in wf_inputs: + raise ValueError(f"Invalid workflow input reference {value}.") + + return wf_inputs[ref] + elif ref.startswith("self.options."): + ref = ref[len("self.options.") :] + if ref not in wf_options: + raise ValueError(f"Invalid workflow option reference {value}.") + + return wf_options[ref] + elif ref == "self.rdf_source": + assert workflow.rdf_source is not missing + return str(workflow.rdf_source) + elif ref in named_outputs: + return named_outputs[ref] + else: + raise ValueError(f"Invalid reference {value}.") + else: + return value + + # implicit inputs to a step are the outputs of the previous step. + # For the first step these are the workflow inputs. 
+ outputs = inputs + for step in steps: + if not hasattr(ops, step.op): + raise NotImplementedError(f"{step.op} not implemented in {ops}") + + op = getattr(ops, step.op) + if step.inputs is missing: + inputs = outputs + else: + inputs = [map_ref(ipt) for ipt in step.inputs] + + options = {k: map_ref(v) for k, v in (step.options or {}).items()} + outputs = op(*inputs, **options) + if not isinstance(outputs, tuple): + outputs = (outputs,) + + if step.outputs: + assert step.id is not missing + if len(step.outputs) != len(outputs): + raise ValueError( + f"Got {len(step.outputs)} step output name{'s' if len(step.outputs) > 1 else ''} ({step.id}.outputs), " + f"but op {step.op} returned {len(outputs)} outputs." + ) + + named_outputs.update({f"{step.id}.outputs.{out_name}": out for out_name, out in zip(step.outputs, outputs)}) + + if len(workflow.outputs_spec) != len(outputs): + raise ValueError(f"Expected {len(workflow.outputs_spec)} outputs from last step, but got {len(outputs)}.") + + def tensor_as_xr(tensor, axes: Sequence[nodes.Axis]): + spec_axes = [a.name or a.type for a in axes] + if isinstance(tensor, xr.DataArray): + if list(tensor.dims) != spec_axes: + raise ValueError( + f"Last workflow step returned xarray.DataArray with dims {tensor.dims}, but expected dims {spec_axes}." 
+ ) + + return tensor + else: + return xr.DataArray(tensor, dims=spec_axes) + + return [ + tensor_as_xr(out, out_spec.axes) if out_spec.type == "tensor" else out + for out_spec, out in zip(workflow.outputs_spec, outputs) + ] diff --git a/bioimageio/core/workflow/operators/_various.py b/bioimageio/core/workflow/operators/_various.py new file mode 100644 index 00000000..37bb3520 --- /dev/null +++ b/bioimageio/core/workflow/operators/_various.py @@ -0,0 +1,57 @@ +import logging +from typing import List, Sequence, Tuple + +import numpy as np +import xarray as xr +from imageio import imread + +logger = logging.getLogger(__name__) + + +def binarize(tensor: xr.DataArray, threshold: float): + return tensor > threshold + + +def select_outputs(*args) -> Tuple: + """helper to select workflow outputs (to be used as a final step in a workflow) + + Returns: + tuple: selected outputs (inputs to this op) + + """ + + return args + + +def log(*args, log_level: int = logging.INFO, **kwargs) -> Tuple: + """log any key word arguments (kwargs/options) + + Returns: + tuple: positional inputs to this op + + """ + for k, v in kwargs.items(): + logger.log( + log_level, + f"{k}: %s", + f"{v.shape} mean: {v.mean().item():.4f} std: {v.std().item():.4f}" + if isinstance(v, (np.ndarray, xr.DataArray)) + else v, + ) + + return args + + +def load_tensors(sources: List[str], axes: Sequence[str]) -> List[xr.DataArray]: + """load tensors""" + assert len(sources) == len(axes) + tensors = [] + for source, ax in zip(sources, axes): + if source.split(".")[-1] == ".npy": + data = np.load(str(source)) + else: + data = imread(source) + + tensors.append(xr.DataArray(data, dims=ax)) + + return tensors From 82f28f5c12df3243688b8070944da4bf89d2a324 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 8 Nov 2022 11:51:21 +0100 Subject: [PATCH 09/23] add iterate_workflow_steps and iterate_test_workflow_steps --- bioimageio/core/workflow/operators/_run.py | 66 +++++++++++++++++----- 1 file changed, 53 insertions(+), 
13 deletions(-) diff --git a/bioimageio/core/workflow/operators/_run.py b/bioimageio/core/workflow/operators/_run.py index 1ef173c4..d803b88f 100644 --- a/bioimageio/core/workflow/operators/_run.py +++ b/bioimageio/core/workflow/operators/_run.py @@ -1,5 +1,6 @@ +from dataclasses import dataclass from os import PathLike -from typing import Any, Dict, IO, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, Generator, IO, List, Optional, Sequence, Tuple, Union import numpy as np import xarray as xr @@ -45,32 +46,64 @@ def run_workflow( rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], inputs: Sequence = tuple(), options: Dict[str, Any] = None, -) -> Sequence: - return _run_workflow(rdf_source, test_steps=False, inputs=inputs, options=options) +) -> tuple: + outputs = tuple() + for state in _iterate_workflow_steps_impl(rdf_source, test_steps=False, inputs=inputs, options=options): + outputs = state.outputs + + return outputs def run_workflow_test( rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], -) -> Sequence: - return _run_workflow(rdf_source, test_steps=True) +) -> tuple: + outputs = tuple() + for state in _iterate_workflow_steps_impl(rdf_source, test_steps=True): + outputs = state.outputs + + return outputs + + +@dataclass +class WorkflowState: + wf_inputs: Dict[str, Any] + wf_options: Dict[str, Any] + inputs: tuple + outputs: tuple + named_outputs: Dict[str, Any] + + +def iterate_workflow_steps( + rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], + *, + inputs: Sequence = tuple(), + options: Dict[str, Any] = None, +) -> Generator[WorkflowState]: + yield from _iterate_workflow_steps_impl(rdf_source, inputs=inputs, options=options, test_steps=False) + + +def iterate_test_workflow_steps( + rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription] +) -> Generator[WorkflowState]: + yield from 
_iterate_workflow_steps_impl(rdf_source, test_steps=True) -def _run_workflow( +def _iterate_workflow_steps_impl( rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], *, test_steps: bool, inputs: Sequence = tuple(), options: Dict[str, Any] = None, -) -> Tuple: +) -> Generator[WorkflowState]: import bioimageio.core.workflow.operators as ops workflow = load_resource_description(rdf_source) assert isinstance(workflow, nodes.Workflow) - wf_options = {opt.name: opt.default for opt in workflow.options_spec} + wf_options: Dict[str, Any] = {opt.name: opt.default for opt in workflow.options_spec} if test_steps: assert not inputs assert not options - wf_inputs = {} + wf_inputs: Dict[str, Any] = {} steps = workflow.test_steps else: if not len(workflow.inputs_spec) == len(inputs): @@ -115,7 +148,7 @@ def map_ref(value): # implicit inputs to a step are the outputs of the previous step. # For the first step these are the workflow inputs. - outputs = inputs + outputs = tuple(inputs) for step in steps: if not hasattr(ops, step.op): raise NotImplementedError(f"{step.op} not implemented in {ops}") @@ -124,8 +157,9 @@ def map_ref(value): if step.inputs is missing: inputs = outputs else: - inputs = [map_ref(ipt) for ipt in step.inputs] + inputs = tuple(map_ref(ipt) for ipt in step.inputs) + assert isinstance(inputs, tuple) options = {k: map_ref(v) for k, v in (step.options or {}).items()} outputs = op(*inputs, **options) if not isinstance(outputs, tuple): @@ -141,6 +175,9 @@ def map_ref(value): named_outputs.update({f"{step.id}.outputs.{out_name}": out for out_name, out in zip(step.outputs, outputs)}) + yield WorkflowState( + wf_inputs=wf_inputs, wf_options=wf_options, inputs=inputs, outputs=outputs, named_outputs=named_outputs + ) if len(workflow.outputs_spec) != len(outputs): raise ValueError(f"Expected {len(workflow.outputs_spec)} outputs from last step, but got {len(outputs)}.") @@ -156,7 +193,10 @@ def tensor_as_xr(tensor, axes: 
Sequence[nodes.Axis]): else: return xr.DataArray(tensor, dims=spec_axes) - return [ + outputs = tuple( tensor_as_xr(out, out_spec.axes) if out_spec.type == "tensor" else out for out_spec, out in zip(workflow.outputs_spec, outputs) - ] + ) + yield WorkflowState( + wf_inputs=wf_inputs, wf_options=wf_options, inputs=inputs, outputs=outputs, named_outputs=named_outputs + ) From dacb26fd831b730b54718f1f14f2d47533d8df4d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 15 Nov 2022 01:09:51 +0100 Subject: [PATCH 10/23] wip run_model_inference_with_chunking --- bioimageio/core/image_helper.py | 2 +- .../core/{workflow => }/operators/__init__.py | 2 +- .../core/{workflow => }/operators/_assert.py | 0 .../{workflow => }/operators/_generate.py | 0 bioimageio/core/operators/_run.py | 538 ++++++++++++++++++ .../core/{workflow => }/operators/_various.py | 0 .../_prediction_pipeline.py | 13 +- bioimageio/core/resource_io/utils.py | 9 +- bioimageio/core/workflow/__init__.py | 0 bioimageio/core/workflow/operators/_run.py | 202 ------- 10 files changed, 555 insertions(+), 211 deletions(-) rename bioimageio/core/{workflow => }/operators/__init__.py (63%) rename bioimageio/core/{workflow => }/operators/_assert.py (100%) rename bioimageio/core/{workflow => }/operators/_generate.py (100%) create mode 100644 bioimageio/core/operators/_run.py rename bioimageio/core/{workflow => }/operators/_various.py (100%) delete mode 100644 bioimageio/core/workflow/__init__.py delete mode 100644 bioimageio/core/workflow/operators/_run.py diff --git a/bioimageio/core/image_helper.py b/bioimageio/core/image_helper.py index fdbead71..8526e692 100644 --- a/bioimageio/core/image_helper.py +++ b/bioimageio/core/image_helper.py @@ -105,7 +105,7 @@ def load_image(in_path, axes: Sequence[str]) -> DataArray: is_volume = "z" in axes im = imageio.volread(in_path) if is_volume else imageio.imread(in_path) im = transform_input_image(im, axes) - return DataArray(im, dims=axes) + return DataArray(im, dims=tuple(axes)) 
def load_tensors(sources, tensor_specs: List[Union[InputTensor, OutputTensor]]) -> List[DataArray]: diff --git a/bioimageio/core/workflow/operators/__init__.py b/bioimageio/core/operators/__init__.py similarity index 63% rename from bioimageio/core/workflow/operators/__init__.py rename to bioimageio/core/operators/__init__.py index 87e447e4..ffbef74f 100644 --- a/bioimageio/core/workflow/operators/__init__.py +++ b/bioimageio/core/operators/__init__.py @@ -1,4 +1,4 @@ from ._assert import assert_shape from ._generate import generate_random_uniform_tensor -from ._run import run_model_inference, run_workflow +from ._run import run_model_inference, run_model_inference_with_chunking, run_workflow from ._various import binarize, load_tensors, log, select_outputs diff --git a/bioimageio/core/workflow/operators/_assert.py b/bioimageio/core/operators/_assert.py similarity index 100% rename from bioimageio/core/workflow/operators/_assert.py rename to bioimageio/core/operators/_assert.py diff --git a/bioimageio/core/workflow/operators/_generate.py b/bioimageio/core/operators/_generate.py similarity index 100% rename from bioimageio/core/workflow/operators/_generate.py rename to bioimageio/core/operators/_generate.py diff --git a/bioimageio/core/operators/_run.py b/bioimageio/core/operators/_run.py new file mode 100644 index 00000000..b5389947 --- /dev/null +++ b/bioimageio/core/operators/_run.py @@ -0,0 +1,538 @@ +import math +import warnings +from collections import defaultdict +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor +from contextlib import ExitStack +from dataclasses import dataclass +from functools import partial +from os import PathLike +from typing import Any, Dict, Generator, IO, List, Optional, Sequence, Tuple, Union + +import dask +import dask.bag as db +import dask.array as da +import dask.dataframe as dd +import numpy as np +import pandas as pd +import xarray as xr + +from bioimageio.core.prediction_pipeline._model_adapters import 
def correct_chunk(
    chunk, ipt: raw_nodes.InputTensor, outputs: Sequence[raw_nodes.OutputTensor], tensor
) -> Tuple[Dict[str, int], Dict[int, int], Dict[str, Tuple[int, int]]]:
    """Adapt a requested input chunk to the halo/offset of the output that references `ipt`.

    Args:
        chunk: mapping from axis name to requested chunk length for `ipt`.
        ipt: input tensor spec; its `axes` and `name` are used.
        outputs: all model output specs; only those with an implicit shape that
            reference `ipt` influence the result.
        tensor: the actual input tensor (only `tensor.shape` is read), ordered like `ipt.axes`.

    Returns:
        A 3-tuple of
        - the corrected chunk per axis name (requested chunk minus overlap on both sides),
        - the overlap depth keyed by axis *index* (as expected by `da.overlap`),
        - the padding `(before, after)` per axis name that makes the tensor shape a
          multiple of the corrected chunk.
        If no output references `ipt`, `chunk` is returned unchanged together with
        zero-valued default dicts.

    Raises:
        NotImplementedError: if more than one output references `ipt`, or if a
            space/time axis has a positive offset.
        ValueError: if referencing outputs disagree on scale/offset/halo, or if the
            requested chunk is not larger than twice the required overlap.
    """
    ipt_shape = np.array([chunk[a] for a in ipt.axes], dtype=int)
    referencing_outputs = [
        ot
        for ot in outputs
        if isinstance(ot.shape, raw_nodes.ImplicitOutputShape) and ot.shape.reference_tensor == ipt.name
    ]
    if not referencing_outputs:
        return (
            chunk,
            defaultdict(lambda: 0),
            defaultdict(lambda: (0, 0)),
        )

    if len(referencing_outputs) > 1:
        raise NotImplementedError("more than one output references an input")

    # express scale/offset/halo of each referencing output in the axis order of the input
    sohs = [
        (
            transpose_seq(ot.shape.scale, ot.axes, ipt.axes, 1.0),
            transpose_seq(ot.shape.offset, ot.axes, ipt.axes, 0.0),
            transpose_seq(ot.halo, ot.axes, ipt.axes, 0.0),
        )
        for ot in referencing_outputs
    ]
    scale, offset, halo = sohs[0]
    if any((s != scale).any() or (off != offset).any() or (h != halo).any() for s, off, h in sohs[1:]):
        # todo: ignore any new dimensions denoted by scale entry of None
        raise ValueError(
            f"Incompatible output specs referencing same input tensor with different scale/offset/halo: {[out.name for out in referencing_outputs]}."
        )

    # fix: pair each axis name with its offset (the arguments to zip were swapped,
    # so the axis filter compared numbers with axis names and never matched)
    if any(off > 0 for a, off in zip(ipt.axes, offset) if a in ("x", "y", "z", "t", "time")):
        raise NotImplementedError(
            "offset>0; space/time output is larger than input. todo: cut offset on tiles, but leave at image edge."
        )

    assert all(h >= 0 for h in halo)
    overlap = np.maximum((halo - offset) / scale, 0)  # no negative overlap
    overlap = np.ceil(overlap).astype(int)
    corrected_chunk = ipt_shape - 2 * overlap
    if (corrected_chunk <= 0).any():
        # a non-positive tile size would lead to a modulo by zero/negative below
        raise ValueError(
            f"Chunk {dict(zip(ipt.axes, ipt_shape.tolist()))} of input {ipt.name} is too small for the "
            f"required overlap {dict(zip(ipt.axes, overlap.tolist()))}."
        )

    t_shape = np.array(tensor.shape, dtype=int)
    assert len(t_shape) == len(ipt_shape)
    # pad at the end of each axis up to the next multiple of the corrected chunk
    padding_total = (corrected_chunk - (t_shape % corrected_chunk)) % corrected_chunk
    padding = [(0, p) for p in padding_total]

    return (
        dict(zip(ipt.axes, corrected_chunk)),
        dict(enumerate(overlap)),  # xr.DataArray.overlap not yet available: key by index for da.overlap
        dict(zip(ipt.axes, padding)),
    )
get_asymmetric_halolike(value: float) -> Tuple[int, int]: + assert value >= 0 + if value % 1: + assert value % 0.5 == 0 + return (math.floor(value), math.ceil(value)) + else: + return (int(value), int(value)) + + +def get_output_rois( + out: raw_nodes.OutputTensor, + input_overlaps: Dict[str, Dict[int, int]], + input_paddings: Dict[str, Dict[str, Tuple[int, int]]], + ipt_by_name: Dict[str, raw_nodes.InputTensor], +): + if isinstance(out.shape, raw_nodes.ImplicitOutputShape): + scale = np.array([1.0 if s is None else s for s in out.shape.scale]) + offset: Sequence[float] = out.shape.offset + ref_ipt = ipt_by_name[out.shape.reference_tensor] + eff_halo_float: List[float] = [ + input_overlaps[out.shape.reference_tensor].get(ref_ipt.axes.index(a), 0) * s + off + for a, s, off in zip(out.axes, scale, offset) + ] + ref_input_padding_dict = input_paddings[out.shape.reference_tensor] + else: + scale = np.ones(len(out.shape)) + offset = np.zeros(len(out.shape)) + eff_halo_float = [0.0] * len(out.shape) + ref_input_padding_dict = {} + + # effective halo to be trimmed from output. (only for space and time dims) + effective_halo: List[Tuple[int, int]] = [] + for i, a in enumerate(out.axes): + if a in ("b", "batch"): + errors_in = (["halo"] if eff_halo_float[i] else []) + (["offset"] if offset[i] else []) + if errors_in: + raise ValueError(f"invalid {' and '.join(errors_in)} for batch dimension of output {out.name}") + elif a in ("x", "y", "z", "t", "time"): + pass + elif a in ("i", "index", "c", "channel"): + # ignore offset. As we cannot tile across these dimensions, offsets should be returned, not trimmed. 
def run_model_inference_with_chunking(
    rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription],
    *tensors: xr.DataArray,
    chunks: Optional[Sequence[Dict[str, int]]] = None,
    enable_preprocessing: bool = True,
    enable_postprocessing: bool = True,
    devices: Sequence[str] = ("cpu",),
    boundary_mode: Union[
        BoundaryMode,
        Sequence[BoundaryMode],
    ] = "reflect",
) -> List[xr.DataArray]:
    """Run model inference tile by tile, orchestrated with `dask.array.blockwise`.

    The input tensors are transposed to the axis order of the model inputs, padded,
    chunked with overlap (derived from halo/offset of the model output) and passed
    chunk-wise to `forward`. The padded/overlapping regions are trimmed from the result.

    Note:
        Pre- and postprocessing are always removed from the tensor specs before the
        model adapter is created. They are collected when enabled, but this function
        does not apply them yet; a warning is issued if that drops any processing.

    Args:
        rdf_source: model resource description (source).
        *tensors: one input tensor per model input.
        chunks: per input a mapping from axis name to chunk length.
            Defaults to `get_default_input_chunk` for each input.
        enable_preprocessing: collect the preprocessing of the inputs (see note).
        enable_postprocessing: collect the postprocessing of the outputs (see note).
        devices: devices handed to `create_model_adapter`.
        boundary_mode: padding/boundary mode, either one for all inputs or one per input.

    Returns:
        list: model outputs (currently exactly one)

    Raises:
        NotImplementedError: if the model has more than one output.
        ValueError: if the number of `chunks` or `boundary_mode` entries does not
            match the number of model inputs.
    """
    model: raw_nodes.Model = load_raw_resource_description(rdf_source, update_to_format="latest")  # noqa
    assert isinstance(model, raw_nodes.Model)
    if len(model.outputs) > 1:
        raise NotImplementedError("More than one model output not yet implemented")

    # always remove pre-/postprocessing, but save it if enabled
    preprocessing = []
    for ipt in model.inputs:
        if enable_preprocessing:
            preprocessing.append(ipt.preprocessing)

        ipt.preprocessing = missing

    postprocessing = []
    for out in model.outputs:
        if enable_postprocessing:
            postprocessing.append(out.postprocessing)

        out.postprocessing = missing

    if any(preprocessing) or any(postprocessing):
        # do not drop requested processing silently
        warnings.warn("pre-/postprocessing is not yet applied by run_model_inference_with_chunking")

    # transpose tensors to match ipt spec
    assert len(tensors) == len(model.inputs)
    tensors = [t.transpose(*s.axes) for t, s in zip(tensors, model.inputs)]
    if isinstance(boundary_mode, str):
        boundary_mode = [boundary_mode] * len(tensors)
    elif len(boundary_mode) != len(tensors):
        raise ValueError(f"Expected {len(tensors)} boundary modes, but got {len(boundary_mode)}.")

    if chunks is None:
        # fix: correct_chunk indexes chunks by axis name, so key the default chunk lengths by axis
        chunks = [dict(zip(ipt.axes, get_default_input_chunk(ipt))) for ipt in model.inputs]
    elif len(chunks) != len(model.inputs):
        raise ValueError(f"Expected {len(model.inputs)} chunks, but got {len(chunks)}.")

    ipt_by_name = {ipt.name: ipt for ipt in model.inputs}

    # the input tensors need an adapted chunking due to halo and offset
    actual_chunks, overlap_depths, paddings = zip(
        *(correct_chunk(c, ipt, model.outputs, t) for c, ipt, t in zip(chunks, model.inputs, tensors))
    )
    actual_chunks_by_name = {ipt.name: c for ipt, c in zip(model.inputs, actual_chunks)}
    padded_input_tensor_shapes = {
        ipt.name: [ts + sum(p[a]) for ts, a in zip(t.shape, ipt.axes)]
        for ipt, t, p in zip(model.inputs, tensors, paddings)
    }

    # note: da.overlap.overlap or da.overlap.map_overlap equivalents are not yet available in xarray
    tensors = [
        da.overlap.overlap(t.pad(p, mode=bm).chunk(c).data, depth=d, boundary=bm)
        for t, c, d, p, bm in zip(tensors, actual_chunks, overlap_depths, paddings, boundary_mode)
    ]

    output_chunk_roi, output_roi = get_output_rois(
        model.outputs[0],
        input_overlaps={ipt.name: d for ipt, d in zip(model.inputs, overlap_depths)},
        input_paddings={ipt.name: p for ipt, p in zip(model.inputs, paddings)},
        ipt_by_name=ipt_by_name,
    )

    n_batches = tensors[0].npartitions
    assert all(t.npartitions == n_batches for t in tensors[1:]), [t.npartitions for t in tensors]

    model_adapter = create_model_adapter(bioimageio_model=model, devices=devices)

    # todo: generalize to multiple outputs
    out = model.outputs[0]
    if isinstance(out.shape, raw_nodes.ImplicitOutputShape):
        ipt_shape = padded_input_tensor_shapes[out.shape.reference_tensor]
        ipt_axes = ipt_by_name[out.shape.reference_tensor].axes
        ipt_shape = transpose_seq(ipt_shape, ipt_axes, out.axes, 0)
        out_scale = np.array([0.0 if s is None else s for s in out.shape.scale])
        out_offset = np.array(out.shape.offset)
        out_shape_float = ipt_shape * out_scale + 2 * out_offset
        assert (out_shape_float == out_shape_float.astype(int)).all(), out_shape_float
        out_shape: Sequence[int] = out_shape_float.astype(int)
    else:
        out_shape = out.shape
        ipt_axes = []
        # fix: out_scale was undefined for explicit output shapes (NameError in the loop below)
        out_scale = np.ones(len(out.axes))

    # set up da.blockwise to orchestrate tiled forward
    out_ind = []
    new_axes = {}
    adjust_chunks = {}
    for a, s, sc in zip(out.axes, out_shape, out_scale):
        if a in ("b", "batch"):
            out_ind.append(a)
        elif a in ipt_axes:
            axis_name = f"{out.shape.reference_tensor}_{a}"
            out_ind.append(axis_name)
            # bind a/sc as defaults to avoid late binding of the loop variables
            adjust_chunks[axis_name] = (
                lambda _, aa=a, scc=sc: actual_chunks_by_name[out.shape.reference_tensor][aa] * scc
            )
        else:
            out_ind.append(f"{out.name}_{a}")
            new_axes[f"{out.name}_{a}"] = s

    inputs_sequence = []
    for t, ipt in zip(tensors, model.inputs):
        inputs_sequence.append(t)
        inputs_sequence.append(tuple("b" if a == "b" else f"{ipt.name}_{a}" for a in ipt.axes))

    result = da.blockwise(
        forward,
        tuple(out_ind),
        *inputs_sequence,
        new_axes=new_axes,
        dtype=np.dtype(out.data_type),
        meta=np.empty(out_shape, dtype=np.dtype(out.data_type)),
        name=(model.config or {}).get("bioimageio", {}).get("nickname") or f"model_{model.id}",
        adjust_chunks=adjust_chunks,
        model_adapter=model_adapter,
        output_chunk_roi=output_chunk_roi,
    )

    res = result[output_roi]
    # todo: rechunk depending on output_roi to account for offset
    return [xr.DataArray(res, dims=tuple(out.axes))]
def _iterate_workflow_steps_impl(
    rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription],
    *,
    test_steps: bool,
    inputs: Sequence = tuple(),
    options: Optional[Dict[str, Any]] = None,
) -> Generator[WorkflowState, None, None]:
    """Execute the steps of a workflow one by one and yield the state after each step.

    Args:
        rdf_source: workflow resource description (source).
        test_steps: run `workflow.test_steps` (without inputs/options) instead of `workflow.steps`.
        inputs: workflow inputs, in the order of `workflow.inputs_spec`.
        options: workflow options by name; unspecified options use their default.

    Yields:
        A `WorkflowState` after every step and a final one in which outputs described
        as tensors have been converted to `xarray.DataArray`.

    Raises:
        ValueError: on a wrong number of inputs/outputs, an unknown option or an invalid reference.
        NotImplementedError: if a step refers to an operator missing in `bioimageio.core.operators`.
    """
    import bioimageio.core.operators as ops

    workflow = load_resource_description(rdf_source)
    assert isinstance(workflow, nodes.Workflow)
    wf_options: Dict[str, Any] = {opt.name: opt.default for opt in workflow.options_spec}
    if test_steps:
        assert not inputs
        assert not options
        wf_inputs: Dict[str, Any] = {}
        steps = workflow.test_steps
    else:
        if len(workflow.inputs_spec) != len(inputs):
            raise ValueError(f"Expected {len(workflow.inputs_spec)} inputs, but got {len(inputs)}.")

        wf_inputs = {ipt_spec.name: ipt for ipt_spec, ipt in zip(workflow.inputs_spec, inputs)}
        # fix: `options` defaults to None, which has no `.items()`
        for k, v in (options or {}).items():
            if k not in wf_options:
                raise ValueError(f"Got unknown option {k}, expected one of {set(wf_options)}.")

            wf_options[k] = v

        steps = workflow.steps

    named_outputs = {}  # for later referencing

    def map_ref(value):
        """Resolve a `${{ ... }}` reference; any other value is returned unchanged."""
        assert isinstance(workflow, nodes.Workflow)
        if isinstance(value, str) and value.startswith("${{") and value.endswith("}}"):
            # fix: "${{" has three characters; slicing from 4 cut the first
            # character of references written without a space after the braces
            ref = value[3:-2].strip()
            if ref.startswith("self.inputs."):
                ref = ref[len("self.inputs.") :]
                if ref not in wf_inputs:
                    raise ValueError(f"Invalid workflow input reference {value}.")

                return wf_inputs[ref]
            elif ref.startswith("self.options."):
                ref = ref[len("self.options.") :]
                if ref not in wf_options:
                    raise ValueError(f"Invalid workflow option reference {value}.")

                return wf_options[ref]
            elif ref == "self.rdf_source":
                assert workflow.rdf_source is not missing
                return str(workflow.rdf_source)
            elif ref in named_outputs:
                return named_outputs[ref]
            else:
                raise ValueError(f"Invalid reference {value}.")
        else:
            return value

    # implicit inputs to a step are the outputs of the previous step.
    # For the first step these are the workflow inputs.
    outputs = tuple(inputs)
    # separate names keep the `inputs`/`options` arguments intact while iterating
    step_inputs = tuple(inputs)
    for step in steps:
        if not hasattr(ops, step.op):
            raise NotImplementedError(f"{step.op} not implemented in {ops}")

        op = getattr(ops, step.op)
        if step.inputs is missing:
            step_inputs = outputs
        else:
            step_inputs = tuple(map_ref(ipt) for ipt in step.inputs)

        assert isinstance(step_inputs, tuple)
        step_options = {k: map_ref(v) for k, v in (step.options or {}).items()}
        outputs = op(*step_inputs, **step_options)
        if not isinstance(outputs, tuple):
            outputs = (outputs,)

        if step.outputs:
            assert step.id is not missing
            if len(step.outputs) != len(outputs):
                raise ValueError(
                    f"Got {len(step.outputs)} step output name{'s' if len(step.outputs) > 1 else ''} ({step.id}.outputs), "
                    f"but op {step.op} returned {len(outputs)} outputs."
                )

            named_outputs.update({f"{step.id}.outputs.{out_name}": out for out_name, out in zip(step.outputs, outputs)})

        yield WorkflowState(
            wf_inputs=wf_inputs,
            wf_options=wf_options,
            inputs=step_inputs,
            outputs=outputs,
            named_outputs=named_outputs,
        )

    if len(workflow.outputs_spec) != len(outputs):
        raise ValueError(f"Expected {len(workflow.outputs_spec)} outputs from last step, but got {len(outputs)}.")

    def tensor_as_xr(tensor, axes: Sequence[nodes.Axis]):
        """Return `tensor` as `xarray.DataArray` with dims named after the axes spec."""
        spec_axes = [a.name or a.type for a in axes]
        if isinstance(tensor, xr.DataArray):
            if list(tensor.dims) != spec_axes:
                raise ValueError(
                    f"Last workflow step returned xarray.DataArray with dims {tensor.dims}, but expected dims {spec_axes}."
                )

            return tensor
        else:
            return xr.DataArray(tensor, dims=tuple(spec_axes))

    outputs = tuple(
        tensor_as_xr(out, out_spec.axes) if out_spec.type == "tensor" else out
        for out_spec, out in zip(workflow.outputs_spec, outputs)
    )
    yield WorkflowState(
        wf_inputs=wf_inputs,
        wf_options=wf_options,
        inputs=step_inputs,
        outputs=outputs,
        named_outputs=named_outputs,
    )
root_path=bioimageio_model.root_path) for s in bioimageio_model.inputs + ] + self._output_specs = [ + resolve_raw_node(s, nodes, root_path=bioimageio_model.root_path) for s in bioimageio_model.outputs + ] self._preprocessing = preprocessing self._postprocessing = postprocessing @@ -207,11 +211,10 @@ def create_prediction_pipeline( if isinstance(bioimageio_model, nodes.Model): ipts = bioimageio_model.inputs outs = bioimageio_model.outputs - else: assert isinstance(bioimageio_model, raw_nodes.Model) - ipts = [resolve_raw_node(s, nodes) for s in bioimageio_model.inputs] - outs = [resolve_raw_node(s, nodes) for s in bioimageio_model.outputs] + ipts = [resolve_raw_node(s, nodes, root_path=bioimageio_model.root_path) for s in bioimageio_model.inputs] + outs = [resolve_raw_node(s, nodes, root_path=bioimageio_model.root_path) for s in bioimageio_model.outputs] preprocessing = CombinedProcessing.from_tensor_specs(ipts) diff --git a/bioimageio/core/resource_io/utils.py b/bioimageio/core/resource_io/utils.py index ffeefbaa..9f5587de 100644 --- a/bioimageio/core/resource_io/utils.py +++ b/bioimageio/core/resource_io/utils.py @@ -109,10 +109,15 @@ def all_sources_available( def resolve_raw_node( - raw_rd: GenericRawNode, nodes_module: typing.Any, uri_only_if_in_package: bool = True + raw_rd: GenericRawNode, + nodes_module: typing.Any, + uri_only_if_in_package: bool = True, + root_path: typing.Optional[pathlib.Path] = None, ) -> GenericResolvedNode: """resolve all uris and paths (that are included when packaging)""" - rd = UriNodeTransformer(root_path=raw_rd.root_path, uri_only_if_in_package=uri_only_if_in_package).transform(raw_rd) + rd = UriNodeTransformer( + root_path=root_path or raw_rd.root_path, uri_only_if_in_package=uri_only_if_in_package + ).transform(raw_rd) rd = SourceNodeTransformer().transform(rd) rd = RawNodeTypeTransformer(nodes_module).transform(rd) return rd diff --git a/bioimageio/core/workflow/__init__.py b/bioimageio/core/workflow/__init__.py deleted file 
mode 100644 index e69de29b..00000000 diff --git a/bioimageio/core/workflow/operators/_run.py b/bioimageio/core/workflow/operators/_run.py deleted file mode 100644 index d803b88f..00000000 --- a/bioimageio/core/workflow/operators/_run.py +++ /dev/null @@ -1,202 +0,0 @@ -from dataclasses import dataclass -from os import PathLike -from typing import Any, Dict, Generator, IO, List, Optional, Sequence, Tuple, Union - -import numpy as np -import xarray as xr -from marshmallow import missing - -from bioimageio.core import load_resource_description -from bioimageio.core.prediction_pipeline import create_prediction_pipeline -from bioimageio.core.resource_io import nodes -from bioimageio.spec import load_raw_resource_description -from bioimageio.spec.model import raw_nodes -from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription - - -def run_model_inference( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *tensors, - enable_preprocessing: bool = True, - enable_postprocessing: bool = True, - devices: Optional[Sequence[str]] = None, -) -> List[xr.DataArray]: - """run model inference - - Returns: - list: model outputs - """ - model = load_raw_resource_description(rdf_source, update_to_format="latest") - assert isinstance(model, raw_nodes.Model) - # remove pre-/postprocessing if specified - if not enable_preprocessing: - for ipt in model.inputs: - if ipt.preprocessing: - ipt.preprocessing = missing - if not enable_postprocessing: - for ipt in model.outputs: - if ipt.postprocessing: - ipt.postprocessing = missing - - with create_prediction_pipeline(model, devices=devices) as pred_pipeline: - return pred_pipeline.forward(*tensors) - - -def run_workflow( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - inputs: Sequence = tuple(), - options: Dict[str, Any] = None, -) -> tuple: - outputs = tuple() - for state in _iterate_workflow_steps_impl(rdf_source, 
test_steps=False, inputs=inputs, options=options): - outputs = state.outputs - - return outputs - - -def run_workflow_test( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], -) -> tuple: - outputs = tuple() - for state in _iterate_workflow_steps_impl(rdf_source, test_steps=True): - outputs = state.outputs - - return outputs - - -@dataclass -class WorkflowState: - wf_inputs: Dict[str, Any] - wf_options: Dict[str, Any] - inputs: tuple - outputs: tuple - named_outputs: Dict[str, Any] - - -def iterate_workflow_steps( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *, - inputs: Sequence = tuple(), - options: Dict[str, Any] = None, -) -> Generator[WorkflowState]: - yield from _iterate_workflow_steps_impl(rdf_source, inputs=inputs, options=options, test_steps=False) - - -def iterate_test_workflow_steps( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription] -) -> Generator[WorkflowState]: - yield from _iterate_workflow_steps_impl(rdf_source, test_steps=True) - - -def _iterate_workflow_steps_impl( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *, - test_steps: bool, - inputs: Sequence = tuple(), - options: Dict[str, Any] = None, -) -> Generator[WorkflowState]: - import bioimageio.core.workflow.operators as ops - - workflow = load_resource_description(rdf_source) - assert isinstance(workflow, nodes.Workflow) - wf_options: Dict[str, Any] = {opt.name: opt.default for opt in workflow.options_spec} - if test_steps: - assert not inputs - assert not options - wf_inputs: Dict[str, Any] = {} - steps = workflow.test_steps - else: - if not len(workflow.inputs_spec) == len(inputs): - raise ValueError(f"Expected {len(workflow.inputs_spec)} inputs, but got {len(inputs)}.") - - wf_inputs = {ipt_spec.name: ipt for ipt_spec, ipt in zip(workflow.inputs_spec, inputs)} - for k, v in options.items(): - if k not in wf_options: 
- raise ValueError(f"Got unknown option {k}, expected one of {set(wf_options)}.") - - wf_options[k] = v - - steps = workflow.steps - - named_outputs = {} # for later referencing - - def map_ref(value): - assert isinstance(workflow, nodes.Workflow) - if isinstance(value, str) and value.startswith("${{") and value.endswith("}}"): - ref = value[4:-2].strip() - if ref.startswith("self.inputs."): - ref = ref[len("self.inputs.") :] - if ref not in wf_inputs: - raise ValueError(f"Invalid workflow input reference {value}.") - - return wf_inputs[ref] - elif ref.startswith("self.options."): - ref = ref[len("self.options.") :] - if ref not in wf_options: - raise ValueError(f"Invalid workflow option reference {value}.") - - return wf_options[ref] - elif ref == "self.rdf_source": - assert workflow.rdf_source is not missing - return str(workflow.rdf_source) - elif ref in named_outputs: - return named_outputs[ref] - else: - raise ValueError(f"Invalid reference {value}.") - else: - return value - - # implicit inputs to a step are the outputs of the previous step. - # For the first step these are the workflow inputs. - outputs = tuple(inputs) - for step in steps: - if not hasattr(ops, step.op): - raise NotImplementedError(f"{step.op} not implemented in {ops}") - - op = getattr(ops, step.op) - if step.inputs is missing: - inputs = outputs - else: - inputs = tuple(map_ref(ipt) for ipt in step.inputs) - - assert isinstance(inputs, tuple) - options = {k: map_ref(v) for k, v in (step.options or {}).items()} - outputs = op(*inputs, **options) - if not isinstance(outputs, tuple): - outputs = (outputs,) - - if step.outputs: - assert step.id is not missing - if len(step.outputs) != len(outputs): - raise ValueError( - f"Got {len(step.outputs)} step output name{'s' if len(step.outputs) > 1 else ''} ({step.id}.outputs), " - f"but op {step.op} returned {len(outputs)} outputs." 
- ) - - named_outputs.update({f"{step.id}.outputs.{out_name}": out for out_name, out in zip(step.outputs, outputs)}) - - yield WorkflowState( - wf_inputs=wf_inputs, wf_options=wf_options, inputs=inputs, outputs=outputs, named_outputs=named_outputs - ) - if len(workflow.outputs_spec) != len(outputs): - raise ValueError(f"Expected {len(workflow.outputs_spec)} outputs from last step, but got {len(outputs)}.") - - def tensor_as_xr(tensor, axes: Sequence[nodes.Axis]): - spec_axes = [a.name or a.type for a in axes] - if isinstance(tensor, xr.DataArray): - if list(tensor.dims) != spec_axes: - raise ValueError( - f"Last workflow step returned xarray.DataArray with dims {tensor.dims}, but expected dims {spec_axes}." - ) - - return tensor - else: - return xr.DataArray(tensor, dims=spec_axes) - - outputs = tuple( - tensor_as_xr(out, out_spec.axes) if out_spec.type == "tensor" else out - for out_spec, out in zip(workflow.outputs_spec, outputs) - ) - yield WorkflowState( - wf_inputs=wf_inputs, wf_options=wf_options, inputs=inputs, outputs=outputs, named_outputs=named_outputs - ) From 4f13d603a9b3b81b36a0e7a4f4a9e84248cc4811 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 15 Nov 2022 23:23:29 +0100 Subject: [PATCH 11/23] correct output chunks --- bioimageio/core/operators/_run.py | 50 +++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/bioimageio/core/operators/_run.py b/bioimageio/core/operators/_run.py index b5389947..1393265d 100644 --- a/bioimageio/core/operators/_run.py +++ b/bioimageio/core/operators/_run.py @@ -150,7 +150,7 @@ def get_output_rois( input_overlaps: Dict[str, Dict[int, int]], input_paddings: Dict[str, Dict[str, Tuple[int, int]]], ipt_by_name: Dict[str, raw_nodes.InputTensor], -): +) -> Tuple[Sequence[Tuple[int, int]], Sequence[Tuple[int, int]]]: if isinstance(out.shape, raw_nodes.ImplicitOutputShape): scale = np.array([1.0 if s is None else s for s in out.shape.scale]) offset: Sequence[float] = out.shape.offset @@ 
-167,7 +167,7 @@ def get_output_rois( ref_input_padding_dict = {} # effective halo to be trimmed from output. (only for space and time dims) - effective_halo: List[Tuple[int, int]] = [] + output_chunk_roi: List[Tuple[int, int]] = [] for i, a in enumerate(out.axes): if a in ("b", "batch"): errors_in = (["halo"] if eff_halo_float[i] else []) + (["offset"] if offset[i] else []) @@ -184,19 +184,15 @@ def get_output_rois( else: raise NotImplementedError(a) - effective_halo.append(get_asymmetric_halolike(eff_halo_float[i])) - - output_chunk_roi = tuple_roi_to_slices(effective_halo) + output_chunk_roi.append(get_asymmetric_halolike(eff_halo_float[i])) # undo input padding for the resulting final output tensor # also trim any negative offset, which we padded for each chunk - output_trim = [] + output_roi = [] for a, s, off in zip(out.axes, scale, offset): p0, p1 = ref_input_padding_dict.get(a, (0, 0)) off0, off1 = get_asymmetric_halolike(-min(off, 0)) - output_trim.append((math.ceil(p0 * s + off0), math.ceil(p1 * s + off1))) - - output_roi = tuple_roi_to_slices(output_trim) + output_roi.append((math.ceil(p0 * s + off0), math.ceil(p1 * s + off1))) return output_chunk_roi, output_roi @@ -341,12 +337,40 @@ def run_model_inference_with_chunking( name=(model.config or {}).get("bioimageio", {}).get("nickname") or f"model_{model.id}", adjust_chunks=adjust_chunks, model_adapter=model_adapter, - output_chunk_roi=output_chunk_roi, + output_chunk_roi=tuple_roi_to_slices(output_chunk_roi), ) - res = result[output_roi] - # todo: rechunk depending on output_roi to account for offset - # res = res.rechunk() + corrected_chunks = [] + rechunk = False + for i, (s, roi) in enumerate(zip(result.shape, output_roi)): + c = result.chunks[i] + assert s == sum(c), (s, c) + if sum(roi): + c = list(c) + r0 = roi[0] + while r0 >= c[0]: + r0 -= c[0] + c = c[1:] + if not c: + raise ValueError(f"Trimming too much from output {result.shape} with roi {output_roi}") + + c[0] -= r0 + + r1 = roi[1] + while 
r1 >= c[-1]: + r1 -= c[-1] + c = c[:-1] + if not c: + raise ValueError(f"Trimming too much from output {result.shape} with roi {output_roi}") + + c[-1] -= r1 + + corrected_chunks.append(c) + + res = result[tuple_roi_to_slices(output_roi)] + if rechunk: + res = res.rechunk(corrected_chunks) + return [xr.DataArray(res, dims=tuple(out.axes))] From d28b4545137be837eeb585447b5482f7b232fe0a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Wed, 16 Nov 2022 00:19:12 +0100 Subject: [PATCH 12/23] impl pre/postprocessing --- bioimageio/core/operators/__init__.py | 2 +- bioimageio/core/operators/_run.py | 150 +++++++++--------- .../_combined_processing.py | 16 +- .../core/prediction_pipeline/_processing.py | 5 +- 4 files changed, 86 insertions(+), 87 deletions(-) diff --git a/bioimageio/core/operators/__init__.py b/bioimageio/core/operators/__init__.py index ffbef74f..3441e489 100644 --- a/bioimageio/core/operators/__init__.py +++ b/bioimageio/core/operators/__init__.py @@ -1,4 +1,4 @@ from ._assert import assert_shape from ._generate import generate_random_uniform_tensor -from ._run import run_model_inference, run_model_inference_with_chunking, run_workflow +from ._run import run_model_inference, run_model_inference_without_tiling, run_workflow from ._various import binarize, load_tensors, log, select_outputs diff --git a/bioimageio/core/operators/_run.py b/bioimageio/core/operators/_run.py index 1393265d..bc671594 100644 --- a/bioimageio/core/operators/_run.py +++ b/bioimageio/core/operators/_run.py @@ -1,29 +1,21 @@ import math import warnings from collections import defaultdict -from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor -from contextlib import ExitStack from dataclasses import dataclass -from functools import partial from os import PathLike from typing import Any, Dict, Generator, IO, List, Optional, Sequence, Tuple, Union -import dask -import dask.bag as db import dask.array as da -import dask.dataframe as dd import numpy as np -import pandas as pd 
import xarray as xr - -from bioimageio.core.prediction_pipeline._model_adapters import ModelAdapter, create_model_adapter - - from marshmallow import missing from bioimageio.core import load_resource_description from bioimageio.core.prediction_pipeline import create_prediction_pipeline +from bioimageio.core.prediction_pipeline._combined_processing import CombinedProcessing +from bioimageio.core.prediction_pipeline._model_adapters import ModelAdapter, create_model_adapter from bioimageio.core.resource_io import nodes +from bioimageio.core.resource_io.utils import resolve_raw_node from bioimageio.spec import load_raw_resource_description from bioimageio.spec.model import raw_nodes from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription @@ -41,15 +33,6 @@ BoundaryMode = Literal["reflect"] -# def get_model_gufunc_signature(model: raw_nodes.Model) -> str: -# ipt_sig = ",".join(f"({','.join(ipt.name + '_' + a for a in ipt.axes)})" for ipt in model.inputs) -# out_sig_parts = [] -# for out in model.outputs: -# if isinstance( out.shape, ImplicitOutputShape): -# -# print(ipts) -# return ipts - def transpose_seq(seq, seq_axes, desired_axes, default): return np.array([default if ia not in seq_axes else seq[seq_axes.index(ia)] for ia in desired_axes]) @@ -140,9 +123,9 @@ def get_asymmetric_halolike(value: float) -> Tuple[int, int]: assert value >= 0 if value % 1: assert value % 0.5 == 0 - return (math.floor(value), math.ceil(value)) + return math.floor(value), math.ceil(value) else: - return (int(value), int(value)) + return int(value), int(value) def get_output_rois( @@ -198,7 +181,6 @@ def get_output_rois( def forward(*tensors, model_adapter: ModelAdapter, output_chunk_roi: Tuple[slice, ...]): - print("forward", [t.shape for t in tensors]) assert len(model_adapter.bioimageio_model.inputs) == len(tensors), ( len(model_adapter.bioimageio_model.inputs), len(tensors), @@ -206,58 +188,98 @@ def forward(*tensors, model_adapter: ModelAdapter, 
output_chunk_roi: Tuple[slice tensors = [xr.DataArray(t, dims=tuple(ipt.axes)) for ipt, t, in zip(model_adapter.bioimageio_model.inputs, tensors)] output = model_adapter.forward(*tensors)[0] # todo: allow more than 1 output cropped_output = output[output_chunk_roi] - print("model out", output.shape, "cropped", cropped_output.shape) return cropped_output -def run_model_inference_with_chunking( +def get_corrected_chunks(chunks: Dict[int, Sequence[int]], shape: Sequence[int], roi: Sequence[Tuple[int, int]]): + corrected_chunks = [] + rechunk = False + for i, (s, roi) in enumerate(zip(shape, roi)): + c = chunks[i] + assert s == sum(c), (s, c) + if sum(roi): + c = list(c) + r0 = roi[0] + while r0 >= c[0]: + r0 -= c[0] + c = c[1:] + if not c: + raise ValueError(f"Trimming too much from output {shape} with roi {roi}") + + c[0] -= r0 + + r1 = roi[1] + while r1 >= c[-1]: + r1 -= c[-1] + c = c[:-1] + if not c: + raise ValueError(f"Trimming too much from output {shape} with roi {roi}") + + c[-1] -= r1 + + corrected_chunks.append(c) + return corrected_chunks, rechunk + + +def run_model_inference( rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], *tensors: xr.DataArray, - chunks: Optional[Sequence[Dict[str, int]]] = None, enable_preprocessing: bool = True, enable_postprocessing: bool = True, devices: Sequence[str] = ("cpu",), + tiles: Union[None, Literal["auto"], Sequence[Dict[str, int]]] = "auto", boundary_mode: Union[ BoundaryMode, Sequence[BoundaryMode], ] = "reflect", ) -> List[xr.DataArray]: - """run model inference with tiling - - pre- and postprocessing are run with the same chunks as the model inference. - For better performance disable pre-/postprocessing here and call it as a separate operators. 
+ """run model inference Returns: list: model outputs """ + if tiles is None: + return run_model_inference_without_tiling( + rdf_source, + *tensors, + enable_preprocessing=enable_preprocessing, + enable_postprocessing=enable_postprocessing, + devices=devices, + ) model: raw_nodes.Model = load_raw_resource_description(rdf_source, update_to_format="latest") # noqa if len(model.outputs) > 1: raise NotImplementedError("More than one model output not yet implemented") assert isinstance(model, raw_nodes.Model) # always remove pre-/postprocessing, but save it if enabled - preprocessing = [] + # todo: improve pre- and postprocessing! + preprocessing = CombinedProcessing.from_tensor_specs( + [resolve_raw_node(ipt, nodes, root_path=model.root_path) for ipt in model.inputs] + ) for ipt in model.inputs: - if enable_preprocessing: - preprocessing.append(ipt.preprocessing) - ipt.preprocessing = missing - postprocessing = [] + postprocessing = CombinedProcessing.from_tensor_specs( + [resolve_raw_node(out, nodes, root_path=model.root_path) for out in model.outputs] + ) for out in model.outputs: - if enable_postprocessing: - postprocessing.append(out.postprocessing) - out.postprocessing = missing + if preprocessing.procs: + sample = {ipt.name: t for ipt, t in zip(model.inputs, tensors)} + preprocessing.apply(sample, {}) + tensors = [sample[ipt.name] for ipt in model.inputs] + # transpose tensors to match ipt spec assert len(tensors) == len(model.inputs) tensors = [t.transpose(*s.axes) for t, s in zip(tensors, model.inputs)] if isinstance(boundary_mode, str): boundary_mode = [boundary_mode] * len(tensors) - if chunks is None: + if tiles is "auto": chunks = [get_default_input_chunk(ipt) for ipt in model.inputs] + else: + chunks = tiles # the input tensors need an adapted chunking due to halo and offset actual_chunks, overlap_depths, paddings = zip( @@ -294,13 +316,14 @@ def run_model_inference_with_chunking( ipt_by_name = {ipt.name: ipt for ipt in model.inputs} ipt_axes = 
ipt_by_name[out.shape.reference_tensor].axes ipt_shape = transpose_seq(ipt_shape, ipt_axes, out.axes, 0) - out_scale = np.array([0.0 if s is None else s for s in out.shape.scale]) + out_scale = [0.0 if s is None else s for s in out.shape.scale] out_offset = np.array(out.shape.offset) out_shape_float = ipt_shape * out_scale + 2 * out_offset assert (out_shape_float == out_shape_float.astype(int)).all(), out_shape_float out_shape: Sequence[int] = out_shape_float.astype(int) else: out_shape = out.shape + out_scale = [1.0] * len(out_shape) ipt_axes = [] # set up da.blockwise to orchestrate tiled forward @@ -331,55 +354,32 @@ def run_model_inference_with_chunking( *inputs_sequence, new_axes=new_axes, dtype=np.dtype(out.data_type), - # meta=xr.DataArray(np.empty(out_shape, dtype=np.dtype(out.data_type)), dims=tuple(out.axes)), - meta=np.empty(out_shape, dtype=np.dtype(out.data_type)), - # align_arrays=False, + meta=np.empty((), dtype=np.dtype(out.data_type)), name=(model.config or {}).get("bioimageio", {}).get("nickname") or f"model_{model.id}", adjust_chunks=adjust_chunks, - model_adapter=model_adapter, - output_chunk_roi=tuple_roi_to_slices(output_chunk_roi), + **dict(model_adapter=model_adapter, output_chunk_roi=tuple_roi_to_slices(output_chunk_roi)), ) - corrected_chunks = [] - rechunk = False - for i, (s, roi) in enumerate(zip(result.shape, output_roi)): - c = result.chunks[i] - assert s == sum(c), (s, c) - if sum(roi): - c = list(c) - r0 = roi[0] - while r0 >= c[0]: - r0 -= c[0] - c = c[1:] - if not c: - raise ValueError(f"Trimming too much from output {result.shape} with roi {output_roi}") - - c[0] -= r0 - - r1 = roi[1] - while r1 >= c[-1]: - r1 -= c[-1] - c = c[:-1] - if not c: - raise ValueError(f"Trimming too much from output {result.shape} with roi {output_roi}") - - c[-1] -= r1 - - corrected_chunks.append(c) - + corrected_chunks, rechunk = get_corrected_chunks(result.chunks, result.shape, output_roi) res = result[tuple_roi_to_slices(output_roi)] if rechunk: 
res = res.rechunk(corrected_chunks) - return [xr.DataArray(res, dims=tuple(out.axes))] + outputs = [xr.DataArray(res, dims=tuple(out.axes))] + if preprocessing.procs: + sample = {out.name: t for out, t in zip(model.outputs, outputs)} + postprocessing.apply(sample, {}) + outputs = [sample[out.name] for out in model.outputs] + + return outputs -def run_model_inference( +def run_model_inference_without_tiling( rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], *tensors: xr.DataArray, enable_preprocessing: bool = True, enable_postprocessing: bool = True, - devices: Optional[Sequence[str]] = None, + devices: Optional[Sequence[str]] = ("cpu",), ) -> List[xr.DataArray]: """run model inference diff --git a/bioimageio/core/prediction_pipeline/_combined_processing.py b/bioimageio/core/prediction_pipeline/_combined_processing.py index bbd3e354..a67cd51f 100644 --- a/bioimageio/core/prediction_pipeline/_combined_processing.py +++ b/bioimageio/core/prediction_pipeline/_combined_processing.py @@ -28,30 +28,30 @@ class ProcessingInfo: class CombinedProcessing: def __init__(self, combine_tensors: Dict[TensorName, ProcessingInfo]): - self._procs = [] + self.procs = [] known = dict(KNOWN_PROCESSING["pre"]) known.update(KNOWN_PROCESSING["post"]) # ensure all tensors have correct data type before any processing for tensor_name, info in combine_tensors.items(): if info.assert_dtype_before is not None: - self._procs.append(AssertDtype(tensor_name=tensor_name, dtype=info.assert_dtype_before)) + self.procs.append(AssertDtype(tensor_name=tensor_name, dtype=info.assert_dtype_before)) if info.ensure_dtype_before is not None: - self._procs.append(EnsureDtype(tensor_name=tensor_name, dtype=info.ensure_dtype_before)) + self.procs.append(EnsureDtype(tensor_name=tensor_name, dtype=info.ensure_dtype_before)) for tensor_name, info in combine_tensors.items(): for step in info.steps: - self._procs.append(known[step.name](tensor_name=tensor_name, **step.kwargs)) 
+ self.procs.append(known[step.name](tensor_name=tensor_name, **step.kwargs)) if info.assert_dtype_after is not None: - self._procs.append(AssertDtype(tensor_name=tensor_name, dtype=info.assert_dtype_after)) + self.procs.append(AssertDtype(tensor_name=tensor_name, dtype=info.assert_dtype_after)) # ensure tensor has correct data type right after its processing if info.ensure_dtype_after is not None: - self._procs.append(EnsureDtype(tensor_name=tensor_name, dtype=info.ensure_dtype_after)) + self.procs.append(EnsureDtype(tensor_name=tensor_name, dtype=info.ensure_dtype_after)) - self.required_measures: RequiredMeasures = self._collect_required_measures(self._procs) + self.required_measures: RequiredMeasures = self._collect_required_measures(self.procs) self.tensor_names = list(combine_tensors) @classmethod @@ -85,7 +85,7 @@ def from_tensor_specs(cls, tensor_specs: List[Union[nodes.InputTensor, nodes.Out return inst def apply(self, sample: Sample, computed_measures: ComputedMeasures) -> None: - for proc in self._procs: + for proc in self.procs: proc.set_computed_measures(computed_measures) sample[proc.tensor_name] = proc.apply(sample[proc.tensor_name]) diff --git a/bioimageio/core/prediction_pipeline/_processing.py b/bioimageio/core/prediction_pipeline/_processing.py index 6fbea8c6..f5b57081 100644 --- a/bioimageio/core/prediction_pipeline/_processing.py +++ b/bioimageio/core/prediction_pipeline/_processing.py @@ -2,9 +2,8 @@ see https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/preprocessing_spec_latest.md and https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/postprocessing_spec_latest.md """ -import numbers -from dataclasses import InitVar, dataclass, field, fields -from typing import List, Mapping, Optional, Sequence, Tuple, Type, Union +from dataclasses import dataclass, field, fields +from typing import Mapping, Optional, Sequence, Tuple, Type, Union import numpy import numpy as np From eb79ab2d8e90d850859a6ac10cdd3c58d9dcecd1 Mon Sep 17 
00:00:00 2001 From: fynnbe Date: Wed, 16 Nov 2022 09:09:48 +0100 Subject: [PATCH 13/23] clean up --- bioimageio/core/operators/_run.py | 67 ++++++++++++++++--------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/bioimageio/core/operators/_run.py b/bioimageio/core/operators/_run.py index bc671594..f79a4b78 100644 --- a/bioimageio/core/operators/_run.py +++ b/bioimageio/core/operators/_run.py @@ -38,9 +38,15 @@ def transpose_seq(seq, seq_axes, desired_axes, default): return np.array([default if ia not in seq_axes else seq[seq_axes.index(ia)] for ia in desired_axes]) -def correct_chunk( +def get_chunk( chunk, ipt: raw_nodes.InputTensor, outputs: Sequence[raw_nodes.OutputTensor], tensor ) -> Tuple[Dict[str, int], Dict[int, int], Dict[str, Tuple[int, int]]]: + """correct chunk to account for offset and halo + + Returns: + corrected chunk: to tile the input array with + overlap: overlap of corrected chunks (yields original chunks) + """ ipt_shape = np.array([chunk[a] for a in ipt.axes], dtype=int) referencing_outputs = [ ot @@ -180,18 +186,19 @@ def get_output_rois( return output_chunk_roi, output_roi -def forward(*tensors, model_adapter: ModelAdapter, output_chunk_roi: Tuple[slice, ...]): +def forward(*tensors, model_adapter: ModelAdapter, output_tile_roi: Tuple[slice, ...]): + """helper to cast dask array chunks to xr.DataArray and apply a roi to the output""" assert len(model_adapter.bioimageio_model.inputs) == len(tensors), ( len(model_adapter.bioimageio_model.inputs), len(tensors), ) tensors = [xr.DataArray(t, dims=tuple(ipt.axes)) for ipt, t, in zip(model_adapter.bioimageio_model.inputs, tensors)] output = model_adapter.forward(*tensors)[0] # todo: allow more than 1 output - cropped_output = output[output_chunk_roi] - return cropped_output + return output[output_tile_roi] def get_corrected_chunks(chunks: Dict[int, Sequence[int]], shape: Sequence[int], roi: Sequence[Tuple[int, int]]): + """adapt `chunks` chunking `shape` for `shape[roi]`""" 
corrected_chunks = [] rechunk = False for i, (s, roi) in enumerate(zip(shape, roi)): @@ -253,39 +260,36 @@ def run_model_inference( assert isinstance(model, raw_nodes.Model) # always remove pre-/postprocessing, but save it if enabled # todo: improve pre- and postprocessing! - preprocessing = CombinedProcessing.from_tensor_specs( - [resolve_raw_node(ipt, nodes, root_path=model.root_path) for ipt in model.inputs] - ) - for ipt in model.inputs: - ipt.preprocessing = missing - - postprocessing = CombinedProcessing.from_tensor_specs( - [resolve_raw_node(out, nodes, root_path=model.root_path) for out in model.outputs] - ) - for out in model.outputs: - out.postprocessing = missing - if preprocessing.procs: + if enable_preprocessing: + preprocessing = CombinedProcessing.from_tensor_specs( + [resolve_raw_node(ipt, nodes, root_path=model.root_path) for ipt in model.inputs] + ) sample = {ipt.name: t for ipt, t in zip(model.inputs, tensors)} preprocessing.apply(sample, {}) tensors = [sample[ipt.name] for ipt in model.inputs] + if enable_postprocessing: + postprocessing = CombinedProcessing.from_tensor_specs( + [resolve_raw_node(out, nodes, root_path=model.root_path) for out in model.outputs] + ) + else: + postprocessing = None + # transpose tensors to match ipt spec assert len(tensors) == len(model.inputs) tensors = [t.transpose(*s.axes) for t, s in zip(tensors, model.inputs)] if isinstance(boundary_mode, str): boundary_mode = [boundary_mode] * len(tensors) - if tiles is "auto": - chunks = [get_default_input_chunk(ipt) for ipt in model.inputs] - else: - chunks = tiles + if tiles == "auto": + tiles = [get_default_input_chunk(ipt) for ipt in model.inputs] - # the input tensors need an adapted chunking due to halo and offset - actual_chunks, overlap_depths, paddings = zip( - *(correct_chunk(c, ipt, model.outputs, t) for c, ipt, t in zip(chunks, model.inputs, tensors)) + # calculate chunking of the input tensors from tiles taking halo and offset into account + chunks, 
overlap_depths, paddings = zip( + *(get_chunk(c, ipt, model.outputs, t) for c, ipt, t in zip(tiles, model.inputs, tensors)) ) - actual_chunks_by_name = {ipt.name: c for ipt, c in zip(model.inputs, actual_chunks)} + chunks_by_name = {ipt.name: c for ipt, c in zip(model.inputs, chunks)} padded_input_tensor_shapes = { ipt.name: [ts + sum(p[a]) for ts, a in zip(t.shape, ipt.axes)] for ipt, t, p in zip(model.inputs, tensors, paddings) @@ -294,10 +298,10 @@ def run_model_inference( # note: da.overlap.overlap or da.overlap.map_overlap equivalents are not yet available in xarray tensors = [ da.overlap.overlap(t.pad(p, mode=bm).chunk(c).data, depth=d, boundary=bm) - for t, c, d, p, bm in zip(tensors, actual_chunks, overlap_depths, paddings, boundary_mode) + for t, c, d, p, bm in zip(tensors, chunks, overlap_depths, paddings, boundary_mode) ] - output_chunk_roi, output_roi = get_output_rois( + output_tile_roi, output_roi = get_output_rois( model.outputs[0], input_overlaps={ipt.name: d for ipt, d in zip(model.inputs, overlap_depths)}, input_paddings={ipt.name: p for ipt, p in zip(model.inputs, paddings)}, @@ -336,9 +340,7 @@ def run_model_inference( elif a in ipt_axes: axis_name = f"{out.shape.reference_tensor}_{a}" out_ind.append(axis_name) - adjust_chunks[axis_name] = ( - lambda _, aa=a, scc=sc: actual_chunks_by_name[out.shape.reference_tensor][aa] * scc - ) + adjust_chunks[axis_name] = lambda _, aa=a, scc=sc: chunks_by_name[out.shape.reference_tensor][aa] * scc else: out_ind.append(f"{out.name}_{a}") new_axes[f"{out.name}_{a}"] = s @@ -357,7 +359,7 @@ def run_model_inference( meta=np.empty((), dtype=np.dtype(out.data_type)), name=(model.config or {}).get("bioimageio", {}).get("nickname") or f"model_{model.id}", adjust_chunks=adjust_chunks, - **dict(model_adapter=model_adapter, output_chunk_roi=tuple_roi_to_slices(output_chunk_roi)), + **dict(model_adapter=model_adapter, output_tile_roi=tuple_roi_to_slices(output_tile_roi)), ) corrected_chunks, rechunk = 
get_corrected_chunks(result.chunks, result.shape, output_roi) @@ -366,7 +368,8 @@ def run_model_inference( res = res.rechunk(corrected_chunks) outputs = [xr.DataArray(res, dims=tuple(out.axes))] - if preprocessing.procs: + if enable_postprocessing: + assert postprocessing is not None sample = {out.name: t for out, t in zip(model.outputs, outputs)} postprocessing.apply(sample, {}) outputs = [sample[out.name] for out in model.outputs] @@ -388,7 +391,7 @@ def run_model_inference_without_tiling( """ model = load_raw_resource_description(rdf_source, update_to_format="latest") assert isinstance(model, raw_nodes.Model) - # remove pre-/postprocessing if specified + # remove pre-/postprocessing if not enabled if not enable_preprocessing: for ipt in model.inputs: if ipt.preprocessing: From a719c245b4374df10cf1525818fc48fe03c1ca9a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Nov 2022 14:53:44 +0100 Subject: [PATCH 14/23] draft bioimageio.core.contrib --- bioimageio/core/contrib/__init__.py | 1 + bioimageio/core/contrib/__main__.py | 16 + bioimageio/core/contrib/contrib_a/__init__.py | 4 + bioimageio/core/contrib/contrib_a/__main__.py | 9 + bioimageio/core/contrib/contrib_a/_demo.py | 20 + .../core/contrib/contrib_a/_inference.py | 182 +++++++++ bioimageio/core/contrib/contrib_a/local.py | 2 + bioimageio/core/contrib/contrib_a/remote.py | 8 + bioimageio/core/contrib/utils/__init__.py | 10 + bioimageio/core/contrib/utils/_ast.py | 7 + bioimageio/core/contrib/utils/_rpc.py | 116 ++++++ bioimageio/core/contrib/utils/_tiling.py | 201 ++++++++++ bioimageio/core/operators/__init__.py | 2 +- bioimageio/core/operators/_run.py | 361 +----------------- dev/environment-base.yaml | 1 + setup.py | 11 +- tests/conftest.py | 2 +- 17 files changed, 582 insertions(+), 371 deletions(-) create mode 100644 bioimageio/core/contrib/__init__.py create mode 100644 bioimageio/core/contrib/__main__.py create mode 100644 bioimageio/core/contrib/contrib_a/__init__.py create mode 100644 
bioimageio/core/contrib/contrib_a/__main__.py create mode 100644 bioimageio/core/contrib/contrib_a/_demo.py create mode 100644 bioimageio/core/contrib/contrib_a/_inference.py create mode 100644 bioimageio/core/contrib/contrib_a/local.py create mode 100644 bioimageio/core/contrib/contrib_a/remote.py create mode 100644 bioimageio/core/contrib/utils/__init__.py create mode 100644 bioimageio/core/contrib/utils/_ast.py create mode 100644 bioimageio/core/contrib/utils/_rpc.py create mode 100644 bioimageio/core/contrib/utils/_tiling.py diff --git a/bioimageio/core/contrib/__init__.py b/bioimageio/core/contrib/__init__.py new file mode 100644 index 00000000..4ead98f9 --- /dev/null +++ b/bioimageio/core/contrib/__init__.py @@ -0,0 +1 @@ +from .contrib_a import * diff --git a/bioimageio/core/contrib/__main__.py b/bioimageio/core/contrib/__main__.py new file mode 100644 index 00000000..2f8e61fb --- /dev/null +++ b/bioimageio/core/contrib/__main__.py @@ -0,0 +1,16 @@ +import argparse +import asyncio +from pathlib import Path + +from bioimageio.core.contrib.utils import start_contrib_service + +parser = argparse.ArgumentParser() +parser.add_argument("contrib_name", nargs="+") + +args = parser.parse_args() + +loop = asyncio.get_event_loop() +for contrib_name in args.contrib_name: + loop.create_task(start_contrib_service(Path(__file__).parent.stem)) + +loop.run_forever() diff --git a/bioimageio/core/contrib/contrib_a/__init__.py b/bioimageio/core/contrib/contrib_a/__init__.py new file mode 100644 index 00000000..4903a9c8 --- /dev/null +++ b/bioimageio/core/contrib/contrib_a/__init__.py @@ -0,0 +1,4 @@ +try: + from .local import * +except ImportError: + from .remote import * diff --git a/bioimageio/core/contrib/contrib_a/__main__.py b/bioimageio/core/contrib/contrib_a/__main__.py new file mode 100644 index 00000000..c7453ae5 --- /dev/null +++ b/bioimageio/core/contrib/contrib_a/__main__.py @@ -0,0 +1,9 @@ +# todo: maybe only keep bioimageio.core.contrib.__main__ to avoid redundant 
code and multiple entry points? +import asyncio +from pathlib import Path + +from bioimageio.core.contrib.utils import start_contrib_service + +loop = asyncio.get_event_loop() +loop.create_task(start_contrib_service(Path(__file__).parent.stem)) +loop.run_forever() diff --git a/bioimageio/core/contrib/contrib_a/_demo.py b/bioimageio/core/contrib/contrib_a/_demo.py new file mode 100644 index 00000000..cd0b2e15 --- /dev/null +++ b/bioimageio/core/contrib/contrib_a/_demo.py @@ -0,0 +1,20 @@ +import asyncio + + +async def hello(msg="Hello!"): + print(msg) + return msg + + +# async def main(): +# task = asyncio.create_task(meh()) +# +# out1 = await task +# out2 = await task +# +# print(out1, out2) +# +# +# if __name__ == "__main__": +# loop = asyncio.get_event_loop() +# loop.run_until_complete(main()) diff --git a/bioimageio/core/contrib/contrib_a/_inference.py b/bioimageio/core/contrib/contrib_a/_inference.py new file mode 100644 index 00000000..f2f1d2e3 --- /dev/null +++ b/bioimageio/core/contrib/contrib_a/_inference.py @@ -0,0 +1,182 @@ +from os import PathLike +from typing import Dict, IO, List, Optional, Sequence, Tuple, Union + +import dask.array as da +import numpy as np +import xarray as xr + +from bioimageio.core.contrib.utils import ( + get_chunk, + get_corrected_chunks, + get_default_input_tile, + get_output_rois, + transpose_sequence, + tuple_roi_to_slices, +) +from bioimageio.core.prediction_pipeline._combined_processing import CombinedProcessing +from bioimageio.core.prediction_pipeline._model_adapters import ModelAdapter, create_model_adapter +from bioimageio.core.resource_io import nodes +from bioimageio.core.resource_io.utils import resolve_raw_node +from bioimageio.spec import load_raw_resource_description +from bioimageio.spec.model import raw_nodes +from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal + + 
+BoundaryMode = Literal["reflect"] + + +def forward(*tensors, model_adapter: ModelAdapter, output_tile_roi: Tuple[slice, ...]): + """helper to cast dask array chunks to xr.DataArray and apply a roi to the output""" + assert len(model_adapter.bioimageio_model.inputs) == len(tensors), ( + len(model_adapter.bioimageio_model.inputs), + len(tensors), + ) + tensors = [xr.DataArray(t, dims=tuple(ipt.axes)) for ipt, t, in zip(model_adapter.bioimageio_model.inputs, tensors)] + output = model_adapter.forward(*tensors)[0] # todo: allow more than 1 output + return output[output_tile_roi] + + +def run_model_inference_with_dask( + rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], + *tensors: xr.DataArray, + enable_preprocessing: bool = True, + enable_postprocessing: bool = True, + devices: Sequence[str] = ("cpu",), + tiles: Optional[Sequence[Dict[str, int]]] = None, + boundary_mode: Union[ + BoundaryMode, + Sequence[BoundaryMode], + ] = "reflect", +) -> List[xr.DataArray]: + """run model inference + + Returns: + list: model outputs + """ + model: raw_nodes.Model = load_raw_resource_description(rdf_source, update_to_format="latest") # noqa + if len(model.outputs) > 1: + raise NotImplementedError("More than one model output not yet implemented") + + assert isinstance(model, raw_nodes.Model) + # always remove pre-/postprocessing, but save it if enabled + # todo: improve pre- and postprocessing! 
+ + if enable_preprocessing: + preprocessing = CombinedProcessing.from_tensor_specs( + [resolve_raw_node(ipt, nodes, root_path=model.root_path) for ipt in model.inputs] + ) + sample = {ipt.name: t for ipt, t in zip(model.inputs, tensors)} + preprocessing.apply(sample, {}) + tensors = [sample[ipt.name] for ipt in model.inputs] + + if enable_postprocessing: + postprocessing = CombinedProcessing.from_tensor_specs( + [resolve_raw_node(out, nodes, root_path=model.root_path) for out in model.outputs] + ) + else: + postprocessing = None + + # transpose tensors to match ipt spec + assert len(tensors) == len(model.inputs) + tensors = [t.transpose(*s.axes) for t, s in zip(tensors, model.inputs)] + if isinstance(boundary_mode, str): + boundary_mode = [boundary_mode] * len(tensors) + + if tiles is None: + tiles = [get_default_input_tile(ipt) for ipt in model.inputs] + + # calculate chunking of the input tensors from tiles taking halo and offset into account + chunks, overlap_depths, paddings = zip( + *(get_chunk(c, ipt, model.outputs, t) for c, ipt, t in zip(tiles, model.inputs, tensors)) + ) + chunks_by_name = {ipt.name: c for ipt, c in zip(model.inputs, chunks)} + padded_input_tensor_shapes = { + ipt.name: [ts + sum(p[a]) for ts, a in zip(t.shape, ipt.axes)] + for ipt, t, p in zip(model.inputs, tensors, paddings) + } + + # note: da.overlap.overlap or da.overlap.map_overlap equivalents are not yet available in xarray + tensors = [ + da.overlap.overlap(t.pad(p, mode=bm).chunk(c).data, depth=d, boundary=bm) + for t, c, d, p, bm in zip(tensors, chunks, overlap_depths, paddings, boundary_mode) + ] + + output_tile_roi, output_roi = get_output_rois( + model.outputs[0], + input_overlaps={ipt.name: d for ipt, d in zip(model.inputs, overlap_depths)}, + input_paddings={ipt.name: p for ipt, p in zip(model.inputs, paddings)}, + ipt_by_name={ipt.name: ipt for ipt in model.inputs}, + ) + + n_batches = tensors[0].npartitions + assert all(t.npartitions == n_batches for t in tensors[1:]), 
[t.npartitions for t in tensors] + + model_adapter = create_model_adapter(bioimageio_model=model, devices=devices) + + # todo: generalize to multiple outputs + out = model.outputs[0] + if isinstance(out.shape, raw_nodes.ImplicitOutputShape): + ipt_shape = padded_input_tensor_shapes[out.shape.reference_tensor] + ipt_by_name = {ipt.name: ipt for ipt in model.inputs} + ipt_axes = ipt_by_name[out.shape.reference_tensor].axes + ipt_shape = np.array(transpose_sequence(ipt_shape, ipt_axes, out.axes, 0)) + out_scale = [0.0 if s is None else s for s in out.shape.scale] + out_offset = np.array(out.shape.offset) + out_shape_float = ipt_shape * out_scale + 2 * out_offset + assert (out_shape_float == out_shape_float.astype(int)).all(), out_shape_float + out_shape: Sequence[int] = out_shape_float.astype(int) + else: + out_shape = out.shape + out_scale = [1.0] * len(out_shape) + ipt_axes = [] + + # set up da.blockwise to orchestrate tiled forward + out_ind = [] + new_axes = {} + adjust_chunks = {} + for a, s, sc in zip(out.axes, out_shape, out_scale): + if a in ("b", "batch"): + out_ind.append(a) + elif a in ipt_axes: + axis_name = f"{out.shape.reference_tensor}_{a}" + out_ind.append(axis_name) + adjust_chunks[axis_name] = lambda _, aa=a, scc=sc: chunks_by_name[out.shape.reference_tensor][aa] * scc + else: + out_ind.append(f"{out.name}_{a}") + new_axes[f"{out.name}_{a}"] = s + + inputs_sequence = [] + for t, ipt in zip(tensors, model.inputs): + inputs_sequence.append(t) + inputs_sequence.append(tuple("b" if a == "b" else f"{ipt.name}_{a}" for a in ipt.axes)) + + result = da.blockwise( + forward, + tuple(out_ind), + *inputs_sequence, + new_axes=new_axes, + dtype=np.dtype(out.data_type), + meta=np.empty((), dtype=np.dtype(out.data_type)), + name=(model.config or {}).get("bioimageio", {}).get("nickname") or f"model_{model.id}", + adjust_chunks=adjust_chunks, + **dict(model_adapter=model_adapter, output_tile_roi=tuple_roi_to_slices(output_tile_roi)), + ) + + corrected_chunks, rechunk 
= get_corrected_chunks(result.chunks, result.shape, output_roi) + res = result[tuple_roi_to_slices(output_roi)] + if rechunk: + res = res.rechunk(corrected_chunks) + + outputs = [xr.DataArray(res, dims=tuple(out.axes))] + if enable_postprocessing: + assert postprocessing is not None + sample = {out.name: t for out, t in zip(model.outputs, outputs)} + postprocessing.apply(sample, {}) + outputs = [sample[out.name] for out in model.outputs] + + return outputs diff --git a/bioimageio/core/contrib/contrib_a/local.py b/bioimageio/core/contrib/contrib_a/local.py new file mode 100644 index 00000000..4ce3402f --- /dev/null +++ b/bioimageio/core/contrib/contrib_a/local.py @@ -0,0 +1,2 @@ +from ._demo import hello +from ._inference import run_model_inference_with_dask diff --git a/bioimageio/core/contrib/contrib_a/remote.py b/bioimageio/core/contrib/contrib_a/remote.py new file mode 100644 index 00000000..084f7d2e --- /dev/null +++ b/bioimageio/core/contrib/contrib_a/remote.py @@ -0,0 +1,8 @@ +import sys +from pathlib import Path + +from bioimageio.core.contrib.utils import RemoteContrib + +remote_module = RemoteContrib(Path(__file__).parent.stem) +__all__ = remote_module.__all__ +sys.modules[__name__] = remote_module # noqa diff --git a/bioimageio/core/contrib/utils/__init__.py b/bioimageio/core/contrib/utils/__init__.py new file mode 100644 index 00000000..a420ef9c --- /dev/null +++ b/bioimageio/core/contrib/utils/__init__.py @@ -0,0 +1,10 @@ +from ._ast import get_ast_tree +from ._rpc import ImportCollector, RemoteContrib, start_contrib_service +from ._tiling import ( + get_chunk, + get_corrected_chunks, + get_default_input_tile, + get_output_rois, + transpose_sequence, + tuple_roi_to_slices, +) diff --git a/bioimageio/core/contrib/utils/_ast.py b/bioimageio/core/contrib/utils/_ast.py new file mode 100644 index 00000000..d157cbc0 --- /dev/null +++ b/bioimageio/core/contrib/utils/_ast.py @@ -0,0 +1,7 @@ +import ast +from pathlib import Path + + +def get_ast_tree(path: 
Path): + src = path.read_text() + return ast.parse(src) diff --git a/bioimageio/core/contrib/utils/_rpc.py b/bioimageio/core/contrib/utils/_rpc.py new file mode 100644 index 00000000..cf2015d0 --- /dev/null +++ b/bioimageio/core/contrib/utils/_rpc.py @@ -0,0 +1,116 @@ +import ast +import asyncio +import logging +import os +from functools import partial +from inspect import getmembers, isfunction +from pathlib import Path +from typing import List, Optional + +from imjoy_rpc.hypha import connect_to_server + +from bioimageio.core import contrib +from ._ast import get_ast_tree + +logger = logging.getLogger(__name__) + + +async def start_contrib_service(contrib_name: str, server_url: Optional[str] = None): + server = await connect_to_server({"server_url": server_url or get_contrib_server_url(contrib_name)}) + + contrib_part = getattr(contrib, contrib_name) + service_name = f"BioImageIO {' '.join(n.capitalize() for n in contrib_name.split('_'))} Module" + service_config = dict( + name=service_name, + id=f"bioimageio-{contrib_name}", + config={ + "visibility": "public", + "run_in_executor": True, # This will make sure all the sync functions run in a separate thread + }, + ) + + for func_name, func in getmembers(contrib_part, isfunction): + assert func_name not in service_config + service_config[func_name] = func + + await server.register_service(service_config) + + logger.info(f"{service_name} service registered at workspace: {server.config.workspace}") + + +class ImportCollector(ast.NodeVisitor): + def __init__(self): + self.imported: List[str] = [] + + def visit_Import(self, node: ast.Import): + raise ValueError("Found 'import' statement. Expected 'from . import ' only") + + def visit_ImportFrom(self, node: ast.ImportFrom): + if not node.level: + raise ValueError(f"Unsupported absolute import from {node.module}") + + if "." 
in node.module: + raise ValueError(f"Unsupported nested import from {node.module}") + + for alias_node in node.names: + self.imported.append(alias_node.name) + if alias_node.asname is not None: + raise ValueError( + f"Please import contrib functions without 'as', i.e. use '{alias_node.name}' instead of '{alias_node.asname}'." + ) + + +SERVER_URL_ENV_NAME = "BIOIMAGEIO_CONTRIB_URL" +DEFAULT_SERVER_URL = "http://localhost:9000" + + +def get_contrib_specific_server_url_env_name(contrib_name): + return f"BIOIMAGEIO_{contrib_name.capitalize()}_URL" + + +def get_contrib_server_url(contrib_name) -> str: + return os.getenv( + get_contrib_specific_server_url_env_name(contrib_name), os.getenv(SERVER_URL_ENV_NAME, DEFAULT_SERVER_URL) + ) + + +class RemoteContrib: + def __init__(self, contrib_name: str, server_url: Optional[str] = None): + self.server_url = server_url or get_contrib_server_url(contrib_name) + self.contrib_name = contrib_name + self.contrib = None + local_src = Path(__file__).parent.parent / contrib_name / "local.py" + tree = get_ast_tree(local_src) + import_collector = ImportCollector() + import_collector.visit(tree) + self.__all__ = import_collector.imported + self.service_funcs = {} + for name in self.__all__: + setattr(self, name, partial(self._service_call, _contrib_func_name=name)) + + def __await__(self): + yield from self._ainit().__await__() + + async def _ainit(self): + try: + server = await asyncio.create_task(connect_to_server({"server_url": self.server_url})) + except Exception as e: + raise Exception( + f"Failed to connect to {self.server_url}. " + f"Make sure {get_contrib_specific_server_url_env_name(self.contrib_name)} or {SERVER_URL_ENV_NAME} " + f"is set or {self.server_url} is running." + ) from e + try: + contrib_service = await server.get_service(f"bioimageio-{self.contrib_name}") + except Exception as e: + raise Exception( + f"bioimageio-{self.contrib_name} service not found. 
Start with 'python -m bioimageio.core.contrib.{self.contrib_name}' in a suitable (conda) environment." + ) from e + # todo: start contrib service entry point, e.g. f"bioimageio start {contrib_name}" + + self.service_funcs = {name: getattr(contrib_service, name) for name in self.__all__} + return self + + async def _service_call(self, *args, _contrib_func_name, **kwargs): + await self + return await self.service_funcs[_contrib_func_name](*args, **kwargs) diff --git a/bioimageio/core/contrib/utils/_tiling.py b/bioimageio/core/contrib/utils/_tiling.py new file mode 100644 index 00000000..7bad622c --- /dev/null +++ b/bioimageio/core/contrib/utils/_tiling.py @@ -0,0 +1,201 @@ +import math +import warnings +from collections import defaultdict +from typing import Dict, List, Sequence, Tuple, TypeVar + +import numpy as np + +from bioimageio.spec.model import raw_nodes + +TA = TypeVar("TA") +TS = TypeVar("TS") + + +def transpose_sequence(sequence: Sequence[TS], axes: Sequence[TA], desired_axes: Sequence[TA], default) -> List[TS]: + """transpose a sequence according to its axes to match a desired axes order, + filling non-existing entries with default + + Returns: + sequence: the transposed sequence as a list + """ + return [default if ia not in axes else sequence[axes.index(ia)] for ia in desired_axes] + + +def get_chunk( + chunk, ipt: raw_nodes.InputTensor, outputs: Sequence[raw_nodes.OutputTensor], tensor +) -> Tuple[Dict[str, int], Dict[int, int], Dict[str, Tuple[int, int]]]: + """correct chunk to account for offset and halo + + Returns: + corrected chunk: to tile the input array with + overlap: overlap of corrected chunks (yields original chunks) + """ + ipt_shape = np.array([chunk[a] for a in ipt.axes], dtype=int) + referencing_outputs = [ + ot + for ot in outputs + if isinstance(ot.shape, raw_nodes.ImplicitOutputShape) and ot.shape.reference_tensor == ipt.name + ] + if not referencing_outputs: + return ( + chunk, + defaultdict(lambda: 0), + defaultdict(lambda: (0, 0)), 
+ ) + + if len(referencing_outputs) > 1: + raise NotImplementedError("more than one output references an input") + + sohs = [ + ( + np.array(transpose_sequence(ot.shape.scale, ot.axes, ipt.axes, 1.0)), + np.array(transpose_sequence(ot.shape.offset, ot.axes, ipt.axes, 0.0)), + np.array(transpose_sequence(ot.halo, ot.axes, ipt.axes, 0.0)), + ) + for ot in referencing_outputs + ] + scale, offset, halo = sohs[0] + if any((s != scale).any() or (off != offset).any() or (h != halo).any() for s, off, h in sohs[1:]): + # todo: ignore any new dimensions denoted by scale entry of None + raise ValueError( + f"Incompatible output specs referencing same input tensor with different scale/offset/halo: {[out.name for out in referencing_outputs]}." + ) + + if any(off > 0 for a, off in zip(ipt.axes, offset) if a in ("x", "y", "z", "t", "time")): + raise NotImplementedError( + "offset>0; space/time output is larger than input. todo: cut offset on tiles, but leave at image edge." + ) + + assert all(h >= 0 for h in halo) + overlap = np.maximum((halo - offset) / scale, 0) # no negative overlap + overlap = np.ceil(overlap).astype(int) + corrected_chunk = ipt_shape - 2 * overlap + t_shape = np.array(tensor.shape, dtype=int) + assert len(t_shape) == len(ipt_shape) + padding_total = (corrected_chunk - (t_shape % corrected_chunk)) % corrected_chunk + padding = [(0, p) for p in padding_total] + + return ( + dict(zip(ipt.axes, corrected_chunk)), + dict(enumerate(overlap)), # xr.DataArray.overlap not yet available: key by index for da.overlap + dict(zip(ipt.axes, padding)), + ) + + +def tuple_roi_to_slices(tuple_roi: Sequence[Tuple[int, int]]) -> Tuple[slice, ...]: + return tuple(np.s_[r0:-r1] if r1 else np.s_[r0:] for r0, r1 in tuple_roi) + + +def get_default_input_tile(ipt: raw_nodes.InputTensor) -> List[int]: + """Guess a good default input tile shape from the input tensor spec""" + if isinstance(ipt.shape, list): + shape = ipt.shape + elif isinstance(ipt.shape, raw_nodes.ParametrizedInputShape): + is3d = len([a for a in ipt.axes if a not in 
"bc"]) > 2 + min_len = 64 if is3d else 256 + shape = [] + for ax, min_ax, step_ax in zip(ipt.axes, ipt.shape.min_shape, ipt.shape.step): + if ax in "zyx" and step_ax > 0: + len_ax = min_ax + while len_ax < min_len: + len_ax += step_ax + shape.append(len_ax) + else: + shape.append(min_ax) + else: + raise TypeError(type(ipt.shape)) + + assert len(ipt.axes) == len(shape) + return shape + + +def get_asymmetric_halolike(value: float) -> Tuple[int, int]: + assert value >= 0 + if value % 1: + assert value % 0.5 == 0 + return math.floor(value), math.ceil(value) + else: + return int(value), int(value) + + +def get_output_rois( + out: raw_nodes.OutputTensor, + input_overlaps: Dict[str, Dict[int, int]], + input_paddings: Dict[str, Dict[str, Tuple[int, int]]], + ipt_by_name: Dict[str, raw_nodes.InputTensor], +) -> Tuple[Sequence[Tuple[int, int]], Sequence[Tuple[int, int]]]: + if isinstance(out.shape, raw_nodes.ImplicitOutputShape): + scale = np.array([1.0 if s is None else s for s in out.shape.scale]) + offset: Sequence[float] = out.shape.offset + ref_ipt = ipt_by_name[out.shape.reference_tensor] + eff_halo_float: List[float] = [ + input_overlaps[out.shape.reference_tensor].get(ref_ipt.axes.index(a), 0) * s + off + for a, s, off in zip(out.axes, scale, offset) + ] + ref_input_padding_dict = input_paddings[out.shape.reference_tensor] + else: + scale = np.ones(len(out.shape)) + offset = np.zeros(len(out.shape)) + eff_halo_float = [0.0] * len(out.shape) + ref_input_padding_dict = {} + + # effective halo to be trimmed from output. (only for space and time dims) + output_chunk_roi: List[Tuple[int, int]] = [] + for i, a in enumerate(out.axes): + if a in ("b", "batch"): + errors_in = (["halo"] if eff_halo_float[i] else []) + (["offset"] if offset[i] else []) + if errors_in: + raise ValueError(f"invalid {' and '.join(errors_in)} for batch dimension of output {out.name}") + elif a in ("x", "y", "z", "t", "time"): + pass + elif a in ("i", "index", "c", "channel"): + # ignore offset. 
As we cannot tile across these dimensions, offsets should be returned, not trimmed. + eff_halo_float[i] -= offset[i] + if eff_halo_float[i]: + warnings.warn(f"Trimming off halo for axis {a} of output {out.name}.") + + else: + raise NotImplementedError(a) + + output_chunk_roi.append(get_asymmetric_halolike(eff_halo_float[i])) + + # undo input padding for the resulting final output tensor + # also trim any negative offset, which we padded for each chunk + output_roi = [] + for a, s, off in zip(out.axes, scale, offset): + p0, p1 = ref_input_padding_dict.get(a, (0, 0)) + off0, off1 = get_asymmetric_halolike(-min(off, 0)) + output_roi.append((math.ceil(p0 * s + off0), math.ceil(p1 * s + off1))) + + return output_chunk_roi, output_roi + + +def get_corrected_chunks(chunks: Dict[int, Sequence[int]], shape: Sequence[int], roi: Sequence[Tuple[int, int]]): + """adapt `chunks` chunking `shape` for `shape[roi]`""" + corrected_chunks = [] + rechunk = False # NOTE(review): never set to True in this function, so False is always returned; confirm intent + for i, (s, roi) in enumerate(zip(shape, roi)): # NOTE(review): loop variable `roi` shadows the `roi` parameter + c = chunks[i] + assert s == sum(c), (s, c) + if sum(roi): + c = list(c) + r0 = roi[0] + while r0 >= c[0]: + r0 -= c[0] + c = c[1:] + if not c: + raise ValueError(f"Trimming too much from output {shape} with roi {roi}") + + c[0] -= r0 + + r1 = roi[1] + while r1 >= c[-1]: + r1 -= c[-1] + c = c[:-1] + if not c: + raise ValueError(f"Trimming too much from output {shape} with roi {roi}") + + c[-1] -= r1 + + corrected_chunks.append(c) + return corrected_chunks, rechunk diff --git a/bioimageio/core/operators/__init__.py b/bioimageio/core/operators/__init__.py index 3441e489..066450bb 100644 --- a/bioimageio/core/operators/__init__.py +++ b/bioimageio/core/operators/__init__.py @@ -1,4 +1,4 @@ from ._assert import assert_shape from ._generate import generate_random_uniform_tensor -from ._run import run_model_inference, run_model_inference_without_tiling, run_workflow +from ._run import WorkflowState, run_model_inference_without_tiling, run_workflow, run_workflow_test from ._various 
import binarize, load_tensors, log, select_outputs diff --git a/bioimageio/core/operators/_run.py b/bioimageio/core/operators/_run.py index f79a4b78..d274ec67 100644 --- a/bioimageio/core/operators/_run.py +++ b/bioimageio/core/operators/_run.py @@ -1,382 +1,23 @@ -import math -import warnings -from collections import defaultdict from dataclasses import dataclass from os import PathLike -from typing import Any, Dict, Generator, IO, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, Generator, IO, List, Optional, Sequence, Union -import dask.array as da -import numpy as np import xarray as xr from marshmallow import missing from bioimageio.core import load_resource_description from bioimageio.core.prediction_pipeline import create_prediction_pipeline -from bioimageio.core.prediction_pipeline._combined_processing import CombinedProcessing -from bioimageio.core.prediction_pipeline._model_adapters import ModelAdapter, create_model_adapter from bioimageio.core.resource_io import nodes -from bioimageio.core.resource_io.utils import resolve_raw_node from bioimageio.spec import load_raw_resource_description from bioimageio.spec.model import raw_nodes from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription -try: - import torch.multiprocessing as multiprocessing -except ImportError: - import multiprocessing - try: from typing import Literal except ImportError: from typing_extensions import Literal -BoundaryMode = Literal["reflect"] - - -def transpose_seq(seq, seq_axes, desired_axes, default): - return np.array([default if ia not in seq_axes else seq[seq_axes.index(ia)] for ia in desired_axes]) - - -def get_chunk( - chunk, ipt: raw_nodes.InputTensor, outputs: Sequence[raw_nodes.OutputTensor], tensor -) -> Tuple[Dict[str, int], Dict[int, int], Dict[str, Tuple[int, int]]]: - """correct chunk to account for offset and halo - - Returns: - corrected chunk: to tile the input array with - overlap: overlap of corrected chunks 
(yields original chunks) - """ - ipt_shape = np.array([chunk[a] for a in ipt.axes], dtype=int) - referencing_outputs = [ - ot - for ot in outputs - if isinstance(ot.shape, raw_nodes.ImplicitOutputShape) and ot.shape.reference_tensor == ipt.name - ] - if not referencing_outputs: - return ( - chunk, - defaultdict(lambda: 0), - defaultdict(lambda: (0, 0)), - ) - - if len(referencing_outputs) > 1: - raise NotImplementedError("more than one output references an input") - - sohs = [ - ( - transpose_seq(ot.shape.scale, ot.axes, ipt.axes, 1.0), - transpose_seq(ot.shape.offset, ot.axes, ipt.axes, 0.0), - transpose_seq(ot.halo, ot.axes, ipt.axes, 0.0), - ) - for ot in referencing_outputs - ] - scale, offset, halo = sohs[0] - if any((s != scale).any() or (off != offset).any() or (h != halo).any() for s, off, h in sohs[1:]): - # todo: ignore any new dimensions denoted by scale entry of None - raise ValueError( - f"Incompatible output specs referencing same input tensor with different scale/offset/halo: {[out.name for out in referencing_outputs]}." - ) - - if any(off > 0 for a, off in zip(offset, ipt.axes) if a in ("x", "y", "z", "t", "time")): - raise NotImplementedError( - "offset>0; space/time output is larger than input. todo: cut offset on tiles, but leave at image edge." 
- ) - - assert all(h >= 0 for h in halo) - overlap = np.maximum((halo - offset) / scale, 0) # no negative overlap - overlap = np.ceil(overlap).astype(int) - corrected_chunk = ipt_shape - 2 * overlap - t_shape = np.array(tensor.shape, dtype=int) - assert len(t_shape) == len(ipt_shape) - padding_total = (corrected_chunk - (t_shape % corrected_chunk)) % corrected_chunk - padding = [(0, p) for p in padding_total] - - return ( - dict(zip(ipt.axes, corrected_chunk)), - dict(enumerate(overlap)), # xr.DataArray.overlap not yet available: key by index for da.overlap - dict(zip(ipt.axes, padding)), - ) - - -def tuple_roi_to_slices(tuple_roi: Sequence[Tuple[int, int]]) -> Tuple[slice, ...]: - return tuple(np.s_[r0:-r1] if r1 else np.s_[r0:] for r0, r1 in tuple_roi) - - -def get_default_input_chunk(ipt: raw_nodes.InputTensor) -> List[int]: - if isinstance(ipt.shape, list): - shape = ipt.shape - elif isinstance(ipt.shape, raw_nodes.ParametrizedInputShape): - is3d = len([a for a in ipt.axes if a not in "bc"]) > 2 - min_len = 64 if is3d else 256 - shape = [] - for ax, min_ax, step_ax in zip(ipt.axes, ipt.shape.min_shape, ipt.shape.step): - if ax in "zyx" and step_ax > 0: - len_ax = min_ax - while len_ax < min_len: - len_ax += step_ax - shape.append(len_ax) - else: - shape.append(min_ax) - else: - raise TypeError(type(ipt.shape)) - - assert len(ipt.axes) == len(shape) - return shape - - -def get_asymmetric_halolike(value: float) -> Tuple[int, int]: - assert value >= 0 - if value % 1: - assert value % 0.5 == 0 - return math.floor(value), math.ceil(value) - else: - return int(value), int(value) - - -def get_output_rois( - out: raw_nodes.OutputTensor, - input_overlaps: Dict[str, Dict[int, int]], - input_paddings: Dict[str, Dict[str, Tuple[int, int]]], - ipt_by_name: Dict[str, raw_nodes.InputTensor], -) -> Tuple[Sequence[Tuple[int, int]], Sequence[Tuple[int, int]]]: - if isinstance(out.shape, raw_nodes.ImplicitOutputShape): - scale = np.array([1.0 if s is None else s for s in 
out.shape.scale]) - offset: Sequence[float] = out.shape.offset - ref_ipt = ipt_by_name[out.shape.reference_tensor] - eff_halo_float: List[float] = [ - input_overlaps[out.shape.reference_tensor].get(ref_ipt.axes.index(a), 0) * s + off - for a, s, off in zip(out.axes, scale, offset) - ] - ref_input_padding_dict = input_paddings[out.shape.reference_tensor] - else: - scale = np.ones(len(out.shape)) - offset = np.zeros(len(out.shape)) - eff_halo_float = [0.0] * len(out.shape) - ref_input_padding_dict = {} - - # effective halo to be trimmed from output. (only for space and time dims) - output_chunk_roi: List[Tuple[int, int]] = [] - for i, a in enumerate(out.axes): - if a in ("b", "batch"): - errors_in = (["halo"] if eff_halo_float[i] else []) + (["offset"] if offset[i] else []) - if errors_in: - raise ValueError(f"invalid {' and '.join(errors_in)} for batch dimension of output {out.name}") - elif a in ("x", "y", "z", "t", "time"): - pass - elif a in ("i", "index", "c", "channel"): - # ignore offset. As we cannot tile across these dimensions, offsets should be returned, not trimmed. 
- eff_halo_float[i] -= offset[i] - if eff_halo_float[i]: - warnings.warn(f"Trimming off halo for axis {a} of output {out.name}.") - - else: - raise NotImplementedError(a) - - output_chunk_roi.append(get_asymmetric_halolike(eff_halo_float[i])) - - # undo input padding for the resulting final output tensor - # also trim any negative offset, which we padded for each chunk - output_roi = [] - for a, s, off in zip(out.axes, scale, offset): - p0, p1 = ref_input_padding_dict.get(a, (0, 0)) - off0, off1 = get_asymmetric_halolike(-min(off, 0)) - output_roi.append((math.ceil(p0 * s + off0), math.ceil(p1 * s + off1))) - - return output_chunk_roi, output_roi - - -def forward(*tensors, model_adapter: ModelAdapter, output_tile_roi: Tuple[slice, ...]): - """helper to cast dask array chunks to xr.DataArray and apply a roi to the output""" - assert len(model_adapter.bioimageio_model.inputs) == len(tensors), ( - len(model_adapter.bioimageio_model.inputs), - len(tensors), - ) - tensors = [xr.DataArray(t, dims=tuple(ipt.axes)) for ipt, t, in zip(model_adapter.bioimageio_model.inputs, tensors)] - output = model_adapter.forward(*tensors)[0] # todo: allow more than 1 output - return output[output_tile_roi] - - -def get_corrected_chunks(chunks: Dict[int, Sequence[int]], shape: Sequence[int], roi: Sequence[Tuple[int, int]]): - """adapt `chunks` chunking `shape` for `shape[roi]`""" - corrected_chunks = [] - rechunk = False - for i, (s, roi) in enumerate(zip(shape, roi)): - c = chunks[i] - assert s == sum(c), (s, c) - if sum(roi): - c = list(c) - r0 = roi[0] - while r0 >= c[0]: - r0 -= c[0] - c = c[1:] - if not c: - raise ValueError(f"Trimming too much from output {shape} with roi {roi}") - - c[0] -= r0 - - r1 = roi[1] - while r1 >= c[-1]: - r1 -= c[-1] - c = c[:-1] - if not c: - raise ValueError(f"Trimming too much from output {shape} with roi {roi}") - - c[-1] -= r1 - - corrected_chunks.append(c) - return corrected_chunks, rechunk - - -def run_model_inference( - rdf_source: Union[dict, 
PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *tensors: xr.DataArray, - enable_preprocessing: bool = True, - enable_postprocessing: bool = True, - devices: Sequence[str] = ("cpu",), - tiles: Union[None, Literal["auto"], Sequence[Dict[str, int]]] = "auto", - boundary_mode: Union[ - BoundaryMode, - Sequence[BoundaryMode], - ] = "reflect", -) -> List[xr.DataArray]: - """run model inference - - Returns: - list: model outputs - """ - if tiles is None: - return run_model_inference_without_tiling( - rdf_source, - *tensors, - enable_preprocessing=enable_preprocessing, - enable_postprocessing=enable_postprocessing, - devices=devices, - ) - model: raw_nodes.Model = load_raw_resource_description(rdf_source, update_to_format="latest") # noqa - if len(model.outputs) > 1: - raise NotImplementedError("More than one model output not yet implemented") - - assert isinstance(model, raw_nodes.Model) - # always remove pre-/postprocessing, but save it if enabled - # todo: improve pre- and postprocessing! 
- - if enable_preprocessing: - preprocessing = CombinedProcessing.from_tensor_specs( - [resolve_raw_node(ipt, nodes, root_path=model.root_path) for ipt in model.inputs] - ) - sample = {ipt.name: t for ipt, t in zip(model.inputs, tensors)} - preprocessing.apply(sample, {}) - tensors = [sample[ipt.name] for ipt in model.inputs] - - if enable_postprocessing: - postprocessing = CombinedProcessing.from_tensor_specs( - [resolve_raw_node(out, nodes, root_path=model.root_path) for out in model.outputs] - ) - else: - postprocessing = None - - # transpose tensors to match ipt spec - assert len(tensors) == len(model.inputs) - tensors = [t.transpose(*s.axes) for t, s in zip(tensors, model.inputs)] - if isinstance(boundary_mode, str): - boundary_mode = [boundary_mode] * len(tensors) - - if tiles == "auto": - tiles = [get_default_input_chunk(ipt) for ipt in model.inputs] - - # calculate chunking of the input tensors from tiles taking halo and offset into account - chunks, overlap_depths, paddings = zip( - *(get_chunk(c, ipt, model.outputs, t) for c, ipt, t in zip(tiles, model.inputs, tensors)) - ) - chunks_by_name = {ipt.name: c for ipt, c in zip(model.inputs, chunks)} - padded_input_tensor_shapes = { - ipt.name: [ts + sum(p[a]) for ts, a in zip(t.shape, ipt.axes)] - for ipt, t, p in zip(model.inputs, tensors, paddings) - } - - # note: da.overlap.overlap or da.overlap.map_overlap equivalents are not yet available in xarray - tensors = [ - da.overlap.overlap(t.pad(p, mode=bm).chunk(c).data, depth=d, boundary=bm) - for t, c, d, p, bm in zip(tensors, chunks, overlap_depths, paddings, boundary_mode) - ] - - output_tile_roi, output_roi = get_output_rois( - model.outputs[0], - input_overlaps={ipt.name: d for ipt, d in zip(model.inputs, overlap_depths)}, - input_paddings={ipt.name: p for ipt, p in zip(model.inputs, paddings)}, - ipt_by_name={ipt.name: ipt for ipt in model.inputs}, - ) - - n_batches = tensors[0].npartitions - assert all(t.npartitions == n_batches for t in tensors[1:]), 
[t.npartitions for t in tensors] - - model_adapter = create_model_adapter(bioimageio_model=model, devices=devices) - - # todo: generalize to multiple outputs - out = model.outputs[0] - if isinstance(out.shape, raw_nodes.ImplicitOutputShape): - ipt_shape = padded_input_tensor_shapes[out.shape.reference_tensor] - ipt_by_name = {ipt.name: ipt for ipt in model.inputs} - ipt_axes = ipt_by_name[out.shape.reference_tensor].axes - ipt_shape = transpose_seq(ipt_shape, ipt_axes, out.axes, 0) - out_scale = [0.0 if s is None else s for s in out.shape.scale] - out_offset = np.array(out.shape.offset) - out_shape_float = ipt_shape * out_scale + 2 * out_offset - assert (out_shape_float == out_shape_float.astype(int)).all(), out_shape_float - out_shape: Sequence[int] = out_shape_float.astype(int) - else: - out_shape = out.shape - out_scale = [1.0] * len(out_shape) - ipt_axes = [] - - # set up da.blockwise to orchestrate tiled forward - out_ind = [] - new_axes = {} - adjust_chunks = {} - for a, s, sc in zip(out.axes, out_shape, out_scale): - if a in ("b", "batch"): - out_ind.append(a) - elif a in ipt_axes: - axis_name = f"{out.shape.reference_tensor}_{a}" - out_ind.append(axis_name) - adjust_chunks[axis_name] = lambda _, aa=a, scc=sc: chunks_by_name[out.shape.reference_tensor][aa] * scc - else: - out_ind.append(f"{out.name}_{a}") - new_axes[f"{out.name}_{a}"] = s - - inputs_sequence = [] - for t, ipt in zip(tensors, model.inputs): - inputs_sequence.append(t) - inputs_sequence.append(tuple("b" if a == "b" else f"{ipt.name}_{a}" for a in ipt.axes)) - - result = da.blockwise( - forward, - tuple(out_ind), - *inputs_sequence, - new_axes=new_axes, - dtype=np.dtype(out.data_type), - meta=np.empty((), dtype=np.dtype(out.data_type)), - name=(model.config or {}).get("bioimageio", {}).get("nickname") or f"model_{model.id}", - adjust_chunks=adjust_chunks, - **dict(model_adapter=model_adapter, output_tile_roi=tuple_roi_to_slices(output_tile_roi)), - ) - - corrected_chunks, rechunk = 
get_corrected_chunks(result.chunks, result.shape, output_roi) - res = result[tuple_roi_to_slices(output_roi)] - if rechunk: - res = res.rechunk(corrected_chunks) - - outputs = [xr.DataArray(res, dims=tuple(out.axes))] - if enable_postprocessing: - assert postprocessing is not None - sample = {out.name: t for out, t in zip(model.outputs, outputs)} - postprocessing.apply(sample, {}) - outputs = [sample[out.name] for out in model.outputs] - - return outputs - - def run_model_inference_without_tiling( rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], *tensors: xr.DataArray, diff --git a/dev/environment-base.yaml b/dev/environment-base.yaml index b12e97f2..cd4026e9 100644 --- a/dev/environment-base.yaml +++ b/dev/environment-base.yaml @@ -18,3 +18,4 @@ dependencies: - tifffile <=2022.4.8 # pin fixes Syntax error; see https://github.com/bioimage-io/core-bioimage-io-python/pull/259 - pip: - keras==1.2.2 + - imjoy-rpc diff --git a/setup.py b/setup.py index 521f26c3..fbbacaf9 100644 --- a/setup.py +++ b/setup.py @@ -25,15 +25,7 @@ "Programming Language :: Python :: 3.8", ], packages=find_namespace_packages(exclude=["tests"]), # Required - install_requires=[ - "bioimageio.spec==0.4.8.*", - "imageio>=2.5", - "numpy", - "ruamel.yaml", - "tqdm", - "xarray", - "tifffile" - ], + install_requires=["bioimageio.spec==0.4.8.*", "imageio>=2.5", "numpy", "ruamel.yaml", "tqdm", "xarray", "tifffile"], include_package_data=True, extras_require={ "test": ["pytest", "black", "mypy"], @@ -41,6 +33,7 @@ "pytorch": ["torch>=1.6", "torchvision"], "tensorflow": ["tensorflow"], "onnx": ["onnxruntime"], + "contrib_a": ["dask"], }, project_urls={ # Optional "Bug Reports": "https://github.com/bioimage-io/core-bioimage-io-python/issues", diff --git a/tests/conftest.py b/tests/conftest.py index da28b57e..6b950622 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,7 +73,7 @@ "shape_change": ( 
"https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_specs/models/" "upsample_test_model/rdf.yaml" - ) + ), } try: From 1566fbea0d535a20bb79a8024330bcfdad391221 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 17 Nov 2022 14:54:42 +0100 Subject: [PATCH 15/23] add example use_contrib.py --- example/use_contrib.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 example/use_contrib.py diff --git a/example/use_contrib.py b/example/use_contrib.py new file mode 100644 index 00000000..4e563792 --- /dev/null +++ b/example/use_contrib.py @@ -0,0 +1,23 @@ +import asyncio + +from bioimageio.core.contrib.contrib_a import hello as auto_hello +from bioimageio.core.contrib.contrib_a.local import hello as local_hello +from bioimageio.core.contrib.contrib_a.remote import hello as remote_hello + + +async def main(): + print(await auto_hello("auto hello")) + print(await local_hello("local hello")) + print(await remote_hello("remote hello")) + + print("auto func type:", type(auto_hello)) + print("local func type:", type(local_hello)) + print("remote func type:", type(remote_hello)) + + +if __name__ == "__main__": + # start up contrib_a service before with these two processes: + # $python -m hypha.server --host=0.0.0.0 --port=9000 + # $python -m bioimageio.core.contrib.contrib_a + loop = asyncio.get_event_loop() + loop.run_until_complete(main()) From df510153dc4ea0e0f2ea2f460c637ed59778fe08 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 23:14:10 +0100 Subject: [PATCH 16/23] ignore mypy cache folder --- .gitignore | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index c75e8e7a..c56bceff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ -build/ -dist/ -.idea/ -*.egg-info/ -cache **/tmp -.tox/ +*.egg-info/ *.pyc +.idea/ +.mypy_cache/ +.tox/ +build/ +cache +dist/ From 03ecf09b18ebab941d0b62fed53ccd40da13d263 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 
Nov 2022 23:15:59 +0100 Subject: [PATCH 17/23] update various --- bioimageio/core/build_spec/build_model.py | 8 ++-- bioimageio/core/contrib/contrib_a/_demo.py | 38 ++++++++++++++- .../core/contrib/contrib_a/_inference.py | 47 +++++++++++++------ .../_model_adapters/_pytorch_model_adapter.py | 2 +- bioimageio/core/resource_io/nodes.py | 47 ++++--------------- bioimageio/core/resource_io/utils.py | 10 ++-- tests/resource_io/test_utils.py | 4 +- 7 files changed, 92 insertions(+), 64 deletions(-) diff --git a/bioimageio/core/build_spec/build_model.py b/bioimageio/core/build_spec/build_model.py index 4036bf90..a9b7361c 100644 --- a/bioimageio/core/build_spec/build_model.py +++ b/bioimageio/core/build_spec/build_model.py @@ -14,7 +14,7 @@ import bioimageio.spec.model as model_spec from bioimageio.core import export_resource_package, load_raw_resource_description from bioimageio.core.resource_io.nodes import URI -from bioimageio.spec.shared.raw_nodes import ImportableModule, ImportableSourceFile +from bioimageio.spec.shared.raw_nodes import CallableFromModule, CallableFromSourceFile from bioimageio.spec.shared import resolve_local_source, resolve_source try: @@ -58,12 +58,12 @@ def _get_pytorch_state_dict_weight_kwargs(architecture, model_kwargs, root): # note: path itself might include : for absolute paths in windows *arch_file_parts, callable_name = architecture.replace("::", ":").split(":") arch_file = _ensure_local(":".join(arch_file_parts), root) - arch = ImportableSourceFile(callable_name, arch_file) + arch = CallableFromSourceFile(callable_name, arch_file) arch_hash = _get_hash(root / arch.source_file) weight_kwargs["architecture_sha256"] = arch_hash else: - arch = spec.shared.fields.ImportableSource().deserialize(architecture) - assert isinstance(arch, ImportableModule) + arch = spec.shared.fields.CallableSource().deserialize(architecture) + assert isinstance(arch, CallableFromModule) weight_kwargs["architecture"] = arch return weight_kwargs, tmp_archtecture diff 
--git a/bioimageio/core/contrib/contrib_a/_demo.py b/bioimageio/core/contrib/contrib_a/_demo.py index cd0b2e15..31d75325 100644 --- a/bioimageio/core/contrib/contrib_a/_demo.py +++ b/bioimageio/core/contrib/contrib_a/_demo.py @@ -1,7 +1,43 @@ import asyncio +from typing import Optional +import xarray as xr -async def hello(msg="Hello!"): +async def hello( + msg: str = "Hello!", tensor_a: Optional[xr.DataArray] = None, tensor_b: Optional[xr.DataArray] = None +) -> str: + """dummy workflow printing msg + + This dummy workflow is intended as a demonstration and for testing. + + .. code-block:: yaml + cite: [{text: BioImage.IO, url: "https://doi.org/10.1101/2022.06.07.495102"}] + + Args: + msg: Message + tensor_a: tensor_a whose shape is added to message + axes: arbitrary + tensor_b: tensor_b whose shape is added to message + axes: + - type: batch + - type: space + name: x + description: x dimension + unit: millimeter + step: 1.5 + - type: index + name: demo index + description: a special index axis + + Returns: + msg. A possibly manipulated message. 
+ """ + if tensor_a is not None: + msg += f" tensor_a shape: {tensor_a.shape}" + + if tensor_b is not None: + msg += f" tensor_b shape: {tensor_b.shape}" + print(msg) return msg diff --git a/bioimageio/core/contrib/contrib_a/_inference.py b/bioimageio/core/contrib/contrib_a/_inference.py index f2f1d2e3..5a9142b4 100644 --- a/bioimageio/core/contrib/contrib_a/_inference.py +++ b/bioimageio/core/contrib/contrib_a/_inference.py @@ -1,5 +1,6 @@ +import collections from os import PathLike -from typing import Dict, IO, List, Optional, Sequence, Tuple, Union +from typing import Dict, IO, List, Optional, OrderedDict, Sequence, Tuple, Union import dask.array as da import numpy as np @@ -41,24 +42,40 @@ def forward(*tensors, model_adapter: ModelAdapter, output_tile_roi: Tuple[slice, return output[output_tile_roi] -def run_model_inference_with_dask( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *tensors: xr.DataArray, - enable_preprocessing: bool = True, - enable_postprocessing: bool = True, - devices: Sequence[str] = ("cpu",), - tiles: Optional[Sequence[Dict[str, int]]] = None, +async def run_model_inference_with_dask( + model_rdf: Union[str, PathLike, dict, IO, bytes, raw_nodes.URI, RawResourceDescription], + tensors: Sequence[xr.DataArray], boundary_mode: Union[ BoundaryMode, Sequence[BoundaryMode], ] = "reflect", -) -> List[xr.DataArray]: - """run model inference + enable_preprocessing: bool = True, + enable_postprocessing: bool = True, + devices: Sequence[str] = ("cpu",), + tiles: Optional[Sequence[Dict[str, int]]] = None, +) -> OrderedDict[str, xr.DataArray]: + """run model inference using chunked dask arrays for tiling + + To run inference on arbitrary input tensors, they are chunked such that with halo and offset all inputs to the + model have `tiles` shape. + + .. 
code-block:: yaml + authors: [{name: Fynn Beuttenmüller, github_user: fynnbe}] + cite: [{text: BioImage.IO, url: "https://doi.org/10.1101/2022.06.07.495102"}] + + Args: + model_rdf: the (source/raw) model RDF that describes the model to be used for inference + tensors: model input tensors + boundary_mode: How to pad missing values. + enable_preprocessing: If true, apply the preprocessing specified in the model RDF + enable_postprocessing: If true, apply the postprocessing specified in the model RDF + devices: devices to use for inference (device management is handled by the created model adapter) + tiles: Defaults to using an estimated tile sizes based on the model RDF. Returns: - list: model outputs + outputs. named model outputs """ - model: raw_nodes.Model = load_raw_resource_description(rdf_source, update_to_format="latest") # noqa + model: raw_nodes.Model = load_raw_resource_description(model_rdf, update_to_format="latest") # noqa if len(model.outputs) > 1: raise NotImplementedError("More than one model output not yet implemented") @@ -172,11 +189,11 @@ def run_model_inference_with_dask( if rechunk: res = res.rechunk(corrected_chunks) - outputs = [xr.DataArray(res, dims=tuple(out.axes))] + outputs = collections.OrderedDict({out.name: xr.DataArray(res, dims=tuple(out.axes))}) if enable_postprocessing: assert postprocessing is not None - sample = {out.name: t for out, t in zip(model.outputs, outputs)} + sample = {name: t for name, t in outputs.items()} postprocessing.apply(sample, {}) - outputs = [sample[out.name] for out in model.outputs] + outputs = collections.OrderedDict({out.name: sample[out.name] for out in model.outputs}) return outputs diff --git a/bioimageio/core/prediction_pipeline/_model_adapters/_pytorch_model_adapter.py b/bioimageio/core/prediction_pipeline/_model_adapters/_pytorch_model_adapter.py index f47aa1d7..b3709f30 100644 --- a/bioimageio/core/prediction_pipeline/_model_adapters/_pytorch_model_adapter.py +++ 
b/bioimageio/core/prediction_pipeline/_model_adapters/_pytorch_model_adapter.py @@ -55,7 +55,7 @@ def _unload(self) -> None: def get_nn_instance(model_node: nodes.Model, **kwargs): weight_spec = model_node.weights.get("pytorch_state_dict") assert weight_spec is not None - assert isinstance(weight_spec.architecture, nodes.ImportedSource) + assert isinstance(weight_spec.architecture, nodes.ImportedCallable) model_kwargs = weight_spec.kwargs joined_kwargs = {} if model_kwargs is missing else dict(model_kwargs) joined_kwargs.update(kwargs) diff --git a/bioimageio/core/resource_io/nodes.py b/bioimageio/core/resource_io/nodes.py index 1c850795..a6350e56 100644 --- a/bioimageio/core/resource_io/nodes.py +++ b/bioimageio/core/resource_io/nodes.py @@ -142,7 +142,7 @@ def __post_init__(self): @dataclass -class ImportedSource(Node): +class ImportedCallable(Node): factory: Callable def __call__(self, *args, **kwargs): @@ -167,7 +167,7 @@ class OnnxWeightsEntry(WeightsEntryBase, model_raw_nodes.OnnxWeightsEntry): @dataclass class PytorchStateDictWeightsEntry(WeightsEntryBase, model_raw_nodes.PytorchStateDictWeightsEntry): source: Path = missing - architecture: Union[_Missing, ImportedSource] = missing + architecture: Union[_Missing, ImportedCallable] = missing @dataclass @@ -213,54 +213,27 @@ class Axis(Node, workflow_raw_nodes.Axis): @dataclass -class BatchAxis(Node, workflow_raw_nodes.BatchAxis): - pass - - -@dataclass -class ChannelAxis(Node, workflow_raw_nodes.ChannelAxis): - pass - - -@dataclass -class IndexAxis(Node, workflow_raw_nodes.IndexAxis): - pass - - -@dataclass -class SpaceAxis(Node, workflow_raw_nodes.SpaceAxis): - pass - - -@dataclass -class TimeAxis(Node, workflow_raw_nodes.TimeAxis): - pass - - -@dataclass -class InputSpec(Node, workflow_raw_nodes.InputSpec): - pass +class Parameter(Node, workflow_raw_nodes.Parameter): + axes: Union[_Missing, List[Axis], workflow_raw_nodes.ArbitraryAxes] = missing @dataclass -class OptionSpec(Node, 
workflow_raw_nodes.OptionSpec): +class Input(Parameter, workflow_raw_nodes.Input): pass @dataclass -class OutputSpec(Node, workflow_raw_nodes.OutputSpec): +class Option(Parameter, workflow_raw_nodes.Option): pass @dataclass -class Step(Node, workflow_raw_nodes.Step): +class Output(Parameter, workflow_raw_nodes.Output): pass @dataclass class Workflow(workflow_raw_nodes.Workflow, RDF): - inputs_spec: List[InputSpec] = missing - options_spec: List[OptionSpec] = missing - outputs_spec: List[OutputSpec] = missing - steps: List[Step] = missing - test_steps: List[Step] = missing + inputs_spec: List[Input] = missing + options_spec: List[Option] = missing + outputs_spec: List[Output] = missing diff --git a/bioimageio/core/resource_io/utils.py b/bioimageio/core/resource_io/utils.py index 9f5587de..9adaf0d2 100644 --- a/bioimageio/core/resource_io/utils.py +++ b/bioimageio/core/resource_io/utils.py @@ -62,21 +62,23 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): sys.path.remove(self.path) - def transform_LocalImportableModule(self, node: raw_nodes.LocalImportableModule) -> nodes.ImportedSource: + def transform_LocalCallableFromModule(self, node: raw_nodes.LocalCallableFromModule) -> nodes.ImportedCallable: with self.TemporaryInsertionIntoPythonPath(str(node.root_path)): module = importlib.import_module(node.module_name) - return nodes.ImportedSource(factory=getattr(module, node.callable_name)) + return nodes.ImportedCallable(factory=getattr(module, node.callable_name)) @staticmethod - def transform_ResolvedImportableSourceFile(node: raw_nodes.ResolvedImportableSourceFile) -> nodes.ImportedSource: + def transform_ResolvedCallableFromSourceFile( + node: raw_nodes.ResolvedCallableFromSourceFile, + ) -> nodes.ImportedCallable: module_path = resolve_source(node.source_file) module_name = f"module_from_source.{module_path.stem}" importlib_spec = importlib.util.spec_from_file_location(module_name, module_path) assert importlib_spec is not None dep = 
importlib.util.module_from_spec(importlib_spec) importlib_spec.loader.exec_module(dep) # type: ignore # todo: possible to use "loader.load_module"? - return nodes.ImportedSource(factory=getattr(dep, node.callable_name)) + return nodes.ImportedCallable(factory=getattr(dep, node.callable_name)) class RawNodeTypeTransformer(NodeTransformer): diff --git a/tests/resource_io/test_utils.py b/tests/resource_io/test_utils.py index 30889a1d..fbd9648b 100644 --- a/tests/resource_io/test_utils.py +++ b/tests/resource_io/test_utils.py @@ -10,10 +10,10 @@ def test_resolve_import_path(tmpdir): manifest_path.touch() source_file = Path("my_mod.py") (tmpdir / str(source_file)).write_text("class Foo: pass", encoding="utf8") - node = raw_nodes.ImportableSourceFile(source_file=source_file, callable_name="Foo") + node = raw_nodes.CallableFromSourceFile(source_file=source_file, callable_name="Foo") uri_transformed = utils.UriNodeTransformer(root_path=tmpdir).transform(node) source_transformed = utils.SourceNodeTransformer().transform(uri_transformed) - assert isinstance(source_transformed, nodes.ImportedSource) + assert isinstance(source_transformed, nodes.ImportedCallable) Foo = source_transformed.factory assert Foo.__name__ == "Foo" assert isinstance(Foo, type) From f474da72959bd0bf2ff4e2f3839d3f3df60222fe Mon Sep 17 00:00:00 2001 From: fynnbe Date: Thu, 24 Nov 2022 23:16:20 +0100 Subject: [PATCH 18/23] add script generate_workflow_rdfs.py --- .github/workflows/build.yml | 16 +++ scripts/generate_workflow_rdfs.py | 211 ++++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+) create mode 100644 scripts/generate_workflow_rdfs.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 12dc8cb6..95cc9310 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,3 +142,19 @@ jobs: run: | conda install -n base -c conda-forge conda-build pip -y conda build -c conda-forge conda-recipe + + generate_workflow_rdfs: # todo: move to contrib 
repo + runs-on: ubuntu-latest + needs: test + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Install dependencies + # todo: use mamba envs + run: | + pip install . dask + - name: Generate workflow RDFs + run: python scripts/generate_workflow_rdfs.py contrib_a diff --git a/scripts/generate_workflow_rdfs.py b/scripts/generate_workflow_rdfs.py new file mode 100644 index 00000000..88efcdcd --- /dev/null +++ b/scripts/generate_workflow_rdfs.py @@ -0,0 +1,211 @@ +import collections.abc +import inspect +import sys +import types +import typing +import warnings +from argparse import ArgumentParser +from dataclasses import asdict +from importlib import import_module +from pathlib import Path + +import docstring_parser +import numpy as np +import xarray as xr +from marshmallow import missing +from marshmallow.utils import _Missing + +import bioimageio.core.contrib +from bioimageio.spec import serialize_raw_resource_description_to_dict, load_raw_resource_description +import bioimageio.spec.workflow.schema as wf_schema +from bioimageio.spec.shared import field_validators, fields, yaml +from bioimageio.spec.workflow.raw_nodes import ( + ArbitraryAxes, + Axis, + DEFAULT_TYPE_NAME_MAP, + Input, + Option, + Output, + Workflow as WorkflowRawNode, +) + +try: + from typing import get_args, get_origin, Literal +except ImportError: + from typing_extensions import get_args, get_origin, Literal # type: ignore + + +TYPE_NAME_MAP = {**DEFAULT_TYPE_NAME_MAP, **{xr.DataArray: "tensor", np.ndarray: "tensor"}} +ARBITRARY_AXES = get_args(ArbitraryAxes) + +# keep this axes_field in sync with wf_schema.Workflow.axes +axes_field = fields.Union( + [ + fields.List(fields.Nested(wf_schema.Axis())), + fields.String( + validate=field_validators.OneOf(get_args(ArbitraryAxes)), + ), + ], +) + + +def get_type_name(annotation): + + orig = get_origin(annotation) + if orig is list or orig is tuple or orig is 
collections.abc.Sequence: + annotation = list + elif orig is dict or orig is collections.OrderedDict: + annotation = dict + elif orig is typing.Union: + args = get_args(annotation) + args = [a for a in args if a is not type(None)] + assert args + annotation = get_type_name(args[0]) # use first type in union annotation + elif orig is Literal: + args = get_args(annotation) + assert args + annotation = get_type_name(type(args[0])) # use type of first literal + + if isinstance(annotation, str): + assert annotation in TYPE_NAME_MAP.values(), annotation + return annotation + else: + return TYPE_NAME_MAP[annotation] + + +def parse_args(): + p = ArgumentParser(description="Generate workflow RDFs for one contrib submodule") + p.add_argument("contrib_name", choices=[c for c in dir(bioimageio.core.contrib) if c.startswith("contrib_")]) + + return p.parse_args() + + +class WorkflowSignature(inspect.Signature): + pass + + +def extract_axes_from_param_descr( + descr: str, +) -> typing.Tuple[str, typing.Union[_Missing, ArbitraryAxes, typing.List[Axis]]]: + if "\n" in descr: + descr, *axes_descr_lines = descr.split("\n") + axes_descr = "\n".join(axes_descr_lines).strip() + assert axes_descr.startswith("axes:") + axes_descr = axes_descr[len("axes:") :].strip() + try: + axes_data = yaml.load(axes_descr) + axes = axes_field._deserialize(axes_data) + except Exception as e: + raise ValueError("Invalid axes description") from e + else: + axes = missing + + return descr, axes + + +def extract_serialized_wf_kwargs(descr: str) -> typing.Tuple[str, typing.Dict[str, typing.Any]]: + separator = ".. 
code-block:: yaml" + if separator in descr: + descr, kwarg_descr = descr.split(separator) + # kwarg_descr = + try: + kwargs = yaml.load(kwarg_descr) + except Exception as e: + raise ValueError("Invalid additional fields") from e + else: + kwargs = {} + + return descr.strip(), kwargs + + +def main(contrib_name): + dist = Path(__file__).parent / "../dist/workflows" + dist.mkdir(exist_ok=True) + + local_contrib = import_module(f"bioimageio.core.contrib.{contrib_name}.local") + for wf_id in dir(local_contrib): + wf_func = getattr(local_contrib, wf_id) + if not isinstance(wf_func, types.FunctionType): + if not wf_id.startswith("_"): + warnings.warn(f"ignoring non-function {wf_id}") + + continue + + doc = docstring_parser.parse(wf_func.__doc__) + + param_descriptions = {param.arg_name: param.description for param in doc.params} + inputs = [] + options = [] + sig = WorkflowSignature.from_callable(wf_func) + assert sig.return_annotation is not inspect.Signature.empty + for name, param in sig.parameters.items(): + type_name = get_type_name(param.annotation) + descr = param_descriptions[name] + if type_name == "tensor": + descr, axes = extract_axes_from_param_descr(descr) + if axes is missing: + raise ValueError( + f"Missing axes description in description of parameter '{name}' of workflow '{wf_id}'. Change '{name}: ' to e.g. '{name}: axes: arbitrary. ' or '{name}: axes: b,c,x,y. ." 
+ ) + else: + axes = missing + + if param.default is inspect.Parameter.empty: + inputs.append(Input(name=name, description=descr, type=type_name, axes=axes)) + else: + default_value: typing.Any = param.default + if isinstance(default_value, tuple): + default_value = list(default_value) + + options.append(Option(name=name, description=descr, type=type_name, axes=axes, default=default_value)) + + return_descriptions = {} + for ret in doc.many_returns: + # note: doctring_parser seems to be buggy and not recover the return name + # extract return name from return description + name, *remaining = ret.description.split(".") + return_descriptions[name.strip()] = ".".join(remaining).strip() + + outputs = [] + ret_annotations = sig.return_annotation + if isinstance(ret_annotations, typing.Tuple): + ret_annotations = get_args(ret_annotations) + else: + ret_annotations = [ret_annotations] + + if len(doc.many_returns) != len(ret_annotations): + raise ValueError("number of documented return values does not match return annotation") + + for ret_type, (name, descr) in zip(ret_annotations, return_descriptions.items()): + type_name = get_type_name(ret_type) + if type_name == "tensor": + descr, axes = extract_axes_from_param_descr(descr) + else: + axes = missing + + assert descr + outputs.append(Output(name=name, description=descr, type=type_name, axes=axes)) + + assert doc.long_description is not None + description, serialized_kwargs = extract_serialized_wf_kwargs(doc.long_description) + wf = WorkflowRawNode( + name=doc.short_description, + description=description, + inputs=inputs, + options=options, + outputs=outputs, + ) + serialized = serialize_raw_resource_description_to_dict(wf) + serialized.update(serialized_kwargs) + with (dist / wf_id).with_suffix(".yaml").open("w", encoding="utf-8") as f: + yaml.dump(serialized, f) + wf = load_raw_resource_description(serialized) + serialized = serialize_raw_resource_description_to_dict(wf) + + print(f"saved {wf_id}") + print("done") + + 
+if __name__ == "__main__": + args = parse_args() + sys.exit(main(args.contrib_name)) From 996a92a7b97b25ee9f6ae78cfd93c5a11223c9d2 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 28 Nov 2022 10:14:40 +0100 Subject: [PATCH 19/23] start workflow cleanup --- bioimageio/core/__main__.py | 93 +---------- bioimageio/core/contrib/__init__.py | 1 - bioimageio/core/operators/__init__.py | 4 - bioimageio/core/operators/_assert.py | 8 - bioimageio/core/operators/_generate.py | 15 -- bioimageio/core/operators/_run.py | 206 ------------------------ bioimageio/core/operators/_various.py | 57 ------- scripts/generate_workflow_rdfs.py | 211 ------------------------- setup.py | 1 + 9 files changed, 3 insertions(+), 593 deletions(-) delete mode 100644 bioimageio/core/contrib/__init__.py delete mode 100644 bioimageio/core/operators/__init__.py delete mode 100644 bioimageio/core/operators/_assert.py delete mode 100644 bioimageio/core/operators/_generate.py delete mode 100644 bioimageio/core/operators/_run.py delete mode 100644 bioimageio/core/operators/_various.py delete mode 100644 scripts/generate_workflow_rdfs.py diff --git a/bioimageio/core/__main__.py b/bioimageio/core/__main__.py index 8be385ff..9861aab4 100644 --- a/bioimageio/core/__main__.py +++ b/bioimageio/core/__main__.py @@ -3,23 +3,17 @@ import os import sys import warnings -from argparse import ArgumentParser -from functools import partial from glob import glob from pathlib import Path from pprint import pformat -from typing import List, Optional, Union +from typing import List, Optional import typer -from bioimageio.core import __version__, commands, load_raw_resource_description, prediction, resource_tests +from bioimageio.core import __version__, commands, prediction, resource_tests from bioimageio.core.common import TestSummary -from bioimageio.core.image_helper import load_image, save_image -from bioimageio.core.resource_io import nodes -from bioimageio.core.workflow.operators import run_workflow from 
bioimageio.spec.__main__ import app, help_version as help_version_spec from bioimageio.spec.model.raw_nodes import WeightsFormat -from bioimageio.spec.workflow.raw_nodes import Workflow try: from typing import get_args @@ -314,88 +308,5 @@ def convert_keras_weights_to_tensorflow( ) -@app.command(context_settings=dict(allow_extra_args=True, ignore_unknown_options=True), add_help_option=False) -def run( - rdf_source: str = typer.Argument(..., help="BioImage.IO RDF id/url/path."), - *, - output_folder: Path = Path("outputs"), - output_tensor_extension: str = ".npy", - ctx: typer.Context, -): - resource = load_raw_resource_description(rdf_source, update_to_format="latest") - if not isinstance(resource, Workflow): - raise NotImplementedError(f"Non-workflow RDFs not yet supported (got type {resource.type})") - - map_type = dict( - any=str, - boolean=bool, - float=float, - int=int, - list=str, - string=str, - ) - wf = resource - parser = ArgumentParser(description=f"CLI for {wf.name}") - - # replicate typer args to show up in help - parser.add_argument( - metavar="rdf-source", - dest="rdf_source", - help="BioImage.IO RDF id/url/path. 
The optional arguments below are RDF specific.", - ) - parser.add_argument( - metavar="output-folder", dest="output_folder", help="Folder to save outputs to.", default=Path("outputs") - ) - parser.add_argument( - metavar="output-tensor-extension", - dest="output_tensor_extension", - help="Output tensor extension.", - default=".npy", - ) - - def add_param_args(params): - for param in params: - argument_kwargs = {} - if param.type == "tensor": - argument_kwargs["type"] = partial(load_image, axes=[a.name or a.type for a in param.axes]) - else: - argument_kwargs["type"] = map_type[param.type] - - if param.type == "list": - argument_kwargs["nargs"] = "*" - - argument_kwargs["help"] = param.description or "" - if hasattr(param, "default"): - argument_kwargs["default"] = param.default - else: - argument_kwargs["required"] = True - - argument_kwargs["metavar"] = param.name[0].capitalize() - parser.add_argument("--" + param.name.replace("_", "-"), **argument_kwargs) - - def prepare_parameter(value, param: Union[nodes.InputSpec, nodes.OptionSpec]): - if param.type == "tensor": - return load_image(value, [a.name or a.type for a in param.axes]) - else: - return value - - add_param_args(wf.inputs_spec) - add_param_args(wf.options_spec) - args = parser.parse_args([rdf_source, str(output_folder), output_tensor_extension] + list(ctx.args)) - outputs = run_workflow( - rdf_source, - inputs=[prepare_parameter(getattr(args, ipt.name), ipt) for ipt in wf.inputs_spec], - options={opt.name: prepare_parameter(getattr(args, opt.name), opt) for opt in wf.options_spec}, - ) - output_folder.mkdir(parents=True, exist_ok=True) - for out_spec, out in zip(wf.outputs_spec, outputs): - out_path = output_folder / out_spec.name - if out_spec.type == "tensor": - save_image(out_path.with_suffix(output_tensor_extension), out) - else: - with out_path.with_suffix(".json").open("w") as f: - json.dump(out, f) - - if __name__ == "__main__": app() diff --git a/bioimageio/core/contrib/__init__.py 
b/bioimageio/core/contrib/__init__.py deleted file mode 100644 index 4ead98f9..00000000 --- a/bioimageio/core/contrib/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .contrib_a import * diff --git a/bioimageio/core/operators/__init__.py b/bioimageio/core/operators/__init__.py deleted file mode 100644 index 066450bb..00000000 --- a/bioimageio/core/operators/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from ._assert import assert_shape -from ._generate import generate_random_uniform_tensor -from ._run import WorkflowState, run_model_inference_without_tiling, run_workflow, run_workflow_test -from ._various import binarize, load_tensors, log, select_outputs diff --git a/bioimageio/core/operators/_assert.py b/bioimageio/core/operators/_assert.py deleted file mode 100644 index fdf54302..00000000 --- a/bioimageio/core/operators/_assert.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import Sequence - -import xarray as xr - - -def assert_shape(tensor: xr.DataArray, shape: Sequence[int]) -> xr.DataArray: - assert tensor.shape == tuple(shape) - return tensor diff --git a/bioimageio/core/operators/_generate.py b/bioimageio/core/operators/_generate.py deleted file mode 100644 index c5f09237..00000000 --- a/bioimageio/core/operators/_generate.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Sequence, Union - -import numpy as np -import xarray as xr - - -def generate_random_uniform_tensor( - shape: Sequence[Union[int, str]], axes: Sequence[str], *, low: Union[int, float] = 0, high: Union[int, float] = 1 -) -> xr.DataArray: - """generate a tensor with uniformly distributed samples in the interval [low, high) - Returns: - xr.DataArray: random tensor - """ - assert len(shape) == len(axes) - return xr.DataArray(np.random.uniform(low=low, high=high, size=[int(s) for s in shape]), dims=tuple(axes)) diff --git a/bioimageio/core/operators/_run.py b/bioimageio/core/operators/_run.py deleted file mode 100644 index d274ec67..00000000 --- a/bioimageio/core/operators/_run.py +++ /dev/null @@ 
-1,206 +0,0 @@ -from dataclasses import dataclass -from os import PathLike -from typing import Any, Dict, Generator, IO, List, Optional, Sequence, Union - -import xarray as xr -from marshmallow import missing - -from bioimageio.core import load_resource_description -from bioimageio.core.prediction_pipeline import create_prediction_pipeline -from bioimageio.core.resource_io import nodes -from bioimageio.spec import load_raw_resource_description -from bioimageio.spec.model import raw_nodes -from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription - -try: - from typing import Literal -except ImportError: - from typing_extensions import Literal - - -def run_model_inference_without_tiling( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *tensors: xr.DataArray, - enable_preprocessing: bool = True, - enable_postprocessing: bool = True, - devices: Optional[Sequence[str]] = ("cpu",), -) -> List[xr.DataArray]: - """run model inference - - Returns: - list: model outputs - """ - model = load_raw_resource_description(rdf_source, update_to_format="latest") - assert isinstance(model, raw_nodes.Model) - # remove pre-/postprocessing if not enabled - if not enable_preprocessing: - for ipt in model.inputs: - if ipt.preprocessing: - ipt.preprocessing = missing - if not enable_postprocessing: - for out in model.outputs: - if out.postprocessing: - out.postprocessing = missing - - with create_prediction_pipeline(model, devices=devices) as pred_pipeline: - return pred_pipeline.forward(*tensors) - - -def run_workflow( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - inputs: Sequence = tuple(), - options: Dict[str, Any] = None, -) -> tuple: - outputs = tuple() - for state in _iterate_workflow_steps_impl(rdf_source, test_steps=False, inputs=inputs, options=options): - outputs = state.outputs - - return outputs - - -def run_workflow_test( - rdf_source: Union[dict, 
PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], -) -> tuple: - outputs = tuple() - for state in _iterate_workflow_steps_impl(rdf_source, test_steps=True): - outputs = state.outputs - - return outputs - - -@dataclass -class WorkflowState: - wf_inputs: Dict[str, Any] - wf_options: Dict[str, Any] - inputs: tuple - outputs: tuple - named_outputs: Dict[str, Any] - - -def iterate_workflow_steps( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *, - inputs: Sequence = tuple(), - options: Dict[str, Any] = None, -) -> Generator[WorkflowState, None, None]: - yield from _iterate_workflow_steps_impl(rdf_source, inputs=inputs, options=options, test_steps=False) - - -def iterate_test_workflow_steps( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription] -) -> Generator[WorkflowState, None, None]: - yield from _iterate_workflow_steps_impl(rdf_source, test_steps=True) - - -def _iterate_workflow_steps_impl( - rdf_source: Union[dict, PathLike, IO, str, bytes, raw_nodes.URI, RawResourceDescription], - *, - test_steps: bool, - inputs: Sequence = tuple(), - options: Dict[str, Any] = None, -) -> Generator[WorkflowState, None, None]: - import bioimageio.core.operators as ops - - workflow = load_resource_description(rdf_source) - assert isinstance(workflow, nodes.Workflow) - wf_options: Dict[str, Any] = {opt.name: opt.default for opt in workflow.options_spec} - if test_steps: - assert not inputs - assert not options - wf_inputs: Dict[str, Any] = {} - steps = workflow.test_steps - else: - if not len(workflow.inputs_spec) == len(inputs): - raise ValueError(f"Expected {len(workflow.inputs_spec)} inputs, but got {len(inputs)}.") - - wf_inputs = {ipt_spec.name: ipt for ipt_spec, ipt in zip(workflow.inputs_spec, inputs)} - for k, v in options.items(): - if k not in wf_options: - raise ValueError(f"Got unknown option {k}, expected one of {set(wf_options)}.") - - wf_options[k] = v - - steps = 
workflow.steps - - named_outputs = {} # for later referencing - - def map_ref(value): - assert isinstance(workflow, nodes.Workflow) - if isinstance(value, str) and value.startswith("${{") and value.endswith("}}"): - ref = value[4:-2].strip() - if ref.startswith("self.inputs."): - ref = ref[len("self.inputs.") :] - if ref not in wf_inputs: - raise ValueError(f"Invalid workflow input reference {value}.") - - return wf_inputs[ref] - elif ref.startswith("self.options."): - ref = ref[len("self.options.") :] - if ref not in wf_options: - raise ValueError(f"Invalid workflow option reference {value}.") - - return wf_options[ref] - elif ref == "self.rdf_source": - assert workflow.rdf_source is not missing - return str(workflow.rdf_source) - elif ref in named_outputs: - return named_outputs[ref] - else: - raise ValueError(f"Invalid reference {value}.") - else: - return value - - # implicit inputs to a step are the outputs of the previous step. - # For the first step these are the workflow inputs. - outputs = tuple(inputs) - for step in steps: - if not hasattr(ops, step.op): - raise NotImplementedError(f"{step.op} not implemented in {ops}") - - op = getattr(ops, step.op) - if step.inputs is missing: - inputs = outputs - else: - inputs = tuple(map_ref(ipt) for ipt in step.inputs) - - assert isinstance(inputs, tuple) - options = {k: map_ref(v) for k, v in (step.options or {}).items()} - outputs = op(*inputs, **options) - if not isinstance(outputs, tuple): - outputs = (outputs,) - - if step.outputs: - assert step.id is not missing - if len(step.outputs) != len(outputs): - raise ValueError( - f"Got {len(step.outputs)} step output name{'s' if len(step.outputs) > 1 else ''} ({step.id}.outputs), " - f"but op {step.op} returned {len(outputs)} outputs." 
- ) - - named_outputs.update({f"{step.id}.outputs.{out_name}": out for out_name, out in zip(step.outputs, outputs)}) - - yield WorkflowState( - wf_inputs=wf_inputs, wf_options=wf_options, inputs=inputs, outputs=outputs, named_outputs=named_outputs - ) - if len(workflow.outputs_spec) != len(outputs): - raise ValueError(f"Expected {len(workflow.outputs_spec)} outputs from last step, but got {len(outputs)}.") - - def tensor_as_xr(tensor, axes: Sequence[nodes.Axis]): - spec_axes = [a.name or a.type for a in axes] - if isinstance(tensor, xr.DataArray): - if list(tensor.dims) != spec_axes: - raise ValueError( - f"Last workflow step returned xarray.DataArray with dims {tensor.dims}, but expected dims {spec_axes}." - ) - - return tensor - else: - return xr.DataArray(tensor, dims=tuple(spec_axes)) - - outputs = tuple( - tensor_as_xr(out, out_spec.axes) if out_spec.type == "tensor" else out - for out_spec, out in zip(workflow.outputs_spec, outputs) - ) - yield WorkflowState( - wf_inputs=wf_inputs, wf_options=wf_options, inputs=inputs, outputs=outputs, named_outputs=named_outputs - ) diff --git a/bioimageio/core/operators/_various.py b/bioimageio/core/operators/_various.py deleted file mode 100644 index 37bb3520..00000000 --- a/bioimageio/core/operators/_various.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -from typing import List, Sequence, Tuple - -import numpy as np -import xarray as xr -from imageio import imread - -logger = logging.getLogger(__name__) - - -def binarize(tensor: xr.DataArray, threshold: float): - return tensor > threshold - - -def select_outputs(*args) -> Tuple: - """helper to select workflow outputs (to be used as a final step in a workflow) - - Returns: - tuple: selected outputs (inputs to this op) - - """ - - return args - - -def log(*args, log_level: int = logging.INFO, **kwargs) -> Tuple: - """log any key word arguments (kwargs/options) - - Returns: - tuple: positional inputs to this op - - """ - for k, v in kwargs.items(): - logger.log( - 
log_level, - f"{k}: %s", - f"{v.shape} mean: {v.mean().item():.4f} std: {v.std().item():.4f}" - if isinstance(v, (np.ndarray, xr.DataArray)) - else v, - ) - - return args - - -def load_tensors(sources: List[str], axes: Sequence[str]) -> List[xr.DataArray]: - """load tensors""" - assert len(sources) == len(axes) - tensors = [] - for source, ax in zip(sources, axes): - if source.split(".")[-1] == ".npy": - data = np.load(str(source)) - else: - data = imread(source) - - tensors.append(xr.DataArray(data, dims=ax)) - - return tensors diff --git a/scripts/generate_workflow_rdfs.py b/scripts/generate_workflow_rdfs.py deleted file mode 100644 index 88efcdcd..00000000 --- a/scripts/generate_workflow_rdfs.py +++ /dev/null @@ -1,211 +0,0 @@ -import collections.abc -import inspect -import sys -import types -import typing -import warnings -from argparse import ArgumentParser -from dataclasses import asdict -from importlib import import_module -from pathlib import Path - -import docstring_parser -import numpy as np -import xarray as xr -from marshmallow import missing -from marshmallow.utils import _Missing - -import bioimageio.core.contrib -from bioimageio.spec import serialize_raw_resource_description_to_dict, load_raw_resource_description -import bioimageio.spec.workflow.schema as wf_schema -from bioimageio.spec.shared import field_validators, fields, yaml -from bioimageio.spec.workflow.raw_nodes import ( - ArbitraryAxes, - Axis, - DEFAULT_TYPE_NAME_MAP, - Input, - Option, - Output, - Workflow as WorkflowRawNode, -) - -try: - from typing import get_args, get_origin, Literal -except ImportError: - from typing_extensions import get_args, get_origin, Literal # type: ignore - - -TYPE_NAME_MAP = {**DEFAULT_TYPE_NAME_MAP, **{xr.DataArray: "tensor", np.ndarray: "tensor"}} -ARBITRARY_AXES = get_args(ArbitraryAxes) - -# keep this axes_field in sync with wf_schema.Workflow.axes -axes_field = fields.Union( - [ - fields.List(fields.Nested(wf_schema.Axis())), - fields.String( - 
validate=field_validators.OneOf(get_args(ArbitraryAxes)), - ), - ], -) - - -def get_type_name(annotation): - - orig = get_origin(annotation) - if orig is list or orig is tuple or orig is collections.abc.Sequence: - annotation = list - elif orig is dict or orig is collections.OrderedDict: - annotation = dict - elif orig is typing.Union: - args = get_args(annotation) - args = [a for a in args if a is not type(None)] - assert args - annotation = get_type_name(args[0]) # use first type in union annotation - elif orig is Literal: - args = get_args(annotation) - assert args - annotation = get_type_name(type(args[0])) # use type of first literal - - if isinstance(annotation, str): - assert annotation in TYPE_NAME_MAP.values(), annotation - return annotation - else: - return TYPE_NAME_MAP[annotation] - - -def parse_args(): - p = ArgumentParser(description="Generate workflow RDFs for one contrib submodule") - p.add_argument("contrib_name", choices=[c for c in dir(bioimageio.core.contrib) if c.startswith("contrib_")]) - - return p.parse_args() - - -class WorkflowSignature(inspect.Signature): - pass - - -def extract_axes_from_param_descr( - descr: str, -) -> typing.Tuple[str, typing.Union[_Missing, ArbitraryAxes, typing.List[Axis]]]: - if "\n" in descr: - descr, *axes_descr_lines = descr.split("\n") - axes_descr = "\n".join(axes_descr_lines).strip() - assert axes_descr.startswith("axes:") - axes_descr = axes_descr[len("axes:") :].strip() - try: - axes_data = yaml.load(axes_descr) - axes = axes_field._deserialize(axes_data) - except Exception as e: - raise ValueError("Invalid axes description") from e - else: - axes = missing - - return descr, axes - - -def extract_serialized_wf_kwargs(descr: str) -> typing.Tuple[str, typing.Dict[str, typing.Any]]: - separator = ".. 
code-block:: yaml" - if separator in descr: - descr, kwarg_descr = descr.split(separator) - # kwarg_descr = - try: - kwargs = yaml.load(kwarg_descr) - except Exception as e: - raise ValueError("Invalid additional fields") from e - else: - kwargs = {} - - return descr.strip(), kwargs - - -def main(contrib_name): - dist = Path(__file__).parent / "../dist/workflows" - dist.mkdir(exist_ok=True) - - local_contrib = import_module(f"bioimageio.core.contrib.{contrib_name}.local") - for wf_id in dir(local_contrib): - wf_func = getattr(local_contrib, wf_id) - if not isinstance(wf_func, types.FunctionType): - if not wf_id.startswith("_"): - warnings.warn(f"ignoring non-function {wf_id}") - - continue - - doc = docstring_parser.parse(wf_func.__doc__) - - param_descriptions = {param.arg_name: param.description for param in doc.params} - inputs = [] - options = [] - sig = WorkflowSignature.from_callable(wf_func) - assert sig.return_annotation is not inspect.Signature.empty - for name, param in sig.parameters.items(): - type_name = get_type_name(param.annotation) - descr = param_descriptions[name] - if type_name == "tensor": - descr, axes = extract_axes_from_param_descr(descr) - if axes is missing: - raise ValueError( - f"Missing axes description in description of parameter '{name}' of workflow '{wf_id}'. Change '{name}: ' to e.g. '{name}: axes: arbitrary. ' or '{name}: axes: b,c,x,y. ." 
- ) - else: - axes = missing - - if param.default is inspect.Parameter.empty: - inputs.append(Input(name=name, description=descr, type=type_name, axes=axes)) - else: - default_value: typing.Any = param.default - if isinstance(default_value, tuple): - default_value = list(default_value) - - options.append(Option(name=name, description=descr, type=type_name, axes=axes, default=default_value)) - - return_descriptions = {} - for ret in doc.many_returns: - # note: doctring_parser seems to be buggy and not recover the return name - # extract return name from return description - name, *remaining = ret.description.split(".") - return_descriptions[name.strip()] = ".".join(remaining).strip() - - outputs = [] - ret_annotations = sig.return_annotation - if isinstance(ret_annotations, typing.Tuple): - ret_annotations = get_args(ret_annotations) - else: - ret_annotations = [ret_annotations] - - if len(doc.many_returns) != len(ret_annotations): - raise ValueError("number of documented return values does not match return annotation") - - for ret_type, (name, descr) in zip(ret_annotations, return_descriptions.items()): - type_name = get_type_name(ret_type) - if type_name == "tensor": - descr, axes = extract_axes_from_param_descr(descr) - else: - axes = missing - - assert descr - outputs.append(Output(name=name, description=descr, type=type_name, axes=axes)) - - assert doc.long_description is not None - description, serialized_kwargs = extract_serialized_wf_kwargs(doc.long_description) - wf = WorkflowRawNode( - name=doc.short_description, - description=description, - inputs=inputs, - options=options, - outputs=outputs, - ) - serialized = serialize_raw_resource_description_to_dict(wf) - serialized.update(serialized_kwargs) - with (dist / wf_id).with_suffix(".yaml").open("w", encoding="utf-8") as f: - yaml.dump(serialized, f) - wf = load_raw_resource_description(serialized) - serialized = serialize_raw_resource_description_to_dict(wf) - - print(f"saved {wf_id}") - print("done") - - 
-if __name__ == "__main__": - args = parse_args() - sys.exit(main(args.contrib_name)) diff --git a/setup.py b/setup.py index fbbacaf9..5053b545 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ "Intended Audience :: Developers", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], packages=find_namespace_packages(exclude=["tests"]), # Required install_requires=["bioimageio.spec==0.4.8.*", "imageio>=2.5", "numpy", "ruamel.yaml", "tqdm", "xarray", "tifffile"], From 8f5804fe8f4a9cf4d6903bbe709de9d93cfcc096 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 28 Nov 2022 10:15:47 +0100 Subject: [PATCH 20/23] bump patch version --- bioimageio/core/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioimageio/core/VERSION b/bioimageio/core/VERSION index 0920ab14..b74bd993 100644 --- a/bioimageio/core/VERSION +++ b/bioimageio/core/VERSION @@ -1,3 +1,3 @@ { - "version": "0.5.7" + "version": "0.5.8" } From 43f989990044152da9089866f35e5ddfa70ee95d Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 28 Nov 2022 14:45:47 +0100 Subject: [PATCH 21/23] remove contrib module --- bioimageio/core/contrib/__main__.py | 16 -- bioimageio/core/contrib/contrib_a/__init__.py | 4 - bioimageio/core/contrib/contrib_a/__main__.py | 9 - bioimageio/core/contrib/contrib_a/_demo.py | 56 ----- .../core/contrib/contrib_a/_inference.py | 199 ----------------- bioimageio/core/contrib/contrib_a/local.py | 2 - bioimageio/core/contrib/contrib_a/remote.py | 8 - bioimageio/core/contrib/utils/__init__.py | 10 - bioimageio/core/contrib/utils/_ast.py | 7 - bioimageio/core/contrib/utils/_rpc.py | 116 ---------- bioimageio/core/contrib/utils/_tiling.py | 201 ------------------ 11 files changed, 628 deletions(-) delete mode 100644 bioimageio/core/contrib/__main__.py delete mode 100644 bioimageio/core/contrib/contrib_a/__init__.py delete mode 100644 bioimageio/core/contrib/contrib_a/__main__.py delete mode 100644 
bioimageio/core/contrib/contrib_a/_demo.py delete mode 100644 bioimageio/core/contrib/contrib_a/_inference.py delete mode 100644 bioimageio/core/contrib/contrib_a/local.py delete mode 100644 bioimageio/core/contrib/contrib_a/remote.py delete mode 100644 bioimageio/core/contrib/utils/__init__.py delete mode 100644 bioimageio/core/contrib/utils/_ast.py delete mode 100644 bioimageio/core/contrib/utils/_rpc.py delete mode 100644 bioimageio/core/contrib/utils/_tiling.py diff --git a/bioimageio/core/contrib/__main__.py b/bioimageio/core/contrib/__main__.py deleted file mode 100644 index 2f8e61fb..00000000 --- a/bioimageio/core/contrib/__main__.py +++ /dev/null @@ -1,16 +0,0 @@ -import argparse -import asyncio -from pathlib import Path - -from bioimageio.core.contrib.utils import start_contrib_service - -parser = argparse.ArgumentParser() -parser.add_argument("contrib_name", nargs="+") - -args = parser.parse_args() - -loop = asyncio.get_event_loop() -for contrib_name in args.contrib_name: - loop.create_task(start_contrib_service(Path(__file__).parent.stem)) - -loop.run_forever() diff --git a/bioimageio/core/contrib/contrib_a/__init__.py b/bioimageio/core/contrib/contrib_a/__init__.py deleted file mode 100644 index 4903a9c8..00000000 --- a/bioimageio/core/contrib/contrib_a/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -try: - from .local import * -except ImportError: - from .remote import * diff --git a/bioimageio/core/contrib/contrib_a/__main__.py b/bioimageio/core/contrib/contrib_a/__main__.py deleted file mode 100644 index c7453ae5..00000000 --- a/bioimageio/core/contrib/contrib_a/__main__.py +++ /dev/null @@ -1,9 +0,0 @@ -# todo: maybe only keep bioimageio.core.contrib.__main__ to avoid redundant code and multiple entry points? 
-import asyncio -from pathlib import Path - -from bioimageio.core.contrib.utils import start_contrib_service - -loop = asyncio.get_event_loop() -loop.create_task(start_contrib_service(Path(__file__).parent.stem)) -loop.run_forever() diff --git a/bioimageio/core/contrib/contrib_a/_demo.py b/bioimageio/core/contrib/contrib_a/_demo.py deleted file mode 100644 index 31d75325..00000000 --- a/bioimageio/core/contrib/contrib_a/_demo.py +++ /dev/null @@ -1,56 +0,0 @@ -import asyncio -from typing import Optional -import xarray as xr - - -async def hello( - msg: str = "Hello!", tensor_a: Optional[xr.DataArray] = None, tensor_b: Optional[xr.DataArray] = None -) -> str: - """dummy workflow printing msg - - This dummy workflow is intended as a demonstration and for testing. - - .. code-block:: yaml - cite: [{text: BioImage.IO, url: "https://doi.org/10.1101/2022.06.07.495102"}] - - Args: - msg: Message - tensor_a: tensor_a whose shape is added to message - axes: arbitrary - tensor_b: tensor_b whose shape is added to message - axes: - - type: batch - - type: space - name: x - description: x dimension - unit: millimeter - step: 1.5 - - type: index - name: demo index - description: a special index axis - - Returns: - msg. A possibly manipulated message. 
- """ - if tensor_a is not None: - msg += f" tensor_a shape: {tensor_a.shape}" - - if tensor_a is not None: - msg += f" tensor_a shape: {tensor_a.shape}" - - print(msg) - return msg - - -# async def main(): -# task = asyncio.create_task(meh()) -# -# out1 = await task -# out2 = await task -# -# print(out1, out2) -# -# -# if __name__ == "__main__": -# loop = asyncio.get_event_loop() -# loop.run_until_complete(main()) diff --git a/bioimageio/core/contrib/contrib_a/_inference.py b/bioimageio/core/contrib/contrib_a/_inference.py deleted file mode 100644 index 5a9142b4..00000000 --- a/bioimageio/core/contrib/contrib_a/_inference.py +++ /dev/null @@ -1,199 +0,0 @@ -import collections -from os import PathLike -from typing import Dict, IO, List, Optional, OrderedDict, Sequence, Tuple, Union - -import dask.array as da -import numpy as np -import xarray as xr - -from bioimageio.core.contrib.utils import ( - get_chunk, - get_corrected_chunks, - get_default_input_tile, - get_output_rois, - transpose_sequence, - tuple_roi_to_slices, -) -from bioimageio.core.prediction_pipeline._combined_processing import CombinedProcessing -from bioimageio.core.prediction_pipeline._model_adapters import ModelAdapter, create_model_adapter -from bioimageio.core.resource_io import nodes -from bioimageio.core.resource_io.utils import resolve_raw_node -from bioimageio.spec import load_raw_resource_description -from bioimageio.spec.model import raw_nodes -from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription - -try: - from typing import Literal -except ImportError: - from typing_extensions import Literal - - -BoundaryMode = Literal["reflect"] - - -def forward(*tensors, model_adapter: ModelAdapter, output_tile_roi: Tuple[slice, ...]): - """helper to cast dask array chunks to xr.DataArray and apply a roi to the output""" - assert len(model_adapter.bioimageio_model.inputs) == len(tensors), ( - len(model_adapter.bioimageio_model.inputs), - len(tensors), - ) - tensors = 
[xr.DataArray(t, dims=tuple(ipt.axes)) for ipt, t, in zip(model_adapter.bioimageio_model.inputs, tensors)] - output = model_adapter.forward(*tensors)[0] # todo: allow more than 1 output - return output[output_tile_roi] - - -async def run_model_inference_with_dask( - model_rdf: Union[str, PathLike, dict, IO, bytes, raw_nodes.URI, RawResourceDescription], - tensors: Sequence[xr.DataArray], - boundary_mode: Union[ - BoundaryMode, - Sequence[BoundaryMode], - ] = "reflect", - enable_preprocessing: bool = True, - enable_postprocessing: bool = True, - devices: Sequence[str] = ("cpu",), - tiles: Optional[Sequence[Dict[str, int]]] = None, -) -> OrderedDict[str, xr.DataArray]: - """run model inference using chunked dask arrays for tiling - - To run inference on arbitrary input tensors, they are chunked such that with halo and offset all inputs to the - model have `tiles` shape. - - .. code-block:: yaml - authors: [{name: Fynn Beuttenmüller, github_user: fynnbe}] - cite: [{text: BioImage.IO, url: "https://doi.org/10.1101/2022.06.07.495102"}] - - Args: - model_rdf: the (source/raw) model RDF that describes the model to be used for inference - tensors: model input tensors - boundary_mode: How to pad missing values. - enable_preprocessing: If true, apply the preprocessing specified in the model RDF - enable_postprocessing: If true, apply the postprocessing specified in the model RDF - devices: devices to use for inference (device management is handled by the created model adapter) - tiles: Defaults to using an estimated tile sizes based on the model RDF. - - Returns: - outputs. named model outputs - """ - model: raw_nodes.Model = load_raw_resource_description(model_rdf, update_to_format="latest") # noqa - if len(model.outputs) > 1: - raise NotImplementedError("More than one model output not yet implemented") - - assert isinstance(model, raw_nodes.Model) - # always remove pre-/postprocessing, but save it if enabled - # todo: improve pre- and postprocessing! 
- - if enable_preprocessing: - preprocessing = CombinedProcessing.from_tensor_specs( - [resolve_raw_node(ipt, nodes, root_path=model.root_path) for ipt in model.inputs] - ) - sample = {ipt.name: t for ipt, t in zip(model.inputs, tensors)} - preprocessing.apply(sample, {}) - tensors = [sample[ipt.name] for ipt in model.inputs] - - if enable_postprocessing: - postprocessing = CombinedProcessing.from_tensor_specs( - [resolve_raw_node(out, nodes, root_path=model.root_path) for out in model.outputs] - ) - else: - postprocessing = None - - # transpose tensors to match ipt spec - assert len(tensors) == len(model.inputs) - tensors = [t.transpose(*s.axes) for t, s in zip(tensors, model.inputs)] - if isinstance(boundary_mode, str): - boundary_mode = [boundary_mode] * len(tensors) - - if tiles is None: - tiles = [get_default_input_tile(ipt) for ipt in model.inputs] - - # calculate chunking of the input tensors from tiles taking halo and offset into account - chunks, overlap_depths, paddings = zip( - *(get_chunk(c, ipt, model.outputs, t) for c, ipt, t in zip(tiles, model.inputs, tensors)) - ) - chunks_by_name = {ipt.name: c for ipt, c in zip(model.inputs, chunks)} - padded_input_tensor_shapes = { - ipt.name: [ts + sum(p[a]) for ts, a in zip(t.shape, ipt.axes)] - for ipt, t, p in zip(model.inputs, tensors, paddings) - } - - # note: da.overlap.overlap or da.overlap.map_overlap equivalents are not yet available in xarray - tensors = [ - da.overlap.overlap(t.pad(p, mode=bm).chunk(c).data, depth=d, boundary=bm) - for t, c, d, p, bm in zip(tensors, chunks, overlap_depths, paddings, boundary_mode) - ] - - output_tile_roi, output_roi = get_output_rois( - model.outputs[0], - input_overlaps={ipt.name: d for ipt, d in zip(model.inputs, overlap_depths)}, - input_paddings={ipt.name: p for ipt, p in zip(model.inputs, paddings)}, - ipt_by_name={ipt.name: ipt for ipt in model.inputs}, - ) - - n_batches = tensors[0].npartitions - assert all(t.npartitions == n_batches for t in tensors[1:]), 
[t.npartitions for t in tensors] - - model_adapter = create_model_adapter(bioimageio_model=model, devices=devices) - - # todo: generalize to multiple outputs - out = model.outputs[0] - if isinstance(out.shape, raw_nodes.ImplicitOutputShape): - ipt_shape = padded_input_tensor_shapes[out.shape.reference_tensor] - ipt_by_name = {ipt.name: ipt for ipt in model.inputs} - ipt_axes = ipt_by_name[out.shape.reference_tensor].axes - ipt_shape = np.array(transpose_sequence(ipt_shape, ipt_axes, out.axes, 0)) - out_scale = [0.0 if s is None else s for s in out.shape.scale] - out_offset = np.array(out.shape.offset) - out_shape_float = ipt_shape * out_scale + 2 * out_offset - assert (out_shape_float == out_shape_float.astype(int)).all(), out_shape_float - out_shape: Sequence[int] = out_shape_float.astype(int) - else: - out_shape = out.shape - out_scale = [1.0] * len(out_shape) - ipt_axes = [] - - # set up da.blockwise to orchestrate tiled forward - out_ind = [] - new_axes = {} - adjust_chunks = {} - for a, s, sc in zip(out.axes, out_shape, out_scale): - if a in ("b", "batch"): - out_ind.append(a) - elif a in ipt_axes: - axis_name = f"{out.shape.reference_tensor}_{a}" - out_ind.append(axis_name) - adjust_chunks[axis_name] = lambda _, aa=a, scc=sc: chunks_by_name[out.shape.reference_tensor][aa] * scc - else: - out_ind.append(f"{out.name}_{a}") - new_axes[f"{out.name}_{a}"] = s - - inputs_sequence = [] - for t, ipt in zip(tensors, model.inputs): - inputs_sequence.append(t) - inputs_sequence.append(tuple("b" if a == "b" else f"{ipt.name}_{a}" for a in ipt.axes)) - - result = da.blockwise( - forward, - tuple(out_ind), - *inputs_sequence, - new_axes=new_axes, - dtype=np.dtype(out.data_type), - meta=np.empty((), dtype=np.dtype(out.data_type)), - name=(model.config or {}).get("bioimageio", {}).get("nickname") or f"model_{model.id}", - adjust_chunks=adjust_chunks, - **dict(model_adapter=model_adapter, output_tile_roi=tuple_roi_to_slices(output_tile_roi)), - ) - - corrected_chunks, rechunk 
= get_corrected_chunks(result.chunks, result.shape, output_roi) - res = result[tuple_roi_to_slices(output_roi)] - if rechunk: - res = res.rechunk(corrected_chunks) - - outputs = collections.OrderedDict({out.name: xr.DataArray(res, dims=tuple(out.axes))}) - if enable_postprocessing: - assert postprocessing is not None - sample = {name: t for name, t in outputs.items()} - postprocessing.apply(sample, {}) - outputs = collections.OrderedDict({out.name: sample[out.name] for out in model.outputs}) - - return outputs diff --git a/bioimageio/core/contrib/contrib_a/local.py b/bioimageio/core/contrib/contrib_a/local.py deleted file mode 100644 index 4ce3402f..00000000 --- a/bioimageio/core/contrib/contrib_a/local.py +++ /dev/null @@ -1,2 +0,0 @@ -from ._demo import hello -from ._inference import run_model_inference_with_dask diff --git a/bioimageio/core/contrib/contrib_a/remote.py b/bioimageio/core/contrib/contrib_a/remote.py deleted file mode 100644 index 084f7d2e..00000000 --- a/bioimageio/core/contrib/contrib_a/remote.py +++ /dev/null @@ -1,8 +0,0 @@ -import sys -from pathlib import Path - -from bioimageio.core.contrib.utils import RemoteContrib - -remote_module = RemoteContrib(Path(__file__).parent.stem) -__all__ = remote_module.__all__ -sys.modules[__name__] = remote_module # noqa diff --git a/bioimageio/core/contrib/utils/__init__.py b/bioimageio/core/contrib/utils/__init__.py deleted file mode 100644 index a420ef9c..00000000 --- a/bioimageio/core/contrib/utils/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from ._ast import get_ast_tree -from ._rpc import ImportCollector, RemoteContrib, start_contrib_service -from ._tiling import ( - get_chunk, - get_corrected_chunks, - get_default_input_tile, - get_output_rois, - transpose_sequence, - tuple_roi_to_slices, -) diff --git a/bioimageio/core/contrib/utils/_ast.py b/bioimageio/core/contrib/utils/_ast.py deleted file mode 100644 index d157cbc0..00000000 --- a/bioimageio/core/contrib/utils/_ast.py +++ /dev/null @@ -1,7 +0,0 @@ 
-import ast -from pathlib import Path - - -def get_ast_tree(path: Path): - src = path.read_text() - return ast.parse(src) diff --git a/bioimageio/core/contrib/utils/_rpc.py b/bioimageio/core/contrib/utils/_rpc.py deleted file mode 100644 index cf2015d0..00000000 --- a/bioimageio/core/contrib/utils/_rpc.py +++ /dev/null @@ -1,116 +0,0 @@ -import ast -import asyncio -import logging -import os -from functools import partial -from inspect import getmembers, isfunction -from pathlib import Path -from typing import List, Optional - -from imjoy_rpc.hypha import connect_to_server - -from bioimageio.core import contrib -from ._ast import get_ast_tree - -logger = logging.getLogger(__name__) - - -async def start_contrib_service(contrib_name: str, server_url: Optional[str] = None): - server = await connect_to_server({"server_url": server_url or get_contrib_server_url(contrib_name)}) - - contrib_part = getattr(contrib, contrib_name) - service_name = f"BioImageIO {' '.join(n.capitalize() for n in contrib_name.split('_'))} Module" - service_config = dict( - name=service_name, - id=f"bioimageio-{contrib_name}", - config={ - "visibility": "public", - "run_in_executor": True, # This will make sure all the sync functions run in a separate thread - }, - ) - - for func_name, func in getmembers(contrib_part, isfunction): - assert func_name not in service_config - service_config[func_name] = func - - await server.register_service(service_config) - - logger.info(f"{service_name} service registered at workspace: {server.config.workspace}") - - -class ImportCollector(ast.NodeVisitor): - def __init__(self): - self.imported: List[str] = [] - - def visit_Import(self, node: ast.Import): - raise ValueError("Found 'import' statement. Expected 'from . import ' only") - - def visit_ImportFrom(self, node: ast.ImportFrom): - if not node.level: - raise ValueError(f"Unsupported absolute import from {node.module}") - - if "." 
in node.module: - raise ValueError(f"Unsupported nested import from {node.module}") - - for alias_node in node.names: - self.imported.append(alias_node.name) - if alias_node.asname is not None: - raise ValueError( - f"Please import contrib functions without 'as', i.e. use '{alias_node.name}' instead of '{alias_node.asname}'." - ) - - -SERVER_URL_ENV_NAME = "BIOIMAGEIO_CONTRIB_URL" -DEFAULT_SERVER_URL = "http://localhost:9000" - - -def get_contrib_specific_server_url_env_name(contrib_name): - return f"BIOIMAGEIO_{contrib_name.capitalize()}_URL" - - -def get_contrib_server_url(contrib_name) -> str: - return os.getenv( - get_contrib_specific_server_url_env_name(contrib_name), os.getenv(SERVER_URL_ENV_NAME, DEFAULT_SERVER_URL) - ) - - -class RemoteContrib: - def __init__(self, contrib_name: str, server_url: Optional[str] = None): - self.server_url = server_url or get_contrib_server_url(contrib_name) - self.contrib_name = contrib_name - self.contrib = None - local_src = Path(__file__).parent.parent / contrib_name / "local.py" - tree = get_ast_tree(local_src) - import_collector = ImportCollector() - import_collector.visit(tree) - self.__all__ = import_collector.imported - self.service_funcs = {} - for name in self.__all__: - setattr(self, name, partial(self._service_call, _contrib_func_name=name)) - - def __await__(self): - yield from self._ainit().__await__() - - async def _ainit(self): - try: - server = await asyncio.create_task(connect_to_server({"server_url": self.server_url})) - except Exception as e: - raise Exception( - f"Failed to connect to {self.server_url}. " - f"Make sure {get_contrib_specific_server_url_env_name(self.contrib_name)} or {SERVER_URL_ENV_NAME} " - f"is set or {self.server_url} is running." - ) from e - try: - contrib_service = await server.get_service(f"bioimageio-{self.contrib_name}") - except Exception as e: - raise Exception( - f"bioimageio-{self.contrib_name} service not found. 
Start with 'python -m bioimageio.core.contrib.{self.contrib_name}' in a suitable (conda) environment." - ) from e - # todo: start contrib service entry point, e.g. f"bioimageio start {contrib_name}" - - self.service_funcs = {name: getattr(contrib_service, name) for name in self.__all__} - return self - - async def _service_call(self, *args, _contrib_func_name, **kwargs): - await self - return await self.service_funcs[_contrib_func_name](*args, **kwargs) diff --git a/bioimageio/core/contrib/utils/_tiling.py b/bioimageio/core/contrib/utils/_tiling.py deleted file mode 100644 index 7bad622c..00000000 --- a/bioimageio/core/contrib/utils/_tiling.py +++ /dev/null @@ -1,201 +0,0 @@ -import math -import warnings -from collections import defaultdict -from typing import Dict, List, Sequence, Tuple, TypeVar - -import numpy as np - -from bioimageio.spec.model import raw_nodes - -TA = TypeVar("TA") -TS = TypeVar("TS") - - -def transpose_sequence(sequence: Sequence[TS], axes: Sequence[TA], desired_axes: Sequence[TA], default) -> List[TS]: - """transpose a sequence according to its axes to match a desired axes order, - filling non-exising entries with default - - Returns - sequence: the transposed sequence as a list - """ - return [default if ia not in axes else sequence[axes.index(ia)] for ia in desired_axes] - - -def get_chunk( - chunk, ipt: raw_nodes.InputTensor, outputs: Sequence[raw_nodes.OutputTensor], tensor -) -> Tuple[Dict[str, int], Dict[int, int], Dict[str, Tuple[int, int]]]: - """correct chunk to account for offset and halo - - Returns: - corrected chunk: to tile the input array with - overlap: overlap of corrected chunks (yields original chunks) - """ - ipt_shape = np.array([chunk[a] for a in ipt.axes], dtype=int) - referencing_outputs = [ - ot - for ot in outputs - if isinstance(ot.shape, raw_nodes.ImplicitOutputShape) and ot.shape.reference_tensor == ipt.name - ] - if not referencing_outputs: - return ( - chunk, - defaultdict(lambda: 0), - defaultdict(lambda: (0, 
0)), - ) - - if len(referencing_outputs) > 1: - raise NotImplementedError("more than one output references an input") - - sohs = [ - ( - np.array(transpose_sequence(ot.shape.scale, ot.axes, ipt.axes, 1.0)), - np.array(transpose_sequence(ot.shape.offset, ot.axes, ipt.axes, 0.0)), - np.array(transpose_sequence(ot.halo, ot.axes, ipt.axes, 0.0)), - ) - for ot in referencing_outputs - ] - scale, offset, halo = sohs[0] - if any((s != scale).any() or (off != offset).any() or (h != halo).any() for s, off, h in sohs[1:]): - # todo: ignore any new dimensions denoted by scale entry of None - raise ValueError( - f"Incompatible output specs referencing same input tensor with different scale/offset/halo: {[out.name for out in referencing_outputs]}." - ) - - if any(off > 0 for a, off in zip(offset, ipt.axes) if a in ("x", "y", "z", "t", "time")): - raise NotImplementedError( - "offset>0; space/time output is larger than input. todo: cut offset on tiles, but leave at image edge." - ) - - assert all(h >= 0 for h in halo) - overlap = np.maximum((halo - offset) / scale, 0) # no negative overlap - overlap = np.ceil(overlap).astype(int) - corrected_chunk = ipt_shape - 2 * overlap - t_shape = np.array(tensor.shape, dtype=int) - assert len(t_shape) == len(ipt_shape) - padding_total = (corrected_chunk - (t_shape % corrected_chunk)) % corrected_chunk - padding = [(0, p) for p in padding_total] - - return ( - dict(zip(ipt.axes, corrected_chunk)), - dict(enumerate(overlap)), # xr.DataArray.overlap not yet available: key by index for da.overlap - dict(zip(ipt.axes, padding)), - ) - - -def tuple_roi_to_slices(tuple_roi: Sequence[Tuple[int, int]]) -> Tuple[slice, ...]: - return tuple(np.s_[r0:-r1] if r1 else np.s_[r0:] for r0, r1 in tuple_roi) - - -def get_default_input_tile(ipt: raw_nodes.InputTensor) -> List[int]: - """Guess a good""" - if isinstance(ipt.shape, list): - shape = ipt.shape - elif isinstance(ipt.shape, raw_nodes.ParametrizedInputShape): - is3d = len([a for a in ipt.axes if a not 
in "bc"]) > 2 - min_len = 64 if is3d else 256 - shape = [] - for ax, min_ax, step_ax in zip(ipt.axes, ipt.shape.min_shape, ipt.shape.step): - if ax in "zyx" and step_ax > 0: - len_ax = min_ax - while len_ax < min_len: - len_ax += step_ax - shape.append(len_ax) - else: - shape.append(min_ax) - else: - raise TypeError(type(ipt.shape)) - - assert len(ipt.axes) == len(shape) - return shape - - -def get_asymmetric_halolike(value: float) -> Tuple[int, int]: - assert value >= 0 - if value % 1: - assert value % 0.5 == 0 - return math.floor(value), math.ceil(value) - else: - return int(value), int(value) - - -def get_output_rois( - out: raw_nodes.OutputTensor, - input_overlaps: Dict[str, Dict[int, int]], - input_paddings: Dict[str, Dict[str, Tuple[int, int]]], - ipt_by_name: Dict[str, raw_nodes.InputTensor], -) -> Tuple[Sequence[Tuple[int, int]], Sequence[Tuple[int, int]]]: - if isinstance(out.shape, raw_nodes.ImplicitOutputShape): - scale = np.array([1.0 if s is None else s for s in out.shape.scale]) - offset: Sequence[float] = out.shape.offset - ref_ipt = ipt_by_name[out.shape.reference_tensor] - eff_halo_float: List[float] = [ - input_overlaps[out.shape.reference_tensor].get(ref_ipt.axes.index(a), 0) * s + off - for a, s, off in zip(out.axes, scale, offset) - ] - ref_input_padding_dict = input_paddings[out.shape.reference_tensor] - else: - scale = np.ones(len(out.shape)) - offset = np.zeros(len(out.shape)) - eff_halo_float = [0.0] * len(out.shape) - ref_input_padding_dict = {} - - # effective halo to be trimmed from output. (only for space and time dims) - output_chunk_roi: List[Tuple[int, int]] = [] - for i, a in enumerate(out.axes): - if a in ("b", "batch"): - errors_in = (["halo"] if eff_halo_float[i] else []) + (["offset"] if offset[i] else []) - if errors_in: - raise ValueError(f"invalid {' and '.join(errors_in)} for batch dimension of output {out.name}") - elif a in ("x", "y", "z", "t", "time"): - pass - elif a in ("i", "index", "c", "channel"): - # ignore offset. 
As we cannot tile across these dimensions, offsets should be returned, not trimmed. - eff_halo_float[i] -= offset[i] - if eff_halo_float[i]: - warnings.warn(f"Trimming off halo for axis {a} of output {out.name}.") - - else: - raise NotImplementedError(a) - - output_chunk_roi.append(get_asymmetric_halolike(eff_halo_float[i])) - - # undo input padding for the resulting final output tensor - # also trim any negative offset, which we padded for each chunk - output_roi = [] - for a, s, off in zip(out.axes, scale, offset): - p0, p1 = ref_input_padding_dict.get(a, (0, 0)) - off0, off1 = get_asymmetric_halolike(-min(off, 0)) - output_roi.append((math.ceil(p0 * s + off0), math.ceil(p1 * s + off1))) - - return output_chunk_roi, output_roi - - -def get_corrected_chunks(chunks: Dict[int, Sequence[int]], shape: Sequence[int], roi: Sequence[Tuple[int, int]]): - """adapt `chunks` chunking `shape` for `shape[roi]`""" - corrected_chunks = [] - rechunk = False - for i, (s, roi) in enumerate(zip(shape, roi)): - c = chunks[i] - assert s == sum(c), (s, c) - if sum(roi): - c = list(c) - r0 = roi[0] - while r0 >= c[0]: - r0 -= c[0] - c = c[1:] - if not c: - raise ValueError(f"Trimming too much from output {shape} with roi {roi}") - - c[0] -= r0 - - r1 = roi[1] - while r1 >= c[-1]: - r1 -= c[-1] - c = c[:-1] - if not c: - raise ValueError(f"Trimming too much from output {shape} with roi {roi}") - - c[-1] -= r1 - - corrected_chunks.append(c) - return corrected_chunks, rechunk From 4d876425ac0e776320274d3b1d745c0adb3ca407 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 28 Nov 2022 14:46:37 +0100 Subject: [PATCH 22/23] set default Axis name to axis type --- bioimageio/core/resource_io/nodes.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bioimageio/core/resource_io/nodes.py b/bioimageio/core/resource_io/nodes.py index a6350e56..6420389b 100644 --- a/bioimageio/core/resource_io/nodes.py +++ b/bioimageio/core/resource_io/nodes.py @@ -209,12 +209,18 @@ class 
Model(model_raw_nodes.Model, RDF): @dataclass class Axis(Node, workflow_raw_nodes.Axis): - pass + name: Union[str, List[str]] = missing + + def __post_init__(self): + if self.name is missing: + self.name = self.type + + super().__post_init__() @dataclass class Parameter(Node, workflow_raw_nodes.Parameter): - axes: Union[_Missing, List[Axis], workflow_raw_nodes.ArbitraryAxes] = missing + axes: Union[_Missing, List[Axis], workflow_raw_nodes.UnknownAxes] = missing @dataclass From bd60fcabaf5afeca0ba50904f82c4a82e64febb5 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 28 Nov 2022 14:46:57 +0100 Subject: [PATCH 23/23] fix Workflow node --- bioimageio/core/resource_io/nodes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bioimageio/core/resource_io/nodes.py b/bioimageio/core/resource_io/nodes.py index 6420389b..068173cf 100644 --- a/bioimageio/core/resource_io/nodes.py +++ b/bioimageio/core/resource_io/nodes.py @@ -240,6 +240,6 @@ class Output(Parameter, workflow_raw_nodes.Output): @dataclass class Workflow(workflow_raw_nodes.Workflow, RDF): - inputs_spec: List[Input] = missing - options_spec: List[Option] = missing - outputs_spec: List[Output] = missing + inputs: List[Input] = missing + options: List[Option] = missing + outputs: List[Output] = missing