Skip to content

Commit fa3a4d2

Browse files
committed
Issue #401 Improve automatic adding of save_result
1 parent d505757 commit fa3a4d2

File tree

6 files changed

+335
-63
lines changed

6 files changed

+335
-63
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5656
([#412](https://github.com/Open-EO/openeo-python-client/issues/412)).
5757
- More robust handling of billing currency/plans in capabilities
5858
([#414](https://github.com/Open-EO/openeo-python-client/issues/414))
59+
- Avoid blindly adding a `save_result` node from `DataCube.execute_batch()` when there is already one
60+
([#401](https://github.com/Open-EO/openeo-python-client/issues/401))
5961

6062

6163
## [0.15.0] - 2023-03-03

openeo/rest/connection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1131,7 +1131,7 @@ def download(
11311131
graph: Union[dict, str, Path],
11321132
outputfile: Union[Path, str, None] = None,
11331133
timeout: int = 30 * 60,
1134-
):
1134+
) -> Union[None, bytes]:
11351135
"""
11361136
Downloads the result of a process graph synchronously,
11371137
and saves the result to the given file, or returns a bytes object if no outputfile is specified.

openeo/rest/datacube.py

Lines changed: 73 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ class DataCube(_ProcessGraphAbstraction):
5858
and this process graph can be "grown" to a desired workflow by calling the appropriate methods.
5959
"""
6060

61+
# TODO: set this based on back-end or user preference?
62+
_DEFAULT_RASTER_FORMAT = "GTiff"
63+
6164
def __init__(self, graph: PGNode, connection: 'openeo.Connection', metadata: CollectionMetadata = None):
6265
super().__init__(pgnode=graph, connection=connection)
6366
self.metadata = CollectionMetadata.get_or_create(metadata)
@@ -1810,36 +1813,45 @@ def atmospheric_correction(
18101813
})
18111814

18121815
@openeo_process
1813-
def save_result(self, format: str = "GTiff", options: dict = None) -> 'DataCube':
1816+
def save_result(
1817+
self,
1818+
format: str = _DEFAULT_RASTER_FORMAT,
1819+
options: Optional[dict] = None,
1820+
) -> "DataCube":
18141821
formats = set(self._connection.list_output_formats().keys())
1822+
# TODO: map format to correct casing too?
18151823
if format.lower() not in {f.lower() for f in formats}:
18161824
raise ValueError("Invalid format {f!r}. Should be one of {s}".format(f=format, s=formats))
18171825
return self.process(
18181826
process_id="save_result",
18191827
arguments={
18201828
"data": THIS,
18211829
"format": format,
1830+
# TODO: leave out options if unset?
18221831
"options": options or {}
18231832
}
18241833
)
18251834

1826-
def download(
1827-
self, outputfile: Union[str, pathlib.Path, None] = None, format: Optional[str] = None,
1828-
options: Optional[dict] = None
1829-
):
1835+
def _ensure_save_result(
1836+
self,
1837+
format: Optional[str] = None,
1838+
options: Optional[dict] = None,
1839+
) -> "DataCube":
18301840
"""
1831-
Download image collection, e.g. as GeoTIFF.
1832-
If outputfile is provided, the result is stored on disk locally, otherwise, a bytes object is returned.
1833-
The bytes object can be passed on to a suitable decoder for decoding.
1841+
Make sure there is a (final) `save_result` node in the process graph.
1842+
If there is already one: check if it is consistent with the given format/options (if any)
1843+
and add a new one otherwise.
18341844
1835-
:param outputfile: Optional, an output file if the result needs to be stored on disk.
1836-
:param format: Optional, an output format supported by the backend.
1837-
:param options: Optional, file format options
1838-
:return: None if the result is stored to disk, or a bytes object returned by the backend.
1845+
:param format: (optional) desired `save_result` file format
1846+
:param options: (optional) desired `save_result` file format parameters
1847+
:return: data cube whose process graph ends with a `save_result` node
18391848
"""
1840-
if self.result_node().process_id == "save_result":
1841-
# There is already a `save_result` node: check if it is consistent with given format/options
1842-
args = self.result_node().arguments
1849+
# TODO: move to generic data cube parent class (not only for raster cubes, but also vector cubes)
1850+
result_node = self.result_node()
1851+
if result_node.process_id == "save_result":
1852+
# There is already a `save_result` node:
1853+
# check if it is consistent with given format/options (if any)
1854+
args = result_node.arguments
18431855
if format is not None and format.lower() != args["format"].lower():
18441856
raise ValueError(
18451857
f"Existing `save_result` node with different format {args['format']!r} != {format!r}"
@@ -1851,10 +1863,30 @@ def download(
18511863
cube = self
18521864
else:
18531865
# No `save_result` node yet: automatically add it.
1854-
if not format:
1855-
format = guess_format(outputfile) if outputfile else "GTiff"
1856-
cube = self.save_result(format=format, options=options)
1866+
cube = self.save_result(
1867+
format=format or self._DEFAULT_RASTER_FORMAT, options=options
1868+
)
1869+
return cube
1870+
1871+
def download(
1872+
self,
1873+
outputfile: Optional[Union[str, pathlib.Path]] = None,
1874+
format: Optional[str] = None,
1875+
options: Optional[dict] = None,
1876+
) -> Union[None, bytes]:
1877+
"""
1878+
Download the raster data cube, e.g. as GeoTIFF.
1879+
If outputfile is provided, the result is stored on disk locally; otherwise, a bytes object is returned.
1880+
The bytes object can be passed on to a suitable decoder for decoding.
18571881
1882+
:param outputfile: Optional, an output file if the result needs to be stored on disk.
1883+
:param format: Optional, an output format supported by the backend.
1884+
:param options: Optional, file format options
1885+
:return: None if the result is stored to disk, or a bytes object returned by the backend.
1886+
"""
1887+
if format is None and outputfile is not None:
1888+
format = guess_format(outputfile)
1889+
cube = self._ensure_save_result(format=format, options=options)
18581890
return self._connection.download(cube.flat_graph(), outputfile)
18591891

18601892
def validate(self) -> List[dict]:
@@ -1869,27 +1901,35 @@ def tiled_viewing_service(self, type: str, **kwargs) -> Service:
18691901
return self._connection.create_service(self.flat_graph(), type=type, **kwargs)
18701902

18711903
def execute_batch(
1872-
self,
1873-
outputfile: Union[str, pathlib.Path] = None, out_format: str = None,
1874-
print=print, max_poll_interval=60, connection_retry_interval=30,
1875-
job_options=None, **format_options) -> BatchJob:
1904+
self,
1905+
outputfile: Optional[Union[str, pathlib.Path]] = None,
1906+
out_format: Optional[str] = None,
1907+
*,
1908+
print: typing.Callable[[str], None] = print,
1909+
max_poll_interval: float = 60,
1910+
connection_retry_interval: float = 30,
1911+
job_options: Optional[dict] = None,
1912+
# TODO: avoid `format_options` as keyword arguments
1913+
**format_options,
1914+
) -> BatchJob:
18761915
"""
18771916
Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
18781917
This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.
18791918
18801919
For very long-running jobs, you probably do not want to keep the client running.
18811920
1882-
:param job_options:
18831921
:param outputfile: The path of a file to which a result can be written
1884-
:param out_format: (optional) Format of the job result.
1885-
:param format_options: String Parameters for the job result format
1886-
1922+
:param out_format: (optional) File format to use for the job result.
1923+
:param job_options: (optional) custom job options.
18871924
"""
18881925
if "format" in format_options and not out_format:
18891926
out_format = format_options["format"] # align with 'download' call arg name
1890-
if not out_format:
1891-
out_format = guess_format(outputfile) if outputfile else "GTiff"
1892-
job = self.create_job(out_format, job_options=job_options, **format_options)
1927+
if not out_format and outputfile:
1928+
out_format = guess_format(outputfile)
1929+
1930+
job = self.create_job(
1931+
out_format=out_format, job_options=job_options, **format_options
1932+
)
18931933
return job.run_synchronous(
18941934
outputfile=outputfile,
18951935
print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
@@ -1904,6 +1944,7 @@ def create_job(
19041944
plan: Optional[str] = None,
19051945
budget: Optional[float] = None,
19061946
job_options: Optional[dict] = None,
1947+
# TODO: avoid `format_options` as keyword arguments
19071948
**format_options,
19081949
) -> BatchJob:
19091950
"""
@@ -1914,22 +1955,18 @@ def create_job(
19141955
it still needs to be started and tracked explicitly.
19151956
Use :py:meth:`execute_batch` instead to have the openEO Python client take care of that job management.
19161957
1917-
:param out_format: String Format of the job result.
1958+
:param out_format: output file format.
19181959
:param title: job title
19191960
:param description: job description
19201961
:param plan: billing plan
19211962
:param budget: maximum cost the request is allowed to produce
1922-
:param job_options: A dictionary containing (custom) job options
1923-
:param format_options: String Parameters for the job result format
1963+
:param job_options: custom job options.
19241964
:return: Created job.
19251965
"""
19261966
# TODO: add option to also automatically start the job?
19271967
# TODO: avoid using all kwargs as format_options
19281968
# TODO: centralize `create_job` for `DataCube`, `VectorCube`, `MlModel`, ...
1929-
cube = self
1930-
if out_format:
1931-
# add `save_result` node
1932-
cube = cube.save_result(format=out_format, options=format_options)
1969+
cube = self._ensure_save_result(format=out_format, options=format_options)
19331970
return self._connection.create_job(
19341971
process_graph=cube.flat_graph(),
19351972
title=title,

openeo/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ def deep_set(data: dict, *keys, value):
437437
raise ValueError("No keys given")
438438

439439

440-
def guess_format(filename: Union[str, Path]):
440+
def guess_format(filename: Union[str, Path]) -> str:
441441
"""
442442
Guess the output format from a given filename and return the corrected format.
443443
Any names not in the dict get passed through.

0 commit comments

Comments
 (0)