From 92aa00181250c41837118c3e48a72809fbf724b5 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Thu, 13 Mar 2025 15:17:51 +0100 Subject: [PATCH 01/20] feature: artifact-helper Allow uploading artifacts using: ``` from openeo.extra.artifacts import ArtifactHelper artifact_helper = ArtifactHelper.from_openeo_connection(connection) storage_uri = artifact_helper.upload_file(object_name, src_file_path) presigned_uri = artifact_helper.get_presigned_url(storage_uri) ``` --- openeo/extra/artifacts/__init__.py | 9 ++ openeo/extra/artifacts/artifact_helper.py | 54 +++++++++ openeo/extra/artifacts/config.py | 36 ++++++ .../extra/artifacts/internal_s3/__init__.py | 0 .../artifacts/internal_s3/artifact_helper.py | 68 +++++++++++ openeo/extra/artifacts/internal_s3/config.py | 112 ++++++++++++++++++ openeo/extra/artifacts/internal_s3/model.py | 56 +++++++++ openeo/extra/artifacts/internal_s3/sts.py | 37 ++++++ openeo/extra/artifacts/internal_s3/tracer.py | 30 +++++ openeo/extra/artifacts/uri.py | 15 +++ setup.py | 9 +- tests/extra/artifacts/__init__.py | 0 tests/extra/artifacts/internal_s3/__init__.py | 0 .../artifacts/internal_s3/test_config.py | 33 ++++++ .../extra/artifacts/internal_s3/test_s3uri.py | 12 ++ 15 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 openeo/extra/artifacts/__init__.py create mode 100644 openeo/extra/artifacts/artifact_helper.py create mode 100644 openeo/extra/artifacts/config.py create mode 100644 openeo/extra/artifacts/internal_s3/__init__.py create mode 100644 openeo/extra/artifacts/internal_s3/artifact_helper.py create mode 100644 openeo/extra/artifacts/internal_s3/config.py create mode 100644 openeo/extra/artifacts/internal_s3/model.py create mode 100644 openeo/extra/artifacts/internal_s3/sts.py create mode 100644 openeo/extra/artifacts/internal_s3/tracer.py create mode 100644 openeo/extra/artifacts/uri.py create mode 100644 tests/extra/artifacts/__init__.py create mode 100644 tests/extra/artifacts/internal_s3/__init__.py create mode 100644 tests/extra/artifacts/internal_s3/test_config.py create mode 100644 tests/extra/artifacts/internal_s3/test_s3uri.py diff --git a/openeo/extra/artifacts/__init__.py b/openeo/extra/artifacts/__init__.py new file mode 100644 index 000000000..3985d9e1d --- /dev/null +++ b/openeo/extra/artifacts/__init__.py @@ -0,0 +1,9 @@ +# Hard coded at this time but this could be a builder that depending on connection builds a client for the Storage +from openeo.extra.artifacts.internal_s3.artifact_helper import S3ArtifactHelper as ArtifactHelper +""" +from openeo.extra.artifacts import ArtifactHelper + +artifact_helper = ArtifactHelper.from_openeo_connection(connection) +storage_uri = artifact_helper.upload_file(object_name, src_file_path) +presigned_uri = artifact_helper.get_presigned_url(storage_uri) +""" \ No newline at end of file diff --git a/openeo/extra/artifacts/artifact_helper.py b/openeo/extra/artifacts/artifact_helper.py new file mode 100644 index 000000000..b2af479c9 --- /dev/null +++ b/openeo/extra/artifacts/artifact_helper.py @@ -0,0 +1,54 @@ +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import Optional + +from openeo.extra.artifacts.config import StorageConfig +from openeo.extra.artifacts.uri import StorageURI +from openeo.rest.connection import Connection +from pathlib import Path + + +class ArtifactHelper(ABC): + @classmethod + def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig] = None) -> ArtifactHelper: + """ + Create a new Artifact helper from the OpenEO connection. This is the starting point to upload artifacts + """ + if config is None: + config = cls._get_default_storage_config() + config.load_openeo_connection_metadata(conn) + return cls._from_openeo_connection(conn, config) + + @abstractmethod + def upload_file(self, object_name: str, src_file_path: str | Path) -> StorageURI: + """ + A method to store an artifact remotely and get a URI understandable by the OpenEO processor + """ + + @abstractmethod + def get_presigned_url(self, storage_uri: StorageURI, expires_in_seconds: int) -> str: + """ + A method to convert a StorageURI to a signed https URL which can be accessed via normal http libraries. + + These URIs should be kept secret as they provide access to the data. + """ + + def __init__(self, config: StorageConfig): + if not config.is_openeo_connection_metadata_loaded(): + raise RuntimeError("config should have openeo connection metadata loaded prior to initialization.") + self._config = config + + @classmethod + @abstractmethod + def _get_default_storage_config(cls) -> StorageConfig: + """ + A method that provides a default storage config for the Artifact Helper + """ + + @classmethod + @abstractmethod + def _from_openeo_connection(cls, conn: Connection, config: StorageConfig) -> ArtifactHelper: + """ + The implementation that creates an artifact helper. This method takes a config which has already been + initialized from the metadata of the OpenEO connection. + """ diff --git a/openeo/extra/artifacts/config.py b/openeo/extra/artifacts/config.py new file mode 100644 index 000000000..b3c235579 --- /dev/null +++ b/openeo/extra/artifacts/config.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod + +from openeo import Connection + +_METADATA_LOADED = "_sc_metadata_loaded" + + +class StorageConfig(ABC): + """ + Storage config allows overriding configuration for the interaction with the backend storage. + It greatly depends on the type of storage so the enforced API is limited to load metadata using the connection. + """ + @abstractmethod + def _load_openeo_connection_metadata(self, conn: Connection) -> None: + """ + Implementations implement their logic of adapting config based on metadata from the current OpenEO connection + in this method. + """ + + def load_openeo_connection_metadata(self, conn: Connection) -> None: + """ + This is the method that is actually used to load metadata. Metadata is only loaded once. + """ + if not self.is_openeo_connection_metadata_loaded(): + self._load_openeo_connection_metadata(conn) + setattr(self, _METADATA_LOADED, True) + + def is_openeo_connection_metadata_loaded(self) -> bool: + """ + This is a helper to check whether metadata is loaded. + """ + if not hasattr(self, _METADATA_LOADED): + return False + return getattr(self, _METADATA_LOADED) diff --git a/openeo/extra/artifacts/internal_s3/__init__.py b/openeo/extra/artifacts/internal_s3/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openeo/extra/artifacts/internal_s3/artifact_helper.py b/openeo/extra/artifacts/internal_s3/artifact_helper.py new file mode 100644 index 000000000..05fe2da2a --- /dev/null +++ b/openeo/extra/artifacts/internal_s3/artifact_helper.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import datetime +from typing import TYPE_CHECKING +from boto3.s3.transfer import TransferConfig + +from openeo.extra.artifacts.artifact_helper import ArtifactHelper +from openeo.extra.artifacts.internal_s3.sts import OpenEOSTSClient +from openeo.extra.artifacts.internal_s3.config import S3Config +from openeo.extra.artifacts.internal_s3.model import S3URI + +if TYPE_CHECKING: + from pathlib import Path + from openeo.rest.connection import Connection + from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials + + +class S3ArtifactHelper(ArtifactHelper): + BUCKET_NAME = "openeo-artifacts" + # From what size will we switch to multi-part-upload + MULTIPART_THRESHOLD_IN_MB = 50 + + def __init__(self, creds: AWSSTSCredentials, config: S3Config): + super().__init__(config) + self._creds = creds + self.s3 = config.build_client("s3", session_kwargs=creds.as_kwargs()) + + @classmethod + def _from_openeo_connection(cls, conn: Connection, config: S3Config) -> S3ArtifactHelper: + sts = OpenEOSTSClient(config=config) + creds = sts.assume_from_openeo_connection(conn) + return S3ArtifactHelper(creds, config=config) + + def _user_prefix(self) -> str: + """Each user has its own prefix retrieve it""" + return self._creds.get_user_hash() + + def _get_upload_prefix(self) -> str: + return f"{self._user_prefix()}/{datetime.datetime.now(datetime.UTC).strftime('%Y/%m/%d')}/" + + def _get_upload_key(self, object_name: str) -> str: + return f"{self._get_upload_prefix()}{object_name}" + + def upload_file(self, object_name: str, src_file_path: str | Path) -> S3URI: + mb = 1024 ** 2 + config = TransferConfig(multipart_threshold=self.MULTIPART_THRESHOLD_IN_MB * mb) + bucket = self.BUCKET_NAME + key = self._get_upload_key(object_name) + self.s3.upload_file( + str(src_file_path), + bucket, + key, + Config=config + ) + return S3URI(bucket, key) + + def get_presigned_url(self, storage_uri: S3URI, expires_in_seconds: int = 7 * 3600 * 24) -> str: + url = self.s3.generate_presigned_url( + 'get_object', + Params={'Bucket': storage_uri.bucket, 'Key': storage_uri.key}, + ExpiresIn=expires_in_seconds + ) + assert isinstance(self._config, S3Config) + return self._config.add_trace_id_qp_if_needed(url) + + @classmethod + def _get_default_storage_config(cls) -> S3Config: + return S3Config() diff --git a/openeo/extra/artifacts/internal_s3/config.py b/openeo/extra/artifacts/internal_s3/config.py new file mode 100644 index 000000000..8183f4cd8 --- /dev/null +++ b/openeo/extra/artifacts/internal_s3/config.py @@ -0,0 +1,112 @@ +from dataclasses import dataclass +import boto3 +import botocore + +from openeo import Connection +from openeo.extra.artifacts.internal_s3.tracer import add_trace_id, add_trace_id_as_query_parameter +from openeo.extra.artifacts.config import StorageConfig +from botocore.config import Config +from typing import Optional +from packaging.version import Version + + +if Version(botocore.__version__) < Version("1.36.0"): + # Before 1.36 checksuming was not done by default anyway and therefore + # there was no opt-out. + no_default_checksum_cfg = Config() +else: + no_default_checksum_cfg = Config( + request_checksum_calculation='when_required', + ) + + +@dataclass +class S3Config(StorageConfig): + """The s3 endpoint url protocol:://fqdn[:portnumber]""" + s3_endpoint_url: Optional[str] = None + """The sts endpoint url protocol:://fqdn[:portnumber]""" + sts_endpoint_url: Optional[str] = None + """The trace_id is if you want to send a uuid4 identifier to the backend""" + trace_id: str = "" + """You can change the botocore_config used but this is an expert option""" + botocore_config: Optional[Config] = None + """The role ARN to be assumed""" + sts_role_arn: Optional[str] = None + + def _load_openeo_connection_metadata(self, conn: Connection) -> None: + """ + Hard coding since connection does not allow automatic determining config yet. + """ + if self.s3_endpoint_url is None: + self.s3_endpoint_url = "https://s3.waw3-1.openeo.v1.dataspace.copernicus.eu" + + if self.sts_endpoint_url is None: + self.sts_endpoint_url = "https://sts.waw3-1.openeo.v1.dataspace.copernicus.eu" + + if self.sts_role_arn is None: + self.sts_role_arn = "arn:aws:iam::000000000000:role/S3Access" + + def __post_init__(self): + self.botocore_config = self.botocore_config or no_default_checksum_cfg + + def build_client(self, service_name: str, session_kwargs: Optional[dict] = None): + """ + Build a boto3 client for an OpenEO service provider. + + service_name is the service you want to consume: s3|sts + session_kwargs: a dictionary with keyword arguments that will be passed when creating the boto session + """ + session_kwargs = session_kwargs or {} + session = boto3.Session(region_name=self._get_storage_region(), **session_kwargs) + client = session.client( + service_name, + endpoint_url=self._get_endpoint_url(service_name), + config=self.botocore_config, + ) + if self.trace_id != "": + add_trace_id(client, self.trace_id) + return client + + @staticmethod + def _remove_protocol_from_uri(uri: str): + uri_separator = "://" + idx = uri.find(uri_separator) + if idx < 0: + raise ValueError("_remove_protocol_from_uri must be of form protocol://...") + return uri[idx+len(uri_separator):] + + def _get_storage_region(self) -> str: + """ + S3 URIs follow the convention detailed on https://docs.aws.amazon.com/general/latest/gr/s3.html + """ + s3_names = ["s3", "s3-fips"] + reserved_words = ["dualstack", "prod", "stag", "dev"] + s3_endpoint_parts = self._remove_protocol_from_uri(self.s3_endpoint_url).split(".") + for s3_name in s3_names: + try: + old_idx = s3_endpoint_parts.index(s3_name) + idx = old_idx + 1 + while idx != old_idx: + old_idx = idx + for reserved_word in reserved_words: + if s3_endpoint_parts[idx] in reserved_word: + idx += 1 + return s3_endpoint_parts[idx] + except ValueError: + continue + raise ValueError(f"Cannot determine region from {self.s3_endpoint_url}") + + def _get_endpoint_url(self, service_name: str) -> str: + if service_name == "s3": + return self.s3_endpoint_url + elif service_name == "sts": + return self.sts_endpoint_url + raise ValueError(f"Unsupported service {service_name}") + + def add_trace_id_qp_if_needed(self, url: str) -> str: + if self.trace_id == "": + return url + return add_trace_id_as_query_parameter(url, self.trace_id) + + def get_sts_role_arn(self) -> str: + return self.sts_role_arn diff --git a/openeo/extra/artifacts/internal_s3/model.py b/openeo/extra/artifacts/internal_s3/model.py new file mode 100644 index 000000000..88476e4d6 --- /dev/null +++ b/openeo/extra/artifacts/internal_s3/model.py @@ -0,0 +1,56 @@ +from __future__ import annotations +import hashlib +from dataclasses import dataclass +from openeo.extra.artifacts.uri import StorageURI + + +@dataclass(frozen=True) +class AWSSTSCredentials: + AWS_ACCESS_KEY_ID: str + AWS_SECRET_ACCESS_KEY: str + AWS_SESSION_TOKEN: str + subject_from_web_identity_token: str + + @classmethod + def from_assume_role_response(cls, resp: dict) -> AWSSTSCredentials: + d = resp["Credentials"] + return AWSSTSCredentials( + AWS_ACCESS_KEY_ID=d["AccessKeyId"], + AWS_SECRET_ACCESS_KEY=d["SecretAccessKey"], + AWS_SESSION_TOKEN=d["SessionToken"], + subject_from_web_identity_token=resp["SubjectFromWebIdentityToken"] + ) + + def as_kwargs(self) -> dict: + return { + "aws_access_key_id": self.AWS_ACCESS_KEY_ID, + "aws_secret_access_key": self.AWS_SECRET_ACCESS_KEY, + "aws_session_token": self.AWS_SESSION_TOKEN + } + + def get_user_hash(self) -> str: + hash_object = hashlib.sha1(self.subject_from_web_identity_token.encode()) + return hash_object.hexdigest() + + +@dataclass(frozen=True) +class S3URI(StorageURI): + bucket: str + key: str + + @classmethod + def from_str(cls, uri: str) -> S3URI: + s3_prefix = "s3://" + if uri.startswith(s3_prefix): + without_prefix = uri[len(s3_prefix):] + without_prefix_parts = without_prefix.split("/") + bucket = without_prefix_parts[0] + if len(without_prefix_parts) == 1: + return S3URI(bucket, "") + else: + return S3URI(bucket, "/".join(without_prefix_parts[1:])) + else: + raise ValueError(f"Input {uri} is not a valid S3 URI should be of form s3:///") + + def __str__(self): + return f"s3://{self.bucket}/{self.key}" diff --git a/openeo/extra/artifacts/internal_s3/sts.py b/openeo/extra/artifacts/internal_s3/sts.py new file mode 100644 index 000000000..73792e99b --- /dev/null +++ b/openeo/extra/artifacts/internal_s3/sts.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials +from openeo.extra.artifacts.internal_s3.config import S3Config +from openeo.rest.auth.auth import BearerAuth + +if TYPE_CHECKING: + from openeo.rest.connection import Connection + + +class OpenEOSTSClient: + def __init__(self, config: S3Config): + self.config = config + + def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: + """ + Takes an OpenEO connection object and returns temporary credentials to interact with S3 + """ + auth = conn.auth + assert auth is not None + if not isinstance(auth, BearerAuth): + raise ValueError("Only connections that use federation are allowed.") + auth_token = auth.bearer.split('/') + sts = self.config.build_client("sts") + + return AWSSTSCredentials.from_assume_role_response( + sts.assume_role_with_web_identity( + RoleArn=self._get_aws_access_role(), + RoleSessionName=auth_token[1], + WebIdentityToken=auth_token[2], + DurationSeconds=43200, + ) + ) + + def _get_aws_access_role(self) -> str: + return self.config.sts_role_arn diff --git a/openeo/extra/artifacts/internal_s3/tracer.py b/openeo/extra/artifacts/internal_s3/tracer.py new file mode 100644 index 000000000..98bf63bb8 --- /dev/null +++ b/openeo/extra/artifacts/internal_s3/tracer.py @@ -0,0 +1,30 @@ +from typing import Callable +import logging + +""" +The trace helps to pass on an X-Request-ID header value in all requests made by a +boto3 call. +""" + +TRACE_ID_KEY = "X-Request-ID" + + +def create_header_adder(request_id: str) -> Callable: + def add_request_id_header(request, **kwargs) -> None: + logger = logging.getLogger("openeo.extra.artifacts") + signature_version = kwargs.get("signature_version", "unknown") + if "query" in signature_version: + logger.debug("Do not add trace header for requests using query parameters instead of headers") + return + logger.debug("Adding trace id: {request_id}") + request.headers.add_header(TRACE_ID_KEY, request_id) + return add_request_id_header + + +def add_trace_id(client, trace_id: str = "") -> None: + header_adder = create_header_adder(trace_id) + client.meta.events.register('before-sign.s3', header_adder) + + +def add_trace_id_as_query_parameter(url, trace_id: str) -> str: + return f"{url}&{TRACE_ID_KEY}={trace_id}" diff --git a/openeo/extra/artifacts/uri.py b/openeo/extra/artifacts/uri.py new file mode 100644 index 000000000..c51989713 --- /dev/null +++ b/openeo/extra/artifacts/uri.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod + + +class StorageURI(ABC): + """A URI that is specific to a storage backend. The protocol determines what this URL looks like""" + @classmethod + @abstractmethod + def from_str(cls, uri: str) -> StorageURI: + """factory method to create a typed object from its string representation""" + + @abstractmethod + def __str__(self): + """The __str__ method is expected to be implemented""" diff --git a/setup.py b/setup.py index 7d57686ec..7862c9a3f 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,12 @@ "ipython", ] +artifacts_require = [ + "boto3", + "botocore", + "packaging" +] + name = "openeo" setup( @@ -86,7 +92,7 @@ "importlib_resources; python_version<'3.9'", ], extras_require={ - "tests": tests_require, + "tests": tests_require + artifacts_require, "dev": tests_require + docs_require, "docs": docs_require, "oschmod": [ # install oschmod even when platform is not Windows, e.g. for testing in CI. @@ -94,6 +100,7 @@ ], "localprocessing": localprocessing_require, "jupyter": jupyter_require, + "artifacts": artifacts_require, }, entry_points={ "console_scripts": ["openeo-auth=openeo.rest.auth.cli:main"], diff --git a/tests/extra/artifacts/__init__.py b/tests/extra/artifacts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/extra/artifacts/internal_s3/__init__.py b/tests/extra/artifacts/internal_s3/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/extra/artifacts/internal_s3/test_config.py b/tests/extra/artifacts/internal_s3/test_config.py new file mode 100644 index 000000000..d40fd444f --- /dev/null +++ b/tests/extra/artifacts/internal_s3/test_config.py @@ -0,0 +1,33 @@ +from openeo.extra.artifacts.internal_s3.config import S3Config +import pytest + + +@pytest.mark.parametrize("s3_endpoint_uri,expected", [ + ("https://s3.us-east-2.amazonaws.com", "us-east-2"), + ("https://s3.dualstack.us-east-2.amazonaws.com", "us-east-2"), + ("https://s3-fips.dualstack.us-east-2.amazonaws.com", "us-east-2"), + ("https://s3-fips.us-east-2.amazonaws.com", "us-east-2"), + ("https://s3.waw3-1.openeo.v1.dataspace.copernicus.eu", "waw3-1"), + ("https://s3.prod.waw3-1.openeo.v1.dataspace.copernicus.eu", "waw3-1"), + ("https://s3.stag.waw3-1.openeo.v1.dataspace.copernicus.eu", "waw3-1"), + ("https://s3.dev.waw3-1.openeo.v1.dataspace.copernicus.eu", "waw3-1") +]) +def test_region_should_be_derived_correctly(s3_endpoint_uri: str, expected: str): + # GIVEN config with a certain endpoint + config = S3Config(s3_endpoint_url=s3_endpoint_uri) + + # WHEN a region is extracted + region = config._get_storage_region() + + # THEN is is the expected + assert region == expected, f"Got {region}, expected {expected}" + + +def test_extracting_region_from_invalid_url_must_give_value_error(): + # GIVEN a config with invalid url + config = S3Config(s3_endpoint_url="https://www.google.com") + + # WHEN a region is extracted + # THEN a Value error is raised + with pytest.raises(ValueError): + config._get_storage_region() diff --git a/tests/extra/artifacts/internal_s3/test_s3uri.py b/tests/extra/artifacts/internal_s3/test_s3uri.py new file mode 100644 index 000000000..359cf651d --- /dev/null +++ b/tests/extra/artifacts/internal_s3/test_s3uri.py @@ -0,0 +1,12 @@ +from openeo.extra.artifacts.internal_s3.model import S3URI + + +def test_s3uri_serialization_is_idempotent(): + # GIVEN an S3 URI + my_s3_uri = "s3://mybucket/my-key1" + + # WHEN we convert it to an S3URI object + s3_obj = S3URI.from_str(my_s3_uri) + + # THEN getting the string value must result in same + assert str(s3_obj) == my_s3_uri From a109d2becb9da2d5c7ea27bad40ffad36e00dbac Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 10:44:54 +0100 Subject: [PATCH 02/20] refactor: cleanup TYPE_CHECKING guards No need for the type checking guards as also mentioned in PR feedback. --- openeo/extra/artifacts/internal_s3/artifact_helper.py | 9 +++------ openeo/extra/artifacts/internal_s3/sts.py | 4 +--- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/openeo/extra/artifacts/internal_s3/artifact_helper.py b/openeo/extra/artifacts/internal_s3/artifact_helper.py index 05fe2da2a..19d45152d 100644 --- a/openeo/extra/artifacts/internal_s3/artifact_helper.py +++ b/openeo/extra/artifacts/internal_s3/artifact_helper.py @@ -1,18 +1,15 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING from boto3.s3.transfer import TransferConfig +from pathlib import Path +from openeo.rest.connection import Connection from openeo.extra.artifacts.artifact_helper import ArtifactHelper from openeo.extra.artifacts.internal_s3.sts import OpenEOSTSClient from openeo.extra.artifacts.internal_s3.config import S3Config from openeo.extra.artifacts.internal_s3.model import S3URI - -if TYPE_CHECKING: - from pathlib import Path - from openeo.rest.connection import Connection - from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials +from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials class S3ArtifactHelper(ArtifactHelper): diff --git a/openeo/extra/artifacts/internal_s3/sts.py b/openeo/extra/artifacts/internal_s3/sts.py index 73792e99b..44432fc30 100644 --- a/openeo/extra/artifacts/internal_s3/sts.py +++ b/openeo/extra/artifacts/internal_s3/sts.py @@ -4,9 +4,7 @@ from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials from openeo.extra.artifacts.internal_s3.config import S3Config from openeo.rest.auth.auth import BearerAuth - -if TYPE_CHECKING: - from openeo.rest.connection import Connection +from openeo.rest.connection import Connection class OpenEOSTSClient: From 70386211e3e752ba9484d624c7503d5729736f6e Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 10:59:11 +0100 Subject: [PATCH 03/20] refactor: use ComparableVersion rather than another dependency --- openeo/extra/artifacts/internal_s3/config.py | 4 ++-- setup.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/openeo/extra/artifacts/internal_s3/config.py b/openeo/extra/artifacts/internal_s3/config.py index 8183f4cd8..d4cedf864 100644 --- a/openeo/extra/artifacts/internal_s3/config.py +++ b/openeo/extra/artifacts/internal_s3/config.py @@ -3,14 +3,14 @@ import botocore from openeo import Connection +from openeo.utils.version import ComparableVersion from openeo.extra.artifacts.internal_s3.tracer import add_trace_id, add_trace_id_as_query_parameter from openeo.extra.artifacts.config import StorageConfig from botocore.config import Config from typing import Optional -from packaging.version import Version -if Version(botocore.__version__) < Version("1.36.0"): +if ComparableVersion(botocore.__version__).below("1.36.0"): # Before 1.36 checksuming was not done by default anyway and therefore # there was no opt-out. no_default_checksum_cfg = Config() diff --git a/setup.py b/setup.py index 7862c9a3f..233606e92 100644 --- a/setup.py +++ b/setup.py @@ -58,8 +58,7 @@ artifacts_require = [ "boto3", - "botocore", - "packaging" + "botocore" ] From 55dc834e68f031141cf0f2ee9e03ea13551eda15 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 11:07:16 +0100 Subject: [PATCH 04/20] refactor: follow Python style guidelines --- openeo/extra/artifacts/internal_s3/model.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/openeo/extra/artifacts/internal_s3/model.py b/openeo/extra/artifacts/internal_s3/model.py index 88476e4d6..c202c8107 100644 --- a/openeo/extra/artifacts/internal_s3/model.py +++ b/openeo/extra/artifacts/internal_s3/model.py @@ -6,26 +6,26 @@ @dataclass(frozen=True) class AWSSTSCredentials: - AWS_ACCESS_KEY_ID: str - AWS_SECRET_ACCESS_KEY: str - AWS_SESSION_TOKEN: str + aws_access_key_id: str + aws_secret_access_key: str + aws_session_token: str subject_from_web_identity_token: str @classmethod def from_assume_role_response(cls, resp: dict) -> AWSSTSCredentials: d = resp["Credentials"] return AWSSTSCredentials( - AWS_ACCESS_KEY_ID=d["AccessKeyId"], - AWS_SECRET_ACCESS_KEY=d["SecretAccessKey"], - AWS_SESSION_TOKEN=d["SessionToken"], + aws_access_key_id=d["AccessKeyId"], + aws_secret_access_key=d["SecretAccessKey"], + aws_session_token=d["SessionToken"], subject_from_web_identity_token=resp["SubjectFromWebIdentityToken"] ) def as_kwargs(self) -> dict: return { - "aws_access_key_id": self.AWS_ACCESS_KEY_ID, - "aws_secret_access_key": self.AWS_SECRET_ACCESS_KEY, - "aws_session_token": self.AWS_SESSION_TOKEN + "aws_access_key_id": self.aws_access_key_id, + "aws_secret_access_key": self.aws_secret_access_key, + "aws_session_token": self.aws_session_token } def get_user_hash(self) -> str: From 2589022aaea4b28e6d9972cefc561aa3c3ec9889 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 11:27:49 +0100 Subject: [PATCH 05/20] refactor: use urlparse to handle S3 URI strings --- openeo/extra/artifacts/internal_s3/model.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/openeo/extra/artifacts/internal_s3/model.py b/openeo/extra/artifacts/internal_s3/model.py index c202c8107..2369d3e00 100644 --- a/openeo/extra/artifacts/internal_s3/model.py +++ b/openeo/extra/artifacts/internal_s3/model.py @@ -2,6 +2,7 @@ import hashlib from dataclasses import dataclass from openeo.extra.artifacts.uri import StorageURI +from urllib.parse import urlparse @dataclass(frozen=True) @@ -40,17 +41,16 @@ class S3URI(StorageURI): @classmethod def from_str(cls, uri: str) -> S3URI: - s3_prefix = "s3://" - if uri.startswith(s3_prefix): - without_prefix = uri[len(s3_prefix):] - without_prefix_parts = without_prefix.split("/") - bucket = without_prefix_parts[0] - if len(without_prefix_parts) == 1: - return S3URI(bucket, "") - else: - return S3URI(bucket, "/".join(without_prefix_parts[1:])) - else: + _parsed = urlparse(uri, allow_fragments=False) + if _parsed.scheme != "s3": raise ValueError(f"Input {uri} is not a valid S3 URI should be of form s3:///") + bucket = _parsed.netloc + if _parsed.query: + key = _parsed.path.lstrip('/') + '?' + _parsed.query + else: + key = _parsed.path.lstrip('/') + + return S3URI(bucket, key) def __str__(self): return f"s3://{self.bucket}/{self.key}" From a3eea6214d998dff461f39e6ec12c1fe0fb79969 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 11:54:23 +0100 Subject: [PATCH 06/20] refactor: have builder with dummy implementation. --- openeo/extra/artifacts/__init__.py | 10 +-- openeo/extra/artifacts/artifact_helper.py | 60 +++++------------ openeo/extra/artifacts/artifact_helper_abc.py | 65 +++++++++++++++++++ .../artifacts/internal_s3/artifact_helper.py | 4 +- 4 files changed, 86 insertions(+), 53 deletions(-) create mode 100644 openeo/extra/artifacts/artifact_helper_abc.py diff --git a/openeo/extra/artifacts/__init__.py b/openeo/extra/artifacts/__init__.py index 3985d9e1d..585a494a7 100644 --- a/openeo/extra/artifacts/__init__.py +++ b/openeo/extra/artifacts/__init__.py @@ -1,9 +1 @@ -# Hard coded at this time but this could be a builder that depending on connection builds a client for the Storage -from openeo.extra.artifacts.internal_s3.artifact_helper import S3ArtifactHelper as ArtifactHelper -""" -from openeo.extra.artifacts import ArtifactHelper - -artifact_helper = ArtifactHelper.from_openeo_connection(connection) -storage_uri = artifact_helper.upload_file(object_name, src_file_path) -presigned_uri = artifact_helper.get_presigned_url(storage_uri) -""" \ No newline at end of file +from openeo.extra.artifacts.artifact_helper import ArtifactHelper diff --git a/openeo/extra/artifacts/artifact_helper.py b/openeo/extra/artifacts/artifact_helper.py index b2af479c9..0e194ccb7 100644 --- a/openeo/extra/artifacts/artifact_helper.py +++ b/openeo/extra/artifacts/artifact_helper.py @@ -1,54 +1,30 @@ -from __future__ import annotations -from abc import ABC, abstractmethod from typing import Optional +from openeo import Connection +from openeo.extra.artifacts.artifact_helper_abc import ArtifactHelperBuilderABC, ArtifactHelperABC +from openeo.extra.artifacts.internal_s3.artifact_helper import S3ArtifactHelper from openeo.extra.artifacts.config import StorageConfig -from openeo.extra.artifacts.uri import StorageURI -from openeo.rest.connection import Connection -from pathlib import Path -class ArtifactHelper(ABC): +class ArtifactHelper(ArtifactHelperBuilderABC): @classmethod - def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig] = None) -> ArtifactHelper: + def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig] = None) -> ArtifactHelperABC: """ - Create a new Artifact helper from the OpenEO connection. This is the starting point to upload artifacts - """ - if config is None: - config = cls._get_default_storage_config() - config.load_openeo_connection_metadata(conn) - return cls._from_openeo_connection(conn, config) - - @abstractmethod - def upload_file(self, object_name: str, src_file_path: str | Path) -> StorageURI: - """ - A method to store an artifact remotely and get a URI understandable by the OpenEO processor - """ - - @abstractmethod - def get_presigned_url(self, storage_uri: StorageURI, expires_in_seconds: int) -> str: - """ - A method to convert a StorageURI to a signed https URL which can be accessed via normal http libraries. + Create an artifactHelper for an openEO backend. - These URIs should be kept secret as they provide access to the data. - """ + :param conn ``openeo.Connection`` connection to an openEOBackend + :param config: Optional object to specify configuration for Artifact storage - def __init__(self, config: StorageConfig): - if not config.is_openeo_connection_metadata_loaded(): - raise RuntimeError("config should have openeo connection metadata loaded prior to initialization.") - self._config = config + :return: An Artifact helper based on info provided by the backend . - @classmethod - @abstractmethod - def _get_default_storage_config(cls) -> StorageConfig: - """ - A method that provides a default storage config for the Artifact Helper - """ + Example usage: + ``` + from openeo.extra.artifacts import ArtifactHelper - @classmethod - @abstractmethod - def _from_openeo_connection(cls, conn: Connection, config: StorageConfig) -> ArtifactHelper: - """ - The implementation that creates an artifact helper. This method takes a config which has already been - initialized from the metadata of the OpenEO connection. + artifact_helper = ArtifactHelper.from_openeo_connection(connection) + storage_uri = artifact_helper.upload_file(object_name, src_file_path) + presigned_uri = artifact_helper.get_presigned_url(storage_uri) + ``` """ + # At time of writing there is only one type of artifact store supported so no resolving done yet. + return S3ArtifactHelper.from_openeo_connection(conn, config) diff --git a/openeo/extra/artifacts/artifact_helper_abc.py b/openeo/extra/artifacts/artifact_helper_abc.py new file mode 100644 index 000000000..df3fbe062 --- /dev/null +++ b/openeo/extra/artifacts/artifact_helper_abc.py @@ -0,0 +1,65 @@ +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import Optional + +from openeo.extra.artifacts.config import StorageConfig +from openeo.extra.artifacts.uri import StorageURI +from openeo.rest.connection import Connection +from pathlib import Path + + +class ArtifactHelperBuilderABC(ABC): + @classmethod + @abstractmethod + def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig] = None) -> ArtifactHelperABC: + """ + Builder pattern, only used for implementing support for other Artifact stores. + """ + raise NotImplementedError("ArtifactHelperBuilders must have their own implementation") + + +class ArtifactHelperABC(ArtifactHelperBuilderABC, ABC): + @classmethod + def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig] = None) -> ArtifactHelperABC: + """ + Create a new Artifact helper from the OpenEO connection. This is the starting point to upload artifacts. + Each implementation has its own builder + """ + if config is None: + config = cls._get_default_storage_config() + config.load_openeo_connection_metadata(conn) + return cls._from_openeo_connection(conn, config) + + @abstractmethod + def upload_file(self, object_name: str, src_file_path: str | Path) -> StorageURI: + """ + A method to store an artifact remotely and get a URI understandable by the OpenEO processor + """ + + @abstractmethod + def get_presigned_url(self, storage_uri: StorageURI, expires_in_seconds: int) -> str: + """ + A method to convert a StorageURI to a signed https URL which can be accessed via normal http libraries. + + These URIs should be kept secret as they provide access to the data. + """ + + def __init__(self, config: StorageConfig): + if not config.is_openeo_connection_metadata_loaded(): + raise RuntimeError("config should have openeo connection metadata loaded prior to initialization.") + self._config = config + + @classmethod + @abstractmethod + def _get_default_storage_config(cls) -> StorageConfig: + """ + A method that provides a default storage config for the Artifact Helper + """ + + @classmethod + @abstractmethod + def _from_openeo_connection(cls, conn: Connection, config: StorageConfig) -> ArtifactHelperABC: + """ + The implementation that creates an artifact helper. This method takes a config which has already been + initialized from the metadata of the OpenEO connection. + """ diff --git a/openeo/extra/artifacts/internal_s3/artifact_helper.py b/openeo/extra/artifacts/internal_s3/artifact_helper.py index 19d45152d..5f44eb019 100644 --- a/openeo/extra/artifacts/internal_s3/artifact_helper.py +++ b/openeo/extra/artifacts/internal_s3/artifact_helper.py @@ -5,14 +5,14 @@ from pathlib import Path from openeo.rest.connection import Connection -from openeo.extra.artifacts.artifact_helper import ArtifactHelper +from openeo.extra.artifacts.artifact_helper_abc import ArtifactHelperABC from openeo.extra.artifacts.internal_s3.sts import OpenEOSTSClient from openeo.extra.artifacts.internal_s3.config import S3Config from openeo.extra.artifacts.internal_s3.model import S3URI from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials -class S3ArtifactHelper(ArtifactHelper): +class S3ArtifactHelper(ArtifactHelperABC): BUCKET_NAME = "openeo-artifacts" # From what size will we switch to multi-part-upload MULTIPART_THRESHOLD_IN_MB = 50 From 82eb006ef5b957c53668fd4d8bf51d2bbdbbaa99 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 12:05:16 +0100 Subject: [PATCH 07/20] pr-feedback: use _s3 for internal module --- openeo/extra/artifacts/{internal_s3 => _s3}/__init__.py | 0 .../artifacts/{internal_s3 => _s3}/artifact_helper.py | 8 ++++---- openeo/extra/artifacts/{internal_s3 => _s3}/config.py | 2 +- openeo/extra/artifacts/{internal_s3 => _s3}/model.py | 0 openeo/extra/artifacts/{internal_s3 => _s3}/sts.py | 4 ++-- openeo/extra/artifacts/{internal_s3 => _s3}/tracer.py | 0 openeo/extra/artifacts/artifact_helper.py | 2 +- tests/extra/artifacts/internal_s3/test_config.py | 2 +- tests/extra/artifacts/internal_s3/test_s3uri.py | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) rename openeo/extra/artifacts/{internal_s3 => _s3}/__init__.py (100%) rename openeo/extra/artifacts/{internal_s3 => _s3}/artifact_helper.py (89%) rename openeo/extra/artifacts/{internal_s3 => _s3}/config.py (97%) rename openeo/extra/artifacts/{internal_s3 => _s3}/model.py (100%) rename openeo/extra/artifacts/{internal_s3 => _s3}/sts.py (89%) rename openeo/extra/artifacts/{internal_s3 => _s3}/tracer.py (100%) diff --git a/openeo/extra/artifacts/internal_s3/__init__.py b/openeo/extra/artifacts/_s3/__init__.py similarity index 100% rename from openeo/extra/artifacts/internal_s3/__init__.py rename to openeo/extra/artifacts/_s3/__init__.py diff --git a/openeo/extra/artifacts/internal_s3/artifact_helper.py b/openeo/extra/artifacts/_s3/artifact_helper.py similarity index 89% rename from openeo/extra/artifacts/internal_s3/artifact_helper.py rename to openeo/extra/artifacts/_s3/artifact_helper.py index 5f44eb019..5f037ff4a 100644 --- a/openeo/extra/artifacts/internal_s3/artifact_helper.py +++ b/openeo/extra/artifacts/_s3/artifact_helper.py @@ -6,10 +6,10 @@ from openeo.rest.connection import Connection from openeo.extra.artifacts.artifact_helper_abc import ArtifactHelperABC -from openeo.extra.artifacts.internal_s3.sts import OpenEOSTSClient -from openeo.extra.artifacts.internal_s3.config import S3Config -from openeo.extra.artifacts.internal_s3.model import S3URI -from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials +from openeo.extra.artifacts._s3.sts import OpenEOSTSClient +from openeo.extra.artifacts._s3.config import S3Config +from openeo.extra.artifacts._s3.model import S3URI +from openeo.extra.artifacts._s3.model import AWSSTSCredentials class S3ArtifactHelper(ArtifactHelperABC): diff --git a/openeo/extra/artifacts/internal_s3/config.py b/openeo/extra/artifacts/_s3/config.py similarity index 97% rename from openeo/extra/artifacts/internal_s3/config.py rename to openeo/extra/artifacts/_s3/config.py index d4cedf864..82d7c5556 100644 --- a/openeo/extra/artifacts/internal_s3/config.py +++ b/openeo/extra/artifacts/_s3/config.py @@ -4,7 +4,7 @@ from openeo import Connection from openeo.utils.version import ComparableVersion -from openeo.extra.artifacts.internal_s3.tracer import add_trace_id, add_trace_id_as_query_parameter +from openeo.extra.artifacts._s3.tracer import add_trace_id, add_trace_id_as_query_parameter from openeo.extra.artifacts.config import StorageConfig from botocore.config import Config from typing import Optional diff --git a/openeo/extra/artifacts/internal_s3/model.py b/openeo/extra/artifacts/_s3/model.py similarity index 100% rename from openeo/extra/artifacts/internal_s3/model.py rename to openeo/extra/artifacts/_s3/model.py diff --git a/openeo/extra/artifacts/internal_s3/sts.py b/openeo/extra/artifacts/_s3/sts.py similarity index 89% rename from openeo/extra/artifacts/internal_s3/sts.py rename to openeo/extra/artifacts/_s3/sts.py index 44432fc30..f41238c02 100644 --- a/openeo/extra/artifacts/internal_s3/sts.py +++ b/openeo/extra/artifacts/_s3/sts.py @@ -1,8 +1,8 @@ from __future__ import annotations from typing import TYPE_CHECKING -from openeo.extra.artifacts.internal_s3.model import AWSSTSCredentials -from openeo.extra.artifacts.internal_s3.config import S3Config +from openeo.extra.artifacts._s3.model import AWSSTSCredentials +from openeo.extra.artifacts._s3.config import S3Config from openeo.rest.auth.auth import BearerAuth from openeo.rest.connection import Connection diff --git a/openeo/extra/artifacts/internal_s3/tracer.py b/openeo/extra/artifacts/_s3/tracer.py similarity index 100% rename from openeo/extra/artifacts/internal_s3/tracer.py rename to openeo/extra/artifacts/_s3/tracer.py diff --git a/openeo/extra/artifacts/artifact_helper.py b/openeo/extra/artifacts/artifact_helper.py index 0e194ccb7..05b0f1683 100644 --- a/openeo/extra/artifacts/artifact_helper.py +++ b/openeo/extra/artifacts/artifact_helper.py @@ -2,7 +2,7 @@ from openeo import Connection from openeo.extra.artifacts.artifact_helper_abc import ArtifactHelperBuilderABC, ArtifactHelperABC -from openeo.extra.artifacts.internal_s3.artifact_helper import S3ArtifactHelper +from openeo.extra.artifacts._s3.artifact_helper import S3ArtifactHelper from openeo.extra.artifacts.config import StorageConfig diff --git a/tests/extra/artifacts/internal_s3/test_config.py b/tests/extra/artifacts/internal_s3/test_config.py index d40fd444f..064ca0c50 100644 --- a/tests/extra/artifacts/internal_s3/test_config.py +++ b/tests/extra/artifacts/internal_s3/test_config.py @@ -1,4 +1,4 @@ -from openeo.extra.artifacts.internal_s3.config import S3Config +from openeo.extra.artifacts._s3.config import S3Config import pytest diff --git a/tests/extra/artifacts/internal_s3/test_s3uri.py b/tests/extra/artifacts/internal_s3/test_s3uri.py index 359cf651d..70735aae5 100644 --- a/tests/extra/artifacts/internal_s3/test_s3uri.py +++ b/tests/extra/artifacts/internal_s3/test_s3uri.py @@ -1,4 +1,4 @@ -from openeo.extra.artifacts.internal_s3.model import S3URI +from openeo.extra.artifacts._s3.model import S3URI def test_s3uri_serialization_is_idempotent(): From a7ae529a1202c3fef7607d065bde5255d0f6d7b1 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 12:55:25 +0100 Subject: [PATCH 08/20] pr-feedback: more sensible role session name --- openeo/extra/artifacts/_s3/sts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openeo/extra/artifacts/_s3/sts.py b/openeo/extra/artifacts/_s3/sts.py index f41238c02..ca030a8c0 100644 --- a/openeo/extra/artifacts/_s3/sts.py +++ b/openeo/extra/artifacts/_s3/sts.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime from typing import TYPE_CHECKING from openeo.extra.artifacts._s3.model import AWSSTSCredentials from openeo.extra.artifacts._s3.config import S3Config @@ -18,14 +19,14 @@ def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: auth = conn.auth assert auth is not None if not isinstance(auth, BearerAuth): - raise ValueError("Only connections that use federation are allowed.") + raise ValueError("Only connections that have BearerAuth can be used.") auth_token = auth.bearer.split('/') sts = self.config.build_client("sts") return AWSSTSCredentials.from_assume_role_response( sts.assume_role_with_web_identity( RoleArn=self._get_aws_access_role(), - RoleSessionName=auth_token[1], + RoleSessionName=f"artifact-helper-{datetime.datetime.now(datetime.UTC).strftime('%Y%m%d%H%M%S')}", WebIdentityToken=auth_token[2], DurationSeconds=43200, ) From 41df22acb2c1a3c7e27eb753fcf61bbdf9558a6c Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 12:58:58 +0100 Subject: [PATCH 09/20] pr-feedback: do not use dunder method as abstract method --- openeo/extra/artifacts/_s3/model.py | 2 +- openeo/extra/artifacts/uri.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/openeo/extra/artifacts/_s3/model.py b/openeo/extra/artifacts/_s3/model.py index 2369d3e00..f0a9b93ba 100644 --- a/openeo/extra/artifacts/_s3/model.py +++ b/openeo/extra/artifacts/_s3/model.py @@ -52,5 +52,5 @@ def from_str(cls, uri: str) -> S3URI: return S3URI(bucket, key) - def __str__(self): + def to_string(self) -> str: return f"s3://{self.bucket}/{self.key}" diff --git a/openeo/extra/artifacts/uri.py b/openeo/extra/artifacts/uri.py index c51989713..1bbdcf9b1 100644 --- a/openeo/extra/artifacts/uri.py +++ b/openeo/extra/artifacts/uri.py @@ -10,6 +10,9 @@ class StorageURI(ABC): def from_str(cls, uri: str) -> StorageURI: """factory method to create a typed object from its string representation""" - @abstractmethod def __str__(self): - """The __str__ method is expected to be implemented""" + return self.to_string() + + @abstractmethod + def to_string(self) -> str: + raise NotImplementedError("Implementation must implement explicit handling.") \ No newline at end of file From cfac40616653d2c053c644d9b0be8832498170cc Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 13:08:02 +0100 Subject: [PATCH 10/20] pr-feedback: handling default values of dataclass --- openeo/extra/artifacts/_s3/config.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/openeo/extra/artifacts/_s3/config.py b/openeo/extra/artifacts/_s3/config.py index 82d7c5556..160a758cf 100644 --- a/openeo/extra/artifacts/_s3/config.py +++ b/openeo/extra/artifacts/_s3/config.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field import boto3 import botocore @@ -20,6 +20,9 @@ ) +DISABLE_TRACING_TRACE_ID = "00000000-0000-0000-0000-000000000000" + + @dataclass class S3Config(StorageConfig): """The s3 endpoint url protocol:://fqdn[:portnumber]""" @@ -27,9 +30,9 @@ class S3Config(StorageConfig): """The sts endpoint url protocol:://fqdn[:portnumber]""" sts_endpoint_url: Optional[str] = None """The trace_id is if you want to send a uuid4 identifier to the backend""" - trace_id: str = "" + trace_id: str = DISABLE_TRACING_TRACE_ID """You can change the botocore_config used but this is an expert option""" - botocore_config: Optional[Config] = None + botocore_config: Config = field(default_factory=lambda: no_default_checksum_cfg) """The role ARN to be assumed""" sts_role_arn: Optional[str] = None @@ -46,8 +49,8 @@ def _load_openeo_connection_metadata(self, conn: Connection) -> None: if self.sts_role_arn is None: self.sts_role_arn = "arn:aws:iam::000000000000:role/S3Access" - def __post_init__(self): - self.botocore_config = self.botocore_config or no_default_checksum_cfg + def should_trace(self) -> bool: + return self.trace_id != DISABLE_TRACING_TRACE_ID def build_client(self, service_name: str, session_kwargs: Optional[dict] = None): """ @@ -63,7 +66,7 @@ def build_client(self, service_name: str, session_kwargs: Optional[dict] = None) endpoint_url=self._get_endpoint_url(service_name), config=self.botocore_config, ) - if self.trace_id != "": + if self.should_trace(): add_trace_id(client, self.trace_id) return client @@ -104,7 +107,7 @@ def _get_endpoint_url(self, service_name: str) -> str: raise ValueError(f"Unsupported service {service_name}") def add_trace_id_qp_if_needed(self, url: str) -> str: - if self.trace_id == "": + if not self.should_trace(): return url return add_trace_id_as_query_parameter(url, self.trace_id) From 5ab78a038b7a2b580ecc965306b819301e7a46e2 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 13:23:37 +0100 Subject: [PATCH 11/20] pr-feedback: have config frozen --- openeo/extra/artifacts/_s3/config.py | 8 ++++---- openeo/extra/artifacts/config.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/openeo/extra/artifacts/_s3/config.py b/openeo/extra/artifacts/_s3/config.py index 160a758cf..e5929e4c3 100644 --- a/openeo/extra/artifacts/_s3/config.py +++ b/openeo/extra/artifacts/_s3/config.py @@ -23,7 +23,7 @@ DISABLE_TRACING_TRACE_ID = "00000000-0000-0000-0000-000000000000" -@dataclass +@dataclass(frozen=True) class S3Config(StorageConfig): """The s3 endpoint url protocol:://fqdn[:portnumber]""" s3_endpoint_url: Optional[str] = None @@ -41,13 +41,13 @@ def _load_openeo_connection_metadata(self, conn: Connection) -> None: Hard coding since connection does not allow automatic determining config yet. """ if self.s3_endpoint_url is None: - self.s3_endpoint_url = "https://s3.waw3-1.openeo.v1.dataspace.copernicus.eu" + object.__setattr__(self, "s3_endpoint_url", "https://s3.waw3-1.openeo.v1.dataspace.copernicus.eu") if self.sts_endpoint_url is None: - self.sts_endpoint_url = "https://sts.waw3-1.openeo.v1.dataspace.copernicus.eu" + object.__setattr__(self, "sts_endpoint_url", "https://sts.waw3-1.openeo.v1.dataspace.copernicus.eu") if self.sts_role_arn is None: - self.sts_role_arn = "arn:aws:iam::000000000000:role/S3Access" + object.__setattr__(self, "sts_role_arn", "arn:aws:iam::000000000000:role/S3Access") def should_trace(self) -> bool: return self.trace_id != DISABLE_TRACING_TRACE_ID diff --git a/openeo/extra/artifacts/config.py b/openeo/extra/artifacts/config.py index b3c235579..38184b15e 100644 --- a/openeo/extra/artifacts/config.py +++ b/openeo/extra/artifacts/config.py @@ -25,7 +25,7 @@ def load_openeo_connection_metadata(self, conn: Connection) -> None: """ if not self.is_openeo_connection_metadata_loaded(): self._load_openeo_connection_metadata(conn) - setattr(self, _METADATA_LOADED, True) + object.__setattr__(self, _METADATA_LOADED, True) def is_openeo_connection_metadata_loaded(self) -> bool: """ From 3f8d6c74a173ad26140eca7593f7cf5511a48fc4 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Fri, 14 Mar 2025 14:30:44 +0100 Subject: [PATCH 12/20] pr-feedback: upload_file signature --- openeo/extra/artifacts/_s3/artifact_helper.py | 30 ++++++++++++++++--- openeo/extra/artifacts/artifact_helper_abc.py | 2 +- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/openeo/extra/artifacts/_s3/artifact_helper.py b/openeo/extra/artifacts/_s3/artifact_helper.py index 5f037ff4a..95a3c58f2 100644 --- a/openeo/extra/artifacts/_s3/artifact_helper.py +++ b/openeo/extra/artifacts/_s3/artifact_helper.py @@ -37,14 +37,28 @@ def _get_upload_prefix(self) -> str: def _get_upload_key(self, object_name: str) -> str: return f"{self._get_upload_prefix()}{object_name}" - - def upload_file(self, object_name: str, src_file_path: str | Path) -> S3URI: + + @staticmethod + def get_object_name_from_path(path: str | Path) -> str: + if isinstance(path, str): + path = Path(path) + return path.name + + def upload_file(self, path: str | Path, object_name: str = "") -> S3URI: + """ + Upload a file to a backend understanding the S3 API + + :param path A file path to the file that must be uploaded + :param object_name: Optional the final part of the name to be uploaded. If omitted the filename is used. + + :return: `S3URI` A S3URI that points to the uploaded file in the S3 compatible backend + """ mb = 1024 ** 2 config = TransferConfig(multipart_threshold=self.MULTIPART_THRESHOLD_IN_MB * mb) bucket = self.BUCKET_NAME - key = self._get_upload_key(object_name) + key = self._get_upload_key(object_name or self.get_object_name_from_path(path)) self.s3.upload_file( - str(src_file_path), + str(path), bucket, key, Config=config @@ -52,6 +66,14 @@ def upload_file(self, object_name: str, src_file_path: str | Path) -> S3URI: return S3URI(bucket, key) def get_presigned_url(self, storage_uri: S3URI, expires_in_seconds: int = 7 * 3600 * 24) -> str: + """ + Get a presigned URL to allow retrieval of an object. + + :param storage_uri `S3URI` A S3URI that points to the uploaded file in the S3 compatible backend + :param expires_in_seconds: Optional the number of seconds the link is valid for (defaults to 7 days) + + :return: `str` A HTTP url that can be used to download a file. It also supports Range header in its requests. + """ url = self.s3.generate_presigned_url( 'get_object', Params={'Bucket': storage_uri.bucket, 'Key': storage_uri.key}, diff --git a/openeo/extra/artifacts/artifact_helper_abc.py b/openeo/extra/artifacts/artifact_helper_abc.py index df3fbe062..6dea5f1c0 100644 --- a/openeo/extra/artifacts/artifact_helper_abc.py +++ b/openeo/extra/artifacts/artifact_helper_abc.py @@ -31,7 +31,7 @@ def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig return cls._from_openeo_connection(conn, config) @abstractmethod - def upload_file(self, object_name: str, src_file_path: str | Path) -> StorageURI: + def upload_file(self, path: str | Path, object_name: str = "") -> StorageURI: """ A method to store an artifact remotely and get a URI understandable by the OpenEO processor """ From 7b70a9a3793edc3e0230ce6336253d95f4f8b62d Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Mon, 28 Apr 2025 10:02:07 +0200 Subject: [PATCH 13/20] pr: refactor with backend capabilities --- .../artifacts/{_s3 => _s3sts}/__init__.py | 0 .../{_s3 => _s3sts}/artifact_helper.py | 74 ++++++++------- .../extra/artifacts/{_s3 => _s3sts}/config.py | 45 ++++++---- .../extra/artifacts/{_s3 => _s3sts}/model.py | 22 +++-- openeo/extra/artifacts/{_s3 => _s3sts}/sts.py | 12 +-- .../extra/artifacts/{_s3 => _s3sts}/tracer.py | 5 +- openeo/extra/artifacts/artifact_helper.py | 30 +++++-- openeo/extra/artifacts/artifact_helper_abc.py | 12 ++- openeo/extra/artifacts/backend.py | 90 +++++++++++++++++++ openeo/extra/artifacts/config.py | 24 +++-- openeo/extra/artifacts/exceptions.py | 51 +++++++++++ setup.py | 4 +- .../artifacts/internal_s3/test_config.py | 7 +- .../extra/artifacts/internal_s3/test_s3uri.py | 4 +- 14 files changed, 292 insertions(+), 88 deletions(-) rename openeo/extra/artifacts/{_s3 => _s3sts}/__init__.py (100%) rename openeo/extra/artifacts/{_s3 => _s3sts}/artifact_helper.py (60%) rename openeo/extra/artifacts/{_s3 => _s3sts}/config.py (80%) rename openeo/extra/artifacts/{_s3 => _s3sts}/model.py (74%) rename openeo/extra/artifacts/{_s3 => _s3sts}/sts.py (82%) rename openeo/extra/artifacts/{_s3 => _s3sts}/tracer.py (93%) create mode 100644 openeo/extra/artifacts/backend.py create mode 100644 openeo/extra/artifacts/exceptions.py diff --git a/openeo/extra/artifacts/_s3/__init__.py b/openeo/extra/artifacts/_s3sts/__init__.py similarity index 100% rename from openeo/extra/artifacts/_s3/__init__.py rename to openeo/extra/artifacts/_s3sts/__init__.py diff --git a/openeo/extra/artifacts/_s3/artifact_helper.py b/openeo/extra/artifacts/_s3sts/artifact_helper.py similarity index 60% rename from openeo/extra/artifacts/_s3/artifact_helper.py rename to openeo/extra/artifacts/_s3sts/artifact_helper.py index 95a3c58f2..b83392a98 100644 --- a/openeo/extra/artifacts/_s3/artifact_helper.py +++ b/openeo/extra/artifacts/_s3sts/artifact_helper.py @@ -1,40 +1,48 @@ from __future__ import annotations import datetime -from boto3.s3.transfer import TransferConfig +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from types_boto3_s3.client import S3Client + from pathlib import Path -from openeo.rest.connection import Connection +from boto3.s3.transfer import TransferConfig + +from openeo.extra.artifacts._s3sts.config import S3STSConfig +from openeo.extra.artifacts._s3sts.model import S3URI, AWSSTSCredentials +from openeo.extra.artifacts._s3sts.sts import OpenEOSTSClient from openeo.extra.artifacts.artifact_helper_abc import ArtifactHelperABC -from openeo.extra.artifacts._s3.sts import OpenEOSTSClient -from openeo.extra.artifacts._s3.config import S3Config -from openeo.extra.artifacts._s3.model import S3URI -from openeo.extra.artifacts._s3.model import AWSSTSCredentials +from openeo.rest.connection import Connection -class S3ArtifactHelper(ArtifactHelperABC): - BUCKET_NAME = "openeo-artifacts" +class S3STSArtifactHelper(ArtifactHelperABC): # From what size will we switch to multi-part-upload MULTIPART_THRESHOLD_IN_MB = 50 - def __init__(self, creds: AWSSTSCredentials, config: S3Config): + def __init__(self, conn: Connection, config: S3STSConfig): super().__init__(config) - self._creds = creds - self.s3 = config.build_client("s3", session_kwargs=creds.as_kwargs()) - + self.conn = conn + self.config = config + self._creds = self.get_new_creds() + self._s3: S3Client = config.build_client("s3", session_kwargs=self._creds.as_kwargs()) + @classmethod - def _from_openeo_connection(cls, conn: Connection, config: S3Config) -> S3ArtifactHelper: - sts = OpenEOSTSClient(config=config) - creds = sts.assume_from_openeo_connection(conn) - return S3ArtifactHelper(creds, config=config) + def _from_openeo_connection(cls, conn: Connection, config: S3STSConfig) -> S3STSArtifactHelper: + return S3STSArtifactHelper(conn, config=config) + + def get_new_creds(self) -> AWSSTSCredentials: + sts = OpenEOSTSClient(config=self.config) + return sts.assume_from_openeo_connection(self.conn) def _user_prefix(self) -> str: """Each user has its own prefix retrieve it""" return self._creds.get_user_hash() - + def _get_upload_prefix(self) -> str: return f"{self._user_prefix()}/{datetime.datetime.now(datetime.UTC).strftime('%Y/%m/%d')}/" - + def _get_upload_key(self, object_name: str) -> str: return f"{self._get_upload_prefix()}{object_name}" @@ -44,6 +52,11 @@ def get_object_name_from_path(path: str | Path) -> str: path = Path(path) return path.name + def _get_s3_client(self): + # TODO: validate whether credentials are still reasonably long valid + # and if not refresh credentials and rebuild client + return self._s3 + def upload_file(self, path: str | Path, object_name: str = "") -> S3URI: """ Upload a file to a backend understanding the S3 API @@ -53,18 +66,13 @@ def upload_file(self, path: str | Path, object_name: str = "") -> S3URI: :return: `S3URI` A S3URI that points to the uploaded file in the S3 compatible backend """ - mb = 1024 ** 2 + mb = 1024**2 config = TransferConfig(multipart_threshold=self.MULTIPART_THRESHOLD_IN_MB * mb) - bucket = self.BUCKET_NAME + bucket = self.config.bucket key = self._get_upload_key(object_name or self.get_object_name_from_path(path)) - self.s3.upload_file( - str(path), - bucket, - key, - Config=config - ) + self._get_s3_client().upload_file(str(path), bucket, key, Config=config) return S3URI(bucket, key) - + def get_presigned_url(self, storage_uri: S3URI, expires_in_seconds: int = 7 * 3600 * 24) -> str: """ Get a presigned URL to allow retrieval of an object. @@ -74,14 +82,12 @@ def get_presigned_url(self, storage_uri: S3URI, expires_in_seconds: int = 7 * 36 :return: `str` A HTTP url that can be used to download a file. It also supports Range header in its requests. """ - url = self.s3.generate_presigned_url( - 'get_object', - Params={'Bucket': storage_uri.bucket, 'Key': storage_uri.key}, - ExpiresIn=expires_in_seconds + url = self._get_s3_client().generate_presigned_url( + "get_object", Params={"Bucket": storage_uri.bucket, "Key": storage_uri.key}, ExpiresIn=expires_in_seconds ) - assert isinstance(self._config, S3Config) + assert isinstance(self._config, S3STSConfig) return self._config.add_trace_id_qp_if_needed(url) @classmethod - def _get_default_storage_config(cls) -> S3Config: - return S3Config() + def _get_default_storage_config(cls) -> S3STSConfig: + return S3STSConfig() diff --git a/openeo/extra/artifacts/_s3/config.py b/openeo/extra/artifacts/_s3sts/config.py similarity index 80% rename from openeo/extra/artifacts/_s3/config.py rename to openeo/extra/artifacts/_s3sts/config.py index e5929e4c3..c525cd466 100644 --- a/openeo/extra/artifacts/_s3/config.py +++ b/openeo/extra/artifacts/_s3sts/config.py @@ -1,14 +1,17 @@ from dataclasses import dataclass, field +from typing import Optional + import boto3 import botocore - -from openeo import Connection -from openeo.utils.version import ComparableVersion -from openeo.extra.artifacts._s3.tracer import add_trace_id, add_trace_id_as_query_parameter -from openeo.extra.artifacts.config import StorageConfig from botocore.config import Config -from typing import Optional +from openeo.extra.artifacts._s3sts.tracer import ( + add_trace_id, + add_trace_id_as_query_parameter, +) +from openeo.extra.artifacts.backend import ProviderCfg +from openeo.extra.artifacts.config import StorageConfig +from openeo.utils.version import ComparableVersion if ComparableVersion(botocore.__version__).below("1.36.0"): # Before 1.36 checksuming was not done by default anyway and therefore @@ -16,7 +19,7 @@ no_default_checksum_cfg = Config() else: no_default_checksum_cfg = Config( - request_checksum_calculation='when_required', + request_checksum_calculation="when_required", ) @@ -24,8 +27,9 @@ @dataclass(frozen=True) -class S3Config(StorageConfig): +class S3STSConfig(StorageConfig): """The s3 endpoint url protocol:://fqdn[:portnumber]""" + s3_endpoint_url: Optional[str] = None """The sts endpoint url protocol:://fqdn[:portnumber]""" sts_endpoint_url: Optional[str] = None @@ -35,19 +39,22 @@ class S3Config(StorageConfig): botocore_config: Config = field(default_factory=lambda: no_default_checksum_cfg) """The role ARN to be assumed""" sts_role_arn: Optional[str] = None + """The bucket to store the object into""" + bucket: Optional[str] = None - def _load_openeo_connection_metadata(self, conn: Connection) -> None: - """ - Hard coding since connection does not allow automatic determining config yet. - """ + def _load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: + assert provider_cfg.type == "S3STSConfig" if self.s3_endpoint_url is None: - object.__setattr__(self, "s3_endpoint_url", "https://s3.waw3-1.openeo.v1.dataspace.copernicus.eu") + object.__setattr__(self, "s3_endpoint_url", provider_cfg["s3_endpoint"]) if self.sts_endpoint_url is None: - object.__setattr__(self, "sts_endpoint_url", "https://sts.waw3-1.openeo.v1.dataspace.copernicus.eu") + object.__setattr__(self, "sts_endpoint_url", provider_cfg["sts_endpoint"]) if self.sts_role_arn is None: - object.__setattr__(self, "sts_role_arn", "arn:aws:iam::000000000000:role/S3Access") + object.__setattr__(self, "sts_role_arn", provider_cfg["role"]) + + if self.bucket is None: + object.__setattr__(self, "bucket", provider_cfg["bucket"]) def should_trace(self) -> bool: return self.trace_id != DISABLE_TRACING_TRACE_ID @@ -62,9 +69,9 @@ def build_client(self, service_name: str, session_kwargs: Optional[dict] = None) session_kwargs = session_kwargs or {} session = boto3.Session(region_name=self._get_storage_region(), **session_kwargs) client = session.client( - service_name, + service_name, endpoint_url=self._get_endpoint_url(service_name), - config=self.botocore_config, + config=self.botocore_config, ) if self.should_trace(): add_trace_id(client, self.trace_id) @@ -76,8 +83,8 @@ def _remove_protocol_from_uri(uri: str): idx = uri.find(uri_separator) if idx < 0: raise ValueError("_remove_protocol_from_uri must be of form protocol://...") - return uri[idx+len(uri_separator):] - + return uri[idx + len(uri_separator) :] + def _get_storage_region(self) -> str: """ S3 URIs follow the convention detailed on https://docs.aws.amazon.com/general/latest/gr/s3.html diff --git a/openeo/extra/artifacts/_s3/model.py b/openeo/extra/artifacts/_s3sts/model.py similarity index 74% rename from openeo/extra/artifacts/_s3/model.py rename to openeo/extra/artifacts/_s3sts/model.py index f0a9b93ba..f39f9cbc1 100644 --- a/openeo/extra/artifacts/_s3/model.py +++ b/openeo/extra/artifacts/_s3sts/model.py @@ -1,9 +1,17 @@ from __future__ import annotations + +import datetime +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from types_boto3_sts.type_defs import AssumeRoleWithWebIdentityResponseTypeDef + import hashlib from dataclasses import dataclass -from openeo.extra.artifacts.uri import StorageURI from urllib.parse import urlparse +from openeo.extra.artifacts.uri import StorageURI + @dataclass(frozen=True) class AWSSTSCredentials: @@ -11,22 +19,24 @@ class AWSSTSCredentials: aws_secret_access_key: str aws_session_token: str subject_from_web_identity_token: str + expiration: datetime.datetime @classmethod - def from_assume_role_response(cls, resp: dict) -> AWSSTSCredentials: + def from_assume_role_response(cls, resp: AssumeRoleWithWebIdentityResponseTypeDef) -> AWSSTSCredentials: d = resp["Credentials"] return AWSSTSCredentials( aws_access_key_id=d["AccessKeyId"], aws_secret_access_key=d["SecretAccessKey"], aws_session_token=d["SessionToken"], - subject_from_web_identity_token=resp["SubjectFromWebIdentityToken"] + subject_from_web_identity_token=resp["SubjectFromWebIdentityToken"], + expiration=d["Expiration"], ) def as_kwargs(self) -> dict: return { "aws_access_key_id": self.aws_access_key_id, "aws_secret_access_key": self.aws_secret_access_key, - "aws_session_token": self.aws_session_token + "aws_session_token": self.aws_session_token, } def get_user_hash(self) -> str: @@ -46,9 +56,9 @@ def from_str(cls, uri: str) -> S3URI: raise ValueError(f"Input {uri} is not a valid S3 URI should be of form s3:///") bucket = _parsed.netloc if _parsed.query: - key = _parsed.path.lstrip('/') + '?' + _parsed.query + key = _parsed.path.lstrip("/") + "?" + _parsed.query else: - key = _parsed.path.lstrip('/') + key = _parsed.path.lstrip("/") return S3URI(bucket, key) diff --git a/openeo/extra/artifacts/_s3/sts.py b/openeo/extra/artifacts/_s3sts/sts.py similarity index 82% rename from openeo/extra/artifacts/_s3/sts.py rename to openeo/extra/artifacts/_s3sts/sts.py index ca030a8c0..a596486d6 100644 --- a/openeo/extra/artifacts/_s3/sts.py +++ b/openeo/extra/artifacts/_s3sts/sts.py @@ -1,15 +1,15 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING -from openeo.extra.artifacts._s3.model import AWSSTSCredentials -from openeo.extra.artifacts._s3.config import S3Config + +from openeo.extra.artifacts._s3sts.config import S3STSConfig +from openeo.extra.artifacts._s3sts.model import AWSSTSCredentials from openeo.rest.auth.auth import BearerAuth from openeo.rest.connection import Connection class OpenEOSTSClient: - def __init__(self, config: S3Config): + def __init__(self, config: S3STSConfig): self.config = config def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: @@ -20,7 +20,7 @@ def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: assert auth is not None if not isinstance(auth, BearerAuth): raise ValueError("Only connections that have BearerAuth can be used.") - auth_token = auth.bearer.split('/') + auth_token = auth.bearer.split("/") sts = self.config.build_client("sts") return AWSSTSCredentials.from_assume_role_response( @@ -31,6 +31,6 @@ def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: DurationSeconds=43200, ) ) - + def _get_aws_access_role(self) -> str: return self.config.sts_role_arn diff --git a/openeo/extra/artifacts/_s3/tracer.py b/openeo/extra/artifacts/_s3sts/tracer.py similarity index 93% rename from openeo/extra/artifacts/_s3/tracer.py rename to openeo/extra/artifacts/_s3sts/tracer.py index 98bf63bb8..43e856053 100644 --- a/openeo/extra/artifacts/_s3/tracer.py +++ b/openeo/extra/artifacts/_s3sts/tracer.py @@ -1,5 +1,5 @@ -from typing import Callable import logging +from typing import Callable """ The trace helps to pass on an X-Request-ID header value in all requests made by a @@ -18,12 +18,13 @@ def add_request_id_header(request, **kwargs) -> None: return logger.debug("Adding trace id: {request_id}") request.headers.add_header(TRACE_ID_KEY, request_id) + return add_request_id_header def add_trace_id(client, trace_id: str = "") -> None: header_adder = create_header_adder(trace_id) - client.meta.events.register('before-sign.s3', header_adder) + client.meta.events.register("before-sign.s3", header_adder) def add_trace_id_as_query_parameter(url, trace_id: str) -> str: diff --git a/openeo/extra/artifacts/artifact_helper.py b/openeo/extra/artifacts/artifact_helper.py index 05b0f1683..e4d9b77d4 100644 --- a/openeo/extra/artifacts/artifact_helper.py +++ b/openeo/extra/artifacts/artifact_helper.py @@ -1,10 +1,20 @@ -from typing import Optional +from typing import Dict, Optional, Type from openeo import Connection -from openeo.extra.artifacts.artifact_helper_abc import ArtifactHelperBuilderABC, ArtifactHelperABC -from openeo.extra.artifacts._s3.artifact_helper import S3ArtifactHelper +from openeo.extra.artifacts._s3sts.artifact_helper import S3STSArtifactHelper +from openeo.extra.artifacts._s3sts.config import S3STSConfig +from openeo.extra.artifacts.artifact_helper_abc import ( + ArtifactHelperABC, + ArtifactHelperBuilderABC, +) +from openeo.extra.artifacts.backend import ArtifactCapabilities from openeo.extra.artifacts.config import StorageConfig +from openeo.extra.artifacts.exceptions import UnsupportedArtifactsType +cfg_to_helper: Dict[Type[StorageConfig], Type[ArtifactHelperABC]] = {S3STSConfig: S3STSArtifactHelper} +cfg_type_to_helper: Dict[str, Type[ArtifactHelperABC]] = { + StorageConfig.get_type_from(cfg): helper for cfg, helper in cfg_to_helper.items() +} class ArtifactHelper(ArtifactHelperBuilderABC): @classmethod @@ -26,5 +36,15 @@ def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig presigned_uri = artifact_helper.get_presigned_url(storage_uri) ``` """ - # At time of writing there is only one type of artifact store supported so no resolving done yet. - return S3ArtifactHelper.from_openeo_connection(conn, config) + if config is None: + config_type = ArtifactCapabilities(conn).get_preferred_artifacts_provider().type + else: + config_type = config.get_type() + + try: + artifact_helper = cfg_type_to_helper[config_type] + return artifact_helper.from_openeo_connection( + conn, ArtifactCapabilities(conn).get_preferred_artifacts_provider(), config=config + ) + except KeyError as ke: + raise UnsupportedArtifactsType(config_type) from ke diff --git a/openeo/extra/artifacts/artifact_helper_abc.py b/openeo/extra/artifacts/artifact_helper_abc.py index 6dea5f1c0..dac0eee0c 100644 --- a/openeo/extra/artifacts/artifact_helper_abc.py +++ b/openeo/extra/artifacts/artifact_helper_abc.py @@ -1,11 +1,13 @@ from __future__ import annotations + from abc import ABC, abstractmethod +from pathlib import Path from typing import Optional +from openeo.extra.artifacts.backend import ProviderCfg from openeo.extra.artifacts.config import StorageConfig from openeo.extra.artifacts.uri import StorageURI from openeo.rest.connection import Connection -from pathlib import Path class ArtifactHelperBuilderABC(ABC): @@ -18,16 +20,18 @@ def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig raise NotImplementedError("ArtifactHelperBuilders must have their own implementation") -class ArtifactHelperABC(ArtifactHelperBuilderABC, ABC): +class ArtifactHelperABC(ABC): @classmethod - def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig] = None) -> ArtifactHelperABC: + def from_openeo_connection( + cls, conn: Connection, provider_cfg: ProviderCfg, *, config: Optional[StorageConfig] = None + ) -> ArtifactHelperABC: """ Create a new Artifact helper from the OpenEO connection. This is the starting point to upload artifacts. Each implementation has its own builder """ if config is None: config = cls._get_default_storage_config() - config.load_openeo_connection_metadata(conn) + config.load_connection_provided_cfg(provider_cfg) return cls._from_openeo_connection(conn, config) @abstractmethod diff --git a/openeo/extra/artifacts/backend.py b/openeo/extra/artifacts/backend.py new file mode 100644 index 000000000..2576b11d7 --- /dev/null +++ b/openeo/extra/artifacts/backend.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +from typing import Any, Dict, List, TypedDict + +from openeo import Connection +from openeo.extra.artifacts.exceptions import ( + ArtifactsException, + EmptyAdvertisedProviders, + InvalidProviderCfg, + NoAdvertisedProviders, + NoArtifactsCapability, + NoDefaultConfig, +) + +_capabilities_cache: Dict[str, Dict] = {} + + +class TProviderCfg(TypedDict): + """The ID of the provider config not used in MVP""" + + id: str + """The type of artifacts storage which defines how to interact with the storage""" + type: str + """The config for the artifacts storage this will depend on the type""" + cfg: Dict[str, Any] + + +class ProviderCfg: + def __init__(self, id: str, type: str, cfg: dict): + self.id = id + self.type = type + self.cfg: dict = cfg + + @classmethod + def from_typed_dict(cls, d: TProviderCfg) -> ProviderCfg: + try: + return cls( + id=d["id"], + type=d["type"], + cfg=d["cfg"], + ) + except KeyError as ke: + raise InvalidProviderCfg("Provider config needs id, type and cfg fields.") from ke + + def get_key(self, key: str) -> Any: + try: + return self.cfg[key] + except KeyError as ke: + raise NoDefaultConfig(key) from ke + + def __getitem__(self, key): + return self.get_key(key) + + + +class ProvidersCfg(TypedDict): + providers: List[TProviderCfg] + + +class TArtifactsCapabilty(TypedDict): + artifacts: TypedDict + + +class ArtifactCapabilities: + def __init__(self, conn: Connection): + self.conn = conn + + def get_artifacts_capabilities(self) -> ProvidersCfg: + """ + Get the artifacts capabilities corresponding to the OpenEO connection + """ + url = self.conn.root_url + if url not in _capabilities_cache: + try: + _capabilities_cache[url] = self.conn.get("/").json()["artifacts"] + except KeyError: + raise NoArtifactsCapability() + return _capabilities_cache[url] + + def _get_artifacts_providers(self) -> List[TProviderCfg]: + try: + return self.get_artifacts_capabilities()["providers"] + except (KeyError, ArtifactsException) as e: + raise NoAdvertisedProviders() from e + + def get_preferred_artifacts_provider(self) -> ProviderCfg: + try: + return ProviderCfg.from_typed_dict(self._get_artifacts_providers()[0]) + except (IndexError, ArtifactsException) as e: + raise EmptyAdvertisedProviders() from e diff --git a/openeo/extra/artifacts/config.py b/openeo/extra/artifacts/config.py index 38184b15e..742d96540 100644 --- a/openeo/extra/artifacts/config.py +++ b/openeo/extra/artifacts/config.py @@ -1,8 +1,9 @@ from __future__ import annotations from abc import ABC, abstractmethod +from typing import Any -from openeo import Connection +from openeo.extra.artifacts.backend import ProviderCfg _METADATA_LOADED = "_sc_metadata_loaded" @@ -13,18 +14,29 @@ class StorageConfig(ABC): It greatly depends on the type of storage so the enforced API is limited to load metadata using the connection. """ @abstractmethod - def _load_openeo_connection_metadata(self, conn: Connection) -> None: + def _load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: """ - Implementations implement their logic of adapting config based on metadata from the current OpenEO connection - in this method. + Implementations implement their logic of adapting config based on metadata from the current OpenEO connection. + + The config depends on the storage type so here it can deal with settings specific for this type of storage. """ - def load_openeo_connection_metadata(self, conn: Connection) -> None: + @staticmethod + def get_type_from(cls: Any) -> str: + """The type is the name of the implementing class so that is the first object used for method resolution""" + return cls.__mro__[0].__name__ + + @classmethod + def get_type(cls) -> str: + """Return the storage config type""" + return cls.get_type_from(cls) + + def load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: """ This is the method that is actually used to load metadata. Metadata is only loaded once. """ if not self.is_openeo_connection_metadata_loaded(): - self._load_openeo_connection_metadata(conn) + self._load_connection_provided_cfg(provider_cfg) object.__setattr__(self, _METADATA_LOADED, True) def is_openeo_connection_metadata_loaded(self) -> bool: diff --git a/openeo/extra/artifacts/exceptions.py b/openeo/extra/artifacts/exceptions.py new file mode 100644 index 000000000..ea6af6450 --- /dev/null +++ b/openeo/extra/artifacts/exceptions.py @@ -0,0 +1,51 @@ +class ArtifactsException(Exception): + """ + Family of exceptions related to artifacts + """ + + +class NoArtifactsCapability(ArtifactsException): + """ + There is no artifacts capability exposed by the backend + """ + + +class NoAdvertisedProviders(ArtifactsException): + """ + The OpenEO backend does not advertise providers for artifacts storage + """ + + +class EmptyAdvertisedProviders(ArtifactsException): + """ + The providers list is empty + """ + + def __str__(self): + return """ +The OpenEO backend used does not advertise providers for managing artifacts. +""".lstrip() + + +class UnsupportedArtifactsType(ArtifactsException): + """ + The artifacts type is not supported + """ + + def __init__(self, type_id: str): + self.type_id = type_id + + def __str__(self): + return f"The OpenEO backend does not support {self.type_id}" + + +class NoDefaultConfig(ArtifactsException): + def __init__(self, key: str): + self.key = key + + def __str__(self): + return f"There was no default config provided by backend for {self.key}" + + +class InvalidProviderCfg(ArtifactsException): + """The backend has an invalid provider config. This must be fixed by the provider of the backend.""" diff --git a/setup.py b/setup.py index 233606e92..c79495937 100644 --- a/setup.py +++ b/setup.py @@ -61,6 +61,8 @@ "botocore" ] +typing_requires = ["types-boto3-s3", "types-boto3-sts"] + name = "openeo" setup( @@ -92,7 +94,7 @@ ], extras_require={ "tests": tests_require + artifacts_require, - "dev": tests_require + docs_require, + "dev": tests_require + docs_require + typing_requires + artifacts_require, "docs": docs_require, "oschmod": [ # install oschmod even when platform is not Windows, e.g. for testing in CI. "oschmod>=0.3.12" diff --git a/tests/extra/artifacts/internal_s3/test_config.py b/tests/extra/artifacts/internal_s3/test_config.py index 064ca0c50..e9e609fe7 100644 --- a/tests/extra/artifacts/internal_s3/test_config.py +++ b/tests/extra/artifacts/internal_s3/test_config.py @@ -1,6 +1,7 @@ -from openeo.extra.artifacts._s3.config import S3Config import pytest +from openeo.extra.artifacts._s3sts.config import S3STSConfig + @pytest.mark.parametrize("s3_endpoint_uri,expected", [ ("https://s3.us-east-2.amazonaws.com", "us-east-2"), @@ -14,7 +15,7 @@ ]) def test_region_should_be_derived_correctly(s3_endpoint_uri: str, expected: str): # GIVEN config with a certain endpoint - config = S3Config(s3_endpoint_url=s3_endpoint_uri) + config = S3STSConfig(s3_endpoint_url=s3_endpoint_uri) # WHEN a region is extracted region = config._get_storage_region() @@ -25,7 +26,7 @@ def test_region_should_be_derived_correctly(s3_endpoint_uri: str, expected: str) def test_extracting_region_from_invalid_url_must_give_value_error(): # GIVEN a config with invalid url - config = S3Config(s3_endpoint_url="https://www.google.com") + config = S3STSConfig(s3_endpoint_url="https://www.google.com") # WHEN a region is extracted # THEN a Value error is raised diff --git a/tests/extra/artifacts/internal_s3/test_s3uri.py b/tests/extra/artifacts/internal_s3/test_s3uri.py index 70735aae5..b320a96da 100644 --- a/tests/extra/artifacts/internal_s3/test_s3uri.py +++ b/tests/extra/artifacts/internal_s3/test_s3uri.py @@ -1,10 +1,10 @@ -from openeo.extra.artifacts._s3.model import S3URI +from openeo.extra.artifacts._s3sts.model import S3URI def test_s3uri_serialization_is_idempotent(): # GIVEN an S3 URI my_s3_uri = "s3://mybucket/my-key1" - + # WHEN we convert it to an S3URI object s3_obj = S3URI.from_str(my_s3_uri) From daf0b2bee1dd611943b948a2a18a3cf1d0bb9172 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Mon, 12 May 2025 16:12:03 +0200 Subject: [PATCH 14/20] pr: refactor with backend capabilities --- .../extra/artifacts/_s3sts/artifact_helper.py | 6 +- openeo/extra/artifacts/_s3sts/config.py | 28 ++-- openeo/extra/artifacts/_s3sts/sts.py | 15 ++- openeo/extra/artifacts/backend.py | 11 +- openeo/extra/artifacts/exceptions.py | 15 +-- tests/extra/artifacts/conftest.py | 29 +++++ .../artifacts/internal_s3/test_config.py | 4 +- .../extra/artifacts/internal_s3/test_s3sts.py | 122 ++++++++++++++++++ tests/extra/artifacts/test_artifact_helper.py | 110 ++++++++++++++++ 9 files changed, 299 insertions(+), 41 deletions(-) create mode 100644 tests/extra/artifacts/conftest.py create mode 100644 tests/extra/artifacts/internal_s3/test_s3sts.py create mode 100644 tests/extra/artifacts/test_artifact_helper.py diff --git a/openeo/extra/artifacts/_s3sts/artifact_helper.py b/openeo/extra/artifacts/_s3sts/artifact_helper.py index b83392a98..38a465727 100644 --- a/openeo/extra/artifacts/_s3sts/artifact_helper.py +++ b/openeo/extra/artifacts/_s3sts/artifact_helper.py @@ -1,7 +1,7 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from types_boto3_s3.client import S3Client @@ -26,7 +26,7 @@ def __init__(self, conn: Connection, config: S3STSConfig): self.conn = conn self.config = config self._creds = self.get_new_creds() - self._s3: S3Client = config.build_client("s3", session_kwargs=self._creds.as_kwargs()) + self._s3: Optional[S3Client] = None @classmethod def _from_openeo_connection(cls, conn: Connection, config: S3STSConfig) -> S3STSArtifactHelper: @@ -55,6 +55,8 @@ def get_object_name_from_path(path: str | Path) -> str: def _get_s3_client(self): # TODO: validate whether credentials are still reasonably long valid # and if not refresh credentials and rebuild client + if self._s3 is None: + self._s3 = self.config.build_client("s3", session_kwargs=self._creds.as_kwargs()) return self._s3 def upload_file(self, path: str | Path, object_name: str = "") -> S3URI: diff --git a/openeo/extra/artifacts/_s3sts/config.py b/openeo/extra/artifacts/_s3sts/config.py index c525cd466..72f9c5d07 100644 --- a/openeo/extra/artifacts/_s3sts/config.py +++ b/openeo/extra/artifacts/_s3sts/config.py @@ -30,28 +30,28 @@ class S3STSConfig(StorageConfig): """The s3 endpoint url protocol:://fqdn[:portnumber]""" - s3_endpoint_url: Optional[str] = None + s3_endpoint: Optional[str] = None """The sts endpoint url protocol:://fqdn[:portnumber]""" - sts_endpoint_url: Optional[str] = None + sts_endpoint: Optional[str] = None """The trace_id is if you want to send a uuid4 identifier to the backend""" trace_id: str = DISABLE_TRACING_TRACE_ID """You can change the botocore_config used but this is an expert option""" botocore_config: Config = field(default_factory=lambda: no_default_checksum_cfg) """The role ARN to be assumed""" - sts_role_arn: Optional[str] = None + role: Optional[str] = None """The bucket to store the object into""" bucket: Optional[str] = None def _load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: assert provider_cfg.type == "S3STSConfig" - if self.s3_endpoint_url is None: - object.__setattr__(self, "s3_endpoint_url", provider_cfg["s3_endpoint"]) + if self.s3_endpoint is None: + object.__setattr__(self, "s3_endpoint", provider_cfg["s3_endpoint"]) - if self.sts_endpoint_url is None: - object.__setattr__(self, "sts_endpoint_url", provider_cfg["sts_endpoint"]) + if self.sts_endpoint is None: + object.__setattr__(self, "sts_endpoint", provider_cfg["sts_endpoint"]) - if self.sts_role_arn is None: - object.__setattr__(self, "sts_role_arn", provider_cfg["role"]) + if self.role is None: + object.__setattr__(self, "role", provider_cfg["role"]) if self.bucket is None: object.__setattr__(self, "bucket", provider_cfg["bucket"]) @@ -91,7 +91,7 @@ def _get_storage_region(self) -> str: """ s3_names = ["s3", "s3-fips"] reserved_words = ["dualstack", "prod", "stag", "dev"] - s3_endpoint_parts = self._remove_protocol_from_uri(self.s3_endpoint_url).split(".") + s3_endpoint_parts = self._remove_protocol_from_uri(self.s3_endpoint).split(".") for s3_name in s3_names: try: old_idx = s3_endpoint_parts.index(s3_name) @@ -104,13 +104,13 @@ def _get_storage_region(self) -> str: return s3_endpoint_parts[idx] except ValueError: continue - raise ValueError(f"Cannot determine region from {self.s3_endpoint_url}") + raise ValueError(f"Cannot determine region from {self.s3_endpoint}") def _get_endpoint_url(self, service_name: str) -> str: if service_name == "s3": - return self.s3_endpoint_url + return self.s3_endpoint elif service_name == "sts": - return self.sts_endpoint_url + return self.sts_endpoint raise ValueError(f"Unsupported service {service_name}") def add_trace_id_qp_if_needed(self, url: str) -> str: @@ -119,4 +119,4 @@ def add_trace_id_qp_if_needed(self, url: str) -> str: return add_trace_id_as_query_parameter(url, self.trace_id) def get_sts_role_arn(self) -> str: - return self.sts_role_arn + return self.role diff --git a/openeo/extra/artifacts/_s3sts/sts.py b/openeo/extra/artifacts/_s3sts/sts.py index a596486d6..78450475a 100644 --- a/openeo/extra/artifacts/_s3sts/sts.py +++ b/openeo/extra/artifacts/_s3sts/sts.py @@ -1,9 +1,14 @@ from __future__ import annotations import datetime +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from types_boto3_sts.client import STSClient from openeo.extra.artifacts._s3sts.config import S3STSConfig from openeo.extra.artifacts._s3sts.model import AWSSTSCredentials +from openeo.extra.artifacts.exceptions import ProviderSpecificException from openeo.rest.auth.auth import BearerAuth from openeo.rest.connection import Connection @@ -19,12 +24,11 @@ def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: auth = conn.auth assert auth is not None if not isinstance(auth, BearerAuth): - raise ValueError("Only connections that have BearerAuth can be used.") + raise ProviderSpecificException("Only connections that have BearerAuth can be used.") auth_token = auth.bearer.split("/") - sts = self.config.build_client("sts") return AWSSTSCredentials.from_assume_role_response( - sts.assume_role_with_web_identity( + self._get_sts_client().assume_role_with_web_identity( RoleArn=self._get_aws_access_role(), RoleSessionName=f"artifact-helper-{datetime.datetime.now(datetime.UTC).strftime('%Y%m%d%H%M%S')}", WebIdentityToken=auth_token[2], @@ -32,5 +36,8 @@ def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: ) ) + def _get_sts_client(self) -> STSClient: + return self.config.build_client("sts") + def _get_aws_access_role(self) -> str: - return self.config.sts_role_arn + return self.config.role diff --git a/openeo/extra/artifacts/backend.py b/openeo/extra/artifacts/backend.py index 2576b11d7..aa6ec3549 100644 --- a/openeo/extra/artifacts/backend.py +++ b/openeo/extra/artifacts/backend.py @@ -4,11 +4,8 @@ from openeo import Connection from openeo.extra.artifacts.exceptions import ( - ArtifactsException, - EmptyAdvertisedProviders, InvalidProviderCfg, NoAdvertisedProviders, - NoArtifactsCapability, NoDefaultConfig, ) @@ -74,17 +71,17 @@ def get_artifacts_capabilities(self) -> ProvidersCfg: try: _capabilities_cache[url] = self.conn.get("/").json()["artifacts"] except KeyError: - raise NoArtifactsCapability() + raise NoAdvertisedProviders() return _capabilities_cache[url] def _get_artifacts_providers(self) -> List[TProviderCfg]: try: return self.get_artifacts_capabilities()["providers"] - except (KeyError, ArtifactsException) as e: + except KeyError as e: raise NoAdvertisedProviders() from e def get_preferred_artifacts_provider(self) -> ProviderCfg: try: return ProviderCfg.from_typed_dict(self._get_artifacts_providers()[0]) - except (IndexError, ArtifactsException) as e: - raise EmptyAdvertisedProviders() from e + except IndexError as e: + raise NoAdvertisedProviders() from e diff --git a/openeo/extra/artifacts/exceptions.py b/openeo/extra/artifacts/exceptions.py index ea6af6450..197f7f420 100644 --- a/openeo/extra/artifacts/exceptions.py +++ b/openeo/extra/artifacts/exceptions.py @@ -4,19 +4,7 @@ class ArtifactsException(Exception): """ -class NoArtifactsCapability(ArtifactsException): - """ - There is no artifacts capability exposed by the backend - """ - - class NoAdvertisedProviders(ArtifactsException): - """ - The OpenEO backend does not advertise providers for artifacts storage - """ - - -class EmptyAdvertisedProviders(ArtifactsException): """ The providers list is empty """ @@ -49,3 +37,6 @@ def __str__(self): class InvalidProviderCfg(ArtifactsException): """The backend has an invalid provider config. This must be fixed by the provider of the backend.""" + + +class ProviderSpecificException(ArtifactsException): ... diff --git a/tests/extra/artifacts/conftest.py b/tests/extra/artifacts/conftest.py new file mode 100644 index 000000000..477ded327 --- /dev/null +++ b/tests/extra/artifacts/conftest.py @@ -0,0 +1,29 @@ +from typing import Iterator + +import pytest + +from openeo import Connection +from tests.rest.conftest import API_URL + + +@pytest.fixture +def extra_api_capabilities() -> dict: + """ + Fixture to be overridden for customizing the capabilities doc used by connection fixtures. + To be used as kwargs for `build_capabilities` + """ + return {} + + +@pytest.fixture +def conn_with_extra_capabilities(requests_mock, extra_api_capabilities) -> Iterator[Connection]: + requests_mock.get(API_URL, json={"api_version": "1.0.0", **extra_api_capabilities}) + yield Connection(API_URL) + + +@pytest.fixture +def clean_capabilities_cache() -> Iterator[None]: + from openeo.extra.artifacts.backend import _capabilities_cache + + _capabilities_cache.clear() + yield diff --git a/tests/extra/artifacts/internal_s3/test_config.py b/tests/extra/artifacts/internal_s3/test_config.py index e9e609fe7..a87499aea 100644 --- a/tests/extra/artifacts/internal_s3/test_config.py +++ b/tests/extra/artifacts/internal_s3/test_config.py @@ -15,7 +15,7 @@ ]) def test_region_should_be_derived_correctly(s3_endpoint_uri: str, expected: str): # GIVEN config with a certain endpoint - config = S3STSConfig(s3_endpoint_url=s3_endpoint_uri) + config = S3STSConfig(s3_endpoint=s3_endpoint_uri) # WHEN a region is extracted region = config._get_storage_region() @@ -26,7 +26,7 @@ def test_region_should_be_derived_correctly(s3_endpoint_uri: str, expected: str) def test_extracting_region_from_invalid_url_must_give_value_error(): # GIVEN a config with invalid url - config = S3STSConfig(s3_endpoint_url="https://www.google.com") + config = S3STSConfig(s3_endpoint="https://www.google.com") # WHEN a region is extracted # THEN a Value error is raised diff --git a/tests/extra/artifacts/internal_s3/test_s3sts.py b/tests/extra/artifacts/internal_s3/test_s3sts.py new file mode 100644 index 000000000..97ef109a2 --- /dev/null +++ b/tests/extra/artifacts/internal_s3/test_s3sts.py @@ -0,0 +1,122 @@ +import dataclasses +import datetime +from typing import Iterator +from unittest.mock import Mock + +import pytest +from types_boto3_sts.type_defs import AssumeRoleWithWebIdentityResponseTypeDef + +from openeo import Connection +from openeo.extra.artifacts import ArtifactHelper +from openeo.extra.artifacts._s3sts.artifact_helper import S3STSArtifactHelper +from openeo.extra.artifacts._s3sts.config import S3STSConfig +from openeo.extra.artifacts._s3sts.sts import OpenEOSTSClient +from openeo.rest.auth.auth import BearerAuth +from tests.rest.conftest import API_URL + +fake_creds_response: AssumeRoleWithWebIdentityResponseTypeDef = { + "Credentials": { + "AccessKeyId": "akid", + "SecretAccessKey": "secret", + "SessionToken": "token", + "Expiration": datetime.datetime.now(tz=datetime.UTC) + datetime.timedelta(hours=1), + }, + "SubjectFromWebIdentityToken": "tokensubject", + "AssumedRoleUser": {"AssumedRoleId": "1", "Arn": "not important"}, + "PackedPolicySize": 10, + "Provider": "notImportant", + "Audience": "notImportant", + "SourceIdentity": "", + "ResponseMetadata": { + "RequestId": "0000-00", + "HTTPStatusCode": 200, + "HTTPHeaders": {}, + "RetryAttempts": 0, + "HostId": "1", + }, +} + + +@pytest.fixture +def mocked_sts(monkeypatch): + mocked_sts_client = Mock(["assume_role_with_web_identity"]) + mocked_sts_client.assume_role_with_web_identity.return_value = fake_creds_response + monkeypatch.setattr(OpenEOSTSClient, "_get_sts_client", Mock(return_value=mocked_sts_client)) + yield mocked_sts_client + + +test_p_bucket_name = "openeo-artifacts" +test_p_role_arn = "arn:aws:iam::000000000000:role/S3Access" +test_p_s3_endpoint = "https://s3.oeo.test" +test_p_sts_endpoint = "https://sts.oeo.test" +test_p_config = { + "bucket": test_p_bucket_name, + "role": test_p_role_arn, + "s3_endpoint": test_p_s3_endpoint, + "sts_endpoint": test_p_sts_endpoint, +} + + +@pytest.fixture +def conn_with_stss3_capabilities(requests_mock, extra_api_capabilities) -> Iterator[Connection]: + extra_api_capabilities = {"artifacts": {"providers": [{"cfg": test_p_config, "id": "s3", "type": "S3STSConfig"}]}} + requests_mock.get(API_URL, json={"api_version": "1.0.0", **extra_api_capabilities}) + conn = Connection(API_URL) + conn.auth = BearerAuth("oidc/fake/token") + yield conn + + +def test_backend_provided_settings_s3sts(clean_capabilities_cache, conn_with_stss3_capabilities, mocked_sts): + # Given a backend that exposes stss3 capabilities (fixture) + # When creating an artifacthelper without specifying config + ah = ArtifactHelper.from_openeo_connection(conn_with_stss3_capabilities, None) + # Then the artifact helper is of the expected instance + assert isinstance(ah, S3STSArtifactHelper) + # Then the config is of the expected type + assert isinstance(ah.config, S3STSConfig) + # And the config contains the backend provided settings + assert test_p_bucket_name == ah.config.bucket + assert test_p_role_arn == ah.config.role + assert test_p_sts_endpoint == ah.config.sts_endpoint + assert test_p_s3_endpoint == ah.config.s3_endpoint + + +# Custom overrides +test_c_bucket_name = "openeo-custom-artifacts" +test_c_role_arn = "arn:aws:iam::000000000000:role/S3Artifacts" +test_c_s3_endpoint = "https://s3.oeo2.test" +test_c_sts_endpoint = "https://sts.oeo2.test" + + +def get_stss3_config_default_field_values() -> dict: + c = S3STSConfig() + return {field_name.name: getattr(c, field_name.name) for field_name in dataclasses.fields(c)} + + +@pytest.mark.parametrize( + "overrides", + [ + { + "s3_endpoint": test_c_s3_endpoint, + "sts_endpoint": test_c_sts_endpoint, + "role": test_c_role_arn, + "bucket": test_c_bucket_name, + } + ], +) +def test_config_overrides_take_precedence( + clean_capabilities_cache, conn_with_stss3_capabilities, mocked_sts, overrides: dict +): + defaults = get_stss3_config_default_field_values() + expected_values = {**defaults, **test_p_config, **overrides} + + # Given a backend that exposes stss3 capabilities (fixture) + # When creating an artifacthelper without specifying config + ah = ArtifactHelper.from_openeo_connection(conn_with_stss3_capabilities, S3STSConfig(**overrides)) + # Then the artifact helper is of the expected instance + assert isinstance(ah, S3STSArtifactHelper) + # Then the config is of the expected type + assert isinstance(ah.config, S3STSConfig) + # And the config contains the backend provided settings + for field_name in dataclasses.fields(ah.config): + assert expected_values[field_name.name] == getattr(ah.config, field_name.name) diff --git a/tests/extra/artifacts/test_artifact_helper.py b/tests/extra/artifacts/test_artifact_helper.py new file mode 100644 index 000000000..2858903ec --- /dev/null +++ b/tests/extra/artifacts/test_artifact_helper.py @@ -0,0 +1,110 @@ +from typing import Type + +import pytest + +from openeo.extra.artifacts import ArtifactHelper +from openeo.extra.artifacts.exceptions import ( + NoAdvertisedProviders, + ProviderSpecificException, + UnsupportedArtifactsType, +) + + +@pytest.mark.parametrize( + ( + "description", + "extra_api_capabilities", + "expected_ex", + ), + [ + [ + """When using the new PresignedS3AssetUrls with ipd in place but not required config in place we should + be on old behavior""", + {}, # No extra capabilities + NoAdvertisedProviders, + ], + [ + "When the backend advertises an unsupported provider it should raise an exception", + { + "artifacts": { + "providers": [ + { + "cfg": { + "bucket": "openeo-artifacts", + "role": "arn:aws:iam::000000000000:role/S3Access", + "s3_endpoint": "https://s3.oeo.test", + "sts_endpoint": "https://sts.oeo.test", + }, + "id": "s3", + "type": "NonExistingStorageProvider", + } + ] + } + }, + UnsupportedArtifactsType, + ], + [ + """When using the S3STS provider a connection requires to have authentication this is not present in this + test mocking""", + { + "artifacts": { + "providers": [ + { + "cfg": { + "bucket": "openeo-artifacts", + "role": "arn:aws:iam::000000000000:role/S3Access", + "s3_endpoint": "https://s3.oeo.test", + "sts_endpoint": "https://sts.oeo.test", + }, + "id": "s3", + "type": "S3STSConfig", + } + ] + } + }, + ProviderSpecificException, + ], + [ + """An artifacts section without providers must raise no providers exception""", + {"artifacts": {}}, + NoAdvertisedProviders, + ], + [ + """An artifacts section with an empty providers list must raise no providers exception""", + {"artifacts": {"providers": []}}, + NoAdvertisedProviders, + ], + [ + """When using the S3STS provider a connection requires to have authentication this is not present in this + test mocking""", + { + "artifacts": { + "providers": [ + { + "cfg": { + "bucket": "openeo-artifacts", + "role": "arn:aws:iam::000000000000:role/S3Access", + "s3_endpoint": "https://s3.oeo.test", + "sts_endpoint": "https://sts.oeo.test", + }, + "id": "s3", + "type": "S3STSConfig", + } + ] + } + }, + ProviderSpecificException, + ], + ], +) +def test_artifacts_raising_exceptions_when_required( + clean_capabilities_cache, + conn_with_extra_capabilities, + description: str, + extra_api_capabilities: dict, + expected_ex: Type[BaseException], +): + # Given no provided config then the client should raise exceptions when there is no appropriate way of + # configuring an Artifact helper + with pytest.raises(expected_ex): + ArtifactHelper.from_openeo_connection(conn_with_extra_capabilities, None) From db0c9b0a476b42ab47393f9b0b3312a0bd1da721 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Mon, 12 May 2025 16:18:47 +0200 Subject: [PATCH 15/20] tests: types should not be needed for unittests --- tests/extra/artifacts/internal_s3/test_s3sts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/extra/artifacts/internal_s3/test_s3sts.py b/tests/extra/artifacts/internal_s3/test_s3sts.py index 97ef109a2..fb2ad8f9e 100644 --- a/tests/extra/artifacts/internal_s3/test_s3sts.py +++ b/tests/extra/artifacts/internal_s3/test_s3sts.py @@ -1,10 +1,14 @@ +from __future__ import annotations + import dataclasses import datetime -from typing import Iterator +from typing import TYPE_CHECKING, Iterator from unittest.mock import Mock import pytest -from types_boto3_sts.type_defs import AssumeRoleWithWebIdentityResponseTypeDef + +if TYPE_CHECKING: + from types_boto3_sts.type_defs import AssumeRoleWithWebIdentityResponseTypeDef from openeo import Connection from openeo.extra.artifacts import ArtifactHelper From 22fbf32e2662efae2927bb9a1836022c8af69205 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Mon, 12 May 2025 16:28:36 +0200 Subject: [PATCH 16/20] tests: do not break tests in older Python versions --- tests/extra/artifacts/internal_s3/test_s3sts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/extra/artifacts/internal_s3/test_s3sts.py b/tests/extra/artifacts/internal_s3/test_s3sts.py index fb2ad8f9e..87c746b23 100644 --- a/tests/extra/artifacts/internal_s3/test_s3sts.py +++ b/tests/extra/artifacts/internal_s3/test_s3sts.py @@ -23,7 +23,8 @@ "AccessKeyId": "akid", "SecretAccessKey": "secret", "SessionToken": "token", - "Expiration": datetime.datetime.now(tz=datetime.UTC) + datetime.timedelta(hours=1), + # TODO: go for datetime.datetime.now(tz=datetime.UTC) once 3.10 support is no longer needed + "Expiration": datetime.datetime.utcnow() + datetime.timedelta(hours=1), }, "SubjectFromWebIdentityToken": "tokensubject", "AssumedRoleUser": {"AssumedRoleId": "1", "Arn": "not important"}, From 79c09f8f250fae2e63a9aa8d7c66632b5015b7e4 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Mon, 12 May 2025 16:54:47 +0200 Subject: [PATCH 17/20] py3.10: do not use datetime.UTC --- openeo/extra/artifacts/_s3sts/artifact_helper.py | 3 ++- openeo/extra/artifacts/_s3sts/sts.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openeo/extra/artifacts/_s3sts/artifact_helper.py b/openeo/extra/artifacts/_s3sts/artifact_helper.py index 38a465727..dc8a5ffcc 100644 --- a/openeo/extra/artifacts/_s3sts/artifact_helper.py +++ b/openeo/extra/artifacts/_s3sts/artifact_helper.py @@ -41,7 +41,8 @@ def _user_prefix(self) -> str: return self._creds.get_user_hash() def _get_upload_prefix(self) -> str: - return f"{self._user_prefix()}/{datetime.datetime.now(datetime.UTC).strftime('%Y/%m/%d')}/" + # TODO: replace utcnow when `datetime.datetime.now(datetime.UTC)` in oldest supported Python version + return f"{self._user_prefix()}/{datetime.datetime.utcnow().strftime('%Y/%m/%d')}/" def _get_upload_key(self, object_name: str) -> str: return f"{self._get_upload_prefix()}{object_name}" diff --git a/openeo/extra/artifacts/_s3sts/sts.py b/openeo/extra/artifacts/_s3sts/sts.py index 78450475a..d0dbfabdc 100644 --- a/openeo/extra/artifacts/_s3sts/sts.py +++ b/openeo/extra/artifacts/_s3sts/sts.py @@ -30,7 +30,7 @@ def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: return AWSSTSCredentials.from_assume_role_response( self._get_sts_client().assume_role_with_web_identity( RoleArn=self._get_aws_access_role(), - RoleSessionName=f"artifact-helper-{datetime.datetime.now(datetime.UTC).strftime('%Y%m%d%H%M%S')}", + RoleSessionName=f"artifact-helper-{datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')}", WebIdentityToken=auth_token[2], DurationSeconds=43200, ) From b3cb5ce79e28ba61aaf1824920836d8d126da94c Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Thu, 22 May 2025 14:44:47 +0200 Subject: [PATCH 18/20] feature: allow client-side overrides for backend without capabilities. --- openeo/extra/artifacts/_s3sts/config.py | 1 - openeo/extra/artifacts/_s3sts/sts.py | 4 +-- openeo/extra/artifacts/artifact_helper.py | 2 +- openeo/extra/artifacts/artifact_helper_abc.py | 2 +- openeo/extra/artifacts/backend.py | 33 +++++++++++++++---- 5 files changed, 31 insertions(+), 11 deletions(-) diff --git a/openeo/extra/artifacts/_s3sts/config.py b/openeo/extra/artifacts/_s3sts/config.py index 72f9c5d07..455447546 100644 --- a/openeo/extra/artifacts/_s3sts/config.py +++ b/openeo/extra/artifacts/_s3sts/config.py @@ -43,7 +43,6 @@ class S3STSConfig(StorageConfig): bucket: Optional[str] = None def _load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: - assert provider_cfg.type == "S3STSConfig" if self.s3_endpoint is None: object.__setattr__(self, "s3_endpoint", provider_cfg["s3_endpoint"]) diff --git a/openeo/extra/artifacts/_s3sts/sts.py b/openeo/extra/artifacts/_s3sts/sts.py index d0dbfabdc..7473ddd84 100644 --- a/openeo/extra/artifacts/_s3sts/sts.py +++ b/openeo/extra/artifacts/_s3sts/sts.py @@ -1,6 +1,5 @@ from __future__ import annotations -import datetime from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -11,6 +10,7 @@ from openeo.extra.artifacts.exceptions import ProviderSpecificException from openeo.rest.auth.auth import BearerAuth from openeo.rest.connection import Connection +from openeo.util import Rfc3339 class OpenEOSTSClient: @@ -30,7 +30,7 @@ def assume_from_openeo_connection(self, conn: Connection) -> AWSSTSCredentials: return AWSSTSCredentials.from_assume_role_response( self._get_sts_client().assume_role_with_web_identity( RoleArn=self._get_aws_access_role(), - RoleSessionName=f"artifact-helper-{datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')}", + RoleSessionName=f"artifact-helper-{Rfc3339().now_utc()}", WebIdentityToken=auth_token[2], DurationSeconds=43200, ) diff --git a/openeo/extra/artifacts/artifact_helper.py b/openeo/extra/artifacts/artifact_helper.py index e4d9b77d4..c5de30d88 100644 --- a/openeo/extra/artifacts/artifact_helper.py +++ b/openeo/extra/artifacts/artifact_helper.py @@ -37,7 +37,7 @@ def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig ``` """ if config is None: - config_type = ArtifactCapabilities(conn).get_preferred_artifacts_provider().type + config_type = ArtifactCapabilities(conn).get_preferred_artifacts_provider().get_type() else: config_type = config.get_type() diff --git a/openeo/extra/artifacts/artifact_helper_abc.py b/openeo/extra/artifacts/artifact_helper_abc.py index dac0eee0c..80fa992be 100644 --- a/openeo/extra/artifacts/artifact_helper_abc.py +++ b/openeo/extra/artifacts/artifact_helper_abc.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional -from openeo.extra.artifacts.backend import ProviderCfg +from openeo.extra.artifacts.backend import ArtifactCapabilities, ProviderCfg from openeo.extra.artifacts.config import StorageConfig from openeo.extra.artifacts.uri import StorageURI from openeo.rest.connection import Connection diff --git a/openeo/extra/artifacts/backend.py b/openeo/extra/artifacts/backend.py index aa6ec3549..d44754e6e 100644 --- a/openeo/extra/artifacts/backend.py +++ b/openeo/extra/artifacts/backend.py @@ -1,9 +1,10 @@ from __future__ import annotations -from typing import Any, Dict, List, TypedDict +from typing import Any, Dict, List, Optional, TypedDict from openeo import Connection from openeo.extra.artifacts.exceptions import ( + ArtifactsException, InvalidProviderCfg, NoAdvertisedProviders, NoDefaultConfig, @@ -23,10 +24,16 @@ class TProviderCfg(TypedDict): class ProviderCfg: - def __init__(self, id: str, type: str, cfg: dict): + """ + Configuration as provided by the OpenEO backend. + It also holds exceptions if no such configuration is retrievable. + """ + + def __init__(self, id: str, type: str, cfg: dict, *, exc: Optional[Exception] = None): self.id = id self.type = type self.cfg: dict = cfg + self.exc: Exception = exc @classmethod def from_typed_dict(cls, d: TProviderCfg) -> ProviderCfg: @@ -39,12 +46,26 @@ def from_typed_dict(cls, d: TProviderCfg) -> ProviderCfg: except KeyError as ke: raise InvalidProviderCfg("Provider config needs id, type and cfg fields.") from ke + @classmethod + def from_exception(cls, exc: Exception) -> ProviderCfg: + return cls(id="undefined", type="undefined", cfg={}, exc=exc) + + def raise_if_needed(self, operation: str): + """Check if operation can be done if not raise an exception""" + if self.exc is not None: + raise RuntimeError(f"Trying to {operation} for provider config which was not available") from self.exc + def get_key(self, key: str) -> Any: + self.raise_if_needed(f"get key {key}") try: return self.cfg[key] except KeyError as ke: raise NoDefaultConfig(key) from ke + def get_type(self) -> str: + self.raise_if_needed("get type") + return self.type + def __getitem__(self, key): return self.get_key(key) @@ -62,7 +83,7 @@ class ArtifactCapabilities: def __init__(self, conn: Connection): self.conn = conn - def get_artifacts_capabilities(self) -> ProvidersCfg: + def _get_artifacts_capabilities(self) -> ProvidersCfg: """ Get the artifacts capabilities corresponding to the OpenEO connection """ @@ -76,12 +97,12 @@ def get_artifacts_capabilities(self) -> ProvidersCfg: def _get_artifacts_providers(self) -> List[TProviderCfg]: try: - return self.get_artifacts_capabilities()["providers"] + return self._get_artifacts_capabilities()["providers"] except KeyError as e: raise NoAdvertisedProviders() from e def get_preferred_artifacts_provider(self) -> ProviderCfg: try: return ProviderCfg.from_typed_dict(self._get_artifacts_providers()[0]) - except IndexError as e: - raise NoAdvertisedProviders() from e + except (IndexError, ArtifactsException) as e: + return ProviderCfg("id", "type", {}, exc=e) From ff54c76932f2a5fde78f3aa51db8a54363e9f09a Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Thu, 22 May 2025 14:56:59 +0200 Subject: [PATCH 19/20] refactor: prefer config over cfg --- openeo/extra/artifacts/_s3sts/config.py | 18 ++++---- openeo/extra/artifacts/artifact_helper.py | 8 ++-- openeo/extra/artifacts/artifact_helper_abc.py | 6 +-- openeo/extra/artifacts/backend.py | 42 +++++++++---------- openeo/extra/artifacts/config.py | 8 ++-- openeo/extra/artifacts/exceptions.py | 2 +- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/openeo/extra/artifacts/_s3sts/config.py b/openeo/extra/artifacts/_s3sts/config.py index 455447546..4b7f2d780 100644 --- a/openeo/extra/artifacts/_s3sts/config.py +++ b/openeo/extra/artifacts/_s3sts/config.py @@ -9,16 +9,16 @@ add_trace_id, add_trace_id_as_query_parameter, ) -from openeo.extra.artifacts.backend import ProviderCfg +from openeo.extra.artifacts.backend import ProviderConfig from openeo.extra.artifacts.config import StorageConfig from openeo.utils.version import ComparableVersion if ComparableVersion(botocore.__version__).below("1.36.0"): # Before 1.36 checksuming was not done by default anyway and therefore # there was no opt-out. - no_default_checksum_cfg = Config() + no_default_checksum_config = Config() else: - no_default_checksum_cfg = Config( + no_default_checksum_config = Config( request_checksum_calculation="when_required", ) @@ -36,24 +36,24 @@ class S3STSConfig(StorageConfig): """The trace_id is if you want to send a uuid4 identifier to the backend""" trace_id: str = DISABLE_TRACING_TRACE_ID """You can change the botocore_config used but this is an expert option""" - botocore_config: Config = field(default_factory=lambda: no_default_checksum_cfg) + botocore_config: Config = field(default_factory=lambda: no_default_checksum_config) """The role ARN to be assumed""" role: Optional[str] = None """The bucket to store the object into""" bucket: Optional[str] = None - def _load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: + def _load_connection_provided_config(self, provider_config: ProviderConfig) -> None: if self.s3_endpoint is None: - object.__setattr__(self, "s3_endpoint", provider_cfg["s3_endpoint"]) + object.__setattr__(self, "s3_endpoint", provider_config["s3_endpoint"]) if self.sts_endpoint is None: - object.__setattr__(self, "sts_endpoint", provider_cfg["sts_endpoint"]) + object.__setattr__(self, "sts_endpoint", provider_config["sts_endpoint"]) if self.role is None: - object.__setattr__(self, "role", provider_cfg["role"]) + object.__setattr__(self, "role", provider_config["role"]) if self.bucket is None: - object.__setattr__(self, "bucket", provider_cfg["bucket"]) + object.__setattr__(self, "bucket", provider_config["bucket"]) def should_trace(self) -> bool: return self.trace_id != DISABLE_TRACING_TRACE_ID diff --git a/openeo/extra/artifacts/artifact_helper.py b/openeo/extra/artifacts/artifact_helper.py index c5de30d88..3538dce2e 100644 --- a/openeo/extra/artifacts/artifact_helper.py +++ b/openeo/extra/artifacts/artifact_helper.py @@ -11,9 +11,9 @@ from openeo.extra.artifacts.config import StorageConfig from openeo.extra.artifacts.exceptions import UnsupportedArtifactsType -cfg_to_helper: Dict[Type[StorageConfig], Type[ArtifactHelperABC]] = {S3STSConfig: S3STSArtifactHelper} -cfg_type_to_helper: Dict[str, Type[ArtifactHelperABC]] = { - StorageConfig.get_type_from(cfg): helper for cfg, helper in cfg_to_helper.items() +config_to_helper: Dict[Type[StorageConfig], Type[ArtifactHelperABC]] = {S3STSConfig: S3STSArtifactHelper} +config_type_to_helper: Dict[str, Type[ArtifactHelperABC]] = { + StorageConfig.get_type_from(cfg): helper for cfg, helper in config_to_helper.items() } class ArtifactHelper(ArtifactHelperBuilderABC): @@ -42,7 +42,7 @@ def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig config_type = config.get_type() try: - artifact_helper = cfg_type_to_helper[config_type] + artifact_helper = config_type_to_helper[config_type] return artifact_helper.from_openeo_connection( conn, ArtifactCapabilities(conn).get_preferred_artifacts_provider(), config=config ) diff --git a/openeo/extra/artifacts/artifact_helper_abc.py b/openeo/extra/artifacts/artifact_helper_abc.py index 80fa992be..66b217413 100644 --- a/openeo/extra/artifacts/artifact_helper_abc.py +++ b/openeo/extra/artifacts/artifact_helper_abc.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional -from openeo.extra.artifacts.backend import ArtifactCapabilities, ProviderCfg +from openeo.extra.artifacts.backend import ProviderConfig from openeo.extra.artifacts.config import StorageConfig from openeo.extra.artifacts.uri import StorageURI from openeo.rest.connection import Connection @@ -23,7 +23,7 @@ def from_openeo_connection(cls, conn: Connection, config: Optional[StorageConfig class ArtifactHelperABC(ABC): @classmethod def from_openeo_connection( - cls, conn: Connection, provider_cfg: ProviderCfg, *, config: Optional[StorageConfig] = None + cls, conn: Connection, provider_config: ProviderConfig, *, config: Optional[StorageConfig] = None ) -> ArtifactHelperABC: """ Create a new Artifact helper from the OpenEO connection. This is the starting point to upload artifacts. @@ -31,7 +31,7 @@ def from_openeo_connection( """ if config is None: config = cls._get_default_storage_config() - config.load_connection_provided_cfg(provider_cfg) + config.load_connection_provided_config(provider_config) return cls._from_openeo_connection(conn, config) @abstractmethod diff --git a/openeo/extra/artifacts/backend.py b/openeo/extra/artifacts/backend.py index d44754e6e..0c3763957 100644 --- a/openeo/extra/artifacts/backend.py +++ b/openeo/extra/artifacts/backend.py @@ -5,7 +5,7 @@ from openeo import Connection from openeo.extra.artifacts.exceptions import ( ArtifactsException, - InvalidProviderCfg, + InvalidProviderConfig, NoAdvertisedProviders, NoDefaultConfig, ) @@ -13,52 +13,52 @@ _capabilities_cache: Dict[str, Dict] = {} -class TProviderCfg(TypedDict): +class TProviderConfig(TypedDict): """The ID of the provider config not used in MVP""" id: str """The type of artifacts storage which defines how to interact with the storage""" type: str """The config for the artifacts storage this will depend on the type""" - cfg: Dict[str, Any] + config: Dict[str, Any] -class ProviderCfg: +class ProviderConfig: """ Configuration as provided by the OpenEO backend. - It also holds exceptions if no such configuration is retrievable. + It holds an exception if no such configuration is retrievable. """ - def __init__(self, id: str, type: str, cfg: dict, *, exc: Optional[Exception] = None): + def __init__(self, id: str, type: str, config: dict, *, exc: Optional[Exception] = None): self.id = id self.type = type - self.cfg: dict = cfg + self.config: dict = config self.exc: Exception = exc @classmethod - def from_typed_dict(cls, d: TProviderCfg) -> ProviderCfg: + def from_typed_dict(cls, d: TProviderConfig) -> ProviderConfig: try: return cls( id=d["id"], type=d["type"], - cfg=d["cfg"], + config=d["config"], ) except KeyError as ke: - raise InvalidProviderCfg("Provider config needs id, type and cfg fields.") from ke + raise InvalidProviderConfig("Provider config needs id, type and config fields.") from ke @classmethod - def from_exception(cls, exc: Exception) -> ProviderCfg: - return cls(id="undefined", type="undefined", cfg={}, exc=exc) + def from_exception(cls, exc: Exception) -> ProviderConfig: + return cls(id="undefined", type="undefined", config={}, exc=exc) def raise_if_needed(self, operation: str): """Check if operation can be done if not raise an exception""" if self.exc is not None: - raise RuntimeError(f"Trying to {operation} for provider config which was not available") from self.exc + raise RuntimeError(f"Trying to {operation} for backend config which was not available") from self.exc def get_key(self, key: str) -> Any: self.raise_if_needed(f"get key {key}") try: - return self.cfg[key] + return self.config[key] except KeyError as ke: raise NoDefaultConfig(key) from ke @@ -71,8 +71,8 @@ def __getitem__(self, key): -class ProvidersCfg(TypedDict): - providers: List[TProviderCfg] +class ProvidersConfig(TypedDict): + providers: List[TProviderConfig] class TArtifactsCapabilty(TypedDict): @@ -83,7 +83,7 @@ class ArtifactCapabilities: def __init__(self, conn: Connection): self.conn = conn - def _get_artifacts_capabilities(self) -> ProvidersCfg: + def _get_artifacts_capabilities(self) -> ProvidersConfig: """ Get the artifacts capabilities corresponding to the OpenEO connection """ @@ -95,14 +95,14 @@ def _get_artifacts_capabilities(self) -> ProvidersCfg: raise NoAdvertisedProviders() return _capabilities_cache[url] - def _get_artifacts_providers(self) -> List[TProviderCfg]: + def _get_artifacts_providers(self) -> List[TProviderConfig]: try: return self._get_artifacts_capabilities()["providers"] except KeyError as e: raise NoAdvertisedProviders() from e - def get_preferred_artifacts_provider(self) -> ProviderCfg: + def get_preferred_artifacts_provider(self) -> ProviderConfig: try: - return ProviderCfg.from_typed_dict(self._get_artifacts_providers()[0]) + return ProviderConfig.from_typed_dict(self._get_artifacts_providers()[0]) except (IndexError, ArtifactsException) as e: - return ProviderCfg("id", "type", {}, exc=e) + return ProviderConfig("id", "type", {}, exc=e) diff --git a/openeo/extra/artifacts/config.py b/openeo/extra/artifacts/config.py index 742d96540..76e938b14 100644 --- a/openeo/extra/artifacts/config.py +++ b/openeo/extra/artifacts/config.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from typing import Any -from openeo.extra.artifacts.backend import ProviderCfg +from openeo.extra.artifacts.backend import ProviderConfig _METADATA_LOADED = "_sc_metadata_loaded" @@ -14,7 +14,7 @@ class StorageConfig(ABC): It greatly depends on the type of storage so the enforced API is limited to load metadata using the connection. """ @abstractmethod - def _load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: + def _load_connection_provided_config(self, provider_config: ProviderConfig) -> None: """ Implementations implement their logic of adapting config based on metadata from the current OpenEO connection. @@ -31,12 +31,12 @@ def get_type(cls) -> str: """Return the storage config type""" return cls.get_type_from(cls) - def load_connection_provided_cfg(self, provider_cfg: ProviderCfg) -> None: + def load_connection_provided_config(self, provider_config: ProviderConfig) -> None: """ This is the method that is actually used to load metadata. Metadata is only loaded once. """ if not self.is_openeo_connection_metadata_loaded(): - self._load_connection_provided_cfg(provider_cfg) + self._load_connection_provided_config(provider_config) object.__setattr__(self, _METADATA_LOADED, True) def is_openeo_connection_metadata_loaded(self) -> bool: diff --git a/openeo/extra/artifacts/exceptions.py b/openeo/extra/artifacts/exceptions.py index 197f7f420..d6f99900c 100644 --- a/openeo/extra/artifacts/exceptions.py +++ b/openeo/extra/artifacts/exceptions.py @@ -35,7 +35,7 @@ def __str__(self): return f"There was no default config provided by backend for {self.key}" -class InvalidProviderCfg(ArtifactsException): +class InvalidProviderConfig(ArtifactsException): """The backend has an invalid provider config. This must be fixed by the provider of the backend.""" From 855c36222cefdea5f2f99fccb1a604b9ae03da57 Mon Sep 17 00:00:00 2001 From: Peter Van Bouwel Date: Thu, 22 May 2025 15:10:31 +0200 Subject: [PATCH 20/20] tests: fix unittests --- openeo/extra/artifacts/backend.py | 9 +++++++-- tests/extra/artifacts/internal_s3/test_s3sts.py | 4 +++- tests/extra/artifacts/test_artifact_helper.py | 6 +++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/openeo/extra/artifacts/backend.py b/openeo/extra/artifacts/backend.py index 0c3763957..d82d029a8 100644 --- a/openeo/extra/artifacts/backend.py +++ b/openeo/extra/artifacts/backend.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import Any, Dict, List, Optional, TypedDict from openeo import Connection @@ -11,6 +12,7 @@ ) _capabilities_cache: Dict[str, Dict] = {} +_log = logging.getLogger(__name__) class TProviderConfig(TypedDict): @@ -53,7 +55,8 @@ def from_exception(cls, exc: Exception) -> ProviderConfig: def raise_if_needed(self, operation: str): """Check if operation can be done if not raise an exception""" if self.exc is not None: - raise RuntimeError(f"Trying to {operation} for backend config which was not available") from self.exc + _log.error(f"Trying to {operation} for backend config which was not available") + raise self.exc def get_key(self, key: str) -> Any: self.raise_if_needed(f"get key {key}") @@ -104,5 +107,7 @@ def _get_artifacts_providers(self) -> List[TProviderConfig]: def get_preferred_artifacts_provider(self) -> ProviderConfig: try: return ProviderConfig.from_typed_dict(self._get_artifacts_providers()[0]) - except (IndexError, ArtifactsException) as e: + except IndexError: + return ProviderConfig("id", "type", {}, exc=NoAdvertisedProviders()) + except ArtifactsException as e: return ProviderConfig("id", "type", {}, exc=e) diff --git a/tests/extra/artifacts/internal_s3/test_s3sts.py b/tests/extra/artifacts/internal_s3/test_s3sts.py index 87c746b23..0360c6399 100644 --- a/tests/extra/artifacts/internal_s3/test_s3sts.py +++ b/tests/extra/artifacts/internal_s3/test_s3sts.py @@ -64,7 +64,9 @@ def mocked_sts(monkeypatch): @pytest.fixture def conn_with_stss3_capabilities(requests_mock, extra_api_capabilities) -> Iterator[Connection]: - extra_api_capabilities = {"artifacts": {"providers": [{"cfg": test_p_config, "id": "s3", "type": "S3STSConfig"}]}} + extra_api_capabilities = { + "artifacts": {"providers": [{"config": test_p_config, "id": "s3", "type": "S3STSConfig"}]} + } requests_mock.get(API_URL, json={"api_version": "1.0.0", **extra_api_capabilities}) conn = Connection(API_URL) conn.auth = BearerAuth("oidc/fake/token") diff --git a/tests/extra/artifacts/test_artifact_helper.py b/tests/extra/artifacts/test_artifact_helper.py index 2858903ec..020f0fad7 100644 --- a/tests/extra/artifacts/test_artifact_helper.py +++ b/tests/extra/artifacts/test_artifact_helper.py @@ -29,7 +29,7 @@ "artifacts": { "providers": [ { - "cfg": { + "config": { "bucket": "openeo-artifacts", "role": "arn:aws:iam::000000000000:role/S3Access", "s3_endpoint": "https://s3.oeo.test", @@ -50,7 +50,7 @@ "artifacts": { "providers": [ { - "cfg": { + "config": { "bucket": "openeo-artifacts", "role": "arn:aws:iam::000000000000:role/S3Access", "s3_endpoint": "https://s3.oeo.test", @@ -81,7 +81,7 @@ "artifacts": { "providers": [ { - "cfg": { + "config": { "bucket": "openeo-artifacts", "role": "arn:aws:iam::000000000000:role/S3Access", "s3_endpoint": "https://s3.oeo.test",