Skip to content

Commit 0758c7a

Browse files
launch v2 bundles API (#91)
* v2 bundles api copy * Formated with black * isort and fix 2 mypy errors launch/connection.py:14: error: Incompatible default for argument "endpoint" (default has type "None", argument has type "str") [assignment] launch/client.py:1811: error: Incompatible default for argument "urls" (default has type "None", argument has type "List[str]") [assignment] * Fix package import * Added readiness_initial_delay_seconds * updating docs * Added create TritonEnhancedRunnableImageFlavor bundle * fix * fix * fix * rm space * update docs * fix index docs to new apis --------- Co-authored-by: Phil Chen <[email protected]>
1 parent af9a108 commit 0758c7a

File tree

99 files changed

+1534
-415
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+1534
-415
lines changed

docs/concepts/model_bundles.md

Lines changed: 59 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,22 @@ are created by packaging a model up into a deployable format.
55

66
## Creating Model Bundles
77

8-
There are three methods for creating model bundles:
8+
There are four methods for creating model bundles:
99
[`create_model_bundle_from_callable_v2`](/api/client/#launch.client.LaunchClient.create_model_bundle_from_callable_v2),
1010
[`create_model_bundle_from_dirs_v2`](/api/client/#launch.client.LaunchClient.create_model_bundle_from_dirs_v2),
11+
[`create_model_bundle_from_runnable_image_v2`](/api/client/#launch.client.LaunchClient.create_model_bundle_from_runnable_image_v2),
1112
and
12-
[`create_model_bundle_from_runnable_image_v2`](/api/client/#launch.client.LaunchClient.create_model_bundle_from_runnable_image_v2).
13+
[`create_model_bundle_from_triton_enhanced_runnable_image_v2`](/api/client/#launch.client.LaunchClient.create_model_bundle_from_triton_enhanced_runnable_image_v2).
14+
1315
The first directly pickles a user-specified `load_predict_fn`, a function which
1416
loads the model and returns a `predict_fn`, a function which takes in a request.
1517
The second takes in directories containing a `load_predict_fn` and the
1618
module path to the `load_predict_fn`.
1719
The third takes a Docker image and a command that starts a process listening for
1820
requests at port 5005 using HTTP and exposes `POST /predict` and
19-
`GET /healthz` endpoints.
21+
`GET /readyz` endpoints.
22+
The fourth is a variant of the third that also starts an instance of the NVIDIA
23+
Triton framework for efficient model serving.
2024

2125
Each of these modes of creating a model bundle is called a "Flavor".
2226

@@ -43,6 +47,12 @@ Each of these modes of creating a model bundle is called a "Flavor".
4347
* You are comfortable with building a web server and Docker image to serve your model.
4448

4549

50+
A `TritonEnhancedRunnableImageFlavor` (a runnable image variant) is good if:
51+
52+
* You want to use a `RunnableImageFlavor`
53+
* You also want to use [NVIDIA's `tritonserver`](https://developer.nvidia.com/nvidia-triton-inference-server) to accelerate model inference
54+
55+
4656
=== "Creating From Callables"
4757
```py
4858
import os
@@ -178,36 +188,66 @@ Each of these modes of creating a model bundle is called a "Flavor".
178188

179189
BUNDLE_PARAMS = {
180190
"model_bundle_name": "test-bundle",
181-
"load_model_fn": my_load_model_fn,
182-
"load_predict_fn": my_load_predict_fn,
183191
"request_schema": MyRequestSchema,
184192
"response_schema": MyResponseSchema,
185-
"repository": "launch_rearch",
186-
"tag": "12b9131c5a1489c76592cddd186962cce965f0f6-cpu",
193+
"repository": "...",
194+
"tag": "...",
187195
"command": [
188-
"dumb-init",
189-
"--",
190-
"ddtrace-run",
191-
"run-service",
192-
"--config",
193-
"/install/launch_rearch/config/service--user_defined_code.yaml",
194-
"--concurrency",
195-
"1",
196-
"--http",
197-
"production",
198-
"--port",
199-
"5005",
196+
...
200197
],
201198
"env": {
202199
"TEST_KEY": "test_value",
203200
},
201+
"readiness_initial_delay_seconds": 30,
204202
}
205203

206204
client = LaunchClient(api_key=os.getenv("LAUNCH_API_KEY"))
207205
client.create_model_bundle_from_runnable_image_v2(**BUNDLE_PARAMS)
208206
```
209207

210208

209+
=== "Creating From a Triton Enhanced Runnable Image"
210+
```py
211+
import os
212+
from pydantic import BaseModel
213+
from launch import LaunchClient
214+
215+
216+
class MyRequestSchema(BaseModel):
217+
x: int
218+
y: str
219+
220+
class MyResponseSchema(BaseModel):
221+
__root__: int
222+
223+
224+
BUNDLE_PARAMS = {
225+
"model_bundle_name": "test-bundle",
226+
"request_schema": MyRequestSchema,
227+
"response_schema": MyResponseSchema,
228+
"repository": "...",
229+
"tag": "...",
230+
"command": [
231+
...
232+
],
233+
"env": {
234+
"TEST_KEY": "test_value",
235+
},
236+
"readiness_initial_delay_seconds": 30,
237+
"triton_model_repository": "...",
238+
"triton_model_replicas": {"": ""},
239+
"triton_num_cpu": 4.0,
240+
"triton_commit_tag": "",
241+
"triton_storage": "",
242+
"triton_memory": "",
243+
"triton_readiness_initial_delay_seconds": 300,
244+
}
245+
246+
client = LaunchClient(api_key=os.getenv("LAUNCH_API_KEY"))
247+
client.create_model_bundle_from_triton_enhanced_runnable_image_v2(**BUNDLE_PARAMS)
248+
```
249+
250+
211251
## Configuring Model Bundles
212252

213253
The `app_config` field of a model bundle is a dictionary that can be used to

docs/index.md

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,20 @@ def my_load_predict_fn(model):
3333
return returns_model_of_x_plus_len_of_y
3434

3535

36-
def my_model(x):
37-
return x * 2
38-
39-
ENV_PARAMS = {
40-
"framework_type": "pytorch",
41-
"pytorch_image_tag": "1.7.1-cuda11.0-cudnn8-runtime",
42-
}
36+
def my_load_model_fn():
37+
def my_model(x):
38+
return x * 2
39+
40+
return my_model
4341

4442
BUNDLE_PARAMS = {
4543
"model_bundle_name": "test-bundle",
46-
"model": my_model,
4744
"load_predict_fn": my_load_predict_fn,
48-
"env_params": ENV_PARAMS,
49-
"requirements": ["pytest==7.2.1", "numpy"], # list your requirements here
45+
"load_model_fn": my_load_model_fn,
5046
"request_schema": MyRequestSchema,
5147
"response_schema": MyResponseSchema,
48+
"requirements": ["pytest==7.2.1", "numpy"], # list your requirements here
49+
"pytorch_image_tag": "1.7.1-cuda11.0-cudnn8-runtime",
5250
}
5351

5452
ENDPOINT_PARAMS = {
@@ -81,7 +79,7 @@ def predict_on_endpoint(request: MyRequestSchema) -> MyResponseSchema:
8179

8280
client = LaunchClient(api_key=os.getenv("LAUNCH_API_KEY"))
8381

84-
client.create_model_bundle(**BUNDLE_PARAMS)
82+
client.create_model_bundle_from_callable_v2(**BUNDLE_PARAMS)
8583
endpoint = client.create_model_endpoint(**ENDPOINT_PARAMS)
8684

8785
request = MyRequestSchema(x=5, y="hello")

launch/api_client/api_client.py

Lines changed: 7 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -416,9 +416,7 @@ def __init__(
416416
self.content = content
417417

418418
def _serialize_json(
419-
self,
420-
in_data: typing.Union[None, int, float, str, bool, dict, list],
421-
eliminate_whitespace: bool = False,
419+
self, in_data: typing.Union[None, int, float, str, bool, dict, list], eliminate_whitespace: bool = False
422420
) -> str:
423421
if eliminate_whitespace:
424422
return json.dumps(in_data, separators=self._json_encoder.compact_separators)
@@ -483,19 +481,7 @@ def __serialize_simple(
483481
def serialize(
484482
self,
485483
in_data: typing.Union[
486-
Schema,
487-
Decimal,
488-
int,
489-
float,
490-
str,
491-
date,
492-
datetime,
493-
None,
494-
bool,
495-
list,
496-
tuple,
497-
dict,
498-
frozendict.frozendict,
484+
Schema, Decimal, int, float, str, date, datetime, None, bool, list, tuple, dict, frozendict.frozendict
499485
],
500486
) -> typing.Dict[str, str]:
501487
if self.schema:
@@ -611,19 +597,7 @@ def get_prefix_separator_iterator(self) -> typing.Optional[PrefixSeparatorIterat
611597
def serialize(
612598
self,
613599
in_data: typing.Union[
614-
Schema,
615-
Decimal,
616-
int,
617-
float,
618-
str,
619-
date,
620-
datetime,
621-
None,
622-
bool,
623-
list,
624-
tuple,
625-
dict,
626-
frozendict.frozendict,
600+
Schema, Decimal, int, float, str, date, datetime, None, bool, list, tuple, dict, frozendict.frozendict
627601
],
628602
prefix_separator_iterator: typing.Optional[PrefixSeparatorIterator] = None,
629603
) -> typing.Dict[str, str]:
@@ -691,19 +665,7 @@ def __init__(
691665
def serialize(
692666
self,
693667
in_data: typing.Union[
694-
Schema,
695-
Decimal,
696-
int,
697-
float,
698-
str,
699-
date,
700-
datetime,
701-
None,
702-
bool,
703-
list,
704-
tuple,
705-
dict,
706-
frozendict.frozendict,
668+
Schema, Decimal, int, float, str, date, datetime, None, bool, list, tuple, dict, frozendict.frozendict
707669
],
708670
) -> typing.Dict[str, str]:
709671
if self.schema:
@@ -770,19 +732,7 @@ def __to_headers(in_data: typing.Tuple[typing.Tuple[str, str], ...]) -> HTTPHead
770732
def serialize(
771733
self,
772734
in_data: typing.Union[
773-
Schema,
774-
Decimal,
775-
int,
776-
float,
777-
str,
778-
date,
779-
datetime,
780-
None,
781-
bool,
782-
list,
783-
tuple,
784-
dict,
785-
frozendict.frozendict,
735+
Schema, Decimal, int, float, str, date, datetime, None, bool, list, tuple, dict, frozendict.frozendict
786736
],
787737
) -> HTTPHeaderDict:
788738
if self.schema:
@@ -940,9 +890,7 @@ def __deserialize_application_octet_stream(
940890
return response.data
941891

942892
@staticmethod
943-
def __deserialize_multipart_form_data(
944-
response: urllib3.HTTPResponse,
945-
) -> typing.Dict[str, typing.Any]:
893+
def __deserialize_multipart_form_data(response: urllib3.HTTPResponse) -> typing.Dict[str, typing.Any]:
946894
msg = email.message_from_bytes(response.data)
947895
return {
948896
part.get_param("name", header="Content-Disposition"): part.get_payload(decode=True).decode(
@@ -1295,9 +1243,7 @@ def _verify_typed_dict_inputs_oapg(
12951243
if required_keys_with_unset_values:
12961244
raise ApiValueError(
12971245
"{} contains invalid unset values for {} required keys: {}".format(
1298-
cls.__name__,
1299-
len(required_keys_with_unset_values),
1300-
required_keys_with_unset_values,
1246+
cls.__name__, len(required_keys_with_unset_values), required_keys_with_unset_values
13011247
)
13021248
)
13031249

launch/api_client/model/batch_job_serialization_format.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ from datetime import date, datetime # noqa: F401
1919

2020
import frozendict # noqa: F401
2121
import typing_extensions # noqa: F401
22-
from launch_client import schemas # noqa: F401
22+
23+
from launch.api_client import schemas # noqa: F401
2324

2425
class BatchJobSerializationFormat(schemas.EnumBase, schemas.StrSchema):
2526
"""NOTE: This class is auto generated by OpenAPI Generator.

launch/api_client/model/batch_job_status.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ from datetime import date, datetime # noqa: F401
1919

2020
import frozendict # noqa: F401
2121
import typing_extensions # noqa: F401
22-
from launch_client import schemas # noqa: F401
22+
23+
from launch.api_client import schemas # noqa: F401
2324

2425
class BatchJobStatus(schemas.EnumBase, schemas.StrSchema):
2526
"""NOTE: This class is auto generated by OpenAPI Generator.

launch/api_client/model/callback_auth.pyi

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ from datetime import date, datetime # noqa: F401
1919

2020
import frozendict # noqa: F401
2121
import typing_extensions # noqa: F401
22-
from launch_client import schemas # noqa: F401
22+
23+
from launch.api_client import schemas # noqa: F401
2324

2425
class CallbackAuth(
2526
schemas.ComposedSchema,
@@ -100,5 +101,5 @@ class CallbackAuth(
100101
**kwargs,
101102
)
102103

103-
from launch_client.model.callback_basic_auth import CallbackBasicAuth
104-
from launch_client.model.callbackm_tls_auth import CallbackmTLSAuth
104+
from launch.api_client.model.callback_basic_auth import CallbackBasicAuth
105+
from launch.api_client.model.callbackm_tls_auth import CallbackmTLSAuth

launch/api_client/model/callback_basic_auth.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ from datetime import date, datetime # noqa: F401
1919

2020
import frozendict # noqa: F401
2121
import typing_extensions # noqa: F401
22-
from launch_client import schemas # noqa: F401
22+
23+
from launch.api_client import schemas # noqa: F401
2324

2425
class CallbackBasicAuth(schemas.DictSchema):
2526
"""NOTE: This class is auto generated by OpenAPI Generator.

launch/api_client/model/callbackm_tls_auth.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ from datetime import date, datetime # noqa: F401
1919

2020
import frozendict # noqa: F401
2121
import typing_extensions # noqa: F401
22-
from launch_client import schemas # noqa: F401
22+
23+
from launch.api_client import schemas # noqa: F401
2324

2425
class CallbackmTLSAuth(schemas.DictSchema):
2526
"""NOTE: This class is auto generated by OpenAPI Generator.

launch/api_client/model/clone_model_bundle_v1_request.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ from datetime import date, datetime # noqa: F401
1919

2020
import frozendict # noqa: F401
2121
import typing_extensions # noqa: F401
22-
from launch_client import schemas # noqa: F401
22+
23+
from launch.api_client import schemas # noqa: F401
2324

2425
class CloneModelBundleV1Request(schemas.DictSchema):
2526
"""NOTE: This class is auto generated by OpenAPI Generator.

launch/api_client/model/clone_model_bundle_v2_request.pyi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ from datetime import date, datetime # noqa: F401
1919

2020
import frozendict # noqa: F401
2121
import typing_extensions # noqa: F401
22-
from launch_client import schemas # noqa: F401
22+
23+
from launch.api_client import schemas # noqa: F401
2324

2425
class CloneModelBundleV2Request(schemas.DictSchema):
2526
"""NOTE: This class is auto generated by OpenAPI Generator.

0 commit comments

Comments
 (0)