Skip to content

Commit 096780a

Browse files
authored
External constants support (#163)
1 parent 80e091e commit 096780a

File tree

11 files changed

+93
-20
lines changed

11 files changed

+93
-20
lines changed

docs/applications/advanced/external-models.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ $ aws s3 cp model.zip s3://your-bucket/model.zip
2323
- kind: api
2424
name: my-api
2525
external_model:
26-
path: s3://your-bucket/model.zip
26+
path: s3://my-bucket/model.zip
2727
region: us-west-2
2828
compute:
2929
replicas: 5

docs/applications/resources/apis.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ Serve models at scale and use them to build smarter applications.
99
name: <string> # API name (required)
1010
model: <string> # reference to a Cortex model (this or external_model must be specified)
1111
external_model: # (this or model must be specified)
12-
path: <string> # path to a zipped model dir
13-
region: <string> # S3 region (default: us-west-2)
12+
path: <string> # path to a zipped model dir (e.g. s3://my-bucket/model.zip)
13+
region: <string> # S3 region (default: us-west-2)
1414
compute:
1515
replicas: <int> # number of replicas to launch (default: 1)
1616
cpu: <string> # CPU request (default: Null)

docs/applications/resources/constants.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ Constants represent literal values which can be used in other Cortex resources.
88
- kind: constant
99
name: <string> # constant name (required)
1010
type: <output_type> # the type of the constant (optional, will be inferred from value if not specified)
11-
value: <output_value> # a literal value (required)
11+
value: <output_value> # a literal value (this or external_model must be specified)
12+
external_model: # (this or value must be specified)
13+
path: <string> # path to a JSON object (e.g. s3://my-bucket/constant.json)
14+
region: <string> # S3 region (default: us-west-2)
15+
1216
```
1317

1418
See [Data Types](data-types.md) for details about output types and values.

docs/applications/resources/environments.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ Transfer data at scale from data warehouses like S3 into the Cortex environment.
2626
data:
2727
type: csv # file type (required)
2828
path: s3a://<bucket_name>/<file_name> # S3 is currently supported (required)
29-
region: us-west-2 # S3 region (default: us-west-2)
29+
region: us-west-2 # S3 region (default: us-west-2)
3030
drop_null: <bool> # drop any rows that contain at least 1 null value (default: false)
3131
csv_config: <csv_config> # optional configuration that can be provided
3232
schema:
@@ -65,7 +65,7 @@ csv_config:
6565
data:
6666
type: parquet # file type (required)
6767
path: s3a://<bucket_name>/<file_name> # S3 is currently supported (required)
68-
region: us-west-2 # S3 region (default: us-west-2)
68+
region: us-west-2 # S3 region (default: us-west-2)
6969
drop_null: <bool> # drop any rows that contain at least 1 null value (default: false)
7070
schema:
7171
- parquet_column_name: <string> # name of the column in the parquet file (required)

pkg/operator/api/userconfig/apis.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ func (api *API) Validate() error {
119119
}
120120

121121
if ok, err := aws.IsS3FileExternal(bucket, key, api.ExternalModel.Region); err != nil || !ok {
122-
return errors.Wrap(ErrorExternalModelNotFound(api.ExternalModel.Path), Identify(api), ExternalModelKey, PathKey)
122+
return errors.Wrap(ErrorExternalNotFound(api.ExternalModel.Path), Identify(api), ExternalModelKey, PathKey)
123123
}
124124
}
125125

pkg/operator/api/userconfig/config_key.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const (
2727
PathKey = "path"
2828
OutputTypeKey = "output_type"
2929
TagsKey = "tags"
30+
ExternalKey = "external"
3031

3132
// input schema options
3233
OptionalOptKey = "_optional"

pkg/operator/api/userconfig/constants.go

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package userconfig
1818

1919
import (
20+
"github.com/cortexlabs/cortex/pkg/lib/aws"
2021
cr "github.com/cortexlabs/cortex/pkg/lib/configreader"
2122
"github.com/cortexlabs/cortex/pkg/lib/errors"
2223
"github.com/cortexlabs/cortex/pkg/operator/api/resource"
@@ -26,9 +27,10 @@ type Constants []*Constant
2627

2728
type Constant struct {
2829
ResourceFields
29-
Type OutputSchema `json:"type" yaml:"type"`
30-
Value interface{} `json:"value" yaml:"value"`
31-
Tags Tags `json:"tags" yaml:"tags"`
30+
Type OutputSchema `json:"type" yaml:"type"`
31+
Value interface{} `json:"value" yaml:"value"`
32+
Tags Tags `json:"tags" yaml:"tags"`
33+
External *ExternalConstant `json:"external" yaml:"external"`
3234
}
3335

3436
var constantValidation = &cr.StructValidation{
@@ -50,14 +52,43 @@ var constantValidation = &cr.StructValidation{
5052
{
5153
StructField: "Value",
5254
InterfaceValidation: &cr.InterfaceValidation{
53-
Required: true,
55+
Required: false,
5456
},
5557
},
58+
{
59+
StructField: "External",
60+
StructValidation: externalModelFieldValidation,
61+
},
5662
tagsFieldValidation,
5763
typeFieldValidation,
5864
},
5965
}
6066

67+
type ExternalConstant struct {
68+
Path string `json:"path" yaml:"path"`
69+
Region string `json:"region" yaml:"region"`
70+
}
71+
72+
var externalConstantFieldValidation = &cr.StructValidation{
73+
DefaultNil: true,
74+
StructFieldValidations: []*cr.StructFieldValidation{
75+
{
76+
StructField: "Path",
77+
StringValidation: &cr.StringValidation{
78+
Validator: cr.GetS3PathValidator(),
79+
Required: true,
80+
},
81+
},
82+
{
83+
StructField: "Region",
84+
StringValidation: &cr.StringValidation{
85+
Default: aws.DefaultS3Region,
86+
AllowedValues: aws.S3Regions.Slice(),
87+
},
88+
},
89+
},
90+
}
91+
6192
func (constants Constants) Validate() error {
6293
for _, constant := range constants {
6394
if err := constant.Validate(); err != nil {
@@ -79,6 +110,25 @@ func (constants Constants) Validate() error {
79110
}
80111

81112
func (constant *Constant) Validate() error {
113+
if constant.External == nil && constant.Value == nil {
114+
return errors.Wrap(ErrorSpecifyOnlyOneMissing(ValueKey, ExternalKey), Identify(constant))
115+
}
116+
117+
if constant.External != nil && constant.Value != nil {
118+
return errors.Wrap(ErrorSpecifyOnlyOne(ValueKey, ExternalKey), Identify(constant))
119+
}
120+
121+
if constant.External != nil {
122+
bucket, key, err := aws.SplitS3Path(constant.External.Path)
123+
if err != nil {
124+
return errors.Wrap(err, Identify(constant), ExternalKey, PathKey)
125+
}
126+
127+
if ok, err := aws.IsS3FileExternal(bucket, key, constant.External.Region); err != nil || !ok {
128+
return errors.Wrap(ErrorExternalNotFound(constant.External.Path), Identify(constant), ExternalKey, PathKey)
129+
}
130+
}
131+
82132
if constant.Type != nil {
83133
castedValue, err := CastOutputValue(constant.Value, constant.Type)
84134
if err != nil {

pkg/operator/api/userconfig/errors.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ const (
7575
ErrEnvSchemaMismatch
7676
ErrExtraResourcesWithExternalAPIs
7777
ErrImplDoesNotExist
78-
ErrExternalModelNotFound
78+
ErrExternalNotFound
7979
)
8080

8181
var errorKinds = []string{
@@ -125,10 +125,10 @@ var errorKinds = []string{
125125
"err_env_schema_mismatch",
126126
"err_extra_resources_with_external_a_p_is",
127127
"err_impl_does_not_exist",
128-
"err_external_model_not_found",
128+
"err_external_not_found",
129129
}
130130

131-
var _ = [1]int{}[int(ErrExternalModelNotFound)-(len(errorKinds)-1)] // Ensure list length matches
131+
var _ = [1]int{}[int(ErrExternalNotFound)-(len(errorKinds)-1)] // Ensure list length matches
132132

133133
func (t ErrorKind) String() string {
134134
return errorKinds[t]
@@ -578,9 +578,9 @@ func ErrorImplDoesNotExist(path string) error {
578578
}
579579
}
580580

581-
func ErrorExternalModelNotFound(path string) error {
581+
func ErrorExternalNotFound(path string) error {
582582
return Error{
583-
Kind: ErrExternalModelNotFound,
583+
Kind: ErrExternalNotFound,
584584
message: fmt.Sprintf("%s: file not found or inaccessible", path),
585585
}
586586
}

pkg/workloads/lib/context.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,12 @@ def populate_values(self, input, input_schema, preserve_column_refs):
441441
if util.is_resource_ref(input):
442442
res_name = util.get_resource_ref(input)
443443
if res_name in self.constants:
444-
const_val = self.constants[res_name]["value"]
444+
if self.constants[res_name]["value"]:
445+
const_val = self.constants[res_name]["value"]
446+
elif self.constants[res_name]["external"]:
447+
const_val = self.storage.get_json_external(
448+
self.constants[res_name]["external"]["path"]
449+
)
445450
try:
446451
return self.populate_values(const_val, input_schema, preserve_column_refs)
447452
except CortexException as e:

pkg/workloads/lib/storage/s3.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,12 @@ def _get_matching_s3_keys_generator(self, prefix="", suffix=""):
114114
def _upload_string_to_s3(self, string, key):
115115
self.s3.put_object(Bucket=self.bucket, Key=key, Body=string)
116116

117-
def _read_bytes_from_s3(self, key, allow_missing=False):
117+
def _read_bytes_from_s3(self, key, allow_missing=False, ext_bucket=None):
118118
try:
119-
byte_array = self.s3.get_object(Bucket=self.bucket, Key=key)["Body"].read()
119+
bucket = self.bucket
120+
if ext_bucket is not None:
121+
bucket = ext_bucket
122+
byte_array = self.s3.get_object(Bucket=bucket, Key=key)["Body"].read()
120123
except self.s3.exceptions.NoSuchKey as e:
121124
if allow_missing:
122125
return None
@@ -190,3 +193,13 @@ def download_file_external(self, s3_path, local_path):
190193
return local_path
191194
except Exception as e:
192195
raise CortexException("bucket " + bucket, "key " + key) from e
196+
197+
def get_json_external(self, s3_path):
198+
try:
199+
bucket, key = self.deconstruct_s3_path(s3_path)
200+
obj = self._read_bytes_from_s3(key, ext_bucket=bucket)
201+
if obj is None:
202+
return None
203+
return json.loads(obj.decode("utf-8"))
204+
except Exception as e:
205+
raise CortexException("bucket " + bucket, "key " + key) from e

pkg/workloads/spark_job/test/unit/spark_util_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ def test_read_parquet_infer_invalid(spark, write_parquet_file, ctx_obj, get_cont
669669
},
670670
},
671671
{
672-
"data": [("1", 0.1, "yolo"), ("1", 1.0, "yolo"), ("1", 1.1, "yolo")],
672+
"data": [("1", 0.1, "a"), ("1", 1.0, "a"), ("1", 1.1, "a")],
673673
"schema": StructType(
674674
[
675675
StructField("a_str", StringType()),

0 commit comments

Comments
 (0)