Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
f4503b0
Round 1
vishalbollu Aug 8, 2019
a0d82be
Update #2
vishalbollu Aug 8, 2019
2e42615
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 12, 2019
d604414
Refactor common code
vishalbollu Aug 13, 2019
a0a8598
Cleanup metrics.go
vishalbollu Aug 13, 2019
4257d7d
Remove unnecessary cloudwatch files
vishalbollu Aug 13, 2019
22d6a47
Refactor code and fix bug in classes
vishalbollu Aug 14, 2019
46c5bf4
Cleanup examples
vishalbollu Aug 14, 2019
1525630
More cleanup
vishalbollu Aug 14, 2019
ca80585
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 14, 2019
d65225e
Remove logger.info lines
vishalbollu Aug 14, 2019
8ed9d49
Reset cortex.yaml
vishalbollu Aug 14, 2019
9119ade
Reset cortex.yaml again
vishalbollu Aug 14, 2019
bf74184
Linting
vishalbollu Aug 14, 2019
9b24a4f
Respond to PR comments
vishalbollu Aug 15, 2019
cb05031
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 15, 2019
65f75ec
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 18, 2019
862b5dc
Add realtime
vishalbollu Aug 18, 2019
6ceca20
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 18, 2019
3cca1ff
Cleanup round 1
vishalbollu Aug 19, 2019
7d72882
Add realtime metrics querying
vishalbollu Aug 19, 2019
268e290
Cleanup 2
vishalbollu Aug 19, 2019
0de825c
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 19, 2019
02b2760
Fix linting
vishalbollu Aug 19, 2019
5a8b1ce
Address PR comments
vishalbollu Aug 20, 2019
7e8d424
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 20, 2019
b4c701c
Fix linting
vishalbollu Aug 20, 2019
e6baa02
Add custom error
vishalbollu Aug 20, 2019
1bfadb1
Fix typo in errors.go
vishalbollu Aug 20, 2019
147f155
Adjust error messages
vishalbollu Aug 20, 2019
8243dab
Updated error message when key is missing
vishalbollu Aug 20, 2019
0a2efa9
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 20, 2019
fe3abf4
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 20, 2019
64d4f0e
Merge branch 'master' into cloudwatch-metrics
vishalbollu Aug 21, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,6 @@ $ ./cortex.sh install cli

- **Log streaming:** Cortex streams logs from your deployed models to your CLI.

- **Prediction Monitoring:** Cortex can monitor network metrics and track predictions.

- **CPU / GPU support:** Cortex can run inference on CPU or GPU infrastructure.
162 changes: 156 additions & 6 deletions cli/cmd/get.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/cortexlabs/yaml"
"github.com/spf13/cobra"

"github.com/cortexlabs/cortex/pkg/consts"
"github.com/cortexlabs/cortex/pkg/lib/console"
"github.com/cortexlabs/cortex/pkg/lib/errors"
"github.com/cortexlabs/cortex/pkg/lib/json"
Expand Down Expand Up @@ -126,7 +127,7 @@ func allDeploymentsStr() (string, error) {
}

if len(resourcesRes.Deployments) == 0 {
return "no deployments found", nil
return console.Bold("\nno deployments found"), nil
}

rows := make([][]interface{}, len(resourcesRes.Deployments))
Expand Down Expand Up @@ -464,15 +465,25 @@ func describeAPI(name string, resourcesRes *schema.GetResourcesResponse, flagVer
apiEndpoint := urls.Join(resourcesRes.APIsBaseURL, anyAPIStatus.Path)

out := "\n" + console.Bold("url: ") + apiEndpoint + "\n"
out += fmt.Sprintf("%s curl -X POST -H \"Content-Type: application/json\" %s -d @samples.json\n\n", console.Bold("curl:"), apiEndpoint)
out += fmt.Sprintf(console.Bold("updated at:")+" %s\n", libtime.LocalTimestamp(updatedAt))
out += fmt.Sprintf("%s curl -X POST -H \"Content-Type: application/json\" %s -d @samples.json\n", console.Bold("curl:"), apiEndpoint)
out += fmt.Sprintf(console.Bold("updated at:")+" %s\n\n", libtime.LocalTimestamp(updatedAt))

out += "\n"
t := table.Table{
statusTable := table.Table{
Headers: headers,
Rows: [][]interface{}{row},
}
out += table.MustFormat(t)

var predictionMetrics string
apiMetrics, err := getAPIMetrics(ctx.App.Name, api.Name)
if err != nil || apiMetrics == nil {
predictionMetrics = "\n\nmetrics are not available yet"
} else {
statusTable = appendNetworkMetrics(statusTable, apiMetrics)
predictionMetrics = "\n\n" + predictionMetricsTable(apiMetrics, api)
}

out += table.MustFormat(statusTable)
out += predictionMetrics

if !flagVerbose {
return out, nil
Expand Down Expand Up @@ -507,6 +518,145 @@ func describeAPI(name string, resourcesRes *schema.GetResourcesResponse, flagVer
return out, nil
}

func getAPIMetrics(appName, apiName string) (*schema.APIMetrics, error) {
params := map[string]string{"appName": appName, "apiName": apiName}
httpResponse, err := HTTPGet("/metrics", params)
if err != nil {
return nil, err
}

var apiMetrics schema.APIMetrics
err = json.Unmarshal(httpResponse, &apiMetrics)
if err != nil {
return nil, err
}

return &apiMetrics, nil
}

func appendNetworkMetrics(apiTable table.Table, apiMetrics *schema.APIMetrics) table.Table {
latency := "-"
if apiMetrics.NetworkStats.Latency != nil {
latency = fmt.Sprintf("%.9g", *apiMetrics.NetworkStats.Latency)
}

headers := []table.Header{
{Title: "avg latency"},
{Title: "2XX", Hidden: apiMetrics.NetworkStats.Code2XX == 0},
{Title: "4XX", Hidden: apiMetrics.NetworkStats.Code4XX == 0},
{Title: "5XX", Hidden: apiMetrics.NetworkStats.Code5XX == 0},
}

row := []interface{}{
latency,
apiMetrics.NetworkStats.Code2XX,
apiMetrics.NetworkStats.Code4XX,
apiMetrics.NetworkStats.Code5XX,
}

apiTable.Headers = append(apiTable.Headers, headers...)
apiTable.Rows[0] = append(apiTable.Rows[0], row...)

return apiTable
}

func predictionMetricsTable(apiMetrics *schema.APIMetrics, api *context.API) string {
if api.Tracker == nil {
return "a tracker has not been configured to record predictions"
}

if api.Tracker.ModelType == userconfig.ClassificationModelType {
return classificationMetricsTable(apiMetrics)
}
return regressionMetricsTable(apiMetrics)
}

func regressionMetricsTable(apiMetrics *schema.APIMetrics) string {
minStr := "-"
if apiMetrics.RegressionStats.Min != nil {
minStr = fmt.Sprintf("%.9g", *apiMetrics.RegressionStats.Min)
}

maxStr := "-"
if apiMetrics.RegressionStats.Max != nil {
maxStr = fmt.Sprintf("%.9g", *apiMetrics.RegressionStats.Max)
}

avgStr := "-"
if apiMetrics.RegressionStats.Avg != nil {
avgStr = fmt.Sprintf("%.9g", *apiMetrics.RegressionStats.Avg)
}

t := table.Table{
Headers: []table.Header{
{Title: "min", MaxWidth: 10},
{Title: "max", MaxWidth: 10},
{Title: "avg", MaxWidth: 10},
},
Rows: [][]interface{}{{minStr, maxStr, avgStr}},
}

return table.MustFormat(t)
}

func classificationMetricsTable(apiMetrics *schema.APIMetrics) string {
classList := make([]string, len(apiMetrics.ClassDistribution))

i := 0
for inputName := range apiMetrics.ClassDistribution {
classList[i] = inputName
i++
}
sort.Strings(classList)

if len(classList) > 0 && len(classList) < 4 {
row := []interface{}{}
headers := []table.Header{}

for _, className := range classList {
headers = append(headers, table.Header{Title: s.TruncateEllipses(className, 20), MaxWidth: 20})
row = append(row, apiMetrics.ClassDistribution[className])
}

t := table.Table{
Headers: headers,
Rows: [][]interface{}{row},
}

return table.MustFormat(t)
}

rows := make([][]interface{}, len(classList))
for rowNum, className := range classList {
rows[rowNum] = []interface{}{
className,
apiMetrics.ClassDistribution[className],
}
}

if len(classList) == 0 {
rows = append(rows, []interface{}{
"-",
"-",
})
}

t := table.Table{
Headers: []table.Header{
{Title: "class", MaxWidth: 40},
{Title: "count", MaxWidth: 20},
},
Rows: rows,
}

out := table.MustFormat(t)

if len(classList) == consts.MaxClassesPerRequest {
out += fmt.Sprintf("\n\nlisting at most %d classes, the complete list can be found in your cloudwatch dashboard", consts.MaxClassesPerRequest)
}
return out
}

func describeModelInput(groupStatus *resource.APIGroupStatus, apiEndpoint string) string {
if groupStatus.Available() == 0 {
return "waiting for api to be ready"
Expand Down
3 changes: 3 additions & 0 deletions docs/deployments/apis.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ Serve models at scale.
model: <string> # path to an exported model (e.g. s3://my-bucket/model.zip)
model_format: <string> # model format, must be "tensorflow" or "onnx" (default: "onnx" if model path ends with .onnx, "tensorflow" if model path ends with .zip)
request_handler: <string> # path to the request handler implementation file, relative to the cortex root
tracker:
key: <string> # json key to track in the response payload
model_type: <string> # model type, must be "classification" or "regression"
compute:
min_replicas: <int> # minimum number of replicas (default: 1)
max_replicas: <int> # maximum number of replicas (default: 100)
Expand Down
2 changes: 1 addition & 1 deletion manager/manifests/istio-metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
params:
value: response.duration
dimensions:
REQUEST_PATH: request.url_path | "unknown"
RequestPath: request.url_path | "unknown"
---
apiVersion: config.istio.io/v1alpha2
kind: handler
Expand Down
2 changes: 2 additions & 0 deletions pkg/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,6 @@ var (
MetadataDir = "metadata"

TelemetryURL = "https://telemetry.cortexlabs.dev"

MaxClassesPerRequest = 75 // cloudwatch.GeMetricData can get up to 100 metrics per request, avoid multiple requests and have room for other stats
)
3 changes: 3 additions & 0 deletions pkg/lib/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package aws
import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudwatch"
"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/sts"
Expand All @@ -33,6 +34,7 @@ type Client struct {
s3Client *s3.S3
stsClient *sts.STS
cloudWatchLogsClient *cloudwatchlogs.CloudWatchLogs
CloudWatchMetrics *cloudwatch.CloudWatch
awsAccountID string
HashedAccountID string
}
Expand All @@ -48,6 +50,7 @@ func New(region, bucket string) *Client {
Region: region,
s3Client: s3.New(sess),
stsClient: sts.New(sess),
CloudWatchMetrics: cloudwatch.New(sess),
cloudWatchLogsClient: cloudwatchlogs.New(sess),
}
response, err := awsClient.stsClient.GetCallerIdentity(nil)
Expand Down
86 changes: 86 additions & 0 deletions pkg/lib/slices/float64_ptr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
Copyright 2019 Cortex Labs, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package slices

import (
"github.com/cortexlabs/cortex/pkg/lib/errors"
)

// For adding integers stored in floats
func Float64PtrSumInt(floats ...*float64) int {
sum := 0
for _, num := range floats {
if num != nil {
sum += int(*num)
}
}
return sum
}

func Float64PtrMin(floats ...*float64) *float64 {
var min *float64

for _, num := range floats {
switch {
case num != nil && min != nil && *num < *min:
min = num
case num != nil && min == nil:
min = num
}
}
return min
}

func Float64PtrMax(floats ...*float64) *float64 {
var max *float64

for _, num := range floats {
switch {
case num != nil && max != nil && *num > *max:
max = num
case num != nil && max == nil:
max = num
}
}
return max
}

func Float64PtrAvg(values []*float64, weights []*float64) (*float64, error) {
if len(values) != len(weights) {
return nil, errors.New("length of values is not equal to length of weights")
}

totalWeight := 0.0
for i, valPtr := range values {
if valPtr != nil && weights[i] != nil && *weights[i] > 0 {
totalWeight += *weights[i]
}
}

if totalWeight == 0.0 {
return nil, nil
}

avg := 0.0
for i, valPtr := range values {
if valPtr != nil && weights[i] != nil && *weights[i] > 0 {
avg += (*valPtr) * (*weights[i]) / float64(totalWeight)
}
}

return &avg, nil
}
Loading