Skip to content

Commit 31e133b

Browse files
vishalbollu authored and Miguel Varela Ramos committed
Reduce metrics in cortex get (#2333)
1 parent 24ddab4 commit 31e133b

File tree

21 files changed

+137
-815
lines changed

21 files changed

+137
-815
lines changed

cli/cmd/get.go

-4
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,6 @@ const (
4747
_titleRequested = "requested"
4848
_titleFailed = "failed"
4949
_titleLastupdated = "last update"
50-
_titleAvgRequest = "avg request"
51-
_title2XX = "2XX"
52-
_title4XX = "4XX"
53-
_title5XX = "5XX"
5450
)
5551

5652
var (

cli/cmd/lib_async_apis.go

-15
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ func asyncAPIsTable(asyncAPIs []schema.APIResponse, envNames []string) table.Tab
6262

6363
var totalFailed int32
6464
var totalStale int32
65-
var total4XX int
66-
var total5XX int
6765

6866
for i, asyncAPI := range asyncAPIs {
6967
lastUpdated := time.Unix(asyncAPI.Spec.LastUpdated, 0)
@@ -76,19 +74,10 @@ func asyncAPIsTable(asyncAPIs []schema.APIResponse, envNames []string) table.Tab
7674
asyncAPI.Status.Requested,
7775
asyncAPI.Status.Updated.TotalFailed(),
7876
libtime.SinceStr(&lastUpdated),
79-
latencyStr(asyncAPI.Metrics),
80-
code2XXStr(asyncAPI.Metrics),
81-
code4XXStr(asyncAPI.Metrics),
82-
code5XXStr(asyncAPI.Metrics),
8377
})
8478

8579
totalFailed += asyncAPI.Status.Updated.TotalFailed()
8680
totalStale += asyncAPI.Status.Stale.Ready
87-
88-
if asyncAPI.Metrics.NetworkStats != nil {
89-
total4XX += asyncAPI.Metrics.NetworkStats.Code4XX
90-
total5XX += asyncAPI.Metrics.NetworkStats.Code5XX
91-
}
9281
}
9382

9483
return table.Table{
@@ -101,10 +90,6 @@ func asyncAPIsTable(asyncAPIs []schema.APIResponse, envNames []string) table.Tab
10190
{Title: _titleRequested},
10291
{Title: _titleFailed, Hidden: totalFailed == 0},
10392
{Title: _titleLastupdated},
104-
{Title: _titleAvgRequest},
105-
{Title: _title2XX},
106-
{Title: _title4XX, Hidden: total4XX == 0},
107-
{Title: _title5XX, Hidden: total5XX == 0},
10893
},
10994
Rows: rows,
11095
}

cli/cmd/lib_batch_apis.go

+7-18
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,7 @@ func batchAPITable(batchAPI schema.APIResponse) string {
8787
if len(batchAPI.BatchJobStatuses) == 0 {
8888
out = console.Bold("no submitted batch jobs\n")
8989
} else {
90-
totalFailed := 0
9190
for _, job := range batchAPI.BatchJobStatuses {
92-
succeeded := 0
93-
failed := 0
94-
95-
if job.BatchMetrics != nil {
96-
failed = job.BatchMetrics.Failed
97-
succeeded = job.BatchMetrics.Succeeded
98-
totalFailed += failed
99-
}
10091

10192
jobEndTime := time.Now()
10293
if job.EndTime != nil {
@@ -108,8 +99,7 @@ func batchAPITable(batchAPI schema.APIResponse) string {
10899
jobRows = append(jobRows, []interface{}{
109100
job.ID,
110101
job.Status.Message(),
111-
fmt.Sprintf("%d/%d", succeeded, job.TotalBatchCount),
112-
failed,
102+
job.TotalBatchCount,
113103
job.StartTime.Format(_timeFormat),
114104
duration,
115105
})
@@ -119,8 +109,7 @@ func batchAPITable(batchAPI schema.APIResponse) string {
119109
Headers: []table.Header{
120110
{Title: "job id"},
121111
{Title: "status"},
122-
{Title: "progress"}, // (succeeded/total)
123-
{Title: "failed attempts", Hidden: totalFailed == 0},
112+
{Title: "total batches"},
124113
{Title: "start time"},
125114
{Title: "duration"},
126115
},
@@ -189,14 +178,14 @@ func getBatchJob(env cliconfig.Environment, apiName string, jobID string) (strin
189178
failed := "-"
190179
avgTimePerBatch := "-"
191180

192-
if job.BatchMetrics != nil {
193-
if job.BatchMetrics.AverageTimePerBatch != nil {
194-
batchMetricsDuration := time.Duration(*job.BatchMetrics.AverageTimePerBatch*1000000000) * time.Nanosecond
181+
if resp.Metrics != nil {
182+
if resp.Metrics.AverageTimePerBatch != nil {
183+
batchMetricsDuration := time.Duration(*resp.Metrics.AverageTimePerBatch*1000000000) * time.Nanosecond
195184
avgTimePerBatch = batchMetricsDuration.Truncate(time.Millisecond).String()
196185
}
197186

198-
succeeded = s.Int(job.BatchMetrics.Succeeded)
199-
failed = s.Int(job.BatchMetrics.Failed)
187+
succeeded = s.Int(resp.Metrics.Succeeded)
188+
failed = s.Int(resp.Metrics.Failed)
200189
}
201190

202191
t := table.Table{

cli/cmd/lib_realtime_apis.go

-49
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,14 @@ limitations under the License.
1717
package cmd
1818

1919
import (
20-
"fmt"
2120
"strings"
2221
"time"
2322

2423
"github.com/cortexlabs/cortex/cli/types/cliconfig"
2524
"github.com/cortexlabs/cortex/pkg/lib/console"
26-
s "github.com/cortexlabs/cortex/pkg/lib/strings"
2725
"github.com/cortexlabs/cortex/pkg/lib/table"
2826
libtime "github.com/cortexlabs/cortex/pkg/lib/time"
2927
"github.com/cortexlabs/cortex/pkg/operator/schema"
30-
"github.com/cortexlabs/cortex/pkg/types/metrics"
3128
)
3229

3330
func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment) (string, error) {
@@ -61,8 +58,6 @@ func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) tab
6158

6259
var totalFailed int32
6360
var totalStale int32
64-
var total4XX int
65-
var total5XX int
6661

6762
for i, realtimeAPI := range realtimeAPIs {
6863
lastUpdated := time.Unix(realtimeAPI.Spec.LastUpdated, 0)
@@ -75,19 +70,10 @@ func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) tab
7570
realtimeAPI.Status.Requested,
7671
realtimeAPI.Status.Updated.TotalFailed(),
7772
libtime.SinceStr(&lastUpdated),
78-
latencyStr(realtimeAPI.Metrics),
79-
code2XXStr(realtimeAPI.Metrics),
80-
code4XXStr(realtimeAPI.Metrics),
81-
code5XXStr(realtimeAPI.Metrics),
8273
})
8374

8475
totalFailed += realtimeAPI.Status.Updated.TotalFailed()
8576
totalStale += realtimeAPI.Status.Stale.Ready
86-
87-
if realtimeAPI.Metrics.NetworkStats != nil {
88-
total4XX += realtimeAPI.Metrics.NetworkStats.Code4XX
89-
total5XX += realtimeAPI.Metrics.NetworkStats.Code5XX
90-
}
9177
}
9278

9379
return table.Table{
@@ -100,42 +86,7 @@ func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) tab
10086
{Title: _titleRequested},
10187
{Title: _titleFailed, Hidden: totalFailed == 0},
10288
{Title: _titleLastupdated},
103-
{Title: _titleAvgRequest},
104-
{Title: _title2XX},
105-
{Title: _title4XX, Hidden: total4XX == 0},
106-
{Title: _title5XX, Hidden: total5XX == 0},
10789
},
10890
Rows: rows,
10991
}
11092
}
111-
112-
func latencyStr(metrics *metrics.Metrics) string {
113-
if metrics.NetworkStats == nil || metrics.NetworkStats.Latency == nil {
114-
return "-"
115-
}
116-
if *metrics.NetworkStats.Latency < 1000 {
117-
return fmt.Sprintf("%.6g ms", *metrics.NetworkStats.Latency)
118-
}
119-
return fmt.Sprintf("%.6g s", (*metrics.NetworkStats.Latency)/1000)
120-
}
121-
122-
func code2XXStr(metrics *metrics.Metrics) string {
123-
if metrics.NetworkStats == nil || metrics.NetworkStats.Code2XX == 0 {
124-
return "-"
125-
}
126-
return s.Int(metrics.NetworkStats.Code2XX)
127-
}
128-
129-
func code4XXStr(metrics *metrics.Metrics) string {
130-
if metrics.NetworkStats == nil || metrics.NetworkStats.Code4XX == 0 {
131-
return "-"
132-
}
133-
return s.Int(metrics.NetworkStats.Code4XX)
134-
}
135-
136-
func code5XXStr(metrics *metrics.Metrics) string {
137-
if metrics.NetworkStats == nil || metrics.NetworkStats.Code5XX == 0 {
138-
return "-"
139-
}
140-
return s.Int(metrics.NetworkStats.Code5XX)
141-
}

cli/cmd/lib_traffic_splitters.go

-6
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,6 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
8585
apiRes.Status.Message(),
8686
apiRes.Status.Requested,
8787
libtime.SinceStr(&lastUpdated),
88-
latencyStr(apiRes.Metrics),
89-
code2XXStr(apiRes.Metrics),
90-
code5XXStr(apiRes.Metrics),
9188
})
9289
}
9390

@@ -99,9 +96,6 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
9996
{Title: _titleStatus},
10097
{Title: _titleRequested},
10198
{Title: _titleLastupdated},
102-
{Title: _titleAvgRequest},
103-
{Title: _title2XX},
104-
{Title: _title5XX},
10599
},
106100
Rows: rows,
107101
}, nil

docs/workloads/batch/jobs.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -195,11 +195,6 @@ RESPONSE:
195195
"sqs_url": <string>,
196196
"status": <string>,
197197
"batches_in_queue": <int> # number of batches remaining in the queue
198-
"batch_metrics": {
199-
"succeeded": <int> # number of succeeded batches
200-
"failed": <int> # number of failed attempts
201-
"avg_time_per_batch": <float> (optional) # average time spent working on a batch (only considers successful attempts)
202-
},
203198
"worker_counts": { # worker counts are only available while a job is running
204199
"pending": <int>, # number of workers that are waiting for compute resources to be provisioned
205200
"initializing": <int>, # number of workers that are initializing
@@ -215,6 +210,11 @@ RESPONSE:
215210
"endpoint": <string>
216211
"api_spec": {
217212
...
213+
},
214+
"metrics": {
215+
"succeeded": <int> # number of succeeded batches
216+
"failed": <int> # number of failed attempts
217+
"avg_time_per_batch": <float> (optional) # average time spent working on a batch (only considers successful attempts)
218218
}
219219
}
220220
```

pkg/crds/apis/batch/v1alpha1/batchjob_metrics.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ const (
3434
)
3535

3636
// GetMetrics retrieves the BatchJob metrics from prometheus
37-
func GetMetrics(promAPIv1 promv1.API, jobKey spec.JobKey, t time.Time) (metrics.BatchMetrics, error) {
37+
func GetMetrics(promAPIv1 promv1.API, jobKey spec.JobKey, t time.Time) (*metrics.BatchMetrics, error) {
3838
var (
3939
jobBatchesSucceeded float64
4040
jobBatchesFailed float64
@@ -59,10 +59,10 @@ func GetMetrics(promAPIv1 promv1.API, jobKey spec.JobKey, t time.Time) (metrics.
5959
},
6060
)
6161
if err != nil {
62-
return metrics.BatchMetrics{}, err
62+
return nil, err
6363
}
6464

65-
return metrics.BatchMetrics{
65+
return &metrics.BatchMetrics{
6666
Succeeded: int(jobBatchesSucceeded),
6767
Failed: int(jobBatchesFailed),
6868
AverageTimePerBatch: avgTimePerBatch,

pkg/crds/controllers/batch/batchjob_controller_helpers.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -697,7 +697,7 @@ func getMetrics(r *BatchJobReconciler, batchJob batch.BatchJob) (metrics.BatchMe
697697
return metrics.BatchMetrics{}, err
698698
}
699699

700-
return jobMetrics, nil
700+
return *jobMetrics, nil
701701
}
702702

703703
func saveJobMetrics(r *BatchJobReconciler, batchJob batch.BatchJob) error {

pkg/operator/endpoints/get_batch_job.go

+2-31
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,9 @@ package endpoints
1818

1919
import (
2020
"net/http"
21-
"net/url"
2221

23-
"github.com/cortexlabs/cortex/pkg/operator/operator"
2422
"github.com/cortexlabs/cortex/pkg/operator/resources"
2523
"github.com/cortexlabs/cortex/pkg/operator/resources/job/batchapi"
26-
"github.com/cortexlabs/cortex/pkg/operator/schema"
2724
"github.com/cortexlabs/cortex/pkg/types/spec"
2825
"github.com/cortexlabs/cortex/pkg/types/userconfig"
2926
"github.com/gorilla/mux"
@@ -54,37 +51,11 @@ func GetBatchJob(w http.ResponseWriter, r *http.Request) {
5451
Kind: userconfig.BatchAPIKind,
5552
}
5653

57-
jobStatus, err := batchapi.GetJobStatus(jobKey)
54+
jobResponse, err := batchapi.GetJob(jobKey)
5855
if err != nil {
5956
respondError(w, r, err)
6057
return
6158
}
6259

63-
apiSpec, err := operator.DownloadAPISpec(jobStatus.APIName, jobStatus.APIID)
64-
if err != nil {
65-
respondError(w, r, err)
66-
return
67-
}
68-
69-
endpoint, err := operator.APIEndpoint(apiSpec)
70-
if err != nil {
71-
respondError(w, r, err)
72-
return
73-
}
74-
75-
parsedURL, err := url.Parse(endpoint)
76-
if err != nil {
77-
respondError(w, r, err)
78-
}
79-
q := parsedURL.Query()
80-
q.Add("jobID", jobKey.ID)
81-
parsedURL.RawQuery = q.Encode()
82-
83-
response := schema.BatchJobResponse{
84-
JobStatus: *jobStatus,
85-
APISpec: *apiSpec,
86-
Endpoint: parsedURL.String(),
87-
}
88-
89-
respondJSON(w, r, response)
60+
respondJSON(w, r, jobResponse)
9061
}

pkg/operator/endpoints/logs_job.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func GetJobLogURL(w http.ResponseWriter, r *http.Request) {
8181

8282
switch deployedResource.Kind {
8383
case userconfig.BatchAPIKind:
84-
jobStatus, err := batchapi.GetJobStatus(spec.JobKey{
84+
jobResponse, err := batchapi.GetJob(spec.JobKey{
8585
ID: jobID,
8686
APIName: apiName,
8787
Kind: userconfig.BatchAPIKind,
@@ -90,7 +90,7 @@ func GetJobLogURL(w http.ResponseWriter, r *http.Request) {
9090
respondError(w, r, err)
9191
return
9292
}
93-
logURL, err := operator.BatchJobLogURL(apiName, *jobStatus)
93+
logURL, err := operator.BatchJobLogURL(apiName, jobResponse.JobStatus)
9494
if err != nil {
9595
respondError(w, r, err)
9696
return

pkg/operator/resources/asyncapi/api.go

-12
Original file line numberDiff line numberDiff line change
@@ -265,11 +265,6 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
265265
return nil, err
266266
}
267267

268-
metrics, err := GetMetrics(*api)
269-
if err != nil {
270-
return nil, err
271-
}
272-
273268
dashboardURL := pointer.String(getDashboardURL(api.Name))
274269

275270
return []schema.APIResponse{
@@ -278,7 +273,6 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
278273
Status: status,
279274
Endpoint: apiEndpoint,
280275
DashboardURL: dashboardURL,
281-
Metrics: metrics,
282276
},
283277
}, nil
284278
}
@@ -295,11 +289,6 @@ func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.APIR
295289
return nil, err
296290
}
297291

298-
allMetrics, err := GetMultipleMetrics(apis)
299-
if err != nil {
300-
return nil, err
301-
}
302-
303292
asyncAPIs := make([]schema.APIResponse, len(apis))
304293

305294
for i := range apis {
@@ -313,7 +302,6 @@ func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.APIR
313302
Spec: api,
314303
Status: &statuses[i],
315304
Endpoint: endpoint,
316-
Metrics: &allMetrics[i],
317305
}
318306
}
319307

0 commit comments

Comments
 (0)