Skip to content

Commit 96d2380

Browse files
committed
cmd/coordinator: add basic monitoring for reverse buildlets
Carried over from https://golang.org/cl/47490. Updates golang/go#15760 Change-Id: I8b4cc007dea8e32a23cac4cb13bb313d9ec5d4ac Reviewed-on: https://go-review.googlesource.com/47934 Reviewed-by: Brad Fitzpatrick <[email protected]>
1 parent b2cc0a7 commit 96d2380

File tree

8 files changed

+439
-25
lines changed

8 files changed

+439
-25
lines changed

cmd/coordinator/Dockerfile.0

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,57 +7,57 @@ LABEL maintainer "[email protected]"
77

88
# BEGIN deps (run `make update-deps` to update)
99

10-
# Repo cloud.google.com/go at 2f1da5d (2017-05-01)
11-
RUN go get -d cloud.google.com/go/compute/metadata `#and 9 other pkgs` &&\
12-
(cd /go/src/cloud.google.com/go && git reset --hard 2f1da5d762c81a12c516bfb8a9ede96f42750361)
10+
# Repo cloud.google.com/go at 242055b (2017-07-15)
11+
RUN go get -d cloud.google.com/go/compute/metadata `#and 10 other pkgs` &&\
12+
(cd /go/src/cloud.google.com/go && git reset --hard 242055bb2e2977620eca971b4e9aea46c4e31deb)
1313

14-
# Repo github.com/golang/protobuf at 6a1fa94 (2017-06-22)
15-
RUN go get -d github.com/golang/protobuf/proto `#and 6 other pkgs` &&\
16-
(cd /go/src/github.com/golang/protobuf && git reset --hard 6a1fa9404c0aebf36c879bc50152edcc953910d2)
14+
# Repo github.com/golang/protobuf at 0a4f71a (2017-07-11)
15+
RUN go get -d github.com/golang/protobuf/proto `#and 9 other pkgs` &&\
16+
(cd /go/src/github.com/golang/protobuf && git reset --hard 0a4f71a498b7c4812f64969510bcb4eca251e33a)
1717

18-
# Repo github.com/googleapis/gax-go at da06d19 (2016-11-07)
18+
# Repo github.com/googleapis/gax-go at 84ed267 (2017-06-10)
1919
RUN go get -d github.com/googleapis/gax-go &&\
20-
(cd /go/src/github.com/googleapis/gax-go && git reset --hard da06d194a00e19ce00d9011a13931c3f6f6887c7)
20+
(cd /go/src/github.com/googleapis/gax-go && git reset --hard 84ed26760e7f6f80887a2fbfb50db3cc415d2cea)
2121

2222
# Repo go4.org at 034d17a (2017-05-25)
2323
RUN go get -d go4.org/syncutil &&\
2424
(cd /go/src/go4.org && git reset --hard 034d17a462f7b2dcd1a4a73553ec5357ff6e6c6e)
2525

26-
# Repo golang.org/x/crypto at 0fe9631 (2017-05-16)
26+
# Repo golang.org/x/crypto at a48ac81 (2017-07-06)
2727
RUN go get -d golang.org/x/crypto/acme `#and 2 other pkgs` &&\
28-
(cd /go/src/golang.org/x/crypto && git reset --hard 0fe963104e9d1877082f8fb38f816fcd97eb1d10)
28+
(cd /go/src/golang.org/x/crypto && git reset --hard a48ac81e47fd6f9ed1258f3b60ae9e75f93cb7ed)
2929

30-
# Repo golang.org/x/net at f01ecb6 (2017-07-11)
30+
# Repo golang.org/x/net at b3756b4 (2017-07-16)
3131
RUN go get -d golang.org/x/net/context `#and 8 other pkgs` &&\
32-
(cd /go/src/golang.org/x/net && git reset --hard f01ecb60fe3835d80d9a0b7b2bf24b228c89260e)
32+
(cd /go/src/golang.org/x/net && git reset --hard b3756b4b77d7b13260a0a2ec658753cf48922eac)
3333

34-
# Repo golang.org/x/oauth2 at ad516a2 (2017-05-10)
34+
# Repo golang.org/x/oauth2 at cce311a (2017-06-29)
3535
RUN go get -d golang.org/x/oauth2 `#and 5 other pkgs` &&\
36-
(cd /go/src/golang.org/x/oauth2 && git reset --hard ad516a297a9f2a74ecc244861b298c94bdd28b9d)
36+
(cd /go/src/golang.org/x/oauth2 && git reset --hard cce311a261e6fcf29de72ca96827bdb0b7d9c9e6)
3737

38-
# Repo golang.org/x/perf at b74b457 (2017-04-21)
38+
# Repo golang.org/x/perf at 4979bd1 (2017-07-06)
3939
RUN go get -d golang.org/x/perf/storage `#and 2 other pkgs` &&\
40-
(cd /go/src/golang.org/x/perf && git reset --hard b74b45749c47cd1edf5b64df78ecf13bd2dd944f)
40+
(cd /go/src/golang.org/x/perf && git reset --hard 4979bd159b01a7695a1b277f4ea76cab354f278c)
4141

42-
# Repo golang.org/x/text at cfdf022 (2017-07-06)
42+
# Repo golang.org/x/text at 836efe4 (2017-07-14)
4343
RUN go get -d golang.org/x/text/secure/bidirule `#and 4 other pkgs` &&\
44-
(cd /go/src/golang.org/x/text && git reset --hard cfdf022e86b4ecfb646e1efbd7db175dd623a8fa)
44+
(cd /go/src/golang.org/x/text && git reset --hard 836efe42bb4aa16aaa17b9c155d8813d336ed720)
4545

4646
# Repo golang.org/x/time at 8be79e1 (2017-04-24)
4747
RUN go get -d golang.org/x/time/rate &&\
4848
(cd /go/src/golang.org/x/time && git reset --hard 8be79e1e0910c292df4e79c241bb7e8f7e725959)
4949

50-
# Repo google.golang.org/api at 16ab375 (2017-03-21)
50+
# Repo google.golang.org/api at e665075 (2017-07-10)
5151
RUN go get -d google.golang.org/api/compute/v1 `#and 12 other pkgs` &&\
52-
(cd /go/src/google.golang.org/api && git reset --hard 16ab375f94503bfa0d19db78e96bffbe1a34354f)
52+
(cd /go/src/google.golang.org/api && git reset --hard e665075b5ff79143ba49c58fab02df9dc122afd5)
5353

54-
# Repo google.golang.org/genproto at aa2eb68 (2017-06-01)
55-
RUN go get -d google.golang.org/genproto/googleapis/api/annotations `#and 5 other pkgs` &&\
56-
(cd /go/src/google.golang.org/genproto && git reset --hard aa2eb687b4d3e17154372564ad8d6bf11c3cf21f)
54+
# Repo google.golang.org/genproto at b0a3dcf (2017-07-12)
55+
RUN go get -d google.golang.org/genproto/googleapis/api/annotations `#and 10 other pkgs` &&\
56+
(cd /go/src/google.golang.org/genproto && git reset --hard b0a3dcfcd1a9bd48e63634bd8802960804cf8315)
5757

58-
# Repo google.golang.org/grpc at 41d9b6e (2017-07-05)
58+
# Repo google.golang.org/grpc at ce03e9c (2017-07-17)
5959
RUN go get -d google.golang.org/grpc `#and 15 other pkgs` &&\
60-
(cd /go/src/google.golang.org/grpc && git reset --hard 41d9b6ea2a6335f3a22074ed35c0542c9da1baf4)
60+
(cd /go/src/google.golang.org/grpc && git reset --hard ce03e9cc712c2009613184c73f2c140cc42beffe)
6161

6262
# Repo gopkg.in/inf.v0 at 3887ee9 (2015-09-11)
6363
RUN go get -d gopkg.in/inf.v0 &&\
@@ -76,10 +76,14 @@ RUN go install cloud.google.com/go/compute/metadata \
7676
cloud.google.com/go/internal/fields \
7777
cloud.google.com/go/internal/optional \
7878
cloud.google.com/go/internal/version \
79+
cloud.google.com/go/monitoring/apiv3 \
7980
cloud.google.com/go/storage \
8081
github.com/golang/protobuf/proto \
8182
github.com/golang/protobuf/protoc-gen-go/descriptor \
83+
github.com/golang/protobuf/ptypes \
8284
github.com/golang/protobuf/ptypes/any \
85+
github.com/golang/protobuf/ptypes/duration \
86+
github.com/golang/protobuf/ptypes/empty \
8387
github.com/golang/protobuf/ptypes/struct \
8488
github.com/golang/protobuf/ptypes/timestamp \
8589
github.com/golang/protobuf/ptypes/wrappers \
@@ -120,8 +124,13 @@ RUN go install cloud.google.com/go/compute/metadata \
120124
google.golang.org/api/storage/v1 \
121125
google.golang.org/api/transport \
122126
google.golang.org/genproto/googleapis/api/annotations \
127+
google.golang.org/genproto/googleapis/api/distribution \
128+
google.golang.org/genproto/googleapis/api/label \
129+
google.golang.org/genproto/googleapis/api/metric \
130+
google.golang.org/genproto/googleapis/api/monitoredres \
123131
google.golang.org/genproto/googleapis/datastore/v1 \
124132
google.golang.org/genproto/googleapis/iam/v1 \
133+
google.golang.org/genproto/googleapis/monitoring/v3 \
125134
google.golang.org/genproto/googleapis/rpc/status \
126135
google.golang.org/genproto/googleapis/type/latlng \
127136
google.golang.org/grpc \

cmd/coordinator/buildongce/create.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,13 @@ import (
2020
"text/template"
2121
"time"
2222

23+
monapi "cloud.google.com/go/monitoring/apiv3"
2324
"golang.org/x/build/buildenv"
25+
"golang.org/x/build/cmd/coordinator/metrics"
2426
"golang.org/x/oauth2/google"
2527
compute "google.golang.org/api/compute/v1"
2628
dm "google.golang.org/api/deploymentmanager/v2"
29+
monpb "google.golang.org/genproto/googleapis/monitoring/v3"
2730
)
2831

2932
var (
@@ -32,6 +35,7 @@ var (
3235
staging = flag.Bool("staging", false, "If true, buildenv.Staging will be used to provide default configuration values. Otherwise, buildenv.Production is used.")
3336
makeClusters = flag.String("make-clusters", "go,buildlets", "comma-separated list of clusters to create. Empty means none.")
3437
makeDisks = flag.Bool("make-basepin", false, "Create the basepin disk images for all builders, then stop. Does not create the VM.")
38+
makeMetrics = flag.Bool("make-metrics", false, "Create the Stackdriver metrics for buildlet monitoring.")
3539

3640
computeService *compute.Service
3741
deploymentService *dm.Service
@@ -124,6 +128,12 @@ func main() {
124128
log.Fatalf("Error creating Kubernetes cluster %q: %v", c.Name, err)
125129
}
126130
}
131+
132+
if *makeMetrics {
133+
if err := createMetrics(); err != nil {
134+
log.Fatalf("could not create metrics: %v", err)
135+
}
136+
}
127137
}
128138

129139
func awaitOp(svc *compute.Service, op *compute.Operation) error {
@@ -309,3 +319,24 @@ func makeBasepinDisks(svc *compute.Service) error {
309319
}
310320
return nil
311321
}
322+
323+
// createMetrics creates the Stackdriver metric types required to monitor
324+
// buildlets on Stackdriver.
325+
func createMetrics() error {
326+
ctx := context.Background()
327+
c, err := monapi.NewMetricClient(ctx)
328+
if err != nil {
329+
return err
330+
}
331+
332+
for _, m := range metrics.Metrics {
333+
if _, err = c.CreateMetricDescriptor(ctx, &monpb.CreateMetricDescriptorRequest{
334+
Name: m.DescriptorPath(buildEnv.ProjectName),
335+
MetricDescriptor: m.Descriptor,
336+
}); err != nil {
337+
return err
338+
}
339+
}
340+
341+
return nil
342+
}

cmd/coordinator/coordinator.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ func main() {
318318

319319
go findWorkLoop(workc)
320320
go findTryWorkLoop()
321+
go reportMetrics(context.Background())
321322
// TODO(cmang): gccgo will need its own findWorkLoop
322323
}
323324

cmd/coordinator/gce.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"cloud.google.com/go/compute/metadata"
2828
"cloud.google.com/go/datastore"
29+
monapi "cloud.google.com/go/monitoring/apiv3"
2930
"cloud.google.com/go/storage"
3031
"golang.org/x/build/buildenv"
3132
"golang.org/x/build/buildlet"
@@ -63,6 +64,7 @@ var (
6364
errTryDeps error // non-nil if try bots are disabled
6465
gerritClient *gerrit.Client
6566
storageClient *storage.Client
67+
metricsClient *monapi.MetricClient
6668
inStaging bool // are we running in the staging project? (named -dev)
6769

6870
initGCECalled bool
@@ -129,6 +131,11 @@ func initGCE() error {
129131
if err != nil {
130132
log.Fatalf("storage.NewClient: %v", err)
131133
}
134+
135+
metricsClient, err = monapi.NewMetricClient(ctx)
136+
if err != nil {
137+
log.Fatalf("monapi.NewMetricClient: %v", err)
138+
}
132139
}
133140

134141
dsClient, err = datastore.NewClient(ctx, buildEnv.ProjectName)

cmd/coordinator/metrics.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package main
6+
7+
import (
8+
"context"
9+
"log"
10+
"time"
11+
12+
"golang.org/x/build/cmd/coordinator/metrics"
13+
14+
"github.com/golang/protobuf/ptypes"
15+
metpb "google.golang.org/genproto/googleapis/api/metric"
16+
monpb "google.golang.org/genproto/googleapis/monitoring/v3"
17+
)
18+
19+
// reportMetrics gathers and reports buildlet metrics to Stackdriver.
20+
// It currently only reports count of running reverse buildlets per type.
21+
func reportMetrics(ctx context.Context) {
22+
for {
23+
err := reportReverseCountMetrics(ctx)
24+
if err != nil {
25+
log.Printf("error reporting %q metrics: %v\n",
26+
metrics.ReverseCount.Name, err)
27+
}
28+
29+
time.Sleep(5 * time.Minute)
30+
}
31+
32+
}
33+
34+
func reportReverseCountMetrics(ctx context.Context) error {
35+
m := metrics.ReverseCount
36+
// 1. Gather # buildlets up per reverse builder type
37+
totals := reversePool.hostTypeCount()
38+
// 2. Write counts to Stackdriver
39+
ts := []*monpb.TimeSeries{}
40+
now := ptypes.TimestampNow()
41+
for hostType, n := range totals {
42+
labels, err := m.Labels(hostType)
43+
if err != nil {
44+
return err
45+
}
46+
tv, err := m.TypedValue(n)
47+
if err != nil {
48+
return err
49+
}
50+
ts = append(ts, &monpb.TimeSeries{
51+
Metric: &metpb.Metric{
52+
Type: m.Descriptor.Type,
53+
Labels: labels,
54+
},
55+
Points: []*monpb.Point{
56+
{
57+
Interval: &monpb.TimeInterval{
58+
EndTime: now,
59+
},
60+
Value: tv,
61+
},
62+
},
63+
})
64+
}
65+
66+
return metricsClient.CreateTimeSeries(ctx, &monpb.CreateTimeSeriesRequest{
67+
Name: m.DescriptorPath(buildEnv.ProjectName),
68+
TimeSeries: ts,
69+
})
70+
}

cmd/coordinator/metrics/metrics.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Package metrics enumerates the set of Stackdriver metrics
6+
// used by the Go build system.
7+
package metrics
8+
9+
import (
10+
"errors"
11+
"fmt"
12+
13+
monapi "cloud.google.com/go/monitoring/apiv3"
14+
"google.golang.org/genproto/googleapis/api/label"
15+
metpb "google.golang.org/genproto/googleapis/api/metric"
16+
monpb "google.golang.org/genproto/googleapis/monitoring/v3"
17+
)
18+
19+
// Metric defines a custom metric type used by the Go build system.
// It pairs a short name with the Stackdriver descriptor for the metric.
20+
type Metric struct {
21+
// Name is a short identifier for the metric, e.g. "reverse/count".
Name string
22+
// Descriptor is the Stackdriver metric descriptor. Its Type, Labels and
// ValueType drive DescriptorPath, Labels and TypedValue respectively.
Descriptor *metpb.MetricDescriptor
23+
}
24+
25+
// ReverseCount is the Stackdriver metric for monitoring
26+
// the number of reverse buildlets up at any given moment.
// It is an INT64 gauge with a single string label, "hosttype".
27+
var ReverseCount = &Metric{
28+
Name: "reverse/count",
29+
Descriptor: &metpb.MetricDescriptor{
30+
// The type is Name under the "custom.googleapis.com/" prefix.
Type: "custom.googleapis.com/reverse/count",
31+
Labels: []*label.LabelDescriptor{
32+
{
33+
// The only label; Labels must be called with exactly one value.
Key: "hosttype",
34+
ValueType: label.LabelDescriptor_STRING,
35+
},
36+
},
37+
MetricKind: metpb.MetricDescriptor_GAUGE,
38+
// INT64 selects the integer branch of TypedValue.
ValueType: metpb.MetricDescriptor_INT64,
39+
},
40+
}
41+
42+
// Metrics is the set of all Stackdriver metrics being used
43+
// to monitor the Go build system.
// A new metric must be added here to be included when code ranges over
// this slice (e.g. to create the metric descriptors).
44+
var Metrics = []*Metric{
45+
ReverseCount,
46+
}
47+
48+
// DescriptorPath returns the unique path for this metric among all
49+
// GCP resources in all projects.
50+
// See cloud.google.com/monitoring/custom-metrics/creating-metrics
51+
// for details.
52+
func (m *Metric) DescriptorPath(project string) string {
53+
return monapi.MetricMetricDescriptorPath(project, m.Descriptor.Type)
54+
}
55+
56+
// Labels populates the set of labels with the given label values.
57+
// The labels should be passed in the same order as defined in the metric
58+
// descriptor. All labels listed in the Descriptor must be assigned values.
59+
func (m *Metric) Labels(labels ...string) (map[string]string, error) {
60+
if len(m.Descriptor.Labels) != len(labels) {
61+
return nil, errors.New("mismatch metric labels")
62+
}
63+
lm := make(map[string]string)
64+
for i, l := range m.Descriptor.Labels {
65+
lm[l.Key] = labels[i]
66+
}
67+
return lm, nil
68+
}
69+
70+
// TypedValue returns the cooresponding *monpb.TypedValue based on
71+
// the metric descriptor's value type.
72+
func (m *Metric) TypedValue(v interface{}) (*monpb.TypedValue, error) {
73+
var tv monpb.TypedValue
74+
switch m.Descriptor.ValueType {
75+
case metpb.MetricDescriptor_BOOL:
76+
if vt, ok := v.(bool); ok {
77+
tv.Value = &monpb.TypedValue_BoolValue{
78+
BoolValue: vt,
79+
}
80+
} else {
81+
return nil, fmt.Errorf("wrong value type (%T) for BOOL", v)
82+
}
83+
case metpb.MetricDescriptor_INT64:
84+
if vt, ok := v.(int); ok {
85+
tv.Value = &monpb.TypedValue_Int64Value{
86+
Int64Value: int64(vt),
87+
}
88+
} else {
89+
return nil, fmt.Errorf("wrong value type (%T) for INT64", v)
90+
}
91+
case metpb.MetricDescriptor_DOUBLE:
92+
if vt, ok := v.(float64); ok {
93+
tv.Value = &monpb.TypedValue_DoubleValue{
94+
DoubleValue: float64(vt),
95+
}
96+
} else {
97+
return nil, fmt.Errorf("wrong value type (%T) for DOUBLE", v)
98+
}
99+
case metpb.MetricDescriptor_STRING:
100+
if vt, ok := v.(string); ok {
101+
tv.Value = &monpb.TypedValue_StringValue{
102+
StringValue: string(vt),
103+
}
104+
} else {
105+
return nil, fmt.Errorf("wrong value type (%T) for STRING", v)
106+
}
107+
case metpb.MetricDescriptor_DISTRIBUTION, metpb.MetricDescriptor_MONEY:
108+
return nil, errors.New("unused metric descriptor value type")
109+
}
110+
return &tv, nil
111+
}

0 commit comments

Comments
 (0)