Skip to content

Commit 1dc9f8c

Browse files
committed
[ws-proxy] add some ssh metrics
1 2 [debug]
1 parent 56d2115 commit 1dc9f8c

File tree

1 file changed

+80
-8
lines changed

1 file changed

+80
-8
lines changed

components/ws-proxy/pkg/sshproxy/server.go

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ package sshproxy
66

77
import (
88
"context"
9-
"errors"
109
"fmt"
1110
"net"
1211
"strings"
@@ -17,16 +16,57 @@ import (
1716
supervisor "github.com/gitpod-io/gitpod/supervisor/api"
1817
tracker "github.com/gitpod-io/gitpod/ws-proxy/pkg/analytics"
1918
p "github.com/gitpod-io/gitpod/ws-proxy/pkg/proxy"
19+
"github.com/prometheus/client_golang/prometheus"
2020
"golang.org/x/crypto/ssh"
2121
"golang.org/x/xerrors"
2222
"google.golang.org/grpc"
23+
"sigs.k8s.io/controller-runtime/pkg/metrics"
2324
)
2425

2526
const GitpodUsername = "gitpod"
2627

27-
var ErrWorkspaceNotFound = errors.New("not found workspace")
28-
var ErrAuthFailed = errors.New("auth failed")
29-
var ErrUsernameFormat = errors.New("username format is not correct")
28+
// Prometheus collectors describing SSH traffic handled by this package.
var (
29+
// SSHConnectionCount is a gauge of currently open SSH connections.
// HandleConn increments it after the connection to the workspace has been
// set up and decrements it once client.Wait returns.
SSHConnectionCount = prometheus.NewGauge(prometheus.GaugeOpts{
30+
Name: "gitpod_ws_proxy_ssh_connection_count",
31+
Help: "Current number of SSH connection",
32+
})
33+
34+
// SSHAttemptTotal counts SSH attempts, partitioned by the "result" label.
// ReportSSHAttemptMetrics uses the values "success" and "failed".
SSHAttemptTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
35+
Name: "gitpod_ws_proxy_ssh_attempt_total",
36+
Help: "Total number of SSH attempt",
37+
}, []string{"result"})
38+
39+
// SSHErrorTypeTotal counts failed SSH attempts, partitioned by the "type"
// label. ReportSSHAttemptMetrics sets the label to an SSHError short name,
// or to "OTHERS" when no short name can be derived from the error.
SSHErrorTypeTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
40+
Name: "gitpod_ws_proxy_ssh_error_total",
41+
Help: "Total number of SSH attempt per error type",
42+
}, []string{"type"})
43+
)
44+
45+
// Sentinel SSH errors. Each pairs a short name (available via ShortName, and
// used as a metric label value) with a description returned by Error.
var (
46+
// ErrWorkspaceNotFound is reported by HandleConn when the workspace info
// provider returns no info for the requested workspace ID.
ErrWorkspaceNotFound = NewSSHError("WS_NOTFOUND", "not found workspace")
47+
// ErrAuthFailed signals an authentication failure.
ErrAuthFailed = NewSSHError("AUTH_FAILED", "auth failed")
48+
// ErrUsernameFormat signals a username that is not in the expected format.
ErrUsernameFormat = NewSSHError("USER_FORMAT", "username format is not correct")
49+
// ErrMissPrivateKey is returned from the auth path in New when the
// Authenticator call itself succeeded.
// NOTE(review): presumably this rejects token-only logins that lack a key;
// confirm against the full callback.
ErrMissPrivateKey = NewSSHError("MISS_KEY", "missing privateKey")
50+
// ErrConnFailed is reported by HandleConn when dialing the workspace or
// establishing the client connection on top of it fails.
// NOTE(review): the description reads "cannot to connect" — likely a typo;
// it is emitted at runtime, so confirm no consumer matches on it before fixing.
ErrConnFailed = NewSSHError("CONN_FAILED", "cannot to connect with workspace")
51+
// ErrCreateSSHKey is reported by HandleConn when GetWorkspaceSSHKey fails.
ErrCreateSSHKey = NewSSHError("CREATE_KEY_FAILED", "cannot create private pair in workspace")
52+
)
53+
54+
// SSHError is an error value that carries a short machine-friendly name next
// to its human-readable description. It is used by value; both fields are
// strings, so two SSHError values can be compared with ==.
type SSHError struct {
55+
// shortName is the compact identifier returned by ShortName.
shortName string
56+
// description is the message returned by Error.
description string
57+
}
58+
59+
// Error implements the error interface and returns the error's description.
func (e SSHError) Error() string {
60+
return e.description
61+
}
62+
63+
// ShortName returns the error's short name, which ReportSSHAttemptMetrics
// uses as the value of the "type" metric label.
func (e SSHError) ShortName() string {
64+
return e.shortName
65+
}
66+
67+
func NewSSHError(shortName string, description string) SSHError {
68+
return SSHError{shortName: shortName, description: description}
69+
}
3070

3171
type Session struct {
3272
Conn *ssh.ServerConn
@@ -45,6 +85,14 @@ type Server struct {
4585
workspaceInfoProvider p.WorkspaceInfoProvider
4686
}
4787

88+
func init() {
89+
metrics.Registry.MustRegister(
90+
SSHConnectionCount,
91+
SSHAttemptTotal,
92+
SSHErrorTypeTotal,
93+
)
94+
}
95+
4896
// New creates a new SSH proxy server
4997

5098
func New(signers []ssh.Signer, workspaceInfoProvider p.WorkspaceInfoProvider, heartbeat Heartbeat) *Server {
@@ -75,7 +123,7 @@ func New(signers []ssh.Signer, workspaceInfoProvider p.WorkspaceInfoProvider, he
75123
workspaceId, ownerToken = args[0], args[1]
76124
wsInfo, err = server.Authenticator(workspaceId, ownerToken)
77125
if err == nil {
78-
err = errors.New("miss private key")
126+
err = ErrMissPrivateKey
79127
}
80128
return
81129
}
@@ -112,10 +160,26 @@ func New(signers []ssh.Signer, workspaceInfoProvider p.WorkspaceInfoProvider, he
112160
return server
113161
}
114162

163+
func ReportSSHAttemptMetrics(err error) {
164+
if err == nil {
165+
SSHAttemptTotal.WithLabelValues("success").Inc()
166+
return
167+
}
168+
errorType := "OTHERS"
169+
if serverAuthErr, ok := err.(*ssh.ServerAuthError); ok && len(serverAuthErr.Errors) > 0 {
170+
if authErr, ok := serverAuthErr.Errors[len(serverAuthErr.Errors)-1].(SSHError); ok {
171+
errorType = authErr.ShortName()
172+
}
173+
}
174+
SSHAttemptTotal.WithLabelValues("failed").Inc()
175+
SSHErrorTypeTotal.WithLabelValues(errorType).Inc()
176+
}
177+
115178
func (s *Server) HandleConn(c net.Conn) {
116179
sshConn, chans, reqs, err := ssh.NewServerConn(c, s.sshConfig)
117180
if err != nil {
118181
c.Close()
182+
ReportSSHAttemptMetrics(err)
119183
return
120184
}
121185
defer sshConn.Close()
@@ -127,13 +191,16 @@ func (s *Server) HandleConn(c net.Conn) {
127191
workspaceId := sshConn.Permissions.Extensions["workspaceId"]
128192
wsInfo := s.workspaceInfoProvider.WorkspaceInfo(workspaceId)
129193
if wsInfo == nil {
194+
ReportSSHAttemptMetrics(ErrWorkspaceNotFound)
130195
return
131196
}
132197
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
133198
key, err := s.GetWorkspaceSSHKey(ctx, wsInfo.IPAddress)
134199
if err != nil {
135200
cancel()
136-
s.TrackSSHConnection(wsInfo, "connect", err)
201+
s.TrackSSHConnection(wsInfo, "connect", ErrCreateSSHKey)
202+
ReportSSHAttemptMetrics(ErrCreateSSHKey)
203+
log.WithField("instanceId", wsInfo.InstanceID).WithError(err).Error("failed to create private pair in workspace")
137204
return
138205
}
139206
cancel()
@@ -147,7 +214,8 @@ func (s *Server) HandleConn(c net.Conn) {
147214
remoteAddr := wsInfo.IPAddress + ":23001"
148215
conn, err := net.Dial("tcp", remoteAddr)
149216
if err != nil {
150-
s.TrackSSHConnection(wsInfo, "connect", err)
217+
s.TrackSSHConnection(wsInfo, "connect", ErrConnFailed)
218+
ReportSSHAttemptMetrics(ErrConnFailed)
151219
log.WithField("instanceId", wsInfo.InstanceID).WithField("workspaceIP", wsInfo.IPAddress).WithError(err).Error("dail failed")
152220
return
153221
}
@@ -164,7 +232,8 @@ func (s *Server) HandleConn(c net.Conn) {
164232
Timeout: 10 * time.Second,
165233
})
166234
if err != nil {
167-
s.TrackSSHConnection(wsInfo, "connect", err)
235+
s.TrackSSHConnection(wsInfo, "connect", ErrConnFailed)
236+
ReportSSHAttemptMetrics(ErrConnFailed)
168237
log.WithField("instanceId", wsInfo.InstanceID).WithField("workspaceIP", wsInfo.IPAddress).WithError(err).Error("connect failed")
169238
return
170239
}
@@ -173,10 +242,13 @@ func (s *Server) HandleConn(c net.Conn) {
173242
ctx, cancel = context.WithCancel(context.Background())
174243

175244
s.TrackSSHConnection(wsInfo, "connect", nil)
245+
SSHConnectionCount.Inc()
246+
ReportSSHAttemptMetrics(nil)
176247

177248
go func() {
178249
client.Wait()
179250
cancel()
251+
defer SSHConnectionCount.Dec()
180252
}()
181253

182254
for newChannel := range chans {

0 commit comments

Comments
 (0)