Skip to content

Commit 5cf0992

Browse files
committed
refactor: update the mux server to expose both healthz and metrics, add graceful shutdown
1 parent 516027b commit 5cf0992

File tree

1 file changed

+72
-9
lines changed

1 file changed

+72
-9
lines changed

cmd/kar-controllers/app/server.go

Lines changed: 72 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -31,13 +31,21 @@ limitations under the License.
3131
package app
3232

3333
import (
34+
"context"
35+
"fmt"
36+
"net/http"
37+
"os"
38+
"os/signal"
39+
"syscall"
40+
"time"
41+
3442
"k8s.io/client-go/rest"
3543
"k8s.io/client-go/tools/clientcmd"
36-
"net/http"
3744

3845
"github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options"
3946
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejob"
4047
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/health"
48+
"github.com/prometheus/client_golang/prometheus/promhttp"
4149

4250
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
4351
)
@@ -77,13 +85,68 @@ func Run(opt *options.ServerOption) error {
7785

7886
// Starts the health probe listener
7987
func listenHealthProbe(opt *options.ServerOption) error {
80-
handler := http.NewServeMux()
81-
handler.Handle("/healthz", &health.Handler{})
82-
err := http.ListenAndServe(opt.HealthProbeListenAddr, handler)
83-
if err != nil {
84-
return err
85-
}
86-
87-
return nil
88+
// Start the metrics server
89+
RecordMetrics()
90+
91+
metricsHandler := http.NewServeMux()
92+
metricsHandler.Handle("/metrics", promhttp.Handler())
93+
healthHandler := http.NewServeMux()
94+
healthHandler.Handle("/healthz", &health.Handler{})
95+
96+
metricsServer := &http.Server{
97+
Addr: opt.MetricsListenAddr,
98+
Handler: metricsHandler,
99+
}
100+
101+
healthServer := &http.Server{
102+
Addr: opt.HealthProbeListenAddr,
103+
Handler: healthHandler,
104+
}
105+
106+
// make a channel for errors for each server
107+
metricsServerErrChan := make(chan error)
108+
healthServerErrChan := make(chan error)
109+
110+
// start servers in their own goroutines
111+
go func() {
112+
err := metricsServer.ListenAndServe()
113+
if err != nil && err != http.ErrServerClosed {
114+
metricsServerErrChan <- err
115+
}
116+
}()
117+
118+
go func() {
119+
err := healthServer.ListenAndServe()
120+
if err != nil && err != http.ErrServerClosed {
121+
healthServerErrChan <- err
122+
}
123+
}()
124+
125+
// make a channel to listen for OS shutdown signal
126+
stop := make(chan os.Signal, 1)
127+
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
128+
129+
// use select to wait for either a shutdown signal or an error
130+
select {
131+
case <-stop:
132+
// received an OS shutdown signal, shut down servers gracefully
133+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
134+
defer cancel()
135+
136+
errM := metricsServer.Shutdown(ctx)
137+
if errM != nil {
138+
return fmt.Errorf("metrics server shutdown error: %v", errM)
139+
}
140+
errH := healthServer.Shutdown(ctx)
141+
if errH != nil {
142+
return fmt.Errorf("health server shutdown error: %v", errH)
143+
}
144+
case err := <-metricsServerErrChan:
145+
return fmt.Errorf("metrics server error: %v", err)
146+
case err := <-healthServerErrChan:
147+
return fmt.Errorf("health server error: %v", err)
148+
}
149+
150+
return nil
88151
}
89152

0 commit comments

Comments
 (0)