Skip to content

Commit 03c4c85

Browse files
committed
Add support for CA/certificate rotation
Mounted secrets are automatically updated into pods, but... * It doesn't work with `subPath` mountings * When `subPath` is not used, then a bunch of directories are mounted * And one of those directories is a symlink, so `IsDir()` returns false * And a watch is needed to notice the change So, update the certificate volume patch, which requires a change in how we look for certificates in the CA cert directory. Add a watch, so when the certs do change, we can restart (because that's the easiest thing to do). Also look at validity dates of certificates, and error on expired certs. And add a restart when those certs *do* expire (hopefully they will be rotated before). The default cert-manager certificates have 90 days validities. Signed-off-by: Todd Short <[email protected]>
1 parent 6cd022e commit 03c4c85

File tree

9 files changed

+164
-11
lines changed

9 files changed

+164
-11
lines changed

.github/workflows/e2e.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,15 @@ jobs:
5959

6060
- name: Run the upgrade e2e test
6161
run: make test-upgrade-e2e
62+
63+
cert-e2e:
64+
runs-on: ubuntu-latest
65+
steps:
66+
- uses: actions/checkout@v4
67+
68+
- uses: actions/setup-go@v5
69+
with:
70+
go-version-file: go.mod
71+
72+
- name: Run the cert e2e test
73+
run: make test-cert-e2e

Makefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,15 @@ test-upgrade-e2e: export TEST_CLUSTER_CATALOG_NAME := test-catalog
188188
test-upgrade-e2e: export TEST_CLUSTER_EXTENSION_NAME := test-package
189189
test-upgrade-e2e: kind-cluster run-latest-release image-registry build-push-e2e-catalog registry-load-bundles pre-upgrade-setup docker-build kind-load kind-deploy post-upgrade-checks kind-clean #HELP Run upgrade e2e tests on a local kind cluster
190190

191+
.PHONY: run-cert-e2e
192+
run-cert-e2e:
193+
./hack/test/cert-e2e.sh
194+
195+
.PHONY: test-cert-e2e
196+
test-cert-e2e: KIND_CLUSTER_NAME := operator-controller-cert-e2e
197+
test-cert-e2e: KUSTOMIZE_BUILD_DIR := config/overlays/cert-manager
198+
test-cert-e2e: run run-cert-e2e kind-clean #HELP Run certificate e2e tests on a local kind cluster
199+
191200
.PHONY: e2e-coverage
192201
e2e-coverage:
193202
COVERAGE_OUTPUT=./coverage/e2e.out ./hack/e2e-coverage.sh

cmd/manager/main.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,15 @@ func main() {
176176
os.Exit(1)
177177
}
178178

179-
certPool, err := httputil.NewCertPool(caCertDir)
179+
certPool, err := httputil.NewCertPool(caCertDir, setupLog, func() {
180+
setupLog.WithName("cert-pool").Info("restarting to reload CA certificates")
181+
os.Exit(1)
182+
})
180183
if err != nil {
181184
setupLog.Error(err, "unable to create CA certificate pool")
182185
os.Exit(1)
183186
}
187+
184188
unpacker := &source.ImageRegistry{
185189
BaseCachePath: filepath.Join(cachePath, "unpack"),
186190
// TODO: This needs to be derived per extension via ext.Spec.InstallNamespace

config/components/tls/patches/manager_deployment_cert.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
value: {"name":"olmv1-certificate", "secret":{"secretName":"olmv1-cert", "optional": false, "items": [{"key": "ca.crt", "path": "olm-ca.crt"}]}}
44
- op: add
55
path: /spec/template/spec/containers/0/volumeMounts/-
6-
value: {"name":"olmv1-certificate", "readOnly": true, "mountPath":"/var/certs/olm-ca.crt", "subPath":"olm-ca.crt"}
6+
value: {"name":"olmv1-certificate", "readOnly": true, "mountPath":"/var/certs/"}
77
- op: add
88
path: /spec/template/spec/containers/0/args/-
99
value: "--ca-certs-dir=/var/certs"

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ require (
99
github.com/Masterminds/semver/v3 v3.2.1
1010
github.com/blang/semver/v4 v4.0.0
1111
github.com/containerd/containerd v1.7.19
12+
github.com/fsnotify/fsnotify v1.7.0
1213
github.com/go-logr/logr v1.4.2
1314
github.com/google/go-cmp v0.6.0
1415
github.com/google/go-containerregistry v0.20.0
@@ -113,7 +114,6 @@ require (
113114
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
114115
github.com/fatih/color v1.15.0 // indirect
115116
github.com/felixge/httpsnoop v1.0.4 // indirect
116-
github.com/fsnotify/fsnotify v1.7.0 // indirect
117117
github.com/go-errors/errors v1.4.2 // indirect
118118
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
119119
github.com/go-git/go-billy/v5 v5.5.0 // indirect

hack/test/cert-e2e.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
3+
set -e
4+
5+
# patch the certificates to renew every minute
6+
# (if set to 2 minutes, then the big timeout wait is about 5 minutes)
7+
kubectl patch certificate.cert-manager.io -n cert-manager olmv1-ca --type='merge' -p '{"spec":{"duration":"1h", "renewBefore":"59m"}}'
8+
kubectl patch certificate.cert-manager.io -n olmv1-system olmv1-cert --type='merge' -p '{"spec":{"duration":"1h", "renewBefore":"59m"}}'
9+
10+
# delete the old secrets, so new secrets are generated
11+
kubectl delete secret -n cert-manager olmv1-ca
12+
kubectl delete secret -n olmv1-system olmv1-cert
13+
14+
# delete the existing operator-controller, to force it to get a new secret
15+
# (deleting the secret itself isn't enough)
16+
kubectl delete pod -n olmv1-system -l control-plane=controller-manager
17+
kubectl wait --for=condition=Available --namespace=olmv1-system "deployment/operator-controller-controller-manager" --timeout="60s"
18+
19+
# Now we need to wait for a while, for the pod to restart...
20+
kubectl wait --for=condition=Available=false --namespace=olmv1-system "deployment/operator-controller-controller-manager" --timeout="10m"
21+
# ...and wait for it to come back
22+
kubectl wait --for=condition=Available --namespace=olmv1-system "deployment/operator-controller-controller-manager" --timeout="60s"
23+
24+
# and then search through the previous logs
25+
kubectl logs -p -c manager -n olmv1-system -l control-plane=controller-manager | tail -1 | grep "restarting to reload CA certificates"

internal/httputil/certutil.go

Lines changed: 74 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ import (
88
"fmt"
99
"os"
1010
"path/filepath"
11+
"time"
12+
13+
"github.com/fsnotify/fsnotify"
14+
"github.com/go-logr/logr"
1115
)
1216

1317
var pemStart = []byte("\n-----BEGIN ")
@@ -157,7 +161,7 @@ func pemDecode(data []byte) (*pem.Block, []byte) {
157161
}
158162

159163
// This version of (*x509.CertPool).AppendCertsFromPEM() will error out if parsing fails
160-
func appendCertsFromPEM(s *x509.CertPool, pemCerts []byte) error {
164+
func appendCertsFromPEM(s *x509.CertPool, pemCerts []byte, firstExpiration *time.Time) error {
161165
n := 1
162166
for len(pemCerts) > 0 {
163167
var block *pem.Block
@@ -179,16 +183,27 @@ func appendCertsFromPEM(s *x509.CertPool, pemCerts []byte) error {
179183
if err != nil {
180184
return fmt.Errorf("unable to parse cert %d: %w", n, err)
181185
}
182-
// no return values - panics or always succeeds
183-
s.AddCert(cert)
186+
if firstExpiration.IsZero() || firstExpiration.After(cert.NotAfter) {
187+
*firstExpiration = cert.NotAfter
188+
}
189+
now := time.Now()
190+
if now.Before(cert.NotBefore) {
191+
return fmt.Errorf("not yet valid cert %d: %q", n, cert.NotBefore.Format(time.RFC3339))
192+
} else if now.After(cert.NotAfter) {
193+
return fmt.Errorf("expired cert %d: %q", n, cert.NotAfter.Format(time.RFC3339))
194+
} else {
195+
// no return values - panics or always succeeds
196+
s.AddCert(cert)
197+
}
184198
n++
185199
}
186200

187201
return nil
188202
}
189203

190-
func NewCertPool(caDir string) (*x509.CertPool, error) {
204+
func NewCertPool(caDir string, log logr.Logger, reload func()) (*x509.CertPool, error) {
191205
caCertPool, err := x509.SystemCertPool()
206+
log = log.WithName("cert-pool")
192207
if err != nil {
193208
return nil, err
194209
}
@@ -200,19 +215,71 @@ func NewCertPool(caDir string) (*x509.CertPool, error) {
200215
if err != nil {
201216
return nil, err
202217
}
218+
count := 0
219+
firstExpiration := time.Time{}
220+
221+
if reload != nil {
222+
watcher, err := fsnotify.NewWatcher()
223+
if err != nil {
224+
return nil, err
225+
}
226+
log.Info("adding watch", "directory", caDir)
227+
if err = watcher.Add(caDir); err != nil {
228+
return nil, err
229+
}
230+
go func() {
231+
for {
232+
select {
233+
case <-watcher.Events:
234+
watcher.Close()
235+
log.Info("directory updated")
236+
reload()
237+
case <-watcher.Errors:
238+
watcher.Close()
239+
log.Info("directory error")
240+
reload()
241+
}
242+
}
243+
}()
244+
}
245+
203246
for _, e := range dirEntries {
204-
if e.IsDir() {
247+
file := filepath.Join(caDir, e.Name())
248+
// These might be symlinks pointing to directories, so use Stat() to resolve
249+
fi, err := os.Stat(file)
250+
if err != nil {
251+
return nil, err
252+
}
253+
if fi.IsDir() {
254+
log.Info("skip directory", "name", e.Name())
205255
continue
206256
}
207-
file := filepath.Join(caDir, e.Name())
257+
log.Info("load certificate", "name", e.Name())
208258
data, err := os.ReadFile(file)
209259
if err != nil {
210260
return nil, fmt.Errorf("error reading cert file %q: %w", file, err)
211261
}
212-
err = appendCertsFromPEM(caCertPool, data)
262+
err = appendCertsFromPEM(caCertPool, data, &firstExpiration)
213263
if err != nil {
214264
return nil, fmt.Errorf("error adding cert file %q: %w", file, err)
215265
}
266+
count++
267+
}
268+
269+
// Found no certs!
270+
if count == 0 {
271+
return nil, fmt.Errorf("no certificates found in %q", caDir)
272+
}
273+
274+
if reload != nil {
275+
now := time.Now()
276+
log.Info("first expiration", "time", firstExpiration.Format(time.RFC3339))
277+
if now.Before(firstExpiration) {
278+
time.AfterFunc(firstExpiration.Sub(now), func() {
279+
log.Info("certificate expiration")
280+
reload()
281+
})
282+
}
216283
}
217284

218285
return caCertPool, nil

internal/httputil/certutil_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package httputil_test
22

33
import (
4+
"context"
45
"testing"
56

7+
"github.com/go-logr/logr"
68
"github.com/stretchr/testify/require"
79

810
"github.com/operator-framework/operator-controller/internal/httputil"
@@ -21,17 +23,20 @@ func TestNewCertPool(t *testing.T) {
2123
dir string
2224
msg string
2325
}{
26+
{"../../testdata/certs/", `no certificates found in "../../testdata/certs/"`},
2427
{"../../testdata/certs/good", ""},
2528
{"../../testdata/certs/bad", `error adding cert file "../../testdata/certs/bad/Amazon_Root_CA_2.pem": unable to PEM decode cert 1`},
2629
{"../../testdata/certs/ugly", `error adding cert file "../../testdata/certs/ugly/Amazon_Root_CA.pem": unable to PEM decode cert 2`},
2730
{"../../testdata/certs/ugly2", `error adding cert file "../../testdata/certs/ugly2/Amazon_Root_CA_1.pem": unable to PEM decode cert 1`},
2831
{"../../testdata/certs/ugly3", `error adding cert file "../../testdata/certs/ugly3/not_a_cert.pem": unable to PEM decode cert 1`},
2932
{"../../testdata/certs/empty", `error adding cert file "../../testdata/certs/empty/empty.pem": unable to parse cert 1: x509: malformed certificate`},
33+
{"../../testdata/certs/expired", `error adding cert file "../../testdata/certs/expired/expired.pem": expired cert 1: "2024-01-02T15:00:00Z"`},
3034
}
3135

36+
log, _ := logr.FromContext(context.Background())
3237
for _, caDir := range caDirs {
3338
t.Logf("Loading certs from %q", caDir.dir)
34-
pool, err := httputil.NewCertPool(caDir.dir)
39+
pool, err := httputil.NewCertPool(caDir.dir, log, nil)
3540
if caDir.msg == "" {
3641
require.NoError(t, err)
3742
require.NotNil(t, pool)

testdata/certs/expired/expired.pem

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
-----BEGIN CERTIFICATE-----
2+
MIIFXzCCA0egAwIBAgIUN5r8l1RrpH53+9e6pfj6CXoyqP0wDQYJKoZIhvcNAQEL
3+
BQAwPzELMAkGA1UEBhMCVVMxEDAOBgNVBAoMB1JlZCBIYXQxDDAKBgNVBAsMA09M
4+
TTEQMA4GA1UEAwwHZXhwaXJlZDAeFw0yNDAxMDExNTAwMDBaFw0yNDAxMDIxNTAw
5+
MDBaMD8xCzAJBgNVBAYTAlVTMRAwDgYDVQQKDAdSZWQgSGF0MQwwCgYDVQQLDANP
6+
TE0xEDAOBgNVBAMMB2V4cGlyZWQwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK
7+
AoICAQDFalyjEXz0cMGs3pt360Cz0uD0CDnnAqQFHxXPchfCMZnW/VRGrJQq29rZ
8+
UgU5PnxPgqadrw20BodfR2RS9xIacMP+092GY7Ep96xWokwXcsGPj2e5VMlEYVM1
9+
0MGqIbEv52ZnoEaZDHl4yprYeTs+b/7NGvdG1+N/YNAjkpk8cCBKUXo4ZhkgAZoW
10+
jbv3DkAdkpQHipUYkQZNRws1ebyfTbKaEPxw7abEh9TJrHD1EI9hbmYOGJWLfe1e
11+
zeBQjFioQA31FcQR3/v+aNEDX390+qi3p0LXe7GMabgcoFYcGXO7XvX0DdUBvdZZ
12+
dyHA7cJvyfWfcbucI7xQ9xvAnu/4Ih4D8mHnJXjZK5ReQn06FPM/ZCgZ5LrHAKcZ
13+
0mrOts/8noY9dMmBreSJmLCP8EqzY7yKJFFHVCeKo+bU6/KOyNhJGGSCHVJ/pZGK
14+
ZpOQcNwVvHciLH+MfpW12xJXPEs8Wv24KufDdBCDliSFnVTYH3kZaq4Ozb7+3A5j
15+
wUQ2aDg8nrq4oNORMSCafvia8MYH3NXbpUq1SAyD5DTKtMcWY3gcVnJgrBai1hPn
16+
TPhrMb2NMDFnMnj7/l8jdu9xHrsgOmOrv7Zj0ytmpT6ITJgWNGXsiq7Dp+HH1c6N
17+
ggG6g0zqoyoaxcPVN7PMrWTvfKUD3LHfIsesPc4+lT+TSlBQYwIDAQABo1MwUTAd
18+
BgNVHQ4EFgQU8mBHR/00anEl8Io/A2c0LQlGF5MwHwYDVR0jBBgwFoAU8mBHR/00
19+
anEl8Io/A2c0LQlGF5MwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC
20+
AgEABZYGEeJY2dgyi4W0LNVgN8mKuZuapIcisQ66foe46WuWGAjVONIHlb0Ciy75
21+
aaClLC8fiiIh+FUFZ5aIZkfhKH97QvehFO5O7mqCjM7ipvtEm+Vs1IVtXWDONUxo
22+
SfgbjEPBV8+eflgvKQ6jJqiSqs8EnqdbGAfhxVG/3RN1b5xSFtKz6kzHQE+Gy6QT
23+
DGCVhYvDq8j6G2LCePsqE8piOnSaXuRwD4/YEOaYhx4jjgOnaM0m/dM/Cx9wy2xg
24+
LMRBjBwxFf6palgiFUvyqvturIPONQICkM/lZkpmHbeM4FCat/CD5VW+JgpYiEtW
25+
2oFslTEbawUjmEYnzdo9iw9KPLJQqtasFEWzkWWnJrfm7AVGxcgAHVqGZhUMgq0k
26+
MccM2zYZN2fCSZUUueDB7VCxFq5jK2oLzE14ngXdR7ZbxT3qai/zvGg1kl9y1bIF
27+
WVTK0WZnHqZwVnHQVBH0Duv0uyRUzb6yRRziuLN5aBGQpy/Jm7MS0jLidCbqoCXC
28+
dYqGMFlImzU+6CwPyTJo+X+v6L+FATIxZRpBBeEhHqEU6wz51ms68Sjx4bpW33b+
29+
WFt0JKEmIxB1puJK1qQvKu/MxJyy52GNqiRg7HXkJH9MMYWoAkF2jKMLFoerUPun
30+
7GaV8SIUTFO/5pbnpxZ97a2FuB2RvDKs7GSdspEmC3wbAPU=
31+
-----END CERTIFICATE-----

0 commit comments

Comments
 (0)