From 068b74f004283a81837e1e769f92ce0cd8049000 Mon Sep 17 00:00:00 2001 From: Eoin Gallinagh Date: Tue, 16 Apr 2024 12:13:04 +0100 Subject: [PATCH 1/8] add: mutating webhook --- PROJECT | 4 + config/certmanager/certificate.yaml | 3 + config/certmanager/kustomization.yaml | 5 + config/certmanager/kustomizeconfig.yaml | 16 +++ config/default/kustomization.yaml | 11 +- config/default/manager_webhook_patch.yaml | 23 ++++ config/default/webhookcainjection_patch.yaml | 15 +++ config/webhook/kustomization.yaml | 6 + config/webhook/kustomizeconfig.yaml | 25 ++++ config/webhook/manifests.yaml | 27 ++++ config/webhook/service.yaml | 22 ++++ main.go | 3 + pkg/controllers/raycluster_webhook.go | 122 +++++++++++++++++++ 13 files changed, 279 insertions(+), 3 deletions(-) create mode 100644 config/certmanager/certificate.yaml create mode 100644 config/certmanager/kustomization.yaml create mode 100644 config/certmanager/kustomizeconfig.yaml create mode 100644 config/default/manager_webhook_patch.yaml create mode 100644 config/default/webhookcainjection_patch.yaml create mode 100644 config/webhook/kustomization.yaml create mode 100644 config/webhook/kustomizeconfig.yaml create mode 100644 config/webhook/manifests.yaml create mode 100644 config/webhook/service.yaml create mode 100644 pkg/controllers/raycluster_webhook.go diff --git a/PROJECT b/PROJECT index cb6903086..3e821aa02 100644 --- a/PROJECT +++ b/PROJECT @@ -15,5 +15,9 @@ resources: domain: codeflare.dev group: ray kind: RayCluster + path: github.com/project-codeflare/codeflare-operator/pkg/controllers version: v1 + webhooks: + defaulting: true + webhookVersion: v1 version: "3" diff --git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml new file mode 100644 index 000000000..451153488 --- /dev/null +++ b/config/certmanager/certificate.yaml @@ -0,0 +1,3 @@ +# The following manifests contain a self-signed issuer CR and a certificate CR. +# More document can be found at https://docs.cert-manager.io +# WARNING: Targets CertManager v1.0. Check https://cert-manager.io/docs/installation/upgrading/ for breaking changes. diff --git a/config/certmanager/kustomization.yaml b/config/certmanager/kustomization.yaml new file mode 100644 index 000000000..bebea5a59 --- /dev/null +++ b/config/certmanager/kustomization.yaml @@ -0,0 +1,5 @@ +resources: +- certificate.yaml + +configurations: +- kustomizeconfig.yaml diff --git a/config/certmanager/kustomizeconfig.yaml b/config/certmanager/kustomizeconfig.yaml new file mode 100644 index 000000000..e631f7773 --- /dev/null +++ b/config/certmanager/kustomizeconfig.yaml @@ -0,0 +1,16 @@ +# This configuration is for teaching kustomize how to update name ref and var substitution +nameReference: +- kind: Issuer + group: cert-manager.io + fieldSpecs: + - kind: Certificate + group: cert-manager.io + path: spec/issuerRef/name + +varReference: +- kind: Certificate + group: cert-manager.io + path: spec/commonName +- kind: Certificate + group: cert-manager.io + path: spec/dnsNames diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 6e926aed2..fccc7164e 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -14,10 +14,15 @@ commonLabels: app.kubernetes.io/part-of: codeflare bases: -- ../rbac -- ../manager + - ../rbac + - ../manager + - ../webhook # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. # - ../prometheus resources: -- metrics_service.yaml + - metrics_service.yaml + +patches: +- path: manager_webhook_patch.yaml +- path: webhookcainjection_patch.yaml diff --git a/config/default/manager_webhook_patch.yaml b/config/default/manager_webhook_patch.yaml new file mode 100644 index 000000000..5d6b541dc --- /dev/null +++ b/config/default/manager_webhook_patch.yaml @@ -0,0 +1,23 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: manager + namespace: system +spec: + template: + spec: + containers: + - name: manager + ports: + - containerPort: 9443 + name: webhook-server + protocol: TCP + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: codeflare-operator-raycluster-webhook-cert diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml new file mode 100644 index 000000000..2770f6a9c --- /dev/null +++ b/config/default/webhookcainjection_patch.yaml @@ -0,0 +1,15 @@ +# This patch add annotation to admission webhook config +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/name: mutatingwebhookconfiguration + app.kubernetes.io/instance: mutating-webhook-configuration + app.kubernetes.io/component: webhook + app.kubernetes.io/created-by: codeflare-operator + app.kubernetes.io/part-of: codeflare-operator + app.kubernetes.io/managed-by: kustomize + name: mutating-webhook-configuration + annotations: + service.beta.openshift.io/inject-cabundle: "true" + service.beta.openshift.io/serving-cert-secret-name: codeflare-operator-raycluster-webhook-cert diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml new file mode 100644 index 000000000..9cf26134e --- /dev/null +++ b/config/webhook/kustomization.yaml @@ -0,0 +1,6 @@ +resources: +- manifests.yaml +- service.yaml + +configurations: +- kustomizeconfig.yaml diff --git a/config/webhook/kustomizeconfig.yaml b/config/webhook/kustomizeconfig.yaml new file mode 100644 index 000000000..25e21e3c9 --- /dev/null +++ b/config/webhook/kustomizeconfig.yaml @@ -0,0 +1,25 @@ +# the following config is for teaching kustomize where to look at when substituting vars. +# It requires kustomize v2.1.0 or newer to work properly. +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: MutatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + - kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/name + +namespace: +- kind: MutatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true +- kind: ValidatingWebhookConfiguration + group: admissionregistration.k8s.io + path: webhooks/clientConfig/service/namespace + create: true + +varReference: +- path: metadata/annotations diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml new file mode 100644 index 000000000..faf91696b --- /dev/null +++ b/config/webhook/manifests.yaml @@ -0,0 +1,27 @@ +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + creationTimestamp: null + name: mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /mutate-ray-io-v1-raycluster + failurePolicy: Fail + name: mraycluster.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - rayclusters + sideEffects: None diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml new file mode 100644 index 000000000..9caf0911a --- /dev/null +++ b/config/webhook/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: service + app.kubernetes.io/part-of: codeflare-operator + app.kubernetes.io/instance: webhook-service + app.kubernetes.io/component: webhook + app.kubernetes.io/created-by: codeflare-operator + app.kubernetes.io/managed-by: kustomize + name: webhook-service + namespace: openshift-operators + annotations: + service.beta.openshift.io/serving-cert-secret-name: codeflare-operator-raycluster-webhook-cert +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 9443 + selector: + app.kubernetes.io/part-of: codeflare + app.kubernetes.io/name: codeflare-operator diff --git a/main.go b/main.go index 09ebe2599..40f916a50 100644 --- a/main.go +++ b/main.go @@ -147,6 +147,9 @@ func main() { }) exitOnError(err, "unable to start manager") + rayClusterDefaulter := &controllers.RayClusterDefaulter{} + exitOnError(rayClusterDefaulter.SetupWebhookWithManager(mgr), "error setting up webhook") + ok, err := HasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster")) if ok { rayClusterController := controllers.RayClusterReconciler{Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Config: cfg.KubeRay} diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go new file mode 100644 index 000000000..5cabf2fae --- /dev/null +++ b/pkg/controllers/raycluster_webhook.go @@ -0,0 +1,122 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/webhook" +) + +// log is for logging in this package. +var rayclusterlog = logf.Log.WithName("raycluster-resource") + +func (r *RayClusterDefaulter) SetupWebhookWithManager(mgr ctrl.Manager) error { + return ctrl.NewWebhookManagedBy(mgr). + For(&rayv1.RayCluster{}). + WithDefaulter(r). + Complete() +} + +//+kubebuilder:webhook:path=/mutate-ray-io-v1-raycluster,mutating=true,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create;update,versions=v1,name=mraycluster.kb.io,admissionReviewVersions=v1 + +type RayClusterDefaulter struct{} + +var _ webhook.CustomDefaulter = &RayClusterDefaulter{} + +// Default implements webhook.Defaulter so a webhook will be registered for the type +func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) error { + raycluster := obj.(*rayv1.RayCluster) + + rayclusterlog.Info("default", "name", raycluster.Name) + // Check and add OAuth proxy if it does not exist. + alreadyExists := false + for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers { + if container.Name == "oauth-proxy" { + rayclusterlog.Info("OAuth sidecar already exists, no patch needed") + alreadyExists = true + break // exits the for loop + } + } + + if !alreadyExists { + rayclusterlog.Info("Adding OAuth sidecar container") + // definition of the new container + newOAuthSidecar := corev1.Container{ + Name: "oauth-proxy", + Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", + Ports: []corev1.ContainerPort{ + {ContainerPort: 8443, Name: "oauth-proxy"}, + }, + Args: []string{ + "--https-address=:8443", + "--provider=openshift", + "--openshift-service-account=" + raycluster.Name + "-oauth-proxy", + "--upstream=http://localhost:8265", + "--tls-cert=/etc/tls/private/tls.crt", + "--tls-key=/etc/tls/private/tls.key", + "--cookie-secret=$(COOKIE_SECRET)", + "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}", + "--added-label=True", + }, + Env: []corev1.EnvVar{ + { + Name: "COOKIE_SECRET", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: raycluster.Name + "-oauth-config", + }, + Key: "cookie_secret", + }, + }, + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "proxy-tls-secret", + MountPath: "/etc/tls/private", + ReadOnly: true, + }, + }, + } + + // Adding the new OAuth sidecar container + raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar) + + tlsSecretVolume := corev1.Volume{ + Name: "proxy-tls-secret", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: raycluster.Name + "-proxy-tls-secret", + }, + }, + } + + raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume) + + // Ensure the service account is set + if raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" { + raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy" + } + } + return nil +} From 0720a47418b7036a0519a580f57258df310305a8 Mon Sep 17 00:00:00 2001 From: Eoin Gallinagh Date: Tue, 16 Apr 2024 14:32:42 +0100 Subject: [PATCH 2/8] remove: added label --- pkg/controllers/raycluster_webhook.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index 5cabf2fae..63581907c 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -75,7 +75,6 @@ func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) e "--tls-key=/etc/tls/private/tls.key", "--cookie-secret=$(COOKIE_SECRET)", "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}", - "--added-label=True", }, Env: []corev1.EnvVar{ { From 4fdc7ff8de59393c7608f114221044c6a228eb94 Mon Sep 17 00:00:00 2001 From: Eoin Gallinagh Date: Tue, 16 Apr 2024 16:31:05 +0100 Subject: [PATCH 3/8] add: address feedback on pr --- config/certmanager/certificate.yaml | 3 --- config/certmanager/kustomization.yaml | 5 ----- config/certmanager/kustomizeconfig.yaml | 16 ---------------- config/default/webhookcainjection_patch.yaml | 10 +--------- config/webhook/service.yaml | 11 ++--------- pkg/controllers/raycluster_webhook.go | 4 +++- 6 files changed, 6 insertions(+), 43 deletions(-) delete mode 100644 config/certmanager/certificate.yaml delete mode 100644 config/certmanager/kustomization.yaml delete mode 100644 config/certmanager/kustomizeconfig.yaml diff --git a/config/certmanager/certificate.yaml b/config/certmanager/certificate.yaml deleted file mode 100644 index 451153488..000000000 --- a/config/certmanager/certificate.yaml +++ /dev/null @@ -1,3 +0,0 @@ -# The following manifests contain a self-signed issuer CR and a certificate CR. -# More document can be found at https://docs.cert-manager.io -# WARNING: Targets CertManager v1.0. Check https://cert-manager.io/docs/installation/upgrading/ for breaking changes. diff --git a/config/certmanager/kustomization.yaml b/config/certmanager/kustomization.yaml deleted file mode 100644 index bebea5a59..000000000 --- a/config/certmanager/kustomization.yaml +++ /dev/null @@ -1,5 +0,0 @@ -resources: -- certificate.yaml - -configurations: -- kustomizeconfig.yaml diff --git a/config/certmanager/kustomizeconfig.yaml b/config/certmanager/kustomizeconfig.yaml deleted file mode 100644 index e631f7773..000000000 --- a/config/certmanager/kustomizeconfig.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# This configuration is for teaching kustomize how to update name ref and var substitution -nameReference: -- kind: Issuer - group: cert-manager.io - fieldSpecs: - - kind: Certificate - group: cert-manager.io - path: spec/issuerRef/name - -varReference: -- kind: Certificate - group: cert-manager.io - path: spec/commonName -- kind: Certificate - group: cert-manager.io - path: spec/dnsNames diff --git a/config/default/webhookcainjection_patch.yaml b/config/default/webhookcainjection_patch.yaml index 2770f6a9c..39db86347 100644 --- a/config/default/webhookcainjection_patch.yaml +++ b/config/default/webhookcainjection_patch.yaml @@ -2,14 +2,6 @@ apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration metadata: - labels: - app.kubernetes.io/name: mutatingwebhookconfiguration - app.kubernetes.io/instance: mutating-webhook-configuration - app.kubernetes.io/component: webhook - app.kubernetes.io/created-by: codeflare-operator - app.kubernetes.io/part-of: codeflare-operator - app.kubernetes.io/managed-by: kustomize name: mutating-webhook-configuration annotations: - service.beta.openshift.io/inject-cabundle: "true" - service.beta.openshift.io/serving-cert-secret-name: codeflare-operator-raycluster-webhook-cert + service.beta.openshift.io/inject-cabundle: "true" diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml index 9caf0911a..ada0e3281 100644 --- a/config/webhook/service.yaml +++ b/config/webhook/service.yaml @@ -1,22 +1,15 @@ apiVersion: v1 kind: Service metadata: - labels: - app.kubernetes.io/name: service - app.kubernetes.io/part-of: codeflare-operator - app.kubernetes.io/instance: webhook-service - app.kubernetes.io/component: webhook - app.kubernetes.io/created-by: codeflare-operator - app.kubernetes.io/managed-by: kustomize name: webhook-service namespace: openshift-operators annotations: - service.beta.openshift.io/serving-cert-secret-name: codeflare-operator-raycluster-webhook-cert + service.beta.openshift.io/serving-cert-secret-name: codeflare-operator-raycluster-webhook-cert spec: ports: - port: 443 protocol: TCP targetPort: 9443 selector: - app.kubernetes.io/part-of: codeflare + app.kubernetes.io/part-of: codeflare app.kubernetes.io/name: codeflare-operator diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index 63581907c..bce533222 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -18,7 +18,9 @@ package controllers import ( "context" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -32,7 +34,7 @@ var rayclusterlog = logf.Log.WithName("raycluster-resource") func (r *RayClusterDefaulter) SetupWebhookWithManager(mgr ctrl.Manager) error { return ctrl.NewWebhookManagedBy(mgr). For(&rayv1.RayCluster{}). - WithDefaulter(r). + WithDefaulter(&RayClusterDefaulter{}). Complete() } From 2e4fbcd792e3865d9c183924a58320a35485563b Mon Sep 17 00:00:00 2001 From: Eoin Gallinagh Date: Wed, 17 Apr 2024 11:26:03 +0100 Subject: [PATCH 4/8] add: check for isRayDashboardOAuthEnabledWebhook before applying patch --- pkg/controllers/raycluster_webhook.go | 128 ++++++++++++++------------ pkg/controllers/support.go | 7 ++ 2 files changed, 76 insertions(+), 59 deletions(-) diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index bce533222..3406cd9b6 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -26,6 +26,8 @@ import ( ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" + + "github.com/project-codeflare/codeflare-operator/pkg/config" ) // log is for logging in this package. @@ -34,13 +36,19 @@ var rayclusterlog = logf.Log.WithName("raycluster-resource") func (r *RayClusterDefaulter) SetupWebhookWithManager(mgr ctrl.Manager) error { return ctrl.NewWebhookManagedBy(mgr). For(&rayv1.RayCluster{}). - WithDefaulter(&RayClusterDefaulter{}). + WithDefaulter(&RayClusterDefaulter{ + Config: r.Config, + rayDashboardOauthEnabled: r.isRayDashboardOAuthEnabledWebhook(), + }). Complete() } //+kubebuilder:webhook:path=/mutate-ray-io-v1-raycluster,mutating=true,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create;update,versions=v1,name=mraycluster.kb.io,admissionReviewVersions=v1 -type RayClusterDefaulter struct{} +type RayClusterDefaulter struct { + Config *config.KubeRayConfiguration + rayDashboardOauthEnabled bool +} var _ webhook.CustomDefaulter = &RayClusterDefaulter{} @@ -48,75 +56,77 @@ var _ webhook.CustomDefaulter = &RayClusterDefaulter{} func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) error { raycluster := obj.(*rayv1.RayCluster) - rayclusterlog.Info("default", "name", raycluster.Name) - // Check and add OAuth proxy if it does not exist. - alreadyExists := false - for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers { - if container.Name == "oauth-proxy" { - rayclusterlog.Info("OAuth sidecar already exists, no patch needed") - alreadyExists = true - break // exits the for loop + if r.rayDashboardOauthEnabled { + rayclusterlog.Info("default", "name", raycluster.Name) + // Check and add OAuth proxy if it does not exist. + alreadyExists := false + for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers { + if container.Name == "oauth-proxy" { + rayclusterlog.Info("OAuth sidecar already exists, no patch needed") + alreadyExists = true + break // exits the for loop + } } - } - if !alreadyExists { - rayclusterlog.Info("Adding OAuth sidecar container") - // definition of the new container - newOAuthSidecar := corev1.Container{ - Name: "oauth-proxy", - Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", - Ports: []corev1.ContainerPort{ - {ContainerPort: 8443, Name: "oauth-proxy"}, - }, - Args: []string{ - "--https-address=:8443", - "--provider=openshift", - "--openshift-service-account=" + raycluster.Name + "-oauth-proxy", - "--upstream=http://localhost:8265", - "--tls-cert=/etc/tls/private/tls.crt", - "--tls-key=/etc/tls/private/tls.key", - "--cookie-secret=$(COOKIE_SECRET)", - "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}", - }, - Env: []corev1.EnvVar{ - { - Name: "COOKIE_SECRET", - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: raycluster.Name + "-oauth-config", + if !alreadyExists { + rayclusterlog.Info("Adding OAuth sidecar container") + // definition of the new container + newOAuthSidecar := corev1.Container{ + Name: "oauth-proxy", + Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", + Ports: []corev1.ContainerPort{ + {ContainerPort: 8443, Name: "oauth-proxy"}, + }, + Args: []string{ + "--https-address=:8443", + "--provider=openshift", + "--openshift-service-account=" + raycluster.Name + "-oauth-proxy", + "--upstream=http://localhost:8265", + "--tls-cert=/etc/tls/private/tls.crt", + "--tls-key=/etc/tls/private/tls.key", + "--cookie-secret=$(COOKIE_SECRET)", + "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}", + }, + Env: []corev1.EnvVar{ + { + Name: "COOKIE_SECRET", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: raycluster.Name + "-oauth-config", + }, + Key: "cookie_secret", }, - Key: "cookie_secret", }, }, }, - }, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "proxy-tls-secret", - MountPath: "/etc/tls/private", - ReadOnly: true, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "proxy-tls-secret", + MountPath: "/etc/tls/private", + ReadOnly: true, + }, }, - }, - } + } - // Adding the new OAuth sidecar container - raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar) + // Adding the new OAuth sidecar container + raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar) - tlsSecretVolume := corev1.Volume{ - Name: "proxy-tls-secret", - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{ - SecretName: raycluster.Name + "-proxy-tls-secret", + tlsSecretVolume := corev1.Volume{ + Name: "proxy-tls-secret", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: raycluster.Name + "-proxy-tls-secret", + }, }, - }, - } + } - raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume) + raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume) - // Ensure the service account is set - if raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" { - raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy" + // Ensure the service account is set + if raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" { + raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy" + } } } return nil diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 344e6215f..698bd8da3 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -155,3 +155,10 @@ func (r *RayClusterReconciler) isRayDashboardOAuthEnabled() bool { } return true } + +func (r *RayClusterDefaulter) isRayDashboardOAuthEnabledWebhook() bool { + if r.Config != nil && r.Config.RayDashboardOAuthEnabled != nil { + return *r.Config.RayDashboardOAuthEnabled + } + return true +} From 7930ced14c922a61bc086696d71f0cf0b324d9e4 Mon Sep 17 00:00:00 2001 From: Eoin Gallinagh Date: Wed, 17 Apr 2024 12:47:55 +0100 Subject: [PATCH 5/8] update: placement of cookie secret in the mutating webhook for rayclusters --- pkg/controllers/raycluster_webhook.go | 30 +++++++++++++++------------ 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index 3406cd9b6..d63616e13 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -87,19 +87,6 @@ func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) e "--cookie-secret=$(COOKIE_SECRET)", "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}", }, - Env: []corev1.EnvVar{ - { - Name: "COOKIE_SECRET", - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: raycluster.Name + "-oauth-config", - }, - Key: "cookie_secret", - }, - }, - }, - }, VolumeMounts: []corev1.VolumeMount{ { Name: "proxy-tls-secret", @@ -112,6 +99,23 @@ func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) e // Adding the new OAuth sidecar container raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar) + cookieSecret := corev1.EnvVar{ + Name: "COOKIE_SECRET", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: raycluster.Name + "-oauth-config", + }, + Key: "cookie_secret", + }, + }, + } + + raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env = append( + raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env, + cookieSecret, + ) + tlsSecretVolume := corev1.Volume{ Name: "proxy-tls-secret", VolumeSource: corev1.VolumeSource{ From 2cba143d0627c74ff5e265e95674685c2fa92bc5 Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Wed, 17 Apr 2024 16:47:51 +0200 Subject: [PATCH 6/8] Refactor RayCluster mutating webhook --- PROJECT | 2 +- config/default/kustomization.yaml | 5 - config/openshift/kustomization.yaml | 22 +++ .../manager_webhook_patch.yaml | 0 .../webhookcainjection_patch.yaml | 0 main.go | 3 +- pkg/controllers/raycluster_webhook.go | 159 +++++++++--------- pkg/controllers/support.go | 7 - 8 files changed, 102 insertions(+), 96 deletions(-) create mode 100644 config/openshift/kustomization.yaml rename config/{default => openshift}/manager_webhook_patch.yaml (100%) rename config/{default => openshift}/webhookcainjection_patch.yaml (100%) diff --git a/PROJECT b/PROJECT index 3e821aa02..0fc5562e2 100644 --- a/PROJECT +++ b/PROJECT @@ -12,7 +12,7 @@ projectName: codeflare-operator repo: github.com/project-codeflare/codeflare-operator resources: - controller: true - domain: codeflare.dev + domain: ray.io group: ray kind: RayCluster path: github.com/project-codeflare/codeflare-operator/pkg/controllers diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index fccc7164e..42fa71796 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -16,13 +16,8 @@ commonLabels: bases: - ../rbac - ../manager - - ../webhook # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. # - ../prometheus resources: - metrics_service.yaml - -patches: -- path: manager_webhook_patch.yaml -- path: webhookcainjection_patch.yaml diff --git a/config/openshift/kustomization.yaml b/config/openshift/kustomization.yaml new file mode 100644 index 000000000..22e168162 --- /dev/null +++ b/config/openshift/kustomization.yaml @@ -0,0 +1,22 @@ +# Adds namespace to all resources. +namespace: openshift-operators + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: codeflare-operator- + +# Labels to add to all resources and selectors. +commonLabels: + app.kubernetes.io/name: codeflare-operator + app.kubernetes.io/part-of: codeflare + +bases: + - ../default + - ../webhook + +patches: +- path: manager_webhook_patch.yaml +- path: webhookcainjection_patch.yaml diff --git a/config/default/manager_webhook_patch.yaml b/config/openshift/manager_webhook_patch.yaml similarity index 100% rename from config/default/manager_webhook_patch.yaml rename to config/openshift/manager_webhook_patch.yaml diff --git a/config/default/webhookcainjection_patch.yaml b/config/openshift/webhookcainjection_patch.yaml similarity index 100% rename from config/default/webhookcainjection_patch.yaml rename to config/openshift/webhookcainjection_patch.yaml diff --git a/main.go b/main.go index 40f916a50..1ed191bd5 100644 --- a/main.go +++ b/main.go @@ -147,8 +147,7 @@ func main() { }) exitOnError(err, "unable to start manager") - rayClusterDefaulter := &controllers.RayClusterDefaulter{} - exitOnError(rayClusterDefaulter.SetupWebhookWithManager(mgr), "error setting up webhook") + exitOnError(controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay), "error setting up RayCluster webhook") ok, err := HasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster")) if ok { diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index d63616e13..0cd635040 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -23,6 +23,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" @@ -33,105 +34,101 @@ import ( // log is for logging in this package. var rayclusterlog = logf.Log.WithName("raycluster-resource") -func (r *RayClusterDefaulter) SetupWebhookWithManager(mgr ctrl.Manager) error { +func SetupRayClusterWebhookWithManager(mgr ctrl.Manager, cfg *config.KubeRayConfiguration) error { return ctrl.NewWebhookManagedBy(mgr). For(&rayv1.RayCluster{}). - WithDefaulter(&RayClusterDefaulter{ - Config: r.Config, - rayDashboardOauthEnabled: r.isRayDashboardOAuthEnabledWebhook(), + WithDefaulter(&rayClusterDefaulter{ + Config: cfg, }). Complete() } -//+kubebuilder:webhook:path=/mutate-ray-io-v1-raycluster,mutating=true,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create;update,versions=v1,name=mraycluster.kb.io,admissionReviewVersions=v1 +// +kubebuilder:webhook:path=/mutate-ray-io-v1-raycluster,mutating=true,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create;update,versions=v1,name=mraycluster.kb.io,admissionReviewVersions=v1 -type RayClusterDefaulter struct { - Config *config.KubeRayConfiguration - rayDashboardOauthEnabled bool +type rayClusterDefaulter struct { + Config *config.KubeRayConfiguration } -var _ webhook.CustomDefaulter = &RayClusterDefaulter{} +var _ webhook.CustomDefaulter = &rayClusterDefaulter{} // Default implements webhook.Defaulter so a webhook will be registered for the type -func (r *RayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) error { +func (r *rayClusterDefaulter) Default(ctx context.Context, obj runtime.Object) error { raycluster := obj.(*rayv1.RayCluster) - if r.rayDashboardOauthEnabled { - rayclusterlog.Info("default", "name", raycluster.Name) - // Check and add OAuth proxy if it does not exist. - alreadyExists := false - for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers { - if container.Name == "oauth-proxy" { - rayclusterlog.Info("OAuth sidecar already exists, no patch needed") - alreadyExists = true - break // exits the for loop - } + if !pointer.BoolDeref(r.Config.RayDashboardOAuthEnabled, true) { + return nil + } + + // Check and add OAuth proxy if it does not exist + for _, container := range raycluster.Spec.HeadGroupSpec.Template.Spec.Containers { + if container.Name == "oauth-proxy" { + rayclusterlog.V(2).Info("OAuth sidecar already exists, no patch needed") + return nil } + } - if !alreadyExists { - rayclusterlog.Info("Adding OAuth sidecar container") - // definition of the new container - newOAuthSidecar := corev1.Container{ - Name: "oauth-proxy", - Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", - Ports: []corev1.ContainerPort{ - {ContainerPort: 8443, Name: "oauth-proxy"}, - }, - Args: []string{ - "--https-address=:8443", - "--provider=openshift", - "--openshift-service-account=" + raycluster.Name + "-oauth-proxy", - "--upstream=http://localhost:8265", - "--tls-cert=/etc/tls/private/tls.crt", - "--tls-key=/etc/tls/private/tls.key", - "--cookie-secret=$(COOKIE_SECRET)", - "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}", - }, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "proxy-tls-secret", - MountPath: "/etc/tls/private", - ReadOnly: true, - }, - }, - } - - // Adding the new OAuth sidecar container - raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar) - - cookieSecret := corev1.EnvVar{ - Name: "COOKIE_SECRET", - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: raycluster.Name + "-oauth-config", - }, - Key: "cookie_secret", - }, - }, - } - - raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env = append( - raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env, - cookieSecret, - ) - - tlsSecretVolume := corev1.Volume{ - Name: "proxy-tls-secret", - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{ - SecretName: raycluster.Name + "-proxy-tls-secret", - }, + rayclusterlog.V(2).Info("Adding OAuth sidecar container") + // definition of the new container + newOAuthSidecar := corev1.Container{ + Name: "oauth-proxy", + Image: "registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", + Ports: []corev1.ContainerPort{ + {ContainerPort: 8443, Name: "oauth-proxy"}, + }, + Args: []string{ + "--https-address=:8443", + "--provider=openshift", + "--openshift-service-account=" + raycluster.Name + "-oauth-proxy", + "--upstream=http://localhost:8265", + "--tls-cert=/etc/tls/private/tls.crt", + "--tls-key=/etc/tls/private/tls.key", + "--cookie-secret=$(COOKIE_SECRET)", + "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"default\",\"verb\":\"get\"}}", + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "proxy-tls-secret", + MountPath: "/etc/tls/private", + ReadOnly: true, + }, + }, + } + + // Adding the new OAuth sidecar container + raycluster.Spec.HeadGroupSpec.Template.Spec.Containers = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Containers, newOAuthSidecar) + + cookieSecret := corev1.EnvVar{ + Name: "COOKIE_SECRET", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: raycluster.Name + "-oauth-config", }, - } + Key: "cookie_secret", + }, + }, + } - raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume) + raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env = append( + raycluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env, + cookieSecret, + ) + + tlsSecretVolume := corev1.Volume{ + Name: "proxy-tls-secret", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: raycluster.Name + "-proxy-tls-secret", + }, + }, + } - // Ensure the service account is set - if raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" { - raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy" - } - } + raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes = append(raycluster.Spec.HeadGroupSpec.Template.Spec.Volumes, tlsSecretVolume) + + // Ensure the service account is set + if raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName == "" { + raycluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = raycluster.Name + "-oauth-proxy" } + return nil } diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 698bd8da3..344e6215f 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -155,10 +155,3 @@ func (r *RayClusterReconciler) isRayDashboardOAuthEnabled() bool { } return true } - -func (r *RayClusterDefaulter) isRayDashboardOAuthEnabledWebhook() bool { - if r.Config != nil && r.Config.RayDashboardOAuthEnabled != nil { - return *r.Config.RayDashboardOAuthEnabled - } - return true -} From 5516a9af88c0d41da8d271eeb4a74c7beb27c7a7 Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Wed, 17 Apr 2024 17:30:54 +0200 Subject: [PATCH 7/8] Start RayCluster webhook on OpenShift only --- main.go | 35 ++++++++++++++++++--- pkg/controllers/raycluster_controller.go | 18 ++++------- pkg/controllers/support.go | 39 ------------------------ 3 files changed, 37 insertions(+), 55 deletions(-) diff --git a/main.go b/main.go index 1ed191bd5..7eff5fc40 100644 --- a/main.go +++ b/main.go @@ -147,11 +147,21 @@ func main() { }) exitOnError(err, "unable to start manager") - exitOnError(controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay), "error setting up RayCluster webhook") + OpenShift := isOpenShift(ctx, kubeClient.DiscoveryClient) - ok, err := HasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster")) + if OpenShift { + // TODO: setup the RayCluster webhook on vanilla Kubernetes + exitOnError(controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay), "error setting up RayCluster webhook") + } + + ok, err := hasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster")) if ok { - rayClusterController := controllers.RayClusterReconciler{Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Config: cfg.KubeRay} + rayClusterController := controllers.RayClusterReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Config: cfg.KubeRay, + IsOpenShift: OpenShift, + } exitOnError(rayClusterController.SetupWithManager(mgr), "Error setting up RayCluster controller") } else if err != nil { exitOnError(err, "Could not determine if RayCluster CR present on cluster.") @@ -207,7 +217,7 @@ func createConfigMap(ctx context.Context, client kubernetes.Interface, ns, name return err } -func HasAPIResourceForGVK(dc discovery.DiscoveryInterface, gvk schema.GroupVersionKind) (bool, error) { +func hasAPIResourceForGVK(dc discovery.DiscoveryInterface, gvk schema.GroupVersionKind) (bool, error) { gv, kind := gvk.ToAPIVersionAndKind() if resources, err := dc.ServerResourcesForGroupVersion(gv); err != nil { if apierrors.IsNotFound(err) { @@ -247,3 +257,20 @@ func exitOnError(err error, msg string) { os.Exit(1) } } + +func isOpenShift(ctx context.Context, dc discovery.DiscoveryInterface) bool { + logger := ctrl.LoggerFrom(ctx) + apiGroupList, err := dc.ServerGroups() + if err != nil { + logger.Info("Error while querying ServerGroups, assuming we're on Vanilla Kubernetes") + return false + } + for i := 0; i < len(apiGroupList.Groups); i++ { + if strings.HasSuffix(apiGroupList.Groups[i].Name, ".openshift.io") { + logger.Info("We detected being on OpenShift!") + return true + } + } + logger.Info("We detected being on Vanilla Kubernetes!") + return false +} diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index 551367e49..461e3d01f 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -48,13 +48,12 @@ import ( // RayClusterReconciler reconciles a RayCluster object type RayClusterReconciler struct { client.Client - kubeClient *kubernetes.Clientset - routeClient *routev1client.RouteV1Client - Scheme *runtime.Scheme - CookieSalt string - Config *config.KubeRayConfiguration - IsOpenShift bool - IsOpenShiftInitialized bool + kubeClient *kubernetes.Clientset + routeClient *routev1client.RouteV1Client + Scheme *runtime.Scheme + CookieSalt string + Config *config.KubeRayConfiguration + IsOpenShift bool } const ( @@ -106,11 +105,6 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } - if !r.IsOpenShiftInitialized { - r.IsOpenShift = isOpenShift(ctx, r.kubeClient, &cluster) - r.IsOpenShiftInitialized = true - } - if cluster.ObjectMeta.DeletionTimestamp.IsZero() { if !controllerutil.ContainsFinalizer(&cluster, oAuthFinalizer) { logger.Info("Add a finalizer", "finalizer", oAuthFinalizer) diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 344e6215f..5a47993be 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -3,7 +3,6 @@ package controllers import ( "context" "fmt" - "strings" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" @@ -12,10 +11,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" v1 "k8s.io/client-go/applyconfigurations/meta/v1" networkingv1ac "k8s.io/client-go/applyconfigurations/networking/v1" - "k8s.io/client-go/discovery" "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - ctrl "sigs.k8s.io/controller-runtime" routeapply "github.com/openshift/client-go/route/applyconfigurations/route/v1" ) @@ -103,41 +99,6 @@ func desiredClusterIngress(cluster *rayv1.RayCluster, ingressHost string) *netwo ) } -// getDiscoveryClient returns a discovery client for the current reconciler -func getDiscoveryClient(config *rest.Config) (*discovery.DiscoveryClient, error) { - return discovery.NewDiscoveryClientForConfig(config) -} - -// Check where we are running. We are trying to distinguish here whether -// this is vanilla kubernetes cluster or Openshift -func isOpenShift(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster) bool { - // The discovery package is used to discover APIs supported by a Kubernetes API server. - logger := ctrl.LoggerFrom(ctx) - config, err := ctrl.GetConfig() - if err != nil && config == nil { - logger.Info("Cannot retrieve config, assuming we're on Vanilla Kubernetes") - return false - } - dclient, err := getDiscoveryClient(config) - if err != nil && dclient == nil { - logger.Info("Cannot retrieve a DiscoveryClient, assuming we're on Vanilla Kubernetes") - return false - } - apiGroupList, err := dclient.ServerGroups() - if err != nil { - logger.Info("Error while querying ServerGroups, assuming we're on Vanilla Kubernetes") - return false - } - for i := 0; i < len(apiGroupList.Groups); i++ { - if strings.HasSuffix(apiGroupList.Groups[i].Name, ".openshift.io") { - logger.Info("We detected being on OpenShift!") - return true - } - } - logger.Info("We detected being on Vanilla Kubernetes!") - return false -} - // getIngressHost generates the cluster URL string based on the cluster type, RayCluster, and ingress domain. func (r *RayClusterReconciler) getIngressHost(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressNameFromCluster string) (string, error) { ingressDomain := "" From 59c2b2bc93cbccb7a26d2fa0291a861b583fde61 Mon Sep 17 00:00:00 2001 From: Antonin Stefanutti Date: Wed, 17 Apr 2024 17:58:25 +0200 Subject: [PATCH 8/8] Refactor RayCluster controller --- pkg/controllers/raycluster_controller.go | 65 +++++++++++++++--------- pkg/controllers/support.go | 22 -------- 2 files changed, 42 insertions(+), 45 deletions(-) diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index 461e3d01f..ba6ca84fe 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -21,6 +21,7 @@ import ( "crypto/rand" "crypto/sha1" "encoding/base64" + "fmt" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" @@ -96,9 +97,9 @@ var ( func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) - var cluster rayv1.RayCluster + cluster := &rayv1.RayCluster{} - if err := r.Get(ctx, req.NamespacedName, &cluster); err != nil { + if err := r.Get(ctx, req.NamespacedName, cluster); err != nil { if !errors.IsNotFound(err) { logger.Error(err, "Error getting RayCluster resource") } @@ -106,21 +107,21 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } if cluster.ObjectMeta.DeletionTimestamp.IsZero() { - if !controllerutil.ContainsFinalizer(&cluster, oAuthFinalizer) { + if !controllerutil.ContainsFinalizer(cluster, oAuthFinalizer) { logger.Info("Add a finalizer", "finalizer", oAuthFinalizer) - controllerutil.AddFinalizer(&cluster, oAuthFinalizer) - if err := r.Update(ctx, &cluster); err != nil { + controllerutil.AddFinalizer(cluster, oAuthFinalizer) + if err := r.Update(ctx, cluster); err != nil { // this log is info level since errors are not fatal and are expected logger.Info("WARN: Failed to update RayCluster with finalizer", "error", err.Error(), logRequeueing, true) return ctrl.Result{RequeueAfter: requeueTime}, err } } - } else if controllerutil.ContainsFinalizer(&cluster, oAuthFinalizer) { + } else if controllerutil.ContainsFinalizer(cluster, oAuthFinalizer) { err := client.IgnoreNotFound(r.Client.Delete( ctx, &rbacv1.ClusterRoleBinding{ ObjectMeta: metav1.ObjectMeta{ - Name: crbNameFromCluster(&cluster), + Name: crbNameFromCluster(cluster), }, }, &deleteOptions, @@ -129,8 +130,8 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Error(err, "Failed to remove OAuth ClusterRoleBinding.", logRequeueing, true) return ctrl.Result{RequeueAfter: requeueTime}, err } - controllerutil.RemoveFinalizer(&cluster, oAuthFinalizer) - if err := r.Update(ctx, &cluster); err != nil { + controllerutil.RemoveFinalizer(cluster, oAuthFinalizer) + if err := r.Update(ctx, cluster); err != nil { logger.Error(err, "Failed to remove finalizer from RayCluster", logRequeueing, true) return ctrl.Result{RequeueAfter: requeueTime}, err } @@ -138,66 +139,66 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } - if cluster.Status.State != "suspended" && r.isRayDashboardOAuthEnabled() && r.IsOpenShift { + if cluster.Status.State != "suspended" && isRayDashboardOAuthEnabled(r.Config) && r.IsOpenShift { logger.Info("Creating OAuth Objects") - _, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update OAuth Route") return ctrl.Result{RequeueAfter: requeueTime}, err } - _, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredOAuthSecret(&cluster, r), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredOAuthSecret(cluster, r), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to create OAuth Secret") return ctrl.Result{RequeueAfter: requeueTime}, err } - _, err = r.kubeClient.CoreV1().Services(cluster.Namespace).Apply(ctx, desiredOAuthService(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err = r.kubeClient.CoreV1().Services(cluster.Namespace).Apply(ctx, desiredOAuthService(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update OAuth Service") return ctrl.Result{RequeueAfter: requeueTime}, err } - _, err = r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Apply(ctx, desiredServiceAccount(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err = r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Apply(ctx, desiredServiceAccount(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update OAuth ServiceAccount") return ctrl.Result{RequeueAfter: requeueTime}, err } - _, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update OAuth ClusterRoleBinding") return ctrl.Result{RequeueAfter: requeueTime}, err } logger.Info("Creating RayClient Route") - _, err = r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredRayClientRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err = r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredRayClientRoute(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update RayClient Route") return ctrl.Result{RequeueAfter: requeueTime}, err } - } else if cluster.Status.State != "suspended" && !r.isRayDashboardOAuthEnabled() && !r.IsOpenShift { + } else if cluster.Status.State != "suspended" && !isRayDashboardOAuthEnabled(r.Config) && !r.IsOpenShift { logger.Info("We detected being on Vanilla Kubernetes!") logger.Info("Creating Dashboard Ingress") - dashboardName := dashboardNameFromCluster(&cluster) - dashboardIngressHost, err := r.getIngressHost(ctx, r.kubeClient, &cluster, dashboardName) + dashboardName := dashboardNameFromCluster(cluster) + dashboardIngressHost, err := getIngressHost(r.Config, cluster, dashboardName) if err != nil { return ctrl.Result{RequeueAfter: requeueTime}, err } - _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, dashboardIngressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(cluster, dashboardIngressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { // This log is info level since errors are not fatal and are expected logger.Info("WARN: Failed to update Dashboard Ingress", "error", err.Error(), logRequeueing, true) return ctrl.Result{RequeueAfter: requeueTime}, err } logger.Info("Creating RayClient Ingress") - rayClientName := rayClientNameFromCluster(&cluster) - rayClientIngressHost, err := r.getIngressHost(ctx, r.kubeClient, &cluster, rayClientName) + rayClientName := rayClientNameFromCluster(cluster) + rayClientIngressHost, err := getIngressHost(r.Config, cluster, rayClientName) if err != nil { return ctrl.Result{RequeueAfter: requeueTime}, err } - _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredRayClientIngress(&cluster, rayClientIngressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredRayClientIngress(cluster, rayClientIngressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update RayClient Ingress") return ctrl.Result{RequeueAfter: requeueTime}, err @@ -207,6 +208,24 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } +// getIngressHost generates the cluster URL string based on the cluster type, RayCluster, and ingress domain. +func getIngressHost(cfg *config.KubeRayConfiguration, cluster *rayv1.RayCluster, ingressNameFromCluster string) (string, error) { + ingressDomain := "" + if cfg != nil && cfg.IngressDomain != "" { + ingressDomain = cfg.IngressDomain + } else { + return "", fmt.Errorf("missing IngressDomain configuration in ConfigMap 'codeflare-operator-config'") + } + return fmt.Sprintf("%s-%s.%s", ingressNameFromCluster, cluster.Namespace, ingressDomain), nil +} + +func isRayDashboardOAuthEnabled(cfg *config.KubeRayConfiguration) bool { + if cfg != nil && cfg.RayDashboardOAuthEnabled != nil { + return *cfg.RayDashboardOAuthEnabled + } + return true +} + func crbNameFromCluster(cluster *rayv1.RayCluster) string { return cluster.Name + "-" + cluster.Namespace + "-auth" // NOTE: potential naming conflicts ie {name: foo, ns: bar-baz} and {name: foo-bar, ns: baz} } diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 5a47993be..673d59419 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -1,9 +1,6 @@ package controllers import ( - "context" - "fmt" - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" networkingv1 "k8s.io/api/networking/v1" @@ -11,7 +8,6 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" v1 "k8s.io/client-go/applyconfigurations/meta/v1" networkingv1ac "k8s.io/client-go/applyconfigurations/networking/v1" - "k8s.io/client-go/kubernetes" routeapply "github.com/openshift/client-go/route/applyconfigurations/route/v1" ) @@ -98,21 +94,3 @@ func desiredClusterIngress(cluster *rayv1.RayCluster, ingressHost string) *netwo ), ) } - -// getIngressHost generates the cluster URL string based on the cluster type, RayCluster, and ingress domain. -func (r *RayClusterReconciler) getIngressHost(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressNameFromCluster string) (string, error) { - ingressDomain := "" - if r.Config != nil && r.Config.IngressDomain != "" { - ingressDomain = r.Config.IngressDomain - } else { - return "", fmt.Errorf("missing IngressDomain configuration in ConfigMap 'codeflare-operator-config'") - } - return fmt.Sprintf("%s-%s.%s", ingressNameFromCluster, cluster.Namespace, ingressDomain), nil -} - -func (r *RayClusterReconciler) isRayDashboardOAuthEnabled() bool { - if r.Config != nil && r.Config.RayDashboardOAuthEnabled != nil { - return *r.Config.RayDashboardOAuthEnabled - } - return true -}