Skip to content

Commit a47d660

Browse files
WIP - Creation/Deletion of ingress and routes in RayCluster Controller
1 parent cb6cfa1 commit a47d660

File tree

1 file changed

+196
-57
lines changed

1 file changed

+196
-57
lines changed

controllers/raycluster_controller.go

Lines changed: 196 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ import (
2222
"crypto/sha1"
2323
"encoding/base64"
2424
"strconv"
25+
"strings"
26+
27+
"github.com/go-logr/logr"
28+
29+
networkingv1 "k8s.io/api/networking/v1"
30+
networkingv1ac "k8s.io/client-go/applyconfigurations/networking/v1"
2531

2632
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
2733

@@ -35,7 +41,9 @@ import (
3541
coreapply "k8s.io/client-go/applyconfigurations/core/v1"
3642
v1 "k8s.io/client-go/applyconfigurations/meta/v1"
3743
rbacapply "k8s.io/client-go/applyconfigurations/rbac/v1"
44+
"k8s.io/client-go/discovery"
3845
"k8s.io/client-go/kubernetes"
46+
"k8s.io/client-go/rest"
3947
ctrl "sigs.k8s.io/controller-runtime"
4048
"sigs.k8s.io/controller-runtime/pkg/client"
4149
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
@@ -52,6 +60,20 @@ type RayClusterReconciler struct {
5260
routeClient *routev1client.RouteV1Client
5361
Scheme *runtime.Scheme
5462
CookieSalt string
63+
IsOpenShift bool
64+
}
65+
66+
type IngressOptions struct {
67+
Ingresses []Ingress `json:"ingresses"`
68+
}
69+
70+
type Ingress struct {
71+
IngressName string `json:"ingressName"`
72+
Port int `json:"port"`
73+
PathType string `json:"pathType"`
74+
Path string `json:"path"`
75+
Host string `json:"host"`
76+
Annotations map[string]string `json:"annotations"`
5577
}
5678

5779
const (
@@ -61,6 +83,7 @@ const (
6183
CodeflareOAuthFinalizer = "codeflare.dev/oauth-finalizer"
6284
OAuthServicePort = 443
6385
OAuthServicePortName = "oauth-proxy"
86+
RegularServicePortName = "dashboard"
6487
strTrue = "true"
6588
strFalse = "false"
6689
logRequeueing = "requeueing"
@@ -94,6 +117,8 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
94117

95118
var cluster rayv1.RayCluster
96119

120+
IsOpenShift := getClusterType(logger)
121+
97122
if err := r.Get(ctx, req.NamespacedName, &cluster); err != nil {
98123
if !errors.IsNotFound(err) {
99124
logger.Error(err, "Error getting RayCluster resource")
@@ -126,75 +151,97 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
126151
logger.Info("Successfully removed finalizer.", logRequeueing, strFalse)
127152
return ctrl.Result{}, nil
128153
}
129-
130-
val, ok := cluster.ObjectMeta.Annotations["codeflare.dev/oauth"]
131-
boolVal, err := strconv.ParseBool(val)
132-
if err != nil {
133-
logger.Error(err, "Could not convert codeflare.dev/oauth value to bool", "codeflare.dev/oauth", val)
154+
if IsOpenShift {
155+
val, ok := cluster.ObjectMeta.Annotations["codeflare.dev/oauth"]
156+
boolVal, err := strconv.ParseBool(val)
157+
if err != nil {
158+
logger.Error(err, "Could not convert codeflare.dev/oauth value to bool", "codeflare.dev/oauth", val)
159+
}
160+
if !ok || err != nil || !boolVal {
161+
logger.Info("Removing all OAuth Objects")
162+
err := r.deleteIfNotExist(
163+
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthSecretNameFromCluster(&cluster)}, &corev1.Secret{},
164+
)
165+
if err != nil {
166+
logger.Error(err, "Error deleting OAuth Secret, retrying", logRequeueing, strTrue)
167+
return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil
168+
}
169+
err = r.deleteIfNotExist(
170+
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthServiceNameFromCluster(&cluster)}, &corev1.Service{},
171+
)
172+
if err != nil {
173+
logger.Error(err, "Error deleting OAuth Service, retrying", logRequeueing, strTrue)
174+
return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil
175+
}
176+
err = r.deleteIfNotExist(
177+
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: cluster.Name}, &corev1.ServiceAccount{},
178+
)
179+
if err != nil {
180+
logger.Error(err, "Error deleting OAuth ServiceAccount, retrying", logRequeueing, strTrue)
181+
return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil
182+
}
183+
err = r.deleteIfNotExist(
184+
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: crbNameFromCluster(&cluster)}, &rbacv1.ClusterRoleBinding{},
185+
)
186+
if err != nil {
187+
logger.Error(err, "Error deleting OAuth CRB, retrying", logRequeueing, strTrue)
188+
return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil
189+
}
190+
err = r.deleteIfNotExist(
191+
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: cluster.Name}, &routev1.Route{},
192+
)
193+
if err != nil {
194+
logger.Error(err, "Error deleting OAuth Route, retrying", logRequeueing, strTrue)
195+
return ctrl.Result{Requeue: true, RequeueAfter: requeueTime}, nil
196+
}
197+
}
134198
}
135-
if !ok || err != nil || !boolVal {
136-
logger.Info("Removing all OAuth Objects")
137-
err := r.deleteIfNotExist(
138-
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthSecretNameFromCluster(&cluster)}, &corev1.Secret{},
139-
)
199+
200+
if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] == "True" && IsOpenShift {
201+
logger.Info("Creating OAuth Objects")
202+
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
140203
if err != nil {
141-
logger.Error(err, "Error deleting OAuth Secret, retrying", logRequeueing, strTrue)
142-
return ctrl.Result{RequeueAfter: requeueTime}, nil
204+
logger.Error(err, "Failed to update OAuth Route")
143205
}
144-
err = r.deleteIfNotExist(
145-
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthServiceNameFromCluster(&cluster)}, &corev1.Service{},
146-
)
206+
207+
_, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredOAuthSecret(&cluster, r), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
147208
if err != nil {
148-
logger.Error(err, "Error deleting OAuth Service, retrying", logRequeueing, strTrue)
149-
return ctrl.Result{RequeueAfter: requeueTime}, nil
209+
logger.Error(err, "Failed to create OAuth Secret")
150210
}
151-
err = r.deleteIfNotExist(
152-
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthServiceAccountNameFromCluster(&cluster)}, &corev1.ServiceAccount{},
153-
)
211+
212+
_, err = r.kubeClient.CoreV1().Services(cluster.Namespace).Apply(ctx, desiredOAuthService(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
154213
if err != nil {
155-
logger.Error(err, "Error deleting OAuth ServiceAccount, retrying", logRequeueing, strTrue)
156-
return ctrl.Result{RequeueAfter: requeueTime}, nil
214+
logger.Error(err, "Failed to update OAuth Service")
157215
}
158-
err = r.deleteIfNotExist(
159-
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: crbNameFromCluster(&cluster)}, &rbacv1.ClusterRoleBinding{},
160-
)
216+
217+
_, err = r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Apply(ctx, desiredServiceAccount(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
161218
if err != nil {
162-
logger.Error(err, "Error deleting OAuth CRB, retrying", logRequeueing, strTrue)
163-
return ctrl.Result{RequeueAfter: requeueTime}, nil
219+
logger.Error(err, "Failed to update OAuth ServiceAccount")
164220
}
165-
err = r.deleteIfNotExist(
166-
ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: routeNameFromCluster(&cluster)}, &routev1.Route{},
167-
)
221+
222+
_, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
168223
if err != nil {
169-
logger.Error(err, "Error deleting OAuth Route, retrying", logRequeueing, strTrue)
170-
return ctrl.Result{RequeueAfter: requeueTime}, nil
224+
logger.Error(err, "Failed to update OAuth ClusterRoleBinding")
225+
}
226+
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && IsOpenShift {
227+
logger.Info(string(cluster.Status.State))
228+
// create a route
229+
logger.Info("Creating Route")
230+
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, createRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
231+
if err != nil {
232+
logger.Error(err, "Failed to update Route")
171233
}
172234
return ctrl.Result{}, nil
173-
}
174-
175-
_, err = r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
176-
if err != nil {
177-
logger.Error(err, "Failed to update OAuth Route")
178-
}
179-
180-
_, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredOAuthSecret(&cluster, r), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
181-
if err != nil {
182-
logger.Error(err, "Failed to create OAuth Secret")
183-
}
184-
185-
_, err = r.kubeClient.CoreV1().Services(cluster.Namespace).Apply(ctx, desiredOAuthService(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
186-
if err != nil {
187-
logger.Error(err, "Failed to update OAuth Service")
188-
}
189-
190-
_, err = r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Apply(ctx, desiredServiceAccount(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
191-
if err != nil {
192-
logger.Error(err, "Failed to update OAuth ServiceAccount")
193-
}
194-
195-
_, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
196-
if err != nil {
197-
logger.Error(err, "Failed to update OAuth ClusterRoleBinding")
235+
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && !IsOpenShift {
236+
logger.Info(string(cluster.Status.State))
237+
// create an ingress
238+
logger.Info("Creating Ingress")
239+
_, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, createIngressApplyConfiguration(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
240+
if err != nil {
241+
logger.Error(err, "Failed to update Ingress")
242+
}
243+
logger.Info("Ingress HAS BEEN CREATED")
244+
return ctrl.Result{}, nil
198245
}
199246

200247
return ctrl.Result{}, nil
@@ -335,3 +382,95 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
335382
For(&rayv1.RayCluster{}).
336383
Complete(r)
337384
}
385+
386+
// getDiscoveryClient returns a discovery client for the current reconciler
387+
func getDiscoveryClient(config *rest.Config) (*discovery.DiscoveryClient, error) {
388+
return discovery.NewDiscoveryClientForConfig(config)
389+
}
390+
391+
// Check where we are running. We are trying to distinguish here whether
392+
// this is vanilla kubernetes cluster or Openshift
393+
func getClusterType(logger logr.Logger) bool {
394+
// The discovery package is used to discover APIs supported by a Kubernetes API server.
395+
config, err := ctrl.GetConfig()
396+
if err == nil && config != nil {
397+
dclient, err := getDiscoveryClient(config)
398+
if err == nil && dclient != nil {
399+
apiGroupList, err := dclient.ServerGroups()
400+
if err != nil {
401+
logger.Info("Error while querying ServerGroups, assuming we're on Vanilla Kubernetes")
402+
return false
403+
} else {
404+
for i := 0; i < len(apiGroupList.Groups); i++ {
405+
if strings.HasSuffix(apiGroupList.Groups[i].Name, ".openshift.io") {
406+
logger.Info("We detected being on OpenShift!")
407+
return true
408+
}
409+
}
410+
logger.Info("We detected being on Vanilla Kubernetes!")
411+
return false
412+
}
413+
} else {
414+
logger.Info("Cannot retrieve a DiscoveryClient, assuming we're on Vanilla Kubernetes")
415+
return false
416+
}
417+
} else {
418+
logger.Info("Cannot retrieve config, assuming we're on Vanilla Kubernetes")
419+
return false
420+
}
421+
}
422+
423+
func serviceNameFromCluster(cluster *rayv1.RayCluster) string {
424+
return cluster.Name + "-head-svc"
425+
}
426+
427+
func createRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
428+
return routeapply.Route(routeNameFromCluster(cluster), cluster.Namespace).
429+
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
430+
WithSpec(routeapply.RouteSpec().
431+
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(serviceNameFromCluster(cluster))).
432+
WithPort(routeapply.RoutePort().WithTargetPort(intstr.FromString(RegularServicePortName))).
433+
WithTLS(routeapply.TLSConfig().
434+
WithTermination("edge")),
435+
).
436+
WithOwnerReferences(
437+
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
438+
)
439+
}
440+
441+
// Create an Ingress object for the RayCluster
442+
func createIngressApplyConfiguration(cluster *rayv1.RayCluster) *networkingv1ac.IngressApplyConfiguration {
443+
pt := networkingv1.PathTypeImplementationSpecific
444+
445+
return networkingv1ac.Ingress(cluster.Name, cluster.Namespace).
446+
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
447+
WithOwnerReferences(v1.OwnerReference().
448+
WithAPIVersion(cluster.APIVersion).
449+
WithKind(cluster.Kind).
450+
WithName(cluster.Name).
451+
WithUID(types.UID(cluster.UID))).
452+
WithSpec(networkingv1ac.IngressSpec().
453+
WithRules(networkingv1ac.IngressRule().
454+
WithHost("kind"). // Specify the host name here
455+
WithHTTP(networkingv1ac.HTTPIngressRuleValue().
456+
WithPaths(networkingv1ac.HTTPIngressPath().
457+
WithPath("/"). // Specify the path here
458+
WithPathType(pt).
459+
WithBackend(networkingv1ac.IngressBackend().
460+
WithService(networkingv1ac.IngressServiceBackend().
461+
WithName(serviceNameFromCluster(cluster)).
462+
WithPort(networkingv1ac.ServiceBackendPort().
463+
WithName(RegularServicePortName), // Assuming RegularServicePortName is a string constant defining the port name
464+
),
465+
),
466+
),
467+
),
468+
),
469+
),
470+
)
471+
// Optionally, add TLS configuration here if needed
472+
}
473+
474+
// Need to create route for Ray Client for local_interactive
475+
// No more ingress_options - Removing completely.
476+
// What to do about ingress_domain?

0 commit comments

Comments
 (0)