Skip to content

Commit 991d9eb

Browse files
Create rayclient ingress and route leveraging annotations
1 parent f415ce9 commit 991d9eb

File tree

1 file changed

+102
-48
lines changed

1 file changed

+102
-48
lines changed

controllers/raycluster_controller.go

Lines changed: 102 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,6 @@ type RayClusterReconciler struct {
6363
CookieSalt string
6464
}
6565

66-
type IngressOptions struct {
67-
Ingresses []Ingress `json:"ingresses"`
68-
}
69-
70-
type Ingress struct {
71-
IngressName string `json:"ingressName"`
72-
Port int `json:"port"`
73-
PathType string `json:"pathType"`
74-
Path string `json:"path"`
75-
Host string `json:"host"`
76-
Annotations map[string]string `json:"annotations"`
77-
}
78-
7966
const (
8067
requeueTime = 10
8168
controllerName = "codeflare-raycluster-controller"
@@ -87,9 +74,6 @@ const (
8774
strTrue = "true"
8875
strFalse = "false"
8976
logRequeueing = "requeueing"
90-
defaultIngressName = "ray-dashboard"
91-
defaultIngressPath = "/"
92-
defaultIngressPathType = networkingv1.PathTypePrefix
9377
)
9478

9579
var (
@@ -120,15 +104,17 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
120104

121105
var cluster rayv1.RayCluster
122106

123-
IsOpenShift, ingressHost := getClusterType(logger, r.kubeClient, cluster.Name, cluster.Namespace)
124-
125107
if err := r.Get(ctx, req.NamespacedName, &cluster); err != nil {
126108
if !errors.IsNotFound(err) {
127109
logger.Error(err, "Error getting RayCluster resource")
128110
}
129111
return ctrl.Result{}, client.IgnoreNotFound(err)
130112
}
131113

114+
isLocalInteractive := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/local_interactive"]
115+
isOpenShift, ingressHost := getClusterType(logger, r.kubeClient, &cluster)
116+
ingressDomain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
117+
132118
if cluster.ObjectMeta.DeletionTimestamp.IsZero() {
133119
if !controllerutil.ContainsFinalizer(&cluster, CodeflareOAuthFinalizer) {
134120
logger.Info("Add a finalizer", "finalizer", CodeflareOAuthFinalizer)
@@ -154,7 +140,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
154140
logger.Info("Successfully removed finalizer.", logRequeueing, strFalse)
155141
return ctrl.Result{}, nil
156142
}
157-
if IsOpenShift {
143+
if isOpenShift {
158144
val, ok := cluster.ObjectMeta.Annotations["codeflare.dev/oauth"]
159145
boolVal, err := strconv.ParseBool(val)
160146
if err != nil {
@@ -200,7 +186,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
200186
}
201187
}
202188

203-
if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] == "True" && IsOpenShift {
189+
if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] == "True" && isOpenShift {
204190
logger.Info("Creating OAuth Objects")
205191
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
206192
if err != nil {
@@ -226,24 +212,36 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
226212
if err != nil {
227213
logger.Error(err, "Failed to update OAuth ClusterRoleBinding")
228214
}
229-
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && IsOpenShift {
230-
logger.Info(string(cluster.Status.State))
231-
// create a route
232-
logger.Info("Creating Route")
215+
216+
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && isOpenShift {
217+
logger.Info("Creating Dashboard Route")
233218
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, createRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
234219
if err != nil {
235-
logger.Error(err, "Failed to update Route")
220+
logger.Error(err, "Failed to update Dashboard Route")
221+
}
222+
if isLocalInteractive == "True" && ingressDomain != "" {
223+
logger.Info("Creating RayClient Route")
224+
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, createRayClientRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
225+
if err != nil {
226+
logger.Error(err, "Failed to update RayClient Route")
227+
}
236228
}
237229
return ctrl.Result{}, nil
238-
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && !IsOpenShift {
239-
logger.Info(string(cluster.Status.State))
240-
// create an ingress
241-
logger.Info("Creating Ingress")
230+
231+
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && !isOpenShift {
232+
logger.Info("Creating Dashboard Ingress")
242233
_, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, createIngressApplyConfiguration(&cluster, ingressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
243234
if err != nil {
244-
logger.Error(err, "Failed to update Ingress")
235+
// This log is info level since errors are not fatal and are expected
236+
logger.Info("WARN: Failed to update Dashboard Ingress", "error", err.Error(), logRequeueing, strTrue)
237+
}
238+
if isLocalInteractive == "True" && ingressDomain != "" {
239+
logger.Info("Creating RayClient Ingress")
240+
_, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, createRayClientIngress(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
241+
if err != nil {
242+
logger.Error(err, "Failed to update RayClient Ingress")
243+
}
245244
}
246-
logger.Info("Ingress HAS BEEN CREATED")
247245
return ctrl.Result{}, nil
248246
}
249247

@@ -295,19 +293,23 @@ func desiredServiceAccount(cluster *rayv1.RayCluster) *coreapply.ServiceAccountA
295293
WithAnnotations(map[string]string{
296294
"serviceaccounts.openshift.io/oauth-redirectreference.first": "" +
297295
`{"kind":"OAuthRedirectReference","apiVersion":"v1",` +
298-
`"reference":{"kind":"Route","name":"` + routeNameFromCluster(cluster) + `"}}`,
296+
`"reference":{"kind":"Route","name":"` + dashboardNameFromCluster(cluster) + `"}}`,
299297
}).
300298
WithOwnerReferences(
301299
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
302300
)
303301
}
304302

305-
func routeNameFromCluster(cluster *rayv1.RayCluster) string {
303+
func dashboardNameFromCluster(cluster *rayv1.RayCluster) string {
306304
return "ray-dashboard-" + cluster.Name
307305
}
308306

307+
func rayClientNameFromCluster(cluster *rayv1.RayCluster) string {
308+
return "rayclient-" + cluster.Name
309+
}
310+
309311
func desiredClusterRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
310-
return routeapply.Route(routeNameFromCluster(cluster), cluster.Namespace).
312+
return routeapply.Route(dashboardNameFromCluster(cluster), cluster.Namespace).
311313
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
312314
WithSpec(routeapply.RouteSpec().
313315
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(oauthServiceNameFromCluster(cluster))).
@@ -391,7 +393,7 @@ func serviceNameFromCluster(cluster *rayv1.RayCluster) string {
391393
}
392394

393395
func createRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
394-
return routeapply.Route(routeNameFromCluster(cluster), cluster.Namespace).
396+
return routeapply.Route(dashboardNameFromCluster(cluster), cluster.Namespace).
395397
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
396398
WithSpec(routeapply.RouteSpec().
397399
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(serviceNameFromCluster(cluster))).
@@ -404,9 +406,62 @@ func createRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration
404406
)
405407
}
406408

409+
func createRayClientRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
410+
ingress_domain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
411+
return routeapply.Route(rayClientNameFromCluster(cluster), cluster.Namespace).
412+
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
413+
WithSpec(routeapply.RouteSpec().
414+
WithHost(rayClientNameFromCluster(cluster) + "-" + cluster.Namespace + "." + ingress_domain).
415+
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(serviceNameFromCluster(cluster)).WithWeight(100)).
416+
WithPort(routeapply.RoutePort().WithTargetPort(intstr.FromString("client"))).
417+
WithTLS(routeapply.TLSConfig().WithTermination("passthrough")),
418+
).
419+
WithOwnerReferences(
420+
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
421+
)
422+
}
423+
424+
// Create an Ingress object for the RayCluster
425+
func createRayClientIngress(cluster *rayv1.RayCluster) *networkingv1ac.IngressApplyConfiguration {
426+
ingress_domain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
427+
return networkingv1ac.Ingress(rayClientNameFromCluster(cluster), cluster.Namespace).
428+
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
429+
WithAnnotations(map[string]string{
430+
"nginx.ingress.kubernetes.io/rewrite-target": "/",
431+
"nginx.ingress.kubernetes.io/ssl-redirect": "true",
432+
"nginx.ingress.kubernetes.io/ssl-passthrough": "true",
433+
}).
434+
WithOwnerReferences(v1.OwnerReference().
435+
WithAPIVersion(cluster.APIVersion).
436+
WithKind(cluster.Kind).
437+
WithName(cluster.Name).
438+
WithUID(types.UID(cluster.UID))).
439+
WithSpec(networkingv1ac.IngressSpec().
440+
WithIngressClassName("nginx").
441+
WithRules(networkingv1ac.IngressRule().
442+
WithHost(rayClientNameFromCluster(cluster) + "-" + cluster.Namespace + "." + ingress_domain).
443+
WithHTTP(networkingv1ac.HTTPIngressRuleValue().
444+
WithPaths(networkingv1ac.HTTPIngressPath().
445+
WithPath("/").
446+
WithPathType(networkingv1.PathTypeImplementationSpecific).
447+
WithBackend(networkingv1ac.IngressBackend().
448+
WithService(networkingv1ac.IngressServiceBackend().
449+
WithName(serviceNameFromCluster(cluster)).
450+
WithPort(networkingv1ac.ServiceBackendPort().
451+
WithNumber(10001),
452+
),
453+
),
454+
),
455+
),
456+
),
457+
),
458+
)
459+
// Optionally, add TLS configuration here if needed
460+
}
461+
407462
// Create an Ingress object for the RayCluster
408463
func createIngressApplyConfiguration(cluster *rayv1.RayCluster, ingressHost string) *networkingv1ac.IngressApplyConfiguration {
409-
return networkingv1ac.Ingress(cluster.Name, cluster.Namespace).
464+
return networkingv1ac.Ingress(dashboardNameFromCluster(cluster), cluster.Namespace).
410465
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
411466
WithOwnerReferences(v1.OwnerReference().
412467
WithAPIVersion(cluster.APIVersion).
@@ -415,11 +470,11 @@ func createIngressApplyConfiguration(cluster *rayv1.RayCluster, ingressHost stri
415470
WithUID(types.UID(cluster.UID))).
416471
WithSpec(networkingv1ac.IngressSpec().
417472
WithRules(networkingv1ac.IngressRule().
418-
WithHost(ingressHost). // host name for specific cluster type
473+
WithHost(ingressHost). // kind host name or ingress_domain
419474
WithHTTP(networkingv1ac.HTTPIngressRuleValue().
420475
WithPaths(networkingv1ac.HTTPIngressPath().
421-
WithPath(defaultIngressPath).
422-
WithPathType(defaultIngressPathType).
476+
WithPath("/").
477+
WithPathType(networkingv1.PathTypePrefix).
423478
WithBackend(networkingv1ac.IngressBackend().
424479
WithService(networkingv1ac.IngressServiceBackend().
425480
WithName(serviceNameFromCluster(cluster)).
@@ -455,9 +510,9 @@ func getDiscoveryClient(config *rest.Config) (*discovery.DiscoveryClient, error)
455510

456511
// Check where we are running. We are trying to distinguish here whether
457512
// this is vanilla kubernetes cluster or Openshift
458-
func getClusterType(logger logr.Logger, clientset *kubernetes.Clientset, clusterName string, namespace string) (bool, string) {
513+
func getClusterType(logger logr.Logger, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster) (bool, string) {
459514
// The discovery package is used to discover APIs supported by a Kubernetes API server.
460-
ingress_domain := "local" //TEMP until label is available
515+
ingress_domain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
461516
config, err := ctrl.GetConfig()
462517
if err == nil && config != nil {
463518
dclient, err := getDiscoveryClient(config)
@@ -474,24 +529,23 @@ func getClusterType(logger logr.Logger, clientset *kubernetes.Clientset, cluster
474529
}
475530
}
476531
onKind, _ := isOnKindCluster(clientset)
477-
if onKind && ingress_domain == "" { //TEMP until label is available
532+
if onKind && ingress_domain == "" {
478533
logger.Info("We detected being on a KinD cluster!")
479534
return false, "kind"
535+
} else {
536+
logger.Info("We detected being on Vanilla Kubernetes!")
537+
return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingress_domain)
480538
}
481-
// else if onKinD and ingress_domain is not none, use ingress_domain, else use ingress-domain
482-
logger.Info("We detected being on Vanilla Kubernetes!")
483-
return false, fmt.Sprintf("ray-dashboard-%s-%s.local", clusterName, namespace) //temp it would be .{ingress-domain}
484539
}
485540
} else {
486541
logger.Info("Cannot retrieve a DiscoveryClient, assuming we're on Vanilla Kubernetes")
487-
return false, "ingress-domain-here" //TEMP until label is available
542+
return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingress_domain)
488543
}
489544
} else {
490545
logger.Info("Cannot retrieve config, assuming we're on Vanilla Kubernetes")
491-
return false, "ingress-domain-here" //TEMP until label is available
546+
return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingress_domain)
492547
}
493548
}
494549

495-
// Need to create route for Ray Client for local_interactive
496550
// No more ingress_options - Removing completely.
497551
// What to do about ingress_domain? Needed for local_interactive?

0 commit comments

Comments
 (0)