Skip to content

Commit 6835c21

Browse files
Create rayclient ingress and route leveraging annotations
1 parent f415ce9 commit 6835c21

File tree

1 file changed

+104
-52
lines changed

1 file changed

+104
-52
lines changed

controllers/raycluster_controller.go

Lines changed: 104 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,10 @@ import (
2626
"strings"
2727

2828
"github.com/go-logr/logr"
29-
30-
networkingv1 "k8s.io/api/networking/v1"
31-
networkingv1ac "k8s.io/client-go/applyconfigurations/networking/v1"
32-
3329
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
3430

3531
corev1 "k8s.io/api/core/v1"
32+
networkingv1 "k8s.io/api/networking/v1"
3633
rbacv1 "k8s.io/api/rbac/v1"
3734
"k8s.io/apimachinery/pkg/api/errors"
3835
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -41,6 +38,7 @@ import (
4138
"k8s.io/apimachinery/pkg/util/intstr"
4239
coreapply "k8s.io/client-go/applyconfigurations/core/v1"
4340
v1 "k8s.io/client-go/applyconfigurations/meta/v1"
41+
networkingv1ac "k8s.io/client-go/applyconfigurations/networking/v1"
4442
rbacapply "k8s.io/client-go/applyconfigurations/rbac/v1"
4543
"k8s.io/client-go/discovery"
4644
"k8s.io/client-go/kubernetes"
@@ -63,19 +61,6 @@ type RayClusterReconciler struct {
6361
CookieSalt string
6462
}
6563

66-
type IngressOptions struct {
67-
Ingresses []Ingress `json:"ingresses"`
68-
}
69-
70-
type Ingress struct {
71-
IngressName string `json:"ingressName"`
72-
Port int `json:"port"`
73-
PathType string `json:"pathType"`
74-
Path string `json:"path"`
75-
Host string `json:"host"`
76-
Annotations map[string]string `json:"annotations"`
77-
}
78-
7964
const (
8065
requeueTime = 10
8166
controllerName = "codeflare-raycluster-controller"
@@ -87,9 +72,6 @@ const (
8772
strTrue = "true"
8873
strFalse = "false"
8974
logRequeueing = "requeueing"
90-
defaultIngressName = "ray-dashboard"
91-
defaultIngressPath = "/"
92-
defaultIngressPathType = networkingv1.PathTypePrefix
9375
)
9476

9577
var (
@@ -120,15 +102,17 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
120102

121103
var cluster rayv1.RayCluster
122104

123-
IsOpenShift, ingressHost := getClusterType(logger, r.kubeClient, cluster.Name, cluster.Namespace)
124-
125105
if err := r.Get(ctx, req.NamespacedName, &cluster); err != nil {
126106
if !errors.IsNotFound(err) {
127107
logger.Error(err, "Error getting RayCluster resource")
128108
}
129109
return ctrl.Result{}, client.IgnoreNotFound(err)
130110
}
131111

112+
isLocalInteractive := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/local_interactive"]
113+
isOpenShift, ingressHost := getClusterType(logger, r.kubeClient, &cluster)
114+
ingressDomain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
115+
132116
if cluster.ObjectMeta.DeletionTimestamp.IsZero() {
133117
if !controllerutil.ContainsFinalizer(&cluster, CodeflareOAuthFinalizer) {
134118
logger.Info("Add a finalizer", "finalizer", CodeflareOAuthFinalizer)
@@ -154,7 +138,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
154138
logger.Info("Successfully removed finalizer.", logRequeueing, strFalse)
155139
return ctrl.Result{}, nil
156140
}
157-
if IsOpenShift {
141+
if isOpenShift {
158142
val, ok := cluster.ObjectMeta.Annotations["codeflare.dev/oauth"]
159143
boolVal, err := strconv.ParseBool(val)
160144
if err != nil {
@@ -200,7 +184,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
200184
}
201185
}
202186

203-
if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] == "True" && IsOpenShift {
187+
if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] == "True" && isOpenShift {
204188
logger.Info("Creating OAuth Objects")
205189
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
206190
if err != nil {
@@ -226,24 +210,36 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
226210
if err != nil {
227211
logger.Error(err, "Failed to update OAuth ClusterRoleBinding")
228212
}
229-
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && IsOpenShift {
230-
logger.Info(string(cluster.Status.State))
231-
// create a route
232-
logger.Info("Creating Route")
213+
214+
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && isOpenShift {
215+
logger.Info("Creating Dashboard Route")
233216
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, createRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
234217
if err != nil {
235-
logger.Error(err, "Failed to update Route")
218+
logger.Error(err, "Failed to update Dashboard Route")
219+
}
220+
if isLocalInteractive == "True" && ingressDomain != "" {
221+
logger.Info("Creating RayClient Route")
222+
_, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, createRayClientRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
223+
if err != nil {
224+
logger.Error(err, "Failed to update RayClient Route")
225+
}
236226
}
237227
return ctrl.Result{}, nil
238-
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && !IsOpenShift {
239-
logger.Info(string(cluster.Status.State))
240-
// create an ingress
241-
logger.Info("Creating Ingress")
228+
229+
} else if cluster.Status.State != "suspended" && cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] != "True" && !isOpenShift {
230+
logger.Info("Creating Dashboard Ingress")
242231
_, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, createIngressApplyConfiguration(&cluster, ingressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
243232
if err != nil {
244-
logger.Error(err, "Failed to update Ingress")
233+
// This log is info level since errors are not fatal and are expected
234+
logger.Info("WARN: Failed to update Dashboard Ingress", "error", err.Error(), logRequeueing, strTrue)
235+
}
236+
if isLocalInteractive == "True" && ingressDomain != "" {
237+
logger.Info("Creating RayClient Ingress")
238+
_, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, createRayClientIngress(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
239+
if err != nil {
240+
logger.Error(err, "Failed to update RayClient Ingress")
241+
}
245242
}
246-
logger.Info("Ingress HAS BEEN CREATED")
247243
return ctrl.Result{}, nil
248244
}
249245

@@ -295,19 +291,23 @@ func desiredServiceAccount(cluster *rayv1.RayCluster) *coreapply.ServiceAccountA
295291
WithAnnotations(map[string]string{
296292
"serviceaccounts.openshift.io/oauth-redirectreference.first": "" +
297293
`{"kind":"OAuthRedirectReference","apiVersion":"v1",` +
298-
`"reference":{"kind":"Route","name":"` + routeNameFromCluster(cluster) + `"}}`,
294+
`"reference":{"kind":"Route","name":"` + dashboardNameFromCluster(cluster) + `"}}`,
299295
}).
300296
WithOwnerReferences(
301297
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
302298
)
303299
}
304300

305-
func routeNameFromCluster(cluster *rayv1.RayCluster) string {
301+
func dashboardNameFromCluster(cluster *rayv1.RayCluster) string {
306302
return "ray-dashboard-" + cluster.Name
307303
}
308304

305+
func rayClientNameFromCluster(cluster *rayv1.RayCluster) string {
306+
return "rayclient-" + cluster.Name
307+
}
308+
309309
func desiredClusterRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
310-
return routeapply.Route(routeNameFromCluster(cluster), cluster.Namespace).
310+
return routeapply.Route(dashboardNameFromCluster(cluster), cluster.Namespace).
311311
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
312312
WithSpec(routeapply.RouteSpec().
313313
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(oauthServiceNameFromCluster(cluster))).
@@ -391,7 +391,7 @@ func serviceNameFromCluster(cluster *rayv1.RayCluster) string {
391391
}
392392

393393
func createRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
394-
return routeapply.Route(routeNameFromCluster(cluster), cluster.Namespace).
394+
return routeapply.Route(dashboardNameFromCluster(cluster), cluster.Namespace).
395395
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
396396
WithSpec(routeapply.RouteSpec().
397397
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(serviceNameFromCluster(cluster))).
@@ -404,9 +404,62 @@ func createRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration
404404
)
405405
}
406406

407+
func createRayClientRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration {
408+
ingress_domain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
409+
return routeapply.Route(rayClientNameFromCluster(cluster), cluster.Namespace).
410+
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
411+
WithSpec(routeapply.RouteSpec().
412+
WithHost(rayClientNameFromCluster(cluster) + "-" + cluster.Namespace + "." + ingress_domain).
413+
WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(serviceNameFromCluster(cluster)).WithWeight(100)).
414+
WithPort(routeapply.RoutePort().WithTargetPort(intstr.FromString("client"))).
415+
WithTLS(routeapply.TLSConfig().WithTermination("passthrough")),
416+
).
417+
WithOwnerReferences(
418+
v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion),
419+
)
420+
}
421+
422+
// Create an Ingress object for the RayCluster
423+
func createRayClientIngress(cluster *rayv1.RayCluster) *networkingv1ac.IngressApplyConfiguration {
424+
ingress_domain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
425+
return networkingv1ac.Ingress(rayClientNameFromCluster(cluster), cluster.Namespace).
426+
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
427+
WithAnnotations(map[string]string{
428+
"nginx.ingress.kubernetes.io/rewrite-target": "/",
429+
"nginx.ingress.kubernetes.io/ssl-redirect": "true",
430+
"nginx.ingress.kubernetes.io/ssl-passthrough": "true",
431+
}).
432+
WithOwnerReferences(v1.OwnerReference().
433+
WithAPIVersion(cluster.APIVersion).
434+
WithKind(cluster.Kind).
435+
WithName(cluster.Name).
436+
WithUID(types.UID(cluster.UID))).
437+
WithSpec(networkingv1ac.IngressSpec().
438+
WithIngressClassName("nginx").
439+
WithRules(networkingv1ac.IngressRule().
440+
WithHost(rayClientNameFromCluster(cluster) + "-" + cluster.Namespace + "." + ingress_domain).
441+
WithHTTP(networkingv1ac.HTTPIngressRuleValue().
442+
WithPaths(networkingv1ac.HTTPIngressPath().
443+
WithPath("/").
444+
WithPathType(networkingv1.PathTypeImplementationSpecific).
445+
WithBackend(networkingv1ac.IngressBackend().
446+
WithService(networkingv1ac.IngressServiceBackend().
447+
WithName(serviceNameFromCluster(cluster)).
448+
WithPort(networkingv1ac.ServiceBackendPort().
449+
WithNumber(10001),
450+
),
451+
),
452+
),
453+
),
454+
),
455+
),
456+
)
457+
// Optionally, add TLS configuration here if needed
458+
}
459+
407460
// Create an Ingress object for the RayCluster
408461
func createIngressApplyConfiguration(cluster *rayv1.RayCluster, ingressHost string) *networkingv1ac.IngressApplyConfiguration {
409-
return networkingv1ac.Ingress(cluster.Name, cluster.Namespace).
462+
return networkingv1ac.Ingress(dashboardNameFromCluster(cluster), cluster.Namespace).
410463
WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}).
411464
WithOwnerReferences(v1.OwnerReference().
412465
WithAPIVersion(cluster.APIVersion).
@@ -415,11 +468,11 @@ func createIngressApplyConfiguration(cluster *rayv1.RayCluster, ingressHost stri
415468
WithUID(types.UID(cluster.UID))).
416469
WithSpec(networkingv1ac.IngressSpec().
417470
WithRules(networkingv1ac.IngressRule().
418-
WithHost(ingressHost). // host name for specific cluster type
471+
WithHost(ingressHost). // kind host name or ingress_domain
419472
WithHTTP(networkingv1ac.HTTPIngressRuleValue().
420473
WithPaths(networkingv1ac.HTTPIngressPath().
421-
WithPath(defaultIngressPath).
422-
WithPathType(defaultIngressPathType).
474+
WithPath("/").
475+
WithPathType(networkingv1.PathTypePrefix).
423476
WithBackend(networkingv1ac.IngressBackend().
424477
WithService(networkingv1ac.IngressServiceBackend().
425478
WithName(serviceNameFromCluster(cluster)).
@@ -455,9 +508,9 @@ func getDiscoveryClient(config *rest.Config) (*discovery.DiscoveryClient, error)
455508

456509
// Check where we are running. We are trying to distinguish here whether
457510
// this is vanilla kubernetes cluster or Openshift
458-
func getClusterType(logger logr.Logger, clientset *kubernetes.Clientset, clusterName string, namespace string) (bool, string) {
511+
func getClusterType(logger logr.Logger, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster) (bool, string) {
459512
// The discovery package is used to discover APIs supported by a Kubernetes API server.
460-
ingress_domain := "local" //TEMP until label is available
513+
ingress_domain := cluster.ObjectMeta.Annotations["sdk.codeflare.dev/ingress_domain"]
461514
config, err := ctrl.GetConfig()
462515
if err == nil && config != nil {
463516
dclient, err := getDiscoveryClient(config)
@@ -474,24 +527,23 @@ func getClusterType(logger logr.Logger, clientset *kubernetes.Clientset, cluster
474527
}
475528
}
476529
onKind, _ := isOnKindCluster(clientset)
477-
if onKind && ingress_domain == "" { //TEMP until label is available
530+
if onKind && ingress_domain == "" {
478531
logger.Info("We detected being on a KinD cluster!")
479532
return false, "kind"
533+
} else {
534+
logger.Info("We detected being on Vanilla Kubernetes!")
535+
return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingress_domain)
480536
}
481-
// else if onKinD and ingress_domain is not none, use ingress_domain, else use ingress-domain
482-
logger.Info("We detected being on Vanilla Kubernetes!")
483-
return false, fmt.Sprintf("ray-dashboard-%s-%s.local", clusterName, namespace) //temp it would be .{ingress-domain}
484537
}
485538
} else {
486539
logger.Info("Cannot retrieve a DiscoveryClient, assuming we're on Vanilla Kubernetes")
487-
return false, "ingress-domain-here" //TEMP until label is available
540+
return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingress_domain)
488541
}
489542
} else {
490543
logger.Info("Cannot retrieve config, assuming we're on Vanilla Kubernetes")
491-
return false, "ingress-domain-here" //TEMP until label is available
544+
return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingress_domain)
492545
}
493546
}
494547

495-
// Need to create route for Ray Client for local_interactive
496548
// No more ingress_options - Removing completely.
497549
// What to do about ingress_domain? Needed for local_interactive?

0 commit comments

Comments
 (0)