Skip to content

Added all-in-one command to Makefile #550

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
162 changes: 162 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ BUNDLE_VERSION ?= $(VERSION:v%=%)
# KUEUE_VERSION defines the default version of Kueue (used for testing)
KUEUE_VERSION ?= v0.6.2

# USE_RHOAI selects the branch taken by the `ifeq ($(USE_RHOAI), true)` checks in this file:
# "true" (the default) runs the RHOAI steps, any other value runs the Open Data Hub steps.
USE_RHOAI ?= true
# KUBERAY_VERSION defines the default version of the KubeRay operator (used for testing)
KUBERAY_VERSION ?= v1.1.0

Expand Down Expand Up @@ -419,3 +420,164 @@ image-mnist-job-test-push: image-mnist-job-test-build ## Push container image wi
# Run scripts/setup-kueue-resources.sh under bash.
.PHONY: kueue-setup
kueue-setup:
	bash scripts/setup-kueue-resources.sh
# RHOAI/ODH related resources installation

# Basic Usage
# `all-in-one` creates all resources necessary to run GPU-enabled ML workloads via OpenShift AI.
# Users can choose between installing Red Hat OpenShift AI (RHOAI) and Open Data Hub (ODH).
# For RHOAI, run `make all-in-one`; to remove all of the operators, run `make delete-all-in-one`.
# For ODH, run `make all-in-one -e USE_RHOAI=false`; to remove all of the operators, run `make delete-all-in-one -e USE_RHOAI=false`.

##@ all-in-one
# Install, in this order, the NFD, Service Mesh, AI platform (RHOAI or ODH,
# chosen by USE_RHOAI) and NVIDIA GPU operators by running the matching
# install-* targets.
# Each step is prefixed with "-" so that a failing step is reported but does not
# prevent the remaining installs from running.
# Sub-makes are started through $(MAKE) rather than a literal `make` so that
# command-line flags such as -n and the jobserver are passed down correctly.
.PHONY: all-in-one
all-in-one:
	@echo -e "\n ==> Installing Everything needed for distributed AI platform on OpenShift cluster \n"
	-$(MAKE) install-nfd-operator
	-$(MAKE) install-service-mesh-operator
	-$(MAKE) install-ai-platform-operator
	-$(MAKE) install-nvidia-operator

# Remove the operators installed by all-in-one by running, in this order, the
# NFD, AI platform (RHOAI or ODH, chosen by USE_RHOAI), Service Mesh and NVIDIA
# GPU delete-* targets.
# Each step is prefixed with "-" so that a failing step is reported but does not
# prevent the remaining deletions from running.
# Sub-makes are started through $(MAKE) rather than a literal `make` so that
# command-line flags such as -n and the jobserver are passed down correctly.
.PHONY: delete-all-in-one
delete-all-in-one:
	@echo -e "\n ==> Removing Everything needed for distributed AI platform on OpenShift cluster \n"
	-$(MAKE) delete-nfd-operator
	-$(MAKE) delete-ai-platform-operator
	-$(MAKE) delete-service-mesh-operator
	-$(MAKE) delete-nvidia-operator

##@ general
# Delete the AI platform operator selected by USE_RHOAI, then delete the
# accelerator profile from redhat-ods-applications (RHOAI) or opendatahub (ODH).
# The branch is chosen when the Makefile is parsed, not when the recipe runs.
# Both steps are best-effort ("-" prefix): a failure is printed and ignored.
# The sub-make is started through $(MAKE) so flags such as -n are passed down.
.PHONY: delete-ai-platform-operator
delete-ai-platform-operator:
ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator
	-$(MAKE) delete-rhoai-operator
	-kubectl delete -f contrib/configuration/accelerator-profile.yaml -n redhat-ods-applications
else ## Delete Open Data Hub Operator
	-$(MAKE) delete-opendatahub-operator
	-kubectl delete -f contrib/configuration/accelerator-profile.yaml -n opendatahub
endif

# Install the AI platform operator selected by USE_RHOAI, then apply the
# accelerator profile into redhat-ods-applications (RHOAI) or opendatahub (ODH).
# The branch is chosen when the Makefile is parsed, not when the recipe runs.
# Both steps are best-effort ("-" prefix): a failure is printed and ignored.
# The sub-make is started through $(MAKE) so flags such as -n are passed down.
.PHONY: install-ai-platform-operator
install-ai-platform-operator:
ifeq ($(USE_RHOAI), true) ## Install RHOAI Operator
	-$(MAKE) install-rhoai-operator
	-kubectl apply -f contrib/configuration/accelerator-profile.yaml -n redhat-ods-applications
else ## Install Open Data Hub Operator
	-$(MAKE) install-opendatahub-operator
	-kubectl apply -f contrib/configuration/accelerator-profile.yaml -n opendatahub
endif

# Tear down RHOAI: delete the default DataScienceCluster and DSCInitialization
# (waiting up to 180s for each to disappear), then the rhods-operator
# Subscription, its ClusterServiceVersion, and the redhat-ods-operator namespace.
#
# --ignore-not-found lets the target be re-run after a partial cleanup: a
# DSC/DSCI that is already gone no longer aborts the remaining steps.
# The ClusterServiceVersion is deleted directly by the label selector that was
# previously used only to look up its name, so an empty lookup can no longer
# produce a nameless `kubectl delete`.
.PHONY: delete-rhoai-operator
delete-rhoai-operator: ## Delete RHOAI Operator
	@echo -e "\n ==> Deleting OpenShift AI Operator \n"
	kubectl delete datasciencecluster/default-dsc --ignore-not-found=true
	kubectl wait --for=delete datasciencecluster/default-dsc --timeout=180s
	kubectl delete dsci/default-dsci --ignore-not-found=true
	kubectl wait --for=delete dsci/default-dsci --timeout=180s
	-kubectl delete subscription rhods-operator -n redhat-ods-operator
	-kubectl delete clusterserviceversion -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator
	kubectl delete namespace redhat-ods-operator

# Install the RHOAI operator and create the default DSCInitialization and
# DataScienceCluster from contrib/configuration/rhoai.
#
# Steps:
#   1. Create the redhat-ods-operator namespace (error ignored).
#   2. Create the OperatorGroup/Subscription manifest and wait up to 180s for the
#      Subscription to report state AtLatestKnown.
#   3. Poll every 10s until the rhods-operator Deployment object exists, then
#      wait up to 180s for it to become available. This mirrors the ODH and
#      Service Mesh install targets; without it the pod lookup below can run
#      before any pod exists and the DSCI/DSC are applied too early.
#   4. Best-effort ("-") wait for the operator pod to be Ready.
#   5. Apply the DSCI and DSC manifests server-side.
.PHONY: install-rhoai-operator
install-rhoai-operator: ## Install RHOAI Operator
	@echo -e "\n ==> Installing OpenShift AI Operator \n"
	-kubectl create ns redhat-ods-operator
	kubectl create -f contrib/configuration/rhoai/rhoai-operator-subscription.yaml
	@echo Waiting for rhoai-operator Subscription to be ready
	kubectl wait -n redhat-ods-operator subscription/rhods-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s
	@while [[ -z $$(kubectl get deployment/rhods-operator -n redhat-ods-operator) ]]; do echo "."; sleep 10; done
	kubectl wait --for=condition=available deployment/rhods-operator -n redhat-ods-operator --timeout=180s
	-export RHOAI_POD_NAME=`kubectl get -n redhat-ods-operator pod -o custom-columns=:metadata.name | grep rhods-operator`; \
	kubectl wait --for=condition=Ready pod/$$RHOAI_POD_NAME -n redhat-ods-operator
	@echo -e "\n==> Creating default Data Science Cluster \n"
	kubectl apply -f contrib/configuration/rhoai/default-dsci.yaml --server-side
	kubectl apply -f contrib/configuration/rhoai/default-dsc.yaml --server-side

# Tear down Open Data Hub: delete the default DataScienceCluster and
# DSCInitialization (waiting up to 180s for each to disappear), then the
# opendatahub-operator Subscription, its ClusterServiceVersion, and the
# opendatahub namespace. The last three steps are best-effort ("-" prefix).
#
# --ignore-not-found lets the target be re-run after a partial cleanup: a
# DSC/DSCI that is already gone no longer aborts the remaining steps.
# The ClusterServiceVersion is deleted directly by the label selector that was
# previously used only to look up its name, so an empty lookup can no longer
# produce a nameless `kubectl delete`.
.PHONY: delete-opendatahub-operator
delete-opendatahub-operator: ## Delete OpenDataHub operator
	@echo -e "\n==> Deleting OpenDataHub Operator \n"
	kubectl delete datasciencecluster/default-dsc --ignore-not-found=true
	kubectl wait --for=delete datasciencecluster/default-dsc --timeout=180s
	kubectl delete dsci/default-dsci --ignore-not-found=true
	kubectl wait --for=delete dsci/default-dsci --timeout=180s
	-kubectl delete subscription opendatahub-operator -n openshift-operators
	-kubectl delete clusterserviceversion -n openshift-operators -l operators.coreos.com/opendatahub-operator.openshift-operators
	-kubectl delete namespace opendatahub

# Install the Open Data Hub operator and create the default DSCInitialization
# and DataScienceCluster from contrib/configuration/odh.
#
# The recipe creates the opendatahub namespace (error ignored), creates the
# Subscription, waits up to 180s for it to reach AtLatestKnown, polls every 10s
# until the controller-manager Deployment exists, waits up to 180s for that
# Deployment to become available, makes a best-effort ("-") wait for the
# operator pod to be Ready, and finally applies the DSCI and DSC server-side.
.PHONY: install-opendatahub-operator
install-opendatahub-operator: ## Install OpenDataHub operator
	@echo -e "\n==> Installing OpenDataHub Operator \n"
	-kubectl create ns opendatahub
	kubectl create -f contrib/configuration/odh/opendatahub-operator-subscription.yaml
	@echo Waiting for opendatahub-operator Subscription to be ready
	kubectl wait -n openshift-operators subscription/opendatahub-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s
	@until [[ -n $$(kubectl get deployment/opendatahub-operator-controller-manager -n openshift-operators) ]]; do echo "."; sleep 10; done
	kubectl wait --for=condition=available deployment/opendatahub-operator-controller-manager -n openshift-operators --timeout=180s
	-export ODH_POD_NAME=$$(kubectl get -n openshift-operators pod -o custom-columns=:metadata.name | grep opendatahub-operator-controller-manager); \
	kubectl wait --for=condition=Ready pod/$$ODH_POD_NAME -n openshift-operators
	kubectl apply -f contrib/configuration/odh/default-dsci.yaml --server-side
	kubectl apply -f contrib/configuration/odh/default-dsc.yaml --server-side

# Delete the servicemeshoperator Subscription and its ClusterServiceVersion from
# the openshift-operators namespace.
#
# --ignore-not-found keeps a missing Subscription from aborting the recipe
# before the ClusterServiceVersion is removed.
# The ClusterServiceVersion is deleted directly by the label selector that was
# previously used only to look up its name; the step stays best-effort ("-").
.PHONY: delete-service-mesh-operator
delete-service-mesh-operator: ## Delete Service Mesh Operator
	@echo -e "\n==> Deleting Service Mesh Operator \n"
	kubectl delete subscription servicemeshoperator -n openshift-operators --ignore-not-found=true
	-kubectl delete clusterserviceversion -n openshift-operators -l operators.coreos.com/servicemeshoperator.openshift-operators

# Install the OpenShift Service Mesh operator.
#
# The recipe creates the Subscription manifest, waits up to 180s for the
# Subscription to reach AtLatestKnown, polls every 10s until the istio-operator
# Deployment exists in openshift-operators, then waits up to 180s for that
# Deployment to become available.
.PHONY: install-service-mesh-operator
install-service-mesh-operator: ## Install Service Mesh Operator
	@echo -e "\n==> Installing OpenShift Service Mesh Operator"
	kubectl create -f contrib/configuration/service-mesh-operator-subscription.yaml
	kubectl wait -n openshift-operators subscription/servicemeshoperator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s
	@until [[ -n $$(kubectl get deployment/istio-operator -n openshift-operators) ]]; do echo "."; sleep 10; done
	kubectl wait --for=condition=available deployment/istio-operator -n openshift-operators --timeout=180s

##@ GPU Support
# Install the NFD operator and create a NodeFeatureDiscovery resource from the
# example embedded in the operator's ClusterServiceVersion.
#
# The recipe creates the openshift-nfd namespace (error ignored), creates the
# OperatorGroup/Subscription manifest, then polls every 10s until both the
# NodeFeatureDiscovery CRD and the operator's CSV exist. It then reads the CSV's
# alm-examples annotation, keeps the entries whose kind is NodeFeatureDiscovery,
# and applies them with client-side validation disabled.
#
# The jsonpath expression is single-quoted so the shell never treats its
# brackets as a glob pattern, matching the quoted jsonpath used elsewhere in
# this Makefile.
.PHONY: install-nfd-operator
install-nfd-operator: ## Install NFD operator ( Node Feature Discovery )
	@echo -e "\n==> Installing NFD Operator \n"
	-kubectl create ns openshift-nfd
	kubectl create -f contrib/configuration/nfd-operator-subscription.yaml
	@echo -e "\n==> Creating default NodeFeatureDiscovery CR \n"
	@while [[ -z $$(kubectl get customresourcedefinition nodefeaturediscoveries.nfd.openshift.io) ]]; do echo "."; sleep 10; done
	@while [[ -z $$(kubectl get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd) ]]; do echo "."; sleep 10; done
	kubectl get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd -ojsonpath='{.items[0].metadata.annotations.alm-examples}' | jq '.[] | select(.kind=="NodeFeatureDiscovery")' | kubectl apply -f - --validate=false

# Remove the NFD operator: delete every NodeFeatureDiscovery resource in
# openshift-nfd, poll every 10s until none are listed, then delete the nfd
# Subscription, its ClusterServiceVersion, and the openshift-nfd namespace.
# The last three steps are best-effort ("-" prefix).
#
# The ClusterServiceVersion is deleted directly by the label selector that was
# previously used only to look up its name, so an empty lookup can no longer
# produce a nameless `kubectl delete`.
.PHONY: delete-nfd-operator
delete-nfd-operator: ## Delete NFD operator
	@echo -e "\n==> Deleting NodeFeatureDiscovery CR \n"
	kubectl delete NodeFeatureDiscovery --all -n openshift-nfd
	@while [[ -n $$(kubectl get NodeFeatureDiscovery -n openshift-nfd) ]]; do echo "."; sleep 10; done
	@echo -e "\n==> Deleting NFD Operator \n"
	-kubectl delete subscription nfd -n openshift-nfd
	-kubectl delete clusterserviceversion -n openshift-nfd -l operators.coreos.com/nfd.openshift-nfd
	-kubectl delete ns openshift-nfd

# Install the NVIDIA GPU operator and create its resources from the examples
# embedded in the operator's ClusterServiceVersion.
#
# The recipe creates the nvidia-gpu-operator namespace (error ignored), creates
# the OperatorGroup/Subscription manifest, then polls every 10s until both the
# ClusterPolicy CRD and the operator's CSV exist. It then applies every entry of
# the CSV's alm-examples annotation.
#
# When USE_RHOAI is "true" (decided at parse time) it also deletes the
# migration-gpu-status ConfigMap and the ReplicaSets labelled
# app=rhods-dashboard in redhat-ods-applications.
#
# The jsonpath expression and jq filter are single-quoted so the shell never
# treats their brackets as glob patterns. The ReplicaSets are deleted directly
# by label selector instead of through an intermediate name lookup.
.PHONY: install-nvidia-operator
install-nvidia-operator: ## Install nvidia operator
	@echo -e "\n==> Installing nvidia Operator \n"
	-kubectl create ns nvidia-gpu-operator
	kubectl create -f contrib/configuration/nvidia-operator-subscription.yaml
	@echo -e "\n==> Creating default ClusterPolicy CR \n"
	@while [[ -z $$(kubectl get customresourcedefinition clusterpolicies.nvidia.com) ]]; do echo "."; sleep 10; done
	@while [[ -z $$(kubectl get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator) ]]; do echo "."; sleep 10; done
	kubectl get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -ojsonpath='{.items[0].metadata.annotations.alm-examples}' | jq '.[]' | kubectl apply -f -
ifeq ($(USE_RHOAI), true) ## Additional steps required for RHOAI
	kubectl delete configmap migration-gpu-status -n redhat-ods-applications --ignore-not-found=true
	-kubectl delete replicaset -n redhat-ods-applications -l app=rhods-dashboard
endif

# Remove the NVIDIA GPU operator: delete the gpu-driver NVIDIADriver (if
# present) and every ClusterPolicy, poll every 10s until no ClusterPolicy is
# listed, then delete the gpu-operator-certified Subscription, its
# ClusterServiceVersion, and the nvidia-gpu-operator namespace.
# The last three steps are best-effort ("-" prefix).
#
# The ClusterServiceVersion is deleted directly by the label selector that was
# previously used only to look up its name, so an empty lookup can no longer
# produce a nameless `kubectl delete`.
.PHONY: delete-nvidia-operator
delete-nvidia-operator: ## Delete nvidia operator
	@echo -e "\n==> Deleting ClusterPolicy CR \n"
	kubectl delete --ignore-not-found=true NVIDIADriver gpu-driver
	kubectl delete ClusterPolicy --all -n nvidia-gpu-operator
	@while [[ -n $$(kubectl get ClusterPolicy -n nvidia-gpu-operator) ]]; do echo "."; sleep 10; done
	@echo -e "\n==> Deleting nvidia Operator \n"
	-kubectl delete subscription gpu-operator-certified -n nvidia-gpu-operator
	-kubectl delete clusterserviceversion -n nvidia-gpu-operator -l operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator
	-kubectl delete ns nvidia-gpu-operator
9 changes: 9 additions & 0 deletions contrib/configuration/accelerator-profile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# AcceleratorProfile "gpu-accelerator-profile": an enabled profile displayed as
# "nvidia-gpu" with the identifier nvidia.com/gpu.
# No namespace is set here; the Makefile supplies it with -n when applying.
apiVersion: dashboard.opendatahub.io/v1
kind: AcceleratorProfile
metadata:
  name: gpu-accelerator-profile
spec:
  displayName: nvidia-gpu
  enabled: true
  identifier: nvidia.com/gpu
22 changes: 22 additions & 0 deletions contrib/configuration/nfd-operator-subscription.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# OperatorGroup "nfd" in the openshift-nfd namespace.
# Its targetNamespaces list contains only openshift-nfd.
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
name: nfd
namespace: openshift-nfd
spec:
targetNamespaces:
- openshift-nfd
---
# Subscription "nfd" in the openshift-nfd namespace.
# Requests package nfd on the stable channel from the redhat-operators source
# in openshift-marketplace, with installPlanApproval set to Automatic.
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
name: nfd
labels:
# Same label key the Makefile's NFD targets use to select the operator's CSV.
operators.coreos.com/nfd.openshift-nfd: ''
namespace: openshift-nfd
spec:
channel: stable
name: nfd
installPlanApproval: Automatic
source: redhat-operators
sourceNamespace: openshift-marketplace
22 changes: 22 additions & 0 deletions contrib/configuration/nvidia-operator-subscription.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# OperatorGroup "gpu-operator-certified" in the nvidia-gpu-operator namespace.
# It targets the operator's own namespace, which install-nvidia-operator creates
# for both the RHOAI and ODH paths. The opendatahub namespace is only created by
# the ODH install target, so it is not a safe target here.
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  name: gpu-operator-certified
  namespace: nvidia-gpu-operator
spec:
  targetNamespaces:
  - nvidia-gpu-operator
---
# Subscription "gpu-operator-certified" in the nvidia-gpu-operator namespace.
# Requests package gpu-operator-certified on the stable channel from the
# certified-operators source in openshift-marketplace, with installPlanApproval
# set to Automatic.
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
name: gpu-operator-certified
labels:
# Same label key the Makefile's nvidia targets use to select the operator's CSV.
operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator: ''
namespace: nvidia-gpu-operator
spec:
channel: stable
name: gpu-operator-certified
installPlanApproval: Automatic
source: certified-operators
sourceNamespace: openshift-marketplace
40 changes: 40 additions & 0 deletions contrib/configuration/odh/default-dsc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# DataScienceCluster "default-dsc" used by the Open Data Hub install target.
# Each component key below is followed by its managementState; modelregistry and
# trainingoperator are Removed, the others are Managed.
# NOTE(review): indentation is not visible in this view, so nesting is not
# shown; confirm the structure against the real file.
kind: DataScienceCluster
apiVersion: datasciencecluster.opendatahub.io/v1
metadata:
labels:
app.kubernetes.io/created-by: opendatahub-operator
app.kubernetes.io/instance: default
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/name: datasciencecluster
app.kubernetes.io/part-of: opendatahub-operator
name: default-dsc
spec:
components:
codeflare:
managementState: Managed
dashboard:
managementState: Managed
datasciencepipelines:
managementState: Managed
kserve:
managementState: Managed
# presumably serving (with its ingressGateway certificate, managementState and
# name) is nested under kserve - TODO confirm
serving:
ingressGateway:
certificate:
type: SelfSigned
managementState: Managed
name: knative-serving
kueue:
managementState: Managed
modelmeshserving:
managementState: Managed
modelregistry:
managementState: Removed
ray:
managementState: Managed
trainingoperator:
managementState: Removed
trustyai:
managementState: Managed
workbenches:
managementState: Managed
24 changes: 24 additions & 0 deletions contrib/configuration/odh/default-dsci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# DSCInitialization "default-dsci" used by the Open Data Hub install target.
# It sets applicationsNamespace to opendatahub and names opendatahub as the
# monitoring namespace; the service mesh control plane is data-science-smcp in
# istio-system.
# NOTE(review): indentation is not visible in this view, so nesting is not
# shown; confirm the structure against the real file.
kind: DSCInitialization
apiVersion: dscinitialization.opendatahub.io/v1
metadata:
labels:
app.kubernetes.io/created-by: opendatahub-operator
app.kubernetes.io/instance: default
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/name: dscinitialization
app.kubernetes.io/part-of: opendatahub-operator
name: default-dsci
spec:
# Same namespace the Makefile applies the accelerator profile into for ODH.
applicationsNamespace: opendatahub
monitoring:
managementState: Managed
namespace: opendatahub
serviceMesh:
controlPlane:
metricsCollection: Istio
name: data-science-smcp
namespace: istio-system
managementState: Managed
trustedCABundle:
# Empty string: no custom CA bundle content is supplied here.
customCABundle: ''
managementState: Managed
13 changes: 13 additions & 0 deletions contrib/configuration/odh/opendatahub-operator-subscription.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Subscription "opendatahub-operator" in the openshift-operators namespace.
# Requests package opendatahub-operator on the fast channel from the
# community-operators source in openshift-marketplace, with installPlanApproval
# set to Automatic.
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
name: opendatahub-operator
labels:
# Same label key the Makefile's ODH delete target uses to select the operator's CSV.
operators.coreos.com/opendatahub-operator.openshift-operators: ''
namespace: openshift-operators
spec:
channel: fast
name: opendatahub-operator
installPlanApproval: Automatic
source: community-operators
sourceNamespace: openshift-marketplace
34 changes: 34 additions & 0 deletions contrib/configuration/rhoai/default-dsc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# DataScienceCluster "default-dsc" used by the RHOAI install target.
# Every component key listed below is followed by managementState: Managed.
# NOTE(review): indentation is not visible in this view, so nesting is not
# shown; confirm the structure against the real file.
kind: DataScienceCluster
apiVersion: datasciencecluster.opendatahub.io/v1
metadata:
labels:
app.kubernetes.io/created-by: rhods-operator
app.kubernetes.io/instance: default-dsc
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/name: datasciencecluster
app.kubernetes.io/part-of: rhods-operator
name: default-dsc
spec:
components:
codeflare:
managementState: Managed
dashboard:
managementState: Managed
datasciencepipelines:
managementState: Managed
kserve:
managementState: Managed
# presumably serving (with its ingressGateway certificate, managementState and
# name) is nested under kserve - TODO confirm
serving:
ingressGateway:
certificate:
type: SelfSigned
managementState: Managed
name: knative-serving
kueue:
managementState: Managed
modelmeshserving:
managementState: Managed
ray:
managementState: Managed
workbenches:
managementState: Managed
24 changes: 24 additions & 0 deletions contrib/configuration/rhoai/default-dsci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# DSCInitialization "default-dsci" used by the RHOAI install target.
# It sets applicationsNamespace to redhat-ods-applications and names
# redhat-ods-monitoring as the monitoring namespace; the service mesh control
# plane is data-science-smcp in istio-system.
# NOTE(review): indentation is not visible in this view, so nesting is not
# shown; confirm the structure against the real file.
kind: DSCInitialization
apiVersion: dscinitialization.opendatahub.io/v1
metadata:
labels:
app.kubernetes.io/created-by: rhods-operator
app.kubernetes.io/instance: default-dsci
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/name: dscinitialization
app.kubernetes.io/part-of: rhods-operator
name: default-dsci
spec:
# Same namespace the Makefile applies the accelerator profile into for RHOAI.
applicationsNamespace: redhat-ods-applications
monitoring:
managementState: Managed
namespace: redhat-ods-monitoring
serviceMesh:
controlPlane:
metricsCollection: Istio
name: data-science-smcp
namespace: istio-system
managementState: Managed
trustedCABundle:
# Empty string: no custom CA bundle content is supplied here.
customCABundle: ''
managementState: Managed
19 changes: 19 additions & 0 deletions contrib/configuration/rhoai/rhoai-operator-subscription.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# OperatorGroup "rhods-operator" in the redhat-ods-operator namespace.
# No spec is given, so no targetNamespaces are listed.
# NOTE(review): presumably this makes the group target all namespaces - confirm
# against the OLM documentation.
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
name: rhods-operator
namespace: redhat-ods-operator
---
# Subscription "rhods-operator" in the redhat-ods-operator namespace.
# Requests package rhods-operator on the fast channel from the redhat-operators
# source in openshift-marketplace, with installPlanApproval set to Automatic.
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
name: rhods-operator
labels:
# Same label key the Makefile's RHOAI delete target uses to select the operator's CSV.
operators.coreos.com/rhods-operator.redhat-ods-operator: ''
namespace: redhat-ods-operator
spec:
channel: fast
name: rhods-operator
installPlanApproval: Automatic
source: redhat-operators
sourceNamespace: openshift-marketplace
Loading