@@ -101,32 +101,6 @@ kind create cluster --config /tmp/skypilot-kind.yaml --name skypilot
101
101
102
102
echo " Kind cluster created."
103
103
104
- # Function to wait for SkyPilot GPU labeling jobs to complete
105
- wait_for_gpu_labeling_jobs () {
106
- echo " Starting wait for SkyPilot GPU labeling jobs to complete..."
107
-
108
- SECONDS=0
109
- TIMEOUT=600 # 10 minutes in seconds
110
-
111
- while true ; do
112
- TOTAL_JOBS=$( kubectl get jobs -n kube-system -l job=sky-gpu-labeler --no-headers | wc -l)
113
- COMPLETED_JOBS=$( kubectl get jobs -n kube-system -l job=sky-gpu-labeler --no-headers | grep " 1/1" | wc -l)
114
-
115
- if [[ $COMPLETED_JOBS -eq $TOTAL_JOBS ]]; then
116
- echo " All SkyPilot GPU labeling jobs completed ($TOTAL_JOBS )."
117
- break
118
- elif [ $SECONDS -ge $TIMEOUT ]; then
119
- echo " Timeout reached while waiting for GPU labeling jobs."
120
- exit 1
121
- else
122
- echo " Waiting for GPU labeling jobs to complete... ($COMPLETED_JOBS /$TOTAL_JOBS completed)"
123
- echo " To check status, see GPU labeling pods:"
124
- echo " kubectl get jobs -n kube-system -l job=sky-gpu-labeler"
125
- sleep 5
126
- fi
127
- done
128
- }
129
-
130
104
# Function to wait for GPU operator to be correctly installed
131
105
wait_for_gpu_operator_installation () {
132
106
echo " Starting wait for GPU operator installation..."
@@ -150,22 +124,6 @@ wait_for_gpu_operator_installation() {
150
124
done
151
125
}
152
126
153
- wait_for_skypilot_gpu_image_pull () {
154
- echo " Pulling SkyPilot GPU image..."
155
- docker pull ${IMAGE_GPU}
156
- echo " Loading SkyPilot GPU image into kind cluster..."
157
- kind load docker-image --name skypilot ${IMAGE_GPU}
158
- echo " SkyPilot GPU image loaded into kind cluster."
159
- }
160
-
161
- wait_for_skypilot_cpu_image_pull () {
162
- echo " Pulling SkyPilot CPU image..."
163
- docker pull ${IMAGE}
164
- echo " Loading SkyPilot CPU image into kind cluster..."
165
- kind load docker-image --name skypilot ${IMAGE}
166
- echo " SkyPilot CPU image loaded into kind cluster."
167
- }
168
-
169
127
wait_for_nginx_ingress_controller_install () {
170
128
echo " Starting installation of Nginx Ingress Controller..."
171
129
@@ -206,21 +164,8 @@ if $ENABLE_GPUS; then
206
164
nvidia/gpu-operator --set driver.enabled=false
207
165
# Wait for GPU operator installation to succeed
208
166
wait_for_gpu_operator_installation
209
-
210
- # Load the SkyPilot GPU image into the cluster for faster labelling
211
- wait_for_skypilot_gpu_image_pull
212
-
213
- # Label nodes with GPUs
214
- echo " Labelling nodes with GPUs..."
215
- python -m sky.utils.kubernetes.gpu_labeler
216
-
217
- # Wait for all the GPU labeling jobs to complete
218
- wait_for_gpu_labeling_jobs
219
167
fi
220
168
221
- # Load local skypilot image on to the cluster for faster startup
222
- wait_for_skypilot_cpu_image_pull
223
-
224
169
# Install the Nginx Ingress Controller
225
170
wait_for_nginx_ingress_controller_install
226
171
0 commit comments