From 3a1834d1266c34f574229cf85d85d2b3167ff3a4 Mon Sep 17 00:00:00 2001 From: Fiona Waters Date: Thu, 25 Jul 2024 11:27:25 +0100 Subject: [PATCH] adding tls cert creation and updating num workers in hf notebook --- .../additional-demos/hf_interactive.ipynb | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/demo-notebooks/additional-demos/hf_interactive.ipynb b/demo-notebooks/additional-demos/hf_interactive.ipynb index fe4cd429..1c9f7cd1 100644 --- a/demo-notebooks/additional-demos/hf_interactive.ipynb +++ b/demo-notebooks/additional-demos/hf_interactive.ipynb @@ -89,7 +89,8 @@ "source": [ "# Create our cluster and submit\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", - "cluster = Cluster(ClusterConfiguration(name='hfgputest', \n", + "cluster_name= \"hfgputest\"\n", + "cluster = Cluster(ClusterConfiguration(name=cluster_name, \n", " head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n", " num_gpus=1,\n", " num_workers=1,\n", @@ -287,6 +288,27 @@ "ray_cluster_uri = cluster.cluster_uri()" ] }, + { + "cell_type": "markdown", + "id": "64d65c3c", + "metadata": {}, + "source": [ + "Before we connect to our Ray cluster via the Ray Python client, create the required TLS certificate and export the environment variables that enable TLS:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60276d86", + "metadata": {}, + "outputs": [], + "source": [ + "from codeflare_sdk import generate_cert\n", + "# Create required TLS cert and export the environment variables to enable TLS\n", + "generate_cert.generate_tls_cert(cluster_name, cluster.config.namespace)\n", + "generate_cert.export_env(cluster_name, cluster.config.namespace)" + ] + }, { "cell_type": "markdown", "id": "44dba6a0-8275-4726-8911-6b6ec467b6a3", @@ -432,7 +454,7 @@ "\n", " ray_trainer = TorchTrainer(\n", " train_func,\n", - " scaling_config=ScalingConfig(num_workers=3, 
use_gpu=True),\n", + " scaling_config=ScalingConfig(num_workers=2, use_gpu=True),\n", " # Configure persistent storage that is accessible across \n", " # all worker nodes.\n", " # Uncomment and update the RunConfig below to include your storage details.\n",