From db38aa9751a6609ffc13612a67c92503df877e15 Mon Sep 17 00:00:00 2001
From: Karel Suta <ksuta@redhat.com>
Date: Mon, 30 Oct 2023 15:53:44 +0100
Subject: [PATCH] Setup existing DW tests to run in CFO

---
 test/odh/environment.go                  |  49 +++++++
 test/odh/mcad_ray_test.go                |  98 ++++++++++++++
 test/odh/notebook.go                     |  99 ++++++++++++++
 test/odh/pytorch_mcad_test.go            |  77 +++++++++++
 test/odh/resources/custom-nb-small.yaml  | 165 +++++++++++++++++++++++
 test/odh/resources/mnist.py              | 160 ++++++++++++++++++++++
 test/odh/resources/mnist_mcad_mini.ipynb |  93 +++++++++++++
 test/odh/resources/mnist_ray_mini.ipynb  | 145 ++++++++++++++++++++
 test/odh/resources/requirements.txt      |   4 +
 test/odh/support.go                      |  34 +++++
 10 files changed, 924 insertions(+)
 create mode 100644 test/odh/environment.go
 create mode 100644 test/odh/mcad_ray_test.go
 create mode 100644 test/odh/notebook.go
 create mode 100644 test/odh/pytorch_mcad_test.go
 create mode 100644 test/odh/resources/custom-nb-small.yaml
 create mode 100644 test/odh/resources/mnist.py
 create mode 100644 test/odh/resources/mnist_mcad_mini.ipynb
 create mode 100644 test/odh/resources/mnist_ray_mini.ipynb
 create mode 100644 test/odh/resources/requirements.txt
 create mode 100644 test/odh/support.go

diff --git a/test/odh/environment.go b/test/odh/environment.go
new file mode 100644
index 000000000..0a087c9d5
--- /dev/null
+++ b/test/odh/environment.go
@@ -0,0 +1,49 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package odh
+
+import (
+	"os"
+
+	. "github.com/project-codeflare/codeflare-common/support"
+)
+
+const (
+	// odhNamespaceEnvVar names the environment variable holding the namespace ODH is installed in.
+	odhNamespaceEnvVar = "ODH_NAMESPACE"
+	// notebookImageStreamName names the environment variable (not the ImageStream itself) holding the ODH Notebook ImageStream name.
+	notebookImageStreamName = "NOTEBOOK_IMAGE_STREAM_NAME"
+)
+
+func GetOpenDataHubNamespace() string {
+	return lookupEnvOrDefault(odhNamespaceEnvVar, "opendatahub") // "opendatahub" is used when the variable is not set
+}
+
+func GetNotebookImageStreamName(t Test) string {
+	if name, found := os.LookupEnv(notebookImageStreamName); found {
+		return name
+	}
+	t.T().Fatalf("Expected environment variable %s not found, please use this environment variable to specify what ImageStream to use for Notebook.", notebookImageStreamName)
+	return "" // NOTE(review): presumably unreachable, assuming T() returns a *testing.T whose Fatalf stops the test
+}
+
+func lookupEnvOrDefault(key, value string) string {
+	if found, ok := os.LookupEnv(key); ok {
+		value = found // a variable that is set wins, even when it is set to ""
+	}
+	return value
+}
diff --git a/test/odh/mcad_ray_test.go b/test/odh/mcad_ray_test.go
new file mode 100644
index 000000000..770b64d9d
--- /dev/null
+++ b/test/odh/mcad_ray_test.go
@@ -0,0 +1,98 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package odh
+
+import (
+	"testing"
+
+	. "github.com/onsi/gomega"
+	. "github.com/project-codeflare/codeflare-common/support"
+	mcadv1beta1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
+	rayv1alpha1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
+
+	rbacv1 "k8s.io/api/rbac/v1"
+)
+
+func TestMCADRay(t *testing.T) {
+	test := With(t)
+
+	// Create the test namespace
+	namespace := test.NewTestNamespace()
+
+	// ConfigMap with the notebook, the training script and its requirements; the Notebook CR template mounts it into the notebook container
+	jupyterNotebookConfigMapFileName := "mnist_ray_mini.ipynb"
+	config := CreateConfigMap(test, namespace.Name, map[string][]byte{
+		// MNIST Ray Notebook
+		jupyterNotebookConfigMapFileName: ReadFile(test, "resources/mnist_ray_mini.ipynb"),
+		"mnist.py":                       ReadFile(test, "resources/mnist.py"),
+		"requirements.txt":               ReadFile(test, "resources/requirements.txt"),
+	})
+
+	// Namespaced permissions for the ServiceAccount whose token the notebook logs in with (limited rights)
+	policyRules := []rbacv1.PolicyRule{
+		{
+			Verbs:     []string{"get", "create", "delete", "list", "patch", "update"},
+			APIGroups: []string{mcadv1beta1.GroupName},
+			Resources: []string{"appwrappers"},
+		},
+		{
+			Verbs:     []string{"get", "list"},
+			APIGroups: []string{rayv1alpha1.GroupVersion.Group},
+			Resources: []string{"rayclusters", "rayclusters/status"},
+		},
+		{
+			Verbs:     []string{"get", "list"},
+			APIGroups: []string{"route.openshift.io"},
+			Resources: []string{"routes"},
+		},
+	}
+
+	// Cluster-wide RBAC, required for the SDK's OpenShift check
+	// TODO: re-evaluate once the SDK changes its OpenShift detection logic
+	clusterPolicyRules := []rbacv1.PolicyRule{
+		{
+			Verbs:         []string{"get", "list"},
+			APIGroups:     []string{"config.openshift.io"},
+			Resources:     []string{"ingresses"},
+			ResourceNames: []string{"cluster"},
+		},
+	}
+
+	sa := CreateServiceAccount(test, namespace.Name)
+	role := CreateRole(test, namespace.Name, policyRules)
+	CreateRoleBinding(test, namespace.Name, sa, role)
+	clusterRole := CreateClusterRole(test, clusterPolicyRules)
+	CreateClusterRoleBinding(test, sa, clusterRole)
+	token := CreateToken(test, namespace.Name, sa)
+
+	// Create the Notebook CR; its container command runs the notebook with papermill
+	createNotebook(test, namespace, token, config.Name, jupyterNotebookConfigMapFileName)
+
+	// Wait until exactly one AppWrapper, named mnisttest*, exists and is Active
+	test.Eventually(AppWrappers(test, namespace), TestTimeoutLong).
+		Should(
+			And(
+				HaveLen(1),
+				ContainElement(WithTransform(AppWrapperName, HavePrefix("mnisttest"))),
+				ContainElement(WithTransform(AppWrapperState, Equal(mcadv1beta1.AppWrapperStateActive))),
+			),
+		)
+
+	// Make sure the AppWrapper finishes and is deleted (the notebook's last cell calls cluster.down())
+	test.Eventually(AppWrappers(test, namespace), TestTimeoutLong).
+		Should(HaveLen(0))
+}
diff --git a/test/odh/notebook.go b/test/odh/notebook.go
new file mode 100644
index 000000000..8c7b28275
--- /dev/null
+++ b/test/odh/notebook.go
@@ -0,0 +1,99 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package odh
+
+import (
+	"bytes"
+	"html/template"
+
+	gomega "github.com/onsi/gomega"
+	. "github.com/project-codeflare/codeflare-common/support"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/util/yaml"
+
+	imagev1 "github.com/openshift/api/image/v1"
+)
+
+const recommendedTagAnnotation = "opendatahub.io/workbench-image-recommended" // ImageStream tag annotation; the tag whose value is "true" is selected
+
+var notebookResource = schema.GroupVersionResource{Group: "kubeflow.org", Version: "v1", Resource: "notebooks"} // resource handle used with the dynamic client to create Notebook CRs
+
+type NotebookProps struct {
+	IngressDomain             string // cluster ingress domain, used to build the dashboard and notebook URLs
+	OpenShiftApiUrl           string // exported to the notebook container as OCP_SERVER
+	KubernetesBearerToken     string // exported to the notebook container as OCP_TOKEN for "oc login"
+	Namespace                 string // namespace the Notebook CR is created in
+	OpenDataHubNamespace      string // namespace of the ODH dashboard and of the notebook ImageStream
+	ImageStreamName           string // ImageStream providing the notebook image
+	ImageStreamTag            string // ImageStream tag of the notebook image
+	NotebookConfigMapName     string // ConfigMap mounted at /opt/app-root/notebooks
+	NotebookConfigMapFileName string // file in that ConfigMap which papermill executes
+	NotebookPVC               string // PVC mounted at /opt/app-root/src
+}
+
+func createNotebook(test Test, namespace *corev1.Namespace, notebookToken, jupyterNotebookConfigMapName, jupyterNotebookConfigMapFileName string) {
+	// Create the PVC that backs the notebook's /opt/app-root/src volume
+	notebookPVC := CreatePersistentVolumeClaim(test, namespace.Name, "10Gi", corev1.ReadWriteOnce)
+
+	// Retrieve the recommended tag of the Notebook ImageStream
+	is := GetImageStream(test, GetOpenDataHubNamespace(), GetNotebookImageStreamName(test))
+	recommendedTagName := getRecommendedImageStreamTag(test, is)
+
+	// Read the Notebook CR template from the embedded resources and fill in the custom values using a Go template
+	notebookProps := NotebookProps{
+		IngressDomain:             GetOpenShiftIngressDomain(test),
+		OpenShiftApiUrl:           GetOpenShiftApiUrl(test),
+		KubernetesBearerToken:     notebookToken,
+		Namespace:                 namespace.Name,
+		OpenDataHubNamespace:      GetOpenDataHubNamespace(),
+		ImageStreamName:           GetNotebookImageStreamName(test),
+		ImageStreamTag:            recommendedTagName,
+		NotebookConfigMapName:     jupyterNotebookConfigMapName,
+		NotebookConfigMapFileName: jupyterNotebookConfigMapFileName,
+		NotebookPVC:               notebookPVC.Name,
+	}
+	notebookTemplate, err := files.ReadFile("resources/custom-nb-small.yaml")
+	test.Expect(err).NotTo(gomega.HaveOccurred())
+	parsedNotebookTemplate, err := template.New("notebook").Parse(string(notebookTemplate))
+	test.Expect(err).NotTo(gomega.HaveOccurred())
+
+	// Render the template into a buffer. NOTE(review): html/template HTML-escapes substituted values; text/template looks like the better fit for YAML output - confirm
+	notebookBuffer := new(bytes.Buffer)
+	err = parsedNotebookTemplate.Execute(notebookBuffer, notebookProps)
+	test.Expect(err).NotTo(gomega.HaveOccurred())
+
+	// Decode the rendered YAML and create the Notebook CR through the dynamic client
+	notebookCR := &unstructured.Unstructured{}
+	err = yaml.NewYAMLOrJSONDecoder(notebookBuffer, 8192).Decode(notebookCR)
+	test.Expect(err).NotTo(gomega.HaveOccurred())
+	_, err = test.Client().Dynamic().Resource(notebookResource).Namespace(namespace.Name).Create(test.Ctx(), notebookCR, metav1.CreateOptions{})
+	test.Expect(err).NotTo(gomega.HaveOccurred())
+}
+
+func getRecommendedImageStreamTag(test Test, is *imagev1.ImageStream) (tagName string) {
+	for i := range is.Spec.Tags {
+		if candidate := is.Spec.Tags[i]; candidate.Annotations[recommendedTagAnnotation] == "true" {
+			return candidate.Name // the first tag annotated as recommended wins
+		}
+	}
+	test.T().Fatalf("tag with annotation '%s' not found in ImageStream %s", recommendedTagAnnotation, is.Name)
+	return ""
+}
diff --git a/test/odh/pytorch_mcad_test.go b/test/odh/pytorch_mcad_test.go
new file mode 100644
index 000000000..0dd33a363
--- /dev/null
+++ b/test/odh/pytorch_mcad_test.go
@@ -0,0 +1,77 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package odh
+
+import (
+	"testing"
+
+	. "github.com/onsi/gomega"
+	. "github.com/project-codeflare/codeflare-common/support"
+	mcadv1beta1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
+
+	rbacv1 "k8s.io/api/rbac/v1"
+)
+
+func TestMnistPyTorchMCAD(t *testing.T) {
+	test := With(t)
+
+	// Create the test namespace
+	namespace := test.NewTestNamespace()
+
+	// ConfigMap with the notebook to execute; the Notebook CR template mounts it into the notebook container
+	jupyterNotebookConfigMapFileName := "mnist_mcad_mini.ipynb"
+	config := CreateConfigMap(test, namespace.Name, map[string][]byte{
+		// MNIST MCAD Notebook
+		jupyterNotebookConfigMapFileName: ReadFile(test, "resources/mnist_mcad_mini.ipynb"),
+	})
+
+	// Namespaced permissions for the ServiceAccount whose token the notebook logs in with (limited rights)
+	policyRules := []rbacv1.PolicyRule{
+		{
+			Verbs:     []string{"get", "create", "delete", "list", "patch", "update"},
+			APIGroups: []string{mcadv1beta1.GroupName},
+			Resources: []string{"appwrappers"},
+		},
+		// Needed for job.logs(), which the notebook polls to detect completion
+		{
+			Verbs:     []string{"get"},
+			APIGroups: []string{""},
+			Resources: []string{"pods/log"},
+		},
+	}
+	sa := CreateServiceAccount(test, namespace.Name)
+	role := CreateRole(test, namespace.Name, policyRules)
+	CreateRoleBinding(test, namespace.Name, sa, role)
+	token := CreateToken(test, namespace.Name, sa)
+
+	// Create the Notebook CR; its container command runs the notebook with papermill
+	createNotebook(test, namespace, token, config.Name, jupyterNotebookConfigMapFileName)
+
+	// Wait until exactly one AppWrapper, named mnistjob*, exists and is Active
+	test.Eventually(AppWrappers(test, namespace), TestTimeoutLong).
+		Should(
+			And(
+				HaveLen(1),
+				ContainElement(WithTransform(AppWrapperName, HavePrefix("mnistjob"))),
+				ContainElement(WithTransform(AppWrapperState, Equal(mcadv1beta1.AppWrapperStateActive))),
+			),
+		)
+
+	// Make sure the AppWrapper finishes and is deleted (the notebook's last cell calls job.cancel())
+	test.Eventually(AppWrappers(test, namespace), TestTimeoutLong).
+		Should(HaveLen(0))
+}
diff --git a/test/odh/resources/custom-nb-small.yaml b/test/odh/resources/custom-nb-small.yaml
new file mode 100644
index 000000000..95aaaf106
--- /dev/null
+++ b/test/odh/resources/custom-nb-small.yaml
@@ -0,0 +1,165 @@
+# This template may be used to spin up a custom notebook image
+# i.e.: sed s/{{.IngressDomain}}/$(oc get ingresses.config/cluster -o jsonpath={.spec.domain})/g tests/resources/custom-nb.template | oc apply -f -
+# resources generated:
+# pod/jupyter-nb-kube-3aadmin-0
+# service/jupyter-nb-kube-3aadmin
+# route.route.openshift.io/jupyter-nb-kube-3aadmin (jupyter-nb-kube-3aadmin-opendatahub.apps.tedbig412.cp.fyre.ibm.com)
+# service/jupyter-nb-kube-3aadmin-tls
+apiVersion: kubeflow.org/v1
+kind: Notebook
+metadata:
+  annotations:
+    notebooks.opendatahub.io/inject-oauth: "true"
+    notebooks.opendatahub.io/last-image-selection: codeflare-notebook:latest
+    notebooks.opendatahub.io/last-size-selection: Small
+    notebooks.opendatahub.io/oauth-logout-url: https://odh-dashboard-{{.OpenDataHubNamespace}}.{{.IngressDomain}}/notebookController/kube-3aadmin/home
+    opendatahub.io/link: https://jupyter-nb-kube-3aadmin-{{.Namespace}}.{{.IngressDomain}}/notebook/{{.Namespace}}/jupyter-nb-kube-3aadmin
+    opendatahub.io/username: kube:admin
+  generation: 1
+  labels:
+    app: jupyter-nb-kube-3aadmin
+    opendatahub.io/dashboard: "true"
+    opendatahub.io/odh-managed: "true"
+    opendatahub.io/user: kube-3aadmin
+  name: jupyter-nb-kube-3aadmin
+  namespace: {{.Namespace}}
+spec:
+  template:
+    spec:
+      affinity:
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+          - preference:
+              matchExpressions:
+              - key: nvidia.com/gpu.present
+                operator: NotIn
+                values:
+                - "true"
+            weight: 1
+      containers:
+      - env:
+        - name: NOTEBOOK_ARGS
+          value: |-
+            --ServerApp.port=8888
+                              --ServerApp.token=''
+                              --ServerApp.password=''
+                              --ServerApp.base_url=/notebook/{{.Namespace}}/jupyter-nb-kube-3aadmin
+                              --ServerApp.quit_button=False
+                              --ServerApp.tornado_settings={"user":"kube-3aadmin","hub_host":"https://odh-dashboard-{{.OpenDataHubNamespace}}.{{.IngressDomain}}","hub_prefix":"/notebookController/kube-3aadmin"}
+        - name: JUPYTER_IMAGE
+          value: image-registry.openshift-image-registry.svc:5000/{{.OpenDataHubNamespace}}/{{.ImageStreamName}}:{{.ImageStreamTag}}
+        - name: JUPYTER_NOTEBOOK_PORT
+          value: "8888"
+        - name: OCP_SERVER
+          value: {{.OpenShiftApiUrl}}
+        - name: OCP_TOKEN
+          value: {{.KubernetesBearerToken}}
+        image: image-registry.openshift-image-registry.svc:5000/{{.OpenDataHubNamespace}}/{{.ImageStreamName}}:{{.ImageStreamTag}}
+        command: ["/bin/sh", "-c", "pip install papermill && oc login --token=${OCP_TOKEN} --server=${OCP_SERVER} --insecure-skip-tls-verify=true && papermill /opt/app-root/notebooks/{{.NotebookConfigMapFileName}} /opt/app-root/src/mcad-out.ipynb -p namespace {{.Namespace}} && sleep infinity"]
+        # args: ["pip install papermill && oc login --token=${OCP_TOKEN} --server=${OCP_SERVER} --insecure-skip-tls-verify=true && papermill /opt/app-root/notebooks/mcad.ipynb /opt/app-root/src/mcad-out.ipynb" ]
+        imagePullPolicy: Always
+        # livenessProbe:
+        #   failureThreshold: 3
+        #   httpGet:
+        #     path: /notebook/{{.Namespace}}/jupyter-nb-kube-3aadmin/api
+        #     port: notebook-port
+        #     scheme: HTTP
+        #   initialDelaySeconds: 10
+        #   periodSeconds: 5
+        #   successThreshold: 1
+        #   timeoutSeconds: 1
+        name: jupyter-nb-kube-3aadmin
+        ports:
+        - containerPort: 8888
+          name: notebook-port
+          protocol: TCP
+        resources:
+          limits:
+            cpu: "2"
+            memory: 3Gi
+          requests:
+            cpu: "1"
+            memory: 3Gi
+        volumeMounts:
+        - mountPath: /opt/app-root/src
+          name: jupyterhub-nb-kube-3aadmin-pvc
+        - mountPath: /opt/app-root/notebooks
+          name: {{.NotebookConfigMapName}}
+        workingDir: /opt/app-root/src
+      - args:
+        - --provider=openshift
+        - --https-address=:8443
+        - --http-address=
+        - --openshift-service-account=jupyter-nb-kube-3aadmin
+        - --cookie-secret-file=/etc/oauth/config/cookie_secret
+        - --cookie-expire=24h0m0s
+        - --tls-cert=/etc/tls/private/tls.crt
+        - --tls-key=/etc/tls/private/tls.key
+        - --upstream=http://localhost:8888
+        - --upstream-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        - --skip-auth-regex=^(?:/notebook/$(NAMESPACE)/jupyter-nb-kube-3aadmin)?/api$
+        - --email-domain=*
+        - --skip-provider-button
+        - --openshift-sar={"verb":"get","resource":"notebooks","resourceAPIGroup":"kubeflow.org","resourceName":"jupyter-nb-kube-3aadmin","namespace":"$(NAMESPACE)"}
+        - --logout-url=https://odh-dashboard-{{.OpenDataHubNamespace}}.{{.IngressDomain}}/notebookController/kube-3aadmin/home
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.10
+        imagePullPolicy: Always
+        livenessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /oauth/healthz
+            port: oauth-proxy
+            scheme: HTTPS
+          initialDelaySeconds: 30
+          periodSeconds: 5
+          successThreshold: 1
+          timeoutSeconds: 1
+        name: oauth-proxy
+        ports:
+        - containerPort: 8443
+          name: oauth-proxy
+          protocol: TCP
+        readinessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /oauth/healthz
+            port: oauth-proxy
+            scheme: HTTPS
+          initialDelaySeconds: 5
+          periodSeconds: 5
+          successThreshold: 1
+          timeoutSeconds: 1
+        resources:
+          limits:
+            cpu: 100m
+            memory: 64Mi
+          requests:
+            cpu: 100m
+            memory: 64Mi
+        volumeMounts:
+        - mountPath: /etc/oauth/config
+          name: oauth-config
+        - mountPath: /etc/tls/private
+          name: tls-certificates
+      enableServiceLinks: false
+      serviceAccountName: jupyter-nb-kube-3aadmin
+      volumes:
+      - name: jupyterhub-nb-kube-3aadmin-pvc
+        persistentVolumeClaim:
+          claimName: {{.NotebookPVC}}
+      - name: oauth-config
+        secret:
+          defaultMode: 420
+          secretName: jupyter-nb-kube-3aadmin-oauth-config
+      - name: tls-certificates
+        secret:
+          defaultMode: 420
+          secretName: jupyter-nb-kube-3aadmin-tls
+      - name: {{.NotebookConfigMapName}}
+        configMap:
+          name: {{.NotebookConfigMapName}}
diff --git a/test/odh/resources/mnist.py b/test/odh/resources/mnist.py
new file mode 100644
index 000000000..d6a211944
--- /dev/null
+++ b/test/odh/resources/mnist.py
@@ -0,0 +1,160 @@
+# Copyright 2022 IBM, Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import torch
+from pytorch_lightning import LightningModule, Trainer
+from pytorch_lightning.callbacks.progress import TQDMProgressBar
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader, random_split
+from torchmetrics import Accuracy
+from torchvision import transforms
+from torchvision.datasets import MNIST
+
+PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")  # dataset directory; defaults to the current directory
+BATCH_SIZE = 256 if torch.cuda.is_available() else 64  # larger batches when CUDA is available
+# %%
+
+print("prior to running the trainer")
+print("MASTER_ADDR: is ", os.getenv("MASTER_ADDR"))  # prints None when the variable is unset
+print("MASTER_PORT: is ", os.getenv("MASTER_PORT"))
+
+
+class LitMNIST(LightningModule):
+    def __init__(self, data_dir=PATH_DATASETS, hidden_size=64, learning_rate=2e-4):
+        """Store the hyperparameters, define the input transform and build the MLP classifier."""
+        super().__init__()
+
+        # Keep the constructor arguments as instance attributes
+        self.data_dir = data_dir
+        self.hidden_size = hidden_size
+        self.learning_rate = learning_rate
+
+        # Hard-code MNIST-specific attributes: 10 classes, 1x28x28 inputs
+        self.num_classes = 10
+        self.dims = (1, 28, 28)
+        channels, width, height = self.dims
+        self.transform = transforms.Compose(
+            [
+                transforms.ToTensor(),
+                transforms.Normalize((0.1307,), (0.3081,)),
+            ]
+        )
+
+        # Define the PyTorch model: flatten -> two hidden layers (ReLU + dropout) -> class logits
+        self.model = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(channels * width * height, hidden_size),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(hidden_size, hidden_size),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(hidden_size, self.num_classes),
+        )
+
+        self.val_accuracy = Accuracy()
+        self.test_accuracy = Accuracy()
+
+    def forward(self, x):
+        x = self.model(x)
+        return F.log_softmax(x, dim=1)
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        loss = F.nll_loss(logits, y)
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        loss = F.nll_loss(logits, y)
+        preds = torch.argmax(logits, dim=1)
+        self.val_accuracy.update(preds, y)
+
+        # Calling self.log will surface up scalars for you in TensorBoard
+        self.log("val_loss", loss, prog_bar=True)
+        self.log("val_acc", self.val_accuracy, prog_bar=True)
+
+    def test_step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        loss = F.nll_loss(logits, y)
+        preds = torch.argmax(logits, dim=1)
+        self.test_accuracy.update(preds, y)
+
+        # Calling self.log will surface up scalars for you in TensorBoard
+        self.log("test_loss", loss, prog_bar=True)
+        self.log("test_acc", self.test_accuracy, prog_bar=True)
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
+        return optimizer
+
+    ####################
+    # DATA RELATED HOOKS
+    ####################
+
+    def prepare_data(self):
+        # Download both splits up front; setup() opens them later without download=True
+        print("Downloading MNIST dataset...")
+        MNIST(self.data_dir, train=True, download=True)
+        MNIST(self.data_dir, train=False, download=True)
+
+    def setup(self, stage=None):
+        """Create the train/val datasets for stage "fit" and the test dataset for stage "test"; both when stage is None."""
+        # Assign train/val datasets for use in dataloaders (55000 / 5000 random split)
+        if stage == "fit" or stage is None:
+            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
+            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])
+
+        # Assign test dataset for use in dataloader(s)
+        if stage == "test" or stage is None:
+            self.mnist_test = MNIST(
+                self.data_dir, train=False, transform=self.transform
+            )
+
+    def train_dataloader(self):
+        return DataLoader(self.mnist_train, batch_size=BATCH_SIZE)
+
+    def val_dataloader(self):
+        return DataLoader(self.mnist_val, batch_size=BATCH_SIZE)
+
+    def test_dataloader(self):
+        return DataLoader(self.mnist_test, batch_size=BATCH_SIZE)
+
+
+# Instantiate the model; its data hooks download MNIST and build the data loaders
+
+model = LitMNIST()
+
+print("GROUP: ", int(os.environ.get("GROUP_WORLD_SIZE", 1)))
+print("LOCAL: ", int(os.environ.get("LOCAL_WORLD_SIZE", 1)))
+
+# Initialize a trainer; node and device counts come from GROUP_WORLD_SIZE / LOCAL_WORLD_SIZE (1 when unset)
+trainer = Trainer(
+    accelerator="auto",
+    # devices=1 if torch.cuda.is_available() else None,  # limiting for iPython runs
+    max_epochs=2,
+    callbacks=[TQDMProgressBar(refresh_rate=20)],
+    num_nodes=int(os.environ.get("GROUP_WORLD_SIZE", 1)),
+    devices=int(os.environ.get("LOCAL_WORLD_SIZE", 1)),
+    strategy="ddp",
+)
+
+# Train the model ⚡
+trainer.fit(model)
diff --git a/test/odh/resources/mnist_mcad_mini.ipynb b/test/odh/resources/mnist_mcad_mini.ipynb
new file mode 100644
index 000000000..0b53324ab
--- /dev/null
+++ b/test/odh/resources/mnist_mcad_mini.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Import pieces from codeflare-sdk\n",
+    "from codeflare_sdk.job.jobs import DDPJobDefinition\n",
+    "from time import sleep"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "47ca5c15",
+   "metadata": {
+    "tags": ["parameters"]
+   },
+   "outputs": [],
+   "source": [
+    "#parameters\n",
+    "namespace = \"default\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "26b21373",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "job = DDPJobDefinition(name=\"mnistjob\", script=\"mnist.py\", scheduler_args={\"namespace\": namespace}, j=\"1x1\", gpu=0, cpu=1, memMB=2000, image=\"quay.io/project-codeflare/mnist-job-test:v0.0.1\").submit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d24e9f95",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "finished = False\n",
+    "while not finished:\n",
+    "    sleep(1)\n",
+    "    try:\n",
+    "        finished = (\"Epoch 4: 100%\" in job.logs())\n",
+    "    except:\n",
+    "        finished = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f078b7cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "job.cancel()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/test/odh/resources/mnist_ray_mini.ipynb b/test/odh/resources/mnist_ray_mini.ipynb
new file mode 100644
index 000000000..38992cc7d
--- /dev/null
+++ b/test/odh/resources/mnist_ray_mini.ipynb
@@ -0,0 +1,145 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Import pieces from codeflare-sdk\n",
+    "from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration\n",
+    "from codeflare_sdk.job.jobs import DDPJobDefinition\n",
+    "from time import sleep"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "30888aed",
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "#parameters\n",
+    "namespace = \"default\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f4bc870-091f-4e11-9642-cba145710159",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Create our cluster and submit appwrapper\n",
+    "cluster = Cluster(ClusterConfiguration(namespace=namespace, name='mnisttest', head_cpus=1, head_memory=2, num_workers=1, min_cpus=1, max_cpus=1, min_memory=1, max_memory=2, num_gpus=0, instascale=False))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Bring up the cluster\n",
+    "cluster.up()\n",
+    "cluster.wait_ready()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "df71c1ed",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "cluster.status()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "cluster.details()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "47ca5c15",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "job = DDPJobDefinition(name=\"mnisttest\", script=\"mnist.py\", workspace=\"file:///opt/app-root/notebooks/..data\", scheduler_args={\"requirements\": \"/opt/app-root/notebooks/requirements.txt\"}).submit(cluster)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f63a178a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "finished = False\n",
+    "while not finished:\n",
+    "    sleep(1)\n",
+    "    status = job.status()\n",
+    "    finished = (str(status.state) == \"SUCCEEDED\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6b099777",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cluster.down()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/test/odh/resources/requirements.txt b/test/odh/resources/requirements.txt
new file mode 100644
index 000000000..7266b064a
--- /dev/null
+++ b/test/odh/resources/requirements.txt
@@ -0,0 +1,4 @@
+pytorch_lightning==1.5.10
+ray_lightning
+torchmetrics==0.9.1
+torchvision==0.12.0
diff --git a/test/odh/support.go b/test/odh/support.go
new file mode 100644
index 000000000..d828ed950
--- /dev/null
+++ b/test/odh/support.go
@@ -0,0 +1,34 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package odh
+
+import (
+	"embed"
+
+	"github.com/onsi/gomega"
+	"github.com/project-codeflare/codeflare-common/support"
+)
+
+//go:embed resources/*
+var files embed.FS // embedded copy of the resources directory; paths passed to it start with "resources/"
+
+func ReadFile(t support.Test, fileName string) []byte {
+	t.T().Helper() // mark as a test helper so failures are attributed to the caller
+	file, err := files.ReadFile(fileName) // read from the embedded FS, e.g. "resources/mnist.py"
+	t.Expect(err).NotTo(gomega.HaveOccurred())
+	return file
+}