From 1b64e74b1f3935d74f394e7f8b3ada91a86fa5c3 Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Thu, 20 Oct 2022 14:37:30 -0400
Subject: [PATCH 1/4] [Nova] GHA Linux GPU Job

---
 .github/workflows/test-linux-cpu.yml |  4 +-
 .github/workflows/test-linux-gpu.yml | 60 ++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/test-linux-gpu.yml

diff --git a/.github/workflows/test-linux-cpu.yml b/.github/workflows/test-linux-cpu.yml
index f78dd323da7..234ad97f4d6 100644
--- a/.github/workflows/test-linux-cpu.yml
+++ b/.github/workflows/test-linux-cpu.yml
@@ -16,7 +16,7 @@ jobs:
   tests:
     strategy:
       matrix:
-        py_vers: ["3.7", "3.8", "3.9", "3.10"]
+        python_version: ["3.7", "3.8", "3.9", "3.10"]
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
@@ -27,7 +27,7 @@ jobs:
         git config --global --add safe.directory /__w/vision/vision
 
         # Set up Environment Variables
-        export PYTHON_VERSION="${{ matrix.py_vers }}"
+        export PYTHON_VERSION="${{ matrix.python_version }}"
         export VERSION="cpu"
         export CUDATOOLKIT="cpuonly"
 
diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml
new file mode 100644
index 00000000000..8e6f562715e
--- /dev/null
+++ b/.github/workflows/test-linux-gpu.yml
@@ -0,0 +1,60 @@
+name: Unit-tests on Linux GPU
+
+on:
+  pull_request:
+  push:
+    branches:
+      - nightly
+      - main
+      - release/*
+  workflow_dispatch:
+
+env:
+  CHANNEL: "nightly"
+
+jobs:
+  tests:
+    strategy:
+      matrix:
+        python_version: ["3.7", "3.8", "3.9", "3.10"]
+        cuda_arch_version: ["11.6", "11.7"]
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.4xlarge.nvidia.gpu
+      repository: pytorch/vision
+      gpu-arch-type: cuda
+      gpu-arch-version: ${{ matrix.cuda_arch_version }}
+      script: |
+        # Mark Build Directory Safe
+        git config --global --add safe.directory /__w/vision/vision
+
+        # Set up Environment Variables
+        export PYTHON_VERSION="${{ matrix.python_version }}"
+        export VERSION="${{ matrix.cuda_arch_version }}"
+        export CUDATOOLKIT="pytorch-cuda=${VERSION}"
+
+        # Set CHANNEL
+        if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then
+          export CHANNEL=test
+        else
+          export CHANNEL=nightly
+        fi
+
+        # Create Conda Env (relative prefix; assumes the job runs from /work)
+        conda create -yp ci_env python="${PYTHON_VERSION}" numpy libpng jpeg scipy
+        conda activate /work/ci_env
+
+        # Install PyTorch, Torchvision, and testing libraries (match spec quoted to avoid shell globbing)
+        set -ex
+        conda install \
+          --yes \
+          -c "pytorch-${CHANNEL}" \
+          -c nvidia "pytorch-${CHANNEL}::pytorch[build=*${VERSION}*]" \
+          "${CUDATOOLKIT}"
+        python3 setup.py develop
+        python3 -m pip install pytest pytest-mock 'av<10'
+
+        # Run Tests
+        python3 -m torch.utils.collect_env
+        python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20

From 667f7b9e480f35469faf61c41c3473e6d203e839 Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Thu, 20 Oct 2022 15:20:48 -0400
Subject: [PATCH 2/4] increase timeout since jobs time out and cancel after 30
 mins

---
 .github/workflows/test-linux-gpu.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml
index 8e6f562715e..dc453ca2ee2 100644
--- a/.github/workflows/test-linux-gpu.yml
+++ b/.github/workflows/test-linux-gpu.yml
@@ -25,6 +25,7 @@ jobs:
       repository: pytorch/vision
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda_arch_version }}
+      timeout: 60
       script: |
         # Mark Build Directory Safe
         git config --global --add safe.directory /__w/vision/vision

From 31315f12c55f537c00a73a9773a276f9dd587289 Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Fri, 21 Oct 2022 12:59:20 -0400
Subject: [PATCH 3/4] bigger instance and longer timeout

---
 .github/workflows/test-linux-gpu.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml
index dc453ca2ee2..bc97a07c969 100644
--- a/.github/workflows/test-linux-gpu.yml
+++ b/.github/workflows/test-linux-gpu.yml
@@ -21,11 +21,11 @@ jobs:
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
-      runner: linux.4xlarge.nvidia.gpu
+      runner: linux.8xlarge.nvidia.gpu
       repository: pytorch/vision
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda_arch_version }}
-      timeout: 60
+      timeout: 120
       script: |
         # Mark Build Directory Safe
         git config --global --add safe.directory /__w/vision/vision

From b907257f18fd3c1b4c289205404ff87943de02c3 Mon Sep 17 00:00:00 2001
From: Omkar Salpekar <osalpekar@fb.com>
Date: Fri, 21 Oct 2022 15:08:45 -0400
Subject: [PATCH 4/4] use instance with more gpu memory and only run py38 and
 cu116 on PR CI

---
 .github/workflows/test-linux-gpu.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml
index bc97a07c969..a4d938f23ed 100644
--- a/.github/workflows/test-linux-gpu.yml
+++ b/.github/workflows/test-linux-gpu.yml
@@ -16,12 +16,12 @@ jobs:
   tests:
     strategy:
       matrix:
-        python_version: ["3.7", "3.8", "3.9", "3.10"]
-        cuda_arch_version: ["11.6", "11.7"]
+        python_version: ["3.8"]
+        cuda_arch_version: ["11.6"]
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
-      runner: linux.8xlarge.nvidia.gpu
+      runner: linux.g5.4xlarge.nvidia.gpu
       repository: pytorch/vision
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda_arch_version }}