split workflows by hardware type

danielvegamyhre · danielvegamyhre · commit 99e42f0ef04c · 2025-07-18T09:13:18.000-07:00
diff --git a/.github/workflows/1xH100_tests.yml b/.github/workflows/1xH100_tests.yml
@@ -0,0 +1,59 @@
+# Runs test/integration, test/dtypes/test_affine_quantized_float.py and the
+# float8 single-GPU test script on the linux.aws.h100 runner.
+name: Run 1xH100 Tests
+
+on:
+  push:
+    branches:
+      - main
+      - 'gh/**'
+  pull_request:
+    branches:
+      - main
+      - 'gh/**'
+
+# On main the group is keyed by run number, so every run has its own group and
+# is never cancelled; on any other ref the group is keyed by the ref, so a
+# newer run cancels the one still in progress for that ref.
+concurrency:
+  group: 1xH100_tests-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
+jobs:
+  test:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: H100
+            runs-on: linux.aws.h100
+            torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
+            gpu-arch-type: "cuda"
+            # Keep in sync with the cuXYZ suffix of the wheel index in torch-spec.
+            gpu-arch-version: "12.6"
+    permissions:
+      id-token: write
+      contents: read
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      timeout: 60
+      runner: ${{ matrix.runs-on }}
+      gpu-arch-type: ${{ matrix.gpu-arch-type }}
+      gpu-arch-version: ${{ matrix.gpu-arch-version }}
+      submodules: recursive
+      script: |
+        conda create -n venv python=3.9 -y
+        conda activate venv
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        python -m pip install --upgrade pip
+        pip install uv
+        pip install ${{ matrix.torch-spec }}
+        uv pip install -r dev-requirements.txt
+        uv pip install vllm
+        pip install .
+        pytest test/integration --verbose -s
+        pytest test/dtypes/test_affine_quantized_float.py --verbose -s
+        ./test/float8/test_everything_single_gpu.sh
diff --git a/.github/workflows/1xL4_tests.yml b/.github/workflows/1xL4_tests.yml
@@ -1,4 +1,4 @@
-name: Run Float8 Single-GPU Tests
+name: Run 1xL4 Tests
 
 on:
   push:
@@ -11,7 +11,7 @@ on:
       - 'gh/**'
 
 concurrency:
-  group: float8_single_gpu_test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  group: 1xL4_tests-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
   cancel-in-progress: true
 
 env:
@@ -28,11 +28,6 @@ jobs:
             torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.6"
-          - name: H100
-            runs-on: linux.aws.h100
-            torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
-            gpu-arch-type: "cuda"
-            gpu-arch-version: "12.4"
     permissions:
       id-token: write
       contents: read
@@ -53,4 +48,6 @@ jobs:
         uv pip install -r dev-requirements.txt
         uv pip install vllm
         pip install .
+        pytest test/integration --verbose -s
+        pytest test/dtypes/test_affine_quantized_float.py --verbose -s
         ./test/float8/test_everything_single_gpu.sh
diff --git a/.github/workflows/4xH100_tests.yml b/.github/workflows/4xH100_tests.yml
@@ -1,4 +1,4 @@
-name: Run Float8 Multi-GPU Tests
+name: Run 4xH100 Tests
 
 on:
   push:
@@ -11,7 +11,7 @@ on:
       - 'gh/**'
 
 concurrency:
-  group: float8_multi_gpu_test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  group: 4xH100_tests-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
   cancel-in-progress: true
 
 env:
diff --git a/test/float8/test_everything_single_gpu.sh b/test/float8/test_everything_single_gpu.sh
@@ -12,7 +12,5 @@ pytest test/float8/test_base.py --verbose -s
 pytest test/float8/test_compile.py --verbose -s
 pytest test/float8/test_numerics_integration.py --verbose -s
 pytest test/float8/test_auto_filter.py --verbose -s
-pytest test/integration --verbose -s
-pytest test/dtypes/test_affine_quantized_float.py --verbose -s
 
-echo "all single gpu tests successful"
+echo "all float8 single gpu tests successful"