From 327550e126fcb78b08b0a12aa68173d3dbe37a02 Mon Sep 17 00:00:00 2001
From: Mark Saroufim <marksaroufim@meta.com>
Date: Mon, 8 Jul 2024 10:35:54 -0700
Subject: [PATCH 1/3] Skip FP8 optimizer smoke test on CPU when torch < 2.4

---
 test/prototype/test_low_bit_optim.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/prototype/test_low_bit_optim.py b/test/prototype/test_low_bit_optim.py
index e037ef8b11..552ae787df 100644
--- a/test/prototype/test_low_bit_optim.py
+++ b/test/prototype/test_low_bit_optim.py
@@ -145,6 +145,8 @@ def test_optim_4bit_correctness(self, optim_name):
     def test_optim_fp8_smoke(self, optim_name, device):
         if device == "cuda" and torch.cuda.get_device_capability() < (8, 9):
             pytest.skip("FP8 requires compute capability >= 8.9")
+        if device == "cpu" and not TORCH_VERSION_AFTER_2_4:
+            pytest.skip("fill_cpu not implemented for 'Float8_e4m3fn")
 
         model = nn.Sequential(nn.Linear(32, 1024), nn.ReLU(), nn.Linear(1024, 128)).to(device)
         optim = getattr(low_bit_optim, optim_name)(model.parameters())

From 246e4b33f2da50aafb350c0a7e45d2c269582475 Mon Sep 17 00:00:00 2001
From: Mark Saroufim <marksaroufim@meta.com>
Date: Mon, 8 Jul 2024 17:40:24 -0700
Subject: [PATCH 2/3] Rename TestModel to ToyModel to remove sparse test warning

---
 test/sparsity/test_fast_sparse_training.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/sparsity/test_fast_sparse_training.py b/test/sparsity/test_fast_sparse_training.py
index 2bd0d1878c..a0886dd898 100644
--- a/test/sparsity/test_fast_sparse_training.py
+++ b/test/sparsity/test_fast_sparse_training.py
@@ -14,7 +14,7 @@
 )
 from torchao.utils import TORCH_VERSION_AFTER_2_4, is_fbcode
 
-class TestModel(nn.Module):
+class ToyModel(nn.Module):
     def __init__(self):
         super().__init__()
         self.linear1 = nn.Linear(128, 256, bias=False)
@@ -36,7 +36,7 @@ def test_runtime_weight_sparsification(self):
         from torch.sparse import SparseSemiStructuredTensorCUSPARSELT
         input = torch.rand((128, 128)).half().cuda()
         grad = torch.rand((128, 128)).half().cuda()
-        model = TestModel().half().cuda()
+        model = ToyModel().half().cuda()
         model_c = copy.deepcopy(model)
 
         for name, mod in model.named_modules():
@@ -77,7 +77,7 @@ def test_runtime_weight_sparsification_compile(self):
         from torch.sparse import SparseSemiStructuredTensorCUSPARSELT
         input = torch.rand((128, 128)).half().cuda()
         grad = torch.rand((128, 128)).half().cuda()
-        model = TestModel().half().cuda()
+        model = ToyModel().half().cuda()
         model_c = copy.deepcopy(model)
 
         for name, mod in model.named_modules():

From 5c384b4b5a596459a341d373382ce4d5773f914b Mon Sep 17 00:00:00 2001
From: Mark Saroufim <marksaroufim@meta.com>
Date: Mon, 8 Jul 2024 17:41:10 -0700
Subject: [PATCH 3/3] Revert CPU skip in FP8 optimizer smoke test

---
 test/prototype/test_low_bit_optim.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test/prototype/test_low_bit_optim.py b/test/prototype/test_low_bit_optim.py
index 552ae787df..e037ef8b11 100644
--- a/test/prototype/test_low_bit_optim.py
+++ b/test/prototype/test_low_bit_optim.py
@@ -145,8 +145,6 @@ def test_optim_4bit_correctness(self, optim_name):
     def test_optim_fp8_smoke(self, optim_name, device):
         if device == "cuda" and torch.cuda.get_device_capability() < (8, 9):
             pytest.skip("FP8 requires compute capability >= 8.9")
-        if device == "cpu" and not TORCH_VERSION_AFTER_2_4:
-            pytest.skip("fill_cpu not implemented for 'Float8_e4m3fn")
 
         model = nn.Sequential(nn.Linear(32, 1024), nn.ReLU(), nn.Linear(1024, 128)).to(device)
         optim = getattr(low_bit_optim, optim_name)(model.parameters())