From badb80057e2a0d865e5167b3ca592015eb5cb1cf Mon Sep 17 00:00:00 2001
From: Wanchao Liang
Date: Mon, 5 Aug 2024 18:14:19 -0700
Subject: [PATCH 1/2] Add warning to compile rmsnorm

As titled: add a warning for compiled rmsnorm, as it's not fully ready
yet, i.e. this issue https://github.com/pytorch/torchtitan/issues/497

We can remove this warning once we fix the issue.
---
 torchtitan/models/norms.py     | 4 ++++
 train_configs/debug_model.toml | 2 +-
 train_configs/llama2_13b.toml  | 2 +-
 train_configs/llama2_70b.toml  | 2 +-
 train_configs/llama2_7b.toml   | 2 +-
 train_configs/llama3_405b.toml | 2 +-
 train_configs/llama3_70b.toml  | 2 +-
 train_configs/llama3_8b.toml   | 2 +-
 8 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/torchtitan/models/norms.py b/torchtitan/models/norms.py
index c0ef6a8035..784546f10e 100644
--- a/torchtitan/models/norms.py
+++ b/torchtitan/models/norms.py
@@ -43,6 +43,10 @@ def build_norm(norm_type: str, dim: int, eps: float = 1e-6):
     elif norm_type == "rmsnorm":
         return RMSNorm(dim, eps=eps)
     elif norm_type == "compiled_rmsnorm":
+        import warnings
+        warnings.warn(
+            "compiled_rmsnorm is currently experimental and not ready to use yet."
+        )
         return RMSNorm(dim, eps=eps, compile=True)
     elif norm_type == "fused_rmsnorm":
         return FusedRMSNorm(dim, eps=eps)
diff --git a/train_configs/debug_model.toml b/train_configs/debug_model.toml
index 7d4187dc35..999bb95b19 100644
--- a/train_configs/debug_model.toml
+++ b/train_configs/debug_model.toml
@@ -21,7 +21,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "debugmodel"
-norm_type = "compiled_rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
 # test tokenizer.model, for debug purpose only
 tokenizer_path = "./test/assets/test_tiktoken.model"
 
diff --git a/train_configs/llama2_13b.toml b/train_configs/llama2_13b.toml
index 4727f965fb..df2f6bb3d9 100644
--- a/train_configs/llama2_13b.toml
+++ b/train_configs/llama2_13b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "13B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama2_70b.toml b/train_configs/llama2_70b.toml
index 83114876d1..354ebe11f2 100644
--- a/train_configs/llama2_70b.toml
+++ b/train_configs/llama2_70b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "70B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama2_7b.toml b/train_configs/llama2_7b.toml
index 22ab6c7601..e2b0e78d24 100644
--- a/train_configs/llama2_7b.toml
+++ b/train_configs/llama2_7b.toml
@@ -17,7 +17,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "7B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_405b.toml b/train_configs/llama3_405b.toml
index b7f78dc250..5dca66a537 100644
--- a/train_configs/llama3_405b.toml
+++ b/train_configs/llama3_405b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "405B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_70b.toml b/train_configs/llama3_70b.toml
index 62d75dfb62..470149a58a 100644
--- a/train_configs/llama3_70b.toml
+++ b/train_configs/llama3_70b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "70B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_8b.toml b/train_configs/llama3_8b.toml
index 517dd81ee6..3d0c5160da 100644
--- a/train_configs/llama3_8b.toml
+++ b/train_configs/llama3_8b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "8B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]

From acde09fa1c784bb4cb89b207bb6dcea3d650645a Mon Sep 17 00:00:00 2001
From: Wanchao Liang
Date: Mon, 5 Aug 2024 18:17:54 -0700
Subject: [PATCH 2/2] lint

---
 torchtitan/models/norms.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchtitan/models/norms.py b/torchtitan/models/norms.py
index 784546f10e..798c7c4dbb 100644
--- a/torchtitan/models/norms.py
+++ b/torchtitan/models/norms.py
@@ -44,6 +44,7 @@ def build_norm(norm_type: str, dim: int, eps: float = 1e-6):
         return RMSNorm(dim, eps=eps)
     elif norm_type == "compiled_rmsnorm":
         import warnings
+
         warnings.warn(
             "compiled_rmsnorm is currently experimental and not ready to use yet."
         )
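
For context, a minimal usage sketch (not part of the patches above) of how
the warning surfaces once patch 1 is applied: build_norm is the factory
patched in torchtitan/models/norms.py, and the dim/eps values here are
arbitrary placeholders.

    # Sketch only: demonstrates the warning added in patch 1.
    import warnings

    from torchtitan.models.norms import build_norm

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # "compiled_rmsnorm" now warns that it is experimental (issue #497)
        norm = build_norm("compiled_rmsnorm", dim=256, eps=1e-6)

    # The warning message marks the norm as experimental.
    assert any("experimental" in str(w.message) for w in caught)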