From badb80057e2a0d865e5167b3ca592015eb5cb1cf Mon Sep 17 00:00:00 2001
From: Wanchao Liang
Date: Mon, 5 Aug 2024 18:14:19 -0700
Subject: [PATCH 1/2] Add warning to compile rmsnorm

As titled: add a warning for compiled rmsnorm, as it's not fully ready
yet, i.e. this issue https://github.com/pytorch/torchtitan/issues/497

We can remove this warning once we fix the issue.
---
 torchtitan/models/norms.py     | 4 ++++
 train_configs/debug_model.toml | 2 +-
 train_configs/llama2_13b.toml  | 2 +-
 train_configs/llama2_70b.toml  | 2 +-
 train_configs/llama2_7b.toml   | 2 +-
 train_configs/llama3_405b.toml | 2 +-
 train_configs/llama3_70b.toml  | 2 +-
 train_configs/llama3_8b.toml   | 2 +-
 8 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/torchtitan/models/norms.py b/torchtitan/models/norms.py
index c0ef6a8035..784546f10e 100644
--- a/torchtitan/models/norms.py
+++ b/torchtitan/models/norms.py
@@ -43,6 +43,10 @@ def build_norm(norm_type: str, dim: int, eps: float = 1e-6):
     elif norm_type == "rmsnorm":
         return RMSNorm(dim, eps=eps)
     elif norm_type == "compiled_rmsnorm":
+        import warnings
+        warnings.warn(
+            "compiled_rmsnorm is currently experimental and not ready to use yet."
+        )
         return RMSNorm(dim, eps=eps, compile=True)
     elif norm_type == "fused_rmsnorm":
         return FusedRMSNorm(dim, eps=eps)
diff --git a/train_configs/debug_model.toml b/train_configs/debug_model.toml
index 7d4187dc35..999bb95b19 100644
--- a/train_configs/debug_model.toml
+++ b/train_configs/debug_model.toml
@@ -21,7 +21,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "debugmodel"
-norm_type = "compiled_rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
 # test tokenizer.model, for debug purpose only
 tokenizer_path = "./test/assets/test_tiktoken.model"
 
diff --git a/train_configs/llama2_13b.toml b/train_configs/llama2_13b.toml
index 4727f965fb..df2f6bb3d9 100644
--- a/train_configs/llama2_13b.toml
+++ b/train_configs/llama2_13b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "13B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama2_70b.toml b/train_configs/llama2_70b.toml
index 83114876d1..354ebe11f2 100644
--- a/train_configs/llama2_70b.toml
+++ b/train_configs/llama2_70b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "70B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama2_7b.toml b/train_configs/llama2_7b.toml
index 22ab6c7601..e2b0e78d24 100644
--- a/train_configs/llama2_7b.toml
+++ b/train_configs/llama2_7b.toml
@@ -17,7 +17,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama2"
 flavor = "7B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_405b.toml b/train_configs/llama3_405b.toml
index b7f78dc250..5dca66a537 100644
--- a/train_configs/llama3_405b.toml
+++ b/train_configs/llama3_405b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "405B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_70b.toml b/train_configs/llama3_70b.toml
index 62d75dfb62..470149a58a 100644
--- a/train_configs/llama3_70b.toml
+++ b/train_configs/llama3_70b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "70B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]
diff --git a/train_configs/llama3_8b.toml b/train_configs/llama3_8b.toml
index 517dd81ee6..3d0c5160da 100644
--- a/train_configs/llama3_8b.toml
+++ b/train_configs/llama3_8b.toml
@@ -18,7 +18,7 @@ save_tb_folder = "tb"
 [model]
 name = "llama3"
 flavor = "8B"
-norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / compiled_rmsnorm / fused_rmsnorm
+norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 tokenizer_path = "./torchtitan/datasets/tokenizer/original/tokenizer.model"
 
 [optimizer]

From acde09fa1c784bb4cb89b207bb6dcea3d650645a Mon Sep 17 00:00:00 2001
From: Wanchao Liang
Date: Mon, 5 Aug 2024 18:17:54 -0700
Subject: [PATCH 2/2] lint

---
 torchtitan/models/norms.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/torchtitan/models/norms.py b/torchtitan/models/norms.py
index 784546f10e..798c7c4dbb 100644
--- a/torchtitan/models/norms.py
+++ b/torchtitan/models/norms.py
@@ -44,6 +44,7 @@ def build_norm(norm_type: str, dim: int, eps: float = 1e-6):
         return RMSNorm(dim, eps=eps)
     elif norm_type == "compiled_rmsnorm":
         import warnings
+
         warnings.warn(
             "compiled_rmsnorm is currently experimental and not ready to use yet."
         )
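
For context, a minimal usage sketch (not part of the patches above) of how
the warning surfaces once patch 1 is applied: build_norm is the factory
patched in torchtitan/models/norms.py, and the dim/eps values here are
arbitrary placeholders.

    # Sketch only: demonstrates the warning added in patch 1.
    import warnings

    from torchtitan.models.norms import build_norm

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # "compiled_rmsnorm" now warns that it is experimental (issue #497)
        norm = build_norm("compiled_rmsnorm", dim=256, eps=1e-6)

    # The warning message marks the norm as experimental.
    assert any("experimental" in str(w.message) for w in caught)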