Skip to content

Commit 147c292

Browse files
Jack-Khuu and byjlw authored
Update PT Pin: 2024-08-14 (#1029)
* Update install_requirements.sh to use PT 20240814 https://hud.pytorch.org/pytorch/pytorch/commit/3a023a67c47bcde45538c9991e332d21ac548e46 To pick up fix in pytorch/pytorch#133235 * Update AO pin * Remove unwrapping tensor_subclass * Conditional subclass unwrap since aoti doesn't support tensor_subclass * Missed intialize entry point with conditional unwrap --------- Co-authored-by: Jesse White <[email protected]>
1 parent 507f470 commit 147c292

File tree

4 files changed

+21
-5
lines changed

4 files changed

+21
-5
lines changed

build/builder.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ def _initialize_model(
440440
quantize,
441441
tokenizer=None,
442442
max_seq_length=None,
443+
support_tensor_subclass: bool = True,
443444
):
444445
print("Loading model...")
445446

@@ -510,7 +511,13 @@ def _initialize_model(
510511
if quantize:
511512
print(f"Quantizing the model with: {quantize}")
512513
with measure_time("Time to quantize model: {time:.02f} seconds"):
513-
quantize_model(model, builder_args.device, quantize, tokenizer)
514+
quantize_model(
515+
model,
516+
builder_args.device,
517+
quantize,
518+
tokenizer,
519+
support_tensor_subclass,
520+
)
514521
device_sync(device=builder_args.device)
515522

516523
if builder_args.setup_caches:

export.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def main(args):
126126
quantize,
127127
tokenizer,
128128
max_seq_length=builder_args.max_seq_length,
129+
support_tensor_subclass=output_dso_path is None,
129130
)
130131
model_to_pte = model
131132
model_to_dso = model
@@ -143,6 +144,7 @@ def main(args):
143144
model_to_dso = _initialize_model(
144145
builder_args,
145146
quantize,
147+
support_tensor_subclass=False,
146148
)
147149
_unset_gguf_kwargs(builder_args)
148150

install_requirements.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ fi
4747
# NOTE: If a newly-fetched version of the executorch repo changes the value of
4848
# NIGHTLY_VERSION, you should re-run this script to install the necessary
4949
# package versions.
50-
NIGHTLY_VERSION=dev20240728
50+
NIGHTLY_VERSION=dev20240814
5151

5252
# Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
5353
(
@@ -82,7 +82,7 @@ REQUIREMENTS_TO_INSTALL=(
8282
# TODO: Remove this and install nightly build, once it supports macos
8383
(
8484
set -x
85-
$PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@d477c0e59b458b5617dcb3e999290a87df3070d8
85+
$PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@e11201a62669f582d81cdb33e031a07fb8dfc4f3
8686
)
8787
if [[ -x "$(command -v nvidia-smi)" ]]; then
8888
(

quantization/quantize.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,13 @@
5050
### torchchat quantization API ###
5151

5252

53-
def quantize_model(model: nn.Module, device, quantize_options, tokenizer=None):
53+
def quantize_model(
54+
model: nn.Module,
55+
device,
56+
quantize_options,
57+
tokenizer=None,
58+
support_tensor_subclass: bool = True,
59+
):
5460
"""
5561
Quantize the specified model using the quantizers described by
5662
a quantization dict of the form:
@@ -74,7 +80,8 @@ def quantize_model(model: nn.Module, device, quantize_options, tokenizer=None):
7480
# Use tensor subclass API for int4 weight only.
7581
if device == "cuda" and quantizer == "linear:int4":
7682
quantize_(model, int4_weight_only(q_kwargs["groupsize"]))
77-
unwrap_tensor_subclass(model)
83+
if not support_tensor_subclass:
84+
unwrap_tensor_subclass(model)
7885
continue
7986
# Use dtype precision specified in user config, else fallback on global precision.
8087
if "precision" in quantize_options:

0 commit comments

Comments (0)