Skip to content

Commit 147c292

Browse files
Jack-Khuu and byjlw authored
Update PT Pin: 2024-08-14 (#1029)
* Update install_requirements.sh to use PT 20240814 https://hud.pytorch.org/pytorch/pytorch/commit/3a023a67c47bcde45538c9991e332d21ac548e46 To pick up fix in pytorch/pytorch#133235 * Update AO pin * Remove unwrapping tensor_subclass * Conditional subclass unwrap since aoti doesn't support tensor_subclass * Missed intialize entry point with conditional unwrap --------- Co-authored-by: Jesse White <[email protected]>
1 parent 507f470 commit 147c292

File tree

4 files changed

+21
-5
lines changed

4 files changed

+21
-5
lines changed

build/builder.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ def _initialize_model(
440440
quantize,
441441
tokenizer=None,
442442
max_seq_length=None,
443+
support_tensor_subclass: bool = True,
443444
):
444445
print("Loading model...")
445446

@@ -510,7 +511,13 @@ def _initialize_model(
510511
if quantize:
511512
print(f"Quantizing the model with: {quantize}")
512513
with measure_time("Time to quantize model: {time:.02f} seconds"):
513-
quantize_model(model, builder_args.device, quantize, tokenizer)
514+
quantize_model(
515+
model,
516+
builder_args.device,
517+
quantize,
518+
tokenizer,
519+
support_tensor_subclass,
520+
)
514521
device_sync(device=builder_args.device)
515522

516523
if builder_args.setup_caches:

export.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def main(args):
126126
quantize,
127127
tokenizer,
128128
max_seq_length=builder_args.max_seq_length,
129+
support_tensor_subclass=output_dso_path is None,
129130
)
130131
model_to_pte = model
131132
model_to_dso = model
@@ -143,6 +144,7 @@ def main(args):
143144
model_to_dso = _initialize_model(
144145
builder_args,
145146
quantize,
147+
support_tensor_subclass=False,
146148
)
147149
_unset_gguf_kwargs(builder_args)
148150

install_requirements.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ fi
4747
# NOTE: If a newly-fetched version of the executorch repo changes the value of
4848
# NIGHTLY_VERSION, you should re-run this script to install the necessary
4949
# package versions.
50-
NIGHTLY_VERSION=dev20240728
50+
NIGHTLY_VERSION=dev20240814
5151

5252
# Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
5353
(
@@ -82,7 +82,7 @@ REQUIREMENTS_TO_INSTALL=(
8282
# TODO: Remove this and install nightly build, once it supports macos
8383
(
8484
set -x
85-
$PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@d477c0e59b458b5617dcb3e999290a87df3070d8
85+
$PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@e11201a62669f582d81cdb33e031a07fb8dfc4f3
8686
)
8787
if [[ -x "$(command -v nvidia-smi)" ]]; then
8888
(

quantization/quantize.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,13 @@
5050
### torchchat quantization API ###
5151

5252

53-
def quantize_model(model: nn.Module, device, quantize_options, tokenizer=None):
53+
def quantize_model(
54+
model: nn.Module,
55+
device,
56+
quantize_options,
57+
tokenizer=None,
58+
support_tensor_subclass: bool = True,
59+
):
5460
"""
5561
Quantize the specified model using the quantizers described by
5662
a quantization dict of the form:
@@ -74,7 +80,8 @@ def quantize_model(model: nn.Module, device, quantize_options, tokenizer=None):
7480
# Use tensor subclass API for int4 weight only.
7581
if device == "cuda" and quantizer == "linear:int4":
7682
quantize_(model, int4_weight_only(q_kwargs["groupsize"]))
77-
unwrap_tensor_subclass(model)
83+
if not support_tensor_subclass:
84+
unwrap_tensor_subclass(model)
7885
continue
7986
# Use dtype precision specified in user config, else fallback on global precision.
8087
if "precision" in quantize_options:

0 commit comments

Comments (0)