Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 40a8882

Browse files
committed
close neural-speed when using xpu
Signed-off-by: zhenwei-intel <[email protected]>
1 parent 2e3558e commit 40a8882

File tree

2 files changed

+19
-19
lines changed

2 files changed

+19
-19
lines changed

docs/weightonlyquant.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
166166
prompt = "Once upon a time, there existed a little girl,"
167167
inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
168168

169-
qmodel = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, device_map="xpu", trust_remote_code=True, use_neural_speed=False)
169+
qmodel = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True, device_map="xpu", trust_remote_code=True)
170170

171171
# optimize the model with ipex, it will improve performance.
172172
qmodel = ipex.optimize_transformers(qmodel, inplace=True, dtype=torch.float16, quantization_config={}, device="xpu")

intel_extension_for_transformers/transformers/modeling/modeling_auto.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -197,25 +197,25 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
197197
device_map = kwargs.get("device_map", "cpu")
198198
use_cpu = (True if device_map == torch.device("cpu") or device_map == "cpu" else False)
199199
use_xpu = (True if device_map == torch.device("xpu") or device_map == "xpu" else False)
200-
201-
if kwargs.get("use_llm_runtime", None) is not None:
202-
use_neural_speed = kwargs.pop("use_llm_runtime", True) and not use_xpu
203-
logger.warning("use_llm_runtime is deprecated in version 1.3.2, please use_neural_speed instead.")
204-
elif kwargs.get("use_neural_speed", None) is not None:
205-
use_neural_speed = kwargs.pop("use_neural_speed", True) and not use_xpu
206-
else:
207-
config = transformers.AutoConfig.from_pretrained(pretrained_model_name_or_path,
208-
trust_remote_code = kwargs.get('trust_remote_code', False))
209-
if hasattr(config, "model_type") == False:
210-
logger.error("Can't get the model_type. Please check the correct model_type")
211-
exit(0)
212-
213-
if config.model_type in cls.model_type_list:
214-
logger.info("Using Neural Speed...")
215-
use_neural_speed = True
200+
use_neural_speed = False
201+
if not use_xpu:
202+
if kwargs.get("use_llm_runtime", None) is not None:
203+
use_neural_speed = kwargs.pop("use_llm_runtime", True) and not use_xpu
204+
logger.warning("use_llm_runtime is deprecated in version 1.3.2, please use_neural_speed instead.")
205+
elif kwargs.get("use_neural_speed", None) is not None:
206+
use_neural_speed = kwargs.pop("use_neural_speed", True) and not use_xpu
216207
else:
217-
logger.info("Using Pytorch...")
218-
use_neural_speed = False
208+
config = transformers.AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
209+
if hasattr(config, "model_type") == False:
210+
logger.error("Can't get the model_type. Please check the correct model_type")
211+
exit(0)
212+
213+
if config.model_type in cls.model_type_list:
214+
logger.info("Using Neural Speed...")
215+
use_neural_speed = True
216+
else:
217+
logger.info("Using Pytorch...")
218+
use_neural_speed = False
219219

220220
if os.path.isfile(os.path.join(pretrained_model_name_or_path, QUANT_CONFIG)):
221221
logger.info(

0 commit comments

Comments (0)