Skip to content

Commit 12dd0fd

Browse files
authored
Molly/v2 bkc (#2448)
* update for dlrm bkc
* bugfix for dlrm int8
* dlrm_v2 bkc update
* update chatglm bkc
* tuning blocktime for llm optimized perf
* license for dlrm_v2 bkc
* bug fix for chatglm int8
* bugfix for chatglm quantization

---------
1 parent c7c93ad commit 12dd0fd

File tree

8 files changed: 102 additions (+102) and 15 deletions (-15)

models_v2/pytorch/chatglm/inference/cpu/do_quantization.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ echo "### running with jit mode"
7979

8080
FINETUNED_MODEL=${FINETUNED_MODEL:-"'THUDM/chatglm3-6b'"}
8181

82-
EVAL_SCRIPT=${EVAL_SCRIPT:-"../../../../../../models/language_modeling/pytorch/chatglm/inference/cpu/run_llm.py"}
82+
EVAL_SCRIPT=${EVAL_SCRIPT:-"${PWD}/run_llm.py"}
8383
WORK_SPACE=${WORK_SPACE:-${OUTPUT_DIR}}
8484
rm -rf ${OUTPUT_DIR}/latency_log*
8585
python -m intel_extension_for_pytorch.cpu.launch --nodes-list 0 --memory-allocator tcmalloc --log_dir=${OUTPUT_DIR} --log_file_prefix="./latency_log_${precision}_${mode}" \

models_v2/pytorch/chatglm/inference/cpu/run_model.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ else
150150
}
151151
}
152152
'))
153-
first_token_latency=($(grep -i 'first-token-latency:' ${OUTPUT_DIR}/ChatGLM_${PRECISION}_${LOG_PREFIX}* |sed -e 's/.first-token-latency: //;s/[^0-9.]//g;s/\.$//' |awk '
153+
first_token_latency=($(grep -i 'first-token-latency:' ${OUTPUT_DIR}/ChatGLM_${PRECISION}_${LOG_PREFIX}* |sed -e 's/.*first-token-latency: //;s/[^0-9.]//g;s/\.$//' |awk '
154154
BEGIN {
155155
num = 0;
156156
sum = 0;

models_v2/pytorch/gptj/inference/cpu/run_model.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ elif [[ "${TEST_MODE}" == "REALTIME" ]]; then
4141
export OMP_NUM_THREADS=${CORE_PER_INSTANCE}
4242
BATCH_SIZE=1
4343
NUM_ITER=${NUM_ITER:-20}
44-
export KMP_BLOCKTIME=1
44+
export KMP_BLOCKTIME=-1
4545
rm -rf ${OUTPUT_DIR}/latency_log*
4646
export USECASE=latency
4747
ARGS="$ARGS --benchmark --num-warmup 10 --num-iter $NUM_ITER --token-latency"

models_v2/pytorch/llama/inference/cpu/run_model.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ elif [[ "$TEST_MODE" == "REALTIME" ]]; then
3838
export LOG_PREFIX="latency_log"
3939
BATCH_SIZE=${BATCH_SIZE:-1}
4040
export OMP_NUM_THREADS=${CORE_PER_INSTANCE}
41-
export KMP_BLOCKTIME=1
41+
export KMP_BLOCKTIME=-1
4242
rm -rf ${OUTPUT_DIR}/latency_log*
4343
export usecase=latency
4444
NUM_WARMUP=${NUM_WARMUP:-10}

models_v2/pytorch/llama/training/cpu/run_model.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ if [[ "${DDP}" == "True" ]]; then
6060
done
6161
6262
export CCL_WORKER_AFFINITY=`echo ${CCL_WORKER_AFFINITY} | tr " " ","`
63-
EOF
63+
EOF
6464

6565
#DDP settings
6666
export TORCH_CPP_LOG_LEVEL=INFO
Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
# Copyright (c) 2023 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
MODEL_DIR=${MODEL_DIR-$PWD}
17+
if [ ! -e "${MODEL_DIR}/models/recommendation/pytorch/torchrec_dlrm/dlrm_main.py" ]; then
18+
echo "Could not find the script of dlrm_s_pytorch.py. Please set environment variable '\${MODEL_DIR}'."
19+
echo "From which the dlrm_s_pytorch.py exist at the: \${MODEL_DIR}/models/recommendation/pytorch/torchrec_dlrm/dlrm_main.py"
20+
exit 1
21+
fi
22+
MODEL_SCRIPT=${MODEL_DIR}/models/recommendation/pytorch/torchrec_dlrm/dlrm_main.py
23+
INT8_CONFIG=${MODEL_DIR}/models/recommendation/pytorch/torchrec_dlrm/int8_configure.json
24+
25+
if [ -z "${OUTPUT_DIR}" ]; then
26+
echo "The required environment variable OUTPUT_DIR has not been set"
27+
exit 1
28+
fi
29+
30+
LOG_0="${LOG}/throughput.log"
31+
export BATCH_SIZE=32768
32+
python -m intel_extension_for_pytorch.cpu.launch --node_id 0 --enable_jemalloc $MODEL_SCRIPT \
33+
--embedding_dim 128 \
34+
--dense_arch_layer_sizes 512,256,128 \
35+
--over_arch_layer_sizes 1024,1024,512,256,1 \
36+
--num_embeddings_per_feature 40000000,39060,17295,7424,20265,3,7122,1543,63,40000000,3067956,405282,10,2209,11938,155,4,976,14,40000000,40000000,40000000,590152,12973,108,36 \
37+
--epochs 1 \
38+
--pin_memory \
39+
--mmap_mode \
40+
--batch_size $BATCH_SIZE \
41+
--interaction_type=dcn \
42+
--dcn_num_layers=3 \
43+
--dcn_low_rank_dim=512 \
44+
--ipex-optimize \
45+
--inference-only \
46+
--dtype int8 \
47+
--int8-configure-dir ${INT8_CONFIG}\
48+
--calibration \
49+
--synthetic_multi_hot_criteo_path $DATASET_DIR \
50+
--snapshot-dir $WEIGHT_DIR \
51+
--ipex-merged-emb-cat
Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
#!/bin/bash
2+
# Copyright (c) 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
ARGS="--dtype int8 --int8-prepare --ipex-merged-emb-cat --int8-configure-dir ${INT8_CONFIG}"
18+
python $MODEL_SCRIPT \
19+
--embedding_dim 128 \
20+
--dense_arch_layer_sizes 512,256,128 \
21+
--over_arch_layer_sizes 1024,1024,512,256,1 \
22+
--num_embeddings_per_feature 40000000,39060,17295,7424,20265,3,7122,1543,63,40000000,3067956,405282,10,2209,11938,155,4,976,14,40000000,40000000,40000000,590152,12973,108,36 \
23+
--epochs 1 \
24+
--pin_memory \
25+
--mmap_mode \
26+
--batch_size $BATCH_SIZE \
27+
--interaction_type=dcn \
28+
--dcn_num_layers=3 \
29+
--dcn_low_rank_dim=512 \
30+
--limit_val_batches 1000 \
31+
--ipex-optimize \
32+
--log-freq 10 \
33+
--jit \
34+
--inference-only \
35+
--benchmark \
36+
$ARGS $EXTRA_ARGS

models_v2/pytorch/torchrec_dlrm/inference/cpu/run_model.sh

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
16+
set -x
1717
ARGS=""
1818
EXTRA_ARGS=""
1919

@@ -25,10 +25,10 @@ if [[ "${TEST_MODE}" == "THROUGHPUT" ]]; then
2525
LOG_PREFIX=dlrm_inference_performance_log
2626
if [ -z "${DATASET_DIR}" ]; then
2727
echo "DATASET_DIR are not set, will use dummy generated dataset"
28-
EXTRA_ARGS="$EXTRA_ARGS --multi_hot_distribution_type uniform "
29-
EXTRA_ARGS="$EXTRA_ARGS --multi_hot_sizes 3,2,1,2,6,1,1,1,1,7,3,8,1,6,9,5,1,1,1,12,100,27,10,3,1,1 "
28+
export EXTRA_ARGS="$EXTRA_ARGS --multi_hot_distribution_type uniform "
29+
export EXTRA_ARGS="$EXTRA_ARGS --multi_hot_sizes 3,2,1,2,6,1,1,1,1,7,3,8,1,6,9,5,1,1,1,12,100,27,10,3,1,1 "
3030
else
31-
EXTRA_ARGS="$EXTRA_ARGS --synthetic_multi_hot_criteo_path $DATASET_DIR "
31+
export EXTRA_ARGS="$EXTRA_ARGS --synthetic_multi_hot_criteo_path $DATASET_DIR "
3232
fi
3333
elif [[ "${TEST_MODE}" == "ACCURACY" ]]; then
3434
echo "TEST_MODE set to ACCURACY"
@@ -42,7 +42,7 @@ elif [[ "${TEST_MODE}" == "ACCURACY" ]]; then
4242
echo "The required environment variable WEIGHT_DIR has not been set"
4343
exit 1
4444
fi
45-
EXTRA_ARGS="$EXTRA_ARGS --synthetic_multi_hot_criteo_path $DATASET_DIR "
45+
export EXTRA_ARGS="$EXTRA_ARGS --synthetic_multi_hot_criteo_path $DATASET_DIR "
4646
else
4747
echo "Please set TEST_MODE to THROUGHPUT or ACCURACY"
4848
exit 1
@@ -54,8 +54,8 @@ if [ ! -e "${MODEL_DIR}/dlrm_main.py" ]; then
5454
exit 1
5555
fi
5656

57-
MODEL_SCRIPT=${MODEL_DIR}/dlrm_main.py
58-
INT8_CONFIG=${MODEL_DIR}/int8_configure.json
57+
export MODEL_SCRIPT=${MODEL_DIR}/dlrm_main.py
58+
export INT8_CONFIG=${MODEL_DIR}/int8_configure.json
5959

6060
echo "PRECISION: ${PRECISION}"
6161
echo "OUTPUT_DIR: ${OUTPUT_DIR}"
@@ -85,9 +85,9 @@ elif [[ $PRECISION == "fp16" ]]; then
8585
echo "running fp16 path"
8686
ARGS="$ARGS --dtype fp16"
8787
elif [[ $PRECISION == "int8" ]]; then
88-
if [ ! -e "${MODEL_DIR}/int8_weight.json" ]; then
89-
echo "int8_weight.json not found in MODEL_DIR, will run weight conversion"
90-
ARGS="$ARGS --int8-prepare"
88+
if [[ "0" == ${TORCH_INDUCTOR} ]];then
89+
echo "prepare int8 weight"
90+
bash ${MODEL_DIR}/prepare_int8.sh
9191
fi
9292
echo "running int8 path"
9393
ARGS="$ARGS --dtype int8 --int8-configure-dir ${INT8_CONFIG}"

0 commit comments

Comments
 (0)