Commit da3442d

Fix ONNXRT example with upgraded optimum 1.14.0 (#1381)
Signed-off-by: Mengni Wang <[email protected]>
Signed-off-by: yuwenzho <[email protected]>
1 parent f5167dc commit da3442d
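
optimum 1.14.0 changed the default `text-generation-with-past` ONNX export, so the separate decoder files these examples and tests load (e.g. `decoder_model.onnx`) are apparently only produced when the new `--legacy` flag is passed; the export commands below are version-gated accordingly.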

File tree

5 files changed: +97, −18 lines

examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py

Lines changed: 34 additions & 14 deletions
@@ -1,6 +1,9 @@
 import argparse
 import os
 import subprocess
+import optimum.version
+from packaging.version import Version
+OPTIMUM114_VERSION = Version("1.14.0")
 
 
 def parse_arguments():
@@ -12,20 +15,37 @@ def parse_arguments():
 
 def prepare_model(input_model, output_model):
     print("\nexport model...")
-    subprocess.run(
-        [
-            "optimum-cli",
-            "export",
-            "onnx",
-            "--model",
-            f"{input_model}",
-            "--task",
-            "text-generation-with-past",
-            f"{output_model}",
-        ],
-        stdout=subprocess.PIPE,
-        text=True,
-    )
+    if Version(optimum.version.__version__) >= OPTIMUM114_VERSION:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                "--legacy",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+    else:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
 
     assert os.path.exists(output_model), f"{output_model} doesn't exist!"
 
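
The same version gate appears in both `prepare_model.py` scripts (this file and the new `weight_only` copy below). A minimal standalone sketch of the pattern, assuming only that `optimum` and `packaging` are installed; the `build_export_args` helper is illustrative, not part of the commit:

```python
import optimum.version
from packaging.version import Version

OPTIMUM114_VERSION = Version("1.14.0")


def build_export_args(input_model: str, output_model: str) -> list:
    """Assemble the optimum-cli command, adding --legacy on optimum >= 1.14.0."""
    args = [
        "optimum-cli", "export", "onnx",
        "--model", input_model,
        "--task", "text-generation-with-past",
    ]
    if Version(optimum.version.__version__) >= OPTIMUM114_VERSION:
        # --legacy preserves the pre-1.14 layout with separate decoder files
        args.append("--legacy")
    args.append(output_model)
    return args
```

The returned list can be handed to `subprocess.run(...)` exactly as the diff does.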

examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ pip install -r requirements.txt
 ## 2. Prepare Model
 
 ```bash
-optimum-cli export onnx --model decapoda-research/llama-7b-hf --task text-generation-with-past ./llama_7b
+python prepare_model.py --input_model="decapoda-research/llama-7b-hf" --output_model="./llama_7b"
 ```
 
 # Run
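
Pointing the README at the script rather than a hard-coded `optimum-cli` invocation keeps the documented command valid on both sides of the 1.14.0 boundary: `prepare_model.py` appends `--legacy` only when the installed optimum is 1.14.0 or newer.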
examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+import argparse
+import os
+import subprocess
+import optimum.version
+from packaging.version import Version
+OPTIMUM114_VERSION = Version("1.14.0")
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_model", type=str, required=False, default="")
+    parser.add_argument("--output_model", type=str, required=True)
+    return parser.parse_args()
+
+
+def prepare_model(input_model, output_model):
+    print("\nexport model...")
+    if Version(optimum.version.__version__) >= OPTIMUM114_VERSION:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                "--legacy",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+    else:
+        subprocess.run(
+            [
+                "optimum-cli",
+                "export",
+                "onnx",
+                "--model",
+                f"{input_model}",
+                "--task",
+                "text-generation-with-past",
+                f"{output_model}",
+            ],
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+
+    assert os.path.exists(output_model), f"{output_model} doesn't exist!"
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    prepare_model(args.input_model, args.output_model)

test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py

Lines changed: 3 additions & 1 deletion
@@ -38,7 +38,9 @@ def __iter__(self):
 class TestWeightOnlyAdaptor(unittest.TestCase):
     @classmethod
     def setUpClass(self):
-        cmd = "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/"
+        cmd = (
+            "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation --legacy gptj/"
+        )
         p = subprocess.Popen(
             cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
         )  # nosec

test/model/test_onnx_model.py

Lines changed: 4 additions & 2 deletions
@@ -193,7 +193,9 @@ def setUp(self):
         model = onnx.helper.make_model(graph, **{"opset_imports": [onnx.helper.make_opsetid("", 14)]})
         self.matmul_reshape_model = model
 
-        cmd = "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/"
+        cmd = (
+            "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation --legacy gptj/"
+        )
         p = subprocess.Popen(
             cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
         )  # nosec
@@ -216,7 +218,7 @@ def test_hf_model(self):
 
         config = AutoConfig.from_pretrained("hf_test")
         sessions = ORTModelForCausalLM.load_model("hf_test/decoder_model.onnx")
-        model = ORTModelForCausalLM(sessions[0], config, "hf_test", use_cache=False, use_io_binding=False)
+        model = ORTModelForCausalLM(sessions, config, model_save_dir="hf_test", use_cache=False, use_io_binding=False)
         self.assertNotEqual(model, None)
 
     def test_nodes(self):
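
Two test-side adjustments accompany the example fix: the export command now passes `--legacy` unconditionally (the test environment is presumably expected to run optimum >= 1.14.0), and the `ORTModelForCausalLM` construction matches the 1.14.0 calling convention, passing the `load_model` result directly and supplying `model_save_dir` as a keyword argument rather than positionally.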
