intel
diff --git a/‎cmake/CMakeLists.txt‎
Lines changed: 23 additions & 1 deletion b/‎cmake/CMakeLists.txt‎
Lines changed: 23 additions & 1 deletion
diff --git a/‎cmake/onnxruntime.cmake‎
Lines changed: 17 additions & 13 deletions b/‎cmake/onnxruntime.cmake‎
Lines changed: 17 additions & 13 deletions
diff --git a/‎cmake/onnxruntime_providers_cpu.cmake‎
Lines changed: 0 additions & 5 deletions b/‎cmake/onnxruntime_providers_cpu.cmake‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎cmake/onnxruntime_providers_openvino.cmake‎
Lines changed: 0 additions & 5 deletions b/‎cmake/onnxruntime_providers_openvino.cmake‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎cmake/winml.cmake‎
Lines changed: 0 additions & 5 deletions b/‎cmake/winml.cmake‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎include/onnxruntime/core/graph/basic_types.h‎
Lines changed: 2 additions & 0 deletions b/‎include/onnxruntime/core/graph/basic_types.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎java/build-android.gradle‎
Lines changed: 1 addition & 1 deletion b/‎java/build-android.gradle‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎js/web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts‎
Lines changed: 0 additions & 7 deletions b/‎js/web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts‎
Lines changed: 0 additions & 7 deletions
diff --git a/‎onnxruntime/core/mlas/inc/mlas_q4.h‎
Lines changed: 18 additions & 8 deletions b/‎onnxruntime/core/mlas/inc/mlas_q4.h‎
Lines changed: 18 additions & 8 deletions
@@ -551,7 +551,7 @@ if(NOT WIN32 AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android")
 endif()
 
 find_package(Patch)
-if (WIN32 AND NOT Patch_FOUND)
+if (CMAKE_HOST_WIN32 AND NOT Patch_FOUND)
     # work around CI machines missing patch from the git install by falling back to the binary in this repo.
     # replicate what happens in https://github.com/Kitware/CMake/blob/master/Modules/FindPatch.cmake but without
     # the hardcoded suffixes in the path to the patch binary.
@@ -1140,6 +1140,13 @@ endfunction()
 function(onnxruntime_add_shared_library target_name)
+  # Builds the SHARED library `target_name` from the sources passed in ARGN and
+  # applies the common target configuration via onnxruntime_configure_target().
   add_library(${target_name} SHARED ${ARGN})
   onnxruntime_configure_target(${target_name})
+  if(WIN32)
+    # Pass the file/product version values to the target as private compile
+    # definitions (presumably consumed by the Windows version resource -- confirm).
+    target_compile_definitions(${target_name} PRIVATE
+      VER_MAJOR=${VERSION_MAJOR_PART}
+      VER_MINOR=${VERSION_MINOR_PART}
+      VER_BUILD=${VERSION_BUILD_PART}
+      VER_PRIVATE=${VERSION_PRIVATE_PART}
+      VER_STRING=\"${VERSION_STRING}\")
+  endif()
 endfunction()
 
 function(onnxruntime_add_static_library target_name)
@@ -1154,6 +1161,13 @@ function(onnxruntime_add_shared_library_module target_name)
   else()
     #On Windows, this target shouldn't generate an import lib, but I don't know how to disable it.
     add_library(${target_name} MODULE ${ARGN})
+    if(WIN32)
+        target_compile_definitions(${target_name} PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
+        target_compile_definitions(${target_name} PRIVATE VER_MINOR=${VERSION_MINOR_PART})
+        target_compile_definitions(${target_name} PRIVATE VER_BUILD=${VERSION_BUILD_PART})
+        target_compile_definitions(${target_name} PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
+        target_compile_definitions(${target_name} PRIVATE VER_STRING=\"${VERSION_STRING}\")
+    endif()
   endif()
 
   onnxruntime_configure_target(${target_name})
@@ -1636,6 +1650,14 @@ set(VERSION_MINOR_PART   0 CACHE STRING "Second part of numeric file/product ver
 set(VERSION_BUILD_PART       0 CACHE STRING "Third part of numeric file/product version.")
 set(VERSION_PRIVATE_PART     0 CACHE STRING "Fourth part of numeric file/product version.")
 set(VERSION_STRING       "Internal Build" CACHE STRING "String representation of file/product version.")
+# No explicit file/product version was supplied (all four parts are still "0"):
+# take the first three dot-separated components of ORT_VERSION as the
+# major/minor/build parts and use ORT_VERSION itself as the version string.
+# NOTE(review): assumes ORT_VERSION has at least three components -- confirm.
+if(VERSION_MAJOR_PART STREQUAL "0" AND VERSION_MINOR_PART STREQUAL "0" AND VERSION_BUILD_PART STREQUAL "0" AND VERSION_PRIVATE_PART STREQUAL "0")
+    string(REPLACE "." ";"  ORT_VERSION_STRING_LIST "${ORT_VERSION}")
+    list(GET ORT_VERSION_STRING_LIST 0 VERSION_MAJOR_PART)
+    list(GET ORT_VERSION_STRING_LIST 1 VERSION_MINOR_PART)
+    list(GET ORT_VERSION_STRING_LIST 2 VERSION_BUILD_PART)
+    # Expand the variable: a bare ORT_VERSION here would store the literal text
+    # "ORT_VERSION" as the version string.
+    set(VERSION_STRING "${ORT_VERSION}")
+endif()
+
 
 if (WIN32)
   list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SYS_PATH_LIB})
 
@@ -95,7 +95,6 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
     FRAMEWORK TRUE
     FRAMEWORK_VERSION A
     MACOSX_FRAMEWORK_INFO_PLIST ${INFO_PLIST_PATH}
-    SOVERSION ${ORT_VERSION}
     # Note: The PUBLIC_HEADER and VERSION properties for the 'onnxruntime' target will be set later in this file.
   )
 else()
@@ -108,11 +107,7 @@ endif()
 add_dependencies(onnxruntime onnxruntime_generate_def ${onnxruntime_EXTERNAL_DEPENDENCIES})
 target_include_directories(onnxruntime PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime>")
 
-target_compile_definitions(onnxruntime PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
-target_compile_definitions(onnxruntime PRIVATE VER_MINOR=${VERSION_MINOR_PART})
-target_compile_definitions(onnxruntime PRIVATE VER_BUILD=${VERSION_BUILD_PART})
-target_compile_definitions(onnxruntime PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
-target_compile_definitions(onnxruntime PRIVATE VER_STRING=\"${VERSION_STRING}\")
+
 target_compile_definitions(onnxruntime PRIVATE FILE_NAME=\"onnxruntime.dll\")
 
 if(UNIX)
@@ -130,7 +125,6 @@ if (NOT WIN32)
     set(ONNXRUNTIME_SO_LINK_FLAG " -Wl,-exported_symbols_list,${SYMBOL_FILE}")
     if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
       set_target_properties(onnxruntime PROPERTIES
-        SOVERSION ${ORT_VERSION}
         MACOSX_RPATH TRUE
         INSTALL_RPATH_USE_LINK_PATH FALSE
         BUILD_WITH_INSTALL_NAME_DIR TRUE
@@ -222,13 +216,23 @@ target_link_libraries(onnxruntime PRIVATE
 )
 
 set_property(TARGET onnxruntime APPEND_STRING PROPERTY LINK_FLAGS ${ONNXRUNTIME_SO_LINK_FLAG} ${onnxruntime_DELAYLOAD_FLAGS})
-set_target_properties(onnxruntime PROPERTIES
-  PUBLIC_HEADER "${ONNXRUNTIME_PUBLIC_HEADERS}"
-  LINK_DEPENDS ${SYMBOL_FILE}
-  VERSION ${ORT_VERSION}
-  FOLDER "ONNXRuntime"
-)
 
+# Properties applied to the onnxruntime target on every platform.
+set_target_properties(onnxruntime PROPERTIES
+  PUBLIC_HEADER "${ONNXRUNTIME_PUBLIC_HEADERS}"
+  LINK_DEPENDS ${SYMBOL_FILE}
+  VERSION ${ORT_VERSION}
+  FOLDER "ONNXRuntime")
+
+# SOVERSION is set only for non-Apple, non-Windows builds; it is omitted in
+# Windows/macOS/iOS/.. builds.
+#See: https://cmake.org/cmake/help/latest/prop_tgt/SOVERSION.html
+if(NOT (APPLE OR WIN32))
+  set_target_properties(onnxruntime PROPERTIES SOVERSION 1)
+endif()
 install(TARGETS onnxruntime
         EXPORT ${PROJECT_NAME}Targets
         PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime
 
@@ -236,11 +236,6 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
   set_target_properties(onnxruntime_providers_shared PROPERTIES FOLDER "ONNXRuntime")
   set_target_properties(onnxruntime_providers_shared PROPERTIES LINKER_LANGUAGE CXX)
 
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_MINOR=${VERSION_MINOR_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_BUILD=${VERSION_BUILD_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
-  target_compile_definitions(onnxruntime_providers_shared PRIVATE VER_STRING=\"${VERSION_STRING}\")
   target_compile_definitions(onnxruntime_providers_shared PRIVATE FILE_NAME=\"onnxruntime_providers_shared.dll\")
 
 
 
@@ -45,11 +45,6 @@
   target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS} $ENV{OPENCL_INCS}/../../cl_headers/)
   target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS})
 
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_MINOR=${VERSION_MINOR_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_BUILD=${VERSION_BUILD_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
-  target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_STRING=\"${VERSION_STRING}\")
   target_compile_definitions(onnxruntime_providers_openvino PRIVATE FILE_NAME=\"onnxruntime_providers_openvino.dll\")
 
   if(MSVC)
 
@@ -718,11 +718,6 @@ target_compile_definitions(winml_dll PRIVATE ONNX_ML)
 target_compile_definitions(winml_dll PRIVATE LOTUS_LOG_THRESHOLD=2)
 target_compile_definitions(winml_dll PRIVATE LOTUS_ENABLE_STDERR_LOGGING)
 target_compile_definitions(winml_dll PRIVATE PLATFORM_WINDOWS)
-target_compile_definitions(winml_dll PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
-target_compile_definitions(winml_dll PRIVATE VER_MINOR=${VERSION_MINOR_PART})
-target_compile_definitions(winml_dll PRIVATE VER_BUILD=${VERSION_BUILD_PART})
-target_compile_definitions(winml_dll PRIVATE VER_PRIVATE=${VERSION_PRIVATE_PART})
-target_compile_definitions(winml_dll PRIVATE VER_STRING=\"${VERSION_STRING}\")
 target_compile_definitions(winml_dll PRIVATE BINARY_NAME=\"${BINARY_NAME}\")
 
 if (onnxruntime_WINML_NAMESPACE_OVERRIDE STREQUAL "Windows")
 
@@ -19,6 +19,8 @@ class TensorProto;
 class SparseTensorProto;
 class TypeProto;
 class AttributeProto;
+class FunctionProto;
+class OperatorSetIdProto;
 // define types that would come from the ONNX library if we were building against it.
 #if defined(ORT_MINIMAL_BUILD)
 using OperatorSetVersion = int;
 
@@ -105,7 +105,7 @@ task sourcesJar(type: Jar) {
 
 task javadoc(type: Javadoc) {
 	source = android.sourceSets.main.java.srcDirs
-	classpath += project.files(android.getBootClasspath().join(File.pathSeparator))
+	classpath += project.files(android.getBootClasspath())
 }
 
 task javadocJar(type: Jar, dependsOn: javadoc) {
 
@@ -328,13 +328,6 @@ fn main(@builtin(local_invocation_id) localId : vec3<u32>,
     var kStart = ${splitK ? `i32(globalId.z) * ${splitedDimInner}` : '0'};
 
     var acc : array<array<${type}, colPerThread>, rowPerThread>;
-
-    // Without this initialization strange values show up in acc.
-    for (var innerRow = 0; innerRow < rowPerThread; innerRow = innerRow + 1) {
-      for (var innerCol = 0; innerCol < colPerThread; innerCol = innerCol + 1) {
-        acc[innerRow][innerCol] = 0.0;
-      }
-    }
     ${matmulSnippet}
   }
 `;
 
@@ -360,12 +360,12 @@ MlasDequantizeBlockwise(
     );
 
 /**
- * @brief Blockwise 2 bits or 4 bits quantization. After quantization, the weights and zero points
- *        are packed row-wise. In terms of the qbits type, dst and src have the same shape, and
- *        scales and zero_points have the same shape.
- *        columns must be multiple of 8 / qbits.
+ * @brief Blockwise 4 bits quantization. After quantization, the weights and zero points
+ *        are packed row-wise. If zero_points is null, quantized type is int4 with default
+ *        zero point 0, to align with DQ schema. Otherwise, quantized type is uint4.
+ *        In int4/uint4, dst has the same shape as src, and zero_points has the same shape as scales.
  * @tparam Tin
- * @tparam qbits            number of bits used for quantization, 2 or 4
+ * @tparam qbits            number of bits used for quantization, only 4 is supported
  * @param src               points to the floating point matrix, to be quantized, row major shape [rows, columns]
  * @param scales            points to the scales matrix, row major
  * @param zero_points       points to the zero_points matrix, row major
@@ -376,9 +376,10 @@ MlasDequantizeBlockwise(
  * @param columns
  * @param quant_block_size  number of elements in a quantize block
  * @param thread_pool
+ * @return true if the quantized type is signed (int4), false if it is unsigned (uint4).
  */
 template <typename Tin, int qbits>
-void
+bool
 MlasQDQQuantizeBlockwise(
     const Tin* src,
     Tin* scales,
@@ -395,8 +396,17 @@ MlasQDQQuantizeBlockwise(
  * @brief Transpose blockwise quantized tensors. The src tensors are row major. src weights and zero
  *        points are packed row-wise. The dst tensors are column major. dst weights and zero points
  *        are packed column-wise.
+ *        dst_weights and dst_zero_points are in uint4.
+ *        If src_weights is int4 and src_zero_points is provided, src_weights and src_zero_points
+ *        are converted to uint4 by adding 8.
+ *        If src_weights is int4 and src_zero_points is not provided, src_weights is converted to
+ *        uint4 by adding 8; the implicit src zero point is 0, so dst_zero_points is 8.
+ *        If src_weights is uint4 and src_zero_points is provided, the tensors are only transposed.
+ *        If src_weights is uint4 and src_zero_points is not provided, the caller must allocate
+ *        dst_zero_points filled with 0 values; otherwise an exception is thrown.
  * @tparam Tin
- * @tparam qbits            number of bits used for quantization, 2 or 4
+ * @tparam qbits            number of bits used for quantization, only 4 is supported
+ * @tparam signed_quant     true when quantized type is signed, false when quantized type is unsigned
  * @param src_weights       points to the quantized matrix, row major, shape [rows, columns] in qbits type.
  *                          In uint8_t type, shape is [rows, columns * qbits / 8].
  * @param src_scales        points to the scales matrix, row major
@@ -410,7 +420,7 @@ MlasQDQQuantizeBlockwise(
  * @param quant_block_size  number of elements in a quantize block
  * @param thread_pool
  */
-template <typename Tin, int qbits>
+template <typename Tin, int qbits, bool signed_quant>
 void
 MlasQDQTransposeBlockwiseQuantized(
     const uint8_t* src_weights,
Original file line number	Diff line number	Diff line change
`@@ -105,7 +105,7 @@ task sourcesJar(type: Jar) {`
`105`	`105`
`106`	`106`	`task javadoc(type: Javadoc) {`
`107`	`107`	`source = android.sourceSets.main.java.srcDirs`
`108`		`- classpath += project.files(android.getBootClasspath().join(File.pathSeparator))`
	`108`	`+ classpath += project.files(android.getBootClasspath())`
`109`	`109`	`}`
`110`	`110`
`111`	`111`	`task javadocJar(type: Jar, dependsOn: javadoc) {`