intel · rayngun · Dec 18, 2024 · Feb 11, 2025 · Feb 11, 2025
diff --git a/.github/workflows/internal_ci.yml b/.github/workflows/internal_ci.yml
@@ -0,0 +1,34 @@
+name : Internal CI  # Builds the OpenVINO EP inside Docker for every pull request
+
+on:
+  pull_request:
+    branches:
+      - '**'  # Trigger on a PR targeting any branch
+
+jobs:
+  build:
+
+    runs-on: [self-hosted, Linux, X64]   # Self-hosted Linux x64 runner (a Lunar Lake machine)
+    env:
+      BUILD_SOURCESDIRECTORY: ${{ github.workspace }}
+      BUILD_BINARIESDIRECTORY: ${{ github.workspace }}/build
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        ref: ${{ github.event.pull_request.head.sha }}  # exact PR head commit; a bare branch name does not resolve for fork PRs
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+       python-version: '3.10'
+
+    - name: Create build directory
+      run: |
+         mkdir -p "${{ env.BUILD_BINARIESDIRECTORY }}"
+         chmod -R 777 "${{ env.BUILD_BINARIESDIRECTORY }}"
+
+    - name: Running Internal CI   # Run the OpenVINO docker build against the checked-out PR sources
+      run: |
+        cd tools/ci_build/github/linux/
+        dir
+        ./run_dockerbuild.sh -o ubuntu22.04 -p 3.10 -d openvino -v 2024.5.0 -x "--config Release --use_openvino CPU --build_wheel --build_shared_lib --parallel "
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -95,8 +95,10 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Atan", V_2020_4, {"CPU", "GPU"}},
     {"Atanh", V_2020_4, {"CPU"}},
     {"Atanh", V_2022_1, {"GPU"}},
+    {"Attention", V_2023_0, {"CPU", "GPU"}},
     {"AveragePool", V_2020_4, {"CPU", "GPU"}},
     {"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
+    {"BiasGelu", V_2023_0, {"CPU", "GPU"}},
     {"BitShift", V_2022_1, {"CPU"}},
     {"Cast", V_2020_4, {"CPU", "GPU"}},
     {"CastLike", V_2023_1, {"CPU", "GPU"}},
@@ -123,6 +125,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Dropout", V_2020_4, {"CPU", "GPU"}},
     {"Elu", V_2020_4, {"CPU", "GPU"}},
     {"Einsum", V_2023_1, {"CPU", "GPU"}},
+    {"EmbedLayerNormalization", V_2024_5, {"CPU", "GPU"}},
     {"EPContext", V_2024_0, {"CPU", "GPU", "NPU"}},
     {"Equal", V_2020_4, {"CPU", "GPU"}},
     {"Erf", V_2020_4, {"CPU", "GPU"}},
@@ -131,6 +134,8 @@ std::vector<SupportedOp> supported_op_mode = {
     {"EyeLike", V_2022_1, {"CPU"}},
     {"Flatten", V_2020_4, {"CPU", "GPU"}},
     {"Floor", V_2020_4, {"CPU", "GPU"}},
+    {"FusedConv", V_2023_0, {"CPU", "GPU"}},
+    {"FusedGemm", V_2023_0, {"CPU", "GPU"}},
     {"Gather", V_2020_4, {"CPU", "GPU"}},
     {"GatherElements", V_2022_2, {"CPU", "GPU"}},
     {"GatherND", V_2021_4, {"CPU", "GPU"}},
@@ -164,6 +169,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"LSTM", V_2020_4, {"CPU", "GPU"}},
     {"MatMul", V_2020_4, {"CPU", "GPU"}},
     {"MatMulInteger", V_2022_1, {"CPU"}},
+    {"MatMulNBits", V_2024_5, {"CPU", "GPU"}},
     {"Max", V_2020_4, {"CPU", "GPU"}},
     {"MaxPool", V_2020_4, {"CPU", "GPU"}},
     {"Mean", V_2020_4, {"CPU", "GPU"}},
@@ -222,6 +228,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Sin", V_2022_1, {"CPU", "GPU"}},
     {"Sinh", V_2020_4, {"CPU"}},
     {"Size", V_2022_1, {"CPU", "GPU"}},
+    {"SkipLayerNormalization", V_2024_5, {"CPU", "GPU"}},
     {"Slice", V_2020_4, {"CPU", "GPU"}},
     {"Softmax", V_2020_4, {"CPU", "GPU"}},
     {"Softplus", V_2022_1, {"CPU", "GPU"}},

diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
@@ -478,21 +478,32 @@ static void AddStandaloneNodeUnit(onnxruntime::Graph& dst_graph, const onnxrunti
   };
 
   if (node_unit.OpType() == "QuantizeLinear") {
+    const auto& node =node_unit.GetNode();
     SkipReason reason;
     // keep if next target is supported
     if (CheckQRuleSet(node_unit, &node_unit.GetNode(), src_graph, reason))
       AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
     // #2 If input 0 is a constant initializer, then don't keep the Q
     else if (src_graph.IsConstantInitializer(node_unit.GetNode().InputDefs().at(0)->Name(), true))
       return;
+    else if (node.GetInputEdgesCount() == 1 &&
+        (node.InputNodesBegin()->OpType() == "Conv" || node.InputNodesBegin()->OpType() == "Add") &&
+        (GetQDQDataType(&node) == DT_UINT8 || GetQDQDataType(&node) == DT_INT8))
+      AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
     else
       add_identity_op(false);
   } else if (node_unit.OpType() == "DequantizeLinear") {
+    const auto& node =node_unit.GetNode();
     // keep if prev target is supported
     if (node_unit.GetNode().Name().find(DuplicateDQ) != std::string::npos)
       add_identity_op(true);
     else if (IsConnectedQPresent(src_graph, dst_graph.Nodes(), &node_unit.GetNode(), node_unit.GetNode().InputDefs()))
       AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
+    else if (src_graph.IsConstantInitializer(node_unit.GetNode().InputDefs().at(0)->Name(), true))
+      AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
+    else if (node.GetOutputEdgesCount() == 1 && node.OutputNodesBegin()->OpType() == "Conv" &&
+        (GetQDQDataType(&node) == DT_UINT16 || GetQDQDataType(&node) == DT_INT16))
+      add_identity_op(false);
     else if (DQFeedsASupportedOp(&node_unit.GetNode()))
       AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
     else

diff --git a/onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc b/onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc
@@ -158,7 +158,11 @@ static void RunTest(const embedlayernorm::OpData& data,
       execution_providers.push_back(DefaultDmlExecutionProvider());
       tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
     } else {
+#if defined(USE_OPENVINO)
+      tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
+#else
       tester.Run();
+#endif
     }
   }
 }

diff --git a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc
@@ -85,7 +85,7 @@ struct TestOptions {
   std::optional<float> output_rel_error{};
 };
 
-std::ostream& operator<<(std::ostream& os, const TestOptions& opts) {
+[[maybe_unused]] std::ostream& operator<<(std::ostream& os, const TestOptions& opts) {
   return os << "M:" << opts.M << ", N:" << opts.N << ", K:" << opts.K
             << ", block_size:" << opts.block_size
             << ", accuracy_level:" << opts.accuracy_level
@@ -327,6 +327,8 @@ void TestMatMulNBitsTyped() {
 #endif  // !defined(USE_DML) && !defined(USE_WEBGPU)
 }
 
+#if !defined(USE_OPENVINO)
+
 TEST(MatMulNBits, Float32_Accuracy0) {
   TestMatMulNBitsTyped<float, 1, 1, 16, 16, 0>();
   TestMatMulNBitsTyped<float, 1, 2, 16, 16, 0>();
@@ -462,6 +464,7 @@ TEST(MatMulNBits, Float16_Accuracy4) {
 }
 #endif
 #endif
+#endif
 
 #if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) || defined(USE_WEBGPU)