Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds conditions to keep QDQ #565

Open
wants to merge 3 commits into
base: ovep-develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/internal_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Internal CI workflow: on every pull request, builds ONNX Runtime with the
# OpenVINO execution provider by invoking the repository's Docker build script.
name: Internal CI

on:
  pull_request:
    branches:
      - '**'  # Triggers on a PR to any branch

jobs:
  build:

    runs-on: [self-hosted, Linux, X64]  # Runs on a Lunar lake
    env:
      # NOTE(review): presumably consumed by run_dockerbuild.sh - confirm.
      BUILD_SOURCESDIRECTORY: ${{ github.workspace }}
      BUILD_BINARIESDIRECTORY: ${{ github.workspace }}/build
    steps:
      - uses: actions/checkout@v4
        with:
          # Check out the PR head commit. The head SHA is used instead of the
          # head branch name because a branch name only resolves for branches
          # of this repository; for PRs opened from forks it is either missing
          # or refers to an unrelated branch.
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Create build directory
        # Job-level env entries are exported to the step's shell, so the path
        # is read from the quoted shell variable rather than being spliced into
        # the script text; this keeps paths containing spaces intact.
        # NOTE(review): the world-writable mode is presumably required so the
        # container user can write build output - confirm before tightening.
        run: |
          mkdir -p "$BUILD_BINARIESDIRECTORY"
          chmod -R 777 "$BUILD_BINARIESDIRECTORY"

      - name: Running Internal CI  # Trigger Internal CI on the PR branch
        run: |
          cd tools/ci_build/github/linux/
          dir
          ./run_dockerbuild.sh -o ubuntu22.04 -p 3.10 -d openvino -v 2024.5.0 -x "--config Release --use_openvino CPU --build_wheel --build_shared_lib --parallel "
7 changes: 7 additions & 0 deletions onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,10 @@ std::vector<SupportedOp> supported_op_mode = {
{"Atan", V_2020_4, {"CPU", "GPU"}},
{"Atanh", V_2020_4, {"CPU"}},
{"Atanh", V_2022_1, {"GPU"}},
{"Attention", V_2023_0, {"CPU", "GPU"}},
{"AveragePool", V_2020_4, {"CPU", "GPU"}},
{"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
{"BiasGelu", V_2023_0, {"CPU", "GPU"}},
{"BitShift", V_2022_1, {"CPU"}},
{"Cast", V_2020_4, {"CPU", "GPU"}},
{"CastLike", V_2023_1, {"CPU", "GPU"}},
Expand All @@ -123,6 +125,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"Dropout", V_2020_4, {"CPU", "GPU"}},
{"Elu", V_2020_4, {"CPU", "GPU"}},
{"Einsum", V_2023_1, {"CPU", "GPU"}},
{"EmbedLayerNormalization", V_2024_5, {"CPU", "GPU"}},
{"EPContext", V_2024_0, {"CPU", "GPU", "NPU"}},
{"Equal", V_2020_4, {"CPU", "GPU"}},
{"Erf", V_2020_4, {"CPU", "GPU"}},
Expand All @@ -131,6 +134,8 @@ std::vector<SupportedOp> supported_op_mode = {
{"EyeLike", V_2022_1, {"CPU"}},
{"Flatten", V_2020_4, {"CPU", "GPU"}},
{"Floor", V_2020_4, {"CPU", "GPU"}},
{"FusedConv", V_2023_0, {"CPU", "GPU"}},
{"FusedGemm", V_2023_0, {"CPU", "GPU"}},
{"Gather", V_2020_4, {"CPU", "GPU"}},
{"GatherElements", V_2022_2, {"CPU", "GPU"}},
{"GatherND", V_2021_4, {"CPU", "GPU"}},
Expand Down Expand Up @@ -164,6 +169,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"LSTM", V_2020_4, {"CPU", "GPU"}},
{"MatMul", V_2020_4, {"CPU", "GPU"}},
{"MatMulInteger", V_2022_1, {"CPU"}},
{"MatMulNBits", V_2024_5, {"CPU", "GPU"}},
{"Max", V_2020_4, {"CPU", "GPU"}},
{"MaxPool", V_2020_4, {"CPU", "GPU"}},
{"Mean", V_2020_4, {"CPU", "GPU"}},
Expand Down Expand Up @@ -222,6 +228,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"Sin", V_2022_1, {"CPU", "GPU"}},
{"Sinh", V_2020_4, {"CPU"}},
{"Size", V_2022_1, {"CPU", "GPU"}},
{"SkipLayerNormalization", V_2024_5, {"CPU", "GPU"}},
{"Slice", V_2020_4, {"CPU", "GPU"}},
{"Softmax", V_2020_4, {"CPU", "GPU"}},
{"Softplus", V_2022_1, {"CPU", "GPU"}},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,21 +478,32 @@ static void AddStandaloneNodeUnit(onnxruntime::Graph& dst_graph, const onnxrunti
};

if (node_unit.OpType() == "QuantizeLinear") {
const auto& node =node_unit.GetNode();
SkipReason reason;
// keep if next target is supported
if (CheckQRuleSet(node_unit, &node_unit.GetNode(), src_graph, reason))
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
// #2 If input 0 is a constant initializer, then don't keep the Q
else if (src_graph.IsConstantInitializer(node_unit.GetNode().InputDefs().at(0)->Name(), true))
return;
else if (node.GetInputEdgesCount() == 1 &&
(node.InputNodesBegin()->OpType() == "Conv" || node.InputNodesBegin()->OpType() == "Add") &&
(GetQDQDataType(&node) == DT_UINT8 || GetQDQDataType(&node) == DT_INT8))
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
else
add_identity_op(false);
} else if (node_unit.OpType() == "DequantizeLinear") {
const auto& node =node_unit.GetNode();
// keep if prev target is supported
if (node_unit.GetNode().Name().find(DuplicateDQ) != std::string::npos)
add_identity_op(true);
else if (IsConnectedQPresent(src_graph, dst_graph.Nodes(), &node_unit.GetNode(), node_unit.GetNode().InputDefs()))
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
else if (src_graph.IsConstantInitializer(node_unit.GetNode().InputDefs().at(0)->Name(), true))
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
else if (node.GetOutputEdgesCount() == 1 && node.OutputNodesBegin()->OpType() == "Conv" &&
(GetQDQDataType(&node) == DT_UINT16 || GetQDQDataType(&node) == DT_INT16))
add_identity_op(false);
else if (DQFeedsASupportedOp(&node_unit.GetNode()))
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
else
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,11 @@ static void RunTest(const embedlayernorm::OpData& data,
execution_providers.push_back(DefaultDmlExecutionProvider());
tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
} else {
#if defined(USE_OPENVINO)
tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
#else
tester.Run();
#endif
}
}
}
Expand Down
5 changes: 4 additions & 1 deletion onnxruntime/test/contrib_ops/matmul_4bits_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct TestOptions {
std::optional<float> output_rel_error{};
};

std::ostream& operator<<(std::ostream& os, const TestOptions& opts) {
[[maybe_unused]] std::ostream& operator<<(std::ostream& os, const TestOptions& opts) {
return os << "M:" << opts.M << ", N:" << opts.N << ", K:" << opts.K
<< ", block_size:" << opts.block_size
<< ", accuracy_level:" << opts.accuracy_level
Expand Down Expand Up @@ -327,6 +327,8 @@ void TestMatMulNBitsTyped() {
#endif // !defined(USE_DML) && !defined(USE_WEBGPU)
}

#if !defined(USE_OPENVINO)

TEST(MatMulNBits, Float32_Accuracy0) {
TestMatMulNBitsTyped<float, 1, 1, 16, 16, 0>();
TestMatMulNBitsTyped<float, 1, 2, 16, 16, 0>();
Expand Down Expand Up @@ -462,6 +464,7 @@ TEST(MatMulNBits, Float16_Accuracy4) {
}
#endif
#endif
#endif

#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) || defined(USE_WEBGPU)

Expand Down
Loading