From b0973a236a493f7399f21b0d6c905936dc917900 Mon Sep 17 00:00:00 2001 From: Jaswanth Gannamaneni Date: Mon, 2 Feb 2026 23:00:10 -0800 Subject: [PATCH 1/4] [OpenVINO EP] Add workload type unit tests (CVS-166020) --- .../openvino_ep_workload_type_test.cc | 269 ++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc diff --git a/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc new file mode 100644 index 0000000000000..add2dd447ebb0 --- /dev/null +++ b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc @@ -0,0 +1,269 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + + + +#include +#include +#include "core/framework/provider_options.h" +#include "core/framework/tensor_shape.h" +#include "test/util/include/test_utils.h" +#include "test/util/include/test/test_environment.h" +#include "test/util/include/default_providers.h" +#include "core/session/onnxruntime_cxx_api.h" +#include "core/session/onnxruntime_session_options_config_keys.h" +#include "core/session/inference_session.h" +#include "gtest/gtest.h" +#include "gmock/gmock.h" + +using namespace ONNX_NAMESPACE; +using namespace onnxruntime::logging; + +extern std::unique_ptr ort_env; + +class OVEPWorkloadTypeTests : public ::testing::Test { +protected: + // Helper function to check if NPU is available + static bool IsNPUAvailable() { + try { + Ort::SessionOptions test_options; + std::unordered_map ov_options; + ov_options["device_type"] = "NPU"; + test_options.AppendExecutionProvider_OpenVINO_V2(ov_options); + return true; + } catch (...) { + return false; + } + } +}; + +namespace onnxruntime { +namespace test { + +// Test: SetEpDynamicOptions with workload_type transitions should not error +// baseline -> Efficient -> Default. +TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeDynamicSwitch) { + // Skip test if NPU is not available + if (!IsNPUAvailable()) { + GTEST_SKIP() << "NPU device not available, skipping workload type test"; + } + + Ort::SessionOptions session_options; + std::unordered_map ov_options; + ov_options["device_type"] = "NPU"; + + const std::unordered_map domain_to_version = {{"", 13}}; + auto& logging_manager = DefaultLoggingManager(); + logging_manager.SetDefaultLoggerSeverity(logging::Severity::kERROR); + + onnxruntime::Model model("WorkloadType_Test_Model", false, ModelMetaData(), + PathString(), IOnnxRuntimeOpSchemaRegistryList(), + domain_to_version, {}, + logging_manager.DefaultLogger()); + + auto& graph = model.MainGraph(); + + // Input: X [1, 3, 2, 2] float + ONNX_NAMESPACE::TypeProto float_tensor; + float_tensor.mutable_tensor_type()->set_elem_type( + ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + auto* shape = float_tensor.mutable_tensor_type()->mutable_shape(); + shape->add_dim()->set_dim_value(1); + shape->add_dim()->set_dim_value(3); + shape->add_dim()->set_dim_value(2); + shape->add_dim()->set_dim_value(2); + + auto& input_arg = graph.GetOrCreateNodeArg("X", &float_tensor); + auto& output_arg = graph.GetOrCreateNodeArg("Y", &float_tensor); + + // Constant initializer: scalar 2.0 + ONNX_NAMESPACE::TensorProto multiplier; + multiplier.set_name("Multiplier"); + multiplier.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + multiplier.add_dims(1); + multiplier.add_float_data(2.0f); + graph.AddInitializedTensor(multiplier); + + auto& multiplier_arg = graph.GetOrCreateNodeArg("Multiplier", nullptr); + + graph.AddNode("mul_node", "Mul", "Multiply by 2", + {&input_arg, &multiplier_arg}, {&output_arg}); + graph.SetInputs({&input_arg}); + graph.SetOutputs({&output_arg}); + + ASSERT_STATUS_OK(graph.Resolve()); + + // Serialize the model to a string. + std::string model_data; + model.ToProto().SerializeToString(&model_data); + const auto model_data_span = AsByteSpan(model_data.data(), model_data.size()); + + session_options.SetIntraOpNumThreads(1); + session_options.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_ENABLE_ALL); + session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); + + Ort::Session session(*ort_env, model_data_span.data(), + model_data_span.size(), session_options); + + // Prepare input: 12 floats (shape 1x3x2x2) all set to 1.0 + Ort::AllocatorWithDefaultOptions allocator; + std::string input_name = session.GetInputNameAllocated(0, allocator).get(); + std::string output_name = session.GetOutputNameAllocated(0, allocator).get(); + const char* input_names[] = {input_name.c_str()}; + const char* output_names[] = {output_name.c_str()}; + + std::vector input_shape = {1, 3, 2, 2}; + std::vector input_values(12, 1.0f); + Ort::MemoryInfo mem_info = + Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + auto run_and_verify = [&](const std::string& phase_label) { + Ort::Value input_tensor = Ort::Value::CreateTensor( + mem_info, input_values.data(), input_values.size(), + input_shape.data(), input_shape.size()); + auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names, + &input_tensor, 1, output_names, 1); + ASSERT_EQ(outputs.size(), 1u) << phase_label; + const float* output_data = outputs[0].GetTensorData(); + size_t num_elements = + outputs[0].GetTensorTypeAndShapeInfo().GetElementCount(); + ASSERT_EQ(num_elements, 12u) << phase_label; + for (size_t i = 0; i < num_elements; ++i) { + EXPECT_NEAR(output_data[i], 2.0f, 1e-5f) << phase_label << " index " << i; + } + }; + + const char* const keys[] = {"ep.dynamic.workload_type"}; + + // Phase 1: Baseline (no workload type set) + run_and_verify("Baseline"); + + // Phase 2: Efficient + const char* const eff_val[] = {"Efficient"}; + session.SetEpDynamicOptions(keys, eff_val, 1); + run_and_verify("Efficient"); + + // Phase 3: Default + const char* const def_val[] = {"Default"}; + session.SetEpDynamicOptions(keys, def_val, 1); + run_and_verify("Default"); +} + +// Test: Multiple inferences per workload mode +// This validates sustained correctness under each workload type and ensures +// no degradation or resource leaks across multiple inferences. +TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeMultipleInferencesPerMode) { + // Skip test if NPU is not available + if (!IsNPUAvailable()) { + GTEST_SKIP() << "NPU device not available, skipping workload type test"; + } + + Ort::SessionOptions session_options; + std::unordered_map ov_options; + ov_options["device_type"] = "NPU"; + + const std::unordered_map domain_to_version = {{"", 13}}; + auto& logging_manager = DefaultLoggingManager(); + logging_manager.SetDefaultLoggerSeverity(logging::Severity::kERROR); + + onnxruntime::Model model("WorkloadType_MultiRun_Model", false, ModelMetaData(), + PathString(), IOnnxRuntimeOpSchemaRegistryList(), + domain_to_version, {}, + logging_manager.DefaultLogger()); + + auto& graph = model.MainGraph(); + + ONNX_NAMESPACE::TypeProto float_tensor; + float_tensor.mutable_tensor_type()->set_elem_type( + ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + auto* shape = float_tensor.mutable_tensor_type()->mutable_shape(); + shape->add_dim()->set_dim_value(1); + shape->add_dim()->set_dim_value(3); + shape->add_dim()->set_dim_value(2); + shape->add_dim()->set_dim_value(2); + + auto& input_arg = graph.GetOrCreateNodeArg("X", &float_tensor); + auto& output_arg = graph.GetOrCreateNodeArg("Y", &float_tensor); + + ONNX_NAMESPACE::TensorProto multiplier; + multiplier.set_name("Multiplier"); + multiplier.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + multiplier.add_dims(1); + multiplier.add_float_data(2.0f); + graph.AddInitializedTensor(multiplier); + + auto& multiplier_arg = graph.GetOrCreateNodeArg("Multiplier", nullptr); + + graph.AddNode("mul_node", "Mul", "Multiply by 2", + {&input_arg, &multiplier_arg}, {&output_arg}); + graph.SetInputs({&input_arg}); + graph.SetOutputs({&output_arg}); + + ASSERT_STATUS_OK(graph.Resolve()); + + std::string model_data; + model.ToProto().SerializeToString(&model_data); + const auto model_data_span = AsByteSpan(model_data.data(), model_data.size()); + + session_options.SetIntraOpNumThreads(1); + session_options.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_ENABLE_ALL); + session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); + + Ort::Session session(*ort_env, model_data_span.data(), + model_data_span.size(), session_options); + + Ort::AllocatorWithDefaultOptions allocator; + std::string input_name = session.GetInputNameAllocated(0, allocator).get(); + std::string output_name = session.GetOutputNameAllocated(0, allocator).get(); + const char* input_names[] = {input_name.c_str()}; + const char* output_names[] = {output_name.c_str()}; + + std::vector input_shape = {1, 3, 2, 2}; + std::vector input_values(12, 1.0f); + Ort::MemoryInfo mem_info = + Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + auto run_and_verify = [&](const std::string& phase_label) { + Ort::Value input_tensor = Ort::Value::CreateTensor( + mem_info, input_values.data(), input_values.size(), + input_shape.data(), input_shape.size()); + auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names, + &input_tensor, 1, output_names, 1); + ASSERT_EQ(outputs.size(), 1u) << phase_label; + const float* output_data = outputs[0].GetTensorData(); + size_t num_elements = + outputs[0].GetTensorTypeAndShapeInfo().GetElementCount(); + ASSERT_EQ(num_elements, 12u) << phase_label; + for (size_t i = 0; i < num_elements; ++i) { + EXPECT_NEAR(output_data[i], 2.0f, 1e-5f) << phase_label << " index " << i; + } + }; + + const char* const keys[] = {"ep.dynamic.workload_type"}; + const char* const eff_val[] = {"Efficient"}; + const char* const def_val[] = {"Default"}; + + constexpr int kIterationsPerMode = 10; + + // Phase 1: Baseline - 10 runs without workload type + for (int i = 0; i < kIterationsPerMode; ++i) { + run_and_verify("Baseline iter " + std::to_string(i)); + } + + // Phase 2: Efficient - 10 runs with Efficient workload type + session.SetEpDynamicOptions(keys, eff_val, 1); + for (int i = 0; i < kIterationsPerMode; ++i) { + run_and_verify("Efficient iter " + std::to_string(i)); + } + + // Phase 3: Default - 10 runs with Default workload type + session.SetEpDynamicOptions(keys, def_val, 1); + for (int i = 0; i < kIterationsPerMode; ++i) { + run_and_verify("Default iter " + std::to_string(i)); + } +} + +} // namespace test +} // namespace onnxruntime From be1d601211f7812133eafd1df92117441c3abda1 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Thu, 5 Feb 2026 14:57:23 +0530 Subject: [PATCH 2/4] Updated openvino_ep_workload_type_test.cc --- .../providers/openvino/openvino_ep_workload_type_test.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc index add2dd447ebb0..02abe34324dbd 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc @@ -7,7 +7,10 @@ #include #include "core/framework/provider_options.h" #include "core/framework/tensor_shape.h" +#include "core/graph/model.h" +#include "core/common/logging/logging.h" #include "test/util/include/test_utils.h" +#include "test/util/include/asserts.h" #include "test/util/include/test/test_environment.h" #include "test/util/include/default_providers.h" #include "core/session/onnxruntime_cxx_api.h" @@ -15,6 +18,8 @@ #include "core/session/inference_session.h" #include "gtest/gtest.h" #include "gmock/gmock.h" +#include "test/unittest_util/qdq_test_utils.h" + using namespace ONNX_NAMESPACE; using namespace onnxruntime::logging; From 30a15b73103faba5343a83ad197f2fe273386b21 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Mon, 16 Feb 2026 11:01:00 +0530 Subject: [PATCH 3/4] Add Squeezenet Model --- .../openvino_ep_workload_type_test.cc | 333 +++++++----------- 1 file changed, 133 insertions(+), 200 deletions(-) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc index 02abe34324dbd..523921f8cf50e 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc @@ -2,53 +2,129 @@ // Licensed under the MIT License. - -#include +#include +#include #include -#include "core/framework/provider_options.h" -#include "core/framework/tensor_shape.h" -#include "core/graph/model.h" -#include "core/common/logging/logging.h" -#include "test/util/include/test_utils.h" -#include "test/util/include/asserts.h" -#include "test/util/include/test/test_environment.h" -#include "test/util/include/default_providers.h" +#include +#include +#include + #include "core/session/onnxruntime_cxx_api.h" -#include "core/session/onnxruntime_session_options_config_keys.h" -#include "core/session/inference_session.h" #include "gtest/gtest.h" -#include "gmock/gmock.h" -#include "test/unittest_util/qdq_test_utils.h" +extern std::unique_ptr ort_env; -using namespace ONNX_NAMESPACE; -using namespace onnxruntime::logging; +constexpr const ORTCHAR_T* kSqueezeNetModelUri = + ORT_TSTR("testdata/squeezenet/model.onnx"); -extern std::unique_ptr ort_env; class OVEPWorkloadTypeTests : public ::testing::Test { -protected: - // Helper function to check if NPU is available + protected: + // Check whether the NPU device can be registered at all. static bool IsNPUAvailable() { try { - Ort::SessionOptions test_options; - std::unordered_map ov_options; - ov_options["device_type"] = "NPU"; - test_options.AppendExecutionProvider_OpenVINO_V2(ov_options); + Ort::SessionOptions opts; + std::unordered_map ov; + ov["device_type"] = "NPU"; + opts.AppendExecutionProvider_OpenVINO_V2(ov); return true; } catch (...) { return false; } } + + // Allow NPU resources to be fully released between tests. + // Without this delay the NPU driver may fail to re-initialise + void TearDown() override { + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + } + + static Ort::Session CreateSqueezeNetSession( + Ort::SessionOptions& session_options, + std::unordered_map& ov_options) { + session_options.SetIntraOpNumThreads(1); + session_options.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_ENABLE_ALL); + session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); + + return Ort::Session(*ort_env, kSqueezeNetModelUri, session_options); + } + + // Run a single inference on the SqueezeNet session and return output + static std::vector RunSqueezeNet(Ort::Session& session, + const std::string& phase_label) { + Ort::AllocatorWithDefaultOptions allocator; + std::string input_name = + session.GetInputNameAllocated(0, allocator).get(); + std::string output_name = + session.GetOutputNameAllocated(0, allocator).get(); + const char* input_names[] = {input_name.c_str()}; + const char* output_names[] = {output_name.c_str()}; + + // SqueezeNet input: 1 × 3 × 224 × 224 = 150 528 floats + std::vector input_shape = {1, 3, 224, 224}; + constexpr size_t kInputSize = 1 * 3 * 224 * 224; + std::vector input_values(kInputSize, 1.0f); + + Ort::MemoryInfo mem_info = + Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + Ort::Value input_tensor = Ort::Value::CreateTensor( + mem_info, input_values.data(), input_values.size(), + input_shape.data(), input_shape.size()); + + auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names, + &input_tensor, 1, output_names, 1); + + EXPECT_EQ(outputs.size(), 1u) << phase_label; + if (outputs.empty()) return {}; + + auto type_shape = outputs[0].GetTensorTypeAndShapeInfo(); + std::vector out_shape = type_shape.GetShape(); + + // Expected: [1, 1000, 1, 1] + EXPECT_EQ(out_shape.size(), 4u) << phase_label; + if (out_shape.size() == 4u) { + EXPECT_EQ(out_shape[0], 1) << phase_label; + EXPECT_EQ(out_shape[1], 1000) << phase_label; + EXPECT_EQ(out_shape[2], 1) << phase_label; + EXPECT_EQ(out_shape[3], 1) << phase_label; + } + + size_t num_elements = type_shape.GetElementCount(); + EXPECT_EQ(num_elements, 1000u) << phase_label; + + const float* out_data = outputs[0].GetTensorData(); + std::vector result(out_data, out_data + num_elements); + + for (size_t i = 0; i < num_elements; ++i) { + EXPECT_TRUE(std::isfinite(result[i])) + << phase_label << " index " << i << " is not finite"; + } + + return result; + } + + // Compare two output vectors element-wise within a tolerance. + static void CompareOutputs(const std::vector& expected, + const std::vector& actual, + const std::string& label, + float tolerance = 1e-4f) { + ASSERT_EQ(expected.size(), actual.size()) << label << " size mismatch"; + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_NEAR(expected[i], actual[i], tolerance) + << label << " mismatch at index " << i; + } + } }; namespace onnxruntime { namespace test { -// Test: SetEpDynamicOptions with workload_type transitions should not error -// baseline -> Efficient -> Default. + +// Test 1: Dynamic workload-type switching with consistency check +// Baseline (no workload type) → Efficient → Default TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeDynamicSwitch) { - // Skip test if NPU is not available if (!IsNPUAvailable()) { GTEST_SKIP() << "NPU device not available, skipping workload type test"; } @@ -57,109 +133,34 @@ TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeDynamicSwitch) { std::unordered_map ov_options; ov_options["device_type"] = "NPU"; - const std::unordered_map domain_to_version = {{"", 13}}; - auto& logging_manager = DefaultLoggingManager(); - logging_manager.SetDefaultLoggerSeverity(logging::Severity::kERROR); - - onnxruntime::Model model("WorkloadType_Test_Model", false, ModelMetaData(), - PathString(), IOnnxRuntimeOpSchemaRegistryList(), - domain_to_version, {}, - logging_manager.DefaultLogger()); - - auto& graph = model.MainGraph(); - - // Input: X [1, 3, 2, 2] float - ONNX_NAMESPACE::TypeProto float_tensor; - float_tensor.mutable_tensor_type()->set_elem_type( - ONNX_NAMESPACE::TensorProto_DataType_FLOAT); - auto* shape = float_tensor.mutable_tensor_type()->mutable_shape(); - shape->add_dim()->set_dim_value(1); - shape->add_dim()->set_dim_value(3); - shape->add_dim()->set_dim_value(2); - shape->add_dim()->set_dim_value(2); - - auto& input_arg = graph.GetOrCreateNodeArg("X", &float_tensor); - auto& output_arg = graph.GetOrCreateNodeArg("Y", &float_tensor); - - // Constant initializer: scalar 2.0 - ONNX_NAMESPACE::TensorProto multiplier; - multiplier.set_name("Multiplier"); - multiplier.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); - multiplier.add_dims(1); - multiplier.add_float_data(2.0f); - graph.AddInitializedTensor(multiplier); - - auto& multiplier_arg = graph.GetOrCreateNodeArg("Multiplier", nullptr); - - graph.AddNode("mul_node", "Mul", "Multiply by 2", - {&input_arg, &multiplier_arg}, {&output_arg}); - graph.SetInputs({&input_arg}); - graph.SetOutputs({&output_arg}); - - ASSERT_STATUS_OK(graph.Resolve()); - - // Serialize the model to a string. - std::string model_data; - model.ToProto().SerializeToString(&model_data); - const auto model_data_span = AsByteSpan(model_data.data(), model_data.size()); - - session_options.SetIntraOpNumThreads(1); - session_options.SetGraphOptimizationLevel( - GraphOptimizationLevel::ORT_ENABLE_ALL); - session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); - - Ort::Session session(*ort_env, model_data_span.data(), - model_data_span.size(), session_options); - - // Prepare input: 12 floats (shape 1x3x2x2) all set to 1.0 - Ort::AllocatorWithDefaultOptions allocator; - std::string input_name = session.GetInputNameAllocated(0, allocator).get(); - std::string output_name = session.GetOutputNameAllocated(0, allocator).get(); - const char* input_names[] = {input_name.c_str()}; - const char* output_names[] = {output_name.c_str()}; - - std::vector input_shape = {1, 3, 2, 2}; - std::vector input_values(12, 1.0f); - Ort::MemoryInfo mem_info = - Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - - auto run_and_verify = [&](const std::string& phase_label) { - Ort::Value input_tensor = Ort::Value::CreateTensor( - mem_info, input_values.data(), input_values.size(), - input_shape.data(), input_shape.size()); - auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names, - &input_tensor, 1, output_names, 1); - ASSERT_EQ(outputs.size(), 1u) << phase_label; - const float* output_data = outputs[0].GetTensorData(); - size_t num_elements = - outputs[0].GetTensorTypeAndShapeInfo().GetElementCount(); - ASSERT_EQ(num_elements, 12u) << phase_label; - for (size_t i = 0; i < num_elements; ++i) { - EXPECT_NEAR(output_data[i], 2.0f, 1e-5f) << phase_label << " index " << i; - } - }; + Ort::Session session = CreateSqueezeNetSession(session_options, ov_options); const char* const keys[] = {"ep.dynamic.workload_type"}; // Phase 1: Baseline (no workload type set) - run_and_verify("Baseline"); + auto baseline_output = RunSqueezeNet(session, "Baseline"); - // Phase 2: Efficient + // Phase 2: Switch to Efficient const char* const eff_val[] = {"Efficient"}; session.SetEpDynamicOptions(keys, eff_val, 1); - run_and_verify("Efficient"); + auto efficient_output = RunSqueezeNet(session, "Efficient"); - // Phase 3: Default + // Phase 3: Switch to Default const char* const def_val[] = {"Default"}; session.SetEpDynamicOptions(keys, def_val, 1); - run_and_verify("Default"); + auto default_output = RunSqueezeNet(session, "Default"); + + // All modes should produce the same results + CompareOutputs(baseline_output, efficient_output, + "Baseline vs Efficient"); + CompareOutputs(baseline_output, default_output, + "Baseline vs Default"); } -// Test: Multiple inferences per workload mode -// This validates sustained correctness under each workload type and ensures -// no degradation or resource leaks across multiple inferences. +// Test 2: Multiple inferences per workload mode +// Runs 10 inferences in each mode: +// Baseline × 10 → Efficient × 10 → Default × 10 TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeMultipleInferencesPerMode) { - // Skip test if NPU is not available if (!IsNPUAvailable()) { GTEST_SKIP() << "NPU device not available, skipping workload type test"; } @@ -168,83 +169,7 @@ TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeMultipleInferencesPerMode) { std::unordered_map ov_options; ov_options["device_type"] = "NPU"; - const std::unordered_map domain_to_version = {{"", 13}}; - auto& logging_manager = DefaultLoggingManager(); - logging_manager.SetDefaultLoggerSeverity(logging::Severity::kERROR); - - onnxruntime::Model model("WorkloadType_MultiRun_Model", false, ModelMetaData(), - PathString(), IOnnxRuntimeOpSchemaRegistryList(), - domain_to_version, {}, - logging_manager.DefaultLogger()); - - auto& graph = model.MainGraph(); - - ONNX_NAMESPACE::TypeProto float_tensor; - float_tensor.mutable_tensor_type()->set_elem_type( - ONNX_NAMESPACE::TensorProto_DataType_FLOAT); - auto* shape = float_tensor.mutable_tensor_type()->mutable_shape(); - shape->add_dim()->set_dim_value(1); - shape->add_dim()->set_dim_value(3); - shape->add_dim()->set_dim_value(2); - shape->add_dim()->set_dim_value(2); - - auto& input_arg = graph.GetOrCreateNodeArg("X", &float_tensor); - auto& output_arg = graph.GetOrCreateNodeArg("Y", &float_tensor); - - ONNX_NAMESPACE::TensorProto multiplier; - multiplier.set_name("Multiplier"); - multiplier.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); - multiplier.add_dims(1); - multiplier.add_float_data(2.0f); - graph.AddInitializedTensor(multiplier); - - auto& multiplier_arg = graph.GetOrCreateNodeArg("Multiplier", nullptr); - - graph.AddNode("mul_node", "Mul", "Multiply by 2", - {&input_arg, &multiplier_arg}, {&output_arg}); - graph.SetInputs({&input_arg}); - graph.SetOutputs({&output_arg}); - - ASSERT_STATUS_OK(graph.Resolve()); - - std::string model_data; - model.ToProto().SerializeToString(&model_data); - const auto model_data_span = AsByteSpan(model_data.data(), model_data.size()); - - session_options.SetIntraOpNumThreads(1); - session_options.SetGraphOptimizationLevel( - GraphOptimizationLevel::ORT_ENABLE_ALL); - session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); - - Ort::Session session(*ort_env, model_data_span.data(), - model_data_span.size(), session_options); - - Ort::AllocatorWithDefaultOptions allocator; - std::string input_name = session.GetInputNameAllocated(0, allocator).get(); - std::string output_name = session.GetOutputNameAllocated(0, allocator).get(); - const char* input_names[] = {input_name.c_str()}; - const char* output_names[] = {output_name.c_str()}; - - std::vector input_shape = {1, 3, 2, 2}; - std::vector input_values(12, 1.0f); - Ort::MemoryInfo mem_info = - Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - - auto run_and_verify = [&](const std::string& phase_label) { - Ort::Value input_tensor = Ort::Value::CreateTensor( - mem_info, input_values.data(), input_values.size(), - input_shape.data(), input_shape.size()); - auto outputs = session.Run(Ort::RunOptions{nullptr}, input_names, - &input_tensor, 1, output_names, 1); - ASSERT_EQ(outputs.size(), 1u) << phase_label; - const float* output_data = outputs[0].GetTensorData(); - size_t num_elements = - outputs[0].GetTensorTypeAndShapeInfo().GetElementCount(); - ASSERT_EQ(num_elements, 12u) << phase_label; - for (size_t i = 0; i < num_elements; ++i) { - EXPECT_NEAR(output_data[i], 2.0f, 1e-5f) << phase_label << " index " << i; - } - }; + Ort::Session session = CreateSqueezeNetSession(session_options, ov_options); const char* const keys[] = {"ep.dynamic.workload_type"}; const char* const eff_val[] = {"Efficient"}; @@ -252,21 +177,29 @@ TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeMultipleInferencesPerMode) { constexpr int kIterationsPerMode = 10; - // Phase 1: Baseline - 10 runs without workload type - for (int i = 0; i < kIterationsPerMode; ++i) { - run_and_verify("Baseline iter " + std::to_string(i)); + // Phase 1: Baseline – 10 runs without workload type + // Save the first run as the reference output. + auto reference_output = RunSqueezeNet(session, "Baseline iter 0"); + for (int i = 1; i < kIterationsPerMode; ++i) { + auto output = RunSqueezeNet(session, "Baseline iter " + std::to_string(i)); + CompareOutputs(reference_output, output, + "Baseline iter " + std::to_string(i) + " vs reference"); } - // Phase 2: Efficient - 10 runs with Efficient workload type + // Phase 2: Efficient – 10 runs session.SetEpDynamicOptions(keys, eff_val, 1); for (int i = 0; i < kIterationsPerMode; ++i) { - run_and_verify("Efficient iter " + std::to_string(i)); + auto output = RunSqueezeNet(session, "Efficient iter " + std::to_string(i)); + CompareOutputs(reference_output, output, + "Efficient iter " + std::to_string(i) + " vs reference"); } - // Phase 3: Default - 10 runs with Default workload type + // Phase 3: Default – 10 runs session.SetEpDynamicOptions(keys, def_val, 1); for (int i = 0; i < kIterationsPerMode; ++i) { - run_and_verify("Default iter " + std::to_string(i)); + auto output = RunSqueezeNet(session, "Default iter " + std::to_string(i)); + CompareOutputs(reference_output, output, + "Default iter " + std::to_string(i) + " vs reference"); } } From 8dea5c0e09066377dbb9cec21c84740f2df41394 Mon Sep 17 00:00:00 2001 From: Jaswanth51 Date: Mon, 16 Feb 2026 11:05:33 +0530 Subject: [PATCH 4/4] Fix Lint issues --- .../test/providers/openvino/openvino_ep_workload_type_test.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc index 523921f8cf50e..5b9e4539f02a9 100644 --- a/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc +++ b/onnxruntime/test/providers/openvino/openvino_ep_workload_type_test.cc @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. - #include #include #include @@ -17,7 +16,6 @@ extern std::unique_ptr ort_env; constexpr const ORTCHAR_T* kSqueezeNetModelUri = ORT_TSTR("testdata/squeezenet/model.onnx"); - class OVEPWorkloadTypeTests : public ::testing::Test { protected: // Check whether the NPU device can be registered at all. @@ -121,7 +119,6 @@ class OVEPWorkloadTypeTests : public ::testing::Test { namespace onnxruntime { namespace test { - // Test 1: Dynamic workload-type switching with consistency check // Baseline (no workload type) → Efficient → Default TEST_F(OVEPWorkloadTypeTests, OVEPWorkloadTypeDynamicSwitch) {