onnxruntime/test/providers/qnn/einsum_op_test.cc (12 changes: 8 additions & 4 deletions)
@@ -351,9 +351,13 @@ TEST_F(QnnHTPBackendTests, EinsumRank3MatMulTransposeY_QK) {
                        /*tolerance=*/1e-2f);
 }
 
-// The value pair (65.1049271, 65.0625076) at index #51 don't match, which is -0.0424194 from 65.1049
-// Disable this Rank3 test on HTP since it has accuracy issue.
-TEST_F(QnnHTPBackendTests, DISABLED_EinsumRank3MatMul_QK) {
+// Since QAIRT 2.35, the default floating-point precision on QNN HTP is FP16.
+// The FP32 → FP16 → FP32 conversion can introduce accuracy loss, especially when the input tensors
+// are large, because more elements participate in the matrix multiplication.
+// For example, a value such as 168.665131 may become 168.750015 after
+// conversion in a MatMul operation. The expected difference is ~0.0848846,
+// so the tolerance is adjusted to 9e-2f.
+TEST_F(QnnHTPBackendTests, EinsumRank3MatMul_QK) {
   const std::vector<int64_t> shape0{4, 5, 6};
   const std::vector<int64_t> shape1{4, 6, 5};
   const std::vector<float> data0 = GetSequentialFloatData(shape0, /*start=*/-0.1f, /*step=*/0.05f);
@@ -363,7 +367,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_EinsumRank3MatMul_QK) {
                        /*in0=*/TestInputDef<float>(shape0, /*is_initializer=*/false, std::move(data0)),
                        /*in1=*/TestInputDef<float>(shape1, /*is_initializer=*/false, std::move(data1)),
                        /*equation=*/"hQK,hKd->hQd",
-                       /*tolerance=*/1e-2f);
+                       /*tolerance=*/9e-2f);
 }
 
 TEST_F(QnnHTPBackendTests, EinsumF16Rank4MatMulTransposeAll1) {
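
The accuracy-loss mechanism described in the new comment can be reproduced outside the test harness. Below is a minimal standalone sketch, not part of this PR, that accumulates the same dot product once in FP32 and once in FP16. It assumes a recent GCC or Clang with the `_Float16` extension, and the ramp data only mirrors the spirit of `GetSequentialFloatData`; the element count is illustrative.

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Ramp data in the spirit of GetSequentialFloatData(shape, -0.1f, 0.05f);
  // 128 elements is an arbitrary, illustrative count.
  std::vector<float> a, b;
  for (int i = 0; i < 128; ++i) {
    a.push_back(-0.1f + 0.05f * static_cast<float>(i));
    b.push_back(-0.1f + 0.05f * static_cast<float>(i));
  }

  float fp32_acc = 0.0f;
  _Float16 fp16_acc = 0.0f;
  for (size_t i = 0; i < a.size(); ++i) {
    fp32_acc += a[i] * b[i];
    // Inputs, products, and the running sum are all rounded to FP16 here,
    // mimicking a MatMul executed at FP16 precision.
    fp16_acc += static_cast<_Float16>(a[i]) * static_cast<_Float16>(b[i]);
  }

  std::printf("fp32 = %f, fp16 = %f, diff = %f\n",
              fp32_acc, static_cast<float>(fp16_acc),
              fp32_acc - static_cast<float>(fp16_acc));
}
```

The divergence grows with the reduction length, which is why larger input tensors need the looser 9e-2f tolerance; on a toolchain without `_Float16`, a bit-level float-to-half helper would show the same effect.
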
onnxruntime/test/providers/qnn/gemm_op_test.cc (14 changes: 4 additions & 10 deletions)
@@ -73,9 +73,8 @@ TEST_F(QnnCPUBackendTests, Gemm_2D_Bias) {
                ExpectedEPNodeAssignment::All);  // Assigned to QNN EP.
 }
 
-// since Qnn v2.34 value pair (120.73912, 121.73912) at index #0 don't match, which is 1 from 120.739
 // Test Gemm with dynamic (i.e., not initializer) inputs (A, B, Bias).
-TEST_F(QnnCPUBackendTests, DISABLED_Gemm_Dynamic_A_B_Bias) {
+TEST_F(QnnCPUBackendTests, Gemm_Dynamic_A_B_Bias) {
   std::vector<float> input_a_data = GetFloatDataInRange(-10.0f, 10.0f, 6);
   std::vector<float> input_b_data = GetFloatDataInRange(-5.0f, 5.0f, 24);
   std::vector<float> input_c_data = GetFloatDataInRange(-1.0f, 1.0f, 4);
@@ -111,9 +110,8 @@ TEST_F(QnnCPUBackendTests, Gemm_TransAB_Static_B_And_Bias) {
                ExpectedEPNodeAssignment::All);
 }
 
-// Since Qnn 2.34 value pair (29.4347763, 30.4347763) at index #0 don't match, which is 1 from 29.4348
 // Test Gemm with transposed A/B and dynamic (i.e., not initializer) B and Bias inputs.
-TEST_F(QnnCPUBackendTests, DISABLED_Gemm_TransAB_Dynamic_B_And_Bias) {
+TEST_F(QnnCPUBackendTests, Gemm_TransAB_Dynamic_B_And_Bias) {
   std::vector<float> input_a_data = GetFloatDataInRange(-10.0f, 10.0f, 6);
   std::vector<float> input_b_data = GetFloatDataInRange(-5.0f, 5.0f, 24);
   std::vector<float> input_c_data = GetFloatDataInRange(-1.0f, 1.0f, 4);
@@ -125,8 +123,7 @@ TEST_F(QnnCPUBackendTests, DISABLED_Gemm_TransAB_Dynamic_B_And_Bias) {
                ExpectedEPNodeAssignment::All);
 }
 
-// Since Qnn 2.34 value pair (11, 10) at index #0 don't match, which is -1 from 11
-TEST_F(QnnCPUBackendTests, DISABLED_Gemm_Broadcast_Bias_DynamicInputs) {
+TEST_F(QnnCPUBackendTests, Gemm_Broadcast_Bias_DynamicInputs) {
   std::vector<float> input_a_data = {1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f};
   std::vector<float> input_b_data(12, 1.0f);
   std::vector<float> input_c_data = {1.0f, 2.0f, 3.0f};
@@ -142,10 +139,7 @@ TEST_F(QnnCPUBackendTests, DISABLED_Gemm_Broadcast_Bias_DynamicInputs) {
                ExpectedEPNodeAssignment::All);
 }
 
-// TODO: When this is fixed, enable GemmOpTypedTests/0.TestGemmBroadcast test in cpu/math/gemm_test.cc
-// This began failing in QNN SDK 2.17 for the CPU backend.
-// Log: the value pair (11, 10) at index #0 don't match, which is -1 from 11
-TEST_F(QnnCPUBackendTests, DISABLED_Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
+TEST_F(QnnCPUBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
   std::vector<float> input_a_data = {1.0f, 2.0f, 3.0f, 4.0f, -1.0f, -2.0f, -3.0f, -4.0f};
   std::vector<float> input_b_data(12, 1.0f);
   std::vector<float> input_c_data = {1.0f, 2.0f, 3.0f};
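
For the two broadcast-bias tests re-enabled above, the expected values can be derived by hand, which explains the old "(11, 10)" failure log: the first output element should be 11, and a backend that drops the broadcast bias returns 10. A minimal sketch of that arithmetic, not part of this PR, assuming the ONNX Gemm defaults alpha = beta = 1:

```cpp
#include <cstdio>

int main() {
  const float A[2][4] = {{1, 2, 3, 4}, {-1, -2, -3, -4}};  // same data as input_a_data
  float B[4][3];
  for (auto& row : B)
    for (float& v : row) v = 1.0f;        // input_b_data: 12 ones
  const float C[3] = {1, 2, 3};           // bias, broadcast across both output rows

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      float acc = C[j];  // beta * C with beta = 1
      for (int k = 0; k < 4; ++k) acc += A[i][k] * B[k][j];
      std::printf("%6.1f ", acc);  // row 0: 11 12 13, row 1: -9 -8 -7
    }
    std::printf("\n");
  }
}
```
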
onnxruntime/test/providers/qnn/pool_op_test.cpp (4 changes: 2 additions & 2 deletions)
@@ -118,7 +118,7 @@ TEST_F(QnnCPUBackendTests, MaxPool_Large_Input) {
                ExpectedEPNodeAssignment::All);
 }
 
-// Fails on QNN v2.17, QNN.graphAddNode() failed for node `MaxPool` of type `PoolMax2d` with error code 6000
+// QNN CPU doesn't support the ceil rounding mode. Enable this UT when QNN CPU supports this case.
 TEST_F(QnnCPUBackendTests, DISABLED_MaxPool_Ceil) {
   RunPoolOpTest("MaxPool",
                 TestInputDef<float>({1, 2, 3, 3}, false, -10.0f, 10.0f),  // Dynamic input with range [-10, 10]
@@ -132,7 +132,7 @@ TEST_F(QnnCPUBackendTests, DISABLED_MaxPool_Ceil) {
                ExpectedEPNodeAssignment::All);
 }
 
-// Fails on QNN v2.17, QNN.graphAddNode() failed for node `MaxPool` of type `PoolMax2d` with error code 6000
+// QNN CPU doesn't support the ceil rounding mode. Enable this UT when QNN CPU supports this case.
 TEST_F(QnnCPUBackendTests, DISABLED_MaxPool_Large_Input2_Ceil) {
   RunPoolOpTest("MaxPool",
                 TestInputDef<float>({1, 128, 16, 113}, false, -10.0f, 10.0f),  // Dynamic input with range [-10, 10]
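
The ceil rounding mode these two still-disabled tests exercise only matters when the pooling window does not tile the input exactly. A small sketch of the ONNX MaxPool output-shape formula follows; it is not from the test file, and the kernel and stride values are assumptions, since the actual attributes sit in the collapsed part of the diff.

```cpp
#include <cmath>
#include <cstdio>

int main() {
  // One spatial dimension of the {1, 2, 3, 3} input; kernel/stride are assumed values.
  const double in = 3, kernel = 2, stride = 2, pad_total = 0;
  const double q = (in + pad_total - kernel) / stride;  // 0.5: the division is inexact
  // ONNX MaxPool: out = floor(q) + 1 with ceil_mode=0, out = ceil(q) + 1 with ceil_mode=1.
  std::printf("ceil_mode=0 -> %d, ceil_mode=1 -> %d\n",
              static_cast<int>(std::floor(q)) + 1,
              static_cast<int>(std::ceil(q)) + 1);  // prints 1 and 2
}
```
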
onnxruntime/test/providers/qnn/resize_test.cc (13 changes: 5 additions & 8 deletions)
@@ -296,16 +296,13 @@ TEST_F(QnnCPUBackendTests, Resize_DownSample_Linear_AlignCorners_scales) {
 }
 
 // Test Resize downsample with mode: "linear", coordinate_transformation_mode: "half_pixel"
-// Fails on QNN v2.17, the value pair (2.66666651, 3.5) at index #0 don't match, which is 0.833333 from 2.66667
-// TODO: Enable ResizeOpTest.ResizeOpLinearDownSampleTest_4DBilinear cpu resize_op tests when fixed.
-//
-// Input f32[1,1,2,4]: 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
-// Expected output f32[1, 1, 1, 2]: 2.6666 4.3333
-// Actual output f32[1, 1, 1, 2]: 3.5, 5.5
-TEST_F(QnnCPUBackendTests, DISABLED_Resize_DownSample_Linear_HalfPixel_scales) {
+// Note: The QNN CPU backend does not define explicit scale attributes. It derives scale values
+// implicitly from the input and output tensor shapes. Therefore, the selected parameters must
+// ensure that the product of the input dimensions and the inferred scales evaluates to an integer.
+TEST_F(QnnCPUBackendTests, Resize_DownSample_Linear_HalfPixel_scales) {
   std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
   RunCPUResizeOpTestWithScales(TestInputDef<float>({1, 1, 2, 4}, false, input_data),
-                               {1.0f, 1.0f, 0.6f, 0.6f}, "linear", "half_pixel", "",
+                               {1.0f, 1.0f, 0.5f, 0.5f}, "linear", "half_pixel", "",
                                ExpectedEPNodeAssignment::All);
 }
 
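
The 0.6 to 0.5 scale change follows directly from the constraint in the new comment: ONNX Resize computes each output dimension as floor(input_dim * scale), and a backend that re-derives the scale as output_dim / input_dim only agrees with the requested scale when input_dim * scale is an integer. A short sketch of that shape arithmetic, not part of this PR:

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

int main() {
  const int64_t input_dims[2] = {2, 4};  // H and W of the f32[1,1,2,4] test input
  for (float scale : {0.6f, 0.5f}) {
    for (int64_t dim : input_dims) {
      const auto out = static_cast<int64_t>(std::floor(static_cast<float>(dim) * scale));
      const float inferred = static_cast<float>(out) / static_cast<float>(dim);
      std::printf("dim=%lld scale=%.2f -> out=%lld inferred=%.2f%s\n",
                  static_cast<long long>(dim), scale, static_cast<long long>(out),
                  inferred, inferred == scale ? "" : "  (mismatch)");
    }
  }
}
```

With scale 0.6, both spatial dimensions infer back to 0.5 and mismatch the requested value; with scale 0.5, the products 2 * 0.5 and 4 * 0.5 are integers and the scales round-trip exactly.
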