Fix builds: pass the MLAS backend kernel selector config through CDist's CalculateSqeuclidean

hariharans29 · hariharans29 · commit 9329b0d494c7 · 2026-01-24T14:05:58.000-08:00
diff --git a/onnxruntime/contrib_ops/cpu/cdist.cc b/onnxruntime/contrib_ops/cpu/cdist.cc
@@ -19,7 +19,8 @@ DEFINE_KERNEL(float);
 DEFINE_KERNEL(double);
 
 template <typename T>
-static void CalculateSqeuclidean(const Tensor& a, const Tensor& b, Tensor& c, concurrency::ThreadPool* threadpool) {
+static void CalculateSqeuclidean(const Tensor& a, const Tensor& b, Tensor& c, concurrency::ThreadPool* threadpool,
+                                 const MLAS_BACKEND_KERNEL_SELECTOR_CONFIG* mlas_backend_kernel_selector_config) {
   // input shapes have already been validated
   const auto& shape_a = a.Shape().GetDims();  // {m, k}
   const auto& shape_b = b.Shape().GetDims();  // {n, k}
@@ -64,7 +65,8 @@ static void CalculateSqeuclidean(const Tensor& a, const Tensor& b, Tensor& c, co
                 m, n, k,
                 static_cast<T>(-2.), a_data, b_data, static_cast<T>(0.),
                 c_data,
-                threadpool);
+                threadpool,
+                mlas_backend_kernel_selector_config);
 #else
   // the performance of this isn't great as the eigen matmul is single threaded by default
   // if you're on x86 and care about performance try MKL first. if there's a good enough argument for optimizing this
@@ -114,7 +116,7 @@ common::Status CDist<T>::Compute(OpKernelContext* context) const {
   Tensor* C = context->Output(0, output_shape);
   T* output = C->MutableData<T>();
 
-  CalculateSqeuclidean<T>(*A, *B, *C, tp);
+  CalculateSqeuclidean<T>(*A, *B, *C, tp, &mlas_backend_kernel_selector_config_);
   auto map_out = EigenVectorArrayMap<T>(output, narrow<size_t>(output_shape.Size()));
 
   // because we use GEMM in CalculateSqeuclidean there's a slight chance a number extremely close to zero
diff --git a/onnxruntime/contrib_ops/cpu/cdist.h b/onnxruntime/contrib_ops/cpu/cdist.h
@@ -5,6 +5,8 @@
 
 #include "core/common/common.h"
 #include "core/framework/op_kernel.h"
+#include "core/session/onnxruntime_session_options_config_keys.h"
+#include "core/mlas/inc/mlas.h"
 
 namespace onnxruntime {
 namespace contrib {
@@ -17,8 +19,13 @@ class CDist final : public OpKernel {
   enum class Mode { EUCLIDEAN,
                     SQEUCLIDEAN } mode_;
 
+
+  MLAS_BACKEND_KERNEL_SELECTOR_CONFIG mlas_backend_kernel_selector_config_;
+
  public:
   CDist(const OpKernelInfo& info) : OpKernel(info) {
+    mlas_backend_kernel_selector_config_.use_kleidiai =
+                              info.GetConfigOptions().GetConfigEntry(kOrtSessionOptionsMlasDisableKleidiai) != "1";
     std::string metric;
     ORT_ENFORCE(info.GetAttr<std::string>("metric", &metric).IsOK());
     if (metric.compare("sqeuclidean") == 0)