
Commit cd38342

jfix71 authored and facebook-github-bot committed
Add support for 4-bit fused-rowwise-quantized SLWS to Interpreter (pytorch#3719)
Summary: This PR adds support for 4-bit rowwise quantization to FRWQ-SLWS.

Pull Request resolved: pytorch#3719

Test Plan: Added tests to cover this. I also added a new set of tests (see second commit) that has two columns in addition to being weighted -- previously all of our weighted tests only had a single column of data.

Related to pytorch#3463

CC: jsubag

Differential Revision: D18265376

Pulled By: jfix71

fbshipit-source-id: fbab62a867eb6306f9cde82abdc374de48a8d94e
1 parent 63c26e9 commit cd38342

16 files changed: +318 additions, -103 deletions


include/glow/Base/Type.h

Lines changed: 9 additions & 0 deletions
@@ -306,6 +306,15 @@ inline bool isFusedQuantizedElemKind(ElemKind e) {
          e == ElemKind::UInt4FusedFP16QTy;
 }
 
+/// \returns the scale and offset ElemKind used by the fused ElemKind \p e.
+inline ElemKind getScaleOffsetElemKindFromFused(ElemKind e) {
+  assert(isFusedQuantizedElemKind(e) && "Must pass Fused ElemKind.");
+  if (e == ElemKind::UInt8FusedQTy) {
+    return ElemKind::FloatTy;
+  }
+  return ElemKind::Float16Ty;
+}
+
 /// A class that represents a type of a tensor.
 struct Type final {
   /// Contains the dimensions (sizes) of the tensor. Ex: [sx, sy, sz, ...].
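For context, a minimal sketch of how a caller might use this new helper, assuming glow/Base/Type.h is included and the glow namespace is in scope; the variable names are hypothetical and not part of this diff:

#include "glow/Base/Type.h"
using namespace glow;

// UInt8FusedQTy fuses a float scale/offset per row; the FP16-fused kinds
// (UInt8FusedFP16QTy and the new UInt4FusedFP16QTy) fuse float16 values.
ElemKind fusedKind = ElemKind::UInt4FusedFP16QTy;
ElemKind scaleOffsetKind = getScaleOffsetElemKindFromFused(fusedKind);
// scaleOffsetKind == ElemKind::Float16Ty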

include/glow/Graph/Graph.h

Lines changed: 19 additions & 16 deletions
@@ -936,42 +936,45 @@ class Function final : public Named {
   /// Creates and \returns a node of \p name, performing the SparseLengthsSum
   /// operation, using fused rowwise quantization for the input \p data wherein
   /// the scales and offsets are fused inline with each row of data. \p data
-  /// must be ElemKind::UInt8FusedQTy. Gathers slices of the outer-most
-  /// dimension of data indexed by the \p indices vector, and then accumulates
-  /// them into len(\p lengths) entries: first Lengths[0] slices are aggregated
-  /// to Result[0], next Lengths[1] slices are aggregated to Result[1], etc.
-  /// I.e. sum(Lengths) must be equal to len(Indices). \p precision represents
-  /// what precision to use for Scale, Offset, and Result. If
-  /// \p useFP16Accumulation, then internal arithmetic will use FP16
+  /// must be of a fused ElemKind. Gathers slices of the outer-most dimension of
+  /// data indexed by the \p indices vector, and then accumulates them into
+  /// len(\p lengths) entries: first Lengths[0] slices are aggregated to
+  /// Result[0], next Lengths[1] slices are aggregated to Result[1], etc. I.e.
+  /// sum(Lengths) must be equal to len(Indices). The precision for the Result
+  /// is determined by the \p data input's ElemKind used for Scale and
+  /// Offset. If \p useFP16Accumulation, then internal arithmetic will use FP16
   /// accumulation; otherwise defaults to FP32.
   FusedRowwiseQuantizedSparseLengthsSumNode *
-  createFusedRowwiseQuantizedSparseLengthsSum(
-      llvm::StringRef name, Constant *data, NodeValue indices,
-      NodeValue lengths, ElemKind precision = ElemKind::FloatTy,
-      bool useFP16Accumulation = false);
+  createFusedRowwiseQuantizedSparseLengthsSum(llvm::StringRef name,
+                                              Constant *data, NodeValue indices,
+                                              NodeValue lengths,
+                                              bool useFP16Accumulation = false);
 
   /// Same as \ref createFusedRowwiseQuantizedSparseLengthsSum(), but expects
   /// float input \p data, which is rowwise-quantized and fused internally.
+  /// \p fusedElemKind represents the element kind to use for the final fused
+  /// rowwise-quantized data.
   FusedRowwiseQuantizedSparseLengthsSumNode *
   createFusedRowwiseQuantizedSparseLengthsSum(
       llvm::StringRef name, Tensor &data, NodeValue indices, NodeValue lengths,
-      ElemKind precision = ElemKind::FloatTy, bool useFP16Accumulation = false);
+      ElemKind fusedElemKind = ElemKind::UInt8FusedQTy,
+      bool useFP16Accumulation = false);
 
   /// Same as \ref createFusedRowwiseQuantizedSparseLengthsSum(), but i-th slice
   /// is multiplied by weights[i]. len(weights) must be equal to len(indices).
   FusedRowwiseQuantizedSparseLengthsWeightedSumNode *
   createFusedRowwiseQuantizedSparseLengthsWeightedSum(
       llvm::StringRef name, NodeValue data, NodeValue weights,
-      NodeValue indices, NodeValue lengths,
-      ElemKind precision = ElemKind::FloatTy, bool useFP16Accumulation = false);
+      NodeValue indices, NodeValue lengths, bool useFP16Accumulation = false);
 
   /// Same as \ref createFusedRowwiseQuantizedSparseLengthsWeightedSum(), but
   /// expects float input \p data, which is rowwise-quantized and fused
-  /// internally.
+  /// internally. \p fusedElemKind represents the element kind to use for the
+  /// final fused rowwise-quantized data.
   FusedRowwiseQuantizedSparseLengthsWeightedSumNode *
   createFusedRowwiseQuantizedSparseLengthsWeightedSum(
       llvm::StringRef name, Tensor &data, NodeValue weights, NodeValue indices,
-      NodeValue lengths, ElemKind precision = ElemKind::FloatTy,
+      NodeValue lengths, ElemKind fusedElemKind = ElemKind::UInt8FusedQTy,
       bool useFP16Accumulation = false);
 
   /// Given a vector of segment lengths, calculates offsets of each segment and
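A minimal usage sketch of the updated builder overloads, assuming a hypothetical Function *F, a float Tensor dataTensor, and NodeValue indices/lengths (none of which appear in this diff); the float-input overloads now take a fused ElemKind, while the Constant/NodeValue overloads infer the scale/offset and result precision from the data's ElemKind:

// Quantize and fuse a float Tensor to the new 4-bit fused kind, then build
// the SLS node; the Float16 result precision follows from UInt4FusedFP16QTy.
auto *SLS = F->createFusedRowwiseQuantizedSparseLengthsSum(
    "sls_4bit", dataTensor, indices, lengths,
    /* fusedElemKind */ ElemKind::UInt4FusedFP16QTy,
    /* useFP16Accumulation */ true);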

lib/Backends/CPU/tests/CPUOperatorTest.cpp

Lines changed: 5 additions & 0 deletions
@@ -100,6 +100,11 @@ std::set<std::string> glow::backendTestBlacklist = {
     "back2/0",
     "FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat/0",
     "FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSparseLengthsSum_Fused4Bit_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Fused4Bit_Float16_AccumFloat16/0",
+    "SLWSTwoColumn_Float16_AccumFloat/0",
     "SparseToDenseMask1/0",
     "SparseToDenseMask2/0",
     "FP16Reshape/0",

lib/Backends/Habana/tests/HabanaOperatorTest.cpp

Lines changed: 5 additions & 0 deletions
@@ -147,8 +147,12 @@ std::set<std::string> glow::backendTestBlacklist = {
     "FP16SoftMax/0",
     "Fp16Splat/0",
     "FP16Transpose2Dims/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Fused4Bit_Float16_AccumFloat16",
     "FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat/0",
     "FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSparseLengthsSum_Fused4Bit_Float16_AccumFloat16/0",
     "FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16/0",
     "FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16_back_to_"
     "back/0",
@@ -266,6 +270,7 @@ std::set<std::string> glow::backendTestBlacklist = {
     "sliceReshape_Float16/0",
     "sliceVectors_Float16/0",
     "sliceVectors_Int64/0",
+    "SLWSTwoColumn_Float16_AccumFloat/0",
     "SLSAllZeroLengths_Float/0",
     "SLSAllZeroLengths_Float16/0",
     "SoftMax/0",

lib/Backends/Interpreter/Interpreter.cpp

Lines changed: 22 additions & 19 deletions
@@ -306,32 +306,35 @@ bool Interpreter::isOpSupported(const NodeInfo &NI) const {
                 RowwiseQuantizedSparseLengthsWeightedSumNode::LengthsIdx) ==
             ElemKind::Int32ITy);
 
-  case Kinded::Kind::FusedRowwiseQuantizedSparseLengthsWeightedSumNodeKind:
+  case Kinded::Kind::FusedRowwiseQuantizedSparseLengthsWeightedSumNodeKind: {
     if (NI.getInElemTy(
-            FusedRowwiseQuantizedSparseLengthsWeightedSumNode::DataIdx) ==
-        ElemKind::UInt8FusedFP16QTy) {
+            FusedRowwiseQuantizedSparseLengthsWeightedSumNode::IndicesIdx) !=
+            ElemKind::Int64ITy ||
+        NI.getInElemTy(
+            FusedRowwiseQuantizedSparseLengthsWeightedSumNode::LengthsIdx) !=
+            ElemKind::Int32ITy) {
+      return false;
+    }
+
+    switch (NI.getInElemTy(
+        FusedRowwiseQuantizedSparseLengthsWeightedSumNode::DataIdx)) {
+    case ElemKind::UInt4FusedFP16QTy:
+    case ElemKind::UInt8FusedFP16QTy:
       return (NI.getInElemTy(FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
                                  WeightsIdx) == ElemKind::Float16Ty) &&
-             (NI.getInElemTy(FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
-                                 IndicesIdx) == ElemKind::Int64ITy) &&
-             (NI.getInElemTy(FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
-                                 LengthsIdx) == ElemKind::Int32ITy) &&
             (NI.getOutElemTy(
                  FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
                      ResultIdx) == ElemKind::Float16Ty);
+    case ElemKind::UInt8FusedQTy:
+      return (NI.getInElemTy(FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
+                                 WeightsIdx) == ElemKind::FloatTy) &&
+             (NI.getOutElemTy(
+                  FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
+                      ResultIdx) == ElemKind::FloatTy);
+    default:
+      return false;
     }
-    return (NI.getInElemTy(
-                FusedRowwiseQuantizedSparseLengthsWeightedSumNode::DataIdx) ==
-            ElemKind::UInt8FusedQTy) &&
-           (NI.getInElemTy(FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
-                               WeightsIdx) == ElemKind::FloatTy) &&
-           (NI.getInElemTy(FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
-                               IndicesIdx) == ElemKind::Int64ITy) &&
-           (NI.getInElemTy(FusedRowwiseQuantizedSparseLengthsWeightedSumNode::
-                               LengthsIdx) == ElemKind::Int32ITy) &&
-           (NI.getOutElemTy(
-                FusedRowwiseQuantizedSparseLengthsWeightedSumNode::ResultIdx) ==
-            ElemKind::FloatTy);
+  }
 
   case Kinded::Kind::LengthsRangeFillNodeKind:
   case Kinded::Kind::LengthsToRangesNodeKind:
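The new switch boils down to a per-data-kind mapping of allowed weights/result kinds. The following is a hedged, standalone paraphrase with made-up names (Kind and frwqSlwsComboSupported are not Glow identifiers); the indices/lengths checks are left out since they are the same for every kind:

enum class Kind { Float, Float16, UInt8Fused, UInt8FusedFP16, UInt4FusedFP16 };

// FP16-fused data (8-bit or the new 4-bit kind) requires Float16 weights and
// result; FP32-fused 8-bit data requires Float weights and result.
bool frwqSlwsComboSupported(Kind data, Kind weights, Kind result) {
  switch (data) {
  case Kind::UInt4FusedFP16:
  case Kind::UInt8FusedFP16:
    return weights == Kind::Float16 && result == Kind::Float16;
  case Kind::UInt8Fused:
    return weights == Kind::Float && result == Kind::Float;
  default:
    return false;
  }
}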

lib/Backends/Interpreter/InterpreterNodes.cpp

Lines changed: 14 additions & 5 deletions
@@ -3353,7 +3353,9 @@ void BoundInterpreterFunction::
   assert(totalLength <= indices->dims()[0] &&
          "sum(Lengths) must be equal to len(Indices)");
 
-  const size_t inLineSize = data->size() / data->dims()[0];
+  const bool using4BitQuantization =
+      data->getType().getElementType() == ElemKind::UInt4FusedFP16QTy;
+
   const size_t outLineSize = out->size() / out->dims()[0];
 
   auto DH = data->getHandle<uint8_t>();
@@ -3366,13 +3368,20 @@ void BoundInterpreterFunction::
     for (size_t j = 0, e = LH.raw(i); j < e; j++) {
       const float weight = static_cast<float>(WH.raw(curIdx));
       const size_t rowIdx = IH.raw(curIdx++);
-      size_t offsetIn = rowIdx * inLineSize;
       T scale, offset;
       std::tie(scale, offset) = DH.getFusedScaleOffsetFromRow<T>(rowIdx);
       for (size_t k = 0; k < outLineSize; k++) {
-        float d = quantization::dequantizeWithFloatOffset(
-            DH.raw(offsetIn++), static_cast<float>(scale),
-            static_cast<float>(offset));
+        float d = 0.0f;
+        if (!using4BitQuantization) {
+          d = quantization::dequantizeWithFloatOffset(
+              DH.at({rowIdx, k}), static_cast<float>(scale),
+              static_cast<float>(offset));
+        } else {
+          const bool isMSB = (k % 2 == 1);
+          d = quantization::dequantize4BitWithFloatOffset(
+              DH.at({rowIdx, k / 2}), static_cast<float>(scale),
+              static_cast<float>(offset), isMSB);
+        }
         accum[k] += d * weight;
       }
     }
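For intuition about the new 4-bit branch, here is a small self-contained sketch of the packing and dequantization arithmetic it relies on. This is an illustrative re-implementation, not Glow's quantization API; in particular, the assumption that even output columns live in the low nibble and odd columns in the high nibble is inferred from the isMSB = (k % 2 == 1) and k / 2 indexing above, and the affine value * scale + offset mapping is assumed:

#include <cstdint>
#include <cstdio>

// Dequantize one 4-bit element packed into a byte; isMSB selects the high
// nibble (odd output column) versus the low nibble (even output column).
float dequantize4Bit(uint8_t packed, float scale, float offset, bool isMSB) {
  const uint8_t nibble = isMSB ? (packed >> 4) : (packed & 0x0F);
  return static_cast<float>(nibble) * scale + offset;
}

int main() {
  // One packed byte holding quantized values 3 (low nibble) and 12 (high).
  const uint8_t packed = (12u << 4) | 3u;
  const float scale = 0.5f, offset = -1.0f;
  std::printf("col 0 -> %f\n", dequantize4Bit(packed, scale, offset, false)); // 0.5
  std::printf("col 1 -> %f\n", dequantize4Bit(packed, scale, offset, true));  // 5.0
  return 0;
}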

lib/Backends/NNPI/tests/NNPIOperatorTest.cpp

Lines changed: 2 additions & 0 deletions
@@ -47,6 +47,8 @@ std::set<std::string> glow::backendTestBlacklist = {
     "FullyConnected_Int16_BiasInt16/0",
     "FullyConnected_Int16_BiasInt32/0",
     "FullyConnected_Int8_BiasInt8/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Fused4Bit_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSparseLengthsSum_Fused4Bit_Float16_AccumFloat16/0",
     "GroupConv3D/0",
     "GroupwiseQuantizedConvolution/0",
     "insertTensorTest/0",

lib/Backends/OpenCL/tests/OpenCLOperatorTest.cpp

Lines changed: 6 additions & 0 deletions
@@ -181,6 +181,12 @@ std::set<std::string> glow::backendTestBlacklist = {
     "FusedRowwiseQuantizedSparseLengthsSum_Float/0",
     "FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat/0",
     "FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSparseLengthsSum_Fused4Bit_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Float/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat16/0",
+    "FusedRowwiseQuantizedSLWSTwoColumn_Fused4Bit_Float16_AccumFloat16/0",
+    "SLWSTwoColumn_Float16_AccumFloat/0",
     "SLSWithZeroLengths/0",
     "SparseToDense/0",
     "SparseToDenseMask1/0",

lib/Graph/Graph.cpp

Lines changed: 52 additions & 30 deletions
@@ -1680,38 +1680,48 @@ Function::createRowwiseQuantizedSparseLengthsSum(
 /// Helper used to get specific output type required for
 /// createRowwiseQuantizedSparseLengthsSum and
 /// createRowwiseQuantizedSparseLengthsWeightedSum.
-/// Function \p F is used to get the speficific type, using inputs \p inDims and
-/// \p lenghtsDims to compute output dimensions.
-static TypeRef getOutputTypeOfFusedRowwiseQuantizedSLS(
-    Function *F, const llvm::ArrayRef<size_t> &inDims,
-    const llvm::ArrayRef<size_t> &lengthsDims, ElemKind scaleOffsetKind) {
-  ShapeVector outDims(inDims.begin(), inDims.end());
+/// Function \p F is used to get the specific type, using inputs \p data and
+/// \p lengthsDims to compute output dimensions.
+static TypeRef
+getOutputTypeOfFusedRowwiseQuantizedSLS(Function *F, NodeValue data,
+                                        llvm::ArrayRef<size_t> lengthsDims) {
+  ShapeVector outDims(data.dims().begin(), data.dims().end());
   outDims[0] = lengthsDims[0];
   // The output column count is the same as the input column count, but
   // without the extra bytes for the fused scale/offset, as the output is not
   // fused.
-  outDims[1] -=
-      2 * ((scaleOffsetKind == ElemKind::FloatTy) ? sizeof(float)
-                                                  : sizeof(float16_t));
-  return F->getParent()->uniqueType(scaleOffsetKind, outDims);
+  CHECK(isFusedQuantizedElemKind(data.getElementType()))
+      << "Must use a fused ElemKind for data.";
+  outDims[1] -= 2 * ((data.getElementType() == ElemKind::UInt8FusedQTy)
+                         ? sizeof(float)
+                         : sizeof(float16_t));
+  // If using 4-bit quantization, then the input data has packed two 4-bit
+  // elements into one byte, so we need to double the outDims.
+  if (data.getElementType() == ElemKind::UInt4FusedFP16QTy) {
+    outDims[1] *= 2;
+  }
+  const ElemKind outputK = (data.getElementType() == ElemKind::UInt8FusedQTy)
+                               ? ElemKind::FloatTy
+                               : ElemKind::Float16Ty;
+  return F->getParent()->uniqueType(outputK, outDims);
 }
 
 FusedRowwiseQuantizedSparseLengthsWeightedSumNode *
 Function::createFusedRowwiseQuantizedSparseLengthsWeightedSum(
     llvm::StringRef name, NodeValue data, NodeValue weights, NodeValue indices,
-    NodeValue lengths, ElemKind precision, bool useFP16Accumulation) {
-  auto outTy = getOutputTypeOfFusedRowwiseQuantizedSLS(
-      this, data.dims(), lengths.dims(), precision);
+    NodeValue lengths, bool useFP16Accumulation) {
+  auto outTy =
+      getOutputTypeOfFusedRowwiseQuantizedSLS(this, data, lengths.dims());
   return addNode(new FusedRowwiseQuantizedSparseLengthsWeightedSumNode(
       name, outTy, data, weights, indices, lengths, useFP16Accumulation));
 }
 
 FusedRowwiseQuantizedSparseLengthsSumNode *
 Function::createFusedRowwiseQuantizedSparseLengthsSum(
     llvm::StringRef name, Constant *data, NodeValue indices, NodeValue lengths,
-    ElemKind precision, bool useFP16Accumulation) {
-  auto outTy = getOutputTypeOfFusedRowwiseQuantizedSLS(
-      this, data->dims(), lengths.dims(), precision);
+    bool useFP16Accumulation) {
+  auto outTy =
+      getOutputTypeOfFusedRowwiseQuantizedSLS(this, data, lengths.dims());
   return addNode(new FusedRowwiseQuantizedSparseLengthsSumNode(
       name, outTy, data, indices, lengths, useFP16Accumulation));
 }
@@ -1734,18 +1744,30 @@ static Constant *quantizeDataForFusedRowwiseQuantizedSparseLengthsWeightedSum(
   // dimension to include space for the scale/offset, each 4 bytes
   // (float/int32_t).
   switch (precision) {
-  case ElemKind::FloatTy: {
+  case ElemKind::UInt8FusedQTy: {
     Constant *rwqData = F->getParent()->createConstant(
-        ElemKind::UInt8FusedQTy,
-        {fDims.first, fDims.second + 2 * sizeof(float)}, 0.0, 0, "data");
+        precision, {fDims.first, fDims.second + 2 * sizeof(float)}, 0.0, 0,
+        "data");
     quantization::tensorFusedRowwiseQuantization<float>(
         fData, rwqData->getPayloadMutable());
     return rwqData;
   }
-  case ElemKind::Float16Ty: {
+  case ElemKind::UInt8FusedFP16QTy: {
     Constant *rwqData = F->getParent()->createConstant(
-        ElemKind::UInt8FusedFP16QTy,
-        {fDims.first, fDims.second + 2 * sizeof(float16_t)}, 0.0, 0, "data");
+        precision, {fDims.first, fDims.second + 2 * sizeof(float16_t)}, 0.0, 0,
+        "data");
+    quantization::tensorFusedRowwiseQuantization<float16_t>(
+        fData, rwqData->getPayloadMutable());
+    return rwqData;
+  }
+  case ElemKind::UInt4FusedFP16QTy: {
+    // We pack 4-bit values into bytes, so given the input size in float we
+    // divide by two and take the ceiling to make sure we have enough space for
+    // all elements.
+    const size_t outerDim =
+        std::ceil(((float)fDims.second) / 2) + 2 * sizeof(float16_t);
+    Constant *rwqData = F->getParent()->createConstant(
+        precision, {fDims.first, outerDim}, 0.0, 0, "data");
     quantization::tensorFusedRowwiseQuantization<float16_t>(
         fData, rwqData->getPayloadMutable());
     return rwqData;
@@ -1758,23 +1780,23 @@ static Constant *quantizeDataForFusedRowwiseQuantizedSparseLengthsWeightedSum(
 FusedRowwiseQuantizedSparseLengthsWeightedSumNode *
 Function::createFusedRowwiseQuantizedSparseLengthsWeightedSum(
     llvm::StringRef name, Tensor &data, NodeValue weights, NodeValue indices,
-    NodeValue lengths, ElemKind precision, bool useFP16Accumulation) {
+    NodeValue lengths, ElemKind fusedElemKind, bool useFP16Accumulation) {
   Constant *rwqData =
-      quantizeDataForFusedRowwiseQuantizedSparseLengthsWeightedSum(this, data,
-                                                                   precision);
+      quantizeDataForFusedRowwiseQuantizedSparseLengthsWeightedSum(
+          this, data, fusedElemKind);
   return createFusedRowwiseQuantizedSparseLengthsWeightedSum(
-      name, rwqData, weights, indices, lengths, precision, useFP16Accumulation);
+      name, rwqData, weights, indices, lengths, useFP16Accumulation);
 }
 
 FusedRowwiseQuantizedSparseLengthsSumNode *
 Function::createFusedRowwiseQuantizedSparseLengthsSum(
     llvm::StringRef name, Tensor &data, NodeValue indices, NodeValue lengths,
-    ElemKind precision, bool useFP16Accumulation) {
+    ElemKind fusedElemKind, bool useFP16Accumulation) {
  Constant *rwqData =
-      quantizeDataForFusedRowwiseQuantizedSparseLengthsWeightedSum(this, data,
-                                                                   precision);
+      quantizeDataForFusedRowwiseQuantizedSparseLengthsWeightedSum(
+          this, data, fusedElemKind);
   return this->createFusedRowwiseQuantizedSparseLengthsSum(
-      name, rwqData, indices, lengths, precision, useFP16Accumulation);
+      name, rwqData, indices, lengths, useFP16Accumulation);
 }
 
 LengthsToRangesNode *Function::createLengthsToRanges(llvm::StringRef name,
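A worked example of the sizing arithmetic in the two helpers above, using an illustrative 3 x 10 float input quantized to ElemKind::UInt4FusedFP16QTy (these numbers are not from the diff):

#include <cmath>
#include <cstddef>
#include <cstdio>

int main() {
  // Hypothetical float data: 3 rows x 10 columns; float16 is 2 bytes.
  const std::size_t rows = 3, cols = 10, sizeofFloat16 = 2;

  // Fused constant width: two 4-bit values per byte plus a fused float16
  // scale and offset per row: ceil(10 / 2) + 2 * 2 = 9.
  const std::size_t fusedCols =
      static_cast<std::size_t>(std::ceil(cols / 2.0)) + 2 * sizeofFloat16;

  // SLS output width: strip the scale/offset bytes, then double because each
  // remaining byte held two 4-bit elements: (9 - 4) * 2 = 10.
  const std::size_t outCols = (fusedCols - 2 * sizeofFloat16) * 2;

  std::printf("fused dims {%zu, %zu}, output cols %zu\n", rows, fusedCols,
              outCols);
  return 0;
}

The verifier change in lib/Graph/Nodes.cpp below checks the inverse relation: for 4-bit data, result.dims()[1] / 2 + extraCols must equal data.dims()[1], i.e. 10 / 2 + 4 == 9 in this example.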

lib/Graph/Nodes.cpp

Lines changed: 7 additions & 1 deletion
@@ -1264,10 +1264,16 @@ static bool verifyFusedRowwiseQuantizedSparseLengthsSum(
   // Wrap this in isValid to prevent potential segfault if the result is
   // incorrectly shaped.
   if (isValid) {
+    // If using 4-bit quantization for embeddings then the input is packed into
+    // two elements per byte.
+    size_t finalSize = result.dims()[1];
+    if (data.getType()->getElementType() == ElemKind::UInt4FusedFP16QTy) {
+      finalSize /= 2;
+    }
     isValid &=
         expectCompareTrue("Result output shape should have second dim without "
                           "extra columns from scale/offset in Data.",
-                          result.dims()[1] + extraCols, data.dims()[1], parent);
+                          finalSize + extraCols, data.dims()[1], parent);
   }
   return isValid;
 }
