From 31230a559df4f0bb85a2c029dc70d576f27ffac3 Mon Sep 17 00:00:00 2001 From: sinceforYy <1017657683@qq.com> Date: Tue, 27 Aug 2024 09:49:29 +0800 Subject: [PATCH] Zfa: Support Zfa extension * Support fli.s, fli.d, fminm.s, fminm.d, fmaxm.s, fmaxm.d * Support fround.s, fround.d, froundnx.s, froundnx.d, fcvtmod.w.d * Support fleq.s, fleq.d, fltq.s, fltq.d --- src/main/scala/yunsuan/fpu/FloatAdder.scala | 69 +++++++++-- src/main/scala/yunsuan/package.scala | 4 + src/main/scala/yunsuan/scalar/Convert.scala | 4 + .../yunsuan/vector/VectorConvert/CVT64.scala | 116 ++++++++++++++++-- .../vector/VectorConvert/CVTparameter.scala | 1 + .../vector/VectorConvert/Convert.scala | 12 +- .../yunsuan/vector/VectorConvert/VCVT.scala | 8 +- src/test/scala/top/VectorSimTop.scala | 4 + 8 files changed, 193 insertions(+), 25 deletions(-) diff --git a/src/main/scala/yunsuan/fpu/FloatAdder.scala b/src/main/scala/yunsuan/fpu/FloatAdder.scala index c4713ca..1f5059a 100644 --- a/src/main/scala/yunsuan/fpu/FloatAdder.scala +++ b/src/main/scala/yunsuan/fpu/FloatAdder.scala @@ -83,7 +83,9 @@ class FloatAdder() extends Module { val is_flt = io.op_code === FaddOpCode.flt val is_fle = io.op_code === FaddOpCode.fle val is_fclass = io.op_code === FaddOpCode.fclass - val resultNeedBox = RegEnable(is_add || is_sub || is_min || is_max || is_fsgnj || is_fsgnjn || is_fsgnjx, fire) + val is_fminm = io.op_code === FaddOpCode.fminm + val is_fmaxm = io.op_code === FaddOpCode.fmaxm + val resultNeedBox = RegEnable(is_add || is_sub || is_min || is_max || is_fsgnj || is_fsgnjn || is_fsgnjx || is_fminm || is_fmaxm, fire) val fp_f64_result = F64_result val fp_f32_result = Cat(Fill(32, resultNeedBox), F32_result) val fp_f16_result = Cat(Fill(48, resultNeedBox), F16_result) @@ -177,8 +179,10 @@ private[fpu] class FloatAdderF32F16MixedPipeline(val is_print:Boolean = false,va ) val fp_a_is_NAN = io.fp_aIsFpCanonicalNAN | Efp_a_is_all_one & fp_a_mantissa_isnot_zero val fp_a_is_SNAN = !io.fp_aIsFpCanonicalNAN & Efp_a_is_all_one & fp_a_mantissa_isnot_zero & !fp_a_to32(significandWidth-2) + val fp_a_is_QNAN = !io.fp_aIsFpCanonicalNAN & Efp_a_is_all_one & fp_a_mantissa_isnot_zero & fp_a_to32(significandWidth-2) val fp_b_is_NAN = io.fp_bIsFpCanonicalNAN | Efp_b_is_all_one & fp_b_mantissa_isnot_zero val fp_b_is_SNAN = !io.fp_bIsFpCanonicalNAN & Efp_b_is_all_one & fp_b_mantissa_isnot_zero & !fp_b_to32(significandWidth-2) + val fp_b_is_QNAN = !io.fp_bIsFpCanonicalNAN & Efp_b_is_all_one & fp_b_mantissa_isnot_zero & fp_b_to32(significandWidth-2) val fp_a_is_infinite = !io.fp_aIsFpCanonicalNAN & Efp_a_is_all_one & (!fp_a_mantissa_isnot_zero) val fp_b_is_infinite = !io.fp_bIsFpCanonicalNAN & Efp_b_is_all_one & (!fp_b_mantissa_isnot_zero) val fp_a_is_zero = !io.fp_aIsFpCanonicalNAN & Efp_a_is_zero & !fp_a_mantissa_isnot_zero @@ -228,6 +232,10 @@ private[fpu] class FloatAdderF32F16MixedPipeline(val is_print:Boolean = false,va val is_fsgnjn = io.op_code === FaddOpCode.fsgnjn val is_fsgnjx = io.op_code === FaddOpCode.fsgnjx val is_fclass = io.op_code === FaddOpCode.fclass + val is_fminm = io.op_code === FaddOpCode.fminm + val is_fmaxm = io.op_code === FaddOpCode.fmaxm + val is_fleq = io.op_code === FaddOpCode.fleq + val is_fltq = io.op_code === FaddOpCode.fltq val fp_a_sign = fp_a_to32.head(1) val fp_b_sign = fp_b_to32.head(1) @@ -250,6 +258,8 @@ private[fpu] class FloatAdderF32F16MixedPipeline(val is_print:Boolean = false,va val result_flt = Wire(UInt(floatWidth.W)) val result_fle = Wire(UInt(floatWidth.W)) val result_fclass = Wire(UInt(floatWidth.W)) + val result_fminm = Wire(UInt(floatWidth.W)) + val result_fmaxm = Wire(UInt(floatWidth.W)) val in_NAN = Mux(res_is_f32, Cat(0.U(1.W),Fill(9, 1.U(1.W)),0.U(22.W)), Cat(0.U(17.W),Fill(6, 1.U(1.W)),0.U(9.W))) val fp_aFix = Mux(io.fp_aIsFpCanonicalNAN, in_NAN, io.fp_a) val fp_bFix = Mux(io.fp_bIsFpCanonicalNAN, in_NAN, io.fp_b) @@ -308,18 +318,32 @@ private[fpu] class FloatAdderF32F16MixedPipeline(val is_print:Boolean = false,va fp_a_is_SNAN, fp_a_is_NAN & !fp_a_is_SNAN ))) + result_fminm := Mux(!fp_a_is_NAN & !fp_b_is_NAN, + Mux(fp_b_is_less || (fp_b_sign.asBool && fp_b_is_zero && fp_a_is_zero), + fp_b_16_or_32, + fp_a_16_or_32), + out_NAN + ) + result_fmaxm := Mux(!fp_a_is_NAN & !fp_b_is_NAN, + Mux(fp_b_is_greater.asBool || (!fp_b_sign.asBool && fp_b_is_zero && fp_a_is_zero), + fp_b_16_or_32, + fp_a_16_or_32), + out_NAN + ) val result_stage0 = Mux1H( Seq( is_min, is_max, is_feq, - is_flt, - is_fle, + is_flt | is_fltq, + is_fle | is_fleq, is_fsgnj, is_fsgnjn, is_fsgnjx, is_fclass, + is_fminm, + is_fmaxm, ), Seq( result_min, @@ -331,11 +355,15 @@ private[fpu] class FloatAdderF32F16MixedPipeline(val is_print:Boolean = false,va result_fsgnjn, result_fsgnjx, result_fclass, + result_fminm, + result_fmaxm, ) ) val fflags_NV_stage0 = ((is_min | is_max) & (fp_a_is_SNAN | fp_b_is_SNAN)) | ((is_feq ) & (fp_a_is_SNAN | fp_b_is_SNAN)) | - ((is_flt | is_fle ) & (fp_a_is_NAN | fp_b_is_NAN)) + ((is_flt | is_fle ) & (fp_a_is_NAN | fp_b_is_NAN)) | + ((is_fminm | is_fmaxm) & (fp_a_is_SNAN | fp_b_is_SNAN)) | + ((is_fltq | is_fleq) & (fp_a_is_SNAN | fp_b_is_SNAN)) val fflags_stage0 = Cat(fflags_NV_stage0,0.U(4.W)) io.fp_c := Mux(RegEnable(is_add | is_sub , fire),float_adder_result,RegEnable(result_stage0, fire)) io.fflags := Mux(RegEnable(is_add | is_sub , fire),float_adder_fflags,RegEnable(fflags_stage0, fire)) @@ -390,8 +418,10 @@ private[fpu] class FloatAdderF64Pipeline(val is_print:Boolean = false,val hasMin val Efp_b_is_all_one = Efp_b.andR val fp_a_is_NAN = io.fp_aIsFpCanonicalNAN | Efp_a_is_all_one & fp_a_mantissa_isnot_zero val fp_a_is_SNAN = !io.fp_aIsFpCanonicalNAN & Efp_a_is_all_one & fp_a_mantissa_isnot_zero & !fp_a_to64(significandWidth-2) + val fp_a_is_QNAN = !io.fp_aIsFpCanonicalNAN & Efp_a_is_all_one & fp_a_mantissa_isnot_zero & fp_a_to64(significandWidth-2) val fp_b_is_NAN = io.fp_bIsFpCanonicalNAN | Efp_b_is_all_one & fp_b_mantissa_isnot_zero val fp_b_is_SNAN = !io.fp_bIsFpCanonicalNAN & Efp_b_is_all_one & fp_b_mantissa_isnot_zero & !fp_b_to64(significandWidth-2) + val fp_b_is_QNAN = !io.fp_bIsFpCanonicalNAN & Efp_b_is_all_one & fp_b_mantissa_isnot_zero & fp_b_to64(significandWidth-2) val fp_a_is_infinite = !io.fp_aIsFpCanonicalNAN & Efp_a_is_all_one & (!fp_a_mantissa_isnot_zero) val fp_b_is_infinite = !io.fp_bIsFpCanonicalNAN & Efp_b_is_all_one & (!fp_b_mantissa_isnot_zero) val fp_a_is_zero = !io.fp_aIsFpCanonicalNAN & Efp_a_is_zero & !fp_a_mantissa_isnot_zero @@ -435,6 +465,10 @@ private[fpu] class FloatAdderF64Pipeline(val is_print:Boolean = false,val hasMin val is_fsgnjn = io.op_code === FaddOpCode.fsgnjn val is_fsgnjx = io.op_code === FaddOpCode.fsgnjx val is_fclass = io.op_code === FaddOpCode.fclass + val is_fminm = io.op_code === FaddOpCode.fminm + val is_fmaxm = io.op_code === FaddOpCode.fmaxm + val is_fleq = io.op_code === FaddOpCode.fleq + val is_fltq = io.op_code === FaddOpCode.fltq val fp_a_sign = io.fp_a.head(1) val fp_b_sign = io.fp_b.head(1) val fp_b_sign_is_greater = fp_a_sign & !fp_b_sign @@ -455,6 +489,8 @@ private[fpu] class FloatAdderF64Pipeline(val is_print:Boolean = false,val hasMin val result_feq = Wire(UInt(floatWidth.W)) val result_flt = Wire(UInt(floatWidth.W)) val result_fle = Wire(UInt(floatWidth.W)) + val result_fminm = Wire(UInt(floatWidth.W)) + val result_fmaxm = Wire(UInt(floatWidth.W)) val in_NAN = Cat(0.U, Fill(exponentWidth, 1.U), 1.U, Fill(significandWidth - 2, 0.U)) val fp_aFix = Mux(io.fp_aIsFpCanonicalNAN, in_NAN, io.fp_a) val fp_bFix = Mux(io.fp_bIsFpCanonicalNAN, in_NAN, io.fp_b) @@ -506,17 +542,32 @@ private[fpu] class FloatAdderF64Pipeline(val is_print:Boolean = false,val hasMin fp_a_is_SNAN, fp_a_is_NAN & !fp_a_is_SNAN ))) + result_fminm := Mux(!fp_a_is_NAN & !fp_b_is_NAN, + Mux(fp_b_is_less || (fp_b_sign.asBool && fp_b_is_zero && fp_a_is_zero), + io.fp_b, + io.fp_a), + out_NAN + ) + result_fmaxm := Mux(!fp_a_is_NAN & !fp_b_is_NAN, + Mux(fp_b_is_greater.asBool || (!fp_b_sign.asBool && fp_b_is_zero && fp_a_is_zero), + io.fp_b, + io.fp_a), + out_NAN + ) + val result_stage0 = Mux1H( Seq( is_min, is_max, is_feq, - is_flt, - is_fle, + is_flt | is_fltq, + is_fle | is_fleq, is_fsgnj, is_fsgnjn, is_fsgnjx, is_fclass, + is_fminm, + is_fmaxm, ), Seq( result_min, @@ -528,11 +579,15 @@ private[fpu] class FloatAdderF64Pipeline(val is_print:Boolean = false,val hasMin result_fsgnjn, result_fsgnjx, result_fclass, + result_fminm, + result_fmaxm, ) ) val fflags_NV_stage0 = ((is_min | is_max) & (fp_a_is_SNAN | fp_b_is_SNAN)) | (is_feq & (fp_a_is_SNAN | fp_b_is_SNAN)) | - ((is_flt | is_fle ) & (fp_a_is_NAN | fp_b_is_NAN)) + ((is_flt | is_fle ) & (fp_a_is_NAN | fp_b_is_NAN)) | + ((is_fminm | is_fmaxm) & (fp_a_is_SNAN | fp_b_is_SNAN)) | + ((is_fltq | is_fleq) & (fp_a_is_SNAN | fp_b_is_SNAN)) val fflags_stage0 = Cat(fflags_NV_stage0, 0.U(4.W)) io.fp_c := Mux(RegEnable(is_add | is_sub, fire), float_adder_result, RegEnable(result_stage0, fire)) io.fflags := Mux(RegEnable(is_add | is_sub, fire), float_adder_fflags, RegEnable(fflags_stage0, fire)) diff --git a/src/main/scala/yunsuan/package.scala b/src/main/scala/yunsuan/package.scala index e16c551..ed7daae 100644 --- a/src/main/scala/yunsuan/package.scala +++ b/src/main/scala/yunsuan/package.scala @@ -582,6 +582,10 @@ object VfcvtType { def fsgnj = "b00110".U(width.W) def fsgnjx = "b01000".U(width.W) def fsgnjn = "b00111".U(width.W) + def fminm = "b11110".U(width.W) + def fmaxm = "b10011".U(width.W) + def fleq = "b11100".U(width.W) + def fltq = "b11011".U(width.W) } object FmaOpCode { def width = 4 diff --git a/src/main/scala/yunsuan/scalar/Convert.scala b/src/main/scala/yunsuan/scalar/Convert.scala index 530d0b1..d91e7ee 100644 --- a/src/main/scala/yunsuan/scalar/Convert.scala +++ b/src/main/scala/yunsuan/scalar/Convert.scala @@ -85,6 +85,8 @@ class FpCvtIO(width: Int) extends Bundle { val sew = Input(UInt(2.W)) val rm = Input(UInt(3.W)) val isFpToVecInst = Input(Bool()) + val isFround = Input(UInt(2.W)) + val isFcvtmod = Input(Bool()) val result = Output(UInt(width.W)) val fflags = Output(UInt(5.W)) @@ -151,6 +153,8 @@ class FPCVT(xlen :Int) extends Module{ fcvt.io.opType := io.opType fcvt.io.rm := io.rm fcvt.io.isFpToVecInst := io.isFpToVecInst + fcvt.io.isFround := io.isFround + fcvt.io.isFcvtmod := io.isFcvtmod fcvt.io.input1H := input1H fcvt.io.output1H := output1H diff --git a/src/main/scala/yunsuan/vector/VectorConvert/CVT64.scala b/src/main/scala/yunsuan/vector/VectorConvert/CVT64.scala index d7b605d..9f4387b 100644 --- a/src/main/scala/yunsuan/vector/VectorConvert/CVT64.scala +++ b/src/main/scala/yunsuan/vector/VectorConvert/CVT64.scala @@ -7,8 +7,8 @@ import yunsuan.vector.VectorConvert.utils._ import yunsuan.vector.VectorConvert.RoundingModle._ import yunsuan.util._ class CVT64(width: Int = 64,mode: Boolean) extends CVT(width){ - val (fire, src, sew, opType, rm, input1H, output1H, isFpToVecInst) = - (io.fire, io.src, io.sew, io.opType, io.rm, io.input1H, io.output1H, io.isFpToVecInst) + val (fire, src, sew, opType, rm, input1H, output1H, isFpToVecInst, isFround, isFcvtmod) = + (io.fire, io.src, io.sew, io.opType, io.rm, io.input1H, io.output1H, io.isFpToVecInst, io.isFround, io.isFcvtmod) val fireReg = GatedValidRegNext(fire) val outIsFpNext = opType.tail(1).head(1).asBool @@ -26,6 +26,8 @@ class CVT64(width: Int = 64,mode: Boolean) extends CVT(width){ (!inIsFpNext, inIsFpNext && outIsFpNext && isWiden, inIsFpNext && outIsFpNext && isNarrow, !outIsFpNext, inIsFpNext && outIsFpNext && isCrossHigh, inIsFpNext && outIsFpNext && isCrossLow) + val isFroundOrFroundnxNext = isFround.orR + val isInt2Fp = RegEnable(isInt2FpNext, false.B, fire) val isFpWiden = RegEnable(isFpWidenNext, false.B, fire) val isFpNarrow = RegEnable(isFpNarrowNext, false.B, fire) @@ -33,7 +35,9 @@ class CVT64(width: Int = 64,mode: Boolean) extends CVT(width){ val isFp2Int = RegEnable(isFp2IntNext, false.B, fire) val isFpCrossHigh = RegEnable(isFpCrossHighNext, false.B, fire) val isFpCrossLow = RegEnable(isFpCrossLowNext, false.B, fire) - val isFPsrc = isFpWiden || isFpNarrow || isFpCrossHigh || isFpCrossLow || isFp2Int + val isFroundReg = RegEnable(isFroundOrFroundnxNext, false.B, fire) + val isFcvtmodReg = RegEnable(isFcvtmod, false.B, fire) + val isFPsrc = isFpWiden || isFpNarrow || isFpCrossHigh || isFpCrossLow || isFp2Int || isFroundReg || isFcvtmodReg val s0_outIsF64 = outIsFpNext && output1H(3) val s0_outIsF32 = outIsFpNext && output1H(2) @@ -66,6 +70,8 @@ class CVT64(width: Int = 64,mode: Boolean) extends CVT(width){ fpcvt.io.input1H := input1H fpcvt.io.output1H := output1H fpcvt.io.isFpToVecInst := isFpToVecInst + fpcvt.io.isFround := isFround + fpcvt.io.isFcvtmod := isFcvtmod val s1_resultForfpCanonicalNAN = Mux1H( Seq(s1_outIsF64, s1_outIsF32, s1_outIsF16, s1_outIsU32 || s1_outIsU64, s1_outIsS32, s1_outIsS64), @@ -121,6 +127,8 @@ class CVT_IO extends Bundle{ val input1H = Input(UInt(4.W)) val output1H = Input(UInt(4.W)) val isFpToVecInst = Input(Bool()) + val isFround = Input(UInt(2.W)) + val isFcvtmod = Input(Bool()) val result = Output(UInt(64.W)) val fflags = Output(UInt(5.W)) } @@ -142,8 +150,8 @@ class FP_INCVT extends Module { val intParamMap = (0 to 3).map(i => (1 << i) * 8) val widthExpAdder = 13 // 13bits is enough //input - val (fire, src, opType, rmNext, input1H, output1H, isFpToVecInst) = - (io.fire, io.src, io.opType, io.rm, io.input1H, io.output1H, io.isFpToVecInst) + val (fire, src, opType, rmNext, input1H, output1H, isFpToVecInst, isFround, isFcvtmod) = + (io.fire, io.src, io.opType, io.rm, io.input1H, io.output1H, io.isFpToVecInst, io.isFround, io.isFcvtmod) val fireReg = GatedValidRegNext(fire) val isWiden = !opType(4) && opType(3) @@ -157,6 +165,9 @@ class FP_INCVT extends Module { val float1HSrcNext = input1H.head(3)//exclude f8 val float1HOutNext = output1H.head(3)//exclude f8 + val isFroundOrFroundnxNext = isFround.orR + val isFroundnxNext = isFround(1) + //fp input extend val srcMap = (0 to 3).map(i => src((1 << i) * 8 - 1, 0)) val floatMap = srcMap.zipWithIndex.map{case (float,i) => floatExtend(float, i)}.drop(1) @@ -180,6 +191,9 @@ class FP_INCVT extends Module { (outIsFpNext && isWiden, outIsFpNext && isNarrow, !outIsFpNext, outIsFpNext && isCrossHigh, outIsFpNext && isCrossLow) + val froundOrFroundnxIsZeroOrInfNext = isFroundOrFroundnxNext && (isZeroSrcNext || isInfSrcNext) + val fcvtmodIsInfOrNaNNext = isFcvtmod && (isInfSrcNext || isNaNSrcNext) + //s1 val expIsOnesSrc = RegEnable(expIsOnesSrcNext, false.B, fire) val fracNotZeroSrc = RegEnable(fracNotZeroSrcNext, false.B, fire) @@ -198,6 +212,11 @@ class FP_INCVT extends Module { val s0_fpCanonicalNAN = isFpToVecInst & (input1H(1) & !src.head(48).andR | input1H(2) & !src.head(32).andR) val s1_fpCanonicalNAN = RegEnable(s0_fpCanonicalNAN, fire) + val isFroundnxReg = RegEnable(isFroundnxNext, false.B, fire) + val isFroundOrFroundnxReg = RegEnable(isFroundOrFroundnxNext, false.B, fire) + val froundOrFroundnxIsZeroOrInf = RegEnable(froundOrFroundnxIsZeroOrInfNext, false.B, fire) + val fcvtmodIsInfOrNaN = RegEnable(fcvtmodIsInfOrNaNNext, false.B, fire) + // for fpnarrow sub val trunSticky = RegEnable(fracSrc.tail(f32.fracWidth).orR, false.B, fire) val signSrc = RegEnable(signSrcNext, false.B, fire) @@ -284,6 +303,46 @@ class FP_INCVT extends Module { val sticky = Wire(Bool()) inRounder := inRounderTmp sticky := stickyTmp + + /** + * fround + * frac + * cycle: 0 + */ + val froundExpDeltaNext = Wire(UInt(6.W)) + val froundFracShiftNext = Wire(UInt(64.W)) + val froundExpSubBias = Wire(UInt(f64.expWidth.W)) + + val froundMaxExpNext = Mux1H(float1HOutNext, fpParamMap.map(fp => fp.froundMaxExp.U)) + val froundFracNext = fracValueSrc ## 0.U(11.W) + + val froundExpLessThanBiasNext = Mux1H(float1HOutNext, fpParamMap.map(fp => !expSrcNext(fp.expWidth-1) && !expSrcNext(fp.expWidth-2, 0).andR)) + val froundExpGreaterThanMaxExpNext = expSrcNext > froundMaxExpNext + + froundExpSubBias := Mux1H(float1HOutNext, fpParamMap.map(fp => fp.bias.U)) - expSrcNext + froundExpDeltaNext := Mux(froundExpLessThanBiasNext, froundExpSubBias, 1.U + ~froundExpSubBias) + froundFracShiftNext := Mux(froundExpLessThanBiasNext, froundFracNext >> froundExpDeltaNext, froundFracNext << froundExpDeltaNext) + + val fracShiftMaskNext = f64.fracWidth.U - froundExpDeltaNext + + val froundFracShift = RegEnable(froundFracShiftNext, 0.U, fire) + val froundExpLessThanBias = RegEnable(froundExpLessThanBiasNext, false.B, fire) + val froundExpGreaterThanMaxExp = RegEnable(froundExpGreaterThanMaxExpNext, false.B, fire) + val fracShiftMask = RegEnable(fracShiftMaskNext, 0.U, fire) + val froundOldExp = RegEnable(expSrcNext, 0.U, fire) + val froundOldFrac = RegEnable(fracSrc, 0.U, fire) + + // cycle1 + val froundShiftMask = Wire(UInt(64.W)) + val froundUpShiftMask = Wire(UInt(52.W)) + val froundOldInput = Wire(UInt(64.W)) + val froundUpInput = Wire(UInt(64.W)) + + froundShiftMask := ~0.U(64.W) << fracShiftMask + froundUpShiftMask := 1.U << fracShiftMask + froundOldInput := Cat(signSrc, froundOldExp, froundOldFrac) & froundShiftMask + froundUpInput := froundOldInput + froundUpShiftMask + /** rounder * for: int->fp, fp-fp Narrow, fp->int * cycle: 1 @@ -304,10 +363,14 @@ class FP_INCVT extends Module { (rounderMap(0), rounderMap(1), rounderMap(2), rounderMap(3)) } val rounderInput = Mux(isFp2Int, inRounder.head(64), Mux1H(float1HOut, rounderInputMap)) + + val froundRoundIn = froundFracShift.tail(1).head(1).asBool + val froundStickyIn = Mux1H(float1HOut, fpParamMap.map(fp => froundFracShift.tail(2).head(fp.fracWidth - 1).orR)) + val rounder = Module(new RoundingUnit(64)) - rounder.io.in := rounderInput - rounder.io.roundIn := Mux(isFp2Int, inRounder(0), Mux1H(float1HOut, rounerInMap)) - rounder.io.stickyIn := Mux(isFp2Int, sticky, Mux1H(float1HOut, rounderStikyMap)) + rounder.io.in := Mux(isFroundOrFroundnxReg, froundFracShift, rounderInput) + rounder.io.roundIn := Mux(isFroundOrFroundnxReg, froundRoundIn, Mux(isFp2Int, inRounder(0), Mux1H(float1HOut, rounerInMap))) + rounder.io.stickyIn := Mux(isFroundOrFroundnxReg, froundStickyIn, Mux(isFp2Int, sticky, Mux1H(float1HOut, rounderStikyMap))) rounder.io.signIn := signSrc rounder.io.rm := rm @@ -433,6 +496,35 @@ class FP_INCVT extends Module { } val fpNarrowResultMap: Seq[UInt] = Seq(f16, f32).map(fp => Mux1H(result1H.asBools.reverse, fpNarrowResultMapGen(fp))) resultNext := Mux1H(float1HOut.tail(1), fpNarrowResultMap) + }.elsewhen(isFroundOrFroundnxReg) { + val oldInputReg = Mux1H(float1HOut, fpParamMap.map(fp => signSrc ## froundOldExp(fp.expWidth - 1, 0) ## froundOldFrac.head(fp.fracWidth))) + + nv := isSNaNSrc + dz := false.B + of := false.B + uf := false.B + nx := isFroundnxReg && nxRounded && !isNaNSrc + + val result1H = Cat( + froundOrFroundnxIsZeroOrInf || froundExpGreaterThanMaxExp && !isNaNSrc, + isNaNSrc, + froundExpLessThanBias, + !froundExpLessThanBias && !froundOrFroundnxIsZeroOrInf && !froundExpGreaterThanMaxExp, + ) + + def froundResultMapGen(fp: FloatFormat): Seq[UInt] = { + VecInit((0 to 3).map { + case 0 => oldInputReg + case 1 => 0.U ## ~0.U(fp.expWidth.W) ## 1.U ## 0.U((fp.fracWidth - 1).W) + case 2 => signSrc ## Mux(upRounded, 0.U ## Fill(fp.expWidth - 1, 1.U(1.W)), 0.U(fp.expWidth.W)) ## 0.U(fp.fracWidth.W) + case 3 => Mux(upRounded, + froundUpInput.head(1) ## froundUpInput.tail(1).head(f64.expWidth)(fp.expWidth - 1, 0) ## froundUpInput.tail(1 + f64.expWidth).head(fp.fracWidth), + froundOldInput.head(1) ## froundOldInput.tail(1).head(f64.expWidth)(fp.expWidth - 1, 0) ## froundOldInput.tail(1 + f64.expWidth).head(fp.fracWidth)) + }) + } + + val froundResultMap: Seq[UInt] = fpParamMap.map(fp => Mux1H(result1H.asBools.reverse, froundResultMapGen(fp))) + resultNext := Mux1H(float1HOut, froundResultMap) }.otherwise{ /** out is int, any fp->any int/uint * drop the shift left! @@ -475,10 +567,10 @@ class FP_INCVT extends Module { uf := false.B nx := Mux(hasSignInt, toInx, toUnx) val result1H = Cat( - (!hasSignInt && !toUnv) || (hasSignInt && !toInv), //toUnv include nan & inf - !hasSignInt && toUnv && (isNaNSrc || !signSrc && (isInfSrc || ofExpRounded)), - !hasSignInt && toUnv && signSrc && !isNaNSrc, - hasSignInt && toInv + ((!hasSignInt && !toUnv) || (hasSignInt && !toInv)) && !fcvtmodIsInfOrNaN, //toUnv include nan & inf + !hasSignInt && toUnv && (isNaNSrc || !signSrc && (isInfSrc || ofExpRounded)) && !fcvtmodIsInfOrNaN, + !hasSignInt && toUnv && signSrc && !isNaNSrc || fcvtmodIsInfOrNaN, + hasSignInt && toInv && !fcvtmodIsInfOrNaN ) resultNext := Mux1H(result1H.asBools.reverse, Seq( normalResult, diff --git a/src/main/scala/yunsuan/vector/VectorConvert/CVTparameter.scala b/src/main/scala/yunsuan/vector/VectorConvert/CVTparameter.scala index 503eb6d..ccd072c 100644 --- a/src/main/scala/yunsuan/vector/VectorConvert/CVTparameter.scala +++ b/src/main/scala/yunsuan/vector/VectorConvert/CVTparameter.scala @@ -21,6 +21,7 @@ trait FloatFormat{ def precision = fracWidth + 1 def maxExp = (BigInt(1) << expWidth) - 2 def minExp = 1 + def froundMaxExp = fracWidth + bias } object f16 extends FloatFormat { diff --git a/src/main/scala/yunsuan/vector/VectorConvert/Convert.scala b/src/main/scala/yunsuan/vector/VectorConvert/Convert.scala index d891936..c024c2d 100644 --- a/src/main/scala/yunsuan/vector/VectorConvert/Convert.scala +++ b/src/main/scala/yunsuan/vector/VectorConvert/Convert.scala @@ -12,6 +12,8 @@ class VectorCvtIO(width: Int) extends Bundle { val sew = Input(UInt(2.W)) val rm = Input(UInt(3.W)) val isFpToVecInst = Input(Bool()) + val isFround = Input(UInt(2.W)) + val isFcvtmod = Input(Bool()) val result = Output(UInt(width.W)) val fflags = Output(UInt(20.W)) @@ -20,7 +22,7 @@ class VectorCvtIO(width: Int) extends Bundle { class VectorCvt(xlen :Int) extends Module{ val io = IO(new VectorCvtIO(xlen)) - val (fire, src, opType, sew, rm, isFpToVecInst) = (io.fire, io.src, io.opType, io.sew, io.rm, io.isFpToVecInst) + val (fire, src, opType, sew, rm, isFpToVecInst, isFround, isFcvtmod) = (io.fire, io.src, io.opType, io.sew, io.rm, io.isFpToVecInst, io.isFround, io.isFcvtmod) val widen = opType(4, 3) // 0->single 1->widen 2->norrow => width of result // input width 8, 16, 32, 64 @@ -89,10 +91,10 @@ class VectorCvt(xlen :Int) extends Module{ val in3 = Mux1H(inputWidth1H, Seq(element8(3), element16(3), 0.U, 0.U)) - val (result0, fflags0) = VCVT(64)(fire, in0, opType, sew, rm, input1H, output1H, isFpToVecInst) - val (result1, fflags1) = VCVT(32)(fire, in1, opType, sew, rm, input1H, output1H, isFpToVecInst) - val (result2, fflags2) = VCVT(16)(fire, in2, opType, sew, rm, input1H, output1H, isFpToVecInst) - val (result3, fflags3) = VCVT(16)(fire, in3, opType, sew, rm, input1H, output1H, isFpToVecInst) + val (result0, fflags0) = VCVT(64)(fire, in0, opType, sew, rm, input1H, output1H, isFpToVecInst, isFround, isFcvtmod) + val (result1, fflags1) = VCVT(32)(fire, in1, opType, sew, rm, input1H, output1H, isFpToVecInst, isFround, isFcvtmod) + val (result2, fflags2) = VCVT(16)(fire, in2, opType, sew, rm, input1H, output1H, isFpToVecInst, isFround, isFcvtmod) + val (result3, fflags3) = VCVT(16)(fire, in3, opType, sew, rm, input1H, output1H, isFpToVecInst, isFround, isFcvtmod) io.result := Mux1H(outputWidth1H, Seq( result3(7,0) ## result2(7,0) ## result1(7,0) ## result0(7,0), diff --git a/src/main/scala/yunsuan/vector/VectorConvert/VCVT.scala b/src/main/scala/yunsuan/vector/VectorConvert/VCVT.scala index ca4734f..3c15d6d 100644 --- a/src/main/scala/yunsuan/vector/VectorConvert/VCVT.scala +++ b/src/main/scala/yunsuan/vector/VectorConvert/VCVT.scala @@ -12,6 +12,8 @@ class CVTIO(width: Int) extends Bundle { val input1H = Input(UInt(4.W)) val output1H = Input(UInt(4.W)) val isFpToVecInst = Input(Bool()) + val isFround = Input(UInt(2.W)) + val isFcvtmod = Input(Bool()) val result = Output(UInt(width.W)) val fflags = Output(UInt(5.W)) } @@ -39,7 +41,9 @@ object VCVT { rm: UInt, input1H: UInt, output1H: UInt, - isFpToVecInst: Bool + isFpToVecInst: Bool, + isFround: UInt, + isFcvtmod: Bool ): (UInt, UInt) = { val vcvtWraper = Module(new VCVT(width)) vcvtWraper.io.fire := fire @@ -50,6 +54,8 @@ object VCVT { vcvtWraper.io.input1H := input1H vcvtWraper.io.output1H := output1H vcvtWraper.io.isFpToVecInst := isFpToVecInst + vcvtWraper.io.isFround := isFround + vcvtWraper.io.isFcvtmod := isFcvtmod (vcvtWraper.io.result, vcvtWraper.io.fflags) } } \ No newline at end of file diff --git a/src/test/scala/top/VectorSimTop.scala b/src/test/scala/top/VectorSimTop.scala index 9d12b91..7851e5c 100644 --- a/src/test/scala/top/VectorSimTop.scala +++ b/src/test/scala/top/VectorSimTop.scala @@ -273,6 +273,8 @@ class SimTop() extends VPUTestModule { vcvt.io.rm := rm vcvt.io.src := src1 // 128 bit->vcvt vcvt.io.isFpToVecInst := false.B + vcvt.io.isFround := 0.U + vcvt.io.isFcvtmod := false.B vcvt_result.vxsat := 0.U vcvt_result.result(i) := vcvt.io.result vcvt_result.fflags(i) := vcvt.io.fflags @@ -296,6 +298,8 @@ class SimTop() extends VPUTestModule { fpcvt.io.rm := rm fpcvt.io.src := src1 fpcvt.io.isFpToVecInst := true.B + fpcvt.io.isFround := 0.U + fpcvt.io.isFcvtmod := false.B fpcvt_result.vxsat := 0.U fpcvt_result.result(i) := fpcvt.io.result fpcvt_result.fflags(i) := fpcvt.io.fflags