From 387d51650d9ffb56345c3a503e36fbeaeb7e292c Mon Sep 17 00:00:00 2001 From: Florent Rotenberg Date: Tue, 18 Jun 2019 16:44:40 +0200 Subject: [PATCH 01/13] :space_invader: Fix compilation issue in xcelium (#14) * Fix compilation issue in xcelium * Update src/fpnew_opgroup_block.sv Co-Authored-By: Stefan Mach <33124232+stmach@users.noreply.github.com> * Update src/fpnew_opgroup_block.sv Co-Authored-By: Stefan Mach <33124232+stmach@users.noreply.github.com> * :art: Align whitespace --- src/fpnew_opgroup_block.sv | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index d5f71a69..12713f24 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -128,7 +128,10 @@ module fpnew_opgroup_block #( assign fmt_out_valid[fmt] = 1'b0; // don't emit values assign fmt_busy[fmt] = 1'b0; // never busy // Outputs are don't care - assign fmt_outputs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; + assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); // Tie off disabled formats end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt @@ -136,7 +139,10 @@ module fpnew_opgroup_block #( assign fmt_out_valid[fmt] = 1'b0; // don't emit values assign fmt_busy[fmt] = 1'b0; // never busy // Outputs are don't care - assign fmt_outputs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; + assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); end end From 2a5a47cdc500137a62a3652178c880e036bda09c Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 21 Jun 2019 14:46:42 +0200 
Subject: [PATCH 02/13] :books: Update changelog --- docs/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ddd51fca..4ca4f66d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,7 +13,7 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ### Added ### Changed ### Fixed - +- Don't care assignments to structs have been expanded for better tool support ## [0.5.6] - 2019-06-12 From 6bad2c2659ff7e4bd048577a04d5364e1a471694 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 20 Jun 2019 12:01:18 +0200 Subject: [PATCH 03/13] :bug: Fix undriven busy signals in output pipe When the output pipeline was bypassed, the busy_output signal remained undriven, causing issues for some pipeline configurations --- docs/CHANGELOG.md | 1 + src/fpnew_cast_multi.sv | 1 + src/fpnew_fma.sv | 1 + src/fpnew_fma_multi.sv | 1 + 4 files changed, 4 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 4ca4f66d..6a7cd2b7 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -14,6 +14,7 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ### Changed ### Fixed - Don't care assignments to structs have been expanded for better tool support +- Undriven busy signal in output pipeline bypass ## [0.5.6] - 2019-06-12 diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index dae8bd51..a75ceeed 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -739,6 +739,7 @@ module fpnew_cast_multi #( assign tag_o = tag_q2; assign aux_o = aux_q2; assign out_valid_o = out_valid_inside; + assign busy_output = 1'b0; end assign busy_o = busy_input | busy_inside | busy_output; diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index 256d7f36..6e1ac4fd 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -675,6 +675,7 @@ module fpnew_fma #( assign tag_o = tag_q2; assign aux_o = aux_q2; assign out_valid_o = out_valid_inside; + 
assign busy_output = 1'b0; end assign busy_o = busy_input | busy_inside | busy_output; diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index 26817f4a..385d8bb5 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -816,6 +816,7 @@ module fpnew_fma_multi #( assign tag_o = tag_q2; assign aux_o = aux_q2; assign out_valid_o = out_valid_inside; + assign busy_output = 1'b0; end assign busy_o = busy_input | busy_inside | busy_output; From 0a76d679bbbd7157f43f78dbb49d37a64ae9ae4b Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 17 Jun 2019 14:33:55 +0200 Subject: [PATCH 04/13] :books: Fix typo in documentation --- docs/CHANGELOG.md | 1 + docs/README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 6a7cd2b7..cb5f809d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ### Fixed - Don't care assignments to structs have been expanded for better tool support - Undriven busy signal in output pipeline bypass +- Typo in the documentation about the multiply operation ## [0.5.6] - 2019-06-12 diff --git a/docs/README.md b/docs/README.md index 7c071e30..b7d01e6b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -100,7 +100,7 @@ Unless noted otherwise, the first operand `op[0]` is used for the operation. | `FNMSUB` | `1` | Negated fused multiply-add (`-(op[0] * op[1]) - op[2]`) | | `ADD` | `0` | Addition (`op[1] + op[2]`) *note the operand indices* | | `ADD` | `1` | Subtraction (`op[1] - op[2]`) *note the operand indices* | -| `MUL` | `0` | Multiplication (`op[0] - op[1]`) | +| `MUL` | `0` | Multiplication (`op[0] * op[1]`) | | `DIV` | `0` | Division (`op[0] / op[1]`) | | `SQRT` | `0` | Square root | | `SGNJ` | `0` | Sign injection, operation encoded in rounding mode
`RNE`: `op[0]` with `sign(op[1])`
`RTZ`: `op[0]` with `~sign(op[1])`
`RDN`: `op[0]` with `sign(op[0]) ^ sign(op[1])`
`RUP`: `op[0]` (passthrough) | From c11bed1bc53816517cc90f4db82710e3202122e2 Mon Sep 17 00:00:00 2001 From: Andreas Kurth Date: Sun, 23 Jun 2019 19:05:09 +0200 Subject: [PATCH 05/13] :wrench: Bender: Fix dependencies (#15) This fixes two issues in the Bender manifest: versions are now correctly specified without a leading `v`, and the updated version of `fpu_div_sqrt_mvp` has a Bender manifest. --- Bender.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Bender.yml b/Bender.yml index 74318e60..e084d231 100644 --- a/Bender.yml +++ b/Bender.yml @@ -3,8 +3,8 @@ package: authors: ["Stefan Mach "] dependencies: - common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: v1.13.1} - fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: v1.0.1} + common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: 1.13.1} + fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.2} sources: - src/fpnew_pkg.sv From da01b72c22b75e6dfbbf1d8b7410226b4fddb074 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 27 Jun 2019 15:17:56 +0200 Subject: [PATCH 06/13] :bug: Fix merged slice generation Fixes a bug where merged slices would create timing loops and driving conflicts in cases where the first format of the FPU was not enabled. 
--- src/fpnew_opgroup_block.sv | 11 ++++++----- src/fpnew_pkg.sv | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index 12713f24..ea9bf384 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -79,9 +79,9 @@ module fpnew_opgroup_block #( // ------------------------- for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices // Some constants for this format - localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes); + localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask); localparam logic IS_FIRST_MERGED = - fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes); + fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask); // Generate slice only if format enabled if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format @@ -123,7 +123,8 @@ module fpnew_opgroup_block #( end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused // Ready is split up into formats - assign fmt_in_ready[fmt] = fmt_in_ready[fpnew_pkg::get_first_enabled_multi(FmtUnitTypes)]; + assign fmt_in_ready[fmt] = fmt_in_ready[fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, + FpFmtMask)]; assign fmt_out_valid[fmt] = 1'b0; // don't emit values assign fmt_busy[fmt] = 1'b0; // never busy @@ -149,9 +150,9 @@ module fpnew_opgroup_block #( // ---------------------- // Generate Merged Slice // ---------------------- - if (fpnew_pkg::any_enabled_multi(FmtUnitTypes)) begin : gen_merged_slice + if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice - localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes); + localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask); logic in_valid; diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 3d1152c3..734ca96b 100644 
--- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -444,25 +444,28 @@ package fpnew_pkg; return res; endfunction - - - function automatic logic any_enabled_multi(fmt_unit_types_t types); + // Return whether any active format is set as MERGED + function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) - if (types[i] == MERGED) + if (cfg[i] && types[i] == MERGED) return 1'b1; return 1'b0; endfunction - function automatic logic is_first_enabled_multi(fp_format_e fmt, fmt_unit_types_t types); + // Return whether the given format is the first active one set as MERGED + function automatic logic is_first_enabled_multi(fp_format_e fmt, + fmt_unit_types_t types, + fmt_logic_t cfg); for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin - if (types[i] == MERGED) return (fp_format_e'(i) == fmt); + if (cfg[i] && types[i] == MERGED) return (fp_format_e'(i) == fmt); end return 1'b0; endfunction - function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types); + // Returns the first format that is active and is set as MERGED + function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) - if (types[i] == MERGED) + if (cfg[i] && types[i] == MERGED) return fp_format_e'(i); return fp_format_e'(0); endfunction From 8f8a7bfd7f149dac54565f79b25532b8fa8de684 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 28 Jun 2019 16:20:59 +0200 Subject: [PATCH 07/13] :space_invader: Fix potential sim/synth mismatch on UF flag --- src/fpnew_cast_multi.sv | 14 ++++++-------- src/fpnew_fma.sv | 14 ++++++-------- src/fpnew_fma_multi.sv | 12 +++++------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index a75ceeed..2839e540 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -665,14 +665,12 @@ module fpnew_cast_multi #( logic [WIDTH-1:0] 
fp_result, int_result; fpnew_pkg::status_t fp_status, int_status; - assign fp_regular_status = '{ - NV: src_is_int_q & (of_before_round | of_after_round), // overflow is invalid for I2F casts - DZ: 1'b0, // no divisions - OF: ~src_is_int_q & (~info_q2.is_inf & (of_before_round | of_after_round)), // inf casts no OF - UF: uf_after_round & fp_regular_status.NX, - NX: src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f - : (| fp_round_sticky_bits) | (~info_q2.is_inf & (of_before_round | of_after_round)) - }; + assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts + assign fp_regular_status.DZ = 1'b0; // no divisions + assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF + assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX; + assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f + : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round)); assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0}; assign fp_result = fp_result_is_special ? 
fp_special_result : fmt_result[dst_fmt_q2]; diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index 6e1ac4fd..d78f49d2 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -611,14 +611,12 @@ module fpnew_fma #( fpnew_pkg::status_t regular_status; // Assemble regular result - assign regular_result = {rounded_sign, rounded_abs}; - assign regular_status = '{ - NV: 1'b0, // only valid cases are handled in regular path - DZ: 1'b0, // no divisions - OF: of_before_round | of_after_round, // rounding can introduce new overflow - UF: uf_after_round & regular_status.NX, // only inexact results raise UF - NX: (| round_sticky_bits) | of_before_round | of_after_round // RS bits mean loss in precision - }; + assign regular_result = {rounded_sign, rounded_abs}; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; // Final results for output pipeline fp_t result_d; diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index 385d8bb5..5c8a40f9 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -753,13 +753,11 @@ module fpnew_fma_multi #( // Assemble regular result assign regular_result = fmt_result[dst_fmt_q2]; - assign regular_status = '{ - NV: 1'b0, // only valid cases are handled in regular path - DZ: 1'b0, // no divisions - OF: of_before_round | of_after_round, // rounding can introduce new overflow - UF: uf_after_round & regular_status.NX, // only inexact results raise UF - NX: (| round_sticky_bits) | of_before_round | of_after_round // RS bits mean loss in precision - }; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no 
divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; // Final results for output pipeline logic [WIDTH-1:0] result_d; From ebfeb2dbd36c35aba76581f05d7148fb9dec5989 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 28 Jun 2019 16:34:59 +0200 Subject: [PATCH 08/13] :books: Update changelog --- docs/CHANGELOG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index cb5f809d..114f668d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,9 +13,12 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ### Added ### Changed ### Fixed -- Don't care assignments to structs have been expanded for better tool support +- Don't care assignments to structs have been expanded for better tool support [(#14)](https://github.com/pulp-platform/fpnew/pull/14) - Undriven busy signal in output pipeline bypass - Typo in the documentation about the multiply operation +- Generation of merged slices when the first package format is disabled +- Potential simulation/synthesis mismatch of the UF flag +- [Bender] Fixed dependencies for Bender [(#14)](https://github.com/pulp-platform/fpnew/pull/15) ## [0.5.6] - 2019-06-12 @@ -30,7 +33,7 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ## [0.5.5] - 2019-06-02 ### Fixed -- UF flag handling according to IEEE754-2008 (#11) +- UF flag handling according to IEEE754-2008 [(#11)](https://github.com/pulp-platform/fpnew/issues/11) ## [0.5.4] - 2019-06-02 From 8a1aea8b5f4b0d17d33cce370585893fb94c24b7 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 28 Jun 2019 16:15:19 +0200 Subject: [PATCH 09/13] :recycle: Directly integrate pipelines into modules Pipeline registers are now 
directly generated in the modules that need them instead of residing in instances therein. This removes some unused signals as well as reduces levels of design hierarchy. --- docs/CHANGELOG.md | 2 + src/fpnew_cast_multi.sv | 455 ++++++++++++++------------- src/fpnew_divsqrt_multi.sv | 223 +++++++------- src/fpnew_fma.sv | 432 +++++++++++++------------- src/fpnew_fma_multi.sv | 460 ++++++++++++++-------------- src/fpnew_noncomp.sv | 234 +++++++------- src/fpnew_opgroup_multifmt_slice.sv | 69 ++--- 7 files changed, 955 insertions(+), 920 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 114f668d..34a8927f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -12,6 +12,8 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ### Added ### Changed +- Pipelines are generated in the datapath modules instead of separate instances + ### Fixed - Don't care assignments to structs have been expanded for better tool support [(#14)](https://github.com/pulp-platform/fpnew/pull/14) - Undriven busy signal in output pipeline bypass diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index 2839e540..e21cc368 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_cast_multi #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, @@ -62,7 +64,6 @@ module fpnew_cast_multi #( localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); - localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1; @@ -75,92 +76,102 @@ module fpnew_cast_multi #( // or the number of bits in an integer localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH), fpnew_pkg::maximum(SUPER_EXP_BITS, 
$clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1; + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise // --------------- // Input pipeline // --------------- - // Pipelined input signals + // Selected pipeline output signals as non-arrays logic [WIDTH-1:0] operands_q; logic [NUM_FORMATS-1:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; logic op_mod_q; fpnew_pkg::fp_format_e src_fmt_q; fpnew_pkg::fp_format_e dst_fmt_q; fpnew_pkg::int_format_e int_fmt_q; - TagType tag_q; - AuxType aux_q; - logic out_valid_input; - logic in_ready_inside; // written by inside pipeline - logic busy_input; - - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : input_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
(NumPipeRegs / 3) // Last to get regs - : NumPipeRegs; - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .NumOperands ( 1 ), - .NumFormats ( NUM_FORMATS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i, - .dst_fmt_i, - .int_fmt_i, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( src_fmt_q ), - .dst_fmt_o ( dst_fmt_q ), - .int_fmt_o ( int_fmt_q ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( out_valid_input ), - .out_ready_i ( in_ready_inside ), - .busy_o ( busy_input ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign in_ready_o = in_ready_inside; - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; - assign src_fmt_q = src_fmt_i; - assign dst_fmt_q = dst_fmt_i; - assign int_fmt_q = int_fmt_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign out_valid_input = in_valid_i; - assign busy_input = 1'b0; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + 
logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_int_fmt_q[0] = int_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], 
inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS]; + assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS]; // ----------------- // Input processing // ----------------- logic src_is_int, dst_is_int; // if 0, it's a float - assign src_is_int = (op_q == fpnew_pkg::I2F); - assign dst_is_int = (op_q == fpnew_pkg::F2I); + assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F); + assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I); logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit @@ -169,7 +180,7 @@ module fpnew_cast_multi #( logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa; logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC - fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info_q; + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info; logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val; logic int_sign; @@ -190,16 +201,16 @@ module fpnew_cast_multi #( ) i_fpnew_classifier ( .operands_i ( operands_q[FP_WIDTH-1:0] ), .is_boxed_i ( is_boxed_q[fmt] ), - .info_o ( info_q[fmt] ) + .info_o ( info[fmt] ) ); assign fmt_sign[fmt] = operands_q[FP_WIDTH-1]; assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]}); - assign fmt_mantissa[fmt] = {info_q[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad + assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad 
// Compensation for the difference in mantissa widths used for leading-zero count assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS); end else begin : inactive_format - assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled @@ -241,7 +252,7 @@ module fpnew_cast_multi #( assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q)); assign src_exp = fmt_exponent[src_fmt_q]; - assign src_subnormal = signed'({1'b0, info_q[src_fmt_q].is_subnormal}); + assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal}); assign src_offset = fmt_shift_compensation[src_fmt_q]; logic input_sign; // input sign @@ -278,112 +289,118 @@ module fpnew_cast_multi #( assign input_exp = src_is_int ? 
int_input_exp : fp_input_exp; - logic signed [INT_EXP_WIDTH-1:0] destination_exp_d, destination_exp_q; // re-biased exponent for destination - logic signed [INT_EXP_WIDTH-1:0] dst_bias; // dst format bias - assign dst_bias = signed'(fpnew_pkg::bias(dst_fmt_q)); + logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination // Rebias the exponent - assign destination_exp_d = input_exp + dst_bias; + assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q)); // --------------- // Internal pipeline // --------------- - // Pipelined internal signals + // Pipeline output signals as non-arrays logic input_sign_q; logic signed [INT_EXP_WIDTH-1:0] input_exp_q; - logic [INT_MAN_WIDTH-1:0] input_mant_q; + logic [INT_MAN_WIDTH-1:0] input_mant_q; + logic signed [INT_EXP_WIDTH-1:0] destination_exp_q; logic src_is_int_q; logic dst_is_int_q; - fpnew_pkg::fp_info_t info_q2; + fpnew_pkg::fp_info_t info_q; logic mant_is_zero_q; logic op_mod_q2; - fpnew_pkg::roundmode_e rnd_mode_q2; + fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::fp_format_e src_fmt_q2; fpnew_pkg::fp_format_e dst_fmt_q2; fpnew_pkg::int_format_e int_fmt_q2; - TagType tag_q2; - AuxType aux_q2; - logic out_valid_inside; - logic in_ready_output; // written by output pipeline - logic busy_inside; - - // Generate pipeline between mul and add if needed - if (PipeConfig==fpnew_pkg::INSIDE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : inside_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 2) / 3) // First to get regs - : NumPipeRegs; - fpnew_pipe_inside_cast #( - .IntExpWidth ( INT_EXP_WIDTH ), - .IntManWidth ( INT_MAN_WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_inside_pipe ( - .clk_i, - .rst_ni, - .input_sign_i ( input_sign ), - .input_exp_i ( input_exp ), - .destination_exp_i ( destination_exp_d ), - .input_mant_i ( input_mant ), - .src_is_int_i ( src_is_int ), - .dst_is_int_i ( dst_is_int ), - .info_i ( info_q[src_fmt_q] ), - .mant_is_zero_i ( mant_is_zero ), - .op_mod_i ( op_mod_q ), - .rnd_mode_i ( rnd_mode_q ), - .src_fmt_i ( src_fmt_q ), - .dst_fmt_i ( dst_fmt_q ), - .int_fmt_i ( int_fmt_q ), - .tag_i ( tag_q ), - .aux_i ( aux_q ), - .in_valid_i ( out_valid_input ), - .in_ready_o ( in_ready_inside ), - .flush_i, - .input_sign_o ( input_sign_q ), - .input_exp_o ( input_exp_q ), - .destination_exp_o ( destination_exp_q ), - .input_mant_o ( input_mant_q ), - .src_is_int_o ( src_is_int_q ), - .dst_is_int_o ( dst_is_int_q ), - .info_o ( info_q2 ), - .mant_is_zero_o ( mant_is_zero_q ), - .op_mod_o ( op_mod_q2 ), - .rnd_mode_o ( rnd_mode_q2 ), - .src_fmt_o ( src_fmt_q2 ), - .dst_fmt_o ( dst_fmt_q2 ), - .int_fmt_o ( int_fmt_q2 ), - .tag_o ( tag_q2 ), - .aux_o ( aux_q2 ), - .out_valid_o ( out_valid_inside ), - .out_ready_i ( in_ready_output ), - .busy_o ( busy_inside ) - ); - // Otherwise pass through inputs - end else begin : no_inside_pipeline - assign in_ready_inside = in_ready_output; - assign input_sign_q = input_sign; - assign input_exp_q = input_exp; - assign destination_exp_q = destination_exp_d; - assign input_mant_q = input_mant; - assign src_is_int_q = src_is_int; - assign dst_is_int_q = dst_is_int; - assign info_q2 = info_q[src_fmt_q]; - assign mant_is_zero_q = mant_is_zero; - assign op_mod_q2 = op_mod_q; - assign rnd_mode_q2 = rnd_mode_q; - assign src_fmt_q2 = src_fmt_q; - assign dst_fmt_q2 = dst_fmt_q; - assign int_fmt_q2 = int_fmt_q; - assign tag_q2 = tag_q; - assign aux_q2 
= aux_q; - assign out_valid_inside = out_valid_input; - assign busy_inside = 1'b0; + // Internal pipeline signals, index i holds signal after i register stages + + + logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; + logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q; + logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q; + logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q; + fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q; + logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q; + logic [0:NUM_MID_REGS] mid_pipe_op_mod_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_input_sign_q[0] = input_sign; + assign mid_pipe_input_exp_q[0] = input_exp; + assign mid_pipe_input_mant_q[0] = input_mant; + assign mid_pipe_dest_exp_q[0] = destination_exp; + assign mid_pipe_src_is_int_q[0] = src_is_int; + assign mid_pipe_dst_is_int_q[0] = dst_is_int; + assign mid_pipe_info_q[0] = info[src_fmt_q]; + assign mid_pipe_mant_zero_q[0] = mant_is_zero; + assign mid_pipe_op_mod_q[0] = op_mod_q; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_src_fmt_q[0] = src_fmt_q; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_int_fmt_q[0] = int_fmt_q; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = 
inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0) + `FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0) + `FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0) + `FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + 
`FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; + assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS]; + assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS]; + assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS]; + assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS]; + assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS]; + assign info_q = mid_pipe_info_q[NUM_MID_REGS]; + assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS]; + assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS]; // -------- // Casting // -------- - logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments + logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit @@ -425,7 +442,7 @@ module fpnew_cast_multi #( end else begin // Overflow or infinities (for proper rounding) if ((destination_exp_q >= 2**fpnew_pkg::exp_bits(dst_fmt_q2)-1) || - (~src_is_int_q && info_q2.is_inf)) begin + (~src_is_int_q && info_q.is_inf)) begin final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value preshift_mant = '1; // largest normal value and RS bits set of_before_round = 1'b1; @@ -521,7 +538,7 @@ module fpnew_cast_multi #( .abs_value_i ( pre_round_abs ), .sign_i ( input_sign_q ), // source format .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q2 ), + .rnd_mode_i ( rnd_mode_q ), .effective_subtraction_i ( 1'b0 ), // no 
operation happened .abs_rounded_o ( rounded_abs ), .sign_o ( rounded_sign ), @@ -586,8 +603,7 @@ module fpnew_cast_multi #( if (FpFmtConfig[fmt]) begin : active_format always_comb begin : special_results logic [FP_WIDTH-1:0] special_res; - - special_res = info_q2.is_zero + special_res = info_q.is_zero ? input_sign_q << FP_WIDTH-1 // signed zero : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN @@ -601,12 +617,12 @@ module fpnew_cast_multi #( end // Detect special case from source format, I2F casts don't produce a special result - assign fp_result_is_special = ~src_is_int_q & (info_q2.is_zero | - info_q2.is_nan | - ~info_q2.is_boxed); + assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero | + info_q.is_nan | + ~info_q.is_boxed); // Signalling input NaNs raise invalid flag, otherwise no flags set - assign fp_special_status = '{NV: info_q2.is_signalling, default: 1'b0}; + assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0}; // Assemble result according to destination format assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format @@ -634,7 +650,7 @@ module fpnew_cast_multi #( special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1 // Negative special case (except for nans) tie to -max or 0 - if (input_sign_q && !info_q2.is_nan) + if (input_sign_q && !info_q.is_nan) special_res = ~special_res; // Initialize special result with sign-extension @@ -647,8 +663,8 @@ module fpnew_cast_multi #( end // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) - assign int_result_is_special = info_q2.is_nan | info_q2.is_inf | - of_before_round | ~info_q2.is_boxed | + assign int_result_is_special = info_q.is_nan | info_q.is_inf | + of_before_round | ~info_q.is_boxed | (input_sign_q & op_mod_q2 & ~rounded_int_res_zero); // All integer special cases are invalid @@ -682,7 +698,6 @@ module fpnew_cast_multi #( logic [WIDTH-1:0] result_d; fpnew_pkg::status_t status_d; logic 
extension_bit; - logic busy_output; // Select output depending on special case detection assign result_d = dst_is_int_q ? int_result : fp_result; @@ -694,52 +709,52 @@ module fpnew_cast_multi #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig==fpnew_pkg::AFTER || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : output_pipline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? ((NumPipeRegs + 1) / 3) // Second to get regs - : NumPipeRegs; - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( extension_bit ), - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i ( tag_q2 ), - .aux_i ( aux_q2 ), - .in_valid_i ( out_valid_inside ), - .in_ready_o ( in_ready_output ), - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( busy_output ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign in_ready_output = out_ready_i; - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = extension_bit; - assign tag_o = tag_q2; - assign aux_o = aux_q2; - assign out_valid_o = out_valid_inside; - assign busy_output = 1'b0; + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of 
pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_ext_bit_q[0] = extension_bit; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - - assign busy_o = busy_input | busy_inside | busy_output; - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + 
assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 382b1a1e..1aed3a55 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -51,72 +51,78 @@ module fpnew_divsqrt_multi #( output logic busy_o ); + // ---------- + // Constants + // ---------- + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [1:0][WIDTH-1:0] operands_q; - logic [NUM_FORMATS-1:0][1:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - fpnew_pkg::fp_format_e dst_fmt_q; - TagType tag_q; - AuxType aux_q; - logic in_valid_q, in_ready_q; - logic pipe_busy; + // Selected pipeline output signals as non-arrays + logic [1:0][WIDTH-1:0] operands_q; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::operation_e op_q; + fpnew_pkg::fp_format_e dst_fmt_q; + logic in_valid_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] 
inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 2 ), - .NumFormats ( NUM_FORMATS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i ( 1'b0 ), // unused - .src_fmt_i ( fpnew_pkg::FP32 ), // unused - .dst_fmt_i, - .int_fmt_i ( fpnew_pkg::INT8 ), // unused, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( /* unused */ ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( dst_fmt_q ), - .int_fmt_o ( /* unused */ ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( in_valid_q ), - .out_ready_i ( in_ready_q ), - .busy_o ( pipe_busy ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign dst_fmt_q = dst_fmt_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign in_valid_q = in_valid_i; - assign in_ready_o = in_ready_q; + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; 
i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign op_q = inp_pipe_op_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -158,7 +164,7 @@ module fpnew_divsqrt_multi #( fsm_state_e state_q, state_d; // Upstream ready comes from sanitization FSM - assign in_ready_q = in_ready; + assign inp_pipe_ready[NUM_INP_REGS] = in_ready; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; @@ -237,9 +243,9 @@ module fpnew_divsqrt_multi #( AuxType result_aux_q; // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_is_fp8_q, input_is_fp8, in_valid_q, '0) - `FFL(result_tag_q, tag_q, in_valid_q, '0) - `FFL(result_aux_q, aux_q, in_valid_q, '0) + `FFL(result_is_fp8_q, input_is_fp8, in_valid_q, '0) + `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], in_valid_q, '0) + `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], in_valid_q, '0) // ----------------- // DIVSQRT instance @@ -284,48 +290,49 @@ module fpnew_divsqrt_multi #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-box - .tag_i ( result_tag_q ), - .aux_i ( result_aux_q ), - .in_valid_i ( out_valid ), - .in_ready_o ( out_ready ), - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( pipe_busy ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-box - assign tag_o = result_tag_q; - assign aux_o = result_aux_q; - assign out_valid_o = out_valid; - assign out_ready = out_ready_i; - end + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + 
TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; - // Busy flag - assign busy_o = in_valid_q | unit_busy | pipe_busy; + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_aux_q[0] = result_aux_q; + assign out_pipe_valid_q[0] = out_valid; + // Input stage: Propagate pipeline ready signal to inside pipe + assign out_ready = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = 
out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index d78f49d2..4ab3b182 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_fma #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, @@ -64,6 +66,22 @@ module fpnew_fma #( localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH); // Shift amount width: maximum internal mantissa size is 3p+3 bits localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise // ---------------- // Type definition @@ -77,71 +95,49 @@ module fpnew_fma #( // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [2:0][WIDTH-1:0] operands_q; - logic [2:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - TagType tag_q; - AuxType aux_q; - logic out_valid_input; - logic in_ready_inside; // written by inside pipeline - logic busy_input; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : input_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? (NumPipeRegs / 3) // Last to get regs - : NumPipeRegs; - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .NumOperands ( 3 ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( /* unused */ ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( out_valid_input ), - .out_ready_i ( in_ready_inside ), - .busy_o ( busy_input ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign in_ready_o = in_ready_inside; - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign out_valid_input = in_valid_i; - assign 
busy_input = 1'b0; + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // ----------------- @@ -154,9 +150,9 @@ module fpnew_fma #( .FpFormat ( FpFormat ), .NumOperands ( 3 ) ) i_class_inputs ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_q ) + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) ); fp_t operand_a, operand_b, operand_c; @@ -177,17 +173,17 @@ module fpnew_fma #( always_comb begin : op_select // Default assignments - packing-order-agnostic - operand_a = operands_q[0]; - operand_b = operands_q[1]; - operand_c = operands_q[2]; + operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; + operand_c = inp_pipe_operands_q[NUM_INP_REGS][2]; info_a = info_q[0]; info_b = info_q[1]; info_c = info_q[2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ op_mod_q; + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; - unique case 
(op_q) + unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 @@ -357,121 +353,121 @@ module fpnew_fma #( assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift; assign inject_carry_in = effective_subtraction & ~sticky_before_add; - // --------------- - // Internal pipeline - // --------------- - // Pipelined internal signals - logic effective_subtraction_q; - logic signed [EXP_WIDTH-1:0] exponent_product_q; - logic signed [EXP_WIDTH-1:0] exponent_difference_q; - logic signed [EXP_WIDTH-1:0] tentative_exponent_q; - logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; - logic sticky_before_add_q; - fpnew_pkg::roundmode_e rnd_mode_q2; - fp_t special_result_q; - fpnew_pkg::status_t special_status_q; - logic result_is_special_q; - TagType tag_q2; - AuxType aux_q2; - logic out_valid_inside; - logic in_ready_output; // written by output pipeline - logic busy_inside; - // ------ // Adder // ------ logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry logic sum_carry; // observe carry bit from sum for sign fixing - logic [3*PRECISION_BITS+3:0] sum_d, sum_q; // discard carry as sum won't overflow - logic final_sign_d, final_sign_q; + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; //Mantissa adder (ab+c). In normal addition, it cannot overflow. assign sum_raw = product_shifted + addend_shifted + inject_carry_in; assign sum_carry = sum_raw[3*PRECISION_BITS+4]; - // Complement negative sum (can only happen in subtraction -> overflows for positive results) - assign sum_d = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + assign sum = (effective_subtraction && ~sum_carry) ? 
-sum_raw : sum_raw; // In case of a mispredicted subtraction result, do a sign flip - assign final_sign_d = (effective_subtraction && (sum_carry == tentative_sign)) + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) ? 1'b1 : (effective_subtraction ? 1'b0 : tentative_sign); - // Generate pipeline between mul and add if needed - if (PipeConfig==fpnew_pkg::INSIDE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : inside_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? ((NumPipeRegs + 2) / 3) // First to get regs - : NumPipeRegs; - - fpnew_pipe_inside_fma #( - .ExpWidth ( EXP_WIDTH ), - .PrecBits ( PRECISION_BITS ), - .NumPipeRegs ( NUM_REGS ), - .FpType ( fp_t ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_inside_pipe ( - .clk_i, - .rst_ni, - .effective_subtraction_i ( effective_subtraction ), - .final_sign_i ( final_sign_d ), - .exponent_product_i ( exponent_product ), - .exponent_difference_i ( exponent_difference ), - .tentative_exponent_i ( tentative_exponent ), - .addend_shamt_i ( addend_shamt ), - .sticky_before_add_i ( sticky_before_add ), - .sum_i ( sum_d ), - .rnd_mode_i ( rnd_mode_q ), - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .result_is_special_i ( result_is_special ), - .special_result_i ( special_result ), - .special_status_i ( special_status ), - .tag_i ( tag_q ), - .aux_i ( aux_q ), - .in_valid_i ( out_valid_input ), - .in_ready_o ( in_ready_inside ), - .flush_i, - .effective_subtraction_o ( effective_subtraction_q ), - .final_sign_o ( final_sign_q ), - .exponent_product_o ( exponent_product_q ), - .exponent_difference_o ( exponent_difference_q ), - .tentative_exponent_o ( tentative_exponent_q ), - .addend_shamt_o ( addend_shamt_q ), - .sticky_before_add_o ( sticky_before_add_q ), - .sum_o ( sum_q ), - .rnd_mode_o ( rnd_mode_q2 ), - .dst_fmt_o ( /* unused */ ), - .result_is_special_o ( result_is_special_q ), - .special_result_o ( special_result_q ), - .special_status_o ( 
special_status_q ), - .tag_o ( tag_q2 ), - .aux_o ( aux_q2 ), - .out_valid_o ( out_valid_inside ), - .out_ready_i ( in_ready_output ), - .busy_o ( busy_inside ) - ); - - // Otherwise pass through inputs - end else begin : no_inside_pipeline - assign in_ready_inside = in_ready_output; - assign effective_subtraction_q = effective_subtraction; - assign final_sign_q = final_sign_d; - assign exponent_product_q = exponent_product; - assign exponent_difference_q = exponent_difference; - assign tentative_exponent_q = tentative_exponent; - assign addend_shamt_q = addend_shamt; - assign sticky_before_add_q = sticky_before_add; - assign rnd_mode_q2 = rnd_mode_q; - assign result_is_special_q = result_is_special; - assign special_result_q = special_result; - assign special_status_q = special_status; - assign tag_q2 = tag_q; - assign aux_q2 = aux_q; - assign out_valid_inside = out_valid_input; - assign busy_inside = 1'b0; - assign sum_q = sum_d; + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] mid_pipe_sticky_q; + logic 
[0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS]; + assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + 
assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; // -------------- // Normalization @@ -593,7 +589,7 @@ module fpnew_fma #( .abs_value_i ( pre_round_abs ), .sign_i ( pre_round_sign ), .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q2 ), + .rnd_mode_i ( rnd_mode_q ), .effective_subtraction_i ( effective_subtraction_q ), .abs_rounded_o ( rounded_abs ), .sign_o ( rounded_sign ), @@ -621,7 +617,6 @@ module fpnew_fma #( // Final results for output pipeline fp_t result_d; fpnew_pkg::status_t status_d; - logic busy_output; // Select output depending on special case detection assign result_d = result_is_special_q ? special_result_q : regular_result; @@ -630,52 +625,49 @@ module fpnew_fma #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig==fpnew_pkg::AFTER || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : output_pipline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 1) / 3) // Second to get regs - : NumPipeRegs; - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i ( tag_q2 ), - .aux_i ( aux_q2 ), - .in_valid_i ( out_valid_inside ), - .in_ready_o ( in_ready_output ), - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( busy_output ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign in_ready_output = out_ready_i; - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = tag_q2; - assign aux_o = aux_q2; - assign out_valid_o = out_valid_inside; - assign busy_output = 1'b0; + // Output pipeline signals, index i holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < 
NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - - assign busy_o = busy_input | busy_inside | busy_output; - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index 5c8a40f9..6b52237f 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_fma_multi #( parameter fpnew_pkg::fmt_logic_t 
FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, @@ -70,6 +72,22 @@ module fpnew_fma_multi #( localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); // Shift amount width: maximum internal mantissa size is 3p+3 bits localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise // ---------------- // Type definition @@ -83,77 +101,65 @@ module fpnew_fma_multi #( // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [2:0][WIDTH-1:0] operands_q; - logic [NUM_FORMATS-1:0][2:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - fpnew_pkg::fp_format_e src_fmt_q; - fpnew_pkg::fp_format_e dst_fmt_q; - TagType tag_q; - AuxType aux_q; - logic out_valid_input; - logic in_ready_inside; // written by inside pipeline - logic busy_input; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : input_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
(NumPipeRegs / 3) // Last to get regs - : NumPipeRegs; - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .NumOperands ( 3 ), - .NumFormats ( NUM_FORMATS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i, - .dst_fmt_i, - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( src_fmt_q ), - .dst_fmt_o ( dst_fmt_q ), - .int_fmt_o ( /* unused */ ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( out_valid_input ), - .out_ready_i ( in_ready_inside ), - .busy_o ( busy_input ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign in_ready_o = in_ready_inside; - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; - assign src_fmt_q = src_fmt_i; - assign dst_fmt_q = dst_fmt_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign out_valid_input = in_valid_i; - assign busy_input = 1'b0; + // Selected pipeline output signals as non-arrays + logic [2:0][WIDTH-1:0] operands_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType 
[0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -162,9 +168,6 @@ module fpnew_fma_multi #( logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent; logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa; - logic [2:0][WIDTH-1:0] input_operands; - - fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q; // FP Input initialization @@ -182,9 +185,9 @@ module fpnew_fma_multi #( .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), .NumOperands ( 3 ) ) i_fpnew_classifier ( - .operands_i ( trimmed_ops ), - .is_boxed_i ( is_boxed_q[fmt] 
), - .info_o ( info_q[fmt] ) + .operands_i ( trimmed_ops ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ), + .info_o ( info_q[fmt] ) ); for (genvar op = 0; op < 3; op++) begin : gen_operands assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0]; @@ -227,9 +230,9 @@ module fpnew_fma_multi #( info_c = info_q[dst_fmt_q][2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ op_mod_q; + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; - unique case (op_q) + unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 @@ -431,127 +434,126 @@ module fpnew_fma_multi #( assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift; assign inject_carry_in = effective_subtraction & ~sticky_before_add; - // --------------- - // Internal pipeline - // --------------- - // Pipelined internal signals - logic effective_subtraction_q; - logic signed [EXP_WIDTH-1:0] exponent_product_q; - logic signed [EXP_WIDTH-1:0] exponent_difference_q; - logic signed [EXP_WIDTH-1:0] tentative_exponent_q; - logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; - logic sticky_before_add_q; - logic [3*PRECISION_BITS+3:0] product_shifted_q; - logic [3*PRECISION_BITS+3:0] addend_shifted_q; - logic inject_carry_in_q; - fpnew_pkg::roundmode_e rnd_mode_q2; - fpnew_pkg::fp_format_e dst_fmt_q2; - fp_t special_result_q; - fpnew_pkg::status_t special_status_q; - logic result_is_special_q; - TagType tag_q2; - AuxType aux_q2; - logic out_valid_inside; - logic in_ready_output; // written by output pipeline - logic busy_inside; - // ------ // Adder // ------ logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry logic sum_carry; // observe carry bit from sum for sign fixing - logic [3*PRECISION_BITS+3:0] sum_d, sum_q; // discard carry as sum won't overflow - logic final_sign_d, 
final_sign_q; + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; //Mantissa adder (ab+c). In normal addition, it cannot overflow. assign sum_raw = product_shifted + addend_shifted + inject_carry_in; assign sum_carry = sum_raw[3*PRECISION_BITS+4]; - // Complement negative sum (can only happen in subtraction -> overflows for positive results) - assign sum_d = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; // In case of a mispredicted subtraction result, do a sign flip - assign final_sign_d = (effective_subtraction && (sum_carry == tentative_sign)) + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) ? 1'b1 : (effective_subtraction ? 1'b0 : tentative_sign); - - // Generate pipeline between mul and add if needed - if (PipeConfig==fpnew_pkg::INSIDE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : inside_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 2) / 3) // First to get regs - : NumPipeRegs; - - fpnew_pipe_inside_fma #( - .ExpWidth ( EXP_WIDTH ), - .PrecBits ( PRECISION_BITS ), - .NumPipeRegs ( NUM_REGS ), - .FpType ( fp_t ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_inside_pipe ( - .clk_i, - .rst_ni, - .effective_subtraction_i ( effective_subtraction ), - .final_sign_i ( final_sign_d ), - .exponent_product_i ( exponent_product ), - .exponent_difference_i ( exponent_difference ), - .tentative_exponent_i ( tentative_exponent ), - .addend_shamt_i ( addend_shamt ), - .sticky_before_add_i ( sticky_before_add ), - .sum_i ( sum_d ), - .rnd_mode_i ( rnd_mode_q ), - .dst_fmt_i ( dst_fmt_q ), // unused - .result_is_special_i ( result_is_special ), - .special_result_i ( special_result ), - .special_status_i ( special_status ), - .tag_i ( tag_q ), - .aux_i ( aux_q ), - .in_valid_i ( out_valid_input ), - .in_ready_o ( in_ready_inside ), - .flush_i, - .effective_subtraction_o ( effective_subtraction_q ), - .final_sign_o ( final_sign_q ), - .exponent_product_o ( exponent_product_q ), - .exponent_difference_o ( exponent_difference_q ), - .tentative_exponent_o ( tentative_exponent_q ), - .addend_shamt_o ( addend_shamt_q ), - .sticky_before_add_o ( sticky_before_add_q ), - .sum_o ( sum_q ), - .rnd_mode_o ( rnd_mode_q2 ), - .dst_fmt_o ( dst_fmt_q2 ), - .result_is_special_o ( result_is_special_q ), - .special_result_o ( special_result_q ), - .special_status_o ( special_status_q ), - .tag_o ( tag_q2 ), - .aux_o ( aux_q2 ), - .out_valid_o ( out_valid_inside ), - .out_ready_i ( in_ready_output ), - .busy_o ( busy_inside ) - ); - - // Otherwise pass through inputs - end else begin : no_inside_pipeline - assign in_ready_inside = in_ready_output; - assign effective_subtraction_q = effective_subtraction; - assign final_sign_q = final_sign_d; - assign exponent_product_q = exponent_product; - assign exponent_difference_q = exponent_difference; - assign tentative_exponent_q = tentative_exponent; - assign 
addend_shamt_q = addend_shamt; - assign sticky_before_add_q = sticky_before_add; - assign rnd_mode_q2 = rnd_mode_q; - assign dst_fmt_q2 = dst_fmt_q; - assign result_is_special_q = result_is_special; - assign special_result_q = special_result; - assign special_status_q = special_status; - assign tag_q2 = tag_q; - assign aux_q2 = aux_q; - assign out_valid_inside = out_valid_input; - assign busy_inside = 1'b0; - assign sum_q = sum_d; + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] mid_pipe_sticky_q; + logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] 
mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = 
mid_pipe_add_shamt_q[NUM_MID_REGS]; + assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; // -------------- // Normalization @@ -708,7 +710,7 @@ module fpnew_fma_multi #( .abs_value_i ( pre_round_abs ), .sign_i ( pre_round_sign ), .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q2 ), + .rnd_mode_i ( rnd_mode_q ), .effective_subtraction_i ( effective_subtraction_q ), .abs_rounded_o ( rounded_abs ), .sign_o ( rounded_sign ), @@ -762,7 +764,6 @@ module fpnew_fma_multi #( // Final results for output pipeline logic [WIDTH-1:0] result_d; fpnew_pkg::status_t status_d; - logic busy_output; // Select output depending on special case detection assign result_d = result_is_special_q ? special_result_q : regular_result; @@ -771,52 +772,49 @@ module fpnew_fma_multi #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig==fpnew_pkg::AFTER || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : output_pipline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 1) / 3) // Second to get regs - : NumPipeRegs; - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i ( tag_q2 ), - .aux_i ( aux_q2 ), - .in_valid_i ( out_valid_inside ), - .in_ready_o ( in_ready_output ), - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( busy_output ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign in_ready_output = out_ready_i; - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = tag_q2; - assign aux_o = aux_q2; - assign out_valid_o = out_valid_inside; - assign busy_output = 1'b0; + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i 
< NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - - assign busy_o = busy_input | busy_inside | busy_output; - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index af23b8c8..9e485f9e 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_noncomp #( parameter fpnew_pkg::fp_format_e FpFormat = 
fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, @@ -54,6 +56,17 @@ module fpnew_noncomp #( // ---------- localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise // ---------------- // Type definition @@ -67,57 +80,49 @@ module fpnew_noncomp #( // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [1:0][WIDTH-1:0] operands_q; - logic [1:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 2 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; 
- assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // --------------------- @@ -130,17 +135,17 @@ module fpnew_noncomp #( .FpFormat ( FpFormat ), .NumOperands ( 2 ) ) i_class_a ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_q ) + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) ); fp_t operand_a, operand_b; fpnew_pkg::fp_info_t info_a, info_b; - // Packing-order-agnostic assignments - assign operand_a = operands_q[0]; - assign operand_b = operands_q[1]; + // Packing-order-agnostic assignments + assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; assign info_a = info_q[0]; assign info_b = info_q[1]; @@ -182,7 +187,7 @@ module fpnew_noncomp #( sign_b = operand_b.sign & info_b.is_boxed; // Do the sign injection based on rm field - unique case (rnd_mode_q) + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) fpnew_pkg::RNE: 
sgnj_result.sign = sign_b; // SGNJ fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b; // SGNJN fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX @@ -193,8 +198,8 @@ module fpnew_noncomp #( assign sgnj_status = '0; // sign injections never raise exceptions - // op_mod_q enables sign-extension of result (for storing to integer regfile) - assign sgnj_extension_bit = op_mod_q ? sgnj_result.sign : 1'b1; // NaN-box regular float results + // op_mod_q enables integer sign-extension of result (for storing to integer regfile) + assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1; // ------------------ // Minimum / Maximum @@ -220,7 +225,7 @@ module fpnew_noncomp #( else if (info_b.is_nan) minmax_result = operand_a; // Otherwise decide according to the operation else begin - unique case (rnd_mode_q) + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? 
operand_b : operand_a; // MAX default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care @@ -249,18 +254,18 @@ module fpnew_noncomp #( if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation // Otherwise do comparisons else begin - unique case (rnd_mode_q) + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) fpnew_pkg::RNE: begin // Less than or equal if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid - else cmp_result = (operand_a_smaller | operands_equal) ^ op_mod_q; + else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; end fpnew_pkg::RTZ: begin // Less than if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid - else cmp_result = (operand_a_smaller & ~operands_equal) ^ op_mod_q; // -0 = +0, not less + else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; end fpnew_pkg::RDN: begin // Equal - if (any_operand_nan) cmp_result = op_mod_q; // NaNs are valid, always campare as not equal - else cmp_result = operands_equal ^ op_mod_q; + if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal + else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS]; end default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care endcase @@ -306,7 +311,7 @@ module fpnew_noncomp #( // Select result always_comb begin : select_result - unique case (op_q) + unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::SGNJ: begin result_d = sgnj_result; status_d = sgnj_status; @@ -335,48 +340,65 @@ module fpnew_noncomp #( endcase end - assign is_class_d = (op_q == fpnew_pkg::CLASSIFY); + assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY); // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), 
- .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( extension_bit_d ), - .class_mask_i ( class_mask_d ), - .is_class_i ( is_class_d ), - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o, - .is_class_o, - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = extension_bit_d; - assign class_mask_o = class_mask_d; - assign is_class_o = is_class_d; + // Output pipeline signals, index i holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; + fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; + logic [0:NUM_OUT_REGS] out_pipe_is_class_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_extension_bit_q[0] = extension_bit_d; + assign out_pipe_class_mask_q[0] = class_mask_d; + assign out_pipe_is_class_q[0] = is_class_d; + assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + 
logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) + `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) + `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; + assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; + assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 
597e9dc0..6719e837 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_opgroup_multifmt_slice #( parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, parameter int unsigned Width = 64, @@ -340,43 +342,40 @@ module fpnew_opgroup_multifmt_slice #( // Bypass lanes with target operand for vectorial casts if (OpGroup == fpnew_pkg::CONV) begin : target_regs - logic in_valid, out_ready; - - assign in_valid = in_valid_i & vectorial_op; - // instantiate pipe - fpnew_pipe_out #( - .Width ( Width ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( logic ), - .AuxType ( logic [2:0] ) - ) target_pipe ( - .clk_i, - .rst_ni, - .result_i ( conv_target_d ), - .status_i ( '0 ), // unused - .extension_bit_i ( 1'b0 ), // unused - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i ( 1'b0 ), // unused - .aux_i ( target_aux_d ), - .in_valid_i ( in_valid ), - .in_ready_o ( /* unused */ ), - .flush_i, - .result_o ( conv_target_q ), - .status_o ( /* unused */ ), - .extension_bit_o ( /* unused */ ), - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o ( /* unused */ ), - .aux_o ( target_aux_q ), - .out_valid_o ( /* unused */ ), - .out_ready_i ( out_ready ), - .busy_o ( /* unused */ ) - ); - assign out_ready = out_ready_i & result_is_vector; + // Bypass pipeline signals, index i holds signal after i register stages + logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; + logic [0:NumPipeRegs][2:0] byp_pipe_aux_q; + logic [0:NumPipeRegs] byp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NumPipeRegs] byp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign byp_pipe_target_q[0] = conv_target_d; + assign byp_pipe_aux_q[0] = target_aux_d; + assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages + for (genvar i 
= 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) + `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs + assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; // decode the aux data - assign {result_vec_op, result_is_cpk} = target_aux_q; + assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs]; end else begin : no_conv assign {result_vec_op, result_is_cpk} = '0; end From 614d14b83413c35f6c882168c780e9d0829e762c Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 28 Jun 2019 20:23:58 +0200 Subject: [PATCH 10/13] :fire: Remove currently unused modules The following modules are being removed since they are currently not needed: - `fpnew_f2fcast` - `fpnew_f2icast` - `fpnew_i2fcast` - `fpnew_pipe_in` - `fpnew_pipe_inside_cast` - `fpnew_pipe_inside_fma` - `fpnew_pipe_out` They may be added back at a later point if they become used again. 
--- Bender.yml | 4 - docs/CHANGELOG.md | 3 + src/fpnew_f2fcast.sv | 374 ---------------------------------- src/fpnew_f2icast.sv | 356 -------------------------------- src/fpnew_i2fcast.sv | 322 ----------------------------- src/fpnew_pipe_in.sv | 187 ----------------- src/fpnew_pipe_inside_cast.sv | 226 -------------------- src/fpnew_pipe_inside_fma.sv | 233 --------------------- src/fpnew_pipe_out.sv | 159 --------------- src_files.yml | 4 - 10 files changed, 3 insertions(+), 1865 deletions(-) delete mode 100644 src/fpnew_f2fcast.sv delete mode 100644 src/fpnew_f2icast.sv delete mode 100644 src/fpnew_i2fcast.sv delete mode 100644 src/fpnew_pipe_in.sv delete mode 100644 src/fpnew_pipe_inside_cast.sv delete mode 100644 src/fpnew_pipe_inside_fma.sv delete mode 100644 src/fpnew_pipe_out.sv diff --git a/Bender.yml b/Bender.yml index e084d231..3dbe79e4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -17,9 +17,5 @@ sources: - src/fpnew_opgroup_block.sv - src/fpnew_opgroup_fmt_slice.sv - src/fpnew_opgroup_multifmt_slice.sv - - src/fpnew_pipe_in.sv - - src/fpnew_pipe_out.sv - - src/fpnew_pipe_inside_fma.sv - - src/fpnew_pipe_inside_cast.sv - src/fpnew_rounding.sv - src/fpnew_top.sv diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 34a8927f..a2261bcf 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -22,6 +22,9 @@ Versions of the IP in the same major relase are "pin-compatible" with each other - Potential simulation/synthesis mismatch of the UF flag - [Bender] Fixed dependencies for Bender [(#14)](https://github.com/pulp-platform/fpnew/pull/15) +### Removed +- Currently unused modules: `fpnew_pipe*`, `fpnew_{f2i,f2f,i2f}_cast` + ## [0.5.6] - 2019-06-12 ### Changed diff --git a/src/fpnew_f2fcast.sv b/src/fpnew_f2fcast.sv deleted file mode 100644 index 975b23de..00000000 --- a/src/fpnew_f2fcast.sv +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. 
-// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -module fpnew_f2fcast #( - parameter fpnew_pkg::fp_format_e SrcFpFormat = fpnew_pkg::fp_format_e'(0), - parameter fpnew_pkg::fp_format_e DstFpFormat = fpnew_pkg::fp_format_e'(0), - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, - // Do not change - localparam int unsigned SRC_WIDTH = fpnew_pkg::fp_width(SrcFpFormat), - localparam int unsigned DST_WIDTH = fpnew_pkg::fp_width(DstFpFormat) -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [SRC_WIDTH-1:0] operands_i, // 1 operand - input logic is_boxed_i, // 1 operand - input fpnew_pkg::roundmode_e rnd_mode_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [DST_WIDTH-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output TagType tag_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o -); - - // ---------- - // Constants - // ---------- - localparam int SRC_EXP_BITS = fpnew_pkg::exp_bits(SrcFpFormat); - localparam int SRC_MAN_BITS = 
fpnew_pkg::man_bits(SrcFpFormat); - localparam int SRC_BIAS = fpnew_pkg::bias(SrcFpFormat); - localparam int DST_EXP_BITS = fpnew_pkg::exp_bits(DstFpFormat); - localparam int DST_MAN_BITS = fpnew_pkg::man_bits(DstFpFormat); - localparam int DST_BIAS = fpnew_pkg::bias(DstFpFormat); - - // If needed, there will be a LZC for renormalization - localparam int unsigned LZC_RESULT_WIDTH = $clog2(SRC_MAN_BITS + 1); - // The wider of both exponent widths - localparam int unsigned SUPER_EXP_BITS = fpnew_pkg::maximum(SRC_EXP_BITS, DST_EXP_BITS); - // The internal exponent must be able to represent the smallest denormal input value as signed - localparam int unsigned INT_EXP_WIDTH = - fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SRC_BIAS + SRC_MAN_BITS)) + 1; // +1 for signed - // The wider of both mantissa widhts, includes normal bit - localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SRC_MAN_BITS, DST_MAN_BITS) + 1; - - // ---------------- - // Type definition - // ---------------- - typedef struct packed { - logic sign; - logic [SRC_EXP_BITS-1:0] exponent; - logic [SRC_MAN_BITS-1:0] mantissa; - } src_fp_t; - - typedef struct packed { - logic sign; - logic [DST_EXP_BITS-1:0] exponent; - logic [DST_MAN_BITS-1:0] mantissa; - } dst_fp_t; - - // --------------- - // Input pipeline - // --------------- - // Pipelined input signals - logic [SRC_WIDTH-1:0] operands_q; - logic is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( SRC_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 1 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i ( fpnew_pkg::FMADD ), // unused - .op_mod_i ( 1'b0 ), // unused - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - 
.aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( /* unused */ ), - .op_mod_o ( /* unused */ ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - end - - // ----------------- - // Input processing - // ----------------- - src_fp_t operand_a; - fpnew_pkg::fp_info_t info_a; - - logic signed [SRC_EXP_BITS:0] encoded_exp; // biased encoded exponent - logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent - logic [SRC_MAN_BITS:0] encoded_mant; // as encoded, includes normal bit - logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa - - assign operand_a = operands_q; - - // Classify input - fpnew_classifier #( - .FpFormat ( SrcFpFormat ), - .NumOperands ( 1 ) - ) i_class_a ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_a ) - ); - - assign encoded_mant = {info_a.is_normal, operand_a.mantissa}; - assign encoded_exp = signed'({1'b0, operand_a.exponent}); - - // In case of growing exponent size, denormal values need to be normalized - if (DST_EXP_BITS > SRC_EXP_BITS) begin : renormalize_mantissa - logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount - logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations - - // Leading-zero counter is needed for renormalization - lzc #( - .WIDTH ( SRC_MAN_BITS + 1 ), - .MODE ( 1 ) // MODE = 1 counts leading zeroes - ) i_lzc ( - .in_i ( encoded_mant ), - .cnt_o ( renorm_shamt ), - .empty_o ( /* unused */ ) - ); - - assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt}); - - // Realign input mantissa, append zeroes if destination is wider - assign input_mant = 
{>> {encoded_mant << renorm_shamt, '0}}; - // Unbias exponent and compensate for shift - assign input_exp = signed'(encoded_exp - renorm_shamt_sgn + info_a.is_subnormal - SRC_BIAS); - // Otherwise we leave the denormals be because they cannot become normal - end else begin : normalized_mantissa - assign input_mant = encoded_mant; - assign input_exp = signed'(encoded_exp + info_a.is_subnormal - SRC_BIAS); - end - - // ---------------------- - // Special case handling - // ---------------------- - dst_fp_t special_result; - fpnew_pkg::status_t special_status; - logic result_is_special; - - - // We handle zero and NaN inputs separately - assign result_is_special = info_a.is_zero | info_a.is_nan | ~info_a.is_boxed; - - // Signalling NaNs raise invalid flag, otherwise no flags set - assign special_status = '{NV: info_a.is_signalling, default: 1'b0}; - - // Assemble result according to special case - assign special_result = info_a.is_zero - ? '{sign: operand_a.sign, exponent: '0, mantissa: '0} // signed zero - : '{sign: 1'b0, exponent: '1, mantissa: 2**(DST_MAN_BITS-1)}; // qNaN - - // -------- - // Casting - // -------- - logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination - logic [DST_EXP_BITS-1:0] final_exp; // after eventual adjustments - - logic [INT_MAN_WIDTH+DST_MAN_BITS:0] preshift_mant; // mantissa before final shift - logic [INT_MAN_WIDTH+DST_MAN_BITS:0] destination_mant; // mantissa from shifter, with rnd bit - logic [DST_MAN_BITS-1:0] final_mant; // mantissa after adjustments - - logic [$clog2(DST_MAN_BITS+1)-1:0] denorm_shamt; // shift amount for denormalization - - logic [1:0] round_sticky_bits; - logic of_before_round, uf_before_round; - - // Rebias the exponent - assign destination_exp = signed'(input_exp + DST_BIAS); - - // Perform adjustments to mantissa and exponent - always_comb begin : cast_value - // Default assignment - final_exp = unsigned'(destination_exp); // take exponent as is, only look at lower bits - 
preshift_mant = '0; // initialize mantissa container with zeroes - denorm_shamt = '0; - of_before_round = 1'b0; - uf_before_round = 1'b0; - - // Place mantissa to the left of the shifter - preshift_mant = {>> {input_mant, '0}}; - - // Handle overflows or infinities (for proper rounding) - if ((destination_exp >= 2**DST_EXP_BITS-1) || info_a.is_inf) begin - final_exp = unsigned'(2**DST_EXP_BITS-2); // largest normal value - preshift_mant = '1; // largest normal value and RS bits set - of_before_round = 1'b1; - // In case the destination exponent is smaller, we need to denormalize the underflow - end else if (DST_EXP_BITS < SRC_EXP_BITS) begin // STATIC - // Denormalize underflowing values - if (destination_exp < 1 && destination_exp >= -DST_MAN_BITS) begin - final_exp = '0; // denormal result - denorm_shamt = unsigned'(1 - destination_exp); // adjust mantissa by right shifting - uf_before_round = 1'b1; - // Limit the shift to retain sticky bits - end else if (destination_exp < -signed'(DST_MAN_BITS)) begin - final_exp = '0; // denormal result - denorm_shamt = unsigned'(1 + DST_MAN_BITS); // shift mantissa into sticky bits - uf_before_round = 1'b1; - end - // Otherwise, only previously denormal values can be denormal with exponent 0 - end else if (destination_exp == 0) begin - denorm_shamt = 1; // the shifter becomes a trivial 2-input mux - uf_before_round = 1'b1; - end - end - - // Mantissa adjustment shift - assign destination_mant = preshift_mant >> denorm_shamt; - // Extract final mantissa and round bit, discard the normal bit - assign {final_mant, round_sticky_bits[1]} = - destination_mant[INT_MAN_WIDTH+DST_MAN_BITS-1:INT_MAN_WIDTH-1]; - // Collapse sticky bits - assign round_sticky_bits[0] = (| {destination_mant[INT_MAN_WIDTH-2:0]}); // unused bits are sticky - - // ---------------------------- - // Rounding and classification - // ---------------------------- - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding - - 
logic of_after_round; // overflow - logic uf_after_round; // underflow - - logic rounded_sign; - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding - - assign pre_round_abs = {final_exp, final_mant}; - - // Rounding is only needed if exponent size or mantissa became smaller - if ((DST_MAN_BITS < SRC_MAN_BITS) || (DST_EXP_BITS < SRC_EXP_BITS)) begin : gen_rounding - - fpnew_rounding #( - .AbsWidth ( DST_WIDTH - 1 ) - ) i_fpnew_rounding ( - .abs_value_i ( pre_round_abs ), - .sign_i ( operand_a.sign ), - .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q ), - .effective_subtraction_i ( 1'b0 ), // no operation happened - .abs_rounded_o ( rounded_abs ), - .sign_o ( rounded_sign ), - .exact_zero_o ( /* unused */ ) - ); - - end else begin : no_rounding - assign rounded_abs = pre_round_abs; - assign rounded_sign = operand_a.sign; - end - - // Classification after rounding - assign uf_after_round = rounded_abs[DST_EXP_BITS+DST_MAN_BITS-1:DST_MAN_BITS] == '0; // denormal - assign of_after_round = rounded_abs[DST_EXP_BITS+DST_MAN_BITS-1:DST_MAN_BITS] == '1; // inf exp. - - // ----------------- - // Result selection - // ----------------- - logic [DST_WIDTH-1:0] regular_result; - fpnew_pkg::status_t regular_status; - - // Assemble regular result - assign regular_result = {rounded_sign, rounded_abs}; - assign regular_status = '{ - NV: 1'b0, // only valid cases are handled in regular path - DZ: 1'b0, // no divisions - OF: ~info_a.is_inf & (of_before_round | of_after_round), // rounding can introduce new overflow - UF: uf_after_round & regular_status.NX, // only inexact results raise UF - NX: (| round_sticky_bits) | (~info_a.is_inf & (of_before_round | of_after_round)) - }; - - // Final results for output pipeline - logic [DST_WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - - // Select output depending on special case detection - assign result_d = result_is_special ? 
special_result : regular_result; - assign status_d = result_is_special ? special_status : regular_status; - - // ---------------- - // Output Pipeline - // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( DST_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - end - -endmodule diff --git a/src/fpnew_f2icast.sv b/src/fpnew_f2icast.sv deleted file mode 100644 index 23b6a0a1..00000000 --- a/src/fpnew_f2icast.sv +++ /dev/null @@ -1,356 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -// Author: Stefan Mach - -module fpnew_f2icast #( - parameter fpnew_pkg::fp_format_e SrcFpFormat = fpnew_pkg::fp_format_e'(0), - parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '{default: 1'b1}, - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, - // Do not change - localparam int unsigned SRC_WIDTH = fpnew_pkg::fp_width(SrcFpFormat), - localparam int unsigned DST_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig) -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [SRC_WIDTH-1:0] operands_i, // 1 operand - input logic is_boxed_i, // 1 operand - input fpnew_pkg::roundmode_e rnd_mode_i, - input logic op_mod_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [DST_WIDTH-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output TagType tag_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o -); - - // ---------- - // Constants - // ---------- - localparam int SRC_EXP_BITS = fpnew_pkg::exp_bits(SrcFpFormat); - localparam int SRC_MAN_BITS = fpnew_pkg::man_bits(SrcFpFormat); - localparam int SRC_BIAS = fpnew_pkg::bias(SrcFpFormat); - - // The internal exponent is signed - localparam int unsigned INT_EXP_WIDTH = SRC_EXP_BITS + 1; // +1 for signed - // The internal mantissa contains the normal bit - localparam int unsigned INT_MAN_WIDTH = SRC_MAN_BITS + 1; - - // ---------------- - // Type definition - // ---------------- - typedef struct packed { - logic sign; - logic [SRC_EXP_BITS-1:0] exponent; - logic [SRC_MAN_BITS-1:0] mantissa; - } src_fp_t; - - // --------------- - // Input pipeline - // 
--------------- - // Pipelined input signals - logic [SRC_WIDTH-1:0] operands_q; - logic is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - logic op_mod_q; - fpnew_pkg::int_format_e int_fmt_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( SRC_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 1 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i ( fpnew_pkg::FMADD ), // unused - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( /* unused */ ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( int_fmt_q ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_mod_q = op_mod_i; - assign int_fmt_q = int_fmt_i; - end - - // ----------------- - // Input processing - // ----------------- - src_fp_t operand_a; - fpnew_pkg::fp_info_t info_a; - - logic signed [SRC_EXP_BITS:0] encoded_exp; // biased encoded exponent - logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent - logic [SRC_MAN_BITS:0] encoded_mant; // as encoded, includes normal bit - - assign operand_a = operands_q; - - // Classify input - fpnew_classifier #( - .FpFormat ( SrcFpFormat ), - .NumOperands ( 1 ) - ) i_class_a ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_a ) - ); - - assign encoded_mant = {1'b1, operand_a.mantissa}; // we don't care about denormals - assign encoded_exp = signed'({1'b0, 
operand_a.exponent}); - - assign input_exp = signed'(encoded_exp - SRC_BIAS); // Only handle normal cases - - // -------- - // Casting - // -------- - logic [INT_MAN_WIDTH+DST_WIDTH:0] preshift_mant; // mantissa before final shift with rnd bit - logic [INT_MAN_WIDTH+DST_WIDTH:0] destination_mant; // mantissa from shifter with rnd bit - logic [DST_WIDTH-1:0] final_mant; // final integer value after adjustments - - logic [$clog2(DST_WIDTH+1)-1:0] mant_shamt; // shift amount for mantissa - - logic [1:0] round_sticky_bits; - logic of_before_round, uf_before_round; - - // Perform adjustments to mantissa and exponent - always_comb begin : cast_value - // Default assignment - preshift_mant = '0; // initialize mantissa container with zeroes - // Mantissa with implicit bit can be right shifted to represent integer value - mant_shamt = unsigned'(DST_WIDTH - 1 - input_exp); - of_before_round = 1'b0; - uf_before_round = 1'b0; - - // Place mantissa to the left of the shifter - preshift_mant[INT_MAN_WIDTH+DST_WIDTH:DST_WIDTH+1] = encoded_mant; - - // Detect overflows. Range for conversions to unsigned is larger by one. - if (input_exp >= signed'(fpnew_pkg::int_width(int_fmt_q) - 1 + op_mod_q)) begin - mant_shamt = '0; // prevent shifting - of_before_round = 1'b1; - // Handle underflows: all bits to the sticky. 
- end else if (input_exp < -1) begin - mant_shamt = DST_WIDTH + 1; // Limit shift range to - uf_before_round = 1'b1; - end - end - - // Mantissa shift - assign destination_mant = preshift_mant >> mant_shamt; - - // Extract final mantissa and round/sticky bits - always_comb begin : assemble_result - logic [DST_WIDTH-1:0] fmt_mant[fpnew_pkg::NUM_INT_FORMATS]; // integer value for each format - - // Assemble result for each format - for (int unsigned i = 0; i < fpnew_pkg::NUM_INT_FORMATS; i++) begin - if (IntFmtConfig[i]) begin // only active formats - logic [DST_WIDTH-1:0] sign_ext_mask; // mask for the locations of sign extension bits - logic sign_ext_val; // the sign extension bits for this format - logic [DST_WIDTH-1:0] sign_ext_vector; // full of sign extension bits - - // Default assignment: the result mantissa - fmt_mant[i] = destination_mant[INT_MAN_WIDTH+DST_WIDTH:INT_MAN_WIDTH+1]; - // Set up sign extension mask and value - sign_ext_mask = '1 << fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i)); - sign_ext_val = fmt_mant[i][fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i))-1]; - sign_ext_vector = '{default: sign_ext_val}; - - // Combine result and sign extension vectors - fmt_mant[i] = (sign_ext_mask & sign_ext_vector) | (~sign_ext_mask & fmt_mant[i]); - end - end - // Select result according to format chosen - final_mant = fmt_mant[int_fmt_q]; - end - - assign round_sticky_bits[1] = destination_mant[INT_MAN_WIDTH]; // rnd bit - assign round_sticky_bits[0] = (| {destination_mant[INT_MAN_WIDTH-1:0]}); // unused bits are sticky - - // ---------------------------- - // Rounding and classification - // ---------------------------- - logic [DST_WIDTH-1:0] pre_round_abs; // absolute value of result before rounding - - logic rounded_sign; - logic [DST_WIDTH-1:0] rounded_abs; // absolute value of result after rounding - logic res_zero; - - assign pre_round_abs = final_mant; - - fpnew_rounding #( - .AbsWidth ( DST_WIDTH ) - ) i_fpnew_rounding ( - .abs_value_i ( 
pre_round_abs ), - .sign_i ( operand_a.sign ), - .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q ), - .effective_subtraction_i ( 1'b0 ), // no operation happened - .abs_rounded_o ( rounded_abs ), - .sign_o ( rounded_sign ), - .exact_zero_o ( res_zero ) - ); - - // ---------------------- - // Special case handling - // ---------------------- - logic [DST_WIDTH-1:0] special_result; - fpnew_pkg::status_t special_status; - logic result_is_special; - - // We handle Inf, NaN, overflows and negative unsigned values separately - assign result_is_special = info_a.is_nan | info_a.is_inf | of_before_round | ~info_a.is_boxed | - (operand_a.sign & op_mod_q & ~res_zero); - - // All special cases are invalid - assign special_status = '{NV: 1'b1, default: 1'b0}; - - // Assemble result according to special case - always_comb begin : special_cases - logic [DST_WIDTH-1:0] fmt_special_result[fpnew_pkg::NUM_INT_FORMATS]; - - // Determine special result for each format separately - for (int unsigned i = 0; i < fpnew_pkg::NUM_INT_FORMATS; i++) begin - if (IntFmtConfig[i]) begin // only active formats - logic [DST_WIDTH-1:0] sign_ext_mask; // mask for the locations of sign extension bits - logic [DST_WIDTH-1:0] sign_ext_vector; // full of sign extension bits - // Default assignment: set all ones - fmt_special_result[i] = '1; - // MSB of special result depends on signed/unsigned - fmt_special_result[i][fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i))-1] = op_mod_q; - - // In case of a negative result, flip the bits to procude -max or 0 - if (!info_a.is_nan && operand_a.sign) - fmt_special_result[i] = ~fmt_special_result[i]; - - // Set up sign extension mask and value - sign_ext_mask = '1 << fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i)); - sign_ext_vector = '{default: op_mod_q ^ (~info_a.is_nan & operand_a.sign)}; - - // Combine result and sign extension bits - fmt_special_result[i] = (sign_ext_mask & sign_ext_vector) | - (~sign_ext_mask & fmt_special_result[i]); 
- - end - end - // Select result according to format - special_result = fmt_special_result[int_fmt_q]; - - end - - // ----------------- - // Result selection - // ----------------- - logic [DST_WIDTH-1:0] regular_result; - fpnew_pkg::status_t regular_status; - - // Invert regular result depending on sign - assign regular_result = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs; - assign regular_status = '{NX: (| round_sticky_bits), default: 1'b0}; // only NX can be raised - - // Final results for output pipeline - logic [DST_WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - logic extension_bit; - - // Select output depending on special case detection - assign result_d = result_is_special ? special_result : regular_result; - assign status_d = result_is_special ? special_status : regular_status; - - // MSB of result decides extension - assign extension_bit = result_d[DST_WIDTH-1]; - - // ---------------- - // Output Pipeline - // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( DST_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( extension_bit ), - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = extension_bit; - end - -endmodule diff --git a/src/fpnew_i2fcast.sv b/src/fpnew_i2fcast.sv deleted file mode 100644 index 56176cdd..00000000 --- a/src/fpnew_i2fcast.sv +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 
2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -module fpnew_i2fcast #( - parameter fpnew_pkg::fp_format_e DstFpFormat = fpnew_pkg::fp_format_e'(0), - parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, - - localparam int unsigned SRC_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig), // do not change - localparam int unsigned DST_WIDTH = fpnew_pkg::fp_width(DstFpFormat) // do not change -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [SRC_WIDTH-1:0] operands_i, // 1 operand - input fpnew_pkg::roundmode_e rnd_mode_i, - input logic op_mod_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [DST_WIDTH-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output TagType tag_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o -); - - // ---------- - // Constants - // ---------- - localparam int DST_EXP_BITS = 
fpnew_pkg::exp_bits(DstFpFormat); - localparam int DST_MAN_BITS = fpnew_pkg::man_bits(DstFpFormat); - localparam int DST_BIAS = fpnew_pkg::bias(DstFpFormat); - - // The internal mantissa contains the normal and RS bits and must also be able to hold the integer - localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(DST_MAN_BITS + 3, SRC_WIDTH); - - // There is a LZC for normalization - localparam int unsigned LZC_RESULT_WIDTH = $clog2(SRC_WIDTH); - - // The internal exponent is signed and must be wide enough to hold readjustment shift - localparam int unsigned INT_EXP_WIDTH = - fpnew_pkg::maximum(DST_EXP_BITS, LZC_RESULT_WIDTH) + 1; // +1 for signed - - // ---------------- - // Type definition - // ---------------- - typedef struct packed { - logic sign; - logic [DST_EXP_BITS-1:0] exponent; - logic [DST_MAN_BITS-1:0] mantissa; - } dst_fp_t; - - // --------------- - // Input pipeline - // --------------- - // Pipelined input signals - logic [SRC_WIDTH-1:0] operands_q; - fpnew_pkg::roundmode_e rnd_mode_q; - logic op_mod_q; - fpnew_pkg::int_format_e int_fmt_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( SRC_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 1 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i ( '{default: fpnew_pkg::DONT_CARE} ), // unused - .rnd_mode_i, - .op_i ( fpnew_pkg::FMADD ), // unused - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( /* unused */ ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( /* unused */ ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( int_fmt_q ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass 
through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign rnd_mode_q = rnd_mode_i; - assign op_mod_q = op_mod_i; - assign int_fmt_q = int_fmt_i; - end - - // ----------------- - // Input processing - // ----------------- - logic [SRC_WIDTH-1:0] operand_a; - logic [SRC_WIDTH-1:0] input_val; - logic input_sign; - logic [SRC_WIDTH-1:0] input_mag; - - assign operand_a = operands_q; - - // Sign-extend input value - always_comb begin : sign_ext_input - logic [SRC_WIDTH-1:0] fmt_input_val[fpnew_pkg::NUM_INT_FORMATS]; // per-format input value - - // sign-extend input for each format - for (int unsigned i = 0; i < fpnew_pkg::NUM_INT_FORMATS; i++) begin - if (IntFmtConfig[i]) begin // only active formats - logic [SRC_WIDTH-1:0] sign_ext_mask; // mask for the locations of sign extension bits - logic sign_ext_val; // the sign extension bits for this format - logic [SRC_WIDTH-1:0] sign_ext_vector; // full of sign extension bits - - // Set up sign extension mask and value - sign_ext_mask = '1 << fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i)); - sign_ext_val = operand_a[fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i))-1]; - sign_ext_vector = '{default: sign_ext_val & ~op_mod_q}; // only for signed casts - - // Combine result and sign extension vectors - fmt_input_val[i] = (sign_ext_mask & sign_ext_vector) | (~sign_ext_mask & operand_a); - end - end - - // Select input according to format chosen - input_val = fmt_input_val[int_fmt_q]; - end - - // Get the sign for signed casts - assign input_sign = input_val[SRC_WIDTH-1] & ~op_mod_q; - // Obtain the input's magnitude - assign input_mag = input_sign ? 
unsigned'(-input_val) : input_val; - - // -------- - // Casting - // -------- - logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination - logic [DST_EXP_BITS-1:0] final_exp; // after eventual adjustments - - logic [INT_MAN_WIDTH-1:0] preshift_mant; // mantissa before normalization shift - logic [INT_MAN_WIDTH-1:0] destination_mant; // mantissa from shifter - logic [DST_MAN_BITS-1:0] final_mant; // mantissa after adjustments - - logic [LZC_RESULT_WIDTH-1:0] norm_shamt; // shift amount for denormalization - logic signed [LZC_RESULT_WIDTH:0] norm_shamt_sgn; // shift amount in signed form - - logic result_zero; - - logic [1:0] round_sticky_bits; - logic of_before_round; - - // Leading-zero counter is needed for normalization - lzc #( - .WIDTH ( SRC_WIDTH ), - .MODE ( 1 ) // MODE = 1 counts leading zeroes - ) i_lzc ( - .in_i ( input_mag ), - .cnt_o ( norm_shamt ), - .empty_o ( result_zero ) - ); - assign norm_shamt_sgn = signed'({1'b0, norm_shamt}); - - // Place integer to the left of the shifter space (only matters if DST_MAN_BITS > SRC_WIDTH) - assign preshift_mant = input_mag << (INT_MAN_WIDTH - SRC_WIDTH); // constant shift - // Perform the Normalization shift - assign destination_mant = preshift_mant << norm_shamt; - - // Exponent is calculated from source width and the leading zeroes, bias is added - assign destination_exp = signed'(SRC_WIDTH - 1 - norm_shamt_sgn + DST_BIAS); - - // Handle the only special case we have: OF - always_comb begin : detect_overflow - // Default assignment - final_exp = unsigned'(destination_exp); // take exponent as is - final_mant = destination_mant[INT_MAN_WIDTH-2 -: DST_MAN_BITS]; - round_sticky_bits[1] = destination_mant[INT_MAN_WIDTH-DST_MAN_BITS-2]; - round_sticky_bits[0] = (| {destination_mant[INT_MAN_WIDTH-DST_MAN_BITS-3:0]}); // reduce sticky - of_before_round = 1'b0; - - // Properly set the exponent for zeroes - if (result_zero) begin - final_exp = '0; - // Handle overflows or infinities (for 
proper rounding) - end else if (destination_exp >= 2**DST_EXP_BITS-1) begin - final_exp = unsigned'(2**DST_EXP_BITS-2); // largest normal value - final_mant = '1; // largest normal value and RS bits set - round_sticky_bits = '1; // RS set - of_before_round = 1'b1; - end - end - - // ---------------------------- - // Rounding and classification - // ---------------------------- - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding - - logic of_after_round; // overflow - - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding - logic rounded_sign; - - assign pre_round_abs = {final_exp, final_mant}; - - // Rounding is only needed if mantissa smaller than integer or integer has larger range - if ((DST_MAN_BITS + 1 < SRC_WIDTH) || (SRC_WIDTH > DST_BIAS)) begin : gen_rounding - - fpnew_rounding #( - .AbsWidth ( DST_WIDTH - 1 ) - ) i_fpnew_rounding ( - .abs_value_i ( pre_round_abs ), - .sign_i ( input_sign ), - .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q ), - .effective_subtraction_i ( 1'b0 ), // no operation happened - .abs_rounded_o ( rounded_abs ), - .sign_o ( rounded_sign ), - .exact_zero_o ( /* unused */ ) - ); - - end else begin : no_rounding - assign rounded_abs = pre_round_abs; - assign rounded_sign = input_sign; - end - - // Classification after rounding - assign of_after_round = rounded_abs[DST_EXP_BITS+DST_MAN_BITS-1:DST_MAN_BITS] == '1; // inf exp. 
- - // ----------------- - // Result selection - // ----------------- - // Final results for output pipeline - logic [DST_WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - - // Assemble final result - assign result_d = {rounded_sign, rounded_abs}; - assign status_d = '{ - NV: of_before_round | of_after_round, // Overflowing values are invalid for casts - DZ: 1'b0, // no divisions - OF: 1'b0, // no overflow per se => it's invalid to cast too large integers - UF: 1'b0, // no underflow - NX: (| round_sticky_bits) // RS bits mean loss in precision - }; - - // ---------------- - // Output Pipeline - // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( DST_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - end - -endmodule diff --git a/src/fpnew_pipe_in.sv b/src/fpnew_pipe_in.sv deleted file mode 100644 index 818f5eb5..00000000 --- a/src/fpnew_pipe_in.sv +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. 
You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. -// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] | _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. 
- -// Author: Stefan Mach - -`include "common_cells/registers.svh" - -module fpnew_pipe_in #( - parameter int unsigned Width = 32, - parameter int unsigned NumPipeRegs = 0, - parameter int unsigned NumOperands = 3, - parameter int unsigned NumFormats = 1, - parameter type TagType = logic, - parameter type AuxType = logic -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [NumOperands-1:0][Width-1:0] operands_i, - input logic [NumFormats-1:0][NumOperands-1:0] is_boxed_i, - input fpnew_pkg::roundmode_e rnd_mode_i, - input fpnew_pkg::operation_e op_i, - input logic op_mod_i, - input fpnew_pkg::fp_format_e src_fmt_i, - input fpnew_pkg::fp_format_e dst_fmt_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [NumOperands-1:0][Width-1:0] operands_o, - output logic [NumFormats-1:0][NumOperands-1:0] is_boxed_o, - output fpnew_pkg::roundmode_e rnd_mode_o, - output fpnew_pkg::operation_e op_o, - output logic op_mod_o, - output fpnew_pkg::fp_format_e src_fmt_o, - output fpnew_pkg::fp_format_e dst_fmt_o, - output fpnew_pkg::int_format_e int_fmt_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [0:NumPipeRegs][NumOperands-1:0][Width-1:0] operands_d; - logic [0:NumPipeRegs][NumFormats-1:0][NumOperands-1:0] is_boxed_d; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_d; - fpnew_pkg::operation_e [0:NumPipeRegs] op_d; - logic [0:NumPipeRegs] op_mod_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_d; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_d; - TagType [0:NumPipeRegs] tag_d; - AuxType [0:NumPipeRegs] aux_d; - 
logic [0:NumPipeRegs] valid_d; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign operands_d[0] = operands_i; - assign is_boxed_d[0] = is_boxed_i; - assign rnd_mode_d[0] = rnd_mode_i; - assign op_d[0] = op_i; - assign op_mod_d[0] = op_mod_i; - assign src_fmt_d[0] = src_fmt_i; - assign dst_fmt_d[0] = dst_fmt_i; - assign int_fmt_d[0] = int_fmt_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate pipeline ready signal - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : gen_pipeline - // Pipelined versions of signals for later stages - logic [0:NumPipeRegs][NumOperands-1:0][Width-1:0] operands_q; - logic [0:NumPipeRegs][NumFormats-1:0][NumOperands-1:0] is_boxed_q; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_q; - fpnew_pkg::operation_e [0:NumPipeRegs] op_q; - logic [0:NumPipeRegs] op_mod_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_q; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_q; - TagType [0:NumPipeRegs] tag_q; - AuxType [0:NumPipeRegs] aux_q; - logic [0:NumPipeRegs] valid_q; - - for (genvar i = 0; i < int'(NumPipeRegs); i++) begin : pipeline_stages - // Internal register enable for this stage - logic reg_ena; - - // Next state from previous register to form a shift register - assign operands_d[i+1] = operands_q[i]; - assign is_boxed_d[i+1] = is_boxed_q[i]; - assign rnd_mode_d[i+1] = rnd_mode_q[i]; - assign op_d[i+1] = op_q[i]; - assign op_mod_d[i+1] = op_mod_q[i]; - assign src_fmt_d[i+1] = src_fmt_q[i]; - assign dst_fmt_d[i+1] = dst_fmt_q[i]; - assign int_fmt_d[i+1] = int_fmt_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current stage - advance 
the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(operands_q[i], operands_d[i], reg_ena, '0) - `FFL(is_boxed_q[i], is_boxed_d[i], reg_ena, '0) - `FFL(rnd_mode_q[i], rnd_mode_d[i], reg_ena, fpnew_pkg::RNE) - `FFL(op_q[i], op_d[i], reg_ena, fpnew_pkg::FMADD) - `FFL(op_mod_q[i], op_mod_d[i], reg_ena, '0) - `FFL(src_fmt_q[i], src_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(dst_fmt_q[i], dst_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(int_fmt_q[i], int_fmt_d[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. 
- assign operands_o = operands_d[NumPipeRegs]; - assign is_boxed_o = is_boxed_d[NumPipeRegs]; - assign rnd_mode_o = rnd_mode_d[NumPipeRegs]; - assign op_o = op_d[NumPipeRegs]; - assign op_mod_o = op_mod_d[NumPipeRegs]; - assign dst_fmt_o = dst_fmt_d[NumPipeRegs]; - assign src_fmt_o = src_fmt_d[NumPipeRegs]; - assign int_fmt_o = int_fmt_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src/fpnew_pipe_inside_cast.sv b/src/fpnew_pipe_inside_cast.sv deleted file mode 100644 index bf699980..00000000 --- a/src/fpnew_pipe_inside_cast.sv +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. -// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] 
| _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. - -// Author: Stefan Mach - -`include "common_cells/registers.svh" - -module fpnew_pipe_inside_cast #( - parameter int unsigned IntExpWidth = 12, - parameter int unsigned IntManWidth = 64, - parameter int unsigned NumPipeRegs = 0, - parameter type TagType = logic, - parameter type AuxType = logic -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic input_sign_i, - input logic signed [IntExpWidth-1:0] input_exp_i, - input logic signed [IntExpWidth-1:0] destination_exp_i, - input logic [IntManWidth-1:0] input_mant_i, - input logic src_is_int_i, - input logic dst_is_int_i, - input fpnew_pkg::fp_info_t info_i, - input logic mant_is_zero_i, - input logic op_mod_i, - input fpnew_pkg::roundmode_e rnd_mode_i, - input fpnew_pkg::fp_format_e src_fmt_i, - input fpnew_pkg::fp_format_e dst_fmt_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic 
flush_i, - // Output signals - output logic input_sign_o, - output logic signed [IntExpWidth-1:0] input_exp_o, - output logic signed [IntExpWidth-1:0] destination_exp_o, - output logic [IntManWidth-1:0] input_mant_o, - output logic src_is_int_o, - output logic dst_is_int_o, - output fpnew_pkg::fp_info_t info_o, - output logic mant_is_zero_o, - output logic op_mod_o, - output fpnew_pkg::roundmode_e rnd_mode_o, - output fpnew_pkg::fp_format_e src_fmt_o, - output fpnew_pkg::fp_format_e dst_fmt_o, - output fpnew_pkg::int_format_e int_fmt_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [0:NumPipeRegs] input_sign_d; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] input_exp_d; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] destination_exp_d; - logic [0:NumPipeRegs][IntManWidth-1:0] input_mant_d; - logic [0:NumPipeRegs] src_is_int_d; - logic [0:NumPipeRegs] dst_is_int_d; - fpnew_pkg::fp_info_t [0:NumPipeRegs] info_d; - logic [0:NumPipeRegs] mant_is_zero_d; - logic [0:NumPipeRegs] op_mod_d; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_d; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_d; - TagType [0:NumPipeRegs] tag_d; - AuxType [0:NumPipeRegs] aux_d; - logic [0:NumPipeRegs] valid_d; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign input_sign_d[0] = input_sign_i; - assign input_exp_d[0] = input_exp_i; - assign destination_exp_d[0] = destination_exp_i; - assign input_mant_d[0] = input_mant_i; - assign src_is_int_d[0] = src_is_int_i; - assign dst_is_int_d[0] = dst_is_int_i; - assign info_d[0] = info_i; - assign mant_is_zero_d[0] = 
mant_is_zero_i; - assign op_mod_d[0] = op_mod_i; - assign rnd_mode_d[0] = rnd_mode_i; - assign src_fmt_d[0] = src_fmt_i; - assign dst_fmt_d[0] = dst_fmt_i; - assign int_fmt_d[0] = int_fmt_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate pipeline ready signal - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : gen_pipeline - // Pipelined versions of signals for later stages - logic [0:NumPipeRegs] input_sign_q; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] input_exp_q; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] destination_exp_q; - logic [0:NumPipeRegs][IntManWidth-1:0] input_mant_q; - logic [0:NumPipeRegs] src_is_int_q; - logic [0:NumPipeRegs] dst_is_int_q; - fpnew_pkg::fp_info_t [0:NumPipeRegs] info_q; - logic [0:NumPipeRegs] mant_is_zero_q; - logic [0:NumPipeRegs] op_mod_q; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_q; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_q; - TagType [0:NumPipeRegs] tag_q; - AuxType [0:NumPipeRegs] aux_q; - logic [0:NumPipeRegs] valid_q; - - for (genvar i = 0; i < int'(NumPipeRegs); i++) begin : pipeline_stages - // Internal register enable for this stage - logic reg_ena; - - // Next state from previous register to form a shift register - assign input_sign_d[i+1] = input_sign_q[i]; - assign input_exp_d[i+1] = input_exp_q[i]; - assign destination_exp_d[i+1] = destination_exp_q[i]; - assign input_mant_d[i+1] = input_mant_q[i]; - assign src_is_int_d[i+1] = src_is_int_q[i]; - assign dst_is_int_d[i+1] = dst_is_int_q[i]; - assign info_d[i+1] = info_q[i]; - assign mant_is_zero_d[i+1] = mant_is_zero_q[i]; - assign op_mod_d[i+1] = op_mod_q[i]; - assign rnd_mode_d[i+1] = rnd_mode_q[i]; - assign src_fmt_d[i+1] = src_fmt_q[i]; - assign dst_fmt_d[i+1] = dst_fmt_q[i]; - assign 
int_fmt_d[i+1] = int_fmt_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(input_sign_q[i], input_sign_d[i], reg_ena, '0) - `FFL(destination_exp_q[i], destination_exp_d[i], reg_ena, '0) - `FFL(input_exp_q[i], input_exp_d[i], reg_ena, '0) - `FFL(input_mant_q[i], input_mant_d[i], reg_ena, '0) - `FFL(src_is_int_q[i], src_is_int_d[i], reg_ena, '0) - `FFL(dst_is_int_q[i], dst_is_int_d[i], reg_ena, '0) - `FFL(info_q[i], info_d[i], reg_ena, '0) - `FFL(mant_is_zero_q[i], mant_is_zero_d[i], reg_ena, '0) - `FFL(op_mod_q[i], op_mod_d[i], reg_ena, '0) - `FFL(rnd_mode_q[i], rnd_mode_d[i], reg_ena, fpnew_pkg::RNE) - `FFL(src_fmt_q[i], src_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(dst_fmt_q[i], dst_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(int_fmt_q[i], int_fmt_d[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. 
- assign input_sign_o = input_sign_d[NumPipeRegs]; - assign input_exp_o = input_exp_d[NumPipeRegs]; - assign destination_exp_o = destination_exp_d[NumPipeRegs]; - assign input_mant_o = input_mant_d[NumPipeRegs]; - assign src_is_int_o = src_is_int_d[NumPipeRegs]; - assign dst_is_int_o = dst_is_int_d[NumPipeRegs]; - assign info_o = info_d[NumPipeRegs]; - assign mant_is_zero_o = mant_is_zero_d[NumPipeRegs]; - assign op_mod_o = op_mod_d[NumPipeRegs]; - assign rnd_mode_o = rnd_mode_d[NumPipeRegs]; - assign src_fmt_o = src_fmt_d[NumPipeRegs]; - assign dst_fmt_o = dst_fmt_d[NumPipeRegs]; - assign int_fmt_o = int_fmt_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src/fpnew_pipe_inside_fma.sv b/src/fpnew_pipe_inside_fma.sv deleted file mode 100644 index a9ddf0df..00000000 --- a/src/fpnew_pipe_inside_fma.sv +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. 
-// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] | _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. 
- -// Author: Stefan Mach - -`include "common_cells/registers.svh" - -module fpnew_pipe_inside_fma #( - parameter int unsigned ExpWidth = 10, - parameter int unsigned PrecBits = 24, - parameter int unsigned NumPipeRegs = 0, - parameter type FpType = logic, - parameter type TagType = logic, - parameter type AuxType = logic, - // Do not change - localparam int unsigned SUM_WIDTH = 3*PrecBits+3+1, - localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PrecBits + 3) -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic effective_subtraction_i, - input logic final_sign_i, - input logic signed [ExpWidth-1:0] exponent_product_i, - input logic signed [ExpWidth-1:0] exponent_difference_i, - input logic signed [ExpWidth-1:0] tentative_exponent_i, - input logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_i, - input logic sticky_before_add_i, - input logic [SUM_WIDTH-1:0] sum_i, - input fpnew_pkg::roundmode_e rnd_mode_i, - input fpnew_pkg::fp_format_e dst_fmt_i, - input logic result_is_special_i, - input FpType special_result_i, - input fpnew_pkg::status_t special_status_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic effective_subtraction_o, - output logic final_sign_o, - output logic signed [ExpWidth-1:0] exponent_product_o, - output logic signed [ExpWidth-1:0] exponent_difference_o, - output logic signed [ExpWidth-1:0] tentative_exponent_o, - output logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_o, - output logic sticky_before_add_o, - output logic [SUM_WIDTH-1:0] sum_o, - output fpnew_pkg::roundmode_e rnd_mode_o, - output fpnew_pkg::fp_format_e dst_fmt_o, - output logic result_is_special_o, - output FpType special_result_o, - output fpnew_pkg::status_t special_status_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output 
logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [0:NumPipeRegs] effective_subtraction_d; - logic [0:NumPipeRegs] final_sign_d; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_product_d; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_difference_d; - logic signed [0:NumPipeRegs][ExpWidth-1:0] tentative_exponent_d; - logic [0:NumPipeRegs][SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_d; - logic [0:NumPipeRegs] sticky_before_add_d; - logic [0:NumPipeRegs][SUM_WIDTH-1:0] sum_d; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_d; - logic [0:NumPipeRegs] result_is_special_d; - FpType [0:NumPipeRegs] special_result_d; - fpnew_pkg::status_t [0:NumPipeRegs] special_status_d; - TagType [0:NumPipeRegs] tag_d; - AuxType [0:NumPipeRegs] aux_d; - logic [0:NumPipeRegs] valid_d; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign effective_subtraction_d[0] = effective_subtraction_i; - assign final_sign_d[0] = final_sign_i; - assign exponent_product_d[0] = exponent_product_i; - assign exponent_difference_d[0] = exponent_difference_i; - assign tentative_exponent_d[0] = tentative_exponent_i; - assign addend_shamt_d[0] = addend_shamt_i; - assign sticky_before_add_d[0] = sticky_before_add_i; - assign sum_d[0] = sum_i; - assign rnd_mode_d[0] = rnd_mode_i; - assign dst_fmt_d[0] = dst_fmt_i; - assign result_is_special_d[0] = result_is_special_i; - assign special_result_d[0] = special_result_i; - assign special_status_d[0] = special_status_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate pipeline ready signal - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : gen_pipeline - // Pipelined versions of signals for later 
stages - logic [0:NumPipeRegs] effective_subtraction_q; - logic [0:NumPipeRegs] final_sign_q; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_product_q; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_difference_q; - logic signed [0:NumPipeRegs][ExpWidth-1:0] tentative_exponent_q; - logic [0:NumPipeRegs][SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; - logic [0:NumPipeRegs] sticky_before_add_q; - logic [0:NumPipeRegs][SUM_WIDTH-1:0] sum_q; - logic [0:NumPipeRegs][3*PrecBits+3:0] product_shifted_q; - logic [0:NumPipeRegs][3*PrecBits+3:0] addend_shifted_q; - logic [0:NumPipeRegs] inject_carry_in_q; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_q; - logic [0:NumPipeRegs] result_is_special_q; - FpType [0:NumPipeRegs] special_result_q; - fpnew_pkg::status_t [0:NumPipeRegs] special_status_q; - TagType [0:NumPipeRegs] tag_q; - AuxType [0:NumPipeRegs] aux_q; - logic [0:NumPipeRegs] valid_q; - - for (genvar i = 0; i < int'(NumPipeRegs); i++) begin : pipeline_stages - // Internal register enable for this stage - logic reg_ena; - - // Next state from previous register to form a shift register - assign effective_subtraction_d[i+1] = effective_subtraction_q[i]; - assign final_sign_d[i+1] = final_sign_q[i]; - assign exponent_product_d[i+1] = exponent_product_q[i]; - assign exponent_difference_d[i+1] = exponent_difference_q[i]; - assign tentative_exponent_d[i+1] = tentative_exponent_q[i]; - assign addend_shamt_d[i+1] = addend_shamt_q[i]; - assign sticky_before_add_d[i+1] = sticky_before_add_q[i]; - assign sum_d[i+1] = sum_q[i]; - assign rnd_mode_d[i+1] = rnd_mode_q[i]; - assign dst_fmt_d[i+1] = dst_fmt_q[i]; - assign result_is_special_d[i+1] = result_is_special_q[i]; - assign special_result_d[i+1] = special_result_q[i]; - assign special_status_d[i+1] = special_status_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current 
stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(effective_subtraction_q[i], effective_subtraction_d[i], reg_ena, '0) - `FFL(final_sign_q[i], final_sign_d[i], reg_ena, '0) - `FFL(exponent_product_q[i], exponent_product_d[i], reg_ena, '0) - `FFL(exponent_difference_q[i], exponent_difference_d[i], reg_ena, '0) - `FFL(tentative_exponent_q[i], tentative_exponent_d[i], reg_ena, '0) - `FFL(addend_shamt_q[i], addend_shamt_d[i], reg_ena, '0) - `FFL(sticky_before_add_q[i], sticky_before_add_d[i], reg_ena, '0) - `FFL(sum_q[i], sum_d[i], reg_ena, '0) - `FFL(rnd_mode_q[i], rnd_mode_d[i], reg_ena, fpnew_pkg::RNE) - `FFL(dst_fmt_q[i], dst_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(result_is_special_q[i], result_is_special_d[i], reg_ena, '0) - `FFL(special_result_q[i], special_result_d[i], reg_ena, '0) - `FFL(special_status_q[i], special_status_d[i], reg_ena, '0) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. 
- assign effective_subtraction_o = effective_subtraction_d[NumPipeRegs]; - assign final_sign_o = final_sign_d[NumPipeRegs]; - assign exponent_product_o = exponent_product_d[NumPipeRegs]; - assign exponent_difference_o = exponent_difference_d[NumPipeRegs]; - assign tentative_exponent_o = tentative_exponent_d[NumPipeRegs]; - assign addend_shamt_o = addend_shamt_d[NumPipeRegs]; - assign sticky_before_add_o = sticky_before_add_d[NumPipeRegs]; - assign sum_o = sum_d[NumPipeRegs]; - assign rnd_mode_o = rnd_mode_d[NumPipeRegs]; - assign dst_fmt_o = dst_fmt_d[NumPipeRegs]; - assign result_is_special_o = result_is_special_d[NumPipeRegs]; - assign special_result_o = special_result_d[NumPipeRegs]; - assign special_status_o = special_status_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src/fpnew_pipe_out.sv b/src/fpnew_pipe_out.sv deleted file mode 100644 index 686af4bd..00000000 --- a/src/fpnew_pipe_out.sv +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. -// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] | _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. 
- -`include "common_cells/registers.svh" - -module fpnew_pipe_out #( - parameter int unsigned Width = 32, - parameter int unsigned NumPipeRegs = 0, - parameter type TagType = logic, - parameter type AuxType = logic -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [Width-1:0] result_i, - input fpnew_pkg::status_t status_i, - input logic extension_bit_i, - input fpnew_pkg::classmask_e class_mask_i, - input logic is_class_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [Width-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output fpnew_pkg::classmask_e class_mask_o, - output logic is_class_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [NumPipeRegs:0][Width-1:0] result_d; - fpnew_pkg::status_t [NumPipeRegs:0] status_d; - logic [NumPipeRegs:0] extension_bit_d; - fpnew_pkg::classmask_e [NumPipeRegs:0] class_mask_d; - logic [NumPipeRegs:0] is_class_d; - TagType [NumPipeRegs:0] tag_d; - AuxType [NumPipeRegs:0] aux_d; - logic [NumPipeRegs:0] valid_d; - // Ready signal is combinatorial for all stages - logic [NumPipeRegs:0] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign result_d[0] = result_i; - assign status_d[0] = status_i; - assign extension_bit_d[0] = extension_bit_i; - assign class_mask_d[0] = class_mask_i; - assign is_class_d[0] = is_class_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate ready signal from pipeline - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : 
gen_pipeline - // Pipelined versions of signals for later stages - logic [NumPipeRegs-1:0][Width-1:0] result_q; - fpnew_pkg::status_t [NumPipeRegs-1:0] status_q; - logic [NumPipeRegs-1:0] extension_bit_q; - fpnew_pkg::classmask_e [NumPipeRegs-1:0] class_mask_q; - logic [NumPipeRegs-1:0] is_class_q; - TagType [NumPipeRegs-1:0] tag_q; - AuxType [NumPipeRegs-1:0] aux_q; - logic [NumPipeRegs-1:0] valid_q; - - for (genvar i = 0; i < NumPipeRegs; i++) begin : pipeline_stages - // Internal register enable for this stage -> creates gated registers if supported in synth - logic reg_ena; - - // Next state from previous register to form a shift register - assign result_d[i+1] = result_q[i]; - assign status_d[i+1] = status_q[i]; - assign extension_bit_d[i+1] = extension_bit_q[i]; - assign class_mask_d[i+1] = class_mask_q[i]; - assign is_class_d[i+1] = is_class_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage register only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid registers: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable the payload registers if pipleine ready and a valid data item is present (gating) - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(result_q[i], result_d[i], reg_ena, '0) - `FFL(status_q[i], status_d[i], reg_ena, '0) - `FFL(extension_bit_q[i], extension_bit_d[i], reg_ena, '0) - `FFL(class_mask_q[i], class_mask_d[i], reg_ena, fpnew_pkg::QNAN) - `FFL(is_class_q[i], is_class_d[i], reg_ena, '0) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. - assign result_o = result_d[NumPipeRegs]; - assign status_o = status_d[NumPipeRegs]; - assign extension_bit_o = extension_bit_d[NumPipeRegs]; - assign class_mask_o = class_mask_d[NumPipeRegs]; - assign is_class_o = is_class_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src_files.yml b/src_files.yml index e476c378..1931258f 100644 --- a/src_files.yml +++ b/src_files.yml @@ -13,10 +13,6 @@ fpnew: src/fpnew_opgroup_block.sv, src/fpnew_opgroup_fmt_slice.sv, src/fpnew_opgroup_multifmt_slice.sv, - src/fpnew_pipe_in.sv, - src/fpnew_pipe_out.sv, - src/fpnew_pipe_inside_fma.sv, - src/fpnew_pipe_inside_cast.sv, src/fpnew_rounding.sv, src/fpnew_top.sv, ] From 
991e376ea7fdea4dfc1d781c6a8b0ce37516e8ca Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 4 Jul 2019 10:47:07 +0200 Subject: [PATCH 11/13] :shirt: Remove linter warnings Various issues causing linter warnings have been addressed. --- docs/CHANGELOG.md | 1 + src/fpnew_fma.sv | 6 +++--- src/fpnew_opgroup_block.sv | 16 ++++++++-------- src/fpnew_opgroup_fmt_slice.sv | 14 +++++++++----- src/fpnew_opgroup_multifmt_slice.sv | 7 +++++++ 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index a2261bcf..bdff122d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -20,6 +20,7 @@ Versions of the IP in the same major relase are "pin-compatible" with each other - Typo in the documentation about the multiply operation - Generation of merged slices when the first package format is disabled - Potential simulation/synthesis mismatch of the UF flag +- Various linter warnings - [Bender] Fixed dependencies for Bender [(#14)](https://github.com/pulp-platform/fpnew/pull/15) ### Removed diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index 4ab3b182..f9fa813b 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -63,7 +63,7 @@ module fpnew_fma #( // Internal exponent width of FMA must accomodate all meaningful exponent values in order to avoid // datapath leakage. This is either given by the exponent bits or the width of the LZC result. // In most reasonable FP formats the internal exponent will be wider than the LZC result. 
- localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH); + localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH)); // Shift amount width: maximum internal mantissa size is 3p+3 bits localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); // Pipelines @@ -513,7 +513,7 @@ module fpnew_fma #( // Subnormal result end else begin // Cap the shift distance to align mantissa with minimum exponent - norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q)); + norm_shamt = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q); normalized_exponent = 0; // subnormals encoded as 0 end // Addend-anchored case @@ -575,7 +575,7 @@ module fpnew_fma #( // Assemble result before rounding. In case of overflow, the largest normal value is set. assign pre_round_sign = final_sign_q; - assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0]; + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]); assign pre_round_mantissa = (of_before_round) ? 
'1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit assign pre_round_abs = {pre_round_exponent, pre_round_mantissa}; diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index ea9bf384..b9daeeb7 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -91,13 +91,13 @@ module fpnew_opgroup_block #( assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format fpnew_opgroup_fmt_slice #( - .OpGroup ( OpGroup ), - .FpFormat ( fmt ), - .Width ( Width ), - .EnableVectors ( EnableVectors ), - .NumPipeRegs ( FmtPipeRegs[fmt] ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ) + .OpGroup ( OpGroup ), + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .Width ( Width ), + .EnableVectors ( EnableVectors ), + .NumPipeRegs ( FmtPipeRegs[fmt] ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ) ) i_fmt_slice ( .clk_i, .rst_ni, @@ -143,7 +143,7 @@ module fpnew_opgroup_block #( assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE}; assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; - assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); + assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); end end diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 80ddb0db..fda2a57f 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -126,7 +126,8 @@ module fpnew_opgroup_fmt_slice #( .out_ready_i ( out_ready ), .busy_o ( lane_busy[lane] ) ); - assign lane_is_class[lane] = 1'b0; + assign lane_is_class[lane] = 1'b0; + assign lane_class_mask[lane] = fpnew_pkg::NEGINF; end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance // fpnew_divsqrt #( // .FpFormat (FpFormat), @@ -245,11 +246,14 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 
8 * (Width / 8) : NUM_LANES*8; - localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; + // Pad out unused vec_class bits + if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class + assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0; + end + + // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; - assign slice_class_result = result_is_vector - ? slice_vec_class_result & CLASS_VEC_MASK - : lane_class_mask[0]; // Scalar classification block + assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0]; // Select the proper result assign result_o = result_is_class ? slice_class_result : slice_regular_result; diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 6719e837..14c1b453 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -340,6 +340,13 @@ module fpnew_opgroup_multifmt_slice #( assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]}; end + // Mute int results if unused + for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled + if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result + assign ifmt_slice_result[ifmt] = '0; + end + end + // Bypass lanes with target operand for vectorial casts if (OpGroup == fpnew_pkg::CONV) begin : target_regs // Bypass pipeline signals, index i holds signal after i register stages From 80b2e788b70fd79761b678d6f73b9591540aa6ff Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 4 Jul 2019 11:13:40 +0200 Subject: [PATCH 12/13] :arrow_up: [fpu_div_sqrt_mvp] Bump for lint warnings --- Bender.yml | 2 +- docs/CHANGELOG.md | 1 + ips_list.yml | 2 +- src/fpu_div_sqrt_mvp | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Bender.yml b/Bender.yml index 3dbe79e4..dfa19f31 100644 --- a/Bender.yml +++ b/Bender.yml @@ -4,7 +4,7 @@ package: dependencies: common_cells: {git: 
"https://github.com/pulp-platform/common_cells.git", version: 1.13.1} - fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.2} + fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.3} sources: - src/fpnew_pkg.sv diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index bdff122d..fcc33aa1 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -21,6 +21,7 @@ Versions of the IP in the same major relase are "pin-compatible" with each other - Generation of merged slices when the first package format is disabled - Potential simulation/synthesis mismatch of the UF flag - Various linter warnings +- [fpu_div_sqrt_mvp] Bumped to fix linter warnings - [Bender] Fixed dependencies for Bender [(#14)](https://github.com/pulp-platform/fpnew/pull/15) ### Removed diff --git a/ips_list.yml b/ips_list.yml index 5918cd43..a1f78506 100644 --- a/ips_list.yml +++ b/ips_list.yml @@ -23,5 +23,5 @@ common_cells: domain: [soc, cluster] fpu_div_sqrt_mvp: - commit: v1.0.1 + commit: v1.0.3 domain: [cluster,soc] diff --git a/src/fpu_div_sqrt_mvp b/src/fpu_div_sqrt_mvp index 08e70e1c..83a601f9 160000 --- a/src/fpu_div_sqrt_mvp +++ b/src/fpu_div_sqrt_mvp @@ -1 +1 @@ -Subproject commit 08e70e1c176837d791942cac8e0d36ba782a038b +Subproject commit 83a601f97934ed5e06d737b9c80d98b08867c5fa From 80b7ef46bcebbc63998243374d5395d7c4ec52ff Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 4 Jul 2019 14:11:10 +0200 Subject: [PATCH 13/13] :books: Bump version to 0.6.0 --- docs/CHANGELOG.md | 7 +++++++ docs/README.md | 12 ++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index fcc33aa1..48e9d93f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,6 +11,11 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ## [Unreleased] ### Added +### Changed +### Fixed + +## [0.6.0] - 2019-07-04 + ### Changed - Pipelines are 
generated in the datapath modules instead of separate instances @@ -21,12 +26,14 @@ Versions of the IP in the same major relase are "pin-compatible" with each other - Generation of merged slices when the first package format is disabled - Potential simulation/synthesis mismatch of the UF flag - Various linter warnings +- Documentation to reflect on updated pipeline distribution order - [fpu_div_sqrt_mvp] Bumped to fix linter warnings - [Bender] Fixed dependencies for Bender [(#14)](https://github.com/pulp-platform/fpnew/pull/15) ### Removed - Currently unused modules: `fpnew_pipe*`, `fpnew_{f2i,f2f,i2f}_cast` + ## [0.5.6] - 2019-06-12 ### Changed diff --git a/docs/README.md b/docs/README.md index b7d01e6b..54322ddc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -336,12 +336,12 @@ For best results, we *strongly* encourage the use of automatic retiming options The configuration `pipe_config_t` is an enumeration of type `logic [1:0]` holding the following implementation options for the pipelines in operational units: -| Enumerator | Description | -|---------------|-----------------------------------------------------------------------------------------------------| -| `BEFORE` | All pipeline registers are inserted at the inputs of the operational unit | -| `AFTER` | All pipeline registers are inserted at the outputs of the operational unit | -| `INSIDE` | All registers are inserted at roughly the middle of the operational unit (if not possible, `AFTER`) | -| `DISTRIBUTED` | Registers are evenly distributed to `INSIDE`, `AFTER`, and `BEFORE` (if no `INSIDE`, all `AFTER`) | +| Enumerator | Description | +|---------------|------------------------------------------------------------------------------------------------------| +| `BEFORE` | All pipeline registers are inserted at the inputs of the operational unit | +| `AFTER` | All pipeline registers are inserted at the outputs of the operational unit | +| `INSIDE` | All registers are inserted at roughly the middle of 
the operational unit (if not possible, `BEFORE`) | +| `DISTRIBUTED` | Registers are evenly distributed to `INSIDE`, `BEFORE`, and `AFTER` (if no `INSIDE`, all `BEFORE`) |