diff --git a/finn-rtllib/mvu/add_multi.sv b/finn-rtllib/mvu/add_multi.sv index 6b45d42e5a..25f5b9a411 100644 --- a/finn-rtllib/mvu/add_multi.sv +++ b/finn-rtllib/mvu/add_multi.sv @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * @brief Pipelined multi-input adder tree. + * @brief Pipelined multi-input adder using LUT-based compressors. * @author Thomas B. Preußer *****************************************************************************/ @@ -50,13 +50,65 @@ module add_multi import mvu_pkg::*; #( output logic [SUM_WIDTH-1:0] sum ); - localparam int unsigned L = $clog2(N); // Number of levels with reductions +//--------------------------------------------------------------------------- +// Compressor Path +// +// CATCH_COMP entries instantiate a generated compressor module for a +// specific (N, ARG_WIDTH, delay) triple. The macro transposes arg[i][j] +// to the column-major bit-vector expected by the compressor and pads any +// remaining DEPTH with a shift-register delay. +// +// Generated compressors have no en port — when en=0, upstream holds +// inputs stable and the downstream accumulator does not latch, so +// correctness is preserved. 
- uwire [SUM_WIDTH-1:0] sum0; - if(L < 1) begin : genTrivial +`define CATCH_COMP(n,w,d) \ +else if(!RESET_ZERO && (N == n) && (ARG_WIDTH == w) && (DEPTH >= d) && (0 <= ARG_LO)) begin : genComp``n``u``w``_d``d \ + initial $display("[ADD_MULTI_PATH] COMP N=%0d D=%0d W=%0d", N, DEPTH, ARG_WIDTH); \ +\ + uwire [N*ARG_WIDTH-1:0] in; \ + uwire [SUM_WIDTH -1:0] out; \ + for(genvar i = 0; i < N; i++) begin : genIn \ + for(genvar j = 0; j < ARG_WIDTH; j++) begin : genBit \ + assign in[j*N+i] = arg[i][j]; \ + end : genBit \ + end : genIn \ + comp_``n``u``w``_d``d comp_inst ( \ + .clk, \ + .in, .out \ + ); \ + initial assert($bits(out) >= $bits(comp_inst.out)) else $warning("CATCH_COMP(%0d,%0d,%0d): compressor output width %0d > SUM_WIDTH %0d", n, w, d, $bits(comp_inst.out), SUM_WIDTH); \ +\ + localparam int unsigned COMP_DELAY = d; \ + localparam int unsigned SUM_DELAY = DEPTH - COMP_DELAY; \ + if(SUM_DELAY == 0) assign sum = out; \ + else begin : genDelay \ + logic [SUM_WIDTH-1:0] SumZ[SUM_DELAY] = '{ default: '0 }; \ + always_ff @(posedge clk) begin \ + if(rst) SumZ <= '{ default: '0 }; \ + else if(en) begin \ + for(int unsigned i = 0; i < SUM_DELAY-1; i++) SumZ[i] <= SumZ[i+1]; \ + SumZ[SUM_DELAY-1] <= out; \ + end \ + end \ + assign sum = SumZ[0]; \ + end : genDelay \ +end : genComp``n``u``w``_d``d + + if(0) begin end + // FINN_GENERATED_COMP_ENTRIES + +//- Generic Behavioral Addition --------- + else begin : genGeneric + + localparam int unsigned L = $clog2(N); // Tree levels + + logic [SUM_WIDTH-1:0] sum0; + if(L < 1) begin : genPassThrough assign sum0 = arg[0]; - end : genTrivial + end : genPassThrough else begin : genTree + initial $display("[ADD_MULTI_PATH] TREE N=%0d D=%0d W=%0d", N, DEPTH, ARG_WIDTH); localparam int unsigned D = L < DEPTH? L : DEPTH; // Pipeline stages absorbed by tree // Compute the count of decendents for all nodes in the reduction trees. 
@@ -117,16 +169,18 @@ module add_multi import mvu_pkg::*; #( // Delay Output if requested DEPTH exceeds Tree Height if(DEPTH <= L) assign sum = sum0; else begin : genDelay - localparam logic [SUM_WIDTH-1:0] SUM_RESET = {(SUM_WIDTH){RESET_ZERO? 1'b0 : 1'bx}}; - logic [SUM_WIDTH-1:0] SumZ[DEPTH - L] = '{ default: SUM_RESET }; + localparam int unsigned DELAY = DEPTH - L; + logic [SUM_WIDTH-1:0] SumZ[DELAY] = '{ default: '0 }; always_ff @(posedge clk) begin - if(rst) SumZ <= '{ default: SUM_RESET }; - else begin - for(int unsigned i = 0; i < DEPTH-L-1; i++) SumZ[i] <= SumZ[i+1]; - SumZ[DEPTH-L-1] <= sum0; + if(rst) SumZ <= '{ default: '0 }; + else if(en) begin + for(int unsigned i = 0; i < DELAY-1; i++) SumZ[i] <= SumZ[i+1]; + SumZ[DELAY-1] <= sum0; end end assign sum = SumZ[0]; end : genDelay + end : genGeneric + endmodule : add_multi diff --git a/finn-rtllib/mvu/mvu_vvu_axi.sv b/finn-rtllib/mvu/mvu_vvu_axi.sv index a890ac9aa3..86eccf7ca1 100644 --- a/finn-rtllib/mvu/mvu_vvu_axi.sv +++ b/finn-rtllib/mvu/mvu_vvu_axi.sv @@ -64,6 +64,13 @@ module mvu_vvu_axi #( bit FORCE_BEHAVIORAL = 0, bit M_REG_LUT = 1, + // LUT-based compressor tree pipeline depth. This is set by default for maximum Pipelining (inbetween every stage). + int unsigned COMP_PIPELINE_DEPTH = 1, + + // Passed at generation time, whether compressors were generated if deemed worth it. + // Decides wether to use LUT-based compressors instead of DSPs. + bit USE_COMPRESSOR = 0, + // Safely deducible parameters localparam int unsigned WEIGHT_STREAM_WIDTH = PE * SIMD * WEIGHT_WIDTH, localparam int unsigned WEIGHT_STREAM_WIDTH_BA = (WEIGHT_STREAM_WIDTH + 7)/8 * 8, @@ -310,7 +317,19 @@ module mvu_vvu_axi #( localparam int unsigned A_WIDTH = 25 + 2*(VERSION > 1); // Width of A datapath localparam int unsigned NUM_LANES = A_WIDTH == WEIGHT_WIDTH? 
1 : 1 + (A_WIDTH - !NARROW_WEIGHTS - WEIGHT_WIDTH) / MIN_LANE_WIDTH; - if(!IS_MVU || ((VERSION > 2) && (NUM_LANES <= 3) && (WEIGHT_WIDTH <= 8) && (ACTIVATION_WIDTH <= 9))) begin : genINT8 + if(USE_COMPRESSOR) begin : genCompressor + $DOTP_MODULE_NAME$ #( + .PE(PE), .SIMD(DSP_SIMD), + .WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH), + .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), + .COMP_PIPELINE_DEPTH(COMP_PIPELINE_DEPTH) + ) core ( + .clk(ap_clk), .rst, .en('1), + .last(dsp_last), .zero(dsp_zero), .w(dsp_w), .a(dsp_a), + .vld(dsp_vld), .p(dsp_p) + ); + end : genCompressor + else if(!IS_MVU || ((VERSION > 2) && (NUM_LANES <= 3) && (WEIGHT_WIDTH <= 8) && (ACTIVATION_WIDTH <= 9))) begin : genINT8 initial $info("Sidestepping to INT8 mode of DSP58 for %0dx%0d.", WEIGHT_WIDTH, ACTIVATION_WIDTH); mvu_vvu_8sx9_dsp58 #( .IS_MVU(IS_MVU), @@ -343,11 +362,14 @@ module mvu_vvu_axi #( if(1) begin : blkOutput localparam int unsigned CORE_PIPELINE_DEPTH = - VERSION == 3? 3 + (SEGMENTLEN == 0? 0 : ((SIMD+2)/3 -1)/SEGMENTLEN) : - /* else */ 3 + $clog2(SIMD+1) + (SIMD == 1); - - // This is conservative and could be divided by a guaranteed minimum output interval, e.g. MW/SIMD. - localparam int unsigned MAX_IN_FLIGHT = CORE_PIPELINE_DEPTH; + USE_COMPRESSOR? COMP_PIPELINE_DEPTH : + VERSION == 3? 3 + (SEGMENTLEN == 0? 0 : ((SIMD+2)/3 -1)/SEGMENTLEN) : + /* else */ 3 + $clog2(SIMD+1) + (SIMD == 1); + + // Floor at the DSP-equivalent depth so the compressor path (shallow pipeline) + localparam int unsigned DSP_PIPELINE_DEPTH = 3 + $clog2(SIMD+1) + (SIMD == 1); + localparam int unsigned MAX_IN_FLIGHT = + CORE_PIPELINE_DEPTH > DSP_PIPELINE_DEPTH? 
CORE_PIPELINE_DEPTH : DSP_PIPELINE_DEPTH; typedef logic [PE-1:0][ACCU_WIDTH-1:0] output_t; logic signed [$clog2(MAX_IN_FLIGHT+1):0] OPtr = '1; // -1 | 0, 1, ..., MAX_IN_FLIGHT diff --git a/finn-rtllib/mvu/mvu_vvu_axi_wrapper.v b/finn-rtllib/mvu/mvu_vvu_axi_wrapper.v index 9815d67629..47ffa96ac5 100644 --- a/finn-rtllib/mvu/mvu_vvu_axi_wrapper.v +++ b/finn-rtllib/mvu/mvu_vvu_axi_wrapper.v @@ -45,6 +45,8 @@ module $MODULE_NAME_AXI_WRAPPER$ #( parameter NARROW_WEIGHTS = $NARROW_WEIGHTS$, parameter SIGNED_ACTIVATIONS = $SIGNED_ACTIVATIONS$, parameter SEGMENTLEN = $SEGMENTLEN$, + parameter COMP_PIPELINE_DEPTH = $COMP_PIPELINE_DEPTH$, + parameter USE_COMPRESSOR = $USE_COMPRESSOR$, // Safely deducible parameters parameter WEIGHT_STREAM_WIDTH_BA = (PE*SIMD*WEIGHT_WIDTH+7)/8 * 8, @@ -81,7 +83,8 @@ mvu_vvu_axi #( `endif .IS_MVU(IS_MVU), .VERSION(VERSION), .PUMPED_COMPUTE(PUMPED_COMPUTE), .MW(MW), .MH(MH), .PE(PE), .SIMD(SIMD), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .WEIGHT_WIDTH(WEIGHT_WIDTH), .ACCU_WIDTH(ACCU_WIDTH), .NARROW_WEIGHTS(NARROW_WEIGHTS), - .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN) + .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN), + .COMP_PIPELINE_DEPTH(COMP_PIPELINE_DEPTH), .USE_COMPRESSOR(USE_COMPRESSOR) ) inst ( .ap_clk(ap_clk), .ap_clk2x(ap_clk2x), diff --git a/src/finn/compressor/Makefile b/src/finn/compressor/Makefile new file mode 100644 index 0000000000..7df3e6963e --- /dev/null +++ b/src/finn/compressor/Makefile @@ -0,0 +1,17 @@ +############################################################################# +# Copyright (C) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +# +# @brief Build automation for compressor testing and generation +# @author Simon Gerber +############################################################################# + +# Default: no constant absorption +CA?= +.PHONY: default clean + +default: + ./run_tests.sh $(CA) +clean: + rm -rf *.log *.jou *.vivado .Xil xvlog.pb gen/* diff --git a/src/finn/compressor/README.md b/src/finn/compressor/README.md new file mode 100644 index 0000000000..b80dbe6a43 --- /dev/null +++ b/src/finn/compressor/README.md @@ -0,0 +1,103 @@ + + +# Python Compressor Generator +This tool can generate compressor trees for 7-Series, UltraScale(+) and Versal for arbitrary input shapes. + +# Getting started +1. Part of the FINN framework (integrated into MVAU RTL backend). +2. _standalone compressor generation_ requires no external dependencies. + +## FINN Integration +The compressor is automatically invoked during MVAU layer specialization (`SpecializeLayers` transformation). +FINN selects between the RTL compressor, RTL DSP and HLS implementations based on the node parameters. +See the [MVAU compressor integration flow diagram](mvau_compressor_inegration_flow.svg) for the complete decision tree. + +**Key integration files:** +- `src/finn/transformation/fpgadataflow/specialize_layers.py` - RTL vs HLS selection logic +- `src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py` - FINN-side RTL MVAU integration with compressor path selection +- `src/finn/compressor/src/dotp_finn.py` - FINN wrapper for dot-product compressor generation +- `src/finn/compressor/src/add_multi_finn.py` - FINN wrapper for multi-operand adder generation +- `finn-rtllib/mvu/mvu_vvu_axi.sv` - RTL template that instantiates generated compressors + +This project implements either the full dotp unit of the node with a compressor implementation, or optimizes the add_multi additions of the DSP lanes when the RTL DSP path is invoked. 
+ +## Standalone Usage +Generate a compressor of shape `(12,12,12)` called `comp` and save it under `gen/comp12_12_12.sv`: + +```python3 -m finn.compressor.src.main -s 12,12,12 -n comp -o gen/comp12_12_12.sv``` + +See `python3 -m finn.compressor.src.main -h` for details. + +## Testing +Run the test suite for verification on different platforms: + +```bash +# Core compressor tests (21 configs) +./run_tests.sh "" versal # or 7series, ultrascale + +# MVAU integration tests (8 configs) +./run_dotp_comp_tests.sh versal # or 7series, ultrascale + +# Multi-operand adder tests (8 configs) +./run_add_multi_comp_tests.sh versal # or 7series, ultrascale +``` + +## Features +### Custom Input Shape +The tool can generate compressors for any input shape. A shape is passed as a comma-separated list. Each number indicates a column's height. *LSB* is *left*, *MSB* is *right*. + +### Accumulation +By passing `-a`, the tool generates an accumulator instead of just an adder. The accumulator's width can be specified by `-w`. +### Gate Absorption +If desired, every input to the compressor can be preceded by a two-input gate. These gates can be integrated into the first compression stage. Each gate is specified as a HEX digit. The encoding is the same as Vivado's LUT2 primitive: +| Secondary Input | Primary Input | Output +|-----------------|---------------|---------------- +|0 |0 |(DIGIT >> 0) & 1 +|0 |1 |(DIGIT >> 1) & 1 +|1 |0 |(DIGIT >> 2) & 1 +|1 |1 |(DIGIT >> 3) & 1 + +For example, `8` maps to an AND gate and `6` maps to an XOR gate. + +In CLI, gates can be specified as a flat string like `-g 883ABC`. The *LSB* is *left* and *MSB* is *right*. The leftmost specified gate corresponds to the LSB input in the generated compressor input vector. + +### Target +Generate compressors for either Versal, 7-Series or UltraScale fabrics using `-t {Versal,7-Series,UltraScale}`. 
+ +### Automated Testing +The tool can automatically generate a SystemVerilog testbench to fuzz-test the generated compressors by passing `--test`. For testing, the `xvlog`, `xelab` and `xsim` commands have to be available. + +### Custom Pipeline Depth +Specify the maximum combinational delay for the compressor using `-p MAX_DEPTH`. Note that the final adder, which has at least one routing delay, cannot be pipelined. +This excludes the `Quaternary Adder`, which can be split into two stages when not used in accumulation. The pipelined version is the default if `-a` is not passed. + +### Constant Input +Aside from the regular, variable compressor inputs, the tool also supports an additional constant input. It can be specified as a binary number by `-c NUMBER`. + +# Implementation Details - How the Code is Structured +The compressor is internally represented as a graph. Its nodes are defined in `src/graph/nodes.py`. +Compressor construction is done in several passes: +1. Create a graph with all scheduled counters and a final adder (in `src/passes/compressor_constructor.py`). + 1. (Optional) Generate a gate absorption stage. + 2. Generate regular compression stages until the compression goal is reached. + 3. Insert pipeline registers between compressor stages. + 4. Build either a final adder or an accumulator as the final stage. +2. Annotate LUT6CY instances with placement constraints so that the LUT Cascade will be utilized (in `src/passes/lut_placer.py`). +3. Replace inexpressible connections: Place wires between connected instantiated modules (in `src/passes/wire_inserter.py`). +4. Annotate input and output signals in the compressor (in `src/passes/io_annotator.py`). +5. Emit generated SystemVerilog source (in `src/passes/emitter.py`). + +## Extending the Tool +### Adding new Counters +Counters without gate absorption are defined in `graph/counters/counter_candidates.py`. 
+Counters with gate absorption are defined in `graph/counters/absorption_counter_candidates.py`. + +### Adding new Passes +Before adding new passes over the compressor graph, check out if the simple iterator defined in `node_iterator.py` can be inherited to save boilerplate code. + +# Authors +This tool was created as a standalone compressor generator by Konstantin Hossfeld and Thomas Preußer. It was extended and integrated into the finn flow by Simon Gerber. diff --git a/src/finn/compressor/__init__.py b/src/finn/compressor/__init__.py new file mode 100644 index 0000000000..5adb8a0127 --- /dev/null +++ b/src/finn/compressor/__init__.py @@ -0,0 +1,15 @@ +############################################################################# +# Copyright (C) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# +# @brief FINN compressor package initialization +# @author Simon Gerber +############################################################################# + +"""FINN compressor — LUT-based compressor tree generator for MVU.""" + +from .src.add_multi_finn import generate_add_multi_comps +from .src.dotp_finn import generate_dotp_comp + +__all__ = ["generate_add_multi_comps", "generate_dotp_comp"] diff --git a/src/finn/compressor/gen_dotp_netlist.sh b/src/finn/compressor/gen_dotp_netlist.sh new file mode 100755 index 0000000000..ea35a66b04 --- /dev/null +++ b/src/finn/compressor/gen_dotp_netlist.sh @@ -0,0 +1,62 @@ +#!/bin/bash +############################################################################# +# Copyright (C) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# +# Generate standalone dotp compressor netlist for inspection or integration. +# Output is a self-contained RTL directory that can be simulated or synthesized. 
+# +# Usage: Edit parameters below, then run: ./gen_dotp_netlist.sh +############################################################################# + +# === Configuration === +SIMD=256 +WW=4 +AW=4 +ACCU_WIDTH=16 +SIGNED_WEIGHTS=0 # 0=unsigned, 1=signed +SIGNED_ACT=0 # 0=unsigned, 1=signed +TARGET="Versal" # Versal, 7-Series, UltraScale +# ===================== + +set -e +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +export PYTHONPATH="$(cd "$SCRIPT_DIR/../../.." && pwd):${PYTHONPATH:-}" + +# Build output directory name from config +LABEL="simd${SIMD}_w${WW}_a${AW}" +[ "$SIGNED_WEIGHTS" -eq 0 ] && LABEL="${LABEL}_uw" +[ "$SIGNED_ACT" -eq 1 ] && LABEL="${LABEL}_sa" +LABEL="${LABEL}_$(echo "$TARGET" | tr '[:upper:]' '[:lower:]' | tr -d '-')" +OUT_DIR="$SCRIPT_DIR/gen/$LABEL" +mkdir -p "$OUT_DIR" + +echo "Generating dotp compressor netlist" +echo " Config: SIMD=$SIMD, WW=$WW, AW=$AW, ACCU=$ACCU_WIDTH" +echo " Target: $TARGET" +echo " Output: $OUT_DIR" +echo "" + +# Build flags +FLAGS="" +[ "$SIGNED_WEIGHTS" -eq 0 ] && FLAGS="--unsigned_weights" +[ "$SIGNED_ACT" -eq 1 ] && FLAGS="$FLAGS --signed_activations" + +# Generate compressor core and dotp wrapper +python3 -m finn.compressor.src.dotp_finn \ + --simd "$SIMD" --ww "$WW" --aw "$AW" \ + --accu_width "$ACCU_WIDTH" $FLAGS \ + --target "$TARGET" \ + --dotp-template "$SCRIPT_DIR/hdl/dotp_comp_template.sv" \ + --dotp-output-name dotp_comp.sv \ + -o "$OUT_DIR" + +# Include mul_comp_map for complete netlist +cp "$SCRIPT_DIR/hdl/mul_comp_map.sv" "$OUT_DIR/" + +echo "" +echo "Generated files:" +ls -1 "$OUT_DIR"/*.sv +echo "" +echo "Done. 
Netlist ready in: $OUT_DIR" diff --git a/src/finn/compressor/hdl/add_multi_comp_tb_template.sv b/src/finn/compressor/hdl/add_multi_comp_tb_template.sv new file mode 100644 index 0000000000..b5327262ce --- /dev/null +++ b/src/finn/compressor/hdl/add_multi_comp_tb_template.sv @@ -0,0 +1,142 @@ +/****************************************************************************** + * Copyright (C) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + * @brief Testbench template for add_multi compressor cores + * @author Simon Gerber + *****************************************************************************/ + +/** + * Standalone testbench for add_multi compressor (comp_NuW_dD). + * Tests the compressor directly without requiring add_multi.sv. + * + * Template placeholders expanded by run_add_multi_comp_tests.sh: + * {n} - Number of addends + * {arg_width} - Bit width of each addend + * {depth} - Pipeline depth of compressor + * {label} - Configuration label (e.g. n8_w4_p2) + * {comp_module} - Generated compressor module name (e.g. 
comp_8u4_d0) + *****************************************************************************/ + +module add_multi_comp_{label}_tb; + + localparam int unsigned N = {n}; + localparam int unsigned ARG_WIDTH = {arg_width}; + localparam int unsigned DEPTH = {depth}; + localparam int unsigned IN_WIDTH = N * ARG_WIDTH; + // Use same formula as mvu_pkg::sumwidth() for consistency + localparam int unsigned SUM_WIDTH = $clog2(N) + ARG_WIDTH; + localparam int unsigned ROUNDS = 257; + + //----------------------------------------------------------------------- + // Global Control + logic clk = 0; + always #5ns clk = !clk; + + logic rst = 1; + initial begin + repeat(16) @(posedge clk); + rst <= 0; + end + + bit done = 0; + always_comb begin + if(done) $finish; + end + + //----------------------------------------------------------------------- + // DUT: direct compressor instantiation + logic [IN_WIDTH-1:0] in; + logic [SUM_WIDTH-1:0] out; + + {comp_module} dut ( + .clk, + .in, + .out + ); + + //----------------------------------------------------------------------- + // Transpose function: convert row-major to column-major format. + // + // The compressor expects inputs in column-major (bit-slice) order: + // in[0..N-1] = bit 0 of all N addends + // in[N..2N-1] = bit 1 of all N addends + // ... + // in[(W-1)*N..W*N-1] = bit W-1 of all N addends + // + // This matches the transpose in add_multi.sv CATCH_COMP macro: + // assign in[j*N+i] = arg[i][j]; + // + // Without this transpose, addend bits would be misaligned and produce + // incorrect sums. 
+ //----------------------------------------------------------------------- + function automatic logic [IN_WIDTH-1:0] transpose( + input logic [IN_WIDTH-1:0] row_major + ); + logic [IN_WIDTH-1:0] col_major; + for(int i = 0; i < N; i++) begin + for(int j = 0; j < ARG_WIDTH; j++) begin + col_major[j*N + i] = row_major[i*ARG_WIDTH + j]; + end + end + return col_major; + endfunction + + //----------------------------------------------------------------------- + // Input Feed + int Q[$]; + initial begin + in = 'x; + @(posedge clk iff !rst); + + repeat(ROUNDS) begin + automatic logic [IN_WIDTH-1:0] aa; + automatic int exp = 0; + void'(std::randomize(aa)); + + // Compute expected sum from row-major input + for(int unsigned i = 0; i < N; i++) begin + exp += aa[i*ARG_WIDTH +: ARG_WIDTH]; + end + + // Transpose to column-major before feeding compressor + in <= transpose(aa); + Q.push_back(exp); + @(posedge clk); + end + + in <= 'x; + repeat(DEPTH + 10) @(posedge clk); + + assert(Q.size == 0) else begin + $error("Missing %0d outputs.", Q.size); + end + done = 1; + end + + //----------------------------------------------------------------------- + // Output Checker + int unsigned Checks = 0; + int unsigned Errors = 0; + initial begin + @(posedge clk iff !rst); + repeat(DEPTH) @(posedge clk); + repeat(ROUNDS) @(posedge clk) begin + automatic int exp = Q.pop_front(); + automatic int hav = out; + assert(hav == exp) else begin + $error("Output mismatch %0d instead of %0d.", hav, exp); + $stop; + Errors <= Errors + 1; + end + Checks <= Checks + 1; + end + end + + final begin + $display("Performed %0d checks with %0d errors.", Checks, Errors); + assert(Checks == ROUNDS) else $error("Unexpected number of checks: %0d instead of %0d.", Checks, ROUNDS); + end + +endmodule : add_multi_comp_{label}_tb diff --git a/src/finn/compressor/hdl/add_multi_comp_template.tcl b/src/finn/compressor/hdl/add_multi_comp_template.tcl new file mode 100644 index 0000000000..7402bf2df6 --- /dev/null +++ 
b/src/finn/compressor/hdl/add_multi_comp_template.tcl @@ -0,0 +1,34 @@ +############################################################################# +# Copyright (C) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# +# @brief Vivado simulation script for add_multi compressor testbench +# @author Simon Gerber +############################################################################# + +# Template placeholders expanded by run_add_multi_comp_tests.sh: +# {label} - Configuration label (e.g. n8_w4_p2) +# {tb} - Testbench module name +# {gen_dir} - Absolute path to gen/