From 562bfe9e679de47274ed5625473ff1b425d3eb4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Wed, 27 May 2026 08:18:19 +0100 Subject: [PATCH 01/19] Adding FIFO tracing capabilities in simulation. --- finn-rtllib/fifo/hdl/fifo_gauge.sv | 35 ++++++++++++++++++++++----- finn-rtllib/fifo/hdl/fifo_gauge_tb.sv | 5 +++- finn-rtllib/fifo/sim.sh | 25 +++++++++++++++++++ 3 files changed, 58 insertions(+), 7 deletions(-) create mode 100755 finn-rtllib/fifo/sim.sh diff --git a/finn-rtllib/fifo/hdl/fifo_gauge.sv b/finn-rtllib/fifo/hdl/fifo_gauge.sv index 37660a0533..73904c1191 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge.sv @@ -34,7 +34,8 @@ module fifo_gauge #( int unsigned WIDTH, - int unsigned COUNT_WIDTH = 32 + int unsigned COUNT_WIDTH = 32, + parameter DATA_LOGFILE = "" // Log consumed data verbosely to this file )( input logic clk, input logic rst, @@ -51,25 +52,47 @@ module fifo_gauge #( output logic [COUNT_WIDTH-1:0] maxcount ); + //----------------------------------------------------------------------- + // Monitoring & Debug + + // Transaction counters + longint unsigned ITxnCnt = 0; + longint unsigned OTxnCnt = 0; + // Optional hex data trace + int LogFd = (DATA_LOGFILE != "")? 
$fopen(DATA_LOGFILE, "a") : 0; + // The internal Queue serving as data buffer and an output register logic [WIDTH-1:0] Q[$] = {}; - logic [COUNT_WIDTH-1:0] Count = 0; - logic [COUNT_WIDTH-1:0] MaxCount = 0; + longint unsigned Count = 0; + longint unsigned MaxCount = 0; logic OVld = 0; logic [WIDTH-1:0] ODat = 'x; + final begin + $display("[%m] MaxFill: %0d; Transactions: in=%0d out=%0d", MaxCount, ITxnCnt, OTxnCnt); + if(LogFd) $fclose(LogFd); + end + always_ff @(posedge clk) begin if(rst) begin - Q <= {}; + Q = {}; Count <= 0; MaxCount <= 0; OVld <= 0; ODat <= 'x; + + ITxnCnt <= 0; + OTxnCnt <= 0; end else begin - // Always take input - if(ivld) Q.push_back(idat); + // Always take input and track Transactions + if(ivld) begin + Q.push_back(idat); + if(LogFd) $fwrite(LogFd, "%0x\n", idat); + ITxnCnt <= ITxnCnt + 1; + end + if(OVld && ordy) OTxnCnt <= OTxnCnt + 1; // Take Count Count <= Q.size; diff --git a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv index b3e7d7647d..4e75834515 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv @@ -59,7 +59,7 @@ module fifo_gauge_tb; // Depth Monitoring uwire count_t maxcount; - fifo_gauge #(.WIDTH(W)) dut ( + fifo_gauge #(.WIDTH(W), .DATA_LOGFILE("fifo_trace.log")) dut ( .clk, .rst, .idat, .ivld, .irdy, .odat, .ovld, .ordy, @@ -70,6 +70,7 @@ module fifo_gauge_tb; // Stimulus data_t Q[$] = {}; initial begin + automatic int ref_fd = $fopen("fifo_ref.log", "w"); idat = 'x; ivld = 0; @(posedge clk iff !rst); @@ -79,10 +80,12 @@ module fifo_gauge_tb; idat <= data; ivld <= 1; Q.push_back(data); + $fwrite(ref_fd, "%0x\n", data); @(posedge clk); idat <= 'x; ivld <= 0; end + $fclose(ref_fd); end //----------------------------------------------------------------------- diff --git a/finn-rtllib/fifo/sim.sh b/finn-rtllib/fifo/sim.sh new file mode 100755 index 0000000000..c9d8eb7206 --- /dev/null +++ b/finn-rtllib/fifo/sim.sh @@ -0,0 +1,25 @@ 
+#!/bin/bash +############################################################################ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: BSD-3-Clause +# +# @brief FIFO gauge simulation script: simulates fifo_gauge_tb and diffs fifo_trace.log against fifo_ref.log. +# @author Thomas B. Preußer +############################################################################ +set -euo pipefail +cd "$(dirname "$0")" + +rm -f fifo_trace.log fifo_ref.log + +xvlog -sv hdl/fifo_gauge.sv hdl/fifo_gauge_tb.sv +xelab fifo_gauge_tb -debug off -s sim +xsim sim -runall + +echo "---" +if diff -q fifo_ref.log fifo_trace.log; then + echo "PASS: trace matches reference ($(wc -l < fifo_ref.log) lines)" +else + echo "FAIL: trace mismatch" + diff fifo_ref.log fifo_trace.log | head -20 + exit 1 +fi From 30f94407270b89ba0803f66bc9aa9b78cf5887d8 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 27 May 2026 17:27:45 +0100 Subject: [PATCH 02/19] [CustomOp] Select behav sim for rtl eltwise ops with flag --- finn-rtllib/eltwise/eltwise_template.v | 4 +++- src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/finn-rtllib/eltwise/eltwise_template.v b/finn-rtllib/eltwise/eltwise_template.v index e451c55823..0e8018ddb0 100644 --- a/finn-rtllib/eltwise/eltwise_template.v +++ b/finn-rtllib/eltwise/eltwise_template.v @@ -34,7 +34,9 @@ eltwise #( .PE($PE$), .OP($OP$), .B_SCALE($B_SCALE$), - .FORCE_BEHAVIORAL($FORCE_BEHAVIORAL$), +`ifdef FINN_SIMULATION + .FORCE_BEHAVIORAL(1), +`endif .A_FLOAT($A_FLOAT$), .B_FLOAT($B_FLOAT$), .A_WIDTH($A_WIDTH$), diff --git a/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py index 2f919ffb3a..9c55e13b31 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py @@ -151,7 +151,6 @@ def generate_hdl(self, model, fpgapart, clk): "PE": pe, "OP": op_name, "B_SCALE": 1.0, - 
"FORCE_BEHAVIORAL": 0, "A_FLOAT": 1 if lhs_float else 0, "B_FLOAT": 1 if rhs_float else 0, "A_WIDTH": a_width, From badc3e1a8ccb61c474c8440c8b106f8e78db5454 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 27 May 2026 17:28:43 +0100 Subject: [PATCH 03/19] [Builder] Add flag to allow user to use behavioral models for functional simulation --- src/finn/builder/build_dataflow_config.py | 7 +++++++ src/finn/builder/build_dataflow_steps.py | 4 +++- src/finn/core/rtlsim_exec.py | 4 +++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 96ecfeb6b7..47a9f86519 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -382,6 +382,13 @@ class DataflowBuildConfig: #: rtlsim, otherwise they will be replaced by RTL implementations. rtlsim_use_vivado_comps: Optional[bool] = True + #: Use behavioral simulation for RTLSim verification steps. + #: When True, passes -define FINN_SIMULATION to xelab, enabling faster + #: behavioral models for DSP-heavy modules (MVU, LayerNorm, Elementwise) + #: and fifo_gauge (with debug capabilities) instead of Q_srl. + #: Does not affect FIFO sizing which always uses behavioral simulation. 
+ verify_rtlsim_behavioral: Optional[bool] = False + #: If set to True, the FINN compiler tries to create an MLO design based on #: loop_body_hierarchy and loop_body_range mlo: Optional[bool] = False diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index f2164ca2c1..908a5aaf3f 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -907,7 +907,7 @@ def step_hw_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig): for node in model.graph.node: node_inst = getCustomOp(node) node_inst.set_nodeattr("rtlsim_trace", f"{abspath}/{node.name}_rtlsim.wdb") - model = model.transform(PrepareRTLSim()) + model = model.transform(PrepareRTLSim(behav=cfg.verify_rtlsim_behavioral)) model = model.transform(SetExecMode("rtlsim")) verify_step(model, cfg, "node_by_node_rtlsim", need_parent=True) # Clear rtlsim_trace attributes to prevent later simulations from @@ -1123,6 +1123,8 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): os.makedirs(waveform_dir, exist_ok=True) abspath = os.path.abspath(waveform_dir) verify_model.set_metadata_prop("rtlsim_trace", abspath + "/verify_rtlsim.wdb") + if cfg.verify_rtlsim_behavioral: + verify_model.set_metadata_prop("rtlsim_behavioral", "1") if is_mlo(model): verify_mlo(verify_model, cfg, "stitched_ip_rtlsim") else: diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index b734a181e5..b86692f2c1 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -341,8 +341,10 @@ def rtlsim_exec_finnxsi(model, execution_context, pre_hook=None, post_hook=None) top_module_name = top_module_file_name.strip(".v") single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") debug = not (trace_file is None or trace_file == "") + rtlsim_behavioral = model.get_metadata_prop("rtlsim_behavioral") + behav = rtlsim_behavioral is not None and rtlsim_behavioral == "1" rtlsim_so = finnxsi.compile_sim_obj( 
- top_module_name, all_verilog_srcs, single_src_dir, debug=debug + top_module_name, all_verilog_srcs, single_src_dir, debug=debug, behav=behav ) # save generated lib filename in attribute model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) From 989ce5c52962299a3b7fd6155783bc708dc52529 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Wed, 27 May 2026 22:59:44 +0100 Subject: [PATCH 04/19] [Debug FIFO] Logging works for FIFO depth simulation with debug_fifo=True; not yet fully working for rtlsim --- finn-rtllib/fifo/hdl/fifo_template.v | 2 +- src/finn/builder/build_dataflow.py | 4 +- src/finn/builder/build_dataflow_config.py | 2 + src/finn/builder/build_dataflow_steps.py | 43 +++++++++++++++++++ .../fpgadataflow/rtl/streamingfifo_rtl.py | 1 + .../custom_op/fpgadataflow/streamingfifo.py | 1 + .../fpgadataflow/set_fifo_depths.py | 12 ++++++ 7 files changed, 63 insertions(+), 2 deletions(-) diff --git a/finn-rtllib/fifo/hdl/fifo_template.v b/finn-rtllib/fifo/hdl/fifo_template.v index 3066d9b92f..b9692f1659 100644 --- a/finn-rtllib/fifo/hdl/fifo_template.v +++ b/finn-rtllib/fifo/hdl/fifo_template.v @@ -52,7 +52,7 @@ output $OUT_RANGE$ out0_V_TDATA ); `ifdef FINN_SIMULATION - fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$)) fifo ( + fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$), .DATA_LOGFILE("$DATA_LOGFILE$")) fifo ( .clk(ap_clk), .rst(!ap_rst_n), .idat(in0_V_TDATA), .ivld(in0_V_TVALID), .irdy(in0_V_TREADY), .odat(out0_V_TDATA), .ovld(out0_V_TVALID), .ordy(out0_V_TREADY), diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py index 198ccd167a..68c05c8ec4 100644 --- a/src/finn/builder/build_dataflow.py +++ b/src/finn/builder/build_dataflow.py @@ -45,7 +45,7 @@ DataflowBuildConfig, default_build_dataflow_steps, ) -from finn.builder.build_dataflow_steps import build_dataflow_step_lookup +from finn.builder.build_dataflow_steps import _maybe_enable_verify_behavioral, 
build_dataflow_step_lookup # adapted from https://stackoverflow.com/a/39215961 @@ -134,6 +134,8 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig): if not os.path.exists(cfg.output_dir): os.makedirs(cfg.output_dir) + _maybe_enable_verify_behavioral(cfg) + # Run configuration checks config_report = run_all_config_checks(cfg) print(format_report(config_report)) diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 47a9f86519..81dff67716 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -307,6 +307,8 @@ class DataflowBuildConfig: #: Only relevant if auto_fifo_strategy = LARGEFIFO_RTLSIM fifosim_save_waveform: Optional[bool] = False + debug_fifo: Optional[bool] = False + #: Target clock frequency (in nanoseconds) for Vitis HLS synthesis. #: e.g. `hls_clk_period_ns=5.0` will target a 200 MHz clock. #: If not specified it will default to synth_clk_period_ns diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 908a5aaf3f..7cf89d3395 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -143,6 +143,37 @@ from finn.util.vivado import parse_ooc_synth_results +def _fifo_debug_live_dir(cfg): + return cfg.output_dir + "/debug/_live" + + +def _maybe_enable_verify_behavioral(cfg): + if cfg.debug_fifo and not cfg.verify_rtlsim_behavioral: + print( + "[debug_fifo] forcing verify_rtlsim_behavioral=True so that " + "the verify phase uses fifo_gauge and produces per-FIFO logs." 
+ ) + cfg.verify_rtlsim_behavioral = True + + +def snapshot_fifo_logs(cfg, phase_subdir): + if not cfg.debug_fifo: + return + live_dir = _fifo_debug_live_dir(cfg) + if not os.path.isdir(live_dir): + return + dest_dir = cfg.output_dir + "/debug/" + phase_subdir + os.makedirs(dest_dir, exist_ok=True) + for fn in os.listdir(live_dir): + if not fn.endswith(".log"): + continue + src = os.path.join(live_dir, fn) + if not os.path.isfile(src) or os.path.getsize(src) == 0: + continue + shutil.copyfile(src, os.path.join(dest_dir, fn)) + open(src, "w").close() + + def verify_step( model: ModelWrapper, cfg: DataflowBuildConfig, @@ -310,6 +341,7 @@ def prepare_loop_ops_fifo_sizing(node, cfg): swg_exception=cfg.default_swg_exception, vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, + debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), ) ) loop_model = loop_model.transform(SplitLargeFIFOs()) @@ -850,6 +882,13 @@ def step_hw_codegen(model: ModelWrapper, cfg: DataflowBuildConfig): loop_nodes = model.get_nodes_by_op_type("FINNLoop") for node in loop_nodes: prepare_loop_ops_fifo_sizing(node, cfg) + snapshot_fifo_logs(cfg, "fifo_sizing") + if cfg.debug_fifo: + for loop_node in loop_nodes: + body_model = getCustomOp(loop_node).get_nodeattr("body") + for fifo_node in body_model.get_nodes_by_op_type("StreamingFIFO_rtl"): + getCustomOp(fifo_node).set_nodeattr("debug_log_path", "") + getCustomOp(loop_node).set_nodeattr("body", body_model.graph) model = model.transform( PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()), apply_to_subgraphs=True, @@ -978,6 +1017,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, cfg_n_inferences=cfg.fifosim_n_inferences, + debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), ) ) model = model.transform(GiveUniqueNodeNames()) @@ -1043,6 +1083,7 
@@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): # this will only run for the new nodes (e.g. FIFOs and DWCs) model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) model = model.transform(HLSSynthIP(cfg._resolve_fpga_part())) + snapshot_fifo_logs(cfg, "fifo_sizing") return model @@ -1130,6 +1171,7 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): else: verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True) os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness) + snapshot_fifo_logs(cfg, "verify_stitched_ip_rtlsim") return model @@ -1190,6 +1232,7 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi if cfg.verify_save_rtlsim_waveforms: # restore original trace depth os.environ["RTLSIM_TRACE_DEPTH"] = str(orig_rtlsim_trace_depth) + snapshot_fifo_logs(cfg, "rtlsim_perf") else: print( diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index 018d8f0417..399699996d 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -96,6 +96,7 @@ def generate_hdl(self, model, fpgapart, clk): code_gen_dict["$OUT_RANGE$"] = "[{}:0]".format(in_width - 1) code_gen_dict["$WIDTH$"] = str(in_width) code_gen_dict["$DEPTH$"] = str(depth) + code_gen_dict["$DATA_LOGFILE$"] = self.get_nodeattr("debug_log_path") # apply code generation to templates code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") with open(template_path, "r") as f: diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index e1ae3e894f..2351044387 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -64,6 +64,7 @@ def get_nodeattr_types(self): # the FIFO does not need its own FIFOs "inFIFODepths": ("ints", 
False, [0]), "outFIFODepths": ("ints", False, [0]), + "debug_log_path": ("s", False, ""), } ) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index fa317265a6..d568a4fcd5 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -275,6 +275,7 @@ def __init__( vivado_ram_style="auto", fifosim_input_throttle=True, cfg_n_inferences=2, + debug_log_dir=None, ): super().__init__() self.fpgapart = fpgapart @@ -287,6 +288,7 @@ def __init__( self.cfg_n_inferences = cfg_n_inferences self.mlo_max_iter = 0 self.ind_map = {} + self.debug_log_dir = debug_log_dir def apply(self, model): model = model.transform(GiveUniqueNodeNames()) @@ -403,6 +405,16 @@ def apply(self, model): if (self.max_depth is not None) and (node.get_nodeattr("depth") != self.max_depth): node.set_nodeattr("depth", self.max_depth) + if self.debug_log_dir is not None: + import os as _os + + _os.makedirs(_os.path.abspath(self.debug_log_dir), exist_ok=True) + for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"): + log_path = _os.path.abspath( + _os.path.join(self.debug_log_dir, node.name + ".log") + ) + getCustomOp(node).set_nodeattr("debug_log_path", log_path) + # insert FIFOs and do all transformations for RTLsim model = model.transform(AnnotateCycles()) perf = model.analysis(dataflow_performance) From fe6fc2f7ea40ca604e7b83b4e4d9347f14b604ba Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 28 May 2026 11:42:53 +0100 Subject: [PATCH 05/19] Linting --- src/finn/builder/build_dataflow.py | 5 ++++- src/finn/transformation/fpgadataflow/set_fifo_depths.py | 9 +++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py index 68c05c8ec4..b09ca43960 100644 --- a/src/finn/builder/build_dataflow.py +++ b/src/finn/builder/build_dataflow.py @@ -45,7 +45,10 @@ 
DataflowBuildConfig, default_build_dataflow_steps, ) -from finn.builder.build_dataflow_steps import _maybe_enable_verify_behavioral, build_dataflow_step_lookup +from finn.builder.build_dataflow_steps import ( + _maybe_enable_verify_behavioral, + build_dataflow_step_lookup, +) # adapted from https://stackoverflow.com/a/39215961 diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index d568a4fcd5..fff617f578 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -28,6 +28,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np +import os import warnings from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -406,13 +407,9 @@ def apply(self, model): node.set_nodeattr("depth", self.max_depth) if self.debug_log_dir is not None: - import os as _os - - _os.makedirs(_os.path.abspath(self.debug_log_dir), exist_ok=True) + os.makedirs(os.path.abspath(self.debug_log_dir), exist_ok=True) for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"): - log_path = _os.path.abspath( - _os.path.join(self.debug_log_dir, node.name + ".log") - ) + log_path = os.path.abspath(os.path.join(self.debug_log_dir, node.name + ".log")) getCustomOp(node).set_nodeattr("debug_log_path", log_path) # insert FIFOs and do all transformations for RTLsim From 2104c34883dff2e8496f7950494f50169953489d Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 29 May 2026 15:55:54 +0100 Subject: [PATCH 06/19] [xsi] Align python xsi with c++ driven simulation --- finn_xsi/finn_xsi/sim_engine.py | 38 +++++++++---------- src/finn/builder/build_dataflow_steps.py | 6 ++- src/finn/qnn-data/build_dataflow/build.py | 1 + .../build_dataflow/dataflow_build_config.json | 1 + 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/finn_xsi/finn_xsi/sim_engine.py b/finn_xsi/finn_xsi/sim_engine.py index 
0d17e581af..cd6b09c5ba 100644 --- a/finn_xsi/finn_xsi/sim_engine.py +++ b/finn_xsi/finn_xsi/sim_engine.py @@ -24,31 +24,29 @@ def __init__(self, kernel, design, log=None, wdb=None): if p.isInput(): p.clear().write_back() - def cycle(updates): - # Rising Edge - clk.set(1).write_back() + # Match C++ driver structure: separate half_cycle calls with run(5) each + def half_cycle(up): + """Single half-cycle matching C++ driver behavior.""" + clk.set(up).write_back() if clk2x is not None: clk2x.set(1).write_back() - # Updates after Active Edge - top.run(1) + top.run(5) + clk2x.set(0).write_back() + top.run(5) + else: + top.run(5) + + def cycle(updates): + # Clock down - matches C++ cycle(0) + half_cycle(0) + + # Clock up - matches C++ cycle(1) + half_cycle(1) + + # Write port updates after clock up (matching C++ structure) for port, update in updates.items(): port.set_hexstr(update).write_back() - # Edges inactive on interface & finish Cycle - if clk2x is None: - top.run(4999) - clk.set(0).write_back() - top.run(5000) - else: - top.run(2499) - clk2x.set(0).write_back() - top.run(2500) - clk.set(0).write_back() - clk2x.set(1).write_back() - top.run(2500) - clk2x.set(0).write_back() - top.run(2500) - self.top = top self.cycle = cycle self.ticks = 0 diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 7cf89d3395..21fb84a371 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -171,7 +171,8 @@ def snapshot_fifo_logs(cfg, phase_subdir): if not os.path.isfile(src) or os.path.getsize(src) == 0: continue shutil.copyfile(src, os.path.join(dest_dir, fn)) - open(src, "w").close() + # Delete _live folder to ensure clean state for next simulation + shutil.rmtree(live_dir) def verify_step( @@ -1150,6 +1151,9 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): # prepare ip-stitched rtlsim verify_model = deepcopy(model) verify_model = 
prepare_for_stitched_ip_rtlsim(verify_model, cfg) + # Ensure _live folder exists for debug_fifo logging + if cfg.debug_fifo: + os.makedirs(_fifo_debug_live_dir(cfg), exist_ok=True) # use critical path estimate to set rtlsim liveness threshold # (very conservative) verify_model = verify_model.transform(AnnotateCycles()) diff --git a/src/finn/qnn-data/build_dataflow/build.py b/src/finn/qnn-data/build_dataflow/build.py index 6cc7ff2419..fbad13c237 100644 --- a/src/finn/qnn-data/build_dataflow/build.py +++ b/src/finn/qnn-data/build_dataflow/build.py @@ -48,6 +48,7 @@ synth_clk_period_ns=10.0, board=platform_name, shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, + debug_fifo=True, generate_outputs=[ build_cfg.DataflowOutputType.PYNQ_DRIVER, build_cfg.DataflowOutputType.STITCHED_IP, diff --git a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json index 81e77a1606..9360075916 100644 --- a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json +++ b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json @@ -9,6 +9,7 @@ "shell_flow_type": "vivado_zynq", "verify_save_rtlsim_waveforms": true, "fifosim_save_waveform": true, + "debug_fifo": true, "verify_steps": [ "initial_python", "streamlined_python", From 889da601f77c32c612dd58d05516eaa9706133a4 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 29 May 2026 17:10:32 +0100 Subject: [PATCH 07/19] [Builder] Disable FINN_SIMULATION for rtlsim performance --- src/finn/builder/build_dataflow_steps.py | 5 ++++- src/finn/core/rtlsim_exec.py | 5 ++++- src/finn/transformation/fpgadataflow/set_fifo_depths.py | 7 +++++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 21fb84a371..8df9c625a5 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -1205,7 +1205,10 @@ def 
step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi perf = model.analysis(dataflow_performance) latency = perf["critical_path_cycles"] max_iters = latency * 1.1 + 50 - rtlsim_perf_dict = xsi_fifosim(model, rtlsim_bs, max_iters=max_iters) + # Use behav=False for performance measurement to use real RTL components + # instead of behavioral models (FINN_SIMULATION affects FIFOs, MVU, LayerNorm, + # and RTL elementwise ops) + rtlsim_perf_dict = xsi_fifosim(model, rtlsim_bs, max_iters=max_iters, behav=False) # keep keys consistent between the Python and C++-styles cycles = rtlsim_perf_dict["cycles"] clk_ns = cfg.synth_clk_period_ns diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index b86692f2c1..4543b808da 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -123,11 +123,14 @@ def rtlsim_exec_cppxsi( dummy_data_mode=False, timeout_cycles=None, throttle_cycles=0, + behav=True, ): """Use XSI C++ rtl simulation to execute given model with stitched IP. The dummy_data_mode flag controls whether the simulation is driven by dummy data or real data. The execution_context parameter must be formatted according to whether dummy or real data is used. + If behav=True (default), FINN_SIMULATION is defined and fifo_gauge is used. + If behav=False, Q_srl is used instead (no debug logging). 
Example with dummy_data = True: execution_context = { "inputs" : {"" : }, @@ -176,7 +179,7 @@ def rtlsim_exec_cppxsi( single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") debug = not (trace_file is None or trace_file == "") rtlsim_so = finnxsi.compile_sim_obj( - top_module_name, all_verilog_srcs, single_src_dir, debug=debug, behav=True + top_module_name, all_verilog_srcs, single_src_dir, debug=debug, behav=behav ) # save generated lib filename in attribute model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index fff617f578..8202e809cb 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -196,12 +196,14 @@ def apply(self, model): return (model, False) -def xsi_fifosim(model, n_inferences, max_iters=None, throttle_cycles=0): +def xsi_fifosim(model, n_inferences, max_iters=None, throttle_cycles=0, behav=True): """Create a XSI model of stitched IP and use a simple C++ driver to drive the input stream. Useful for FIFO sizing, latency and throughput measurement. If max_iters is None, use the default liveness threshold instead. throttle_cycles can be used for throttling - the input stream every time a frame is finished.""" + the input stream every time a frame is finished. + If behav=True (default), FINN_SIMULATION is defined and fifo_gauge is used. 
+ If behav=False, Q_srl is used instead (no debug logging).""" iname = model.get_first_global_in() first_node = model.find_consumer(iname) @@ -218,6 +220,7 @@ def xsi_fifosim(model, n_inferences, max_iters=None, throttle_cycles=0): dummy_data_mode=True, timeout_cycles=max_iters, throttle_cycles=throttle_cycles, + behav=behav, ) return ret_dict From 86892caf305700dc03a74b1db631edcf55e0c9d5 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Fri, 5 Jun 2026 15:21:25 +0100 Subject: [PATCH 08/19] [Debug FIFO] FINN_LOOP prefix was missing from the debug fifo log names, this was causing issues during verify rtlsim where multiple SV modules were trying to fopen the same logfile simultaneously. --- src/finn/builder/build_dataflow_steps.py | 59 ++++++++++++------------ 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 8df9c625a5..443aaf227e 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -143,8 +143,8 @@ from finn.util.vivado import parse_ooc_synth_results -def _fifo_debug_live_dir(cfg): - return cfg.output_dir + "/debug/_live" +def _fifo_debug_dir(cfg): + return cfg.output_dir + "/debug/fifo_logs" def _maybe_enable_verify_behavioral(cfg): @@ -156,23 +156,31 @@ def _maybe_enable_verify_behavioral(cfg): cfg.verify_rtlsim_behavioral = True -def snapshot_fifo_logs(cfg, phase_subdir): +def _retarget_fifo_log_paths(model, cfg): + if not cfg.debug_fifo: + return model + dbg_dir = _fifo_debug_dir(cfg) + os.makedirs(dbg_dir, exist_ok=True) + for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"): + inst = getCustomOp(node) + old_path = inst.get_nodeattr("debug_log_path") + new_path = os.path.abspath(os.path.join(dbg_dir, node.name + ".log")) + if old_path and old_path != new_path and os.path.isfile(old_path): + os.rename(old_path, new_path) + inst.set_nodeattr("debug_log_path", new_path) + return model + + +def 
mark_fifo_debug_phase(cfg, phase_name): if not cfg.debug_fifo: return - live_dir = _fifo_debug_live_dir(cfg) - if not os.path.isdir(live_dir): - return - dest_dir = cfg.output_dir + "/debug/" + phase_subdir - os.makedirs(dest_dir, exist_ok=True) - for fn in os.listdir(live_dir): + dbg_dir = _fifo_debug_dir(cfg) + os.makedirs(dbg_dir, exist_ok=True) + for fn in os.listdir(dbg_dir): if not fn.endswith(".log"): continue - src = os.path.join(live_dir, fn) - if not os.path.isfile(src) or os.path.getsize(src) == 0: - continue - shutil.copyfile(src, os.path.join(dest_dir, fn)) - # Delete _live folder to ensure clean state for next simulation - shutil.rmtree(live_dir) + with open(os.path.join(dbg_dir, fn), "a") as f: + f.write(f"=== phase: {phase_name} ===\n") def verify_step( @@ -342,13 +350,14 @@ def prepare_loop_ops_fifo_sizing(node, cfg): swg_exception=cfg.default_swg_exception, vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, - debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), + debug_log_dir=(_fifo_debug_dir(cfg) if cfg.debug_fifo else None), ) ) loop_model = loop_model.transform(SplitLargeFIFOs()) loop_model = loop_model.transform(RemoveShallowFIFOs()) loop_model = loop_model.transform(GiveUniqueNodeNames(prefix=node.name + "_")) loop_model = loop_model.transform(GiveReadableTensorNames()) + loop_model = _retarget_fifo_log_paths(loop_model, cfg) node_inst.set_nodeattr("body", loop_model.graph) @@ -883,13 +892,6 @@ def step_hw_codegen(model: ModelWrapper, cfg: DataflowBuildConfig): loop_nodes = model.get_nodes_by_op_type("FINNLoop") for node in loop_nodes: prepare_loop_ops_fifo_sizing(node, cfg) - snapshot_fifo_logs(cfg, "fifo_sizing") - if cfg.debug_fifo: - for loop_node in loop_nodes: - body_model = getCustomOp(loop_node).get_nodeattr("body") - for fifo_node in body_model.get_nodes_by_op_type("StreamingFIFO_rtl"): - getCustomOp(fifo_node).set_nodeattr("debug_log_path", "") - 
getCustomOp(loop_node).set_nodeattr("body", body_model.graph) model = model.transform( PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()), apply_to_subgraphs=True, @@ -1010,6 +1012,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model.set_metadata_prop( "rtlsim_trace", os.path.abspath(report_dir) + "/fifosim_trace.wdb" ) + mark_fifo_debug_phase(cfg, "fifo_sizing") model = model.transform( InsertAndSetFIFODepths( cfg._resolve_fpga_part(), @@ -1018,7 +1021,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, cfg_n_inferences=cfg.fifosim_n_inferences, - debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), + debug_log_dir=(_fifo_debug_dir(cfg) if cfg.debug_fifo else None), ) ) model = model.transform(GiveUniqueNodeNames()) @@ -1084,7 +1087,6 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): # this will only run for the new nodes (e.g. 
FIFOs and DWCs) model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) model = model.transform(HLSSynthIP(cfg._resolve_fpga_part())) - snapshot_fifo_logs(cfg, "fifo_sizing") return model @@ -1151,9 +1153,7 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): # prepare ip-stitched rtlsim verify_model = deepcopy(model) verify_model = prepare_for_stitched_ip_rtlsim(verify_model, cfg) - # Ensure _live folder exists for debug_fifo logging - if cfg.debug_fifo: - os.makedirs(_fifo_debug_live_dir(cfg), exist_ok=True) + mark_fifo_debug_phase(cfg, "verify_stitched_ip_rtlsim") # use critical path estimate to set rtlsim liveness threshold # (very conservative) verify_model = verify_model.transform(AnnotateCycles()) @@ -1175,7 +1175,6 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): else: verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True) os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness) - snapshot_fifo_logs(cfg, "verify_stitched_ip_rtlsim") return model @@ -1208,6 +1207,7 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi # Use behav=False for performance measurement to use real RTL components # instead of behavioral models (FINN_SIMULATION affects FIFOs, MVU, LayerNorm, # and RTL elementwise ops) + mark_fifo_debug_phase(cfg, "rtlsim_perf") rtlsim_perf_dict = xsi_fifosim(model, rtlsim_bs, max_iters=max_iters, behav=False) # keep keys consistent between the Python and C++-styles cycles = rtlsim_perf_dict["cycles"] @@ -1239,7 +1239,6 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi if cfg.verify_save_rtlsim_waveforms: # restore original trace depth os.environ["RTLSIM_TRACE_DEPTH"] = str(orig_rtlsim_trace_depth) - snapshot_fifo_logs(cfg, "rtlsim_perf") else: print( From 59184ec18b2db64baa904edc3d278a8d1c308beb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 9 Jun 
2026 09:06:43 +0100 Subject: [PATCH 09/19] Consolidated testbench driving starting a cycle with the active clock edge. --- finn_xsi/finn_xsi/sim_engine.py | 15 +++------- finn_xsi/rtlsim_xsi.cpp | 51 ++++++++++++++++----------------- 2 files changed, 29 insertions(+), 37 deletions(-) diff --git a/finn_xsi/finn_xsi/sim_engine.py b/finn_xsi/finn_xsi/sim_engine.py index cd6b09c5ba..e15a37ff1e 100644 --- a/finn_xsi/finn_xsi/sim_engine.py +++ b/finn_xsi/finn_xsi/sim_engine.py @@ -24,28 +24,21 @@ def __init__(self, kernel, design, log=None, wdb=None): if p.isInput(): p.clear().write_back() - # Match C++ driver structure: separate half_cycle calls with run(5) each def half_cycle(up): - """Single half-cycle matching C++ driver behavior.""" clk.set(up).write_back() if clk2x is not None: clk2x.set(1).write_back() - top.run(5) + top.run(25) clk2x.set(0).write_back() - top.run(5) + top.run(25) else: - top.run(5) + top.run(50) def cycle(updates): - # Clock down - matches C++ cycle(0) - half_cycle(0) - - # Clock up - matches C++ cycle(1) half_cycle(1) - - # Write port updates after clock up (matching C++ structure) for port, update in updates.items(): port.set_hexstr(update).write_back() + half_cycle(0) self.top = top self.cycle = cycle diff --git a/finn_xsi/rtlsim_xsi.cpp b/finn_xsi/rtlsim_xsi.cpp index d4fe79581d..a81ebe986f 100644 --- a/finn_xsi/rtlsim_xsi.cpp +++ b/finn_xsi/rtlsim_xsi.cpp @@ -21,7 +21,7 @@ #include "xsi_finn.hpp" #include "rtlsim_config.hpp" -int main(int argc, char *argv[]) { +int main(int const argc, char const *const argv[]) { // Load Kernel and Design xsi::Kernel kernel(kernel_libname); @@ -54,11 +54,11 @@ int main(int argc, char *argv[]) { size_t job_size; size_t job_txns; // [0:job_size] size_t total_txns; - size_t first_complete; // First completion timestamp union { // Input Stream struct { + size_t first_complete; // First completion timestamp size_t job_ticks; // throttle if job_size < job_ticks size_t await_iter; // iteration allowing start of 
next job }; @@ -94,7 +94,8 @@ int main(int argc, char *argv[]) { } // Find Global Control & Run Startup Sequence - std::function cycle; + std::function>&)> cycle; + std::vector> to_write; { Port *const clk = top.getPort("ap_clk"); Port *const clk2x = top.getPort("ap_clk2x"); @@ -103,24 +104,30 @@ int main(int argc, char *argv[]) { std::cerr << "No clock found on the design." << std::endl; return 1; } - cycle = clk2x? - std::function([&top, clk, clk2x](bool const up) mutable { + cycle = [half = clk2x? + std::function([&top, clk, clk2x](bool const up) { clk->set(up).write_back(); clk2x->set(1).write_back(); - top.run(5); + top.run(25); clk2x->set(0).write_back(); - top.run(5); + top.run(25); }) : - std::function([&top, clk](bool const up) mutable { + std::function([&top, clk](bool const up) { clk->set(up).write_back(); - top.run(5); - }); + top.run(50); + }) + ](std::vector> &to_write) { + half(1); + for(Port &p : to_write) p.write_back(); + to_write.clear(); + half(0); + }; // Reset all Inputs, Wait for Reset Period for(Port &p : top.ports()) { if(p.isInput()) p.clear().write_back(); }; if(rst_n) { - for(unsigned i = 0; i < 16; i++) { cycle(0); cycle(1); } - rst_n->set(1).write_back(); + for(unsigned i = 0; i < 16; i++) cycle(to_write); + to_write.emplace_back(rst_n->set(1)); } } @@ -128,25 +135,21 @@ int main(int argc, char *argv[]) { std::cout << "Starting data feed with idle-output timeout of " << max_iters << " cycles ...\n" << std::endl; // Make all Inputs valid & all Outputs ready - for(auto &s : istreams) s.port_vld.set(1).write_back(); - for(auto &s : ostreams) s.port_rdy.set(1).write_back(); + for(auto &s : istreams) to_write.emplace_back(s.port_vld.set(1)); + for(auto &s : ostreams) to_write.emplace_back(s.port_rdy.set(1)); + cycle(to_write); // flush & settle before first read // Enter Simulation Loop and track Progress auto const begin = std::chrono::steady_clock::now(); - std::vector> to_write; while(true) { - 
//------------------------------------------------------------------- - // Clock down - then read signal updates from design - cycle(0); - // check for transactions on input streams for(auto &s : istreams) { bool const vld = s.port_vld[0]; bool const rdy = s.port_rdy.read()[0]; if(vld && !rdy) continue; - // Track successgul Transactions + // Track successful Transactions if(vld) { s.job_txns++; if(++s.total_txns == s.job_size * n_inferences) itodo--; @@ -194,12 +197,8 @@ int main(int argc, char *argv[]) { } //------------------------------------------------------------------- - // Clock up - then write signal updates back to design - cycle(1); - - // Write back Ports with registered updates - for(Port &p : to_write) p.write_back(); - to_write.clear(); + // Advance clock: rise, write back, fall + cycle(to_write); // Show a progress message once in a while if(++iters % 10000 == 0) { From 05577c4249f843d1c6fdcceac1d016667b57b1e0 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Fri, 12 Jun 2026 13:14:50 +0100 Subject: [PATCH 10/19] [Debug FIFO] work around logging crash issue by sending tagged log data to stderr instead of to individual logfiles. Also cleaner shutdown of RTL sim with exposed external sim port that calls $finish in an always_ff block, this now enables the printing of transaction counts in the debug fifo when simulation terminates.
--- finn-rtllib/fifo/hdl/fifo_gauge.sv | 11 +++--- finn-rtllib/fifo/hdl/fifo_gauge_tb.sv | 2 +- finn-rtllib/fifo/hdl/fifo_template.v | 2 +- finn_xsi/finn_xsi/adapter.py | 4 ++ finn_xsi/rtlsim_xsi.cpp | 33 ++++++++++++---- src/finn/builder/build_dataflow_steps.py | 39 +------------------ src/finn/core/rtlsim_exec.py | 2 +- .../fpgadataflow/rtl/streamingfifo_rtl.py | 2 +- .../custom_op/fpgadataflow/streamingfifo.py | 2 +- .../fpgadataflow/create_stitched_ip.py | 19 +++++++++ .../fpgadataflow/set_fifo_depths.py | 11 ++---- 11 files changed, 64 insertions(+), 63 deletions(-) diff --git a/finn-rtllib/fifo/hdl/fifo_gauge.sv b/finn-rtllib/fifo/hdl/fifo_gauge.sv index 73904c1191..9b0b994f76 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge.sv @@ -35,7 +35,7 @@ module fifo_gauge #( int unsigned WIDTH, int unsigned COUNT_WIDTH = 32, - parameter DATA_LOGFILE = "" // Log consumed data verbosely to this file + parameter bit DEBUG_LOG = 0 )( input logic clk, input logic rst, @@ -52,14 +52,14 @@ module fifo_gauge #( output logic [COUNT_WIDTH-1:0] maxcount ); + localparam int STDERR_FD = 32'h8000_0002; + //----------------------------------------------------------------------- // Monitoring & Debug // Transaction counters longint unsigned ITxnCnt = 0; longint unsigned OTxnCnt = 0; - // Optional hex data trace - int LogFd = (DATA_LOGFILE != "")? 
$fopen(DATA_LOGFILE, "a") : 0; // The internal Queue serving as data buffer and an output register logic [WIDTH-1:0] Q[$] = {}; @@ -70,8 +70,7 @@ module fifo_gauge #( logic [WIDTH-1:0] ODat = 'x; final begin - $display("[%m] MaxFill: %0d; Transactions: in=%0d out=%0d", MaxCount, ITxnCnt, OTxnCnt); - if(LogFd) $fclose(LogFd); + $fwrite(STDERR_FD, "[%m] MaxFill: %0d; Transactions: in=%0d out=%0d", MaxCount, ITxnCnt, OTxnCnt); end always_ff @(posedge clk) begin @@ -89,7 +88,7 @@ module fifo_gauge #( // Always take input and track Transactions if(ivld) begin Q.push_back(idat); - if(LogFd) $fwrite(LogFd, "%0x\n", idat); + if(DEBUG_LOG) $fwrite(STDERR_FD, "[FIFOLOG %m] %0x\n", idat); ITxnCnt <= ITxnCnt + 1; end if(OVld && ordy) OTxnCnt <= OTxnCnt + 1; diff --git a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv index 4e75834515..e74905d7d7 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv @@ -59,7 +59,7 @@ module fifo_gauge_tb; // Depth Monitoring uwire count_t maxcount; - fifo_gauge #(.WIDTH(W), .DATA_LOGFILE("fifo_trace.log")) dut ( + fifo_gauge #(.WIDTH(W), .DEBUG_LOG(1)) dut ( .clk, .rst, .idat, .ivld, .irdy, .odat, .ovld, .ordy, diff --git a/finn-rtllib/fifo/hdl/fifo_template.v b/finn-rtllib/fifo/hdl/fifo_template.v index b9692f1659..b128d40dd6 100644 --- a/finn-rtllib/fifo/hdl/fifo_template.v +++ b/finn-rtllib/fifo/hdl/fifo_template.v @@ -52,7 +52,7 @@ output $OUT_RANGE$ out0_V_TDATA ); `ifdef FINN_SIMULATION - fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$), .DATA_LOGFILE("$DATA_LOGFILE$")) fifo ( + fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$), .DEBUG_LOG($DEBUG_LOG$)) fifo ( .clk(ap_clk), .rst(!ap_rst_n), .idat(in0_V_TDATA), .ivld(in0_V_TVALID), .irdy(in0_V_TREADY), .odat(out0_V_TDATA), .ovld(out0_V_TVALID), .ordy(out0_V_TREADY), diff --git a/finn_xsi/finn_xsi/adapter.py b/finn_xsi/finn_xsi/adapter.py index 0b73787a60..c2bde7dcf1 100644 --- 
a/finn_xsi/finn_xsi/adapter.py +++ b/finn_xsi/finn_xsi/adapter.py @@ -157,6 +157,10 @@ def reset_rtlsim( def close_rtlsim(sim): + sim_finish = sim.top.getPort("sim_finish") + if sim_finish is not None: + sim_finish.set(1).write_back() + sim.cycle({}) del sim diff --git a/finn_xsi/rtlsim_xsi.cpp b/finn_xsi/rtlsim_xsi.cpp index d4fe79581d..d6fdc78bdf 100644 --- a/finn_xsi/rtlsim_xsi.cpp +++ b/finn_xsi/rtlsim_xsi.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,7 @@ int main(int argc, char *argv[]) { // Ultimate Simulation Summary std::string synopsis; + std::map maxcounts; { // RTL Simulation @@ -245,6 +247,27 @@ int main(int argc, char *argv[]) { "RUNTIME_S\t" << std::chrono::duration_cast(std::chrono::steady_clock::now() - begin).count(); synopsis = bld.str(); + // Read maxcount ports before $finish tears down the design + for(Port &p : top.ports()) { + if(p.isOutput()) { + char const *const name = p.name(); + if(std::strncmp(name, "maxcount", 8) == 0) { + p.read(); + maxcounts[name] = p.as_unsigned(); + } + } + } + + // Trigger $finish via sim_finish port so that final blocks execute + { + Port *const sim_finish = top.getPort("sim_finish"); + if(sim_finish) { + sim_finish->set(1).write_back(); + cycle(0); + cycle(1); + } + } + } // done simulation // Dump Simulation Statistics to stdout and results.txt @@ -258,14 +281,8 @@ int main(int argc, char *argv[]) { { // Synopsis and `max_count` readings to results file std::ofstream results_file("results.txt", std::ios::out | std::ios::trunc); results_file << synopsis << std::endl; - for(Port &p : top.ports()) { - if(p.isOutput()) { - char const *const name = p.name(); - if(std::strncmp(name, "maxcount", 8) == 0) { - p.read(); - results_file << name << '\t' << p.as_unsigned() << std::endl; - } - } + for(auto const &[name, val] : maxcounts) { + results_file << name << '\t' << val << std::endl; } } diff --git a/src/finn/builder/build_dataflow_steps.py 
b/src/finn/builder/build_dataflow_steps.py index 443aaf227e..d1b58f7bb7 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -143,10 +143,6 @@ from finn.util.vivado import parse_ooc_synth_results -def _fifo_debug_dir(cfg): - return cfg.output_dir + "/debug/fifo_logs" - - def _maybe_enable_verify_behavioral(cfg): if cfg.debug_fifo and not cfg.verify_rtlsim_behavioral: print( @@ -156,33 +152,6 @@ def _maybe_enable_verify_behavioral(cfg): cfg.verify_rtlsim_behavioral = True -def _retarget_fifo_log_paths(model, cfg): - if not cfg.debug_fifo: - return model - dbg_dir = _fifo_debug_dir(cfg) - os.makedirs(dbg_dir, exist_ok=True) - for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"): - inst = getCustomOp(node) - old_path = inst.get_nodeattr("debug_log_path") - new_path = os.path.abspath(os.path.join(dbg_dir, node.name + ".log")) - if old_path and old_path != new_path and os.path.isfile(old_path): - os.rename(old_path, new_path) - inst.set_nodeattr("debug_log_path", new_path) - return model - - -def mark_fifo_debug_phase(cfg, phase_name): - if not cfg.debug_fifo: - return - dbg_dir = _fifo_debug_dir(cfg) - os.makedirs(dbg_dir, exist_ok=True) - for fn in os.listdir(dbg_dir): - if not fn.endswith(".log"): - continue - with open(os.path.join(dbg_dir, fn), "a") as f: - f.write(f"=== phase: {phase_name} ===\n") - - def verify_step( model: ModelWrapper, cfg: DataflowBuildConfig, @@ -350,14 +319,13 @@ def prepare_loop_ops_fifo_sizing(node, cfg): swg_exception=cfg.default_swg_exception, vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, - debug_log_dir=(_fifo_debug_dir(cfg) if cfg.debug_fifo else None), + debug_log=cfg.debug_fifo, ) ) loop_model = loop_model.transform(SplitLargeFIFOs()) loop_model = loop_model.transform(RemoveShallowFIFOs()) loop_model = loop_model.transform(GiveUniqueNodeNames(prefix=node.name + "_")) loop_model = loop_model.transform(GiveReadableTensorNames()) - 
loop_model = _retarget_fifo_log_paths(loop_model, cfg) node_inst.set_nodeattr("body", loop_model.graph) @@ -1012,7 +980,6 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model.set_metadata_prop( "rtlsim_trace", os.path.abspath(report_dir) + "/fifosim_trace.wdb" ) - mark_fifo_debug_phase(cfg, "fifo_sizing") model = model.transform( InsertAndSetFIFODepths( cfg._resolve_fpga_part(), @@ -1021,7 +988,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, cfg_n_inferences=cfg.fifosim_n_inferences, - debug_log_dir=(_fifo_debug_dir(cfg) if cfg.debug_fifo else None), + debug_log=cfg.debug_fifo, ) ) model = model.transform(GiveUniqueNodeNames()) @@ -1153,7 +1120,6 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): # prepare ip-stitched rtlsim verify_model = deepcopy(model) verify_model = prepare_for_stitched_ip_rtlsim(verify_model, cfg) - mark_fifo_debug_phase(cfg, "verify_stitched_ip_rtlsim") # use critical path estimate to set rtlsim liveness threshold # (very conservative) verify_model = verify_model.transform(AnnotateCycles()) @@ -1207,7 +1173,6 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi # Use behav=False for performance measurement to use real RTL components # instead of behavioral models (FINN_SIMULATION affects FIFOs, MVU, LayerNorm, # and RTL elementwise ops) - mark_fifo_debug_phase(cfg, "rtlsim_perf") rtlsim_perf_dict = xsi_fifosim(model, rtlsim_bs, max_iters=max_iters, behav=False) # keep keys consistent between the Python and C++-styles cycles = rtlsim_perf_dict["cycles"] diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 4543b808da..25f094ae54 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -296,7 +296,7 @@ def rtlsim_exec_cppxsi( runsim_cmd = ["bash", "run_rtlsim.sh"] with open(sim_base + 
"/run_rtlsim.sh", "w") as f: f.write( - f"LD_LIBRARY_PATH={runsim_env['LD_LIBRARY_PATH']} ./rtlsim_xsi > rtlsim_xsi_log.txt" + f"LD_LIBRARY_PATH={runsim_env['LD_LIBRARY_PATH']} ./rtlsim_xsi > rtlsim_xsi_log.txt 2> rtlsim_xsi_stderr.log" ) launch_process_helper(runsim_cmd, cwd=sim_base) diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index 399699996d..0ff05f7b1e 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -96,7 +96,7 @@ def generate_hdl(self, model, fpgapart, clk): code_gen_dict["$OUT_RANGE$"] = "[{}:0]".format(in_width - 1) code_gen_dict["$WIDTH$"] = str(in_width) code_gen_dict["$DEPTH$"] = str(depth) - code_gen_dict["$DATA_LOGFILE$"] = self.get_nodeattr("debug_log_path") + code_gen_dict["$DEBUG_LOG$"] = str(self.get_nodeattr("debug_log")) # apply code generation to templates code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") with open(template_path, "r") as f: diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 2351044387..1e43dfca22 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -64,7 +64,7 @@ def get_nodeattr_types(self): # the FIFO does not need its own FIFOs "inFIFODepths": ("ints", False, [0]), "outFIFODepths": ("ints", False, [0]), - "debug_log_path": ("s", False, ""), + "debug_log": ("i", False, 0), } ) diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index c713179e8f..fd2e178fb4 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -390,6 +390,23 @@ def insert_signature(self, checksum_count): self.connect_cmds.append("set_property name s_axilite_info [get_bd_intf_ports 
s_axi_0]") self.connect_cmds.append("assign_bd_address") + def insert_sim_ctrl(self): + sim_ctrl_src = "$::env(FINN_ROOT)/finn-rtllib/sim/hdl/sim_ctrl.v" + sim_ctrl_name = "sim_ctrl_0" + self.create_cmds.append("add_files -norecurse %s" % sim_ctrl_src) + self.create_cmds.append( + "create_bd_cell -type module -reference sim_ctrl %s" % sim_ctrl_name + ) + self.connect_cmds.append( + "connect_bd_net [get_bd_ports ap_clk] [get_bd_pins %s/ap_clk]" % sim_ctrl_name + ) + self.connect_cmds.append( + "make_bd_pins_external [get_bd_pins %s/sim_finish]" % sim_ctrl_name + ) + self.connect_cmds.append( + "set_property name sim_finish [get_bd_ports sim_finish_0]" + ) + def apply(self, model): # ensure non-relative readmemh .dat files model = model.transform(ReplaceVerilogRelPaths()) @@ -467,6 +484,8 @@ def apply(self, model): checksum_layers = model.get_nodes_by_op_type("CheckSum_hls") self.insert_signature(len(checksum_layers)) + self.insert_sim_ctrl() + # create a temporary folder for the project prjname = "finn_vivado_stitch_proj" vivado_stitch_proj_dir = make_build_dir(prefix="vivado_stitch_proj_") diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 8202e809cb..66569cbdca 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -28,7 +28,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import numpy as np -import os import warnings from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -279,7 +278,7 @@ def __init__( vivado_ram_style="auto", fifosim_input_throttle=True, cfg_n_inferences=2, - debug_log_dir=None, + debug_log=False, ): super().__init__() self.fpgapart = fpgapart @@ -292,7 +291,7 @@ def __init__( self.cfg_n_inferences = cfg_n_inferences self.mlo_max_iter = 0 self.ind_map = {} - self.debug_log_dir = debug_log_dir + self.debug_log = debug_log def apply(self, model): model = model.transform(GiveUniqueNodeNames()) @@ -409,11 +408,9 @@ def apply(self, model): if (self.max_depth is not None) and (node.get_nodeattr("depth") != self.max_depth): node.set_nodeattr("depth", self.max_depth) - if self.debug_log_dir is not None: - os.makedirs(os.path.abspath(self.debug_log_dir), exist_ok=True) + if self.debug_log: for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"): - log_path = os.path.abspath(os.path.join(self.debug_log_dir, node.name + ".log")) - getCustomOp(node).set_nodeattr("debug_log_path", log_path) + getCustomOp(node).set_nodeattr("debug_log", 1) # insert FIFOs and do all transformations for RTLsim model = model.transform(AnnotateCycles()) From 8351c0fe7034dc8dc7fc6c2839169f3b1aa335c5 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Fri, 12 Jun 2026 15:16:58 +0100 Subject: [PATCH 11/19] [Debug FIFO] fixing minor issue where the sim_finish signal was not actually being written --- finn_xsi/rtlsim_xsi.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/finn_xsi/rtlsim_xsi.cpp b/finn_xsi/rtlsim_xsi.cpp index 293a620851..41c8c31923 100644 --- a/finn_xsi/rtlsim_xsi.cpp +++ b/finn_xsi/rtlsim_xsi.cpp @@ -261,9 +261,8 @@ int main(int const argc, char const *const argv[]) { { Port *const sim_finish = top.getPort("sim_finish"); if(sim_finish) { - sim_finish->set(1).write_back(); - cycle(0); - cycle(1); + to_write.emplace_back(sim_finish->set(1)); + cycle(to_write); } } From 
ae85bdba0317c641c85b960f5633b11e0954f088 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Fri, 12 Jun 2026 15:29:01 +0100 Subject: [PATCH 12/19] [Debug FIFO] pre commit --- src/finn/core/rtlsim_exec.py | 5 ++++- src/finn/transformation/fpgadataflow/create_stitched_ip.py | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 25f094ae54..3176e28a71 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -295,8 +295,11 @@ def rtlsim_exec_cppxsi( runsim_env["LD_LIBRARY_PATH"] = get_vivado_root() + "/lib/lnx64.o" runsim_cmd = ["bash", "run_rtlsim.sh"] with open(sim_base + "/run_rtlsim.sh", "w") as f: + ld_path = runsim_env["LD_LIBRARY_PATH"] f.write( - f"LD_LIBRARY_PATH={runsim_env['LD_LIBRARY_PATH']} ./rtlsim_xsi > rtlsim_xsi_log.txt 2> rtlsim_xsi_stderr.log" + f"LD_LIBRARY_PATH={ld_path}" + " ./rtlsim_xsi > rtlsim_xsi_log.txt" + " 2> rtlsim_xsi_stderr.log" ) launch_process_helper(runsim_cmd, cwd=sim_base) diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index fd2e178fb4..58ee04b9db 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -403,9 +403,7 @@ def insert_sim_ctrl(self): self.connect_cmds.append( "make_bd_pins_external [get_bd_pins %s/sim_finish]" % sim_ctrl_name ) - self.connect_cmds.append( - "set_property name sim_finish [get_bd_ports sim_finish_0]" - ) + self.connect_cmds.append("set_property name sim_finish [get_bd_ports sim_finish_0]") def apply(self, model): # ensure non-relative readmemh .dat files From d24bcb151bf418102823bb637d2152f27935a934 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Fri, 12 Jun 2026 17:54:08 +0100 Subject: [PATCH 13/19] [FIFO Debug] Added a workaround for the early final block execution --- finn-rtllib/fifo/hdl/fifo_gauge.sv | 4 ++-- 
finn-rtllib/sim/hdl/sim_ctrl.v | 7 +++++++ finn_xsi/rtlsim_xsi.cpp | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 finn-rtllib/sim/hdl/sim_ctrl.v diff --git a/finn-rtllib/fifo/hdl/fifo_gauge.sv b/finn-rtllib/fifo/hdl/fifo_gauge.sv index 9b0b994f76..fad46911cf 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge.sv @@ -70,7 +70,7 @@ module fifo_gauge #( logic [WIDTH-1:0] ODat = 'x; final begin - $fwrite(STDERR_FD, "[%m] MaxFill: %0d; Transactions: in=%0d out=%0d", MaxCount, ITxnCnt, OTxnCnt); + $fwrite(STDERR_FD, "[%m @%0t] MaxFill: %0d; Transactions: in=%0d out=%0d", $time, MaxCount, ITxnCnt, OTxnCnt); end always_ff @(posedge clk) begin @@ -88,7 +88,7 @@ module fifo_gauge #( // Always take input and track Transactions if(ivld) begin Q.push_back(idat); - if(DEBUG_LOG) $fwrite(STDERR_FD, "[FIFOLOG %m] %0x\n", idat); + if(DEBUG_LOG) $fwrite(STDERR_FD, "[FIFOLOG %m @%0t] %0x\n", $time, idat); ITxnCnt <= ITxnCnt + 1; end if(OVld && ordy) OTxnCnt <= OTxnCnt + 1; diff --git a/finn-rtllib/sim/hdl/sim_ctrl.v b/finn-rtllib/sim/hdl/sim_ctrl.v new file mode 100644 index 0000000000..6168765af1 --- /dev/null +++ b/finn-rtllib/sim/hdl/sim_ctrl.v @@ -0,0 +1,7 @@ +module sim_ctrl(input ap_clk, input sim_finish); + always @(posedge sim_finish) $finish; + // Workaround for XSI bug: final blocks execute prematurely when all + // initial blocks complete, rather than at $finish. This never-completing + // initial block prevents that. 
+ initial forever #1_000_000_000 ; +endmodule diff --git a/finn_xsi/rtlsim_xsi.cpp b/finn_xsi/rtlsim_xsi.cpp index 41c8c31923..b70fe7c38f 100644 --- a/finn_xsi/rtlsim_xsi.cpp +++ b/finn_xsi/rtlsim_xsi.cpp @@ -257,21 +257,21 @@ int main(int const argc, char const *const argv[]) { } } - // Trigger $finish via sim_finish port so that final blocks execute - { - Port *const sim_finish = top.getPort("sim_finish"); - if(sim_finish) { - to_write.emplace_back(sim_finish->set(1)); - cycle(to_write); - } - } - } // done simulation // Dump Simulation Statistics to stdout and results.txt std::cout << '\n' << synopsis << std::endl; - { // Log error info to file + // Trigger $finish so that final blocks execute + { + Port *const sim_finish = top.getPort("sim_finish"); + if(sim_finish) { + sim_finish->set(1).write_back(); + top.run(1); + } + } + + { // Log error info to file (includes final block output) std::ofstream error_file("fifosim.err", std::ios::out | std::ios::trunc); error_file << top.get_error_info(); } From 55e675cba812dd82ad7f152dc9557c3cf82f2d3d Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Wed, 17 Jun 2026 15:48:44 +0100 Subject: [PATCH 14/19] [Debug FIFO] restoring per-file logging with phase snapshotting now that final block workaround is working.
--- finn-rtllib/fifo/hdl/fifo_gauge.sv | 12 ++++--- finn-rtllib/fifo/hdl/fifo_template.v | 2 +- finn-rtllib/sim/hdl/sim_ctrl.v | 7 ++-- src/finn/builder/build_dataflow_steps.py | 36 +++++++++++++++++-- .../fpgadataflow/rtl/streamingfifo_rtl.py | 2 +- .../custom_op/fpgadataflow/streamingfifo.py | 2 +- .../fpgadataflow/set_fifo_depths.py | 16 ++++++--- 7 files changed, 61 insertions(+), 16 deletions(-) diff --git a/finn-rtllib/fifo/hdl/fifo_gauge.sv b/finn-rtllib/fifo/hdl/fifo_gauge.sv index fad46911cf..3799b4e8c0 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge.sv @@ -35,7 +35,7 @@ module fifo_gauge #( int unsigned WIDTH, int unsigned COUNT_WIDTH = 32, - parameter bit DEBUG_LOG = 0 + parameter DATA_LOGFILE = "" )( input logic clk, input logic rst, @@ -52,14 +52,13 @@ module fifo_gauge #( output logic [COUNT_WIDTH-1:0] maxcount ); - localparam int STDERR_FD = 32'h8000_0002; - //----------------------------------------------------------------------- // Monitoring & Debug // Transaction counters longint unsigned ITxnCnt = 0; longint unsigned OTxnCnt = 0; + int LogFd = (DATA_LOGFILE != "")? 
$fopen(DATA_LOGFILE, "w") : 0; // The internal Queue serving as data buffer and an output register logic [WIDTH-1:0] Q[$] = {}; @@ -70,7 +69,10 @@ module fifo_gauge #( logic [WIDTH-1:0] ODat = 'x; final begin - $fwrite(STDERR_FD, "[%m @%0t] MaxFill: %0d; Transactions: in=%0d out=%0d", $time, MaxCount, ITxnCnt, OTxnCnt); + if(LogFd) begin + $fwrite(LogFd, "[%m @%0t] MaxFill: %0d; Transactions: in=%0d out=%0d\n", $time, MaxCount, ITxnCnt, OTxnCnt); + $fclose(LogFd); + end end always_ff @(posedge clk) begin @@ -88,7 +90,7 @@ module fifo_gauge #( // Always take input and track Transactions if(ivld) begin Q.push_back(idat); - if(DEBUG_LOG) $fwrite(STDERR_FD, "[FIFOLOG %m @%0t] %0x\n", $time, idat); + if(LogFd) $fwrite(LogFd, "%0x\n", idat); ITxnCnt <= ITxnCnt + 1; end if(OVld && ordy) OTxnCnt <= OTxnCnt + 1; diff --git a/finn-rtllib/fifo/hdl/fifo_template.v b/finn-rtllib/fifo/hdl/fifo_template.v index b128d40dd6..b9692f1659 100644 --- a/finn-rtllib/fifo/hdl/fifo_template.v +++ b/finn-rtllib/fifo/hdl/fifo_template.v @@ -52,7 +52,7 @@ output $OUT_RANGE$ out0_V_TDATA ); `ifdef FINN_SIMULATION - fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$), .DEBUG_LOG($DEBUG_LOG$)) fifo ( + fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$), .DATA_LOGFILE("$DATA_LOGFILE$")) fifo ( .clk(ap_clk), .rst(!ap_rst_n), .idat(in0_V_TDATA), .ivld(in0_V_TVALID), .irdy(in0_V_TREADY), .odat(out0_V_TDATA), .ovld(out0_V_TVALID), .ordy(out0_V_TREADY), diff --git a/finn-rtllib/sim/hdl/sim_ctrl.v b/finn-rtllib/sim/hdl/sim_ctrl.v index 6168765af1..5e4cf627ad 100644 --- a/finn-rtllib/sim/hdl/sim_ctrl.v +++ b/finn-rtllib/sim/hdl/sim_ctrl.v @@ -1,7 +1,10 @@ -module sim_ctrl(input ap_clk, input sim_finish); +module sim_ctrl(input ap_clk, input sim_finish, output sim_ctrl_out); + assign sim_ctrl_out = 1'b0; +`ifdef FINN_SIMULATION always @(posedge sim_finish) $finish; // Workaround for XSI bug: final blocks execute prematurely when all // initial blocks complete, rather than at $finish. 
This never-completing // initial block prevents that. - initial forever #1_000_000_000 ; + initial forever #1; +`endif endmodule diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index d1b58f7bb7..b751a4d56c 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -152,6 +152,31 @@ def _maybe_enable_verify_behavioral(cfg): cfg.verify_rtlsim_behavioral = True +def _fifo_debug_live_dir(cfg): + return cfg.output_dir + "/debug/fifo_logs/_live" + + +def snapshot_fifo_logs(cfg, phase_name, loop_context=None): + if not cfg.debug_fifo: + return + live_dir = _fifo_debug_live_dir(cfg) + if not os.path.isdir(live_dir): + return + prefix = (loop_context + "_") if loop_context else None + subdir = loop_context or "main" + dest_dir = os.path.join(cfg.output_dir, "debug", "fifo_logs", phase_name, subdir) + os.makedirs(dest_dir, exist_ok=True) + for fn in os.listdir(live_dir): + if not fn.endswith(".log"): + continue + if prefix is not None and not fn.startswith(prefix): + continue + src = os.path.join(live_dir, fn) + dst = os.path.join(dest_dir, fn) + shutil.copy2(src, dst) + open(src, "w").close() + + def verify_step( model: ModelWrapper, cfg: DataflowBuildConfig, @@ -319,9 +344,11 @@ def prepare_loop_ops_fifo_sizing(node, cfg): swg_exception=cfg.default_swg_exception, vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, - debug_log=cfg.debug_fifo, + debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), + debug_log_prefix=node.name + "_", ) ) + snapshot_fifo_logs(cfg, "fifo_sizing", loop_context=node.name) loop_model = loop_model.transform(SplitLargeFIFOs()) loop_model = loop_model.transform(RemoveShallowFIFOs()) loop_model = loop_model.transform(GiveUniqueNodeNames(prefix=node.name + "_")) @@ -988,9 +1015,10 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): vivado_ram_style=cfg.large_fifo_mem_style, 
fifosim_input_throttle=cfg.fifosim_input_throttle, cfg_n_inferences=cfg.fifosim_n_inferences, - debug_log=cfg.debug_fifo, + debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), ) ) + snapshot_fifo_logs(cfg, "fifo_sizing") model = model.transform(GiveUniqueNodeNames()) loop_nodes = model.get_nodes_by_op_type("FINNLoop") for loop_node in loop_nodes: @@ -1138,8 +1166,12 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): verify_model.set_metadata_prop("rtlsim_behavioral", "1") if is_mlo(model): verify_mlo(verify_model, cfg, "stitched_ip_rtlsim") + for loop_node in verify_model.get_nodes_by_op_type("FINNLoop"): + snapshot_fifo_logs(cfg, "stitched_ip_rtlsim", loop_context=loop_node.name) + snapshot_fifo_logs(cfg, "stitched_ip_rtlsim") else: verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True) + snapshot_fifo_logs(cfg, "stitched_ip_rtlsim") os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness) return model diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index 0ff05f7b1e..399699996d 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -96,7 +96,7 @@ def generate_hdl(self, model, fpgapart, clk): code_gen_dict["$OUT_RANGE$"] = "[{}:0]".format(in_width - 1) code_gen_dict["$WIDTH$"] = str(in_width) code_gen_dict["$DEPTH$"] = str(depth) - code_gen_dict["$DEBUG_LOG$"] = str(self.get_nodeattr("debug_log")) + code_gen_dict["$DATA_LOGFILE$"] = self.get_nodeattr("debug_log_path") # apply code generation to templates code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") with open(template_path, "r") as f: diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 1e43dfca22..2351044387 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py 
@@ -64,7 +64,7 @@ def get_nodeattr_types(self): # the FIFO does not need its own FIFOs "inFIFODepths": ("ints", False, [0]), "outFIFODepths": ("ints", False, [0]), - "debug_log": ("i", False, 0), + "debug_log_path": ("s", False, ""), } ) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 66569cbdca..db6f53d218 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -27,6 +27,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os + import numpy as np import warnings from onnx import TensorProto, helper @@ -278,7 +280,8 @@ def __init__( vivado_ram_style="auto", fifosim_input_throttle=True, cfg_n_inferences=2, - debug_log=False, + debug_log_dir=None, + debug_log_prefix="", ): super().__init__() self.fpgapart = fpgapart @@ -291,7 +294,8 @@ def __init__( self.cfg_n_inferences = cfg_n_inferences self.mlo_max_iter = 0 self.ind_map = {} - self.debug_log = debug_log + self.debug_log_dir = debug_log_dir + self.debug_log_prefix = debug_log_prefix def apply(self, model): model = model.transform(GiveUniqueNodeNames()) @@ -408,9 +412,13 @@ def apply(self, model): if (self.max_depth is not None) and (node.get_nodeattr("depth") != self.max_depth): node.set_nodeattr("depth", self.max_depth) - if self.debug_log: + if self.debug_log_dir is not None: + os.makedirs(os.path.abspath(self.debug_log_dir), exist_ok=True) for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"): - getCustomOp(node).set_nodeattr("debug_log", 1) + log_path = os.path.abspath( + os.path.join(self.debug_log_dir, self.debug_log_prefix + node.name + ".log") + ) + getCustomOp(node).set_nodeattr("debug_log_path", log_path) # insert FIFOs and do all transformations for RTLsim model = model.transform(AnnotateCycles()) From 
05cfdf52ad3a37e759bf5f6bb05b6dd7c63b1b46 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Wed, 17 Jun 2026 15:49:20 +0100 Subject: [PATCH 15/19] [Debug FIFO] precommit --- src/finn/transformation/fpgadataflow/set_fifo_depths.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index db6f53d218..c7bff82604 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -27,9 +27,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import os - import numpy as np +import os import warnings from onnx import TensorProto, helper from qonnx.core.datatype import DataType From 7e000762aceb901d9cf85291fc3c159534c614c3 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Fri, 19 Jun 2026 11:45:02 +0100 Subject: [PATCH 16/19] [FIFO Debug] Increase busy loop delay to keep sim alive --- finn-rtllib/sim/hdl/sim_ctrl.v | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/finn-rtllib/sim/hdl/sim_ctrl.v b/finn-rtllib/sim/hdl/sim_ctrl.v index 5e4cf627ad..12cd912e0d 100644 --- a/finn-rtllib/sim/hdl/sim_ctrl.v +++ b/finn-rtllib/sim/hdl/sim_ctrl.v @@ -2,9 +2,8 @@ module sim_ctrl(input ap_clk, input sim_finish, output sim_ctrl_out); assign sim_ctrl_out = 1'b0; `ifdef FINN_SIMULATION always @(posedge sim_finish) $finish; - // Workaround for XSI bug: final blocks execute prematurely when all - // initial blocks complete, rather than at $finish. This never-completing - // initial block prevents that. - initial forever #1; + // This ensures there is always a pending #delay in the event queue, + // preventing the kernel from concluding that the simulation is ending. 
+ initial forever #1_000_000_000; `endif endmodule From 6e7f30d9bab5c441279d3400cd83fcce2d5cf489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Fri, 19 Jun 2026 13:48:40 +0200 Subject: [PATCH 17/19] Linting. --- finn-rtllib/sim/hdl/sim_ctrl.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/finn-rtllib/sim/hdl/sim_ctrl.v b/finn-rtllib/sim/hdl/sim_ctrl.v index 12cd912e0d..b94cc8d9ff 100644 --- a/finn-rtllib/sim/hdl/sim_ctrl.v +++ b/finn-rtllib/sim/hdl/sim_ctrl.v @@ -2,8 +2,8 @@ module sim_ctrl(input ap_clk, input sim_finish, output sim_ctrl_out); assign sim_ctrl_out = 1'b0; `ifdef FINN_SIMULATION always @(posedge sim_finish) $finish; - // This ensures there is always a pending #delay in the event queue, - // preventing the kernel from concluding that the simulation is ending. + // This ensures there is always a pending #delay in the event queue, + // preventing the kernel from concluding that the simulation is ending. initial forever #1_000_000_000; `endif endmodule From 44fc768d925b24beb00b2f2fe7106f5ad8403e71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Fri, 19 Jun 2026 13:00:27 +0100 Subject: [PATCH 18/19] Prune unused module port. 
--- finn-rtllib/sim/hdl/sim_ctrl.v | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/finn-rtllib/sim/hdl/sim_ctrl.v b/finn-rtllib/sim/hdl/sim_ctrl.v index b94cc8d9ff..6dafef1719 100644 --- a/finn-rtllib/sim/hdl/sim_ctrl.v +++ b/finn-rtllib/sim/hdl/sim_ctrl.v @@ -1,5 +1,4 @@ -module sim_ctrl(input ap_clk, input sim_finish, output sim_ctrl_out); - assign sim_ctrl_out = 1'b0; +module sim_ctrl(input ap_clk, input sim_finish); `ifdef FINN_SIMULATION always @(posedge sim_finish) $finish; // This ensures there is always a pending #delay in the event queue, From 2c1e50e650cbccc3fe48db0bd6638552e8241b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Fri, 19 Jun 2026 13:41:06 +0100 Subject: [PATCH 19/19] Fixing test. --- finn-rtllib/fifo/hdl/fifo_gauge_tb.sv | 2 +- finn-rtllib/fifo/sim.sh | 18 +++++++++--------- finn-rtllib/sim/hdl/sim_ctrl.v | 9 ++++++++- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv index e74905d7d7..4e75834515 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv @@ -59,7 +59,7 @@ module fifo_gauge_tb; // Depth Monitoring uwire count_t maxcount; - fifo_gauge #(.WIDTH(W), .DEBUG_LOG(1)) dut ( + fifo_gauge #(.WIDTH(W), .DATA_LOGFILE("fifo_trace.log")) dut ( .clk, .rst, .idat, .ivld, .irdy, .odat, .ovld, .ordy, diff --git a/finn-rtllib/fifo/sim.sh b/finn-rtllib/fifo/sim.sh index c9d8eb7206..27c16edbea 100755 --- a/finn-rtllib/fifo/sim.sh +++ b/finn-rtllib/fifo/sim.sh @@ -1,11 +1,11 @@ -/**************************************************************************** - * Copyright Advanced Micro Devices, Inc. - * SPDX-License-Identifier: BSD-3-Clause - * - * @brief FIFO gauge simulation script. - * @author Thomas B. 
Preußer - ***************************************************************************/ #!/bin/bash +############################################################################## +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: BSD-3-Clause +# +# @brief FIFO gauge simulation script. +# @author Thomas B. Preußer +############################################################################## set -euo pipefail cd "$(dirname "$0")" @@ -16,10 +16,10 @@ xelab fifo_gauge_tb -debug off -s sim xsim sim -runall echo "---" -if diff -q fifo_ref.log fifo_trace.log; then +if diff -q fifo_ref.log <(grep -v '^\[' fifo_trace.log); then echo "PASS: trace matches reference ($(wc -l < fifo_ref.log) lines)" else echo "FAIL: trace mismatch" - diff fifo_ref.log fifo_trace.log | head -20 + diff fifo_ref.log <(grep -v '^\[' fifo_trace.log) | head -20 exit 1 fi diff --git a/finn-rtllib/sim/hdl/sim_ctrl.v b/finn-rtllib/sim/hdl/sim_ctrl.v index 6dafef1719..2a4b2dc2e4 100644 --- a/finn-rtllib/sim/hdl/sim_ctrl.v +++ b/finn-rtllib/sim/hdl/sim_ctrl.v @@ -1,6 +1,13 @@ +/**************************************************************************** + * Copyright Advanced Micro Devices, Inc. + * SPDX-License-Identifier: BSD-3-Clause + * + * @brief Simulation control triggering $finish upon asserting sim_finish. + * @author Shane T. Fleming + ***************************************************************************/ module sim_ctrl(input ap_clk, input sim_finish); `ifdef FINN_SIMULATION - always @(posedge sim_finish) $finish; + initial @(posedge sim_finish) $finish; // This ensures there is always a pending #delay in the event queue, // preventing the kernel from concluding that the simulation is ending. initial forever #1_000_000_000;