diff --git a/finn-rtllib/eltwise/eltwise_template.v b/finn-rtllib/eltwise/eltwise_template.v index e451c55823..0e8018ddb0 100644 --- a/finn-rtllib/eltwise/eltwise_template.v +++ b/finn-rtllib/eltwise/eltwise_template.v @@ -34,7 +34,9 @@ eltwise #( .PE($PE$), .OP($OP$), .B_SCALE($B_SCALE$), - .FORCE_BEHAVIORAL($FORCE_BEHAVIORAL$), +`ifdef FINN_SIMULATION + .FORCE_BEHAVIORAL(1), +`endif .A_FLOAT($A_FLOAT$), .B_FLOAT($B_FLOAT$), .A_WIDTH($A_WIDTH$), diff --git a/finn-rtllib/fifo/hdl/fifo_gauge.sv b/finn-rtllib/fifo/hdl/fifo_gauge.sv index 37660a0533..3799b4e8c0 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge.sv @@ -34,7 +34,8 @@ module fifo_gauge #( int unsigned WIDTH, - int unsigned COUNT_WIDTH = 32 + int unsigned COUNT_WIDTH = 32, + parameter DATA_LOGFILE = "" )( input logic clk, input logic rst, @@ -51,25 +52,48 @@ module fifo_gauge #( output logic [COUNT_WIDTH-1:0] maxcount ); + //----------------------------------------------------------------------- + // Monitoring & Debug + + // Transaction counters + longint unsigned ITxnCnt = 0; + longint unsigned OTxnCnt = 0; + int LogFd = (DATA_LOGFILE != "")? 
$fopen(DATA_LOGFILE, "w") : 0; + // The internal Queue serving as data buffer and an output register logic [WIDTH-1:0] Q[$] = {}; - logic [COUNT_WIDTH-1:0] Count = 0; - logic [COUNT_WIDTH-1:0] MaxCount = 0; + longint unsigned Count = 0; + longint unsigned MaxCount = 0; logic OVld = 0; logic [WIDTH-1:0] ODat = 'x; + final begin + if(LogFd) begin + $fwrite(LogFd, "[%m @%0t] MaxFill: %0d; Transactions: in=%0d out=%0d\n", $time, MaxCount, ITxnCnt, OTxnCnt); + $fclose(LogFd); + end + end + always_ff @(posedge clk) begin if(rst) begin - Q <= {}; + Q = {}; Count <= 0; MaxCount <= 0; OVld <= 0; ODat <= 'x; + + ITxnCnt <= 0; + OTxnCnt <= 0; end else begin - // Always take input - if(ivld) Q.push_back(idat); + // Always take input and track Transactions + if(ivld) begin + Q.push_back(idat); + if(LogFd) $fwrite(LogFd, "%0x\n", idat); + ITxnCnt <= ITxnCnt + 1; + end + if(OVld && ordy) OTxnCnt <= OTxnCnt + 1; // Take Count Count <= Q.size; diff --git a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv index b3e7d7647d..4e75834515 100644 --- a/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv +++ b/finn-rtllib/fifo/hdl/fifo_gauge_tb.sv @@ -59,7 +59,7 @@ module fifo_gauge_tb; // Depth Monitoring uwire count_t maxcount; - fifo_gauge #(.WIDTH(W)) dut ( + fifo_gauge #(.WIDTH(W), .DATA_LOGFILE("fifo_trace.log")) dut ( .clk, .rst, .idat, .ivld, .irdy, .odat, .ovld, .ordy, @@ -70,6 +70,7 @@ module fifo_gauge_tb; // Stimulus data_t Q[$] = {}; initial begin + automatic int ref_fd = $fopen("fifo_ref.log", "w"); idat = 'x; ivld = 0; @(posedge clk iff !rst); @@ -79,10 +80,12 @@ module fifo_gauge_tb; idat <= data; ivld <= 1; Q.push_back(data); + $fwrite(ref_fd, "%0x\n", data); @(posedge clk); idat <= 'x; ivld <= 0; end + $fclose(ref_fd); end //----------------------------------------------------------------------- diff --git a/finn-rtllib/fifo/hdl/fifo_template.v b/finn-rtllib/fifo/hdl/fifo_template.v index 3066d9b92f..b9692f1659 100644 --- 
a/finn-rtllib/fifo/hdl/fifo_template.v +++ b/finn-rtllib/fifo/hdl/fifo_template.v @@ -52,7 +52,7 @@ output $OUT_RANGE$ out0_V_TDATA ); `ifdef FINN_SIMULATION - fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$)) fifo ( + fifo_gauge #(.WIDTH($WIDTH$), .COUNT_WIDTH($COUNT_WIDTH$), .DATA_LOGFILE("$DATA_LOGFILE$")) fifo ( .clk(ap_clk), .rst(!ap_rst_n), .idat(in0_V_TDATA), .ivld(in0_V_TVALID), .irdy(in0_V_TREADY), .odat(out0_V_TDATA), .ovld(out0_V_TVALID), .ordy(out0_V_TREADY), diff --git a/finn-rtllib/fifo/sim.sh b/finn-rtllib/fifo/sim.sh new file mode 100755 index 0000000000..27c16edbea --- /dev/null +++ b/finn-rtllib/fifo/sim.sh @@ -0,0 +1,26 @@ +#!/bin/bash +############################################################################## +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: BSD-3-Clause +# +# @brief FIFO gauge simulation script. +# @author Thomas B. Preußer +############################################################################## +set -euo pipefail +cd "$(dirname "$0")" + +rm -f fifo_trace.log fifo_ref.log + +xvlog -sv hdl/fifo_gauge.sv hdl/fifo_gauge_tb.sv +xelab fifo_gauge_tb -debug off -s sim +xsim sim -runall + +echo "---" +if diff -q fifo_ref.log <(grep -v '^\[' fifo_trace.log); then + echo "PASS: trace matches reference ($(wc -l < fifo_ref.log) lines)" +else + echo "FAIL: trace mismatch" + # Print the first differences; filter the "[...]" summary line as the check above does + diff fifo_ref.log <(grep -v '^\[' fifo_trace.log) | head -20 || true + exit 1 +fi diff --git a/finn-rtllib/sim/hdl/sim_ctrl.v b/finn-rtllib/sim/hdl/sim_ctrl.v new file mode 100644 index 0000000000..2a4b2dc2e4 --- /dev/null +++ b/finn-rtllib/sim/hdl/sim_ctrl.v @@ -0,0 +1,15 @@ +/**************************************************************************** + * Copyright Advanced Micro Devices, Inc. + * SPDX-License-Identifier: BSD-3-Clause + * + * @brief Simulation control triggering $finish upon asserting sim_finish. + * @author Shane T. 
Fleming + ***************************************************************************/ +module sim_ctrl(input ap_clk, input sim_finish); +`ifdef FINN_SIMULATION + initial @(posedge sim_finish) $finish; + // This ensures there is always a pending #delay in the event queue, + // preventing the kernel from concluding that the simulation is ending. + initial forever #1_000_000_000; +`endif +endmodule diff --git a/finn_xsi/finn_xsi/adapter.py b/finn_xsi/finn_xsi/adapter.py index 0b73787a60..c2bde7dcf1 100644 --- a/finn_xsi/finn_xsi/adapter.py +++ b/finn_xsi/finn_xsi/adapter.py @@ -157,6 +157,10 @@ def reset_rtlsim( def close_rtlsim(sim): + sim_finish = sim.top.getPort("sim_finish") + if sim_finish is not None: + sim_finish.set(1).write_back() + sim.cycle({}) del sim diff --git a/finn_xsi/finn_xsi/sim_engine.py b/finn_xsi/finn_xsi/sim_engine.py index 0d17e581af..e15a37ff1e 100644 --- a/finn_xsi/finn_xsi/sim_engine.py +++ b/finn_xsi/finn_xsi/sim_engine.py @@ -24,30 +24,21 @@ def __init__(self, kernel, design, log=None, wdb=None): if p.isInput(): p.clear().write_back() - def cycle(updates): - # Rising Edge - clk.set(1).write_back() + def half_cycle(up): + clk.set(up).write_back() if clk2x is not None: clk2x.set(1).write_back() - # Updates after Active Edge - top.run(1) + top.run(25) + clk2x.set(0).write_back() + top.run(25) + else: + top.run(50) + + def cycle(updates): + half_cycle(1) for port, update in updates.items(): port.set_hexstr(update).write_back() - - # Edges inactive on interface & finish Cycle - if clk2x is None: - top.run(4999) - clk.set(0).write_back() - top.run(5000) - else: - top.run(2499) - clk2x.set(0).write_back() - top.run(2500) - clk.set(0).write_back() - clk2x.set(1).write_back() - top.run(2500) - clk2x.set(0).write_back() - top.run(2500) + half_cycle(0) self.top = top self.cycle = cycle diff --git a/finn_xsi/rtlsim_xsi.cpp b/finn_xsi/rtlsim_xsi.cpp index d4fe79581d..b70fe7c38f 100644 --- a/finn_xsi/rtlsim_xsi.cpp +++ b/finn_xsi/rtlsim_xsi.cpp @@ 
-14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -21,7 +22,7 @@ #include "xsi_finn.hpp" #include "rtlsim_config.hpp" -int main(int argc, char *argv[]) { +int main(int const argc, char const *const argv[]) { // Load Kernel and Design xsi::Kernel kernel(kernel_libname); @@ -34,6 +35,7 @@ int main(int argc, char *argv[]) { // Ultimate Simulation Summary std::string synopsis; + std::map maxcounts; { // RTL Simulation @@ -54,11 +56,11 @@ int main(int argc, char *argv[]) { size_t job_size; size_t job_txns; // [0:job_size] size_t total_txns; - size_t first_complete; // First completion timestamp union { // Input Stream struct { + size_t first_complete; // First completion timestamp size_t job_ticks; // throttle if job_size < job_ticks size_t await_iter; // iteration allowing start of next job }; @@ -94,7 +96,8 @@ int main(int argc, char *argv[]) { } // Find Global Control & Run Startup Sequence - std::function cycle; + std::function>&)> cycle; + std::vector> to_write; { Port *const clk = top.getPort("ap_clk"); Port *const clk2x = top.getPort("ap_clk2x"); @@ -103,24 +106,30 @@ int main(int argc, char *argv[]) { std::cerr << "No clock found on the design." << std::endl; return 1; } - cycle = clk2x? - std::function([&top, clk, clk2x](bool const up) mutable { + cycle = [half = clk2x? 
+ std::function([&top, clk, clk2x](bool const up) { clk->set(up).write_back(); clk2x->set(1).write_back(); - top.run(5); + top.run(25); clk2x->set(0).write_back(); - top.run(5); + top.run(25); }) : - std::function([&top, clk](bool const up) mutable { + std::function([&top, clk](bool const up) { clk->set(up).write_back(); - top.run(5); - }); + top.run(50); + }) + ](std::vector> &to_write) { + half(1); + for(Port &p : to_write) p.write_back(); + to_write.clear(); + half(0); + }; // Reset all Inputs, Wait for Reset Period for(Port &p : top.ports()) { if(p.isInput()) p.clear().write_back(); }; if(rst_n) { - for(unsigned i = 0; i < 16; i++) { cycle(0); cycle(1); } - rst_n->set(1).write_back(); + for(unsigned i = 0; i < 16; i++) cycle(to_write); + to_write.emplace_back(rst_n->set(1)); } } @@ -128,25 +137,21 @@ int main(int argc, char *argv[]) { std::cout << "Starting data feed with idle-output timeout of " << max_iters << " cycles ...\n" << std::endl; // Make all Inputs valid & all Outputs ready - for(auto &s : istreams) s.port_vld.set(1).write_back(); - for(auto &s : ostreams) s.port_rdy.set(1).write_back(); + for(auto &s : istreams) to_write.emplace_back(s.port_vld.set(1)); + for(auto &s : ostreams) to_write.emplace_back(s.port_rdy.set(1)); + cycle(to_write); // flush & settle before first read // Enter Simulation Loop and track Progress auto const begin = std::chrono::steady_clock::now(); - std::vector> to_write; while(true) { - //------------------------------------------------------------------- - // Clock down - then read signal updates from design - cycle(0); - // check for transactions on input streams for(auto &s : istreams) { bool const vld = s.port_vld[0]; bool const rdy = s.port_rdy.read()[0]; if(vld && !rdy) continue; - // Track successgul Transactions + // Track successful Transactions if(vld) { s.job_txns++; if(++s.total_txns == s.job_size * n_inferences) itodo--; @@ -194,12 +199,8 @@ int main(int argc, char *argv[]) { } 
//------------------------------------------------------------------- - // Clock up - then write signal updates back to design - cycle(1); - - // Write back Ports with registered updates - for(Port &p : to_write) p.write_back(); - to_write.clear(); + // Advance clock: rise, write back, fall + cycle(to_write); // Show a progress message once in a while if(++iters % 10000 == 0) { @@ -245,12 +246,32 @@ int main(int argc, char *argv[]) { "RUNTIME_S\t" << std::chrono::duration_cast(std::chrono::steady_clock::now() - begin).count(); synopsis = bld.str(); + // Read maxcount ports before $finish tears down the design + for(Port &p : top.ports()) { + if(p.isOutput()) { + char const *const name = p.name(); + if(std::strncmp(name, "maxcount", 8) == 0) { + p.read(); + maxcounts[name] = p.as_unsigned(); + } + } + } + } // done simulation // Dump Simulation Statistics to stdout and results.txt std::cout << '\n' << synopsis << std::endl; - { // Log error info to file + // Trigger $finish so that final blocks execute + { + Port *const sim_finish = top.getPort("sim_finish"); + if(sim_finish) { + sim_finish->set(1).write_back(); + top.run(1); + } + } + + { // Log error info to file (includes final block output) std::ofstream error_file("fifosim.err", std::ios::out | std::ios::trunc); error_file << top.get_error_info(); } @@ -258,14 +279,8 @@ int main(int argc, char *argv[]) { { // Synopsis and `max_count` readings to results file std::ofstream results_file("results.txt", std::ios::out | std::ios::trunc); results_file << synopsis << std::endl; - for(Port &p : top.ports()) { - if(p.isOutput()) { - char const *const name = p.name(); - if(std::strncmp(name, "maxcount", 8) == 0) { - p.read(); - results_file << name << '\t' << p.as_unsigned() << std::endl; - } - } + for(auto const &[name, val] : maxcounts) { + results_file << name << '\t' << val << std::endl; } } diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py index 6c761e76cb..654bec5ad5 100644 --- 
a/src/finn/builder/build_dataflow.py +++ b/src/finn/builder/build_dataflow.py @@ -45,7 +45,10 @@ DataflowBuildConfig, default_build_dataflow_steps, ) -from finn.builder.build_dataflow_steps import build_dataflow_step_lookup +from finn.builder.build_dataflow_steps import ( + _maybe_enable_verify_behavioral, + build_dataflow_step_lookup, +) # adapted from https://stackoverflow.com/a/39215961 @@ -191,6 +194,8 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig): if not os.path.exists(cfg.output_dir): os.makedirs(cfg.output_dir) + _maybe_enable_verify_behavioral(cfg) + # Run configuration checks config_report = run_all_config_checks(cfg) print(format_report(config_report)) diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 96ecfeb6b7..81dff67716 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -307,6 +307,11 @@ class DataflowBuildConfig: #: Only relevant if auto_fifo_strategy = LARGEFIFO_RTLSIM fifosim_save_waveform: Optional[bool] = False + #: Log the data entering each FIFO during RTL simulation and archive the logs + #: per build phase under output_dir/debug/fifo_logs. + #: Implies verify_rtlsim_behavioral, since the logs are written by fifo_gauge. + debug_fifo: Optional[bool] = False + #: Target clock frequency (in nanoseconds) for Vitis HLS synthesis. #: e.g. `hls_clk_period_ns=5.0` will target a 200 MHz clock. #: If not specified it will default to synth_clk_period_ns @@ -382,6 +387,13 @@ class DataflowBuildConfig: #: rtlsim, otherwise they will be replaced by RTL implementations. rtlsim_use_vivado_comps: Optional[bool] = True + #: Use behavioral simulation for RTLSim verification steps. + #: When True, passes -define FINN_SIMULATION to xelab, enabling faster + #: behavioral models for DSP-heavy modules (MVU, LayerNorm, Elementwise) + #: and fifo_gauge (with debug capabilities) instead of Q_srl. + #: Does not affect FIFO sizing which always uses behavioral simulation. 
+ verify_rtlsim_behavioral: Optional[bool] = False + #: If set to True, the FINN compiler tries to create an MLO design based on #: loop_body_hierarchy and loop_body_range mlo: Optional[bool] = False diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index f2164ca2c1..b751a4d56c 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -143,6 +143,50 @@ from finn.util.vivado import parse_ooc_synth_results +def _maybe_enable_verify_behavioral(cfg): + """Enable verify_rtlsim_behavioral whenever debug_fifo is requested.""" + if cfg.debug_fifo and not cfg.verify_rtlsim_behavioral: + print( + "[debug_fifo] forcing verify_rtlsim_behavioral=True so that " + "the verify phase uses fifo_gauge and produces per-FIFO logs." + ) + cfg.verify_rtlsim_behavioral = True + + +def _fifo_debug_live_dir(cfg): + """Return the directory that receives the FIFO logs of running simulations.""" + return os.path.join(cfg.output_dir, "debug", "fifo_logs", "_live") + + +def snapshot_fifo_logs(cfg, phase_name, loop_context=None): + """Archive the live FIFO logs under debug/fifo_logs/{phase_name}/{loop_context or "main"}. + + With loop_context given, only logs named "{loop_context}_*" are archived. Each archived + log is truncated afterwards, and empty logs are skipped, so that no log is archived twice. + """ + if not cfg.debug_fifo: + return + live_dir = _fifo_debug_live_dir(cfg) + if not os.path.isdir(live_dir): + return + prefix = (loop_context + "_") if loop_context else None + subdir = loop_context or "main" + dest_dir = os.path.join(cfg.output_dir, "debug", "fifo_logs", phase_name, subdir) + os.makedirs(dest_dir, exist_ok=True) + for fn in os.listdir(live_dir): + if not fn.endswith(".log"): + continue + if prefix is not None and not fn.startswith(prefix): + continue + src = os.path.join(live_dir, fn) + # Skip logs already emptied by an earlier snapshot (e.g. a per-loop one). + if os.path.getsize(src) == 0: + continue + dst = os.path.join(dest_dir, fn) + shutil.copy2(src, dst) + open(src, "w").close() + + def verify_step( model: ModelWrapper, cfg: DataflowBuildConfig, @@ -310,8 +354,11 @@ def prepare_loop_ops_fifo_sizing(node, cfg): swg_exception=cfg.default_swg_exception, vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, + debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), + debug_log_prefix=node.name + "_", ) ) + snapshot_fifo_logs(cfg, "fifo_sizing", loop_context=node.name) loop_model = 
loop_model.transform(SplitLargeFIFOs()) loop_model = loop_model.transform(RemoveShallowFIFOs()) loop_model = loop_model.transform(GiveUniqueNodeNames(prefix=node.name + "_")) @@ -907,7 +954,7 @@ def step_hw_ipgen(model: ModelWrapper, cfg: DataflowBuildConfig): for node in model.graph.node: node_inst = getCustomOp(node) node_inst.set_nodeattr("rtlsim_trace", f"{abspath}/{node.name}_rtlsim.wdb") - model = model.transform(PrepareRTLSim()) + model = model.transform(PrepareRTLSim(behav=cfg.verify_rtlsim_behavioral)) model = model.transform(SetExecMode("rtlsim")) verify_step(model, cfg, "node_by_node_rtlsim", need_parent=True) # Clear rtlsim_trace attributes to prevent later simulations from @@ -978,8 +1025,10 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): vivado_ram_style=cfg.large_fifo_mem_style, fifosim_input_throttle=cfg.fifosim_input_throttle, cfg_n_inferences=cfg.fifosim_n_inferences, + debug_log_dir=(_fifo_debug_live_dir(cfg) if cfg.debug_fifo else None), ) ) + snapshot_fifo_logs(cfg, "fifo_sizing") model = model.transform(GiveUniqueNodeNames()) loop_nodes = model.get_nodes_by_op_type("FINNLoop") for loop_node in loop_nodes: @@ -1123,10 +1172,16 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): os.makedirs(waveform_dir, exist_ok=True) abspath = os.path.abspath(waveform_dir) verify_model.set_metadata_prop("rtlsim_trace", abspath + "/verify_rtlsim.wdb") + if cfg.verify_rtlsim_behavioral: + verify_model.set_metadata_prop("rtlsim_behavioral", "1") if is_mlo(model): verify_mlo(verify_model, cfg, "stitched_ip_rtlsim") + for loop_node in verify_model.get_nodes_by_op_type("FINNLoop"): + snapshot_fifo_logs(cfg, "stitched_ip_rtlsim", loop_context=loop_node.name) + snapshot_fifo_logs(cfg, "stitched_ip_rtlsim") else: verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True) + snapshot_fifo_logs(cfg, "stitched_ip_rtlsim") os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness) return model @@ -1157,7 +1212,10 
@@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi perf = model.analysis(dataflow_performance) latency = perf["critical_path_cycles"] max_iters = latency * 1.1 + 50 - rtlsim_perf_dict = xsi_fifosim(model, rtlsim_bs, max_iters=max_iters) + # Use behav=False for performance measurement to use real RTL components + # instead of behavioral models (FINN_SIMULATION affects FIFOs, MVU, LayerNorm, + # and RTL elementwise ops) + rtlsim_perf_dict = xsi_fifosim(model, rtlsim_bs, max_iters=max_iters, behav=False) # keep keys consistent between the Python and C++-styles cycles = rtlsim_perf_dict["cycles"] clk_ns = cfg.synth_clk_period_ns diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index b734a181e5..3176e28a71 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -123,11 +123,14 @@ def rtlsim_exec_cppxsi( dummy_data_mode=False, timeout_cycles=None, throttle_cycles=0, + behav=True, ): """Use XSI C++ rtl simulation to execute given model with stitched IP. The dummy_data_mode flag controls whether the simulation is driven by dummy data or real data. The execution_context parameter must be formatted according to whether dummy or real data is used. + If behav=True (default), FINN_SIMULATION is defined and fifo_gauge is used. + If behav=False, Q_srl is used instead (no debug logging). 
Example with dummy_data = True: execution_context = { "inputs" : {"" : }, @@ -176,7 +179,7 @@ def rtlsim_exec_cppxsi( single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") debug = not (trace_file is None or trace_file == "") rtlsim_so = finnxsi.compile_sim_obj( - top_module_name, all_verilog_srcs, single_src_dir, debug=debug, behav=True + top_module_name, all_verilog_srcs, single_src_dir, debug=debug, behav=behav ) # save generated lib filename in attribute model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) @@ -292,8 +295,11 @@ def rtlsim_exec_cppxsi( runsim_env["LD_LIBRARY_PATH"] = get_vivado_root() + "/lib/lnx64.o" runsim_cmd = ["bash", "run_rtlsim.sh"] with open(sim_base + "/run_rtlsim.sh", "w") as f: + ld_path = runsim_env["LD_LIBRARY_PATH"] f.write( - f"LD_LIBRARY_PATH={runsim_env['LD_LIBRARY_PATH']} ./rtlsim_xsi > rtlsim_xsi_log.txt" + f"LD_LIBRARY_PATH={ld_path}" + " ./rtlsim_xsi > rtlsim_xsi_log.txt" + " 2> rtlsim_xsi_stderr.log" ) launch_process_helper(runsim_cmd, cwd=sim_base) @@ -341,8 +347,10 @@ def rtlsim_exec_finnxsi(model, execution_context, pre_hook=None, post_hook=None) top_module_name = top_module_file_name.strip(".v") single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") debug = not (trace_file is None or trace_file == "") + rtlsim_behavioral = model.get_metadata_prop("rtlsim_behavioral") + behav = rtlsim_behavioral is not None and rtlsim_behavioral == "1" rtlsim_so = finnxsi.compile_sim_obj( - top_module_name, all_verilog_srcs, single_src_dir, debug=debug + top_module_name, all_verilog_srcs, single_src_dir, debug=debug, behav=behav ) # save generated lib filename in attribute model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) diff --git a/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py index 2f919ffb3a..9c55e13b31 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py +++ 
b/src/finn/custom_op/fpgadataflow/rtl/elementwise_binary_rtl.py @@ -151,7 +151,6 @@ def generate_hdl(self, model, fpgapart, clk): "PE": pe, "OP": op_name, "B_SCALE": 1.0, - "FORCE_BEHAVIORAL": 0, "A_FLOAT": 1 if lhs_float else 0, "B_FLOAT": 1 if rhs_float else 0, "A_WIDTH": a_width, diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index 018d8f0417..399699996d 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -96,6 +96,7 @@ def generate_hdl(self, model, fpgapart, clk): code_gen_dict["$OUT_RANGE$"] = "[{}:0]".format(in_width - 1) code_gen_dict["$WIDTH$"] = str(in_width) code_gen_dict["$DEPTH$"] = str(depth) + code_gen_dict["$DATA_LOGFILE$"] = self.get_nodeattr("debug_log_path") # apply code generation to templates code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") with open(template_path, "r") as f: diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index e1ae3e894f..2351044387 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -64,6 +64,7 @@ def get_nodeattr_types(self): # the FIFO does not need its own FIFOs "inFIFODepths": ("ints", False, [0]), "outFIFODepths": ("ints", False, [0]), + "debug_log_path": ("s", False, ""), } ) diff --git a/src/finn/qnn-data/build_dataflow/build.py b/src/finn/qnn-data/build_dataflow/build.py index 6cc7ff2419..fbad13c237 100644 --- a/src/finn/qnn-data/build_dataflow/build.py +++ b/src/finn/qnn-data/build_dataflow/build.py @@ -48,6 +48,7 @@ synth_clk_period_ns=10.0, board=platform_name, shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, + debug_fifo=True, generate_outputs=[ build_cfg.DataflowOutputType.PYNQ_DRIVER, build_cfg.DataflowOutputType.STITCHED_IP, diff --git a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json 
b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json index 81e77a1606..9360075916 100644 --- a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json +++ b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json @@ -9,6 +9,7 @@ "shell_flow_type": "vivado_zynq", "verify_save_rtlsim_waveforms": true, "fifosim_save_waveform": true, + "debug_fifo": true, "verify_steps": [ "initial_python", "streamlined_python", diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index 8511ee9e32..63ebc99077 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -390,6 +390,21 @@ def insert_signature(self, checksum_count): self.connect_cmds.append("set_property name s_axilite_info [get_bd_intf_ports s_axi_0]") self.connect_cmds.append("assign_bd_address") + def insert_sim_ctrl(self): + sim_ctrl_src = "$::env(FINN_ROOT)/finn-rtllib/sim/hdl/sim_ctrl.v" + sim_ctrl_name = "sim_ctrl_0" + self.create_cmds.append("add_files -norecurse %s" % sim_ctrl_src) + self.create_cmds.append( + "create_bd_cell -type module -reference sim_ctrl %s" % sim_ctrl_name + ) + self.connect_cmds.append( + "connect_bd_net [get_bd_ports ap_clk] [get_bd_pins %s/ap_clk]" % sim_ctrl_name + ) + self.connect_cmds.append( + "make_bd_pins_external [get_bd_pins %s/sim_finish]" % sim_ctrl_name + ) + self.connect_cmds.append("set_property name sim_finish [get_bd_ports sim_finish_0]") + def apply(self, model): # ensure non-relative readmemh .dat files model = model.transform(ReplaceVerilogRelPaths()) @@ -467,6 +482,8 @@ def apply(self, model): checksum_layers = model.get_nodes_by_op_type("CheckSum_hls") self.insert_signature(len(checksum_layers)) + self.insert_sim_ctrl() + # create a temporary folder for the project prjname = "finn_vivado_stitch_proj" vivado_stitch_proj_dir = make_build_dir(prefix="vivado_stitch_proj_") diff --git 
a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index fa317265a6..c7bff82604 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -28,6 +28,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np +import os import warnings from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -195,12 +196,14 @@ def apply(self, model): return (model, False) -def xsi_fifosim(model, n_inferences, max_iters=None, throttle_cycles=0): +def xsi_fifosim(model, n_inferences, max_iters=None, throttle_cycles=0, behav=True): """Create a XSI model of stitched IP and use a simple C++ driver to drive the input stream. Useful for FIFO sizing, latency and throughput measurement. If max_iters is None, use the default liveness threshold instead. throttle_cycles can be used for throttling - the input stream every time a frame is finished.""" + the input stream every time a frame is finished. + If behav=True (default), FINN_SIMULATION is defined and fifo_gauge is used. 
+ If behav=False, Q_srl is used instead (no debug logging).""" iname = model.get_first_global_in() first_node = model.find_consumer(iname) @@ -217,6 +220,7 @@ def xsi_fifosim(model, n_inferences, max_iters=None, throttle_cycles=0): dummy_data_mode=True, timeout_cycles=max_iters, throttle_cycles=throttle_cycles, + behav=behav, ) return ret_dict @@ -275,6 +279,8 @@ def __init__( vivado_ram_style="auto", fifosim_input_throttle=True, cfg_n_inferences=2, + debug_log_dir=None, + debug_log_prefix="", ): super().__init__() self.fpgapart = fpgapart @@ -287,6 +293,8 @@ def __init__( self.cfg_n_inferences = cfg_n_inferences self.mlo_max_iter = 0 self.ind_map = {} + self.debug_log_dir = debug_log_dir + self.debug_log_prefix = debug_log_prefix def apply(self, model): model = model.transform(GiveUniqueNodeNames()) @@ -403,6 +411,14 @@ def apply(self, model): if (self.max_depth is not None) and (node.get_nodeattr("depth") != self.max_depth): node.set_nodeattr("depth", self.max_depth) + if self.debug_log_dir is not None: + os.makedirs(os.path.abspath(self.debug_log_dir), exist_ok=True) + for node in model.get_nodes_by_op_type("StreamingFIFO_rtl"): + log_path = os.path.abspath( + os.path.join(self.debug_log_dir, self.debug_log_prefix + node.name + ".log") + ) + getCustomOp(node).set_nodeattr("debug_log_path", log_path) + # insert FIFOs and do all transformations for RTLsim model = model.transform(AnnotateCycles()) perf = model.analysis(dataflow_performance)