Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
04def38
Analytical FIFO sizing with a set of tests for generation trees and f…
Oct 27, 2025
b1a14a9
switched to assert tests, added a helper sizing function, removed reu…
Oct 29, 2025
04eb103
change testing function name
Oct 29, 2025
f5c9c43
add pool layer characterization, reorder fifo sizing step to after hw…
Oct 30, 2025
d5320bf
add SWG tree model which is not super accurate, TODO to fix it
Oct 30, 2025
58c3f9f
fix fmpadding wrong func name
Oct 30, 2025
7adbd80
added downsampler, 30 delta on the tests, will improve together with …
Oct 31, 2025
aba822c
updates to pol and label select, introduce more debugging information
Nov 6, 2025
3507019
pool and swg improvements
Nov 10, 2025
68c822e
SWG mobilenet node test
Nov 10, 2025
9254061
Merge branch 'dev' into feature/analytical-fifo-sizing
lstasytis Dec 16, 2025
63e0c58
Merge remote-tracking branch 'upstream/dev' into feature/analytical-f…
auphelia Jan 7, 2026
cf0ba17
Run pre-commit
auphelia Jan 7, 2026
da70e50
nasty analytical fifosizing bugfix
lstasytis Feb 16, 2026
7f86278
second adjustment - fixing aggressive strategy, small refactor
lstasytis Feb 17, 2026
5bda11e
bugfix
lstasytis Feb 17, 2026
f2a17ba
remove prints from swg characterization
lstasytis Feb 17, 2026
7be2629
fix wrong merge removing JIT
lstasytis Feb 17, 2026
8f06dc2
removing dwc from fifo sizing consideration
Feb 21, 2026
af85830
Merge remote-tracking branch 'upstream/dev' into feature/analytical-f…
auphelia May 21, 2026
09dbd19
[HWCustomOp] Extend rtlsim multi io by batch parameter and delete cus…
auphelia May 21, 2026
fbcd896
[CustomOp] Update derive TAV method to allow for pre-hook for rtlsim
auphelia May 25, 2026
34d7b61
add downsampler node onnx files of reference ground truths
Jun 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ markers =
bnn_kv260: mark tests that execute KV260 BNN tests
bnn_pynq: mark tests that execute Pynq-Z1 BNN tests
bnn_zcu104: mark tests that execute ZCU104 BNN tests
node_tree_modeling: mark tests for analytical FIFO sizing tree models
norecursedirs =
dist
build
Expand Down
82 changes: 82 additions & 0 deletions src/finn/analysis/fpgadataflow/dataflow_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from qonnx.custom_op.registry import getCustomOp

from finn.util.basic import decompress_string_to_numpy
from finn.util.fpgadataflow import is_hls_node, is_rtl_node


Expand Down Expand Up @@ -77,3 +78,84 @@ def dataflow_performance(model):
"max_cycles": int(max_cycles),
"max_cycles_node_name": max_node_name,
}


def max_period(model):
    """Return the longest characterized I/O period among the nodes of the graph.

    Every HLS/RTL node is inspected, except stream adders, stream duplicators
    and streaming FIFOs. For each inspected node the ``io_chrc_in`` and
    ``io_chrc_out`` node attributes are decoded with
    ``decompress_string_to_numpy``; the node's period is half the length of
    the first entry of the decoded value, taking the larger of the input and
    output side.

    Preconditions:
    - model consists of HLS/RTL nodes
    - inspected nodes carry the ``io_chrc_in`` and ``io_chrc_out`` attributes
      (presumably written by the characterization / token access vector
      derivation transformations -- confirm against the caller)

    Returns a dict with a single entry:
    - max_cycles : largest period found, 0 if no node was inspected
    """
    skipped_op_types = [
        "AddStreams_hls",
        "DuplicateStreams_hls",
        "StreamingFIFO_hls",
        "StreamingFIFO_rtl",
    ]
    longest = 0

    for candidate in model.graph.node:
        # guard clauses: ignore empty slots and op types excluded from sizing
        if candidate is None or candidate.op_type in skipped_op_types:
            continue
        if not (is_hls_node(candidate) or is_rtl_node(candidate)):
            continue

        op = getCustomOp(candidate)
        in_trace = decompress_string_to_numpy(op.get_nodeattr("io_chrc_in"))[0]
        out_trace = decompress_string_to_numpy(op.get_nodeattr("io_chrc_out"))[0]
        # each side's period is half of its decoded trace length
        longest = max(longest, len(in_trace) // 2, len(out_trace) // 2)

    return {"max_cycles": int(longest)}


def max_remaining_period(model, node):
    """Return the longest characterized I/O period from ``node`` to the end of the graph.

    Works like a whole-graph maximum, but only the nodes at and after the
    position of ``node`` in ``model.graph.node`` are inspected. Stream adders,
    stream duplicators and streaming FIFOs are skipped, as are nodes that are
    neither HLS nor RTL nodes. For each inspected node the ``io_chrc_in`` and
    ``io_chrc_out`` node attributes are decoded with
    ``decompress_string_to_numpy``; the node's period is half the length of
    the first entry of the decoded value, taking the larger of the input and
    output side.

    Preconditions:
    - model consists of HLS/RTL nodes
    - ``node`` is an element of ``model.graph.node``
    - inspected nodes carry the ``io_chrc_in`` and ``io_chrc_out`` attributes
      (presumably written by the characterization / token access vector
      derivation transformations -- confirm against the caller)

    Returns a dict with a single entry:
    - max_cycles : largest period found, 0 if no node was inspected

    Raises:
    - ValueError : if ``node`` is not part of ``model.graph.node``
    """
    # built once instead of on every loop iteration
    skipped_op_types = (
        "AddStreams_hls",
        "DuplicateStreams_hls",
        "StreamingFIFO_hls",
        "StreamingFIFO_rtl",
    )
    graph_nodes = list(model.graph.node)
    start_index = graph_nodes.index(node)

    max_cycles = 0
    # a distinct loop variable keeps the ``node`` parameter from being shadowed
    for candidate in graph_nodes[start_index:]:
        if candidate is None or candidate.op_type in skipped_op_types:
            continue
        if not (is_hls_node(candidate) or is_rtl_node(candidate)):
            continue

        inst = getCustomOp(candidate)
        # the period comes solely from the decoded traces; the former read of
        # "io_chrc_period" was overwritten before use and has been dropped
        cycles_in = len(decompress_string_to_numpy(inst.get_nodeattr("io_chrc_in"))[0]) // 2
        cycles_out = len(decompress_string_to_numpy(inst.get_nodeattr("io_chrc_out"))[0]) // 2
        max_cycles = max(max_cycles, cycles_in, cycles_out)

    return {"max_cycles": int(max_cycles)}
48 changes: 46 additions & 2 deletions src/finn/builder/build_dataflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,30 @@

class AutoFIFOSizingMethod(str, Enum):
    """Select the type of automatic FIFO sizing strategy."""

    # value "characterize"
    CHARACTERIZE = "characterize"
    # value "analytical": analytical FIFO sizing
    ANALYTIC = "analytical"
    # value "largefifo_rtlsim"
    LARGEFIFO_RTLSIM = "largefifo_rtlsim"


class TAVGenerationMethod(str, Enum):
    """Select the strategy for constructing token access vectors of an operator."""

    # derive the token access vectors empirically through rtlsim
    RTLSIM = "rtlsim"

    # use the tree model of an operator
    TREE_MODEL = "tree_model"


class TAVUtilizationMethod(str, Enum):
    """Select how an operator's token access vectors are used for buffer sizing."""

    # relax using the worst-case ratio of data rates between a consumer and producer
    CONSERVATIVE_RELAXATION = "conservative_relaxation"

    # relax using the average-case ratio of data rates between a consumer and producer
    AGGRESSIVE_RELAXATION = "aggressive_relaxation"

    # do not relax at all; the token access vectors are used as-is
    NO_RELAXATION = "no_relaxation"


class ShellFlowType(str, Enum):
"""For builds that produce a bitfile, select the shell flow that will integrate
the FINN-generated accelerator."""
Expand Down Expand Up @@ -291,6 +310,31 @@ class DataflowBuildConfig:
#: setting the FIFO sizes.
auto_fifo_strategy: Optional[AutoFIFOSizingMethod] = AutoFIFOSizingMethod.LARGEFIFO_RTLSIM

#: Which strategy will be used for token access vector generation for FIFO sizing.
#: RTLSIM will result in performing RTLSIM for each node
#: to deduce the token access vectors empirically
#: TREE_MODEL will use the tree model of an operator if available, avoiding the generation
#: of IP cores.
tav_generation_strategy: Optional[TAVGenerationMethod] = TAVGenerationMethod.RTLSIM

#: Which strategy will be used for utilizing token access vectors during FIFO sizing.
#: CONSERVATIVE_RELAXATION uses the worst-case ratio of data rates between a consumer
#: and producer.
#: AGGRESSIVE_RELAXATION uses the average-case ratio of data rates between a consumer
#: and producer.
#: NO_RELAXATION applies no relaxation and uses the token access vectors as-is.
tav_utilization_strategy: Optional[
TAVUtilizationMethod
] = TAVUtilizationMethod.CONSERVATIVE_RELAXATION

#: When True, skips the resynthesis steps after fifo sizing. This makes it
#: possible to run the step for rapid fifo size analysis during
#: automatic folding optimizations or as a first approximation.
skip_resynth_during_fifo_sizing: Optional[bool] = False

#: Avoid using C++ rtlsim for auto FIFO sizing and rtlsim throughput test
#: if set to True, always using Python instead
force_python_rtlsim: Optional[bool] = False

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn't this option deprecated on the dev branch?


#: Memory resource type for large FIFOs
#: Only relevant when `auto_fifo_depths = True`
large_fifo_mem_style: Optional[LargeFIFOMemStyle] = LargeFIFOMemStyle.AUTO
Expand Down
126 changes: 119 additions & 7 deletions src/finn/builder/build_dataflow_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@

import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
import finn.transformation.streamline.absorb as absorb
from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
from finn.analysis.fpgadataflow.dataflow_performance import (
dataflow_performance,
max_period,
)
from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.analysis.fpgadataflow.op_and_param_counts import (
Expand Down Expand Up @@ -88,8 +91,13 @@
)
from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
from finn.transformation.fpgadataflow.derive_characteristic import (
DeriveCharacteristic,
DelayCharacteristicFunctions,
DeriveFIFOSizes,
DeriveTokenAccessVectors,
HandleBranches,
JustInTimeSynthesize,
LocalStretchCharacteristicFunctions,
ProducerDelayCharacteristicFunctions,
)
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
Expand All @@ -111,6 +119,7 @@
)
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.set_fifo_depths import (
CapConvolutionFIFODepths,
InsertAndSetFIFODepths,
RemoveShallowFIFOs,
SplitLargeFIFOs,
Expand Down Expand Up @@ -934,7 +943,9 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):

if cfg.auto_fifo_depths:
strategy = cfg.auto_fifo_strategy
if strategy == "characterize" or is_mlo(model):

# MLO models must use RTL sim characterize approach (no tree models)
if is_mlo(model):
model = model.transform(InsertDWC())
model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
model = model.transform(GiveUniqueNodeNames())
Expand All @@ -944,8 +955,17 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(HLSSynthIP(cfg._resolve_fpga_part()))
model = model.transform(PrepareRTLSim(behav=True))
model = model.transform(AnnotateCycles())
period = model.analysis(dataflow_performance)["max_cycles"] + 10
model = model.transform(DeriveCharacteristic(period))
period = int(model.analysis(dataflow_performance)["max_cycles"]) + 10
# Use rtlsim strategy to force RTL simulation (no tree models for MLO)
model = model.transform(
DeriveTokenAccessVectors(
model,
period,
strategy="rtlsim",
fpga_part=cfg._resolve_fpga_part(),
clk_period=cfg._resolve_hls_clk_period(),
)
)
model = model.transform(DeriveFIFOSizes())
model = model.transform(
InsertFIFO(
Expand All @@ -963,6 +983,96 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())

elif strategy == "analytical":
# Analytical FIFO sizing with tree models (Lukas's approach)
model = model.transform(InsertDWC())
model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(AnnotateCycles())

if cfg.tav_generation_strategy == "tree_model":
# if we have tree models, only rtlsim the nodes that do not have one
only_jit_nodes_without_tree = True
else:
# rtlsim everything by force if not using trees
only_jit_nodes_without_tree = False

model = model.transform(
JustInTimeSynthesize(
cfg._resolve_fpga_part(),
cfg._resolve_hls_clk_period(),
only_jit_nodes_without_tree,
)
)
period = int(model.analysis(dataflow_performance)["max_cycles"])
model = model.transform(
DeriveTokenAccessVectors(
model,
period,
cfg.tav_generation_strategy,
cfg._resolve_fpga_part(),
cfg._resolve_hls_clk_period(),
)
)

period = int(model.analysis(dataflow_performance)["max_cycles"])
model = model.transform(
LocalStretchCharacteristicFunctions(
1,
period,
nodes_to_ignore=[],
)
)

period = int(model.analysis(dataflow_performance)["max_cycles"])

model = model.transform(HandleBranches(model, period))

period = int(model.analysis(dataflow_performance)["max_cycles"])
model = model.transform(
DelayCharacteristicFunctions(
1,
period,
nodes_to_ignore=[],
)
)

period = int(model.analysis(dataflow_performance)["max_cycles"])

model = model.transform(
ProducerDelayCharacteristicFunctions(
1,
period,
nodes_to_ignore=[],
)
)

period = int(model.analysis(max_period)["max_cycles"])

model = model.transform(
DeriveFIFOSizes(
period=period,
nodes_to_ignore=[],
global_offset_correction=True,
tav_utilization_strategy=cfg.tav_utilization_strategy,
)
)

model = model.transform(
InsertFIFO(
vivado_ram_style=cfg.large_fifo_mem_style,
max_qsrl_depth=256,
create_shallow_fifos=True,
)
)

model = model.transform(SpecializeLayers(cfg._resolve_fpga_part()))
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
if cfg.default_swg_exception:
model = model.transform(CapConvolutionFIFODepths(max_qsrl_depth=256))

elif strategy == "largefifo_rtlsim":
if cfg.fifosim_save_waveform:
report_dir = cfg.output_dir + "/report"
Expand Down Expand Up @@ -1041,8 +1151,10 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):

# after FIFOs are ready to go, call PrepareIP and HLSSynthIP again
# this will only run for the new nodes (e.g. FIFOs and DWCs)
model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()))
model = model.transform(HLSSynthIP(cfg._resolve_fpga_part()))
if not cfg.skip_resynth_during_fifo_sizing:
model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()))
model = model.transform(HLSSynthIP(cfg._resolve_fpga_part()))

return model


Expand Down
Loading