diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn index 5126ed3ff4..f960dc08a8 100644 --- a/docker/Dockerfile.finn +++ b/docker/Dockerfile.finn @@ -118,6 +118,7 @@ RUN pip install pytest-metadata==1.7.0 RUN pip install pytest-html==3.0.0 RUN pip install pytest-html-merger==0.0.8 RUN pip install pytest-cov==4.1.0 +RUN pip install pyyaml==6.0.1 # extra dependencies from other FINN deps # installed in Docker image to make entrypoint script go faster diff --git a/docker/jenkins/Jenkinsfile b/docker/jenkins/Jenkinsfile index 6d51fffd64..cca3436363 100644 --- a/docker/jenkins/Jenkinsfile +++ b/docker/jenkins/Jenkinsfile @@ -93,7 +93,7 @@ pipeline { cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR) // Pass in the marker to run with pytest and the XML test results filename - runDockerPytestWithMarker("fpgadataflow", "${env.TEST_NAME}", "--cov --cov-report=html:coverage_fpgadataflow") + runDockerPytestWithMarker("fpgadataflow", "${env.TEST_NAME}", "--cov --cov-report=html:coverage_fpgadataflow -n ${env.NUM_PYTEST_WORKERS} --dist worksteal") // Stash the test results file(s) stash name: env.TEST_NAME, includes: "${env.TEST_NAME}.xml,${env.TEST_NAME}.html" @@ -324,21 +324,17 @@ void runDockerPytestWithMarker(String marker, String testResultsFilename, String sh """./run-docker.sh python -m pytest -m ${marker} --junitxml=${testResultsFilename}.xml --html=${testResultsFilename}.html --self-contained-html ${additionalOptions}""" } -def findBoardBuildFiles(String searchDir, String dirToFind) { - def result = sh(script: "find $searchDir -type d -name \"$dirToFind*\"", returnStdout: true).trim() - if (result.empty) { - error "Directory containing '$dirToFind' not found." 
- } - return result -} - void findCopyZip(String board, String findDir, String copyDir) { - def buildDir = findBoardBuildFiles(findDir, "hw_deployment_${board}") - sh "cp -r ${buildDir}/${board} ${copyDir}/" - dir(copyDir) { - sh "zip -r ${board}.zip ${board}/" - sh "mkdir -p ${env.ARTIFACT_DIR}/${copyDir}/" - sh "cp ${board}.zip ${env.ARTIFACT_DIR}/${copyDir}/" + sh "mkdir -p ${copyDir}" + try { + sh "cp -r ${findDir}/hw_deployment_*/${board} ${copyDir}/" + dir(copyDir) { + sh "zip -r ${board}.zip ${board}/" + sh "mkdir -p ${env.ARTIFACT_DIR}/${copyDir}/" + sh "cp ${board}.zip ${env.ARTIFACT_DIR}/${copyDir}/" + } + } catch (err) { + error "No ${board} hw_deployment_* build artifacts found in ${findDir}" } } diff --git a/finn-rtllib/fifo/hdl/Q_srl.v b/finn-rtllib/fifo/hdl/Q_srl.v index 0b01973163..9eec01f81a 100644 --- a/finn-rtllib/fifo/hdl/Q_srl.v +++ b/finn-rtllib/fifo/hdl/Q_srl.v @@ -119,6 +119,15 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount); reg i_b_reg // - true iff !full /* synthesis syn_allow_retiming=0 */ ; + // Parameter Checking + initial begin + if(depth < 2) begin + $error("%m: FIFO depth must be two or higher."); + $finish; + end + end + + assign addr_full_ = (state_==state_more) && (addr_==depth-2); // - queue full assign addr_zero_ = (addr==0); // - queue contains 2 (or 1,0) diff --git a/finn-rtllib/memstream/hdl/Q_srl.v b/finn-rtllib/memstream/hdl/Q_srl.v deleted file mode 100644 index 11cef604e0..0000000000 --- a/finn-rtllib/memstream/hdl/Q_srl.v +++ /dev/null @@ -1,308 +0,0 @@ -// original source: -// https://github.com/nachiket/tdfc/blob/master/verilog/queues/Q_srl_oreg3_prefull_SIMPLE.v - - -// Copyright (c) 1999 The Regents of the University of California -// Copyright (c) 2010 The Regents of the University of Pennsylvania -// Copyright (c) 2011 Department of Electrical and Electronic Engineering, Imperial College London -// Copyright (c) 2020 Xilinx -// -// Permission to use, copy, modify, and distribute this 
software and -// its documentation for any purpose, without fee, and without a -// written agreement is hereby granted, provided that the above copyright -// notice and this paragraph and the following two paragraphs appear in -// all copies. -// -// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR -// DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING -// LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, -// EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF -// SUCH DAMAGE. -// -// THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, -// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -// AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON -// AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO -// PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. -// - -// Q_srl_oreg3_prefull_SIMPLE.v -// -// - In-page queue with parameterizable depth, bit width -// - Stream I/O is triple (data, valid, back-pressure), -// with EOS concatenated into the data -// - Flow control for input & output is combinationally decoupled -// - 2 <= depth <= 256 -// * (depth >= 2) is required to decouple I/O flow control, -// where empty => no produce, full => no consume, -// and depth 1 would ping-pong between the two at half rate -// * (depth <= 256) can be modified -// by changing ''synthesis loop_limit X'' below -// and changing ''addrwidth'' or its log computation -// - 1 <= width -// - Queue storage is in SRL16E, up to depth 16 per LUT per bit-slice, -// plus output register (for fast output) -// - Queue addressing is done by ''addr'' up-down counter -// - Queue fullness is checked by comparator (addr==depth) -// - Queue fullness is pre-computed for next cycle -// - Queue input back-pressure is pre-computed for next cycle -// - Queue output valid (state!=state__empty) is pre-computed 
for next cycle -// (necessary since SRL data output reg requires non-boolean state) -// - FSM has 3 states (empty, one, more) -// - When empty, continue to emit most recently emitted value (for debugging) -// -// - Queue slots used = / (state==state_empty) ? 0 -// | (state==state_one) ? 1 -// \ (state==state_more) ? addr+2 -// - Queue slots used <= depth -// - Queue slots remaining = depth - used -// = / (state==state_empty) ? depth -// | (state==state_one) ? depth-1 -// \ (state==state_more) ? depth-2-addr -// -// - Synplify 7.1 / 8.0 -// - Eylon Caspi, 9/11/03, 8/18/04, 3/29/05 - - -`ifdef Q_srl -`else -`define Q_srl - - -module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount); - - parameter depth = 16; // - greatest #items in queue (2 <= depth <= 256) - parameter width = 16; // - width of data (i_d, o_d) - - parameter addrwidth = $clog2(depth); - - input clock; - input reset; - - input [width-1:0] i_d; // - input stream data (concat data + eos) - input i_v; // - input stream valid - output i_r; // - input stream ready - wire i_b; // - input stream back-pressure - - output [width-1:0] o_d; // - output stream data (concat data + eos) - output o_v; // - output stream valid - input o_r; // - output stream ready - wire o_b; // - output stream back-pressure - - output [addrwidth:0] count; // - output number of elems in queue - output [addrwidth:0] maxcount; // - maximum observed count since reset - - reg [addrwidth:0] maxcount_reg; // - maximum count seen until now - reg [addrwidth-1:0] addr, addr_, a_; // - SRL16 address - // for data output - reg shift_en_; // - SRL16 shift enable - reg [width-1:0] srl [depth-2:0]; // - SRL16 memory - reg shift_en_o_; // - SRLO shift enable - reg [width-1:0] srlo_, srlo // - SRLO output reg - /* synthesis syn_allow_retiming=0 */ ; - - parameter state_empty = 2'd0; // - state empty : o_v=0 o_d=UNDEFINED - parameter state_one = 2'd1; // - state one : o_v=1 o_d=srlo - parameter state_more = 2'd2; // - state more : 
o_v=1 o_d=srlo - // #items in srl = addr+2 - - reg [1:0] state, state_; // - state register - - wire addr_full_; // - true iff addr==depth-2 on NEXT cycle - reg addr_full; // - true iff addr==depth-2 - wire addr_zero_; // - true iff addr==0 - wire o_v_reg_; // - true iff state_empty on NEXT cycle - reg o_v_reg // - true iff state_empty - /* synthesis syn_allow_retiming=0 */ ; - wire i_b_reg_; // - true iff !full on NEXT cycle - reg i_b_reg // - true iff !full - /* synthesis syn_allow_retiming=0 */ ; - - assign addr_full_ = (state_==state_more) && (addr_==depth-2); - // - queue full - assign addr_zero_ = (addr==0); // - queue contains 2 (or 1,0) - assign o_v_reg_ = (state_!=state_empty); // - output valid if non-empty - assign i_b_reg_ = addr_full_; // - input bp if full - assign o_d = srlo; // - output data from queue - assign o_v = o_v_reg; // - output valid if non-empty - assign i_b = i_b_reg; // - input bp if full - assign maxcount = maxcount_reg; - - assign i_r = !i_b; - assign o_b = !o_r; - - assign count = (state==state_more ? addr+2 : (state==state_one ? 1 : 0)); - - // - ''always'' block with both FFs and SRL16 does not work, - // since FFs need reset but SRL16 does not - - always @(posedge clock) begin // - seq always: FFs - if (reset) begin - state <= state_empty; - addr <= 0; - addr_full <= 0; - o_v_reg <= 0; - - i_b_reg <= 0; - maxcount_reg <= 0; - - end - else begin - state <= state_; - addr <= addr_; - addr_full <= addr_full_; - o_v_reg <= o_v_reg_; - i_b_reg <= i_b_reg_; - maxcount_reg <= (count > maxcount_reg ? 
count : maxcount_reg); - end - end // always @ (posedge clock) - - always @(posedge clock) begin // - seq always: srlo - // - infer enabled output reg at end of shift chain - // - input first element from i_d, all subsequent elements from SRL16 - if (reset) begin - srlo <= 0; - end - else begin - if (shift_en_o_) begin - srlo <= srlo_; - end - end - end // always @ (posedge clock) - - always @(posedge clock) begin // - seq always: srl - // - infer enabled SRL16E from shifting srl array - // - no reset capability; srl[] contents undefined on reset - if (shift_en_) begin - // synthesis loop_limit 256 - for (a_=depth-2; a_>0; a_=a_-1) begin - srl[a_] = srl[a_-1]; - end - srl[0] <= i_d; - end - end // always @ (posedge clock or negedge reset) - - always @* begin // - combi always - srlo_ <= 'bx; - shift_en_o_ <= 1'bx; - shift_en_ <= 1'bx; - addr_ <= 'bx; - state_ <= 2'bx; - case (state) - - state_empty: begin // - (empty, will not produce) - if (i_v) begin // - empty & i_v => consume - srlo_ <= i_d; - shift_en_o_ <= 1; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_one; - end - else begin // - empty & !i_v => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_empty; - end - end - - state_one: begin // - (contains one) - if (i_v && o_b) begin // - one & i_v & o_b => consume - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1; - addr_ <= 0; - state_ <= state_more; - end - else if (i_v && !o_b) begin // - one & i_v & !o_b => cons+prod - srlo_ <= i_d; - shift_en_o_ <= 1; - shift_en_ <= 1; - addr_ <= 0; - state_ <= state_one; - end - else if (!i_v && o_b) begin // - one & !i_v & o_b => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_one; - end - else if (!i_v && !o_b) begin // - one & !i_v & !o_b => produce - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1'bx; - addr_ <= 0; - state_ <= state_empty; - end - end // case: state_one - - state_more: begin // - (contains more than 
one) - if (addr_full || (depth==2)) begin - // - (full, will not consume) - // - (full here if depth==2) - if (o_b) begin // - full & o_b => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 0; - addr_ <= addr; - state_ <= state_more; - end - else begin // - full & !o_b => produce - srlo_ <= srl[addr]; - shift_en_o_ <= 1; - shift_en_ <= 0; -// addr_ <= addr-1; -// state_ <= state_more; - addr_ <= addr_zero_ ? 0 : addr-1; - state_ <= addr_zero_ ? state_one : state_more; - end - end - else begin // - (mid: neither empty nor full) - if (i_v && o_b) begin // - mid & i_v & o_b => consume - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 1; - addr_ <= addr+1; - state_ <= state_more; - end - else if (i_v && !o_b) begin // - mid & i_v & !o_b => cons+prod - srlo_ <= srl[addr]; - shift_en_o_ <= 1; - shift_en_ <= 1; - addr_ <= addr; - state_ <= state_more; - end - else if (!i_v && o_b) begin // - mid & !i_v & o_b => idle - srlo_ <= 'bx; - shift_en_o_ <= 0; - shift_en_ <= 0; - addr_ <= addr; - state_ <= state_more; - end - else if (!i_v && !o_b) begin // - mid & !i_v & !o_b => produce - srlo_ <= srl[addr]; - shift_en_o_ <= 1; - shift_en_ <= 0; - addr_ <= addr_zero_ ? 0 : addr-1; - state_ <= addr_zero_ ? 
state_one : state_more; - end - end // else: !if(addr_full) - end // case: state_more - - default: begin - srlo_ <= 'bx; - shift_en_o_ <= 1'bx; - shift_en_ <= 1'bx; - addr_ <= 'bx; - state_ <= 2'bx; - end // case: default - - endcase // case(state) - end // always @ * - -endmodule // Q_srl - - -`endif // `ifdef Q_srl diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb index 8b8cff8ee9..507b1022e6 100644 --- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb +++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb @@ -484,8 +484,7 @@ "metadata": {}, "outputs": [], "source": [ - "from shutil import copy\n", - "from distutils.dir_util import copy_tree\n", + "from shutil import copy, copytree\n", "\n", "# create directory for deployment files\n", "deployment_dir = make_build_dir(prefix=\"pynq_deployment_\")\n", @@ -503,7 +502,7 @@ "\n", "# driver.py and python libraries\n", "pynq_driver_dir = model.get_metadata_prop(\"pynq_driver_dir\")\n", - "copy_tree(pynq_driver_dir, deployment_dir)" + "copytree(pynq_driver_dir, deployment_dir, dirs_exist_ok=True)" ] }, { diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb index 675ba23d2d..bb5e357b66 100644 --- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb +++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb @@ -895,8 +895,7 @@ "metadata": {}, "outputs": [], "source": [ - "from shutil import copy\n", - "from distutils.dir_util import copy_tree\n", + "from shutil import copy, copytree\n", "\n", "# create directory for deployment files\n", "deployment_dir = make_build_dir(prefix=\"pynq_deployment_\")\n", @@ -914,7 +913,7 @@ "\n", "# driver.py and python libraries\n", "pynq_driver_dir = model.get_metadata_prop(\"pynq_driver_dir\")\n", - "copy_tree(pynq_driver_dir, deployment_dir)" + "copytree(pynq_driver_dir, 
deployment_dir, dirs_exist_ok=True)" ] }, { diff --git a/run-docker.sh b/run-docker.sh index ec55299f6c..66ef8f00f2 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -88,7 +88,7 @@ SCRIPTPATH=$(dirname "$SCRIPT") : ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"} : ${XRT_DEB_VERSION="xrt_202220.2.14.354_22.04-amd64-xrt"} : ${FINN_HOST_BUILD_DIR="/tmp/$DOCKER_INST_NAME"} -: ${FINN_DOCKER_TAG="xilinx/finn:$(git describe --always --tags --dirty).$XRT_DEB_VERSION"} +: ${FINN_DOCKER_TAG="xilinx/finn:$(OLD_PWD=$(pwd); cd $SCRIPTPATH; git describe --always --tags --dirty; cd $OLD_PWD).$XRT_DEB_VERSION"} : ${FINN_DOCKER_PREBUILT="0"} : ${FINN_DOCKER_RUN_AS_ROOT="0"} : ${FINN_DOCKER_GPU="$(docker info | grep nvidia | wc -m)"} diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index ab2280554c..5163b2dbdb 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -33,7 +33,6 @@ import shutil import warnings from copy import deepcopy -from distutils.dir_util import copy_tree from functools import partial from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp @@ -656,7 +655,9 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig): ) ) # TODO copy all ip sources into output dir? as zip? 
- copy_tree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir) + shutil.copytree( + model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir, dirs_exist_ok=True + ) print("Vivado stitched IP written into " + stitched_ip_dir) if VerificationStepType.STITCHED_IP_RTLSIM in cfg._resolve_verification_steps(): # prepare ip-stitched rtlsim @@ -761,7 +762,7 @@ def step_make_pynq_driver(model: ModelWrapper, cfg: DataflowBuildConfig): if DataflowOutputType.PYNQ_DRIVER in cfg.generate_outputs: driver_dir = cfg.output_dir + "/driver" model = model.transform(MakePYNQDriver(cfg._resolve_driver_platform())) - copy_tree(model.get_metadata_prop("pynq_driver_dir"), driver_dir) + shutil.copytree(model.get_metadata_prop("pynq_driver_dir"), driver_dir, dirs_exist_ok=True) print("PYNQ Python driver written into " + driver_dir) return model @@ -862,8 +863,8 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig): bitfile_dir = cfg.output_dir + "/bitfile" driver_dir = cfg.output_dir + "/driver" os.makedirs(deploy_dir, exist_ok=True) - copy_tree(bitfile_dir, deploy_dir + "/bitfile") - copy_tree(driver_dir, deploy_dir + "/driver") + shutil.copytree(bitfile_dir, deploy_dir + "/bitfile", dirs_exist_ok=True) + shutil.copytree(driver_dir, deploy_dir + "/driver", dirs_exist_ok=True) return model diff --git a/src/finn/custom_op/fpgadataflow/concat.py b/src/finn/custom_op/fpgadataflow/concat.py index 210b6b7fdd..985ac83ea6 100644 --- a/src/finn/custom_op/fpgadataflow/concat.py +++ b/src/finn/custom_op/fpgadataflow/concat.py @@ -29,7 +29,6 @@ import numpy as np from qonnx.core.datatype import DataType -from qonnx.util.basic import roundup_to_integer_multiple from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp @@ -134,10 +133,6 @@ def execute_node(self, context, graph): result = np.concatenate(inp_values, axis=-1) context[node.output[0]] = result - def get_instream_width_padded(self, ind=0): - in_width = self.get_instream_width(ind) - return 
roundup_to_integer_multiple(in_width, 8) - def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() n_inputs = self.get_n_inputs() diff --git a/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py b/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py index ba44deb898..ad40b62d8c 100644 --- a/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py @@ -28,6 +28,7 @@ import numpy as np import os +import warnings from math import ceil, log2 from qonnx.core.datatype import DataType @@ -87,31 +88,6 @@ def defines(self, var): my_defines.append("#define EmbeddingType %s" % emb_hls_type) self.code_gen_dict["$DEFINES$"] = my_defines - def read_npy_data(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - dtype = self.get_input_datatype() - if dtype == DataType["BIPOLAR"]: - # use binary for bipolar storage - dtype = DataType["BINARY"] - elem_bits = dtype.bitwidth() - packed_bits = self.get_instream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits - elem_hls_type = dtype.get_hls_datatype_str() - npy_type = "int64_t" - npy_in = "%s/input_0.npy" % code_gen_dir - self.code_gen_dict["$READNPYDATA$"] = [] - self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' - % ( - packed_hls_type, - elem_hls_type, - elem_bits, - npy_type, - npy_in, - self.hls_sname(), - ) - ) - def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") dtype = self.get_output_datatype() @@ -273,7 +249,18 @@ def execute_node(self, context, graph): ) inp = context[node.input[0]] - assert inp.dtype == np.int64, "Inputs must be contained in int64 ndarray" + + # Make sure the input has the right container datatype + if inp.dtype is not np.float32: + # Issue a warning to make the user aware of this type-cast + warnings.warn( + f"{node.name}: Changing input container datatype from " + f"{inp.dtype} to {np.float32}" + ) + # Convert the input to 
floating point representation as the + # container datatype + inp = inp.astype(np.float32) + assert inp.shape == exp_ishape, """Input shape doesn't match expected shape.""" export_idt = self.get_input_datatype() odt = self.get_output_datatype() diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index f8f27cb647..1b240eeff8 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -133,10 +133,18 @@ def execute_node(self, context, graph): elif mode == "rtlsim": code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") # create a npy file for the input of the node - assert ( - str(inp.dtype) == "float32" - ), """Input datatype is - not float32 as expected.""" + + # Make sure the input has the right container datatype + if inp.dtype is not np.float32: + # Issue a warning to make the user aware of this type-cast + warnings.warn( + f"{node.name}: Changing input container datatype from " + f"{inp.dtype} to {np.float32}" + ) + # Convert the input to floating point representation as the + # container datatype + inp = inp.astype(np.float32) + expected_inp_shape = self.get_folded_input_shape() reshaped_input = inp.reshape(expected_inp_shape) if DataType[self.get_nodeattr("dataType")] == DataType["BIPOLAR"]: diff --git a/src/finn/custom_op/fpgadataflow/thresholding.py b/src/finn/custom_op/fpgadataflow/thresholding.py index 12cb76be4e..8cebf613b1 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding.py +++ b/src/finn/custom_op/fpgadataflow/thresholding.py @@ -243,16 +243,29 @@ def execute_node(self, context, graph): inp_values = context[node.input[0]] th_val = context[node.input[1]] out_bias = self.get_nodeattr("ActVal") - # MT expects inputs to be in the shape (N,C,H,W) or (N, C) - # if 4D then input values in context are (N,H,W,C) and need to - # be transposed. 
- # if 2D then inputs can be passed directly to MT function - is_4d = len(inp_values.shape) == 4 - if is_4d: - inp_values = np.transpose(inp_values, (0, 3, 1, 2)) + + # Consider the data layout for transposing the input into the format + # accepted by the multithreshold function above, i.e., the channel + # dimension is along the axis with index 1. + data_layout = None + # If there is no layout annotation, guess based on rank of the tensor + # TODO: Currently there is no mechanism here to get the layout + # annotation, we always guess, but this matches the previous behavior. + if len(inp_values.shape) < 5: + # Maps tensor rank to layout annotation + rank_to_layout = {0: None, 1: "C", 2: "NC", 3: "NWC", 4: "NHWC"} + # Lookup the layout required by this input shape + data_layout = rank_to_layout[len(inp_values.shape)] + # Lookup the index of the channel dimension in the data layout + # Note: Assumes there is at most one "C" which denotes the channel + # dimension + cdim = data_layout.index("C") if "C" in data_layout else 1 + # Rearrange the input to the expected (N, C, ...) 
layout + inp_values = inp_values.swapaxes(cdim, 1) y = multithreshold(inp_values, th_val, out_bias=out_bias) - if is_4d: - y = y.transpose(0, 2, 3, 1) + # Rearrange the output back to the original layout + y = y.swapaxes(cdim, 1) + act = DataType[self.get_nodeattr("outputDataType")] if act == DataType["BIPOLAR"]: # binary to bipolar diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index 9ed0f51cd4..3f697266ae 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -202,6 +202,8 @@ def apply(self, model): fifo_depth = n0.get_nodeattr("inFIFODepths")[inp_ind] if fifo_depth > 2 or self.create_shallow_fifos: + # Ensure that create shallow fifo condition doesn't create depth=1 fifos + fifo_depth = max(fifo_depth, 2) # create fifo node fifo_output_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), @@ -264,6 +266,8 @@ def apply(self, model): fifo_depth = n0.get_nodeattr("outFIFODepths")[out_ind] if fifo_depth > 2 or self.create_shallow_fifos: + # Ensure that create shallow fifo condition doesn't create depth=1 fifos + fifo_depth = max(fifo_depth, 2) # create fifo node fifo_input_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 82ee536d50..c3baf80aab 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -539,7 +539,7 @@ def decompose_pow2(x): ret_final = [] for cand_depth in ret_pass2: if cand_depth <= max_qsrl_depth: - ret_final.append((cand_depth, "rtl")) + ret_final.append((max(2, cand_depth), "rtl")) else: ret_final.append((cand_depth, "vivado")) diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index 0d3418624a..ac098d708c 100644 --- 
a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -40,7 +40,6 @@ import warnings from brevitas.export import export_qonnx from dataset_loading import cifar, mnist -from distutils.dir_util import copy_tree from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp @@ -59,7 +58,7 @@ from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul from qonnx.transformation.merge_onnx_models import MergeONNXModels from qonnx.util.cleanup import cleanup as qonnx_cleanup -from shutil import copy +from shutil import copy, copytree import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw import finn.transformation.streamline.absorb as absorb @@ -112,8 +111,9 @@ rtlsim_trace = False -def get_checkpoint_name(topology, wbits, abits, step): - return build_dir + "/end2end_%s_w%da%d_%s.onnx" % ( +def get_checkpoint_name(board, topology, wbits, abits, step): + return build_dir + "/end2end_%s_%s_w%da%d_%s.onnx" % ( + board, topology, wbits, abits, @@ -357,7 +357,7 @@ def deploy_based_on_board(model, model_title, topology, wbits, abits, board): # driver.py and python libraries pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir") - copy_tree(pynq_driver_dir, deployment_dir) + copytree(pynq_driver_dir, deployment_dir, dirs_exist_ok=True) model.set_metadata_prop("pynq_deploy_dir", deployment_dir) @@ -451,11 +451,16 @@ def pytest_generate_tests(metafunc): scenarios.extend(test_cases) if len(scenarios) > 0: - for scenario in scenarios: + for i, scenario in enumerate(scenarios): idlist.append(scenario[0]) items = scenario[1].items() argnames = [x[0] for x in items] - argvalues.append([x[1] for x in items]) + argvalues_scenario = [x[1] for x in items] + argvalues.append( + pytest.param( + *argvalues_scenario, marks=pytest.mark.xdist_group(name="bnn_pynq_%d" % i) + ) + ) metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class") @@ 
-471,7 +476,7 @@ def test_export(self, topology, wbits, abits, board): if topology == "lfc" and not (wbits == 1 and abits == 1): pytest.skip("Skipping certain lfc configs") (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits) - chkpt_name = get_checkpoint_name(topology, wbits, abits, "export") + chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "export") export_qonnx(model, torch.randn(ishape), chkpt_name, opset_version=13) qonnx_cleanup(chkpt_name, out_file=chkpt_name) model = ModelWrapper(chkpt_name) @@ -480,7 +485,7 @@ def test_export(self, topology, wbits, abits, board): assert os.path.isfile(chkpt_name) def test_import_and_tidy(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "export") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "export") model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) @@ -488,17 +493,17 @@ def test_import_and_tidy(self, topology, wbits, abits, board): model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) model = model.transform(RemoveStaticGraphInputs()) - chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy") + chkpt = get_checkpoint_name(board, topology, wbits, abits, "import_and_tidy") model.save(chkpt) def test_add_pre_and_postproc(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "import_and_tidy") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "import_and_tidy") model = load_test_checkpoint_or_skip(prev_chkpt_name) global_inp_name = model.graph.input[0].name ishape = model.get_tensor_shape(global_inp_name) # preprocessing: torchvision's ToTensor divides uint8 inputs by 255 totensor_pyt = ToTensor() - chkpt_preproc_name = get_checkpoint_name(topology, wbits, abits, "preproc") + chkpt_preproc_name = 
get_checkpoint_name(board, topology, wbits, abits, "preproc") export_qonnx(totensor_pyt, torch.randn(ishape), chkpt_preproc_name, opset_version=13) qonnx_cleanup(chkpt_preproc_name, out_file=chkpt_preproc_name) pre_model = ModelWrapper(chkpt_preproc_name) @@ -515,7 +520,7 @@ def test_add_pre_and_postproc(self, topology, wbits, abits, board): model.set_tensor_datatype(global_inp_name, DataType["UINT8"]) # postprocessing: insert Top-1 node at the end model = model.transform(InsertTopK(k=1)) - chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post") + chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "pre_post") # tidy-up again model = model.transform(InferShapes()) model = model.transform(FoldConstants()) @@ -527,7 +532,7 @@ def test_add_pre_and_postproc(self, topology, wbits, abits, board): assert os.path.isfile(chkpt_name) def test_streamline(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "pre_post") model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold()) # move past any reshapes to be able to streamline input scaling @@ -543,10 +548,10 @@ def test_streamline(self, topology, wbits, abits, board): model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) - model.save(get_checkpoint_name(topology, wbits, abits, "streamline")) + model.save(get_checkpoint_name(board, topology, wbits, abits, "streamline")) def test_convert_to_hw_layers(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "streamline") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "streamline") model = load_test_checkpoint_or_skip(prev_chkpt_name) if topology == "tfc" and wbits == 1 and abits == 1: # use standalone 
thresholds for tfc-w1a1 to also exercise that option @@ -568,7 +573,7 @@ def test_convert_to_hw_layers(self, topology, wbits, abits, board): model = model.transform(absorb.AbsorbConsecutiveTransposes()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(InferDataLayouts()) - model.save(get_checkpoint_name(topology, wbits, abits, "convert_to_hw_layers")) + model.save(get_checkpoint_name(board, topology, wbits, abits, "convert_to_hw_layers")) exp_layer_counts = { "tfc": [ ("Reshape", 1), @@ -607,11 +612,11 @@ def test_convert_to_hw_layers(self, topology, wbits, abits, board): def test_specialize_layers(self, topology, wbits, abits, board): build_data = get_build_env(board, target_clk_ns) - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "convert_to_hw_layers") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "convert_to_hw_layers") model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(SpecializeLayers(build_data["part"])) model = model.transform(GiveUniqueNodeNames()) - model.save(get_checkpoint_name(topology, wbits, abits, "specialize_layers")) + model.save(get_checkpoint_name(board, topology, wbits, abits, "specialize_layers")) exp_layer_counts = { "tfc": [ ("Reshape", 1), @@ -649,45 +654,45 @@ def test_specialize_layers(self, topology, wbits, abits, board): assert len(model.get_nodes_by_op_type(op_type)) == exp_count def test_create_dataflow_partition(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "specialize_layers") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "specialize_layers") model = load_test_checkpoint_or_skip(prev_chkpt_name) parent_model = model.transform(CreateDataflowPartition()) - parent_model_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent") + parent_model_chkpt = get_checkpoint_name(board, topology, wbits, abits, "dataflow_parent") parent_model.save(parent_model_chkpt) 
sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] sdp_node = getCustomOp(sdp_node) dataflow_model_filename = sdp_node.get_nodeattr("model") dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename) - dataflow_model_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_model") + dataflow_model_chkpt = get_checkpoint_name(board, topology, wbits, abits, "dataflow_model") dataflow_model.save(dataflow_model_chkpt) def test_fold(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "dataflow_model") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "dataflow_model") model = load_test_checkpoint_or_skip(prev_chkpt_name) folding_fxn = get_folding_function(topology, wbits, abits) model = folding_fxn(model) - model.save(get_checkpoint_name(topology, wbits, abits, "fold")) + model.save(get_checkpoint_name(board, topology, wbits, abits, "fold")) def test_minimize_bit_width(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "fold") model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(MinimizeAccumulatorWidth()) model = model.transform(MinimizeWeightBitWidth()) model = model.transform(RoundAndClipThresholds()) - curr_chkpt_name = get_checkpoint_name(topology, wbits, abits, "minimize_bit_width") + curr_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "minimize_bit_width") model.save(curr_chkpt_name) @pytest.mark.slow @pytest.mark.vivado def test_cppsim(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "minimize_bit_width") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "minimize_bit_width") model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(PrepareCppSim()) model = 
model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) - cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim") + cppsim_chkpt = get_checkpoint_name(board, topology, wbits, abits, "cppsim") model.save(cppsim_chkpt) - parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent") + parent_chkpt = get_checkpoint_name(board, topology, wbits, abits, "dataflow_parent") (input_tensor_npy, output_tensor_npy) = get_golden_io_pair( topology, wbits, abits, return_topk=1 ) @@ -700,17 +705,17 @@ def test_ipgen(self, topology, wbits, abits, board): build_data = get_build_env(board, target_clk_ns) if build_data["kind"] == "alveo" and ("VITIS_PATH" not in os.environ): pytest.skip("VITIS_PATH not set") - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "minimize_bit_width") + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "minimize_bit_width") model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(GiveUniqueNodeNames()) model = model.transform(PrepareIP(build_data["part"], target_clk_ns)) model = model.transform(HLSSynthIP()) - model.save(get_checkpoint_name(topology, wbits, abits, "ipgen_" + board)) + model.save(get_checkpoint_name(board, topology, wbits, abits, "ipgen")) @pytest.mark.slow @pytest.mark.vivado def test_set_fifo_depths(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "ipgen_" + board) + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "ipgen") model = load_test_checkpoint_or_skip(prev_chkpt_name) test_fpga_part = get_build_env(board, target_clk_ns)["part"] if topology == "cnv" and abits == 2 and board == "Pynq-Z1": @@ -725,12 +730,12 @@ def test_set_fifo_depths(self, topology, wbits, abits, board): fifo_layers = model.get_nodes_by_op_type("StreamingFIFO_rtl") assert len(fifo_layers) > 0 - model.save(get_checkpoint_name(topology, wbits, abits, "fifodepth_" + board)) + 
model.save(get_checkpoint_name(board, topology, wbits, abits, "fifodepth")) @pytest.mark.slow @pytest.mark.vivado def test_ipstitch_rtlsim(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fifodepth_" + board) + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "fifodepth") model = load_test_checkpoint_or_skip(prev_chkpt_name) test_fpga_part = get_build_env(board, target_clk_ns)["part"] model = model.transform(InsertDWC()) @@ -750,9 +755,9 @@ def test_ipstitch_rtlsim(self, topology, wbits, abits, board): if rtlsim_trace: model.set_metadata_prop("rtlsim_trace", "%s_w%da%d.vcd" % (topology, wbits, abits)) os.environ["RTLSIM_TRACE_DEPTH"] = "3" - rtlsim_chkpt = get_checkpoint_name(topology, wbits, abits, "ipstitch_rtlsim_" + board) + rtlsim_chkpt = get_checkpoint_name(board, topology, wbits, abits, "ipstitch_rtlsim") model.save(rtlsim_chkpt) - parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent") + parent_chkpt = get_checkpoint_name(board, topology, wbits, abits, "dataflow_parent") (input_tensor_npy, output_tensor_npy) = get_golden_io_pair( topology, wbits, abits, return_topk=1 ) @@ -762,7 +767,7 @@ def test_ipstitch_rtlsim(self, topology, wbits, abits, board): @pytest.mark.slow @pytest.mark.vivado def test_throughput_rtlsim(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "ipstitch_rtlsim_" + board) + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "ipstitch_rtlsim") model = load_test_checkpoint_or_skip(prev_chkpt_name) n_nodes = len(model.graph.node) perf_est = model.analysis(dataflow_performance) @@ -780,11 +785,11 @@ def test_throughput_rtlsim(self, topology, wbits, abits, board): def test_validate_top1(self, topology, wbits, abits, board): if "TEST_END2END_VALIDATE_TOP1" not in os.environ: pytest.skip("TEST_END2END_VALIDATE_TOP1 not set") - prepostproc_chkpt = get_checkpoint_name(topology, wbits, 
abits, "pre_post") - streamline_chkpt = get_checkpoint_name(topology, wbits, abits, "streamline") - parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent") - cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim") - rtlsim_chkpt = get_checkpoint_name(topology, wbits, abits, "ipstitch_rtlsim_" + board) + prepostproc_chkpt = get_checkpoint_name(board, topology, wbits, abits, "pre_post") + streamline_chkpt = get_checkpoint_name(board, topology, wbits, abits, "streamline") + parent_chkpt = get_checkpoint_name(board, topology, wbits, abits, "dataflow_parent") + cppsim_chkpt = get_checkpoint_name(board, topology, wbits, abits, "cppsim") + rtlsim_chkpt = get_checkpoint_name(board, topology, wbits, abits, "ipstitch_rtlsim") dataset = topology2dataset(topology) assert measure_top1_accuracy(prepostproc_chkpt, dataset) > 80 assert measure_top1_accuracy(streamline_chkpt, dataset) > 80 @@ -798,11 +803,11 @@ def test_build(self, topology, wbits, abits, board): build_data = get_build_env(board, target_clk_ns) if build_data["kind"] == "alveo" and ("VITIS_PATH" not in os.environ): pytest.skip("VITIS_PATH not set") - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fifodepth_" + board) + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "fifodepth") model = load_test_checkpoint_or_skip(prev_chkpt_name) model = model.transform(build_data["build_fxn"]) model = model.transform(AnnotateResources("synth", build_data["part"])) - model.save(get_checkpoint_name(topology, wbits, abits, "build_" + board)) + model.save(get_checkpoint_name(board, topology, wbits, abits, "build")) @pytest.mark.slow @pytest.mark.vivado @@ -811,16 +816,16 @@ def test_make_pynq_driver(self, topology, wbits, abits, board): build_data = get_build_env(board, target_clk_ns) if build_data["kind"] == "alveo" and ("VITIS_PATH" not in os.environ): pytest.skip("VITIS_PATH not set") - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "build_" + 
board) + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "build") model = load_test_checkpoint_or_skip(prev_chkpt_name) board_to_driver_platform = "alveo" if build_data["kind"] == "alveo" else "zynq-iodma" model = model.transform(MakePYNQDriver(board_to_driver_platform)) - model.save(get_checkpoint_name(topology, wbits, abits, "driver_" + board)) + model.save(get_checkpoint_name(board, topology, wbits, abits, "driver")) def test_deploy(self, topology, wbits, abits, board): - prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "driver_" + board) + prev_chkpt_name = get_checkpoint_name(board, topology, wbits, abits, "driver") model = load_test_checkpoint_or_skip(prev_chkpt_name) model_title = "%s_w%d_a%d_%s" % ("bnn", wbits, abits, topology) deploy_based_on_board(model, model_title, topology, wbits, abits, board) # save the model to be able to link it to the parent - model.save(get_checkpoint_name(topology, wbits, abits, "deploy_" + board)) + model.save(get_checkpoint_name(board, topology, wbits, abits, "deploy")) diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py index 9ee07d57a3..61b9c38ac5 100644 --- a/tests/end2end/test_end2end_cybsec_mlp.py +++ b/tests/end2end/test_end2end_cybsec_mlp.py @@ -79,6 +79,7 @@ def forward(self, x): return out_final +@pytest.mark.xdist_group(name="end2end_cybsec") @pytest.mark.end2end def test_end2end_cybsec_mlp_export(): assets_dir = os.environ["FINN_ROOT"] + "/src/finn/qnn-data/cybsec-mlp" @@ -143,6 +144,7 @@ def test_end2end_cybsec_mlp_export(): assert model.get_tensor_datatype(first_matmul_w_name) == DataType["INT2"] +@pytest.mark.xdist_group(name="end2end_cybsec") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py index 4c52277970..e1daf6fc97 100644 --- a/tests/end2end/test_end2end_mobilenet_v1.py +++ b/tests/end2end/test_end2end_mobilenet_v1.py @@ 
-106,6 +106,7 @@ first_layer_res_type = "dsp" +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_export(): # export preprocessing @@ -163,6 +164,7 @@ def test_end2end_mobilenet_export(): assert os.path.isfile(build_dir + "/end2end_mobilenet_preproc.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_tidy_and_merge_with_preproc(): preproc_model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_preproc.onnx") @@ -185,6 +187,7 @@ def test_end2end_mobilenet_tidy_and_merge_with_preproc(): model.save(build_dir + "/end2end_mobilenet_tidy.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_streamline(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_tidy.onnx") @@ -214,6 +217,7 @@ def test_end2end_mobilenet_streamline(): assert len(model.get_nodes_by_op_type("Mul")) == 0 # no Mul ops remain +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_lowering(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_streamlined.onnx") @@ -227,6 +231,7 @@ def test_end2end_mobilenet_lowering(): model.save(build_dir + "/end2end_mobilenet_lowered.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_convert_to_hw_layers(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_lowered.onnx") @@ -243,6 +248,7 @@ def test_end2end_mobilenet_convert_to_hw_layers(): model.save(build_dir + "/end2end_mobilenet_hw_layers.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_specialize_layers(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_hw_layers.onnx") @@ -252,6 +258,7 @@ def test_end2end_mobilenet_specialize_layers(): model.save(build_dir + "/end2end_mobilenet_specialize_layers.onnx") 
+@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_create_dataflow_partition(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_specialize_layers.onnx") @@ -265,6 +272,7 @@ def test_end2end_mobilenet_create_dataflow_partition(): dataflow_model.save(build_dir + "/end2end_mobilenet_dataflow_model.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_folding(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_dataflow_model.onnx") @@ -348,6 +356,7 @@ def test_end2end_mobilenet_folding(): model.save(build_dir + "/end2end_mobilenet_folded.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.end2end def test_end2end_mobilenet_minimize_bit_width(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx") @@ -357,6 +366,7 @@ def test_end2end_mobilenet_minimize_bit_width(): model.save(build_dir + "/end2end_mobilenet_minimize_bitwidth.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end @@ -393,6 +403,7 @@ def test_end2end_mobilenet_cppsim(): # assert np.isclose(golden_prob, res_cppsim_prob[0, 0, 0, :5]).all() +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end @@ -403,6 +414,7 @@ def test_end2end_mobilenet_ipgen(): model.save(build_dir + "/end2end_mobilenet_hw_ipgen.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end @@ -439,6 +451,7 @@ def test_end2end_mobilenet_rtlsim(): # assert np.isclose(golden_prob, res_rtlsim_prob[0, 0, 0, :5]).all() +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end @@ -465,6 +478,7 @@ def test_end2end_mobilenet_set_fifo_depths(): model.save(build_dir + "/end2end_mobilenet_set_fifo_depths.onnx") 
+@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end @@ -481,6 +495,7 @@ def test_end2end_mobilenet_stitched_ip(): model.save(build_dir + "/end2end_mobilenet_stitched_ip.onnx") +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end @@ -517,6 +532,7 @@ def test_end2end_mobilenet_stitched_ip_rtlsim(): # assert np.isclose(golden_prob, res_rtlsim_ip_prob[0, 0, 0, :5]).all() +@pytest.mark.xdist_group(name="end2end_mobilenet") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end diff --git a/tests/end2end/test_ext_weights.py b/tests/end2end/test_ext_weights.py index 29d2f58e66..eeb7d95a49 100644 --- a/tests/end2end/test_ext_weights.py +++ b/tests/end2end/test_ext_weights.py @@ -66,6 +66,7 @@ def get_checkpoint_name(step): return build_dir + "/end2end_ext_weights_%s.onnx" % (step) +@pytest.mark.xdist_group(name="end2end_ext_weights") @pytest.mark.end2end def test_end2end_ext_weights_download(): if not os.path.isfile(onnx_zip_local): @@ -75,6 +76,7 @@ def test_end2end_ext_weights_download(): assert os.path.isfile(get_checkpoint_name("download")) +@pytest.mark.xdist_group(name="end2end_ext_weights") @pytest.mark.slow @pytest.mark.vivado @pytest.mark.end2end diff --git a/tests/fpgadataflow/test_split_large_fifos.py b/tests/fpgadataflow/test_split_large_fifos.py index d192755d06..c993b51884 100644 --- a/tests/fpgadataflow/test_split_large_fifos.py +++ b/tests/fpgadataflow/test_split_large_fifos.py @@ -63,7 +63,7 @@ def get_folding_cfg(depth=65536): @pytest.mark.slow @pytest.mark.vivado @pytest.mark.fpgadataflow -@pytest.mark.parametrize("depth", [16384, 65536, 45000]) +@pytest.mark.parametrize("depth", [16384, 65536, 45000, 1537]) @pytest.mark.parametrize("force_python_rtlsim", ["True", "False"]) def test_split_large_fifos(depth, force_python_rtlsim): tmp_output_dir = fetch_test_model("tfc") @@ -103,13 +103,14 @@ def test_split_large_fifos(depth, 
force_python_rtlsim): inst = getCustomOp(fifo_node) fifo_depth = inst.get_nodeattr("depth") assert fifo_depth == golden_cfg[i % len(golden_cfg)][0] + assert fifo_depth > 1 shutil.rmtree(tmp_output_dir) def test_split_large_fifo_configs(): ret0 = get_fifo_split_configs(513, 256, 32768) - assert ret0 == [(512, "vivado"), (1, "rtl")] + assert ret0 == [(512, "vivado"), (2, "rtl")] ret1 = get_fifo_split_configs(1200, 256, 32768) assert ret1 == [(1024, "vivado"), (176, "rtl")] ret2 = get_fifo_split_configs(45000, 256, 32768) diff --git a/tests/notebooks/test_jupyter_notebooks.py b/tests/notebooks/test_jupyter_notebooks.py index e1415b9066..060bb07238 100644 --- a/tests/notebooks/test_jupyter_notebooks.py +++ b/tests/notebooks/test_jupyter_notebooks.py @@ -1,6 +1,7 @@ import pytest import nbformat +import os from nbconvert.preprocessors import ExecutePreprocessor from finn.util.basic import get_finn_root @@ -12,28 +13,67 @@ notebook_bnn_dir = get_finn_root() + "/notebooks/end2end_example/bnn-pynq/" basics_notebooks = [ - pytest.param(notebook_basic_dir + "0_how_to_work_with_onnx.ipynb"), - pytest.param(notebook_basic_dir + "1_brevitas_network_import_via_QONNX.ipynb"), + pytest.param( + notebook_basic_dir + "0_how_to_work_with_onnx.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_general"), + ), + pytest.param( + notebook_basic_dir + "1_brevitas_network_import_via_QONNX.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_general"), + ), ] advanced_notebooks = [ - pytest.param(notebook_advanced_dir + "0_custom_analysis_pass.ipynb"), - pytest.param(notebook_advanced_dir + "1_custom_transformation_pass.ipynb"), - pytest.param(notebook_advanced_dir + "2_custom_op.ipynb"), - pytest.param(notebook_advanced_dir + "3_folding.ipynb"), - pytest.param(notebook_advanced_dir + "4_advanced_builder_settings.ipynb"), + pytest.param( + notebook_advanced_dir + "0_custom_analysis_pass.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_general"), + ), + pytest.param( + 
notebook_advanced_dir + "1_custom_transformation_pass.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_general"), + ), + pytest.param( + notebook_advanced_dir + "2_custom_op.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_general"), + ), + pytest.param( + notebook_advanced_dir + "3_folding.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_general"), + ), + pytest.param( + notebook_advanced_dir + "4_advanced_builder_settings.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_general"), + ), ] cyber_notebooks = [ - pytest.param(notebook_cyber_dir + "1-train-mlp-with-brevitas.ipynb"), - pytest.param(notebook_cyber_dir + "2-import-into-finn-and-verify.ipynb"), - pytest.param(notebook_cyber_dir + "3-build-accelerator-with-finn.ipynb"), + pytest.param( + notebook_cyber_dir + "1-train-mlp-with-brevitas.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_cybsec"), + ), + pytest.param( + notebook_cyber_dir + "2-import-into-finn-and-verify.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_cybsec"), + ), + pytest.param( + notebook_cyber_dir + "3-build-accelerator-with-finn.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_cybsec"), + ), ] bnn_notebooks = [ - pytest.param(notebook_bnn_dir + "cnv_end2end_example.ipynb"), - pytest.param(notebook_bnn_dir + "tfc_end2end_example.ipynb"), - pytest.param(notebook_bnn_dir + "tfc_end2end_verification.ipynb"), + pytest.param( + notebook_bnn_dir + "cnv_end2end_example.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_cnv"), + ), + pytest.param( + notebook_bnn_dir + "tfc_end2end_example.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_tfc"), + ), + pytest.param( + notebook_bnn_dir + "tfc_end2end_verification.ipynb", + marks=pytest.mark.xdist_group(name="notebooks_tfc"), + ), ] @@ -41,8 +81,16 @@ @pytest.mark.parametrize( "notebook", basics_notebooks + advanced_notebooks + cyber_notebooks + bnn_notebooks ) -def test_notebook_exec(notebook): +def test_notebook_exec(notebook, request): 
with open(notebook) as f: + # Set different NETRON_PORT for each xdist group to avoid conflicts + xdist_groups = ["notebooks_general", "notebooks_cybsec", "notebooks_cnv", "notebooks_tfc"] + for mark in request.node.own_markers: + if mark.name == "xdist_group": + group = mark.kwargs["name"] + os.environ["NETRON_PORT"] = str(8081 + xdist_groups.index(group)) + break + nb = nbformat.read(f, as_version=4) ep = ExecutePreprocessor(timeout=notebook_timeout_seconds, kernel_name="python3") try: