From 934c6db6e3dadf96be4bd686a8bf9aa2f1a351b7 Mon Sep 17 00:00:00 2001 From: Jaro Habiger Date: Tue, 14 Nov 2023 19:51:18 +0100 Subject: [PATCH 1/3] applets.interface.better_la: add --- .../applet/interface/better_la/__init__.py | 137 ++++++++++++++++++ .../applet/interface/better_la/arbeiter.py | 86 +++++++++++ .../interface/better_la/signal_compressor.py | 70 +++++++++ .../applet/interface/better_la/test.py | 108 ++++++++++++++ software/pyproject.toml | 1 + 5 files changed, 402 insertions(+) create mode 100644 software/glasgow/applet/interface/better_la/__init__.py create mode 100644 software/glasgow/applet/interface/better_la/arbeiter.py create mode 100644 software/glasgow/applet/interface/better_la/signal_compressor.py create mode 100644 software/glasgow/applet/interface/better_la/test.py diff --git a/software/glasgow/applet/interface/better_la/__init__.py b/software/glasgow/applet/interface/better_la/__init__.py new file mode 100644 index 000000000..3ad01a827 --- /dev/null +++ b/software/glasgow/applet/interface/better_la/__init__.py @@ -0,0 +1,137 @@ +from collections import defaultdict +import logging +import argparse +from vcd import VCDWriter +from amaranth import * +from amaranth.lib.cdc import FFSynchronizer + +from ....gateware.pads import * +from ....gateware.analyzer import * +from ... import * +from .signal_compressor import SignalCompressor +from .arbeiter import LAArbeiter + +# This LA uses a simple protocol for sending compressed values over the FIFO: +# Each packet starts with a 8 bit size word. The size can be 0, then the word only consists of that +# word. If the size is n != 0, the packet is n*2 bytes long. Each 16bit word is encoded acording +# to the format described in the SignalCompressor value. The packets are round-robin for each pin. 
+ +class BetterLASubtarget(Elaboratable): + def __init__(self, pads, in_fifo): + self.pads = pads + self.in_fifo = in_fifo + + self.la = LAArbeiter(in_fifo) + + def elaborate(self, platform): + m = Module() + m.submodules += self.la + + pins_i = Signal.like(self.pads.i_t.i) + m.submodules += FFSynchronizer(self.pads.i_t.i, pins_i) + m.d.comb += self.la.input.eq(pins_i) + + return m + + +class BetterLAApplet(GlasgowApplet): + logger = logging.getLogger(__name__) + help = "capture logic waveforms" + description = """ + A somewhat better logic analyzer applet that allows for the capture of traces as VCD files. + """ + + @classmethod + def add_build_arguments(cls, parser, access): + super().add_build_arguments(parser, access) + + access.add_pin_set_argument(parser, "i", width=range(1, 17), default=1) + + def build(self, target, args): + self.mux_interface = iface = target.multiplexer.claim_interface(self, args) + iface.add_subtarget(BetterLASubtarget( + pads=iface.get_pads(args, pin_sets=("i",)), + in_fifo=iface.get_in_fifo(depth=512*16), + )) + + self._sample_freq = target.sys_clk_freq + self._pins = getattr(args, "pin_set_i") + + @classmethod + def add_run_arguments(cls, parser, access): + super().add_run_arguments(parser, access) + + g_pulls = parser.add_mutually_exclusive_group() + g_pulls.add_argument( + "--pull-ups", default=False, action="store_true", + help="enable pull-ups on all pins") + g_pulls.add_argument( + "--pull-downs", default=False, action="store_true", + help="enable pull-downs on all pins") + + async def run(self, device, args): + pull_low = set() + pull_high = set() + if args.pull_ups: + pull_high = set(args.pin_set_i) + if args.pull_downs: + pull_low = set(args.pin_set_i) + iface = await device.demultiplexer.claim_interface(self, self.mux_interface, args, + pull_low=pull_low, pull_high=pull_high) + return iface + + @classmethod + def add_interact_arguments(cls, parser): + parser.add_argument( + "file", metavar="VCD-FILE", 
type=argparse.FileType("w"), + help="write VCD waveforms to VCD-FILE") + + async def interact(self, device, args, iface): + pins = defaultdict(list) + overrun = False + + zero_chunks = 0 + chunks = 0 + try: # this try catches Ctrl+C for being able to manually interrupt capture + while not overrun: + for p in self._pins: + pkgs = await LAArbeiter.read_chunk(iface.read) + if len(pkgs) == 0: + zero_chunks += 1 + chunks += 1 + pins[p].extend(pkgs) + if len(pkgs) > 255 - len(self._pins): + overrun = True + print("overrun") + finally: + events = [] + cycles = 0 + for p, pkgs in pins.items(): + cycle = 0 + for pkg in pkgs: + for value, duration in SignalCompressor.decode_pkg(pkg): + timestamp = cycle * 1_000_000_000 // self._sample_freq + events.append((timestamp, p, value)) + cycle += duration + cycles = max(cycle, cycles) + events.sort(key=lambda e: e[0]) + + total_pkgs = sum(len(pkgs) for pkgs in pins.values()) + total_bytes = chunks + total_pkgs * 2 + + print(f"captured {cycles} cycles") + print(f"chunking overhead: {chunks / total_bytes * 100}%") + print(f"zero chunks overhead: {zero_chunks / total_bytes * 100}%") + print(f"compression gain: {100 - (total_bytes * 8 / cycle * 100)}%") + + + vcd_writer = VCDWriter(args.file, timescale="1 ns", check_values=False) + vcd_signals = { + p: vcd_writer.register_var(scope="", name="pin[{}]".format(p), var_type="wire", + size=1, init=0) + for p in pins.keys() + } + for timestamp, p, value in events: + signal = vcd_signals[p] + vcd_writer.change(signal, timestamp, value) + vcd_writer.close(timestamp) diff --git a/software/glasgow/applet/interface/better_la/arbeiter.py b/software/glasgow/applet/interface/better_la/arbeiter.py new file mode 100644 index 000000000..abe48e626 --- /dev/null +++ b/software/glasgow/applet/interface/better_la/arbeiter.py @@ -0,0 +1,86 @@ +from typing import Callable +from amaranth import * +from amaranth.lib.fifo import SyncFIFOBuffered + +from . 
import SignalCompressor + +class LAArbeiter(Elaboratable): + """This Logic Analyzer Arbeiter instanciates n Signal compressors and n Fifos and arbeites the + output of the fifos in a round robin fashion. Its output format is one length byte followed by + 2*length bytes of compressed channel data. After that the next channel is send with the same + format. + """ + def __init__(self, output_fifo: SyncFIFOBuffered, n_channels=16, pressure_threshold=64): + self.output_fifo = output_fifo + assert output_fifo.width == 8 + self.input = Signal(n_channels) + + self._pressure_threshold = pressure_threshold + + self.fifos = [SyncFIFOBuffered(width=16, depth=256) for _ in range(n_channels)] + self.compressors = [SignalCompressor(self.input[i]) for i in range(n_channels)] + + def elaborate(self, platform): + m = Module() + + to_transfer = Signal(8) + enough_pressure = Signal(len(self.fifos)) + any_enough_pressure = Signal() + m.d.comb += any_enough_pressure.eq(enough_pressure.any()) + + with m.FSM(): + for i in range(len(self.input)): + fifo = self.fifos[i] + compressor = self.compressors[i] + m.submodules[f"fifo_{i}"] = fifo + m.submodules[f"compressor_{i}"] = compressor + + m.d.comb += fifo.w_en.eq(compressor.valid) + m.d.comb += fifo.w_data.eq(compressor.value) + + m.d.sync += enough_pressure[i].eq(fifo.r_level > self._pressure_threshold) + + def go_to_next(i): + with m.If(any_enough_pressure): + m.next = f"announce_{(i + 1) % len(self.input)}" + with m.Else(): + m.next = f"wait_{(i + 1) % len(self.input)}" + + + with m.State(f"wait_{i}"): + with m.If(any_enough_pressure): + m.next = f"announce_{i}" + with m.State(f"announce_{i}"): + m.d.comb += self.output_fifo.w_data.eq(fifo.r_level) + m.d.comb += self.output_fifo.w_en.eq(1) + m.d.sync += to_transfer.eq(fifo.r_level) + with m.If(self.output_fifo.w_rdy): + with m.If(fifo.r_level > 0): + m.next = f"send_{i}_lower" + with m.Else(): + go_to_next(i) + + with m.State(f"send_{i}_lower"): + m.d.comb += 
self.output_fifo.w_data.eq(fifo.r_data[0:8]) + m.d.comb += self.output_fifo.w_en.eq(1) + with m.If(self.output_fifo.w_rdy): + m.next = f"send_{i}_upper" + + with m.State(f"send_{i}_upper"): + m.d.comb += self.output_fifo.w_data.eq(fifo.r_data[8:16]) + m.d.comb += self.output_fifo.w_en.eq(1) + with m.If(self.output_fifo.w_rdy): + m.d.comb += fifo.r_en.eq(1) + with m.If(to_transfer > 1): + m.next = f"send_{i}_lower" + m.d.sync += to_transfer.eq(to_transfer - 1) + with m.Else(): + go_to_next(i) + + return m + + @staticmethod + async def read_chunk(read: Callable[[int], bytes]): + length = (await read(1))[0] + contents = (await read(2 * length)) + return [contents[2*i+1] << 8 | contents[2*i] for i in range(length)] diff --git a/software/glasgow/applet/interface/better_la/signal_compressor.py b/software/glasgow/applet/interface/better_la/signal_compressor.py new file mode 100644 index 000000000..d49a839bd --- /dev/null +++ b/software/glasgow/applet/interface/better_la/signal_compressor.py @@ -0,0 +1,70 @@ +from itertools import chain +from typing import List, Tuple +from amaranth import * + +class SignalCompressor(Elaboratable): + """The SignalCompressor converts information about value changes into an efficient compressed + format. 
It outputs a 16bit stream that is encoded in one of three ways: + + 0b0: plain, no compression [15 bit value dump] + 0b10: constant 0 for the following n [14 bit] cycles + 0b11: constant 1 for the following n [14 bit] cycles + """ + def __init__(self, signal): + self.signal = signal + + self.valid = Signal() + self.value = Signal(16) + + def elaborate(self, platform): + m = Module() + + last = Signal() + m.d.sync += last.eq(self.signal) + change = Signal() + m.d.comb += change.eq(self.signal ^ last) + + + counter = Signal(14) + m.d.sync += counter.eq(counter + 1) + + buffer = Signal(15) + m.d.sync += buffer.eq((buffer << 1) | self.signal) + + plain_mode = Signal() + + with m.If(change): + with m.If(counter < 15): + m.d.sync += plain_mode.eq(1) + with m.Elif(~plain_mode): + m.d.comb += self.valid.eq(1) + m.d.comb += self.value.eq(Cat(1, last, counter)) + m.d.sync += counter.eq(0) + m.d.sync += plain_mode.eq(0) + + with m.If(counter == 2**len(counter) - 1): + m.d.comb += self.valid.eq(1) + m.d.comb += self.value.eq(Cat(1, last, counter)) + m.d.sync += counter.eq(0) + m.d.sync += plain_mode.eq(0) + + with m.If(plain_mode & (counter == 14)): + m.d.comb += self.valid.eq(1) + m.d.comb += self.value.eq(Cat(0, buffer)) + m.d.sync += counter.eq(0) + m.d.sync += plain_mode.eq(0) + + return m + + @staticmethod + def decode_pkg(pkg) -> List[Tuple[int, int]]: + if pkg & 0b01: + value = pkg >> 1 & 0b01 + duration = pkg >> 2 + return [(value, duration + 1)] + else: + return [(int(x), 1) for x in list('{0:015b}'.format(pkg >> 1))] + + @staticmethod + def expand_duration_list(duration_list: List[Tuple[int, int]]) -> List[int]: + return list(chain(*[[value] * duration for value, duration in duration_list])) diff --git a/software/glasgow/applet/interface/better_la/test.py b/software/glasgow/applet/interface/better_la/test.py new file mode 100644 index 000000000..254e58f98 --- /dev/null +++ b/software/glasgow/applet/interface/better_la/test.py @@ -0,0 +1,108 @@ +import unittest 
+import random +from amaranth import * + +from ....gateware import simulation_test +from ....applet import GlasgowAppletTestCase, applet_simulation_test, synthesis_test +from .signal_compressor import SignalCompressor +from .arbeiter import LAArbeiter +from . import BetterLAApplet + + +class SignalCompressorTestCase(unittest.TestCase): + def setUp(self): + self.tb = SignalCompressor(Signal(name="input")) + + @simulation_test + def test_rlu(self, tb): + for _ in range(100): + yield + yield self.tb.signal.eq(1) + for _ in range(100): + yield + yield self.tb.signal.eq(0) + yield + + assert (yield self.tb.valid) == 1 + duration_list = SignalCompressor.decode_pkg((yield self.tb.value)) + assert SignalCompressor.expand_duration_list(duration_list) == [1] * 100 + + @simulation_test + def test_fallback(self, tb): + tx_string = "1011001001010000111100010010011100011100101010001010111001111000" + tx = [int(x) for x in tx_string] + + rx = [] + for x in tx: + yield self.tb.signal.eq(x) + if (yield self.tb.valid): + rx.append((yield self.tb.value)) + yield + + + decoded = [] + for pkg in rx: + decoded.extend(SignalCompressor.expand_duration_list(SignalCompressor.decode_pkg(pkg))) + + print(f"saved {100 - (len(rx) * 16 / len(decoded) * 100)}%") + assert decoded[2:] == tx[:len(decoded)-2] + + @simulation_test + def test_decode(self, tb): + random.seed(0) + tx = [] + for _ in range(100): + val = random.randint(0, 1) + length = random.randint(1, 7) if random.randint(0, 1) else random.randint(1, 250) + tx.extend(val for _ in range(length)) + + rx = [] + for x in tx: + yield self.tb.signal.eq(x) + if (yield self.tb.valid): + rx.append((yield self.tb.value)) + yield + + + decoded = [] + for pkg in rx: + decoded.extend(SignalCompressor.expand_duration_list(SignalCompressor.decode_pkg(pkg))) + + print(f"saved {100 - (len(rx) * 16 / len(decoded) * 100)}%") + assert decoded[2:] == tx[:len(decoded)-2] + + +class BetterLAAppletTestCase(GlasgowAppletTestCase, applet=BetterLAApplet): + 
@synthesis_test + def test_build(self): + self.assertBuilds() + + def setup_demo_source(self): + self.build_simulated_applet() + mux_iface = self.applet.mux_interface + m = Module() + m.d.sync += mux_iface.pads.i_t.i.eq(mux_iface.pads.i_t.i + 1) + self.target.add_submodule(m) + + @applet_simulation_test("setup_demo_source", ["--pins-i", "0:15"]) + async def test_smoke(self): + applet = await self.run_simulated_applet() + channels = [[] for _ in range(16)] + for _ in range(15): + for n, channel in enumerate(channels): + chunk = await LAArbeiter.read_chunk(applet.read) + assert len(chunk) < 255 - 16 + for pkg in chunk: + duration_list = SignalCompressor.decode_pkg(pkg) + expanded = SignalCompressor.expand_duration_list(duration_list) + channel.extend(expanded) + for i, channel in enumerate(channels): + duration = 0 + last = 0 + for j, x in enumerate(channel[3:]): + if x == last: + duration += 1 + else: + assert duration == 2**i, f"channel {i} at position {j}" + duration = 1 + last = x diff --git a/software/pyproject.toml b/software/pyproject.toml index fcaab3a85..ae149ecb2 100644 --- a/software/pyproject.toml +++ b/software/pyproject.toml @@ -81,6 +81,7 @@ selftest = "glasgow.applet.internal.selftest:SelfTestApplet" benchmark = "glasgow.applet.internal.benchmark:BenchmarkApplet" analyzer = "glasgow.applet.interface.analyzer:AnalyzerApplet" +better-la = "glasgow.applet.interface.better_la:BetterLAApplet" uart = "glasgow.applet.interface.uart:UARTApplet" spi-controller = "glasgow.applet.interface.spi_controller:SPIControllerApplet" i2c-initiator = "glasgow.applet.interface.i2c_initiator:I2CInitiatorApplet" From 356cfec9ed478474e03db3fa1212c07f5f146e8a Mon Sep 17 00:00:00 2001 From: Jaro Habiger Date: Thu, 16 Nov 2023 00:52:15 +0100 Subject: [PATCH 2/3] applets.interface.better_la: better performance this implements dynamic priority based scheduling in the LAArbiter. 
It also improves the host-side Python code. --- .../applet/interface/better_la/__init__.py | 169 ++++++++++++------ .../applet/interface/better_la/arbeiter.py | 86 --------- .../applet/interface/better_la/arbiter.py | 110 ++++++++++++ .../applet/interface/better_la/argmax.py | 52 ++++++ .../interface/better_la/signal_compressor.py | 2 +- .../interface/better_la/step_encoder.py | 22 +++ .../applet/interface/better_la/test.py | 60 ++++++- 7 files changed, 346 insertions(+), 155 deletions(-) delete mode 100644 software/glasgow/applet/interface/better_la/arbeiter.py create mode 100644 software/glasgow/applet/interface/better_la/arbiter.py create mode 100644 software/glasgow/applet/interface/better_la/argmax.py create mode 100644 software/glasgow/applet/interface/better_la/step_encoder.py diff --git a/software/glasgow/applet/interface/better_la/__init__.py b/software/glasgow/applet/interface/better_la/__init__.py index 3ad01a827..30a40fbb9 100644 --- a/software/glasgow/applet/interface/better_la/__init__.py +++ b/software/glasgow/applet/interface/better_la/__init__.py @@ -1,4 +1,5 @@ from collections import defaultdict +import io import logging import argparse from vcd import VCDWriter @@ -9,27 +10,49 @@ from ....gateware.analyzer import * from ... import * from .signal_compressor import SignalCompressor -from .arbeiter import LAArbeiter - -# This LA uses a simple protocol for sending compressed values over the FIFO: -# Each packet starts with a 8 bit size word. The size can be 0, then the word only consists of that -# word. If the size is n != 0, the packet is n*2 bytes long. Each 16bit word is encoded acording -# to the format described in the SignalCompressor value. The packets are round-robin for each pin. +from .arbiter import LAArbiter + +# This LA uses a simple protocol for sending compressed values over the FIFO which is explained +# in the arbiter.py (high level chunks) and signal_compressor.py (low level packets) files. 
+# The basic architecture is as follows: +# +------------------+ +--------+ +# Pin0 --->| SignalCompressor |------>| FIFO |-----+ +# +------------------+ +--------+ | +# | +# +------------------+ +--------+ | +# Pin1 --->| SignalCompressor |------>| FIFO |-----+ +-----------+ +----------+ +# +------------------+ +--------+ | | | | | +# +---->| LAArbiter |----->| USB-FIFO | +# +------------------+ +--------+ | | | | | +# Pin2 --->| SignalCompressor |------>| FIFO |-----+ +-----------+ +----------+ +# +------------------+ +--------+ | +# | +# +------------------+ +--------+ | +# PinN --->| ... |------>| ... |-----+ +# +------------------+ +--------+ class BetterLASubtarget(Elaboratable): - def __init__(self, pads, in_fifo): + def __init__(self, pads, in_fifo, counter_target=False): self.pads = pads self.in_fifo = in_fifo + self.counter_target = counter_target - self.la = LAArbeiter(in_fifo) + self.la = LAArbiter(in_fifo) def elaborate(self, platform): m = Module() m.submodules += self.la - pins_i = Signal.like(self.pads.i_t.i) - m.submodules += FFSynchronizer(self.pads.i_t.i, pins_i) - m.d.comb += self.la.input.eq(pins_i) + if self.counter_target: + print("building bitstream with simulated counter target") + counter = Signal(len(self.pads.i_t.i)+2) + m.d.sync += counter.eq(counter + 1) + m.d.comb += self.la.input.eq(counter[2:]) + else: + print("building bitstream connected to real target") + pins_i = Signal.like(self.pads.i_t.i) + m.submodules += FFSynchronizer(self.pads.i_t.i, pins_i) + m.d.comb += self.la.input.eq(pins_i) return m @@ -46,12 +69,17 @@ def add_build_arguments(cls, parser, access): super().add_build_arguments(parser, access) access.add_pin_set_argument(parser, "i", width=range(1, 17), default=1) + parser.add_argument( + "--counter-target", default=False, action="store_true", + help="simulate a target with a counter signal", + ) def build(self, target, args): self.mux_interface = iface = target.multiplexer.claim_interface(self, args) 
iface.add_subtarget(BetterLASubtarget( pads=iface.get_pads(args, pin_sets=("i",)), - in_fifo=iface.get_in_fifo(depth=512*16), + in_fifo=iface.get_in_fifo(depth=512*16, auto_flush=False), + counter_target=args.counter_target )) self._sample_freq = target.sys_clk_freq @@ -85,53 +113,76 @@ def add_interact_arguments(cls, parser): parser.add_argument( "file", metavar="VCD-FILE", type=argparse.FileType("w"), help="write VCD waveforms to VCD-FILE") + parser.add_argument("--buffer-size", type=int, default=10, + help="how much data to capture in MB") async def interact(self, device, args, iface): - pins = defaultdict(list) - overrun = False - - zero_chunks = 0 - chunks = 0 - try: # this try catches Ctrl+C for being able to manually interrupt capture - while not overrun: - for p in self._pins: - pkgs = await LAArbeiter.read_chunk(iface.read) - if len(pkgs) == 0: - zero_chunks += 1 - chunks += 1 - pins[p].extend(pkgs) - if len(pkgs) > 255 - len(self._pins): - overrun = True - print("overrun") + # Step 1: record a buffer + # we do this before to get the full USB performance and not have any lag-spikes in between + try: + print(f"starting capture of {args.buffer_size} MB") + buffer = await iface.read(1024*1024 * args.buffer_size) + except KeyboardInterrupt: + pass finally: - events = [] - cycles = 0 - for p, pkgs in pins.items(): - cycle = 0 - for pkg in pkgs: - for value, duration in SignalCompressor.decode_pkg(pkg): - timestamp = cycle * 1_000_000_000 // self._sample_freq - events.append((timestamp, p, value)) - cycle += duration - cycles = max(cycle, cycles) - events.sort(key=lambda e: e[0]) - - total_pkgs = sum(len(pkgs) for pkgs in pins.values()) - total_bytes = chunks + total_pkgs * 2 - - print(f"captured {cycles} cycles") - print(f"chunking overhead: {chunks / total_bytes * 100}%") - print(f"zero chunks overhead: {zero_chunks / total_bytes * 100}%") - print(f"compression gain: {100 - (total_bytes * 8 / cycle * 100)}%") - - - vcd_writer = VCDWriter(args.file, 
timescale="1 ns", check_values=False) - vcd_signals = { - p: vcd_writer.register_var(scope="", name="pin[{}]".format(p), var_type="wire", - size=1, init=0) - for p in pins.keys() - } - for timestamp, p, value in events: - signal = vcd_signals[p] - vcd_writer.change(signal, timestamp, value) - vcd_writer.close(timestamp) + print("captured buffer, converting...") + + + # Step 2: parse the packets from the captured buffer and sort them into channels + ptr = 0 + async def read(size, ) -> bytes: + nonlocal ptr + to_return = buffer[ptr:ptr+size] + ptr += size + if ptr >= len(buffer): + return None + return to_return + channels = defaultdict(list) + chunks = 0 + while True: + read_result = await LAArbiter.read_chunk(read) + if read_result is None: + break + channel, chunk = read_result + if len(chunk) == 255: + print(f"channel {channel} overrun") + break + channels[self._pins[channel]].extend(chunk) + chunks += 1 + + # Step 3: convert each channels packets into events, attach timestamps and sort them by + # timestamp + events = [] + cycles = None + for p, pkgs in channels.items(): + cycle = 0 + for pkg in pkgs: + for value, duration in SignalCompressor.decode_pkg(pkg): + events.append((cycle, p, value)) + cycle += duration + cycles = cycle if cycles is None else cycle if cycle < cycles else cycles + events.sort(key=lambda e: e[0]) + + # Step 3.5: report statistics + total_pkgs = sum(len(pkgs) for pkgs in channels.values()) + total_bytes = chunks + total_pkgs * 2 + print(f"captured {cycles} samples ({cycles / self._sample_freq * 1000}ms)") + print(f"chunking overhead: {chunks / total_bytes * 100}%") + print(f"compression gain: {100 - (total_bytes * 8 / (cycle * len(self._pins)) * 100)}%") + + + # Step 4: write out VCD file + vcd_writer = VCDWriter(args.file, timescale="1 ns", check_values=False) + vcd_signals = { + p: vcd_writer.register_var(scope="", name="pin[{}]".format(p), var_type="wire", + size=1, init=0) + for p in self._pins + } + for cycle, p, value in events: + 
if cycle > cycles: + # we dont write any timestamps for which we dont have data on all channels + break + signal = vcd_signals[p] + timestamp = cycle * 1_000_000_000 // self._sample_freq + vcd_writer.change(signal, timestamp, value) + vcd_writer.close(timestamp) diff --git a/software/glasgow/applet/interface/better_la/arbeiter.py b/software/glasgow/applet/interface/better_la/arbeiter.py deleted file mode 100644 index abe48e626..000000000 --- a/software/glasgow/applet/interface/better_la/arbeiter.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import Callable -from amaranth import * -from amaranth.lib.fifo import SyncFIFOBuffered - -from . import SignalCompressor - -class LAArbeiter(Elaboratable): - """This Logic Analyzer Arbeiter instanciates n Signal compressors and n Fifos and arbeites the - output of the fifos in a round robin fashion. Its output format is one length byte followed by - 2*length bytes of compressed channel data. After that the next channel is send with the same - format. 
- """ - def __init__(self, output_fifo: SyncFIFOBuffered, n_channels=16, pressure_threshold=64): - self.output_fifo = output_fifo - assert output_fifo.width == 8 - self.input = Signal(n_channels) - - self._pressure_threshold = pressure_threshold - - self.fifos = [SyncFIFOBuffered(width=16, depth=256) for _ in range(n_channels)] - self.compressors = [SignalCompressor(self.input[i]) for i in range(n_channels)] - - def elaborate(self, platform): - m = Module() - - to_transfer = Signal(8) - enough_pressure = Signal(len(self.fifos)) - any_enough_pressure = Signal() - m.d.comb += any_enough_pressure.eq(enough_pressure.any()) - - with m.FSM(): - for i in range(len(self.input)): - fifo = self.fifos[i] - compressor = self.compressors[i] - m.submodules[f"fifo_{i}"] = fifo - m.submodules[f"compressor_{i}"] = compressor - - m.d.comb += fifo.w_en.eq(compressor.valid) - m.d.comb += fifo.w_data.eq(compressor.value) - - m.d.sync += enough_pressure[i].eq(fifo.r_level > self._pressure_threshold) - - def go_to_next(i): - with m.If(any_enough_pressure): - m.next = f"announce_{(i + 1) % len(self.input)}" - with m.Else(): - m.next = f"wait_{(i + 1) % len(self.input)}" - - - with m.State(f"wait_{i}"): - with m.If(any_enough_pressure): - m.next = f"announce_{i}" - with m.State(f"announce_{i}"): - m.d.comb += self.output_fifo.w_data.eq(fifo.r_level) - m.d.comb += self.output_fifo.w_en.eq(1) - m.d.sync += to_transfer.eq(fifo.r_level) - with m.If(self.output_fifo.w_rdy): - with m.If(fifo.r_level > 0): - m.next = f"send_{i}_lower" - with m.Else(): - go_to_next(i) - - with m.State(f"send_{i}_lower"): - m.d.comb += self.output_fifo.w_data.eq(fifo.r_data[0:8]) - m.d.comb += self.output_fifo.w_en.eq(1) - with m.If(self.output_fifo.w_rdy): - m.next = f"send_{i}_upper" - - with m.State(f"send_{i}_upper"): - m.d.comb += self.output_fifo.w_data.eq(fifo.r_data[8:16]) - m.d.comb += self.output_fifo.w_en.eq(1) - with m.If(self.output_fifo.w_rdy): - m.d.comb += fifo.r_en.eq(1) - with m.If(to_transfer > 
1): - m.next = f"send_{i}_lower" - m.d.sync += to_transfer.eq(to_transfer - 1) - with m.Else(): - go_to_next(i) - - return m - - @staticmethod - async def read_chunk(read: Callable[[int], bytes]): - length = (await read(1))[0] - contents = (await read(2 * length)) - return [contents[2*i+1] << 8 | contents[2*i] for i in range(length)] diff --git a/software/glasgow/applet/interface/better_la/arbiter.py b/software/glasgow/applet/interface/better_la/arbiter.py new file mode 100644 index 000000000..e96850f3b --- /dev/null +++ b/software/glasgow/applet/interface/better_la/arbiter.py @@ -0,0 +1,110 @@ +from typing import Callable, List +from amaranth import * +from amaranth.lib.fifo import SyncFIFOBuffered + +from .signal_compressor import SignalCompressor +from .step_encoder import StepEncoder +from .argmax import ArgMax + +class LAArbiter(Elaboratable): + """This Logic Analyzer arbiter instanciates n Signal compressors and n Fifos and arbeites the + output of the fifos based on priority. Its output format is one byte of + [4bit channel][4bit length encoded using the table below] followed by 2*length bytes of + compressed channel data. 
+ """ + + LENGTH_ENCODING = [1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 255] + + def __init__(self, output_fifo: SyncFIFOBuffered, n_channels=16): + self.output_fifo = output_fifo + assert output_fifo.width == 8 + self.input = Signal(n_channels) + + def elaborate(self, platform): + m = Module() + + fifos: List[SyncFIFOBuffered] = [] + encoded_fifo_levels = [] + for i, sig in enumerate(self.input): + fifo = SyncFIFOBuffered(width=16, depth=256) # this is exactly one ice40 bram + m.submodules[f"fifo_{i}"] = fifo + fifos.append(fifo) + + compressor = SignalCompressor(sig) + m.submodules[f"compressor_{i}"] = compressor + m.d.comb += fifo.w_en.eq(compressor.valid) + m.d.comb += fifo.w_data.eq(compressor.value) + + step_encoder = StepEncoder(fifo.r_level, self.LENGTH_ENCODING) + m.submodules[f"step_encoder_{i}"] = step_encoder + encoded_fifo_levels.append(step_encoder.output) + + fifo_r_data = Array(fifo.r_data for fifo in fifos) + fifo_r_en = Array(fifo.r_en for fifo in fifos) + fifo_r_rdy = Array(fifo.r_rdy for fifo in fifos) + length_decoding = Array(self.LENGTH_ENCODING) + + # the argmax introduces 2 cycles of latency with pipelining to meet timing + # to acomodate for that we get the real level of the selected fifo in a combinatorial path + # it does not matter if we select a suboptimal fifo but it is bad if we assume a wrong level + argmax = m.submodules.argmax = ArgMax(encoded_fifo_levels, sync_levels=[1, 3]) + max_fifo_idx = argmax.max_idx + encoded_fifo_levels_array = Array(encoded_fifo_levels) + max_fifo_level_encoded = Signal(4) + m.d.comb += max_fifo_level_encoded.eq(encoded_fifo_levels_array[max_fifo_idx]) + max_fifo_level = Signal(8) + m.d.comb += max_fifo_level.eq(length_decoding[max_fifo_level_encoded]) + max_fifo_r_rdy = Signal() + m.d.comb += max_fifo_r_rdy.eq(fifo_r_rdy[max_fifo_idx]) + + to_transfer = Signal(4) + current_channel = Signal(4) + with m.FSM(): + with m.State("wait"): + with m.If(max_fifo_r_rdy): + m.next = "announce" + + with 
m.State("announce"): + m.d.sync += to_transfer.eq(max_fifo_level) + m.d.sync += current_channel.eq(max_fifo_idx) + + m.d.comb += self.output_fifo.w_data.eq(Cat(max_fifo_idx, max_fifo_level_encoded)) + m.d.comb += self.output_fifo.w_en.eq(max_fifo_r_rdy) + with m.If(~max_fifo_r_rdy): + m.next = "wait" + with m.Elif(self.output_fifo.w_rdy): + m.next = "send_lower" + + with m.State("send_lower"): + m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][0:8]) + m.d.comb += self.output_fifo.w_en.eq(1) + with m.If(self.output_fifo.w_rdy): + m.next = "send_upper" + with m.State("send_upper"): + m.d.comb += self.output_fifo.w_data.eq(fifo_r_data[current_channel][8:16]) + m.d.comb += self.output_fifo.w_en.eq(1) + with m.If(self.output_fifo.w_rdy): + m.d.comb += fifo_r_en[current_channel].eq(1) + with m.If(to_transfer > 1): + m.next = "send_lower" + m.d.sync += to_transfer.eq(to_transfer - 1) + with m.Else(): + with m.If(max_fifo_r_rdy): + m.next = "announce" + with m.Else(): + m.next = "wait" + + return m + + @staticmethod + async def read_chunk(read: Callable[[int], bytes]): + header = (await read(1))[0] + if header is None: + return None + channel = header & 0b1111 + length_encoded = header >> 4 + length = LAArbiter.LENGTH_ENCODING[length_encoded] + contents = (await read(2 * length)) + if contents is None: + return None + return channel, [contents[2*i+1] << 8 | contents[2*i] for i in range(length)] diff --git a/software/glasgow/applet/interface/better_la/argmax.py b/software/glasgow/applet/interface/better_la/argmax.py new file mode 100644 index 000000000..95db8477d --- /dev/null +++ b/software/glasgow/applet/interface/better_la/argmax.py @@ -0,0 +1,52 @@ +from typing import List +from amaranth import * + +class ArgMax(Elaboratable): + """ + Find the maximum value and the index of the maximum value of a list of signals using a + comparison-tree. 
+    """
+    def __init__(self, signals: List[Signal], sync_levels=None):  # None avoids a shared mutable default
+        self.signals = signals
+
+        self.sync_levels = [] if sync_levels is None else sync_levels  # tree levels that get registered
+
+        self.max_value = Signal.like(signals[0])  # largest of the input values
+        self.max_idx = Signal(range(len(signals)))  # position of that value in `signals`
+
+    def elaborate(self, platform):
+        m = Module()
+
+        def build_tree(signals, offset=0, level=0):  # `offset` is the index of signals[0] in self.signals
+            suffix = f"l{level}_{offset}to{offset+len(signals)}"
+
+            domain = m.d.sync if level in self.sync_levels else m.d.comb
+
+            if len(signals) == 1:
+                return signals[0], offset  # NOTE(review): a lone leaf is passed up unregistered, even at a sync level
+            elif len(signals) == 2:
+                a, b = signals
+                value = Signal.like(self.signals[0], name=f"max_val_{suffix}")
+                index = Signal.like(self.max_idx, name=f"max_idx_{suffix}")
+                domain += [
+                    value.eq(Mux(a > b, a, b)),  # on a tie the second operand (higher offset) wins
+                    index.eq(Mux(a > b, offset, offset + 1))
+                ]
+                return value, index
+            else:
+                half = len(signals) // 2
+                a, a_idx = build_tree(signals[:half], offset=offset, level=level+1)
+                b, b_idx = build_tree(signals[half:], offset=offset + half, level=level+1)
+                value = Signal.like(self.signals[0], name=f"max_val_{suffix}")
+                index = Signal.like(self.max_idx, name=f"max_idx_{suffix}")
+                domain += [
+                    value.eq(Mux(a > b, a, b)),  # on a tie the upper half wins
+                    index.eq(Mux(a > b, a_idx, b_idx))
+                ]
+                return value, index
+
+        val, idx = build_tree(self.signals)
+        m.d.comb += self.max_value.eq(val)
+        m.d.comb += self.max_idx.eq(idx)
+
+        return m
diff --git a/software/glasgow/applet/interface/better_la/signal_compressor.py b/software/glasgow/applet/interface/better_la/signal_compressor.py
index d49a839bd..f405e8e0b 100644
--- a/software/glasgow/applet/interface/better_la/signal_compressor.py
+++ b/software/glasgow/applet/interface/better_la/signal_compressor.py
@@ -58,7 +58,7 @@ def elaborate(self, platform):
 
     @staticmethod
     def decode_pkg(pkg) -> List[Tuple[int, int]]:
-        if pkg & 0b01:
+        if pkg & 0b1:
             value = pkg >> 1 & 0b01
             duration = pkg >> 2
             return [(value, duration + 1)]
diff --git a/software/glasgow/applet/interface/better_la/step_encoder.py b/software/glasgow/applet/interface/better_la/step_encoder.py
new file mode 100644
index 000000000..0966b8ed3
--- /dev/null
+++ b/software/glasgow/applet/interface/better_la/step_encoder.py
@@ -0,0 +1,22 @@
+from typing import List
+from amaranth import *
+
+class StepEncoder(Elaboratable):
+    def __init__(self, input: Signal, possible_values: List[int]):
+        self.input = input
+        self.possible_values = possible_values
+
+        self.output = Signal(range(len(possible_values)))  # index into possible_values
+
+    def elaborate(self, platform):
+        m = Module()
+
+        for i, v in enumerate(self.possible_values):
+            with m.If(self.input >= v):  # later assignments override earlier ones: the last matching entry wins
+                m.d.comb += self.output.eq(i)
+
+        # we add this to have a sync domain and be able to use the simulation helpers
+        a = Signal()
+        m.d.sync += a.eq(~a)
+
+        return m
diff --git a/software/glasgow/applet/interface/better_la/test.py b/software/glasgow/applet/interface/better_la/test.py
index 254e58f98..ca10c60ea 100644
--- a/software/glasgow/applet/interface/better_la/test.py
+++ b/software/glasgow/applet/interface/better_la/test.py
@@ -5,7 +5,9 @@
 from ....gateware import simulation_test
 from ....applet import GlasgowAppletTestCase, applet_simulation_test, synthesis_test
 from .signal_compressor import SignalCompressor
-from .arbeiter import LAArbeiter
+from .arbiter import LAArbiter
+from .argmax import ArgMax
+from .step_encoder import StepEncoder
 from . import BetterLAApplet
 
 
@@ -72,7 +74,48 @@ def test_decode(self, tb):
         assert decoded[2:] == tx[:len(decoded)-2]
 
 
+class ArgMaxTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = ArgMax([Signal(8, name=f"input_{i}") for i in range(10)], sync_levels=[1, 3])
+
+    @simulation_test
+    def test(self, tb):
+        yield self.tb.signals[3].eq(10)
+        yield  # NOTE(review): three clock cycles, presumably to flush the registered levels - confirm
+        yield
+        yield
+        assert (yield self.tb.max_idx) == 3
+        assert (yield self.tb.max_value) == 10
+
+        yield self.tb.signals[7].eq(22)
+        yield
+        yield
+        yield
+        assert (yield self.tb.max_idx) == 7
+        assert (yield self.tb.max_value) == 22
+
+
+class StepEncoderTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tb = StepEncoder(Signal(8, name="input"), LAArbiter.LENGTH_ENCODING)
+
+    @simulation_test
+    def test(self, tb):
+        testdata = [  # (input value, expected output index)
+            (0, 0),
+            (1, 0),
+            (10, 5),
+            (100, 12)
+        ]
+
+        for value, expected in testdata:  # names chosen so the builtin input() is not shadowed
+            yield self.tb.input.eq(value)
+            yield
+            assert (yield self.tb.output) == expected
+
+
 class BetterLAAppletTestCase(GlasgowAppletTestCase, applet=BetterLAApplet):
+    @unittest.skip("this applet does not meet timing on revA but there is no way to specify a different revision")
     @synthesis_test
     def test_build(self):
         self.assertBuilds()
@@ -88,14 +131,13 @@ def setup_demo_source(self):
     async def test_smoke(self):
         applet = await self.run_simulated_applet()
         channels = [[] for _ in range(16)]
-        for _ in range(15):
-            for n, channel in enumerate(channels):
-                chunk = await LAArbeiter.read_chunk(applet.read)
-                assert len(chunk) < 255 - 16
-                for pkg in chunk:
-                    duration_list = SignalCompressor.decode_pkg(pkg)
-                    expanded = SignalCompressor.expand_duration_list(duration_list)
-                    channel.extend(expanded)
+        for _ in range(100):
+            channel, chunk = await LAArbiter.read_chunk(applet.read)
+            assert len(chunk) != 255
+            for pkg in chunk:
+                duration_list = SignalCompressor.decode_pkg(pkg)
+                expanded = SignalCompressor.expand_duration_list(duration_list)
+                channels[channel].extend(expanded)
         for i, channel in enumerate(channels):
             duration = 0
             last = 0

From 1ad5d3c084a94e0e56ded0c64a207d90fad48962 Mon Sep 17 00:00:00 2001
From: Jaro Habiger
Date: Thu, 16 Nov 2023 12:21:47 +0100
Subject: [PATCH 3/3] applet.interface.better_la: add required_revision

---
 software/glasgow/applet/interface/better_la/__init__.py | 3 +++
 software/glasgow/applet/interface/better_la/test.py     | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/software/glasgow/applet/interface/better_la/__init__.py b/software/glasgow/applet/interface/better_la/__init__.py
index 30a40fbb9..3507eb279 100644
--- a/software/glasgow/applet/interface/better_la/__init__.py
+++ b/software/glasgow/applet/interface/better_la/__init__.py
@@ -64,6 +64,9 @@ class BetterLAApplet(GlasgowApplet):
     A somewhat better logic analyzer applet that allows for the capture of traces as VCD files.
     """
 
+    # The FPGA on revA/revB is too slow for the complicated logic in this Applet
+    required_revision = "C0"
+
     @classmethod
     def add_build_arguments(cls, parser, access):
         super().add_build_arguments(parser, access)
diff --git a/software/glasgow/applet/interface/better_la/test.py b/software/glasgow/applet/interface/better_la/test.py
index ca10c60ea..9f8ec4add 100644
--- a/software/glasgow/applet/interface/better_la/test.py
+++ b/software/glasgow/applet/interface/better_la/test.py
@@ -115,7 +115,6 @@ def test(self, tb):
 
 
 class BetterLAAppletTestCase(GlasgowAppletTestCase, applet=BetterLAApplet):
-    @unittest.skip("this applet does not meet timing on revA but there is no way to specify a different revision")
     @synthesis_test
     def test_build(self):
         self.assertBuilds()