import operator

from amaranth import *
from amaranth.lib import wiring
from amaranth.lib.wiring import In, Out


class Accumulator(wiring.Component):
    """Pipelined arithmetic accumulator.

    Computes :py:`new_sum = old_sum + addend` using at most :py:`stage_width` wide adders, with
    a latency of :py:`(width + stage_width - 1) // stage_width + 1` cycles and throughput of one
    addition per cycle.

    Parameters
    ----------
    width : int
        Width of the addend and of the accumulated sum, in bits. Must be at least 1.
    stage_width : int
        Maximum width of the adder used by a single pipeline stage, in bits. Must be at least 1.

    Members
    -------
    addend : In(width)
        Addend.
    sum : Out(width)
        Accumulated sum.

    Raises
    ------
    TypeError
        If :py:`width` or :py:`stage_width` cannot be losslessly converted to an integer.
    ValueError
        If :py:`width` or :py:`stage_width` is less than 1.
    """
    def __init__(self, width, *, stage_width=16):
        self._width = operator.index(width)
        self._stage_width = operator.index(stage_width)
        # Validate explicitly rather than with `assert`, which is removed under `python -O`.
        if self._width < 1:
            raise ValueError(f"Accumulator width must be at least 1, not {self._width}")
        if self._stage_width < 1:
            raise ValueError(
                f"Accumulator stage width must be at least 1, not {self._stage_width}")
        # One cycle to register the input, plus one cycle per `stage_width`-bit slice (rounded up).
        self._stages = 1 + (self._width + self._stage_width - 1) // self._stage_width
        super().__init__({
            "addend": In(self._width),
            "sum": Out(self._width)
        })

    @property
    def stages(self):
        """Number of pipeline stages.

        This is the same value as the latency expression given in the class documentation.
        """
        return self._stages

    def elaborate(self, platform):
        m = Module()

        # State threaded from one stage to the next while the pipeline is being built:
        # - `carry`: carry into the current stage (constant zero for the least significant one);
        # - `addend`: the addend bits that have not been consumed by a lower stage yet;
        # - `result`: the sum slices produced by the lower stages, least significant first.
        carry = Const(0)
        addend = Signal.like(self.addend)
        result = Cat()

        # Register the input before it enters the first adder.
        m.d.sync += addend.eq(self.addend)

        # Split the full width into slices of at most `stage_width` bits, starting from the least
        # significant bit; the last slice is narrower if the width is not an exact multiple.
        for index, start_at in enumerate(range(0, self._width, self._stage_width)):
            stage_width = min(self._width - start_at, self._stage_width)

            carry_next = Signal(name=f"carry{index}")
            addend_next = Signal.like(addend[stage_width:], name=f"addend{index}")
            result_next = Signal.like(result, name=f"result{index}")
            stage = Signal(stage_width, name=f"stage{index}")

            # `stage` holds this slice of the running sum. The adder output is one bit wider than
            # the slice; the extra bit is registered as the carry into the next stage.
            m.d.sync += Cat(stage, carry_next).eq(stage + addend[:stage_width] + carry)
            # Pass the remaining upper addend bits on to the next stage, one cycle later.
            m.d.sync += addend_next.eq(addend[stage_width:])
            # Delay the slices produced by lower stages by one more cycle.
            m.d.sync += result_next.eq(result)

            carry = carry_next
            addend = addend_next
            result = Cat(result_next, stage)

        # The carry out of the most significant stage is not used; `sum` is `width` bits wide.
        m.d.comb += self.sum.eq(result)

        return m
a/software/glasgow/gateware/registers.py b/software/glasgow/gateware/registers.py index 37c4061a1..f8bb3b6db 100644 --- a/software/glasgow/gateware/registers.py +++ b/software/glasgow/gateware/registers.py @@ -34,6 +34,20 @@ def add_rw(self, *args, **kwargs): self.regs_w.append(reg) return reg, addr + def add_existing_ro(self, reg): + addr = self.reg_count + self.reg_count += 1 + self.regs_r.append(reg) + self.regs_w.append(Signal(name="ro_reg_dummy")) + return addr + + def add_existing_rw(self, reg): + addr = self.reg_count + self.reg_count += 1 + self.regs_r.append(reg) + self.regs_w.append(reg) + return addr + def elaborate(self, platform): m = Module() return m @@ -56,7 +70,7 @@ def elaborate(self, platform): if self.reg_count != 0: latch_addr = Signal() reg_addr = Signal(range(self.reg_count)) - reg_data = Signal(max(len(s) for s in self.regs_r)) + reg_data = Signal(max(len(Value.cast(s)) for s in self.regs_r)) m.d.comb += self.i2c_target.data_o.eq(reg_data) diff --git a/software/glasgow/support/asignal.py b/software/glasgow/support/asignal.py index 6ef379cb9..35aa45b77 100644 --- a/software/glasgow/support/asignal.py +++ b/software/glasgow/support/asignal.py @@ -14,7 +14,7 @@ def callback(future): signal.signal(signum, old_handler) future.add_done_callback(callback) def handler(signum, frame): - loop.call_soon_threadsafe(lambda: future.set_result(None)) + loop.call_soon_threadsafe(lambda: future.done() or future.set_result(None)) signal.signal(signum, old_handler) old_handler = signal.signal(signum, handler) return future diff --git a/software/glasgow/support/os_network.py b/software/glasgow/support/os_network.py index 0ed3c4841..7b98e0ba6 100644 --- a/software/glasgow/support/os_network.py +++ b/software/glasgow/support/os_network.py @@ -25,29 +25,49 @@ def __init__(self, name: 'str | bytes'): struct.pack("16sH22s", name, LINUX_IFF_TAP | LINUX_IFF_NO_PI, b"")) def fileno(self): + """Raw file descriptor. 
+ + The file descriptor may be useful for operations such as :meth:`fcntl.ioctl` or fine-grained + buffering that is not achievable with :meth:`send` and :meth:`recv`. + """ return self._fd - def send(self, packet: 'bytes | bytearray | memoryview') -> asyncio.Future: - loop = asyncio.get_event_loop() - future = asyncio.Future() - def callback(): - loop.remove_writer(self._fd) - try: + async def send(self, packets: 'list[bytes | bytearray | memoryview]'): + """Send packets. + + To improve throughput, :meth:`send` can queue multiple packets. + + Calling :meth:`send` twice concurrently on the same interface has undefined behavior. + """ + try: + for packet in packets: os.write(self._fd, packet) - future.set_result(None) - except Exception as exc: - future.set_exception(exc) - loop.add_writer(self._fd, callback) - return future + except BlockingIOError: # write until the buffer is full + pass + + async def recv(self, *, length=65536) -> 'list[bytes | bytearray | memoryview]': + """Receive packets. + + To improve throughput, :meth:`recv` dequeues all available packets. Packets longer than + :py:`length` are truncated to that length, without indication of it. - def recv(self, *, length=65536) -> asyncio.Future: + Calling :meth:`recv` twice concurrently on the same interface has undefined behavior. + """ loop = asyncio.get_event_loop() future = asyncio.Future() def callback(): loop.remove_reader(self._fd) try: - future.set_result(os.read(self._fd, length)) + packets = [] + while True: + packets.append(os.read(self._fd, length)) + except BlockingIOError: # read all of the ones available + future.set_result(packets) except Exception as exc: future.set_exception(exc) + else: + future.set_result(packets) + # I have benchmarked this and trying to do a speculative `os.read` instead of requiring + # the loop to poll the fd at least once doesn't result in any performance improvement. 
loop.add_reader(self._fd, callback) - return future + return await future diff --git a/software/tests/gateware/test_accumulator.py b/software/tests/gateware/test_accumulator.py new file mode 100644 index 000000000..1a54c5071 --- /dev/null +++ b/software/tests/gateware/test_accumulator.py @@ -0,0 +1,25 @@ +import unittest +from amaranth import * +from amaranth.sim import Tick + +from glasgow.gateware import simulation_test +from glasgow.gateware.accumulator import Accumulator + + +class AccumulatorTestCase(unittest.TestCase): + def setUp(self): + self.tb = Accumulator(5, stage_width=2) + + @simulation_test() + def test_counter(self, tb): + total = 0 + queue = [0] * (self.tb.stages + 1) + for i in range(100): + addend = i * 2137 % 32 + total += addend + total %= 32 + queue.append(total) + self.assertEqual(queue[0], (yield self.tb.sum)) + del queue[0] + yield self.tb.addend.eq(addend) + yield Tick()