From 301e1979613af810fd81516dde1233fedd2cd56e Mon Sep 17 00:00:00 2001 From: msinkec Date: Tue, 26 May 2026 09:35:17 +0200 Subject: [PATCH] enforce canonical encoding for arkade payloads --- pkg/ark-lib/asset/asset_group.go | 13 +++++ .../asset/asset_group_canonical_test.go | 44 ++++++++++++++ pkg/ark-lib/asset/packet_fuzz_test.go | 28 +++++---- pkg/ark-lib/asset/utils.go | 8 ++- pkg/ark-lib/extension/canonical_test.go | 55 ++++++++++++++++++ pkg/ark-lib/extension/extension.go | 35 ++++++++++- pkg/ark-lib/extension/extension_fuzz_test.go | 46 +++++++++++++++ pkg/ark-lib/internal/varint/varint.go | 30 ++++++++++ pkg/ark-lib/internal/varint/varint_test.go | 58 +++++++++++++++++++ 9 files changed, 303 insertions(+), 14 deletions(-) create mode 100644 pkg/ark-lib/asset/asset_group_canonical_test.go create mode 100644 pkg/ark-lib/extension/canonical_test.go create mode 100644 pkg/ark-lib/extension/extension_fuzz_test.go create mode 100644 pkg/ark-lib/internal/varint/varint.go create mode 100644 pkg/ark-lib/internal/varint/varint_test.go diff --git a/pkg/ark-lib/asset/asset_group.go b/pkg/ark-lib/asset/asset_group.go index e977bfd92..2728fd228 100644 --- a/pkg/ark-lib/asset/asset_group.go +++ b/pkg/ark-lib/asset/asset_group.go @@ -236,6 +236,14 @@ func newAssetGroupFromReader(r *bytes.Reader) (*AssetGroup, error) { return nil, err } + // Canonical encoding: only the three defined presence bits may be set. + const definedPresenceMask = maskAssetId | maskControlAsset | maskMetadata + if presence&^definedPresenceMask != 0 { + return nil, fmt.Errorf( + "non-canonical asset group: undefined presence bits set in 0x%02x", presence, + ) + } + var assetId *AssetId var controlAsset *AssetRef var metadata []Metadata @@ -263,6 +271,11 @@ func newAssetGroupFromReader(r *bytes.Reader) (*AssetGroup, error) { if err != nil { return nil, err } + if len(metadata) == 0 { + return nil, fmt.Errorf( + "non-canonical asset group: metadata flag set but list is empty", + ) + } } // 3. Inputs diff --git a/pkg/ark-lib/asset/asset_group_canonical_test.go b/pkg/ark-lib/asset/asset_group_canonical_test.go new file mode 100644 index 000000000..a4775d9e6 --- /dev/null +++ b/pkg/ark-lib/asset/asset_group_canonical_test.go @@ -0,0 +1,44 @@ +package asset_test + +import ( + "encoding/hex" + "testing" + + "github.com/arkade-os/arkd/pkg/ark-lib/asset" + "github.com/stretchr/testify/require" +) + +// baselineGroupHex is a canonical issuance group: presence 0x00, no inputs, one +// output (type=local, vout=0, amount=1). Bytes: 00 | 00 | 01 | 01 0000 01. +const baselineGroupHex = "00000101000001" + +func mustHex(t *testing.T, s string) []byte { + t.Helper() + b, err := hex.DecodeString(s) + require.NoError(t, err) + return b +} + +func TestAssetGroupBaselineIsCanonical(t *testing.T) { + ag, err := asset.NewAssetGroupFromBytes(mustHex(t, baselineGroupHex)) + require.NoError(t, err) + got, err := ag.Serialize() + require.NoError(t, err) + require.Equal(t, baselineGroupHex, hex.EncodeToString(got)) +} + +func TestAssetGroupRejectsUndefinedPresenceBits(t *testing.T) { + // Presence 0x08: a bit outside the defined mask (0x07), otherwise an + // issuance with the same body as the baseline. + _, err := asset.NewAssetGroupFromBytes(mustHex(t, "08000101000001")) + require.Error(t, err) + require.Contains(t, err.Error(), "non-canonical") +} + +func TestAssetGroupRejectsMetadataFlagWithEmptyList(t *testing.T) { + // Presence 0x04 (metadata) but metadata count is 0x00 (empty list). + // Bytes: 04 | 00(md count) | 00(inputs) | 01 01 0000 01 (one output). + _, err := asset.NewAssetGroupFromBytes(mustHex(t, "0400000101000001")) + require.Error(t, err) + require.Contains(t, err.Error(), "non-canonical") +} diff --git a/pkg/ark-lib/asset/packet_fuzz_test.go b/pkg/ark-lib/asset/packet_fuzz_test.go index 1e84ff1e4..07a95ab62 100644 --- a/pkg/ark-lib/asset/packet_fuzz_test.go +++ b/pkg/ark-lib/asset/packet_fuzz_test.go @@ -38,20 +38,28 @@ func FuzzNewPacketFromBytes(f *testing.F) { return } - // If parsing succeeded, serialization should succeed. + // Canonical property: any input that parses successfully must be its own + // canonical serialization. A failure here means NewPacketFromBytes accepted + // a non-canonical encoding that some guard failed to reject. serialized, err := pkt.Serialize() require.NoError(t, err) + require.Equalf(t, data, serialized, + "non-canonical input accepted: data=%x reserialized=%x", data, serialized) + }) +} - // Re-parsing serialized bytes should also succeed. - pkt2, err := asset.NewPacketFromBytes(serialized) - require.NoError(t, err) - - reserialized, err := pkt2.Serialize() - require.NoError(t, err) +func TestPacketRejectsOverlongGroupCount(t *testing.T) { + // Canonical: group count 0x01 + one canonical issuance group. + canonical, err := hex.DecodeString("0100000101000001") + require.NoError(t, err) + _, err = asset.NewPacketFromBytes(canonical) + require.NoError(t, err) - // Canonical serialized bytes should be stable across parse/serialize cycles. - require.Equalf(t, serialized, reserialized, "non-stable roundtrip: pkt=%x pkt2=%x", serialized, reserialized) - }) + // Non-canonical: the leading count 0x01 re-encoded overlong as 0x81 0x00. + nonCanonical, err := hex.DecodeString("810000000101000001") + require.NoError(t, err) + _, err = asset.NewPacketFromBytes(nonCanonical) + require.Error(t, err) } func addHexSeed(f *testing.F, s string) { diff --git a/pkg/ark-lib/asset/utils.go b/pkg/ark-lib/asset/utils.go index e343f2dbf..7234303fc 100644 --- a/pkg/ark-lib/asset/utils.go +++ b/pkg/ark-lib/asset/utils.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "io" + "github.com/arkade-os/arkd/pkg/ark-lib/internal/varint" "github.com/btcsuite/btcd/chaincfg/chainhash" ) @@ -62,9 +63,10 @@ func deserializeUint16(r *bytes.Reader) (uint16, error) { return binary.LittleEndian.Uint16(buf[:]), nil } -// deserializeVarUint reads a variable-length unsigned integer (uint64) from the reader. +// deserializeVarUint reads a canonical LEB128 unsigned integer from the reader, +// rejecting non-minimal (non-canonical) encodings. See varint.ReadCanonical. func deserializeVarUint(r *bytes.Reader) (uint64, error) { - return binary.ReadUvarint(r) + return varint.ReadCanonical(r) } // deserializeSlice reads exactly size bytes from the reader into a new slice. @@ -81,7 +83,7 @@ func deserializeSlice(r *bytes.Reader, size int) ([]byte, error) { // deserializeVarSlice reads a varint length prefix followed by that many bytes from the reader. func deserializeVarSlice(r *bytes.Reader) ([]byte, error) { - l, err := binary.ReadUvarint(r) + l, err := deserializeVarUint(r) if err != nil { return nil, err } diff --git a/pkg/ark-lib/extension/canonical_test.go b/pkg/ark-lib/extension/canonical_test.go new file mode 100644 index 000000000..4f1f8d1ce --- /dev/null +++ b/pkg/ark-lib/extension/canonical_test.go @@ -0,0 +1,55 @@ +package extension_test + +import ( + "encoding/hex" + "testing" + + "github.com/arkade-os/arkd/pkg/ark-lib/extension" + "github.com/stretchr/testify/require" +) + +// A minimal valid extension script: +// +// 6a 0d OP_RETURN, push 13 bytes +// 41 52 4b magic "ARK" +// 00 packet type 0x00 (asset) +// 08 packet length prefix = 8 +// 01 00000101000001 asset packet: 1 group (canonical issuance) +const canonicalExtensionHex = "6a0d41524b00080100000101000001" + +// Same payload, but the packet length prefix 0x08 is encoded non-minimally as +// 0x88 0x00. The OP_RETURN push length grows from 0x0d (13) to 0x0e (14). +const overlongPrefixExtensionHex = "6a0e41524b0088000100000101000001" + +func mustHex(t *testing.T, s string) []byte { + t.Helper() + b, err := hex.DecodeString(s) + require.NoError(t, err) + return b +} + +func TestExtensionCanonicalIsAccepted(t *testing.T) { + ext, err := extension.NewExtensionFromBytes(mustHex(t, canonicalExtensionHex)) + require.NoError(t, err) + require.Len(t, ext, 1) +} + +func TestExtensionRejectsOverlongPacketLengthPrefix(t *testing.T) { + _, err := extension.NewExtensionFromBytes(mustHex(t, overlongPrefixExtensionHex)) + require.Error(t, err) +} + +func TestExtensionRejectsTrailingBytes(t *testing.T) { + // Canonical script followed by a stray OP_1 (0x51) after the payload push. + _, err := extension.NewExtensionFromBytes(mustHex(t, canonicalExtensionHex+"51")) + require.Error(t, err) + require.Contains(t, err.Error(), "non-canonical") +} + +func TestExtensionRejectsNonMinimalPush(t *testing.T) { + // Same 13-byte payload pushed via OP_PUSHDATA1 (0x4c 0x0d) instead of the + // minimal direct push (0x0d). + _, err := extension.NewExtensionFromBytes(mustHex(t, "6a4c0d41524b00080100000101000001")) + require.Error(t, err) + require.Contains(t, err.Error(), "non-canonical") +} diff --git a/pkg/ark-lib/extension/extension.go b/pkg/ark-lib/extension/extension.go index c33d25950..d390d1557 100644 --- a/pkg/ark-lib/extension/extension.go +++ b/pkg/ark-lib/extension/extension.go @@ -9,6 +9,7 @@ import ( "reflect" "github.com/arkade-os/arkd/pkg/ark-lib/asset" + "github.com/arkade-os/arkd/pkg/ark-lib/internal/varint" "github.com/btcsuite/btcd/txscript" "github.com/btcsuite/btcd/wire" ) @@ -113,6 +114,21 @@ func opReturnScript(data []byte) []byte { return append(script, data...) } +// minimalPushOpcode returns the canonical opcode used to push n data bytes, +// matching the encoding produced by opReturnScript. +func minimalPushOpcode(n int) byte { + switch { + case n <= 75: + return byte(n) + case n <= 255: + return byte(txscript.OP_PUSHDATA1) + case n <= 65535: + return byte(txscript.OP_PUSHDATA2) + default: + return byte(txscript.OP_PUSHDATA4) + } +} + // TxOut serializes the extension and returns it as an unspendable OP_RETURN transaction output. func (e Extension) TxOut() (*wire.TxOut, error) { script, err := e.Serialize() @@ -190,6 +206,23 @@ func NewExtensionFromBytes(data []byte) (Extension, error) { } payload := tokenizer.Data() + pushOpcode := tokenizer.Opcode() + + // Canonical framing (narrow checks): the payload must use the minimal + // data-push opcode and there must be no trailing bytes after it, so an + // accepted script re-serializes to exactly the same bytes. + if tokenizer.Next() { + return nil, fmt.Errorf("non-canonical extension: trailing data after payload") + } + if err := tokenizer.Err(); err != nil { + return nil, fmt.Errorf("non-canonical extension: invalid script: %w", err) + } + if pushOpcode != minimalPushOpcode(len(payload)) { + return nil, fmt.Errorf( + "non-canonical extension: non-minimal data push opcode 0x%02x", pushOpcode, + ) + } + pr := bytes.NewReader(payload) // read magic prefix @@ -248,7 +281,7 @@ func parsePacket(packetType uint8, packetData []byte) (Packet, error) { // deserializeVarSlice reads a varint length prefix followed by that many bytes from the reader. func deserializeVarSlice(r *bytes.Reader) ([]byte, error) { - l, err := binary.ReadUvarint(r) + l, err := varint.ReadCanonical(r) if err != nil { return nil, err } diff --git a/pkg/ark-lib/extension/extension_fuzz_test.go b/pkg/ark-lib/extension/extension_fuzz_test.go new file mode 100644 index 000000000..5e73a66cf --- /dev/null +++ b/pkg/ark-lib/extension/extension_fuzz_test.go @@ -0,0 +1,46 @@ +package extension_test + +import ( + "encoding/hex" + "testing" + + "github.com/arkade-os/arkd/pkg/ark-lib/extension" + "github.com/stretchr/testify/require" +) + +func FuzzNewExtensionFromBytes(f *testing.F) { + for _, s := range []string{ + "6a0d41524b00080100000101000001", // canonical + "6a0e41524b0088000100000101000001", // overlong packet-length prefix + "6a4c0d41524b00080100000101000001", // non-minimal push (PUSHDATA1) + "6a0d41524b0008010000010100000151", // trailing OP_1 after payload + } { + f.Add(hexSeed(s)) + } + f.Add([]byte{}) + f.Add([]byte{0x6a}) // OP_RETURN only + f.Add([]byte{0x6a, 0x03, 0x41, 0x52, 0x4b}) // OP_RETURN + magic only (no packets) + + f.Fuzz(func(t *testing.T, data []byte) { + ext, err := extension.NewExtensionFromBytes(data) + if err != nil { + return + } + + // Round-trip oracle: any accepted script must re-serialize to exactly the + // input bytes. A failure means NewExtensionFromBytes accepted a non-canonical + // encoding that the narrow framing/packet checks failed to reject. + reser, err := ext.Serialize() + require.NoError(t, err) + require.Equalf(t, data, reser, + "non-canonical extension accepted: data=%x reser=%x", data, reser) + }) +} + +func hexSeed(s string) []byte { + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} diff --git a/pkg/ark-lib/internal/varint/varint.go b/pkg/ark-lib/internal/varint/varint.go new file mode 100644 index 000000000..ff2099e2b --- /dev/null +++ b/pkg/ark-lib/internal/varint/varint.go @@ -0,0 +1,30 @@ +// Package varint provides canonical (minimal) LEB128 unsigned-integer decoding +// shared by the ark-lib wire formats. +package varint + +import ( + "bytes" + "encoding/binary" + "errors" +) + +// ErrNonMinimal is returned when a varint is encoded with more bytes than +// necessary (a droppable all-zero high 7-bit group), i.e. non-canonical. +var ErrNonMinimal = errors.New("non-canonical (non-minimal) varint encoding") + +// ReadCanonical reads a canonical LEB128 unsigned integer from r. Decoding and +// 64-bit overflow detection are delegated to binary.ReadUvarint; non-minimal +// encodings are rejected by requiring the bytes consumed to equal the length of +// the value's canonical (shortest) re-encoding. +func ReadCanonical(r *bytes.Reader) (uint64, error) { + before := r.Len() + v, err := binary.ReadUvarint(r) + if err != nil { + return 0, err + } + var buf [binary.MaxVarintLen64]byte + if before-r.Len() != binary.PutUvarint(buf[:], v) { + return 0, ErrNonMinimal + } + return v, nil +} diff --git a/pkg/ark-lib/internal/varint/varint_test.go b/pkg/ark-lib/internal/varint/varint_test.go new file mode 100644 index 000000000..9755df342 --- /dev/null +++ b/pkg/ark-lib/internal/varint/varint_test.go @@ -0,0 +1,58 @@ +package varint_test + +import ( + "bytes" + "math" + "testing" + + "github.com/arkade-os/arkd/pkg/ark-lib/internal/varint" + "github.com/stretchr/testify/require" +) + +func TestReadCanonical(t *testing.T) { + cases := []struct { + name string + in []byte + want uint64 + }{ + {"zero", []byte{0x00}, 0}, + {"one", []byte{0x01}, 1}, + {"max-single-group", []byte{0x7f}, 127}, + {"two-bytes-300", []byte{0xac, 0x02}, 300}, + {"max-uint64", []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, math.MaxUint64}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + r := bytes.NewReader(c.in) + got, err := varint.ReadCanonical(r) + require.NoError(t, err) + require.Equal(t, c.want, got) + require.Equal(t, 0, r.Len(), "must consume exactly the varint bytes") + }) + } +} + +func TestReadCanonicalRejectsNonMinimal(t *testing.T) { + cases := map[string][]byte{ + "overlong-zero": {0x80, 0x00}, + "overlong-one": {0x81, 0x00}, + "overlong-300": {0xac, 0x82, 0x00}, + "trailing-zero-group": {0x80, 0x80, 0x00}, + } + for name, in := range cases { + t.Run(name, func(t *testing.T) { + r := bytes.NewReader(in) + _, err := varint.ReadCanonical(r) + require.ErrorIs(t, err, varint.ErrNonMinimal) + }) + } +} + +func TestReadCanonicalRejectsOverflow(t *testing.T) { + // 10th byte > 1 overflows a 64-bit integer (delegated to binary.ReadUvarint). + in := []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x02} + r := bytes.NewReader(in) + _, err := varint.ReadCanonical(r) + require.Error(t, err) + require.Contains(t, err.Error(), "overflow") +}