From e6e7596fea5354550d6457580b1e0194d079c266 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Tue, 2 Jun 2026 16:05:19 -0300 Subject: [PATCH 1/2] fix: raw key write --- src/core/include/hb.hrl | 4 ++ src/core/store/hb_store_arweave.erl | 71 +++++++++++++++++++++++----- src/core/store/hb_store_fs.erl | 14 ++++++ src/core/store/hb_store_lmdb.erl | 20 ++++++++ src/core/store/hb_store_rocksdb.erl | 11 +++++ src/core/store/hb_store_volatile.erl | 14 ++++++ 6 files changed, 121 insertions(+), 13 deletions(-) diff --git a/src/core/include/hb.hrl b/src/core/include/hb.hrl index fdf9ca2fe..f3ef42cfb 100644 --- a/src/core/include/hb.hrl +++ b/src/core/include/hb.hrl @@ -7,6 +7,10 @@ %% @doc Macro usable in guards that validates whether a term is a %% human-readable ID encoding. -define(IS_ID(X), (is_binary(X) andalso (byte_size(X) == 42 orelse byte_size(X) == 43 orelse byte_size(X) == 32))). +%% @doc Macro usable in guards that validates whether a term is a 43-byte +%% base64url-encoded Arweave ID (the string form), excluding the 32-byte native +%% and 42-byte encodings. Use where only the string-encoded ID is acceptable. +-define(IS_STRING_ID(X), (is_binary(X) andalso byte_size(X) == 43)). %% @doc Macro for checking a term is a link. -define(IS_LINK(X), (is_tuple(X) andalso element(1, X) == link)). %% @doc List of special keys that are used in the AO-Core protocol. diff --git a/src/core/store/hb_store_arweave.erl b/src/core/store/hb_store_arweave.erl index 3d3cfda3c..9e1685343 100644 --- a/src/core/store/hb_store_arweave.erl +++ b/src/core/store/hb_store_arweave.erl @@ -42,7 +42,7 @@ scope(#{ <<"scope">> := Scope }) -> Scope; scope(_) -> scope(). %% @doc Resolve a key path in the Arweave store, ignoring other paths. -resolve(_Store, #{ <<"resolve">> := ID }, _NodeOpts) when ?IS_ID(ID) -> +resolve(_Store, #{ <<"resolve">> := ID }, _NodeOpts) when ?IS_STRING_ID(ID) -> {ok, ID}; resolve(_Store, #{ <<"resolve">> := _ID }, _NodeOpts) -> {error, not_found}. @@ -60,8 +60,8 @@ group(_, _, _) -> {error, not_found}. %% result, so that we don't have to read the data from the GraphQL route %% multiple times. type(#{ <<"index-store">> := IndexStore }, #{ <<"type">> := ID }, NodeOpts) - when ?IS_ID(ID) -> - case hb_store:read(IndexStore, hb_store_arweave_offset:path(ID), NodeOpts) of + when ?IS_STRING_ID(ID) -> + case hb_store:read(IndexStore, raw_read_req(ID), NodeOpts) of {ok, _Offset} -> {ok, simple}; _ -> @@ -75,7 +75,7 @@ read_offset(StoreOpts = #{ <<"index-store">> := IndexStore }, ID, _Opts) -> ReadRes = hb_prometheus:measure_and_report( fun() -> - hb_store:read(IndexStore, hb_store_arweave_offset:path(ID), StoreOpts) + hb_store:read(IndexStore, raw_read_req(ID), StoreOpts) end, hb_store_arweave_index_check_duration_seconds ), @@ -96,7 +96,7 @@ read_offset(_, _, _) -> not_found. %% @doc Read the data at the given key, reading the `local-store' first if %% available. -read(StoreOpts, #{ <<"read">> := ID }, _NodeOpts) when ?IS_ID(ID) -> +read(StoreOpts, #{ <<"read">> := ID }, _NodeOpts) when ?IS_STRING_ID(ID) -> case hb_store_remote_node:read_local_cache(StoreOpts, ID, StoreOpts) of {ok, Message} -> ?event( @@ -264,6 +264,22 @@ read_chunks(StartOffset, Length, Opts) -> Opts ). +%% @doc Raw (non-path-normalized) read/write requests for the opaque offset key. +%% The index is keyed by the raw `native_id', which may contain `/' (0x2F) +%% bytes; the `raw' flag tells the store to use the key verbatim instead of +%% splitting it on `/' via hb_path:to_binary. Stores with no verbatim key +%% representation (e.g. `hb_store_fs', where `/' is the path separator) degrade +%% the flag to a normalized path -- consistently on both read and write -- so +%% the request can always carry `raw' without per-store gating. +raw_read_req(ID) -> + #{ <<"read">> => hb_store_arweave_offset:path(ID), <<"raw">> => true }. + +write_offset_req(ID, Value) -> + #{ + <<"write">> => {hb_store_arweave_offset:path(ID), Value}, + <<"raw">> => true + }. + %% @doc Write offset information to the index store. write_offset( StoreOpts = #{ <<"index-store">> := IndexStore }, @@ -283,11 +299,7 @@ write_offset( {value, {explicit, Value}} } ), - hb_store:write( - IndexStore, - #{ hb_store_arweave_offset:path(ID) => Value }, - StoreOpts - ). + hb_store:write(IndexStore, write_offset_req(ID, Value), StoreOpts). %% @doc Record the partition that data is found in when it is requested. record_partition_metric(Offset, Result, StoreOpts) when is_integer(Offset) -> @@ -340,7 +352,7 @@ init_prometheus() -> %%% Tests write_read_tx_test() -> - Store = [hb_test_utils:test_store()], + Store = [hb_test_utils:test_store(hb_store_lmdb)], Opts = #{ <<"index-store">> => Store }, @@ -383,7 +395,7 @@ write_read_tx_test() -> %% @doc Stale ANS-104 offset: fake ID pointing to a known bundle TX's %% data range. The deserialized item's ID won't match the fake ID. stale_ans104_offset_returns_error_test() -> - Store = [hb_test_utils:test_store()], + Store = [hb_test_utils:test_store(hb_store_lmdb)], Opts = #{<<"index-store">> => Store}, FakeID = <<"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA">>, RealEndOffset = 363524457284025, @@ -395,7 +407,7 @@ stale_ans104_offset_returns_error_test() -> %% @doc The L1 TX has bundle tags, but data is not a valid bundle. write_read_fake_bundle_tx_test() -> - Store = [hb_test_utils:test_store()], + Store = [hb_test_utils:test_store(hb_store_lmdb)], Opts = #{ <<"index-store">> => Store }, @@ -406,3 +418,36 @@ write_read_fake_bundle_tx_test() -> {ok, TX} = read(Opts, #{ <<"read">> => ID }, Opts), ?assert(hb_message:verify(TX, all, #{})), ok. + +%% @doc Regression: a `native_id' beginning with `/' (0x2F) must round-trip +%% verbatim through the raw index read. The id is seeded under the raw (verbatim) +%% key, as the prebuilt index shards are, and the raw read path must resolve it. +%% A read that normalizes the key via hb_path:to_binary drops the leading `/' and +%% misses -- the failure mode behind the index `404's for ~11% of ids. +slash_edge_id_offset_roundtrip_test() -> + Store = [hb_test_utils:test_store(hb_store_lmdb)], + Opts = #{ <<"index-store">> => Store }, + % Real mainnet tx whose native_id starts with 0x2F. + ID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>, + StartOffset = 363524457284025 - 8387, + V = hb_store_arweave_offset:encode(<<"tx@1.0">>, StartOffset, 8387), + Path = hb_store_arweave_offset:path(ID), + % Seed under the RAW (verbatim) key, as the prebuilt index shards are. + ok = hb_store:write(Store, #{ <<"write">> => {Path, V}, <<"raw">> => true }, Opts), + ?assertMatch( + {ok, #{ <<"start-offset">> := StartOffset }}, + read_offset(Opts, ID, Opts) + ). + +fs_index_store_offset_roundtrip_test() -> + Store = hb_test_utils:test_store(hb_store_fs, <<"arweave-fs-index">>), + Opts = #{ <<"index-store">> => Store }, + ID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, + StartOffset = 363524457284025 - 8387, + ok = hb_store:start(Store), + ok = write_offset(Opts, ID, <<"tx@1.0">>, StartOffset, 8387), + ?assertMatch( + {ok, #{ <<"start-offset">> := StartOffset }}, + read_offset(Opts, ID, Opts) + ), + ok = hb_store:stop(Store). diff --git a/src/core/store/hb_store_fs.erl b/src/core/store/hb_store_fs.erl index 03ec51989..35a98984d 100644 --- a/src/core/store/hb_store_fs.erl +++ b/src/core/store/hb_store_fs.erl @@ -45,6 +45,13 @@ reset(#{ <<"name">> := DataDir }, _Req, _Opts) -> ?event({reset_store, {path, DataDir}}). %% @doc Read a key from the store, following symlinks as needed. +%% A `raw' request targets an opaque binary key verbatim. The filesystem store +%% has no verbatim representation -- `/' (0x2F) is the path separator -- so the +%% raw protocol degrades to the normalized path. Read and write both normalize, +%% so they stay symmetric. +read(Opts, #{ <<"read">> := Key, <<"raw">> := true }, NodeOpts) + when is_binary(Key) -> + read(Opts, #{ <<"read">> => Key }, NodeOpts); read(Opts, #{ <<"read">> := Key }, NodeOpts) -> case resolve(Opts, #{ <<"resolve">> => Key }, NodeOpts) of {ok, ResolvedPath} -> @@ -75,6 +82,13 @@ read_path(Path) -> end. %% @doc Write a value to the specified path in the store. +%% The `raw' write envelope is handled explicitly so it is not folded in as +%% literal request keys; the filesystem store has no verbatim key representation +%% (`/' is the path separator), so it degrades to the normalized path -- mirror +%% of the `raw' read clause above. +write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts) + when is_binary(Path) -> + write_path(Opts, Path, Value); write(Opts, Req, _NodeOpts) when is_map(Req) -> maps:fold( fun(PathComponents, Value, ok) -> diff --git a/src/core/store/hb_store_lmdb.erl b/src/core/store/hb_store_lmdb.erl index a5b649ffc..285068dd1 100644 --- a/src/core/store/hb_store_lmdb.erl +++ b/src/core/store/hb_store_lmdb.erl @@ -133,6 +133,10 @@ type(Opts, #{ <<"type">> := Key }, _NodeOpts) -> %% @returns `ok` immediately on success, or an error tuple on failure write(#{ <<"read-only">> := true }, _Req, _NodeOpts) when is_map(_Req) -> {error, not_found}; +write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts) + when is_binary(Path) -> + % Opaque binary key (raw Arweave ID) written verbatim so a raw read matches. + write(Opts, Path, Value); write(Opts, Req, _NodeOpts) when is_map(Req) -> maps:fold( fun(Path, Value, ok) -> @@ -181,6 +185,13 @@ write(Opts, Path, Value) -> %% @param PathReq Request of the form `#{<<"read">> => Path}`. %% @returns `{ok, Value}` on success, `{composite, Keys}` for groups, or %% `{error, not_found}` on failure +read(Opts, #{ <<"read">> := Path, <<"raw">> := true }, _NodeOpts) -> + % Opaque binary keys (raw Arweave IDs) are read verbatim: hb_path:to_binary + % would drop their `/' (0x2F) bytes and miss the raw-keyed index shards. + case read_direct(Opts, Path) of + {ok, Value} -> {ok, Value}; + _ -> {error, not_found} + end; read(Opts, #{ <<"read">> := Path }, _NodeOpts) -> case read_resolved(Opts, hb_path:to_binary(Path)) of {ok, ResolvedPath, <<"group">>} -> @@ -867,6 +878,15 @@ cache_style_test() -> ?assertEqual({ok, <<"test-value">>}, Result), hb_store:stop(StoreOpts). +single_key_write_normalizes_binary_path_test() -> + hb:init(), + StoreOpts = hb_test_utils:test_store(?MODULE), + test_reset(StoreOpts), + hb_store:start(StoreOpts), + ok = hb_store:write(StoreOpts, #{ <<"/a//b/">> => <<"value">> }, #{}), + ?assertEqual({ok, <<"value">>}, hb_store:read(StoreOpts, <<"a/b">>, #{})), + hb_store:stop(StoreOpts). + %% @doc Test nested map storage with cache-like linking behavior %% %% This test demonstrates how to store a nested map structure where: diff --git a/src/core/store/hb_store_rocksdb.erl b/src/core/store/hb_store_rocksdb.erl index 43103d39b..391706c1d 100644 --- a/src/core/store/hb_store_rocksdb.erl +++ b/src/core/store/hb_store_rocksdb.erl @@ -85,6 +85,12 @@ scope(_) -> local. Req :: map(), NodeOpts :: map(), Result :: {ok, value()} | {composite, [binary()]} | {error, any()}. +read(Opts, #{ <<"read">> := Path, <<"raw">> := true }, _NodeOpts) + when is_binary(Path) -> + case do_read(Opts, Path) of + {ok, {raw, Result}} -> {ok, Result}; + _ -> {error, not_found} + end; read(Opts, #{ <<"read">> := RawPath }, _NodeOpts) -> Path = resolve_path(Opts, RawPath), case do_read(Opts, Path) of @@ -107,6 +113,11 @@ read(Opts, #{ <<"read">> := RawPath }, _NodeOpts) -> Key :: key(), Value :: value(), Result :: ok | {error, any()}. +write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts) + when is_binary(Path) -> + EncodedValue = encode_value(raw, Value), + ?event({writing, Path, byte_size(EncodedValue)}), + do_write(Opts, Path, EncodedValue); write(Opts, Req, _NodeOpts) when is_map(Req) -> maps:fold( fun(Key, Value, ok) -> diff --git a/src/core/store/hb_store_volatile.erl b/src/core/store/hb_store_volatile.erl index 713d1c07c..2b52eca6b 100644 --- a/src/core/store/hb_store_volatile.erl +++ b/src/core/store/hb_store_volatile.erl @@ -100,6 +100,14 @@ reset_store(Opts) -> %% marker (raw/link entries have no descendants, so no subtree purge is %% needed). If the target key previously held a group, its descendants are %% deleted first. +%% +%% The explicit raw write-request used by the Arweave index store +%% (`#{<<"write">> => {Path, Value}, <<"raw">> => true}') stores `Value' at +%% `Path' rather than folding the request keys in as literal entries -- mirrors +%% the `raw' clause in `hb_store_lmdb'. +write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts) + when is_binary(Path) -> + put_entry(Opts, Path, {raw, Value}); write(Opts, Req, _NodeOpts) when is_map(Req) -> maps:fold( fun(Path, Value, ok) -> @@ -113,6 +121,12 @@ write(Opts, Req, _NodeOpts) when is_map(Req) -> %% @doc Read a value, following links when needed. Group paths return %% `{composite, Children}` with the immediate child names. +read(Opts, #{ <<"read">> := Path, <<"raw">> := true }, _NodeOpts) + when is_binary(Path) -> + case lookup_entry(Opts, Path) of + {raw, Value} -> {ok, Value}; + _ -> {error, not_found} + end; read(Opts, #{ <<"read">> := RawKey }, _NodeOpts) -> read_resolved(Opts, resolve_path(Opts, RawKey), 0). From 21b7b2081bc42576ea2b51ee5a983b92fced85a7 Mon Sep 17 00:00:00 2001 From: Victor Shyba Date: Mon, 8 Jun 2026 18:53:37 -0300 Subject: [PATCH 2/2] tests: cover some of the raw cases --- src/core/store/hb_store_arweave.erl | 43 ++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/core/store/hb_store_arweave.erl b/src/core/store/hb_store_arweave.erl index 9e1685343..8361e192f 100644 --- a/src/core/store/hb_store_arweave.erl +++ b/src/core/store/hb_store_arweave.erl @@ -439,10 +439,15 @@ slash_edge_id_offset_roundtrip_test() -> read_offset(Opts, ID, Opts) ). +%% @doc The filesystem store has no verbatim representation of a `/'-containing +%% key, so the raw protocol degrades to the normalized path. A `native_id' +%% beginning with `/' (0x2F) must still round-trip, because write and read +%% normalize identically. fs_index_store_offset_roundtrip_test() -> Store = hb_test_utils:test_store(hb_store_fs, <<"arweave-fs-index">>), Opts = #{ <<"index-store">> => Store }, - ID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, + % Real mainnet tx whose native_id starts with 0x2F. + ID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>, StartOffset = 363524457284025 - 8387, ok = hb_store:start(Store), ok = write_offset(Opts, ID, <<"tx@1.0">>, StartOffset, 8387), @@ -451,3 +456,39 @@ fs_index_store_offset_roundtrip_test() -> read_offset(Opts, ID, Opts) ), ok = hb_store:stop(Store). + +%% @doc The index-store is a list of shards in production. A raw read must +%% propagate the `raw' flag across the list, walking past shards that miss until +%% one resolves. Also exercises the volatile store's raw write/read clauses. +multi_store_raw_index_test() -> + Volatile = hb_test_utils:test_store(hb_store_volatile), + Lmdb = hb_test_utils:test_store(hb_store_lmdb), + ok = hb_store:start(Volatile), + Opts = #{ <<"index-store">> => [Volatile, Lmdb] }, + ID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>, + StartOffset = 363524457284025 - 8387, + V = hb_store_arweave_offset:encode(<<"tx@1.0">>, StartOffset, 8387), + Path = hb_store_arweave_offset:path(ID), + % Seed only the back (lmdb) shard: the read walks past the empty volatile + % shard and resolves on lmdb. + ok = hb_store:write([Lmdb], #{ <<"write">> => {Path, V}, <<"raw">> => true }, Opts), + ?assertMatch( + {ok, #{ <<"start-offset">> := StartOffset }}, + read_offset(Opts, ID, Opts) + ), + % Seed the front (volatile) shard directly: covers the volatile raw clauses. + ok = hb_store:write([Volatile], #{ <<"write">> => {Path, V}, <<"raw">> => true }, Opts), + ?assertMatch( + {ok, #{ <<"start-offset">> := StartOffset }}, + read_offset(Opts, ID, Opts) + ). + +%% @doc The device-API guards accept only the 43-byte string id; a 32-byte +%% native id is rejected rather than processed. +resolve_rejects_non_string_id_test() -> + StringID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>, + ?assertEqual({ok, StringID}, resolve(#{}, #{ <<"resolve">> => StringID }, #{})), + ?assertEqual( + {error, not_found}, + resolve(#{}, #{ <<"resolve">> => <<0:256>> }, #{}) + ).