Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/core/include/hb.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
%% @doc Macro usable in guards that validates whether a term has the shape of
%% an ID: a binary that is exactly 32, 42, or 43 bytes long. Only the type and
%% size are checked; the contents of the binary are not inspected.
-define(IS_ID(X), (is_binary(X) andalso (byte_size(X) == 42 orelse byte_size(X) == 43 orelse byte_size(X) == 32))).
%% @doc Macro usable in guards that validates whether a term is a 43-byte
%% base64url-encoded Arweave ID (the string form), excluding the 32-byte native
%% and 42-byte encodings. Use where only the string-encoded ID is acceptable.
%% Note that only the type and size are checked: any 43-byte binary passes,
%% whether or not its contents are valid base64url.
-define(IS_STRING_ID(X), (is_binary(X) andalso byte_size(X) == 43)).
%% @doc Macro for checking a term is a link: a tuple whose first element is
%% the atom `link'. The remaining elements of the tuple are not inspected.
-define(IS_LINK(X), (is_tuple(X) andalso element(1, X) == link)).
%% @doc List of special keys that are used in the AO-Core protocol.
Expand Down
112 changes: 99 additions & 13 deletions src/core/store/hb_store_arweave.erl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ scope(#{ <<"scope">> := Scope }) -> Scope;
scope(_) -> scope().

%% @doc Resolve a key path in the Arweave store, ignoring other paths.
resolve(_Store, #{ <<"resolve">> := ID }, _NodeOpts) when ?IS_ID(ID) ->
resolve(_Store, #{ <<"resolve">> := ID }, _NodeOpts) when ?IS_STRING_ID(ID) ->
{ok, ID};
resolve(_Store, #{ <<"resolve">> := _ID }, _NodeOpts) ->
{error, not_found}.
Expand All @@ -60,8 +60,8 @@ group(_, _, _) -> {error, not_found}.
%% result, so that we don't have to read the data from the GraphQL route
%% multiple times.
type(#{ <<"index-store">> := IndexStore }, #{ <<"type">> := ID }, NodeOpts)
when ?IS_ID(ID) ->
case hb_store:read(IndexStore, hb_store_arweave_offset:path(ID), NodeOpts) of
when ?IS_STRING_ID(ID) ->
case hb_store:read(IndexStore, raw_read_req(ID), NodeOpts) of
{ok, _Offset} ->
{ok, simple};
_ ->
Expand All @@ -75,7 +75,7 @@ read_offset(StoreOpts = #{ <<"index-store">> := IndexStore }, ID, _Opts) ->
ReadRes =
hb_prometheus:measure_and_report(
fun() ->
hb_store:read(IndexStore, hb_store_arweave_offset:path(ID), StoreOpts)
hb_store:read(IndexStore, raw_read_req(ID), StoreOpts)
end,
hb_store_arweave_index_check_duration_seconds
),
Expand All @@ -96,7 +96,7 @@ read_offset(_, _, _) -> not_found.

%% @doc Read the data at the given key, reading the `local-store' first if
%% available.
read(StoreOpts, #{ <<"read">> := ID }, _NodeOpts) when ?IS_ID(ID) ->
read(StoreOpts, #{ <<"read">> := ID }, _NodeOpts) when ?IS_STRING_ID(ID) ->
case hb_store_remote_node:read_local_cache(StoreOpts, ID, StoreOpts) of
{ok, Message} ->
?event(
Expand Down Expand Up @@ -264,6 +264,22 @@ read_chunks(StartOffset, Length, Opts) ->
Opts
).

%% @doc Build the index-store read request for the offset entry of `ID'.
%% The index is keyed by the raw `native_id', which may contain `/' (0x2F)
%% bytes, so the request carries `<<"raw">> => true' to ask the store to use
%% the key verbatim instead of splitting it on `/' via hb_path:to_binary.
%% Stores with no verbatim key representation (e.g. `hb_store_fs', where `/'
%% is the path separator) degrade the flag to a normalized path on both read
%% and write, so the flag can always be set without per-store gating.
raw_read_req(ID) ->
    Key = hb_store_arweave_offset:path(ID),
    #{ <<"raw">> => true, <<"read">> => Key }.

%% @doc Build the index-store write request that stores `Value' as the offset
%% entry of `ID'. Uses the same key derivation and `raw' flag as the read
%% request, so an entry written with this request is found by the raw read.
write_offset_req(ID, Value) ->
    Key = hb_store_arweave_offset:path(ID),
    #{ <<"raw">> => true, <<"write">> => {Key, Value} }.

%% @doc Write offset information to the index store.
write_offset(
StoreOpts = #{ <<"index-store">> := IndexStore },
Expand All @@ -283,11 +299,7 @@ write_offset(
{value, {explicit, Value}}
}
),
hb_store:write(
IndexStore,
#{ hb_store_arweave_offset:path(ID) => Value },
StoreOpts
).
hb_store:write(IndexStore, write_offset_req(ID, Value), StoreOpts).

%% @doc Record the partition that data is found in when it is requested.
record_partition_metric(Offset, Result, StoreOpts) when is_integer(Offset) ->
Expand Down Expand Up @@ -340,7 +352,7 @@ init_prometheus() ->
%%% Tests

write_read_tx_test() ->
Store = [hb_test_utils:test_store()],
Store = [hb_test_utils:test_store(hb_store_lmdb)],
Opts = #{
<<"index-store">> => Store
},
Expand Down Expand Up @@ -383,7 +395,7 @@ write_read_tx_test() ->
%% @doc Stale ANS-104 offset: fake ID pointing to a known bundle TX's
%% data range. The deserialized item's ID won't match the fake ID.
stale_ans104_offset_returns_error_test() ->
Store = [hb_test_utils:test_store()],
Store = [hb_test_utils:test_store(hb_store_lmdb)],
Opts = #{<<"index-store">> => Store},
FakeID = <<"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA">>,
RealEndOffset = 363524457284025,
Expand All @@ -395,7 +407,7 @@ stale_ans104_offset_returns_error_test() ->

%% @doc The L1 TX has bundle tags, but data is not a valid bundle.
write_read_fake_bundle_tx_test() ->
Store = [hb_test_utils:test_store()],
Store = [hb_test_utils:test_store(hb_store_lmdb)],
Opts = #{
<<"index-store">> => Store
},
Expand All @@ -406,3 +418,77 @@ write_read_fake_bundle_tx_test() ->
{ok, TX} = read(Opts, #{ <<"read">> => ID }, Opts),
?assert(hb_message:verify(TX, all, #{})),
ok.

%% @doc Regression test for IDs whose `native_id' begins with `/' (0x2F).
%% The offset entry is seeded under the raw (verbatim) key, as the prebuilt
%% index shards are, and `read_offset/3' must then resolve it. A read that
%% normalizes the key via hb_path:to_binary drops the leading `/' and misses --
%% the failure mode behind the index `404's for ~11% of ids.
slash_edge_id_offset_roundtrip_test() ->
    IndexStore = [hb_test_utils:test_store(hb_store_lmdb)],
    StoreOpts = #{ <<"index-store">> => IndexStore },
    % Real mainnet tx whose native_id starts with 0x2F.
    ID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>,
    Length = 8387,
    StartOffset = 363524457284025 - Length,
    Encoded = hb_store_arweave_offset:encode(<<"tx@1.0">>, StartOffset, Length),
    % Seed under the RAW (verbatim) key, bypassing `write_offset/5' so the
    % test does not depend on the write helper it is meant to cross-check.
    SeedReq =
        #{
            <<"write">> => {hb_store_arweave_offset:path(ID), Encoded},
            <<"raw">> => true
        },
    ok = hb_store:write(IndexStore, SeedReq, StoreOpts),
    ?assertMatch(
        {ok, #{ <<"start-offset">> := StartOffset }},
        read_offset(StoreOpts, ID, StoreOpts)
    ).

%% @doc The filesystem store has no verbatim representation of a `/'-containing
%% key, so the raw protocol degrades to the normalized path. A `native_id'
%% beginning with `/' (0x2F) must still round-trip, because write and read
%% normalize identically.
%%
%% The store is started for the duration of the test and is stopped in an
%% `after' section, so a failing write or assertion does not leave it running
%% for subsequent tests.
fs_index_store_offset_roundtrip_test() ->
    Store = hb_test_utils:test_store(hb_store_fs, <<"arweave-fs-index">>),
    Opts = #{ <<"index-store">> => Store },
    % Real mainnet tx whose native_id starts with 0x2F.
    ID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>,
    StartOffset = 363524457284025 - 8387,
    ok = hb_store:start(Store),
    try
        ok = write_offset(Opts, ID, <<"tx@1.0">>, StartOffset, 8387),
        ?assertMatch(
            {ok, #{ <<"start-offset">> := StartOffset }},
            read_offset(Opts, ID, Opts)
        )
    after
        % Runs on both the success and the failure path.
        ok = hb_store:stop(Store)
    end.

%% @doc The index-store is a list of shards in production. A raw read must
%% propagate the `raw' flag across the list, walking past shards that miss until
%% one resolves. Also exercises the volatile store's raw write/read clauses.
%%
%% The volatile clauses are checked against an index store containing only the
%% volatile shard: once the lmdb shard holds the entry, a read through the full
%% list succeeds regardless of what the volatile shard does, so it cannot
%% prove anything about the volatile raw write/read path.
multi_store_raw_index_test() ->
    Volatile = hb_test_utils:test_store(hb_store_volatile),
    Lmdb = hb_test_utils:test_store(hb_store_lmdb),
    ok = hb_store:start(Volatile),
    Opts = #{ <<"index-store">> => [Volatile, Lmdb] },
    ID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>,
    StartOffset = 363524457284025 - 8387,
    V = hb_store_arweave_offset:encode(<<"tx@1.0">>, StartOffset, 8387),
    Path = hb_store_arweave_offset:path(ID),
    RawWrite = #{ <<"write">> => {Path, V}, <<"raw">> => true },
    % Seed only the back (lmdb) shard: the read walks past the empty volatile
    % shard and resolves on lmdb.
    ok = hb_store:write([Lmdb], RawWrite, Opts),
    ?assertMatch(
        {ok, #{ <<"start-offset">> := StartOffset }},
        read_offset(Opts, ID, Opts)
    ),
    % Seed the front (volatile) shard directly and read it back through an
    % index store that contains ONLY that shard, so the assertion can only be
    % satisfied by the volatile raw write/read clauses.
    ok = hb_store:write([Volatile], RawWrite, Opts),
    VolatileOnly = #{ <<"index-store">> => [Volatile] },
    ?assertMatch(
        {ok, #{ <<"start-offset">> := StartOffset }},
        read_offset(VolatileOnly, ID, VolatileOnly)
    ),
    % The full shard list must still resolve with both shards populated.
    ?assertMatch(
        {ok, #{ <<"start-offset">> := StartOffset }},
        read_offset(Opts, ID, Opts)
    ).

%% @doc The device-API guards accept only the 43-byte string id. Binaries of
%% the other ID sizes -- the 32-byte native form and the 42-byte form -- are
%% rejected with `{error, not_found}' rather than processed.
resolve_rejects_non_string_id_test() ->
    StringID = <<"LwPn27rdIHwdXIHovfUODwZ7xngCzRyjgL7JiefuG64">>,
    ?assertEqual({ok, StringID}, resolve(#{}, #{ <<"resolve">> => StringID }, #{})),
    % Both non-string ID sizes must fall through to the rejecting clause.
    NonStringIDs = [<<0:256>>, <<0:(42 * 8)>>],
    lists:foreach(
        fun(NonStringID) ->
            ?assertEqual(
                {error, not_found},
                resolve(#{}, #{ <<"resolve">> => NonStringID }, #{})
            )
        end,
        NonStringIDs
    ).
14 changes: 14 additions & 0 deletions src/core/store/hb_store_fs.erl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ reset(#{ <<"name">> := DataDir }, _Req, _Opts) ->
?event({reset_store, {path, DataDir}}).

%% @doc Read a key from the store, following symlinks as needed.
%% A `raw' request targets an opaque binary key verbatim. The filesystem store
%% has no verbatim representation -- `/' (0x2F) is the path separator -- so the
%% raw protocol degrades to the normalized path. Read and write both normalize,
%% so they stay symmetric.
read(Opts, #{ <<"read">> := Key, <<"raw">> := true }, NodeOpts)
when is_binary(Key) ->
read(Opts, #{ <<"read">> => Key }, NodeOpts);
read(Opts, #{ <<"read">> := Key }, NodeOpts) ->
case resolve(Opts, #{ <<"resolve">> => Key }, NodeOpts) of
{ok, ResolvedPath} ->
Expand Down Expand Up @@ -75,6 +82,13 @@ read_path(Path) ->
end.

%% @doc Write a value to the specified path in the store.
%% The `raw' write envelope is handled explicitly so it is not folded in as
%% literal request keys; the filesystem store has no verbatim key representation
%% (`/' is the path separator), so it degrades to the normalized path -- mirror
%% of the `raw' read clause above.
write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts)
when is_binary(Path) ->
write_path(Opts, Path, Value);
write(Opts, Req, _NodeOpts) when is_map(Req) ->
maps:fold(
fun(PathComponents, Value, ok) ->
Expand Down
20 changes: 20 additions & 0 deletions src/core/store/hb_store_lmdb.erl
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ type(Opts, #{ <<"type">> := Key }, _NodeOpts) ->
%% @returns `ok` immediately on success, or an error tuple on failure
write(#{ <<"read-only">> := true }, _Req, _NodeOpts) when is_map(_Req) ->
{error, not_found};
write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts)
when is_binary(Path) ->
% Opaque binary key (raw Arweave ID) written verbatim so a raw read matches.
write(Opts, Path, Value);
Comment on lines +136 to +139

@speeddragon speeddragon Jun 9, 2026

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Write doesn't have the same structure as read and type. We also cannot write raw binary via HTTP API requests. Write uses all the key values to write into the store.

One option would be to set allow-raw-key in the store configuration, so we can avoid the hb_path:to_binary call when reading and writing, falling back to the old behaviour.

Another option could be similar to this, but with a different structure so that it cannot be confused with an HTTP request, e.g. write(Opts, {raw, Map}, NodeOpts), where Map can hold multiple key-value pairs.

write(Opts, Req, _NodeOpts) when is_map(Req) ->
maps:fold(
fun(Path, Value, ok) ->
Expand Down Expand Up @@ -181,6 +185,13 @@ write(Opts, Path, Value) ->
%% @param PathReq Request of the form `#{<<"read">> => Path}`.
%% @returns `{ok, Value}` on success, `{composite, Keys}` for groups, or
%% `{error, not_found}` on failure
read(Opts, #{ <<"read">> := Path, <<"raw">> := true }, _NodeOpts) ->
% Opaque binary keys (raw Arweave IDs) are read verbatim: hb_path:to_binary
% would drop their `/' (0x2F) bytes and miss the raw-keyed index shards.
case read_direct(Opts, Path) of
{ok, Value} -> {ok, Value};
_ -> {error, not_found}
end;
read(Opts, #{ <<"read">> := Path }, _NodeOpts) ->
case read_resolved(Opts, hb_path:to_binary(Path)) of
{ok, ResolvedPath, <<"group">>} ->
Expand Down Expand Up @@ -867,6 +878,15 @@ cache_style_test() ->
?assertEqual({ok, <<"test-value">>}, Result),
hb_store:stop(StoreOpts).

%% @doc A value written through the map form of `hb_store:write/3' under a key
%% with leading, doubled and trailing `/' separators (`/a//b/') must be
%% readable back at the path `a/b'.
single_key_write_normalizes_binary_path_test() ->
    hb:init(),
    Store = hb_test_utils:test_store(?MODULE),
    test_reset(Store),
    hb_store:start(Store),
    WriteReq = #{ <<"/a//b/">> => <<"value">> },
    ok = hb_store:write(Store, WriteReq, #{}),
    ReadRes = hb_store:read(Store, <<"a/b">>, #{}),
    ?assertEqual({ok, <<"value">>}, ReadRes),
    hb_store:stop(Store).

%% @doc Test nested map storage with cache-like linking behavior
%%
%% This test demonstrates how to store a nested map structure where:
Expand Down
11 changes: 11 additions & 0 deletions src/core/store/hb_store_rocksdb.erl
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ scope(_) -> local.
Req :: map(),
NodeOpts :: map(),
Result :: {ok, value()} | {composite, [binary()]} | {error, any()}.
read(Opts, #{ <<"read">> := Path, <<"raw">> := true }, _NodeOpts)
when is_binary(Path) ->
case do_read(Opts, Path) of
{ok, {raw, Result}} -> {ok, Result};
_ -> {error, not_found}
end;
read(Opts, #{ <<"read">> := RawPath }, _NodeOpts) ->
Path = resolve_path(Opts, RawPath),
case do_read(Opts, Path) of
Expand All @@ -107,6 +113,11 @@ read(Opts, #{ <<"read">> := RawPath }, _NodeOpts) ->
Key :: key(),
Value :: value(),
Result :: ok | {error, any()}.
write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts)
when is_binary(Path) ->
EncodedValue = encode_value(raw, Value),
?event({writing, Path, byte_size(EncodedValue)}),
do_write(Opts, Path, EncodedValue);
write(Opts, Req, _NodeOpts) when is_map(Req) ->
maps:fold(
fun(Key, Value, ok) ->
Expand Down
14 changes: 14 additions & 0 deletions src/core/store/hb_store_volatile.erl
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ reset_store(Opts) ->
%% marker (raw/link entries have no descendants, so no subtree purge is
%% needed). If the target key previously held a group, its descendants are
%% deleted first.
%%
%% The explicit raw write-request used by the Arweave index store
%% (`#{<<"write">> => {Path, Value}, <<"raw">> => true}') stores `Value' at
%% `Path' rather than folding the request keys in as literal entries -- mirrors
%% the `raw' clause in `hb_store_lmdb'.
write(Opts, #{ <<"write">> := {Path, Value}, <<"raw">> := true }, _NodeOpts)
when is_binary(Path) ->
put_entry(Opts, Path, {raw, Value});
write(Opts, Req, _NodeOpts) when is_map(Req) ->
maps:fold(
fun(Path, Value, ok) ->
Expand All @@ -113,6 +121,12 @@ write(Opts, Req, _NodeOpts) when is_map(Req) ->

%% @doc Read a value, following links when needed. Group paths return
%% `{composite, Children}` with the immediate child names.
read(Opts, #{ <<"read">> := Path, <<"raw">> := true }, _NodeOpts)
when is_binary(Path) ->
case lookup_entry(Opts, Path) of
{raw, Value} -> {ok, Value};
_ -> {error, not_found}
end;
read(Opts, #{ <<"read">> := RawKey }, _NodeOpts) ->
read_resolved(Opts, resolve_path(Opts, RawKey), 0).

Expand Down