Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions packages/beacon-node/src/sync/range/chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -506,14 +506,23 @@ export class SyncChain {
// if last processed epoch is n, we don't want to request batches with epoch > n + MAX_LOOK_AHEAD_EPOCHS
// we should have enough batches to process in the buffer: n + 1, ..., n + MAX_LOOK_AHEAD_EPOCHS
// let's focus on redownloading these batches first because it may have to reach different peers to get enough sampled columns
//
// Only pending (not-yet-validated) batches count toward the look-ahead window. AwaitingValidation
// batches (e.g. already-processed empty/skip-slot batches) cannot advance lastEpochWithProcessBlocks
// on their own, so counting them would let a run of empty epochs longer than MAX_LOOK_AHEAD_EPOCHS
// fill the window and stall sync. Mirrors the BATCH_BUFFER_SIZE carve-out above.
const pendingBatches = batches.filter((batch) => batch.state.status !== BatchStatus.AwaitingValidation);
if (
batches.length > 0 &&
Math.max(...batches.map((b) => b.startEpoch)) >= this.lastEpochWithProcessBlocks + MAX_LOOK_AHEAD_EPOCHS
pendingBatches.length > 0 &&
Math.max(...pendingBatches.map((b) => b.startEpoch)) >= this.lastEpochWithProcessBlocks + MAX_LOOK_AHEAD_EPOCHS
) {
return null;
}

// This line decides the starting epoch of the next batch. MUST ensure no duplicate batch for the same startEpoch
// This line decides the starting epoch of the next batch. MUST ensure no duplicate batch for the same startEpoch.
// Note: intentionally considers ALL batches (including AwaitingValidation), unlike the pending-only
// look-ahead check above — the next epoch must follow the last batch we hold to avoid gaps/duplicates,
// regardless of validation state.
const startEpoch = toBeDownloadedStartEpoch(batches, this.lastEpochWithProcessBlocks);

// Don't request batches beyond the target head slot. The to-be-downloaded batch must be strictly after target.slot
Expand Down
90 changes: 32 additions & 58 deletions packages/beacon-node/src/sync/utils/downloadByRange.ts
Original file line number Diff line number Diff line change
Expand Up @@ -514,44 +514,21 @@ export async function validateResponses({
return {result: {responses: validatedResponses, payloadEnvelopes: null}, warnings};
}

if (!dataRequest) {
// Only envelope and/or parent-by-root validation needed
if (parentPayloadCommitments !== undefined) {
const parentValidated = await validateParentPayloadColumns(
parentPayloadCommitments,
columnSidecars ?? [],
peerDasMetrics
);
if (parentValidated) {
validatedResponses.validatedColumnSidecars = [parentValidated];
}
}
const validatedPayloadEnvelopes = validateEnvelopesByRangeResponse(
validatedResponses.validatedBlocks ?? [],
batchBlocks,
payloadEnvelopes ?? [],
parentPayloadCommitments
);
return {result: {responses: validatedResponses, payloadEnvelopes: validatedPayloadEnvelopes}, warnings};
}

const blocksForDataValidation = getBlocksForDataValidation(
dataRequest,
batchBlocks,
validatedResponses.validatedBlocks?.length ? validatedResponses.validatedBlocks : undefined
);

if (!blocksForDataValidation.length) {
throw new DownloadByRangeError(
{
code: DownloadByRangeErrorCode.MISSING_BLOCKS_RESPONSE,
...requestsLogMeta({blobsRequest, columnsRequest}),
},
"No blocks in data request slot range to validate data response against"
);
}

if (blobsRequest) {
// Validate data sidecars (blobs/columns) against the in-range blocks. Two cases have nothing to
// validate here and simply fall through to the shared parent-by-root + envelope tail below:
// - envelopes-only / parent-payload-only requests (no blobs/columns request at all), and
// - an empty epoch, where the data request's slot range contains no blocks. This is a valid
// response during poor chain liveness; the empty batch is confirmed at the chain level once a
// later batch imports a block, so a peer cannot stall sync by falsely claiming an epoch is empty.
const blocksForDataValidation = dataRequest
? getBlocksForDataValidation(
dataRequest,
batchBlocks,
validatedResponses.validatedBlocks?.length ? validatedResponses.validatedBlocks : undefined
Comment thread
wemeetagain marked this conversation as resolved.
Outdated
)
: [];

if (blobsRequest && blocksForDataValidation.length > 0) {
Comment thread
wemeetagain marked this conversation as resolved.
if (!blobSidecars) {
throw new DownloadByRangeError(
{
Expand All @@ -568,7 +545,7 @@ export async function validateResponses({
);
}

if (columnsRequest) {
if (columnsRequest && blocksForDataValidation.length > 0) {
if (!columnSidecars) {
throw new DownloadByRangeError(
{
Expand Down Expand Up @@ -664,26 +641,23 @@ export function validateBlockByRangeResponse(
): WarnResult<ValidatedBlock[], DownloadByRangeError> {
const {startSlot, count} = blocksRequest;

// An error was thrown here by @twoeths in #8150 but it breaks for epochs with 0 blocks during chain
// liveness issues. See comment https://github.com/ChainSafe/lodestar/issues/8147#issuecomment-3246434697
// There are instances where clients return no blocks though. Need to monitor this via the warns to see
// if what the correct behavior should be
// A peer returning zero blocks for an epoch is the correct response when every slot in the epoch
// was skipped (an "empty epoch"), which happens during periods of poor chain liveness. Return the
// empty result with a warning — rather than throwing — so the SyncChain can process through the
// empty epoch. Throwing here previously killed the chain after MAX_BATCH_DOWNLOAD_ATTEMPTS
// (https://github.com/ChainSafe/lodestar/issues/8147). The empty batch is held in
// AwaitingValidation and only confirmed once a later batch imports a block, so a peer cannot stall
// sync by falsely claiming an epoch is empty.
if (!blocks.length) {
throw new DownloadByRangeError({
code: DownloadByRangeErrorCode.MISSING_BLOCKS_RESPONSE,
...requestsLogMeta({blocksRequest}),
});
// TODO: this was causing deadlock again. need to come back and fix this so that its possible to process through
// an empty epoch for periods with poor liveness
// return {
// result: [],
// warnings: [
// new DownloadByRangeError({
// code: DownloadByRangeErrorCode.MISSING_BLOCKS_RESPONSE,
// ...requestsLogMeta({blocksRequest}),
// }),
// ],
// };
return {
result: [],
warnings: [
new DownloadByRangeError({
code: DownloadByRangeErrorCode.MISSING_BLOCKS_RESPONSE,
...requestsLogMeta({blocksRequest}),
}),
],
};
}

if (blocks.length > count) {
Expand Down
15 changes: 12 additions & 3 deletions packages/beacon-node/test/unit/sync/range/chain.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,23 @@ describe("sync / range / chain", () => {
targetEpoch: 16,
},
{
// due to BATCH_BUFFER_SIZE and MAX_LOOK_AHEAD_EPOCHS, lodestar cannot deal with unlimited skipped slots
// having a test with 2 epochs of skipped slots is enough to test the logic
// this hasn't happened in any networks as of Aug 2025
// A run of skipped slots that fits within the MAX_LOOK_AHEAD_EPOCHS window. The longer-run case
// (a run of empty epochs exceeding the look-ahead window, which previously deadlocked) is
// covered by "Simulate sync with 3 epochs of skipped slots" below.
id: "Simulate sync with 2 epochs of skipped slots",
startEpoch: 0,
targetEpoch: 16,
skippedSlots: new Set(linspace(3 * SLOTS_PER_EPOCH, 4 * SLOTS_PER_EPOCH)),
},
{
// Regression: a run of empty epochs longer than MAX_LOOK_AHEAD_EPOCHS must not deadlock the
// chain. Empty batches sit in AwaitingValidation and never advance lastEpochWithProcessBlocks,
// so the look-ahead window must not count them or no further batches are ever downloaded.
id: "Simulate sync with 3 epochs of skipped slots",
startEpoch: 0,
targetEpoch: 16,
skippedSlots: new Set(linspace(3 * SLOTS_PER_EPOCH, 6 * SLOTS_PER_EPOCH)),
},
// As of https://github.com/ChainSafe/lodestar/pull/8150, we abort the batch after a single processing error
// {
// id: "Simulate sync with multiple ranges of bad blocks",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
import {beforeEach, describe, expect, it} from "vitest";
import {ForkName} from "@lodestar/params";
import {ForkName, SLOTS_PER_EPOCH} from "@lodestar/params";
import {BlockInputPreData} from "../../../../src/chain/blocks/blockInput/blockInput.js";
import {BlockInputSource, IBlockInput} from "../../../../src/chain/blocks/blockInput/types.js";
import {ValidatedBlock, getBlocksForDataValidation} from "../../../../src/sync/utils/downloadByRange.js";
import {generateChainOfBlockMaybeSidecars} from "../../../utils/blocksAndData.js";
import {
DownloadByRangeErrorCode,
ValidatedBlock,
getBlocksForDataValidation,
validateBlockByRangeResponse,
validateResponses,
} from "../../../../src/sync/utils/downloadByRange.js";
import {config, generateChainOfBlockMaybeSidecars, slots} from "../../../utils/blocksAndData.js";

/**
* Logic errors and gaps identified during test case creation:
*
* INSERT_LOGIC_ERROR_BULLET_POINTS_HERE
*
* - validateBlockByRangeResponse: Commented out zero blocks check breaks during chain liveness issues (line 445-453)
* - validateBlobsByRangeResponse: Missing validation that blob sidecars are in consecutive (slot, index) order as per spec
* - validateColumnsByRangeResponse: Missing validation that column sidecars are in consecutive (slot, index) order
* - cacheByRangeResponses: Error handling for wrong chain only breaks loop but doesn't throw/propagate error properly
Expand Down Expand Up @@ -164,6 +169,59 @@ import {generateChainOfBlockMaybeSidecars} from "../../../utils/blocksAndData.js
// it("should propagate validation errors from validateBlockDataColumnSidecars");
// });

describe("validateBlockByRangeResponse", () => {
// Regression: a peer returning zero blocks for an all-skipped epoch is a valid response during
// poor chain liveness. It must be accepted (with a warning), not thrown — otherwise the SyncChain
// dies after MAX_BATCH_DOWNLOAD_ATTEMPTS and the node cannot sync past an empty epoch.
it("should accept empty response during chain liveness issues", () => {
const blocksRequest = {startSlot: slots.deneb, count: SLOTS_PER_EPOCH, step: 1};

const {result, warnings} = validateBlockByRangeResponse(config, blocksRequest, []);

expect(result).toEqual([]);
expect(warnings?.map((w) => w.type.code)).toContain(DownloadByRangeErrorCode.MISSING_BLOCKS_RESPONSE);
});
});

describe("validateResponses", () => {
// Regression: an empty epoch that also carries a data (blobs/columns) request must not throw.
// Both the block validator and the "no blocks in data range" guard previously threw
// MISSING_BLOCKS_RESPONSE for a legitimately empty epoch, killing the SyncChain.
it("should accept an empty epoch when a data request is present", async () => {
const startSlot = slots.deneb;
const blocksRequest = {startSlot, count: SLOTS_PER_EPOCH, step: 1};
const blobsRequest = {startSlot, count: SLOTS_PER_EPOCH};

const {result} = await validateResponses({
config,
blocksRequest,
blobsRequest,
blocks: [],
blobSidecars: [],
});

expect(result.responses.validatedBlocks).toEqual([]);
expect(result.responses.validatedBlobSidecars ?? []).toEqual([]);
});

// Characterization test for the no-data-request path (envelopes-only). With no blocks/blobs/columns
// request, data-sidecar validation must be skipped while envelope validation still runs and returns
// a (possibly empty) map — never null. This pins the behavior that the data-validation refactor
// must preserve.
it("validates envelopes and skips data validation when only an envelopes request is present", async () => {
const {result} = await validateResponses({
config,
envelopesRequest: {startSlot: slots.gloas, count: SLOTS_PER_EPOCH},
payloadEnvelopes: [],
});

expect(result.responses.validatedBlocks).toBeUndefined();
expect(result.responses.validatedColumnSidecars ?? []).toEqual([]);
expect(result.payloadEnvelopes).toBeInstanceOf(Map);
expect(result.payloadEnvelopes?.size).toBe(0);
});
});

describe("getBlocksForDataValidation", () => {
const forkName = ForkName.capella;
let chainOfBlocks: ReturnType<typeof generateChainOfBlockMaybeSidecars>;
Expand Down
Loading