Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ public static Result scanByRegionEncodedName(Connection connection, String regio
throws IOException {
RowFilter rowFilter =
new RowFilter(CompareOperator.EQUAL, new SubstringComparator(regionEncodedName));
Scan scan = getMetaScan(connection.getConfiguration(), 1);
Scan scan = getMetaScan(connection.getConfiguration(), 1, false);
scan.setFilter(rowFilter);
try (Table table = getMetaHTable(connection);
ResultScanner resultScanner = table.getScanner(scan)) {
Expand Down Expand Up @@ -558,13 +558,13 @@ public static Scan getScanForTableName(Configuration conf, TableName tableName)
// Stop key appends the smallest possible char to the table name
byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION);

Scan scan = getMetaScan(conf, -1);
Scan scan = getMetaScan(conf, -1, false);
scan.setStartRow(startKey);
scan.setStopRow(stopKey);
return scan;
}

private static Scan getMetaScan(Configuration conf, int rowUpperLimit) {
private static Scan getMetaScan(Configuration conf, int rowUpperLimit, boolean isPagedScan) {
Scan scan = new Scan();
int scannerCaching = conf.getInt(HConstants.HBASE_META_SCANNER_CACHING,
HConstants.DEFAULT_HBASE_META_SCANNER_CACHING);
Expand All @@ -575,7 +575,14 @@ private static Scan getMetaScan(Configuration conf, int rowUpperLimit) {
scan.setLimit(rowUpperLimit);
scan.setReadType(Scan.ReadType.PREAD);
}
scan.setCaching(scannerCaching);
if (isPagedScan) {
// Caller is doing a bounded paged scan and expects the whole slice back in one ScannerNext
// RPC. Size caching to the slice. Trade-off: a single larger response uses more
// RegionServer heap, fine for meta rows (small).
scan.setCaching(rowUpperLimit);
} else {
scan.setCaching(scannerCaching);
}
scan.setPriority(HConstants.INTERNAL_READ_QOS);
return scan;
}
Expand Down Expand Up @@ -706,6 +713,25 @@ public static void scanMetaForTableRegions(Connection connection, Visitor visito
scanMetaForTableRegions(connection, visitor, tableName, CatalogReplicaMode.NONE);
}

/**
 * Scans hbase:meta for the regions of {@code tableName}, beginning at the meta row derived from
 * {@code startRow} and visiting no more than {@code rowLimit} rows. {@code startRow} must lie on
 * a region start-key boundary (for instance, the end key of the last region already visited), or
 * be {@code null}/empty to begin with the table's first region. The underlying scan is sized so
 * that the entire {@code rowLimit}-row slice comes back in a single ScannerNext RPC, independent
 * of the configured {@code hbase.meta.scanner.caching}.
 */
public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
    TableName tableName, byte[] startRow, int rowLimit, CatalogReplicaMode metaReplicaMode)
    throws IOException {
  final byte[] stop = getTableStopRowForMeta(tableName, QueryType.REGION);
  final byte[] start;
  if (startRow != null && startRow.length > 0) {
    // Build the meta row name for the given region start-key boundary.
    start = RegionInfo.createRegionName(tableName, startRow, HConstants.ZEROES, false);
  } else {
    // No cursor supplied: begin at the table's first meta row.
    start = getTableStartRowForMeta(tableName, QueryType.REGION);
  }
  scanMeta(connection, start, stop, QueryType.REGION, null, rowLimit, true, visitor,
    metaReplicaMode);
}

private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows,
final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type),
Expand Down Expand Up @@ -760,8 +786,15 @@ static void scanMeta(Connection connection, @Nullable final byte[] startRow,
/**
 * Performs a non-paged scan of the meta table. Delegates to the paged overload with
 * {@code isPagedScan=false}, i.e. scanner caching comes from the configured
 * {@code hbase.meta.scanner.caching} rather than being sized to {@code maxRows}.
 */
private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
    @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows,
    final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
  scanMeta(connection, startRow, stopRow, type, filter, maxRows, false, visitor, metaReplicaMode);
}

private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
@Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows,
boolean isPagedScan, final Visitor visitor, CatalogReplicaMode metaReplicaMode)
throws IOException {
int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE;
Scan scan = getMetaScan(connection.getConfiguration(), rowUpperLimit);
Scan scan = getMetaScan(connection.getConfiguration(), rowUpperLimit, isPagedScan);

for (byte[] family : type.getFamilies()) {
scan.addFamily(family);
Expand Down Expand Up @@ -830,7 +863,7 @@ private static void scanMeta(Connection connection, @Nullable final byte[] start
private static RegionInfo getClosestRegionInfo(Connection connection,
@NonNull final TableName tableName, @NonNull final byte[] row) throws IOException {
byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
Scan scan = getMetaScan(connection.getConfiguration(), 1);
Scan scan = getMetaScan(connection.getConfiguration(), 1, false);
scan.setReversed(true);
scan.withStartRow(searchRow);
try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1146,11 +1146,7 @@ rpcControllerFactory, getMetaLookupPool(), connectionConfig.getMetaReadRpcTimeou
}
} finally {
if (lockedUserRegion) {
userRegionLock.unlock();
// update duration of the lock being held
if (metrics != null) {
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
}
releaseUserRegionLock(lockStartTime);
}
}
try {
Expand Down Expand Up @@ -1185,6 +1181,19 @@ void takeUserRegionLock() throws IOException {
}
}

/**
 * Releases {@link #userRegionLock} (previously obtained through {@link #takeUserRegionLock()})
 * and, when metrics are enabled, records how long the lock was held.
 * @param lockStartTimeMs value of {@link EnvironmentEdgeManager#currentTime()} sampled
 *                        immediately after {@link #takeUserRegionLock()} returned
 */
void releaseUserRegionLock(long lockStartTimeMs) {
  userRegionLock.unlock();
  if (metrics == null) {
    return;
  }
  // Record the held duration (not the wait time) of the user-region lock.
  metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTimeMs);
}

/**
* Put a newly discovered HRegionLocation into the cache.
* @param tableName The table name.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.trace.TableSpanBuilder;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;

import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
Expand Down Expand Up @@ -111,6 +112,63 @@ public List<HRegionLocation> getAllRegionLocations() throws IOException {
}, HRegionLocator::getRegionNames, supplier);
}

@Override
public List<HRegionLocation> getRegionLocationsPage(byte[] startKey, int limit)
    throws IOException {
  // Paged lookup is explicitly unsupported for hbase:meta itself; callers are redirected to the
  // single-region lookup instead.
  if (TableName.isMetaTableName(tableName)) {
    throw new IOException(
      "getRegionLocationsPage(startKey, limit) is not supported for hbase:meta;"
        + " use getRegionLocation(EMPTY_START_ROW) instead.");
  }
  // limit <= 0 falls back to the configured meta scanner caching as the page size.
  final int effectiveLimit = limit > 0
    ? limit
    : connection.getConfiguration().getInt(HConstants.HBASE_META_SCANNER_CACHING,
      HConstants.DEFAULT_HBASE_META_SCANNER_CACHING);
  // A null startKey pages from the table's first region.
  final byte[] effectiveStart = startKey == null ? HConstants.EMPTY_START_ROW : startKey;
  final CatalogReplicaMode metaReplicaMode = CatalogReplicaMode.fromString(connection
    .getConfiguration().get(LOCATOR_META_REPLICAS_MODE, CatalogReplicaMode.NONE.toString()));

  final Supplier<Span> supplier = new TableSpanBuilder(connection)
    .setName("HRegionLocator.getRegionLocationsPage").setTableName(tableName);
  return tracedLocationFuture(() -> {
    final List<HRegionLocation> out = new ArrayList<>(effectiveLimit);
    // Visitor collects every non-null replica location of each visited meta row, and also
    // writes the row's locations (with null replica slots removed) into the connection's
    // region location cache.
    MetaTableAccessor.Visitor visitor = new MetaTableAccessor.TableVisitorBase(tableName) {
      @Override
      public boolean visitInternal(Result result) throws IOException {
        RegionLocations locs = MetaTableAccessor.getRegionLocations(result);
        if (locs == null) {
          // Row yielded no parseable locations; keep scanning.
          return true;
        }
        for (HRegionLocation loc : locs.getRegionLocations()) {
          if (loc != null) {
            out.add(loc);
          }
        }
        RegionLocations cleaned = locs.removeElementsWithNullLocation();
        if (cleaned != null) {
          connection.cacheLocation(tableName, cleaned);
        }
        return true;
      }
    };

    boolean locked = false;
    long lockStart = 0;
    try {
      // Serialize with other meta lookups on this connection. lockStart is sampled only after
      // the lock is acquired so the held-duration metric excludes the wait time.
      connection.takeUserRegionLock();
      lockStart = EnvironmentEdgeManager.currentTime();
      locked = true;
      MetaTableAccessor.scanMetaForTableRegions(connection, visitor, tableName, effectiveStart,
        effectiveLimit, metaReplicaMode);
    } finally {
      // Only release if the acquire succeeded; takeUserRegionLock may throw before locking.
      if (locked) {
        connection.releaseUserRegionLock(lockStart);
      }
    }
    return out;
  }, HRegionLocator::getRegionNames, supplier);
}

private static List<String> getRegionNames(List<HRegionLocation> locations) {
if (CollectionUtils.isEmpty(locations)) {
return Collections.emptyList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.Pair;
Expand Down Expand Up @@ -130,6 +131,54 @@ default List<HRegionLocation> getRegionLocations(byte[] row) throws IOException
*/
List<HRegionLocation> getAllRegionLocations() throws IOException;

/**
* Bulk lookup of region locations from {@code hbase:meta} in a single RPC, starting at
* {@code startKey} (region start-key boundary, inclusive) and returning at most {@code limit}
* regions in start-key order.
* <p/>
* The returned list includes all replicas of each region (matching
* {@link #getAllRegionLocations()}), and the result is also written to the connection's region
* location cache.
* <p/>
* Ordering: regions are returned in ascending region start-key order (the natural order of
* {@code hbase:meta} rows for a single table). Within each region, replicas are returned in
* ascending replica-id order (replica 0, then 1, then 2, ...). Split parents and offline regions
* are filtered out, which may cause a page to contain fewer than {@code limit} regions but never
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this filtering happening? I didn't see any test coverage either. The existing methods don't do any such filtering, correct? So is this even needed?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filtering is already implemented and happens inside MetaTableAccessor.DefaultVisitorBase#visit(). Call chain: HRegionLocator#getRegionLocations() -> MetaTableAccessor.TableVisitorBase#visit() -> MetaTableAccessor.DefaultVisitorBase#visit().

Copy link
Copy Markdown
Contributor Author

@sanjeet006py sanjeet006py May 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am reusing the existing filtering logic, so no new test coverage is needed.

* disturbs ordering of the survivors.
* <p/>
* To page through all regions of a table, call repeatedly passing
* {@code last.getRegion().getEndKey()} as the next {@code startKey}, where {@code last} is the
* final element of the previous response. All replicas of a region share the same
* {@link RegionInfo}, so the last entry's end key is the correct cursor regardless of which
* replica it is. Pass {@code null} for the first call. Stop paging when the returned list is
* empty or when the last region's end key is {@link HConstants#EMPTY_END_ROW} (zero-length) -
* that signals the end of the table; passing it back in would re-scan from the beginning since by
* convention an empty start key means "from the first region".
* <p/>
* Unlike {@link #getAllRegionLocations()}, this method performs at most one RPC against
* {@code hbase:meta} per invocation, so its latency is bounded by {@code limit} rather than table
* size. Suitable for callers that wrap meta lookups in a lock with a fixed timeout, e.g. for bulk
* region-cache warmup.
* <p/>
* This method is optional. Implementations that cannot support paginated lookups should throw
* {@link UnsupportedOperationException} (the default behavior); callers should fall back to
* {@link #getAllRegionLocations()} in that case.
* @param startKey region start-key to begin scanning from (inclusive); {@code null} or empty
* starts from the first region
* @param limit maximum number of regions to return; if &lt;= 0, falls back to
* {@code hbase.meta.scanner.caching}
* @return up to {@code limit} {@link HRegionLocation}s in start-key order, possibly empty when no
* more regions exist
* @throws IOException if a remote or network exception occurs
* @throws UnsupportedOperationException if this implementation does not support paginated lookups
*/
default List<HRegionLocation> getRegionLocationsPage(byte[] startKey, int limit)
    throws IOException {
  // Optional operation: implementations that cannot serve single-RPC paginated meta lookups
  // keep this default, and callers are expected to fall back to getAllRegionLocations().
  throw new UnsupportedOperationException(
    "getRegionLocationsPage(byte[], int) is not supported by this RegionLocator;"
      + " fall back to getAllRegionLocations()");
}

/**
* Gets the starting row key for every region in the currently open table.
* <p>
Expand Down
Loading
Loading