Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public static final class Builder {
private byte[] value;
private Filter filter;
private TimeRange timeRange;
private boolean queryMetricsEnabled = false;

private Builder(byte[] row) {
this.row = Preconditions.checkNotNull(row, "row is null");
Expand Down Expand Up @@ -133,6 +134,21 @@ public Builder timeRange(TimeRange timeRange) {
return this;
}

/**
 * Enables the return of {@link QueryMetrics} alongside the corresponding result for this query.
 * <p>
 * This is intended for advanced users who need result-granular, server-side metrics.
 * <p>
 * NOTE(review): the sentence "Does not work" appears truncated — presumably it should list the
 * operations that do not support per-result metrics, as the analogous
 * {@code Query#setQueryMetricsEnabled(boolean)} javadoc does; confirm the intended exclusions.
 * @param queryMetricsEnabled {@code true} to enable collection of per-result query metrics,
 *                            {@code false} to disable metrics collection (resulting in
 *                            {@code null} metrics)
 * @return this builder, for method chaining
 */
public Builder queryMetricsEnabled(boolean queryMetricsEnabled) {
this.queryMetricsEnabled = queryMetricsEnabled;
return this;
}

private void preCheck(Row action) {
Preconditions.checkNotNull(action, "action is null");
if (!Bytes.equals(row, action.getRow())) {
Expand All @@ -154,9 +170,10 @@ private void preCheck(Row action) {
public CheckAndMutate build(Put put) {
preCheck(put);
if (filter != null) {
return new CheckAndMutate(row, filter, timeRange, put);
return new CheckAndMutate(row, filter, timeRange, put, queryMetricsEnabled);
} else {
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, put);
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, put,
queryMetricsEnabled);
}
}

Expand All @@ -168,9 +185,10 @@ public CheckAndMutate build(Put put) {
public CheckAndMutate build(Delete delete) {
preCheck(delete);
if (filter != null) {
return new CheckAndMutate(row, filter, timeRange, delete);
return new CheckAndMutate(row, filter, timeRange, delete, queryMetricsEnabled);
} else {
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, delete);
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, delete,
queryMetricsEnabled);
}
}

Expand All @@ -182,9 +200,10 @@ public CheckAndMutate build(Delete delete) {
public CheckAndMutate build(Increment increment) {
preCheck(increment);
if (filter != null) {
return new CheckAndMutate(row, filter, timeRange, increment);
return new CheckAndMutate(row, filter, timeRange, increment, queryMetricsEnabled);
} else {
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, increment);
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, increment,
queryMetricsEnabled);
}
}

Expand All @@ -196,9 +215,10 @@ public CheckAndMutate build(Increment increment) {
public CheckAndMutate build(Append append) {
preCheck(append);
if (filter != null) {
return new CheckAndMutate(row, filter, timeRange, append);
return new CheckAndMutate(row, filter, timeRange, append, queryMetricsEnabled);
} else {
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, append);
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, append,
queryMetricsEnabled);
}
}

Expand All @@ -210,9 +230,10 @@ public CheckAndMutate build(Append append) {
public CheckAndMutate build(RowMutations mutations) {
preCheck(mutations);
if (filter != null) {
return new CheckAndMutate(row, filter, timeRange, mutations);
return new CheckAndMutate(row, filter, timeRange, mutations, queryMetricsEnabled);
} else {
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, mutations);
return new CheckAndMutate(row, family, qualifier, op, value, timeRange, mutations,
queryMetricsEnabled);
}
}
}
Expand All @@ -234,9 +255,10 @@ public static Builder newBuilder(byte[] row) {
private final Filter filter;
private final TimeRange timeRange;
private final Row action;
private final boolean queryMetricsEnabled;

private CheckAndMutate(byte[] row, byte[] family, byte[] qualifier, final CompareOperator op,
byte[] value, TimeRange timeRange, Row action) {
byte[] value, TimeRange timeRange, Row action, boolean queryMetricsEnabled) {
this.row = row;
this.family = family;
this.qualifier = qualifier;
Expand All @@ -245,9 +267,11 @@ private CheckAndMutate(byte[] row, byte[] family, byte[] qualifier, final Compar
this.filter = null;
this.timeRange = timeRange != null ? timeRange : TimeRange.allTime();
this.action = action;
this.queryMetricsEnabled = queryMetricsEnabled;
}

private CheckAndMutate(byte[] row, Filter filter, TimeRange timeRange, Row action) {
private CheckAndMutate(byte[] row, Filter filter, TimeRange timeRange, Row action,
boolean queryMetricsEnabled) {
this.row = row;
this.family = null;
this.qualifier = null;
Expand All @@ -256,6 +280,7 @@ private CheckAndMutate(byte[] row, Filter filter, TimeRange timeRange, Row actio
this.filter = filter;
this.timeRange = timeRange != null ? timeRange : TimeRange.allTime();
this.action = action;
this.queryMetricsEnabled = queryMetricsEnabled;
}

/** Returns the row */
Expand Down Expand Up @@ -303,4 +328,9 @@ public TimeRange getTimeRange() {
public Row getAction() {
return action;
}

/**
 * Returns whether per-result {@link QueryMetrics} collection was requested for this operation.
 * @return {@code true} if query metrics are enabled, {@code false} otherwise
 */
public boolean isQueryMetricsEnabled() {
return queryMetricsEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ public class CheckAndMutateResult {
private final boolean success;
private final Result result;

private QueryMetrics metrics = null;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sadly, because the Result can be null, we need to stash the QueryMetrics separately.


public CheckAndMutateResult(boolean success, Result result) {
this.success = success;
this.result = result;
Expand All @@ -41,4 +43,13 @@ public boolean isSuccess() {
public Result getResult() {
return result;
}

/**
 * Attaches server-side {@link QueryMetrics} to this result.
 * <p>
 * Stored separately from {@link #getResult()} because the wrapped {@code Result} may be
 * {@code null} while metrics still need to be carried back to the caller.
 * @param metrics the metrics collected on the server, or {@code null} if none
 * @return this instance, for method chaining
 */
public CheckAndMutateResult setMetrics(QueryMetrics metrics) {
this.metrics = metrics;
return this;
}

/**
 * Returns the server-side {@link QueryMetrics} for this result, or {@code null} if metrics
 * collection was not enabled for the operation.
 */
public QueryMetrics getMetrics() {
return metrics;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public Get(Get get) {
this.setFilter(get.getFilter());
this.setReplicaId(get.getReplicaId());
this.setConsistency(get.getConsistency());
this.setQueryMetricsEnabled(get.isQueryMetricsEnabled());
// from Get
this.cacheBlocks = get.getCacheBlocks();
this.maxVersions = get.getMaxVersions();
Expand Down Expand Up @@ -453,6 +454,7 @@ public Map<String, Object> toMap(int maxCols) {
map.put("colFamTimeRangeMap", colFamTimeRangeMapStr);
}
map.put("priority", getPriority());
map.put("queryMetricsEnabled", queryMetricsEnabled);
return map;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.client;

import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.Filter;
Expand Down Expand Up @@ -46,6 +47,7 @@ public abstract class Query extends OperationWithAttributes {
protected Consistency consistency = Consistency.STRONG;
protected Map<byte[], TimeRange> colFamTimeRangeMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
protected Boolean loadColumnFamiliesOnDemand = null;
protected boolean queryMetricsEnabled = false;

public Filter getFilter() {
return filter;
Expand Down Expand Up @@ -157,6 +159,28 @@ public Query setIsolationLevel(IsolationLevel level) {
return this;
}

/**
 * Enables the return of {@link QueryMetrics} alongside the corresponding result(s) for this
 * query.
 * <p>
 * This is intended for advanced users who need result-granular, server-side metrics.
 * <p>
 * Does not work with calls to {@link Table#exists(Get)} and {@link Table#exists(List)}.
 * @param enabled {@code true} to enable collection of per-result query metrics, {@code false} to
 *                disable metrics collection (resulting in {@code null} metrics)
 * @return this query instance, for method chaining
 */
public Query setQueryMetricsEnabled(boolean enabled) {
this.queryMetricsEnabled = enabled;
return this;
}

/**
 * Returns whether per-result {@link QueryMetrics} collection was requested for this query.
 * @return {@code true} if query metrics are enabled, {@code false} otherwise
 */
public boolean isQueryMetricsEnabled() {
return queryMetricsEnabled;
}

/**
* Returns The isolation level of this query. If no isolation level was set for this query object,
* then it returns READ_COMMITTED.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;

@InterfaceAudience.Public
@InterfaceStability.Evolving
public class QueryMetrics {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also decorate this class with IS.Unstable or Evolving. Whichever you choose, also place the same annotation on the public accessor methods on all the IA.Public classes. Let's give ourselves a chance to explore this feature and make changes faster than once every 5 years.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the existing ServerSideMetric class? It has loose typing for metrics via a map and protobuf logic is already there. The only advantage for having QueryMetrics is that the metrics as fields will be more efficient than a map, but it can be improved by defining predefined metric "schemas" for ServerSideMetric. E.g., you can have "QueryMetrics" as one of the schemas and it would predefine a fixed size counters map with "blockBytesScanned" as the only metric.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ServerSideMetric currently has a fixed schema of counters, but this can be changed such that a different schema can be specified.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We at Salesforce have a similar requirement to track scan metrics at a higher granularity of region level. Currently scan metrics for all the regions scanned are combined together as they are available and so we lose the granularity. Thus, we have a proposal to maintain the per region metrics along with capturing region name and RS where the region lies. A new method will be exposed to retrieve the metrics with granularity, but the existing method will behave the same as it combines all the per region metrics into one structure. We are reusing the existing ScanMetrics class due to following reasons:

  • Our use case involves doing scans and it already has support for this class, so this is an incremental change on top of it.
  • We are not changing the type of metrics, but simply preserving the granularity. So, we are capturing list of ScanMetric objects, one per each region.
  • This allows leveraging the existing integration of scan metrics on server and client side, such as protobuf serialization.

Copy link
Contributor Author

@hgromer hgromer Mar 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the existing ServerSideMetric class? It has loose typing for metrics via a map and protobuf logic is already there.

I'm hesitant to do this. What's the value in obfuscating the actual metrics themselves by using a generic map when we can efficiently and more clearly represent whichever fields are being served to the user? In my opinion, this is also less error-prone, as the fields are clearly stated, and it's harder to remove or modify something unintentionally.

Thus, we have a proposal to maintain the per region metrics along with capturing region name and RS where the region lies

I talked a little bit about why I opted for creating a new class, rather than re-using an existing one here.

I worry about coupling the new metrics and Scans with this shared metrics class which could make it hard to iterate in the future.

It makes sense to re-use the ScanMetrics for more granular scan metrics, but I think it makes sense to create a new metric type for metrics that will back both Get(s) and Scans. Additionally, the granularity is so coarse that we'll be adding bloat to the metric by requiring fields, such as countOfRegions, which wouldn't apply to this new type of metric I'm proposing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll still need to introduce this new pattern to enable metrics at the granularity that we want (per-result). So I don't think re-using a base class, and implementing an inheritance hierarchy, avoids this. I'm not sure I see the benefit of avoiding a new protobuf definition, or avoiding a new converter. Both are pretty trivial to create, and allow us to keep these concepts (result metrics vs. aggregated scan metrics) separate. Perhaps there's some value in a base metrics class that simply stores counters, but it requires a bit of refactoring and touches existing code.

For the sake of discussion, I took a stab at what it would look like if we did create this inheritance hierarchy. We can look at this commit to compare both implementations. I'm not sure we get much value from it, but am happy to continue discussing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Agreed with @hgromer. I think these are two separate things, and there's not a lot of value in shoehorning inheritance. Wishy washy inheritance feels like something we'll wish we hadn't done two or three iterations down the road

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll still need to introduce this new pattern to enable metrics at the granularity that we want (per-result).

I am not sure if I am missing the point, but of course, how else will you get instrumentation other than actually using the class?

So I don't think re-using a base class, and implementing an inheritance hierarchy avoids this.

The suggestion was about reusing the existing pattern by making it more generic, instead of introducing a new class. If we keep adding new standalone classes for every new scenario, it can quickly get confusing. Also, the existing counter map based approach is easier to introspect and process. Just my 2¢.

For the sake of discussion, I took a stab at what it would look if we did create this inheritance hierarchy. We can look at this commit to compare both implementations

Looks good, but I would rename ServerSideMetricsCounter as just MetricsCounter (or MetricsBase) as there is nothing server specific in it. Also, it is not the right place to have those public static constants as it is independent of any specific metric.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel that the original implementation is still far cleaner. We need to add a QueryMetric class regardless; the only thing we save on is an additional protobuf, but I don't think that's a huge benefit. I think generic counters make sense when the schema or shape of the data is abstract, or unknown (such as Configuration), but I think it's neater to leverage explicit typings whenever possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've reverted back

private final long blockBytesScanned;

/**
 * Creates an immutable metrics holder for a single query result.
 * @param blockBytesScanned number of block bytes scanned on the server to produce the result
 */
public QueryMetrics(long blockBytesScanned) {
this.blockBytesScanned = blockBytesScanned;
}

/** Returns the number of block bytes scanned on the server while serving this query. */
@InterfaceStability.Evolving
public long getBlockBytesScanned() {
return blockBytesScanned;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ public CompletableFuture<Boolean> thenPut(Put put) {
() -> RawAsyncTableImpl.this.<Boolean> newCaller(row, put.getPriority(), rpcTimeoutNs)
.action((controller, loc, stub) -> RawAsyncTableImpl.mutate(controller, loc, stub, put,
(rn, p) -> RequestConverter.buildMutateRequest(rn, row, family, qualifier, op, value,
null, timeRange, p, HConstants.NO_NONCE, HConstants.NO_NONCE),
null, timeRange, p, HConstants.NO_NONCE, HConstants.NO_NONCE, false),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These methods correspond with the deprecated CheckAndMutateBuilder, so I didn't think it was worth making any client-side modifications to get this feature working with the deprecated builder.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A carrot to encourage folks to move on :)

(c, r) -> r.getProcessed()))
.call(),
supplier);
Expand All @@ -374,7 +374,7 @@ public CompletableFuture<Boolean> thenDelete(Delete delete) {
() -> RawAsyncTableImpl.this.<Boolean> newCaller(row, delete.getPriority(), rpcTimeoutNs)
.action((controller, loc, stub) -> RawAsyncTableImpl.mutate(controller, loc, stub, delete,
(rn, d) -> RequestConverter.buildMutateRequest(rn, row, family, qualifier, op, value,
null, timeRange, d, HConstants.NO_NONCE, HConstants.NO_NONCE),
null, timeRange, d, HConstants.NO_NONCE, HConstants.NO_NONCE, false),
(c, r) -> r.getProcessed()))
.call(),
supplier);
Expand All @@ -392,7 +392,7 @@ public CompletableFuture<Boolean> thenMutate(RowMutations mutations) {
.action((controller, loc, stub) -> RawAsyncTableImpl.this.mutateRow(controller, loc, stub,
mutations,
(rn, rm) -> RequestConverter.buildMultiRequest(rn, row, family, qualifier, op, value,
null, timeRange, rm, HConstants.NO_NONCE, HConstants.NO_NONCE),
null, timeRange, rm, HConstants.NO_NONCE, HConstants.NO_NONCE, false),
CheckAndMutateResult::isSuccess))
.call(), supplier);
}
Expand Down Expand Up @@ -433,7 +433,7 @@ public CompletableFuture<Boolean> thenPut(Put put) {
() -> RawAsyncTableImpl.this.<Boolean> newCaller(row, put.getPriority(), rpcTimeoutNs)
.action((controller, loc, stub) -> RawAsyncTableImpl.mutate(controller, loc, stub, put,
(rn, p) -> RequestConverter.buildMutateRequest(rn, row, null, null, null, null, filter,
timeRange, p, HConstants.NO_NONCE, HConstants.NO_NONCE),
timeRange, p, HConstants.NO_NONCE, HConstants.NO_NONCE, false),
(c, r) -> r.getProcessed()))
.call(),
supplier);
Expand All @@ -448,7 +448,7 @@ public CompletableFuture<Boolean> thenDelete(Delete delete) {
() -> RawAsyncTableImpl.this.<Boolean> newCaller(row, delete.getPriority(), rpcTimeoutNs)
.action((controller, loc, stub) -> RawAsyncTableImpl.mutate(controller, loc, stub, delete,
(rn, d) -> RequestConverter.buildMutateRequest(rn, row, null, null, null, null, filter,
timeRange, d, HConstants.NO_NONCE, HConstants.NO_NONCE),
timeRange, d, HConstants.NO_NONCE, HConstants.NO_NONCE, false),
(c, r) -> r.getProcessed()))
.call(),
supplier);
Expand All @@ -465,7 +465,7 @@ public CompletableFuture<Boolean> thenMutate(RowMutations mutations) {
.action((controller, loc, stub) -> RawAsyncTableImpl.this.mutateRow(controller, loc, stub,
mutations,
(rn, rm) -> RequestConverter.buildMultiRequest(rn, row, null, null, null, null, filter,
timeRange, rm, HConstants.NO_NONCE, HConstants.NO_NONCE),
timeRange, rm, HConstants.NO_NONCE, HConstants.NO_NONCE, false),
CheckAndMutateResult::isSuccess))
.call(), supplier);
}
Expand Down Expand Up @@ -500,7 +500,8 @@ public CompletableFuture<CheckAndMutateResult> checkAndMutate(CheckAndMutate che
(rn, m) -> RequestConverter.buildMutateRequest(rn, checkAndMutate.getRow(),
checkAndMutate.getFamily(), checkAndMutate.getQualifier(),
checkAndMutate.getCompareOp(), checkAndMutate.getValue(),
checkAndMutate.getFilter(), checkAndMutate.getTimeRange(), m, nonceGroup, nonce),
checkAndMutate.getFilter(), checkAndMutate.getTimeRange(), m, nonceGroup, nonce,
checkAndMutate.isQueryMetricsEnabled()),
(c, r) -> ResponseConverter.getCheckAndMutateResult(r, c.cellScanner())))
.call();
} else if (checkAndMutate.getAction() instanceof RowMutations) {
Expand All @@ -516,7 +517,8 @@ CheckAndMutateResult> mutateRow(controller, loc, stub, rowMutations,
(rn, rm) -> RequestConverter.buildMultiRequest(rn, checkAndMutate.getRow(),
checkAndMutate.getFamily(), checkAndMutate.getQualifier(),
checkAndMutate.getCompareOp(), checkAndMutate.getValue(),
checkAndMutate.getFilter(), checkAndMutate.getTimeRange(), rm, nonceGroup, nonce),
checkAndMutate.getFilter(), checkAndMutate.getTimeRange(), rm, nonceGroup, nonce,
checkAndMutate.isQueryMetricsEnabled()),
resp -> resp))
.call();
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public class Result implements ExtendedCellScannable, ExtendedCellScanner {
*/
private int cellScannerIndex = INITIAL_CELLSCANNER_INDEX;
private RegionLoadStats stats;
private QueryMetrics metrics = null;

private final boolean readonly;

Expand Down Expand Up @@ -931,6 +932,11 @@ public void setStatistics(RegionLoadStats loadStats) {
this.stats = loadStats;
}

/**
 * Attaches server-side {@link QueryMetrics} to this result. Not part of the public client API,
 * as indicated by the audience annotation.
 * @param metrics the metrics collected on the server, or {@code null} if none
 */
@InterfaceAudience.Private
public void setMetrics(QueryMetrics metrics) {
this.metrics = metrics;
}

/**
* Returns the associated statistics about the region from which this was returned. Can be
* <tt>null</tt> if stats are disabled.
Expand All @@ -939,6 +945,11 @@ public RegionLoadStats getStats() {
return stats;
}

/**
 * Returns the server-side {@link QueryMetrics} for this result, or {@code null} if metrics
 * collection was not enabled for the query.
 */
public QueryMetrics getMetrics() {
return metrics;
}

/**
* All methods modifying state of Result object must call this method to ensure that special
* purpose immutable Results can't be accidentally modified.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ public Scan(Scan scan) throws IOException {
setPriority(scan.getPriority());
readType = scan.getReadType();
super.setReplicaId(scan.getReplicaId());
super.setQueryMetricsEnabled(scan.isQueryMetricsEnabled());
}

/**
Expand Down Expand Up @@ -249,6 +250,7 @@ public Scan(Get get) {
this.mvccReadPoint = -1L;
setPriority(get.getPriority());
super.setReplicaId(get.getReplicaId());
super.setQueryMetricsEnabled(get.isQueryMetricsEnabled());
}

public boolean isGetScan() {
Expand Down Expand Up @@ -826,6 +828,7 @@ public Map<String, Object> toMap(int maxCols) {
map.put("colFamTimeRangeMap", colFamTimeRangeMapStr);
}
map.put("priority", getPriority());
map.put("queryMetricsEnabled", queryMetricsEnabled);
return map;
}

Expand Down
Loading