Skip to content

Commit

Permalink
[feat](nereids) adjust min/max for partition key #41729 branch-2.1 (#…
Browse files Browse the repository at this point in the history
…45713)

### What problem does this PR solve?
pick #41729
  • Loading branch information
englefly authored Dec 22, 2024
1 parent 9fa94bb commit 7d9a751
Showing 1 changed file with 174 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,15 @@
package org.apache.doris.nereids.stats;

import org.apache.doris.analysis.IntLiteral;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.ListPartitionItem;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.PartitionKey;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.RangePartitionItem;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
Expand Down Expand Up @@ -136,11 +142,13 @@
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.statistics.util.StatisticsUtil;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -899,6 +907,155 @@ private double getTableRowCount(CatalogRelation relation) {
}
}

/**
* Determine whether it is a partition key inside the function.
*/
private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
if (olapTable.getPartitionType() == PartitionType.LIST) {
cache = updateMinMaxForListPartitionKey(olapTable, selectedPartitionNames, slot, cache);
} else if (olapTable.getPartitionType() == PartitionType.RANGE) {
cache = updateMinMaxForTheFirstRangePartitionKey(olapTable, selectedPartitionNames, slot, cache);
}
return cache;
}

private double convertLegacyLiteralToDouble(LiteralExpr literal) throws org.apache.doris.common.AnalysisException {
return StatisticsUtil.convertToDouble(literal.getType(), literal.getStringValue());
}

private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
if (partitionColumnIdx != -1) {
try {
LiteralExpr minExpr = null;
LiteralExpr maxExpr = null;
double minValue = 0;
double maxValue = 0;
for (String selectedPartitionName : selectedPartitionNames) {
PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
selectedPartitionName);
if (item instanceof ListPartitionItem) {
ListPartitionItem lp = (ListPartitionItem) item;
for (PartitionKey key : lp.getItems()) {
if (minExpr == null) {
minExpr = key.getKeys().get(partitionColumnIdx);
minValue = convertLegacyLiteralToDouble(minExpr);
maxExpr = key.getKeys().get(partitionColumnIdx);
maxValue = convertLegacyLiteralToDouble(maxExpr);
} else {
double current = convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx));
if (current > maxValue) {
maxValue = current;
maxExpr = key.getKeys().get(partitionColumnIdx);
} else if (current < minValue) {
minValue = current;
minExpr = key.getKeys().get(partitionColumnIdx);
}
}
}
}
}
if (minExpr != null) {
cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr);
}
} catch (org.apache.doris.common.AnalysisException e) {
LOG.debug(e.getMessage());
}
}
return cache;
}

private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapTable,
List<String> selectedPartitionNames,
SlotReference slot, ColumnStatistic cache) {
int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get());
// for multi partition keys, only the first partition key need to adjust min/max
if (partitionColumnIdx == 0) {
// update partition column min/max by partition info
try {
LiteralExpr minExpr = null;
LiteralExpr maxExpr = null;
double minValue = 0;
double maxValue = 0;
for (String selectedPartitionName : selectedPartitionNames) {
PartitionItem item = olapTable.getPartitionItemOrAnalysisException(
selectedPartitionName);
if (item instanceof RangePartitionItem) {
RangePartitionItem ri = (RangePartitionItem) item;
Range<PartitionKey> range = ri.getItems();
PartitionKey upper = range.upperEndpoint();
PartitionKey lower = range.lowerEndpoint();
if (maxExpr == null) {
maxExpr = upper.getKeys().get(partitionColumnIdx);
maxValue = convertLegacyLiteralToDouble(maxExpr);
minExpr = lower.getKeys().get(partitionColumnIdx);
minValue = convertLegacyLiteralToDouble(minExpr);
} else {
double currentValue = convertLegacyLiteralToDouble(upper.getKeys()
.get(partitionColumnIdx));
if (currentValue > maxValue) {
maxValue = currentValue;
maxExpr = upper.getKeys().get(partitionColumnIdx);
}
currentValue = convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx));
if (currentValue < minValue) {
minValue = currentValue;
minExpr = lower.getKeys().get(partitionColumnIdx);
}
}
}
}
if (minExpr != null) {
cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr);
}
} catch (org.apache.doris.common.AnalysisException e) {
LOG.debug(e.getMessage());
}
}
return cache;
}

private ColumnStatistic updateMinMax(ColumnStatistic cache, double minValue, LiteralExpr minExpr,
double maxValue, LiteralExpr maxExpr) {
boolean shouldUpdateCache = false;
if (!cache.isUnKnown) {
// merge the min/max with cache.
// example: min/max range in cache is [10-20]
// range from partition def is [15-30]
// the final range is [15-20]
if (cache.minValue > minValue) {
minValue = cache.minValue;
minExpr = cache.minExpr;
} else {
shouldUpdateCache = true;
}
if (cache.maxValue < maxValue) {
maxValue = cache.maxValue;
maxExpr = cache.maxExpr;
} else {
shouldUpdateCache = true;
}
// if min/max is invalid, do not update cache
if (minValue > maxValue) {
shouldUpdateCache = false;
}
}

if (shouldUpdateCache) {
cache = new ColumnStatisticBuilder(cache)
.setMinExpr(minExpr)
.setMinValue(minValue)
.setMaxExpr(maxExpr)
.setMaxValue(maxValue)
.build();
}
return cache;
}

// TODO: 1. Subtract the pruned partition
// 2. Consider the influence of runtime filter
// 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now.
Expand Down Expand Up @@ -944,6 +1101,18 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
LOG.debug("{} is partially analyzed, clear min/max values in column stats",
catalogRelation.getTable().getName());
}
List<String> selectedPartitionNames = null;
if (catalogRelation instanceof OlapScan) {
OlapScan olapScan = (OlapScan) catalogRelation;
if (olapScan.getSelectedPartitionIds().size() < olapScan.getTable().getPartitionNum()) {
// partition pruned
// try to use selected partition stats, if failed, fall back to table stats
selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size());
for (Long id : olapScan.getSelectedPartitionIds()) {
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
}
}
}
for (SlotReference slotReference : slotSet) {
String colName = slotReference.getColumn().isPresent()
? slotReference.getColumn().get().getName()
Expand All @@ -959,6 +1128,11 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
cache = ColumnStatistic.UNKNOWN;
} else {
cache = getColumnStatistic(table, colName, idxId);
if (table instanceof OlapTable && slotReference.getColumn().isPresent()
&& selectedPartitionNames != null) {
cache = updateMinMaxForPartitionKey((OlapTable) table,
selectedPartitionNames, slotReference, cache);
}
}
ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache);
colStatsBuilder.normalizeAvgSizeByte(slotReference);
Expand Down

0 comments on commit 7d9a751

Please sign in to comment.