From 7d9a7512971b3ec9dc0dee68835999b123c8650a Mon Sep 17 00:00:00 2001 From: minghong Date: Sun, 22 Dec 2024 11:20:01 +0800 Subject: [PATCH] [feat](nereids) adjust min/max for partition key #41729 branch-2.1 (#45713) ### What problem does this PR solve? pick #41729 --- .../doris/nereids/stats/StatsCalculator.java | 174 ++++++++++++++++++ 1 file changed, 174 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index d4b4022fbe7b96..bfe5ec4be48691 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -18,9 +18,15 @@ package org.apache.doris.nereids.stats; import org.apache.doris.analysis.IntLiteral; +import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.ListPartitionItem; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionItem; +import org.apache.doris.catalog.PartitionKey; +import org.apache.doris.catalog.PartitionType; +import org.apache.doris.catalog.RangePartitionItem; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; @@ -136,11 +142,13 @@ import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; import org.apache.doris.statistics.TableStatsMeta; +import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Range; import org.apache.commons.collections.CollectionUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -899,6 +907,155 @@ private double getTableRowCount(CatalogRelation relation) { } } + /** + * Determine whether it is a partition key inside the function. + */ + private ColumnStatistic updateMinMaxForPartitionKey(OlapTable olapTable, + List selectedPartitionNames, + SlotReference slot, ColumnStatistic cache) { + if (olapTable.getPartitionType() == PartitionType.LIST) { + cache = updateMinMaxForListPartitionKey(olapTable, selectedPartitionNames, slot, cache); + } else if (olapTable.getPartitionType() == PartitionType.RANGE) { + cache = updateMinMaxForTheFirstRangePartitionKey(olapTable, selectedPartitionNames, slot, cache); + } + return cache; + } + + private double convertLegacyLiteralToDouble(LiteralExpr literal) throws org.apache.doris.common.AnalysisException { + return StatisticsUtil.convertToDouble(literal.getType(), literal.getStringValue()); + } + + private ColumnStatistic updateMinMaxForListPartitionKey(OlapTable olapTable, + List selectedPartitionNames, + SlotReference slot, ColumnStatistic cache) { + int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get()); + if (partitionColumnIdx != -1) { + try { + LiteralExpr minExpr = null; + LiteralExpr maxExpr = null; + double minValue = 0; + double maxValue = 0; + for (String selectedPartitionName : selectedPartitionNames) { + PartitionItem item = olapTable.getPartitionItemOrAnalysisException( + selectedPartitionName); + if (item instanceof ListPartitionItem) { + ListPartitionItem lp = (ListPartitionItem) item; + for (PartitionKey key : lp.getItems()) { + if (minExpr == null) { + minExpr = key.getKeys().get(partitionColumnIdx); + minValue = convertLegacyLiteralToDouble(minExpr); + maxExpr = key.getKeys().get(partitionColumnIdx); + maxValue = convertLegacyLiteralToDouble(maxExpr); + } else { + double current = convertLegacyLiteralToDouble(key.getKeys().get(partitionColumnIdx)); + if (current > maxValue) { + maxValue = current; + maxExpr = key.getKeys().get(partitionColumnIdx); + } else if (current < minValue) { + minValue = current; + minExpr = key.getKeys().get(partitionColumnIdx); + } + } + } + } + } + if (minExpr != null) { + cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr); + } + } catch (org.apache.doris.common.AnalysisException e) { + LOG.debug(e.getMessage()); + } + } + return cache; + } + + private ColumnStatistic updateMinMaxForTheFirstRangePartitionKey(OlapTable olapTable, + List selectedPartitionNames, + SlotReference slot, ColumnStatistic cache) { + int partitionColumnIdx = olapTable.getPartitionColumns().indexOf(slot.getColumn().get()); + // for multi partition keys, only the first partition key need to adjust min/max + if (partitionColumnIdx == 0) { + // update partition column min/max by partition info + try { + LiteralExpr minExpr = null; + LiteralExpr maxExpr = null; + double minValue = 0; + double maxValue = 0; + for (String selectedPartitionName : selectedPartitionNames) { + PartitionItem item = olapTable.getPartitionItemOrAnalysisException( + selectedPartitionName); + if (item instanceof RangePartitionItem) { + RangePartitionItem ri = (RangePartitionItem) item; + Range range = ri.getItems(); + PartitionKey upper = range.upperEndpoint(); + PartitionKey lower = range.lowerEndpoint(); + if (maxExpr == null) { + maxExpr = upper.getKeys().get(partitionColumnIdx); + maxValue = convertLegacyLiteralToDouble(maxExpr); + minExpr = lower.getKeys().get(partitionColumnIdx); + minValue = convertLegacyLiteralToDouble(minExpr); + } else { + double currentValue = convertLegacyLiteralToDouble(upper.getKeys() + .get(partitionColumnIdx)); + if (currentValue > maxValue) { + maxValue = currentValue; + maxExpr = upper.getKeys().get(partitionColumnIdx); + } + currentValue = convertLegacyLiteralToDouble(lower.getKeys().get(partitionColumnIdx)); + if (currentValue < minValue) { + minValue = currentValue; + minExpr = lower.getKeys().get(partitionColumnIdx); + } + } + } + } + if (minExpr != null) { + cache = updateMinMax(cache, minValue, minExpr, maxValue, maxExpr); + } + } catch (org.apache.doris.common.AnalysisException e) { + LOG.debug(e.getMessage()); + } + } + return cache; + } + + private ColumnStatistic updateMinMax(ColumnStatistic cache, double minValue, LiteralExpr minExpr, + double maxValue, LiteralExpr maxExpr) { + boolean shouldUpdateCache = false; + if (!cache.isUnKnown) { + // merge the min/max with cache. + // example: min/max range in cache is [10-20] + // range from partition def is [15-30] + // the final range is [15-20] + if (cache.minValue > minValue) { + minValue = cache.minValue; + minExpr = cache.minExpr; + } else { + shouldUpdateCache = true; + } + if (cache.maxValue < maxValue) { + maxValue = cache.maxValue; + maxExpr = cache.maxExpr; + } else { + shouldUpdateCache = true; + } + // if min/max is invalid, do not update cache + if (minValue > maxValue) { + shouldUpdateCache = false; + } + } + + if (shouldUpdateCache) { + cache = new ColumnStatisticBuilder(cache) + .setMinExpr(minExpr) + .setMinValue(minValue) + .setMaxExpr(maxExpr) + .setMaxValue(maxValue) + .build(); + } + return cache; + } + // TODO: 1. Subtract the pruned partition // 2. Consider the influence of runtime filter // 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now. @@ -944,6 +1101,18 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { LOG.debug("{} is partially analyzed, clear min/max values in column stats", catalogRelation.getTable().getName()); } + List selectedPartitionNames = null; + if (catalogRelation instanceof OlapScan) { + OlapScan olapScan = (OlapScan) catalogRelation; + if (olapScan.getSelectedPartitionIds().size() < olapScan.getTable().getPartitionNum()) { + // partition pruned + // try to use selected partition stats, if failed, fall back to table stats + selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size()); + for (Long id : olapScan.getSelectedPartitionIds()) { + selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName()); + } + } + } for (SlotReference slotReference : slotSet) { String colName = slotReference.getColumn().isPresent() ? slotReference.getColumn().get().getName() @@ -959,6 +1128,11 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { cache = ColumnStatistic.UNKNOWN; } else { cache = getColumnStatistic(table, colName, idxId); + if (table instanceof OlapTable && slotReference.getColumn().isPresent() + && selectedPartitionNames != null) { + cache = updateMinMaxForPartitionKey((OlapTable) table, + selectedPartitionNames, slotReference, cache); + } } ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); colStatsBuilder.normalizeAvgSizeByte(slotReference);