Skip to content

Commit 4b5cd7d

Browse files
committed
[enhancement](Nereids) boost characterLiteralTypeCoercion (apache#42941)
Boost characterLiteralTypeCoercion by checking the string format first and skipping the throw of a Throwable when the string cannot be parsed to integer/float/date/datetime. This logic usually runs when searching for the `If` function signature: because `If` has many signatures, we need to cast each argument to the signature type to match the best signature, for example: ``` select if(column_1, 'xxx', 'yyy') ``` Here we check whether 'xxx' and 'yyy' can be parsed to int/datetime and so on. In some scenarios, this optimization can provide a 16% QPS improvement. Before optimization: <img width="1901" alt="image" src="https://github.com/user-attachments/assets/b03d2d29-5d3b-45a6-ba54-2bcc7c2dccca"> <img width="1484" alt="image" src="https://github.com/user-attachments/assets/82cbb2b0-dfe8-4a05-bc2f-ebb35dc23209"> After optimization: <img width="1724" alt="image" src="https://github.com/user-attachments/assets/d60a867d-596d-4ac1-9377-6460ed6d3dd1"> <img width="1722" alt="image" src="https://github.com/user-attachments/assets/c9c9f72c-3a5f-4c24-95d9-9ca99ecab0a6"> (cherry picked from commit 105a0ad)
1 parent db0986e commit 4b5cd7d

28 files changed

+1604
-319
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/SearchSignature.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -207,13 +207,14 @@ private boolean doMatchTypes(FunctionSignature sig, List<Expression> arguments,
207207
int arity = arguments.size();
208208
for (int i = 0; i < arity; i++) {
209209
DataType sigArgType = sig.getArgType(i);
210-
DataType realType = arguments.get(i).getDataType();
210+
Expression argument = arguments.get(i);
211+
DataType realType = argument.getDataType();
211212
// we need to try to do string literal coercion when search signature.
212213
// for example, FUNC_A has two signature FUNC_A(datetime) and FUNC_A(string)
213214
// if SQL block is `FUNC_A('2020-02-02 00:00:00')`, we should return signature FUNC_A(datetime).
214-
if (arguments.get(i).isLiteral() && realType.isStringLikeType()) {
215-
realType = TypeCoercionUtils.characterLiteralTypeCoercion(((Literal) arguments.get(i)).getStringValue(),
216-
sigArgType).orElse(arguments.get(i)).getDataType();
215+
if (!argument.isNullLiteral() && argument.isLiteral() && realType.isStringLikeType()) {
216+
realType = TypeCoercionUtils.characterLiteralTypeCoercion(((Literal) argument).getStringValue(),
217+
sigArgType).orElse(argument).getDataType();
217218
}
218219
if (!typePredicate.apply(sigArgType, realType)) {
219220
return false;

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java

+52-21
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,17 @@
4444
public class DateLiteral extends Literal {
4545
public static final String JAVA_DATE_FORMAT = "yyyy-MM-dd";
4646

47+
public static final Set<Character> punctuations = ImmutableSet.of('!', '@', '#', '$', '%', '^', '&', '*', '(', ')',
48+
'-', '+', '=', '_', '{', '}', '[', ']', '|', '\\', ':', ';', '"', '\'', '<', '>', ',', '.', '?', '/', '~',
49+
'`');
50+
4751
// for cast datetime type to date type.
4852
private static final LocalDateTime START_OF_A_DAY = LocalDateTime.of(0, 1, 1, 0, 0, 0);
4953
private static final LocalDateTime END_OF_A_DAY = LocalDateTime.of(9999, 12, 31, 23, 59, 59, 999999000);
5054
private static final DateLiteral MIN_DATE = new DateLiteral(0, 1, 1);
5155
private static final DateLiteral MAX_DATE = new DateLiteral(9999, 12, 31);
5256
private static final int[] DAYS_IN_MONTH = new int[] {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
5357

54-
private static final Set<Character> punctuations = ImmutableSet.of('!', '@', '#', '$', '%', '^', '&', '*', '(', ')',
55-
'-', '+', '=', '_', '{', '}', '[', ']', '|', '\\', ':', ';', '"', '\'', '<', '>', ',', '.', '?', '/', '~',
56-
'`');
57-
5858
protected long year;
5959
protected long month;
6060
protected long day;
@@ -145,7 +145,7 @@ private static boolean isPunctuation(char c) {
145145
return punctuations.contains(c);
146146
}
147147

148-
static String normalize(String s) {
148+
static Result<String, AnalysisException> normalize(String s) {
149149
// merge consecutive space
150150
if (s.contains(" ")) {
151151
s = s.replaceAll(" +", " ");
@@ -208,7 +208,10 @@ static String normalize(String s) {
208208
sb.append('0').append(c);
209209
}
210210
} else {
211-
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
211+
final String currentString = s;
212+
return Result.err(
213+
() -> new AnalysisException("date/datetime literal [" + currentString + "] is invalid")
214+
);
212215
}
213216
i = j;
214217
partNumber += 1;
@@ -228,7 +231,10 @@ static String normalize(String s) {
228231
} else if (partNumber > 3 && isPunctuation(c)) {
229232
sb.append(':');
230233
} else {
231-
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
234+
final String currentString = s;
235+
return Result.err(
236+
() -> new AnalysisException("date/datetime literal [" + currentString + "] is invalid")
237+
);
232238
}
233239
} else {
234240
break;
@@ -259,15 +265,33 @@ static String normalize(String s) {
259265
// trim use to remove any blank before zone id or zone offset
260266
sb.append(s.substring(i).trim());
261267

262-
return sb.toString();
268+
return Result.ok(sb.toString());
263269
}
264270

265-
protected static TemporalAccessor parse(String s) {
271+
/**
 * Parses {@code s} into a {@link DateLiteral}, reporting failure through the returned
 * {@link Result} instead of throwing.
 *
 * @param s the date string to parse
 * @return an ok result holding the literal built from the parsed year/month/day, or an error
 *         result when {@code parseDateTime} fails or when {@code checkDatetime},
 *         {@code checkRange} or {@code checkDate} rejects the parsed value
 */
272+
public static Result<DateLiteral, AnalysisException> parseDateLiteral(String s) {
273+
Result<TemporalAccessor, AnalysisException> parseResult = parseDateTime(s);
274+
if (parseResult.isError()) {
275+
// propagate the parse failure; only the value type of the Result changes
return parseResult.cast();
276+
}
277+
TemporalAccessor dateTime = parseResult.get();
278+
int year = DateUtils.getOrDefault(dateTime, ChronoField.YEAR);
279+
int month = DateUtils.getOrDefault(dateTime, ChronoField.MONTH_OF_YEAR);
280+
int day = DateUtils.getOrDefault(dateTime, ChronoField.DAY_OF_MONTH);
281+
282+
// same validation as init(String), but the exception is only supplied lazily, not thrown here
if (checkDatetime(dateTime) || checkRange(year, month, day) || checkDate(year, month, day)) {
283+
return Result.err(() -> new AnalysisException("date/datetime literal [" + s + "] is out of range"));
284+
}
285+
return Result.ok(new DateLiteral(year, month, day));
286+
}
287+
288+
/** parseDateTime */
289+
public static Result<TemporalAccessor, AnalysisException> parseDateTime(String s) {
266290
// fast parse '2022-01-01'
267291
if (s.length() == 10 && s.charAt(4) == '-' && s.charAt(7) == '-') {
268292
TemporalAccessor date = fastParseDate(s);
269293
if (date != null) {
270-
return date;
294+
return Result.ok(date);
271295
}
272296
}
273297

@@ -289,15 +313,20 @@ protected static TemporalAccessor parse(String s) {
289313
if (!containsPunctuation) {
290314
s = normalizeBasic(s);
291315
// MySQL rejects "20200219 010101" and "200219 010101", so ' ' can't be used to split a basic date time.
316+
292317
if (!s.contains("T")) {
293318
dateTime = DateTimeFormatterUtils.BASIC_FORMATTER_WITHOUT_T.parse(s);
294319
} else {
295320
dateTime = DateTimeFormatterUtils.BASIC_DATE_TIME_FORMATTER.parse(s);
296321
}
297-
return dateTime;
322+
return Result.ok(dateTime);
298323
}
299324

300-
s = normalize(s);
325+
Result<String, AnalysisException> normalizeResult = normalize(s);
326+
if (normalizeResult.isError()) {
327+
return normalizeResult.cast();
328+
}
329+
s = normalizeResult.get();
301330

302331
if (!s.contains(" ")) {
303332
dateTime = DateTimeFormatterUtils.ZONE_DATE_FORMATTER.parse(s);
@@ -307,32 +336,34 @@ protected static TemporalAccessor parse(String s) {
307336

308337
// if Year is not present, report the literal as invalid
309338
if (!dateTime.isSupported(ChronoField.YEAR)) {
310-
throw new AnalysisException("date/datetime literal [" + originalString + "] is invalid");
339+
return Result.err(
340+
() -> new AnalysisException("date/datetime literal [" + originalString + "] is invalid")
341+
);
311342
}
312343

313-
return dateTime;
344+
return Result.ok(dateTime);
314345
} catch (Exception ex) {
315-
throw new AnalysisException("date/datetime literal [" + originalString + "] is invalid");
346+
return Result.err(() -> new AnalysisException("date/datetime literal [" + originalString + "] is invalid"));
316347
}
317348
}
318349

319350
protected void init(String s) throws AnalysisException {
320-
TemporalAccessor dateTime = parse(s);
351+
TemporalAccessor dateTime = parseDateTime(s).get();
321352
year = DateUtils.getOrDefault(dateTime, ChronoField.YEAR);
322353
month = DateUtils.getOrDefault(dateTime, ChronoField.MONTH_OF_YEAR);
323354
day = DateUtils.getOrDefault(dateTime, ChronoField.DAY_OF_MONTH);
324355

325-
if (checkDatetime(dateTime) || checkRange() || checkDate()) {
356+
if (checkDatetime(dateTime) || checkRange(year, month, day) || checkDate(year, month, day)) {
326357
throw new AnalysisException("date/datetime literal [" + s + "] is out of range");
327358
}
328359
}
329360

330-
protected boolean checkRange() {
361+
protected static boolean checkRange(long year, long month, long day) {
331362
return year > MAX_DATE.getYear() || month > MAX_DATE.getMonth() || day > MAX_DATE.getDay();
332363
}
333364

334-
protected boolean checkDate() {
335-
if (month != 0 && day > DAYS_IN_MONTH[((int) month)]) {
365+
protected static boolean checkDate(long year, long month, long day) {
366+
if (month != 0 && day > DAYS_IN_MONTH[(int) month]) {
336367
if (month == 2 && day == 29 && (Year.isLeap(year) && year > 0)) {
337368
return false;
338369
}
@@ -345,7 +376,7 @@ protected static boolean isDateOutOfRange(LocalDateTime dateTime) {
345376
return dateTime == null || dateTime.isBefore(START_OF_A_DAY) || dateTime.isAfter(END_OF_A_DAY);
346377
}
347378

348-
private boolean checkDatetime(TemporalAccessor dateTime) {
379+
private static boolean checkDatetime(TemporalAccessor dateTime) {
349380
return DateUtils.getOrDefault(dateTime, ChronoField.HOUR_OF_DAY) != 0
350381
|| DateUtils.getOrDefault(dateTime, ChronoField.MINUTE_OF_HOUR) != 0
351382
|| DateUtils.getOrDefault(dateTime, ChronoField.SECOND_OF_MINUTE) != 0

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeLiteral.java

+69-6
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.doris.nereids.trees.expressions.Expression;
2424
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
2525
import org.apache.doris.nereids.types.DateTimeType;
26+
import org.apache.doris.nereids.types.DateTimeV2Type;
2627
import org.apache.doris.nereids.types.coercion.DateLikeType;
2728
import org.apache.doris.nereids.util.DateUtils;
2829

@@ -108,7 +109,7 @@ public static int determineScale(String s) {
108109
if (s.indexOf("-") == s.lastIndexOf("-") && s.indexOf(":") == s.lastIndexOf(":")) {
109110
return 0;
110111
}
111-
s = normalize(s);
112+
s = normalize(s).get();
112113
if (s.length() <= 19 || s.charAt(19) != '.') {
113114
return 0;
114115
}
@@ -130,10 +131,73 @@ public static int determineScale(String s) {
130131
return scale;
131132
}
132133

133-
@Override
134+
/**
 * Parses {@code s} into a datetime literal, reporting failure through the returned
 * {@link Result} instead of throwing.
 *
 * <p>If the parsed value carries a zone, the fields are shifted by the difference between the
 * offset of {@code DateUtils.getTimeZone()} and the offset of that zone at the parsed instant.
 * When {@code isV2} is true, the seventh fractional digit is rounded half-up into the
 * microsecond field; otherwise the fractional part is not passed to the created literal.
 *
 * @param s the datetime string to parse
 * @param isV2 true to build a {@link DateTimeV2Literal}, false to build a {@link DateTimeLiteral}
 * @return an ok result holding the literal, or an error result when {@code parseDateTime} fails
 *         or when {@code checkRange} or {@code checkDate} rejects the resulting year/month/day
 */
135+
public static Result<DateTimeLiteral, AnalysisException> parseDateTimeLiteral(String s, boolean isV2) {
136+
Result<TemporalAccessor, AnalysisException> parseResult = parseDateTime(s);
137+
if (parseResult.isError()) {
138+
// propagate the parse failure; only the value type of the Result changes
return parseResult.cast();
139+
}
140+
141+
TemporalAccessor temporal = parseResult.get();
142+
long year = DateUtils.getOrDefault(temporal, ChronoField.YEAR);
143+
long month = DateUtils.getOrDefault(temporal, ChronoField.MONTH_OF_YEAR);
144+
long day = DateUtils.getOrDefault(temporal, ChronoField.DAY_OF_MONTH);
145+
long hour = DateUtils.getOrDefault(temporal, ChronoField.HOUR_OF_DAY);
146+
long minute = DateUtils.getOrDefault(temporal, ChronoField.MINUTE_OF_HOUR);
147+
long second = DateUtils.getOrDefault(temporal, ChronoField.SECOND_OF_MINUTE);
148+
149+
ZoneId zoneId = temporal.query(TemporalQueries.zone());
150+
if (zoneId != null) {
151+
// resolve both offsets at the parsed instant so that DST in effect at that time is used
152+
Instant thatTime = ZonedDateTime
153+
.of((int) year, (int) month, (int) day, (int) hour, (int) minute, (int) second, 0, zoneId)
154+
.toInstant();
155+
156+
// seconds to add: offset of DateUtils.getTimeZone() minus offset of the literal's own zone
int offset = DateUtils.getTimeZone().getRules().getOffset(thatTime).getTotalSeconds()
157+
- zoneId.getRules().getOffset(thatTime).getTotalSeconds();
158+
if (offset != 0) {
159+
// let plusSeconds handle carries across minute/hour/day/month/year boundaries
DateTimeLiteral tempLiteral = new DateTimeLiteral(year, month, day, hour, minute, second);
160+
DateTimeLiteral result = (DateTimeLiteral) tempLiteral.plusSeconds(offset);
161+
second = result.second;
162+
minute = result.minute;
163+
hour = result.hour;
164+
day = result.day;
165+
month = result.month;
166+
year = result.year;
167+
}
168+
}
169+
170+
long microSecond = DateUtils.getOrDefault(temporal, ChronoField.NANO_OF_SECOND) / 100L;
171+
// nanos / 100 keeps seven fractional digits; the seventh digit only decides the rounding
172+
long sevenDigit = microSecond % 10;
173+
microSecond = microSecond / 10;
174+
if (sevenDigit >= 5 && isV2) {
175+
// round up by one microsecond and let plusMicroSeconds carry into the larger fields
DateTimeV2Literal tempLiteral = new DateTimeV2Literal(year, month, day, hour, minute, second, microSecond);
176+
DateTimeV2Literal result = (DateTimeV2Literal) tempLiteral.plusMicroSeconds(1);
177+
second = result.second;
178+
minute = result.minute;
179+
hour = result.hour;
180+
day = result.day;
181+
month = result.month;
182+
year = result.year;
183+
microSecond = result.microSecond;
184+
}
185+
186+
// validate after the zone shift and rounding, since both can move the date
if (checkRange(year, month, day) || checkDate(year, month, day)) {
187+
return Result.err(() -> new AnalysisException("datetime literal [" + s + "] is out of range"));
188+
}
189+
190+
if (isV2) {
191+
// the V2 type is derived from the original string by DateTimeV2Type.forTypeFromString
DateTimeV2Type type = DateTimeV2Type.forTypeFromString(s);
192+
return Result.ok(new DateTimeV2Literal(type, year, month, day, hour, minute, second, microSecond));
193+
} else {
194+
return Result.ok(new DateTimeLiteral(DateTimeType.INSTANCE, year, month, day, hour, minute, second));
195+
}
196+
}
197+
134198
protected void init(String s) throws AnalysisException {
135199
// TODO: check and do fast parse like fastParseDate
136-
TemporalAccessor temporal = parse(s);
200+
TemporalAccessor temporal = parseDateTime(s).get();
137201

138202
year = DateUtils.getOrDefault(temporal, ChronoField.YEAR);
139203
month = DateUtils.getOrDefault(temporal, ChronoField.MONTH_OF_YEAR);
@@ -177,14 +241,13 @@ protected void init(String s) throws AnalysisException {
177241
this.microSecond = result.microSecond;
178242
}
179243

180-
if (checkRange() || checkDate()) {
244+
if (checkRange(year, month, day) || checkDate(year, month, day)) {
181245
throw new AnalysisException("datetime literal [" + s + "] is out of range");
182246
}
183247
}
184248

185-
@Override
186249
protected boolean checkRange() {
187-
return super.checkRange() || hour > MAX_DATETIME.getHour() || minute > MAX_DATETIME.getMinute()
250+
return checkRange(year, month, day) || hour > MAX_DATETIME.getHour() || minute > MAX_DATETIME.getMinute()
188251
|| second > MAX_DATETIME.getSecond() || microSecond > MAX_MICROSECOND;
189252
}
190253

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ private void roundMicroSecond(int scale) {
7777
this.second = localDateTime.getSecond();
7878
this.microSecond -= 1000000;
7979
}
80-
if (checkRange() || checkDate()) {
80+
if (checkRange() || checkDate(year, month, day)) {
8181
// may fallback to legacy planner. make sure the behaviour of rounding is same.
8282
throw new AnalysisException("datetime literal [" + toString() + "] is out of range");
8383
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java

+9-4
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,16 @@ public DecimalV3Literal(BigDecimal value) {
4444
* Constructor for DecimalV3Literal
4545
*/
4646
public DecimalV3Literal(DecimalV3Type dataType, BigDecimal value) {
47-
super(DecimalV3Type.createDecimalV3TypeLooseCheck(dataType.getPrecision(), dataType.getScale()));
47+
super(DecimalV3Type.createDecimalV3TypeLooseCheck(
48+
dataType.getPrecision() == -1 ? value.precision() : dataType.getPrecision(),
49+
dataType.getScale() == -1 ? value.scale() : dataType.getScale())
50+
);
51+
52+
int precision = dataType.getPrecision() == -1 ? value.precision() : dataType.getPrecision();
53+
int scale = dataType.getScale() == -1 ? value.scale() : dataType.getScale();
4854
Objects.requireNonNull(value, "value not be null");
49-
checkPrecisionAndScale(dataType.getPrecision(), dataType.getScale(), value);
50-
BigDecimal adjustedValue = value.scale() < 0 ? value
51-
: value.setScale(dataType.getScale(), RoundingMode.HALF_UP);
55+
checkPrecisionAndScale(precision, scale, value);
56+
BigDecimal adjustedValue = value.scale() < 0 ? value : value.setScale(scale, RoundingMode.HALF_UP);
5257
this.value = Objects.requireNonNull(adjustedValue);
5358
}
5459

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.literal;
19+
20+
import java.util.Optional;
21+
import java.util.function.Supplier;
22+
23+
/**
 * A success-or-failure holder used to report a failure without constructing an exception
 * unless the caller actually asks for the value.
 *
 * <p>A successful result wraps a non-null value. A failed result wraps a {@link Supplier}
 * of the exception; the exception is only instantiated when {@link #get()} is called.
 *
 * @param <R> type of the value carried by a successful result
 * @param <T> type of the exception produced by a failed result
 */
public final class Result<R, T extends RuntimeException> {
    private final Optional<R> result;
    private final Optional<Supplier<T>> exceptionSupplier;

    private Result(Optional<R> result, Optional<Supplier<T>> exceptionSupplier) {
        this.result = result;
        this.exceptionSupplier = exceptionSupplier;
    }

    /**
     * Creates a successful result.
     *
     * @param result the value to carry; must not be null
     * @return a result for which {@link #isOk()} is true
     * @throws NullPointerException if {@code result} is null
     */
    public static <R, T extends RuntimeException> Result<R, T> ok(R result) {
        return new Result<>(Optional.of(result), Optional.empty());
    }

    /**
     * Creates a failed result.
     *
     * @param exceptionSupplier builds the exception on demand; must not be null
     * @return a result for which {@link #isError()} is true
     * @throws NullPointerException if {@code exceptionSupplier} is null
     */
    public static <R, T extends RuntimeException> Result<R, T> err(Supplier<T> exceptionSupplier) {
        return new Result<>(Optional.empty(), Optional.of(exceptionSupplier));
    }

    /** Returns true when this result carries a value. */
    public boolean isOk() {
        return !exceptionSupplier.isPresent();
    }

    /** Returns true when this result carries an exception supplier. */
    public boolean isError() {
        return exceptionSupplier.isPresent();
    }

    /**
     * Re-types this result without copying it, so that a failure can be returned from a method
     * whose result carries a different value type.
     *
     * <p>The cast is unchecked: it is only type-safe when this result is an error (there is no
     * value whose type could be wrong) and the exception produced by the supplier is a {@code E}.
     *
     * @param <U> value type of the returned view
     * @param <E> exception type of the returned view
     * @return this same instance, viewed as {@code Result<U, E>}
     */
    @SuppressWarnings("unchecked") // see the method comment: safe for error results only
    public <U, E extends RuntimeException> Result<U, E> cast() {
        // U and E are deliberately distinct from the class-level R and T, which they used to hide
        return (Result<U, E>) this;
    }

    /**
     * Returns the carried value.
     *
     * @return the value of a successful result
     * @throws T the exception built by the supplier, if this result is an error
     */
    public R get() {
        if (exceptionSupplier.isPresent()) {
            throw exceptionSupplier.get().get();
        }
        return result.get();
    }

    /**
     * Returns the carried value, or {@code other} when this result is an error.
     * The exception supplier is not invoked.
     *
     * @param other the fallback value
     * @return the value of a successful result, otherwise {@code other}
     */
    public R orElse(R other) {
        if (exceptionSupplier.isPresent()) {
            return other;
        }
        return result.get();
    }
}

0 commit comments

Comments
 (0)