Skip to content

Commit

Permalink
[SPARK-49490][SQL] Add benchmarks for initCap
Browse files Browse the repository at this point in the history
  • Loading branch information
mrk-andreev committed Oct 29, 2024
1 parent b6c569f commit 31632c9
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 0 deletions.
56 changes: 56 additions & 0 deletions sql/core/benchmarks/InitCapBenchmark-jdk21-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
================================================================================================
[wc=10000000, wl=1, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
execICU 1049 1052 4 79.9 12.5 1.0X
execBinaryICU 542 543 1 154.9 6.5 1.9X
execBinary 124 127 2 677.8 1.5 8.5X
execLowercase 1054 1068 20 79.6 12.6 1.0X


================================================================================================
[wc=10000000, wl=1, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU 1098 1103 8 76.4 13.1 1.0X
execBinaryICU 544 547 3 154.3 6.5 2.0X
execBinary 125 126 1 673.6 1.5 8.8X
execLowercase 863 908 61 97.2 10.3 1.3X


================================================================================================
[wc=10000000, wl=16, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU 3236 3240 4 25.9 38.6 1.0X
execBinaryICU 4781 4846 92 17.5 57.0 0.7X
execBinary 1158 1159 1 72.4 13.8 2.8X
execLowercase 3235 3241 8 25.9 38.6 1.0X


================================================================================================
[wc=10000000, wl=16, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------------
execICU 3316 3321 8 25.3 39.5 1.0X
execBinaryICU 4743 4813 98 17.7 56.5 0.7X
execBinary 1160 1162 3 72.3 13.8 2.9X
execLowercase 3263 3270 10 25.7 38.9 1.0X


56 changes: 56 additions & 0 deletions sql/core/benchmarks/InitCapBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
================================================================================================
[wc=10000000, wl=1, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11 on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
execICU 1076 1109 46 78.0 12.8 1.0X
execBinaryICU 553 554 1 151.6 6.6 1.9X
execBinary 142 143 2 592.5 1.7 7.6X
execLowercase 1006 1066 84 83.4 12.0 1.1X


================================================================================================
[wc=10000000, wl=1, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11 on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU 833 834 2 100.7 9.9 1.0X
execBinaryICU 566 574 6 148.3 6.7 1.5X
execBinary 141 142 1 596.2 1.7 5.9X
execLowercase 1088 1093 7 77.1 13.0 0.8X


================================================================================================
[wc=10000000, wl=16, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11 on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU 3154 3155 1 26.6 37.6 1.0X
execBinaryICU 4608 4612 6 18.2 54.9 0.7X
execBinary 1016 1016 0 82.6 12.1 3.1X
execLowercase 3109 3115 8 27.0 37.1 1.0X


================================================================================================
[wc=10000000, wl=16, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11 on Linux 6.8.0-1017-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------------
execICU 3093 3103 14 27.1 36.9 1.0X
execBinaryICU 4593 4656 89 18.3 54.8 0.7X
execBinary 1065 1067 2 78.8 12.7 2.9X
execLowercase 3089 3098 12 27.2 36.8 1.0X


Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark

import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.catalyst.util.CollationSupport.InitCap
import org.apache.spark.unsafe.types.UTF8String

/**
 * A benchmark that compares the performance of the different code paths used to evaluate the
 * SQL `initcap` expression.
 *
 * Specifically, it compares `InitCap.execICU` (once for each of the `he_ISR`, `UNICODE` and
 * `UNICODE_CI` collations), `InitCap.execBinaryICU`, `InitCap.execBinary` and
 * `InitCap.execLowercase`. Every case processes the same generated text of 10,000,000
 * space-terminated words; the suite is repeated for word lengths 1 and 16, with and without an
 * upper-case first letter in each word.
 *
 * Reported rates are per word: "Rate(M/s)" is millions of words per second and "Per Row(ns)" is
 * nanoseconds per word.
 *
 * To run this benchmark:
 * {{{
 *   1. without sbt:
 *      bin/spark-submit --class <this class>
 *        --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
 *   2. build/sbt "sql/Test/runMain <this class>"
 *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>"
 *      Results will be written to "benchmarks/InitCapBenchmark-results.txt".
 * }}}
 */
object InitCapBenchmark extends BenchmarkBase {

  /**
   * Runs one benchmark section per (word length, capitalization) combination.
   *
   * @param mainArgs command line arguments; not used by this benchmark
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {

    /**
     * Builds the benchmark input: `wordsCount` copies of a word made of `wordLen` 'x'
     * characters, each copy followed by a single space (so the text ends with a space).
     *
     * @param wordsCount number of words in the generated text
     * @param wordLen number of characters in every word, excluding the trailing space
     * @param firstLetterUpper whether the first character of every word is 'X' instead of 'x'
     * @return the generated text as a UTF8String
     */
    def generateString(wordsCount: Int, wordLen: Int, firstLetterUpper: Boolean): UTF8String = {
      // The word never changes between iterations, so build it (with its separator) only once.
      val word = {
        val letters = "x" * wordLen
        val cased = if (firstLetterUpper && wordLen > 0) "X" + letters.drop(1) else letters
        cased + " "
      }
      // Size the buffer for the exact final length (word plus separator, per word) so that it
      // is never regrown while the text is assembled.
      val sb = new StringBuilder(wordsCount * word.length)
      for (_ <- 0 until wordsCount) {
        sb.append(word)
      }
      UTF8String.fromString(sb.toString())
    }

    /**
     * Registers every InitCap implementation under test on `benchmark`, all applied to `text`.
     *
     * @param benchmark the benchmark the cases are added to
     * @param text the input passed to each InitCap implementation
     */
    def addCases(benchmark: Benchmark, text: UTF8String): Unit = {
      for (collationName <- List("he_ISR", "UNICODE", "UNICODE_CI")) {
        val collationId = CollationFactory.collationNameToId(collationName)
        // NOTE(review): presumably execICU requires a collation backed by a collator; fail
        // early with a readable message instead of somewhere inside the timed code.
        assert(
          CollationFactory.fetchCollation(collationId).collator != null,
          s"Collation $collationName has no collator")
        val caseName = s"execICU[collationName=${collationName}]"
        benchmark.addCase(caseName)(_ => InitCap.execICU(text, collationId))
      }
      benchmark.addCase("execBinaryICU")(_ => InitCap.execBinaryICU(text))
      benchmark.addCase("execBinary")(_ => InitCap.execBinary(text))
      benchmark.addCase("execLowercase")(_ => InitCap.execLowercase(text))
    }

    val wordCount = 10_000_000
    val wordLengths = List(1, 16)
    val firstLetterUpper = List(true, false)

    for (wordLength <- wordLengths) {
      for (isFirstLetterUpper <- firstLetterUpper) {
        val text: UTF8String = generateString(wordCount, wordLength, isFirstLetterUpper)
        val textDesc: String = s"[wc=${wordCount}, wl=${wordLength}, " +
          s"capitalized=${isFirstLetterUpper}]"

        runBenchmark(textDesc) {
          val benchmark = new Benchmark(
            s"InitCap evaluation ${textDesc}",
            // Each iteration performs a single initcap call over `wordCount` words, so report
            // throughput per word rather than against an unrelated constant.
            valuesPerIteration = wordCount,
            output = output
          )
          addCases(benchmark, text)
          benchmark.run()
        }
      }
    }
  }
}

0 comments on commit 31632c9

Please sign in to comment.