Skip to content

Commit

Permalink
[SPARK-49490][SQL] Add benchmarks for initCap
Browse files Browse the repository at this point in the history
  • Loading branch information
mrk-andreev committed Nov 3, 2024
1 parent b6c569f commit 6b1d79e
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 0 deletions.
64 changes: 64 additions & 0 deletions sql/core/benchmarks/InitCapBenchmark-jdk21-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
================================================================================================
[wc=10000000, wl=1, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 1404 1405 2 59.7 16.7 1.0X
execICU[collationName=UNICODE] 1427 1429 2 58.8 17.0 1.0X
execICU[collationName=UNICODE_CI] 1416 1459 61 59.2 16.9 1.0X
execBinaryICU 766 767 1 109.5 9.1 1.8X
execBinary 156 157 1 539.1 1.9 9.0X
execLowercase 1401 1449 68 59.9 16.7 1.0X


================================================================================================
[wc=10000000, wl=1, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 1471 1476 8 57.0 17.5 1.0X
execICU[collationName=UNICODE] 1456 1503 66 57.6 17.4 1.0X
execICU[collationName=UNICODE_CI] 1491 1535 62 56.3 17.8 1.0X
execBinaryICU 747 754 7 112.3 8.9 2.0X
execBinary 153 156 2 549.0 1.8 9.6X
execLowercase 1494 1514 28 56.1 17.8 1.0X


================================================================================================
[wc=10000000, wl=16, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 4314 4317 4 19.4 51.4 1.0X
execICU[collationName=UNICODE] 4248 4277 40 19.7 50.6 1.0X
execICU[collationName=UNICODE_CI] 4230 4271 58 19.8 50.4 1.0X
execBinaryICU 6412 6471 83 13.1 76.4 0.7X
execBinary 1432 1441 14 58.6 17.1 3.0X
execLowercase 4293 4354 86 19.5 51.2 1.0X


================================================================================================
[wc=10000000, wl=16, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 4362 4382 28 19.2 52.0 1.0X
execICU[collationName=UNICODE] 4321 4322 2 19.4 51.5 1.0X
execICU[collationName=UNICODE_CI] 4256 4286 43 19.7 50.7 1.0X
execBinaryICU 6373 6415 58 13.2 76.0 0.7X
execBinary 1430 1439 13 58.7 17.0 3.1X
execLowercase 4262 4263 1 19.7 50.8 1.0X


64 changes: 64 additions & 0 deletions sql/core/benchmarks/InitCapBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
================================================================================================
[wc=10000000, wl=1, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 1385 1389 6 60.6 16.5 1.0X
execICU[collationName=UNICODE] 1384 1395 16 60.6 16.5 1.0X
execICU[collationName=UNICODE_CI] 1425 1430 7 58.9 17.0 1.0X
execBinaryICU 743 754 11 112.9 8.9 1.9X
execBinary 184 187 2 456.2 2.2 7.5X
execLowercase 1386 1386 1 60.5 16.5 1.0X


================================================================================================
[wc=10000000, wl=1, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=1, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 1365 1368 5 61.5 16.3 1.0X
execICU[collationName=UNICODE] 1384 1406 31 60.6 16.5 1.0X
execICU[collationName=UNICODE_CI] 1385 1396 16 60.6 16.5 1.0X
execBinaryICU 745 759 13 112.6 8.9 1.8X
execBinary 162 167 3 518.0 1.9 8.4X
execLowercase 1366 1378 17 61.4 16.3 1.0X


================================================================================================
[wc=10000000, wl=16, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 3592 3597 8 23.4 42.8 1.0X
execICU[collationName=UNICODE] 3604 3605 0 23.3 43.0 1.0X
execICU[collationName=UNICODE_CI] 3577 3588 16 23.5 42.6 1.0X
execBinaryICU 6378 6408 43 13.2 76.0 0.6X
execBinary 1328 1338 13 63.2 15.8 2.7X
execLowercase 3608 3610 2 23.2 43.0 1.0X


================================================================================================
[wc=10000000, wl=16, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1016-aws
Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
InitCap evaluation [wc=10000000, wl=16, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------------
execICU[collationName=he_ISR] 3616 3620 6 23.2 43.1 1.0X
execICU[collationName=UNICODE] 3638 3649 15 23.1 43.4 1.0X
execICU[collationName=UNICODE_CI] 3594 3613 27 23.3 42.8 1.0X
execBinaryICU 6491 6527 51 12.9 77.4 0.6X
execBinary 1246 1248 3 67.3 14.9 2.9X
execLowercase 3680 3692 17 22.8 43.9 1.0X


Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark

import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.catalyst.util.CollationSupport.InitCap
import org.apache.spark.unsafe.types.UTF8String

/**
* A benchmark that compares the performance of different ways to evaluate SQL initcap expressions.
*
* Specifically, this class compares the execICU, execBinaryICU, execBinary and execLowercase
* approaches on strings built from a fixed number of words, varying the word length and whether
* each word already starts with an uppercase letter.
*
* To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class>
* --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
* 2. build/sbt "sql/Test/runMain <this class>"
* 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>"
* Results will be written to "benchmarks/InitCapBenchmark-results.txt".
* }}}
*/
object InitCapBenchmark extends BenchmarkBase {

  /** Collation names measured through the collation-aware `InitCap.execICU` entry point. */
  private val icuCollationNames = List("he_ISR", "UNICODE", "UNICODE_CI")

  /**
   * Builds the benchmark input: `wordsCount` words of `wordLen` characters each, where every
   * word is followed by a single space (so the generated text also ends with a space).
   *
   * @param wordsCount number of words to generate
   * @param wordLen number of characters in every word
   * @param firstLetterUpper if true each word starts with 'X'; otherwise every letter is 'x'
   * @return the generated text wrapped in a UTF8String
   */
  private def generateString(
      wordsCount: Int,
      wordLen: Int,
      firstLetterUpper: Boolean): UTF8String = {
    val lowercaseWord = "x" * wordLen
    // The wordLen guard keeps zero-length words empty instead of turning them into "X".
    val word =
      if (firstLetterUpper && wordLen > 0) "X" + lowercaseWord.substring(1) else lowercaseWord
    val wordWithSeparator = word + " "
    // Size the builder for the separators as well, so it never has to grow while being filled.
    val sb = new StringBuilder(wordsCount * wordWithSeparator.length)
    for (_ <- 0 until wordsCount) {
      sb.append(wordWithSeparator)
    }
    UTF8String.fromString(sb.toString())
  }

  /**
   * Registers one benchmark case per InitCap implementation, all evaluated on the same `text`.
   *
   * @param benchmark the benchmark the cases are added to
   * @param text the input every case is evaluated on
   */
  private def addCases(benchmark: Benchmark, text: UTF8String): Unit = {
    icuCollationNames.foreach { collationName =>
      val collationId = CollationFactory.collationNameToId(collationName)
      // Sanity check: fail fast if the collation resolves without a collator.
      assert(
        CollationFactory.fetchCollation(collationId).collator != null,
        s"Collation $collationName has no collator")
      benchmark.addCase(s"execICU[collationName=$collationName]") { _ =>
        InitCap.execICU(text, collationId)
      }
    }
    benchmark.addCase("execBinaryICU")(_ => InitCap.execBinaryICU(text))
    benchmark.addCase("execBinary")(_ => InitCap.execBinary(text))
    benchmark.addCase("execLowercase")(_ => InitCap.execLowercase(text))
  }

  /**
   * Runs one benchmark per combination of word length and capitalization, each comparing all
   * InitCap implementations on the same generated text.
   *
   * @param mainArgs command line arguments; not used by this benchmark
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val wordCount = 10_000_000
    val wordLengths = List(1, 16)
    val capitalizedOptions = List(true, false)

    for {
      wordLength <- wordLengths
      isFirstLetterUpper <- capitalizedOptions
    } {
      val text = generateString(wordCount, wordLength, isFirstLetterUpper)
      val textDesc = s"[wc=$wordCount, wl=$wordLength, capitalized=$isFirstLetterUpper]"

      runBenchmark(textDesc) {
        val benchmark = new Benchmark(
          s"InitCap evaluation $textDesc",
          // Every case invocation processes the whole text once, i.e. `wordCount` words, so the
          // reported rate and per-row time are expressed per word.
          valuesPerIteration = wordCount,
          output = output
        )
        addCases(benchmark, text)
        benchmark.run()
      }
    }
  }
}

0 comments on commit 6b1d79e

Please sign in to comment.