Skip to content

Commit

Permalink
[SPARK-49490][SQL] Add benchmarks for initCap
Browse files Browse the repository at this point in the history
  • Loading branch information
mrk-andreev committed Oct 19, 2024
1 parent 14ed86e commit b0e0cf0
Show file tree
Hide file tree
Showing 2 changed files with 261 additions and 0 deletions.
168 changes: 168 additions & 0 deletions sql/core/benchmarks/InitCapBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
================================================================================================
[wc=1, wl=1, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1, wl=1, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
---------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 371177345.1 0.0 1.0X
execBinaryICU 0 0 0 1613193846.2 0.0 4.3X
execBinary 0 0 0 3495253333.3 0.0 9.4X
execLowercase 0 0 0 430185025.6 0.0 1.2X


================================================================================================
[wc=1, wl=1, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1, wl=1, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 281496912.8 0.0 1.0X
execBinaryICU 0 0 0 1747626666.7 0.0 6.2X
execBinary 0 0 0 3647220869.6 0.0 13.0X
execLowercase 0 0 0 325139845.0 0.0 1.2X


================================================================================================
[wc=1, wl=16, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1, wl=16, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 228572425.1 0.0 1.0X
execBinaryICU 0 0 0 218453333.3 0.0 1.0X
execBinary 0 0 0 1181494084.5 0.0 5.2X
execLowercase 0 0 0 264624858.0 0.0 1.2X


================================================================================================
[wc=1, wl=16, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1, wl=16, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 193732286.4 0.0 1.0X
execBinaryICU 0 0 0 215092512.8 0.0 1.1X
execBinary 0 0 0 1075462564.1 0.0 5.6X
execLowercase 0 0 0 219023707.6 0.0 1.1X


================================================================================================
[wc=10, wl=1, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=10, wl=1, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 95869805.7 0.0 1.0X
execBinaryICU 0 0 0 176231260.5 0.0 1.8X
execBinary 0 0 0 830555247.5 0.0 8.7X
execLowercase 0 0 0 91379172.1 0.0 1.0X


================================================================================================
[wc=10, wl=1, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=10, wl=1, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 66841498.0 0.0 1.0X
execBinaryICU 0 0 0 181965466.4 0.0 2.7X
execBinary 0 0 0 902000860.2 0.0 13.5X
execLowercase 0 0 0 68200065.0 0.0 1.0X


================================================================================================
[wc=10, wl=16, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=10, wl=16, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-----------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 36695573.1 0.0 1.0X
execBinaryICU 0 0 0 22635207.8 0.0 0.6X
execBinary 0 0 0 144382237.5 0.0 3.9X
execLowercase 0 0 0 37349100.6 0.0 1.0X


================================================================================================
[wc=10, wl=16, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=10, wl=16, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 36567602.4 0.0 1.0X
execBinaryICU 0 0 0 20440078.0 0.0 0.6X
execBinary 0 0 0 146653986.0 0.0 4.0X
execLowercase 0 0 0 37299279.7 0.0 1.0X


================================================================================================
[wc=1000, wl=1, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1000, wl=1, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 954172.6 0.0 1.0X
execBinaryICU 0 0 0 1857612.8 0.0 1.9X
execBinary 0 0 0 8818973.9 0.0 9.2X
execLowercase 0 0 0 974603.6 0.0 1.0X


================================================================================================
[wc=1000, wl=1, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1000, wl=1, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 972332.9 0.0 1.0X
execBinaryICU 0 0 0 1891754.7 0.0 1.9X
execBinary 0 0 0 10791982.5 0.0 11.1X
execLowercase 0 0 0 915986.9 0.0 0.9X


================================================================================================
[wc=1000, wl=16, capitalized=true]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1000, wl=16, capitalized=true]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 388311.1 0.0 1.0X
execBinaryICU 0 0 0 228772.5 0.0 0.6X
execBinary 0 0 0 1481745.9 0.0 3.8X
execLowercase 0 0 0 387677.7 0.0 1.0X


================================================================================================
[wc=1000, wl=16, capitalized=false]
================================================================================================

OpenJDK 64-Bit Server VM 17.0.11+10-LTS on Linux 5.15.0-122-generic
Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
InitCap evaluation [wc=1000, wl=16, capitalized=false]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------------------
execICU 0 0 0 350758.8 0.0 1.0X
execBinaryICU 0 0 0 229855.1 0.0 0.7X
execBinary 0 0 0 1505682.3 0.0 4.3X
execLowercase 0 0 0 389843.2 0.0 1.1X


Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark

import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.catalyst.util.CollationSupport.InitCap
import org.apache.spark.unsafe.types.UTF8String

/**
 * A benchmark that compares the performance of the different ways to evaluate the SQL
 * `initcap` expression.
 *
 * Specifically, it compares the `execICU`, `execBinaryICU`, `execBinary` and `execLowercase`
 * implementations on strings with different word counts and word lengths, whose words either
 * already start with an upper-case letter or are fully lower-case.
 *
 * Every benchmark case evaluates the expression `valuesPerIteration` times per iteration, so the
 * reported "Rate" and "Per Row" columns describe a single `initcap` call on the given input.
 *
 * To run this benchmark:
 * {{{
 *   1. without sbt:
 *      bin/spark-submit --class <this class>
 *        --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
 *   2. build/sbt "sql/Test/runMain <this class>"
 *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>"
 *      Results will be written to "benchmarks/InitCapBenchmark-results.txt".
 * }}}
 */
object InitCapBenchmark extends BenchmarkBase {

  /**
   * Approximate number of input bytes every benchmark case processes per iteration. The number
   * of calls per iteration is derived from it, so that short inputs are evaluated often enough
   * to be measurable while long inputs do not make the suite run for hours.
   */
  private val BytesPerIteration: Long = 1L << 20

  /**
   * Builds the benchmark input: `wordsCount` words of `wordLen` characters, each word followed
   * by a single space (so the result ends with a trailing space).
   *
   * @param wordsCount number of words; a non-positive value yields an empty string
   * @param wordLen number of characters per word; a non-positive value yields empty words
   * @param firstLetterUpper whether each word starts with `X` instead of `x`
   */
  private def generateString(
      wordsCount: Int,
      wordLen: Int,
      firstLetterUpper: Boolean): UTF8String = {
    val word =
      if (wordLen <= 0) {
        ""
      } else {
        val firstLetter = if (firstLetterUpper) "X" else "x"
        // `"x" * 0` is the empty string, so a one-letter word is just `firstLetter`.
        firstLetter + "x" * (wordLen - 1)
      }
    UTF8String.fromString((word + " ") * wordsCount)
  }

  /**
   * Number of `initcap` calls each case performs per iteration for the given input.
   * Always at least 1, even for empty or very long inputs.
   */
  private def rowsPerIteration(text: UTF8String): Long =
    math.max(1L, BytesPerIteration / math.max(1, text.numBytes()))

  /**
   * Registers one case per `InitCap` implementation on `benchmark`.
   *
   * @param benchmark the benchmark to add the cases to
   * @param text the input passed to every implementation
   * @param numRows how many times each case evaluates the expression per iteration; this must
   *                be the `valuesPerIteration` that `benchmark` was created with
   */
  private def addCases(benchmark: Benchmark, text: UTF8String, numRows: Long): Unit = {
    // collation that contains collator
    val collationId = CollationFactory.collationNameToId("he_ISR")

    def addCase(name: String)(evaluate: => Unit): Unit = {
      benchmark.addCase(name) { _ =>
        var i = 0L
        while (i < numRows) {
          evaluate
          i += 1
        }
      }
    }

    addCase("execICU")(InitCap.execICU(text, collationId))
    addCase("execBinaryICU")(InitCap.execBinaryICU(text))
    addCase("execBinary")(InitCap.execBinary(text))
    addCase("execLowercase")(InitCap.execLowercase(text))
  }

  /**
   * Runs one benchmark per combination of word count, word length and capitalization.
   *
   * @param mainArgs command line arguments; not used by this suite
   */
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val wordCounts = Seq(1, 10, 1000)
    val wordLengths = Seq(1, 16)
    val capitalizedFlags = Seq(true, false)

    for {
      wordCount <- wordCounts
      wordLength <- wordLengths
      capitalized <- capitalizedFlags
    } {
      val text = generateString(wordCount, wordLength, capitalized)
      val textDesc = s"[wc=$wordCount, wl=$wordLength, capitalized=$capitalized]"

      runBenchmark(textDesc) {
        // The same value is reported to Benchmark and used as the loop bound of every case,
        // so the derived per-row statistics match the work that is actually measured.
        val numRows = rowsPerIteration(text)
        val benchmark = new Benchmark(
          s"InitCap evaluation $textDesc",
          valuesPerIteration = numRows,
          output = output
        )
        addCases(benchmark, text, numRows)
        benchmark.run()
      }
    }
  }
}

0 comments on commit b0e0cf0

Please sign in to comment.