Skip to content

Commit

Permalink
https://github.com/apache/incubator-gluten/pull/8656
Browse files Browse the repository at this point in the history
  • Loading branch information
baibaichen committed Feb 3, 2025
1 parent 4d4c31e commit ea87565
Show file tree
Hide file tree
Showing 47 changed files with 33 additions and 188 deletions.
2 changes: 1 addition & 1 deletion backends-clickhouse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@
</goals>
<configuration>
<systemProperties>
<clickhouse.lib.path>${clickhouse.lib.path}</clickhouse.lib.path>
<spark.gluten.sql.columnar.libpath>${spark.gluten.sql.columnar.libpath}</spark.gluten.sql.columnar.libpath>
<tpcds.data.path>${tpcds.data.path}</tpcds.data.path>
</systemProperties>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
package org.apache.gluten.execution

import org.apache.gluten.backendsapi.clickhouse.CHConf
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
Expand All @@ -43,7 +41,6 @@ class GlutenClickHouseJoinSuite extends GlutenClickHouseWholeStageTransformerSui
.set("spark.sql.adaptive.enabled", "false")
.set("spark.sql.files.minPartitionNum", "1")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,10 @@
*/
package org.apache.gluten.execution

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.{TestExceptionUtils, UTSystemParameters}

import org.apache.spark.SparkConf
import org.apache.gluten.utils.TestExceptionUtils

class GlutenClickHouseNativeExceptionSuite extends GlutenClickHouseWholeStageTransformerSuite {

/**
 * Builds the Spark configuration for this suite.
 *
 * Starts from the parent suite's configuration and sets the Gluten library path key
 * (`GlutenConfig.GLUTEN_LIB_PATH.key`) to the value returned by
 * `UTSystemParameters.clickHouseLibPath`.
 *
 * @return the parent `SparkConf` with the Gluten library path entry applied
 */
override protected def sparkConf: SparkConf = {
super.sparkConf
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
}

test("native exception caught by jvm") {
try {
TestExceptionUtils.generateNativeException()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package org.apache.gluten.execution

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.exception.GlutenException
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
Expand Down Expand Up @@ -65,7 +64,6 @@ class GlutenClickHouseNativeLibSuite extends PlanTest {
.builder()
.master("local[1]")
.config(baseSparkConf)
.config(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.config(GlutenConfig.GLUTEN_EXECUTOR_LIB_PATH.key, "/path/not/exist/libch.so")
.getOrCreate()
spark.sql("select 1").show()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.gluten.execution

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
Expand Down Expand Up @@ -52,7 +51,6 @@ class GlutenClickHouseSyntheticDataSuite
.set("spark.databricks.delta.properties.defaults.checkpointInterval", "5")
.set("spark.databricks.delta.stalenessLimit", "3600000")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ abstract class GlutenClickHouseTPCDSAbstractSuite
.set("spark.databricks.delta.properties.defaults.checkpointInterval", "5")
.set("spark.databricks.delta.stalenessLimit", "3600000")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.gluten.execution

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
Expand Down Expand Up @@ -568,7 +567,6 @@ abstract class GlutenClickHouseTPCHAbstractSuite
.set("spark.databricks.delta.stalenessLimit", "3600000")
.set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.gluten.execution

import org.apache.gluten.backendsapi.clickhouse.RuntimeConfig
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.{SPARK_VERSION_SHORT, SparkConf}
Expand Down Expand Up @@ -79,7 +78,6 @@ class GlutenClickHouseWholeStageTransformerSuite extends WholeStageTransformerSu
import org.apache.gluten.backendsapi.clickhouse.CHConf._

val conf = super.sparkConf
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.enable.native.validation", "false")
.set("spark.sql.warehouse.dir", warehouse)
.setCHConfig("user_defined_path", "/tmp/user_defined")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@
*/
package org.apache.gluten.execution

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, Row, TestUtils}
import org.apache.spark.sql.catalyst.expressions.{Expression, GetJsonObject, Literal}
Expand Down Expand Up @@ -57,7 +54,6 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
.set("spark.databricks.delta.properties.defaults.checkpointInterval", "5")
.set("spark.databricks.delta.stalenessLimit", "3600000")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package org.apache.gluten.execution.compatibility

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.{GlutenClickHouseTPCHAbstractSuite, ProjectExecTransformer}
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
Expand Down Expand Up @@ -51,7 +50,6 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
.set("spark.databricks.delta.properties.defaults.checkpointInterval", "5")
.set("spark.databricks.delta.stalenessLimit", "3600000")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@
*/
package org.apache.gluten.execution.hive

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.{FileSourceScanExecTransformer, GlutenClickHouseWholeStageTransformerSuite, ProjectExecTransformer, TransformSupport}
import org.apache.gluten.test.AllDataTypesWithComplexType
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SaveMode}
Expand Down Expand Up @@ -56,7 +54,6 @@ class GlutenClickHouseHiveTableSuite
.set("spark.sql.adaptive.enabled", "false")
.set("spark.sql.files.minPartitionNum", "1")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import org.apache.gluten.backendsapi.clickhouse.RuntimeConfig
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.GlutenClickHouseWholeStageTransformerSuite
import org.apache.gluten.test.AllDataTypesWithComplexType.genTestData
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.gluten.NativeWriteChecker
Expand Down Expand Up @@ -57,7 +56,6 @@ class GlutenClickHouseNativeWriteTableSuite
.set("spark.databricks.delta.properties.defaults.checkpointInterval", "5")
.set("spark.databricks.delta.stalenessLimit", "3600000")
.set(ClickHouseConfig.CLICKHOUSE_WORKER_ID, "1")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.set("spark.gluten.sql.columnar.iterator", "true")
.set("spark.gluten.sql.columnar.hashagg.enablefinal", "true")
.set("spark.gluten.sql.enable.native.validation", "false")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,6 @@ package org.apache.gluten.utils

object UTSystemParameters {

private val CLICKHOUSE_LIB_PATH_KEY = "clickhouse.lib.path"
private val CLICKHOUSE_LIB_PATH_DEFAULT_VALUE = "/usr/local/clickhouse/lib/libch.so"

/**
 * Resolves the path of the ClickHouse native library used by unit tests.
 *
 * Reads the JVM system property named by `CLICKHOUSE_LIB_PATH_KEY` (`clickhouse.lib.path`);
 * when that property is not set, `CLICKHOUSE_LIB_PATH_DEFAULT_VALUE`
 * (`/usr/local/clickhouse/lib/libch.so`) is returned instead.
 *
 * @return the configured library path, or the default path if the property is absent
 */
def clickHouseLibPath: String = {
System.getProperty(
UTSystemParameters.CLICKHOUSE_LIB_PATH_KEY,
UTSystemParameters.CLICKHOUSE_LIB_PATH_DEFAULT_VALUE)
}

private val TEST_DATA_PATH_KEY = "gluten.test.data.path"
private val TEST_DATA_PATH_DEFAULT_VALUE = "/data"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.benchmarks

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.jni.JniLibLoader
import org.apache.gluten.utils.UTSystemParameters

import org.apache.spark.SparkConf
import org.apache.spark.sql.delta.DeltaLog
Expand All @@ -32,7 +31,6 @@ trait CHSqlBasedBenchmark extends SqlBasedBenchmark {
def getSparkConf: SparkConf = {
val conf = new SparkConf()
.setAppName(appName)
.setIfMissing(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
.setIfMissing("spark.master", s"local[$thrdNum]")
.set("spark.plugins", "org.apache.gluten.GlutenPlugin")
.set(
Expand All @@ -57,9 +55,7 @@ trait CHSqlBasedBenchmark extends SqlBasedBenchmark {

override def afterAll(): Unit = {
DeltaLog.clearCache()
val libPath =
spark.conf.get(GlutenConfig.GLUTEN_LIB_PATH.key, UTSystemParameters.clickHouseLibPath)
JniLibLoader.unloadFromPath(libPath)
JniLibLoader.unloadFromPath(spark.conf.get(GlutenConfig.GLUTEN_LIB_PATH.key))
// Wait for Ctrl+C, convenient for seeing Spark UI
// Thread.sleep(600000)
super.afterAll()
Expand Down
2 changes: 1 addition & 1 deletion docs/developers/clickhouse-backend-debug.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ parent: /developer-overview/
![gluten-debug-idea-config.png](../image/ClickHouse/gluten-debug-idea-config.png)

VM Options:
`-Dtpcds.data.path=/data/tpcds-data-sf1 -Dclickhouse.lib.path=/path/to/gluten/cpp-ch/build/utils/extern-local-engine/libch.so -Dspark.test.home=/path/to/spark33`
`-Dtpcds.data.path=/data/tpcds-data-sf1 -Dspark.gluten.sql.columnar.libpath=/path/to/gluten/cpp-ch/build/utils/extern-local-engine/libch.so -Dspark.test.home=/path/to/spark33`
> Download tpcds-data from https://gluten-nginx.kyligence.com/dataset/
> Download spark33 using `git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git /tmp/spark33`
Expand Down
2 changes: 1 addition & 1 deletion gluten-ut/common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
</goals>
<configuration>
<systemProperties>
<clickhouse.lib.path>${clickhouse.lib.path}</clickhouse.lib.path>
<spark.gluten.sql.columnar.libpath>${spark.gluten.sql.columnar.libpath}</spark.gluten.sql.columnar.libpath>
<tpcds.data.path>${tpcds.data.path}</tpcds.data.path>
</systemProperties>
</configuration>
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
*/
package org.apache.spark.sql

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.{BackendTestUtils, SystemParameters}
import org.apache.gluten.utils.BackendTestUtils

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
Expand Down Expand Up @@ -86,7 +85,6 @@ object DummyFilterColmnarHelper {
.config("spark.memory.offHeap.size", "1024MB")
.config("spark.plugins", "org.apache.gluten.GlutenPlugin")
.config("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager")
.config(GlutenConfig.GLUTEN_LIB_PATH.key, SystemParameters.getClickHouseLibPath)
.config("spark.io.compression.codec", "LZ4")
.config("spark.gluten.sql.enable.native.validation", "false")
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
*/
package org.apache.spark.sql

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.{BackendTestUtils, SystemParameters}
import org.apache.gluten.utils.BackendTestUtils

import org.apache.spark.SparkConf
import org.apache.spark.sql.execution.SparkPlan
Expand Down Expand Up @@ -105,7 +104,6 @@ object GlutenSQLTestsBaseTrait {
.set("spark.io.compression.codec", "LZ4")
.set("spark.gluten.sql.columnar.backend.ch.worker.id", "1")
.set("spark.gluten.sql.enable.native.validation", "false")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, SystemParameters.getClickHouseLibPath)
.set("spark.sql.files.openCostInBytes", "134217728")
.set("spark.unsafe.exceptionOnMemoryLeak", "true")
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import org.apache.gluten.backendsapi.BackendsApiManager
import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.execution.ProjectExecTransformer
import org.apache.gluten.test.TestStats
import org.apache.gluten.utils.{BackendTestUtils, SystemParameters}
import org.apache.gluten.utils.BackendTestUtils

import org.apache.spark.sql.GlutenQueryTestUtil.isNaNOrInf
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
Expand Down Expand Up @@ -113,7 +113,6 @@ trait GlutenTestsTrait extends GlutenTestsCommonTrait {
.config("spark.gluten.sql.columnar.backend.ch.worker.id", "1")
.config("spark.gluten.sql.enable.native.validation", "false")
.config("spark.sql.files.openCostInBytes", "134217728")
.config(GlutenConfig.GLUTEN_LIB_PATH.key, SystemParameters.getClickHouseLibPath)
.config("spark.unsafe.exceptionOnMemoryLeak", "true")
.config(GlutenConfig.UT_STATISTIC.key, "true")
.getOrCreate()
Expand Down
5 changes: 1 addition & 4 deletions gluten-ut/spark32/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<clickhouse.lib.path></clickhouse.lib.path>
</properties>
</profile>
</profiles>

Expand Down Expand Up @@ -107,7 +104,7 @@
</goals>
<configuration>
<systemProperties>
<clickhouse.lib.path>${clickhouse.lib.path}</clickhouse.lib.path>
<spark.gluten.sql.columnar.libpath>${spark.gluten.sql.columnar.libpath}</spark.gluten.sql.columnar.libpath>
<tpcds.data.path>${tpcds.data.path}</tpcds.data.path>
</systemProperties>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@
*/
package org.apache.spark.sql

import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.exception.GlutenException
import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters}
import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils}

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
Expand Down Expand Up @@ -195,7 +194,6 @@ class GlutenSQLQueryTestSuite
.set("spark.io.compression.codec", "LZ4")
.set("spark.gluten.sql.columnar.backend.ch.worker.id", "1")
.set("spark.gluten.sql.enable.native.validation", "false")
.set(GlutenConfig.GLUTEN_LIB_PATH.key, SystemParameters.getClickHouseLibPath)
.set("spark.sql.files.openCostInBytes", "134217728")
.set("spark.unsafe.exceptionOnMemoryLeak", "true")
} else {
Expand Down
Loading

0 comments on commit ea87565

Please sign in to comment.