Commit f41129e

[GLUTEN-6067][CH][MINOR][UT] Followup 6623, fix backends-clickhouse ut issue in CI (#6891)
* fix fallback in spark 3.5
* Remove hive support in GlutenClickhouseFunctionSuite
* Move Hive related suite into hive package
* fix ut for spark 35
* fix celeborn ut for spark 35
* fix gluten ut for spark 35
* remove duplicated dependency
* fix dependency for spark 3.5 ut
1 parent 6dcf83f commit f41129e

15 files changed: +354 -346 lines changed

backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala

+17 -4

@@ -188,20 +188,33 @@ class GlutenClickHouseMergeTreeWriteOnS3Suite
     var metadataGlutenExist: Boolean = false
     var metadataBinExist: Boolean = false
     var dataBinExist: Boolean = false
+    var hasCommits = false
     client
       .listObjects(args)
       .forEach(
         obj => {
           objectCount += 1
-          if (obj.get().objectName().contains("metadata.gluten")) {
+          val objectName = obj.get().objectName()
+          if (objectName.contains("metadata.gluten")) {
             metadataGlutenExist = true
-          } else if (obj.get().objectName().contains("meta.bin")) {
+          } else if (objectName.contains("meta.bin")) {
             metadataBinExist = true
-          } else if (obj.get().objectName().contains("data.bin")) {
+          } else if (objectName.contains("data.bin")) {
             dataBinExist = true
+          } else if (objectName.contains("_commits")) {
+            // Spark 35 has _commits directory
+            // table/_delta_log/_commits/
+            hasCommits = true
           }
         })
-    assertResult(5)(objectCount)
+
+    if (isSparkVersionGE("3.5")) {
+      assertResult(6)(objectCount)
+      assert(hasCommits)
+    } else {
+      assertResult(5)(objectCount)
+    }
+
     assert(metadataGlutenExist)
     assert(metadataBinExist)
     assert(dataBinExist)
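
Note on the change above: on Spark 3.5 the Delta log also contains a table/_delta_log/_commits/ directory, so the bucket listing for the MergeTree table holds one more object and the assertion is now gated on the Spark version. For context, a self-contained sketch of the same MinIO counting loop; the endpoint, credentials and bucket name are placeholders, not values from this patch:

    import io.minio.{ListObjectsArgs, MinioClient}

    // Sketch only: count the objects under a bucket and detect the Spark 3.5-only
    // _delta_log/_commits/ entries, mirroring the loop in the test above.
    object CountMergeTreeObjects {
      def main(args: Array[String]): Unit = {
        val client = MinioClient
          .builder()
          .endpoint("http://127.0.0.1:9000")       // placeholder endpoint
          .credentials("minioadmin", "minioadmin") // placeholder credentials
          .build()
        var objectCount = 0
        var hasCommits = false
        client
          .listObjects(ListObjectsArgs.builder().bucket("test-bucket").recursive(true).build())
          .forEach(
            obj => {
              objectCount += 1
              if (obj.get().objectName().contains("_commits")) hasCommits = true
            })
        println(s"objects: $objectCount, has _delta_log/_commits: $hasCommits")
      }
    }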

backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseWholeStageTransformerSuite.scala

+7 -5

@@ -178,11 +178,13 @@ class GlutenClickHouseWholeStageTransformerSuite extends WholeStageTransformerSu
     super.beforeAll()
   }
 
-  protected val rootPath: String = this.getClass.getResource("/").getPath
-  protected val basePath: String = rootPath + "tests-working-home"
-  protected val warehouse: String = basePath + "/spark-warehouse"
-  protected val metaStorePathAbsolute: String = basePath + "/meta"
-  protected val hiveMetaStoreDB: String = metaStorePathAbsolute + "/metastore_db"
+  final protected val rootPath: String = this.getClass.getResource("/").getPath
+  final protected val basePath: String = rootPath + "tests-working-home"
+  final protected val warehouse: String = basePath + "/spark-warehouse"
+  final protected val metaStorePathAbsolute: String = basePath + "/meta"
+
+  protected val hiveMetaStoreDB: String =
+    s"$metaStorePathAbsolute/${getClass.getSimpleName}/metastore_db"
 
   final override protected val resourcePath: String = "" // ch not need this
   override protected val fileFormat: String = "parquet"
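
The point of the change above is that every suite now gets its own metastore_db directory, keyed by the suite's simple class name, so suites that enable Hive support no longer share one embedded Derby database. A minimal sketch of how such a path is typically wired into the session, assuming the metaStorePathAbsolute val from this hunk; the connection URL mirrors the one removed from GlutenClickhouseFunctionSuite later in this commit:

    import org.apache.spark.sql.SparkSession

    // Sketch: a per-suite Derby metastore avoids lock contention between suites
    // that run against the same working directory.
    val perSuiteMetaStoreDB = s"$metaStorePathAbsolute/${getClass.getSimpleName}/metastore_db"
    val session = SparkSession
      .builder()
      .master("local[1]")
      .enableHiveSupport()
      .config(
        "javax.jdo.option.ConnectionURL",
        s"jdbc:derby:;databaseName=$perSuiteMetaStoreDB;create=true")
      .getOrCreate()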

backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala

+2 -1

@@ -16,7 +16,8 @@
  */
 package org.apache.gluten.execution
 
-import org.apache.gluten.execution.AllDataTypesWithComplexType.genTestData
+import org.apache.gluten.test.AllDataTypesWithComplexType
+import org.apache.gluten.test.AllDataTypesWithComplexType.genTestData
 
 import org.apache.spark.SparkConf
 class GlutenClickhouseCountDistinctSuite extends GlutenClickHouseWholeStageTransformerSuite {

backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala

+48 -104

@@ -20,12 +20,6 @@ import org.apache.gluten.GlutenConfig
 import org.apache.gluten.utils.UTSystemParameters
 
 import org.apache.spark.SparkConf
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.delta.DeltaLog
-
-import org.apache.commons.io.FileUtils
-
-import java.io.File
 
 class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
   override protected val needCopyParquetToTablePath = true
@@ -39,9 +33,6 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
     createNotNullTPCHTablesInParquet(tablesPath)
   }
 
-  private var _hiveSpark: SparkSession = _
-  override protected def spark: SparkSession = _hiveSpark
-
   override protected def sparkConf: SparkConf = {
     new SparkConf()
       .set("spark.plugins", "org.apache.gluten.GlutenPlugin")
@@ -69,70 +60,21 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
       .setMaster("local[1]")
   }
 
-  override protected def initializeSession(): Unit = {
-    if (_hiveSpark == null) {
-      val hiveMetaStoreDB = metaStorePathAbsolute + "/metastore_db"
-      _hiveSpark = SparkSession
-        .builder()
-        .config(sparkConf)
-        .enableHiveSupport()
-        .config(
-          "javax.jdo.option.ConnectionURL",
-          s"jdbc:derby:;databaseName=$hiveMetaStoreDB;create=true")
-        .getOrCreate()
-    }
-  }
-
-  override def beforeAll(): Unit = {
-    // prepare working paths
-    val basePathDir = new File(basePath)
-    if (basePathDir.exists()) {
-      FileUtils.forceDelete(basePathDir)
-    }
-    FileUtils.forceMkdir(basePathDir)
-    FileUtils.forceMkdir(new File(warehouse))
-    FileUtils.forceMkdir(new File(metaStorePathAbsolute))
-    FileUtils.copyDirectory(new File(rootPath + resourcePath), new File(tablesPath))
-    super.beforeAll()
-  }
-
-  override protected def afterAll(): Unit = {
-    DeltaLog.clearCache()
-
-    try {
-      super.afterAll()
-    } finally {
-      try {
-        if (_hiveSpark != null) {
-          try {
-            _hiveSpark.sessionState.catalog.reset()
-          } finally {
-            _hiveSpark.stop()
-            _hiveSpark = null
-          }
-        }
-      } finally {
-        SparkSession.clearActiveSession()
-        SparkSession.clearDefaultSession()
-      }
-    }
-  }
-
   test("test uuid - write and read") {
     withSQLConf(
       ("spark.gluten.sql.native.writer.enabled", "true"),
       (GlutenConfig.GLUTEN_ENABLED.key, "true")) {
+      withTable("uuid_test") {
+        spark.sql("create table if not exists uuid_test (id string) using parquet")
 
-      spark.sql("drop table if exists uuid_test")
-      spark.sql("create table if not exists uuid_test (id string) stored as parquet")
-
-      val df = spark.sql("select regexp_replace(uuid(), '-', '') as id from range(1)")
-      df.cache()
-      df.write.insertInto("uuid_test")
+        val df = spark.sql("select regexp_replace(uuid(), '-', '') as id from range(1)")
+        df.cache()
+        df.write.insertInto("uuid_test")
 
-      val df2 = spark.table("uuid_test")
-      val diffCount = df.exceptAll(df2).count()
-      assert(diffCount == 0)
+        val df2 = spark.table("uuid_test")
+        val diffCount = df.exceptAll(df2).count()
+        assert(diffCount == 0)
+      }
     }
   }
 
@@ -181,49 +123,51 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
   }
 
   test("GLUTEN-5981 null value from get_json_object") {
-    spark.sql("create table json_t1 (a string) using parquet")
-    spark.sql("insert into json_t1 values ('{\"a\":null}')")
-    runQueryAndCompare(
-      """
-        |SELECT get_json_object(a, '$.a') is null from json_t1
-        |""".stripMargin
-    )(df => checkFallbackOperators(df, 0))
-    spark.sql("drop table json_t1")
+    withTable("json_t1") {
+      spark.sql("create table json_t1 (a string) using parquet")
+      spark.sql("insert into json_t1 values ('{\"a\":null}')")
+      runQueryAndCompare(
+        """
+          |SELECT get_json_object(a, '$.a') is null from json_t1
+          |""".stripMargin
+      )(df => checkFallbackOperators(df, 0))
+    }
   }
 
   test("Fix arrayDistinct(Array(Nullable(Decimal))) core dump") {
-    val create_sql =
-      """
-        |create table if not exists test(
-        | dec array<decimal(10, 2)>
-        |) using parquet
-        |""".stripMargin
-    val fill_sql =
-      """
-        |insert into test values(array(1, 2, null)), (array(null, 2,3, 5))
-        |""".stripMargin
-    val query_sql =
-      """
-        |select array_distinct(dec) from test;
-        |""".stripMargin
-    spark.sql(create_sql)
-    spark.sql(fill_sql)
-    compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
-    spark.sql("drop table test")
+    withTable("json_t1") {
+      val create_sql =
+        """
+          |create table if not exists test(
+          | dec array<decimal(10, 2)>
+          |) using parquet
+          |""".stripMargin
+      val fill_sql =
+        """
+          |insert into test values(array(1, 2, null)), (array(null, 2,3, 5))
+          |""".stripMargin
+      val query_sql =
+        """
+          |select array_distinct(dec) from test;
+          |""".stripMargin
+      spark.sql(create_sql)
+      spark.sql(fill_sql)
+      compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
+    }
   }
 
   test("intersect all") {
-    spark.sql("create table t1 (a int, b string) using parquet")
-    spark.sql("insert into t1 values (1, '1'),(2, '2'),(3, '3'),(4, '4'),(5, '5'),(6, '6')")
-    spark.sql("create table t2 (a int, b string) using parquet")
-    spark.sql("insert into t2 values (4, '4'),(5, '5'),(6, '6'),(7, '7'),(8, '8'),(9, '9')")
-    runQueryAndCompare(
-      """
-        |SELECT a,b FROM t1 INTERSECT ALL SELECT a,b FROM t2
-        |""".stripMargin
-    )(df => checkFallbackOperators(df, 0))
-    spark.sql("drop table t1")
-    spark.sql("drop table t2")
+    withTable("t1", "t2") {
+      spark.sql("create table t1 (a int, b string) using parquet")
+      spark.sql("insert into t1 values (1, '1'),(2, '2'),(3, '3'),(4, '4'),(5, '5'),(6, '6')")
+      spark.sql("create table t2 (a int, b string) using parquet")
+      spark.sql("insert into t2 values (4, '4'),(5, '5'),(6, '6'),(7, '7'),(8, '8'),(9, '9')")
+      runQueryAndCompare(
+        """
+          |SELECT a,b FROM t1 INTERSECT ALL SELECT a,b FROM t2
+          |""".stripMargin
+      )(df => checkFallbackOperators(df, 0))
+    }
  }
 
  test("array decimal32 CH column to row") {