Skip to content

Commit 3a42e8f

Browse files
authored
[CELEBORN] Add config to control celeborn fallback for CI (#6230)
1 parent 32808dd commit 3a42e8f

File tree

3 files changed

+20
-3
lines changed

3 files changed

+20
-3
lines changed

gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java

+9-3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.spark.shuffle.gluten.celeborn;
1818

19+
import org.apache.gluten.GlutenConfig;
1920
import org.apache.gluten.backendsapi.BackendsApiManager;
2021
import org.apache.gluten.exception.GlutenException;
2122

@@ -194,9 +195,14 @@ public <K, V, C> ShuffleHandle registerShuffle(
194195
if (dependency instanceof ColumnarShuffleDependency) {
195196
if (fallbackPolicyRunner.applyAllFallbackPolicy(
196197
lifecycleManager, dependency.partitioner().numPartitions())) {
197-
logger.warn("Fallback to ColumnarShuffleManager!");
198-
columnarShuffleIds.add(shuffleId);
199-
return columnarShuffleManager().registerShuffle(shuffleId, dependency);
198+
if (GlutenConfig.getConf().enableCelebornFallback()) {
199+
logger.warn("Fallback to ColumnarShuffleManager!");
200+
columnarShuffleIds.add(shuffleId);
201+
return columnarShuffleManager().registerShuffle(shuffleId, dependency);
202+
} else {
203+
throw new GlutenException(
204+
"The Celeborn service(Master: " + celebornConf.masterHost() + ") is unavailable");
205+
}
200206
} else {
201207
return registerCelebornShuffleHandle(shuffleId, dependency);
202208
}

shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala

+10
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
447447
conf.getConf(DYNAMIC_OFFHEAP_SIZING_ENABLED)
448448

449449
def enableHiveFileFormatWriter: Boolean = conf.getConf(NATIVE_HIVEFILEFORMAT_WRITER_ENABLED)
450+
451+
def enableCelebornFallback: Boolean = conf.getConf(CELEBORN_FALLBACK_ENABLED)
450452
}
451453

452454
object GlutenConfig {
@@ -2049,4 +2051,12 @@ object GlutenConfig {
20492051
.doubleConf
20502052
.checkValue(v => v >= 0 && v <= 1, "offheap sizing memory fraction must between [0, 1]")
20512053
.createWithDefault(0.6)
2054+
2055+
val CELEBORN_FALLBACK_ENABLED =
2056+
buildStaticConf("spark.gluten.sql.columnar.shuffle.celeborn.fallback.enabled")
2057+
.internal()
2058+
.doc("If enabled, fall back to ColumnarShuffleManager when celeborn service is unavailable." +
2059+
"Otherwise, throw an exception.")
2060+
.booleanConf
2061+
.createWithDefault(true)
20522062
}

tools/gluten-it/common/src/main/scala/org/apache/gluten/integration/Constants.scala

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ object Constants {
4444

4545
val VELOX_WITH_CELEBORN_CONF: SparkConf = new SparkConf(false)
4646
.set("spark.gluten.sql.columnar.forceShuffledHashJoin", "true")
47+
.set("spark.gluten.sql.columnar.shuffle.celeborn.fallback.enabled", "false")
4748
.set("spark.sql.parquet.enableVectorizedReader", "true")
4849
.set("spark.plugins", "org.apache.gluten.GlutenPlugin")
4950
.set(

0 commit comments

Comments
 (0)