[SPARK-48392][CORE][FOLLOWUP] Add --load-spark-defaults flag to decide whether to load `spark-defaults.conf`

### What changes were proposed in this pull request?

Following the discussions in #46709, this PR adds a flag `--load-spark-defaults` to control whether `spark-defaults.conf` should be loaded when `--properties-file` is specified. By default, the flag is turned off.
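
For context, the effective precedence after this change can be summarized with a small, purely illustrative Scala sketch (this is not code from the patch, and all names in it are made up):

```scala
import scala.collection.mutable

// Illustrative sketch of the merge order introduced here (not the actual SparkSubmit code).
// `confOverrides` stands for --conf/-c pairs, `propsFile` for the --properties-file contents,
// and `sparkDefaults` for conf/spark-defaults.conf; all of these names are hypothetical.
def mergeSparkProperties(
    confOverrides: Map[String, String],
    propsFile: Option[Map[String, String]],
    sparkDefaults: Map[String, String],
    loadSparkDefaults: Boolean): Map[String, String] = {
  val merged = mutable.HashMap[String, String]()
  merged ++= confOverrides                        // --conf/-c entries always win
  propsFile.foreach(_.foreach { case (k, v) =>
    merged.getOrElseUpdate(k, v)                  // then the user-supplied properties file
  })
  if (propsFile.isEmpty || loadSparkDefaults) {   // spark-defaults.conf is consulted only when
    sparkDefaults.foreach { case (k, v) =>        // no properties file is given, or the new
      merged.getOrElseUpdate(k, v)                // --load-spark-defaults flag is set
    }
  }
  merged.toMap
}
```

Under this order, the new test below ends up with `spark.driver.memory=4g` from the properties file while still picking up `spark.executor.memory=3g` from `spark-defaults.conf` when the flag is set.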

### Why are the changes needed?

Ideally we should avoid behavior changes and minimize disruption to users. For this reason, an opt-in flag is preferred.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

New tests were added in `SparkSubmitSuite` covering spark-submit runs with and without the `--load-spark-defaults` flag.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #46782 from sunchao/SPARK-48392-followup.

Authored-by: Chao Sun <[email protected]>
Signed-off-by: Chao Sun <[email protected]>
sunchao committed Jun 2, 2024
1 parent 1cecdc7 commit 8cf3195
Showing 5 changed files with 75 additions and 47 deletions.
@@ -50,6 +50,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
var executorCores: String = null
var totalExecutorCores: String = null
var propertiesFile: String = null
private var loadSparkDefaults: Boolean = false
var driverMemory: String = null
var driverExtraClassPath: String = null
var driverExtraLibraryPath: String = null
@@ -87,27 +88,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S

override protected def logName: String = classOf[SparkSubmitArguments].getName

/** Default properties present in the currently defined defaults file. */
lazy val defaultSparkProperties: HashMap[String, String] = {
val defaultProperties = new HashMap[String, String]()
if (verbose) {
logInfo(log"Using properties file: ${MDC(PATH, propertiesFile)}")
}
Option(propertiesFile).foreach { filename =>
val properties = Utils.getPropertiesFromFile(filename)
properties.foreach { case (k, v) =>
defaultProperties(k) = v
}
// Property files may contain sensitive information, so redact before printing
if (verbose) {
Utils.redact(properties).foreach { case (k, v) =>
logInfo(log"Adding default property: ${MDC(KEY, k)}=${MDC(VALUE, v)}")
}
}
}
defaultProperties
}

// Set parameters from command line arguments
parse(args.asJava)

@@ -123,31 +103,43 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
validateArguments()

/**
* Merge values from the default properties file with those specified through --conf.
* When this is called, `sparkProperties` is already filled with configs from the latter.
* Load properties from the file with the given path into `sparkProperties`.
* No-op if the file path is null
*/
private def mergeDefaultSparkProperties(): Unit = {
// Honor --conf before the specified properties file and defaults file
defaultSparkProperties.foreach { case (k, v) =>
if (!sparkProperties.contains(k)) {
sparkProperties(k) = v
private def loadPropertiesFromFile(filePath: String): Unit = {
if (filePath != null) {
if (verbose) {
logInfo(log"Using properties file: ${MDC(PATH, filePath)}")
}
}

// Also load properties from `spark-defaults.conf` if they do not exist in the properties file
// and --conf list
val defaultSparkConf = Utils.getDefaultPropertiesFile(env)
Option(defaultSparkConf).foreach { filename =>
val properties = Utils.getPropertiesFromFile(filename)
val properties = Utils.getPropertiesFromFile(filePath)
properties.foreach { case (k, v) =>
if (!sparkProperties.contains(k)) {
sparkProperties(k) = v
}
}
// Property files may contain sensitive information, so redact before printing
if (verbose) {
Utils.redact(properties).foreach { case (k, v) =>
logInfo(log"Adding default property: ${MDC(KEY, k)}=${MDC(VALUE, v)}")
}
}
}
}

if (propertiesFile == null) {
propertiesFile = defaultSparkConf
/**
* Merge values from the default properties file with those specified through --conf.
* When this is called, `sparkProperties` is already filled with configs from the latter.
*/
private def mergeDefaultSparkProperties(): Unit = {
// Honor --conf before the specified properties file and defaults file
loadPropertiesFromFile(propertiesFile)

// Also load properties from `spark-defaults.conf` if they do not exist in the properties file
// and --conf list when:
// - no input properties file is specified
// - input properties file is specified, but `--load-spark-defaults` flag is set
if (propertiesFile == null || loadSparkDefaults) {
loadPropertiesFromFile(Utils.getDefaultPropertiesFile(env))
}
}

@@ -405,6 +397,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
case PROPERTIES_FILE =>
propertiesFile = value

case LOAD_SPARK_DEFAULTS =>
loadSparkDefaults = true

case KILL_SUBMISSION =>
submissionToKill = value
if (action != null) {
@@ -548,6 +543,10 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
| --conf, -c PROP=VALUE Arbitrary Spark configuration property.
| --properties-file FILE Path to a file from which to load extra properties. If not
| specified, this will look for conf/spark-defaults.conf.
| --load-spark-defaults Whether to load properties from conf/spark-defaults.conf,
| even if --properties-file is specified. Configurations
| specified in --properties-file will take precedence over
| those in conf/spark-defaults.conf.
|
| --driver-memory MEM Memory for driver (e.g. 1000M, 2G) (Default: ${mem_mb}M).
| --driver-java-options Extra Java options to pass to the driver.
30 changes: 25 additions & 5 deletions core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -1107,25 +1107,45 @@ class SparkSubmitSuite
"--master", "local",
unusedJar.toString)
val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path))
assert(appArgs.propertiesFile != null)
assert(appArgs.propertiesFile.startsWith(path))
appArgs.executorMemory should be ("3g")
}
}

test("SPARK-48392: Allow both spark-defaults.conf and properties file") {
forConfDir(Map("spark.executor.memory" -> "3g")) { path =>
withPropertyFile("spark-conf.properties", Map("spark.executor.cores" -> "16")) { propsFile =>
test("SPARK-48392: load spark-defaults.conf when --load-spark-defaults is set") {
forConfDir(Map("spark.executor.memory" -> "3g", "spark.driver.memory" -> "3g")) { path =>
withPropertyFile("spark-conf.properties",
Map("spark.executor.cores" -> "16", "spark.driver.memory" -> "4g")) { propsFile =>
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
"--name", "testApp",
"--master", "local",
"--properties-file", propsFile,
"--load-spark-defaults",
unusedJar.toString)
val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path))
appArgs.executorCores should be("16")
appArgs.executorMemory should be("3g")
appArgs.driverMemory should be("4g")
}
}
}

test("SPARK-48392: should skip spark-defaults.conf when --load-spark-defaults is not set") {
forConfDir(Map("spark.executor.memory" -> "3g", "spark.driver.memory" -> "3g")) { path =>
withPropertyFile("spark-conf.properties",
Map("spark.executor.cores" -> "16", "spark.driver.memory" -> "4g")) { propsFile =>
val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
val args = Seq(
"--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
"--name", "testApp",
"--master", "local",
"--properties-file", propsFile,
unusedJar.toString)
val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path))
appArgs.executorCores should be("16")
appArgs.driverMemory should be("4g")
appArgs.executorMemory should be(null)
}
}
}
8 changes: 5 additions & 3 deletions docs/configuration.md
@@ -103,16 +103,18 @@ such as `--master`, as shown above. `spark-submit` can accept any Spark property
flag, but uses special flags for properties that play a part in launching the Spark application.
Running `./bin/spark-submit --help` will show the entire list of these options.

`bin/spark-submit` will also read configuration options from `conf/spark-defaults.conf`, in which
each line consists of a key and a value separated by whitespace. For example:
When configurations are specified via the `--conf/-c` flags, `bin/spark-submit` will also read
configuration options from `conf/spark-defaults.conf`, in which each line consists of a key and
a value separated by whitespace. For example:

spark.master spark://5.6.7.8:7077
spark.executor.memory 4g
spark.eventLog.enabled true
spark.serializer org.apache.spark.serializer.KryoSerializer

In addition, a property file with Spark configurations can be passed to `bin/spark-submit` via
the `--properties-file` parameter.
`--properties-file` parameter. When this is set, Spark will no longer load configurations from
`conf/spark-defaults.conf` unless another parameter `--load-spark-defaults` is provided.

Any values specified as flags or in the properties file will be passed on to the application
and merged with those specified through SparkConf. Properties set directly on the SparkConf
9 changes: 7 additions & 2 deletions docs/submitting-applications.md
@@ -178,8 +178,13 @@ The master URL passed to Spark can be in one of the following formats:
# Loading Configuration from a File

The `spark-submit` script can load default [Spark configuration values](configuration.html) from a
properties file and pass them on to your application. By default, it will read options
from `conf/spark-defaults.conf` in the `SPARK_HOME` directory.
properties file and pass them on to your application. The file can be specified via the `--properties-file`
parameter. When this is not specified, by default Spark will read options from `conf/spark-defaults.conf`
in the `SPARK_HOME` directory.

An additional flag `--load-spark-defaults` can be used to tell Spark to load configurations from `conf/spark-defaults.conf`
even when a property file is provided via `--properties-file`. This is useful, for instance, when users
want to put system-wide default settings in the former while user/cluster specific settings in the latter.
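
As a concrete illustration of that split (values are hypothetical and mirror the new test above; this paragraph is not part of the committed doc text), `conf/spark-defaults.conf` could carry the site-wide defaults:

    spark.executor.memory   3g
    spark.driver.memory     3g

while the file passed via `--properties-file` carries job- or user-specific overrides:

    spark.executor.cores    16
    spark.driver.memory     4g

With `--load-spark-defaults`, the application sees 16 executor cores, 4g of driver memory (the properties file wins), and 3g of executor memory (inherited from `spark-defaults.conf`); without the flag, the entries in `spark-defaults.conf` are simply not read.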

Loading default Spark configurations this way can obviate the need for certain flags to
`spark-submit`. For instance, if the `spark.master` property is set, you can safely omit the
@@ -55,6 +55,7 @@ class SparkSubmitOptionParser {
protected final String PACKAGES = "--packages";
protected final String PACKAGES_EXCLUDE = "--exclude-packages";
protected final String PROPERTIES_FILE = "--properties-file";
protected final String LOAD_SPARK_DEFAULTS = "--load-spark-defaults";
protected final String PROXY_USER = "--proxy-user";
protected final String PY_FILES = "--py-files";
protected final String REPOSITORIES = "--repositories";
@@ -130,6 +131,7 @@ class SparkSubmitOptionParser {
{ USAGE_ERROR },
{ VERBOSE, "-v" },
{ VERSION },
{ LOAD_SPARK_DEFAULTS },
};

/**
