Skip to content

Commit a00aaf6

Browse files
wangyum authored and srowen committed
[MINOR][YARN] Make memLimitExceededLogMessage more clean
## What changes were proposed in this pull request? Current `memLimitExceededLogMessage`: <img src="https://user-images.githubusercontent.com/5399861/48467789-ec8e1000-e824-11e8-91fc-280d342e1bf3.png" width="360"> It's not very clear, because the physical memory limit is exceeded but the suggestion mentions a virtual memory config. This PR makes the message clearer and replaces the deprecated config ```spark.yarn.executor.memoryOverhead```. ## How was this patch tested? Manual tests. Closes apache#23030 from wangyum/EXECUTOR_MEMORY_OVERHEAD. Authored-by: Yuming Wang <[email protected]> Signed-off-by: Sean Owen <[email protected]>
1 parent a09d5ba commit a00aaf6

File tree

2 files changed

+14
-31
lines changed

2 files changed

+14
-31
lines changed

resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ package org.apache.spark.deploy.yarn
2020
import java.util.Collections
2121
import java.util.concurrent._
2222
import java.util.concurrent.atomic.AtomicInteger
23-
import java.util.regex.Pattern
2423

2524
import scala.collection.JavaConverters._
2625
import scala.collection.mutable
@@ -598,13 +597,21 @@ private[yarn] class YarnAllocator(
598597
(false, s"Container ${containerId}${onHostStr} was preempted.")
599598
// Should probably still count memory exceeded exit codes towards task failures
600599
case VMEM_EXCEEDED_EXIT_CODE =>
601-
(true, memLimitExceededLogMessage(
602-
completedContainer.getDiagnostics,
603-
VMEM_EXCEEDED_PATTERN))
600+
val vmemExceededPattern = raw"$MEM_REGEX of $MEM_REGEX virtual memory used".r
601+
val diag = vmemExceededPattern.findFirstIn(completedContainer.getDiagnostics)
602+
.map(_.concat(".")).getOrElse("")
603+
val message = "Container killed by YARN for exceeding virtual memory limits. " +
604+
s"$diag Consider boosting ${EXECUTOR_MEMORY_OVERHEAD.key} or boosting " +
605+
s"${YarnConfiguration.NM_VMEM_PMEM_RATIO} or disabling " +
606+
s"${YarnConfiguration.NM_VMEM_CHECK_ENABLED} because of YARN-4714."
607+
(true, message)
604608
case PMEM_EXCEEDED_EXIT_CODE =>
605-
(true, memLimitExceededLogMessage(
606-
completedContainer.getDiagnostics,
607-
PMEM_EXCEEDED_PATTERN))
609+
val pmemExceededPattern = raw"$MEM_REGEX of $MEM_REGEX physical memory used".r
610+
val diag = pmemExceededPattern.findFirstIn(completedContainer.getDiagnostics)
611+
.map(_.concat(".")).getOrElse("")
612+
val message = "Container killed by YARN for exceeding physical memory limits. " +
613+
s"$diag Consider boosting ${EXECUTOR_MEMORY_OVERHEAD.key}."
614+
(true, message)
608615
case _ =>
609616
// all the failures which not covered above, like:
610617
// disk failure, kill by app master or resource manager, ...
@@ -735,18 +742,6 @@ private[yarn] class YarnAllocator(
735742

736743
private object YarnAllocator {
737744
val MEM_REGEX = "[0-9.]+ [KMG]B"
738-
val PMEM_EXCEEDED_PATTERN =
739-
Pattern.compile(s"$MEM_REGEX of $MEM_REGEX physical memory used")
740-
val VMEM_EXCEEDED_PATTERN =
741-
Pattern.compile(s"$MEM_REGEX of $MEM_REGEX virtual memory used")
742745
val VMEM_EXCEEDED_EXIT_CODE = -103
743746
val PMEM_EXCEEDED_EXIT_CODE = -104
744-
745-
def memLimitExceededLogMessage(diagnostics: String, pattern: Pattern): String = {
746-
val matcher = pattern.matcher(diagnostics)
747-
val diag = if (matcher.find()) " " + matcher.group() + "." else ""
748-
s"Container killed by YARN for exceeding memory limits. $diag " +
749-
"Consider boosting spark.yarn.executor.memoryOverhead or " +
750-
"disabling yarn.nodemanager.vmem-check-enabled because of YARN-4714."
751-
}
752747
}

resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import org.mockito.Mockito._
2929
import org.scalatest.{BeforeAndAfterEach, Matchers}
3030

3131
import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
32-
import org.apache.spark.deploy.yarn.YarnAllocator._
3332
import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._
3433
import org.apache.spark.deploy.yarn.config._
3534
import org.apache.spark.rpc.RpcEndpointRef
@@ -376,17 +375,6 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter
376375
verify(mockAmClient).updateBlacklist(Seq[String]().asJava, Seq("hostA", "hostB").asJava)
377376
}
378377

379-
test("memory exceeded diagnostic regexes") {
380-
val diagnostics =
381-
"Container [pid=12465,containerID=container_1412887393566_0003_01_000002] is running " +
382-
"beyond physical memory limits. Current usage: 2.1 MB of 2 GB physical memory used; " +
383-
"5.8 GB of 4.2 GB virtual memory used. Killing container."
384-
val vmemMsg = memLimitExceededLogMessage(diagnostics, VMEM_EXCEEDED_PATTERN)
385-
val pmemMsg = memLimitExceededLogMessage(diagnostics, PMEM_EXCEEDED_PATTERN)
386-
assert(vmemMsg.contains("5.8 GB of 4.2 GB virtual memory used."))
387-
assert(pmemMsg.contains("2.1 MB of 2 GB physical memory used."))
388-
}
389-
390378
test("window based failure executor counting") {
391379
sparkConf.set("spark.yarn.executor.failuresValidityInterval", "100s")
392380
val handler = createAllocator(4)

0 commit comments

Comments
 (0)