-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-53442]Make PrometheusServlet compatible with OpenMetrics #52183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,71 +59,116 @@ private[spark] class PrometheusServlet( | |
| def getMetricsSnapshot(): String = { | ||
| import scala.jdk.CollectionConverters._ | ||
|
|
||
| val gaugesLabel = """{type="gauges"}""" | ||
| val countersLabel = """{type="counters"}""" | ||
| val metersLabel = countersLabel | ||
| val histogramslabels = """{type="histograms"}""" | ||
| val timersLabels = """{type="timers"}""" | ||
| val PERCENTILE_P50 = "0.5" | ||
| val PERCENTILE_P75 = "0.75" | ||
| val PERCENTILE_P95 = "0.95" | ||
| val PERCENTILE_P98 = "0.98" | ||
| val PERCENTILE_P99 = "0.99" | ||
| val PERCENTILE_P999 = "0.999" | ||
|
|
||
| val sb = new StringBuilder() | ||
| registry.getGauges.asScala.foreach { case (k, v) => | ||
| if (!v.getValue.isInstanceOf[String]) { | ||
| sb.append(s"${normalizeKey(k)}Number$gaugesLabel ${v.getValue}\n") | ||
| sb.append(s"${normalizeKey(k)}Value$gaugesLabel ${v.getValue}\n") | ||
| v.getValue match { | ||
| case n: Number => | ||
| sb.append(s"# HELP ${normalizeKey(k)} Gauge metric\n") | ||
| sb.append(s"# TYPE ${normalizeKey(k)} gauge\n") | ||
| sb.append(s"${normalizeKey(k)} ${n.doubleValue()}\n") | ||
| case _ => // non-numeric gauges | ||
| } | ||
| } | ||
| registry.getCounters.asScala.foreach { case (k, v) => | ||
| sb.append(s"${normalizeKey(k)}Count$countersLabel ${v.getCount}\n") | ||
| val name = s"${normalizeKey(k)}_total" | ||
| sb.append(s"# HELP ${name} Counter metric\n") | ||
| sb.append(s"# TYPE ${name} counter\n") | ||
| sb.append(s"${name} ${v.getCount}\n") | ||
| } | ||
| registry.getHistograms.asScala.foreach { case (k, h) => | ||
| val snapshot = h.getSnapshot | ||
| val values = snapshot.getValues.map(_.toDouble) | ||
| val prefix = normalizeKey(k) | ||
| sb.append(s"${prefix}Count$histogramslabels ${h.getCount}\n") | ||
| sb.append(s"${prefix}Max$histogramslabels ${snapshot.getMax}\n") | ||
|
||
| sb.append(s"${prefix}Mean$histogramslabels ${snapshot.getMean}\n") | ||
| sb.append(s"${prefix}Min$histogramslabels ${snapshot.getMin}\n") | ||
| sb.append(s"${prefix}50thPercentile$histogramslabels ${snapshot.getMedian}\n") | ||
| sb.append(s"${prefix}75thPercentile$histogramslabels ${snapshot.get75thPercentile}\n") | ||
| sb.append(s"${prefix}95thPercentile$histogramslabels ${snapshot.get95thPercentile}\n") | ||
| sb.append(s"${prefix}98thPercentile$histogramslabels ${snapshot.get98thPercentile}\n") | ||
| sb.append(s"${prefix}99thPercentile$histogramslabels ${snapshot.get99thPercentile}\n") | ||
| sb.append(s"${prefix}999thPercentile$histogramslabels ${snapshot.get999thPercentile}\n") | ||
| sb.append(s"${prefix}StdDev$histogramslabels ${snapshot.getStdDev}\n") | ||
|
||
| sb.append(s"# HELP ${prefix} Histogram metric\n") | ||
| sb.append(s"# TYPE ${prefix} summary\n") | ||
| sb.append(s"${prefix}{quantile=\"${PERCENTILE_P50}\"} ${snapshot.getMedian}\n") | ||
| sb.append(s"${prefix}{quantile=\"${PERCENTILE_P75}\"} ${snapshot.get75thPercentile}\n") | ||
| sb.append(s"${prefix}{quantile=\"${PERCENTILE_P95}\"} ${snapshot.get95thPercentile}\n") | ||
| sb.append(s"${prefix}{quantile=\"${PERCENTILE_P98}\"} ${snapshot.get98thPercentile}\n") | ||
| sb.append(s"${prefix}{quantile=\"${PERCENTILE_P99}\"} ${snapshot.get99thPercentile}\n") | ||
| sb.append(s"${prefix}{quantile=\"${PERCENTILE_P999}\"} ${snapshot.get999thPercentile}\n") | ||
| sb.append(s"${prefix}_count ${h.getCount}\n") | ||
| sb.append(s"${prefix}_sum ${values.sum}\n") | ||
| sb.append(s"# HELP ${prefix}_min Minimum value\n") | ||
| sb.append(s"# TYPE ${prefix}_min gauge\n") | ||
| sb.append(s"${prefix}_min ${snapshot.getMin}\n") | ||
| sb.append(s"# HELP ${prefix}_max Maximal value\n") | ||
| sb.append(s"# TYPE ${prefix}_max gauge\n") | ||
| sb.append(s"${prefix}_max ${snapshot.getMax}\n") | ||
| sb.append(s"# HELP ${prefix}_mean Mean value\n") | ||
| sb.append(s"# TYPE ${prefix}_mean gauge\n") | ||
| sb.append(s"${prefix}_mean ${snapshot.getMean}\n") | ||
| sb.append(s"# HELP ${prefix}_stddev Standard deviation value\n") | ||
| sb.append(s"# TYPE ${prefix}_stddev gauge\n") | ||
| sb.append(s"${prefix}_stddev ${snapshot.getStdDev}\n") | ||
| } | ||
| registry.getMeters.entrySet.iterator.asScala.foreach { kv => | ||
| val prefix = normalizeKey(kv.getKey) | ||
| val meter = kv.getValue | ||
| sb.append(s"${prefix}Count$metersLabel ${meter.getCount}\n") | ||
| sb.append(s"${prefix}MeanRate$metersLabel ${meter.getMeanRate}\n") | ||
| sb.append(s"${prefix}OneMinuteRate$metersLabel ${meter.getOneMinuteRate}\n") | ||
| sb.append(s"${prefix}FiveMinuteRate$metersLabel ${meter.getFiveMinuteRate}\n") | ||
| sb.append(s"${prefix}FifteenMinuteRate$metersLabel ${meter.getFifteenMinuteRate}\n") | ||
| sb.append(s"# HELP ${prefix}_count_cumulative Meter counts metric\n") | ||
| sb.append(s"# TYPE ${prefix}_count_cumulative gauge\n") | ||
| sb.append(s"${prefix}_count_cumulative ${meter.getCount}\n") | ||
| sb.append(s"# HELP ${prefix}_mean_rate total counts metric\n") | ||
| sb.append(s"# TYPE ${prefix}_mean_rate gauge\n") | ||
| sb.append(s"${prefix}_mean_rate ${meter.getMeanRate}\n") | ||
| sb.append(s"# HELP ${prefix}_m1_rate 1-min moving avg metric\n") | ||
| sb.append(s"# TYPE ${prefix}_m1_rate gauge\n") | ||
| sb.append(s"${prefix}_m1_rate ${meter.getOneMinuteRate}\n") | ||
| sb.append(s"# HELP ${prefix}_m5_rate 5-min moving avg metric\n") | ||
| sb.append(s"# TYPE ${prefix}_m5_rate gauge\n") | ||
| sb.append(s"${prefix}_m5_rate ${meter.getFiveMinuteRate}\n") | ||
| sb.append(s"# HELP ${prefix}_m15_rate 15-min moving avg metric\n") | ||
| sb.append(s"# TYPE ${prefix}_m15_rate gauge\n") | ||
| sb.append(s"${prefix}_m15_rate ${meter.getFifteenMinuteRate}\n") | ||
| } | ||
|
|
||
| registry.getTimers.entrySet.iterator.asScala.foreach { kv => | ||
| val prefix = normalizeKey(kv.getKey) | ||
| val timer = kv.getValue | ||
| val snapshot = timer.getSnapshot | ||
| sb.append(s"${prefix}Count$timersLabels ${timer.getCount}\n") | ||
| sb.append(s"${prefix}Max$timersLabels ${snapshot.getMax}\n") | ||
| sb.append(s"${prefix}Mean$timersLabels ${snapshot.getMean}\n") | ||
| sb.append(s"${prefix}Min$timersLabels ${snapshot.getMin}\n") | ||
| sb.append(s"${prefix}50thPercentile$timersLabels ${snapshot.getMedian}\n") | ||
| sb.append(s"${prefix}75thPercentile$timersLabels ${snapshot.get75thPercentile}\n") | ||
| sb.append(s"${prefix}95thPercentile$timersLabels ${snapshot.get95thPercentile}\n") | ||
| sb.append(s"${prefix}98thPercentile$timersLabels ${snapshot.get98thPercentile}\n") | ||
| sb.append(s"${prefix}99thPercentile$timersLabels ${snapshot.get99thPercentile}\n") | ||
| sb.append(s"${prefix}999thPercentile$timersLabels ${snapshot.get999thPercentile}\n") | ||
| sb.append(s"${prefix}StdDev$timersLabels ${snapshot.getStdDev}\n") | ||
| sb.append(s"${prefix}FifteenMinuteRate$timersLabels ${timer.getFifteenMinuteRate}\n") | ||
| sb.append(s"${prefix}FiveMinuteRate$timersLabels ${timer.getFiveMinuteRate}\n") | ||
| sb.append(s"${prefix}OneMinuteRate$timersLabels ${timer.getOneMinuteRate}\n") | ||
| sb.append(s"${prefix}MeanRate$timersLabels ${timer.getMeanRate}\n") | ||
| val NANOS_TO_SECONDS_UNIT = 1e9 | ||
| def nanosToSeconds(n: Double): Double = n / NANOS_TO_SECONDS_UNIT | ||
| val medianValue = nanosToSeconds(snapshot.getMedian) | ||
| val p75Value = nanosToSeconds(snapshot.get75thPercentile) | ||
| val p95Value = nanosToSeconds(snapshot.get95thPercentile) | ||
| val p98Value = nanosToSeconds(snapshot.get98thPercentile) | ||
| val p99Value = nanosToSeconds(snapshot.get99thPercentile) | ||
| val p999Value = nanosToSeconds(snapshot.get999thPercentile) | ||
|
|
||
| val durationSecondsName = s"${prefix}_duration_seconds" | ||
| sb.append(s"# HELP $durationSecondsName Timer summary metric\n") | ||
| sb.append(s"# TYPE $durationSecondsName summary\n") | ||
| sb.append(s"${durationSecondsName}{quantile=\"${PERCENTILE_P50}\"} ${medianValue}\n") | ||
| sb.append(s"${durationSecondsName}{quantile=\"${PERCENTILE_P75}\"} ${p75Value}\n") | ||
| sb.append(s"${durationSecondsName}{quantile=\"${PERCENTILE_P95}\"} ${p95Value}\n") | ||
| sb.append(s"${durationSecondsName}{quantile=\"${PERCENTILE_P98}\"} ${p98Value}\n") | ||
| sb.append(s"${durationSecondsName}{quantile=\"${PERCENTILE_P99}\"} ${p99Value}\n") | ||
| sb.append(s"${durationSecondsName}{quantile=\"${PERCENTILE_P999}\"} ${p999Value}\n") | ||
| sb.append(s"${durationSecondsName}_count ${timer.getCount}\n") | ||
| sb.append(s"${durationSecondsName}_sum " + | ||
| s"${snapshot.getValues.map(_.toDouble / NANOS_TO_SECONDS_UNIT).sum}\n") | ||
| sb.append(s"# HELP ${prefix}_m1_rate Timer rate 1-min moving avg metric\n") | ||
| sb.append(s"# TYPE ${prefix}_m1_rate gauge\n") | ||
| sb.append(s"${prefix}_m1_rate ${timer.getOneMinuteRate}\n") | ||
| sb.append(s"# HELP ${prefix}_m5_rate Timer rate 5-min moving avg metric\n") | ||
| sb.append(s"# TYPE ${prefix}_m5_rate gauge\n") | ||
| sb.append(s"${prefix}_m5_rate ${timer.getFiveMinuteRate}\n") | ||
| sb.append(s"# HELP ${prefix}_m15_rate Timer rate 15-min moving avg metric\n") | ||
| sb.append(s"# TYPE ${prefix}_m15_rate gauge\n") | ||
| sb.append(s"${prefix}_m15_rate ${timer.getFifteenMinuteRate}\n") | ||
| } | ||
| sb.toString() | ||
| } | ||
|
|
||
| private def normalizeKey(key: String): String = { | ||
| s"metrics_${key.replaceAll("[^a-zA-Z0-9]", "_")}_" | ||
| s"metrics_${key.replaceAll("[^a-zA-Z0-9]", "_")}" | ||
| } | ||
|
|
||
| override def start(): Unit = { } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need to be a double? In https://github.com/apache/spark-kubernetes-operator/pull/298/files#diff-3281f083d917b4826b386883cf3a96b5eb2eb5d0c4334571ed15dec95358c1adR168 you didn't do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks a lot for pointing this out!
My reasoning was that even if the value is a `Number`, it may not be formatted correctly with a plain `getValue` when we are dealing with something more complex, such as `BigDecimal` or `AtomicLong`. Calling `doubleValue` avoids the case where `toString` gives us a string instead of a number.
spark-kubernetes-operator is relatively new and we are sure there are no such gauges there, but IMO we need to take this into consideration as well. I'll fix that.