From a7bd22f99d9ccc1122d24f001badafded87dbec1 Mon Sep 17 00:00:00 2001 From: mstopa-splunk <139441697+mstopa-splunk@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:34:26 +0100 Subject: [PATCH] feat: SC4S Dashboard (#2292) --- dashboard/dashboard.xml | 452 +++++++++++++++++++++++ docs/dashboard.md | 33 ++ mkdocs.yml | 1 + package/etc/conf.d/sources/internal.conf | 1 + 4 files changed, 487 insertions(+) create mode 100644 dashboard/dashboard.xml create mode 100644 docs/dashboard.md diff --git a/dashboard/dashboard.xml b/dashboard/dashboard.xml new file mode 100644 index 0000000000..a4dc15ad16 --- /dev/null +++ b/dashboard/dashboard.xml @@ -0,0 +1,452 @@ +
+ + Monitor SC4S instances connected to this Splunk instance. + + + | mstats + max("spl.sc4syslog.center.received.processed") + max("spl.sc4syslog.source.processed") + max("spl.sc4syslog.dst.dropped") + max("spl.sc4syslog.dst.queued") + prestats=true + WHERE "index"="_metrics" + BY sc4s_container + span=$span$ + + $time_range.earliest$ + $time_range.latest$ + 1 + + + + index=* sc4s_container=$sc4s_instance$ + | stats count by _time index sc4s_tags + + $time_range.earliest$ + $time_range.latest$ + 1 + +
+ + + + + 30s + 1m + 3m + 30m + 1h + 6h + 12h + 1d + 30s + 30s + + + + + rt-15m + rt + + + +

Choose time interval and window.

+

By default, the dashboard loads metrics every 30 seconds for the duration of the 15-minute window.

+

By default, the dashboard loads events for the duration of the 15-minute window if the SC4S instance name is provided.

+ +
+
+ + + +

+ Metrics +

+ +
+
+ + + Received Messages + +

A healthy SC4S instance logs the number of received messages every 30 seconds.

+

The number should grow by at least 1 every 30 seconds. This is because the metrics message counts as a received message.

+

This cumulative sum grows until the SC4S instance restarts.

+ + + + + | timechart max("spl.sc4syslog.source.processed") span=$span$ useother=false BY sc4s_container + WHERE max in top1000 + | fields - _span* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Dropped Messages by SC4S Instance + +

This is a cumulative sum that, in the absence of dropped messages, remains at a constant level of 0.

+

Upon restarting the SC4S instance, it is reset back to 0.

+

This does not include potential UDP messages dropped from the port buffer.

+ + + + + | timechart max("spl.sc4syslog.dst.dropped") span=$span$ useother=false BY sc4s_container WHERE max in top1000 + | fields - _span* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + SC4S Instance + + + values(sc4s_container) + values(sc4s_container) + + | mcatalog values(sc4s_container) WHERE index=_metrics | mvexpand values(sc4s_container) + 0 + + + + + + Instance name + + + | mcatalog values(sc4s_container) WHERE index=_metrics AND sc4s_container=$sc4s_instance$ | mvexpand values(sc4s_container) + 0 + + + + + + + + + + + + + + + + + + + + + + + SC4S version + + + | mcatalog values(sc4s_version) WHERE index=_metrics AND sc4s_container=$sc4s_instance$ | mvexpand values(sc4s_version) + 0 + + + + + + + + + + + + + + + + + + + + + + + + + +

+ Messages' metrics +

+

+ Received: if SC4S receives N events, the chart should return a value of N + 1 for a given 30-second span. +

+

+ Queued: increases temporarily in case of a connection loss to Splunk or increased system load. +

+

+ Dropped: temporarily rises during periods where system load exceeds capacity. This number doesn't include messages dropped on the OS level, for example, from the UDP port buffer. +

+

The delta is negative at the moment of instance restart.

+ +
+
+ + + + + + | search sc4s_container=$sc4s_instance$ + | timechart + max("spl.sc4syslog.center.received.processed") AS received + max("spl.sc4syslog.dst.dropped") AS dropped + max("spl.sc4syslog.dst.queued") AS queued + span=$span$ + | delta received as received + | delta dropped as dropped + | fields - _span* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ Events +

+ +
+
+ + + Total number of events + + Total volume of actual syslog traffic delivered by this SC4S instance to Splunk + + | stats count + + + + + + + + + + + + + + + + + + + + + + + Distributions of events by index + + + | stats count by index + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Trends of events by index + + + | chart sparkline(count) AS "Indexes Trend" count AS Total BY index + + + + + + + + +
+
+
+ + + Data parsers + + + | eval tags=split(sc4s_tags,"|") | mvexpand tags | search tags=".app.*" | timechart count by tags + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Tags + + + | eval tags=split(sc4s_tags,"|") | mvexpand tags | chart count by tags + + +
+
+
+
\ No newline at end of file diff --git a/docs/dashboard.md b/docs/dashboard.md new file mode 100644 index 0000000000..b0992f07a8 --- /dev/null +++ b/docs/dashboard.md @@ -0,0 +1,33 @@ +# SC4S Metrics and Events Dashboard +The SC4S Metrics and Events Dashboard lets you monitor crucial metrics and event flows for all the SC4S instances sending data to a chosen Splunk platform. + + +## Functionalities + +### Overview metrics +The dashboard displays the cumulative sum of received and dropped messages for all SC4S instances in a chosen interval and for the specified time range. By default the interval is set to 30 seconds and the time range is 15 minutes. + +The Received Messages panel can be used as a heartbeat metric. A healthy SC4S instance should send at least one message per 30 seconds. This metrics message is included in the count. + +The Dropped Messages panel should remain at a constant level of 0. If SC4S drops messages due to filters, slow performance, or for any other reason, the number of dropped messages will persist until the instance restarts. This panel does not include potential UDP messages dropped from the port buffer, which SC4S is not able to track. + +### Single instance metrics +You can display the instance name and SC4S version for a chosen SC4S instance. +The SC4S version is reported only by instances running SC4S 3.16.0 or later. + +The dashboard also displays a timechart of deltas for received, queued, and dropped messages for a chosen SC4S instance. + +### Single instance events +The dashboard helps to analyze traffic processed by an SC4S instance by visualizing the following events data: + +- total number of events +- distributions of events by index +- trends of events by index +- data parsers in use +- applied tags + +## Installation +1. In Splunk platform open `Search` -> `Dashboards`. +2. Click on `Create New Dashboard` and make an empty dashboard. Be sure to choose `Classic Dashboards`. +3. 
In the `Edit Dashboard` view go to `Source` and replace the initial xml with the contents of [dashboard/dashboard.xml](https://github.com/splunk/splunk-connect-for-syslog/blob/main/dashboard/dashboard.xml) published in the SC4S repository. +4. After saving the changes your dashboard will be ready to use. \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 93f9293876..9f26aae99d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -60,6 +60,7 @@ nav: - Troubleshooting: - SC4S Startup and Validation: "troubleshooting/troubleshoot_SC4S_server.md" - SC4S Logging and Troubleshooting Resources: "troubleshooting/troubleshoot_resources.md" + - Dashboard: "dashboard.md" - Experiments: "experiments.md" - Upgrading SC4S: "upgrade.md" - SC4S FAQ: "faq.md" diff --git a/package/etc/conf.d/sources/internal.conf b/package/etc/conf.d/sources/internal.conf index 148c81566a..47bd35c981 100644 --- a/package/etc/conf.d/sources/internal.conf +++ b/package/etc/conf.d/sources/internal.conf @@ -12,6 +12,7 @@ source s_internal { rewrite { subst('^.*@','' value('HOST')); set("`SC4S_CONTAINER_HOST`", value("fields.sc4s_loghost") condition('`SC4S_CONTAINER_HOST`' ne "")); + set("`SC4S_VERSION`", value("fields.sc4s_version") condition('`SC4S_VERSION`' ne "")); set($LOGHOST, value("fields.sc4s_container") condition(match('container' template('`SC4S_DEST_SPLUNK_INDEXED_FIELDS`') type(string) flags(substring)) )); };