From 5971773c6124a9eb9e5f5e5a9ee998aa444e7040 Mon Sep 17 00:00:00 2001 From: Ryan Faircloth Date: Wed, 16 Mar 2022 11:51:29 -0400 Subject: [PATCH] feat: Use last valid host name when next event is incomplete New feature flag. Enables a kv store feature to cache the last host name from a SOURCEIP and use that value if a following event has a null, nil, or IP value rather than a host name. Local storage uses SQLite --- .github/workflows/ci-main.yaml | 1 + docs/experiments.md | 7 ++ mkdocs.yml | 1 + .../conf.d/conflib/_common/syslog_format.conf | 8 +- .../etc/conf.d/conflib/_splunk/fix_dns.conf | 6 ++ .../conf.d/destinations/dest_psc/plugin.conf | 7 ++ .../etc/conf.d/log_paths/2/lp-dest-psc.conf | 8 ++ .../conf.d/sources/source_syslog/plugin.jinja | 22 ++++- .../conf.d/sources/source_syslog/plugin.py | 12 +++ package/etc/pylib/parser_fix_dns.py | 2 +- package/etc/pylib/parser_source_cache.py | 93 +++++++++++++++++++ package/etc/pylib/psc_dump.py | 13 +++ package/sbin/entrypoint.sh | 2 + poetry.lock | 13 ++- pyproject.toml | 2 + tests/docker-compose.yml | 1 + 16 files changed, 190 insertions(+), 8 deletions(-) create mode 100644 docs/experiments.md create mode 100644 package/etc/conf.d/destinations/dest_psc/plugin.conf create mode 100644 package/etc/conf.d/log_paths/2/lp-dest-psc.conf create mode 100644 package/etc/pylib/parser_source_cache.py create mode 100644 package/etc/pylib/psc_dump.py diff --git a/.github/workflows/ci-main.yaml b/.github/workflows/ci-main.yaml index 4877cfb3e8..3a58d54ba3 100644 --- a/.github/workflows/ci-main.yaml +++ b/.github/workflows/ci-main.yaml @@ -108,6 +108,7 @@ jobs: with: context: . 
file: package/Dockerfile + #platforms: linux/amd64,linux/arm64 platforms: linux/amd64,linux/arm64 push: true #tags: ${{ needs.meta.outputs.container_tags }} diff --git a/docs/experiments.md b/docs/experiments.md new file mode 100644 index 0000000000..49f205711b --- /dev/null +++ b/docs/experiments.md @@ -0,0 +1,7 @@ +# Current Experimental Features + +# > 2.13.0 + +* In env_file set `SC4S_USE_NAME_CACHE=yes` to enable caching the last valid host string and replacing a nil, null, or IPv4 host value with the last good value. + - Benefit: More correct host name values in Splunk when the source vendor fails to provide a valid syslog message + - Risk: Potential disk I/O usage (space, IOPS) and a potential reduction in throughput when a high proportion of events are incomplete. \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 514ff2b9e4..d4e89a6c7c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,5 +45,6 @@ nav: - Troubleshooting: - SC4S Startup and Validation: "troubleshooting/troubleshoot_SC4S_server.md" - SC4S Logging and Troubleshooting Resources: "troubleshooting/troubleshoot_resources.md" + - Experiments: "experiments.md" - "Upgrading SC4S": "upgrade.md" - "SC4S FAQ": "faq.md" diff --git a/package/etc/conf.d/conflib/_common/syslog_format.conf b/package/etc/conf.d/conflib/_common/syslog_format.conf index 0a0e74876a..1684a401fc 100644 --- a/package/etc/conf.d/conflib/_common/syslog_format.conf +++ b/package/etc/conf.d/conflib/_common/syslog_format.conf @@ -59,8 +59,12 @@ filter f_msg_is_tcp_json{ match("tcp_json" value("fields.sc4s_syslog_format")) }; filter f_host_is_nil_or_ip{ - host('^-') or - 
host('^((((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))|((([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))))$') + ( + host('^-') or + host('^((((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))|((([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))))$') + ) + and not '$HOST' eq "127.0.0.1" + }; filter f_host_is_ip{ diff --git a/package/etc/conf.d/conflib/_splunk/fix_dns.conf b/package/etc/conf.d/conflib/_splunk/fix_dns.conf index 3ca0590b42..0b3e7416f2 100644 --- a/package/etc/conf.d/conflib/_splunk/fix_dns.conf +++ b/package/etc/conf.d/conflib/_splunk/fix_dns.conf @@ -4,6 +4,12 @@ parser p_fix_host_resolver { ); }; +parser p_host_cache { + python( + class("parser_source_cache.psc_parse") + 
); +}; + parser p_add_context_host { add-contextual-data( selector("${SOURCEIP}"), diff --git a/package/etc/conf.d/destinations/dest_psc/plugin.conf b/package/etc/conf.d/destinations/dest_psc/plugin.conf new file mode 100644 index 0000000000..e9f8f6f56b --- /dev/null +++ b/package/etc/conf.d/destinations/dest_psc/plugin.conf @@ -0,0 +1,7 @@ +destination d_psc { + python( + class("parser_source_cache.psc_dest") + batch-timeout(3000) + batch-lines(100) + ); +}; \ No newline at end of file diff --git a/package/etc/conf.d/log_paths/2/lp-dest-psc.conf b/package/etc/conf.d/log_paths/2/lp-dest-psc.conf new file mode 100644 index 0000000000..82e1ee7853 --- /dev/null +++ b/package/etc/conf.d/log_paths/2/lp-dest-psc.conf @@ -0,0 +1,8 @@ +log { + if { + filter(f_host_is_nil_or_ip); + } else { + destination(d_psc); + }; + flags(catchall); +}; \ No newline at end of file diff --git a/package/etc/conf.d/sources/source_syslog/plugin.jinja b/package/etc/conf.d/sources/source_syslog/plugin.jinja index f9f9bc1c93..c12a8f1462 100644 --- a/package/etc/conf.d/sources/source_syslog/plugin.jinja +++ b/package/etc/conf.d/sources/source_syslog/plugin.jinja @@ -166,12 +166,19 @@ source s_{{ port_id }} { filter(f_host_is_nil_or_ip); if { parser(p_add_context_host); + }; + {%- if use_namecache == True %} + if { + filter(f_host_is_nil_or_ip); + parser(p_host_cache); + }; + {%- endif %} {%- if use_reverse_dns == True %} - elif { + if { filter(f_host_is_nil_or_ip); parser(p_fix_host_resolver); - {%- endif %} }; + {%- endif %} }; rewrite { set('$(lowercase "$HOST")' value(HOST)); @@ -327,12 +334,19 @@ source s_{{ port_id }} { filter(f_host_is_nil_or_ip); if { parser(p_add_context_host); + }; + {%- if use_namecache == True %} + if { + filter(f_host_is_nil_or_ip); + parser(p_host_cache); + }; + {%- endif %} {%- if use_reverse_dns == True %} - elif { + if { filter(f_host_is_nil_or_ip); parser(p_fix_host_resolver); - {%- endif %} }; + {%- endif %} }; rewrite { diff --git 
a/package/etc/conf.d/sources/source_syslog/plugin.py b/package/etc/conf.d/sources/source_syslog/plugin.py index c259871842..8c648e3923 100755 --- a/package/etc/conf.d/sources/source_syslog/plugin.py +++ b/package/etc/conf.d/sources/source_syslog/plugin.py @@ -43,6 +43,17 @@ else: use_reverse_dns = False +if os.getenv(f"SC4S_USE_NAME_CACHE", "no").lower() in [ + "true", + "1", + "t", + "y", + "yes", +]: + use_namecache = True +else: + use_namecache = False + if os.getenv(f"SC4S_SOURCE_TLS_ENABLE", "no").lower() in [ "true", "1", @@ -68,6 +79,7 @@ store_raw_message=store_raw_message, port_id=port_id, use_reverse_dns=use_reverse_dns, + use_namecache=use_namecache, use_tls=use_tls, tls_dir=os.getenv(f"SC4S_TLS", "/etc/syslog-ng/tls"), cert_file=cert_file, diff --git a/package/etc/pylib/parser_fix_dns.py b/package/etc/pylib/parser_fix_dns.py index 644e6d615f..e77167af3f 100644 --- a/package/etc/pylib/parser_fix_dns.py +++ b/package/etc/pylib/parser_fix_dns.py @@ -26,7 +26,7 @@ def parse(self, log_message): if len(parts) > 1: log_message["HOST"] = name except: - pass + return False # return True, other way message is dropped return True diff --git a/package/etc/pylib/parser_source_cache.py b/package/etc/pylib/parser_source_cache.py new file mode 100644 index 0000000000..a7f171dad6 --- /dev/null +++ b/package/etc/pylib/parser_source_cache.py @@ -0,0 +1,93 @@ + +import sys +import traceback +import socket +import struct +from sqlitedict import SqliteDict + +import time +try: + import syslogng +except: + pass + + +def ip2int(addr): + return struct.unpack("!I", socket.inet_aton(addr))[0] + +def int2ip(addr): + return socket.inet_ntoa(struct.pack("!I", addr)) + +hostdict = str("/var/lib/syslog-ng/hostip") + +class psc_parse(object): + def init(self, options): + self.logger = syslogng.Logger() + self.db = SqliteDict(f"{hostdict}.sqlite") + return True + + def deinit(self): + self.db.close() + + def parse(self, log_message): + try: + ipaddr = 
log_message["SOURCEIP"].decode("utf-8") + ip_int = ip2int(ipaddr) + self.logger.debug(f'psc.parse sourceip={ipaddr} int={ip_int}') + name = self.db[ip_int] + self.logger.debug(f'psc.parse host={name}') + log_message["HOST"]=name + + except: + exc_type, exc_value, exc_traceback = sys.exc_info() + lines = traceback.format_exception(exc_type, exc_value, exc_traceback) + self.logger.debug(''.join('!! ' + line for line in lines)) + return False + self.logger.debug(f'psc.parse complete') + return True + +class psc_dest(object): + def init(self, options): + self.logger = syslogng.Logger() + try: + self.db = SqliteDict(f"{hostdict}.sqlite",autocommit=True) + except: + exc_type, exc_value, exc_traceback = sys.exc_info() + lines = traceback.format_exception(exc_type, exc_value, exc_traceback) + self.logger.debug(''.join('!! ' + line for line in lines)) + return False + return True + + def deinit(self): + """Close the connection to the target service""" + self.db.commit() + self.db.close() + + def send(self, log_message): + try: + ipaddr = log_message["SOURCEIP"].decode("utf-8") + ip_int = ip2int(ipaddr) + self.logger.debug(f'psc.send sourceip={ipaddr} int={ip_int} host={log_message["HOST"]}') + if ip_int in self.db: + current = self.db[ip_int] + if current != log_message["HOST"]: + self.db[ip_int] =log_message["HOST"] + else: + self.db[ip_int] =log_message["HOST"] + + except: + exc_type, exc_value, exc_traceback = sys.exc_info() + lines = traceback.format_exception(exc_type, exc_value, exc_traceback) + self.logger.debug(''.join('!! 
' + line for line in lines)) + return False + self.logger.debug('psc.send complete') + return True + + def flush(self): + self.db.commit() + +if __name__ == "__main__": + db = SqliteDict(f"{hostdict}.sqlite",autocommit=True) + db[0]="seed" + db.commit() + db.close() diff --git a/package/etc/pylib/psc_dump.py b/package/etc/pylib/psc_dump.py new file mode 100644 index 0000000000..4b7e02f88d --- /dev/null +++ b/package/etc/pylib/psc_dump.py @@ -0,0 +1,13 @@ + +import sys +import traceback +import socket +import struct +from sqlitedict import SqliteDict + + +hostdict = str("/var/lib/syslog-ng/cache/hostip") +db = SqliteDict(f"{hostdict}.sqlite") + +for k,v in db.items(): + print(f"key={k}={v}") \ No newline at end of file diff --git a/package/sbin/entrypoint.sh b/package/sbin/entrypoint.sh index a3527ac3fb..1343eda467 100755 --- a/package/sbin/entrypoint.sh +++ b/package/sbin/entrypoint.sh @@ -3,6 +3,8 @@ function join_by { local d=$1; shift; local f=$1; shift; printf %s "$f" "${@/#/$ export PYTHONPATH=/etc/syslog-ng/pylib +python3 /etc/syslog-ng/pylib/parser_source_cache.py + export SC4S_LISTEN_STATUS_PORT=${SC4S_LISTEN_STATUS_PORT:=8080} # These path variables allow for a single entrypoint script to be utilized for both Container and BYOE runtimes diff --git a/poetry.lock b/poetry.lock index 8ee1aeb3a9..3035e298b6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -836,6 +836,14 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "sqlitedict" +version = "2.0.0" +description = "Persistent dict in Python, backed up by sqlite3 and pickle, multithread-safe." 
+category = "main" +optional = false +python-versions = "*" + [[package]] name = "texttable" version = "1.6.4" @@ -947,7 +955,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "e108e1ea8be72df3997a1cae74883a76ec4aa270a10d9a0e2b4dc0ae7ceea17d" +content-hash = "b3c51e69409fd50edd53536d3cd87887764d36dbe04a7e215a26f57d7e003d0e" [metadata.files] arrow = [ @@ -1426,6 +1434,9 @@ six = [ splunk-sdk = [ {file = "splunk-sdk-1.6.18.tar.gz", hash = "sha256:edc0959786f5dcab225ba98633c310dbf7584977849f6c2152a0e5090b5e2561"}, ] +sqlitedict = [ + {file = "sqlitedict-2.0.0.tar.gz", hash = "sha256:23a370416f4e1e962daa293382f3a8dbc4127e6a0abc06a5d4e58e6902f05d17"}, +] texttable = [ {file = "texttable-1.6.4-py2.py3-none-any.whl", hash = "sha256:dd2b0eaebb2a9e167d1cefedab4700e5dcbdb076114eed30b58b97ed6b37d6f2"}, {file = "texttable-1.6.4.tar.gz", hash = "sha256:42ee7b9e15f7b225747c3fa08f43c5d6c83bc899f80ff9bae9319334824076e9"}, diff --git a/pyproject.toml b/pyproject.toml index d46ae332dd..0dd184d4be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,8 @@ license = "Apache-2.0" [tool.poetry.dependencies] python = "^3.9" Jinja2 = "^3.0.3" +sqlitedict = "^2.0.0" + [tool.poetry.dev-dependencies] pytest = "^7.0.1" diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index 3a8f68c64f..ba11996524 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -72,6 +72,7 @@ services: - SC4S_LISTEN_PROOFPOINT_PPS_TLS_PORT=7000 #- SC4S_DEST_SPLUNK_HEC_GLOBAL=no #- SC4S_DEST_CEF_SPLUNK_HEC=yes + - SC4S_USE_NAME_CACHE=yes splunk: image: docker.io/splunk/splunk:latest