From e473d1ee263bf3dc35b5e60b5cfef552460fd21e Mon Sep 17 00:00:00 2001 From: Arseniy Kuznetsov Date: Sun, 14 Apr 2024 17:56:13 +0200 Subject: [PATCH] more netwatch metrics (#97) --- mktxp/collector/netwatch_collector.py | 43 ++++++++++++++++++++--- mktxp/datasource/kid_control_device_ds.py | 2 +- mktxp/datasource/netwatch_ds.py | 8 +---- mktxp/flow/processor/output.py | 19 +++++----- 4 files changed, 51 insertions(+), 21 deletions(-) diff --git a/mktxp/collector/netwatch_collector.py b/mktxp/collector/netwatch_collector.py index 36a1e1ae..27855afb 100644 --- a/mktxp/collector/netwatch_collector.py +++ b/mktxp/collector/netwatch_collector.py @@ -25,11 +25,44 @@ def collect(router_entry): if not router_entry.config_entry.netwatch: return - netwatch_labels = ['host', 'timeout', 'interval', 'since', 'status', 'comment', 'name'] - netwatch_records = NetwatchMetricsDataSource.metric_records(router_entry, metric_labels = netwatch_labels) + netwatch_labels = ['host', 'timeout', 'interval', 'since', 'status', 'comment', 'name', "done_tests", "type", "failed_tests", + "loss_count", "loss_percent", "rtt_avg", "rtt_min", "rtt_max", "rtt_jitter", "rtt_stdev", + "tcp_connect_time", "http_status_code", "http_resp_time", ] + translation_table = { + 'status': lambda value: '1' if value == 'up' else '0', + 'rtt_avg': lambda value: BaseOutputProcessor.parse_timedelta_milliseconds(value, ms_span=True) if value else '0', + 'rtt_min': lambda value: BaseOutputProcessor.parse_timedelta_milliseconds(value, ms_span=True) if value else '0', + 'rtt_max': lambda value: BaseOutputProcessor.parse_timedelta_milliseconds(value, ms_span=True) if value else '0', + 'rtt_jitter': lambda value: BaseOutputProcessor.parse_timedelta_milliseconds(value, ms_span=True) if value else '0', + 'rtt_stdev': lambda value: BaseOutputProcessor.parse_timedelta_milliseconds(value, ms_span=True) if value else '0', + 'tcp_connect_time': lambda value: BaseOutputProcessor.parse_timedelta_milliseconds(value, ms_span=True) 
if value else '0', + 'http_resp_time': lambda value: BaseOutputProcessor.parse_timedelta_milliseconds(value, ms_span=True) if value else '0'} + netwatch_records = NetwatchMetricsDataSource.metric_records(router_entry, metric_labels = netwatch_labels, translation_table=translation_table) if netwatch_records: - yield BaseCollector.info_collector('netwatch', 'Netwatch Info Metrics', netwatch_records, netwatch_labels) + netwatch_info_labels = ['host', 'timeout', 'interval', 'since', 'status', 'comment', 'name'] + yield BaseCollector.info_collector('netwatch', 'Netwatch Info Metrics', netwatch_records, netwatch_info_labels) - yield BaseCollector.gauge_collector('netwatch_status', 'Netwatch Status Metrics', netwatch_records, 'status', ['name']) - + yield BaseCollector.gauge_collector('netwatch_status', 'Netwatch Status Metrics', netwatch_records, 'status', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_done_tests', 'Netwatch Done Tests', netwatch_records, 'done_tests', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_failed_tests', 'Netwatch Failed Tests', netwatch_records, 'failed_tests', ['name', 'type']) + + # ICMP specific + icmp_records = [record for record in netwatch_records if record.get("type", None) == "icmp"] + yield BaseCollector.gauge_collector('netwatch_icmp_loss_count', 'Netwatch ICMP Loss Count', icmp_records, 'loss_count', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_icmp_loss_percent', 'Netwatch ICMP Loss Percent', icmp_records, 'loss_percent', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_icmp_rtt_avg_ms', 'Netwatch ICMP Round Trip Average', icmp_records, 'rtt_avg', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_icmp_rtt_min_ms', 'Netwatch ICMP Round Trip Min', icmp_records, 'rtt_min', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_icmp_rtt_max_ms', 'Netwatch ICMP Round Trip Max', icmp_records, 'rtt_max', ['name', 'type']) + yield 
BaseCollector.gauge_collector('netwatch_icmp_rtt_jitter_ms', 'Netwatch ICMP Round Trip Jitter', icmp_records, 'rtt_jitter', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_icmp_rtt_stdev_ms', 'Netwatch ICMP Round Trip Stdev', icmp_records, 'rtt_stdev', ['name', 'type']) + + # TCP specific + tcp_records = [record for record in netwatch_records if record.get("type", None) == "tcp-conn"] + yield BaseCollector.gauge_collector('netwatch_tcp_connect_time_ms', 'Netwatch TCP Connect Time', tcp_records, 'tcp_connect_time', ['name', 'type']) + + # HTTP(s) specific + http_records = [record for record in netwatch_records if record.get("type", None) in ["http-get", "https-get"]] + yield BaseCollector.gauge_collector('netwatch_http_status_code', 'Netwatch HTTP status code', http_records, 'http_status_code', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_http_resp_time', 'Netwatch HTTP status code', http_records, 'http_resp_time', ['name', 'type']) + yield BaseCollector.gauge_collector('netwatch_tcp_connect_time_ms', 'Netwatch TCP Connect Time', http_records, 'tcp_connect_time', ['name', 'type']) diff --git a/mktxp/datasource/kid_control_device_ds.py b/mktxp/datasource/kid_control_device_ds.py index 87be35c8..fc55868f 100644 --- a/mktxp/datasource/kid_control_device_ds.py +++ b/mktxp/datasource/kid_control_device_ds.py @@ -20,7 +20,7 @@ class KidDeviceMetricsDataSource: """ @staticmethod - def metric_records(router_entry, *, metric_labels=None, translation_table = None): + def metric_records(router_entry, *, metric_labels=None, translation_table=None): if metric_labels is None: metric_labels = [] try: diff --git a/mktxp/datasource/netwatch_ds.py b/mktxp/datasource/netwatch_ds.py index 7101bd56..1e609f23 100644 --- a/mktxp/datasource/netwatch_ds.py +++ b/mktxp/datasource/netwatch_ds.py @@ -19,13 +19,12 @@ class NetwatchMetricsDataSource: ''' Netwatch Metrics data provider ''' @staticmethod - def metric_records(router_entry, *, metric_labels = 
None): + def metric_records(router_entry, *, metric_labels=None, translation_table=None): if metric_labels is None: metric_labels = [] try: netwatch_records = router_entry.api_connection.router_api().get_resource('/tool/netwatch').get(disabled='false') if 'name' in metric_labels: - for netwatch_record in netwatch_records: comment = netwatch_record.get('comment') host = netwatch_record.get('host') @@ -33,11 +32,6 @@ def metric_records(router_entry, *, metric_labels = None): netwatch_record['name'] = f'{host} ({comment[0:20]})' if not router_entry.config_entry.use_comments_over_names else comment else: netwatch_record['name'] = host - - # translation rules - translation_table = {} - if 'status' in metric_labels: - translation_table['status'] = lambda value: '1' if value == 'up' else '0' return BaseDSProcessor.trimmed_records(router_entry, router_records = netwatch_records, translation_table = translation_table, metric_labels = metric_labels) except Exception as exc: diff --git a/mktxp/flow/processor/output.py b/mktxp/flow/processor/output.py index 1af33c55..62d576e1 100644 --- a/mktxp/flow/processor/output.py +++ b/mktxp/flow/processor/output.py @@ -114,21 +114,24 @@ def parse_bitrates(rate): return f"{int(rate / 1000 ** power)} {['bps', 'Kbps', 'Mbps', 'Gbps'][int(power)]}" @staticmethod - def parse_timedelta(time): - duration_interval_rgx = config_handler.re_compiled.get('duration_interval_rgx') + def parse_timedelta(time, ms_span=False): + # ms_span for milliseconds-long durations, since otherwise minutes would match the ms in the value + rgx_key = 'duration_interval_rgx_sp' if ms_span else 'duration_interval_rgx' + duration_interval_rgx = config_handler.re_compiled.get(rgx_key) if not duration_interval_rgx: - duration_interval_rgx = re.compile(r'((?P<weeks>\d+)w)?((?P<days>\d+)d)?((?P<hours>\d+)h)?((?P<minutes>\d+)m)?((?P<seconds>\d+)s)?((?P<milliseconds>\d+)ms)?') - config_handler.re_compiled['duration_interval_rgx'] = duration_interval_rgx + duration_interval_rgx = 
re.compile(r'((?P<seconds>\d+)s)?((?P<milliseconds>\d+)ms)?((?P<microseconds>\d+)us)?') if ms_span else\ + re.compile(r'((?P<weeks>\d+)w)?((?P<days>\d+)d)?((?P<hours>\d+)h)?((?P<minutes>\d+)m)?((?P<seconds>\d+)s)?((?P<milliseconds>\d+)ms)?') + config_handler.re_compiled[rgx_key] = duration_interval_rgx time_dict = duration_interval_rgx.match(time).groupdict() return timedelta(**{key: int(value) for key, value in time_dict.items() if value}) @staticmethod - def parse_timedelta_seconds(time): - return BaseOutputProcessor.parse_timedelta(time).total_seconds() + def parse_timedelta_seconds(time, ms_span=False): + return BaseOutputProcessor.parse_timedelta(time, ms_span=ms_span).total_seconds() @staticmethod - def parse_timedelta_milliseconds(time): - return BaseOutputProcessor.parse_timedelta(time) / timedelta(milliseconds=1) + def parse_timedelta_milliseconds(time, ms_span=False): + return BaseOutputProcessor.parse_timedelta(time, ms_span=ms_span) / timedelta(milliseconds=1) @staticmethod def parse_signal_strength(signal_strength):