From 3e2dc64b6265b67b22e3dc318fffe304f5eebd94 Mon Sep 17 00:00:00 2001 From: Henrik Ingo Date: Tue, 27 Feb 2024 02:18:33 +0200 Subject: [PATCH] Add JsonImporter Co-authored-by: Matt Fleming --- hunter/importer.py | 127 +++++++++++++++++++++++++++++++++++++++++- hunter/test_config.py | 35 ++++++++++-- 2 files changed, 156 insertions(+), 6 deletions(-) diff --git a/hunter/importer.py b/hunter/importer.py index b02eb46..0ae3f30 100644 --- a/hunter/importer.py +++ b/hunter/importer.py @@ -1,4 +1,5 @@ import csv +import json from collections import OrderedDict from contextlib import contextmanager from dataclasses import dataclass @@ -16,6 +17,7 @@ CsvTestConfig, GraphiteTestConfig, HistoStatTestConfig, + JsonTestConfig, PostgresMetric, PostgresTestConfig, TestConfig, @@ -268,7 +270,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto # Read metric values. Note we can still fail on conversion to float, # because the user is free to override the column selection and thus # they may select a column that contains non-numeric data: - for (name, i) in zip(metric_names, metric_indexes): + for name, i in zip(metric_names, metric_indexes): try: data[name].append(float(row[i])) except ValueError as err: @@ -498,7 +500,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto # Read metric values. Note we can still fail on conversion to float, # because the user is free to override the column selection and thus # they may select a column that contains non-numeric data: - for (name, i) in zip(metric_names, metric_indexes): + for name, i in zip(metric_names, metric_indexes): try: data[name].append(float(row[i])) except ValueError as err: @@ -537,12 +539,125 @@ def fetch_all_metric_names(self, test_conf: PostgresTestConfig) -> List[str]: return [m for m in test_conf.metrics.keys()] +class JsonImporter(Importer): + def __init__(self): + self._data = {} + + @staticmethod + def _read_json_file(filename: str): + try: + return json.load(open(filename)) + except FileNotFoundError: + raise DataImportError(f"Input file not found: {filename}") + + def inputfile(self, test_conf: JsonTestConfig): + if test_conf.file not in self._data: + self._data[test_conf.file] = self._read_json_file(test_conf.file) + return self._data[test_conf.file] + + def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelector()) -> Series: + + if not isinstance(test_conf, JsonTestConfig): + raise ValueError("Expected JsonTestConfig") + + # TODO: refactor. THis is copy pasted from CSV importer + since_time = selector.since_time + until_time = selector.until_time + + if since_time.timestamp() > until_time.timestamp(): + raise DataImportError( + f"Invalid time range: [" + f"{format_timestamp(int(since_time.timestamp()))}, " + f"{format_timestamp(int(until_time.timestamp()))}]" + ) + + time = [] + data = OrderedDict() + metrics = OrderedDict() + attributes = OrderedDict() + + for name in self.fetch_all_metric_names(test_conf): + # Ignore metrics if selector.metrics is not None and name is not in selector.metrics + if selector.metrics is not None and name not in selector.metrics: + continue + data[name] = [] + + attr_names = self.fetch_all_attribute_names(test_conf) + for name in attr_names: + attributes[name] = [] + + # If the user specified a branch, only include results from that branch. + # Otherwise if the test config specifies a branch, only include results from that branch. + # Else include all results. + branch = None + if selector.branch: + branch = selector.branch + elif test_conf.base_branch: + branch = test_conf.base_branch + + objs = self.inputfile(test_conf) + list_of_json_obj = [] + for o in objs: + if branch and o["attributes"]["branch"] != branch: + continue + list_of_json_obj.append(o) + + for result in list_of_json_obj: + time.append(result["timestamp"]) + for metric in result["metrics"]: + # Skip metrics not in selector.metrics if selector.metrics is enabled + if metric["name"] not in data: + continue + + data[metric["name"]].append(metric["value"]) + metrics[metric["name"]] = Metric(1, 1.0) + for a in attr_names: + attributes[a] = [o["attributes"][a] for o in list_of_json_obj] + + # Leave last n points: + time = time[-selector.last_n_points :] + tmp = data + data = {} + for k, v in tmp.items(): + data[k] = v[-selector.last_n_points :] + tmp = attributes + attributes = {} + for k, v in tmp.items(): + attributes[k] = v[-selector.last_n_points :] + + return Series( + test_conf.name, + branch=None, + time=time, + metrics=metrics, + data=data, + attributes=attributes, + ) + + def fetch_all_metric_names(self, test_conf: JsonTestConfig) -> List[str]: + metric_names = set() + list_of_json_obj = self.inputfile(test_conf) + for result in list_of_json_obj: + for metric in result["metrics"]: + metric_names.add(metric["name"]) + return [m for m in metric_names] + + def fetch_all_attribute_names(self, test_conf: JsonTestConfig) -> List[str]: + attr_names = set() + list_of_json_obj = self.inputfile(test_conf) + for result in list_of_json_obj: + for a in result["attributes"].keys(): + attr_names.add(a) + return [m for m in attr_names] + + class Importers: __config: Config __csv_importer: Optional[CsvImporter] __graphite_importer: Optional[GraphiteImporter] __histostat_importer: Optional[HistoStatImporter] __postgres_importer: Optional[PostgresImporter] + __json_importer: Optional[JsonImporter] def __init__(self, config: Config): self.__config = config @@ -550,6 +665,7 @@ def __init__(self, config: Config): self.__graphite_importer = None self.__histostat_importer = None self.__postgres_importer = None + self.__json_importer = None def csv_importer(self) -> CsvImporter: if self.__csv_importer is None: @@ -571,6 +687,11 @@ def postgres_importer(self) -> PostgresImporter: self.__postgres_importer = PostgresImporter(Postgres(self.__config.postgres)) return self.__postgres_importer + def json_importer(self) -> JsonImporter: + if self.__json_importer is None: + self.__json_importer = JsonImporter() + return self.__json_importer + def get(self, test: TestConfig) -> Importer: if isinstance(test, CsvTestConfig): return self.csv_importer() @@ -580,5 +701,7 @@ def get(self, test: TestConfig) -> Importer: return self.histostat_importer() elif isinstance(test, PostgresTestConfig): return self.postgres_importer() + elif isinstance(test, JsonTestConfig): + return self.json_importer() else: raise ValueError(f"Unsupported test type {type(test)}") diff --git a/hunter/test_config.py b/hunter/test_config.py index 97e9c31..7e01b79 100644 --- a/hunter/test_config.py +++ b/hunter/test_config.py @@ -162,7 +162,7 @@ def create_test_config(name: str, config: Dict) -> TestConfig: Loads properties of a test from a dictionary read from hunter's config file This dictionary must have the `type` property to determine the type of the test. Other properties depend on the type. - Currently supported test types are `fallout`, `graphite`, `csv`, and `psql`. + Currently supported test types are `fallout`, `graphite`, `csv`, `json`, and `psql`. """ test_type = config.get("type") if test_type == "csv": @@ -173,6 +173,8 @@ def create_test_config(name: str, config: Dict) -> TestConfig: return create_histostat_test_config(name, config) elif test_type == "postgres": return create_postgres_test_config(name, config) + elif test_type == "json": + return create_json_test_config(name, config) elif test_type is None: raise TestConfigError(f"Test type not set for test {name}") else: @@ -192,7 +194,7 @@ def create_csv_test_config(test_name: str, test_info: Dict) -> CsvTestConfig: for name in metrics_info: metrics.append(CsvMetric(name, 1, 1.0, name)) elif isinstance(metrics_info, Dict): - for (metric_name, metric_conf) in metrics_info.items(): + for metric_name, metric_conf in metrics_info.items(): metrics.append( CsvMetric( name=metric_name, @@ -231,7 +233,7 @@ def create_graphite_test_config(name: str, test_info: Dict) -> GraphiteTestConfi metrics = [] try: - for (metric_name, metric_conf) in metrics_info.items(): + for metric_name, metric_conf in metrics_info.items(): metrics.append( GraphiteMetric( name=metric_name, @@ -279,7 +281,7 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest for name in metrics_info: metrics.append(CsvMetric(name, 1, 1.0)) elif isinstance(metrics_info, Dict): - for (metric_name, metric_conf) in metrics_info.items(): + for metric_name, metric_conf in metrics_info.items(): metrics.append( PostgresMetric( name=metric_name, @@ -294,3 +296,28 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest return PostgresTestConfig(test_name, query, update_stmt, time_column, metrics, attributes) except KeyError as e: raise TestConfigError(f"Configuration key not found in test {test_name}: {e.args[0]}") + + +@dataclass +class JsonTestConfig(TestConfig): + name: str + file: str + base_branch: str + + # TODO: This should return the list defined in the config file hunter.yaml + def fully_qualified_metric_names(self): + from hunter.importer import JsonImporter + + metric_names = JsonImporter().fetch_all_metric_names(self) + return [f"{self.name}.{m}" for m in metric_names] + + +def create_json_test_config(name: str, test_info: Dict) -> JsonTestConfig: + try: + file = test_info["file"] + except KeyError as e: + raise TestConfigError(f"Configuration key not found in test {name}: {e.args[0]}") + if not os.path.exists(file): + raise TestConfigError(f"Configuration file not found: {file}") + base_branch = test_info.get("base_branch", None) + return JsonTestConfig(name, file, base_branch)