From f8f50889bb6d30abb0e4214df7361d687d82950a Mon Sep 17 00:00:00 2001 From: Tyler Morrow Date: Thu, 15 Aug 2024 06:40:45 -0600 Subject: [PATCH] Refactor for Python and dependency upgrades. --- .github/workflows/build-and-run-tests.yml | 10 +- .github/workflows/run-examples.yml | 10 +- README.md | 11 +- examples/data/synthesis/mix_seeds.py | 6 +- examples/modeling/arad.py | 4 +- examples/modeling/arad_latent_prediction.py | 7 +- examples/modeling/classifier_comparison.py | 4 +- .../modeling/label_proportion_estimation.py | 1 - examples/modeling/multi_event_classifier.py | 74 --- .../modeling/neural_network_classifier.py | 2 +- examples/visualization/confusion_matrix.py | 2 +- pyproject.toml | 51 +- riid/data/converters/__init__.py | 25 +- riid/data/sampleset.py | 23 +- riid/data/synthetic/seed.py | 9 +- riid/losses/__init__.py | 19 +- riid/metrics.py | 38 +- riid/models/__init__.py | 4 +- riid/models/bayes.py | 148 +++--- riid/models/layers.py | 82 +++ riid/models/neural_nets/__init__.py | 467 +++++------------- riid/models/neural_nets/arad.py | 148 +++--- tests/anomaly_tests.py | 7 +- tests/model_tests.py | 19 +- tests/seedmixer_tests.py | 29 +- tests/visualize_tests.py | 3 +- 26 files changed, 509 insertions(+), 694 deletions(-) delete mode 100644 examples/modeling/multi_event_classifier.py create mode 100644 riid/models/layers.py diff --git a/.github/workflows/build-and-run-tests.yml b/.github/workflows/build-and-run-tests.yml index b1c8e2a4..96633e40 100644 --- a/.github/workflows/build-and-run-tests.yml +++ b/.github/workflows/build-and-run-tests.yml @@ -7,8 +7,8 @@ jobs: build: strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] - os: [ubuntu-latest, windows-latest, macos-13] + python-version: ["3.9", "3.10", "3.11", "3.12"] + os: [ubuntu-latest, windows-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - name: Checkout @@ -19,6 +19,12 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" cache-dependency-path: "**/pyproject.toml" + - name: Install HDF5 (macOS only) + if: runner.os == 'macOS' + run: brew install hdf5 + - name: Set HDF5_DIR environment variable (macOS only) + if: runner.os == 'macOS' + run: echo "HDF5_DIR=$(brew --prefix hdf5)" >> $GITHUB_ENV - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/run-examples.yml b/.github/workflows/run-examples.yml index 295143f1..ddf06b6e 100644 --- a/.github/workflows/run-examples.yml +++ b/.github/workflows/run-examples.yml @@ -7,8 +7,8 @@ jobs: build: strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] - os: [ubuntu-latest, windows-latest, macos-13] + python-version: ["3.9", "3.10", "3.11", "3.12"] + os: [ubuntu-latest, windows-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - name: Checkout @@ -19,6 +19,12 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" cache-dependency-path: "**/pyproject.toml" + - name: Install HDF5 (macOS only) + if: runner.os == 'macOS' + run: brew install hdf5 + - name: Set HDF5_DIR environment variable (macOS only) + if: runner.os == 'macOS' + run: echo "HDF5_DIR=$(brew --prefix hdf5)" >> $GITHUB_ENV - name: Install dependencies run: | python -m pip install --upgrade pip setuptools wheel diff --git a/README.md b/README.md index 7d7c285e..8f01a50c 100644 --- a/README.md +++ b/README.md @@ -11,23 +11,22 @@ PyRIID is a Python package providing modeling and data synthesis utilities for m Requirements: -- Python version: 3.8 to 3.10 +- Python version: 3.9 to 3.12 + - Note: we recommended the highest Python 
version you can manage as anecdotally, we have noticed that everything just tends to get faster. - Operating systems: Windows, Mac, or Ubuntu -A virtual environment is recommended. - Tests and examples are run via Actions on many combinations of Python version and operating system. You can verify support for your platform by checking the workflow files. ### For Use -To use the latest version on PyPI (note: changes are slower to appear here), run: +To use the latest version on PyPI, run: ```sh pip install riid ``` -**For the latest features, run:** +Note that changes are slower to appear on PyPI, so for the latest features, run:** ```sh pip install git+https://github.com/sandialabs/pyriid.git@main @@ -86,7 +85,7 @@ Full copyright details can be found [here](https://github.com/sandialabs/PyRIID/ ## Acknowledgements **Thank you** to the U.S. Department of Energy, National Nuclear Security Administration, -Office of Defense Nuclear Nonproliferation Research and Development (DNN R&D) for funding that has led to version `2.x`. +Office of Defense Nuclear Nonproliferation Research and Development (DNN R&D) for funding that has led to versions `2.0` and `2.1`. Additionally, **thank you** to the following individuals who have provided invaluable subject-matter expertise: diff --git a/examples/data/synthesis/mix_seeds.py b/examples/data/synthesis/mix_seeds.py index 907dfc5a..911fc56d 100644 --- a/examples/data/synthesis/mix_seeds.py +++ b/examples/data/synthesis/mix_seeds.py @@ -2,14 +2,16 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This example demonstrates how to generate synthetic gamma spectra from seeds.""" +import numpy as np from riid.data.synthetic import get_dummy_seeds from riid.data.synthetic.seed import SeedMixer fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() -mixed_fg_seeds_ss = SeedMixer(fg_seeds_ss, mixture_size=2)\ +rng = np.random.default_rng(3) +mixed_fg_seeds_ss = SeedMixer(fg_seeds_ss, mixture_size=2, rng=rng)\ .generate(n_samples=10) -mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3)\ +mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3, rng=rng)\ .generate(n_samples=10) print(mixed_fg_seeds_ss) diff --git a/examples/modeling/arad.py b/examples/modeling/arad.py index 846975fe..f15d2e4e 100644 --- a/examples/modeling/arad.py +++ b/examples/modeling/arad.py @@ -14,7 +14,7 @@ # Config rng = np.random.default_rng(42) OOD_QUANTILE = 0.99 -VERBOSE = True +VERBOSE = False # Some of the following parameters are set low because this example runs on GitHub Actions and # we don't want it taking a bunch of time. # When running this locally, change the values per their corresponding comment, otherwise @@ -54,7 +54,7 @@ arad.predict(gross_train_ss) ood_threshold = np.quantile(gross_train_ss.info.recon_error, OOD_QUANTILE) - reconstructions = arad.predict(test_ss, verbose=True) + reconstructions = arad.predict(test_ss, verbose=VERBOSE) ood = test_ss.info.recon_error.values > ood_threshold false_positive_rate = ood.mean() mean_recon_error = test_ss.info.recon_error.values.mean() diff --git a/examples/modeling/arad_latent_prediction.py b/examples/modeling/arad_latent_prediction.py index fed2d805..9b233954 100644 --- a/examples/modeling/arad_latent_prediction.py +++ b/examples/modeling/arad_latent_prediction.py @@ -5,16 +5,17 @@ from an ARAD latent space. 
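The `mix_seeds.py` change above threads an explicit NumPy generator through `SeedMixer`. A minimal sketch of why that matters, assuming the mixer draws all of its randomness from the provided `rng` (the seed value and sample count are arbitrary):

```python
import numpy as np

from riid.data.synthetic import get_dummy_seeds
from riid.data.synthetic.seed import SeedMixer

fg_seeds_ss, _ = get_dummy_seeds().split_fg_and_bg()

# Identically seeded generators should reproduce the same mixtures.
mix_a = SeedMixer(fg_seeds_ss, mixture_size=2, rng=np.random.default_rng(3)).generate(n_samples=10)
mix_b = SeedMixer(fg_seeds_ss, mixture_size=2, rng=np.random.default_rng(3)).generate(n_samples=10)
print(np.allclose(mix_a.spectra.values, mix_b.spectra.values))  # expected: True
```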
""" import numpy as np +from keras.api.metrics import Accuracy, CategoricalCrossentropy from sklearn.metrics import f1_score, mean_squared_error from riid.data.synthetic import get_dummy_seeds from riid.data.synthetic.seed import SeedMixer from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets.arad import ARADv2, ARADLatentPredictor +from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv2 # Config rng = np.random.default_rng(42) -VERBOSE = True +VERBOSE = False # Some of the following parameters are set low because this example runs on GitHub Actions and # we don't want it taking a bunch of time. # When running this locally, change the values per their corresponding comment, otherwise @@ -66,7 +67,7 @@ print("Training Classifier") arad_classifier = ARADLatentPredictor( loss="categorical_crossentropy", - metrics=("accuracy", "categorical_crossentropy"), + metrics=[Accuracy(), CategoricalCrossentropy()], final_activation="softmax" ) arad_classifier.fit( diff --git a/examples/modeling/classifier_comparison.py b/examples/modeling/classifier_comparison.py index 7b1b4e12..4fce43e7 100644 --- a/examples/modeling/classifier_comparison.py +++ b/examples/modeling/classifier_comparison.py @@ -36,8 +36,8 @@ train_fg_ss, _ = static_synth.generate(fg_seeds_ss, mixed_bg_seed_ss, verbose=False) train_fg_ss.normalize() -model_nn = MLPClassifier(hidden_layers=(5,)) -model_nn.fit(train_fg_ss, epochs=10, patience=5, verbose=1) +model_nn = MLPClassifier() +model_nn.fit(train_fg_ss, epochs=10, patience=5) # Create PB model model_pb = PoissonBayesClassifier() diff --git a/examples/modeling/label_proportion_estimation.py b/examples/modeling/label_proportion_estimation.py index 8f3a4ef5..b4e5e4c4 100644 --- a/examples/modeling/label_proportion_estimation.py +++ b/examples/modeling/label_proportion_estimation.py @@ -55,7 +55,6 @@ batch_size=10, epochs=2, validation_split=0.2, - verbose=True, bg_cps=300 ) diff --git a/examples/modeling/multi_event_classifier.py b/examples/modeling/multi_event_classifier.py deleted file mode 100644 index eac23d44..00000000 --- a/examples/modeling/multi_event_classifier.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). -# Under the terms of Contract DE-NA0003525 with NTESS, -# the U.S. Government retains certain rights in this software. -"""This example demonstrates how to use the MLP classifier.""" -from copy import deepcopy as copy - -from sklearn.metrics import f1_score - -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets import MLPClassifier, MultiEventClassifier - -fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() -static_syn = StaticSynthesizer( - # log10 sampling samples lower SNR values more frequently. - # This makes the SampleSet overall "harder" to classify. 
- snr_function="log10", - samples_per_seed=50, - return_fg=True, - return_gross=True, -) - -# Generate some training data -fg_ss, gross_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss) -bg_ss = gross_ss - fg_ss -bg_ss.normalize() -gross_ss.normalize() - -# Train two single event classifiers -model1 = MLPClassifier() -model1.fit(gross_ss, bg_ss=bg_ss, verbose=1, epochs=50, patience=20) - -model2 = MLPClassifier() -model2.fit(gross_ss, bg_ss=bg_ss, verbose=1, epochs=50, patience=20) - -# Generate two sample sets (with same sources but predictions from different models) -train2a_fg_ss, train2a_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss) -train2a_bg_ss = train2a_ss - train2a_fg_ss -train2b_ss = copy(train2a_ss) -train2b_bg_ss = copy(train2a_bg_ss) - -model1.predict(train2a_ss, train2a_bg_ss) -model2.predict(train2b_ss, train2b_bg_ss) - -# Train MultiEvent model -mec = MultiEventClassifier() -mec.fit( - [train2a_ss, train2b_ss], - train2a_ss.sources.groupby(axis=1, level="Isotope", sort=False).sum(), - epochs=50 -) - -# Make predictions on multi model - -multi_preds = mec.predict([train2a_ss, train2b_ss]) - -# Compare performance for single event models and multi-event model -m1_f1_score = f1_score(train2a_ss.get_predictions(), - train2a_ss.get_labels(), - average="weighted") -m2_f1_score = f1_score(train2b_ss.get_predictions(), - train2b_ss.get_labels(), - average="weighted") - -multi_f1_score = f1_score(multi_preds.values.argmax(axis=1), - train2a_ss.get_source_contributions().argmax(axis=1), - average="weighted") - -results_str = ( - f"M1 F1 Score: {m1_f1_score:.2f}\n" - f"M2 F1 Score: {m2_f1_score:.2f}\n" - f"M12 F1 Score: {multi_f1_score:.2f}" -) -print(results_str) diff --git a/examples/modeling/neural_network_classifier.py b/examples/modeling/neural_network_classifier.py index 8ab7d55f..277a2dc5 100644 --- a/examples/modeling/neural_network_classifier.py +++ b/examples/modeling/neural_network_classifier.py @@ -24,7 +24,7 @@ train_ss.normalize() model = MLPClassifier() -model.fit(train_ss, epochs=10, patience=5, verbose=1) +model.fit(train_ss, epochs=10, patience=5) # Generate some test data static_synth.samples_per_seed = 50 diff --git a/examples/visualization/confusion_matrix.py b/examples/visualization/confusion_matrix.py index 704f4d1b..b2e25e72 100644 --- a/examples/visualization/confusion_matrix.py +++ b/examples/visualization/confusion_matrix.py @@ -26,7 +26,7 @@ .generate(fg_seeds_ss, mixed_bg_seed_ss) train_ss.normalize() -model = MLPClassifier(hidden_layers=(8,)) +model = MLPClassifier() model.fit(train_ss, verbose=0, epochs=50) # Generate some test data diff --git a/pyproject.toml b/pyproject.toml index a7af43bc..c20c480e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ namespaces = false [project] name = "riid" description = "Machine learning-based models and utilities for radioisotope identification" -version = "2.1.0" +version = "2.2.0" maintainers = [ {name="Tyler Morrow", email="tmorro@sandia.gov"}, ] @@ -41,44 +41,43 @@ classifiers = [ 'Topic :: Software Development', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', + 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ] keywords = ["pyriid", "riid", 
"machine learning", "radioisotope identification", "gamma spectrum"] -requires-python = ">=3.8,<3.11" +requires-python = ">=3.9,<3.13" dependencies = [ - "jsonschema ==4.17.*", - "matplotlib ==3.7.*", - "pyyaml ==6.0.*", - "seaborn ==0.12.*", - "tf2onnx ==1.14.*", - "onnx ==1.14.1", - "tqdm ==4.65.*", - "numpy ==1.23.*", - "pandas ==2.0.*", - "parmap ==1.6.*", - "pythonnet ==3.0.*; platform_system == 'Windows'", - "tables ==3.8.*", - "tensorflow ==2.12.*", - "tensorflow-io ==0.27.*", - "tensorflow-model-optimization ==0.7.*", - "tensorflow-probability ==0.20.*", - "typeguard ==2.7.*", - "scikit-learn ==1.2.*", + "jsonschema ==4.23.*", # 3.8 - 3.13 + "matplotlib ==3.9.*", # 3.9 - 3.12 + "numpy ==1.26.*", # 3.9 - 3.12, also to be limited by onnx 1.16.2 + "pandas ==2.2.*", # >= 3.9 + "pythonnet ==3.0.3; platform_system == 'Windows'", # 3.7 - 3.12 + "pyyaml ==6.0.*", # >= 3.6 + "tables ==3.9.*", # >= 3.9 + "scikit-learn ==1.5.*", # 3.9 - 3.12 + "scipy ==1.13.*", # >= 3.10 + "seaborn ==0.13.*", # >= 3.8 + "tensorflow ==2.16.*", # 3.9 - 3.12 + "tensorflow-model-optimization ==0.8.*", # 3.7 - 3.12 + "onnx ==1.16.1", # 3.7 - 3.10 + "tf2onnx ==1.16.1", # 3.7 - 3.10 + "tqdm ==4.66.*", # >= 3.7 + "typeguard ==4.3.*", # 3.9 - 3.12 ] [project.optional-dependencies] dev = [ - "flake8", - "flake8-quotes", "coverage", "ipykernel", + "flake8", + "flake8-quotes", "tabulate", ] [project.urls] -repository = "https://github.com/sandialabs/PyRIID" +Documentation = "https://sandialabs.github.io/PyRIID" +Repository = "https://github.com/sandialabs/PyRIID" diff --git a/riid/data/converters/__init__.py b/riid/data/converters/__init__.py index 2959f299..c956e827 100644 --- a/riid/data/converters/__init__.py +++ b/riid/data/converters/__init__.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Callable -import parmap as pm +from joblib import Parallel, delayed def _validate_and_create_output_dir(output_dir: str): @@ -17,8 +17,8 @@ def _validate_and_create_output_dir(output_dir: str): def convert_directory(input_dir_path: str, conversion_func: Callable, file_ext: str, - pm_processes: int = 8, pm_chunksize: int = 1, **kwargs): - """Convert and save every file in a specified directory in parallel. + n_jobs: int = 8, **kwargs): + """Convert and save every file in a specified directory. Conversion functions can be found in sub-modules: @@ -32,16 +32,14 @@ def convert_directory(input_dir_path: str, conversion_func: Callable, file_ext: convert_directory(...) ``` - Consider setting `pm_processes` to `multiprocessing.cpu_count()`; - unfortunately, `pm_chunksize` requires some experimentation to fully optimize. + Tip: for max utilization, considering setting `n_jobs` to `multiprocessing.cpu_count()`. 
Args: input_dir_path: directory path containing the input files conversion_func: function used to convert a data file to a `SampleSet` file_ext: file extension to read in for conversion - pm_processes: parmap parameter to set the # of processes - pm_chunksize: parmap parameter to set the chunksize - kwargs: keyword args passed to underlying conversion_func operations + n_jobs: `joblib.Parallel` parameter to set the # of jobs + kwargs: additional keyword args passed to conversion_func """ input_path = Path(input_dir_path) if not input_path.exists() or not input_path.is_dir(): @@ -50,13 +48,6 @@ def convert_directory(input_dir_path: str, conversion_func: Callable, file_ext: input_file_paths = sorted(glob.glob(f"{input_dir_path}/*.{file_ext}")) - x = pm.map( - conversion_func, - input_file_paths, - **kwargs, - pm_processes=pm_processes, - pm_chunksize=pm_chunksize, - pm_parallel=True, - pm_pbar=True, + Parallel(n_jobs, verbose=10)( + delayed(conversion_func)(path, **kwargs) for path in input_file_paths ) - return x diff --git a/riid/data/sampleset.py b/riid/data/sampleset.py index 9e520428..47dd0332 100644 --- a/riid/data/sampleset.py +++ b/riid/data/sampleset.py @@ -13,7 +13,7 @@ import random import re import warnings -from datetime import datetime +from datetime import datetime, timezone from enum import Enum from typing import Callable, Iterable, Tuple, Union @@ -759,8 +759,9 @@ def concat(self, ss_list: list): sort=False ) self._sources = self._sources.where(pd.notnull(self._sources), 0) + existing_info_df = self._info if not self._info.empty else None self._info = pd.concat( - [self._info] + [ss.info for ss in ss_list], + [existing_info_df] + [ss.info for ss in ss_list], ignore_index=True, sort=False ) @@ -1015,7 +1016,8 @@ def get_confidences(self, fg_seeds_ss: SampleSet, bg_seed_ss: SampleSet = None, self.spectra.values, **confidence_func_kwargs ) - return np.array(confidences) + confidences = np.array(confidences) + return confidences def _get_spectral_distances(self, distance_func=distance.jensenshannon) -> np.array: n_samples = self.n_samples @@ -1111,7 +1113,7 @@ def get_source_contributions(self, target_level="Isotope") -> np.ndarray: Returns: Array containing the ground truth contributions for each sample """ - collapsed_sources = self.sources.groupby(axis=1, level=target_level).sum() + collapsed_sources = self.sources.T.groupby(target_level).sum().T sources_values = np.nan_to_num(collapsed_sources) return sources_values @@ -1465,7 +1467,8 @@ def _dict_to_bulleted_list(data_dict: dict, level=0, indent=4, bullet="-") -> st def _get_utc_timestamp(): - ts = datetime.utcnow().isoformat(sep=" ", timespec="seconds") + now_utc = datetime.now(timezone.utc) + ts = now_utc.isoformat(sep=" ", timespec="seconds") return ts @@ -1496,10 +1499,10 @@ def _get_row_labels(df: pd.DataFrame, target_level: str = "Isotope", max_only: b """ if max_only: if level_aggregation == "sum": - values = df.groupby(axis=1, level=target_level).sum() + values = df.T.groupby(target_level).sum().T labels = values.idxmax(axis=1) elif level_aggregation == "mean": - values = df.groupby(axis=1, level=target_level).mean() + values = df.T.groupby(target_level).mean().T labels = values.idxmax(axis=1) else: levels_to_drop = [ @@ -1513,9 +1516,9 @@ def _get_row_labels(df: pd.DataFrame, target_level: str = "Isotope", max_only: b labels = [f"{x} ({y:.2f})" for x, y in zip(labels, values)] else: # Much slower if level_aggregation == "sum": - values = df.groupby(axis=1, level=target_level).sum() + values = 
df.T.groupby(target_level).sum().T elif level_aggregation == "mean": - values = df.groupby(axis=1, level=target_level).mean() + values = df.T.groupby(target_level).mean().T else: values = df mask = values.ge(min_value).values @@ -1698,7 +1701,7 @@ def _ss_to_pcf_dict(ss: SampleSet, verbose=False) -> dict: info_timestamps = ss.info.timestamp.fillna("") info_tags = ss.info.tag.fillna("") - info_zero_fill = ss.info.fillna(0).astype(float, errors="ignore") + info_zero_fill = ss.info.infer_objects(copy=False).astype(float, errors="ignore") for i in sample_range: title = isotopes[i] if not isotopes.empty else NO_ISOTOPE description = ss.info.description.fillna("").iloc[i] diff --git a/riid/data/synthetic/seed.py b/riid/data/synthetic/seed.py index 08e19215..9fd1b2c4 100644 --- a/riid/data/synthetic/seed.py +++ b/riid/data/synthetic/seed.py @@ -292,10 +292,13 @@ def __call__(self, n_samples: int, max_batch_size: int = 100, ) sources_df = pd.DataFrame([r], columns=sources_cols) batch_sources_dfs.append(sources_df) - empty_sources_df = pd.DataFrame([], columns=self.seeds_ss.sources.columns) - batch_ss.sources = pd\ - .concat([empty_sources_df] + batch_sources_dfs)\ + sources_df = pd\ + .concat(batch_sources_dfs)\ .fillna(0.0) + batch_ss.sources = sources_df.reindex( + columns=self.seeds_ss.sources.columns, + fill_value=0.0 + ) n_samples_produced += batch_size diff --git a/riid/losses/__init__.py b/riid/losses/__init__.py index ae18ccc8..26ade493 100644 --- a/riid/losses/__init__.py +++ b/riid/losses/__init__.py @@ -4,7 +4,7 @@ """This module contains custom loss functions.""" import numpy as np import tensorflow as tf -from keras import backend as K +from keras.api import ops def negative_log_f1(y_true: np.ndarray, y_pred: np.ndarray): @@ -18,13 +18,13 @@ def negative_log_f1(y_true: np.ndarray, y_pred: np.ndarray): Custom loss score on a log scale """ diff = y_true - y_pred - negs = K.clip(diff, -1.0, 0.0) - false_positive = -K.sum(negs, axis=-1) + negs = ops.clip(diff, -1.0, 0.0) + false_positive = -ops.sum(negs, axis=-1) true_positive = 1.0 - false_positive lower_clip = 1e-20 - true_positive = K.clip(true_positive, lower_clip, 1.0) + true_positive = ops.clip(true_positive, lower_clip, 1.0) - return -K.mean(K.log(true_positive)) + return -ops.mean(ops.log(true_positive)) def negative_f1(y_true, y_pred): @@ -38,13 +38,13 @@ def negative_f1(y_true, y_pred): Custom loss score """ diff = y_true - y_pred - negs = K.clip(diff, -1.0, 0.0) - false_positive = -K.sum(negs, axis=-1) + negs = ops.clip(diff, -1.0, 0.0) + false_positive = -ops.sum(negs, axis=-1) true_positive = 1.0 - false_positive lower_clip = 1e-20 - true_positive = K.clip(true_positive, lower_clip, 1.0) + true_positive = ops.clip(true_positive, lower_clip, 1.0) - return -K.mean(true_positive) + return -ops.mean(true_positive) def build_keras_semisupervised_loss_func(supervised_loss_func, @@ -54,6 +54,7 @@ def build_keras_semisupervised_loss_func(supervised_loss_func, normalize: bool = False, normalize_scaler: float = 1.0, normalize_func=tf.math.tanh): + @tf.keras.utils.register_keras_serializable(package="Addons") def _semisupervised_loss_func(data, y_pred): """ Args: diff --git a/riid/metrics.py b/riid/metrics.py index 8b5c48dd..aa677448 100644 --- a/riid/metrics.py +++ b/riid/metrics.py @@ -20,14 +20,14 @@ def multi_f1(y_true: np.ndarray, y_pred: np.ndarray) -> float: Returns: Multi F1-score value(s) """ - from keras import backend as K + from keras.api import ops diff = y_true - y_pred - negs = K.clip(diff, -1.0, 0.0) - false_positive = 
-K.sum(negs, axis=-1) + negs = ops.clip(diff, -1.0, 0.0) + false_positive = -ops.sum(negs, axis=-1) true_positive = 1.0 - false_positive - return K.mean(true_positive) + return ops.mean(true_positive) def single_f1(y_true: np.ndarray, y_pred: np.ndarray): @@ -43,23 +43,23 @@ def single_f1(y_true: np.ndarray, y_pred: np.ndarray): F1-score value(s) """ import tensorflow as tf - from keras import backend as K + from keras.api import ops - a = tf.dtypes.cast(y_true == K.max(y_true, axis=1)[:, None], tf.float32) - b = tf.dtypes.cast(y_pred == K.max(y_pred, axis=1)[:, None], tf.float32) + a = tf.dtypes.cast(y_true == ops.max(y_true, axis=1)[:, None], tf.float32) + b = tf.dtypes.cast(y_pred == ops.max(y_pred, axis=1)[:, None], tf.float32) - TP_mat = tf.dtypes.cast(K.all(tf.stack([a, b]), axis=0), tf.float32) - FP_mat = tf.dtypes.cast(K.all(tf.stack([a != b, b == 1]), axis=0), tf.float32) - FN_mat = tf.dtypes.cast(K.all(tf.stack([a != b, a == 1]), axis=0), tf.float32) + TP_mat = tf.dtypes.cast(ops.all(tf.stack([a, b]), axis=0), tf.float32) + FP_mat = tf.dtypes.cast(ops.all(tf.stack([a != b, b == 1]), axis=0), tf.float32) + FN_mat = tf.dtypes.cast(ops.all(tf.stack([a != b, a == 1]), axis=0), tf.float32) - TPs = K.sum(TP_mat, axis=0) - FPs = K.sum(FP_mat, axis=0) - FNs = K.sum(FN_mat, axis=0) + TPs = ops.sum(TP_mat, axis=0) + FPs = ops.sum(FP_mat, axis=0) + FNs = ops.sum(FN_mat, axis=0) - F1s = 2 * TPs / (2*TPs + FNs + FPs + tf.fill(tf.shape(TPs), K.epsilon())) + F1s = 2 * TPs / (2*TPs + FNs + FPs + tf.fill(tf.shape(TPs), tf.keras.backend.epsilon())) - support = K.sum(a, axis=0) - f1 = K.sum(F1s * support) / K.sum(support) + support = ops.sum(a, axis=0) + f1 = ops.sum(F1s * support) / ops.sum(support) return f1 @@ -122,10 +122,10 @@ def precision_recall_curve(ss: SampleSet, smooth: bool = True, multiclass: bool https://jonathan-hui.medium.com/map-mean-average-precision-for-object-detection-45c121a31173) """ - y_true = ss.sources.groupby(axis=1, level=target_level, sort=False).sum() + y_true = ss.sources.T.groupby(target_level, sort=False).sum().T if minimum_contribution is not None: y_true = (y_true > minimum_contribution).astype(int) - y_pred = ss.prediction_probas.groupby(axis=1, level=target_level, sort=False).sum() + y_pred = ss.prediction_probas.T.groupby(target_level, sort=False).sum().T # switch from pandas to numpy labels = y_true.columns @@ -250,6 +250,6 @@ def build_keras_semisupervised_metric_func(keras_metric_func, activation_func, n_labels): def metric_func(y_true, y_pred): return keras_metric_func(y_true[:, :n_labels], activation_func(y_pred)) - metric_func.__name__ = keras_metric_func.__name__ + metric_func.__name__ = keras_metric_func.__class__.__name__ return metric_func diff --git a/riid/models/__init__.py b/riid/models/__init__.py index f2c29971..0825b251 100644 --- a/riid/models/__init__.py +++ b/riid/models/__init__.py @@ -11,8 +11,8 @@ import numpy as np import tensorflow as tf import tf2onnx -from keras.models import Model -from keras.utils import get_custom_objects +from keras.api.models import Model +from keras.api.utils import get_custom_objects import riid from riid.data.labeling import label_to_index_element diff --git a/riid/models/bayes.py b/riid/models/bayes.py index 758b0229..6850cbb1 100644 --- a/riid/models/bayes.py +++ b/riid/models/bayes.py @@ -5,10 +5,14 @@ import numpy as np import pandas as pd import tensorflow as tf -import tensorflow_probability as tfp +from keras.api.layers import Add, Input, Multiply, Subtract +from keras.api.models import Model from 
riid.data.sampleset import SampleSet from riid.models import PyRIIDModel +from riid.models.layers import (ClipByValueLayer, DivideLayer, ExpandDimsLayer, + PoissonLogProbabilityLayer, ReduceMaxLayer, + ReduceSumLayer, SeedLayer) class PoissonBayesClassifier(PyRIIDModel): @@ -29,6 +33,14 @@ class PoissonBayesClassifier(PyRIIDModel): def __init__(self): super().__init__() + self._update_custom_objects("ReduceSumLayer", ReduceSumLayer) + self._update_custom_objects("ReduceMaxLayer", ReduceMaxLayer) + self._update_custom_objects("DivideLayer", DivideLayer) + self._update_custom_objects("ExpandDimsLayer", ExpandDimsLayer) + self._update_custom_objects("ClipByValueLayer", ClipByValueLayer) + self._update_custom_objects("PoissonLogProbabilityLayer", PoissonLogProbabilityLayer) + self._update_custom_objects("SeedLayer", SeedLayer) + def fit(self, seeds_ss: SampleSet): """Construct a TF-based implementation of a poisson-bayes classifier in terms of the given seeds. @@ -50,74 +62,98 @@ def fit(self, seeds_ss: SampleSet): msg = "Argument 'seeds_ss' can't contain any spectra with zero total counts." raise ZeroTotalCountsError(msg) - self._seeds = tf.constant(tf.convert_to_tensor( + self._seeds = tf.convert_to_tensor( seeds_ss.spectra.values, dtype=tf.float32 - )) - - # Inputs - gross_spectrum_input = tf.keras.layers.Input( - shape=seeds_ss.n_channels, - name="gross_spectrum" - ) - gross_live_time_input = tf.keras.layers.Input( - shape=(), - name="gross_live_time" - ) - bg_spectrum_input = tf.keras.layers.Input( - shape=seeds_ss.n_channels, - name="bg_spectrum" - ) - bg_live_time_input = tf.keras.layers.Input( - shape=(), - name="bg_live_time" - ) - - # Compute expected_seed_spectrums - gross_total_counts = tf.reduce_sum(gross_spectrum_input, axis=1) - bg_total_counts = tf.reduce_sum(bg_spectrum_input, axis=1) - bg_count_rate = tf.divide(bg_total_counts, bg_live_time_input) - expected_bg_counts = tf.multiply(bg_count_rate, gross_live_time_input) - expected_fg_counts = tf.subtract(gross_total_counts, expected_bg_counts) - normalized_bg_spectrum = tf.divide( - bg_spectrum_input, - tf.expand_dims(bg_total_counts, axis=1) - ) - expected_bg_spectrum = tf.multiply( - normalized_bg_spectrum, - tf.expand_dims(expected_bg_counts, axis=1) - ) - expected_fg_spectrum = tf.multiply( - self._seeds, - tf.expand_dims(tf.expand_dims( - expected_fg_counts, - axis=-1 - ), axis=-1) - ) - max_value = tf.math.reduce_max(expected_fg_spectrum) - expected_fg_spectrum = tf.clip_by_value(expected_fg_spectrum, 1e-8, max_value) - expected_gross_spectrum = tf.add( - expected_fg_spectrum, - tf.expand_dims(expected_bg_spectrum, axis=1) ) - poisson_dist = tfp.distributions.Poisson(expected_gross_spectrum) - all_probas = poisson_dist.log_prob( - tf.expand_dims(gross_spectrum_input, axis=1) - ) - prediction_probas = tf.math.reduce_sum(all_probas, axis=2) - + # Inputs + gross_spectrum_input = Input(shape=(seeds_ss.n_channels,), + name="gross_spectrum") + gross_live_time_input = Input(shape=(), + name="gross_live_time") + bg_spectrum_input = Input(shape=(seeds_ss.n_channels,), + name="bg_spectrum") + bg_live_time_input = Input(shape=(), + name="bg_live_time") model_inputs = ( gross_spectrum_input, gross_live_time_input, bg_spectrum_input, bg_live_time_input, ) - self.model = tf.keras.Model(model_inputs, prediction_probas) + + # Input statistics + gross_total_counts = ReduceSumLayer(name="gross_total_counts")(gross_spectrum_input, axis=1) + bg_total_counts = ReduceSumLayer(name="bg_total_counts")(bg_spectrum_input, axis=1) + bg_count_rate 
= DivideLayer(name="bg_count_rate")([bg_total_counts, bg_live_time_input]) + + gross_spectrum_input_expanded = ExpandDimsLayer( + name="gross_spectrum_input_expanded" + )(gross_spectrum_input, axis=1) + bg_total_counts_expanded = ExpandDimsLayer( + name="bg_total_counts_expanded" + )(bg_total_counts, axis=1) + + # Expectations + seed_layer = SeedLayer(self._seeds)(model_inputs) + seed_layer_expanded = ExpandDimsLayer()(seed_layer, axis=0) + expected_bg_counts = Multiply( + trainable=False, + name="expected_bg_counts" + )([bg_count_rate, gross_live_time_input]) + expected_bg_counts_expanded = ExpandDimsLayer( + name="expected_bg_counts_expanded" + )(expected_bg_counts, axis=1) + normalized_bg_spectrum = DivideLayer( + name="normalized_bg_spectrum" + )([bg_spectrum_input, bg_total_counts_expanded]) + expected_bg_spectrum = Multiply( + trainable=False, + name="expected_bg_spectrum" + )([normalized_bg_spectrum, expected_bg_counts_expanded]) + expected_fg_counts = Subtract( + trainable=False, + name="expected_fg_counts" + )([gross_total_counts, expected_bg_counts]) + expected_fg_counts_expanded = ExpandDimsLayer( + name="expected_fg_counts_expanded" + )(expected_fg_counts, axis=-1) + expected_fg_counts_expanded2 = ExpandDimsLayer( + name="expected_fg_counts_expanded2" + )(expected_fg_counts_expanded, axis=-1) + expected_fg_spectrum = Multiply( + trainable=False, + name="expected_fg_spectrum" + )([seed_layer_expanded, expected_fg_counts_expanded2]) + max_fg_value = ReduceMaxLayer( + name="max_fg_value" + )(expected_fg_spectrum) + expected_fg_spectrum = ClipByValueLayer( + name="clip_expected_fg_spectrum" + )(expected_fg_spectrum, clip_value_min=1e-8, clip_value_max=max_fg_value) + expected_bg_spectrum_expanded = ExpandDimsLayer( + name="expected_bg_spectrum_expanded" + )(expected_bg_spectrum, axis=1) + expected_gross_spectrum = Add( + trainable=False, + name="expected_gross_spectrum" + )([expected_fg_spectrum, expected_bg_spectrum_expanded]) + + # Compute probabilities + log_probabilities = PoissonLogProbabilityLayer( + name="log_probabilities" + )([expected_gross_spectrum, gross_spectrum_input_expanded]) + summed_log_probabilities = ReduceSumLayer( + name="summed_log_probabilities" + )(log_probabilities, axis=2) + + # Assemble model + self.model = Model(model_inputs, summed_log_probabilities) self.model.compile() self.target_level = "Seed" - sources_df = seeds_ss.sources.groupby(axis=1, level=self.target_level, sort=False).sum() + sources_df = seeds_ss.sources.T.groupby(self.target_level, sort=False).sum().T self.model_outputs = sources_df.columns.values.tolist() def predict(self, gross_ss: SampleSet, bg_ss: SampleSet, diff --git a/riid/models/layers.py b/riid/models/layers.py new file mode 100644 index 00000000..c250e09f --- /dev/null +++ b/riid/models/layers.py @@ -0,0 +1,82 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
+"""This module contains custom Keras layers.""" +import tensorflow as tf +from keras.api.layers import Layer + + +class ReduceSumLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x, axis): + return tf.reduce_sum(x, axis=axis) + + +class ReduceMaxLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x): + return tf.reduce_max(x) + + +class DivideLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x): + return tf.divide(x[0], x[1]) + + +class ExpandDimsLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x, axis): + return tf.expand_dims(x, axis=axis) + + +class ClipByValueLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x, clip_value_min, clip_value_max): + return tf.clip_by_value(x, clip_value_min=clip_value_min, clip_value_max=clip_value_max) + + +class PoissonLogProbabilityLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x): + exp, value = x + log_probas = tf.math.xlogy(value, exp) - exp - tf.math.lgamma(value + 1) + return log_probas + + +class SeedLayer(Layer): + def __init__(self, seeds, **kwargs): + super(SeedLayer, self).__init__(**kwargs) + self.seeds = tf.convert_to_tensor(seeds) + + def get_config(self): + config = super().get_config() + config.update({ + "seeds": self.seeds.numpy().tolist(), + }) + return config + + def call(self, inputs): + return self.seeds + + +class L1NormLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, inputs): + sums = tf.reduce_sum(inputs, axis=-1) + l1_norm = inputs / tf.reshape(sums, (-1, 1)) + return l1_norm diff --git a/riid/models/neural_nets/__init__.py b/riid/models/neural_nets/__init__.py index e46c700c..42443f88 100644 --- a/riid/models/neural_nets/__init__.py +++ b/riid/models/neural_nets/__init__.py @@ -2,88 +2,90 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
"""This module contains neural network-based classifiers and regressors.""" -from typing import Any, List - +import keras import numpy as np import pandas as pd import tensorflow as tf -from keras.callbacks import EarlyStopping -from keras.layers import Dense, Dropout -from keras.optimizers import Adam -from keras.regularizers import L1L2, l1, l2 +from keras.api.activations import sigmoid, softmax +from keras.api.callbacks import EarlyStopping +from keras.api.layers import Dense, Dropout, Input +from keras.api.losses import CategoricalCrossentropy, MeanSquaredError +from keras.api.metrics import F1Score, Precision, Recall +from keras.api.models import Model +from keras.api.optimizers import Adam +from keras.api.regularizers import L1L2, l1, l2 +from keras.api.utils import split_dataset from scipy.interpolate import UnivariateSpline -from riid.data.sampleset import SampleSet +from riid.data.sampleset import SampleSet, SpectraType from riid.losses import (build_keras_semisupervised_loss_func, chi_squared_diff, jensen_shannon_divergence, normal_nll_diff, poisson_nll_diff, reconstruction_error, sse_diff, weighted_sse_diff) from riid.losses.sparsemax import SparsemaxLoss, sparsemax -from riid.metrics import (build_keras_semisupervised_metric_func, multi_f1, - single_f1) +from riid.metrics import build_keras_semisupervised_metric_func from riid.models import ModelInput, PyRIIDModel +from riid.models.layers import L1NormLayer class MLPClassifier(PyRIIDModel): """Multi-layer perceptron classifier.""" - def __init__(self, hidden_layers: tuple = (512,), activation: str = "relu", - loss: str = "categorical_crossentropy", - optimizer: Any = Adam(learning_rate=0.01, clipnorm=0.001), - metrics: tuple = ("accuracy", "categorical_crossentropy", multi_f1, single_f1), - l2_alpha: float = 1e-4, activity_regularizer=l1(0), dropout: float = 0.0, - learning_rate: float = 0.01, final_activation: str = "softmax"): + def __init__(self, activation=None, loss=None, optimizer=None, + metrics=None, l2_alpha: float = 1e-4, + activity_regularizer=None, final_activation=None): """ Args: - hidden_layers: tuple defining the number and size of dense layers activation: activate function to use for each dense layer loss: loss function to use for training optimizer: tensorflow optimizer or optimizer name to use for training metrics: list of metrics to be evaluating during training l2_alpha: alpha value for the L2 regularization of each dense layer activity_regularizer: regularizer function applied each dense layer output - dropout: amount of dropout to apply to each dense layer - learning_rate: learning rate to use for an Adam optimizer + final_activation: final activation function to apply to model output """ super().__init__() - self.hidden_layers = hidden_layers self.activation = activation - self.final_activation = final_activation self.loss = loss - if optimizer == "adam": - self.optimizer = Adam(learning_rate=learning_rate) - else: - self.optimizer = optimizer self.optimizer = optimizer + self.final_activation = final_activation self.metrics = metrics self.l2_alpha = l2_alpha self.activity_regularizer = activity_regularizer - self.dropout = dropout + self.final_activation = final_activation + + if self.activation is None: + self.activation = "relu" + if self.loss is None: + self.loss = CategoricalCrossentropy() + if optimizer is None: + self.optimizer = Adam(learning_rate=0.01, clipnorm=0.001) + if self.metrics is None: + self.metrics = [F1Score(), Precision(), Recall()] + if self.activity_regularizer is None: + 
self.activity_regularizer = l1(0.0) + if self.final_activation is None: + self.final_activation = "softmax" self.model = None + self._predict_fn = None - def fit(self, ss: SampleSet, bg_ss: SampleSet = None, - ss_input_type: ModelInput = ModelInput.GrossSpectrum, - bg_ss_input_type: ModelInput = ModelInput.BackgroundSpectrum, - batch_size: int = 200, epochs: int = 20, - validation_split: float = 0.2, callbacks=None, val_ss: SampleSet = None, - val_bg_ss: SampleSet = None, patience: int = 15, es_monitor: str = "val_loss", + def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20, + validation_split: float = 0.2, callbacks=None, + patience: int = 15, es_monitor: str = "val_loss", es_mode: str = "min", es_verbose=0, target_level="Isotope", verbose: bool = False): """Fit a model to the given `SampleSet`(s). Args: ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either foreground (AKA, "net") or gross. - bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background batch_size: number of samples per gradient update epochs: maximum number of training iterations validation_split: percentage of the training data to use as validation data callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method - val_ss: validation set to be used instead of taking a portion of the training data - val_bg_ss: validation set to be used as background for `val_ss` - patience: number of epochs to wait for `tf.keras.callbacks.EarlyStopping` - es_monitor: quantity to be monitored for `tf.keras.callbacks.EarlyStopping` - es_mode: mode for `tf.keras.callbacks.EarlyStopping` - es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping` + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object target_level: `SampleSet.sources` column level to use verbose: whether to show detailed model training output @@ -96,75 +98,44 @@ def fit(self, ss: SampleSet, bg_ss: SampleSet = None, if ss.n_samples <= 0: raise ValueError("No spectr[a|um] provided!") - x_train = ss.get_samples().astype(float) - source_contributions_df = ss.sources.groupby(axis=1, level=target_level, sort=False).sum() - y_train = source_contributions_df.values.astype(float) - if bg_ss: - x_bg_train = bg_ss.get_samples().astype(float) - - if val_ss: - if val_bg_ss: - val_data = ( - [val_ss.get_samples().astype(float), val_bg_ss.get_samples().astype(float)], - val_ss.get_source_contributions().astype(float), - ) - else: - val_data = ( - val_ss.get_samples().astype(float), - val_ss.get_source_contributions().astype(float), - ) - validation_split = None + if ss.spectra_type == SpectraType.Gross: + self.model_inputs = (ModelInput.GrossSpectrum,) + elif ss.spectra_type == SpectraType.Foreground: + self.model_inputs = (ModelInput.ForegroundSpectrum,) + elif ss.spectra_type == SpectraType.Background: + self.model_inputs = (ModelInput.BackgroundSpectrum,) else: - val_data = None - row_order = np.arange(x_train.shape[0]) - np.random.shuffle(row_order) - # Enforce random validation split through shuffling - x_train = x_train[row_order] - y_train = y_train[row_order] - - if bg_ss: - x_bg_train = x_bg_train[row_order] + raise ValueError(f"{ss.spectra_type} is not supported in this model.") + + X = ss.get_samples() + source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T + model_outputs = 
source_contributions_df.columns.values.tolist() + Y = source_contributions_df.values + + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32) + training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor)) + training_dataset, validation_dataset = split_dataset( + training_dataset, + left_size=validation_split, + shuffle=True + ) + training_dataset = training_dataset.batch(batch_size=batch_size) + validation_dataset = validation_dataset.batch(batch_size=batch_size) if not self.model: - spectra_input = tf.keras.layers.Input( - x_train.shape[1], - name=ss_input_type.name - ) - inputs = [spectra_input] - - if bg_ss: - background_spectra_input = tf.keras.layers.Input( - x_bg_train.shape[1], - name=bg_ss_input_type.name - ) - inputs.append(background_spectra_input) - - if len(inputs) > 1: - x = tf.keras.layers.Concatenate()(inputs) - else: - x = inputs[0] - - for layer, nodes in enumerate(self.hidden_layers): - if layer == 0: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - else: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - if self.dropout > 0: - x = Dropout(self.dropout)(x) - - output = Dense(y_train.shape[1], activation=self.final_activation)(x) - self.model = tf.keras.models.Model(inputs, output) - self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=self.metrics) + inputs = Input(shape=(X.shape[1],), name="Spectrum") + dense_layer_size = X.shape[1] // 2 + dense_layer = Dense( + dense_layer_size, + activation=self.activation, + activity_regularizer=self.activity_regularizer, + kernel_regularizer=l2(self.l2_alpha), + )(inputs) + outputs = Dense(Y.shape[1], activation=self.final_activation)(dense_layer) + self.model = Model(inputs, outputs) + self.model.compile(loss=self.loss, optimizer=self.optimizer, + metrics=self.metrics) es = EarlyStopping( monitor=es_monitor, @@ -173,41 +144,39 @@ def fit(self, ss: SampleSet, bg_ss: SampleSet = None, restore_best_weights=True, mode=es_mode, ) - if callbacks: callbacks.append(es) else: callbacks = [es] - if bg_ss: - X_data = [x_train, x_bg_train] - self.model_inputs = (ss_input_type, bg_ss_input_type) - else: - X_data = x_train - self.model_inputs = (ss_input_type,) - history = self.model.fit( - X_data, - y_train, + training_dataset, epochs=epochs, verbose=verbose, - validation_split=validation_split, - validation_data=val_data, + validation_data=validation_dataset, callbacks=callbacks, - shuffle=True, - batch_size=batch_size, - ) + ) # Update model information self._update_info( target_level=target_level, - model_outputs=source_contributions_df.columns.values.tolist(), + model_outputs=model_outputs, normalization=ss.spectra_state, ) + # Define the predict function with tf.function and input_signature + self._predict_fn = tf.function( + self._predict, + # input_signature=[tf.TensorSpec(shape=[None, X.shape[1]], dtype=tf.float32)] + experimental_relax_shapes=True + ) + return history - def predict(self, ss: SampleSet, bg_ss: SampleSet = None, verbose=False): + def _predict(self, input_tensor): + return self.model(input_tensor, training=False) + + def predict(self, ss: SampleSet, bg_ss: SampleSet = None): """Classify the spectra in the provided `SampleSet`(s). Results are stored inside the first SampleSet's prediction-related properties. 
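The `.T.groupby(...).sum().T` pattern used here (and throughout this patch) replaces `DataFrame.groupby(axis=1, ...)`, which is deprecated in pandas 2.x. A self-contained illustration with a toy stand-in for `SampleSet.sources` (the isotope/seed labels are made up):

```python
import pandas as pd

columns = pd.MultiIndex.from_tuples(
    [("Am241", "Am241,100uCi"), ("Ba133", "Ba133,50uCi"), ("Ba133", "Ba133,100uCi")],
    names=["Isotope", "Seed"],
)
sources = pd.DataFrame([[0.25, 0.25, 0.50],
                        [0.50, 0.25, 0.25]], columns=columns)

# Old (deprecated): sources.groupby(axis=1, level="Isotope", sort=False).sum()
collapsed = sources.T.groupby("Isotope", sort=False).sum().T
print(collapsed)  # one column per isotope: Am241 = [0.25, 0.5], Ba133 = [0.75, 0.5]
```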
@@ -223,7 +192,8 @@ def predict(self, ss: SampleSet, bg_ss: SampleSet = None, verbose=False): else: X = x_test - results = self.model.predict(X, verbose=verbose) + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + results = self._predict_fn(spectra_tensor) col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] @@ -238,194 +208,6 @@ def predict(self, ss: SampleSet, bg_ss: SampleSet = None, verbose=False): ss.classified_by = self.model_id -class MultiEventClassifier(PyRIIDModel): - """Classifier for spectra from multiple detectors observing the same event.""" - def __init__(self, hidden_layers: tuple = (512,), activation: str = "relu", - loss: str = "categorical_crossentropy", - optimizer: Any = Adam(learning_rate=0.01, clipnorm=0.001), - metrics: list = ["accuracy", "categorical_crossentropy", multi_f1, single_f1], - l2_alpha: float = 1e-4, activity_regularizer: tf.keras.regularizers = l1(0), - dropout: float = 0.0, learning_rate: float = 0.01): - """ - Args: - hidden_layers: tuple containing the number and size of dense layers - activation: activate function to use for each dense layer - loss: string name of the loss function to use for training - optimizer: string name of the optimizer to use for training - metrics: list of metrics to be evaluating during training - l2_alpha: alpha value for the L2 regularization of each dense layer - activity_regularizer: regularizer function applied each dense layer output - dropout: amount of dropout to apply to each dense layer - learning_rate: learning rate to use for an Adam optimizer - """ - super().__init__() - - self.hidden_layers = hidden_layers - self.activation = activation - self.loss = loss - if optimizer == "adam": - self.optimizer = Adam(learning_rate=learning_rate) - else: - self.optimizer = optimizer - self.metrics = metrics - self.l2_alpha = l2_alpha - self.activity_regularizer = activity_regularizer - self.dropout = dropout - self.model = None - - def fit(self, list_of_ss: List[SampleSet], target_contributions: pd.DataFrame, - batch_size: int = 200, epochs: int = 20, - validation_split: float = 0.2, callbacks: list = None, - val_model_ss_list: SampleSet = None, - val_model_target_contributions: pd.DataFrame = None, - patience: int = 15, es_monitor: str = "val_loss", es_mode: str = "min", - es_verbose: bool = False, target_level="Isotope", verbose: bool = False): - """Fit a model to the given SampleSet(s). - - Args: - list_of_ss: list of `SampleSet`s which have prediction_probas populated from - single-event classifiers - target_contributions: DataFrame of the contributions for each - observation. Column titles are the desired label strings. 
- batch_size: number of samples per gradient update - epochs: maximum number of training iterations - validation_split: percentage of the training data to use as validation data - callbacks: list of callbacks to be passed to TensorFlow Model.fit() method - val_model_ss_list: validation set to be used instead of taking a portion of the - training data - val_model_target_contributions: target contributions to the model for each sample - patience: number of epochs to wait for `tf.keras.callbacks.EarlyStopping` object - es_monitor: quantity to be monitored for `tf.keras.callbacks.EarlyStopping` object - es_mode: mode for `tf.keras.callbacks.EarlyStopping` object - es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping` object - target_level: source level to target for model output - verbose: whether to show detailed training output - - Returns: - `tf.History` object - - Raises: - `ValueError` when no predictions are provided with `list_of_ss` input - """ - if len(list_of_ss) <= 0: - raise ValueError("No model predictions provided!") - - x_train = [ss.prediction_probas.values for ss in list_of_ss] - y_train = target_contributions.values - - if val_model_ss_list and val_model_target_contributions: - val_data = ( - [ss.prediction_probas.values for ss in val_model_ss_list], - val_model_target_contributions.values, - ) - validation_split = None - else: - val_data = None - row_order = np.arange(x_train[0].shape[0]) - np.random.shuffle(row_order) - # Enforce random validation split through shuffling - x_train = [i[row_order] for i in x_train] - y_train = y_train[row_order] - - if not self.model: - inputs = [] - for ss in list_of_ss: - input_from_single_event_model = tf.keras.layers.Input( - ss.prediction_probas.shape[1], - name=ss.classified_by - ) - inputs.append(input_from_single_event_model) - - if len(inputs) > 1: - x = tf.keras.layers.Concatenate()(inputs) - else: - x = inputs[0] - - for layer, nodes in enumerate(self.hidden_layers): - if layer == 0: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - else: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - if self.dropout > 0: - x = Dropout(self.dropout)(x) - - output = Dense(y_train.shape[1], activation="softmax")(x) - self.model = tf.keras.models.Model(inputs, output) - self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=self.metrics) - - es = EarlyStopping( - monitor=es_monitor, - patience=patience, - verbose=es_verbose, - restore_best_weights=True, - mode=es_mode, - ) - - if callbacks: - callbacks.append(es) - else: - callbacks = [es] - - history = self.model.fit( - x_train, - y_train, - epochs=epochs, - verbose=verbose, - validation_split=validation_split, - validation_data=val_data, - callbacks=callbacks, - shuffle=True, - batch_size=batch_size, - ) - - # Initialize model info, update output/input information - self._update_info( - target_level=target_level, - model_outputs=target_contributions.columns.values.tolist(), - model_inputs=tuple( - [(ss.classified_by, ss.prediction_probas.shape[1]) for ss in list_of_ss] - ), - normalization=tuple( - [(ss.classified_by, ss.spectra_state) for ss in list_of_ss] - ), - ) - - return history - - def predict(self, list_of_ss: List[SampleSet], verbose=False) -> pd.DataFrame: - """Classify the spectra in the provided `SampleSet`(s) based on each one's results. 
- - Args: - list_of_ss: list of `SampleSet`s which had predictions made by single-event models - - Returns: - `DataFrame` of predicted results for the `Sampleset`(s) - """ - X = [ss.prediction_probas for ss in list_of_ss] - results = self.model.predict(X, verbose=verbose) - - col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) - col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] - results_df = pd.DataFrame( - data=results, - columns=pd.MultiIndex.from_tuples( - self.get_model_outputs_as_label_tuples(), - names=col_level_subset - ) - ) - return results_df - - class LabelProportionEstimator(PyRIIDModel): """Regressor for predicting label proportions that uses a semi-supervised loss. @@ -450,19 +232,19 @@ class LabelProportionEstimator(PyRIIDModel): sparsemax, ), "categorical_crossentropy": ( - tf.keras.losses.CategoricalCrossentropy, + CategoricalCrossentropy, { "from_logits": True, "reduction": tf.keras.losses.Reduction.NONE, }, - tf.keras.activations.softmax, + softmax, ), "mse": ( - tf.keras.losses.MeanSquaredError, + MeanSquaredError, { "reduction": tf.keras.losses.Reduction.NONE, }, - tf.keras.activations.sigmoid, + sigmoid, ) } INFO_KEYS = ( @@ -495,7 +277,7 @@ class LabelProportionEstimator(PyRIIDModel): ) def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_loss="sse", - metrics=("mae", "categorical_crossentropy",), beta=0.9, source_dict=None, + metrics: list = ["mae", "categorical_crossentropy"], beta=0.9, source_dict=None, optimizer="adam", optimizer_kwargs=None, learning_rate: float = 1e-3, hidden_layer_activation: str = "mish", kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, @@ -551,7 +333,7 @@ def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_lo self.optimizer = optimizer if isinstance(optimizer, str): - self.optimizer = tf.keras.optimizers.get(optimizer) + self.optimizer = keras.optimizers.get(optimizer) if optimizer_kwargs is not None: for key, value in optimizer_kwargs.items(): setattr(self.optimizer, key, value) @@ -603,11 +385,11 @@ def _get_unsup_loss_func(self, loss_func_str): return self.UNSUPERVISED_LOSS_FUNCS[loss_func_str] def _initialize_model(self, input_size, output_size): - spectra_input = tf.keras.layers.Input(input_size, name="input_spectrum") + spectra_input = Input(input_size, name="input_spectrum") spectra_norm = L1NormLayer(name="normalized_input_spectrum")(spectra_input) x = spectra_norm for layer, nodes in enumerate(self.hidden_layers): - x = tf.keras.layers.Dense( + x = Dense( nodes, activation=self.hidden_layer_activation, kernel_regularizer=L1L2( @@ -626,17 +408,14 @@ def _initialize_model(self, input_size, output_size): )(x) if self.dropout > 0: - x = tf.keras.layers.Dropout(self.dropout)(x) - output = tf.keras.layers.Dense( + x = Dropout(self.dropout)(x) + output = Dense( output_size, activation="linear", name="output" )(x) - self.model = tf.keras.models.Model( - inputs=[spectra_input], - outputs=[output] - ) + self.model = Model(inputs=[spectra_input], outputs=[output]) def _get_info_as_dict(self): info_dict = {} @@ -703,10 +482,10 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b epochs: maximum number of training iterations validation_split: proportion of training data to use as validation data callbacks: list of callbacks to be passed to TensorFlow Model.fit() method - patience: number of epochs to wait for tf.keras.callbacks.EarlyStopping object - es_monitor: quantity to be monitored 
for tf.keras.callbacks.EarlyStopping object - es_mode: mode for tf.keras.callbacks.EarlyStopping object - es_verbose: verbosity level for tf.keras.callbacks.EarlyStopping object + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object es_min_delta: minimum change to count as an improvement for early stopping normalize_sup_loss: whether to normalize the supervised loss term normalize_func: normalization function used for supervised loss term @@ -715,7 +494,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b verbose: whether model training output is printed to the terminal """ spectra = ss.get_samples().astype(float) - sources_df = ss.sources.groupby(axis=1, level=target_level, sort=False).sum() + sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T sources = sources_df.values.astype(float) self.sources_columns = sources_df.columns @@ -734,7 +513,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b if verbose: print("Initializing model...") self._initialize_model( - ss.n_channels, + (ss.n_channels,), sources.shape[1], ) elif verbose: @@ -771,7 +550,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b ) else: semisup_metrics.append( - build_keras_semisupervised_loss_func( + build_keras_semisupervised_metric_func( each, self.activation, sources.shape[1] @@ -813,7 +592,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b if verbose: print("Finding OOD detection threshold function...") - train_logits = self.model.predict(spectra) + train_logits = self.model.predict(spectra, verbose=0) train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32)) self.spline_recon_errors = reconstruction_error( tf.convert_to_tensor(spectra, dtype=tf.float32), @@ -878,31 +657,11 @@ def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verb ss.info["recon_error"] = recon_errors -class L1NormLayer(tf.keras.layers.Layer): - """Keras layer applying an L1 norm (dividing by total counts) to input data. - """ - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def call(self, inputs): - """This is where the layer's logic lives. - - Args: - inputs: input tensor, or dict/list/tuple of input tensors. - - Returns: - A tensor or list/tuple of tensors. 
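The `L1NormLayer` removed below now lives in `riid.models.layers` (see the new module earlier in this patch). A brief sketch of its behavior, using arbitrary count values:

```python
import tensorflow as tf

from riid.models.layers import L1NormLayer

spectra = tf.constant([[2.0, 3.0, 5.0],
                       [1.0, 1.0, 2.0]])
normalized = L1NormLayer()(spectra)
print(tf.reduce_sum(normalized, axis=-1).numpy())  # expected: [1. 1.]
```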
- """ - sums = tf.reduce_sum(inputs, axis=-1) - l1_norm = inputs / tf.reshape(sums, (-1, 1)) - return l1_norm - - def _get_reordered_spectra(old_spectra_df: pd.DataFrame, old_sources_df: pd.DataFrame, new_sources_columns, target_level) -> pd.DataFrame: collapsed_sources_df = old_sources_df\ - .groupby(axis=1, level=target_level)\ - .sum() + .T.groupby(target_level)\ + .sum().T reordered_spectra_df = old_spectra_df.iloc[ collapsed_sources_df[ new_sources_columns diff --git a/riid/models/neural_nets/arad.py b/riid/models/neural_nets/arad.py index 25f4941f..3679bfe6 100644 --- a/riid/models/neural_nets/arad.py +++ b/riid/models/neural_nets/arad.py @@ -4,22 +4,27 @@ """This module contains implementations of the ARAD deep learning architecture.""" from typing import List +import keras import pandas as pd import tensorflow as tf -from keras.activations import sigmoid, softplus -from keras.callbacks import EarlyStopping, ReduceLROnPlateau -from keras.initializers import GlorotNormal, HeNormal -from keras.layers import (BatchNormalization, Concatenate, Conv1D, - Conv1DTranspose, Dense, Dropout, Flatten, Input, - MaxPool1D, Reshape, UpSampling1D) -from keras.models import Model -from keras.regularizers import L1L2, L2 +from keras.api.activations import sigmoid, softplus +from keras.api.callbacks import EarlyStopping, ReduceLROnPlateau +from keras.api.initializers import GlorotNormal, HeNormal +from keras.api.layers import (BatchNormalization, Concatenate, Conv1D, + Conv1DTranspose, Dense, Dropout, Flatten, Input, + MaxPool1D, Reshape, UpSampling1D) +from keras.api.losses import kl_divergence, log_cosh +from keras.api.metrics import MeanSquaredError +from keras.api.models import Model +from keras.api.optimizers import Adam, Nadam +from keras.api.regularizers import L1L2, L2 from scipy.spatial.distance import jensenshannon from scipy.stats import entropy from riid.data.sampleset import SampleSet, SpectraState -from riid.losses import jensen_shannon_distance, mish +from riid.losses import mish from riid.models import PyRIIDModel +from riid.models.bayes import ExpandDimsLayer class ARADv1TF(Model): @@ -31,15 +36,15 @@ class ARADv1TF(Model): - Ghawaly Jr, James M. "A Datacentric Algorithm for Gamma-ray Radiation Anomaly Detection in Unknown Background Environments." (2020). """ - def __init__(self, latent_dim: int = 5): + def __init__(self, latent_dim: int = 5, **kwargs): """ Args: latent_dim: dimension of internal latent represention. 5 was the final one in the paper, but 4 to 8 were found to work well. 
""" - super().__init__() + super().__init__(**kwargs) - input_size = (128, 1) + input_size = (128,) # Encoder b1_config = ( (5, 1, 32), @@ -63,9 +68,10 @@ def __init__(self, latent_dim: int = 5): (1, 1, 2), ) encoder_input = Input(shape=input_size, name="encoder_input") - b1 = self._get_branch(encoder_input, b1_config, 0.1, "softplus", "B1", 5) - b2 = self._get_branch(encoder_input, b2_config, 0.1, "softplus", "B2", 5) - b3 = self._get_branch(encoder_input, b3_config, 0.1, "softplus", "B3", 5) + expanded_encoder_input = ExpandDimsLayer()(encoder_input, axis=-1) + b1 = self._get_branch(expanded_encoder_input, b1_config, 0.1, "softplus", "B1", 5) + b2 = self._get_branch(expanded_encoder_input, b2_config, 0.1, "softplus", "B2", 5) + b3 = self._get_branch(expanded_encoder_input, b3_config, 0.1, "softplus", "B3", 5) x = Concatenate(axis=1)([b1, b2, b3]) x = Reshape((15,), name="reshape")(x) latent_space = Dense( @@ -92,27 +98,11 @@ def __init__(self, latent_dim: int = 5): decoded_spectrum = decoder(encoded_spectrum) autoencoder = Model(encoder_input, decoded_spectrum, name="autoencoder") - def logcosh_with_kld_penalty(input_spectrum, decoded_spectrum, - latent_space, sparsities, - penalty_weight=0.5): - squeezed_input = tf.squeeze(input_spectrum) - logcosh_loss = tf.keras.losses.log_cosh(squeezed_input, decoded_spectrum) - kld_loss = tf.keras.losses.kld(sparsities, latent_space) - loss = logcosh_loss + penalty_weight * kld_loss - return loss - - sparsities = [0.001] * latent_dim - loss_func = logcosh_with_kld_penalty( - encoder_input, - decoded_spectrum, - latent_space, - sparsities, - ) - autoencoder.add_loss(loss_func) - self.encoder = encoder self.decoder = decoder self.autoencoder = autoencoder + self.sparsities = [0.001] * latent_dim + self.penalty_weight = 0.5 def _get_branch(self, input_layer, config, dropout_rate, activation, branch_name, dense_units): x = input_layer @@ -132,7 +122,15 @@ def _get_branch(self, input_layer, config, dropout_rate, activation, branch_name return x def call(self, x): - decoded = self.autoencoder(x) + encoded = self.encoder(x) + decoded = self.decoder(encoded) + + # Compute loss + logcosh_loss = log_cosh(x, decoded) + kld_loss = kl_divergence(self.sparsities, encoded) + loss = logcosh_loss + self.penalty_weight * kld_loss + self.add_loss(loss) + return decoded @@ -145,15 +143,15 @@ class ARADv2TF(Model): - Ghawaly Jr, James M., et al. "Characterization of the Autoencoder Radiation Anomaly Detection (ARAD) model." Engineering Applications of Artificial Intelligence 111 (2022): 104761. """ - def __init__(self, latent_dim: int = 8): + def __init__(self, latent_dim: int = 8, **kwargs): """ Args: latent_dim: dimension of internal latent represention. 5 was the final one in the paper, but 4 to 8 were found to work well. 
""" - super().__init__() + super().__init__(**kwargs) - input_size = (128, 1) + input_size = (128,) # Encoder config = ( (7, 1, 8, 2), @@ -163,7 +161,8 @@ def __init__(self, latent_dim: int = 8): (3, 1, 8, 2), ) encoder_input = Input(shape=input_size, name="encoder_input") - x = encoder_input + expanded_encoder_input = ExpandDimsLayer()(encoder_input, axis=-1) + x = expanded_encoder_input for i, (kernel_size, strides, filters, max_pool_size) in enumerate(config, start=1): conv_name = f"conv{i}" x = Conv1D( @@ -194,7 +193,7 @@ def __init__(self, latent_dim: int = 8): encoder = Model(encoder_input, encoder_output, name="encoder") # Decoder - decoder_input = Input(shape=latent_dim, name="decoder_input") + decoder_input = Input(shape=(latent_dim,), name="decoder_input") x = Dense(units=32, name="D2", activation=mish)(decoder_input) x = BatchNormalization(name="D2_batch_norm")(x) x = Reshape((4, 8), name="reshape")(x) @@ -242,7 +241,25 @@ def __init__(self, latent_dim: int = 8): self.autoencoder = autoencoder def call(self, x): - decoded = self.autoencoder(x) + encoded = self.encoder(x) + decoded = self.decoder(encoded) + + # Compute loss + p_sum = tf.reduce_sum(x, axis=-1) + p_norm = tf.divide( + x, + tf.reshape(p_sum, (-1, 1)) + ) + q_sum = tf.reduce_sum(decoded, axis=-1) + q_norm = tf.divide( + decoded, + tf.reshape(q_sum, (-1, 1)) + ) + m = (p_norm + q_norm) / 2 + js_divergence = (kl_divergence(p_norm, m) + kl_divergence(q_norm, m)) / 2 + loss = tf.math.sqrt(js_divergence) + self.add_loss(loss) + return decoded @@ -278,16 +295,12 @@ def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2, x = ss.get_samples().astype(float) - optimizer = tf.keras.optimizers.Nadam( - learning_rate=1e-4 - ) + optimizer = Nadam(learning_rate=1e-4) if not self.model: self.model = ARADv1TF() - self.model.compile( - loss=None, - optimizer=optimizer - ) + + self.model.compile(optimizer=optimizer) callbacks = [ EarlyStopping( @@ -375,17 +388,12 @@ def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2, x = ss.get_samples().astype(float) - optimizer = tf.keras.optimizers.Adam( - learning_rate=0.01, - epsilon=0.05 - ) + optimizer = Adam(learning_rate=0.01, epsilon=0.05) if not self.model: self.model = ARADv2TF() - self.model.compile( - loss=jensen_shannon_distance, - optimizer=optimizer - ) + + self.model.compile(optimizer=optimizer) callbacks = [ EarlyStopping( @@ -448,7 +456,7 @@ class ARADLatentPredictor(PyRIIDModel): def __init__(self, hidden_layers: tuple = (8, 4,), hidden_activation: str = "relu", final_activation: str = "linear", loss: str = "mse", optimizer="adam", optimizer_kwargs=None, - learning_rate: float = 1e-3, metrics: tuple = ("mse", ), + learning_rate: float = 1e-3, metrics=None, kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, @@ -479,12 +487,14 @@ def __init__(self, hidden_layers: tuple = (8, 4,), self.loss = loss self.optimizer = optimizer if isinstance(optimizer, str): - self.optimizer = tf.keras.optimizers.get(optimizer) + self.optimizer = keras.optimizers.get(optimizer) if optimizer_kwargs is not None: for key, value in optimizer_kwargs.items(): setattr(self.optimizer, key, value) self.optimizer.learning_rate = learning_rate self.metrics = metrics + if self.metrics is None: + self.metrics = [MeanSquaredError()] self.kernel_l1_regularization = kernel_l1_regularization 
self.kernel_l2_regularization = kernel_l2_regularization self.bias_l1_regularization = bias_l1_regularization @@ -498,15 +508,15 @@ def __init__(self, hidden_layers: tuple = (8, 4,), def _initialize_model(self, arad: Model, output_size: int): """Build Keras MLP model. """ - encoder = arad.get_layer("encoder") - encoder_input = encoder.get_layer(index=0).input - encoder_output = encoder.get_layer(index=-1).output - encoder_output_shape = encoder_output.shape + encoder: Model = arad.get_layer("encoder") + encoder_input = encoder.input + encoder_output = encoder.output + encoder_output_shape = encoder_output.shape[-1] - predictor_input = Input(shape=encoder_output_shape, name="predictor_input") + predictor_input = Input(shape=(encoder_output_shape,), name="inner_predictor_input") x = predictor_input for layer, nodes in enumerate(self.hidden_layers): - x = tf.keras.layers.Dense( + x = Dense( nodes, activation=self.hidden_activation, kernel_regularizer=L1L2( @@ -521,19 +531,19 @@ def _initialize_model(self, arad: Model, output_size: int): l1=self.activity_l1_regularization, l2=self.activity_l2_regularization ), - name=f"dense_{layer}" + name=f"inner_predictor_dense_{layer}" )(x) if self.dropout > 0: - x = tf.keras.layers.Dropout(self.dropout)(x) - predictor_output = tf.keras.layers.Dense( + x = Dropout(self.dropout)(x) + predictor_output = Dense( output_size, activation=self.final_activation, - name="output" + name="inner_predictor_output" )(x) - predictor = Model(predictor_input, predictor_output, name="predictor") + inner_predictor = Model(predictor_input, predictor_output, name="inner_predictor") encoded_spectrum = encoder(encoder_input) - predictions = predictor(encoded_spectrum) + predictions = inner_predictor(encoded_spectrum) self.model = Model(encoder_input, predictions, name="predictor") # Freeze the layers corresponding to the autoencoder # Note: setting trainable to False is recursive to sub-layers per TF docs: diff --git a/tests/anomaly_tests.py b/tests/anomaly_tests.py index 617384df..35143a1d 100644 --- a/tests/anomaly_tests.py +++ b/tests/anomaly_tests.py @@ -19,13 +19,14 @@ def setUp(self): pass def test_event_detector(self): - np.random.seed(42) + random_state = 42 + rng = np.random.default_rng(random_state) SAMPLE_INTERVAL = 0.5 BG_RATE = 300 seeds_ss = get_dummy_seeds(100) fg_seeds_ss, bg_seeds_ss = seeds_ss.split_fg_and_bg() - mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3)\ + mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3, rng=rng)\ .generate(1) events = PassbySynthesizer(events_per_seed=1, sample_interval=SAMPLE_INTERVAL, @@ -34,7 +35,7 @@ def test_event_detector(self): dwell_time_function_args=(20, 20), snr_function_args=(20, 20), return_gross=True, - rng=np.random.default_rng(42))\ + rng=rng)\ .generate(fg_seeds_ss, mixed_bg_seeds_ss, verbose=False) _, gross_events = list(zip(*events)) diff --git a/tests/model_tests.py b/tests/model_tests.py index 39357f3f..ea430abc 100644 --- a/tests/model_tests.py +++ b/tests/model_tests.py @@ -15,8 +15,7 @@ from riid.models import PyRIIDModel from riid.models.bayes import (NegativeSpectrumError, PoissonBayesClassifier, ZeroTotalCountsError) -from riid.models.neural_nets import (LabelProportionEstimator, MLPClassifier, - MultiEventClassifier) +from riid.models.neural_nets import (LabelProportionEstimator, MLPClassifier) from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv1, ARADv2 @@ -86,7 +85,7 @@ def test_pb_predict(self): # Get test samples gss = StaticSynthesizer( - samples_per_seed=1, + 
samples_per_seed=2, live_time_function_args=(4, 4), snr_function_args=(10, 10), rng=np.random.default_rng(42), @@ -99,7 +98,7 @@ def test_pb_predict(self): # Predict pb_model.predict(test_gross_ss, test_bg_ss) - truth_labels = fg_seeds_ss.get_labels() + truth_labels = test_fg_ss.get_labels() predicted_labels = test_gross_ss.get_predictions() assert (truth_labels == predicted_labels).all() @@ -110,18 +109,6 @@ def test_mlp_fit_save_load_predict(self): _test_model_fit_save_load_predict(self, MLPClassifier, self.test_ss, self.train_ss, epochs=1) - def test_mec_fit_save_load_predict(self): - test_copy_ss = self.test_ss[:] - test_copy_ss.prediction_probas = test_copy_ss.sources - _test_model_fit_save_load_predict( - self, - MultiEventClassifier, - [test_copy_ss], - [self.train_ss], - self.train_ss.sources.groupby(axis=1, level="Isotope", sort=False).sum(), - epochs=1 - ) - def test_lpe_fit_save_load_predict(self): _test_model_fit_save_load_predict(self, LabelProportionEstimator, self.test_ss, self.fg_seeds_ss, self.train_ss, epochs=1) diff --git a/tests/seedmixer_tests.py b/tests/seedmixer_tests.py index e56950e2..e986f891 100644 --- a/tests/seedmixer_tests.py +++ b/tests/seedmixer_tests.py @@ -15,8 +15,10 @@ class TestSeedMixer(unittest.TestCase): """Test seed mixing functionality of SampleSet. """ - def setUp(self): - np.random.seed(42) + @classmethod + def setUpClass(self): + random_state = 42 + self.rng = np.random.default_rng(random_state) self.ss, _ = get_dummy_seeds().split_fg_and_bg() self.ss.normalize() @@ -71,24 +73,24 @@ def test_mixture_pdf(self): self.assertAlmostEqual(self.three_mix_seeds_ss.spectra.values[sample, :].sum(), 1.0) def test_spectrum_construction_3seeds_2mix(self): - _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg() - mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2).generate(100) + _, bg_seeds_ss = get_dummy_seeds(n_channels=16, rng=self.rng).split_fg_and_bg() + mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2, rng=self.rng).generate(100) spectral_distances = _get_spectral_distances(bg_seeds_ss, mixed_bg_ss) - self.assertTrue(all(spectral_distances == 0)) + self.assertTrue(np.isclose(spectral_distances, 0.0).all()) def test_spectrum_construction_3seeds_3mix(self): - _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg() - mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=3).generate(100) + _, bg_seeds_ss = get_dummy_seeds(n_channels=16, rng=self.rng).split_fg_and_bg() + mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=3, rng=self.rng).generate(100) spectral_distances = _get_spectral_distances(bg_seeds_ss, mixed_bg_ss) - self.assertTrue(all(spectral_distances == 0)) + self.assertTrue(np.isclose(spectral_distances, 0.0).all()) def test_spectrum_construction_2seeds_2mix(self): - _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg( + _, bg_seeds_ss = get_dummy_seeds(n_channels=16, rng=self.rng).split_fg_and_bg( bg_seed_names=SampleSet.DEFAULT_BG_SEED_NAMES[1:3] ) - mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2).generate(100) + mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2, rng=self.rng).generate(100) spectral_distances = _get_spectral_distances(bg_seeds_ss, mixed_bg_ss) - self.assertTrue(all(spectral_distances == 0)) + self.assertTrue(np.isclose(spectral_distances, 0.0).all()) def test_spectrum_construction_2seeds_2mix_error(self): _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg( @@ -104,10 +106,11 @@ def _get_spectral_distances(seeds_ss, mixed_ss): mixed_ss.sources.values.T ).T spectral_distances 
= jensenshannon( - recon_spectra.astype(np.float32), - mixed_ss.spectra.values.astype(np.float32), + recon_spectra, + mixed_ss.spectra.values, axis=1 ) + spectral_distances = np.nan_to_num(spectral_distances, nan=0.0) return spectral_distances diff --git a/tests/visualize_tests.py b/tests/visualize_tests.py index e9e83fb3..151e9f48 100644 --- a/tests/visualize_tests.py +++ b/tests/visualize_tests.py @@ -23,7 +23,8 @@ class TestVisualize(unittest.TestCase): """Testing plot functions in the visualize module.""" - def setUp(self): + @classmethod + def setUpClass(self): """Test setup.""" self.fg_seeds_ss, self.bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() self.mixed_bg_seed_ss = SeedMixer(self.bg_seeds_ss, mixture_size=3).generate(10)
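
Two of the migrations in this patch are easy to get wrong when porting similar code, so small standalone sketches follow. Both are illustrative only: the column names, layer sizes, and data below are made up and are not part of the riid API.

First, the `groupby(axis=1, ...)` calls were replaced with a transpose/groupby/transpose round trip because column-wise `groupby` is deprecated in newer pandas releases. A minimal pandas-only sketch of the equivalence used in `LabelProportionEstimator.fit` and `_get_reordered_spectra`:

```python
import pandas as pd

# Hypothetical two-level source columns; the real SampleSet.sources
# MultiIndex has more levels, and these names are illustrative only.
columns = pd.MultiIndex.from_tuples(
    [("U235", "U235_a"), ("U235", "U235_b"), ("Cs137", "Cs137_a")],
    names=["Isotope", "Seed"],
)
sources = pd.DataFrame([[0.2, 0.3, 0.5], [0.1, 0.1, 0.8]], columns=columns)

# Deprecated column-wise form:
#   sources.groupby(axis=1, level="Isotope", sort=False).sum()
# Equivalent form used by the patch: transpose, group by the index level,
# sum, and transpose back.
collapsed = sources.T.groupby("Isotope", sort=False).sum().T
print(collapsed)  # one column per isotope: U235 = [0.5, 0.2], Cs137 = [0.5, 0.8]
```

Second, the ARAD models now register their reconstruction losses from inside `call()` via `add_loss()` instead of attaching a loss tensor to the functional model, which is why `compile()` is called without a `loss` argument. A toy stand-in (not the actual ARAD architecture) showing that pattern:

```python
import keras
import tensorflow as tf


class ToyAutoencoder(keras.Model):
    """Loss is computed in call() and registered with add_loss(),
    so compile() and fit() need no explicit loss or targets."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.encoder = keras.layers.Dense(4, activation="relu")
        self.decoder = keras.layers.Dense(16, activation="sigmoid")

    def call(self, x):
        decoded = self.decoder(self.encoder(x))
        # Reduce to a scalar before registering the loss.
        self.add_loss(tf.reduce_mean(keras.losses.log_cosh(x, decoded)))
        return decoded


model = ToyAutoencoder()
model.compile(optimizer="adam")  # no loss= needed; add_loss() supplies it
model.fit(tf.random.uniform((32, 16)), epochs=1, verbose=0)
```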