From f8f50889bb6d30abb0e4214df7361d687d82950a Mon Sep 17 00:00:00 2001 From: Tyler Morrow Date: Thu, 15 Aug 2024 06:40:45 -0600 Subject: [PATCH] Refactor for Python and dependency upgrades. --- .github/workflows/build-and-run-tests.yml | 10 +- .github/workflows/run-examples.yml | 10 +- README.md | 11 +- examples/data/synthesis/mix_seeds.py | 6 +- examples/modeling/arad.py | 4 +- examples/modeling/arad_latent_prediction.py | 7 +- examples/modeling/classifier_comparison.py | 4 +- .../modeling/label_proportion_estimation.py | 1 - examples/modeling/multi_event_classifier.py | 74 --- .../modeling/neural_network_classifier.py | 2 +- examples/visualization/confusion_matrix.py | 2 +- pyproject.toml | 51 +- riid/data/converters/__init__.py | 25 +- riid/data/sampleset.py | 23 +- riid/data/synthetic/seed.py | 9 +- riid/losses/__init__.py | 19 +- riid/metrics.py | 38 +- riid/models/__init__.py | 4 +- riid/models/bayes.py | 148 +++--- riid/models/layers.py | 82 +++ riid/models/neural_nets/__init__.py | 467 +++++------------- riid/models/neural_nets/arad.py | 148 +++--- tests/anomaly_tests.py | 7 +- tests/model_tests.py | 19 +- tests/seedmixer_tests.py | 29 +- tests/visualize_tests.py | 3 +- 26 files changed, 509 insertions(+), 694 deletions(-) delete mode 100644 examples/modeling/multi_event_classifier.py create mode 100644 riid/models/layers.py diff --git a/.github/workflows/build-and-run-tests.yml b/.github/workflows/build-and-run-tests.yml index b1c8e2a4..96633e40 100644 --- a/.github/workflows/build-and-run-tests.yml +++ b/.github/workflows/build-and-run-tests.yml @@ -7,8 +7,8 @@ jobs: build: strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] - os: [ubuntu-latest, windows-latest, macos-13] + python-version: ["3.9", "3.10", "3.11", "3.12"] + os: [ubuntu-latest, windows-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - name: Checkout @@ -19,6 +19,12 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" cache-dependency-path: "**/pyproject.toml" + - name: Install HDF5 (macOS only) + if: runner.os == 'macOS' + run: brew install hdf5 + - name: Set HDF5_DIR environment variable (macOS only) + if: runner.os == 'macOS' + run: echo "HDF5_DIR=$(brew --prefix hdf5)" >> $GITHUB_ENV - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/run-examples.yml b/.github/workflows/run-examples.yml index 295143f1..ddf06b6e 100644 --- a/.github/workflows/run-examples.yml +++ b/.github/workflows/run-examples.yml @@ -7,8 +7,8 @@ jobs: build: strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] - os: [ubuntu-latest, windows-latest, macos-13] + python-version: ["3.9", "3.10", "3.11", "3.12"] + os: [ubuntu-latest, windows-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - name: Checkout @@ -19,6 +19,12 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" cache-dependency-path: "**/pyproject.toml" + - name: Install HDF5 (macOS only) + if: runner.os == 'macOS' + run: brew install hdf5 + - name: Set HDF5_DIR environment variable (macOS only) + if: runner.os == 'macOS' + run: echo "HDF5_DIR=$(brew --prefix hdf5)" >> $GITHUB_ENV - name: Install dependencies run: | python -m pip install --upgrade pip setuptools wheel diff --git a/README.md b/README.md index 7d7c285e..8f01a50c 100644 --- a/README.md +++ b/README.md @@ -11,23 +11,22 @@ PyRIID is a Python package providing modeling and data synthesis utilities for m Requirements: -- Python version: 3.8 to 3.10 +- Python version: 3.9 to 3.12 + - Note: we recommended the highest Python 
version you can manage as anecdotally, we have noticed that everything just tends to get faster. - Operating systems: Windows, Mac, or Ubuntu -A virtual environment is recommended. - Tests and examples are run via Actions on many combinations of Python version and operating system. You can verify support for your platform by checking the workflow files. ### For Use -To use the latest version on PyPI (note: changes are slower to appear here), run: +To use the latest version on PyPI, run: ```sh pip install riid ``` -**For the latest features, run:** +Note that changes are slower to appear on PyPI, so for the latest features, run:** ```sh pip install git+https://github.com/sandialabs/pyriid.git@main @@ -86,7 +85,7 @@ Full copyright details can be found [here](https://github.com/sandialabs/PyRIID/ ## Acknowledgements **Thank you** to the U.S. Department of Energy, National Nuclear Security Administration, -Office of Defense Nuclear Nonproliferation Research and Development (DNN R&D) for funding that has led to version `2.x`. +Office of Defense Nuclear Nonproliferation Research and Development (DNN R&D) for funding that has led to versions `2.0` and `2.1`. Additionally, **thank you** to the following individuals who have provided invaluable subject-matter expertise: diff --git a/examples/data/synthesis/mix_seeds.py b/examples/data/synthesis/mix_seeds.py index 907dfc5a..911fc56d 100644 --- a/examples/data/synthesis/mix_seeds.py +++ b/examples/data/synthesis/mix_seeds.py @@ -2,14 +2,16 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This example demonstrates how to generate synthetic gamma spectra from seeds.""" +import numpy as np from riid.data.synthetic import get_dummy_seeds from riid.data.synthetic.seed import SeedMixer fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() -mixed_fg_seeds_ss = SeedMixer(fg_seeds_ss, mixture_size=2)\ +rng = np.random.default_rng(3) +mixed_fg_seeds_ss = SeedMixer(fg_seeds_ss, mixture_size=2, rng=rng)\ .generate(n_samples=10) -mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3)\ +mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3, rng=rng)\ .generate(n_samples=10) print(mixed_fg_seeds_ss) diff --git a/examples/modeling/arad.py b/examples/modeling/arad.py index 846975fe..f15d2e4e 100644 --- a/examples/modeling/arad.py +++ b/examples/modeling/arad.py @@ -14,7 +14,7 @@ # Config rng = np.random.default_rng(42) OOD_QUANTILE = 0.99 -VERBOSE = True +VERBOSE = False # Some of the following parameters are set low because this example runs on GitHub Actions and # we don't want it taking a bunch of time. # When running this locally, change the values per their corresponding comment, otherwise @@ -54,7 +54,7 @@ arad.predict(gross_train_ss) ood_threshold = np.quantile(gross_train_ss.info.recon_error, OOD_QUANTILE) - reconstructions = arad.predict(test_ss, verbose=True) + reconstructions = arad.predict(test_ss, verbose=VERBOSE) ood = test_ss.info.recon_error.values > ood_threshold false_positive_rate = ood.mean() mean_recon_error = test_ss.info.recon_error.values.mean() diff --git a/examples/modeling/arad_latent_prediction.py b/examples/modeling/arad_latent_prediction.py index fed2d805..9b233954 100644 --- a/examples/modeling/arad_latent_prediction.py +++ b/examples/modeling/arad_latent_prediction.py @@ -5,16 +5,17 @@ from an ARAD latent space. 
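The `mix_seeds.py` change above threads an explicit NumPy generator through `SeedMixer`. A minimal sketch of why that matters, assuming the mixer draws all of its randomness from the provided `rng` (the seed value and sample count are arbitrary):

```python
import numpy as np

from riid.data.synthetic import get_dummy_seeds
from riid.data.synthetic.seed import SeedMixer

fg_seeds_ss, _ = get_dummy_seeds().split_fg_and_bg()

# Identically seeded generators should reproduce the same mixtures.
mix_a = SeedMixer(fg_seeds_ss, mixture_size=2, rng=np.random.default_rng(3)).generate(n_samples=10)
mix_b = SeedMixer(fg_seeds_ss, mixture_size=2, rng=np.random.default_rng(3)).generate(n_samples=10)
print(np.allclose(mix_a.spectra.values, mix_b.spectra.values))  # expected: True
```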
""" import numpy as np +from keras.api.metrics import Accuracy, CategoricalCrossentropy from sklearn.metrics import f1_score, mean_squared_error from riid.data.synthetic import get_dummy_seeds from riid.data.synthetic.seed import SeedMixer from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets.arad import ARADv2, ARADLatentPredictor +from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv2 # Config rng = np.random.default_rng(42) -VERBOSE = True +VERBOSE = False # Some of the following parameters are set low because this example runs on GitHub Actions and # we don't want it taking a bunch of time. # When running this locally, change the values per their corresponding comment, otherwise @@ -66,7 +67,7 @@ print("Training Classifier") arad_classifier = ARADLatentPredictor( loss="categorical_crossentropy", - metrics=("accuracy", "categorical_crossentropy"), + metrics=[Accuracy(), CategoricalCrossentropy()], final_activation="softmax" ) arad_classifier.fit( diff --git a/examples/modeling/classifier_comparison.py b/examples/modeling/classifier_comparison.py index 7b1b4e12..4fce43e7 100644 --- a/examples/modeling/classifier_comparison.py +++ b/examples/modeling/classifier_comparison.py @@ -36,8 +36,8 @@ train_fg_ss, _ = static_synth.generate(fg_seeds_ss, mixed_bg_seed_ss, verbose=False) train_fg_ss.normalize() -model_nn = MLPClassifier(hidden_layers=(5,)) -model_nn.fit(train_fg_ss, epochs=10, patience=5, verbose=1) +model_nn = MLPClassifier() +model_nn.fit(train_fg_ss, epochs=10, patience=5) # Create PB model model_pb = PoissonBayesClassifier() diff --git a/examples/modeling/label_proportion_estimation.py b/examples/modeling/label_proportion_estimation.py index 8f3a4ef5..b4e5e4c4 100644 --- a/examples/modeling/label_proportion_estimation.py +++ b/examples/modeling/label_proportion_estimation.py @@ -55,7 +55,6 @@ batch_size=10, epochs=2, validation_split=0.2, - verbose=True, bg_cps=300 ) diff --git a/examples/modeling/multi_event_classifier.py b/examples/modeling/multi_event_classifier.py deleted file mode 100644 index eac23d44..00000000 --- a/examples/modeling/multi_event_classifier.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). -# Under the terms of Contract DE-NA0003525 with NTESS, -# the U.S. Government retains certain rights in this software. -"""This example demonstrates how to use the MLP classifier.""" -from copy import deepcopy as copy - -from sklearn.metrics import f1_score - -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets import MLPClassifier, MultiEventClassifier - -fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() -static_syn = StaticSynthesizer( - # log10 sampling samples lower SNR values more frequently. - # This makes the SampleSet overall "harder" to classify. 
- snr_function="log10", - samples_per_seed=50, - return_fg=True, - return_gross=True, -) - -# Generate some training data -fg_ss, gross_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss) -bg_ss = gross_ss - fg_ss -bg_ss.normalize() -gross_ss.normalize() - -# Train two single event classifiers -model1 = MLPClassifier() -model1.fit(gross_ss, bg_ss=bg_ss, verbose=1, epochs=50, patience=20) - -model2 = MLPClassifier() -model2.fit(gross_ss, bg_ss=bg_ss, verbose=1, epochs=50, patience=20) - -# Generate two sample sets (with same sources but predictions from different models) -train2a_fg_ss, train2a_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss) -train2a_bg_ss = train2a_ss - train2a_fg_ss -train2b_ss = copy(train2a_ss) -train2b_bg_ss = copy(train2a_bg_ss) - -model1.predict(train2a_ss, train2a_bg_ss) -model2.predict(train2b_ss, train2b_bg_ss) - -# Train MultiEvent model -mec = MultiEventClassifier() -mec.fit( - [train2a_ss, train2b_ss], - train2a_ss.sources.groupby(axis=1, level="Isotope", sort=False).sum(), - epochs=50 -) - -# Make predictions on multi model - -multi_preds = mec.predict([train2a_ss, train2b_ss]) - -# Compare performance for single event models and multi-event model -m1_f1_score = f1_score(train2a_ss.get_predictions(), - train2a_ss.get_labels(), - average="weighted") -m2_f1_score = f1_score(train2b_ss.get_predictions(), - train2b_ss.get_labels(), - average="weighted") - -multi_f1_score = f1_score(multi_preds.values.argmax(axis=1), - train2a_ss.get_source_contributions().argmax(axis=1), - average="weighted") - -results_str = ( - f"M1 F1 Score: {m1_f1_score:.2f}\n" - f"M2 F1 Score: {m2_f1_score:.2f}\n" - f"M12 F1 Score: {multi_f1_score:.2f}" -) -print(results_str) diff --git a/examples/modeling/neural_network_classifier.py b/examples/modeling/neural_network_classifier.py index 8ab7d55f..277a2dc5 100644 --- a/examples/modeling/neural_network_classifier.py +++ b/examples/modeling/neural_network_classifier.py @@ -24,7 +24,7 @@ train_ss.normalize() model = MLPClassifier() -model.fit(train_ss, epochs=10, patience=5, verbose=1) +model.fit(train_ss, epochs=10, patience=5) # Generate some test data static_synth.samples_per_seed = 50 diff --git a/examples/visualization/confusion_matrix.py b/examples/visualization/confusion_matrix.py index 704f4d1b..b2e25e72 100644 --- a/examples/visualization/confusion_matrix.py +++ b/examples/visualization/confusion_matrix.py @@ -26,7 +26,7 @@ .generate(fg_seeds_ss, mixed_bg_seed_ss) train_ss.normalize() -model = MLPClassifier(hidden_layers=(8,)) +model = MLPClassifier() model.fit(train_ss, verbose=0, epochs=50) # Generate some test data diff --git a/pyproject.toml b/pyproject.toml index a7af43bc..c20c480e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ namespaces = false [project] name = "riid" description = "Machine learning-based models and utilities for radioisotope identification" -version = "2.1.0" +version = "2.2.0" maintainers = [ {name="Tyler Morrow", email="tmorro@sandia.gov"}, ] @@ -41,44 +41,43 @@ classifiers = [ 'Topic :: Software Development', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', + 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ] keywords = ["pyriid", "riid", 
"machine learning", "radioisotope identification", "gamma spectrum"] -requires-python = ">=3.8,<3.11" +requires-python = ">=3.9,<3.13" dependencies = [ - "jsonschema ==4.17.*", - "matplotlib ==3.7.*", - "pyyaml ==6.0.*", - "seaborn ==0.12.*", - "tf2onnx ==1.14.*", - "onnx ==1.14.1", - "tqdm ==4.65.*", - "numpy ==1.23.*", - "pandas ==2.0.*", - "parmap ==1.6.*", - "pythonnet ==3.0.*; platform_system == 'Windows'", - "tables ==3.8.*", - "tensorflow ==2.12.*", - "tensorflow-io ==0.27.*", - "tensorflow-model-optimization ==0.7.*", - "tensorflow-probability ==0.20.*", - "typeguard ==2.7.*", - "scikit-learn ==1.2.*", + "jsonschema ==4.23.*", # 3.8 - 3.13 + "matplotlib ==3.9.*", # 3.9 - 3.12 + "numpy ==1.26.*", # 3.9 - 3.12, also to be limited by onnx 1.16.2 + "pandas ==2.2.*", # >= 3.9 + "pythonnet ==3.0.3; platform_system == 'Windows'", # 3.7 - 3.12 + "pyyaml ==6.0.*", # >= 3.6 + "tables ==3.9.*", # >= 3.9 + "scikit-learn ==1.5.*", # 3.9 - 3.12 + "scipy ==1.13.*", # >= 3.10 + "seaborn ==0.13.*", # >= 3.8 + "tensorflow ==2.16.*", # 3.9 - 3.12 + "tensorflow-model-optimization ==0.8.*", # 3.7 - 3.12 + "onnx ==1.16.1", # 3.7 - 3.10 + "tf2onnx ==1.16.1", # 3.7 - 3.10 + "tqdm ==4.66.*", # >= 3.7 + "typeguard ==4.3.*", # 3.9 - 3.12 ] [project.optional-dependencies] dev = [ - "flake8", - "flake8-quotes", "coverage", "ipykernel", + "flake8", + "flake8-quotes", "tabulate", ] [project.urls] -repository = "https://github.com/sandialabs/PyRIID" +Documentation = "https://sandialabs.github.io/PyRIID" +Repository = "https://github.com/sandialabs/PyRIID" diff --git a/riid/data/converters/__init__.py b/riid/data/converters/__init__.py index 2959f299..c956e827 100644 --- a/riid/data/converters/__init__.py +++ b/riid/data/converters/__init__.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Callable -import parmap as pm +from joblib import Parallel, delayed def _validate_and_create_output_dir(output_dir: str): @@ -17,8 +17,8 @@ def _validate_and_create_output_dir(output_dir: str): def convert_directory(input_dir_path: str, conversion_func: Callable, file_ext: str, - pm_processes: int = 8, pm_chunksize: int = 1, **kwargs): - """Convert and save every file in a specified directory in parallel. + n_jobs: int = 8, **kwargs): + """Convert and save every file in a specified directory. Conversion functions can be found in sub-modules: @@ -32,16 +32,14 @@ def convert_directory(input_dir_path: str, conversion_func: Callable, file_ext: convert_directory(...) ``` - Consider setting `pm_processes` to `multiprocessing.cpu_count()`; - unfortunately, `pm_chunksize` requires some experimentation to fully optimize. + Tip: for max utilization, considering setting `n_jobs` to `multiprocessing.cpu_count()`. 
Args: input_dir_path: directory path containing the input files conversion_func: function used to convert a data file to a `SampleSet` file_ext: file extension to read in for conversion - pm_processes: parmap parameter to set the # of processes - pm_chunksize: parmap parameter to set the chunksize - kwargs: keyword args passed to underlying conversion_func operations + n_jobs: `joblib.Parallel` parameter to set the # of jobs + kwargs: additional keyword args passed to conversion_func """ input_path = Path(input_dir_path) if not input_path.exists() or not input_path.is_dir(): @@ -50,13 +48,6 @@ def convert_directory(input_dir_path: str, conversion_func: Callable, file_ext: input_file_paths = sorted(glob.glob(f"{input_dir_path}/*.{file_ext}")) - x = pm.map( - conversion_func, - input_file_paths, - **kwargs, - pm_processes=pm_processes, - pm_chunksize=pm_chunksize, - pm_parallel=True, - pm_pbar=True, + Parallel(n_jobs, verbose=10)( + delayed(conversion_func)(path, **kwargs) for path in input_file_paths ) - return x diff --git a/riid/data/sampleset.py b/riid/data/sampleset.py index 9e520428..47dd0332 100644 --- a/riid/data/sampleset.py +++ b/riid/data/sampleset.py @@ -13,7 +13,7 @@ import random import re import warnings -from datetime import datetime +from datetime import datetime, timezone from enum import Enum from typing import Callable, Iterable, Tuple, Union @@ -759,8 +759,9 @@ def concat(self, ss_list: list): sort=False ) self._sources = self._sources.where(pd.notnull(self._sources), 0) + existing_info_df = self._info if not self._info.empty else None self._info = pd.concat( - [self._info] + [ss.info for ss in ss_list], + [existing_info_df] + [ss.info for ss in ss_list], ignore_index=True, sort=False ) @@ -1015,7 +1016,8 @@ def get_confidences(self, fg_seeds_ss: SampleSet, bg_seed_ss: SampleSet = None, self.spectra.values, **confidence_func_kwargs ) - return np.array(confidences) + confidences = np.array(confidences) + return confidences def _get_spectral_distances(self, distance_func=distance.jensenshannon) -> np.array: n_samples = self.n_samples @@ -1111,7 +1113,7 @@ def get_source_contributions(self, target_level="Isotope") -> np.ndarray: Returns: Array containing the ground truth contributions for each sample """ - collapsed_sources = self.sources.groupby(axis=1, level=target_level).sum() + collapsed_sources = self.sources.T.groupby(target_level).sum().T sources_values = np.nan_to_num(collapsed_sources) return sources_values @@ -1465,7 +1467,8 @@ def _dict_to_bulleted_list(data_dict: dict, level=0, indent=4, bullet="-") -> st def _get_utc_timestamp(): - ts = datetime.utcnow().isoformat(sep=" ", timespec="seconds") + now_utc = datetime.now(timezone.utc) + ts = now_utc.isoformat(sep=" ", timespec="seconds") return ts @@ -1496,10 +1499,10 @@ def _get_row_labels(df: pd.DataFrame, target_level: str = "Isotope", max_only: b """ if max_only: if level_aggregation == "sum": - values = df.groupby(axis=1, level=target_level).sum() + values = df.T.groupby(target_level).sum().T labels = values.idxmax(axis=1) elif level_aggregation == "mean": - values = df.groupby(axis=1, level=target_level).mean() + values = df.T.groupby(target_level).mean().T labels = values.idxmax(axis=1) else: levels_to_drop = [ @@ -1513,9 +1516,9 @@ def _get_row_labels(df: pd.DataFrame, target_level: str = "Isotope", max_only: b labels = [f"{x} ({y:.2f})" for x, y in zip(labels, values)] else: # Much slower if level_aggregation == "sum": - values = df.groupby(axis=1, level=target_level).sum() + values = 
df.T.groupby(target_level).sum().T elif level_aggregation == "mean": - values = df.groupby(axis=1, level=target_level).mean() + values = df.T.groupby(target_level).mean().T else: values = df mask = values.ge(min_value).values @@ -1698,7 +1701,7 @@ def _ss_to_pcf_dict(ss: SampleSet, verbose=False) -> dict: info_timestamps = ss.info.timestamp.fillna("") info_tags = ss.info.tag.fillna("") - info_zero_fill = ss.info.fillna(0).astype(float, errors="ignore") + info_zero_fill = ss.info.infer_objects(copy=False).astype(float, errors="ignore") for i in sample_range: title = isotopes[i] if not isotopes.empty else NO_ISOTOPE description = ss.info.description.fillna("").iloc[i] diff --git a/riid/data/synthetic/seed.py b/riid/data/synthetic/seed.py index 08e19215..9fd1b2c4 100644 --- a/riid/data/synthetic/seed.py +++ b/riid/data/synthetic/seed.py @@ -292,10 +292,13 @@ def __call__(self, n_samples: int, max_batch_size: int = 100, ) sources_df = pd.DataFrame([r], columns=sources_cols) batch_sources_dfs.append(sources_df) - empty_sources_df = pd.DataFrame([], columns=self.seeds_ss.sources.columns) - batch_ss.sources = pd\ - .concat([empty_sources_df] + batch_sources_dfs)\ + sources_df = pd\ + .concat(batch_sources_dfs)\ .fillna(0.0) + batch_ss.sources = sources_df.reindex( + columns=self.seeds_ss.sources.columns, + fill_value=0.0 + ) n_samples_produced += batch_size diff --git a/riid/losses/__init__.py b/riid/losses/__init__.py index ae18ccc8..26ade493 100644 --- a/riid/losses/__init__.py +++ b/riid/losses/__init__.py @@ -4,7 +4,7 @@ """This module contains custom loss functions.""" import numpy as np import tensorflow as tf -from keras import backend as K +from keras.api import ops def negative_log_f1(y_true: np.ndarray, y_pred: np.ndarray): @@ -18,13 +18,13 @@ def negative_log_f1(y_true: np.ndarray, y_pred: np.ndarray): Custom loss score on a log scale """ diff = y_true - y_pred - negs = K.clip(diff, -1.0, 0.0) - false_positive = -K.sum(negs, axis=-1) + negs = ops.clip(diff, -1.0, 0.0) + false_positive = -ops.sum(negs, axis=-1) true_positive = 1.0 - false_positive lower_clip = 1e-20 - true_positive = K.clip(true_positive, lower_clip, 1.0) + true_positive = ops.clip(true_positive, lower_clip, 1.0) - return -K.mean(K.log(true_positive)) + return -ops.mean(ops.log(true_positive)) def negative_f1(y_true, y_pred): @@ -38,13 +38,13 @@ def negative_f1(y_true, y_pred): Custom loss score """ diff = y_true - y_pred - negs = K.clip(diff, -1.0, 0.0) - false_positive = -K.sum(negs, axis=-1) + negs = ops.clip(diff, -1.0, 0.0) + false_positive = -ops.sum(negs, axis=-1) true_positive = 1.0 - false_positive lower_clip = 1e-20 - true_positive = K.clip(true_positive, lower_clip, 1.0) + true_positive = ops.clip(true_positive, lower_clip, 1.0) - return -K.mean(true_positive) + return -ops.mean(true_positive) def build_keras_semisupervised_loss_func(supervised_loss_func, @@ -54,6 +54,7 @@ def build_keras_semisupervised_loss_func(supervised_loss_func, normalize: bool = False, normalize_scaler: float = 1.0, normalize_func=tf.math.tanh): + @tf.keras.utils.register_keras_serializable(package="Addons") def _semisupervised_loss_func(data, y_pred): """ Args: diff --git a/riid/metrics.py b/riid/metrics.py index 8b5c48dd..aa677448 100644 --- a/riid/metrics.py +++ b/riid/metrics.py @@ -20,14 +20,14 @@ def multi_f1(y_true: np.ndarray, y_pred: np.ndarray) -> float: Returns: Multi F1-score value(s) """ - from keras import backend as K + from keras.api import ops diff = y_true - y_pred - negs = K.clip(diff, -1.0, 0.0) - false_positive = 
-K.sum(negs, axis=-1) + negs = ops.clip(diff, -1.0, 0.0) + false_positive = -ops.sum(negs, axis=-1) true_positive = 1.0 - false_positive - return K.mean(true_positive) + return ops.mean(true_positive) def single_f1(y_true: np.ndarray, y_pred: np.ndarray): @@ -43,23 +43,23 @@ def single_f1(y_true: np.ndarray, y_pred: np.ndarray): F1-score value(s) """ import tensorflow as tf - from keras import backend as K + from keras.api import ops - a = tf.dtypes.cast(y_true == K.max(y_true, axis=1)[:, None], tf.float32) - b = tf.dtypes.cast(y_pred == K.max(y_pred, axis=1)[:, None], tf.float32) + a = tf.dtypes.cast(y_true == ops.max(y_true, axis=1)[:, None], tf.float32) + b = tf.dtypes.cast(y_pred == ops.max(y_pred, axis=1)[:, None], tf.float32) - TP_mat = tf.dtypes.cast(K.all(tf.stack([a, b]), axis=0), tf.float32) - FP_mat = tf.dtypes.cast(K.all(tf.stack([a != b, b == 1]), axis=0), tf.float32) - FN_mat = tf.dtypes.cast(K.all(tf.stack([a != b, a == 1]), axis=0), tf.float32) + TP_mat = tf.dtypes.cast(ops.all(tf.stack([a, b]), axis=0), tf.float32) + FP_mat = tf.dtypes.cast(ops.all(tf.stack([a != b, b == 1]), axis=0), tf.float32) + FN_mat = tf.dtypes.cast(ops.all(tf.stack([a != b, a == 1]), axis=0), tf.float32) - TPs = K.sum(TP_mat, axis=0) - FPs = K.sum(FP_mat, axis=0) - FNs = K.sum(FN_mat, axis=0) + TPs = ops.sum(TP_mat, axis=0) + FPs = ops.sum(FP_mat, axis=0) + FNs = ops.sum(FN_mat, axis=0) - F1s = 2 * TPs / (2*TPs + FNs + FPs + tf.fill(tf.shape(TPs), K.epsilon())) + F1s = 2 * TPs / (2*TPs + FNs + FPs + tf.fill(tf.shape(TPs), tf.keras.backend.epsilon())) - support = K.sum(a, axis=0) - f1 = K.sum(F1s * support) / K.sum(support) + support = ops.sum(a, axis=0) + f1 = ops.sum(F1s * support) / ops.sum(support) return f1 @@ -122,10 +122,10 @@ def precision_recall_curve(ss: SampleSet, smooth: bool = True, multiclass: bool https://jonathan-hui.medium.com/map-mean-average-precision-for-object-detection-45c121a31173) """ - y_true = ss.sources.groupby(axis=1, level=target_level, sort=False).sum() + y_true = ss.sources.T.groupby(target_level, sort=False).sum().T if minimum_contribution is not None: y_true = (y_true > minimum_contribution).astype(int) - y_pred = ss.prediction_probas.groupby(axis=1, level=target_level, sort=False).sum() + y_pred = ss.prediction_probas.T.groupby(target_level, sort=False).sum().T # switch from pandas to numpy labels = y_true.columns @@ -250,6 +250,6 @@ def build_keras_semisupervised_metric_func(keras_metric_func, activation_func, n_labels): def metric_func(y_true, y_pred): return keras_metric_func(y_true[:, :n_labels], activation_func(y_pred)) - metric_func.__name__ = keras_metric_func.__name__ + metric_func.__name__ = keras_metric_func.__class__.__name__ return metric_func diff --git a/riid/models/__init__.py b/riid/models/__init__.py index f2c29971..0825b251 100644 --- a/riid/models/__init__.py +++ b/riid/models/__init__.py @@ -11,8 +11,8 @@ import numpy as np import tensorflow as tf import tf2onnx -from keras.models import Model -from keras.utils import get_custom_objects +from keras.api.models import Model +from keras.api.utils import get_custom_objects import riid from riid.data.labeling import label_to_index_element diff --git a/riid/models/bayes.py b/riid/models/bayes.py index 758b0229..6850cbb1 100644 --- a/riid/models/bayes.py +++ b/riid/models/bayes.py @@ -5,10 +5,14 @@ import numpy as np import pandas as pd import tensorflow as tf -import tensorflow_probability as tfp +from keras.api.layers import Add, Input, Multiply, Subtract +from keras.api.models import Model from 
riid.data.sampleset import SampleSet from riid.models import PyRIIDModel +from riid.models.layers import (ClipByValueLayer, DivideLayer, ExpandDimsLayer, + PoissonLogProbabilityLayer, ReduceMaxLayer, + ReduceSumLayer, SeedLayer) class PoissonBayesClassifier(PyRIIDModel): @@ -29,6 +33,14 @@ class PoissonBayesClassifier(PyRIIDModel): def __init__(self): super().__init__() + self._update_custom_objects("ReduceSumLayer", ReduceSumLayer) + self._update_custom_objects("ReduceMaxLayer", ReduceMaxLayer) + self._update_custom_objects("DivideLayer", DivideLayer) + self._update_custom_objects("ExpandDimsLayer", ExpandDimsLayer) + self._update_custom_objects("ClipByValueLayer", ClipByValueLayer) + self._update_custom_objects("PoissonLogProbabilityLayer", PoissonLogProbabilityLayer) + self._update_custom_objects("SeedLayer", SeedLayer) + def fit(self, seeds_ss: SampleSet): """Construct a TF-based implementation of a poisson-bayes classifier in terms of the given seeds. @@ -50,74 +62,98 @@ def fit(self, seeds_ss: SampleSet): msg = "Argument 'seeds_ss' can't contain any spectra with zero total counts." raise ZeroTotalCountsError(msg) - self._seeds = tf.constant(tf.convert_to_tensor( + self._seeds = tf.convert_to_tensor( seeds_ss.spectra.values, dtype=tf.float32 - )) - - # Inputs - gross_spectrum_input = tf.keras.layers.Input( - shape=seeds_ss.n_channels, - name="gross_spectrum" - ) - gross_live_time_input = tf.keras.layers.Input( - shape=(), - name="gross_live_time" - ) - bg_spectrum_input = tf.keras.layers.Input( - shape=seeds_ss.n_channels, - name="bg_spectrum" - ) - bg_live_time_input = tf.keras.layers.Input( - shape=(), - name="bg_live_time" - ) - - # Compute expected_seed_spectrums - gross_total_counts = tf.reduce_sum(gross_spectrum_input, axis=1) - bg_total_counts = tf.reduce_sum(bg_spectrum_input, axis=1) - bg_count_rate = tf.divide(bg_total_counts, bg_live_time_input) - expected_bg_counts = tf.multiply(bg_count_rate, gross_live_time_input) - expected_fg_counts = tf.subtract(gross_total_counts, expected_bg_counts) - normalized_bg_spectrum = tf.divide( - bg_spectrum_input, - tf.expand_dims(bg_total_counts, axis=1) - ) - expected_bg_spectrum = tf.multiply( - normalized_bg_spectrum, - tf.expand_dims(expected_bg_counts, axis=1) - ) - expected_fg_spectrum = tf.multiply( - self._seeds, - tf.expand_dims(tf.expand_dims( - expected_fg_counts, - axis=-1 - ), axis=-1) - ) - max_value = tf.math.reduce_max(expected_fg_spectrum) - expected_fg_spectrum = tf.clip_by_value(expected_fg_spectrum, 1e-8, max_value) - expected_gross_spectrum = tf.add( - expected_fg_spectrum, - tf.expand_dims(expected_bg_spectrum, axis=1) ) - poisson_dist = tfp.distributions.Poisson(expected_gross_spectrum) - all_probas = poisson_dist.log_prob( - tf.expand_dims(gross_spectrum_input, axis=1) - ) - prediction_probas = tf.math.reduce_sum(all_probas, axis=2) - + # Inputs + gross_spectrum_input = Input(shape=(seeds_ss.n_channels,), + name="gross_spectrum") + gross_live_time_input = Input(shape=(), + name="gross_live_time") + bg_spectrum_input = Input(shape=(seeds_ss.n_channels,), + name="bg_spectrum") + bg_live_time_input = Input(shape=(), + name="bg_live_time") model_inputs = ( gross_spectrum_input, gross_live_time_input, bg_spectrum_input, bg_live_time_input, ) - self.model = tf.keras.Model(model_inputs, prediction_probas) + + # Input statistics + gross_total_counts = ReduceSumLayer(name="gross_total_counts")(gross_spectrum_input, axis=1) + bg_total_counts = ReduceSumLayer(name="bg_total_counts")(bg_spectrum_input, axis=1) + bg_count_rate 
= DivideLayer(name="bg_count_rate")([bg_total_counts, bg_live_time_input]) + + gross_spectrum_input_expanded = ExpandDimsLayer( + name="gross_spectrum_input_expanded" + )(gross_spectrum_input, axis=1) + bg_total_counts_expanded = ExpandDimsLayer( + name="bg_total_counts_expanded" + )(bg_total_counts, axis=1) + + # Expectations + seed_layer = SeedLayer(self._seeds)(model_inputs) + seed_layer_expanded = ExpandDimsLayer()(seed_layer, axis=0) + expected_bg_counts = Multiply( + trainable=False, + name="expected_bg_counts" + )([bg_count_rate, gross_live_time_input]) + expected_bg_counts_expanded = ExpandDimsLayer( + name="expected_bg_counts_expanded" + )(expected_bg_counts, axis=1) + normalized_bg_spectrum = DivideLayer( + name="normalized_bg_spectrum" + )([bg_spectrum_input, bg_total_counts_expanded]) + expected_bg_spectrum = Multiply( + trainable=False, + name="expected_bg_spectrum" + )([normalized_bg_spectrum, expected_bg_counts_expanded]) + expected_fg_counts = Subtract( + trainable=False, + name="expected_fg_counts" + )([gross_total_counts, expected_bg_counts]) + expected_fg_counts_expanded = ExpandDimsLayer( + name="expected_fg_counts_expanded" + )(expected_fg_counts, axis=-1) + expected_fg_counts_expanded2 = ExpandDimsLayer( + name="expected_fg_counts_expanded2" + )(expected_fg_counts_expanded, axis=-1) + expected_fg_spectrum = Multiply( + trainable=False, + name="expected_fg_spectrum" + )([seed_layer_expanded, expected_fg_counts_expanded2]) + max_fg_value = ReduceMaxLayer( + name="max_fg_value" + )(expected_fg_spectrum) + expected_fg_spectrum = ClipByValueLayer( + name="clip_expected_fg_spectrum" + )(expected_fg_spectrum, clip_value_min=1e-8, clip_value_max=max_fg_value) + expected_bg_spectrum_expanded = ExpandDimsLayer( + name="expected_bg_spectrum_expanded" + )(expected_bg_spectrum, axis=1) + expected_gross_spectrum = Add( + trainable=False, + name="expected_gross_spectrum" + )([expected_fg_spectrum, expected_bg_spectrum_expanded]) + + # Compute probabilities + log_probabilities = PoissonLogProbabilityLayer( + name="log_probabilities" + )([expected_gross_spectrum, gross_spectrum_input_expanded]) + summed_log_probabilities = ReduceSumLayer( + name="summed_log_probabilities" + )(log_probabilities, axis=2) + + # Assemble model + self.model = Model(model_inputs, summed_log_probabilities) self.model.compile() self.target_level = "Seed" - sources_df = seeds_ss.sources.groupby(axis=1, level=self.target_level, sort=False).sum() + sources_df = seeds_ss.sources.T.groupby(self.target_level, sort=False).sum().T self.model_outputs = sources_df.columns.values.tolist() def predict(self, gross_ss: SampleSet, bg_ss: SampleSet, diff --git a/riid/models/layers.py b/riid/models/layers.py new file mode 100644 index 00000000..c250e09f --- /dev/null +++ b/riid/models/layers.py @@ -0,0 +1,82 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
+"""This module contains custom Keras layers.""" +import tensorflow as tf +from keras.api.layers import Layer + + +class ReduceSumLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x, axis): + return tf.reduce_sum(x, axis=axis) + + +class ReduceMaxLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x): + return tf.reduce_max(x) + + +class DivideLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x): + return tf.divide(x[0], x[1]) + + +class ExpandDimsLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x, axis): + return tf.expand_dims(x, axis=axis) + + +class ClipByValueLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x, clip_value_min, clip_value_max): + return tf.clip_by_value(x, clip_value_min=clip_value_min, clip_value_max=clip_value_max) + + +class PoissonLogProbabilityLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, x): + exp, value = x + log_probas = tf.math.xlogy(value, exp) - exp - tf.math.lgamma(value + 1) + return log_probas + + +class SeedLayer(Layer): + def __init__(self, seeds, **kwargs): + super(SeedLayer, self).__init__(**kwargs) + self.seeds = tf.convert_to_tensor(seeds) + + def get_config(self): + config = super().get_config() + config.update({ + "seeds": self.seeds.numpy().tolist(), + }) + return config + + def call(self, inputs): + return self.seeds + + +class L1NormLayer(Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def call(self, inputs): + sums = tf.reduce_sum(inputs, axis=-1) + l1_norm = inputs / tf.reshape(sums, (-1, 1)) + return l1_norm diff --git a/riid/models/neural_nets/__init__.py b/riid/models/neural_nets/__init__.py index e46c700c..42443f88 100644 --- a/riid/models/neural_nets/__init__.py +++ b/riid/models/neural_nets/__init__.py @@ -2,88 +2,90 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
"""This module contains neural network-based classifiers and regressors.""" -from typing import Any, List - +import keras import numpy as np import pandas as pd import tensorflow as tf -from keras.callbacks import EarlyStopping -from keras.layers import Dense, Dropout -from keras.optimizers import Adam -from keras.regularizers import L1L2, l1, l2 +from keras.api.activations import sigmoid, softmax +from keras.api.callbacks import EarlyStopping +from keras.api.layers import Dense, Dropout, Input +from keras.api.losses import CategoricalCrossentropy, MeanSquaredError +from keras.api.metrics import F1Score, Precision, Recall +from keras.api.models import Model +from keras.api.optimizers import Adam +from keras.api.regularizers import L1L2, l1, l2 +from keras.api.utils import split_dataset from scipy.interpolate import UnivariateSpline -from riid.data.sampleset import SampleSet +from riid.data.sampleset import SampleSet, SpectraType from riid.losses import (build_keras_semisupervised_loss_func, chi_squared_diff, jensen_shannon_divergence, normal_nll_diff, poisson_nll_diff, reconstruction_error, sse_diff, weighted_sse_diff) from riid.losses.sparsemax import SparsemaxLoss, sparsemax -from riid.metrics import (build_keras_semisupervised_metric_func, multi_f1, - single_f1) +from riid.metrics import build_keras_semisupervised_metric_func from riid.models import ModelInput, PyRIIDModel +from riid.models.layers import L1NormLayer class MLPClassifier(PyRIIDModel): """Multi-layer perceptron classifier.""" - def __init__(self, hidden_layers: tuple = (512,), activation: str = "relu", - loss: str = "categorical_crossentropy", - optimizer: Any = Adam(learning_rate=0.01, clipnorm=0.001), - metrics: tuple = ("accuracy", "categorical_crossentropy", multi_f1, single_f1), - l2_alpha: float = 1e-4, activity_regularizer=l1(0), dropout: float = 0.0, - learning_rate: float = 0.01, final_activation: str = "softmax"): + def __init__(self, activation=None, loss=None, optimizer=None, + metrics=None, l2_alpha: float = 1e-4, + activity_regularizer=None, final_activation=None): """ Args: - hidden_layers: tuple defining the number and size of dense layers activation: activate function to use for each dense layer loss: loss function to use for training optimizer: tensorflow optimizer or optimizer name to use for training metrics: list of metrics to be evaluating during training l2_alpha: alpha value for the L2 regularization of each dense layer activity_regularizer: regularizer function applied each dense layer output - dropout: amount of dropout to apply to each dense layer - learning_rate: learning rate to use for an Adam optimizer + final_activation: final activation function to apply to model output """ super().__init__() - self.hidden_layers = hidden_layers self.activation = activation - self.final_activation = final_activation self.loss = loss - if optimizer == "adam": - self.optimizer = Adam(learning_rate=learning_rate) - else: - self.optimizer = optimizer self.optimizer = optimizer + self.final_activation = final_activation self.metrics = metrics self.l2_alpha = l2_alpha self.activity_regularizer = activity_regularizer - self.dropout = dropout + self.final_activation = final_activation + + if self.activation is None: + self.activation = "relu" + if self.loss is None: + self.loss = CategoricalCrossentropy() + if optimizer is None: + self.optimizer = Adam(learning_rate=0.01, clipnorm=0.001) + if self.metrics is None: + self.metrics = [F1Score(), Precision(), Recall()] + if self.activity_regularizer is None: + 
self.activity_regularizer = l1(0.0) + if self.final_activation is None: + self.final_activation = "softmax" self.model = None + self._predict_fn = None - def fit(self, ss: SampleSet, bg_ss: SampleSet = None, - ss_input_type: ModelInput = ModelInput.GrossSpectrum, - bg_ss_input_type: ModelInput = ModelInput.BackgroundSpectrum, - batch_size: int = 200, epochs: int = 20, - validation_split: float = 0.2, callbacks=None, val_ss: SampleSet = None, - val_bg_ss: SampleSet = None, patience: int = 15, es_monitor: str = "val_loss", + def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20, + validation_split: float = 0.2, callbacks=None, + patience: int = 15, es_monitor: str = "val_loss", es_mode: str = "min", es_verbose=0, target_level="Isotope", verbose: bool = False): """Fit a model to the given `SampleSet`(s). Args: ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either foreground (AKA, "net") or gross. - bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background batch_size: number of samples per gradient update epochs: maximum number of training iterations validation_split: percentage of the training data to use as validation data callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method - val_ss: validation set to be used instead of taking a portion of the training data - val_bg_ss: validation set to be used as background for `val_ss` - patience: number of epochs to wait for `tf.keras.callbacks.EarlyStopping` - es_monitor: quantity to be monitored for `tf.keras.callbacks.EarlyStopping` - es_mode: mode for `tf.keras.callbacks.EarlyStopping` - es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping` + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object target_level: `SampleSet.sources` column level to use verbose: whether to show detailed model training output @@ -96,75 +98,44 @@ def fit(self, ss: SampleSet, bg_ss: SampleSet = None, if ss.n_samples <= 0: raise ValueError("No spectr[a|um] provided!") - x_train = ss.get_samples().astype(float) - source_contributions_df = ss.sources.groupby(axis=1, level=target_level, sort=False).sum() - y_train = source_contributions_df.values.astype(float) - if bg_ss: - x_bg_train = bg_ss.get_samples().astype(float) - - if val_ss: - if val_bg_ss: - val_data = ( - [val_ss.get_samples().astype(float), val_bg_ss.get_samples().astype(float)], - val_ss.get_source_contributions().astype(float), - ) - else: - val_data = ( - val_ss.get_samples().astype(float), - val_ss.get_source_contributions().astype(float), - ) - validation_split = None + if ss.spectra_type == SpectraType.Gross: + self.model_inputs = (ModelInput.GrossSpectrum,) + elif ss.spectra_type == SpectraType.Foreground: + self.model_inputs = (ModelInput.ForegroundSpectrum,) + elif ss.spectra_type == SpectraType.Background: + self.model_inputs = (ModelInput.BackgroundSpectrum,) else: - val_data = None - row_order = np.arange(x_train.shape[0]) - np.random.shuffle(row_order) - # Enforce random validation split through shuffling - x_train = x_train[row_order] - y_train = y_train[row_order] - - if bg_ss: - x_bg_train = x_bg_train[row_order] + raise ValueError(f"{ss.spectra_type} is not supported in this model.") + + X = ss.get_samples() + source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T + model_outputs = 
source_contributions_df.columns.values.tolist() + Y = source_contributions_df.values + + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32) + training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor)) + training_dataset, validation_dataset = split_dataset( + training_dataset, + left_size=validation_split, + shuffle=True + ) + training_dataset = training_dataset.batch(batch_size=batch_size) + validation_dataset = validation_dataset.batch(batch_size=batch_size) if not self.model: - spectra_input = tf.keras.layers.Input( - x_train.shape[1], - name=ss_input_type.name - ) - inputs = [spectra_input] - - if bg_ss: - background_spectra_input = tf.keras.layers.Input( - x_bg_train.shape[1], - name=bg_ss_input_type.name - ) - inputs.append(background_spectra_input) - - if len(inputs) > 1: - x = tf.keras.layers.Concatenate()(inputs) - else: - x = inputs[0] - - for layer, nodes in enumerate(self.hidden_layers): - if layer == 0: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - else: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - if self.dropout > 0: - x = Dropout(self.dropout)(x) - - output = Dense(y_train.shape[1], activation=self.final_activation)(x) - self.model = tf.keras.models.Model(inputs, output) - self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=self.metrics) + inputs = Input(shape=(X.shape[1],), name="Spectrum") + dense_layer_size = X.shape[1] // 2 + dense_layer = Dense( + dense_layer_size, + activation=self.activation, + activity_regularizer=self.activity_regularizer, + kernel_regularizer=l2(self.l2_alpha), + )(inputs) + outputs = Dense(Y.shape[1], activation=self.final_activation)(dense_layer) + self.model = Model(inputs, outputs) + self.model.compile(loss=self.loss, optimizer=self.optimizer, + metrics=self.metrics) es = EarlyStopping( monitor=es_monitor, @@ -173,41 +144,39 @@ def fit(self, ss: SampleSet, bg_ss: SampleSet = None, restore_best_weights=True, mode=es_mode, ) - if callbacks: callbacks.append(es) else: callbacks = [es] - if bg_ss: - X_data = [x_train, x_bg_train] - self.model_inputs = (ss_input_type, bg_ss_input_type) - else: - X_data = x_train - self.model_inputs = (ss_input_type,) - history = self.model.fit( - X_data, - y_train, + training_dataset, epochs=epochs, verbose=verbose, - validation_split=validation_split, - validation_data=val_data, + validation_data=validation_dataset, callbacks=callbacks, - shuffle=True, - batch_size=batch_size, - ) + ) # Update model information self._update_info( target_level=target_level, - model_outputs=source_contributions_df.columns.values.tolist(), + model_outputs=model_outputs, normalization=ss.spectra_state, ) + # Define the predict function with tf.function and input_signature + self._predict_fn = tf.function( + self._predict, + # input_signature=[tf.TensorSpec(shape=[None, X.shape[1]], dtype=tf.float32)] + experimental_relax_shapes=True + ) + return history - def predict(self, ss: SampleSet, bg_ss: SampleSet = None, verbose=False): + def _predict(self, input_tensor): + return self.model(input_tensor, training=False) + + def predict(self, ss: SampleSet, bg_ss: SampleSet = None): """Classify the spectra in the provided `SampleSet`(s). Results are stored inside the first SampleSet's prediction-related properties. 
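The `.T.groupby(...).sum().T` pattern used here (and throughout this patch) replaces `DataFrame.groupby(axis=1, ...)`, which is deprecated in pandas 2.x. A self-contained illustration with a toy stand-in for `SampleSet.sources` (the isotope/seed labels are made up):

```python
import pandas as pd

columns = pd.MultiIndex.from_tuples(
    [("Am241", "Am241,100uCi"), ("Ba133", "Ba133,50uCi"), ("Ba133", "Ba133,100uCi")],
    names=["Isotope", "Seed"],
)
sources = pd.DataFrame([[0.25, 0.25, 0.50],
                        [0.50, 0.25, 0.25]], columns=columns)

# Old (deprecated): sources.groupby(axis=1, level="Isotope", sort=False).sum()
collapsed = sources.T.groupby("Isotope", sort=False).sum().T
print(collapsed)  # one column per isotope: Am241 = [0.25, 0.5], Ba133 = [0.75, 0.5]
```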
@@ -223,7 +192,8 @@ def predict(self, ss: SampleSet, bg_ss: SampleSet = None, verbose=False): else: X = x_test - results = self.model.predict(X, verbose=verbose) + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + results = self._predict_fn(spectra_tensor) col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] @@ -238,194 +208,6 @@ def predict(self, ss: SampleSet, bg_ss: SampleSet = None, verbose=False): ss.classified_by = self.model_id -class MultiEventClassifier(PyRIIDModel): - """Classifier for spectra from multiple detectors observing the same event.""" - def __init__(self, hidden_layers: tuple = (512,), activation: str = "relu", - loss: str = "categorical_crossentropy", - optimizer: Any = Adam(learning_rate=0.01, clipnorm=0.001), - metrics: list = ["accuracy", "categorical_crossentropy", multi_f1, single_f1], - l2_alpha: float = 1e-4, activity_regularizer: tf.keras.regularizers = l1(0), - dropout: float = 0.0, learning_rate: float = 0.01): - """ - Args: - hidden_layers: tuple containing the number and size of dense layers - activation: activate function to use for each dense layer - loss: string name of the loss function to use for training - optimizer: string name of the optimizer to use for training - metrics: list of metrics to be evaluating during training - l2_alpha: alpha value for the L2 regularization of each dense layer - activity_regularizer: regularizer function applied each dense layer output - dropout: amount of dropout to apply to each dense layer - learning_rate: learning rate to use for an Adam optimizer - """ - super().__init__() - - self.hidden_layers = hidden_layers - self.activation = activation - self.loss = loss - if optimizer == "adam": - self.optimizer = Adam(learning_rate=learning_rate) - else: - self.optimizer = optimizer - self.metrics = metrics - self.l2_alpha = l2_alpha - self.activity_regularizer = activity_regularizer - self.dropout = dropout - self.model = None - - def fit(self, list_of_ss: List[SampleSet], target_contributions: pd.DataFrame, - batch_size: int = 200, epochs: int = 20, - validation_split: float = 0.2, callbacks: list = None, - val_model_ss_list: SampleSet = None, - val_model_target_contributions: pd.DataFrame = None, - patience: int = 15, es_monitor: str = "val_loss", es_mode: str = "min", - es_verbose: bool = False, target_level="Isotope", verbose: bool = False): - """Fit a model to the given SampleSet(s). - - Args: - list_of_ss: list of `SampleSet`s which have prediction_probas populated from - single-event classifiers - target_contributions: DataFrame of the contributions for each - observation. Column titles are the desired label strings. 
- batch_size: number of samples per gradient update - epochs: maximum number of training iterations - validation_split: percentage of the training data to use as validation data - callbacks: list of callbacks to be passed to TensorFlow Model.fit() method - val_model_ss_list: validation set to be used instead of taking a portion of the - training data - val_model_target_contributions: target contributions to the model for each sample - patience: number of epochs to wait for `tf.keras.callbacks.EarlyStopping` object - es_monitor: quantity to be monitored for `tf.keras.callbacks.EarlyStopping` object - es_mode: mode for `tf.keras.callbacks.EarlyStopping` object - es_verbose: verbosity level for `tf.keras.callbacks.EarlyStopping` object - target_level: source level to target for model output - verbose: whether to show detailed training output - - Returns: - `tf.History` object - - Raises: - `ValueError` when no predictions are provided with `list_of_ss` input - """ - if len(list_of_ss) <= 0: - raise ValueError("No model predictions provided!") - - x_train = [ss.prediction_probas.values for ss in list_of_ss] - y_train = target_contributions.values - - if val_model_ss_list and val_model_target_contributions: - val_data = ( - [ss.prediction_probas.values for ss in val_model_ss_list], - val_model_target_contributions.values, - ) - validation_split = None - else: - val_data = None - row_order = np.arange(x_train[0].shape[0]) - np.random.shuffle(row_order) - # Enforce random validation split through shuffling - x_train = [i[row_order] for i in x_train] - y_train = y_train[row_order] - - if not self.model: - inputs = [] - for ss in list_of_ss: - input_from_single_event_model = tf.keras.layers.Input( - ss.prediction_probas.shape[1], - name=ss.classified_by - ) - inputs.append(input_from_single_event_model) - - if len(inputs) > 1: - x = tf.keras.layers.Concatenate()(inputs) - else: - x = inputs[0] - - for layer, nodes in enumerate(self.hidden_layers): - if layer == 0: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - else: - x = Dense( - nodes, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(x) - if self.dropout > 0: - x = Dropout(self.dropout)(x) - - output = Dense(y_train.shape[1], activation="softmax")(x) - self.model = tf.keras.models.Model(inputs, output) - self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=self.metrics) - - es = EarlyStopping( - monitor=es_monitor, - patience=patience, - verbose=es_verbose, - restore_best_weights=True, - mode=es_mode, - ) - - if callbacks: - callbacks.append(es) - else: - callbacks = [es] - - history = self.model.fit( - x_train, - y_train, - epochs=epochs, - verbose=verbose, - validation_split=validation_split, - validation_data=val_data, - callbacks=callbacks, - shuffle=True, - batch_size=batch_size, - ) - - # Initialize model info, update output/input information - self._update_info( - target_level=target_level, - model_outputs=target_contributions.columns.values.tolist(), - model_inputs=tuple( - [(ss.classified_by, ss.prediction_probas.shape[1]) for ss in list_of_ss] - ), - normalization=tuple( - [(ss.classified_by, ss.spectra_state) for ss in list_of_ss] - ), - ) - - return history - - def predict(self, list_of_ss: List[SampleSet], verbose=False) -> pd.DataFrame: - """Classify the spectra in the provided `SampleSet`(s) based on each one's results. 
- - Args: - list_of_ss: list of `SampleSet`s which had predictions made by single-event models - - Returns: - `DataFrame` of predicted results for the `Sampleset`(s) - """ - X = [ss.prediction_probas for ss in list_of_ss] - results = self.model.predict(X, verbose=verbose) - - col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) - col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] - results_df = pd.DataFrame( - data=results, - columns=pd.MultiIndex.from_tuples( - self.get_model_outputs_as_label_tuples(), - names=col_level_subset - ) - ) - return results_df - - class LabelProportionEstimator(PyRIIDModel): """Regressor for predicting label proportions that uses a semi-supervised loss. @@ -450,19 +232,19 @@ class LabelProportionEstimator(PyRIIDModel): sparsemax, ), "categorical_crossentropy": ( - tf.keras.losses.CategoricalCrossentropy, + CategoricalCrossentropy, { "from_logits": True, "reduction": tf.keras.losses.Reduction.NONE, }, - tf.keras.activations.softmax, + softmax, ), "mse": ( - tf.keras.losses.MeanSquaredError, + MeanSquaredError, { "reduction": tf.keras.losses.Reduction.NONE, }, - tf.keras.activations.sigmoid, + sigmoid, ) } INFO_KEYS = ( @@ -495,7 +277,7 @@ class LabelProportionEstimator(PyRIIDModel): ) def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_loss="sse", - metrics=("mae", "categorical_crossentropy",), beta=0.9, source_dict=None, + metrics: list = ["mae", "categorical_crossentropy"], beta=0.9, source_dict=None, optimizer="adam", optimizer_kwargs=None, learning_rate: float = 1e-3, hidden_layer_activation: str = "mish", kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, @@ -551,7 +333,7 @@ def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_lo self.optimizer = optimizer if isinstance(optimizer, str): - self.optimizer = tf.keras.optimizers.get(optimizer) + self.optimizer = keras.optimizers.get(optimizer) if optimizer_kwargs is not None: for key, value in optimizer_kwargs.items(): setattr(self.optimizer, key, value) @@ -603,11 +385,11 @@ def _get_unsup_loss_func(self, loss_func_str): return self.UNSUPERVISED_LOSS_FUNCS[loss_func_str] def _initialize_model(self, input_size, output_size): - spectra_input = tf.keras.layers.Input(input_size, name="input_spectrum") + spectra_input = Input(input_size, name="input_spectrum") spectra_norm = L1NormLayer(name="normalized_input_spectrum")(spectra_input) x = spectra_norm for layer, nodes in enumerate(self.hidden_layers): - x = tf.keras.layers.Dense( + x = Dense( nodes, activation=self.hidden_layer_activation, kernel_regularizer=L1L2( @@ -626,17 +408,14 @@ def _initialize_model(self, input_size, output_size): )(x) if self.dropout > 0: - x = tf.keras.layers.Dropout(self.dropout)(x) - output = tf.keras.layers.Dense( + x = Dropout(self.dropout)(x) + output = Dense( output_size, activation="linear", name="output" )(x) - self.model = tf.keras.models.Model( - inputs=[spectra_input], - outputs=[output] - ) + self.model = Model(inputs=[spectra_input], outputs=[output]) def _get_info_as_dict(self): info_dict = {} @@ -703,10 +482,10 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b epochs: maximum number of training iterations validation_split: proportion of training data to use as validation data callbacks: list of callbacks to be passed to TensorFlow Model.fit() method - patience: number of epochs to wait for tf.keras.callbacks.EarlyStopping object - es_monitor: quantity to be monitored 
for tf.keras.callbacks.EarlyStopping object - es_mode: mode for tf.keras.callbacks.EarlyStopping object - es_verbose: verbosity level for tf.keras.callbacks.EarlyStopping object + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object es_min_delta: minimum change to count as an improvement for early stopping normalize_sup_loss: whether to normalize the supervised loss term normalize_func: normalization function used for supervised loss term @@ -715,7 +494,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b verbose: whether model training output is printed to the terminal """ spectra = ss.get_samples().astype(float) - sources_df = ss.sources.groupby(axis=1, level=target_level, sort=False).sum() + sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T sources = sources_df.values.astype(float) self.sources_columns = sources_df.columns @@ -734,7 +513,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b if verbose: print("Initializing model...") self._initialize_model( - ss.n_channels, + (ss.n_channels,), sources.shape[1], ) elif verbose: @@ -771,7 +550,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b ) else: semisup_metrics.append( - build_keras_semisupervised_loss_func( + build_keras_semisupervised_metric_func( each, self.activation, sources.shape[1] @@ -813,7 +592,7 @@ def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: b if verbose: print("Finding OOD detection threshold function...") - train_logits = self.model.predict(spectra) + train_logits = self.model.predict(spectra, verbose=0) train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32)) self.spline_recon_errors = reconstruction_error( tf.convert_to_tensor(spectra, dtype=tf.float32), @@ -878,31 +657,11 @@ def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verb ss.info["recon_error"] = recon_errors -class L1NormLayer(tf.keras.layers.Layer): - """Keras layer applying an L1 norm (dividing by total counts) to input data. - """ - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def call(self, inputs): - """This is where the layer's logic lives. - - Args: - inputs: input tensor, or dict/list/tuple of input tensors. - - Returns: - A tensor or list/tuple of tensors. 
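The `L1NormLayer` removed below now lives in `riid.models.layers` (see the new module earlier in this patch). A brief sketch of its behavior, using arbitrary count values:

```python
import tensorflow as tf

from riid.models.layers import L1NormLayer

spectra = tf.constant([[2.0, 3.0, 5.0],
                       [1.0, 1.0, 2.0]])
normalized = L1NormLayer()(spectra)
print(tf.reduce_sum(normalized, axis=-1).numpy())  # expected: [1. 1.]
```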
- """ - sums = tf.reduce_sum(inputs, axis=-1) - l1_norm = inputs / tf.reshape(sums, (-1, 1)) - return l1_norm - - def _get_reordered_spectra(old_spectra_df: pd.DataFrame, old_sources_df: pd.DataFrame, new_sources_columns, target_level) -> pd.DataFrame: collapsed_sources_df = old_sources_df\ - .groupby(axis=1, level=target_level)\ - .sum() + .T.groupby(target_level)\ + .sum().T reordered_spectra_df = old_spectra_df.iloc[ collapsed_sources_df[ new_sources_columns diff --git a/riid/models/neural_nets/arad.py b/riid/models/neural_nets/arad.py index 25f4941f..3679bfe6 100644 --- a/riid/models/neural_nets/arad.py +++ b/riid/models/neural_nets/arad.py @@ -4,22 +4,27 @@ """This module contains implementations of the ARAD deep learning architecture.""" from typing import List +import keras import pandas as pd import tensorflow as tf -from keras.activations import sigmoid, softplus -from keras.callbacks import EarlyStopping, ReduceLROnPlateau -from keras.initializers import GlorotNormal, HeNormal -from keras.layers import (BatchNormalization, Concatenate, Conv1D, - Conv1DTranspose, Dense, Dropout, Flatten, Input, - MaxPool1D, Reshape, UpSampling1D) -from keras.models import Model -from keras.regularizers import L1L2, L2 +from keras.api.activations import sigmoid, softplus +from keras.api.callbacks import EarlyStopping, ReduceLROnPlateau +from keras.api.initializers import GlorotNormal, HeNormal +from keras.api.layers import (BatchNormalization, Concatenate, Conv1D, + Conv1DTranspose, Dense, Dropout, Flatten, Input, + MaxPool1D, Reshape, UpSampling1D) +from keras.api.losses import kl_divergence, log_cosh +from keras.api.metrics import MeanSquaredError +from keras.api.models import Model +from keras.api.optimizers import Adam, Nadam +from keras.api.regularizers import L1L2, L2 from scipy.spatial.distance import jensenshannon from scipy.stats import entropy from riid.data.sampleset import SampleSet, SpectraState -from riid.losses import jensen_shannon_distance, mish +from riid.losses import mish from riid.models import PyRIIDModel +from riid.models.bayes import ExpandDimsLayer class ARADv1TF(Model): @@ -31,15 +36,15 @@ class ARADv1TF(Model): - Ghawaly Jr, James M. "A Datacentric Algorithm for Gamma-ray Radiation Anomaly Detection in Unknown Background Environments." (2020). """ - def __init__(self, latent_dim: int = 5): + def __init__(self, latent_dim: int = 5, **kwargs): """ Args: latent_dim: dimension of internal latent represention. 5 was the final one in the paper, but 4 to 8 were found to work well. 
""" - super().__init__() + super().__init__(**kwargs) - input_size = (128, 1) + input_size = (128,) # Encoder b1_config = ( (5, 1, 32), @@ -63,9 +68,10 @@ def __init__(self, latent_dim: int = 5): (1, 1, 2), ) encoder_input = Input(shape=input_size, name="encoder_input") - b1 = self._get_branch(encoder_input, b1_config, 0.1, "softplus", "B1", 5) - b2 = self._get_branch(encoder_input, b2_config, 0.1, "softplus", "B2", 5) - b3 = self._get_branch(encoder_input, b3_config, 0.1, "softplus", "B3", 5) + expanded_encoder_input = ExpandDimsLayer()(encoder_input, axis=-1) + b1 = self._get_branch(expanded_encoder_input, b1_config, 0.1, "softplus", "B1", 5) + b2 = self._get_branch(expanded_encoder_input, b2_config, 0.1, "softplus", "B2", 5) + b3 = self._get_branch(expanded_encoder_input, b3_config, 0.1, "softplus", "B3", 5) x = Concatenate(axis=1)([b1, b2, b3]) x = Reshape((15,), name="reshape")(x) latent_space = Dense( @@ -92,27 +98,11 @@ def __init__(self, latent_dim: int = 5): decoded_spectrum = decoder(encoded_spectrum) autoencoder = Model(encoder_input, decoded_spectrum, name="autoencoder") - def logcosh_with_kld_penalty(input_spectrum, decoded_spectrum, - latent_space, sparsities, - penalty_weight=0.5): - squeezed_input = tf.squeeze(input_spectrum) - logcosh_loss = tf.keras.losses.log_cosh(squeezed_input, decoded_spectrum) - kld_loss = tf.keras.losses.kld(sparsities, latent_space) - loss = logcosh_loss + penalty_weight * kld_loss - return loss - - sparsities = [0.001] * latent_dim - loss_func = logcosh_with_kld_penalty( - encoder_input, - decoded_spectrum, - latent_space, - sparsities, - ) - autoencoder.add_loss(loss_func) - self.encoder = encoder self.decoder = decoder self.autoencoder = autoencoder + self.sparsities = [0.001] * latent_dim + self.penalty_weight = 0.5 def _get_branch(self, input_layer, config, dropout_rate, activation, branch_name, dense_units): x = input_layer @@ -132,7 +122,15 @@ def _get_branch(self, input_layer, config, dropout_rate, activation, branch_name return x def call(self, x): - decoded = self.autoencoder(x) + encoded = self.encoder(x) + decoded = self.decoder(encoded) + + # Compute loss + logcosh_loss = log_cosh(x, decoded) + kld_loss = kl_divergence(self.sparsities, encoded) + loss = logcosh_loss + self.penalty_weight * kld_loss + self.add_loss(loss) + return decoded @@ -145,15 +143,15 @@ class ARADv2TF(Model): - Ghawaly Jr, James M., et al. "Characterization of the Autoencoder Radiation Anomaly Detection (ARAD) model." Engineering Applications of Artificial Intelligence 111 (2022): 104761. """ - def __init__(self, latent_dim: int = 8): + def __init__(self, latent_dim: int = 8, **kwargs): """ Args: latent_dim: dimension of internal latent represention. 5 was the final one in the paper, but 4 to 8 were found to work well. 
""" - super().__init__() + super().__init__(**kwargs) - input_size = (128, 1) + input_size = (128,) # Encoder config = ( (7, 1, 8, 2), @@ -163,7 +161,8 @@ def __init__(self, latent_dim: int = 8): (3, 1, 8, 2), ) encoder_input = Input(shape=input_size, name="encoder_input") - x = encoder_input + expanded_encoder_input = ExpandDimsLayer()(encoder_input, axis=-1) + x = expanded_encoder_input for i, (kernel_size, strides, filters, max_pool_size) in enumerate(config, start=1): conv_name = f"conv{i}" x = Conv1D( @@ -194,7 +193,7 @@ def __init__(self, latent_dim: int = 8): encoder = Model(encoder_input, encoder_output, name="encoder") # Decoder - decoder_input = Input(shape=latent_dim, name="decoder_input") + decoder_input = Input(shape=(latent_dim,), name="decoder_input") x = Dense(units=32, name="D2", activation=mish)(decoder_input) x = BatchNormalization(name="D2_batch_norm")(x) x = Reshape((4, 8), name="reshape")(x) @@ -242,7 +241,25 @@ def __init__(self, latent_dim: int = 8): self.autoencoder = autoencoder def call(self, x): - decoded = self.autoencoder(x) + encoded = self.encoder(x) + decoded = self.decoder(encoded) + + # Compute loss + p_sum = tf.reduce_sum(x, axis=-1) + p_norm = tf.divide( + x, + tf.reshape(p_sum, (-1, 1)) + ) + q_sum = tf.reduce_sum(decoded, axis=-1) + q_norm = tf.divide( + decoded, + tf.reshape(q_sum, (-1, 1)) + ) + m = (p_norm + q_norm) / 2 + js_divergence = (kl_divergence(p_norm, m) + kl_divergence(q_norm, m)) / 2 + loss = tf.math.sqrt(js_divergence) + self.add_loss(loss) + return decoded @@ -278,16 +295,12 @@ def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2, x = ss.get_samples().astype(float) - optimizer = tf.keras.optimizers.Nadam( - learning_rate=1e-4 - ) + optimizer = Nadam(learning_rate=1e-4) if not self.model: self.model = ARADv1TF() - self.model.compile( - loss=None, - optimizer=optimizer - ) + + self.model.compile(optimizer=optimizer) callbacks = [ EarlyStopping( @@ -375,17 +388,12 @@ def fit(self, ss: SampleSet, epochs: int = 300, validation_split=0.2, x = ss.get_samples().astype(float) - optimizer = tf.keras.optimizers.Adam( - learning_rate=0.01, - epsilon=0.05 - ) + optimizer = Adam(learning_rate=0.01, epsilon=0.05) if not self.model: self.model = ARADv2TF() - self.model.compile( - loss=jensen_shannon_distance, - optimizer=optimizer - ) + + self.model.compile(optimizer=optimizer) callbacks = [ EarlyStopping( @@ -448,7 +456,7 @@ class ARADLatentPredictor(PyRIIDModel): def __init__(self, hidden_layers: tuple = (8, 4,), hidden_activation: str = "relu", final_activation: str = "linear", loss: str = "mse", optimizer="adam", optimizer_kwargs=None, - learning_rate: float = 1e-3, metrics: tuple = ("mse", ), + learning_rate: float = 1e-3, metrics=None, kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, @@ -479,12 +487,14 @@ def __init__(self, hidden_layers: tuple = (8, 4,), self.loss = loss self.optimizer = optimizer if isinstance(optimizer, str): - self.optimizer = tf.keras.optimizers.get(optimizer) + self.optimizer = keras.optimizers.get(optimizer) if optimizer_kwargs is not None: for key, value in optimizer_kwargs.items(): setattr(self.optimizer, key, value) self.optimizer.learning_rate = learning_rate self.metrics = metrics + if self.metrics is None: + self.metrics = [MeanSquaredError()] self.kernel_l1_regularization = kernel_l1_regularization 
self.kernel_l2_regularization = kernel_l2_regularization self.bias_l1_regularization = bias_l1_regularization @@ -498,15 +508,15 @@ def __init__(self, hidden_layers: tuple = (8, 4,), def _initialize_model(self, arad: Model, output_size: int): """Build Keras MLP model. """ - encoder = arad.get_layer("encoder") - encoder_input = encoder.get_layer(index=0).input - encoder_output = encoder.get_layer(index=-1).output - encoder_output_shape = encoder_output.shape + encoder: Model = arad.get_layer("encoder") + encoder_input = encoder.input + encoder_output = encoder.output + encoder_output_shape = encoder_output.shape[-1] - predictor_input = Input(shape=encoder_output_shape, name="predictor_input") + predictor_input = Input(shape=(encoder_output_shape,), name="inner_predictor_input") x = predictor_input for layer, nodes in enumerate(self.hidden_layers): - x = tf.keras.layers.Dense( + x = Dense( nodes, activation=self.hidden_activation, kernel_regularizer=L1L2( @@ -521,19 +531,19 @@ def _initialize_model(self, arad: Model, output_size: int): l1=self.activity_l1_regularization, l2=self.activity_l2_regularization ), - name=f"dense_{layer}" + name=f"inner_predictor_dense_{layer}" )(x) if self.dropout > 0: - x = tf.keras.layers.Dropout(self.dropout)(x) - predictor_output = tf.keras.layers.Dense( + x = Dropout(self.dropout)(x) + predictor_output = Dense( output_size, activation=self.final_activation, - name="output" + name="inner_predictor_output" )(x) - predictor = Model(predictor_input, predictor_output, name="predictor") + inner_predictor = Model(predictor_input, predictor_output, name="inner_predictor") encoded_spectrum = encoder(encoder_input) - predictions = predictor(encoded_spectrum) + predictions = inner_predictor(encoded_spectrum) self.model = Model(encoder_input, predictions, name="predictor") # Freeze the layers corresponding to the autoencoder # Note: setting trainable to False is recursive to sub-layers per TF docs: diff --git a/tests/anomaly_tests.py b/tests/anomaly_tests.py index 617384df..35143a1d 100644 --- a/tests/anomaly_tests.py +++ b/tests/anomaly_tests.py @@ -19,13 +19,14 @@ def setUp(self): pass def test_event_detector(self): - np.random.seed(42) + random_state = 42 + rng = np.random.default_rng(random_state) SAMPLE_INTERVAL = 0.5 BG_RATE = 300 seeds_ss = get_dummy_seeds(100) fg_seeds_ss, bg_seeds_ss = seeds_ss.split_fg_and_bg() - mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3)\ + mixed_bg_seeds_ss = SeedMixer(bg_seeds_ss, mixture_size=3, rng=rng)\ .generate(1) events = PassbySynthesizer(events_per_seed=1, sample_interval=SAMPLE_INTERVAL, @@ -34,7 +35,7 @@ def test_event_detector(self): dwell_time_function_args=(20, 20), snr_function_args=(20, 20), return_gross=True, - rng=np.random.default_rng(42))\ + rng=rng)\ .generate(fg_seeds_ss, mixed_bg_seeds_ss, verbose=False) _, gross_events = list(zip(*events)) diff --git a/tests/model_tests.py b/tests/model_tests.py index 39357f3f..ea430abc 100644 --- a/tests/model_tests.py +++ b/tests/model_tests.py @@ -15,8 +15,7 @@ from riid.models import PyRIIDModel from riid.models.bayes import (NegativeSpectrumError, PoissonBayesClassifier, ZeroTotalCountsError) -from riid.models.neural_nets import (LabelProportionEstimator, MLPClassifier, - MultiEventClassifier) +from riid.models.neural_nets import (LabelProportionEstimator, MLPClassifier) from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv1, ARADv2 @@ -86,7 +85,7 @@ def test_pb_predict(self): # Get test samples gss = StaticSynthesizer( - samples_per_seed=1, + 
samples_per_seed=2, live_time_function_args=(4, 4), snr_function_args=(10, 10), rng=np.random.default_rng(42), @@ -99,7 +98,7 @@ def test_pb_predict(self): # Predict pb_model.predict(test_gross_ss, test_bg_ss) - truth_labels = fg_seeds_ss.get_labels() + truth_labels = test_fg_ss.get_labels() predicted_labels = test_gross_ss.get_predictions() assert (truth_labels == predicted_labels).all() @@ -110,18 +109,6 @@ def test_mlp_fit_save_load_predict(self): _test_model_fit_save_load_predict(self, MLPClassifier, self.test_ss, self.train_ss, epochs=1) - def test_mec_fit_save_load_predict(self): - test_copy_ss = self.test_ss[:] - test_copy_ss.prediction_probas = test_copy_ss.sources - _test_model_fit_save_load_predict( - self, - MultiEventClassifier, - [test_copy_ss], - [self.train_ss], - self.train_ss.sources.groupby(axis=1, level="Isotope", sort=False).sum(), - epochs=1 - ) - def test_lpe_fit_save_load_predict(self): _test_model_fit_save_load_predict(self, LabelProportionEstimator, self.test_ss, self.fg_seeds_ss, self.train_ss, epochs=1) diff --git a/tests/seedmixer_tests.py b/tests/seedmixer_tests.py index e56950e2..e986f891 100644 --- a/tests/seedmixer_tests.py +++ b/tests/seedmixer_tests.py @@ -15,8 +15,10 @@ class TestSeedMixer(unittest.TestCase): """Test seed mixing functionality of SampleSet. """ - def setUp(self): - np.random.seed(42) + @classmethod + def setUpClass(self): + random_state = 42 + self.rng = np.random.default_rng(random_state) self.ss, _ = get_dummy_seeds().split_fg_and_bg() self.ss.normalize() @@ -71,24 +73,24 @@ def test_mixture_pdf(self): self.assertAlmostEqual(self.three_mix_seeds_ss.spectra.values[sample, :].sum(), 1.0) def test_spectrum_construction_3seeds_2mix(self): - _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg() - mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2).generate(100) + _, bg_seeds_ss = get_dummy_seeds(n_channels=16, rng=self.rng).split_fg_and_bg() + mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2, rng=self.rng).generate(100) spectral_distances = _get_spectral_distances(bg_seeds_ss, mixed_bg_ss) - self.assertTrue(all(spectral_distances == 0)) + self.assertTrue(np.isclose(spectral_distances, 0.0).all()) def test_spectrum_construction_3seeds_3mix(self): - _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg() - mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=3).generate(100) + _, bg_seeds_ss = get_dummy_seeds(n_channels=16, rng=self.rng).split_fg_and_bg() + mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=3, rng=self.rng).generate(100) spectral_distances = _get_spectral_distances(bg_seeds_ss, mixed_bg_ss) - self.assertTrue(all(spectral_distances == 0)) + self.assertTrue(np.isclose(spectral_distances, 0.0).all()) def test_spectrum_construction_2seeds_2mix(self): - _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg( + _, bg_seeds_ss = get_dummy_seeds(n_channels=16, rng=self.rng).split_fg_and_bg( bg_seed_names=SampleSet.DEFAULT_BG_SEED_NAMES[1:3] ) - mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2).generate(100) + mixed_bg_ss = SeedMixer(bg_seeds_ss, mixture_size=2, rng=self.rng).generate(100) spectral_distances = _get_spectral_distances(bg_seeds_ss, mixed_bg_ss) - self.assertTrue(all(spectral_distances == 0)) + self.assertTrue(np.isclose(spectral_distances, 0.0).all()) def test_spectrum_construction_2seeds_2mix_error(self): _, bg_seeds_ss = get_dummy_seeds(n_channels=16).split_fg_and_bg( @@ -104,10 +106,11 @@ def _get_spectral_distances(seeds_ss, mixed_ss): mixed_ss.sources.values.T ).T spectral_distances 
= jensenshannon( - recon_spectra.astype(np.float32), - mixed_ss.spectra.values.astype(np.float32), + recon_spectra, + mixed_ss.spectra.values, axis=1 ) + spectral_distances = np.nan_to_num(spectral_distances, nan=0.0) return spectral_distances diff --git a/tests/visualize_tests.py b/tests/visualize_tests.py index e9e83fb3..151e9f48 100644 --- a/tests/visualize_tests.py +++ b/tests/visualize_tests.py @@ -23,7 +23,8 @@ class TestVisualize(unittest.TestCase): """Testing plot functions in the visualize module.""" - def setUp(self): + @classmethod + def setUpClass(self): """Test setup.""" self.fg_seeds_ss, self.bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() self.mixed_bg_seed_ss = SeedMixer(self.bg_seeds_ss, mixture_size=3).generate(10)
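
Two of the migrations in this patch are easy to get wrong when porting similar code, so small standalone sketches follow. Both are illustrative only: the column names, layer sizes, and data below are made up and are not part of the riid API.

First, the `groupby(axis=1, ...)` calls were replaced with a transpose/groupby/transpose round trip because column-wise `groupby` is deprecated in newer pandas releases. A minimal pandas-only sketch of the equivalence used in `LabelProportionEstimator.fit` and `_get_reordered_spectra`:

```python
import pandas as pd

# Hypothetical two-level source columns; the real SampleSet.sources
# MultiIndex has more levels, and these names are illustrative only.
columns = pd.MultiIndex.from_tuples(
    [("U235", "U235_a"), ("U235", "U235_b"), ("Cs137", "Cs137_a")],
    names=["Isotope", "Seed"],
)
sources = pd.DataFrame([[0.2, 0.3, 0.5], [0.1, 0.1, 0.8]], columns=columns)

# Deprecated column-wise form:
#   sources.groupby(axis=1, level="Isotope", sort=False).sum()
# Equivalent form used by the patch: transpose, group by the index level,
# sum, and transpose back.
collapsed = sources.T.groupby("Isotope", sort=False).sum().T
print(collapsed)  # one column per isotope: U235 = [0.5, 0.2], Cs137 = [0.5, 0.8]
```

Second, the ARAD models now register their reconstruction losses from inside `call()` via `add_loss()` instead of attaching a loss tensor to the functional model, which is why `compile()` is called without a `loss` argument. A toy stand-in (not the actual ARAD architecture) showing that pattern:

```python
import keras
import tensorflow as tf


class ToyAutoencoder(keras.Model):
    """Loss is computed in call() and registered with add_loss(),
    so compile() and fit() need no explicit loss or targets."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.encoder = keras.layers.Dense(4, activation="relu")
        self.decoder = keras.layers.Dense(16, activation="sigmoid")

    def call(self, x):
        decoded = self.decoder(self.encoder(x))
        # Reduce to a scalar before registering the loss.
        self.add_loss(tf.reduce_mean(keras.losses.log_cosh(x, decoded)))
        return decoded


model = ToyAutoencoder()
model.compile(optimizer="adam")  # no loss= needed; add_loss() supplies it
model.fit(tf.random.uniform((32, 16)), epochs=1, verbose=0)
```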