diff --git a/riid/losses/__init__.py b/riid/losses/__init__.py
index b6db44b..ae18ccc 100644
--- a/riid/losses/__init__.py
+++ b/riid/losses/__init__.py
@@ -209,6 +209,11 @@ def jensen_shannon_divergence(p, q):
     return jsd
 
 
+def jensen_shannon_distance(p, q):
+    """Compute the Jensen-Shannon distance, the square root of the divergence."""
+    divergence = jensen_shannon_divergence(p, q)
+    return tf.math.sqrt(divergence)
+
+
 def chi_squared_diff(spectra, reconstructed_spectra):
     """Compute the Chi-Squared test.
 
diff --git a/riid/models/__init__.py b/riid/models/__init__.py
index e4cd4f9..cb610d6 100644
--- a/riid/models/__init__.py
+++ b/riid/models/__init__.py
@@ -14,6 +14,7 @@
 from riid.data.labeling import label_to_index_element
 from riid.data.sampleset import SampleSet, SpectraState
+from riid.losses import jensen_shannon_distance
 from riid.metrics import multi_f1, single_f1
 
 
 class ModelInput(Enum):
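For reviewers: the Jensen-Shannon distance is the square root of the Jensen-Shannon divergence, and unlike the divergence it satisfies the triangle inequality, making it a true metric between spectra. A minimal sketch of the intended behavior, assuming the new loss accepts batched, L1-normalized tensors the way the other losses in riid.losses do:

import tensorflow as tf

from riid.losses import jensen_shannon_distance

# Two hypothetical spectra, each L1-normalized so the bins sum to 1.
p = tf.constant([[0.7, 0.2, 0.1]])
q = tf.constant([[0.1, 0.2, 0.7]])

print(jensen_shannon_distance(p, q).numpy())  # positive for differing spectra
print(jensen_shannon_distance(p, p).numpy())  # exactly 0 for identical spectra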
diff --git a/riid/models/neural_nets.py b/riid/models/neural_nets.py
index 404ff7a..6b19a1a 100644
--- a/riid/models/neural_nets.py
+++ b/riid/models/neural_nets.py
@@ -11,7 +11,7 @@
 import pandas as pd
 import tensorflow as tf
 import tf2onnx
-from keras.callbacks import EarlyStopping
+from keras.callbacks import EarlyStopping, ReduceLROnPlateau
 from keras.layers import Activation, Dense, Dropout
 from keras.optimizers import Adam
 from keras.regularizers import L1L2, l1, l2
@@ -20,9 +20,10 @@
 from riid.data.sampleset import SampleSet
 from riid.losses import (build_keras_semisupervised_loss_func,
-                         chi_squared_diff, jensen_shannon_divergence, mish,
-                         normal_nll_diff, poisson_nll_diff,
-                         reconstruction_error, sse_diff, weighted_sse_diff)
+                         chi_squared_diff, jensen_shannon_distance,
+                         jensen_shannon_divergence, mish, normal_nll_diff,
+                         poisson_nll_diff, reconstruction_error, sse_diff,
+                         weighted_sse_diff)
 from riid.losses.sparsemax import SparsemaxLoss, sparsemax
 from riid.metrics import (build_keras_semisupervised_metric_func, multi_f1,
                           single_f1)
 
@@ -622,11 +623,6 @@ def _get_unsup_loss_func(self, loss_func_str):
     def _initialize_model(self, input_size, output_size):
         spectra_input = tf.keras.layers.Input(input_size, name="input_spectrum")
 
-        def _l1_norm(x):
-            sums = tf.reduce_sum(x, axis=-1)
-            l1_norm = x / tf.reshape(sums, (-1, 1))
-            return l1_norm
-
         spectra_norm = tf.keras.layers.Lambda(_l1_norm, name="normalized_input_spectrum")(
             spectra_input
         )
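A note on the hunk above: _l1_norm is not deleted but hoisted to module scope (its new definition appears at the end of this diff). A plausible motivation is that a Lambda layer wrapping a closure local to _initialize_model cannot be resolved by name when a saved model is reloaded, whereas a module-level function can. Its behavior is unchanged; a quick sketch with hypothetical values:

import tensorflow as tf

from riid.models.neural_nets import _l1_norm

# Each row (spectrum) is divided by its total count so it sums to 1.
batch = tf.constant([[2.0, 6.0, 2.0],
                     [1.0, 1.0, 2.0]])
print(_l1_norm(batch).numpy())
# [[0.2  0.6  0.2 ]
#  [0.25 0.25 0.5 ]]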
@@ -952,3 +948,208 @@ def load(self, file_path):
         self.__init__(**model_info)
 
         self.onnx_session = onnxruntime.InferenceSession(model_path)
+
+
+class ARAD(TFModelBase):
+    """ARAD model designed by James Ghawaly.
+
+    Implementation is based on his paper:
+    https://www.sciencedirect.com/science/article/pii/S0952197622000550
+    """
+
+    def __init__(self, latent_dim: int = 8):
+        """
+        Args:
+            latent_dim: dimension of the internal latent representation
+                (8 was used in the paper)
+        """
+        super().__init__()
+
+        self.latent_dim = latent_dim
+        self.model = None
+
+    def _initialize_model(self):
+        conv_kernels = (7, 5, 3, 3, 3)
+        conv_strides = (1, 1, 1, 1, 1)
+        conv_filter_size = 8
+        max_pool_size = 2
+        hidden_activation = mish
+        final_activation = tf.keras.activations.sigmoid
+        hidden_initializer = tf.keras.initializers.HeNormal
+        final_initializer = tf.keras.initializers.GlorotNormal
+        conv_padding = "same"
+        kernel_l2_reg = 1e-3
+        kernel_l1_reg = 1e-3
+        bias_l2_reg = 1e-3
+
+        self.model = tf.keras.models.Sequential()
+        self.model.add(tf.keras.layers.Input(shape=(128, 1,), name="input_spectrum"))
+
+        # Encoder: five conv blocks, each halving the spectral length via
+        # max pooling (128 bins -> 4 time steps of 8 filters).
+        for i in range(len(conv_kernels)):
+            self.model.add(tf.keras.layers.Conv1D(
+                filters=conv_filter_size,
+                kernel_size=conv_kernels[i],
+                strides=conv_strides[i],
+                padding=conv_padding,
+                activation=hidden_activation,
+                kernel_regularizer=tf.keras.regularizers.L1L2(
+                    l1=kernel_l1_reg,
+                    l2=kernel_l2_reg
+                ),
+                bias_regularizer=tf.keras.regularizers.L2(
+                    l2=bias_l2_reg
+                ),
+                kernel_initializer=hidden_initializer,
+                name=f"encoder_conv1d_{i}"
+            ))
+            self.model.add(tf.keras.layers.BatchNormalization())
+            self.model.add(tf.keras.layers.MaxPool1D(pool_size=max_pool_size))
+            self.model.add(tf.keras.layers.BatchNormalization())
+
+        self.model.add(tf.keras.layers.Flatten())
+        self.model.add(tf.keras.layers.Dense(
+            self.latent_dim,
+            activation=hidden_activation,
+            kernel_regularizer=tf.keras.regularizers.L1L2(
+                l1=kernel_l1_reg,
+                l2=kernel_l2_reg
+            ),
+            bias_regularizer=tf.keras.regularizers.L2(
+                l2=bias_l2_reg
+            ),
+            kernel_initializer=hidden_initializer,
+            name="dense_latent"
+        ))
+        self.model.add(tf.keras.layers.BatchNormalization())
+
+        # Decoder: mirror the encoder with upsampling and transposed convolutions.
+        self.model.add(tf.keras.layers.Dense(
+            32,
+            activation=hidden_activation,
+            kernel_regularizer=tf.keras.regularizers.L1L2(
+                l1=kernel_l1_reg,
+                l2=kernel_l2_reg
+            ),
+            bias_regularizer=tf.keras.regularizers.L2(
+                l2=bias_l2_reg
+            ),
+            kernel_initializer=hidden_initializer,
+            name="dense_decoder"
+        ))
+        self.model.add(tf.keras.layers.BatchNormalization())
+        self.model.add(tf.keras.layers.Reshape((4, 8)))
+
+        for i in range(len(conv_kernels) - 1):
+            self.model.add(tf.keras.layers.UpSampling1D(max_pool_size))
+            self.model.add(tf.keras.layers.BatchNormalization())
+            self.model.add(tf.keras.layers.Conv1DTranspose(
+                filters=conv_filter_size,
+                kernel_size=conv_kernels[-i-1],
+                strides=conv_strides[-i-1],
+                padding=conv_padding,
+                activation=hidden_activation,
+                kernel_regularizer=tf.keras.regularizers.L1L2(
+                    l1=kernel_l1_reg,
+                    l2=kernel_l2_reg
+                ),
+                bias_regularizer=tf.keras.regularizers.L2(
+                    l2=bias_l2_reg
+                ),
+                kernel_initializer=hidden_initializer,
+                name=f"decoder_conv1d_{i}"
+            ))
+            self.model.add(tf.keras.layers.BatchNormalization())
+
+        # Final block restores the (128, 1) input shape; the sigmoid keeps
+        # reconstructed bins in [0, 1] like the normalized inputs.
+        self.model.add(tf.keras.layers.UpSampling1D(max_pool_size))
+        self.model.add(tf.keras.layers.BatchNormalization())
+        self.model.add(tf.keras.layers.Conv1DTranspose(
+            filters=1,
+            kernel_size=conv_kernels[-1],
+            strides=conv_strides[-1],
+            padding=conv_padding,
+            activation=final_activation,
+            kernel_initializer=final_initializer
+        ))
+
+    def fit(self, ss: SampleSet, validation_split: float = 0.2,
+            epochs: int = 300, es_verbose: int = 0, verbose: bool = False):
+        """Fit a model to the given `SampleSet`.
+
+        Args:
+            ss: `SampleSet` of `n` spectra where `n` >= 1
+            validation_split: fraction of the training data to use as validation data
+            epochs: maximum number of training epochs
+            es_verbose: verbosity level for the `EarlyStopping` callback
+            verbose: whether model training output is printed to the terminal
+
+        Returns:
+            `tf.keras.callbacks.History` object.
+        """
+        norm_ss = ss[:]
+        norm_ss.downsample_spectra(target_bins=128)
+        norm_ss.normalize()
+        spectra = norm_ss.get_samples().astype(float)
+
+        if not self.model:
+            self._initialize_model()
+
+        optimizer = tf.keras.optimizers.Adam(
+            learning_rate=0.01,
+            epsilon=0.05
+        )
+        self.model.compile(
+            loss=jensen_shannon_distance,
+            optimizer=optimizer
+        )
+
+        callbacks = [
+            EarlyStopping(
+                monitor="val_loss",
+                patience=6,
+                verbose=es_verbose,
+                restore_best_weights=True,
+                mode="min",
+                min_delta=1e-4
+            ),
+            ReduceLROnPlateau(
+                monitor="val_loss",
+                factor=0.1,
+                patience=3,
+                min_delta=1e-4
+            ),
+        ]
+
+        history = self.model.fit(
+            spectra,
+            spectra,
+            epochs=epochs,
+            verbose=verbose,
+            validation_split=validation_split,
+            callbacks=callbacks,
+            shuffle=True,
+            batch_size=32
+        )
+        self.history = history.history
+
+        return history
+
+    def predict(self, ss: SampleSet, verbose=False):
+        """Reconstruct the spectra with the autoencoder.
+
+        Args:
+            ss: `SampleSet` of `n` spectra where `n` >= 1
+            verbose: whether prediction output is printed to the terminal
+
+        Returns:
+            reconstructed_spectra: output of the autoencoder
+        """
+        norm_ss = ss[:]
+        norm_ss.downsample_spectra(target_bins=128)
+        norm_ss.normalize()
+        spectra = norm_ss.get_samples().astype(float)
+
+        reconstructed_spectra = self.model.predict(spectra, verbose=verbose)
+
+        return reconstructed_spectra
+
+
+def _l1_norm(x):
+    """Divide each spectrum by its total so the bins sum to 1."""
+    sums = tf.reduce_sum(x, axis=-1)
+    l1_norm = x / tf.reshape(sums, (-1, 1))
+    return l1_norm
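Finally, an end-to-end sketch of how the new class might be exercised. ARAD, fit, and predict come from this diff; train_ss and test_ss are hypothetical SampleSets standing in for your own data, and the sketch assumes jensen_shannon_distance reduces to one value per spectrum, as its use as a Keras loss in model.compile suggests:

import numpy as np

from riid.losses import jensen_shannon_distance
from riid.models.neural_nets import ARAD

model = ARAD(latent_dim=8)
model.fit(train_ss, verbose=True)

# `predict` repeats the downsampling/normalization done in `fit`;
# squeeze the trailing channel axis the decoder may emit.
reconstructions = np.squeeze(model.predict(test_ss))

# Apply the same preprocessing so inputs and reconstructions align.
norm_ss = test_ss[:]
norm_ss.downsample_spectra(target_bins=128)
norm_ss.normalize()
spectra = norm_ss.get_samples().astype(float)

# Spectra the autoencoder reconstructs poorly score highest; those are
# the candidates for being anomalous (out-of-distribution).
scores = jensen_shannon_distance(spectra, reconstructions).numpy()
print(np.argsort(scores)[::-1][:10])  # 10 most anomalous sample indices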