diff --git a/src/ydata_synthetic/tests/custom_layers/test_activation_interface.py b/src/ydata_synthetic/tests/custom_layers/test_activation_interface.py
deleted file mode 100644
index b6bbec63..00000000
--- a/src/ydata_synthetic/tests/custom_layers/test_activation_interface.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"Activation Interface layer test suite."
-from itertools import cycle, islice
-from re import search
-
-from numpy import array, cumsum, isin, split
-from numpy import sum as npsum
-from numpy.random import normal
-from pandas import DataFrame, concat
-from pytest import fixture
-from tensorflow.keras import Model
-from tensorflow.keras.layers import Dense, Input
-
-from ydata_synthetic.preprocessing.regular.processor import \
-    RegularDataProcessor
-from ydata_synthetic.utils.gumbel_softmax import ActivationInterface
-
-BATCH_SIZE = 10
-
-@fixture(name='noise_batch')
-def fixture_noise_batch():
-    "Sample noise for mock output generation."
-    return normal(size=(BATCH_SIZE, 16))
-
-@fixture(name='mock_data')
-def fixture_mock_data():
-    "Creates mock data for the tests."
-    num_block = DataFrame(normal(size=(BATCH_SIZE, 6)), columns = [f'num_{i}' for i in range(6)])
-    cat_block_1 = DataFrame(array(list(islice(cycle(range(2)), BATCH_SIZE))), columns = ['cat_0'])
-    cat_block_2 = DataFrame(array(list(islice(cycle(range(4)), BATCH_SIZE))), columns = ['cat_1'])
-    return concat([num_block, cat_block_1, cat_block_2], axis = 1)
-
-@fixture(name='mock_processor')
-def fixture_mock_processor(mock_data):
-    "Creates a mock data processor for the mock data."
-    num_cols = [col for col in mock_data.columns if col.startswith('num')]
-    cat_cols = [col for col in mock_data.columns if col.startswith('cat')]
-    return RegularDataProcessor(num_cols, cat_cols).fit(mock_data)
-
-# pylint: disable=C0103
-@fixture(name='mock_generator')
-def fixture_mock_generator(noise_batch, mock_processor):
-    "A mock generator with the Activation Interface as final layer."
-    input_ = Input(shape=noise_batch.shape[1], batch_size = BATCH_SIZE)
-    dim = 15
-    data_dim = 12
-    x = Dense(dim, activation='relu')(input_)
-    x = Dense(dim * 2, activation='relu')(x)
-    x = Dense(dim * 4, activation='relu')(x)
-    x = Dense(data_dim)(x)
-    x = ActivationInterface(processor_info=mock_processor.col_transform_info, name='act_itf')(x)
-    return Model(inputs=input_, outputs=x)
-
-@fixture(name='mock_output')
-def fixture_mock_output(noise_batch, mock_generator):
-    "Returns mock output of the model as a numpy object."
-    return mock_generator(noise_batch).numpy()
-
-# pylint: disable=W0632
-def test_io(mock_processor, mock_output):
-    "Tests the output format of the activation interface for a known input."
-    num_lens = len(mock_processor.col_transform_info.numerical.feat_names_out)
-    cat_lens = len(mock_processor.col_transform_info.categorical.feat_names_out)
-    assert mock_output.shape == (BATCH_SIZE, num_lens + cat_lens), "The output has wrong shape."
-    num_part, cat_part = split(mock_output, [num_lens], 1)
-    assert not isin(num_part, [0, 1]).all(), "The numerical block is not expected to contain 0 or 1."
-    assert isin(cat_part, [0, 1]).all(), "The categorical block is expected to contain only 0 or 1."
-    cat_i, cat_o = mock_processor.col_transform_info.categorical
-    cat_blocks = cumsum([len([col for col in cat_o if col.startswith(feat) and search('_[0-9]*$', col)]) \
-        for feat in cat_i])
-    cat_blocks = split(cat_part, cat_blocks[:-1], 1)
-    assert all(npsum(abs(block)) == BATCH_SIZE for block in cat_blocks), "There are non one-hot encoded \
-        categorical blocks."
diff --git a/src/ydata_synthetic/tests/custom_layers/test_gumbel_softmax.py b/src/ydata_synthetic/tests/custom_layers/test_gumbel_softmax.py
deleted file mode 100644
index dd52c71d..00000000
--- a/src/ydata_synthetic/tests/custom_layers/test_gumbel_softmax.py
+++ /dev/null
@@ -1,54 +0,0 @@
-"Test suite for the Gumbel-Softmax layer implementation."
-import tensorflow as tf
-from numpy import amax, amin, isclose, ones
-from numpy import sum as npsum
-from pytest import fixture
-from tensorflow.keras import layers
-
-from ydata_synthetic.utils.gumbel_softmax import GumbelSoftmaxLayer
-
-
-# pylint:disable=W0613
-def custom_initializer(shape_list, dtype):
-    "A constant weight initializer to ensure test reproducibility."
-    return tf.constant(ones((5, 5)), dtype=tf.dtypes.float32)
-
-@fixture(name='rand_input')
-def fixture_rand_input():
-    "A random, reproducible input for the mock model."
-    return tf.constant(tf.random.normal([4, 5], seed=42))
-
-def test_hard_sample_output_format(rand_input):
-    """Tests that the hard output samples are in the expected format.
-    The hard sample should be returned as a one-hot tensor."""
-    affined = layers.Dense(5, use_bias = False, kernel_initializer=custom_initializer)(rand_input)
-    hard_sample, _ = GumbelSoftmaxLayer()(affined)
-    assert npsum(hard_sample) == hard_sample.shape[0], "The sum of the hard samples should equal the number of records."
-    assert all(npsum(hard_sample == 0, 1) == hard_sample.shape[1] - 1), "The hard sample is not a one-hot tensor."
-
-def test_soft_sample_output_format(rand_input):
-    """Tests that the soft output samples are in the expected format.
-    The soft sample should be returned as a tensor of probabilities."""
-    affined = layers.Dense(5, use_bias = False, kernel_initializer=custom_initializer)(rand_input)
-    _, soft_sample = GumbelSoftmaxLayer(tau=0.5)(affined)
-    assert isclose(npsum(soft_sample), soft_sample.shape[0]), "The sum of the soft samples should be close to \
-        the number of records."
-    assert amax(soft_sample) <= 1, "Invalid probability values found."
-    assert amin(soft_sample) >= 0, "Invalid probability values found."
-
-def test_gradients(rand_input):
-    "Performs basic numerical assertions on the gradients of the soft/hard samples."
-    def mock(i):
-        return GumbelSoftmaxLayer()(layers.Dense(5, use_bias=False, kernel_initializer=custom_initializer)(i))
-    with tf.GradientTape() as hard_tape:
-        hard_tape.watch(rand_input)
-        hard_sample, _ = mock(rand_input)
-    with tf.GradientTape() as soft_tape:
-        soft_tape.watch(rand_input)
-        _, soft_sample = mock(rand_input)
-    hard_grads = hard_tape.gradient(hard_sample, rand_input)
-    soft_grads = soft_tape.gradient(soft_sample, rand_input)
-
-    assert hard_grads is None, "The hard sample must not compute gradients."
-    assert soft_grads is not None, "The soft sample is expected to compute gradients."
-    assert npsum(abs(soft_grads)) != 0, "The soft sample is expected to have non-zero gradients."
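For context on what the removed suites covered: the layer under test returns a (hard, soft) pair, where the hard sample is a one-hot tensor that blocks gradients and the soft sample is a probability tensor that carries them. Below is a minimal sketch of that sampling scheme, assuming TensorFlow 2.x; gumbel_softmax_sample is a hypothetical helper written here for illustration, not the API of ydata_synthetic.utils.gumbel_softmax.

    import tensorflow as tf

    def gumbel_softmax_sample(logits, tau=0.5):
        """Hypothetical helper, not the library API: draw (hard, soft) samples."""
        # Gumbel(0, 1) noise via -log(-log(U)), with U ~ Uniform(0, 1).
        uniform = tf.random.uniform(tf.shape(logits), minval=1e-20, maxval=1.0)
        gumbels = -tf.math.log(-tf.math.log(uniform))
        # Soft sample: a differentiable relaxation of a categorical draw,
        # sharpened as the temperature tau decreases.
        soft = tf.nn.softmax((logits + gumbels) / tau, axis=-1)
        # Hard sample: one-hot of the argmax; argmax is not differentiable,
        # so gradients taken through the hard sample come back as None.
        hard = tf.one_hot(tf.argmax(soft, axis=-1), depth=tf.shape(logits)[-1])
        return hard, soft

    hard, soft = gumbel_softmax_sample(tf.random.normal([4, 5], seed=42))
    print(tf.reduce_sum(hard, axis=-1).numpy())  # each row sums to 1 (one-hot)
    print(tf.reduce_sum(soft, axis=-1).numpy())  # each row sums to ~1 (probabilities)

Under these assumptions, gradients flow through the softmax of the soft sample but not through the argmax/one_hot of the hard sample, which is the behaviour the deleted test_gradients asserted.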