-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 8 changed files with 230 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#ifndef NN_CONSTANTS_H
#define NN_CONSTANTS_H

/**
 * @brief Epsilon value for numerical stability.
 *
 * Used e.g. to clip probabilities away from 0 and 1 before taking log(),
 * so that log(0) is never evaluated (see nn_loss_cross_entropy).
 */
#define NN_EPSILON 1e-7

#endif // NN_CONSTANTS_H
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#ifndef NN_LOSS_H
#define NN_LOSS_H

#include "nn_error.h"
#include "nn_tensor.h"
#include <stddef.h>

/**
 * @brief Returns the cross-entropy loss between the predictions and actual tensors.
 *
 * The loss is averaged over the batch (first dimension of predictions).
 *
 * @param predictions The predictions (output of the network) tensor; 2-dimensional (batch x classes).
 * @param actual The actual (ground truth) tensor (one-hot encoded, 2-dimensional; or categorical, 1-dimensional).
 * @param error The error instance to set if an error occurs.
 *
 * @return The cross-entropy loss, or 0 when an error was set.
 */
NNTensorUnit nn_loss_cross_entropy(const NNTensor *predictions, const NNTensor *actual, NNError *error);

#endif // NN_LOSS_H
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import numpy as np | ||
|
||
NN_EPSILON = 1e-7 | ||
|
||
def nn_loss_cross_entropy(predictions, actual, eps=None):
    """Return the mean cross-entropy loss between predictions and actual.

    Args:
        predictions: 2D array of shape (batch, classes) holding predicted
            class probabilities.
        actual: either a one-hot encoded 2D array of the same shape, or a
            1D array of integer class labels (categorical).
        eps: clipping epsilon used to keep log() away from 0 and 1; defaults
            to NN_EPSILON (generalized from the previously hard-coded constant).

    Returns:
        The scalar mean cross-entropy loss over the batch.
    """
    if eps is None:
        eps = NN_EPSILON
    batch_size = predictions.shape[0]
    # Clip so log() never sees exactly 0 or 1
    predictions = np.clip(predictions, eps, 1 - eps)

    if len(actual.shape) == 2:
        # One-hot encoded: the product zeroes out every non-true class
        correct_confidences = np.sum(predictions * actual, axis=1)
    else:
        # Categorical: index the confidence of the true class per row
        correct_confidences = predictions[np.arange(batch_size), actual.astype(int)]

    return -np.mean(np.log(correct_confidences))
|
||
|
||
def generate_test_case(predictions, actual):
    """Render one C TestCase initializer for a predictions/actual pair."""
    pred_values = ", ".join(str(v) for v in predictions.flatten())
    actual_values = ", ".join(str(v) for v in actual.flatten())
    loss = nn_loss_cross_entropy(predictions, actual)
    # A 2D (one-hot) actual tensor needs both sizes; a 1D one needs only the batch size.
    if len(actual.shape) > 1:
        actual_sizes = f"{len(actual)}, {len(actual[0])}"
    else:
        actual_sizes = f"{len(actual)}"
    return f"""
{{
.predictions = nn_tensor_init_NNTensor(2, (const size_t[]){{{len(predictions)}, {len(predictions[0])}}}, false, (const NNTensorUnit[]){{{pred_values}}}, NULL),
.actual = nn_tensor_init_NNTensor({len(actual.shape)}, (const size_t[]){{{actual_sizes}}}, false, (const NNTensorUnit[]){{{actual_values}}}, NULL),
.expected_value = {loss}f,
.expected_tolerance = default_expected_tolerance,
}}"""
|
||
|
||
# Generate test cases
np.random.seed(2024)

sample_pairs = [
    # One-hot encoded
    (np.array([[0.1, 0.7, 0.2], [0.3, 0.4, 0.3], [0.8, 0.1, 0.1]]), np.array([[0, 1, 0], [1, 0, 0], [0, 1, 0]])),
    (np.array([[0.2, 0.5, 0.3], [0.4, 0.4, 0.2], [0.7, 0.2, 0.1]]), np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])),
    (np.array([[0.3, 0.4, 0.3], [0.6, 0.3, 0.1], [0.5, 0.2, 0.3]]), np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]])),

    # Categorical labels
    (np.array([[0.1, 0.7, 0.2], [0.3, 0.4, 0.3], [0.8, 0.1, 0.1]]), np.array([1, 0, 1])),
    (np.array([[0.2, 0.5, 0.3], [0.4, 0.4, 0.2], [0.7, 0.2, 0.1]]), np.array([0, 1, 2])),
    (np.array([[0.3, 0.4, 0.3], [0.6, 0.3, 0.1], [0.5, 0.2, 0.3]]), np.array([2, 1, 0])),
]

# Render every pair into a C initializer
test_cases = [generate_test_case(p, a) for p, a in sample_pairs]

print(f"TestCase test_cases[] = {{{', '.join(test_cases)},\n}};")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#include "nn_loss.h" | ||
#include "nn_constants.h" | ||
#include "nn_debug.h" | ||
#include "nn_error.h" | ||
#include "nn_tensor.h" | ||
#include <math.h> | ||
#include <stdbool.h> | ||
#include <stddef.h> | ||
|
||
// TODO: Implement macro for fminf and fmaxf | ||
// TODO: Implement macro for logf | ||
|
||
NNTensorUnit nn_loss_cross_entropy(const NNTensor *predictions, const NNTensor *actual, NNError *error) { | ||
NN_DEBUG_PRINT(5, "function %s called with predictions.dims=%zu actual.dims=%zu\n", __func__, predictions->dims, actual->dims); | ||
|
||
if (!(predictions->flags & NN_TENSOR_FLAG_INIT) || !(actual->flags & NN_TENSOR_FLAG_INIT)) { | ||
nn_error_set(error, NN_ERROR_INVALID_ARGUMENT, "tensor predictions or actual is not initialized"); | ||
return 0; | ||
} else if (predictions->dims != 2 || actual->dims < 1 || actual->dims > 2 || predictions->sizes[0] != actual->sizes[0]) { | ||
// Only one-hot encoded or categorical tensors with the same batch size are allowed | ||
nn_error_set(error, NN_ERROR_INVALID_ARGUMENT, "only 2-dimensional predictions tensor and 1 or 2-dimensional actual tensor with the same batch size are allowed"); | ||
return 0; | ||
} | ||
|
||
// Determine the batch size, the number of classes and if the actual tensor is one-hot encoded | ||
size_t batch_size = predictions->sizes[0]; | ||
size_t num_classes = predictions->sizes[1]; | ||
bool one_hot = (actual->dims == 2 && actual->sizes[1] == num_classes); | ||
|
||
// Compute the cross-entropy loss | ||
NNTensorUnit loss = 0; | ||
if (one_hot) { | ||
// Iterate over the batch | ||
for (size_t i = 0; i < batch_size; i++) { | ||
// Iterate over the classes | ||
for (size_t j = 0; j < num_classes; j++) { | ||
// Clip the predictions value to avoid log(0) | ||
NNTensorUnit predictions_val = fminf(fmaxf(predictions->data[i * num_classes + j], NN_EPSILON), 1 - NN_EPSILON); | ||
// If the actual value is greater than 0 | ||
// if (actual->data[i * num_classes + j] > 0) { | ||
// loss -= logf(predictions_val); | ||
// } | ||
loss -= actual->data[i * num_classes + j] * logf(predictions_val); | ||
} | ||
} | ||
} else { | ||
// Iterate over the batch | ||
for (size_t i = 0; i < batch_size; i++) { | ||
// Clip the predictions value to avoid log(0) | ||
size_t class_idx = (size_t)actual->data[i]; | ||
NNTensorUnit predictions_val = fminf(fmaxf(predictions->data[i * num_classes + class_idx], NN_EPSILON), 1 - NN_EPSILON); | ||
loss -= logf(predictions_val); | ||
} | ||
} | ||
|
||
// Average the loss | ||
loss /= batch_size; | ||
|
||
return loss; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
tests/arch/generic/loss/nn_loss_cross_entropy.c | ||
src/nn_app.c | ||
src/nn_config.c | ||
src/nn_error.c | ||
src/nn_loss.c | ||
src/nn_tensor.c | ||
src/nn_test.c |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
// Runs the cross-entropy loss test cases (asserts internally).
// Fix: "(void)" declares a proper prototype; "()" means unspecified arguments in C.
void test_nn_loss_cross_entropy(void);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#include "./loss.h" | ||
#include "nn_app.h" | ||
|
||
// Test runner entry point: initializes the app, then runs the loss tests.
int main(int argc, char *argv[]) {
    // Initialize the app with the command-line arguments (parses common flags)
    nn_init_app(argc, argv);
    // nn_set_debug_level(5); // for debugging

    // Each test case asserts internally; reaching return 0 means all passed
    test_nn_loss_cross_entropy();

    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#include "nn_loss.h" | ||
#include "nn_tensor.h" | ||
#include <assert.h> | ||
#include <math.h> | ||
#include <stdio.h> | ||
|
||
// One test case for nn_loss_cross_entropy: input tensors plus the expected result.
typedef struct {
    NNTensor *predictions;           // predictions tensor (batch x classes)
    NNTensor *actual;                // ground-truth tensor (one-hot or categorical)
    NNTensorUnit expected_value;     // expected loss value
    NNTensorUnit expected_tolerance; // maximum allowed absolute difference
} TestCase;
|
||
void test_nn_loss_cross_entropy() { | ||
const NNTensorUnit default_expected_tolerance = 0.000001f; | ||
|
||
// See scripts/test/gen/nn_loss_cross_entropy.py | ||
TestCase test_cases[] = { | ||
{ | ||
.predictions = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0.1, 0.7, 0.2, 0.3, 0.4, 0.3, 0.8, 0.1, 0.1}, NULL), | ||
.actual = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0, 1, 0, 1, 0, 0, 0, 1, 0}, NULL), | ||
.expected_value = 1.2877442804195713f, | ||
.expected_tolerance = default_expected_tolerance, | ||
}, | ||
{ | ||
.predictions = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0.2, 0.5, 0.3, 0.4, 0.4, 0.2, 0.7, 0.2, 0.1}, NULL), | ||
.actual = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){1, 0, 0, 0, 1, 0, 0, 0, 1}, NULL), | ||
.expected_value = 1.6094379124341003f, | ||
.expected_tolerance = default_expected_tolerance, | ||
}, | ||
{ | ||
.predictions = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0.3, 0.4, 0.3, 0.6, 0.3, 0.1, 0.5, 0.2, 0.3}, NULL), | ||
.actual = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0, 0, 1, 0, 1, 0, 1, 0, 0}, NULL), | ||
.expected_value = 1.0336975964039392f, | ||
.expected_tolerance = default_expected_tolerance, | ||
}, | ||
{ | ||
.predictions = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0.1, 0.7, 0.2, 0.3, 0.4, 0.3, 0.8, 0.1, 0.1}, NULL), | ||
.actual = nn_tensor_init_NNTensor(1, (const size_t[]){3}, false, (const NNTensorUnit[]){1, 0, 1}, NULL), | ||
.expected_value = 1.2877442804195713f, | ||
.expected_tolerance = default_expected_tolerance, | ||
}, | ||
{ | ||
.predictions = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0.2, 0.5, 0.3, 0.4, 0.4, 0.2, 0.7, 0.2, 0.1}, NULL), | ||
.actual = nn_tensor_init_NNTensor(1, (const size_t[]){3}, false, (const NNTensorUnit[]){0, 1, 2}, NULL), | ||
.expected_value = 1.6094379124341003f, | ||
.expected_tolerance = default_expected_tolerance, | ||
}, | ||
{ | ||
.predictions = nn_tensor_init_NNTensor(2, (const size_t[]){3, 3}, false, (const NNTensorUnit[]){0.3, 0.4, 0.3, 0.6, 0.3, 0.1, 0.5, 0.2, 0.3}, NULL), | ||
.actual = nn_tensor_init_NNTensor(1, (const size_t[]){3}, false, (const NNTensorUnit[]){2, 1, 0}, NULL), | ||
.expected_value = 1.0336975964039392f, | ||
.expected_tolerance = default_expected_tolerance, | ||
}, | ||
}; | ||
|
||
const int n_cases = sizeof(test_cases) / sizeof(test_cases[0]); | ||
for (int i = 0; i < n_cases; i++) { | ||
TestCase tc = test_cases[i]; | ||
|
||
NNError error = {0}; | ||
NNTensorUnit loss = nn_loss_cross_entropy(tc.predictions, tc.actual, &error); | ||
assert(error.code == NN_ERROR_NONE); | ||
for (size_t j = 0; j < tc.predictions->sizes[0]; j++) { | ||
assert(fabs(loss - tc.expected_value) < tc.expected_tolerance); | ||
} | ||
printf("passed: %s case=%d\n", __func__, i + 1); | ||
} | ||
} |