Added NewClassesCrossEntropy criterion and automatic criterion plugin (#1514)

AlbinSou · web-flow · commit 8803c85e326d · 2023-10-20T10:37:04.000+02:00
* Added NewClassesCrossEntropy criterion and automatic adding of criterion plugins

* change to maskedcrossentropy with 3 modes

* added seen and all options, switch to torch functional kl div

* added stable softmax
diff --git a/avalanche/training/losses.py b/avalanche/training/losses.py
@@ -1,10 +1,13 @@
 import copy
 
+import numpy as np
 import torch
+import torch.nn.functional as F
 from torch import nn
-from avalanche.training.plugins import SupervisedPlugin
 from torch.nn import BCELoss
-import numpy as np
+
+from avalanche.training.plugins import SupervisedPlugin
+from avalanche.training.regularization import cross_entropy_with_oh_targets
 
 
 class ICaRLLossPlugin(SupervisedPlugin):
@@ -161,4 +164,62 @@ def forward(self, features, labels=None, mask=None):
         return loss
 
 
-__all__ = ["ICaRLLossPlugin", "SCRLoss"]
+class MaskedCrossEntropy(SupervisedPlugin):
+    """
+    Masked Cross Entropy
+
+    This criterion can be used for instance in Class Incremental
+    Learning Problems when no exemplars are used
+    (i.e. LwF in Class Incremental Learning would need to use mask="new").
+    """
+
+    def __init__(self, classes=None, mask="seen", reduction="mean"):
+        """
+        param: classes: Initial value for current classes
+        param: mask: "seen" cross entropy on all the classes seen so far
+                     "all" normal cross entropy on every output unit
+                     "old" cross entropy only on the old classes
+                     "new" cross entropy only on the new classes
+        param: reduction: "mean" or "none", average or per-sample loss
+        """
+        super().__init__()
+        assert mask in ["seen", "new", "old", "all"]
+        self.current_classes = set(classes) if classes is not None else set()
+        self.old_classes = set()
+        self.reduction = reduction
+        self.mask = mask
+
+    def __call__(self, logits, targets):
+        """Cross entropy restricted to the columns given by `current_mask`."""
+        # Computed once so logits and targets are sliced identically.
+        mask = self.current_mask(logits.shape[1])
+        oh_targets = F.one_hot(targets, num_classes=logits.shape[1])
+        return cross_entropy_with_oh_targets(
+            logits[:, mask],
+            oh_targets[:, mask].float(),
+            reduction=self.reduction,
+        )
+
+    def current_mask(self, logit_shape):
+        """Return the ascending list of class indices kept by `self.mask`."""
+        if self.mask == "seen":
+            return sorted(self.current_classes.union(self.old_classes))
+        elif self.mask == "new":
+            return sorted(self.current_classes)
+        elif self.mask == "old":
+            return sorted(self.old_classes)
+        elif self.mask == "all":
+            return list(range(int(logit_shape)))
+        raise ValueError(f"unknown mask mode: {self.mask!r}")
+
+    def adaptation(self, new_classes):
+        """Turn the current classes into old ones, then store the new ones."""
+        self.old_classes = self.old_classes.union(self.current_classes)
+        self.current_classes = set(new_classes)
+
+    def before_training_exp(self, strategy, **kwargs):
+        """Update the class sets from the experience about to be trained."""
+        self.adaptation(strategy.experience.classes_in_this_experience)
+
+
+__all__ = ["ICaRLLossPlugin", "SCRLoss", "MaskedCrossEntropy"]
diff --git a/avalanche/training/regularization.py b/avalanche/training/regularization.py
@@ -9,12 +9,25 @@
 from avalanche.models import MultiTaskModule, avalanche_forward
 
 
-def cross_entropy_with_oh_targets(outputs, targets, eps=1e-5):
+def stable_softmax(x):
+    """Softmax over dim 1 with the row-wise max subtracted first."""
+    # Shifting by the max keeps exp() from overflowing; softmax is unchanged.
+    shifted = x - x.max(dim=1, keepdim=True).values
+    exps = shifted.exp()
+    return exps / exps.sum(dim=1, keepdim=True)
+
+
+def cross_entropy_with_oh_targets(outputs, targets, reduction="mean"):
-    """Calculates cross-entropy with temperature scaling,
-    targets can also be soft targets but they must sum to 1"""
-    outputs = torch.nn.functional.softmax(outputs, dim=1)
-    ce = -(targets * outputs.log()).sum(1)
-    ce = ce.mean()
-    return ce
+    """Calculates cross-entropy between logits and one-hot targets,
+    targets can also be soft targets but they must sum to 1"""
+    # log_softmax stays finite where log(softmax(x)) hits log(0) = -inf,
+    # which made 0 * -inf = NaN for classes with zero target weight.
+    log_probs = torch.nn.functional.log_softmax(outputs, dim=1)
+    ce = -(targets * log_probs).sum(1)
+    if reduction == "mean":
+        return ce.mean()
+    if reduction == "none":
+        return ce
+    raise NotImplementedError("reduction must be mean or none")
 
 
diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py
@@ -96,6 +96,9 @@ def __init__(
         self._criterion = criterion
         """ Criterion. """
 
+        if criterion not in self.plugins and isinstance(criterion, BasePlugin):
+            self.plugins.append(criterion)
+
         self.train_epochs: int = train_epochs
         """ Number of training epochs. """
 
diff --git a/tests/training/test_losses.py b/tests/training/test_losses.py
@@ -1,7 +1,9 @@
 import unittest
 
 import torch
-from avalanche.training.losses import ICaRLLossPlugin
+import torch.nn as nn
+
+from avalanche.training.losses import ICaRLLossPlugin, MaskedCrossEntropy
 
 
 class TestICaRLLossPlugin(unittest.TestCase):
@@ -34,5 +36,28 @@ def test_loss(self):
         assert loss3 == loss1
 
 
+class TestMaskedCrossEntropy(unittest.TestCase):
+    def test_loss(self):
+        """mask="new" must match plain cross entropy on the new-class logits."""
+        cross_entropy = nn.CrossEntropyLoss()
+        criterion = MaskedCrossEntropy(mask="new")
+        criterion.adaptation([1, 2, 3, 4])
+        criterion.adaptation([5, 6, 7])
+
+        mb_y = torch.tensor([5, 5, 6, 7, 6])
+        # Seeded local generator keeps the test input reproducible.
+        new_pred = torch.rand(5, 8, generator=torch.Generator().manual_seed(0))
+        new_pred_new = new_pred[:, criterion.current_mask(new_pred.shape[1])]
+
+        loss1 = criterion(new_pred, mb_y)
+        loss2 = cross_entropy(new_pred_new, mb_y - 5)
+
+        criterion.mask = "seen"
+        loss3 = criterion(new_pred, mb_y)
+
+        self.assertAlmostEqual(float(loss1), float(loss2), places=5)
+        self.assertNotAlmostEqual(float(loss1), float(loss3), places=5)
+
+
 if __name__ == "__main__":
     unittest.main()