diff --git a/avalanche/benchmarks/classic/clear.py b/avalanche/benchmarks/classic/clear.py index ec71941c2..7db8ffdbd 100644 --- a/avalanche/benchmarks/classic/clear.py +++ b/avalanche/benchmarks/classic/clear.py @@ -24,8 +24,7 @@ We support both evaluation protocols for benchmark construction.""" from pathlib import Path -from typing import List, Sequence, Union, Any, Optional -from typing_extensions import Literal +from typing import Sequence, Union, Any, Optional from avalanche.benchmarks.datasets.clear import ( _CLEARImage, @@ -34,9 +33,12 @@ CLEAR_FEATURE_TYPES, _CLEAR_DATA_SPLITS, ) -from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_paths, - create_generic_benchmark_from_tensor_lists, +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_paths, + create_classification_benchmark_from_tensor_lists, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, ) EVALUATION_PROTOCOLS = ["iid", "streaming"] @@ -108,7 +110,7 @@ def CLEAR( Defaults to None, which means that the default location for str(data_name) will be used. - :returns: a properly initialized :class:`GenericCLScenario` instance. + :returns: a properly initialized :class:`ClassificationScenario` instance. """ assert data_name in _CLEAR_DATA_SPLITS @@ -130,6 +132,7 @@ def CLEAR( else: raise NotImplementedError() + benchmark_obj: CommonClassificationScenarioType if feature_type is None: clear_dataset_train = _CLEARImage( root=dataset_root, @@ -153,7 +156,7 @@ def CLEAR( test_samples_paths = clear_dataset_test.get_paths_and_targets( root_appended=True ) - benchmark_obj = create_generic_benchmark_from_paths( + benchmark_obj = create_classification_benchmark_from_paths( train_samples_paths, test_samples_paths, task_labels=list(range(len(train_samples_paths))), @@ -181,7 +184,7 @@ def CLEAR( train_samples = clear_dataset_train.tensors_and_targets test_samples = clear_dataset_test.tensors_and_targets - benchmark_obj = create_generic_benchmark_from_tensor_lists( + benchmark_obj = create_classification_benchmark_from_tensor_lists( train_samples, test_samples, task_labels=list(range(len(train_samples))), diff --git a/avalanche/benchmarks/classic/core50.py b/avalanche/benchmarks/classic/core50.py index f7e39b5b5..df6280bcf 100644 --- a/avalanche/benchmarks/classic/core50.py +++ b/avalanche/benchmarks/classic/core50.py @@ -10,8 +10,8 @@ ################################################################################ """ This module contains the high-level CORe50 benchmark generator. 
It -basically returns a iterable benchmark object ``GenericCLScenario`` given a -number of configuration parameters.""" +basically returns a iterable benchmark object :class:`ClassificationScenario` +given a number of configuration parameters.""" from pathlib import Path from typing import Union, Optional, Any @@ -26,10 +26,13 @@ check_vision_benchmark, ) from avalanche.benchmarks.datasets import default_dataset_location -from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_filelists, +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_filelists, ) from avalanche.benchmarks.datasets.core50.core50 import CORe50Dataset +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, +) nbatch = { "ni": 8, @@ -109,7 +112,7 @@ def CORe50( location for 'core50' will be used. - :returns: a properly initialized :class:`GenericCLScenario` instance. + :returns: a properly initialized :class:`ClassificationScenario` instance. """ assert 0 <= run <= 9, ( @@ -149,15 +152,29 @@ def CORe50( / ("train_batch_" + str(batch_id).zfill(2) + "_filelist.txt") ) - benchmark_obj = create_generic_benchmark_from_filelists( - root_img, - train_failists_paths, - [root / filelists_bp / "test_filelist.txt"], - task_labels=[0 for _ in range(nbatch[scenario])], - complete_test_set_only=True, - train_transform=train_transform, - eval_transform=eval_transform, - ) + benchmark_obj: CommonClassificationScenarioType = \ + create_classification_benchmark_from_filelists( + root_img, + train_failists_paths, + [root / filelists_bp / "test_filelist.txt"], + task_labels=[0 for _ in range(nbatch[scenario])], + complete_test_set_only=True, + train_transform=train_transform, + eval_transform=eval_transform, + ) + + if scenario == 'nc': + n_classes_per_exp = [] + classes_order = [] + for exp in benchmark_obj.train_stream: + exp_dataset = exp.dataset + unique_targets = list(sorted( + set(int(x) for x in exp_dataset.targets) # type: ignore + )) + n_classes_per_exp.append(len(unique_targets)) + classes_order.extend(unique_targets) + setattr(benchmark_obj, 'n_classes_per_exp', n_classes_per_exp) + setattr(benchmark_obj, 'classes_order', classes_order) return benchmark_obj diff --git a/avalanche/benchmarks/classic/ctrl.py b/avalanche/benchmarks/classic/ctrl.py index b896ccf48..50461d71d 100644 --- a/avalanche/benchmarks/classic/ctrl.py +++ b/avalanche/benchmarks/classic/ctrl.py @@ -18,9 +18,12 @@ import torchvision.transforms.functional as F from torchvision import transforms from tqdm import tqdm +from avalanche.benchmarks.generators.benchmark_generators import ( + dataset_classification_benchmark, +) from avalanche.benchmarks.utils.classification_dataset import ( - SupervisedClassificationDataset, + ClassificationDataset, ) try: @@ -83,7 +86,7 @@ def CTrL( folder = path / "ctrl" / stream_name / f"seed_{seed}" # Train, val and test experiences - exps: List[List[SupervisedClassificationDataset]] = [[], [], []] + exps: List[List[ClassificationDataset]] = [[], [], []] for t_id, t in enumerate( tqdm(stream, desc=f"Loading {stream_name}"), ): @@ -104,7 +107,7 @@ def CTrL( common_root, exp_paths_list = common_paths_root(files) paths_dataset: PathsDataset[Image, int] = \ PathsDataset(common_root, exp_paths_list) - dataset: SupervisedClassificationDataset = \ + dataset: ClassificationDataset = \ make_classification_dataset( paths_dataset, task_labels=task_labels, @@ -126,7 +129,7 @@ def 
CTrL( if t_id == n_tasks - 1: break - return dataset_benchmark( + return dataset_classification_benchmark( train_datasets=exps[0], test_datasets=exps[2], other_streams_datasets=dict(val=exps[1]), diff --git a/avalanche/benchmarks/classic/endless_cl_sim.py b/avalanche/benchmarks/classic/endless_cl_sim.py index 367b082ee..87b0f1348 100644 --- a/avalanche/benchmarks/classic/endless_cl_sim.py +++ b/avalanche/benchmarks/classic/endless_cl_sim.py @@ -12,9 +12,15 @@ """ This module contains the high-level EndlessCLSim scenario generator. It returns an iterable scenario object -``GenericCLScenario`` given a number of configuration parameters. +:class:`ClassificationScenario` given a number of configuration parameters. """ +from avalanche.benchmarks.generators.benchmark_generators import ( + dataset_classification_benchmark, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, +) from avalanche.benchmarks.utils.classification_dataset import ( make_classification_dataset, ) @@ -27,11 +33,7 @@ from torchvision.transforms import ToTensor from torchvision.transforms.transforms import Compose -from avalanche.benchmarks.classic.classic_benchmarks_utils import ( - check_vision_benchmark, -) from avalanche.benchmarks.datasets import default_dataset_location -from avalanche.benchmarks.generators import dataset_benchmark from avalanche.benchmarks.utils import make_classification_dataset _default_transform = Compose([ToTensor()]) @@ -146,7 +148,11 @@ def EndlessCLSim( ) ) - scenario_obj = dataset_benchmark(train_datasets, eval_datasets) + scenario_obj: CommonClassificationScenarioType = \ + dataset_classification_benchmark( + train_datasets, + eval_datasets + ) return scenario_obj diff --git a/avalanche/benchmarks/classic/openloris.py b/avalanche/benchmarks/classic/openloris.py index 38648efa9..4fa8fca56 100644 --- a/avalanche/benchmarks/classic/openloris.py +++ b/avalanche/benchmarks/classic/openloris.py @@ -10,8 +10,8 @@ ################################################################################ """ This module contains the high-level OpenLORIS benchmark/factor generator. -It basically returns a iterable benchmark object ``GenericCLScenario`` given -a number of configuration parameters.""" +It basically returns a iterable benchmark object :class:`ClassificationScenario` +given a number of configuration parameters.""" from pathlib import Path from typing import Union, Any, Optional @@ -23,8 +23,11 @@ from avalanche.benchmarks.datasets.openloris import ( OpenLORIS as OpenLORISDataset, ) -from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_filelists, +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_filelists, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, ) @@ -92,7 +95,7 @@ def OpenLORIS( Defaults to None, which means that the default location for 'openloris' will be used. - :returns: a properly initialized :class:`GenericCLScenario` instance. + :returns: a properly initialized :class:`ClassificationScenario` instance. 
""" assert factor in nbatch.keys(), ( @@ -117,15 +120,16 @@ def OpenLORIS( / ("train_batch_" + str(i).zfill(2) + ".txt") ) - factor_obj = create_generic_benchmark_from_filelists( - dataset_root, - train_failists_paths, - [dataset_root / filelists_bp / "test.txt"], - task_labels=[0 for _ in range(nbatch[factor])], - complete_test_set_only=True, - train_transform=train_transform, - eval_transform=eval_transform, - ) + factor_obj: CommonClassificationScenarioType = \ + create_classification_benchmark_from_filelists( + dataset_root, + train_failists_paths, + [dataset_root / filelists_bp / "test.txt"], + task_labels=[0 for _ in range(nbatch[factor])], + complete_test_set_only=True, + train_transform=train_transform, + eval_transform=eval_transform, + ) return factor_obj diff --git a/avalanche/benchmarks/classic/stream51.py b/avalanche/benchmarks/classic/stream51.py index 970e9b00f..7020530d6 100644 --- a/avalanche/benchmarks/classic/stream51.py +++ b/avalanche/benchmarks/classic/stream51.py @@ -9,18 +9,26 @@ # Website: www.continualai.org # ################################################################################ from pathlib import Path -from typing import Any, List, Optional, Sequence, Tuple, Union +from typing import List, Optional, Union from typing_extensions import Literal + from avalanche.benchmarks.datasets import Stream51 +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_paths, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, +) from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_paths, FileAndLabel + FileAndLabel, ) from torchvision import transforms import math import os + _mu = [0.485, 0.456, 0.406] _std = [0.229, 0.224, 0.225] _default_stream51_transform = transforms.Compose( @@ -71,7 +79,7 @@ def CLStream51( train_transform=_default_stream51_transform, eval_transform=_default_stream51_transform, dataset_root: Optional[Union[str, Path]] = None -): +) -> CommonClassificationScenarioType: """ Creates a CL benchmark for Stream-51. @@ -125,7 +133,7 @@ def CLStream51( Defaults to None, which means that the default location for 'stream51' will be used. - :returns: A properly initialized :class:`GenericCLScenario` instance. + :returns: A properly initialized :class:`ClassificationScenario` instance. 
""" # get train and test sets and order them by benchmark @@ -283,14 +291,15 @@ def CLStream51( [(j[0], j[1]) for j in i] for i in test_ood_filelists_paths ] - benchmark_obj = create_generic_benchmark_from_paths( - train_lists_of_files=train_filelists_paths, - test_lists_of_files=test_filelists_paths, - task_labels=[0 for _ in range(num_tasks)], - complete_test_set_only=scenario == "instance", - train_transform=train_transform, - eval_transform=eval_transform, - ) + benchmark_obj: CommonClassificationScenarioType = \ + create_classification_benchmark_from_paths( + train_lists_of_files=train_filelists_paths, + test_lists_of_files=test_filelists_paths, + task_labels=[0 for _ in range(num_tasks)], + complete_test_set_only=scenario == "instance", + train_transform=train_transform, + eval_transform=eval_transform, + ) return benchmark_obj diff --git a/avalanche/benchmarks/generators/__init__.py b/avalanche/benchmarks/generators/__init__.py index 189bb63bb..c74607fdb 100644 --- a/avalanche/benchmarks/generators/__init__.py +++ b/avalanche/benchmarks/generators/__init__.py @@ -1,2 +1 @@ -from .scenario_generators import * from .benchmark_generators import * diff --git a/avalanche/benchmarks/generators/benchmark_generators.py b/avalanche/benchmarks/generators/benchmark_generators.py index a5465d883..99d75fcfd 100644 --- a/avalanche/benchmarks/generators/benchmark_generators.py +++ b/avalanche/benchmarks/generators/benchmark_generators.py @@ -32,6 +32,13 @@ ) import torch +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_filelists, + create_classification_benchmark_from_paths, + create_classification_benchmark_from_tensor_lists, + create_lazy_classification_benchmark, + create_multi_dataset_classification_benchmark, +) from avalanche.benchmarks.scenarios.classification_scenario import \ ClassificationScenario @@ -58,13 +65,15 @@ from avalanche.benchmarks.scenarios.new_classes.nc_scenario import NCScenario from avalanche.benchmarks.scenarios.new_instances.ni_scenario import NIScenario from avalanche.benchmarks.utils.classification_dataset import ( - SupervisedClassificationDataset, + ClassificationDataset, SupportedDataset, - as_supervised_classification_dataset, make_classification_dataset, concat_classification_datasets_sequentially ) from avalanche.benchmarks.utils.data import AvalancheDataset +from avalanche.benchmarks.scenarios.detection_benchmark_creation import ( + create_multi_dataset_detection_benchmark, +) TDatasetScenario = TypeVar( @@ -228,10 +237,10 @@ class "34" will be mapped to "1", class "11" to "2" and so on. ) train_dataset_sup = list( - map(as_supervised_classification_dataset, train_dataset) + map(make_classification_dataset, train_dataset) ) test_dataset_sup = list( - map(as_supervised_classification_dataset, test_dataset) + map(make_classification_dataset, test_dataset) ) seq_train_dataset, seq_test_dataset, mapping = \ @@ -256,21 +265,21 @@ class "34" will be mapped to "1", class "11" to "2" and so on. 
# Overrides n_experiences (and per_experience_classes, already done) n_experiences = len(train_dataset) else: - seq_train_dataset = as_supervised_classification_dataset(train_dataset) - seq_test_dataset = as_supervised_classification_dataset(test_dataset) + seq_train_dataset = make_classification_dataset(train_dataset) + seq_test_dataset = make_classification_dataset(test_dataset) transform_groups = dict( train=(train_transform, None), eval=(eval_transform, None) ) # Set transformation groups - final_train_dataset = as_supervised_classification_dataset( + final_train_dataset = make_classification_dataset( seq_train_dataset, transform_groups=transform_groups, initial_transform_group="train", ) - final_test_dataset = as_supervised_classification_dataset( + final_test_dataset = make_classification_dataset( seq_test_dataset, transform_groups=transform_groups, initial_transform_group="eval", @@ -384,10 +393,10 @@ def ni_benchmark( ) train_dataset_sup = list( - map(as_supervised_classification_dataset, train_dataset) + map(make_classification_dataset, train_dataset) ) test_dataset_sup = list( - map(as_supervised_classification_dataset, test_dataset) + map(make_classification_dataset, test_dataset) ) seq_train_dataset, seq_test_dataset, _ = \ @@ -395,8 +404,8 @@ def ni_benchmark( train_dataset_sup, test_dataset_sup ) else: - seq_train_dataset = as_supervised_classification_dataset(train_dataset) - seq_test_dataset = as_supervised_classification_dataset(test_dataset) + seq_train_dataset = make_classification_dataset(train_dataset) + seq_test_dataset = make_classification_dataset(test_dataset) transform_groups = dict( train=(train_transform, None), eval=(eval_transform, None) @@ -439,6 +448,21 @@ def ni_benchmark( lazy_benchmark = create_lazy_generic_benchmark +# Classification-specific +dataset_classification_benchmark = \ + create_multi_dataset_classification_benchmark +filelist_classification_benchmark = \ + create_classification_benchmark_from_filelists +paths_classification_benchmark = create_classification_benchmark_from_paths +tensors_classification_benchmark = \ + create_classification_benchmark_from_tensor_lists +lazy_classification_benchmark = create_lazy_classification_benchmark + +# Detection-specific +dataset_detection_benchmark = \ + create_multi_dataset_detection_benchmark + + def _one_dataset_per_exp_class_order( class_list_per_exp: Sequence[Sequence[int]], shuffle: bool, @@ -809,9 +833,9 @@ def random_validation_split_strategy( def class_balanced_split_strategy( - validation_size: Union[int, float], - experience: DatasetExperience[SupervisedClassificationDataset], -) -> Tuple[SupervisedClassificationDataset, SupervisedClassificationDataset]: + validation_size: float, + experience: DatasetExperience[ClassificationDataset], +) -> Tuple[ClassificationDataset, ClassificationDataset]: """Class-balanced train/validation splits. This splitting strategy splits `experience` into two experiences @@ -831,18 +855,11 @@ def class_balanced_split_strategy( datasets. """ if not isinstance(validation_size, float): - raise ValueError("validation_size must be an integer") + raise ValueError("validation_size must be a float") if not 0.0 <= validation_size <= 1.0: raise ValueError("validation_size must be a float in [0, 1].") exp_dataset = experience.dataset - if validation_size > len(exp_dataset): - raise ValueError( - f"Can't create the validation experience: not enough " - f"instances. 
Required {validation_size}, got only" - f"{len(exp_dataset)}" - ) - exp_indices = list(range(len(exp_dataset))) targets_as_tensor = torch.as_tensor(experience.dataset.targets) exp_classes: List[int] = targets_as_tensor.unique().tolist() @@ -1132,6 +1149,13 @@ def random_validation_split_strategy_wrapper(exp): "filelist_benchmark", "paths_benchmark", "tensors_benchmark", + "lazy_benchmark", + "dataset_classification_benchmark", + "dataset_detection_benchmark", + "filelist_classification_benchmark", + "paths_classification_benchmark", + "tensors_classification_benchmark", + "lazy_classification_benchmark", "data_incremental_benchmark", "benchmark_with_validation_stream", "random_validation_split_strategy", diff --git a/avalanche/benchmarks/generators/scenario_generators.py b/avalanche/benchmarks/generators/scenario_generators.py deleted file mode 100644 index b3049595e..000000000 --- a/avalanche/benchmarks/generators/scenario_generators.py +++ /dev/null @@ -1,710 +0,0 @@ -################################################################################ -# Copyright (c) 2021 ContinualAI. # -# Copyrights licensed under the MIT License. # -# See the accompanying LICENSE file for terms. # -# # -# Date: 12-05-2020 # -# Author(s): Vincenzo Lomonaco # -# E-mail: contact@continualai.org # -# Website: avalanche.continualai.org # -################################################################################ - -""" This module contains DEPRECATED high-level benchmark generators. -Please use the ones found in benchmark_generators. -""" - -import warnings -from pathlib import Path -from typing import ( - Sequence, - Optional, - Dict, - SupportsInt, - Union, - Any, - Tuple, -) - -from torch import Tensor -from avalanche.benchmarks.generators.benchmark_generators import ( - nc_benchmark, - ni_benchmark, -) - -from avalanche.benchmarks.scenarios.classification_scenario import ( - GenericCLScenario, -) -from avalanche.benchmarks.scenarios.generic_scenario_creation import * -from avalanche.benchmarks.scenarios.new_classes.nc_scenario import NCScenario -from avalanche.benchmarks.scenarios.new_instances.ni_scenario import NIScenario -from avalanche.benchmarks.utils.classification_dataset import ( - SupportedDataset -) - - -def nc_scenario( - train_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - test_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - n_experiences: int, - task_labels: bool, - *, - shuffle: bool = True, - seed: Optional[int] = None, - fixed_class_order: Optional[Sequence[int]] = None, - per_exp_classes: Optional[Dict[int, int]] = None, - class_ids_from_zero_from_first_exp: bool = False, - class_ids_from_zero_in_each_exp: bool = False, - one_dataset_per_exp: bool = False, - reproducibility_data: Optional[Dict[str, Any]] = None -) -> NCScenario: - """ - This helper function is DEPRECATED in favor of `nc_benchmark`. - - This method is the high-level specific scenario generator for the - "New Classes" (NC) case. Given a sequence of train and test datasets creates - the continual stream of data as a series of experiences. Each experience - will contain all the patterns belonging to a certain set of classes and a - class won't be assigned to more than one experience. - - The ``task_labels`` parameter determines if each incremental experience has - an increasing task label or if, at the contrary, a default task label 0 - has to be assigned to all experiences. This can be useful when - differentiating between Single-Incremental-Task and Multi-Task scenarios. 
- - There are other important parameters that can be specified in order to tweak - the behaviour of the resulting scenario. Please take a few minutes to read - and understand them as they may save you a lot of work. - - This generator features a integrated reproducibility mechanism that allows - the user to store and later re-load a scenario. For more info see the - ``reproducibility_data`` parameter. - - :param train_dataset: A list of training datasets, or a single dataset. - :param test_dataset: A list of test datasets, or a single test dataset. - :param n_experiences: The number of incremental experience. This is not used - when using multiple train/test datasets with the ``one_dataset_per_exp`` - parameter set to True. - :param task_labels: If True, each experience will have an ascending task - label. If False, the task label will be 0 for all the experiences. - :param shuffle: If True, the class (or experience) order will be shuffled. - Defaults to True. - :param seed: If ``shuffle`` is True and seed is not None, the class (or - experience) order will be shuffled according to the seed. When None, the - current PyTorch random number generator state will be used. Defaults to - None. - :param fixed_class_order: If not None, the class order to use (overrides - the shuffle argument). Very useful for enhancing reproducibility. - Defaults to None. - :param per_exp_classes: Is not None, a dictionary whose keys are - (0-indexed) experience IDs and their values are the number of classes - to include in the respective experiences. The dictionary doesn't - have to contain a key for each experience! All the remaining experiences - will contain an equal amount of the remaining classes. The - remaining number of classes must be divisible without remainder - by the remaining number of experiences. For instance, - if you want to include 50 classes in the first experience - while equally distributing remaining classes across remaining - experiences, just pass the "{0: 50}" dictionary as the - per_experience_classes parameter. Defaults to None. - :param class_ids_from_zero_from_first_exp: If True, original class IDs - will be remapped so that they will appear as having an ascending - order. For instance, if the resulting class order after shuffling - (or defined by fixed_class_order) is [23, 34, 11, 7, 6, ...] and - class_ids_from_zero_from_first_exp is True, then all the patterns - belonging to class 23 will appear as belonging to class "0", - class "34" will be mapped to "1", class "11" to "2" and so on. - This is very useful when drawing confusion matrices and when dealing - with algorithms with dynamic head expansion. Defaults to False. - Mutually exclusive with the ``class_ids_from_zero_in_each_exp`` - parameter. - :param class_ids_from_zero_in_each_exp: If True, original class IDs - will be mapped to range [0, n_classes_in_exp) for each experience. - Defaults to False. Mutually exclusive with the - ``class_ids_from_zero_from_first_exp`` parameter. - :param one_dataset_per_exp: available only when multiple train-test - datasets are provided. If True, each dataset will be treated as a - experience. Mutually exclusive with the ``per_experience_classes`` and - ``fixed_class_order`` parameters. Overrides the ``n_experiences`` - parameter. Defaults to False. - :param reproducibility_data: If not None, overrides all the other - scenario definition options. This is usually a dictionary containing - data used to reproduce a specific experiment. 
One can use the - ``get_reproducibility_data`` method to get (and even distribute) - the experiment setup so that it can be loaded by passing it as this - parameter. In this way one can be sure that the same specific - experimental setup is being used (for reproducibility purposes). - Beware that, in order to reproduce an experiment, the same train and - test datasets must be used. Defaults to None. - - :return: A properly initialized :class:`NCScenario` instance. - """ - - warnings.warn( - "nc_scenario is deprecated in favor of nc_benchmark.", - DeprecationWarning, - ) - - return nc_benchmark( - train_dataset=train_dataset, - test_dataset=test_dataset, - n_experiences=n_experiences, - task_labels=task_labels, - shuffle=shuffle, - seed=seed, - fixed_class_order=fixed_class_order, - per_exp_classes=per_exp_classes, - class_ids_from_zero_from_first_exp=class_ids_from_zero_from_first_exp, - class_ids_from_zero_in_each_exp=class_ids_from_zero_in_each_exp, - one_dataset_per_exp=one_dataset_per_exp, - reproducibility_data=reproducibility_data - ) - - -def ni_scenario( - train_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - test_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - n_experiences: int, - *, - task_labels: bool = False, - shuffle: bool = True, - seed: Optional[int] = None, - balance_experiences: bool = False, - min_class_patterns_in_exp: int = 0, - fixed_exp_assignment: Optional[Sequence[Sequence[int]]] = None, - reproducibility_data: Optional[Dict[str, Any]] = None -) -> NIScenario: - """ - This helper function is DEPRECATED in favor of `ni_benchmark`. - - This method is the high-level specific scenario generator for the - "New Instances" (NI) case. Given a sequence of train and test datasets - creates the continual stream of data as a series of experiences. Each - experience will contain patterns belonging to the same classes. - - The ``task_labels`` parameter determines if each incremental experience has - an increasing task label or if, at the contrary, a default task label 0 - has to be assigned to all experiences. This can be useful when - differentiating between Single-Incremental-Task and Multi-Task scenarios. - - There are other important parameters that can be specified in order to tweak - the behaviour of the resulting scenario. Please take a few minutes to read - and understand them as they may save you a lot of work. - - This generator features an integrated reproducibility mechanism that allows - the user to store and later re-load a scenario. For more info see the - ``reproducibility_data`` parameter. - - :param train_dataset: A list of training datasets, or a single dataset. - :param test_dataset: A list of test datasets, or a single test dataset. - :param n_experiences: The number of experiences. - :param task_labels: If True, each experience will have an ascending task - label. If False, the task label will be 0 for all the experiences. - :param shuffle: If True, patterns order will be shuffled. - :param seed: A valid int used to initialize the random number generator. - Can be None. - :param balance_experiences: If True, pattern of each class will be equally - spread across all experiences. If False, patterns will be assigned to - experiences in a complete random way. Defaults to False. - :param min_class_patterns_in_exp: The minimum amount of patterns of - every class that must be assigned to every experience. Compatible with - the ``balance_experiences`` parameter. An exception will be raised if - this constraint can't be satisfied. 
Defaults to 0. - :param fixed_exp_assignment: If not None, the pattern assignment - to use. It must be a list with an entry for each experience. Each entry - is a list that contains the indexes of patterns belonging to that - experience. Overrides the ``shuffle``, ``balance_experiences`` and - ``min_class_patterns_in_exp`` parameters. - :param reproducibility_data: If not None, overrides all the other - scenario definition options, including ``fixed_exp_assignment``. - This is usually a dictionary containing data used to - reproduce a specific experiment. One can use the - ``get_reproducibility_data`` method to get (and even distribute) - the experiment setup so that it can be loaded by passing it as this - parameter. In this way one can be sure that the same specific - experimental setup is being used (for reproducibility purposes). - Beware that, in order to reproduce an experiment, the same train and - test datasets must be used. Defaults to None. - - :return: A properly initialized :class:`NIScenario` instance. - """ - - warnings.warn( - "ni_scenario is deprecated in favor of ni_benchmark.", - DeprecationWarning, - ) - - return ni_benchmark( - train_dataset=train_dataset, - test_dataset=test_dataset, - n_experiences=n_experiences, - task_labels=task_labels, - shuffle=shuffle, - seed=seed, - balance_experiences=balance_experiences, - min_class_patterns_in_exp=min_class_patterns_in_exp, - fixed_exp_assignment=fixed_exp_assignment, - reproducibility_data=reproducibility_data - ) - - -def dataset_scenario( - train_dataset_list: Sequence[SupportedDataset], - test_dataset_list: Sequence[SupportedDataset], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `dataset_benchmark`. - - Creates a generic scenario given a list of datasets and the respective task - labels. Each training dataset will be considered as a separate training - experience. Contents of the datasets will not be changed, including the - targets. - - When loading the datasets from a set of fixed file lists, consider using - the :func:`filelist_scenario` helper method instead. Also, loading from - a list of paths is supported through the :func:`paths_scenario` helper. - - In its base form, this function accepts a list of test datasets that must - contain the same amount of datasets of the training list. - Those pairs are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` parameter should be set to True - (see the parameter description for more info). - - Beware that pattern transformations must already be included in the - datasets (when needed). - - :param train_dataset_list: A list of training datasets. - :param test_dataset_list: A list of test datasets. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_dataset_list`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_dataset_list`` - parameter must be list with a single element (the complete test set). 
- Defaults to False, which means that ``train_dataset_list`` and - ``test_dataset_list`` must contain the same amount of datasets. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "dataset_scenario is deprecated in favor of " "dataset_benchmark.", - DeprecationWarning, - ) - - return create_multi_dataset_generic_scenario( - train_dataset_list=train_dataset_list, - test_dataset_list=test_dataset_list, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - ) - - -def filelist_scenario( - root: Union[str, Path], - train_file_lists: Sequence[Union[str, Path]], - test_file_lists: Union[Union[str, Path], Sequence[Union[str, Path]]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `filelist_benchmark`. - - Creates a generic scenario given a list of filelists and the respective task - labels. A separate dataset will be created for each filelist and each of - those training datasets will be considered a separate training experience. - - In its base form, this function accepts a list of filelists for the test - datsets that must contain the same amount of elements of the training list. - Those pairs of datasets are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - This helper functions is the best shot when loading Caffe-style dataset - based on filelists. - - The resulting benchmark instance and the intermediate datasets used to - populate it will be of type CLASSIFICATION. - - :param root: The root path of the dataset. - :param train_file_lists: A list of filelists describing the - paths of the training patterns for each experience. - :param test_file_lists: A list of filelists describing the - paths of the test patterns for each experience. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_file_lists`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_file_lists`` - parameter must be list with a single element (the complete test set). - Alternatively, can be a plain string or :class:`Path` object. - Defaults to False, which means that ``train_file_lists`` and - ``test_file_lists`` must contain the same amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. 
a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "filelist_scenario is deprecated in favor of " "filelist_benchmark.", - DeprecationWarning, - ) - - return create_generic_scenario_from_filelists( - root=root, - train_file_lists=train_file_lists, - test_file_lists=test_file_lists, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -FileAndLabel = Tuple[Union[str, Path], int] - - -def paths_scenario( - train_list_of_files: Sequence[Sequence[FileAndLabel]], - test_list_of_files: Union[ - Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]] - ], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `paths_benchmark`. - - Creates a generic scenario given a list of files and class labels. - A separate dataset will be created for each list and each of - those training datasets will be considered a separate training experience. - - This is very similar to `filelist_scenario`, with the main difference being - that `filelist_scenario` accepts, for each experience, a file list formatted - in Caffe-style. On the contrary, this accepts a list of tuples where each - tuple contains two elements: the full path to the pattern and its label. - Optionally, the tuple may contain a third element describing the bounding - box of the element to crop. This last bounding box may be useful when trying - to extract the part of the image depicting the desired element. - - In its base form, this function accepts a list of lists of tuples for the - test datsets that must contain the same amount of lists of the training - list. Those pairs of datasets are then used to create the "past", - "cumulative" (a.k.a. growing) and "future" test sets. However, in certain - Continual Learning scenarios only the concept of "complete" test set makes - sense. In that case, the ``complete_test_set_only`` should be set to True - (see the parameter description for more info). - - The label of each pattern doesn't have to be an int. - - :param train_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that training experience as tuples. - Each tuple must contain two elements: the full path to the pattern - and its class label. Optionally, the tuple may contain a third element - describing the bounding box to use for cropping (top, left, height, - width). - :param test_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that test experience as tuples. - Each tuple must contain two elements: the full path to the pattern - and its class label. Optionally, the tuple may contain a third element - describing the bounding box to use for cropping (top, left, height, - width). - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_file_lists`` parameter. 
For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_file_lists`` - parameter must be list with a single element (the complete test set). - Alternatively, can be a plain string or :class:`Path` object. - Defaults to False, which means that ``train_file_lists`` and - ``test_file_lists`` must contain the same amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "paths_scenario is deprecated in favor of paths_benchmark.", - DeprecationWarning, - ) - - return create_generic_scenario_from_paths( - train_list_of_files=train_list_of_files, - test_list_of_files=test_list_of_files, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -def tensors_scenario( - train_tensors: Sequence[Sequence[Any]], - test_tensors: Sequence[Sequence[Any]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `tensors_benchmark`. - - Creates a generic scenario given lists of Tensors and the respective task - labels. A separate dataset will be created from each Tensor tuple - (x, y, ...) and each of those training datasets will be considered a - separate training experience. Using this helper function is the lowest-level - way to create a Continual Learning scenario. When possible, consider using - higher level helpers. - - Experiences are defined by passing lists of tensors as the `train_tensors` - and `test_tensors` parameter. Those parameters must be lists containing - sub-lists of tensors, one for each experience. Each tensor defines the value - of a feature ("x", "y", "z", ...) for all patterns of that experience. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. 
In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_tensors: A list of lists. The first list must contain the - tensors for the first training experience (one tensor per feature), the - second list must contain the tensors for the second training experience, - and so on. - :param test_tensors: A list of lists. The first list must contain the - tensors for the first test experience (one tensor per feature), the - second list must contain the tensors for the second test experience, - and so on. - :param task_labels: A list of task labels. Must contain a task label for - each experience. For Single-Incremental-Task (a.k.a. Task-Free) - scenarios, this is usually a list of zeros. For Multi Task scenario, - this is usually a list of ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that ``test_tensors`` must - define a single experience. Defaults to False, which means that - ``train_tensors`` and ``test_tensors`` must define the same - amount of experiences. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "tensors_scenario is deprecated in favor of " "tensors_benchmark.", - DeprecationWarning, - ) - - return create_generic_scenario_from_tensor_lists( - train_tensors=train_tensors, - test_tensors=test_tensors, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -def tensor_scenario( - train_data_x: Sequence[Any], - train_data_y: Sequence[Sequence[SupportsInt]], - test_data_x: Union[Any, Sequence[Any]], - test_data_y: Union[Any, Sequence[Sequence[SupportsInt]]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `tensors_benchmark`. - - Please consider using :func:`tensors_benchmark` instead. When switching to - the new function, please keep in mind that the format of the parameters is - completely different! - - Creates a generic scenario given lists of Tensors and the respective task - labels. A separate dataset will be created from each Tensor pair (x + y) - and each of those training datasets will be considered a separate - training experience. Contents of the datasets will not be changed, including - the targets. 
Using this helper function is the lower level way to create a - Continual Learning scenario. When possible, consider using higher level - helpers. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_data_x: A list of Tensors (one per experience) containing the - patterns of the training sets. - :param train_data_y: A list of Tensors or int lists containing the - labels of the patterns of the training sets. Must contain the same - number of elements of ``train_datasets_x``. - :param test_data_x: A Tensor or a list of Tensors (one per experience) - containing the patterns of the test sets. - :param test_data_y: A Tensor or a list of Tensors or int lists containing - the labels of the patterns of the test sets. Must contain the same - number of elements of ``test_datasets_x``. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_datasets_x`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_datasets_x`` and - ``test_datasets_y`` parameters must be lists with a single element - (the complete test set). Defaults to False, which means that - ``train_file_lists`` and ``test_file_lists`` must contain the same - amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "tensor_scenario is deprecated in favor " - "of tensors_benchmark. 
When switching" - " to the new function, please keep in mind that the format of" - " the parameters is completely different!", - DeprecationWarning, - ) - - if isinstance(test_data_x, Tensor): - test_data_x = [test_data_x] - test_data_y = [test_data_y] - else: - if len(test_data_x) != len(test_data_y): - raise ValueError( - "test_data_x and test_data_y must contain" - " the same amount of elements" - ) - - if len(train_data_x) != len(train_data_y): - raise ValueError( - "train_data_x and train_data_y must contain" - " the same amount of elements" - ) - - exp_train_first_structure = [] - exp_test_first_structure = [] - for exp_idx in range(len(train_data_x)): - exp_x = train_data_x[exp_idx] - exp_y = train_data_y[exp_idx] - - exp_train_first_structure.append([exp_x, exp_y]) - - for exp_idx in range(len(test_data_x)): - exp_x = test_data_x[exp_idx] - exp_y = test_data_y[exp_idx] - - exp_test_first_structure.append([exp_x, exp_y]) - - return tensors_scenario( - train_tensors=exp_train_first_structure, - test_tensors=exp_test_first_structure, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -__all__ = [ - "nc_scenario", - "ni_scenario", - "dataset_scenario", - "filelist_scenario", - "paths_scenario", - "tensors_scenario", - "tensor_scenario", -] diff --git a/avalanche/benchmarks/scenarios/__init__.py b/avalanche/benchmarks/scenarios/__init__.py index 48befae1f..30816f3c2 100644 --- a/avalanche/benchmarks/scenarios/__init__.py +++ b/avalanche/benchmarks/scenarios/__init__.py @@ -1,7 +1,8 @@ from .generic_scenario import * from .dataset_scenario import * from .classification_scenario import * -from .generic_scenario_creation import * +from .classification_benchmark_creation import * +from .detection_benchmark_creation import * from .new_classes import * from .new_instances import * from .exmodel_scenario import * diff --git a/avalanche/benchmarks/scenarios/classification_benchmark_creation.py b/avalanche/benchmarks/scenarios/classification_benchmark_creation.py new file mode 100644 index 000000000..f585c16c6 --- /dev/null +++ b/avalanche/benchmarks/scenarios/classification_benchmark_creation.py @@ -0,0 +1,254 @@ +from typing import ( + Any, + Callable, + Dict, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, +) +from avalanche.benchmarks.scenarios.dataset_scenario import ( + DatasetScenario, + TStreamsUserDict, +) +from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( + _make_classification_scenario, + FileAndLabel, + DatasetFactory, + LazyStreamDefinition, + create_generic_benchmark_from_filelists, + create_generic_benchmark_from_paths, + create_generic_benchmark_from_tensor_lists, + create_lazy_generic_benchmark, + create_multi_dataset_generic_benchmark, +) + +from avalanche.benchmarks.utils.classification_dataset import ( + SupportedDataset, + make_classification_dataset, +) +from avalanche.benchmarks.utils.transform_groups import XTransform, YTransform + + +TDatasetScenario = TypeVar( + 'TDatasetScenario', + bound='DatasetScenario') + + +def create_multi_dataset_classification_benchmark( + train_datasets: Sequence[SupportedDataset], + test_datasets: Sequence[SupportedDataset], + *, + other_streams_datasets: Optional[ + Mapping[str, Sequence[SupportedDataset]]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: 
YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a classification benchmark instance given a list of datasets. + Each dataset will be considered as a separate experience. + + Contents of the datasets must already be set, including task labels. + Transformations will be applied if defined. + + For additional info, please refer to + :func:`create_multi_dataset_generic_benchmark`. + """ + return create_multi_dataset_generic_benchmark( + train_datasets=train_datasets, + test_datasets=test_datasets, + other_streams_datasets=other_streams_datasets, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +def create_lazy_classification_benchmark( + train_generator: LazyStreamDefinition, + test_generator: LazyStreamDefinition, + *, + other_streams_generators: Optional[Dict[str, LazyStreamDefinition]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a lazily-defined classification benchmark instance given a dataset + generator for each stream. + + Generators must return properly initialized instances of + :class:`AvalancheDataset` which will be used to create experiences. + + For additional info, please refer to :func:`create_lazy_generic_benchmark`. 
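+
+    A minimal, illustrative sketch of a lazily-defined stream. It assumes
+    that a lazy stream definition is the usual
+    ``(experiences_generator, stream_length, task_labels)`` triple and that
+    :func:`make_classification_dataset` accepts an explicit ``targets``
+    argument; tensors, shapes and labels below are placeholders::
+
+        import torch
+        from torch.utils.data import TensorDataset
+
+        def train_gen():
+            # Two lazily created training experiences, 10 patterns each.
+            for _ in range(2):
+                x = torch.rand(10, 3, 32, 32)
+                y = torch.randint(0, 5, (10,))
+                yield make_classification_dataset(
+                    TensorDataset(x, y), targets=y.tolist(), task_labels=0)
+
+        def test_gen():
+            # A single, complete test experience.
+            x = torch.rand(20, 3, 32, 32)
+            y = torch.randint(0, 5, (20,))
+            yield make_classification_dataset(
+                TensorDataset(x, y), targets=y.tolist(), task_labels=0)
+
+        benchmark = create_lazy_classification_benchmark(
+            (train_gen(), 2, [0, 0]),
+            (test_gen(), 1, [0]),
+            complete_test_set_only=True,
+        )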
+ """ + return create_lazy_generic_benchmark( + train_generator=train_generator, + test_generator=test_generator, + other_streams_generators=other_streams_generators, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +create_classification_benchmark_from_filelists = \ + create_generic_benchmark_from_filelists + + +def create_classification_benchmark_from_paths( + train_lists_of_files: Sequence[Sequence[FileAndLabel]], + test_lists_of_files: Sequence[Sequence[FileAndLabel]], + *, + other_streams_lists_of_files: Optional[Dict[ + str, Sequence[Sequence[FileAndLabel]] + ]] = None, + task_labels: Sequence[int], + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a classification benchmark instance given a sequence of lists of + files. A separate dataset will be created for each list. Each of those + datasets will be considered a separate experience. + + This is very similar to + :func:`create_classification_benchmark_from_filelists`, + with the main difference being that + :func:`create_classification_benchmark_from_filelists` accepts, for each + experience, a file list formatted in Caffe-style. On the contrary, this + accepts a list of tuples where each tuple contains two elements: the full + path to the pattern and its label. Optionally, the tuple may contain a third + element describing the bounding box of the element to crop. This last + bounding box may be useful when trying to extract the part of the image + depicting the desired element. + + For additional info, please refer to + :func:`create_generic_benchmark_from_paths`. 
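+
+    A minimal, illustrative sketch; the file paths and class labels below
+    are placeholders::
+
+        train_exp_0 = [("/data/train/cat_0.png", 0), ("/data/train/dog_0.png", 1)]
+        train_exp_1 = [("/data/train/car_0.png", 2), ("/data/train/bike_0.png", 3)]
+        test_exp = [("/data/test/cat_1.png", 0), ("/data/test/car_1.png", 2)]
+
+        benchmark = create_classification_benchmark_from_paths(
+            [train_exp_0, train_exp_1],
+            [test_exp],
+            task_labels=[0, 0],
+            complete_test_set_only=True,
+        )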
+ """ + return create_generic_benchmark_from_paths( + train_lists_of_files=train_lists_of_files, + test_lists_of_files=test_lists_of_files, + other_streams_lists_of_files=other_streams_lists_of_files, + task_labels=task_labels, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +def create_classification_benchmark_from_tensor_lists( + train_tensors: Sequence[Sequence[Any]], + test_tensors: Sequence[Sequence[Any]], + *, + other_streams_tensors: Optional[Dict[str, Sequence[Sequence[Any]]]] = None, + task_labels: Sequence[int], + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a classification benchmark instance given lists of Tensors. A + separate dataset will be created from each Tensor tuple (x, y, z, ...) + and each of those training datasets will be considered a separate training + experience. Using this helper function is the lowest-level way to create a + Continual Learning benchmark. When possible, consider using higher level + helpers. + + Experiences are defined by passing lists of tensors as the `train_tensors`, + `test_tensors` (and `other_streams_tensors`) parameters. Those parameters + must be lists containing lists of tensors, one list for each experience. + Each tensor defines the value of a feature ("x", "y", "z", ...) for all + patterns of that experience. + + By default the second tensor of each experience will be used to fill the + `targets` value (label of each pattern). + + For additional info, please refer to + :func:`create_generic_benchmark_from_tensor_lists`. 
+ """ + return create_generic_benchmark_from_tensor_lists( + train_tensors=train_tensors, + test_tensors=test_tensors, + other_streams_tensors=other_streams_tensors, + task_labels=task_labels, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +__all__ = [ + 'create_multi_dataset_classification_benchmark', + 'create_lazy_classification_benchmark', + 'create_classification_benchmark_from_filelists', + 'create_classification_benchmark_from_paths', + 'create_classification_benchmark_from_tensor_lists' +] diff --git a/avalanche/benchmarks/scenarios/classification_scenario.py b/avalanche/benchmarks/scenarios/classification_scenario.py index cfebe300a..c169b744f 100644 --- a/avalanche/benchmarks/scenarios/classification_scenario.py +++ b/avalanche/benchmarks/scenarios/classification_scenario.py @@ -14,6 +14,8 @@ import warnings +from torch import Tensor + from avalanche.benchmarks.scenarios.generic_scenario import ( AbstractClassTimelineExperience, ) @@ -169,12 +171,12 @@ class ClassificationExperience( ] ): """ - Definition of a learning experience based on a :class:`GenericCLScenario` - instance. + Definition of a learning experience based on a + :class:`ClassificationScenario` instance. This experience implementation uses the generic experience-patterns - assignment defined in the :class:`GenericCLScenario` instance. Instances of - this class are usually obtained from a benchmark stream. + assignment defined in the :class:`ClassificationScenario` instance. + Instances of this class are usually obtained from a benchmark stream. 
""" def __init__( @@ -248,7 +250,7 @@ def task_labels(self) -> List[int]: class _LazyStreamClassesInClassificationExps( Mapping[str, Sequence[Set[int]]]): - def __init__(self, benchmark: GenericCLScenario): + def __init__(self, benchmark: ClassificationScenario): self._benchmark = benchmark self._default_lcie = _LazyClassesInClassificationExps( benchmark, stream="train") @@ -278,7 +280,10 @@ def __iter__(self): class _LazyClassesInClassificationExps(Sequence[Optional[Set[int]]]): - def __init__(self, benchmark: GenericCLScenario, stream: str = "train"): + def __init__( + self, + benchmark: ClassificationScenario, + stream: str = "train"): self._benchmark = benchmark self._stream = stream @@ -328,6 +333,17 @@ def _slice_collate(classes_in_exps: Iterable[Optional[Iterable[int]]]) -> \ result.append(set(x)) return tuple(result) + + +CommonClassificationItem = Tuple[Tensor, int, int] # x, y, t +CommonClassificationDataset = ClassificationDataset[CommonClassificationItem] +CommonClassificationExperience = ClassificationExperience[ + CommonClassificationDataset] + +CommonClassificationScenarioType = ClassificationScenario[ + ClassificationStream[CommonClassificationExperience], + CommonClassificationExperience, + CommonClassificationDataset] __all__ = [ @@ -336,4 +352,5 @@ def _slice_collate(classes_in_exps: Iterable[Optional[Iterable[int]]]) -> \ "ClassificationStream", "ClassificationExperience", "GenericClassificationExperience", + "CommonClassificationScenarioType" ] diff --git a/avalanche/benchmarks/scenarios/dataset_scenario.py b/avalanche/benchmarks/scenarios/dataset_scenario.py index cfc1c2f56..00b45b4ba 100644 --- a/avalanche/benchmarks/scenarios/dataset_scenario.py +++ b/avalanche/benchmarks/scenarios/dataset_scenario.py @@ -294,8 +294,9 @@ def get_reproducibility_data(self) -> Dict[str, Any]: parameter in the constructor. Child classes should create their own reproducibility dictionary. - This means that the implementation found in :class:`GenericCLScenario` - will return an empty dictionary, which is meaningless. + This means that the implementation found in + :class:`ClassificationScenario` will return an empty dictionary, + which is meaningless. 
In order to obtain the same benchmark instance, the reproducibility data must be passed to the constructor along with the exact same diff --git a/avalanche/benchmarks/scenarios/detection_benchmark_creation.py b/avalanche/benchmarks/scenarios/detection_benchmark_creation.py new file mode 100644 index 000000000..ca0c2d7ee --- /dev/null +++ b/avalanche/benchmarks/scenarios/detection_benchmark_creation.py @@ -0,0 +1,144 @@ +from typing import ( + Callable, + Dict, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, +) +from avalanche.benchmarks.scenarios.dataset_scenario import ( + DatasetScenario, + TStreamsUserDict, +) +from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( + DatasetFactory, + LazyStreamDefinition, + create_lazy_generic_benchmark, + create_multi_dataset_generic_benchmark, +) + +from avalanche.benchmarks.utils.transform_groups import XTransform, YTransform +from avalanche.benchmarks.scenarios.detection_scenario import ( + DetectionExperience, + DetectionScenario, + DetectionStream, +) +from avalanche.benchmarks.utils.detection_dataset import ( + make_detection_dataset, + SupportedDetectionDataset, +) + + +TDatasetScenario = TypeVar( + 'TDatasetScenario', + bound='DatasetScenario') + + +def _make_detection_scenario( + stream_definitions: TStreamsUserDict, + complete_test_set_only: bool +) -> DetectionScenario[ + DetectionStream[ + DetectionExperience], + DetectionExperience]: + return DetectionScenario( + stream_definitions=stream_definitions, + complete_test_set_only=complete_test_set_only + ) + + +def create_multi_dataset_detection_benchmark( + train_datasets: Sequence[SupportedDetectionDataset], + test_datasets: Sequence[SupportedDetectionDataset], + *, + other_streams_datasets: Optional[ + Mapping[str, Sequence[SupportedDetectionDataset]]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_detection_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_detection_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a detection benchmark instance given a list of datasets. + Each dataset will be considered as a separate experience. + + Contents of the datasets must already be set, including task labels. + Transformations will be applied if defined. + + For additional info, please refer to + :func:`create_multi_dataset_generic_benchmark`. 
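A hedged sketch of the detection counterpart described above; `train_det_0`, `train_det_1`, `test_det_0` and `test_det_1` are hypothetical torchvision-style detection datasets (each item is an image plus a target dict with boxes and labels), and passing `task_labels` to `make_detection_dataset` is assumed to mirror the classification wrapper.

from avalanche.benchmarks.scenarios.detection_benchmark_creation import (
    create_multi_dataset_detection_benchmark,
)
from avalanche.benchmarks.utils.detection_dataset import make_detection_dataset

# Task labels must already be set on the datasets, as noted above.
train_exps = [
    make_detection_dataset(train_det_0, task_labels=0),
    make_detection_dataset(train_det_1, task_labels=0),
]
test_exps = [
    make_detection_dataset(test_det_0, task_labels=0),
    make_detection_dataset(test_det_1, task_labels=0),
]

benchmark = create_multi_dataset_detection_benchmark(train_exps, test_exps)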
+ """ + return create_multi_dataset_generic_benchmark( + train_datasets=train_datasets, + test_datasets=test_datasets, + other_streams_datasets=other_streams_datasets, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +def create_lazy_detection_benchmark( + train_generator: LazyStreamDefinition, + test_generator: LazyStreamDefinition, + *, + other_streams_generators: Optional[Dict[str, LazyStreamDefinition]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_detection_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_detection_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a lazily-defined detection benchmark instance given a dataset + generator for each stream. + + Generators must return properly initialized instances of + :class:`AvalancheDataset` which will be used to create experiences. + + For additional info, please refer to :func:`create_lazy_generic_benchmark`. + """ + return create_lazy_generic_benchmark( + train_generator=train_generator, + test_generator=test_generator, + other_streams_generators=other_streams_generators, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +__all__ = [ + 'create_multi_dataset_detection_benchmark', + 'create_lazy_detection_benchmark' +] diff --git a/avalanche/benchmarks/scenarios/generic_benchmark_creation.py b/avalanche/benchmarks/scenarios/generic_benchmark_creation.py index f29aab947..fcd833a78 100644 --- a/avalanche/benchmarks/scenarios/generic_benchmark_creation.py +++ b/avalanche/benchmarks/scenarios/generic_benchmark_creation.py @@ -14,12 +14,15 @@ them fit your needs, then the helper functions here listed may help. 
""" +import itertools from pathlib import Path from typing import ( + Callable, Generator, List, Mapping, Sequence, + TypeVar, Union, Any, Tuple, @@ -28,35 +31,284 @@ Iterable, NamedTuple, ) +from typing_extensions import ( + Protocol, + Literal, +) +import warnings +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationExperience, + ClassificationScenario, + ClassificationStream, +) +from avalanche.benchmarks.scenarios.dataset_scenario import ( + DatasetScenario, + DatasetStream, + FactoryBasedStream, + TStreamsUserDict, +) +from avalanche.benchmarks.scenarios.generic_scenario import DatasetExperience from avalanche.benchmarks.utils import ( - make_tensor_classification_dataset, - SupportedDataset, - make_classification_dataset, FilelistDataset, PathsDataset, common_paths_root, ) +from torch.utils.data.dataset import Subset, ConcatDataset from avalanche.benchmarks.utils.classification_dataset import ( ClassificationDataset, + make_classification_dataset, +) +from avalanche.benchmarks.utils.data import AvalancheDataset +from avalanche.benchmarks.utils.transform_groups import ( + TransformGroupDef, + XTransform, + YTransform, +) +from avalanche.benchmarks.utils.utils import ( + _is_int_iterable, + make_generic_dataset, + make_generic_tensor_dataset, +) +from avalanche.benchmarks.utils.dataset_definitions import ( + IDatasetWithTargets, + ITensorDataset, ) -from .classification_scenario import GenericCLScenario + + +TDatasetScenario = TypeVar( + 'TDatasetScenario', + bound='DatasetScenario') + +TTargetType = TypeVar( + 'TTargetType', + contravariant=True) +TSupportedDataset = TypeVar( + 'TSupportedDataset', + contravariant=True) +TAvalancheDataset = TypeVar( + 'TAvalancheDataset', + bound='AvalancheDataset', + covariant=True) + + +GenericSupportedDataset = Union[ + IDatasetWithTargets, + ITensorDataset, + Subset, + ConcatDataset, + AvalancheDataset +] + + +class DatasetFactory( + Protocol[ + TSupportedDataset, + TTargetType, + TAvalancheDataset]): + def __call__( + self, + dataset: TSupportedDataset, + *, + transform: Optional[XTransform] = None, + target_transform: Optional[YTransform] = None, + transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, + initial_transform_group: Optional[str] = None, + task_labels: Optional[Union[int, Sequence[int]]] = None, + targets: Optional[Sequence[TTargetType]] = None, + collate_fn: Optional[Callable[[List], Any]] = None + ) -> TAvalancheDataset: + ... + + +class TensorDatasetFactory( + Protocol[ + TAvalancheDataset]): + def __call__( + self, + dataset_tensors: Sequence, + *, + task_labels: Optional[Union[int, Sequence[int]]] = None, + ) -> TAvalancheDataset: + ... 
+ + +def _make_plain_experience( + stream: DatasetStream[DatasetExperience[TAvalancheDataset]], + experience_idx: int +) -> DatasetExperience[TAvalancheDataset]: + dataset = stream.benchmark.stream_definitions[ + stream.name + ].exps_data[experience_idx] + + return DatasetExperience( + current_experience=experience_idx, + origin_stream=stream, + benchmark=stream.benchmark, + dataset=dataset + ) + + +def _make_generic_scenario( + stream_definitions: TStreamsUserDict, + complete_test_set_only: bool): + return DatasetScenario( + stream_definitions=stream_definitions, + complete_test_set_only=complete_test_set_only, + stream_factory=FactoryBasedStream, + experience_factory=_make_plain_experience + ) + + +def _make_classification_scenario( + stream_definitions: TStreamsUserDict, + complete_test_set_only: bool +) -> ClassificationScenario[ + ClassificationStream[ + ClassificationExperience[ + ClassificationDataset]], + ClassificationExperience[ + ClassificationDataset], + ClassificationDataset]: + return ClassificationScenario( + stream_definitions=stream_definitions, + complete_test_set_only=complete_test_set_only + ) + + +def _detect_legacy_classification_usage( + all_datasets: Iterable[Any] +) -> bool: + """ + Used by :func:`create_multi_dataset_generic_benchmark` to check + if the user is trying to create a classification benchmark. + + While using :func:`create_multi_dataset_generic_benchmark` to create a + classification benchmark is acceptable, it would be better to use + :func:`create_multi_dataset_classification_benchmark`, which returns + a :class:`ClassificationScenario` + + Fields defined in :class:`ClassificationScenario` are not to be found + in the generic :class:`DatasetScenario` instance returned by + func:`create_multi_dataset_generic_benchmark` and may be needed + by some continual learning strategies. + + This function works by checking if input datasets contain all + int (including NumPy/PyTorch int types) targets. + """ + + for dataset in all_datasets: + try: + as_classification_dataset = make_classification_dataset( + dataset + ) + if not _is_int_iterable(as_classification_dataset.targets): + return False + except Exception: + return False + + return True + + +def _manage_legacy_classification_usage( + train_datasets: Sequence[GenericSupportedDataset], + test_datasets: Sequence[GenericSupportedDataset], + other_streams_datasets: Optional[ + Mapping[str, Sequence[GenericSupportedDataset]]], + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ], + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']]) -> Tuple[ + DatasetFactory, + Callable[[ + TStreamsUserDict, + bool + ], TDatasetScenario]]: + + check_implicit_classification = \ + dataset_factory == 'check_if_classification' or \ + benchmark_factory == 'check_if_classification' + + is_implicit_classification = False + if check_implicit_classification: + all_datasets_iterables = [ + train_datasets, + test_datasets, + ] + + if other_streams_datasets is not None: + all_datasets_iterables.extend(other_streams_datasets.values()) + + is_implicit_classification = _detect_legacy_classification_usage( + itertools.chain(*all_datasets_iterables) + ) + + if is_implicit_classification: + warnings.warn( + '`dataset_benchmark` is being called by passing classification ' + 'datasets. 
It is recommended to switch to ' + '`dataset_classification_benchmark` to make sure a ' + '`ClassificationScenario` is returned', + DeprecationWarning + ) + + dataset_factory_compat: DatasetFactory + if dataset_factory == 'check_if_classification': + if is_implicit_classification: + dataset_factory_compat = make_classification_dataset + else: + dataset_factory_compat = make_generic_dataset + else: + dataset_factory_compat = dataset_factory + + benchmark_factory_compat: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] + if benchmark_factory == 'check_if_classification': + if is_implicit_classification: + benchmark_factory_compat = \ + _make_classification_scenario # type: ignore + else: + benchmark_factory_compat = _make_generic_scenario + else: + benchmark_factory_compat = benchmark_factory + + return dataset_factory_compat, benchmark_factory_compat def create_multi_dataset_generic_benchmark( - train_datasets: Sequence[SupportedDataset], - test_datasets: Sequence[SupportedDataset], + train_datasets: Sequence[GenericSupportedDataset], + test_datasets: Sequence[GenericSupportedDataset], *, other_streams_datasets: Optional[ - Mapping[str, Sequence[SupportedDataset]]] = None, + Mapping[str, Sequence[GenericSupportedDataset]]] = None, complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, other_streams_transforms: Optional[ - Mapping[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ] = 'check_if_classification', + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']] = 'check_if_classification' +) -> TDatasetScenario: """ Creates a benchmark instance given a list of datasets. Each dataset will be considered as a separate experience. @@ -107,10 +359,28 @@ def create_multi_dataset_generic_benchmark( transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A benchmark instance. 
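A minimal sketch of the default behaviour; `train_sets`, `test_sets` and `valid_sets` are hypothetical lists of datasets whose task labels are already set. Leaving both factories at 'check_if_classification' makes the helper return a ClassificationScenario when every target is an int (emitting the deprecation warning above) and a plain DatasetScenario otherwise.

from avalanche.benchmarks.scenarios.generic_benchmark_creation import (
    create_multi_dataset_generic_benchmark,
)

benchmark = create_multi_dataset_generic_benchmark(
    train_datasets=train_sets,
    test_datasets=test_sets,
    other_streams_datasets={"valid": valid_sets},
    # Per-stream transforms must be (XTransform, YTransform) tuples.
    other_streams_transforms={"valid": (None, None)},
)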
""" + dataset_factory_compat, benchmark_factory_compat = \ + _manage_legacy_classification_usage( + train_datasets=train_datasets, + test_datasets=test_datasets, + other_streams_datasets=other_streams_datasets, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + transform_groups = dict( train=(train_transform, train_target_transform), eval=(eval_transform, eval_target_transform), @@ -121,9 +391,20 @@ def create_multi_dataset_generic_benchmark( if isinstance(stream_transforms, Sequence): if len(stream_transforms) == 1: # Suppose we got only the transformation for X values - stream_transforms = (stream_transforms[0], None) + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple `(Xtransform, YTransform)`. ' + 'You can pass None for the Y transformation.' + ) + stream_transforms = ( + stream_transforms[0], # type: ignore + None) else: # Suppose it's the transformation for X values + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple (Xtransform, YTransform).' + ) stream_transforms = (stream_transforms, None) transform_groups[stream_name] = stream_transforms @@ -140,7 +421,7 @@ def create_multi_dataset_generic_benchmark( "complete_test_set_only is True" ) - stream_definitions: Dict[str, Tuple[Iterable[ClassificationDataset]]] = \ + stream_definitions: Dict[str, Tuple[Iterable[AvalancheDataset]]] = \ dict() for stream_name, dataset_list in input_streams.items(): @@ -151,22 +432,27 @@ def create_multi_dataset_generic_benchmark( stream_datasets = [] for dataset_idx in range(len(dataset_list)): dataset = dataset_list[dataset_idx] + stream_datasets.append( - make_classification_dataset( - dataset, + dataset_factory_compat( + dataset=dataset, transform_groups=transform_groups, - initial_transform_group=initial_transform_group, + initial_transform_group=initial_transform_group ) ) stream_definitions[stream_name] = (stream_datasets,) - return GenericCLScenario( - stream_definitions=stream_definitions, - complete_test_set_only=complete_test_set_only, + return benchmark_factory_compat( + stream_definitions, + complete_test_set_only, ) -def _adapt_lazy_stream(generator, transform_groups, initial_transform_group): +def _adapt_lazy_stream( + generator, + transform_groups, + initial_transform_group, + dataset_factory): """ A simple internal utility to apply transforms and dataset type to all lazily generated datasets. Used in the :func:`create_lazy_generic_benchmark` @@ -177,7 +463,7 @@ def _adapt_lazy_stream(generator, transform_groups, initial_transform_group): """ for dataset in generator: - dataset = make_classification_dataset( + dataset = dataset_factory( dataset, transform_groups=transform_groups, initial_transform_group=initial_transform_group, @@ -203,7 +489,7 @@ class LazyStreamDefinition(NamedTuple): can be used. """ - exps_generator: Iterable[ClassificationDataset] + exps_generator: Iterable[AvalancheDataset] """ The experiences generator. 
Can be a "yield"-based generator, a custom sequence, a standard list or any kind of iterable returning @@ -232,12 +518,20 @@ def create_lazy_generic_benchmark( *, other_streams_generators: Optional[Dict[str, LazyStreamDefinition]] = None, complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, - other_streams_transforms: Optional[Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_generic_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_generic_scenario +) -> TDatasetScenario: """ Creates a lazily-defined benchmark instance given a dataset generator for each stream. @@ -296,8 +590,17 @@ def create_lazy_generic_benchmark( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A lazily-initialized :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A lazily-initialized benchmark instance. """ transform_groups = dict( @@ -310,9 +613,20 @@ def create_lazy_generic_benchmark( if isinstance(stream_transforms, Sequence): if len(stream_transforms) == 1: # Suppose we got only the transformation for X values - stream_transforms = (stream_transforms[0], None) + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple `(Xtransform, YTransform)`. ' + 'You can pass None for the Y transformation.' + ) + stream_transforms = ( + stream_transforms[0], # type: ignore + None) else: # Suppose it's the transformation for X values + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple (Xtransform, YTransform).' 
+ ) stream_transforms = (stream_transforms, None) transform_groups[stream_name] = stream_transforms @@ -332,7 +646,7 @@ def create_lazy_generic_benchmark( stream_definitions: Dict[ str, Tuple[ # Dataset generator + stream length - Tuple[Generator[ClassificationDataset, None, None], int], + Tuple[Generator[AvalancheDataset, None, None], int], # Task label(s) for each experience Iterable[Union[int, Iterable[int]]] ] @@ -351,6 +665,7 @@ def create_lazy_generic_benchmark( generator, transform_groups, initial_transform_group=initial_transform_group, + dataset_factory=dataset_factory ) stream_definitions[stream_name] = ( @@ -358,9 +673,9 @@ def create_lazy_generic_benchmark( task_labels, ) - return GenericCLScenario( - stream_definitions=stream_definitions, - complete_test_set_only=complete_test_set_only, + return benchmark_factory( + stream_definitions, + complete_test_set_only ) @@ -373,13 +688,20 @@ def create_generic_benchmark_from_filelists( Dict[str, Sequence[Union[str, Path]]]] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, other_streams_transforms: Optional[ - Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: """ Creates a benchmark instance given a list of filelists and the respective task labels. A separate dataset will be created for each filelist and each @@ -391,8 +713,7 @@ def create_generic_benchmark_from_filelists( Beware that this helper function is limited is the following two aspects: - The resulting benchmark instance and the intermediate datasets used to - populate it will be of type CLASSIFICATION. There is no way to change - this. + populate it will be of type CLASSIFICATION. - Task labels can only be defined by choosing a single task label for each experience (the same task label is applied to all patterns of experiences sharing the same position in different streams). @@ -452,8 +773,17 @@ def create_generic_benchmark_from_filelists( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_classification_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`ClassificationScenario`. + + :returns: A benchmark instance. 
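A sketch of the filelist-based flow; the root directory and filelist names are hypothetical. Each filelist is a Caffe-style text file with one "relative/path label" pair per line.

from avalanche.benchmarks.scenarios.generic_benchmark_creation import (
    create_generic_benchmark_from_filelists,
)

# e.g. exp0_train.txt contains lines such as:
#   cats/0001.png 0
#   dogs/0001.png 1
benchmark = create_generic_benchmark_from_filelists(
    "/data/images",                        # root prepended to each relative path
    ["exp0_train.txt", "exp1_train.txt"],  # one filelist per training experience
    ["test.txt"],                          # a single, complete test set
    task_labels=[0, 0],
    complete_test_set_only=True,
)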
""" input_streams = dict(train=train_file_lists, test=test_file_lists) @@ -461,15 +791,15 @@ def create_generic_benchmark_from_filelists( if other_streams_file_lists is not None: input_streams = {**input_streams, **other_streams_file_lists} - stream_definitions: Dict[str, Sequence[ClassificationDataset]] = dict() + stream_definitions: Dict[str, Sequence[AvalancheDataset]] = dict() for stream_name, file_lists in input_streams.items(): - stream_datasets: List[ClassificationDataset] = [] + stream_datasets: List[AvalancheDataset] = [] for exp_id, f_list in enumerate(file_lists): f_list_dataset = FilelistDataset(root, f_list) stream_datasets.append( - make_classification_dataset( + dataset_factory( f_list_dataset, task_labels=task_labels[exp_id] ) ) @@ -486,6 +816,8 @@ def create_generic_benchmark_from_filelists( eval_target_transform=eval_target_transform, complete_test_set_only=complete_test_set_only, other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory ) @@ -503,12 +835,23 @@ def create_generic_benchmark_from_paths( ]] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, - other_streams_transforms: Optional[Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ] = 'check_if_classification', + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']] = 'check_if_classification' +) -> TDatasetScenario: """ Creates a benchmark instance given a sequence of lists of files. A separate dataset will be created for each list. Each of those datasets @@ -579,8 +922,17 @@ def create_generic_benchmark_from_paths( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A benchmark instance. 
""" input_streams = dict(train=train_lists_of_files, test=test_lists_of_files) @@ -588,17 +940,18 @@ def create_generic_benchmark_from_paths( if other_streams_lists_of_files is not None: input_streams = {**input_streams, **other_streams_lists_of_files} - stream_definitions: Dict[str, Sequence[ClassificationDataset]] = dict() + stream_definitions: Dict[str, Sequence[AvalancheDataset]] = dict() for stream_name, lists_of_files in input_streams.items(): - stream_datasets: List[ClassificationDataset] = [] + stream_datasets: List[AvalancheDataset] = [] for exp_id, list_of_files in enumerate(lists_of_files): common_root, exp_paths_list = common_paths_root(list_of_files) - paths_dataset: PathsDataset[Any, int] = \ + paths_dataset: PathsDataset[Any, Any] = \ PathsDataset(common_root, exp_paths_list) stream_datasets.append( - make_classification_dataset( - paths_dataset, task_labels=task_labels[exp_id] + make_generic_dataset( + paths_dataset, + task_labels=task_labels[exp_id] ) ) @@ -614,6 +967,8 @@ def create_generic_benchmark_from_paths( eval_target_transform=eval_target_transform, complete_test_set_only=complete_test_set_only, other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory ) @@ -624,12 +979,23 @@ def create_generic_benchmark_from_tensor_lists( other_streams_tensors: Optional[Dict[str, Sequence[Sequence[Any]]]] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, - other_streams_transforms: Optional[Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ] = 'check_if_classification', + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']] = 'check_if_classification' +) -> TDatasetScenario: """ Creates a benchmark instance given lists of Tensors. A separate dataset will be created from each Tensor tuple (x, y, z, ...) and each of those training @@ -701,8 +1067,21 @@ def create_generic_benchmark_from_tensor_lists( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param tensor_dataset_factory: The factory for the intermediate + tensor dataset. This is used to convert the tensors list to a + PyTorch dataset. The returned dataset will be then processed + again using `dataset_factory` + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A benchmark instance. 
""" input_streams = dict(train=train_tensors, test=test_tensors) @@ -710,14 +1089,14 @@ def create_generic_benchmark_from_tensor_lists( if other_streams_tensors is not None: input_streams = {**input_streams, **other_streams_tensors} - stream_definitions: Dict[str, Sequence[ClassificationDataset]] = dict() + stream_definitions: Dict[str, Sequence[AvalancheDataset]] = dict() for stream_name, list_of_exps_tensors in input_streams.items(): - stream_datasets: List[ClassificationDataset] = [] + stream_datasets: List[AvalancheDataset] = [] for exp_id, exp_tensors in enumerate(list_of_exps_tensors): stream_datasets.append( - make_tensor_classification_dataset( - *exp_tensors, task_labels=task_labels[exp_id] + make_generic_tensor_dataset( + exp_tensors, task_labels=task_labels[exp_id] ) ) @@ -733,6 +1112,8 @@ def create_generic_benchmark_from_tensor_lists( eval_target_transform=eval_target_transform, complete_test_set_only=complete_test_set_only, other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory ) diff --git a/avalanche/benchmarks/scenarios/generic_scenario_creation.py b/avalanche/benchmarks/scenarios/generic_scenario_creation.py deleted file mode 100644 index 410dda14f..000000000 --- a/avalanche/benchmarks/scenarios/generic_scenario_creation.py +++ /dev/null @@ -1,587 +0,0 @@ -################################################################################ -# Copyright (c) 2021 ContinualAI. # -# Copyrights licensed under the MIT License. # -# See the accompanying LICENSE file for terms. # -# # -# Date: 22-06-2020 # -# Author(s): Lorenzo Pellegrini # -# E-mail: contact@continualai.org # -# Website: avalanche.continualai.org # -################################################################################ - -""" This module contains DEPRECATED mid-level benchmark generators. -Please use the ones found in generic_benchmark_creation. -""" - -import warnings -from pathlib import Path -from typing import Sequence, Union, SupportsInt, Any, Tuple - -from torch import Tensor - -from avalanche.benchmarks.utils import ( - make_tensor_classification_dataset, - SupportedDataset, - datasets_from_paths, - make_classification_dataset, -) -from avalanche.benchmarks.utils import datasets_from_filelists -from .classification_scenario import GenericCLScenario -from ..utils.flat_data import ConstantSequence - - -def create_multi_dataset_generic_scenario( - train_dataset_list: Sequence[SupportedDataset], - test_dataset_list: Sequence[SupportedDataset], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_multi_dataset_generic_benchmark`. - - Creates a generic scenario given a list of datasets and the respective task - labels. Each training dataset will be considered as a separate training - experience. Contents of the datasets will not be changed, including the - targets. - - When loading the datasets from a set of fixed filelist, consider using - the :func:`create_generic_scenario_from_filelists` helper method instead. - - In its base form, this function accepts a list of test datsets that must - contain the same amount of datasets of the training list. - Those pairs are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. 
However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - Beware that pattern transformations must already be included in the - datasets (when needed). - - :param train_dataset_list: A list of training datasets. - :param test_dataset_list: A list of test datasets. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_dataset_list`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_dataset_list`` - parameter must be list with a single element (the complete test set). - Defaults to False, which means that ``train_dataset_list`` and - ``test_dataset_list`` must contain the same amount of datasets. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. 
- """ - - warnings.warn( - "create_multi_dataset_generic_scenario is deprecated in favor" - " of create_multi_dataset_generic_benchmark.", - DeprecationWarning, - ) - - transform_groups = dict( - train=(train_transform, train_target_transform), - eval=(eval_transform, eval_target_transform), - ) - - if complete_test_set_only: - if len(test_dataset_list) != 1: - raise ValueError( - "Test must contain 1 element when" - "complete_test_set_only is True" - ) - else: - if len(test_dataset_list) != len(train_dataset_list): - raise ValueError( - "Train and test lists must define the same " - " amount of experiences" - ) - - train_t_labels = [] - train_dataset_list_avl = [] - for dataset_idx, dataset in enumerate(train_dataset_list): - dataset = train_dataset_list[dataset_idx] - train_t_labels.append(task_labels[dataset_idx]) - train_dataset_list_avl.append(make_classification_dataset( - dataset, - task_labels=ConstantSequence( - task_labels[dataset_idx], len(dataset) - ), - transform_groups=transform_groups, - initial_transform_group="train", - )) - - test_t_labels = [] - test_dataset_list_avl = [] - for dataset_idx, dataset in enumerate(test_dataset_list): - dataset = test_dataset_list[dataset_idx] - - test_t_label = task_labels[dataset_idx] - if complete_test_set_only: - test_t_label = 0 - - test_t_labels.append(test_t_label) - - test_dataset_list_avl.append(make_classification_dataset( - dataset, - task_labels=ConstantSequence(test_t_label, len(dataset)), - transform_groups=transform_groups, - initial_transform_group="eval", - )) - - return GenericCLScenario( - stream_definitions={ - "train": (train_dataset_list_avl, train_t_labels), - "test": (test_dataset_list_avl, test_t_labels), - }, - complete_test_set_only=complete_test_set_only, - ) - - -def create_generic_scenario_from_filelists( - root: Union[str, Path], - train_file_lists: Sequence[Union[str, Path]], - test_file_lists: Union[Union[str, Path], Sequence[Union[str, Path]]], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_filelists`. - - Creates a generic scenario given a list of filelists and the respective task - labels. A separate dataset will be created for each filelist and each of - those training datasets will be considered a separate training experience. - - In its base form, this function accepts a list of filelists for the test - datsets that must contain the same amount of elements of the training list. - Those pairs of datasets are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - This helper functions is the best shot when loading Caffe-style dataset - based on filelists. - - The resulting benchmark instance and the intermediate datasets used to - populate it will be of type CLASSIFICATION. - - :param root: The root path of the dataset. - :param train_file_lists: A list of filelists describing the - paths of the training patterns for each experience. - :param test_file_lists: A list of filelists describing the - paths of the test patterns for each experience. - :param task_labels: A list of task labels. 
Must contain the same amount of - elements of the ``train_file_lists`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_file_lists`` - parameter must be list with a single element (the complete test set). - Alternatively, can be a plain string or :class:`Path` object. - Defaults to False, which means that ``train_file_lists`` and - ``test_file_lists`` must contain the same amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "create_generic_scenario_from_filelists is deprecated in " - "favor of create_generic_benchmark_from_filelists.", - DeprecationWarning, - ) - - train_datasets, test_dataset = datasets_from_filelists( - root, - train_file_lists, - test_file_lists, - complete_test_set_only=complete_test_set_only, - ) - - return create_multi_dataset_generic_scenario( - train_datasets, - test_dataset, - task_labels, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - complete_test_set_only=complete_test_set_only, - ) - - -FileAndLabel = Tuple[Union[str, Path], int] - - -def create_generic_scenario_from_paths( - train_list_of_files: Sequence[Sequence[FileAndLabel]], - test_list_of_files: Union[ - Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]] - ], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_paths`. - - Creates a generic scenario given a sequence of lists of files. A separate - dataset will be created for each list. Each of those training datasets - will be considered a separate training experience. - - This is very similar to `create_generic_scenario_from_filelists`, with the - main difference being that `create_generic_scenario_from_filelists` - accepts, for each experience, a file list formatted in Caffe-style. - On the contrary, this accepts a list of tuples where each tuple contains - two elements: the full path to the pattern and its label. - Optionally, the tuple may contain a third element describing the bounding - box of the element to crop. This last bounding box may be useful when trying - to extract the part of the image depicting the desired element. 
- - In its base form, this function accepts a list for the test datasets that - must contain the same amount of elements of the training list. - Those pairs of datasets are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - The label of each pattern doesn't have to be an int. - - :param train_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that training experience, as - tuples. Each tuple must contain two elements: the full path to the - pattern and its class label. Optionally, the tuple may contain a - third element describing the bounding box to use for cropping (top, - left, height, width). - :param test_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that test experience, as tuples. - Each tuple must contain two elements: the full path to the pattern - and its class label. Optionally, the tuple may contain a third element - describing the bounding box to use for cropping (top, left, height, - width). - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_file_lists`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_list_of_files`` - parameter must define a single experience (the complete test set). - Defaults to False, which means that ``train_list_of_files`` and - ``test_list_of_files`` must contain the same amount of paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. 
- """ - - warnings.warn( - "create_generic_scenario_from_paths is deprecated in favor" - " of create_generic_benchmark_from_paths.", - DeprecationWarning, - ) - - train_datasets, test_dataset = datasets_from_paths( - train_list_of_files, - test_list_of_files, - complete_test_set_only=complete_test_set_only, - ) - - return create_multi_dataset_generic_scenario( - train_datasets, - test_dataset, - task_labels, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - complete_test_set_only=complete_test_set_only, - ) - - -def create_generic_scenario_from_tensor_lists( - train_tensors: Sequence[Sequence[Any]], - test_tensors: Sequence[Sequence[Any]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_tensor_lists`. - - Creates a generic scenario given lists of Tensors. A separate dataset will - be created from each Tensor tuple (x, y, z, ...) and each of those training - datasets will be considered a separate training experience. Using this - helper function is the lowest-level way to create a Continual Learning - scenario. When possible, consider using higher level helpers. - - Experiences are defined by passing lists of tensors as the `train_tensors` - and `test_tensors` parameter. Those parameters must be lists containing - sub-lists of tensors, one for each experience. Each tensor defines the value - of a feature ("x", "y", "z", ...) for all patterns of that experience. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_tensors: A list of lists. The first list must contain the - tensors for the first training experience (one tensor per feature), the - second list must contain the tensors for the second training experience, - and so on. - :param test_tensors: A list of lists. The first list must contain the - tensors for the first test experience (one tensor per feature), the - second list must contain the tensors for the second test experience, - and so on. When using `complete_test_set_only`, this parameter - must be a list containing a single sub-list for the single test - experience. - :param task_labels: A list of task labels. Must contain a task label for - each experience. For Single-Incremental-Task (a.k.a. Task-Free) - scenarios, this is usually a list of zeros. For Multi Task scenario, - this is usually a list of ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that ``test_tensors`` must - define a single experience. Defaults to False, which means that - ``train_tensors`` and ``test_tensors`` must define the same - amount of experiences. 
- :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "create_generic_scenario_from_tensor_lists is deprecated in " - "favor of create_generic_benchmark_from_tensor_lists.", - DeprecationWarning, - ) - - train_datasets = [ - make_tensor_classification_dataset(*exp_tensors) - for exp_tensors in train_tensors - ] - - test_datasets = [ - make_tensor_classification_dataset(*exp_tensors) - for exp_tensors in test_tensors - ] - - return create_multi_dataset_generic_scenario( - train_datasets, - test_datasets, - task_labels, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - complete_test_set_only=complete_test_set_only, - ) - - -def create_generic_scenario_from_tensors( - train_data_x: Sequence[Any], - train_data_y: Sequence[Sequence[SupportsInt]], - test_data_x: Union[Any, Sequence[Any]], - test_data_y: Union[Any, Sequence[Sequence[SupportsInt]]], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_tensor_lists`. - - Please consider using :func:`create_generic_scenario_from_tensor_lists` - instead. When switching to the new function, please keep in mind that the - format of the parameters is completely different! - - Creates a generic scenario given lists of Tensors and the respective task - labels. A separate dataset will be created from each Tensor pair (x + y) - and each of those training datasets will be considered a separate - training experience. Contents of the datasets will not be changed, including - the targets. Using this helper function is the lower level way to create a - Continual Learning scenario. When possible, consider using higher level - helpers. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_data_x: A list of Tensors (one per experience) containing the - patterns of the training sets. - :param train_data_y: A list of Tensors or int lists containing the - labels of the patterns of the training sets. 
Must contain the same - number of elements of ``train_datasets_x``. - :param test_data_x: A Tensor or a list of Tensors (one per experience) - containing the patterns of the test sets. - :param test_data_y: A Tensor or a list of Tensors or int lists containing - the labels of the patterns of the test sets. Must contain the same - number of elements of ``test_datasets_x``. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_datasets_x`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that ``test_data_x`` and - ``test_data_y`` must define a single experience. Defaults to False, - which means that ``train_data_*`` and ``test_data_*`` must define the - same amount of experiences. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. 
- """ - - warnings.warn( - "create_generic_scenario_from_tensors is deprecated in favor " - "of create_generic_benchmark_from_tensor_lists.", - DeprecationWarning, - ) - - if len(train_data_x) != len(train_data_y): - raise ValueError( - "train_data_x and train_data_y must contain" - " the same amount of elements" - ) - - if type(test_data_x) != type(test_data_y): - raise ValueError( - "test_data_x and test_data_y must be of" " the same type" - ) - - if isinstance(test_data_x, Tensor): - test_data_x = [test_data_x] - test_data_y = [test_data_y] - else: - if len(test_data_x) != len(test_data_y): - raise ValueError( - "test_data_x and test_data_y must contain" - " the same amount of elements" - ) - - exp_train_first_structure = [] - exp_test_first_structure = [] - for exp_idx in range(len(train_data_x)): - exp_x = train_data_x[exp_idx] - exp_y = train_data_y[exp_idx] - - exp_train_first_structure.append([exp_x, exp_y]) - - for exp_idx in range(len(test_data_x)): - exp_x = test_data_x[exp_idx] - exp_y = test_data_y[exp_idx] - - exp_test_first_structure.append([exp_x, exp_y]) - - return create_generic_scenario_from_tensor_lists( - train_tensors=exp_train_first_structure, - test_tensors=exp_test_first_structure, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -__all__ = [ - "create_multi_dataset_generic_scenario", - "create_generic_scenario_from_filelists", - "create_generic_scenario_from_paths", - "create_generic_scenario_from_tensor_lists", - "create_generic_scenario_from_tensors", -] diff --git a/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py b/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py index fdb75916a..8d04e5cb7 100644 --- a/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py +++ b/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py @@ -20,7 +20,7 @@ ) from avalanche.benchmarks.utils import classification_subset from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset, SupervisedClassificationDataset + ClassificationDataset from avalanche.benchmarks.utils.flat_data import ConstantSequence @@ -29,7 +29,7 @@ class NCScenario( ClassificationScenario[ 'NCStream', 'NCExperience', - SupervisedClassificationDataset]): + ClassificationDataset]): """ This class defines a "New Classes" scenario. Once created, an instance @@ -87,7 +87,7 @@ def __init__( :param fixed_class_order: If not None, the class order to use (overrides the shuffle argument). Very useful for enhancing reproducibility. Defaults to None. - :param per_experience_classes: Is not None, a dictionary whose keys are + :param per_experience_classes: If not None, a dictionary whose keys are (0-indexed) experience IDs and their values are the number of classes to include in the respective experiences. The dictionary doesn't have to contain a key for each experience! All the remaining @@ -124,10 +124,8 @@ class "34" will be mapped to "1", class "11" to "2" and so on. test datasets must be used. Defaults to None. 
""" - if not isinstance(train_dataset, SupervisedClassificationDataset): - train_dataset = SupervisedClassificationDataset(train_dataset) - if not isinstance(test_dataset, SupervisedClassificationDataset): - test_dataset = SupervisedClassificationDataset(test_dataset) + train_dataset = ClassificationDataset(train_dataset) + test_dataset = ClassificationDataset(test_dataset) if ( class_ids_from_zero_from_first_exp @@ -561,7 +559,7 @@ def __init__( set_stream_info=set_stream_info) -class NCExperience(ClassificationExperience[SupervisedClassificationDataset]): +class NCExperience(ClassificationExperience[ClassificationDataset]): """ Defines a "New Classes" experience. It defines fields to obtain the current dataset and the associated task label. It also keeps a reference to the diff --git a/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py b/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py index 63abf42c6..678363b1a 100644 --- a/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py +++ b/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py @@ -23,7 +23,7 @@ ) from avalanche.benchmarks.utils import classification_subset from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset, SupervisedClassificationDataset + ClassificationDataset from avalanche.benchmarks.utils.flat_data import ConstantSequence @@ -31,7 +31,7 @@ class NIScenario( ClassificationScenario[ 'NIStream', 'NIExperience', - SupervisedClassificationDataset]): + ClassificationDataset]): """ This class defines a "New Instance" scenario. Once created, an instance of this class can be iterated in order to obtain @@ -107,8 +107,8 @@ def __init__( test datasets must be used. Defaults to None. """ - train_dataset = SupervisedClassificationDataset(train_dataset) - test_dataset = SupervisedClassificationDataset(test_dataset) + train_dataset = ClassificationDataset(train_dataset) + test_dataset = ClassificationDataset(test_dataset) self._has_task_labels = task_labels @@ -484,7 +484,7 @@ def __init__( set_stream_info=set_stream_info) -class NIExperience(ClassificationExperience[SupervisedClassificationDataset]): +class NIExperience(ClassificationExperience[ClassificationDataset]): """ Defines a "New Instances" experience. It defines fields to obtain the current dataset and the associated task label. 
It also keeps a reference diff --git a/avalanche/benchmarks/utils/classification_dataset.py b/avalanche/benchmarks/utils/classification_dataset.py index e82141cd3..9ad055c9e 100644 --- a/avalanche/benchmarks/utils/classification_dataset.py +++ b/avalanche/benchmarks/utils/classification_dataset.py @@ -36,6 +36,7 @@ from avalanche.benchmarks.utils.transform_groups import ( TransformGroupDef, DefaultTransformGroups, + TransformGroups, XTransform, YTransform, ) @@ -45,6 +46,7 @@ ) from avalanche.benchmarks.utils.flat_data import ConstantSequence from avalanche.benchmarks.utils.dataset_definitions import ( + IDataset, ISupportedClassificationDataset, ITensorDataset, IDatasetWithTargets, @@ -61,7 +63,6 @@ Dict, Tuple, Mapping, - overload, ) @@ -86,10 +87,43 @@ def lookup(indexable, idx): class ClassificationDataset(AvalancheDataset[T_co]): + def __init__( + self, + datasets: Sequence[IDataset[T_co]], + *, + indices: Optional[List[int]] = None, + data_attributes: Optional[List[DataAttribute]] = None, + transform_groups: Optional[TransformGroups] = None, + frozen_transform_groups: Optional[TransformGroups] = None, + collate_fn: Optional[Callable[[List], Any]] = None): + super().__init__( + datasets=datasets, + indices=indices, + data_attributes=data_attributes, + transform_groups=transform_groups, + frozen_transform_groups=frozen_transform_groups, + collate_fn=collate_fn + ) + + assert 'targets' in self._data_attributes, \ + 'The supervised version of the ClassificationDataset requires ' + \ + 'the targets field' + assert 'targets_task_labels' in self._data_attributes, \ + 'The supervised version of the ClassificationDataset requires ' + \ + 'the targets_task_labels field' + + @property + def targets(self) -> DataAttribute[TTargetType]: + return self._data_attributes['targets'] + + @property + def targets_task_labels(self) -> DataAttribute[int]: + return self._data_attributes['targets_task_labels'] + @property def task_pattern_indices(self): """A dictionary mapping task ids to their sample indices.""" - return self.targets_task_labels.val_to_idx # type: ignore + return self.targets_task_labels.val_to_idx @property def task_set(self: TClassificationDataset) -> \ @@ -110,25 +144,6 @@ def concat(self, other): def __hash__(self): return id(self) - - -class SupervisedClassificationDataset(ClassificationDataset[T_co]): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - assert 'targets' in self._data_attributes, \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets field' - assert 'targets_task_labels' in self._data_attributes, \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets_task_labels field' - - @property - def targets(self) -> DataAttribute[TTargetType]: - return self._data_attributes['targets'] - - @property - def targets_task_labels(self) -> DataAttribute[int]: - return self._data_attributes['targets_task_labels'] SupportedDataset = Union[ @@ -140,37 +155,6 @@ def targets_task_labels(self) -> DataAttribute[int]: ] -@overload -def make_classification_dataset( - dataset: SupervisedClassificationDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> 
SupervisedClassificationDataset: - ... - - -@overload -def make_classification_dataset( - dataset: SupportedDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload def make_classification_dataset( dataset: SupportedDataset, *, @@ -182,20 +166,6 @@ def make_classification_dataset( targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> ClassificationDataset: - ... - - -def make_classification_dataset( - dataset: SupportedDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: """Avalanche Classification Dataset. Supervised continual learning benchmarks in Avalanche return instances of @@ -273,8 +243,6 @@ def make_classification_dataset( the default collate function will be used. """ - is_supervised = isinstance(dataset, SupervisedClassificationDataset) - transform_gs = _init_transform_groups( transform_groups, transform, @@ -293,26 +261,12 @@ def make_classification_dataset( if task_labels_data is not None: das.append(task_labels_data) - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) - - data: Union[ClassificationDataset, SupervisedClassificationDataset] - if is_supervised: - data = SupervisedClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) - else: - data = ClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) + data: ClassificationDataset = ClassificationDataset( + [dataset], + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + collate_fn=collate_fn, + ) if initial_transform_group is not None: return data.with_transforms(initial_transform_group) @@ -347,60 +301,6 @@ def _init_targets(dataset, targets, check_shape=True) -> \ return DataAttribute(targets, "targets") -@overload -def classification_subset( - dataset: SupervisedClassificationDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... 
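# A minimal usage sketch of the consolidated `make_classification_dataset` API
# (hedged illustration; the torchvision MNIST dataset and the "./data/mnist" root
# below are assumptions made only for this example). With the overloads gone, the
# helper always returns a ClassificationDataset carrying `targets` and
# `targets_task_labels`.
from torchvision.datasets import MNIST

from avalanche.benchmarks.utils.classification_dataset import (
    make_classification_dataset,
)

mnist_train = MNIST("./data/mnist", train=True, download=True)
# `targets` are discovered on the wrapped dataset; the single task label 0 is
# broadcast to every sample.
train_data = make_classification_dataset(mnist_train, task_labels=0)
print(len(train_data.targets), len(train_data.targets_task_labels))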
- - -@overload -def classification_subset( - dataset: SupportedDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload -def classification_subset( - dataset: SupportedDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> ClassificationDataset: - ... - - def classification_subset( dataset: SupportedDataset, indices: Optional[Sequence[int]] = None, @@ -414,7 +314,7 @@ def classification_subset( task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: +) -> ClassificationDataset: """Creates an ``AvalancheSubset`` instance. For simple subset operations you should use the method @@ -482,8 +382,6 @@ def classification_subset( `collate_fn` field exists in the dataset. If no such field exists, the default collate function will be used. 
""" - - is_supervised = isinstance(dataset, SupervisedClassificationDataset) if isinstance(dataset, ClassificationDataset): if ( @@ -534,18 +432,13 @@ def classification_subset( das = [] if targets_data is not None: das.append(targets_data) - - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) if task_labels_data is not None: # special treatment for task labels depending on length for # backward compatibility if len(task_labels_data) != len(dataset): # task labels are already subsampled - dataset = ClassificationDataset( + dataset_avl = AvalancheDataset( [dataset], indices=list(indices) if indices is not None else None, data_attributes=das, @@ -553,66 +446,22 @@ def classification_subset( frozen_transform_groups=frozen_transform_groups, collate_fn=collate_fn, ) + # now add task labels - if is_supervised: - return SupervisedClassificationDataset( - [dataset], - data_attributes=[dataset.targets, # type: ignore - task_labels_data]) - else: - return ClassificationDataset( - [dataset], - data_attributes=[dataset.targets, # type: ignore - task_labels_data]) + return ClassificationDataset( + [dataset_avl], + data_attributes=[task_labels_data]) else: das.append(task_labels_data) - if is_supervised: - return SupervisedClassificationDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - else: - return ClassificationDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - - -@overload -def make_tensor_classification_dataset( - *dataset_tensors: Sequence, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Dict[str, Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = "train", - task_labels: Union[int, Sequence[int]], - targets: Union[Sequence[TTargetType], int], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload -def make_tensor_classification_dataset( - *dataset_tensors: Sequence, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Dict[str, Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = "train", - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Union[Sequence[TTargetType], int]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: - ... 
+ return ClassificationDataset( + [dataset], + indices=list(indices) if indices is not None else None, + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + frozen_transform_groups=frozen_transform_groups, + collate_fn=collate_fn, + ) def make_tensor_classification_dataset( @@ -624,7 +473,7 @@ def make_tensor_classification_dataset( task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Union[Sequence[TTargetType], int]] = None, collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: +) -> ClassificationDataset: """Creates a ``AvalancheTensorDataset`` instance. A Dataset that wraps existing ndarrays, Tensors, lists... to provide @@ -697,26 +546,13 @@ def make_tensor_classification_dataset( for d in [targets_data, task_labels_data]: if d is not None: das.append(d) - - # Check if supervision data has been added - is_supervised = ( - targets_data is not None and - task_labels_data is not None) - - if is_supervised: - return SupervisedClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) - else: - return ClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) + + return ClassificationDataset( + [dataset], + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + collate_fn=collate_fn, + ) class _TensorClassificationDataset(TensorDataset): @@ -728,43 +564,6 @@ def __getitem__(self, item): return tuple(elem) -@overload -def concat_classification_datasets( - datasets: Sequence[SupervisedClassificationDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload -def concat_classification_datasets( - datasets: Sequence[SupportedDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int], Sequence[Sequence[int]]], - targets: Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload def concat_classification_datasets( datasets: Sequence[SupportedDataset], *, @@ -780,24 +579,6 @@ def concat_classification_datasets( ]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> ClassificationDataset: - ... 
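# A hedged sketch of the tensor-based helpers touched above: both
# `make_tensor_classification_dataset` and `classification_subset` now return a
# plain ClassificationDataset. The random tensors are toy data used only for
# illustration.
import torch

from avalanche.benchmarks.utils.classification_dataset import (
    classification_subset,
    make_tensor_classification_dataset,
)

x = torch.rand(100, 3, 32, 32)
y = torch.randint(0, 10, (100,))
# Task label 0 is applied to all samples; targets default to the second tensor.
tensor_data = make_tensor_classification_dataset(x, y, task_labels=0)

# Keep only the first ten samples; targets and task labels are subsampled with them.
small_split = classification_subset(tensor_data, indices=list(range(10)))
print(len(small_split), len(small_split.targets))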
- - -def concat_classification_datasets( - datasets: Sequence[SupportedDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: """Creates a ``AvalancheConcatDataset`` instance. For simple subset operations you should use the method @@ -878,7 +659,6 @@ def concat_classification_datasets( initial_transform_group = \ find_common_transforms_group(datasets, default_group="train") - supervised = True for dd, dataset_task_labels, dataset_targets in \ zip(datasets, per_dataset_task_labels, per_dataset_targets): dd = make_classification_dataset( @@ -891,9 +671,6 @@ def concat_classification_datasets( targets=dataset_targets, collate_fn=collate_fn, ) - - if not isinstance(dd, SupervisedClassificationDataset): - supervised = False dds.append(dd) @@ -907,24 +684,11 @@ def concat_classification_datasets( ) else: transform_groups_obj = None - - supervised = supervised and ( - (len(dds) > 0) or ( - targets is not None and task_labels is not None - ) - ) - data: Union[SupervisedClassificationDataset, ClassificationDataset] - if supervised: - data = SupervisedClassificationDataset( - dds, - transform_groups=transform_groups_obj - ) - else: - data = ClassificationDataset( - dds, - transform_groups=transform_groups_obj - ) + data: ClassificationDataset = ClassificationDataset( + dds, + transform_groups=transform_groups_obj + ) return data.with_transforms(initial_transform_group) @@ -958,8 +722,8 @@ def _select_targets( def concat_classification_datasets_sequentially( train_dataset_list: Sequence[ISupportedClassificationDataset], test_dataset_list: Sequence[ISupportedClassificationDataset], -) -> Tuple[SupervisedClassificationDataset, - SupervisedClassificationDataset, +) -> Tuple[ClassificationDataset, + ClassificationDataset, List[list]]: """ Concatenates a list of datasets. This is completely different from @@ -1004,15 +768,15 @@ def concat_classification_datasets_sequentially( :returns: A concatenated dataset. 
""" - remapped_train_datasets: List[SupervisedClassificationDataset] = [] - remapped_test_datasets: List[SupervisedClassificationDataset] = [] + remapped_train_datasets: List[ClassificationDataset] = [] + remapped_test_datasets: List[ClassificationDataset] = [] next_remapped_idx = 0 train_dataset_list_sup = list( - map(as_supervised_classification_dataset, train_dataset_list) + map(make_classification_dataset, train_dataset_list) ) test_dataset_list_sup = list( - map(as_supervised_classification_dataset, test_dataset_list) + map(make_classification_dataset, test_dataset_list) ) del train_dataset_list del test_dataset_list @@ -1076,58 +840,12 @@ def concat_classification_datasets_sequentially( ) -def as_supervised_classification_dataset( - dataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None) -> \ - SupervisedClassificationDataset: - - if ( - transform is not None or - target_transform is not None or - transform_groups is not None or - initial_transform_group is not None or - task_labels is not None or - targets is not None or - collate_fn is not None or - not isinstance(dataset, SupervisedClassificationDataset) - ): - result_dataset = make_classification_dataset( - dataset=dataset, - transform=transform, - target_transform=target_transform, - transform_groups=transform_groups, - initial_transform_group=initial_transform_group, - task_labels=task_labels, - targets=targets, - collate_fn=collate_fn - ) - - if not isinstance(result_dataset, SupervisedClassificationDataset): - raise ValueError( - 'The given dataset does not have supervision fields ' - '(targets, task_labels).' 
- ) - - return result_dataset - - return dataset - - __all__ = [ "SupportedDataset", "ClassificationDataset", - "SupervisedClassificationDataset", "make_classification_dataset", "classification_subset", "make_tensor_classification_dataset", "concat_classification_datasets", - "concat_classification_datasets_sequentially", - "as_supervised_classification_dataset" + "concat_classification_datasets_sequentially" ] diff --git a/avalanche/benchmarks/utils/detection_dataset.py b/avalanche/benchmarks/utils/detection_dataset.py index dec5a3206..602e150b0 100644 --- a/avalanche/benchmarks/utils/detection_dataset.py +++ b/avalanche/benchmarks/utils/detection_dataset.py @@ -73,32 +73,10 @@ class DetectionDataset(AvalancheDataset[T_co]): - @property - def task_pattern_indices(self) -> Dict[int, Sequence[int]]: - """A dictionary mapping task ids to their sample indices.""" - # Assumes that targets_task_labels exists - t_labels: DataAttribute[int] = self.targets_task_labels # type: ignore - return t_labels.val_to_idx - - @property - def task_set(self: TDetectionDataset) -> TaskSet[TDetectionDataset]: - """Returns the dataset's ``TaskSet``, which is a mapping .""" - return TaskSet(self) - def subset(self, indices): - data = super().subset(indices) - return data.with_transforms(self._transform_groups.current_group) - - def concat(self, other): - data = super().concat(other) - return data.with_transforms(self._transform_groups.current_group) - - -class SupervisedDetectionDataset(DetectionDataset[T_co]): def __init__( self, - datasets: List[IDataset[T_co]], + datasets: Sequence[IDataset[T_co]], *, indices: Optional[List[int]] = None, data_attributes: Optional[List[DataAttribute]] = None, @@ -120,7 +98,7 @@ def __init__( assert hasattr(self, 'targets_task_labels'), \ 'The supervised version of the ClassificationDataset requires ' + \ 'the targets_task_labels field' - + @property def targets(self) -> DataAttribute[TTargetType]: return self._data_attributes['targets'] @@ -128,6 +106,30 @@ def targets(self) -> DataAttribute[TTargetType]: @property def targets_task_labels(self) -> DataAttribute[int]: return self._data_attributes['targets_task_labels'] + + @property + def task_pattern_indices(self) -> Dict[int, Sequence[int]]: + """A dictionary mapping task ids to their sample indices.""" + # Assumes that targets_task_labels exists + t_labels: DataAttribute[int] = self.targets_task_labels + return t_labels.val_to_idx + + @property + def task_set(self: TDetectionDataset) -> TaskSet[TDetectionDataset]: + """Returns the dataset's ``TaskSet``, which is a mapping .""" + return TaskSet(self) + + def subset(self, indices): + data = super().subset(indices) + return data.with_transforms(self._transform_groups.current_group) + + def concat(self, other): + data = super().concat(other) + return data.with_transforms(self._transform_groups.current_group) + + def __hash__(self): + return id(self) SupportedDetectionDataset = Union[ @@ -138,37 +140,6 @@ def targets_task_labels(self) -> DataAttribute[int]: ] -@overload -def make_detection_dataset( - dataset: SupervisedDetectionDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... 
- - -@overload -def make_detection_dataset( - dataset: SupportedDetectionDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... - - -@overload def make_detection_dataset( dataset: SupportedDetectionDataset, *, @@ -180,20 +151,6 @@ def make_detection_dataset( targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> DetectionDataset: - ... - - -def make_detection_dataset( - dataset: SupportedDetectionDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[DetectionDataset, SupervisedDetectionDataset]: """Avalanche Detection Dataset. Supervised continual learning benchmarks in Avalanche return instances of @@ -271,8 +228,6 @@ def make_detection_dataset( the default collate function for detection will be used. """ - is_supervised = isinstance(dataset, SupervisedDetectionDataset) - transform_gs = _init_transform_groups( transform_groups, transform, @@ -290,30 +245,16 @@ def make_detection_dataset( das.append(targets_data) if task_labels_data is not None: das.append(task_labels_data) - - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) if collate_fn is None: collate_fn = getattr(dataset, 'collate_fn', detection_collate_fn) - data: Union[DetectionDataset, SupervisedDetectionDataset] - if is_supervised: - data = SupervisedDetectionDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) - else: - data = DetectionDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) + data: DetectionDataset = DetectionDataset( + [dataset], + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + collate_fn=collate_fn, + ) if initial_transform_group is not None: return data.with_transforms(initial_transform_group) @@ -359,43 +300,6 @@ def _detection_class_mapping_transform(class_mapping, example_target_dict): return example_target_dict -@overload -def detection_subset( - dataset: SupervisedDetectionDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... 
- - -@overload -def detection_subset( - dataset: SupportedDetectionDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... - - -@overload def detection_subset( dataset: SupportedDetectionDataset, indices: Optional[Sequence[int]] = None, @@ -410,23 +314,6 @@ def detection_subset( targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> DetectionDataset: - ... - - -def detection_subset( - dataset: SupportedDetectionDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[DetectionDataset, SupervisedDetectionDataset]: """Creates an ``AvalancheSubset`` instance. For simple subset operations you should use the method @@ -492,8 +379,6 @@ def detection_subset( the default collate function for detection will be used """ - is_supervised = isinstance(dataset, SupervisedDetectionDataset) - if isinstance(dataset, DetectionDataset): if ( class_mapping is None @@ -559,90 +444,17 @@ def detection_subset( if task_labels_data is not None: das.append(task_labels_data) - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) - if collate_fn is None: collate_fn = detection_collate_fn - if is_supervised: - return SupervisedDetectionDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - else: - return DetectionDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - - -@overload -def concat_detection_datasets( - datasets: Sequence[SupervisedDetectionDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... 
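# A hedged sketch for the detection counterparts above: `make_detection_dataset` and
# `detection_subset` now always return a DetectionDataset. The toy dataset below uses
# the torchvision-style (image, target_dict) format and exists only for illustration.
import torch
from torch.utils.data import Dataset

from avalanche.benchmarks.utils.detection_dataset import (
    detection_subset,
    make_detection_dataset,
)


class ToyDetectionSet(Dataset):
    def __len__(self):
        return 4

    def __getitem__(self, idx):
        image = torch.rand(3, 64, 64)
        target = {
            "boxes": torch.tensor([[4.0, 4.0, 30.0, 30.0]]),
            "labels": torch.tensor([1]),
        }
        return image, target


toy = ToyDetectionSet()
# The toy dataset has no `targets` field, so targets are passed explicitly.
targets = [toy[i][1] for i in range(len(toy))]
det_data = make_detection_dataset(toy, task_labels=0, targets=targets)
det_split = detection_subset(det_data, indices=[0, 1])
print(len(det_data), len(det_split))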
- - -@overload -def concat_detection_datasets( - datasets: Sequence[SupportedDetectionDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int], Sequence[Sequence[int]]], - targets: Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... - - -@overload -def concat_detection_datasets( - datasets: Sequence[SupportedDetectionDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> DetectionDataset: - ... + return DetectionDataset( + [dataset], + indices=list(indices) if indices is not None else None, + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + frozen_transform_groups=frozen_transform_groups, + collate_fn=collate_fn, + ) def concat_detection_datasets( @@ -660,7 +472,7 @@ def concat_detection_datasets( Sequence[TTargetType], Sequence[Sequence[TTargetType]] ]] = None, collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[DetectionDataset, SupervisedDetectionDataset]: +) -> DetectionDataset: """Creates a ``AvalancheConcatDataset`` instance. For simple subset operations you should use the method diff --git a/avalanche/benchmarks/utils/utils.py b/avalanche/benchmarks/utils/utils.py index 1356f67bb..fce5c0bd1 100644 --- a/avalanche/benchmarks/utils/utils.py +++ b/avalanche/benchmarks/utils/utils.py @@ -29,10 +29,11 @@ SupportsInt, ) import warnings +import numpy as np import torch from torch import Tensor -from torch.utils.data import Subset, ConcatDataset +from torch.utils.data import Subset, ConcatDataset, TensorDataset from avalanche.benchmarks.utils.data import AvalancheDataset from avalanche.benchmarks.utils.data_attribute import DataAttribute @@ -673,6 +674,285 @@ def _get_task_labels_field(self) -> DataAttribute[int]: return self.data.targets_task_labels # type: ignore +def _numpy_is_sequence_int(numpy_tensor: np.ndarray) -> bool: + return issubclass(numpy_tensor.dtype.type, np.integer) + + +def _numpy_is_single_int(numpy_tensor: np.ndarray) -> bool: + try: + single_value = numpy_tensor.item() + return isinstance(single_value, int) + except ValueError: + return False + + +def _torch_is_sequence_int(torch_tensor: Tensor) -> bool: + return not torch.is_floating_point(torch_tensor) and \ + not torch.is_complex(torch_tensor) + + +def _torch_is_single_int(torch_tensor: Tensor) -> bool: + try: + single_value = torch_tensor.item() + return isinstance(single_value, int) + except ValueError: + return False + + +def _element_is_single_int(element: Any): + if isinstance(element, (int, np.integer)): + return True + if isinstance(element, Tensor): + return _torch_is_single_int(element) + else: + return False + + +def _is_int_iterable(iterable: Iterable[Any]): + if isinstance(iterable, torch.Tensor): + return _torch_is_sequence_int(iterable) + elif isinstance(iterable, np.ndarray): + return 
_numpy_is_sequence_int(iterable)
+    else:
+        for t in iterable:
+            if not _element_is_single_int(t):
+                return False
+        return True
+
+
+AnyT = TypeVar('AnyT', bound=Iterable)
+
+
+def _to_int_list(iterable: AnyT, force: bool = True) -> Union[AnyT, List[int]]:
+    if isinstance(iterable, torch.Tensor):
+        if _torch_is_sequence_int(iterable):
+            return iterable.tolist()
+        elif force:
+            raise ValueError('Cannot convert PyTorch Tensor to int list')
+        else:
+            return iterable
+    elif isinstance(iterable, np.ndarray):
+        if _numpy_is_sequence_int(iterable):
+            return iterable.tolist()
+        elif force:
+            raise ValueError('Cannot convert NumPy array to int list')
+        else:
+            return iterable  # type: ignore
+    else:
+        int_list = []
+        for t in iterable:
+            if _element_is_single_int(t):
+                int_list.append(t)
+            elif force:
+                raise ValueError('Cannot convert sequence to int list')
+            else:
+                return iterable
+        return int_list
+
+
+def _smart_init_targets(
+        dataset,
+        targets,
+        check_shape=True
+):
+    """
+    Initializes the targets for a given dataset.
+
+    To support backwards compatibility for when
+    :func:`create_multi_dataset_generic_benchmark` was
+    used to manage classification benchmarks only, this function will try to
+    mimic the steps taken in :func:`make_classification_dataset`, that is:
+
+    - will try to check if the input dataset has classification
+        targets (integer tensors / ndarray) and will cast them to
+        a list of native ints, as expected by other parts
+        of Avalanche.
+    - accepts passing an int for the targets field. The given int
+        will be applied to all exemplars in the dataset.
+    - supports PyTorch TensorDataset, by taking the second tensor as targets.
+
+    If targets are not of type int, then they will be returned as-is,
+    so that other types of datasets (regression, detection, ...) are
+    supported without issues.
+
+    :param dataset: The input dataset. If the `targets` parameter is
+        not None, then targets will be retrieved from the dataset.
+    :param targets: The targets to use. Can be None, in which case
+        targets will be retrieved from the dataset.
+    :param check_shape: If True, will check if the number of exemplars
+        in the dataset matches the length of the obtained targets sequence.
+    :return: The targets, as a DataAttribute of elements whose type depends
+        on the input dataset.
+    """
+    if targets is not None:
+        # User defined targets always take precedence
+        if isinstance(targets, int):
+            # Classification targets
+            targets = ConstantSequence(targets, len(dataset))
+        elif len(targets) != len(dataset) and check_shape:
+            raise ValueError(
+                "Invalid number of target labels. It must be equal to the "
+                "number of patterns in the dataset. Got {}, expected "
+                "{}!".format(len(targets), len(dataset))
+            )
+        return DataAttribute(targets, "targets")
+
+    targets = _traverse_supported_dataset(
+        dataset, _smart_select_targets_opt)
+
+    if targets is not None:
+        # Classification targets
+        targets = _to_int_list(targets, force=False)
+
+    if targets is None:
+        return None
+
+    return DataAttribute(targets, "targets")
+
+
+def _smart_select_targets_opt(
+        dataset: Any,
+        indices: Optional[List[int]]) -> Optional[Sequence[Any]]:
+    if hasattr(dataset, "targets"):
+        # Standard supported dataset
+        found_targets = dataset.targets
+    elif hasattr(dataset, "tensors") and len(dataset.tensors) >= 2:
+        # Support for PyTorch TensorDataset
+        found_targets = dataset.tensors[1]
+    else:
+        return None
+
+    if indices is not None:
+        found_targets = SubSequence(found_targets, indices=indices)
+
+    return found_targets
+
+
+def make_generic_dataset(
+    dataset: Any,
+    *,
+    transform: Optional[XTransform] = None,
+    target_transform: Optional[YTransform] = None,
+    transform_groups: Optional[Mapping[str, TransformGroupDef]] = None,
+    initial_transform_group: Optional[str] = None,
+    task_labels: Optional[Union[int, Sequence[int]]] = None,
+    targets: Optional[Any] = None,
+    collate_fn: Optional[Callable[[List], Any]] = None
+) -> AvalancheDataset:
+    """
+    Helper function that creates an :class:`AvalancheDataset` with
+    supervision fields `targets` and `targets_task_labels` (if given or found
+    in the input dataset).
+
+    :param dataset: The dataset to wrap in the AvalancheDataset. If it contains
+        `targets` and/or `targets_task_labels` fields, then those fields will
+        be inherited by the resulting dataset (if not given by the `targets`
+        or `task_labels` parameters). This will also check if the input dataset
+        is a :class:`TensorDataset` and, in that case, will try to use the
+        second tensor as the `targets` field.
+    :param transform: The transformation to apply to X values.
+        Mutually exclusive with `transform_groups`.
+    :param target_transform: The transformation to apply to Y values.
+        Mutually exclusive with `transform_groups`.
+    :param transform_groups: The transformation groups to add to the dataset.
+        Mutually exclusive with `transform` and `target_transform`.
+    :param task_labels: A list containing a task label for each example. Can
+        also be a plain `int`, in which case it will be applied to all
+        examples. If not None, shadows the `targets_task_labels` field from
+        the input dataset.
+    :param targets: A list containing a target for each example. If not None,
+        shadows the `targets` field from the input dataset.
+    :param collate_fn: The collate function to use when loading this dataset.
+
+    :returns: An :class:`AvalancheDataset`.
+    """
+    if isinstance(dataset, AvalancheDataset):
+        return dataset
+
+    transform_gs = _init_transform_groups(
+        transform_groups=transform_groups,
+        transform=transform,
+        target_transform=target_transform,
+        initial_transform_group=initial_transform_group,
+        dataset=dataset,
+    )
+
+    targets_data: Optional[DataAttribute[Any]] = \
+        _smart_init_targets(dataset, targets)
+    task_labels_data: Optional[DataAttribute[int]] = \
+        _init_task_labels(dataset, task_labels)
+
+    das: List[DataAttribute] = []
+    if targets_data is not None:
+        das.append(targets_data)
+    if task_labels_data is not None:
+        das.append(task_labels_data)
+
+    data = AvalancheDataset(
+        [dataset],
+        data_attributes=das if len(das) > 0 else None,
+        transform_groups=transform_gs,
+        collate_fn=collate_fn,
+    )
+
+    if initial_transform_group is not None:
+        return data.with_transforms(initial_transform_group)
+    else:
+        return data
+
+
+def make_generic_tensor_dataset(
+    dataset_tensors: Sequence,
+    *,
+    transform: Optional[XTransform] = None,
+    target_transform: Optional[YTransform] = None,
+    transform_groups: Optional[Mapping[str, TransformGroupDef]] = None,
+    initial_transform_group: Optional[str] = None,
+    task_labels: Optional[Union[int, Sequence[int]]] = None,
+    targets: Optional[Any] = None,
+    collate_fn: Optional[Callable[[List], Any]] = None
+) -> AvalancheDataset:
+    if len(dataset_tensors) < 1:
+        raise ValueError("At least one sequence must be passed")
+
+    if isinstance(targets, int):
+        targets = dataset_tensors[targets]
+    tts = []
+    for tt in dataset_tensors:  # TensorDataset requires PyTorch tensors
+        if not hasattr(tt, 'size'):
+            tt = torch.tensor(tt)
+        tts.append(tt)
+    dataset = TensorDataset(*tts)
+
+    transform_gs = _init_transform_groups(
+        transform_groups,
+        transform,
+        target_transform,
+        initial_transform_group,
+        dataset,
+    )
+    targets_data = _smart_init_targets(dataset, targets)
+    task_labels_data = _init_task_labels(dataset, task_labels)
+
+    das: List[DataAttribute] = []
+    if targets_data is not None:
+        das.append(targets_data)
+    if task_labels_data is not None:
+        das.append(task_labels_data)
+
+    data = AvalancheDataset(
+        [dataset],
+        data_attributes=das if len(das) > 0 else None,
+        transform_groups=transform_gs,
+        collate_fn=collate_fn,
+    )
+
+    if initial_transform_group is not None:
+        return data.with_transforms(initial_transform_group)
+    else:
+        return data
+
+
 __all__ = [
     "tensor_as_list",
     "grouped_and_ordered_indexes",
@@ -680,5 +960,7 @@ def _get_task_labels_field(self) -> DataAttribute[int]:
     "as_classification_dataset",
     "concat_datasets",
     "find_common_transforms_group",
-    "TaskSet"
+    "TaskSet",
+    "make_generic_dataset",
+    "make_generic_tensor_dataset"
 ]
diff --git a/avalanche/evaluation/metrics/checkpoint.py b/avalanche/evaluation/metrics/checkpoint.py
index 3c155c892..7793ec98b 100644
--- a/avalanche/evaluation/metrics/checkpoint.py
+++ b/avalanche/evaluation/metrics/checkpoint.py
@@ -10,9 +10,11 @@
 ################################################################################
 
 import copy
-from typing import TYPE_CHECKING
+import io
+from typing import TYPE_CHECKING, Optional
 
 from torch import Tensor
+import torch
 
 from avalanche.evaluation import PluginMetric
 from avalanche.evaluation.metric_results import MetricValue, MetricResult
@@ -46,9 +48,9 @@ def __init__(self):
         retrieved using the `result` method.
         """
         super().__init__()
-        self.weights = None
+        self.weights: Optional[bytes] = None
 
-    def update(self, weights) -> Tensor:
+    def update(self, weights: bytes):
         """
         Update the weight checkpoint at the current experience.
@@ -57,7 +59,7 @@ def update(self, weights) -> Tensor:
         """
         self.weights = weights
 
-    def result(self) -> Tensor:
+    def result(self) -> bytes:
         """
         Retrieves the weight checkpoint at the current experience.
 
@@ -87,12 +89,18 @@ def _package_result(self, strategy) -> "MetricResult":
     def after_training_exp(
         self, strategy: "SupervisedTemplate"
     ) -> "MetricResult":
-        model_params = copy.deepcopy(strategy.model.parameters())
-        self.update(model_params)
-        return None
+        buff = io.BytesIO()
+        model_params = copy.deepcopy(strategy.model).to('cpu')
+        torch.save(model_params, buff)
+        buff.seek(0)
+        self.update(buff.read())
+
+        return self._package_result(strategy)
 
     def __str__(self):
         return "WeightCheckpoint"
 
 
-__all__ = ["WeightCheckpoint"]
+__all__ = [
+    "WeightCheckpoint"
+]
diff --git a/avalanche/logging/text_logging.py b/avalanche/logging/text_logging.py
index f8151d420..ee3aad4aa 100644
--- a/avalanche/logging/text_logging.py
+++ b/avalanche/logging/text_logging.py
@@ -24,7 +24,7 @@
 if TYPE_CHECKING:
     from avalanche.training.templates import SupervisedTemplate
 
-UNSUPPORTED_TYPES: Tuple[Type] = (TensorImage,)
+UNSUPPORTED_TYPES: Tuple[Type, ...] = (TensorImage, bytes,)
 
 
 class TextLogger(BaseLogger, SupervisedPlugin):
diff --git a/avalanche/logging/wandb_logger.py b/avalanche/logging/wandb_logger.py
index 5ea49336b..dd97b3aef 100644
--- a/avalanche/logging/wandb_logger.py
+++ b/avalanche/logging/wandb_logger.py
@@ -11,10 +11,12 @@
 """ This module handles all the functionalities related to the logging of
 Avalanche experiments using Weights & Biases. """
 
-from typing import Union, List, TYPE_CHECKING
+import re
+from typing import Optional, Union, List, TYPE_CHECKING
 from pathlib import Path
 import os
 import errno
+import warnings
 
 import numpy as np
 from numpy import array
@@ -30,6 +32,7 @@
     MetricValue,
     TensorImage,
 )
+from avalanche.evaluation.metric_utils import phase_and_task
 from avalanche.logging import BaseLogger
 
 if TYPE_CHECKING:
@@ -37,6 +40,12 @@
     from avalanche.training.templates import SupervisedTemplate
 
 
+CHECKPOINT_METRIC_NAME = re.compile(
+    r"^WeightCheckpoint\/(?P<phase_name>\S+)_phase\/(?P<stream_name>\S+)_"
+    r"stream(\/Task(?P<task_id>\d+))?\/Exp(?P<experience_id>\d+)$"
+)
+
+
 class WandBLogger(BaseLogger, SupervisedPlugin):
     """Weights and Biases logger.
 
@@ -72,6 +81,9 @@ def __init__(
         :param project_name: Name of the W&B project.
         :param run_name: Name of the W&B run.
         :param log_artifacts: Option to log model weights as W&B Artifacts.
+            Note that, in order for model weights to be logged, the
+            :class:`WeightCheckpoint` metric must be added to the
+            evaluation plugin.
         :param path: Path to locally save the model checkpoints.
         :param uri: URI identifier for external storage buckets (GCS, S3).
         :param sync_tfboard: Syncs TensorBoard to the W&B dashboard UI.
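# A hedged sketch of how the reworked WeightCheckpoint metric and the WandBLogger are
# meant to cooperate: the metric now emits the serialized model as bytes, and the
# logger parses the metric name with CHECKPOINT_METRIC_NAME before uploading the blob
# as a W&B artifact. Project and run names are placeholders.
from avalanche.evaluation.metrics.checkpoint import WeightCheckpoint
from avalanche.logging import WandBLogger
from avalanche.training.plugins import EvaluationPlugin

wandb_logger = WandBLogger(
    project_name="avalanche-demo",  # placeholder project
    run_name="clear-baseline",      # placeholder run
    log_artifacts=True,             # required for checkpoints to reach W&B
)
eval_plugin = EvaluationPlugin(WeightCheckpoint(), loggers=[wandb_logger])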
@@ -102,7 +114,8 @@ def __init__( def import_wandb(self): try: import wandb - except ImportError: + assert hasattr(wandb, '__version__') + except (ImportError, AssertionError): raise ImportError('Please run "pip install wandb" to install wandb') self.wandb = wandb @@ -151,6 +164,11 @@ def after_training_exp( def log_single_metric(self, name, value, x_plot): self.step = x_plot + if name.startswith("WeightCheckpoint"): + if self.log_artifacts: + self._log_checkpoint(name, value, x_plot) + return + if isinstance(value, AlternativeValues): value = value.best_supported_value( Image, @@ -189,26 +207,53 @@ def log_single_metric(self, name, value, x_plot): {name: self.wandb.Image(array(value))}, step=self.step ) - elif name.startswith("WeightCheckpoint"): - if self.log_artifacts: - cwd = os.getcwd() - ckpt = os.path.join(cwd, self.path) - try: - os.makedirs(ckpt) - except OSError as e: - if e.errno != errno.EEXIST: - raise - suffix = ".pth" - dir_name = os.path.join(ckpt, name + suffix) - artifact_name = os.path.join("Models", name + suffix) - if isinstance(value, Tensor): - torch.save(value, dir_name) - name = os.path.splittext(self.checkpoint) - artifact = self.wandb.Artifact(name, type="model") - artifact.add_file(dir_name, name=artifact_name) - self.wandb.run.log_artifact(artifact) - if self.uri is not None: - artifact.add_reference(self.uri, name=artifact_name) + def _log_checkpoint(self, name, value, x_plot): + assert self.wandb is not None + + # Example: 'WeightCheckpoint/train_phase/train_stream/Task000/Exp000' + name_match = CHECKPOINT_METRIC_NAME.match(name) + if name_match is None: + warnings.warn( + f'Checkpoint metric has unsupported name {name}.' + ) + return + # phase_name: str = name_match['phase_name'] + # stream_name: str = name_match['stream_name'] + task_id: Optional[int] = \ + int(name_match['task_id']) \ + if name_match['task_id'] is not None \ + else None + experience_id: int = int(name_match['experience_id']) + assert experience_id >= 0 + + cwd = Path.cwd() + checkpoint_directory = cwd / self.path + checkpoint_directory.mkdir(parents=True, exist_ok=True) + + checkpoint_name = "Model_{}".format(experience_id) + checkpoint_file_name = checkpoint_name + '.pth' + checkpoint_path = checkpoint_directory / checkpoint_file_name + artifact_name = 'Models/' + checkpoint_file_name + + # Write the checkpoint blob + with open(checkpoint_path, 'wb') as f: + f.write(value) + + metadata = { + 'experience': experience_id, + 'x_step': x_plot, + **({'task_id': task_id} + if task_id is not None + else {})} + + artifact = self.wandb.Artifact( + checkpoint_name, + type='model', + metadata=metadata) + artifact.add_file(str(checkpoint_path), name=artifact_name) + self.wandb.run.log_artifact(artifact) + if self.uri is not None: + artifact.add_reference(self.uri, name=artifact_name) def __getstate__(self): state = self.__dict__.copy() diff --git a/avalanche/training/plugins/ewc.py b/avalanche/training/plugins/ewc.py index 6978c9167..3fb5fbaf5 100644 --- a/avalanche/training/plugins/ewc.py +++ b/avalanche/training/plugins/ewc.py @@ -121,6 +121,7 @@ def after_training_exp(self, strategy, **kwargs): strategy.experience.dataset, strategy.device, strategy.train_mb_size, + num_workers=kwargs.get('num_workers', 0) ) self.update_importances(importances, exp_counter) self.saved_params[exp_counter] = copy_params_dict(strategy.model) @@ -129,7 +130,14 @@ def after_training_exp(self, strategy, **kwargs): del self.saved_params[exp_counter - 1] def compute_importances( - self, model, criterion, optimizer, 
dataset, device, batch_size + self, + model, + criterion, + optimizer, + dataset, + device, + batch_size, + num_workers=0 ) -> Dict[str, ParamData]: """ Compute EWC importance matrix for each parameter @@ -156,7 +164,8 @@ def compute_importances( dataset.collate_fn if hasattr(dataset, "collate_fn") else None ) dataloader = DataLoader( - dataset, batch_size=batch_size, collate_fn=collate_fn + dataset, batch_size=batch_size, collate_fn=collate_fn, + num_workers=num_workers ) for i, batch in enumerate(dataloader): # get only input, target and task_id from the batch diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst index 03ca33f8f..3572c06e8 100644 --- a/docs/benchmarks.rst +++ b/docs/benchmarks.rst @@ -49,7 +49,7 @@ Streams ClassificationStream Experiences -""""""""" +""""""""""" .. autosummary:: :toctree: generated @@ -243,8 +243,10 @@ Benchmark Generators | This set of functions tries to cover most common use cases (Class/Task-Incremental, Domain-Incremental, ...) but it also allows for the creation of entirely custom benchmarks (based on lists of tensors, on file lists, ...). +Class/Task/Domain-incremental benchmarks +""""""""""""""""""""""""""""""""""""""""""""""""""""""" + Generators for Class/Task/Domain-incremental benchmarks -........................................................ .. autosummary:: :toctree: generated @@ -253,8 +255,38 @@ Generators for Class/Task/Domain-incremental benchmarks ni_benchmark -Starting from tensor lists, file lists, PyTorch datasets -.......................................................... +Classification benchmarks +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Generate classification benchmarks, starting from tensor lists, file lists, PyTorch datasets + +.. autosummary:: + :toctree: generated + + dataset_classification_benchmark + filelist_classification_benchmark + paths_classification_benchmark + tensors_classification_benchmark + + +Detection / segmentation benchmarks +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Generate detection/segmentation benchmarks, starting from PyTorch datasets + + +.. autosummary:: + :toctree: generated + + dataset_detection_benchmark + + +Generic benchmarks +"""""""""""""""""" + + +Consider using the classification/detection when appropriate! + .. autosummary:: :toctree: generated @@ -266,7 +298,7 @@ Starting from tensor lists, file lists, PyTorch datasets Misc (make data-incremental, add a validation stream, ...) -.............................................................. +.......................................................... | Avalanche offers utilities to adapt a previously instantiated benchmark object. | More utilities to come! @@ -279,15 +311,16 @@ Misc (make data-incremental, add a validation stream, ...) .. currentmodule:: avalanche.benchmarks.utils -Utils (Data Loading and AvalancheDataset) +Utils ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -| The custom dataset and dataloader implementations contained in this sub-module are described in more detailed in the How-Tos about `"data loading and replay" ` and `"Avalanche Dataset" `. +| The custom dataset and dataloader implementations contained in this sub-module are described in more detailed in the How-Tos about `"Data Loading and Replay" `_ and `"Avalanche Dataset" `_. .. currentmodule:: avalanche.benchmarks.utils.data_loader Data Loaders -............................ +"""""""""""" + .. 
autosummary:: :toctree: generated @@ -300,7 +333,8 @@ Data Loaders .. currentmodule:: avalanche.benchmarks.utils AvalancheDataset -............................ +"""""""""""""""" + .. autosummary:: :toctree: generated diff --git a/docs/conf.py b/docs/conf.py index a39c0dbe7..bb9371744 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -64,7 +64,8 @@ 'sphinx.ext.githubpages', 'sphinx.ext.coverage', 'sphinx_rtd_theme', - 'sphinx_copybutton' + 'sphinx_copybutton', + 'sphinx_autodoc_typehints' ] autosummary_generate = True diff --git a/docs/evaluation.rst b/docs/evaluation.rst index 9fc627a40..1fc2abb43 100644 --- a/docs/evaluation.rst +++ b/docs/evaluation.rst @@ -4,7 +4,7 @@ Evaluation module | This module provides a number of metrics to monitor the continual learning performance. | Metrics subclass the :py:class:`PluginMetric` class, which provides all the callbacks needed to include custom metric logic in specific points of the continual learning workflow. -evaluation.metrics +avalanche.evaluation.metrics ---------------------------------------- .. contents:: @@ -187,7 +187,7 @@ Standalone Metrics -evaluation.metrics.detection +avalanche.evaluation.metrics.detection ---------------------------------------- | Metrics for Object Detection tasks. Please, take a look at the examples in the `examples` folder of Avalanche to better understand how to use these metrics. @@ -207,8 +207,8 @@ evaluation.metrics.detection DetectionMetrics -evaluation.metric_definitions -------------------------------- +avalanche.evaluation.metric_definitions +--------------------------------------- General interfaces on which metrics are built. @@ -227,8 +227,8 @@ General interfaces on which metrics are built. GenericPluginMetric -evaluation.metric_results -------------------------------- +avalanche.evaluation.metric_results +----------------------------------- Metric result types diff --git a/docs/logging.rst b/docs/logging.rst index b31a24154..32b3be3bb 100644 --- a/docs/logging.rst +++ b/docs/logging.rst @@ -4,7 +4,7 @@ Logging module | This module provides a number of automatic logging facilities to monitor continual learning experiments. | Loggers should be provided as input to the :py:class:`EvaluationPlugin` class. -logging +avalanche.logging ---------------------------------------- .. contents:: diff --git a/docs/models.rst b/docs/models.rst index 90111a98d..411f2cbd4 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -3,7 +3,7 @@ Models module | This module provides models and building blocks to design continual learning architectures. -models +avalanche.models ---------------------------------------- .. contents:: diff --git a/docs/requirements.txt b/docs/requirements.txt index bc2fdb83c..2d4b04ca4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -21,4 +21,5 @@ higher lvis ctrl-benchmark sphinx_copybutton -dill \ No newline at end of file +dill +sphinx_autodoc_typehints \ No newline at end of file diff --git a/docs/training.rst b/docs/training.rst index 8f31edfcf..d1bf5f3fe 100644 --- a/docs/training.rst +++ b/docs/training.rst @@ -1,23 +1,23 @@ Training module ============================ -.. currentmodule:: avalanche.training - -training +avalanche.training ---------------------------------------- +.. currentmodule:: avalanche.training + .. contents:: :depth: 2 :local: :backlinks: top Training Templates ------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Templates define the training/eval loop for each setting (supervised CL, online CL, RL, ...). 
Each template supports a set of callback that can be used by a plugin to execute code inside the training/eval loops. -Templates -""""""""" +Strategy Templates +"""""""""""""""""" Templates are defined in the `avalanche.training.templates` module. @@ -32,8 +32,8 @@ Templates are defined in the `avalanche.training.templates` module. OnlineSupervisedTemplate -Plugins ABCs -"""""""""""" +Plugins ABCs / Templates +"""""""""""""""""""""""" ABCs for plugins are available in `avalanche.core`. @@ -50,7 +50,7 @@ ABCs for plugins are available in `avalanche.core`. .. currentmodule:: avalanche.training Training Strategies ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Ready-to-use continual learning strategies. @@ -83,7 +83,7 @@ Ready-to-use continual learning strategies. MIR Replay Buffers and Selection Strategies ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Buffers to store past samples according to different policies and selection strategies. @@ -115,7 +115,7 @@ Selection strategies Loss Functions ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: generated @@ -126,12 +126,12 @@ Loss Functions Training Plugins ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Plugins can be added to any CL strategy to support additional behavior. -Utilities -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Utility Plugins +"""""""""""""""""""" Utilities in `avalanche.training.plugins`. @@ -145,8 +145,8 @@ Utilities in `avalanche.training.plugins`. LRSchedulerPlugin -Strategies -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Strategy Plugins +"""""""""""""""""""" Strategy implemented as plugins in `avalanche.training.plugins`. @@ -175,12 +175,12 @@ Strategy implemented as plugins in `avalanche.training.plugins`. Utilities ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Checkpointing allows to save and load serialized strategies to stop and resume experiments. -Utilities -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Checkpointing +"""""""""""""""""""""""""""""""""""""""" Utilities in `avalanche.training.checkpoint`. 
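Usage note for the `num_workers` pass-through added to `EWCPlugin` in the ewc.py hunk above: a minimal sketch, not part of the patch, assuming the stock `EWC` strategy, `SimpleMLP`, and the `tensors_classification_benchmark` generator introduced later in this change set; all shapes, sizes, and hyperparameters are arbitrary placeholders.

import torch
from torch.nn import CrossEntropyLoss
from torch.optim import SGD

from avalanche.benchmarks.generators.benchmark_generators import (
    tensors_classification_benchmark,
)
from avalanche.models import SimpleMLP
from avalanche.training import EWC

# Two tiny tensor-based experiences, following the pattern used in the tests below.
pattern_shape = (3, 32, 32)
benchmark = tensors_classification_benchmark(
    train_tensors=[
        (torch.zeros(64, *pattern_shape), torch.randint(0, 10, (64,))),
        (torch.zeros(64, *pattern_shape), torch.randint(0, 10, (64,))),
    ],
    test_tensors=[
        (torch.zeros(32, *pattern_shape), torch.zeros(32, dtype=torch.long))
    ],
    task_labels=[0, 0],
    complete_test_set_only=True,
)

model = SimpleMLP(input_size=3 * 32 * 32, num_classes=10)
strategy = EWC(
    model,
    SGD(model.parameters(), lr=0.01),
    CrossEntropyLoss(),
    ewc_lambda=0.4,
    train_mb_size=32,
    train_epochs=1,
)

for experience in benchmark.train_stream:
    # `num_workers` travels through **kwargs to the plugin callbacks, so
    # EWCPlugin.after_training_exp now forwards it to compute_importances
    # and its internal DataLoader.
    strategy.train(experience, num_workers=2)
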
diff --git a/environment-dev.yml b/environment-dev.yml index fba6b6fc1..c8050991a 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -38,3 +38,4 @@ dependencies: - gym - lvis - dill + - sphinx_autodoc_typehints diff --git a/examples/task_metrics.py b/examples/task_metrics.py index 1e4fa24da..aac53af1e 100644 --- a/examples/task_metrics.py +++ b/examples/task_metrics.py @@ -19,9 +19,8 @@ from torch.nn import CrossEntropyLoss from torch.optim import SGD -from avalanche.benchmarks.generators.benchmark_generators import ( - create_multi_dataset_generic_benchmark, -) +from avalanche.benchmarks.scenarios.classification_benchmark_creation import \ + create_multi_dataset_classification_benchmark from avalanche.benchmarks.utils import make_tensor_classification_dataset from avalanche.evaluation.metrics import ( forgetting_metrics, @@ -56,6 +55,7 @@ def main(args): torch.randn(10, 3), torch.randint(0, 3, (10,)), task_labels=torch.randint(0, 5, (10,)).tolist(), + targets=1 ) for _ in range(3) ] @@ -64,10 +64,11 @@ def main(args): torch.randn(10, 3), torch.randint(0, 3, (10,)), task_labels=torch.randint(0, 5, (10,)).tolist(), + targets=1 ) for _ in range(3) ] - benchmark = create_multi_dataset_generic_benchmark( + benchmark = create_multi_dataset_classification_benchmark( train_datasets=tr_ds, test_datasets=ts_ds ) # --------- diff --git a/examples/wandb_logger.py b/examples/wandb_logger.py index 2d49d5112..0bae27342 100644 --- a/examples/wandb_logger.py +++ b/examples/wandb_logger.py @@ -24,6 +24,7 @@ from avalanche.benchmarks import nc_benchmark from avalanche.benchmarks.datasets.dataset_utils import default_dataset_location +from avalanche.evaluation.metrics.checkpoint import WeightCheckpoint from avalanche.logging import InteractiveLogger, WandBLogger from avalanche.training.plugins import EvaluationPlugin from avalanche.evaluation.metrics import ( @@ -87,7 +88,11 @@ def main(args): interactive_logger = InteractiveLogger() wandb_logger = WandBLogger( - project_name=args.project, run_name=args.run, config=vars(args) + project_name=args.project, + run_name=args.run, + log_artifacts=args.artifacts, + path=args.path if args.path else None, + config=vars(args) ) eval_plugin = EvaluationPlugin( @@ -130,6 +135,7 @@ def main(args): minibatch=True, epoch=True, experience=True, stream=True ), MAC_metrics(minibatch=True, epoch=True, experience=True), + WeightCheckpoint(), loggers=[interactive_logger, wandb_logger], ) @@ -167,9 +173,15 @@ def main(args): default=0, help="Select zero-indexed cuda device. 
-1 to use CPU.", ) - parser.add_argument("--run", type=str, help="Provide a run name for WandB") parser.add_argument( "--project", type=str, help="Define the name of the WandB project" ) + parser.add_argument("--run", type=str, help="Provide a run name for WandB") + parser.add_argument('--artifacts', default=False, + action="store_true", + help='Log Model Checkpoints as W&B Artifacts') + parser.add_argument('--path', type=str, default="Checkpoint", + help='Local path to save the model checkpoints') + args = parser.parse_args() main(args) diff --git a/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb b/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb index ceb11b08a..8995655d7 100644 --- a/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "16b4b118", "metadata": { @@ -27,6 +28,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6142a7a6", "metadata": {}, @@ -112,6 +114,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a2f992a4", "metadata": {}, @@ -132,6 +135,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "e0c2028f", "metadata": {}, @@ -220,6 +224,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0e0f538c", "metadata": {}, @@ -270,6 +275,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b905086a", "metadata": {}, @@ -337,6 +343,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "13812ca1", "metadata": {}, @@ -365,6 +372,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "37855c54", "metadata": {}, @@ -433,6 +441,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6f226374", "metadata": {}, @@ -465,6 +474,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "20feadf7", "metadata": {}, @@ -500,6 +510,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6c56f98c", "metadata": {}, @@ -557,6 +568,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "56236ffe", "metadata": {}, @@ -614,6 +626,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ea760848", "metadata": {}, @@ -625,7 +638,18 @@ "* **filelist\\_benchmark**\n", "* **paths\\_benchmark**\n", "* **dataset\\_benchmark**\n", - "* **tensors\\_benchmark**" + "* **tensors\\_benchmark**\n", + "\n", + "In addition, generic generators exists for *classification* and *detection* problems. Those generators accept the same parameters as the ones listed above, but they return benchmarks objects with additional useful problem-specific fields. Those generators are:\n", + "\n", + "* **filelist\\_classification\\_benchmark**\n", + "* **paths\\_classification\\_benchmark**\n", + "* **dataset\\_classification\\_benchmark**\n", + "* **tensors\\_classification\\_benchmark**\n", + "\n", + "(replace *classification* with *detection* for detection/segmentation datasets).\n", + "\n", + " In the following examples, we will use their plain version. 
Let's import them:" ] }, { @@ -640,6 +664,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "139a567d", "metadata": {}, @@ -674,6 +699,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f13e0cf0", "metadata": {}, @@ -721,7 +747,7 @@ " \"{} {}\\n\".format(os.path.join(rel_dir, name), t_label)\n", " )\n", "\n", - "# Here we create a GenericCLScenario ready to be iterated\n", + "# Here we create a DatasetScenario ready to be iterated\n", "generic_scenario = filelist_benchmark(\n", " dirpath, \n", " [\"train_filelist_00.txt\", \"train_filelist_01.txt\"],\n", @@ -734,6 +760,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3ea44517", "metadata": {}, @@ -763,7 +790,7 @@ " experience_paths.append(instance_tuple)\n", " train_experiences.append(experience_paths)\n", "\n", - "# Here we create a GenericCLScenario ready to be iterated\n", + "# Here we create a DatasetScenario ready to be iterated\n", "generic_scenario = paths_benchmark(\n", " train_experiences,\n", " [train_experiences[0]], # Single test set\n", @@ -775,6 +802,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "639676ce", "metadata": {}, @@ -812,6 +840,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "4522b04d", "metadata": {}, @@ -858,6 +887,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "342718b3", "metadata": {}, @@ -898,6 +928,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "42a39ec6", "metadata": {}, diff --git a/tests/benchmarks/scenarios/test_classification_scenario.py b/tests/benchmarks/scenarios/test_classification_scenario.py index bf5afc9ed..688623989 100644 --- a/tests/benchmarks/scenarios/test_classification_scenario.py +++ b/tests/benchmarks/scenarios/test_classification_scenario.py @@ -5,10 +5,11 @@ import torch from avalanche.benchmarks import ( - dataset_benchmark, ClassificationExperience, ClassificationScenario, ) +from avalanche.benchmarks.generators.benchmark_generators import \ + dataset_classification_benchmark from avalanche.benchmarks.utils import make_tensor_classification_dataset @@ -54,7 +55,7 @@ def test_classes_in_exp(self): ) ) - benchmark_instance = dataset_benchmark( + benchmark_instance = dataset_classification_benchmark( train_datasets=train_exps, test_datasets=test_exps, other_streams_datasets={"other": other_stream_exps}, @@ -137,7 +138,7 @@ def test_classes_in_this_experience(self): ) ) - benchmark_instance = dataset_benchmark( + benchmark_instance = dataset_classification_benchmark( train_datasets=train_exps, test_datasets=test_exps, other_streams_datasets={"other": other_stream_exps}, diff --git a/tests/benchmarks/test_avalanche_dataset.py b/tests/benchmarks/test_avalanche_dataset.py index 3de055fe3..55def8cc5 100644 --- a/tests/benchmarks/test_avalanche_dataset.py +++ b/tests/benchmarks/test_avalanche_dataset.py @@ -31,7 +31,6 @@ ) from avalanche.benchmarks.utils.classification_dataset import ( ClassificationDataset, - SupervisedClassificationDataset, ) from tests.unit_tests_utils import ( load_image_benchmark, @@ -400,13 +399,13 @@ def test_avalanche_dataset_mixed_task_labels(self): def test_avalanche_dataset_update_data_attribute(self): dataset_orig = load_image_benchmark() - dataset: SupervisedClassificationDataset = make_classification_dataset( + dataset: ClassificationDataset = make_classification_dataset( dataset_orig, transform=ToTensor(), task_labels=0 ) - self.assertIsInstance(dataset, SupervisedClassificationDataset) + self.assertIsInstance(dataset, ClassificationDataset) 
dataset_element = dataset[101] self.assertEqual(3, len(dataset_element)) # x, y, t diff --git a/tests/benchmarks/test_flat_data.py b/tests/benchmarks/test_flat_data.py index 0907064b8..9df3057fa 100644 --- a/tests/benchmarks/test_flat_data.py +++ b/tests/benchmarks/test_flat_data.py @@ -145,8 +145,8 @@ def test_concat_flattens_same_dataset_corner_case(self): C = B.concat(A) self.assertListEqual([2, 3, 1, 2, 3], list(C)) - def test_concat_flattens_same_classification_dataset(self): - D = ClassificationDataset([[1, 2, 3]]) + def test_concat_flattens_same_avalanche_dataset(self): + D = AvalancheDataset([[1, 2, 3]]) B = concat_datasets([]) B = B.concat(D) B = D.concat(B) diff --git a/tests/distributed/test_distributed_helper.py b/tests/distributed/test_distributed_helper.py index 123c281b5..ec0f959f6 100644 --- a/tests/distributed/test_distributed_helper.py +++ b/tests/distributed/test_distributed_helper.py @@ -10,8 +10,9 @@ import torch.distributed as dst from torch.nn import Module from torch.nn.parallel import DistributedDataParallel -from avalanche.benchmarks.generators.benchmark_generators import \ - dataset_benchmark +from avalanche.benchmarks.generators.benchmark_generators import ( + dataset_classification_benchmark, +) from avalanche.benchmarks.utils.classification_dataset import \ make_tensor_classification_dataset @@ -80,7 +81,7 @@ def test_wrap_model(self): model.eval() model_wrapped.eval() - benchmark = dataset_benchmark( + benchmark = dataset_classification_benchmark( [make_tensor_classification_dataset( mb_x, mb_y, mb_t, task_labels=mb_t.tolist() )], diff --git a/tests/test_core50.py b/tests/test_core50.py index fe4e4c736..b9f06dcb0 100644 --- a/tests/test_core50.py +++ b/tests/test_core50.py @@ -38,6 +38,17 @@ def test_core50_nc_benchmark(self): classes_in_test = benchmark_instance.classes_in_experience["test"][0] self.assertSetEqual(set(range(50)), set(classes_in_test)) + # Regression tests for issue #774 + self.assertSequenceEqual( + [10] + ([5] * 8), + benchmark_instance.n_classes_per_exp) + self.assertSetEqual( + set(range(50)), + set(benchmark_instance.classes_order)) + self.assertEqual( + 50, + len(benchmark_instance.classes_order)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_custom_streams.py b/tests/test_custom_streams.py index 3746af861..216ce306e 100644 --- a/tests/test_custom_streams.py +++ b/tests/test_custom_streams.py @@ -2,9 +2,11 @@ import torch from torch.utils.data import TensorDataset +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationScenario, +) from avalanche.benchmarks.utils import make_tensor_classification_dataset -from avalanche.benchmarks import GenericCLScenario from avalanche.benchmarks.utils import make_classification_dataset @@ -59,7 +61,7 @@ def test_custom_streams_name_and_length(self): valid_t_labels = [{9}, {4, 5}, {7, 8}, {0}, {3}] with self.assertRaises(Exception): - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": (train_exps,), "test": (test_exps,), @@ -69,7 +71,7 @@ def test_custom_streams_name_and_length(self): valid_t_labels = valid_t_labels[:-1] - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": (train_exps,), "test": (test_exps,), @@ -148,7 +150,7 @@ def test_complete_test_set_only(self): ) with self.assertRaises(Exception): - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": 
(train_exps,), "test": (test_exps,), @@ -156,7 +158,7 @@ def test_complete_test_set_only(self): complete_test_set_only=True, ) - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": (train_exps,), "test": (test_exps[0],), diff --git a/tests/test_high_level_generators.py b/tests/test_high_level_generators.py index c01474907..52e22aaf7 100644 --- a/tests/test_high_level_generators.py +++ b/tests/test_high_level_generators.py @@ -1,3 +1,4 @@ +from functools import partial import os import tempfile import unittest @@ -8,7 +9,15 @@ from torchvision.datasets import MNIST from torchvision.datasets.utils import download_url, extract_archive from torchvision.transforms import ToTensor -from tests.unit_tests_utils import DummyImageDataset +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationScenario, +) +from avalanche.benchmarks.scenarios.dataset_scenario import DatasetScenario +from avalanche.benchmarks.scenarios.detection_scenario import DetectionScenario +from tests.unit_tests_utils import ( + DummyImageDataset, + get_fast_detection_datasets, +) from avalanche.benchmarks import ( @@ -22,6 +31,11 @@ from avalanche.benchmarks.datasets import default_dataset_location from avalanche.benchmarks.generators.benchmark_generators import ( class_balanced_split_strategy, + dataset_classification_benchmark, + dataset_detection_benchmark, + filelist_classification_benchmark, + paths_classification_benchmark, + tensors_classification_benchmark, ) from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( create_lazy_generic_benchmark, @@ -50,9 +64,36 @@ def test_dataset_benchmark(self): train_cifar10 = DummyImageDataset(n_classes=10) test_cifar10 = DummyImageDataset(n_classes=10) - generic_benchmark = dataset_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = dataset_benchmark( + [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] + ) + self.assertIsInstance(generic_benchmark, DatasetScenario) + + classification_benchmark = dataset_classification_benchmark( [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] ) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) + + def test_dataset_detection_benchmark(self): + train_det, test_det = get_fast_detection_datasets() + + generic_benchmark = dataset_benchmark( + [train_det], [test_det] + ) + self.assertIsInstance(generic_benchmark, DatasetScenario) + + classification_benchmark = dataset_detection_benchmark( + [train_det], [test_det] + ) + self.assertIsInstance(classification_benchmark, DetectionScenario) def test_dataset_benchmark_avalanche_dataset(self): train_MNIST = make_classification_dataset( @@ -83,7 +124,14 @@ def test_dataset_benchmark_avalanche_dataset(self): task_labels=1, ) - generic_benchmark = dataset_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = dataset_benchmark( + [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] + ) + + classification_benchmark = dataset_classification_benchmark( [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] ) @@ -91,6 +139,13 @@ 
def test_dataset_benchmark_avalanche_dataset(self): self.assertEqual(1, generic_benchmark.train_stream[1].task_label) self.assertEqual(0, generic_benchmark.test_stream[0].task_label) self.assertEqual(1, generic_benchmark.test_stream[1].task_label) + self.assertIsInstance(generic_benchmark, DatasetScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) def test_filelist_benchmark(self): download_url( @@ -126,7 +181,6 @@ def test_filelist_benchmark(self): wf.write( "{} {}\n".format(os.path.join(rel_dir, name), label) ) - generic_benchmark = filelist_benchmark( dirpath, list_paths, @@ -137,8 +191,20 @@ def test_filelist_benchmark(self): eval_transform=ToTensor(), ) + classification_benchmark = filelist_classification_benchmark( + dirpath, + list_paths, + [list_paths[0]], + task_labels=[0, 0], + complete_test_set_only=True, + train_transform=ToTensor(), + eval_transform=ToTensor(), + ) + self.assertEqual(2, len(generic_benchmark.train_stream)) self.assertEqual(1, len(generic_benchmark.test_stream)) + self.assertIsInstance(generic_benchmark, ClassificationScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) def test_paths_benchmark(self): download_url( @@ -168,7 +234,19 @@ def test_paths_benchmark(self): experience_paths.append(instance_tuple) train_experiences.append(experience_paths) - generic_benchmark = paths_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = paths_benchmark( + train_experiences, + [train_experiences[0]], # Single test set + task_labels=[0, 0], + complete_test_set_only=True, + train_transform=ToTensor(), + eval_transform=ToTensor(), + ) + + classification_benchmark = paths_classification_benchmark( train_experiences, [train_experiences[0]], # Single test set task_labels=[0, 0], @@ -179,6 +257,13 @@ def test_paths_benchmark(self): self.assertEqual(2, len(generic_benchmark.train_stream)) self.assertEqual(1, len(generic_benchmark.test_stream)) + self.assertIsInstance(generic_benchmark, DatasetScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) def test_tensors_benchmark(self): pattern_shape = (3, 32, 32) @@ -196,7 +281,20 @@ def test_tensors_benchmark(self): test_x = torch.zeros(50, *pattern_shape) test_y = torch.zeros(50, dtype=torch.long) - generic_benchmark = tensors_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = tensors_benchmark( + train_tensors=[ + (experience_1_x, experience_1_y), + (experience_2_x, experience_2_y), + ], + test_tensors=[(test_x, test_y)], + task_labels=[0, 0], # Task label of each train exp + complete_test_set_only=True, + ) + + classification_benchmark = tensors_classification_benchmark( train_tensors=[ (experience_1_x, experience_1_y), (experience_2_x, experience_2_y), @@ -208,6 +306,13 @@ def test_tensors_benchmark(self): self.assertEqual(2, 
len(generic_benchmark.train_stream)) self.assertEqual(1, len(generic_benchmark.test_stream)) + self.assertIsInstance(generic_benchmark, DatasetScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) def test_data_incremental_benchmark(self): pattern_shape = (3, 32, 32) @@ -265,7 +370,7 @@ def test_data_incremental_benchmark(self): for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(ref_tensor_x[tensor_idx], x)) self.assertTrue( - torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y)) + torch.equal(ref_tensor_y[tensor_idx], torch.as_tensor(y)) ) tensor_idx += 1 @@ -275,7 +380,7 @@ def test_data_incremental_benchmark(self): tensor_idx = 0 for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(test_x[tensor_idx], x)) - self.assertTrue(torch.equal(test_y[tensor_idx], torch.tensor(y))) + self.assertTrue(torch.equal(test_y[tensor_idx], torch.as_tensor(y))) tensor_idx += 1 def test_data_incremental_benchmark_from_lazy_benchmark(self): @@ -347,7 +452,7 @@ def test_gen(): for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(ref_tensor_x[tensor_idx], x)) self.assertTrue( - torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y)) + torch.equal(ref_tensor_y[tensor_idx], torch.as_tensor(y)) ) tensor_idx += 1 @@ -357,7 +462,7 @@ def test_gen(): tensor_idx = 0 for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(test_x[tensor_idx], x)) - self.assertTrue(torch.equal(test_y[tensor_idx], torch.tensor(y))) + self.assertTrue(torch.equal(test_y[tensor_idx], torch.as_tensor(y))) tensor_idx += 1 def test_benchmark_with_validation_stream_fixed_size(self): @@ -441,7 +546,7 @@ def test_benchmark_with_validation_stream_rel_size(self): test_x = torch.zeros(50, *pattern_shape) test_y = torch.zeros(50, dtype=torch.long) - initial_benchmark_instance = tensors_benchmark( + initial_benchmark_instance = tensors_classification_benchmark( train_tensors=[ (experience_1_x, experience_1_y), (experience_2_x, experience_2_y), @@ -501,11 +606,78 @@ def test_benchmark_with_validation_stream_rel_size(self): self.assertTrue(torch.equal(test_y, mb[1])) # Regression test for #1371 - self.assertEquals( + self.assertEqual( [0], valid_benchmark.train_stream[0].classes_in_this_experience ) + def test_benchmark_with_validation_stream_class_balanced(self): + pattern_shape = (3, 32, 32) + + # Definition of training experiences + # Experience 1 + experience_1_x = torch.zeros(100, *pattern_shape) + experience_1_y = torch.randint(0, 10, (100,), dtype=torch.long) + + # Experience 2 + experience_2_x = torch.zeros(80, *pattern_shape) + experience_2_y = torch.randint(0, 10, (80,), dtype=torch.long) + + # Test experience + test_x = torch.zeros(50, *pattern_shape) + test_y = torch.zeros(50, dtype=torch.long) + + initial_benchmark_instance = tensors_classification_benchmark( + train_tensors=[ + (experience_1_x, experience_1_y), + (experience_2_x, experience_2_y), + ], + test_tensors=[(test_x, test_y)], + task_labels=[0, 0], # Task label of each train exp + complete_test_set_only=True, + ) + + validation_size = 0.2 + class_balanced_strat = partial( + class_balanced_split_strategy, + validation_size + ) + valid_benchmark = benchmark_with_validation_stream( + initial_benchmark_instance, + custom_split_strategy=class_balanced_strat + ) + + _, count_1 = 
torch.unique(experience_1_y, return_counts=True) + expected_class_sizes_1 = [int(validation_size * x) for x in count_1] + expected_size_1 = sum(expected_class_sizes_1) + + _, count_2 = torch.unique(experience_2_y, return_counts=True) + expected_class_sizes_2 = [int(validation_size * x) for x in count_2] + expected_size_2 = sum(expected_class_sizes_2) + + self.assertEqual(2, len(valid_benchmark.train_stream)) + self.assertEqual(2, len(valid_benchmark.valid_stream)) + self.assertEqual(1, len(valid_benchmark.test_stream)) + self.assertTrue(valid_benchmark.complete_test_set_only) + + self.assertEqual( + 100 - expected_size_1, + len(valid_benchmark.train_stream[0].dataset)) + self.assertEqual( + 80 - expected_size_2, + len(valid_benchmark.train_stream[1].dataset)) + self.assertEqual( + expected_size_1, + len(valid_benchmark.valid_stream[0].dataset)) + self.assertEqual( + expected_size_2, + len(valid_benchmark.valid_stream[1].dataset)) + + vd = valid_benchmark.test_stream[0].dataset + mb = get_mbatch(vd, len(vd)) + self.assertTrue(torch.equal(test_x, mb[0])) + self.assertTrue(torch.equal(test_y, mb[1])) + def test_lazy_benchmark_with_validation_stream_fixed_size(self): lazy_options = [None, True, False] for lazy_option in lazy_options: diff --git a/tests/test_models.py b/tests/test_models.py index 498f86923..812eeb472 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,6 +1,7 @@ import sys import os import copy +import tempfile import unittest @@ -646,10 +647,12 @@ def test_ncm_save_load(self): classifier = NCMClassifier() classifier.update_class_means_dict({1: torch.randn(5,), 2: torch.randn(5,)}) - torch.save(classifier.state_dict(), 'ncm.pt') - del classifier - classifier = NCMClassifier() - check = torch.load('ncm.pt') + with tempfile.TemporaryFile() as tmpfile: + torch.save(classifier.state_dict(), tmpfile) + del classifier + classifier = NCMClassifier() + tmpfile.seek(0) + check = torch.load(tmpfile) classifier.load_state_dict(check) assert classifier.class_means.shape == (3, 5) assert (classifier.class_means[0] == 0).all() diff --git a/tests/training/test_plugins.py b/tests/training/test_plugins.py index c98a40ee0..57708e91a 100644 --- a/tests/training/test_plugins.py +++ b/tests/training/test_plugins.py @@ -15,9 +15,11 @@ from avalanche.benchmarks import ( nc_benchmark, - GenericCLScenario, benchmark_with_validation_stream, ) +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationScenario, +) from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader from avalanche.evaluation.metric_results import MetricValue from avalanche.evaluation.metrics import Mean @@ -345,7 +347,9 @@ def assert_model_equals(self, model1, model2): self.assertTrue(torch.equal(v, dict2[k])) def assert_benchmark_equals( - self, bench1: GenericCLScenario, bench2: GenericCLScenario + self, + bench1: ClassificationScenario, + bench2: ClassificationScenario ): self.assertSetEqual( set(bench1.streams.keys()), set(bench2.streams.keys()) diff --git a/tests/unit_tests_utils.py b/tests/unit_tests_utils.py index 7e6d232aa..d5b169fe6 100644 --- a/tests/unit_tests_utils.py +++ b/tests/unit_tests_utils.py @@ -1,10 +1,11 @@ +import itertools from os.path import expanduser import os import random import torch from PIL.Image import Image -from sklearn.datasets import make_classification +from sklearn.datasets import make_blobs, make_classification from sklearn.model_selection import train_test_split import numpy as np from torch.utils.data import TensorDataset, Dataset @@ 
-14,6 +15,9 @@ from torchvision.transforms import Compose, ToTensor from avalanche.benchmarks import nc_benchmark +from avalanche.benchmarks.utils.detection_dataset import ( + make_detection_dataset, +) # Environment variable used to skip some expensive tests that are very unlikely @@ -149,6 +153,129 @@ def get_fast_benchmark( return my_nc_benchmark +def get_fast_detection_datasets( + n_images=30, + max_elements_per_image=10, + n_samples_per_class=20, + n_classes=10, + seed=None, + image_size=64, + n_test_images=5 +): + if seed is not None: + np.random.seed(seed) + random.seed(seed) + + assert n_images * max_elements_per_image >= \ + n_samples_per_class * n_classes + assert n_test_images < n_images + assert n_test_images > 0 + + base_n_per_images = (n_samples_per_class * n_classes) // n_images + additional_elements = (n_samples_per_class * n_classes) % n_images + to_allocate = np.full(n_images, base_n_per_images) + to_allocate[:additional_elements] += 1 + np.random.shuffle(to_allocate) + classes_elements = np.repeat(np.arange(n_classes), n_samples_per_class) + np.random.shuffle(classes_elements) + + import matplotlib.colors as mcolors + forms = ['ellipse', 'rectangle', 'line', 'arc'] + colors = list(mcolors.TABLEAU_COLORS.values()) + combs = list(itertools.product(forms, colors)) + random.shuffle(combs) + + generated_images = [] + generated_targets = [] + for img_idx in range(n_images): + n_to_allocate = to_allocate[img_idx] + base_alloc_idx = to_allocate[:img_idx].sum() + classes_to_instantiate = \ + classes_elements[base_alloc_idx:base_alloc_idx+n_to_allocate] + + _, _, clusters = make_blobs( + n_to_allocate, + n_features=2, + centers=n_to_allocate, + center_box=(0, image_size-1), + random_state=seed, + return_centers=True) + + from PIL import Image as ImageApi + from PIL import ImageDraw + im = ImageApi.new('RGB', (image_size, image_size)) + draw = ImageDraw.Draw(im) + + target = { + 'boxes': torch.zeros((n_to_allocate, 4), dtype=torch.float32), + 'labels': torch.zeros((n_to_allocate,), dtype=torch.long), + 'image_id': torch.full((1,), img_idx, dtype=torch.long), + 'area': torch.zeros((n_to_allocate,), dtype=torch.float32), + 'iscrowd': torch.zeros((n_to_allocate,), dtype=torch.long) + } + + obj_sizes = np.random.uniform( + low=image_size * 0.1 * 0.95, + high=image_size * 0.1 * 1.05, + size=(n_to_allocate,)) + for center_idx, center in enumerate(clusters): + obj_size = float(obj_sizes[center_idx]) + class_to_gen = classes_to_instantiate[center_idx] + + class_form, class_color = combs[class_to_gen] + + left = center[0] - obj_size + top = center[1] - obj_size + right = center[0] + obj_size + bottom = center[1] + obj_size + ltrb = (left, top, right, bottom) + if class_form == 'ellipse': + draw.ellipse(ltrb, fill=class_color) + elif class_form == 'rectangle': + draw.rectangle(ltrb, fill=class_color) + elif class_form == 'line': + draw.line(ltrb, + fill=class_color, + width=max(1, int(obj_size*0.25))) + elif class_form == 'arc': + draw.arc(ltrb, fill=class_color, start=45, end=200) + else: + raise RuntimeError('Unsupported form') + + target["boxes"][center_idx] = torch.as_tensor(ltrb) + target["labels"][center_idx] = class_to_gen + target["area"][center_idx] = obj_size ** 2 + + generated_images.append(np.array(im)) + generated_targets.append(target) + im.close() + + test_indices = set( + np.random.choice( + n_images, + n_test_images, + replace=False).tolist()) + train_images = [x for i, x in enumerate(generated_images) + if i not in test_indices] + test_images = [x for i, x in 
enumerate(generated_images) + if i in test_indices] + + train_targets = [x for i, x in enumerate(generated_targets) + if i not in test_indices] + test_targets = [x for i, x in enumerate(generated_targets) + if i in test_indices] + + return make_detection_dataset( + list(zip(train_images, train_targets)), + targets=train_targets, + task_labels=0 + ), make_detection_dataset( + list(zip(test_images, test_targets)), + targets=test_targets, + task_labels=0 + ) + + class DummyImageDataset(Dataset): def __init__(self, n_elements=10000, n_classes=100): assert n_elements >= n_classes @@ -214,7 +341,21 @@ def set_deterministic_run(seed=0): "common_setups", "load_benchmark", "get_fast_benchmark", + "get_fast_detection_datasets", "load_experience_train_eval", "get_device", "set_deterministic_run", ] + + +# if __name__ == '__main__': +# from matplotlib import pyplot as plt + +# train_dset, test_dset = get_fast_detection_datasets() +# for i in range(3): +# plt.imshow(train_dset[i][0], interpolation='nearest') +# plt.show() + +# for i in range(3): +# plt.imshow(test_dset[i][0], interpolation='nearest') +# plt.show()
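
A condensed, standalone sketch of the class-balanced validation split exercised by test_benchmark_with_validation_stream_class_balanced above, assuming the `tensors_classification_benchmark` generator introduced in this change set; tensor shapes and the 20% split are arbitrary placeholders.

from functools import partial

import torch

from avalanche.benchmarks import benchmark_with_validation_stream
from avalanche.benchmarks.generators.benchmark_generators import (
    class_balanced_split_strategy,
    tensors_classification_benchmark,
)

# Two toy training experiences plus a single, complete test set.
benchmark = tensors_classification_benchmark(
    train_tensors=[
        (torch.zeros(100, 3, 32, 32), torch.randint(0, 10, (100,))),
        (torch.zeros(80, 3, 32, 32), torch.randint(0, 10, (80,))),
    ],
    test_tensors=[
        (torch.zeros(50, 3, 32, 32), torch.zeros(50, dtype=torch.long))
    ],
    task_labels=[0, 0],
    complete_test_set_only=True,
)

# Hold out roughly 20% of every class of each training experience.
class_balanced_split = partial(class_balanced_split_strategy, 0.2)
valid_benchmark = benchmark_with_validation_stream(
    benchmark, custom_split_strategy=class_balanced_split
)

# Each training experience shrinks by the number of held-out samples,
# and the validation stream mirrors the training stream length.
for train_exp, valid_exp in zip(
    valid_benchmark.train_stream, valid_benchmark.valid_stream
):
    print(len(train_exp.dataset), len(valid_exp.dataset))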