diff --git a/avalanche/benchmarks/classic/clear.py b/avalanche/benchmarks/classic/clear.py index ec71941c2..7db8ffdbd 100644 --- a/avalanche/benchmarks/classic/clear.py +++ b/avalanche/benchmarks/classic/clear.py @@ -24,8 +24,7 @@ We support both evaluation protocols for benchmark construction.""" from pathlib import Path -from typing import List, Sequence, Union, Any, Optional -from typing_extensions import Literal +from typing import Sequence, Union, Any, Optional from avalanche.benchmarks.datasets.clear import ( _CLEARImage, @@ -34,9 +33,12 @@ CLEAR_FEATURE_TYPES, _CLEAR_DATA_SPLITS, ) -from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_paths, - create_generic_benchmark_from_tensor_lists, +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_paths, + create_classification_benchmark_from_tensor_lists, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, ) EVALUATION_PROTOCOLS = ["iid", "streaming"] @@ -108,7 +110,7 @@ def CLEAR( Defaults to None, which means that the default location for str(data_name) will be used. - :returns: a properly initialized :class:`GenericCLScenario` instance. + :returns: a properly initialized :class:`ClassificationScenario` instance. """ assert data_name in _CLEAR_DATA_SPLITS @@ -130,6 +132,7 @@ def CLEAR( else: raise NotImplementedError() + benchmark_obj: CommonClassificationScenarioType if feature_type is None: clear_dataset_train = _CLEARImage( root=dataset_root, @@ -153,7 +156,7 @@ def CLEAR( test_samples_paths = clear_dataset_test.get_paths_and_targets( root_appended=True ) - benchmark_obj = create_generic_benchmark_from_paths( + benchmark_obj = create_classification_benchmark_from_paths( train_samples_paths, test_samples_paths, task_labels=list(range(len(train_samples_paths))), @@ -181,7 +184,7 @@ def CLEAR( train_samples = clear_dataset_train.tensors_and_targets test_samples = clear_dataset_test.tensors_and_targets - benchmark_obj = create_generic_benchmark_from_tensor_lists( + benchmark_obj = create_classification_benchmark_from_tensor_lists( train_samples, test_samples, task_labels=list(range(len(train_samples))), diff --git a/avalanche/benchmarks/classic/core50.py b/avalanche/benchmarks/classic/core50.py index f7e39b5b5..df6280bcf 100644 --- a/avalanche/benchmarks/classic/core50.py +++ b/avalanche/benchmarks/classic/core50.py @@ -10,8 +10,8 @@ ################################################################################ """ This module contains the high-level CORe50 benchmark generator. 
It -basically returns a iterable benchmark object ``GenericCLScenario`` given a -number of configuration parameters.""" +basically returns a iterable benchmark object :class:`ClassificationScenario` +given a number of configuration parameters.""" from pathlib import Path from typing import Union, Optional, Any @@ -26,10 +26,13 @@ check_vision_benchmark, ) from avalanche.benchmarks.datasets import default_dataset_location -from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_filelists, +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_filelists, ) from avalanche.benchmarks.datasets.core50.core50 import CORe50Dataset +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, +) nbatch = { "ni": 8, @@ -109,7 +112,7 @@ def CORe50( location for 'core50' will be used. - :returns: a properly initialized :class:`GenericCLScenario` instance. + :returns: a properly initialized :class:`ClassificationScenario` instance. """ assert 0 <= run <= 9, ( @@ -149,15 +152,29 @@ def CORe50( / ("train_batch_" + str(batch_id).zfill(2) + "_filelist.txt") ) - benchmark_obj = create_generic_benchmark_from_filelists( - root_img, - train_failists_paths, - [root / filelists_bp / "test_filelist.txt"], - task_labels=[0 for _ in range(nbatch[scenario])], - complete_test_set_only=True, - train_transform=train_transform, - eval_transform=eval_transform, - ) + benchmark_obj: CommonClassificationScenarioType = \ + create_classification_benchmark_from_filelists( + root_img, + train_failists_paths, + [root / filelists_bp / "test_filelist.txt"], + task_labels=[0 for _ in range(nbatch[scenario])], + complete_test_set_only=True, + train_transform=train_transform, + eval_transform=eval_transform, + ) + + if scenario == 'nc': + n_classes_per_exp = [] + classes_order = [] + for exp in benchmark_obj.train_stream: + exp_dataset = exp.dataset + unique_targets = list(sorted( + set(int(x) for x in exp_dataset.targets) # type: ignore + )) + n_classes_per_exp.append(len(unique_targets)) + classes_order.extend(unique_targets) + setattr(benchmark_obj, 'n_classes_per_exp', n_classes_per_exp) + setattr(benchmark_obj, 'classes_order', classes_order) return benchmark_obj diff --git a/avalanche/benchmarks/classic/ctrl.py b/avalanche/benchmarks/classic/ctrl.py index b896ccf48..50461d71d 100644 --- a/avalanche/benchmarks/classic/ctrl.py +++ b/avalanche/benchmarks/classic/ctrl.py @@ -18,9 +18,12 @@ import torchvision.transforms.functional as F from torchvision import transforms from tqdm import tqdm +from avalanche.benchmarks.generators.benchmark_generators import ( + dataset_classification_benchmark, +) from avalanche.benchmarks.utils.classification_dataset import ( - SupervisedClassificationDataset, + ClassificationDataset, ) try: @@ -83,7 +86,7 @@ def CTrL( folder = path / "ctrl" / stream_name / f"seed_{seed}" # Train, val and test experiences - exps: List[List[SupervisedClassificationDataset]] = [[], [], []] + exps: List[List[ClassificationDataset]] = [[], [], []] for t_id, t in enumerate( tqdm(stream, desc=f"Loading {stream_name}"), ): @@ -104,7 +107,7 @@ def CTrL( common_root, exp_paths_list = common_paths_root(files) paths_dataset: PathsDataset[Image, int] = \ PathsDataset(common_root, exp_paths_list) - dataset: SupervisedClassificationDataset = \ + dataset: ClassificationDataset = \ make_classification_dataset( paths_dataset, task_labels=task_labels, @@ -126,7 +129,7 @@ def 
CTrL( if t_id == n_tasks - 1: break - return dataset_benchmark( + return dataset_classification_benchmark( train_datasets=exps[0], test_datasets=exps[2], other_streams_datasets=dict(val=exps[1]), diff --git a/avalanche/benchmarks/classic/endless_cl_sim.py b/avalanche/benchmarks/classic/endless_cl_sim.py index 367b082ee..87b0f1348 100644 --- a/avalanche/benchmarks/classic/endless_cl_sim.py +++ b/avalanche/benchmarks/classic/endless_cl_sim.py @@ -12,9 +12,15 @@ """ This module contains the high-level EndlessCLSim scenario generator. It returns an iterable scenario object -``GenericCLScenario`` given a number of configuration parameters. +:class:`ClassificationScenario` given a number of configuration parameters. """ +from avalanche.benchmarks.generators.benchmark_generators import ( + dataset_classification_benchmark, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, +) from avalanche.benchmarks.utils.classification_dataset import ( make_classification_dataset, ) @@ -27,11 +33,7 @@ from torchvision.transforms import ToTensor from torchvision.transforms.transforms import Compose -from avalanche.benchmarks.classic.classic_benchmarks_utils import ( - check_vision_benchmark, -) from avalanche.benchmarks.datasets import default_dataset_location -from avalanche.benchmarks.generators import dataset_benchmark from avalanche.benchmarks.utils import make_classification_dataset _default_transform = Compose([ToTensor()]) @@ -146,7 +148,11 @@ def EndlessCLSim( ) ) - scenario_obj = dataset_benchmark(train_datasets, eval_datasets) + scenario_obj: CommonClassificationScenarioType = \ + dataset_classification_benchmark( + train_datasets, + eval_datasets + ) return scenario_obj diff --git a/avalanche/benchmarks/classic/openloris.py b/avalanche/benchmarks/classic/openloris.py index 38648efa9..4fa8fca56 100644 --- a/avalanche/benchmarks/classic/openloris.py +++ b/avalanche/benchmarks/classic/openloris.py @@ -10,8 +10,8 @@ ################################################################################ """ This module contains the high-level OpenLORIS benchmark/factor generator. -It basically returns a iterable benchmark object ``GenericCLScenario`` given -a number of configuration parameters.""" +It basically returns a iterable benchmark object :class:`ClassificationScenario` +given a number of configuration parameters.""" from pathlib import Path from typing import Union, Any, Optional @@ -23,8 +23,11 @@ from avalanche.benchmarks.datasets.openloris import ( OpenLORIS as OpenLORISDataset, ) -from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_filelists, +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_filelists, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, ) @@ -92,7 +95,7 @@ def OpenLORIS( Defaults to None, which means that the default location for 'openloris' will be used. - :returns: a properly initialized :class:`GenericCLScenario` instance. + :returns: a properly initialized :class:`ClassificationScenario` instance. 
""" assert factor in nbatch.keys(), ( @@ -117,15 +120,16 @@ def OpenLORIS( / ("train_batch_" + str(i).zfill(2) + ".txt") ) - factor_obj = create_generic_benchmark_from_filelists( - dataset_root, - train_failists_paths, - [dataset_root / filelists_bp / "test.txt"], - task_labels=[0 for _ in range(nbatch[factor])], - complete_test_set_only=True, - train_transform=train_transform, - eval_transform=eval_transform, - ) + factor_obj: CommonClassificationScenarioType = \ + create_classification_benchmark_from_filelists( + dataset_root, + train_failists_paths, + [dataset_root / filelists_bp / "test.txt"], + task_labels=[0 for _ in range(nbatch[factor])], + complete_test_set_only=True, + train_transform=train_transform, + eval_transform=eval_transform, + ) return factor_obj diff --git a/avalanche/benchmarks/classic/stream51.py b/avalanche/benchmarks/classic/stream51.py index 970e9b00f..7020530d6 100644 --- a/avalanche/benchmarks/classic/stream51.py +++ b/avalanche/benchmarks/classic/stream51.py @@ -9,18 +9,26 @@ # Website: www.continualai.org # ################################################################################ from pathlib import Path -from typing import Any, List, Optional, Sequence, Tuple, Union +from typing import List, Optional, Union from typing_extensions import Literal + from avalanche.benchmarks.datasets import Stream51 +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_paths, +) +from avalanche.benchmarks.scenarios.classification_scenario import ( + CommonClassificationScenarioType, +) from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( - create_generic_benchmark_from_paths, FileAndLabel + FileAndLabel, ) from torchvision import transforms import math import os + _mu = [0.485, 0.456, 0.406] _std = [0.229, 0.224, 0.225] _default_stream51_transform = transforms.Compose( @@ -71,7 +79,7 @@ def CLStream51( train_transform=_default_stream51_transform, eval_transform=_default_stream51_transform, dataset_root: Optional[Union[str, Path]] = None -): +) -> CommonClassificationScenarioType: """ Creates a CL benchmark for Stream-51. @@ -125,7 +133,7 @@ def CLStream51( Defaults to None, which means that the default location for 'stream51' will be used. - :returns: A properly initialized :class:`GenericCLScenario` instance. + :returns: A properly initialized :class:`ClassificationScenario` instance. 
""" # get train and test sets and order them by benchmark @@ -283,14 +291,15 @@ def CLStream51( [(j[0], j[1]) for j in i] for i in test_ood_filelists_paths ] - benchmark_obj = create_generic_benchmark_from_paths( - train_lists_of_files=train_filelists_paths, - test_lists_of_files=test_filelists_paths, - task_labels=[0 for _ in range(num_tasks)], - complete_test_set_only=scenario == "instance", - train_transform=train_transform, - eval_transform=eval_transform, - ) + benchmark_obj: CommonClassificationScenarioType = \ + create_classification_benchmark_from_paths( + train_lists_of_files=train_filelists_paths, + test_lists_of_files=test_filelists_paths, + task_labels=[0 for _ in range(num_tasks)], + complete_test_set_only=scenario == "instance", + train_transform=train_transform, + eval_transform=eval_transform, + ) return benchmark_obj diff --git a/avalanche/benchmarks/generators/__init__.py b/avalanche/benchmarks/generators/__init__.py index 189bb63bb..c74607fdb 100644 --- a/avalanche/benchmarks/generators/__init__.py +++ b/avalanche/benchmarks/generators/__init__.py @@ -1,2 +1 @@ -from .scenario_generators import * from .benchmark_generators import * diff --git a/avalanche/benchmarks/generators/benchmark_generators.py b/avalanche/benchmarks/generators/benchmark_generators.py index a5465d883..99d75fcfd 100644 --- a/avalanche/benchmarks/generators/benchmark_generators.py +++ b/avalanche/benchmarks/generators/benchmark_generators.py @@ -32,6 +32,13 @@ ) import torch +from avalanche.benchmarks.scenarios.classification_benchmark_creation import ( + create_classification_benchmark_from_filelists, + create_classification_benchmark_from_paths, + create_classification_benchmark_from_tensor_lists, + create_lazy_classification_benchmark, + create_multi_dataset_classification_benchmark, +) from avalanche.benchmarks.scenarios.classification_scenario import \ ClassificationScenario @@ -58,13 +65,15 @@ from avalanche.benchmarks.scenarios.new_classes.nc_scenario import NCScenario from avalanche.benchmarks.scenarios.new_instances.ni_scenario import NIScenario from avalanche.benchmarks.utils.classification_dataset import ( - SupervisedClassificationDataset, + ClassificationDataset, SupportedDataset, - as_supervised_classification_dataset, make_classification_dataset, concat_classification_datasets_sequentially ) from avalanche.benchmarks.utils.data import AvalancheDataset +from avalanche.benchmarks.scenarios.detection_benchmark_creation import ( + create_multi_dataset_detection_benchmark, +) TDatasetScenario = TypeVar( @@ -228,10 +237,10 @@ class "34" will be mapped to "1", class "11" to "2" and so on. ) train_dataset_sup = list( - map(as_supervised_classification_dataset, train_dataset) + map(make_classification_dataset, train_dataset) ) test_dataset_sup = list( - map(as_supervised_classification_dataset, test_dataset) + map(make_classification_dataset, test_dataset) ) seq_train_dataset, seq_test_dataset, mapping = \ @@ -256,21 +265,21 @@ class "34" will be mapped to "1", class "11" to "2" and so on. 
# Overrides n_experiences (and per_experience_classes, already done) n_experiences = len(train_dataset) else: - seq_train_dataset = as_supervised_classification_dataset(train_dataset) - seq_test_dataset = as_supervised_classification_dataset(test_dataset) + seq_train_dataset = make_classification_dataset(train_dataset) + seq_test_dataset = make_classification_dataset(test_dataset) transform_groups = dict( train=(train_transform, None), eval=(eval_transform, None) ) # Set transformation groups - final_train_dataset = as_supervised_classification_dataset( + final_train_dataset = make_classification_dataset( seq_train_dataset, transform_groups=transform_groups, initial_transform_group="train", ) - final_test_dataset = as_supervised_classification_dataset( + final_test_dataset = make_classification_dataset( seq_test_dataset, transform_groups=transform_groups, initial_transform_group="eval", @@ -384,10 +393,10 @@ def ni_benchmark( ) train_dataset_sup = list( - map(as_supervised_classification_dataset, train_dataset) + map(make_classification_dataset, train_dataset) ) test_dataset_sup = list( - map(as_supervised_classification_dataset, test_dataset) + map(make_classification_dataset, test_dataset) ) seq_train_dataset, seq_test_dataset, _ = \ @@ -395,8 +404,8 @@ def ni_benchmark( train_dataset_sup, test_dataset_sup ) else: - seq_train_dataset = as_supervised_classification_dataset(train_dataset) - seq_test_dataset = as_supervised_classification_dataset(test_dataset) + seq_train_dataset = make_classification_dataset(train_dataset) + seq_test_dataset = make_classification_dataset(test_dataset) transform_groups = dict( train=(train_transform, None), eval=(eval_transform, None) @@ -439,6 +448,21 @@ def ni_benchmark( lazy_benchmark = create_lazy_generic_benchmark +# Classification-specific +dataset_classification_benchmark = \ + create_multi_dataset_classification_benchmark +filelist_classification_benchmark = \ + create_classification_benchmark_from_filelists +paths_classification_benchmark = create_classification_benchmark_from_paths +tensors_classification_benchmark = \ + create_classification_benchmark_from_tensor_lists +lazy_classification_benchmark = create_lazy_classification_benchmark + +# Detection-specific +dataset_detection_benchmark = \ + create_multi_dataset_detection_benchmark + + def _one_dataset_per_exp_class_order( class_list_per_exp: Sequence[Sequence[int]], shuffle: bool, @@ -809,9 +833,9 @@ def random_validation_split_strategy( def class_balanced_split_strategy( - validation_size: Union[int, float], - experience: DatasetExperience[SupervisedClassificationDataset], -) -> Tuple[SupervisedClassificationDataset, SupervisedClassificationDataset]: + validation_size: float, + experience: DatasetExperience[ClassificationDataset], +) -> Tuple[ClassificationDataset, ClassificationDataset]: """Class-balanced train/validation splits. This splitting strategy splits `experience` into two experiences @@ -831,18 +855,11 @@ def class_balanced_split_strategy( datasets. """ if not isinstance(validation_size, float): - raise ValueError("validation_size must be an integer") + raise ValueError("validation_size must be a float") if not 0.0 <= validation_size <= 1.0: raise ValueError("validation_size must be a float in [0, 1].") exp_dataset = experience.dataset - if validation_size > len(exp_dataset): - raise ValueError( - f"Can't create the validation experience: not enough " - f"instances. 
Required {validation_size}, got only" - f"{len(exp_dataset)}" - ) - exp_indices = list(range(len(exp_dataset))) targets_as_tensor = torch.as_tensor(experience.dataset.targets) exp_classes: List[int] = targets_as_tensor.unique().tolist() @@ -1132,6 +1149,13 @@ def random_validation_split_strategy_wrapper(exp): "filelist_benchmark", "paths_benchmark", "tensors_benchmark", + "lazy_benchmark", + "dataset_classification_benchmark", + "dataset_detection_benchmark", + "filelist_classification_benchmark", + "paths_classification_benchmark", + "tensors_classification_benchmark", + "lazy_classification_benchmark", "data_incremental_benchmark", "benchmark_with_validation_stream", "random_validation_split_strategy", diff --git a/avalanche/benchmarks/generators/scenario_generators.py b/avalanche/benchmarks/generators/scenario_generators.py deleted file mode 100644 index b3049595e..000000000 --- a/avalanche/benchmarks/generators/scenario_generators.py +++ /dev/null @@ -1,710 +0,0 @@ -################################################################################ -# Copyright (c) 2021 ContinualAI. # -# Copyrights licensed under the MIT License. # -# See the accompanying LICENSE file for terms. # -# # -# Date: 12-05-2020 # -# Author(s): Vincenzo Lomonaco # -# E-mail: contact@continualai.org # -# Website: avalanche.continualai.org # -################################################################################ - -""" This module contains DEPRECATED high-level benchmark generators. -Please use the ones found in benchmark_generators. -""" - -import warnings -from pathlib import Path -from typing import ( - Sequence, - Optional, - Dict, - SupportsInt, - Union, - Any, - Tuple, -) - -from torch import Tensor -from avalanche.benchmarks.generators.benchmark_generators import ( - nc_benchmark, - ni_benchmark, -) - -from avalanche.benchmarks.scenarios.classification_scenario import ( - GenericCLScenario, -) -from avalanche.benchmarks.scenarios.generic_scenario_creation import * -from avalanche.benchmarks.scenarios.new_classes.nc_scenario import NCScenario -from avalanche.benchmarks.scenarios.new_instances.ni_scenario import NIScenario -from avalanche.benchmarks.utils.classification_dataset import ( - SupportedDataset -) - - -def nc_scenario( - train_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - test_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - n_experiences: int, - task_labels: bool, - *, - shuffle: bool = True, - seed: Optional[int] = None, - fixed_class_order: Optional[Sequence[int]] = None, - per_exp_classes: Optional[Dict[int, int]] = None, - class_ids_from_zero_from_first_exp: bool = False, - class_ids_from_zero_in_each_exp: bool = False, - one_dataset_per_exp: bool = False, - reproducibility_data: Optional[Dict[str, Any]] = None -) -> NCScenario: - """ - This helper function is DEPRECATED in favor of `nc_benchmark`. - - This method is the high-level specific scenario generator for the - "New Classes" (NC) case. Given a sequence of train and test datasets creates - the continual stream of data as a series of experiences. Each experience - will contain all the patterns belonging to a certain set of classes and a - class won't be assigned to more than one experience. - - The ``task_labels`` parameter determines if each incremental experience has - an increasing task label or if, at the contrary, a default task label 0 - has to be assigned to all experiences. This can be useful when - differentiating between Single-Incremental-Task and Multi-Task scenarios. 
- - There are other important parameters that can be specified in order to tweak - the behaviour of the resulting scenario. Please take a few minutes to read - and understand them as they may save you a lot of work. - - This generator features a integrated reproducibility mechanism that allows - the user to store and later re-load a scenario. For more info see the - ``reproducibility_data`` parameter. - - :param train_dataset: A list of training datasets, or a single dataset. - :param test_dataset: A list of test datasets, or a single test dataset. - :param n_experiences: The number of incremental experience. This is not used - when using multiple train/test datasets with the ``one_dataset_per_exp`` - parameter set to True. - :param task_labels: If True, each experience will have an ascending task - label. If False, the task label will be 0 for all the experiences. - :param shuffle: If True, the class (or experience) order will be shuffled. - Defaults to True. - :param seed: If ``shuffle`` is True and seed is not None, the class (or - experience) order will be shuffled according to the seed. When None, the - current PyTorch random number generator state will be used. Defaults to - None. - :param fixed_class_order: If not None, the class order to use (overrides - the shuffle argument). Very useful for enhancing reproducibility. - Defaults to None. - :param per_exp_classes: Is not None, a dictionary whose keys are - (0-indexed) experience IDs and their values are the number of classes - to include in the respective experiences. The dictionary doesn't - have to contain a key for each experience! All the remaining experiences - will contain an equal amount of the remaining classes. The - remaining number of classes must be divisible without remainder - by the remaining number of experiences. For instance, - if you want to include 50 classes in the first experience - while equally distributing remaining classes across remaining - experiences, just pass the "{0: 50}" dictionary as the - per_experience_classes parameter. Defaults to None. - :param class_ids_from_zero_from_first_exp: If True, original class IDs - will be remapped so that they will appear as having an ascending - order. For instance, if the resulting class order after shuffling - (or defined by fixed_class_order) is [23, 34, 11, 7, 6, ...] and - class_ids_from_zero_from_first_exp is True, then all the patterns - belonging to class 23 will appear as belonging to class "0", - class "34" will be mapped to "1", class "11" to "2" and so on. - This is very useful when drawing confusion matrices and when dealing - with algorithms with dynamic head expansion. Defaults to False. - Mutually exclusive with the ``class_ids_from_zero_in_each_exp`` - parameter. - :param class_ids_from_zero_in_each_exp: If True, original class IDs - will be mapped to range [0, n_classes_in_exp) for each experience. - Defaults to False. Mutually exclusive with the - ``class_ids_from_zero_from_first_exp`` parameter. - :param one_dataset_per_exp: available only when multiple train-test - datasets are provided. If True, each dataset will be treated as a - experience. Mutually exclusive with the ``per_experience_classes`` and - ``fixed_class_order`` parameters. Overrides the ``n_experiences`` - parameter. Defaults to False. - :param reproducibility_data: If not None, overrides all the other - scenario definition options. This is usually a dictionary containing - data used to reproduce a specific experiment. 
One can use the - ``get_reproducibility_data`` method to get (and even distribute) - the experiment setup so that it can be loaded by passing it as this - parameter. In this way one can be sure that the same specific - experimental setup is being used (for reproducibility purposes). - Beware that, in order to reproduce an experiment, the same train and - test datasets must be used. Defaults to None. - - :return: A properly initialized :class:`NCScenario` instance. - """ - - warnings.warn( - "nc_scenario is deprecated in favor of nc_benchmark.", - DeprecationWarning, - ) - - return nc_benchmark( - train_dataset=train_dataset, - test_dataset=test_dataset, - n_experiences=n_experiences, - task_labels=task_labels, - shuffle=shuffle, - seed=seed, - fixed_class_order=fixed_class_order, - per_exp_classes=per_exp_classes, - class_ids_from_zero_from_first_exp=class_ids_from_zero_from_first_exp, - class_ids_from_zero_in_each_exp=class_ids_from_zero_in_each_exp, - one_dataset_per_exp=one_dataset_per_exp, - reproducibility_data=reproducibility_data - ) - - -def ni_scenario( - train_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - test_dataset: Union[Sequence[SupportedDataset], SupportedDataset], - n_experiences: int, - *, - task_labels: bool = False, - shuffle: bool = True, - seed: Optional[int] = None, - balance_experiences: bool = False, - min_class_patterns_in_exp: int = 0, - fixed_exp_assignment: Optional[Sequence[Sequence[int]]] = None, - reproducibility_data: Optional[Dict[str, Any]] = None -) -> NIScenario: - """ - This helper function is DEPRECATED in favor of `ni_benchmark`. - - This method is the high-level specific scenario generator for the - "New Instances" (NI) case. Given a sequence of train and test datasets - creates the continual stream of data as a series of experiences. Each - experience will contain patterns belonging to the same classes. - - The ``task_labels`` parameter determines if each incremental experience has - an increasing task label or if, at the contrary, a default task label 0 - has to be assigned to all experiences. This can be useful when - differentiating between Single-Incremental-Task and Multi-Task scenarios. - - There are other important parameters that can be specified in order to tweak - the behaviour of the resulting scenario. Please take a few minutes to read - and understand them as they may save you a lot of work. - - This generator features an integrated reproducibility mechanism that allows - the user to store and later re-load a scenario. For more info see the - ``reproducibility_data`` parameter. - - :param train_dataset: A list of training datasets, or a single dataset. - :param test_dataset: A list of test datasets, or a single test dataset. - :param n_experiences: The number of experiences. - :param task_labels: If True, each experience will have an ascending task - label. If False, the task label will be 0 for all the experiences. - :param shuffle: If True, patterns order will be shuffled. - :param seed: A valid int used to initialize the random number generator. - Can be None. - :param balance_experiences: If True, pattern of each class will be equally - spread across all experiences. If False, patterns will be assigned to - experiences in a complete random way. Defaults to False. - :param min_class_patterns_in_exp: The minimum amount of patterns of - every class that must be assigned to every experience. Compatible with - the ``balance_experiences`` parameter. An exception will be raised if - this constraint can't be satisfied. 
Defaults to 0. - :param fixed_exp_assignment: If not None, the pattern assignment - to use. It must be a list with an entry for each experience. Each entry - is a list that contains the indexes of patterns belonging to that - experience. Overrides the ``shuffle``, ``balance_experiences`` and - ``min_class_patterns_in_exp`` parameters. - :param reproducibility_data: If not None, overrides all the other - scenario definition options, including ``fixed_exp_assignment``. - This is usually a dictionary containing data used to - reproduce a specific experiment. One can use the - ``get_reproducibility_data`` method to get (and even distribute) - the experiment setup so that it can be loaded by passing it as this - parameter. In this way one can be sure that the same specific - experimental setup is being used (for reproducibility purposes). - Beware that, in order to reproduce an experiment, the same train and - test datasets must be used. Defaults to None. - - :return: A properly initialized :class:`NIScenario` instance. - """ - - warnings.warn( - "ni_scenario is deprecated in favor of ni_benchmark.", - DeprecationWarning, - ) - - return ni_benchmark( - train_dataset=train_dataset, - test_dataset=test_dataset, - n_experiences=n_experiences, - task_labels=task_labels, - shuffle=shuffle, - seed=seed, - balance_experiences=balance_experiences, - min_class_patterns_in_exp=min_class_patterns_in_exp, - fixed_exp_assignment=fixed_exp_assignment, - reproducibility_data=reproducibility_data - ) - - -def dataset_scenario( - train_dataset_list: Sequence[SupportedDataset], - test_dataset_list: Sequence[SupportedDataset], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `dataset_benchmark`. - - Creates a generic scenario given a list of datasets and the respective task - labels. Each training dataset will be considered as a separate training - experience. Contents of the datasets will not be changed, including the - targets. - - When loading the datasets from a set of fixed file lists, consider using - the :func:`filelist_scenario` helper method instead. Also, loading from - a list of paths is supported through the :func:`paths_scenario` helper. - - In its base form, this function accepts a list of test datasets that must - contain the same amount of datasets of the training list. - Those pairs are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` parameter should be set to True - (see the parameter description for more info). - - Beware that pattern transformations must already be included in the - datasets (when needed). - - :param train_dataset_list: A list of training datasets. - :param test_dataset_list: A list of test datasets. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_dataset_list`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_dataset_list`` - parameter must be list with a single element (the complete test set). 
- Defaults to False, which means that ``train_dataset_list`` and - ``test_dataset_list`` must contain the same amount of datasets. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "dataset_scenario is deprecated in favor of " "dataset_benchmark.", - DeprecationWarning, - ) - - return create_multi_dataset_generic_scenario( - train_dataset_list=train_dataset_list, - test_dataset_list=test_dataset_list, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - ) - - -def filelist_scenario( - root: Union[str, Path], - train_file_lists: Sequence[Union[str, Path]], - test_file_lists: Union[Union[str, Path], Sequence[Union[str, Path]]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `filelist_benchmark`. - - Creates a generic scenario given a list of filelists and the respective task - labels. A separate dataset will be created for each filelist and each of - those training datasets will be considered a separate training experience. - - In its base form, this function accepts a list of filelists for the test - datsets that must contain the same amount of elements of the training list. - Those pairs of datasets are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - This helper functions is the best shot when loading Caffe-style dataset - based on filelists. - - The resulting benchmark instance and the intermediate datasets used to - populate it will be of type CLASSIFICATION. - - :param root: The root path of the dataset. - :param train_file_lists: A list of filelists describing the - paths of the training patterns for each experience. - :param test_file_lists: A list of filelists describing the - paths of the test patterns for each experience. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_file_lists`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_file_lists`` - parameter must be list with a single element (the complete test set). - Alternatively, can be a plain string or :class:`Path` object. - Defaults to False, which means that ``train_file_lists`` and - ``test_file_lists`` must contain the same amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. 
a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "filelist_scenario is deprecated in favor of " "filelist_benchmark.", - DeprecationWarning, - ) - - return create_generic_scenario_from_filelists( - root=root, - train_file_lists=train_file_lists, - test_file_lists=test_file_lists, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -FileAndLabel = Tuple[Union[str, Path], int] - - -def paths_scenario( - train_list_of_files: Sequence[Sequence[FileAndLabel]], - test_list_of_files: Union[ - Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]] - ], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `paths_benchmark`. - - Creates a generic scenario given a list of files and class labels. - A separate dataset will be created for each list and each of - those training datasets will be considered a separate training experience. - - This is very similar to `filelist_scenario`, with the main difference being - that `filelist_scenario` accepts, for each experience, a file list formatted - in Caffe-style. On the contrary, this accepts a list of tuples where each - tuple contains two elements: the full path to the pattern and its label. - Optionally, the tuple may contain a third element describing the bounding - box of the element to crop. This last bounding box may be useful when trying - to extract the part of the image depicting the desired element. - - In its base form, this function accepts a list of lists of tuples for the - test datsets that must contain the same amount of lists of the training - list. Those pairs of datasets are then used to create the "past", - "cumulative" (a.k.a. growing) and "future" test sets. However, in certain - Continual Learning scenarios only the concept of "complete" test set makes - sense. In that case, the ``complete_test_set_only`` should be set to True - (see the parameter description for more info). - - The label of each pattern doesn't have to be an int. - - :param train_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that training experience as tuples. - Each tuple must contain two elements: the full path to the pattern - and its class label. Optionally, the tuple may contain a third element - describing the bounding box to use for cropping (top, left, height, - width). - :param test_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that test experience as tuples. - Each tuple must contain two elements: the full path to the pattern - and its class label. Optionally, the tuple may contain a third element - describing the bounding box to use for cropping (top, left, height, - width). - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_file_lists`` parameter. 
For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_file_lists`` - parameter must be list with a single element (the complete test set). - Alternatively, can be a plain string or :class:`Path` object. - Defaults to False, which means that ``train_file_lists`` and - ``test_file_lists`` must contain the same amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "paths_scenario is deprecated in favor of paths_benchmark.", - DeprecationWarning, - ) - - return create_generic_scenario_from_paths( - train_list_of_files=train_list_of_files, - test_list_of_files=test_list_of_files, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -def tensors_scenario( - train_tensors: Sequence[Sequence[Any]], - test_tensors: Sequence[Sequence[Any]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `tensors_benchmark`. - - Creates a generic scenario given lists of Tensors and the respective task - labels. A separate dataset will be created from each Tensor tuple - (x, y, ...) and each of those training datasets will be considered a - separate training experience. Using this helper function is the lowest-level - way to create a Continual Learning scenario. When possible, consider using - higher level helpers. - - Experiences are defined by passing lists of tensors as the `train_tensors` - and `test_tensors` parameter. Those parameters must be lists containing - sub-lists of tensors, one for each experience. Each tensor defines the value - of a feature ("x", "y", "z", ...) for all patterns of that experience. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. 
In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_tensors: A list of lists. The first list must contain the - tensors for the first training experience (one tensor per feature), the - second list must contain the tensors for the second training experience, - and so on. - :param test_tensors: A list of lists. The first list must contain the - tensors for the first test experience (one tensor per feature), the - second list must contain the tensors for the second test experience, - and so on. - :param task_labels: A list of task labels. Must contain a task label for - each experience. For Single-Incremental-Task (a.k.a. Task-Free) - scenarios, this is usually a list of zeros. For Multi Task scenario, - this is usually a list of ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that ``test_tensors`` must - define a single experience. Defaults to False, which means that - ``train_tensors`` and ``test_tensors`` must define the same - amount of experiences. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "tensors_scenario is deprecated in favor of " "tensors_benchmark.", - DeprecationWarning, - ) - - return create_generic_scenario_from_tensor_lists( - train_tensors=train_tensors, - test_tensors=test_tensors, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -def tensor_scenario( - train_data_x: Sequence[Any], - train_data_y: Sequence[Sequence[SupportsInt]], - test_data_x: Union[Any, Sequence[Any]], - test_data_y: Union[Any, Sequence[Sequence[SupportsInt]]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of `tensors_benchmark`. - - Please consider using :func:`tensors_benchmark` instead. When switching to - the new function, please keep in mind that the format of the parameters is - completely different! - - Creates a generic scenario given lists of Tensors and the respective task - labels. A separate dataset will be created from each Tensor pair (x + y) - and each of those training datasets will be considered a separate - training experience. Contents of the datasets will not be changed, including - the targets. 
Using this helper function is the lower level way to create a - Continual Learning scenario. When possible, consider using higher level - helpers. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_data_x: A list of Tensors (one per experience) containing the - patterns of the training sets. - :param train_data_y: A list of Tensors or int lists containing the - labels of the patterns of the training sets. Must contain the same - number of elements of ``train_datasets_x``. - :param test_data_x: A Tensor or a list of Tensors (one per experience) - containing the patterns of the test sets. - :param test_data_y: A Tensor or a list of Tensors or int lists containing - the labels of the patterns of the test sets. Must contain the same - number of elements of ``test_datasets_x``. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_datasets_x`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_datasets_x`` and - ``test_datasets_y`` parameters must be lists with a single element - (the complete test set). Defaults to False, which means that - ``train_file_lists`` and ``test_file_lists`` must contain the same - amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A properly initialized :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "tensor_scenario is deprecated in favor " - "of tensors_benchmark. 
When switching" - " to the new function, please keep in mind that the format of" - " the parameters is completely different!", - DeprecationWarning, - ) - - if isinstance(test_data_x, Tensor): - test_data_x = [test_data_x] - test_data_y = [test_data_y] - else: - if len(test_data_x) != len(test_data_y): - raise ValueError( - "test_data_x and test_data_y must contain" - " the same amount of elements" - ) - - if len(train_data_x) != len(train_data_y): - raise ValueError( - "train_data_x and train_data_y must contain" - " the same amount of elements" - ) - - exp_train_first_structure = [] - exp_test_first_structure = [] - for exp_idx in range(len(train_data_x)): - exp_x = train_data_x[exp_idx] - exp_y = train_data_y[exp_idx] - - exp_train_first_structure.append([exp_x, exp_y]) - - for exp_idx in range(len(test_data_x)): - exp_x = test_data_x[exp_idx] - exp_y = test_data_y[exp_idx] - - exp_test_first_structure.append([exp_x, exp_y]) - - return tensors_scenario( - train_tensors=exp_train_first_structure, - test_tensors=exp_test_first_structure, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -__all__ = [ - "nc_scenario", - "ni_scenario", - "dataset_scenario", - "filelist_scenario", - "paths_scenario", - "tensors_scenario", - "tensor_scenario", -] diff --git a/avalanche/benchmarks/scenarios/__init__.py b/avalanche/benchmarks/scenarios/__init__.py index 48befae1f..30816f3c2 100644 --- a/avalanche/benchmarks/scenarios/__init__.py +++ b/avalanche/benchmarks/scenarios/__init__.py @@ -1,7 +1,8 @@ from .generic_scenario import * from .dataset_scenario import * from .classification_scenario import * -from .generic_scenario_creation import * +from .classification_benchmark_creation import * +from .detection_benchmark_creation import * from .new_classes import * from .new_instances import * from .exmodel_scenario import * diff --git a/avalanche/benchmarks/scenarios/classification_benchmark_creation.py b/avalanche/benchmarks/scenarios/classification_benchmark_creation.py new file mode 100644 index 000000000..f585c16c6 --- /dev/null +++ b/avalanche/benchmarks/scenarios/classification_benchmark_creation.py @@ -0,0 +1,254 @@ +from typing import ( + Any, + Callable, + Dict, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, +) +from avalanche.benchmarks.scenarios.dataset_scenario import ( + DatasetScenario, + TStreamsUserDict, +) +from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( + _make_classification_scenario, + FileAndLabel, + DatasetFactory, + LazyStreamDefinition, + create_generic_benchmark_from_filelists, + create_generic_benchmark_from_paths, + create_generic_benchmark_from_tensor_lists, + create_lazy_generic_benchmark, + create_multi_dataset_generic_benchmark, +) + +from avalanche.benchmarks.utils.classification_dataset import ( + SupportedDataset, + make_classification_dataset, +) +from avalanche.benchmarks.utils.transform_groups import XTransform, YTransform + + +TDatasetScenario = TypeVar( + 'TDatasetScenario', + bound='DatasetScenario') + + +def create_multi_dataset_classification_benchmark( + train_datasets: Sequence[SupportedDataset], + test_datasets: Sequence[SupportedDataset], + *, + other_streams_datasets: Optional[ + Mapping[str, Sequence[SupportedDataset]]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: 
YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a classification benchmark instance given a list of datasets. + Each dataset will be considered as a separate experience. + + Contents of the datasets must already be set, including task labels. + Transformations will be applied if defined. + + For additional info, please refer to + :func:`create_multi_dataset_generic_benchmark`. + """ + return create_multi_dataset_generic_benchmark( + train_datasets=train_datasets, + test_datasets=test_datasets, + other_streams_datasets=other_streams_datasets, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +def create_lazy_classification_benchmark( + train_generator: LazyStreamDefinition, + test_generator: LazyStreamDefinition, + *, + other_streams_generators: Optional[Dict[str, LazyStreamDefinition]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a lazily-defined classification benchmark instance given a dataset + generator for each stream. + + Generators must return properly initialized instances of + :class:`AvalancheDataset` which will be used to create experiences. + + For additional info, please refer to :func:`create_lazy_generic_benchmark`. 
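+
+    A minimal, illustrative sketch of a lazily-defined stream. It assumes
+    that a lazy stream definition is the usual
+    ``(experiences_generator, stream_length, task_labels)`` triple and that
+    :func:`make_classification_dataset` accepts an explicit ``targets``
+    argument; tensors, shapes and labels below are placeholders::
+
+        import torch
+        from torch.utils.data import TensorDataset
+
+        def train_gen():
+            # Two lazily created training experiences, 10 patterns each.
+            for _ in range(2):
+                x = torch.rand(10, 3, 32, 32)
+                y = torch.randint(0, 5, (10,))
+                yield make_classification_dataset(
+                    TensorDataset(x, y), targets=y.tolist(), task_labels=0)
+
+        def test_gen():
+            # A single, complete test experience.
+            x = torch.rand(20, 3, 32, 32)
+            y = torch.randint(0, 5, (20,))
+            yield make_classification_dataset(
+                TensorDataset(x, y), targets=y.tolist(), task_labels=0)
+
+        benchmark = create_lazy_classification_benchmark(
+            (train_gen(), 2, [0, 0]),
+            (test_gen(), 1, [0]),
+            complete_test_set_only=True,
+        )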
+ """ + return create_lazy_generic_benchmark( + train_generator=train_generator, + test_generator=test_generator, + other_streams_generators=other_streams_generators, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +create_classification_benchmark_from_filelists = \ + create_generic_benchmark_from_filelists + + +def create_classification_benchmark_from_paths( + train_lists_of_files: Sequence[Sequence[FileAndLabel]], + test_lists_of_files: Sequence[Sequence[FileAndLabel]], + *, + other_streams_lists_of_files: Optional[Dict[ + str, Sequence[Sequence[FileAndLabel]] + ]] = None, + task_labels: Sequence[int], + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a classification benchmark instance given a sequence of lists of + files. A separate dataset will be created for each list. Each of those + datasets will be considered a separate experience. + + This is very similar to + :func:`create_classification_benchmark_from_filelists`, + with the main difference being that + :func:`create_classification_benchmark_from_filelists` accepts, for each + experience, a file list formatted in Caffe-style. On the contrary, this + accepts a list of tuples where each tuple contains two elements: the full + path to the pattern and its label. Optionally, the tuple may contain a third + element describing the bounding box of the element to crop. This last + bounding box may be useful when trying to extract the part of the image + depicting the desired element. + + For additional info, please refer to + :func:`create_generic_benchmark_from_paths`. 
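+
+    A minimal, illustrative sketch; the file paths and class labels below
+    are placeholders::
+
+        train_exp_0 = [("/data/train/cat_0.png", 0), ("/data/train/dog_0.png", 1)]
+        train_exp_1 = [("/data/train/car_0.png", 2), ("/data/train/bike_0.png", 3)]
+        test_exp = [("/data/test/cat_1.png", 0), ("/data/test/car_1.png", 2)]
+
+        benchmark = create_classification_benchmark_from_paths(
+            [train_exp_0, train_exp_1],
+            [test_exp],
+            task_labels=[0, 0],
+            complete_test_set_only=True,
+        )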
+ """ + return create_generic_benchmark_from_paths( + train_lists_of_files=train_lists_of_files, + test_lists_of_files=test_lists_of_files, + other_streams_lists_of_files=other_streams_lists_of_files, + task_labels=task_labels, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +def create_classification_benchmark_from_tensor_lists( + train_tensors: Sequence[Sequence[Any]], + test_tensors: Sequence[Sequence[Any]], + *, + other_streams_tensors: Optional[Dict[str, Sequence[Sequence[Any]]]] = None, + task_labels: Sequence[int], + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a classification benchmark instance given lists of Tensors. A + separate dataset will be created from each Tensor tuple (x, y, z, ...) + and each of those training datasets will be considered a separate training + experience. Using this helper function is the lowest-level way to create a + Continual Learning benchmark. When possible, consider using higher level + helpers. + + Experiences are defined by passing lists of tensors as the `train_tensors`, + `test_tensors` (and `other_streams_tensors`) parameters. Those parameters + must be lists containing lists of tensors, one list for each experience. + Each tensor defines the value of a feature ("x", "y", "z", ...) for all + patterns of that experience. + + By default the second tensor of each experience will be used to fill the + `targets` value (label of each pattern). + + For additional info, please refer to + :func:`create_generic_benchmark_from_tensor_lists`. 
+ """ + return create_generic_benchmark_from_tensor_lists( + train_tensors=train_tensors, + test_tensors=test_tensors, + other_streams_tensors=other_streams_tensors, + task_labels=task_labels, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +__all__ = [ + 'create_multi_dataset_classification_benchmark', + 'create_lazy_classification_benchmark', + 'create_classification_benchmark_from_filelists', + 'create_classification_benchmark_from_paths', + 'create_classification_benchmark_from_tensor_lists' +] diff --git a/avalanche/benchmarks/scenarios/classification_scenario.py b/avalanche/benchmarks/scenarios/classification_scenario.py index cfebe300a..c169b744f 100644 --- a/avalanche/benchmarks/scenarios/classification_scenario.py +++ b/avalanche/benchmarks/scenarios/classification_scenario.py @@ -14,6 +14,8 @@ import warnings +from torch import Tensor + from avalanche.benchmarks.scenarios.generic_scenario import ( AbstractClassTimelineExperience, ) @@ -169,12 +171,12 @@ class ClassificationExperience( ] ): """ - Definition of a learning experience based on a :class:`GenericCLScenario` - instance. + Definition of a learning experience based on a + :class:`ClassificationScenario` instance. This experience implementation uses the generic experience-patterns - assignment defined in the :class:`GenericCLScenario` instance. Instances of - this class are usually obtained from a benchmark stream. + assignment defined in the :class:`ClassificationScenario` instance. + Instances of this class are usually obtained from a benchmark stream. 
""" def __init__( @@ -248,7 +250,7 @@ def task_labels(self) -> List[int]: class _LazyStreamClassesInClassificationExps( Mapping[str, Sequence[Set[int]]]): - def __init__(self, benchmark: GenericCLScenario): + def __init__(self, benchmark: ClassificationScenario): self._benchmark = benchmark self._default_lcie = _LazyClassesInClassificationExps( benchmark, stream="train") @@ -278,7 +280,10 @@ def __iter__(self): class _LazyClassesInClassificationExps(Sequence[Optional[Set[int]]]): - def __init__(self, benchmark: GenericCLScenario, stream: str = "train"): + def __init__( + self, + benchmark: ClassificationScenario, + stream: str = "train"): self._benchmark = benchmark self._stream = stream @@ -328,6 +333,17 @@ def _slice_collate(classes_in_exps: Iterable[Optional[Iterable[int]]]) -> \ result.append(set(x)) return tuple(result) + + +CommonClassificationItem = Tuple[Tensor, int, int] # x, y, t +CommonClassificationDataset = ClassificationDataset[CommonClassificationItem] +CommonClassificationExperience = ClassificationExperience[ + CommonClassificationDataset] + +CommonClassificationScenarioType = ClassificationScenario[ + ClassificationStream[CommonClassificationExperience], + CommonClassificationExperience, + CommonClassificationDataset] __all__ = [ @@ -336,4 +352,5 @@ def _slice_collate(classes_in_exps: Iterable[Optional[Iterable[int]]]) -> \ "ClassificationStream", "ClassificationExperience", "GenericClassificationExperience", + "CommonClassificationScenarioType" ] diff --git a/avalanche/benchmarks/scenarios/dataset_scenario.py b/avalanche/benchmarks/scenarios/dataset_scenario.py index cfc1c2f56..00b45b4ba 100644 --- a/avalanche/benchmarks/scenarios/dataset_scenario.py +++ b/avalanche/benchmarks/scenarios/dataset_scenario.py @@ -294,8 +294,9 @@ def get_reproducibility_data(self) -> Dict[str, Any]: parameter in the constructor. Child classes should create their own reproducibility dictionary. - This means that the implementation found in :class:`GenericCLScenario` - will return an empty dictionary, which is meaningless. + This means that the implementation found in + :class:`ClassificationScenario` will return an empty dictionary, + which is meaningless. 
In order to obtain the same benchmark instance, the reproducibility data must be passed to the constructor along with the exact same diff --git a/avalanche/benchmarks/scenarios/detection_benchmark_creation.py b/avalanche/benchmarks/scenarios/detection_benchmark_creation.py new file mode 100644 index 000000000..ca0c2d7ee --- /dev/null +++ b/avalanche/benchmarks/scenarios/detection_benchmark_creation.py @@ -0,0 +1,144 @@ +from typing import ( + Callable, + Dict, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, +) +from avalanche.benchmarks.scenarios.dataset_scenario import ( + DatasetScenario, + TStreamsUserDict, +) +from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( + DatasetFactory, + LazyStreamDefinition, + create_lazy_generic_benchmark, + create_multi_dataset_generic_benchmark, +) + +from avalanche.benchmarks.utils.transform_groups import XTransform, YTransform +from avalanche.benchmarks.scenarios.detection_scenario import ( + DetectionExperience, + DetectionScenario, + DetectionStream, +) +from avalanche.benchmarks.utils.detection_dataset import ( + make_detection_dataset, + SupportedDetectionDataset, +) + + +TDatasetScenario = TypeVar( + 'TDatasetScenario', + bound='DatasetScenario') + + +def _make_detection_scenario( + stream_definitions: TStreamsUserDict, + complete_test_set_only: bool +) -> DetectionScenario[ + DetectionStream[ + DetectionExperience], + DetectionExperience]: + return DetectionScenario( + stream_definitions=stream_definitions, + complete_test_set_only=complete_test_set_only + ) + + +def create_multi_dataset_detection_benchmark( + train_datasets: Sequence[SupportedDetectionDataset], + test_datasets: Sequence[SupportedDetectionDataset], + *, + other_streams_datasets: Optional[ + Mapping[str, Sequence[SupportedDetectionDataset]]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_detection_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_detection_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a detection benchmark instance given a list of datasets. + Each dataset will be considered as a separate experience. + + Contents of the datasets must already be set, including task labels. + Transformations will be applied if defined. + + For additional info, please refer to + :func:`create_multi_dataset_generic_benchmark`. 
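A hedged sketch of the detection counterpart described above; `train_det_0`, `train_det_1`, `test_det_0` and `test_det_1` are hypothetical torchvision-style detection datasets (each item is an image plus a target dict with boxes and labels), and passing `task_labels` to `make_detection_dataset` is assumed to mirror the classification wrapper.

from avalanche.benchmarks.scenarios.detection_benchmark_creation import (
    create_multi_dataset_detection_benchmark,
)
from avalanche.benchmarks.utils.detection_dataset import make_detection_dataset

# Task labels must already be set on the datasets, as noted above.
train_exps = [
    make_detection_dataset(train_det_0, task_labels=0),
    make_detection_dataset(train_det_1, task_labels=0),
]
test_exps = [
    make_detection_dataset(test_det_0, task_labels=0),
    make_detection_dataset(test_det_1, task_labels=0),
]

benchmark = create_multi_dataset_detection_benchmark(train_exps, test_exps)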
+ """ + return create_multi_dataset_generic_benchmark( + train_datasets=train_datasets, + test_datasets=test_datasets, + other_streams_datasets=other_streams_datasets, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +def create_lazy_detection_benchmark( + train_generator: LazyStreamDefinition, + test_generator: LazyStreamDefinition, + *, + other_streams_generators: Optional[Dict[str, LazyStreamDefinition]] = None, + complete_test_set_only: bool = False, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_detection_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_detection_scenario # type: ignore +) -> TDatasetScenario: + """ + Creates a lazily-defined detection benchmark instance given a dataset + generator for each stream. + + Generators must return properly initialized instances of + :class:`AvalancheDataset` which will be used to create experiences. + + For additional info, please refer to :func:`create_lazy_generic_benchmark`. + """ + return create_lazy_generic_benchmark( + train_generator=train_generator, + test_generator=test_generator, + other_streams_generators=other_streams_generators, + complete_test_set_only=complete_test_set_only, + train_transform=train_transform, + train_target_transform=train_target_transform, + eval_transform=eval_transform, + eval_target_transform=eval_target_transform, + other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + + +__all__ = [ + 'create_multi_dataset_detection_benchmark', + 'create_lazy_detection_benchmark' +] diff --git a/avalanche/benchmarks/scenarios/generic_benchmark_creation.py b/avalanche/benchmarks/scenarios/generic_benchmark_creation.py index f29aab947..fcd833a78 100644 --- a/avalanche/benchmarks/scenarios/generic_benchmark_creation.py +++ b/avalanche/benchmarks/scenarios/generic_benchmark_creation.py @@ -14,12 +14,15 @@ them fit your needs, then the helper functions here listed may help. 
""" +import itertools from pathlib import Path from typing import ( + Callable, Generator, List, Mapping, Sequence, + TypeVar, Union, Any, Tuple, @@ -28,35 +31,284 @@ Iterable, NamedTuple, ) +from typing_extensions import ( + Protocol, + Literal, +) +import warnings +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationExperience, + ClassificationScenario, + ClassificationStream, +) +from avalanche.benchmarks.scenarios.dataset_scenario import ( + DatasetScenario, + DatasetStream, + FactoryBasedStream, + TStreamsUserDict, +) +from avalanche.benchmarks.scenarios.generic_scenario import DatasetExperience from avalanche.benchmarks.utils import ( - make_tensor_classification_dataset, - SupportedDataset, - make_classification_dataset, FilelistDataset, PathsDataset, common_paths_root, ) +from torch.utils.data.dataset import Subset, ConcatDataset from avalanche.benchmarks.utils.classification_dataset import ( ClassificationDataset, + make_classification_dataset, +) +from avalanche.benchmarks.utils.data import AvalancheDataset +from avalanche.benchmarks.utils.transform_groups import ( + TransformGroupDef, + XTransform, + YTransform, +) +from avalanche.benchmarks.utils.utils import ( + _is_int_iterable, + make_generic_dataset, + make_generic_tensor_dataset, +) +from avalanche.benchmarks.utils.dataset_definitions import ( + IDatasetWithTargets, + ITensorDataset, ) -from .classification_scenario import GenericCLScenario + + +TDatasetScenario = TypeVar( + 'TDatasetScenario', + bound='DatasetScenario') + +TTargetType = TypeVar( + 'TTargetType', + contravariant=True) +TSupportedDataset = TypeVar( + 'TSupportedDataset', + contravariant=True) +TAvalancheDataset = TypeVar( + 'TAvalancheDataset', + bound='AvalancheDataset', + covariant=True) + + +GenericSupportedDataset = Union[ + IDatasetWithTargets, + ITensorDataset, + Subset, + ConcatDataset, + AvalancheDataset +] + + +class DatasetFactory( + Protocol[ + TSupportedDataset, + TTargetType, + TAvalancheDataset]): + def __call__( + self, + dataset: TSupportedDataset, + *, + transform: Optional[XTransform] = None, + target_transform: Optional[YTransform] = None, + transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, + initial_transform_group: Optional[str] = None, + task_labels: Optional[Union[int, Sequence[int]]] = None, + targets: Optional[Sequence[TTargetType]] = None, + collate_fn: Optional[Callable[[List], Any]] = None + ) -> TAvalancheDataset: + ... + + +class TensorDatasetFactory( + Protocol[ + TAvalancheDataset]): + def __call__( + self, + dataset_tensors: Sequence, + *, + task_labels: Optional[Union[int, Sequence[int]]] = None, + ) -> TAvalancheDataset: + ... 
+ + +def _make_plain_experience( + stream: DatasetStream[DatasetExperience[TAvalancheDataset]], + experience_idx: int +) -> DatasetExperience[TAvalancheDataset]: + dataset = stream.benchmark.stream_definitions[ + stream.name + ].exps_data[experience_idx] + + return DatasetExperience( + current_experience=experience_idx, + origin_stream=stream, + benchmark=stream.benchmark, + dataset=dataset + ) + + +def _make_generic_scenario( + stream_definitions: TStreamsUserDict, + complete_test_set_only: bool): + return DatasetScenario( + stream_definitions=stream_definitions, + complete_test_set_only=complete_test_set_only, + stream_factory=FactoryBasedStream, + experience_factory=_make_plain_experience + ) + + +def _make_classification_scenario( + stream_definitions: TStreamsUserDict, + complete_test_set_only: bool +) -> ClassificationScenario[ + ClassificationStream[ + ClassificationExperience[ + ClassificationDataset]], + ClassificationExperience[ + ClassificationDataset], + ClassificationDataset]: + return ClassificationScenario( + stream_definitions=stream_definitions, + complete_test_set_only=complete_test_set_only + ) + + +def _detect_legacy_classification_usage( + all_datasets: Iterable[Any] +) -> bool: + """ + Used by :func:`create_multi_dataset_generic_benchmark` to check + if the user is trying to create a classification benchmark. + + While using :func:`create_multi_dataset_generic_benchmark` to create a + classification benchmark is acceptable, it would be better to use + :func:`create_multi_dataset_classification_benchmark`, which returns + a :class:`ClassificationScenario` + + Fields defined in :class:`ClassificationScenario` are not to be found + in the generic :class:`DatasetScenario` instance returned by + func:`create_multi_dataset_generic_benchmark` and may be needed + by some continual learning strategies. + + This function works by checking if input datasets contain all + int (including NumPy/PyTorch int types) targets. + """ + + for dataset in all_datasets: + try: + as_classification_dataset = make_classification_dataset( + dataset + ) + if not _is_int_iterable(as_classification_dataset.targets): + return False + except Exception: + return False + + return True + + +def _manage_legacy_classification_usage( + train_datasets: Sequence[GenericSupportedDataset], + test_datasets: Sequence[GenericSupportedDataset], + other_streams_datasets: Optional[ + Mapping[str, Sequence[GenericSupportedDataset]]], + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ], + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']]) -> Tuple[ + DatasetFactory, + Callable[[ + TStreamsUserDict, + bool + ], TDatasetScenario]]: + + check_implicit_classification = \ + dataset_factory == 'check_if_classification' or \ + benchmark_factory == 'check_if_classification' + + is_implicit_classification = False + if check_implicit_classification: + all_datasets_iterables = [ + train_datasets, + test_datasets, + ] + + if other_streams_datasets is not None: + all_datasets_iterables.extend(other_streams_datasets.values()) + + is_implicit_classification = _detect_legacy_classification_usage( + itertools.chain(*all_datasets_iterables) + ) + + if is_implicit_classification: + warnings.warn( + '`dataset_benchmark` is being called by passing classification ' + 'datasets. 
It is recommended to switch to ' + '`dataset_classification_benchmark` to make sure a ' + '`ClassificationScenario` is returned', + DeprecationWarning + ) + + dataset_factory_compat: DatasetFactory + if dataset_factory == 'check_if_classification': + if is_implicit_classification: + dataset_factory_compat = make_classification_dataset + else: + dataset_factory_compat = make_generic_dataset + else: + dataset_factory_compat = dataset_factory + + benchmark_factory_compat: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] + if benchmark_factory == 'check_if_classification': + if is_implicit_classification: + benchmark_factory_compat = \ + _make_classification_scenario # type: ignore + else: + benchmark_factory_compat = _make_generic_scenario + else: + benchmark_factory_compat = benchmark_factory + + return dataset_factory_compat, benchmark_factory_compat def create_multi_dataset_generic_benchmark( - train_datasets: Sequence[SupportedDataset], - test_datasets: Sequence[SupportedDataset], + train_datasets: Sequence[GenericSupportedDataset], + test_datasets: Sequence[GenericSupportedDataset], *, other_streams_datasets: Optional[ - Mapping[str, Sequence[SupportedDataset]]] = None, + Mapping[str, Sequence[GenericSupportedDataset]]] = None, complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, other_streams_transforms: Optional[ - Mapping[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ] = 'check_if_classification', + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']] = 'check_if_classification' +) -> TDatasetScenario: """ Creates a benchmark instance given a list of datasets. Each dataset will be considered as a separate experience. @@ -107,10 +359,28 @@ def create_multi_dataset_generic_benchmark( transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A benchmark instance. 
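A minimal sketch of the default behaviour; `train_sets`, `test_sets` and `valid_sets` are hypothetical lists of datasets whose task labels are already set. Leaving both factories at 'check_if_classification' makes the helper return a ClassificationScenario when every target is an int (emitting the deprecation warning above) and a plain DatasetScenario otherwise.

from avalanche.benchmarks.scenarios.generic_benchmark_creation import (
    create_multi_dataset_generic_benchmark,
)

benchmark = create_multi_dataset_generic_benchmark(
    train_datasets=train_sets,
    test_datasets=test_sets,
    other_streams_datasets={"valid": valid_sets},
    # Per-stream transforms must be (XTransform, YTransform) tuples.
    other_streams_transforms={"valid": (None, None)},
)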
""" + dataset_factory_compat, benchmark_factory_compat = \ + _manage_legacy_classification_usage( + train_datasets=train_datasets, + test_datasets=test_datasets, + other_streams_datasets=other_streams_datasets, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory + ) + transform_groups = dict( train=(train_transform, train_target_transform), eval=(eval_transform, eval_target_transform), @@ -121,9 +391,20 @@ def create_multi_dataset_generic_benchmark( if isinstance(stream_transforms, Sequence): if len(stream_transforms) == 1: # Suppose we got only the transformation for X values - stream_transforms = (stream_transforms[0], None) + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple `(Xtransform, YTransform)`. ' + 'You can pass None for the Y transformation.' + ) + stream_transforms = ( + stream_transforms[0], # type: ignore + None) else: # Suppose it's the transformation for X values + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple (Xtransform, YTransform).' + ) stream_transforms = (stream_transforms, None) transform_groups[stream_name] = stream_transforms @@ -140,7 +421,7 @@ def create_multi_dataset_generic_benchmark( "complete_test_set_only is True" ) - stream_definitions: Dict[str, Tuple[Iterable[ClassificationDataset]]] = \ + stream_definitions: Dict[str, Tuple[Iterable[AvalancheDataset]]] = \ dict() for stream_name, dataset_list in input_streams.items(): @@ -151,22 +432,27 @@ def create_multi_dataset_generic_benchmark( stream_datasets = [] for dataset_idx in range(len(dataset_list)): dataset = dataset_list[dataset_idx] + stream_datasets.append( - make_classification_dataset( - dataset, + dataset_factory_compat( + dataset=dataset, transform_groups=transform_groups, - initial_transform_group=initial_transform_group, + initial_transform_group=initial_transform_group ) ) stream_definitions[stream_name] = (stream_datasets,) - return GenericCLScenario( - stream_definitions=stream_definitions, - complete_test_set_only=complete_test_set_only, + return benchmark_factory_compat( + stream_definitions, + complete_test_set_only, ) -def _adapt_lazy_stream(generator, transform_groups, initial_transform_group): +def _adapt_lazy_stream( + generator, + transform_groups, + initial_transform_group, + dataset_factory): """ A simple internal utility to apply transforms and dataset type to all lazily generated datasets. Used in the :func:`create_lazy_generic_benchmark` @@ -177,7 +463,7 @@ def _adapt_lazy_stream(generator, transform_groups, initial_transform_group): """ for dataset in generator: - dataset = make_classification_dataset( + dataset = dataset_factory( dataset, transform_groups=transform_groups, initial_transform_group=initial_transform_group, @@ -203,7 +489,7 @@ class LazyStreamDefinition(NamedTuple): can be used. """ - exps_generator: Iterable[ClassificationDataset] + exps_generator: Iterable[AvalancheDataset] """ The experiences generator. 
Can be a "yield"-based generator, a custom sequence, a standard list or any kind of iterable returning @@ -232,12 +518,20 @@ def create_lazy_generic_benchmark( *, other_streams_generators: Optional[Dict[str, LazyStreamDefinition]] = None, complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, - other_streams_transforms: Optional[Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_generic_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_generic_scenario +) -> TDatasetScenario: """ Creates a lazily-defined benchmark instance given a dataset generator for each stream. @@ -296,8 +590,17 @@ def create_lazy_generic_benchmark( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A lazily-initialized :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A lazily-initialized benchmark instance. """ transform_groups = dict( @@ -310,9 +613,20 @@ def create_lazy_generic_benchmark( if isinstance(stream_transforms, Sequence): if len(stream_transforms) == 1: # Suppose we got only the transformation for X values - stream_transforms = (stream_transforms[0], None) + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple `(Xtransform, YTransform)`. ' + 'You can pass None for the Y transformation.' + ) + stream_transforms = ( + stream_transforms[0], # type: ignore + None) else: # Suppose it's the transformation for X values + warnings.warn( + 'Transformations for other streams should be passed ' + 'as a 2 elements tuple (Xtransform, YTransform).' 
+ ) stream_transforms = (stream_transforms, None) transform_groups[stream_name] = stream_transforms @@ -332,7 +646,7 @@ def create_lazy_generic_benchmark( stream_definitions: Dict[ str, Tuple[ # Dataset generator + stream length - Tuple[Generator[ClassificationDataset, None, None], int], + Tuple[Generator[AvalancheDataset, None, None], int], # Task label(s) for each experience Iterable[Union[int, Iterable[int]]] ] @@ -351,6 +665,7 @@ def create_lazy_generic_benchmark( generator, transform_groups, initial_transform_group=initial_transform_group, + dataset_factory=dataset_factory ) stream_definitions[stream_name] = ( @@ -358,9 +673,9 @@ def create_lazy_generic_benchmark( task_labels, ) - return GenericCLScenario( - stream_definitions=stream_definitions, - complete_test_set_only=complete_test_set_only, + return benchmark_factory( + stream_definitions, + complete_test_set_only ) @@ -373,13 +688,20 @@ def create_generic_benchmark_from_filelists( Dict[str, Sequence[Union[str, Path]]]] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, other_streams_transforms: Optional[ - Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: DatasetFactory = make_classification_dataset, + benchmark_factory: Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ] = _make_classification_scenario # type: ignore +) -> TDatasetScenario: """ Creates a benchmark instance given a list of filelists and the respective task labels. A separate dataset will be created for each filelist and each @@ -391,8 +713,7 @@ def create_generic_benchmark_from_filelists( Beware that this helper function is limited is the following two aspects: - The resulting benchmark instance and the intermediate datasets used to - populate it will be of type CLASSIFICATION. There is no way to change - this. + populate it will be of type CLASSIFICATION. - Task labels can only be defined by choosing a single task label for each experience (the same task label is applied to all patterns of experiences sharing the same position in different streams). @@ -452,8 +773,17 @@ def create_generic_benchmark_from_filelists( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_classification_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`ClassificationScenario`. + + :returns: A benchmark instance. 
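A sketch of the filelist-based flow; the root directory and filelist names are hypothetical. Each filelist is a Caffe-style text file with one "relative/path label" pair per line.

from avalanche.benchmarks.scenarios.generic_benchmark_creation import (
    create_generic_benchmark_from_filelists,
)

# e.g. exp0_train.txt contains lines such as:
#   cats/0001.png 0
#   dogs/0001.png 1
benchmark = create_generic_benchmark_from_filelists(
    "/data/images",                        # root prepended to each relative path
    ["exp0_train.txt", "exp1_train.txt"],  # one filelist per training experience
    ["test.txt"],                          # a single, complete test set
    task_labels=[0, 0],
    complete_test_set_only=True,
)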
""" input_streams = dict(train=train_file_lists, test=test_file_lists) @@ -461,15 +791,15 @@ def create_generic_benchmark_from_filelists( if other_streams_file_lists is not None: input_streams = {**input_streams, **other_streams_file_lists} - stream_definitions: Dict[str, Sequence[ClassificationDataset]] = dict() + stream_definitions: Dict[str, Sequence[AvalancheDataset]] = dict() for stream_name, file_lists in input_streams.items(): - stream_datasets: List[ClassificationDataset] = [] + stream_datasets: List[AvalancheDataset] = [] for exp_id, f_list in enumerate(file_lists): f_list_dataset = FilelistDataset(root, f_list) stream_datasets.append( - make_classification_dataset( + dataset_factory( f_list_dataset, task_labels=task_labels[exp_id] ) ) @@ -486,6 +816,8 @@ def create_generic_benchmark_from_filelists( eval_target_transform=eval_target_transform, complete_test_set_only=complete_test_set_only, other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory ) @@ -503,12 +835,23 @@ def create_generic_benchmark_from_paths( ]] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, - other_streams_transforms: Optional[Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ] = 'check_if_classification', + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']] = 'check_if_classification' +) -> TDatasetScenario: """ Creates a benchmark instance given a sequence of lists of files. A separate dataset will be created for each list. Each of those datasets @@ -579,8 +922,17 @@ def create_generic_benchmark_from_paths( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A benchmark instance. 
""" input_streams = dict(train=train_lists_of_files, test=test_lists_of_files) @@ -588,17 +940,18 @@ def create_generic_benchmark_from_paths( if other_streams_lists_of_files is not None: input_streams = {**input_streams, **other_streams_lists_of_files} - stream_definitions: Dict[str, Sequence[ClassificationDataset]] = dict() + stream_definitions: Dict[str, Sequence[AvalancheDataset]] = dict() for stream_name, lists_of_files in input_streams.items(): - stream_datasets: List[ClassificationDataset] = [] + stream_datasets: List[AvalancheDataset] = [] for exp_id, list_of_files in enumerate(lists_of_files): common_root, exp_paths_list = common_paths_root(list_of_files) - paths_dataset: PathsDataset[Any, int] = \ + paths_dataset: PathsDataset[Any, Any] = \ PathsDataset(common_root, exp_paths_list) stream_datasets.append( - make_classification_dataset( - paths_dataset, task_labels=task_labels[exp_id] + make_generic_dataset( + paths_dataset, + task_labels=task_labels[exp_id] ) ) @@ -614,6 +967,8 @@ def create_generic_benchmark_from_paths( eval_target_transform=eval_target_transform, complete_test_set_only=complete_test_set_only, other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory ) @@ -624,12 +979,23 @@ def create_generic_benchmark_from_tensor_lists( other_streams_tensors: Optional[Dict[str, Sequence[Sequence[Any]]]] = None, task_labels: Sequence[int], complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, - other_streams_transforms: Optional[Dict[str, Tuple[Any, Any]]] = None -) -> GenericCLScenario: + train_transform: XTransform = None, + train_target_transform: YTransform = None, + eval_transform: XTransform = None, + eval_target_transform: YTransform = None, + other_streams_transforms: Optional[ + Mapping[str, Tuple[XTransform, YTransform]]] = None, + dataset_factory: Union[ + DatasetFactory, + Literal['check_if_classification'] + ] = 'check_if_classification', + benchmark_factory: Union[Callable[ + [ + TStreamsUserDict, + bool + ], TDatasetScenario + ], Literal['check_if_classification']] = 'check_if_classification' +) -> TDatasetScenario: """ Creates a benchmark instance given lists of Tensors. A separate dataset will be created from each Tensor tuple (x, y, z, ...) and each of those training @@ -701,8 +1067,21 @@ def create_generic_benchmark_from_tensor_lists( transformations for "train" or "test" streams then those transformations will override the `train_transform`, `train_target_transform`, `eval_transform` and `eval_target_transform` parameters. - - :returns: A :class:`GenericCLScenario` instance. + :param dataset_factory: The factory for the dataset. Should return + an :class:`AvalancheDataset` (or any subclass) given the input + dataset, the transform groups definition and the name of the + initial group (equal to the name of the stream). Defaults + to :func:`make_generic_dataset`. + :param tensor_dataset_factory: The factory for the intermediate + tensor dataset. This is used to convert the tensors list to a + PyTorch dataset. The returned dataset will be then processed + again using `dataset_factory` + :param benchmark_factory: The factory for the benchmark. + Should return the benchmark instance given the stream definitions + and a flag stating if the test stream contains a single dataset. + By default, returns a :class:`DatasetScenario`. + + :returns: A benchmark instance. 
""" input_streams = dict(train=train_tensors, test=test_tensors) @@ -710,14 +1089,14 @@ def create_generic_benchmark_from_tensor_lists( if other_streams_tensors is not None: input_streams = {**input_streams, **other_streams_tensors} - stream_definitions: Dict[str, Sequence[ClassificationDataset]] = dict() + stream_definitions: Dict[str, Sequence[AvalancheDataset]] = dict() for stream_name, list_of_exps_tensors in input_streams.items(): - stream_datasets: List[ClassificationDataset] = [] + stream_datasets: List[AvalancheDataset] = [] for exp_id, exp_tensors in enumerate(list_of_exps_tensors): stream_datasets.append( - make_tensor_classification_dataset( - *exp_tensors, task_labels=task_labels[exp_id] + make_generic_tensor_dataset( + exp_tensors, task_labels=task_labels[exp_id] ) ) @@ -733,6 +1112,8 @@ def create_generic_benchmark_from_tensor_lists( eval_target_transform=eval_target_transform, complete_test_set_only=complete_test_set_only, other_streams_transforms=other_streams_transforms, + dataset_factory=dataset_factory, + benchmark_factory=benchmark_factory ) diff --git a/avalanche/benchmarks/scenarios/generic_scenario_creation.py b/avalanche/benchmarks/scenarios/generic_scenario_creation.py deleted file mode 100644 index 410dda14f..000000000 --- a/avalanche/benchmarks/scenarios/generic_scenario_creation.py +++ /dev/null @@ -1,587 +0,0 @@ -################################################################################ -# Copyright (c) 2021 ContinualAI. # -# Copyrights licensed under the MIT License. # -# See the accompanying LICENSE file for terms. # -# # -# Date: 22-06-2020 # -# Author(s): Lorenzo Pellegrini # -# E-mail: contact@continualai.org # -# Website: avalanche.continualai.org # -################################################################################ - -""" This module contains DEPRECATED mid-level benchmark generators. -Please use the ones found in generic_benchmark_creation. -""" - -import warnings -from pathlib import Path -from typing import Sequence, Union, SupportsInt, Any, Tuple - -from torch import Tensor - -from avalanche.benchmarks.utils import ( - make_tensor_classification_dataset, - SupportedDataset, - datasets_from_paths, - make_classification_dataset, -) -from avalanche.benchmarks.utils import datasets_from_filelists -from .classification_scenario import GenericCLScenario -from ..utils.flat_data import ConstantSequence - - -def create_multi_dataset_generic_scenario( - train_dataset_list: Sequence[SupportedDataset], - test_dataset_list: Sequence[SupportedDataset], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_multi_dataset_generic_benchmark`. - - Creates a generic scenario given a list of datasets and the respective task - labels. Each training dataset will be considered as a separate training - experience. Contents of the datasets will not be changed, including the - targets. - - When loading the datasets from a set of fixed filelist, consider using - the :func:`create_generic_scenario_from_filelists` helper method instead. - - In its base form, this function accepts a list of test datsets that must - contain the same amount of datasets of the training list. - Those pairs are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. 
However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - Beware that pattern transformations must already be included in the - datasets (when needed). - - :param train_dataset_list: A list of training datasets. - :param test_dataset_list: A list of test datasets. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_dataset_list`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_dataset_list`` - parameter must be list with a single element (the complete test set). - Defaults to False, which means that ``train_dataset_list`` and - ``test_dataset_list`` must contain the same amount of datasets. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. 
- """ - - warnings.warn( - "create_multi_dataset_generic_scenario is deprecated in favor" - " of create_multi_dataset_generic_benchmark.", - DeprecationWarning, - ) - - transform_groups = dict( - train=(train_transform, train_target_transform), - eval=(eval_transform, eval_target_transform), - ) - - if complete_test_set_only: - if len(test_dataset_list) != 1: - raise ValueError( - "Test must contain 1 element when" - "complete_test_set_only is True" - ) - else: - if len(test_dataset_list) != len(train_dataset_list): - raise ValueError( - "Train and test lists must define the same " - " amount of experiences" - ) - - train_t_labels = [] - train_dataset_list_avl = [] - for dataset_idx, dataset in enumerate(train_dataset_list): - dataset = train_dataset_list[dataset_idx] - train_t_labels.append(task_labels[dataset_idx]) - train_dataset_list_avl.append(make_classification_dataset( - dataset, - task_labels=ConstantSequence( - task_labels[dataset_idx], len(dataset) - ), - transform_groups=transform_groups, - initial_transform_group="train", - )) - - test_t_labels = [] - test_dataset_list_avl = [] - for dataset_idx, dataset in enumerate(test_dataset_list): - dataset = test_dataset_list[dataset_idx] - - test_t_label = task_labels[dataset_idx] - if complete_test_set_only: - test_t_label = 0 - - test_t_labels.append(test_t_label) - - test_dataset_list_avl.append(make_classification_dataset( - dataset, - task_labels=ConstantSequence(test_t_label, len(dataset)), - transform_groups=transform_groups, - initial_transform_group="eval", - )) - - return GenericCLScenario( - stream_definitions={ - "train": (train_dataset_list_avl, train_t_labels), - "test": (test_dataset_list_avl, test_t_labels), - }, - complete_test_set_only=complete_test_set_only, - ) - - -def create_generic_scenario_from_filelists( - root: Union[str, Path], - train_file_lists: Sequence[Union[str, Path]], - test_file_lists: Union[Union[str, Path], Sequence[Union[str, Path]]], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_filelists`. - - Creates a generic scenario given a list of filelists and the respective task - labels. A separate dataset will be created for each filelist and each of - those training datasets will be considered a separate training experience. - - In its base form, this function accepts a list of filelists for the test - datsets that must contain the same amount of elements of the training list. - Those pairs of datasets are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - This helper functions is the best shot when loading Caffe-style dataset - based on filelists. - - The resulting benchmark instance and the intermediate datasets used to - populate it will be of type CLASSIFICATION. - - :param root: The root path of the dataset. - :param train_file_lists: A list of filelists describing the - paths of the training patterns for each experience. - :param test_file_lists: A list of filelists describing the - paths of the test patterns for each experience. - :param task_labels: A list of task labels. 
Must contain the same amount of - elements of the ``train_file_lists`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_file_lists`` - parameter must be list with a single element (the complete test set). - Alternatively, can be a plain string or :class:`Path` object. - Defaults to False, which means that ``train_file_lists`` and - ``test_file_lists`` must contain the same amount of filelists paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "create_generic_scenario_from_filelists is deprecated in " - "favor of create_generic_benchmark_from_filelists.", - DeprecationWarning, - ) - - train_datasets, test_dataset = datasets_from_filelists( - root, - train_file_lists, - test_file_lists, - complete_test_set_only=complete_test_set_only, - ) - - return create_multi_dataset_generic_scenario( - train_datasets, - test_dataset, - task_labels, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - complete_test_set_only=complete_test_set_only, - ) - - -FileAndLabel = Tuple[Union[str, Path], int] - - -def create_generic_scenario_from_paths( - train_list_of_files: Sequence[Sequence[FileAndLabel]], - test_list_of_files: Union[ - Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]] - ], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_paths`. - - Creates a generic scenario given a sequence of lists of files. A separate - dataset will be created for each list. Each of those training datasets - will be considered a separate training experience. - - This is very similar to `create_generic_scenario_from_filelists`, with the - main difference being that `create_generic_scenario_from_filelists` - accepts, for each experience, a file list formatted in Caffe-style. - On the contrary, this accepts a list of tuples where each tuple contains - two elements: the full path to the pattern and its label. - Optionally, the tuple may contain a third element describing the bounding - box of the element to crop. This last bounding box may be useful when trying - to extract the part of the image depicting the desired element. 
- - In its base form, this function accepts a list for the test datasets that - must contain the same amount of elements of the training list. - Those pairs of datasets are then used to create the "past", "cumulative" - (a.k.a. growing) and "future" test sets. However, in certain Continual - Learning scenarios only the concept of "complete" test set makes sense. In - that case, the ``complete_test_set_only`` should be set to True (see the - parameter description for more info). - - The label of each pattern doesn't have to be an int. - - :param train_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that training experience, as - tuples. Each tuple must contain two elements: the full path to the - pattern and its class label. Optionally, the tuple may contain a - third element describing the bounding box to use for cropping (top, - left, height, width). - :param test_list_of_files: A list of lists. Each list describes the paths - and labels of patterns to include in that test experience, as tuples. - Each tuple must contain two elements: the full path to the pattern - and its class label. Optionally, the tuple may contain a third element - describing the bounding box to use for cropping (top, left, height, - width). - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_file_lists`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that the ``test_list_of_files`` - parameter must define a single experience (the complete test set). - Defaults to False, which means that ``train_list_of_files`` and - ``test_list_of_files`` must contain the same amount of paths. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. 
- """ - - warnings.warn( - "create_generic_scenario_from_paths is deprecated in favor" - " of create_generic_benchmark_from_paths.", - DeprecationWarning, - ) - - train_datasets, test_dataset = datasets_from_paths( - train_list_of_files, - test_list_of_files, - complete_test_set_only=complete_test_set_only, - ) - - return create_multi_dataset_generic_scenario( - train_datasets, - test_dataset, - task_labels, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - complete_test_set_only=complete_test_set_only, - ) - - -def create_generic_scenario_from_tensor_lists( - train_tensors: Sequence[Sequence[Any]], - test_tensors: Sequence[Sequence[Any]], - task_labels: Sequence[int], - *, - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_tensor_lists`. - - Creates a generic scenario given lists of Tensors. A separate dataset will - be created from each Tensor tuple (x, y, z, ...) and each of those training - datasets will be considered a separate training experience. Using this - helper function is the lowest-level way to create a Continual Learning - scenario. When possible, consider using higher level helpers. - - Experiences are defined by passing lists of tensors as the `train_tensors` - and `test_tensors` parameter. Those parameters must be lists containing - sub-lists of tensors, one for each experience. Each tensor defines the value - of a feature ("x", "y", "z", ...) for all patterns of that experience. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_tensors: A list of lists. The first list must contain the - tensors for the first training experience (one tensor per feature), the - second list must contain the tensors for the second training experience, - and so on. - :param test_tensors: A list of lists. The first list must contain the - tensors for the first test experience (one tensor per feature), the - second list must contain the tensors for the second test experience, - and so on. When using `complete_test_set_only`, this parameter - must be a list containing a single sub-list for the single test - experience. - :param task_labels: A list of task labels. Must contain a task label for - each experience. For Single-Incremental-Task (a.k.a. Task-Free) - scenarios, this is usually a list of zeros. For Multi Task scenario, - this is usually a list of ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that ``test_tensors`` must - define a single experience. Defaults to False, which means that - ``train_tensors`` and ``test_tensors`` must define the same - amount of experiences. 
- :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. - """ - - warnings.warn( - "create_generic_scenario_from_tensor_lists is deprecated in " - "favor of create_generic_benchmark_from_tensor_lists.", - DeprecationWarning, - ) - - train_datasets = [ - make_tensor_classification_dataset(*exp_tensors) - for exp_tensors in train_tensors - ] - - test_datasets = [ - make_tensor_classification_dataset(*exp_tensors) - for exp_tensors in test_tensors - ] - - return create_multi_dataset_generic_scenario( - train_datasets, - test_datasets, - task_labels, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - complete_test_set_only=complete_test_set_only, - ) - - -def create_generic_scenario_from_tensors( - train_data_x: Sequence[Any], - train_data_y: Sequence[Sequence[SupportsInt]], - test_data_x: Union[Any, Sequence[Any]], - test_data_y: Union[Any, Sequence[Sequence[SupportsInt]]], - task_labels: Sequence[int], - complete_test_set_only: bool = False, - train_transform=None, - train_target_transform=None, - eval_transform=None, - eval_target_transform=None, -) -> GenericCLScenario: - """ - This helper function is DEPRECATED in favor of - `create_generic_benchmark_from_tensor_lists`. - - Please consider using :func:`create_generic_scenario_from_tensor_lists` - instead. When switching to the new function, please keep in mind that the - format of the parameters is completely different! - - Creates a generic scenario given lists of Tensors and the respective task - labels. A separate dataset will be created from each Tensor pair (x + y) - and each of those training datasets will be considered a separate - training experience. Contents of the datasets will not be changed, including - the targets. Using this helper function is the lower level way to create a - Continual Learning scenario. When possible, consider using higher level - helpers. - - By default the second tensor of each experience will be used to fill the - `targets` value (label of each pattern). - - In its base form, the test lists must contain the same amount of elements of - the training lists. Those pairs of datasets are then used to create the - "past", "cumulative" (a.k.a. growing) and "future" test sets. - However, in certain Continual Learning scenarios only the concept of - "complete" test set makes sense. In that case, the - ``complete_test_set_only`` should be set to True (see the parameter - description for more info). - - :param train_data_x: A list of Tensors (one per experience) containing the - patterns of the training sets. - :param train_data_y: A list of Tensors or int lists containing the - labels of the patterns of the training sets. 
Must contain the same - number of elements of ``train_datasets_x``. - :param test_data_x: A Tensor or a list of Tensors (one per experience) - containing the patterns of the test sets. - :param test_data_y: A Tensor or a list of Tensors or int lists containing - the labels of the patterns of the test sets. Must contain the same - number of elements of ``test_datasets_x``. - :param task_labels: A list of task labels. Must contain the same amount of - elements of the ``train_datasets_x`` parameter. For - Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually - a list of zeros. For Multi Task scenario, this is usually a list of - ascending task labels (starting from 0). - :param complete_test_set_only: If True, only the complete test set will - be returned by the scenario. This means that ``test_data_x`` and - ``test_data_y`` must define a single experience. Defaults to False, - which means that ``train_data_*`` and ``test_data_*`` must define the - same amount of experiences. - :param train_transform: The transformation to apply to the training data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param train_target_transform: The transformation to apply to training - patterns targets. Defaults to None. - :param eval_transform: The transformation to apply to the test data, - e.g. a random crop, a normalization or a concatenation of different - transformations (see torchvision.transform documentation for a - comprehensive list of possible transformations). Defaults to None. - :param eval_target_transform: The transformation to apply to test - patterns targets. Defaults to None. - - :returns: A :class:`GenericCLScenario` instance. 
- """ - - warnings.warn( - "create_generic_scenario_from_tensors is deprecated in favor " - "of create_generic_benchmark_from_tensor_lists.", - DeprecationWarning, - ) - - if len(train_data_x) != len(train_data_y): - raise ValueError( - "train_data_x and train_data_y must contain" - " the same amount of elements" - ) - - if type(test_data_x) != type(test_data_y): - raise ValueError( - "test_data_x and test_data_y must be of" " the same type" - ) - - if isinstance(test_data_x, Tensor): - test_data_x = [test_data_x] - test_data_y = [test_data_y] - else: - if len(test_data_x) != len(test_data_y): - raise ValueError( - "test_data_x and test_data_y must contain" - " the same amount of elements" - ) - - exp_train_first_structure = [] - exp_test_first_structure = [] - for exp_idx in range(len(train_data_x)): - exp_x = train_data_x[exp_idx] - exp_y = train_data_y[exp_idx] - - exp_train_first_structure.append([exp_x, exp_y]) - - for exp_idx in range(len(test_data_x)): - exp_x = test_data_x[exp_idx] - exp_y = test_data_y[exp_idx] - - exp_test_first_structure.append([exp_x, exp_y]) - - return create_generic_scenario_from_tensor_lists( - train_tensors=exp_train_first_structure, - test_tensors=exp_test_first_structure, - task_labels=task_labels, - complete_test_set_only=complete_test_set_only, - train_transform=train_transform, - train_target_transform=train_target_transform, - eval_transform=eval_transform, - eval_target_transform=eval_target_transform, - ) - - -__all__ = [ - "create_multi_dataset_generic_scenario", - "create_generic_scenario_from_filelists", - "create_generic_scenario_from_paths", - "create_generic_scenario_from_tensor_lists", - "create_generic_scenario_from_tensors", -] diff --git a/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py b/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py index fdb75916a..8d04e5cb7 100644 --- a/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py +++ b/avalanche/benchmarks/scenarios/new_classes/nc_scenario.py @@ -20,7 +20,7 @@ ) from avalanche.benchmarks.utils import classification_subset from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset, SupervisedClassificationDataset + ClassificationDataset from avalanche.benchmarks.utils.flat_data import ConstantSequence @@ -29,7 +29,7 @@ class NCScenario( ClassificationScenario[ 'NCStream', 'NCExperience', - SupervisedClassificationDataset]): + ClassificationDataset]): """ This class defines a "New Classes" scenario. Once created, an instance @@ -87,7 +87,7 @@ def __init__( :param fixed_class_order: If not None, the class order to use (overrides the shuffle argument). Very useful for enhancing reproducibility. Defaults to None. - :param per_experience_classes: Is not None, a dictionary whose keys are + :param per_experience_classes: If not None, a dictionary whose keys are (0-indexed) experience IDs and their values are the number of classes to include in the respective experiences. The dictionary doesn't have to contain a key for each experience! All the remaining @@ -124,10 +124,8 @@ class "34" will be mapped to "1", class "11" to "2" and so on. test datasets must be used. Defaults to None. 
""" - if not isinstance(train_dataset, SupervisedClassificationDataset): - train_dataset = SupervisedClassificationDataset(train_dataset) - if not isinstance(test_dataset, SupervisedClassificationDataset): - test_dataset = SupervisedClassificationDataset(test_dataset) + train_dataset = ClassificationDataset(train_dataset) + test_dataset = ClassificationDataset(test_dataset) if ( class_ids_from_zero_from_first_exp @@ -561,7 +559,7 @@ def __init__( set_stream_info=set_stream_info) -class NCExperience(ClassificationExperience[SupervisedClassificationDataset]): +class NCExperience(ClassificationExperience[ClassificationDataset]): """ Defines a "New Classes" experience. It defines fields to obtain the current dataset and the associated task label. It also keeps a reference to the diff --git a/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py b/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py index 63abf42c6..678363b1a 100644 --- a/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py +++ b/avalanche/benchmarks/scenarios/new_instances/ni_scenario.py @@ -23,7 +23,7 @@ ) from avalanche.benchmarks.utils import classification_subset from avalanche.benchmarks.utils.classification_dataset import \ - ClassificationDataset, SupervisedClassificationDataset + ClassificationDataset from avalanche.benchmarks.utils.flat_data import ConstantSequence @@ -31,7 +31,7 @@ class NIScenario( ClassificationScenario[ 'NIStream', 'NIExperience', - SupervisedClassificationDataset]): + ClassificationDataset]): """ This class defines a "New Instance" scenario. Once created, an instance of this class can be iterated in order to obtain @@ -107,8 +107,8 @@ def __init__( test datasets must be used. Defaults to None. """ - train_dataset = SupervisedClassificationDataset(train_dataset) - test_dataset = SupervisedClassificationDataset(test_dataset) + train_dataset = ClassificationDataset(train_dataset) + test_dataset = ClassificationDataset(test_dataset) self._has_task_labels = task_labels @@ -484,7 +484,7 @@ def __init__( set_stream_info=set_stream_info) -class NIExperience(ClassificationExperience[SupervisedClassificationDataset]): +class NIExperience(ClassificationExperience[ClassificationDataset]): """ Defines a "New Instances" experience. It defines fields to obtain the current dataset and the associated task label. 
It also keeps a reference diff --git a/avalanche/benchmarks/utils/classification_dataset.py b/avalanche/benchmarks/utils/classification_dataset.py index e82141cd3..9ad055c9e 100644 --- a/avalanche/benchmarks/utils/classification_dataset.py +++ b/avalanche/benchmarks/utils/classification_dataset.py @@ -36,6 +36,7 @@ from avalanche.benchmarks.utils.transform_groups import ( TransformGroupDef, DefaultTransformGroups, + TransformGroups, XTransform, YTransform, ) @@ -45,6 +46,7 @@ ) from avalanche.benchmarks.utils.flat_data import ConstantSequence from avalanche.benchmarks.utils.dataset_definitions import ( + IDataset, ISupportedClassificationDataset, ITensorDataset, IDatasetWithTargets, @@ -61,7 +63,6 @@ Dict, Tuple, Mapping, - overload, ) @@ -86,10 +87,43 @@ def lookup(indexable, idx): class ClassificationDataset(AvalancheDataset[T_co]): + def __init__( + self, + datasets: Sequence[IDataset[T_co]], + *, + indices: Optional[List[int]] = None, + data_attributes: Optional[List[DataAttribute]] = None, + transform_groups: Optional[TransformGroups] = None, + frozen_transform_groups: Optional[TransformGroups] = None, + collate_fn: Optional[Callable[[List], Any]] = None): + super().__init__( + datasets=datasets, + indices=indices, + data_attributes=data_attributes, + transform_groups=transform_groups, + frozen_transform_groups=frozen_transform_groups, + collate_fn=collate_fn + ) + + assert 'targets' in self._data_attributes, \ + 'The supervised version of the ClassificationDataset requires ' + \ + 'the targets field' + assert 'targets_task_labels' in self._data_attributes, \ + 'The supervised version of the ClassificationDataset requires ' + \ + 'the targets_task_labels field' + + @property + def targets(self) -> DataAttribute[TTargetType]: + return self._data_attributes['targets'] + + @property + def targets_task_labels(self) -> DataAttribute[int]: + return self._data_attributes['targets_task_labels'] + @property def task_pattern_indices(self): """A dictionary mapping task ids to their sample indices.""" - return self.targets_task_labels.val_to_idx # type: ignore + return self.targets_task_labels.val_to_idx @property def task_set(self: TClassificationDataset) -> \ @@ -110,25 +144,6 @@ def concat(self, other): def __hash__(self): return id(self) - - -class SupervisedClassificationDataset(ClassificationDataset[T_co]): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - assert 'targets' in self._data_attributes, \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets field' - assert 'targets_task_labels' in self._data_attributes, \ - 'The supervised version of the ClassificationDataset requires ' + \ - 'the targets_task_labels field' - - @property - def targets(self) -> DataAttribute[TTargetType]: - return self._data_attributes['targets'] - - @property - def targets_task_labels(self) -> DataAttribute[int]: - return self._data_attributes['targets_task_labels'] SupportedDataset = Union[ @@ -140,37 +155,6 @@ def targets_task_labels(self) -> DataAttribute[int]: ] -@overload -def make_classification_dataset( - dataset: SupervisedClassificationDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> 
SupervisedClassificationDataset: - ... - - -@overload -def make_classification_dataset( - dataset: SupportedDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload def make_classification_dataset( dataset: SupportedDataset, *, @@ -182,20 +166,6 @@ def make_classification_dataset( targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> ClassificationDataset: - ... - - -def make_classification_dataset( - dataset: SupportedDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: """Avalanche Classification Dataset. Supervised continual learning benchmarks in Avalanche return instances of @@ -273,8 +243,6 @@ def make_classification_dataset( the default collate function will be used. """ - is_supervised = isinstance(dataset, SupervisedClassificationDataset) - transform_gs = _init_transform_groups( transform_groups, transform, @@ -293,26 +261,12 @@ def make_classification_dataset( if task_labels_data is not None: das.append(task_labels_data) - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) - - data: Union[ClassificationDataset, SupervisedClassificationDataset] - if is_supervised: - data = SupervisedClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) - else: - data = ClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) + data: ClassificationDataset = ClassificationDataset( + [dataset], + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + collate_fn=collate_fn, + ) if initial_transform_group is not None: return data.with_transforms(initial_transform_group) @@ -347,60 +301,6 @@ def _init_targets(dataset, targets, check_shape=True) -> \ return DataAttribute(targets, "targets") -@overload -def classification_subset( - dataset: SupervisedClassificationDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... 
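# A minimal usage sketch of the consolidated `make_classification_dataset` API
# (hedged illustration; the torchvision MNIST dataset and the "./data/mnist" root
# below are assumptions made only for this example). With the overloads gone, the
# helper always returns a ClassificationDataset carrying `targets` and
# `targets_task_labels`.
from torchvision.datasets import MNIST

from avalanche.benchmarks.utils.classification_dataset import (
    make_classification_dataset,
)

mnist_train = MNIST("./data/mnist", train=True, download=True)
# `targets` are discovered on the wrapped dataset; the single task label 0 is
# broadcast to every sample.
train_data = make_classification_dataset(mnist_train, task_labels=0)
print(len(train_data.targets), len(train_data.targets_task_labels))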
- - -@overload -def classification_subset( - dataset: SupportedDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload -def classification_subset( - dataset: SupportedDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> ClassificationDataset: - ... - - def classification_subset( dataset: SupportedDataset, indices: Optional[Sequence[int]] = None, @@ -414,7 +314,7 @@ def classification_subset( task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: +) -> ClassificationDataset: """Creates an ``AvalancheSubset`` instance. For simple subset operations you should use the method @@ -482,8 +382,6 @@ def classification_subset( `collate_fn` field exists in the dataset. If no such field exists, the default collate function will be used. 
""" - - is_supervised = isinstance(dataset, SupervisedClassificationDataset) if isinstance(dataset, ClassificationDataset): if ( @@ -534,18 +432,13 @@ def classification_subset( das = [] if targets_data is not None: das.append(targets_data) - - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) if task_labels_data is not None: # special treatment for task labels depending on length for # backward compatibility if len(task_labels_data) != len(dataset): # task labels are already subsampled - dataset = ClassificationDataset( + dataset_avl = AvalancheDataset( [dataset], indices=list(indices) if indices is not None else None, data_attributes=das, @@ -553,66 +446,22 @@ def classification_subset( frozen_transform_groups=frozen_transform_groups, collate_fn=collate_fn, ) + # now add task labels - if is_supervised: - return SupervisedClassificationDataset( - [dataset], - data_attributes=[dataset.targets, # type: ignore - task_labels_data]) - else: - return ClassificationDataset( - [dataset], - data_attributes=[dataset.targets, # type: ignore - task_labels_data]) + return ClassificationDataset( + [dataset_avl], + data_attributes=[task_labels_data]) else: das.append(task_labels_data) - if is_supervised: - return SupervisedClassificationDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - else: - return ClassificationDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - - -@overload -def make_tensor_classification_dataset( - *dataset_tensors: Sequence, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Dict[str, Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = "train", - task_labels: Union[int, Sequence[int]], - targets: Union[Sequence[TTargetType], int], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload -def make_tensor_classification_dataset( - *dataset_tensors: Sequence, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Dict[str, Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = "train", - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Union[Sequence[TTargetType], int]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: - ... 
+ return ClassificationDataset( + [dataset], + indices=list(indices) if indices is not None else None, + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + frozen_transform_groups=frozen_transform_groups, + collate_fn=collate_fn, + ) def make_tensor_classification_dataset( @@ -624,7 +473,7 @@ def make_tensor_classification_dataset( task_labels: Optional[Union[int, Sequence[int]]] = None, targets: Optional[Union[Sequence[TTargetType], int]] = None, collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: +) -> ClassificationDataset: """Creates a ``AvalancheTensorDataset`` instance. A Dataset that wraps existing ndarrays, Tensors, lists... to provide @@ -697,26 +546,13 @@ def make_tensor_classification_dataset( for d in [targets_data, task_labels_data]: if d is not None: das.append(d) - - # Check if supervision data has been added - is_supervised = ( - targets_data is not None and - task_labels_data is not None) - - if is_supervised: - return SupervisedClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) - else: - return ClassificationDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) + + return ClassificationDataset( + [dataset], + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + collate_fn=collate_fn, + ) class _TensorClassificationDataset(TensorDataset): @@ -728,43 +564,6 @@ def __getitem__(self, item): return tuple(elem) -@overload -def concat_classification_datasets( - datasets: Sequence[SupervisedClassificationDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload -def concat_classification_datasets( - datasets: Sequence[SupportedDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int], Sequence[Sequence[int]]], - targets: Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedClassificationDataset: - ... - - -@overload def concat_classification_datasets( datasets: Sequence[SupportedDataset], *, @@ -780,24 +579,6 @@ def concat_classification_datasets( ]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> ClassificationDataset: - ... 
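# A hedged sketch of the tensor-based helpers touched above: both
# `make_tensor_classification_dataset` and `classification_subset` now return a
# plain ClassificationDataset. The random tensors are toy data used only for
# illustration.
import torch

from avalanche.benchmarks.utils.classification_dataset import (
    classification_subset,
    make_tensor_classification_dataset,
)

x = torch.rand(100, 3, 32, 32)
y = torch.randint(0, 10, (100,))
# Task label 0 is applied to all samples; targets default to the second tensor.
tensor_data = make_tensor_classification_dataset(x, y, task_labels=0)

# Keep only the first ten samples; targets and task labels are subsampled with them.
small_split = classification_subset(tensor_data, indices=list(range(10)))
print(len(small_split), len(small_split.targets))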
- - -def concat_classification_datasets( - datasets: Sequence[SupportedDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[ClassificationDataset, SupervisedClassificationDataset]: """Creates a ``AvalancheConcatDataset`` instance. For simple subset operations you should use the method @@ -878,7 +659,6 @@ def concat_classification_datasets( initial_transform_group = \ find_common_transforms_group(datasets, default_group="train") - supervised = True for dd, dataset_task_labels, dataset_targets in \ zip(datasets, per_dataset_task_labels, per_dataset_targets): dd = make_classification_dataset( @@ -891,9 +671,6 @@ def concat_classification_datasets( targets=dataset_targets, collate_fn=collate_fn, ) - - if not isinstance(dd, SupervisedClassificationDataset): - supervised = False dds.append(dd) @@ -907,24 +684,11 @@ def concat_classification_datasets( ) else: transform_groups_obj = None - - supervised = supervised and ( - (len(dds) > 0) or ( - targets is not None and task_labels is not None - ) - ) - data: Union[SupervisedClassificationDataset, ClassificationDataset] - if supervised: - data = SupervisedClassificationDataset( - dds, - transform_groups=transform_groups_obj - ) - else: - data = ClassificationDataset( - dds, - transform_groups=transform_groups_obj - ) + data: ClassificationDataset = ClassificationDataset( + dds, + transform_groups=transform_groups_obj + ) return data.with_transforms(initial_transform_group) @@ -958,8 +722,8 @@ def _select_targets( def concat_classification_datasets_sequentially( train_dataset_list: Sequence[ISupportedClassificationDataset], test_dataset_list: Sequence[ISupportedClassificationDataset], -) -> Tuple[SupervisedClassificationDataset, - SupervisedClassificationDataset, +) -> Tuple[ClassificationDataset, + ClassificationDataset, List[list]]: """ Concatenates a list of datasets. This is completely different from @@ -1004,15 +768,15 @@ def concat_classification_datasets_sequentially( :returns: A concatenated dataset. 
""" - remapped_train_datasets: List[SupervisedClassificationDataset] = [] - remapped_test_datasets: List[SupervisedClassificationDataset] = [] + remapped_train_datasets: List[ClassificationDataset] = [] + remapped_test_datasets: List[ClassificationDataset] = [] next_remapped_idx = 0 train_dataset_list_sup = list( - map(as_supervised_classification_dataset, train_dataset_list) + map(make_classification_dataset, train_dataset_list) ) test_dataset_list_sup = list( - map(as_supervised_classification_dataset, test_dataset_list) + map(make_classification_dataset, test_dataset_list) ) del train_dataset_list del test_dataset_list @@ -1076,58 +840,12 @@ def concat_classification_datasets_sequentially( ) -def as_supervised_classification_dataset( - dataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None) -> \ - SupervisedClassificationDataset: - - if ( - transform is not None or - target_transform is not None or - transform_groups is not None or - initial_transform_group is not None or - task_labels is not None or - targets is not None or - collate_fn is not None or - not isinstance(dataset, SupervisedClassificationDataset) - ): - result_dataset = make_classification_dataset( - dataset=dataset, - transform=transform, - target_transform=target_transform, - transform_groups=transform_groups, - initial_transform_group=initial_transform_group, - task_labels=task_labels, - targets=targets, - collate_fn=collate_fn - ) - - if not isinstance(result_dataset, SupervisedClassificationDataset): - raise ValueError( - 'The given dataset does not have supervision fields ' - '(targets, task_labels).' 
- ) - - return result_dataset - - return dataset - - __all__ = [ "SupportedDataset", "ClassificationDataset", - "SupervisedClassificationDataset", "make_classification_dataset", "classification_subset", "make_tensor_classification_dataset", "concat_classification_datasets", - "concat_classification_datasets_sequentially", - "as_supervised_classification_dataset" + "concat_classification_datasets_sequentially" ] diff --git a/avalanche/benchmarks/utils/detection_dataset.py b/avalanche/benchmarks/utils/detection_dataset.py index dec5a3206..602e150b0 100644 --- a/avalanche/benchmarks/utils/detection_dataset.py +++ b/avalanche/benchmarks/utils/detection_dataset.py @@ -73,32 +73,10 @@ class DetectionDataset(AvalancheDataset[T_co]): - @property - def task_pattern_indices(self) -> Dict[int, Sequence[int]]: - """A dictionary mapping task ids to their sample indices.""" - # Assumes that targets_task_labels exists - t_labels: DataAttribute[int] = self.targets_task_labels # type: ignore - return t_labels.val_to_idx - - @property - def task_set(self: TDetectionDataset) -> TaskSet[TDetectionDataset]: - """Returns the dataset's ``TaskSet``, which is a mapping .""" - return TaskSet(self) - def subset(self, indices): - data = super().subset(indices) - return data.with_transforms(self._transform_groups.current_group) - - def concat(self, other): - data = super().concat(other) - return data.with_transforms(self._transform_groups.current_group) - - -class SupervisedDetectionDataset(DetectionDataset[T_co]): def __init__( self, - datasets: List[IDataset[T_co]], + datasets: Sequence[IDataset[T_co]], *, indices: Optional[List[int]] = None, data_attributes: Optional[List[DataAttribute]] = None, @@ -120,7 +98,7 @@ def __init__( assert hasattr(self, 'targets_task_labels'), \ 'The supervised version of the ClassificationDataset requires ' + \ 'the targets_task_labels field' - + @property def targets(self) -> DataAttribute[TTargetType]: return self._data_attributes['targets'] @@ -128,6 +106,30 @@ def targets(self) -> DataAttribute[TTargetType]: @property def targets_task_labels(self) -> DataAttribute[int]: return self._data_attributes['targets_task_labels'] + + @property + def task_pattern_indices(self) -> Dict[int, Sequence[int]]: + """A dictionary mapping task ids to their sample indices.""" + # Assumes that targets_task_labels exists + t_labels: DataAttribute[int] = self.targets_task_labels + return t_labels.val_to_idx + + @property + def task_set(self: TDetectionDataset) -> TaskSet[TDetectionDataset]: + """Returns the dataset's ``TaskSet``, which is a mapping .""" + return TaskSet(self) + + def subset(self, indices): + data = super().subset(indices) + return data.with_transforms(self._transform_groups.current_group) + + def concat(self, other): + data = super().concat(other) + return data.with_transforms(self._transform_groups.current_group) + + def __hash__(self): + return id(self) SupportedDetectionDataset = Union[ @@ -138,37 +140,6 @@ def targets_task_labels(self) -> DataAttribute[int]: ] -@overload -def make_detection_dataset( - dataset: SupervisedDetectionDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... 
- - -@overload -def make_detection_dataset( - dataset: SupportedDetectionDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... - - -@overload def make_detection_dataset( dataset: SupportedDetectionDataset, *, @@ -180,20 +151,6 @@ def make_detection_dataset( targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> DetectionDataset: - ... - - -def make_detection_dataset( - dataset: SupportedDetectionDataset, - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[DetectionDataset, SupervisedDetectionDataset]: """Avalanche Detection Dataset. Supervised continual learning benchmarks in Avalanche return instances of @@ -271,8 +228,6 @@ def make_detection_dataset( the default collate function for detection will be used. """ - is_supervised = isinstance(dataset, SupervisedDetectionDataset) - transform_gs = _init_transform_groups( transform_groups, transform, @@ -290,30 +245,16 @@ def make_detection_dataset( das.append(targets_data) if task_labels_data is not None: das.append(task_labels_data) - - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) if collate_fn is None: collate_fn = getattr(dataset, 'collate_fn', detection_collate_fn) - data: Union[DetectionDataset, SupervisedDetectionDataset] - if is_supervised: - data = SupervisedDetectionDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) - else: - data = DetectionDataset( - [dataset], - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - collate_fn=collate_fn, - ) + data: DetectionDataset = DetectionDataset( + [dataset], + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + collate_fn=collate_fn, + ) if initial_transform_group is not None: return data.with_transforms(initial_transform_group) @@ -359,43 +300,6 @@ def _detection_class_mapping_transform(class_mapping, example_target_dict): return example_target_dict -@overload -def detection_subset( - dataset: SupervisedDetectionDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... 
- - -@overload -def detection_subset( - dataset: SupportedDetectionDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int]], - targets: Sequence[TTargetType], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... - - -@overload def detection_subset( dataset: SupportedDetectionDataset, indices: Optional[Sequence[int]] = None, @@ -410,23 +314,6 @@ def detection_subset( targets: Optional[Sequence[TTargetType]] = None, collate_fn: Optional[Callable[[List], Any]] = None ) -> DetectionDataset: - ... - - -def detection_subset( - dataset: SupportedDetectionDataset, - indices: Optional[Sequence[int]] = None, - *, - class_mapping: Optional[Sequence[int]] = None, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, Sequence[int]]] = None, - targets: Optional[Sequence[TTargetType]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[DetectionDataset, SupervisedDetectionDataset]: """Creates an ``AvalancheSubset`` instance. For simple subset operations you should use the method @@ -492,8 +379,6 @@ def detection_subset( the default collate function for detection will be used """ - is_supervised = isinstance(dataset, SupervisedDetectionDataset) - if isinstance(dataset, DetectionDataset): if ( class_mapping is None @@ -559,90 +444,17 @@ def detection_subset( if task_labels_data is not None: das.append(task_labels_data) - # Check if supervision data has been added - is_supervised = is_supervised or ( - targets_data is not None and - task_labels_data is not None) - if collate_fn is None: collate_fn = detection_collate_fn - if is_supervised: - return SupervisedDetectionDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - else: - return DetectionDataset( - [dataset], - indices=list(indices) if indices is not None else None, - data_attributes=das if len(das) > 0 else None, - transform_groups=transform_gs, - frozen_transform_groups=frozen_transform_groups, - collate_fn=collate_fn, - ) - - -@overload -def concat_detection_datasets( - datasets: Sequence[SupervisedDetectionDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... 
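# A hedged sketch for the detection counterparts above: `make_detection_dataset` and
# `detection_subset` now always return a DetectionDataset. The toy dataset below uses
# the torchvision-style (image, target_dict) format and exists only for illustration.
import torch
from torch.utils.data import Dataset

from avalanche.benchmarks.utils.detection_dataset import (
    detection_subset,
    make_detection_dataset,
)


class ToyDetectionSet(Dataset):
    def __len__(self):
        return 4

    def __getitem__(self, idx):
        image = torch.rand(3, 64, 64)
        target = {
            "boxes": torch.tensor([[4.0, 4.0, 30.0, 30.0]]),
            "labels": torch.tensor([1]),
        }
        return image, target


toy = ToyDetectionSet()
# The toy dataset has no `targets` field, so targets are passed explicitly.
targets = [toy[i][1] for i in range(len(toy))]
det_data = make_detection_dataset(toy, task_labels=0, targets=targets)
det_split = detection_subset(det_data, indices=[0, 1])
print(len(det_data), len(det_split))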
- - -@overload -def concat_detection_datasets( - datasets: Sequence[SupportedDetectionDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Union[int, Sequence[int], Sequence[Sequence[int]]], - targets: Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ], - collate_fn: Optional[Callable[[List], Any]] = None -) -> SupervisedDetectionDataset: - ... - - -@overload -def concat_detection_datasets( - datasets: Sequence[SupportedDetectionDataset], - *, - transform: Optional[XTransform] = None, - target_transform: Optional[YTransform] = None, - transform_groups: Optional[Mapping[str, - Tuple[XTransform, YTransform]]] = None, - initial_transform_group: Optional[str] = None, - task_labels: Optional[Union[int, - Sequence[int], - Sequence[Sequence[int]]]] = None, - targets: Optional[Union[ - Sequence[TTargetType], Sequence[Sequence[TTargetType]] - ]] = None, - collate_fn: Optional[Callable[[List], Any]] = None -) -> DetectionDataset: - ... + return DetectionDataset( + [dataset], + indices=list(indices) if indices is not None else None, + data_attributes=das if len(das) > 0 else None, + transform_groups=transform_gs, + frozen_transform_groups=frozen_transform_groups, + collate_fn=collate_fn, + ) def concat_detection_datasets( @@ -660,7 +472,7 @@ def concat_detection_datasets( Sequence[TTargetType], Sequence[Sequence[TTargetType]] ]] = None, collate_fn: Optional[Callable[[List], Any]] = None -) -> Union[DetectionDataset, SupervisedDetectionDataset]: +) -> DetectionDataset: """Creates a ``AvalancheConcatDataset`` instance. For simple subset operations you should use the method diff --git a/avalanche/benchmarks/utils/utils.py b/avalanche/benchmarks/utils/utils.py index 1356f67bb..fce5c0bd1 100644 --- a/avalanche/benchmarks/utils/utils.py +++ b/avalanche/benchmarks/utils/utils.py @@ -29,10 +29,11 @@ SupportsInt, ) import warnings +import numpy as np import torch from torch import Tensor -from torch.utils.data import Subset, ConcatDataset +from torch.utils.data import Subset, ConcatDataset, TensorDataset from avalanche.benchmarks.utils.data import AvalancheDataset from avalanche.benchmarks.utils.data_attribute import DataAttribute @@ -673,6 +674,285 @@ def _get_task_labels_field(self) -> DataAttribute[int]: return self.data.targets_task_labels # type: ignore +def _numpy_is_sequence_int(numpy_tensor: np.ndarray) -> bool: + return issubclass(numpy_tensor.dtype.type, np.integer) + + +def _numpy_is_single_int(numpy_tensor: np.ndarray) -> bool: + try: + single_value = numpy_tensor.item() + return isinstance(single_value, int) + except ValueError: + return False + + +def _torch_is_sequence_int(torch_tensor: Tensor) -> bool: + return not torch.is_floating_point(torch_tensor) and \ + not torch.is_complex(torch_tensor) + + +def _torch_is_single_int(torch_tensor: Tensor) -> bool: + try: + single_value = torch_tensor.item() + return isinstance(single_value, int) + except ValueError: + return False + + +def _element_is_single_int(element: Any): + if isinstance(element, (int, np.integer)): + return True + if isinstance(element, Tensor): + return _torch_is_single_int(element) + else: + return False + + +def _is_int_iterable(iterable: Iterable[Any]): + if isinstance(iterable, torch.Tensor): + return _torch_is_sequence_int(iterable) + elif isinstance(iterable, np.ndarray): + return 
_numpy_is_sequence_int(iterable)
+    else:
+        for t in iterable:
+            if not _element_is_single_int(t):
+                return False
+        return True
+
+
+AnyT = TypeVar('AnyT', bound=Iterable)
+
+
+def _to_int_list(iterable: AnyT, force: bool = True) -> Union[AnyT, List[int]]:
+    if isinstance(iterable, torch.Tensor):
+        if _torch_is_sequence_int(iterable):
+            return iterable.tolist()
+        elif force:
+            raise ValueError('Cannot convert PyTorch Tensor to int list')
+        else:
+            return iterable
+    elif isinstance(iterable, np.ndarray):
+        if _numpy_is_sequence_int(iterable):
+            return iterable.tolist()
+        elif force:
+            raise ValueError('Cannot convert NumPy array to int list')
+        else:
+            return iterable  # type: ignore
+    else:
+        int_list = []
+        for t in iterable:
+            if _element_is_single_int(t):
+                int_list.append(t)
+            elif force:
+                raise ValueError('Cannot convert sequence to int list')
+            else:
+                return iterable
+        return int_list
+
+
+def _smart_init_targets(
+        dataset,
+        targets,
+        check_shape=True
+):
+    """
+    Initializes the targets for a given dataset.
+
+    To support backwards compatibility for when
+    :func:`create_multi_dataset_generic_benchmark` was
+    used to manage classification benchmarks only, this function will try to
+    mimic the steps taken in :func:`make_classification_dataset`, that is:
+
+    - will try to check if the input dataset has classification
+        targets (integer tensors / ndarray) and will cast them to
+        a list of native ints, as expected by other parts
+        of Avalanche.
+    - accepts passing an int for the targets field. The given int
+        will be applied to all exemplars in the dataset.
+    - supports PyTorch TensorDataset, by taking the second tensor as targets.
+
+    If targets are not of type int, then they will be returned as-is,
+    so that other types of datasets (regression, detection, ...) are
+    supported without issues.
+
+    :param dataset: The input dataset. If the `targets` parameter is
+        not None, then targets will be retrieved from the dataset.
+    :param targets: The targets to use. Can be None, in which case
+        targets will be retrieved from the dataset.
+    :param check_shape: If True, will check if the number of exemplars
+        in the dataset matches the length of the obtained targets sequence.
+    :return: The targets, as a DataAttribute of elements whose type depends
+        on the input dataset.
+    """
+    if targets is not None:
+        # User defined targets always take precedence
+        if isinstance(targets, int):
+            # Classification targets
+            targets = ConstantSequence(targets, len(dataset))
+        elif len(targets) != len(dataset) and check_shape:
+            raise ValueError(
+                "Invalid number of target labels. It must be equal to the "
+                "number of patterns in the dataset. Got {}, expected "
+                "{}!".format(len(targets), len(dataset))
+            )
+        return DataAttribute(targets, "targets")
+
+    targets = _traverse_supported_dataset(
+        dataset, _smart_select_targets_opt)
+
+    if targets is not None:
+        # Classification targets
+        targets = _to_int_list(targets, force=False)
+
+    if targets is None:
+        return None
+
+    return DataAttribute(targets, "targets")
+
+
+def _smart_select_targets_opt(
+        dataset: Any,
+        indices: Optional[List[int]]) -> Optional[Sequence[Any]]:
+    if hasattr(dataset, "targets"):
+        # Standard supported dataset
+        found_targets = dataset.targets
+    elif hasattr(dataset, "tensors") and len(dataset.tensors) >= 2:
+        # Support for PyTorch TensorDataset
+        found_targets = dataset.tensors[1]
+    else:
+        return None
+
+    if indices is not None:
+        found_targets = SubSequence(found_targets, indices=indices)
+
+    return found_targets
+
+
+def make_generic_dataset(
+    dataset: Any,
+    *,
+    transform: Optional[XTransform] = None,
+    target_transform: Optional[YTransform] = None,
+    transform_groups: Optional[Mapping[str, TransformGroupDef]] = None,
+    initial_transform_group: Optional[str] = None,
+    task_labels: Optional[Union[int, Sequence[int]]] = None,
+    targets: Optional[Any] = None,
+    collate_fn: Optional[Callable[[List], Any]] = None
+) -> AvalancheDataset:
+    """
+    Helper function that creates an :class:`AvalancheDataset` with
+    supervision fields `targets` and `targets_task_labels` (if given or found
+    in the input dataset).
+
+    :param dataset: The dataset to wrap in the AvalancheDataset. If it contains
+        `targets` and/or `targets_task_labels` fields, then those fields will
+        be inherited by the resulting dataset (if not given by the `targets`
+        or `task_labels` parameters). This will also check if the input dataset
+        is a :class:`TensorDataset` and, in that case, will try to use the
+        second tensor as the `targets` field.
+    :param transform: The transformation to apply to X values.
+        Mutually exclusive with `transform_groups`.
+    :param target_transform: The transformation to apply to Y values.
+        Mutually exclusive with `transform_groups`.
+    :param transform_groups: The transformation groups to add to the dataset.
+        Mutually exclusive with `transform` and `target_transform`.
+    :param task_labels: A list containing a task label for each example. Can
+        also be a plain `int`, in which case it will be applied to all
+        examples. If not None, shadows the `targets_task_labels` field from
+        the input dataset.
+    :param targets: A list containing a target for each example. If not None,
+        shadows the `targets` field from the input dataset.
+    :param collate_fn: The collate function to use when loading this dataset.
+
+    :returns: An :class:`AvalancheDataset`.
+    """
+    if isinstance(dataset, AvalancheDataset):
+        return dataset
+
+    transform_gs = _init_transform_groups(
+        transform_groups=transform_groups,
+        transform=transform,
+        target_transform=target_transform,
+        initial_transform_group=initial_transform_group,
+        dataset=dataset,
+    )
+
+    targets_data: Optional[DataAttribute[Any]] = \
+        _smart_init_targets(dataset, targets)
+    task_labels_data: Optional[DataAttribute[int]] = \
+        _init_task_labels(dataset, task_labels)
+
+    das: List[DataAttribute] = []
+    if targets_data is not None:
+        das.append(targets_data)
+    if task_labels_data is not None:
+        das.append(task_labels_data)
+
+    data = AvalancheDataset(
+        [dataset],
+        data_attributes=das if len(das) > 0 else None,
+        transform_groups=transform_gs,
+        collate_fn=collate_fn,
+    )
+
+    if initial_transform_group is not None:
+        return data.with_transforms(initial_transform_group)
+    else:
+        return data
+
+
+def make_generic_tensor_dataset(
+    dataset_tensors: Sequence,
+    *,
+    transform: Optional[XTransform] = None,
+    target_transform: Optional[YTransform] = None,
+    transform_groups: Optional[Mapping[str, TransformGroupDef]] = None,
+    initial_transform_group: Optional[str] = None,
+    task_labels: Optional[Union[int, Sequence[int]]] = None,
+    targets: Optional[Any] = None,
+    collate_fn: Optional[Callable[[List], Any]] = None
+) -> AvalancheDataset:
+    if len(dataset_tensors) < 1:
+        raise ValueError("At least one sequence must be passed")
+
+    if isinstance(targets, int):
+        targets = dataset_tensors[targets]
+    tts = []
+    for tt in dataset_tensors:  # TensorDataset requires PyTorch tensors
+        if not hasattr(tt, 'size'):
+            tt = torch.tensor(tt)
+        tts.append(tt)
+    dataset = TensorDataset(*tts)
+
+    transform_gs = _init_transform_groups(
+        transform_groups,
+        transform,
+        target_transform,
+        initial_transform_group,
+        dataset,
+    )
+    targets_data = _smart_init_targets(dataset, targets)
+    task_labels_data = _init_task_labels(dataset, task_labels)
+
+    das: List[DataAttribute] = []
+    if targets_data is not None:
+        das.append(targets_data)
+    if task_labels_data is not None:
+        das.append(task_labels_data)
+
+    data = AvalancheDataset(
+        [dataset],
+        data_attributes=das if len(das) > 0 else None,
+        transform_groups=transform_gs,
+        collate_fn=collate_fn,
+    )
+
+    if initial_transform_group is not None:
+        return data.with_transforms(initial_transform_group)
+    else:
+        return data
+
+
 __all__ = [
     "tensor_as_list",
     "grouped_and_ordered_indexes",
@@ -680,5 +960,7 @@ def _get_task_labels_field(self) -> DataAttribute[int]:
     "as_classification_dataset",
     "concat_datasets",
     "find_common_transforms_group",
-    "TaskSet"
+    "TaskSet",
+    "make_generic_dataset",
+    "make_generic_tensor_dataset"
 ]
diff --git a/avalanche/evaluation/metrics/checkpoint.py b/avalanche/evaluation/metrics/checkpoint.py
index 3c155c892..7793ec98b 100644
--- a/avalanche/evaluation/metrics/checkpoint.py
+++ b/avalanche/evaluation/metrics/checkpoint.py
@@ -10,9 +10,11 @@
 ################################################################################
 
 import copy
-from typing import TYPE_CHECKING
+import io
+from typing import TYPE_CHECKING, Optional
 
 from torch import Tensor
+import torch
 
 from avalanche.evaluation import PluginMetric
 from avalanche.evaluation.metric_results import MetricValue, MetricResult
@@ -46,9 +48,9 @@ def __init__(self):
         retrieved using the `result` method.
         """
         super().__init__()
-        self.weights = None
+        self.weights: Optional[bytes] = None
 
-    def update(self, weights) -> Tensor:
+    def update(self, weights: bytes):
         """
         Update the weight checkpoint at the current experience.
@@ -57,7 +59,7 @@ def update(self, weights) -> Tensor:
         """
         self.weights = weights
 
-    def result(self) -> Tensor:
+    def result(self) -> bytes:
         """
         Retrieves the weight checkpoint at the current experience.
 
@@ -87,12 +89,18 @@ def _package_result(self, strategy) -> "MetricResult":
     def after_training_exp(
         self, strategy: "SupervisedTemplate"
     ) -> "MetricResult":
-        model_params = copy.deepcopy(strategy.model.parameters())
-        self.update(model_params)
-        return None
+        buff = io.BytesIO()
+        model_params = copy.deepcopy(strategy.model).to('cpu')
+        torch.save(model_params, buff)
+        buff.seek(0)
+        self.update(buff.read())
+
+        return self._package_result(strategy)
 
     def __str__(self):
         return "WeightCheckpoint"
 
 
-__all__ = ["WeightCheckpoint"]
+__all__ = [
+    "WeightCheckpoint"
+]
diff --git a/avalanche/logging/text_logging.py b/avalanche/logging/text_logging.py
index f8151d420..ee3aad4aa 100644
--- a/avalanche/logging/text_logging.py
+++ b/avalanche/logging/text_logging.py
@@ -24,7 +24,7 @@
 if TYPE_CHECKING:
     from avalanche.training.templates import SupervisedTemplate
 
-UNSUPPORTED_TYPES: Tuple[Type] = (TensorImage,)
+UNSUPPORTED_TYPES: Tuple[Type, ...] = (TensorImage, bytes,)
 
 
 class TextLogger(BaseLogger, SupervisedPlugin):
diff --git a/avalanche/logging/wandb_logger.py b/avalanche/logging/wandb_logger.py
index 5ea49336b..dd97b3aef 100644
--- a/avalanche/logging/wandb_logger.py
+++ b/avalanche/logging/wandb_logger.py
@@ -11,10 +11,12 @@
 """ This module handles all the functionalities related to the logging of
 Avalanche experiments using Weights & Biases. """
 
-from typing import Union, List, TYPE_CHECKING
+import re
+from typing import Optional, Union, List, TYPE_CHECKING
 from pathlib import Path
 import os
 import errno
+import warnings
 
 import numpy as np
 from numpy import array
@@ -30,6 +32,7 @@
     MetricValue,
     TensorImage,
 )
+from avalanche.evaluation.metric_utils import phase_and_task
 from avalanche.logging import BaseLogger
 
 if TYPE_CHECKING:
@@ -37,6 +40,12 @@
     from avalanche.training.templates import SupervisedTemplate
 
 
+CHECKPOINT_METRIC_NAME = re.compile(
+    r"^WeightCheckpoint\/(?P<phase_name>\S+)_phase\/(?P<stream_name>\S+)_"
+    r"stream(\/Task(?P<task_id>\d+))?\/Exp(?P<experience_id>\d+)$"
+)
+
+
 class WandBLogger(BaseLogger, SupervisedPlugin):
     """Weights and Biases logger.
 
@@ -72,6 +81,9 @@ def __init__(
         :param project_name: Name of the W&B project.
         :param run_name: Name of the W&B run.
         :param log_artifacts: Option to log model weights as W&B Artifacts.
+            Note that, in order for model weights to be logged, the
+            :class:`WeightCheckpoint` metric must be added to the
+            evaluation plugin.
         :param path: Path to locally save the model checkpoints.
         :param uri: URI identifier for external storage buckets (GCS, S3).
         :param sync_tfboard: Syncs TensorBoard to the W&B dashboard UI.
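# A hedged sketch of how the reworked WeightCheckpoint metric and the WandBLogger are
# meant to cooperate: the metric now emits the serialized model as bytes, and the
# logger parses the metric name with CHECKPOINT_METRIC_NAME before uploading the blob
# as a W&B artifact. Project and run names are placeholders.
from avalanche.evaluation.metrics.checkpoint import WeightCheckpoint
from avalanche.logging import WandBLogger
from avalanche.training.plugins import EvaluationPlugin

wandb_logger = WandBLogger(
    project_name="avalanche-demo",  # placeholder project
    run_name="clear-baseline",      # placeholder run
    log_artifacts=True,             # required for checkpoints to reach W&B
)
eval_plugin = EvaluationPlugin(WeightCheckpoint(), loggers=[wandb_logger])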
@@ -102,7 +114,8 @@ def __init__( def import_wandb(self): try: import wandb - except ImportError: + assert hasattr(wandb, '__version__') + except (ImportError, AssertionError): raise ImportError('Please run "pip install wandb" to install wandb') self.wandb = wandb @@ -151,6 +164,11 @@ def after_training_exp( def log_single_metric(self, name, value, x_plot): self.step = x_plot + if name.startswith("WeightCheckpoint"): + if self.log_artifacts: + self._log_checkpoint(name, value, x_plot) + return + if isinstance(value, AlternativeValues): value = value.best_supported_value( Image, @@ -189,26 +207,53 @@ def log_single_metric(self, name, value, x_plot): {name: self.wandb.Image(array(value))}, step=self.step ) - elif name.startswith("WeightCheckpoint"): - if self.log_artifacts: - cwd = os.getcwd() - ckpt = os.path.join(cwd, self.path) - try: - os.makedirs(ckpt) - except OSError as e: - if e.errno != errno.EEXIST: - raise - suffix = ".pth" - dir_name = os.path.join(ckpt, name + suffix) - artifact_name = os.path.join("Models", name + suffix) - if isinstance(value, Tensor): - torch.save(value, dir_name) - name = os.path.splittext(self.checkpoint) - artifact = self.wandb.Artifact(name, type="model") - artifact.add_file(dir_name, name=artifact_name) - self.wandb.run.log_artifact(artifact) - if self.uri is not None: - artifact.add_reference(self.uri, name=artifact_name) + def _log_checkpoint(self, name, value, x_plot): + assert self.wandb is not None + + # Example: 'WeightCheckpoint/train_phase/train_stream/Task000/Exp000' + name_match = CHECKPOINT_METRIC_NAME.match(name) + if name_match is None: + warnings.warn( + f'Checkpoint metric has unsupported name {name}.' + ) + return + # phase_name: str = name_match['phase_name'] + # stream_name: str = name_match['stream_name'] + task_id: Optional[int] = \ + int(name_match['task_id']) \ + if name_match['task_id'] is not None \ + else None + experience_id: int = int(name_match['experience_id']) + assert experience_id >= 0 + + cwd = Path.cwd() + checkpoint_directory = cwd / self.path + checkpoint_directory.mkdir(parents=True, exist_ok=True) + + checkpoint_name = "Model_{}".format(experience_id) + checkpoint_file_name = checkpoint_name + '.pth' + checkpoint_path = checkpoint_directory / checkpoint_file_name + artifact_name = 'Models/' + checkpoint_file_name + + # Write the checkpoint blob + with open(checkpoint_path, 'wb') as f: + f.write(value) + + metadata = { + 'experience': experience_id, + 'x_step': x_plot, + **({'task_id': task_id} + if task_id is not None + else {})} + + artifact = self.wandb.Artifact( + checkpoint_name, + type='model', + metadata=metadata) + artifact.add_file(str(checkpoint_path), name=artifact_name) + self.wandb.run.log_artifact(artifact) + if self.uri is not None: + artifact.add_reference(self.uri, name=artifact_name) def __getstate__(self): state = self.__dict__.copy() diff --git a/avalanche/training/plugins/ewc.py b/avalanche/training/plugins/ewc.py index 6978c9167..3fb5fbaf5 100644 --- a/avalanche/training/plugins/ewc.py +++ b/avalanche/training/plugins/ewc.py @@ -121,6 +121,7 @@ def after_training_exp(self, strategy, **kwargs): strategy.experience.dataset, strategy.device, strategy.train_mb_size, + num_workers=kwargs.get('num_workers', 0) ) self.update_importances(importances, exp_counter) self.saved_params[exp_counter] = copy_params_dict(strategy.model) @@ -129,7 +130,14 @@ def after_training_exp(self, strategy, **kwargs): del self.saved_params[exp_counter - 1] def compute_importances( - self, model, criterion, optimizer, 
dataset, device, batch_size + self, + model, + criterion, + optimizer, + dataset, + device, + batch_size, + num_workers=0 ) -> Dict[str, ParamData]: """ Compute EWC importance matrix for each parameter @@ -156,7 +164,8 @@ def compute_importances( dataset.collate_fn if hasattr(dataset, "collate_fn") else None ) dataloader = DataLoader( - dataset, batch_size=batch_size, collate_fn=collate_fn + dataset, batch_size=batch_size, collate_fn=collate_fn, + num_workers=num_workers ) for i, batch in enumerate(dataloader): # get only input, target and task_id from the batch diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst index 03ca33f8f..3572c06e8 100644 --- a/docs/benchmarks.rst +++ b/docs/benchmarks.rst @@ -49,7 +49,7 @@ Streams ClassificationStream Experiences -""""""""" +""""""""""" .. autosummary:: :toctree: generated @@ -243,8 +243,10 @@ Benchmark Generators | This set of functions tries to cover most common use cases (Class/Task-Incremental, Domain-Incremental, ...) but it also allows for the creation of entirely custom benchmarks (based on lists of tensors, on file lists, ...). +Class/Task/Domain-incremental benchmarks +""""""""""""""""""""""""""""""""""""""""""""""""""""""" + Generators for Class/Task/Domain-incremental benchmarks -........................................................ .. autosummary:: :toctree: generated @@ -253,8 +255,38 @@ Generators for Class/Task/Domain-incremental benchmarks ni_benchmark -Starting from tensor lists, file lists, PyTorch datasets -.......................................................... +Classification benchmarks +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Generate classification benchmarks, starting from tensor lists, file lists, PyTorch datasets + +.. autosummary:: + :toctree: generated + + dataset_classification_benchmark + filelist_classification_benchmark + paths_classification_benchmark + tensors_classification_benchmark + + +Detection / segmentation benchmarks +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Generate detection/segmentation benchmarks, starting from PyTorch datasets + + +.. autosummary:: + :toctree: generated + + dataset_detection_benchmark + + +Generic benchmarks +"""""""""""""""""" + + +Consider using the classification/detection when appropriate! + .. autosummary:: :toctree: generated @@ -266,7 +298,7 @@ Starting from tensor lists, file lists, PyTorch datasets Misc (make data-incremental, add a validation stream, ...) -.............................................................. +.......................................................... | Avalanche offers utilities to adapt a previously instantiated benchmark object. | More utilities to come! @@ -279,15 +311,16 @@ Misc (make data-incremental, add a validation stream, ...) .. currentmodule:: avalanche.benchmarks.utils -Utils (Data Loading and AvalancheDataset) +Utils ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -| The custom dataset and dataloader implementations contained in this sub-module are described in more detailed in the How-Tos about `"data loading and replay" ` and `"Avalanche Dataset" `. +| The custom dataset and dataloader implementations contained in this sub-module are described in more detailed in the How-Tos about `"Data Loading and Replay" `_ and `"Avalanche Dataset" `_. .. currentmodule:: avalanche.benchmarks.utils.data_loader Data Loaders -............................ +"""""""""""" + .. 
autosummary:: :toctree: generated @@ -300,7 +333,8 @@ Data Loaders .. currentmodule:: avalanche.benchmarks.utils AvalancheDataset -............................ +"""""""""""""""" + .. autosummary:: :toctree: generated diff --git a/docs/conf.py b/docs/conf.py index a39c0dbe7..bb9371744 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -64,7 +64,8 @@ 'sphinx.ext.githubpages', 'sphinx.ext.coverage', 'sphinx_rtd_theme', - 'sphinx_copybutton' + 'sphinx_copybutton', + 'sphinx_autodoc_typehints' ] autosummary_generate = True diff --git a/docs/evaluation.rst b/docs/evaluation.rst index 9fc627a40..1fc2abb43 100644 --- a/docs/evaluation.rst +++ b/docs/evaluation.rst @@ -4,7 +4,7 @@ Evaluation module | This module provides a number of metrics to monitor the continual learning performance. | Metrics subclass the :py:class:`PluginMetric` class, which provides all the callbacks needed to include custom metric logic in specific points of the continual learning workflow. -evaluation.metrics +avalanche.evaluation.metrics ---------------------------------------- .. contents:: @@ -187,7 +187,7 @@ Standalone Metrics -evaluation.metrics.detection +avalanche.evaluation.metrics.detection ---------------------------------------- | Metrics for Object Detection tasks. Please, take a look at the examples in the `examples` folder of Avalanche to better understand how to use these metrics. @@ -207,8 +207,8 @@ evaluation.metrics.detection DetectionMetrics -evaluation.metric_definitions -------------------------------- +avalanche.evaluation.metric_definitions +--------------------------------------- General interfaces on which metrics are built. @@ -227,8 +227,8 @@ General interfaces on which metrics are built. GenericPluginMetric -evaluation.metric_results -------------------------------- +avalanche.evaluation.metric_results +----------------------------------- Metric result types diff --git a/docs/logging.rst b/docs/logging.rst index b31a24154..32b3be3bb 100644 --- a/docs/logging.rst +++ b/docs/logging.rst @@ -4,7 +4,7 @@ Logging module | This module provides a number of automatic logging facilities to monitor continual learning experiments. | Loggers should be provided as input to the :py:class:`EvaluationPlugin` class. -logging +avalanche.logging ---------------------------------------- .. contents:: diff --git a/docs/models.rst b/docs/models.rst index 90111a98d..411f2cbd4 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -3,7 +3,7 @@ Models module | This module provides models and building blocks to design continual learning architectures. -models +avalanche.models ---------------------------------------- .. contents:: diff --git a/docs/requirements.txt b/docs/requirements.txt index bc2fdb83c..2d4b04ca4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -21,4 +21,5 @@ higher lvis ctrl-benchmark sphinx_copybutton -dill \ No newline at end of file +dill +sphinx_autodoc_typehints \ No newline at end of file diff --git a/docs/training.rst b/docs/training.rst index 8f31edfcf..d1bf5f3fe 100644 --- a/docs/training.rst +++ b/docs/training.rst @@ -1,23 +1,23 @@ Training module ============================ -.. currentmodule:: avalanche.training - -training +avalanche.training ---------------------------------------- +.. currentmodule:: avalanche.training + .. contents:: :depth: 2 :local: :backlinks: top Training Templates ------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Templates define the training/eval loop for each setting (supervised CL, online CL, RL, ...). 
Each template supports a set of callback that can be used by a plugin to execute code inside the training/eval loops. -Templates -""""""""" +Strategy Templates +"""""""""""""""""" Templates are defined in the `avalanche.training.templates` module. @@ -32,8 +32,8 @@ Templates are defined in the `avalanche.training.templates` module. OnlineSupervisedTemplate -Plugins ABCs -"""""""""""" +Plugins ABCs / Templates +"""""""""""""""""""""""" ABCs for plugins are available in `avalanche.core`. @@ -50,7 +50,7 @@ ABCs for plugins are available in `avalanche.core`. .. currentmodule:: avalanche.training Training Strategies ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Ready-to-use continual learning strategies. @@ -83,7 +83,7 @@ Ready-to-use continual learning strategies. MIR Replay Buffers and Selection Strategies ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Buffers to store past samples according to different policies and selection strategies. @@ -115,7 +115,7 @@ Selection strategies Loss Functions ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: generated @@ -126,12 +126,12 @@ Loss Functions Training Plugins ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Plugins can be added to any CL strategy to support additional behavior. -Utilities -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Utility Plugins +"""""""""""""""""""" Utilities in `avalanche.training.plugins`. @@ -145,8 +145,8 @@ Utilities in `avalanche.training.plugins`. LRSchedulerPlugin -Strategies -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Strategy Plugins +"""""""""""""""""""" Strategy implemented as plugins in `avalanche.training.plugins`. @@ -175,12 +175,12 @@ Strategy implemented as plugins in `avalanche.training.plugins`. Utilities ----------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Checkpointing allows to save and load serialized strategies to stop and resume experiments. -Utilities -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Checkpointing +"""""""""""""""""""""""""""""""""""""""" Utilities in `avalanche.training.checkpoint`. 
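Usage note for the `num_workers` pass-through added to `EWCPlugin` in the ewc.py hunk above: a minimal sketch, not part of the patch, assuming the stock `EWC` strategy, `SimpleMLP`, and the `tensors_classification_benchmark` generator introduced later in this change set; all shapes, sizes, and hyperparameters are arbitrary placeholders.

import torch
from torch.nn import CrossEntropyLoss
from torch.optim import SGD

from avalanche.benchmarks.generators.benchmark_generators import (
    tensors_classification_benchmark,
)
from avalanche.models import SimpleMLP
from avalanche.training import EWC

# Two tiny tensor-based experiences, following the pattern used in the tests below.
pattern_shape = (3, 32, 32)
benchmark = tensors_classification_benchmark(
    train_tensors=[
        (torch.zeros(64, *pattern_shape), torch.randint(0, 10, (64,))),
        (torch.zeros(64, *pattern_shape), torch.randint(0, 10, (64,))),
    ],
    test_tensors=[
        (torch.zeros(32, *pattern_shape), torch.zeros(32, dtype=torch.long))
    ],
    task_labels=[0, 0],
    complete_test_set_only=True,
)

model = SimpleMLP(input_size=3 * 32 * 32, num_classes=10)
strategy = EWC(
    model,
    SGD(model.parameters(), lr=0.01),
    CrossEntropyLoss(),
    ewc_lambda=0.4,
    train_mb_size=32,
    train_epochs=1,
)

for experience in benchmark.train_stream:
    # `num_workers` travels through **kwargs to the plugin callbacks, so
    # EWCPlugin.after_training_exp now forwards it to compute_importances
    # and its internal DataLoader.
    strategy.train(experience, num_workers=2)
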
diff --git a/environment-dev.yml b/environment-dev.yml index fba6b6fc1..c8050991a 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -38,3 +38,4 @@ dependencies: - gym - lvis - dill + - sphinx_autodoc_typehints diff --git a/examples/task_metrics.py b/examples/task_metrics.py index 1e4fa24da..aac53af1e 100644 --- a/examples/task_metrics.py +++ b/examples/task_metrics.py @@ -19,9 +19,8 @@ from torch.nn import CrossEntropyLoss from torch.optim import SGD -from avalanche.benchmarks.generators.benchmark_generators import ( - create_multi_dataset_generic_benchmark, -) +from avalanche.benchmarks.scenarios.classification_benchmark_creation import \ + create_multi_dataset_classification_benchmark from avalanche.benchmarks.utils import make_tensor_classification_dataset from avalanche.evaluation.metrics import ( forgetting_metrics, @@ -56,6 +55,7 @@ def main(args): torch.randn(10, 3), torch.randint(0, 3, (10,)), task_labels=torch.randint(0, 5, (10,)).tolist(), + targets=1 ) for _ in range(3) ] @@ -64,10 +64,11 @@ def main(args): torch.randn(10, 3), torch.randint(0, 3, (10,)), task_labels=torch.randint(0, 5, (10,)).tolist(), + targets=1 ) for _ in range(3) ] - benchmark = create_multi_dataset_generic_benchmark( + benchmark = create_multi_dataset_classification_benchmark( train_datasets=tr_ds, test_datasets=ts_ds ) # --------- diff --git a/examples/wandb_logger.py b/examples/wandb_logger.py index 2d49d5112..0bae27342 100644 --- a/examples/wandb_logger.py +++ b/examples/wandb_logger.py @@ -24,6 +24,7 @@ from avalanche.benchmarks import nc_benchmark from avalanche.benchmarks.datasets.dataset_utils import default_dataset_location +from avalanche.evaluation.metrics.checkpoint import WeightCheckpoint from avalanche.logging import InteractiveLogger, WandBLogger from avalanche.training.plugins import EvaluationPlugin from avalanche.evaluation.metrics import ( @@ -87,7 +88,11 @@ def main(args): interactive_logger = InteractiveLogger() wandb_logger = WandBLogger( - project_name=args.project, run_name=args.run, config=vars(args) + project_name=args.project, + run_name=args.run, + log_artifacts=args.artifacts, + path=args.path if args.path else None, + config=vars(args) ) eval_plugin = EvaluationPlugin( @@ -130,6 +135,7 @@ def main(args): minibatch=True, epoch=True, experience=True, stream=True ), MAC_metrics(minibatch=True, epoch=True, experience=True), + WeightCheckpoint(), loggers=[interactive_logger, wandb_logger], ) @@ -167,9 +173,15 @@ def main(args): default=0, help="Select zero-indexed cuda device. 
-1 to use CPU.", ) - parser.add_argument("--run", type=str, help="Provide a run name for WandB") parser.add_argument( "--project", type=str, help="Define the name of the WandB project" ) + parser.add_argument("--run", type=str, help="Provide a run name for WandB") + parser.add_argument('--artifacts', default=False, + action="store_true", + help='Log Model Checkpoints as W&B Artifacts') + parser.add_argument('--path', type=str, default="Checkpoint", + help='Local path to save the model checkpoints') + args = parser.parse_args() main(args) diff --git a/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb b/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb index ceb11b08a..8995655d7 100644 --- a/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "16b4b118", "metadata": { @@ -27,6 +28,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6142a7a6", "metadata": {}, @@ -112,6 +114,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a2f992a4", "metadata": {}, @@ -132,6 +135,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "e0c2028f", "metadata": {}, @@ -220,6 +224,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0e0f538c", "metadata": {}, @@ -270,6 +275,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b905086a", "metadata": {}, @@ -337,6 +343,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "13812ca1", "metadata": {}, @@ -365,6 +372,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "37855c54", "metadata": {}, @@ -433,6 +441,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6f226374", "metadata": {}, @@ -465,6 +474,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "20feadf7", "metadata": {}, @@ -500,6 +510,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6c56f98c", "metadata": {}, @@ -557,6 +568,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "56236ffe", "metadata": {}, @@ -614,6 +626,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ea760848", "metadata": {}, @@ -625,7 +638,18 @@ "* **filelist\\_benchmark**\n", "* **paths\\_benchmark**\n", "* **dataset\\_benchmark**\n", - "* **tensors\\_benchmark**" + "* **tensors\\_benchmark**\n", + "\n", + "In addition, generic generators exists for *classification* and *detection* problems. Those generators accept the same parameters as the ones listed above, but they return benchmarks objects with additional useful problem-specific fields. Those generators are:\n", + "\n", + "* **filelist\\_classification\\_benchmark**\n", + "* **paths\\_classification\\_benchmark**\n", + "* **dataset\\_classification\\_benchmark**\n", + "* **tensors\\_classification\\_benchmark**\n", + "\n", + "(replace *classification* with *detection* for detection/segmentation datasets).\n", + "\n", + " In the following examples, we will use their plain version. 
Let's import them:" ] }, { @@ -640,6 +664,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "139a567d", "metadata": {}, @@ -674,6 +699,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f13e0cf0", "metadata": {}, @@ -721,7 +747,7 @@ " \"{} {}\\n\".format(os.path.join(rel_dir, name), t_label)\n", " )\n", "\n", - "# Here we create a GenericCLScenario ready to be iterated\n", + "# Here we create a DatasetScenario ready to be iterated\n", "generic_scenario = filelist_benchmark(\n", " dirpath, \n", " [\"train_filelist_00.txt\", \"train_filelist_01.txt\"],\n", @@ -734,6 +760,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3ea44517", "metadata": {}, @@ -763,7 +790,7 @@ " experience_paths.append(instance_tuple)\n", " train_experiences.append(experience_paths)\n", "\n", - "# Here we create a GenericCLScenario ready to be iterated\n", + "# Here we create a DatasetScenario ready to be iterated\n", "generic_scenario = paths_benchmark(\n", " train_experiences,\n", " [train_experiences[0]], # Single test set\n", @@ -775,6 +802,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "639676ce", "metadata": {}, @@ -812,6 +840,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "4522b04d", "metadata": {}, @@ -858,6 +887,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "342718b3", "metadata": {}, @@ -898,6 +928,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "42a39ec6", "metadata": {}, diff --git a/tests/benchmarks/scenarios/test_classification_scenario.py b/tests/benchmarks/scenarios/test_classification_scenario.py index bf5afc9ed..688623989 100644 --- a/tests/benchmarks/scenarios/test_classification_scenario.py +++ b/tests/benchmarks/scenarios/test_classification_scenario.py @@ -5,10 +5,11 @@ import torch from avalanche.benchmarks import ( - dataset_benchmark, ClassificationExperience, ClassificationScenario, ) +from avalanche.benchmarks.generators.benchmark_generators import \ + dataset_classification_benchmark from avalanche.benchmarks.utils import make_tensor_classification_dataset @@ -54,7 +55,7 @@ def test_classes_in_exp(self): ) ) - benchmark_instance = dataset_benchmark( + benchmark_instance = dataset_classification_benchmark( train_datasets=train_exps, test_datasets=test_exps, other_streams_datasets={"other": other_stream_exps}, @@ -137,7 +138,7 @@ def test_classes_in_this_experience(self): ) ) - benchmark_instance = dataset_benchmark( + benchmark_instance = dataset_classification_benchmark( train_datasets=train_exps, test_datasets=test_exps, other_streams_datasets={"other": other_stream_exps}, diff --git a/tests/benchmarks/test_avalanche_dataset.py b/tests/benchmarks/test_avalanche_dataset.py index 3de055fe3..55def8cc5 100644 --- a/tests/benchmarks/test_avalanche_dataset.py +++ b/tests/benchmarks/test_avalanche_dataset.py @@ -31,7 +31,6 @@ ) from avalanche.benchmarks.utils.classification_dataset import ( ClassificationDataset, - SupervisedClassificationDataset, ) from tests.unit_tests_utils import ( load_image_benchmark, @@ -400,13 +399,13 @@ def test_avalanche_dataset_mixed_task_labels(self): def test_avalanche_dataset_update_data_attribute(self): dataset_orig = load_image_benchmark() - dataset: SupervisedClassificationDataset = make_classification_dataset( + dataset: ClassificationDataset = make_classification_dataset( dataset_orig, transform=ToTensor(), task_labels=0 ) - self.assertIsInstance(dataset, SupervisedClassificationDataset) + self.assertIsInstance(dataset, ClassificationDataset) 
dataset_element = dataset[101] self.assertEqual(3, len(dataset_element)) # x, y, t diff --git a/tests/benchmarks/test_flat_data.py b/tests/benchmarks/test_flat_data.py index 0907064b8..9df3057fa 100644 --- a/tests/benchmarks/test_flat_data.py +++ b/tests/benchmarks/test_flat_data.py @@ -145,8 +145,8 @@ def test_concat_flattens_same_dataset_corner_case(self): C = B.concat(A) self.assertListEqual([2, 3, 1, 2, 3], list(C)) - def test_concat_flattens_same_classification_dataset(self): - D = ClassificationDataset([[1, 2, 3]]) + def test_concat_flattens_same_avalanche_dataset(self): + D = AvalancheDataset([[1, 2, 3]]) B = concat_datasets([]) B = B.concat(D) B = D.concat(B) diff --git a/tests/distributed/test_distributed_helper.py b/tests/distributed/test_distributed_helper.py index 123c281b5..ec0f959f6 100644 --- a/tests/distributed/test_distributed_helper.py +++ b/tests/distributed/test_distributed_helper.py @@ -10,8 +10,9 @@ import torch.distributed as dst from torch.nn import Module from torch.nn.parallel import DistributedDataParallel -from avalanche.benchmarks.generators.benchmark_generators import \ - dataset_benchmark +from avalanche.benchmarks.generators.benchmark_generators import ( + dataset_classification_benchmark, +) from avalanche.benchmarks.utils.classification_dataset import \ make_tensor_classification_dataset @@ -80,7 +81,7 @@ def test_wrap_model(self): model.eval() model_wrapped.eval() - benchmark = dataset_benchmark( + benchmark = dataset_classification_benchmark( [make_tensor_classification_dataset( mb_x, mb_y, mb_t, task_labels=mb_t.tolist() )], diff --git a/tests/test_core50.py b/tests/test_core50.py index fe4e4c736..b9f06dcb0 100644 --- a/tests/test_core50.py +++ b/tests/test_core50.py @@ -38,6 +38,17 @@ def test_core50_nc_benchmark(self): classes_in_test = benchmark_instance.classes_in_experience["test"][0] self.assertSetEqual(set(range(50)), set(classes_in_test)) + # Regression tests for issue #774 + self.assertSequenceEqual( + [10] + ([5] * 8), + benchmark_instance.n_classes_per_exp) + self.assertSetEqual( + set(range(50)), + set(benchmark_instance.classes_order)) + self.assertEqual( + 50, + len(benchmark_instance.classes_order)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_custom_streams.py b/tests/test_custom_streams.py index 3746af861..216ce306e 100644 --- a/tests/test_custom_streams.py +++ b/tests/test_custom_streams.py @@ -2,9 +2,11 @@ import torch from torch.utils.data import TensorDataset +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationScenario, +) from avalanche.benchmarks.utils import make_tensor_classification_dataset -from avalanche.benchmarks import GenericCLScenario from avalanche.benchmarks.utils import make_classification_dataset @@ -59,7 +61,7 @@ def test_custom_streams_name_and_length(self): valid_t_labels = [{9}, {4, 5}, {7, 8}, {0}, {3}] with self.assertRaises(Exception): - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": (train_exps,), "test": (test_exps,), @@ -69,7 +71,7 @@ def test_custom_streams_name_and_length(self): valid_t_labels = valid_t_labels[:-1] - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": (train_exps,), "test": (test_exps,), @@ -148,7 +150,7 @@ def test_complete_test_set_only(self): ) with self.assertRaises(Exception): - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": 
(train_exps,), "test": (test_exps,), @@ -156,7 +158,7 @@ def test_complete_test_set_only(self): complete_test_set_only=True, ) - benchmark_instance = GenericCLScenario( + benchmark_instance = ClassificationScenario( stream_definitions={ "train": (train_exps,), "test": (test_exps[0],), diff --git a/tests/test_high_level_generators.py b/tests/test_high_level_generators.py index c01474907..52e22aaf7 100644 --- a/tests/test_high_level_generators.py +++ b/tests/test_high_level_generators.py @@ -1,3 +1,4 @@ +from functools import partial import os import tempfile import unittest @@ -8,7 +9,15 @@ from torchvision.datasets import MNIST from torchvision.datasets.utils import download_url, extract_archive from torchvision.transforms import ToTensor -from tests.unit_tests_utils import DummyImageDataset +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationScenario, +) +from avalanche.benchmarks.scenarios.dataset_scenario import DatasetScenario +from avalanche.benchmarks.scenarios.detection_scenario import DetectionScenario +from tests.unit_tests_utils import ( + DummyImageDataset, + get_fast_detection_datasets, +) from avalanche.benchmarks import ( @@ -22,6 +31,11 @@ from avalanche.benchmarks.datasets import default_dataset_location from avalanche.benchmarks.generators.benchmark_generators import ( class_balanced_split_strategy, + dataset_classification_benchmark, + dataset_detection_benchmark, + filelist_classification_benchmark, + paths_classification_benchmark, + tensors_classification_benchmark, ) from avalanche.benchmarks.scenarios.generic_benchmark_creation import ( create_lazy_generic_benchmark, @@ -50,9 +64,36 @@ def test_dataset_benchmark(self): train_cifar10 = DummyImageDataset(n_classes=10) test_cifar10 = DummyImageDataset(n_classes=10) - generic_benchmark = dataset_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = dataset_benchmark( + [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] + ) + self.assertIsInstance(generic_benchmark, DatasetScenario) + + classification_benchmark = dataset_classification_benchmark( [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] ) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) + + def test_dataset_detection_benchmark(self): + train_det, test_det = get_fast_detection_datasets() + + generic_benchmark = dataset_benchmark( + [train_det], [test_det] + ) + self.assertIsInstance(generic_benchmark, DatasetScenario) + + classification_benchmark = dataset_detection_benchmark( + [train_det], [test_det] + ) + self.assertIsInstance(classification_benchmark, DetectionScenario) def test_dataset_benchmark_avalanche_dataset(self): train_MNIST = make_classification_dataset( @@ -83,7 +124,14 @@ def test_dataset_benchmark_avalanche_dataset(self): task_labels=1, ) - generic_benchmark = dataset_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = dataset_benchmark( + [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] + ) + + classification_benchmark = dataset_classification_benchmark( [train_MNIST, train_cifar10], [test_MNIST, test_cifar10] ) @@ -91,6 +139,13 @@ 
def test_dataset_benchmark_avalanche_dataset(self): self.assertEqual(1, generic_benchmark.train_stream[1].task_label) self.assertEqual(0, generic_benchmark.test_stream[0].task_label) self.assertEqual(1, generic_benchmark.test_stream[1].task_label) + self.assertIsInstance(generic_benchmark, DatasetScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) def test_filelist_benchmark(self): download_url( @@ -126,7 +181,6 @@ def test_filelist_benchmark(self): wf.write( "{} {}\n".format(os.path.join(rel_dir, name), label) ) - generic_benchmark = filelist_benchmark( dirpath, list_paths, @@ -137,8 +191,20 @@ def test_filelist_benchmark(self): eval_transform=ToTensor(), ) + classification_benchmark = filelist_classification_benchmark( + dirpath, + list_paths, + [list_paths[0]], + task_labels=[0, 0], + complete_test_set_only=True, + train_transform=ToTensor(), + eval_transform=ToTensor(), + ) + self.assertEqual(2, len(generic_benchmark.train_stream)) self.assertEqual(1, len(generic_benchmark.test_stream)) + self.assertIsInstance(generic_benchmark, ClassificationScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) def test_paths_benchmark(self): download_url( @@ -168,7 +234,19 @@ def test_paths_benchmark(self): experience_paths.append(instance_tuple) train_experiences.append(experience_paths) - generic_benchmark = paths_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = paths_benchmark( + train_experiences, + [train_experiences[0]], # Single test set + task_labels=[0, 0], + complete_test_set_only=True, + train_transform=ToTensor(), + eval_transform=ToTensor(), + ) + + classification_benchmark = paths_classification_benchmark( train_experiences, [train_experiences[0]], # Single test set task_labels=[0, 0], @@ -179,6 +257,13 @@ def test_paths_benchmark(self): self.assertEqual(2, len(generic_benchmark.train_stream)) self.assertEqual(1, len(generic_benchmark.test_stream)) + self.assertIsInstance(generic_benchmark, DatasetScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) def test_tensors_benchmark(self): pattern_shape = (3, 32, 32) @@ -196,7 +281,20 @@ def test_tensors_benchmark(self): test_x = torch.zeros(50, *pattern_shape) test_y = torch.zeros(50, dtype=torch.long) - generic_benchmark = tensors_benchmark( + with self.assertWarns(DeprecationWarning): + # Assert it warns when creating classification + # scenarios implicitly + generic_benchmark = tensors_benchmark( + train_tensors=[ + (experience_1_x, experience_1_y), + (experience_2_x, experience_2_y), + ], + test_tensors=[(test_x, test_y)], + task_labels=[0, 0], # Task label of each train exp + complete_test_set_only=True, + ) + + classification_benchmark = tensors_classification_benchmark( train_tensors=[ (experience_1_x, experience_1_y), (experience_2_x, experience_2_y), @@ -208,6 +306,13 @@ def test_tensors_benchmark(self): self.assertEqual(2, 
len(generic_benchmark.train_stream)) self.assertEqual(1, len(generic_benchmark.test_stream)) + self.assertIsInstance(generic_benchmark, DatasetScenario) + self.assertIsInstance(classification_benchmark, ClassificationScenario) + + # Check dataset_benchmark classification retrocompatibility + # This check should be removed once we decide to transition to + # dataset_classification/detection/..._benchmark + self.assertIsInstance(generic_benchmark, ClassificationScenario) def test_data_incremental_benchmark(self): pattern_shape = (3, 32, 32) @@ -265,7 +370,7 @@ def test_data_incremental_benchmark(self): for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(ref_tensor_x[tensor_idx], x)) self.assertTrue( - torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y)) + torch.equal(ref_tensor_y[tensor_idx], torch.as_tensor(y)) ) tensor_idx += 1 @@ -275,7 +380,7 @@ def test_data_incremental_benchmark(self): tensor_idx = 0 for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(test_x[tensor_idx], x)) - self.assertTrue(torch.equal(test_y[tensor_idx], torch.tensor(y))) + self.assertTrue(torch.equal(test_y[tensor_idx], torch.as_tensor(y))) tensor_idx += 1 def test_data_incremental_benchmark_from_lazy_benchmark(self): @@ -347,7 +452,7 @@ def test_gen(): for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(ref_tensor_x[tensor_idx], x)) self.assertTrue( - torch.equal(ref_tensor_y[tensor_idx], torch.tensor(y)) + torch.equal(ref_tensor_y[tensor_idx], torch.as_tensor(y)) ) tensor_idx += 1 @@ -357,7 +462,7 @@ def test_gen(): tensor_idx = 0 for x, y, *_ in exp.dataset: self.assertTrue(torch.equal(test_x[tensor_idx], x)) - self.assertTrue(torch.equal(test_y[tensor_idx], torch.tensor(y))) + self.assertTrue(torch.equal(test_y[tensor_idx], torch.as_tensor(y))) tensor_idx += 1 def test_benchmark_with_validation_stream_fixed_size(self): @@ -441,7 +546,7 @@ def test_benchmark_with_validation_stream_rel_size(self): test_x = torch.zeros(50, *pattern_shape) test_y = torch.zeros(50, dtype=torch.long) - initial_benchmark_instance = tensors_benchmark( + initial_benchmark_instance = tensors_classification_benchmark( train_tensors=[ (experience_1_x, experience_1_y), (experience_2_x, experience_2_y), @@ -501,11 +606,78 @@ def test_benchmark_with_validation_stream_rel_size(self): self.assertTrue(torch.equal(test_y, mb[1])) # Regression test for #1371 - self.assertEquals( + self.assertEqual( [0], valid_benchmark.train_stream[0].classes_in_this_experience ) + def test_benchmark_with_validation_stream_class_balanced(self): + pattern_shape = (3, 32, 32) + + # Definition of training experiences + # Experience 1 + experience_1_x = torch.zeros(100, *pattern_shape) + experience_1_y = torch.randint(0, 10, (100,), dtype=torch.long) + + # Experience 2 + experience_2_x = torch.zeros(80, *pattern_shape) + experience_2_y = torch.randint(0, 10, (80,), dtype=torch.long) + + # Test experience + test_x = torch.zeros(50, *pattern_shape) + test_y = torch.zeros(50, dtype=torch.long) + + initial_benchmark_instance = tensors_classification_benchmark( + train_tensors=[ + (experience_1_x, experience_1_y), + (experience_2_x, experience_2_y), + ], + test_tensors=[(test_x, test_y)], + task_labels=[0, 0], # Task label of each train exp + complete_test_set_only=True, + ) + + validation_size = 0.2 + class_balanced_strat = partial( + class_balanced_split_strategy, + validation_size + ) + valid_benchmark = benchmark_with_validation_stream( + initial_benchmark_instance, + custom_split_strategy=class_balanced_strat + ) + + _, count_1 = 
torch.unique(experience_1_y, return_counts=True) + expected_class_sizes_1 = [int(validation_size * x) for x in count_1] + expected_size_1 = sum(expected_class_sizes_1) + + _, count_2 = torch.unique(experience_2_y, return_counts=True) + expected_class_sizes_2 = [int(validation_size * x) for x in count_2] + expected_size_2 = sum(expected_class_sizes_2) + + self.assertEqual(2, len(valid_benchmark.train_stream)) + self.assertEqual(2, len(valid_benchmark.valid_stream)) + self.assertEqual(1, len(valid_benchmark.test_stream)) + self.assertTrue(valid_benchmark.complete_test_set_only) + + self.assertEqual( + 100 - expected_size_1, + len(valid_benchmark.train_stream[0].dataset)) + self.assertEqual( + 80 - expected_size_2, + len(valid_benchmark.train_stream[1].dataset)) + self.assertEqual( + expected_size_1, + len(valid_benchmark.valid_stream[0].dataset)) + self.assertEqual( + expected_size_2, + len(valid_benchmark.valid_stream[1].dataset)) + + vd = valid_benchmark.test_stream[0].dataset + mb = get_mbatch(vd, len(vd)) + self.assertTrue(torch.equal(test_x, mb[0])) + self.assertTrue(torch.equal(test_y, mb[1])) + def test_lazy_benchmark_with_validation_stream_fixed_size(self): lazy_options = [None, True, False] for lazy_option in lazy_options: diff --git a/tests/test_models.py b/tests/test_models.py index 498f86923..812eeb472 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,6 +1,7 @@ import sys import os import copy +import tempfile import unittest @@ -646,10 +647,12 @@ def test_ncm_save_load(self): classifier = NCMClassifier() classifier.update_class_means_dict({1: torch.randn(5,), 2: torch.randn(5,)}) - torch.save(classifier.state_dict(), 'ncm.pt') - del classifier - classifier = NCMClassifier() - check = torch.load('ncm.pt') + with tempfile.TemporaryFile() as tmpfile: + torch.save(classifier.state_dict(), tmpfile) + del classifier + classifier = NCMClassifier() + tmpfile.seek(0) + check = torch.load(tmpfile) classifier.load_state_dict(check) assert classifier.class_means.shape == (3, 5) assert (classifier.class_means[0] == 0).all() diff --git a/tests/training/test_plugins.py b/tests/training/test_plugins.py index c98a40ee0..57708e91a 100644 --- a/tests/training/test_plugins.py +++ b/tests/training/test_plugins.py @@ -15,9 +15,11 @@ from avalanche.benchmarks import ( nc_benchmark, - GenericCLScenario, benchmark_with_validation_stream, ) +from avalanche.benchmarks.scenarios.classification_scenario import ( + ClassificationScenario, +) from avalanche.benchmarks.utils.data_loader import TaskBalancedDataLoader from avalanche.evaluation.metric_results import MetricValue from avalanche.evaluation.metrics import Mean @@ -345,7 +347,9 @@ def assert_model_equals(self, model1, model2): self.assertTrue(torch.equal(v, dict2[k])) def assert_benchmark_equals( - self, bench1: GenericCLScenario, bench2: GenericCLScenario + self, + bench1: ClassificationScenario, + bench2: ClassificationScenario ): self.assertSetEqual( set(bench1.streams.keys()), set(bench2.streams.keys()) diff --git a/tests/unit_tests_utils.py b/tests/unit_tests_utils.py index 7e6d232aa..d5b169fe6 100644 --- a/tests/unit_tests_utils.py +++ b/tests/unit_tests_utils.py @@ -1,10 +1,11 @@ +import itertools from os.path import expanduser import os import random import torch from PIL.Image import Image -from sklearn.datasets import make_classification +from sklearn.datasets import make_blobs, make_classification from sklearn.model_selection import train_test_split import numpy as np from torch.utils.data import TensorDataset, Dataset @@ 
-14,6 +15,9 @@ from torchvision.transforms import Compose, ToTensor from avalanche.benchmarks import nc_benchmark +from avalanche.benchmarks.utils.detection_dataset import ( + make_detection_dataset, +) # Environment variable used to skip some expensive tests that are very unlikely @@ -149,6 +153,129 @@ def get_fast_benchmark( return my_nc_benchmark +def get_fast_detection_datasets( + n_images=30, + max_elements_per_image=10, + n_samples_per_class=20, + n_classes=10, + seed=None, + image_size=64, + n_test_images=5 +): + if seed is not None: + np.random.seed(seed) + random.seed(seed) + + assert n_images * max_elements_per_image >= \ + n_samples_per_class * n_classes + assert n_test_images < n_images + assert n_test_images > 0 + + base_n_per_images = (n_samples_per_class * n_classes) // n_images + additional_elements = (n_samples_per_class * n_classes) % n_images + to_allocate = np.full(n_images, base_n_per_images) + to_allocate[:additional_elements] += 1 + np.random.shuffle(to_allocate) + classes_elements = np.repeat(np.arange(n_classes), n_samples_per_class) + np.random.shuffle(classes_elements) + + import matplotlib.colors as mcolors + forms = ['ellipse', 'rectangle', 'line', 'arc'] + colors = list(mcolors.TABLEAU_COLORS.values()) + combs = list(itertools.product(forms, colors)) + random.shuffle(combs) + + generated_images = [] + generated_targets = [] + for img_idx in range(n_images): + n_to_allocate = to_allocate[img_idx] + base_alloc_idx = to_allocate[:img_idx].sum() + classes_to_instantiate = \ + classes_elements[base_alloc_idx:base_alloc_idx+n_to_allocate] + + _, _, clusters = make_blobs( + n_to_allocate, + n_features=2, + centers=n_to_allocate, + center_box=(0, image_size-1), + random_state=seed, + return_centers=True) + + from PIL import Image as ImageApi + from PIL import ImageDraw + im = ImageApi.new('RGB', (image_size, image_size)) + draw = ImageDraw.Draw(im) + + target = { + 'boxes': torch.zeros((n_to_allocate, 4), dtype=torch.float32), + 'labels': torch.zeros((n_to_allocate,), dtype=torch.long), + 'image_id': torch.full((1,), img_idx, dtype=torch.long), + 'area': torch.zeros((n_to_allocate,), dtype=torch.float32), + 'iscrowd': torch.zeros((n_to_allocate,), dtype=torch.long) + } + + obj_sizes = np.random.uniform( + low=image_size * 0.1 * 0.95, + high=image_size * 0.1 * 1.05, + size=(n_to_allocate,)) + for center_idx, center in enumerate(clusters): + obj_size = float(obj_sizes[center_idx]) + class_to_gen = classes_to_instantiate[center_idx] + + class_form, class_color = combs[class_to_gen] + + left = center[0] - obj_size + top = center[1] - obj_size + right = center[0] + obj_size + bottom = center[1] + obj_size + ltrb = (left, top, right, bottom) + if class_form == 'ellipse': + draw.ellipse(ltrb, fill=class_color) + elif class_form == 'rectangle': + draw.rectangle(ltrb, fill=class_color) + elif class_form == 'line': + draw.line(ltrb, + fill=class_color, + width=max(1, int(obj_size*0.25))) + elif class_form == 'arc': + draw.arc(ltrb, fill=class_color, start=45, end=200) + else: + raise RuntimeError('Unsupported form') + + target["boxes"][center_idx] = torch.as_tensor(ltrb) + target["labels"][center_idx] = class_to_gen + target["area"][center_idx] = obj_size ** 2 + + generated_images.append(np.array(im)) + generated_targets.append(target) + im.close() + + test_indices = set( + np.random.choice( + n_images, + n_test_images, + replace=False).tolist()) + train_images = [x for i, x in enumerate(generated_images) + if i not in test_indices] + test_images = [x for i, x in 
enumerate(generated_images) + if i in test_indices] + + train_targets = [x for i, x in enumerate(generated_targets) + if i not in test_indices] + test_targets = [x for i, x in enumerate(generated_targets) + if i in test_indices] + + return make_detection_dataset( + list(zip(train_images, train_targets)), + targets=train_targets, + task_labels=0 + ), make_detection_dataset( + list(zip(test_images, test_targets)), + targets=test_targets, + task_labels=0 + ) + + class DummyImageDataset(Dataset): def __init__(self, n_elements=10000, n_classes=100): assert n_elements >= n_classes @@ -214,7 +341,21 @@ def set_deterministic_run(seed=0): "common_setups", "load_benchmark", "get_fast_benchmark", + "get_fast_detection_datasets", "load_experience_train_eval", "get_device", "set_deterministic_run", ] + + +# if __name__ == '__main__': +# from matplotlib import pyplot as plt + +# train_dset, test_dset = get_fast_detection_datasets() +# for i in range(3): +# plt.imshow(train_dset[i][0], interpolation='nearest') +# plt.show() + +# for i in range(3): +# plt.imshow(test_dset[i][0], interpolation='nearest') +# plt.show()
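
A condensed, standalone sketch of the class-balanced validation split exercised by test_benchmark_with_validation_stream_class_balanced above, assuming the `tensors_classification_benchmark` generator introduced in this change set; tensor shapes and the 20% split are arbitrary placeholders.

from functools import partial

import torch

from avalanche.benchmarks import benchmark_with_validation_stream
from avalanche.benchmarks.generators.benchmark_generators import (
    class_balanced_split_strategy,
    tensors_classification_benchmark,
)

# Two toy training experiences plus a single, complete test set.
benchmark = tensors_classification_benchmark(
    train_tensors=[
        (torch.zeros(100, 3, 32, 32), torch.randint(0, 10, (100,))),
        (torch.zeros(80, 3, 32, 32), torch.randint(0, 10, (80,))),
    ],
    test_tensors=[
        (torch.zeros(50, 3, 32, 32), torch.zeros(50, dtype=torch.long))
    ],
    task_labels=[0, 0],
    complete_test_set_only=True,
)

# Hold out roughly 20% of every class of each training experience.
class_balanced_split = partial(class_balanced_split_strategy, 0.2)
valid_benchmark = benchmark_with_validation_stream(
    benchmark, custom_split_strategy=class_balanced_split
)

# Each training experience shrinks by the number of held-out samples,
# and the validation stream mirrors the training stream length.
for train_exp, valid_exp in zip(
    valid_benchmark.train_stream, valid_benchmark.valid_stream
):
    print(len(train_exp.dataset), len(valid_exp.dataset))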