diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5ab3ff4..75393cb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,41 +2,74 @@ image: python:3.7 before_script: - python --version - - pip install -e .[dev] --extra-index-url https://download.pytorch.org/whl/cu113 - - pwd - - ls -l - - python -c "import sys;print(sys.path)" + - pip install -e ".[dev]" + +# jobs extending .scheduled_only only run in scheduled pipelines not on every commit +.scheduled_only: + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" + +# Checking codestyle .codestyle: stage: test script: - flake8 --version - mypy --version + - black --version - flake8 - mypy --config-file mypy.ini + - black . --check --verbose --diff --color codestyle: - extends: .codestyle + extends: + - .codestyle -codestyle:3.8: - extends: .codestyle - image: python:3.8 +codestyle:3.10: + extends: + - .codestyle + - .scheduled_only + image: python:3.10 -.test: - stage: test +# Running tests + +test_and_coverage: + script: + - coverage run -m pytest + - coverage report + - coverage xml + coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/' + artifacts: + reports: + coverage_report: + coverage_format: cobertura + path: coverage.xml + +test:3.10: + extends: + - .scheduled_only + image: python:3.10 script: - - pytest --version - python -m pytest . -test: - extends: .test +test:torch_backwards_compatibility: + script: + - pip install torch==1.9.0 torchvision==0.10.0 + - python -m pytest . -test:3.8: - extends: .test - image: python:3.8 +# Documentation -test-build-doc: +test_build_doc: stage: test script: - apt-get update && apt-get install -y pandoc - sphinx-build -b html docs/source/ docs/build/ -a + +test_doc_completeness: + extends: .scheduled_only + stage: test + allow_failure: true + script: + - flake8 --version + # explicitly select Docstring errors and ignore to overwrite config in setup.cfg + - flake8 --select=D1 --ignore=E501 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a9abd52 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + language_version: python3.8 \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..f441005 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,30 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "3.9" + # You can also specify other tool versions: + # nodejs: "16" + # rust: "1.55" + # golang: "1.17" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# If using Sphinx, optionally build your docs in additional formats such as PDF +# formats: +# - pdf + +# Optionally declare the Python requirements required to build your docs +python: + install: + - requirements: requirements.txt + - requirements: requirements-dev.txt diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..53f2c1d --- /dev/null +++ b/AUTHORS @@ -0,0 +1,10 @@ +The development of BITorch was started by Joseph Bethge and PD Dr. Haojin Yang. 
+ +The current maintainers can be contacted at: fb10-xnor@hpi.de + +The following people have contributed code to BITorch (in alphabetical order): + + Christopher Aust + Joseph Bethge + Paul Mattes + Haojing Yang diff --git a/CHANGELOG.md b/CHANGELOG.md index 044c107..7dc776e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,44 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [0.3.0] - 2023/01/13 + +### Added + +- new models: + - [MeliusNet](bitorch/models/meliusnet.py) + - [BinaryDenseNet](bitorch/models/densenet.py) + - [QuickNet](bitorch/models/quicknet.py) +- simple example script for MNIST +- support for integration of bitorch's inference engine for the following layers + - QLinear + - QConv +- a quantized DLRM version, derived from [this](https://github.com/facebookresearch/dlrm) implementation +- example code for training the quantized DLRM model +- new quantization function: [Progressive Sign](bitorch/quantizations/progressive_sign.py) +- new features in PyTorch Lightning example: + - training with Knowledge Distillation + - improved logging + - callback to update Progressive Sign module +- option to integrate custom models, datasets, quantization functions +- a quantization scheduler which lets you change quantization methods during training +- a padding layer + +### Changed + +- requirements changed: + - code now depends on torch 1.12.x and torchvision 0.13.x + - requirements for examples are now stored at their respective folders + - optional requirements now install everything needed to run all examples +- code is now formatted with the black code formatter +- using PyTorch's implementation of RAdam +- renamed the `bitwidth` attribute of quantization functions to `bit_width` +- moved the image datasets out of the bitorch core package into the image classification example + +### Fixed + +- fix error from updated protobuf package + ## [0.2.0] - 2022/05/19 ### Added diff --git a/README.md b/README.md index 972f49d..e7ca89b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,10 @@ Our current roadmap contains: - Extending the model zoo with pre-trained models of state-of-the-art approaches - Adding examples for advanced training methods with multiple stages, knowledge distillation, etc. -All changes are tracked in the [changelog](CHANGELOG.md). +All changes are tracked in the [changelog](https://github.com/hpi-xnor/bitorch/blob/main/CHANGELOG.md). + +Please refer to [our wiki](https://bitorch.readthedocs.io/en/latest/) for a comprehensive introduction into +the library or use the introduction notebook in `examples/notebooks`. ## Installation @@ -18,11 +21,11 @@ Currently, the only supported installation is pip (a conda package is planned in ### Pip -If you wish to use a *specific version* of PyTorch for compatibility with certain devices or CUDA versions, +If you wish to use a _specific version_ of PyTorch for compatibility with certain devices or CUDA versions, we advise on installing the corresponding versions of `pytorch` and `torchvision` first (or afterwards), please consult [pytorch's getting started guide](https://pytorch.org/get-started/locally/). -Afterwards simply run: +Otherwise, simply run: ```bash pip install bitorch ``` @@ -32,9 +35,7 @@ Note, that you can also request a specific PyTorch version directly, e.g. 
for CU pip install bitorch --extra-index-url https://download.pytorch.org/whl/cu113 ``` -To use advanced logging capabilities with [tensorboardX](https://github.com/lanpa/tensorboardX), -install the optional dependencies as well: - +If you want to run the examples install the optional dependencies as well: ```bash pip install "bitorch[opt]" ``` @@ -45,13 +46,8 @@ The package can also be installed locally for editing and development. First, clone the [repository](https://github.com/hpi-xnor/bitorch), then run: ```bash -pip install -e . -``` - -To activate advanced logging with Tensorboard and model summary, install the optional dependencies as well: - -```bash -pip install -e ".[opt]" +pip install -e . # without optional dependencies +pip install -e ".[opt]" # with optional dependencies ``` ### Dali Preprocessing @@ -61,31 +57,60 @@ e.g. with CUDA 11.x, (currently only supported for imagenet) you need to install the `nvidia-dali-cuda110` package by running the following command: ``` - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda110 +pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda110 ``` -### Code formatting and typing +## Development -Install the _dev_ requirements for (local) development: +Install the package and _dev_ requirements locally for development: ```bash pip install -e ".[dev]" ``` -New code should be compatible with Python 3.X versions and be compliant with PEP8. To check the codebase, please run +### Tests + +The tests can be run with [pytest](https://docs.pytest.org/): + +```bash +pytest +``` + +### Code formatting and typing +For conveniently checking whether your code suites the required style (more details below), run +```bash +./check-codestyle.sh +``` + +New code should be compatible with Python 3.X versions and be compliant with PEP8. To check the codebase, please run ```bash flake8 ``` The codebase has type annotations, please make sure to add type hints if required. We use `mypy` for type checking: - ```bash mypy --config-file mypy.ini ``` -Finally, the tests can be run with: +For code formatting we use `black`: +```bash +black . --check --verbose --diff --color # check what changes the formatter would do +black . # apply the formatter +``` + +In order to automatically apply the code formatting with every commit, you can also install pre-commit +and use the pre-commit hook: +```bash +pre-commit install +``` +### Documentation + +We use [Google's Python Docstring Format](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) +to document our code. + +Documentation can be generated with ```bash -pytest +sphinx-build -b html docs/source/ docs/build/ -a ``` diff --git a/bitorch/__init__.py b/bitorch/__init__.py index 087564f..6d63335 100644 --- a/bitorch/__init__.py +++ b/bitorch/__init__.py @@ -1,3 +1,7 @@ +""" +BITorch is a library currently under development to simplify building quantized and binary neural networks with PyTorch. +It contains implementation of the required layers, different quantization functions and examples. 
+""" import os from argparse import ArgumentParser, Namespace from importlib import import_module @@ -5,7 +9,10 @@ from typing import List from .config import Config +from .runtime_mode import RuntimeMode, runtime_mode_type, change_mode, pause_wrapping # noqa: F401 +from .layers import convert # noqa: F401 +mode: RuntimeMode = RuntimeMode.DEFAULT configs_by_name = {} @@ -50,7 +57,7 @@ def config_from_name(name: str) -> Config: def config_names() -> List: - """getter for list of config names for argparse + """Get the list of config names for argparse. Returns: List: the config names @@ -59,20 +66,29 @@ def config_names() -> List: def add_config_args(parser: ArgumentParser) -> None: - """adds all config arguments + """Adds all arguments from all registered configs. Args: parser (ArgumentParser): parser to add the arguments to """ - for config in configs_by_name.values(): - config.add_config_arguments(parser) + for config_ in configs_by_name.values(): + config_.add_config_arguments(parser) def apply_args_to_configuration(args: Namespace) -> None: - """applys the cli configurations to the config objects. + """Applies the cli configurations to the config objects. Args: args (Namespace): the cli configurations """ - for config in configs_by_name.values(): - config.apply_args_to_configuration(args) + for config_ in configs_by_name.values(): + config_.apply_args_to_configuration(args) + + +def register_custom_config(custom_config: Config) -> None: + """Register a custom (external) config in bitorch. + + Args: + custom_config: the custom config which should be added to bitorch + """ + configs_by_name[custom_config.name] = custom_config diff --git a/bitorch/config.py b/bitorch/config.py index f55c857..d57e785 100644 --- a/bitorch/config.py +++ b/bitorch/config.py @@ -1,29 +1,37 @@ -"""Config class for bitorch configurations. These configs can be used to specify key default values which benefit -from beeing changed easily via argparse e.g. for training scripts.""" +""" +Config class for bitorch configurations. These configs can be used to specify key default values which benefit +from beeing changed easily via argparse e.g. for training scripts. +""" from argparse import ArgumentParser, Namespace class Config: - """Config superclass that implements functionality to create argparse arguments for class attributes of - subclasses.""" + """ + Config superclass that implements functionality to create argparse arguments for class attributes of + subclasses. 
+ """ + + name: str def __init__(self) -> None: """collects all attributes of class that are not the name as configurable attributes.""" configurable_attributes = [ - attribute for attribute in dir(self) - if not attribute.startswith('__') and not callable(getattr(self, attribute)) and not attribute == "name"] + attribute + for attribute in dir(self) + if not attribute.startswith("__") and not callable(getattr(self, attribute)) and not attribute == "name" + ] self._configurable_attributes = configurable_attributes for attribute in self._configurable_attributes: self._add_getter_setter_methods(attribute) def _add_getter_setter_methods(self, attribute: str) -> None: - def getter(self): # type: ignore - return getattr(self, attribute) + def getter(self_): # type: ignore + return getattr(self_, attribute) - def setter(self, value): # type: ignore - setattr(self, attribute, value) + def setter(self_, value): # type: ignore + setattr(self_, attribute, value) setattr(self, f"get_{attribute}", getter) setattr(self, f"set_{attribute}", setter) @@ -39,11 +47,21 @@ def add_config_arguments(self, parser: ArgumentParser) -> None: for attribute in self._configurable_attributes: attribute_value = getattr(self, attribute) if isinstance(attribute_value, bool): - config.add_argument(f"--{attribute.replace('_', '-')}", dest=attribute, default=attribute_value, - action=f"store_{'false' if attribute_value else 'true'}", required=False) + config.add_argument( + f"--{attribute.replace('_', '-')}", + dest=attribute, + default=attribute_value, + action=f"store_{'false' if attribute_value else 'true'}", + required=False, + ) else: - config.add_argument(f"--{attribute.replace('_', '-')}", dest=attribute, default=attribute_value, - type=type(attribute_value), required=False) + config.add_argument( + f"--{attribute.replace('_', '-')}", + dest=attribute, + default=attribute_value, + type=type(attribute_value), + required=False, + ) def apply_args_to_configuration(self, args: Namespace) -> None: """loads the cli set values of configurable attributes. diff --git a/bitorch/layers/__init__.py b/bitorch/layers/__init__.py index 67af28e..113a87b 100644 --- a/bitorch/layers/__init__.py +++ b/bitorch/layers/__init__.py @@ -3,24 +3,68 @@ and activations before forwarding them. These layers use the quantization functions specified in the quantization submodule. 
""" +from typing import Optional, TypeVar -from .debug_layers import ( - InputGraphicalDebug, - InputPrintDebug, - WeightGraphicalDebug, - WeightPrintDebug, - ShapePrintDebug -) -from .qactivation import QActivation -from .qconv1d import QConv1d, QConv1d_NoAct -from .qconv2d import QConv2d, QConv2d_NoAct -from .qconv3d import QConv3d, QConv3d_NoAct -from .qlinear import QLinear +import torch +from torch import nn + +from bitorch import RuntimeMode +from .debug_layers import InputGraphicalDebug, InputPrintDebug, WeightGraphicalDebug, WeightPrintDebug, ShapePrintDebug +from .extensions import CustomImplementationMixin from .pact import Pact +from .qactivation import QActivation +from .qconv1d import QConv1d, QConv1dBase, QConv1d_NoAct +from .qconv2d import QConv2d, QConv2dBase, QConv2d_NoAct +from .qconv3d import QConv3d, QConv3dBase, QConv3d_NoAct from .qembedding import QEmbedding, QEmbeddingBag +from .qlinear import QLinear, QLinearBase +from .register import all_layer_registries +from .pad import PadModule __all__ = [ - "InputGraphicalDebug", "InputPrintDebug", "WeightGraphicalDebug", "WeightPrintDebug", - "ShapePrintDebug", "QActivation", "QConv1d", "QConv2d", "QConv3d", "QConv1d_NoAct", - "QConv2d_NoAct", "QConv3d_NoAct", "QLinear", "QEmbedding", "QEmbeddingBag", "Pact", + "InputGraphicalDebug", + "InputPrintDebug", + "WeightGraphicalDebug", + "WeightPrintDebug", + "ShapePrintDebug", + "QActivation", + "QConv1d", + "QConv2d", + "QConv3d", + "QConv1dBase", + "QConv2dBase", + "QConv3dBase", + "QConv1d_NoAct", + "QConv2d_NoAct", + "QConv3d_NoAct", + "QLinear", + "QLinearBase", + "QEmbedding", + "QEmbeddingBag", + "Pact", + "CustomImplementationMixin", + "convert", + "PadModule", ] + + +T = TypeVar("T", bound=nn.Module) + + +def convert(module: T, new_mode: RuntimeMode, device: Optional[torch.device] = None, verbose: bool = False) -> T: + """ + Convert the given module to a new bitorch RuntimeMode. Needs to have custom implementations installed. + + Args: + module: the module to be converted + new_mode: the new mode for the module + device: an optional device + verbose: whether to print which layers are converted + + Returns: + the converted module + """ + submodules = list(module.modules()) + for registry in all_layer_registries(): + registry.convert_layers_to(new_mode, only=submodules, device=device, verbose=verbose) + return module diff --git a/bitorch/layers/bembedding.py b/bitorch/layers/bembedding.py new file mode 100644 index 0000000..a5b205a --- /dev/null +++ b/bitorch/layers/bembedding.py @@ -0,0 +1,237 @@ +from typing import Union, Optional, Dict, List, Tuple +import numpy +import torch +import warnings +from bitorch.layers.config import config +from bitorch.quantizations.base import Quantization +from torch import Tensor, nn +from torch.nn.parameter import Parameter + + +class BEmbedding(nn.Module): + """Binarized version of pytorchs embedding layer. Uses given binarization method to binarize the weights. + Memory consumption during training increases with batch size. Inference is always small. + """ + + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: Optional[int] = None, + weight_quantization: Union[Quantization, str, None] = None, + device: Union[str, torch.device, None] = None, + sign_bool: bool = False, # Whether a boolean 0 represents a -1. Set to True for Sign. 
+ ) -> None: + super().__init__() + # Load the quantization function + self.weight_quantization = config.get_quantization_function(weight_quantization or config.weight_quantization) + # Random initialize the weight. Can be set using set_weight. + self.weight: Union[Parameter, Tensor] = Parameter( + torch.rand((num_embeddings, embedding_dim), device=device) > 0.5, requires_grad=False + ) + + self.padding_idx = padding_idx + self.sign_bool = sign_bool + + self.optimizer: Optional[torch.optim.Optimizer] = None + self.optimizer_dict: Optional[Dict[str, List[Tensor]]] = None + self.unique: Optional[Tensor] = None + self.unique_vectors: Optional[Tensor] = None + self.out_param: Optional[Tensor] = None + + def select_unique_vectors(self, flat_indices: Tensor) -> Tuple[Tensor, Tensor, Tensor]: + """Given a flat tensor of indices, return the unique indices, their inverse in the original tensor, + and a tensor with embedding vectors that are indexed by the unique indices. + + Args: + flat_indices (Tensor): A flat tensor of indices that query the embedding table. + + Returns: + Tuple[Tensor, Tensor, Tensor]: unqiue indices, inverse indices, unique indexed embedding vectors + """ + unique, inverse_indices = self.unique_wrapper(flat_indices) + unique_weight = self.weight.index_select(0, unique).to(torch.float32) + return unique, inverse_indices, unique_weight + + def unique_wrapper(self, tensor: Tensor) -> Tuple[Tensor, Tensor]: + """Compute the unique values and inverse indices of a given tensor. Uses numpy when on cpu and otherwise pytorch. + + Args: + tensor (Tensor): Tensor to compute the unique values from. + + Returns: + Tuple[Tensor, Tensor]: unique values, inverse indices + """ + if tensor.device.type == "cpu": + unique, inverse_indices = numpy.unique(tensor.numpy(), return_inverse=True) + unique = torch.from_numpy(unique) + inverse_indices = torch.from_numpy(inverse_indices) + else: + unique, inverse_indices = torch.unique(tensor, return_inverse=True) + return unique, inverse_indices + + def apply_padding(self, indices: Tensor, embedding_vectors: Tensor) -> Tensor: + """Applies padding to the embedding vectors. Sets the embedding vector to zero where + the given unique index matches the padding_idx property. This operation is inplace. + + Args: + indices (Tensor): Indices of the embedding vectors. + embedding_vectors (Tensor): Embedding vectors to be padded. + + Returns: + Tensor: Padded embedding vectors. + """ + if self.padding_idx is not None: + embedding_vectors[indices == self.padding_idx] = 0 + return embedding_vectors + + def transform_zeros(self, embedding_vectors: Tensor) -> Tensor: + """If the sign_bool property is set, replaces 0 with -1. This operation is inplace. + + Args: + embedding_vectors (Tensor): The tensor to be modified. + + Returns: + Tensor: The modified input tensor + """ + if self.sign_bool: + embedding_vectors[embedding_vectors == 0] = -1 + return embedding_vectors + + def set_optimizable_weights(self, weights: Tensor) -> None: + """Inject the weights to be optimized into the optimizer. + + Args: + weights (Tensor): The weights to be ioptimized. 
+ """ + if self.optimizer is not None: + if self.optimizer_dict is None: + self.optimizer_dict = {"params": [weights]} + self.optimizer.add_param_group(self.optimizer_dict) + elif self.optimizer_dict: + self.optimizer.state[weights] = self.optimizer.state[self.optimizer_dict["params"][0]] + del self.optimizer.state[self.optimizer_dict["params"][0]] + self.optimizer_dict["params"] = [weights] + + def forward(self, input: Tensor) -> Tensor: + """Generates embeddings for received tokens. + + Args: + input (Tensor): indices for embedding + + Returns: + Tensor: embeddings for given token + """ + input_shape = input.shape + self.unique, inverse_indices, self.unique_vectors = self.select_unique_vectors(input.flatten()) + self.apply_padding(self.unique, self.unique_vectors) + self.transform_zeros(self.unique_vectors) + self.unique_vectors.requires_grad_(True) + out = self.unique_vectors.index_select(0, inverse_indices) + self.set_optimizable_weights(self.unique_vectors) + return out.reshape((*input_shape, -1)) + + def set_weight(self, weight: Tensor) -> None: + if weight.dtype != torch.bool: + weight = self.weight_quantization(weight) == 1 + self.weight.copy_(weight) + + @torch.no_grad() + def step(self) -> None: + """Step the BEmbedding by copying the optimized unique embedding vectors into the binary embedding table.""" + assert self.unique is not None and self.unique_vectors is not None, "Call forward before step." + if self.padding_idx is not None: + self.unique_vectors = self.unique_vectors[self.unique != self.padding_idx] + self.unique = self.unique[self.unique != self.padding_idx] + self.weight.index_copy_(0, self.unique, self.weight_quantization(self.unique_vectors) == 1) + self.unique = None + self.unique_vectors = None + + def set_optimizer(self, optimizer: torch.optim.Optimizer) -> None: + """Set the optimizer to set parameters to be optimized dynamically during training. + + Args: + optimizer (torch.optim.Optimizer): The optimizer of the `BEmbedding`. + """ + self.optimizer = optimizer + + +class BEmbeddingBag(BEmbedding): + """Binarized version of pytorchs embedding bag. Uses given binarization method to binarize the weights. + Memory consumption during training increases with batch size. Inference is always small. + """ + + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: Optional[int] = None, + weight_quantization: Union[Quantization, str, None] = None, + device: Union[str, torch.device, None] = None, + sign_bool: bool = False, # Whether a boolean 0 represents a -1. + mode: str = "mean", + ) -> None: + super().__init__( + num_embeddings=num_embeddings, + embedding_dim=embedding_dim, + padding_idx=padding_idx, + weight_quantization=weight_quantization, + device=device, + sign_bool=sign_bool, + ) + self.mode = mode + self.embedding_dim = embedding_dim + warnings.warn( + "The BEmbeddingBag is experimental. Using the BEmbeddingBag leads to significal slowdowns of the model." + ) + + def apply_aggregate( + self, batch_size: int, offsets: Tensor, inverse_indices: Tensor, unqiue_embedding_vectors: Tensor + ) -> Tensor: + """Aggregates the unique embedding vectors using the defined mode. + + Args: + batch_size (int): Batch size of the input data. + offsets (Tensor): Offsets of inverse indices for each batch. Defines which embedding vectors are aggregated. + inverse_indices (Tensor): Flattened bag of indices for each batch. + unqiue_embedding_vectors (Tensor): Unique embedding vectors to be aggregated. + + Returns: + Tensor: The aggregated embedding vectors. 
+ """ + out = torch.zeros((batch_size, self.embedding_dim), device=self.weight.device) + for row, (start_index, end_index) in enumerate(zip(offsets.tolist(), offsets.tolist()[1:] + [None])): + use_indices = inverse_indices[start_index:end_index] + if self.mode == "sum": + out[row] = torch.sum(unqiue_embedding_vectors.index_select(0, use_indices), dim=0) + elif self.mode == "mean": + out[row] = torch.sum(unqiue_embedding_vectors.index_select(0, use_indices), dim=0).div_( + len(use_indices) + ) + elif self.mode == "prod": + out[row] = torch.prod(unqiue_embedding_vectors.index_select(0, use_indices), dim=0) + return out.reshape((batch_size, -1)) + + def forward(self, indices: Tensor, offsets: Tensor) -> Tensor: # type: ignore + """Generates embeddings from given tokens and offsets. + + Args: + indices (Tensor): The tokens to be embedded. + offsets (Tensor): The offsets describing the starting points of batch items. + + Returns: + Tensor: The embedded and aggregated tokens. + """ + self.unique, inverse_indices, self.unique_vectors = self.select_unique_vectors(indices.flatten()) + self.apply_padding(self.unique, self.unique_vectors) + self.transform_zeros(self.unique_vectors) + self.unique_vectors.requires_grad_(True) + batch_size = offsets.size(0) + out = self.apply_aggregate( + batch_size=offsets.size(0), + offsets=offsets, + inverse_indices=inverse_indices, + unqiue_embedding_vectors=self.unique_vectors, + ) + self.set_optimizable_weights(self.unique_vectors) + return out.reshape((batch_size, -1)) diff --git a/bitorch/layers/config.py b/bitorch/layers/config.py index 9fb5f84..17f393f 100644 --- a/bitorch/layers/config.py +++ b/bitorch/layers/config.py @@ -1,7 +1,6 @@ """Config class for quantization layers. This file should be imported before the other layers.""" from typing import Union -import torch from bitorch.config import Config from bitorch.quantizations import quantization_from_name, Quantization @@ -12,14 +11,14 @@ class LayerConfig(Config): name = "layer_config" - def get_quantization_function(self, quantization: Union[str, Quantization]) -> torch.nn.Module: - """Returns the quanitization module specified in quantization_name. + def get_quantization_function(self, quantization: Union[str, Quantization]) -> Quantization: + """Returns the quantization module specified by the given name or object. Args: - quantization (Union[str, Quantization]): quantization module or name of quantization function. + quantization: quantization module or name of quantization function. Returns: - torch.nn.Module: Quantization module + the quantization module """ if isinstance(quantization, Quantization): return quantization diff --git a/bitorch/layers/debug_layers.py b/bitorch/layers/debug_layers.py index 617e86b..cabc8b6 100644 --- a/bitorch/layers/debug_layers.py +++ b/bitorch/layers/debug_layers.py @@ -1,13 +1,10 @@ -from typing import Any +from typing import Optional, Any import torch from .config import config class _Debug(torch.nn.Module): - def __init__(self, - debug_interval: int = 100, - num_outputs: int = 10, - name: str = "Debug") -> None: + def __init__(self, debug_interval: int = 100, num_outputs: int = 10, name: str = "Debug") -> None: """inits values. 
Args: @@ -44,17 +41,19 @@ def _debug(self, debug_tensor: torch.Tensor) -> None: Args: debug_tensor (torch.Tensor): tensor to be debugged """ - print(self.name, ":", debug_tensor if len(debug_tensor) < - self._num_outputs else debug_tensor[:self._num_outputs]) + print( + self.name, ":", debug_tensor if len(debug_tensor) < self._num_outputs else debug_tensor[: self._num_outputs] + ) class _GraphicalDebug(_Debug): - - def __init__(self, - figure: object = None, - images: list = None, - debug_interval: int = 100, - num_outputs: int = 10) -> None: + def __init__( + self, + figure: Optional[object] = None, + images: Optional[list] = None, + debug_interval: int = 100, + num_outputs: int = 10, + ) -> None: """Debugs the given layer by drawing weights/inputs in given matplotlib plot images. Args: @@ -70,7 +69,7 @@ def __init__(self, self.set_figure(figure) self.set_images(images) - def set_figure(self, figure: object = None) -> None: + def set_figure(self, figure: Optional[object] = None) -> None: """setter for figure object Args: @@ -78,7 +77,7 @@ def set_figure(self, figure: object = None) -> None: """ self._figure = figure - def set_images(self, images: list = None) -> None: + def set_images(self, images: Optional[list] = None) -> None: """setter for images list Args: @@ -91,7 +90,8 @@ def set_images(self, images: list = None) -> None: if self._images is not None and len(self._images) != self._num_outputs: raise ValueError( f"number of given images ({len(self._images)}) must match " - f"number of desired outputs ({self._num_outputs})!") + f"number of desired outputs ({self._num_outputs})!" + ) def _debug(self, debug_tensor: torch.Tensor) -> None: """draws graphical debug information about given debug tensor into figure @@ -183,7 +183,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: weight = self._debug_module.weight.clone() # type: ignore # check if given module is a quantized module - if hasattr(self._debug_module, 'quantize'): + if hasattr(self._debug_module, "quantize"): weight = self._debug_module.quantize(weight) # type: ignore self._debug_tensor(weight) @@ -213,7 +213,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: weight = self._debug_module.weight.clone() # type: ignore # check if given module is a quantized module - if hasattr(self._debug_module, 'quantize'): + if hasattr(self._debug_module, "quantize"): weight = self._debug_module.quantize(weight) # type: ignore self._debug_tensor(weight) diff --git a/bitorch/layers/extensions/__init__.py b/bitorch/layers/extensions/__init__.py new file mode 100644 index 0000000..0775551 --- /dev/null +++ b/bitorch/layers/extensions/__init__.py @@ -0,0 +1,16 @@ +"""This submodule contains objects needed to provide and manage custom layer implementations.""" + +from .layer_container import LayerContainer +from .layer_implementation import DefaultImplementationMixin, CustomImplementationMixin +from .layer_recipe import LayerRecipe +from .layer_registration import LayerImplementation +from .layer_registry import LayerRegistry + +__all__ = [ + "LayerContainer", + "DefaultImplementationMixin", + "CustomImplementationMixin", + "LayerRecipe", + "LayerImplementation", + "LayerRegistry", +] diff --git a/bitorch/layers/extensions/layer_container.py b/bitorch/layers/extensions/layer_container.py new file mode 100644 index 0000000..b4f8a10 --- /dev/null +++ b/bitorch/layers/extensions/layer_container.py @@ -0,0 +1,96 @@ +from typing import Any, TypeVar, Type, Generic + +from bitorch.layers.extensions.layer_recipe import LayerRecipe + +T = 
TypeVar("T") + + +class LayerContainer(Generic[T]): + """This class wraps another layer - but the internally contained class can be swapped out during runtime.""" + + internal_variable_names = [ + "_layer_implementation", + "_recipe", + ] + + patch = [ + "to", + ] + + def __init__(self, impl_class: Type[T], *args: Any, **kwargs: Any) -> None: + """ + Wrap a new object based on the given class, positional arguments, and keyword arguments. + Args: + impl_class: class of the new object + *args: positional arguments of the new object + **kwargs: keyword arguments of the new object + """ + self._layer_implementation = impl_class(*args, **kwargs) + self._recipe = LayerRecipe(layer=self, args=args, kwargs=kwargs) + + def replace_layer_implementation(self, new_implementation: T) -> None: + """ + Replace the internally stored layer object with the given one. + Args: + new_implementation: new class which should replace the previous implementation. + """ + self._layer_implementation = new_implementation + + def __getattr__(self, item: Any) -> Any: + if item in self.internal_variable_names: + return self.__dict__[item] + attr_value = getattr(self._layer_implementation, item) + if attr_value == self._layer_implementation: + return self + if callable(attr_value) and item in self.patch: + # patch return values of all functions/classes defined in self.patch + # they should return this LayerContainer instead of themselves + # required for e.g. pytorch's .to(device) function + other = self + + class Patch: + def __call__(self, *args: Any, **kwargs: Any) -> Any: + fn_return_val = attr_value(*args, **kwargs) + if fn_return_val == other._layer_implementation: + return other + return fn_return_val + + def __getattr__(self, item_: Any) -> Any: + return getattr(attr_value, item_) + + # needed for tests: + @property # type: ignore[misc] + def __class__(self) -> Any: + return attr_value.__class__ + + return Patch() + return attr_value + + def __repr__(self) -> "str": + return f"LayerContainer (at {hex(id(self))}), contains: {self._layer_implementation}" + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + return self._layer_implementation(*args, **kwargs) # type:ignore[operator] + + def __setattr__(self, key: Any, value: Any) -> None: + if key in self.internal_variable_names: + self.__dict__[key] = value + return + setattr(self._layer_implementation, key, value) + + @property # type: ignore[misc] + def __class__(self) -> Type[T]: # type: ignore + return self._layer_implementation.__class__ + + @property + def layer_implementation(self) -> T: + """ + Access the internally wrapped layer object directly. + Returns: + the internal layer object + """ + return self._layer_implementation + + @property + def recipe(self) -> LayerRecipe: + return self._recipe diff --git a/bitorch/layers/extensions/layer_implementation.py b/bitorch/layers/extensions/layer_implementation.py new file mode 100644 index 0000000..73ffa16 --- /dev/null +++ b/bitorch/layers/extensions/layer_implementation.py @@ -0,0 +1,81 @@ +from abc import ABC +from typing import Optional, Any, Tuple, TYPE_CHECKING + +import torch + +if TYPE_CHECKING: + from . 
import LayerRecipe + + +class BaseImplementation: + """Defines the class interface of a custom layer implementation of a certain layer type.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + @classmethod + def is_default_implementation(cls) -> bool: + """ + Returns: + bool: whether this implementation is the default implementation of the current layer type + """ + raise NotImplementedError("Should be implemented by subclass.") + + @classmethod + def can_clone(cls, recipe: "LayerRecipe") -> Tuple[bool, str]: + """ + Returns whether this layer class supports the implementation of a given layer recipe. + + Args: + recipe (LayerRecipe): the layer which should be checked for cloning + + Returns: + Whether the layer can be cloned or not and an info message if it can not be cloned + """ + raise NotImplementedError("A custom layer should implement their own compatibility check.") + + @classmethod + def create_clone_from(cls, recipe: "LayerRecipe", device: Optional[torch.device] = None) -> Any: + """ + Create a new layer based on a given layer recipe (can be expected to be from the default category). + + Args: + recipe: the layer which should be cloned + device: the device on which the layer is going to be run + + Returns: + A clone of the LayerRecipe in the current class implementation + """ + raise NotImplementedError("A custom layer should implement a method to create a cloned layer.") + + +class DefaultImplementationMixin(BaseImplementation, ABC): + """Defines the class interface of a default layer implementation of a certain layer type.""" + + @classmethod + def is_default_implementation(cls) -> bool: + return True + + @classmethod + def can_clone(cls, recipe: "LayerRecipe") -> Tuple[bool, str]: + return True, "" + + @classmethod + def create_clone_from(cls, recipe: "LayerRecipe", device: Optional[torch.device] = None) -> Any: + return cls(*recipe.args, **recipe.kwargs) + + +class CustomImplementationMixin(BaseImplementation, ABC): + """Defines the class interface of a custom layer implementation of a certain layer type.""" + + @classmethod + def is_default_implementation(cls) -> bool: + return False + + @classmethod + def can_clone(cls, recipe: "LayerRecipe") -> Tuple[bool, str]: + raise NotImplementedError("A custom layer should implement their own compatibility check.") + + @classmethod + def create_clone_from(cls, recipe: "LayerRecipe", device: Optional[torch.device] = None) -> Any: + raise NotImplementedError("A custom layer should implement a method to create a cloned layer.") diff --git a/bitorch/layers/extensions/layer_recipe.py b/bitorch/layers/extensions/layer_recipe.py new file mode 100644 index 0000000..17197d9 --- /dev/null +++ b/bitorch/layers/extensions/layer_recipe.py @@ -0,0 +1,48 @@ +import typing +from dataclasses import dataclass +from typing import TypeVar, Tuple, Any, Dict + +if typing.TYPE_CHECKING: + from .layer_container import LayerContainer + +T = TypeVar("T") + + +@dataclass(eq=False, frozen=True) +class LayerRecipe: + """ + Data class to store a layer object and the arguments used to create it. + It allows to create other implementations of the same layer later on. + """ + + layer: "LayerContainer" + args: Tuple[Any, ...] + kwargs: Dict[str, Any] + + def get_positional_arg(self, pos: int) -> Any: + """ + Get a positional argument from the stored args. 
+ + Args: + pos: the position of the argument if given as a positional arg + + Returns: + the argument value retrieved + """ + return self.args[pos] + + def get_arg(self, pos: int, key: str, default: T) -> T: + """ + Get an argument from the stored args or kwargs. + + Args: + pos: the position of the argument if given as a positional arg + key: the name of the argument + default: the default value of the argument + + Returns: + the argument value retrieved + """ + if len(self.args) > pos: + return self.args[pos] + return self.kwargs.get(key, default) diff --git a/bitorch/layers/extensions/layer_registration.py b/bitorch/layers/extensions/layer_registration.py new file mode 100644 index 0000000..33652c8 --- /dev/null +++ b/bitorch/layers/extensions/layer_registration.py @@ -0,0 +1,107 @@ +from abc import ABC +from typing import Optional, Any, Type, Union, Tuple, TYPE_CHECKING + +import torch + +import bitorch +from bitorch import runtime_mode_type, RuntimeMode +from .layer_container import LayerContainer +from .layer_implementation import DefaultImplementationMixin, BaseImplementation, CustomImplementationMixin +from .layer_recipe import LayerRecipe + +if TYPE_CHECKING: + from .layer_registry import LayerRegistry + + +class LayerImplementation(ABC): + """ + Superclass for storing different implementations of a common layer type. + + It registers all decorated classes in the given registry. On creation of a decorated class, it + wraps the created class object in a layer container and stores the arguments used to create the layer. + It also captures which RuntimeMode(s) is/are supported by an implementation. + """ + + registry: "LayerRegistry" + class_: Type[BaseImplementation] + class_name: str + _supported_modes: runtime_mode_type + __initialized: bool + + def __init__(self, registry: "LayerRegistry", supported_modes: runtime_mode_type) -> None: + """ + Define an implementation decorator for a certain type of layer. All implementations and objects of this type of + layer are stored in the given registry. + + Args: + registry: the registry which should store the implementation and objects of this layer type + supported_modes: the mode supported by the registering implementation + """ + self.registry = registry + assert RuntimeMode.is_combined_mode(supported_modes), f"invalid mode {supported_modes} given" + self._supported_modes = supported_modes + self.__initialized = False + self.class_ = None # type: ignore + self.class_name = "" + + def __call__( + self, *args: Any, **kwargs: Any + ) -> Union["LayerImplementation", Type[BaseImplementation], LayerContainer]: + if not self.__initialized: + # this object is called once when @Decorator is used, we need to initialize + return self._initialize(*args, **kwargs) + + if bitorch.mode == RuntimeMode.RAW: + return self.class_(*args, **kwargs) # type: ignore + + # on later calls we need to provide the correct layer implementation + return self._provide_layer_implementation(*args, **kwargs) + + def _initialize(self, class_: Type[BaseImplementation]) -> Union["LayerImplementation", Type[BaseImplementation]]: + self.__initialized = True + self.class_ = class_ + self.class_name = self.class_.__name__ + self.registry.register(self) + if self._supported_modes == RuntimeMode.DEFAULT: + assert issubclass( + self.class_, DefaultImplementationMixin + ), f"{self.class_name} should be a subclass of DefaultLayerImplementation." 
+ # provide this wrapper + return self + else: + assert issubclass(self.class_, CustomImplementationMixin), ( + f"{self.class_name} should be a subclass of CustomImplementationInterface (and it should " + f"implement the corresponding class methods)." + ) + # after we have registered custom implementations, we do not interfere anymore + return self.class_ + + def _provide_layer_implementation(self, *args: Any, **kwargs: Any) -> LayerContainer: + correct_layer_implementation = self.registry.get_layer() + if self == correct_layer_implementation: + # this class provides the correct implementation for the current mode (recursion stop) + layer_container = LayerContainer(self.class_, *args, **kwargs) + self.registry.add_recipe(layer_container.recipe) + return layer_container + # call this method again but on the correct base class + return correct_layer_implementation._provide_layer_implementation(*args, **kwargs) + + def supports_mode(self, mode: RuntimeMode) -> bool: + """ + Check whether this layer implementation supports a given RuntimeMode. + Args: + mode: the runtime mode that should be supported + + Returns: + True if the given mode is supported, False otherwise + """ + return mode.is_supported_by(self._supported_modes) + + def can_create_clone_from(self, recipe: LayerRecipe) -> Tuple[bool, str]: + return self.class_.can_clone(recipe) + + def get_replacement(self, recipe: LayerRecipe, device: Optional[torch.device] = None) -> Any: + return self.class_.create_clone_from(recipe, device) + + def is_default(self) -> bool: + return self.class_.is_default_implementation() diff --git a/bitorch/layers/extensions/layer_registry.py b/bitorch/layers/extensions/layer_registry.py new file mode 100644 index 0000000..c6b3407 --- /dev/null +++ b/bitorch/layers/extensions/layer_registry.py @@ -0,0 +1,127 @@ +from typing import Set, Any, Optional, Iterable + +import bitorch +import torch +from bitorch import RuntimeMode + +from .layer_container import LayerContainer +from .layer_recipe import LayerRecipe +from .layer_registration import LayerImplementation + + +class LayerRegistry: + """ + Stores all available implementations (and their supported modes) for a certain type of layer. + It also wraps these implementations and stores references to them, so they can be replaced easily. + Needs to be subclassed for each type of layer. + """ + + def __init__(self, name: str) -> None: + self.name = name + self._class = None + self.layer_implementations: Set[LayerImplementation] = set() + self._instance_recipes: Set[LayerRecipe] = set() + self.is_replacing = False + + @property + def layer_instances(self) -> Set["LayerContainer"]: + return set(x.layer for x in self._instance_recipes) + + def get_recipe_for(self, layer: Any) -> Optional["LayerRecipe"]: + if layer not in map(lambda x: x.layer, self._instance_recipes): + return None + return next(filter(lambda x: x.layer == layer, self._instance_recipes)) + + def get_replacement(self, mode: RuntimeMode, recipe: LayerRecipe, device: Optional[torch.device] = None) -> Any: + layer = self.get_layer(mode, recipe) + return layer.get_replacement(recipe, device) + + def add_recipe(self, new_recipe: LayerRecipe) -> None: + if self.is_replacing: + return + self._instance_recipes.add(new_recipe) + + def __contains__(self, item: Any) -> bool: + return item.__class__ in map(lambda x: x.class_, self.layer_implementations) + + def register(self, layer: LayerImplementation) -> None: + """ + Register a layer implementaiton in this registry. 
+ + Args: + layer: the layer to be registered + """ + self.layer_implementations.add(layer) + + def get_layer( + self, mode: Optional[RuntimeMode] = None, recipe: Optional[LayerRecipe] = None + ) -> LayerImplementation: + """ + Get a layer implementation compatible to the given mode and recipe. + + If no recipe is given, only compatibility with the mode is checked. + If no mode is given, the current bitorch mode is used. + + Args: + mode: mode that the layer implementation should support + recipe: recipe that the layer implementation should be able to copy + + Returns: + a LayerImplementation compatible with the given mode and recipe (if available) + """ + if mode is None: + mode = bitorch.mode + available_layers = [] + unavailable_layers = [] + + for implementation in self.layer_implementations: + if not implementation.supports_mode(mode): + continue + if recipe: + return_tuple = implementation.can_create_clone_from(recipe) + if not isinstance(return_tuple, tuple) and len(return_tuple) == 2: + raise RuntimeError(f"{implementation.__class__} returned non-tuple on 'can_create_clone_from'.") + can_be_used, message = return_tuple + if not can_be_used: + unavailable_layers.append(f" {implementation.__class__} unavailable because: {message}") + continue + available_layers.append(implementation) + + if len(available_layers) > 1: + RuntimeWarning(f"Multiple layer implementations available for '{self.name}' available (mode='{mode}').") + if len(available_layers) == 0: + base_error = f"No implementations for '{self.name}' available (mode='{mode}')." + if len(unavailable_layers) > 0: + raise RuntimeError("\n".join([base_error] + unavailable_layers)) + else: + raise RuntimeError(base_error) + return available_layers[0] + + def clear(self) -> None: + while len(self._instance_recipes) > 0: + self._instance_recipes.pop() + + def unregister_custom_implementations(self) -> None: + to_remove = list(filter(lambda x: not x.is_default(), self.layer_implementations)) + for i in to_remove: + self.layer_implementations.remove(i) + + def convert_layers_to( + self, + new_mode: RuntimeMode, + only: Optional[Iterable[Any]] = None, + device: Optional[torch.device] = None, + verbose: bool = False, + ) -> None: + for recipe in list(self._instance_recipes): + module = recipe.layer + if only is not None and module.layer_implementation not in only and module not in only: + continue + assert isinstance(module, LayerContainer) + if verbose: + print("| Replacing layer in", module) + replacement_module = self.get_replacement(new_mode, recipe, device) + replacement_module.to(device) + if verbose: + print("- with:", replacement_module) + module.replace_layer_implementation(replacement_module) diff --git a/bitorch/layers/pact.py b/bitorch/layers/pact.py index 18a272d..c2d204f 100644 --- a/bitorch/layers/pact.py +++ b/bitorch/layers/pact.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Optional, Tuple from torch.autograd import Function from torch.nn import Module import torch @@ -23,7 +23,7 @@ def backward(ctx, output_gradient: torch.Tensor) -> Tuple[torch.Tensor, torch.Te # Backward function, I borrowed code from # https://github.com/obilaniu/GradOverride/blob/master/functional.py # We get dL / dy_q as a gradient - x, alpha, = ctx.saved_tensors + x, alpha = ctx.saved_tensors # Weight gradient is only valid when [0, alpha] # Actual gradient for alpha, # By applying Chain Rule, we get dL / dy_q * dy_q / dy * dy / dalpha @@ -42,9 +42,9 @@ class Pact(Module): Choi, Jungwook, et al. 
"Pact: Parameterized clipping activation for quantized neural networks." (2018) """ - def __init__(self, bits: int = None) -> None: + def __init__(self, bits: Optional[int] = None) -> None: super().__init__() - self.alpha = torch.nn.parameter.Parameter(torch.tensor(10.)) + self.alpha = torch.nn.parameter.Parameter(torch.tensor(10.0)) self.bits = bits or config.pact_bits def forward(self, x: torch.Tensor) -> torch.Tensor: diff --git a/bitorch/layers/pad.py b/bitorch/layers/pad.py new file mode 100644 index 0000000..35c03fb --- /dev/null +++ b/bitorch/layers/pad.py @@ -0,0 +1,31 @@ +from torch import nn, Tensor +import torch.nn.functional as F + + +class PadModule(nn.Module): + """Module for padding tensors.""" + + def __init__( + self, + padding_left: int = 0, + padding_right: int = 0, + padding_top: int = 0, + padding_bottom: int = 0, + padding_value: int = 0, + ): + """initialization function for padding. + + Args: + padding_left (int, optional): number of columns to pad to the left. + padding_right (int, optional): number of columns to pad to the right. + padding_top (int, optional): number of rows to pad at the top. + padding_bottom (int, optional): number of rows to pad at the bottom. + padding_value (float, optional): fill value used for padding. + """ + super(PadModule, self).__init__() + self.padding_tensor = (padding_left, padding_right, padding_top, padding_bottom) + self.padding_value = padding_value + + def forward(self, x: Tensor) -> Tensor: + x = F.pad(x, self.padding_tensor, "constant", self.padding_value) + return x diff --git a/bitorch/layers/qactivation.py b/bitorch/layers/qactivation.py index 0e80e2d..c6722c0 100644 --- a/bitorch/layers/qactivation.py +++ b/bitorch/layers/qactivation.py @@ -9,13 +9,13 @@ class GradientCancellation(Function): - @staticmethod @typing.no_type_check def forward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - input_tensor: torch.Tensor, - threshold: float) -> torch.Tensor: + ctx: torch.autograd.function.BackwardCFunction, # type: ignore + input_tensor: torch.Tensor, + threshold: float, + ) -> torch.Tensor: """Binarize input tensor using the _sign function. Args: @@ -30,8 +30,9 @@ def forward( @staticmethod @typing.no_type_check def backward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - output_grad: torch.Tensor) -> Tuple[torch.Tensor, None]: + ctx: torch.autograd.function.BackwardCFunction, # type: ignore + output_grad: torch.Tensor, + ) -> Tuple[torch.Tensor, None]: """Apply straight through estimator. This passes the output gradient towards the input if the inputs are in the range [-1, 1]. @@ -45,9 +46,8 @@ def backward( """ input_tensor, threshold = ctx.saved_tensors cancelled = torch.where( - torch.abs(input_tensor) <= threshold, - output_grad, - torch.tensor(0., device=output_grad.device)) + torch.abs(input_tensor) <= threshold, output_grad, torch.tensor(0.0, device=output_grad.device) + ) return cancelled, None @@ -55,9 +55,10 @@ class QActivation(nn.Module): """Activation layer for quantization""" def __init__( - self, - activation: Union[str, Quantization] = None, - gradient_cancellation_threshold: Optional[float] = 0.0) -> None: + self, + activation: Optional[Union[str, Quantization]] = None, + gradient_cancellation_threshold: Optional[float] = 0.0, + ) -> None: """initialization function for fetching suitable activation function. Args: @@ -67,10 +68,8 @@ def __init__( cancellation. Disabled if threshold is 0. 
""" super(QActivation, self).__init__() - self._activation = config.get_quantization_function(activation or config.input_quantization) - self._gradient_cancellation_threshold = ( - gradient_cancellation_threshold or config.gradient_cancellation_threshold - ) + self.activation_function = config.get_quantization_function(activation or config.input_quantization) + self.gradient_cancellation_threshold = gradient_cancellation_threshold or config.gradient_cancellation_threshold def forward(self, input_tensor: torch.Tensor) -> torch.Tensor: """Forwards input tensor through activation function. @@ -81,6 +80,6 @@ def forward(self, input_tensor: torch.Tensor) -> torch.Tensor: Returns: torch.Tensor: quantized input tensor. """ - if self._gradient_cancellation_threshold > 0: - input_tensor = GradientCancellation.apply(input_tensor, self._gradient_cancellation_threshold) - return self._activation(input_tensor) + if self.gradient_cancellation_threshold > 0: + input_tensor = GradientCancellation.apply(input_tensor, self.gradient_cancellation_threshold) + return self.activation_function(input_tensor) diff --git a/bitorch/layers/qconv1d.py b/bitorch/layers/qconv1d.py index 41a9c2b..94d0bdb 100644 --- a/bitorch/layers/qconv1d.py +++ b/bitorch/layers/qconv1d.py @@ -1,25 +1,36 @@ -"""Module containing the quantized convolution layer""" +"""Module containing the quantized 1d convolution layer""" + +from typing import Optional, Any, Type, Union -from typing import Any, Union from torch import Tensor from torch.nn import Conv1d, init from torch.nn.functional import pad, conv1d -from bitorch.layers.config import config +from bitorch import RuntimeMode from bitorch.quantizations import Quantization -from bitorch.layers.qactivation import QActivation +from .config import config +from .extensions import DefaultImplementationMixin +from .qactivation import QActivation +from .qconv_mixin import QConvArgsProviderMixin +from .register import QConv1dImplementation class QConv1d_NoAct(Conv1d): # noqa: N801 - """Quantized 1d Convolutional Layer. Has the same api as Conv1d but lets you specify a weight quantization, that is - applied before the convolutional operation.""" - def __init__(self, - *args: Any, - weight_quantization: Union[str, Quantization] = None, - pad_value: float = None, - bias: bool = False, - **kwargs: Any) -> None: - """initialization function for padding and quantization. + """ + Quantized 1d Convolutional Layer. Has the same api as Conv1d but lets you specify a weight quantization, that is + applied before the convolutional operation. + """ + + def __init__( + self, + *args: Any, + weight_quantization: Optional[Union[str, Quantization]] = None, + pad_value: Optional[float] = None, + bias: bool = False, + **kwargs: Any, + ) -> None: + """ + initialization function for padding and quantization. Args: weight_quantization (Union[str, Quantization], optional): quantization module or name of quantization @@ -29,8 +40,7 @@ def __init__(self, assert bias is False, "A QConv layer can not use a bias due to acceleration techniques during deployment." 
kwargs["bias"] = False super(QConv1d_NoAct, self).__init__(*args, **kwargs) - self._weight_quantize = config.get_quantization_function( - weight_quantization or config.weight_quantization) + self._weight_quantize = config.get_quantization_function(weight_quantization or config.weight_quantization) self._pad_value = pad_value or config.padding_value def _apply_padding(self, x: Tensor) -> Tensor: @@ -64,16 +74,19 @@ def forward(self, input: Tensor) -> Tensor: stride=self.stride, padding=0, dilation=self.dilation, - groups=self.groups) - - -class QConv1d(QConv1d_NoAct): # type: ignore - def __init__(self, # type: ignore - *args: Any, - input_quantization: Union[str, Quantization] = None, - weight_quantization: Union[str, Quantization] = None, - gradient_cancellation_threshold: Union[float, None] = None, - **kwargs: Any) -> None: + groups=self.groups, + ) + + +class QConv1dBase(QConvArgsProviderMixin, QConv1d_NoAct): # type: ignore + def __init__( + self, # type: ignore + *args: Any, + input_quantization: Optional[Union[str, Quantization]] = None, + weight_quantization: Optional[Union[str, Quantization]] = None, + gradient_cancellation_threshold: Union[float, None] = None, + **kwargs: Any, + ) -> None: """initialization function for quantization of inputs and weights. Args: @@ -84,7 +97,7 @@ def __init__(self, # type: ignore weight_quantization (Union[str, Quantization], optional): quantization module or name of quantization function for weights. Defaults to None. """ - super(QConv1d, self).__init__(*args, weight_quantization=weight_quantization, **kwargs) + super().__init__(*args, weight_quantization=weight_quantization, **kwargs) self.activation = QActivation(input_quantization, gradient_cancellation_threshold) def forward(self, input_tensor: Tensor) -> Tensor: @@ -96,4 +109,22 @@ def forward(self, input_tensor: Tensor) -> Tensor: Returns: Tensor: the activated and convoluted output tensor. """ - return super(QConv1d, self).forward(self.activation(input_tensor)) + return super().forward(self.activation(input_tensor)) + + +class _QConv1dComposed(DefaultImplementationMixin, QConv1dBase): + """ + This class defines the default implementation of a QConv1d layer (which is actually implemented by QConv1dBase). + + To implement a custom QConv1d implementation use QConv1dBase as a super class instead. + """ + + pass + + +QConv1d: Type[_QConv1dComposed] = QConv1dImplementation(RuntimeMode.DEFAULT)(_QConv1dComposed) # type: ignore +""" +This class provides the current implementation of a QConv1d layer (which is actually implemented by :class:`QConv1dBase`). + +To implement a custom QConv1d implementation use :class:`QConv1dBase` as a super class instead. 
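
A compact sketch of what such a custom implementation could look like, using the registration machinery introduced above (the choice of `RuntimeMode.CPU` as the target mode and the trivial clone logic are assumptions for illustration only):

```python
from typing import Any, Optional, Tuple

import torch

from bitorch import RuntimeMode
from bitorch.layers import CustomImplementationMixin, QConv1dBase
from bitorch.layers.extensions import LayerRecipe
from bitorch.layers.register import QConv1dImplementation


@QConv1dImplementation(RuntimeMode.CPU)  # register for a non-default mode (assumed here)
class MyQConv1d(CustomImplementationMixin, QConv1dBase):
    """Hypothetical alternative QConv1d implementation."""

    @classmethod
    def can_clone(cls, recipe: LayerRecipe) -> Tuple[bool, str]:
        return True, ""  # a real implementation would check kernel size, padding, etc.

    @classmethod
    def create_clone_from(cls, recipe: LayerRecipe, device: Optional[torch.device] = None) -> Any:
        return cls(*recipe.args, **recipe.kwargs)
```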
+""" diff --git a/bitorch/layers/qconv2d.py b/bitorch/layers/qconv2d.py index 6408880..e01f06d 100644 --- a/bitorch/layers/qconv2d.py +++ b/bitorch/layers/qconv2d.py @@ -1,20 +1,29 @@ -from typing import Union, Any +"""Module containing the quantized 2d convolution layer""" + +from typing import Optional, Any, Type, Union + from torch import Tensor from torch.nn import Conv2d, init from torch.nn.functional import pad, conv2d -from bitorch.layers.config import config +from bitorch import RuntimeMode from bitorch.quantizations import Quantization -from bitorch.layers.qactivation import QActivation +from .config import config +from .extensions import DefaultImplementationMixin +from .qactivation import QActivation +from .qconv_mixin import QConvArgsProviderMixin +from .register import QConv2dImplementation class QConv2d_NoAct(Conv2d): # type: ignore # noqa: N801 - def __init__(self, # type: ignore - *args: Any, - weight_quantization: Union[str, Quantization] = None, - pad_value: float = None, - bias: bool = False, - **kwargs: Any) -> None: + def __init__( + self, # type: ignore + *args: Any, + weight_quantization: Optional[Union[str, Quantization]] = None, + pad_value: Optional[float] = None, + bias: bool = False, + **kwargs: Any, + ) -> None: """initialization function for padding and quantization. Args: @@ -25,8 +34,7 @@ def __init__(self, # type: ignore assert bias is False, "A QConv layer can not use a bias due to acceleration techniques during deployment." kwargs["bias"] = False super(QConv2d_NoAct, self).__init__(*args, **kwargs) - self._weight_quantize = config.get_quantization_function( - weight_quantization or config.weight_quantization) + self._weight_quantize = config.get_quantization_function(weight_quantization or config.weight_quantization) self._pad_value = pad_value or config.padding_value def _apply_padding(self, x: Tensor) -> Tensor: @@ -60,16 +68,19 @@ def forward(self, input: Tensor) -> Tensor: stride=self.stride, padding=0, dilation=self.dilation, - groups=self.groups) - - -class QConv2d(QConv2d_NoAct): # type: ignore - def __init__(self, # type: ignore - *args: Any, - input_quantization: Union[str, Quantization] = None, - weight_quantization: Union[str, Quantization] = None, - gradient_cancellation_threshold: Union[float, None] = None, - **kwargs: Any) -> None: + groups=self.groups, + ) + + +class QConv2dBase(QConvArgsProviderMixin, QConv2d_NoAct): # type: ignore + def __init__( + self, # type: ignore + *args: Any, + input_quantization: Optional[Union[str, Quantization]] = None, + weight_quantization: Optional[Union[str, Quantization]] = None, + gradient_cancellation_threshold: Union[float, None] = None, + **kwargs: Any, + ) -> None: """initialization function for quantization of inputs and weights. Args: @@ -80,7 +91,7 @@ def __init__(self, # type: ignore weight_quantization (Union[str, Quantization], optional): quantization module or name of quantization function for weights. Defaults to None. """ - super(QConv2d, self).__init__(*args, weight_quantization=weight_quantization, **kwargs) + super().__init__(*args, weight_quantization=weight_quantization, **kwargs) self.activation = QActivation(input_quantization, gradient_cancellation_threshold) def forward(self, input_tensor: Tensor) -> Tensor: @@ -92,4 +103,22 @@ def forward(self, input_tensor: Tensor) -> Tensor: Returns: Tensor: the activated and convoluted output tensor. 
""" - return super(QConv2d, self).forward(self.activation(input_tensor)) + return super().forward(self.activation(input_tensor)) + + +class _QConv2dComposed(DefaultImplementationMixin, QConv2dBase): + """ + This class defines the default implementation of a QConv2d layer (which is actually implemented by QConv2dBase). + + To implement a custom QConv2d implementation use QConv2dBase as a super class instead. + """ + + pass + + +QConv2d: Type[_QConv2dComposed] = QConv2dImplementation(RuntimeMode.DEFAULT)(_QConv2dComposed) # type: ignore +""" +This class provides the current implementation of a QConv2d layer (which is actually implemented by :class:`QConv2dBase`). + +To implement a custom QConv2d implementation use :class:`QConv2dBase` as a super class instead. +""" diff --git a/bitorch/layers/qconv3d.py b/bitorch/layers/qconv3d.py index 0db97b3..623e872 100644 --- a/bitorch/layers/qconv3d.py +++ b/bitorch/layers/qconv3d.py @@ -1,21 +1,29 @@ -"""Module containing the quantized convolution layer""" -from typing import Union, Any +"""Module containing the quantized 3d convolution layer""" + +from typing import Optional, Any, Type, Union + from torch import Tensor from torch.nn import Conv3d, init from torch.nn.functional import pad, conv3d -from bitorch.layers.config import config +from bitorch import RuntimeMode from bitorch.quantizations import Quantization -from bitorch.layers.qactivation import QActivation +from .config import config +from .extensions import DefaultImplementationMixin +from .qactivation import QActivation +from .qconv_mixin import QConvArgsProviderMixin +from .register import QConv3dImplementation class QConv3d_NoAct(Conv3d): # type: ignore # noqa: N801 - def __init__(self, # type: ignore - *args: Any, - weight_quantization: Union[str, Quantization] = None, - pad_value: float = None, - bias: bool = False, - **kwargs: Any) -> None: + def __init__( + self, # type: ignore + *args: Any, + weight_quantization: Optional[Union[str, Quantization]] = None, + pad_value: Optional[float] = None, + bias: bool = False, + **kwargs: Any, + ) -> None: """initialization function for padding and quantization. Args: @@ -26,8 +34,7 @@ def __init__(self, # type: ignore assert bias is False, "A QConv layer can not use a bias due to acceleration techniques during deployment." 
kwargs["bias"] = False super(QConv3d_NoAct, self).__init__(*args, **kwargs) - self._weight_quantize = config.get_quantization_function( - weight_quantization or config.weight_quantization) + self._weight_quantize = config.get_quantization_function(weight_quantization or config.weight_quantization) self._pad_value = pad_value or config.padding_value def _apply_padding(self, x: Tensor) -> Tensor: @@ -61,16 +68,19 @@ def forward(self, input: Tensor) -> Tensor: stride=self.stride, padding=0, dilation=self.dilation, - groups=self.groups) - - -class QConv3d(QConv3d_NoAct): # type: ignore - def __init__(self, # type: ignore - *args: Any, - input_quantization: Union[str, Quantization] = None, - weight_quantization: Union[str, Quantization] = None, - gradient_cancellation_threshold: Union[float, None] = None, - **kwargs: Any) -> None: + groups=self.groups, + ) + + +class QConv3dBase(QConvArgsProviderMixin, QConv3d_NoAct): # type: ignore + def __init__( + self, # type: ignore + *args: Any, + input_quantization: Optional[Union[str, Quantization]] = None, + weight_quantization: Optional[Union[str, Quantization]] = None, + gradient_cancellation_threshold: Union[float, None] = None, + **kwargs: Any, + ) -> None: """initialization function for quantization of inputs and weights. Args: @@ -81,7 +91,7 @@ def __init__(self, # type: ignore weight_quantization (Union[str, Quantization], optional): quantization module or name of quantization function for weights. Defaults to None. """ - super(QConv3d, self).__init__(*args, weight_quantization=weight_quantization, **kwargs) + super().__init__(*args, weight_quantization=weight_quantization, **kwargs) self.activation = QActivation(input_quantization, gradient_cancellation_threshold) def forward(self, input_tensor: Tensor) -> Tensor: @@ -93,4 +103,22 @@ def forward(self, input_tensor: Tensor) -> Tensor: Returns: Tensor: the activated and convoluted output tensor. """ - return super(QConv3d, self).forward(self.activation(input_tensor)) + return super().forward(self.activation(input_tensor)) + + +class _QConv3dComposed(DefaultImplementationMixin, QConv3dBase): + """ + This class defines the default implementation of a QConv3d layer (which is actually implemented by QConv3dBase). + + To implement a custom QConv3d implementation use QConv3dBase as a super class instead. + """ + + pass + + +QConv3d: Type[_QConv3dComposed] = QConv3dImplementation(RuntimeMode.DEFAULT)(_QConv3dComposed) # type: ignore +""" +This class provides the current implementation of a QConv3d layer (which is actually implemented by :class:`QConv3dBase`). + +To implement a custom QConv3d implementation use :class:`QConv3dBase` as a super class instead. +""" diff --git a/bitorch/layers/qconv_mixin.py b/bitorch/layers/qconv_mixin.py new file mode 100644 index 0000000..66c82fe --- /dev/null +++ b/bitorch/layers/qconv_mixin.py @@ -0,0 +1,31 @@ +from typing import Dict, Any + +from .extensions import LayerRecipe + + +class QConvArgsProviderMixin: + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + @staticmethod + def get_args_as_kwargs(recipe: LayerRecipe) -> Dict[str, Any]: + """ + Gather all arguments that were used to create a QLinear layer with argument names. + Can be used to recreate a layer with identical arguments. 
+ + Returns: + A dictionary with all arguments (key is the argument name as a string even for positional arguments) + """ + return { + "in_channels": recipe.get_positional_arg(0), + "out_channels": recipe.get_positional_arg(1), + "kernel_size": recipe.get_positional_arg(2), + "stride": recipe.get_arg(3, "stride", None), + "padding": recipe.get_arg(4, "padding", None), + "dilation": recipe.get_arg(5, "dilation", None), + "groups": recipe.get_arg(6, "groups", None), + "bias": recipe.get_arg(7, "bias", True), + "padding_mode": recipe.get_arg(8, "padding_mode", None), + "device": recipe.get_arg(9, "device", None), + "dtype": recipe.get_arg(10, "dtype", None), + } diff --git a/bitorch/layers/qembedding.py b/bitorch/layers/qembedding.py index da6b95a..53b1811 100644 --- a/bitorch/layers/qembedding.py +++ b/bitorch/layers/qembedding.py @@ -1,4 +1,4 @@ -from typing import Union, Optional +from typing import Any, Union, Optional from torch import Tensor from torch.nn import EmbeddingBag, Embedding from torch.nn.functional import embedding_bag, embedding @@ -14,24 +14,28 @@ class QEmbeddingBag(EmbeddingBag): """ def __init__( - self, - *args: int, - embedding_dim: int, - weight_quantization: Union[Quantization, str] = None, - output_quantization: Union[Quantization, str] = None, - **kwargs: int) -> None: + self, + *args: Any, + embedding_dim: int, + weight_quantization: Optional[Union[Quantization, str]] = None, + output_quantization: Optional[Union[Quantization, str]] = None, + **kwargs: Any, + ) -> None: super(QEmbeddingBag, self).__init__(*args, embedding_dim=embedding_dim, **kwargs) # type: ignore """load quantization functions""" self.embedding_weight_quantization = config.get_quantization_function( - weight_quantization or config.weight_quantization) + weight_quantization or config.weight_quantization + ) self.embedding_input_quantization = config.get_quantization_function( - output_quantization or config.input_quantization) + output_quantization or config.input_quantization + ) def forward( - self, - input: Tensor, - offsets: Optional[Tensor] = None, - per_sample_weights: Optional[Tensor] = None) -> Tensor: + self, + input: Tensor, + offsets: Optional[Tensor] = None, + per_sample_weights: Optional[Tensor] = None, + ) -> Tensor: """generates embeddings for received bags. then quantizes these embeddings and depending on configuration forwards it through another quantized linear layer. 
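# A sketch of how the argument-recording mixin above could be used, assuming a LayerRecipe
# for an existing layer is available (how recipes are recorded is not shown in this patch):
from bitorch.layers.extensions import LayerRecipe
from bitorch.layers.qconv2d import QConv2dBase


def recorded_args(recipe: LayerRecipe) -> dict:
    """Return the constructor arguments recorded for a QConv2d layer, keyed by name,
    e.g. {"in_channels": 16, "out_channels": 32, "kernel_size": 3, ...}."""
    return QConv2dBase.get_args_as_kwargs(recipe)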
@@ -43,22 +47,33 @@ def forward( Returns: Tensor: embeddings for given sequences """ - # necessary for torch 1.8 compliance - if hasattr(self, 'padding_idx'): + if hasattr(self, "padding_idx"): embeddings = embedding_bag( - input, self.embedding_weight_quantization(self.weight), offsets, - self.max_norm, self.norm_type, - self.scale_grad_by_freq, self.mode, self.sparse, - per_sample_weights, self.include_last_offset, - self.padding_idx + input, + self.embedding_weight_quantization(self.weight), + offsets, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.mode, + self.sparse, + per_sample_weights, + self.include_last_offset, + self.padding_idx, ) else: embeddings = embedding_bag( - input, self.embedding_weight_quantization(self.weight), offsets, - self.max_norm, self.norm_type, - self.scale_grad_by_freq, self.mode, self.sparse, - per_sample_weights, self.include_last_offset, + input, + self.embedding_weight_quantization(self.weight), + offsets, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.mode, + self.sparse, + per_sample_weights, + self.include_last_offset, ) embeddings = self.embedding_input_quantization(embeddings) return embeddings @@ -70,18 +85,21 @@ class QEmbedding(Embedding): """ def __init__( - self, - *args: int, - embedding_dim: int, - weight_quantization: Union[Quantization, str] = None, - output_quantization: Union[Quantization, str] = None, - **kwargs: int) -> None: + self, + *args: Any, + embedding_dim: int, + weight_quantization: Optional[Union[Quantization, str]] = None, + output_quantization: Optional[Union[Quantization, str]] = None, + **kwargs: Any, + ) -> None: super(QEmbedding, self).__init__(*args, embedding_dim=embedding_dim, **kwargs) # type: ignore """load quantization functions""" self.embedding_weight_quantization = config.get_quantization_function( - weight_quantization or config.weight_quantization) + weight_quantization or config.weight_quantization + ) self.embedding_output_quantization = config.get_quantization_function( - output_quantization or config.input_quantization) + output_quantization or config.input_quantization + ) def forward(self, input: Tensor) -> Tensor: """generates embeddings for received bags. 
then quantizes these embeddings and depending on configuration @@ -94,9 +112,13 @@ def forward(self, input: Tensor) -> Tensor: Tensor: embeddings for given sequences """ embeddings = embedding( - input, self.embedding_weight_quantization(self.weight), self.padding_idx, - self.max_norm, self.norm_type, - self.scale_grad_by_freq, self.sparse, + input, + self.embedding_weight_quantization(self.weight), + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, ) embeddings = self.embedding_output_quantization(embeddings) return embeddings diff --git a/bitorch/layers/qlinear.py b/bitorch/layers/qlinear.py index 301ec2f..3670764 100644 --- a/bitorch/layers/qlinear.py +++ b/bitorch/layers/qlinear.py @@ -1,40 +1,72 @@ -"""Module containting the quantized linear layer""" +"""Module containing the quantized linear layer""" + +from typing import Optional, Any, Type, Union, Dict -from typing import Union import torch from torch.nn import Linear from torch.nn.functional import linear +from bitorch import RuntimeMode from bitorch.quantizations import Quantization from .config import config +from .extensions import LayerRecipe, DefaultImplementationMixin from .qactivation import QActivation +from .register import QLinearImplementation -class QLinear(Linear): +class QLinearBase(Linear): def __init__( - self, - *args: int, - input_quantization: Union[str, Quantization] = None, - gradient_cancellation_threshold: Union[float, None] = None, - weight_quantization: Union[str, Quantization] = None, - **kwargs: bool) -> None: - """Applys the given quantization functions on weights and inputs before applying the linear operation. + self, + *args: int, + input_quantization: Optional[Union[str, Quantization]] = None, + gradient_cancellation_threshold: Union[float, None] = None, + weight_quantization: Optional[Union[str, Quantization]] = None, + **kwargs: bool, + ) -> None: + """Applies the given quantization functions on weights and inputs before applying the linear operation. Args: - *args (Argument list): positional arguments for linear layer + *args: positional arguments for linear layer input_quantization (Union[str, Quantization], optional): quantization module used for input quantization. Defaults to None. gradient_cancellation_threshold (Union[float, None], optional): threshold for input gradient cancellation. disabled if threshold is None. Defaults to None. weight_quantization (Union[str, Quantization], optional): quantization module or name of quantization function. Defaults to None. - **kwargs (keyword Argument list): keyword arguments for linear layer + **kwargs: keyword arguments for linear layer """ - - super(QLinear, self).__init__(*args, **kwargs) # type: ignore - self.weight_quantize = config.get_quantization_function(weight_quantization or config.weight_quantization) + super().__init__(*args, **kwargs) # type: ignore + self.weight_quantization = config.get_quantization_function(weight_quantization or config.weight_quantization) self.activation = QActivation(input_quantization, gradient_cancellation_threshold) + @staticmethod + def get_args_as_kwargs(recipe: LayerRecipe) -> Dict[str, Any]: + """ + Gather all arguments that were used to create a QLinear layer with argument names. + Can be used to recreate a layer with identical arguments. 
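# A usage sketch for the quantized embedding bag above (vocabulary size, dimensions and the
# "sign" quantization are illustrative assumptions):
import torch
from bitorch.layers.qembedding import QEmbeddingBag

embedding = QEmbeddingBag(1000, embedding_dim=16, mode="mean", weight_quantization="sign")
indices = torch.tensor([1, 2, 4, 5, 4, 3])
offsets = torch.tensor([0, 3])          # two bags: indices[0:3] and indices[3:6]
bags = embedding(indices, offsets)      # quantized weights, pooled, then output quantization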
+ + Returns: + A dictionary with all arguments (key is the argument name as a string even for positional arguments) + """ + return { + "in_features": recipe.get_positional_arg(0), + "out_features": recipe.get_positional_arg(1), + "input_quantization": recipe.layer.input_quantization, + "gradient_cancellation_threshold": recipe.layer.gradient_cancellation_threshold, + "weight_quantization": recipe.layer.weight_quantization, + "bias": recipe.get_arg(5, "bias", True), + "device": recipe.get_arg(6, "device", None), + "dtype": recipe.get_arg(7, "dtype", None), + } + + @property + def input_quantization(self) -> Quantization: + return self.activation.activation_function + + @property + def gradient_cancellation_threshold(self) -> float: + return self.activation.gradient_cancellation_threshold + def forward(self, x: torch.Tensor) -> torch.Tensor: """Forwards x through the binary linear layer. @@ -44,5 +76,22 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: Returns: torch.Tensors: forwarded tensor """ + return linear(self.activation(x), self.weight_quantization(self.weight), self.bias) + + +class _QLinearComposed(DefaultImplementationMixin, QLinearBase): + """ + This class defines the default implementation of a QLinear layer (which is actually implemented by QLinearBase). + + To implement a custom QLinear implementation use QLinearBase as a super class instead. + """ + + pass + + +QLinear: Type[_QLinearComposed] = QLinearImplementation(RuntimeMode.DEFAULT)(_QLinearComposed) # type: ignore +""" +This class provides the current implementation of a QLinear layer (which is actually implemented by :class:`QLinearBase`). - return linear(self.activation(x), self.weight_quantize(self.weight), self.bias) +To implement a custom QLinear implementation use :class:`QLinearBase` as a super class instead. +""" diff --git a/bitorch/layers/register.py b/bitorch/layers/register.py new file mode 100644 index 0000000..9e447d9 --- /dev/null +++ b/bitorch/layers/register.py @@ -0,0 +1,88 @@ +from typing import List, Iterable, Any, Optional + +import torch + +from bitorch import runtime_mode_type, RuntimeMode +from bitorch.layers.extensions import LayerImplementation, LayerRegistry + +q_linear_registry = LayerRegistry("QLinear") +q_conv1d_registry = LayerRegistry("QConv1d") +q_conv2d_registry = LayerRegistry("QConv2d") +q_conv3d_registry = LayerRegistry("QConv3d") + + +def all_layer_registries() -> List[LayerRegistry]: + """ + Return all layer registries (one for each layer type: QLinear, QConv[1-3]d). + + Returns: + A list of all layer registries. + """ + return [ + q_conv1d_registry, + q_conv2d_registry, + q_conv3d_registry, + q_linear_registry, + ] + + +def convert_layers_to( + new_mode: RuntimeMode, + only: Optional[Iterable[Any]] = None, + device: Optional[torch.device] = None, + verbose: bool = False, +) -> None: + """ + Convert all wrapped layers (or a given subset of them) to a new mode. 
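# A usage sketch for the QLinear layer above (sizes and quantization names are illustrative
# assumptions); unlike the QConv layers, a bias is still allowed here:
import torch
from bitorch.layers import QLinear

layer = QLinear(256, 128, input_quantization="sign", weight_quantization="sign")
y = layer(torch.randn(10, 256))   # inputs are quantized, weights are quantized, then linear runs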
+ Args: + new_mode: the new RuntimeMode + only: optional white"list" (Iterable) of layers or wrapped layers which should be converted + device: the new device for the layers + verbose: whether to print which layers are being converted + """ + for registry in all_layer_registries(): + registry.convert_layers_to(new_mode, only, device, verbose) + + +class QLinearImplementation(LayerImplementation): + """Decorator for :class:`QLinear` implementations.""" + + def __init__(self, supports_modes: runtime_mode_type) -> None: + """ + Args: + supports_modes: RuntimeMode(s) that is/are supported by an implementation + """ + super().__init__(q_linear_registry, supports_modes) + + +class QConv1dImplementation(LayerImplementation): + """Decorator for :class:`QConv1d` implementations.""" + + def __init__(self, supports_modes: runtime_mode_type) -> None: + """ + Args: + supports_modes: RuntimeMode(s) that is/are supported by an implementation + """ + super().__init__(q_conv1d_registry, supports_modes) + + +class QConv2dImplementation(LayerImplementation): + """Decorator for :class:`QConv2d` implementations.""" + + def __init__(self, supports_modes: runtime_mode_type) -> None: + """ + Args: + supports_modes: RuntimeMode(s) that is/are supported by an implementation + """ + super().__init__(q_conv2d_registry, supports_modes) + + +class QConv3dImplementation(LayerImplementation): + """Decorator for :class:`QConv3d` implementations.""" + + def __init__(self, supports_modes: runtime_mode_type) -> None: + """ + Args: + supports_modes: RuntimeMode(s) that is/are supported by an implementation + """ + super().__init__(q_conv3d_registry, supports_modes) diff --git a/bitorch/models/__init__.py b/bitorch/models/__init__.py index 06983d4..87453ec 100644 --- a/bitorch/models/__init__.py +++ b/bitorch/models/__init__.py @@ -20,26 +20,82 @@ Resnet50V1, Resnet50V2, ) +from .densenet import ( + DenseNet, + DenseNet28, + DenseNet37, + DenseNet45, + DenseNetFlex, +) +from .meliusnet import ( + MeliusNet, + MeliusNet22, + MeliusNet23, + MeliusNet42, + MeliusNet59, + MeliusNetA, + MeliusNetB, + MeliusNetC, + MeliusNetFlex, +) from .resnet_e import ( ResnetE, ResnetE18, ResnetE34, ) +from .quicknet import ( + QuickNet, + QuickNetSmall, + QuickNetLarge, +) +from .dlrm import DLRM from ..util import build_lookup_dictionary __all__ = [ - "Model", "LeNet", "Resnet", "Resnet152V1", "Resnet152V2", "Resnet18V1", - "Resnet18V2", "Resnet34V1", "Resnet34V2", "Resnet50V1", "Resnet50V2", - "ResnetE", "ResnetE18", "ResnetE34", + "Model", + "model_from_name", + "model_names", + "register_custom_model", + "LeNet", + "Resnet", + "Resnet152V1", + "Resnet152V2", + "Resnet18V1", + "Resnet18V2", + "Resnet34V1", + "Resnet34V2", + "Resnet50V1", + "Resnet50V2", + "ResnetE", + "ResnetE18", + "ResnetE34", + "DLRM", + "DenseNet", + "DenseNet28", + "DenseNet37", + "DenseNet45", + "DenseNetFlex", + "MeliusNet", + "MeliusNet22", + "MeliusNet23", + "MeliusNet42", + "MeliusNet59", + "MeliusNetA", + "MeliusNetB", + "MeliusNetC", + "MeliusNetFlex", + "QuickNet", + "QuickNetSmall", + "QuickNetLarge", ] -models_by_name = build_lookup_dictionary(__name__, __all__, Model) +models_by_name = build_lookup_dictionary(__name__, __all__, Model, key_fn=lambda x: x.name.lower()) def model_from_name(name: str) -> Type[Model]: - """returns the model to which the name belongs to (name has to be the value of the models - name-attribute) + """ + Return a model by the given name. 
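# A sketch of the registry helpers above (RuntimeMode.DEFAULT is a placeholder; in practice
# one would convert to whichever mode a custom implementation was registered for):
from bitorch import RuntimeMode
from bitorch.layers.register import all_layer_registries, convert_layers_to

print(all_layer_registries())                           # QConv1d/2d/3d and QLinear registries
convert_layers_to(RuntimeMode.DEFAULT, verbose=True)    # converts every wrapped layer in place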
Args: name (str): name of the model @@ -50,15 +106,26 @@ def model_from_name(name: str) -> Type[Model]: Returns: Model: the model """ - if name not in models_by_name: + if name.lower() not in models_by_name: raise ValueError(f"{name} model not found!") - return models_by_name[name] + return models_by_name[name.lower()] -def model_names() -> List: - """getter for list of model names for argparse +def model_names() -> List[str]: + """ + Get the list of model names. Returns: List: the model names """ return list(models_by_name.keys()) + + +def register_custom_model(custom_model: Type[Model]) -> None: + """ + Register a custom (external) model in bitorch. + + Args: + custom_model: the custom model which should be added to bitorch + """ + models_by_name[custom_model.name] = custom_model diff --git a/bitorch/models/base.py b/bitorch/models/base.py index f3f4029..78f0d33 100644 --- a/bitorch/models/base.py +++ b/bitorch/models/base.py @@ -1,21 +1,29 @@ +import logging from argparse import ArgumentParser -from typing import Union, Type +from typing import Optional, List, Any import torch from torch import nn -from bitorch.datasets.base import BasicDataset -from bitorch.layers import QConv1d, QConv2d, QConv3d, QConv1d_NoAct, QConv2d_NoAct, QConv3d_NoAct +from bitorch import RuntimeMode +from bitorch.layers import convert +from bitorch.layers.qconv1d import QConv1dBase, QConv1d_NoAct +from bitorch.layers.qconv2d import QConv2dBase, QConv2d_NoAct +from bitorch.layers.qconv3d import QConv3dBase, QConv3d_NoAct +from bitorch.models.model_hub import load_from_hub class Model(nn.Module): """Base class for Bitorch models""" - name = "None" - def __init__(self, dataset: Union[BasicDataset, Type[BasicDataset]]) -> None: + name = "" + version_table_url = "https://api.wandb.ai/artifactsV2/default/hpi-deep-learning/QXJ0aWZhY3Q6MzE1MzQ1ODM1/a9bd2573417efc7fb8f562f06f3d322d" + + def __init__(self, input_shape: List[int], num_classes: int = 0) -> None: super(Model, self).__init__() self._model = nn.Module() - self._dataset = dataset + self._input_shape = input_shape + self._num_classes = num_classes @staticmethod def add_argparse_arguments(parser: ArgumentParser) -> None: @@ -53,12 +61,22 @@ def initialize(self) -> None: for module in self._model.modules(): if isinstance(module, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): # binary layers - if isinstance(module, (QConv1d, QConv2d, QConv3d, QConv1d_NoAct, QConv2d_NoAct, QConv3d_NoAct)): + if isinstance( + module, + ( + QConv1dBase, + QConv2dBase, + QConv3dBase, + QConv1d_NoAct, + QConv2d_NoAct, + QConv3d_NoAct, + ), + ): nn.init.xavier_normal_(module.weight) else: if module.kernel_size[0] == 7: # first conv layer - nn.init.kaiming_normal_(module.weight, nonlinearity='relu') + nn.init.kaiming_normal_(module.weight, nonlinearity="relu") else: # other 32-bit conv layers nn.init.xavier_normal_(module.weight) @@ -66,3 +84,47 @@ def initialize(self) -> None: nn.init.constant_(module.bias, 0) elif isinstance(module, nn.Linear): nn.init.xavier_normal_(module.weight) + + def convert(self, new_mode: RuntimeMode, device: Optional[torch.device] = None, verbose: bool = False) -> "Model": + return convert(self, new_mode, device, verbose) + + @classmethod + def from_pretrained( + cls, source: Optional[str] = None, mode: RuntimeMode = RuntimeMode.DEFAULT, **kwargs: str + ) -> nn.Module: + model = cls(**kwargs) # type: ignore + if source is not None: + logging.info(f"Loading {cls.name} model state_dict from file {source}") + state_dict = torch.load(source) + else: + kwargs["model_name"] = 
cls.name.lower() + logging.info(f"Downloading {cls.name} model state_dict from hub...") + state_dict = load_from_hub(cls.version_table_url, **kwargs) + + model.load_state_dict(state_dict) + return model + + def on_train_batch_end(self, layer: nn.Module) -> None: + """Is used with the pytorch lighting on_train_batch_end callback + + Implement it to e.g. clip weights after optimization. Is recursively applied to every submodule. + + Args: + layer (nn.Module): current layer + """ + pass + + +class NoArgparseArgsMixin: + """ + Mixin for Models which subclass an existing Model, but do not have any argparse arguments anymore. + + By using this Mixin, there is no special Parser displayed for the class. + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + @staticmethod + def add_argparse_arguments(parser: ArgumentParser) -> None: + pass diff --git a/bitorch/models/common_layers.py b/bitorch/models/common_layers.py index 16abe78..2693b97 100644 --- a/bitorch/models/common_layers.py +++ b/bitorch/models/common_layers.py @@ -1,17 +1,70 @@ -from typing import List +from typing import List, Optional, Union from torch import nn +from bitorch.layers.pad import PadModule -def get_initial_layers(variant: str, input_channels: int, output_channels: int) -> List[nn.Module]: + +def get_initial_layers( + variant: Optional[Union[List[int], str]], input_channels: int, output_channels: int +) -> List[nn.Module]: + """returns the initial layers for the given variant""" layers: List[nn.Module] = [] - if variant == "imagenet": + if variant == (224, 224) or variant == "imagenet": layers.append(nn.Conv2d(input_channels, output_channels, kernel_size=7, stride=2, padding=3, bias=False)) + + elif variant == "quicknet_stem": + assert output_channels % 4 == 0 + stem_channels = output_channels // 4 + + layers.append(PadModule(0, 1, 0, 1)) + layers.append( + nn.Conv2d( + input_channels, + stem_channels, + kernel_size=3, + stride=2, + bias=False, + ) + ) + layers.append(nn.BatchNorm2d(stem_channels, momentum=0.9)) + layers.append(nn.ReLU()) + layers.append(PadModule(0, 1, 0, 1)) + layers.append( + nn.Conv2d( + stem_channels, + stem_channels, + kernel_size=3, + groups=stem_channels, + stride=2, + bias=False, + ) + ) + layers.append(nn.BatchNorm2d(stem_channels, momentum=0.9)) + layers.append( + nn.Conv2d( + stem_channels, + output_channels, + kernel_size=1, + bias=False, + ) + ) layers.append(nn.BatchNorm2d(output_channels, momentum=0.9)) + elif variant == "grouped_stem": + stem_width = output_channels // 2 + + layers.append(nn.Conv2d(input_channels, stem_width, kernel_size=3, stride=2, padding=1, bias=False)) + layers.append(nn.BatchNorm2d(stem_width, momentum=0.9)) layers.append(nn.ReLU()) - layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) - elif variant in ["mnist", "cifar10", "cifar100"]: - layers.append(nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1, bias=False)) + layers.append(nn.Conv2d(stem_width, stem_width, kernel_size=3, stride=1, padding=1, groups=4, bias=False)) + layers.append(nn.BatchNorm2d(stem_width, momentum=0.9)) + layers.append(nn.ReLU()) + layers.append(nn.Conv2d(stem_width, output_channels, kernel_size=3, stride=1, padding=1, groups=8, bias=False)) else: - raise ValueError(f"Unknown initial layers for dataset '{variant}'.") + layers.append(nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1, bias=False)) + + if variant in [(224, 224), "imagenet", "grouped_stem"]: + 
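# A sketch of the new model registry API above; "TinyNet" is a hypothetical model used only
# to illustrate registration and lookup by name:
from typing import List
from bitorch.models import Model, model_from_name, register_custom_model


class TinyNet(Model):
    name = "tinynet"   # stored verbatim as the registry key, so a lowercase name is safest

    def __init__(self, input_shape: List[int], num_classes: int = 0) -> None:
        super().__init__(input_shape, num_classes)
        # build self._model here


register_custom_model(TinyNet)
assert model_from_name("TinyNet") is TinyNet   # the query is lowercased before lookup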
layers.append(nn.BatchNorm2d(output_channels, momentum=0.9)) + layers.append(nn.ReLU()) + layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) return layers diff --git a/bitorch/models/densenet.py b/bitorch/models/densenet.py new file mode 100644 index 0000000..3db4b17 --- /dev/null +++ b/bitorch/models/densenet.py @@ -0,0 +1,355 @@ +import logging +import argparse +from typing import Any, List, Optional, Type, Union + +import torch +from torch import nn +from torch.nn import Module, ChannelShuffle + +from .base import Model, NoArgparseArgsMixin +from bitorch.layers import QConv2d +from bitorch.models.common_layers import get_initial_layers + + +class DenseLayer(Module): + def __init__(self, num_features: int, growth_rate: int, bn_size: int, dilation: int, dropout: float): + super(DenseLayer, self).__init__() + self.dropout = dropout + self.num_features = num_features + self.feature_list: List[Module] = [] + if bn_size == 0: + # no bottleneck + self._add_conv_block( + QConv2d(self.num_features, growth_rate, kernel_size=3, padding=dilation, dilation=dilation) + ) + else: + self._add_conv_block(QConv2d(self.num_features, bn_size * growth_rate, kernel_size=1)) + self._add_conv_block(QConv2d(bn_size * growth_rate, growth_rate, kernel_size=3, padding=1)) + self.features = nn.Sequential(*self.feature_list) + + def _add_conv_block(self, layer: Module) -> None: + self.feature_list.append(nn.BatchNorm2d(self.num_features)) + self.feature_list.append(layer) + if self.dropout: + self.feature_list.append(nn.Dropout(self.dropout)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + ident = x + x = self.features(x) + x = torch.cat([ident, x], dim=1) + return x + + +class BaseNetDense(Module): + """Densenet-BC model from the + `"Densely Connected Convolutional Networks" `_ paper. 
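# A usage sketch for the stem helper above (channel counts are illustrative): the
# "grouped_stem" variant builds grouped 3x3 convolutions followed by batch norm, ReLU
# and max pooling.
from torch import nn
from bitorch.models.common_layers import get_initial_layers

stem = nn.Sequential(*get_initial_layers("grouped_stem", input_channels=3, output_channels=64))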
+ """ + + def __init__( + self, + num_init_features: int, + growth_rate: int, + block_config: List[int], + reduction: List[float], + bn_size: int, + downsample: str, + image_resolution: Optional[List[int]] = None, + dropout: float = 0, + classes: int = 1000, + image_channels: int = 3, + dilated: bool = False, + ): + super(BaseNetDense, self).__init__() + self.num_blocks = len(block_config) + self.dilation = (1, 1, 2, 4) if dilated else (1, 1, 1, 1) + self.downsample_struct = downsample + self.bn_size = bn_size + self.growth_rate = growth_rate + self.dropout = dropout + self.reduction_rates = reduction + self.num_features = num_init_features + + self.features = nn.Sequential(*get_initial_layers(image_resolution, image_channels, self.num_features)) + # Add dense blocks + for i, repeat_num in enumerate(block_config): + self._make_repeated_base_blocks(repeat_num, i) + if i != len(block_config) - 1: + self._make_transition(i) + self.finalize = nn.Sequential( + nn.BatchNorm2d(self.num_features), nn.ReLU(), nn.AdaptiveAvgPool2d(1), nn.Flatten() + ) + self.output = nn.Linear(self.num_features, classes) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.features(x) + x = self.finalize(x) + x = self.output(x) + return x + + def _add_base_block_structure(self, layer_num: int, dilation: int) -> None: + raise NotImplementedError() + + def _make_repeated_base_blocks(self, num_base_blocks: int, stage_index: int) -> None: + dilation = self.dilation[stage_index] + self.current_dense_block = nn.Sequential() + for i in range(num_base_blocks): + self._add_base_block_structure(i, dilation) + self.features.add_module("DenseBlock_%d" % (stage_index + 1), self.current_dense_block) + + def _add_dense_layer(self, layer_num: int, dilation: int) -> None: + dense_layer = DenseLayer(self.num_features, self.growth_rate, self.bn_size, dilation, self.dropout) + self.num_features += self.growth_rate + self.current_dense_block.add_module("DenseLayer_%d" % (layer_num + 1), dense_layer) + + def _make_transition(self, transition_num: int) -> None: + dilation = self.dilation[transition_num + 1] + num_out_features = self.num_features // self.reduction_rates[transition_num] + num_out_features = int(round(num_out_features / 32)) * 32 + + transition_layers: List[Module] = [] + + for layer in self.downsample_struct.split(","): + if layer == "bn": + transition_layers.append(nn.BatchNorm2d(self.num_features)) + elif layer == "relu": + transition_layers.append(nn.ReLU()) + elif layer == "q_conv": + transition_layers.append(QConv2d(self.num_features, num_out_features, kernel_size=1)) + elif "fp_conv" in layer: + groups = 1 + if ":" in layer: + groups = int(layer.split(":")[1]) + transition_layers.append( + nn.Conv2d(self.num_features, num_out_features, kernel_size=1, groups=groups, bias=False) + ) + elif layer == "pool" and dilation == 1: + transition_layers.append(nn.AvgPool2d(2, stride=2)) + elif layer == "max_pool" and dilation == 1: + transition_layers.append(nn.MaxPool2d(2, stride=2)) + elif "cs" in layer: + groups = 16 + if ":" in layer: + groups = int(layer.split(":")[1]) + transition_layers.append(ChannelShuffle(groups)) + + transition = nn.Sequential(*transition_layers) + + self.features.add_module("Transition_%d" % (transition_num + 1), transition) + self.num_features = num_out_features + + +class _DenseNet(BaseNetDense): + def _add_base_block_structure(self, layer_num: int, dilation: int) -> None: + self._add_dense_layer(layer_num, dilation) + + +def basedensenet_constructor( + spec: dict, + model: 
Type[BaseNetDense], + num_layers: Optional[Union[int, str]], + num_init_features: int, + growth_rate: int, + bn_size: int, + dropout: float, + dilated: bool, + flex_block_config: Optional[List[int]], + classes: int = 1000, + image_resolution: Optional[List[int]] = None, + image_channels: int = 3, +) -> Module: + """Creates a densenet of the given model type with given layer numbers. + + Args: + spec (dict): specification that holds block config, reduction factors and downsample layer names + model (Type[BaseNetDense]): the model to instantiate. + num_layers (int): number of layers to be build. + num_init_features (int, optional): number of initial features. + growth_rate (int, optional): growth rate of the channels. + bn_size (int, optional): size of the bottleneck. + dropout (float, optional): dropout percentage in dense layers. + dilated (bool, optional): whether to use dilation in convolutions. + flex_block_config (List[int], optional) number of blocks in a flex model. + classes (int, optional): number of output classes. Defaults to 1000. + image_resolution (List[int], optional): determines set of initial layers to be used. Defaults to None. + image_channels (int, optional): number of channels of input images. Defaults to 3. + + Raises: + ValueError: raised if no specification for given num_layers is listed in the given spec dict, + block config is not given as a list of ints, + number of reductions is incorrect + + Returns: + Module: instance of model + """ + if num_layers not in spec: + raise ValueError(f"No spec for {num_layers} available!") + + block_config, reduction_factor, downsampling = spec[num_layers] + + if num_layers is None and flex_block_config is not None: + block_config = flex_block_config + + reduction = [1 / x for x in reduction_factor] + if not isinstance(block_config, List): + raise ValueError(f"block config {block_config} must be a list") + if not len(reduction) == len(block_config) - 1: + raise ValueError(f'"wrong number of reductions, should be {len(block_config) - 1}"') + + return model( + num_init_features, + growth_rate, + block_config, + reduction, + bn_size, + downsampling, + image_resolution, + dropout, + classes, + image_channels, + dilated, + ) + + +""" +DenseNet specifications +""" + +DOWNSAMPLE_STRUCT = "bn,max_pool,relu,fp_conv" + + +class DenseNet(Model): + name = "DenseNet" + densenet_spec = { + # block_config, reduction_factor, downsampling + None: (None, [1 / 2, 1 / 2, 1 / 2], DOWNSAMPLE_STRUCT), + 28: ([6, 6, 6, 5], [1 / 2.7, 1 / 2.7, 1 / 2.2], DOWNSAMPLE_STRUCT), + 37: ([6, 8, 12, 6], [1 / 3.3, 1 / 3.3, 1 / 4], DOWNSAMPLE_STRUCT), + 45: ([6, 12, 14, 8], [1 / 2.7, 1 / 3.3, 1 / 4], DOWNSAMPLE_STRUCT), + } + + def __init__( + self, + num_layers: Optional[int], + input_shape: List[int], + num_classes: int = 0, + num_init_features: int = 64, + growth_rate: int = 64, + bn_size: int = 0, + dropout: float = 0, + dilated: bool = False, + flex_block_config: Optional[List[int]] = None, + ) -> None: + super(DenseNet, self).__init__(input_shape, num_classes) + self._model = basedensenet_constructor( + self.densenet_spec, + _DenseNet, + num_layers, + num_init_features, + growth_rate, + bn_size, + dropout, + dilated, + flex_block_config, + self._num_classes, + self._input_shape[-2:], + self._input_shape[1], + ) + logging.info(f"building DenseNet with {str(num_layers)} layers...") + + @staticmethod + def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--num-layers", + type=int, + choices=[None, 28, 37, 45], + 
required=True, + help="number of layers to be used inside densenet", + ) + parser.add_argument( + "--reduction", + type=str, + required=False, + help='divide channels by this number in transition blocks (3 values, e.g. "2,2.5,3")', + ) + parser.add_argument( + "--growth-rate", + type=int, + required=False, + help="add this many features each block", + ) + parser.add_argument( + "--init-features", + type=int, + required=False, + help="start with this many filters in the first layer", + ) + parser.add_argument( + "--downsample-structure", + type=str, + required=False, + help="layers in downsampling branch (available: bn,relu,conv,fp_conv,pool,max_pool)", + ) + + +class DenseNetFlex(DenseNet): + """ + Flexible BinaryDenseNet model from `"BinaryDenseNet: Developing an Architecture for Binary Neural Networks" + ` paper. + """ + + name = "DenseNetFlex" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(DenseNetFlex, self).__init__(None, *args, **kwargs) + + @staticmethod + def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: + DenseNet.add_argparse_arguments(parser) + parser.add_argument( + "--block-config", + type=str, + required=True, + help="how many blocks to use in a flex model", + ) + + +class DenseNet28(NoArgparseArgsMixin, DenseNet): + """ + BinaryDenseNet-28 model from `"BinaryDenseNet: Developing an Architecture for Binary Neural Networks"` paper. + + .. _"BinaryDenseNet: Developing an Architecture for Binary Neural Networks": + https://openaccess.thecvf.com/content_ICCVW_2019/html/NeurArch/Bethge_BinaryDenseNet_Developing_an_Architecture_for_Binary_Neural_Networks_ICCVW_2019_paper.html + """ + + name = "DenseNet28" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(DenseNet28, self).__init__(28, *args, **kwargs) + + +class DenseNet37(NoArgparseArgsMixin, DenseNet): + """ + BinaryDenseNet-37 model from `"BinaryDenseNet: Developing an Architecture for Binary Neural Networks"` paper. + + .. _"BinaryDenseNet: Developing an Architecture for Binary Neural Networks": + https://openaccess.thecvf.com/content_ICCVW_2019/html/NeurArch/Bethge_BinaryDenseNet_Developing_an_Architecture_for_Binary_Neural_Networks_ICCVW_2019_paper.html + """ + + name = "DenseNet37" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(DenseNet37, self).__init__(37, *args, **kwargs) + + +class DenseNet45(NoArgparseArgsMixin, DenseNet): + """ + BinaryDenseNet-45 model from `"BinaryDenseNet: Developing an Architecture for Binary Neural Networks"` paper. + + .. 
_"BinaryDenseNet: Developing an Architecture for Binary Neural Networks": + https://openaccess.thecvf.com/content_ICCVW_2019/html/NeurArch/Bethge_BinaryDenseNet_Developing_an_Architecture_for_Binary_Neural_Networks_ICCVW_2019_paper.html + """ + + name = "DenseNet45" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(DenseNet45, self).__init__(45, *args, **kwargs) diff --git a/bitorch/models/dlrm.py b/bitorch/models/dlrm.py new file mode 100644 index 0000000..af4c4ad --- /dev/null +++ b/bitorch/models/dlrm.py @@ -0,0 +1,219 @@ +from argparse import ArgumentParser +from enum import Enum +from typing import Any, List, Union +import logging +import torch +from torch.nn import Linear, Sequential, PReLU, Sigmoid, EmbeddingBag, ModuleList, BatchNorm1d, Module +import numpy as np +from bitorch.layers import QLinear +from bitorch.models.base import Model + +# from .utils import create_loss_function, create_optimizer, create_activation_function, parse_layer_sizes, str2bool +from bitorch.layers.qembedding import QEmbeddingBag + + +def parse_layer_sizes(layer_sizes_str: Union[List[int], str]) -> List[int]: + """parses layer sizes passed as string via cli arg + + Args: + layer_sizes_str (Union[List[int], str]): either list of layer sizes in which case the input is just returned or + a string in format '[layer size a, layer size b, etc]' + + Returns: + List[int]: list of layer sizes + """ + if isinstance(layer_sizes_str, list): + return [int(size) for size in layer_sizes_str] + layer_sizes_str = layer_sizes_str.replace("[", "").replace("]", "") + return [int(size) for size in layer_sizes_str.split(",")] + + +class Interaction_Operation_Type(Enum): + PRODUCT = "product" + CONCAT = "concat" + SUM = "sum" + + +def create_mlp(layer_sizes: List[int], quantized: bool = False) -> Sequential: + """creates a mlp module + + Args: + layer_sizes (List[int]): linear layer unit sizes + for size in enumerate(layer_sizes_str.split(",")): + parsed_layer_sizes.append(int(size))oid activation function. + all other layers will have relu activation. + """ + input_size = layer_sizes[0] + mlp_layers: List[Module] = [] + + for layer_size in layer_sizes[1:]: + output_size = layer_size + mlp_layers.append(BatchNorm1d(input_size)) + mlp_layers.append( + QLinear(input_size, output_size, bias=False) if quantized else Linear(input_size, output_size, bias=True) + ) + mean = 0.0 # std_dev = np.sqrt(variance) + std_dev = np.sqrt(2 / (output_size + input_size)) # np.sqrt(1 / m) # np.sqrt(1 / n) + mlp_weight = np.random.normal(mean, std_dev, size=(output_size, input_size)).astype(np.float32) + std_dev = np.sqrt(1 / output_size) # np.sqrt(2 / (m + 1)) + mlp_bias = np.random.normal(mean, std_dev, size=output_size).astype(np.float32) + # approach 1 + mlp_layers[-1].weight.data = torch.tensor(mlp_weight, requires_grad=True) + if mlp_layers[-1].bias is not None: + mlp_layers[-1].bias.data = torch.tensor(mlp_bias, requires_grad=True) + + mlp_layers.append(BatchNorm1d(output_size)) + mlp_layers.append(PReLU()) + input_size = output_size + return Sequential(*mlp_layers) + + +def create_embeddings( + embedding_dimension: int, layer_sizes: List[int], quantized: bool, sparse: bool = False +) -> ModuleList: + """creates the embedding layers for each category.""" + if sparse: + logging.info("USING SPARSE EMBEDDINGS") + embedding_layers = ModuleList() + for layer_size in layer_sizes: + logging.info( + f"creating embedding layer with {layer_size} * {embedding_dimension} = " + f"{layer_size * embedding_dimension} params..." 
+ ) + if quantized: + embedding_layers.append( + QEmbeddingBag( + layer_size, + embedding_dim=embedding_dimension, + mode="mean", + sparse=sparse, + ) + ) + else: + embedding_layers.append(EmbeddingBag(layer_size, embedding_dimension, mode="sum", sparse=sparse)) + embedding_weights = np.random.uniform( + low=-np.sqrt(1 / layer_size), high=np.sqrt(1 / layer_size), size=(layer_size, embedding_dimension) + ).astype(np.float32) + embedding_layers[-1].weight.data = torch.tensor(embedding_weights, requires_grad=True) + + return embedding_layers + + +class DLRM(Model): + name = "DLRM" + total_size = 1.0 + inference_speed = 1.0 + validation_results: List[dict] = [] + + def __init__( + self, + dense_feature_size: int, + embedding_layer_sizes: List[int], + input_shape: List[int] = [], + bottom_mlp_layer_sizes: Union[List[int], str] = [512, 256, 64], + top_mlp_layer_sizes: Union[List[int], str] = [512, 256, 1], + interaction_operation: str = Interaction_Operation_Type.PRODUCT.value, + binary_bottom_mlp: bool = False, + binary_top_mlp: bool = True, + binary_embedding: bool = True, + embedding_dimension: int = 16, + **kwargs: Any, + ) -> None: + super().__init__(input_shape) + self.interaction_operation = interaction_operation + self.embedding_layers = create_embeddings( + embedding_dimension, + embedding_layer_sizes, + binary_embedding, + ) + + bottom_mlp_layer_sizes = parse_layer_sizes(bottom_mlp_layer_sizes) + top_mlp_layer_sizes = parse_layer_sizes(top_mlp_layer_sizes) + + # computing the correct bottom and top mlp layer sizes taking into account + # feature dimensions and feature interaction output shapes + bottom_mlp_layer_sizes = [dense_feature_size, *bottom_mlp_layer_sizes, embedding_dimension] + + if interaction_operation == Interaction_Operation_Type.CONCAT.value: + top_mlp_layer_sizes = [(len(embedding_layer_sizes) + 1) * embedding_dimension, *top_mlp_layer_sizes] + elif interaction_operation == Interaction_Operation_Type.PRODUCT.value: + top_mlp_layer_sizes = [ + embedding_dimension + (len(embedding_layer_sizes) + 1) * ((len(embedding_layer_sizes) + 1) // 2), + *top_mlp_layer_sizes, + ] + self.bottom_mlp = create_mlp( + bottom_mlp_layer_sizes, + quantized=binary_bottom_mlp, + ) + self.top_mlp = create_mlp( + top_mlp_layer_sizes, + quantized=binary_top_mlp, + ) + self.top_mlp[-1] = Sigmoid() + + @staticmethod + def add_argparse_arguments(parent_parser: ArgumentParser) -> None: + parser = parent_parser.add_argument_group("DLRM Model") + parser.add_argument( + "--bottom-mlp-layer-sizes", type=str, default="[512, 256, 64]", help="layer sizes of the bottom mlp" + ) + parser.add_argument( + "--top-mlp-layer-sizes", type=str, default="[512, 256, 1]", help="layer sizes of the top mlp" + ) + parser.add_argument("--embedding-dimension", type=int, default=16, help="number of embedding dimensions") + parser.add_argument( + "--interaction-operation", + choices=[Interaction_Operation_Type.CONCAT.value, Interaction_Operation_Type.PRODUCT.value], + default=Interaction_Operation_Type.PRODUCT.value, + ) + parser.add_argument("--dense-embeddings", action="store_false", help="Disable sparse embeddings") + + parser.add_argument( + "--binary-embedding", action="store_true", default=True, help="toggles use of binary embeddings in model." + ) + parser.add_argument( + "--binary-top-mlp", action="store_true", default=True, help="toggles use of binary top mlp in model." + ) + parser.add_argument( + "--binary-bottom-mlp", action="store_true", default=False, help="toggles use of binary bottom mlp in model." 
+ ) + + def forward_embeddings( + self, categorical_values_i: torch.Tensor, categorical_values_o: torch.Tensor + ) -> List[torch.Tensor]: + """forwards the preprocessed data through the embedding layers.""" + embedding_outputs = [] + for index, embedding_layer in enumerate(self.embedding_layers): + index_group = categorical_values_i[index] + offset_group = categorical_values_o[index] + embedding_outputs.append(embedding_layer(index_group, offset_group)) + return embedding_outputs + + def feature_interaction(self, mlp_output: torch.Tensor, embedding_outputs: List[torch.Tensor]) -> torch.Tensor: + if self.interaction_operation == Interaction_Operation_Type.PRODUCT.value: + batch_size, dimension = mlp_output.shape + concated_values = torch.cat([mlp_output] + embedding_outputs, dim=1).view((batch_size, -1, dimension)) + product_matrix = torch.bmm(concated_values, torch.transpose(concated_values, 1, 2)) + _, ni, nj = product_matrix.shape + li = torch.tensor([i for i in range(ni) for j in range(i + 0)]) + lj = torch.tensor([j for i in range(nj) for j in range(i + 0)]) + flat_product_matrix = product_matrix[:, li, lj] + result = torch.cat([mlp_output, flat_product_matrix], dim=1) + elif self.interaction_operation == Interaction_Operation_Type.CONCAT.value: + result = torch.cat([mlp_output] + embedding_outputs, dim=1) + else: + raise ValueError("Interaction operation not supported!") + + return result + + def forward(self, dense_values: torch.Tensor, categorical_values: torch.Tensor) -> torch.Tensor: # type: ignore + mlp_output = self.bottom_mlp(dense_values) + embedding_outputs = self.forward_embeddings(*categorical_values) + feature_interactions = self.feature_interaction(mlp_output, embedding_outputs) + interaction_probability = self.top_mlp(feature_interactions) + + # if the top mlp has multiple output values, aggregate these into one single value + if len(interaction_probability.shape) > 1 and interaction_probability.shape[1] > 1: + interaction_probability = torch.clamp(interaction_probability, 0, 1) + interaction_probability = torch.mean(interaction_probability, dim=1) + return interaction_probability diff --git a/bitorch/models/lenet.py b/bitorch/models/lenet.py index 9f9f67f..9f09934 100644 --- a/bitorch/models/lenet.py +++ b/bitorch/models/lenet.py @@ -1,6 +1,6 @@ import argparse +from typing import Optional, List from bitorch.layers.debug_layers import ShapePrintDebug -from bitorch.datasets.base import BasicDataset from bitorch.layers import QLinear, QConv2d, QActivation from torch import nn from .base import Model @@ -12,10 +12,15 @@ class LeNet(Model): num_channels_conv = 64 activation_function = nn.Tanh num_fc = 1000 - name = "lenet" - - def generate_quant_model(self, weight_quant: str, input_quant: str, - weight_quant_2: str = None, input_quant_2: str = None) -> nn.Sequential: + name = "LeNet" + + def generate_quant_model( + self, + weight_quant: str, + input_quant: str, + weight_quant_2: Optional[str] = None, + input_quant_2: Optional[str] = None, + ) -> nn.Sequential: weight_quant_2 = weight_quant_2 or weight_quant input_quant_2 = input_quant_2 or input_quant @@ -24,38 +29,44 @@ def generate_quant_model(self, weight_quant: str, input_quant: str, self.activation_function(), nn.MaxPool2d(2, 2), nn.BatchNorm2d(self.num_channels_conv), - QConv2d( self.num_channels_conv, self.num_channels_conv, kernel_size=5, input_quantization=input_quant, - weight_quantization=weight_quant), + weight_quantization=weight_quant, + ), nn.BatchNorm2d(self.num_channels_conv), nn.MaxPool2d(2, 2), 
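# A construction sketch for the quantized DLRM above (feature sizes and vocabulary sizes are
# illustrative placeholders): 13 dense features and three categorical features.
from bitorch.models import DLRM

model = DLRM(
    dense_feature_size=13,
    embedding_layer_sizes=[1000, 500, 100],
    embedding_dimension=16,
    interaction_operation="product",   # pairwise dot products of embeddings and bottom MLP output
)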
ShapePrintDebug(), - nn.Flatten(), - QActivation(activation=input_quant_2), - QLinear(self.num_channels_conv * 4 * 4, - self.num_fc, weight_quantization=weight_quant_2), + QLinear( + self.num_channels_conv * 4 * 4, + self.num_fc, + weight_quantization=weight_quant_2, + ), nn.BatchNorm1d(self.num_fc), self.activation_function(), - nn.Linear(self.num_fc, self.num_output), ) return model - def __init__(self, dataset: BasicDataset, lenet_version: int = 0) -> None: - """builds the model, depending on mode in either quantized or full_precision mode + def __init__(self, input_shape: List[int], num_classes: int = 0, lenet_version: int = 0) -> None: + """builds the model depending on mode in either quantized or full_precision mode Args: - lenet_quantized (bool, optional): toggles use of quantized version of lenet. Default is False. + input_shape (List[int]): input shape of images + num_classes (int, optional): number of output classes. Defaults to None. + lenet_version (int, optional): lenet version. if version outside of [0, 3], the full precision version is used. Defaults to 0. + + Raises: + ValueError: thrown if num classes is none """ - super(LeNet, self).__init__(dataset) - self.input_channels = dataset.shape[1] - self.num_output = dataset.num_classes + super(LeNet, self).__init__(input_shape, num_classes) + self.input_channels = self._input_shape[1] + self.num_output = self._num_classes + if lenet_version == 0: self._model = self.generate_quant_model("sign", "sign") elif lenet_version == 1: @@ -70,22 +81,17 @@ def __init__(self, dataset: BasicDataset, lenet_version: int = 0) -> None: nn.BatchNorm2d(self.num_channels_conv), self.activation_function(), nn.MaxPool2d(2, 2), - nn.Conv2d(self.num_channels_conv, self.num_channels_conv, kernel_size=5), nn.BatchNorm2d(self.num_channels_conv), self.activation_function(), nn.MaxPool2d(2, 2), - nn.Flatten(), - nn.Linear(self.num_channels_conv * 4 * 4, self.num_fc), nn.BatchNorm1d(self.num_fc), self.activation_function(), - nn.Linear(self.num_fc, self.num_output), ) @staticmethod def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - parser.add_argument("--lenet-version", type=int, default=0, - help="choses a verion of lenet") + parser.add_argument("--version", type=int, default=0, help="choose a version of lenet") diff --git a/bitorch/models/meliusnet.py b/bitorch/models/meliusnet.py new file mode 100644 index 0000000..cc163ea --- /dev/null +++ b/bitorch/models/meliusnet.py @@ -0,0 +1,246 @@ +import argparse +import logging +from typing import Optional, List, Any + +import torch +from torch import nn +from torch.nn import Module + +from .densenet import BaseNetDense, DOWNSAMPLE_STRUCT, basedensenet_constructor +from .base import Model, NoArgparseArgsMixin +from bitorch.layers import QConv2d + + +# Blocks +class ImprovementBlock(Module): + """ImprovementBlock which improves the last n channels""" + + def __init__(self, channels: int, in_channels: int, dilation: int = 1): + super(ImprovementBlock, self).__init__() + self.body_layers: List[Module] = [] + self.body_layers.append(nn.BatchNorm2d(in_channels)) + self.body_layers.append( + QConv2d(in_channels, channels, kernel_size=3, stride=1, padding=dilation, dilation=dilation) + ) + + self.use_sliced_addition = channels != in_channels + if self.use_sliced_addition: + assert channels < in_channels + self.slices = [0, in_channels - channels, in_channels] + self.slices_add_x = [False, True] + self.body = nn.Sequential(*self.body_layers) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + 
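# A construction sketch for the LeNet variant above (MNIST-shaped NCHW input assumed):
from bitorch.models import LeNet

model = LeNet(input_shape=[1, 1, 28, 28], num_classes=10, lenet_version=0)  # version 0: "sign" quantization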
residual = x + x = self.body(x) + if not self.use_sliced_addition: + return x + residual + + parts = [] + for add_x, slice_begin, slice_end in zip(self.slices_add_x, self.slices[:-1], self.slices[1:]): + length = slice_end - slice_begin + if length == 0: + continue + result = torch.narrow(residual, dim=1, start=slice_begin, length=length) + if add_x: + result = result + x + parts.append(result) + return torch.cat(parts, dim=1) + + +class _MeliusNet(BaseNetDense): + def _add_base_block_structure(self, layer_num: int, dilation: int) -> None: + self._add_dense_layer(layer_num, dilation) + self.current_dense_block.add_module( + "ImprovementBlock%d" % (layer_num + 1), + ImprovementBlock(self.growth_rate, self.num_features, dilation=dilation), + ) + + +class MeliusNet(Model): + name = "MeliusNet" + + meliusnet_spec = { + # name: block_config, reduction_factors, downsampling + None: (None, [1 / 2, 1 / 2, 1 / 2], DOWNSAMPLE_STRUCT), + "23": ([2, 4, 6, 6], [128 / 192, 192 / 384, 288 / 576], DOWNSAMPLE_STRUCT.replace("fp_conv", "cs,fp_conv:8")), + "22": ([4, 5, 4, 4], [160 / 320, 224 / 480, 256 / 480], DOWNSAMPLE_STRUCT), + "29": ([4, 6, 8, 6], [128 / 320, 192 / 512, 256 / 704], DOWNSAMPLE_STRUCT), + "42": ([5, 8, 14, 10], [160 / 384, 256 / 672, 416 / 1152], DOWNSAMPLE_STRUCT), + "59": ([6, 12, 24, 12], [192 / 448, 320 / 960, 544 / 1856], DOWNSAMPLE_STRUCT), + "a": ([4, 5, 5, 6], [160 / 320, 256 / 480, 288 / 576], DOWNSAMPLE_STRUCT.replace("fp_conv", "cs,fp_conv:4")), + "b": ([4, 6, 8, 6], [160 / 320, 224 / 544, 320 / 736], DOWNSAMPLE_STRUCT.replace("fp_conv", "cs,fp_conv:2")), + "c": ([3, 5, 10, 6], [128 / 256, 192 / 448, 288 / 832], DOWNSAMPLE_STRUCT.replace("fp_conv", "cs,fp_conv:4")), + } + + def __init__( + self, + num_layers: Optional[str], + input_shape: List[int], + num_classes: int = 0, + num_init_features: int = 64, + growth_rate: int = 64, + bn_size: int = 0, + dropout: float = 0, + dilated: bool = False, + flex_block_config: Optional[List[int]] = None, + ) -> None: + super(MeliusNet, self).__init__(input_shape, num_classes) + self._model = basedensenet_constructor( + self.meliusnet_spec, + _MeliusNet, + num_layers, + num_init_features, + growth_rate, + bn_size, + dropout, + dilated, + flex_block_config, + self._num_classes, + self._input_shape[-2:], + self._input_shape[1], + ) + logging.info(f"building MeliusNet with {str(num_layers)} layers...") + + @staticmethod + def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--num-layers", + type=str, + choices=[None, "22", "23", "29", "42", "59", "a", "b", "c"], + required=True, + help="number of layers to be used inside meliusnet", + ) + parser.add_argument( + "--reduction", + type=str, + required=False, + help="divide channels by this number in transition blocks", + ) + parser.add_argument( + "--growth-rate", + type=int, + required=False, + help="add this many features each block", + ) + parser.add_argument( + "--init-features", + type=int, + required=False, + help="start with this many filters in the first layer", + ) + parser.add_argument( + "--downsample-structure", + type=str, + required=False, + help="layers in downsampling branch (available: bn,relu,conv,fp_conv,pool,max_pool)", + ) + + +class MeliusNetFlex(MeliusNet): + """MeliusNet-Flex model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. 
+ """ + + name = "MeliusNetFlex" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNetFlex, self).__init__(None, *args, **kwargs) + + @staticmethod + def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: + MeliusNet.add_argparse_arguments(parser) + parser.add_argument( + "--block-config", + type=str, + required=True, + help="how many blocks to use in a flex model", + ) + + +class MeliusNet22(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-22 model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. + """ + + name = "MeliusNet22" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNet22, self).__init__("22", *args, **kwargs) + + +class MeliusNet23(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-23 model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. + """ + + name = "MeliusNet23" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNet23, self).__init__("23", *args, **kwargs) + + +class MeliusNet29(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-29 model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. + """ + + name = "MeliusNet29" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNet29, self).__init__("29", *args, **kwargs) + + +class MeliusNet42(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-42 model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. + """ + + name = "MeliusNet42" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNet42, self).__init__("42", *args, **kwargs) + + +class MeliusNet59(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-59 model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. + """ + + name = "MeliusNet59" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNet59, self).__init__("59", *args, **kwargs) + + +class MeliusNetA(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-A model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. + """ + + name = "MeliusNetA" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNetA, self).__init__("a", *args, **kwargs) + + +class MeliusNetB(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-B model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. + """ + + name = "MeliusNetB" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNetB, self).__init__("b", *args, **kwargs) + + +class MeliusNetC(NoArgparseArgsMixin, MeliusNet): + """MeliusNet-C model from `"MeliusNet: Can Binary Neural Networks Achieve MobileNet-level Accuracy?" + ` paper. 
+ """ + + name = "MeliusNetC" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(MeliusNetC, self).__init__("c", *args, **kwargs) diff --git a/bitorch/models/model_hub.py b/bitorch/models/model_hub.py new file mode 100644 index 0000000..10293d2 --- /dev/null +++ b/bitorch/models/model_hub.py @@ -0,0 +1,164 @@ +from pathlib import Path +from typing import Dict, Any, Union, Tuple +import numbers +import pandas +import logging +import warnings +import torch +import base64 +import hashlib +from torchvision.datasets.utils import download_url + + +def _md5_hash_file(path: Path) -> Any: + hash_md5 = hashlib.md5() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(64 * 1024), b""): + hash_md5.update(chunk) + return hash_md5 + + +def _digest_file(path: Union[Path, str]) -> str: + return base64.b64encode(_md5_hash_file(Path(path)).digest()).decode("ascii") + + +def convert_dtypes(data: dict) -> dict: + """converts types of the values of dict so that they can be easily compared accross + dataframes and csvs. converts all values that are not numerical to string. + + Args: + data (dict): dict with values to be converted + + Returns: + dict: dict with converted values + """ + for key, value in data.items(): + if isinstance(value, list): + value = tuple(value) + if not isinstance(value, numbers.Number) and not isinstance(value, bool): + data[key] = str(value) + return data + + +def get_matching_row(version_table: pandas.DataFrame, model_kwargs: dict) -> pandas.DataFrame: + """searches the version table dataframe for a row that matches model kwargs + + Args: + version_table (pandas.DataFrame): the dataframe to search in + model_kwargs (dict): the dict to search for. does not have to have key-value-pairs of each + column of version_table, i.e. can be subset + + Returns: + pandas.DataFrame: row with values in model_kwargs.keys() columns that are equal to model_kwargs values. + if not existent, returns an empty dataframe. + """ + model_kwargs = convert_dtypes(model_kwargs) + with warnings.catch_warnings(): + model_kwargs_series = pandas.Series(model_kwargs) + existing_row = version_table[(version_table[model_kwargs.keys()] == model_kwargs_series).all(1)] + if existing_row.empty: + return None + return existing_row + + +def get_model_path(version_table: pandas.DataFrame, model_kwargs: dict) -> Tuple[str, str]: + """finds the matching row for model_kwargs in version table and path to model artifact for given configuration + + Args: + version_table (pandas.DataFrame): version table with model configurations and corresponding model hub versions + model_kwargs (dict): model configuration to search for + + Raises: + RuntimeError: thrown if no matching model can be found in version table + + Returns: + str: path to matching model hub artifact + """ + matching_row = get_matching_row(version_table, model_kwargs) + if matching_row is None: + raise RuntimeError( + f"No matching model found in hub with configuration: {model_kwargs}! You can train" + " it yourself or try to load it from a local checkpoint!" + ) + model_url = matching_row["model_hub_url"][0] + model_digest = matching_row["model_digest"][0] + return model_url, model_digest + + +def load_from_hub( + model_version_table_path: str, download_path: str = "bitorch_models", **model_kwargs: str +) -> torch.Tensor: + """loads the model that matches the requested model configuration in model_kwargs from the model hub. 
+ + Args: + model_version_table_path (str): path to model version table on model hub + download_path (str, optional): path to store the downloaded files. Defaults to "/tmp". + + Returns: + torch.Tensor: state dict of downloaded model file + """ + Path(download_path).mkdir(parents=True, exist_ok=True) + + version_table = download_version_table(model_version_table_path) + model_path, model_digest = get_model_path(version_table, model_kwargs) + model_checksum = model_path.split("/")[-1] + model_local_path = Path(f"{download_path}/{model_checksum}") + + if not model_local_path.exists() or _digest_file(str(model_local_path)) != model_digest: + logging.info("downloading model...") + download_url(model_path, model_local_path.parent, model_local_path.name, model_checksum) + logging.info("Model downloaded!") + else: + logging.info(f"Using already downloaded model at {model_local_path}") + artifact = torch.load(model_local_path, map_location="cpu") + + # true if artifact is a checkpoint from pytorch lightning + if isinstance(artifact, dict): + return lightning_checkpoint_to_state_dict(artifact) # type: ignore + return artifact + + +def lightning_checkpoint_to_state_dict(artifact: Dict[Any, Any]) -> Dict[Any, Any]: + """converts a pytorch lightning checkpoint to a normal torch state dict + + Args: + artifact (Dict[Any, Any]): dict containing a ['state_dict'] attribute + + Returns: + Dict[Any, Any]: state dict for model + """ + state_dict = artifact["state_dict"] + + for key in state_dict.keys(): + assert key.startswith("model."), f"Unexpected malformed static dict key {key}." + + # turns model._model.arg keys in state dict into _model.arg + extracted_state_dict = {key[6:]: value for key, value in state_dict.items()} + return extracted_state_dict + + +def download_version_table(model_table_path: str, no_exception: bool = False) -> pandas.DataFrame: + """downloads the newest version table from model hub. + + Args: + model_table_path (str): path on hub to model version table + api (wandb.Api): api to make download request with + no_exception (bool, optional): weather exception shall be thrown if received version table is empty. Defaults to False. 
+ + Raises: + Exception: thrown if received version table is empty / cannot be downloaded and no_exception is False + + Returns: + pandas.DataFrame: model version table + """ + logging.info("downloading model version table from hub...") + try: + download_url(model_table_path, "/tmp", "bitorch_model_version_table.csv") + version_table = pandas.read_csv("/tmp/bitorch_model_version_table.csv") + except Exception as e: + logging.info(f"could not retrieve model version table from {model_table_path}: {e}") + if no_exception: + logging.info("creating empty table...") + return pandas.DataFrame() + raise Exception(e) + return version_table diff --git a/bitorch/models/quicknet.py b/bitorch/models/quicknet.py new file mode 100644 index 0000000..600a8a3 --- /dev/null +++ b/bitorch/models/quicknet.py @@ -0,0 +1,187 @@ +import logging +from typing import Any, List, Optional + +import torch +from torch import nn +from torch.nn import Module +import numpy as np + +from .base import Model, NoArgparseArgsMixin +from bitorch.layers import QConv2d, PadModule +from bitorch.models.common_layers import get_initial_layers + + +class ResidualBlock(nn.Sequential): + def __init__(self, in_channels: int, out_channels: int) -> None: + super().__init__() + self.add_module( + "qconv", + QConv2d( + in_channels, + out_channels, + kernel_size=3, + pad_value=1, + padding="same", + bias=False, + ), + ) + self.add_module("relu", nn.ReLU()) + self.add_module("bnorm", nn.BatchNorm2d(out_channels, momentum=0.9)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return super().forward(x) + x + + +class TransitionBlock(nn.Sequential): + def __init__(self, in_channels: int, out_channels: int, strides: int) -> None: + super().__init__() + self.add_module("relu", nn.ReLU()) + self.add_module("pool", nn.MaxPool2d(strides, stride=1)) + self.add_module("pad", PadModule(1, 1, 1, 1)) + self.add_module( + "depth_conv", + nn.Conv2d( + in_channels, + in_channels, + kernel_size=3, + groups=in_channels, + stride=strides, + bias=False, + ).requires_grad_(False), + ) + self.add_module("conv", nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)) + self.add_module("relu2", nn.ReLU()) + self.add_module("norm", nn.BatchNorm2d(out_channels, momentum=0.9)) + + +class QuickNet(Model): + """QuickNet model from `"Larq Compute Engine: Design, Benchmark, and Deploy State-of-the-Art Binarized Neural Networks" + `_ paper. + """ + + name = "QuickNet" + + def __init__( + self, + input_shape: List[int], + section_filters: Optional[List[int]] = None, + section_blocks: Optional[List[int]] = None, + num_classes: int = 0, + ) -> None: + super(QuickNet, self).__init__(input_shape, num_classes) + if section_filters is None: + section_filters = [64, 128, 256, 512] + if section_blocks is None: + section_blocks = [4, 4, 4, 4] + self.image_channels = self._input_shape[1] + self.num_classes = num_classes + self.section_filters = section_filters + self.section_blocks = section_blocks + self._model = self._build_model() + logging.info("building Quicknet") + + self._model.stem.apply(self._initialize_stem) # type: ignore + self._model.body.apply(self._initialize_body_top) # type: ignore + self._model.top.apply(self._initialize_body_top) # type: ignore + + def _blurpool_init(self, weight: torch.Tensor) -> None: + """Initialize anti-alias low_pass filter. + See the `"Making Convolutional Networks Shift-Invariant Again" `_ paper. 
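+        The kernel is built as the outer product of a binomial row (e.g. [1, 2, 1]), normalized to
+        sum to one and repeated once per output channel (depthwise).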
+ """ + filters, kernel_size = weight.data.shape[0], weight.data.shape[2] + + if kernel_size == 2: + base = np.array([1.0, 1.0]) + elif kernel_size == 3: + base = np.array([1.0, 2.0, 1.0]) + elif kernel_size == 5: + base = np.array([1.0, 4.0, 6.0, 4.0, 1.0]) + else: + raise ValueError("filter size should be in 2, 3, 5") + + new_weights = torch.Tensor(base[:, None] * base[None, :]) + new_weights = new_weights / torch.sum(new_weights) + new_weights = new_weights[None, None, :, :].repeat((filters, 1, 1, 1)) + weight.data = new_weights + + def _initialize_stem(self, layer: Module) -> None: + if isinstance(layer, nn.Conv2d): + if layer.groups == 1: + nn.init.kaiming_normal_(layer.weight) + else: + nn.init.xavier_uniform_(layer.weight) + + def _initialize_body_top(self, layer: Module) -> None: + if isinstance(layer, (nn.Conv2d, nn.Linear)): + if isinstance(layer, nn.Linear) or layer.groups == 1: + nn.init.xavier_normal_(layer.weight) + else: + self._blurpool_init(layer.weight) + + def _build_model(self) -> nn.Sequential: + model = nn.Sequential() + model.add_module( + "stem", + nn.Sequential(*get_initial_layers("quicknet_stem", self.image_channels, self.section_filters[0])), + ) + body = nn.Sequential() + for block_num, (layers, filters) in enumerate(zip(self.section_blocks, self.section_filters)): + residual_blocks: List[Module] = [] + for layer in range(layers): + residual_blocks.append(ResidualBlock(filters, filters)) + body.add_module( + "ResidualBlocks_%d" % (block_num + 1), + nn.Sequential(*residual_blocks), + ) + if block_num != len(self.section_blocks) - 1: + body.add_module( + "Transition_%d" % (block_num + 1), TransitionBlock(filters, self.section_filters[block_num + 1], 2) + ) + model.add_module( + "body", + body, + ) + model.add_module( + "top", + nn.Sequential( + nn.ReLU(), + nn.AdaptiveAvgPool2d(1), + nn.Flatten(), + nn.Linear(self.section_filters[-1], self.num_classes), + ), + ) + return model + + def clip_weights(self, layer: Module, clip_value: float = 1.25) -> None: + """Clips weights in quantized convolution layer in Residual Blocks""" + if isinstance(layer, ResidualBlock): + weights = layer.qconv.weight.data # type: ignore + weights = weights.clamp(-clip_value, clip_value) # type: ignore + layer.qconv.weight.data = weights # type: ignore + + def on_train_batch_end(self, layer: Module) -> None: + self.clip_weights(layer) + + +class QuickNetSmall(NoArgparseArgsMixin, QuickNet): + """QuickNetSmall model from `"Larq Compute Engine: Design, Benchmark, and Deploy State-of-the-Art Binarized Neural Networks" + `_ paper. + """ + + name = "QuickNetSmall" + section_filters = [32, 64, 256, 512] + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(QuickNetSmall, self).__init__(section_filters=self.section_filters, *args, **kwargs) + + +class QuickNetLarge(NoArgparseArgsMixin, QuickNet): + """QuickNetLarge model from `"Larq Compute Engine: Design, Benchmark, and Deploy State-of-the-Art Binarized Neural Networks" + `_ paper. 
+ """ + + name = "QuickNetLarge" + section_blocks = [6, 8, 12, 6] + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super(QuickNetLarge, self).__init__(section_blocks=self.section_blocks, *args, **kwargs) diff --git a/bitorch/models/resnet.py b/bitorch/models/resnet.py index bb246f6..b570a6b 100644 --- a/bitorch/models/resnet.py +++ b/bitorch/models/resnet.py @@ -1,6 +1,5 @@ -from bitorch.datasets.base import BasicDataset -from .base import Model -from typing import List, Any +from .base import Model, NoArgparseArgsMixin +from typing import Optional, List, Any from bitorch.layers import QConv2d_NoAct import torch import argparse @@ -44,7 +43,13 @@ def _build_downsampling(self) -> nn.Sequential: nn.Sequential: the downsampling model """ return nn.Sequential( - QConv2d(self.in_channels, self.out_channels, kernel_size=1, stride=self.stride, padding=0), + QConv2d( + self.in_channels, + self.out_channels, + kernel_size=1, + stride=self.stride, + padding=0, + ), nn.BatchNorm2d(self.out_channels), ) @@ -56,7 +61,13 @@ def _build_body(self) -> nn.Sequential: nn.Sequential: the basic building block body model """ return nn.Sequential( - QConv2d(self.in_channels, self.out_channels, kernel_size=3, stride=self.stride, padding=1), + QConv2d( + self.in_channels, + self.out_channels, + kernel_size=3, + stride=self.stride, + padding=1, + ), nn.BatchNorm2d(self.out_channels), QConv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1), nn.BatchNorm2d(self.out_channels), @@ -111,8 +122,14 @@ def _build_downsampling(self) -> nn.Sequential: nn.Sequential: the downsampling model """ return nn.Sequential( - QConv2d_NoAct(self.in_channels, self.out_channels, kernel_size=1, - stride=self.stride, padding=0, bias=False), + QConv2d_NoAct( + self.in_channels, + self.out_channels, + kernel_size=1, + stride=self.stride, + padding=0, + bias=False, + ), nn.BatchNorm2d(self.out_channels), ) @@ -123,14 +140,25 @@ def _build_body(self) -> nn.Sequential: nn.Sequential: the bottleneck body model """ return nn.Sequential( - QConv2d_NoAct(self.in_channels, self.out_channels // 4, kernel_size=1, stride=self.stride), + QConv2d_NoAct( + self.in_channels, + self.out_channels // 4, + kernel_size=1, + stride=self.stride, + ), nn.BatchNorm2d(self.out_channels // 4), nn.ReLU(), - QConv2d_NoAct(self.out_channels // 4, self.out_channels // 4, kernel_size=3, stride=1, padding=1), + QConv2d_NoAct( + self.out_channels // 4, + self.out_channels // 4, + kernel_size=3, + stride=1, + padding=1, + ), nn.BatchNorm2d(self.out_channels // 4), nn.ReLU(), QConv2d_NoAct(self.out_channels // 4, self.out_channels, kernel_size=1, stride=1), - nn.BatchNorm2d(self.out_channels) + nn.BatchNorm2d(self.out_channels), ) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -182,7 +210,13 @@ def _build_downsampling(self) -> nn.Module: Returns: QConv2d: the downsampling convolution layer """ - return QConv2d(self.in_channels, self.out_channels, kernel_size=1, stride=self.stride, padding=0) + return QConv2d( + self.in_channels, + self.out_channels, + kernel_size=1, + stride=self.stride, + padding=0, + ) def _build_body(self) -> nn.Sequential: """builds body of building block. Check referenced paper for more details. 
@@ -191,7 +225,13 @@ def _build_body(self) -> nn.Sequential: nn.Sequential: the bottleneck body model """ return nn.Sequential( - QConv2d(self.in_channels, self.out_channels, kernel_size=3, stride=self.stride, padding=1), + QConv2d( + self.in_channels, + self.out_channels, + kernel_size=3, + stride=self.stride, + padding=1, + ), nn.BatchNorm2d(self.out_channels), QConv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1), ) @@ -241,7 +281,13 @@ def _build_downsampling(self) -> nn.Module: Returns: QConv2d: the downsampling convolution layer """ - return QConv2d_NoAct(self.in_channels, self.out_channels, kernel_size=1, stride=self.stride, bias=False) + return QConv2d_NoAct( + self.in_channels, + self.out_channels, + kernel_size=1, + stride=self.stride, + bias=False, + ) def _build_body(self) -> nn.Sequential: """builds body of building block. Check referenced paper for more details. @@ -250,10 +296,21 @@ def _build_body(self) -> nn.Sequential: nn.Sequential: the bottleneck body model """ return nn.Sequential( - QConv2d_NoAct(self.in_channels, self.out_channels // 4, kernel_size=1, stride=self.stride), + QConv2d_NoAct( + self.in_channels, + self.out_channels // 4, + kernel_size=1, + stride=self.stride, + ), nn.BatchNorm2d(self.out_channels // 4), nn.ReLU(), - QConv2d_NoAct(self.out_channels // 4, self.out_channels // 4, kernel_size=3, stride=1, padding=1), + QConv2d_NoAct( + self.out_channels // 4, + self.out_channels // 4, + kernel_size=3, + stride=1, + padding=1, + ), nn.BatchNorm2d(self.out_channels // 4), nn.ReLU(), QConv2d_NoAct(self.out_channels // 4, self.out_channels, kernel_size=1, stride=1), @@ -291,7 +348,14 @@ def __init__(self, classes: int, channels: list) -> None: self.features = nn.Sequential() self.output_layer = nn.Linear(channels[-1], classes) - def make_layer(self, block: Module, layers: int, in_channels: int, out_channels: int, stride: int) -> nn.Sequential: + def make_layer( + self, + block: Module, + layers: int, + in_channels: int, + out_channels: int, + stride: int, + ) -> nn.Sequential: """builds a layer by stacking blocks in a sequential models. Args: @@ -348,13 +412,14 @@ class ResNetV1(SpecificResnet): """ def __init__( - self, - block: Module, - layers: list, - channels: list, - classes: int, - initial_layers: str = "imagenet", - image_channels: int = 3) -> None: + self, + block: Module, + layers: list, + channels: list, + classes: int, + image_resolution: Optional[List[int]] = None, + image_channels: int = 3, + ) -> None: """Creates ResNetV1 model. Args: @@ -363,8 +428,8 @@ def __init__( channels (list): channel num used for input/output channel size of layers. there must always be one more channels than there are layers. classes (int): number of output classes - initial_layers (str, optional): name of set for initial layers. refer to common_layers.py. - Defaults to "imagenet". + image_resolution (List[int], optional): resolution of input image. refer to common_layers.py. + Defaults to None. image_channels (int, optional): input channels of images. Defaults to 3. Raises: @@ -373,11 +438,12 @@ def __init__( super(ResNetV1, self).__init__(classes, channels) if len(channels) != (len(layers) + 1): raise ValueError( - f"the len of channels ({len(channels)}) must be exactly the len of layers ({len(layers)}) + 1!") + f"the len of channels ({len(channels)}) must be exactly the len of layers ({len(layers)}) + 1!" 
+ ) feature_layers: List[nn.Module] = [] feature_layers.append(nn.BatchNorm2d(image_channels)) - feature_layers.extend(get_initial_layers(initial_layers, image_channels, channels[0])) + feature_layers.extend(get_initial_layers(image_resolution, image_channels, channels[0])) feature_layers.append(nn.BatchNorm2d(channels[0])) feature_layers.extend(self.make_feature_layers(block, layers, channels)) @@ -396,13 +462,14 @@ class ResNetV2(SpecificResnet): """ def __init__( - self, - block: Module, - layers: list, - channels: list, - classes: int = 1000, - initial_layers: str = "imagenet", - image_channels: int = 3) -> None: + self, + block: Module, + layers: list, + channels: list, + classes: int = 1000, + image_resolution: Optional[List[int]] = None, + image_channels: int = 3, + ) -> None: """Creates ResNetV2 model. Args: @@ -411,8 +478,8 @@ def __init__( channels (list): channel num used for input/output channel size of layers. there must always be one more channels than there are layers. classes (int): number of output classes - initial_layers (str, optional): name of set for initial layers. refer to common_layers.py. - Defaults to "imagenet". + image_resolution (List[int], optional): resolution of input image. refer to common_layers.py. + Defaults to None. image_channels (int, optional): input channels of images. Defaults to 3. Raises: @@ -421,11 +488,12 @@ def __init__( super(ResNetV2, self).__init__(classes, channels) if len(channels) != (len(layers) + 1): raise ValueError( - f"the len of channels ({len(channels)}) must be exactly the len of layers ({len(layers)}) + 1!") + f"the len of channels ({len(channels)}) must be exactly the len of layers ({len(layers)}) + 1!" + ) feature_layers: List[nn.Module] = [] feature_layers.append(nn.BatchNorm2d(image_channels)) - feature_layers.extend(get_initial_layers(initial_layers, image_channels, channels[0])) + feature_layers.extend(get_initial_layers(image_resolution, image_channels, channels[0])) feature_layers.extend(self.make_feature_layers(block, layers, channels)) @@ -444,41 +512,38 @@ def __init__( class Resnet(Model): - name = "resnet" + name = "Resnet" - resnet_spec = {18: ('basic_block', [2, 2, 2, 2], [64, 64, 128, 256, 512]), - 34: ('basic_block', [3, 4, 6, 3], [64, 64, 128, 256, 512]), - 50: ('bottle_neck', [3, 4, 6, 3], [64, 256, 512, 1024, 2048]), - 101: ('bottle_neck', [3, 4, 23, 3], [64, 256, 512, 1024, 2048]), - 152: ('bottle_neck', [3, 8, 36, 3], [64, 256, 512, 1024, 2048])} + resnet_spec = { + 18: ("basic_block", [2, 2, 2, 2], [64, 64, 128, 256, 512]), + 34: ("basic_block", [3, 4, 6, 3], [64, 64, 128, 256, 512]), + 50: ("bottle_neck", [3, 4, 6, 3], [64, 256, 512, 1024, 2048]), + 101: ("bottle_neck", [3, 4, 23, 3], [64, 256, 512, 1024, 2048]), + 152: ("bottle_neck", [3, 8, 36, 3], [64, 256, 512, 1024, 2048]), + } resnet_net_versions = [ResNetV1, ResNetV2] - resnet_block_versions = [{'basic_block': BasicBlockV1, 'bottle_neck': BottleneckV1}, - {'basic_block': BasicBlockV2, 'bottle_neck': BottleneckV2}] + resnet_block_versions = [ + {"basic_block": BasicBlockV1, "bottle_neck": BottleneckV1}, + {"basic_block": BasicBlockV2, "bottle_neck": BottleneckV2}, + ] def __init__( - self, - resnet_version: int, - resnet_num_layers: int, - dataset: BasicDataset) -> None: - super(Resnet, self).__init__(dataset) - self._model = self.create_resnet(resnet_version, resnet_num_layers, - self._dataset.num_classes, self._dataset.name, self._dataset.shape[1]) + self, + resnet_version: int, + resnet_num_layers: int, + input_shape: List[int], + num_classes: 
int = 0, + ) -> None: + super(Resnet, self).__init__(input_shape, num_classes) + self._model = self.create_resnet(resnet_version, resnet_num_layers) logging.info(f"building Resnetv{str(resnet_version)} with {str(resnet_num_layers)} layers...") - def create_resnet(self, - version: int, - num_layers: int, - classes: int = 1000, - initial_layers: str = "imagenet", - image_channels: int = 3) -> Module: + def create_resnet(self, version: int, num_layers: int) -> Module: """Creates a resnet complying to given version and layer number. Args: version (int): version of resnet to be used. availavle versions are 1 or 2 num_layers (int): number of layers to be build. - classes (int, optional): number of output classes. Defaults to 1000. - initial_layers (str, optional): name of set of initial layers to be used. Defaults to "imagenet". - image_channels (int, optional): number of channels of input images. Defaults to 3. Raises: ValueError: raised if no resnet specification for given num_layers is listed in the resnet_spec dict above @@ -492,133 +557,114 @@ def create_resnet(self, if version not in [1, 2]: raise ValueError(f"invalid resnet version {version}, only 1 or 2 allowed") + image_channels = self._input_shape[1] + image_resolution = self._input_shape[-2:] block_type, layers, channels = self.resnet_spec[num_layers] resnet = self.resnet_net_versions[version - 1] block = self.resnet_block_versions[version - 1][block_type] - return resnet(block, layers, channels, classes, initial_layers, image_channels) + return resnet(block, layers, channels, self._num_classes, image_resolution, image_channels) @staticmethod def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - parser.add_argument("--resnet-version", type=int, choices=[1, 2], required=True, - help="version of resnet to be used") - parser.add_argument("--resnet-num-layers", type=int, choices=[18, 34, 50, 152], required=True, - help="number of layers to be used inside resnet") + parser.add_argument( + "--version", + type=int, + choices=[1, 2], + required=True, + help="version of resnet to be used", + ) + parser.add_argument( + "--num-layers", + type=int, + choices=[18, 34, 50, 152], + required=True, + help="number of layers to be used inside resnet", + ) -class Resnet18V1(Resnet): +class Resnet18V1(NoArgparseArgsMixin, Resnet): """ResNet-18 V1 model from `"Deep Residual Learning for Image Recognition" `_ paper. """ - name = "resnet18v1" + name = "Resnet18V1" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet18V1, self).__init__(1, 18, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class Resnet34V1(Resnet): +class Resnet34V1(NoArgparseArgsMixin, Resnet): """ResNet-34 V1 model from `"Deep Residual Learning for Image Recognition" `_ paper. """ - name = "resnet34v1" + + name = "Resnet34V1" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet34V1, self).__init__(1, 34, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class Resnet50V1(Resnet): +class Resnet50V1(NoArgparseArgsMixin, Resnet): """ResNet-50 V1 model from `"Deep Residual Learning for Image Recognition" `_ paper. 
""" - name = "resnet50v1" + name = "Resnet50V1" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet50V1, self).__init__(1, 50, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class Resnet152V1(Resnet): +class Resnet152V1(NoArgparseArgsMixin, Resnet): """ResNet-152 V1 model from `"Deep Residual Learning for Image Recognition" `_ paper. """ - name = "resnet152v1" + name = "Resnet152V1" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet152V1, self).__init__(1, 152, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class Resnet18V2(Resnet): +class Resnet18V2(NoArgparseArgsMixin, Resnet): """ResNet-18 V2 model from `"Deep Residual Learning for Image Recognition" `_ paper. """ - name = "resnet18v2" + name = "Resnet18V2" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet18V2, self).__init__(2, 18, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class Resnet34V2(Resnet): +class Resnet34V2(NoArgparseArgsMixin, Resnet): """ResNet-34 V2 model from `"Deep Residual Learning for Image Recognition" `_ paper. """ - name = "resnet34v2" + name = "Resnet34V2" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet34V2, self).__init__(2, 34, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class Resnet50V2(Resnet): +class Resnet50V2(NoArgparseArgsMixin, Resnet): """ResNet-50 V2 model from `"Deep Residual Learning for Image Recognition" `_ paper. """ - name = "resnet50v2" + name = "Resnet50V2" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet50V2, self).__init__(2, 50, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class Resnet152V2(Resnet): +class Resnet152V2(NoArgparseArgsMixin, Resnet): """ResNet-152 V2 model from `"Deep Residual Learning for Image Recognition" `_ paper. """ - name = "resnet152v2" + name = "Resnet152V2" def __init__(self, *args: Any, **kwargs: Any) -> None: super(Resnet152V2, self).__init__(2, 152, *args, **kwargs) - - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass diff --git a/bitorch/models/resnet_e.py b/bitorch/models/resnet_e.py index 9a0b429..32c1722 100644 --- a/bitorch/models/resnet_e.py +++ b/bitorch/models/resnet_e.py @@ -2,9 +2,8 @@ Resnet_E implementation from `"Back to Simplicity: How to Train Accurate BNNs from Scratch?" `_ paper. 
""" -from bitorch.datasets.base import BasicDataset -from .base import Model -from typing import List, Any +from .base import Model, NoArgparseArgsMixin +from typing import Optional, List, Any import torch import argparse from torch import nn @@ -13,7 +12,7 @@ from bitorch.layers import QConv2d from bitorch.models.common_layers import get_initial_layers -__all__ = ['ResnetE34', 'ResnetE18', 'ResnetE'] +__all__ = ["ResnetE34", "ResnetE18", "ResnetE"] class BasicBlock(nn.Module): @@ -49,7 +48,14 @@ def _build_downsampling(self) -> nn.Sequential: """ return nn.Sequential( nn.AvgPool2d(kernel_size=2, stride=self.stride), - nn.Conv2d(self.in_channels, self.out_channels, kernel_size=1, stride=1, padding=0, bias=False), + nn.Conv2d( + self.in_channels, + self.out_channels, + kernel_size=1, + stride=1, + padding=0, + bias=False, + ), nn.BatchNorm2d(self.out_channels, momentum=0.9), ) @@ -61,8 +67,14 @@ def _build_body(self) -> nn.Sequential: nn.Sequential: the basic building block body model """ return nn.Sequential( - QConv2d(self.in_channels, self.out_channels, kernel_size=3, stride=self.stride, padding=1, bias=False, - input_quantization="sign", weight_quantization="sign"), + QConv2d( + self.in_channels, + self.out_channels, + kernel_size=3, + stride=self.stride, + padding=1, + bias=False, + ), nn.BatchNorm2d(self.out_channels, momentum=0.9), ) @@ -109,7 +121,6 @@ def make_layer(self, layers: int, in_channels: int, out_channels: int, stride: i Returns: nn.Sequential: the model containing the building blocks """ - # this tricks adds shortcut connections between original resnet blocks # we multiple number of blocks by 2, but add only one layer instead of two in each block layers = layers * 2 @@ -157,12 +168,13 @@ class _ResnetE(SpecificResnetE): """ def __init__( - self, - layers: list, - channels: list, - classes: int, - initial_layers: str = "imagenet", - image_channels: int = 3) -> None: + self, + layers: list, + channels: list, + classes: int, + image_resolution: Optional[List[int]] = None, + image_channels: int = 3, + ) -> None: """Creates ResNetE model. Args: @@ -170,8 +182,8 @@ def __init__( channels (list): channel num used for input/output channel size of layers. there must always be one more channels than there are layers. classes (int): number of output classes - initial_layers (str, optional): name of set for initial layers. refer to common_layers.py. - Defaults to "imagenet". + image_resolution (List[int], optional): resolution of input image. refer to common_layers.py. + Defaults to None. image_channels (int, optional): input channels of images. Defaults to 3. Raises: @@ -180,11 +192,12 @@ def __init__( super(_ResnetE, self).__init__(classes, channels) if len(channels) != (len(layers) + 1): raise ValueError( - f"the len of channels ({len(channels)}) must be exactly the len of layers ({len(layers)}) + 1!") + f"the len of channels ({len(channels)}) must be exactly the len of layers ({len(layers)}) + 1!" 
+ ) feature_layers: List[nn.Module] = [] # feature_layers.append(nn.BatchNorm2d(image_channels, eps=2e-5, momentum=0.9)) - feature_layers.extend(get_initial_layers(initial_layers, image_channels, channels[0])) + feature_layers.extend(get_initial_layers(image_resolution, image_channels, channels[0])) feature_layers.append(nn.BatchNorm2d(channels[0], momentum=0.9)) feature_layers.extend(self.make_feature_layers(layers, channels)) @@ -203,34 +216,23 @@ def __init__( class ResnetE(Model): - name = "resnete" + name = "ResnetE" - resnet_spec = {18: ([2, 2, 2, 2], [64, 64, 128, 256, 512]), - 34: ([3, 4, 6, 3], [64, 64, 128, 256, 512])} + resnet_spec = { + 18: ([2, 2, 2, 2], [64, 64, 128, 256, 512]), + 34: ([3, 4, 6, 3], [64, 64, 128, 256, 512]), + } - def __init__( - self, - resnete_num_layers: int, - dataset: BasicDataset) -> None: - super(ResnetE, self).__init__(dataset) - self._model = self.create(resnete_num_layers, self._dataset.num_classes, - self._dataset.name, self._dataset.shape[1]) + def __init__(self, resnete_num_layers: int, input_shape: List[int], num_classes: int = 0) -> None: + super(ResnetE, self).__init__(input_shape, num_classes) + self._model = self.create(resnete_num_layers) logging.info(f"building ResnetE with {str(resnete_num_layers)} layers...") - @classmethod - def create( - cls, - num_layers: int, - classes: int = 1000, - initial_layers: str = "imagenet", - image_channels: int = 3) -> nn.Module: + def create(self, num_layers: int) -> nn.Module: """Creates a ResNetE complying to given layer number. Args: num_layers (int): number of layers to be build. - classes (int, optional): number of output classes. Defaults to 1000. - initial_layers (str, optional): name of set of initial layers to be used. Defaults to "imagenet". - image_channels (int, optional): number of channels of input images. Defaults to 3. Raises: ValueError: raised if no resnet specification for given num_layers is listed in the resnet_spec dict above @@ -238,44 +240,43 @@ def create( Returns: Module: resnetE model """ - if num_layers not in cls.resnet_spec: + if num_layers not in self.resnet_spec: raise ValueError(f"No resnet spec for {num_layers} available!") - layers, channels = cls.resnet_spec[num_layers] + layers, channels = self.resnet_spec[num_layers] + image_channels = self._input_shape[1] + image_resolution = self._input_shape[-2:] - return _ResnetE(layers, channels, classes, initial_layers, image_channels) + return _ResnetE(layers, channels, self._num_classes, image_resolution, image_channels) @staticmethod def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - parser.add_argument("--resnetE-num-layers", type=int, choices=[18, 34], required=True, - help="number of layers to be used inside resnetE") + parser.add_argument( + "--num-layers", + type=int, + choices=[18, 34], + required=True, + help="number of layers to be used inside resnetE", + ) -class ResnetE18(ResnetE): +class ResnetE18(NoArgparseArgsMixin, ResnetE): """ResNetE-18 model from `"Back to Simplicity: How to Train Accurate BNNs from Scratch?" `_ paper. """ - name = "resnete18" + name = "ResnetE18" def __init__(self, *args: Any, **kwargs: Any) -> None: super(ResnetE18, self).__init__(18, *args, **kwargs) - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass - -class ResnetE34(ResnetE): +class ResnetE34(NoArgparseArgsMixin, ResnetE): """ResNetE-34 model from `"Back to Simplicity: How to Train Accurate BNNs from Scratch?" `_ paper. 
""" - name = "resnete34" + name = "ResnetE34" def __init__(self, *args: Any, **kwargs: Any) -> None: super(ResnetE34, self).__init__(34, *args, **kwargs) - - @staticmethod - def add_argparse_arguments(parser: argparse.ArgumentParser) -> None: - pass diff --git a/bitorch/optimization/__init__.py b/bitorch/optimization/__init__.py deleted file mode 100644 index b70c7c2..0000000 --- a/bitorch/optimization/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -This submodule contains a custom implementation of the `rectified adam optimizer ` -""" diff --git a/bitorch/optimization/radam.py b/bitorch/optimization/radam.py deleted file mode 100644 index 5630a71..0000000 --- a/bitorch/optimization/radam.py +++ /dev/null @@ -1,301 +0,0 @@ -"""RAdam implementation copied from https://github.com/LiyuanLucasLiu/RAdam/blob/master/radam/radam.py. - -It has been proposed in `On the Variance of the Adaptive Learning Rate and Beyond`. -https://arxiv.org/abs/1908.03265 - -""" - -import math -from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union -import torch -from torch.functional import Tensor -from torch.optim.optimizer import Optimizer - - -class RAdam(Optimizer): - - def __init__( - self, - params: Union[Iterable[Tensor], Iterable[Dict[Any, Any]]], - lr: float = 1e-3, - betas: Tuple[float, float] = (0.9, 0.999), - eps: float = 1e-8, - weight_decay: float = 0, - degenerated_to_sgd: bool = True) -> None: - """Initialises RAdam optimizer - - Args: - params (Union[Iterable[Tensor], Iterable[Dict[Any, Any]]]): iterable of parameters to optimize or dicts - defining parameter groups - lr (float, optional): learning range. Defaults to 1e-3. - betas (Tuple[float, float], optional): coefficients used for computing running averages of gradient and its - square. Defaults to (0.9, 0.999). - eps (float, optional): term added to the denominator to improve numerical stability. Defaults to 1e-8. - weight_decay (float, optional): weight decay (L2 penality). Defaults to 0. - degenerated_to_sgd (bool, optional): toggles wether to use sgd step. Defaults to True. 
- - Raises: - ValueError: thrown if lr <= 0.0 - ValueError: thrown if eps <= 0.0 - ValueError: thrown if first beta value <= 0 - ValueError: thrown if second beta value <= 0 - """ - if not 0.0 <= lr: - raise ValueError("Invalid learning rate: {}".format(lr)) - if not 0.0 <= eps: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if not 0.0 <= betas[0] < 1.0: - raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) - if not 0.0 <= betas[1] < 1.0: - raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) - - self.degenerated_to_sgd = degenerated_to_sgd - if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict): - for param in params: - if 'betas' in param and (param['betas'][0] != betas[0] or param['betas'][1] != betas[1]): - param['buffer'] = [[None, None, None] for _ in range(10)] - defaults = dict( - lr=lr, - betas=betas, - eps=eps, - weight_decay=weight_decay, - buffer=[[None, None, None] for _ in range(10)] - ) - super(RAdam, self).__init__(params, defaults) - - def __getstate__(self) -> dict: - # for correct pickling of this class (necessary for mp.spawn) - optimizer_state = super(RAdam, self).__getstate__() # type: ignore - optimizer_state["degenerated_to_sgd"] = self.degenerated_to_sgd - return optimizer_state - - def step(self, closure: Callable = None) -> Optional[float]: - - loss = None - if closure is not None: - loss = closure() - - for group in self.param_groups: - - for p in group['params']: - if p.grad is None: - continue - grad = p.grad.data.float() - if grad.is_sparse: - raise RuntimeError('RAdam does not support sparse gradients') - - p_data_fp32 = p.data.float() - - state = self.state[p] - - if len(state) == 0: - state['step'] = 0 - state['exp_avg'] = torch.zeros_like(p_data_fp32) - state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) - else: - state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) - state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) - - exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] - beta1, beta2 = group['betas'] - - exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) - exp_avg.mul_(beta1).add_(1 - beta1, grad) - - state['step'] += 1 - buffered = group['buffer'][int(state['step'] % 10)] - if state['step'] == buffered[0]: - n_sma, step_size = buffered[1], buffered[2] - else: - buffered[0] = state['step'] - beta2_t = beta2 ** state['step'] - n_sma_max = 2 / (1 - beta2) - 1 - n_sma = n_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) - buffered[1] = n_sma - - # more conservative since it's an approximated value - if n_sma >= 5: - step_size = math.sqrt( - (1 - beta2_t) * (n_sma - 4) - / (n_sma_max - 4) * (n_sma - 2) - / n_sma * n_sma_max / (n_sma_max - 2)) / (1 - beta1 ** state['step']) - elif self.degenerated_to_sgd: - step_size = 1.0 / (1 - beta1 ** state['step']) - else: - step_size = -1 - buffered[2] = step_size - - # more conservative since it's an approximated value - if n_sma >= 5: - if group['weight_decay'] != 0: - p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) - denom = exp_avg_sq.sqrt().add_(group['eps']) - p_data_fp32.addcdiv_(-step_size * group['lr'], exp_avg, denom) - p.data.copy_(p_data_fp32) - elif step_size > 0: - if group['weight_decay'] != 0: - p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) - p_data_fp32.add_(-step_size * group['lr'], exp_avg) - p.data.copy_(p_data_fp32) - - return loss - - -class PlainRAdam(Optimizer): - def __init__( - self, - params: Union[Iterable[Tensor], 
Iterable[Dict[Any, Any]]], - lr: float = 1e-3, - betas: Tuple[float, float] = (0.9, 0.999), - eps: float = 1e-8, - weight_decay: float = 0, - degenerated_to_sgd: bool = True) -> None: - if not 0.0 <= lr: - raise ValueError("Invalid learning rate: {}".format(lr)) - if not 0.0 <= eps: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if not 0.0 <= betas[0] < 1.0: - raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) - if not 0.0 <= betas[1] < 1.0: - raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) - - self.degenerated_to_sgd = degenerated_to_sgd - defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) - - super(PlainRAdam, self).__init__(params, defaults) - - def step(self, closure: Callable = None) -> Optional[float]: - - loss = None - if closure is not None: - loss = closure() - - for group in self.param_groups: - - for p in group['params']: - if p.grad is None: - continue - grad = p.grad.data.float() - if grad.is_sparse: - raise RuntimeError('RAdam does not support sparse gradients') - - p_data_fp32 = p.data.float() - - state = self.state[p] - - if len(state) == 0: - state['step'] = 0 - state['exp_avg'] = torch.zeros_like(p_data_fp32) - state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) - else: - state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) - state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) - - exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] - beta1, beta2 = group['betas'] - - exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) - exp_avg.mul_(beta1).add_(1 - beta1, grad) - - state['step'] += 1 - beta2_t = beta2 ** state['step'] - n_sma_max = 2 / (1 - beta2) - 1 - n_sma = n_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t) - - # more conservative since it's an approximated value - if n_sma >= 5: - if group['weight_decay'] != 0: - p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) - step_size = group['lr'] * math.sqrt( - (1 - beta2_t) * (n_sma - 4) - / (n_sma_max - 4) * (n_sma - 2) - / n_sma * n_sma_max / (n_sma_max - 2)) / (1 - beta1 ** state['step']) - denom = exp_avg_sq.sqrt().add_(group['eps']) - p_data_fp32.addcdiv_(-step_size, exp_avg, denom) - p.data.copy_(p_data_fp32) - elif self.degenerated_to_sgd: - if group['weight_decay'] != 0: - p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32) - step_size = group['lr'] / (1 - beta1 ** state['step']) - p_data_fp32.add_(-step_size, exp_avg) - p.data.copy_(p_data_fp32) - - return loss - - -class AdamW(Optimizer): - - def __init__( - self, - params: Union[Iterable[Tensor], Iterable[Dict[Any, Any]]], - lr: float = 1e-3, - betas: Tuple[float, float] = (0.9, 0.999), - eps: float = 1e-8, - weight_decay: float = 0, - warmup: int = 0) -> None: - if not 0.0 <= lr: - raise ValueError("Invalid learning rate: {}".format(lr)) - if not 0.0 <= eps: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if not 0.0 <= betas[0] < 1.0: - raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) - if not 0.0 <= betas[1] < 1.0: - raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) - - defaults = dict(lr=lr, betas=betas, eps=eps, - weight_decay=weight_decay, warmup=warmup) - super(AdamW, self).__init__(params, defaults) - - def step(self, closure: Callable = None) -> Optional[float]: - loss = None - if closure is not None: - loss = closure() - - for group in self.param_groups: - - for p in group['params']: - if p.grad is None: - continue - grad = 
p.grad.data.float() - if grad.is_sparse: - raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') - - p_data_fp32 = p.data.float() - - state = self.state[p] - - if len(state) == 0: - state['step'] = 0 - state['exp_avg'] = torch.zeros_like(p_data_fp32) - state['exp_avg_sq'] = torch.zeros_like(p_data_fp32) - else: - state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32) - state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32) - - exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] - beta1, beta2 = group['betas'] - - state['step'] += 1 - - exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) - exp_avg.mul_(beta1).add_(1 - beta1, grad) - - denom = exp_avg_sq.sqrt().add_(group['eps']) - bias_correction1 = 1 - beta1 ** state['step'] - bias_correction2 = 1 - beta2 ** state['step'] - - if group['warmup'] > state['step']: - scheduled_lr = 1e-8 + state['step'] * group['lr'] / group['warmup'] - else: - scheduled_lr = group['lr'] - - step_size = scheduled_lr * math.sqrt(bias_correction2) / bias_correction1 - - if group['weight_decay'] != 0: - p_data_fp32.add_(-group['weight_decay'] * scheduled_lr, p_data_fp32) - - p_data_fp32.addcdiv_(-step_size, exp_avg, denom) - - p.data.copy_(p_data_fp32) - - return loss diff --git a/bitorch/quantizations/__init__.py b/bitorch/quantizations/__init__.py index 5f5c8ee..fa9d48a 100644 --- a/bitorch/quantizations/__init__.py +++ b/bitorch/quantizations/__init__.py @@ -5,7 +5,7 @@ If you want to implement a new function, use the :code:`Quantization` base class as superclass. """ -from typing import List, Type +from typing import List, Type, Dict from .base import Quantization from .approx_sign import ApproxSign @@ -14,15 +14,29 @@ from .sign import Sign from .ste_heaviside import SteHeaviside from .swish_sign import SwishSign +from .progressive_sign import ProgressiveSign +from .quantization_scheduler import Quantization_Scheduler, ScheduledQuantizer from ..util import build_lookup_dictionary __all__ = [ - "Quantization", "ApproxSign", "InputDoReFa", "WeightDoReFa", "Identity", "Sign", - "SteHeaviside", "SwishSign", + "Quantization", + "quantization_from_name", + "quantization_names", + "register_custom_quantization", + "ApproxSign", + "InputDoReFa", + "WeightDoReFa", + "Identity", + "ProgressiveSign", + "Sign", + "SteHeaviside", + "SwishSign", + "Quantization_Scheduler", + "ScheduledQuantizer", ] -quantizations_by_name = build_lookup_dictionary(__name__, __all__, Quantization) +quantizations_by_name: Dict[str, Type[Quantization]] = build_lookup_dictionary(__name__, __all__, Quantization) def quantization_from_name(name: str) -> Type[Quantization]: @@ -50,3 +64,13 @@ def quantization_names() -> List: List: the quantization names """ return list(quantizations_by_name.keys()) + + +def register_custom_quantization(custom_quantization: Type[Quantization]) -> None: + """ + Register a custom (external) quantization in bitorch. 
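+
+    A minimal usage sketch (``MyQuant`` is a hypothetical subclass):
+        class MyQuant(Quantization):
+            name = "my_quant"
+            bit_width = 1
+            def quantize(self, x): ...
+        register_custom_quantization(MyQuant)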
+ + Args: + custom_quantization: the custom config which should be added to bitorch + """ + quantizations_by_name[custom_quantization.name] = custom_quantization diff --git a/bitorch/quantizations/approx_sign.py b/bitorch/quantizations/approx_sign.py index e64718a..4e5f2fb 100644 --- a/bitorch/quantizations/approx_sign.py +++ b/bitorch/quantizations/approx_sign.py @@ -12,8 +12,8 @@ class ApproxSignFunction(Function): @staticmethod @typing.no_type_check def forward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - input_tensor: torch.Tensor) -> torch.Tensor: + ctx: torch.autograd.function.BackwardCFunction, input_tensor: torch.Tensor # type: ignore + ) -> torch.Tensor: """Binarize input tensor using the _sign function. Args: @@ -25,14 +25,14 @@ def forward( ctx.save_for_backward(input_tensor) sign_tensor = torch.sign(input_tensor) - sign_tensor = torch.where(sign_tensor == 0, torch.tensor(1., device=sign_tensor.device), sign_tensor) + sign_tensor = torch.where(sign_tensor == 0, torch.tensor(1.0, device=sign_tensor.device), sign_tensor) return sign_tensor @staticmethod @typing.no_type_check def backward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - output_grad: torch.Tensor) -> torch.Tensor: + ctx: torch.autograd.function.BackwardCFunction, output_grad: torch.Tensor # type: ignore + ) -> torch.Tensor: """Apply approx sign function. used e.g. for birealnet Args: @@ -44,7 +44,7 @@ def backward( """ input_tensor = ctx.saved_tensors[0] # produces zeros where preactivation inputs exceeded threshold, ones otherwise - inside_threshold = (torch.abs(input_tensor) <= 1) + inside_threshold = torch.abs(input_tensor) <= 1 approx_sign = (2.0 - 2.0 * torch.abs(input_tensor)) * inside_threshold return approx_sign * output_grad @@ -53,7 +53,7 @@ class ApproxSign(Quantization): """Module for applying the sign function with approx sign in backward pass""" name = "approxsign" - bitwidth = 1 + bit_width = 1 def quantize(self, x: torch.Tensor) -> torch.Tensor: """Forwards the tensor through the approx sign function. diff --git a/bitorch/quantizations/base.py b/bitorch/quantizations/base.py index 8505a36..c2f8e83 100644 --- a/bitorch/quantizations/base.py +++ b/bitorch/quantizations/base.py @@ -1,10 +1,12 @@ """Quantization superclass implementation""" -import torch import typing +from typing import Any +from warnings import warn + +import torch from torch import nn from torch.autograd.function import Function -from typing import Any class STE(Function): @@ -13,18 +15,10 @@ class STE(Function): @staticmethod @typing.no_type_check def forward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - input_tensor: torch.Tensor) -> torch.Tensor: - """just fowards the unchanged input_tensor. 
- - Args: - ctx (Any): autograd context - input_tensor (torch.Tensor): input tensor - - Returns: - torch.Tensor: the unchanged input tensor - """ - return input_tensor + ctx: torch.autograd.function.BackwardCFunction, # type: ignore + input_tensor: torch.Tensor, + ) -> torch.Tensor: + raise NotImplementedError("Forwards pass of STE should be implemented by subclass.") @staticmethod @typing.no_type_check @@ -44,12 +38,19 @@ def backward(ctx: Any, output_gradient: torch.Tensor) -> torch.Tensor: class Quantization(nn.Module): """superclass for quantization modules""" - name = "None" - bitwidth = -1 + name: str = "None" + bit_width: int = -1 + + @property + def bitwidth(self) -> int: + warn("Attribute 'bitwidth' is deprecated, use 'bit_width' instead.", DeprecationWarning, stacklevel=2) + return self.bit_width def quantize(self, x: torch.Tensor) -> torch.Tensor: - """quantize the input tensor. It is recommended to use a torch.Function to also maniputlate backward behaiviour. See - the implementations of sign or dorefa quantization functions for more examples. + """Apply the quantization function to the input tensor. + + It is recommended to use a torch.Function to also manipulate backwards behavior. + See the implementations of sign or dorefa quantization functions for more examples. Args: x (torch.Tensor): the input to be quantized diff --git a/bitorch/quantizations/dorefa.py b/bitorch/quantizations/dorefa.py index e63c0a1..54cc314 100644 --- a/bitorch/quantizations/dorefa.py +++ b/bitorch/quantizations/dorefa.py @@ -8,6 +8,44 @@ from .config import config +class WeightDoReFaFunction(Function): + @staticmethod + @typing.no_type_check + def forward( + ctx: torch.autograd.function.BackwardCFunction, input_tensor: torch.Tensor, maximum_bit_value: int + ) -> torch.Tensor: + """quantizes input tensor and forwards it. + + Args: + ctx (Any): autograd context + input_tensor (torch.Tensor): input tensor + bits (int): number of bits to round the input tensor to + + Returns: + torch.Tensor: the quantized input tensor + """ + ctx.save_for_backward(input_tensor) + + squashed_values = torch.tanh(input_tensor) + max_val = torch.max(torch.abs(squashed_values)).detach() + adjusted_values = squashed_values / (2.0 * max_val) + 0.5 + return 2.0 * (torch.round(adjusted_values * maximum_bit_value) / maximum_bit_value) - 1.0 + + @staticmethod + @typing.no_type_check + def backward(ctx: Any, output_gradient: torch.Tensor) -> torch.Tensor: + """just passes the unchanged output gradient as input gradient. + + Args: + ctx (Any): autograd context + output_gradient (torch.Tensor): output gradient + + Returns: + torch.Tensor: the unchanged output gradient + """ + return output_gradient, None, None + + class WeightDoReFa(Quantization): """Module for applying the dorefa function on weights. @@ -16,7 +54,7 @@ class WeightDoReFa(Quantization): """ name = "weightdorefa" - bitwidth = config.dorefa_bits + bit_width = config.dorefa_bits def __init__(self, bits: Union[int, None] = None) -> None: """Initiates quantization bits. @@ -25,8 +63,8 @@ def __init__(self, bits: Union[int, None] = None) -> None: bits (int, optional): number of bits to quantize into. Defaults to None. """ super(WeightDoReFa, self).__init__() - self.bitwidth = bits or config.dorefa_bits - self._max_value = 2 ** self.bitwidth - 1 + self.bit_width = bits or config.dorefa_bits + self._max_value = 2**self.bit_width - 1 def quantize(self, x: torch.Tensor) -> torch.Tensor: """DoReFas the tensor to desired bit resolution using weight dorefa. 
@@ -37,18 +75,15 @@ def quantize(self, x: torch.Tensor) -> torch.Tensor: Returns: torch.Tensor: DoReFaed tensor x """ - squashed_values = torch.tanh(x) - max_val = torch.max(torch.abs(squashed_values)).detach() - adjusted_values = squashed_values / (2.0 * max_val) + 0.5 - return 2.0 * (torch.round(adjusted_values * self._max_value) / self._max_value) - 1.0 + return WeightDoReFaFunction.apply(x, self._max_value) class InputDoReFaFunction(Function): @staticmethod @typing.no_type_check def forward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - input_tensor: torch.Tensor, bits: int) -> torch.Tensor: + ctx: torch.autograd.function.BackwardCFunction, input_tensor: torch.Tensor, bits: int # type: ignore + ) -> torch.Tensor: """quantizes input tensor and forwards it. Args: @@ -59,7 +94,7 @@ def forward( Returns: torch.Tensor: the quantized input tensor """ - max_value = 2 ** bits - 1 + max_value = 2**bits - 1 quantized_tensor = torch.round(torch.clamp(input_tensor, 0, 1) * max_value) / max_value return quantized_tensor @@ -87,7 +122,7 @@ class InputDoReFa(Quantization): """ name = "inputdorefa" - bitwidth = config.dorefa_bits + bit_width = config.dorefa_bits def __init__(self, bits: Union[int, None] = None) -> None: """Initiates quantization bits. @@ -96,7 +131,7 @@ def __init__(self, bits: Union[int, None] = None) -> None: bits (int, optional): number of bits to quantize into. Defaults to None. """ super(InputDoReFa, self).__init__() - self.bitwidth = bits or config.dorefa_bits + self.bit_width = bits or config.dorefa_bits def quantize(self, x: torch.Tensor) -> torch.Tensor: """DoReFas the tensor to desired bit resolution. @@ -107,5 +142,4 @@ def quantize(self, x: torch.Tensor) -> torch.Tensor: Returns: torch.Tensor: DoReFaed tensor x """ - - return InputDoReFaFunction.apply(x, self.bitwidth) + return InputDoReFaFunction.apply(x, self.bit_width) diff --git a/bitorch/quantizations/identity.py b/bitorch/quantizations/identity.py index 83af174..7d68d51 100644 --- a/bitorch/quantizations/identity.py +++ b/bitorch/quantizations/identity.py @@ -8,7 +8,7 @@ class Identity(Quantization): """Module that provides the identity function, which can be useful for certain training strategies""" name = "identity" - bitwidth = 32 + bit_width = 32 def quantize(self, x: torch.Tensor) -> torch.Tensor: """forwards the input tensor x without quantization. 
diff --git a/bitorch/quantizations/progressive_sign.py b/bitorch/quantizations/progressive_sign.py new file mode 100644 index 0000000..1c6c2e0 --- /dev/null +++ b/bitorch/quantizations/progressive_sign.py @@ -0,0 +1,159 @@ +"""Progressive Sign Function""" +import typing +from typing import Any, Callable, Optional, Union + +import torch +import torch.nn.functional as F +from torch.autograd.function import Function + +from bitorch.config import Config +from .base import Quantization +from .sign import SignFunction + +EPSILON = 1e-7 + + +class ProgressiveSignConfig(Config): + name = "progressive_sign_config" + + # scaling of progressive sign function, should be zero at the start of the training, and (close to) one at the end + progressive_sign_scale = 0.0 + + # alpha of default progressive sign transform function, should be between 2 and 10 + progressive_sign_alpha = 4 + + # beta of default progressive sign transform function, should be between 2 and 10 + progressive_sign_beta = 10 + + +config = ProgressiveSignConfig() + + +class ProgressiveSignFunctionTrain(Function): + @staticmethod + @typing.no_type_check + def forward( + ctx: torch.autograd.function.BackwardCFunction, # type: ignore + input_tensor: torch.Tensor, + temperature: float, + ) -> torch.Tensor: + """Binarize the input tensor using the sign function + + Args: + ctx (Any): autograd context + input_tensor (torch.Tensor): input tensor + temperature: the temperature of the incline + + Returns: + torch.Tensor: the sign tensor + """ + ctx.save_for_backward(input_tensor) + # avoid division by zero with EPSILON + return F.hardtanh(input_tensor / max(1.0 - temperature, EPSILON)) + + @staticmethod + @typing.no_type_check + def backward(ctx: Any, output_gradient: torch.Tensor) -> torch.Tensor: + return output_gradient, None # type: ignore + + +class ProgressiveSign(Quantization): + """ + Module for applying a progressive sign function with STE during training. + + During validation a regular sign function is used. + This can lead to a significant accuracy difference during the first epochs. + With a temperature of one this function is basically equal to a regular sign function. + """ + + name = "progressive_sign" + bit_width = 1 + + scale: float + global_scaling: bool + alpha: Union[int, float] + beta: Union[int, float] + + def __init__( + self, + use_global_scaling: bool = True, + initial_scale: Optional[float] = None, + custom_transform: Optional[Callable[[float], float]] = None, + alpha: Optional[Union[int, float]] = None, + beta: Optional[Union[int, float]] = None, + ) -> None: + """ + Initialize the progressive sign module (can be used for progressive weight binarization). + + If `use_global_scaling` is set to False, the scale of this module must be set manually. + Otherwise, the value can be set for all progressive sign modules in the config. + + Args: + use_global_scaling: whether to use the global scaling variable stored in the config + initial_scale: if not using global scaling you can set an initial scale + custom_transform: to use a custom transform function from scale to temperature, add it here + alpha: parameters of default transform function + beta: parameters of default transform function + """ + super().__init__() + if initial_scale is not None and use_global_scaling: + raise RuntimeWarning( + "An initial scale was set on ProgressiveSign, but this has not effect, " + "since use_global_scaling is True." 
+ ) + self.global_scaling = use_global_scaling + self.scale = initial_scale or config.progressive_sign_scale + self.custom_transform = custom_transform + self.alpha = alpha or config.progressive_sign_alpha + self.beta = beta or config.progressive_sign_beta + + @property + def current_scale(self) -> float: + """Return the current scale of this Progressive Sign layer.""" + if self.global_scaling: + return config.progressive_sign_scale + return self.scale + + @staticmethod + def default_transform( + scale: float, alpha: Optional[Union[int, float]] = None, beta: Optional[Union[int, float]] = None + ) -> float: + """Transform the given scale into the temperature of the progressive sign function with the default function. + + The formula is as follows: 1 - (alpha ** (-beta * scale)) + + Args: + scale: the current scale + alpha: base of default exponential function + beta: (negative) factor of scale exponent + """ + if alpha is None: + alpha = config.progressive_sign_alpha + if beta is None: + beta = config.progressive_sign_beta + return 1 - (alpha ** (-beta * scale)) + + def transform(self, scale: float) -> float: + """Transform the given scale into a steady temperature increase, higher at the start, and much less at the end. + + Args: + scale: the current scale + """ + if self.custom_transform is not None: + return self.custom_transform(scale) + return self.default_transform(scale, self.alpha, self.beta) + + def quantize(self, x: torch.Tensor) -> torch.Tensor: + """Forwards the tensor through the sign function. + + Args: + x (torch.Tensor): tensor to be forwarded. + + Returns: + torch.Tensor: sign of tensor x + """ + if self.training: + temperature = self.transform(self.current_scale) + return ProgressiveSignFunctionTrain.apply(x, temperature) + else: + return SignFunction.apply(x) diff --git a/bitorch/quantizations/quantization_scheduler.py b/bitorch/quantizations/quantization_scheduler.py new file mode 100644 index 0000000..6c1ddfc --- /dev/null +++ b/bitorch/quantizations/quantization_scheduler.py @@ -0,0 +1,196 @@ +""" +Implementation of a quantization scheduler which replaces quantization functions inside a given model during +training. This module also contains various scheduling procedure implementations which can be extended in future +versions +""" + +from torch.nn import Module +import torch +from typing import List, Type +from .base import Quantization +from copy import deepcopy + + +class ScheduledQuantizer(Quantization): + """Base class for scheduled quantizers to inherit from. You can also use this quantization method + to indicate to the quantization scheduler that only this quantization should be scheduled. + + e.g. 
+    ```
+    model = Sequential(
+        QConv2d(3, 64, input_quantization="scheduled_quantizer", weight_quantization="sign"),
+        ReLU(),
+        Flatten(),
+        QLinear(1000, 10, input_quantization="sign", weight_quantization="sign"),
+        Softmax(),
+    )
+    # this replaces all quantizations in the model with scheduled quantizers and schedules them during training
+    scheduler = Quantization_Scheduler(model, 10, [Identity(), InputDoReFa()], "mix_linear", True)
+
+    # this only replaces the one instance of the ScheduledQuantizer and leaves the rest unchanged
+    scheduler = Quantization_Scheduler(model, 10, [Identity(), InputDoReFa()], "mix_linear", False)
+    ```
+
+    """
+
+    name = "scheduled_quantizer"
+    bit_width = 32
+
+    def __init__(self, quantizations: List[Quantization] = [], steps: int = 0) -> None:
+        """Initializes the scheduled quantizer and sets bit_width to that of the last quantization to be scheduled.
+
+        Args:
+            quantizations (List[Quantization]): list of quantizations to be scheduled
+            steps (int): number of steps. At the end of each step, the step() method has to be called once.
+        """
+        super().__init__()
+        self.quantizations = [deepcopy(quantization) for quantization in quantizations]
+        if len(quantizations) > 0:
+            self.bit_width = self.quantizations[-1].bit_width if hasattr(self.quantizations[-1], "bit_width") else 32
+        self.step_count = 0
+        self.factor = 0.0
+        self.steps = steps
+
+    def step(self) -> None:
+        """increments the step count and updates the internal factor variable"""
+        self.step_count += 1
+        self.factor = self.step_count / self.steps
+        self.factor = min(self.factor, 1.0)
+
+    def quantize(self, x: torch.Tensor) -> torch.Tensor:
+        """dummy quantization function for compatibility reasons.
+
+        Args:
+            x (torch.Tensor): input tensor
+
+        Returns:
+            torch.Tensor: unchanged input tensor
+        """
+        return x
+
+
+class MixLinearScheduling(ScheduledQuantizer):
+    name = "__mixlinarscheduling__"
+
+    def quantize(self, x: torch.Tensor) -> torch.Tensor:
+        """interpolates linearly between the outputs of the specified quantizations.
+
+        Args:
+            x (torch.Tensor): input tensor
+
+        Returns:
+            torch.Tensor: quantized output tensor
+        """
+        if len(self.quantizations) == 1:
+            return self.quantizations[0](x)
+
+        scaled_mix_factor = self.factor * (len(self.quantizations) - 1)
+        lower_idx = int(scaled_mix_factor)
+        higher_idx = lower_idx + 1
+        if higher_idx == len(self.quantizations):
+            return self.quantizations[lower_idx](x)
+
+        inter_unit_mix_factor = scaled_mix_factor - lower_idx
+        return self.quantizations[higher_idx](x) * inter_unit_mix_factor + self.quantizations[lower_idx](x) * (
+            1.0 - inter_unit_mix_factor
+        )
+
+
+class StepScheduling(ScheduledQuantizer):
+    name = "__stepscheduling__"
+
+    def quantize(self, x: torch.Tensor) -> torch.Tensor:
+        """switches between the specified quantizations in discrete steps.
+
+        Args:
+            x (torch.Tensor): input tensor
+
+        Returns:
+            torch.Tensor: quantized output tensor
+        """
+        quantization_idx = min(int(self.factor * len(self.quantizations)), len(self.quantizations) - 1)
+        return self.quantizations[quantization_idx](x)
+
+
+class Quantization_Scheduler(Module):
+
+    procedure_classes = {"mix_linear": MixLinearScheduling, "step": StepScheduling}
+
+    def __init__(
+        self,
+        model: Module,
+        steps: int,
+        quantizations: List[Quantization],
+        scheduling_procedure: str,
+        schedule_all_quantizations: bool = False,
+        exclude_layers: List[Type] = [],
+    ) -> None:
+        """Initiates the quantization scheduler and replaces the quantization functions inside the model with
+        scheduled quantizers.
+
+        Args:
+            model (Module): model whose quantization functions shall be scheduled
+            steps (int): number of steps, e.g. number of epochs. Each step, the step() method has to be called once to
+                update all scheduled quantizers.
+            quantizations (List[Quantization]): Quantization functions to be scheduled
+            scheduling_procedure (str): procedure to be used for scheduling. See available subclasses of
+                ScheduledQuantizer
+            schedule_all_quantizations (bool): toggles whether all quantizations in the model shall be replaced with
+                scheduled quantizers or whether only the scheduled quantizer layers already present shall be used for
+                scheduling. Defaults to False.
+            exclude_layers (List[Type], optional): list of layer types to exclude from replacement with scheduled
+                quantizers. Defaults to [].
+        """
+        super().__init__()
+
+        assert steps > 0, "steps has to be an integer > 0"
+        assert isinstance(quantizations, list)
+        assert len(quantizations) > 0
+
+        self.quantizations = quantizations
+        self.steps = steps
+
+        self.scheduled_quantizer = self.get_scheduled_quantizer(scheduling_procedure)
+
+        self.scheduled_quantizer_instances: List[ScheduledQuantizer] = []
+        self.replace_quantizations(model, exclude_layers, schedule_all_quantizations)
+
+    def get_scheduled_quantizer(self, procedure: str) -> Type:
+        """gets the scheduling class associated with the given scheduling procedure
+
+        Args:
+            procedure (str): name of the scheduling procedure to be used
+
+        Returns:
+            Type: a subclass of ScheduledQuantizer
+        """
+        return self.procedure_classes[procedure]
+
+    def replace_quantizations(self, model: Module, exclude_layers: List[Type], replace_all_quantizations: bool) -> None:
+        """replaces all quantization functions present in the model with a scheduled quantizer.
+        Iterates recursively over the model's layers.
+
+        Args:
+            model (Module): model to have the quantization functions replaced
+            exclude_layers (List[Type]): list of layers to exclude from replacement, e.g.
if QConv2d is specified, + the quantization functions from all QConv2d layers (input and weight) are not replaced + replace_all_quantizations (bool): toggles weather to replace all quantizations or just the instances + of ScheduledQuantizer + """ + for name in dir(model): + module = getattr(model, name) + if replace_all_quantizations and issubclass(type(module), Quantization): + self.scheduled_quantizer_instances.append(self.scheduled_quantizer(self.quantizations, self.steps)) + setattr(model, name, self.scheduled_quantizer_instances[-1]) + elif not replace_all_quantizations and issubclass(type(module), ScheduledQuantizer): + self.scheduled_quantizer_instances.append(self.scheduled_quantizer(self.quantizations, self.steps)) + setattr(model, name, self.scheduled_quantizer_instances[-1]) + + for child in model.children(): + if type(child) not in exclude_layers: + self.replace_quantizations(child, exclude_layers, replace_all_quantizations) + + def step(self) -> None: + """updates all instances of scheduled quantizers in the model""" + for scheduled_quantizer in self.scheduled_quantizer_instances: + scheduled_quantizer.step() diff --git a/bitorch/quantizations/sign.py b/bitorch/quantizations/sign.py index 16818e2..99d45e7 100644 --- a/bitorch/quantizations/sign.py +++ b/bitorch/quantizations/sign.py @@ -1,8 +1,9 @@ """Sign Function Implementation""" -from typing import Tuple, Union, Optional -import torch import typing + +import torch + from .base import Quantization, STE @@ -10,9 +11,10 @@ class SignFunction(STE): @staticmethod @typing.no_type_check def forward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - input_tensor: torch.Tensor) -> torch.Tensor: - """Binarize the input tensor using the sign function + ctx: torch.autograd.function.BackwardCFunction, # type: ignore + input_tensor: torch.Tensor, + ) -> torch.Tensor: + """Binarize the input tensor using the sign function. Args: ctx (Any): autograd context @@ -22,16 +24,15 @@ def forward( torch.Tensor: the sign tensor """ sign_tensor = torch.sign(input_tensor) - sign_tensor = torch.where(sign_tensor == 0, torch.tensor( - 1., device=sign_tensor.device), sign_tensor) + sign_tensor = torch.where(sign_tensor == 0, torch.tensor(1.0, device=sign_tensor.device), sign_tensor) return sign_tensor class Sign(Quantization): - """Module for applying the sign function with straight through estimator in backward pass""" + """Module for applying the sign function with straight through estimator in backward pass.""" name = "sign" - bitwidth = 1 + bit_width = 1 def quantize(self, x: torch.Tensor) -> torch.Tensor: """Forwards the tensor through the sign function. diff --git a/bitorch/quantizations/ste_heaviside.py b/bitorch/quantizations/ste_heaviside.py index 655589a..d490878 100644 --- a/bitorch/quantizations/ste_heaviside.py +++ b/bitorch/quantizations/ste_heaviside.py @@ -1,6 +1,7 @@ """Sign Function Implementation""" import torch import typing +from typing import Any from .base import STE, Quantization @@ -8,8 +9,9 @@ class SteHeavisideFunction(STE): @staticmethod @typing.no_type_check def forward( - ctx: torch.autograd.function.BackwardCFunction, # type: ignore - input_tensor: torch.Tensor) -> torch.Tensor: + ctx: torch.autograd.function.BackwardCFunction, # type: ignore + input_tensor: torch.Tensor, + ) -> torch.Tensor: """quantizes input tensor and forwards it. 
         Args:
@@ -19,17 +21,37 @@ def forward(
         Returns:
             torch.Tensor: the quantized input tensor
         """
+        ctx.save_for_backward(input_tensor)
 
-        quantized_tensor = torch.where(input_tensor > 0, torch.tensor(
-            1., device=input_tensor.device), torch.tensor(0., device=input_tensor.device))
+        quantized_tensor = torch.where(
+            input_tensor > 0,
+            torch.tensor(1.0, device=input_tensor.device),
+            torch.tensor(-1.0, device=input_tensor.device),
+        )
         return quantized_tensor
 
+    @staticmethod
+    @typing.no_type_check
+    def backward(ctx: Any, output_gradient: torch.Tensor) -> torch.Tensor:
+        """Applies the straight through estimator with gradient cancellation.
+
+        Args:
+            ctx (Any): autograd context
+            output_gradient (torch.Tensor): output gradient
+
+        Returns:
+            torch.Tensor: the output gradient, canceled for inputs outside of [-1, 1]
+        """
+        input_tensor = ctx.saved_tensors[0]
+        inside_threshold = torch.abs(input_tensor) <= 1
+        return output_gradient * inside_threshold
+
 
 class SteHeaviside(Quantization):
     """Module for applying the SteHeaviside quantization, using an ste in backward pass"""
 
     name = "steheaviside"
-    bitwidth = 1
+    bit_width = 1
 
     def quantize(self, x: torch.Tensor) -> torch.Tensor:
         """Forwards the tensor through the sign function.
diff --git a/bitorch/quantizations/swish_sign.py b/bitorch/quantizations/swish_sign.py
index c870c1e..7a90230 100644
--- a/bitorch/quantizations/swish_sign.py
+++ b/bitorch/quantizations/swish_sign.py
@@ -15,9 +15,10 @@ class SwishSignFunction(Function):
     @staticmethod
     @typing.no_type_check
     def forward(
-            ctx: torch.autograd.function.BackwardCFunction,  # type: ignore
-            input_tensor: torch.Tensor,
-            beta: float = 1.0) -> torch.Tensor:
+        ctx: torch.autograd.function.BackwardCFunction,  # type: ignore
+        input_tensor: torch.Tensor,
+        beta: float = 1.0,
+    ) -> torch.Tensor:
         """Binarize input tensor using the _sign function.
 
         Args:
@@ -29,14 +30,14 @@ def forward(
         ctx.save_for_backward(input_tensor, torch.tensor(beta, device=input_tensor.device))
 
         sign_tensor = torch.sign(input_tensor)
-        sign_tensor = torch.where(sign_tensor == 0, torch.tensor(1., device=input_tensor.device), sign_tensor)
+        sign_tensor = torch.where(sign_tensor == 0, torch.tensor(1.0, device=input_tensor.device), sign_tensor)
         return sign_tensor
 
     @staticmethod
     @typing.no_type_check
     def backward(
-            ctx: torch.autograd.function.BackwardCFunction,  # type: ignore
-            output_grad: torch.Tensor) -> Tuple[torch.Tensor, None]:
+        ctx: torch.autograd.function.BackwardCFunction, output_grad: torch.Tensor  # type: ignore
+    ) -> Tuple[torch.Tensor, None]:
         """Apply straight through estimator.
 
         This passes the output gradient as input gradient after clamping the gradient values to the range [-1, 1]
@@ -50,8 +51,9 @@ def backward(
         """
         input_tensor, beta = ctx.saved_tensors
         # produces zeros where preactivation inputs exceeded threshold, ones otherwise
-        swish = (beta * (2 - beta * input_tensor * torch.tanh(beta * input_tensor / 2))) / \
-            (1 + torch.cosh(beta * input_tensor))
+        swish = (beta * (2 - beta * input_tensor * torch.tanh(beta * input_tensor / 2))) / (
+            1 + torch.cosh(beta * input_tensor)
+        )
         return swish * output_grad, None
 
 
@@ -59,7 +61,7 @@ class SwishSign(Quantization):
     """Module for applying the SwishSign function"""
 
     name = "swishsign"
-    bitwidth = 1
+    bit_width = 1
 
     def __init__(self, beta: Union[float, None] = None) -> None:
         """Initializes gradient cancelation threshold.
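Since the quantization scheduler introduced above (`quantization_scheduler.py`) is the main new training utility in this changeset, a hedged usage sketch may help; it is not part of the diff, and the `bitorch.layers.QConv2d` import path and its layer arguments are assumptions modeled on the docstring example rather than taken from this changeset:

```python
import torch.nn as nn

from bitorch.layers import QConv2d  # assumed import path
from bitorch.quantizations import InputDoReFa, Sign
from bitorch.quantizations.quantization_scheduler import Quantization_Scheduler

model = nn.Sequential(
    QConv2d(3, 64, kernel_size=3, input_quantization="sign", weight_quantization="sign"),
    nn.ReLU(),
)

# fade from 2-bit DoReFa inputs to binary Sign inputs over 10 steps (e.g. one step per epoch)
scheduler = Quantization_Scheduler(
    model,
    steps=10,
    quantizations=[InputDoReFa(bits=2), Sign()],
    scheduling_procedure="mix_linear",
    schedule_all_quantizations=True,  # replace every Quantization module found in the model
)

for epoch in range(10):
    ...  # run one training epoch
    scheduler.step()  # advances the mixing factor in every ScheduledQuantizer
```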
diff --git a/bitorch/runtime_mode.py b/bitorch/runtime_mode.py new file mode 100644 index 0000000..dd9a9d8 --- /dev/null +++ b/bitorch/runtime_mode.py @@ -0,0 +1,136 @@ +from enum import Enum +from functools import total_ordering +from types import TracebackType +from typing import Union, Any, Optional, Type, List + +import bitorch + +__all__ = ["RuntimeMode", "runtime_mode_type", "change_mode", "pause_wrapping"] + +runtime_mode_type = Union["RuntimeMode", int] + + +@total_ordering +class RuntimeMode(Enum): + """ + Enum for BITorch modes: + + - DEFAULT: use the default implementation of all layers + - CPU: use layer implementations for inference on CPU + - GPU: use layer implementations for inference on GPU + - INFERENCE_AUTO: use an automatic layer that uses the fastest implementation available (not recommended) + - RAW: while in this mode, new layers are created as the default implementation BUT without wrapping, so they can + not be switched to other layers later on (it does not influence already wrapped layers) + """ + + RAW = 0 + DEFAULT = 1 + CPU = 2 + GPU = 4 + INFERENCE_AUTO = 8 + + def __add__(self, other: runtime_mode_type) -> runtime_mode_type: + if self._to_int(self) == self._to_int(other): + return self + return self._to_int(other) + self.value + + @staticmethod + def available_values() -> List["RuntimeMode"]: + return RuntimeMode.__members__.values() # type:ignore + + @staticmethod + def list_of_names() -> List[str]: + return RuntimeMode.__members__.keys() # type:ignore + + @staticmethod + def _max_val() -> int: + return sum(map(lambda x: x.value, RuntimeMode.__members__.values())) + + @staticmethod + def is_single_mode(mode: runtime_mode_type) -> bool: + return any(x.value == mode for x in RuntimeMode.__members__.values()) + + @staticmethod + def is_combined_mode(mode: runtime_mode_type) -> bool: + return 0 <= mode < RuntimeMode._max_val() + + def __lt__(self, other: Any) -> bool: + if not isinstance(other, RuntimeMode) and not isinstance(other, int): + return NotImplemented + return self.value < self._to_int(other) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, RuntimeMode) and not isinstance(other, int): + return NotImplemented + return self.value == self._to_int(other) + + def __str__(self) -> str: + return self.name.lower() + + @staticmethod + def _to_int(mode: runtime_mode_type) -> int: + if isinstance(mode, RuntimeMode): + return mode.value + return mode + + @staticmethod + def from_string(level: str) -> "RuntimeMode": + return { + "raw": RuntimeMode.RAW, + "default": RuntimeMode.DEFAULT, + "cpu": RuntimeMode.CPU, + "gpu": RuntimeMode.GPU, + "inference_auto": RuntimeMode.INFERENCE_AUTO, + }[level.lower()] + + @staticmethod + def mode_compatible(required_mode: "RuntimeMode", provided_modes: runtime_mode_type) -> bool: + if required_mode == RuntimeMode.RAW.value or provided_modes == RuntimeMode.RAW.value: + return True + return bool(RuntimeMode._to_int(required_mode) & RuntimeMode._to_int(provided_modes)) + + def is_supported_by(self, provided_modes: runtime_mode_type) -> bool: + if self._to_int(self) == RuntimeMode.RAW.value: + return True + return self.mode_compatible(self, provided_modes) + + +class _PauseWrapping: + def __init__(self) -> None: + self._previous_mode = bitorch.mode + + def __enter__(self) -> "_PauseWrapping": + bitorch.mode = RuntimeMode.RAW + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + bitorch.mode = 
self._previous_mode + + +class _SafeModeChanger: + def __init__(self, new_mode: RuntimeMode) -> None: + assert new_mode.is_single_mode(new_mode) + self._previous_mode = bitorch.mode + self._new_mode = new_mode + + def __enter__(self) -> "_SafeModeChanger": + bitorch.mode = self._new_mode + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + bitorch.mode = self._previous_mode + + +change_mode = _SafeModeChanger + +pause_wrapping = _PauseWrapping diff --git a/bitorch/util.py b/bitorch/util.py index 157a9db..955364b 100644 --- a/bitorch/util.py +++ b/bitorch/util.py @@ -1,16 +1,16 @@ -# import sys import typing import importlib -from typing import Callable, List, Any, Dict +from typing import Optional, Callable, List, Any, Dict @typing.no_type_check def build_lookup_dictionary( - current_module_name: str, - class_strings: List[str], - filter_by_superclass: Any = None, - filter_fn: Callable[[Any], bool] = None, - key_fn: Callable[[Any], str] = lambda x: x.name) -> Dict[str, Any]: + current_module_name: str, + class_strings: List[str], + filter_by_superclass: Optional[Any] = None, + filter_fn: Optional[Callable[[Any], bool]] = None, + key_fn: Callable[[Any], str] = lambda x: x.name, +) -> Dict[str, Any]: """Builds a lookup dictionary based on a list of strings of class names. Args: @@ -25,16 +25,18 @@ def build_lookup_dictionary( """ assert filter_fn is not None or filter_by_superclass is not None, "one of the filter options must be given" if filter_fn is None: + def filter_fn(x: Any) -> bool: return isinstance(x, type) and issubclass(x, filter_by_superclass) and x != filter_by_superclass + lookup = {} current_module = importlib.import_module(current_module_name) for class_name in class_strings: - # current_module = sys.modules.get(current_module_name, None) if not hasattr(current_module, class_name): continue class_ = getattr(current_module, class_name) if filter_fn(class_): - lookup[key_fn(class_)] = class_ + transformed_key = key_fn(class_) + lookup[transformed_key] = class_ return lookup diff --git a/check-codestyle.sh b/check-codestyle.sh new file mode 100755 index 0000000..9ff5a44 --- /dev/null +++ b/check-codestyle.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +fails=() +successes=() + +checkmark="✔" +cross="✘" + +function check() { + echo "+ $@" + "$@" && { + successes+=("${checkmark} ${1}") + } || { + fails+=("${cross} ${1}") + } +} + +check flake8 +check mypy --config-file mypy.ini +check black . --check --diff --color + +echo +if [ "${#successes[@]}" -gt "0" ]; then + echo "Successful checks:" + echo ${successes[@]} +fi +if [ "${#fails[@]}" -gt "0" ]; then + echo "The following checks failed (please check the output above):" + echo ${fails[@]} + exit 1 +else + echo + echo "All looking good!" +fi diff --git a/docs/source/_templates/class.rst b/docs/source/_templates/class.rst index 9f31d95..39b2c4f 100644 --- a/docs/source/_templates/class.rst +++ b/docs/source/_templates/class.rst @@ -14,7 +14,7 @@ .. autosummary:: :nosignatures: {% for item in methods %} - + {%- if item.startswith('__init__') %} {%- if item not in inherited_members %} ~{{ name }}.{{ item }} diff --git a/docs/source/_templates/module.rst b/docs/source/_templates/module.rst index 4ce24de..10f5e26 100644 --- a/docs/source/_templates/module.rst +++ b/docs/source/_templates/module.rst @@ -33,6 +33,7 @@ .. 
autosummary:: :template: class.rst + :toctree: {% for item in classes %} {{ item }} {%- endfor %} @@ -53,6 +54,8 @@ {% block modules %} {% if modules %} +.. rubric:: {{ _('Modules') }} + .. autosummary:: :toctree: :template: module.rst @@ -61,4 +64,4 @@ {{ item }} {%- endfor %} {% endif %} -{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/docs/source/conf.py b/docs/source/conf.py index 2691121..4db63d8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,17 +12,18 @@ # import os import sys -sys.path.insert(0, os.path.abspath('../..')) + +sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- -project = 'bitorch' -copyright = '2022, Joseph Bethge, Haojin Yang, Paul Mattes, Christopher Aust' -author = 'Joseph Bethge, Haojin Yang, Paul Mattes, Christopher Aust' +project = "bitorch" +copyright = "2022, Joseph Bethge, Haojin Yang, Paul Mattes, Christopher Aust" +author = "Joseph Bethge, Haojin Yang, Paul Mattes, Christopher Aust" # The full version, including alpha/beta/rc tags -release = 'v0.1' +release = "v0.1" # -- General configuration --------------------------------------------------- @@ -31,23 +32,27 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.todo', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon', - 'nbsphinx', - 'nbsphinx_link', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.todo", + "sphinx.ext.mathjax", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", + "sphinx.ext.githubpages", + "sphinx.ext.napoleon", + "nbsphinx", + "nbsphinx_link", ] +# Generate type hints +# autodoc_typehints = "description" + # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] autosummary_generate = True +# autoclass_content = "init" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -60,12 +65,12 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = "alabaster" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +# html_static_path = ['_static'] # # This is the expected signature of the handler for this event, cf doc diff --git a/docs/source/index.rst b/docs/source/index.rst index b5d725d..86dfab1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ Welcome to bitorch's documentation! BITorch is a library currently under development to simplify building quantized and binary neural networks -with [PyTorch](https://pytorch.org/). +with `PyTorch `_. This is an early preview version of the library. If you wish to use it and encounter any problems, please create an issue. Our current roadmap contains: @@ -16,7 +16,7 @@ Our current roadmap contains: - Extending the model zoo with pre-trained models of state-of-the-art approaches - Adding examples for advanced training methods with multiple stages, knowledge distillation, etc. 
-All changes are tracked in the [changelog](CHANGELOG.md).
+All changes are tracked in the `changelog <https://github.com/hpi-xnor/bitorch/blob/main/CHANGELOG.md>`_.
diff --git a/examples/__init__.py b/examples/__init__.py
index e69de29..0868335 100644
--- a/examples/__init__.py
+++ b/examples/__init__.py
@@ -0,0 +1 @@
+"""This package contains examples for BITorch."""
diff --git a/examples/dlrm/.gitignore b/examples/dlrm/.gitignore
new file mode 100644
index 0000000..333c1e9
--- /dev/null
+++ b/examples/dlrm/.gitignore
@@ -0,0 +1 @@
+logs/
diff --git a/examples/dlrm/README.md b/examples/dlrm/README.md
new file mode 100644
index 0000000..42c2af8
--- /dev/null
+++ b/examples/dlrm/README.md
@@ -0,0 +1,56 @@
+# PyTorch Lightning Example Script
+
+To give an example of how to use bitorch for your own recommendation projects, `train_dlrm.py` trains a quantized version of Facebook's [DLRM](https://github.com/facebookresearch/dlrm), implemented in `bitorch`, on an ad recommendation dataset.
+Right now only the [Criteo Ad Challenge](https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset/) dataset is supported.
+
+First, the requirements for this example need to be installed
+(unless the optional dependencies of BITorch were already installed):
+```bash
+pip install -r requirements.txt
+```
+
+Below you can find an example call of the script:
+```bash
+python examples/dlrm/train_dlrm.py --dataset criteo --input-quantization sign --weight-quantization approxsign --download --ignore-dataset-size 0.0 --batch-size 8192 --lr-scheduler cosine --optimizer adam --wandb --batch-size-test 10000 --num-workers 0 --dataset-dir /datasets --gpus 1 --max_epochs 10
+```
+
+If the dataset is not present in the given directory, it will be downloaded to the specified directory and preprocessed. Preprocessing usually takes about 30 min, depending on your hardware setup.
+
+## Arguments
+
+For an exhaustive overview of the parameters of the `train_dlrm.py` script, call `python train_dlrm.py --help`.
+The list below gives a brief overview of some selected arguments.
+
+### general training args
+
+- `--optimizer` sets the optimizer. Choose from `adam, sgd` and `radam`.
+- `--lr-scheduler` sets the learning rate scheduler. Choose from `cosine, step` and `exponential`.
+- `--lr` sets the used learning rate.
+- `--max-epochs` sets the number of epochs to train.
+- `--max-steps` sets the number of training steps to perform.
+- `--batch-size` sets the batch size to use.
+- `--gpus n` specifies the number of GPUs to use. If `n` is not specified, all available GPUs will be used.
+- `--cpu` forces training on the CPU.
+
+### logging args
+
+- `--log-file` specifies the file to log into.
+- `--log-stdout` toggles whether the log output should also go to stdout.
+- `--tensorboard` toggles logging to tensorboard.
+- `--wandb` toggles logging to wandb. You need to specify a WANDB_API_TOKEN variable in your environment to use this. [details](https://docs.wandb.ai/guides/track/public-api-guide#authentication)
+- `--result-file` specifies the path to a result file which will contain the evaluation metrics in CSV format.
+- `--checkpoint-dir` path to where checkpoints shall be stored.
+- `--checkpoint-load` path to a checkpoint to load from.
+
+### dataset args
+
+- `--dataset` name of the dataset to train on. Choose from `criteo`.
+- `--download` toggles whether the dataset should be downloaded if it is not present at `--dataset-dir`.
+- `--dataset-dir` path to the dataset.
+- `--num-workers` sets the number of workers for data loading.
+
+### quantization args
+
+- `--input-quantization` chooses the default input quantization method.
+- `--weight-quantization` chooses the default weight quantization method.
+- `--gradient-cancellation-threshold` sets the default gradient cancellation threshold.
diff --git a/examples/dlrm/__init__.py b/examples/dlrm/__init__.py
new file mode 100644
index 0000000..aa5b8b1
--- /dev/null
+++ b/examples/dlrm/__init__.py
@@ -0,0 +1 @@
+"""This package contains an example for training a quantized DLRM model with BITorch."""
diff --git a/examples/dlrm/datasets/__init__.py b/examples/dlrm/datasets/__init__.py
new file mode 100644
index 0000000..44adf35
--- /dev/null
+++ b/examples/dlrm/datasets/__init__.py
@@ -0,0 +1,42 @@
+"""
+This submodule contains data preparation code for the datasets used with the DLRM example,
+i.e. the Criteo click-through dataset.
+"""
+
+from typing import List, Type
+
+from .base import BasicDataset
+from .criteo import Criteo
+
+__all__ = [
+    "BasicDataset",
+    "Criteo",
+]
+
+
+def dataset_from_name(name: str) -> Type[BasicDataset]:
+    """returns the dataset to which the name belongs (name has to be the value of the dataset's
+    name attribute)
+
+    Args:
+        name (str): name of the dataset
+
+    Raises:
+        ValueError: raised if no dataset under that name was found
+
+    Returns:
+        dataset: the dataset
+    """
+    for dataset_class in [Criteo]:
+        if dataset_class.name == name:
+            return dataset_class
+    raise ValueError(f"unknown dataset: {name}")
+
+
+def dataset_names() -> List[str]:
+    """getter for the list of dataset names for argparse
+
+    Returns:
+        List: the dataset names
+    """
+    return [dataset_class.name for dataset_class in [Criteo]]
diff --git a/bitorch/datasets/base.py b/examples/dlrm/datasets/base.py
similarity index 76%
rename from bitorch/datasets/base.py
rename to examples/dlrm/datasets/base.py
index 9acf68d..d79500b 100644
--- a/bitorch/datasets/base.py
+++ b/examples/dlrm/datasets/base.py
@@ -1,6 +1,5 @@
 import logging
 import os
-from enum import Enum
 from pathlib import Path
 from typing import Optional, Tuple, Any
 
@@ -8,25 +7,7 @@
 from torch.utils.data import Dataset
 from torchvision.transforms import transforms
 
-from bitorch.datasets.dummy_dataset import DummyDataset
-
-
-class Augmentation(Enum):
-    NONE = -1
-    DEFAULT = 0
-    LOW = 1
-    MEDIUM = 2
-    HIGH = 3
-
-    @staticmethod
-    def from_string(level: str) -> "Augmentation":
-        return {
-            "none": Augmentation.NONE,
-            "default": Augmentation.DEFAULT,
-            "low": Augmentation.LOW,
-            "medium": Augmentation.MEDIUM,
-            "high": Augmentation.HIGH,
-        }[level]
+from .dummy_dataset import DummyDataset
 
 
 class BasicDataset(Dataset):
@@ -38,42 +19,31 @@ class BasicDataset(Dataset):
     num_train_samples = 0
     num_val_samples = 0
 
-    def __init__(
-            self,
-            train: bool,
-            root_directory: str = None,
-            download: bool = False,
-            augmentation: Augmentation = Augmentation.DEFAULT) -> None:
+    def __init__(self, train: bool, root_directory: Optional[str] = None, download: bool = False) -> None:
         """initializes the dataset.
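As a brief orientation for how the relocated dataset helpers are meant to be used in the DLRM example, here is a hedged sketch; it is not part of the diff, and the import path (running from the repository root) and the `/datasets` directory are illustrative assumptions:

```python
# resolve the Criteo dataset class by name and build the train/test pair
from examples.dlrm.datasets import dataset_from_name  # assumes the repository root is on sys.path

dataset_class = dataset_from_name("criteo")  # -> the Criteo class registered above
train_set, test_set = dataset_class.get_train_and_test(
    root_directory="/datasets",  # illustrative path
    download=True,               # download and preprocess the raw data on first use
)
```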
Args: train (bool): whether the train or test dataset is wanted root_directory (str): path to main dataset storage directory download (bool): whether train/test should be downloaded if it does not exist - augmentation (Augmentation): the level of augmentation (only for train dataset) Returns: Dataset: the created test/train dataset """ super(BasicDataset, self).__init__() self.is_train = train - self.augmentation_level = augmentation self._download = download self.root_directory = self.get_dataset_root_directory(root_directory) self.dataset = self.get_dataset(download) @classmethod - def get_train_and_test( - cls, - root_directory: str, - download: bool = False, - augmentation: Augmentation = Augmentation.DEFAULT) -> Tuple["BasicDataset", "BasicDataset"]: + def get_train_and_test(cls, root_directory: str, download: bool = False) -> Tuple["BasicDataset", "BasicDataset"]: """creates a pair of train and test dataset. Returns: Tuple: the train and test dataset """ - return cls(True, root_directory, download, augmentation), cls(False, root_directory, download) + return cls(True, root_directory, download), cls(False, root_directory, download) @classmethod def get_dummy_train_and_test_datasets(cls) -> Tuple[DummyDataset, DummyDataset]: @@ -123,7 +93,7 @@ def get_dataset(self, download: bool) -> Dataset: def get_transform(self) -> Any: if self.is_train: - return self.train_transform(self.augmentation_level) + return self.train_transform() return self.test_transform() @classmethod @@ -136,8 +106,8 @@ def test_transform(cls) -> Any: return transforms.Compose([transforms.ToTensor(), cls.get_normalize_transform()]) @classmethod - def train_transform(cls, augmentation: Augmentation = Augmentation.DEFAULT) -> Any: - """get the transform for the training data (should consider the current augmentation_level). + def train_transform(cls) -> Any: + """get the transform for the training data. Returns: transform: the transform pipeline diff --git a/examples/dlrm/datasets/criteo.py b/examples/dlrm/datasets/criteo.py new file mode 100644 index 0000000..d700348 --- /dev/null +++ b/examples/dlrm/datasets/criteo.py @@ -0,0 +1,76 @@ +import gc +from typing import Tuple +from torch.utils.data import Dataset +import logging +import torch +import os +import numpy as np +from .base import BasicDataset +from facebook_dataloading.dataloading_fb import CriteoDataset + + +class SplitCriteoDataset(Dataset): + """Dataset to get items from a dataset for each split. 
Useful if dataset creation takes a lot of time and can be done exactly once.""" + + def __init__( + self, dataset: BasicDataset, split: str, train_split_fraction: float = 0.9, ignore_size: float = 0.0 + ) -> None: + self.dataset = dataset + self.indices = self.dataset.train_indices if split == "train" else self.dataset.test_indices + + dataset_size = int(len(self.indices) * (1.0 - ignore_size)) + self.indices = np.random.choice(self.indices, size=dataset_size, replace=False) + + def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]: + return self.dataset[self.indices[idx]] + + def __len__(self) -> int: + return len(self.indices) + + +class Criteo(BasicDataset): + name = "criteo" + + num_train_samples = 60000 + num_val_samples = 10000 + dataset_url = "http://go.criteo.net/criteo-research-kaggle-display-advertising-challenge-dataset.tar.gz" + + def get_dataset(self, download: bool = True) -> Dataset: + try: + self.download_path = self.root_directory / "criteo.tar.gz" + self.path = self.root_directory / "train.txt" + self.path.parent.mkdir(parents=True, exist_ok=True) + if download and not self.download_path.exists(): + logging.info("DOWNLOADING CRITEO DATASET") + result = os.system(f"wget {self.dataset_url} -O {str(self.root_directory / 'criteo.tar.gz')}") + if result != 0: + raise Exception("Download failed") + logging.info("FINISHED DOWNLOAD") + if not (self.root_directory / "train.txt").exists(): + logging.info("EXTRACTING CRITEO DATASET") + result = os.system(f"tar -xf {str(self.root_directory / 'criteo.tar.gz')} -C {self.root_directory}") + if result != 0: + raise Exception("Extraction failed") + logging.info("FINISHED EXTRACTION") + except Exception as e: + logging.error( + f"Cannot get criteo dataset: {e}. You need download " + "the dataset manually under the following link: " + "http://go.criteo.net/criteo-research-kaggle-display-advertising-challenge-dataset.tar.gz " + f"and extract it to the following path: {str(self.root_directory.resolve())}. " + "alternatively you can try downloading it automatically by using the --download flag" + ) + dataset = CriteoDataset( + dataset="kaggle", + max_ind_range=-1, + sub_sample_rate=0.0, + randomize="total", + # split="train" if self.is_train else "test", + raw_path=str(self.root_directory / "train.txt"), + pro_data=str(self.root_directory / "preprocessed.npz"), + memory_map=False, + dataset_multiprocessing=True, + store_all_indices=True, + ) + gc.collect() + return dataset diff --git a/bitorch/datasets/dummy_dataset.py b/examples/dlrm/datasets/dummy_dataset.py similarity index 100% rename from bitorch/datasets/dummy_dataset.py rename to examples/dlrm/datasets/dummy_dataset.py diff --git a/examples/dlrm/facebook_dataloading/__init__.py b/examples/dlrm/facebook_dataloading/__init__.py new file mode 100644 index 0000000..73448bd --- /dev/null +++ b/examples/dlrm/facebook_dataloading/__init__.py @@ -0,0 +1 @@ +"""Criteo Dataloading for DLRM partially copied from here: https://github.com/facebookresearch/dlrm""" diff --git a/examples/dlrm/facebook_dataloading/data_utils.py b/examples/dlrm/facebook_dataloading/data_utils.py new file mode 100644 index 0000000..a425288 --- /dev/null +++ b/examples/dlrm/facebook_dataloading/data_utils.py @@ -0,0 +1,660 @@ +# fmt: off +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+# +# Description: generate inputs and targets for the DLRM benchmark +# +# Utility function(s) to download and pre-process public data sets +# - Criteo Kaggle Display Advertising Challenge Dataset +# https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset +# - Criteo Terabyte Dataset +# https://labs.criteo.com/2013/12/download-terabyte-click-logs +# +# After downloading dataset, run: +# getCriteoAdData( +# datafile="", +# o_filename=kaggleAdDisplayChallenge_processed.npz, +# max_ind_range=-1, +# sub_sample_rate=0.0, +# days=7, +# data_split='train', +# randomize='total', +# criteo_kaggle=True, +# memory_map=False +# ) +# getCriteoAdData( +# datafile="", +# o_filename=terabyte_processed.npz, +# max_ind_range=-1, +# sub_sample_rate=0.0, +# days=24, +# data_split='train', +# randomize='total', +# criteo_kaggle=False, +# memory_map=False +# ) + +from __future__ import absolute_import, division, print_function, unicode_literals + +from pathlib import Path +import sys +# import os +import logging +from os import path +from multiprocessing import Process, Manager +from typing import Optional, Any, Union +# import io +# from io import StringIO +# import collections as coll + +import numpy as np + + +def processCriteoAdData( + d_path: str, + d_file: str, + npzfile: str, + i: int, + convertDicts: dict, + pre_comp_counts: bool) -> None: + """Process Kaggle Display Advertising Challenge or Terabyte Dataset + by converting unicode strings in X_cat to integers and + converting negative integer values in X_int. + + Loads data in the form "{kaggle|terabyte}_day_i.npz" where i is the day. + + Args: + d_path (str): path for {kaggle|terabyte}_day_i.npz files + d_file (str): _description_ + npzfile (str): _description_ + i (int): splits in the dataset (typically 0 to 7 or 0 to 24) + convertDicts (dict): _description_ + pre_comp_counts (bool): _description_ + """ + # process data if not all files exist + filename_i = npzfile + "_{0}_processed.npz".format(i) + + if path.exists(filename_i): + logging.debug("Using existing " + filename_i, end="\n") + else: + logging.debug("Not existing " + filename_i) + with np.load(npzfile + "_{0}.npz".format(i)) as data: + # categorical features + # Approach 2a: using pre-computed dictionaries + X_cat_t = np.zeros(data["X_cat_t"].shape) + for j in range(26): + for k, x in enumerate(data["X_cat_t"][j, :]): + X_cat_t[j, k] = convertDicts[j][x] + # continuous features + X_int = data["X_int"] + X_int[X_int < 0] = 0 + # targets + y = data["y"] + + np.savez_compressed( + filename_i, + # X_cat = X_cat, + X_cat=np.transpose(X_cat_t), # transpose of the data + X_int=X_int, + y=y, + ) + logging.debug("Processed " + filename_i, end="\n") + + return + + +def concatCriteoAdData( + d_path: Any, + d_file: Any, + npzfile: Any, + trafile: Any, + days: Any, + data_split: Any, + randomize: Any, + total_per_file: Any, + total_count: Any, + memory_map: Any, + o_filename: Any +) -> str: + """Concatenates different days and saves the result. 
+ + Args: + days (int): total number of days in the dataset (typically 7 or 24) + d_path (str): path for {kaggle|terabyte}_day_i.npz files + o_filename (str): output file name + + Return: + o_file (str): output file path + """ + if memory_map: + # dataset break up per fea + # tar_fea = 1 # single target + den_fea = 13 # 13 dense features + spa_fea = 26 # 26 sparse features + # tad_fea = tar_fea + den_fea + # tot_fea = tad_fea + spa_fea + # create offset per file + offset_per_file = np.array([0] + [x for x in total_per_file]) + for i in range(days): + offset_per_file[i + 1] += offset_per_file[i] + + # Approach 4: Fisher-Yates-Rao (FYR) shuffle algorithm + # 1st pass of FYR shuffle + # check if data already exists + recreate_flag = False + for j in range(days): + filename_j_y = npzfile + "_{0}_intermediate_y.npy".format(j) + filename_j_d = npzfile + "_{0}_intermediate_d.npy".format(j) + filename_j_s = npzfile + "_{0}_intermediate_s.npy".format(j) + if ( + path.exists(filename_j_y) + and path.exists(filename_j_d) + and path.exists(filename_j_s) + ): + logging.debug( + "Using existing\n" + + filename_j_y + "\n" + + filename_j_d + "\n" + + filename_j_s + ) + else: + recreate_flag = True + # reorder across buckets using sampling + if recreate_flag: + # init intermediate files (.npy appended automatically) + for j in range(days): + filename_j_y = npzfile + "_{0}_intermediate_y".format(j) + filename_j_d = npzfile + "_{0}_intermediate_d".format(j) + filename_j_s = npzfile + "_{0}_intermediate_s".format(j) + np.save(filename_j_y, np.zeros((total_per_file[j]))) + np.save(filename_j_d, np.zeros((total_per_file[j], den_fea))) + np.save(filename_j_s, np.zeros((total_per_file[j], spa_fea))) + # start processing files + total_counter = [0] * days + for i in range(days): + filename_i = npzfile + "_{0}_processed.npz".format(i) + with np.load(filename_i) as data: + X_cat = data["X_cat"] + X_int = data["X_int"] + y = data["y"] + size = len(y) + # sanity check + if total_per_file[i] != size: + sys.exit("ERROR: sanity check on number of samples failed") + # debug prints + logging.debug("Reordering (1st pass) " + filename_i) + + # create buckets using sampling of random ints + # from (discrete) uniform distribution + buckets = [] + for _j in range(days): + buckets.append([]) + counter = [0] * days + days_to_sample = days if data_split == "none" else days - 1 + if randomize == "total": + rand_u = np.random.randint(low=0, high=days_to_sample, size=size) + for k in range(size): + # sample and make sure elements per buckets do not overflow + if data_split == "none" or i < days - 1: + # choose bucket + p = rand_u[k] + # retry of the bucket is full + while total_counter[p] + counter[p] >= total_per_file[p]: + p = np.random.randint(low=0, high=days_to_sample) + else: # preserve the last day/bucket if needed + p = i + buckets[p].append(k) + counter[p] += 1 + else: # randomize is day or none + for k in range(size): + # do not sample, preserve the data in this bucket + p = i + buckets[p].append(k) + counter[p] += 1 + + # sanity check + if np.sum(counter) != size: + sys.exit("ERROR: sanity check on number of samples failed") + # debug prints + # logging.debug(counter) + # logging.debug(str(np.sum(counter)) + " = " + str(size)) + # logging.debug([len(x) for x in buckets]) + # logging.debug(total_counter) + + # partially feel the buckets + for j in range(days): + filename_j_y = npzfile + "_{0}_intermediate_y.npy".format(j) + filename_j_d = npzfile + "_{0}_intermediate_d.npy".format(j) + filename_j_s = npzfile + 
"_{0}_intermediate_s.npy".format(j) + start = total_counter[j] + end = total_counter[j] + counter[j] + # target buckets + fj_y = np.load(filename_j_y, mmap_mode='r+') + # logging.debug("start=" + str(start) + " end=" + str(end) + # + " end - start=" + str(end - start) + " " + # + str(fj_y[start:end].shape) + " " + # + str(len(buckets[j]))) + fj_y[start:end] = y[buckets[j]] + del fj_y + # dense buckets + fj_d = np.load(filename_j_d, mmap_mode='r+') + # logging.debug("start=" + str(start) + " end=" + str(end) + # + " end - start=" + str(end - start) + " " + # + str(fj_d[start:end, :].shape) + " " + # + str(len(buckets[j]))) + fj_d[start:end, :] = X_int[buckets[j], :] + del fj_d + # sparse buckets + fj_s = np.load(filename_j_s, mmap_mode='r+') + # logging.debug("start=" + str(start) + " end=" + str(end) + # + " end - start=" + str(end - start) + " " + # + str(fj_s[start:end, :].shape) + " " + # + str(len(buckets[j]))) + fj_s[start:end, :] = X_cat[buckets[j], :] + del fj_s + # update counters for next step + total_counter[j] += counter[j] + + # 2nd pass of FYR shuffle + # check if data already exists + for j in range(days): + filename_j = npzfile + "_{0}_reordered.npz".format(j) + if path.exists(filename_j): + logging.debug("Using existing " + filename_j) + else: + recreate_flag = True + # reorder within buckets + if recreate_flag: + for j in range(days): + filename_j_y = npzfile + "_{0}_intermediate_y.npy".format(j) + filename_j_d = npzfile + "_{0}_intermediate_d.npy".format(j) + filename_j_s = npzfile + "_{0}_intermediate_s.npy".format(j) + fj_y = np.load(filename_j_y) + fj_d = np.load(filename_j_d) + fj_s = np.load(filename_j_s) + + indices = range(total_per_file[j]) + if randomize == "day" or randomize == "total": + if data_split == "none" or j < days - 1: + indices = np.random.permutation(range(total_per_file[j])) + + filename_r = npzfile + "_{0}_reordered.npz".format(j) + logging.debug("Reordering (2nd pass) " + filename_r) + np.savez_compressed( + filename_r, + X_cat=fj_s[indices, :], + X_int=fj_d[indices, :], + y=fj_y[indices], + ) + + else: + logging.debug("Concatenating multiple days into %s.npz file" % str(d_path + o_filename)) + + # load and concatenate data + for i in range(days): + filename_i = npzfile + "_{0}_processed.npz".format(i) + with np.load(filename_i) as data: + if i == 0: + X_cat = data["X_cat"] + X_int = data["X_int"] + y = data["y"] + else: + X_cat = np.concatenate((X_cat, data["X_cat"])) + X_int = np.concatenate((X_int, data["X_int"])) + y = np.concatenate((y, data["y"])) + logging.debug("Loaded day:", i, "y = 1:", len(y[y == 1]), "y = 0:", len(y[y == 0])) + + with np.load(d_path + d_file + "_fea_count.npz") as data: + counts = data["counts"] + logging.debug("Loaded counts!") + + logging.debug("saving compressed dataset") + np.savez_compressed( + d_path + o_filename + ".npz", + X_cat=X_cat, + X_int=X_int, + y=y, + counts=counts, + ) + + return d_path + o_filename + ".npz" + + +def getCriteoAdData( + datafile: str, + o_filename: str, + max_ind_range: int = -1, + sub_sample_rate: int = 0.0, + days: int = 7, + data_split: str = 'train', + randomize: str = 'total', + criteo_kaggle: bool = True, + memory_map: bool = False, + dataset_multiprocessing: bool = False, +) -> str: + """Passes through entire dataset and defines dictionaries for categorical + features and determines the number of total categories. 
+ + Inputs: + datafile : path to downloaded raw data file + o_filename (str): saves results under o_filename if filename is not "" + + Output: + o_file (str): output file path + """ + # split the datafile into path and filename + lstr = datafile.split("/") + d_path = "/".join(lstr[0:-1]) + "/" + d_file = lstr[-1].split(".")[0] if criteo_kaggle else lstr[-1] + npzfile = d_path + ((d_file + "_day") if criteo_kaggle else d_file) + trafile = d_path + ((d_file + "_fea") if criteo_kaggle else "fea") + + # count number of datapoints in training set + total_file = d_path + d_file + "_day_count.npz" + if path.exists(total_file): + with np.load(total_file) as data: + total_per_file = list(data["total_per_file"]) + total_count = np.sum(total_per_file) + logging.debug("Skipping counts per file (already exist)") + else: + total_count = 0 + total_per_file = [] + if criteo_kaggle: + # WARNING: The raw data consists of a single train.txt file + # Each line in the file is a sample, consisting of 13 continuous and + # 26 categorical features (an extra space indicates that feature is + # missing and will be interpreted as 0). + if path.exists(datafile): + logging.debug("Reading data from path=%s" % (datafile)) + with open(str(datafile)) as f: + for _ in f: + total_count += 1 + total_per_file.append(total_count) + # reset total per file due to split + num_data_per_split, extras = divmod(total_count, days) + total_per_file = [num_data_per_split] * days + for j in range(extras): + total_per_file[j] += 1 + # split into days (simplifies code later on) + file_id = 0 + boundary = total_per_file[file_id] + nf = open(npzfile + "_" + str(file_id), "w") + with open(str(datafile)) as f: + for j, line in enumerate(f): + if j == boundary: + nf.close() + file_id += 1 + nf = open(npzfile + "_" + str(file_id), "w") + boundary += total_per_file[file_id] + nf.write(line) + nf.close() + else: + sys.exit( + "ERROR: Criteo Kaggle Display Ad Challenge Dataset path is invalid; please download from " + "https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset" + ) + else: + # WARNING: The raw data consist of day_0.gz,... ,day_23.gz text files + # Each line in the file is a sample, consisting of 13 continuous and + # 26 categorical features (an extra space indicates that feature is + # missing and will be interpreted as 0). 
+ for i in range(days): + datafile_i = datafile + "_" + str(i) # + ".gz" + if path.exists(str(datafile_i)): + logging.debug("Reading data from path=%s" % (str(datafile_i))) + # file day_ + total_per_file_count = 0 + with open(str(datafile_i)) as f: + for _ in f: + total_per_file_count += 1 + total_per_file.append(total_per_file_count) + total_count += total_per_file_count + else: + sys.exit( + "ERROR: Criteo Terabyte Dataset path is invalid; please download " + "from https://labs.criteo.com/2013/12/download-terabyte-click-logs" + ) + + # process a file worth of data and reinitialize data + # note that a file main contain a single or multiple splits + def process_one_file( + datfile: Any, + npzfile: Any, + split: Any, + num_data_in_split: Any, + dataset_multiprocessing: Any, + convertDictsDay: Optional[Any] = None, + resultDay: Optional[Any] = None + ) -> Union[None, int]: + if dataset_multiprocessing: + convertDicts_day = [{} for _ in range(26)] + + with open(str(datfile)) as f: + y = np.zeros(num_data_in_split, dtype="i4") # 4 byte int + X_int = np.zeros((num_data_in_split, 13), dtype="i4") # 4 byte int + X_cat = np.zeros((num_data_in_split, 26), dtype="i4") # 4 byte int + if sub_sample_rate == 0.0: + rand_u = 1.0 + else: + rand_u = np.random.uniform(low=0.0, high=1.0, size=num_data_in_split) + + i = 0 + percent = 0 + for k, line in enumerate(f): + # process a line (data point) + line = line.split('\t') + # set missing values to zero + for j in range(len(line)): + if (line[j] == '') or (line[j] == '\n'): + line[j] = '0' + # sub-sample data by dropping zero targets, if needed + target = np.int32(line[0]) + if target == 0 and \ + (rand_u if sub_sample_rate == 0.0 else rand_u[k]) < sub_sample_rate: + continue + + y[i] = target + X_int[i] = np.array(line[1:14], dtype=np.int32) + if max_ind_range > 0: + X_cat[i] = np.array( + list(map(lambda x: int(x, 16) % max_ind_range, line[14:])), + dtype=np.int32 + ) + else: + X_cat[i] = np.array( + list(map(lambda x: int(x, 16), line[14:])), + dtype=np.int32 + ) + + # count uniques + if dataset_multiprocessing: + for j in range(26): + convertDicts_day[j][X_cat[i][j]] = 1 + # debug prints + if float(i) / num_data_in_split * 100 > percent + 1: + percent = int(float(i) / num_data_in_split * 100) + logging.debug( + "Load %d/%d (%d%%) Split: %d Label True: %d Stored: %d" + % ( + i, + num_data_in_split, + percent, + split, + target, + y[i], + ), + end="\n", + ) + else: + for j in range(26): + convertDicts[j][X_cat[i][j]] = 1 + # debug prints + logging.debug( + "Load %d/%d Split: %d Label True: %d Stored: %d" + % ( + i, + num_data_in_split, + split, + target, + y[i], + ), + end="\r", + ) + i += 1 + + # store num_data_in_split samples or extras at the end of file + # count uniques + # X_cat_t = np.transpose(X_cat) + # for j in range(26): + # for x in X_cat_t[j,:]: + # convertDicts[j][x] = 1 + # store parsed + filename_s = npzfile + "_{0}.npz".format(split) + if path.exists(filename_s): + logging.debug("\nSkip existing " + filename_s) + else: + np.savez_compressed( + filename_s, + X_int=X_int[0:i, :], + # X_cat=X_cat[0:i, :], + X_cat_t=np.transpose(X_cat[0:i, :]), # transpose of the data + y=y[0:i], + ) + logging.debug("\nSaved " + npzfile + "_{0}.npz!".format(split)) + + if dataset_multiprocessing: + resultDay[split] = i + convertDictsDay[split] = convertDicts_day + return + else: + return i + + # create all splits (reuse existing files if possible) + recreate_flag = False + convertDicts = [{} for _ in range(26)] + # WARNING: to get reproducable sub-sampling 
results you must reset the seed below + # np.random.seed(123) + # in this case there is a single split in each day + for i in range(days): + npzfile_i = npzfile + "_{0}.npz".format(i) + npzfile_p = npzfile + "_{0}_processed.npz".format(i) + if path.exists(npzfile_i): + logging.debug("Skip existing " + npzfile_i) + elif path.exists(npzfile_p): + logging.debug("Skip existing " + npzfile_p) + else: + logging.debug("setting recreate for day", i) + recreate_flag = True + + if recreate_flag: + if dataset_multiprocessing: + resultDay = Manager().dict() + convertDictsDay = Manager().dict() + processes = [Process(target=process_one_file, + name="process_one_file:%i" % i, + args=(npzfile + "_{0}".format(i), + npzfile, + i, + total_per_file[i], + dataset_multiprocessing, + convertDictsDay, + resultDay, + ) + ) for i in range(0, days)] + for process in processes: + process.start() + for process in processes: + process.join() + for day in range(days): + total_per_file[day] = resultDay[day] + logging.debug("Constructing convertDicts Split: {}".format(day)) + convertDicts_tmp = convertDictsDay[day] + for i in range(26): + for j in convertDicts_tmp[i]: + convertDicts[i][j] = 1 + else: + for i in range(days): + total_per_file[i] = process_one_file( + npzfile + "_{0}".format(i), + npzfile, + i, + total_per_file[i], + dataset_multiprocessing, + ) + + # report and save total into a file + total_count = np.sum(total_per_file) + if not path.exists(total_file): + np.savez_compressed(total_file, total_per_file=total_per_file) + logging.debug("Total number of samples:", total_count) + logging.debug("Divided into days/splits:\n", total_per_file) + + # dictionary files + counts = np.zeros(26, dtype=np.int32) + if recreate_flag: + # create dictionaries + for j in range(26): + for i, x in enumerate(convertDicts[j]): + convertDicts[j][x] = i + dict_file_j = d_path + d_file + "_fea_dict_{0}.npz".format(j) + if not path.exists(dict_file_j): + np.savez_compressed( + dict_file_j, + unique=np.array(list(convertDicts[j]), dtype=np.int32) + ) + counts[j] = len(convertDicts[j]) + # store (uniques and) counts + count_file = d_path + d_file + "_fea_count.npz" + if not path.exists(count_file): + np.savez_compressed(count_file, counts=counts) + else: + # create dictionaries (from existing files) + for j in range(26): + with np.load(d_path + d_file + "_fea_dict_{0}.npz".format(j)) as data: + unique = data["unique"] + for i, x in enumerate(unique): + convertDicts[j][x] = i + # load (uniques and) counts + with np.load(d_path + d_file + "_fea_count.npz") as data: + counts = data["counts"] + + # process all splits + if dataset_multiprocessing: + processes = [ + Process( + target=processCriteoAdData, + name="processCriteoAdData:%i" % i, + args=(d_path, d_file, npzfile, i, convertDicts, counts,) + ) for i in range(0, days) + ] + for process in processes: + process.start() + for process in processes: + process.join() + + else: + for i in range(days): + processCriteoAdData(d_path, d_file, npzfile, i, convertDicts, counts) + + output_path = Path(d_path + o_filename + ".npz") + + if not output_path.exists(): + o_file = concatCriteoAdData( + d_path, + d_file, + npzfile, + trafile, + days, + data_split, + randomize, + total_per_file, + total_count, + memory_map, + o_filename + ) + else: + o_file = str(output_path) + + return o_file diff --git a/examples/dlrm/facebook_dataloading/dataloading_fb.py b/examples/dlrm/facebook_dataloading/dataloading_fb.py new file mode 100644 index 0000000..e4fea7a --- /dev/null +++ 
b/examples/dlrm/facebook_dataloading/dataloading_fb.py @@ -0,0 +1,393 @@ +# fmt: off +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +# +# Description: generate inputs and targets for the dlrm benchmark +# The inpts and outputs are generated according to the following three option(s) +# 1) random distribution +# 2) synthetic distribution, based on unique accesses and distances between them +# i) R. Hassan, A. Harris, N. Topham and A. Efthymiou "Synthetic Trace-Driven +# Simulation of Cache Memory", IEEE AINAM'07 +# 3) public data set +# i) Criteo Kaggle Display Advertising Challenge Dataset +# https://labs.criteo.com/2014/02/kaggle-display-advertising-challenge-dataset +# ii) Criteo Terabyte Dataset +# https://labs.criteo.com/2013/12/download-terabyte-click-logs + + +from __future__ import absolute_import, division, print_function, unicode_literals +import gc +# others +from os import path +import sys +import logging +from typing import Any, List, Tuple + +from . import data_utils + +# numpy +import numpy as np + +# pytorch +import torch +from torch.utils.data import Dataset + + +# Kaggle Display Advertising Challenge Dataset +# dataset (str): name of dataset (Kaggle or Terabyte) +# randomize (str): determines randomization scheme +# "none": no randomization +# "day": randomizes each day"s data (only works if split = True) +# "total": randomizes total dataset +# split (bool) : to split into train, test, validation data-sets +class CriteoDataset(Dataset): + + def __init__( + self, + dataset: str, + max_ind_range: int, + sub_sample_rate: int, + randomize: bool, + split: str = "train", + raw_path: str = "", + pro_data: str = "", + memory_map: str = False, + dataset_multiprocessing: str = False, + store_all_indices: str = False, + ) -> None: + # dataset + # tar_fea = 1 # single target + den_fea = 13 # 13 dense features + # spa_fea = 26 # 26 sparse features + # tad_fea = tar_fea + den_fea + # tot_fea = tad_fea + spa_fea + if dataset == "kaggle": + days = 7 + out_file = "kaggleAdDisplayChallenge_processed" + elif dataset == "terabyte": + days = 24 + out_file = "terabyte_processed" + else: + raise (ValueError("Data set option is not supported")) + self.max_ind_range = max_ind_range + self.memory_map = memory_map + + # split the datafile into path and filename + lstr = raw_path.split("/") + self.d_path = "/".join(lstr[0:-1]) + "/" + self.d_file = lstr[-1].split(".")[0] if dataset == "kaggle" else lstr[-1] + self.npzfile = self.d_path + ( + (self.d_file + "_day") if dataset == "kaggle" else self.d_file + ) + self.trafile = self.d_path + ( + (self.d_file + "_fea") if dataset == "kaggle" else "fea" + ) + + # check if pre-processed data is available + data_ready = True + if memory_map: + for i in range(days): + reo_data = self.npzfile + "_{0}_reordered.npz".format(i) + if not path.exists(str(reo_data)): + data_ready = False + else: + if not path.exists(str(pro_data)): + data_ready = False + + # pre-process data if needed + # WARNNING: when memory mapping is used we get a collection of files + if data_ready: + logging.debug("Reading pre-processed data=%s" % (str(pro_data))) + file = str(pro_data) + else: + logging.debug("Reading raw data=%s" % (str(raw_path))) + file = data_utils.getCriteoAdData( + raw_path, + out_file, + max_ind_range, + sub_sample_rate, + days, + split, + randomize, + dataset == "kaggle", + memory_map, + dataset_multiprocessing, + ) + + # get a number 
of samples per day + total_file = self.d_path + self.d_file + "_day_count.npz" + with np.load(total_file) as data: + total_per_file = data["total_per_file"] + # compute offsets per file + self.offset_per_file = np.array([0] + [x for x in total_per_file]) + for i in range(days): + self.offset_per_file[i + 1] += self.offset_per_file[i] + # logging.debug(self.offset_per_file) + + # setup data + if memory_map: + # setup the training/testing split + self.split = split + if split == 'none' or split == 'train': + self.day = 0 + self.max_day_range = days if split == 'none' else days - 1 + elif split == 'test' or split == 'val': + self.day = days - 1 + num_samples = self.offset_per_file[days] - self.offset_per_file[days - 1] + self.test_size = int(np.ceil(num_samples / 2.)) + self.val_size = num_samples - self.test_size + else: + sys.exit("ERROR: dataset split is neither none, nor train or test.") + + # load unique counts + with np.load(self.d_path + self.d_file + "_fea_count.npz") as data: + self.counts = data["counts"] + self.m_den = den_fea # X_int.shape[1] + self.n_emb = len(self.counts) + logging.debug("Sparse features= %d, Dense features= %d" % (self.n_emb, self.m_den)) + + # Load the test data + # Only a single day is used for testing + if self.split == 'test' or self.split == 'val': + # only a single day is used for testing + fi = self.npzfile + "_{0}_reordered.npz".format( + self.day + ) + with np.load(fi) as data: + self.X_int = data["X_int"] # continuous feature + self.X_cat = data["X_cat"] # categorical feature + self.y = data["y"] # target + + else: + # load and preprocess data + with np.load(file) as data: + X_int = data["X_int"] # continuous feature + X_cat = data["X_cat"] # categorical feature + y = data["y"] # target + self.counts = data["counts"] + self.m_den = X_int.shape[1] # den_fea + self.n_emb = len(self.counts) + logging.debug("Sparse fea = %d, Dense fea = %d" % (self.n_emb, self.m_den)) + + # create reordering + indices = np.arange(len(y)) + + self.train_indices = None + self.test_indices = None + self.val_indices = None + if store_all_indices: + indices = np.array_split(indices, self.offset_per_file[1:-1]) + + # randomize train data (per day) + if randomize == "day": # or randomize == "total": + for i in range(len(indices) - 1): + indices[i] = np.random.permutation(indices[i]) + logging.debug("Randomized indices per day ...") + + self.train_indices = np.concatenate(indices[:-1]) + self.test_indices = indices[-1] + self.test_indices, self.val_indices = np.array_split(self.test_indices, 2) + + logging.debug("Defined %s indices..." % (split)) + self.X_int = X_int + self.X_cat = X_cat + self.y = y + # randomize train data (across days) + if randomize == "total": + train_indices = np.random.permutation(self.train_indices) + logging.debug("Randomized indices across days ...") + + elif split == "none": + # randomize all data + if randomize == "total": + indices = np.random.permutation(indices) + logging.debug("Randomized indices...") + + X_int[indices] = X_int + X_cat[indices] = X_cat + y[indices] = y + + else: + indices = np.array_split(indices, self.offset_per_file[1:-1]) + + # randomize train data (per day) + if randomize == "day": # or randomize == "total": + for i in range(len(indices) - 1): + indices[i] = np.random.permutation(indices[i]) + logging.debug("Randomized indices per day ...") + + train_indices = np.concatenate(indices[:-1]) + test_indices = indices[-1] + test_indices, val_indices = np.array_split(test_indices, 2) + + logging.debug("Defined %s indices..." 
% (split)) + + # randomize train data (across days) + if randomize == "total": + train_indices = np.random.permutation(train_indices) + logging.debug("Randomized indices across days ...") + + # create training, validation, and test sets + if split == 'train': + self.X_int = [X_int[i] for i in train_indices] + self.X_cat = [X_cat[i] for i in train_indices] + self.y = [y[i] for i in train_indices] + elif split == 'val': + self.X_int = [X_int[i] for i in val_indices] + self.X_cat = [X_cat[i] for i in val_indices] + self.y = [y[i] for i in val_indices] + elif split == 'test': + self.X_int = [X_int[i] for i in test_indices] + self.X_cat = [X_cat[i] for i in test_indices] + self.y = [y[i] for i in test_indices] + + logging.debug("Split data according to indices...") + + def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + + if isinstance(index, slice): + return [ + self[idx] for idx in range( + index.start or 0, index.stop or len(self), index.step or 1 + ) + ] + + if self.memory_map: + if self.split == 'none' or self.split == 'train': + # check if need to swicth to next day and load data + if index == self.offset_per_file[self.day]: + # logging.debug("day_boundary switch", index) + self.day_boundary = self.offset_per_file[self.day] + fi = self.npzfile + "_{0}_reordered.npz".format( + self.day + ) + # logging.debug('Loading file: ', fi) + with np.load(fi) as data: + self.X_int = data["X_int"] # continuous feature + self.X_cat = data["X_cat"] # categorical feature + self.y = data["y"] # target + self.day = (self.day + 1) % self.max_day_range + + i = index - self.day_boundary + elif self.split == 'test' or self.split == 'val': + # only a single day is used for testing + i = index + (0 if self.split == 'test' else self.test_size) + else: + sys.exit("ERROR: dataset split is neither none, nor train or test.") + else: + i = index + + if self.max_ind_range > 0: + return self.X_int[i], self.X_cat[i] % self.max_ind_range, self.y[i] + else: + return self.X_int[i], self.X_cat[i], self.y[i] + + def _default_preprocess( + self, + X_int: torch.Tensor, + X_cat: torch.Tensor, + y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + X_int = torch.log(torch.tensor(X_int, dtype=torch.float) + 1) + if self.max_ind_range > 0: + X_cat = torch.tensor(X_cat % self.max_ind_range, dtype=torch.long) + else: + X_cat = torch.tensor(X_cat, dtype=torch.long) + y = torch.tensor(y.astype(np.float32)) + + return X_int, X_cat, y + + def __len__(self) -> int: + if self.memory_map: + if self.split == 'none': + return self.offset_per_file[-1] + elif self.split == 'train': + return self.offset_per_file[-2] + elif self.split == 'test': + return self.test_size + elif self.split == 'val': + return self.val_size + else: + sys.exit("ERROR: dataset split is neither none, nor train nor test.") + else: + return len(self.y) + + +def collate_wrapper_criteo_offset( + list_of_tuples: List[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """collates the input features into processabel tensors + + Args: + list_of_tuples (List[torch.Tensor]): input tensors + + Returns: + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: output + """ + # where each tuple is (X_int, X_cat, y) + # transposed_data = np.array(list(zip(*list_of_tuples)), dtype=np.float32) + # transposed_data = list(zip(*list_of_tuples)) + + transposed_data = list(np.array(i) for i in zip(*list_of_tuples)) + X_int = torch.log(torch.tensor(transposed_data[0], dtype=torch.float) + 1) + X_cat = 
torch.tensor(transposed_data[1], dtype=torch.long) + T = torch.tensor(transposed_data[2], dtype=torch.float32).view(-1, 1) + + batchSize = X_cat.shape[0] + featureCnt = X_cat.shape[1] + + lS_i = [X_cat[:, i] for i in range(featureCnt)] + lS_o = [torch.tensor(range(batchSize)) for _ in range(featureCnt)] + gc.collect() + return X_int, torch.stack(lS_i), torch.stack(lS_o), T + + +# Conversion from offset to length +def offset_to_length_converter(lS_o: torch.Tensor, lS_i: torch.Tensor) -> torch.Tensor: + """converts the offsets of categorical features into tensors containing length + + Args: + lS_o (torch.Tensor): offset tensors + lS_i (torch.Tensor): indices + + Returns: + torch.Tensor: lengths + """ + def diff(tensor: torch.Tensor) -> torch.Tensor: + return tensor[1:] - tensor[:-1] + + return torch.stack( + [ + diff(torch.cat((S_o, torch.tensor(lS_i[ind].shape))).int()) + for ind, S_o in enumerate(lS_o) + ] + ) + + +def collate_wrapper_criteo_length( + list_of_tuples: List[Any]) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """collates the input features into processabel tensors + + Args: + list_of_tuples (List[torch.Tensor]): input tensors + + Returns: + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: output + """ + transposed_data = list(np.array(i) for i in zip(*list_of_tuples)) + # transposed_data = np.array(list(zip(*list_of_tuples)), dtype=np.float32) + X_int = torch.log(torch.tensor(transposed_data[0], dtype=torch.float) + 1) + X_cat = torch.tensor(transposed_data[1], dtype=torch.long) + T = torch.tensor(transposed_data[2], dtype=torch.float32).view(-1, 1) + + batchSize = X_cat.shape[0] + featureCnt = X_cat.shape[1] + + lS_i = torch.stack([X_cat[:, i] for i in range(featureCnt)]) + lS_o = torch.stack( + [torch.tensor(range(batchSize)) for _ in range(featureCnt)] + ) + + lS_l = offset_to_length_converter(lS_o, lS_i) + + return X_int, lS_l, lS_i, T diff --git a/examples/dlrm/requirements.txt b/examples/dlrm/requirements.txt new file mode 100644 index 0000000..7308aef --- /dev/null +++ b/examples/dlrm/requirements.txt @@ -0,0 +1,6 @@ +bitorch +fvbitcore +pytorch_lightning +sklearn +torchmetrics +wandb diff --git a/examples/dlrm/train_dlrm.py b/examples/dlrm/train_dlrm.py new file mode 100644 index 0000000..7353c53 --- /dev/null +++ b/examples/dlrm/train_dlrm.py @@ -0,0 +1,219 @@ +import os + +if os.environ.get("REMOTE_PYCHARM_DEBUG_SESSION", False): + import pydevd_pycharm + + pydevd_pycharm.settrace( + "localhost", + port=int(os.environ.get("REMOTE_PYCHARM_DEBUG_PORT", "12345")), + stdoutToServer=True, + stderrToServer=True, + ) + +import argparse +import logging +from pathlib import Path +from typing import List, Any, Tuple + +import fvbitcore.nn as fv_nn +import wandb +from pytorch_lightning import Trainer +from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor +from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger, LightningLoggerBase, WandbLogger +from torch.utils.data import DataLoader + +from bitorch import apply_args_to_configuration +from bitorch.models import DLRM +from bitorch.quantizations import Quantization + +from utils.arg_parser import create_argparser +from utils.lightning_model import ModelWrapper +from utils.utils import configure_logging +from utils.log import CommandLineLogger + +from datasets.criteo import Criteo, SplitCriteoDataset +from facebook_dataloading.dataloading_fb import collate_wrapper_criteo_offset + +logger = logging.getLogger() + + +def make_dlrm_dataloaders( + dataset_dir: Path, 
download: bool, ignore_size: float, batch_size: int, batch_size_test: int, num_workers: int
+) -> Tuple[DataLoader, DataLoader, int, List[int]]:
+    """Creates test and train dataloaders for dlrm
+
+    Args:
+        dataset_dir (Path): path to dataset (to be stored or existent)
+        download (bool): whether the dataset should be downloaded
+        ignore_size (float): portion of dataset to ignore while training
+        batch_size (int): batch size
+        batch_size_test (int): batch size to be used in test loader (might be larger)
+        num_workers (int): number of workers to be used in dataloader
+
+    Returns:
+        Tuple[DataLoader, DataLoader, int, List[int]]: the dataloaders, the size of the dense features and the sizes of the embedding layers
+    """
+    logging.info("loading Criteo dataset...")
+    dataset = Criteo(True, root_directory=dataset_dir, download=download).dataset
+
+    train_dataset = SplitCriteoDataset(dataset, "train", ignore_size=ignore_size)
+    test_dataset = SplitCriteoDataset(dataset, "test", ignore_size=ignore_size)
+    logging.info(f"loaded {len(train_dataset)} train and {len(test_dataset)} test samples!")
+
+    train_loader = DataLoader(
+        train_dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        num_workers=num_workers,
+        collate_fn=collate_wrapper_criteo_offset,
+        pin_memory=False,
+        drop_last=False,
+    )
+
+    test_loader = DataLoader(
+        test_dataset,
+        batch_size=batch_size_test,
+        shuffle=False,
+        num_workers=num_workers,
+        collate_fn=collate_wrapper_criteo_offset,
+        pin_memory=False,
+        drop_last=False,
+    )
+
+    return train_loader, test_loader, train_dataset.dataset.m_den, train_dataset.dataset.counts
+
+
+def main(args: argparse.Namespace, model_args: argparse.Namespace) -> None:
+    """trains a DLRM model on the configured dataset.
+
+    Args:
+        args (argparse.Namespace): cli arguments
+        model_args (argparse.Namespace): model specific cli arguments
+    """
+    configure_logging(logger, args.log_file, args.log_level, args.log_stdout)
+
+    apply_args_to_configuration(args)
+
+    output_dir = Path(args.result_directory)
+    output_dir.mkdir(exist_ok=True)
+
+    loggers: List[LightningLoggerBase] = []
+    if args.tensorboard_log:
+        loggers.append(TensorBoardLogger(str(output_dir), name="tensorboard"))
+    if args.csv_log:
+        loggers.append(CSVLogger(str(output_dir), name="csv"))
+    if args.wandb_log:
+        loggers.append(
+            WandbLogger(
+                project=args.wandb_project,
+                log_model=True,
+                name=args.wandb_experiment,
+                save_dir=str(output_dir),
+            )  # type: ignore
+        )
+    callbacks: List[Any] = []
+    if args.checkpoint_dir is not None:
+        callbacks.append(
+            ModelCheckpoint(
+                args.checkpoint_dir,
+                save_last=True,
+                save_top_k=args.checkpoint_keep_count,
+                monitor="metrics/roc-auc",
+            )
+        )
+
+    # providing our own progress bar disables the default progress bar (not needed to disable later on)
+    cmd_logger = CommandLineLogger(args.log_interval)
+    callbacks.append(cmd_logger)
+    configure_logging(cmd_logger.logger, args.log_file, args.log_level, args.log_stdout)
+
+    if len(loggers) > 0:
+        lr_monitor = LearningRateMonitor(logging_interval="step")
+        callbacks.append(lr_monitor)
+
+    if args.dataset == "criteo":
+        train_loader, test_loader, dense_feature_size, embedding_layer_sizes = make_dlrm_dataloaders(
+            args.dataset_dir,
+            args.download,
+            args.ignore_dataset_size,
+            args.batch_size,
+            args.batch_size_test,
+            args.num_workers,
+        )
+    else:
+        logging.error(f"dataset {args.dataset} is not yet supported for dlrm")
+        return
+
+    model_kwargs = vars(model_args)
+    logger.debug(f"got model args as dict: {model_kwargs}")
+
+    data_point = next(iter(train_loader))
+
print("DATA SHAPE:", (type(data_point[0]), (type(data_point[1]), type(data_point[2])))) + print("DATA SHAPE:", (data_point[0].shape, (data_point[1].shape, data_point[2].shape))) + data_point = (data_point[0], (data_point[1], data_point[2])) + + # for model registry compliance + model_kwargs["embedding_layer_sizes"] = embedding_layer_sizes + model_kwargs["input_shape"] = [] + model_kwargs["dense_feature_size"] = dense_feature_size + if args.pretrained: + model = DLRM.from_pretrained(args.checkpoint_load, **model_kwargs) + else: + model = DLRM(**model_kwargs) # type: ignore + model.initialize() + + if args.checkpoint_load is not None and args.resume_training: + logger.info(f"resuming training from pretrained model at checkpoint {args.checkpoint_load}") + model_wrapped = ModelWrapper.load_from_checkpoint(args.checkpoint_load) + else: + model_wrapped = ModelWrapper(model, 1, args) + + # for model registry compliance + model_kwargs["model_name"] = "dlrm" + if args.wandb_log: + wandb.config.update({"model_config": model_kwargs}) + + trainer = Trainer( + strategy=args.strategy, + accelerator="cpu" if args.cpu else args.accelerator, + gpus=0 if args.cpu else args.gpus, + max_epochs=args.max_epochs, + max_steps=args.max_steps, + logger=loggers if len(loggers) > 0 else None, # type: ignore + callbacks=callbacks, # type: ignore + log_every_n_steps=args.log_interval, + ) + logger.info("model: DLRM") + logger.info(f"optimizer: {args.optimizer}") + logger.info(f"lr: {args.lr}") + logger.info(f"max_epochs: {args.max_epochs}") + computational_intensity = fv_nn.FlopCountAnalysis(model, inputs=data_point, quantization_base_class=Quantization) + + stats, table = fv_nn.flop_count_table(computational_intensity, automatic_qmodules=True) + logger.info("\n" + table) + total_size = stats["#compressed size in bits"][""] + logger.info("Total size in MB: " + str(total_size / 1e6 / 8.0)) + total_flops = stats["#speed up flops (app.)"][""] + logger.info("Approximated mflops: " + str(total_flops / 1e6)) + if args.wandb_log: + wandb.config.update( + { + "mflops": total_flops / 1e6, + "size in MB": total_size / 1e6 / 8.0, + } + ) + + trainer.fit( + model_wrapped, + train_dataloaders=train_loader, + val_dataloaders=test_loader, + ckpt_path=args.checkpoint_load if not args.pretrained else None, + ) + + +if __name__ == "__main__": + parser, model_parser = create_argparser() + args_, unparsed_model_args = parser.parse_known_args() + model_args_ = model_parser.parse_args(unparsed_model_args) + + main(args_, model_args_) diff --git a/examples/dlrm/utils/__init__.py b/examples/dlrm/utils/__init__.py new file mode 100644 index 0000000..f85768d --- /dev/null +++ b/examples/dlrm/utils/__init__.py @@ -0,0 +1 @@ +"""Utilities for image classification example.""" diff --git a/examples/dlrm/utils/arg_parser.py b/examples/dlrm/utils/arg_parser.py new file mode 100644 index 0000000..0b2867c --- /dev/null +++ b/examples/dlrm/utils/arg_parser.py @@ -0,0 +1,302 @@ +from argparse import ArgumentParser +import sys +from typing import Tuple + +from bitorch.models import model_from_name, model_names +from bitorch import add_config_args +from bitorch.models.dlrm import DLRM +from pytorch_lightning import Trainer + + +def add_logging_args(parser: ArgumentParser) -> None: + """adds cli parameters for logging configuration + + Args: + parser (ArgumentParser): the main argument parser + """ + log = parser.add_argument_group("Logging", "parameters for logging") + log.add_argument( + "--log-level", + type=str, + default="info", + choices=["debug", 
"info", "warning", "error", "critical"], + help="log level for logging message output", + ) + log.add_argument( + "--log-interval", + type=int, + default=100, + metavar="N", + help="how many batches to wait before logging training status", + ) + log.add_argument( + "--log-file", + type=str, + default=None, + help="output file path for logging. default to stdout", + ) + log.add_argument( + "--log-stdout", + action="store_true", + help="toggles force logging to stdout. if a log file is specified, logging will be" + "printed to both the log file and stdout", + ) + log.add_argument( + "--result-directory", + type=str, + default="./logs", + help="path to logs directory, e.g. tensorboard logs, csv files", + ) + log.add_argument( + "--disable-tensorboard-log", + action="store_false", + dest="tensorboard_log", + help="disables tensorboard logging", + ) + log.add_argument( + "--disable-csv-log", + action="store_false", + dest="csv_log", + help="disables csv logging", + ) + log.add_argument( + "--wandb", + action="store_true", + dest="wandb_log", + help="enables wandb logging (WANDB_API_KEY environment variable must be set)", + ) + log.add_argument( + "--wandb-project", + type=str, + default="bitorch", + help="name of wand project to be used by wandb logger", + ) + log.add_argument( + "--wandb-experiment", + type=str, + default=None, + help="name of wand experiment to be used by wandb logger", + ) + + +def add_checkpoint_args(parser: ArgumentParser) -> None: + """adds cli parameters for checkpoint logging + + Args: + parser (ArgumentParser): the main argument parser + """ + checkpoint = parser.add_argument_group("checkpoints", "parameters for checkpoint storing / loading") + checkpoint.add_argument( + "--checkpoint-dir", + type=str, + default=None, + help="set a custom path to store checkpoints in.", + ) + checkpoint.add_argument( + "--checkpoint-keep-count", + type=int, + default=1, + help="number of checkpoints to keep.", + ) + checkpoint.add_argument( + "--checkpoint-load", + type=str, + default=None, + help="path to checkpoint file to load state from. if omitted and --pretrained is activated, the model weights will be downloaded from the model hub. If --pretrained is not set, a new model will be trained.", + ) + checkpoint.add_argument( + "--resume_training", + action="store_true", + help="resume training from given checkpoint", + ) + checkpoint.add_argument( + "--pretrained", + action="store_true", + help="load the state dict either from model hub or from checkpoint_load", + ) + + +def add_optimizer_args(parser: ArgumentParser) -> None: + """adds cli parameters for optimizer configuration + + Args: + parser (ArgumentParser): the main argument parser + """ + optimizer = parser.add_argument_group("Optimizer", "parameters for optimizer") + optimizer.add_argument( + "--lr-scheduler", + type=str, + choices=["cosine", "step", "exponential"], + help="name of the lr scheduler to use. default to none", + ) + optimizer.add_argument( + "--lr", + type=float, + default=0.01, + help="initial learning rate (default: 0.01)", + ) + optimizer.add_argument( + "--lr-factor", + default=0.1, + type=float, + help="learning rate decay ratio. this is used only by the step and exponential lr scheduler", + ) + optimizer.add_argument( + "--lr-steps", + nargs="*", + default=[30, 60, 90], + help="list of learning rate decay epochs as list. this is used only by the step scheduler", + ) + optimizer.add_argument( + "--momentum", + type=float, + default=0.9, + help="momentum value for optimizer, default is 0.9. 
only used for sgd optimizer", + ) + optimizer.add_argument( + "--optimizer", + type=str, + default="adam", + choices=["adam", "sgd", "radam"], + help="the optimizer to use. default is adam.", + ) + + +def add_dataset_args(parser: ArgumentParser) -> None: + """adds cli parameters for dataset configuration + + Args: + parser (ArgumentParser): the main argument parser + """ + data = parser.add_argument_group("dataset", "parameters for the dataset used for training") + data.add_argument( + "--dataset", + type=str, + default="criteo", + choices=["criteo"], + help="name of the dataset to be used for training", + ) + data.add_argument( + "--dataset-dir", + type=str, + default=None, + help="path to where the train dataset is saved / shall be downloaded to", + ) + data.add_argument( + "--download", + action="store_true", + help="toggles wether the dataset shall be downloaded if not present. " + "only has effect with the cifar10 and mnist dataset so far.", + ) + data.add_argument( + "--batch-size", + type=int, + default=128, + help="batch size for training (default: 128)", + ) + data.add_argument( + "--batch-size-test", + type=int, + default=128, + help="batch size for testing (might be higher than training) (default: 128)", + ) + data.add_argument( + "--ignore-dataset-size", + type=float, + default=0.9, + help="portion of dataset that should be ignored for training (usefull for fast development) (default: 128)", + ) + data.add_argument( + "--num-workers", + type=int, + default=4, + help="number of workers to be used for dataloading (default: 4)", + ) + data.add_argument( + "--augmentation", + type=str, + choices=["none", "low", "medium", "high"], + default="none", + help="level of augmentation to be used in data preparation (default 'none')", + ) + data.add_argument( + "--fake-data", + action="store_true", + help="train with fake data", + ) + + +def create_model_argparser(model_class: object) -> ArgumentParser: + """adds model specific cli arguments from model_class object + + Args: + model_class (object): the class-object of selected model + + Returns: + ArgumentParser: cli argument parser + """ + model_parser = ArgumentParser(add_help=False) + model_class.add_argparse_arguments(model_parser) + return model_parser + + +def help_in_args() -> bool: + """determines if script was called with a --help or -h flag + + Returns: + bool: True if help flag was set, False otherwise + """ + passed_args = sys.argv[1:] + if "--help" in passed_args or "-h" in passed_args: + return True + return False + + +def add_all_model_args(parser: ArgumentParser) -> None: + """iterates through all existent models and adds their specific cli args to parser + + Args: + parser (ArgumentParser): the main cli argument parser + """ + for model_name in model_names(): + model_group = parser.add_argument_group(model_name, f"parameters for {model_name} model") + model_from_name(model_name).add_argparse_arguments(model_group) # type: ignore + + +def add_regular_args(parser: ArgumentParser) -> None: + """adds all regular arguments, including dynamically created config args to parser. + + Args: + parser (ArgumentParser): parser to add the regular arguments to + """ + Trainer.add_argparse_args(parser) + add_logging_args(parser) + add_dataset_args(parser) + add_optimizer_args(parser) + add_checkpoint_args(parser) + + add_config_args(parser) + parser.add_argument( + "--cpu", + action="store_true", + help="explicitly use the cpu. 
overwrites gpu settings", + ) + + +def create_argparser(arguments=None) -> Tuple[ArgumentParser, ArgumentParser]: + """creates a main argument parser for general options and a model parser for model specific options + + Returns: + Tuple[ArgumentParser, ArgumentParser]: the main and model argument parser + """ + parser = ArgumentParser(description="Bitorch Image Classification") + + add_regular_args(parser) + + if help_in_args(): + model_group = parser.add_argument_group("DLRM", "parameters for DLRM model") + DLRM.add_argparse_arguments(model_group) + args, _ = parser.parse_known_args() + + model_parser = create_model_argparser(DLRM) + return parser, model_parser diff --git a/examples/dlrm/utils/lightning_model.py b/examples/dlrm/utils/lightning_model.py new file mode 100644 index 0000000..8c858f7 --- /dev/null +++ b/examples/dlrm/utils/lightning_model.py @@ -0,0 +1,124 @@ +import numpy as np +from sklearn import metrics +import logging +from argparse import Namespace +from typing import Union, Any, List + +import torch +import torch.nn.functional as F +from pytorch_lightning import LightningModule +from torch.nn import Module, BCELoss +from torchmetrics import Accuracy, F1Score, Precision, Recall + +from .unused_args import clean_hyperparameters +from .utils import create_optimizer, create_scheduler + + +class ModelWrapper(LightningModule): + """Wrapper class for a pytorch model to fully utilize pytorch lightning functionality""" + + def __init__( + self, + model: Module, + num_classes: int, + args: Namespace, + add_f1_prec_recall: bool = False, + ) -> None: + super().__init__() + self.save_hyperparameters(clean_hyperparameters(args)) + self.loss_function = BCELoss() + self.model = model + self.train_accuracy_top1 = Accuracy(num_classes=num_classes) + self.train_accuracy_top5 = Accuracy(top_k=5, num_classes=num_classes) + self.accuracy_top1 = Accuracy(num_classes=num_classes) + self.accuracy_top5 = Accuracy(top_k=5, num_classes=num_classes) + self.add_f1_prec_recall = add_f1_prec_recall + if add_f1_prec_recall: + self.f1 = F1Score(num_classes=num_classes) + self.prec = Precision(num_classes=num_classes) + self.recall = Recall(num_classes=num_classes) + + def configure_optimizers(self) -> Union[dict, torch.optim.Optimizer]: # type: ignore + logging.info(f"Using {self.hparams.optimizer} optimizer and {self.hparams.lr_scheduler} lr scheduler...") + optimizer = create_optimizer(self.hparams.optimizer, self.model, self.hparams.lr, self.hparams.momentum) + if self.hparams.lr_scheduler is not None: + scheduler = create_scheduler( + self.hparams.lr_scheduler, + optimizer, + self.hparams.lr_factor, + self.hparams.lr_steps, + self.hparams.max_epochs, + ) + return {"optimizer": optimizer, "lr_scheduler": scheduler} + else: + return optimizer + + def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor: # type: ignore + dense_values, categorical_values_i, categorical_values_o, y = batch + if isinstance(categorical_values_i, list): + for el in categorical_values_i: + el.to(self.device) + else: + categorical_values_i = categorical_values_i.to(self.device) + if isinstance(categorical_values_o, list): + for el in categorical_values_o: + el.to(self.device) + else: + categorical_values_o = categorical_values_o.to(self.device) + dense_values.to(self.device) + y_hat = self.model(dense_values, (categorical_values_i, categorical_values_o)) + + loss = self.loss_function(torch.squeeze(y_hat), torch.squeeze(y)) + self.log_dict({"loss/train": loss}) + return loss + + def 
validation_step_end(self, *args: Any, **kwargs: Any) -> Any:
+        """calculate all the metrics and log them via wandb/tensorboard"""
+        y = torch.cat(list(map(lambda x: x["y"], self.validation_results)))
+        y_hat = torch.cat(list(map(lambda x: x["y_hat"], self.validation_results)))
+        loss = self.loss_function(y_hat, y)
+        rmse = torch.sqrt(F.mse_loss(y_hat, y)).item()
+        y_array = np.array(y.cpu())
+        y_hat_array = np.array(y_hat.cpu()) >= 0.5
+        balanced_accuracy = metrics.balanced_accuracy_score(y_array, y_hat_array)
+        accuracy = metrics.accuracy_score(y_array, y_hat_array)
+        f1 = metrics.f1_score(y_array, y_hat_array)
+        roc_auc = metrics.roc_auc_score(y_array, y_hat.cpu())
+        precision = metrics.precision_score(y_array, y_hat_array)
+        recall = metrics.recall_score(y_array, y_hat_array)
+        self.log_dict(
+            {
+                "val_loss": loss,
+                "val_rmse": rmse,
+                "roc_auc": roc_auc,
+                "precision": precision,
+                "recall": recall,
+                "balanced accuracy": balanced_accuracy,
+                "accuracy": accuracy,
+                "f1 score": f1,
+            },
+            prog_bar=True,
+        )
+        return super().validation_step_end(*args, **kwargs)
+
+    def on_validation_start(self) -> None:
+        self.validation_results: List[dict] = []
+        return super().on_validation_start()
+
+    def validation_step(self, batch: torch.Tensor, batch_idx: int) -> None:  # type: ignore
+        dense_values, categorical_values_i, categorical_values_o, y = batch
+        dense_values = dense_values.to(self.device)
+        if isinstance(categorical_values_i, list):
+            for el in categorical_values_i:
+                el.to(self.device)
+        else:
+            categorical_values_i = categorical_values_i.to(self.device)
+        if isinstance(categorical_values_o, list):
+            for el in categorical_values_o:
+                el.to(self.device)
+        else:
+            categorical_values_o = categorical_values_o.to(self.device)
+        y_hat = torch.squeeze(self.model(dense_values, (categorical_values_i, categorical_values_o)))
+        y = torch.squeeze(y)
+        y_hat = torch.squeeze(y_hat)
+        self.validation_results.append({"y": y, "y_hat": y_hat})
diff --git a/examples/dlrm/utils/log.py b/examples/dlrm/utils/log.py
new file mode 100644
index 0000000..ea49921
--- /dev/null
+++ b/examples/dlrm/utils/log.py
@@ -0,0 +1,177 @@
+import logging
+import time
+from typing import Optional, Any, Dict, List, Union
+
+import math
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks import ProgressBarBase
+from pytorch_lightning.utilities.types import STEP_OUTPUT
+
+
+TIME_INTERVALS = (
+    ("w", 60 * 60 * 24 * 7),
+    ("d", 60 * 60 * 24),
+    ("h", 60 * 60),
+    ("m", 60),
+    ("s", 1),
+)
+
+
+def _display_time(seconds: float, granularity: int = 2) -> str:
+    result: List[str] = []
+
+    seconds = int(round(seconds))
+
+    for name, count in TIME_INTERVALS:
+        value = seconds // count
+        if value == 0 and len(result) == 0:
+            continue
+        seconds -= value * count
+        result.append(f"{value:02d}{name}")
+    return ":".join(result[:granularity])
+
+
+class CommandLineLogger(ProgressBarBase):
+    """
+    This class provides a replacement for the default tqdm-based progress bar that is more suitable for logging
+    progress in a non-interactive way, e.g. to a file.
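+    Instead of an updating bar, it emits a plain log line roughly every `refresh_rate` batches with the
+    current metrics and an ETA estimate for the running epoch and the remaining training time.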
+ """ + + def __init__(self, refresh_rate: int) -> None: + super().__init__() + self._is_enabled = True + self._epoch_start_time: float = 0.0 + self._validation_start_time: float = 0.0 + self._train_start_time: float = 0.0 + self._last_epoch_times: List[float] = [] + self._validation_times: List[float] = [] + + self.logger = logging.getLogger("CommandLineLogger") + + self.refresh_rate = refresh_rate + self.log_batch = self.log_info + self.log_debug("Command line logger initialized.") + + def log_debug(self, message: str) -> None: + if self._is_enabled: + # self.logger.debug(message) + print(message) + + def log_info(self, message: str) -> None: + if self._is_enabled: + # self.logger.info(message) + print(message) + + def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None: + self.log_debug(f"Command line logger setup. ( is root trainer: {trainer.is_global_zero} )") + super().setup(trainer, pl_module, stage) + + def disable(self) -> None: + self.log_debug("Logging disabled...") + self._is_enabled = False + + def enable(self) -> None: + self._is_enabled = True + self.log_debug("Logging enabled...") + + def _should_update(self, current: int, total: Union[int, float]) -> bool: + return self._is_enabled and (current % self.refresh_rate == 0 or current == int(total)) + + def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self.log_info("Starting training...") + + def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self.log_info("Ending training.") + + def on_train_batch_end( + self, + trainer: "pl.Trainer", + pl_module: "pl.LightningModule", + outputs: STEP_OUTPUT, + batch: Any, + batch_idx: int, + unused: int = 0, + ) -> None: + if not self._should_update(self.train_batch_idx, self.total_train_batches): + return + + percent = (self.train_batch_idx / self.total_train_batches) * 100 + + time_in_this_epoch = time.time() - self._epoch_start_time + epoch_total_est = int(round((time_in_this_epoch * self.total_train_batches) / self.train_batch_idx)) + eta_epoch = _display_time(epoch_total_est - time_in_this_epoch) + full_epochs_left = trainer.max_epochs - trainer.current_epoch + if full_epochs_left < 0: + full_epochs_left = 0 + if self._average_epoch_time() > 0: + epoch_total_est = self._average_epoch_time() + self._average_validation_time() + eta_train = _display_time(epoch_total_est - time_in_this_epoch + full_epochs_left * epoch_total_est) + + epoch_info = f"Epoch {trainer.current_epoch:3d}" + batch_info = f"{self.train_batch_idx:4d}/{self.total_train_batches:4d} ({percent:5.1f}%)" + metrics = self._format_metric_string(self.get_metrics(trainer, pl_module)) + eta_info = f"ETA: {eta_epoch} & {eta_train}" + self.log_batch(f"{epoch_info} - {batch_info} - {metrics} - {eta_info}") + + @staticmethod + def _replace_metric_key(metric_key: str) -> str: + remove_strings = [ + "metrics/", + ] + for s in remove_strings: + metric_key = metric_key.replace(s, "") + return metric_key.replace("accuracy", "acc") + + @staticmethod + def _format_metric_string(metrics_dict: Dict[str, Union[int, str]], train: bool = True) -> str: + metric_list = [] + + for key, value in metrics_dict.items(): + if key == "v_num" or "loss" in key: + continue + key = CommandLineLogger._replace_metric_key(key) + try: + f_value = float(value) + if math.isnan(f_value): + continue + if key: + metric_list.append(f"{key}={f_value:2.2f}") + except ValueError: + if key: + metric_list.append(f"{key}={value}") + + return 
", ".join(metric_list) + + @staticmethod + def _average_time(time_list: List[float]) -> int: + return int(round(sum(time_list) / len(time_list))) + + def _average_epoch_time(self) -> int: + if len(self._last_epoch_times) == 0: + return 0 + return self._average_time(self._last_epoch_times) + + def _average_validation_time(self) -> int: + if len(self._validation_times) == 0: + return 0 + return self._average_time(self._validation_times) + + def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._epoch_start_time = time.time() + if self._train_start_time is None: + self._train_start_time = self._epoch_start_time + + def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._last_epoch_times.append(time.time() - self._epoch_start_time) + self._last_epoch_times = self._last_epoch_times[-3:] + + def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._validation_start_time = time.time() + self.log_info("Validating model...") + + def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._validation_times.append(time.time() - self._validation_start_time) + self._validation_times = self._validation_times[-3:] + self.log_info( + f"Validation complete. ({self._format_metric_string(self.get_metrics(trainer, pl_module), train=False)})" + ) diff --git a/examples/dlrm/utils/unused_args.py b/examples/dlrm/utils/unused_args.py new file mode 100644 index 0000000..7adeb79 --- /dev/null +++ b/examples/dlrm/utils/unused_args.py @@ -0,0 +1,73 @@ +"""Args from PyTorch Lightning's Trainer that are currently unused and a function to deal with them.""" +from argparse import Namespace +from typing import List + + +def clean_hyperparameters(args: Namespace) -> Namespace: + """Remove args which are not passed to the constructor in our training script.""" + clean_args = Namespace() + for key in args.__dict__.keys(): + if key in UNUSED_PL_ARGS: + continue + setattr(clean_args, key, getattr(args, key)) + return clean_args + + +# this list is copied from the constructor of PyTorch's Trainer, but all arguments used in our script were removed +UNUSED_PL_ARGS: List[str] = [ + "logger", + "checkpoint_callback", + "enable_checkpointing", + "callbacks", + "default_root_dir", + "gradient_clip_val", + "gradient_clip_algorithm", + "process_position", + "num_nodes", + "num_processes", + "devices", + "auto_select_gpus", + "tpu_cores", + "ipus", + "log_gpu_memory", + "progress_bar_refresh_rate", + "enable_progress_bar", + "overfit_batches", + "track_grad_norm", + "check_val_every_n_epoch", + "fast_dev_run", + "accumulate_grad_batches", + "min_epochs", + "min_steps", + "max_time", + "limit_train_batches", + "limit_val_batches", + "limit_test_batches", + "limit_predict_batches", + "val_check_interval", + "flush_logs_every_n_steps", + "log_every_n_steps", + "sync_batchnorm", + "precision", + "enable_model_summary", + "weights_summary", + "weights_save_path", + "num_sanity_val_steps", + "resume_from_checkpoint", + "profiler", + "benchmark", + "deterministic", + "reload_dataloaders_every_n_epochs", + "auto_lr_find", + "replace_sampler_ddp", + "detect_anomaly", + "auto_scale_batch_size", + "prepare_data_per_node", + "plugins", + "amp_backend", + "amp_level", + "move_metrics_to_cpu", + "multiple_trainloader_mode", + "stochastic_weight_avg", + "terminate_on_nan", +] diff --git a/examples/pytorch_lightning/utils/utils.py b/examples/dlrm/utils/utils.py similarity 
index 82% rename from examples/pytorch_lightning/utils/utils.py rename to examples/dlrm/utils/utils.py index 532451e..2a55bbb 100644 --- a/examples/pytorch_lightning/utils/utils.py +++ b/examples/dlrm/utils/utils.py @@ -1,38 +1,36 @@ import logging from pathlib import Path -from torch.optim import Adam, SGD + +from torch.optim import Adam, SGD, RAdam from torch.optim.lr_scheduler import MultiStepLR, ExponentialLR, CosineAnnealingLR, _LRScheduler -from typing import Union, Optional +from typing import Union, Optional, Any from torch.nn import Module from torch.optim.optimizer import Optimizer -from bitorch.optimization.radam import RAdam - -def set_logging(log_file: Union[None, str], log_level: str, output_stdout: bool) -> None: +def configure_logging(logger: Any, log_file: Union[None, str], log_level: str, output_stdout: bool) -> None: """configures logging module. Args: + logger: the logger to be configured log_file (str): path to log file. if omitted, logging will be forced to stdout. log_level (str): string name of log level (e.g. 'debug') output_stdout (bool): toggles stdout output. will be activated automatically if no log file was given. otherwise if activated, logging will be outputed both to stdout and log file. """ - logger = logging.getLogger() - log_level_name = log_level.upper() log_level = getattr(logging, log_level_name) logger.setLevel(log_level) logging_format = logging.Formatter( - '%(asctime)s - %(levelname)s [%(filename)s : %(funcName)s() : l. %(lineno)s]: %(message)s', - datefmt='%Y-%m-%d %H:%M:%S') + "%(asctime)s - %(levelname)s [%(filename)s : %(funcName)s() : l. %(lineno)s]: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) if log_file: log_file_path = Path(log_file) log_file_path.parent.mkdir(parents=True, exist_ok=True) file_handler = logging.FileHandler(log_file_path) - file_handler.setLevel(log_level) file_handler.setFormatter(logging_format) logger.addHandler(file_handler) else: @@ -40,7 +38,6 @@ def set_logging(log_file: Union[None, str], log_level: str, output_stdout: bool) if output_stdout: stream = logging.StreamHandler() - stream.setLevel(log_level) stream.setFormatter(logging_format) logger.addHandler(stream) @@ -60,22 +57,24 @@ def create_optimizer(name: str, model: Module, lr: float, momentum: float) -> Op Returns: Optimizer: the model optimizer """ + name = name.lower() if name == "adam": return Adam(params=model.parameters(), lr=lr) elif name == "sgd": return SGD(params=model.parameters(), lr=lr, momentum=momentum) elif name == "radam": - return RAdam(params=model.parameters(), lr=lr, degenerated_to_sgd=False) + return RAdam(params=model.parameters(), lr=lr) else: raise ValueError(f"No optimizer with name {name} found!") def create_scheduler( - scheduler_name: Optional[str], - optimizer: Optimizer, - lr_factor: float, - lr_steps: Optional[list], - epochs: int) -> Union[_LRScheduler, None]: + scheduler_name: Optional[str], + optimizer: Optimizer, + lr_factor: float, + lr_steps: Optional[list], + epochs: int, +) -> Union[_LRScheduler, None]: """creates a learning rate scheduler with the given parameters Args: diff --git a/examples/image_classification/.gitignore b/examples/image_classification/.gitignore new file mode 100644 index 0000000..333c1e9 --- /dev/null +++ b/examples/image_classification/.gitignore @@ -0,0 +1 @@ +logs/ diff --git a/examples/pytorch_lightning/README.md b/examples/image_classification/README.md similarity index 85% rename from examples/pytorch_lightning/README.md rename to examples/image_classification/README.md index 
a0eaeba..7186a5c 100644 --- a/examples/pytorch_lightning/README.md +++ b/examples/image_classification/README.md @@ -3,8 +3,13 @@ To give an example on how to use bitorch for your own projects `image_classification.py` trains one of the models implemented in `bitorch` on an image classification dataset. -Below you can find an example call of the script: +First the requirements for this example need to be installed +(unless the optional dependencies of BITorch were already installed): +```bash +pip install -r requirements.txt +``` +Below you can find an example call of the script: ```bash python3 image_classification.py --optimizer adam --lr 0.001 --lr-scheduler cosine --max_epochs 2 --dataset imagenet --model resnet18v1 --batch-size 128 --accelerator gpu --num-workers 16 --gpus 3 ``` @@ -37,14 +42,14 @@ The list below gives a brief overview over some selected arguments. ### model args -- `--model` specify name of model you want to train. Choose from `lenet,resnet,resnet152v1,resnet152v2,resnet18v1,resnet18v2,resnet34v1,resnet34v2,resnet50v1,resnet50v2,resnete,resnete18` or `resnete34` +- `--model` specify name of model you want to train. Choose from `Lenet,Resnet,Resnet152V1,Resnet152V2,Resnet18V1,Resnet18V2,Resnet34V1,Resnet34V2,Resnet50V1,Resnet50V2,ResnetE,ResnetE18,ResnetE34,Quicknet,QuicknetSmall` or `QuickNetLarge` Each model can have specific arguments. Check them by calling `python image_classification.py --help`. ### dataset args - `--datset` name of dataset to train on. Chose from `mnist, cifar10, cifar100` and `imagenet` -- `--download` toggles if dataset if not present at `--dataset-dir` should be downloaded. Only available for `mnist` and `cifar10`. +- `--download` toggles if dataset is not present at `--dataset-dir` should be downloaded. Only available for `mnist` and `cifar10`. - `--dataset-dir` path to dataset. 
- `--num-worker` sets number of workers for dataloading diff --git a/examples/image_classification/__init__.py b/examples/image_classification/__init__.py new file mode 100644 index 0000000..aa5b8b1 --- /dev/null +++ b/examples/image_classification/__init__.py @@ -0,0 +1 @@ +"""This package contains an example for image classification with BITorch.""" diff --git a/bitorch/datasets/__init__.py b/examples/image_classification/datasets/__init__.py similarity index 67% rename from bitorch/datasets/__init__.py rename to examples/image_classification/datasets/__init__.py index 972707e..5ac794b 100644 --- a/bitorch/datasets/__init__.py +++ b/examples/image_classification/datasets/__init__.py @@ -9,17 +9,18 @@ from .cifar import CIFAR10, CIFAR100 from .imagenet import ImageNet from .mnist import MNIST -from ..util import build_lookup_dictionary __all__ = [ - 'BasicDataset', 'dataset_from_name', 'dataset_names', - 'MNIST', 'CIFAR10', 'CIFAR100', 'ImageNet', + "BasicDataset", + "dataset_from_name", + "dataset_names", + "MNIST", + "CIFAR10", + "CIFAR100", + "ImageNet", ] -datasets_by_name = build_lookup_dictionary(__name__, __all__, BasicDataset) - - def dataset_from_name(name: str) -> Type[BasicDataset]: """returns the dataset to which the name belongs to (name has to be the value of the datasets name-attribute) @@ -33,9 +34,10 @@ def dataset_from_name(name: str) -> Type[BasicDataset]: Returns: dataset: the dataset """ - if name not in datasets_by_name: - raise ValueError(f"{name} dataset not found!") - return datasets_by_name[name] + for dataset_class in [CIFAR10, CIFAR100, ImageNet, MNIST]: + if dataset_class.name == name: + return dataset_class + raise Exception(f"unknown dataset: {name}") def dataset_names() -> List[str]: @@ -44,4 +46,4 @@ def dataset_names() -> List[str]: Returns: List: the dataset names """ - return list(datasets_by_name.keys()) + return [dataset_class.name for dataset_class in [CIFAR10, CIFAR100, ImageNet, MNIST]] diff --git a/examples/image_classification/datasets/base.py b/examples/image_classification/datasets/base.py new file mode 100644 index 0000000..d79500b --- /dev/null +++ b/examples/image_classification/datasets/base.py @@ -0,0 +1,137 @@ +import logging +import os +from pathlib import Path +from typing import Optional, Tuple, Any + +import torch +from torch.utils.data import Dataset +from torchvision.transforms import transforms + +from .dummy_dataset import DummyDataset + + +class BasicDataset(Dataset): + name = "None" + num_classes = 0 + shape = (0, 0, 0, 0) + mean: Any = None + std_dev: Any = None + num_train_samples = 0 + num_val_samples = 0 + + def __init__(self, train: bool, root_directory: Optional[str] = None, download: bool = False) -> None: + """initializes the dataset. + + Args: + train (bool): whether the train or test dataset is wanted + root_directory (str): path to main dataset storage directory + download (bool): whether train/test should be downloaded if it does not exist + + Returns: + Dataset: the created test/train dataset + """ + super(BasicDataset, self).__init__() + self.is_train = train + self._download = download + self.root_directory = self.get_dataset_root_directory(root_directory) + self.dataset = self.get_dataset(download) + + @classmethod + def get_train_and_test(cls, root_directory: str, download: bool = False) -> Tuple["BasicDataset", "BasicDataset"]: + """creates a pair of train and test dataset. 
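+
+        Args:
+            root_directory (str): path to main dataset storage directory
+            download (bool): whether train/test should be downloaded if it does not exist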
+ + Returns: + Tuple: the train and test dataset + """ + return cls(True, root_directory, download), cls(False, root_directory, download) + + @classmethod + def get_dummy_train_and_test_datasets(cls) -> Tuple[DummyDataset, DummyDataset]: + train_set = DummyDataset(cls.shape, cls.num_classes, cls.num_train_samples) # type: ignore + val_set = DummyDataset(cls.shape, cls.num_classes, cls.num_val_samples) # type: ignore + return train_set, val_set + + def get_dataset_root_directory(self, root_directory_argument: Optional[str]) -> Path: + """chooses the dataset root directory based on the passed argument or environment variables. + + Returns: + Tuple: the train and test dataset + """ + if root_directory_argument is not None: + return Path(root_directory_argument) + + environment_variable_name = f"{self.name.upper()}_HOME" + if os.environ.get(environment_variable_name) is not None: + return Path(os.environ.get(environment_variable_name)) # type: ignore + if os.environ.get("BITORCH_DATA_HOME") is not None: + return Path(os.environ.get("BITORCH_DATA_HOME")) / self.name # type: ignore + + environment_variable_hint = ( + f" To change this, set '{environment_variable_name}' or 'BITORCH_DATA_HOME' " + f"(in the latter case, the data resides in the folder '{self.name}' in BITORCH_DATA_HOME)." + f" Some datasets can be downloaded by adding the --download command line argument." + ) + if self._download: + logging.warning("Dataset is being downloaded to the directory './data'." + environment_variable_hint) + return Path("./data") + else: + raise ValueError(f"Dataset {self.name} not found." + environment_variable_hint) + + def get_dataset(self, download: bool) -> Dataset: + """creates the actual dataset + + Args: + download (bool): toggles if train/test shall be downloaded if possible + + Raises: + NotImplementedError: thrown, because this method needs to be overwritten by subclasses + + Returns: + Dataset: the created test/train dataset + """ + raise NotImplementedError() + + def get_transform(self) -> Any: + if self.is_train: + return self.train_transform() + return self.test_transform() + + @classmethod + def test_transform(cls) -> Any: + """get the transform for the test data. + + Returns: + transform: the transform pipeline + """ + return transforms.Compose([transforms.ToTensor(), cls.get_normalize_transform()]) + + @classmethod + def train_transform(cls) -> Any: + """get the transform for the training data. + + Returns: + transform: the transform pipeline + """ + return transforms.Compose([transforms.ToTensor(), cls.get_normalize_transform()]) + + @classmethod + def get_normalize_transform(cls) -> transforms.Normalize: + return transforms.Normalize(cls.mean, cls.std_dev) + + def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]: # type: ignore + """returns the item at the given index of the dataset. 
+ + Args: + index (int): requested index + + Returns: + Tuple[torch.Tensor, torch.Tensor]: data and label at the specified index + """ + return self.dataset[index] + + def __len__(self) -> int: + return len(self.dataset) # type: ignore + + def num_samples(self) -> int: + """returns the (theoretical) dataset size.""" + return self.num_train_samples if self.is_train else self.num_val_samples diff --git a/bitorch/datasets/cifar.py b/examples/image_classification/datasets/cifar.py similarity index 63% rename from bitorch/datasets/cifar.py rename to examples/image_classification/datasets/cifar.py index 7084f6b..40838c4 100644 --- a/bitorch/datasets/cifar.py +++ b/examples/image_classification/datasets/cifar.py @@ -4,9 +4,9 @@ from torchvision.datasets import cifar from torchvision.transforms import transforms -from .base import BasicDataset, Augmentation +from .base import BasicDataset -__all__ = ['CIFAR10', 'CIFAR100'] +__all__ = ["CIFAR10", "CIFAR100"] class CIFAR(BasicDataset, ABC): @@ -15,20 +15,24 @@ class CIFAR(BasicDataset, ABC): num_val_samples = 10000 @classmethod - def train_transform(cls, augmentation: Augmentation = Augmentation.DEFAULT) -> transforms.Compose: - return transforms.Compose([ - transforms.RandomCrop(32, padding=4), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - cls.get_normalize_transform(), - ]) + def train_transform(cls) -> transforms.Compose: + return transforms.Compose( + [ + transforms.RandomCrop(32, padding=4), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + cls.get_normalize_transform(), + ] + ) @classmethod def test_transform(cls) -> transforms.Compose: - return transforms.Compose([ - transforms.ToTensor(), - cls.get_normalize_transform(), - ]) + return transforms.Compose( + [ + transforms.ToTensor(), + cls.get_normalize_transform(), + ] + ) class CIFAR10(CIFAR): @@ -42,7 +46,7 @@ def get_dataset(self, download: bool = True) -> Dataset: root=self.root_directory, train=self.is_train, transform=self.get_transform(), - download=download + download=download, ) @@ -57,5 +61,5 @@ def get_dataset(self, download: bool = True) -> Dataset: root=self.root_directory, train=self.is_train, transform=self.get_transform(), - download=download + download=download, ) diff --git a/examples/image_classification/datasets/dummy_dataset.py b/examples/image_classification/datasets/dummy_dataset.py new file mode 100644 index 0000000..91b5524 --- /dev/null +++ b/examples/image_classification/datasets/dummy_dataset.py @@ -0,0 +1,23 @@ +from torch.utils.data import Dataset +import torch +from typing import Tuple + + +class DummyDataset(Dataset): + """An iterator that produces repeated dummy data. + Args: + data_sample: a data sample that should be produced at each step. + batch_size: the batch size for storing. + sample_count: number of `data` samples in the dummy dataset. 
+ """ + + def __init__(self, data_shape: torch.Size, num_classes: int, sample_count: int) -> None: + self._data_sample = torch.zeros(data_shape) + self._class_sample = torch.zeros((num_classes,), dtype=torch.int64) + self._sample_count = sample_count + + def __len__(self) -> int: + return self._sample_count + + def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]: + return self._data_sample, self._class_sample diff --git a/bitorch/datasets/imagenet.py b/examples/image_classification/datasets/imagenet.py similarity index 62% rename from bitorch/datasets/imagenet.py rename to examples/image_classification/datasets/imagenet.py index b68355e..d5b116d 100644 --- a/bitorch/datasets/imagenet.py +++ b/examples/image_classification/datasets/imagenet.py @@ -1,11 +1,10 @@ -import os from pathlib import Path from torch.utils.data import Dataset from torchvision import transforms from torchvision.datasets import ImageFolder -from .base import BasicDataset, Augmentation +from .base import BasicDataset class ImageNet(BasicDataset): @@ -30,20 +29,24 @@ def get_dataset(self, download: bool) -> Dataset: return ImageFolder(directory, transform=self.get_transform()) @classmethod - def train_transform(cls, augmentation: Augmentation = Augmentation.DEFAULT) -> transforms.Compose: + def train_transform(cls) -> transforms.Compose: crop_scale = 0.08 - return transforms.Compose([ - transforms.RandomResizedCrop(224, scale=(crop_scale, 1.0)), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - cls.get_normalize_transform(), - ]) + return transforms.Compose( + [ + transforms.RandomResizedCrop(224, scale=(crop_scale, 1.0)), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + cls.get_normalize_transform(), + ] + ) @classmethod def test_transform(cls) -> transforms.Compose: - return transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - cls.get_normalize_transform(), - ]) + return transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + cls.get_normalize_transform(), + ] + ) diff --git a/bitorch/datasets/mnist.py b/examples/image_classification/datasets/mnist.py similarity index 94% rename from bitorch/datasets/mnist.py rename to examples/image_classification/datasets/mnist.py index 5121f04..fde7f8e 100644 --- a/bitorch/datasets/mnist.py +++ b/examples/image_classification/datasets/mnist.py @@ -19,5 +19,5 @@ def get_dataset(self, download: bool = True) -> Dataset: root=self.root_directory, train=self.is_train, transform=self.get_transform(), - download=download + download=download, ) diff --git a/examples/image_classification/image_classification.py b/examples/image_classification/image_classification.py new file mode 100644 index 0000000..e299d86 --- /dev/null +++ b/examples/image_classification/image_classification.py @@ -0,0 +1,234 @@ +import os + +if os.environ.get("REMOTE_PYCHARM_DEBUG_SESSION", False): + import pydevd_pycharm + + pydevd_pycharm.settrace( + "localhost", + port=int(os.environ.get("REMOTE_PYCHARM_DEBUG_PORT", "12345")), + stdoutToServer=True, + stderrToServer=True, + ) + +import argparse +import logging +from pathlib import Path +from typing import List, Any, Type + +import fvbitcore.nn as fv_nn +import torch +import wandb +import pytorch_lightning as pl +from pytorch_lightning import Trainer +from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor, Callback +from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger, 
LightningLoggerBase +from pytorch_lightning.utilities.types import STEP_OUTPUT +from torch.utils.data import DataLoader + +import bitorch +from bitorch import apply_args_to_configuration, RuntimeMode +from bitorch.quantizations import Quantization_Scheduler, quantization_from_name +from datasets import dataset_from_name +from bitorch.models import model_from_name +from bitorch.quantizations import Quantization +from examples.image_classification.utils.callbacks import ProgressiveSignScalerCallback +from examples.image_classification.utils.log import CommandLineLogger +from examples.image_classification.utils.wandb_logger import CustomWandbLogger +from utils.arg_parser import create_argparser +from utils.lightning_model import ModelWrapper, DistillationModelWrapper +from utils.utils import configure_logging + +logger = logging.getLogger() + + +class ModelCallback(Callback): + def on_train_batch_end( + self, + trainer: Trainer, + pl_module: pl.LightningModule, + outputs: STEP_OUTPUT, + batch: Any, + batch_idx: int, + unused: int = 0, + ) -> None: + pl_module.model.apply(pl_module.model.on_train_batch_end) # type: ignore + + +def main(args: argparse.Namespace, model_args: argparse.Namespace) -> None: + """trains a model on the configured image dataset. + + Args: + args (argparse.Namespace): cli arguments + model_args (argparse.Namespace): model specific cli arguments + """ + configure_logging(logger, args.log_file, args.log_level, args.log_stdout) + + # switch to RAW bitorch mode for distributed data parallel training + bitorch.mode = RuntimeMode.RAW + + apply_args_to_configuration(args) + + output_dir = Path(args.result_directory) + output_dir.mkdir(exist_ok=True) + + loggers: List[LightningLoggerBase] = [] + if args.tensorboard_log: + loggers.append(TensorBoardLogger(str(output_dir), name="tensorboard")) # type: ignore + if args.csv_log: + loggers.append(CSVLogger(str(output_dir), name="csv")) # type: ignore + if args.wandb_log: + loggers.append( + CustomWandbLogger( + args, + project=args.wandb_project, + name=args.wandb_experiment, + save_dir=str(output_dir), + log_model=True, + ) # type: ignore + ) + callbacks: List[Any] = [] + if args.checkpoint_dir is not None: + callbacks.append( + ModelCheckpoint( + args.checkpoint_dir, + save_last=True, + save_top_k=args.checkpoint_keep_count, + every_n_epochs=1, + monitor="metrics/test-top1-accuracy", + mode="max", + filename="{epoch:03d}", + ) + ) + + callbacks.append(ModelCallback()) + + # providing our own progress bar disables the default progress bar (not needed to disable later on) + cmd_logger = CommandLineLogger(args.log_interval) + callbacks.append(cmd_logger) + configure_logging(cmd_logger.logger, args.log_file, args.log_level, args.log_stdout) + + # add scaling callback for progressive sign (not be needed for all models, but should not slow down training) + callbacks.append(ProgressiveSignScalerCallback()) + + if len(loggers) > 0: + lr_monitor = LearningRateMonitor(logging_interval="step") + callbacks.append(lr_monitor) + + dataset = dataset_from_name(args.dataset) + + model_kwargs = vars(model_args) + logger.debug(f"got model args as dict: {model_kwargs}") + + model_kwargs["input_shape"] = tuple(dataset.shape) + model_kwargs["num_classes"] = dataset.num_classes + if args.pretrained: + model = model_from_name(args.model).from_pretrained(args.checkpoint_load, **model_kwargs) + else: + model = model_from_name(args.model)(**model_kwargs) # type: ignore + model.initialize() + + # for model registry compliance + model_kwargs["model_name"] = 
args.model + if args.wandb_log: + wandb.config.update({"model_config": model_kwargs}) + + if args.quantization_scheduling: + quantization_scheduler = Quantization_Scheduler( + model, + quantizations=[quantization_from_name(name)() for name in args.scheduled_quantizations], + scheduling_procedure=args.quantization_scheduling_procedure, + schedule_all_quantizations=args.schedule_all_quantizations, + steps=args.max_epochs, + ) + else: + quantization_scheduler = None + + wrapper_class: Type[ModelWrapper] = ModelWrapper + if args.teacher: + if args.dataset != "imagenet": + raise ValueError( + f"Teacher '{args.teacher}' and dataset '{args.dataset}' selected, " + f"but teacher models are only supported for imagenet." + ) + wrapper_class = DistillationModelWrapper + + if args.checkpoint_load is not None and args.resume_training: + logger.info(f"resuming training from pretrained model at checkpoint {args.checkpoint_load}") + model_wrapped = wrapper_class.load_from_checkpoint(args.checkpoint_load) + else: + model_wrapped = wrapper_class(model, dataset.num_classes, quantization_scheduler, args) + + trainer = Trainer( + strategy=args.strategy, + accelerator="cpu" if args.cpu else args.accelerator, + gpus=0 if args.cpu else args.gpus, + max_epochs=args.max_epochs, + max_steps=args.max_steps, + logger=loggers if len(loggers) > 0 else None, # type: ignore + callbacks=callbacks, # type: ignore + log_every_n_steps=args.log_interval, + limit_train_batches=0.01 if args.dev_run else None, + limit_val_batches=0.01 if args.dev_run else None, + ) + if args.dev_run: + logger.info("this run only uses 1 % of training and validation data (--dev-run)!") + logger.info(f"model: {args.model}") + logger.info(f"optimizer: {args.optimizer}") + logger.info(f"lr: {args.lr}") + logger.info(f"max_epochs: {args.max_epochs}") + if args.fake_data: + logger.info(f"dummy dataset: {dataset.name} (not using real data!)") + train_dataset, test_dataset = dataset.get_dummy_train_and_test_datasets() # type: ignore + else: + logger.info(f"dataset: {dataset.name}") + train_dataset, test_dataset = dataset.get_train_and_test( # type: ignore + root_directory=args.dataset_dir, download=args.download + ) + train_loader = DataLoader( + train_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=True, + pin_memory=True, + persistent_workers=True, + ) # type: ignore + test_loader = DataLoader( + test_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=False, + pin_memory=True, + persistent_workers=True, + ) # type: ignore + + data_point = torch.zeros(dataset.shape) + computational_intensity = fv_nn.FlopCountAnalysis(model, inputs=data_point, quantization_base_class=Quantization) + + stats, table = fv_nn.flop_count_table(computational_intensity, automatic_qmodules=True) + logger.info("\n" + table) + total_size = stats["#compressed size in bits"][""] + logger.info("Total size in MB: " + str(total_size / 1e6 / 8.0)) + total_flops = stats["#speed up flops (app.)"][""] + logger.info("Approximated mflops: " + str(total_flops / 1e6)) + if args.wandb_log: + wandb.config.update( + { + "mflops": total_flops / 1e6, + "size in MB": total_size / 1e6 / 8.0, + } + ) + + trainer.fit( + model_wrapped, + train_dataloaders=train_loader, + val_dataloaders=test_loader, + ckpt_path=args.checkpoint_load if args.resume_training else None, + ) + + +if __name__ == "__main__": + parser, model_parser = create_argparser() + args_, unparsed_model_args = parser.parse_known_args() + model_args_ = 
model_parser.parse_args(unparsed_model_args) + + main(args_, model_args_) diff --git a/examples/image_classification/requirements.txt b/examples/image_classification/requirements.txt new file mode 100644 index 0000000..3eb115f --- /dev/null +++ b/examples/image_classification/requirements.txt @@ -0,0 +1,5 @@ +bitorch +fvbitcore +pytorch_lightning>=1.8.1 +sklearn +wandb~=0.12.0 diff --git a/examples/image_classification/utils/__init__.py b/examples/image_classification/utils/__init__.py new file mode 100644 index 0000000..f85768d --- /dev/null +++ b/examples/image_classification/utils/__init__.py @@ -0,0 +1 @@ +"""Utilities for image classification example.""" diff --git a/examples/image_classification/utils/arg_parser.py b/examples/image_classification/utils/arg_parser.py new file mode 100644 index 0000000..9757ea6 --- /dev/null +++ b/examples/image_classification/utils/arg_parser.py @@ -0,0 +1,375 @@ +import argparse +import sys +from argparse import ArgumentParser +from typing import Tuple, List, Type, Any, Optional, Sequence + +from pytorch_lightning import Trainer + +import bitorch +from bitorch.models import model_from_name, model_names, Model +from bitorch.models.base import NoArgparseArgsMixin +from bitorch.quantizations.quantization_scheduler import Quantization_Scheduler +from datasets import dataset_names +from utils.teachers import available_teachers + + +class _HeadArgumentParser(ArgumentParser): + _informational_sub_parsers: List[ArgumentParser] + + def print_help(self, *args: Any) -> None: + super().print_help(*args) + if hasattr(self, "_informational_sub_parsers"): + for parser in self._informational_sub_parsers: + print("\n") + parser.print_help() + + def add_informational_subparsers(self, sub_parsers: List[ArgumentParser]) -> None: + self._informational_sub_parsers = sub_parsers + + +def add_logging_args(parser: ArgumentParser) -> None: + """adds cli parameters for logging configuration + + Args: + parser (ArgumentParser): the main argument parser + """ + log = parser.add_argument_group("Logging", "parameters for logging") + log.add_argument( + "--log-level", + type=str, + default="info", + choices=["debug", "info", "warning", "error", "critical"], + help="log level for logging message output", + ) + log.add_argument( + "--log-interval", + type=int, + default=100, + metavar="N", + help="how many batches to wait before logging training status", + ) + log.add_argument( + "--log-file", + type=str, + default=None, + help="output file path for logging. default to stdout", + ) + log.add_argument( + "--log-stdout", + action="store_true", + help="toggles force logging to stdout. if a log file is specified, logging will be" + "printed to both the log file and stdout", + ) + log.add_argument( + "--result-directory", + type=str, + default="./logs", + help="path to logs directory, e.g. 
tensorboard logs, csv files", + ) + log.add_argument( + "--disable-tensorboard-log", + action="store_false", + dest="tensorboard_log", + help="disables tensorboard logging", + ) + log.add_argument( + "--disable-csv-log", + action="store_false", + dest="csv_log", + help="disables csv logging", + ) + log.add_argument( + "--wandb", + action="store_true", + dest="wandb_log", + help="enables wandb logging (WANDB_API_KEY environment variable must be set)", + ) + log.add_argument( + "--wandb-project", + type=str, + default="bitorch", + help="name of wand project to be used by wandb logger", + ) + log.add_argument( + "--wandb-experiment", + type=str, + default=None, + help="name of wand experiment to be used by wandb logger", + ) + + +def add_checkpoint_args(parser: ArgumentParser) -> None: + """adds cli parameters for checkpoint logging + + Args: + parser (ArgumentParser): the main argument parser + """ + checkpoint = parser.add_argument_group("checkpoints", "parameters for checkpoint storing / loading") + checkpoint.add_argument( + "--checkpoint-dir", + type=str, + default=None, + help="set a custom path to store checkpoints in.", + ) + checkpoint.add_argument( + "--checkpoint-keep-count", + type=int, + default=1, + help="number of checkpoints to keep.", + ) + checkpoint.add_argument( + "--checkpoint-load", + type=str, + default=None, + help="path to checkpoint file to load state from. if omitted and --pretrained is activated, the model weights will be downloaded from the model hub. If --pretrained is not set, a new model will be trained.", + ) + checkpoint.add_argument( + "--resume_training", + action="store_true", + help="resume training from given checkpoint", + ) + checkpoint.add_argument( + "--pretrained", + action="store_true", + help="load the state dict either from model hub or from checkpoint_load", + ) + + +def add_optimizer_args(parser: ArgumentParser) -> None: + """adds cli parameters for optimizer configuration + + Args: + parser (ArgumentParser): the main argument parser + """ + optimizer = parser.add_argument_group("Optimizer", "parameters for optimizer") + optimizer.add_argument( + "--lr-scheduler", + type=str, + choices=["cosine", "step", "exponential"], + help="name of the lr scheduler to use. default to none", + ) + optimizer.add_argument( + "--quantization-scheduling", + action="store_true", + default=False, + help="toggles weather to use quantization scheduling", + ) + optimizer.add_argument( + "--schedule-all-quantizations", + action="store_true", + default=False, + help="toggles weather to replace all quantizations inside the model with scheduled quantizers or " + "to just use the instances of ScheduledQuantizer which are already present in the model.", + ) + optimizer.add_argument( + "--scheduled-quantizations", + nargs="*", + default=["identity", "sign"], + help="name of quantizations to schedule", + ) + optimizer.add_argument( + "--quantization-scheduling-procedure", + type=str, + default="mix_linear", + choices=list(Quantization_Scheduler.procedure_classes.keys()), + help="procedure to use for scheduling", + ) + optimizer.add_argument( + "--lr", + type=float, + default=0.01, + help="initial learning rate (default: 0.01)", + ) + optimizer.add_argument( + "--lr-factor", + default=0.1, + type=float, + help="learning rate decay ratio. this is used only by the step and exponential lr scheduler", + ) + optimizer.add_argument( + "--lr-steps", + nargs="*", + default=[30, 60, 90], + help="list of learning rate decay epochs as list. 
this is used only by the step scheduler", + ) + optimizer.add_argument( + "--momentum", + type=float, + default=0.9, + help="momentum value for optimizer, default is 0.9. only used for sgd optimizer", + ) + optimizer.add_argument( + "--optimizer", + type=str, + default="adam", + choices=["adam", "sgd", "radam"], + help="the optimizer to use. default is adam.", + ) + + +def add_training_args(parser: ArgumentParser) -> None: + """ + Add arguments for training strategies. + + Args: + parser (ArgumentParser): the main argument parser + """ + train = parser.add_argument_group("training", "parameters for the training strategies") + train.add_argument( + "--teacher", + type=str, + default="", + choices=available_teachers(), + help="name of the teacher model, the student is going to be trained with KD if not empty", + ) + + +def add_dataset_args(parser: ArgumentParser) -> None: + """adds cli parameters for dataset configuration + + Args: + parser (ArgumentParser): the main argument parser + """ + data = parser.add_argument_group("dataset", "parameters for the dataset used for training") + data.add_argument( + "--dataset", + type=str, + default="cifar10", + choices=dataset_names(), + help="name of the dataset to be used for training", + ) + data.add_argument( + "--dataset-dir", + type=str, + default=None, + help="path to where the train dataset is saved / shall be downloaded to", + ) + data.add_argument( + "--download", + action="store_true", + help="toggles wether the dataset shall be downloaded if not present. " + "only has effect with the cifar10 and mnist dataset so far.", + ) + data.add_argument( + "--batch-size", + type=int, + default=128, + help="batch size for training and testing (default: 128)", + ) + data.add_argument( + "--num-workers", + type=int, + default=4, + help="number of workers to be used for dataloading (default: 4)", + ) + data.add_argument( + "--augmentation", + type=str, + choices=["none", "low", "medium", "high"], + default="none", + help="level of augmentation to be used in data preparation (default 'none')", + ) + data.add_argument( + "--fake-data", + action="store_true", + help="train with fake data", + ) + + +def create_model_argparser(model_class: Type[Model]) -> ArgumentParser: + """adds model specific cli arguments from model_class object + + Args: + model_class (object): the class-object of selected model + + Returns: + ArgumentParser: cli argument parser + """ + model_parser = argparse.ArgumentParser( + description=f"Additional arguments for {model_class.name} (--model {model_class.name.lower()})", + add_help=False, + usage=argparse.SUPPRESS, + ) + model_class.add_argparse_arguments(model_parser) + return model_parser + + +def help_in_args(cmd_args: Optional[Sequence[str]] = None) -> bool: + """determines if script was called with a --help or -h flag + + Returns: + bool: True if help flag was set, False otherwise + """ + passed_args = cmd_args + if passed_args is None: + passed_args = sys.argv[1:] + if "--help" in passed_args or "-h" in passed_args: + return True + return False + + +def create_list_of_all_model_parsers() -> List[ArgumentParser]: + """iterates through all existent models and adds a parser for each one""" + all_model_parsers = [] + for model_name in model_names(): + model_class = model_from_name(model_name) + if model_class.add_argparse_arguments == Model.add_argparse_arguments: + continue + if model_class.add_argparse_arguments == NoArgparseArgsMixin.add_argparse_arguments: + continue + model_parser = create_model_argparser(model_class) + 
all_model_parsers.append(model_parser) + return all_model_parsers + + +def add_regular_args(parser: ArgumentParser) -> None: + """adds all regular arguments, including dynamically created config args to parser. + + Args: + parser (ArgumentParser): parser to add the regular arguments to + """ + Trainer.add_argparse_args(parser) + add_logging_args(parser) + add_dataset_args(parser) + add_optimizer_args(parser) + add_checkpoint_args(parser) + add_training_args(parser) + + bitorch.add_config_args(parser) + + parser.add_argument( + "--model", + type=str.lower, + choices=model_names(), + required=True, + help="name of the model to be trained", + ) + parser.add_argument( + "--cpu", + action="store_true", + help="explicitly use the cpu. overwrites gpu settings", + ) + parser.add_argument( + "--dev-run", + action="store_true", + help="use only 1%% of training/validation data for testing purposes", + ) + + +def create_argparser(cmd_args: Optional[Sequence[str]] = None) -> Tuple[ArgumentParser, ArgumentParser]: + """creates a main argument parser for general options and a model parser for model specific options + + Returns: + Tuple[ArgumentParser, ArgumentParser]: the main and model argument parser + """ + parser = _HeadArgumentParser(description="Bitorch Image Classification") + + add_regular_args(parser) + + if help_in_args(cmd_args): + parser.add_informational_subparsers(create_list_of_all_model_parsers()) + args, _ = parser.parse_known_args(cmd_args) + + model_class = model_from_name(args.model) + model_parser = create_model_argparser(model_class) + return parser, model_parser diff --git a/examples/image_classification/utils/callbacks.py b/examples/image_classification/utils/callbacks.py new file mode 100644 index 0000000..4bff90c --- /dev/null +++ b/examples/image_classification/utils/callbacks.py @@ -0,0 +1,21 @@ +# type: ignore +import pytorch_lightning as pl + +from bitorch.quantizations import ProgressiveSign +from bitorch.quantizations.progressive_sign import config as progressive_sign_config + + +class ProgressiveSignScalerCallback(pl.callbacks.Callback): + """Callback that updates the scale of progressive sign functions based on current epoch.""" + + def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + scale = trainer.current_epoch / trainer.max_epochs + progressive_sign_config.progressive_sign_scale = scale + for logger in trainer.loggers: + logger.log_metrics( + { + "_progressive_sign_scale": scale, + "_progressive_sign_temperature": ProgressiveSign.default_transform(scale), + }, + step=trainer.fit_loop.epoch_loop._batches_that_stepped, + ) diff --git a/examples/image_classification/utils/kd_loss.py b/examples/image_classification/utils/kd_loss.py new file mode 100644 index 0000000..7a17d1b --- /dev/null +++ b/examples/image_classification/utils/kd_loss.py @@ -0,0 +1,37 @@ +# Code is modified from MEAL (https://arxiv.org/abs/1812.02425) and Label Refinery (https://arxiv.org/abs/1805.02641). + +import torch +from torch.nn import functional as F +from torch.nn.modules import loss + + +class DistributionLoss(loss._Loss): + """The KL-Divergence loss for a student and teacher model.""" + + def forward(self, student_out: torch.Tensor, teacher_out: torch.Tensor) -> torch.Tensor: + """ + Calculate the KL-Divergence loss. 
+ + Args: + student_out: NxC tensor (must be the output of the student network before softmax function) + teacher_out: NxC tensor (each row must be a probability score, adding up to one) + + Returns: + the loss score + """ + # check that teacher does not require gradients + if teacher_out.requires_grad: + raise ValueError("real network output should not require gradients.") + + student_log_prob = F.log_softmax(student_out, dim=1) + teacher_soft_output = F.softmax(teacher_out, dim=1) + del student_out, teacher_out + + # Loss is -dot(student_log_prob, teacher_out). Reshape tensors for batch matrix multiplication + teacher_soft_output = teacher_soft_output.unsqueeze(1) + student_log_prob = student_log_prob.unsqueeze(2) + + # Compute the loss, and average for the batch. + cross_entropy_loss = -torch.bmm(teacher_soft_output, student_log_prob) + + return cross_entropy_loss.mean() diff --git a/examples/image_classification/utils/lightning_model.py b/examples/image_classification/utils/lightning_model.py new file mode 100644 index 0000000..85f3376 --- /dev/null +++ b/examples/image_classification/utils/lightning_model.py @@ -0,0 +1,143 @@ +# type: ignore +import logging +from argparse import Namespace +from typing import Union, Any + +import torch +from pytorch_lightning import LightningModule +from torch.nn import Module, CrossEntropyLoss +from torchmetrics import Accuracy, F1Score, Precision, Recall + +from .kd_loss import DistributionLoss +from .teachers import get_teacher +from .unused_args import clean_hyperparameters +from .utils import create_optimizer, create_scheduler + + +class ModelWrapper(LightningModule): + def __init__( + self, + model: Module, + num_classes: int, + quantization_scheduler: Module, + script_args: Namespace, + add_f1_prec_recall: bool = False, + ) -> None: + super().__init__() + self.save_hyperparameters(clean_hyperparameters(script_args)) + self.loss_function = CrossEntropyLoss() + self.model = model + self.batch_accuracy_top1 = Accuracy(num_classes=num_classes) + self.batch_accuracy_top5 = Accuracy(top_k=5, num_classes=num_classes) + self.train_accuracy_top1 = Accuracy(num_classes=num_classes) + self.train_accuracy_top5 = Accuracy(top_k=5, num_classes=num_classes) + self.accuracy_top1 = Accuracy(num_classes=num_classes) + self.accuracy_top5 = Accuracy(top_k=5, num_classes=num_classes) + self.add_f1_prec_recall = add_f1_prec_recall + self.quantization_scheduler = quantization_scheduler + if add_f1_prec_recall: + self.f1 = F1Score(num_classes=num_classes) + self.prec = Precision(num_classes=num_classes) + self.recall = Recall(num_classes=num_classes) + + def training_step(self, batch: torch.Tensor) -> torch.Tensor: # type: ignore + x_train, y_train = batch + + y_hat = self.model(x_train) + loss = self.calculate_loss(x_train, y_train, y_hat) + + self.batch_accuracy_top1(y_hat, y_train) + self.batch_accuracy_top5(y_hat, y_train) + self.train_accuracy_top1(y_hat, y_train) + self.train_accuracy_top5(y_hat, y_train) + + self.log_dict( + { + "metrics/batch-top1-accuracy": self.batch_accuracy_top1, + "metrics/batch-top5-accuracy": self.batch_accuracy_top5, + "loss/train": loss, + }, + prog_bar=True, + on_step=True, + on_epoch=False, + ) + self.log_dict( + { + "metrics/train-top1-accuracy": self.train_accuracy_top1, + "metrics/train-top5-accuracy": self.train_accuracy_top5, + }, + on_step=False, + on_epoch=True, + ) + return loss + + def calculate_loss(self, x_train: torch.Tensor, y_train: torch.Tensor, y_hat: torch.Tensor) -> torch.Tensor: + return self.loss_function(y_hat, 
y_train) + + def validation_step(self, batch: torch.Tensor, batch_idx: int) -> None: # type: ignore + x_test, y_test = batch + + y_hat = self.model(x_test) + loss = self.loss_function(y_hat, y_test) + + self.accuracy_top1(y_hat, y_test) + self.accuracy_top5(y_hat, y_test) + + metrics_dict = { + "metrics/test-top1-accuracy": self.accuracy_top1, + "metrics/test-top5-accuracy": self.accuracy_top5, + "loss/test": loss, + } + + if self.add_f1_prec_recall: + self.f1(y_hat, y_test) + self.prec(y_hat, y_test) + self.recall(y_hat, y_test) + metrics_dict.update( + { + "metrics/f1": self.f1, + "metrics/precision": self.prec, + "metrics/recall": self.recall, + } + ) + self.log_dict(metrics_dict, prog_bar=True, on_step=False, on_epoch=True) + + return loss + + def on_validation_epoch_end(self) -> None: + if self.quantization_scheduler is not None: + self.quantization_scheduler.step() + self.log( + "quantization_scheduler/factor", + self.quantization_scheduler.scheduled_quantizer_instances[0].factor, + ) + return super().on_validation_epoch_end() + + def configure_optimizers(self) -> Union[dict, torch.optim.Optimizer]: # type: ignore + logging.info(f"Using {self.hparams.optimizer} optimizer and {self.hparams.lr_scheduler} lr scheduler...") + optimizer = create_optimizer(self.hparams.optimizer, self.model, self.hparams.lr, self.hparams.momentum) + if self.hparams.lr_scheduler is not None: + scheduler = create_scheduler( + self.hparams.lr_scheduler, + optimizer, + self.hparams.lr_factor, + self.hparams.lr_steps, + self.hparams.max_epochs, + ) + return {"optimizer": optimizer, "lr_scheduler": scheduler} + else: + return optimizer + + +class DistillationModelWrapper(ModelWrapper): + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._kd_loss = DistributionLoss() + logging.info(f"Training with Knowledge Distillation, loading teacher {self.hparams.teacher}.") + self.teacher = get_teacher(self.hparams.teacher) + for param in self.teacher.parameters(): + param.requires_grad = False + + def calculate_loss(self, x_train: torch.Tensor, y_train: torch.Tensor, y_hat: torch.Tensor) -> torch.Tensor: + y_hat_teacher = self.teacher.forward(x_train) + return self._kd_loss(y_hat, y_hat_teacher) diff --git a/examples/image_classification/utils/log.py b/examples/image_classification/utils/log.py new file mode 100644 index 0000000..a26848d --- /dev/null +++ b/examples/image_classification/utils/log.py @@ -0,0 +1,178 @@ +# type: ignore +import logging +import time +from typing import Optional, Any, Dict, List, Union + +import math +import pytorch_lightning as pl +from pytorch_lightning.callbacks import ProgressBarBase +from pytorch_lightning.utilities.types import STEP_OUTPUT + + +TIME_INTERVALS = ( + ("w", 60 * 60 * 24 * 7), + ("d", 60 * 60 * 24), + ("h", 60 * 60), + ("m", 60), + ("s", 1), +) + + +def _display_time(seconds: float, granularity: int = 2) -> str: + result: List[str] = [] + + seconds = int(round(seconds)) + + for name, count in TIME_INTERVALS: + value = seconds // count + if value == 0 and len(result) == 0: + continue + seconds -= value * count + result.append(f"{value:02d}{name}") + return ":".join(result[:granularity]) + + +class CommandLineLogger(ProgressBarBase): + """ + This module provides a replacement for the default tqdm-based progress bar, that is more suitable for logging + progress in a non-interactive way, e.g. to a file. 
+ """ + + def __init__(self, refresh_rate: int) -> None: + super().__init__() + self._is_enabled = True + self._epoch_start_time: float = 0.0 + self._validation_start_time: float = 0.0 + self._train_start_time: float = 0.0 + self._last_epoch_times: List[float] = [] + self._validation_times: List[float] = [] + + self.logger = logging.getLogger("CommandLineLogger") + + self.refresh_rate = refresh_rate + self.log_batch = self.log_info + self.log_debug("Command line logger initialized.") + + def log_debug(self, message: str) -> None: + if self._is_enabled: + # self.logger.debug(message) + print(message) + + def log_info(self, message: str) -> None: + if self._is_enabled: + # self.logger.info(message) + print(message) + + def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None: + self.log_debug(f"Command line logger setup. ( is root trainer: {trainer.is_global_zero} )") + super().setup(trainer, pl_module, stage) + + def disable(self) -> None: + self.log_debug("Logging disabled...") + self._is_enabled = False + + def enable(self) -> None: + self._is_enabled = True + self.log_debug("Logging enabled...") + + def _should_update(self, current: int, total: Union[int, float]) -> bool: + return self._is_enabled and (current % self.refresh_rate == 0 or current == int(total)) + + def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self.log_info("Starting training...") + + def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self.log_info("Ending training.") + + def on_train_batch_end( + self, + trainer: "pl.Trainer", + pl_module: "pl.LightningModule", + outputs: STEP_OUTPUT, + batch: Any, + batch_idx: int, + unused: int = 0, + ) -> None: + if not self._should_update(self.train_batch_idx, self.total_train_batches): + return + + percent = (self.train_batch_idx / self.total_train_batches) * 100 + + time_in_this_epoch = time.time() - self._epoch_start_time + epoch_total_est = int(round((time_in_this_epoch * self.total_train_batches) / self.train_batch_idx)) + eta_epoch = _display_time(epoch_total_est - time_in_this_epoch) + full_epochs_left = trainer.max_epochs - trainer.current_epoch + if full_epochs_left < 0: + full_epochs_left = 0 + if self._average_epoch_time() > 0: + epoch_total_est = self._average_epoch_time() + self._average_validation_time() + eta_train = _display_time(epoch_total_est - time_in_this_epoch + full_epochs_left * epoch_total_est) + + epoch_info = f"Epoch {trainer.current_epoch:3d}" + batch_info = f"{self.train_batch_idx:4d}/{self.total_train_batches:4d} ({percent:5.1f}%)" + metrics = self._format_metric_string(self.get_metrics(trainer, pl_module)) + eta_info = f"ETA: {eta_epoch} & {eta_train}" + self.log_batch(f"{epoch_info} - {batch_info} - {metrics} - {eta_info}") + + @staticmethod + def _replace_metric_key(metric_key: str) -> str: + remove_strings = [ + "metrics/", + ] + for s in remove_strings: + metric_key = metric_key.replace(s, "") + return metric_key.replace("accuracy", "acc") + + @staticmethod + def _format_metric_string(metrics_dict: Dict[str, Union[int, str]], train: bool = True) -> str: + metric_list = [] + + for key, value in metrics_dict.items(): + if key == "v_num" or "loss" in key: + continue + key = CommandLineLogger._replace_metric_key(key) + try: + f_value = float(value) + if math.isnan(f_value): + continue + if key: + metric_list.append(f"{key}={f_value:2.2f}") + except ValueError: + if key: + metric_list.append(f"{key}={value}") + + return 
", ".join(metric_list) + + @staticmethod + def _average_time(time_list: List[float]) -> int: + return int(round(sum(time_list) / len(time_list))) + + def _average_epoch_time(self) -> int: + if len(self._last_epoch_times) == 0: + return 0 + return self._average_time(self._last_epoch_times) + + def _average_validation_time(self) -> int: + if len(self._validation_times) == 0: + return 0 + return self._average_time(self._validation_times) + + def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._epoch_start_time = time.time() + if self._train_start_time is None: + self._train_start_time = self._epoch_start_time + + def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._last_epoch_times.append(time.time() - self._epoch_start_time) + self._last_epoch_times = self._last_epoch_times[-3:] + + def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._validation_start_time = time.time() + self.log_info("Validating model...") + + def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + self._validation_times.append(time.time() - self._validation_start_time) + self._validation_times = self._validation_times[-3:] + self.log_info( + f"Validation complete. ({self._format_metric_string(self.get_metrics(trainer, pl_module), train=False)})" + ) diff --git a/examples/image_classification/utils/teachers.py b/examples/image_classification/utils/teachers.py new file mode 100644 index 0000000..d90bb34 --- /dev/null +++ b/examples/image_classification/utils/teachers.py @@ -0,0 +1,38 @@ +from typing import Dict, List + +from torch import nn + + +from torchvision import models + + +def _teachers() -> Dict[str, nn.Module]: + def resnet18() -> nn.Module: + return models.resnet18(weights=models.ResNet18_Weights.DEFAULT) + + def resnet34() -> nn.Module: + return models.resnet34(weights=models.ResNet34_Weights.DEFAULT) + + def resnet50_v1() -> nn.Module: + # Old weights with accuracy 76.130% + return models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1) + + def resnet50_v2() -> nn.Module: + # New weights with accuracy 80.858% + return models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2) + + def resnet50() -> nn.Module: + # New weights with accuracy 80.858% + return models.resnet50(weights=models.ResNet50_Weights.DEFAULT) + + return locals() + + +def available_teachers() -> List[str]: + """Return a list of all available model names (pre-trained on ImageNet).""" + return list(_teachers().keys()) + + +def get_teacher(teacher_name: str) -> nn.Module: + """Return a model pretrained on ImageNet for a given model name.""" + return _teachers()[teacher_name]() diff --git a/examples/image_classification/utils/unused_args.py b/examples/image_classification/utils/unused_args.py new file mode 100644 index 0000000..7adeb79 --- /dev/null +++ b/examples/image_classification/utils/unused_args.py @@ -0,0 +1,73 @@ +"""Args from PyTorch Lightning's Trainer that are currently unused and a function to deal with them.""" +from argparse import Namespace +from typing import List + + +def clean_hyperparameters(args: Namespace) -> Namespace: + """Remove args which are not passed to the constructor in our training script.""" + clean_args = Namespace() + for key in args.__dict__.keys(): + if key in UNUSED_PL_ARGS: + continue + setattr(clean_args, key, getattr(args, key)) + return clean_args + + +# this list is copied from the constructor of PyTorch's 
Trainer, but all arguments used in our script were removed +UNUSED_PL_ARGS: List[str] = [ + "logger", + "checkpoint_callback", + "enable_checkpointing", + "callbacks", + "default_root_dir", + "gradient_clip_val", + "gradient_clip_algorithm", + "process_position", + "num_nodes", + "num_processes", + "devices", + "auto_select_gpus", + "tpu_cores", + "ipus", + "log_gpu_memory", + "progress_bar_refresh_rate", + "enable_progress_bar", + "overfit_batches", + "track_grad_norm", + "check_val_every_n_epoch", + "fast_dev_run", + "accumulate_grad_batches", + "min_epochs", + "min_steps", + "max_time", + "limit_train_batches", + "limit_val_batches", + "limit_test_batches", + "limit_predict_batches", + "val_check_interval", + "flush_logs_every_n_steps", + "log_every_n_steps", + "sync_batchnorm", + "precision", + "enable_model_summary", + "weights_summary", + "weights_save_path", + "num_sanity_val_steps", + "resume_from_checkpoint", + "profiler", + "benchmark", + "deterministic", + "reload_dataloaders_every_n_epochs", + "auto_lr_find", + "replace_sampler_ddp", + "detect_anomaly", + "auto_scale_batch_size", + "prepare_data_per_node", + "plugins", + "amp_backend", + "amp_level", + "move_metrics_to_cpu", + "multiple_trainloader_mode", + "stochastic_weight_avg", + "terminate_on_nan", +] diff --git a/examples/image_classification/utils/utils.py b/examples/image_classification/utils/utils.py new file mode 100644 index 0000000..8ae811c --- /dev/null +++ b/examples/image_classification/utils/utils.py @@ -0,0 +1,105 @@ +import logging +from pathlib import Path + +from torch.optim import Adam, SGD, RAdam +from torch.optim.lr_scheduler import MultiStepLR, ExponentialLR, CosineAnnealingLR, _LRScheduler +from typing import Union, Optional, Any +from torch.nn import Module +from torch.optim.optimizer import Optimizer + + +def configure_logging(logger: Any, log_file: Optional[str], log_level: str, output_stdout: bool) -> None: + """configures logging module. + + Args: + logger: the logger to be configured + log_file (str): path to log file. if omitted, logging will be forced to stdout. + log_level (str): string name of log level (e.g. 'debug') + output_stdout (bool): toggles stdout output. will be activated automatically if no log file was given. + otherwise if activated, logging will be outputed both to stdout and log file. + """ + log_level_name = log_level.upper() + log_level = getattr(logging, log_level_name) + logger.setLevel(log_level) + + logging_format = logging.Formatter( + "%(asctime)s - %(levelname)s [%(filename)s : %(funcName)s() : l. 
%(lineno)s]: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + if log_file is not None: + log_file_path = Path(log_file) + log_file_path.parent.mkdir(parents=True, exist_ok=True) + file_handler = logging.FileHandler(log_file_path) + file_handler.setFormatter(logging_format) + logger.addHandler(file_handler) + else: + output_stdout = True + + if output_stdout: + stream = logging.StreamHandler() + stream.setFormatter(logging_format) + logger.addHandler(stream) + + +def create_optimizer(name: str, model: Module, lr: float, momentum: float) -> Optimizer: + """creates the specified optimizer with the given parameters + + Args: + name (str): str name of optimizer + model (Module): the model used for training + lr (float): learning rate + momentum (float): momentum (only for sgd optimizer) + + Raises: + ValueError: thrown if optimizer name not known + + Returns: + Optimizer: the model optimizer + """ + name = name.lower() + if name == "adam": + return Adam(params=model.parameters(), lr=lr) + elif name == "sgd": + return SGD(params=model.parameters(), lr=lr, momentum=momentum) + elif name == "radam": + return RAdam(params=model.parameters(), lr=lr) + else: + raise ValueError(f"No optimizer with name {name} found!") + + +def create_scheduler( + scheduler_name: Optional[str], + optimizer: Optimizer, + lr_factor: float, + lr_steps: Optional[list], + epochs: int, +) -> Union[_LRScheduler, None]: + """creates a learning rate scheduler with the given parameters + + Args: + scheduler_name (Optional[str]): str name of scheduler or None, in which case None will be returned + optimizer (Optimizer): the learning optimizer + lr_factor (float): the learning rate factor + lr_steps (Optional[list]): learning rate steps for the scheduler to take (only supported for step scheduler) + epochs (int): number of scheduler epochs (only supported for cosine scheduler) + + Raises: + ValueError: thrown if step scheduler was chosen but no steps were passed + ValueError: thrown if scheduler name not known and not None + + Returns: + Union[_LRScheduler, None]: either the learning rate scheduler object or None if scheduler_name was None + """ + if scheduler_name == "step": + if not lr_steps: + raise ValueError("step scheduler chosen but no lr steps passed!") + return MultiStepLR(optimizer, lr_steps, lr_factor) + elif scheduler_name == "exponential": + return ExponentialLR(optimizer, lr_factor) + elif scheduler_name == "cosine": + return CosineAnnealingLR(optimizer, epochs) + elif not scheduler_name: + return None + else: + raise ValueError(f"no scheduler with name {scheduler_name} found!") diff --git a/examples/image_classification/utils/wandb_logger.py b/examples/image_classification/utils/wandb_logger.py new file mode 100644 index 0000000..ef0550a --- /dev/null +++ b/examples/image_classification/utils/wandb_logger.py @@ -0,0 +1,34 @@ +from argparse import Namespace +from typing import Any + +from pytorch_lightning.loggers import WandbLogger +from pytorch_lightning.utilities import rank_zero_only + + +class CustomWandbLogger(WandbLogger): + """ + Customized Wandb Logger with the following changes: + + - the last model is not uploaded to wandb at the end of the training + - automatically adds some tags based on the command line arguments + """ + + def __init__(self, script_args: Namespace, *args: Any, **kwargs: Any) -> None: + kv_tags = ["model", "dataset"] + wandb_tags = [f"{k}:{getattr(script_args, k, 'unknown')}" for k in kv_tags] + if script_args.dev_run: + wandb_tags.append("dev-run") + if script_args.teacher: + 
wandb_tags.append("kd") + if "tags" in kwargs: + kwargs["tags"].extend(wandb_tags) + else: + kwargs["tags"] = wandb_tags + super().__init__(*args, **kwargs) + + @rank_zero_only + def finalize(self, status: str) -> None: + if self._checkpoint_callback: + # disable saving the last model to wandb + self._checkpoint_callback.last_model_path = "" + super().finalize(status) diff --git a/examples/mnist/README.md b/examples/mnist/README.md new file mode 100644 index 0000000..86aa016 --- /dev/null +++ b/examples/mnist/README.md @@ -0,0 +1,15 @@ +# Example for MNIST + +In this example script we train a simple model for the MNIST dataset and also use the [bitorch inference engine](https://github.com/hpi-xnor/bitorch-inference-engine) for speed up. + +First the requirements for this example need to be installed +(unless the optional dependencies of BITorch were already installed): +```bash +pip install -r requirements.txt +``` + +Then you can run the following to train an MLP with 3 layers (one of which is a binary layer), +or add `--help` for more arguments: +```bash +python train_mnist.py --epochs 10 --model mlp --log-interval 100 +``` diff --git a/examples/mnist/__init__.py b/examples/mnist/__init__.py new file mode 100644 index 0000000..1ce417a --- /dev/null +++ b/examples/mnist/__init__.py @@ -0,0 +1,4 @@ +""" +This package contains an example for training an image classification model on the MNIST data set with BITorch +and deploying it with the inference engine. +""" diff --git a/examples/mnist/datasets/__init__.py b/examples/mnist/datasets/__init__.py new file mode 100644 index 0000000..f334be7 --- /dev/null +++ b/examples/mnist/datasets/__init__.py @@ -0,0 +1,44 @@ +""" +This submodule contains data preparation code for some of the datasets used with our models, +i.e. MNIST, CIFAR 10 and 100 and ImageNet. +""" + +from typing import List, Type + +from .base import BasicDataset +from .mnist import MNIST + +__all__ = [ + "BasicDataset", + "dataset_from_name", + "dataset_names", + "MNIST", +] + + +def dataset_from_name(name: str) -> Type[BasicDataset]: + """returns the dataset to which the name belongs to (name has to be the value of the datasets + name-attribute) + + Args: + name (str): name of the dataset + + Raises: + ValueError: raised if no dataset under that name was found + + Returns: + dataset: the dataset + """ + for dataset_class in [MNIST]: + if dataset_class.name == name: + return dataset_class + raise Exception(f"unknown dataset: {name}") + + +def dataset_names() -> List[str]: + """getter for list of dataset names for argparse + + Returns: + List: the dataset names + """ + return [dataset_class.name for dataset_class in [MNIST]] diff --git a/examples/mnist/datasets/base.py b/examples/mnist/datasets/base.py new file mode 100644 index 0000000..d291fe4 --- /dev/null +++ b/examples/mnist/datasets/base.py @@ -0,0 +1,137 @@ +import logging +import os +from pathlib import Path +from typing import Optional, Tuple, Any + +import torch +from torch.utils.data import Dataset +from torchvision.transforms import transforms + +from ..datasets.dummy_dataset import DummyDataset + + +class BasicDataset(Dataset): + name = "None" + num_classes = 0 + shape = (0, 0, 0, 0) + mean: Any = None + std_dev: Any = None + num_train_samples = 0 + num_val_samples = 0 + + def __init__(self, train: bool, root_directory: Optional[str] = None, download: bool = False) -> None: + """initializes the dataset. 
+ + Args: + train (bool): whether the train or test dataset is wanted + root_directory (str): path to main dataset storage directory + download (bool): whether train/test should be downloaded if it does not exist + + Returns: + Dataset: the created test/train dataset + """ + super(BasicDataset, self).__init__() + self.is_train = train + self._download = download + self.root_directory = self.get_dataset_root_directory(root_directory) + self.dataset = self.get_dataset(download) + + @classmethod + def get_train_and_test(cls, root_directory: str, download: bool = False) -> Tuple["BasicDataset", "BasicDataset"]: + """creates a pair of train and test dataset. + + Returns: + Tuple: the train and test dataset + """ + return cls(True, root_directory, download), cls(False, root_directory, download) + + @classmethod + def get_dummy_train_and_test_datasets(cls) -> Tuple[DummyDataset, DummyDataset]: + train_set = DummyDataset(cls.shape, cls.num_classes, cls.num_train_samples) # type: ignore + val_set = DummyDataset(cls.shape, cls.num_classes, cls.num_val_samples) # type: ignore + return train_set, val_set + + def get_dataset_root_directory(self, root_directory_argument: Optional[str]) -> Path: + """chooses the dataset root directory based on the passed argument or environment variables. + + Returns: + Tuple: the train and test dataset + """ + if root_directory_argument is not None: + return Path(root_directory_argument) + + environment_variable_name = f"{self.name.upper()}_HOME" + if os.environ.get(environment_variable_name) is not None: + return Path(os.environ.get(environment_variable_name)) # type: ignore + if os.environ.get("BITORCH_DATA_HOME") is not None: + return Path(os.environ.get("BITORCH_DATA_HOME")) / self.name # type: ignore + + environment_variable_hint = ( + f" To change this, set '{environment_variable_name}' or 'BITORCH_DATA_HOME' " + f"(in the latter case, the data resides in the folder '{self.name}' in BITORCH_DATA_HOME)." + f" Some datasets can be downloaded by adding the --download command line argument." + ) + if self._download: + logging.warning("Dataset is being downloaded to the directory './data'." + environment_variable_hint) + return Path("./data") + else: + raise ValueError(f"Dataset {self.name} not found." + environment_variable_hint) + + def get_dataset(self, download: bool) -> Dataset: + """creates the actual dataset + + Args: + download (bool): toggles if train/test shall be downloaded if possible + + Raises: + NotImplementedError: thrown, because this method needs to be overwritten by subclasses + + Returns: + Dataset: the created test/train dataset + """ + raise NotImplementedError() + + def get_transform(self) -> Any: + if self.is_train: + return self.train_transform() + return self.test_transform() + + @classmethod + def test_transform(cls) -> Any: + """get the transform for the test data. + + Returns: + transform: the transform pipeline + """ + return transforms.Compose([transforms.ToTensor(), cls.get_normalize_transform()]) + + @classmethod + def train_transform(cls) -> Any: + """get the transform for the training data. + + Returns: + transform: the transform pipeline + """ + return transforms.Compose([transforms.ToTensor(), cls.get_normalize_transform()]) + + @classmethod + def get_normalize_transform(cls) -> transforms.Normalize: + return transforms.Normalize(cls.mean, cls.std_dev) + + def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]: # type: ignore + """returns the item at the given index of the dataset. 
+ + Args: + index (int): requested index + + Returns: + Tuple[torch.Tensor, torch.Tensor]: data and label at the specified index + """ + return self.dataset[index] + + def __len__(self) -> int: + return len(self.dataset) # type: ignore + + def num_samples(self) -> int: + """returns the (theoretical) dataset size.""" + return self.num_train_samples if self.is_train else self.num_val_samples diff --git a/examples/mnist/datasets/dummy_dataset.py b/examples/mnist/datasets/dummy_dataset.py new file mode 100644 index 0000000..91b5524 --- /dev/null +++ b/examples/mnist/datasets/dummy_dataset.py @@ -0,0 +1,23 @@ +from torch.utils.data import Dataset +import torch +from typing import Tuple + + +class DummyDataset(Dataset): + """An iterator that produces repeated dummy data. + Args: + data_sample: a data sample that should be produced at each step. + batch_size: the batch size for storing. + sample_count: number of `data` samples in the dummy dataset. + """ + + def __init__(self, data_shape: torch.Size, num_classes: int, sample_count: int) -> None: + self._data_sample = torch.zeros(data_shape) + self._class_sample = torch.zeros((num_classes,), dtype=torch.int64) + self._sample_count = sample_count + + def __len__(self) -> int: + return self._sample_count + + def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]: + return self._data_sample, self._class_sample diff --git a/examples/mnist/datasets/mnist.py b/examples/mnist/datasets/mnist.py new file mode 100644 index 0000000..fde7f8e --- /dev/null +++ b/examples/mnist/datasets/mnist.py @@ -0,0 +1,23 @@ +from torch.utils.data import Dataset +from torchvision.datasets import mnist + +from .base import BasicDataset + + +class MNIST(BasicDataset): + name = "mnist" + num_classes = 10 + shape = (1, 1, 28, 28) + + mean = (0.1307,) + std_dev = (0.3081,) + num_train_samples = 60000 + num_val_samples = 10000 + + def get_dataset(self, download: bool = True) -> Dataset: + return mnist.MNIST( + root=self.root_directory, + train=self.is_train, + transform=self.get_transform(), + download=download, + ) diff --git a/examples/mnist/requirements.txt b/examples/mnist/requirements.txt new file mode 100644 index 0000000..ababd87 --- /dev/null +++ b/examples/mnist/requirements.txt @@ -0,0 +1,2 @@ +bitorch +bitorch_engine diff --git a/examples/mnist/train_mnist.py b/examples/mnist/train_mnist.py new file mode 100644 index 0000000..5eb9480 --- /dev/null +++ b/examples/mnist/train_mnist.py @@ -0,0 +1,185 @@ +""" +An example script for training a model for the MNIST dataset with BITorch. + +Modified from the `PyTorch MNIST Example `_, +which was published under the `BSD 3-Clause License `_. 
+""" +# fmt: off +import argparse + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.optim.lr_scheduler import StepLR + +import bitorch.layers as qnn +from bitorch import RuntimeMode +from datasets import MNIST +from bitorch.layers import convert +import bitorch_engine + + +bitorch_engine.initialize() + + +class QuantizedMLP(nn.Module): + def __init__(self, num_hidden_units_1=256, num_hidden_units_2=128): + super().__init__() + self.flatten = nn.Flatten() + self.fc1 = nn.Linear(784, num_hidden_units_1) + self.act1 = nn.PReLU() + self.bn1 = nn.BatchNorm1d(num_hidden_units_1) + + self.fc2 = qnn.QLinear(num_hidden_units_1, num_hidden_units_2, bias=False) + self.act2 = nn.PReLU() + self.bn2 = nn.BatchNorm1d(num_hidden_units_2) + + self.fc3 = nn.Linear(num_hidden_units_2, 10) + + def forward(self, x): + x = self.flatten(x) + + x = self.fc1(x) + x = self.act1(x) + x = self.bn1(x) + + x = self.fc2(x) + x = self.act2(x) + x = self.bn2(x) + + x = self.fc3(x) + output = F.log_softmax(x, dim=1) + return output + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout(0.25) + self.dropout2 = nn.Dropout(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + + +def train(args, model, device, train_loader, optimizer, epoch): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = F.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), loss.item())) + if args.dry_run: + break + + +def test(model, device, test_loader): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss + pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + + print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + 100. 
* correct / len(test_loader.dataset))) + + +def main(): + # Training settings + parser = argparse.ArgumentParser(description='PyTorch MNIST Example') + parser.add_argument('--model', type=str, choices=["mlp", "lenet"], default="lenet", + help='input batch size for training (default: 64)') + parser.add_argument('--batch-size', type=int, default=64, metavar='N', + help='input batch size for training (default: 64)') + parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', + help='input batch size for testing (default: 1000)') + parser.add_argument('--epochs', type=int, default=14, metavar='N', + help='number of epochs to train (default: 14)') + parser.add_argument('--lr', type=float, default=1.0, metavar='LR', + help='learning rate (default: 1.0)') + parser.add_argument('--gamma', type=float, default=0.7, metavar='M', + help='Learning rate step gamma (default: 0.7)') + parser.add_argument('--no-cuda', action='store_true', + help='disables CUDA training') + parser.add_argument('--dry-run', action='store_true', + help='quickly check a single pass') + parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') + parser.add_argument('--log-interval', type=int, default=10, metavar='N', + help='how many batches to wait before logging training status') + parser.add_argument('--save-model', action='store_true', + help='For Saving the current Model') + args = parser.parse_args() + + use_cuda = not args.no_cuda and torch.cuda.is_available() + + torch.manual_seed(args.seed) + + device = torch.device("cuda:0" if use_cuda else "cpu") + + train_kwargs = {'batch_size': args.batch_size} + test_kwargs = {'batch_size': args.test_batch_size} + if use_cuda: + cuda_kwargs = {'num_workers': 1, + 'pin_memory': True, + 'shuffle': True} + train_kwargs.update(cuda_kwargs) + test_kwargs.update(cuda_kwargs) + + train_dataset, test_dataset = MNIST.get_train_and_test(download=True) + + train_loader = torch.utils.data.DataLoader(train_dataset, **train_kwargs) + test_loader = torch.utils.data.DataLoader(test_dataset, **test_kwargs) + + if args.model == "mlp": + model = QuantizedMLP().to(device) + else: + model = Net().to(device) + optimizer = optim.Adadelta(model.parameters(), lr=args.lr) + + scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) + for epoch in range(1, args.epochs + 1): + train(args, model, device, train_loader, optimizer, epoch) + test(model, device, test_loader) + scheduler.step() + + inference_model = convert(model, RuntimeMode.INFERENCE_AUTO, device=device, verbose=True) + test(inference_model, device, test_loader) + + if args.save_model: + torch.save(model.state_dict(), "mnist_cnn.pt") + + +if __name__ == '__main__': + main() diff --git a/examples/notebooks/Quantization_Visualiztion.ipynb b/examples/notebooks/Quantization_Visualiztion.ipynb new file mode 100644 index 0000000..52ffbbf --- /dev/null +++ b/examples/notebooks/Quantization_Visualiztion.ipynb @@ -0,0 +1,3627 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/paul/uni/deeplearning/bitorch/venv/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from bitorch.quantizations.quantization_scheduler import MixLinearScheduling, StepScheduling\n", + "import torch\n", + "from torch.nn import Tanh, ReLU\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.animation import FuncAnimation\n", + "from IPython import display\n", + "from typing import List, Callable\n", + "\n", + "from bitorch.quantizations import Sign, SwishSign, SteHeaviside, ApproxSign, ProgressiveSign, InputDoReFa, WeightDoReFa, Identity\n", + "from bitorch.layers import QActivation\n", + "import numpy as np\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quantization Visualization\n", + "\n", + "Use the code below to visualize the quantization functions supported in bitorch. The upper graph shows the applied quantization during forward pass (black graph) as well as the integral of the backward pass function (red). The graph below shows the function applied during backward pass." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2oAAAD/CAYAAACAaCVmAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAABph0lEQVR4nO3dd3gUVffA8e9NowRCC72FXqWGJqICIqACgoh0gfcVewUEBJUiCFJs+KLYQOmiIEiXItKEUIXQawglgQAJkLp7f3+Q8KOEZLPZ3bubnM/z5AGyszMn63Fyz9w7Z5TWGiGEEEIIIYQQ7sPLdABCCCGEEEIIIe4khZoQQgghhBBCuBkp1IQQQgghhBDCzUihJoQQQgghhBBuRgo1IYQQQgghhHAzUqgJIYQQQgghhJvxMXXgwMBAHRQUZOrwbi0+Ph6AHDlyGI7Efe3YseOi1rqwK48pOXt/krO2cXXeSs6mTfI2fXKudS+Ss+mTnHUvsbGxAOTKlctwJO4rrZw1VqgFBQUREhJi6vBu7fjx4wCUL1/ecCTuSyl1ytXHlJy9P8lZ27g6byVn0yZ5mz4517oXydn0Sc66l3379gFQs2ZNu96flJSEj4+xcsUl0srZrP2TCyGEEEIIITzClStXOHnyJEeOHOHkyZMkJSUxcOBAfH19TYdmhBRqQgghhBBCCJe7uzBLSEjAy8uLhIQEALy8vLBarYajNEcKNSGEEEIIIYRLXLhwgQ0bNqRamN3NarUybdo0YzNqOXPm5JlnniFv3rxGji+FmhBCCJFJ1qQkInbv5sLOnVw7exalFN45c5KneHEKVK5MoerVyREQYDpMIYQw7vr165w/f57Y2Fj8/PxuNcm5n6ioKBdFlrqYmBgp1IQQQghPE3vpEiGTJ7P322+JjYy8/4ZKEVizJiUffJCSzZpR+tFHyVuypOsCFUIIN1G+fHlef/114uLiOHXqFMeOHePYsWNcuXIFX1/fOwo3b29vBg0alG07nTqsUFNK/QA8BURore1r7SKEC0nOCk8jOeteDs6fz5pXXyUuKooK7dpR5bnnKNGkCXlKlsTL25ukuDhizpzh8qFDXNi1i7ObN3Ngzhz2fPMNAAUqVaL0o49SunlzSj/6KHmKFzf8Ezme5KzwRJK3rpEzZ06qVKlClSpVAO4o3I4ePcrVq1exWq0opQxHao4jZ9SmA1OAnxy4TyGcaTqSs8KzTEdy1jitNZtHjmTLyJEUb9SIx9eupfADD9yznW/u3BSsXJmClStToV07AKwWC5F79xK2bh1h69dzaP589n77LQAFq1S5o3DzL1rUpT+Xk0xHclZ4nulI3rpcaoXbtWvX8PPzMxyZOQ4r1LTWG5RSQY7aX3Z36NAhzpw5YzqMLE1y1rHCwsIkZ51MctaxLl68yPHjxzOct2d++IGzM2YQ2KYNJQcN4sDlyxzYsCFjBw8OplBwMAXffpsbR48SvWsX0bt28e/PP9+acctZtiwBdesSUKcOeWrWxDcw0OOuLEvOZj1aa3bu3Mn169dNh+I0kreOk5iYyP79+22+z8xisRAVFYXFYnHI8ePj4z06V+UeNTd07tw5nnjiCdNh2K0cILfMZz/t27cnOjradBhC2OzVV1/N8ENq6wNdgW3ALytWwIoVDo3JCygJVAAqnDpFuVOnyLFoEQA3gHPJXxeAS8lfV4Ds27xauNqmTZto1qxZht+XG6gIeNalBpFZCxYsYOzYsabDsIsPUAWzxZJLj62U6g/0ByhTpowrD+1RYmJiAHj//fd59NFHzQaTAda4OMK+/JJLDh64pGaP049wk+Ss7aKjo+nVqxd9+vQxHUqqEqOiuHHkCLFHjxJ74gSJFy+SGBVF0pUraIvl5pfVClo7LYZBLngWjOSs7aKjowkODmb8+PE2bR8XFsaBF18kT/XqvDhuHC/5OP9XqE5K4sbhw1w/fJi448cpfOIElU6cwBob+/8beXnhV6QIfsWK4VuwIL4FCuCTPz8+BQrc+ru3vz9euXLd/DNnTptn5lq2bOmkn+xOkree4+LFiwBMmzaNChUq2PSemL17OfnRRyReuuTM0AAZH7iblDHtypUr8UnnnHn58mUWL15MpUqVKFeuXKaOGxUVxc6dO8mbNy8VK1bM8GqEmK1b0XPmkMOJY4IUaeWsSws1rfU0YBpAcHCw839yD5XyYL/atWvTokULw9HY5mJoKEu6dOFSaCiNhw2jWo8eTj3eoOrVnbr/FJKztknJ2QoVKrhNzlotFs789RfHli7l5MqVXNq//9ZrAUFB5C9TBv/q1cldpAjefn4ob2+8vL1RXl7OC2
rMGOftO5nkrO2sViuFCxe2KWetSUnMeeghcuTOTY8//iBPiRIuiDDZ44/f8U9ttRJz5gxXT5zgyvHjXE35OnmSG6dOcXnbNhKvXbvv7pSXF3558+IXEIBf3rz45MqFT86ceOfMiXeOHPjkzHnr364iees54uLiAGjWrBlVq1ZNc1tttfLPxx+z64MPyF+hAq1++QX/YsWcGp+MD9xLyhLGli1b4u3tnea2a9eupUKFCrz99tv4+/vbfczIyEh++OEHgoOD6devH7ly5crQ++c/9hjxa9YAUP6ppyjn7FVur7xy35dk6aMbSknq9BLaXeybMYM/X3kFX39/Oq9cSVCrVqZDEi7mTjl76eBB9k+fTujMmVwLD8fbz4+SzZpR4/nnKdG4MYVr1SJHvnxmgnNBoSZsZ7FY8LKxMN/77bec++cfnpw927VFWiqUlxcBZcoQUKYMpR95JNVtEm/c4PqFC9y4cIHYixdJiIkhITqa+OhoEpK/Uv6eFBeHJT4eS1wc8VeuYImPv/m95AG5ELdLKdRyplPIX4+IYFnPnpxavZqq3brx+Dff4GfoWVTCHJ08I5XeuVZrTWhoKEFBQZkq0mJiYpg1axY+Pj706NEjQ0VaTHg4P9aoQcLVqyhvb55etIgKTz1ldyw2c0WhppSaAzwKBCqlzgAfaq2/d9T+s5OU2Ql3GPSmJeH6dda8+ir7Z8yg9KOP3hzAeFB7aclZxzGds1przmzYwPYJEzi+dCnK25tybdvy6OTJlH/ySfwycdJ3J5KzjmW1Wm3K2fjoaDZ9+CGlHn6Yql27uiCyzPPNnZv85cqRP5PLh17KZPMSydmsJ6VQS+u5VqfXr2dp9+7EX77M49Om8cB//+tRjXAkbx0n5YJYev/9IyIiuHTpEo0bN7b7WPHx8cyePZsbN27Qt29f8ufPb/N7d3/zDX++/DJoTZ6SJel38CB+efLYHYujOLLrYzdH7Su7Sxn02nql14TIfftY0qULUQcP0uTDD2ny/vt4uXlheTfJWccxmbPhmzbx17vvcnbzZnIFBvLgyJHUfvHFrNLa/A6Ss45ltVptytlt48cTGxnJo8uWedRg0x1IzmY9KQ8jTm1GzWqxsHXMGLaMHEn+ihXpvGIFhWvVcnWImSZ56zi2nmf379+PUopq1arZdRyLxcKCBQu4cOEC3bp1o3gGJg7mPfooYX/9BUD13r15YsYMu2JwBln66IZMz06kRWvNvh9/ZM1rr+EXEMCzq1dT1kU3mwv3ZSJnLx85wobBgzmycCH+xYvT8quvqNm3L74ZXIsusi9bBhBxV66w68svqdKlC8WCg10UmRDu635LH6+fP8/SHj04vXYt1Xv25LGpU91iRkKYZct5NmXZY9myZe1a9qi1ZunSpRw9epR27dpRqVIlm9539fRpZjzwAAnR0ShvbzouXkx5N+u6LoWaG0q538fdZtQSrl3jz5dfJnTmTMq0bMmTM2c6/aZg4RlcOaNmSUhg+4QJbBk9Gi9fX5qOHk39t9/OMssbhevYco/anqlTSYiJodHQoS6KSgj3ltrSx1Nr1rC0Rw8SoqNp/f331OzbV2afBWBboZbZZY8bNmxg165dNGvWjHr16tn0np1TprD2jTdAa/KWLk3f0FC3vLAghZobSrnx0p1m1CL37mVJly5cPnKEpqNG0ei99zxuqaNwHlfNqJ0PCWFF375c3LePyp070+KLLzzqvkjhXrTWaQ4gkuLi2PHZZwS1aUOROnVcF5gQbiw+Ph5fX1+8vLywWixsGTWKLaNHU7BqVZ79808K16xpOkThRmwp1DKz7HH37t2sX7+e2rVr07x5c5veM+fhhwn/+28AavbrR5vv3ff2QynU3JA7zahprfn3u+9Y+8Yb5Mifn2fXrKGMBz3bTbiGs3NWW61snzSJje+9R+6iRXn699+p2L69U44lso/0ZtQOL1jAjYgIGgwc6MKohHBvcXFx5MyZk2vnzrG0e3fC1q+nxvPP0/Krr2Rlg7hHeoVaZpY9Hjt2jCVLllC+fHnatWuX7izulRMnmFGrFonXrqF8fOi0bBnl3LxTuRRqbshdWp0nxMSw6sUXOThnDmVbteKJmTPxL1LEaEzCPTlzRu36hQss792bk6tWUalTJx7/9ltyFSzo8OOI7Ce9ro97vv6aApUqUcZNng0ohDuIi4ujipcXP9WpQ8K1a7T58Udq9uljOizhptIr1FKWPTZq1ChD+z1//jzz588nMDCQZ599Nt3xR8hnn7H+7bcByFu2LH0PHMDPA+5pl0LNDbnD0seI3btZ0qULV44d46ExY2g0ZIhzHwQsPJqzCrULO3eyqEMHYi9epNXXX1Orf3+570E4TFoDiMh9+wjftIlHJk6UnBMimTUpibybNtHl6lVy1ahBl3XrCHTRA6aFZ0pv5YI9yx6vXr3K7NmzyZEjBz169EjzmX4Wi4V5Dz/M2c2bAaj14os8/vXXtv8Ahkmh5oZMLn3UWrPn669Z9/bb5CpUiC7r1lH64YddHofwLM7I2YPz5rGib19yBQbSbfNmitat67B9CwFpDyD+/e47vP38qPH88y6OSgj3FBMeztJu3Si8dy8H8+blrW3b8M2d23RYws2ldS/w7cse89jYyCMuLo7Zs2eTkJBA3759CQgIuO+2UYcP83O9eiRev47y8eHZlSs9boWEFGpuyFR7/vjoaFa98AKH5s8nqE0bnvjpJ3IXLuzSGIRncuQssNaarWPGsOn99ynZtCntf/01Sz4TTZintU41Z61JSRycM4fy7dqROzDQQGRCuJcTK1awrFcvkmJjORIczD9xcVKkCZtYLJb7jg0yuuzRYrEwb948Ll68SI8ePSiaxthg+6RJ/JV8f3G+cuV4fv9+j1jqeDdZy+aGTDw8+MLOnfxcrx6Hf/2VZuPG8czSpVKkCZs5akbNarGw5vXX2fT++1Tv1Ysua9dKkSacxmKxpLqs8dSaNdyIiKB6jx4GohLCfViTktgwdCi/tm2Lf/Hi9AwJ4UyRIne05hciLVar9b7Lx0NDQ21e9qi1ZvHixZw8eZL27dtTvnz5VLezWCzMbNToVpFW+9VXeeH4cY8s0kBm1NySK2fUtNbs+uor/howgNxFitD1r78o2bSp048rshZH5GxSfDzLe/fm0Pz5BA8cyCPjx8t9kcKp7tdM5MDMmeTIn59ybvbgUyFcKTosjKXduhG+aRO1XniB5p9/jm+uXMTHx6d5T5AQt7vfeVZrzf79+21e9rh27Vr27t1L8+bNqV27dqrbXDpwgJ+Dg0m6cQMvX186//knZTz89h0p1NyQq2bU4q9eZcV//sORX3+l/JNP0nbGDHIVKuTUY4qsKbM5mxQfz+LOnTn+xx88MmGCtEMXLpFaM5HE2FiOLFxI1W7d8JFZA5FNHVu6lOW9e2NJSODJ2bOp1q3brddS2vMLYYv7NW3KyLLHHTt2sHHjRurVq0ezZs1S3Wbrxx+z8b33AMhfsSJ99+/H288vc8G7ASnU3JAr2vOfDwlhSZcuxISF8ciECQS/847MXgi7ZSZnby/SH
ps6lTovveTo8IRIVWrNRE6tXk3i9etU6dLFUFRCmGNJTGTjsGFsnzCBwrVr027+fApWrnzHNnFxceTPn99MgMLj3K9Qs3XZ4+HDh1m6dCmVKlXiySefvGcZpcViYXajRlzYsQOAum+8QcvPP3fcD2CYFGpuyJlLH7XW7PziC/4aNAj/4sXpumEDJZo0cfhxRPZib85aEhKkSBPGpLYk5+iiReTIl4/SjzxiKCohzIg+fZo/unbl7JYt1H7pJZp/+ik+qcycxcXFyT1qwmapFWq2dns8e/YsCxYsoFixYnTu3Pme/Vzct4+ZDRuSFBuLl68vz61bl+Vu35FCzQ05a+lj3OXLrOjXj6OLFlGhXTvaTJ8uDw4WDmFPzmqrleXPPy9FmjAitZy1JiVxbPFiyj/1VJZYMiOErY4uXsyKPn2wJiXx1Lx5VE1jRlnuURMZkVqhFhERwcWLF2nYsOF933f58mVmz56Nv78/3bt3x++uc/Lm0aPZ/MEHABSoXJk+//6bJc/bUqi5IWfMqJ3bto0lXbpwLTycRydPpv5bb8lDXIXDZDRntdasfestDs6dy8Pjx0uRJlwutUItfNMmYi9douLTTxuKSgjXsiQksGHoUHZMnkyRunVpN38+BSpWTPM9co+ayIjUCrX0lj3euHGDWbNmYbFY6NOnzx2zbhaLhZnBwUTu3g1A8IABPDpxotPiN00KNTfkyIcHa63Z8emnbBg8mDwlS9Jt40aK2/i8CiFsldGc/efjj9n15ZfUf+cdGgwa5MzQhEhVajl7ZOFCvHPkoFybNqbCEsJlrp48yZLnnuP8tm3UefVVHp04MdWljneTpY8iI+6+Fzi9ZY9JSUnMnTuXK1eu0KtXLwJve5ZlxN69zGrUCEtcHF5+fnT7+2+KpzErlxVIoeaGHPXw4NioKFb06cOxJUuo+PTTtPnhB3IWKOCIEIW4Q0Zy9t/vv2fjsGFU79mTRydMkJldYcTds8Baa44uWkTQ44/jZ0OraCE82ZFFi1jRty/aaqXdL79QpXNnm98rSx9FRtx9L3BkZOR9lz1qrVm4cCFhYWF07tyZsmXL3npt04cfsmXUKAAKVq3K83v2ZMmljneTQs0NOWJG7eyWLSzp2pXr587R4vPPqfv66zIgFk5ja86eXreOVS++SFDr1rT+4QfpNCqMScnZlPNi5N69RJ86RZP33zcZlhBpSkxM5MSJE8TGxtr1fmtiIgcnTeLUzJkEVK9OnYkTSSpThv3799u8j9jYWCnUhM3ufuD1/v3777vscdWqVYSGhtKqVStq1KgB3DxX/1y3Lhf//ReABu++yyPjx7smeDcghZobykyrc221sn3SJDa+9x55S5em26ZNFG/QwNEhCnEHW3L2yrFjLO7cmYJVqtBu/ny8fX1dFZ4Q97g7Z08sXw4gD7kWbm3MmDH8/PPPdr23INATKA38DSwNDcViZ77ny5fPrveJ7Of2GbW0lj3+888/bN26lYYNG9IkuRv5hd27md24MZb4eLxz5KDbpk0Uq1/f5T+DSVKouSF7lz7euHiRFX36cHzpUio98wytv/uOnPKsE+EC6eVsfHQ0v7VrB0DHJUvIERDgstiESM3dzUROrFhBkTp1yFO8uMmwhEhTZGQkxYsX5/MMPifq+tatXJo6FeXlRaGXX6Znw4b0tDMGb29vWrVqZee7RXZzezOR+y17PHDgACtWrKBq1aq0bt0apRR/DxvGP2PHAlCoZk16797t1OcLuysp1NyQPUsfwzdt4o+uXbkREUHLKVOo88orstRRuExaOWu1WFjavTtXjhyh86pV5C9f3tXhCXGP23M2Pjqas5s2ETxwoOGohEhbfHw8BQsW5Nlnn7Vp+6T4eP4aOJBTU6ZQrGFD2s2bR76gIOcGKcRtbi/UUlv2GBYWxm+//UapUqXo1KkTWmt+rFmTS8nLcRsNH06z0aONxO4OpFBzQxlpda6tVrZ98gkbhw8nX1AQ3bdsoWi9es4OUYg7pJWzm0eM4PjSpTz2v/9RpnlzV4cmRKpuz9mwdeuwJiVJt0fh9hISEmzuuHj56FH+eO45LuzcSf233+bhceOyRfMF4V5SCrXUlj1eunSJOXPmEBAQQNeuXbm0dy9zHnwQS0IC3jly0H3rVorWqWP2BzBMCjU3ZOvDg29ERrKsd29OrlhBlS5dePzbb2VJmTDifjl7YuVKto4ZQ81+/ajz8ssmQhMiVbfPqJ1YsQK/vHkpkXxfhBDuKj4+3qZC7eD8+az673/x8vHh6d9/p2L79i6IToh7pRRqdy97vH79OrNmzUIpRY8ePdg+ciQhEyYAULhWLXru3JktlzreTQo1N2TLjFrYhg0s7daN2EuXeGzqVGq/+KIsdRTGpJazMWfOsKxnTwJr1qTllCmmQhMiVbcuLijFiRUrKNOypcw2CLcXHx+Pv7//fV9Piotj3dtvs+frryneuDFPzZ1LvttanAvhalarFV9f31udRatVq0ZCQgJz5swhJiaGnt26sfDBB7l86BAAD44YwYMffmgyZLcihZobSut+H2218s/HH7Ppgw/IX6ECnZYupUg2nxYW5t2ds5bERP7o2pWkuDja//ILvrlymQxPiHvcytmoKKJPnqTRkCGGIxIifWktfYw6fJglXboQuWcPwQMH0mzsWOmuK4yzWCz4+fkRGhpKUFAQuXPnZv78+Zw9e5bHKlfm12rVsCYk4J0zJz3++YcitWqZDtmtSKHmhu43o3Y9IoJlPXtyavVqqnbrxuPffINf3rwmQhTiDnfn7MbhwwnftIknZ8+mYJUqJkMTIlW3CrUjRwAIat3aZDhC2OR+Sx8PzJnDqv798fbzo+Mff1DhyScNRCfEvbTWaK25ePEiDRo0YPny5Rw6dIhKhw6xOXnmrEjduvTYvl2WOqZCCjU3lFqhdnr9epZ270785cs8Pm0aD/z3v7LUUbiN23P25OrVbP/kE2q/+CLVunUzHJkQqbu19PHwYQpWqSKd8IRHuLtQS4yNZd2bb7L3228p2bQpT86ZQ0Dp0gYjFOJOFouF+Ph4AK5du0bI1q0U+OEHzp49C0DT0aNpMny4yRDdmu3939OhlGqjlDqklDqqlJI1JJlwe2MGq8XC5lGj+KVlS3IEBNDjn3+o9cILUqQ5iOStY6TkrCUmhhV9+lCwalUe/fRTw1FlTZKzjmGxWG5eqTx2jCDp9uhUkrOOEx8fj1/yvZSXDh5kVqNG7P32WxoOGUKXdeukSHMQyVnHsVqtJCQkEBgYyN+zZuH/8ccknD2LT65c9A0NlSItHQ6ZUVNKeQNfAa2AM8B2pdRirXWoI/af3aQMemMjIljWqxen16yhes+ePDZ1Kn53Pcld2E/y1nFScvbguHHciIjg6cWL5b40J5CcdRyr1Uo5gKQkgh5/3HQ4WZbkrGOl3KMWOnMmq196CZ9cuei0bBnl27Y1HVqWITnrWImJiXh7e3P1l1/w/+svAIoGB9N961ZZ6mgDR82oNQSOaq2Pa60TgLlABwftO9uxWCxUAn6qW5ezmzfT+vvvafvTT1KkOZ7k
rYNYLBbqAOeXL+fBESMoVr++6ZCyKslZB7FYLFQG8Pam1COPmA4nK5OcdSAdH0+pLVtY1qsXRevVo/fu3VKkOZ7krAPFx8ZSLywM3+Qirdm4cfSS+9Fs5qh71EoCYbf9+wzQKK03xMfHc/z4cQcdPuvQFgt5t2zhv4B33rw8On06/pUrc+LECdOhZUUZylvJ2fuLv3CBjkDu6tUJfPZZ+ZycR3LWQU6fPk1lQJUrx5kLF0yHk5XJ+MBBrhw6xEuJieQ9eJAqr7xCtTffJDI+nkj5rBwtwzkbGxvLvn37nBqUJzq7cSPtz54lSGu8cubkwV9+wT8oSD6rDHDYPWq2UEr1V0qFKKVCoqKiXHlojxAbEcHfvXpRaOdOdilF84ULCahc2XRY2ZrkbPq01Qrz5+MNBA0ejJeP9CgySXLWNvEXL1IC8Kpa1XQoAsnb9Jz69Vf+euYZ8gCXOnakxoABcq417PacvXz5sulw3M7ekSPZ8/LLlNSafwICaLl1K3mkaVOGOer/8nDg9jtYSyV/7w5a62nANIDg4GBdvnx5Bx3e851cvZq/evYk4do1zj38MIu2bWNOzZqmw8rq0s1bydn07fj8c9TRoywBuj30EPIZOZXkrIMcnDMHgJwPPCA561wyPsiEhOvXWfPqq+yfMYPiDz3EiI0beaNGDclZ57IrZ2vKmA2AhNhYplerRvSpU6AUP/n7U7ZJE2rXrm06NI/kqBm17UAlpVQ5pZQf0BVY7KB9Z2nWpCQ2Dh/OgtatyVW4MD23b+dqpUqydtc1JG8z6WJoKBsGD0ZXq8Y/3PvsP+FwkrMOcmnLFq4DvuXKmQ4lq5OctVPkvn3MbNCA/T/9RJMPPqD53LlEw30feC0cRnLWTqfXruXLgACiT51C+/qihg/nYs6c+Mjsr90cUqhprZOA14CVwAFgvtZ6vyP2nZXFhIczv0ULto4ZQ82+fem5bRuB1atjsVik/b4LSN5mjiUhgWU9e+KXNy9JTz8N3HykhHAeyVnH0Fpz+Z9/OAJ4ycUFp5KczTitNf/+8AOzGjYkLiqKZ1etounIkSQkJgLcas8vnENy1j4rX3iB+S1bopOSSCpdmsq//MJ1b2+8vLzkIm4mOKzE1VovA5Y5an9Z3YkVK1jWqxdJsbE88fPPVO/Z89ZrVqtVktpFJG/tt3nkSCJ27aLDwoXM33/zd5jkrfNJzmbepQMHSLh4kSNAK8lZp5OctV3CtWv8+fLLhM6cSZkWLXhy1iz8ixUDuPXQYCnUnE9y1nYJsbH8WKUKMWE3+6/EtW1Lzf79CQgIAEApJZMPmSCXv13MmpTEhqFD+bVtW/yLF6dnSMgdRRrcLNRkZkK4szN//822ceOo2a8flZ5++o6HtAvh7k6tWgXAYSRnhfuI3LuXmcHBHJg9mwdHjqTzqlW3ijSAuLg4QJY+CvdxYuVKvsybl5iwMHz8/Yl96y3KdutGu3btOHDgAGXLlkVrLRdxM0EWjbpQdFgYS7t1I3zTJmr170/zzz5L9aHAUqgJd3YjMpI/unYlX/nyNP/0U+D/H3gtJ2PhCU6uXk2OUqW4cuaMnGuFcVpr/v3uO9a+8QY58ufn2T//pEzz5vdsl1KoyYyacAfL+/Vj/48/AlC4SRPOtmtH4QIF6NKlC5cuXSIyMpK2bdtitVplRi0TpFBzkePLlrG8d2+S4uN5cvZsqnXrdt9tLRaLDHiFW9JWK8t69iT20iV6LF1KjuSlDRaLBZBCTbi/pPh4wtavp0CrVnDmjOSsMCohJoZVL77IwTlzKNuqFU/8/DP+RYumum3K0keZURMmJVy7xg9Vq3ItPByU4sFPPmGrlxc5gO7du5MjRw5CQ0MBqF69utzOk0lyKdHJLImJ/PXuu/z25JPkKVWKXjt2pFmkgcyoCfe1dexYTq5aRYvPP6dInTq3vi9LH4WnOLtlC0k3buBfty4gOSvMidi9m5/r1+fQvHk89NFHdF6x4r5FGsjSR2HesT/+4Mv8+bkWHo5fQAC9Dx1ij78/CQkJ9OjR49Z9aaGhoZQtW5Y8efLIjFomyW8oJ4o+fZp5jzzC9gkTqP3yy/TYupWCNjzAWgo14Y5Or1vH5g8/pFr37tTq3/+O12Tpo/AUp1avRnl7k/uBBwAp1ITraa3ZPXUqsxo3JvH6dbqsW0fjYcNQ6eSiFGrCpKW9erGwXTu0xUKZFi14NSqK5Zs3c/HiRbp06ULR5IsMkZGRREZGUr16dUBWiWWWLH10kmNLlrD8+eexJiXx1Lx5VO3Sxeb3WiwWGTwItxJz5gx/dOtGgUqVaPXNN/dcHUtZ+ih5K9zdqdWrKdG4MeTMCUjOCtvFxsYSHR2dqX0kxMSwdcAATi1eTInmzWn65Zf4BQZy4cKFdN8bEREByD1qwrVir15lerVqXD93DpTisalTqd2/P4sWLeLkyZM8/fTTdzyAfX9yF+iUQk1rLefZTJBCzcEsCQlsGDqUHZMnU7RePZ6aN48CFStmaB+S1MKdJN64waIOHW5e+V2zBr88ee7ZRmsNyIyacG+xly5xPiSEBz/8kChZrisyQGtNxYoVOXv2rN37KAn0BApw8wFd69etQ9esmeH95EnlHCyEMxxZvJjFHTuirVZy5M9Pn337yFuyJGvWrGHv3r00b96c2rVr3/Ge25c9gkw+ZJYUag509eRJljz3HOe3baPua6/xyMSJ+NixREGSWrgLrTUr+vXjwq5ddFy8mMAaNVLdTmbUhCc4vXYtaE3ZVq2IjIwEJGeFbRITEzl79izt27enTZs2GXqv1pqkv/4i4bffUHnykKNfP56tWJFn7YjDarVSokQJO94pRMYs6dqVQ/PmARDUujWdV6wAYMeOHWzcuJF69erRrFmzO96Tsuyxbdu2t74n7fkzRwo1BzmyaBEr+vZFW620X7CAys88Y/e+pEOOcBdbx4zh0Lx5NBs3jgpPPXXf7eQeNeEJTixfTo78+SnesCHWJUsAyVlhm5T7wx555BFefvllm98Xf/UqK/7zH478+ivlnniCtjNmkDsw0O44jh8/bvd7hbBF7NWr/FilCjcuXAClaP3DDzzQpw8Ahw8fZunSpVSqVIknn3zyntsg7l72CDcv5EozEftJoZZJloQE/nr3XXZ+/jlFg4NpN28e+W9bq2sPaSYi3MHh335j0/vvU71nTxq++26a20rXR+HutNXK8WXLCGrdGi8fH5kFFhliTyOP8yEhLOnShejTp3n4k09oMGBAug1DhDDp0K+/8keXLjeXOhYoQN/QUPIkP3T97NmzLFiwgGLFitG5c+dUz513L3sEmVHLLCnUMuHK8eMsee45LoSEUO/NN3l4/Hi7ljreTZY+CtPObNzI0u7dKd64MY9/+226V8NSrpjJVTPhri7s3MmNCxco/+STgCzXFRmT8gyznMlNaNKitWbnF1/w16BB+BcrRre//6ZEkybODlGITPm9c2eO/PorAOWeeIJnli699drly5eZPXs2/v7+dO/ePdWGNqktewSZUcssKdTsdPjXX1nRrx/Ky4sOCxdS6emnHbZvufo
gTIrct4+F7dqRLyiIjkuW4GPDwESW6wp3d3zpUlCKcsn3F8lyXZERKTNq6RVqcZcvs6JfP44uWkSFdu1oM306uQoWdEWIQtglNiqKH6pWJTYyEpSi7fTp1Ojd+9brN27cYNasWVgsFvr06XPfZjYpyx6rVat263taaxnTZpIUahmUFB/PXwMHsmvKFIo1bEi7efPIFxTk0GPI1QdhSnRYGL+2aYNPrlw8s2KFzfdSyHJd4e6OL11K8UaNyF24MPD/M2pyrhW2sKVQO7dtG0u6dOFaeDiPTppE/bfflvwSbu3AvHks694dbbWSs1Ah+h44gH/yORIgKSmJuXPncuXKFXr16kVgGmOClGWPefPmvfU9uS0i8+STy4DLR48y58EH2TVlCvXfeYduf//t8CINZHZCmHHt7Fnmt2hBQkwMzyxfnqHclkJNuLPrFy5wfvv2W8seQWbURMakdY+a1pqQyZOZ07QpAN02biT4nXekSBNubVHHjizt2hVttVKhfXteu3jxjiJNa83ChQsJCwujY8eOlC1b9r77uvsh1ylkiXnmyYyajQ7On8+q//4XLx8fnv79dyq2b++0Y8mgV7ja9fPnmd+iBdfPn+fZVasoctdzUdIjOSvc2YnlywHuKNRkACEy4n73qMVGRbGiTx+OLVlCxQ4daPPjj+QsUMBEiELY5HpkJNOrVSP20iXw8uKJn3+mevfu92y3atUqQkNDadWqFTXu82ieFKktewSZUXMEKdTSkRQXx7p33mHP1KkUb9yYp+bOJV8aVxUcQZqJCFe6HhHB/JYtiTlzhmdWrLDrpnfJWeHOji1Zgn/x4hSpU+fW96RQExmR2tLHs1u2sKRrV66fO0fzzz6j3htvyCyacGuhs2ezrFcvsFrJFRhIn9DQO2bRUvzzzz9s3bqVhg0b0sSGMUFqyx5BzrOOIJ9cGi4fOcLsJk3YM3UqDQYNouuGDU4v0kCWPgrXiT59mrnNmnH1xAk6/vEHpR56yK79SM4Kd5Vw7Ronli2jUqdOdwyiZemjyIjblz5qq5VtEyYw9+GH8fL2ptumTdR/800p0oRb+61dO5b16AFWK5U6deLVyMhUi7QDBw6wYsUKqlatSuvWrdPN6/stewQ5zzqCzKjdx4E5c1jVvz8+OXLQ8Y8/qHDbkhlnk2VkwhUuHTzIglatSIiJ4dnVqymZfH+FPSRnhbs6sXw5SXFxVHn22Tu+L1d6RUakLH30io1lYfv2HF+6lErPPEPr774jZ/78ZoMTIg3Xzp9neo0axEVFoby8eHLuXKredT5MERYWxm+//UapUqXo1KmTTefH0NBQ4N5ljyBNmxxBCrW7JMbGsu7NN9n77beUbNqUJ+fMIaB0aZfGYLVa8fX1dekxRfZy7p9/+O2pp1BeXjz3118ZviftblKoCXd1eMECchcpQsm7Zovl3gmREXFxcQQBW557jsSoKFpOmUKdV16RAahwa/9On87Kfv1Aa3IXLUrf0ND7Pi7i0qVLzJkzh4CAALp27WrzOHT//v2UKVPmnmWPIDNqjiCF2m0uHTzIki5duPjvvzQcMoSmo0bhbaBgslgsqXaWEsIRDs6dy/I+fchTogSdV66kQKVKmd6n3KMm3FHijRsc++MPavTujdddAwWZURO20lYrV377jZcAnxw56Lx5M8Xq1zcdlhBpWtC2LSdXrACgSpcutJs3777bXr9+nVmzZqGUokePHvj7+9t0jPs95DqFzKhlnhRqyUJnzmT1Sy/dfH7U8uW3HopqgtZaBg/C4bTVypbRo9k8YgSlmjWj/W+/2fyctHT3LQ+0FG7oxIoVJN24QeXOne95Ta70ClvciIxkWe/exK5Ywb/AuD//pFjFiqbDEuK+YsLDmV6zJvFXrqC8vGi/YAGVOna87/YJCQnMmTOHmJgYnn/+eQpm4AHtaS17BDnPOkK2rwYSb9xgxX/+w7JevSharx69d+82WqSBzE4Ix4u9dImF7duzecQIajz/PJ1Xr3ZYkQbykHbhng4vWECuwEBKP/LIPa/JlV6RnrANG/ipTh3C1q1DPfMMs4CAIkVMhyXEfe399lu+KV2a+CtX8C9enFeiotIs0qxWK7/99htnz57lmWeeoVSpUhk6Xmho6H2XPYKsXHCEbP3JXQwNZWbDhuz78UcaDxtGl7VryVuypOmwpIOecKjwzZv5qU4dTq1eTcspU2jz44/4OHhprcVikZwVbiU+OpqjixZR+Zln8PK5d/FIygBC8lbcTVutbB0zhvnNm+Pr70+PrVuJr1cPuPc5akK4i19atWJV//6gNVW7dePls2fJlS/ffbfXWrNixQoOHTpEmzZtqFq1aoaOFxkZSURERKrdHlNIoZZ52Xbp474ZM/jzlVfw9fen88qVBLVqZTqkW6Qxg3CEpLg4towaxbZPPiGgbFm6b9lC0eTBhqPJcl3hbg7Nn09SbCw1+/ZN9XVZkiNScz0igmU9e3Jq9WqqduvG4998g1/evMT99huANPoSbicmPJwfa9Qg4epVlLc3Ty9aRIWnnkr3fZs3b2b79u08+OCDNGzYMMPHTVn2mFahJk2bMi/bFWoJ16+z5tVX2T9jBqUffZQnZ88mT/HipsO6gyx9FJl17p9/WN63L1EHDlCzb1+af/opOdK4spZZkrPC3ez78UcKVq1KsfsMQGTpo7jb6fXrWdq9O/GXL/P4tGk88N//3sqP+Ph4cubMKfki3Mrub77hz5dfBq3JU7Ik/Q4exC9PnnTft2/fPv78809q1qzJY489Ztex01v2CDKj5gjZ6pO7uH8/Mxs0YP9PP9Hkww959s8/3a5IA2nMIOx37exZlvfty6wmTUiMieGZ5ctp88MPTi3SQJbrCvcSdegQZzdvpmbfvvcdWMuMmkhhtVjYPGoUv7RsiV/evPT45x9qvfDCHbkTFxcnyx6FW5nXogV/vvQSaE313r156cwZm4q0kydPsmjRIsqWLUuHDh3suvhgy7JHkBk1R8gWM2paa/b9+CNrXnsNv4AAnl29mrItW5oO676kMYPIqLgrV9j15ZdsGz8eS0ICDQYOpPHw4eQICHDJ8WW5rnAnu6ZMwcvXlxq9e993G7nSKwCunz/P0p49Ob1mDdV79uSxqVNTHexKoSbcxdXTp5nxwAMkREejvL3puHgx5Z94wqb3RkZGMm/ePAoUKMBzzz2HTyr379rClmWPIOdZR8j0J6eUelYptV8pZVVKBTsiKEdKuHaN5b17s/I//6HEgw/y/O7dbl2kgcxOuIK7562tbkRG8vewYUwrW5ZNH3xA0OOP0+/AAR755BOXFWkghZorZJWcdbb4q1fZN306Vbt2xb9YsftuZ7VaUUrJRTEncvecPbVmDTPq1OHs5s20/v572v70031nJOLj4+X5ptmAu+fszilT+DYoiIToaPKWLs3rV67YXKTFxMQwa9YsfHx86NGjB7ly5bI7DluWPYKsXHAER8yo7QM6Ad84YF8OFbl3L0u6dOHykSM0HTWKRu+9d89DT92RDHpdwm3zNj3aauXUmjX8+913HFm4EGtSEpU7d6
bR0KEUrVvXSEySsy7hsTnrSv/+8AOJ165R780309xO7qt0CbfMWavFwpZRo9gyejQFq1Th2T//pHDNmmm+R2bUsg23zFmAOQ8/TPjffwNQs18/2nz/vc3vjY+PZ/bs2dy4cYO+ffuSP39+u+O4ePEiERERtLHhUVZyL3DmZbpQ01ofgIz/R9Bak5SUlNnD33ff+77/nvVvv02O/PnptGoVpR95BKvWWJ10TEeSAYTz2ZO3zszZ9MRfvUrY+vWcWLqUE8uXc+P8eXIWLEjtl1/mgRdeoGByW11T8SUlJUnOOpmn5awJSXFxhEyaRMlmzQisXTvNnz0pKUmu8jqZO44Prp87x/JevTjz119U69WLFl9+ia+/f7rHi42NlUItG8hMzsbFxTklpqsnTzKnQQOSrl0Db2/aLVpE2cces/l4CQkJ/Pbbb4SHh9O1a1cKFSpEQkKC3fGEhIRgsVioXLlyuv/fpBxHzrX2M3aPWvjOnQx1UptbbyAQOAzMPn+eV+3saGOStAB2P3t27qSIry8KSDmF334qv/t7Gf0z5e8+QAEgP1AEKAUUTX49FjgE/Avsj4rC8sUX8MUX9v5IDlXX0GyeuL+dO3dmq3NJU+BpYGJ4ON1t+Ln9/f2dHZKwgzPHBwHcHCMsBEJ+/hl+/tnm9zZp0sQpMQnPF75zJ+9nYilhWnID/sCfwJ8WC4PatbN7X6NHj3ZUWBnaV3b6PeRoNhVqSqk/gdQW+w/TWv9u68GUUv2B/gBFfX0JrFzZ1rdmWGKZMpRp3JghHniVPyoqirZt25oOw+M5Im9vz9lSQNqLqRxP582LtWRJEkuWRJcpgypThqo+PlQFnnVxLGmJioqicePGpsPweI7O2Xz58jFw4EAHRujGEhLI8emnWAoXpk+/fja9pVChQk4OKuvzuPGBry9JzZrRoUgROmTwrc2bN3dKSMK1nJKzAE4qRq4DJ4KCyPvAA3S0cx+5cuVy2Iywt7c3RYsWxc/Pz6btr169SoMGDRxy7OzIpkJNa+2QKSmt9TRgGkBwcLAeHBLiiN1mOcePHzcdQpbgiLy9PWdrBAXpNiNGQEoDgrv/hDu+l97rd/+Z8rq3nx95S5cmoEwZfHPnzuyP4BKSs47h6JytXa2a7v/UU/jkynXPlyfcr5sRfw0ezPZr1+i5ciUlH3zQpvdI3maejA+Ep3FWzk7Iwjmrtbb7PrN9+/Y5OJrsJVu05xfCEXIFBlKzTx/TYQhhs6iDB/npPktSvXx98cmVC9/cufHLm5ccBQqQM/kr5e+5ChUiT4kS5ClZ8tafPm54n07kv/+yY/JkavbrZ3ORJoQQIn1RUVF8/fXX5M+fn0qVKlG+fHlKly5t84yayJxMF2pKqY7Al0BhYKlSarfWunWmIxPCiSRvhaexJ2fzV6pEh/HjSYqNJSk2lsTkP+/4unGD+Oho4i9fJi4qiivHjt38++XL6OTWyrfLWbAgAWXLUqByZQpWqULBKlUokPynLQ9bdbTE2FiW9epFjnz5eOSTT1x+fHF/cp4VnkZy9l43btxAKUVkZCQXL15kx44dJCYmUqhQISncXMARXR8XcvO+XCE8huSt8DT25GyOgAAqdbTvrgatNfFXrnDt7FmuhYcTEx7OtfBwrp09y9UTJzi/fTuHf/nl/4s5pShYpQpF69enaP36FAsOpmhwML5OusE+JcY1r75K5J49dFq6lFxyz5lbkfOs8DTumLMXL17kzz//RGtt5PhRUVG3ujtqrYmPjwe4b+H24IMPSqMmB5Klj0IIIe6hlLq1FDKwRo1Ut0mKi+PKsWNEHTrExX37uLBjB2Hr13Ng1iwAvP38KN6oEaUefZTSjzxCiSZNHHbfpdaav4cOZd+PP9J4+HCbH/oqhBCe5Pz58xw6dMh0GKlKKdx8fX2JjIwkMTGROnXqSKHmQFKoCSGEsItPzpwE1qhBYI0aVO7U6db3r1+4wPnt2zmzYQNh69fzz5gxbB09Gi9fX0o0bkyZli0p06IFxRs1wtuO5TJJcXGse+st9nzzDbVfeommo0Y58scSQgi3UbNmTWqm80B2Zzpz5gwzZ868NZMG4Ofnh8ViIW/evFSoUIEKFSpQtmxZcntIAzRPIoWaEEIIh/IvWpQKTz1FhaeeAiA+OprwTZsIW7+e02vWsHnkSDaPGIGvvz8lmzWjbMuWlGnZkiK1a6PSeKSK1prjy5axYfBgLu3fT8PBg2k2dqzd3ciEEEKkzcvLi8TERLy9vaUwM0AKNSGEEE6VIyCA8m3bUj75+ZCxUVE3i7a1azm9Zg1/DRoEQK5ChShSrx6BDzxAQJky5CpUCC9fX+Kiori4bx/Hly0j+uRJ8leoQKelS2W5oxBCOFnx4sV5/vnnCQwMlMLMACnUhBBCuFSuggWp3KnTreWSMeHhnF67lrB164jcs4c9//sfSXFxd7zHL29eSjZrRtNRo6jatSveTnq4rBBCiP+nlKJMmTKmw8i2pFATQghhVN6SJanRqxc1evUCQFutxF2+TOylS1gTE8mRPz95SpSQJY5CCCGyFWWq3adSKhI45cRDBAIXnbh/Z5P401ZWa13Yifu/h+Rsujw9fshieSs5axNP/xmyVM6C5K0NJP60Sc66H4k/bffNWWOFmrMppUK01sGm47CXxJ/9ePpn5unxQ9b4GVwpK3xenv4zeHr8Jnj6ZybxZz+e/plJ/Pa7f3stIYQQQgghhBBGSKEmhBBCCCGEEG4mKxdq00wHkEkSf/bj6Z+Zp8cPWeNncKWs8Hl5+s/g6fGb4OmfmcSf/Xj6Zybx2ynL3qMmhBBCCCGEEJ4qK8+oCSGEEEIIIYRHytKFmlLqWaXUfqWUVSnlMd1mlFJtlFKHlFJHlVJDTMeTEUqpH5RSEUqpfaZj8USSs64nOZs5krNmSN7aT3LWDMnZzPHEvJWczbwsXagB+4BOwAbTgdhKKeUNfAW0BaoD3ZRS1c1GlSHTgTamg/BgkrOuNx3J2cyQnDVjOpK39pKcNWM6krOZ4VF5KznrGFm6UNNaH9BaHzIdRwY1BI5qrY9rrROAuUAHwzHZTGu9AYgyHYenkpx1PcnZzJGcNUPy1n6Ss2ZIzmaOB+at5KwDZOlCzUOVBMJu+/eZ5O8J4a4kZ4WnkZwVnkZyVngayVkH8DEdQGYppf4EiqXy0jCt9e+ujkeI9EjOCk8jOSs8jeSs8ESSt+JuHl+oaa0fMx2Dg4UDpW/7d6nk74ksQnJWeBrJWeFpJGeFJ8pieSs56wCy9NH9bAcqKaXKKaX8gK7AYsMxCZEWyVnhaSRnhaeRnBWeRnLWAbJ0oaaU6qiUOgM0AZYqpVaajik9Wusk4DVgJXAAmK+13m82KtsppeYAW4AqSqkzSqn/mI7Jk0jOup7kbOZIzpoheWs/yVkzJGczx9PyVnLWQTForV19TCGEEEIIIYQQacjSM2pCCCGEEEII4YmkUBNCCCGEEEIIN2Os62NgYKAOCgoydXi3Fh8fD0COHDkMR+K+duzYcVFrXTgz+1BKlQZ+AooCGpimtf78fttLzt6f5KxtHJG3G
SE5mzbJ2/S5OmdB8jYtkrPpk5x1L7GxsQDkypXLcCTuK62cTbdQU0r9ADwFRGita6byugI+B54AbgB9tNY709tvUFAQISEh6W2WLR0/fhyA8uXLG47EfSmlTjlgN0nAAK31TqVUXmCHUmq11jo0tY0lZ+9PctY2Dspbm0nOpk3yNn1p5ayMD1xPcjZ9krPuZd++fQDUrHnPxy2SpZWztix9nA60SeP1tkCl5K/+wNSMBCeEKVrrcyknYK11DDe7EpU0G5UQQniM6cj4QHiW6UjOCg+S7oya1nqDUioojU06AD/pm+0jtyql8iulimutzzkqSHtcvXqVS5cumQzBbqdPnzYdQqZ4e3tTpkwZbl6Y8gzJOV4X+MdwKB7JYrFw4cIFucorPIbFYiEsLIwiRYqYDsVjeeL4QGvN6dOnsVgspkLIFE8fHwQGBhIQEGDs+J6YswArV6702DFtWFgYAHv37jUciX3Kly9P48aNjR3fEfeolQTCbvv3meTvGUtqi8VCuXLluHz5sqkQsr1vvvmG/v37mw7DJkqpPMCvwFta6+i7XuvPzatqlClTxkB07k9rzX/+8x/+/vtvhg4dytixY02HJESawsLCaNeuHXv27KFEiRIsWbKEevXqmQ4rK3K78cH//vc/XnvtNVOHz/YKFy5MRESE6TDS4nY5+/rrrzNlyhRThxfAihUraN26tZFju7SZiKsGvYmJiVy+fJnOnTvTrl07px3HWVJOYp54pddisdCvXz8uXLhgOhSbKKV8uVmkzdJa/3b361rracA0gODgYHnoYCoWLVrE33//DcDEiRPp27cvlSpVMhyVEKlLSkqiU6dOHD9+nIEDBzJr1izat2/Pv//+S4ECBUyHl225anxw/vx5AGbMmOG0YziTJ48PFi5cyKJFi9Bae9SKm/txVc4ePXoUgEqVKlGiRAmnHcdZbty4AUDu3LkNR5Jx+/bt49KlS+zfv9+jC7VwoPRt/y6V/L17uGrQa7VaST4GvXv3dtZhnMaTbxa2Wq3069fv1n8Dd5Z80/D3wAGt9WTT8XiihIQE3n33XSpVqsT06dNp3bo1gwcP5rff7ql5hXALP/74IyEhIcydO5cGDRrQtGlTOnXqxNixY5kwYYLp8LIatxwf+Pj4eOTYADx7fHDq1CkWLVqE1WrF29vbdDj345Y5CzBo0CBeeOEFZx3GaTy5mUjHjh1ZtGgRSUlJxmJwxHPUFgO91U2Ngaum1/KmJLWXlzwmztVSrpJ5QqEGNAV6AS2UUruTv54wHZQnmTp1KkePHmXIkCEUK1aMIUOGsHDhQjZs2GA6NCHuYbFY+Pjjj2nUqBFdunQBoFatWvTs2ZP//e9/XLlyxWyAWY9bjg9kbGBGyufu5uMDt8vZlPspfX19TYaRLaVcUDB5T2u6Zyul1BxgC1BFKXVGKfUfpdRLSqmXkjdZBhwHjgLfAq84LVobSaFmjlIKpZS7n4gB0Fpv1ForrXUtrXWd5K9lpuPyFFFRUYwcOZJWrVrxyCOPAPD2229TqlQp3nnnHY/IAZG9rFy5khMnTvDOO+/csfTq7bff5saNG0yfPt1ccB7IU8cHMjYwwx0KNU/NWQAfH2OPPs62Ugo1kzNqtnR97JbO6xp41WEROYAUamZ5eXnJID0b+Oijj7hy5QoTJ068NejNnTs3H3/8Mb169WL27Nn07NnTcJRC/L+ffvqJwMBAOnbseMf369atS4MGDfj555956623zATngTx1fCBjAzPcoVDzxJyVGTVz3KFQy5Jnq5STgBuvgc7SvL29pVDL4o4ePcqUKVPo168ftWrVuuO17t27ExwczNChQ2/dRCyEadevX2fJkiV07tw51QFPt27d2LlzJ4cPHzYQnXAVN78/KktL+dxlfJAxKZ+Xn5+f4Uiyn5TfFVKoOVjK1Qe5amaGl5eXxz6jRthmyJAh+Pn5MXr06Hte8/LyYvLkyZw5c4ZPP/3UQHRC3GvNmjXcuHGDzp07p/p6yvcXL17syrCEi1ksFhkbGJLyucv4IGNkRs2clIsLiYmJxmLIkmcrWfpolix9zNo2btzIr7/+yuDBgylevHiq2zRr1oxOnTrx8ccf32qHLYRJK1euxN/fn4ceeijV10uXLk316tVZuXKliyMTriRLH81xh6WPnkgKNXNk6aOTSKFmlhRqWZfVamXAgAGULFmSAQMGpLnt+PHjSUhI4P3333dRdELc38qVK2nevDk5cuS47zZt2rRhw4YNXL9+3YWRCVeSQs0cKdTsk/J5SaHmeh7R9dETSaFmlhRqWdfcuXPZtm0bY8aMSffhlRUrVuS1117jhx9+YO/evS6KUIh7HT16lGPHjtGmTZs0t2vdujUJCQn89ddfLopMuJoUauZIoWYf6fpoTspnLoWag0kzEbOkUMuaYmNjGTp0KHXr1qVXr142vWf48OHky5ePgQMHcrOZlhCul7KcsXXr1mlu16xZM3LmzCnLH7MwKdTMkULNPtJMxBxZ+ugkMqNmlnR9zJo+//xzTp8+zaRJk2z+f6tgwYJ8+OGHrF69mhUrVjg5QiFSt2bNGoKCgqhYsWKa2+XKlYuHH36YNWvWuCgy4WrS9dEc6fpon5TPK2fOnIYjyX5SlpvKjJqDSddHs6TrY9YTERHB2LFjad++Pc2bN8/Qe19++WUqVqzIgAEDjF6VykqUUt5KqV1KqT9Mx+LutNZs2rSJhx9+2KbtmzVrxv79+7l8+bKTIxMmSNdHc6Tro31k6aM50vXRSWRGzSxZ+pj1jBgxgtjYWD755JMMv9fPz48JEyZw4MABvvvuOydEly29CRwwHYQnOHr0KBERETRt2tSm7VO227JlizPDEobI0kdzZOmjfVIKW1n66Hpyj5qTSKFmlhRqWUtoaCjTpk3jpZdeokqVKnbto0OHDjz88MN88MEHREdHOzjC7EUpVQp4EpCq1wabNm0CsLlQa9iwId7e3mzcuNGZYQlDpFAzRwo1+6Tc3y1dH11PCjUnkULNLCnUspZBgwaRJ08ePvzwQ7v3oZRi8uTJREZG8vHHHzswumzpM+BdINX/yZRS/ZVSIUqpkMjISJcG5o42bdpEgQIFqFatmk3b+/v7U69evVsFnshapFAzRwo1+0gzEXOkUHMS6fpolhRqWceff/7JsmXLGDZsGIGBgZnaV/369enduzeffvopJ0+edEyA2YxS6ikgQmu9437baK2naa2DtdbBhQsXdmF07mnjxo08+OCDGRqcN23alG3btpGQkODEyIQJUqiZI4WafaRQM0cKNSeRGTWzpOtj1mCxWBgwYADlypXj9ddfd8g+x4wZg5eXF0OHDnXI/rKhpkB7pdRJYC7QQik102xI7isqKoqDBw/avOwxRdOmTYmLi2PXrl1OikyYIl0fzZGuj/ZJWfqYK1cuw5FkPynFsRRqDiZdH82Sro9Zw4wZM9i7dy/jxo1zWFvgUqVKMXDgQObOncvWrVsdss/sRGs9VGtdSmsdBHQF1mqtexoOy23t2HFz4rFhw4YZel+jRo0ACAkJcXhMwizp+miOdH20
jzQTMUeeo+YkMqNmlix99HzXrl1j+PDhNGnShGeffdah+3733XcpVqwY77zzjjwEWzhVSqFVr169DL2vVKlSFClShO3btzsjLGGQLH00R5Y+2ifl96TMBLteytJHkzmbJc9WUqiZJYWa55swYQLnzp1j0qRJKKUcuu88efLw0UcfsWXLFhYsWODQfWcnWuv1WuunTMfhzkJCQqhYsSIFChTI0PuUUjRo0EBm1LIgKdTMkULNPtJ3wRx54LWTSFKbJYWaZwsPD2fChAl06dKFJk2aOOUYffr0oVatWgwePJj4+HinHEOIkJAQgoOD7XpvcHAwBw4c4Nq1aw6OSpgkhZo5UqjZR1aemCPNRJxEZtTMkmYinm348OFYLBbGjRvntGN4e3szadIkTpw4wZdffum044jsKyIigtOnT9tdqDVo0ACr1SoNRbIYaSZijjQTsY8UaubkyJEDkKWPDifNRMySZiKea/fu3cyYMYM333yTcuXKOfVYjz32GE888QQfffQRFy9edOqxRPaT0kgkMzNqgNynlsVIMxFzpJmIfaSwNSdl6aM0E3EwmVEzS5Y+eiatNQMGDKBgwYK89957LjnmhAkTuHbtGiNHjnTJ8UT2ERISglKKunXr2vX+okWLUrp0ablPLYuRpY/myNJH+8jnZY40E3ESKdTMkkLNMy1dupS1a9cyYsQI8ufP75JjVq9enRdffJGpU6dy8OBBlxxTZA8hISFUqVKFgIAAu/fRoEEDmVHLYqRQM0cKNfvI0kdzUmbUpFBzMGkmYpYUap4nMTGRgQMHUqVKFV588UWXHnvEiBH4+/szaNAglx5XZG2ZaSSSIjg4mKNHj3L58mUHRSVMk0LNHCnU7COFmjkyo+YkMqNmlhRqnmfatGkcOnSITz755NYVJFcpXLgww4YN448//mDNmjUuPbbIms6dO8fZs2czXag1aNAA+P/73YTnk0LNHCnU7COFmjnSnt9JpFAzS7o+eparV68yYsQImjdvTrt27YzE8MYbb1C2bFkGDBggN5qLTMtsI5EU9evXB6ShSFYiXR/Nka6P9pFCzRzp+ugk0vXRLOn66FnGjh3LpUuXnPJwa1vlzJmT8ePHs2fPHn766ScjMYisIyQkBC8vL+rUqZOp/RQoUIAKFSrIjFoWIl0fzZGuj/aRwtYcPz8/QGbUHE5m1MySpY+e4+TJk3z22Wf07t3b7u54jtKlSxcaN27MsGHD5CHDIlNCQkKoXr06/v7+md5XcHCwFGpZiCx9NEeWPtpHZtTMkWYiTiKFmllSqHmOoUOH4u3tzZgxY0yHglKKyZMnc+7cOSZOnGg6HOGhtNYOaSSSon79+pw8eVKe9ZdFSKFmjhRq9pFCzRwp1JxEuj6aJYWaZ9i6dStz585l4MCBlCxZ0nQ4ADRp0oQuXbrwySefEB4ebjoc4YHCw8O5cOHCrfvLMiul4JNZtaxBCjVzpFCzjxRq5qQUaib/G2TJs5XMqJklhZr701rzzjvvUKxYMd59913T4dxh3LhxWCwWhg8fbjoU4YEc1UgkRb169e7Yr/BsUqiZI4Wa8DRyj5qTSKFmlnR9dH8LFixgy5YtfPTRR+TJk8d0OHcoV64cb775JjNmzGDXrl2mwxEeJiQkBG9vb2rXru2Q/eXLl49KlSoREhLikP0Js6TroznS9dE+MqNmTs6cOQGZUXM46fpolnR9dG/x8fEMHjyYWrVq0adPH9PhpOq9996jUKFCDBgwQH5JiQzZsWMH1atXJ1euXA7bZ3BwsBRqWYR0fTRHuj7aRwpbc2RGzUlkRs0sWfro3qZMmcKJEyeYOHGi215Zzp8/PyNHjmTdunUsWbLEdDjCQ2it2bFjh8OWPaYIDg4mLCyMiIgIh+5XuJ4sfTRHlj7aRy5WmpNSqMmMmoNJMxGzpFBzXxcvXmT06NG0bduWVq1amQ4nTf3796dq1aoMGjSIxMRE0+EID3DmzBkiIiIc1kgkRcr+5D41zyeFmjlSqAlPk1KoSddHB5MZNbM8pVBTSv2glIpQSu0zHYurjBo1ipiYGCZMmGA6lHT5+PgwceJEDh8+zNdff206HOEBUgopRxdqdevWRSklyx+zACnUzJFCzT4yo2aOx8yoKaXaKKUOKaWOKqWGpPJ6H6VUpFJqd/LXfx0fqu2kUDPLg5qJTAfamA7CVQ4fPszUqVPp378/NWrUMB2OTZ544glatmzJiBEjuHz5sulwhJvbsWOHQxuJpAgICKBKlSoyo3YXTxsbgDQTMcldmol4Wt5qrVFKmQwh23KHnE23klFKeQNfAW2B6kA3pVT1VDadp7Wuk/z1nYPjzBBpJmKWpzQT0VpvAKJMx+Eq7777Lrly5WLkyJGmQ7GZUopJkyZx+fJlt3got3BvISEh1KhRw6GNRFLUr19fZtRu44ljA5BmIia5QzMRT8xbmVEzz60LNaAhcFRrfVxrnQDMBTo4N6zMkRk1szxl6WN28tdff/H7778zdOhQihQpYjqcDKlduzZ9+/bliy++4NixY6bDEW4qpZGIo5c9pggODiY8PJzz5887Zf8eyOPGBiBLH01yk6WPHpm3wix3X/pYEgi77d9nkr93t2eUUnuVUguUUqVT25FSqr9SKkQpFRIZGWlHuLaRZiJmZaVCzVU560xWq5V33nmHMmXK8NZbb5kOxy6jR4/Gz8+PIUPuWaUiBHCzkUhkZKRTCzWQhiK3cdjYAFw7PpBCzQw3KdQ8bkwrM2rmufuMmi2WAEFa61rAamBGahtpradprYO11sGFCxd20KHvJTNqZmWlQs1VOetMM2fOZOfOnYwdO9YpS8JcoUSJErz77rssWLCAjRs3mg5HuKGUZYmObs2fok6dOtJQJONsGhuAa8cHMjYww00KNVu41ZgWkHvUDHP3GbVw4ParCaWSv3eL1vqS1jo++Z/fAc65pGkjKdTMykqFmqe7ceMG7733Hg0aNKBbt26mw8mUAQMGULJkSQYMGJBt80spVVoptU4pFaqU2q+UetN0TO4ipZFIrVq1nLL/PHnyUK1aNSnU/p/HjQ1ACjWT3KRQ88i8FWa5e6G2HaiklCqnlPIDugKLb99AKVX8tn+2Bw44LsSMk0LNLE/p+qiUmgNsAaoopc4opf5jOiZHmzx5MuHh4UyePNnj/3/w9/dnzJgxbNu2jblz55oOx5QkYIDWujrQGHj1PjfCZzs7duxwWiORFMHBwbL08f953NgApOujSe7QQQ8PzFtZ+mieWxdqWusk4DVgJTeTdb7Wer9SapRSqn3yZm8kX93dA7wB9HFWwLaQro9meVDXx25a6+Jaa1+tdSmt9femY3Kk8+fPM27cODp16sRDDz1kOhyH6NWrF/Xq1WPIkCHExsaaDsfltNbntNY7k/8ew81zcmr3V2QrWmu2b9/utGWPKerXr8+5c+c4e/asU4/jCTxxbADS9dEkd+j66Il5K4WaeSYvLvjYspHWehmw7K7vfXDb34cCQx0bmv1kRs0sWfroHj744AMSEhIYP3686VAcxsvLi0mTJtG8eXM+++wzhg51m9O
OyymlgoC6wD93fb8/0B+gTJkyrg/MgCNHjnDp0iWaNGni1OOkFIIhISG0b98+na2zPk8bG4AsfTTJTZY+emTeyj1qZrn1jJonkq6PZkmhZt6///7L999/z6uvvkrFihVNh+NQjz76KB06dGDs2LFcuHDBdDhGKKXyAL8Cb2mto29/LSs0wMmoLVu2ADi9UKtTpw5eXl6y/NGDSaFmjrsUakJklBRqDiYzamZJoWbewIEDyZcvH++//77pUJzik08+IS4ujg8//NB0KC6nlPLlZpE2S2v9m+l43MGWLVvIly8f1apVc+pxcufOTfXq1dm+fbtTjyOcRwo1c6RQs4/WWmbUDJNCzcGkUDNLCjWzVqxYwapVq/jggw8oWLCg6XCconLlyrzyyit8++237Nu3z3Q4LqNu/rb+HjigtZ5sOh53sWXLFho1auSSc36TJk3YsmWLnOM8lBRq5kihJjyVFGoOJoWaWZ7S9TErSkpKYuDAgVSsWJFXXnnFdDhO9cEHHxAQEMCgQYNMh+JKTYFeQAul1O7krydMB2VSTEwM+/btc/qyxxQPPfQQV65cITQ01CXHE44lXR/NcZOujx5HmomYJ4Wag0nXR7M8petjVvTDDz+wf/9+xo8fj5+fn+lwnKpQoUK8//77rFixgpUrV5oOxyW01hu11kprXUtrXSf5a1n678y6tm3bhtVqdWmhBsiD1z2UdH00xx26PnoqWfpolhRqDibNRMySpY9mxMTE8P7779OsWTM6duxoOhyXePXVV6lQoQIDBgwgKSnJdDjCgJRGIo0aNXLJ8cqVK0exYsWkUPNQsvTRHFn6aD8p1MySQs3BZOmjWVKomTFu3DgiIiKYNGlStjmp58iRg/Hjx7N//35++OEH0+EIA7Zs2UL16tXJnz+/S46nlOKhhx6SQs1DSaFmjhRqwlNJoeZgUqiZJYWa64WFhTF58mR69OhBgwYNTIfjUikP9H7//feJjo5O/w0iy9Bas3XrVpcte0zx0EMPcerUKc6cOePS44rMk0LNHCnU7CP3qJknhZqDpZwEssusgruRQs313nvvPQDGjh1rOBLXU0oxefJkIiIistTDvUX6QkNDiYqKomnTpi49bsrxNm3a5NLjisyTQs0cKdTsJ+NZs6RQczA5EZslXR9dKyQkhJkzZ/L2229TpkwZ0+EY0aBBA3r06MHkyZM5ffq06XCEi6xbtw6A5s2bu/S4derUwd/fX5Y/eiDp+miOdH20nxRqZkmh5mAWi0VOxAZJ10fX0VozYMAAihQpwpAhQ0yHY1TKbGLK7KLI+tatW0dQUBBBQUEuPa6Pjw+NGzfm77//dulxReZordFay4VcQ6Tro/BESikp1BxNZtTMkqWPrrNo0SI2bNjAyJEjCQgIMB2OUWXKlOGdd95h1qxZbNu2zXQ4wsmsVivr1693+Wxaiocffpi9e/dy6dIlI8cXGSf3r5uVMisk44OM0VrLjFo2liXPVlKomSWFmmskJCTw7rvvUr16df773/+aDsctDBkyhCJFijBgwAC5ATuL+/fff4mKijJWqD3++ONorVmzZo2R44uMk0LNPBkf2EcKNbNkRs3BpFAzK+Wzl4Gyc02dOpWjR48yceJEfHx8TIfjFvLmzcvo0aPZuHEjv/32m+lwhBOtXbsWcP39aSmCg4PJly8fq1atMnJ8kXFSqJknhZoQGZMlz1ZSqJklNww73+XLlxk1ahStWrWiTZs2psNxK/369aNmzZoMHjyY+Ph40+EIJ1mxYgVVq1alVKlSRo7v4+NDixYtWL16tVyU8hApv5PkHnZzpNmYfWRMa47co+YEFotFktoguWHY+T766CMuX77MxIkTZUnEXXx8fJg4cSLHjh3jq6++Mh2OcILr16+zfv16nnzySaNxPP7445w+fZojR44YjUPYJuV3kowPzJFmY/aR3/PZV5Y8W0n7XbPkWSnOdezYMb788kv69etHrVq1TIfjllq3bk2bNm0YPXq0NHvIgtasWUNCQgJPPPGE0Tgef/xxAJYvX240DmEbWfponix9tI8UambJjJqDydJHs6RQc67Bgwfj5+fH6NGjTYfi1iZOnEh0dDSjRo0yHYpwsGXLlpE3b14eeugho3GUL1+eGjVqsGjRIqNxCNtIoWaeFGrCE0mh5mBSqJklhZrzbNy4kV9//ZXBgwdTvHhx0+G4tRo1avDCCy/wv//9j8OHD5sORziI1pqlS5fSqlUr/Pz8TIfD008/zYYNG2Tm1gNIoWaeFGr2kRk1c0x/9lnybCWFmllSqDmH1WplwIABlCxZkgEDBpgOxyOMHDmSnDlz8u6775oORTjI1q1bOXPmDB06dDAdCgAdO3bEarXyxx9/mA5FpEMKNfOkULOP6WJBmJMlz1ZSqJklXR+dY968eWzbto0xY8aQO3du0+F4hKJFi/Lee+/x+++/s379etPhCAeYN28efn5+blOo1atXj1KlSrFw4ULToYh0SNdH86Tro31kTGuOdH10AovFIidig6Tro+PFxsYyZMgQ6tatS69evUyH41HeeustypQpwzvvvCMDBA9ntVr55ZdfaNu2Lfny5TMdDnDzl3jnzp1Zvnw5UVFRpsMRaZCuj+ZJ10f7yIxa9pUlz1Yyo2aWLH10vM8//5zTp08zadIkye0MypUrFx9//DG7du3i559/Nh2OyISNGzdy9uxZnnvuOdOh3KF3794kJCQwd+5c06GINMjSR/Nk6aN9pFAzS2bUHEwKNbOkUHOsiIgIxo4dS7t27WjevLnpcDxS165dadiwIcOGDeP69eumwxF2+v7778mTJw/t2rUzHcod6tSpwwMPPMCMGTNMhyLSIIWaeVKo2UcKNXNMf/ZZ8mwlhZpZUqg51ogRI4iNjWXChAmmQ/FYXl5eTJ48mfDwcCZNmmQ6HGGHS5cuMW/ePHr16kWePHlMh3MHpRR9+vRh27ZthIaGmg5H3IcUauZJoWYf08WCMCdLnq2kUDNLCjXHOXDgANOmTeOll16iSpUqpsPxaE2bNqVz586MHz+es2fPmg5HZNCMGTOIj4/n5ZdfNh1Kqnr16kWOHDn44osvTIci7kMKNfOkULOPFGrmmP7ss+TZSgo1s6Tro+MMGjSIPHny8OGHH5oOJUsYN24ciYmJvP/++6ZDERmQlJTEV199RdOmTXnggQdMh5OqwoUL07t3b2bMmEFkZKTpcEQqpOujedL10T4ypjVL7lFzMOn6aJZ0fXSMP//8k6VLlzJs2DACAwNNh5MlVKhQgTfeeIMff/yR3bt3mw5H2GjmzJkcP37c7Z+H98477xAXF8dXX31lOhSRCun6aJ50fbSP5Kw5MqPmBDKjZpYsfcw8i8XCgAEDCAoK4vXXXzcdTpYybNgwChQowIABA4xeJbOXUqqNUuqQUuqoUmqI6XicLSkpiY8++oi6deu6XRORu1WtWpUOHTrw6aefEhERYToccRdZ+mieLH20j+liQZiTJc9WUqiZJYVa5s2YMYO9e/cyfvx4cubMaTqcLKVAgQKMGDGCtWvXsnTpUtPhZIhSyh
v4CmgLVAe6KaWqm43KuaZOncqxY8cYMWKERwxWxo0bx/Xr1xkxYoTpUMRdpFAzTwo1+3jCuS+rMv3Z+xg9upNIoWaWFGqZc+3aNYYPH06TJk149tlnTYeTJb300ktMmTKFQYMG0bp1a3x9fU2HZKuGwFGt9XEApdRcoAOQaqvB6OhoNm3aRJ48efD39/e48+LZs2cZOnQozZo1o0aNGhw/ftyh+4+LiwNw6H79/Pzo3r0706ZNo3Xr1m57T939aK25ceMG165d49q1a6bDcSgp1MyTQi1jZLmueUopo6tvpFATDieFWuZMnDiRc+fO8euvvxq/kpNV+fr6MmHCBDp06MC0adN49dVXTYdkq5JA2G3/PgM0un0DpVR/oH/Kvx966KFbr+XJk+eOr4IFCxIYGEihQoUIDAwkMDCQwoULU7JkSYoVK4aPj7lfERaLhffeew+LxcLo0aM96v+Ft99+m9WrV/PWW2+xePFi/P39jcVitVqJiIjgzJkzREZGcvHixTu+oqKiiImJuVWYXb9+Pcueu6VQM08KtYxJKdQ86fwnHCvLFmrSTMQc6fpov/DwcCZMmECXLl1o0qSJ6XCytJQHiI8YMYIePXqQP39+0yE5hNZ6GjANoEKFCnrIkCFER0ff83X16lUiIyM5cOAAERER99zg7+PjQ+nSpSlXrhzly5enevXq1KxZk5o1a1KsWDGnDxzeffdd/v77b77++mseeeQRpxwjZSatfPnyDt/3nDlzaNGiBaNGjWLu3LlO/52UmJhIaGgoISEh7Nq1i6NHj3LixAlOnTpFfHz8HdsqpQgMDKRo0aIUKVKEoKAgAgICUv3q3r27U+N2Jen6aJ50fcyYhIQEQHLWJNNFcpYs1CwWi1wxM0i6Ptpv+PDhJCUlMW7cONOhZHlKKSZNmkT9+vUZO3Ysn3zyiemQbBEOlL7t36WSv5eqAgUK8MILL6S7U6vVSlRUFOfPn+fcuXOcOnWKEydO3Pr6/fff+e67725tX6hQIWrWrElwcDCNGjWiUaNGlC5d2iG/0LTWjB49mgkTJvDKK6/w4osvZnqfJjz66KNMmDCBgQMH0r9/f7755huHzVBaLBYOHjxISEjIra/du3ffWsoZEBBApUqVqFWrFh06dKBcuXIEBQVRokQJihYtSmBgoM2xZKVCTZaRmSddHzMmNjYWMF8sZGemP3ubztRKqTbA54A38J3Wetxdr+cAfgLqA5eA57TWJx0bqu1k6aNZnrT0Mb3cdqXdu3czY8YMBgwYQLly5UyFka3UrVuX559/ns8//5yXX37ZEz737UAlpVQ5bhZoXYFMj6S9vLxuLX2sWbNmqttERESwf/9+/v33X/bt28fevXuZMmUKkyZNAqBYsWK3irZGjRrRoEED8ubNm6E4Ll26xJtvvsmsWbN4/vnn+eyzzzL7oxk1YMAAYmJiGDlyJMeOHePnn3+mdOnS6b/xNlarlaNHj7J9+/ZbRdmuXbu4fv06AP7+/tSvX59XXnmF4OBggoODqVChgkt+B3ri2ACkUDPJHZY+elLepsyoSc6a4/aF2m1dxlpx836I7UqpxVrr229e/w9wWWtdUSnVFRgPPOeMgG0hhZpZnlKo2ZjbLmG1Wnn77bcpWLAgw4YNc/Xhs7WPPvqI+fPnM2TIEObNm2c6nDRprZOUUq8BK7k5yPhBa73fFccuUqQIRYoUoXnz5re+l5CQwJ49e/jnn3/4559/2Lp1K7///jtw85dbjRo1bhVu9evXp0KFCuTLl++O/VqtVkJDQ5k7dy5ff/01V69eZeTIkbz//vvGf0E6wogRI6hQoQIvvvgilSpV4oUXXqBnz57Ur1//nlmt+Ph4Dh48yL///suePXvYsWMHO3bsIDo6GoCcOXNSt25d+vXrR4MGDQgODqZy5cpGlkV56tgAZNBrkulCzdPyVgo180z/HrJlRs2WLmMdgBHJf18ATFFKKZ1Gm5TQ0FDq1atnV9DpOXLkCNWqVXN4hzBXcUYnMldKeX5Q165djd5Eb4MMddBzZs7GxcVx4MABxowZQ1RUFFFRUU45jrN4es7+97//5YsvvuDQoUNu/wtRa70MWGY6DrjZ4bBBgwY0aNCA1157Dbg5K7Zt27ZbxdvChQv5/vvvb72nYMGCFCtWDF9fXxITEwkLCyMmJgalFO3atWPUqFHUrl3b1I/kFL169eLhhx9mxIgRfPPNN0yZMoUcOXJQtmzZW4/fuHDhApGRkbcGsX5+ftSuXZsePXrcmimrXr260QYvd3HK2ACcd65N6WJ54cIFjz1Xefq5NjExkb///ttpv0tt4JS83bFjh1MumKQcMiEhgX379jl8/67kqfGbvrfVljN+ul3Gbt8m+YrvVaAQcPH2jW7vRpYjRw7y5MljZ9hpq1u3Lk8++aRT9i3SV7t2bVq1anXrF4oby1AHPWfmbJ48eXj55Zd57jljF5uztRdeeOHW/VkicwoVKkTbtm1p27YtcHOgcfToUfbs2cOJEyc4fvz4reYlXl5etGjRgvr169OyZcsMLwv0JGXLluXHH39k8uTJLF++nJ07dxIWFnar0UejRo0oXrz4raYtlStXdvfHRjhsbACuOdfmyZOHcuXKUaNGDYfvW9imc+fOrFq1ymQIThnTgnNnvZ5++mmn7VukrUWLFixfvtypx0hrltmll+Zu70YWHBysN2zY4MrDewxndiJzhfLlyzv9ROyqqWjJWdt4es4CLln2aHoJhQlKKSpVqkSlSpVMh+IWChQoQPfu3bNUkw5HkHOtbTz9XDt8+HCGDx/u1GOYGh+EhIS45LieJmUm7X73P7u7xYsXO/0YaeWsLeW/LV3Gbm2jlPIB8nHzBkwh3FmGOugJIYS4RcYGwhNJ3gqPYkuhdqvLmFLKj5tdxu4uLxcDzyf/vTOwNr016EK4AVtyWwghxL1kbCA8keSt8CjKltxTSj0BfMb/dxkbo5QaBYRorRcrpXICPwN1gSiga8qNmmnsMxI4lcn40xJIKuvgPYjEn7ayWuvCmd1JarmdxraSs2nz9PjBQ/LWVpKzNvH0n8FYzjpjbJC8X8nbtEn8aUvzPCtjWiMk/rTd/zybVS8SKKVCtNbBpuOwl8Sf/Xj6Z+bp8UPW+BlcKSt8Xp7+M3h6/CZ4+mcm8Wc/nv6ZSfz2c+8+1EIIIYQQQgiRDUmhJoQQQgghhBBuJisXatNMB5BJEn/24+mfmafHD1njZ3ClrPB5efrP4Onxm+Dpn5nEn/14+mcm8dspy96jJoQQQgghhBCeKivPqAkhhBBCCCGER5JCTQghhBBCCCHcTJYu1JRSzyql9iulrEopj2kLqpRqo5Q6pJQ6qpQaYjqejFBK/aCUilBK7TMdiyeSnHU9ydnMkZw1Q/LWfpKzZkjOZo4n5q3kbOZl6UIN2Ad0AjaYDsRWSilv4CugLVAd6KaUqm42qgyZDrQxHYQHk5x1velIzmaG5KwZ05G8tZfkrBnTkZzNDI/KW8lZx8jShZrW+oDW+pDpODKoIXBUa31ca50AzAU6GI7JZlrrDUCU6Tg8leSs6
0nOZo7krBmSt/aTnDVDcjZzPDBvJWcdIEsXah6qJBB227/PJH9PCHclOSs8jeSs8DSSs8LTSM46gI/pADJLKfUnUCyVl4ZprX93dTxCpEdyVngayVnhaSRnhSeSvBV38/hCTWv9mOkYHCwcKH3bv0slf09kEZKzwtNIzgpPIzkrPFEWy1vJWQeQpY/uZztQSSlVTinlB3QFFhuOSYi0SM4KTyM5KzyN5KzwNJKzDpClCzWlVEel1BmgCbBUKbXSdEzp0VonAa8BK4EDwHyt9X6zUdlOKTUH2AJUUUqdUUr9x3RMnkRy1vUkZzNHctYMyVv7Sc6aITmbOZ6Wt5KzDopBa+3qYwohhBBCCCGESEOWnlETQgghhBBCCE8khZoQQgghhBBCuBkp1IQQQgghhBDCzUihJoQQQgghhBBuRgo1IYQQQgghhHAzUqi5OaVUO6XUPNNxCHE7pdRJpdQ9D+ZUStVSSm02EZMQ9pLzrPAUSqnpSqmP5FwrPJFS6nWl1HjTcXgSKdTcnNZ6CVBDKVXLdCxCpEdrvRe4opRqZzoWIWwl51nhaeRcKzzUt0APpVQR04F4CinUPMMcoL/pIISw0SzgRdNBCJFBcp4VnkbOtcKjaK3jgOVAb9OxeAop1NyEUmqIUuqYUipGKRWqlOp428vrgScNhSbE/TRIztXLSqkflVI5k7+/HmiplMphMDYhUqWUKq2U+k0pFamUuqSUmpL80nrkPCvcjFKqrlJqZ/LYYB6Q87aX1yPnWuGGlFIllFK/Jp9nTyil3rjt5fXIudZmUqi5j2NAMyAfMBKYqZQqnvzaASBIKRVgKjghUtEDaA1UACoDwwG01uFAIlDFXGhC3Esp5Q38AZwCgoCSwNzkl+U8K9yKUsoPWAT8DBQEfgGeSXldzrXCHSmlvIAlwB5unmNbAm8ppVonb3IAqG0oPI8jhZqb0Fr/orU+q7W2aq3nAUeAhskvxyT/md9IcEKkborWOkxrHQWMAbrd9loMkq/C/TQESgCDtNbXtdZxWuuNya/JeVa4m8aAL/CZ1jpRa70A2H7XNnKuFe6mAVBYaz1Ka52gtT7OzXvTuia/HsPNSQlhAx/TAYiblFK9gXe4eZUXIA8QmPz3vMl/XnFtVEKkKey2v5/i5gA4RV4kX4X7KQ2c0lonpfKanGeFuykBhGut9W3fO3XXNnKuFe6mLFBCKXXltu95A38n/z0vcNXVQXkqKdTcgFKqLDevNrQEtmitLUqp3YBK3qQacFJrHW0oRCFSU/q2v5cBzgIopUoCfsAhE0EJkYYwoIxSyieVYk3Os8LdnANKKqXUbcVaGW7eKiHnWuGuwoATWutK93m9GjeXRQobyNJH9+APaCASQCnVF6h52+uPcLNLjhDu5FWlVCmlVEFgGJDyHKpHgLVa63hzoQmRqm3cHPyOU0r5K6VyKqWaJr8m51nhbrYAScAbSilfpVQn/v+WCJBzrXBP24AYpdRgpVQupZS3UqqmUqpB8utyrs0AKdTcgNY6FJjEzZPyBeABYNNtm3QDvjEQmhBpmQ2sAo5z8wrvR8nf7wF8bSooIe5Ha20B2gEVgdPAGeC55JflPCvcitY6AegE9AGiuJmrv922iZxrhdtJPs8+BdQBTgAXge+AfMndoZ8AZhgL0MOoO5c+C3eT/DDLXlrrLqZjESI9yQ8M/kZr3cR0LELYSs6zwtPIuVZ4IqXU60BprfW7pmPxFFKoCSGEEEIIIYSbkaWPQgghhBBCCOFmpFATQgghhBBCCDcjhZoQQgghhBBCuBkp1IQQQgghhBDCzUihJoQQQgghhBBuRgo1IYQQQgghhHAzUqgJIYQQQgghhJuRQk0IIYQQQggh3Mz/AboZKerdJ7bcAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "\n", + "LABEL_POS = -0.4\n", + "\n", + "def calculate_numeric_gradient(x_values: np.array, y_values: np.array, x_step_size: float):\n", + " # :param x_values: a list of equidistant x values\n", + " # :param y_values: the list of corresponding y values for an unknown function\n", + " # :param x_step_size: the fixed distance between two consecutive x values\n", + " # :return: a list of numeric gradients for each step in the function (with one less element than the given x_values)\n", + "\n", + " # initialize a numpy array with the correct size\n", + " numeric_gradient = np.zeros(len(x_values) - 1)\n", + "\n", + " for i in range(1, len(x_values)):\n", + " numeric_gradient[i-1] = (y_values[i] - y_values[i-1]) / x_step_size\n", + " return numeric_gradient\n", + "\n", + "\n", + "def reconstruct_gradient_function(x_grad_values: np.array, x_step_size: float, y_offset=0.0):\n", + " # :param x_grad_values: a list of calculated gradients (based on step size `x_step_size`)\n", + " # :param x_step_size: the distance between two consecutive x values that was used to calculate `x_grad_values`\n", + " # :return: a list of y values for a (representative) function that would create the given gradient values\n", + " # (the y_values should be zero at the middle element)\n", + "\n", + " # intialize an array with the correct size\n", + " y_values = np.zeros(len(x_grad_values))\n", + "\n", + " y_values[0] = 0\n", + " for i in range(1, len(x_grad_values)):\n", + " y_values[i] = y_values[i-1] + x_grad_values[i] * x_step_size\n", + " y_values -= y_values[len(x_grad_values) // 2]\n", + " y_values += y_offset\n", + " return y_values\n", + "\n", + "\n", + "def plot_functions(functions_to_plot: List[Callable[[torch.tensor], torch.tensor]], xlim = (-1.5, 1.5), ylim = (-1.5, 1.5), steps = 200, y_offset=0.0):\n", + " x_values = np.linspace(*xlim, steps)\n", + " x_step_size = (xlim[1] - xlim[0]) / steps\n", + " \n", + " fig, axes = plt.subplots(2, len(functions_to_plot) + 1, figsize=FIGSIZE, subplot_kw={\"xlim\": xlim})\n", + " \n", + " if len(functions_to_plot) == 1:\n", + " axes = [axes]\n", + " # create the actual plot\n", + " for fn, axis_idx in zip(functions_to_plot, range(len(functions_to_plot))):\n", + " x = torch.tensor(x_values, requires_grad=True, dtype=torch.float)\n", + " y = fn(x)\n", + " y.sum().backward()\n", + "\n", + " y_values = y.detach().numpy()\n", + " \n", + " numeric_gradient = calculate_numeric_gradient(x_values, y_values, x_step_size)\n", + " numeric_grad_function = reconstruct_gradient_function(x.grad, x_step_size, y_offset=y_offset)\n", + " axes[0][axis_idx].locator_params(tight=True, nbins=3)\n", + " axes[1][axis_idx].locator_params(tight=True, nbins=3)\n", + " axes[0][axis_idx].set_ylim((-1.3, 1.3))\n", + " \n", + " axes[0][axis_idx].plot(x_values, y_values, color=\"black\")\n", + " axes[1][axis_idx].plot(x_values, x.grad, color=\"black\")\n", + " axes[0][axis_idx].plot(x_values, numeric_grad_function, color=\"darkred\")\n", + " axes[1][axis_idx].set_title(str(chr(ord(\"a\") + axis_idx)) + \")\", y=LABEL_POS)\n", + "\n", + " for axis in axes[:, axis_idx]:\n", + " axis.axvline(x=0, c=\"lightgrey\", zorder=0)\n", + " axis.axhline(y=0, c=\"lightgrey\", zorder=0)\n", + "\n", + " # plt.show()\n", + " return len(functions_to_plot), axes\n", + "\n", + "\n", + "def plot_progressive(axes, idx, functions_to_plot: List[Callable[[torch.tensor], torch.tensor]], xlim = (-1.5, 1.5), 
ylim = (-1.5, 1.5), steps = 200, y_offset=0.0):\n", + " x_values = np.linspace(*xlim, steps)\n", + " x_step_size = (xlim[1] - xlim[0]) / steps\n", + " first = True\n", + " for fi, fn in enumerate(functions_to_plot):\n", + " fn.training=True\n", + " x = torch.tensor(x_values, requires_grad=True, dtype=torch.float)\n", + " y = fn(x)\n", + " y.sum().backward()\n", + " c = \"grey\"\n", + " if fi == len(functions_to_plot) - 1:\n", + " first = False\n", + " c = \"black\"\n", + "\n", + " y_values = y.detach().numpy()\n", + " \n", + " numeric_gradient = calculate_numeric_gradient(x_values, y_values, x_step_size)\n", + " numeric_grad_function = reconstruct_gradient_function(x.grad, x_step_size, y_offset=y_offset)\n", + " axes[0][idx].locator_params(tight=True, nbins=3)\n", + " axes[1][idx].locator_params(tight=True, nbins=3)\n", + " axes[0][idx].set_ylim((-1.3, 1.3))\n", + "\n", + " axes[0][idx].plot(x_values, y_values, color=c)\n", + " axes[1][idx].plot(x_values, x.grad, color=\"black\")\n", + " axes[0][idx].plot(x_values, numeric_grad_function, color=\"darkred\")\n", + " axes[1][idx].set_title(str(chr(ord(\"a\") + idx)) + \")\", y=LABEL_POS)\n", + "\n", + " for axis in axes[:, idx]:\n", + " axis.axvline(x=0, c=\"lightgrey\", zorder=0)\n", + " axis.axhline(y=0, c=\"lightgrey\", zorder=0)\n", + "\n", + "\n", + "\n", + "def quantize(x: torch.tensor):\n", + " backward = x.clip(-1, 1)\n", + " forward = (x.clip(-1, 1) * 3).round() / 3\n", + " return (forward - backward).detach() + backward\n", + "\n", + "FIGSIZE = (15, 3.8)\n", + "LABEL_POS = -0.4\n", + "\n", + "# plot_functions([quantize])\n", + "idx, axes = plot_functions([\n", + " QActivation(Sign(), 1.0),\n", + " ApproxSign(),\n", + " SwishSign(),\n", + " QActivation(WeightDoReFa(2)),\n", + "])\n", + "plot_progressive(axes, idx, [\n", + " QActivation(ProgressiveSign(use_global_scaling=False, initial_scale=i), 1.0) for i in [0.03, 0.1, 1.0]\n", + "])\n", + "axes[0][idx].arrow(1.0, 1.15, -0.9, 0.0, head_width=0.15, length_includes_head=True, color=\"grey\")\n", + "axes[0][idx].arrow(-1.0, -1.15, 0.9, 0.0, head_width=0.15, length_includes_head=True, color=\"grey\")\n", + "plt.savefig(\"quantization_functions.pdf\", dpi=600, bbox_inches='tight')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quantization scheduling visualization\n", + "\n", + "Use the code below to visualize the scheduling of quantizations. You can replace the used quantization functions and observe how they are applied with different factors." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "steps = 100\n", + "factor_samples = 5\n", + "function_samples = 300\n", + "\n", + "\n", + "FIGURE_SIZE = (8, 10)\n", + "LABEL_POS = -0.3\n", + "xlim = (-1.5, 1.5)\n", + "ylim = (-1.3, 1.3)\n", + "\n", + "def sample_scheduler(i, axes, scheduler):\n", + " \n", + " axes[0].clear()\n", + " axes[1].clear()\n", + " \n", + " x_values = np.linspace(*xlim, steps)\n", + " x_step_size = (xlim[1] - xlim[0]) / steps\n", + " scheduler.factor = i / steps if i <= steps else 1.0 - ((i - steps) / steps)\n", + " x = torch.tensor(x_values, requires_grad=True, dtype=torch.float)\n", + " y = scheduler(x)\n", + " y.sum().backward()\n", + "\n", + " y_values = y.detach().numpy()\n", + " \n", + " numeric_gradient = calculate_numeric_gradient(x_values, y_values, x_step_size)\n", + " numeric_grad_function = reconstruct_gradient_function(x.grad, x_step_size, y_offset=0.0)\n", + " axes[0].locator_params(tight=True, nbins=3)\n", + " axes[1].locator_params(tight=True, nbins=3)\n", + " axes[0].set_ylim((-1.3, 1.3))\n", + "\n", + " axes[0].plot(x_values, y_values, color=\"black\")\n", + " axes[1].plot(x_values, x.grad, color=\"black\")\n", + " axes[0].plot(x_values, numeric_grad_function, color=\"darkred\")\n", + " axes[1].set_title(f\"factor: {scheduler.factor:.2f}\", y=LABEL_POS)\n", + "\n", + " for axis in axes:\n", + " axis.axvline(x=0, c=\"lightgrey\", zorder=0)\n", + " axis.axhline(y=0, c=\"lightgrey\", zorder=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "scheduler = MixLinearScheduling([Identity(), Tanh(), Sign(), InputDoReFa(2), InputDoReFa(4), InputDoReFa(8), ReLU()], steps)\n", + "fig, ax = plt.subplots(2, 1, figsize=FIGURE_SIZE)\n", + "anim_created = FuncAnimation(fig, sample_scheduler, frames=2 * steps, interval=50, fargs=(ax, scheduler))\n", + "\n", + "video = anim_created.to_html5_video()\n", + "html = display.HTML(video)\n", + "display.display(html)\n", + "\n", + "plt.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "scheduler = StepScheduling([Identity(), Tanh(), Sign(), InputDoReFa(2), InputDoReFa(4), InputDoReFa(8), ReLU()], steps)\n", + "fig, ax = plt.subplots(2, 1, figsize=FIGURE_SIZE)\n", + "anim_created = FuncAnimation(fig, sample_scheduler, frames=2 * steps, interval=50, fargs=(ax, scheduler))\n", + "\n", + "video = anim_created.to_html5_video()\n", + "html = display.HTML(video)\n", + "display.display(html)\n", + "\n", + "plt.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + }, + "vscode": { + "interpreter": { + "hash": "449ab358cbc9abff7c95eafc39955d97c1cf480c5202edcd424b61e46b87f27c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/examples/pytorch_lightning/image_classification.py b/examples/pytorch_lightning/image_classification.py deleted file mode 100644 index ad6a938..0000000 --- a/examples/pytorch_lightning/image_classification.py +++ /dev/null @@ -1,148 +0,0 @@ -import os - -if os.environ.get('REMOTE_PYCHARM_DEBUG_SESSION', False): - import pydevd_pycharm - pydevd_pycharm.settrace( - 'localhost', - port=int(os.environ.get('REMOTE_PYCHARM_DEBUG_PORT', "12345")), - stdoutToServer=True, - stderrToServer=True - ) - -import argparse -import logging -from torch.utils.data import DataLoader -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import ModelCheckpoint -from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger -from utils.utils import set_logging -from utils.arg_parser import create_argparser -from utils.lightning_model import ModelWrapper - -from bitorch.datasets.base import Augmentation -from bitorch.models import model_from_name -from bitorch.datasets import dataset_from_name -from bitorch import apply_args_to_configuration -from bitorch.quantizations import Quantization - -FVBITCORE_AVAILABLE = True -try: - import fvbitcore.nn as fv_nn -except ModuleNotFoundError: - logging.warning("fvbitcore not installed, will not calculate model flops!") - FVBITCORE_AVAILABLE = False - -WANDB_AVAILABLE = True -try: - from pytorch_lightning.loggers import WandbLogger - import wandb -except ModuleNotFoundError: - logging.warning("wandb not installed, will not log metrics to wandb!") - WANDB_AVAILABLE = False - - -def main(args: argparse.Namespace, model_args: argparse.Namespace) -> None: - """trains a model on the configured image dataset. - - Args: - args (argparse.Namespace): cli arguments - model_args (argparse.Namespace): model specific cli arguments - """ - set_logging(args.log_file, args.log_level, args.log_stdout) - - apply_args_to_configuration(args) - - loggers = [] - if args.tensorboard: - loggers.append(TensorBoardLogger(args.tensorboard_output)) # type: ignore - if args.result_file is not None: - loggers.append(CSVLogger(args.result_file)) # type: ignore - if WANDB_AVAILABLE and args.wandb: - try: - loggers.append( - WandbLogger(project=args.wandb_project, log_model=True, name=args.wandb_experiment)) # type: ignore - except ModuleNotFoundError: - logging.warning( - "wandb is not installed, values will not be logged via wandb. install it with " - "`pip install wandb`." 
- ) - callbacks = [] - if args.checkpoint_dir is not None: - callbacks.append(ModelCheckpoint(args.checkpoint_dir, save_last=True, - save_top_k=args.checkpoint_keep_count, monitor="metrics/top1 accuracy")) - - dataset = dataset_from_name(args.dataset) - - model_kwargs = vars(model_args) - logging.debug(f"got model args as dict: {model_kwargs}") - - model = model_from_name(args.model)(**model_kwargs, dataset=dataset) # type: ignore - model.initialize() - if args.checkpoint_load is not None and args.pretrained: - logging.info(f"starting training from pretrained model at checkpoint {args.checkpoint_load}") - model_wrapped = ModelWrapper.load_from_checkpoint(args.checkpoint_load) - else: - model_wrapped = ModelWrapper( - model, args.optimizer, args.lr, args.momentum, args.lr_scheduler, args.lr_factor, args.lr_steps, - dataset.num_classes, args.max_epochs, - ) - - trainer = Trainer( - strategy=args.strategy, - accelerator="cpu" if args.cpu else args.accelerator, - gpus=0 if args.cpu else args.gpus, - max_epochs=args.max_epochs, - max_steps=args.max_steps, - logger=loggers if len(loggers) > 0 else None, # type: ignore - callbacks=callbacks, # type: ignore - log_every_n_steps=args.log_interval, - progress_bar_refresh_rate=10, - ) - augmentation_level = Augmentation.from_string(args.augmentation) - if args.fake_data: - logging.info(f"dummy dataset: {dataset.name} (not using real data!)...") - train_dataset, test_dataset = dataset.get_dummy_train_and_test_datasets() # type: ignore - else: - logging.info(f"dataset: {dataset.name}...") - train_dataset, test_dataset = dataset.get_train_and_test( # type: ignore - root_directory=args.dataset_dir, download=args.download, augmentation=augmentation_level - ) - train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers, - shuffle=True, pin_memory=True, persistent_workers=True) # type: ignore - test_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers, - shuffle=False, pin_memory=True, persistent_workers=True) # type: ignore - - if FVBITCORE_AVAILABLE: - data_point = iter(train_loader).next() - computational_intensity = fv_nn.FlopCountAnalysis( - model, - inputs=data_point[0], - quantization_base_class=Quantization - ) - - stats, table = fv_nn.flop_count_table(computational_intensity, automatic_qmodules=True) - logging.info("\n" + table) - total_size = stats["#compressed size in bits"][""] - logging.info("Total size in MB: " + str(total_size / 1e6 / 8.0)) - total_flops = stats["#speed up flops (app.)"][""] - logging.info("Approximated mflops: " + str(total_flops / 1e6)) - for logger in loggers: - logger.log_dict({ - "mflops": total_flops / 1e6, - "size in MB": total_size / 1e6 / 8.0, - }) - - trainer.fit( - model_wrapped, - train_dataloaders=train_loader, - val_dataloaders=test_loader, - ckpt_path=args.checkpoint_load if not args.pretrained else None - ) - - -if __name__ == "__main__": - parser, model_parser = create_argparser() - args, unparsed_model_args = parser.parse_known_args() - model_args = model_parser.parse_args(unparsed_model_args) - - main(args, model_args) diff --git a/examples/pytorch_lightning/utils/arg_parser.py b/examples/pytorch_lightning/utils/arg_parser.py deleted file mode 100644 index e4ae057..0000000 --- a/examples/pytorch_lightning/utils/arg_parser.py +++ /dev/null @@ -1,175 +0,0 @@ -from argparse import ArgumentParser -import sys -from typing import Tuple - -from bitorch.models import model_from_name, model_names -from bitorch.datasets import dataset_names 
-from bitorch import add_config_args -from pytorch_lightning import Trainer - - -def add_logging_args(parser: ArgumentParser) -> None: - """adds cli parameters for logging configuration - - Args: - parser (ArgumentParser): the main argument parser - """ - log = parser.add_argument_group("Logging", "parameters for logging") - log.add_argument("--log-level", type=str, default="info", - choices=["debug", "info", "warning", "error", "critical"], - help="log level for logging message output") - log.add_argument("--log-interval", type=int, default=100, metavar="N", - help="how many batches to wait before logging training status") - log.add_argument("--log-file", type=str, default=None, - help="output file path for logging. default to stdout") - log.add_argument("--log-stdout", action="store_true", default=False, - help="toggles force logging to stdout. if a log file is specified, logging will be " - "printed to both the log file and stdout") - log.add_argument("--tensorboard", action="store_true", default=False, - help="toggles use of tensorboard for logging learning progress") - log.add_argument("--tensorboard-output", type=str, default="./tblogs", - help="output dir for tensorboard. default to ./tblogs") - log.add_argument("--result-file", type=str, default=None, - help="path to result file; train and test metrics will be logged in csv format") - - log.add_argument("--wandb", action="store_true", default=False, - help="toggles use of wandb for logging learning progress. For this to work, " - "the WANDB_API_KEY environment variable must be set.") - log.add_argument("--wandb-project", type=str, default="bitorch", - help="name of wand project to be used by wandb logger") - log.add_argument("--wandb-experiment", type=str, default=None, - help="name of wand experiment to be used by wandb logger") - - -def add_checkpoint_args(parser: ArgumentParser) -> None: - checkpoint = parser.add_argument_group("checkpoints", "parameters for checkpoint storing / loading") - checkpoint.add_argument("--checkpoint-dir", type=str, default=None, - help="path to directory to store checkpoints in.") - checkpoint.add_argument("--checkpoint-keep-count", type=int, default=10, - help="number of checkpoints to keep.") - checkpoint.add_argument("--checkpoint-load", type=str, default=None, - help="path to checkpoint file to load state from. if omitted, a new model will be trained.") - checkpoint.add_argument("--pretrained", action="store_true", default=False, - help="uses the given checkpoint as a pretrained model (only for initialization)") - - -def add_optimizer_args(parser: ArgumentParser) -> None: - """adds cli parameters for optimizer configuration - - Args: - parser (ArgumentParser): the main argument parser - """ - optimizer = parser.add_argument_group("Optimizer", "parameters for optimizer") - optimizer.add_argument("--lr-scheduler", type=str, - choices=["cosine", "step", "exponential"], - help="name of the lr scheduler to use. default to none") - optimizer.add_argument("--lr", type=float, default=0.01, - help="initial learning rate (default: 0.01)") - optimizer.add_argument('--lr-factor', default=0.1, type=float, - help='learning rate decay ratio. this is used only by the step and exponential lr scheduler') - optimizer.add_argument('--lr-steps', nargs="*", default=[30, 60, 90], - help='list of learning rate decay epochs as list. this is used only by the step scheduler') - optimizer.add_argument('--momentum', type=float, default=0.9, - help='momentum value for optimizer, default is 0.9. 
only used for sgd optimizer') - optimizer.add_argument('--optimizer', type=str, default="adam", choices=["adam", "sgd", "radam"], - help='the optimizer to use. default is adam.') - - -def add_dataset_args(parser: ArgumentParser) -> None: - """adds cli parameters for dataset configuration - - Args: - parser (ArgumentParser): the main argument parser - """ - data = parser.add_argument_group("dataset", "parameters for the dataset used for training") - data.add_argument("--dataset", type=str, default="cifar10", choices=dataset_names(), - help="name of the dataset to be used for training") - data.add_argument("--dataset-dir", type=str, default=None, - help="path to where the train dataset is saved / shall be downloaded to") - data.add_argument("--download", action="store_true", default=False, - help="toggles wether the dataset shall be downloaded if not present. " - "only has effect with the cifar10 and mnist dataset so far.") - data.add_argument("--batch-size", type=int, default=128, - help="batch size for training and testing (default: 128)") - data.add_argument("--num-workers", type=int, default=4, - help="number of workers to be used for dataloading (default: 4)") - data.add_argument("--augmentation", type=str, choices=["none", "low", "medium", "high"], default="none", - help="level of augmentation to be used in data preparation (default 'none')") - data.add_argument("--fake-data", action="store_true", - help="train with fake data") - - -def create_model_argparser(model_class: object) -> ArgumentParser: - """adds model specific cli arguments from model_class object - - Args: - model_class (object): the class-object of selected model - - Returns: - ArgumentParser: cli argument parser - """ - model_parser = ArgumentParser(add_help=False) - model_class.add_argparse_arguments(model_parser) - return model_parser - - -def help_in_args() -> bool: - """determines if script was called with a --help or -h flag - - Returns: - bool: True if help flag was set, False otherwise - """ - passed_args = sys.argv[1:] - if "--help" in passed_args or "-h" in passed_args: - return True - return False - - -def add_all_model_args(parser: ArgumentParser) -> None: - """iterates through all existent models and adds their specific cli args to parser - - Args: - parser (ArgumentParser): the main cli argument parser - """ - for model_name in model_names(): - model_group = parser.add_argument_group(model_name, f"parameters for {model_name} model") - model_from_name(model_name).add_argparse_arguments(model_group) # type: ignore - - -def add_regular_args(parser: ArgumentParser) -> None: - """adds all regular arguments, including dynamically created config args to parser. - - Args: - parser (ArgumentParser): parser to add the regular arguments to - """ - Trainer.add_argparse_args(parser) - add_logging_args(parser) - add_dataset_args(parser) - add_optimizer_args(parser) - add_checkpoint_args(parser) - - add_config_args(parser) - - parser.add_argument("--model", type=str.lower, choices=model_names(), required=True, - help="name of the model to be trained") - parser.add_argument("--cpu", action="store_true", default=False, - help="explicitly use the cpu. 
overwrites gpu settings") - - -def create_argparser() -> Tuple[ArgumentParser, ArgumentParser]: - """creates a main argument parser for general options and a model parser for model specific options - - Returns: - Tuple[ArgumentParser, ArgumentParser]: the main and model argument parser - """ - parser = ArgumentParser(description="Bitorch Image Classification") - - add_regular_args(parser) - - if help_in_args(): - add_all_model_args(parser) - args, _ = parser.parse_known_args() - - model_class = model_from_name(args.model) - model_parser = create_model_argparser(model_class) - return parser, model_parser diff --git a/examples/pytorch_lightning/utils/lightning_model.py b/examples/pytorch_lightning/utils/lightning_model.py deleted file mode 100644 index 6a6452b..0000000 --- a/examples/pytorch_lightning/utils/lightning_model.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Union -import torch -from pytorch_lightning import LightningModule -from torch.nn import Module, CrossEntropyLoss -from utils.utils import create_optimizer, create_scheduler -from torchmetrics import Accuracy, F1Score, Precision, Recall, AUROC -import logging - - -class ModelWrapper(LightningModule): - def __init__( - self, - model: Module, - optimizer: str, - lr: float, - momentum: float, - lr_scheduler: str, - lr_factor: float, - lr_steps: list, - num_classes: int, - epochs: int) -> None: - super().__init__() - self.save_hyperparameters(ignore=["model"]) - self.loss_function = CrossEntropyLoss() - self.model = model - self.accuracy_top1 = Accuracy(num_classes=num_classes) - self.accuracy_top5 = Accuracy(top_k=5, num_classes=num_classes) - self.f1 = F1Score(num_classes=num_classes) - self.prec = Precision(num_classes=num_classes) - self.recall = Recall(num_classes=num_classes) - - def training_step(self, batch: torch.Tensor) -> torch.Tensor: # type: ignore - x_train, y_train = batch - - y_hat = self.model(x_train) - loss = self.loss_function(y_hat, y_train) - self.log_dict({ - "loss/train": loss, - }) - return loss - - def validation_step(self, batch: torch.Tensor, batch_idx: int) -> None: # type: ignore - x_test, y_test = batch - - y_hat = self.model(x_test) - loss = self.loss_function(y_hat, y_test) - - self.log_dict({ - "metrics/top1 accuracy": self.accuracy_top1(y_hat, y_test), - "metrics/top5 accuracy": self.accuracy_top5(y_hat, y_test), - "metrics/f1": self.f1(y_hat, y_test), - "metrics/precision": self.prec(y_hat, y_test), - "metrics/recall": self.recall(y_hat, y_test), - "loss/test": loss, - }, prog_bar=True) - - def configure_optimizers(self) -> Union[dict, torch.optim.Optimizer]: # type: ignore - logging.info(f"Using {self.hparams.optimizer} optimizer and {self.hparams.lr_scheduler} lr schedluer...") - optimizer = create_optimizer(self.hparams.optimizer, self.model, self.hparams.lr, self.hparams.momentum) - if self.hparams.lr_scheduler is not None: - scheduler = create_scheduler( - self.hparams.lr_scheduler, optimizer, self.hparams.lr_factor, - self.hparams.lr_steps, self.hparams.epochs - ) - return { - "optimizer": optimizer, - "lr_scheduler": scheduler - } - else: - return optimizer diff --git a/mypy.ini b/mypy.ini index e039b64..177e95e 100644 --- a/mypy.ini +++ b/mypy.ini @@ -8,7 +8,7 @@ ignore_missing_imports = True disallow_untyped_defs = True disallow_any_explicit = False disable_error_code = attr-defined - +exclude = examples/(mnist|dlrm) [mypy-torchvision.io._video_opt.*] diff --git a/pyproject.toml b/pyproject.toml index 374b58c..0b03205 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,3 +4,7 
@@ requires = [ "wheel" ] build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 120 +target-version = ['py37', 'py38', 'py39', 'py310'] diff --git a/requirements-dev.txt b/requirements-dev.txt index 879ecd8..131fd76 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,11 +1,17 @@ +black build +importlib-metadata==4.13.0 flake8 -mypy +flake8-docstrings +importlib_metadata<5 +mypy~=0.920 +myst-nb +nbclient==0.5.13 +nbsphinx-link==1.3.0 +nbsphinx==0.8.8 pep8-naming +pre-commit pytest +pytest-cov sphinx twine -nbclient==0.5.13 -nbsphinx==0.8.8 -myst-nb -nbsphinx-link==1.3.0 diff --git a/requirements-opt.txt b/requirements-opt.txt deleted file mode 100644 index 0b8115e..0000000 --- a/requirements-opt.txt +++ /dev/null @@ -1,4 +0,0 @@ -tensorboardX -tensorboard -wandb -fvbitcore diff --git a/requirements.txt b/requirements.txt index 7cf4b86..43065eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ -torch~=1.11.0 -torchvision~=0.12.0 -bitorchinfo +torch>=1.9.0 +torchvision>=0.10.0 matplotlib numpy -sklearn -pytorch_lightning -torchmetrics +pandas diff --git a/setup.cfg b/setup.cfg index c552139..09fb933 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,26 +13,48 @@ exclude = .git, .venv, venv, - dist - tests -select = C,E,F,W,B,B950 + dist, + tests, + examples/mnist +select = C,D,E,F,W,B,B950 ignore = + D100, + # D100: Missing docstring in public module + D101, + # D101: Missing docstring in public class + D102, + # D102: Missing docstring in public method + D105, + # D105: Missing docstring in magic method D107, + # D107: Missing docstring in __init__ D204, + # D204: 1 blank line required after class docstring D205, + # D205: 1 blank line required between summary line and description D400, + # D400: First line should end with a period D401, + # D401: First line should be in imperative mood D403, + # D403: First word of the first line should be properly capitalized DAR103, - E203, - E402, - E402, + # DAR103: The docstring parameter type doesn't match function. 
E501, - F401, - F403, - F821, + # E501: Line too long (82 > 79 characters) W503, - W504, + # W503: line break before binary operator [pydocstyle] select = D417 # Missing argument descriptions in the docstring + +[coverage:run] +branch = True + +[coverage:report] +show_missing = True +skip_covered = True +precision = 2 + +[coverage:xml] +output = coverage.xml diff --git a/setup.py b/setup.py index 6ee1ac1..e23c5b3 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ -import subprocess from pathlib import Path -from typing import Union +from typing import Union, List import setuptools @@ -16,27 +15,40 @@ print("version:", version) -def get_requirements(file_path: Union[Path, str]): - return [requirement.strip() for requirement in (root_path / file_path).open().readlines()] +def _get_requirements(*file_path: Union[Path, str]): + requirements_list = [] + for fp in file_path: + with (root_path / fp).open() as requirements_file: + requirements_list.extend(list(requirement.strip() for requirement in requirements_file.readlines())) + # exclude bitorch from examples + if "bitorch" in requirements_list: + requirements_list.remove("bitorch") + return requirements_list + + +def _get_files_recursively(glob: str, root: str = ".") -> List[str]: + return list(str(x) for x in Path(root).rglob(glob)) with open("README.md", "r", encoding="utf-8") as handle: readme_content = handle.read() + setuptools.setup( name="bitorch", url="https://github.com/hpi-xnor/bitorch", version=version, - author="Joseph Bethge", - author_email="joseph.bethge@hpi.de", + author="Hasso Plattner Institute", + author_email="fb10-xnor@hpi.de", description="A package for building and training quantized and binary neural networks with Pytorch", long_description=readme_content, long_description_content_type="text/markdown", - packages=setuptools.find_packages(exclude='tests'), - install_requires=get_requirements('requirements.txt'), + packages=setuptools.find_packages(), + install_requires=_get_requirements("requirements.txt"), extras_require={ - "dev": get_requirements('requirements-dev.txt'), - "opt": get_requirements('requirements-opt.txt'), + "dev": _get_requirements("requirements-dev.txt"), + # "opt": _get_requirements(*_get_files_recursively("requirements*.txt", root="examples")), + "opt": _get_requirements("examples/image_classification/requirements.txt", "examples/mnist/requirements.txt"), }, classifiers=[ "Development Status :: 3 - Alpha", @@ -45,6 +57,20 @@ def get_requirements(file_path: Union[Path, str]): "Operating System :: OS Independent", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + ], + python_requires=">=3.7", + data_files=[ + ( + ".", + [ + "AUTHORS", + "CHANGELOG.md", + "mypy.ini", + "version.txt", + ] + + _get_files_recursively("requirements*.txt") + + _get_files_recursively("README.md", root="examples"), + ), ], - python_requires='>=3.7', ) diff --git a/tests/layers/test_bembedding.py b/tests/layers/test_bembedding.py new file mode 100644 index 0000000..0214fb0 --- /dev/null +++ b/tests/layers/test_bembedding.py @@ -0,0 +1,328 @@ +import numpy as np +import pytest +import torch +from bitorch.layers.bembedding import BEmbedding, BEmbeddingBag +from bitorch.quantizations import ApproxSign, Sign, SwishSign +from torch.nn.functional import embedding, embedding_bag +from torch.optim import SGD, Adam + +TEST_INPUT_DATA = [ + (10, 10), + (100, 10), + (1000, 100), + (30000, 300), +] * 3 +TEST_QUANTIZATION_FUNCTIONS = [ApproxSign, Sign, 
SwishSign] +TEST_OPTIMIZERS = [Adam, SGD] + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) +@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +def test_bembedding(vocab_size, embedding_size, quantization_function): + qembedding = BEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + ) + example_input = torch.zeros(vocab_size, dtype=int) + example_input[np.random.randint(vocab_size)] = 1 + + output = qembedding(example_input) + + binarized_embedding_table = qembedding.weight.to(dtype=torch.float32) + + raw_embeddings = embedding( + input=example_input, + weight=binarized_embedding_table, + sparse=False, + ) + assert torch.equal(output, raw_embeddings) + + # now sparse tests + qembedding = BEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + ) + + example_input = torch.tensor(np.random.randint(vocab_size), dtype=int) + output = qembedding(example_input) + + binarized_embedding_table = qembedding.weight.to(dtype=torch.float32) + raw_embeddings = embedding( + input=example_input, + weight=binarized_embedding_table, + sparse=True, + ) + assert torch.equal(output, raw_embeddings) + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) +@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +def test_batched_bembedding(vocab_size, embedding_size, quantization_function): + qembedding = BEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + ) + example_input = torch.zeros(vocab_size, dtype=int) + example_input[np.random.randint(vocab_size)] = 1 + example_input[np.random.randint(vocab_size)] = 1 + + output = qembedding(example_input) + + binarized_embedding_table = qembedding.weight.to(dtype=torch.float32) + + raw_embeddings = embedding( + input=example_input, + weight=binarized_embedding_table, + sparse=False, + ) + assert torch.equal(output, raw_embeddings) + + # now sparse tests + qembedding = BEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + ) + + example_input = torch.randint(0, vocab_size, (np.random.randint(1, 100), 1), dtype=int) + output = qembedding(example_input) + binarized_embedding_table = qembedding.weight.to(dtype=torch.float32) + raw_embeddings = embedding( + input=example_input, + weight=binarized_embedding_table, + sparse=True, + ) + assert torch.equal(output, raw_embeddings) + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) +@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +@pytest.mark.parametrize("optimizer", TEST_OPTIMIZERS) +def test_bembedding_training(vocab_size, embedding_size, quantization_function, optimizer): + example_input = torch.randint(0, vocab_size, (1,), dtype=int) + example_output = torch.rand(size=(1, 1)) + assert_equal_train(example_input, example_output, vocab_size, embedding_size, quantization_function, optimizer) + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) +@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +@pytest.mark.parametrize("optimizer", TEST_OPTIMIZERS) +def test_batched_bembedding_training(vocab_size, embedding_size, quantization_function, optimizer): + batch_size = np.random.randint(1, 100) + example_input = torch.randint(0, vocab_size, (batch_size,), 
dtype=int) + example_output = torch.rand((batch_size, 1)) + assert_equal_train(example_input, example_output, vocab_size, embedding_size, quantization_function, optimizer) + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) +@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +@pytest.mark.parametrize("optimizer", TEST_OPTIMIZERS) +def test_batched_bembedding_training_duplicate(vocab_size, embedding_size, quantization_function, optimizer): + example_input = torch.tensor([0, 1, 2, 3, 1]) + example_output = torch.rand((len(example_input), 1)) + assert_equal_train(example_input, example_output, vocab_size, embedding_size, quantization_function, optimizer) + + +def assert_equal_train(input, output, vocab_size, embedding_size, quantization_function, optimizer_class): + input.requires_grad_(False) + output.requires_grad_(False) + weights = (torch.rand((vocab_size, embedding_size)) * 100) - 5 + linear = torch.nn.Linear(embedding_size, 1).requires_grad_(False) + below_zero = False + if quantization_function()(weights).min().item() < 0: + below_zero = True + qembedding = BEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + sign_bool=below_zero, + ) + qembedding.set_weight(weights) + model = torch.nn.Sequential(qembedding, linear) + optimizer = optimizer_class(model.parameters(), lr=0.03) + qembedding.set_optimizer(optimizer) + model_output1 = model(input) + torch.nn.functional.l1_loss(model_output1, output).backward() + optimizer.step() + qembedding.step() + model_output1 = model(input) + torch.nn.functional.l1_loss(model_output1, output).backward() + optimizer.step() + qembedding.step() + + class NormalEmbedding(torch.nn.Module): + def __init__(self, weight, q_function) -> None: + super().__init__() + self.weight = weight + self.q_function = q_function + + def forward(self, x): + return embedding(x, self.q_function(self.weight), sparse=False) + + normal_embedding = NormalEmbedding(torch.clone(weights).requires_grad_(True), quantization_function()) + model = torch.nn.Sequential(normal_embedding, linear) + optimizer = optimizer_class(model.parameters(), lr=0.03) + model_output2 = model(input) + torch.nn.functional.l1_loss(model_output2, output).backward() + optimizer.step() + model_output2 = model(input) + torch.nn.functional.l1_loss(model_output2, output).backward() + optimizer.step() + qweight = qembedding.weight.clone().to(torch.float32) + if below_zero: + qweight[qweight == 0] = -1 + nweight = quantization_function()(normal_embedding.weight) + assert torch.equal(model_output1, model_output2) + assert torch.equal(qweight, nweight) + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) +@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +@pytest.mark.parametrize("optimizer", TEST_OPTIMIZERS) +def test_optimizer_is_cleared(vocab_size, embedding_size, quantization_function, optimizer): + model = BEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + ) + example_input = torch.tensor([0, 1, 2, 3, 1]) + optimizer = torch.optim.Adam(model.parameters(), lr=0.03) + before_size = len(optimizer.param_groups) + model.set_optimizer(optimizer) + model(example_input) + assert before_size + 1 == len(optimizer.param_groups) + model(example_input) + assert before_size + 1 == len(optimizer.param_groups) + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) 
+@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +@pytest.mark.parametrize("optimizer", TEST_OPTIMIZERS) +def test_batched_bembedding_bag_training_duplicate(vocab_size, embedding_size, quantization_function, optimizer): + example_input = torch.tensor([1, 2, 3, 1, 1, 2]) + example_offsets = torch.tensor([0, 2, 3, 4, 5]) + example_output = torch.rand((len(example_offsets), 1)) + assert_equal_train_embedding_bag( + example_input, example_offsets, example_output, vocab_size, embedding_size, quantization_function, optimizer + ) + + +def assert_equal_train_embedding_bag( + input_indices, input_offsets, output, vocab_size, embedding_size, quantization_function, optimizer_class +): + input_indices.requires_grad_(False) + input_offsets.requires_grad_(False) + output.requires_grad_(False) + weights = (torch.rand((vocab_size, embedding_size)) * 100) - 50 + linear = torch.nn.Linear(embedding_size, 1).requires_grad_(False) + below_zero = False + if quantization_function()(weights).min().item() < 0: + below_zero = True + qembedding = BEmbeddingBag( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + sign_bool=below_zero, + ) + qembedding.set_weight(weights) + model = torch.nn.Sequential(qembedding, linear) + qoptimizer = optimizer_class(model.parameters(), lr=0.03) + qembedding.set_optimizer(qoptimizer) + + class NormalEmbeddingBag(torch.nn.Module): + def __init__(self, weight, q_function) -> None: + super().__init__() + self.weight = weight + self.q_function = q_function + + def forward(self, indices, offsets): + return embedding_bag(input=indices, offsets=offsets, weight=self.q_function(self.weight), sparse=False) + + normal_embedding = NormalEmbeddingBag(torch.clone(weights).requires_grad_(True), quantization_function()) + model = torch.nn.Sequential(normal_embedding, linear) + optimizer = optimizer_class(model.parameters(), lr=0.03) + + # Check if weights match before + qweight = qembedding.weight.clone().to(torch.float32) + if below_zero: + qweight[qweight == 0] = -1 + nweight = quantization_function()(normal_embedding.weight) + assert torch.equal(qweight, nweight) + + # First pass + model_output1_1 = linear(qembedding(input_indices, input_offsets)) + torch.nn.functional.l1_loss(model_output1_1, output).backward() + optimizer.step() + qembedding.step() + model_output2_1 = linear(normal_embedding(input_indices, input_offsets)) + torch.nn.functional.l1_loss(model_output2_1, output).backward() + optimizer.step() + + qweight = qembedding.weight.clone().to(torch.float32) + if below_zero: + qweight[qweight == 0] = -1 + nweight = quantization_function()(normal_embedding.weight) + assert torch.equal(qweight, nweight) + + # Second pass + model_output1_2 = linear(qembedding(input_indices, input_offsets)) + torch.nn.functional.l1_loss(model_output1_2, output).backward() + optimizer.step() + qembedding.step() + model_output2_2 = linear(normal_embedding(input_indices, input_offsets)) + torch.nn.functional.l1_loss(model_output2_2, output).backward() + optimizer.step() + + qweight = qembedding.weight.clone().to(torch.float32) + if below_zero: + qweight[qweight == 0] = -1 + nweight = quantization_function()(normal_embedding.weight) + assert torch.equal(model_output1_2, model_output2_2) + assert torch.equal(qweight, nweight) + + +@pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) +@pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) +def test_bembedding_bag(vocab_size, 
embedding_size, quantization_function): + qembedding = BEmbeddingBag( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + ) + example_input = torch.tensor([0, 1, 2]) + example_offsets = torch.tensor([0]) + + output = qembedding(example_input, example_offsets) + + binarized_embedding_table = qembedding.weight.to(dtype=torch.float32) + + raw_embeddings = embedding_bag( + input=example_input, + offsets=example_offsets, + weight=binarized_embedding_table, + sparse=False, + ) + assert torch.equal(output, raw_embeddings) + + qembedding = BEmbeddingBag( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + ) + example_input = torch.tensor([0, 1, 2, 1, 2, 3]) + example_offsets = torch.tensor([0, 3]) + + output = qembedding(example_input, example_offsets) + + binarized_embedding_table = qembedding.weight.to(dtype=torch.float32) + + raw_embeddings = embedding_bag( + input=example_input, + offsets=example_offsets, + weight=binarized_embedding_table, + sparse=False, + ) + assert torch.equal(output, raw_embeddings) diff --git a/tests/layers/test_layer_arg_retrieval.py b/tests/layers/test_layer_arg_retrieval.py new file mode 100644 index 0000000..1a68e60 --- /dev/null +++ b/tests/layers/test_layer_arg_retrieval.py @@ -0,0 +1,66 @@ +import pytest + +from bitorch.layers import ( + QConv1d, + QConv1dBase, + QConv2d, + QConv2dBase, + QConv3d, + QConv3dBase, + QLinear, + QLinearBase, +) +from bitorch.quantizations import Sign + + +Q_CONV_ARGS = [ + ("in_channels", 16), + ("out_channels", 64), + ("kernel_size", 3), + ("stride", 1), + ("padding", 1), + ("dilation", 1), + ("groups", 1), + ("bias", False), + ("padding_mode", "zeros"), + ("device", None), + ("dtype", None), +] +Q_LINEAR_ARGS = [ + ("in_features", 64), + ("out_features", 32), + ("input_quantization", Sign()), + ("gradient_cancellation_threshold", 1.3), + ("weight_quantization", Sign()), + ("bias", False), + ("device", None), + ("dtype", None), +] + + +@pytest.mark.parametrize( + "all_args, layer, base_layer, num_positional_args", + [ + [Q_CONV_ARGS, QConv1d, QConv1dBase, 3], + [Q_CONV_ARGS, QConv2d, QConv2dBase, 3], + [Q_CONV_ARGS, QConv3d, QConv3dBase, 3], + [Q_LINEAR_ARGS, QLinear, QLinearBase, 2], + ], +) +def test_args_function(all_args, layer, base_layer, num_positional_args: int): + expected_result = {} + layer_args = [] + layer_kwargs = {} + + for j, (key, val) in enumerate(all_args): + expected_result[key] = val + if j < num_positional_args: + layer_args.append(val) + else: + layer_kwargs[key] = val + + layer = layer(*layer_args, **layer_kwargs) + result = base_layer.get_args_as_kwargs(layer.recipe) + assert result.keys() == expected_result.keys() + for k in expected_result.keys(): + assert expected_result[k] == result[k] diff --git a/tests/layers/test_layer_implementation.py b/tests/layers/test_layer_implementation.py new file mode 100644 index 0000000..d4fb3c3 --- /dev/null +++ b/tests/layers/test_layer_implementation.py @@ -0,0 +1,139 @@ +import pickle +from typing import Any + +import pytest + +import bitorch +import torch +from bitorch import RuntimeMode +from bitorch.layers.extensions.layer_implementation import CustomImplementationMixin, DefaultImplementationMixin +from bitorch.layers.extensions import LayerRecipe, LayerImplementation, LayerRegistry +from bitorch.layers.extensions.layer_container import LayerContainer + +TEST_MODE = RuntimeMode.INFERENCE_AUTO + + +class ExampleBase: + def __init__(self, s: str, val: int 
= 42) -> None: + self.s = s + self.val = val + + def do_something(self): + return f"{self.s}: {self.val} - made by {self.class_name()}" + + def class_name(self) -> str: + return "BaseClass" + + +example_registry = LayerRegistry("Example") + + +class ExampleImplementation(LayerImplementation): + def __init__(self, *args): + super().__init__(example_registry, *args) + + +class ExampleComposed(DefaultImplementationMixin, ExampleBase): + """Compose the default implementation""" + + pass + + +# create the decorated default implementation +Example = ExampleImplementation(RuntimeMode.DEFAULT)(ExampleComposed) + + +@ExampleImplementation(TEST_MODE) +class CustomLayerImplementation(CustomImplementationMixin, ExampleBase): + @classmethod + def can_clone(cls, recipe: LayerRecipe) -> bool: + # assume this test class can only clone layers with 'vals' lower than 100 + val = recipe.kwargs.get("val", recipe.args[2] if 2 < len(recipe.args) else None) + return val < 100, "val needs to be smaller than 100" + + @classmethod + def create_clone_from(cls, recipe: LayerRecipe, device: torch.device) -> Any: + return cls(recipe.layer.s, recipe.layer.val) + + def do_something(self): + return f"{self.s}: {self.val} - made by {self.class_name()}" + + def class_name(self) -> str: + return "CustomClass" + + +@pytest.fixture(scope="function", autouse=True) +def clean_environment(): + example_registry.clear() + bitorch.mode = RuntimeMode.DEFAULT + yield None + example_registry.clear() + bitorch.mode = RuntimeMode.DEFAULT + + +def test_recipe(): + s1 = Example("Hello World", val=21) + s2 = Example("Hello World", 21) + + s1_recipe = example_registry.get_recipe_for(s1) + assert s1_recipe.args[0] == "Hello World" + assert s1_recipe.kwargs["val"] == 21 + + s2_recipe = example_registry.get_recipe_for(s2) + assert s2_recipe.args[0] == "Hello World" + assert s2_recipe.args[1] == 21 + + +def test_default_impl(): + print("bitorch test mode:", bitorch.mode) + layer = Example("Hello World", val=21) + assert layer.val == 21 + assert layer.class_name() == "BaseClass" + assert isinstance(layer, Example.class_) + assert isinstance(layer, LayerContainer) + print(layer) + # TODO: pickling is currently only possible in RAW mode + # content = pickle.dumps(layer) + + # layer_loaded = pickle.loads(content) + # assert layer_loaded.val == 21 + + +def test_train_impl(): + bitorch.mode = TEST_MODE + layer = Example("Hello World", val=21) + assert layer.val == 21 + assert layer.class_name() == "CustomClass" + assert isinstance(layer, CustomLayerImplementation) + assert isinstance(layer, LayerContainer) + + +def test_raw_impl(): + bitorch.mode = RuntimeMode.RAW + layer = Example("Hello World", val=21) + assert layer.val == 21 + assert layer.class_name() == "BaseClass" + assert isinstance(layer, Example.class_) + assert not isinstance(layer, LayerContainer) + + content = pickle.dumps(layer) + + layer_loaded = pickle.loads(content) + assert layer_loaded.val == 21 + + +@pytest.mark.parametrize("val, is_supported", [(150, False), (50, True)]) +def test_clone(val, is_supported): + layer = Example("Hello World", val=val) + recipe = example_registry.get_recipe_for(layer) + if is_supported: + replacement = example_registry.get_replacement(TEST_MODE, recipe) + assert isinstance(replacement, CustomLayerImplementation) # type: ignore + else: + with pytest.raises(RuntimeError) as e_info: + _ = example_registry.get_replacement(TEST_MODE, recipe) + error_message = str(e_info.value) + assert e_info.typename == "RuntimeError" + expected_key_strings = ["Example", 
"implementation", str(TEST_MODE), "val", "100"] + for key in expected_key_strings: + assert key in error_message diff --git a/tests/layers/test_pact.py b/tests/layers/test_pact.py index a719f89..f11bb51 100644 --- a/tests/layers/test_pact.py +++ b/tests/layers/test_pact.py @@ -21,5 +21,5 @@ def test_qactivation(alpha): assert torch.equal(quantized, y) y.backward(x) - expected_gradient = torch.where((x >= 0) & (x <= alpha), x, torch.tensor(0.)) + expected_gradient = torch.where((x >= 0) & (x <= alpha), x, torch.tensor(0.0)) assert torch.equal(expected_gradient, x.grad) diff --git a/tests/layers/test_qactivation.py b/tests/layers/test_qactivation.py index 3050a12..b2c4085 100644 --- a/tests/layers/test_qactivation.py +++ b/tests/layers/test_qactivation.py @@ -12,22 +12,24 @@ @pytest.mark.parametrize("threshold", TEST_THRESHOLDS) -def test_qactivation(threshold): +def test_q_activation(threshold): input_quantization = config.get_quantization_function(config.input_quantization) - assert isinstance(activation._activation, type(input_quantization)) - assert isinstance(QActivation("sign")._activation, Sign) - assert isinstance(QActivation(Sign())._activation, Sign) + + assert isinstance(activation.activation_function, type(input_quantization)) + assert isinstance(QActivation("sign").activation_function, Sign) + assert isinstance(QActivation(Sign()).activation_function, Sign) + with pytest.raises(ValueError): QActivation("iNvAlIdNaMe") x = torch.Tensor(TEST_DATA).float().requires_grad_(True) - activation._gradient_cancellation_threshold = threshold + activation.gradient_cancellation_threshold = threshold y = activation(x) y.backward(x) if threshold > 0: - expected_gradient = torch.where(torch.abs(x) <= threshold, x, torch.tensor(0.)) + expected_gradient = torch.where(torch.abs(x) <= threshold, x, torch.tensor(0.0)) else: expected_gradient = x.clone() assert torch.equal(expected_gradient, x.grad) diff --git a/tests/layers/test_qconv.py b/tests/layers/test_qconv.py index 5596c2c..4e5617c 100644 --- a/tests/layers/test_qconv.py +++ b/tests/layers/test_qconv.py @@ -6,26 +6,72 @@ import numpy as np TEST_INPUT_DATA = [ - (QConv1d, conv1d, (1, 2, 5), [2, 2], - {"kernel_size": 3, "weight_quantization": "sign", "input_quantization": "sign", "padding": 1}), - (QConv2d, conv2d, (1, 2, 5, 5), [2, 2], - {"kernel_size": 3, "weight_quantization": "sign", "input_quantization": "sign", "padding": 1}), - (QConv3d, conv3d, (1, 2, 4, 4, 4), [2, 2], - {"kernel_size": 3, "weight_quantization": "sign", "input_quantization": "sign", "padding": 1}), - (QConv1d, conv1d, (1, 2, 5), [2, 2], - {"kernel_size": 3, "weight_quantization": Sign(), "input_quantization": "sign", - "gradient_cancellation_threshold": 0.5, "padding": 1}), - (QConv2d, conv2d, (1, 2, 5, 5), [2, 2], - {"kernel_size": 3, "weight_quantization": Sign(), "input_quantization": "sign", - "gradient_cancellation_threshold": 1.0, "padding": 1}), - (QConv3d, conv3d, (1, 2, 4, 4, 4), [2, 2], - {"kernel_size": 3, "weight_quantization": Sign(), "input_quantization": "sign", - "gradient_cancellation_threshold": 2.0, "padding": 1}), -] * 10 + ( + QConv1d, + conv1d, + (1, 2, 5), + [2, 2], + {"kernel_size": 3, "weight_quantization": "sign", "input_quantization": "sign", "padding": 1}, + ), + ( + QConv2d, + conv2d, + (1, 2, 5, 5), + [2, 2], + {"kernel_size": 3, "weight_quantization": "sign", "input_quantization": "sign", "padding": 1}, + ), + ( + QConv3d, + conv3d, + (1, 2, 4, 4, 4), + [2, 2], + {"kernel_size": 3, "weight_quantization": "sign", "input_quantization": 
"sign", "padding": 1}, + ), + ( + QConv1d, + conv1d, + (1, 2, 5), + [2, 2], + { + "kernel_size": 3, + "weight_quantization": Sign(), + "input_quantization": "sign", + "gradient_cancellation_threshold": 0.5, + "padding": 1, + }, + ), + ( + QConv2d, + conv2d, + (1, 2, 5, 5), + [2, 2], + { + "kernel_size": 3, + "weight_quantization": Sign(), + "input_quantization": "sign", + "gradient_cancellation_threshold": 1.0, + "padding": 1, + }, + ), + ( + QConv3d, + conv3d, + (1, 2, 4, 4, 4), + [2, 2], + { + "kernel_size": 3, + "weight_quantization": Sign(), + "input_quantization": "sign", + "gradient_cancellation_threshold": 2.0, + "padding": 1, + }, + ), +] +@pytest.mark.parametrize("execution_number", range(10)) @pytest.mark.parametrize("conv_layer, conv_fn, input_shape, args, kwargs", TEST_INPUT_DATA) -def test_qconv(conv_layer, conv_fn, input_shape, args, kwargs): +def test_qconv(conv_layer, conv_fn, input_shape, args, kwargs, execution_number): input_values = np.random.uniform(-1, 1, input_shape) layer = conv_layer(*args, **kwargs) input_tensor = torch.tensor(input_values).float().requires_grad_(True) @@ -50,7 +96,8 @@ def test_qconv(conv_layer, conv_fn, input_shape, args, kwargs): stride=layer.stride, padding=0, dilation=layer.dilation, - groups=layer.groups) + groups=layer.groups, + ) direct_result.backward(input_tensor) grad2 = input_tensor.grad.clone() diff --git a/tests/layers/test_qconv_noact.py b/tests/layers/test_qconv_noact.py index 0d4769c..a7e146f 100644 --- a/tests/layers/test_qconv_noact.py +++ b/tests/layers/test_qconv_noact.py @@ -11,8 +11,7 @@ (QConv3d_NoAct, conv3d, (1, 2, 4, 4, 4), [2, 2], {"kernel_size": 3, "weight_quantization": "sign", "padding": 1}), (QConv1d_NoAct, conv1d, (1, 2, 5), [2, 2], {"kernel_size": 3, "weight_quantization": Sign(), "padding": 1}), (QConv2d_NoAct, conv2d, (1, 2, 5, 5), [2, 2], {"kernel_size": 3, "weight_quantization": Sign(), "padding": 1}), - (QConv3d_NoAct, conv3d, (1, 2, 4, 4, 4), [2, 2], { - "kernel_size": 3, "weight_quantization": Sign(), "padding": 1}), + (QConv3d_NoAct, conv3d, (1, 2, 4, 4, 4), [2, 2], {"kernel_size": 3, "weight_quantization": Sign(), "padding": 1}), ] * 10 @@ -42,7 +41,8 @@ def test_qconv(conv_layer, conv_fn, input_shape, args, kwargs): stride=layer.stride, padding=0, dilation=layer.dilation, - groups=layer.groups) + groups=layer.groups, + ) direct_result.backward(input_tensor) grad2 = input_tensor.grad.clone() diff --git a/tests/layers/test_qembeddings.py b/tests/layers/test_qembeddings.py index b3828bf..a08fa8a 100644 --- a/tests/layers/test_qembeddings.py +++ b/tests/layers/test_qembeddings.py @@ -17,9 +17,13 @@ @pytest.mark.parametrize("vocab_size, embedding_size", TEST_INPUT_DATA) @pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) def test_qembedding(vocab_size, embedding_size, quantization_function): - qembedding = QEmbedding(num_embeddings=vocab_size, embedding_dim=embedding_size, - weight_quantization=quantization_function(), - output_quantization=quantization_function(), sparse=False) + qembedding = QEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + output_quantization=quantization_function(), + sparse=False, + ) example_input = torch.zeros(vocab_size, dtype=int) example_input[np.random.randint(vocab_size)] = 1 @@ -30,16 +34,24 @@ def test_qembedding(vocab_size, embedding_size, quantization_function): binarized_embedding_table = quantization(qembedding.weight) raw_embeddings = embedding( - example_input, 
binarized_embedding_table, qembedding.padding_idx, - qembedding.max_norm, qembedding.norm_type, - qembedding.scale_grad_by_freq, False, + example_input, + binarized_embedding_table, + qembedding.padding_idx, + qembedding.max_norm, + qembedding.norm_type, + qembedding.scale_grad_by_freq, + False, ) assert torch.equal(output, quantization(raw_embeddings)) # now sparse tests - qembedding = QEmbedding(num_embeddings=vocab_size, embedding_dim=embedding_size, - weight_quantization=quantization_function(), - output_quantization=quantization_function(), sparse=True) + qembedding = QEmbedding( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + output_quantization=quantization_function(), + sparse=True, + ) example_input = torch.tensor(np.random.randint(vocab_size), dtype=int) @@ -48,9 +60,13 @@ def test_qembedding(vocab_size, embedding_size, quantization_function): binarized_embedding_table = quantization(qembedding.weight) raw_embeddings = embedding( - example_input, binarized_embedding_table, qembedding.padding_idx, - qembedding.max_norm, qembedding.norm_type, - qembedding.scale_grad_by_freq, True, + example_input, + binarized_embedding_table, + qembedding.padding_idx, + qembedding.max_norm, + qembedding.norm_type, + qembedding.scale_grad_by_freq, + True, ) assert torch.equal(output, quantization(raw_embeddings)) @@ -59,14 +75,19 @@ def test_qembedding(vocab_size, embedding_size, quantization_function): @pytest.mark.parametrize("quantization_function", TEST_QUANTIZATION_FUNCTIONS) def test_qembeddingbag(vocab_size, embedding_size, quantization_function): - qembeddingbag = QEmbeddingBag(num_embeddings=vocab_size, embedding_dim=embedding_size, - weight_quantization=quantization_function(), - output_quantization=quantization_function(), sparse=False, mode="mean") + qembeddingbag = QEmbeddingBag( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + output_quantization=quantization_function(), + sparse=False, + mode="mean", + ) example_input = torch.zeros(vocab_size, dtype=int) for _ in range(np.random.randint(vocab_size)): example_input[np.random.randint(vocab_size)] = 1 - example_offset = torch.tensor((0, ), dtype=int) + example_offset = torch.tensor((0,), dtype=int) quantization = quantization_function() output = qembeddingbag(example_input, example_offset) @@ -74,28 +95,45 @@ def test_qembeddingbag(vocab_size, embedding_size, quantization_function): binarized_embedding_table = quantization(qembeddingbag.weight) # necessary for torch 1.8 compliance - if hasattr(qembeddingbag, 'padding_idx'): + if hasattr(qembeddingbag, "padding_idx"): raw_embeddings = embedding_bag( - example_input, binarized_embedding_table, example_offset, - qembeddingbag.max_norm, qembeddingbag.norm_type, - qembeddingbag.scale_grad_by_freq, qembeddingbag.mode, False, - None, qembeddingbag.include_last_offset, - qembeddingbag.padding_idx + example_input, + binarized_embedding_table, + example_offset, + qembeddingbag.max_norm, + qembeddingbag.norm_type, + qembeddingbag.scale_grad_by_freq, + qembeddingbag.mode, + False, + None, + qembeddingbag.include_last_offset, + qembeddingbag.padding_idx, ) else: raw_embeddings = embedding_bag( - example_input, binarized_embedding_table, example_offset, - qembeddingbag.max_norm, qembeddingbag.norm_type, - qembeddingbag.scale_grad_by_freq, qembeddingbag.mode, False, - None, qembeddingbag.include_last_offset, + example_input, + binarized_embedding_table, + example_offset, + 
qembeddingbag.max_norm, + qembeddingbag.norm_type, + qembeddingbag.scale_grad_by_freq, + qembeddingbag.mode, + False, + None, + qembeddingbag.include_last_offset, ) assert torch.equal(output, quantization(raw_embeddings)) # now sparse tests - qembeddingbag = QEmbeddingBag(num_embeddings=vocab_size, embedding_dim=embedding_size, - weight_quantization=quantization_function(), - output_quantization=quantization_function(), sparse=True, mode="mean") + qembeddingbag = QEmbeddingBag( + num_embeddings=vocab_size, + embedding_dim=embedding_size, + weight_quantization=quantization_function(), + output_quantization=quantization_function(), + sparse=True, + mode="mean", + ) example_input = torch.tensor(np.random.randint(vocab_size, size=np.random.randint(vocab_size)), dtype=int) @@ -104,20 +142,32 @@ def test_qembeddingbag(vocab_size, embedding_size, quantization_function): binarized_embedding_table = quantization(qembeddingbag.weight) # necessary for torch 1.8 compliance - if hasattr(qembeddingbag, 'padding_idx'): + if hasattr(qembeddingbag, "padding_idx"): raw_embeddings = embedding_bag( - example_input, binarized_embedding_table, example_offset, - qembeddingbag.max_norm, qembeddingbag.norm_type, - qembeddingbag.scale_grad_by_freq, qembeddingbag.mode, True, - None, qembeddingbag.include_last_offset, - qembeddingbag.padding_idx + example_input, + binarized_embedding_table, + example_offset, + qembeddingbag.max_norm, + qembeddingbag.norm_type, + qembeddingbag.scale_grad_by_freq, + qembeddingbag.mode, + True, + None, + qembeddingbag.include_last_offset, + qembeddingbag.padding_idx, ) else: raw_embeddings = embedding_bag( - example_input, binarized_embedding_table, example_offset, - qembeddingbag.max_norm, qembeddingbag.norm_type, - qembeddingbag.scale_grad_by_freq, qembeddingbag.mode, True, - None, qembeddingbag.include_last_offset, + example_input, + binarized_embedding_table, + example_offset, + qembeddingbag.max_norm, + qembeddingbag.norm_type, + qembeddingbag.scale_grad_by_freq, + qembeddingbag.mode, + True, + None, + qembeddingbag.include_last_offset, ) output = torch.nan_to_num(output, nan=0.0) output_raw = torch.nan_to_num(quantization(raw_embeddings), nan=0.0) diff --git a/tests/layers/test_qlinear.py b/tests/layers/test_qlinear.py index 86c5d34..04e8ae3 100644 --- a/tests/layers/test_qlinear.py +++ b/tests/layers/test_qlinear.py @@ -1,25 +1,20 @@ import pytest -from bitorch.layers.qlinear import QLinear -from bitorch.layers.qactivation import QActivation -from bitorch.quantizations import Sign, quantization_from_name import torch from torch.nn import Parameter +from bitorch.layers.qactivation import QActivation +from bitorch.layers.qlinear import QLinear +from bitorch.quantizations import Sign, quantization_from_name -TEST_INPUT_DATA = [ - [0., 0.], - [1., 0.], - [-1., 1.], - [0.3, -0.3], - [1e12, -1e12] -] +TEST_INPUT_DATA = [[0.0, 0.0], [1.0, 0.0], [-1.0, 1.0], [0.3, -0.3], [1e12, -1e12]] @pytest.mark.parametrize("input_values", TEST_INPUT_DATA) @pytest.mark.parametrize("quantization", ["sign", Sign()]) def test_qlinear(input_values, quantization): layer = QLinear(2, 2, bias=False, weight_quantization=quantization, input_quantization=quantization) - assert isinstance(layer.weight_quantize, quantization_from_name("sign")) + assert isinstance(layer.weight_quantization, quantization_from_name("sign")) + assert isinstance(layer.input_quantization, quantization_from_name("sign")) test_weights = [[0.3, -1.4], [-0.3, 2.6]] diff --git a/tests/layers/test_switchable_layer.py 
b/tests/layers/test_switchable_layer.py new file mode 100644 index 0000000..14e275c --- /dev/null +++ b/tests/layers/test_switchable_layer.py @@ -0,0 +1,63 @@ +import pickle + +import pytest +import torch +from torch import nn + +from bitorch.layers.extensions.layer_container import LayerContainer + + +class Foo: + pass + + +class Layer(nn.Module): + def __init__(self, x=10): + super().__init__() + self.x = x + self.foo = Foo() + + def get_foo(self): + return self.foo + + @property + def self_property(self): + return self + + def self_function(self): + return self + + +class _LayerContainer(LayerContainer): + patch = LayerContainer.patch + [ + "self_function", + ] + + +@pytest.mark.parametrize("test_wrapped_layer", [False, True]) +def test_switchable_layer(test_wrapped_layer): + if test_wrapped_layer: + layer = _LayerContainer(Layer, 42) + else: + layer = Layer(42) + assert layer.x == 42 + layer.x = 3 + assert layer.x == 3 + assert layer.self_function() == layer + assert layer.self_property == layer + + assert isinstance(layer, nn.Module) + assert isinstance(layer, Layer) + assert isinstance(layer.foo, Foo) + assert isinstance(layer.get_foo(), Foo) + assert test_wrapped_layer == isinstance(layer, LayerContainer) + + moved_layer = layer.to(torch.device("cpu")) + + assert isinstance(layer, nn.Module) + assert isinstance(layer, Layer) + assert isinstance(layer.foo, Foo) + assert isinstance(layer.get_foo(), Foo) + assert test_wrapped_layer == isinstance(layer, LayerContainer) + + assert layer == moved_layer diff --git a/tests/models/__init__.py b/tests/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/models/test_model_conversion.py b/tests/models/test_model_conversion.py new file mode 100644 index 0000000..2e82b18 --- /dev/null +++ b/tests/models/test_model_conversion.py @@ -0,0 +1,169 @@ +from typing import Any, Tuple + +import pytest +import torch +from torch import nn +from torch.nn import functional as F + +import bitorch +import bitorch.runtime_mode +from bitorch import RuntimeMode +from bitorch.layers import QConv2d, QLinear +from bitorch.layers.extensions.layer_implementation import CustomImplementationMixin +from bitorch.layers.extensions import LayerRecipe +from bitorch.layers.qconv2d import QConv2dBase +from bitorch.layers.qlinear import QLinearBase +from bitorch.layers.register import ( + q_linear_registry, + QLinearImplementation, + q_conv2d_registry, + QConv2dImplementation, +) +from bitorch.models import Model + +TEST_MODE = RuntimeMode.INFERENCE_AUTO +MNIST = [(1, 1, 28, 28), 10, "MNIST"] + + +class _TestModel(Model): + def __init__(self): + super().__init__(input_shape=MNIST[0], num_classes=MNIST[1]) + self.q_conv2d = QConv2d(1, 32, 3, 1, 1) + self.q_linear = QLinear(784, 64) + self._model = nn.Sequential( + self.q_conv2d, + nn.Conv2d(32, 1, 3, 1, 1), + nn.Flatten(), + self.q_linear, + nn.Linear(64, 10), + ) + + def forward(self, x): + x = self._model(x) + output = F.log_softmax(x, dim=1) + return output + + +def reset(): + bitorch.mode = RuntimeMode.DEFAULT + for registry in (q_linear_registry, q_conv2d_registry): + registry.unregister_custom_implementations() + + +@pytest.fixture +def get_decorated_impls(): + reset() + + @QLinearImplementation(TEST_MODE) + class QLinearTestImpl(CustomImplementationMixin, nn.Module): + def __init__(self, *args, **kwargs): + super().__init__() + with bitorch.pause_wrapping(): + self._layer = QLinear(*args, **kwargs) + self.is_test_implementation = True + + def forward(self, x): + return self._layer(x) + + 
@classmethod + def can_clone(cls, recipe: LayerRecipe) -> Tuple[bool, str]: + return True, "" + + @classmethod + def create_clone_from(cls, recipe: LayerRecipe, device: torch.device) -> Any: + new_layer = cls(*recipe.args, **recipe.kwargs) + new_layer._layer.weight = recipe.layer.weight + new_layer._layer.bias = recipe.layer.bias + return new_layer + + @QConv2dImplementation(TEST_MODE) + class QConv2dTestImpl(CustomImplementationMixin, nn.Module): + def __init__(self, *args, **kwargs): + super().__init__() + with bitorch.pause_wrapping(): + self._layer = QConv2d(*args, **kwargs) + self.is_test_implementation = True + + def forward(self, x): + return self._layer(x) + + @classmethod + def can_clone(cls, recipe: LayerRecipe) -> Tuple[bool, str]: + return True, "" + + @classmethod + def create_clone_from(cls, recipe: LayerRecipe, device: torch.device) -> Any: + new_layer = cls(*recipe.args, **recipe.kwargs) + new_layer._layer.weight = recipe.layer.weight + new_layer._layer.bias = recipe.layer.bias + return new_layer + + yield QLinearTestImpl, QConv2dTestImpl + reset() + + +@pytest.fixture +def get_subclassed_impls(): + reset() + + @QLinearImplementation(TEST_MODE) + class QLinearTestImpl(CustomImplementationMixin, QLinearBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_test_implementation = True + + @classmethod + def can_clone(cls, recipe: LayerRecipe) -> bool: + return True, "" + + @classmethod + def create_clone_from(cls, recipe: LayerRecipe, device: torch.device) -> Any: + new_layer = cls(*recipe.args, **recipe.kwargs) + new_layer.weight = recipe.layer.weight + new_layer.bias = recipe.layer.bias + return new_layer + + @QConv2dImplementation(TEST_MODE) + class QConv2dTestImpl(CustomImplementationMixin, QConv2dBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_test_implementation = True + + @classmethod + def can_clone(cls, recipe: LayerRecipe) -> Tuple[bool, str]: + return True, "" + + @classmethod + def create_clone_from(cls, recipe: LayerRecipe, device: torch.device) -> Any: + new_layer = cls(*recipe.args, **recipe.kwargs) + new_layer.weight = recipe.layer.weight + new_layer.bias = recipe.layer.bias + return new_layer + + yield QLinearTestImpl, QConv2dTestImpl + reset() + + +def _test(): + x = torch.rand(1, 1, 28, 28) + net = _TestModel() + + assert not hasattr(net.q_linear, "is_test_implementation") + assert not hasattr(net.q_conv2d, "is_test_implementation") + y1 = net(x) + + net.convert(TEST_MODE) + + assert hasattr(net.q_linear, "is_test_implementation") and net.q_linear.is_test_implementation + assert hasattr(net.q_conv2d, "is_test_implementation") and net.q_conv2d.is_test_implementation + y2 = net(x) + + assert torch.equal(y1, y2) + + +def test_convert_model_decorated(get_decorated_impls): + _test() + + +def test_convert_model_subclassed(get_subclassed_impls): + _test() diff --git a/tests/models/test_model_hub.py b/tests/models/test_model_hub.py new file mode 100644 index 0000000..fe5feba --- /dev/null +++ b/tests/models/test_model_hub.py @@ -0,0 +1,17 @@ +from bitorch.models import ResnetE18 +import torch +import pytest +import time + +TEST_DATA = [ + (ResnetE18, {"input_shape": (1, 3, 32, 32), "num_classes": 10}), +] + + +@pytest.mark.parametrize("model, kwargs", TEST_DATA) +def test_model_hub(model, kwargs): + m = model.from_pretrained(**kwargs) + input_values = torch.randn(kwargs["input_shape"]) + + result = m(input_values) + assert result.shape == torch.Size([kwargs["input_shape"][0], 
kwargs["num_classes"]]) diff --git a/tests/models/test_model_names.py b/tests/models/test_model_names.py new file mode 100644 index 0000000..e8d478b --- /dev/null +++ b/tests/models/test_model_names.py @@ -0,0 +1,13 @@ +import bitorch +from bitorch.models import model_from_name, model_names + + +def test_all_model_names(): + wrong_model_names = [] + for model_name in model_names(): + model = model_from_name(model_name) + assert model_from_name(model.name) == model + assert model_from_name(model.name.lower()) == model + if model.name != model.__name__: + wrong_model_names.append((model.name, model.__name__)) + assert len(wrong_model_names) == 0 diff --git a/tests/models/test_models.py b/tests/models/test_models.py index bd2eed9..571d673 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -1,5 +1,3 @@ -from bitorch.datasets import MNIST, CIFAR10, CIFAR100, ImageNet - from bitorch.models import ( models_by_name, LeNet, @@ -12,37 +10,81 @@ Resnet50V2, ResnetE, ResnetE18, - ResnetE34 + ResnetE34, + DLRM, + DenseNet28, + DenseNet37, + DenseNet45, + DenseNetFlex, + MeliusNet22, + MeliusNet42, + MeliusNetFlex, + MeliusNet23, + MeliusNet59, + MeliusNetA, + MeliusNetB, + MeliusNetC, + QuickNet, + QuickNetSmall, + QuickNetLarge, ) import torch import numpy as np import pytest import itertools -ALL_DATASETS = [MNIST, CIFAR10, CIFAR100, ImageNet] -RGB_DATASETS = [CIFAR10, CIFAR100, ImageNet] + +MNIST = [(1, 1, 28, 28), 10, "MNIST"] +CIFAR10 = [(1, 3, 32, 32), 10, "CIFAR10"] +CIFAR100 = [(1, 3, 32, 32), 100, "CIFAR100"] +IMAGENET = [(1, 3, 224, 224), 1000, "IMAGENET"] + +CRITEO = [([1, 13], ([26, 1], [26, 1])), 1, "CRITEO"] + +ALL_IMAGE_DATASETS = [MNIST, CIFAR10, CIFAR100, IMAGENET] +RGB_DATASETS = [CIFAR10, CIFAR100, IMAGENET] TEST_INPUT_DATA = [ - [Resnet, {"resnet_version": [1, 2], "resnet_num_layers": [18, 34, 50]}, ALL_DATASETS], - [Resnet18V1, {}, ALL_DATASETS], - [Resnet34V1, {}, ALL_DATASETS], - [Resnet50V1, {}, ALL_DATASETS], - [Resnet18V2, {}, ALL_DATASETS], - [Resnet34V2, {}, ALL_DATASETS], - [Resnet50V2, {}, ALL_DATASETS], + [ + Resnet, + {"resnet_version": [1, 2], "resnet_num_layers": [18, 34, 50]}, + ALL_IMAGE_DATASETS, + ], + [Resnet18V1, {}, ALL_IMAGE_DATASETS], + [Resnet34V1, {}, ALL_IMAGE_DATASETS], + [Resnet50V1, {}, ALL_IMAGE_DATASETS], + [Resnet18V2, {}, ALL_IMAGE_DATASETS], + [Resnet34V2, {}, ALL_IMAGE_DATASETS], + [Resnet50V2, {}, ALL_IMAGE_DATASETS], + [DenseNet28, {}, ALL_IMAGE_DATASETS], + [DenseNet37, {}, ALL_IMAGE_DATASETS], + [DenseNet45, {}, ALL_IMAGE_DATASETS], + [DenseNetFlex, {"flex_block_config": [[6, 6, 6, 5]]}, ALL_IMAGE_DATASETS], + [MeliusNet22, {}, ALL_IMAGE_DATASETS], + [MeliusNet23, {}, ALL_IMAGE_DATASETS], + [MeliusNet42, {}, ALL_IMAGE_DATASETS], + [MeliusNet59, {}, ALL_IMAGE_DATASETS], + [MeliusNetA, {}, ALL_IMAGE_DATASETS], + [MeliusNetB, {}, ALL_IMAGE_DATASETS], + [MeliusNetC, {}, ALL_IMAGE_DATASETS], + [MeliusNetFlex, {"flex_block_config": [[6, 6, 6, 5]]}, ALL_IMAGE_DATASETS], [ResnetE, {"resnete_num_layers": [18, 34]}, RGB_DATASETS], [ResnetE18, {}, RGB_DATASETS], [ResnetE34, {}, RGB_DATASETS], - [LeNet, {"lenet_version": [0,1,2,3,4]}, [MNIST]], + [LeNet, {"lenet_version": [0, 1, 2, 3, 4]}, [MNIST]], + [DLRM, {}, [CRITEO]], + [QuickNet, {}, [IMAGENET]], + [QuickNetSmall, {}, [IMAGENET]], + [QuickNetLarge, {}, [IMAGENET]], ] @pytest.mark.parametrize("model_class, model_kwargs, datasets_to_test", TEST_INPUT_DATA) -@pytest.mark.parametrize("dataset", ALL_DATASETS) +@pytest.mark.parametrize("dataset", [MNIST, CIFAR10, CIFAR100, 
IMAGENET, CRITEO]) def test_models(model_class, model_kwargs, datasets_to_test, dataset) -> None: - assert models_by_name[model_class.name] is model_class + assert models_by_name[model_class.name.lower()] is model_class if dataset not in datasets_to_test: - pytest.skip(f"Model '{model_class.name}' does not need to work with the dataset '{dataset.name}'.") + pytest.skip(f"Model '{model_class.name}' does not need to work with the dataset '{dataset[2]}'.") all_model_kwargs_combinations = [ dict(zip(model_kwargs.keys(), combination)) for combination in itertools.product(*model_kwargs.values()) @@ -50,14 +92,30 @@ def test_models(model_class, model_kwargs, datasets_to_test, dataset) -> None: for combination in all_model_kwargs_combinations: batch_sizes_to_test = [2, 10] - if dataset is ImageNet: + if dataset is IMAGENET: batch_sizes_to_test = [1, 2] for batch_size in batch_sizes_to_test: - input_shape = list(dataset.shape) - input_shape[0] = batch_size - - model = model_class(dataset=dataset, **combination) - input_values = torch.Tensor(np.random.uniform(0, 1.0, input_shape)) - output = model(input_values) - assert torch.equal(torch.as_tensor(output.shape), torch.Tensor( - [input_shape[0], dataset.num_classes]).long()) + if model_class.name == "DLRM": + model = model_class( + dense_feature_size=dataset[0][0][1], + embedding_layer_sizes=[100] * dataset[0][1][0][0], + **combination, + ) + dataset[0][0][0] = batch_size + dataset[0][1][0][1] = batch_size + dataset[0][1][1][1] = batch_size + input_values = ( + torch.Tensor(np.random.uniform(0, 1.0, dataset[0][0])), + (torch.zeros(dataset[0][1][0], dtype=int), torch.zeros(dataset[0][1][1], dtype=int)), + ) + output = model(*input_values) + else: + input_shape = list(dataset[0]) + input_shape[0] = batch_size + model = model_class(input_shape=dataset[0], num_classes=dataset[1], **combination) + input_values = torch.Tensor(np.random.uniform(0, 1.0, input_shape)) + output = model(input_values) + assert torch.equal( + torch.as_tensor(output.shape), + torch.Tensor([batch_size, dataset[1]]).long(), + ) diff --git a/tests/quantizations/test_quantization_scheduler.py b/tests/quantizations/test_quantization_scheduler.py new file mode 100644 index 0000000..827c725 --- /dev/null +++ b/tests/quantizations/test_quantization_scheduler.py @@ -0,0 +1,39 @@ +import torch +from bitorch.models import LeNet +from bitorch.quantizations import Quantization_Scheduler, Sign, WeightDoReFa +from bitorch.quantizations.quantization_scheduler import MixLinearScheduling + +INPUT_SHAPE = (10, 1, 28, 28) + + +def test_scheduler(): + torch.manual_seed(123) + model = LeNet(INPUT_SHAPE, 10, 0) + torch.manual_seed(123) + model_unscheduled = LeNet(INPUT_SHAPE, 10, 0) + torch.manual_seed(123) + model_dorefa = LeNet(INPUT_SHAPE, 10, 1) + + par1 = list(model.parameters())[0] + par2 = list(model_unscheduled.parameters())[0] + par3 = list(model_dorefa.parameters())[0] + assert torch.equal(par1, par2) + assert torch.equal(par2, par3) + + scheduler = Quantization_Scheduler(model, 2, [Sign(), WeightDoReFa(), Sign()], scheduling_procedure="mix_linear") + assert scheduler.scheduled_quantizer is MixLinearScheduling + + input_data = torch.rand(INPUT_SHAPE) + sign_output = model_unscheduled(input_data) + dorefa_output = model_dorefa(input_data) + + scheduled_output = model(input_data) + assert torch.equal(scheduled_output, sign_output) + scheduler.step() + + scheduled_output = model(input_data) + assert torch.equal(scheduled_output, dorefa_output) + scheduler.step() + + scheduled_output = 
diff --git a/tests/quantizations/test_quantizations.py b/tests/quantizations/test_quantizations.py
index 83b21e3..7333b33 100644
--- a/tests/quantizations/test_quantizations.py
+++ b/tests/quantizations/test_quantizations.py
@@ -8,35 +8,104 @@
     ApproxSign,
     SteHeaviside,
     SwishSign,
-    quantization_from_name
+    ProgressiveSign,
+    quantization_from_name,
 )
 
 TEST_INPUT_DATA = [
-    (Sign(), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5], [-1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0],
-     [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]),
-    (ApproxSign(), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5], [-1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0],
-     [0.0, 0.0, 1.4, 2.0, 1.4, 0.0, 0.0]),
-    (SteHeaviside(), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
-     [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]),
-    (SwishSign(5.0), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5], [-1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0],
-     [-0.03, -0.195, 1.562, 5.0, 1.562, -0.195, -0.03]),
-    (InputDoReFa(bits=2), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5], [0.0, 0.0, 0.0, 0.0, 1.0 / 3.0, 1.0, 1.0],
-     [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]),
-    (WeightDoReFa(bits=2), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
-     [-1.0, -1.0, -1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
-    (InputDoReFa(bits=1), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0],
-     [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]),
-    (WeightDoReFa(bits=1), [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
-     [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
+    (
+        Sign(),
+        1,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        ProgressiveSign(),
+        1,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -0.3, 0.0, 0.3, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        ProgressiveSign(use_global_scaling=False, initial_scale=0.05),
+        1,
+        [-1.5, -1.0, -0.3, -0.1, 0.0, 0.1, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -0.6, -0.2, 0.0, 0.2, 0.6, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        ProgressiveSign(use_global_scaling=False, initial_scale=0.5),
+        1,
+        [-1.5, -1.0, -0.3, -0.1, -0.05, 0.0, 0.05, 0.1, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        ProgressiveSign(use_global_scaling=False, initial_scale=0.2, custom_transform=lambda x: x),
+        1,
+        [-1.5, -1.0, -0.3, -0.1, 0.0, 0.1, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -0.375, -0.125, 0.0, 0.125, 0.375, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        ApproxSign(),
+        1,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0],
+        [0.0, 0.0, 1.4, 2.0, 1.4, 0.0, 0.0],
+    ),
+    (
+        SteHeaviside(),
+        1,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0],
+        [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0],
+    ),
+    (
+        SwishSign(5.0),
+        1,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0],
+        [-0.03, -0.195, 1.562, 5.0, 1.562, -0.195, -0.03],
+    ),
+    (
+        InputDoReFa(bits=2),
+        2,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [0.0, 0.0, 0.0, 0.0, 1.0 / 3.0, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        WeightDoReFa(bits=2),
+        2,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        InputDoReFa(bits=1),
+        1,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
+    (
+        WeightDoReFa(bits=1),
+        1,
+        [-1.5, -1.0, -0.3, 0.0, 0.3, 1.0, 1.5],
+        [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0],
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    ),
 ]
 
 
-@pytest.mark.parametrize("quantization, input_values, expected_output, expected_gradient_factors", TEST_INPUT_DATA)
+@pytest.mark.parametrize(
+    "quantization, bits, input_values, expected_output, expected_gradient_factors", TEST_INPUT_DATA
+)
 def test_quantizations(
-    quantization: Quantization,
-    input_values: list,
-    expected_output: list,
-    expected_gradient_factors: list) -> None:
+    quantization: Quantization, bits: int, input_values: list, expected_output: list, expected_gradient_factors: list
+) -> None:
     x = torch.tensor(input_values).float().requires_grad_(True)
     x_exp = torch.tensor(expected_output).float().requires_grad_(True)
     exp_grad_factors = torch.tensor(expected_gradient_factors).float().requires_grad_(True)
@@ -44,7 +113,12 @@ def test_quantizations(
     assert isinstance(quantization, quantization_from_name(quantization.name))
 
     y = quantization(x)
+    assert len(y) == len(x_exp)
     assert torch.allclose(y, x_exp, atol=0.001)
+    assert quantization.bit_width == bits
+    with pytest.deprecated_call():
+        # noinspection PyDeprecation
+        assert quantization.bitwidth == bits
 
     y.backward(x)
     computed_gradient = x.grad.clone()
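The parametrization above now also checks the renamed `bit_width` attribute (with the old `bitwidth` spelling expected to raise a deprecation warning). For reference, a tiny usage sketch of calling a quantization function directly, with values taken from the test data above:

```python
import torch
from bitorch.quantizations import Sign, InputDoReFa

sign = Sign()
print(sign(torch.tensor([-0.3, 0.0, 0.7])))  # tensor([-1., 1., 1.]) per the test data above
print(InputDoReFa(bits=2).bit_width)         # 2
```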
diff --git a/tests/test_argparse.py b/tests/test_argparse.py
index c648f9e..bfb0c43 100644
--- a/tests/test_argparse.py
+++ b/tests/test_argparse.py
@@ -1,10 +1,9 @@
-from argparse import ArgumentParser
-from examples.pytorch_lightning.utils.arg_parser import add_regular_args, add_all_model_args
-
-# this test checks for naming conflicts by adding all arguments to one parser
+import pytest
 
 
 def test_argparse():
-    parser = ArgumentParser()
-    add_regular_args(parser)
-    add_all_model_args(parser)
+    arg_parser = pytest.importorskip("examples.pytorch_lightning.utils.arg_parser")
+    with pytest.raises(SystemExit) as pytest_wrapped_e:
+        _ = arg_parser.create_argparser(["main.py", "-h"])
+    assert pytest_wrapped_e.type == SystemExit
+    assert pytest_wrapped_e.value.code == 0
diff --git a/tests/test_runtime_mode.py b/tests/test_runtime_mode.py
new file mode 100644
index 0000000..2b2cf0b
--- /dev/null
+++ b/tests/test_runtime_mode.py
@@ -0,0 +1,87 @@
+import pytest
+
+import bitorch
+from bitorch import RuntimeMode
+
+
+TEST_MODE = RuntimeMode.INFERENCE_AUTO
+
+
+def test_mode_creation_from_name():
+    for mode_str in RuntimeMode.list_of_names():
+        assert isinstance(RuntimeMode.from_string(mode_str), RuntimeMode)
+
+
+def test_mode_supports_self():
+    for mode in RuntimeMode.available_values():
+        assert mode.is_supported_by(mode)
+
+
+def test_mode_does_not_support_other_mode():
+    for mode in RuntimeMode.available_values():
+        for other_mode in RuntimeMode.available_values():
+            if mode == other_mode or mode == RuntimeMode.RAW or other_mode == RuntimeMode.RAW:
+                continue
+            assert not mode.is_supported_by(other_mode)
+
+
+def test_mode_self_addition():
+    for mode in RuntimeMode.available_values():
+        same_mode_twice = mode + mode
+        assert same_mode_twice == mode
+
+
+def test_mode_addition_supports_both():
+    for mode in RuntimeMode.available_values():
+        for other_mode in RuntimeMode.available_values():
+            if mode == other_mode:
+                continue
+            added_modes = mode + other_mode
+            assert mode.is_supported_by(added_modes)
+            assert other_mode.is_supported_by(added_modes)
+
+
+def test_str_output():
+    assert str(RuntimeMode.DEFAULT) == "default"
+
+
+def test_repr_output():
+    assert repr(RuntimeMode.DEFAULT) == ""
+
+
+def test_bitorch_default_mode():
+    assert bitorch.mode == RuntimeMode.DEFAULT
+
+
+@pytest.fixture()
+def set_test_mode():
+    bitorch.mode = TEST_MODE
+    yield None
+    bitorch.mode = RuntimeMode.DEFAULT
+
+
+def test_set_bitorch_mode(set_test_mode):
+    assert bitorch.mode == TEST_MODE
+
+
+@pytest.fixture()
+def reset_modes():
+    bitorch.mode = RuntimeMode.DEFAULT
+    yield None
+    bitorch.mode = RuntimeMode.DEFAULT
+
+
+def test_setting_different_modes(reset_modes):
+    assert bitorch.mode == RuntimeMode.DEFAULT
+    bitorch.mode = TEST_MODE
+    assert bitorch.mode == TEST_MODE
+
+
+def test_with_statement(reset_modes):
+    with bitorch.change_mode(TEST_MODE):
+        assert bitorch.mode == TEST_MODE
+
+
+def test_pause_wrap(reset_modes):
+    with bitorch.pause_wrapping():
+        assert bitorch.mode == RuntimeMode.RAW
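The new runtime-mode tests above exercise the global `bitorch.mode` attribute and the `change_mode` context manager. A short sketch of the corresponding usage; INFERENCE_AUTO is the mode used as TEST_MODE in the test file, and restoring the previous mode on exit is implied by the tests but not asserted there:

```python
import bitorch
from bitorch import RuntimeMode

bitorch.mode = RuntimeMode.INFERENCE_AUTO  # set the global runtime mode
with bitorch.change_mode(RuntimeMode.DEFAULT):
    # code in this block runs with the default mode active
    assert bitorch.mode == RuntimeMode.DEFAULT
```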
diff --git a/version.txt b/version.txt
index 0ea3a94..0d91a54 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.2.0
+0.3.0