From 1df3bffb7856871dba468f5d4211b95d01056b2d Mon Sep 17 00:00:00 2001 From: Thomas M Kehrenberg Date: Tue, 19 Mar 2024 18:55:29 +0100 Subject: [PATCH] Clean up and update README --- .pylintrc | 510 ------------------------------- README.md | 65 ++-- conf/experiment/cmnist_2d2c.yaml | 11 + conf/split/cmnist/2d2c_sb1.yaml | 23 +- install.sh | 14 - mypy.ini | 45 --- pyproject.toml | 3 +- 7 files changed, 58 insertions(+), 613 deletions(-) delete mode 100644 .pylintrc create mode 100644 conf/experiment/cmnist_2d2c.yaml delete mode 100755 install.sh delete mode 100644 mypy.ini diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index b8bcb952..00000000 --- a/.pylintrc +++ /dev/null @@ -1,510 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-whitelist=numpy,torch - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Specify a configuration file. -#rcfile= - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=bad-continuation, - comparison-with-itself, - no-else-return, - protected-access, - arguments-differ, - global-statement, - invalid-name, - unsubscriptable-object - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=c-extension-no-member - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages. -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=sys.exit - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - - -[SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package.. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - -[STRING] - -# This flag controls whether the implicit-str-concat-in-sequence should -# generate a warning on implicit string concatenation in sequences defined over -# several lines. -check-str-concat-over-line-jumps=no - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=foo, - bar, - baz, - toto, - tutu, - tata - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. -#class-attribute-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. -good-names=i, - j, - k, - ex, - Run, - _, - f, - x, - xy, - y, - s, - z, - zx, - zy, - zs, - df, - kl - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. Overrides method-naming- -# style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. -#variable-rgx= - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members=torch.* - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=100 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma, - dict-separator - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[LOGGING] - -# Format style used to check logging format string. `old` means using % -# formatting, while `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=cls - - -[DESIGN] - -# Maximum number of arguments for function / method. -max-args=10 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement. -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=3 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=1000 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=0 - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled). -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled). -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant,wandb - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "BaseException, Exception". -overgeneral-exceptions=BaseException, - Exception diff --git a/README.md b/README.md index b8a1bcd5..084ea7cb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Addressing Missing Sources with Adversarial Support-Matching +# Addressing Attribute Bias with Adversarial Support-Matching -Code for the paper *Addressing Missing Sources with Adversarial Support-Matching* [https://arxiv.org/abs/2203.13154]. +Code for the paper [Addressing Attribute Bias with Adversarial Support-Matching](https://openreview.net/forum?id=JYbnJ92TJf). Requires Python 3.10+. @@ -11,70 +11,55 @@ One of the dependencies is PyTorch. If your system is not compatible with the de then please install this from [pytorch.org](https://pytorch.org/) as required. If so, it's recommended that you do this first. -We have provided a `setup.py` file with the dependencies. -To install this package, `pip install -e /path/to/this/dir` +You can install this package, with dependencies, with `pip install .`. -# Running Experiments +## Running Experiments -The `compare.sh` script runs all the variants of our method. +### ACS -## Adult Income - -This dataset is included in the repository. - -### Subgroup bias +This dataset will be downloaded on first use. ``` -bash scripts/compare.sh data=adult/gender bias=adult/partial_outcome enc=adult adapt=adult/on_enc_with_bags clust=adult +python -m src.run.supmatch +experiment=acs/fcn ``` -For the "no-cluster" baseline, the bag size needs to be changed: - -``` -python run_ss.py adapt.balanced_context=false misc.log_method=no-cluster-fdm data=adult/gender bias=adult/partial_outcome enc=adult adapt=adult/on_enc_with_bags clust=adult adapt.bag_size=32 adapt.batch_size=16 -``` +### NICO++ -### Missing subgroup +This dataset has to be downloaded separately. ``` -bash scripts/compare.sh data=adult/gender bias=adult/missing_demo enc=adult adapt=adult/on_enc_with_bags clust=adult +python -m src.run.supmatch +experiment=nicopp/rn50/pretrained_enc ds.root=/path/to/dataset ``` -For the "no-cluster" baseline, the bag size needs to be changed: +### Colored MNIST + +This dataset will be downloaded automatically. ``` -python run_ss.py adapt.balanced_context=false misc.log_method=no-cluster-fdm data=adult/gender bias=adult/missing_demo enc=adult adapt=adult/on_enc_with_bags clust=adult adapt.bag_size=32 adapt.batch_size=16 +python -m src.run.supmatch +experiment=cmnist/2d2c labeller=gt ``` -## Colored MNIST - -This dataset will be downloaded automatically. - -### 2 digits +### CelebA -#### Subgroup bias +The code will try to download this, but the download quota is often saturated, +so it might not work immediately. +#### Without smiling males ``` -bash scripts/compare.sh +experiment=cmnist_subgroup_bias +python -m src.run.supmatch +experiment=celeba/sm/pt split=celeba/artifact/no_smiling_males ``` -#### Missing subgroup - +#### Without smiling females ``` -bash scripts/compare.sh +experiment=cmnist_missing_subgroup +python -m src.run.supmatch +experiment=celeba/sm/pt split=celeba/artifact/no_smiling_females ``` -### 3 digits - +#### Without unsmiling males ``` -bash scripts/compare.sh data=cmnist/3dig bias=cmnist/3dig/4miss enc=mnist adapt=cmnist/mostly_traditional clust=vague_spaceship_improved adapt.iters=20000 adapt.zs_dim=2 +python -m src.run.supmatch +experiment=celeba/sm/pt split=celeba/artifact/no_unsmiling_males ``` -## CelebA - -The code will try to download this, but the download quota is often saturated, -so it might not work immediately. - +#### Without unsmiling females ``` -bash scripts/compare.sh +experiment=celeba_gender +python -m src.run.supmatch +experiment=celeba/sm/pt split=celeba/artifact/no_unsmiling_females ``` diff --git a/conf/experiment/cmnist_2d2c.yaml b/conf/experiment/cmnist_2d2c.yaml new file mode 100644 index 00000000..ffbb8a6e --- /dev/null +++ b/conf/experiment/cmnist_2d2c.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +defaults: + - override /ds: cmnist/2d2c + - override /split: cmnist/2d2c_sb1 + - override /ae_arch: simple + - override /disc_arch: set + - override /scorer: none + +ae_arch: + levels: 2 diff --git a/conf/split/cmnist/2d2c_sb1.yaml b/conf/split/cmnist/2d2c_sb1.yaml index 542c8565..8080a6e7 100644 --- a/conf/split/cmnist/2d2c_sb1.yaml +++ b/conf/split/cmnist/2d2c_sb1.yaml @@ -20,6 +20,25 @@ train_subsampling_props: 1: 0.3 1: 0: 0.0 -train_transforms: null -test_transforms: null +train_transforms: + _target_: torchvision.transforms.Compose + transforms: + - _target_: torchvision.transforms.Resize + size: 28 + - _target_: torchvision.transforms.ToTensor + # ImageNet channel-wise moments + - _target_: torchvision.transforms.Normalize + mean: [ 0.5, 0.5, 0.5 ] + std: [ 0.5, 0.5, 0.5 ] + +test_transforms: + _target_: torchvision.transforms.Compose + transforms: + - _target_: torchvision.transforms.Resize + size: 28 + - _target_: torchvision.transforms.ToTensor + # ImageNet channel-wise moments + - _target_: torchvision.transforms.Normalize + mean: [ 0.5, 0.5, 0.5 ] + std: [ 0.5, 0.5, 0.5 ] dep_transforms: null diff --git a/install.sh b/install.sh deleted file mode 100755 index 697adadc..00000000 --- a/install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -poetry install -if [[ "$OSTYPE" == "linux-gnu"* ]]; then - pip3 uninstall --yes torch torchvision torchaudio - # Determine whether any GPUs are available and thus whether - # to install the CUDA-enabled version of PyTorch. - if [[ $(lshw -C display | grep vendor) =~ NVIDIA ]] - then - echo "At least one CUDA-compatible device detected: re-installing PyTorch with CUDA support." - pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 - else - pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu - fi -fi diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index 8631bcb6..00000000 --- a/mypy.ini +++ /dev/null @@ -1,45 +0,0 @@ -# Global options: - -[mypy] -no_implicit_optional = True -allow_redefinition = True -strict_equality = True -check_untyped_defs = True -warn_unreachable = True -disallow_any_unimported = True -disallow_untyped_defs = True -disallow_incomplete_defs = True -show_error_codes = True -warn_unused_ignores = True - -# ========== imports that don't have types =========== - -[mypy-faiss.*] -ignore_missing_imports = True - -[mypy-git.*] -ignore_missing_imports = True - -[mypy-lapjv.*] -ignore_missing_imports = True - -[mypy-pykeops.*] -ignore_missing_imports = True - -[mypy-scipy.*] -ignore_missing_imports = True - -[mypy-sklearn.*] -ignore_missing_imports = True - -[mypy-torchvision.*] -ignore_missing_imports = True - -[mypy-tqdm.*] -ignore_missing_imports = True - -[mypy-wandb] -ignore_missing_imports = True - -[mypy-ray] -ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index 9bcf1271..133f4c5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "support-matching" -package-mode = false +version = "0.0.0" description = "Addressing Missing Sources with Adversarial Support-Matching" authors = ["PAL "] packages = [{ include = "src" }, { include = "hydra_plugins" }] @@ -88,7 +88,6 @@ known-third-party = [ "ethicml", "hydra", "matplotlib", - "mypy", "numpy", "omegaconf", "pandas",