Commit

Merge branch 'main' into release
tmke8 committed Sep 28, 2022
2 parents 3310cca + 6ed5663 commit dcab696
Showing 42 changed files with 584 additions and 374 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -106,3 +106,5 @@ results/*
!/ethicml/data/csvs/health.csv.zip

/darglint.out
/docs/tutorials/results/
/examples/results/
12 changes: 6 additions & 6 deletions ethicml/data/dataset.py
@@ -279,7 +279,7 @@ def _one_hot_encode_and_combine(
mask = None # the mask is needed when we have to discard samples

# create a Series of zeroes with the same length as the dataframe
combination: pd.Series = pd.Series( # type: ignore[call-overload]
combination: pd.Series = pd.Series(
0, index=range(len(attributes)), name=",".join(label_spec)
)

@@ -351,7 +351,7 @@ def load_aif(self): # Returns aif.360 Standard Dataset
)


@dataclass # type: ignore[misc] # mypy doesn't allow abstract dataclasses because mypy is stupid
@dataclass
class CSVDatasetDC(CSVDataset, ABC):
"""Dataset that uses the default load function."""

@@ -369,7 +369,7 @@ def invert_sens_attr(self) -> bool:
return self.invert_s


@dataclass # type: ignore[misc] # mypy doesn't allow abstract dataclasses because mypy is stupid
@dataclass
class StaticCSVDataset(CSVDatasetDC, ABC):
"""Dataset whose size and file location does not depend on constructor arguments."""

@@ -421,17 +421,17 @@ def __init__(
self._raw_file_name_or_path = filename_or_path
self._cont_features = list(cont_features)

@property # type: ignore[misc]
@property
@implements(CSVDataset)
def unfiltered_disc_feat_groups(self) -> DiscFeatureGroup:
return self._unfiltered_disc_feat_groups

@property # type: ignore[misc]
@property
@implements(CSVDataset)
def continuous_features(self) -> list[str]:
return self._cont_features

@property # type: ignore[misc]
@property
@implements(CSVDataset)
def name(self) -> str:
return self._name
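Note: this file and most of the files below drop `# type: ignore[misc]` comments that worked around older mypy limitations with abstract dataclasses and with stacking `@property` on top of another decorator (here, `@implements(...)`). A minimal sketch of the two patterns, with hypothetical class names, that current mypy accepts without suppressions:

```python
# Minimal sketch (hypothetical names) of the two patterns whose
# "# type: ignore" suppressions are dropped in this commit:
# an abstract dataclass, and a property overriding an abstract member.
from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass  # older mypy needed "# type: ignore[misc]" on abstract dataclasses
class BaseDataset(ABC):
    invert_s: bool = False

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable name of the dataset."""


@dataclass
class ToyLikeDataset(BaseDataset):
    @property  # stacking decorators on a property also used to need a suppression
    def name(self) -> str:
        return "Toy-like"


print(ToyLikeDataset().name)  # -> Toy-like
```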
2 changes: 1 addition & 1 deletion ethicml/data/tabular_data/acs.py
@@ -227,7 +227,7 @@ def _one_hot_encode_and_combine(
return attributes[label_mapping], mask

# create a Series of zeroes with the same length as the dataframe
combination: pd.Series = pd.Series( # type: ignore[call-overload]
combination: pd.Series = pd.Series(
0, index=range(len(attributes)), name=",".join(label_mapping)
)

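Note: the hunks in `dataset.py` and `acs.py` both build a zero-initialised Series that is then filled in to collapse one-hot sensitive-attribute columns into a single label column. A rough illustration of that idea, with made-up column names (not the actual `_one_hot_encode_and_combine` logic):

```python
# Rough illustration (made-up column names): collapse one-hot columns
# into one integer label Series, starting from a Series of zeroes with
# the same length as the dataframe.
import pandas as pd

attributes = pd.DataFrame({"race_A": [1, 0, 0], "race_B": [0, 1, 0]})
combination: pd.Series = pd.Series(0, index=range(len(attributes)), name="race_A,race_B")
for value, column in enumerate(["race_A", "race_B"], start=1):
    combination[attributes[column] == 1] = value
print(combination.tolist())  # [1, 2, 0]
```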
6 changes: 3 additions & 3 deletions ethicml/data/tabular_data/adult.py
@@ -48,7 +48,7 @@ class Adult(StaticCSVDataset):
binarize_nationality: bool = False
binarize_race: bool = False

@property # type: ignore[misc]
@property
@implements(StaticCSVDataset)
def name(self) -> str:
name = f"Adult {self.split.value}"
@@ -93,7 +93,7 @@ def get_label_specs(self) -> LabelSpecsPair:
raise NotImplementedError
return LabelSpecsPair(s=sens_attr_spec, y=class_label_spec, to_remove=label_feature_groups)

@property # type: ignore[misc]
@property
@implements(StaticCSVDataset)
def unfiltered_disc_feat_groups(self) -> DiscFeatureGroup:
dfgs = DISC_FEATURE_GROUPS
@@ -128,7 +128,7 @@ def unfiltered_disc_feat_groups(self) -> DiscFeatureGroup:
assert len(flatten_dict(dfgs)) == 97 # 93 (discrete) features + 4 class labels
return dfgs

@property # type: ignore[misc]
@property
@implements(StaticCSVDataset)
def continuous_features(self) -> list[str]:
feats = [
6 changes: 3 additions & 3 deletions ethicml/data/tabular_data/synthetic.py
@@ -58,12 +58,12 @@ class Synthetic(CSVDatasetDC):
def __post_init__(self) -> None:
assert 0 < self.num_samples <= 100_000

@property # type: ignore[misc]
@property
@implements(CSVDatasetDC)
def continuous_features(self) -> list[str]:
return ["x1f", "x2f", "n1", "n2"] if self.fair else ["x1", "x2", "n1", "n2"]

@property # type: ignore[misc]
@property
@implements(CSVDatasetDC)
def name(self) -> str:
return (
@@ -84,7 +84,7 @@ def get_num_samples(self) -> int:
def get_filename_or_path(self) -> str | Path:
return f"synthetic_scenario_{self.scenario.value}.csv"

@property # type: ignore[misc]
@property
@implements(CSVDatasetDC)
def unfiltered_disc_feat_groups(self) -> DiscFeatureGroup:
return {}
6 changes: 3 additions & 3 deletions ethicml/data/tabular_data/toy.py
@@ -18,7 +18,7 @@ class Toy(StaticCSVDataset):
num_samples: ClassVar[int] = 400
csv_file: ClassVar[str] = "toy.csv"

@property # type: ignore[misc]
@property
@implements(StaticCSVDataset)
def name(self) -> str:
return "Toy"
@@ -31,15 +31,15 @@ def get_label_specs(self) -> LabelSpecsPair:
to_remove=[],
)

@property # type: ignore[misc]
@property
@implements(StaticCSVDataset)
def unfiltered_disc_feat_groups(self) -> DiscFeatureGroup:
return {
"disc_1": ["disc_1_a", "disc_1_b", "disc_1_c", "disc_1_d", "disc_1_e"],
"disc_2": ["disc_2_x", "disc_2_y", "disc_2_z"],
}

@property # type: ignore[misc]
@property
@implements(StaticCSVDataset)
def continuous_features(self) -> list[str]:
return ["a1", "a2"]
5 changes: 2 additions & 3 deletions ethicml/implementations/pytorch_common.py
@@ -7,7 +7,6 @@
import pandas as pd

from ethicml.utility import DataTuple, TestTuple
from ethicml.utility.data_structures import ModelType

try:
import torch
@@ -21,7 +20,7 @@

def _get_info(data: TestTuple) -> tuple[np.ndarray, np.ndarray, int, int, pd.Index, str]:
features = data.x.to_numpy(dtype=np.float32)
sens_labels = data.s.to_numpy(dtype=np.float32) # type: ignore[type-var]
sens_labels = data.s.to_numpy(dtype=np.float32)
num = data.s.shape[0]
xdim = data.x.shape[1]
x_names = data.x.columns
@@ -59,7 +58,7 @@ def __init__(self, data: DataTuple):
test = data.remove_y()
self.x, self.s, self.num, self.xdim, self.x_names, self.s_names = _get_info(test)
self.sdim = 1
self.y = data.y.to_numpy(dtype=np.float32) # type: ignore[type-var]
self.y = data.y.to_numpy(dtype=np.float32)
self.ydim = data.y.nunique()
self.y_names = str(data.y.name)

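Note: the class touched here wraps a `DataTuple` as float32 arrays for use with PyTorch. A stripped-down sketch of that wrapping (illustrative names, not the actual EthicML class):

```python
# Stripped-down sketch (not the actual EthicML class) of exposing
# features x, sensitive attribute s and label y as a torch Dataset.
from __future__ import annotations

import numpy as np
import torch
from torch.utils.data import Dataset


class ArraysDataset(Dataset):
    def __init__(self, x: np.ndarray, s: np.ndarray, y: np.ndarray) -> None:
        self.x = x.astype(np.float32)
        self.s = s.astype(np.float32)
        self.y = y.astype(np.float32)

    def __len__(self) -> int:
        return len(self.x)

    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        return (
            torch.from_numpy(self.x[idx]),
            torch.tensor(self.s[idx]),
            torch.tensor(self.y[idx]),
        )
```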
4 changes: 1 addition & 3 deletions ethicml/implementations/zemel.py
@@ -132,9 +132,7 @@ def fit(train: DataTuple, flags: ZemelArgs, seed: int) -> Model:
parameters_initialization = np.random.uniform(
size=flags["clusters"] + features_dim * flags["clusters"]
)
bnd = [(0, 1)] * flags["clusters"] + [(None, None)] * features_dim * flags[ # type: ignore[operator]
"clusters"
]
bnd = [(0, 1)] * flags["clusters"] + [(None, None)] * features_dim * flags["clusters"]
LFR_optim_objective.steps = 0 # type: ignore[attr-defined]

learned_model = optim.fmin_l_bfgs_b( # type: ignore[attr-defined]
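Note: the reflowed `bnd` line builds the box constraints passed to `scipy.optimize.fmin_l_bfgs_b`: the first `clusters` parameters are restricted to `[0, 1]` and the remaining `features_dim * clusters` parameters are unbounded. A toy illustration of that bounds pattern, minimising a simple quadratic instead of the LFR objective:

```python
# Toy illustration of the bounds pattern above: the first `clusters`
# parameters are boxed into [0, 1], the remaining `features_dim * clusters`
# parameters are left unbounded.  A simple quadratic stands in for the
# LFR objective.
import numpy as np
from scipy import optimize as optim

clusters, features_dim = 2, 3
x0 = np.random.uniform(size=clusters + features_dim * clusters)
bnd = [(0, 1)] * clusters + [(None, None)] * features_dim * clusters

x_opt, f_opt, info = optim.fmin_l_bfgs_b(
    lambda x: float(np.sum((x - 0.5) ** 2)), x0, approx_grad=True, bounds=bnd
)
print(x_opt.round(2))  # every entry ends up at 0.5, which respects the bounds
```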
5 changes: 3 additions & 2 deletions ethicml/metrics/__init__.py
@@ -4,9 +4,9 @@
.. code:: python
from ethicml import metrics, run
from ethicml.metrics import Accuracy, TPR, run_metrics
run.run_metrics(predictions, test_data, metrics=[metrics.Accuracy(), metrics.TPR()])
run_metrics(predictions, test_data, metrics=[Accuracy(), TPR()])
"""
from .accuracy import *
from .anti_spur import *
@@ -16,6 +16,7 @@
from .confusion_matrix import *
from .cv import *
from .dependence_measures import *
from .eval import *
from .fnr import *
from .fpr import *
from .hsic import *
4 changes: 2 additions & 2 deletions ethicml/metrics/confusion_matrix.py
@@ -13,7 +13,7 @@
__all__ = ["CfmMetric", "LabelOutOfBounds"]


@dataclass # type: ignore[misc] # mypy doesn't allow abstract dataclasses because mypy is stupid
@dataclass
class CfmMetric(MetricStaticName, ABC):
"""Confusion Matrix based metric."""

@@ -30,7 +30,7 @@ def _confusion_matrix(
:param prediction: The predictions.
:param actual: The actual labels.
"""
actual_y: np.ndarray = actual.y.to_numpy(dtype=np.int32) # type: ignore[type-var]
actual_y: np.ndarray = actual.y.to_numpy(dtype=np.int32)
_labels: np.ndarray = np.unique(actual_y) if self.labels is None else np.array(self.labels)
if _labels.size == 1:
_labels = np.array([0, 1], dtype=np.int32)
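Note: the hunk above pads `_labels` to `[0, 1]` when only a single label value is observed. The effect is easiest to see with scikit-learn's `confusion_matrix` (a hedged sketch; the metric's exact internals may differ): without an explicit `labels` argument, a degenerate case collapses to a 1×1 matrix.

```python
# Why _labels is padded to [0, 1]: with an explicit `labels` argument a
# degenerate case (only one class ever observed) still yields a full
# 2x2 confusion matrix instead of a 1x1 one.
import numpy as np
from sklearn.metrics import confusion_matrix

actual = np.zeros(4, dtype=np.int32)     # only class 0 in the test labels
predicted = np.zeros(4, dtype=np.int32)  # and the model only ever predicts 0

print(confusion_matrix(actual, predicted).shape)                 # (1, 1)
print(confusion_matrix(actual, predicted, labels=[0, 1]).shape)  # (2, 2)
```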
66 changes: 66 additions & 0 deletions ethicml/metrics/eval.py
@@ -0,0 +1,66 @@
"""Runs given metrics on the given results."""
from __future__ import annotations
from typing import TYPE_CHECKING, Sequence

from ethicml.metrics.per_sensitive_attribute import MetricNotApplicable, PerSens, metric_per_sens
from ethicml.utility.data_structures import EvalTuple, Prediction

if TYPE_CHECKING: # the following imports are only needed for type checking
from collections.abc import Set as AbstractSet

from ethicml.metrics.metric import Metric


__all__ = ["run_metrics", "per_sens_metrics_check"]


def run_metrics(
predictions: Prediction,
actual: EvalTuple,
metrics: Sequence[Metric] = (),
per_sens_metrics: Sequence[Metric] = (),
aggregation: PerSens | AbstractSet[PerSens] = PerSens.DIFFS_RATIOS,
use_sens_name: bool = True,
) -> dict[str, float]:
"""Run all the given metrics on the given predictions and return the results.
:param predictions: DataFrame with predictions
:param actual: EvalTuple with the labels
:param metrics: list of metrics (Default: ())
:param per_sens_metrics: list of metrics that are computed per sensitive attribute (Default: ())
:param aggregation: Optionally specify aggregations that are performed on the per-sens metrics.
(Default: ``DIFFS_RATIOS``)
:param use_sens_name: if True, use the name of the sensitive variable in the returned results.
If False, refer to the sensitive variable as "S". (Default: ``True``)
:returns: A dictionary of all the metric results.
"""
result: dict[str, float] = {}
if predictions.hard.isna().any(axis=None): # type: ignore[arg-type]
return {"algorithm_failed": 1.0}
for metric in metrics:
result[metric.name] = metric.score(predictions, actual)

for metric in per_sens_metrics:
per_sens = metric_per_sens(predictions, actual, metric, use_sens_name)
agg_funcs: AbstractSet[PerSens] = (
{aggregation} if isinstance(aggregation, PerSens) else aggregation
)
# we can't add the aggregations directly to ``per_sens`` because then
# we would create aggregations of aggregations
aggregations: dict[str, float] = {}
for agg in agg_funcs:
aggregations.update(agg.func(per_sens))
per_sens.update(aggregations)
for key, value in per_sens.items():
result[f"{metric.name}_{key}"] = value
return result # SUGGESTION: we could return a DataFrame here instead of a dictionary


def per_sens_metrics_check(per_sens_metrics: Sequence[Metric]) -> None:
"""Check if the given metrics allow application per sensitive attribute."""
for metric in per_sens_metrics:
if not metric.apply_per_sensitive:
raise MetricNotApplicable(
f"Metric {metric.name} is not applicable per sensitive "
f"attribute, apply to whole dataset instead"
)
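Note: a hedged usage sketch of the new `run_metrics` helper, based on its docstring and the updated example in `ethicml/metrics/__init__.py`. Here `predictions` and `test_data` are placeholders for a `Prediction` and an `EvalTuple` produced elsewhere (e.g. by a model's predict step on a held-out split), so this is a template rather than a runnable script:

```python
# Usage sketch for the new helper.  `predictions` (a Prediction with a
# .hard column of labels) and `test_data` (an EvalTuple holding s and y)
# are placeholders for objects produced elsewhere.
from ethicml.metrics import TPR, Accuracy, run_metrics
from ethicml.metrics.per_sensitive_attribute import PerSens

scores = run_metrics(
    predictions,
    test_data,
    metrics=[Accuracy()],                  # scored on the whole test set
    per_sens_metrics=[Accuracy(), TPR()],  # scored per sensitive group
    aggregation=PerSens.DIFFS_RATIOS,      # also report differences and ratios
)
print(scores)  # {"Accuracy": ..., plus per-group, diff and ratio entries}
```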
4 changes: 2 additions & 2 deletions ethicml/models/inprocess/adv_debiasing.py
@@ -55,7 +55,7 @@ def _get_flags(self) -> AdvDebArgs:
"lambda_vec": self.lambda_vec,
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def hyperparameters(self) -> HyperParamType:
return {
@@ -68,7 +68,7 @@ def hyperparameters(self) -> HyperParamType:
"lambda_vec": self.lambda_vec,
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def name(self) -> str:
return "Adversarial Debiasing"
4 changes: 2 additions & 2 deletions ethicml/models/inprocess/agarwal_reductions.py
@@ -65,7 +65,7 @@ def _get_flags(self) -> AgarwalArgs:
"kernel": chosen_kernel if chosen_kernel is not None else "",
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def hyperparameters(self) -> HyperParamType:
chosen_c, chosen_kernel = settings_for_svm_lr(self.classifier, self.C, self.kernel)
@@ -80,7 +80,7 @@ def hyperparameters(self) -> HyperParamType:
_hyperparameters["kernel"] = chosen_kernel
return _hyperparameters

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def name(self) -> str:
return f"Agarwal, {self.classifier}, {self.fairness}, {self.eps}"
2 changes: 1 addition & 1 deletion ethicml/models/inprocess/blind.py
@@ -23,7 +23,7 @@ class Blind(InAlgorithmNoParams):

is_fairness_algo: ClassVar[bool] = False

@property # type: ignore[misc]
@property
@implements(InAlgorithmNoParams)
def name(self) -> str:
return "Blind"
4 changes: 2 additions & 2 deletions ethicml/models/inprocess/fair_dummies.py
@@ -64,7 +64,7 @@ def _get_flags(self) -> FairDummiesArgs:
"second_moment_scaling": self.second_moment_scaling,
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def hyperparameters(self) -> HyperParamType:
return {
@@ -80,7 +80,7 @@ def hyperparameters(self) -> HyperParamType:
"second_moment_scaling": self.second_moment_scaling,
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def name(self) -> str:
return f"Fair Dummies {self.model_type}_model"
4 changes: 2 additions & 2 deletions ethicml/models/inprocess/fairness_wo_demographics.py
@@ -45,14 +45,14 @@ def _get_flags(self) -> DroArgs:
"network_size": self.network_size,
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def hyperparameters(self) -> HyperParamType:
_hyperparameters = asdict(self)
_hyperparameters.pop("dir") # this is not really a hyperparameter
return _hyperparameters

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def name(self) -> str:
return "Dist Robust Optim"
4 changes: 2 additions & 2 deletions ethicml/models/inprocess/hgr.py
@@ -50,7 +50,7 @@ def _get_flags(self) -> HgrArgs:
"model_type": self.model_type,
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def hyperparameters(self) -> HyperParamType:
return {
@@ -61,7 +61,7 @@ def hyperparameters(self) -> HyperParamType:
"model_type": self.model_type,
}

@property # type: ignore[misc]
@property
@implements(InAlgorithmSubprocess)
def name(self) -> str:
return f"HGR {self.model_type}_model"
2 changes: 1 addition & 1 deletion ethicml/models/inprocess/in_subprocess.py
@@ -52,7 +52,7 @@ class InAlgoPredArgs(TypedDict):
_IS = TypeVar("_IS", bound="InAlgorithmSubprocess")


@dataclass # type: ignore[misc] # mypy doesn't allow abstract dataclasses because mypy is stupid
@dataclass
class InAlgorithmSubprocess(SubprocessAlgorithmMixin, InAlgorithm, ABC):
"""In-Algorithm that uses a subprocess to run.
2 changes: 1 addition & 1 deletion ethicml/models/inprocess/installed_model.py
@@ -64,7 +64,7 @@ def __init__(
self.__executable = executable
self.__name = name

@property # type: ignore[misc]
@property
@implements(InAlgorithm)
def name(self) -> str:
return self.__name
(Diff truncated: the remaining 23 changed files are not shown here.)
