-
Notifications
You must be signed in to change notification settings - Fork 0
/
adult_baseline.py
83 lines (75 loc) · 3.01 KB
/
adult_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""Baseline for Adult dataset"""
import pandas as pd
from ethicml.algorithms.inprocess import LR, SVM, Agarwal, Kamiran, Majority
from ethicml.evaluators.evaluate_models import run_metrics
from ethicml.metrics import CV, NMI, PPV, TNR, TPR, Accuracy, ProbPos, Theil
from ethicml.preprocessing.train_test_split import train_test_split
from nifr.data.data_loading import load_adult_data_tuples
def main():
    """Run the baseline classifiers on the Adult dataset over a mixing-factor sweep.

    For each classifier, the task mixing factor is swept from 0.0 to 1.0 in
    steps of 0.05. For every setting the data is loaded, the classifier is
    trained and evaluated, and the resulting metrics are printed. One CSV per
    classifier is written to ``<clf.name>.csv`` in the working directory.
    A setting on which the classifier raises is reported and skipped.
    """

    class _Namespace:
        # Minimal stand-in for the argument object passed to
        # load_adult_data_tuples; task_mixing_factor is set per iteration.
        pretrain = True
        drop_native = True
        meta_lead = True
        data_split_seed = 888

    # Preferred column order of the output CSV. Metric keys returned by
    # run_metrics that are not listed here are kept and placed after these.
    columns = [
        "mix_factor",
        "Accuracy",
        "Theil_Index",
        "TPR_sex_Male_1",
        "TPR_sex_Male_0",
        "TPR_sex_Male_0-sex_Male_1",
        "TPR_sex_Male_0/sex_Male_1",
        "prob_pos_sex_Male_1",
        "prob_pos_sex_Male_0",
        "prob_pos_sex_Male_0-sex_Male_1",
        "prob_pos_sex_Male_0/sex_Male_1",
        "Theil_Index_sex_Male_1",
        "Theil_Index_sex_Male_0",
        "Theil_Index_sex_Male_0-sex_Male_1",
        "Theil_Index_sex_Male_0/sex_Male_1",
        "NMI",
        "NMI_sex_Male_0",
        "NMI_sex_Male_0-sex_Male_1",
        "NMI_sex_Male_0/sex_Male_1",
        "NMI_sex_Male_1",
        "PPV",
        "PPV_sex_Male_0",
        "PPV_sex_Male_0-sex_Male_1",
        "PPV_sex_Male_0/sex_Male_1",
        "PPV_sex_Male_1",
        "TNR",
        "TNR_sex_Male_0",
        "TNR_sex_Male_0-sex_Male_1",
        "TNR_sex_Male_0/sex_Male_1",
        "TNR_sex_Male_1",
        "TPR",
    ]
    # 0.00, 0.05, ..., 1.00 -- built from integers to avoid float drift.
    mix_factors = [k / 100 for k in range(0, 105, 5)]

    for clf in [LR()]:  # , Majority(), Kamiran(), Agarwal(), SVM(kernel='linear')]:
        # Collect one dict per successful run and build the frame once at the
        # end; DataFrame.append was removed in pandas 2.0.
        rows = []
        for mix_fact in mix_factors:
            args = _Namespace()
            args.task_mixing_factor = mix_fact
            _, task, task_train = load_adult_data_tuples(args)

            try:
                preds = clf.run(task_train, task)
            except Exception as exc:  # keep Ctrl-C / SystemExit working
                print(f"{clf.name} failed on mix: {mix_fact}")
                print(f"reason: {exc!r}")
                continue

            # A single evaluation yields both the headline accuracy and the
            # per-sensitive-attribute breakdown.
            res_dict = run_metrics(
                preds,
                task,
                metrics=[Accuracy(), Theil(), NMI(), TPR(), TNR(), PPV()],
                per_sens_metrics=[Theil(), ProbPos(), TPR(), TNR(), NMI(), PPV()],
            )
            print(f"{clf.name} Accuracy: {res_dict['Accuracy']}")

            res_dict["mix_factor"] = mix_fact
            print(",".join(res_dict.keys()))
            print(",".join([str(val) for val in res_dict.values()]))
            rows.append(res_dict)

            print(f"mix: {mix_fact}\t acc: {res_dict['Accuracy']}")
            print("----------------------")

        df = pd.DataFrame(rows)
        # Declared columns first, then any additional metric keys, so the
        # header is stable even when every run failed and `rows` is empty.
        extra_columns = [col for col in df.columns if col not in columns]
        df = df.reindex(columns=columns + extra_columns)
        df.to_csv(f"{clf.name}.csv", index=False)
# Run the baseline sweep only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()