# main.py
# Forked from ustunb/actionable-recourse.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
import recourse as rs
# Download the processed credit dataset from the upstream actionable-recourse
# repository and load it into a DataFrame.
url = "https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv"
df = pd.read_csv(url)

# The first column is taken as the label; all remaining columns are features.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

# Train a logistic regression on the full dataset, then score the same rows.
# `fit` returns the estimator itself, so construction and training are chained.
clf = LogisticRegression(max_iter=1000).fit(X, y)
yhat = clf.predict(X)
# Build the set of actions from the feature matrix. ActionSet assigns bounds
# and step sizes by default; the overrides below customize individual features.
A = rs.ActionSet(X)

# Immutable features: marital status and the age-bracket indicators.
# Setting a property on several features at once requires indexing with a list.
A["Married"].actionable = False
A[
    ["Age_lt_25", "Age_in_25_to_40", "Age_in_40_to_59", "Age_geq_60"]
].actionable = False

# Education level.
education = A["EducationLevel"]
education.step_direction = 1  # force conditional immutability
education.step_size = 1  # custom step size
education.step_type = "absolute"  # step on absolute feature values, not percentiles
education.bounds = (0, 3)  # custom bounds

# Total months overdue.
months_overdue = A["TotalMonthsOverdue"]
months_overdue.step_size = 1  # custom step size
months_overdue.step_type = "absolute"  # discretize on absolute values rather than percentile values
months_overdue.bounds = (0, 100)  # custom bounds

# Read the model coefficients and align the action set with them: this tells
# `ActionSet` which direction each feature should move in to produce positive change.
A.set_alignment(clf)
# Pick one individual: the first row whose predicted label is non-positive.
# `np.flatnonzero` already returns integer indices, so no cast is needed.
denied_idx = np.flatnonzero(yhat <= 0)
if denied_idx.size == 0:
    # Same exception type as indexing the empty array would raise, but with a
    # message that explains what actually went wrong.
    raise IndexError(
        "no individual with a non-positive prediction was found; "
        "cannot build a flipset"
    )
i = int(denied_idx[0])

# Build a flipset for that individual and enumerate up to 10 items using the
# "distinct_subsets" enumeration.
fs = rs.Flipset(x=X.values[i], action_set=A, clf=clf)
fs.populate(enumeration_type="distinct_subsets", total_items=10)

# Print the flipset rendered as LaTeX, then as HTML.
print(fs.to_latex())
print(fs.to_html())
# # Run Recourse Audit on Training Data
# auditor = rs.RecourseAuditor(A, coefficients=clf.coef_, intercept=clf.intercept_)
# audit_df = auditor.audit(X) ## matrix of features over which we will perform the audit.
# ## print mean feasibility and cost of recourse
# print(audit_df["feasible"].mean())
# print(audit_df["cost"].mean())
# print_recourse_audit_report(X, audit_df, y)
# # or produce additional information of cost of recourse by other variables
# # print_recourse_audit_report(X, audit_df, y, group_by = ['y', 'Married', 'EducationLevel'])