# PlotResults.py
import pandas as pd
import matplotlib.pyplot as plt
import re
import os
import numpy as np
import math

plt.rcParams.update({'lines.linewidth': 2.0})
def str2list(input_list):
    """Parse a stringified list (as read back from the results CSV) into a list of floats."""
    if isinstance(input_list, str):
        splitted_string = re.split(r'\[|\]|,\s', input_list)
        return [float(el) for el in splitted_string if el != '']
    return input_list
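# Example (behaviour inferred from the regex above): the metric columns are stored as
# stringified lists, so str2list('[0.12, 0.08, 0.05]') returns [0.12, 0.08, 0.05],
# while an input that is already a list is passed through unchanged.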

def set_axis_plot(title_, prefix="", x_label=""):
    """Create a 1x3 figure (one panel per metric) and return the figure and its axes."""
    fig = plt.figure(figsize=(18, 5))  # (24, 5) if the accuracy panel is enabled
    # ax1 = fig.add_subplot(141)
    ax2 = fig.add_subplot(131)
    ax3 = fig.add_subplot(132)
    ax4 = fig.add_subplot(133)
    fig.suptitle(f'{title_}', fontsize=16)
    # ax1.set_title(f"{prefix}accuracy", fontsize=18)
    ax2.set_title(f"{prefix}mean argument distance", fontsize=18)
    ax3.set_title(f"{prefix}mean abs. error proponent utility", fontsize=18)
    ax4.set_title(f"{prefix}mean abs. error opponent utility", fontsize=18)
    # ax1.set_xlabel(x_label, fontsize=18)
    ax2.set_xlabel(x_label, fontsize=18)
    ax3.set_xlabel(x_label, fontsize=18)
    ax4.set_xlabel(x_label, fontsize=18)
    return fig, ax2, ax3, ax4

class PlotResults:
    def __init__(self, data, folder):
        self.folder = folder
        self.data = data
        self.metrics = ['Mean arg distance', 'Mae Prop', 'Mae Opp', 'Mean arg distance std', 'Mae Prop std', 'Mae Opp std']
        # self.metrics = ['Accuracy', 'Mean arg distance', 'Mae Prop', 'Mae Opp', 'Accuracy std', 'Mean arg distance std', 'Mae Prop std', 'Mae Opp std']
        self.num_metrics = len(self.metrics) // 2  # number of metrics, excluding their 'std' counterparts
        # Per-model plot style: label, marker, color and line style keyed by the model name in the results CSV.
        self.method_label = {'ClusterRegressor': r'$\mathrm{CLUMER}$', 'CRFRegressor': r'$\mathrm{CRAMER}$', 'MeanRegressor': r'$\mathrm{MeanR}$', 'RandomRegressor': r'$\mathrm{RandR}$', 'SuppVecMach': r'$\mathrm{SVR}$', 'GradientBoost': r'$\mathrm{GBOOST}$'}
        self.method_marker = {'ClusterRegressor': '<', 'CRFRegressor': 'x', 'MeanRegressor': 's', 'RandomRegressor': '>', 'SuppVecMach': 'D', 'GradientBoost': '^'}
        self.method_color = {'ClusterRegressor': 'mediumpurple', 'CRFRegressor': 'deepskyblue', 'MeanRegressor': 'orange', 'RandomRegressor': 'crimson', 'SuppVecMach': 'forestgreen', 'GradientBoost': 'blue'}
        self.method_line = {'ClusterRegressor': (0, (3, 1, 1, 1)), 'CRFRegressor': (0, (5, 1)), 'MeanRegressor': (0, (3, 5, 1, 5, 1, 5)), 'RandomRegressor': 'dotted', 'SuppVecMach': '-', 'GradientBoost': '--'}
    def plot_single_trees(self):
        """Plot per-tree, per-sample curves against the number of questions asked."""
        for tree_id in pd.unique(self.data['Tree id']):
            for sample_id in pd.unique(self.data['Sample id']):
                title = f"Tree id {tree_id}, Sample id {sample_id}"
                plt.clf()
                # The suptitle is left empty; the per-panel titles are set in set_axis_plot.
                fig, ax2, ax3, ax4 = set_axis_plot('', x_label='Number of questions asked')  # TODO ax1 if accuracy
                query_res = self.data.loc[(self.data['Tree id'] == tree_id) & (self.data['Sample id'] == sample_id)]
                for idx, single_run in query_res.iterrows():
                    evidence = [el + 1 for el in range(single_run['Tree leaves'] - 1)]
                    # ax1.errorbar(evidence, str2list(single_run['Accuracy']), yerr=str2list(single_run['Accuracy std']), linestyle=self.method_line[single_run['Model']], color=self.method_color[single_run['Model']], marker=self.method_marker[single_run['Model']], label=self.method_label[single_run['Model']])
                    ax2.errorbar(evidence, str2list(single_run['Mean arg distance']), yerr=str2list(single_run['Mean arg distance std']), linestyle=self.method_line[single_run['Model']], color=self.method_color[single_run['Model']], marker=self.method_marker[single_run['Model']], label=self.method_label[single_run['Model']])
                    ax3.errorbar(evidence, str2list(single_run['Mae Prop']), yerr=str2list(single_run['Mae Prop std']), linestyle=self.method_line[single_run['Model']], color=self.method_color[single_run['Model']], marker=self.method_marker[single_run['Model']], label=self.method_label[single_run['Model']])
                    ax4.errorbar(evidence, str2list(single_run['Mae Opp']), yerr=str2list(single_run['Mae Opp std']), linestyle=self.method_line[single_run['Model']], color=self.method_color[single_run['Model']], marker=self.method_marker[single_run['Model']], label=self.method_label[single_run['Model']])
                ax4.legend(fontsize=14)
                if not os.path.exists(os.path.join(self.folder, 'tree_samples')):
                    os.makedirs(os.path.join(self.folder, 'tree_samples'))
                plt.tight_layout()
                # Save one figure per (tree, sample) pair instead of repeatedly overwriting a single placeholder file.
                plt.savefig(os.path.join(self.folder, 'tree_samples', f'{title}.pdf'))
                plt.close(fig)
    def plot_aggregate_results(self, aggregation_param):
        """Aggregate the results over the given dataframe column (e.g. 'Tree leaves') and plot the averages."""
        title = f"Results on {aggregation_param.lower()} aggregation"
        fig, ax2, ax3, ax4 = set_axis_plot(title, prefix="Average ", x_label=f'Number of {aggregation_param.lower()}')  # TODO ax1 if accuracy
        axis_list = [ax2, ax3, ax4]  # TODO ax1 if accuracy
        aggr_dict = {model_name: {el: [] for el in self.metrics} for model_name in self.data["Model"].unique()}
        x_axis_values = []
        unique_values = self.data[aggregation_param].unique()
        for aggreg_value in sorted(unique_values):
            x_axis_values.append(aggreg_value)
            query_res = self.data.loc[self.data[aggregation_param] == aggreg_value]
            aggr_dict_value = {model_name: {el: [] for el in self.metrics} for model_name in self.data["Model"].unique()}
            for idx, single_run in query_res.iterrows():
                for metric in self.metrics:
                    aggr_dict_value[single_run['Model']][metric] += str2list(single_run[metric])
            for model_name in aggr_dict.keys():
                for metric in self.metrics:
                    if metric.split(" ")[-1] == 'std':
                        # Combine per-run stds via their root mean square (pooled std, assuming equal group sizes).
                        aggr_dict[model_name][metric].append(np.sqrt(np.mean(np.square(aggr_dict_value[model_name][metric]))))
                    else:
                        aggr_dict[model_name][metric].append(np.mean(aggr_dict_value[model_name][metric]))
        for model_name in aggr_dict.keys():
            for idx, metric in enumerate(self.metrics[:self.num_metrics]):
                tmp_ax = axis_list[idx]
                tmp_ax.errorbar(x_axis_values, aggr_dict[model_name][metric], yerr=aggr_dict[model_name][f"{metric} std"], color=self.method_color[model_name], marker=self.method_marker[model_name], label=self.method_label[model_name])
        ax4.legend(fontsize=14)
        plt.tight_layout()
        plt.savefig(os.path.join(self.folder, f'{title}.pdf'))
    def plot_aggregate_results_evidence_perc(self):
        """Aggregate the results over the percentage of evidence observed (10% to 100%) and plot the averages."""
        fig, ax2, ax3, ax4 = set_axis_plot("", prefix="Average ", x_label='Percentage of evidence')  # TODO ax1 if accuracy
        axis_list = [ax2, ax3, ax4]  # TODO ax1 if accuracy
        x_axis_values = [(el + 1) * 10 for el in range(10)]
        aggr_dict = {model_name: {el: [] for el in self.metrics} for model_name in self.data["Model"].unique()}
        for evidence_perc in x_axis_values:
            aggr_dict_value = {model_name: {el: [] for el in self.metrics} for model_name in self.data["Model"].unique()}
            for idx, single_run in self.data.iterrows():
                for metric in self.metrics:
                    results_list = str2list(single_run[metric])
                    # Keep only the first evidence_perc% of the per-evidence values of this run.
                    selected_values = math.ceil(len(results_list) * evidence_perc / 100)
                    aggr_dict_value[single_run["Model"]][metric] += results_list[:selected_values]
            for model_name in aggr_dict.keys():
                for metric in self.metrics:
                    if metric.split(" ")[-1] == 'std':
                        # Combine per-run stds via their root mean square (pooled std, assuming equal group sizes).
                        aggr_dict[model_name][metric].append(np.sqrt(np.mean(np.square(aggr_dict_value[model_name][metric]))))
                    else:
                        aggr_dict[model_name][metric].append(np.mean(aggr_dict_value[model_name][metric]))
        for model_name in aggr_dict.keys():
            for idx, metric in enumerate(self.metrics[:self.num_metrics]):
                tmp_ax = axis_list[idx]
                tmp_ax.errorbar(x_axis_values, aggr_dict[model_name][metric], yerr=aggr_dict[model_name][f"{metric} std"], linestyle=self.method_line[model_name], color=self.method_color[model_name], marker=self.method_marker[model_name], label=self.method_label[model_name])
        ax4.legend(fontsize=14)
        plt.tight_layout()
        title = "Results on percentage of evidence aggregation"
        plt.savefig(os.path.join(self.folder, f'{title}.pdf'))

if __name__ == '__main__':
    results_df = pd.read_csv('meat_results/results.csv')
    plot_res = PlotResults(results_df, "meat_results")
    # results_df = pd.read_csv('results/results.csv')
    # plot_res = PlotResults(results_df, "results")
    plot_res.plot_single_trees()
    # plot_res.plot_aggregate_results('Tree leaves')
    # plot_res.plot_aggregate_results('Sample clusters')
    # plot_res.plot_aggregate_results_evidence_perc()
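# Expected input (inferred from the columns accessed above, not from a documented schema):
# results.csv holds one row per (tree, sample, model) run with the columns 'Tree id',
# 'Sample id', 'Tree leaves', 'Model' (and 'Sample clusters' if that aggregation is used),
# plus, for each metric, a stringified list of per-evidence values and a matching
# '<metric> std' column, e.g. Mean arg distance = "[0.41, 0.33, 0.27]" and
# Mean arg distance std = "[0.05, 0.04, 0.03]".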