22
33import argparse
44import json
5- import os , sys
5+ import os
66import re
7-
7+ import sys
88from pathlib import Path
99
1010import pandas as pd
@@ -126,7 +126,12 @@ def create_stats_dataframe(base_dir):
126126 )
127127 continue
128128
129- stats ["IPC" ] = stats ["board.processor.cores.core.ipc" ]
129+ try :
130+ stats ["IPC" ] = stats ["board.processor.cores.core.ipc" ]
131+ except :
132+ print (f"Error reading IPC from { stats_file } " )
133+ # sys.exit(-1)
134+
130135 if weights and index in weights :
131136 stats ["weight" ] = weights [index ]
132137 stats ["weighted_IPC" ] = stats ["IPC" ] * stats ["weight" ]
@@ -146,58 +151,103 @@ def create_stats_dataframe(base_dir):
146151 f"Error: No stats.txt files found in subdirectories of '{ base_dir } '."
147152 )
148153 return None
154+
149155 df = pd .DataFrame (data , index = labels )
150156
151157 # Calculate per-benchmark IPC
152158 benchmark_ipc = {}
153159 for benchmark_number , benchmark_name in benchmark_mapping .items ():
154- benchmark_entries = df [df .index .str .contains (f"chkpt_{ benchmark_name } _" )]
160+ benchmark_entries = df [
161+ df .index .str .contains (f"chkpt_{ benchmark_name } _" )
162+ ]
155163 if not benchmark_entries .empty :
156- weighted_ipcs = benchmark_entries [benchmark_entries ['weighted_IPC' ] != "N/A" ]['weighted_IPC' ].astype (float )
164+ weighted_ipcs = benchmark_entries [
165+ benchmark_entries ["weighted_IPC" ] != "N/A"
166+ ]["weighted_IPC" ].astype (float )
157167 if not weighted_ipcs .empty :
158168 benchmark_ipc [benchmark_name ] = weighted_ipcs .sum ()
159169 else :
160- print (f"Warning: No valid weighted IPCs found for { benchmark_name } " )
170+ print (
171+ f"Warning: No valid weighted IPCs found for { benchmark_name } "
172+ )
161173 benchmark_ipc [benchmark_name ] = "N/A"
162174 else :
163175 print (f"Warning: No entries found for benchmark { benchmark_name } " )
164176 sys .exit (- 1 )
165177 benchmark_ipc [benchmark_name ] = "N/A"
166178
167179 return df , benchmark_ipc # Return both DataFrame and benchmark IPCs
168-
180+
181+
def rename_to_last_two_parts(col_name):
    """Shorten a dotted stat/column name to its last two components.

    Parameters
    ----------
    col_name : str
        A dot-separated name, e.g. "board.processor.cores.core.ipc".

    Returns
    -------
    str
        The last two dot-separated parts joined by a dot (e.g.
        "core.ipc"), or the original name unchanged when it has fewer
        than two parts (e.g. "IPC", "ShortName").
    """
    # Slicing with [-2:] already yields the whole list when there are
    # fewer than two parts, so the explicit length check in the original
    # if/else was redundant — both branches collapse to this expression.
    return ".".join(col_name.split(".")[-2:])
194+
169195
170196if __name__ == "__main__" :
171197 parser = argparse .ArgumentParser (
172198 description = "Process stats.txt files in subdirectories."
173199 )
174200 parser .add_argument (
175- "base_dirs" , nargs = "+" , help = "The directories containing the results."
201+ "--dirs" ,
202+ required = True ,
203+ nargs = "+" ,
204+ help = "The directories containing the results." ,
176205 )
177206 parser .add_argument (
178207 "-o" ,
179208 "--output" ,
180209 help = "Output JSON file name (default: stats_summary.json)" ,
181210 default = "stats_summary.json" ,
182211 )
212+ parser .add_argument (
213+ "--stats" ,
214+ required = True ,
215+ nargs = "+" ,
216+ help = "Regex pattern for extracting stats." ,
217+ )
218+ parser .add_argument (
219+ "--exact" ,
220+ action = "store_true" ,
221+ help = "Match exact stat." ,
222+ )
223+ parser .add_argument (
224+ "--checkpoint" ,
225+ help = "Get stats for the checkpoint." ,
226+ )
183227
184228 args = parser .parse_args ()
185- base_dirs = args .base_dirs
229+ base_dirs = args .dirs
230+ extract_stats = args .stats
186231 output_file = args .output
232+ exact_stat = args .exact
187233
188234 all_stats = {}
189- all_ipc = []
235+ all_ipc = []
190236 all_ipc_df = pd .DataFrame ()
191237 for base_directory in base_dirs :
192238 try :
193239 if not os .path .isdir (base_directory ):
194- raise FileNotFoundError (f"Directory '{ base_directory } ' not found." )
240+ raise FileNotFoundError (
241+ f"Directory '{ base_directory } ' not found."
242+ )
195243
196244 base_dir = str (Path (base_directory ))
197245
198246 df_stats , benchmark_ipc = create_stats_dataframe (base_dir )
199247 all_stats [base_dir ] = df_stats
200- benchmark_ipc_df = pd .DataFrame .from_dict (benchmark_ipc , orient = 'index' , columns = [base_dir ])
248+ benchmark_ipc_df = pd .DataFrame .from_dict (
249+ benchmark_ipc , orient = "index" , columns = [base_dir ]
250+ )
201251 all_ipc_df = pd .concat ([all_ipc_df , benchmark_ipc_df ], axis = 1 )
202252
203253 except FileNotFoundError as e :
@@ -208,26 +258,74 @@ def create_stats_dataframe(base_dir):
208258 if all_stats is not None :
209259 # Convert DataFrame to dictionary for JSON output
210260 with open (output_file , "w" ) as f :
211- json_strings = {base_dir : df .to_json (orient = "index" , indent = 4 ) for base_dir , df in all_stats .items ()}
212- json_objects = {base_dir : json .loads (json_string ) for base_dir , json_string in json_strings .items ()}
261+ json_strings = {
262+ base_dir : df .to_json (orient = "index" , indent = 4 )
263+ for base_dir , df in all_stats .items ()
264+ }
265+ json_objects = {
266+ base_dir : json .loads (json_string )
267+ for base_dir , json_string in json_strings .items ()
268+ }
213269 json .dump (json_objects , f , indent = 4 )
214270
215271 print (f"Stats summary saved to { output_file } in JSON format" )
216272
273+ matching_stats = []
274+ if extract_stats is not None :
275+ bench_list = []
276+ stat_df_bench = pd .DataFrame ()
277+ for bench_run , df in all_stats .items ():
278+ bench_list .append (bench_run )
279+ for stat in extract_stats :
280+ print (f"Extracting stat { stat } in bench { bench_run } " )
281+ # print(df)
282+ if exact_stat :
283+ stat_df = df .filter (items = [stat ])
284+ else :
285+ stat_df = df .filter (regex = rf"{ stat } " )
286+
287+ # print(stat_df)
288+ if not stat_df .empty :
289+ # save the stat names for calculating %
290+ matching_stats .extend ([col for col in stat_df .columns ])
291+ stat_df_bench = stat_df_bench .join (
292+ stat_df , how = "outer" , rsuffix = f"_{ bench_run } "
293+ )
294+ # stat_df_bench = stat_df_bench.set_axis(stat_df.index)
295+ # print(stat_df_bench)
296+
297+ # Calculate %diff for all stats
298+ for stat in matching_stats :
299+ base_bench = bench_list [0 ]
300+ for bench in bench_list [1 :]:
301+ stat_bench = stat + f"_{ bench } "
302+ percent_stat = stat + f"_{ bench } " + "%"
303+ stat_df_bench [percent_stat ] = (
304+ (stat_df_bench [stat_bench ] / stat_df_bench [stat ]) - 1
305+ ) * 100.0
306+
307+ stat_df_bench = stat_df_bench .rename (
308+ columns = rename_to_last_two_parts
309+ )
310+ if not args .checkpoint :
311+ print (stat_df_bench )
312+ else :
313+ print (stat_df_bench .filter (like = f"{ args .checkpoint } " , axis = 0 ))
217314
218315 new_columns = []
219316 base_col = all_ipc_df .columns [0 ]
220317 for i , col in enumerate (all_ipc_df .columns ):
221318 new_columns .append (col )
222319 if i > 0 : # Start from the second column (index 1)
223- new_col_name = f'{ col } _%'
224- all_ipc_df [new_col_name ] = ((all_ipc_df [col ] - all_ipc_df [base_col ]) / all_ipc_df [base_col ]) * 100
225- #Handle division by zero error
320+ new_col_name = f"% { col } "
321+ all_ipc_df [new_col_name ] = (
322+ (all_ipc_df [col ] - all_ipc_df [base_col ]) / all_ipc_df [base_col ]
323+ ) * 100
324+ # Handle division by zero error
226325 # all_ipc_df.loc[all_ipc_df[base_col] == 0, new_col_name] = all_ipc_df.loc[all_ipc_df[base_col] == 0].apply(lambda x: float('inf') if x[col]!=0 else 0, axis=1)
227326 new_columns .append (new_col_name )
228-
229327
230- all_ipc_df = all_ipc_df [new_columns ]
231- pd .options .display .float_format = ' {:.2f}' .format
328+ all_ipc_df = all_ipc_df [new_columns ]
329+ pd .options .display .float_format = " {:.2f}" .format
232330 print (all_ipc_df .to_string ())
233331 # print(all_ipc_df)
0 commit comments