From 8485928d311b38ee8bc516d16707710104df7e91 Mon Sep 17 00:00:00 2001
From: wangxf35
Date: Wed, 18 Dec 2024 09:08:07 +0000
Subject: [PATCH 1/3] Fix issue #22: computation time was incorrectly filled
 into msg_size

---
 utils/utils.py      | 2 +-
 workload_applyer.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index 2269835..537c94e 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -246,7 +246,7 @@ def Comp_with_aiob(workload, compute_cache):
             for key in compute_cache:
                 key_temp = key.split("_")[0]
                 if key_temp in item.stage:
-                    item.msg_size = compute_cache[key]
+                    item._elapsed_time = compute_cache[key]
                     break
     return workload

diff --git a/workload_applyer.py b/workload_applyer.py
index ddd019f..1c6a6fb 100755
--- a/workload_applyer.py
+++ b/workload_applyer.py
@@ -357,7 +357,7 @@ def _apply_computation(self, item):
         if self.skip_computation:
            return
         if self.computation_aiob:
-            time.sleep(item.msg_size / 1e9)
+            time.sleep(item._elapsed_time / 1e9)
         else:
             # item.msg_size = 1
             input_shape1, input_shape2 = item.msg_size
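Illustration (not part of the patch series): after PATCH 1/3, an AIOB-measured
compute time is carried in item._elapsed_time in nanoseconds and replayed with
time.sleep, while msg_size keeps holding the matmul input shapes used by the
non-AIOB path. A minimal runnable sketch of that replay step; the Item class
and its field values are hypothetical stand-ins for the real workload item:

import time

class Item:
    # Hypothetical stand-in for a real workload item (illustration only).
    def __init__(self, msg_size, elapsed_time_ns):
        self.msg_size = msg_size              # matmul input shapes (non-AIOB path)
        self._elapsed_time = elapsed_time_ns  # measured compute time, in nanoseconds

def apply_computation_aiob(item):
    # _elapsed_time is in nanoseconds; time.sleep takes seconds, hence / 1e9.
    time.sleep(item._elapsed_time / 1e9)

# Sleeps ~2.5 ms, the (made-up) measured cost of this computation item.
apply_computation_aiob(Item(((4096, 1024), (1024, 4096)), 2_500_000))
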
From 6707cc95ff0578734fc76387dad96c695a3f130f Mon Sep 17 00:00:00 2001
From: wangxf35
Date: Fri, 27 Dec 2024 06:09:25 +0000
Subject: [PATCH 2/3] Fix the entire attention's _elapsed_time being
 repeatedly assigned to both attention_column and attention_row

---
 utils/utils.py | 55 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/utils/utils.py b/utils/utils.py
index 537c94e..d714df0 100755
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -244,8 +244,10 @@ def Comp_with_aiob(workload, compute_cache):
     for item in workload.workload:
         if item.comm_type == CommType.computation:
             for key in compute_cache:
-                key_temp = key.split("_")[0]
-                if key_temp in item.stage:
+                item._elapsed_time = 0
+                key_split = key.rsplit('_', 1)
+                stage_split = item.stage.rsplit('.', 2)
+                if (len(key_split) > 1 and len(stage_split) > 2) and (key_split[0] == stage_split[2]) and (key_split[1] == stage_split[0]):
                     item._elapsed_time = compute_cache[key]
                     break
     return workload
@@ -287,8 +289,10 @@ def get_comp_out(args):


 def extract_averages(file_path,args):
-    attention_avg_sum = 0.0
-    mlp_avg_sum = 0.0
+    attention_column_avg_sum = 0.0
+    attention_row_avg_sum = 0.0
+    mlp_column_avg_sum = 0.0
+    mlp_row_avg_sum = 0.0
     other_avgs = {}
     grad_forward = 0.0
     grad_backward = 0.0
@@ -314,31 +318,46 @@ def extract_averages(file_path,args):
                 grad_backward = float(avg_match.group(1)) * 1000
             elif avg_match and current_section:
                 avg_value = float(avg_match.group(1)) * 1000
-                if "atten" in current_section or current_section == "layernorm":
-
+                if current_section in ["atten_qkv", "atten_core_qk", "atten_core_softmax", "atten_core_contex"]:
+                    if args.recompute_activations and 'flash' in current_section:
+                        attention_column_avg_sum += avg_value*2
+                    else:
+                        attention_column_avg_sum += avg_value
+                elif current_section in ["atten_linear", "layernorm2"]:
                     if args.recompute_activations and 'flash' in current_section:
-                        attention_avg_sum += avg_value*2
+                        attention_row_avg_sum += avg_value*2
                     else:
-                        attention_avg_sum += avg_value
-                elif "mlp" in current_section or current_section == "layernorm2":
-                    mlp_avg_sum += avg_value
+                        attention_row_avg_sum += avg_value
+                elif current_section in ["mlp_linear_1", "mlp_gelu"]:
+                    mlp_column_avg_sum += avg_value
+                elif current_section in ["mlp_linear_2"]:
+                    mlp_row_avg_sum += avg_value
                 else:
                     other_avgs[current_section] = avg_value

     # Round and convert to integers
-    attention_forward = round(attention_avg_sum)
-    attention_backward = attention_forward
-    mlp_forward = round(mlp_avg_sum)
-    mlp_backward = mlp_forward
+    attention_column_forward = round(attention_column_avg_sum)
+    attention_row_forward = round(attention_row_avg_sum)
+    attention_column_backward = attention_column_forward
+    attention_row_backward = attention_row_forward
+    mlp_column_forward = round(mlp_column_avg_sum)
+    mlp_row_forward = round(mlp_row_avg_sum)
+    mlp_column_backward = mlp_column_forward
+    mlp_row_backward = mlp_row_forward
+
     grad_backward = round(grad_backward)
     grad_forward = round(grad_forward)
     other_avgs_int = {k: round(v) for k, v in other_avgs.items() if k != "param_time"}

     a100_compute_cache = {
-        "attention_forward": attention_forward,
-        "attention_backward": attention_backward,
-        "mlp_forward": mlp_forward,
-        "mlp_backward": mlp_backward,
+        "attention_column_forward": attention_column_forward,
+        "attention_row_forward": attention_row_forward,
+        "attention_column_backward": attention_column_backward,
+        "attention_row_backward": attention_row_backward,
+        "mlp_column_forward": mlp_column_forward,
+        "mlp_row_forward": mlp_row_forward,
+        "mlp_column_backward": mlp_column_backward,
+        "mlp_row_backward": mlp_row_backward,
         "grad_forward": grad_forward,
         "grad_backward": grad_backward,
     }

From 2db1dadafb4824022c73e8056efe093aca4c9e35 Mon Sep 17 00:00:00 2001
From: wangxf35
Date: Fri, 27 Dec 2024 06:22:50 +0000
Subject: [PATCH 3/3] Fix spelling: add the missing '.' separator in
 MegatronRowLinear stage names

---
 workload_generator/mocked_model/MockedMegatron.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workload_generator/mocked_model/MockedMegatron.py b/workload_generator/mocked_model/MockedMegatron.py
index c5f9dc4..af6519e 100755
--- a/workload_generator/mocked_model/MockedMegatron.py
+++ b/workload_generator/mocked_model/MockedMegatron.py
@@ -109,7 +109,7 @@ def backward(self):
                     (self.seq_len, self.batch_size, self.output_size),
                     self.weight.shape,
                 ),
-                stage="backward.MegatronRowLinear" + self.name,
+                stage="backward.MegatronRowLinear." + self.name,
             )
         )
         workloads.append(
@@ -119,7 +119,7 @@ def backward(self):
                     (self.output_size, self.seq_len * self.batch_size),
                     (self.seq_len * self.batch_size, self.input_size_per_partition),
                 ),
-                stage="backward.MegatronRowLinear" + self.name,
+                stage="backward.MegatronRowLinear." + self.name,
             )
         )
         return workloads
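
Illustration (not part of the patch series): PATCH 2/3 replaces the old
one-sided substring test (key.split("_")[0] in item.stage) with an exact
two-part match, so a cache key such as "attention_column_forward" only lands
on the stage whose trailing name is "attention_column" and whose leading pass
is "forward". A runnable sketch of that rule; the example stage strings assume
the "pass.ClassName.name" format produced after PATCH 3/3, and the class and
module names are made up for the example:

def matches(key: str, stage: str) -> bool:
    # "attention_column_forward" -> ["attention_column", "forward"]
    key_split = key.rsplit("_", 1)
    # "forward.MegatronColumnLinear.attention_column"
    #   -> ["forward", "MegatronColumnLinear", "attention_column"]
    stage_split = stage.rsplit(".", 2)
    return (
        len(key_split) > 1
        and len(stage_split) > 2
        and key_split[0] == stage_split[2]  # module name must match exactly
        and key_split[1] == stage_split[0]  # forward/backward must match exactly
    )

# Exact module name and pass: accepted.
assert matches("attention_column_forward", "forward.MegatronColumnLinear.attention_column")
# Same module, wrong pass: rejected.
assert not matches("attention_column_forward", "backward.MegatronColumnLinear.attention_column")
# Prefix-only overlap, which the old substring test wrongly accepted: rejected.
assert not matches("attention_forward", "forward.MegatronColumnLinear.attention_column")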