Skip to content

Commit 0d23fdc

Browse files
change qual tool to convert time unit during aggregation
Signed-off-by: cindyyuanjiang <[email protected]>
1 parent f2a6d62 commit 0d23fdc

18 files changed

+32
-29
lines changed

core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationAppInfo.scala

+9-4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
package org.apache.spark.sql.rapids.tool.qualification
1818

19+
import java.util.concurrent.TimeUnit.NANOSECONDS
20+
1921
import scala.collection.mutable.{ArrayBuffer, HashMap}
2022
import scala.collection.mutable
2123

@@ -168,8 +170,9 @@ class QualificationAppInfo(
168170
}
169171

170172
private def calculateCpuTimePercent(perSqlStageSummary: Seq[SQLStageSummary]): Double = {
171-
val totalCpuTime = perSqlStageSummary.map(_.execCPUTime).sum
172-
val totalRunTime = perSqlStageSummary.map(_.execRunTime).sum
173+
val totalCpuTime =
174+
NANOSECONDS.toMillis(perSqlStageSummary.map(_.execCPUTime).sum) // in milliseconds
175+
val totalRunTime = perSqlStageSummary.map(_.execRunTime).sum // in milliseconds
173176
ToolUtils.calculateDurationPercent(totalCpuTime, totalRunTime)
174177
}
175178

@@ -456,8 +459,10 @@ class QualificationAppInfo(
456459
val ratio = numSupportedExecs / numExecs
457460
val estimateWallclockSupported = (sqlWallClockDuration * ratio).toInt
458461
// don't worry about supported execs for these, as they are mostly an indicator of I/O
459-
val execRunTime = sqlIDToTaskEndSum.get(sqlID).map(_.executorRunTime).getOrElse(0L)
460-
val execCPUTime = sqlIDToTaskEndSum.get(sqlID).map(_.executorCPUTime).getOrElse(0L)
462+
val execRunTime =
463+
sqlIDToTaskEndSum.get(sqlID).map(_.executorRunTime).getOrElse(0L) // in milliseconds
464+
val execCPUTime =
465+
sqlIDToTaskEndSum.get(sqlID).map(_.executorCPUTime).getOrElse(0L) // in nanoseconds
461466
SQLStageSummary(stageSum, sqlID, estimateWallclockSupported,
462467
execCPUTime, execRunTime)
463468
}

core/src/main/scala/org/apache/spark/sql/rapids/tool/qualification/QualificationEventProcessor.scala

+4-6
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616

1717
package org.apache.spark.sql.rapids.tool.qualification
1818

19-
import java.util.concurrent.TimeUnit.NANOSECONDS
20-
2119
import scala.collection.JavaConverters._
2220
import scala.collection.mutable.ArrayBuffer
2321

@@ -42,8 +40,8 @@ class QualificationEventProcessor(app: QualificationAppInfo, perSqlOnly: Boolean
4240
val taskSum = app.stageIdToTaskEndSum.getOrElseUpdate(event.stageId, {
4341
new StageTaskQualificationSummary(event.stageId, event.stageAttemptId, 0, 0, 0, 0)
4442
})
45-
taskSum.executorRunTime += event.taskMetrics.executorRunTime
46-
taskSum.executorCPUTime += NANOSECONDS.toMillis(event.taskMetrics.executorCpuTime)
43+
taskSum.executorRunTime += event.taskMetrics.executorRunTime // in milliseconds
44+
taskSum.executorCPUTime += event.taskMetrics.executorCpuTime // in nanoseconds
4745
taskSum.totalTaskDuration += event.taskInfo.duration
4846
// Add the total bytes read from the task if it's available. This is from inputMetrics if
4947
// it is reading from datasource, or shuffleReadMetrics if it is reading from shuffle.
@@ -61,8 +59,8 @@ class QualificationEventProcessor(app: QualificationAppInfo, perSqlOnly: Boolean
6159
val taskSum = app.sqlIDToTaskEndSum.getOrElseUpdate(sqlID, {
6260
new StageTaskQualificationSummary(event.stageId, event.stageAttemptId, 0, 0, 0, 0)
6361
})
64-
taskSum.executorRunTime += event.taskMetrics.executorRunTime
65-
taskSum.executorCPUTime += NANOSECONDS.toMillis(event.taskMetrics.executorCpuTime)
62+
taskSum.executorRunTime += event.taskMetrics.executorRunTime // in milliseconds
63+
taskSum.executorCPUTime += event.taskMetrics.executorCpuTime // in nanoseconds
6664
taskSum.totalTaskDuration += event.taskInfo.duration
6765
}
6866
}
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly), Total Core Seconds
2-
"Spark shell","local-1626104300434",1500,1469,131104,996,88.35,"","","","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,string>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>;array<string>","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>","NESTED COMPLEX TYPE",1260,1388,129598,493,976,false,"CollectLimit","",30,1564
2+
"Spark shell","local-1626104300434",1500,1469,131104,996,89.7,"","","","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,string>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>;array<string>","struct<firstname:string,middlename:array<string>,lastname:string>;struct<current:struct<state:string,city:string>,previous:struct<state:map<string,string>,city:string>>;array<struct<city:string,state:string>>;map<string,array<string>>;map<string,map<string,string>>;array<array<string>>","NESTED COMPLEX TYPE",1260,1388,129598,493,976,false,"CollectLimit","",30,1564
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1623876083964",119353,1417661,133857,92667,91.14,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",30,1599
2+
"Spark shell","local-1623876083964",119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",30,1599
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1623876083964",119353,1417661,133857,92667,91.14,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",30,1599
2+
"Spark shell","local-1623876083964",119353,1417661,133857,92667,91.25,"","","","","","",119903,143821,14504,316964,1100697,false,"Scan unknown;SerializeFromObject","",30,1599
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","app-20211019113801-0001",2942,19894,571967,2814,28.41,"","JDBC[*]","","","","",1812,2883,569025,859,19035,false,"CollectLimit;Scan jdbc;Execute CreateViewCommand","",30,9110
2+
"Spark shell","app-20211019113801-0001",2942,19894,571967,2814,29.76,"","JDBC[*]","","","","",1812,2883,569025,859,19035,false,"CollectLimit;Scan jdbc;Execute CreateViewCommand","",30,9110
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1629446106683",1910,6475,17698,1910,27.76,"","","","array<struct<city:string,state:string>>;map<string,map<string,string>>","array<struct<city:string,state:string>>;map<string,map<string,string>>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",221851,132
2+
"Spark shell","local-1629446106683",1910,6475,17698,1910,27.8,"","","","array<struct<city:string,state:string>>;map<string,map<string,string>>","array<struct<city:string,state:string>>;map<string,map<string,string>>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",221851,132
33
"Spark shell","local-1623263471760",0,0,22937,0,0.0,"","","","","","",0,0,22937,0,0,false,"","",221851,266
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"TPC-DS Like Bench q86","app-20210319163812-1778",9910,4320658,26171,9910,35.34,"","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",30,24270
2+
"TPC-DS Like Bench q86","app-20210319163812-1778",9910,4320658,26171,9910,35.39,"","","","","","",9565,9265,3596053,0,4320658,false,"Execute CreateViewCommand","",30,24270
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1630045673160",1363,3757,21200,1363,34.56,"","","","array<struct<city:string,state:string>>;map<string,map<st","array<struct<city:string,state:string>>;map<string,map<st","NESTED COMPLEX TYPE",1294,716,20239,0,3757,false,"","",30,160
2+
"Spark shell","local-1630045673160",1363,3757,21200,1363,34.65,"","","","array<struct<city:string,state:string>>;map<string,map<st","array<struct<city:string,state:string>>;map<string,map<st","NESTED COMPLEX TYPE",1294,716,20239,0,3757,false,"","",30,160
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1629446106683",1910,6475,17698,1910,27.76,"","","","array<struct<city:string,state:string>>;map<string,map<string,string>>","array<struct<city:string,state:string>>;map<string,map<string,string>>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",30,132
2+
"Spark shell","local-1629446106683",1910,6475,17698,1910,27.8,"","","","array<struct<city:string,state:string>>;map<string,map<string,string>>","array<struct<city:string,state:string>>;map<string,map<string,string>>","NESTED COMPLEX TYPE",1453,1203,16292,0,6475,false,"","",30,132
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Rapids Spark Profiling Tool Unit Tests","local-1622561780883",0,40448,7673,0,55.94,"","","","","","",0,5000,7673,8096,32352,false,"Scan unknown;SerializeFromObject","",30,82
2+
"Rapids Spark Profiling Tool Unit Tests","local-1622561780883",0,40448,7673,0,56.24,"","","","","","",0,5000,7673,8096,32352,false,"Scan unknown;SerializeFromObject","",30,82
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018",11600,132257,16319,9868,37.7,"","","JSON","","","",7143,13770,4719,19744,112513,false,"SerializeFromObject;Scan unknown;Execute InsertIntoHadoopFsRelationCommand json;DeserializeToObject;Filter;MapElements","",1,186
2+
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018",11600,132257,16319,9868,37.97,"","","JSON","","","",7143,13770,4719,19744,112513,false,"SerializeFromObject;Scan unknown;Execute InsertIntoHadoopFsRelationCommand json;DeserializeToObject;Filter;MapElements","",1,186
33
"Spark shell","local-1651187225439",224,180,355637,142,87.88,"","","","","","",498,228,355101,66,114,false,"SerializeFromObject;CollectLimit;DeserializeToObject;Filter;MapElements","",1,2834
44
"Spark shell","local-1651188809790",347,283,166215,128,81.18,"","","","","","UDF",715,318,165572,178,105,false,"CollectLimit;Project","UDF",1,1318
5-
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390",1156,4666,6240,122,46.27,"","","JSON","","","UDF",1209,1130,5809,4170,496,false,"Execute InsertIntoHadoopFsRelationCommand json;LocalTableScan;Execute CreateViewCommand;Project","UDF",1,64
5+
"Rapids Spark Profiling Tool Unit Tests","local-1623281204390",1156,4666,6240,122,47.48,"","","JSON","","","UDF",1209,1130,5809,4170,496,false,"Execute InsertIntoHadoopFsRelationCommand json;LocalTableScan;Execute CreateViewCommand;Project","UDF",1,64
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1624371544219",4575,20421,175293,4365,72.15,"","Text[*]","JSON","","","",1859,5372,176916,938,19483,false,"CollectLimit;Scan text;Execute InsertIntoHadoopFsRelationCommand json","",30,2096
2+
"Spark shell","local-1624371544219",4575,20421,175293,4365,72.2,"","Text[*]","JSON","","","",1859,5372,176916,938,19483,false,"CollectLimit;Scan text;Execute InsertIntoHadoopFsRelationCommand json","",30,2096
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1624371906627",4917,21802,83738,4762,71.3,"","Text[*]","JSON","","","",1984,5438,83336,689,21113,false,"CollectLimit;Scan text;Execute InsertIntoHadoopFsRelationCommand json","",30,997
2+
"Spark shell","local-1624371906627",4917,21802,83738,4762,71.34,"","Text[*]","JSON","","","",1984,5438,83336,689,21113,false,"CollectLimit;Scan text;Execute InsertIntoHadoopFsRelationCommand json","",30,997
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1634253215009",523,359,47063,281,67.64,"","Text[*]","","","","",1068,385,46540,166,193,false,"CollectLimit;Scan text","",30,369
2+
"Spark shell","local-1634253215009",523,359,47063,281,68.73,"","Text[*]","","","","",1068,385,46540,166,193,false,"CollectLimit;Scan text","",30,369
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018",395,14353,4872,164,62.67,"","","JSON","","","",1306,794,4477,8376,5977,true,"SerializeFromObject;Scan unknown;Execute InsertIntoHadoopFsRelationCommand json;DeserializeToObject;Filter;MapElements","",30,49
2+
"Rapids Spark Profiling Tool Unit Tests","local-1622043423018",395,14353,4872,164,62.79,"","","JSON","","","",1306,794,4477,8376,5977,true,"SerializeFromObject;Scan unknown;Execute InsertIntoHadoopFsRelationCommand json;DeserializeToObject;Filter;MapElements","",30,49
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
App Name,App ID,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,SQL Stage Durations Sum,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,App Duration Estimated,Unsupported Execs,Unsupported Expressions,Estimated Job Frequency (monthly),Total Core Seconds
2-
"Spark shell","local-1629442299891",1151,920,19554,788,91.72,"","","CSV;JSON","","","",1235,1049,18251,290,630,false,"Execute InsertIntoHadoopFsRelationCommand csv;Execute InsertIntoHadoopFsRelationCommand json","",30,147
2+
"Spark shell","local-1629442299891",1151,920,19554,788,91.98,"","","CSV;JSON","","","",1235,1049,18251,290,630,false,"Execute InsertIntoHadoopFsRelationCommand csv;Execute InsertIntoHadoopFsRelationCommand json","",30,147

0 commit comments

Comments
 (0)