Skip to content

Commit 1ec7e7a

Browse files
authored
Finalize the Tools-API helper methods and classes (#1893)
Signed-off-by: Ahmed Hussein (amahussein) <[email protected]> Fixes #1887 Simplifies the interface in order to use the Tools-API more efficiently. This pull request refactors the API v1 report handler system in the RAPIDS PyTools codebase to simplify and modernize the way core report handlers are created and used. The main change is the removal of the legacy `APIHelpers` builder pattern in favor of direct class-based instantiation for report handlers (e.g., `QualCore`, `ProfCore`). This results in cleaner, more readable code and a more consistent interface for handling reports. Additionally, the `CombinedDFBuilder` is replaced with a new `CombinedCSVBuilder` class and metaclass for combining CSV reports. --------- Signed-off-by: Ahmed Hussein (amahussein) <[email protected]>
1 parent 0ab998d commit 1ec7e7a

File tree

16 files changed

+463
-406
lines changed

16 files changed

+463
-406
lines changed

user_tools/src/spark_rapids_pytools/rapids/profiling.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from spark_rapids_pytools.common.sys_storage import FSUtil
2525
from spark_rapids_pytools.common.utilities import Utils
2626
from spark_rapids_pytools.rapids.profiling_core import ProfilingCore
27-
from spark_rapids_tools.api_v1.builder import TXTReport
2827
from spark_rapids_tools.utils.data_utils import TXTResult
2928

3029

@@ -130,7 +129,7 @@ def __generate_report_with_recommendations(self):
130129
sec_comments_head = ['\tComments:']
131130
log_lines.append('### Recommended configurations ###')
132131
headers = self.ctxt.get_value('local', 'output', 'summaryColumns')
133-
with TXTReport(self.core_handler).table('appRawSummaryLog') as app_rawlogs_summaries:
132+
with self.core_handler.txt('appRawSummaryLog') as app_rawlogs_summaries:
134133
# the result of the load operation is a dictionary [appid, data]
135134
for app_id, log_content in app_rawlogs_summaries.items():
136135
app_name = ''

user_tools/src/spark_rapids_pytools/rapids/profiling_core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
from typing import List
1919

2020
from spark_rapids_pytools.rapids.rapids_tool import RapidsJarTool
21-
from spark_rapids_tools.api_v1 import ProfCoreResultHandler
21+
from spark_rapids_tools.api_v1 import ProfCore
2222

2323

2424
@dataclass
25-
class ProfilingCore(RapidsJarTool[ProfCoreResultHandler]):
25+
class ProfilingCore(RapidsJarTool[ProfCore]):
2626
"""
2727
Core profiling tool
2828
"""

user_tools/src/spark_rapids_pytools/rapids/qualification.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
from spark_rapids_pytools.common.sys_storage import FSUtil
3030
from spark_rapids_pytools.common.utilities import Utils, TemplateGenerator
3131
from spark_rapids_pytools.rapids.qualification_core import QualificationCore
32-
from spark_rapids_tools.api_v1 import APIHelpers, CSVReport
3332
from spark_rapids_tools.enums import QualFilterApp, QualEstimationModel, SubmissionMode
3433
from spark_rapids_tools.tools.additional_heuristics import AdditionalHeuristics
3534
from spark_rapids_tools.tools.cluster_config_recommender import ClusterConfigRecommender
@@ -418,7 +417,7 @@ def create_stdout_table_pprinter(total_apps: pd.DataFrame,
418417
return TopCandidates(props=view_dic, total_apps=total_apps, tools_processed_apps=tools_processed_apps)
419418

420419
# 1. Read summary report using QualCoreHandler
421-
with CSVReport(self.core_handler, _tbl='qualCoreCSVSummary') as q_sum_res:
420+
with self.core_handler.csv('qualCoreCSVSummary') as q_sum_res:
422421
df = q_sum_res.data
423422
# 1. Operations related to XGboost modelling
424423
if not df.empty and self.ctxt.get_ctxt('estimationModelArgs')['xgboostEnabled']:
@@ -434,12 +433,9 @@ def create_stdout_table_pprinter(total_apps: pd.DataFrame,
434433
) from e
435434
# 2. Operations related to cluster information
436435
try:
437-
with APIHelpers.CombinedDFBuilder(
438-
table='clusterInfoJSONReport',
439-
handlers=self.core_handler,
440-
raise_on_empty=False,
441-
raise_on_failure=False
442-
) as c_builder:
436+
with self.core_handler.csv_combiner(
437+
'clusterInfoJSONReport'
438+
).suppress_failure() as c_builder:
443439
# convert the json columns to csv columns
444440
c_builder.apply_on_report(lambda x: x.map_cols(Qualification.__map_cluster_info_table()))
445441
# use "App ID" included in the json report
@@ -455,17 +451,14 @@ def create_stdout_table_pprinter(total_apps: pd.DataFrame,
455451
'Reason - %s:%s', type(e).__name__, e)
456452

457453
# 3. Operations related to reading qualification output (unsupported operators and apps status)
458-
with APIHelpers.CombinedDFBuilder(
459-
table='unsupportedOpsCSVReport',
460-
handlers=self.core_handler,
461-
raise_on_empty=False,
462-
raise_on_failure=False
463-
) as c_builder:
454+
with self.core_handler.csv_combiner(
455+
'unsupportedOpsCSVReport'
456+
).suppress_failure() as c_builder:
464457
# use "App ID" column name on the injected apps
465458
c_builder.combiner.on_app_fields({'app_id': 'App ID'})
466459
unsupported_ops_df = c_builder.build()
467460

468-
with CSVReport(self.core_handler, _tbl='coreCSVStatus') as status_res:
461+
with self.core_handler.csv('coreCSVStatus') as status_res:
469462
apps_status_df = status_res.data
470463

471464
# 4. Operations related to output
@@ -684,12 +677,9 @@ def __assign_spark_runtime_to_apps(self,
684677
Assigns the Spark Runtime (Spark/Photon) to each application. This will be used to categorize
685678
applications into speedup categories (Small/Medium/Large).
686679
"""
687-
with APIHelpers.CombinedDFBuilder(
688-
table='coreRawApplicationInformationCSV',
689-
handlers=self.core_handler,
690-
raise_on_empty=False,
691-
raise_on_failure=False
692-
) as c_builder:
680+
with self.core_handler.csv_combiner(
681+
'coreRawApplicationInformationCSV'
682+
).suppress_failure() as c_builder:
693683
# customize the report loading to only select required columns and rename them
694684
c_builder.apply_on_report(
695685
lambda r: r.pd_args(

user_tools/src/spark_rapids_pytools/rapids/qualification_core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
from typing import List
1919

2020
from spark_rapids_pytools.rapids.rapids_tool import RapidsJarTool
21-
from spark_rapids_tools.api_v1 import QualCoreResultHandler
21+
from spark_rapids_tools.api_v1 import QualCore
2222

2323

2424
@dataclass
25-
class QualificationCore(RapidsJarTool[QualCoreResultHandler]):
25+
class QualificationCore(RapidsJarTool[QualCore]):
2626
"""
2727
Base class for qualification tools that provides core qualification functionality.
2828
"""

user_tools/src/spark_rapids_pytools/rapids/qualification_stats.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from spark_rapids_pytools.common.utilities import Utils
2323
from spark_rapids_pytools.rapids.rapids_tool import RapidsTool
2424
from spark_rapids_pytools.rapids.tool_ctxt import ToolContext
25-
from spark_rapids_tools.api_v1 import APIHelpers
25+
from spark_rapids_tools.api_v1.builder import QualCore
2626
from spark_rapids_tools.tools.qualification_stats_report import SparkQualificationStats
2727

2828

@@ -74,8 +74,7 @@ def _process_output_args(self) -> None:
7474
self.ctxt.set_local('outputFolder', self.output_folder)
7575
self.logger.info('Local output folder is set as: %s', self.output_folder)
7676
# Add QualCoreHandler to the context
77-
self.ctxt.set_ctxt('coreHandler',
78-
APIHelpers.QualCore.build_handler(dir_path=self.qual_output))
77+
self.ctxt.set_ctxt('coreHandler', QualCore(self.qual_output))
7978

8079
def _run_rapids_tool(self) -> None:
8180
"""

user_tools/src/spark_rapids_pytools/rapids/qualx/prediction.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from spark_rapids_pytools.common.sys_storage import FSUtil
2020
from spark_rapids_pytools.rapids.qualx.qualx_tool import QualXTool
21-
from spark_rapids_tools.api_v1 import QualCoreResultHandler, APIHelpers
21+
from spark_rapids_tools.api_v1 import QualCore
2222
from spark_rapids_tools.tools.qualx.qualx_main import predict
2323
from spark_rapids_tools.tools.qualx.util import print_summary, print_speedup_summary
2424

@@ -37,8 +37,8 @@ class Prediction(QualXTool):
3737
name = 'prediction'
3838

3939
@property
40-
def qual_handler(self) -> QualCoreResultHandler:
41-
return APIHelpers.QualCore.build_handler(dir_path=self.qual_output)
40+
def qual_handler(self) -> QualCore:
41+
return QualCore(self.qual_output)
4242

4343
def __prepare_prediction_output_info(self) -> dict:
4444
"""

user_tools/src/spark_rapids_pytools/rapids/rapids_tool.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
from spark_rapids_pytools.rapids.rapids_job import RapidsJobPropContainer
3838
from spark_rapids_pytools.rapids.tool_ctxt import ToolContext
3939
from spark_rapids_tools import CspEnv
40-
from spark_rapids_tools.api_v1 import ToolResultHandlerT, APIHelpers
40+
from spark_rapids_tools.api_v1 import ToolResultHandlerT
41+
from spark_rapids_tools.api_v1 import APIResHandler, QualCore, ProfCore
4142
from spark_rapids_tools.configuration.common import RuntimeDependency
4243
from spark_rapids_tools.configuration.submission.distributed_config import DistributedToolsConfig
4344
from spark_rapids_tools.configuration.tools_config import ToolsConfig
@@ -448,7 +449,7 @@ class RapidsJarTool(RapidsTool, Generic[ToolResultHandlerT]):
448449
"""
449450

450451
@cached_property
451-
def core_handler(self) -> ToolResultHandlerT:
452+
def core_handler(self) -> APIResHandler[ToolResultHandlerT]:
452453
"""
453454
Create and return a coreHandler instance for reading core reports.
454455
This property should always be called after the scala code has executed.
@@ -459,9 +460,9 @@ def core_handler(self) -> ToolResultHandlerT:
459460
"""
460461
normalized_tool_name = self.name.lower()
461462
if 'qualification' in normalized_tool_name:
462-
return APIHelpers.QualCore.build_handler(dir_path=self.csp_output_path)
463+
return QualCore(self.csp_output_path)
463464
if 'profiling' in normalized_tool_name:
464-
return APIHelpers.ProfCore.build_handler(dir_path=self.csp_output_path)
465+
return ProfCore(self.csp_output_path)
465466
raise ValueError(f'Tool name [{normalized_tool_name}] has no CoreHandler associated with it.')
466467

467468
def _process_jar_arg(self):

user_tools/src/spark_rapids_tools/api_v1/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,12 @@
4242
from .builder import (
4343
CSVReportCombiner,
4444
CSVReport,
45-
APIHelpers
45+
QualCore,
46+
ProfCore,
47+
QualWrapper,
48+
ProfWrapper,
49+
APIResHandler,
50+
CombinedCSVBuilder
4651
)
4752

4853
__all__ = [
@@ -60,5 +65,10 @@
6065
'ProfCoreResultHandler',
6166
'CSVReportCombiner',
6267
'CSVReport',
63-
'APIHelpers'
68+
'QualCore',
69+
'ProfCore',
70+
'QualWrapper',
71+
'ProfWrapper',
72+
'APIResHandler',
73+
'CombinedCSVBuilder'
6474
]

0 commit comments

Comments (0)