Skip to content

Commit

Permalink
Fix future warnings for pandas>=2.2 (#1494)
Browse files Browse the repository at this point in the history
This PR fixes #1490.

- Resolves FutureWarning messages when running qualification with
pandas>=2.2 by replacing chained `df[col].fillna(value, inplace=True)` calls
with the new form `df.fillna({col: value}, inplace=True)`.
- Resolves SettingWithCopyWarning by making an explicit copy of the input
DataFrame before modifying it.

---------

Signed-off-by: Lee Yang <[email protected]>
  • Loading branch information
leewyang authored Jan 10, 2025
1 parent a215df7 commit bb048d0
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
6 changes: 3 additions & 3 deletions user_tools/src/spark_rapids_pytools/rapids/qualification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -555,9 +555,9 @@ def __update_apps_with_prediction_info(self,
# Rename the source column to the destination column
result_df.rename(columns={src_col: dst_col}, errors='ignore', inplace=True)
# if the qualx does not have a speedup value, default to 1.0
result_df['Estimated GPU Speedup'].fillna(1.0, inplace=True)
result_df.fillna({'Estimated GPU Speedup': 1.0}, inplace=True)
# if the qualx does not have a duration value, default to App Duration
result_df['Estimated GPU Duration'].fillna(result_df['App Duration'], inplace=True)
result_df.fillna({'Estimated GPU Duration': result_df['App Duration']}, inplace=True)
# We need to be careful about other columns that depend on remapped columns
result_df['Estimated GPU Time Saved'] = result_df['App Duration'] - result_df['Estimated GPU Duration']
return result_df
Expand Down
4 changes: 2 additions & 2 deletions user_tools/src/spark_rapids_tools/tools/qualx/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -810,7 +810,7 @@ def scan_tbl(

if not app_info.empty:
app_info['appName'] = app_name
app_info['sparkVersion'].fillna('Unknown', inplace=True)
app_info.fillna({'sparkVersion': 'Unknown'}, inplace=True)

# Get jar versions:
cudf_version = '-'
Expand Down
11 changes: 6 additions & 5 deletions user_tools/src/spark_rapids_tools/tools/top_candidates.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -101,13 +101,14 @@ def _generate_output_table_internal(self, output_df: pd.DataFrame) -> pd.DataFra
"""
# Create and append 'Speedup Category Order' column to output_df for sorting order
speedup_category_order = self.props.get('ineligibleCategory') + self.props.get('eligibleCategories')
output_df['Speedup Category Order'] = \
output_df['Estimated GPU Speedup Category'].map({name: i for i, name in enumerate(speedup_category_order)})
df = output_df.copy()
df['Speedup Category Order'] = \
df['Estimated GPU Speedup Category'].map({name: i for i, name in enumerate(speedup_category_order)})
# Sort columns and select output columns
output_columns = self.props.get('outputColumns')
sorting_columns = self.props.get('sortingColumns')
valid_output_columns = list(output_df.columns.intersection(output_columns))
res_df = output_df.sort_values(by=sorting_columns, ascending=False)[valid_output_columns]
valid_output_columns = list(df.columns.intersection(output_columns))
res_df = df.sort_values(by=sorting_columns, ascending=False)[valid_output_columns]
# this is a bit weird since hardcoding, but we don't want this to have ** for csv output
if 'Estimated GPU Speedup Category' in res_df:
res_df.rename(columns={'Estimated GPU Speedup Category': 'Estimated GPU Speedup Category**'},
Expand Down

0 comments on commit bb048d0

Please sign in to comment.