
Commit 29d3d0a

Fix compatibility issues with pandas 2.0
A new pandas release (2.0) broke the library's test cases, which started failing with errors like "TypeError: DataFrame.drop() takes from 1 to 2 positional arguments but 3 were given". Investigation traced the root cause to pandas 2.0 enforcing earlier deprecations: several methods, including drop(), no longer accept non-keyword arguments, and Series.append() has been removed. This commit fixes the failures by:

1. passing keyword arguments to the drop() method
2. replacing Series.append() with pandas.concat()

Internal tracking ticket: https://tiny.amazon.com/kzvlpcax
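As an illustration (not part of the commit), here is a minimal sketch of the drop() breakage and its fix, using a made-up toy DataFrame:

import pandas as pd

df = pd.DataFrame({"label": [0, 1], "feature": [3.0, 4.0]})

# Pre-2.0 call style; under pandas 2.0 this raises
# "TypeError: DataFrame.drop() takes from 1 to 2 positional arguments but 3 were given"
# df = df.drop("label", 1)

# Fixed: everything after `labels` must now be passed by keyword.
df = df.drop(labels="label", axis=1)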
1 parent 71fd1bd commit 29d3d0a

File tree

1 file changed: +6 -6 lines changed

src/smclarify/bias/report.py

Lines changed: 6 additions & 6 deletions
@@ -207,7 +207,7 @@ def _positive_predicted_index(
         raise ValueError("Predicted Label Column series datatype is not the same as Label Column series")
     if predicted_label_datatype == common.DataType.CONTINUOUS:
         predicted_label_data = predicted_label_data.astype(label_data.dtype)
-        data_interval_indices = _interval_index(label_data.append(predicted_label_data), positive_label_values)
+        data_interval_indices = _interval_index(pd.concat([label_data, predicted_label_data]), positive_label_values)
         positive_predicted_index = _continuous_data_idx(predicted_label_data, data_interval_indices)
     elif predicted_label_datatype == common.DataType.CATEGORICAL and positive_label_values:
         positive_predicted_index = _categorical_data_idx(predicted_label_data, positive_label_values)
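For context (an illustration on toy Series, not from the commit): pd.concat() is the drop-in replacement for the removed Series.append(); by default both preserve the original indices of their inputs:

import pandas as pd

label_data = pd.Series([0.1, 0.9])
predicted_label_data = pd.Series([0.2, 0.8])

# Removed in pandas 2.0:
# combined = label_data.append(predicted_label_data)

# Replacement; pass ignore_index=True if a fresh RangeIndex is wanted instead.
combined = pd.concat([label_data, predicted_label_data])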
@@ -436,7 +436,7 @@ def model_performance_report(df: pd.DataFrame, label_column: LabelColumn, predic
         data=label_data_series, data_type=label_data_type, positive_values=positive_label_values
     )
     if label_column.name in df.columns:
-        df = df.drop(label_column.name, 1)
+        df = df.drop(labels=label_column.name, axis=1)

     predicted_label_data_type, predicted_label_data_series = common.ensure_series_data_type(
         predicted_label_column.series, positive_label_values
@@ -508,7 +508,7 @@ def bias_report(
         metrics_to_run.extend(post_training_metrics)
         predicted_label_series = predicted_label_column.series
         if predicted_label_column.name in df.columns:
-            df = df.drop(predicted_label_column.name, 1)
+            df = df.drop(labels=predicted_label_column.name, axis=1)
     else:
         pre_training_metrics = (
             smclarify.bias.metrics.PRETRAINING_METRICS
@@ -588,15 +588,15 @@ def _report(

     sensitive_facet_values = facet_column.sensitive_values
     facet_data_type, facet_data_series = common.ensure_series_data_type(df[facet_column.name], sensitive_facet_values)
-    df = df.drop(facet_column.name, 1)
+    df = df.drop(labels=facet_column.name, axis=1)

     positive_label_values = label_column.positive_label_values
     label_data_type, label_data_series = common.ensure_series_data_type(label_column.series, positive_label_values)
     positive_label_index, _ = _positive_label_index(
         data=label_data_series, data_type=label_data_type, positive_values=positive_label_values
     )
     if label_column.name in df.columns:
-        df = df.drop(label_column.name, 1)
+        df = df.drop(labels=label_column.name, axis=1)

     positive_predicted_label_index = [None]
     if predicted_label_column:
@@ -612,7 +612,7 @@ def _report(
             positive_label_values=positive_label_values,
         )
         if predicted_label_column.name in df.columns:
-            df = df.drop(predicted_label_column.name, 1)
+            df = df.drop(labels=predicted_label_column.name, axis=1)

     # Above are validations and preprocessing, the real reporting logic is moved to a new method for clarity and
     # to avoid using wrong data by chance (e.g., label_data_series should be used, instead of label_column.data).
