Skip to content

Commit b03a9fa

Browse files
authored
Merge pull request #283 from posit-dev/fix-pre-isolate
fix: usage of `pre=` should be isolated to steps using it
2 parents 7f19856 + 4f779f5 commit b03a9fa

File tree

3 files changed

+619
-2
lines changed

3 files changed

+619
-2
lines changed

pointblank/_utils.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,46 @@ def _select_df_lib(preference: str = "polars") -> Any:
240240
return pl if pl is not None else pd
241241

242242

243+
def _copy_dataframe(df):
244+
"""
245+
Create a copy of a DataFrame, handling different DataFrame types.
246+
247+
This function attempts to create a proper copy of the DataFrame using
248+
the most appropriate method for each DataFrame type.
249+
"""
250+
# Try standard copy methods first
251+
if hasattr(df, "copy") and callable(getattr(df, "copy")):
252+
try:
253+
return df.copy()
254+
except Exception:
255+
pass
256+
257+
if hasattr(df, "clone") and callable(getattr(df, "clone")):
258+
try:
259+
return df.clone()
260+
except Exception:
261+
pass
262+
263+
# Try the select('*') approach for DataFrames that support it
264+
# This works well for PySpark and other SQL-like DataFrames
265+
if hasattr(df, "select") and callable(getattr(df, "select")):
266+
try:
267+
return df.select("*")
268+
except Exception:
269+
pass
270+
271+
# For DataFrames that can't be copied, return original
272+
# This provides some protection while avoiding crashes
273+
try:
274+
import copy
275+
276+
return copy.deepcopy(df)
277+
except Exception:
278+
# If all else fails, return the original DataFrame
279+
# This is better than crashing the validation
280+
return df
281+
282+
243283
def _convert_to_narwhals(df: FrameT) -> nw.DataFrame:
244284
# Convert the DataFrame to a format that narwhals can work with
245285
return nw.from_native(df)

pointblank/validate.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
_check_any_df_lib,
7676
_check_invalid_fields,
7777
_column_test_prep,
78+
_copy_dataframe,
7879
_count_null_values_in_column,
7980
_count_true_values_in_column,
8081
_derive_bounds,
@@ -9986,8 +9987,9 @@ def interrogate(
99869987
validation.active = False
99879988
continue
99889989

9989-
# Make a copy of the table for this step
9990-
data_tbl_step = data_tbl
9990+
# Make a per-step copy of the table (best effort, via `_copy_dataframe()`) to ensure proper isolation
9991+
# This prevents modifications from one validation step affecting others
9992+
data_tbl_step = _copy_dataframe(data_tbl)
99919993

99929994
# ------------------------------------------------
99939995
# Preprocessing stage

0 commit comments

Comments
 (0)