File tree Expand file tree Collapse file tree 3 files changed +619
-2
lines changed
Expand file tree Collapse file tree 3 files changed +619
-2
lines changed Original file line number Diff line number Diff line change @@ -240,6 +240,46 @@ def _select_df_lib(preference: str = "polars") -> Any:
240240 return pl if pl is not None else pd
241241
242242
243+ def _copy_dataframe (df ):
244+ """
245+ Create a copy of a DataFrame, handling different DataFrame types.
246+
247+ This function attempts to create a proper copy of the DataFrame using
248+ the most appropriate method for each DataFrame type.
249+ """
250+ # Try standard copy methods first
251+ if hasattr (df , "copy" ) and callable (getattr (df , "copy" )):
252+ try :
253+ return df .copy ()
254+ except Exception :
255+ pass
256+
257+ if hasattr (df , "clone" ) and callable (getattr (df , "clone" )):
258+ try :
259+ return df .clone ()
260+ except Exception :
261+ pass
262+
263+ # Try the select('*') approach for DataFrames that support it
264+ # This works well for PySpark and other SQL-like DataFrames
265+ if hasattr (df , "select" ) and callable (getattr (df , "select" )):
266+ try :
267+ return df .select ("*" )
268+ except Exception :
269+ pass
270+
271+ # For DataFrames that can't be copied, return original
272+ # This provides some protection while avoiding crashes
273+ try :
274+ import copy
275+
276+ return copy .deepcopy (df )
277+ except Exception :
278+ # If all else fails, return the original DataFrame
279+ # This is better than crashing the validation
280+ return df
281+
282+
def _convert_to_narwhals(df: FrameT) -> nw.DataFrame:
    """Pass the native DataFrame through `nw.from_native()` so narwhals can work with it."""
    narwhals_frame = nw.from_native(df)
    return narwhals_frame
Original file line number Diff line number Diff line change 7575 _check_any_df_lib,
7676 _check_invalid_fields,
7777 _column_test_prep,
78+ _copy_dataframe,
7879 _count_null_values_in_column,
7980 _count_true_values_in_column,
8081 _derive_bounds,
@@ -9986,8 +9987,9 @@ def interrogate(
99869987 validation.active = False
99879988 continue
99889989
9989- # Make a copy of the table for this step
9990- data_tbl_step = data_tbl
9990+ # Make a deep copy of the table for this step to ensure proper isolation
9991+ # This prevents modifications from one validation step affecting others
9992+ data_tbl_step = _copy_dataframe(data_tbl)
99919993
99929994 # ------------------------------------------------
99939995 # Preprocessing stage
You can’t perform that action at this time.
0 commit comments