Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,7 @@ Other Removals
- Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_)
- Enforced deprecation of ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
- Enforced deprecation of allowing values other than ``bool`` or NA for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` (:issue:`59615`)
- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range` (:issue:`56036`)
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`)
Expand Down
15 changes: 2 additions & 13 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
TYPE_CHECKING,
Self,
)
import warnings

import numpy as np

Expand All @@ -19,7 +18,7 @@
PYARROW_MIN_VERSION,
pa_version_under16p0,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_na_arg

from pandas.core.dtypes.common import (
is_scalar,
Expand Down Expand Up @@ -242,17 +241,7 @@ def insert(self, loc: int, item) -> ArrowStringArray:
return super().insert(loc, item)

def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
# TODO: Enforce in 3.0 (#59615)
# GH#59561
warnings.warn(
f"Allowing a non-bool 'na' in obj.str.{method_name} is deprecated "
"and will raise in a future version.",
FutureWarning, # pdlint: ignore[warning_class]
stacklevel=find_stack_level(),
)
na = bool(na)

validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
if self.dtype.na_value is np.nan:
if na is lib.no_default or isna(na):
# NaN propagates as False
Expand Down
33 changes: 4 additions & 29 deletions pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@
cast,
)
import unicodedata
import warnings

import numpy as np

from pandas._libs import lib
import pandas._libs.missing as libmissing
import pandas._libs.ops as libops
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_na_arg

from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -145,6 +144,7 @@ def _str_contains(
na=lib.no_default,
regex: bool = True,
):
validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
if regex:
if not case:
flags |= re.IGNORECASE
Expand All @@ -158,41 +158,16 @@ def _str_contains(
else:
upper_pat = pat.upper()
f = lambda x: upper_pat in x.upper()
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
# TODO: Enforce in 3.0 (#59615)
# GH#59561
warnings.warn(
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
"and will raise in a future version.",
FutureWarning, # pdlint: ignore[warning_class]
stacklevel=find_stack_level(),
)
return self._str_map(f, na, dtype=np.dtype("bool"))

def _str_startswith(self, pat, na=lib.no_default):
validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
f = lambda x: x.startswith(pat)
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
# TODO: Enforce in 3.0 (#59615)
# GH#59561
warnings.warn(
"Allowing a non-bool 'na' in obj.str.startswith is deprecated "
"and will raise in a future version.",
FutureWarning, # pdlint: ignore[warning_class]
stacklevel=find_stack_level(),
)
return self._str_map(f, na_value=na, dtype=np.dtype(bool))

def _str_endswith(self, pat, na=lib.no_default):
validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
f = lambda x: x.endswith(pat)
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
# TODO: Enforce in 3.0 (#59615)
# GH#59561
warnings.warn(
"Allowing a non-bool 'na' in obj.str.endswith is deprecated "
"and will raise in a future version.",
FutureWarning, # pdlint: ignore[warning_class]
stacklevel=find_stack_level(),
)
return self._str_map(f, na_value=na, dtype=np.dtype(bool))

def _str_replace(
Expand Down
41 changes: 13 additions & 28 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,17 +175,14 @@ def test_contains_na_kwarg_for_nullable_string_dtype(

values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype)

msg = (
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
"will raise in a future version"
)
warn = None
if not pd.isna(na) and not isinstance(na, bool):
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg):
if na in [0, 3] and na is not False:
msg = f"na must be a valid NA value; got {na}"
with pytest.raises(ValueError, match=msg):
values.str.contains("a", na=na, regex=regex)
else:
result = values.str.contains("a", na=na, regex=regex)
expected = Series([True, False, False, True, expected], dtype="boolean")
tm.assert_series_equal(result, expected)
expected = Series([True, False, False, True, expected], dtype="boolean")
tm.assert_series_equal(result, expected)


def test_contains_moar(any_string_dtype):
Expand Down Expand Up @@ -255,19 +252,9 @@ def test_contains_nan(any_string_dtype):
expected = Series([True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)

msg = (
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
"will raise in a future version"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.str.contains("foo", na="foo")
if any_string_dtype == "object":
expected = Series(["foo", "foo", "foo"], dtype=np.object_)
elif any_string_dtype.na_value is np.nan:
expected = Series([True, True, True], dtype=np.bool_)
else:
expected = Series([True, True, True], dtype="boolean")
tm.assert_series_equal(result, expected)
msg = "na must be a valid NA value; got foo"
with pytest.raises(ValueError, match=msg):
s.str.contains("foo", na="foo")

result = s.str.contains("foo")
if any_string_dtype == "str":
Expand Down Expand Up @@ -352,12 +339,10 @@ def test_startswith_endswith_validate_na(any_string_dtype):
["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"],
dtype=any_string_dtype,
)

msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = "na must be a valid NA value; got baz"
with pytest.raises(ValueError, match=msg):
ser.str.startswith("kapow", na="baz")
msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pytest.raises(ValueError, match=msg):
ser.str.endswith("bar", na="baz")


Expand Down
32 changes: 32 additions & 0 deletions pandas/util/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
is_bool,
is_integer,
)
from pandas.core.dtypes.missing import isna

BoolishT = TypeVar("BoolishT", bool, int)
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
Expand Down Expand Up @@ -269,6 +270,37 @@ def validate_bool_kwarg(
return value


def validate_na_arg(
    value, name: str, allow_no_default: bool = False, allow_bool: bool = False
) -> None:
    """
    Validate na arguments.

    Parameters
    ----------
    value : object
        Value to validate.
    name : str
        Name of the argument, used to raise an informative error message.
    allow_no_default : bool, default False
        Whether to allow ``value`` to be ``lib.no_default``.
    allow_bool : bool, default False
        Whether to allow ``value`` to be an instance of bool.

    Raises
    ------
    ValueError
        When ``value`` is determined to be invalid, i.e. it is not a scalar
        NA value and is not covered by one of the ``allow_*`` options.
    """
    if allow_no_default and value is lib.no_default:
        return
    if allow_bool and isinstance(value, bool):
        return
    # Only scalars qualify: for list-likes ``isna`` returns an array, so a
    # one-element container such as ``[np.nan]`` would otherwise be accepted
    # and longer ones would fail with an unrelated "truth value is ambiguous"
    # error instead of the message below.
    if lib.is_scalar(value) and isna(value):
        return
    raise ValueError(f"{name} must be a valid NA value; got {value}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
raise ValueError(f"{name} must be a valid NA value; got {value}")
raise ValueError(f"{name} must be a bool (True/False) or a valid NA value; got {value}")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, I don't know if there is a case for which you are foreseeing allow_no_default and allow_bool to be used, but if they would actually be used, then the error message would also have to be updated to follow that.

(so for now could also simplify things leaving out the keywords?)

Copy link
Member Author

@rhshadrach rhshadrach Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would you feel about ...must be a valid value; got {value}?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I personally think it is useful to give an indication about what a valid value is (or say "a valid value for dtype ..", so then if it is bool, then it is clearer that it should be True/False or a missing value (for nullable bool)?)



def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
"""
Validate the keyword arguments to 'fillna'.
Expand Down
Loading