Skip to content

Commit 6ffb507

Browse files
committed
CLN: Enforce deprecation of not validating argument
1 parent 350202d commit 6ffb507

File tree

5 files changed

+52
-70
lines changed

5 files changed

+52
-70
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,7 @@ Other Removals
744744
- Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
745745
- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_)
746746
- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
747+
- Enforced deprecation disallowing values for ``na`` that are neither ``bool`` nor NA in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` (:issue:`59615`)
747748
- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range` (:issue:`56036`)
748749
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
749750
- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`)

pandas/core/arrays/string_arrow.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
TYPE_CHECKING,
77
Self,
88
)
9-
import warnings
109

1110
import numpy as np
1211

@@ -19,7 +18,7 @@
1918
PYARROW_MIN_VERSION,
2019
pa_version_under16p0,
2120
)
22-
from pandas.util._exceptions import find_stack_level
21+
from pandas.util._validators import validate_na_arg
2322

2423
from pandas.core.dtypes.common import (
2524
is_scalar,
@@ -242,17 +241,7 @@ def insert(self, loc: int, item) -> ArrowStringArray:
242241
return super().insert(loc, item)
243242

244243
def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
245-
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
246-
# TODO: Enforce in 3.0 (#59615)
247-
# GH#59561
248-
warnings.warn(
249-
f"Allowing a non-bool 'na' in obj.str.{method_name} is deprecated "
250-
"and will raise in a future version.",
251-
FutureWarning, # pdlint: ignore[warning_class]
252-
stacklevel=find_stack_level(),
253-
)
254-
na = bool(na)
255-
244+
validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
256245
if self.dtype.na_value is np.nan:
257246
if na is lib.no_default or isna(na):
258247
# NaN propagates as False

pandas/core/strings/object_array.py

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@
99
cast,
1010
)
1111
import unicodedata
12-
import warnings
1312

1413
import numpy as np
1514

1615
from pandas._libs import lib
1716
import pandas._libs.missing as libmissing
1817
import pandas._libs.ops as libops
19-
from pandas.util._exceptions import find_stack_level
18+
from pandas.util._validators import validate_na_arg
2019

2120
from pandas.core.dtypes.common import pandas_dtype
2221
from pandas.core.dtypes.missing import isna
@@ -145,6 +144,7 @@ def _str_contains(
145144
na=lib.no_default,
146145
regex: bool = True,
147146
):
147+
validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
148148
if regex:
149149
if not case:
150150
flags |= re.IGNORECASE
@@ -158,41 +158,16 @@ def _str_contains(
158158
else:
159159
upper_pat = pat.upper()
160160
f = lambda x: upper_pat in x.upper()
161-
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
162-
# TODO: Enforce in 3.0 (#59615)
163-
# GH#59561
164-
warnings.warn(
165-
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
166-
"and will raise in a future version.",
167-
FutureWarning, # pdlint: ignore[warning_class]
168-
stacklevel=find_stack_level(),
169-
)
170161
return self._str_map(f, na, dtype=np.dtype("bool"))
171162

172163
def _str_startswith(self, pat, na=lib.no_default):
164+
validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
173165
f = lambda x: x.startswith(pat)
174-
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
175-
# TODO: Enforce in 3.0 (#59615)
176-
# GH#59561
177-
warnings.warn(
178-
"Allowing a non-bool 'na' in obj.str.startswith is deprecated "
179-
"and will raise in a future version.",
180-
FutureWarning, # pdlint: ignore[warning_class]
181-
stacklevel=find_stack_level(),
182-
)
183166
return self._str_map(f, na_value=na, dtype=np.dtype(bool))
184167

185168
def _str_endswith(self, pat, na=lib.no_default):
169+
validate_na_arg(na, name="na", allow_no_default=True, allow_bool=True)
186170
f = lambda x: x.endswith(pat)
187-
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
188-
# TODO: Enforce in 3.0 (#59615)
189-
# GH#59561
190-
warnings.warn(
191-
"Allowing a non-bool 'na' in obj.str.endswith is deprecated "
192-
"and will raise in a future version.",
193-
FutureWarning, # pdlint: ignore[warning_class]
194-
stacklevel=find_stack_level(),
195-
)
196171
return self._str_map(f, na_value=na, dtype=np.dtype(bool))
197172

198173
def _str_replace(

pandas/tests/strings/test_find_replace.py

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -175,17 +175,14 @@ def test_contains_na_kwarg_for_nullable_string_dtype(
175175

176176
values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype)
177177

178-
msg = (
179-
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
180-
"will raise in a future version"
181-
)
182-
warn = None
183-
if not pd.isna(na) and not isinstance(na, bool):
184-
warn = FutureWarning
185-
with tm.assert_produces_warning(warn, match=msg):
178+
if na in [0, 3] and na is not False:
179+
msg = f"na must be a valid NA value; got {na}"
180+
with pytest.raises(ValueError, match=msg):
181+
values.str.contains("a", na=na, regex=regex)
182+
else:
186183
result = values.str.contains("a", na=na, regex=regex)
187-
expected = Series([True, False, False, True, expected], dtype="boolean")
188-
tm.assert_series_equal(result, expected)
184+
expected = Series([True, False, False, True, expected], dtype="boolean")
185+
tm.assert_series_equal(result, expected)
189186

190187

191188
def test_contains_moar(any_string_dtype):
@@ -255,19 +252,9 @@ def test_contains_nan(any_string_dtype):
255252
expected = Series([True, True, True], dtype=expected_dtype)
256253
tm.assert_series_equal(result, expected)
257254

258-
msg = (
259-
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
260-
"will raise in a future version"
261-
)
262-
with tm.assert_produces_warning(FutureWarning, match=msg):
263-
result = s.str.contains("foo", na="foo")
264-
if any_string_dtype == "object":
265-
expected = Series(["foo", "foo", "foo"], dtype=np.object_)
266-
elif any_string_dtype.na_value is np.nan:
267-
expected = Series([True, True, True], dtype=np.bool_)
268-
else:
269-
expected = Series([True, True, True], dtype="boolean")
270-
tm.assert_series_equal(result, expected)
255+
msg = "na must be a valid NA value; got foo"
256+
with pytest.raises(ValueError, match=msg):
257+
s.str.contains("foo", na="foo")
271258

272259
result = s.str.contains("foo")
273260
if any_string_dtype == "str":
@@ -352,12 +339,10 @@ def test_startswith_endswith_validate_na(any_string_dtype):
352339
["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"],
353340
dtype=any_string_dtype,
354341
)
355-
356-
msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
357-
with tm.assert_produces_warning(FutureWarning, match=msg):
342+
msg = "na must be a valid NA value; got baz"
343+
with pytest.raises(ValueError, match=msg):
358344
ser.str.startswith("kapow", na="baz")
359-
msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated"
360-
with tm.assert_produces_warning(FutureWarning, match=msg):
345+
with pytest.raises(ValueError, match=msg):
361346
ser.str.endswith("bar", na="baz")
362347

363348

pandas/util/_validators.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
is_bool,
2323
is_integer,
2424
)
25+
from pandas.core.dtypes.missing import isna
2526

2627
BoolishT = TypeVar("BoolishT", bool, int)
2728
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
@@ -269,6 +270,37 @@ def validate_bool_kwarg(
269270
return value
270271

271272

273+
def validate_na_arg(
    value, name: str, allow_no_default: bool = False, allow_bool: bool = False
) -> None:
    """
    Validate na arguments.

    A value is accepted when it is a scalar that ``isna`` reports as missing,
    or when it is one of the explicitly allowed alternatives selected by
    ``allow_no_default`` / ``allow_bool``.

    Parameters
    ----------
    value : object
        Value to validate.
    name : str
        Name of the argument, used to raise an informative error message.
    allow_no_default : bool, default False
        Whether to allow ``value`` to be ``lib.no_default``.
    allow_bool : bool, default False
        Whether to allow ``value`` to be an instance of bool.

    Raises
    ------
    ValueError
        When ``value`` is determined to be invalid.
    """
    if allow_no_default and value is lib.no_default:
        return
    if allow_bool and isinstance(value, bool):
        return
    # Only a scalar can be an NA marker.  ``isna`` returns an ndarray for
    # list-likes, which is either ambiguous in a boolean context or would
    # wrongly accept inputs such as ``[np.nan]``.
    if lib.is_scalar(value) and isna(value):
        return
    raise ValueError(f"{name} must be a valid NA value; got {value}")
302+
303+
272304
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
273305
"""
274306
Validate the keyword arguments to 'fillna'.

0 commit comments

Comments
 (0)