Skip to content

Commit

Permalink
Implement Series.between_time() (#2129)
Browse files Browse the repository at this point in the history
```py
>>> idx = pd.date_range('2018-04-09', periods=4, freq='1D20min')
>>> kser = ks.Series([1, 2, 3, 4], index=idx)
>>> kser
2018-04-09 00:00:00    1
2018-04-10 00:20:00    2
2018-04-11 00:40:00    3
2018-04-12 01:00:00    4
dtype: int64

>>> kser.between_time('0:15', '0:45')
2018-04-10 00:20:00    2
2018-04-11 00:40:00    3
dtype: int64
```
  • Loading branch information
xinrong-meng authored Mar 31, 2021
1 parent fcf21dc commit 07c4e36
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 1 deletion.
1 change: 0 additions & 1 deletion databricks/koalas/missing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class MissingPandasLikeSeries(object):
asfreq = _unsupported_function("asfreq")
at_time = _unsupported_function("at_time")
autocorr = _unsupported_function("autocorr")
between_time = _unsupported_function("between_time")
combine = _unsupported_function("combine")
convert_dtypes = _unsupported_function("convert_dtypes")
cov = _unsupported_function("cov")
Expand Down
65 changes: 65 additions & 0 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""
A wrapper class for Spark Column to behave similar to pandas Series.
"""
import datetime
import re
import inspect
import sys
Expand Down Expand Up @@ -5791,6 +5792,70 @@ def align(

return (left_ser.copy(), right.copy()) if copy else (left_ser, right)

def between_time(
    self,
    start_time: Union[datetime.time, str],
    end_time: Union[datetime.time, str],
    include_start: bool = True,
    include_end: bool = True,
    axis: Union[int, str] = 0,
) -> "Series":
    """
    Select values between particular times of the day (e.g., 9:00-9:30 AM).

    By setting ``start_time`` to be later than ``end_time``,
    you can get the times that are *not* between the two times.

    Parameters
    ----------
    start_time : datetime.time or str
        Initial time as a time filter limit.
    end_time : datetime.time or str
        End time as a time filter limit.
    include_start : bool, default True
        Whether the start time needs to be included in the result.
    include_end : bool, default True
        Whether the end time needs to be included in the result.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine range time on index or columns value.

    Returns
    -------
    Series
        Data from the original object filtered to the specified dates range.

    Raises
    ------
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    at_time : Select values at a particular time of the day.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_between_time : Get just the index locations for
        values between particular times of the day.

    Examples
    --------
    >>> idx = pd.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> kser = ks.Series([1, 2, 3, 4], index=idx)
    >>> kser
    2018-04-09 00:00:00    1
    2018-04-10 00:20:00    2
    2018-04-11 00:40:00    3
    2018-04-12 01:00:00    4
    dtype: int64

    >>> kser.between_time('0:15', '0:45')
    2018-04-10 00:20:00    2
    2018-04-11 00:40:00    3
    dtype: int64
    """
    # The filtering itself is delegated to the frame-level ``between_time``:
    # wrap this Series in a frame, filter it there, then unwrap the result.
    filtered_frame = self.to_frame().between_time(
        start_time, end_time, include_start, include_end, axis
    )
    filtered_series = first_series(filtered_frame)
    # Re-apply this Series' own name to the unwrapped result.
    return filtered_series.rename(self.name)

def _cum(self, func, skipna, part_cols=(), ascending=True):
# This is used to cummin, cummax, cumsum, etc.

Expand Down
23 changes: 23 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2882,3 +2882,26 @@ def test_pow_and_rpow(self):
self.assert_eq(pser ** np.nan, kser ** np.nan)
self.assert_eq(pser.rpow(np.nan), kser.rpow(np.nan))
self.assert_eq(1 ** pser, 1 ** kser)

def test_between_time(self):
    """Check that Koalas ``Series.between_time`` agrees with pandas.

    The same comparison is run three times: with an unnamed index, with the
    index named ``"ts"``, and with the index named ``"index"``.
    """
    idx = pd.date_range("2018-04-09", periods=4, freq="1D20min")
    pser = pd.Series([1, 2, 3, 4], index=idx)

    def assert_same_window():
        # Convert the current state of ``pser`` each time so that a renamed
        # index is carried over into the Koalas Series being compared.
        kser = ks.from_pandas(pser)
        expected = pser.between_time("0:15", "0:45").sort_index()
        actual = kser.between_time("0:15", "0:45").sort_index()
        self.assert_eq(expected, actual)

    # Unnamed index first, then the two named-index variants.
    assert_same_window()
    for index_name in ("ts", "index"):
        pser.index.name = index_name
        assert_same_window()
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ Time series-related
Series.shift
Series.first_valid_index
Series.last_valid_index
Series.between_time

Spark-related
-------------
Expand Down

0 comments on commit 07c4e36

Please sign in to comment.