Skip to content

Commit 583e03d

Browse files
authored
Implemented DataFrame.between_time (#2111)
ref #1929

Implement `DataFrame.between_time`

```py
>>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
>>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
>>> kts = ks.from_pandas(ts)
>>> kts
                     A
2018-04-09 00:00:00  1
2018-04-10 00:20:00  2
2018-04-11 00:40:00  3
2018-04-12 01:00:00  4

>>> kts.between_time('0:15', '0:45')
                     A
2018-04-10 00:20:00  2
2018-04-11 00:40:00  3

You get the times that are *not* between two times by setting
``start_time`` later than ``end_time``:

>>> kts.between_time('0:45', '0:15')
                     A
2018-04-09 00:00:00  1
2018-04-12 01:00:00  4
```
1 parent 48c311b commit 583e03d

File tree

4 files changed

+109
-1
lines changed

4 files changed

+109
-1
lines changed

databricks/koalas/frame.py

+85
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
cast,
4444
TYPE_CHECKING,
4545
)
46+
import datetime
4647

4748
import numpy as np
4849
import pandas as pd
@@ -2982,6 +2983,90 @@ class locomotion
29822983
).resolved_copy
29832984
return DataFrame(internal)
29842985

2986+
def between_time(
    self,
    start_time: Union[datetime.time, str],
    end_time: Union[datetime.time, str],
    include_start: bool = True,
    include_end: bool = True,
    axis: Union[int, str] = 0,
) -> Union["Series", "DataFrame"]:
    """
    Select values between particular times of the day (e.g., 9:00-9:30 AM).

    Passing a ``start_time`` that is later than ``end_time`` inverts the
    selection: the result then holds the times that are *not* between
    the two limits.

    Parameters
    ----------
    start_time : datetime.time or str
        Lower time-of-day limit of the filter.
    end_time : datetime.time or str
        Upper time-of-day limit of the filter.
    include_start : bool, default True
        Whether rows exactly at ``start_time`` are kept in the result.
    include_end : bool, default True
        Whether rows exactly at ``end_time`` are kept in the result.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Axis whose labels are compared against the time range.
        Only ``axis=0`` is currently supported.

    Returns
    -------
    Series or DataFrame
        Data from the original object filtered to the specified time range.

    Raises
    ------
    NotImplementedError
        If ``axis`` resolves to anything other than 0.
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    at_time : Select values at a particular time of the day.
    first : Select initial periods of time series based on a date offset.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_between_time : Get just the index locations for
        values between particular times of the day.

    Examples
    --------
    >>> idx = pd.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> kdf = ks.DataFrame({'A': [1, 2, 3, 4]}, index=idx)
    >>> kdf
                         A
    2018-04-09 00:00:00  1
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3
    2018-04-12 01:00:00  4

    >>> kdf.between_time('0:15', '0:45')
                         A
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3

    You get the times that are *not* between two times by setting
    ``start_time`` later than ``end_time``:

    >>> kdf.between_time('0:45', '0:15')
                         A
    2018-04-09 00:00:00  1
    2018-04-12 01:00:00  4
    """
    from databricks.koalas.indexes import DatetimeIndex

    # Normalise the axis argument first, then reject everything but the
    # row axis.
    axis = validate_axis(axis)
    if axis != 0:
        raise NotImplementedError("between_time currently only works for axis=0")

    # Time-of-day filtering is only defined for a datetime-like index.
    if not isinstance(self.index, DatetimeIndex):
        raise TypeError("Index must be DatetimeIndex")

    def filter_batch(pdf):
        # NOTE(review): `pdf` is presumably the pandas DataFrame that
        # apply_batch hands to the callback - confirm against apply_batch.
        selected = pdf.between_time(start_time, end_time, include_start, include_end)
        return selected

    return self.koalas.apply_batch(filter_batch)
3069+
29853070
def where(self, cond, other=np.nan) -> "DataFrame":
29863071
"""
29873072
Replace values where the condition is False.

databricks/koalas/missing/frame.py

-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ class _MissingPandasLikeDataFrame(object):
3838
asfreq = _unsupported_function("asfreq")
3939
asof = _unsupported_function("asof")
4040
at_time = _unsupported_function("at_time")
41-
between_time = _unsupported_function("between_time")
4241
boxplot = _unsupported_function("boxplot")
4342
combine = _unsupported_function("combine")
4443
combine_first = _unsupported_function("combine_first")

databricks/koalas/tests/test_dataframe.py

+23
Original file line numberDiff line numberDiff line change
@@ -5422,3 +5422,26 @@ def test_align(self):
54225422
pdf_l, pdf_r = pdf1.align(pdf2, join=join, axis=1)
54235423
self.assert_eq(kdf_l.sort_index(), pdf_l.sort_index())
54245424
self.assert_eq(kdf_r.sort_index(), pdf_r.sort_index())
5425+
5426+
def test_between_time(self):
    """Check ``DataFrame.between_time`` against pandas and its two error paths."""
    index = pd.date_range("2018-04-09", periods=4, freq="1D20min")
    pdf = pd.DataFrame({"A": [1, 2, 3, 4]}, index=index)
    kdf = ks.from_pandas(pdf)

    # The Koalas result is sorted by index before it is compared to pandas.
    expected = pdf.between_time("0:15", "0:45")
    actual = kdf.between_time("0:15", "0:45").sort_index()
    self.assert_eq(expected, actual, almost=True)

    # Any axis other than 0 is rejected.
    unsupported_axis_message = "between_time currently only works for axis=0"
    with self.assertRaisesRegex(NotImplementedError, unsupported_axis_message):
        kdf.between_time("0:15", "0:45", axis=1)

    # A frame built without a datetime index must be rejected.
    kdf_without_datetime_index = ks.DataFrame({"A": [1, 2, 3, 4]})
    with self.assertRaisesRegex(TypeError, "Index must be DatetimeIndex"):
        kdf_without_datetime_index.between_time("0:15", "0:45")
5444+
5445+
def test_between_time_no_shortcut(self):
    """Re-run ``test_between_time`` with ``compute.shortcut_limit`` set to 0."""
    shortcut_limit_zero = ks.option_context("compute.shortcut_limit", 0)
    with shortcut_limit_zero:
        self.test_between_time()

docs/source/reference/frame.rst

+1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Reindexing / Selection / Label manipulation
164164
DataFrame.add_prefix
165165
DataFrame.add_suffix
166166
DataFrame.align
167+
DataFrame.between_time
167168
DataFrame.drop
168169
DataFrame.droplevel
169170
DataFrame.drop_duplicates

0 commit comments

Comments
 (0)