|
43 | 43 | cast,
|
44 | 44 | TYPE_CHECKING,
|
45 | 45 | )
|
| 46 | +import datetime |
46 | 47 |
|
47 | 48 | import numpy as np
|
48 | 49 | import pandas as pd
|
@@ -2982,6 +2983,90 @@ class locomotion
|
2982 | 2983 | ).resolved_copy
|
2983 | 2984 | return DataFrame(internal)
|
2984 | 2985 |
|
def between_time(
    self,
    start_time: Union[datetime.time, str],
    end_time: Union[datetime.time, str],
    include_start: bool = True,
    include_end: bool = True,
    axis: Union[int, str] = 0,
) -> Union["Series", "DataFrame"]:
    """
    Keep the rows whose index time of day lies between two times (e.g., 9:00-9:30 AM).

    When ``start_time`` is later than ``end_time`` the selection is inverted:
    the rows whose time of day is *not* between the two times are returned.

    Parameters
    ----------
    start_time : datetime.time or str
        Lower limit of the time-of-day filter.
    end_time : datetime.time or str
        Upper limit of the time-of-day filter.
    include_start : bool, default True
        Whether rows exactly at ``start_time`` are kept.
    include_end : bool, default True
        Whether rows exactly at ``end_time`` are kept.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Axis whose labels are filtered. Only ``0`` / ``'index'`` is
        currently supported.

    Returns
    -------
    Series or DataFrame
        Data from the original object restricted to the requested
        time-of-day range.

    Raises
    ------
    NotImplementedError
        If ``axis`` does not resolve to 0.
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    at_time : Select values at a particular time of the day.
    first : Select initial periods of time series based on a date offset.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_between_time : Get just the index locations for
        values between particular times of the day.

    Examples
    --------
    >>> idx = pd.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> kdf = ks.DataFrame({'A': [1, 2, 3, 4]}, index=idx)
    >>> kdf
                         A
    2018-04-09 00:00:00  1
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3
    2018-04-12 01:00:00  4

    >>> kdf.between_time('0:15', '0:45')
                         A
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3

    Passing a ``start_time`` that is later than ``end_time`` returns the
    rows that fall *outside* the two times:

    >>> kdf.between_time('0:45', '0:15')
                         A
    2018-04-09 00:00:00  1
    2018-04-12 01:00:00  4
    """
    from databricks.koalas.indexes import DatetimeIndex

    # Only filtering along the index is implemented so far.
    if validate_axis(axis) != 0:
        raise NotImplementedError("between_time currently only works for axis=0")

    index_is_datetime = isinstance(self.index, DatetimeIndex)
    if not index_is_datetime:
        raise TypeError("Index must be DatetimeIndex")

    def filter_batch(batch):
        # The time-of-day selection itself is delegated to pandas.
        return batch.between_time(start_time, end_time, include_start, include_end)

    return self.koalas.apply_batch(filter_batch)
| 3069 | + |
2985 | 3070 | def where(self, cond, other=np.nan) -> "DataFrame":
|
2986 | 3071 | """
|
2987 | 3072 | Replace values where the condition is False.
|
|
0 commit comments