-
Notifications
You must be signed in to change notification settings - Fork 131
/
reusable_subdags.py
112 lines (96 loc) · 3.02 KB
/
reusable_subdags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import pandas as pd
import unique_users
from hamilton.function_modifiers import subdag, value
def website_interactions() -> pd.DataFrame:
    """Build a small mock table of website interaction events.

    Every row is one event: the time it happened, the ID of the user behind it,
    and the region (``"US"`` or ``"CA"``) it is attributed to. Both regions use
    the same 19 event times; only the user IDs differ.

    :return: Mock event data with ``user_id`` and ``region`` columns, indexed by
        a ``DatetimeIndex`` named ``timestamp`` and sorted by that index.
    """
    event_times = [
        "20220901-14:00:00",
        "20220901-18:30:00",
        "20220901-19:00:00",
        "20220902-08:00:00",
        "20220903-16:00:00",
        "20220907-13:00:00",
        "20220910-14:00:00",
        "20220911-12:00:00",
        "20220914-11:00:00",
        "20220915-07:30:00",
        "20220916-06:00:00",
        "20220917-16:00:00",
        "20220920-17:00:00",
        "20220922-09:30:00",
        "20220922-10:00:00",
        "20220924-07:00:00",
        "20220924-08:00:00",
        "20220925-21:00:00",
        "20220926-15:30:00",
    ]
    # One user ID per entry in event_times, for each region.
    us_user_ids = [1, 2, 1, 3, 1, 4, 1, 3, 1, 2, 1, 2, 5, 2, 1, 6, 1, 1, 2]
    ca_user_ids = [7, 8, 9, 7, 10, 9, 8, 11, 12, 7, 9, 10, 7, 11, 8, 9, 10, 13, 14]

    # All US rows come first, followed by all CA rows.
    rows = [(stamp, user, "US") for stamp, user in zip(event_times, us_user_ids)]
    rows += [(stamp, user, "CA") for stamp, user in zip(event_times, ca_user_ids)]

    events = pd.DataFrame(rows, columns=["timestamp", "user_id", "region"])
    events = events.set_index("timestamp")
    # Sorting happens while the index still holds strings; the fixed-width
    # YYYYMMDD-HH:MM:SS layout makes lexical order match chronological order.
    events = events.sort_index()
    events.index = pd.DatetimeIndex(events.index)
    return events
@subdag(
    unique_users,
    inputs={"grain": value("day")},
    config={"region": "US"},
)
def daily_unique_users_US(unique_users: pd.Series) -> pd.Series:
    """Publish the ``unique_users`` result of a day-grain, US-region subdag.

    The decorator runs the ``unique_users`` module as a subdag with ``grain``
    fixed to the literal ``"day"`` and ``region`` configured as ``"US"``. What
    the series contains is defined in that module, which is not shown here
    (presumably unique-user figures per day -- confirm there).

    :param unique_users: The ``unique_users`` series produced inside the subdag.
        Within this function the name shadows the imported module.
    :return: The same series, passed through unchanged.
    """
    return unique_users
@subdag(
    unique_users,
    inputs={"grain": value("week")},
    config={"region": "US"},
)
def weekly_unique_users_US(unique_users: pd.Series) -> pd.Series:
    """Publish the ``unique_users`` result of a week-grain, US-region subdag.

    The decorator runs the ``unique_users`` module as a subdag with ``grain``
    fixed to the literal ``"week"`` and ``region`` configured as ``"US"``. What
    the series contains is defined in that module, which is not shown here
    (presumably unique-user figures per week -- confirm there).

    :param unique_users: The ``unique_users`` series produced inside the subdag.
        Within this function the name shadows the imported module.
    :return: The same series, passed through unchanged.
    """
    return unique_users
@subdag(
    unique_users,
    inputs={"grain": value("month")},
    config={"region": "US"},
)
def monthly_unique_users_US(unique_users: pd.Series) -> pd.Series:
    """Publish the ``unique_users`` result of a month-grain, US-region subdag.

    The decorator runs the ``unique_users`` module as a subdag with ``grain``
    fixed to the literal ``"month"`` and ``region`` configured as ``"US"``. What
    the series contains is defined in that module, which is not shown here
    (presumably unique-user figures per month -- confirm there).

    :param unique_users: The ``unique_users`` series produced inside the subdag.
        Within this function the name shadows the imported module.
    :return: The same series, passed through unchanged.
    """
    return unique_users
@subdag(
    unique_users,
    inputs={"grain": value("day")},
    config={"region": "CA"},
)
def daily_unique_users_CA(unique_users: pd.Series) -> pd.Series:
    """Publish the ``unique_users`` result of a day-grain, CA-region subdag.

    The decorator runs the ``unique_users`` module as a subdag with ``grain``
    fixed to the literal ``"day"`` and ``region`` configured as ``"CA"``. What
    the series contains is defined in that module, which is not shown here
    (presumably unique-user figures per day -- confirm there).

    :param unique_users: The ``unique_users`` series produced inside the subdag.
        Within this function the name shadows the imported module.
    :return: The same series, passed through unchanged.
    """
    return unique_users
@subdag(
    unique_users,
    inputs={"grain": value("week")},
    config={"region": "CA"},
)
def weekly_unique_users_CA(unique_users: pd.Series) -> pd.Series:
    """Publish the ``unique_users`` result of a week-grain, CA-region subdag.

    The decorator runs the ``unique_users`` module as a subdag with ``grain``
    fixed to the literal ``"week"`` and ``region`` configured as ``"CA"``. What
    the series contains is defined in that module, which is not shown here
    (presumably unique-user figures per week -- confirm there).

    :param unique_users: The ``unique_users`` series produced inside the subdag.
        Within this function the name shadows the imported module.
    :return: The same series, passed through unchanged.
    """
    return unique_users
@subdag(
    unique_users,
    inputs={"grain": value("month")},
    config={"region": "CA"},
)
def monthly_unique_users_CA(unique_users: pd.Series) -> pd.Series:
    """Publish the ``unique_users`` result of a month-grain, CA-region subdag.

    The decorator runs the ``unique_users`` module as a subdag with ``grain``
    fixed to the literal ``"month"`` and ``region`` configured as ``"CA"``. What
    the series contains is defined in that module, which is not shown here
    (presumably unique-user figures per month -- confirm there).

    :param unique_users: The ``unique_users`` series produced inside the subdag.
        Within this function the name shadows the imported module.
    :return: The same series, passed through unchanged.
    """
    return unique_users