-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomparison_helper.py
114 lines (97 loc) · 3.51 KB
/
comparison_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
class ComparisonBetweenStations:
    """Parameters for comparison between stations.

    Attributes:
        years: The years to compare, stored exactly as passed in.
        years_string: Compact label for the selected years, e.g. ``"2019"``
            for a single year or ``"2017/18/19"`` for several. Empty when
            no years are given.
        aggregation: Name of the aggregation to apply, stored unchanged.
    """

    def __init__(self, years, aggregation):
        """Store the selection and build the display label for the years.

        Args:
            years: Sized collection of years (e.g. ``[2017, 2018]``).
            aggregation: Aggregation name (e.g. ``"sum"`` or ``"mean"``).
        """
        self.years = years
        self.aggregation = aggregation
        self.years_string = self._format_years(years)

    @staticmethod
    def _format_years(years):
        """Return the sorted years as a label with the first year in full."""
        labels = [str(year) for year in sorted(years)]
        if not labels:
            # Nothing selected: an empty label instead of an IndexError.
            return ""
        first, *rest = labels
        # Take the century from the data rather than hard-coding "20";
        # later years are shortened to their trailing digits.
        return "/".join([first, *(label[2:] for label in rest)])
def prepare_comparison_df(df):
    """Index the dataframe by its parsed ``timestamp`` column, in place.

    The ``timestamp`` column is converted with ``pd.to_datetime`` and then
    moved into the index. The passed-in frame itself is modified and also
    returned, so callers may use either the argument or the return value.
    """
    parsed_timestamps = pd.to_datetime(df["timestamp"])
    df["timestamp"] = parsed_timestamps
    df.set_index(keys="timestamp", inplace=True)
    return df
def aggregate(df, comparison):
    """Return ``total_bikes`` per station for the selected years.

    Args:
        df: Dataframe with ``description`` and ``total_bikes`` columns whose
            index exposes ``.year`` (the ``"mean"`` mode also resamples on
            it, so a datetime index is expected).
        comparison: Object providing ``years`` (years to keep) and
            ``aggregation`` (``"sum"`` or ``"mean"``).

    Returns:
        Dataframe indexed by ``description`` with one ``total_bikes``
        column, sorted ascending by that column. ``"sum"`` adds up all
        readings of a station; ``"mean"`` first sums the readings per day
        and then averages those daily totals per station.

    Raises:
        ValueError: If ``comparison.aggregation`` is not supported.
    """
    method = comparison.aggregation
    if method not in ("sum", "mean"):
        # Fail with a clear message instead of an UnboundLocalError on return.
        raise ValueError(
            f"unsupported aggregation {method!r}; expected 'sum' or 'mean'"
        )

    # Both modes share the year filter and the per-station grouping.
    selected = df[df.index.year.isin(comparison.years)]
    per_station = selected.groupby("description")[["total_bikes"]]

    if method == "sum":
        bikes_df = per_station.sum()
    else:
        bikes_df = (
            per_station.resample("D")
            .sum()
            .reset_index()
            .groupby("description")[["total_bikes"]]
            .mean()
        )
    return bikes_df.sort_values("total_bikes", ascending=True)
def get_key(my_dict, val):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Entries are checked in the dictionary's iteration order. When no value
    matches, the string ``"key doesn't exist"`` is returned instead of
    raising.
    """
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, "key doesn't exist")
def map_colors(dataframe, station_name):
    """Return the y values for a horizontal bar chart and their color map.

    Args:
        dataframe: Dataframe that has ``description`` either as its index
            or as a column (it is read after ``reset_index()``).
        station_name: Station to highlight.

    Returns:
        A tuple ``(stations_list, color_map)``: the station names as a list
        in dataframe order, and a dict mapping each name to
        ``'lightgreen'`` for ``station_name`` and ``'lightslategray'`` for
        every other station. If ``station_name`` is not present, no station
        is highlighted.
    """
    stations_list = dataframe.reset_index()["description"].tolist()
    # Build the mapping per name rather than by position: a missing station
    # then simply leaves every bar in the base color instead of raising.
    color_map = {
        station: "lightgreen" if station == station_name else "lightslategray"
        for station in stations_list
    }
    return stations_list, color_map
if __name__ == "__main__":
    # Demo run: read the counter data and index it by timestamp.
    # prepare_comparison_df modifies the frame in place, so its return
    # value is not needed here.
    df = pd.read_csv("berlin_bikedata_2017-2019.csv")
    prepare_comparison_df(df)

    # Mean of the daily totals per counter for 2019.
    comparison = ComparisonBetweenStations([2019], "mean")
    bikes_df = aggregate(df, comparison)

    # Set general style for plotly graphs
    px.defaults.template = "ggplot2"
    px.defaults.color_continuous_scale = px.colors.sequential.Plasma_r

    # Horizontal bar chart: one bar per bicycle counter.
    chart_data = bikes_df.reset_index()
    axis_labels = {"total_bikes": "Total Bikes", "description": "Bicycle Counter"}
    fig = px.bar(
        chart_data,
        x="total_bikes",
        y="description",
        color="total_bikes",
        orientation="h",
        labels=axis_labels,
    )

    # Large, faint label of the selected years anchored at the
    # bottom-right corner of the figure (paper coordinates).
    watermark_font = {"family": "Arial", "size": 100, "color": "black"}
    fig.add_annotation(
        text=f"{comparison.years_string}",
        xref="paper",
        yref="paper",
        x=1,
        y=0,
        showarrow=False,
        opacity=0.1,
        font=watermark_font,
    )
    fig.show()

    # Disabled draft: indicator showing the overall bike total.
    # sum_total_bikes = int(
    #     df[df.index.year.isin(comparison.years)].groupby('description')[['total_bikes']].sum().groupby('description')[
    #         ['total_bikes']].sum().sum())
    # # Draw indicator
    # fig = go.Figure()
    # fig.add_trace(go.Indicator(
    #     mode="number",
    #     value=int(sum_total_bikes),
    #     domain={'row': 0, 'column': 1}))
    # fig.update_layout(
    #     grid={'rows': 1, 'columns': 1, 'pattern': "independent"},
    #     template={'data': {'indicator': [{
    #         'title': {'text': f"Total Bikes ({comparison.years_string})"},
    #     }]}}
    # )