forked from iSchool-597PR/2022Fall_projects
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
436 lines (420 loc) · 20 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
"""utility functions and classes used by all other class files"""
from typing import Dict
import numpy as np
import pandas as pd
from Global import *
import cython
@cython.cfunc
def mod_beta_random(low: float, high: float, mean: float, std: float, samples: int,
                    seed: int = RANDOM_SEED) -> np.ndarray:
    """
    Generate random numbers from a Beta distribution rescaled to [low, high].

    The Beta shape parameters are derived from the requested mean and standard
    deviation using the formulas from
    https://stats.stackexchange.com/questions/12232/calculating-the-parameters-of-a-beta-distribution-using-the-mean-and-variance

    A fresh generator is created from ``seed`` on every call, so two calls with
    the same arguments (including the same seed) return the same values.

    :param low: the lowest value possible
    :param high: the highest value possible
    :param mean: the mean of the population
    :param std: the standard deviation of the population
    :param samples: the number of samples to generate, or a tuple of the output shape
    :param seed: random seed used to build the generator
    :return: a np.ndarray of size "samples" drawn from the distribution
    :raises AssertionError: if the mean is outside [low, high], or the standard
        deviation is too large for a Beta distribution with that mean to exist

    >>> beta = mod_beta_random(0., 10., 4., 1.9, 500)
    >>> np.all((0 <= beta) & (beta <= 10))
    True
    >>> np.all((0.5 <= beta) & (beta <= 9.5))
    False
    >>> mu = beta.mean()
    >>> 3.8 < mu and mu < 4.2
    True
    >>> std = beta.std()
    >>> 1.8 < std and std < 2.0
    True
    """
    assert low <= mean <= high, "Population mean out of bound."
    span = high - low
    # Rescale the requested moments to the unit interval the Beta distribution lives on
    mu = (mean - low) / span
    sigma = std / span
    assert sigma ** 2 <= mu * (1 - mu), "Population standard deviation too large for a Beta distribution to exist."
    a = ((1 - mu) / (sigma ** 2) - 1 / mu) * (mu ** 2)
    b = a * (1 / mu - 1)
    # Honor the caller-supplied seed (it defaults to RANDOM_SEED)
    beta = np.random.default_rng(seed).beta(a, b, samples)
    # Map the unit-interval samples back onto [low, high]
    return low + beta * span
@cython.cclass
class Food:
    """Stores a collection of batches of food, and has methods of common dataframe operations about food.

    The batches are held in ``self.df``, a dataframe with the columns ``type``,
    ``remaining_days`` and ``quantity``.

    The tabular doctest examples in this class use NORMALIZE_WHITESPACE so that
    they do not depend on the exact column alignment of the pandas output.
    """

    @cython.ccall
    def __init__(self, stock=None):
        """
        Initialize a Food object which is either empty, or based on a dataframe or total pounds of food.

        :param stock: the food data to initialize with: None for an empty stock, a dataframe that is stored as is
            (not copied), or a positive number giving the total quantity, which is split among the food types by
            their "proportion" and spread evenly over remaining shelf lives 1..max_days
        :raises AssertionError: if a numeric stock is not positive
        :raises ValueError: if stock is of any other kind

        >>> Food().df
        Empty DataFrame
        Columns: [type, remaining_days, quantity]
        Index: []
        >>> a = Food(5000).df
        >>> a.round(2)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 staples 1 8.33
        1 staples 2 8.33
        2 staples 3 8.33
        ...
        741 packaged_protein 178 6.94
        742 packaged_protein 179 6.94
        743 packaged_protein 180 6.94
        <BLANKLINE>
        [744 rows x 3 columns]
        >>> a.equals(Food(a).df)
        True
        """
        if stock is None:
            self.df = pd.DataFrame(columns=[
                "type",
                "remaining_days",
                "quantity"
            ]).astype(dtype={
                "type": str,
                "remaining_days": int,
                "quantity": float
            })
        elif isinstance(stock, pd.DataFrame):
            self.df = stock
        elif isinstance(stock, (float, int)):
            assert stock > 0
            types = []
            remaining_days = []
            quantity = []
            for t in TYPES.keys():
                # Assume that the remaining shelf lives of foods are uniformly distributed within [1, max_days]
                max_days = TYPES[t]["max_days"]
                q = stock * TYPES[t]["proportion"] / max_days
                types.extend([t] * max_days)
                remaining_days.extend(list(range(1, max_days + 1)))
                quantity.extend([q] * max_days)
            self.df = pd.DataFrame({"type": types, "remaining_days": remaining_days, "quantity": quantity})
        else:
            raise ValueError("Invalid input for initialization")

    @cython.ccall
    @classmethod
    def generate_donation(cls, mean_total: float):
        """Generate donated food to a food bank in a day. The quantity of different types and the total are random,
        but their mean values are derived from annual statistics.

        For each food type the quantity is drawn with mod_beta_random between 0.3x and 5x the type's mean, with a
        standard deviation of 0.5x the mean, and is then spread evenly over remaining shelf lives 1..max_days.

        NOTE(review): mod_beta_random is called without a seed argument, so it uses that function's default seed.
        If that default is a fixed value, repeated calls with the same mean_total yield the same donation --
        confirm this is intended.

        :param mean_total: the mean of the total pounds of foods donated to a food bank per day.
        :return: a Food object of the donated food

        >>> food = Food.generate_donation(5000).df
        >>> len(food) == sum(info["max_days"] for info in TYPES.values())
        True
        >>> food["type"].unique().tolist() == list(TYPES.keys())
        True
        """
        types = []
        remaining_days = []
        quantity = []
        for t in TYPES.keys():
            mean = mean_total * TYPES[t]["proportion"]
            low, high, stdev = 0.3 * mean, 5 * mean, 0.5 * mean
            beta = mod_beta_random(low, high, mean, stdev, 1).item()
            # Assume that the remaining shelf lives of foods are uniformly distributed within [1, max_days]
            max_days = TYPES[t]["max_days"]
            q = beta / max_days
            types.extend([t] * max_days)
            remaining_days.extend(list(range(1, max_days + 1)))
            quantity.extend([q] * max_days)
        df = pd.DataFrame({"type": types, "remaining_days": remaining_days, "quantity": quantity})
        return Food(df)

    @cython.ccall
    def sort_by_freshness(self, reverse=False, inplace=True):
        """Sort the food in each category by the remaining shelf life.

        Rows are always ordered by type first (ascending) and the index is reset.

        :param reverse: If False (default), the freshest food (most remaining days) of each type is ranked first.
            If True, the food closest to expiring is ranked first. We may assume that clients prefer the freshest
            food, whereas food bank gives out food that is going to expire in order to reduce waste.
        :param inplace: Whether to change self.df in place
        :return: if "inplace" is False, return a Food object with the sorted dataframe; otherwise None

        >>> a = Food(5000)
        >>> a.sort_by_freshness()
        >>> a.df.round(2)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_fruits_and_vegetables 14 35.71
        1 fresh_fruits_and_vegetables 13 35.71
        2 fresh_fruits_and_vegetables 12 35.71
        ...
        741 staples 3 8.33
        742 staples 2 8.33
        743 staples 1 8.33
        <BLANKLINE>
        [744 rows x 3 columns]
        >>> a.sort_by_freshness(reverse=True, inplace=False).df.round(2)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_fruits_and_vegetables 1 35.71
        1 fresh_fruits_and_vegetables 2 35.71
        2 fresh_fruits_and_vegetables 3 35.71
        ...
        741 staples 178 8.33
        742 staples 179 8.33
        743 staples 180 8.33
        <BLANKLINE>
        [744 rows x 3 columns]
        """
        sorted_df = self.df.sort_values(by=["type", "remaining_days"], ascending=[True, reverse]).reset_index(
            drop=True)
        if not inplace:
            return Food(sorted_df)
        self.df = sorted_df

    @cython.ccall
    def get_quantity(self) -> Dict[str, float]:
        """Get the quantity of each type of food in pounds

        :return: a dictionary that maps food types to corresponding quantities; every type in TYPES is present,
            with 0 for types that have no rows in stock

        >>> emp = Food()
        >>> emp.get_quantity()  # doctest: +NORMALIZE_WHITESPACE
        {'staples': 0, 'fresh_fruits_and_vegetables': 0, 'packaged_fruits_and_vegetables': 0, 'fresh_protein': 0,
        'packaged_protein': 0}
        >>> food = Food(5000)
        >>> counter = food.get_quantity()
        >>> actual = {typ: 5000 * info["proportion"] for typ, info in TYPES.items()}
        >>> counter == actual
        True
        """
        counter = self.df.groupby(["type"])["quantity"].agg("sum").to_dict()
        for typ in TYPES.keys():
            # Types absent from stock still get an entry
            counter.setdefault(typ, 0)
        return counter

    @cython.ccall
    def select(self, typ):
        """ Select one or more types of food from a Food object

        :param typ: the specified type(s): a single type name, or a list of type names
        :return: a new Food object with the specified type(s) of food, with the index reset
        :raises AssertionError: if a requested type is not a key of TYPES
        :raises TypeError: if typ is neither a str nor a list

        >>> food = Food(5000)
        >>> food.select(STP).df.round(3)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 staples 1 8.333
        1 staples 2 8.333
        ...
        179 staples 180 8.333
        <BLANKLINE>
        [180 rows x 3 columns]
        >>> food.select([FFV, PPT]).df.round(2)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_fruits_and_vegetables 1 35.71
        1 fresh_fruits_and_vegetables 2 35.71
        ...
        192 packaged_protein 179 6.94
        193 packaged_protein 180 6.94
        <BLANKLINE>
        [194 rows x 3 columns]
        >>> food.select("strange").df
        Traceback (most recent call last):
        AssertionError: invalid food type
        >>> food.select([FFV, "strange"]).df
        Traceback (most recent call last):
        AssertionError: invalid combination of food types
        """
        if isinstance(typ, str):
            assert typ in TYPES, "invalid food type"
            return Food(self.df[self.df["type"] == typ].reset_index(drop=True))
        elif isinstance(typ, list):
            assert set(typ).issubset(set(TYPES.keys())), "invalid combination of food types"
            return Food(self.df[self.df["type"].isin(typ)].reset_index(drop=True))
        else:
            raise TypeError("The 'typ' parameter should be of either str or list type")

    @cython.ccall
    def quality_control(self, num_days=1, inplace=True) -> Dict[str, float]:
        """Subtract some days from the remaining shelf life of the food, remove the expired food from stock, and
        record the quantity of waste in each category.

        The aging is computed on a copy of the stock, so a dataframe object that was handed to the constructor is
        never modified; with "inplace" set to True, self.df is rebound to the aged, filtered frame.

        :param num_days: number of days since the last quality check
        :param inplace: Whether to change self.df in place. Set to False to preview food waste in the next few days
        :return: a dictionary storing the wasted food in each category. If "inplace" is False, returns a tuple of
            that dictionary and the resulting dataframe of remaining food instead

        >>> a = Food(5000)
        >>> expired = a.quality_control(0)
        >>> expired  # doctest: +NORMALIZE_WHITESPACE
        {'staples': 0, 'fresh_fruits_and_vegetables': 0, 'packaged_fruits_and_vegetables': 0, 'fresh_protein': 0,
        'packaged_protein': 0}
        >>> b = Food(5000)
        >>> expired, remain = b.quality_control(float("inf"), inplace=False)
        >>> sum(expired.values())
        5000.0
        >>> remain
        Empty DataFrame
        Columns: [type, remaining_days, quantity]
        Index: []
        >>> w1 = Food(2000).quality_control(5)
        >>> w2 = Food(2000).quality_control(10)
        >>> w3 = Food(2000).quality_control(20)
        >>> all([w1[key] <= w2[key] <= w3[key] for key in TYPES.keys()])
        True
        """
        aged = self.df.copy()
        aged["remaining_days"] -= num_days
        # Food with no remaining shelf life is counted as waste
        mask = aged["remaining_days"] <= 0
        waste_counter = Food(aged[mask]).get_quantity()
        remaining = aged[~mask]
        if inplace:
            self.df = remaining
            return waste_counter
        return waste_counter, remaining

    @cython.ccall
    def add(self, other) -> None:
        """ Add a new batch of food to stock. Merge food items with same type and remaining days.

        Batches are aligned on (type, remaining_days); a combination present on only one side is kept with the
        other side treated as 0.

        :param other: a Food object as supplement; anything else is used directly as a dataframe with the same
            columns
        :return: None; self.df is replaced by the merged dataframe

        >>> a = Food(1000).select(STP)
        >>> b = Food(1000).select(FPT)
        >>> a.add(b)
        >>> added = a.sort_by_freshness(inplace=False)
        >>> added.df.round(3)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_protein 10 10.000
        1 fresh_protein 9 10.000
        ...
        188 staples 2 1.667
        189 staples 1 1.667
        <BLANKLINE>
        [190 rows x 3 columns]
        >>> subset = Food(1000).select([STP, FPT]).sort_by_freshness(inplace=False)
        >>> added.df.equals(subset.df)
        True
        >>> c = Food(1000)
        >>> d = Food(1000)
        >>> c.add(d)
        >>> added2 = c.sort_by_freshness(inplace=False)
        >>> double = Food(2000).sort_by_freshness(inplace=False)
        >>> added2.df.equals(double.df)
        True
        """
        if isinstance(other, Food):
            other = other.df
        self.df = self.df.set_index(["type", "remaining_days"]).add(other.set_index(["type", "remaining_days"]),
                                                                    fill_value=0).reset_index()

    @cython.ccall
    def get_quantity_by_food(self):
        """Returns storages for each type in weight units

        :return: a one-row dataframe with one column per food type holding the summed quantity of that type
        """
        storage_amounts = {}
        # NOTE(review): relies on a name `Global` being in scope; `from Global import *` only provides it if that
        # module exports an object called `Global` -- confirm.
        for food_type in Global.get_food_types():
            storage_amounts[food_type] = [self.df[self.df['type'] == food_type]['quantity'].sum()]
        return pd.DataFrame(storage_amounts)

    @cython.ccall
    def subtract(self, order: Dict[str, float], predict=False):
        """
        Subtract some quantity of food from stock, and return the Food object with that quantity with specific
        remaining shelf lives. Within each type, the food closest to expiring is taken first.

        The "order" dictionary passed in is not modified. self.df is re-sorted (soonest to expire first) as a side
        effect, even when "predict" is True or an error is raised.

        :param order: a dictionary storing the quantity of ordered food in each category; types that are missing
            or have a non-positive quantity are treated as an order of 0
        :param predict: Set to True to preview food availability in the next few days, where negative quantities
            will be used to represent shortage; otherwise, an error will be raised for subtracting more than
            available.
        :return: if "predict" is set to False, returns a Food object of the subtracted food; if set to True, returns
            a tuple of two Food objects, the first being the subtracted food and the second being the remaining
            food.
        :raises ValueError: if "predict" is False and more food of a type is ordered than is in stock

        >>> food = Food(5000)
        >>> q = food.get_quantity()
        >>> order = {k: v-7 for k, v in q.items()}
        >>> sent = food.subtract(order).sort_by_freshness(inplace=False)
        >>> food.df.round(3)  # 7 pounds left in stock for each type  # doctest: +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_fruits_and_vegetables 14 7.000
        1 fresh_protein 10 7.000
        2 packaged_fruits_and_vegetables 358 0.056
        3 packaged_fruits_and_vegetables 359 3.472
        4 packaged_fruits_and_vegetables 360 3.472
        5 packaged_protein 179 0.056
        6 packaged_protein 180 6.944
        7 staples 180 7.000
        >>> sent.df[sent.df["type"] == STP].round(3)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        561 staples 180 1.333
        562 staples 179 8.333
        563 staples 178 8.333
        ...
        <BLANKLINE>
        [180 rows x 3 columns]
        >>> food2 = Food(5000)
        >>> sent2, stock2 = food2.subtract({STP: 30.0, FPT: 22.0}, predict=True)
        >>> sent2.sort_by_freshness()
        >>> sent2.df[sent2.df["type"] == STP].round(3)  # doctest: +NORMALIZE_WHITESPACE
        type remaining_days quantity
        4 staples 4 5.000
        5 staples 3 8.333
        6 staples 2 8.333
        7 staples 1 8.333
        >>> sent2.df[sent2.df["type"] == FFV].round(3)  # doctest: +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_fruits_and_vegetables 1 0.0
        >>> stock2.df[stock2.df["type"] == FPT].round(3)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        14 fresh_protein 1 28.0
        15 fresh_protein 2 50.0
        16 fresh_protein 3 50.0
        ...
        >>> stock2.df["quantity"].sum() == (5000 - 30 - 22)
        True
        >>> food3 = Food(5000)
        >>> food3.subtract({STP: float("inf")})
        Traceback (most recent call last):
        ValueError: The "staples" you ordered does not exist or is not sufficient in stock
        >>> food3.df["quantity"].sum() == 5000  # Subtraction failed, stock remains the same
        True
        >>> food4 = Food(5000)
        >>> order4 = {k: v+7 for k, v in q.items()}
        >>> sent4, stock4 = food4.subtract(order4, predict=True)
        >>> sent4.df.round(3)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_fruits_and_vegetables 1 35.714
        1 fresh_fruits_and_vegetables 2 35.714
        ...
        742 staples 179 8.333
        743 staples 180 8.333
        <BLANKLINE>
        [744 rows x 3 columns]
        >>> stock4.df.round(3)  # doctest: +NORMALIZE_WHITESPACE
        type remaining_days quantity
        0 fresh_fruits_and_vegetables 14 -7.0
        1 fresh_protein 10 -7.0
        2 packaged_fruits_and_vegetables 360 -7.0
        3 packaged_protein 180 -7.0
        4 staples 180 -7.0
        """
        # Work on a private copy so the caller's dictionary is left untouched
        order = dict(order)
        quantity = self.get_quantity()
        # Soonest-to-expire food first, so that the cumulative sum below consumes it first
        self.sort_by_freshness(reverse=True)
        shortage = {}
        for typ in TYPES.keys():
            if (typ not in order) or (order[typ] <= 0):
                order[typ] = 0.
            elif (typ not in quantity) or (order[typ] > quantity[typ]):
                if predict:
                    # Hand out everything available now; the shortfall is booked as negative stock further down
                    available = quantity.get(typ, 0)
                    shortage[typ] = order[typ] - available
                    order[typ] = available
                else:
                    raise ValueError(f"The \"{typ}\" you ordered does not exist or is not sufficient in stock")
        demand = pd.DataFrame(order.items(), columns=["type", "demand"])
        stock = self.df.copy()
        stock = stock.merge(demand, on="type", how="left")
        stock["cum_sum"] = stock.groupby("type")["quantity"].cumsum()
        # Due to float precision, loosen the condition a bit
        stock["satisfied"] = stock["cum_sum"] >= (stock["demand"] - 1e-7)
        # Get the first row in each type where the demand is satisfied
        pivot = stock.groupby("type")["satisfied"].idxmax().reset_index().rename(columns={"satisfied": "pivot"})
        stock = stock.merge(pivot, on="type", how="left")
        # Split into two parts, and divide the quantity of this row.
        # Explicit copies keep the assignments below from writing into slices of the merged frame.
        sent = stock.loc[stock.index <= stock["pivot"]].copy()
        stock = stock.loc[stock.index >= stock["pivot"]].copy()
        stock_pivot = stock.index == stock["pivot"]
        # What stays in stock from the pivot row is whatever exceeds the demand
        stock.loc[stock_pivot, "quantity"] = stock.loc[stock_pivot, "cum_sum"] - stock.loc[stock_pivot, "demand"]
        sent_pivot = sent.index == sent["pivot"]
        # The pivot row is only sent in part: take off the share that stays in stock
        sent.loc[sent_pivot, "quantity"] -= sent.loc[sent_pivot, "cum_sum"] - sent.loc[sent_pivot, "demand"]
        sent = sent[["type", "remaining_days", "quantity"]].reset_index(drop=True)
        stock = stock[["type", "remaining_days", "quantity"]].reset_index(drop=True)
        if len(shortage) > 0:
            for typ, qt in shortage.items():
                # A type that ran short is expected to have exactly one (emptied) row left in stock
                assert len(stock[stock["type"] == typ]) == 1
                stock.loc[stock["type"] == typ, "quantity"] -= qt
        if predict:
            return Food(sent), Food(stock)
        else:
            self.df = stock
            return Food(sent)