From 382020e3db0ad89ada59753d5a3fa9ee615c0294 Mon Sep 17 00:00:00 2001 From: Yevhenii Vinichenko <56126336+gekas145@users.noreply.github.com> Date: Thu, 24 Mar 2022 22:09:32 +0200 Subject: [PATCH 01/19] begin hw1 --- Homeworks/Homework-I/hw1.ipynb | 1235 ++++++++++++++++++++++++++++++++ 1 file changed, 1235 insertions(+) create mode 100644 Homeworks/Homework-I/hw1.ipynb diff --git a/Homeworks/Homework-I/hw1.ipynb b/Homeworks/Homework-I/hw1.ipynb new file mode 100644 index 0000000..eedbdc4 --- /dev/null +++ b/Homeworks/Homework-I/hw1.ipynb @@ -0,0 +1,1235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d2b566f2", + "metadata": {}, + "source": [ + "# Homework 1" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "fd9eb0ea", + "metadata": {}, + "outputs": [], + "source": [ + "import dalex as dx\n", + "import pandas as pd\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "07355f2d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | hotel | \n", + "is_canceled | \n", + "lead_time | \n", + "arrival_date_year | \n", + "arrival_date_month | \n", + "arrival_date_week_number | \n", + "arrival_date_day_of_month | \n", + "stays_in_weekend_nights | \n", + "stays_in_week_nights | \n", + "adults | \n", + "... | \n", + "deposit_type | \n", + "agent | \n", + "company | \n", + "days_in_waiting_list | \n", + "customer_type | \n", + "adr | \n", + "required_car_parking_spaces | \n", + "total_of_special_requests | \n", + "reservation_status | \n", + "reservation_status_date | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Resort Hotel | \n", + "0 | \n", + "342 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
1 | \n", + "Resort Hotel | \n", + "0 | \n", + "737 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
2 | \n", + "Resort Hotel | \n", + "0 | \n", + "7 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
3 | \n", + "Resort Hotel | \n", + "0 | \n", + "13 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "304.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
4 | \n", + "Resort Hotel | \n", + "0 | \n", + "14 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "2 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "240.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "98.0 | \n", + "0 | \n", + "1 | \n", + "Check-Out | \n", + "2015-07-03 | \n", + "
5 rows × 32 columns
\n", + "\n", + " | variable_name | \n", + "variable_value | \n", + "variable | \n", + "cumulative | \n", + "contribution | \n", + "sign | \n", + "position | \n", + "label | \n", + "
---|---|---|---|---|---|---|---|---|
0 | \n", + "intercept | \n", + "1 | \n", + "intercept | \n", + "0.370320 | \n", + "0.370320 | \n", + "1.0 | \n", + "7 | \n", + "observation | \n", + "
1 | \n", + "lead_time | \n", + "203.0 | \n", + "lead_time = 203.0 | \n", + "0.405468 | \n", + "0.035148 | \n", + "1.0 | \n", + "6 | \n", + "observation | \n", + "
2 | \n", + "adults | \n", + "2.0 | \n", + "adults = 2.0 | \n", + "0.412069 | \n", + "0.006601 | \n", + "1.0 | \n", + "5 | \n", + "observation | \n", + "
3 | \n", + "babies | \n", + "0.0 | \n", + "babies = 0.0 | \n", + "0.412456 | \n", + "0.000386 | \n", + "1.0 | \n", + "4 | \n", + "observation | \n", + "
4 | \n", + "children | \n", + "0.0 | \n", + "children = 0.0 | \n", + "0.412374 | \n", + "-0.000082 | \n", + "-1.0 | \n", + "3 | \n", + "observation | \n", + "
5 | \n", + "arrival_date_year | \n", + "2016.0 | \n", + "arrival_date_year = 2016.0 | \n", + "0.410609 | \n", + "-0.001765 | \n", + "-1.0 | \n", + "2 | \n", + "observation | \n", + "
6 | \n", + "booking_changes | \n", + "4.0 | \n", + "booking_changes = 4.0 | \n", + "0.307920 | \n", + "-0.102689 | \n", + "-1.0 | \n", + "1 | \n", + "observation | \n", + "
7 | \n", + "\n", + " | \n", + " | prediction | \n", + "0.307920 | \n", + "0.307920 | \n", + "1.0 | \n", + "0 | \n", + "observation | \n", + "
\n", - " | hotel | \n", - "is_canceled | \n", - "lead_time | \n", - "arrival_date_year | \n", - "arrival_date_month | \n", - "arrival_date_week_number | \n", - "arrival_date_day_of_month | \n", - "stays_in_weekend_nights | \n", - "stays_in_week_nights | \n", - "adults | \n", - "... | \n", - "deposit_type | \n", - "agent | \n", - "company | \n", - "days_in_waiting_list | \n", - "customer_type | \n", - "adr | \n", - "required_car_parking_spaces | \n", - "total_of_special_requests | \n", - "reservation_status | \n", - "reservation_status_date | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "Resort Hotel | \n", - "0 | \n", - "342 | \n", - "2015 | \n", - "July | \n", - "27 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "2 | \n", - "... | \n", - "No Deposit | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "Transient | \n", - "0.0 | \n", - "0 | \n", - "0 | \n", - "Check-Out | \n", - "2015-07-01 | \n", - "
1 | \n", - "Resort Hotel | \n", - "0 | \n", - "737 | \n", - "2015 | \n", - "July | \n", - "27 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "2 | \n", - "... | \n", - "No Deposit | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "Transient | \n", - "0.0 | \n", - "0 | \n", - "0 | \n", - "Check-Out | \n", - "2015-07-01 | \n", - "
2 | \n", - "Resort Hotel | \n", - "0 | \n", - "7 | \n", - "2015 | \n", - "July | \n", - "27 | \n", - "1 | \n", - "0 | \n", - "1 | \n", - "1 | \n", - "... | \n", - "No Deposit | \n", - "NaN | \n", - "NaN | \n", - "0 | \n", - "Transient | \n", - "75.0 | \n", - "0 | \n", - "0 | \n", - "Check-Out | \n", - "2015-07-02 | \n", - "
3 | \n", - "Resort Hotel | \n", - "0 | \n", - "13 | \n", - "2015 | \n", - "July | \n", - "27 | \n", - "1 | \n", - "0 | \n", - "1 | \n", - "1 | \n", - "... | \n", - "No Deposit | \n", - "304.0 | \n", - "NaN | \n", - "0 | \n", - "Transient | \n", - "75.0 | \n", - "0 | \n", - "0 | \n", - "Check-Out | \n", - "2015-07-02 | \n", - "
4 | \n", - "Resort Hotel | \n", - "0 | \n", - "14 | \n", - "2015 | \n", - "July | \n", - "27 | \n", - "1 | \n", - "0 | \n", - "2 | \n", - "2 | \n", - "... | \n", - "No Deposit | \n", - "240.0 | \n", - "NaN | \n", - "0 | \n", - "Transient | \n", - "98.0 | \n", - "0 | \n", - "1 | \n", - "Check-Out | \n", - "2015-07-03 | \n", - "
5 rows × 32 columns
\n", - "\n", - " | variable_name | \n", - "variable_value | \n", - "variable | \n", - "cumulative | \n", - "contribution | \n", - "sign | \n", - "position | \n", - "label | \n", - "
---|---|---|---|---|---|---|---|---|
0 | \n", - "intercept | \n", - "1 | \n", - "intercept | \n", - "0.370320 | \n", - "0.370320 | \n", - "1.0 | \n", - "7 | \n", - "observation | \n", - "
1 | \n", - "lead_time | \n", - "203.0 | \n", - "lead_time = 203.0 | \n", - "0.405468 | \n", - "0.035148 | \n", - "1.0 | \n", - "6 | \n", - "observation | \n", - "
2 | \n", - "adults | \n", - "2.0 | \n", - "adults = 2.0 | \n", - "0.412069 | \n", - "0.006601 | \n", - "1.0 | \n", - "5 | \n", - "observation | \n", - "
3 | \n", - "babies | \n", - "0.0 | \n", - "babies = 0.0 | \n", - "0.412456 | \n", - "0.000386 | \n", - "1.0 | \n", - "4 | \n", - "observation | \n", - "
4 | \n", - "children | \n", - "0.0 | \n", - "children = 0.0 | \n", - "0.412374 | \n", - "-0.000082 | \n", - "-1.0 | \n", - "3 | \n", - "observation | \n", - "
5 | \n", - "arrival_date_year | \n", - "2016.0 | \n", - "arrival_date_year = 2016.0 | \n", - "0.410609 | \n", - "-0.001765 | \n", - "-1.0 | \n", - "2 | \n", - "observation | \n", - "
6 | \n", - "booking_changes | \n", - "4.0 | \n", - "booking_changes = 4.0 | \n", - "0.307920 | \n", - "-0.102689 | \n", - "-1.0 | \n", - "1 | \n", - "observation | \n", - "
7 | \n", - "\n", - " | \n", - " | prediction | \n", - "0.307920 | \n", - "0.307920 | \n", - "1.0 | \n", - "0 | \n", - "observation | \n", - "
\n", + " | hotel | \n", + "is_canceled | \n", + "lead_time | \n", + "arrival_date_year | \n", + "arrival_date_month | \n", + "arrival_date_week_number | \n", + "arrival_date_day_of_month | \n", + "stays_in_weekend_nights | \n", + "stays_in_week_nights | \n", + "adults | \n", + "... | \n", + "deposit_type | \n", + "agent | \n", + "company | \n", + "days_in_waiting_list | \n", + "customer_type | \n", + "adr | \n", + "required_car_parking_spaces | \n", + "total_of_special_requests | \n", + "reservation_status | \n", + "reservation_status_date | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Resort Hotel | \n", + "0 | \n", + "342 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
1 | \n", + "Resort Hotel | \n", + "0 | \n", + "737 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
2 | \n", + "Resort Hotel | \n", + "0 | \n", + "7 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
3 | \n", + "Resort Hotel | \n", + "0 | \n", + "13 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "304.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
4 | \n", + "Resort Hotel | \n", + "0 | \n", + "14 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "2 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "240.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "98.0 | \n", + "0 | \n", + "1 | \n", + "Check-Out | \n", + "2015-07-03 | \n", + "
5 rows × 32 columns
\n", + "\n", + " | variable_name | \n", + "variable_value | \n", + "variable | \n", + "cumulative | \n", + "contribution | \n", + "sign | \n", + "position | \n", + "label | \n", + "
---|---|---|---|---|---|---|---|---|
0 | \n", + "intercept | \n", + "1 | \n", + "intercept | \n", + "0.370320 | \n", + "0.370320 | \n", + "1.0 | \n", + "7 | \n", + "observation | \n", + "
1 | \n", + "lead_time | \n", + "203.0 | \n", + "lead_time = 203.0 | \n", + "0.405468 | \n", + "0.035148 | \n", + "1.0 | \n", + "6 | \n", + "observation | \n", + "
2 | \n", + "adults | \n", + "2.0 | \n", + "adults = 2.0 | \n", + "0.412069 | \n", + "0.006601 | \n", + "1.0 | \n", + "5 | \n", + "observation | \n", + "
3 | \n", + "babies | \n", + "0.0 | \n", + "babies = 0.0 | \n", + "0.412456 | \n", + "0.000386 | \n", + "1.0 | \n", + "4 | \n", + "observation | \n", + "
4 | \n", + "children | \n", + "0.0 | \n", + "children = 0.0 | \n", + "0.412374 | \n", + "-0.000082 | \n", + "-1.0 | \n", + "3 | \n", + "observation | \n", + "
5 | \n", + "arrival_date_year | \n", + "2016.0 | \n", + "arrival_date_year = 2016.0 | \n", + "0.410609 | \n", + "-0.001765 | \n", + "-1.0 | \n", + "2 | \n", + "observation | \n", + "
6 | \n", + "booking_changes | \n", + "4.0 | \n", + "booking_changes = 4.0 | \n", + "0.307920 | \n", + "-0.102689 | \n", + "-1.0 | \n", + "1 | \n", + "observation | \n", + "
7 | \n", + "\n", + " | \n", + " | prediction | \n", + "0.307920 | \n", + "0.307920 | \n", + "1.0 | \n", + "0 | \n", + "observation | \n", + "
import dalex as dx
+import pandas as pd
+import warnings
+warnings.filterwarnings('ignore')
+
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+
data = pd.read_csv('hotel_bookings.csv')
+data.head()
+
[output: data.head(), first 5 rows × 32 columns of hotel_bookings.csv]
+# in order to simplify plots I decided to use only a subset of variables
+data = data[['is_canceled', 'lead_time', 'arrival_date_year', 'adults', 'children', 'babies', 'booking_changes']]
+data = data.dropna()
+X, y = data.loc[:, data.columns != 'is_canceled'], data[['is_canceled']]
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+
clf = RandomForestClassifier(max_depth=2, random_state=0)
+clf.fit(X_train, y_train)
+
RandomForestClassifier(max_depth=2, random_state=0)+
# making a prediction on unseen data
+observation = X_test.iloc[0,:].to_frame().transpose()
+clf.predict(observation)
+
array([0], dtype=int64)+
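A side note on the row extraction above: the to_frame().transpose() round-trip can be avoided by indexing with a list, which keeps a one-row DataFrame directly. A minimal equivalent sketch (the name observation_alt is only for illustration):

# equivalent one-row DataFrame, no to_frame().transpose() needed
observation_alt = X_test.iloc[[0]]
clf.predict(observation_alt)  # array([0]), as above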
observation = pd.DataFrame({'lead_time': [203.0],
+ 'arrival_date_year': [2016.0],
+ 'adults': [2.0],
+ 'children': [0.0],
+ 'babies': [0.0],
+ 'booking_changes': [4.]},
+ index = ['observation'])
+
exp = dx.Explainer(clf, X_train, y_train)
+
Preparation of a new explainer is initiated + + -> data : 107447 rows 6 cols + -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. + -> target variable : 107447 values + -> model_class : sklearn.ensemble._forest.RandomForestClassifier (default) + -> label : Not specified, model's class short name will be used. (default) + -> predict function : <function yhat_proba_default at 0x0000019C693170D0> will be used (default) + -> predict function : Accepts pandas.DataFrame and numpy.ndarray. + -> predicted values : min = 0.139, mean = 0.37, max = 0.45 + -> model type : classification will be used (default) + -> residual function : difference between y and yhat (default) + -> residuals : min = -0.45, mean = -3.51e-05, max = 0.861 + -> model_info : package sklearn + +A new explainer has been created! ++
exp.predict(observation)
+
array([0.3079201])+
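Note why this differs from clf.predict above: the explainer wraps the model with yhat_proba_default (see the log), so exp.predict returns the probability of class 1 rather than a hard label. A quick cross-check, assuming the fitted clf:

# the class-1 probability straight from the model; 0.308 < 0.5 is why
# clf.predict returned the hard label 0 for this observation
clf.predict_proba(observation)[:, 1]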
bd_observation = exp.predict_parts(observation, type='break_down', label=observation.index[0])
+
bd_observation.result
+
[output: bd_observation.result]
variable                      cumulative    contribution
intercept                     0.370320      +0.370320
lead_time = 203.0             0.405468      +0.035148
adults = 2.0                  0.412069      +0.006601
babies = 0.0                  0.412456      +0.000386
children = 0.0                0.412374      -0.000082
arrival_date_year = 2016.0    0.410609      -0.001765
booking_changes = 4.0         0.307920      -0.102689
prediction                    0.307920       0.307920
bd_observation.plot()
+# the lead_time variable is the number of days between the date the booking
+# was entered and the arrival (or cancellation) date
+
sh_observation = exp.predict_parts(observation, type='shap', B = 10, label=observation.index[0])
+
sh_observation.plot(bar_width = 16)
+
+# a large number of booking changes contributes negatively to the final result
+# a long lead time, however, makes cancellation more likely
+
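A note on the B argument: in dalex it is the number of random variable orderings over which the Shapley contributions are averaged, so a larger B trades runtime for less order-induced noise. A minimal sketch, assuming the exp and observation objects above:

# B=10 above is fast but can be noisy for correlated features;
# averaging over more orderings stabilizes the attributions
sh_stable = exp.predict_parts(observation, type='shap', B=50,
                              label=observation.index[0])
sh_stable.plot(bar_width=16)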
observation1 = pd.DataFrame({'lead_time': [4.0],
+ 'arrival_date_year': [2015.0],
+ 'adults': [2.0],
+ 'children': [0.0],
+ 'babies': [1.0],
+ 'booking_changes': [0.0]},
+ index = ['observation1'])
+sh_observation1 = exp.predict_parts(observation1, type='shap', B = 10, label=observation1.index[0])
+
sh_observation1.plot(bar_width = 16)
+# a shorter lead time makes cancellation less likely
+# the same applies to the number of babies
+# also, the absence of booking changes contributes positively to the final result
+
bd_observation.plot()
# the lead_time variable is the number of days between the date the booking
# was entered and the arrival (or cancellation) date
+
+# target: is_canceled (1 if canceled, 0 if not)
+
+# it's better to focus the commentary on the SHAP plot, because break down depends on the order of the variables
sh_observation.plot(bar_width = 16)
+# the SHAP plot shows similar contributions for each variable
+
# a large number of booking changes contributes negatively to the final result
-# a long lead time, however, makes cancellation more likely
+# making cancellation less likely
+# because guests who carefully adjusted their booking will most probably not give it up
+
+# a long lead time, however, makes cancellation more likely, because a lot can happen in that time
+# e.g. someone can lose their job and, as a consequence, the ability to afford a holiday
+
+# the other variables in this example do not contribute significantly
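Since the comment earlier in this cell notes that break down depends on the order of the variables, one way to see this directly is to compute two explanations with explicitly different orderings. A hedged sketch, assuming predict_parts's order argument (a permutation of the column names) and the objects defined earlier:

# same observation, two explicit orderings; if the per-variable
# contributions differ noticeably, interactions make break down order-sensitive
bd_a = exp.predict_parts(observation, type='break_down',
                         order=['lead_time', 'booking_changes', 'adults',
                                'children', 'babies', 'arrival_date_year'])
bd_b = exp.predict_parts(observation, type='break_down',
                         order=['booking_changes', 'lead_time', 'adults',
                                'children', 'babies', 'arrival_date_year'])
bd_a.plot(bd_b)  # draw both side by side for comparison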
sh_observation1.plot(bar_width = 16)
# a shorter lead time makes cancellation less likely
-# the same applies to the number of babies
-# also, the absence of booking changes contributes positively to the final result
+# because the customer's financial situation is unlikely to change much over a shorter period
+
+# the same applies to the number of babies, as people with children tend to plan their life
+# more carefully
+
+# also, the absence of booking changes contributes positively to the final result, making cancellation more
+# likely, because if there is nothing to change for the better, maybe this hotel isn't good enough
+# nevertheless, its contribution is much smaller than those of the previous two variables
+# this suggests that booking changes may not be very important in this case
\n", + " | hotel | \n", + "is_canceled | \n", + "lead_time | \n", + "arrival_date_year | \n", + "arrival_date_month | \n", + "arrival_date_week_number | \n", + "arrival_date_day_of_month | \n", + "stays_in_weekend_nights | \n", + "stays_in_week_nights | \n", + "adults | \n", + "... | \n", + "deposit_type | \n", + "agent | \n", + "company | \n", + "days_in_waiting_list | \n", + "customer_type | \n", + "adr | \n", + "required_car_parking_spaces | \n", + "total_of_special_requests | \n", + "reservation_status | \n", + "reservation_status_date | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Resort Hotel | \n", + "0 | \n", + "342 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
1 | \n", + "Resort Hotel | \n", + "0 | \n", + "737 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
2 | \n", + "Resort Hotel | \n", + "0 | \n", + "7 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
3 | \n", + "Resort Hotel | \n", + "0 | \n", + "13 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "304.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
4 | \n", + "Resort Hotel | \n", + "0 | \n", + "14 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "2 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "240.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "98.0 | \n", + "0 | \n", + "1 | \n", + "Check-Out | \n", + "2015-07-03 | \n", + "
5 rows × 32 columns
\n", + "\n", + " | variable | \n", + "effect | \n", + "
---|---|---|
0 | \n", + "booking_changes > 0.00 | \n", + "-0.102434 | \n", + "
1 | \n", + "lead_time > 160.00 | \n", + "0.056035 | \n", + "
2 | \n", + "babies <= 0.00 | \n", + "0.045443 | \n", + "
3 | \n", + "adults <= 2.00 | \n", + "-0.006092 | \n", + "
4 | \n", + "arrival_date_year <= 2016.00 | \n", + "-0.003575 | \n", + "
5 | \n", + "children <= 0.00 | \n", + "-0.001196 | \n", + "
import dalex as dx
+import pandas as pd
+import warnings
+warnings.filterwarnings('ignore')
+
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+
data = pd.read_csv('hotel_bookings.csv')
+data.head()
+
[output: data.head(), first 5 rows × 32 columns of hotel_bookings.csv]
+# in order to simplify plots I decided to use only a subset of variables
+data = data[['is_canceled', 'lead_time', 'arrival_date_year', 'adults', 'children', 'babies', 'booking_changes']]
+data = data.dropna()
+X, y = data.loc[:, data.columns != 'is_canceled'], data[['is_canceled']]
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+
clf = RandomForestClassifier(max_depth=2, random_state=0)
+clf.fit(X_train, y_train)
+
RandomForestClassifier(max_depth=2, random_state=0)+
# making a prediction on unseen data
+observation = X_test.iloc[0,:].to_frame().transpose()
+clf.predict(observation)
+
array([0], dtype=int64)+
observation = pd.DataFrame({'lead_time': [203.0],
+ 'arrival_date_year': [2016.0],
+ 'adults': [2.0],
+ 'children': [0.0],
+ 'babies': [0.0],
+ 'booking_changes': [4.]},
+ index = ['observation'])
+
exp = dx.Explainer(clf, X_train, y_train, label='random_forest')
+
Preparation of a new explainer is initiated + + -> data : 107447 rows 6 cols + -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. + -> target variable : 107447 values + -> model_class : sklearn.ensemble._forest.RandomForestClassifier (default) + -> label : random_forest + -> predict function : <function yhat_proba_default at 0x0000027ABA9CB0D0> will be used (default) + -> predict function : Accepts pandas.DataFrame and numpy.ndarray. + -> predicted values : min = 0.139, mean = 0.37, max = 0.45 + -> model type : classification will be used (default) + -> residual function : difference between y and yhat (default) + -> residuals : min = -0.45, mean = -3.51e-05, max = 0.861 + -> model_info : package sklearn + +A new explainer has been created! ++
exp.predict(observation) # one more prediction
+
array([0.3079201])+
lime = exp.predict_surrogate(observation)
+
lime.plot()
+# as already shown (see hw1), a large number of booking changes (usually anything greater than 0)
+# makes cancellation less likely
+# whereas a long time before arrival and the absence of babies make cancellation more likely
+
lime.result
+# the result attribute gives more precise information on the contributions
+
[output: lime.result]
variable                        effect
booking_changes > 0.00         -0.102434
lead_time > 160.00              0.056035
babies <= 0.00                  0.045443
adults <= 2.00                 -0.006092
arrival_date_year <= 2016.00   -0.003575
children <= 0.00               -0.001196
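Since the comments keep distinguishing the few influential variables from the near-zero ones, a simple threshold on the absolute effect makes that split explicit. A minimal pandas sketch over the lime.result frame shown above:

# keep only rules whose local effect magnitude exceeds 0.01;
# here that leaves booking_changes, lead_time and babies
strong = lime.result[lime.result['effect'].abs() > 0.01]
print(strong)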
observation2 = pd.DataFrame({'lead_time': [332.0],
+ 'arrival_date_year': [2017.0],
+ 'adults': [3.0],
+ 'children': [1.0],
+ 'babies': [0.0],
+ 'booking_changes': [0.0]},
+ index = ['observation2'])
+lime2 = exp.predict_surrogate(observation2)
+lime2.plot()
+# as we can see, this observation is a perfect example of a guest whose every parameter makes cancellation more likely
+
observation3 = pd.DataFrame({'lead_time': [203.0],
+ 'arrival_date_year': [2017.0],
+ 'adults': [2.0],
+ 'children': [1.0],
+ 'babies': [0.0],
+ 'booking_changes': [1.0]},
+ index = ['observation3'])
+lime3 = exp.predict_surrogate(observation3)
+lime3.plot()
+# same conclusions as for the observations above
+
observation5 = pd.DataFrame({'lead_time': [203.0],
+ 'arrival_date_year': [2017.0],
+ 'adults': [2.0],
+ 'children': [1.0],
+ 'babies': [0.0],
+ 'booking_changes': [0.0]},
+ index = ['observation5'])
+lime5 = exp.predict_surrogate(observation5)
+lime5.plot()
+# one more indication that a shorter lead time makes cancellation less likely
+
observation4 = pd.DataFrame({'lead_time': [82.0],
+ 'arrival_date_year': [2015.0],
+ 'adults': [2.0],
+ 'children': [0.0],
+ 'babies': [0.0],
+ 'booking_changes': [0.0]},
+ index = ['observation4'])
+lime4 = exp.predict_surrogate(observation4)
+lime4.plot()
+
observation6 = pd.DataFrame({'lead_time': [14.0],
+ 'arrival_date_year': [2015.0],
+ 'adults': [2.0],
+ 'children': [2.0],
+ 'babies': [0.0],
+ 'booking_changes': [0.0]},
+ index = ['observation6'])
+lime6 = exp.predict_surrogate(observation6)
+lime6.plot()
+
observation7 = pd.DataFrame({'lead_time': [14.0],
+ 'arrival_date_year': [2015.0],
+ 'adults': [2.0],
+ 'children': [0.0],
+ 'babies': [2.0],
+ 'booking_changes': [0.0]},
+ index = ['observation7'])
+lime7 = exp.predict_surrogate(observation7)
+lime7.plot()
+# On this and the previous plot all variables have the same values; only the numbers of children and babies differ
+# (2 and 0 in the first case and vice versa in the second)
+# One can notice that babies contribute much more strongly than children
+# Moreover, children in a family without babies can contribute positively to the result (see the plot below)
+
observation8 = pd.DataFrame({'lead_time': [100.0],
+ 'arrival_date_year': [2015.0],
+ 'adults': [2.0],
+ 'children': [2.0],
+ 'babies': [0.0],
+ 'booking_changes': [4.0]},
+ index = ['observation8'])
+lime8 = exp.predict_surrogate(observation8)
+lime8.plot()
+
On every plot there is usually one main variable with the most influence (positive or negative) on the result. It is usually the number of booking changes (especially when it is greater than 0) or the time before arrival (lead time). The other variables usually do not contribute strongly, and whether their contribution is positive or negative depends on the values of the main variables.
+ +\n", + " | hotel | \n", + "is_canceled | \n", + "lead_time | \n", + "arrival_date_year | \n", + "arrival_date_month | \n", + "arrival_date_week_number | \n", + "arrival_date_day_of_month | \n", + "stays_in_weekend_nights | \n", + "stays_in_week_nights | \n", + "adults | \n", + "... | \n", + "deposit_type | \n", + "agent | \n", + "company | \n", + "days_in_waiting_list | \n", + "customer_type | \n", + "adr | \n", + "required_car_parking_spaces | \n", + "total_of_special_requests | \n", + "reservation_status | \n", + "reservation_status_date | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Resort Hotel | \n", + "0 | \n", + "342 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
1 | \n", + "Resort Hotel | \n", + "0 | \n", + "737 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "0.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-01 | \n", + "
2 | \n", + "Resort Hotel | \n", + "0 | \n", + "7 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
3 | \n", + "Resort Hotel | \n", + "0 | \n", + "13 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "... | \n", + "No Deposit | \n", + "304.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "75.0 | \n", + "0 | \n", + "0 | \n", + "Check-Out | \n", + "2015-07-02 | \n", + "
4 | \n", + "Resort Hotel | \n", + "0 | \n", + "14 | \n", + "2015 | \n", + "July | \n", + "27 | \n", + "1 | \n", + "0 | \n", + "2 | \n", + "2 | \n", + "... | \n", + "No Deposit | \n", + "240.0 | \n", + "NaN | \n", + "0 | \n", + "Transient | \n", + "98.0 | \n", + "0 | \n", + "1 | \n", + "Check-Out | \n", + "2015-07-03 | \n", + "
5 rows × 32 columns
\n", + "import dalex as dx
+import pandas as pd
+import warnings
+warnings.filterwarnings('ignore')
+
+from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import f1_score
+
+from keras.models import Sequential
+from keras.layers import Dense
+import keras.backend as K
+
data = pd.read_csv('hotel_bookings.csv')
+data.head()
+
[output: data.head(), first 5 rows × 32 columns of hotel_bookings.csv]
+# in order to simplify plots I decided to use only a subset of variables
+data = data[['is_canceled', 'lead_time', 'arrival_date_year', 'adults', 'children', 'babies', 'booking_changes']]
+data = data.dropna()
+X, y = data.loc[:, data.columns != 'is_canceled'], data[['is_canceled']]
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+
observation = pd.DataFrame({'lead_time': [203.0],
+ 'arrival_date_year': [2016.0],
+ 'adults': [2.0],
+ 'children': [0.0],
+ 'babies': [0.0],
+ 'booking_changes': [4.]},
+ index = ['observation'])
+
def f1_metric(y_true, y_pred):
+ true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+ possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
+ predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
+ precision = true_positives / (predicted_positives + K.epsilon())
+ recall = true_positives / (possible_positives + K.epsilon())
+ f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
+ return f1_val
+
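A quick sanity check of the custom metric against sklearn's f1_score (imported above), assuming TF 2.x eager execution so the tensor can be read back with .numpy():

import numpy as np

y_true = np.array([1., 0., 1., 1.])
y_pred = np.array([0.9, 0.2, 0.6, 0.4])  # rounds to [1, 0, 1, 0]
keras_f1 = f1_metric(K.constant(y_true), K.constant(y_pred)).numpy()
sklearn_f1 = f1_score(y_true, np.round(y_pred))
print(keras_f1, sklearn_f1)  # both ~0.8, up to K.epsilon()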
model = Sequential()
+model.add(Dense(100, input_dim=6, activation='sigmoid'))
+model.add(Dense(1, activation='sigmoid'))
+model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', f1_metric])
+
model.fit(X_train, y_train, epochs=150, batch_size=10)
+
Epoch 1/150
10745/10745 [==============================] - 23s 2ms/step - loss: 0.6294 - accuracy: 0.6431 - f1_metric: 0.1923
Epoch 2/150
10745/10745 [==============================] - 21s 2ms/step - loss: 0.6301 - accuracy: 0.6292 - f1_metric: 0.1184
[... epochs 3-149 omitted; loss plateaus near 0.606 and accuracy near 0.665 ...]
Epoch 150/150
10745/10745 [==============================] - 23s 2ms/step - loss: 0.6065 - accuracy: 0.6644 - f1_metric: 0.2897
<tensorflow.python.keras.callbacks.History at 0x161305b1d90>+
model.evaluate(X_test, y_test) # hyperparameters were not tuned, so the results are poor
+
374/374 [==============================] - 1s 1ms/step - loss: 0.6059 - accuracy: 0.6647 - f1_metric: 0.4049 ++
[0.6058630347251892, 0.6647123098373413, 0.4049013555049896]+
model.predict(observation) # prediction
+
array([[0.3713342]], dtype=float32)+
exp_nn = dx.Explainer(model, X_train, y_train, label='neural_network')
+
Preparation of a new explainer is initiated + + -> data : 107447 rows 6 cols + -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. + -> target variable : 107447 values + -> model_class : tensorflow.python.keras.engine.sequential.Sequential (default) + -> label : neural_network + -> predict function : <function yhat_tf_regression at 0x0000016122F04DC0> will be used (default) + -> predict function : Accepts pandas.DataFrame and numpy.ndarray. + -> predicted values : min = 0.121, mean = 0.341, max = 0.581 + -> model type : regression will be used (default) + -> residual function : difference between y and yhat (default) + -> residuals : min = -0.581, mean = 0.0293, max = 0.879 + -> model_info : package tensorflow + +A new explainer has been created! ++
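Note that dalex guessed regression here because the keras model exposes only a single-column predict. If the classification framing matters, the type can be forced via the model_type argument of dx.Explainer; a sketch (the name exp_nn_clf is only for illustration, and the rest of the notebook works either way, since the sigmoid output is already a class-1 probability):

# hypothetical variant: treat the sigmoid output explicitly as a class-1 probability
exp_nn_clf = dx.Explainer(model, X_train, y_train,
                          label='neural_network', model_type='classification')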
nn_profile = exp_nn.predict_profile(new_observation = observation)
+
+# plot Ceteris Paribus profile
+nn_profile.plot(variables = ['lead_time', 'children', 'babies', 'booking_changes'])
+# in this model the children and babies variables did not matter at all
+# a lead time above 81 days increased the probability of cancellation, with another visible jump above 235 days
+# conversely, a larger number of booking changes decreased the chance of cancellation
+
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 6/6 [00:00<00:00, 8.27it/s] ++
forest = RandomForestClassifier(max_depth=2, random_state=0)
+forest.fit(X_train, y_train)
+
RandomForestClassifier(max_depth=2, random_state=0)+
forest.predict(observation) # prediction
+
array([0], dtype=int64)+
exp_forest = dx.Explainer(forest, X_train, y_train, label='random_forest')
+
Preparation of a new explainer is initiated + + -> data : 107447 rows 6 cols + -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. + -> target variable : 107447 values + -> model_class : sklearn.ensemble._forest.RandomForestClassifier (default) + -> label : random_forest + -> predict function : <function yhat_proba_default at 0x0000016122F04C10> will be used (default) + -> predict function : Accepts pandas.DataFrame and numpy.ndarray. + -> predicted values : min = 0.139, mean = 0.37, max = 0.45 + -> model type : classification will be used (default) + -> residual function : difference between y and yhat (default) + -> residuals : min = -0.45, mean = -3.51e-05, max = 0.861 + -> model_info : package sklearn + +A new explainer has been created! ++
forest_profile = exp_forest.predict_profile(new_observation = observation)
+
+forest_profile.plot(variables = ['lead_time', 'children', 'babies', 'booking_changes'])
+# this model was not as dependent on lead time, nor on the numbers of children and babies
+# a non-zero number of booking changes decreased the cancellation probability
+# but not as significantly as in the previous model
+
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 6/6 [00:00<00:00, 23.74it/s] ++
adaboost = AdaBoostClassifier(n_estimators=100, random_state=0)
+adaboost.fit(X_train, y_train)
+
AdaBoostClassifier(n_estimators=100, random_state=0)+
adaboost.predict(observation) # prediction
+
array([0], dtype=int64)+
exp_adaboost = dx.Explainer(adaboost, X_train, y_train, label='adaboost')
+
Preparation of a new explainer is initiated + + -> data : 107447 rows 6 cols + -> target variable : Parameter 'y' was a pandas.DataFrame. Converted to a numpy.ndarray. + -> target variable : 107447 values + -> model_class : sklearn.ensemble._forest.RandomForestClassifier (default) + -> label : adaboost + -> predict function : <function yhat_proba_default at 0x0000016122F04C10> will be used (default) + -> predict function : Accepts pandas.DataFrame and numpy.ndarray. + -> predicted values : min = 0.139, mean = 0.37, max = 0.45 + -> model type : classification will be used (default) + -> residual function : difference between y and yhat (default) + -> residuals : min = -0.45, mean = -3.51e-05, max = 0.861 + -> model_info : package sklearn + +A new explainer has been created! ++
adaboost_profile = exp_adaboost.predict_profile(new_observation = observation)
+
+adaboost_profile.plot(variables = ['lead_time', 'children', 'babies', 'booking_changes'])
+# AdaBoost doesn't really differ much from the random forest here
+# only the babies variable shows a more visible drop than before
+
Calculating ceteris paribus: 100%|███████████████████████████████████████████████████████| 6/6 [00:00<00:00, 26.78it/s] ++
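To make the model comparison in the conclusion below concrete, the three ceteris paribus profiles can be drawn on one plot; dalex explanation objects accept a list of other explanations of the same type in plot(). A sketch, assuming the three profile objects computed above:

# overlay the three models' profiles for the same observation;
# the distinct labels keep the curves distinguishable in the legend
nn_profile.plot([forest_profile, adaboost_profile],
                variables=['lead_time', 'booking_changes'])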
In general the models demonstrated similar behavior (a longer lead time increases, and a larger number of booking changes decreases, the cancellation probability) and shared the same significant variables. The neural network, however, demonstrated slightly more complex decision changes (it had several visible 'jumping' points, not just one). Of course, the results would be more reliable if the models were better trained (but hyperparameter tuning is out of scope for this homework).
+ +