|
import io
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers

# NOTE: "%matplotlib inline" is an IPython/Jupyter magic, not Python syntax;
# it was removed so this file runs as a plain .py script. Re-add it only
# inside a notebook cell.

# Silence library deprecation chatter. Comment this out while debugging —
# it also hides genuinely useful warnings.
warnings.filterwarnings('ignore')
| 15 | + |
# Load the dataset and take a first look at the data.
data = pd.read_csv('housing_data - housing_data.csv')
print(data)

# Fill null values with the mean of each numeric column.
# numeric_only=True avoids a TypeError on non-numeric columns in pandas >= 2.0.
data.fillna(data.mean(numeric_only=True), inplace=True)

# Confirm that no nulls remain in the numeric columns.
print(data.isnull().sum())

# Summary statistics, column dtypes and overall shape.
# (Bare expressions only display in a notebook; print() is required in a script.)
print(data.describe())
data.info()
print(data.shape)
| 29 | + |
# Distribution of the target variable MEDV (median house value).
# sns.distplot was deprecated in seaborn 0.11 and removed in later releases;
# histplot(..., kde=True) is the modern equivalent.
sns.histplot(data['MEDV'], kde=True)
plt.show()

# Box plot of the target to spot outliers.
sns.boxplot(x=data['MEDV'])
plt.show()

# Correlation of every numeric feature with the target.
# numeric_only=True keeps pandas >= 2.0 from raising on non-numeric columns.
correlation = data.corr(numeric_only=True)
print(correlation.loc['MEDV'])

# Heatmap of the full correlation matrix.
fig, axes = plt.subplots(figsize=(15, 12))
sns.heatmap(correlation, square=True, annot=True)
plt.show()
| 42 | + |
# Scatter plots of the target against the three features most correlated
# with MEDV (per the correlation matrix above).
plt.figure(figsize=(20, 5))
features = ['LSTAT', 'RM', 'PTRATIO']
for i, col in enumerate(features):
    plt.subplot(1, len(features), i + 1)
    plt.scatter(data[col], data.MEDV, marker='o')
    plt.title('Variation in House prices')
    plt.xlabel(col)
    # Fixed: the original label carried stray embedded double quotes.
    plt.ylabel('House prices in $1000')
plt.tight_layout()
plt.show()
| 54 | + |
# Split into independent features (all columns but the last) and the target.
# X = data[['LSTAT', 'RM', 'PTRATIO']]   # alternative: only top-correlated features
X = data.iloc[:, :-1]
y = data.MEDV

# Split ONCE into train/test sets.
# BUG FIX: the original code scaled the features (twice — with StandardScaler
# AND a manual mean/std pass) and then called train_test_split a second time
# from the raw X, silently discarding all scaling before fitting the model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: fit the scaler on the training set only, then apply
# the same transform to the test set (fitting on the full data would leak
# test-set statistics into training).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Linear Regression baseline, fitted on the scaled training data.
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train, y_train)
| 85 | + |
| 86 | + |
| 87 | + |
# Predict on the held-out test set with the linear model.
y_pred = regressor.predict(X_test)

# Root-mean-squared error on the test set.
from sklearn.metrics import mean_squared_error

rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('Linear regression RMSE on test data:', rmse)

# R^2 (coefficient of determination) on the test set.
# (The original computed and printed this twice; the duplicate was removed.
# r2_score is already imported at the top of the file.)
r2 = r2_score(y_test, y_pred)
print('Linear regression R^2 on test data:', r2)
| 102 | + |
# Neural-network model. The original file evaluated a variable `model` that
# was never defined anywhere (NameError) — the defining notebook cell is
# missing. A small Keras regression network is reconstructed here so the
# evaluation below can run.
# NOTE(review): the architecture and epoch count are assumptions — confirm
# against the original notebook if it can be recovered.
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),  # single linear output for regression
])
# loss='mse' is what model.evaluate() returns first; MAE is tracked as the
# metric, matching the (mse_nn, mae_nn) unpacking below.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.fit(X_train, y_train, epochs=100, validation_split=0.1, verbose=0)

# Evaluation of the model on the test set.
y_pred = model.predict(X_test)
mse_nn, mae_nn = model.evaluate(X_test, y_test)
print('Mean squared error on test data: ', mse_nn)
print('Mean absolute error on test data: ', mae_nn)
| 108 | + |
# Comparison with a traditional approach: plain Linear Regression.
# (LinearRegression, mean_squared_error, mean_absolute_error and r2_score
# are all already imported earlier in the file.)
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

mse_lr = mean_squared_error(y_test, y_pred_lr)
mae_lr = mean_absolute_error(y_test, y_pred_lr)
print('Mean squared error on test data: ', mse_lr)
print('Mean absolute error on test data: ', mae_lr)

# BUG FIX: the original scored y_pred (the neural network's predictions)
# here, so the "linear regression" R^2 actually reported the NN's score.
r2 = r2_score(y_test, y_pred_lr)
print('Linear regression R^2 on test data:', r2)
| 122 | + |
# RMSE of the most recent model predictions. At this point in the script
# y_pred holds the neural network's test-set predictions (assigned in the
# evaluation step above), not the linear regression's.
# (mean_squared_error is already imported earlier in the file.)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE on test data:', rmse)
| 127 | + |
# Make a prediction on a single new, unseen observation.
# The 13 raw feature values below follow the column order of X —
# presumably the Boston-housing-style schema (CRIM .. LSTAT); verify
# against the dataset's actual columns.
# Building a DataFrame with X.columns (instead of passing a bare list)
# keeps scikit-learn's feature-name check satisfied, since the scaler was
# fitted on a DataFrame.
new_data = pd.DataFrame(
    [[0.1, 10.0, 5.0, 0, 0.4, 6.0, 50, 6.0, 1, 400, 20, 300, 10]],
    columns=X.columns,
)
# Apply the SAME scaler that was fitted on the training data.
new_data_scaled = scaler.transform(new_data)
prediction = model.predict(new_data_scaled)
print("Predicted house price:", prediction)
# (Removed scraped web-page footer: "0 commit comments".)