Commit c562718: restructuring
NishantBaheti committed Feb 27, 2024
1 parent 0b00eda
Showing 8 changed files with 515 additions and 326 deletions.
1 change: 1 addition & 0 deletions src/mightypy/ml/_ensemble.py
@@ -1,6 +1,7 @@
"""
Ensemble methods for Machine Learning
"""

from __future__ import annotations
from typing import Union, Tuple, List, Optional
import numpy as np
147 changes: 80 additions & 67 deletions src/mightypy/ml/_linear.py
@@ -11,6 +11,7 @@
__copyright__ = "Nishant Baheti"
__license__ = "MIT"


class LinearRegression:
"""Linear Regression Model Class
@@ -23,8 +24,7 @@ class LinearRegression:
"""

def __init__(self, alpha: float = 0.01, iterations: int = 10000):
"""Constructor
"""
"""Constructor"""
self.alpha = alpha
self.iterations = iterations
self._theta = None
@@ -127,13 +127,15 @@ def predict(self, X: np.ndarray) -> np.ndarray:
else:
raise Warning("Model is not trained yet. Theta is None.")

- def train(self,
- X: np.ndarray,
- y: np.ndarray,
- verbose: bool = True,
- method: str = "SGD",
- theta_precision: float = 0.001,
- batch_size: int = 30) -> None:
+ def train(
+     self,
+     X: np.ndarray,
+     y: np.ndarray,
+     verbose: bool = True,
+     method: str = "SGD",
+     theta_precision: float = 0.001,
+     batch_size: int = 30,
+ ) -> None:
"""train model /theta estimator
Args:
@@ -203,8 +205,8 @@ def train(self,
# creating batch for this iteration
# X_batch = np.take(self._X, indices, axis=0)
# y_batch = np.take(self._y, indices, axis=0)
- X_batch = self._X[indices,:]
- y_batch = self._y[indices,:]
+ X_batch = self._X[indices, :]
+ y_batch = self._y[indices, :]

# calculate y_pred
y_pred = self.predict(X_batch)
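All of the SGD branches in this file share the pattern visible in this hunk: sample a batch of row indices, predict on the batch, then apply a mean-gradient step. As a reading aid, a sketch of the unregularized update these branches apply, matching the gradient and update lines visible in the ridge and lasso hunks below (theta is self._theta, alpha the learning rate, B the sampled batch):

\theta \leftarrow \theta - \alpha \cdot \frac{1}{|B|} \sum_{i \in B} \left(\hat{y}_i - y_i\right) x_i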
@@ -247,8 +249,7 @@ class RidgeRegression:
"""

def __init__(self, alpha: float = 0.01, iterations: int = 10000):
"""Constructor
"""
"""Constructor"""
self.alpha = alpha
self.iterations = iterations
self._theta = None
@@ -351,14 +352,16 @@ def predict(self, X: np.ndarray) -> np.ndarray:
else:
raise Warning("Model is not trained yet. Theta is None.")

- def train(self,
- X: np.ndarray,
- y: np.ndarray,
- verbose: bool = True,
- method: str = "SGD",
- theta_precision: float = 0.001,
- penalty: Union[float, int] = 1.0,
- batch_size: int = 30) -> None:
+ def train(
+     self,
+     X: np.ndarray,
+     y: np.ndarray,
+     verbose: bool = True,
+     method: str = "SGD",
+     theta_precision: float = 0.001,
+     penalty: Union[float, int] = 1.0,
+     batch_size: int = 30,
+ ) -> None:
"""train model /theta estimator
Args:
@@ -406,7 +409,7 @@ def train(self,
# theta_0 will not be effected by penalty
new_theta_0 = self._theta[:, [0]] - (self.alpha * gradient[0]) # type: ignore
# rest of theta's will be effected by it
- new_theta_rest = self._theta[:, range(1, self._n)] * (1 - (penalty/self._m)) - (self.alpha * gradient[1:]) # type: ignore
+ new_theta_rest = self._theta[:, range(1, self._n)] * (1 - (penalty / self._m)) - (self.alpha * gradient[1:]) # type: ignore

new_theta = np.hstack((new_theta_0, new_theta_rest))
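Spelled out, the regularized update in this hunk shrinks every weight except the bias theta_0 (lambda is penalty, m is self._m, g the mean gradient):

\theta_0 \leftarrow \theta_0 - \alpha g_0, \qquad \theta_j \leftarrow \theta_j \left(1 - \frac{\lambda}{m}\right) - \alpha g_j \quad (j \ge 1)

Note that the shrinkage factor as implemented is (1 - lambda/m); the more common textbook form folds in the learning rate, (1 - alpha * lambda/m).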

@@ -434,8 +437,8 @@ def train(self,
# X_batch = np.take(self._X, indices, axis=0)
# y_batch = np.take(self._y, indices, axis=0)

- X_batch = self._X[indices,:]
- y_batch = self._y[indices,:]
+ X_batch = self._X[indices, :]
+ y_batch = self._y[indices, :]

# calculate y_pred
y_pred = self.predict(X_batch)
@@ -444,11 +447,10 @@

# simultaneous operation
gradient = np.mean((y_pred - y_batch) * X_batch, axis=0) # type: ignore
- new_theta_0 = self._theta[:,[0]] - (self.alpha * gradient[0]) # type: ignore
- new_theta_rest = self._theta[:,range(1,self._n)] * (1 - (penalty/self._m) ) - (self.alpha * gradient[1:]) # type: ignore
-
- new_theta = np.hstack((new_theta_0,new_theta_rest))
+ new_theta_0 = self._theta[:, [0]] - (self.alpha * gradient[0]) # type: ignore
+ new_theta_rest = self._theta[:, range(1, self._n)] * (1 - (penalty / self._m)) - (self.alpha * gradient[1:]) # type: ignore
+
+ new_theta = np.hstack((new_theta_0, new_theta_rest))

if np.isnan(np.sum(new_theta)) or np.isinf(np.sum(new_theta)):
print("breaking. found inf or nan.")
@@ -464,8 +466,8 @@ def train(self,
self._theta_history.append(self._theta[0])

elif method == "NORMAL":
self._theta = np.linalg.inv(
self._X.T @ self._X + (penalty * np.identity(self._n))) @ self._X.T @ self._y
self._theta = (
np.linalg.inv(self._X.T @ self._X + (penalty * np.identity(self._n)))
@ self._X.T
@ self._y
)

else:
print("No Method Defined.")
@@ -483,8 +488,7 @@ class LassoRegression:
"""

def __init__(self, alpha: float = 0.01, iterations: int = 10000):
"""Constructor
"""
"""Constructor"""
self.alpha = alpha
self.iterations = iterations
self._theta = None
@@ -587,14 +591,16 @@ def predict(self, X: np.ndarray) -> np.ndarray:
else:
raise Warning("Model is not trained yet. Theta is None.")

- def train(self,
- X: np.ndarray,
- y: np.ndarray,
- verbose: bool = True,
- method: str = "SGD",
- theta_precision: float = 0.001,
- penalty: Union[int, float] = 1.0,
- batch_size: int = 30) -> None:
+ def train(
+     self,
+     X: np.ndarray,
+     y: np.ndarray,
+     verbose: bool = True,
+     method: str = "SGD",
+     theta_precision: float = 0.001,
+     penalty: Union[int, float] = 1.0,
+     batch_size: int = 30,
+ ) -> None:
"""train model /theta estimator
Args:
@@ -638,7 +644,7 @@ def train(self,

gradient = np.mean((y_pred - self._y) * self._X, axis=0) # type: ignore
new_theta_0 = self._theta[:, [0]] - (self.alpha * gradient[0]) # type: ignore
- new_theta_rest = self._theta[:, range(1, self._n)] - (self.alpha * gradient[1:]) - (penalty/self._m) # type: ignore
+ new_theta_rest = self._theta[:, range(1, self._n)] - (self.alpha * gradient[1:]) - (penalty / self._m) # type: ignore

new_theta = np.hstack((new_theta_0, new_theta_rest))
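As implemented in both lasso branches, the penalty enters as a flat subtraction from every non-bias weight:

\theta_0 \leftarrow \theta_0 - \alpha g_0, \qquad \theta_j \leftarrow \theta_j - \alpha g_j - \frac{\lambda}{m} \quad (j \ge 1)

A textbook subgradient step would subtract \frac{\alpha \lambda}{m} \operatorname{sign}(\theta_j) instead; the constant form is what this code applies.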

@@ -679,7 +685,7 @@ def train(self,

gradient = np.mean((y_pred - y_batch) * X_batch, axis=0) # type: ignore
new_theta_0 = self._theta[:, [0]] - (self.alpha * gradient[0]) # type: ignore
- new_theta_rest = self._theta[:, range(1, self._n)] - (self.alpha * gradient[1:]) - (penalty/self._m) # type: ignore
+ new_theta_rest = self._theta[:, range(1, self._n)] - (self.alpha * gradient[1:]) - (penalty / self._m) # type: ignore

new_theta = np.hstack((new_theta_0, new_theta_rest))

@@ -815,15 +821,17 @@ def predict(self, X: np.ndarray) -> np.ndarray:
else:
raise Warning("Model is not trained yet. Theta is None.")

- def train(self,
- X: np.ndarray,
- y: np.ndarray,
- verbose: bool = True,
- method: str = "SGD",
- theta_precision: float = 0.001,
- batch_size: int = 30,
- regularization: bool = False,
- penalty: Union[float, int] = 1.0) -> None:
+ def train(
+     self,
+     X: np.ndarray,
+     y: np.ndarray,
+     verbose: bool = True,
+     method: str = "SGD",
+     theta_precision: float = 0.001,
+     batch_size: int = 30,
+     regularization: bool = False,
+     penalty: Union[float, int] = 1.0,
+ ) -> None:
"""train theta / estimator
Args:
@@ -837,7 +845,7 @@ def train(self,
"SGD"(Stochastic Gradient Descent)
theta_precision (float, optional): theta initialization value precision. Defaults to 0.001.
batch_size (int, optional): batch size only for BGD. Defaults to 30.
regularization (bool, optional): Apply Regularization. Defaults to False.
penalty (Union[float, int], optional): regularization penalty only works for regularization=True. Defaults to 1.0.
"""
@@ -869,8 +877,9 @@ def train(self,
if regularization:
gradient = np.mean((y_pred - self._y) * self._X, axis=0)
new_theta_0 = self._theta[:, [0]] - (self.alpha * gradient[0])
- new_theta_rest = self._theta[:, range(
-     1, self._n)] * (1 - (penalty/self._m)) - (self.alpha * gradient[1:])
+ new_theta_rest = self._theta[:, range(1, self._n)] * (
+     1 - (penalty / self._m)
+ ) - (self.alpha * gradient[1:])
new_theta = np.hstack((new_theta_0, new_theta_rest))

else:
@@ -915,8 +924,9 @@ def train(self,
if regularization:
gradient = np.mean((y_pred - y_batch) * X_batch, axis=0)
new_theta_0 = self._theta[:, [0]] - (self.alpha * gradient[0])
- new_theta_rest = self._theta[:, range(
-     1, self._n)] * (1 - (penalty/self._m)) - (self.alpha * gradient[1:])
+ new_theta_rest = self._theta[:, range(1, self._n)] * (
+     1 - (penalty / self._m)
+ ) - (self.alpha * gradient[1:])
new_theta = np.hstack((new_theta_0, new_theta_rest))

else:
@@ -940,7 +950,9 @@ def train(self,
print("No Method Defined.")


- def polynomial_regression(x: np.ndarray, y: np.ndarray, degree: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+ def polynomial_regression(
+     x: np.ndarray, y: np.ndarray, degree: int
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Fit a regression line with the given polynomial degree.
@@ -963,15 +975,17 @@ def polynomial_regression(x: np.ndarray, y: np.ndarray, degree: int) -> Tuple[np
>>> plt.show()
"""
a = np.polynomial.Polynomial.fit(x, y, deg=degree).convert().coef

if len(a) == 1:
slope = a
resid = np.array([0])
else:
slope = a[1:]
resid = a[0]
- fit_line = np.array([(x**(degree - i))*slope[i]
-     for i in range(0, degree)]).sum(axis=0) + resid
+ fit_line = (
+     np.array([(x ** (degree - i)) * slope[i] for i in range(0, degree)]).sum(axis=0)
+     + resid
+ )
return slope, resid, fit_line
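Since convert().coef returns coefficients in ascending degree order, a = [a_0, a_1, ..., a_d], slope = a[1:], and resid = a_0, so the reformatted expression evaluates

\hat{y}(x) = \sum_{i=0}^{d-1} \texttt{slope}[i]\, x^{\,d-i} + \texttt{resid}

which pairs slope[i], the degree-(i+1) coefficient, with x^{d-i}; the two orderings coincide for degree 1, the case exercised in the module demo below.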


@@ -984,10 +998,10 @@ def trend(x: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndar
y (np.ndarray): dependent variable.
Returns:
Tuple[np.ndarray, np.ndarray, np.ndarray]: slope, residual, trendline.
Examples:
>>> import matplotlib.pyplot as plt
>>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
>>> y = np.array([1, 2, 3, 3, 4, 5, 7, 10])
>>> s, r, t = trend(x, y)
@@ -1006,18 +1020,17 @@ def trend(x: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndar
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = np.array([1, 2, 3, 3, 4, 5, 7, 10])
s, r, t = trend(x, y)
- plt.plot(x, y, 'o', label='original', alpha=0.6)
- plt.plot(x, t, '.-', label='regression line')
+ plt.plot(x, y, "o", label="original", alpha=0.6)
+ plt.plot(x, t, ".-", label="regression line")
plt.legend()
plt.show(block=True)


x = np.arange(1, 10)
y = x**2 + x**3
s, r, l = polynomial_regression(x, y, 1)

- plt.plot(x, y, 'ko', label='original', alpha=0.6)
- plt.plot(x, l, '.-', label='regression line')
+ plt.plot(x, y, "ko", label="original", alpha=0.6)
+ plt.plot(x, l, ".-", label="regression line")
plt.legend()
plt.show()
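The demo above exercises the module-level helpers; usage of the regression classes follows the same train/predict shape. A minimal sketch, assuming mightypy.ml re-exports LinearRegression from _linear and that the model handles the bias column internally:

import numpy as np

from mightypy.ml import LinearRegression  # assumed re-export of _linear.LinearRegression

# toy data: y = 3x + 2 plus gaussian noise
rng = np.random.default_rng(0)
X = rng.uniform(0, 10, size=(100, 1))
y = 3 * X + 2 + rng.normal(0, 0.5, size=(100, 1))

model = LinearRegression(alpha=0.01, iterations=10000)
model.train(X, y, method="SGD", batch_size=30, verbose=False)  # signature as in the diff above
y_pred = model.predict(X)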
