diff --git a/.gitignore b/.gitignore index 12d6303..1ff859b 100644 --- a/.gitignore +++ b/.gitignore @@ -143,3 +143,4 @@ cython_debug/ *~ *.swp *.swo + diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e8a555a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "python.linting.flake8Enabled": true, + "python.linting.enabled": true, + "python.linting.flake8Args": ["--ignore=E501, F401"] +} \ No newline at end of file diff --git a/probdists/Batesdistribution.py b/probdists/Batesdistribution.py new file mode 100644 index 0000000..9e1b0c7 --- /dev/null +++ b/probdists/Batesdistribution.py @@ -0,0 +1,148 @@ +import math +import numpy as np +from matplotlib import pyplot as plt +from .Generaldistribution import Distribution + + +class Bates(Distribution): + + def __init__(self, n=20, a=0, b=1): + """ Bates distribution class for calculating and + visualizing a Bates distribution. + + Attributes: + + mean (float): the mean value of the distribution + stdev (float): the standard deviation of the distribution + + data (list of floats): extracted from the data file + + n (int): The number of samples + a (int): The lower limit of distribution [Default: 0] + b (int): The upper limit of distribution [Default: 1] + """ + self.n = n + self.a = a + self.b = b + Distribution.__init__(self, + self.calculate_mean(), + self.calculate_stdev()) + + def calculate_mean(self, round_to=2): + """ Method to calculate the mean from n + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: mean of the distribution + """ + self.mean = 0.5 * (self.a + self.b) + + return round(self.mean, round_to) + + def calculate_stdev(self, round_to=2): + """ Method to calculate the standard deviation from n + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: standard deviation of the distribution + """ + var = (self.b - self.a) / (12 * self.n) + + self.stdev = math.sqrt(var) + + return round(self.stdev, round_to) + + def _fx(self, x): + """ Internal function to calculate probability density function at a point. + Should not be used by end user. + + Args: + x (int): point for calculating the mean value. + """ + if x < 0 or x > 1: + value = 0 + else: + g = 0 + for i in range(0, int(self.n * x + 1)): + g += pow(-1, i) * _comb(self.n, i) * pow(x - i / self.n, self.n - 1) + value = (self.n**self.n / math.factorial(self.n - 1)) * g + return value + + def calculate_pdf(self, x, round_to=2): + """ Probability density function calculator for the Bates distribution. + + Args: + x (float): point for caluclating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: probability density function + """ + self.pdf = self._fx((x - self.a) / (self.b - self.a) + ) / (self.b - self.a) + return round(self.pdf, round_to) + + def calculate_cdf(self, x, round_to=2): + """ Cumulative distribution function calculator for the Bates distribution. + Args: + x (float): point for calculating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: cumulative distribution function output + """ + value = 0 + for i in range(0, int(x) + 1): + value += self.calculate_pdf(i) + self.cdf = value + return round(value, round_to) + + def plot_pdf(self, samples=10**6): + """ Method to plot the pdf of the Bates distribution. + + Args: + points (int): number of discrete data points + + Returns: + F (np.array): list of PDFs for samples + """ + x = np.linspace(self.a, self.b, num=samples) + y = (x - self.a) / (self.b - self.a) + + F = np.zeros_like(y) + + for i in range(0, len(y) + 1 // 2): + F[i] = self.calculate_pdf(y[i]) + F[-i - 1] = F[i] # symmetric graph + + plt.plot(x, F, label=f'n={self.n}') + plt.legend() + plt.title(f"Probability Distribution Function for Bates n={self.n}") + plt.show() + return F + + def __repr__(self): + """ Method to output the characteristics of the Bates instace. + Args: + None + Returns: + string: characteristics of the Bates + """ + return "mean {0}, standard deviation {1}, n {2}".format(self.mean, + self.stdev, self.n) + + +def _comb(n, k): + """Protected function to calculate nCk + math.comb(n,k) was added in Python v3.8 + Hence, for backward compatibility with earlier versions + """ + return math.factorial(n) / (math.factorial(n - k) * math.factorial(k)) diff --git a/probdists/Exponentialdistribution.py b/probdists/Exponentialdistribution.py index 8eda38f..a9cf3f8 100644 --- a/probdists/Exponentialdistribution.py +++ b/probdists/Exponentialdistribution.py @@ -124,7 +124,7 @@ def plot_bar_pdf(self, points=100): # def __repr__(self): - """ Method to outputthe characteristics of the Exponential instace. + """ Method to output the characteristics of the Exponential instace. Args: None Returns: diff --git a/probdists/Generaldistribution.py b/probdists/Generaldistribution.py index 4beac8b..d2311c0 100644 --- a/probdists/Generaldistribution.py +++ b/probdists/Generaldistribution.py @@ -58,7 +58,9 @@ def read_data_file(self, file_name, separator='\\n', header=None): 'demo_gamma_data': 'numbers_gamma.txt', 'demo_uniform_data': 'numbers_uniform.txt', 'demo_bernoulli_data': 'numbers_bernoulli.txt', - 'demo_triangular_data': 'numbers_triangular.txt' + 'demo_triangular_data': 'numbers_triangular.txt', + 'demo_poisson_data': 'numbers_poisson.txt', + 'demo_bates_data': 'numbers_bates.txt' } if file_name in file_name_map: dirname = Path(__file__).parent.parent.absolute() @@ -78,7 +80,7 @@ def read_data_file(self, file_name, separator='\\n', header=None): for i in df.iterrows(): try: data_list.append(float(df.iat[i[0], 0])) - except: # pylint: disable=W0702 + except Exception: # pylint: disable=W0702 traceback.print_exc() print('Could not convert', df.iat[i[0], 0], ' to int.') else: @@ -102,7 +104,7 @@ def read_data_file(self, file_name, separator='\\n', header=None): for number in line: try: data_list.append(float(number)) - except: # pylint: disable=W0702 + except Exception: # pylint: disable=W0702 traceback.print_exc() print('Could not convert', number, ' to int.') line = file.readline() diff --git a/probdists/Poissondistribution.py b/probdists/Poissondistribution.py new file mode 100644 index 0000000..78b670d --- /dev/null +++ b/probdists/Poissondistribution.py @@ -0,0 +1,138 @@ +import math +import matplotlib.pyplot as plt +from .Generaldistribution import Distribution + + +class Poisson(Distribution): + """ Poisson distribution class for calculating and + visualizing a Poisson distribution. + + Attributes: + + mean (float): the mean value of the distribution + stdev (float): the standard deviation of the distribution + + data (list of floats): extracted from the data file + + lmbda (float): rate of the poisson distribution + (missing an 'a' to prevent name clash with Python keyword) + + """ + def __init__(self, lmbda): + + self.lmbda = lmbda + + Distribution.__init__(self, + self.calculate_mean(), + self.calculate_stdev()) + + def calculate_mean(self, round_to=2): + """ Method to calculate the mean from lambda + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: mean of the distribution + """ + self.mean = math.sqrt(self.lmbda) + + return round(self.mean, round_to) + + def calculate_stdev(self, round_to=2): + """ Method to calculate the standard deviation from lmbda + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: standard deviation of the distribution + """ + self.stdev = math.sqrt(self.lmbda) + + return round(self.stdev, round_to) + + def calculate_pdf(self, x, round_to=2): + """ Probability density function calculator for the Poisson distribution. + + Args: + x (float): point for calculating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: probability density function + """ + + self.pdf = self._calc_discrete_pdf(x) + return round(self.pdf, round_to) + + def calculate_cdf(self, x, round_to=2): + """ Cumulative distribution function calculator for the Poisson distribution. + Args: + x (float): point for calculating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: cumulative distribution function output + """ + value = 0 + for i in range(0, x + 1): + value += self._calc_discrete_pdf(i) + self.cdf = value + return round(value, round_to) + + def _calc_discrete_pdf(self, x): + """ Internal function to calculate probability density function at a point. + Should not be used by end user. + + Args: + x (int): point for calculating the mean value. + """ + fact = math.factorial(x) + pdf = (math.exp(-self.lmbda) * self.lmbda ** x) / fact + return pdf + + def plot_pdf(self, points=100): + """ Method to plot the pdf of the Poisson distribution. + + Args: + points (int): number of discrete data points + + Returns: + list: x values for the pdf plot + list: y values for the pdf plot + + """ + + x = [] + y = [] + + # calculate the x values to visualize + for i in range(points + 1): + x.append(i) + y.append(self._calc_discrete_pdf(i)) + + # make the plots + plt.bar(x, y) + plt.title("Probability Mass Plt for Poisson Distribution") + plt.ylabel("Probability") + plt.xlabel("x") + + plt.show() + + return x, y + + def __repr__(self): + """ Method to output the characteristics of the Poisson instace. + Args: + None + Returns: + string: characteristics of the Poisson + """ + + return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean, + self.stdev, self.lmbda) diff --git a/probdists/StudentTdistribution.py b/probdists/StudentTdistribution.py new file mode 100644 index 0000000..3f5b2e7 --- /dev/null +++ b/probdists/StudentTdistribution.py @@ -0,0 +1,161 @@ +import numpy as np +import mpmath as mp +import math +from matplotlib import pyplot as plt +from .Generaldistribution import Distribution + + +class StudentT(Distribution): + + def __init__(self, v=1): + """ Student's t-distribution class for calculating and + visualizing a t-distribution. + + Attributes: + + mean (float): the mean value of the distribution + stdev (float): the standard deviation of the distribution + + data (list of floats): extracted from the data file + + v (float): degree of freedom [Default: 1] + """ + + self.v = v + + Distribution.__init__(self, self.calculate_mean(), + self.calculate_stdev()) + + def calculate_mean(self, round_to=2): + """ Method to calculate the mean + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: mean of the distribution + None is used to represent "undefined" + """ + if self.v > 1: + self.mean = 0 + else: + self.mean = None + + return self.mean + + def calculate_stdev(self, round_to=2): + """ Method to calculate the standard deviation + + Args: + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: standard deviation of the distribution + None is used to represent "undefined" + """ + if self.v > 2: + self.stdev = self.v / (self.v - 2) + elif self.v > 1 and self.v <= 2: + self.stdev = float("inf") + else: + self.stdev = None # undefined + + return self.stdev + + def calculate_pdf(self, x, round_to=2): + """ Probability density function calculator for the t-distribution. + + Args: + x (float): point for calculating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: probability density function + """ + denom = math.sqrt(self.v * math.pi) * math.gamma(self.v / 2) + num = math.gamma(self.v + 1 / 2) * pow((1 + (x ** 2 / self.v)), -1 * (self.v + 1 / 2)) + pdf = num / denom + self.pdf = round(pdf, round_to) + return self.pdf + + def calculate_cdf(self, x, round_to=2): + """Cumulative distribution function calculator for the Bates distribution. + NOTE: This function is valid only for x**2 < v + For other values of x, it returns None + This shall be fixed in the future + + Args: + x (float): point for calculating the probability density function + round_to (int): Round the mean value. + [Default value: 2 floating point] + + Returns: + float: cumulative distribution function output + """ + out = None + if x**2 < self.v: + numerator = x * math.gamma(self.v + 1 / 2) + denominator = math.sqrt(math.pi * self.v) * math.gamma(self.v / 2) + two_f_one = mp.hyp2f1(0.5, 0.5 * (self.v + 1), 1.5, -x**2 / self.v) + cdf = float(0.5 + (numerator * two_f_one) / denominator) + self.cdf = round(cdf, round_to) + out = self.cdf + return out + + def plot_pdf(self, a=-4, b=4, samples=10**3): + """Method to plot the pdf of the t-distribution. + + Args: + a (float): left hand limit for the graph [Default: -4] + b (float): right hand limit for the graph [Default: 4] + samples (int): `samples` number of points evenly distributed + between a and b [Default: 10**3] + + Returns: + y (np.array): list of PDFs for samples + """ + + x = np.linspace(a, b, nunm=samples) + y = np.zeroes_like(x) + + for i in range(0, len(x) + 1 // 2): + y[i] = self.calculate_pdf(x[i]) + y[-i - 1] = y[i] # symmetric; compute friendly + + plt.plot(x, y, label=f"v={self.v}") + plt.legend() + plt.title(f"Probability Distribution Function for Student's t-distribution v={self.v}") + plt.show() + return y + + def plot_cdf(self, a=-4, b=4, samples=10**3): + """Method to plot the cdf of the t-distribution + + Args: + a (float): left hand limit for the graph [Default: -4] + b (float): right hand limit for the graph [Default: 4] + samples (int): `samples` number of points evenly distributed + between a and b [Default: 10**3] + + Returns: + y (np.array): list of CDFs for samples + """ + x = np.linspace(a, b, nunm=samples) + y = np.zeroes_like(x) + + for i, item in enumerate(x): + y[i] = self.calculate_cdf(item) + + plt.plot(x, y, label=f"v={self.v}") + plt.legend() + plt.title(f"Cumulative Distribution Function for Student's t-distribution v={self.v}") + plt.show() + return y + + def __repr__(self): + + return 'mean {0}, standard deviation {1}, \ + degree of freedom {2}'.format(self.mean, self.stdev, self.v) diff --git a/probdists/__init__.py b/probdists/__init__.py index 664a26b..23b5802 100644 --- a/probdists/__init__.py +++ b/probdists/__init__.py @@ -6,3 +6,6 @@ from .Bernoullidistribution import Bernoulli from .Uniformdistribution import Uniform from .Triangulardistribution import Triangular, TriangularValueException +from .Poissondistribution import Poisson +from .Batesdistribution import Bates +from .StudentTdistribution import StudentT diff --git a/probdists/numbers_bates.txt b/probdists/numbers_bates.txt new file mode 100644 index 0000000..4a32a4c --- /dev/null +++ b/probdists/numbers_bates.txt @@ -0,0 +1 @@ +0.0 0.03448276 0.06896552 0.10344828 0.13793103 0.17241379 0.20689655 0.24137931 0.27586207 0.31034483 0.34482759 0.37931034 0.4137931 0.44827586 0.48275862 0.51724138 0.55172414 0.5862069 0.62068966 0.65517241 0.68965517 0.72413793 0.75862069 0.79310345 0.82758621 0.86206897 0.89655172 0.93103448 0.96551724 1.0 \ No newline at end of file diff --git a/probdists/numbers_poisson.txt b/probdists/numbers_poisson.txt new file mode 100644 index 0000000..6a0bcb8 --- /dev/null +++ b/probdists/numbers_poisson.txt @@ -0,0 +1,100 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index bbec779..65eb89c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ python-dateutil==2.8.1 pytz==2020.1 six==1.15.0 xlrd==1.2.0 +mpmath==1.2.1 diff --git a/test.py b/test.py index 240b84d..7bd31a4 100644 --- a/test.py +++ b/test.py @@ -8,6 +8,9 @@ from probdists import Bernoulli from probdists import Uniform from probdists import Triangular, TriangularValueException +from probdists import Poisson +from probdists import Bates +from probdists import StudentT class TestGeneraldistribution(unittest.TestCase): @@ -49,8 +52,7 @@ def test_readdata(self): def test_meancalculation(self): self.gaussian.calculate_mean() self.assertEqual(self.gaussian.mean, - sum(self.gaussian.data) / - float(len(self.gaussian.data)), + sum(self.gaussian.data) / float(len(self.gaussian.data)), 'calculated mean not as expected') def test_stdevcalculation(self): @@ -181,14 +183,140 @@ def test_cdf(self): self.exponential.calculate_mean() self.exponential.calculate_stdev() - self.assertEqual(self.exponential.calculate_cdf(-1.3), 0, \ - 'calculate_cdf does not return expected result after calculating mean and stdev') - self.assertEqual(self.exponential.calculate_cdf(9.5, 4), 0.907, \ - 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.exponential.calculate_cdf(-1.3), + 0, + 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.exponential.calculate_cdf(9.5, 4), + 0.907, + 'calculate_cdf does not return expected result after calculating mean and stdev') + + +class TestPoissonClass(unittest.TestCase): + def setUp(self): + self.poisson = Poisson(50) + self.poisson.read_data_file('probdists/numbers_poisson.txt') + + def test_initialization(self): + self.assertEqual(round(self.poisson.mean, 2), 7.07, 'incorrect mean') + self.assertEqual(round(self.poisson.stdev, 2), 7.07, + 'incoorect standard deviation') + + def test_readdata(self): + self.assertEqual(self.poisson.data, + [i for i in range(1, 101)], + 'data read incorrectly') + + def test_meancalculation(self): + self.poisson.calculate_mean() + self.assertEqual(round(self.poisson.mean, 2), + 7.07, + 'calculated mean not as expected') + + def test_stdevcalculation(self): + self.poisson.calculate_stdev() + self.assertEqual(round(self.poisson.stdev, 2), + 7.07, + 'calculated standard deviation incorrect') + + def test_pdf(self): + self.assertEqual(self.poisson.calculate_pdf(50, 5), 0.05633, + 'calculate_pdf function does not give expected result') + self.poisson.calculate_mean() + self.poisson.calculate_stdev() + self.assertEqual(self.poisson.calculate_pdf(75, 5), 0.00021, + 'calculate_pdf function after calculating mean and \ + stdev does not give expected result') + + def test_cdf(self): + self.assertEqual(self.poisson.calculate_cdf(25), 0.0, 'calculate_cdf does not return expected result') + self.assertEqual(self.poisson.calculate_cdf(50, 5), 0.53752, 'calculate_cdf does not return expected result') + + self.poisson.calculate_mean() + self.poisson.calculate_stdev() + + self.assertEqual(self.poisson.calculate_cdf(60), + 0.93, + 'calculate_cdf does not return expected result after calculating mean and stdev') + self.assertEqual(self.poisson.calculate_cdf(75, 4), + 0.9996, + 'calculate_cdf does not return expected result after calculating mean and stdev') + + +class TestBatesClass(unittest.TestCase): + def setUp(self): + self.bates = Bates(n=30) + self.bates.read_data_file('probdists/numbers_bates.txt') + + def test_initialization(self): + self.assertEqual(self.bates.a, 0, 'incorrect initialization of interval start') + self.assertEqual(self.bates.b, 1, 'incorrect initialization of interval end') + + def test_readdata(self): + self.assertEqual(self.bates.data, + [0., 0.03448276, 0.06896552, 0.10344828, 0.13793103, + 0.17241379, 0.20689655, 0.24137931, 0.27586207, 0.31034483, + 0.34482759, 0.37931034, 0.4137931, 0.44827586, 0.48275862, + 0.51724138, 0.55172414, 0.5862069, 0.62068966, 0.65517241, + 0.68965517, 0.72413793, 0.75862069, 0.79310345, 0.82758621, + 0.86206897, 0.89655172, 0.93103448, 0.96551724, 1.], + 'data read incorrectly') + + def test_meancalculation(self): + self.assertEqual(self.bates.mean, 0.5, 'incorrect mean') + + def test_stdev(self): + self.assertEqual(round(self.bates.stdev, 2), 0.05) + + def test_pdf(self): + self.bates.calculate_mean() + self.bates.calculate_stdev() + self.bates.calculate_pdf(0.06896552) + self.assertEqual(round(self.bates.pdf), 0, 'incorrect pdf') + self.bates.calculate_pdf(0.75862069) + self.assertEqual(round(self.bates.pdf), 0, 'incorrect pdf') + + def test_cdf(self): + self.bates.calculate_cdf(0.5) + self.assertEqual(self.bates.cdf, 0, 'incorrect cdf') + + +class TestStudentTClass(unittest.TestCase): + def setUp(self): + self.student = StudentT() + + def test_initialization(self): + self.assertEqual(self.student.v, 1, 'incorrect initialization of degree of freedom') + + def test_meancalculation(self): + self.assertEqual(self.student.mean, None, 'incorrect mean') + + def test_stdev(self): + self.assertEqual(self.student.stdev, None, 'incorrect stdev') + + def test_pdf(self): + self.student.calculate_mean() + self.student.calculate_stdev() + self.student.calculate_pdf(-3.5) + self.assertEqual(self.student.pdf, 0.01, 'incorrect pdf') + self.student.calculate_pdf(0.0) + self.assertEqual(self.student.pdf, 0.28, 'incorrect pdf') + self.student.calculate_pdf(3.5) + self.assertEqual(self.student.pdf, 0.01, 'incorrect pdf') + + def test_cdf(self): + self.student.calculate_cdf(0.0) + self.assertEqual(self.student.cdf, 0.5, 'incorrect cdf') + + self.student.calculate_cdf(0.1, round_to=5) + self.assertEqual(self.student.cdf, 0.52812, 'incorrect cdf') + + self.student.calculate_cdf(-0.1, 5) + self.assertEqual(self.student.cdf, 0.47188, 'incorrect cdf') + class TestUniformClass(unittest.TestCase): def setUp(self): - self.uniform = Uniform(0,10) + self.uniform = Uniform(0, 10) self.uniform.read_data_file('probdists/numbers_uniform.txt') def test_initialization(self): @@ -208,11 +336,10 @@ def test_replace_stats_with_data(self): self.assertEqual(l, 1) self.assertEqual(h, 5) - def test_meancalculation(self): self.uniform.calculate_mean() self.assertEqual(self.uniform.mean, - 5, + 5, 'calculated mean not as expected') def test_stdevcalculation(self): @@ -238,7 +365,6 @@ def test_cdf(self): self.assertEqual(self.uniform.calculate_cdf(4), 0.75, 'calculate_cdf function does not give expected result') - class TestGammaClass(unittest.TestCase): def setUp(self): self.gamma = Gamma() @@ -276,9 +402,9 @@ def test_pdf(self): 'calculate_pdf function does not give expected result') def test_cdf(self): - self.assertEqual(self.gamma.calculate_cdf(4, False, 5), 1 - round(3/math.exp(2), 5), + self.assertEqual(self.gamma.calculate_cdf(4, False, 5), 1 - round(3 / math.exp(2), 5), 'cdf function does not give expected result') - self.assertEqual(self.gamma.calculate_cdf(4), round(3/math.exp(2), 2), + self.assertEqual(self.gamma.calculate_cdf(4), round(3 / math.exp(2), 2), 'cdf function does not give expected result') def test_add(self):