Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Student's t-distribution #72

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d500ff7
Add new file: Poissondistribution.py; Implement mean, stdev, pdf, cdf…
saeyma Oct 11, 2021
ff16c14
Add pydocs: Poisson Distribution
saeyma Oct 11, 2021
fdcba26
Add tests for Poisson distribution class
saeyma Oct 11, 2021
ea9664d
Update data map for Poisson
saeyma Oct 11, 2021
25f2fad
Missed a comma
saeyma Oct 11, 2021
15f7561
Remove trailing newlines
saeyma Oct 11, 2021
d7c9918
Lint: Poissondistribution.py and test.py with flake8
saeyma Oct 13, 2021
868bca8
Commit flake8 vscode settings: ignore E501
saeyma Oct 13, 2021
fa4b641
Add Poisson to __init__.py
saeyma Oct 16, 2021
7586e5b
Implement: Batesdistribution.py
saeyma Oct 16, 2021
5687b31
Add: documentation using pydocs
saeyma Oct 16, 2021
bc29d95
Adjust rounding errors in test.py
saeyma Oct 17, 2021
3f038bd
Change file ext
saeyma Oct 17, 2021
a6867b4
Remove: bates
saeyma Oct 19, 2021
5cfea20
Fix: errors and test failures; recalculate assert values manually
saeyma Oct 20, 2021
1b206d7
Fix: test errors in Poisson class; recalculate values manually
saeyma Oct 20, 2021
46fd0c3
Delete: dummy file
saeyma Oct 21, 2021
b83b949
Delete: dummy file
saeyma Oct 21, 2021
a0a043d
Custom combination function for backward compatibility
saeyma Oct 22, 2021
f9867fe
Change: _comb from method to function
saeyma Oct 23, 2021
3bcde5b
Merge branch 'feat/bates' into main
saeyma Oct 26, 2021
0c55bdf
Implement StudentTdistribution.py
saeyma Oct 27, 2021
0bdbc82
Add pydocs for StudentTdistribution
saeyma Oct 27, 2021
34604c1
Add tests for Student's t-distribution
saeyma Oct 27, 2021
a7ed95b
Fix: code factor issues; use enumerate
saeyma Oct 27, 2021
dd5edd6
Remove: unnecessary import
saeyma Oct 27, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,4 @@ cython_debug/
*~
*.swp
*.swo

5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"python.linting.flake8Enabled": true,
"python.linting.enabled": true,
"python.linting.flake8Args": ["--ignore=E501, F401"]
}
148 changes: 148 additions & 0 deletions probdists/Batesdistribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import math
import numpy as np
from matplotlib import pyplot as plt
from .Generaldistribution import Distribution


class Bates(Distribution):
    """ Bates distribution: the distribution of the mean of n independent
    random variables, each uniformly distributed on [a, b].
    """

    def __init__(self, n=20, a=0, b=1):
        """ Bates distribution class for calculating and
        visualizing a Bates distribution.

        Attributes:

            mean (float): the mean value of the distribution
            stdev (float): the standard deviation of the distribution

            data (list of floats): extracted from the data file

            n (int): The number of samples
            a (int): The lower limit of distribution [Default: 0]
            b (int): The upper limit of distribution [Default: 1]
        """
        self.n = n
        self.a = a
        self.b = b
        # The parameters must be stored before the base initialiser runs,
        # because calculate_mean/calculate_stdev read them.
        Distribution.__init__(self,
                              self.calculate_mean(),
                              self.calculate_stdev())

    def calculate_mean(self, round_to=2):
        """ Method to calculate the mean from a and b

        Stores the unrounded mean in ``self.mean``.

        Args:
            round_to (int): Round the mean value.
                [Default value: 2 floating point]

        Returns:
            float: mean of the distribution
        """
        self.mean = 0.5 * (self.a + self.b)

        return round(self.mean, round_to)

    def calculate_stdev(self, round_to=2):
        """ Method to calculate the standard deviation from n, a and b

        Stores the unrounded standard deviation in ``self.stdev``.

        Args:
            round_to (int): Round the standard deviation value.
                [Default value: 2 floating point]

        Returns:
            float: standard deviation of the distribution
        """
        # Variance of the mean of n U(a, b) variables is (b - a)^2 / (12 n).
        # The width must be squared; omitting the square is only correct
        # when b - a == 1.
        var = (self.b - self.a) ** 2 / (12 * self.n)

        self.stdev = math.sqrt(var)

        return round(self.stdev, round_to)

    def _fx(self, x):
        """ Internal function to calculate probability density function
        of the standard Bates distribution (a=0, b=1) at a point.
        Should not be used by end user.

        Args:
            x (float): point in the normalised [0, 1] range.

        Returns:
            float: density at x (0 outside [0, 1])
        """
        if x < 0 or x > 1:
            return 0

        # f(x) = n^n / (n-1)! * sum_{k=0}^{floor(n x)}
        #            (-1)^k * C(n, k) * (x - k/n)^(n-1)
        g = 0
        for i in range(0, int(self.n * x + 1)):
            g += pow(-1, i) * _comb(self.n, i) * pow(x - i / self.n, self.n - 1)
        return (self.n**self.n / math.factorial(self.n - 1)) * g

    def calculate_pdf(self, x, round_to=2):
        """ Probability density function calculator for the Bates distribution.

        Stores the unrounded density in ``self.pdf``.

        Args:
            x (float): point for calculating the probability density function
            round_to (int): Round the pdf value.
                [Default value: 2 floating point]

        Returns:
            float: probability density function
        """
        width = self.b - self.a
        # Map x onto the standard [0, 1] support, then rescale the density.
        self.pdf = self._fx((x - self.a) / width) / width
        return round(self.pdf, round_to)

    def calculate_cdf(self, x, round_to=2):
        """ Cumulative distribution function calculator for the Bates distribution.

        Stores the unrounded probability in ``self.cdf``.

        Args:
            x (float): point for calculating the cumulative distribution function
            round_to (int): Round the cdf value.
                [Default value: 2 floating point]

        Returns:
            float: cumulative distribution function output
        """
        z = (x - self.a) / (self.b - self.a)
        if z <= 0:
            value = 0.0
        elif z >= 1:
            value = 1.0
        else:
            # P(mean <= z) = P(sum <= n z), the Irwin-Hall CDF:
            # F(s) = 1/n! * sum_{k=0}^{floor(s)} (-1)^k * C(n, k) * (s - k)^n
            s = self.n * z
            total = 0
            for k in range(0, int(s) + 1):
                total += pow(-1, k) * _comb(self.n, k) * pow(s - k, self.n)
            value = total / math.factorial(self.n)
            # The alternating sum can drift slightly outside [0, 1]
            # through floating point cancellation.
            value = min(1.0, max(0.0, value))
        self.cdf = value
        return round(value, round_to)

    def plot_pdf(self, samples=10**6):
        """ Method to plot the pdf of the Bates distribution.

        Args:
            samples (int): number of evenly spaced points in [a, b]
                at which the pdf is evaluated

        Returns:
            F (np.array): list of PDFs for samples
        """
        x = np.linspace(self.a, self.b, num=samples)

        F = np.zeros_like(x)

        # The density is symmetric about the mean, so evaluate only the
        # first half (including the middle point) and mirror it.
        half = (len(x) + 1) // 2
        for i in range(half):
            # calculate_pdf does the normalisation to [0, 1] itself, so it
            # must receive the raw x value.
            F[i] = self.calculate_pdf(x[i])
            F[-i - 1] = F[i]  # symmetric graph

        plt.plot(x, F, label=f'n={self.n}')
        plt.legend()
        plt.title(f"Probability Distribution Function for Bates n={self.n}")
        plt.show()
        return F

    def __repr__(self):
        """ Method to output the characteristics of the Bates instance.
        Args:
            None
        Returns:
            string: characteristics of the Bates
        """
        return "mean {0}, standard deviation {1}, n {2}".format(self.mean,
                                                                self.stdev, self.n)


def _comb(n, k):
"""Protected function to calculate nCk
math.comb(n,k) was added in Python v3.8
Hence, for backward compatibility with earlier versions
"""
return math.factorial(n) / (math.factorial(n - k) * math.factorial(k))
2 changes: 1 addition & 1 deletion probdists/Exponentialdistribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def plot_bar_pdf(self, points=100):
#

def __repr__(self):
""" Method to outputthe characteristics of the Exponential instace.
""" Method to output the characteristics of the Exponential instace.
Args:
None
Returns:
Expand Down
8 changes: 5 additions & 3 deletions probdists/Generaldistribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ def read_data_file(self, file_name, separator='\\n', header=None):
'demo_gamma_data': 'numbers_gamma.txt',
'demo_uniform_data': 'numbers_uniform.txt',
'demo_bernoulli_data': 'numbers_bernoulli.txt',
'demo_triangular_data': 'numbers_triangular.txt'
'demo_triangular_data': 'numbers_triangular.txt',
'demo_poisson_data': 'numbers_poisson.txt',
'demo_bates_data': 'numbers_bates.txt'
}
if file_name in file_name_map:
dirname = Path(__file__).parent.parent.absolute()
Expand All @@ -78,7 +80,7 @@ def read_data_file(self, file_name, separator='\\n', header=None):
for i in df.iterrows():
try:
data_list.append(float(df.iat[i[0], 0]))
except: # pylint: disable=W0702
except Exception: # pylint: disable=W0702
traceback.print_exc()
print('Could not convert', df.iat[i[0], 0], ' to int.')
else:
Expand All @@ -102,7 +104,7 @@ def read_data_file(self, file_name, separator='\\n', header=None):
for number in line:
try:
data_list.append(float(number))
except: # pylint: disable=W0702
except Exception: # pylint: disable=W0702
traceback.print_exc()
print('Could not convert', number, ' to int.')
line = file.readline()
Expand Down
138 changes: 138 additions & 0 deletions probdists/Poissondistribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import math
import matplotlib.pyplot as plt
from .Generaldistribution import Distribution


class Poisson(Distribution):
    """ Poisson distribution class for calculating and
    visualizing a Poisson distribution.

    Attributes:

        mean (float): the mean value of the distribution
        stdev (float): the standard deviation of the distribution

        data (list of floats): extracted from the data file

        lmbda (float): rate of the poisson distribution
            (missing an 'a' to prevent name clash with Python keyword)

    """
    def __init__(self, lmbda):

        self.lmbda = lmbda

        # lmbda must be stored before the base initialiser runs, because
        # calculate_mean/calculate_stdev read it.
        Distribution.__init__(self,
                              self.calculate_mean(),
                              self.calculate_stdev())

    def calculate_mean(self, round_to=2):
        """ Method to calculate the mean from lambda

        Stores the unrounded mean in ``self.mean``.

        Args:
            round_to (int): Round the mean value.
                [Default value: 2 floating point]

        Returns:
            float: mean of the distribution
        """
        # The mean of a Poisson distribution is lambda itself; only the
        # standard deviation is sqrt(lambda).
        self.mean = self.lmbda

        return round(self.mean, round_to)

    def calculate_stdev(self, round_to=2):
        """ Method to calculate the standard deviation from lmbda

        Stores the unrounded standard deviation in ``self.stdev``.

        Args:
            round_to (int): Round the standard deviation value.
                [Default value: 2 floating point]

        Returns:
            float: standard deviation of the distribution
        """
        self.stdev = math.sqrt(self.lmbda)

        return round(self.stdev, round_to)

    def calculate_pdf(self, x, round_to=2):
        """ Probability mass function calculator for the Poisson distribution.

        Stores the unrounded probability in ``self.pdf``.

        Args:
            x (float): point for calculating the probability mass function
            round_to (int): Round the probability value.
                [Default value: 2 floating point]

        Returns:
            float: probability mass at x
        """

        self.pdf = self._calc_discrete_pdf(x)
        return round(self.pdf, round_to)

    def calculate_cdf(self, x, round_to=2):
        """ Cumulative distribution function calculator for the Poisson distribution.

        Stores the unrounded probability in ``self.cdf``.

        Args:
            x (float): point for calculating the cumulative distribution function
            round_to (int): Round the cdf value.
                [Default value: 2 floating point]

        Returns:
            float: cumulative distribution function output
        """
        # P(X <= x) only depends on floor(x); flooring also lets callers
        # pass a float. A negative x gives an empty range, so the sum is 0.
        upper = math.floor(x)
        value = 0
        for i in range(0, upper + 1):
            value += self._calc_discrete_pdf(i)
        self.cdf = value
        return round(value, round_to)

    def _calc_discrete_pdf(self, x):
        """ Internal function to calculate probability mass function at a point.
        Should not be used by end user.

        Args:
            x (int): point for calculating the probability mass.

        Returns:
            float: exp(-lmbda) * lmbda**x / x! for non-negative integral x,
                0.0 otherwise
        """
        # The support is the non-negative integers; everywhere else the
        # probability mass is zero (and math.factorial would raise).
        if x < 0 or x != math.floor(x):
            return 0.0
        k = int(x)
        return (math.exp(-self.lmbda) * self.lmbda ** k) / math.factorial(k)

    def plot_pdf(self, points=100):
        """ Method to plot the pdf of the Poisson distribution.

        Args:
            points (int): number of discrete data points

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot

        """

        # evaluate the probability mass at 0, 1, ..., points
        x = list(range(points + 1))
        y = [self._calc_discrete_pdf(i) for i in x]

        # make the plots
        plt.bar(x, y)
        plt.title("Probability Mass Plt for Poisson Distribution")
        plt.ylabel("Probability")
        plt.xlabel("x")

        plt.show()

        return x, y

    def __repr__(self):
        """ Method to output the characteristics of the Poisson instance.
        Args:
            None
        Returns:
            string: characteristics of the Poisson
        """

        return "mean {0}, standard deviation {1}, lambda {2}".format(self.mean,
                                                                     self.stdev, self.lmbda)
Loading