-
Notifications
You must be signed in to change notification settings - Fork 377
/
Copy pathR_Functions.py
457 lines (399 loc) · 12.2 KB
/
R_Functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
"""
Compact Python wrapper library for commonly used R-style functions
============================================================================
The functional-programming nature of R provides users with an extremely simple and compact interface for quick calculations of probabilities and essential descriptive/inferential statistics for a data analysis problem. On the other hand, Python's scripting ability allows the analyst to use those statistics in a wide variety of analytics pipelines with limitless sophistication and creativity. To combine the advantages of both worlds, one needs a simple Python-based wrapper library which contains some basic functions pertaining to probability distributions and descriptive statistics defined in R-style, so that users can call those functions quickly without having to go to the proper Python statistical libraries and figure out the whole list of methods and arguments.
The goal of this library is to provide simple Python subroutines mimicking R-style statistical functions for quickly calculating density/point estimates, cumulative distributions, and quantiles, and for generating random variates for various important probability distributions. To maintain the spirit of R styling, no class hierarchy is used and just raw functions are defined in this file, so that the user can import this one Python script and use all the functions whenever needed with a single name call.
"""
#============================
# Basic Descriptive Statistics
#============================
def mean(array):
    """
    Return the arithmetic mean of an array/vector.

    The input is first converted to a NumPy array, so plain lists and
    tuples are accepted.
    """
    import numpy as np

    values = np.array(array)
    return np.mean(values)
def sd(array):
    """
    Calculates the sample standard deviation of an array/vector

    Uses the n-1 denominator (``ddof=1``), which is what R's ``sd`` computes.
    For an input with a single element the result is ``nan``.
    """
    import numpy as np

    values = np.array(array)
    # np.std defaults to ddof=0 (population formula); R's sd divides by n-1.
    return np.std(values, ddof=1)
def median(array):
    """
    Return the median of an array/vector.

    The input is first converted to a NumPy array, so plain lists and
    tuples are accepted.
    """
    import numpy as np

    values = np.array(array)
    return np.median(values)
def var(array):
    """
    Calculates the sample variance of an array/vector

    Uses the n-1 denominator (``ddof=1``), which is what R's ``var`` computes
    and what ``np.cov`` uses by default. For an input with a single element
    the result is ``nan``.
    """
    import numpy as np

    values = np.array(array)
    # np.var defaults to ddof=0 (population formula); R's var divides by n-1.
    return np.var(values, ddof=1)
def cov(x, y=None):
    """
    Calculates the covariance between two arrays/vectors or of a single matrix

    Parameters:
        x: array-like. With ``y`` omitted, a 1-D ``x`` yields its variance as a
           float and a higher-dimensional ``x`` yields the matrix returned by
           ``np.cov(x)`` (NumPy treats each row as one variable by default).
        y: optional array-like with the same shape as ``x``.

    Returns:
        The off-diagonal entry ``[0][1]`` of ``np.cov(x, y)`` when ``y`` is
        given; otherwise a float (1-D ``x``) or a covariance matrix.
        If the shapes of ``x`` and ``y`` differ, an error message is printed
        and ``None`` is returned.
    """
    import numpy as np

    first = np.array(x)

    # Identity test: ``y != None`` is evaluated elementwise on an ndarray and
    # makes the ``if`` raise "truth value of an array is ambiguous".
    if y is not None:
        second = np.array(y)
        if first.shape != second.shape:
            print("Error: incompatible dimensions")
            return None
        return np.cov(first, second)[0][1]

    if len(first.shape) == 1:
        return float(np.cov(first))
    return np.cov(first)
def fivenum(array):
    """
    Return a five-number summary of an array/vector.

    The result is a NumPy array ordered as
    (minimum, 25th percentile, median, 75th percentile, maximum).
    The quartiles come from ``np.percentile`` with its default settings.
    """
    import numpy as np

    values = np.array(array)
    summary = [
        np.min(values),
        np.percentile(values, 25),
        np.median(values),
        np.percentile(values, 75),
        np.max(values),
    ]
    return np.array(summary)
def IQR(array):
    """
    Return the inter-quartile range of an array/vector.

    Computed as the 75th percentile minus the 25th percentile, both taken
    from ``np.percentile`` with its default settings.
    """
    import numpy as np

    values = np.array(array)
    upper = np.percentile(values, 75)
    lower = np.percentile(values, 25)
    return upper - lower
"""
Probability distributions
"""
#=====================
# Uniform distribution
#=====================
def dunif(x, minimum=0, maximum=1):
    """
    Return the density of the uniform distribution on [minimum, maximum] at x.
    """
    from scipy.stats import uniform

    # SciPy describes the support as [loc, loc + scale].
    width = maximum - minimum
    return uniform.pdf(x, loc=minimum, scale=width)
def punif(q, minimum=0, maximum=1):
    """
    Return the cumulative probability P(X <= q) for the uniform distribution
    on [minimum, maximum].
    """
    from scipy.stats import uniform

    # SciPy describes the support as [loc, loc + scale].
    width = maximum - minimum
    return uniform.cdf(q, loc=minimum, scale=width)
def qunif(p, minimum=0, maximum=1):
    """
    Return the quantile (inverse CDF) at probability p for the uniform
    distribution on [minimum, maximum].
    """
    from scipy.stats import uniform

    # SciPy describes the support as [loc, loc + scale].
    width = maximum - minimum
    return uniform.ppf(p, loc=minimum, scale=width)
def runif(n, minimum=0, maximum=1):
    """
    Draw n random variates from the uniform distribution on
    [minimum, maximum].
    """
    from scipy.stats import uniform

    # SciPy describes the support as [loc, loc + scale].
    width = maximum - minimum
    return uniform.rvs(loc=minimum, scale=width, size=n)
#======================
# Binomial distribution
#======================
def dbinom(x, size, prob=0.5):
    """
    Return the binomial probability mass P(X = x) for `size` trials with
    success probability `prob`.
    """
    from scipy.stats import binom

    return binom.pmf(x, size, prob, loc=0)
def pbinom(q, size, prob=0.5):
    """
    Return the binomial cumulative probability P(X <= q) for `size` trials
    with success probability `prob`.
    """
    from scipy.stats import binom

    return binom.cdf(q, size, prob, loc=0)
def qbinom(p, size, prob=0.5):
    """
    Return the binomial quantile (inverse CDF) at probability p for `size`
    trials with success probability `prob`.
    """
    from scipy.stats import binom

    return binom.ppf(p, size, prob, loc=0)
def rbinom(n, size, prob=0.5):
    """
    Draw n random variates from the binomial distribution with `size` trials
    and success probability `prob`.
    """
    from scipy.stats import binom

    # Note the naming clash: R's `size` is the number of trials, while
    # SciPy's `size` keyword is the number of variates to draw.
    trials = size
    return binom.rvs(trials, prob, size=n)
#=====================
# Normal distribution
#=====================
def dnorm(x, mean=0, sd=1):
    """
    Return the density of the normal distribution with the given mean and
    standard deviation, evaluated at x.
    """
    from scipy.stats import norm

    density = norm.pdf(x, loc=mean, scale=sd)
    return density
def pnorm(q, mean=0, sd=1):
    """
    Return the cumulative probability P(X <= q) for the normal distribution
    with the given mean and standard deviation.
    """
    from scipy.stats import norm

    return norm.cdf(q, loc=mean, scale=sd)
def qnorm(p, mean=0, sd=1):
    """
    Return the quantile (inverse CDF) at probability p for the normal
    distribution with the given mean and standard deviation.
    """
    from scipy.stats import norm

    return norm.ppf(p, loc=mean, scale=sd)
def rnorm(n, mean=0, sd=1):
    """
    Draw n random variates from the normal distribution with the given mean
    and standard deviation.
    """
    from scipy.stats import norm

    return norm.rvs(loc=mean, scale=sd, size=n)
#=====================
# Poisson distribution
#=====================
def dpois(x, mu):
    """
    Return the Poisson probability mass P(X = x) for mean `mu`.
    """
    from scipy.stats import poisson

    return poisson.pmf(x, mu)
def ppois(q, mu):
    """
    Return the Poisson cumulative probability P(X <= q) for mean `mu`.
    """
    from scipy.stats import poisson

    return poisson.cdf(q, mu)
def qpois(p, mu):
    """
    Return the Poisson quantile (inverse CDF) at probability p for mean `mu`.
    """
    from scipy.stats import poisson

    return poisson.ppf(p, mu)
def rpois(n, mu):
    """
    Draw n random variates from the Poisson distribution with mean `mu`.
    """
    from scipy.stats import poisson

    return poisson.rvs(mu, size=n)
#=====================
# chi^2-distribution
#=====================
def dchisq(x, df, ncp=0):
    """
    Return the density of the chi-square distribution with `df` degrees of
    freedom at x.

    A non-zero `ncp` selects the non-central chi-square distribution with
    that non-centrality parameter.
    """
    from scipy.stats import chi2, ncx2

    if ncp == 0:
        return chi2.pdf(x, df, loc=0, scale=1)
    return ncx2.pdf(x, df, ncp, loc=0, scale=1)
def pchisq(q, df, ncp=0):
    """
    Return the cumulative probability P(X <= q) for the chi-square
    distribution with `df` degrees of freedom.

    A non-zero `ncp` selects the non-central chi-square distribution with
    that non-centrality parameter.
    """
    from scipy.stats import chi2, ncx2

    if ncp == 0:
        return chi2.cdf(q, df, loc=0, scale=1)
    return ncx2.cdf(q, df, ncp, loc=0, scale=1)
def qchisq(p, df, ncp=0):
    """
    Return the quantile (inverse CDF) at probability p for the chi-square
    distribution with `df` degrees of freedom.

    A non-zero `ncp` selects the non-central chi-square distribution with
    that non-centrality parameter.
    """
    from scipy.stats import chi2, ncx2

    if ncp == 0:
        return chi2.ppf(p, df, loc=0, scale=1)
    return ncx2.ppf(p, df, ncp, loc=0, scale=1)
def rchisq(n, df, ncp=0):
    """
    Draw n random variates from the chi-square distribution with `df`
    degrees of freedom.

    A non-zero `ncp` selects the non-central chi-square distribution with
    that non-centrality parameter.
    """
    from scipy.stats import chi2, ncx2

    if ncp == 0:
        return chi2.rvs(df, loc=0, scale=1, size=n)
    return ncx2.rvs(df, ncp, loc=0, scale=1, size=n)
#==============================
# ### Student's t-distribution
#==============================
def dt(x, df, ncp=0):
    """
    Return the density of Student's t-distribution with `df` degrees of
    freedom at x.

    A non-zero `ncp` selects the non-central t-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import t, nct

    if ncp == 0:
        return t.pdf(x, df, loc=0, scale=1)
    return nct.pdf(x, df, ncp, loc=0, scale=1)
def pt(q, df, ncp=0):
    """
    Return the cumulative probability P(X <= q) for Student's t-distribution
    with `df` degrees of freedom.

    A non-zero `ncp` selects the non-central t-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import t, nct

    if ncp == 0:
        return t.cdf(q, df, loc=0, scale=1)
    return nct.cdf(q, df, ncp, loc=0, scale=1)
def qt(p, df, ncp=0):
    """
    Return the quantile (inverse CDF) at probability p for Student's
    t-distribution with `df` degrees of freedom.

    A non-zero `ncp` selects the non-central t-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import t, nct

    if ncp == 0:
        return t.ppf(p, df, loc=0, scale=1)
    return nct.ppf(p, df, ncp, loc=0, scale=1)
def rt(n, df, ncp=0):
    """
    Draw n random variates from Student's t-distribution with `df` degrees
    of freedom.

    A non-zero `ncp` selects the non-central t-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import t, nct

    if ncp == 0:
        return t.rvs(df, loc=0, scale=1, size=n)
    return nct.rvs(df, ncp, loc=0, scale=1, size=n)
#================
# F-distribution
#================
def df(x, df1, df2, ncp=0):
    """
    Return the density of the F-distribution with `df1` numerator and `df2`
    denominator degrees of freedom at x.

    A non-zero `ncp` selects the non-central F-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import f, ncf

    if ncp == 0:
        return f.pdf(x, df1, df2, loc=0, scale=1)
    return ncf.pdf(x, df1, df2, ncp, loc=0, scale=1)
def pf(q, df1, df2, ncp=0):
    """
    Return the cumulative probability P(X <= q) for the F-distribution with
    `df1` numerator and `df2` denominator degrees of freedom.

    A non-zero `ncp` selects the non-central F-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import f, ncf

    if ncp == 0:
        return f.cdf(q, df1, df2, loc=0, scale=1)
    return ncf.cdf(q, df1, df2, ncp, loc=0, scale=1)
def qf(p, df1, df2, ncp=0):
    """
    Return the quantile (inverse CDF) at probability p for the F-distribution
    with `df1` numerator and `df2` denominator degrees of freedom.

    A non-zero `ncp` selects the non-central F-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import f, ncf

    if ncp == 0:
        return f.ppf(p, df1, df2, loc=0, scale=1)
    return ncf.ppf(p, df1, df2, ncp, loc=0, scale=1)
def rf(n, df1, df2, ncp=0):
    """
    Draw n random variates from the F-distribution with `df1` numerator and
    `df2` denominator degrees of freedom.

    A non-zero `ncp` selects the non-central F-distribution with that
    non-centrality parameter.
    """
    from scipy.stats import f, ncf

    if ncp == 0:
        return f.rvs(df1, df2, loc=0, scale=1, size=n)
    return ncf.rvs(df1, df2, ncp, loc=0, scale=1, size=n)
#===================
# Beta distribution
#===================
def dbeta(x, shape1, shape2):
    """
    Return the density of the Beta(shape1, shape2) distribution at x.
    """
    from scipy.stats import beta

    return beta.pdf(x, shape1, shape2, loc=0, scale=1)
def pbeta(q, shape1, shape2):
    """
    Return the cumulative probability P(X <= q) for the Beta(shape1, shape2)
    distribution.
    """
    from scipy.stats import beta

    return beta.cdf(q, shape1, shape2, loc=0, scale=1)
def qbeta(p, shape1, shape2):
    """
    Return the quantile (inverse CDF) at probability p for the
    Beta(shape1, shape2) distribution.
    """
    from scipy.stats import beta

    return beta.ppf(p, shape1, shape2, loc=0, scale=1)
def rbeta(n, shape1, shape2):
    """
    Draw n random variates from the Beta(shape1, shape2) distribution.
    """
    from scipy.stats import beta

    return beta.rvs(shape1, shape2, loc=0, scale=1, size=n)
#========================
# ### Gamma distribution
#========================
def dgamma(x, shape, rate=1):
    """
    Calculates the density/point estimate of the Gamma-distribution

    Uses R's shape/rate parameterisation: the density of a Gamma variable
    with the given `shape` and `rate` is rate * g(rate * x), where g is the
    unit-scale Gamma density.

    `x` may be a scalar or any array-like, including a plain Python list.
    """
    import numpy as np
    from scipy.stats import gamma

    # Convert first: multiplying a Python list by an int rate would repeat
    # the list instead of scaling its values (and fail for a float rate).
    points = np.asarray(x)
    return rate * gamma.pdf(rate * points, a=shape, loc=0, scale=1)
def pgamma(q, shape, rate=1):
    """
    Calculates the cumulative of the Gamma-distribution

    Uses R's shape/rate parameterisation: P(X <= q) for a Gamma variable
    with the given `shape` and `rate` equals the unit-scale Gamma CDF
    evaluated at rate * q.

    `q` may be a scalar or any array-like, including a plain Python list.
    """
    import numpy as np
    from scipy.stats import gamma

    # Convert first: multiplying a Python list by an int rate would repeat
    # the list instead of scaling its values (and fail for a float rate).
    points = np.asarray(q)
    return gamma.cdf(rate * points, a=shape, loc=0, scale=1)
def qgamma(p, shape, rate=1):
    """
    Return the quantile (inverse CDF) at probability p for the Gamma
    distribution with the given shape and rate.

    The unit-scale Gamma quantile is multiplied by 1/rate.
    """
    from scipy.stats import gamma

    unit_quantile = gamma.ppf(p, shape, loc=0, scale=1)
    return (1 / rate) * unit_quantile
def rgamma(n, shape, rate=1):
    """
    Generates random variables from the Gamma-distribution

    Uses R's shape/rate parameterisation; the rate is passed to SciPy as
    ``scale = 1 / rate``, so the variates have mean shape / rate.

    Parameters:
        n: number of variates to draw.
        shape: shape parameter of the distribution.
        rate: rate (inverse scale) parameter, default 1.
    """
    from scipy.stats import gamma

    # The rate must reach SciPy as a scale; leaving scale at 1 would
    # silently ignore the caller's `rate`.
    return gamma.rvs(a=shape, loc=0, scale=1.0 / rate, size=n)