Skip to content

Commit fe05482

Browse files
authored
Merge pull request #251 from abstractqqq/discrete_entropy
added discrete entropy as a shorthand
2 parents c2f4f52 + 243dec0 commit fe05482

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

python/polars_ds/features.py

+17
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"query_c3_stats",
2626
"query_cid_ce",
2727
"query_time_reversal_asymmetry_stats",
28+
"query_entropy",
2829
"query_approx_entropy",
2930
"query_sample_entropy",
3031
"query_knn_entropy",
@@ -375,6 +376,22 @@ def query_time_reversal_asymmetry_stats(x: str | pl.Expr, n_lags: int) -> pl.Exp
375376
#################################################
376377

377378

379+
def query_entropy(x: str | pl.Expr, base: float = math.e, normalize: bool = True) -> pl.Expr:
    """
    Computes the entropy of a discrete column from the counts of its unique values. This is a
    convenience wrapper equivalent to x.unique_counts().entropy(base=base, normalize=normalize).

    Parameters
    ----------
    x
        Either a string or a polars expression
    base
        Base of the logarithm used in the entropy computation. Defaults to e.
    normalize
        Forwarded to entropy(): normalize if the values don't sum to 1.
    """
    # Count the occurrences of each distinct value first, then take the entropy of those counts.
    counts = str_to_expr(x).unique_counts()
    return counts.entropy(base=base, normalize=normalize)
393+
394+
378395
def query_cond_entropy(x: str | pl.Expr, y: str | pl.Expr) -> pl.Expr:
379396
"""
380397
Queries the conditional entropy of x on y, aka. H(x|y).

0 commit comments

Comments
 (0)