Skip to content
This repository has been archived by the owner on Aug 1, 2024. It is now read-only.

Commit

Permalink
add percentages to bar plots
Browse files (browse the repository at this point in the history)
  • Loading branch information
jeromedockes committed Jul 1, 2024
1 parent 1d64063 commit 77be528
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 14 deletions.
38 changes: 25 additions & 13 deletions src/skrubview/_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@
from . import _utils

# from matplotlib import colormaps, colors
# _PASTEL = list(map(colors.rgb2hex, colormaps.get_cmap("tab10").colors))
# _TAB10 = list(map(colors.rgb2hex, colormaps.get_cmap("tab10").colors))


# sns.color_palette('muted').as_hex()
_SEABORN = [
"#4c72b0",
"#dd8452",
"#55a868",
"#c44e52",
"#8172b3",
"#937860",
"#da8bc3",
"#8c8c8c",
"#ccb974",
"#64b5cd",
"#4878d0",
"#ee854a",
"#6acc64",
"#d65f5f",
"#956cb4",
"#8c613c",
"#dc7ec0",
"#797979",
"#d5bb67",
"#82c6e2",
]

COLORS = _SEABORN
COLOR_0 = COLORS[0]

Expand Down Expand Up @@ -80,7 +83,7 @@ def line(x_col, y_col):
return _serialize(fig)


def value_counts(value_counts, n_unique, color=COLOR_0):
def value_counts(value_counts, n_unique, n_rows, color=COLOR_0):
values = [_utils.ellide_string_short(s) for s in value_counts.keys()][::-1]
counts = list(value_counts.values())[::-1]
if n_unique > len(value_counts):
Expand All @@ -89,7 +92,16 @@ def value_counts(value_counts, n_unique, color=COLOR_0):
title = None
fig, ax = plt.subplots()
_despine(ax)
ax.barh(list(map(str, range(len(values)))), counts, color=color)
rects = ax.barh(list(map(str, range(len(values)))), counts, color=color)
percent = [_utils.format_percent(c / n_rows) for c in counts]
large_percent = [
f"{p: >6}" if c > counts[-1] / 2 else "" for (p, c) in zip(percent, counts)
]
small_percent = [
p if c <= counts[-1] / 2 else "" for (p, c) in zip(percent, counts)
]
ax.bar_label(rects, large_percent, padding=-30, color="black", fontsize=8)
ax.bar_label(rects, small_percent, padding=5, color="black", fontsize=8)
ax.set_yticks(ax.get_yticks())
ax.set_yticklabels(list(map(str, values)))
if title is not None:
Expand Down
2 changes: 1 addition & 1 deletion src/skrubview/_summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def _add_value_counts(summary, column, *, dataframe_summary, with_plots):
summary["value_is_constant"] = False
if with_plots:
summary["value_counts_plot"] = _plotting.value_counts(
value_counts, n_unique, color=_plotting.COLORS[1]
value_counts, n_unique, dataframe_summary["n_rows"], color=_plotting.COLORS[1]
)


Expand Down

0 comments on commit 77be528

Please sign in to comment.