Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 32 additions & 37 deletions src/votekit/ballot_generator/bloc_slate_generator/cumulative.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,51 +34,46 @@ def _inner_name_cumulative(config: BlocSlateConfig) -> dict[str, ScoreProfile]:
`ScoreProfile` objects representing the generated ballots for each bloc.
"""
bloc_lst = config.blocs
n_voters = int(config.n_voters)

bloc_counts = apportion.compute(
"huntington", list(config.bloc_proportions.values()), config.n_voters
"huntington", list(config.bloc_proportions.values()), n_voters
)

if not isinstance(bloc_counts, list):
if not isinstance(bloc_counts, int):
raise TypeError(
f"Unexpected type from apportionment got {type(bloc_counts)}"
)

bloc_counts = [bloc_counts]

ballots_per_bloc = {bloc: bloc_counts[i] for i, bloc in enumerate(bloc_lst)}
bloc_counts = [int(bloc_counts)]

pp_by_bloc = {b: ScoreProfile() for b in bloc_lst}
ballots_per_bloc = dict(zip(bloc_lst, bloc_counts))
pp_by_bloc: dict[str, ScoreProfile] = {}

pref_interval_by_bloc_dict = config.get_combined_preference_intervals_by_bloc()
pref_by_bloc = config.get_combined_preference_intervals_by_bloc()
rng = np.random.default_rng()

for bloc in bloc_lst:
ballot_pool = []
num_ballots = ballots_per_bloc[bloc]
pref_interval = pref_interval_by_bloc_dict[bloc]

non_zero_cands = list(pref_interval.non_zero_cands)
cand_support_vec = [pref_interval.interval[cand] for cand in non_zero_cands]

for _ in range(num_ballots):
list_ranking = list(
np.random.choice(
non_zero_cands,
config.n_voters,
p=cand_support_vec,
replace=True,
)
)

scores = {c: 0.0 for c in list_ranking}
for c in list_ranking:
scores[c] += 1

ballot_pool.append(ScoreBallot(scores=scores, weight=1))

pp = ScoreProfile(ballots=tuple(ballot_pool))
pp_by_bloc[bloc] = pp
num_ballots = int(ballots_per_bloc.get(bloc, 0))
if num_ballots <= 0:
pp_by_bloc[bloc] = ScoreProfile()
continue

pref = pref_by_bloc[bloc]
non_zero_cands = list(pref.non_zero_cands)
if not non_zero_cands:
pp_by_bloc[bloc] = ScoreProfile()
continue

# config.get_combined_preference_intervals_by_bloc() should ensure normalization
# for the non-zero candidates
p = np.array([pref.interval[c] for c in non_zero_cands], dtype=float)
assert abs(p.sum() - 1.0) < 1e-10, "PreferenceInterval not normalized"

# Vectorized: one multinomial per ballot -> shape (num_ballots, n_cands)
# Each row sums to n_voters and the entries are counts for each candidate
counts = rng.multinomial(n=n_voters, pvals=p, size=num_ballots)

ballots = [
ScoreBallot(scores=dict(zip(non_zero_cands, row.astype(float))), weight=1)
for row in counts
]
pp_by_bloc[bloc] = ScoreProfile(ballots=tuple(ballots))

return pp_by_bloc

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ def _check_name_bt_memory(config: BlocSlateConfig) -> None:
)

mem = system_memory()
# rough estimate of memory usage. Gives a little bit of a buffer to account for overhead
pmf_size = math.factorial(n_cands)
candidate_with_longest_name = max(config.candidates, key=len)
est_bytes_pmf = pmf_size * sys.getsizeof(candidate_with_longest_name) * n_cands
Expand All @@ -128,7 +127,11 @@ def _check_name_bt_memory(config: BlocSlateConfig) -> None:
* n_cands
* sys.getsizeof(frozenset({candidate_with_longest_name}))
)
est_bytes = est_bytes_pmf + est_bytes_profile
est_bytes = float(est_bytes_pmf + est_bytes_profile)

# fudge factor for overhead. Just tuned to a couple of machines, but gives pretty close
# upper bound on memory usage while leaving room for other processes
est_bytes *= 1.5
if est_bytes > mem["available_gib"] * 2**30:
raise MemoryError(
f"Not enough memory to generate the profile. Estimated memory usage is "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
from votekit.pref_profile import RankProfile
from votekit.ballot_generator.bloc_slate_generator.model import BlocSlateConfig
from votekit.ballot_generator.utils import system_memory
from votekit.ballot_generator.bloc_slate_generator.slate_utils import (
_make_cand_ordering_by_slate,
_convert_ballot_type_to_ranking,
)

# ====================================================
# ================= Helper Functions =================
Expand Down Expand Up @@ -185,7 +189,6 @@ def _check_slate_bt_memory(config: BlocSlateConfig) -> None:
)

mem = system_memory()
# rough estimate of memory usage. Gives a little bit of a buffer to account for overhead
pmf_size = total_arrangements
candidate_with_longest_name = max(config.candidates, key=len)
est_bytes_pmf = pmf_size * sys.getsizeof(candidate_with_longest_name) * n_cands
Expand All @@ -194,7 +197,11 @@ def _check_slate_bt_memory(config: BlocSlateConfig) -> None:
* n_cands
* sys.getsizeof(frozenset({candidate_with_longest_name}))
)
est_bytes = est_bytes_pmf + est_bytes_profile
est_bytes = float(est_bytes_pmf + est_bytes_profile)

# fudge factor for overhead. Just tuned to a couple of machines, but gives pretty close
# upper bound on memory usage while leaving room for other processes
est_bytes *= 1.5
if est_bytes > mem["available_gib"] * 2**30:
raise MemoryError(
f"Not enough memory to generate the profile. Estimated memory usage is "
Expand Down Expand Up @@ -288,7 +295,6 @@ def _inner_slate_bradley_terry(

# Save on repeated calls to computed property
bloc_lst = config.blocs
slate_lst = config.slates

bloc_counts = apportion.compute(
"huntington", list(config.bloc_proportions.values()), config.n_voters
Expand All @@ -306,7 +312,7 @@ def _inner_slate_bradley_terry(
pref_profile_by_bloc = {b: RankProfile() for b in bloc_lst}
candidates = config.candidates

for i, bloc in enumerate(bloc_lst):
for bloc in bloc_lst:
# number of voters in this bloc
n_ballots = ballots_per_bloc[bloc]
ballot_pool = np.full((n_ballots, n_candidates), frozenset("~"))
Expand All @@ -331,33 +337,19 @@ def _inner_slate_bradley_terry(
non_zero_candidate_set=non_zero_cands_set,
)

cand_ordering_by_slate = _make_cand_ordering_by_slate(
config, pref_intervals_by_slate_dict
)
for j, bt in enumerate(ballot_types):
cand_ordering_by_bloc = {}

for slate in slate_lst:
# create a pref interval dict of only this blocs candidates
bloc_cand_pref_interval = pref_intervals_by_slate_dict[slate].interval
cands = pref_intervals_by_slate_dict[slate].non_zero_cands

# if there are no non-zero candidates, skip this bloc
if len(cands) == 0:
continue

distribution = [bloc_cand_pref_interval[c] for c in cands]

# sample by Plackett-Luce within the bloc
cand_ordering = np.random.choice(
a=list(cands), size=len(cands), p=distribution, replace=False
ranking = _convert_ballot_type_to_ranking(
ballot_type=bt, cand_ordering_by_slate=cand_ordering_by_slate
)
if ranking is None:
raise RuntimeError(
"Unexpeceted None from internal function _convert_ballot_type_to_ranking "
"Likely caused by an empty ballot type."
)

cand_ordering_by_bloc[slate] = list(cand_ordering)

ranking = [frozenset({"~"})] * len(bt)
for i, slate in enumerate(bt):
# append the current first candidate, then remove them from the ordering
ranking[i] = frozenset({cand_ordering_by_bloc[slate][0]})
cand_ordering_by_bloc[slate].pop(0)

if len(zero_cands) > 0:
ranking.append(frozenset(zero_cands))
ballot_pool[j] = np.array(ranking)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
sample_cohesion_ballot_types,
)
from votekit.ballot_generator.bloc_slate_generator.model import BlocSlateConfig
from votekit.ballot_generator.bloc_slate_generator.slate_utils import (
_make_cand_ordering_by_slate,
_convert_ballot_type_to_ranking,
)

# ===========================================================
# ================= Interior Work Functions =================
Expand Down Expand Up @@ -48,7 +52,6 @@ def _inner_slate_plackett_luce(
"""
n_candidates = len(config.candidates)
bloc_lst = config.blocs
slate_lst = config.slates

bloc_counts = apportion.compute(
"huntington", list(config.bloc_proportions.values()), config.n_voters
Expand All @@ -69,7 +72,7 @@ def _inner_slate_plackett_luce(

pref_profile_by_bloc = {}

for i, bloc in enumerate(bloc_lst):
for bloc in bloc_lst:
n_ballots = ballots_per_bloc[bloc]
ballot_pool = np.full((n_ballots, n_candidates), frozenset("~"))
pref_intervals_by_slate_dict = config.get_preference_intervals_for_bloc(bloc)
Expand All @@ -88,28 +91,18 @@ def _inner_slate_plackett_luce(
cohesion_parameters_for_bloc=config.cohesion_df.loc[bloc].to_dict(), # type: ignore
)

cand_ordering_by_slate = _make_cand_ordering_by_slate(
config, pref_intervals_by_slate_dict
)
for j, bt in enumerate(ballot_types):
cand_ordering_by_bloc = {}

for slate in slate_lst:
bloc_cand_pref_interval = pref_intervals_by_slate_dict[slate].interval
cands = pref_intervals_by_slate_dict[slate].non_zero_cands

if len(cands) == 0:
continue

distribution = [bloc_cand_pref_interval[c] for c in cands]

# sample within bloc according to Plackett-Luce
cand_ordering = np.random.choice(
a=list(cands), size=len(cands), p=distribution, replace=False
ranking = _convert_ballot_type_to_ranking(
ballot_type=bt, cand_ordering_by_slate=cand_ordering_by_slate
)
if ranking is None:
raise RuntimeError(
"Unexpeceted None from internal function _convert_ballot_type_to_ranking "
"Likely caused by an empty ballot type."
)
cand_ordering_by_bloc[slate] = list(cand_ordering)

ranking = [frozenset({"-1"})] * len(bt)
for i, slate in enumerate(bt):
ranking[i] = frozenset({cand_ordering_by_bloc[slate][0]})
cand_ordering_by_bloc[slate].pop(0)

if len(zero_cands) > 0:
ranking.append(frozenset(zero_cands))
Expand Down
87 changes: 87 additions & 0 deletions src/votekit/ballot_generator/bloc_slate_generator/slate_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import numpy as np
from typing import Sequence

from votekit.ballot_generator.bloc_slate_generator.model import BlocSlateConfig
from votekit.pref_interval import PreferenceInterval


def _make_cand_ordering_by_slate(
    config: BlocSlateConfig, pref_intervals_by_slate_dict: dict[str, PreferenceInterval]
) -> dict[str, list[str]]:
    """
    Sample one candidate ordering per slate from that slate's preference interval.

    For every slate in ``config.slates`` the candidates with non-zero support are drawn
    without replacement, weighted by their share of the preference interval (the
    Plackett-Luce model: throw a dart at the interval, record the candidate hit, remove
    that part of the interval, and repeat). Sampling uses NumPy's global random state via
    ``np.random.choice``.

    Args:
        config (BlocSlateConfig): Configuration object for the bloc-slate ballot generator.
            Only ``config.slates`` is read here.
        pref_intervals_by_slate_dict (dict[str, PreferenceInterval]): A dictionary mapping
            slate names to their corresponding PreferenceInterval objects. It must contain
            an entry for every slate in ``config.slates``.

    Returns:
        dict[str, list[str]]: A dictionary mapping slate names to the sampled ordering of
            that slate's non-zero-support candidates. Slates that have no candidates with
            non-zero support are omitted from the dictionary.
    """
    orderings: dict[str, list[str]] = {}

    for slate_name in config.slates:
        slate_pref = pref_intervals_by_slate_dict[slate_name]
        support = slate_pref.interval
        # Materialize once so the candidate list and the weight list share the same order.
        members = list(slate_pref.non_zero_cands)

        # Nothing to sample for a slate without any supported candidate.
        if not members:
            continue

        weights = [support[cand] for cand in members]

        # Plackett-Luce within the slate: weighted draw of all members, no replacement.
        sampled = np.random.choice(
            a=members, size=len(members), p=weights, replace=False
        )
        orderings[slate_name] = list(sampled)

    return orderings


def _convert_ballot_type_to_ranking(
ballot_type: Sequence[str],
cand_ordering_by_slate: dict[str, list[str]],
) -> list[frozenset[str]]:
"""
Given a ballot type and a candidate ordering by slate, convert the ballot type to a ranking.

Example:

Given a ballot type "AABBA" and candidate ordering by slate
{"A": ["a3", "a1", "a2"], "B": ["b2", "b1"]}, the function will return
[frozenset({"a3"}), frozenset({"a1"}), frozenset({"b2"}), frozenset({"b1"}), frozenset({"a2"})].

Args:
ballot_type (Sequence[str]): A sequence of slate names representing the ballot type.
cand_ordering_by_slate (dict[str, list[str]]): A dictionary mapping slate names to a list
of candidate names ordered according to the sampled preference intervals.

Returns:
list[frozenset[str]]: A list of frozensets, where each frozenset contains a single
candidate name, representing the ranking derived from the ballot type and candidate
ordering
"""
positions = {s: 0 for s in cand_ordering_by_slate}
ranking: list[frozenset[str]] = [frozenset("~")] * len(ballot_type)

fset_cache: dict[str, frozenset[str]] = {}

for i, slate in enumerate(ballot_type):
pos = positions[slate]
cand = cand_ordering_by_slate[slate][pos]
positions[slate] = pos + 1

fset = fset_cache.get(cand)
if fset is None:
fset = frozenset((cand,))
fset_cache[cand] = fset
ranking[i] = fset

return ranking
Loading