mggg · cdonnay · Sep 24, 2025 · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025
diff --git a/src/votekit/ballot_generator/bloc_slate_generator/cumulative.py b/src/votekit/ballot_generator/bloc_slate_generator/cumulative.py
@@ -34,51 +34,46 @@ def _inner_name_cumulative(config: BlocSlateConfig) -> dict[str, ScoreProfile]:
             `ScoreProfile` objects representing the generated ballots for each bloc.
     """
     bloc_lst = config.blocs
+    n_voters = int(config.n_voters)
 
     bloc_counts = apportion.compute(
-        "huntington", list(config.bloc_proportions.values()), config.n_voters
+        "huntington", list(config.bloc_proportions.values()), n_voters
     )
-
     if not isinstance(bloc_counts, list):
-        if not isinstance(bloc_counts, int):
-            raise TypeError(
-                f"Unexpected type from apportionment got {type(bloc_counts)}"
-            )
-
-        bloc_counts = [bloc_counts]
-
-    ballots_per_bloc = {bloc: bloc_counts[i] for i, bloc in enumerate(bloc_lst)}
+        bloc_counts = [int(bloc_counts)]
 
-    pp_by_bloc = {b: ScoreProfile() for b in bloc_lst}
+    ballots_per_bloc = dict(zip(bloc_lst, bloc_counts))
+    pp_by_bloc: dict[str, ScoreProfile] = {}
 
-    pref_interval_by_bloc_dict = config.get_combined_preference_intervals_by_bloc()
+    pref_by_bloc = config.get_combined_preference_intervals_by_bloc()
+    rng = np.random.default_rng()
 
     for bloc in bloc_lst:
-        ballot_pool = []
-        num_ballots = ballots_per_bloc[bloc]
-        pref_interval = pref_interval_by_bloc_dict[bloc]
-
-        non_zero_cands = list(pref_interval.non_zero_cands)
-        cand_support_vec = [pref_interval.interval[cand] for cand in non_zero_cands]
-
-        for _ in range(num_ballots):
-            list_ranking = list(
-                np.random.choice(
-                    non_zero_cands,
-                    config.n_voters,
-                    p=cand_support_vec,
-                    replace=True,
-                )
-            )
-
-            scores = {c: 0.0 for c in list_ranking}
-            for c in list_ranking:
-                scores[c] += 1
-
-            ballot_pool.append(ScoreBallot(scores=scores, weight=1))
-
-        pp = ScoreProfile(ballots=tuple(ballot_pool))
-        pp_by_bloc[bloc] = pp
+        num_ballots = int(ballots_per_bloc.get(bloc, 0))
+        if num_ballots <= 0:
+            pp_by_bloc[bloc] = ScoreProfile()
+            continue
+
+        pref = pref_by_bloc[bloc]
+        non_zero_cands = list(pref.non_zero_cands)
+        if not non_zero_cands:
+            pp_by_bloc[bloc] = ScoreProfile()
+            continue
+
+        # config.get_combined_preference_intervals_by_bloc() should ensure normalization
+        # for the non-zero candidates
+        p = np.array([pref.interval[c] for c in non_zero_cands], dtype=float)
+        assert abs(p.sum() - 1.0) < 1e-10, "PreferenceInterval not normalized"
+
+        # Vectorized: one multinomial per ballot -> shape (num_ballots, n_cands)
+        # Each row sums to n_voters and the entries are counts for each candidate
+        counts = rng.multinomial(n=n_voters, pvals=p, size=num_ballots)
+
+        ballots = [
+            ScoreBallot(scores=dict(zip(non_zero_cands, row.astype(float))), weight=1)
+            for row in counts
+        ]
+        pp_by_bloc[bloc] = ScoreProfile(ballots=tuple(ballots))
 
     return pp_by_bloc
 

diff --git a/src/votekit/ballot_generator/bloc_slate_generator/name_bradley_terry.py b/src/votekit/ballot_generator/bloc_slate_generator/name_bradley_terry.py
@@ -119,7 +119,6 @@ def _check_name_bt_memory(config: BlocSlateConfig) -> None:
         )
 
     mem = system_memory()
-    # rough estimate of memory usage. Gives a little bit of a buffer to account for overhead
     pmf_size = math.factorial(n_cands)
     candidate_with_longest_name = max(config.candidates, key=len)
     est_bytes_pmf = pmf_size * sys.getsizeof(candidate_with_longest_name) * n_cands
@@ -128,7 +127,11 @@ def _check_name_bt_memory(config: BlocSlateConfig) -> None:
         * n_cands
         * sys.getsizeof(frozenset({candidate_with_longest_name}))
     )
-    est_bytes = est_bytes_pmf + est_bytes_profile
+    est_bytes = float(est_bytes_pmf + est_bytes_profile)
+
+    # fudge factor for overhead. Just tuned to a couple of machines, but gives pretty close
+    # upper bound on memory usage while leaving room for other processes
+    est_bytes *= 1.5
     if est_bytes > mem["available_gib"] * 2**30:
         raise MemoryError(
             f"Not enough memory to generate the profile. Estimated memory usage is "

diff --git a/src/votekit/ballot_generator/bloc_slate_generator/slate_bradley_terry.py b/src/votekit/ballot_generator/bloc_slate_generator/slate_bradley_terry.py
@@ -28,6 +28,10 @@
 from votekit.pref_profile import RankProfile
 from votekit.ballot_generator.bloc_slate_generator.model import BlocSlateConfig
 from votekit.ballot_generator.utils import system_memory
+from votekit.ballot_generator.bloc_slate_generator.slate_utils import (
+    _make_cand_ordering_by_slate,
+    _convert_ballot_type_to_ranking,
+)
 
 # ====================================================
 # ================= Helper Functions =================
@@ -185,7 +189,6 @@ def _check_slate_bt_memory(config: BlocSlateConfig) -> None:
         )
 
     mem = system_memory()
-    # rough estimate of memory usage. Gives a little bit of a buffer to account for overhead
     pmf_size = total_arrangements
     candidate_with_longest_name = max(config.candidates, key=len)
     est_bytes_pmf = pmf_size * sys.getsizeof(candidate_with_longest_name) * n_cands
@@ -194,7 +197,11 @@ def _check_slate_bt_memory(config: BlocSlateConfig) -> None:
         * n_cands
         * sys.getsizeof(frozenset({candidate_with_longest_name}))
     )
-    est_bytes = est_bytes_pmf + est_bytes_profile
+    est_bytes = float(est_bytes_pmf + est_bytes_profile)
+
+    # fudge factor for overhead. Just tuned to a couple of machines, but gives pretty close
+    # upper bound on memory usage while leaving room for other processes
+    est_bytes *= 1.5
     if est_bytes > mem["available_gib"] * 2**30:
         raise MemoryError(
             f"Not enough memory to generate the profile. Estimated memory usage is "
@@ -288,7 +295,6 @@ def _inner_slate_bradley_terry(
 
     # Save on repeated calls to computed property
     bloc_lst = config.blocs
-    slate_lst = config.slates
 
     bloc_counts = apportion.compute(
         "huntington", list(config.bloc_proportions.values()), config.n_voters
@@ -306,7 +312,7 @@ def _inner_slate_bradley_terry(
     pref_profile_by_bloc = {b: RankProfile() for b in bloc_lst}
     candidates = config.candidates
 
-    for i, bloc in enumerate(bloc_lst):
+    for bloc in bloc_lst:
         # number of voters in this bloc
         n_ballots = ballots_per_bloc[bloc]
         ballot_pool = np.full((n_ballots, n_candidates), frozenset("~"))
@@ -331,33 +337,19 @@ def _inner_slate_bradley_terry(
                 non_zero_candidate_set=non_zero_cands_set,
             )
 
+        cand_ordering_by_slate = _make_cand_ordering_by_slate(
+            config, pref_intervals_by_slate_dict
+        )
         for j, bt in enumerate(ballot_types):
-            cand_ordering_by_bloc = {}
-
-            for slate in slate_lst:
-                # create a pref interval dict of only this blocs candidates
-                bloc_cand_pref_interval = pref_intervals_by_slate_dict[slate].interval
-                cands = pref_intervals_by_slate_dict[slate].non_zero_cands
-
-                # if there are no non-zero candidates, skip this bloc
-                if len(cands) == 0:
-                    continue
-
-                distribution = [bloc_cand_pref_interval[c] for c in cands]
-
-                # sample by Plackett-Luce within the bloc
-                cand_ordering = np.random.choice(
-                    a=list(cands), size=len(cands), p=distribution, replace=False
+            ranking = _convert_ballot_type_to_ranking(
+                ballot_type=bt, cand_ordering_by_slate=cand_ordering_by_slate
+            )
+            if ranking is None:
+                raise RuntimeError(
+                    "Unexpeceted None from internal function _convert_ballot_type_to_ranking "
+                    "Likely caused by an empty ballot type."
                 )
 
-                cand_ordering_by_bloc[slate] = list(cand_ordering)
-
-            ranking = [frozenset({"~"})] * len(bt)
-            for i, slate in enumerate(bt):
-                # append the current first candidate, then remove them from the ordering
-                ranking[i] = frozenset({cand_ordering_by_bloc[slate][0]})
-                cand_ordering_by_bloc[slate].pop(0)
-
             if len(zero_cands) > 0:
                 ranking.append(frozenset(zero_cands))
             ballot_pool[j] = np.array(ranking)

diff --git a/src/votekit/ballot_generator/bloc_slate_generator/slate_plackett_luce.py b/src/votekit/ballot_generator/bloc_slate_generator/slate_plackett_luce.py
@@ -20,6 +20,10 @@
     sample_cohesion_ballot_types,
 )
 from votekit.ballot_generator.bloc_slate_generator.model import BlocSlateConfig
+from votekit.ballot_generator.bloc_slate_generator.slate_utils import (
+    _make_cand_ordering_by_slate,
+    _convert_ballot_type_to_ranking,
+)
 
 # ===========================================================
 # ================= Interior Work Functions =================
@@ -48,7 +52,6 @@ def _inner_slate_plackett_luce(
     """
     n_candidates = len(config.candidates)
     bloc_lst = config.blocs
-    slate_lst = config.slates
 
     bloc_counts = apportion.compute(
         "huntington", list(config.bloc_proportions.values()), config.n_voters
@@ -69,7 +72,7 @@ def _inner_slate_plackett_luce(
 
     pref_profile_by_bloc = {}
 
-    for i, bloc in enumerate(bloc_lst):
+    for bloc in bloc_lst:
         n_ballots = ballots_per_bloc[bloc]
         ballot_pool = np.full((n_ballots, n_candidates), frozenset("~"))
         pref_intervals_by_slate_dict = config.get_preference_intervals_for_bloc(bloc)
@@ -88,28 +91,18 @@ def _inner_slate_plackett_luce(
             cohesion_parameters_for_bloc=config.cohesion_df.loc[bloc].to_dict(),  # type: ignore
         )
 
+        cand_ordering_by_slate = _make_cand_ordering_by_slate(
+            config, pref_intervals_by_slate_dict
+        )
         for j, bt in enumerate(ballot_types):
-            cand_ordering_by_bloc = {}
-
-            for slate in slate_lst:
-                bloc_cand_pref_interval = pref_intervals_by_slate_dict[slate].interval
-                cands = pref_intervals_by_slate_dict[slate].non_zero_cands
-
-                if len(cands) == 0:
-                    continue
-
-                distribution = [bloc_cand_pref_interval[c] for c in cands]
-
-                # sample within bloc according to Plackett-Luce
-                cand_ordering = np.random.choice(
-                    a=list(cands), size=len(cands), p=distribution, replace=False
+            ranking = _convert_ballot_type_to_ranking(
+                ballot_type=bt, cand_ordering_by_slate=cand_ordering_by_slate
+            )
+            if ranking is None:
+                raise RuntimeError(
+                    "Unexpeceted None from internal function _convert_ballot_type_to_ranking "
+                    "Likely caused by an empty ballot type."
                 )
-                cand_ordering_by_bloc[slate] = list(cand_ordering)
-
-            ranking = [frozenset({"-1"})] * len(bt)
-            for i, slate in enumerate(bt):
-                ranking[i] = frozenset({cand_ordering_by_bloc[slate][0]})
-                cand_ordering_by_bloc[slate].pop(0)
 
             if len(zero_cands) > 0:
                 ranking.append(frozenset(zero_cands))

diff --git a/src/votekit/ballot_generator/bloc_slate_generator/slate_utils.py b/src/votekit/ballot_generator/bloc_slate_generator/slate_utils.py
@@ -0,0 +1,87 @@
+import numpy as np
+from typing import Sequence
+
+from votekit.ballot_generator.bloc_slate_generator.model import BlocSlateConfig
+from votekit.pref_interval import PreferenceInterval
+
+
+def _make_cand_ordering_by_slate(
+    config: BlocSlateConfig, pref_intervals_by_slate_dict: dict[str, PreferenceInterval]
+) -> dict[str, list[str]]:
+    """
+    Sample one within-slate candidate ordering for every slate in the configuration.
+
+    Each ordering is drawn without replacement, weighted by the slate's preference interval
+    (the Plackett-Luce model: throw a dart at the interval, record the candidate it lands
+    on, remove that candidate's part of the interval, and repeat).
+
+    Args:
+        config (BlocSlateConfig): Configuration object containing all necessary parameters for
+            working with a bloc-slate ballot generator. Only ``config.slates`` is read here.
+        pref_intervals_by_slate_dict (dict[str, PreferenceInterval]): A dictionary mapping
+            slate names to their corresponding PreferenceInterval objects.
+
+    Returns:
+        dict[str, list[str]]: A dictionary mapping slate names to a list of candidate names in
+            sampled order. Slates with no non-zero-support candidates are omitted.
+    """
+    cand_ordering_by_slate: dict[str, list[str]] = {}
+
+    for slate in config.slates:
+        pref_interval = pref_intervals_by_slate_dict[slate]
+        cands = list(pref_interval.non_zero_cands)
+        if not cands:
+            # nothing to rank in this slate; callers see it as a missing key
+            continue
+
+        distribution = [pref_interval.interval[c] for c in cands]
+
+        # sample by Plackett-Luce within the slate; tolist() converts numpy string
+        # scalars back to built-in str so the result matches the annotated return type
+        cand_ordering_by_slate[slate] = np.random.choice(
+            a=cands, size=len(cands), p=distribution, replace=False
+        ).tolist()
+
+    return cand_ordering_by_slate
+
+
+def _convert_ballot_type_to_ranking(
+    ballot_type: Sequence[str],
+    cand_ordering_by_slate: dict[str, list[str]],
+) -> list[frozenset[str]]:
+    """
+    Given a ballot type and a candidate ordering by slate, convert the ballot type to a ranking.
+
+    Each position of the ballot type is filled with the next not-yet-used candidate from that
+    position's slate. The orderings in ``cand_ordering_by_slate`` are only read, never
+    mutated, so the same dictionary can be passed to repeated calls.
+
+    Example:
+
+    Given a ballot type "AABBA" and candidate ordering by slate
+    {"A": ["a3", "a1", "a2"], "B": ["b2", "b1"]}, the function will return
+    [frozenset({"a3"}), frozenset({"a1"}), frozenset({"b2"}), frozenset({"b1"}), frozenset({"a2"})].
+
+    Args:
+        ballot_type (Sequence[str]): A sequence of slate names representing the ballot type.
+        cand_ordering_by_slate (dict[str, list[str]]): A dictionary mapping slate names to a
+            list of candidate names ordered according to the sampled preference intervals.
+
+    Returns:
+        list[frozenset[str]]: One single-candidate frozenset per ballot-type position.
+
+    Raises:
+        KeyError: If the ballot type names a slate missing from ``cand_ordering_by_slate``.
+        IndexError: If a slate appears more often than it has candidates in its ordering.
+    """
+    # index of the next unused candidate in each slate's ordering
+    next_position = dict.fromkeys(cand_ordering_by_slate, 0)
+    ranking: list[frozenset[str]] = []
+
+    for slate in ballot_type:
+        pos = next_position[slate]
+        # each ordering entry is consumed at most once per call, so build its frozenset directly
+        ranking.append(frozenset((cand_ordering_by_slate[slate][pos],)))
+        next_position[slate] = pos + 1
+
+    return ranking