# engine_numpy.py

"""Classes and functions for running poker games"""
import itertools
from typing import Any, Iterator, List, Literal, Optional, Tuple, Type, TypedDict, Union

import numpy as np

from clubs import error, poker, render


class ObservationDict(TypedDict):
    """Observation handed to the acting player.

    Built by ``Dealer._observation`` and returned from ``Dealer.reset``
    and ``Dealer.step``.
    """

    # index of the player whose turn it is (-1 once the round is over)
    action: int
    # one flag per player, taken from the dealer's ``active`` array
    active: List[bool]
    # index of the player holding the dealer button
    button: int
    # chips the acting player has to add to match the highest street
    # commit, clipped to their stack (0 when the round is done)
    call: int
    # community cards dealt so far
    community_cards: List[poker.Card]
    # hole cards of the player at index ``action`` only
    hole_cards: List[poker.Card]
    # largest legal bet for the acting player (0 when the round is done)
    max_raise: int
    # smallest legal raise for the acting player (0 when the round is done)
    min_raise: int
    # chips currently in the pot
    pot: int
    # remaining chips of every player
    stacks: List[int]
    # chips every player has committed during the current street
    street_commits: List[int]


class Dealer:
    """Runs a range of different poker games depending on the
    given configuration. Supports limit, no limit and pot limit
    bet sizing, arbitrary deck sizes, arbitrary hole and community
    cards and many other options.

    Parameters
    ----------
    num_players : int
        maximum number of players
    num_streets : int
        number of streets including preflop, e.g. for texas hold'em
        num_streets=4
    blinds : Union[int, List[int]]
        blind distribution as a list of ints, one for each player,
        starting from the small blind position (the button in heads
        up games, otherwise the first player after the button), e.g.
        [1, 2, 0] for a three player game with a sb of 1 and bb of 2.
        the second entry is used as the big blind. passed ints will
        be expanded to all players i.e. pass blinds=0 for no blinds
    antes : Union[int, List[int]]
        ante distribution as a list of ints, one for each player,
        in the same order as the blinds, e.g. [0, 5, 0] for a three
        player game with a bb ante of 5, passed ints will be expanded
        to all players i.e. pass antes=0 for no antes
    raise_sizes : Union[
            int, Literal["pot", "inf"], List[Union[int, Literal["pot", "inf"]]]
        ]
        max raise sizes for each street, valid raise sizes are ints,
        'inf', and 'pot', e.g. for a 1-2 limit hold'em the raise sizes
        should be [2, 2, 4, 4] as the small and big bet are 2 and 4.
        'inf' can be used for no limit games. pot limit raise
        sizes can be set using 'pot'. if only a single int or
        string is passed the value is expanded to a list the length
        of number of streets, e.g. for a standard no limit game pass
        raise_sizes='inf'
    num_raises : Union[int, Literal["inf"], List[Union[int, Literal["inf"]]]]
        max number of bets for each street including preflop, valid
        raise numbers are ints and floats. if only a single int or float
        is passed the value is expanded to a list the length of number
        of streets, e.g. for a standard limit game pass num_raises=4
    num_suits : int
        number of suits to use in deck, must be between 1 and 4
    num_ranks : int
        number of ranks to use in deck, must be between 1 and 13
    num_hole_cards : int
        number of hole cards per player, must be greater than 0
    num_community_cards : Union[int, List[int]]
        number of community cards per street including preflop, e.g.
        for texas hold'em pass num_community_cards=[0, 3, 1, 1]. if only
        a single int is passed, it is expanded to a list the length of
        number of streets
    num_cards_for_hand : int
        number of cards for a valid poker hand, e.g. for texas hold'em
        num_cards_for_hand=5
    mandatory_num_hole_cards : int
        number of hole cards which have to be used for the hand, e.g.
        for pot limit omaha mandatory_num_hole_cards=2
    start_stack : int
        number of chips each player starts with
    low_end_straight : bool, optional
        toggle to include the low ace straight within valid hands, by
        default True
    order : Optional[List[str]], optional
        optional custom order of hand ranks, must be permutation of
        ['sf', 'fk', 'fh', 'fl', 'st', 'tk', 'tp', 'pa', 'hc']. if
        order=None, hands are ranked by rarity. by default None

    Examples
    --------

        >>> Dealer( # 1-2 Heads Up No Limit Texas Hold'em
        ...     num_players=2, num_streets=4, blinds=[1, 2], antes=0,
        ...     raise_sizes='inf', num_raises=float('inf'),
        ...     num_suits=4, num_ranks=13, num_hole_cards=2,
        ...     num_community_cards=[0, 3, 1, 1], num_cards_for_hand=5,
        ...     mandatory_num_hole_cards=0, start_stack=200
        ... )
        >>> Dealer( # 1-2 6 Player PLO
        ...     num_players=6, num_streets=4, blinds=[1, 2, 0, 0, 0, 0],
        ...     antes=0, raise_sizes='pot', num_raises=float('inf'),
        ...     num_suits=4, num_ranks=13, num_hole_cards=4,
        ...     num_community_cards=[0, 3, 1, 1], num_cards_for_hand=5,
        ...     mandatory_num_hole_cards=2, start_stack=200
        ... )
        >>> Dealer( # 1-2 Heads Up No Limit Short Deck
        ...     num_players=2, num_streets=4, blinds=[1, 2], antes=0,
        ...     raise_sizes='inf', num_raises=float('inf'),
        ...     num_suits=4, num_ranks=9, num_hole_cards=2,
        ...     num_community_cards=[0, 3, 1, 1], num_cards_for_hand=5,
        ...     mandatory_num_hole_cards=0, start_stack=200,
        ...     order=[
        ...         'sf', 'fk', 'fl', 'fh', 'st',
        ...         'tk', 'tp', 'pa', 'hc'
        ...         ]
        ... )
    """

    def __init__(
        self,
        num_players: int,
        num_streets: int,
        blinds: Union[int, List[int]],
        antes: Union[int, List[int]],
        raise_sizes: Union[
            int, Literal["pot", "inf"], List[Union[int, Literal["pot", "inf"]]]
        ],
        num_raises: Union[int, Literal["inf"], List[Union[int, Literal["inf"]]]],
        num_suits: int,
        num_ranks: int,
        num_hole_cards: int,
        num_community_cards: Union[int, List[int]],
        num_cards_for_hand: int,
        mandatory_num_hole_cards: int,
        start_stack: int,
        low_end_straight: bool = True,
        order: Optional[List[str]] = None,
    ) -> None:
        def check_inp(
            var: Union[List[Any], Any], expect_num: int, error_msg: str
        ) -> List[Any]:
            if isinstance(var, list):
                if len(var) != expect_num:
                    raise error.InvalidConfigError(error_msg)
                return var
            return [var] * expect_num

        error_msg = "incorrect {} distribution, expected list of length {}, got {}"
        blinds = check_inp(
            blinds, num_players, error_msg.format("blind", num_players, str(blinds)),
        )
        antes = check_inp(
            antes, num_players, error_msg.format("ante", num_players, str(antes))
        )
        raise_sizes = check_inp(
            raise_sizes,
            num_streets,
            error_msg.format("raise size", num_streets, str(raise_sizes)),
        )
        num_raises = check_inp(
            num_raises,
            num_streets,
            error_msg.format("number of raises", num_streets, str(num_raises)),
        )
        num_community_cards = check_inp(
            num_community_cards,
            num_streets,
            error_msg.format("community card", num_streets, str(num_community_cards)),
        )

        def clean_rs(
            raise_size: Union[int, Literal["pot", "inf"]]
        ) -> Union[float, Literal["pot"]]:
            if raise_size == "inf":
                return float(raise_size)
            if raise_size == "pot":
                return raise_size
            if isinstance(raise_size, int):
                return raise_size
            raise error.InvalidRaiseSizeError(
                f"unknown raise size, expected one of (int, 'inf', 'pot'),"
                f" got {raise_size}"
            )

        # config
        self.num_players = num_players
        self.num_streets = num_streets
        self.blinds = np.array(blinds)
        self.antes = np.array(antes)
        self.big_blind = blinds[1]
        self.raise_sizes = [clean_rs(raise_size) for raise_size in raise_sizes]
        self.num_raises = [float(raise_num) for raise_num in num_raises]
        self.num_suits = num_suits
        self.num_ranks = num_ranks
        self.num_hole_cards = num_hole_cards
        self.num_community_cards = num_community_cards
        self.num_cards_for_hand = num_cards_for_hand
        self.mandatory_num_hole_cards = mandatory_num_hole_cards
        self.start_stack = start_stack

        # dealer
        self.action = -1
        self.active: np.ndarray = np.zeros(self.num_players, dtype=bool)
        self.button = 0
        self.community_cards: List[poker.Card] = []
        self.deck = poker.Deck(self.num_suits, self.num_ranks)
        self.evaluator = poker.Evaluator(
            self.num_suits,
            self.num_ranks,
            self.num_cards_for_hand,
            self.mandatory_num_hole_cards,
            low_end_straight=low_end_straight,
            order=order,
        )
        self.history: List[Tuple[int, int, bool]] = []
        self.hole_cards: List[List[poker.Card]] = []
        self.largest_raise = 0
        self.pot = 0
        self.pot_commit = np.zeros(self.num_players, dtype=np.int32)
        self.stacks: np.ndarray = np.full(
            self.num_players, self.start_stack, dtype=np.int32
        )
        self.street = 0
        self.street_commits: np.ndarray = np.zeros(self.num_players, dtype=np.int32)
        self.street_option: np.ndarray = np.zeros(self.num_players, dtype=bool)
        self.street_raises = 0

        # render
        self.viewer: Optional[render.PokerViewer]
        self.viewer = None
        self.ascii_viewer = render.ASCIIViewer(
            num_players, num_hole_cards, sum(num_community_cards)
        )

    def __str__(self) -> str:
        config = self._render_config()
        return self.ascii_viewer._parse_string(config)

    def __repr__(self) -> str:
        string = (
            f"Dealer ({id(self)}) - num players: {self.num_players}, "
            f"num streets: {self.num_streets}"
        )
        return string

    def reset(
        self, reset_button: bool = False, reset_stacks: bool = False
    ) -> ObservationDict:
        """Resets the table. Shuffles the deck, deals new hole cards
        to all players, moves the button and collects blinds and antes.

        Parameters
        ----------
        reset_button : bool, optional
            reset button to first position at table, by default False
        reset_stacks : bool, optional
            reset stack sizes to starting stack size, by default False

        Returns
        -------
        ObservationDict
            observation dictionary

        Examples
        --------

        >>> dealer = Dealer(**configs.LEDUC_TWO_PLAYER)
        >>> dealer.reset()
        ... {'action': 1,
        ...  'active': [True, True],
        ...  'button': 1,
        ...  'call': 0,
        ...  'community_cards': [],
        ...  'hole_cards': [[Card (139879188163600): A♥], [Card (139879188163504): A♠]],
        ...  'max_raise': 2,
        ...  'min_raise': 2,
        ...  'pot': 2,
        ...  'stacks': [9, 9],
        ...  'street_commits': [0, 0]}
        """
        if reset_stacks:
            self.active.fill(1)
            self.stacks = np.full(self.num_players, self.start_stack)
        else:
            self.active = self.stacks > 0
            if self.active.sum() <= 1:
                raise error.TooFewActivePlayersError(
                    "not enough players have chips, set reset_stacks=True"
                )
        if reset_button:
            self.button = 0
        else:
            self.button = self.button + 1 % self.num_players

        self.deck.shuffle()
        self.community_cards = self.deck.draw(self.num_community_cards[0])
        self.history = []
        self.hole_cards = [
            self.deck.draw(self.num_hole_cards) for _ in range(self.num_players)
        ]
        self.largest_raise = self.big_blind
        self.pot = 0
        self.pot_commit.fill(0)
        self.street = 0
        self.street_commits.fill(0)
        self.street_option.fill(0)
        self.street_raises = 0

        self.action = self.button
        # in heads up button posts small blind
        if self.num_players > 2:
            self._move_action()
        self._collect_multiple_bets(bets=self.antes.tolist(), street_commits=False)
        self._collect_multiple_bets(bets=self.blinds.tolist(), street_commits=True)
        self._move_action()
        self._move_action()

        return self._observation(False)

    def step(self, bet: float) -> Tuple[ObservationDict, List[int], List[bool]]:
        """Advances poker game to next player. If the bet is 0, it is
        either considered a check or fold, depending on the previous
        action. The given bet is always rounded to the closest valid bet
        size. When it is the same distance from two valid bet sizes
        the smaller bet size is used, e.g. if the min raise is 10 and
        the bet is 5, it is rounded down to 0.

        Parameters
        ----------
        bet : int
            number of chips bet by player currently active

        Returns
        -------
        Tuple[ObservationDict, List[int], List[bool]]
            observation dictionary, payouts for every player, boolean value for every
            player showing if that player is still active in the round

        Examples
        --------

        >>> dealer = Dealer(**configs.LEDUC_TWO_PLAYER)
        >>> obs = dealer.reset()
        >>> dealer.step(0)
        ... ({'action': 0,
        ...  'active': [True, True],
        ...  'button': 1,
        ...  'call': 0,
        ...  'community_cards': [],
        ...  'hole_cards': [[Card (139879188163600): A♥], [Card (139879188163504): A♠]],
        ...  'max_raise': 2,
        ...  'min_raise': 2,
        ...  'pot': 2,
        ...  'stacks': [9, 9],
        ...  'street_commits': [0, 0]},
        ...  [0, 0],
        ...  [False, False])
        """
        if self.action == -1:
            if self.active.any():
                done = self._done()
                payouts = self._payouts()
                observation = self._observation(all(done))
                return observation, payouts, done
            raise error.TableResetError("call reset() before calling first step()")

        fold = bet < 0
        bet = round(bet)

        call, min_raise, max_raise = self._bet_sizes()
        # round bet to nearest sizing
        bet = self._clean_bet(bet, call, min_raise, max_raise)

        # only fold if player cannot check
        if call and ((bet < call) or fold):
            self.active[self.action] = 0
            bet = 0

        # if bet is full raise record as largest raise
        if bet and (bet - call) >= self.largest_raise:
            self.largest_raise = bet - call
            self.street_raises += 1

        self._collect_bet(bet)

        self.history.append((self.action, int(bet), bool(fold)))

        self.street_option[self.action] = True
        self._move_action()

        # if all agreed go to next street
        if self._all_agreed():
            self.action = self.button
            self._move_action()
            # if at most 1 player active and not all in turn up all
            # community cards and evaluate hand
            while True:
                self.street += 1
                full_streets = self.street >= self.num_streets
                all_in = self.active * (self.stacks == 0)
                all_all_in = self.active.sum() - all_in.sum() <= 1
                if full_streets:
                    break
                self.community_cards += self.deck.draw(
                    self.num_community_cards[self.street]
                )
                if not all_all_in:
                    break
            self.street_commits.fill(0)
            self.street_option = np.logical_not(self.active).astype(bool)
            self.street_raises = 0

        done = self._done()
        payouts = self._payouts()
        if all(done):
            self.action = -1
            self.pot = 0
            self.stacks += payouts + self.pot_commit
        observation = self._observation(all(done))
        return observation, payouts, done

    def _render_config(self) -> render.viewer.RenderConfig:
        action = int(self.action)
        active = self.active.tolist()
        all_in = (self.active * (self.stacks == 0)).tolist()
        community_cards = self.community_cards
        button = int(self.button)
        done = all(self._done())
        hole_cards = self.hole_cards
        pot = int(self.pot)
        payouts = self._payouts()
        street_commits = self.street_commits.tolist()
        stacks = self.stacks.tolist()

        config: render.viewer.RenderConfig = {
            "action": action,
            "active": active,
            "all_in": all_in,
            "community_cards": community_cards,
            "button": button,
            "done": done,
            "hole_cards": hole_cards,
            "pot": pot,
            "payouts": payouts,
            "prev_action": None if not self.history else self.history[-1],
            "street_commits": street_commits,
            "stacks": stacks,
        }

        return config

    def render(self, mode: str = "human", sleep: float = 0, **kwargs: Any) -> None:
        """Renders poker table. Render mode options are: ascii, human

        Parameters
        ----------
        mode : str, optional
            toggle for using different renderer, by default 'human'
        sleep : float, optional
            time to wait after rendering, by default 0
        """
        viewer: Optional[Type[render.PokerViewer]] = None
        render_modes = ["ascii", "human"]
        if self.viewer is None:
            if mode == "ascii":
                viewer = render.ASCIIViewer
            elif mode == "human":
                viewer = render.GraphicViewer
            else:
                raise error.InvalidRenderModeError(
                    (f"incorrect render mode {mode}," f"use one of {render_modes}")
                )
            self.viewer = viewer(
                self.num_players,
                self.num_hole_cards,
                sum(self.num_community_cards),
                **kwargs,
            )

        config = self._render_config()

        self.viewer.render(config, sleep)

    def win_probabilities(self, n: int = 10000) -> List[float]:
        """Computes win probabilities for each player. If the possible remaining
        community card combinations are below 1000, the combinations are exhaustively
        checked, otherwise, n random samples are taken and averaged to compute an
        estimate of the win probabilities.

        Parameters
        ----------
        n : int, optional
            max number of iterations to approximate win probabilities, by default 10000

        Returns
        -------
        List[float]
            win probabilities
        """
        hand_strengths = []
        num_additional_comm_cards = sum(self.num_community_cards) - len(
            self.community_cards
        )
        num_comm_combinations = poker.evaluator._ncr(
            len(self.deck.cards), num_additional_comm_cards
        )
        if num_comm_combinations < 1000:
            comm_combinations: Iterator[
                Tuple[poker.Card, ...]
            ] = itertools.combinations(self.deck.cards, num_additional_comm_cards)
            n = num_comm_combinations
        else:
            comm_combinations = (
                np.random.choice(
                    self.deck.cards, num_additional_comm_cards, replace=False
                )
                for _ in range(n)
            )
        for additional_comm_cards in comm_combinations:
            community_cards = self.community_cards + list(additional_comm_cards)
            hand_strengths.append(self._eval_hands(self.hole_cards, community_cards))
        best_hand = np.min(hand_strengths, axis=1)
        hand_won_bool = best_hand.reshape(n, 1) == np.array(hand_strengths)
        hands_won = hand_won_bool.sum(axis=0)
        win_probs: List[float] = (hands_won / hands_won.sum()).tolist()
        return win_probs

    def _all_agreed(self) -> bool:
        # not all agreed if not all players had chance to act
        if not self.street_option.all():
            return False
        # all agreed if street commits equal to maximum street commit
        street_commits_bool: np.ndarray = (
            self.street_commits == self.street_commits.max()
        )
        # or player is all in
        all_in_bool: np.ndarray = self.stacks == 0
        # or player is not active
        active_bool: np.ndarray = np.logical_not(self.active)
        bool_arr: np.ndarray = street_commits_bool | all_in_bool | active_bool
        all_agreed: bool = bool_arr.all()
        return all_agreed

    def _bet_sizes(self) -> Tuple[int, int, int]:
        # call difference between commit and maximum commit
        call = self.street_commits.max() - self.street_commits[self.action]
        # min raise at least largest previous raise
        # if limit game min and max raise equal to raise size
        if isinstance(self.raise_sizes[self.street], int):
            max_raise = min_raise = self.raise_sizes[self.street] + call
        else:
            min_raise = max(self.big_blind, self.largest_raise + call)
            if self.raise_sizes[self.street] == "pot":
                max_raise = self.pot + call * 2
            elif self.raise_sizes[self.street] == float("inf"):
                max_raise = self.stacks[self.action]
        # if maximum number of raises in street
        # was reached cap raise at 0
        if self.street_raises >= self.num_raises[self.street]:
            min_raise = max_raise = 0
        # if last full raise was done by active player
        # (another player has raised less than minimum raise amount)
        # cap active players raise size to 0
        if self.street_raises and call < self.largest_raise:
            min_raise = max_raise = 0
        # clip bets to stack size
        call = min(call, self.stacks[self.action])
        min_raise = min(min_raise, self.stacks[self.action])
        max_raise = min(max_raise, self.stacks[self.action])
        return call, min_raise, max_raise

    @staticmethod
    def _clean_bet(bet: int, call: int, min_raise: int, max_raise: int) -> int:
        # find closest bet size to actual bet
        # pessimistic approach: in ties order is fold/check -> call -> raise
        idx = np.argmin(np.absolute(np.array([0, call, min_raise, max_raise]) - bet))
        # if call closest
        if idx == 1:
            return call
        # if min raise or max raise closest
        if idx in (2, 3):
            return round(min(max_raise, max(min_raise, bet)))
        # if check/fold closest
        return 0

    def _collect_multiple_bets(
        self, bets: List[int], street_commits: bool = True
    ) -> None:
        bets_arr = np.roll(bets, self.action)
        bets_arr = (self.stacks > 0) * self.active * bets_arr
        if street_commits:
            self.street_commits += bets_arr
        self.pot_commit += bets_arr
        self.pot += int(bets_arr.sum())
        self.stacks -= bets_arr

    def _collect_bet(self, bet: int) -> None:
        # bet only as large as stack size
        bet = min(self.stacks[self.action], bet)

        self.pot += bet
        self.pot_commit[self.action] += bet
        self.street_commits[self.action] += bet
        self.stacks[self.action] -= bet

    def _done(self) -> List[bool]:
        if self.street >= self.num_streets or self.active.sum() <= 1:
            # end game
            return [True] * self.num_players
        out: List[bool] = np.logical_not(self.active).astype(bool).tolist()
        return out

    def _observation(self, done: bool) -> ObservationDict:
        if done:
            call = min_raise = max_raise = 0
        else:
            call, min_raise, max_raise = self._bet_sizes()
        observation: ObservationDict = {
            "action": self.action,
            "active": self.active.tolist(),
            "button": self.button,
            "call": call,
            "community_cards": self.community_cards,
            "hole_cards": self.hole_cards[self.action],
            "max_raise": max_raise,
            "min_raise": min_raise,
            "pot": self.pot,
            "stacks": self.stacks.tolist(),
            "street_commits": self.street_commits.tolist(),
        }
        return observation

    def _payouts(self) -> List[int]:
        # players that have folded lose their bets
        payouts = -1 * self.pot_commit * np.logical_not(self.active)
        if self.active.sum() == 1:
            payouts += self.active * (self.pot - self.pot_commit)
        # if last street played and still multiple players active
        elif self.street >= self.num_streets:
            payouts = self._eval_round()
            payouts -= self.pot_commit
        out: List[int] = payouts.astype(int).tolist()
        return out

    def _eval_hands(
        self, hole_cards: List[List[poker.Card]], community_cards: List[poker.Card]
    ) -> List[int]:
        # grab array of hand strength and pot commits
        worst_hand = self.evaluator.table.max_rank + 1
        hand_strengths = []
        for player in range(self.num_players):
            # if not active hand strength set
            # to 1 worse than worst possible rank
            hand_strength = (
                self.evaluator.evaluate(hole_cards[player], community_cards)
                if self.active[player]
                else worst_hand
            )
            hand_strengths.append(hand_strength)
        return hand_strengths

    def _eval_round(self) -> List[int]:
        """Splits the pot between the players at showdown.

        Players are processed in ascending order of hand strength value
        (presumably a lower value is a stronger hand, verify against
        poker.Evaluator) and, within equal strength, in ascending order
        of chips committed, which produces main and side pots.

        Returns
        -------
        List[int]
            chips every player receives from the pot, the player's own
            commits are not subtracted here
        """
        # grab array of hand strength and pot commits
        hand_strengths = self._eval_hands(self.hole_cards, self.community_cards)
        # one row per player: [player index, hand strength, chips committed]
        hand_list = [
            [player_idx, hand_strength, self.pot_commit[player_idx]]
            for player_idx, hand_strength in enumerate(hand_strengths)
        ]
        hands = np.array(hand_list)
        # sort hands by hand strength and pot commits
        # (lexsort uses the last key as primary key, i.e. hand strength)
        hands = hands[np.lexsort([hands[:, 2], hands[:, 1]])]
        pot = self.pot
        remainder = 0
        payouts = np.zeros(self.num_players, dtype=int)
        worst_hand = self.evaluator.table.max_rank + 1
        # iterate over hand strength and
        # pot commits from smallest to largest
        # NOTE: hands is modified inside the loop, each row is read when
        # its iteration starts and therefore reflects earlier deductions
        for idx, (_, strength, pot_commit) in enumerate(hands):
            # all players still holding this hand strength share the pot
            eligible = hands[:, 0][hands[:, 1] == strength].astype(int)
            # cut can only be as large as lowest player commit amount
            cut = np.clip(hands[:, 2], None, pot_commit)
            split_pot = cut.sum()
            split = split_pot // len(eligible)
            # chips that cannot be split evenly are handed out at the end
            remain = split_pot % len(eligible)
            payouts[eligible] += split
            remainder += remain
            # remove chips from players and pot
            hands[:, 2] -= cut
            pot -= split_pot
            # remove player from next split pot
            hands[idx, 1] = worst_hand
            # stop as soon as the whole pot is distributed
            if pot == 0:
                break
        # give worst position player remainder chips
        if remainder:
            # worst player is first player after button involved in pot
            involved_players = np.nonzero(payouts)[0]
            # players at or before the button are shifted by one table
            # length so that the seat right after the button sorts first
            button_shift = (involved_players <= self.button) * self.num_players
            button_shifted_players = involved_players + button_shift
            worst_idx = np.argmin(button_shifted_players)
            worst_pos = involved_players[worst_idx]
            payouts[worst_pos] += remainder
        out: List[int] = payouts.astype(int).tolist()
        return out

    def _move_action(self) -> "Dealer":
        action = self.action
        for idx in range(1, self.num_players + 1):
            action = (self.action + idx) % self.num_players
            if self.active[action]:
                break
            else:
                self.street_option[action] = True
        self.action = action
        return self