Skip to content

Commit

Permalink
Remove profiling test remaining from Outlines
Browse files Browse the repository at this point in the history
  • Loading branch information
rlouf committed Oct 9, 2024
1 parent 4a9afea commit 4d0cc84
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 55 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ test = [
"huggingface_hub",
"torch",
"transformers",
"datasets",
"pillow",
"asv",
"setuptools-rust",
Expand Down
1 change: 0 additions & 1 deletion python/outlines_core/fsm/guide.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import interegular
import torch

from outlines_core.fsm.regex import (
create_fsm_index_tokenizer,
make_byte_level_fsm,
Expand Down
59 changes: 5 additions & 54 deletions tests/fsm/test_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
import interegular
import pytest
import torch
from transformers import AutoTokenizer, PreTrainedTokenizer

from datasets.fingerprint import Hasher
from outlines_core.fsm.outlines_core_rs import Vocabulary
from outlines_core.fsm.regex import (
BetterAlphabet,
Expand All @@ -18,6 +17,7 @@
make_deterministic_fsm,
reduced_vocabulary,
)
from transformers import AutoTokenizer, PreTrainedTokenizer


def get_llama_tokenizer_types():
Expand Down Expand Up @@ -106,6 +106,9 @@ def convert_token_to_string(self, token: str) -> str:

return string

def __hash__(self):
    """Return a hash derived from ``Hasher.hash`` of the wrapped tokenizer."""
    # Hasher.hash yields a fingerprint of self.tokenizer; the builtin hash()
    # then turns that fingerprint into the integer Python expects.
    fingerprint = Hasher.hash(self.tokenizer)
    return hash(fingerprint)

def __eq__(self, other):
if isinstance(other, type(self)):
if hasattr(self, "model_name") and hasattr(self, "kwargs"):
Expand Down Expand Up @@ -460,58 +463,6 @@ def test_regex_index_performance():
profiler.print_stats(output_unit=1e-3, summarize=True, stripzeros=True)


@pytest.mark.skip(reason="Only for local profiling")
def test_json_index_performance():
    """Line-profile regex generation for a regex built from a JSON schema.

    Skipped by default; meant to be run by hand. It loads ``"gpt2"`` with
    ``device="cuda"``, builds a regex from the ``Character`` schema, and
    profiles the call. Stats are dumped to
    ``line-profiler-build-json-regex.pkl`` and printed.
    """
    import json
    from enum import Enum

    from line_profiler import LineProfiler  # type: ignore [import]
    from pydantic import BaseModel, constr

    import outlines_core

    class Weapon(str, Enum):
        sword = "sword"
        axe = "axe"
        mace = "mace"
        spear = "spear"
        bow = "bow"
        crossbow = "crossbow"

    class Armor(str, Enum):
        leather = "leather"
        chainmail = "chainmail"
        plate = "plate"

    class Character(BaseModel):
        name: constr(max_length=10)
        # TODO: Add support for conint
        age: int  # conint(int, ge=18, le=100)
        armor: Armor
        weapon: Weapon
        # TODO: Add support for conint
        strength: int  # conint(int, ge=0, le=100)

    model = outlines_core.models.transformers("gpt2", device="cuda")
    schema_dict = Character.model_json_schema()
    schema_json = json.dumps(schema_dict)

    # NOTE: the name `build_regex` must stay as-is; it is looked up by the
    # "build_regex()" statement string handed to `profiler.runctx` below.
    def build_regex():
        pattern = outlines_core.index.json_schema.build_regex_from_object(schema_json)
        outlines_core.generate.regex(model, pattern)

    profiler = LineProfiler(create_fsm_index_end_to_end)
    # Register the additional functions whose lines should be timed.
    for extra_target in (
        create_fsm_index_tokenizer,
        outlines_core.index.index.RegexFSM.__init__,
    ):
        profiler.add_function(extra_target)

    profiler.runctx("build_regex()", globals(), locals())
    profiler.dump_stats("line-profiler-build-json-regex.pkl")
    profiler.print_stats(output_unit=1e-3, summarize=True, stripzeros=True)


def test_token_trans_keys_identical():
"""assert two tokens w/ identical behavior wrt FSM have same trans key seq"""

Expand Down

0 comments on commit 4d0cc84

Please sign in to comment.