Skip to content

Commit

Permalink
Hide annoying warning better
Browse files Browse the repository at this point in the history
  • Loading branch information
gbenson committed May 16, 2024
1 parent 69e63ef commit c2037d1
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 6 deletions.
2 changes: 0 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,3 @@ per-file-ignores =
src/dom_tokenizers/**/__init__.py: F401
# line too long
src/dom_tokenizers/pre_tokenizers/dom_snapshot.py: E501
# module level import not at top of file
src/dom_tokenizers/train.py: E402
Empty file.
18 changes: 18 additions & 0 deletions src/dom_tokenizers/internal/transformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os

# Importing transformers logs a 'None of PyTorch, TensorFlow >= 2.0, or
# Flax have been found. Models won't be available and only tokenizers,
# configuration and file/data utilities can be used' warning.
# Tokenizers is all we want, so TRANSFORMERS_NO_ADVISORY_WARNINGS is set
# for the duration of the import only, and the environment is then put
# back exactly as it was found.

__var_name = "TRANSFORMERS_NO_ADVISORY_WARNINGS"
__orig_val = os.environ.get(__var_name)
os.environ[__var_name] = "1"
try:
    from transformers import AutoTokenizer  # noqa: F401
finally:
    if __orig_val is None:
        # The variable was unset before we touched it, so unset it again.
        # Passing a default means that if it has already been removed
        # by the time we get here, no KeyError is raised from this
        # finally clause to mask a failure of the import above.
        os.environ.pop(__var_name, None)
    else:
        os.environ[__var_name] = __orig_val

# Leave AutoTokenizer as the only name this module exports.
del __var_name, __orig_val, os
5 changes: 1 addition & 4 deletions src/dom_tokenizers/train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import json
import warnings

Expand All @@ -8,9 +7,7 @@
from datasets import load_dataset
from tokenizers.pre_tokenizers import PreTokenizer, WhitespaceSplit

os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = str(True)
from transformers import AutoTokenizer

from .internal.transformers import AutoTokenizer
from .pre_tokenizers import DOMSnapshotPreTokenizer

DEFAULT_BASE = "bert-base-uncased"
Expand Down

0 comments on commit c2037d1

Please sign in to comment.