diff --git a/README.md b/README.md index 52c4aeb..d33e784 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,7 @@ # DOM tokenizers -DOM-aware tokenizers for 🤗 [Hugging Face](https://huggingface.co/) -language models. +DOM-aware tokenizers for Hugging Face language models. ## Installation @@ -31,7 +30,9 @@ pip install --upgrade pip pip install -e .[dev,train] ``` -## Train a tokenizer +## Load a pretrained tokenizer from the Hub + +## Train your own ### On the command line diff --git a/pyproject.toml b/pyproject.toml index 6b73303..ccc9d7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,9 +2,9 @@ name = "dom-tokenizers" version = "0.0.2" authors = [{ name = "Gary Benson" }] -description = "DOM-aware tokenizers for Hugging Face language models" +description = "DOM-aware tokenizers for 🤗 Hugging Face language models" readme = "README.md" -license = { text = "Apache Software License (Apache-2.0)" } +license = { text = "Apache-2.0" } requires-python = ">=3.10" # match..case classifiers = [ "Development Status :: 4 - Beta",