Skip to content

Commit e599219

Browse files
committed
Documentation updates
1 parent 61f46bb commit e599219

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

README.md

+19-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,25 @@
1+
<p style="float: right">
2+
<a href="https://badge.fury.io/py/dom-tokenizers">
3+
<img alt="Build" src="https://badge.fury.io/py/dom-tokenizers.svg">
4+
</a>
5+
<a href="https://github.com/gbenson/dom-tokenizers/blob/master/LICENSE">
6+
<img alt="GitHub" src="https://img.shields.io/github/license/gbenson/dom-tokenizers.svg?color=blue">
7+
</a>
8+
</p>
9+
1 10
# DOM tokenizers
2 11

3-
HTML DOM-aware tokenizers for Hugging Face language models.
12+
DOM-aware tokenizers for [🤗 Hugging Face](https://huggingface.co/)
13+
language models.
14+
15+
## Installation
16+
17+
### With PIP
18+
```sh
19+
pip install dom-tokenizers[train]
20+
```
4 21

5-
## Setup for development
22+
### From sources
6 23

7 24
```sh
8 25
git clone https://github.com/gbenson/dom-tokenizers.git

pyproject.toml

+11-6
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,21 @@
2 2
name = "dom-tokenizers"
3 3
version = "0.0.1"
4 4
authors = [{ name = "Gary Benson" }]
5-
description = "HTML DOM-aware tokenizers for Hugging Face language models"
5+
description = "DOM-aware tokenizers for Hugging Face language models"
6 6
readme = "README.md"
7-
license = { file = "LICENSE" }
7+
license = { text = "Apache Software License (Apache-2.0)" }
8 8
requires-python = ">=3.10" # match..case
9 9
classifiers = [
10-
"Programming Language :: Python :: 3",
10+
"Development Status :: 4 - Beta",
11+
"Intended Audience :: Developers",
12+
"Intended Audience :: Education",
13+
"Intended Audience :: Science/Research",
11 14
"License :: OSI Approved :: Apache Software License",
12 15
"Operating System :: OS Independent",
13-
"Development Status :: 4 - Beta",
16+
"Programming Language :: Python :: 3",
17+
"Programming Language :: Python :: 3.10",
18+
"Programming Language :: Python :: 3.11",
19+
"Programming Language :: Python :: 3.12",
14 20
"Topic :: Internet :: WWW/HTTP",
15 21
"Topic :: Software Development :: Libraries",
16 22
"Topic :: Scientific/Engineering :: Artificial Intelligence",
@@ -25,8 +31,7 @@ dependencies = [
25 31

26 32
[project.urls]
27 33
Homepage = "https://github.com/gbenson/dom-tokenizers"
28-
Repository = "https://github.com/gbenson/dom-tokenizers"
29-
"Bug Tracker" = "https://github.com/gbenson/dom-tokenizers/issues"
34+
#Source = "https://github.com/gbenson/dom-tokenizers"
30 35

31 36
[project.optional-dependencies]
32 37
dev = [

0 commit comments

Comments
 (0)