Skip to content

Commit

Permalink
Update dependencies and fix convert_model (#458)
Browse files Browse the repository at this point in the history
* feat: Update dependencies in Cargo.toml

Update the dependencies in Cargo.toml to their latest versions:
- rust_tokenizers: 8.1.1
- tch: 0.16.0 (with features = ["download-libtorch"])
- serde_json: 1
- serde: 1 (with features = ["derive"])
- ordered-float: 4.2.0
- uuid: 1 (with features = ["v4"])
- thiserror: 1
- half: 2
- regex: 1.6
- cached-path: 0.6 (with default-features = false and optional = true)
- dirs: 5 (optional = true)
- lazy_static: 1 (optional = true)
- ort: 1.16.3 (optional = true, default-features = false, features = ["half"])
- ndarray: 0.15 (optional = true)
- tokenizers: 0.19.1 (optional = true, default-features = false, features = ["onig"])

* chore: Update .gitignore and requirements.txt, and improve convert_model.py

Update .gitignore to exclude the /models/ and /.venv/ directories, and the convert_model.log file.

Remove the requirements.txt file.

In convert_model.py:
- Add a new function, `zipfile_factory`, to handle zip file creation.
- Update the logger configuration to log debug messages to a file named `convert_model.log`.

* delete duplicate requirements file

* update CI req file path

* missing requests dependency

---------

Co-authored-by: Abdulrhman Alkhodiry <[email protected]>
Co-authored-by: Guillaume Becquin <[email protected]>
  • Loading branch information
3 people authored Jun 30, 2024
1 parent f99bf51 commit 33b2944
Show file tree
Hide file tree
Showing 12 changed files with 355 additions and 158 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/continuous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ jobs:
with:
python-version: '3.10'
- run: |
pip install -r requirements.txt --progress-bar off
pip install -r ./utils/requirements.txt --progress-bar off
python ./utils/download-dependencies_distilbert.py
fmt:
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ Cargo.lock

/target
#**/*.rs.bk
/resources/
/models/
/.venv/
convert_model.log
23 changes: 15 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ features = ["doc-only"]

[dependencies]
rust_tokenizers = "8.1.1"
tch = "0.15.0"
tch = { version = "0.16.0", features = ["download-libtorch"] }
serde_json = "1"
serde = { version = "1", features = ["derive"] }
ordered-float = "3"
ordered-float = "4.2.0"
uuid = { version = "1", features = ["v4"] }
thiserror = "1"
half = "2"
Expand All @@ -88,19 +88,26 @@ regex = "1.6"
cached-path = { version = "0.6", default-features = false, optional = true }
dirs = { version = "5", optional = true }
lazy_static = { version = "1", optional = true }
ort = {version="~1.15.2", optional = true, default-features = false, features = ["half"]}
ndarray = {version="0.15", optional = true}
tokenizers = {version="0.15", optional=true, default-features = false, features = ["onig"]}
ort = { version = "1.16.3", optional = true, default-features = false, features = [
"half",
] }
ndarray = { version = "0.15", optional = true }
tokenizers = { version = "0.19.1", optional = true, default-features = false, features = [
"onig",
] }

[dev-dependencies]
anyhow = "1"
csv = "1"
criterion = "0.5"
tokio = { version = "1.35", features = ["sync", "rt-multi-thread", "macros"] }
tempfile = "3"
itertools = "0.12"
tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }
ort = {version="~1.15.5", features = ["load-dynamic"]}
itertools = "0.13.0"
tracing-subscriber = { version = "0.3", default-features = false, features = [
"env-filter",
"fmt",
] }
ort = { version = "1.16.3", features = ["load-dynamic"] }

[[example]]
name = "onnx-masked-lm"
Expand Down
442 changes: 302 additions & 140 deletions README.md

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions requirements.txt

This file was deleted.

1 change: 1 addition & 0 deletions src/models/bart/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ impl BartDecoder {
}
}

#[allow(dead_code)]
///Container holding a BART decoder output
pub struct BartDecoderOutput {
/// last decoder layer hidden state
Expand Down
1 change: 1 addition & 0 deletions src/models/prophetnet/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ impl ProphetNetEncoder {
}
}

#[allow(dead_code)]
/// Container for the ProphetNet encoder output.
pub struct ProphetNetEncoderOutput {
/// Last hidden states from the model
Expand Down
1 change: 1 addition & 0 deletions src/models/reformer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ impl ChunkReformerFeedForward {
}
}

#[allow(dead_code)]
pub struct ReformerLayerOutput {
pub attention_output: Tensor,
pub hidden_states: Tensor,
Expand Down
1 change: 1 addition & 0 deletions src/models/reformer/reformer_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ impl ReformerLMHead {
}
}

#[allow(dead_code)]
pub struct PaddedReformerInput {
pub input_ids: Option<Tensor>,
pub input_embeds: Option<Tensor>,
Expand Down
1 change: 1 addition & 0 deletions src/models/t5/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@ impl T5Stack {
}
}

#[allow(dead_code)]
pub struct T5BlockOutput {
pub hidden_states: Tensor,
pub self_attention_weights: Option<Tensor>,
Expand Down
23 changes: 18 additions & 5 deletions utils/convert_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,20 @@
import zipfile
from pathlib import Path
from typing import Dict

import os
import numpy as np
import torch
from numpy.lib.format import write_array
from numpy.lib.npyio import zipfile_factory
# from numpy.lib.npyio import zipfile_factory
from torch import Tensor

def zipfile_factory(file, *args, **kwargs):
    """Create a ``zipfile.ZipFile`` suitable for writing numpy ``.npz`` archives.

    Local replacement for the private ``numpy.lib.npyio.zipfile_factory``
    helper, which is no longer importable from recent numpy releases
    (the commented-out import above).

    Args:
        file: A path-like object or an already-open file-like object.
        *args: Positional arguments forwarded to ``zipfile.ZipFile``.
        **kwargs: Keyword arguments forwarded to ``zipfile.ZipFile``.
            ``allowZip64`` and ``compresslevel`` are always overridden below.

    Returns:
        zipfile.ZipFile: The opened archive.
    """
    # File-like objects pass through untouched; paths are normalized so
    # Path objects and str both work.
    if not hasattr(file, 'read'):
        file = os.fspath(file)
    # Model weight archives routinely exceed 4 GiB, so Zip64 is mandatory.
    kwargs['allowZip64'] = True
    # Moderate compression level: trades archive size against conversion time.
    kwargs['compresslevel'] = 4
    return zipfile.ZipFile(file, *args, **kwargs)

def get_bf16_repr(input_tensor: torch.Tensor) -> np.ndarray:
"""Convert a bfloat16 tensor to an equivalent byte representation in Numpy.
Expand Down Expand Up @@ -125,6 +132,12 @@ def append_to_zipf(
help="Use this flag to enable automatic download of the libtorch library.",
)
args = parser.parse_args()

logger = logging.getLogger('convert_model')
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler('convert_model.log')
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

target_folder = Path(args.source_file[0]).parent
with zipfile_factory(
Expand All @@ -133,7 +146,7 @@ def append_to_zipf(
for source_file_or_pattern in args.source_file:
source_files = glob.glob(source_file_or_pattern)
for source_file in source_files:
logging.info(f"Processing source file {source_file}...")
logger.info(f"Processing source file {source_file}")
nps = {}
source_file = Path(source_file)
weights = torch.load(str(source_file), map_location="cpu")
Expand Down Expand Up @@ -168,11 +181,11 @@ def append_to_zipf(
)
else:
nps[k] = np.ascontiguousarray(tensor)
logging.info(
logger.info(
f"converted {k} - {str(sys.getsizeof(nps[k]))} bytes"
)
else:
logging.info(f"skipped non-tensor object: {k}")
logger.info(f"skipped non-tensor object: {k}")
append_to_zipf(nps, output_zipfile)

source = str(target_folder / "model.npz")
Expand Down
11 changes: 11 additions & 0 deletions utils/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
filelock==3.15.3
fsspec==2024.6.0
Jinja2==3.1.4
MarkupSafe==2.1.5
mpmath==1.3.0
networkx==3.3
numpy==2.0.0
sympy==1.12.1
torch==2.3.1
typing_extensions==4.12.2
requests==2.32.0

0 comments on commit 33b2944

Please sign in to comment.