diff --git a/poetry.lock b/poetry.lock index dcba2b344e..e6f98014b3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -419,7 +419,7 @@ test = ["pytest (>=6.2.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pr [[package]] name = "datasets" -version = "2.0.0" +version = "2.1.0" description = "HuggingFace community-driven open-source library of datasets" category = "main" optional = false @@ -446,13 +446,13 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa"] benchmarks = ["numpy (==1.18.5)", "tensorflow (==2.3.0)", "torch (==1.6.0)", "transformers (==3.0.2)"] -dev = ["absl-py", "pytest", "pytest-datadir", "pytest-xdist", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "aiobotocore", "boto3", "botocore", "faiss-cpu (>=1.6.4)", "fsspec", "moto[s3,server] (==2.0.4)", "rarfile (>=4.0)", "s3fs (==2021.08.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "torch", "torchaudio", "soundfile", "transformers", "bs4", "conllu", "h5py", "langdetect", "lxml", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "tldextract", "zstandard", "bert-score (>=0.3.6)", "rouge-score", "sacrebleu", "scipy", "seqeval", "scikit-learn", "jiwer", "sentencepiece", "torchmetrics (==0.6.0)", "mauve-text", "toml (>=0.10.1)", "requests-file (>=1.5.1)", "tldextract (>=3.1.0)", "texttable (>=1.6.3)", "Werkzeug (>=1.0.1)", "six (>=1.15.0,<1.16.0)", "Pillow (>=6.2.1)", "librosa", "wget (>=3.2)", "pytorch-nlp (==0.5.0)", "pytorch-lightning", "fastBPE (==0.1.0)", "fairseq", "black (>=22.0,<23.0)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "pyyaml (>=5.3.1)", "importlib-resources"] +dev = ["absl-py", "pytest", "pytest-datadir", "pytest-xdist", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "aiobotocore", "boto3", "botocore", "faiss-cpu (>=1.6.4)", "fsspec", "moto[server,s3] (==2.0.4)", "rarfile (>=4.0)", "s3fs (==2021.08.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "torch", "torchaudio", "soundfile", "transformers", "bs4", "conllu", "h5py", "langdetect", "lxml", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "tldextract", "zstandard", "bert-score (>=0.3.6)", "rouge-score", "sacrebleu", "scipy", "seqeval", "scikit-learn", "jiwer", "sentencepiece", "mauve-text", "toml (>=0.10.1)", "requests-file (>=1.5.1)", "tldextract (>=3.1.0)", "texttable (>=1.6.3)", "Werkzeug (>=1.0.1)", "six (>=1.15.0,<1.16.0)", "Pillow (>=6.2.1)", "librosa", "black (>=22.0,<23.0)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "pyyaml (>=5.3.1)", "importlib-resources"] docs = ["s3fs"] quality = ["black (>=22.0,<23.0)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "pyyaml (>=5.3.1)"] s3 = ["fsspec", "boto3", "botocore", "s3fs"] tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)"] tensorflow_gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] -tests = ["absl-py", "pytest", "pytest-datadir", "pytest-xdist", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "aiobotocore", "boto3", "botocore", "faiss-cpu (>=1.6.4)", "fsspec", "moto[s3,server] (==2.0.4)", "rarfile (>=4.0)", "s3fs (==2021.08.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "torch", "torchaudio", "soundfile", "transformers", "bs4", "conllu", "h5py", "langdetect", "lxml", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "tldextract", "zstandard", "bert-score (>=0.3.6)", "rouge-score", "sacrebleu", "scipy", "seqeval", "scikit-learn", "jiwer", "sentencepiece", "torchmetrics (==0.6.0)", "mauve-text", "toml (>=0.10.1)", "requests-file (>=1.5.1)", "tldextract (>=3.1.0)", "texttable (>=1.6.3)", "Werkzeug (>=1.0.1)", "six (>=1.15.0,<1.16.0)", "Pillow (>=6.2.1)", "librosa", "wget (>=3.2)", "pytorch-nlp (==0.5.0)", "pytorch-lightning", "fastBPE (==0.1.0)", "fairseq", "importlib-resources"] +tests = ["absl-py", "pytest", "pytest-datadir", "pytest-xdist", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "aiobotocore", "boto3", "botocore", "faiss-cpu (>=1.6.4)", "fsspec", "moto[server,s3] (==2.0.4)", "rarfile (>=4.0)", "s3fs (==2021.08.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "torch", "torchaudio", "soundfile", "transformers", "bs4", "conllu", "h5py", "langdetect", "lxml", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "tldextract", "zstandard", "bert-score (>=0.3.6)", "rouge-score", "sacrebleu", "scipy", "seqeval", "scikit-learn", "jiwer", "sentencepiece", "mauve-text", "toml (>=0.10.1)", "requests-file (>=1.5.1)", "tldextract (>=3.1.0)", "texttable (>=1.6.3)", "Werkzeug (>=1.0.1)", "six (>=1.15.0,<1.16.0)", "Pillow (>=6.2.1)", "librosa", "importlib-resources"] torch = ["torch"] vision = ["Pillow (>=6.2.1)"] @@ -2560,7 +2560,7 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "1.1" python-versions = "3.9.6" -content-hash = "dd82aacae83a234b5bcafe7298a54b4df9b1f20494e4209b34f935b9a07383a7" +content-hash = "b6474f6d8508bf4aa84ef54a373d0e4912880d6f6932a6d6a27bec2bcd25bb61" [metadata.files] absl-py = [ @@ -3050,8 +3050,8 @@ cryptography = [ {file = "cryptography-36.0.1.tar.gz", hash = "sha256:53e5c1dc3d7a953de055d77bef2ff607ceef7a2aac0353b5d630ab67f7423638"}, ] datasets = [ - {file = "datasets-2.0.0-py3-none-any.whl", hash = "sha256:6219d3674ebfbd6978f2f27f7db89aabdac6f7392efda735b9e005b5d87d3c76"}, - {file = "datasets-2.0.0.tar.gz", hash = "sha256:c93db6b39e5dda72b093d6f11a05945f588e7c5caabb93a0ac4bdf07e0d0ac1a"}, + {file = "datasets-2.1.0-py3-none-any.whl", hash = "sha256:7c186041abb980066bff4482037f41e302741bad4709885b2417569f2f6bac3a"}, + {file = "datasets-2.1.0.tar.gz", hash = "sha256:d2bf81085a07ccbf739a92de9da41822f2979d4352dfdcadb3ec35eeef6cd1ef"}, ] decorator = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, diff --git a/pyproject.toml b/pyproject.toml index 3e18ea7d79..db27a41a4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ apache-beam = "^2.33.0" appdirs = "^1.4.4" bs4 = "^0.0.1" conllu = "^4.4.1" -datasets = { extras = ["audio", "vision"], version = "^2.0.0" } +datasets = { extras = ["audio", "vision"], version = "^2.1.0" } diskcache = "^5.2.1" function-parser = "^0.0.3" gdown = "^4.2.0" diff --git a/tests/models/test_row.py b/tests/models/test_row.py index 0f885a319f..7b7ed19f2a 100644 --- a/tests/models/test_row.py +++ b/tests/models/test_row.py @@ -63,14 +63,3 @@ def test_audio_dataset() -> None: rows = get_rows("abidlabs/test-audio-1", "test", "train", rows_max_number=ROWS_MAX_NUMBER) assert len(rows) == 1 assert rows[0]["Output"]["sampling_rate"] == 48000 - - -def test_libsndfile() -> None: - # see https://github.com/huggingface/datasets-preview-backend/issues/194 - rows = get_rows("polinaeterna/ml_spoken_words", "ar_opus", "train", rows_max_number=ROWS_MAX_NUMBER) - assert len(rows) == ROWS_MAX_NUMBER - assert rows[0]["audio"]["sampling_rate"] == 48000 - - rows = get_rows("polinaeterna/ml_spoken_words", "ar_wav", "train", rows_max_number=ROWS_MAX_NUMBER) - assert len(rows) == ROWS_MAX_NUMBER - assert rows[0]["audio"]["sampling_rate"] == 16000