You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Describe the bug
No matter how I use it, it always fails with: raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
Screenshots
Environment Info
Traceback (most recent call last):
File "D:\pythonprojects\LANGCHAIN\main.py", line 87, in <module>
elements = partition_pdf("D:\pythonprojects\LANGCHAIN\inputs\智能传感器装配调试台架-产品手册.pdf")
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\documents\elements.py", line 581, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\file_utils\filetype.py", line 725, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\file_utils\filetype.py", line 683, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\chunking\dispatch.py", line 74, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\pdf.py", line 209, in partition_pdf
return partition_pdf_or_image(
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\pdf.py", line 350, in partition_pdf_or_image
out_elements = _process_uncategorized_text_elements(elements)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\pdf.py", line 930, in _process_uncategorized_text_elements
new_el = element_from_text(cast(Text, el).text)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text.py", line 149, in element_from_text
elif is_possible_narrative_text(text):
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text_type.py", line 74, in is_possible_narrative_text
if exceeds_cap_ratio(text, threshold=cap_threshold):
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text_type.py", line 270, in exceeds_cap_ratio
if sentence_count(text, 3) > 1:
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text_type.py", line 219, in sentence_count
sentences = sent_tokenize(text)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\nlp\tokenize.py", line 56, in sent_tokenize
_download_nltk_packages_if_not_present()
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\nlp\tokenize.py", line 41, in _download_nltk_packages_if_not_present
tagger_available = check_for_nltk_package(
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\nlp\tokenize.py", line 29, in check_for_nltk_package
nltk.find(f"{package_category}/{package_name}", paths=paths)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 551, in find
return find(modified_name, paths)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 538, in find
return ZipFilePathPointer(p, zipentry)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 391, in __init__
zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 1020, in __init__
zipfile.ZipFile.__init__(self, filename)
File "D:\miniconda\envs\LANGCHAIN\lib\zipfile.py", line 1268, in __init__
self._RealGetContents()
File "D:\miniconda\envs\LANGCHAIN\lib\zipfile.py", line 1335, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
The text was updated successfully, but these errors were encountered:
Describe the bug
No matter how I use it, it always fails with: raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
Screenshots
Environment Info
Traceback (most recent call last):
File "D:\pythonprojects\LANGCHAIN\main.py", line 87, in <module>
elements = partition_pdf("D:\pythonprojects\LANGCHAIN\inputs\智能传感器装配调试台架-产品手册.pdf")
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\documents\elements.py", line 581, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\file_utils\filetype.py", line 725, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\file_utils\filetype.py", line 683, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\chunking\dispatch.py", line 74, in wrapper
elements = func(*args, **kwargs)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\pdf.py", line 209, in partition_pdf
return partition_pdf_or_image(
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\pdf.py", line 350, in partition_pdf_or_image
out_elements = _process_uncategorized_text_elements(elements)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\pdf.py", line 930, in _process_uncategorized_text_elements
new_el = element_from_text(cast(Text, el).text)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text.py", line 149, in element_from_text
elif is_possible_narrative_text(text):
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text_type.py", line 74, in is_possible_narrative_text
if exceeds_cap_ratio(text, threshold=cap_threshold):
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text_type.py", line 270, in exceeds_cap_ratio
if sentence_count(text, 3) > 1:
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\partition\text_type.py", line 219, in sentence_count
sentences = sent_tokenize(text)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\nlp\tokenize.py", line 56, in sent_tokenize
_download_nltk_packages_if_not_present()
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\nlp\tokenize.py", line 41, in _download_nltk_packages_if_not_present
tagger_available = check_for_nltk_package(
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\unstructured\nlp\tokenize.py", line 29, in check_for_nltk_package
nltk.find(f"{package_category}/{package_name}", paths=paths)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 551, in find
return find(modified_name, paths)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 538, in find
return ZipFilePathPointer(p, zipentry)
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 391, in __init__
zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
File "D:\miniconda\envs\LANGCHAIN\lib\site-packages\nltk\data.py", line 1020, in __init__
zipfile.ZipFile.__init__(self, filename)
File "D:\miniconda\envs\LANGCHAIN\lib\zipfile.py", line 1268, in __init__
self._RealGetContents()
File "D:\miniconda\envs\LANGCHAIN\lib\zipfile.py", line 1335, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
The text was updated successfully, but these errors were encountered: