Skip to content

Commit

Permalink
Fix SSRF when the docx file extractor fetches external images
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnJyong committed Nov 4, 2024
1 parent 8ab05d4 commit c135ec4
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion api/core/rag/extractor/word_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from docx import Document as DocxDocument

from configs import dify_config
+from core.helper import ssrf_proxy
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
from extensions.ext_database import db
Expand Down Expand Up @@ -86,7 +87,7 @@ def _extract_images_from_docx(self, doc, image_folder):
image_count += 1
if rel.is_external:
url = rel.reltype
-response = requests.get(url, stream=True)
+response = ssrf_proxy.get(url, stream=True)
if response.status_code == 200:
image_ext = mimetypes.guess_extension(response.headers["Content-Type"])
file_uuid = str(uuid.uuid4())
Expand Down

0 comments on commit c135ec4

Please sign in to comment.