Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: multi doc support prompt studio #729

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 6 additions & 12 deletions backend/file_management/file_management_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,6 @@
from unstract.connectors.filesystems import connectors as fs_connectors
from unstract.connectors.filesystems.unstract_file_system import UnstractFileSystem

try:
from plugins.processor.file_converter.constants import (
ExtentedFileInformationKey as FileKey,
)
except ImportError:
from file_management.constants import FileInformationKey as FileKey

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -153,12 +146,13 @@ def upload_file(

@staticmethod
@deprecated(reason="Use remote FS APIs from SDK")
jagadeeswaran-zipstack marked this conversation as resolved.
Show resolved Hide resolved
def fetch_file_contents(file_system: UnstractFileSystem, file_path: str) -> Any:
def fetch_file_contents(
file_system: UnstractFileSystem,
file_path: str,
allowed_content_types: list[str],
) -> Any:
fs = file_system.get_fsspec_fs()

# Define allowed content types
allowed_content_types = FileKey.FILE_UPLOAD_ALLOWED_MIME

try:
file_info = fs.info(file_path)
except FileNotFoundError:
Expand Down Expand Up @@ -198,7 +192,7 @@ def fetch_file_contents(file_system: UnstractFileSystem, file_path: str) -> Any:
data = file.read()

else:
raise InvalidFileType(f"File type '{file_content_type}' is not handled.")
logger.warning(f"File type '{file_content_type}' is not handled.")

return {"data": data, "mime_type": file_content_type}

Expand Down
22 changes: 7 additions & 15 deletions backend/prompt_studio/prompt_studio_core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,51 +393,43 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response:
file_name: str = document.document_name
view_type: str = serializer.validated_data.get("view_type")

# Extract filename without extension
filename_without_extension = file_name.rsplit(".", 1)[0]

# Handle view_type logic, always converting to .txt for EXTRACT and SUMMARIZE
if view_type == FileViewTypes.EXTRACT:
file_name = (
f"{FileViewTypes.EXTRACT.lower()}/" f"{filename_without_extension}.txt"
)
elif view_type == FileViewTypes.SUMMARIZE:
if view_type == FileViewTypes.SUMMARIZE:
chandrasekharan-zipstack marked this conversation as resolved.
Show resolved Hide resolved
file_name = (
f"{FileViewTypes.SUMMARIZE.lower()}/"
f"{filename_without_extension}.txt"
)

file_path = FileManagerHelper.handle_sub_directory_for_tenants(
file_path = file_path = FileManagerHelper.handle_sub_directory_for_tenants(
chandrasekharan-zipstack marked this conversation as resolved.
Show resolved Hide resolved
UserSessionUtils.get_organization_id(request),
is_create=True,
user_id=custom_tool.created_by.user_id,
tool_id=str(custom_tool.tool_id),
)

# Ensure file path formatting
file_system = LocalStorageFS(settings={"path": file_path})
if not file_path.endswith("/"):
file_path += "/"
file_path += file_name

file_system = LocalStorageFS(settings={"path": file_path})

# Handle file content retrieval
# Temporary Hack for frictionless onboarding as the user id will be empty
try:
contents = FileManagerHelper.fetch_file_contents(file_system, file_path)
except FileNotFound:
# Retry with empty user_id
file_path = FileManagerHelper.handle_sub_directory_for_tenants(
file_path = file_path = FileManagerHelper.handle_sub_directory_for_tenants(
UserSessionUtils.get_organization_id(request),
is_create=True,
user_id="",
tool_id=str(custom_tool.tool_id),
)
if not file_path.endswith("/"):
file_path += "/"
file_path += file_name
file_path += file_name
contents = FileManagerHelper.fetch_file_contents(file_system, file_path)

return Response(contents, status=status.HTTP_200_OK)
return Response({"data": contents}, status=status.HTTP_200_OK)

@action(detail=True, methods=["post"])
def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response:
Expand Down
21 changes: 14 additions & 7 deletions backend/prompt_studio/prompt_studio_core_v2/views.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import io
import logging
import os
import uuid
from typing import Any, Optional

from account_v2.custom_exceptions import DuplicateData
from django.db import IntegrityError
from django.db.models import QuerySet
from django.http import HttpRequest
from file_management.constants import FileInformationKey as FileKey
from file_management.exceptions import FileNotFound
from file_management.file_management_helper import FileManagerHelper
from permissions.permission import IsOwner, IsOwnerOrSharedUser
Expand Down Expand Up @@ -398,7 +397,14 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response:
document: DocumentManager = DocumentManager.objects.get(pk=document_id)
file_name: str = document.document_name
view_type: str = serializer.validated_data.get("view_type")
file_converter = get_plugin_class_by_name(
name="file_converter",
plugins=self.processor_plugins,
)

allowed_content_types = FileKey.FILE_UPLOAD_ALLOWED_MIME
if file_converter:
allowed_content_types = file_converter.get_extented_file_information_key()
filename_without_extension = file_name.rsplit(".", 1)[0]
if view_type == FileViewTypes.EXTRACT:
file_name = (
Expand All @@ -424,7 +430,9 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response:
file_path += file_name
# Temporary Hack for frictionless onboarding as the user id will be empty
try:
contents = FileManagerHelper.fetch_file_contents(file_system, file_path)
contents = FileManagerHelper.fetch_file_contents(
file_system, file_path, allowed_content_types
)
except FileNotFound:
file_path = file_path = (
FileManagerHelper.handle_sub_directory_for_tenants(
Expand Down Expand Up @@ -467,10 +475,9 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response:
file_type = uploaded_file.content_type
# Convert non-PDF files
if file_converter and file_type != "application/pdf":
file_data_bytes = uploaded_file.read()
with io.BytesIO(file_data_bytes) as file_stream:
file_data = file_converter.convert_to_pdf(file_stream, file_name)
file_name = f"{os.path.splitext(file_name)[0]}.pdf"
file_data, file_name = file_converter.process_file(
uploaded_file, file_name
)

logger.info(
f"Uploading file: {file_name}" if file_name else "Uploading file"
Expand Down
Loading