diff --git a/backend/prompt_studio/prompt_studio_core_v2/views.py b/backend/prompt_studio/prompt_studio_core_v2/views.py index 5e1f0d2a3f..6f447b51e5 100644 --- a/backend/prompt_studio/prompt_studio_core_v2/views.py +++ b/backend/prompt_studio/prompt_studio_core_v2/views.py @@ -4,6 +4,7 @@ from datetime import datetime from typing import Any +import magic from account_v2.custom_exceptions import DuplicateData from api_v2.models import APIDeployment from django.db import IntegrityError @@ -531,6 +532,28 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response: file_name = ( f"{FileViewTypes.SUMMARIZE.lower()}/{filename_without_extension}.txt" ) + + # For ORIGINAL view, check if a converted PDF exists for preview + if ( + view_type != FileViewTypes.EXTRACT + and view_type != FileViewTypes.SUMMARIZE + and file_converter_plugin + ): + converted_name = f"converted/{filename_without_extension}.pdf" + try: + contents = PromptStudioFileHelper.fetch_file_contents( + file_name=converted_name, + org_id=UserSessionUtils.get_organization_id(request), + user_id=custom_tool.created_by.user_id, + tool_id=str(custom_tool.tool_id), + allowed_content_types=allowed_content_types, + ) + return Response(contents, status=status.HTTP_200_OK) + except (FileNotFoundError, FileNotFound): + pass # No converted file — fall through to return original + except Exception: + logger.exception(f"Error fetching converted file: {converted_name}") + try: contents = PromptStudioFileHelper.fetch_file_contents( file_name=file_name, @@ -541,7 +564,7 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response: ) except FileNotFoundError: raise FileNotFound() - return Response({"data": contents}, status=status.HTTP_200_OK) + return Response(contents, status=status.HTTP_200_OK) @action(detail=True, methods=["post"]) def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response: @@ -556,16 +579,32 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response: # Store file file_name = uploaded_file.name file_data = uploaded_file - file_type = uploaded_file.content_type - # Convert non-PDF files + # Detect MIME from file content (not browser-supplied header) + file_type = magic.from_buffer(uploaded_file.read(2048), mime=True) + uploaded_file.seek(0) + if file_converter_plugin and file_type != "application/pdf": file_converter_service = file_converter_plugin["service_class"]() - file_data, file_name = file_converter_service.process_file( - uploaded_file, file_name - ) + if file_converter_service.should_convert_to_pdf(file_type): + # Convert and store in converted/ subdir for preview + converted_data, converted_name = file_converter_service.process_file( + uploaded_file, file_name + ) + PromptStudioFileHelper.upload_converted_for_ide( + org_id=UserSessionUtils.get_organization_id(request), + user_id=custom_tool.created_by.user_id, + tool_id=str(custom_tool.tool_id), + file_name=converted_name, + file_data=converted_data, + ) + # Reset uploaded_file for storing original in main dir + uploaded_file.seek(0) + file_data = uploaded_file + # else: CSV/TXT/Excel — file_data stays as original, no conversion logger.info(f"Uploading file: {file_name}" if file_name else "Uploading file") + # Store original file in main dir (always the original) PromptStudioFileHelper.upload_for_ide( org_id=UserSessionUtils.get_organization_id(request), user_id=custom_tool.created_by.user_id, @@ -574,7 +613,7 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response: file_data=file_data, ) - # Create a record in the db for the file + # Create a record in the db for the file (document_name = original filename) document = PromptStudioDocumentHelper.create( tool_id=str(custom_tool.tool_id), document_name=file_name ) diff --git a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py index 2d1d990673..23a05633fd 100644 --- a/backend/utils/file_storage/helpers/prompt_studio_file_helper.py +++ b/backend/utils/file_storage/helpers/prompt_studio_file_helper.py @@ -38,6 +38,7 @@ def get_or_create_prompt_studio_subdirectory( file_path = str(Path(base_path) / org_id / user_id / tool_id) extract_file_path = str(Path(file_path) / "extract") summarize_file_path = str(Path(file_path) / "summarize") + converted_file_path = str(Path(file_path) / "converted") if is_create: fs_instance = EnvHelper.get_storage( storage_type=StorageType.PERMANENT, @@ -46,6 +47,7 @@ def get_or_create_prompt_studio_subdirectory( fs_instance.mkdir(file_path, create_parents=True) fs_instance.mkdir(extract_file_path, create_parents=True) fs_instance.mkdir(summarize_file_path, create_parents=True) + fs_instance.mkdir(converted_file_path, create_parents=True) return str(file_path) @staticmethod @@ -81,6 +83,38 @@ def upload_for_ide( data=file_data if isinstance(file_data, bytes) else file_data.read(), ) + @staticmethod + def upload_converted_for_ide( + org_id: str, user_id: str, tool_id: str, file_data: Any, file_name: str + ) -> None: + """Stores converted PDF in the converted/ subdirectory for preview. + + Args: + org_id (str): Organization ID + user_id (str): User ID + tool_id (str): ID of the prompt studio tool + file_data (Any): File data (bytes or file-like object) + file_name (str): Name of the converted file + """ + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + file_system_path = ( + PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id=org_id, + is_create=True, + user_id=user_id, + tool_id=str(tool_id), + ) + ) + converted_path = str(Path(file_system_path) / "converted" / file_name) + fs_instance.write( + path=converted_path, + mode="wb", + data=file_data if isinstance(file_data, bytes) else file_data.read(), + ) + @staticmethod def fetch_file_contents( org_id: str, @@ -141,13 +175,22 @@ def fetch_file_contents( encoded_string = base64.b64encode(bytes(text_content_bytes)) return {"data": encoded_string, "mime_type": file_content_type} - elif file_content_type == "text/plain": + elif file_content_type in ("text/plain", "text/csv"): text_content_string: str = fs_instance.read( path=file_path, mode="r", legacy_storage_path=legacy_file_path, encoding="utf-8", ) + elif file_content_type in ( + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel.sheet.macroenabled.12", + ): + text_content_string = ( + "Preview not available for Excel files. " + "Please index the document and view content in the Raw View tab." + ) # Check if the file type is in the allowed list elif file_content_type not in allowed_content_types: raise InvalidFileType(f"File type '{file_content_type}' is not allowed.") @@ -178,7 +221,7 @@ def delete_for_ide(org_id: str, user_id: str, tool_id: str, file_name: str) -> b # Delete the source file fs_instance.rm(str(Path(file_system_path) / file_name)) # Delete all related files for cascade delete - directories = ["extract/", "extract/metadata/", "summarize/"] + directories = ["extract/", "extract/metadata/", "summarize/", "converted/"] base_file_name, _ = os.path.splitext(file_name) # Delete related files file_paths = PromptStudioFileHelper._find_files( diff --git a/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx b/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx index f4808e46e0..9afe20eb29 100644 --- a/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx +++ b/frontend/src/components/custom-tools/document-manager/DocumentManager.jsx @@ -116,6 +116,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { useEffect(() => { // Convert blob URL to an object URL + console.log("here--->", fileData); if (fileData.blob) { const objectUrl = URL.createObjectURL(fileData.blob); setBlobFileUrl(objectUrl); @@ -163,6 +164,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { Object.keys(viewTypes).forEach((item) => { handleFetchContent(viewTypes[item]); }); + console.log(selectedDoc); }, [selectedDoc]); useEffect(() => { @@ -220,7 +222,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { const handleGetDocumentsReq = (getDocsFunc, viewType) => { getDocsFunc(details?.tool_id, selectedDoc?.document_id, viewType) .then((res) => { - const data = res?.data?.data || ""; + const data = res?.data || ""; const mimeType = res?.data?.mime_type || ""; processGetDocsResponse(data, viewType, mimeType); }) @@ -250,24 +252,39 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { }; const processGetDocsResponse = (data, viewType, mimeType) => { + console.log("response-->", viewType, mimeType); if (viewType === viewTypes.original) { - const base64String = data || ""; - const blob = base64toBlobWithMime(base64String, mimeType); - setFileData({ blob, mimeType }); - const reader = new FileReader(); - reader.readAsDataURL(blob); - reader.onload = () => { - setFileUrl(reader.result); - }; - reader.onerror = () => { - throw new Error("Fail to load the file"); - }; + if (mimeType === "application/pdf") { + // Existing flow: base64 → blob → PdfViewer + const base64String = data || ""; + const blob = base64toBlobWithMime(base64String, mimeType); + console.log("blob-->", blob); + setFileData({ blob, mimeType }); + const reader = new FileReader(); + reader.readAsDataURL(blob); + reader.onload = () => { + setFileUrl(reader.result); + }; + reader.onerror = () => { + setFileErrMsg("Failed to load the file"); + }; + } else { + // Non-PDF file (CSV, TXT, Excel, or non-convertible) + // data is text, not base64 + setFileUrl(""); + setFileData({ blob: null, mimeType }); + // Auto-switch to Raw View for non-PDF files + setActiveKey("2"); + } } else if (viewType === viewTypes.extract) { setExtractTxt(data?.data); } }; const handleGetDocsError = (err, viewType) => { + if (viewType === viewTypes.original) { + setFileData({}); + } if (err?.response?.status === 404) { setErrorMessage(viewType); } @@ -298,7 +315,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { info: "Clicked on the 'Summary View' tab", }); } - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; @@ -349,16 +366,20 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { }; const renderDoc = (docName, fileUrl, highlightData) => { - const fileType = docName?.split(".").pop().toLowerCase(); // Get the file extension - switch (fileType) { - case "pdf": - return ; - case "txt": - case "md": - return ; - default: - return
Unsupported file type: {fileType}
; + // Use mimeType from response for rendering decisions + console.log(fileData); + if (fileData.mimeType === "application/pdf") { + return ; } + // Non-PDF: show placeholder message + return ( +
+ + Document preview is not available for this file type. Please index the + document and switch to Raw View. + +
+ ); }; return ( @@ -470,7 +491,10 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) { 0} + isContentAvailable={ + fileUrl?.length > 0 || + (fileData.mimeType && fileData.mimeType !== "application/pdf") + } setOpenManageDocsModal={setOpenManageDocsModal} errMsg={fileErrMsg} > diff --git a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx index c77788b799..29d7d533e2 100644 --- a/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx +++ b/frontend/src/components/custom-tools/manage-docs-modal/ManageDocsModal.jsx @@ -459,7 +459,7 @@ function ManageDocsModal({ info: "Clicked on index button", document_name: item?.document_name, }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } }; @@ -559,7 +559,7 @@ function ManageDocsModal({ setPostHogCustomEvent("ps_uploaded_file", { info: "Clicked on '+ Upload New File' button", }); - } catch (err) { + } catch (_err) { // If an error occurs while setting custom posthog event, ignore it and continue } @@ -595,8 +595,18 @@ function ManageDocsModal({ return; // Stop further execution } - // If the file is not a PDF, show the modal for confirmation - if (fileType !== "application/pdf") { + // File types that can be uploaded directly without conversion + const DIRECT_UPLOAD_TYPES = new Set([ + "application/pdf", + "text/plain", + "text/csv", + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel.sheet.macroenabled.12", + ]); + + if (!DIRECT_UPLOAD_TYPES.has(fileType)) { + // Non-direct types: show ConfirmMultiDoc modal or error if (!ConfirmMultiDoc) { setAlertDetails({ type: "error", @@ -606,7 +616,7 @@ function ManageDocsModal({ setFileToUpload(file); // Store the file to be uploaded setIsModalVisible(true); // Show the modal } else { - // If the file is a PDF, proceed with the upload immediately + // PDF, CSV, TXT, Excel — proceed with the upload immediately resolve(file); } }; diff --git a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx index 3de0fedf36..727701139f 100644 --- a/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx +++ b/frontend/src/components/custom-tools/output-analyzer/OutputAnalyzerCard.jsx @@ -57,7 +57,7 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) { setIsDocLoading(true); try { const res = await axiosPrivate.get(fileUrlEndpoint); - const base64String = res?.data?.data?.data || ""; + const base64String = res?.data?.data || ""; const blob = base64toBlob(base64String); setFileUrl(URL.createObjectURL(blob)); } catch (err) {