Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions backend/prompt_studio/prompt_studio_core_v2/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
from typing import Any

import magic
from account_v2.custom_exceptions import DuplicateData
from api_v2.models import APIDeployment
from django.db import IntegrityError
Expand Down Expand Up @@ -531,6 +532,28 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response:
file_name = (
f"{FileViewTypes.SUMMARIZE.lower()}/{filename_without_extension}.txt"
)

# For ORIGINAL view, check if a converted PDF exists for preview
if (
view_type != FileViewTypes.EXTRACT
and view_type != FileViewTypes.SUMMARIZE
and file_converter_plugin
):
converted_name = f"converted/{filename_without_extension}.pdf"
try:
contents = PromptStudioFileHelper.fetch_file_contents(
file_name=converted_name,
org_id=UserSessionUtils.get_organization_id(request),
user_id=custom_tool.created_by.user_id,
tool_id=str(custom_tool.tool_id),
allowed_content_types=allowed_content_types,
)
return Response(contents, status=status.HTTP_200_OK)
except (FileNotFoundError, FileNotFound):
pass # No converted file — fall through to return original
except Exception:
logger.exception(f"Error fetching converted file: {converted_name}")

try:
contents = PromptStudioFileHelper.fetch_file_contents(
file_name=file_name,
Expand All @@ -541,7 +564,7 @@ def fetch_contents_ide(self, request: HttpRequest, pk: Any = None) -> Response:
)
except FileNotFoundError:
raise FileNotFound()
return Response({"data": contents}, status=status.HTTP_200_OK)
return Response(contents, status=status.HTTP_200_OK)

@action(detail=True, methods=["post"])
def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response:
Expand All @@ -556,16 +579,32 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response:
# Store file
file_name = uploaded_file.name
file_data = uploaded_file
file_type = uploaded_file.content_type
# Convert non-PDF files
# Detect MIME from file content (not browser-supplied header)
file_type = magic.from_buffer(uploaded_file.read(2048), mime=True)
uploaded_file.seek(0)

if file_converter_plugin and file_type != "application/pdf":
file_converter_service = file_converter_plugin["service_class"]()
file_data, file_name = file_converter_service.process_file(
uploaded_file, file_name
)
if file_converter_service.should_convert_to_pdf(file_type):
# Convert and store in converted/ subdir for preview
converted_data, converted_name = file_converter_service.process_file(
uploaded_file, file_name
)
PromptStudioFileHelper.upload_converted_for_ide(
org_id=UserSessionUtils.get_organization_id(request),
user_id=custom_tool.created_by.user_id,
tool_id=str(custom_tool.tool_id),
file_name=converted_name,
file_data=converted_data,
)
# Reset uploaded_file for storing original in main dir
uploaded_file.seek(0)
file_data = uploaded_file
# else: CSV/TXT/Excel — file_data stays as original, no conversion

logger.info(f"Uploading file: {file_name}" if file_name else "Uploading file")

# Store original file in main dir (always the original)
PromptStudioFileHelper.upload_for_ide(
org_id=UserSessionUtils.get_organization_id(request),
user_id=custom_tool.created_by.user_id,
Expand All @@ -574,7 +613,7 @@ def upload_for_ide(self, request: HttpRequest, pk: Any = None) -> Response:
file_data=file_data,
)

# Create a record in the db for the file
# Create a record in the db for the file (document_name = original filename)
document = PromptStudioDocumentHelper.create(
tool_id=str(custom_tool.tool_id), document_name=file_name
)
Expand Down
47 changes: 45 additions & 2 deletions backend/utils/file_storage/helpers/prompt_studio_file_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def get_or_create_prompt_studio_subdirectory(
file_path = str(Path(base_path) / org_id / user_id / tool_id)
extract_file_path = str(Path(file_path) / "extract")
summarize_file_path = str(Path(file_path) / "summarize")
converted_file_path = str(Path(file_path) / "converted")
if is_create:
fs_instance = EnvHelper.get_storage(
storage_type=StorageType.PERMANENT,
Expand All @@ -46,6 +47,7 @@ def get_or_create_prompt_studio_subdirectory(
fs_instance.mkdir(file_path, create_parents=True)
fs_instance.mkdir(extract_file_path, create_parents=True)
fs_instance.mkdir(summarize_file_path, create_parents=True)
fs_instance.mkdir(converted_file_path, create_parents=True)
return str(file_path)

@staticmethod
Expand Down Expand Up @@ -81,6 +83,38 @@ def upload_for_ide(
data=file_data if isinstance(file_data, bytes) else file_data.read(),
)

@staticmethod
def upload_converted_for_ide(
    org_id: str, user_id: str, tool_id: str, file_data: Any, file_name: str
) -> None:
    """Write a converted file into the tool's ``converted/`` subdirectory.

    The tool directory is resolved through
    ``get_or_create_prompt_studio_subdirectory`` with ``is_create=True``,
    and the payload is written in binary mode to
    ``<tool directory>/converted/<file_name>`` on the permanent remote
    storage.

    Args:
        org_id (str): Organization ID
        user_id (str): User ID
        tool_id (str): ID of the prompt studio tool
        file_data (Any): Either raw ``bytes`` or an object exposing
            ``read()``
        file_name (str): Name under which the converted file is stored
    """
    storage = EnvHelper.get_storage(
        storage_type=StorageType.PERMANENT,
        env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE,
    )
    tool_dir = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory(
        org_id=org_id,
        is_create=True,
        user_id=user_id,
        tool_id=str(tool_id),
    )
    target_path = str(Path(tool_dir) / "converted" / file_name)

    # Raw bytes are written as-is; anything else is read as a file-like object.
    if isinstance(file_data, bytes):
        payload = file_data
    else:
        payload = file_data.read()

    storage.write(path=target_path, mode="wb", data=payload)

@staticmethod
def fetch_file_contents(
org_id: str,
Expand Down Expand Up @@ -141,13 +175,22 @@ def fetch_file_contents(
encoded_string = base64.b64encode(bytes(text_content_bytes))
return {"data": encoded_string, "mime_type": file_content_type}

elif file_content_type == "text/plain":
elif file_content_type in ("text/plain", "text/csv"):
text_content_string: str = fs_instance.read(
path=file_path,
mode="r",
legacy_storage_path=legacy_file_path,
encoding="utf-8",
)
elif file_content_type in (
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.ms-excel.sheet.macroenabled.12",
):
text_content_string = (
"Preview not available for Excel files. "
"Please index the document and view content in the Raw View tab."
)
# Check if the file type is in the allowed list
elif file_content_type not in allowed_content_types:
raise InvalidFileType(f"File type '{file_content_type}' is not allowed.")
Expand Down Expand Up @@ -178,7 +221,7 @@ def delete_for_ide(org_id: str, user_id: str, tool_id: str, file_name: str) -> b
# Delete the source file
fs_instance.rm(str(Path(file_system_path) / file_name))
# Delete all related files for cascade delete
directories = ["extract/", "extract/metadata/", "summarize/"]
directories = ["extract/", "extract/metadata/", "summarize/", "converted/"]
base_file_name, _ = os.path.splitext(file_name)
# Delete related files
file_paths = PromptStudioFileHelper._find_files(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {

useEffect(() => {
// Convert blob URL to an object URL
console.log("here--->", fileData);
if (fileData.blob) {
const objectUrl = URL.createObjectURL(fileData.blob);
setBlobFileUrl(objectUrl);
Expand Down Expand Up @@ -163,6 +164,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
Object.keys(viewTypes).forEach((item) => {
handleFetchContent(viewTypes[item]);
});
console.log(selectedDoc);
}, [selectedDoc]);

useEffect(() => {
Expand Down Expand Up @@ -220,7 +222,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
const handleGetDocumentsReq = (getDocsFunc, viewType) => {
getDocsFunc(details?.tool_id, selectedDoc?.document_id, viewType)
.then((res) => {
const data = res?.data?.data || "";
const data = res?.data || "";
const mimeType = res?.data?.mime_type || "";
processGetDocsResponse(data, viewType, mimeType);
})
Expand Down Expand Up @@ -250,24 +252,39 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
};

const processGetDocsResponse = (data, viewType, mimeType) => {
console.log("response-->", viewType, mimeType);
if (viewType === viewTypes.original) {
const base64String = data || "";
const blob = base64toBlobWithMime(base64String, mimeType);
setFileData({ blob, mimeType });
const reader = new FileReader();
reader.readAsDataURL(blob);
reader.onload = () => {
setFileUrl(reader.result);
};
reader.onerror = () => {
throw new Error("Fail to load the file");
};
if (mimeType === "application/pdf") {
// Existing flow: base64 → blob → PdfViewer
const base64String = data || "";
const blob = base64toBlobWithMime(base64String, mimeType);
console.log("blob-->", blob);
setFileData({ blob, mimeType });
const reader = new FileReader();
reader.readAsDataURL(blob);
reader.onload = () => {
setFileUrl(reader.result);
};
reader.onerror = () => {
setFileErrMsg("Failed to load the file");
};
} else {
// Non-PDF file (CSV, TXT, Excel, or non-convertible)
// data is text, not base64
setFileUrl("");
setFileData({ blob: null, mimeType });
// Auto-switch to Raw View for non-PDF files
setActiveKey("2");
}
} else if (viewType === viewTypes.extract) {
setExtractTxt(data?.data);
}
};

const handleGetDocsError = (err, viewType) => {
if (viewType === viewTypes.original) {
setFileData({});
}
if (err?.response?.status === 404) {
setErrorMessage(viewType);
}
Expand Down Expand Up @@ -298,7 +315,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
info: "Clicked on the 'Summary View' tab",
});
}
} catch (err) {
} catch (_err) {
// If an error occurs while setting custom posthog event, ignore it and continue
}
};
Expand Down Expand Up @@ -349,16 +366,20 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
};

const renderDoc = (docName, fileUrl, highlightData) => {
const fileType = docName?.split(".").pop().toLowerCase(); // Get the file extension
switch (fileType) {
case "pdf":
return <PdfViewer fileUrl={fileUrl} highlightData={highlightData} />;
case "txt":
case "md":
return <TextViewer fileUrl={fileUrl} />;
default:
return <div>Unsupported file type: {fileType}</div>;
// Use mimeType from response for rendering decisions
console.log(fileData);
if (fileData.mimeType === "application/pdf") {
return <PdfViewer fileUrl={fileUrl} highlightData={highlightData} />;
}
// Non-PDF: show placeholder message
return (
<div className="text-viewer-layout">
<Typography.Text type="secondary">
Document preview is not available for this file type. Please index the
document and switch to Raw View.
</Typography.Text>
</div>
);
};

return (
Expand Down Expand Up @@ -470,7 +491,10 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
<DocumentViewer
doc={selectedDoc?.document_name}
isLoading={isDocLoading}
isContentAvailable={fileUrl?.length > 0}
isContentAvailable={
fileUrl?.length > 0 ||
(fileData.mimeType && fileData.mimeType !== "application/pdf")
}
setOpenManageDocsModal={setOpenManageDocsModal}
errMsg={fileErrMsg}
>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ function ManageDocsModal({
info: "Clicked on index button",
document_name: item?.document_name,
});
} catch (err) {
} catch (_err) {
// If an error occurs while setting custom posthog event, ignore it and continue
}
};
Expand Down Expand Up @@ -559,7 +559,7 @@ function ManageDocsModal({
setPostHogCustomEvent("ps_uploaded_file", {
info: "Clicked on '+ Upload New File' button",
});
} catch (err) {
} catch (_err) {
// If an error occurs while setting custom posthog event, ignore it and continue
}

Expand Down Expand Up @@ -595,8 +595,18 @@ function ManageDocsModal({
return; // Stop further execution
}

// If the file is not a PDF, show the modal for confirmation
if (fileType !== "application/pdf") {
// File types that can be uploaded directly without conversion
const DIRECT_UPLOAD_TYPES = new Set([
"application/pdf",
"text/plain",
"text/csv",
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.ms-excel.sheet.macroenabled.12",
]);

if (!DIRECT_UPLOAD_TYPES.has(fileType)) {
// Non-direct types: show ConfirmMultiDoc modal or error
if (!ConfirmMultiDoc) {
setAlertDetails({
type: "error",
Expand All @@ -606,7 +616,7 @@ function ManageDocsModal({
setFileToUpload(file); // Store the file to be uploaded
setIsModalVisible(true); // Show the modal
} else {
// If the file is a PDF, proceed with the upload immediately
// PDF, CSV, TXT, Excel — proceed with the upload immediately
resolve(file);
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) {
setIsDocLoading(true);
try {
const res = await axiosPrivate.get(fileUrlEndpoint);
const base64String = res?.data?.data?.data || "";
const base64String = res?.data?.data || "";
const blob = base64toBlob(base64String);
setFileUrl(URL.createObjectURL(blob));
} catch (err) {
Expand Down
Loading