Skip to content

Commit c71a831

Browse files
committed
Merge branch 'main' into hosted-gpu
2 parents d6c24e8 + ecbcbc8 commit c71a831

File tree

9 files changed

+134
-229
lines changed

9 files changed

+134
-229
lines changed

core/database/folder_bootstrap.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ async def bootstrap_folder_hierarchy(engine: AsyncEngine, logger) -> None:
1919
("folders", "parent_id"),
2020
("folders", "depth"),
2121
("documents", "folder_path"),
22+
("documents", "folder_id"),
2223
("graphs", "folder_path"),
2324
}
2425
required_indexes = {
@@ -28,7 +29,9 @@ async def bootstrap_folder_hierarchy(engine: AsyncEngine, logger) -> None:
2829
"uq_folders_app_full_path",
2930
"uq_folders_owner_full_path",
3031
"idx_doc_folder_path",
32+
"idx_doc_folder_id",
3133
"idx_documents_app_folder_path",
34+
"idx_documents_app_folder_id",
3235
"idx_graph_folder_path",
3336
"idx_graphs_app_folder_path",
3437
}
@@ -41,7 +44,7 @@ async def bootstrap_folder_hierarchy(engine: AsyncEngine, logger) -> None:
4144
SELECT table_name, column_name
4245
FROM information_schema.columns
4346
WHERE table_name IN ('folders', 'documents', 'graphs')
44-
AND column_name IN ('full_path', 'parent_id', 'depth', 'folder_path')
47+
AND column_name IN ('full_path', 'parent_id', 'depth', 'folder_path', 'folder_id')
4548
"""
4649
)
4750
)
@@ -60,7 +63,9 @@ async def bootstrap_folder_hierarchy(engine: AsyncEngine, logger) -> None:
6063
'uq_folders_app_full_path',
6164
'uq_folders_owner_full_path',
6265
'idx_doc_folder_path',
66+
'idx_doc_folder_id',
6367
'idx_documents_app_folder_path',
68+
'idx_documents_app_folder_id',
6469
'idx_graph_folder_path',
6570
'idx_graphs_app_folder_path'
6671
)
@@ -102,6 +107,7 @@ async def bootstrap_folder_hierarchy(engine: AsyncEngine, logger) -> None:
102107
"ALTER TABLE folders ADD COLUMN IF NOT EXISTS parent_id TEXT",
103108
"ALTER TABLE folders ADD COLUMN IF NOT EXISTS depth INTEGER",
104109
"ALTER TABLE documents ADD COLUMN IF NOT EXISTS folder_path TEXT",
110+
"ALTER TABLE documents ADD COLUMN IF NOT EXISTS folder_id TEXT",
105111
"ALTER TABLE graphs ADD COLUMN IF NOT EXISTS folder_path TEXT",
106112
]
107113

@@ -118,7 +124,9 @@ async def bootstrap_folder_hierarchy(engine: AsyncEngine, logger) -> None:
118124
"ON folders (owner_id, full_path) WHERE app_id IS NULL"
119125
),
120126
"CREATE INDEX IF NOT EXISTS idx_doc_folder_path ON documents (folder_path)",
127+
"CREATE INDEX IF NOT EXISTS idx_doc_folder_id ON documents (folder_id)",
121128
"CREATE INDEX IF NOT EXISTS idx_documents_app_folder_path ON documents (app_id, folder_path)",
129+
"CREATE INDEX IF NOT EXISTS idx_documents_app_folder_id ON documents (app_id, folder_id)",
122130
"CREATE INDEX IF NOT EXISTS idx_graph_folder_path ON graphs (folder_path)",
123131
"CREATE INDEX IF NOT EXISTS idx_graphs_app_folder_path ON graphs (app_id, folder_path)",
124132
]

core/database/postgres_database.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class DocumentModel(Base):
4949
app_id = Column(String)
5050
folder_name = Column(String)
5151
folder_path = Column(String)
52+
folder_id = Column(String)
5253
end_user_id = Column(String)
5354

5455
# Create indexes
@@ -59,12 +60,14 @@ class DocumentModel(Base):
5960
Index("idx_doc_app_id", "app_id"),
6061
Index("idx_doc_folder_name", "folder_name"),
6162
Index("idx_doc_folder_path", "folder_path"),
63+
Index("idx_doc_folder_id", "folder_id"),
6264
Index("idx_doc_end_user_id", "end_user_id"),
6365
Index("idx_doc_owner_id", "owner_id"),
6466
# Composite indexes for common query patterns
6567
Index("idx_documents_owner_app", "owner_id", "app_id"),
6668
Index("idx_documents_app_folder", "app_id", "folder_name"),
6769
Index("idx_documents_app_folder_path", "app_id", "folder_path"),
70+
Index("idx_documents_app_folder_id", "app_id", "folder_id"),
6871
Index("idx_documents_app_end_user", "app_id", "end_user_id"),
6972
)
7073

@@ -368,8 +371,10 @@ async def store_document(self, document: Document, auth: AuthContext) -> bool:
368371
normalized_metadata, normalized_types = normalize_metadata(metadata, metadata_type_hints)
369372
doc_dict["doc_metadata"] = normalized_metadata
370373
doc_dict["metadata_types"] = normalized_types
371-
# Mirror folder_name into doc_metadata for convenience in downstream filters (allow clearing)
372-
doc_dict["doc_metadata"]["folder_name"] = doc_dict.get("folder_name")
374+
# Mirror the folder path (falling back to folder_name) into doc_metadata["folder_name"] for convenience in downstream filters (allow clearing)
375+
path_for_metadata = doc_dict.get("folder_path") or doc_dict.get("folder_name")
376+
doc_dict["doc_metadata"]["folder_name"] = path_for_metadata
377+
doc_dict["folder_id"] = doc_dict.get("folder_id")
373378

374379
# Keep folder_path in sync with folder_name for backward compatibility
375380
folder_name_value = doc_dict.get("folder_name")
@@ -838,7 +843,13 @@ async def update_document(self, document_id: str, updates: Dict[str, Any], auth:
838843

839844
# Keep doc_metadata.folder_name in sync with the flattened folder columns, preferring folder_path and falling back to folder_name (support clearing)
840845
if "doc_metadata" in updates:
841-
folder_value = folder_value_for_metadata if "folder_name" in updates else doc_model.folder_name
846+
folder_value = updates.get("folder_path")
847+
if folder_value is None:
848+
folder_value = (
849+
folder_value_for_metadata if "folder_name" in updates else doc_model.folder_path
850+
)
851+
if folder_value is None:
852+
folder_value = doc_model.folder_name
842853
try:
843854
if isinstance(updates["doc_metadata"], dict):
844855
updates["doc_metadata"]["folder_name"] = folder_value
@@ -1256,6 +1267,7 @@ def _document_model_to_dict(self, doc_model) -> Dict[str, Any]:
12561267
# Include flattened fields
12571268
"folder_name": doc_model.folder_name,
12581269
"folder_path": doc_model.folder_path,
1270+
"folder_id": doc_model.folder_id,
12591271
"app_id": doc_model.app_id,
12601272
"end_user_id": doc_model.end_user_id,
12611273
}

core/models/documents.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class Document(BaseModel):
5454
end_user_id: Optional[str] = None
5555
app_id: Optional[str] = None
5656
folder_path: Optional[str] = None
57+
folder_id: Optional[str] = None
5758

5859
# Ensure storage_info values are strings to maintain backward compatibility
5960
@field_validator("storage_info", mode="before")

core/models/request.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,6 @@ class FolderDetailsRequest(BaseModel):
101101
)
102102

103103

104-
class FolderTreeRequest(BaseModel):
105-
"""Request model for retrieving a hierarchical folder tree with documents."""
106-
107-
folder_path: Optional[str] = Field(
108-
default=None,
109-
description="Base folder path to return. Use '/' or null for the full hierarchy.",
110-
)
111-
document_fields: Optional[List[str]] = Field(
112-
default=None,
113-
description="Optional list of fields to include for documents in each folder node (dot notation supported).",
114-
)
115-
116-
117104
class SearchDocumentsRequest(BaseModel):
118105
"""Request model for searching documents by name"""
119106

core/models/responses.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -131,21 +131,6 @@ class FolderDetailsResponse(BaseModel):
131131
folders: List[FolderDetails]
132132

133133

134-
class FolderTreeNode(BaseModel):
135-
"""Nested folder tree entry including contained documents."""
136-
137-
id: Optional[str] = None
138-
name: Optional[str] = None
139-
full_path: Optional[str] = None
140-
description: Optional[str] = None
141-
depth: Optional[int] = None
142-
documents: List[Dict[str, Any]] = Field(default_factory=list)
143-
children: List["FolderTreeNode"] = Field(default_factory=list)
144-
145-
146-
FolderTreeNode.model_rebuild()
147-
148-
149134
class RequeueIngestionResult(BaseModel):
150135
"""Result information for an individual requeued ingestion job."""
151136

core/routes/folders.py

Lines changed: 1 addition & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,14 @@
99
from core.database.postgres_database import InvalidMetadataFilterError
1010
from core.models.auth import AuthContext
1111
from core.models.folders import Folder, FolderCreate, FolderSummary
12-
from core.models.request import FolderDetailsRequest, FolderTreeRequest
12+
from core.models.request import FolderDetailsRequest
1313
from core.models.responses import (
1414
DocumentAddToFolderResponse,
1515
DocumentDeleteResponse,
1616
FolderDeleteResponse,
1717
FolderDetails,
1818
FolderDetailsResponse,
1919
FolderDocumentInfo,
20-
FolderTreeNode,
2120
)
2221
from core.routes.utils import project_document_fields
2322
from core.services.telemetry import TelemetryService
@@ -264,176 +263,6 @@ async def list_folder_summaries(auth: AuthContext = Depends(verify_token)) -> Li
264263
raise HTTPException(status_code=500, detail=str(exc))
265264

266265

267-
@router.post("/tree", response_model=FolderTreeNode)
268-
async def get_folder_tree(
269-
request: FolderTreeRequest,
270-
auth: AuthContext = Depends(verify_token),
271-
) -> FolderTreeNode:
272-
"""
273-
Return a hierarchical folder tree (with documents) rooted at ``folder_path``.
274-
275-
When ``folder_path`` is null or ``/``, the entire accessible hierarchy is returned.
276-
"""
277-
278-
try:
279-
folder_path = request.folder_path
280-
document_fields = request.document_fields
281-
normalized_path: Optional[str] = None
282-
if folder_path is not None:
283-
if isinstance(folder_path, str) and folder_path.lower() == "null":
284-
folder_path = None
285-
else:
286-
try:
287-
normalized_path = normalize_folder_path(folder_path)
288-
except ValueError as exc:
289-
raise HTTPException(status_code=400, detail=str(exc))
290-
if normalized_path == "/":
291-
normalized_path = None
292-
293-
base_path = normalized_path or "/"
294-
295-
base_folder: Optional[Folder] = None
296-
if normalized_path:
297-
base_folder = await document_service.db.get_folder_by_full_path(normalized_path, auth)
298-
if not base_folder:
299-
raise HTTPException(status_code=404, detail=f"Folder {folder_path} not found")
300-
301-
def _canonical_folder_path(folder: Folder) -> Optional[str]:
302-
if folder.full_path:
303-
try:
304-
return normalize_folder_path(folder.full_path)
305-
except ValueError:
306-
return None
307-
if folder.name:
308-
try:
309-
return normalize_folder_path(folder.name)
310-
except ValueError:
311-
return None
312-
return None
313-
314-
def _parent_path(path: str) -> Optional[str]:
315-
if not path or path == "/":
316-
return None
317-
segments = [part for part in path.strip("/").split("/") if part]
318-
if len(segments) <= 1:
319-
return "/"
320-
return "/" + "/".join(segments[:-1])
321-
322-
def _attach_child(parent: FolderTreeNode, child: FolderTreeNode) -> None:
323-
if not any(existing.full_path == child.full_path for existing in parent.children):
324-
parent.children.append(child)
325-
326-
def _make_node(path: str, folder: Optional[Folder]) -> FolderTreeNode:
327-
name = folder.name if folder else ("/" if path == "/" else (path.strip("/").split("/")[-1] or "/"))
328-
depth = folder.depth if folder else (0 if path == "/" else None)
329-
return FolderTreeNode(
330-
id=folder.id if folder else None,
331-
name=name,
332-
full_path=path,
333-
description=folder.description if folder else None,
334-
depth=depth,
335-
documents=[],
336-
children=[],
337-
)
338-
339-
all_folders = await document_service.db.list_folders(auth)
340-
folders_with_paths: List[tuple[str, Folder]] = []
341-
for folder in all_folders:
342-
path = _canonical_folder_path(folder)
343-
if path:
344-
folders_with_paths.append((path, folder))
345-
346-
if normalized_path:
347-
scoped = []
348-
prefix = normalized_path.rstrip("/") + "/"
349-
for path, folder in folders_with_paths:
350-
if path == normalized_path or path.startswith(prefix):
351-
scoped.append((path, folder))
352-
folders_with_paths = scoped
353-
if base_folder:
354-
base_folder_path = _canonical_folder_path(base_folder)
355-
if base_folder_path and all(path != base_folder_path for path, _ in folders_with_paths):
356-
folders_with_paths.append((base_folder_path, base_folder))
357-
358-
nodes_by_path: Dict[str, FolderTreeNode] = {
359-
path: _make_node(path, folder) for path, folder in folders_with_paths
360-
}
361-
362-
root_node = nodes_by_path.get(base_path)
363-
if not root_node:
364-
root_node = _make_node(base_path, base_folder)
365-
nodes_by_path[base_path] = root_node
366-
367-
for path in sorted(nodes_by_path.keys(), key=lambda p: (p.count("/"), p)):
368-
if path == base_path:
369-
continue
370-
node = nodes_by_path[path]
371-
parent_path = _parent_path(path)
372-
parent_node = nodes_by_path.get(parent_path)
373-
if not parent_node:
374-
parent_node = root_node
375-
_attach_child(parent_node, node)
376-
377-
doc_system_filters = {"folder_path_prefix": base_path} if normalized_path else None
378-
document_result = await document_service.db.list_documents_flexible(
379-
auth=auth,
380-
skip=0,
381-
limit=None,
382-
system_filters=doc_system_filters,
383-
include_total_count=False,
384-
include_status_counts=False,
385-
include_folder_counts=False,
386-
return_documents=True,
387-
sort_by="filename",
388-
sort_direction="asc",
389-
)
390-
391-
documents = document_result.get("documents", []) or []
392-
for document in documents:
393-
if hasattr(document, "model_dump"):
394-
doc_dict = document.model_dump(mode="json")
395-
elif hasattr(document, "dict"):
396-
doc_dict = document.dict()
397-
else:
398-
doc_dict = dict(document)
399-
400-
doc_path_raw = doc_dict.get("folder_path")
401-
try:
402-
doc_path = normalize_folder_path(doc_path_raw) if doc_path_raw is not None else None
403-
except ValueError:
404-
doc_path = doc_path_raw
405-
406-
target_path = doc_path or base_path
407-
target_node = nodes_by_path.get(target_path)
408-
if not target_node:
409-
target_node = _make_node(target_path, None)
410-
nodes_by_path[target_path] = target_node
411-
parent_path = _parent_path(target_path or "/")
412-
parent_node = nodes_by_path.get(parent_path) or root_node
413-
_attach_child(parent_node, target_node)
414-
415-
projected_doc = project_document_fields(doc_dict, document_fields)
416-
if doc_path_raw is not None and "folder_path" not in projected_doc:
417-
projected_doc["folder_path"] = doc_path_raw
418-
419-
target_node.documents.append(projected_doc)
420-
421-
def _sort_tree(node: FolderTreeNode) -> None:
422-
node.children.sort(key=lambda child: (child.name or "", child.full_path or ""))
423-
for child in node.children:
424-
_sort_tree(child)
425-
node.documents.sort(key=lambda doc: str(doc.get("filename") or doc.get("external_id") or ""))
426-
427-
_sort_tree(root_node)
428-
return root_node
429-
430-
except HTTPException:
431-
raise
432-
except Exception as exc: # noqa: BLE001
433-
logger.error("Error building folder tree: %s", exc)
434-
raise HTTPException(status_code=500, detail=str(exc))
435-
436-
437266
@router.post("/{folder_id_or_name:path}/documents/{document_id}", response_model=DocumentAddToFolderResponse)
438267
@telemetry.track(operation_type="add_document_to_folder", metadata_resolver=telemetry.add_document_to_folder_metadata)
439268
async def add_document_to_folder(

0 commit comments

Comments
 (0)