From 1b343f0b5584210812a68b067af2db1cd1cfe64c Mon Sep 17 00:00:00 2001 From: Adityavardhan Agrawal Date: Mon, 8 Dec 2025 16:44:03 -0800 Subject: [PATCH] Add secret management via infisical --- core/config.py | 14 ++- core/models/request.py | 13 +++ core/models/responses.py | 15 +++ core/routes/folders.py | 173 +++++++++++++++++++++++++++++++++- core/services/user_service.py | 5 +- ee/config.py | 5 +- morphik.docker.toml | 2 +- morphik.toml | 2 +- start_server.py | 6 +- utils/env_loader.py | 37 ++++++++ 10 files changed, 257 insertions(+), 15 deletions(-) create mode 100644 utils/env_loader.py diff --git a/core/config.py b/core/config.py index 5715cf72..ce15d37e 100644 --- a/core/config.py +++ b/core/config.py @@ -3,11 +3,14 @@ from typing import Any, Dict, List, Literal, Optional import tomli -from dotenv import load_dotenv from pydantic import BaseModel from pydantic_settings import BaseSettings -load_dotenv(override=True) +from utils.env_loader import load_local_env + +# Default to loading from .env unless a secret manager (e.g., Infisical) is +# injecting variables. 
class FolderTreeRequest(BaseModel):
    """Request model for retrieving a hierarchical folder tree with documents."""

    # Root of the subtree to return. Leaving it unset (None) or sending "/"
    # asks for the whole hierarchy, as the field description states.
    folder_path: Optional[str] = Field(
        default=None,
        description="Base folder path to return. Use '/' or null for the full hierarchy.",
    )
    # Optional projection applied to each document placed in the tree;
    # None leaves the choice of fields to the route's projection helper.
    document_fields: Optional[List[str]] = Field(
        default=None,
        description="Optional list of fields to include for documents in each folder node (dot notation supported).",
    )


class FolderTreeNode(BaseModel):
    """Nested folder tree entry including contained documents."""

    # Every scalar field is optional so a node can be built without a backing
    # folder record (NOTE(review): presumably for synthesized root/placeholder
    # nodes -- confirm against the route that builds the tree).
    id: Optional[str] = None
    name: Optional[str] = None
    full_path: Optional[str] = None
    description: Optional[str] = None
    depth: Optional[int] = None
    # Documents are plain dicts rather than a typed model, so their keys are
    # whatever the producer chose to include.
    documents: List[Dict[str, Any]] = Field(default_factory=list)
    # Recursive reference: child nodes have the same shape as their parent.
    children: List["FolderTreeNode"] = Field(default_factory=list)


# Resolve the "FolderTreeNode" forward reference used by ``children`` now that
# the class object exists.
FolderTreeNode.model_rebuild()
def _tree_canonical_path(folder: Folder) -> Optional[str]:
    """Return ``folder``'s normalized path, or ``None`` when it has no usable one.

    ``full_path`` takes precedence; ``name`` is only consulted when
    ``full_path`` is empty. A value rejected by ``normalize_folder_path``
    (``ValueError``) yields ``None`` so the folder is left out of the tree.
    """
    raw_path = folder.full_path or folder.name
    if not raw_path:
        return None
    try:
        return normalize_folder_path(raw_path)
    except ValueError:
        return None


def _tree_parent_path(path: str) -> Optional[str]:
    """Return the parent of ``path``; ``None`` for the root or an empty path."""
    if not path or path == "/":
        return None
    segments = [part for part in path.strip("/").split("/") if part]
    if len(segments) <= 1:
        # Top-level folders hang directly off the root.
        return "/"
    return "/" + "/".join(segments[:-1])


def _tree_make_node(path: str, folder: Optional[Folder]) -> FolderTreeNode:
    """Build an empty tree node for ``path``.

    When ``folder`` is ``None`` the node is synthesized: its name is the last
    path segment (``"/"`` for the root), it has no id or description, and its
    depth is ``0`` for the root and unknown (``None``) otherwise.
    """
    if folder:
        name = folder.name
        depth = folder.depth
    else:
        name = "/" if path == "/" else (path.strip("/").split("/")[-1] or "/")
        depth = 0 if path == "/" else None
    return FolderTreeNode(
        id=folder.id if folder else None,
        name=name,
        full_path=path,
        description=folder.description if folder else None,
        depth=depth,
        documents=[],
        children=[],
    )


def _tree_attach_child(parent: FolderTreeNode, child: FolderTreeNode) -> None:
    """Append ``child`` to ``parent`` unless a child with the same path is already there."""
    if not any(existing.full_path == child.full_path for existing in parent.children):
        parent.children.append(child)


def _tree_sort(node: FolderTreeNode) -> None:
    """Recursively order children by (name, path) and documents by filename/external id."""
    node.children.sort(key=lambda child: (child.name or "", child.full_path or ""))
    for child in node.children:
        _tree_sort(child)
    node.documents.sort(key=lambda doc: str(doc.get("filename") or doc.get("external_id") or ""))


@router.post("/tree", response_model=FolderTreeNode)
async def get_folder_tree(
    request: FolderTreeRequest,
    auth: AuthContext = Depends(verify_token),
) -> FolderTreeNode:
    """
    Return a hierarchical folder tree (with documents) rooted at ``folder_path``.

    When ``folder_path`` is null or ``/``, the entire accessible hierarchy is returned.

    Args:
        request: Base folder path and optional document field projection.
        auth: Authentication context used for every database lookup.

    Returns:
        The root node of the tree; folders and documents whose parent is not
        part of the result are attached directly to this root.

    Raises:
        HTTPException: 400 for an invalid ``folder_path``, 404 when the base
            folder does not exist, 500 for any unexpected failure.
    """

    try:
        folder_path = request.folder_path
        document_fields = request.document_fields
        normalized_path: Optional[str] = None
        if folder_path is not None:
            # Clients sometimes send the literal string "null"; treat it like a missing value.
            if isinstance(folder_path, str) and folder_path.lower() == "null":
                folder_path = None
            else:
                try:
                    normalized_path = normalize_folder_path(folder_path)
                except ValueError as exc:
                    raise HTTPException(status_code=400, detail=str(exc)) from exc
                if normalized_path == "/":
                    normalized_path = None

        base_path = normalized_path or "/"

        base_folder: Optional[Folder] = None
        if normalized_path:
            base_folder = await document_service.db.get_folder_by_full_path(normalized_path, auth)
            if not base_folder:
                raise HTTPException(status_code=404, detail=f"Folder {folder_path} not found")

        # Pair every accessible folder with its canonical path; folders without one are skipped.
        all_folders = await document_service.db.list_folders(auth)
        folders_with_paths: List[tuple[str, Folder]] = []
        for folder in all_folders:
            path = _tree_canonical_path(folder)
            if path:
                folders_with_paths.append((path, folder))

        if normalized_path:
            # Keep only the base folder and its descendants. The trailing slash on
            # the prefix stops "/a" from matching a sibling such as "/ab".
            prefix = normalized_path.rstrip("/") + "/"
            folders_with_paths = [
                (path, folder)
                for path, folder in folders_with_paths
                if path == normalized_path or path.startswith(prefix)
            ]
            if base_folder:
                base_folder_path = _tree_canonical_path(base_folder)
                if base_folder_path and all(path != base_folder_path for path, _ in folders_with_paths):
                    folders_with_paths.append((base_folder_path, base_folder))

        nodes_by_path: Dict[str, FolderTreeNode] = {
            path: _tree_make_node(path, folder) for path, folder in folders_with_paths
        }

        root_node = nodes_by_path.get(base_path)
        if not root_node:
            root_node = _tree_make_node(base_path, base_folder)
            nodes_by_path[base_path] = root_node

        # Link shallow paths first so the attachment order is deterministic.
        for path in sorted(nodes_by_path, key=lambda p: (p.count("/"), p)):
            if path == base_path:
                continue
            parent_node = nodes_by_path.get(_tree_parent_path(path))
            if not parent_node:
                # The parent is missing or not visible: hang the folder off the root.
                parent_node = root_node
            _tree_attach_child(parent_node, nodes_by_path[path])

        doc_system_filters = {"folder_path_prefix": base_path} if normalized_path else None
        document_result = await document_service.db.list_documents_flexible(
            auth=auth,
            skip=0,
            limit=None,
            system_filters=doc_system_filters,
            include_total_count=False,
            include_status_counts=False,
            include_folder_counts=False,
            return_documents=True,
            sort_by="filename",
            sort_direction="asc",
        )

        documents = document_result.get("documents", []) or []
        for document in documents:
            # Accept pydantic v2 models, v1-style models and plain mappings.
            if hasattr(document, "model_dump"):
                doc_dict = document.model_dump(mode="json")
            elif hasattr(document, "dict"):
                doc_dict = document.dict()
            else:
                doc_dict = dict(document)

            doc_path_raw = doc_dict.get("folder_path")
            try:
                doc_path = normalize_folder_path(doc_path_raw) if doc_path_raw is not None else None
            except ValueError:
                # Keep the raw value so the document still lands somewhere in the tree.
                doc_path = doc_path_raw

            # Documents without a folder are listed on the tree root.
            target_path = doc_path or base_path
            target_node = nodes_by_path.get(target_path)
            if not target_node:
                # The document references a folder with no record: synthesize a node for it.
                target_node = _tree_make_node(target_path, None)
                nodes_by_path[target_path] = target_node
                parent_node = nodes_by_path.get(_tree_parent_path(target_path or "/")) or root_node
                _tree_attach_child(parent_node, target_node)

            projected_doc = project_document_fields(doc_dict, document_fields)
            # Always expose folder_path, even when the projection left it out.
            if doc_path_raw is not None and "folder_path" not in projected_doc:
                projected_doc["folder_path"] = doc_path_raw

            target_node.documents.append(projected_doc)

        _tree_sort(root_node)
        return root_node

    except HTTPException:
        raise
    except Exception as exc:  # noqa: BLE001
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("Error building folder tree: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
b/core/services/user_service.py @@ -6,6 +6,8 @@ import jwt from sqlalchemy import or_, select +from utils.env_loader import load_local_env + from ..config import get_settings from ..database.user_limits_db import UserLimitsDatabase from ..models.tiers import AccountTier, get_tier_limits @@ -206,9 +208,8 @@ async def record_usage( import os import stripe - from dotenv import load_dotenv - load_dotenv(override=True) + load_local_env(override=True) # Get Stripe API key from environment variable stripe_api_key = os.environ.get("STRIPE_API_KEY") diff --git a/ee/config.py b/ee/config.py index 97700dba..309a53e4 100644 --- a/ee/config.py +++ b/ee/config.py @@ -3,10 +3,11 @@ from typing import List, Optional import tomli -from dotenv import load_dotenv from pydantic import Field from pydantic_settings import BaseSettings +from utils.env_loader import load_local_env + # Determine the root directory of the EE features (assuming this file is in ee/config.py) EE_ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_TOKEN_STORAGE_PATH = os.path.join(EE_ROOT_DIR, "connector_tokens") @@ -50,7 +51,7 @@ def get_ee_settings() -> EESettings: 2. ee.toml file. 3. Default values defined in EESettings model. 
""" - load_dotenv(override=True) + load_local_env(override=True) ee_toml_path = os.path.join(EE_ROOT_DIR, "ee.toml") config_from_toml = {} diff --git a/morphik.docker.toml b/morphik.docker.toml index 8eaaceb3..552340a5 100644 --- a/morphik.docker.toml +++ b/morphik.docker.toml @@ -139,7 +139,7 @@ colpali_pdf_dpi = 150 # DPI for PDF to image conversion in ColPali processing [morphik] enable_colpali = true mode = "self_hosted" # "cloud" or "self_hosted" -use_local_env = true +secret_manager = "env" # "env" (default) or "infisical" api_domain = "api.morphik.ai" # API domain for cloud URIs # Only call the embedding API if colpali_mode is "api" morphik_embedding_api_domain = "http://localhost:6000" # endpoint for multivector embedding service diff --git a/morphik.toml b/morphik.toml index 4ee94712..fd65fa49 100644 --- a/morphik.toml +++ b/morphik.toml @@ -145,7 +145,7 @@ colpali_pdf_dpi = 150 # DPI for PDF to image conversion in ColPali processing [morphik] enable_colpali = true mode = "self_hosted" # "cloud" or "self_hosted" -use_local_env = true +secret_manager = "env" # "env" (default) or "infisical" api_domain = "api.morphik.ai" # API domain for cloud URIs # Only call the embedding API if colpali_mode is "api" morphik_embedding_api_domain = "http://localhost:6000" # endpoint for multivector embedding service diff --git a/start_server.py b/start_server.py index 1dcef165..dad551a7 100644 --- a/start_server.py +++ b/start_server.py @@ -11,10 +11,10 @@ import requests import tomli import uvicorn -from dotenv import load_dotenv from core.config import get_settings from core.logging_config import setup_logging +from utils.env_loader import load_local_env # Global variable to store the worker process worker_process = None @@ -275,8 +275,8 @@ def main(): if not args.skip_redis_check: check_and_start_redis() - # Load environment variables from .env file - load_dotenv(override=True) + # Load environment variables from .env file if secrets aren't injected + 
"""Decide whether a local ``.env`` file should be loaded, based on ``morphik.toml``."""

import logging
from pathlib import Path
from typing import Any, Optional

try:  # Python 3.11+ ships a TOML parser in the standard library.
    import tomllib as tomli
except ModuleNotFoundError:  # Older interpreters use the tomli backport.
    import tomli

logger = logging.getLogger(__name__)


def _secret_manager_from_toml() -> Optional[str]:
    """Peek at morphik.toml for the secret_manager setting.

    Returns:
        The configured string, or ``None`` when the file is missing or
        unreadable, is not valid TOML, has no ``[morphik]`` table, or holds a
        non-string ``secret_manager`` value.
    """
    toml_path = Path("morphik.toml")
    try:
        with toml_path.open("rb") as f:
            data = tomli.load(f)
    except FileNotFoundError:
        # No config file is a normal situation; stay quiet.
        return None
    except (OSError, ValueError) as exc:
        # TOMLDecodeError and UnicodeDecodeError are both ValueError subclasses.
        logger.warning("Could not read %s (%s); treating secret_manager as unset", toml_path, exc)
        return None

    section = data.get("morphik")
    if not isinstance(section, dict):
        return None

    value = section.get("secret_manager")
    if value is None:
        return None
    if not isinstance(value, str):
        logger.warning(
            "Ignoring non-string secret_manager value %r in %s", value, toml_path
        )
        return None
    return value


def should_use_dotenv() -> bool:
    """Return True when local .env files should be loaded.

    That is the case when ``secret_manager`` is ``"env"`` (case-insensitive,
    surrounding whitespace ignored) or is not specified at all. Any other
    value means an external secret manager injects the variables.
    """
    toml_value = _secret_manager_from_toml()
    normalized = toml_value.strip().lower() if toml_value else ""
    if normalized:
        return normalized == "env"

    # Default if nothing is specified
    return True


def load_local_env(*args: Any, **kwargs: Any) -> None:
    """
    Load a local .env file if the secret manager is set to 'env'.
    Accepts the same arguments as python-dotenv's load_dotenv.
    """
    if not should_use_dotenv():
        return

    # Imported lazily so python-dotenv is only needed when it is actually used.
    from dotenv import load_dotenv

    load_dotenv(*args, **kwargs)