17 changes: 16 additions & 1 deletion README.md
@@ -157,7 +157,22 @@ Pytest marks warnings as errors; update fixtures or add targeted `filterwarnings
## WhatsApp & Admin Endpoints
- **Webhook:** `POST /meta-whatsapp` (signature verification + LangGraph processing). Verification handshake uses `GET /meta-whatsapp`.
- **Progress messaging:** Status texts sourced from `bt_servant_engine.services.status_messages`.
- **Admin API:** See `bt_servant_engine.apps.api.routes.admin` for vector store maintenance (collection merges, document management) secured via bearer token headers when `ENABLE_ADMIN_AUTH=True`. Cache controls are exposed here as well:
- `POST /cache/clear` wipes every cache namespace.
- `POST /cache/{name}/clear` clears an individual cache (e.g., `passage_summary`).
- `GET /cache/stats` reports global cache settings, hit/miss counters, and disk usage.
- `GET /cache/{name}?sample_limit=10` inspects a specific cache with recent entry metadata.
- Both clear endpoints accept `older_than_days=<float>` to prune only entries older than the cutoff instead of clearing everything; see the sketch below.
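
For example, the endpoints can be driven from a short script (a minimal sketch, assuming a deployment reachable at `http://localhost:8000`, the `httpx` client, and an admin bearer token exported as `ADMIN_TOKEN`; all three are illustrative assumptions):

```python
import os

import httpx  # assumed client; any HTTP client works

BASE_URL = "http://localhost:8000"  # assumption: locally running instance
HEADERS = {"Authorization": f"Bearer {os.environ['ADMIN_TOKEN']}"}  # required when ENABLE_ADMIN_AUTH=True

# Prune passage_summary entries older than 7 days, leaving newer entries intact.
resp = httpx.post(
    f"{BASE_URL}/cache/passage_summary/clear",
    params={"older_than_days": 7},
    headers=HEADERS,
)
print(resp.json())  # e.g. {"status": "pruned", "cache": "passage_summary", ...}

# Global cache settings, hit/miss counters, and disk usage.
print(httpx.get(f"{BASE_URL}/cache/stats", headers=HEADERS).json())

# Inspect one cache with metadata for up to 10 recent entries.
print(
    httpx.get(
        f"{BASE_URL}/cache/passage_summary",
        params={"sample_limit": 10},
        headers=HEADERS,
    ).json()
)
```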

---

## Cache Configuration
- Defaults: caching is enabled with a disk backend under `${DATA_DIR}/cache`; entries never expire (`TTL=-1`) and each cache's disk store is capped at 500 MB via `CACHE_DISK_MAX_BYTES`.
- Toggle or tune via env settings (see `bt_servant_engine/core/config.py`):
- `CACHE_ENABLED`, `CACHE_BACKEND` (`disk` | `memory`), `CACHE_DISK_MAX_BYTES`
- Per-cache toggles (defaults in parentheses): `CACHE_SELECTION_ENABLED` (`false`), `CACHE_SUMMARY_ENABLED` (`true`), `CACHE_KEYWORDS_ENABLED` (`true`), `CACHE_TRANSLATION_HELPS_ENABLED` (`true`), `CACHE_RAG_VECTOR_ENABLED` (`false`), `CACHE_RAG_FINAL_ENABLED` (`false`)
- Per-cache TTL/size controls: `CACHE_SELECTION_TTL_SECONDS`, `CACHE_SUMMARY_TTL_SECONDS`, `CACHE_TRANSLATION_HELPS_TTL_SECONDS`, etc., plus matching `*_MAX_ENTRIES` limits (set a TTL to `-1` for no expiry); a sample override is sketched below.
- Admin endpoints (above) can purge or inspect caches without redeploying; deleting the cache directory in `${DATA_DIR}/cache` also resets disk stores.
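
As a concrete example, the following sketch overrides a few cache knobs for the current process (illustrative values only; it assumes `Settings` is a standard pydantic `BaseSettings` and that any other required settings are already present in the environment). The same keys work in a `.env` file or the deployment environment:

```python
import os

# pydantic BaseSettings reads these when Settings() is instantiated.
os.environ["CACHE_BACKEND"] = "memory"             # disk | memory
os.environ["CACHE_SUMMARY_TTL_SECONDS"] = "86400"  # expire summaries after one day
os.environ["CACHE_RAG_FINAL_ENABLED"] = "true"     # opt in to final-answer caching
os.environ["CACHE_DISK_MAX_BYTES"] = str(100 * 1024 * 1024)  # shrink the disk cap to 100 MB

# Imported after the overrides so the values above are picked up.
from bt_servant_engine.core.config import Settings

settings = Settings()
assert settings.CACHE_BACKEND == "memory"
assert settings.CACHE_SUMMARY_TTL_SECONDS == 86400
```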

---

114 changes: 114 additions & 0 deletions bt_servant_engine/apps/api/routes/admin.py
@@ -25,6 +25,7 @@
)
from bt_servant_engine.core.logging import get_logger
from bt_servant_engine.services import runtime
from bt_servant_engine.services.cache_manager import cache_manager

router = APIRouter()
logger = get_logger(__name__)
@@ -33,6 +34,8 @@
os.environ.get("OPENAI_EMBED_MAX_TOKENS_PER_REQUEST", "290000")
)

MAX_CACHE_SAMPLE_LIMIT = 100

# Re-export merge helpers for compatibility with existing references.
iter_collection_batches = merge_helpers.iter_collection_batches
estimate_tokens = merge_helpers.estimate_tokens
@@ -870,3 +873,114 @@ async def chroma_catch_all(_path: str, _: None = Depends(require_admin_token)):


__all__ = ["router"]


@router.post("/cache/clear")
async def clear_all_caches(
older_than_days: float | None = None,
_: None = Depends(require_admin_token),
):
"""Clear all registered caches."""
if older_than_days is not None:
if older_than_days <= 0:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"error": "older_than_days must be greater than 0"},
)
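        # older_than_days may be fractional; 86400 seconds per day yields the epoch cutoff.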
cutoff = time.time() - older_than_days * 86400
logger.info(
"[cache-admin] pruning all caches older than %.2f days (cutoff=%s)",
older_than_days,
cutoff,
)
removed = cache_manager.prune_all(cutoff)
return JSONResponse(
status_code=status.HTTP_200_OK,
content={
"status": "pruned",
"cutoff_epoch": cutoff,
"removed": removed,
},
)
logger.info("[cache-admin] clearing all caches via admin endpoint")
cache_manager.clear_all()
return JSONResponse(status_code=status.HTTP_200_OK, content={"status": "cleared"})


@router.post("/cache/{name}/clear")
async def clear_named_cache(
name: str,
older_than_days: float | None = None,
_: None = Depends(require_admin_token),
):
"""Clear a specific cache namespace."""
if older_than_days is not None:
if older_than_days <= 0:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"error": "older_than_days must be greater than 0"},
)
cutoff = time.time() - older_than_days * 86400
try:
removed = cache_manager.prune_cache(name, cutoff)
except KeyError as exc:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"error": f"Cache '{name}' not found"},
) from exc
logger.info(
"[cache-admin] pruned cache %s older than %.2f days (cutoff=%s, removed=%d)",
name,
older_than_days,
cutoff,
removed,
)
return JSONResponse(
status_code=status.HTTP_200_OK,
content={
"status": "pruned",
"cache": name,
"cutoff_epoch": cutoff,
"removed": removed,
},
)
try:
cache_manager.clear_cache(name)
except KeyError as exc:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail={"error": f"Cache '{name}' not found"}
) from exc
logger.info("[cache-admin] cleared cache %s via admin endpoint", name)
return JSONResponse(
status_code=status.HTTP_200_OK,
content={"status": "cleared", "cache": name},
)


@router.get("/cache/stats")
async def get_cache_stats(_: None = Depends(require_admin_token)):
"""Return summary stats for all caches."""
data = cache_manager.stats()
return JSONResponse(status_code=status.HTTP_200_OK, content=data)


@router.get("/cache/{name}")
async def inspect_cache(name: str, sample_limit: int = 10, _: None = Depends(require_admin_token)):
"""Return detailed stats and samples for a specific cache."""
if sample_limit <= 0 or sample_limit > MAX_CACHE_SAMPLE_LIMIT:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"error": f"sample_limit must be between 1 and {MAX_CACHE_SAMPLE_LIMIT}",
},
)
try:
cache = cache_manager.cache(name)
except KeyError as exc:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail={"error": f"Cache '{name}' not found"}
) from exc
logger.info("[cache-admin] inspecting cache %s (limit=%d)", name, sample_limit)
details = cache.detailed_stats(sample_limit=sample_limit)
details["sample_limit"] = sample_limit
return JSONResponse(status_code=status.HTTP_200_OK, content=details)
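
One detail worth noting: `GET /cache/stats` is registered before `GET /cache/{name}`, so FastAPI matches the literal path first and a cache literally named `stats` would be shadowed. The routes can be exercised in isolation along these lines (a minimal sketch, assuming the admin router mounts standalone and that `require_admin_token` passes, e.g. with admin auth disabled in a test environment):

```python
from fastapi import FastAPI
from fastapi.testclient import TestClient

from bt_servant_engine.apps.api.routes.admin import router

app = FastAPI()
app.include_router(router)
client = TestClient(app)

# The literal /cache/stats route wins over /cache/{name} because it is registered first.
assert client.get("/cache/stats").status_code == 200

# Unknown cache names surface as 404s via the KeyError -> HTTPException translation.
assert client.post("/cache/does-not-exist/clear").status_code == 404

# sample_limit is validated (1..MAX_CACHE_SAMPLE_LIMIT) before the cache lookup,
# so an out-of-range value returns 400 even for a nonexistent cache.
assert client.get("/cache/anything", params={"sample_limit": 0}).status_code == 400
```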
24 changes: 24 additions & 0 deletions bt_servant_engine/core/config.py
@@ -43,6 +43,30 @@ class Settings(BaseSettings):
PROGRESS_MESSAGE_EMOJI: str = Field(default="⏳")
PROGRESS_MESSAGE_EMOJI_OVERRIDES: dict[str, str] = Field(default_factory=dict)

# Cache configuration
CACHE_ENABLED: bool = Field(default=True)
CACHE_BACKEND: Literal["disk", "memory"] = Field(default="disk")
CACHE_DISK_MAX_BYTES: int = Field(default=500 * 1024 * 1024) # 500MB
CACHE_DEFAULT_TTL_SECONDS: int = Field(default=-1)
CACHE_SELECTION_ENABLED: bool = Field(default=False)
CACHE_SELECTION_TTL_SECONDS: int = Field(default=-1)
CACHE_SELECTION_MAX_ENTRIES: int = Field(default=5000)
CACHE_SUMMARY_ENABLED: bool = Field(default=True)
CACHE_SUMMARY_TTL_SECONDS: int = Field(default=-1)
CACHE_SUMMARY_MAX_ENTRIES: int = Field(default=1500)
CACHE_KEYWORDS_ENABLED: bool = Field(default=True)
CACHE_KEYWORDS_TTL_SECONDS: int = Field(default=-1)
CACHE_KEYWORDS_MAX_ENTRIES: int = Field(default=3000)
CACHE_TRANSLATION_HELPS_ENABLED: bool = Field(default=True)
CACHE_TRANSLATION_HELPS_TTL_SECONDS: int = Field(default=-1)
CACHE_TRANSLATION_HELPS_MAX_ENTRIES: int = Field(default=1000)
CACHE_RAG_VECTOR_ENABLED: bool = Field(default=False)
CACHE_RAG_VECTOR_TTL_SECONDS: int = Field(default=-1)
CACHE_RAG_VECTOR_MAX_ENTRIES: int = Field(default=3000)
CACHE_RAG_FINAL_ENABLED: bool = Field(default=False)
CACHE_RAG_FINAL_TTL_SECONDS: int = Field(default=-1)
CACHE_RAG_FINAL_MAX_ENTRIES: int = Field(default=1500)

DATA_DIR: Path = Field(default=Path("/data"))
OPENAI_PRICING_JSON: str = Field(
default=(