diff --git a/bt_servant_engine/__init__.py b/bt_servant_engine/__init__.py index 8e8b1c4..9938f03 100644 --- a/bt_servant_engine/__init__.py +++ b/bt_servant_engine/__init__.py @@ -1,6 +1,6 @@ """Core package placeholder for upcoming onion architecture refactor.""" -BT_SERVANT_VERSION = "1.2.17" +BT_SERVANT_VERSION = "1.2.18" BT_SERVANT_RELEASES_URL = "https://github.com/unfoldingWord/bt-servant-engine/releases" __all__ = ["BT_SERVANT_VERSION", "BT_SERVANT_RELEASES_URL"] diff --git a/bt_servant_engine/adapters/user_state.py b/bt_servant_engine/adapters/user_state.py index 6a1bf85..a7777eb 100644 --- a/bt_servant_engine/adapters/user_state.py +++ b/bt_servant_engine/adapters/user_state.py @@ -124,6 +124,21 @@ def set_user_response_language(user_id: str, language: str) -> None: db.upsert(updated, cond) +def clear_user_response_language(user_id: str) -> None: + """Remove the user's stored response language preference.""" + q = Query() + db = get_user_db().table("users") + cond = cast(QueryLike, q.user_id == user_id) + existing_raw = db.get(cond) + existing = cast(Optional[Dict[str, Any]], existing_raw) + updated: Dict[str, Any] = ( + existing.copy() if isinstance(existing, dict) else {"user_id": user_id} + ) + updated["user_id"] = user_id + updated["response_language"] = None + db.upsert(updated, cond) + + def get_user_agentic_strength(user_id: str) -> Optional[str]: """Get the user's preferred agentic strength, or None if not set.""" q = Query() @@ -200,6 +215,9 @@ def get_response_language(self, user_id: str) -> str | None: def set_response_language(self, user_id: str, language: str) -> None: set_user_response_language(user_id, language) + def clear_response_language(self, user_id: str) -> None: + clear_user_response_language(user_id) + def get_agentic_strength(self, user_id: str) -> str | None: return get_user_agentic_strength(user_id) diff --git a/bt_servant_engine/core/intents.py b/bt_servant_engine/core/intents.py index c7a67a8..bd127a2 100644 --- a/bt_servant_engine/core/intents.py +++ b/bt_servant_engine/core/intents.py @@ -20,6 +20,7 @@ class IntentType(str, Enum): PERFORM_UNSUPPORTED_FUNCTION = "perform-unsupported-function" RETRIEVE_SYSTEM_INFORMATION = "retrieve-system-information" SET_RESPONSE_LANGUAGE = "set-response-language" + CLEAR_RESPONSE_LANGUAGE = "clear-response-language" SET_AGENTIC_STRENGTH = "set-agentic-strength" CONVERSE_WITH_BT_SERVANT = "converse-with-bt-servant" diff --git a/bt_servant_engine/core/language.py b/bt_servant_engine/core/language.py index a3afbe2..3222d17 100644 --- a/bt_servant_engine/core/language.py +++ b/bt_servant_engine/core/language.py @@ -1,10 +1,14 @@ -"""Language models and constants for the BT Servant application.""" +"""Language models and helpers for the BT Servant application.""" from enum import Enum +import re +from typing import Optional, Union -from pydantic import BaseModel +from pydantic import BaseModel, field_validator -# Mapping of ISO 639-1 language codes to friendly names +# Mapping of ISO 639-1 language codes to friendly names. +# This is intentionally non-exhaustive and serves as a set of display overrides +# for the languages we reference most frequently in user-facing copy. SUPPORTED_LANGUAGE_MAP = { "en": "English", "ar": "Arabic", @@ -20,10 +24,13 @@ } LANGUAGE_UNKNOWN = "UNKNOWN" +LANGUAGE_OTHER = "other" +_LANGUAGE_CODE_PATTERN = re.compile(r"^[a-z]{2}(?:-[a-z]{2})?$") +_LANGUAGE_NAME_LOOKUP = {name.lower(): code for code, name in SUPPORTED_LANGUAGE_MAP.items()} class Language(str, Enum): - """Supported ISO 639-1 language codes for responses/messages.""" + """Historical enum for legacy references (kept for compatibility).""" ENGLISH = "en" ARABIC = "ar" @@ -39,16 +46,82 @@ class Language(str, Enum): OTHER = "Other" +def _normalize_candidate(value: Union[str, "Language", None]) -> Optional[str]: + normalized: Optional[str] = None + if value is None: + normalized = None + elif isinstance(value, Language): + normalized = LANGUAGE_OTHER if value is Language.OTHER else value.value + else: + candidate = str(value).strip().lower() + if candidate: + if candidate == LANGUAGE_OTHER: + normalized = LANGUAGE_OTHER + elif _LANGUAGE_CODE_PATTERN.match(candidate): + normalized = candidate + return normalized + + +def normalize_language_code(value: Union[str, "Language", None]) -> Optional[str]: + """Normalize input into a lowercase ISO 639-1 (optionally xx-yy) code.""" + normalized = _normalize_candidate(value) + if normalized == LANGUAGE_OTHER: + return LANGUAGE_OTHER + return normalized + + +def normalized_or_other(value: Union[str, "Language", None]) -> str: + """Normalize to an ISO code; fall back to 'other' when unknown.""" + normalized = normalize_language_code(value) + return normalized or LANGUAGE_OTHER + + +def friendly_language_name( + code: Union[str, "Language", None], *, fallback: str = "that language" +) -> str: + """Return a printable name for the given language code.""" + normalized = normalize_language_code(code) + if not normalized or normalized == LANGUAGE_OTHER: + return fallback + return SUPPORTED_LANGUAGE_MAP.get(normalized, normalized.title()) + + +def lookup_language_code(name: Optional[str]) -> Optional[str]: + """Return a best-effort ISO code for a human-readable language name.""" + if not name: + return None + normalized = name.strip().lower() + return _LANGUAGE_NAME_LOOKUP.get(normalized) + + class ResponseLanguage(BaseModel): """Model for parsing/validating the detected response language.""" - language: Language + language: str + + @field_validator("language", mode="before") + @classmethod + def _coerce_language(cls, value: Union[str, "Language"]) -> str: + normalized = normalized_or_other(value) + if normalized == LANGUAGE_OTHER: + return LANGUAGE_OTHER + if not normalized: + raise ValueError("language must be an ISO 639-1 code or 'Other'") + return normalized class MessageLanguage(BaseModel): """Model for parsing/validating the detected language of a message.""" - language: Language + language: str + + @field_validator("language", mode="before") + @classmethod + def _coerce_language(cls, value: Union[str, "Language"]) -> str: + normalized = normalized_or_other(value) + if not normalized or normalized == LANGUAGE_OTHER: + raise ValueError("message language must be an ISO 639-1 code") + return normalized class TranslatedPassage(BaseModel): @@ -63,14 +136,27 @@ class TranslatedPassage(BaseModel): header_book: str header_suffix: str body: str - content_language: Language + content_language: str + + @field_validator("content_language", mode="before") + @classmethod + def _coerce_content_language(cls, value: Union[str, "Language"]) -> str: + normalized = normalize_language_code(value) + if not normalized or normalized == LANGUAGE_OTHER: + raise ValueError("content_language must be an ISO 639-1 code") + return normalized __all__ = [ "SUPPORTED_LANGUAGE_MAP", "LANGUAGE_UNKNOWN", + "LANGUAGE_OTHER", "Language", "ResponseLanguage", "MessageLanguage", "TranslatedPassage", + "normalize_language_code", + "normalized_or_other", + "friendly_language_name", + "lookup_language_code", ] diff --git a/bt_servant_engine/core/ports.py b/bt_servant_engine/core/ports.py index b4dcb1a..181f525 100644 --- a/bt_servant_engine/core/ports.py +++ b/bt_servant_engine/core/ports.py @@ -90,6 +90,10 @@ def set_response_language(self, user_id: str, language: str) -> None: """Persist the response language preference.""" ... + def clear_response_language(self, user_id: str) -> None: + """Remove any stored response language preference.""" + ... + def get_agentic_strength(self, user_id: str) -> str | None: """Return the stored agentic strength preference.""" ... diff --git a/bt_servant_engine/services/brain_nodes.py b/bt_servant_engine/services/brain_nodes.py index 2eb9a34..debc263 100644 --- a/bt_servant_engine/services/brain_nodes.py +++ b/bt_servant_engine/services/brain_nodes.py @@ -14,7 +14,6 @@ from openai import OpenAI from bt_servant_engine.core.config import config -from bt_servant_engine.core.language import SUPPORTED_LANGUAGE_MAP as supported_language_map from bt_servant_engine.core.logging import get_logger from bt_servant_engine.core.ports import ChromaPort, UserStatePort from bt_servant_engine.services.openai_utils import ( @@ -58,8 +57,11 @@ from bt_servant_engine.services.intents.settings_intents import ( AgenticStrengthDependencies, AgenticStrengthRequest, + ClearResponseLanguageDependencies, + ClearResponseLanguageRequest, ResponseLanguageDependencies, ResponseLanguageRequest, + clear_response_language as clear_response_language_impl, set_agentic_strength as set_agentic_strength_impl, set_response_language as set_response_language_impl, ) @@ -406,12 +408,23 @@ def set_response_language(state: Any) -> dict: chat_history=s["user_chat_history"], ) dependencies = ResponseLanguageDependencies( - supported_language_map=supported_language_map, set_user_response_language=user_state.set_response_language, ) return set_response_language_impl(request, dependencies) +def clear_response_language(state: Any) -> dict: + """Clear the user's stored response language preference.""" + + s = _brain_state(state) + user_state = _user_state_port() + request = ClearResponseLanguageRequest(user_id=s["user_id"]) + dependencies = ClearResponseLanguageDependencies( + clear_user_response_language=user_state.clear_response_language + ) + return clear_response_language_impl(request, dependencies) + + def set_agentic_strength(state: Any) -> dict: """Detect and persist the user's preferred agentic strength.""" @@ -919,4 +932,5 @@ def _sample_for_language_detection(text: str) -> str: "handle_translate_scripture", # Helper functions (for test compatibility) "resolve_selection_for_single_book", + "clear_response_language", ] diff --git a/bt_servant_engine/services/brain_orchestrator.py b/bt_servant_engine/services/brain_orchestrator.py index edf992e..b8d9dbb 100644 --- a/bt_servant_engine/services/brain_orchestrator.py +++ b/bt_servant_engine/services/brain_orchestrator.py @@ -35,6 +35,7 @@ # Higher values = higher priority = processed first when multiple intents detected INTENT_PRIORITY: Dict[IntentType, int] = { # Settings intents: Always process first to configure the session + IntentType.CLEAR_RESPONSE_LANGUAGE: 101, IntentType.SET_RESPONSE_LANGUAGE: 100, IntentType.SET_AGENTIC_STRENGTH: 99, # Scripture retrieval: Get the text before analyzing it @@ -65,6 +66,7 @@ IntentType.RETRIEVE_SCRIPTURE: "handle_retrieve_scripture_node", IntentType.LISTEN_TO_SCRIPTURE: "handle_listen_to_scripture_node", IntentType.SET_RESPONSE_LANGUAGE: "set_response_language_node", + IntentType.CLEAR_RESPONSE_LANGUAGE: "clear_response_language_node", IntentType.SET_AGENTIC_STRENGTH: "set_agentic_strength_node", IntentType.PERFORM_UNSUPPORTED_FUNCTION: "handle_unsupported_function_node", IntentType.RETRIEVE_SYSTEM_INFORMATION: "handle_system_information_request_node", @@ -562,6 +564,10 @@ def _should_show_translation_progress(state: Any) -> bool: "set_response_language_node", wrap_node_with_timing(brain_nodes.set_response_language, "set_response_language_node"), ) + builder.add_node( + "clear_response_language_node", + wrap_node_with_timing(brain_nodes.clear_response_language, "clear_response_language_node"), + ) builder.add_node( "set_agentic_strength_node", wrap_node_with_timing(brain_nodes.set_agentic_strength, "set_agentic_strength_node"), @@ -714,6 +720,7 @@ def _should_show_translation_progress(state: Any) -> bool: builder.add_conditional_edges("determine_intents_node", process_intents) builder.add_edge("query_vector_db_node", "query_open_ai_node") builder.add_edge("set_response_language_node", "translate_responses_node") + builder.add_edge("clear_response_language_node", "translate_responses_node") builder.add_edge("set_agentic_strength_node", "translate_responses_node") # After chunking, finish. Do not loop back to translate, which can recreate # the long message and trigger an infinite chunk cycle. diff --git a/bt_servant_engine/services/continuation_prompts.py b/bt_servant_engine/services/continuation_prompts.py index aba81eb..bc88a64 100644 --- a/bt_servant_engine/services/continuation_prompts.py +++ b/bt_servant_engine/services/continuation_prompts.py @@ -29,6 +29,7 @@ IntentType.PERFORM_UNSUPPORTED_FUNCTION: "help with that request", IntentType.RETRIEVE_SYSTEM_INFORMATION: "provide system information", IntentType.SET_RESPONSE_LANGUAGE: "set your response language", + IntentType.CLEAR_RESPONSE_LANGUAGE: "clear your response language preference", IntentType.SET_AGENTIC_STRENGTH: "adjust your agentic strength preference", IntentType.CONVERSE_WITH_BT_SERVANT: "continue our conversation", } diff --git a/bt_servant_engine/services/intents/fia_intents.py b/bt_servant_engine/services/intents/fia_intents.py index 4762c74..30c9e97 100644 --- a/bt_servant_engine/services/intents/fia_intents.py +++ b/bt_servant_engine/services/intents/fia_intents.py @@ -11,7 +11,6 @@ from openai.types.responses.easy_input_message_param import EasyInputMessageParam from bt_servant_engine.core.intents import IntentType -from bt_servant_engine.core.language import SUPPORTED_LANGUAGE_MAP as supported_language_map from bt_servant_engine.core.logging import get_logger from bt_servant_engine.services.intents.simple_intents import ( BOILER_PLATE_AVAILABLE_FEATURES_MESSAGE, @@ -102,11 +101,8 @@ def consult_fia_resources(request: FIARequest, dependencies: FIADependencies) -> def _resolve_candidate_language(request: FIARequest) -> str: - candidate = (request.user_response_language or request.query_language or "en").lower() - if candidate not in supported_language_map: - logger.info("[consult-fia] unsupported language '%s'; defaulting to English", candidate) - return "en" - return candidate + candidate = (request.user_response_language or request.query_language or "en").strip().lower() + return candidate or "en" def _gather_vector_documents( diff --git a/bt_servant_engine/services/intents/followup_questions.py b/bt_servant_engine/services/intents/followup_questions.py index cbc5303..8fdd0d4 100644 --- a/bt_servant_engine/services/intents/followup_questions.py +++ b/bt_servant_engine/services/intents/followup_questions.py @@ -81,6 +81,19 @@ "id": "Apa lagi yang bisa saya bantu hari ini?", "nl": "Waarmee kan ik u vandaag nog meer helpen?", }, + IntentType.CLEAR_RESPONSE_LANGUAGE: { + "en": "Would you like me to set a new response language?", + "es": "¿Quiere que configure un nuevo idioma de respuesta?", + "fr": "Souhaitez-vous que je définisse une nouvelle langue de réponse ?", + "pt": "Gostaria que eu definisse um novo idioma de resposta?", + "sw": "Je, ungependa nikaweke lugha mpya ya majibu?", + "ar": "هل تريد مني تحديد لغة استجابة جديدة؟", + "hi": "क्या आप चाहते हैं कि मैं नया उत्तर देने का भाषा तय कर दूं?", + "zh": "需要我设置一个新的回复语言吗?", + "ru": "Хотите, чтобы я установил новый язык ответов?", + "id": "Apakah Anda ingin saya menetapkan bahasa tanggapan baru?", + "nl": "Wilt u dat ik een nieuwe antwoordtaal instel?", + }, IntentType.SET_AGENTIC_STRENGTH: { "en": "Is there anything else I can assist you with?", "es": "¿Hay algo más en lo que pueda ayudarle?", diff --git a/bt_servant_engine/services/intents/passage_intents.py b/bt_servant_engine/services/intents/passage_intents.py index 5bdce43..6dcf769 100644 --- a/bt_servant_engine/services/intents/passage_intents.py +++ b/bt_servant_engine/services/intents/passage_intents.py @@ -13,8 +13,12 @@ from bt_servant_engine.core.config import config from bt_servant_engine.core.intents import IntentType -from bt_servant_engine.core.language import Language, ResponseLanguage -from bt_servant_engine.core.language import SUPPORTED_LANGUAGE_MAP as supported_language_map +from bt_servant_engine.core.language import ( + LANGUAGE_OTHER, + ResponseLanguage, + lookup_language_code, + normalize_language_code, +) from bt_servant_engine.core.logging import get_logger from bt_servant_engine.services.cache_manager import CACHE_SCHEMA_VERSION, get_cache from bt_servant_engine.services.openai_utils import track_openai_usage @@ -363,8 +367,8 @@ def _detect_requested_language(request: RetrieveScriptureRequest) -> Optional[st add_tokens, ) tl_parsed = cast(ResponseLanguage | None, tl_resp.output_parsed) - if tl_parsed and tl_parsed.language != Language.OTHER: - return str(tl_parsed.language.value) + if tl_parsed and tl_parsed.language != LANGUAGE_OTHER: + return str(tl_parsed.language) except OpenAIError: logger.info( "[retrieve-scripture] requested-language parse failed; will fallback", @@ -382,9 +386,9 @@ def _detect_requested_language(request: RetrieveScriptureRequest) -> Optional[st ) if match: name = match.group(1).strip().title() - for code, friendly in supported_language_map.items(): - if friendly.lower() == name.lower(): - return code + lookup = lookup_language_code(name) + if lookup: + return lookup return None @@ -460,11 +464,14 @@ def _determine_desired_target( requested_lang: Optional[str], request: RetrieveScriptureRequest, ) -> Optional[str]: - if requested_lang and requested_lang != resolved_language: - return requested_lang if requested_lang: + normalized_request = normalize_language_code(requested_lang) + if normalized_request and normalized_request != resolved_language: + return normalized_request return None - preferred = request.user_response_language or request.selection.query_lang + preferred = normalize_language_code(request.user_response_language) or normalize_language_code( + request.selection.query_lang + ) if preferred and preferred != resolved_language: return preferred return None @@ -754,7 +761,7 @@ def retrieve_scripture(request: RetrieveScriptureRequest) -> dict[str, Any]: requested_lang, request, ) - if desired_target and desired_target in supported_language_map: + if desired_target: logger.info( "[retrieve-scripture] auto-translating scripture to %s", desired_target, diff --git a/bt_servant_engine/services/intents/settings_intents.py b/bt_servant_engine/services/intents/settings_intents.py index d918418..a3040b6 100644 --- a/bt_servant_engine/services/intents/settings_intents.py +++ b/bt_servant_engine/services/intents/settings_intents.py @@ -11,7 +11,12 @@ from bt_servant_engine.core.agentic import AgenticStrengthChoice, AgenticStrengthSetting from bt_servant_engine.core.intents import IntentType -from bt_servant_engine.core.language import Language, ResponseLanguage +from bt_servant_engine.core.language import ( + LANGUAGE_OTHER, + ResponseLanguage, + friendly_language_name, + normalize_language_code, +) from bt_servant_engine.core.logging import get_logger from bt_servant_engine.services.openai_utils import extract_cached_input_tokens, track_openai_usage from utils.identifiers import get_log_safe_user_id @@ -22,12 +27,12 @@ SET_RESPONSE_LANGUAGE_AGENT_SYSTEM_PROMPT = """ Task: Determine the language the user wants responses in, based on conversation context and the latest message. -Allowed outputs: en, ar, fr, es, hi, ru, id, sw, pt, zh, nl, Other +Allowed outputs: any ISO 639-1 language code (e.g., en, fr, tr). If unclear, return Other. Instructions: - Use conversation history and the most recent message to infer the user's desired response language. -- Only return one of the allowed outputs. If unclear or unsupported, return Other. -- Consider explicit requests like "reply in French" or language names/codes. +- Prefer two-letter ISO codes; accept common lowercase variants like "pt-br" when the user is explicit. +- If the request is ambiguous or mentions a language we cannot map to an ISO code, return Other. - Output must match the provided schema with no additional prose. """ @@ -58,7 +63,6 @@ class ResponseLanguageRequest: class ResponseLanguageDependencies: """External helpers needed to persist the detected response language.""" - supported_language_map: dict[str, str] set_user_response_language: Callable[[str, str], Any] @@ -97,26 +101,32 @@ def set_response_language( usage = getattr(response, "usage", None) track_openai_usage(usage, "gpt-4o", extract_cached_input_tokens, add_tokens) resp_lang = cast(ResponseLanguage, response.output_parsed) - if resp_lang.language == Language.OTHER: - supported_language_list = ", ".join(dependencies.supported_language_map.keys()) + if resp_lang.language == LANGUAGE_OTHER: response_text = ( - "I think you're trying to set the response language. " - f"The supported languages are: {supported_language_list}. " - "If this is your intent, please clearly tell me which supported language " - "to use when responding." + "I can set my responses to any language. " + "Please mention the specific language or provide its ISO 639-1 code " + "(for example: en, fr, tr)." ) return { "responses": [{"intent": IntentType.SET_RESPONSE_LANGUAGE, "response": response_text}] } - response_language_code: str = str(resp_lang.language.value) - dependencies.set_user_response_language(request.user_id, response_language_code) - language_name: str = dependencies.supported_language_map.get( - response_language_code, response_language_code - ) + + normalized_code = normalize_language_code(resp_lang.language) + if not normalized_code or normalized_code == LANGUAGE_OTHER: + response_text = ( + "I wasn't able to determine the language you're requesting. " + "Please provide a clear ISO 639-1 code so I can save it." + ) + return { + "responses": [{"intent": IntentType.SET_RESPONSE_LANGUAGE, "response": response_text}] + } + + dependencies.set_user_response_language(request.user_id, normalized_code) + language_name = friendly_language_name(normalized_code, fallback=normalized_code) response_text = f"Setting response language to: {language_name}" return { "responses": [{"intent": IntentType.SET_RESPONSE_LANGUAGE, "response": response_text}], - "user_response_language": response_language_code, + "user_response_language": normalized_code, } @@ -217,13 +227,46 @@ def set_agentic_strength( } +@dataclass(slots=True) +class ClearResponseLanguageRequest: + """Inputs required to clear the stored response language.""" + + user_id: str + + +@dataclass(slots=True) +class ClearResponseLanguageDependencies: + """Helpers needed to remove response language preferences.""" + + clear_user_response_language: Callable[[str], Any] + + +def clear_response_language( + request: ClearResponseLanguageRequest, + dependencies: ClearResponseLanguageDependencies, +) -> dict[str, Any]: + """Remove the persisted response language preference for the user.""" + dependencies.clear_user_response_language(request.user_id) + response_text = ( + "Cleared your response-language preference. " + "I'll match the language of your messages until you set a new preference." + ) + return { + "responses": [{"intent": IntentType.CLEAR_RESPONSE_LANGUAGE, "response": response_text}], + "user_response_language": None, + } + + __all__ = [ "SET_RESPONSE_LANGUAGE_AGENT_SYSTEM_PROMPT", "SET_AGENTIC_STRENGTH_AGENT_SYSTEM_PROMPT", "ResponseLanguageRequest", "ResponseLanguageDependencies", + "ClearResponseLanguageRequest", + "ClearResponseLanguageDependencies", "AgenticStrengthRequest", "AgenticStrengthDependencies", "set_response_language", "set_agentic_strength", + "clear_response_language", ] diff --git a/bt_servant_engine/services/intents/simple_intents.py b/bt_servant_engine/services/intents/simple_intents.py index 0c31182..577e19e 100644 --- a/bt_servant_engine/services/intents/simple_intents.py +++ b/bt_servant_engine/services/intents/simple_intents.py @@ -119,6 +119,16 @@ def get_capabilities() -> List[Capability]: ], "include_in_boilerplate": True, }, + { + "intent": IntentType.CLEAR_RESPONSE_LANGUAGE, + "label": "Clear response language", + "description": "Return to matching your message language automatically.", + "examples": [ + "Stop forcing a specific response language.", + "Clear my response language setting.", + ], + "include_in_boilerplate": True, + }, ] @@ -226,6 +236,20 @@ def build_full_help_message() -> str: {FULL_HELP_MESSAGE} +In addition, you can share these language capabilities when relevant: + + +I can understand and generate text in over 100 languages to varying degrees of fluency. This includes: +- Widely spoken languages like English, Spanish, Mandarin Chinese, Hindi, Arabic, French, Russian, and Portuguese +- Less commonly used languages such as Swahili, Finnish, Haitian Creole, and Maori +- Classical or liturgical languages like Latin, Ancient Greek, Biblical Hebrew, and Classical Arabic + +However, proficiency varies: +- High fluency in major languages (especially English) +- Basic understanding or limited generation in low-resource or endangered languages +- Limited support for some complex scripts, grammatical systems, or dialectal variation + + # Using prior history for better responses Here are some guidelines for using history for better responses: diff --git a/bt_servant_engine/services/intents/translation_intents.py b/bt_servant_engine/services/intents/translation_intents.py index bb54fa7..ecca65e 100644 --- a/bt_servant_engine/services/intents/translation_intents.py +++ b/bt_servant_engine/services/intents/translation_intents.py @@ -14,8 +14,14 @@ from bt_servant_engine.core.config import config from bt_servant_engine.core.intents import IntentType -from bt_servant_engine.core.language import Language, ResponseLanguage, TranslatedPassage -from bt_servant_engine.core.language import SUPPORTED_LANGUAGE_MAP as supported_language_map +from bt_servant_engine.core.language import ( + LANGUAGE_OTHER, + ResponseLanguage, + TranslatedPassage, + friendly_language_name, + lookup_language_code, + normalize_language_code, +) from bt_servant_engine.core.logging import get_logger from bt_servant_engine.services.cache_manager import CACHE_SCHEMA_VERSION, get_cache from bt_servant_engine.services.openai_utils import track_openai_usage @@ -37,7 +43,6 @@ logger = get_logger(__name__) -SUPPORTED_TARGET_CODES = ["en", "ar", "fr", "es", "hi", "ru", "id", "sw", "pt", "zh", "nl"] LANGUAGE_REGEX = re.compile( r"\b(?:into|to|in)\s+([A-Za-z][A-Za-z\- ]{1,30})\b", flags=re.IGNORECASE ) @@ -211,16 +216,13 @@ def _simple_response(message: str) -> dict[str, Any]: return {"responses": [{"intent": IntentType.TRANSLATE_SCRIPTURE, "response": message}]} -def _supported_language_lines() -> str: - return "\n".join(f"- {supported_language_map[code]}" for code in SUPPORTED_TARGET_CODES) - - -def _language_guidance_response(requested_name: str) -> dict[str, Any]: +def _language_guidance_response(requested_name: Optional[str]) -> dict[str, Any]: + label = requested_name or "that language" guidance = ( - f"Translating into {requested_name} is currently not supported.\n\n" - "BT Servant can set your response language to any of:\n\n" - f"{_supported_language_lines()}\n\n" - "Would you like me to set a specific language for your responses?" + f"I couldn't determine how to translate into {label}.\n\n" + "Please mention the language explicitly or provide its ISO 639-1 code (for example: " + "'tr' for Turkish, 'yo' for Yoruba, 'id' for Indonesian) so I can translate " + "Scripture accordingly." ) return _simple_response(guidance) @@ -263,8 +265,8 @@ def _structured_target_language( tl_usage = getattr(tl_resp, "usage", None) track_openai_usage(tl_usage, "gpt-4o", extract_cached_input_tokens_fn, add_tokens) tl_parsed = cast(ResponseLanguage | None, tl_resp.output_parsed) - if tl_parsed and tl_parsed.language != Language.OTHER: - return str(tl_parsed.language.value) + if tl_parsed and tl_parsed.language != LANGUAGE_OTHER: + return str(tl_parsed.language) return None @@ -285,14 +287,24 @@ def _resolve_target_language( explicit_name = _extract_explicit_language(request.query) if structured_code is None else None if structured_code: - return structured_code, None + normalized = normalize_language_code(structured_code) + if normalized and normalized != LANGUAGE_OTHER: + return normalized, None + if explicit_name: + lookup = lookup_language_code(explicit_name) + if lookup: + return lookup, None return None, explicit_name - if request.user_response_language and request.user_response_language in supported_language_map: - return request.user_response_language, None - if request.query_lang and request.query_lang in supported_language_map: - return request.query_lang, None + preferred = normalize_language_code(request.user_response_language) + if preferred and preferred != LANGUAGE_OTHER: + return preferred, None + + detected = normalize_language_code(request.query_lang) + if detected and detected != LANGUAGE_OTHER: + return detected, None + return None, None @@ -352,11 +364,12 @@ def _determine_target_code( dependencies: TranslationDependencies, ) -> str: target_code, requested_name = _resolve_target_language(request, dependencies) - if target_code is None or target_code not in supported_language_map: - fallback_name = supported_language_map.get(target_code or "", "an unsupported language") - name = requested_name or fallback_name - raise TranslationContextError(_language_guidance_response(name)) - return target_code + normalized = normalize_language_code(target_code) + if normalized and normalized != LANGUAGE_OTHER: + return normalized + + name = requested_name or friendly_language_name(target_code, fallback="that language") + raise TranslationContextError(_language_guidance_response(name)) def _load_translation_source(request: TranslationRequestParams) -> TranslationSourceMetadata: @@ -581,7 +594,7 @@ def _build_structured_response( ) -> dict[str, Any]: return { "suppress_translation": True, - "content_language": str(translated.content_language.value), + "content_language": translated.content_language, "header_is_translated": True, "segments": [ { diff --git a/bt_servant_engine/services/passage_followups.py b/bt_servant_engine/services/passage_followups.py index 40c2b7f..0a7c210 100644 --- a/bt_servant_engine/services/passage_followups.py +++ b/bt_servant_engine/services/passage_followups.py @@ -9,7 +9,7 @@ from bt_servant_engine.core.config import config from bt_servant_engine.core.intents import IntentType -from bt_servant_engine.core.language import SUPPORTED_LANGUAGE_MAP +from bt_servant_engine.core.language import friendly_language_name from bt_servant_engine.core.logging import get_logger from utils.bsb import ( BOOK_MAP, @@ -269,7 +269,7 @@ def build_followup_question( raw_target = context.get("target_language") if isinstance(raw_target, str) and raw_target.strip(): target_code = raw_target.strip().lower() - target_label = SUPPORTED_LANGUAGE_MAP.get(target_code, raw_target.strip().title()) + target_label = friendly_language_name(target_code, fallback=raw_target.strip().title()) if target_label: english_question = f"Would you like me to translate {label} into {target_label}?" diff --git a/bt_servant_engine/services/preprocessing.py b/bt_servant_engine/services/preprocessing.py index 11c39f0..7aa4d3a 100644 --- a/bt_servant_engine/services/preprocessing.py +++ b/bt_servant_engine/services/preprocessing.py @@ -20,10 +20,7 @@ UserIntents, UserIntentsStructured, ) -from bt_servant_engine.core.language import ( - Language, - MessageLanguage, -) +from bt_servant_engine.core.language import LANGUAGE_OTHER, MessageLanguage from bt_servant_engine.core.logging import get_logger from bt_servant_engine.services.openai_utils import ( extract_cached_input_tokens as _extract_cached_input_tokens, @@ -334,6 +331,10 @@ The user wants to change the assistant's response language (for example: "Respond in Spanish", "Use Portuguese"). + + The user wants to remove their previously set response language preference so the assistant matches the language + of their future messages (for example: "stop replying in Spanish", "clear my response language setting"). + The user wants to change the agentic strength of the assistant's responses (for example: "Set my agentic strength to low", "Increase the detail of your answers"). Supported levels: normal, low, very_low. @@ -427,6 +428,10 @@ Can you reply to me in French from now on? set-response-language + + Stop forcing a specific response language. + clear-response-language + Set my agentic strength to low. set-agentic-strength @@ -634,7 +639,7 @@ def detect_language(client: OpenAI, text: str, *, agentic_strength: Optional[str usage = getattr(response, "usage", None) track_openai_usage(usage, model_name, _extract_cached_input_tokens, add_tokens) message_language = cast(MessageLanguage | None, response.output_parsed) - predicted = message_language.language.value if message_language else "en" + predicted = message_language.language if message_language else "en" logger.info("language detection (model): %s", predicted) # Heuristic guard: If we predicted Indonesian ('id') but the text looks like @@ -685,7 +690,7 @@ def determine_query_language( ] # If the detected language is not English, also search the matching # language-specific resources collection (e.g., "es_resources"). - if query_language and query_language not in {"en", Language.OTHER.value}: + if query_language and query_language not in {"en", LANGUAGE_OTHER}: localized_collection = f"{query_language}_resources" stack_rank_collections.append(localized_collection) logger.info( diff --git a/bt_servant_engine/services/response_pipeline.py b/bt_servant_engine/services/response_pipeline.py index bbb6655..7a56f1d 100644 --- a/bt_servant_engine/services/response_pipeline.py +++ b/bt_servant_engine/services/response_pipeline.py @@ -12,10 +12,7 @@ from bt_servant_engine.core.agentic import ALLOWED_AGENTIC_STRENGTH from bt_servant_engine.core.config import config -from bt_servant_engine.core.language import ( - LANGUAGE_UNKNOWN, - SUPPORTED_LANGUAGE_MAP as supported_language_map, -) +from bt_servant_engine.core.language import LANGUAGE_UNKNOWN from bt_servant_engine.core.logging import get_logger from bt_servant_engine.services.openai_utils import track_openai_usage from bt_servant_engine.services.preprocessing import detect_language as detect_language_impl @@ -253,19 +250,10 @@ def resolve_target_language( reconstruct_structured_text(resp_item=resp, localize_to=None) for resp in responses_for_translation ] - supported_lang_list = ", ".join(supported_language_map.keys()) - notice = " ".join( - [ - ( - "You haven't set your desired response language and I wasn't able to determine the " - "language of your original message in order to match it." - ), - ( - "You can set your desired response language at any time by saying: " - "Set my response language to Spanish, Indonesian, or any supported language." - ), - f"Supported languages: {supported_lang_list}.", - ] + notice = ( + "You haven't set your desired response language and I couldn't determine the " + "language of your original message. Tell me something like " + "'Set my response language to Turkish (tr)' and I'll use that for future replies." ) passthrough_texts.append(notice) return None, passthrough_texts diff --git a/tests/adapters/test_user_state_adapter.py b/tests/adapters/test_user_state_adapter.py index dbfb704..be2fd93 100644 --- a/tests/adapters/test_user_state_adapter.py +++ b/tests/adapters/test_user_state_adapter.py @@ -43,6 +43,16 @@ def test_response_language_roundtrip(temp_user_db: TinyDB) -> None: assert user_state.get_user_response_language(user_id) == "es" +def test_clear_response_language_removes_value(temp_user_db: TinyDB) -> None: + """Clearing response language removes the persisted field.""" + del temp_user_db + user_id = "clear-lang" + user_state.set_user_response_language(user_id, "fr") + assert user_state.get_user_response_language(user_id) == "fr" + user_state.clear_user_response_language(user_id) + assert user_state.get_user_response_language(user_id) is None + + def test_agentic_strength_roundtrip(temp_user_db: TinyDB) -> None: """Agentic strength enforces the allowed value set.""" del temp_user_db @@ -98,6 +108,9 @@ def fake_get_lang(uid: str) -> str | None: def fake_set_lang(uid: str, lang: str) -> None: record(f"set_lang:{uid}:{lang}") + def fake_clear_lang(uid: str) -> None: + record(f"clear_lang:{uid}") + def fake_get_strength(uid: str) -> str | None: record(f"get_strength:{uid}") return cast(str | None, None) @@ -116,6 +129,7 @@ def fake_is_first(uid: str) -> bool: monkeypatch.setattr(user_state, "update_user_chat_history", fake_update_history) monkeypatch.setattr(user_state, "get_user_response_language", fake_get_lang) monkeypatch.setattr(user_state, "set_user_response_language", fake_set_lang) + monkeypatch.setattr(user_state, "clear_user_response_language", fake_clear_lang) monkeypatch.setattr(user_state, "get_user_agentic_strength", fake_get_strength) monkeypatch.setattr(user_state, "set_user_agentic_strength", fake_set_strength) monkeypatch.setattr(user_state, "set_first_interaction", fake_set_first) @@ -125,6 +139,7 @@ def fake_is_first(uid: str) -> bool: adapter.append_chat_history("u1", "hi", "hello") adapter.get_response_language("u1") adapter.set_response_language("u1", "fr") + adapter.clear_response_language("u1") adapter.get_agentic_strength("u1") adapter.set_agentic_strength("u1", "normal") adapter.set_first_interaction("u1", False) @@ -135,6 +150,7 @@ def fake_is_first(uid: str) -> bool: "append:u1:hi:hello", "get_lang:u1", "set_lang:u1:fr", + "clear_lang:u1", "get_strength:u1", "set_strength:u1:normal", "set_first:u1:False", diff --git a/tests/conftest.py b/tests/conftest.py index 5fdafe4..46e8f48 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,6 +62,11 @@ def get_response_language(self, user_id: str) -> str | None: def set_response_language(self, user_id: str, language: str) -> None: self.save_user_state(user_id, {"response_language": language}) + def clear_response_language(self, user_id: str) -> None: + state = self._states.get(user_id, {"user_id": user_id}).copy() + state.pop("response_language", None) + self._states[user_id] = state + def get_agentic_strength(self, user_id: str) -> str | None: state = self._states.get(user_id, {}) return state.get("agentic_strength") diff --git a/tests/services/intents/test_followup_questions.py b/tests/services/intents/test_followup_questions.py index 44b58a3..45b1ef9 100644 --- a/tests/services/intents/test_followup_questions.py +++ b/tests/services/intents/test_followup_questions.py @@ -187,6 +187,10 @@ def test_set_response_language_has_followup(self): """SET_RESPONSE_LANGUAGE intent has follow-up defined.""" assert IntentType.SET_RESPONSE_LANGUAGE in INTENT_FOLLOWUP_QUESTIONS + def test_clear_response_language_has_followup(self): + """CLEAR_RESPONSE_LANGUAGE intent has follow-up defined.""" + assert IntentType.CLEAR_RESPONSE_LANGUAGE in INTENT_FOLLOWUP_QUESTIONS + def test_retrieve_system_information_has_followup(self): """RETRIEVE_SYSTEM_INFORMATION intent has follow-up defined.""" assert IntentType.RETRIEVE_SYSTEM_INFORMATION in INTENT_FOLLOWUP_QUESTIONS diff --git a/tests/services/intents/test_settings_intents.py b/tests/services/intents/test_settings_intents.py new file mode 100644 index 0000000..09505fe --- /dev/null +++ b/tests/services/intents/test_settings_intents.py @@ -0,0 +1,29 @@ +"""Tests for settings-related intents (response language, agentic strength).""" + +from bt_servant_engine.core.intents import IntentType +from bt_servant_engine.services.intents.settings_intents import ( + ClearResponseLanguageDependencies, + ClearResponseLanguageRequest, + clear_response_language, +) + + +def test_clear_response_language_resets_preference(): + """Clearing response language removes the stored preference and responds.""" + calls: list[str] = [] + + def fake_clear(user_id: str) -> None: + calls.append(user_id) + + request = ClearResponseLanguageRequest(user_id="user-42") + dependencies = ClearResponseLanguageDependencies( + clear_user_response_language=fake_clear + ) + + result = clear_response_language(request, dependencies) + + assert calls == ["user-42"] + assert result["user_response_language"] is None + responses = result["responses"] + assert responses and responses[0]["intent"] == IntentType.CLEAR_RESPONSE_LANGUAGE + assert "Cleared your response-language preference" in responses[0]["response"] diff --git a/tests/test_followup_integration.py b/tests/test_followup_integration.py index 5ac5217..00ba9a2 100644 --- a/tests/test_followup_integration.py +++ b/tests/test_followup_integration.py @@ -195,6 +195,7 @@ def test_different_intents_get_different_followups(self): (IntentType.RETRIEVE_SCRIPTURE, "Bible passage"), (IntentType.GET_TRANSLATION_HELPS, "translation question"), (IntentType.SET_RESPONSE_LANGUAGE, "What else"), + (IntentType.CLEAR_RESPONSE_LANGUAGE, "response language"), (IntentType.GET_BIBLE_TRANSLATION_ASSISTANCE, "person, place, or concept"), (IntentType.CONSULT_FIA_RESOURCES, "FIA process"), ] diff --git a/tests/test_listen_to_scripture_paragraph.py b/tests/test_listen_to_scripture_paragraph.py index 5757ff0..bbdff58 100644 --- a/tests/test_listen_to_scripture_paragraph.py +++ b/tests/test_listen_to_scripture_paragraph.py @@ -6,7 +6,7 @@ import pytest -from bt_servant_engine.core.language import Language, ResponseLanguage +from bt_servant_engine.core.language import ResponseLanguage from bt_servant_engine.core.models import PassageRef, PassageSelection from bt_servant_engine.services import brain_nodes from bt_servant_engine.services.brain_orchestrator import BrainState @@ -52,7 +52,7 @@ def parse_stub(*args: Any, **kwargs: Any): # noqa: ANN401 - test stub return _StubParseResult(sel) if tf is ResponseLanguage: # No explicit requested language in message - return _StubParseResult(ResponseLanguage(language=Language.OTHER)) + return _StubParseResult(ResponseLanguage(language="other")) return _StubParseResult(None) monkeypatch.setattr(brain_nodes.open_ai_client.responses, "parse", parse_stub) diff --git a/tests/test_retrieve_scripture_paragraph.py b/tests/test_retrieve_scripture_paragraph.py index b8afa30..e416cfd 100644 --- a/tests/test_retrieve_scripture_paragraph.py +++ b/tests/test_retrieve_scripture_paragraph.py @@ -6,7 +6,7 @@ import pytest -from bt_servant_engine.core.language import Language, ResponseLanguage +from bt_servant_engine.core.language import ResponseLanguage from bt_servant_engine.core.models import PassageRef, PassageSelection from bt_servant_engine.services import brain_nodes from bt_servant_engine.services.brain_orchestrator import BrainState @@ -52,7 +52,7 @@ def parse_stub(*args: Any, **kwargs: Any): # noqa: ANN401 - test stub return _StubParseResult(sel) if tf is ResponseLanguage: # No explicit requested language in message - return _StubParseResult(ResponseLanguage(language=Language.OTHER)) + return _StubParseResult(ResponseLanguage(language="other")) return _StubParseResult(None) monkeypatch.setattr(brain_nodes.open_ai_client.responses, "parse", parse_stub) diff --git a/tests/test_translate_scripture_intent.py b/tests/test_translate_scripture_intent.py index ee4d9da..16e9d9b 100644 --- a/tests/test_translate_scripture_intent.py +++ b/tests/test_translate_scripture_intent.py @@ -5,7 +5,7 @@ import pytest -from bt_servant_engine.core.language import Language, ResponseLanguage, TranslatedPassage +from bt_servant_engine.core.language import ResponseLanguage, TranslatedPassage from bt_servant_engine.core.models import PassageRef, PassageSelection from bt_servant_engine.services import brain_nodes from bt_servant_engine.services.brain_orchestrator import BrainState @@ -26,11 +26,11 @@ def _parse_stub(*args: Any, **kwargs: Any): # noqa: ANN401 - test stub if text_format is ResponseLanguage: q = current_query.lower() if "dutch" in q: - return _StubParseResult(ResponseLanguage(language=Language.DUTCH)) + return _StubParseResult(ResponseLanguage(language="nl")) if "chinese" in q: - return _StubParseResult(ResponseLanguage(language=Language.MANDARIN)) - # For unsupported languages like Italian or when no target present - return _StubParseResult(ResponseLanguage(language=Language.OTHER)) + return _StubParseResult(ResponseLanguage(language="zh")) + # For unsupported/ambiguous languages fall back to "other" + return _StubParseResult(ResponseLanguage(language="other")) # Passage selection parse: text_format=PassageSelection if text_format is PassageSelection: @@ -80,14 +80,14 @@ def _state_for(query: str) -> BrainState: ) -def test_translate_scripture_translates_with_supported_target(monkeypatch: pytest.MonkeyPatch): - # Arrange: supported target (Dutch), stub both selection parse and translation parse - query = "translate gen 1:1 into dutch" +def test_translate_scripture_translates_with_arbitrary_target(monkeypatch: pytest.MonkeyPatch): + # Arrange: target language (Turkish) was previously unsupported; ensure it now works + query = "translate gen 1:1 into turkish" def parse_stub(*args: Any, **kwargs: Any): # noqa: ANN401 - test stub tf = kwargs.get("text_format") if tf is ResponseLanguage: - return _StubParseResult(ResponseLanguage(language=Language.DUTCH)) + return _StubParseResult(ResponseLanguage(language="tr")) if tf is PassageSelection: sel = PassageSelection( selections=[ @@ -101,8 +101,8 @@ def parse_stub(*args: Any, **kwargs: Any): # noqa: ANN401 - test stub tp = TranslatedPassage( header_book="Genesis", header_suffix="1:1", - body="In den beginne...", - content_language=Language.DUTCH, + body="Başlangıçta...", + content_language="tr", ) return _StubParseResult(tp) return _StubParseResult(None) @@ -117,7 +117,7 @@ def parse_stub(*args: Any, **kwargs: Any): # noqa: ANN401 - test stub item = (out.get("responses") or [])[0] resp = cast(dict, item["response"]) assert resp.get("suppress_translation") is True - assert resp.get("content_language") == "nl" + assert resp.get("content_language") == "tr" segs = cast(list, resp.get("segments")) assert any(s.get("type") == "scripture" for s in segs) @@ -146,11 +146,17 @@ def test_translate_scripture_unsupported_book_returns_selection_error( assert "not recognized" in msg and "Enoch" in msg -def test_translate_scripture_guidance_when_unsupported_target(monkeypatch: pytest.MonkeyPatch): - # Arrange: Italian unsupported; expect guidance message - query = "translate gen 1:1-3 into italian" +def test_translate_scripture_guidance_when_language_unspecified(monkeypatch: pytest.MonkeyPatch): + # Arrange: Parser cannot infer target and no fallbacks available; expect guidance message + query = "translate gen 1:1-3" monkeypatch.setattr(brain_nodes.open_ai_client.responses, "parse", _make_parse_stub(query)) + monkeypatch.setattr( + brain_nodes, + "translate_text", + lambda response_text, target_language, *, agentic_strength=None: response_text, # noqa: ANN001 + ) state = _state_for(query) + state["query_language"] = "" # remove fallback so guidance path triggers # Act out = brain_nodes.handle_translate_scripture(state) @@ -158,4 +164,4 @@ def test_translate_scripture_guidance_when_unsupported_target(monkeypatch: pytes # Assert items = out.get("responses") or [] msg = cast(str, items[0]["response"]) # guidance returns a string - assert "Translating into Italian is currently not supported" in msg + assert "couldn't determine how to translate" in msg.lower() diff --git a/tests/test_translate_scripture_paragraph.py b/tests/test_translate_scripture_paragraph.py index edccdd1..5747525 100644 --- a/tests/test_translate_scripture_paragraph.py +++ b/tests/test_translate_scripture_paragraph.py @@ -5,7 +5,7 @@ import pytest -from bt_servant_engine.core.language import Language, ResponseLanguage, TranslatedPassage +from bt_servant_engine.core.language import ResponseLanguage, TranslatedPassage from bt_servant_engine.core.models import PassageRef, PassageSelection from bt_servant_engine.services import brain_nodes from bt_servant_engine.services.brain_orchestrator import BrainState @@ -39,7 +39,7 @@ def test_translate_scripture_normalizes_whitespace(monkeypatch: pytest.MonkeyPat def parse_stub(*_args: Any, **kwargs: Any): # noqa: ANN401 - test stub tf = kwargs.get("text_format") if tf is ResponseLanguage: - return _StubParseResult(ResponseLanguage(language=Language.DUTCH)) + return _StubParseResult(ResponseLanguage(language="nl")) if tf is PassageSelection: sel = PassageSelection( selections=[ @@ -58,7 +58,7 @@ def parse_stub(*_args: Any, **kwargs: Any): # noqa: ANN401 - test stub header_book="Genesis", header_suffix="1:1-2", body=("In den beginne\n\nschiep God\nde hemel en de aarde."), - content_language=Language.DUTCH, + content_language="nl", ) return _StubParseResult(tp) return _StubParseResult(None) diff --git a/utils/perf.py b/utils/perf.py index a8889ea..83ed188 100644 --- a/utils/perf.py +++ b/utils/perf.py @@ -74,6 +74,7 @@ def clear(self, trace_id: str) -> None: "handle_get_passage_keywords_node": "get-passage-keywords", "handle_get_translation_helps_node": "get-translation-helps", "set_response_language_node": "set-response-language", + "clear_response_language_node": "clear-response-language", "handle_unsupported_function_node": "perform-unsupported-function", "handle_system_information_request_node": "retrieve-system-information", "converse_with_bt_servant_node": "converse-with-bt-servant",