diff --git a/src/unstract/sdk/__init__.py b/src/unstract/sdk/__init__.py index 49f83691..b6a4ba8d 100644 --- a/src/unstract/sdk/__init__.py +++ b/src/unstract/sdk/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.54.0rc3" +__version__ = "0.54.0rc4" def get_sdk_version(): diff --git a/src/unstract/sdk/adapters/utils.py b/src/unstract/sdk/adapters/utils.py index efdf1095..588f0ed7 100644 --- a/src/unstract/sdk/adapters/utils.py +++ b/src/unstract/sdk/adapters/utils.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path import filetype @@ -6,8 +7,11 @@ from requests.exceptions import RequestException from unstract.sdk.adapters.constants import Common +from unstract.sdk.constants import MimeType from unstract.sdk.file_storage import FileStorage, FileStorageProvider +logger = logging.getLogger(__name__) + class AdapterUtils: @staticmethod @@ -25,17 +29,38 @@ def get_msg_from_request_exc( Returns: str: Error message returned by the server """ - if hasattr(err, "response"): - err_response: Response = err.response # type: ignore - if err_response.headers["Content-Type"] == "application/json": - err_json = err_response.json() - if message_key in err_json: - return str(err_json[message_key]) - elif err_response.headers["Content-Type"] == "text/plain": - return err_response.text # type: ignore + if not hasattr(err, "response"): + return default_err + + err_response: Response = err.response # type: ignore + err_content_type = err_response.headers.get("Content-Type") + + if not err_content_type: + logger.warning( + f"Content-Type header not found in {err_response}, " + f"returning {default_err}" + ) + return default_err + + if err_content_type == MimeType.JSON: + err_json = err_response.json() + if message_key in err_json: + return str(err_json[message_key]) + else: + logger.warning( + f"Unable to parse error with key '{message_key}' for " + f"'{err_json}', returning '{default_err}' instead." 
+ ) + elif err_content_type == MimeType.TEXT: + return err_response.text # type: ignore + else: + logger.warning( + f"Unhandled err_response type '{err_content_type}' " + f"for {err_response}, returning {default_err}" + ) return default_err - # ToDo: get_file_mime_type() to be removed once migrated to FileStorage + # TODO: get_file_mime_type() to be removed once migrated to FileStorage # FileStorage has mime_type() which could be used instead. @staticmethod def get_file_mime_type( diff --git a/src/unstract/sdk/adapters/x2text/helper.py b/src/unstract/sdk/adapters/x2text/helper.py index 6cac6c6b..b095b94b 100644 --- a/src/unstract/sdk/adapters/x2text/helper.py +++ b/src/unstract/sdk/adapters/x2text/helper.py @@ -8,6 +8,7 @@ from unstract.sdk.adapters.exceptions import AdapterError from unstract.sdk.adapters.utils import AdapterUtils from unstract.sdk.adapters.x2text.constants import X2TextConstants +from unstract.sdk.constants import MimeType from unstract.sdk.file_storage import FileStorage, FileStorageProvider logger = logging.getLogger(__name__) @@ -111,7 +112,7 @@ def make_request( X2TextConstants.PLATFORM_SERVICE_API_KEY ) headers = { - "accept": "application/json", + "accept": MimeType.JSON, "Authorization": f"Bearer {platform_service_api_key}", } body = { diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer/README.md b/src/unstract/sdk/adapters/x2text/llm_whisperer/README.md index 2b64f31d..0c1a9ea1 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer/README.md +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer/README.md @@ -1,8 +1,8 @@ -# Unstract LLM Whisperer X2Text Adapter +# Unstract LLMWhisperer X2Text Adapter ## Env variables -The below env variables are resolved by LLM Whisperer adapter +The below env variables are resolved by LLMWhisperer adapter | Variable | Description | | ---------------------------- | -------------------------------------------------------------------------------------------- | diff --git 
a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py index 9d3054e8..6b11d65b 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/constants.py @@ -39,7 +39,7 @@ class WhispererEndpoint: class WhispererEnv: - """Env variables for LLM whisperer. + """Env variables for LLMWhisperer. Can be used to alter behaviour at runtime. @@ -89,7 +89,7 @@ class WhisperStatus: class WhispererDefaults: - """Defaults meant for LLM whisperer.""" + """Defaults meant for LLMWhisperer.""" MEDIAN_FILTER_SIZE = 0 GAUSSIAN_BLUR_RADIUS = 0.0 @@ -104,4 +104,3 @@ class WhispererDefaults: PAGE_SEPARATOR = "<<< >>>" MARK_VERTICAL_LINES = False MARK_HORIZONTAL_LINES = False - diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py index d66503d2..e753bed8 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/llm_whisperer.py @@ -27,6 +27,7 @@ WhisperStatus, ) from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter +from unstract.sdk.constants import MimeType from unstract.sdk.file_storage import FileStorage, FileStorageProvider logger = logging.getLogger(__name__) @@ -61,13 +62,13 @@ def get_json_schema() -> str: return schema def _get_request_headers(self) -> dict[str, Any]: - """Obtains the request headers to authenticate with LLM Whisperer. + """Obtains the request headers to authenticate with LLMWhisperer. 
Returns: str: Request headers """ return { - "accept": "application/json", + "accept": MimeType.JSON, WhispererHeader.UNSTRACT_KEY: self.config.get(WhispererConfig.UNSTRACT_KEY), } @@ -79,11 +80,11 @@ def _make_request( params: Optional[dict[str, Any]] = None, data: Optional[Any] = None, ) -> Response: - """Makes a request to LLM whisperer service. + """Makes a request to LLMWhisperer service. Args: request_method (HTTPMethod): HTTPMethod to call. Can be GET or POST - request_endpoint (str): LLM whisperer endpoint to hit + request_endpoint (str): LLMWhisperer endpoint to hit headers (Optional[dict[str, Any]], optional): Headers to pass. Defaults to None. params (Optional[dict[str, Any]], optional): Query params to pass. @@ -119,15 +120,15 @@ def _make_request( except ConnectionError as e: logger.error(f"Adapter error: {e}") raise ExtractorError( - "Unable to connect to LLM Whisperer service, please check the URL" + "Unable to connect to LLMWhisperer service, please check the URL" ) except Timeout as e: - msg = "Request to LLM whisperer has timed out" + msg = "Request to LLMWhisperer has timed out" logger.error(f"{msg}: {e}") raise ExtractorError(msg) except HTTPError as e: logger.error(f"Adapter error: {e}") - default_err = "Error while calling the LLM Whisperer service" + default_err = "Error while calling the LLMWhisperer service" msg = AdapterUtils.get_msg_from_request_exc( err=e, message_key="message", default_err=default_err ) diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json index 344adc95..11786e38 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer/src/static/json_schema.json @@ -1,5 +1,5 @@ { - "title": "LLM Whisperer X2Text", + "title": "LLMWhisperer X2Text", "type": "object", "required": [ "adapter_name", @@ -11,14 +11,14 @@ "type": "string", "title": 
"Name", "default": "", - "description": "Provide a unique name for this adapter instance. Example: LLM Whisperer 1" + "description": "Provide a unique name for this adapter instance. Example: LLMWhisperer 1" }, "url": { "type": "string", "title": "URL", "format": "uri", "default": "https://llmwhisperer-api.unstract.com", - "description": "Provide the URL of the LLM Whisperer service. Please note that this version of LLM Whisperer is deprecated." + "description": "Provide the URL of the LLMWhisperer service. Please note that this version of LLMWhisperer is deprecated." }, "unstract_key": { "type": "string", diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/README.md b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/README.md index 57ea77b5..f33810b3 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/README.md +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/README.md @@ -1,8 +1,8 @@ -# Unstract LLM Whisperer v2 X2Text Adapter +# Unstract LLMWhisperer v2 X2Text Adapter ## Env variables -The below env variables are resolved by LLM Whisperer adapter +The below env variables are resolved by LLMWhisperer adapter | Variable | Description | | ---------------------------- | -------------------------------------------------------------------------------------------- | diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py index 146b5ceb..7e2d7dcf 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/constants.py @@ -33,7 +33,7 @@ class WhispererEndpoint: class WhispererEnv: - """Env variables for LLM whisperer. + """Env variables for LLMWhisperer. Can be used to alter behaviour at runtime. @@ -42,10 +42,13 @@ class WhispererEnv: LLMWhisperer's status API. Defaults to 30s MAX_POLLS: Total number of times to poll the status API. Set to -1 to poll indefinitely.
Defaults to -1 + STATUS_RETRIES: Number of times to retry calling LLMWhisperer's status API + on failure during polling. Defaults to 5. """ POLL_INTERVAL = "ADAPTER_LLMW_POLL_INTERVAL" MAX_POLLS = "ADAPTER_LLMW_MAX_POLLS" + STATUS_RETRIES = "ADAPTER_LLMW_STATUS_RETRIES" class WhispererConfig: @@ -84,7 +87,7 @@ class WhisperStatus: class WhispererDefaults: - """Defaults meant for LLM whisperer.""" + """Defaults meant for LLMWhisperer.""" MEDIAN_FILTER_SIZE = 0 GAUSSIAN_BLUR_RADIUS = 0.0 @@ -94,6 +97,7 @@ class WhispererDefaults: HORIZONTAL_STRETCH_FACTOR = 1.0 POLL_INTERVAL = int(os.getenv(WhispererEnv.POLL_INTERVAL, 30)) MAX_POLLS = int(os.getenv(WhispererEnv.MAX_POLLS, 30)) + STATUS_RETRIES = int(os.getenv(WhispererEnv.STATUS_RETRIES, 5)) PAGES_TO_EXTRACT = "" PAGE_SEPARATOR = "<<<" MARK_VERTICAL_LINES = False diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py index 73fa1b46..f9ed0cb7 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/helper.py @@ -20,23 +20,22 @@ WhispererHeader, WhisperStatus, ) -from unstract.sdk.file_storage.fs_impl import FileStorage -from unstract.sdk.file_storage.fs_provider import FileStorageProvider +from unstract.sdk.constants import MimeType +from unstract.sdk.file_storage import FileStorage, FileStorageProvider logger = logging.getLogger(__name__) class LLMWhispererHelper: - @staticmethod def get_request_headers(config: dict[str, Any]) -> dict[str, Any]: - """Obtains the request headers to authenticate with LLM Whisperer. + """Obtains the request headers to authenticate with LLMWhisperer.
Returns: str: Request headers """ return { - "accept": "application/json", + "accept": MimeType.JSON, WhispererHeader.UNSTRACT_KEY: config.get(WhispererConfig.UNSTRACT_KEY), } @@ -49,11 +48,11 @@ def make_request( params: Optional[dict[str, Any]] = None, data: Optional[Any] = None, ) -> Response: - """Makes a request to LLM whisperer service. + """Makes a request to LLMWhisperer service. Args: request_method (HTTPMethod): HTTPMethod to call. Can be GET or POST - request_endpoint (str): LLM whisperer endpoint to hit + request_endpoint (str): LLMWhisperer endpoint to hit headers (Optional[dict[str, Any]], optional): Headers to pass. Defaults to None. params (Optional[dict[str, Any]], optional): Query params to pass. @@ -89,15 +88,15 @@ def make_request( except ConnectionError as e: logger.error(f"Adapter error: {e}") raise ExtractorError( - "Unable to connect to LLM Whisperer service, please check the URL" + "Unable to connect to LLMWhisperer service, please check the URL" ) except Timeout as e: - msg = "Request to LLM whisperer has timed out" + msg = "Request to LLMWhisperer has timed out" logger.error(f"{msg}: {e}") raise ExtractorError(msg) except HTTPError as e: logger.error(f"Adapter error: {e}") - default_err = "Error while calling the LLM Whisperer service" + default_err = "Error while calling the LLMWhisperer service" msg = AdapterUtils.get_msg_from_request_exc( err=e, message_key="message", default_err=default_err ) @@ -197,14 +196,16 @@ def check_status_until_ready( """ POLL_INTERVAL = WhispererDefaults.POLL_INTERVAL MAX_POLLS = WhispererDefaults.MAX_POLLS + STATUS_RETRY_THRESHOLD = WhispererDefaults.STATUS_RETRIES + status_retry_count = 0 request_count = 0 # Check status in fixed intervals upto max poll count. 
while True: request_count += 1 logger.info( - f"Checking status with interval: {POLL_INTERVAL}s" - f", request count: {request_count} [max: {MAX_POLLS}]" + f"Checking status for whisper-hash '{whisper_hash}' with interval: " + f"{POLL_INTERVAL}s, request count: {request_count} [max: {MAX_POLLS}]" ) status_response = LLMWhispererHelper.make_request( config=config, @@ -216,19 +217,28 @@ def check_status_until_ready( if status_response.status_code == 200: status_data = status_response.json() status = status_data.get(WhisperStatus.STATUS, WhisperStatus.UNKNOWN) - logger.info(f"Whisper status for {whisper_hash}: {status}") + logger.info(f"Whisper status for '{whisper_hash}': {status}") if status in [WhisperStatus.PROCESSED, WhisperStatus.DELIVERED]: break else: - raise ExtractorError( - "Error checking LLMWhisperer status: " - f"{status_response.status_code} - {status_response.text}" - ) + if status_retry_count >= STATUS_RETRY_THRESHOLD: + raise ExtractorError( + f"Error checking LLMWhisperer status for whisper-hash " + f"'{whisper_hash}': {status_response.text}" + ) + else: + status_retry_count += 1 + logger.warning( + f"Whisper status for '{whisper_hash}' failed " + f"{status_retry_count} time(s), retrying... 
" + f"[threshold: {STATUS_RETRY_THRESHOLD}]: {status_response.text}" + ) # Exit with error if max poll count is reached if request_count >= MAX_POLLS: raise ExtractorError( - "Unable to extract text after attempting" f" {request_count} times" + f"Unable to extract text for whisper-hash '{whisper_hash}' " + f"after attempting {request_count} times" ) time.sleep(POLL_INTERVAL) diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py index eecb15c4..94d6b246 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/llm_whisperer_v2.py @@ -16,8 +16,7 @@ ) from unstract.sdk.adapters.x2text.llm_whisperer_v2.src.helper import LLMWhispererHelper from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter -from unstract.sdk.file_storage.fs_impl import FileStorage -from unstract.sdk.file_storage.fs_provider import FileStorageProvider +from unstract.sdk.file_storage import FileStorage, FileStorageProvider logger = logging.getLogger(__name__) diff --git a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/static/json_schema.json b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/static/json_schema.json index e5dcf204..729bc2f8 100644 --- a/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/static/json_schema.json +++ b/src/unstract/sdk/adapters/x2text/llm_whisperer_v2/src/static/json_schema.json @@ -1,5 +1,5 @@ { - "title": "LLM Whisperer X2Text v2", + "title": "LLMWhisperer X2Text v2", "type": "object", "required": [ "adapter_name", @@ -11,7 +11,7 @@ "type": "string", "title": "Name", "default": "llm-whisperer-v2", - "description": "Provide a unique name for this adapter instance. Example: LLM Whisperer 1" + "description": "Provide a unique name for this adapter instance. 
Example: LLMWhisperer 1" }, "url": { "type": "string", diff --git a/src/unstract/sdk/adapters/x2text/no_op/src/no_op_x2text.py b/src/unstract/sdk/adapters/x2text/no_op/src/no_op_x2text.py index d3c604d0..c518a847 100644 --- a/src/unstract/sdk/adapters/x2text/no_op/src/no_op_x2text.py +++ b/src/unstract/sdk/adapters/x2text/no_op/src/no_op_x2text.py @@ -5,8 +5,7 @@ from unstract.sdk.adapters.x2text.dto import TextExtractionResult from unstract.sdk.adapters.x2text.x2text_adapter import X2TextAdapter -from unstract.sdk.file_storage.fs_impl import FileStorage -from unstract.sdk.file_storage.fs_provider import FileStorageProvider +from unstract.sdk.file_storage import FileStorage, FileStorageProvider logger = logging.getLogger(__name__) diff --git a/src/unstract/sdk/constants.py b/src/unstract/sdk/constants.py index a11c4bac..4616b90d 100644 --- a/src/unstract/sdk/constants.py +++ b/src/unstract/sdk/constants.py @@ -166,3 +166,4 @@ class PublicAdapterKeys: class MimeType: PDF = "application/pdf" TEXT = "text/plain" + JSON = "application/json" diff --git a/src/unstract/sdk/index.py b/src/unstract/sdk/index.py index a44fcc41..e1116a79 100644 --- a/src/unstract/sdk/index.py +++ b/src/unstract/sdk/index.py @@ -23,7 +23,7 @@ from unstract.sdk.adapters.x2text.llm_whisperer.src import LLMWhisperer from unstract.sdk.constants import LogLevel from unstract.sdk.embedding import Embedding -from unstract.sdk.exceptions import IndexingError, SdkError +from unstract.sdk.exceptions import IndexingError, SdkError, X2TextError from unstract.sdk.file_storage import FileStorage, FileStorageProvider from unstract.sdk.tool.base import BaseTool from unstract.sdk.utils import ToolUtils @@ -123,7 +123,7 @@ def extract_text( """Extracts text from a document. 
Uses the configured service to perform the extraction - - LLM Whisperer + - LLMWhisperer - Unstructured IO Community / Enterprise - Llama Parse @@ -144,13 +144,13 @@ def extract_text( """ self.tool.stream_log("Extracting text from input file") extracted_text = "" + x2text = X2Text( + tool=self.tool, + adapter_instance_id=x2text_instance_id, + usage_kwargs=usage_kwargs, + ) try: - x2text = X2Text( - tool=self.tool, - adapter_instance_id=x2text_instance_id, - usage_kwargs=usage_kwargs, - ) - if enable_highlight and isinstance(x2text._x2text_instance, LLMWhisperer): + if enable_highlight and isinstance(x2text.x2text_instance, LLMWhisperer): process_response: TextExtractionResult = x2text.process( input_file_path=file_path, output_file_path=output_file_path, @@ -158,20 +158,18 @@ def extract_text( fs=fs, ) whisper_hash_value = process_response.extraction_metadata.whisper_hash - metadata = {X2TextConstants.WHISPER_HASH: whisper_hash_value} - self.tool.update_exec_metadata(metadata) - else: process_response: TextExtractionResult = x2text.process( input_file_path=file_path, output_file_path=output_file_path, fs=fs ) - extracted_text = process_response.extracted_text + # TODO: Handle prepend of context where error is raised and remove this except AdapterError as e: - # Wrapping AdapterErrors with SdkError - raise IndexingError(str(e)) from e + msg = f"Error from text extractor '{x2text.x2text_instance.get_name()}'. " + msg += str(e) + raise X2TextError(msg) from e if process_text: try: result = process_text(extracted_text) @@ -180,7 +178,10 @@ def extract_text( else: logger.warning("'process_text' is expected to return an 'str'") except Exception as e: - logger.error(f"Error occured inside function 'process_text': {e}") + logger.error( + f"Error occured inside callable 'process_text': {e}\n" + "continuing processing..." 
+ ) return extracted_text @log_elapsed(operation="CHECK_AND_INDEX(overall)") @@ -236,29 +237,17 @@ def index( ) self.tool.stream_log(f"Checking if doc_id {doc_id} exists") - try: - embedding = Embedding( - tool=self.tool, - adapter_instance_id=embedding_instance_id, - usage_kwargs=usage_kwargs, - ) - except SdkError as e: - self.tool.stream_log( - f"Error loading {embedding_instance_id}", level=LogLevel.ERROR - ) - raise SdkError(f"Error loading {embedding_instance_id}: {e}") + embedding = Embedding( + tool=self.tool, + adapter_instance_id=embedding_instance_id, + usage_kwargs=usage_kwargs, + ) - try: - vector_db = VectorDB( - tool=self.tool, - adapter_instance_id=vector_db_instance_id, - embedding=embedding, - ) - except SdkError as e: - self.tool.stream_log( - f"Error loading {vector_db_instance_id}", level=LogLevel.ERROR - ) - raise SdkError(f"Error loading {vector_db_instance_id}: {e}") + vector_db = VectorDB( + tool=self.tool, + adapter_instance_id=vector_db_instance_id, + embedding=embedding, + ) try: # Checking if document is already indexed against doc_id diff --git a/src/unstract/sdk/prompt.py b/src/unstract/sdk/prompt.py index fd91762f..aed3396f 100644 --- a/src/unstract/sdk/prompt.py +++ b/src/unstract/sdk/prompt.py @@ -4,7 +4,7 @@ import requests from requests import ConnectionError, RequestException, Response -from unstract.sdk.constants import LogLevel, PromptStudioKeys, ToolEnv +from unstract.sdk.constants import LogLevel, MimeType, PromptStudioKeys, ToolEnv from unstract.sdk.helper import SdkHelper from unstract.sdk.tool.base import BaseTool from unstract.sdk.utils.common_utils import log_elapsed @@ -112,7 +112,7 @@ def _post_call( # Extract error information from the response if available error_message = str(e) content_type = response.headers.get("Content-Type", "").lower() - if "application/json" in content_type: + if MimeType.JSON in content_type: response_json = response.json() if "error" in response_json: error_message = response_json["error"] 
diff --git a/src/unstract/sdk/utils/file_storage_utils.py b/src/unstract/sdk/utils/file_storage_utils.py index ce313a47..35621d09 100644 --- a/src/unstract/sdk/utils/file_storage_utils.py +++ b/src/unstract/sdk/utils/file_storage_utils.py @@ -1,6 +1,6 @@ import logging -from unstract.sdk.file_storage.fs_impl import FileStorage +from unstract.sdk.file_storage.impl import FileStorage logger = logging.getLogger(__name__) diff --git a/src/unstract/sdk/utils/tool_utils.py b/src/unstract/sdk/utils/tool_utils.py index 541c4209..486f04ab 100644 --- a/src/unstract/sdk/utils/tool_utils.py +++ b/src/unstract/sdk/utils/tool_utils.py @@ -165,7 +165,7 @@ def str_to_bool(string: str) -> bool: """ return string.lower() == "true" - # Used the same function from LLM Whisperer + # Used the same function from LLMWhisperer @staticmethod def calculate_page_count( pages_string: str, max_page: int = 0, min_page: int = 1 diff --git a/src/unstract/sdk/x2txt.py b/src/unstract/sdk/x2txt.py index 5cde4378..9b9c9369 100644 --- a/src/unstract/sdk/x2txt.py +++ b/src/unstract/sdk/x2txt.py @@ -36,6 +36,10 @@ def __init__( self._usage_kwargs = usage_kwargs self._initialise() + @property + def x2text_instance(self): + return self._x2text_instance + def _initialise(self): if self._adapter_instance_id: self._x2text_instance = self._get_x2text() @@ -43,9 +47,7 @@ def _initialise(self): def _get_x2text(self) -> X2TextAdapter: try: if not self._adapter_instance_id: - raise X2TextError( - "Adapter instance ID not set. " "Initialisation failed" - ) + raise X2TextError("Adapter instance ID not set. Initialisation failed") x2text_config = ToolAdapter.get_adapter_config( self._tool, self._adapter_instance_id