Skip to content

Commit

Permalink
Handle file not found using cache invalidation and retry again (#130)
Browse files Browse the repository at this point in the history
* contextSizeChanges

* contextSizeChanges

* Version roll and test folder check in

* Fix enum values

* Fix test cases, address review comments

* Address review comments

* Update pyproject.toml

Co-authored-by: Chandrasekharan M <[email protected]>
Signed-off-by: Gayathri <[email protected]>

* Address mypy issues

* Change class design and implementation

* Remove unused definitions

* Add cp() and function refactoring

* Check-in sample env

* Default value of dict changed to None

* Add size()

* Refactor for using FileStorage

* Refactor to use FileStorage

* Fix issues

* Add mime_type, download functions

* change comments

* Refactor get_hash_from_file

* Add return types

* Remove permanent file storage from sdk

* Fix SDK functional issues

* Support minio

* Test cases for Minio

* Bring file variants back to sdk

* Fix copy_on_write

* Add new test cases for upload/download

* Add new functions to support platform-service

* Change modification_time return type to datetime

* Refactor env pick-up logic

* Sample env

* contextSizeChanges

* Remove commented code and some improvisations

* contextSizeChanges

* Add right JSON formatted string

* Update src/unstract/sdk/file_storage/fs_permanent.py

Co-authored-by: Chandrasekharan M <[email protected]>
Signed-off-by: Gayathri <[email protected]>

* Address review comments

* Address review comments

* Update src/unstract/sdk/file_storage/fs_shared_temporary.py

Co-authored-by: ali <[email protected]>
Signed-off-by: Gayathri <[email protected]>

* Refactor for change in enum value

* Add return type

* Support glob

* Add function to interface

* Update env format

* Support legacy storage and get_hash_from_file

* Change sample path

* Update test env

* Add yaml_dump function

* add more functions

* Type the args

* Add file not found exception

* Optimise checks

* Setup python version

* Handle file not found using cache invalidation and retry

* Revert a change

* Renaming

* Add env helper for

* Add sample env

* Update src/unstract/sdk/file_storage/env_helper.py

Co-authored-by: Chandrasekharan M <[email protected]>
Signed-off-by: Gayathri <[email protected]>

* Review comments - Address

---------

Signed-off-by: Gayathri <[email protected]>
Co-authored-by: Chandrasekharan M <[email protected]>
Co-authored-by: ali <[email protected]>
  • Loading branch information
3 people authored Dec 3, 2024
1 parent 585dc52 commit 265d5b9
Show file tree
Hide file tree
Showing 11 changed files with 175 additions and 114 deletions.
8 changes: 4 additions & 4 deletions src/unstract/sdk/file_storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"SharedTemporaryFileStorage",
]

from unstract.sdk.file_storage.fs_impl import FileStorage
from unstract.sdk.file_storage.fs_permanent import PermanentFileStorage
from unstract.sdk.file_storage.fs_provider import FileStorageProvider
from unstract.sdk.file_storage.fs_shared_temporary import SharedTemporaryFileStorage
from unstract.sdk.file_storage.helper import FileStorageHelper
from unstract.sdk.file_storage.impl import FileStorage
from unstract.sdk.file_storage.permanent import PermanentFileStorage
from unstract.sdk.file_storage.provider import FileStorageProvider
from unstract.sdk.file_storage.shared_temporary import SharedTemporaryFileStorage
13 changes: 13 additions & 0 deletions src/unstract/sdk/file_storage/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from enum import Enum


class FileOperationParams:
READ_ENTIRE_LENGTH = -1
DEFAULT_ENCODING = "utf-8"
Expand All @@ -7,3 +10,13 @@ class FileSeekPosition:
START = 0
CURRENT = 1
END = 2


class StorageType(Enum):
    """Kinds of file storage that can be requested.

    Each member's value is the lowercase string form of its name.
    """

    # Selects the permanent storage implementation.
    PERMANENT = "permanent"
    # Selects the temporary storage implementation.
    TEMPORARY = "temporary"


class CredentialKeyword:
    """String keys used when reading a storage credential mapping."""

    # Key under which the storage provider identifier is stored.
    PROVIDER = "provider"
    # Key under which the provider-specific credential mapping is stored.
    CREDENTIALS = "credentials"
37 changes: 37 additions & 0 deletions src/unstract/sdk/file_storage/env_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import json
import logging
import os

from unstract.sdk.exceptions import FileStorageError
from unstract.sdk.file_storage.constants import CredentialKeyword, StorageType
from unstract.sdk.file_storage.impl import FileStorage
from unstract.sdk.file_storage.permanent import PermanentFileStorage
from unstract.sdk.file_storage.provider import FileStorageProvider
from unstract.sdk.file_storage.shared_temporary import SharedTemporaryFileStorage

logger = logging.getLogger(__name__)


class EnvHelper:
    """Builds file storage instances from JSON settings held in env variables."""

    @staticmethod
    def get_storage(storage_type: StorageType, env_name: str) -> FileStorage:
        """Create the file storage described by an environment variable.

        The variable named ``env_name`` must contain a JSON object with a
        ``provider`` entry and, optionally, a ``credentials`` object whose
        items are forwarded as keyword arguments to the storage constructor.

        Args:
            storage_type: Which storage flavour to build. Either a
                ``StorageType`` member or its string value is accepted.
            env_name: Name of the environment variable holding the JSON
                settings.

        Returns:
            A ``PermanentFileStorage`` or ``SharedTemporaryFileStorage``
            instance, depending on ``storage_type``.

        Raises:
            KeyError: If the environment variable is not set or the
                ``provider`` entry is missing from the JSON settings.
            NotImplementedError: If ``storage_type`` is not a known
                ``StorageType``.
            FileStorageError: Propagated unchanged if raised while building
                the storage.
        """
        try:
            # Index (rather than .get) so an unset variable surfaces as a
            # KeyError naming the variable instead of an opaque TypeError
            # from json.loads(None).
            file_storage_creds = json.loads(os.environ[env_name])
            provider = FileStorageProvider(
                file_storage_creds[CredentialKeyword.PROVIDER]
            )
            credentials = file_storage_creds.get(CredentialKeyword.CREDENTIALS, {})

            # Normalise so that both enum members (as the annotation
            # promises) and raw string values select the right storage.
            try:
                requested_type = StorageType(storage_type)
            except ValueError:
                raise NotImplementedError(
                    f"Unsupported storage type: {storage_type!r}"
                ) from None

            if requested_type is StorageType.PERMANENT:
                return PermanentFileStorage(provider=provider, **credentials)
            if requested_type is StorageType.TEMPORARY:
                return SharedTemporaryFileStorage(provider=provider, **credentials)
            # Guards against StorageType members added without a matching
            # branch here.
            raise NotImplementedError(f"Unsupported storage type: {storage_type!r}")
        except KeyError as e:
            logger.error(f"Required credentials is missing in the env: {str(e)}")
            raise
25 changes: 23 additions & 2 deletions src/unstract/sdk/file_storage/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import fsspec
from fsspec import AbstractFileSystem

from unstract.sdk.exceptions import FileStorageError
from unstract.sdk.file_storage.fs_provider import FileStorageProvider
from unstract.sdk.exceptions import FileOperationError, FileStorageError
from unstract.sdk.file_storage.provider import FileStorageProvider

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,3 +67,24 @@ def local_file_system_init() -> AbstractFileSystem:
f" file system {e}"
)
raise FileStorageError(str(e)) from e


def skip_local_cache(func):
    """Decorator that retries a call once after invalidating the fs cache.

    If the wrapped callable raises ``FileNotFoundError``, the error may be
    due to a stale cache. The cache is invalidated through
    ``args[0].fs.invalidate_cache()`` and the call is retried once with the
    same arguments.

    Args:
        func: Callable whose first positional argument exposes an ``fs``
            attribute with an ``invalidate_cache()`` method (presumably an
            fsspec file system; confirm against the decorated classes).

    Returns:
        The wrapped callable, carrying the original's name and docstring.

    Raises:
        FileNotFoundError: If the retry also fails with file-not-found.
        FileOperationError: For any other exception raised by the first
            call, by cache invalidation, or by the retry.
    """
    # Local import keeps this block self-contained within the module.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except FileNotFoundError:
            try:
                # FileNotFound could have been caused by stale cache.
                # Hence invalidate cache and retry again
                args[0].fs.invalidate_cache()
                return func(*args, **kwargs)
            except FileNotFoundError:
                # Still missing after a cache refresh: report it as-is.
                raise
            except Exception as e:
                raise FileOperationError(str(e)) from e
        except Exception as e:
            raise FileOperationError(str(e)) from e

    return wrapper
Loading

0 comments on commit 265d5b9

Please sign in to comment.