Skip to content

Commit 32f4ff5

Browse files
authored
DEEP-10: Add check for indra=True, to ensure it's used with read_only=True. (#2826)
* Add check for indra=True, to ensure it's used with read_only=True. * Fixed sample info for indra tensor. * Bump libdeeplake version.
1 parent 3e3990f commit 32f4ff5

File tree

4 files changed

+25
-12
lines changed

4 files changed

+25
-12
lines changed

deeplake/api/dataset.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,16 @@
8181
from deeplake.core.storage.deeplake_memory_object import DeepLakeMemoryObject
8282

8383

84+
def _check_indra_and_read_only_flags(indra: bool, read_only: Optional[bool]):
85+
if indra == False:
86+
return
87+
if read_only == True:
88+
return
89+
raise ValueError(
90+
"'indra = True' is only available for read_only datasets. Please also specify 'read_only = True'."
91+
)
92+
93+
8494
class dataset:
8595
@staticmethod
8696
@spinner
@@ -206,6 +216,7 @@ def init(
206216
Note:
207217
Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
208218
"""
219+
_check_indra_and_read_only_flags(indra, read_only)
209220
access_method, num_workers, scheduler = parse_access_method(access_method)
210221
check_access_method(access_method, overwrite, unlink)
211222

@@ -383,7 +394,6 @@ def empty(
383394
lock_timeout: Optional[int] = 0,
384395
verbose: bool = True,
385396
index_params: Optional[Dict[str, Union[int, str]]] = None,
386-
indra: bool = USE_INDRA,
387397
) -> Dataset:
388398
"""Creates an empty dataset
389399
@@ -408,7 +418,6 @@ def empty(
408418
lock_timeout (int): Number of seconds to wait before throwing a LockException. If None, wait indefinitely
409419
lock_enabled (bool): If true, the dataset manages a write lock. NOTE: Only set to False if you are managing concurrent access externally.
410420
index_params: Optional[Dict[str, Union[int, str]]]: Index parameters used while creating vector store, passed down to dataset.
411-
indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
412421
413422
Returns:
414423
Dataset: Dataset created using the arguments provided.
@@ -448,7 +457,6 @@ def empty(
448457
token=token,
449458
memory_cache_size=memory_cache_size,
450459
local_cache_size=local_cache_size,
451-
indra=indra,
452460
)
453461

454462
feature_report_path(
@@ -615,6 +623,7 @@ def load(
615623
Note:
616624
Any changes made to the dataset in download / local mode will only be made to the local copy and will not be reflected in the original dataset.
617625
"""
626+
_check_indra_and_read_only_flags(indra, read_only)
618627
access_method, num_workers, scheduler = parse_access_method(access_method)
619628
check_access_method(access_method, overwrite=False, unlink=unlink)
620629

@@ -1508,7 +1517,6 @@ def ingest_coco(
15081517
num_workers: int = 0,
15091518
token: Optional[str] = None,
15101519
connect_kwargs: Optional[Dict] = None,
1511-
indra: bool = USE_INDRA,
15121520
**dataset_kwargs,
15131521
) -> Dataset:
15141522
"""Ingest images and annotations in COCO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1562,7 +1570,6 @@ def ingest_coco(
15621570
num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
15631571
token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
15641572
connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect <deeplake.core.dataset.Dataset.connect>`.
1565-
indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
15661573
**dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
15671574
15681575
Returns:
@@ -1605,7 +1612,6 @@ def ingest_coco(
16051612
creds=dest_creds,
16061613
verbose=False,
16071614
token=token,
1608-
indra=indra,
16091615
**dataset_kwargs,
16101616
)
16111617
if connect_kwargs is not None:
@@ -1637,7 +1643,6 @@ def ingest_yolo(
16371643
num_workers: int = 0,
16381644
token: Optional[str] = None,
16391645
connect_kwargs: Optional[Dict] = None,
1640-
indra: bool = USE_INDRA,
16411646
**dataset_kwargs,
16421647
) -> Dataset:
16431648
"""Ingest images and annotations (bounding boxes or polygons) in YOLO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.
@@ -1686,7 +1691,6 @@ def ingest_yolo(
16861691
num_workers (int): The number of workers to use for ingestion. Set to ``0`` by default.
16871692
token (Optional[str]): The token to use for accessing the dataset and/or connecting it to Deep Lake.
16881693
connect_kwargs (Optional[Dict]): If specified, the dataset will be connected to Deep Lake, and connect_kwargs will be passed to :meth:`Dataset.connect <deeplake.core.dataset.Dataset.connect>`.
1689-
indra (bool): Flag indicating whether indra api should be used to create the dataset. Defaults to false
16901694
**dataset_kwargs: Any arguments passed here will be forwarded to the dataset creator function. See :func:`deeplake.empty`.
16911695
16921696
Returns:
@@ -1738,7 +1742,6 @@ def ingest_yolo(
17381742
creds=dest_creds,
17391743
verbose=False,
17401744
token=token,
1741-
indra=indra,
17421745
**dataset_kwargs,
17431746
)
17441747
if connect_kwargs is not None:
@@ -1899,7 +1902,6 @@ def ingest_classification(
18991902
creds=dest_creds,
19001903
token=token,
19011904
verbose=False,
1902-
indra=indra,
19031905
**dataset_kwargs,
19041906
)
19051907
if connect_kwargs is not None:

deeplake/core/dataset/indra_tensor_view.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import deeplake.util.shape_interval as shape_interval
22
from deeplake.core import tensor
3-
from typing import List, Union, Optional
3+
from typing import Dict, List, Union, Optional
44
from deeplake.core.index import Index
55
from deeplake.core.tensor import Any
66
import numpy as np
@@ -151,6 +151,16 @@ def index(self):
151151
except:
152152
return Index(slice(0, len(self)))
153153

154+
@property
def sample_info(self):
    """Sample metadata of the underlying indra tensor.

    Returns the ``sample_info`` reported by ``self.indra_tensor``. When this
    view addresses a single sample (the first index entry is not
    subscriptable), the single entry is returned instead of a one-element
    sequence, mirroring the non-indra tensor behavior. Returns ``None`` when
    the underlying tensor does not expose sample info.
    """
    try:
        info = self.indra_tensor.sample_info
        if not self.index.values[0].subscriptable():
            # Single-sample view: unwrap to the lone entry.
            info = info[0]
        return info
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
    except Exception:
        return None
163+
154164
@property
155165
def shape_interval(self):
156166
return shape_interval.ShapeInterval(

deeplake/core/tests/test_indra_dataset.py

+1
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def test_query(local_auth_ds_generator):
172172

173173
indra_ds = dataset_to_libdeeplake(deeplake_ds)
174174
deeplake_indra_ds = IndraDatasetView(indra_ds=indra_ds)
175+
assert deeplake_indra_ds.image.sample_info == deeplake_ds.image.sample_info
175176

176177
view = deeplake_indra_ds.query("SELECT * GROUP BY label")
177178
assert len(view) == 10

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def libdeeplake_available():
7070
extras_require["all"] = [req_map[r] for r in all_extras]
7171

7272
if libdeeplake_available():
73-
libdeeplake = "libdeeplake==0.0.118"
73+
libdeeplake = "libdeeplake==0.0.119"
7474
extras_require["enterprise"] = [libdeeplake, "pyjwt"]
7575
extras_require["all"].append(libdeeplake)
7676
install_requires.append(libdeeplake)

0 commit comments

Comments
 (0)