1919from .objid import isValidUuid
2020
2121CHUNK_MIN = 512 * 1024 # Soft lower limit (512k)
22- CHUNK_MAX = 8096 * 1024 # Hard upper limit (2M )
22+ CHUNK_MAX = 8096 * 1024 # Hard upper limit (8M )
2323
2424
2525LAYOUT_CLASSES = (
@@ -87,30 +87,36 @@ def estimateDatasetSize(shape_json, item_size, chunk_min=CHUNK_MIN):
8787
def resize_dataset(dset_json, shape):
    """Update the dataset's shape dims to the given shape, if the new shape is valid.

    The dataset JSON is modified in place: on success ``dset_json["shape"]["dims"]``
    is replaced with ``list(shape)``.

    Args:
        dset_json: dataset JSON object (must contain a "shape" entry).
        shape: sequence with the requested extent for each dimension.

    Returns:
        None (also when the requested shape equals the current one).

    Raises:
        TypeError: if the layout class is not "H5D_CHUNKED" or the shape
            class is not "H5S_SIMPLE".
        ValueError: if the rank of ``shape`` doesn't match the dataset, the
            dataset is not extensible, an extent is negative, or an extent
            exceeds the corresponding (bounded) maxdims value.
    """

    layout_class = getDatasetLayoutClass(dset_json)
    if layout_class != "H5D_CHUNKED":
        raise TypeError("Only chunked datasets can be resized")
    shape_class = getShapeClass(dset_json)
    if shape_class != "H5S_SIMPLE":
        raise TypeError(f"dataset with shape class: {shape_class} cannot be resized")
    dims = getShapeDims(dset_json)
    if len(dims) != len(shape):
        raise ValueError("Resize shape parameter doesn't match dataset's rank")
    if not isExtensible(dset_json):
        raise ValueError("Dataset is not resizable")
    maxdims = getMaxDims(dset_json)

    # Normalise both sides so the comparison works whether the dims are held
    # as a list (as stored in the JSON below) or as a tuple.
    if tuple(dims) == tuple(shape):
        # no change, just return
        return None

    rank = getRank(dset_json)
    for i in range(rank):
        extent = shape[i]
        if extent < 0:
            raise ValueError("dimensions can't be negative")
        if maxdims[i] in (0, "H5S_UNLIMITED"):
            # unlimited dimension (either marker): any non-negative extent is ok
            continue
        if extent > maxdims[i]:
            raise ValueError(f"extent for dimension {i} can't be larger than {maxdims[i]}")

    # update the object json with the new dimensions
    shape_json = dset_json["shape"]
    shape_json["dims"] = list(shape)
115121
116122
@@ -185,12 +191,12 @@ def getChunkSize(chunk_dims, type_size: int = 1):
185191def getChunkDims (dset_json ):
186192 """Get chunk layout. Return shape dims for non-chunked layout"""
187193
188- shape_json = dset_json [ "shape" ]
189- if shape_json [ "class" ] == "H5S_NULL" :
194+ shape_class = getShapeClass ( dset_json )
195+ if shape_class == "H5S_NULL" :
190196 return None
191- if shape_json [ "class" ] == "H5S_SCALAR" :
197+ if shape_class == "H5S_SCALAR" :
192198 return (1 , )
193- shape_dims = shape_json [ "dims" ]
199+ shape_dims = getShapeDims ( dset_json )
194200 layout_class = getDatasetLayoutClass (dset_json )
195201 if not layout_class :
196202 return tuple (shape_dims )
@@ -207,7 +213,7 @@ def getChunkDims(dset_json):
207213 return chunk_dims
208214
209215
210- def validateChunkLayout (shape_json , type_json , layout ):
216+ def validateLayout (shape_json , type_json , layout ):
211217 """
212218 Use chunk layout given in the creationPropertiesList (if defined and
213219 layout is valid).
@@ -218,6 +224,7 @@ def validateChunkLayout(shape_json, type_json, layout):
218224 space_dims = None
219225 chunk_dims = None
220226 max_dims = None
227+
221228 item_size = getItemSize (type_json )
222229
223230 if "dims" in shape_json :
@@ -250,7 +257,7 @@ def validateChunkLayout(shape_json, type_json, layout):
250257 if chunk_extent > dim_extent :
251258 msg = "Invalid layout value"
252259 raise ValueError (reason = msg )
253- elif max_dims [i ] != 0 :
260+ elif max_dims [i ] not in ( 0 , "H5S_UNLIMITED" ) :
254261 if chunk_extent > max_dims [i ]:
255262 msg = "Invalid layout value for extensible dimension"
256263 raise ValueError (msg )
@@ -404,7 +411,7 @@ def validateDatasetCreationProps(creation_props, type_json=None, shape=None):
404411 layout_class = None
405412 if "layout" in creation_props :
406413 layout_json = creation_props ["layout" ]
407- validateChunkLayout (shape , type_json , layout_json )
414+ validateLayout (shape , type_json , layout_json )
408415 layout_class = layout_json ["class" ]
409416
410417 if "filters" in creation_props :
@@ -436,7 +443,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN):
436443 if "maxdims" in shape_json :
437444 maxdims = shape_json ["maxdims" ]
438445 for n in range (rank ):
439- if maxdims [n ] == 0 or maxdims [n ] > dims [n ]:
446+ if maxdims [n ] in ( 0 , "H5S_UNLIMITED" ) or maxdims [n ] > dims [n ]:
440447 extendable_dims += 1
441448
442449 dset_size = getDataSize (shape_json , typesize )
@@ -454,7 +461,7 @@ def expandChunk(layout, typesize, shape_json, chunk_min=CHUNK_MIN):
454461 dim = rank - n - 1 # start from last dim
455462
456463 if extendable_dims > 0 :
457- if maxdims [dim ] == 0 :
464+ if maxdims [dim ] in ( 0 , "H5S_UNLIMITED" ) :
458465 # infinitely extendable dimensions
459466 layout [dim ] *= 2
460467 chunk_size = getChunkSize (layout , typesize )
@@ -553,7 +560,7 @@ def guessChunk(shape, typesize, chunk_min=None, chunk_max=None):
553560 typesize = 128 # just take a guess at the item size
554561
555562 # For unlimited dimensions we have to guess. use 1024
556- shape = tuple ((x if x != 0 else 1024 ) for i , x in enumerate (shape ))
563+ shape = tuple ((x if x not in ( 0 , "H5S_UNLIMITED" ) else 1024 ) for i , x in enumerate (shape ))
557564
558565 chunk_size = getChunkSize (shape , typesize )
559566 if chunk_min and chunk_size < chunk_min :
@@ -568,7 +575,7 @@ def guessChunk(shape, typesize, chunk_min=None, chunk_max=None):
568575
569576def generateLayout (
570577 shape_json ,
571- item_size = 0 ,
578+ type_json ,
572579 chunks = None ,
573580 chunk_min = CHUNK_MIN ,
574581 chunk_max = CHUNK_MAX ,
@@ -577,6 +584,9 @@ def generateLayout(
577584
578585 """ Create a dataset layout based on type and shape properties """
579586
587+ item_size = getItemSize (type_json )
588+ if item_size == "H5T_VARIABLE" :
589+ item_size = 128 # take a guess
580590 if item_size < 0 :
581591 raise ValueError ("item_size is invalid" )
582592
@@ -612,6 +622,13 @@ def generateLayout(
612622 chunk_dims = chunks
613623 if len (chunk_dims ) != rank :
614624 raise ValueError ("given chunk dims do not agree with dataset rank" )
625+ for dim in range (rank ):
626+ if max_dims [dim ] in (0 , "H5S_UNLIMITED" ):
627+ pass # unlimited, so any chunk extent is ok
628+ elif chunk_dims [dim ] > max_dims [dim ]:
629+ msg = "Chunk shape must not be greater than data shape in any dimension. "
630+ msg += f"{ chunk_dims } is not compatible with { max_dims } "
631+ raise ValueError ()
615632 else :
616633 pass # otherwise we'll guess a chunk shape below
617634 if not chunk_dims :
@@ -646,12 +663,14 @@ def generateLayout(
646663 layout ["partition_count" ] = partition_count
647664 else :
648665 pass # partition not needed
666+
667+ validateLayout (shape_json , type_json , layout )
649668 return layout
650669
651670
652671def generate_dcpl (
653672 shape_json ,
654- dtype ,
673+ type_json ,
655674 chunks = None ,
656675 filters = [],
657676 chunk_min = CHUNK_MIN ,
@@ -678,12 +697,12 @@ def generate_dcpl(
678697
679698 # End argument validation
680699
681- kwargs = {"item_size" : dtype . itemsize , " has_filter" : filters }
700+ kwargs = {"has_filter" : filters }
682701 kwargs ["chunks" ] = chunks
683702 kwargs ["chunk_min" ] = chunk_min
684703 kwargs ["chunk_max" ] = chunk_max
685704 kwargs ["max_chunks_per_folder" ] = max_chunks_per_folder
686- plist ["layout" ] = generateLayout (shape_json , ** kwargs )
705+ plist ["layout" ] = generateLayout (shape_json , type_json , ** kwargs )
687706
688707 if len (filters ) > 0 :
689708 plist ["filters" ] = filters
@@ -697,3 +716,16 @@ def generate_dcpl(
697716 plist ["initializer" ] = initializer
698717
699718 return plist
719+
720+
def getFillValue(obj_json):
    """Return the fill value or None if not set.

    Args:
        obj_json: either an object JSON containing a "creationProperties"
            entry, or the creation property list itself.

    Returns:
        The value stored under the "fillValue" key, or None when the key
        is absent.
    """

    if "creationProperties" in obj_json:
        cpl = obj_json["creationProperties"]
    else:
        cpl = obj_json  # assume we've been passed a cpl
    # Test for the same key that is read ("fillValue"); a misspelled key here
    # would make the function return None even when a fill value is set.
    if "fillValue" in cpl:
        return cpl["fillValue"]
    return None
0 commit comments