@@ -573,6 +573,11 @@ cdef extern from "miniexpr.h":
573573 int me_compile(const char * expression, const me_variable * variables,
574574 int var_count, me_dtype dtype, int * error, me_expr ** out)
575575
576+ int me_compile_nd(const char * expression, const me_variable * variables,
577+ int var_count, me_dtype dtype, int ndims,
578+ const int64_t * shape, const int32_t * chunkshape,
579+ const int32_t * blockshape, int * error, me_expr ** out)
580+
576581 cdef enum me_compile_status:
577582 ME_COMPILE_SUCCESS
578583 ME_COMPILE_ERR_OOM
@@ -583,9 +588,26 @@ cdef extern from "miniexpr.h":
583588 ME_COMPILE_ERR_VAR_MIXED
584589 ME_COMPILE_ERR_VAR_UNSPECIFIED
585590 ME_COMPILE_ERR_INVALID_ARG_TYPE
591+ ME_COMPILE_ERR_MIXED_TYPE_NESTED
592+
593+ cdef enum me_simd_ulp_mode:
594+ ME_SIMD_ULP_DEFAULT
595+ ME_SIMD_ULP_1
596+ ME_SIMD_ULP_3_5
597+
598+ ctypedef struct me_eval_params:
599+ c_bool disable_simd
600+ me_simd_ulp_mode simd_ulp_mode
601+
602+ int me_eval(const me_expr * expr, const void ** vars_block,
603+ int n_vars, void * output_block, int chunk_nitems,
604+ const me_eval_params * params) nogil
586605
587- int me_eval(const me_expr * expr, const void ** vars_chunk,
588- int n_vars, void * output_chunk, int chunk_nitems) nogil
606+ int me_eval_nd(const me_expr * expr, const void ** vars_block,
607+ int n_vars, void * output_block, int block_nitems,
608+ int64_t nchunk, int64_t nblock, const me_eval_params * params) nogil
609+
610+ int me_nd_valid_nitems(const me_expr * expr, int64_t nchunk, int64_t nblock, int64_t * valid_nitems) nogil
589611
590612 void me_print(const me_expr * n) nogil
591613 void me_free(me_expr * n) nogil
@@ -1860,10 +1882,8 @@ cdef int general_filler(blosc2_prefilter_params *params):
18601882 return 0
18611883
18621884
1863- # Auxiliary function for just miniexpr as a prefilter
1864- # Only meant for (input and output) arrays that:
1865- # 1) Are blosc2.NDArray objects
1866- # 2) Do not have padding
1885+ # Auxiliary function for miniexpr as a prefilter
1886+ # Only meant for (input and output) arrays that are blosc2.NDArray objects.
18671887cdef int aux_miniexpr(me_udata * udata, int64_t nchunk, int32_t nblock,
18681888 c_bool is_postfilter, uint8_t * params_output, int32_t typesize) nogil:
18691889 # Declare all C variables at the beginning
@@ -1880,9 +1900,29 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
18801900 cdef void * src
18811901 cdef int32_t chunk_nbytes, chunk_cbytes, block_nbytes
18821902 cdef int start, blocknitems, expected_blocknitems
1903+ cdef int64_t valid_nitems
18831904 cdef int32_t input_typesize
18841905 cdef blosc2_context* dctx
18851906 expected_blocknitems = - 1
1907+ valid_nitems = 0
1908+
1909+ cdef me_expr* miniexpr_handle = udata.miniexpr_handle
1910+ cdef void * aux_reduc_ptr
1911+
1912+ if miniexpr_handle == NULL :
1913+ raise ValueError (" miniexpr: handle not assigned" )
1914+
1915+ # Query valid (unpadded) items for this block
1916+ rc = me_nd_valid_nitems(miniexpr_handle, nchunk, nblock, & valid_nitems)
1917+ if rc != 0 :
1918+ raise RuntimeError (f" miniexpr: invalid block; error code: {rc}" )
1919+ if valid_nitems <= 0 :
1920+ # Nothing to compute for this block.
1921+ # For reductions, keep aux_reduc neutral values untouched.
1922+ if udata.aux_reduc_ptr == NULL :
1923+ memset(params_output, 0 , udata.array.blocknitems * typesize)
1924+ free(input_buffers)
1925+ return 0
18861926 for i in range (udata.ninputs):
18871927 ndarr = udata.inputs[i]
18881928 input_buffers[i] = malloc(ndarr.sc.blocksize)
@@ -1912,48 +1952,35 @@ cdef int aux_miniexpr(me_udata *udata, int64_t nchunk, int32_t nblock,
19121952 # In the future, perhaps one can create a specific (serial) context just for
19131953 # blosc2_getitem_ctx, but this is probably never going to be necessary.
19141954 dctx = blosc2_create_dctx(BLOSC2_DPARAMS_DEFAULTS)
1915- if nchunk * ndarr.chunknitems + start + blocknitems > ndarr.nitems:
1916- blocknitems = ndarr.nitems - (nchunk * ndarr.chunknitems + start)
1917- if blocknitems <= 0 :
1918- # Should never happen, but anyway
1919- continue
1955+ if valid_nitems > blocknitems:
1956+ raise ValueError (" miniexpr: valid items exceed padded block size" )
19201957 rc = blosc2_getitem_ctx(dctx, src, chunk_cbytes, start, blocknitems,
19211958 input_buffers[i], block_nbytes)
19221959 blosc2_free_ctx(dctx)
19231960 if rc < 0 :
19241961 raise ValueError (" miniexpr: error decompressing the chunk" )
1925-
1926- cdef me_expr* miniexpr_handle = udata.miniexpr_handle
1927- cdef void * aux_reduc_ptr
19281962 # For reduction operations, we need to track which block we're processing
19291963 # The linear_block_index should be based on the INPUT array structure, not the output array
19301964 # Get the first input array's chunk and block structure
19311965 cdef b2nd_array_t* first_input = udata.inputs[0 ]
1932- cdef int nblocks_per_chunk = (first_input.chunknitems + first_input.blocknitems - 1 ) // first_input.blocknitems
1966+ cdef int nblocks_per_chunk = 1
1967+ for i in range (first_input.ndim):
1968+ nblocks_per_chunk *= < int > udata.blocks_in_chunk[i]
19331969 # Calculate the global linear block index: nchunk * blocks_per_chunk + nblock
19341970 # This works because blocks never span chunks (chunks are padded to block boundaries)
19351971 cdef int64_t linear_block_index = nchunk * nblocks_per_chunk + nblock
19361972 cdef uintptr_t offset_bytes = typesize * linear_block_index
19371973
1938- if miniexpr_handle == NULL :
1939- raise ValueError (" miniexpr: handle not assigned" )
1940-
1941- # Skip evaluation if blocknitems is invalid (can happen for padding blocks beyond data)
1942- if blocknitems <= 0 :
1943- # Free resources
1944- for i in range (udata.ninputs):
1945- free(input_buffers[i])
1946- free(input_buffers)
1947- return 0
1948-
19491974 # Call thread-safe miniexpr C API
19501975 if udata.aux_reduc_ptr == NULL :
1951- rc = me_eval (miniexpr_handle, < const void ** > input_buffers, udata.ninputs,
1952- < void * > params_output, blocknitems)
1976+ rc = me_eval_nd (miniexpr_handle, < const void ** > input_buffers, udata.ninputs,
1977+ < void * > params_output, blocknitems, nchunk, nblock, NULL )
19531978 else :
1954- # Reduction operation
1979+ # Reduction operation: evaluate only valid items into a single output element.
1980+ # NOTE: miniexpr handles scalar outputs in me_eval_nd without touching tail bytes.
19551981 aux_reduc_ptr = < void * > (< uintptr_t> udata.aux_reduc_ptr + offset_bytes)
1956- rc = me_eval(miniexpr_handle, < const void ** > input_buffers, udata.ninputs, aux_reduc_ptr, blocknitems)
1982+ rc = me_eval_nd(miniexpr_handle, < const void ** > input_buffers, udata.ninputs,
1983+ aux_reduc_ptr, blocknitems, nchunk, nblock, NULL )
19571984 if rc != 0 :
19581985 raise RuntimeError (f" miniexpr: issues during evaluation; error code: {rc}" )
19591986
@@ -2904,7 +2931,12 @@ cdef class NDArray:
29042931 expression = expression.encode(" utf-8" ) if isinstance (expression, str ) else expression
29052932 cdef me_dtype = me_dtype_from_numpy(self .dtype.num)
29062933 cdef me_expr * out_expr
2907- cdef int rc = me_compile(expression, variables, n, me_dtype, & error, & out_expr)
2934+ cdef int ndims = self .array.ndim
2935+ cdef int64_t* shape = & self .array.shape[0 ]
2936+ cdef int32_t* chunkshape = & self .array.chunkshape[0 ]
2937+ cdef int32_t* blockshape = & self .array.blockshape[0 ]
2938+ cdef int rc = me_compile_nd(expression, variables, n, me_dtype, ndims,
2939+ shape, chunkshape, blockshape, & error, & out_expr)
29082940 if rc == ME_COMPILE_ERR_INVALID_ARG_TYPE:
29092941 raise TypeError (f" miniexpr does not support operand or output dtype: {expression}" )
29102942 if rc != ME_COMPILE_SUCCESS:
0 commit comments