diff --git a/.travis.yml b/.travis.yml index 9424ed44..1e13cb2c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,8 @@ before_script: - lcov --directory . --zerocounters script: - - make -j 4 - - make test + - make CXX=mpic++ INCLUDE_PATHS=-I/usr/include/mpich MPILIB=-lmpich -j 4 + - make test CXX=mpic++ INCLUDE_PATHS=-I/usr/include/mpich MPILIB=-lmpich - lcov --directory . --capture --output-file coverage.info - lcov --list coverage.info # debug before upload diff --git a/Makefile b/Makefile index bbc8d4c8..924277a7 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,11 @@ # **************** # OS := $(shell uname) +ifneq ($(shell gcc -v 2>&1 | grep -c "clang"),0) + COMPILER := clang +else + COMPILER := gcc +endif # --- Configuration flags --- # CPPFLAGS = -std=gnu++11 -fPIC -fvisibility=hidden \ @@ -13,22 +18,11 @@ ifdef TRAVIS CPPFLAGS += --coverage endif -# --- Use of mmap function for reading --- # -USE_MMAP = -ifeq ($(USE_MMAP),) - USE_MMAP = 1 -endif -ifeq ($(USE_MMAP),1) - CPPFLAGS += -D_TILEDB_USE_MMAP -endif - -# --- Parallel sort --- # -GNU_PARALLEL = -ifeq ($(GNU_PARALLEL),) - GNU_PARALLEL = 1 -endif -ifeq ($(GNU_PARALLEL),1) - CPPFLAGS += -DGNU_PARALLEL +# --- Support for OpenMP --- # +OPENMP_FLAG = +ifeq ($(COMPILER), gcc) + CPPFLAGS += -DOPENMP + OPENMP_FLAG = -fopenmp endif # --- Debug/Release mode handler --- # @@ -42,7 +36,7 @@ ifeq ($(BUILD),release) endif ifeq ($(BUILD),debug) - CPPFLAGS += -DDEBUG -gdwarf-3 -g3 -Wall + CPPFLAGS += -DDEBUG -gdwarf-3 -g3 -Wall endif # --- Verbose mode handler --- # @@ -107,7 +101,7 @@ DOXYGEN_DIR = doxygen DOXYGEN_MAINPAGE = $(DOXYGEN_DIR)/mainpage.dox # --- Paths --- # -INCLUDE_PATHS = +INCLUDE_PATHS = CORE_INCLUDE_PATHS = $(addprefix -I, $(CORE_INCLUDE_SUBDIRS)) EXAMPLES_INCLUDE_PATHS = -I$(EXAMPLES_INCLUDE_DIR) TEST_INCLUDE_PATHS = $(addprefix -I, $(CORE_INCLUDE_SUBDIRS)) @@ -121,6 +115,7 @@ endif ZLIB = -lz OPENSSLLIB = -lcrypto GTESTLIB = -lgtest -lgtest_main +MPILIB = # --- For the TileDB dynamic library 
--- # ifeq ($(OS), Darwin) @@ -179,7 +174,7 @@ $(CORE_OBJ_DIR)/%.o: $(CORE_SRC_DIR)/%.cc @mkdir -p $(dir $@) @echo "Compiling $<" @$(CXX) $(CPPFLAGS) $(INCLUDE_PATHS) $(CORE_INCLUDE_PATHS) -c $< -o $@ - @$(CXX) -MM $(CORE_INCLUDE_PATHS) $< > $(@:.o=.d) + @$(CXX) -MM $(CORE_INCLUDE_PATHS) $(INCLUDE_PATHS) $< > $(@:.o=.d) @mv -f $(@:.o=.d) $(@:.o=.d.tmp) @sed 's|.*:|$@:|' < $(@:.o=.d.tmp) > $(@:.o=.d) @rm -f $(@:.o=.d.tmp) @@ -214,8 +209,8 @@ endif $(CORE_LIB_DIR)/libtiledb.$(SHLIB_EXT): $(CORE_OBJ) @mkdir -p $(CORE_LIB_DIR) @echo "Creating dynamic library libtiledb.$(SHLIB_EXT)" - @$(CXX) $(SHLIB_FLAGS) $(SONAME) -o $@ $^ $(LIBRARY_PATHS) $(ZLIB) \ - $(OPENSSLLIB) -fopenmp + @$(CXX) $(SHLIB_FLAGS) $(SONAME) -o $@ $^ $(LIBRARY_PATHS) $(MPILIB) \ + $(ZLIB) $(OPENSSLLIB) $(OPENMP_FLAG) $(CORE_LIB_DIR)/libtiledb.a: $(CORE_OBJ) @mkdir -p $(CORE_LIB_DIR) @@ -239,11 +234,11 @@ clean_libtiledb: $(EXAMPLES_OBJ_DIR)/%.o: $(EXAMPLES_SRC_DIR)/%.cc @mkdir -p $(EXAMPLES_OBJ_DIR) @echo "Compiling $<" - @$(CXX) $(CPPFLAGS) -fopenmp $(INCLUDE_PATHS) \ + @$(CXX) $(CPPFLAGS) $(OPENMP_FLAG) $(INCLUDE_PATHS) \ $(EXAMPLES_INCLUDE_PATHS) \ $(CORE_INCLUDE_PATHS) -c $< -o $@ @$(CXX) -MM $(EXAMPLES_INCLUDE_PATHS) \ - $(CORE_INCLUDE_PATHS) $< > $(@:.o=.d) + $(CORE_INCLUDE_PATHS) $(INCLUDE_PATHS) $< > $(@:.o=.d) @mv -f $(@:.o=.d) $(@:.o=.d.tmp) @sed 's|.*:|$@:|' < $(@:.o=.d.tmp) > $(@:.o=.d) @rm -f $(@:.o=.d.tmp) @@ -253,8 +248,8 @@ $(EXAMPLES_OBJ_DIR)/%.o: $(EXAMPLES_SRC_DIR)/%.cc $(EXAMPLES_BIN_DIR)/%: $(EXAMPLES_OBJ_DIR)/%.o $(CORE_LIB_DIR)/libtiledb.a @mkdir -p $(EXAMPLES_BIN_DIR) @echo "Creating $@" - @$(CXX) -std=gnu++11 -o $@ $^ $(LIBRARY_PATHS) $(ZLIB) $(OPENSSLLIB) \ - -fopenmp + @$(CXX) -std=gnu++11 -o $@ $^ $(LIBRARY_PATHS) $(MPILIB) $(ZLIB) \ + $(OPENSSLLIB) $(OPENMP_FLAG) # --- Cleaning --- # @@ -274,9 +269,10 @@ clean_examples: $(TEST_OBJ_DIR)/%.o: $(TEST_SRC_DIR)/%.cc @mkdir -p $(dir $@) @echo "Compiling $<" - @$(CXX) $(CPPFLAGS) -fopenmp $(TEST_INCLUDE_PATHS) -c $< -o $@ + 
@$(CXX) $(CPPFLAGS) $(OPENMP_FLAG) $(TEST_INCLUDE_PATHS) \ + $(INCLUDE_PATHS) -c $< -o $@ @$(CXX) -MM $(TEST_INCLUDE_PATHS) \ - $(CORE_INCLUDE_PATHS) $< > $(@:.o=.d) + $(CORE_INCLUDE_PATHS) $(INCLUDE_PATHS) $< > $(@:.o=.d) @mv -f $(@:.o=.d) $(@:.o=.d.tmp) @sed 's|.*:|$@:|' < $(@:.o=.d.tmp) > $(@:.o=.d) @rm -f $(@:.o=.d.tmp) @@ -286,8 +282,8 @@ $(TEST_OBJ_DIR)/%.o: $(TEST_SRC_DIR)/%.cc $(TEST_BIN_DIR)/tiledb_test: $(TEST_OBJ) $(CORE_LIB_DIR)/libtiledb.a @mkdir -p $(TEST_BIN_DIR) @echo "Creating test_cmd" - @$(CXX) -std=gnu++11 -o $@ $^ $(LIBRARY_PATHS) $(ZLIB) $(OPENSSLLIB) \ - $(GTESTLIB) -fopenmp + @$(CXX) -std=gnu++11 -o $@ $^ $(LIBRARY_PATHS) $(MPILIB) $(ZLIB) \ + $(OPENSSLLIB) $(GTESTLIB) $(OPENMP_FLAG) # --- Cleaning --- # diff --git a/core/include/array/aio_request.h b/core/include/array/aio_request.h new file mode 100644 index 00000000..bfb54d47 --- /dev/null +++ b/core/include/array/aio_request.h @@ -0,0 +1,96 @@ +/** + * @file aio_request.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file declares the AIO_Request struct. + */ + +#ifndef __AIO_REQUEST_H__ +#define __AIO_REQUEST_H__ + +#include + +/** Describes an AIO (read or write) request. */ +struct AIO_Request { + /** + * An array of buffers, one for each attribute. These must be + * provided in the same order as the attributes specified in + * array initialization or when resetting the attributes. The case of + * variable-sized attributes is special. Instead of providing a single + * buffer for such an attribute, **two** must be provided: the second + * will hold the variable-sized cell values, whereas the first holds the + * start offsets of each cell in the second buffer. + */ + void** buffers_; + /** + * The sizes (in bytes) allocated by the user for the input + * buffers (there is a one-to-one correspondence). The function will attempt + * to write as many results as can fit in the buffers, and potentially + * alter the buffer size to indicate the size of the *useful* data written + * in the buffer. + */ + size_t* buffer_sizes_; + /** Function to be called upon completion of the request. */ + void *(*completion_handle_) (void*); + /** Data to be passed to the completion handle. */ + void* completion_data_; + /** A unique request id. */ + size_t id_; + /** + * Applicable only to read requests. + * Indicates whether a buffer has overflowed during a read request. + * If it is NULL, it will be ignored. Otherwise, it must be an array + * with as many elements as the number of buffers above. + */ + bool* overflow_; + /** + * The status of the AIO request. It can be one of the following: + * - TILEDB_AIO_COMPLETED + * The request is completed. 
+ * - TILEDB_AIO_INPROGRESS + * The request is still in progress. + * - TILEDB_AIO_OVERFLOW + * At least one of the input buffers overflowed (applicable only to AIO + * read requests) + * - TILEDB_AIO_ERR + * The request caused an error (and thus was canceled). + */ + int* status_; + /** + * The subarray in which the array read/write will be + * constrained on. It should be a sequence of [low, high] pairs (one + * pair per dimension), whose type should be the same as that of the + * coordinates. If it is NULL, then the subarray is set to the entire + * array domain. For the case of writes, this is meaningful only for + * dense arrays, and specifically dense writes. + */ + const void* subarray_; +}; + +#endif + diff --git a/core/include/array/array.h b/core/include/array/array.h index 069bb431..bb8d802f 100644 --- a/core/include/array/array.h +++ b/core/include/array/array.h @@ -33,11 +33,15 @@ #ifndef __ARRAY_H__ #define __ARRAY_H__ +#include "aio_request.h" #include "array_read_state.h" #include "array_schema.h" #include "book_keeping.h" +#include "config.h" #include "constants.h" #include "fragment.h" +#include +#include @@ -78,12 +82,43 @@ class Array { /* ACCESSORS */ /* ********************************* */ + /** + * Enters an indefinite loop that handles all the AIO requests. This is + * executed in the background by the AIO thread. + * + * @return void. + */ + void aio_handle_requests(); + + /** + * Submits an asynchronous (AIO) read request and immediately returns control + * to the caller. The request is queued up and executed in the background by + * another thread. + * + * @param aio_request The AIO read request. + * @return TILEDB_AR_OK for success and TILEDB_AR_ERR for error. + */ + int aio_read(AIO_Request* aio_request); + + /** + * Submits an asynchronous (AIO) write request and immediately returns control + * to the caller. The request is queued up and executed in the background by + * another thread. + * + * @param aio_request The AIO write request. 
+ * @return TILEDB_AR_OK for success and TILEDB_AR_ERR for error. + */ + int aio_write(AIO_Request* aio_request); + /** Returns the array schema. */ const ArraySchema* array_schema() const; /** Returns the ids of the attributes the array focuses on. */ const std::vector& attribute_ids() const; + /** Returns the configuration parameters. */ + const Config* config() const; + /** Returns the number of fragments in this array. */ int fragment_num() const; @@ -93,6 +128,15 @@ class Array { /** Returns the array mode. */ int mode() const; + /** + * Checks if *at least one* attribute buffer has overflown during a read + * operation. + * + * @return *true* if at least one attribute buffer has overflown and *false* + * otherwise. + */ + bool overflow() const; + /** * Checks if an attribute buffer has overflown during a read operation. * @@ -191,6 +235,7 @@ class Array { * the coordinates in the case of sparse arrays). * @param attribute_num The number of the input attributes. If *attributes* is * NULL, then this should be set to 0. + * @param config Configuration parameters. * @return TILEDB_AR_OK on success, and TILEDB_AR_ERR on error. */ int init( @@ -200,7 +245,8 @@ class Array { int mode, const char** attributes, int attribute_num, - const void* range); + const void* range, + const Config* config); /** * Resets the attributes used upon initialization of the array. @@ -272,6 +318,20 @@ class Array { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The AIO mutex condition. */ + pthread_cond_t aio_cond_; + /** Stores the id of the last handled AIO request. */ + size_t aio_last_handled_request_; + /** The AIO mutex. */ + pthread_mutex_t aio_mtx_; + /** The queue that stores the pending AIO requests. */ + std::queue aio_queue_; + /** The thread tha handles all the AIO reads and writes in the background. */ + pthread_t aio_thread_; + /** Indicates whether the AIO thread was canceled or not. 
*/ + bool aio_thread_canceled_; + /** Indicates whether the AIO thread was created or not. */ + bool aio_thread_created_; /** The array schema. */ const ArraySchema* array_schema_; /** The read state of the array. */ @@ -282,6 +342,8 @@ class Array { * reading. */ std::vector attribute_ids_; + /** Configuration parameters. */ + const Config* config_; /** The array fragments. */ std::vector fragments_; /** @@ -303,10 +365,49 @@ class Array { /* ********************************* */ /* PRIVATE METHODS */ /* ********************************* */ + + /** + * Handles an AIO request. + * + * @param aio_request The AIO request. The function will resolve whether it is + * a read or write request based on the array mode. + * @return void. + * + */ + void aio_handle_next_request(AIO_Request* aio_request); + + /** + * Function called by the AIO thread. + * + * @param context This is practically the Array object for which the function + * is called (typically *this* is passed to ths argument by the caller). + */ + static void *aio_handler(void* context); + + /** + * Pusghes an AIO request into the AIO queue. + * + * @return TILEDB_AR_OK for success and TILEDB_AR_ERR for error. + */ + int aio_push_request(AIO_Request* aio_request); + + /** + * Creates the AIO thread. + * + * @return TILEDB_AR_OK for success and TILEDB_AR_ERR for error. + */ + int aio_thread_create(); + + /** + * Destroys the AIO thread. + * + * @return TILEDB_AR_OK for success and TILEDB_AR_ERR for error. + */ + int aio_thread_destroy(); /** * Returns a new fragment name, which is in the form:
- * .___ + * .___ * * Note that this is a temporary name, initiated by a new write process. * After the new fragmemt is finalized, the array will change its name diff --git a/core/include/array/array_read_state.h b/core/include/array/array_read_state.h index cb6e729c..8241f945 100644 --- a/core/include/array/array_read_state.h +++ b/core/include/array/array_read_state.h @@ -69,12 +69,18 @@ class ArrayReadState { /* TYPE DEFINITIONS */ /* ********************************* */ + /** + * Class of fragment cell range objects used in the priority queue algorithm. + */ + template + class PQFragmentCellRange; + /** * Wrapper of comparison function in the priority queue of the fragment cell - * position ranges. + * ranges. */ template - class SmallerFragmentCellRange; + class SmallerPQFragmentCellRange; /** A cell position pair [first, second]. */ typedef std::pair CellPosRange; @@ -89,7 +95,7 @@ class ArrayReadState { typedef std::vector FragmentCellPosRanges; /** A vector of vectors of fragment cell position ranges. */ - typedef std::vector FragmentCellPosRangesVec; + typedef std::vector FragmentCellPosRangesVec; /** * A pair of fragment info and cell range, where the cell range is defined @@ -165,6 +171,12 @@ class ArrayReadState { /** The array this array read state belongs to. */ const Array* array_; + /** The array schema. */ + const ArraySchema* array_schema_; + /** The number of array attributes. */ + int attribute_num_; + /** The size of the array coordinates. */ + size_t coords_size_; /** Indicates whether the read operation for this query is done. */ bool done_; /** State per attribute indicating the number of empty cells written. */ @@ -233,10 +245,8 @@ class ArrayReadState { /** * Computes the relevant fragment cell ranges for the current read run, - * focusing on the **sparse* array case. These cell ranges will be properly - * cut and sorted later on. 
This function also properly updates the start - * bounding coordinates of the active tiles (to exceed the minimum bounding - * coordinates end). + * focusing on the **dense* array case. These cell ranges will be properly + * cut and sorted later on. * * @template T The coordinates type. * @param unsorted_fragment_cell_ranges It will hold the result of this @@ -244,13 +254,15 @@ class ArrayReadState { * @return TILEDB_ARS_OK on success and TILEDB_ARS_ERR on error. */ template - int compute_unsorted_fragment_cell_ranges_sparse( - FragmentCellRanges& unsorted_fragment_cell_ranges); + int compute_unsorted_fragment_cell_ranges_dense( + std::vector& unsorted_fragment_cell_ranges); /** * Computes the relevant fragment cell ranges for the current read run, - * focusing on the **dense* array case. These cell ranges will be properly - * cut and sorted later on. + * focusing on the **sparse* array case. These cell ranges will be properly + * cut and sorted later on. This function also properly updates the start + * bounding coordinates of the active tiles (to exceed the minimum bounding + * coordinates end). * * @template T The coordinates type. * @param unsorted_fragment_cell_ranges It will hold the result of this @@ -258,8 +270,8 @@ class ArrayReadState { * @return TILEDB_ARS_OK on success and TILEDB_ARS_ERR on error. */ template - int compute_unsorted_fragment_cell_ranges_dense( - FragmentCellRanges& unsorted_fragment_cell_ranges); + int compute_unsorted_fragment_cell_ranges_sparse( + std::vector& unsorted_fragment_cell_ranges); /** * Copies the cell ranges calculated in the current read round into the @@ -356,13 +368,20 @@ class ArrayReadState { size_t& buffer_var_offset, const CellPosRange& cell_pos_range); - // TODO + /** + * Gets the next fragment cell ranges that are relevant in the current read + * round, focusing on the dense case. + * + * @template T The coordinates type. + * @return TILEDB_ARS_OK on success and TILEDB_ARS_ERR on error. 
+ */ + template int get_next_fragment_cell_ranges_dense(); /** * Gets the next fragment cell ranges that are relevant in the current read - * round. + * round, focusing on the sparse case. * * @template T The coordinates type. * @return TILEDB_ARS_OK on success and TILEDB_ARS_ERR on error. @@ -576,33 +595,139 @@ class ArrayReadState { */ template int sort_fragment_cell_ranges( - FragmentCellRanges& unsorted_fragment_cell_ranges, + std::vector& unsorted_fragment_cell_ranges, FragmentCellRanges& fragment_cell_ranges) const; }; +/** + * Class of fragment cell range objects used in the priority queue algorithm. + */ +template +class ArrayReadState::PQFragmentCellRange { + public: + /** + * Constructor. + * + * @param array_schema The schema of the array. + * @param fragment_read_states The read states of all fragments in the array. + */ + PQFragmentCellRange( + const ArraySchema* array_schema, + const std::vector* fragment_read_states); + + /** Returns true if the fragment the range belongs to is dense. */ + bool dense() const; + + /** + * Returns true if the calling object begins after the end of the input + * range. + */ + bool begins_after(const PQFragmentCellRange* fcr) const; + + /** Returns true if the calling object ends after the input range. */ + bool ends_after(const PQFragmentCellRange* fcr) const; + + /** Exports information to a fragment cell range. */ + void export_to(FragmentCellRange& fragment_cell_range); + + /** Imports information from a fragment cell range. */ + void import_from(const FragmentCellRange& fragment_cell_range); + + /** + * Returns true if the calling object range must be split by the input + * range. + */ + bool must_be_split(const PQFragmentCellRange* fcr) const; + + /** + * Returns true if the input range must be trimmed by the callling object. + */ + bool must_trim(const PQFragmentCellRange* fcr) const; + + /** + * Splits the calling object into two ranges based on the first input. 
The + * first range will replace the calling object. The second range will be + * stored in the second input. The third input is necessary for the + * splitting. + */ + void split( + const PQFragmentCellRange* fcr, + PQFragmentCellRange* fcr_new, + const T* tile_domain); + + /** + * Splits the calling object into three ranges based on the input fcr. + * - First range: Non-overlapping part of calling object range, stored + * at fcr_left. + * - Second range: A unary range at the left end point of the + * first input, stored at fcr_unary. Note that this may not exist. + * - Third range: The updated calling object range, which is trimmed to + * start after the unary range. + */ + void split_to_3( + const PQFragmentCellRange* fcr, + PQFragmentCellRange* fcr_left, + PQFragmentCellRange* fcr_unary); + + /** + * Trims the first input range to the non-overlapping range stored in + * the second input range. If the cell range of fcr_trimmed is NULL, + * then fcr_trimmed is empty. The third input is necessary for the + * trimming. + */ + void trim( + const PQFragmentCellRange* fcr, + PQFragmentCellRange* fcr_trimmed, + const T* tile_domain) const; + + /** Returns true if the range is unary. */ + bool unary() const; + + /** The cell range as a pair of coordinates. */ + T* cell_range_; + /** The fragment id. */ + int fragment_id_; + /** The tile id of the left endpoint of the cell range. */ + int64_t tile_id_l_; + /** The tile id of the right endpoint of the cell range. */ + int64_t tile_id_r_; + /** The position on disk of the tile corresponding to the cell range. */ + int64_t tile_pos_; + + private: + /** The array schema. */ + const ArraySchema* array_schema_; + /** Size of coordinates. */ + size_t coords_size_; + /** Dimension number. */ + int dim_num_; + /** Stores the read state of each fragment in the array. */ + const std::vector* fragment_read_states_; +}; + /** * Wrapper of comparison function in the priority queue of the fragment cell - * position ranges. + * ranges. 
*/ template -class ArrayReadState::SmallerFragmentCellRange { +class ArrayReadState::SmallerPQFragmentCellRange { public: /** Constructor. */ - SmallerFragmentCellRange(); + SmallerPQFragmentCellRange(); /** Constructor. */ - SmallerFragmentCellRange(const ArraySchema* array_schema); + SmallerPQFragmentCellRange(const ArraySchema* array_schema); /** - * Comparison operator. First the smallest start range endpoint wins, - * then the largest fragment id. + * Comparison operator. First the smallest tile id of the left range end point + * wins, then the smallest start range endpoint, then the largest fragment id. */ bool operator () ( - FragmentCellRange a, - FragmentCellRange b) const; + PQFragmentCellRange* a, + PQFragmentCellRange* b) const; private: /** The array schema. */ diff --git a/core/include/array/array_schema.h b/core/include/array/array_schema.h index 04a0ab44..fbffbc28 100644 --- a/core/include/array/array_schema.h +++ b/core/include/array/array_schema.h @@ -452,10 +452,15 @@ class ArraySchema { * @param domain The targeted domain. * @param cell_coords The input cell coordinates, which the function modifies * to store the next coordinates at termination. + * @param coords_retrieved Will store true if the retrieved coordinates are + * inside the domain, and false otherwise. * @return void */ template - void get_next_cell_coords(const T* domain, T* cell_coords) const; + void get_next_cell_coords( + const T* domain, + T* cell_coords, + bool& coords_retrieved) const; /** * Retrieves the next tile coordinates along the array tile order within a @@ -501,6 +506,18 @@ class ArraySchema { T* tile_domain, T* subarray_in_tile_domain) const; + /** + * Returns the tile position along the array tile order within the input + * domain. Applicable only to **dense** arrays. + * + * @template T The domain type. + * @param tile_coords The tile coordinates. 
+ * @return The tile position of *tile_coords* along the tile order of the + * array inside the array domain, or TILEDB_AS_ERR on error. + */ + template + int64_t get_tile_pos(const T* tile_coords) const; + /** * Returns the tile position along the array tile order within the input * domain. Applicable only to **dense** arrays. @@ -565,8 +582,31 @@ class ArraySchema { template int64_t tile_id(const T* cell_coords) const; + /** + * Checks the tile order of the input coordinates. + * + * @template T The coordinates type. + * @param coords_a The first input coordinates. + * @param coords_b The second input coordinates. + * @return One of the following: + * - -1 if the first coordinates precede the second on the tile order + * - 0 if the two coordinates have the same tile order + * - +1 if the first coordinates succeed the second on the tile order + */ + template + int tile_order_cmp(const T* coords_a, const T* coords_b) const; + + /* ********************************* */ + /* AUXILIARY ATTRIBUTES */ + /* ********************************* */ + + /** + * Auxiliary attribute used in the computation of tile ids, in order to avoid + * repeated allocations and deallocations that impact performance. + */ + void* tile_coords_aux_; private: /* ********************************* */ @@ -613,6 +653,8 @@ class ArraySchema { std::vector compression_; /** Auxiliary variable used when calculating Hilbert ids. */ int* coords_for_hilbert_; + /** The size (in bytes) of the coordinates. */ + size_t coords_size_; /** * Specifies if the array is dense or sparse. If the array is dense, * then the user must specify tile extents (see below). @@ -648,6 +690,16 @@ class ArraySchema { * array has irregular tiles (and, hence, it is sparse). */ void* tile_extents_; + /** + * Offsets for calculating tile positions and ids for the column-major + * tile order. + */ + std::vector tile_offsets_col_; + /** + * Offsets for calculating tile positions and ids for the row-major + * tile order. 
+ */ + std::vector tile_offsets_row_; /** * The tile order. It can be one of the following: * - TILEDB_ROW_MAJOR @@ -723,6 +775,22 @@ class ArraySchema { */ void compute_tile_domain(); + /** + * Computes tile offsets neccessary when computing tile positions and ids. + * + * @return void + */ + void compute_tile_offsets(); + + /** + * Computes tile offsets neccessary when computing tile positions and ids. + * + * @template T The coordinates type. + * @return void + */ + template + void compute_tile_offsets(); + /** * Computes the tile domain. Applicable only to arrays with regular tiles. * @@ -775,10 +843,15 @@ class ArraySchema { * @param domain The targeted domain. * @param cell_coords The input cell coordinates, which the function modifies * to store the next coordinates at termination. + * @param coords_retrieved Will store true if the retrieved coordinates are + * inside the domain, and false otherwise. * @return void */ template - void get_next_cell_coords_col(const T* domain, T* cell_coords) const; + void get_next_cell_coords_col( + const T* domain, + T* cell_coords, + bool& coords_retrieved) const; /** * Retrieves the next coordinates along the array cell order within a given @@ -790,10 +863,15 @@ class ArraySchema { * @param domain The targeted domain. * @param cell_coords The input cell coordinates, which the function modifies * to store the next coordinates at termination. + * @param coords_retrieved Will store true if the retrieved coordinates are + * inside the domain, and false otherwise. 
* @return void */ template - void get_next_cell_coords_row(const T* domain, T* cell_coords) const; + void get_next_cell_coords_row( + const T* domain, + T* cell_coords, + bool& coords_retrieved) const; /** * Retrieves the next tile coordinates along the array tile order within a @@ -853,6 +931,19 @@ class ArraySchema { template void get_previous_cell_coords_row(const T* domain, T* cell_coords) const; + /** + * Returns the tile position along the array tile order within the input + * domain. Applicable only to **dense** arrays, and focusing on the + * **column-major** tile order. + * + * @template T The domain type. + * @param tile_coords The tile coordinates. + * @return The tile position of *tile_coords* along the tile order of the + * array inside the array domain. + */ + template + int64_t get_tile_pos_col(const T* tile_coords) const; + /** * Returns the tile position along the array tile order within the input * domain. Applicable only to **dense** arrays, and focusing on the @@ -871,6 +962,19 @@ class ArraySchema { const T* domain, const T* tile_coords) const; + /** + * Returns the tile position along the array tile order within the input + * domain. Applicable only to **dense** arrays, and focusing on the + * **row-major** tile order. + * + * @template T The domain type. + * @param tile_coords The tile coordinates. + * @return The tile position of *tile_coords* along the tile order of the + * array inside the array domain. + */ + template + int64_t get_tile_pos_row(const T* tile_coords) const; + /** * Returns the tile position along the array tile order within the input * domain. 
Applicable only to **dense** arrays, and focusing on the diff --git a/core/include/c_api/c_api.h b/core/include/c_api/c_api.h index 19a19545..5395964b 100755 --- a/core/include/c_api/c_api.h +++ b/core/include/c_api/c_api.h @@ -34,6 +34,7 @@ #define __C_API_H__ #include "constants.h" +#include #include #include #include @@ -48,6 +49,44 @@ extern "C" { # define TILEDB_EXPORT #endif +/* ********************************* */ +/* CONFIG */ +/* ********************************* */ + +/** Used to pass congiguration parameters to TileDB. */ +typedef struct TileDB_Config { + /** + * The TileDB home directory. If it is set to "" (empty string) or NULL, the + * default home directory will be used, which is ~/.tiledb/. + */ + const char* home_; + /** The MPI communicator. Use NULL if no MPI is used. */ + MPI_Comm* mpi_comm_; + /** + * The method for reading data from a file. + * It can be one of the following: + * - TILEDB_IO_MMAP + * TileDB will use mmap. + * - TILEDB_IO_READ + * TileDB will use standard OS read. + * - TILEDB_IO_MPI + * TileDB will use MPI-IO read. + */ + int read_method_; + /** + * The method for writing data to a file. + * It can be one of the following: + * - TILEDB_IO_WRITE + * TileDB will use standard OS write. + * - TILEDB_IO_MPI + * TileDB will use MPI-IO write. + */ + int write_method_; +} TileDB_Config; + + + + /* ********************************* */ /* CONTEXT */ /* ********************************* */ @@ -59,13 +98,13 @@ typedef struct TileDB_CTX TileDB_CTX; * Initializes the TileDB context. * * @param tiledb_ctx The TileDB context to be initialized. - * @param config_filename The name of the configuration file. If it is NULL or - * not found, TileDB will use its default configuration parameters. + * @param tiledb_config TileDB configuration parameters. If it is NULL, + * TileDB will use its default configuration parameters. * @return TILEDB_OK for success and TILEDB_ERR for error. 
*/ TILEDB_EXPORT int tiledb_ctx_init( TileDB_CTX** tiledb_ctx, - const char* config_filename); + const TileDB_Config* tiledb_config); /** * Finalizes the TileDB context, properly freeing-up memory. @@ -277,7 +316,7 @@ TILEDB_EXPORT int tiledb_array_create( * @param subarray The subarray in which the array read/write will be * constrained on. It should be a sequence of [low, high] pairs (one * pair per dimension), whose type should be the same as that of the - * coordinates. If it is NULL, then the subarray is set to the entire + * coordinates. If it is NULL, then the subarray is set to the entire * array domain. For the case of writes, this is meaningful only for * dense arrays, and specifically dense writes. * @param attributes A subset of the array attributes the read/write will be @@ -1028,6 +1067,100 @@ TILEDB_EXPORT int tiledb_ls( int* dir_types, int* dir_num); + + + +/* ********************************* */ +/* ASYNCHRONOUS I/O (AIO) */ +/* ********************************* */ + +/** Describes an AIO (read or write) request. */ +typedef struct TileDB_AIO_Request { + /** + * An array of buffers, one for each attribute. These must be + * provided in the same order as the attributes specified in + * tiledb_array_init() or tiledb_array_reset_attributes(). The case of + * variable-sized attributes is special. Instead of providing a single + * buffer for such an attribute, **two** must be provided: the second + * holds the variable-sized cell values, whereas the first holds the + * start offsets of each cell in the second buffer. + */ + void** buffers_; + /** + * The sizes (in bytes) allocated by the user for the + * buffers (there is a one-to-one correspondence). In the case of reads, + * the function will attempt + * to write as many results as can fit in the buffers, and potentially + * alter the buffer sizes to indicate the size of the *useful* data written + * in the corresponding buffers. 
+ */ + size_t* buffer_sizes_; + /** Function to be called upon completion of the request. */ + void *(*completion_handle_) (void*); + /** Data to be passed to the completion handle. */ + void* completion_data_; + /** + * Applicable only to read requests. + * Indicates whether a buffer has overflowed during a read request. + * If it is NULL, it will be ignored. Otherwise, it must be an array + * with as many elements as the number of attributes specified in + * tiledb_array_init() or tiledb_array_reset_attributes(). + */ + bool* overflow_; + /** + * The status of the AIO request. It can be one of the following: + * - TILEDB_AIO_COMPLETED + * The request is completed. + * - TILEDB_AIO_INPROGRESS + * The request is still in progress. + * - TILEDB_AIO_OVERFLOW + * At least one of the input buffers overflowed (applicable only to AIO + * read requests) + * - TILEDB_AIO_ERR + * The request caused an error (and thus was canceled). + */ + int status_; + /** + * The subarray in which the array read/write will be + * constrained on. It should be a sequence of [low, high] pairs (one + * pair per dimension), whose type should be the same as that of the + * coordinates. If it is NULL, then the subarray is set to the entire + * array domain. For the case of writes, this is meaningful only for + * dense arrays, and specifically dense writes. + */ + const void* subarray_; +} TileDB_AIO_Request; + +/** + * Issues an asynchronous read request. + * + * @param tiledb_array An initialized TileDB array. + * @param tiledb_aio_request An asynchronous read request. + * @return TILEDB_OK upon success, and TILEDB_ERR upon error. + * + * @note If the same input request is in progress, the function will fail. + * Moreover, if the input request was issued in the past and caused an + * overflow, the new call will resume it IF there was no other request + * in between the two separate calls for the same input request. 
+ * In other words, a new request that is different than the previous + * one resets the internal read state. + */ +TILEDB_EXPORT int tiledb_array_aio_read( + const TileDB_Array* tiledb_array, + TileDB_AIO_Request* tiledb_aio_request); + +/** + * Issues an asynchronous write request. + * + * @param tiledb_array An initialized TileDB array. + * @param tiledb_aio_request An asynchronous write request. + * @return TILEDB_OK upon success, and TILEDB_ERR upon error. + */ +TILEDB_EXPORT int tiledb_array_aio_write( + const TileDB_Array* tiledb_array, + TileDB_AIO_Request* tiledb_aio_request); + + #undef TILEDB_EXPORT #ifdef __cplusplus } diff --git a/core/include/c_api/constants.h b/core/include/c_api/constants.h index 145b52fa..5d088c0d 100644 --- a/core/include/c_api/constants.h +++ b/core/include/c_api/constants.h @@ -37,7 +37,7 @@ #include /** Version. */ -#define TILEDB_VERSION "0.1" +#define TILEDB_VERSION "0.3.0" /**@{*/ /** Return code. */ @@ -58,6 +58,22 @@ #define TILEDB_METADATA_WRITE 1 /**@}*/ +/**@{*/ +/** I/O method. */ +#define TILEDB_IO_MMAP 0 +#define TILEDB_IO_READ 1 +#define TILEDB_IO_MPI 2 +#define TILEDB_IO_WRITE 0 +/**@}*/ + +/**@{*/ +/** Asynchronous I/O (AIO) code. */ +#define TILEDB_AIO_ERR -1 +#define TILEDB_AIO_COMPLETED 0 +#define TILEDB_AIO_INPROGRESS 1 +#define TILEDB_AIO_OVERFLOW 2 +/**@}*/ + /** * The TileDB home directory, where TileDB-related system metadata structures * are kept. 
If it is set to "", then the home directory is set to "~/.tiledb" diff --git a/core/include/fragment/read_state.h b/core/include/fragment/read_state.h index 4510c990..9bf7c0c1 100644 --- a/core/include/fragment/read_state.h +++ b/core/include/fragment/read_state.h @@ -33,6 +33,7 @@ #ifndef __READ_STATE_H__ #define __READ_STATE_H__ +#include "array.h" #include "book_keeping.h" #include "fragment.h" #include @@ -40,6 +41,7 @@ + /* ********************************* */ /* CONSTANTS */ /* ********************************* */ @@ -53,6 +55,7 @@ +class Array; class Fragment; /** Stores the state necessary when reading cells from a fragment. */ @@ -142,6 +145,14 @@ class ReadState { /* MUTATORS */ /* ********************************* */ + /** + * Resets the read state. Note that it does not flush any buffered tiles, so + * that they can be reused later if a subsequent request happens to overlap + * with them. + * + */ + void reset(); + /** * Resets the overflow flag of every attribute to *false*. * @@ -354,14 +365,23 @@ class ReadState { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The array the fragment belongs to. */ + const Array* array_; + /** The array schema. */ + const ArraySchema* array_schema_; + /** The number of array attributes. */ + int attribute_num_; /** The book-keeping of the fragment the read state belongs to. */ BookKeeping* book_keeping_; + /** The size of the array coordinates. */ + size_t coords_size_; /** Indicates if the read operation on this fragment finished. */ bool done_; /** Keeps track of which tile is in main memory for each attribute. */ std::vector fetched_tile_; /** The fragment the read state belongs to. */ const Fragment* fragment_; + std::vector is_empty_attribute_; /** * Last investigated tile coordinates. Applicable only to **sparse** fragments * for **dense** arrays. @@ -411,6 +431,10 @@ class ReadState { void* tile_compressed_; /** Allocated size for internal buffer used in the case of compression. 
*/ size_t tile_compressed_allocated_size_; + /** File offset for each attribute tile. */ + std::vector tiles_file_offsets_; + /** File offset for each variable-sized attribute tile. */ + std::vector tiles_var_file_offsets_; /** * Local tile buffers, one per attribute, plus two for coordinates * (the second one is for searching). @@ -433,6 +457,10 @@ class ReadState { std::vector tiles_var_offsets_; /** Sizes of tiles_var_ (one per attribute). */ std::vector tiles_var_sizes_; + /** Temporary coordinates. */ + void* tmp_coords_; + /** Temporary offset. */ + size_t tmp_offset_; @@ -441,6 +469,19 @@ class ReadState { /* PRIVATE METHODS */ /* ********************************* */ + /** + * Compares input coordinates to coordinates from the search tile. + * + * @param buffer The data buffer to be compared. + * @param tile_offset The offset in the tile where the data comparison + * starts from. + * @return 1 if the compared data are equal, 0 if they are not equal and + * TILEDB_RS_ERR for error. + */ + int CMP_COORDS_TO_SEARCH_TILE( + const void* buffer, + size_t tile_offset); + /** * Computes the number of bytes to copy from the local tile buffers of a given * attribute in the case of variable-sized cells. It takes as input a range @@ -460,16 +501,16 @@ class ReadState { * will store the starting offsets of the variable-sized cells. * @param bytes_var_to_copy The returned bytes to copy into the the buffer * that will store the actual variable-sized cells. - * @return void. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. */ - void compute_bytes_to_copy( + int compute_bytes_to_copy( int attribute_id, int64_t start_cell_pos, int64_t& end_cell_pos, size_t buffer_free_space, size_t buffer_var_free_space, size_t& bytes_to_copy, - size_t& bytes_var_to_copy) const; + size_t& bytes_var_to_copy); /** * Computes the ranges of tile positions that need to be searched for finding @@ -521,7 +562,7 @@ class ReadState { * input coordinates.
*/ template - int64_t get_cell_pos_after(const T* coords) const; + int64_t get_cell_pos_after(const T* coords); /** * Returns the cell position in the search tile that is at or after the @@ -533,7 +574,7 @@ class ReadState { * input coordinates. */ template - int64_t get_cell_pos_at_or_after(const T* coords) const; + int64_t get_cell_pos_at_or_after(const T* coords); /** * Returns the cell position in the search tile that is at or before the @@ -545,166 +586,266 @@ class ReadState { * input coordinates. */ template - int64_t get_cell_pos_at_or_before(const T* coords) const; + int64_t get_cell_pos_at_or_before(const T* coords); /** - * Reads/maps a tile from the disk into a local buffer for an attribute. This - * function focuses on the case there is GZIP compression. + * Retrieves the pointer of the i-th coordinates in the search tile. * - * @param attribute_id The id of the attribute the tile is read for. - * @param tile_i The position of the tile to be read from the disk. + * @param i Indicates the i-th coordinates pointer to be retrieved. + * @param coords The destination pointer. * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. */ - int get_tile_from_disk_cmp_gzip(int attribute_id, int64_t tile_i); + int GET_COORDS_PTR_FROM_SEARCH_TILE( + int64_t i, + const void*& coords); /** - * Reads a tile from the disk into a local buffer for an attribute. This - * function focuses on the case there is no compression. + * Retrieves the pointer of the i-th cell in the offset tile of a + * variable-sized attribute. * - * @param attribute_id The id of the attribute the tile is read for. - * @param tile_i The position of the tile to be read from the disk. + * @param attribute_id The attribute id. + * @param i Indicates the i-th offset pointer to be retrieved. + * @param offset The destination pointer. * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. 
*/ - int get_tile_from_disk_cmp_none(int attribute_id, int64_t tile_i); + int GET_CELL_PTR_FROM_OFFSET_TILE( + int attribute_id, + int64_t i, + const size_t*& offset); - /** - * Reads a tile from the disk into a local buffer for an attribute. This - * function focuses on the case of variable-sized tiles with GZIP compression. - * - * @param attribute_id The id of the attribute the tile is read for. - * @param tile_i The position of the tile to be read from the disk. - * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. - */ - int get_tile_from_disk_var_cmp_gzip(int attribute_id, int64_t tile_i); + /** Returns *true* if the file of the input attribute is empty. */ + bool is_empty_attribute(int attribute_id) const; - /** - * Reads a tile from the disk into a local buffer for an attribute. This - * function focuses on the case of variable-sized tiles with no compression. + /** + * Maps a tile from the disk for an attribute into a local buffer, using + * memory map (mmap). This function focuses on the case of GZIP compression. * - * @param attribute_id The id of the attribute the tile is read for. - * @param tile_i The position of the tile to be read from the disk. - * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + * @param attribute_id The id of the attribute the read occurs for. + * @param offset The offset at which the tile starts in the file. + * @param tile_size The tile size. + * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int get_tile_from_disk_var_cmp_none(int attribute_id, int64_t tile_i); - - /** Returns *true* if the file of the input attribute is empty. */ - bool is_empty_attribute(int attribute_id) const; + int map_tile_from_file_cmp_gzip( + int attribute_id, + off_t offset, + size_t tile_size); /** - * Reads a tile from the disk for an attribute into a local buffer. This - * function focuses on the case there is GZIP compression. 
+ * Maps a variable-sized tile from the disk for an attribute into a local + * buffer, using memory map (mmap). This function focuses on the case of GZIP + * compression. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int read_tile_from_file_cmp_gzip( + int map_tile_from_file_var_cmp_gzip( int attribute_id, off_t offset, size_t tile_size); /** - * Reads a tile from the disk for an attribute into a local buffer. This - * function focuses on the case there is no compression. + * Maps a tile from the disk for an attribute into a local buffer, using + * memory map (mmap). This function focuses on the case of no compression. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int read_tile_from_file_cmp_none( + int map_tile_from_file_cmp_none( int attribute_id, off_t offset, size_t tile_size); /** - * Reads a tile from the disk for an attribute into a local buffer, using - * memory map (mmap). This function is invoked in place of - * ReadState::read_tile_from_file_cmp_gzip if _TILEDB_USE_MMAP is defined. + * Maps a variable-sized tile from the disk for an attribute into a local + * buffer, using memory map (mmap). This function focuses on the case of + * no compression. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. - * @param tile_size The tile size. + * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. 
*/ - int read_tile_from_file_with_mmap_cmp_gzip( + int map_tile_from_file_var_cmp_none( int attribute_id, off_t offset, size_t tile_size); /** * Reads a tile from the disk for an attribute into a local buffer, using - * memory map (mmap). This function is invoked in place of - * ReadState::read_tile_from_file_cmp_none if _TILEDB_USE_MMAP is defined. + * MPI-IO. This function focuses on the case of GZIP compression. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. - * @param tile_size The tile size. + * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int read_tile_from_file_with_mmap_cmp_none( + int mpi_io_read_tile_from_file_cmp_gzip( int attribute_id, off_t offset, size_t tile_size); /** - * Reads a tile from the disk for an attribute into a local buffer. This - * function focuses on the case of variable-sized tiles and GZIP compression. + * Reads a variable-sized tile from the disk for an attribute into a local + * buffer, using MPI-IO. This function focuses on the case of GZIP + * compression. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int read_tile_from_file_var_cmp_gzip( + int mpi_io_read_tile_from_file_var_cmp_gzip( int attribute_id, off_t offset, size_t tile_size); + /** + * Prepares a tile from the disk for reading for an attribute. + * + * @param attribute_id The id of the attribute the tile is prepared for. + * @param tile_i The tile position on the disk. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + */ + int prepare_tile_for_reading(int attribute_id, int64_t tile_i); + + /** + * Prepares a variable-sized tile from the disk for reading for an attribute. 
+ * + * @param attribute_id The id of the attribute the tile is prepared for. + * @param tile_i The tile position on the disk. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + */ + int prepare_tile_for_reading_var(int attribute_id, int64_t tile_i); + + /** + * Prepares a tile from the disk for reading for an attribute. + * This function focuses on the case there is GZIP compression. + * + * @param attribute_id The id of the attribute the tile is prepared for. + * @param tile_i The tile position on the disk. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + */ + int prepare_tile_for_reading_cmp_gzip(int attribute_id, int64_t tile_i); + + /** + * Prepares a tile from the disk for reading for an attribute. + * This function focuses on the case there is no compression. + * + * @param attribute_id The id of the attribute the tile is prepared for. + * @param tile_i The tile position on the disk. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + */ + int prepare_tile_for_reading_cmp_none(int attribute_id, int64_t tile_i); + + /** + * Prepares a tile from the disk for reading for an attribute. + * This function focuses on the case of variable-sized tiles with GZIP + * compression. + * + * @param attribute_id The id of the attribute the tile is prepared for. + * @param tile_i The tile position on the disk. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + */ + int prepare_tile_for_reading_var_cmp_gzip(int attribute_id, int64_t tile_i); + + /** + * Prepares a tile from the disk for reading for an attribute. + * This function focuses on the case of variable-sized tiles with no + * compression. + * + * @param attribute_id The id of the attribute the tile is prepared for. + * @param tile_i The tile position on the disk. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. 
+ */ + int prepare_tile_for_reading_var_cmp_none(int attribute_id, int64_t tile_i); + + /** + * Reads data from an attribute tile into an input buffer. + * + * @param attribute_id The attribute id. + * @param buffer The destination buffer. + * @param tile_offset The offset in the tile where the read starts from. + * @param bytes_to_copy The number of bytes to copy from the tile into the + * buffer. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + */ + int READ_FROM_TILE( + int attribute_id, + void* buffer, + size_t tile_offset, + size_t bytes_to_copy); + + /** + * Reads data from a variable-sized attribute tile into an input buffer. + * + * @param attribute_id The attribute id. + * @param buffer The destination buffer. + * @param tile_offset The offset in the tile where the read starts from. + * @param bytes_to_copy The number of bytes to copy from the tile into the + * buffer. + * @return TILEDB_RS_OK for success and TILEDB_RS_ERR for error. + */ + int READ_FROM_TILE_VAR( + int attribute_id, + void* buffer, + size_t tile_offset, + size_t bytes_to_copy); + /** * Reads a tile from the disk for an attribute into a local buffer. This - * function focuses on the case of variable-sized tiles and no compression. + * function focuses on the case there is GZIP compression. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. - * @param tile_size The tile size. + * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int read_tile_from_file_var_cmp_none( + int read_tile_from_file_cmp_gzip( int attribute_id, off_t offset, size_t tile_size); /** - * Reads a tile from the disk for an attribute into a local buffer, using - * memory map (mmap). This function is invoked in place of - * ReadState::read_tile_from_file_var_cmp_gzip if _TILEDB_USE_MMAP is defined. + * Reads a tile from the disk for an attribute into a local buffer. 
This + * function focuses on the case of variable-sized tiles and GZIP compression. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int read_tile_from_file_with_mmap_var_cmp_gzip( + int read_tile_from_file_var_cmp_gzip( int attribute_id, off_t offset, size_t tile_size); /** - * Reads a tile from the disk for an attribute into a local buffer, using - * memory map (mmap). This function is invoked in place of - * ReadState::read_tile_from_file_var_cmp_none if _TILEDB_USE_MMAP is defined. + * Saves in the read state the file offset for an attribute tile. + * This will be used in subsequent read requests. * * @param attribute_id The id of the attribute the read occurs for. * @param offset The offset at which the tile starts in the file. - * @param tile_size The tile size. * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. */ - int read_tile_from_file_with_mmap_var_cmp_none( + int set_tile_file_offset( int attribute_id, - off_t offset, - size_t tile_size); + off_t offset); + + /** + * Saves in the read state the file offset for a variable-sized attribute + * tile. This will be used in subsequent read requests. + * + * @param attribute_id The id of the attribute the read occurs for. + * @param offset The offset at which the tile starts in the file. + * @return TILEDB_RS_OK for success, and TILEDB_RS_ERR for error. 
+ */ + int set_tile_var_file_offset( + int attribute_id, + off_t offset); /** * Shifts the offsets stored in the tile buffer of the input attribute, such diff --git a/core/include/fragment/write_state.h b/core/include/fragment/write_state.h index 0f4d62d3..64d3db9f 100644 --- a/core/include/fragment/write_state.h +++ b/core/include/fragment/write_state.h @@ -52,6 +52,7 @@ /**@}*/ class BookKeeping; +class Fragment; diff --git a/core/include/metadata/metadata.h b/core/include/metadata/metadata.h index d4a2672b..94db1a50 100644 --- a/core/include/metadata/metadata.h +++ b/core/include/metadata/metadata.h @@ -34,6 +34,7 @@ #define __METADATA_H__ #include "array.h" +#include "config.h" /* ********************************* */ /* CONSTANTS */ @@ -152,6 +153,7 @@ class Metadata { * the key as an extra attribute in the end). * @param attribute_num The number of the input attributes. If *attributes* is * NULL, then this should be set to 0. + * @param config Configuration parameters. * @return TILEDB_MT_OK on success, and TILEDB_MT_ERR on error. */ int init( @@ -160,7 +162,8 @@ class Metadata { const std::vector& book_keeping, int mode, const char** attributes, - int attribute_num); + int attribute_num, + const Config* config); /** * Resets the attributes used upon initialization of the metadata. diff --git a/core/include/misc/utils.h b/core/include/misc/utils.h index 6025f956..97afe96e 100644 --- a/core/include/misc/utils.h +++ b/core/include/misc/utils.h @@ -33,12 +33,15 @@ #ifndef __UTILS_H__ #define __UTILS_H__ +#include #include -#include #include #include +#ifdef OPENMP + #include +#endif /* ********************************* */ @@ -387,20 +390,45 @@ bool is_unary_subarray(const T* subarray, int dim_num); bool is_workspace(const std::string& dir); /** - * Destroys an OpenMP mutex. + * Reads data from a file into a buffer using MPI-IO. * - * @param mtx The mutex to be destroyed. - * @return TILEDB_UT_OK for success, and TILEDB_UT_ERR for error.
+ * @param mpi_comm The MPI communicator. + * @param filename The name of the file. + * @param offset The offset in the file from which the read will start. + * @param buffer The buffer into which the data will be written. + * @param length The size of the data to be read from the file. + * @return TILEDB_UT_OK on success and TILEDB_UT_ERR on error. */ -int mutex_destroy(omp_lock_t* mtx); +int mpi_io_read_from_file( + const MPI_Comm* mpi_comm, + const std::string& filaname, + off_t offset, + void* buffer, + size_t length); +/** + * Writes the input buffer to a file using MPI-IO. + * + * @param mpi_comm The MPI communicator. + * @param filename The name of the file. + * @param buffer The input buffer. + * @param buffer_size The size of the input buffer. + * @return TILEDB_UT_OK on success, and TILEDB_UT_ERR on error. + */ +int mpi_io_write_to_file( + const MPI_Comm* mpi_comm, + const char* filename, + const void* buffer, + size_t buffer_size); + +#ifdef OPENMP /** - * Destroys a pthread mutex. + * Destroys an OpenMP mutex. * * @param mtx The mutex to be destroyed. * @return TILEDB_UT_OK for success, and TILEDB_UT_ERR for error. */ -int mutex_destroy(pthread_mutex_t* mtx); +int mutex_destroy(omp_lock_t* mtx); /** * Initializes an OpenMP mutex. @@ -411,36 +439,45 @@ int mutex_destroy(pthread_mutex_t* mtx); int mutex_init(omp_lock_t* mtx); /** - * Initializes a pthread mutex. + * Locks an OpenMP mutex. * - * @param mtx The mutex to be initialized. + * @param mtx The mutex to be locked. * @return TILEDB_UT_OK for success, and TILEDB_UT_ERR for error. */ -int mutex_init(pthread_mutex_t* mtx); +int mutex_lock(omp_lock_t* mtx); /** - * Locks an OpenMP mutex. + * Unlocks an OpenMP mutex. * - * @param mtx The mutex to be locked. + * @param mtx The mutex to be unlocked. * @return TILEDB_UT_OK for success, and TILEDB_UT_ERR for error. */ -int mutex_lock(omp_lock_t* mtx); +int mutex_unlock(omp_lock_t* mtx); +#endif /** - * Locks a pthread mutex. + * Destroys a pthread mutex. 
* - * @param mtx The mutex to be locked. + * @param mtx The mutex to be destroyed. * @return TILEDB_UT_OK for success, and TILEDB_UT_ERR for error. */ -int mutex_lock(pthread_mutex_t* mtx); +int mutex_destroy(pthread_mutex_t* mtx); /** - * Unlocks an OpenMP mutex. + * Initializes a pthread mutex. * - * @param mtx The mutex to be unlocked. + * @param mtx The mutex to be initialized. * @return TILEDB_UT_OK for success, and TILEDB_UT_ERR for error. */ -int mutex_unlock(omp_lock_t* mtx); +int mutex_init(pthread_mutex_t* mtx); + +/** + * Locks a pthread mutex. + * + * @param mtx The mutex to be locked. + * @return TILEDB_UT_OK for success, and TILEDB_UT_ERR for error. + */ +int mutex_lock(pthread_mutex_t* mtx); /** * Unlocks a pthread mutex. @@ -519,7 +556,7 @@ std::string real_dir(const std::string& dir); bool starts_with(const std::string& value, const std::string& prefix); /** - * Write the input buffer to a file. + * Writes the input buffer to a file. * * @param filename The name of the file. * @param buffer The input buffer. diff --git a/core/include/storage_manager/config.h b/core/include/storage_manager/config.h new file mode 100644 index 00000000..2461f1a6 --- /dev/null +++ b/core/include/storage_manager/config.h @@ -0,0 +1,137 @@ +/** + * @file config.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file defines class Config. + */ + +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#include +#include + + + + +/** This class is responsible for the TileDB configuration parameters. */ +class Config { + public: + /* ********************************* */ + /* CONSTRUCTORS & DESTRUCTORS */ + /* ********************************* */ + + /** Constructor. */ + Config(); + + /** Destructor. */ + ~Config(); + + + + + /* ********************************* */ + /* MUTATORS */ + /* ********************************* */ + + /** + * Initializes the configuration parameters. + * + * @param home The TileDB home directory. + * @param mpi_comm The MPI communicator. + * @param read_method The method for reading data from a file. + * It can be one of the following: + * - TILEDB_IO_READ + * TileDB will use POSIX read. + * - TILEDB_IO_MMAP + * TileDB will use mmap. + * - TILEDB_IO_MPI + * TileDB will use MPI-IO read. + * @param write_method The method for writing data to a file. + * It can be one of the following: + * - TILEDB_IO_WRITE + * TileDB will use POSIX write. + * - TILEDB_IO_MPI + * TileDB will use MPI-IO write. + * @return void. + */ + void init( + const char* home, + MPI_Comm* mpi_comm, + int read_method, + int write_methods); + + + /* ********************************* */ + /* ACCESSORS */ + /* ********************************* */ + + /** Returns the TileDB home directory.
*/ + const std::string& home() const; + + /** Returns the MPI communicator. */ + MPI_Comm* mpi_comm() const; + + /** Returns the read method. */ + int read_method() const; + + /** Returns the write method. */ + int write_method() const; + + private: + /* ********************************* */ + /* PRIVATE ATTRIBUTES */ + /* ********************************* */ + + /** TileDB home directory. */ + std::string home_; + /** The MPI communicator. */ + MPI_Comm* mpi_comm_; + /** + * The method for reading data from a file. + * It can be one of the following: + * - TILEDB_IO_READ + * TileDB will use POSIX read. + * - TILEDB_IO_MMAP + * TileDB will use mmap. + * - TILEDB_IO_MPI + * TileDB will use MPI-IO read. + */ + int read_method_; + /** + * The method for writing data to a file. + * It can be one of the following: + * - TILEDB_IO_WRITE + * TileDB will use POSIX write. + * - TILEDB_IO_MPI + * TileDB will use MPI-IO write. + */ + int write_method_; +}; + +#endif diff --git a/core/include/storage_manager/storage_manager.h b/core/include/storage_manager/storage_manager.h index 20a70772..ce0dbbfc 100755 --- a/core/include/storage_manager/storage_manager.h +++ b/core/include/storage_manager/storage_manager.h @@ -37,11 +37,14 @@ #include "array_iterator.h" #include "array_schema.h" #include "array_schema_c.h" +#include "config.h" #include "metadata.h" #include "metadata_iterator.h" #include "metadata_schema_c.h" #include -#include +#ifdef OPENMP + #include +#endif #include #include @@ -105,16 +108,16 @@ class StorageManager { int finalize(); /** - * Initializes the storage manager. This function create the TileDB home + * Initializes the storage manager. This function creates the TileDB home * directory, which by default is "~/.tiledb/". If the user home directory * cannot be retrieved, then the TileDB home directory is set to the current * working directory. * - * @param config_filename The input configuration file name.
If it is NULL, + * @param config The configuration parameters. If it is NULL, * then the default TileDB parameters are used. * @return TILEDB_SM_OK for success and TILEDB_SM_ERR for error. */ - int init(const char* config_filename); + int init(Config* config); @@ -479,10 +482,14 @@ class StorageManager { /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** The TileDB configuration parameters. */ + Config* config_; /** The directory of the master catalog. */ std::string master_catalog_dir_; /** OpneMP mutex for creating/deleting an OpenArray object. */ +#ifdef OPENMP omp_lock_t open_array_omp_mtx_; +#endif /** Pthread mutex for creating/deleting an OpenArray object. */ pthread_mutex_t open_array_pthread_mtx_; /** Stores the currently open arrays. */ @@ -586,21 +593,12 @@ class StorageManager { OpenArray*& open_array); /** - * It sets the TileDB configuration parameters from a file. + * It sets the TileDB configuration parameters. * - * @param config_filename The name of the configuration file. - * Each line in the file correspond to a single parameter, and should - * be in the form (i.e., space-separated). + * @param config The configuration parameters. * @return TILEDB_SM_OK for success, and TILEDB_SM_ERR for error. */ - int config_set(const char* config_filename); - - /** - * Sets the TileDB configuration parameters to default values. - * - * @return void - */ - void config_set_default(); + int config_set(Config* config); /** * Creates a special file that serves as lock needed for implementing @@ -849,7 +847,9 @@ class StorageManager::OpenArray { * An OpenMP mutex used to lock the array when loading the array schema and * the book-keeping structures from the disk. */ +#ifdef OPENMP omp_lock_t omp_mtx_; +#endif /** * A pthread mutex used to lock the array when loading the array schema and * the book-keeping structures from the disk. 
diff --git a/core/src/array/array.cc b/core/src/array/array.cc index d781563f..d312e7ec 100644 --- a/core/src/array/array.cc +++ b/core/src/array/array.cc @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -70,6 +69,7 @@ Array::Array() { array_read_state_ = NULL; array_schema_ = NULL; subarray_ = NULL; + aio_thread_created_ = false; } Array::~Array() { @@ -95,6 +95,99 @@ Array::~Array() { /* ACCESSORS */ /* ****************************** */ +void Array::aio_handle_requests() { + // Holds the next AIO request + AIO_Request* aio_next_request; + + // Initiate infinite loop + while(1) { + // Lock AIO mutext + if(pthread_mutex_lock(&aio_mtx_)) { + PRINT_ERROR("Cannot lock AIO mutex"); + return; + } + + + // Wait for AIO requests + while(aio_queue_.size() == 0) { + // If the thread is canceled, unblock and exit + if(aio_thread_canceled_) { + if(pthread_mutex_unlock(&aio_mtx_)) + PRINT_ERROR("Cannot unlock AIO mutex while canceling AIO thread"); + else + aio_thread_created_ = false; + return; + } + + // Wait to be signaled + if(pthread_cond_wait(&aio_cond_, &aio_mtx_)) { + PRINT_ERROR("Cannot wait on AIO mutex condition"); + return; + } + } + + // Pop the next AIO request + aio_next_request = aio_queue_.front(); + aio_queue_.pop(); + + // Unlock AIO mutext + if(pthread_mutex_unlock(&aio_mtx_)) { + PRINT_ERROR("Cannot unlock AIO mutex"); + return; + } + + // Handle the next AIO request + aio_handle_next_request(aio_next_request); + + // Set last handled AIO request + aio_last_handled_request_ = aio_next_request->id_; + + // Clean request + free(aio_next_request); + } +} + +int Array::aio_read(AIO_Request* aio_request) { + // Sanity checks + if(mode_ != TILEDB_ARRAY_READ) { + PRINT_ERROR("Cannot (async) read from array; Invalid mode"); + return TILEDB_AR_ERR; + } + + // Create the AIO thread if not already done + if(!aio_thread_created_) + if(aio_thread_create() != TILEDB_AR_OK) + return TILEDB_ERR; + + // Push the AIO request in the queue 
+ if(aio_push_request(aio_request) != TILEDB_AR_OK) + return TILEDB_AR_ERR; + + // Success + return TILEDB_AR_OK; +} + +int Array::aio_write(AIO_Request* aio_request) { + // Sanity checks + if(mode_ != TILEDB_ARRAY_WRITE && + mode_ != TILEDB_ARRAY_WRITE_UNSORTED) { + PRINT_ERROR("Cannot (async) write to array; Invalid mode"); + return TILEDB_AR_ERR; + } + + // Create the AIO thread if not already done + if(!aio_thread_created_) + if(aio_thread_create() != TILEDB_AR_OK) + return TILEDB_ERR; + + // Push the AIO request in the queue + if(aio_push_request(aio_request) != TILEDB_AR_OK) + return TILEDB_AR_ERR; + + // Success + return TILEDB_AR_OK; +} + const ArraySchema* Array::array_schema() const { return array_schema_; } @@ -103,6 +196,10 @@ const std::vector& Array::attribute_ids() const { return attribute_ids_; } +const Config* Array::config() const { + return config_; +} + int Array::fragment_num() const { return fragments_.size(); } @@ -115,6 +212,18 @@ int Array::mode() const { return mode_; } +bool Array::overflow() const { + // Not applicable to writes + if(mode_ != TILEDB_ARRAY_READ) + return false; + + for(int i=0; icoords_size(); subarray_ = malloc(subarray_size); @@ -387,6 +522,20 @@ int Array::init( array_read_state_ = new ArrayReadState(this); } + // Initialize the AIO-related members + aio_cond_ = PTHREAD_COND_INITIALIZER; + if(pthread_mutex_init(&aio_mtx_, NULL)) { + PRINT_ERROR("Cannot initialize AIO mutex"); + return TILEDB_AR_ERR; + } + if(pthread_cond_init(&aio_cond_, NULL)) { + PRINT_ERROR("Cannot initialize AIO mutex condition"); + return TILEDB_AR_ERR; + } + aio_thread_canceled_ = false; + aio_thread_created_ = false; + aio_last_handled_request_ = 0; + // Return return TILEDB_AR_OK; } @@ -428,11 +577,18 @@ int Array::reset_attributes( } int Array::reset_subarray(const void* subarray) { - // Sanity check on mode - if(mode_ != TILEDB_ARRAY_READ) { - PRINT_ERROR("Cannot reset subarray; Invalid array mode"); - return TILEDB_AR_ERR; - } + // For easy 
referencd + int fragment_num = fragments_.size(); + + // Finalize fragments if in write mode + if(mode_ != TILEDB_ARRAY_READ) { + // Finalize and delete fragments + for(int i=0; ifinalize(); + delete fragments_[i]; + } + fragments_.clear(); + } // Set subarray size_t subarray_size = 2*array_schema_->coords_size(); @@ -443,17 +599,30 @@ int Array::reset_subarray(const void* subarray) { else memcpy(subarray_, subarray, subarray_size); - // Re-initialize the read state of the fragments - int fragment_num = fragments_.size(); - for(int i=0; ireset_read_state(); + // Re-set of re-initialize fragments + if(mode_ != TILEDB_ARRAY_READ) { // WRITE MODE + // Get new fragment name + std::string new_fragment_name = this->new_fragment_name(); + if(new_fragment_name == "") + return TILEDB_AS_ERR; - // Re-initialize array read state - if(array_read_state_ != NULL) { - delete array_read_state_; - array_read_state_ = NULL; + // Create new fragment + Fragment* fragment = new Fragment(this); + fragments_.push_back(fragment); + if(fragment->init(new_fragment_name, mode_, subarray) != TILEDB_FG_OK) + return TILEDB_AR_ERR; + } else if(mode_ == TILEDB_ARRAY_READ) { // READ MODE + // Re-initialize the read state of the fragments + for(int i=0; ireset_read_state(); + + // Re-initialize array read state + if(array_read_state_ != NULL) { + delete array_read_state_; + array_read_state_ = NULL; + } + array_read_state_ = new ArrayReadState(this); } - array_read_state_ = new ArrayReadState(this); // Success return TILEDB_AR_OK; @@ -504,6 +673,136 @@ int Array::write(const void** buffers, const size_t* buffer_sizes) { /* PRIVATE METHODS */ /* ****************************** */ +void Array::aio_handle_next_request(AIO_Request* aio_request) { + int rc = TILEDB_AR_OK; + if(mode_ == TILEDB_ARRAY_READ) { // READ + // Reset the subarray only if this request does not continue from the last + if(aio_last_handled_request_ != aio_request->id_) + rc = reset_subarray(aio_request->subarray_); + + // Invoke the 
read + if(rc == TILEDB_AR_OK) + rc = read(aio_request->buffers_, aio_request->buffer_sizes_); + } else { // WRITE + rc = write( + (const void**) aio_request->buffers_, + (const size_t*) aio_request->buffer_sizes_); + } + + if(rc == TILEDB_AR_OK) { // Success + // Check for overflow + if(overflow()) { + *aio_request->status_= TILEDB_AIO_OVERFLOW; + if(aio_request->overflow_ != NULL) { + for(int i=0; ioverflow_[i] = overflow(attribute_ids_[i]); + } + } else { // Completion + *aio_request->status_= TILEDB_AIO_COMPLETED; + + // Invoke the callback + if(aio_request->completion_handle_ != NULL) + (*(aio_request->completion_handle_))(aio_request->completion_data_); + } + } else { // Error + *aio_request->status_= TILEDB_AIO_ERR; + } +} + +void *Array::aio_handler(void* context) { + // This will enter an indefinite loop that will handle all incoming AIO + // requests + ((Array*) context)->aio_handle_requests(); + + // Return + return NULL; +} + +int Array::aio_push_request(AIO_Request* aio_request) { + // Set the request status + *aio_request->status_ = TILEDB_AIO_INPROGRESS; + + // Lock AIO mutext + if(pthread_mutex_lock(&aio_mtx_)) { + PRINT_ERROR("Cannot lock AIO mutex"); + return TILEDB_AR_ERR; + } + + // Push request + aio_queue_.push(aio_request); + + // Unlock AIO mutext + if(pthread_mutex_unlock(&aio_mtx_)) { + PRINT_ERROR("Cannot unlock AIO mutex"); + return TILEDB_AR_ERR; + } + + // Signal AIO thread + if(pthread_cond_signal(&aio_cond_)) { + PRINT_ERROR("Cannot signal AIO thread"); + return TILEDB_AR_ERR; + } + + // Success + return TILEDB_AR_OK; +} + +int Array::aio_thread_create() { + // Trivial case + if(aio_thread_created_) + return TILEDB_AR_OK; + + // Create the thread that will be handling all AIO requests + if(pthread_create(&aio_thread_, NULL, Array::aio_handler, this)) { + PRINT_ERROR("Cannot create AIO thread"); + return TILEDB_AR_ERR; + } + + aio_thread_created_ = true; + + // Success + return TILEDB_AR_OK; +} + +int Array::aio_thread_destroy() { + // 
Trivial case + if(!aio_thread_created_) + return TILEDB_AR_OK; + + // Lock AIO mutext + if(pthread_mutex_lock(&aio_mtx_)) { + PRINT_ERROR("Cannot lock AIO mutex while destroying AIO thread"); + return TILEDB_AR_ERR; + } + + // Signal the cancelation so that the thread unblocks + aio_thread_canceled_ = true; + if(pthread_cond_signal(&aio_cond_)) { + PRINT_ERROR("Cannot signal AIO thread while destroying AIO thread"); + return TILEDB_AR_ERR; + } + + // Unlock AIO mutext + if(pthread_mutex_unlock(&aio_mtx_)) { + PRINT_ERROR("Cannot unlock AIO mutex while destroying AIO thread"); + return TILEDB_AR_ERR; + } + + // Wait for cancelation to take place + while(aio_thread_created_); + + // Cancel thread + if(pthread_cancel(aio_thread_)) { + PRINT_ERROR("Cannot destroy AIO thread"); + return TILEDB_AR_ERR; + } + + aio_thread_created_ = false; + + // Success + return TILEDB_AR_OK; +} + std::string Array::new_fragment_name() const { struct timeval tp; gettimeofday(&tp, NULL); diff --git a/core/src/array/array_read_state.cc b/core/src/array/array_read_state.cc index 516fd8de..401bb6e7 100644 --- a/core/src/array/array_read_state.cc +++ b/core/src/array/array_read_state.cc @@ -67,19 +67,20 @@ ArrayReadState::ArrayReadState( const Array* array) : array_(array) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int attribute_num = array_schema->attribute_num(); + array_schema_ = array_->array_schema(); + attribute_num_ = array_schema_->attribute_num(); + coords_size_ = array_schema_->coords_size(); // Initializations done_ = false; - empty_cells_written_.resize(attribute_num+1); - fragment_cell_pos_ranges_vec_pos_.resize(attribute_num+1); + empty_cells_written_.resize(attribute_num_+1); + fragment_cell_pos_ranges_vec_pos_.resize(attribute_num_+1); min_bounding_coords_end_ = NULL; - read_round_done_.resize(attribute_num); + read_round_done_.resize(attribute_num_); subarray_tile_coords_ = NULL; subarray_tile_domain_ = NULL; - for(int i=0; 
iarray_schema(); - int attribute_num = array_schema->attribute_num(); - // Reset overflow - overflow_.resize(attribute_num+1); - for(int i=0; ireset_overflow(); - if(array_schema->dense()) // DENSE + if(array_schema_->dense()) // DENSE return read_dense(buffers, buffer_sizes); else // SPARSE return read_sparse(buffers, buffer_sizes); @@ -152,14 +154,10 @@ int ArrayReadState::read( /* ****************************** */ void ArrayReadState::clean_up_processed_fragment_cell_pos_ranges() { - // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int attribute_num = array_schema->attribute_num(); - // Find the minimum overlapping tile position across all attributes const std::vector& attribute_ids = array_->attribute_ids(); int attribute_id_num = attribute_ids.size(); - int min_pos = fragment_cell_pos_ranges_vec_pos_[0]; + int64_t min_pos = fragment_cell_pos_ranges_vec_pos_[0]; for(int i=1; iarray_schema(); - int dim_num = array_schema->dim_num(); - int fragment_i; + int dim_num = array_schema_->dim_num(); + int fragment_id; int64_t fragment_cell_ranges_num = fragment_cell_ranges.size(); // Compute fragment cell position ranges for(int64_t i=0; idense()) { // DENSE + fragment_id = fragment_cell_ranges[i].first.first; + if(fragment_id == -1 || + fragment_read_states_[fragment_id]->dense()) { // DENSE // Create a new fragment cell position range FragmentCellPosRange fragment_cell_pos_range; fragment_cell_pos_range.first = fragment_cell_ranges[i].first; CellPosRange& cell_pos_range = fragment_cell_pos_range.second; T* cell_range = static_cast(fragment_cell_ranges[i].second); - cell_pos_range.first = array_schema->get_cell_pos(cell_range); - cell_pos_range.second = array_schema->get_cell_pos(&cell_range[dim_num]); + cell_pos_range.first = array_schema_->get_cell_pos(cell_range); + cell_pos_range.second = array_schema_->get_cell_pos(&cell_range[dim_num]); // Insert into the result fragment_cell_pos_ranges.push_back(fragment_cell_pos_range); } else 
{ // SPARSE @@ -238,13 +237,11 @@ int ArrayReadState::compute_fragment_cell_pos_ranges( template void ArrayReadState::compute_min_bounding_coords_end() { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); + int dim_num = array_schema_->dim_num(); // Allocate memeory if(min_bounding_coords_end_ == NULL) - min_bounding_coords_end_ = malloc(coords_size); + min_bounding_coords_end_ = malloc(coords_size_); T* min_bounding_coords_end = static_cast(min_bounding_coords_end_); // Compute min bounding coords end @@ -256,15 +253,15 @@ void ArrayReadState::compute_min_bounding_coords_end() { memcpy( min_bounding_coords_end, &fragment_bounding_coords[dim_num], - coords_size); + coords_size_); first = false; - } else if(array_schema->tile_cell_order_cmp( + } else if(array_schema_->tile_cell_order_cmp( &fragment_bounding_coords[dim_num], min_bounding_coords_end) < 0) { memcpy( min_bounding_coords_end, &fragment_bounding_coords[dim_num], - coords_size); + coords_size_); } } } @@ -272,7 +269,7 @@ void ArrayReadState::compute_min_bounding_coords_end() { template int ArrayReadState::compute_unsorted_fragment_cell_ranges_dense( - FragmentCellRanges& unsorted_fragment_cell_ranges) { + std::vector& unsorted_fragment_cell_ranges) { // Compute cell ranges for all fragments for(int i=0; idone()) { @@ -284,30 +281,32 @@ int ArrayReadState::compute_unsorted_fragment_cell_ranges_dense( fragment_cell_ranges) != TILEDB_RS_OK) return TILEDB_ARS_ERR; // Insert fragment cell ranges to the result - unsorted_fragment_cell_ranges.insert( - unsorted_fragment_cell_ranges.end(), - fragment_cell_ranges.begin(), - fragment_cell_ranges.end()); + unsorted_fragment_cell_ranges.push_back(fragment_cell_ranges); } else { // SPARSE FragmentCellRanges fragment_cell_ranges; + FragmentCellRanges fragment_cell_ranges_tmp; do { // Get next overlapping tiles 
fragment_read_states_[i]->get_next_overlapping_tile_sparse( static_cast(subarray_tile_coords_)); // Get fragment cell ranges - fragment_cell_ranges.clear(); + fragment_cell_ranges_tmp.clear(); if(fragment_read_states_[i]->get_fragment_cell_ranges_sparse( i, - fragment_cell_ranges) != TILEDB_RS_OK) + fragment_cell_ranges_tmp) != TILEDB_RS_OK) return TILEDB_ARS_ERR; - // Insert fragment cell ranges to the result - unsorted_fragment_cell_ranges.insert( - unsorted_fragment_cell_ranges.end(), - fragment_cell_ranges.begin(), - fragment_cell_ranges.end()); + // Insert fragment cell ranges to temporary ranges + fragment_cell_ranges.insert( + fragment_cell_ranges.end(), + fragment_cell_ranges_tmp.begin(), + fragment_cell_ranges_tmp.end()); } while(!fragment_read_states_[i]->done() && - fragment_read_states_[i]->mbr_overlaps_tile()); + fragment_read_states_[i]->mbr_overlaps_tile()); + unsorted_fragment_cell_ranges.push_back(fragment_cell_ranges); } + } else { + // Append an empty list + unsorted_fragment_cell_ranges.push_back(FragmentCellRanges()); } } @@ -317,11 +316,9 @@ int ArrayReadState::compute_unsorted_fragment_cell_ranges_dense( template int ArrayReadState::compute_unsorted_fragment_cell_ranges_sparse( - FragmentCellRanges& unsorted_fragment_cell_ranges) { + std::vector& unsorted_fragment_cell_ranges) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); + int dim_num = array_schema_->dim_num(); T* min_bounding_coords_end = static_cast(min_bounding_coords_end_); // Compute the relevant fragment cell ranges @@ -330,7 +327,7 @@ int ArrayReadState::compute_unsorted_fragment_cell_ranges_sparse( // Compute new fragment cell ranges if(fragment_bounding_coords != NULL && - array_schema->tile_cell_order_cmp( + array_schema_->tile_cell_order_cmp( fragment_bounding_coords, min_bounding_coords_end) <= 0) { FragmentCellRanges fragment_cell_ranges; @@ -341,10 
+338,7 @@ int ArrayReadState::compute_unsorted_fragment_cell_ranges_sparse( fragment_cell_ranges) != TILEDB_RS_OK) return TILEDB_ARS_ERR; - unsorted_fragment_cell_ranges.insert( - unsorted_fragment_cell_ranges.end(), - fragment_cell_ranges.begin(), - fragment_cell_ranges.end()); + unsorted_fragment_cell_ranges.push_back(fragment_cell_ranges); // If the end bounding coordinate is not the same as the smallest one, // update the start bounding coordinate to exceed the smallest @@ -352,7 +346,7 @@ int ArrayReadState::compute_unsorted_fragment_cell_ranges_sparse( if(memcmp( &fragment_bounding_coords[dim_num], min_bounding_coords_end, - coords_size)) { + coords_size_)) { // Get the first coordinates AFTER the min bounding coords end bool coords_retrieved; if(fragment_read_states_[i]->get_coords_after( @@ -365,6 +359,9 @@ int ArrayReadState::compute_unsorted_fragment_cell_ranges_sparse( // Sanity check for the sparse case assert(coords_retrieved); } + } else { + // Append an empty list + unsorted_fragment_cell_ranges.push_back(FragmentCellRanges()); } } @@ -379,25 +376,24 @@ int ArrayReadState::copy_cells( size_t buffer_size, size_t& buffer_offset) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); int64_t pos = fragment_cell_pos_ranges_vec_pos_[attribute_id]; FragmentCellPosRanges& fragment_cell_pos_ranges = - fragment_cell_pos_ranges_vec_[pos]; + *fragment_cell_pos_ranges_vec_[pos]; int64_t fragment_cell_pos_ranges_num = fragment_cell_pos_ranges.size(); - int fragment_i; // Fragment id - int64_t tile_i; // Tile position in the fragment + int fragment_id; // Fragment id + int64_t tile_pos; // Tile position in the fragment // Sanity check - assert(!array_schema->var_size(attribute_id)); + assert(!array_schema_->var_size(attribute_id)); // Copy the cell ranges one by one for(int64_t i=0; i( attribute_id, buffer, @@ -411,9 +407,9 @@ int ArrayReadState::copy_cells( } // Handle non-empty fragment - 
if(fragment_read_states_[fragment_i]->copy_cells( + if(fragment_read_states_[fragment_id]->copy_cells( attribute_id, - tile_i, + tile_pos, buffer, buffer_size, buffer_offset, @@ -421,7 +417,7 @@ int ArrayReadState::copy_cells( return TILEDB_ARS_ERR; // Handle overflow - if(fragment_read_states_[fragment_i]->overflow(attribute_id)) { + if(fragment_read_states_[fragment_id]->overflow(attribute_id)) { overflow_[attribute_id] = true; break; } @@ -449,25 +445,24 @@ int ArrayReadState::copy_cells_var( size_t buffer_var_size, size_t& buffer_var_offset) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); int64_t pos = fragment_cell_pos_ranges_vec_pos_[attribute_id]; FragmentCellPosRanges& fragment_cell_pos_ranges = - fragment_cell_pos_ranges_vec_[pos]; + *fragment_cell_pos_ranges_vec_[pos]; int64_t fragment_cell_pos_ranges_num = fragment_cell_pos_ranges.size(); - int fragment_i; // Fragment id - int64_t tile_i; // Tile position in the fragment + int fragment_id; // Fragment id + int64_t tile_pos; // Tile position in the fragment // Sanity check - assert(array_schema->var_size(attribute_id)); + assert(array_schema_->var_size(attribute_id)); // Copy the cell ranges one by one for(int64_t i=0; i( attribute_id, buffer, @@ -484,9 +479,9 @@ int ArrayReadState::copy_cells_var( } // Handle non-empty fragment - if(fragment_read_states_[fragment_i]->copy_cells_var( + if(fragment_read_states_[fragment_id]->copy_cells_var( attribute_id, - tile_i, + tile_pos, buffer, buffer_size, buffer_offset, @@ -497,7 +492,7 @@ int ArrayReadState::copy_cells_var( return TILEDB_ARS_ERR; // Handle overflow - if(fragment_read_states_[fragment_i]->overflow(attribute_id)) { + if(fragment_read_states_[fragment_id]->overflow(attribute_id)) { overflow_[attribute_id] = true; break; } @@ -523,8 +518,7 @@ void ArrayReadState::copy_cells_with_empty( size_t& buffer_offset, const CellPosRange& cell_pos_range) { // For easy reference - const ArraySchema* array_schema = 
array_->array_schema(); - size_t cell_size = array_schema->cell_size(attribute_id); + size_t cell_size = array_schema_->cell_size(attribute_id); char* buffer_c = static_cast(buffer); // Calculate free space in buffer @@ -536,7 +530,7 @@ void ArrayReadState::copy_cells_with_empty( } // Sanity check - assert(!array_schema->var_size(attribute_id)); + assert(!array_schema_->var_size(attribute_id)); // Calculate number of empty cells to write int64_t cell_num_in_range = cell_pos_range.second - cell_pos_range.first + 1; @@ -547,7 +541,7 @@ void ArrayReadState::copy_cells_with_empty( int64_t cell_num_to_copy = bytes_to_copy / cell_size; // Get the empty value - int type = array_schema->type(attribute_id); + int type = array_schema_->type(attribute_id); void* empty_cell = malloc(cell_size); if(type == TILEDB_INT32) { int empty_cell_v = TILEDB_EMPTY_INT32; @@ -594,7 +588,6 @@ void ArrayReadState::copy_cells_with_empty_var( size_t& buffer_var_offset, const CellPosRange& cell_pos_range) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); size_t cell_size = TILEDB_CELL_VAR_OFFSET_SIZE; char* buffer_c = static_cast(buffer); char* buffer_var_c = static_cast(buffer_var); @@ -611,7 +604,7 @@ void ArrayReadState::copy_cells_with_empty_var( } // Get the empty value - int type = array_schema->type(attribute_id); + int type = array_schema_->type(attribute_id); void* empty_cell = malloc(cell_size); size_t cell_size_var; if(type == TILEDB_INT32) { @@ -637,7 +630,7 @@ void ArrayReadState::copy_cells_with_empty_var( } // Sanity check - assert(array_schema->var_size(attribute_id)); + assert(array_schema_->var_size(attribute_id)); // Calculate cell number to copy int64_t cell_num_in_range = cell_pos_range.second - cell_pos_range.first + 1; @@ -691,7 +684,7 @@ int ArrayReadState::get_next_fragment_cell_ranges_dense() { return TILEDB_ARS_OK; // Compute the unsorted fragment cell ranges needed for this read run - FragmentCellRanges unsorted_fragment_cell_ranges; 
+ std::vector unsorted_fragment_cell_ranges; if(compute_unsorted_fragment_cell_ranges_dense( unsorted_fragment_cell_ranges) != TILEDB_ARS_OK) return TILEDB_ARS_ERR; @@ -704,10 +697,10 @@ int ArrayReadState::get_next_fragment_cell_ranges_dense() { return TILEDB_ARS_ERR; // Compute the fragment cell position ranges - FragmentCellPosRanges fragment_cell_pos_ranges; + FragmentCellPosRanges* fragment_cell_pos_ranges = new FragmentCellPosRanges(); if(compute_fragment_cell_pos_ranges( fragment_cell_ranges, - fragment_cell_pos_ranges) != TILEDB_ARS_OK) + *fragment_cell_pos_ranges) != TILEDB_ARS_OK) return TILEDB_ARS_ERR; // Insert cell pos ranges in the state @@ -738,7 +731,7 @@ int ArrayReadState::get_next_fragment_cell_ranges_sparse() { compute_min_bounding_coords_end(); // Compute the unsorted fragment cell ranges needed for this read run - FragmentCellRanges unsorted_fragment_cell_ranges; + std::vector unsorted_fragment_cell_ranges; if(compute_unsorted_fragment_cell_ranges_sparse( unsorted_fragment_cell_ranges) != TILEDB_ARS_OK) return TILEDB_ARS_ERR; @@ -751,10 +744,10 @@ int ArrayReadState::get_next_fragment_cell_ranges_sparse() { return TILEDB_ARS_ERR; // Compute the fragment cell position ranges - FragmentCellPosRanges fragment_cell_pos_ranges; + FragmentCellPosRanges* fragment_cell_pos_ranges = new FragmentCellPosRanges(); if(compute_fragment_cell_pos_ranges( fragment_cell_ranges, - fragment_cell_pos_ranges) != TILEDB_ARS_OK) + *fragment_cell_pos_ranges) != TILEDB_ARS_OK) return TILEDB_ARS_ERR; // Insert cell pos ranges in the state @@ -770,9 +763,7 @@ int ArrayReadState::get_next_fragment_cell_ranges_sparse() { template void ArrayReadState::get_next_overlapping_tiles_dense() { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); + int dim_num = array_schema_->dim_num(); // Get the first overlapping tile for each fragment 
if(fragment_cell_pos_ranges_vec_.size() == 0) { @@ -800,7 +791,7 @@ void ArrayReadState::get_next_overlapping_tiles_dense() { memcpy( previous_subarray_tile_coords, subarray_tile_coords_, - coords_size); + coords_size_); // Advance range coordinates get_next_subarray_tile_coords(); @@ -830,9 +821,7 @@ void ArrayReadState::get_next_overlapping_tiles_dense() { template void ArrayReadState::get_next_overlapping_tiles_sparse() { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); + int dim_num = array_schema_->dim_num(); // Get the first overlapping tile for each fragment if(fragment_cell_pos_ranges_vec_.size() == 0) { @@ -845,7 +834,7 @@ void ArrayReadState::get_next_overlapping_tiles_sparse() { for(int i=0; iget_next_overlapping_tile_sparse(); if(!fragment_read_states_[i]->done()) { - fragment_bounding_coords_[i] = malloc(2*coords_size); + fragment_bounding_coords_[i] = malloc(2*coords_size_); fragment_read_states_[i]->get_bounding_coords( fragment_bounding_coords_[i]); done_ = false; @@ -863,7 +852,7 @@ void ArrayReadState::get_next_overlapping_tiles_sparse() { !memcmp( // Coinciding end bounding coords &fragment_bounding_coords[dim_num], min_bounding_coords_end_, - coords_size)) { + coords_size_)) { fragment_read_states_[i]->get_next_overlapping_tile_sparse(); if(!fragment_read_states_[i]->done()) { fragment_read_states_[i]->get_bounding_coords( @@ -890,10 +879,8 @@ void ArrayReadState::get_next_overlapping_tiles_sparse() { template void ArrayReadState::init_subarray_tile_coords() { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); - const T* tile_extents = static_cast(array_schema->tile_extents()); + int dim_num = array_schema_->dim_num(); + const T* tile_extents = static_cast(array_schema_->tile_extents()); const T* 
subarray = static_cast(array_->subarray()); // Sanity checks @@ -906,7 +893,7 @@ void ArrayReadState::init_subarray_tile_coords() { T* subarray_tile_domain = static_cast(subarray_tile_domain_); // Get subarray in tile domain - array_schema->get_subarray_tile_domain( + array_schema_->get_subarray_tile_domain( subarray, tile_domain, subarray_tile_domain); @@ -928,7 +915,7 @@ void ArrayReadState::init_subarray_tile_coords() { subarray_tile_domain_ = NULL; assert(subarray_tile_coords_ == NULL); } else { // Overlap - subarray_tile_coords_ = malloc(coords_size); + subarray_tile_coords_ = malloc(coords_size_); T* subarray_tile_coords = static_cast(subarray_tile_coords_); for(int i=0; i void ArrayReadState::get_next_subarray_tile_coords() { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int dim_num = array_schema->dim_num(); + int dim_num = array_schema_->dim_num(); T* subarray_tile_domain = static_cast(subarray_tile_domain_); T* subarray_tile_coords = static_cast(subarray_tile_coords_); // Advance subarray tile coordinates - array_schema->get_next_tile_coords( + array_schema_->get_next_tile_coords( subarray_tile_domain, subarray_tile_coords); @@ -974,14 +960,13 @@ int ArrayReadState::read_dense( void** buffers, size_t* buffer_sizes) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); std::vector attribute_ids = array_->attribute_ids(); int attribute_id_num = attribute_ids.size(); // Read each attribute individually int buffer_i = 0; for(int i=0; ivar_size(attribute_ids[i])) { // FIXED CELLS + if(!array_schema_->var_size(attribute_ids[i])) { // FIXED CELLS if(read_dense_attr( attribute_ids[i], buffers[buffer_i], @@ -1009,8 +994,7 @@ int ArrayReadState::read_dense_attr( void* buffer, size_t& buffer_size) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int coords_type = array_schema->coords_type(); + int coords_type = array_schema_->coords_type(); // Invoke the proper 
templated function if(coords_type == TILEDB_INT32) { @@ -1069,7 +1053,7 @@ int ArrayReadState::read_dense_attr( buffer_size = buffer_offset; return TILEDB_ARS_OK; } - + // Copy cells to buffers if(copy_cells( attribute_id, @@ -1093,8 +1077,7 @@ int ArrayReadState::read_dense_attr_var( void* buffer_var, size_t& buffer_var_size) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int coords_type = array_schema->coords_type(); + int coords_type = array_schema_->coords_type(); // Invoke the proper templated function if(coords_type == TILEDB_INT32) { @@ -1190,8 +1173,6 @@ int ArrayReadState::read_sparse( void** buffers, size_t* buffer_sizes) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int attribute_num = array_schema->attribute_num(); std::vector attribute_ids = array_->attribute_ids(); int attribute_id_num = attribute_ids.size(); @@ -1199,11 +1180,11 @@ int ArrayReadState::read_sparse( int coords_buffer_i = -1; int buffer_i = 0; for(int i=0; ivar_size(attribute_ids[i])) // FIXED CELLS + if(!array_schema_->var_size(attribute_ids[i])) // FIXED CELLS ++buffer_i; else // VARIABLE-SIZED CELLS buffer_i +=2; @@ -1212,7 +1193,7 @@ int ArrayReadState::read_sparse( // Read coordinates attribute first if(coords_buffer_i != -1) { if(read_sparse_attr( - attribute_num, + attribute_num_, buffers[coords_buffer_i], buffer_sizes[coords_buffer_i]) != TILEDB_ARS_OK) return TILEDB_ARS_ERR; @@ -1222,12 +1203,12 @@ int ArrayReadState::read_sparse( buffer_i = 0; for(int i=0; ivar_size(attribute_ids[i])) { // FIXED CELLS + if(!array_schema_->var_size(attribute_ids[i])) { // FIXED CELLS if(read_sparse_attr( attribute_ids[i], buffers[buffer_i], @@ -1257,8 +1238,7 @@ int ArrayReadState::read_sparse_attr( void* buffer, size_t& buffer_size) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int coords_type = array_schema->coords_type(); + int coords_type = array_schema_->coords_type(); // 
Invoke the proper templated function if(coords_type == TILEDB_INT32) { @@ -1351,8 +1331,7 @@ int ArrayReadState::read_sparse_attr_var( void* buffer_var, size_t& buffer_var_size) { // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int coords_type = array_schema->coords_type(); + int coords_type = array_schema_->coords_type(); // Invoke the proper templated function if(coords_type == TILEDB_INT32) { @@ -1460,21 +1439,22 @@ int ArrayReadState::read_sparse_attr_var( template int ArrayReadState::sort_fragment_cell_ranges( - FragmentCellRanges& unsorted_fragment_cell_ranges, + std::vector& unsorted_fragment_cell_ranges, FragmentCellRanges& fragment_cell_ranges) const { + // Sanity check + assert(fragment_num_ > 0); + // Trivial case - single fragment if(fragment_num_ == 1) { - fragment_cell_ranges = unsorted_fragment_cell_ranges; + fragment_cell_ranges = unsorted_fragment_cell_ranges[0]; unsorted_fragment_cell_ranges.clear(); return TILEDB_ARS_OK; } // For easy reference - const ArraySchema* array_schema = array_->array_schema(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); - const T* domain = static_cast(array_schema->domain()); - const T* tile_extents = static_cast(array_schema->tile_extents()); + int dim_num = array_schema_->dim_num(); + const T* domain = static_cast(array_schema_->domain()); + const T* tile_extents = static_cast(array_schema_->tile_extents()); const T* tile_coords = static_cast(subarray_tile_coords_); int rc = TILEDB_ARS_OK; @@ -1489,197 +1469,209 @@ int ArrayReadState::sort_fragment_cell_ranges( } } + // Initialization of book-keeping for unsorted ranges + int64_t* rlen = new int64_t[fragment_num_]; + int64_t* rid = new int64_t[fragment_num_]; + int fid = 0; + for(int i=0; i* pq_fragment_cell_range; + PQFragmentCellRange* popped; + PQFragmentCellRange* top; + PQFragmentCellRange* trimmed_top; + PQFragmentCellRange* extra_popped; + PQFragmentCellRange* left; + 
PQFragmentCellRange* unary; + FragmentCellRange result; + // Populate queue std::priority_queue< - FragmentCellRange, - FragmentCellRanges, - SmallerFragmentCellRange > pq(array_schema); - int unsorted_fragment_cell_ranges_num = unsorted_fragment_cell_ranges.size(); - for(int64_t i=0; i*, + std::vector* >, + SmallerPQFragmentCellRange > pq(array_schema_); + for(int i=0; i( + array_schema_, + &fragment_read_states_); + pq_fragment_cell_range->import_from(unsorted_fragment_cell_ranges[i][0]); + pq.push(pq_fragment_cell_range); + ++rid[i]; + } + } // Start processing the queue - FragmentCellRange popped, top; - int popped_fragment_i, top_fragment_i, popped_tile_i, top_tile_i; - T *popped_range, *top_range; while(!pq.empty()) { // Pop the first entry and mark it as popped popped = pq.top(); - popped_fragment_i = popped.first.first; - popped_tile_i = popped.first.second; - popped_range = static_cast(popped.second); pq.pop(); // Last range - just insert it into the results and stop if(pq.empty()) { - fragment_cell_ranges.push_back(popped); + popped->export_to(result); + fragment_cell_ranges.push_back(result); + delete popped; break; } // Mark the second entry (now top) as top top = pq.top(); - top_fragment_i = top.first.first; - top_tile_i = top.first.second; - top_range = static_cast(top.second); // Dinstinguish two cases - if(popped_fragment_i == -1 || // DENSE OR UNARY POPPED - fragment_read_states_[popped_fragment_i]->dense() || - !memcmp(popped_range, &popped_range[dim_num], coords_size)) { - // Keep on discarding ranges from the queue - while(!pq.empty() && - top_fragment_i < popped_fragment_i && - array_schema->cell_order_cmp(top_range, popped_range) >= 0 && - array_schema->cell_order_cmp( - top_range, - &popped_range[dim_num]) <= 0) { + if(popped->dense() || popped->unary()) { // DENSE OR UNARY POPPED + // Keep on trimming ranges from the queue + while(!pq.empty() && popped->must_trim(top)) { // Cut the top range and re-insert, only if there is partial overlap - 
if(array_schema->cell_order_cmp( - &top_range[dim_num], - &popped_range[dim_num]) > 0) { + if(top->ends_after(popped)) { // Create the new trimmed top range - FragmentCellRange trimmed_top; - trimmed_top.first = FragmentInfo(top_fragment_i, top_tile_i); - trimmed_top.second = malloc(2*coords_size); - T* trimmed_top_range = static_cast(trimmed_top.second); - memcpy(trimmed_top_range, &popped_range[dim_num], coords_size); - memcpy(&trimmed_top_range[dim_num], &top_range[dim_num], coords_size); - if(fragment_read_states_[top_fragment_i]->dense()) { - array_schema->get_next_cell_coords( // TOP IS DENSE - tile_domain, - trimmed_top_range); + trimmed_top = new PQFragmentCellRange( + array_schema_, + &fragment_read_states_); + popped->trim(top, trimmed_top, tile_domain); + + // Discard top + free(top->cell_range_); + delete top; + pq.pop(); + + if(trimmed_top->cell_range_ != NULL) { + // Re-insert the trimmed range in pq pq.push(trimmed_top); - } else { // TOP IS SPARSE - bool coords_retrieved; - if(fragment_read_states_[top_fragment_i]->get_coords_after( - &popped_range[dim_num], - trimmed_top_range, - coords_retrieved)) { - free(trimmed_top_range); - free(top_range); - free(popped_range); - return TILEDB_ARS_ERR; + } else { + // Get the next range from the top fragment + fid = trimmed_top->fragment_id_; + if(rid[fid] != rlen[fid]) { + pq_fragment_cell_range = new PQFragmentCellRange( + array_schema_, + &fragment_read_states_); + pq_fragment_cell_range->import_from( + unsorted_fragment_cell_ranges[fid][rid[fid]]); + pq.push(pq_fragment_cell_range); + ++rid[fid]; } - if(coords_retrieved) - pq.push(trimmed_top); - else - free(trimmed_top_range); + // Clear trimmed top + delete trimmed_top; } + } else { + // Discard top + free(top->cell_range_); + delete top; + pq.pop(); } - // Discard top and get a new one - free(top.second); - pq.pop(); - top = pq.top(); - top_fragment_i = top.first.first; - top_tile_i = top.first.second; - top_range = static_cast(top.second); + // Get a 
new top + if(!pq.empty()) + top = pq.top(); } - // Potentially trim the popped range - if(!pq.empty() && - top_fragment_i > popped_fragment_i && - array_schema->cell_order_cmp( - top_range, - &popped_range[dim_num]) <= 0) { - // Create a new popped range - FragmentCellRange extra_popped; - extra_popped.first.first = popped_fragment_i; - extra_popped.first.second = popped_tile_i; - extra_popped.second = malloc(2*coords_size); - T* extra_popped_range = static_cast(extra_popped.second); - - memcpy(extra_popped_range, top_range, coords_size); - memcpy( - &extra_popped_range[dim_num], - &popped_range[dim_num], - coords_size); - - // Re-instert the extra popped range into the queue - pq.push(extra_popped); - - // Trim last range coordinates of popped - memcpy(&popped_range[dim_num], top_range, coords_size); - - // Get previous cell of the last range coordinates of popped - array_schema->get_previous_cell_coords( - tile_domain, - &popped_range[dim_num]); - } + // Potentially split the popped range + if(!pq.empty()) { + if(popped->must_be_split(top)) { + // Split the popped range + extra_popped = new PQFragmentCellRange( + array_schema_, + &fragment_read_states_); + popped->split(top, extra_popped, tile_domain); + // Re-instert the extra popped range into the queue + pq.push(extra_popped); + } else { + // Get the next range from popped fragment + fid = popped->fragment_id_; + if(rid[fid] != rlen[fid]) { + pq_fragment_cell_range = new PQFragmentCellRange( + array_schema_, + &fragment_read_states_); + pq_fragment_cell_range->import_from( + unsorted_fragment_cell_ranges[fid][rid[fid]]); + pq.push(pq_fragment_cell_range); + ++rid[fid]; + } + } + } // Insert the final popped range into the results - fragment_cell_ranges.push_back(popped); + popped->export_to(result); + fragment_cell_ranges.push_back(result); + delete popped; } else { // SPARSE POPPED // If popped does not overlap with top, insert popped into results - if(!pq.empty() && - array_schema->tile_cell_order_cmp( - 
top_range, - &popped_range[dim_num]) > 0) { - fragment_cell_ranges.push_back(popped); + if(!pq.empty() && top->begins_after(popped)) { + popped->export_to(result); + fragment_cell_ranges.push_back(result); + // Get the next range from the popped fragment + fid = popped->fragment_id_; + if(rid[fid] != rlen[fid]) { + pq_fragment_cell_range = new PQFragmentCellRange( + array_schema_, + &fragment_read_states_); + pq_fragment_cell_range->import_from( + unsorted_fragment_cell_ranges[fid][rid[fid]]); + pq.push(pq_fragment_cell_range); + ++rid[fid]; + } + delete popped; } else { - // Create up to 3 more ranges (left, new popped/right, unary) - FragmentCellRange left; - left.first.first = popped_fragment_i; - left.first.second = popped_tile_i; - left.second = malloc(2*coords_size); - memcpy(left.second, popped_range, coords_size); - T* left_range = static_cast(left.second); - - // Get the first two coordinates from the coordinates tile - bool left_retrieved, right_retrieved, target_exists; - if(fragment_read_states_[popped_fragment_i]->get_enclosing_coords( - popped_tile_i, // Tile - top_range, // Target coords - popped_range, // Start coords - &popped_range[dim_num], // End coords - &left_range[dim_num], // Left coords - popped_range, // Right coords - left_retrieved, // Left retrieved - right_retrieved, // Right retrieved - target_exists) // Target exists - != TILEDB_RS_OK) { - free(left.second); - free(popped.second); - rc = TILEDB_ARS_ERR; - break; + // Create up to 3 more ranges (left, unary, new popped/right) + left = new PQFragmentCellRange( + array_schema_, + &fragment_read_states_); + unary = new PQFragmentCellRange( + array_schema_, + &fragment_read_states_); + popped->split_to_3(top, left, unary); + + // Get the next range from the popped fragment + if(unary->cell_range_ == NULL && popped->cell_range_ == NULL) { + fid = popped->fragment_id_; + if(rid[fid] != rlen[fid]) { + pq_fragment_cell_range = new PQFragmentCellRange( + array_schema_, + 
&fragment_read_states_); + pq_fragment_cell_range->import_from( + unsorted_fragment_cell_ranges[fid][rid[fid]]); + pq.push(pq_fragment_cell_range); + ++rid[fid]; + } } - // Insert left range to the result - if(left_retrieved) - fragment_cell_ranges.push_back(left); - else - free(left.second); + // Insert left to results or discard it + if(left->cell_range_ != NULL) { + left->export_to(result); + fragment_cell_ranges.push_back(result); + } + delete left; + + // Insert unary to the priority queue + if(unary->cell_range_ != NULL) + pq.push(unary); + else + delete unary; - // Re-insert right range to the priority queue - if(right_retrieved) + // Re-insert new popped (right) range to the priority queue + if(popped->cell_range_ != NULL) pq.push(popped); else - free(popped.second); - - // Re-Insert unary range into the priority queue - if(target_exists) { - FragmentCellRange unary; - unary.first.first = popped_fragment_i; - unary.first.second = popped_tile_i; - unary.second = malloc(2*coords_size); - T* unary_range = static_cast(unary.second); - memcpy(unary_range, top_range, coords_size); - memcpy(&unary_range[dim_num], top_range, coords_size); - pq.push(unary); - } + delete popped; } } } // Clean up + unsorted_fragment_cell_ranges.clear(); if(tile_domain != NULL) delete [] tile_domain; + delete [] rlen; + delete [] rid; // Clean up in case of error if(rc != TILEDB_ARS_OK) { while(!pq.empty()) { - free(pq.top().second); + free(pq.top()->cell_range_); + delete pq.top(); pq.pop(); } for(int64_t i=0; i -ArrayReadState::SmallerFragmentCellRange::SmallerFragmentCellRange() - : array_schema_(NULL) { +ArrayReadState::PQFragmentCellRange::PQFragmentCellRange( + const ArraySchema* array_schema, + const std::vector* fragment_read_states) { + array_schema_ = array_schema; + fragment_read_states_ = fragment_read_states; + + cell_range_ = NULL; + fragment_id_ = -1; + tile_id_l_ = -1; + tile_id_r_ = -1; + tile_pos_ = -1; + + coords_size_ = array_schema_->coords_size(); + dim_num_ = 
array_schema_->dim_num(); } template -ArrayReadState::SmallerFragmentCellRange::SmallerFragmentCellRange( - const ArraySchema* array_schema) - : array_schema_(array_schema) { +bool ArrayReadState::PQFragmentCellRange::begins_after( + const PQFragmentCellRange* fcr) const { + return tile_id_l_ > fcr->tile_id_r_ || + (tile_id_l_ == fcr->tile_id_r_ && + array_schema_->cell_order_cmp( + cell_range_, + &(fcr->cell_range_[dim_num_])) > 0); +} + +template +bool ArrayReadState::PQFragmentCellRange::dense() const { + return fragment_id_ == -1 || (*fragment_read_states_)[fragment_id_]->dense(); +} + +template +bool ArrayReadState::PQFragmentCellRange::ends_after( + const PQFragmentCellRange* fcr) const { + return tile_id_r_ > fcr->tile_id_r_ || + (tile_id_r_ == fcr->tile_id_r_ && + array_schema_->cell_order_cmp( + &cell_range_[dim_num_], + &fcr->cell_range_[dim_num_]) > 0); +} + +template +void ArrayReadState::PQFragmentCellRange::export_to( + FragmentCellRange& fragment_cell_range) { + // Copy members + fragment_cell_range.second = cell_range_; + fragment_cell_range.first.first = fragment_id_; + fragment_cell_range.first.second = tile_pos_; +} + +template +void ArrayReadState::PQFragmentCellRange::import_from( + const FragmentCellRange& fragment_cell_range) { + // Copy members + cell_range_ = static_cast(fragment_cell_range.second); + fragment_id_ = fragment_cell_range.first.first; + tile_pos_ = fragment_cell_range.first.second; + + // Compute tile ids + tile_id_l_ = array_schema_->tile_id(cell_range_); + tile_id_r_ = array_schema_->tile_id(&cell_range_[dim_num_]); } template -bool ArrayReadState::SmallerFragmentCellRange::operator () ( - FragmentCellRange a, - FragmentCellRange b) const { +bool ArrayReadState::PQFragmentCellRange::must_be_split( + const PQFragmentCellRange* fcr) const { + return fcr->fragment_id_ > fragment_id_ && + (fcr->tile_id_l_ < tile_id_r_ || + (fcr->tile_id_l_ == tile_id_r_ && + array_schema_->cell_order_cmp( + fcr->cell_range_, + 
&cell_range_[dim_num_]) <= 0)); +} + +template +bool ArrayReadState::PQFragmentCellRange::must_trim( + const PQFragmentCellRange* fcr) const { + return fcr->fragment_id_ < fragment_id_ && + (fcr->tile_id_l_ > tile_id_l_ || + (fcr->tile_id_l_ == tile_id_l_ && + array_schema_->cell_order_cmp(fcr->cell_range_, cell_range_) >= 0)) && + (fcr->tile_id_l_ < tile_id_r_ || + (fcr->tile_id_l_ == tile_id_r_ && + array_schema_->cell_order_cmp( + fcr->cell_range_, + &cell_range_[dim_num_]) <= 0)); +} + +template +void ArrayReadState::PQFragmentCellRange::split( + const PQFragmentCellRange* fcr, + PQFragmentCellRange* fcr_new, + const T* tile_domain) { + // Create the new range + fcr_new->fragment_id_ = fragment_id_; + fcr_new->tile_pos_ = tile_pos_; + fcr_new->cell_range_ = (T*) malloc(2*coords_size_); + fcr_new->tile_id_l_ = fcr->tile_id_l_; + memcpy( + fcr_new->cell_range_, + fcr->cell_range_, + coords_size_); + fcr_new->tile_id_r_ = tile_id_r_; + memcpy( + &(fcr_new->cell_range_[dim_num_]), + &cell_range_[dim_num_], + coords_size_); + + // Trim the calling object range + memcpy(&cell_range_[dim_num_], fcr->cell_range_, coords_size_); + array_schema_->get_previous_cell_coords( + tile_domain, + &cell_range_[dim_num_]); + tile_id_r_ = array_schema_->tile_id(&cell_range_[dim_num_]); +} + +template +void ArrayReadState::PQFragmentCellRange::split_to_3( + const PQFragmentCellRange* fcr, + PQFragmentCellRange* fcr_left, + PQFragmentCellRange* fcr_unary) { + // Initialize fcr_left + fcr_left->fragment_id_ = fragment_id_; + fcr_left->tile_pos_ = tile_pos_; + fcr_left->cell_range_ = (T*) malloc(2*coords_size_); + fcr_left->tile_id_l_ = tile_id_l_; + memcpy(fcr_left->cell_range_, cell_range_, coords_size_); + + // Get enclosing coordinates + bool left_retrieved, right_retrieved, target_exists; + int rc = (*fragment_read_states_)[fragment_id_]->get_enclosing_coords( + tile_pos_, // Tile + fcr->cell_range_, // Target coords + cell_range_, // Start coords + &cell_range_[dim_num_], // End 
coords + &fcr_left->cell_range_[dim_num_], // Left coords + cell_range_, // Right coords + left_retrieved, // Left retrieved + right_retrieved, // Right retrieved + target_exists); // Target exists + assert(rc == TILEDB_RS_OK); + + // Clean up if necessary + if(left_retrieved) { + fcr_left->tile_id_r_ = + array_schema_->tile_id(&fcr_left->cell_range_[dim_num_]); + } else { + free(fcr_left->cell_range_); + fcr_left->cell_range_ = NULL; + } + + if(right_retrieved) { + tile_id_l_ = array_schema_->tile_id(cell_range_); + } else { + free(cell_range_); + cell_range_ = NULL; + } + + // Create unary range + if(target_exists) { + fcr_unary->fragment_id_ = fragment_id_; + fcr_unary->tile_pos_ = tile_pos_; + fcr_unary->cell_range_ = (T*) malloc(2*coords_size_); + fcr_unary->tile_id_l_ = fcr->tile_id_l_; + memcpy(fcr_unary->cell_range_, fcr->cell_range_, coords_size_); + fcr_unary->tile_id_r_ = fcr->tile_id_l_; + memcpy(&(fcr_unary->cell_range_[dim_num_]), fcr->cell_range_, coords_size_); + } else { + fcr_unary->cell_range_ = NULL; + } +} + + +template +void ArrayReadState::PQFragmentCellRange::trim( + const PQFragmentCellRange* fcr, + PQFragmentCellRange* fcr_trimmed, + const T* tile_domain) const { + // Construct trimmed range + fcr_trimmed->fragment_id_ = fcr->fragment_id_; + fcr_trimmed->tile_pos_ = fcr->tile_pos_; + fcr_trimmed->cell_range_ = (T*) malloc(2*coords_size_); + memcpy(fcr_trimmed->cell_range_, &cell_range_[dim_num_], coords_size_); + fcr_trimmed->tile_id_l_ = tile_id_r_; + memcpy( + &(fcr_trimmed->cell_range_[dim_num_]), + &(fcr->cell_range_[dim_num_]), + coords_size_); + fcr_trimmed->tile_id_r_ = fcr->tile_id_r_; + + // Advance the left endpoint of the trimmed range + bool coords_retrieved; + if(fcr_trimmed->dense()) { + array_schema_->get_next_cell_coords( // fcr is DENSE + tile_domain, + fcr_trimmed->cell_range_, + coords_retrieved); + } else { // fcr is SPARSE + int rc = (*fragment_read_states_)[fcr->fragment_id_]->get_coords_after( + 
&(cell_range_[dim_num_]), + fcr_trimmed->cell_range_, + coords_retrieved); + assert(rc == TILEDB_RS_OK); + } + + if(!coords_retrieved) { + free(fcr_trimmed->cell_range_); + fcr_trimmed->cell_range_ = NULL; + } +} + +template +bool ArrayReadState::PQFragmentCellRange::unary() const { + return !memcmp(cell_range_, &cell_range_[dim_num_], coords_size_); +} + + + + +template +ArrayReadState::SmallerPQFragmentCellRange::SmallerPQFragmentCellRange() { + array_schema_ = NULL; +} + +template +ArrayReadState::SmallerPQFragmentCellRange::SmallerPQFragmentCellRange( + const ArraySchema* array_schema) { + array_schema_ = array_schema; +} + +template +bool ArrayReadState::SmallerPQFragmentCellRange::operator () ( + PQFragmentCellRange* a, + PQFragmentCellRange* b) const { // Sanity check assert(array_schema_ != NULL); + // First check the tile ids + if(a->tile_id_l_ < b->tile_id_l_) + return false; + else if(a->tile_id_l_ > b->tile_id_l_) + return true; + // else, check the coordinates + // Get cell ordering information for the first range endpoints int cmp = array_schema_->cell_order_cmp( - static_cast(a.second), - static_cast(b.second)); + a->cell_range_, + b->cell_range_); if(cmp < 0) { // a's range start precedes b's return false; } else if(cmp > 0) { // b's range start preceded a's return true; } else { // a's and b's range starts match - latest fragment wins - if(a.first.first < b.first.first) + if(a->fragment_id_ < b->fragment_id_) return true; - else if(a.first.first > b.first.first) + else if(a->fragment_id_ > b->fragment_id_) return false; else - return (a.first.second > b.first.second); + assert(0); // This should never happen (equal coordinates and fragment id) } } + + + // Explicit template instantiations -template class ArrayReadState::SmallerFragmentCellRange; -template class ArrayReadState::SmallerFragmentCellRange; -template class ArrayReadState::SmallerFragmentCellRange; -template class ArrayReadState::SmallerFragmentCellRange; +template class 
ArrayReadState::PQFragmentCellRange; +template class ArrayReadState::PQFragmentCellRange; +template class ArrayReadState::PQFragmentCellRange; +template class ArrayReadState::PQFragmentCellRange; + +template class ArrayReadState::SmallerPQFragmentCellRange; +template class ArrayReadState::SmallerPQFragmentCellRange; +template class ArrayReadState::SmallerPQFragmentCellRange; +template class ArrayReadState::SmallerPQFragmentCellRange; diff --git a/core/src/array/array_schema.cc b/core/src/array/array_schema.cc index 71be49df..7e1d4a39 100644 --- a/core/src/array/array_schema.cc +++ b/core/src/array/array_schema.cc @@ -74,6 +74,7 @@ ArraySchema::ArraySchema() { hilbert_curve_ = NULL; tile_extents_ = NULL; tile_domain_ = NULL; + tile_coords_aux_ = NULL; } ArraySchema::~ArraySchema() { @@ -91,6 +92,9 @@ ArraySchema::~ArraySchema() { if(tile_domain_ != NULL) free(tile_domain_); + + if(tile_coords_aux_ != NULL) + free(tile_coords_aux_); } @@ -212,7 +216,11 @@ void ArraySchema::array_schema_export( } const std::string& ArraySchema::attribute(int attribute_id) const { - assert(attribute_id >= 0 && attribute_id <= attribute_num_); + assert(attribute_id >= 0 && attribute_id <= attribute_num_+1); + + // Special case for the search attribute (same as coordinates) + if(attribute_id == attribute_num_+1) + attribute_id = attribute_num_; return attributes_[attribute_id]; } @@ -255,17 +263,26 @@ int ArraySchema::cell_order() const { } size_t ArraySchema::cell_size(int attribute_id) const { + // Special case for the search tile + if(attribute_id == attribute_num_+1) + attribute_id = attribute_num_; + return cell_sizes_[attribute_id]; } int ArraySchema::compression(int attribute_id) const { - assert(attribute_id >= 0 && attribute_id <= attribute_num_); + assert(attribute_id >= 0 && attribute_id <= attribute_num_+1); + + // Special case for the "search tile", which is essentially the + // coordinates tile + if(attribute_id == attribute_num_+1) + attribute_id = attribute_num_; return 
compression_[attribute_id]; } size_t ArraySchema::coords_size() const { - return cell_sizes_[attribute_num_]; + return coords_size_; } int ArraySchema::coords_type() const { @@ -668,8 +685,9 @@ int64_t ArraySchema::tile_num() const { return tile_num(); else if(types_[attribute_num_] == TILEDB_INT64) return tile_num(); - else - assert(0); + + assert(0); + return TILEDB_AS_ERR; } template @@ -691,8 +709,10 @@ int64_t ArraySchema::tile_num(const void* domain) const { return tile_num(static_cast(domain)); else if(types_[attribute_num_] == TILEDB_INT64) return tile_num(static_cast(domain)); - else - assert(0); + + + assert(0); + return TILEDB_AS_ERR; } template @@ -879,6 +899,8 @@ int ArraySchema::deserialize( cell_sizes_.resize(attribute_num_+1); for(int i=0; i<= attribute_num_; ++i) cell_sizes_[i] = compute_cell_size(i); + // Set coordinates size + coords_size_ = cell_sizes_[attribute_num_]; // Compute number of cells per tile compute_cell_num_per_tile(); @@ -886,9 +908,17 @@ int ArraySchema::deserialize( // Compute tile domain compute_tile_domain(); + // Compute tile offsets + compute_tile_offsets(); + // Initialize Hilbert curve init_hilbert_curve(); + // Initialize static auxiliary variables + if(tile_coords_aux_ != NULL) + free(tile_coords_aux_); + tile_coords_aux_ = malloc(coords_size_*dim_num_); + // Success return TILEDB_AS_OK; } @@ -937,9 +967,17 @@ int ArraySchema::init(const ArraySchemaC* array_schema_c) { // Compute tile domain compute_tile_domain(); + // Compute tile offsets + compute_tile_offsets(); + // Initialize Hilbert curve init_hilbert_curve(); + // Initialize static auxiliary variables + if(tile_coords_aux_ != NULL) + free(tile_coords_aux_); + tile_coords_aux_ = malloc(coords_size_*dim_num_); + // Success return TILEDB_AS_OK; } @@ -1332,6 +1370,9 @@ int ArraySchema::set_types(const int* types) { for(int i=0; i < attribute_num_+1; ++i) cell_sizes_[i] = compute_cell_size(i); + // Set the coordinates size + coords_size_ = cell_sizes_[attribute_num_]; 
+ return TILEDB_AS_OK; } @@ -1344,11 +1385,8 @@ int ArraySchema::set_types(const int* types) { template int ArraySchema::cell_order_cmp(const T* coords_a, const T* coords_b) const { - // For easy reference - size_t coords_size = cell_sizes_[attribute_num_]; - // Check if they are equal - if(memcmp(coords_a, coords_b, coords_size) == 0) + if(memcmp(coords_a, coords_b, coords_size_) == 0) return 0; // Check for precedence @@ -1436,15 +1474,16 @@ int64_t ArraySchema::get_cell_pos(const T* coords) const { template void ArraySchema::get_next_cell_coords( const T* domain, - T* cell_coords) const { + T* cell_coords, + bool& coords_retrieved) const { // Sanity check assert(dense_); // Invoke the proper function based on the tile order if(cell_order_ == TILEDB_ROW_MAJOR) - get_next_cell_coords_row(domain, cell_coords); + get_next_cell_coords_row(domain, cell_coords, coords_retrieved); else if(cell_order_ == TILEDB_COL_MAJOR) - get_next_cell_coords_col(domain, cell_coords); + get_next_cell_coords_col(domain, cell_coords, coords_retrieved); else // Sanity check assert(0); } @@ -1509,6 +1548,22 @@ void ArraySchema::get_subarray_tile_domain( } } +template +int64_t ArraySchema::get_tile_pos(const T* tile_coords) const { + // Sanity check + assert(tile_extents_); + + // Invoke the proper function based on the tile order + if(tile_order_ == TILEDB_ROW_MAJOR) { + return get_tile_pos_row(tile_coords); + } else if(tile_order_ == TILEDB_COL_MAJOR) { + return get_tile_pos_col(tile_coords); + } else { // Sanity check + assert(0); + return TILEDB_AS_ERR; + } +} + template int64_t ArraySchema::get_tile_pos( const T* domain, @@ -1563,22 +1618,17 @@ template int ArraySchema::tile_cell_order_cmp( const T* coords_a, const T* coords_b) const { - // If there are regular tiles, first check tile ids - if(tile_extents_ != NULL) { - int64_t tile_id_a = tile_id(coords_a); - int64_t tile_id_b = tile_id(coords_b); - - if(tile_id_a < tile_id_b) - return -1; - else if(tile_id_a > tile_id_b) - return 1; 
- } + // Check tile order + int tile_cmp = tile_order_cmp(coords_a, coords_b); + if(tile_cmp) + return tile_cmp; - // Tile ids are non-existent or equal --> check coordinates + // Check cell order return cell_order_cmp(coords_a, coords_b); } template +inline int64_t ArraySchema::tile_id(const T* cell_coords) const { // For easy reference const T* domain = static_cast(domain_); @@ -1589,19 +1639,263 @@ int64_t ArraySchema::tile_id(const T* cell_coords) const { return 0; // Calculate tile coordinates - T* tile_coords = new T[dim_num_]; + T* tile_coords = static_cast(tile_coords_aux_); for(int i=0; i +int ArraySchema::tile_order_cmp( + const int* coords_a, + const int* coords_b) const { + // For easy reference + int diff; + int norm; + const int* domain = static_cast(domain_); + const int* tile_extents = static_cast(tile_extents_); + + // If there are regular tiles, first check tile order + if(tile_extents_ != NULL) { + // ROW-MAJOR + if(tile_order_ == TILEDB_ROW_MAJOR) { + // Check if the cells are definitely IN the same tile + for(int i=0; i 0) + norm = (coords_b[i] - domain[2*i]) % tile_extents[i]; + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + } + } else { // COLUMN-MAJOR + // Check if the cells are definitely IN the same tile + for(int i=dim_num_-1; i>=0; --i) { + diff = coords_a[i] - coords_b[i]; + + if(diff < 0) + norm = (coords_a[i] - domain[2*i]) % tile_extents[i]; + else if(diff > 0) + norm = (coords_b[i] - domain[2*i]) % tile_extents[i]; + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + } + } + } + + // Same tile order + return 0; +} + +template<> +int ArraySchema::tile_order_cmp( + const int64_t* coords_a, + const int64_t* coords_b) const { + // For 
easy reference + int64_t diff; + int64_t norm; + const int64_t* domain = static_cast(domain_); + const int64_t* tile_extents = static_cast(tile_extents_); + + // If there are regular tiles, first check tile order + if(tile_extents_ != NULL) { + // ROW-MAJOR + if(tile_order_ == TILEDB_ROW_MAJOR) { + // Check if the cells are definitely IN the same tile + for(int i=0; i 0) + norm = (coords_b[i] - domain[2*i]) % tile_extents[i]; + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + } + } else { // COLUMN-MAJOR + // Check if the cells are definitely IN the same tile + for(int i=dim_num_-1; i>=0; --i) { + diff = coords_a[i] - coords_b[i]; + + if(diff < 0) + norm = (coords_a[i] - domain[2*i]) % tile_extents[i]; + else if(diff > 0) + norm = (coords_b[i] - domain[2*i]) % tile_extents[i]; + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + } + } + } + + // Same tile order + return 0; +} + +template<> +int ArraySchema::tile_order_cmp( + const float* coords_a, + const float* coords_b) const { + // For easy reference + float diff; + float norm, norm_temp; + const float* domain = static_cast(domain_); + const float* tile_extents = static_cast(tile_extents_); + + // If there are regular tiles, first check tile order + if(tile_extents_ != NULL) { + // ROW-MAJOR + if(tile_order_ == TILEDB_ROW_MAJOR) { + // Check if the cells are definitely IN the same tile + for(int i=0; i= domain[2*i]); + } else if(diff > 0) { + norm_temp = coords_b[i]; + do { + norm = norm_temp; + norm_temp -= tile_extents[i]; + } while(norm_temp >= domain[2*i]); + } + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + } + } else { // 
COLUMN-MAJOR + // Check if the cells are definitely IN the same tile + for(int i=dim_num_-1; i>=0; --i) { + diff = coords_a[i] - coords_b[i]; + + if(diff < 0) { + norm_temp = coords_a[i]; + do { + norm = norm_temp; + norm_temp -= tile_extents[i]; + } while(norm_temp >= domain[2*i]); + } else if(diff > 0) { + norm_temp = coords_b[i]; + do { + norm = norm_temp; + norm_temp -= tile_extents[i]; + } while(norm_temp >= domain[2*i]); + } + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + } + } + } + + // Same tile order + return 0; +} + +template<> +int ArraySchema::tile_order_cmp( + const double* coords_a, + const double* coords_b) const { + // For easy reference + double diff; + double norm, norm_temp; + const double* domain = static_cast(domain_); + const double* tile_extents = static_cast(tile_extents_); + + // If there are regular tiles, first check tile order + if(tile_extents_ != NULL) { + // ROW-MAJOR + if(tile_order_ == TILEDB_ROW_MAJOR) { + // Check if the cells are definitely IN the same tile + for(int i=0; i= domain[2*i]); + } else if(diff > 0) { + norm_temp = coords_b[i]; + do { + norm = norm_temp; + norm_temp -= tile_extents[i]; + } while(norm_temp >= domain[2*i]); + } + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 1; + } + } else { // COLUMN-MAJOR + // Check if the cells are definitely IN the same tile + for(int i=dim_num_-1; i>=0; --i) { + diff = coords_a[i] - coords_b[i]; + + if(diff < 0) { + norm_temp = coords_a[i]; + do { + norm = norm_temp; + norm_temp -= tile_extents[i]; + } while(norm_temp >= domain[2*i]); + } else if(diff > 0) { + norm_temp = coords_b[i]; + do { + norm = norm_temp; + norm_temp -= tile_extents[i]; + } while(norm_temp >= domain[2*i]); + } + + if(diff < 0 && (norm - diff) >= tile_extents[i]) + return -1; + else if(diff > 0 && (norm + diff) >= tile_extents[i]) + return 
1; + } + } + } + + // Same tile order + return 0; +} @@ -1699,7 +1993,7 @@ size_t ArraySchema::compute_cell_size(int i) const { return TILEDB_VAR_SIZE; // Fixed-sized cell - size_t size; + size_t size = 0; // Attributes if(i < attribute_num_) { @@ -1783,6 +2077,50 @@ void ArraySchema::compute_tile_domain() { } } +void ArraySchema::compute_tile_offsets() { + // Invoke the proper templated function + if(types_[attribute_num_] == TILEDB_INT32) { + compute_tile_offsets(); + } else if(types_[attribute_num_] == TILEDB_INT64) { + compute_tile_offsets(); + } else if(types_[attribute_num_] == TILEDB_FLOAT32) { + compute_tile_offsets(); + } else if(types_[attribute_num_] == TILEDB_FLOAT64) { + compute_tile_offsets(); + } else { // The program should never reach this point + assert(0); + } +} + +template +void ArraySchema::compute_tile_offsets() { + // Applicable only to non-NULL space tiles + if(tile_extents_ == NULL) + return; + + // For easy reference + const T* domain = static_cast(domain_); + const T* tile_extents = static_cast(tile_extents_); + int64_t tile_num; // Per dimension + + // Calculate tile offsets for column-major tile order + tile_offsets_col_.push_back(1); + for(int i=1; i=0; --i) { + tile_num = (domain[2*(i+1)+1] - + domain[2*(i+1)] + 1) / tile_extents[i+1]; + tile_offsets_row_.push_back(tile_offsets_row_.back() * tile_num); + } + std::reverse(tile_offsets_row_.begin(), tile_offsets_row_.end()); +} + size_t ArraySchema::compute_type_size(int i) const { // Sanity check assert(i>= 0 && i <= attribute_num_); @@ -1863,7 +2201,8 @@ int64_t ArraySchema::get_cell_pos_row(const T* coords) const { template void ArraySchema::get_next_cell_coords_col( const T* domain, - T* cell_coords) const { + T* cell_coords, + bool& coords_retrieved) const { int i = 0; ++cell_coords[i]; @@ -1871,12 +2210,18 @@ void ArraySchema::get_next_cell_coords_col( cell_coords[i] = domain[2*i]; ++cell_coords[++i]; } + + if(i == dim_num_-1 && cell_coords[i] > domain[2*i+1]) + coords_retrieved 
= false; + else + coords_retrieved = true; } template void ArraySchema::get_next_cell_coords_row( const T* domain, - T* cell_coords) const { + T* cell_coords, + bool& coords_retrieved) const { int i = dim_num_-1; ++cell_coords[i]; @@ -1884,6 +2229,11 @@ void ArraySchema::get_next_cell_coords_row( cell_coords[i] = domain[2*i]; ++cell_coords[--i]; } + + if(i == 0 && cell_coords[i] > domain[2*i+1]) + coords_retrieved = false; + else + coords_retrieved = true; } template @@ -1938,6 +2288,17 @@ void ArraySchema::get_next_tile_coords_row( } } +template +int64_t ArraySchema::get_tile_pos_col(const T* tile_coords) const { + // Calculate position + int64_t pos = 0; + for(int i=0; i int64_t ArraySchema::get_tile_pos_col( const T* domain, @@ -1964,6 +2325,17 @@ int64_t ArraySchema::get_tile_pos_col( return pos; } +template +int64_t ArraySchema::get_tile_pos_row(const T* tile_coords) const { + // Calculate position + int64_t pos = 0; + for(int i=0; i int64_t ArraySchema::get_tile_pos_row( const T* domain, @@ -2043,16 +2415,20 @@ template int64_t ArraySchema::get_cell_pos( template void ArraySchema::get_next_cell_coords( const int* domain, - int* cell_coords) const; + int* cell_coords, + bool& coords_retrieved) const; template void ArraySchema::get_next_cell_coords( const int64_t* domain, - int64_t* cell_coords) const; + int64_t* cell_coords, + bool& coords_retrieved) const; template void ArraySchema::get_next_cell_coords( const float* domain, - float* cell_coords) const; + float* cell_coords, + bool& coords_retrieved) const; template void ArraySchema::get_next_cell_coords( const double* domain, - double* cell_coords) const; + double* cell_coords, + bool& coords_retrieved) const; template void ArraySchema::get_next_tile_coords( const int* domain, diff --git a/core/src/c_api/c_api.cc b/core/src/c_api/c_api.cc index b5bcd8d8..f8aff989 100644 --- a/core/src/c_api/c_api.cc +++ b/core/src/c_api/c_api.cc @@ -30,7 +30,9 @@ * This file defines the C API of TileDB. 
*/ +#include "aio_request.h" #include "c_api.h" +#include "config.h" #include "array_schema_c.h" #include "storage_manager.h" #include @@ -60,15 +62,9 @@ typedef struct TileDB_CTX { StorageManager* storage_manager_; } TileDB_CTX; -int tiledb_ctx_init(TileDB_CTX** tiledb_ctx, const char* config_filename) { - // Check config filename length - if(config_filename != NULL) { - if(strlen(config_filename) > TILEDB_NAME_MAX_LEN) { - PRINT_ERROR("Invalid filename length"); - return TILEDB_ERR; - } - } - +int tiledb_ctx_init( + TileDB_CTX** tiledb_ctx, + const TileDB_Config* tiledb_config) { // Initialize context *tiledb_ctx = (TileDB_CTX*) malloc(sizeof(struct TileDB_CTX)); if(*tiledb_ctx == NULL) { @@ -77,9 +73,18 @@ int tiledb_ctx_init(TileDB_CTX** tiledb_ctx, const char* config_filename) { return TILEDB_ERR; } + // Initialize a Config object + Config* config = new Config(); + if(tiledb_config != NULL) + config->init( + tiledb_config->home_, + tiledb_config->mpi_comm_, + tiledb_config->read_method_, + tiledb_config->write_method_); + // Create storage manager (*tiledb_ctx)->storage_manager_ = new StorageManager(); - if((*tiledb_ctx)->storage_manager_->init(config_filename) != TILEDB_SM_OK) + if((*tiledb_ctx)->storage_manager_->init(config) != TILEDB_SM_OK) return TILEDB_ERR; else return TILEDB_OK; @@ -559,7 +564,7 @@ int tiledb_array_free_schema( free(tiledb_array_schema->compression_); // Free cell val num - if(tiledb_array_schema->cell_val_num_ != NULL); + if(tiledb_array_schema->cell_val_num_ != NULL) free(tiledb_array_schema->cell_val_num_); // Success @@ -1305,3 +1310,58 @@ int tiledb_ls( else return TILEDB_OK; } + + + + +/* ****************************** */ +/* ASYNCHRONOUS I/O (AIO */ +/* ****************************** */ + +int tiledb_array_aio_read( + const TileDB_Array* tiledb_array, + TileDB_AIO_Request* tiledb_aio_request) { + // Sanity check + if(!sanity_check(tiledb_array)) + return TILEDB_ERR; + + // Copy the AIO request + AIO_Request* aio_request = 
(AIO_Request*) malloc(sizeof(struct AIO_Request)); + aio_request->id_ = (size_t) tiledb_aio_request; + aio_request->buffers_ = tiledb_aio_request->buffers_; + aio_request->buffer_sizes_ = tiledb_aio_request->buffer_sizes_; + aio_request->status_ = &(tiledb_aio_request->status_); + aio_request->subarray_ = tiledb_aio_request->subarray_; + aio_request->completion_handle_ = tiledb_aio_request->completion_handle_; + aio_request->completion_data_ = tiledb_aio_request->completion_data_; + + // Submit the AIO read request + if(tiledb_array->array_->aio_read(aio_request) != TILEDB_AR_OK) + return TILEDB_ERR; + else + return TILEDB_OK; +} + +int tiledb_array_aio_write( + const TileDB_Array* tiledb_array, + TileDB_AIO_Request* tiledb_aio_request) { + // Sanity check + if(!sanity_check(tiledb_array)) + return TILEDB_ERR; + + // Copy the AIO request + AIO_Request* aio_request = (AIO_Request*) malloc(sizeof(struct AIO_Request)); + aio_request->id_ = (size_t) tiledb_aio_request; + aio_request->buffers_ = tiledb_aio_request->buffers_; + aio_request->buffer_sizes_ = tiledb_aio_request->buffer_sizes_; + aio_request->status_ = &(tiledb_aio_request->status_); + aio_request->subarray_ = tiledb_aio_request->subarray_; + aio_request->completion_handle_ = tiledb_aio_request->completion_handle_; + aio_request->completion_data_ = tiledb_aio_request->completion_data_; + + // Submit the AIO write request + if(tiledb_array->array_->aio_write(aio_request) != TILEDB_AR_OK) + return TILEDB_ERR; + else + return TILEDB_OK; +} diff --git a/core/src/fragment/fragment.cc b/core/src/fragment/fragment.cc index 2f346999..cf1e7ef5 100644 --- a/core/src/fragment/fragment.cc +++ b/core/src/fragment/fragment.cc @@ -223,9 +223,7 @@ int Fragment::init( } void Fragment::reset_read_state() { - if(read_state_ != NULL) - delete read_state_; - read_state_ = new ReadState(this, book_keeping_); + read_state_->reset(); } int Fragment::write(const void** buffers, const size_t* buffer_sizes) { diff --git 
a/core/src/fragment/read_state.cc b/core/src/fragment/read_state.cc index 90d957f1..7eedf981 100644 --- a/core/src/fragment/read_state.cc +++ b/core/src/fragment/read_state.cc @@ -63,22 +63,6 @@ # define PRINT_WARNING(x) do { } while(0) #endif -#ifdef _TILEDB_USE_MMAP -# define READ_FROM_FILE read_from_file_with_mmap -# define READ_TILE_FROM_FILE_CMP_NONE read_tile_from_file_with_mmap_cmp_none -# define READ_TILE_FROM_FILE_CMP_GZIP read_tile_from_file_with_mmap_cmp_gzip -# define READ_TILE_FROM_FILE_VAR_CMP_NONE \ - read_tile_from_file_with_mmap_var_cmp_none -# define READ_TILE_FROM_FILE_VAR_CMP_GZIP \ - read_tile_from_file_with_mmap_var_cmp_gzip -#else -# define READ_FROM_FILE read_from_file -# define READ_TILE_FROM_FILE_CMP_NONE read_tile_from_file_cmp_none -# define READ_TILE_FROM_FILE_CMP_GZIP read_tile_from_file_cmp_gzip -# define READ_TILE_FROM_FILE_VAR_CMP_NONE read_tile_from_file_var_cmp_none -# define READ_TILE_FROM_FILE_VAR_CMP_GZIP read_tile_from_file_var_cmp_gzip -#endif - @@ -91,34 +75,37 @@ ReadState::ReadState( BookKeeping* book_keeping) : book_keeping_(book_keeping), fragment_(fragment) { - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - size_t coords_size = array_schema->coords_size(); + array_ = fragment_->array(); + array_schema_ = array_->array_schema(); + attribute_num_ = array_schema_->attribute_num(); + coords_size_ = array_schema_->coords_size(); done_ = false; - fetched_tile_.resize(attribute_num+2); - overflow_.resize(attribute_num+1); + fetched_tile_.resize(attribute_num_+2); + overflow_.resize(attribute_num_+1); last_tile_coords_ = NULL; - map_addr_.resize(attribute_num+2); - map_addr_lengths_.resize(attribute_num+2); + map_addr_.resize(attribute_num_+2); + map_addr_lengths_.resize(attribute_num_+2); map_addr_compressed_ = NULL; map_addr_compressed_length_ = 0; - map_addr_var_.resize(attribute_num); - 
map_addr_var_lengths_.resize(attribute_num); - search_tile_overlap_subarray_ = malloc(2*coords_size); + map_addr_var_.resize(attribute_num_); + map_addr_var_lengths_.resize(attribute_num_); + search_tile_overlap_subarray_ = malloc(2*coords_size_); search_tile_pos_ = -1; tile_compressed_ = NULL; tile_compressed_allocated_size_ = 0; - tiles_.resize(attribute_num+2); - tiles_offsets_.resize(attribute_num+2); - tiles_sizes_.resize(attribute_num+2); - tiles_var_.resize(attribute_num); - tiles_var_offsets_.resize(attribute_num); - tiles_var_sizes_.resize(attribute_num); - tiles_var_allocated_size_.resize(attribute_num); - - for(int i=0; ifragment_name(); + std::string filename; + is_empty_attribute_.resize(attribute_num_+1); + for(int i=0; iattribute(i) + TILEDB_FILE_SUFFIX; + is_empty_attribute_[i] = !is_file(filename); + } } ReadState::~ReadState() { @@ -176,6 +175,8 @@ ReadState::~ReadState() { if(search_tile_overlap_subarray_ != NULL) free(search_tile_overlap_subarray_); + + free(tmp_coords_); } @@ -195,11 +196,12 @@ bool ReadState::done() const { void ReadState::get_bounding_coords(void* bounding_coords) const { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - size_t coords_size = array_schema->coords_size(); int64_t pos = search_tile_pos_; assert(pos != -1); - memcpy(bounding_coords, book_keeping_->bounding_coords()[pos], 2*coords_size); + memcpy( + bounding_coords, + book_keeping_->bounding_coords()[pos], + 2*coords_size_); } bool ReadState::mbr_overlaps_tile() const { @@ -217,6 +219,18 @@ bool ReadState::overflow(int attribute_id) const { /* MUTATORS */ /* ****************************** */ +void ReadState::reset() { + if(last_tile_coords_ != NULL) { + free(last_tile_coords_); + last_tile_coords_ = NULL; + } + + reset_overflow(); + done_ = false; + search_tile_pos_ = -1; + compute_tile_search_range(); +} + void ReadState::reset_overflow() { for(int i=0; iarray()->array_schema(); - size_t cell_size = 
array_schema->cell_size(attribute_id); + size_t cell_size = array_schema_->cell_size(attribute_id); - // Fetch the attribute tile from disk if necessary - int compression = array_schema->compression(attribute_id); - int rc; - if(compression == TILEDB_GZIP) - rc = get_tile_from_disk_cmp_gzip(attribute_id, tile_i); - else - rc = get_tile_from_disk_cmp_none(attribute_id, tile_i); - if(rc != TILEDB_RS_OK) + // Prepare attribute tile + if(prepare_tile_for_reading(attribute_id, tile_i) != TILEDB_RS_OK) return TILEDB_RS_ERR; - // For easy reference - char* tile = static_cast(tiles_[attribute_id]); - // Calculate free space in buffer size_t buffer_free_space = buffer_size - buffer_offset; buffer_free_space = (buffer_free_space / cell_size) * cell_size; @@ -266,7 +270,7 @@ int ReadState::copy_cells( } // Sanity check - assert(!array_schema->var_size(attribute_id)); + assert(!array_schema_->var_size(attribute_id)); // For each cell position range, copy the respective cells to the buffer size_t start_offset, end_offset; @@ -287,12 +291,13 @@ int ReadState::copy_cells( bytes_to_copy = std::min(bytes_left_to_copy, buffer_free_space); // Copy and update current buffer and tile offsets - char* buffer_c = static_cast(buffer); if(bytes_to_copy != 0) { - memcpy( - buffer_c + buffer_offset, - tile + tiles_offsets_[attribute_id], - bytes_to_copy); + if(READ_FROM_TILE( + attribute_id, + static_cast(buffer) + buffer_offset, + tiles_offsets_[attribute_id], + bytes_to_copy) != TILEDB_RS_OK) + return TILEDB_RS_ERR; buffer_offset += bytes_to_copy; tiles_offsets_[attribute_id] += bytes_to_copy; buffer_free_space = buffer_size - buffer_offset; @@ -317,7 +322,6 @@ int ReadState::copy_cells_var( size_t& buffer_var_offset, const CellPosRange& cell_pos_range) { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); size_t cell_size = TILEDB_CELL_VAR_OFFSET_SIZE; // Calculate free space in buffer @@ -331,26 +335,12 @@ int ReadState::copy_cells_var( return 
TILEDB_RS_OK; } - // Fetch the attribute tile from disk if necessary - int compression = array_schema->compression(attribute_id); - int rc; - if(compression == TILEDB_GZIP) - rc = get_tile_from_disk_var_cmp_gzip(attribute_id, tile_i); - else - rc = get_tile_from_disk_var_cmp_none(attribute_id, tile_i); - if(rc != TILEDB_RS_OK) + // Prepare attribute tile + if(prepare_tile_for_reading_var(attribute_id, tile_i) != TILEDB_RS_OK) return TILEDB_RS_ERR; - // For easy reference - char* buffer_c = static_cast(buffer); - void* buffer_start = buffer_c + buffer_offset; - char* buffer_var_c = static_cast(buffer_var); - char* tile = static_cast(tiles_[attribute_id]); - size_t* tile_s = static_cast(tiles_[attribute_id]); - char* tile_var = static_cast(tiles_var_[attribute_id]); - // Sanity check - assert(array_schema->var_size(attribute_id)); + assert(array_schema_->var_size(attribute_id)); // For each cell position range, copy the respective cells to the buffer size_t start_offset, end_offset; @@ -374,26 +364,37 @@ int ReadState::copy_cells_var( // Compute actual bytes to copy start_cell_pos = tiles_offsets_[attribute_id] / cell_size; end_cell_pos = start_cell_pos + bytes_to_copy/cell_size - 1; - compute_bytes_to_copy( - attribute_id, - start_cell_pos, - end_cell_pos, - buffer_free_space, - buffer_var_free_space, - bytes_to_copy, - bytes_var_to_copy); + if(compute_bytes_to_copy( + attribute_id, + start_cell_pos, + end_cell_pos, + buffer_free_space, + buffer_var_free_space, + bytes_to_copy, + bytes_var_to_copy) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + // For easy reference + void* buffer_start = static_cast(buffer) + buffer_offset; // Potentially update tile offset to the beginning of the overlap range - if(tiles_var_offsets_[attribute_id] < tile_s[start_cell_pos]) - tiles_var_offsets_[attribute_id] = tile_s[start_cell_pos]; + const size_t* tile_var_start; + if(GET_CELL_PTR_FROM_OFFSET_TILE( + attribute_id, + start_cell_pos, + tile_var_start) != TILEDB_RS_OK) + return 
TILEDB_RS_ERR; + if(tiles_var_offsets_[attribute_id] < *tile_var_start) + tiles_var_offsets_[attribute_id] = *tile_var_start; // Copy and update current buffer and tile offsets - buffer_start = buffer_c + buffer_offset; if(bytes_to_copy != 0) { - memcpy( + if(READ_FROM_TILE( + attribute_id, buffer_start, - tile + tiles_offsets_[attribute_id], - bytes_to_copy); + tiles_offsets_[attribute_id], + bytes_to_copy) != TILEDB_RS_OK) + return TILEDB_RS_ERR; buffer_offset += bytes_to_copy; tiles_offsets_[attribute_id] += bytes_to_copy; buffer_free_space = buffer_size - buffer_offset; @@ -405,10 +406,12 @@ int ReadState::copy_cells_var( buffer_var_offset); // Copy and update current variable buffer and tile offsets - memcpy( - buffer_var_c + buffer_var_offset, - tile_var + tiles_var_offsets_[attribute_id], - bytes_var_to_copy); + if(READ_FROM_TILE_VAR( + attribute_id, + static_cast(buffer_var) + buffer_var_offset, + tiles_var_offsets_[attribute_id], + bytes_var_to_copy) != TILEDB_RS_OK) + return TILEDB_RS_ERR; buffer_var_offset += bytes_var_to_copy; tiles_var_offsets_[attribute_id] += bytes_var_to_copy; buffer_var_free_space = buffer_var_size - buffer_var_offset; @@ -418,8 +421,8 @@ int ReadState::copy_cells_var( if(tiles_offsets_[attribute_id] != end_offset + 1) overflow_[attribute_id] = true; - // Entering this if condition implies that the var data in this cell is so large - // that the allocated buffer cannot hold it + // Entering this if condition implies that the var data in this cell is so + // large that the allocated buffer cannot hold it if(buffer_offset == 0u && bytes_to_copy == 0u) { overflow_[attribute_id] = true; return TILEDB_RS_OK; @@ -435,25 +438,13 @@ int ReadState::get_coords_after( T* coords_after, bool& coords_retrieved) { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - int dim_num = array_schema->dim_num(); int64_t cell_num = 
book_keeping_->cell_num(search_tile_pos_); - size_t coords_size = array_schema->coords_size(); - // Fetch the coordinates search tile from disk if necessary - int compression = array_schema->compression(attribute_num); - int rc; - if(compression == TILEDB_GZIP) - rc = get_tile_from_disk_cmp_gzip(attribute_num+1, search_tile_pos_); - else - rc = get_tile_from_disk_cmp_none(attribute_num+1, search_tile_pos_); - if(rc != TILEDB_RS_OK) + // Prepare attribute tile + if(prepare_tile_for_reading(attribute_num_+1, search_tile_pos_) != + TILEDB_RS_OK) return TILEDB_RS_ERR; - // For easy reference - const T* tile = static_cast(tiles_[attribute_num+1]); - // Compute the cell position at or after the coords int64_t coords_after_pos = get_cell_pos_after(coords); @@ -464,7 +455,12 @@ int ReadState::get_coords_after( } // Copy result - memcpy(coords_after, &tile[coords_after_pos*dim_num], coords_size); + if(READ_FROM_TILE( + attribute_num_+1, + coords_after, + coords_after_pos*coords_size_, + coords_size_) != TILEDB_RS_OK) + return TILEDB_RS_ERR; coords_retrieved = true; // Success @@ -482,36 +478,30 @@ int ReadState::get_enclosing_coords( bool& left_retrieved, bool& right_retrieved, bool& target_exists) { - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); - - // Fetch the coordinates search tile from disk if necessary - int compression = array_schema->compression(attribute_num); - int rc; - if(compression == TILEDB_GZIP) - rc = get_tile_from_disk_cmp_gzip(attribute_num+1, tile_i); - else - rc = get_tile_from_disk_cmp_none(attribute_num+1, tile_i); - if(rc != TILEDB_RS_OK) + // Prepare attribute tile + if(prepare_tile_for_reading(attribute_num_+1, tile_i) != + TILEDB_RS_OK) return TILEDB_RS_ERR; - // For easy reference - const T* tile = static_cast(tiles_[attribute_num+1]); - // Compute the 
appropriate cell positions int64_t start_pos = get_cell_pos_at_or_after(start_coords); int64_t end_pos = get_cell_pos_at_or_before(end_coords); int64_t target_pos = get_cell_pos_at_or_before(target_coords); // Check if target exists - if(target_pos >= start_pos && target_pos <= end_pos && - !memcmp(target_coords, &tile[target_pos*dim_num], coords_size)) - target_exists = true; - else + if(target_pos >= start_pos && target_pos <= end_pos) { + int cmp = CMP_COORDS_TO_SEARCH_TILE( + target_coords, + target_pos*coords_size_); + if(cmp == TILEDB_RS_ERR) + return TILEDB_RS_ERR; + if(cmp) + target_exists = true; + else + target_exists = false; + } else { target_exists = false; + } // Calculate left and right pos int64_t left_pos = (target_exists) ? target_pos-1 : target_pos; @@ -519,7 +509,12 @@ int ReadState::get_enclosing_coords( // Copy left if it exists if(left_pos >= start_pos && left_pos <= end_pos) { - memcpy(left_coords, &tile[left_pos*dim_num], coords_size); + if(READ_FROM_TILE( + attribute_num_+1, + left_coords, + left_pos*coords_size_, + coords_size_) != TILEDB_RS_OK) + return TILEDB_RS_ERR; left_retrieved = true; } else { left_retrieved = false; @@ -527,7 +522,12 @@ int ReadState::get_enclosing_coords( // Copy right if it exists if(right_pos >= start_pos && right_pos <= end_pos) { - memcpy(right_coords, &tile[right_pos*dim_num], coords_size); + if(READ_FROM_TILE( + attribute_num_+1, + right_coords, + right_pos*coords_size_, + coords_size_) != TILEDB_RS_OK) + return TILEDB_RS_ERR; right_retrieved = true; } else { right_retrieved = false; @@ -543,19 +543,12 @@ int ReadState::get_fragment_cell_pos_range_sparse( const T* cell_range, FragmentCellPosRange& fragment_cell_pos_range) { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - int dim_num = array_schema->dim_num(); + int dim_num = array_schema_->dim_num(); int64_t tile_i = fragment_info.second; - // Fetch the 
coordinates search tile from disk if necessary - int compression = array_schema->compression(attribute_num); - int rc; - if(compression == TILEDB_GZIP) - rc = get_tile_from_disk_cmp_gzip(attribute_num+1, tile_i); - else - rc = get_tile_from_disk_cmp_none(attribute_num+1, tile_i); - if(rc != TILEDB_RS_OK) + // Prepare attribute tile + if(prepare_tile_for_reading(attribute_num_+1, tile_i) != + TILEDB_RS_OK) return TILEDB_RS_ERR; // Compute the appropriate cell positions @@ -582,10 +575,9 @@ int ReadState::get_fragment_cell_ranges_dense( return TILEDB_RS_OK; // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int dim_num = array_schema->dim_num(); - int cell_order = array_schema->cell_order(); - size_t cell_range_size = 2*array_schema->coords_size(); + int dim_num = array_schema_->dim_num(); + int cell_order = array_schema_->cell_order(); + size_t cell_range_size = 2*coords_size_; const T* search_tile_overlap_subarray = static_cast(search_tile_overlap_subarray_); FragmentInfo fragment_info = FragmentInfo(fragment_i, search_tile_pos_); @@ -681,8 +673,7 @@ int ReadState::get_fragment_cell_ranges_sparse( return TILEDB_RS_OK; // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int dim_num = array_schema->dim_num(); + int dim_num = array_schema_->dim_num(); const T* search_tile_overlap_subarray = static_cast(search_tile_overlap_subarray_); @@ -715,38 +706,31 @@ int ReadState::get_fragment_cell_ranges_sparse( const T* start_coords, const T* end_coords, FragmentCellRanges& fragment_cell_ranges) { + // Sanity checks assert(search_tile_pos_ >= tile_search_range_[0] && search_tile_pos_ <= tile_search_range_[1]); assert(search_tile_overlap_); // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); - const T* 
subarray = static_cast(fragment_->array()->subarray()); + int dim_num = array_schema_->dim_num(); + const T* subarray = static_cast(array_->subarray()); // Handle full overlap if(search_tile_overlap_ == 1) { FragmentCellRange fragment_cell_range; fragment_cell_range.first = FragmentInfo(fragment_i, search_tile_pos_); - fragment_cell_range.second = malloc(2*coords_size); + fragment_cell_range.second = malloc(2*coords_size_); T* cell_range = static_cast(fragment_cell_range.second); - memcpy(cell_range, start_coords, coords_size); - memcpy(&cell_range[dim_num], end_coords, coords_size); + memcpy(cell_range, start_coords, coords_size_); + memcpy(&cell_range[dim_num], end_coords, coords_size_); fragment_cell_ranges.push_back(fragment_cell_range); return TILEDB_RS_OK; } - // Fetch the coordinates search tile from disk if necessary - int compression = array_schema->compression(attribute_num); - int rc; - if(compression == TILEDB_GZIP) - rc = get_tile_from_disk_cmp_gzip(attribute_num+1, search_tile_pos_); - else - rc = get_tile_from_disk_cmp_none(attribute_num+1, search_tile_pos_); - if(rc != TILEDB_RS_OK) + // Prepare attribute tile + if(prepare_tile_for_reading(attribute_num_+1, search_tile_pos_) != + TILEDB_RS_OK) return TILEDB_RS_ERR; // Get cell positions for the cell range @@ -754,12 +738,13 @@ int ReadState::get_fragment_cell_ranges_sparse( int64_t end_pos = get_cell_pos_at_or_before(end_coords); // Get the cell ranges - const T* cell; - const T* tile = static_cast(tiles_[attribute_num+1]); + const void* cell; int64_t current_start_pos, current_end_pos = -2; for(int64_t i=start_pos; i<=end_pos; ++i) { - cell = &tile[i*dim_num]; - if(cell_in_subarray(cell, subarray, dim_num)) { + if(GET_COORDS_PTR_FROM_SEARCH_TILE(i, cell) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + if(cell_in_subarray(static_cast(cell), subarray, dim_num)) { if(i-1 == current_end_pos) { // The range is expanded ++current_end_pos; } else { // A new range starts @@ -770,13 +755,22 @@ int 
ReadState::get_fragment_cell_ranges_sparse( if(i-1 == current_end_pos) { // The range needs to be added to the list FragmentCellRange fragment_cell_range; fragment_cell_range.first = FragmentInfo(fragment_i, search_tile_pos_); - fragment_cell_range.second = malloc(2*coords_size); + fragment_cell_range.second = malloc(2*coords_size_); T* cell_range = static_cast(fragment_cell_range.second); - memcpy(cell_range, &tile[current_start_pos*dim_num], coords_size); - memcpy( - &cell_range[dim_num], - &tile[current_end_pos*dim_num], - coords_size); + + if(READ_FROM_TILE( + attribute_num_+1, + cell_range, + current_start_pos*coords_size_, + coords_size_) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + if(READ_FROM_TILE( + attribute_num_+1, + &cell_range[dim_num], + current_end_pos*coords_size_, + coords_size_) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + fragment_cell_ranges.push_back(fragment_cell_range); current_end_pos = -2; // This indicates that there is no active range } @@ -787,10 +781,22 @@ int ReadState::get_fragment_cell_ranges_sparse( if(current_end_pos != -2) { FragmentCellRange fragment_cell_range; fragment_cell_range.first = FragmentInfo(fragment_i, search_tile_pos_); - fragment_cell_range.second = malloc(2*coords_size); + fragment_cell_range.second = malloc(2*coords_size_); T* cell_range = static_cast(fragment_cell_range.second); - memcpy(cell_range, &tile[current_start_pos*dim_num], coords_size); - memcpy(&cell_range[dim_num], &tile[current_end_pos*dim_num], coords_size); + + if(READ_FROM_TILE( + attribute_num_+1, + cell_range, + current_start_pos*coords_size_, + coords_size_) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + if(READ_FROM_TILE( + attribute_num_+1, + &cell_range[dim_num], + current_end_pos*coords_size_, + coords_size_) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + fragment_cell_ranges.push_back(fragment_cell_range); } @@ -805,23 +811,22 @@ void ReadState::get_next_overlapping_tile_dense(const T* tile_coords) { return; // For easy reference - const ArraySchema* 
array_schema = fragment_->array()->array_schema(); - int dim_num = array_schema->dim_num(); - const T* tile_extents = static_cast(array_schema->tile_extents()); - const T* array_domain = static_cast(array_schema->domain()); - const T* subarray = static_cast(fragment_->array()->subarray()); + int dim_num = array_schema_->dim_num(); + const T* tile_extents = static_cast(array_schema_->tile_extents()); + const T* array_domain = static_cast(array_schema_->domain()); + const T* subarray = static_cast(array_->subarray()); const T* domain = static_cast(book_keeping_->domain()); const T* non_empty_domain = static_cast(book_keeping_->non_empty_domain()); // Compute the tile subarray T* tile_subarray = new T[2*dim_num]; - array_schema->get_tile_subarray(tile_coords, tile_subarray); + array_schema_->get_tile_subarray(tile_coords, tile_subarray); // Compute overlap of tile subarray with non-empty fragment domain T* tile_domain_overlap_subarray = new T[2*dim_num]; bool tile_domain_overlap = - array_schema->subarray_overlap( + array_schema_->subarray_overlap( tile_subarray, non_empty_domain, tile_domain_overlap_subarray); @@ -834,19 +839,19 @@ void ReadState::get_next_overlapping_tile_dense(const T* tile_coords) { for(int i=0; iget_tile_pos(domain, tile_coords_norm); + search_tile_pos_ = array_schema_->get_tile_pos(domain, tile_coords_norm); delete [] tile_coords_norm; // Compute overlap of the query subarray with tile T* query_tile_overlap_subarray = new T[2*dim_num]; - array_schema->subarray_overlap( + array_schema_->subarray_overlap( subarray, tile_subarray, query_tile_overlap_subarray); // Compute the overlap of the previous results with the non-empty domain search_tile_overlap_ = - array_schema->subarray_overlap( + array_schema_->subarray_overlap( query_tile_overlap_subarray, tile_domain_overlap_subarray, static_cast(search_tile_overlap_subarray_)); @@ -867,9 +872,8 @@ void ReadState::get_next_overlapping_tile_sparse() { return; // For easy reference - const ArraySchema* 
array_schema = fragment_->array()->array_schema(); const std::vector& mbrs = book_keeping_->mbrs(); - const T* subarray = static_cast(fragment_->array()->subarray()); + const T* subarray = static_cast(array_->subarray()); // Update the search tile position if(search_tile_pos_ == -1) @@ -887,7 +891,7 @@ void ReadState::get_next_overlapping_tile_sparse() { const T* mbr = static_cast(mbrs[search_tile_pos_]); search_tile_overlap_ = - array_schema->subarray_overlap( + array_schema_->subarray_overlap( subarray, mbr, static_cast(search_tile_overlap_subarray_)); @@ -907,17 +911,15 @@ void ReadState::get_next_overlapping_tile_sparse( return; // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int dim_num = array_schema->dim_num(); - size_t coords_size = array_schema->coords_size(); + int dim_num = array_schema_->dim_num(); const std::vector& mbrs = book_keeping_->mbrs(); - const T* subarray = static_cast(fragment_->array()->subarray()); + const T* subarray = static_cast(array_->subarray()); // Compute the tile subarray T* tile_subarray = new T[2*dim_num]; T* mbr_tile_overlap_subarray = new T[2*dim_num]; T* tile_subarray_end = new T[dim_num]; - array_schema->get_tile_subarray(tile_coords, tile_subarray); + array_schema_->get_tile_subarray(tile_coords, tile_subarray); for(int i=0; i( book_keeping_->bounding_coords()[search_tile_pos_]); - if(array_schema->tile_cell_order_cmp( + if(array_schema_->tile_cell_order_cmp( &bounding_coords[dim_num], tile_subarray_end) <= 0) { ++search_tile_pos_; @@ -947,7 +949,7 @@ void ReadState::get_next_overlapping_tile_sparse( return; } } else { - memcpy(last_tile_coords_, tile_coords, coords_size); + memcpy(last_tile_coords_, tile_coords, coords_size_); } } @@ -962,7 +964,7 @@ void ReadState::get_next_overlapping_tile_sparse( // Get overlap between MBR and tile subarray const T* mbr = static_cast(mbrs[search_tile_pos_]); mbr_tile_overlap_ = - array_schema->subarray_overlap( + 
array_schema_->subarray_overlap( tile_subarray, mbr, mbr_tile_overlap_subarray); @@ -973,7 +975,7 @@ void ReadState::get_next_overlapping_tile_sparse( const T* bounding_coords = static_cast( book_keeping_->bounding_coords()[search_tile_pos_]); - if(array_schema->tile_cell_order_cmp( + if(array_schema_->tile_cell_order_cmp( &bounding_coords[dim_num], tile_subarray_end) > 0) { break; @@ -985,7 +987,7 @@ void ReadState::get_next_overlapping_tile_sparse( // Get overlap of MBR with the query inside the tile subarray search_tile_overlap_ = - array_schema->subarray_overlap( + array_schema_->subarray_overlap( subarray, mbr_tile_overlap_subarray, static_cast(search_tile_overlap_subarray_)); @@ -1016,30 +1018,81 @@ void ReadState::get_next_overlapping_tile_sparse( /* PRIVATE METHODS */ /* ****************************** */ -void ReadState::compute_bytes_to_copy( +int ReadState::CMP_COORDS_TO_SEARCH_TILE( + const void* buffer, + size_t tile_offset) { + // For easy reference + char* tile = static_cast(tiles_[attribute_num_+1]); + + // The tile is in main memory + if(tile != NULL) { + return !memcmp(buffer, tile + tile_offset, coords_size_); + } + + // We need to read from the disk + std::string filename = + fragment_->fragment_name() + "/" + TILEDB_COORDS + TILEDB_FILE_SUFFIX; + int rc = TILEDB_UT_OK; + int read_method = array_->config()->read_method(); + MPI_Comm* mpi_comm = array_->config()->mpi_comm(); + if(read_method == TILEDB_IO_READ) + rc = read_from_file( + filename, + tiles_file_offsets_[attribute_num_+1] + tile_offset, + tmp_coords_, + coords_size_); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_from_file( + mpi_comm, + filename, + tiles_file_offsets_[attribute_num_+1] + tile_offset, + tmp_coords_, + coords_size_); + if(rc != TILEDB_UT_OK) + return TILEDB_RS_ERR; + + // Return + return !memcmp(buffer, tmp_coords_, coords_size_); +} + +int ReadState::compute_bytes_to_copy( int attribute_id, int64_t start_cell_pos, int64_t& end_cell_pos, size_t 
buffer_free_space, size_t buffer_var_free_space, size_t& bytes_to_copy, - size_t& bytes_var_to_copy) const { + size_t& bytes_var_to_copy) { // Trivial case if(buffer_free_space == 0 || buffer_var_free_space == 0) { bytes_to_copy = 0; bytes_var_to_copy = 0; - return; + return TILEDB_RS_OK; } // Calculate number of cells in the current tile for this attribute int64_t cell_num = book_keeping_->cell_num(fetched_tile_[attribute_id]); // Calculate bytes to copy from the variable tile - const size_t* tile = static_cast(tiles_[attribute_id]); - if(end_cell_pos + 1 < cell_num) - bytes_var_to_copy = tile[end_cell_pos + 1] - tile[start_cell_pos]; - else - bytes_var_to_copy = tiles_var_sizes_[attribute_id] - tile[start_cell_pos]; + const size_t* start_offset; + const size_t* end_offset; + const size_t* med_offset; + if(GET_CELL_PTR_FROM_OFFSET_TILE( + attribute_id, + start_cell_pos, + start_offset) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + if(end_cell_pos + 1 < cell_num) { + if(GET_CELL_PTR_FROM_OFFSET_TILE( + attribute_id, + end_cell_pos+1, + end_offset) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + bytes_var_to_copy = *end_offset - *start_offset; + } else { + bytes_var_to_copy = tiles_var_sizes_[attribute_id] - *start_offset; + } // If bytes do not fit in variable buffer, we need to adjust if(bytes_var_to_copy > buffer_var_free_space) { @@ -1054,7 +1107,12 @@ void ReadState::compute_bytes_to_copy( med = min + ((max - min) / 2); // Calculate variable bytes to copy - bytes_var_to_copy = tile[med] - tile[start_cell_pos]; + if(GET_CELL_PTR_FROM_OFFSET_TILE( + attribute_id, + med, + med_offset) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + bytes_var_to_copy = *med_offset - *start_offset; // Check condition if(bytes_var_to_copy > buffer_var_free_space) @@ -1075,7 +1133,13 @@ void ReadState::compute_bytes_to_copy( end_cell_pos = std::max(tmp_end, start_cell_pos-1); // Update variable bytes to copy - bytes_var_to_copy = tile[end_cell_pos + 1] - tile[start_cell_pos]; + 
if(GET_CELL_PTR_FROM_OFFSET_TILE( + attribute_id, + end_cell_pos+1, + end_offset) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + bytes_var_to_copy = *end_offset - *start_offset; } // Update bytes to copy @@ -1085,12 +1149,13 @@ void ReadState::compute_bytes_to_copy( // Sanity checks assert(bytes_to_copy <= buffer_free_space); assert(bytes_var_to_copy <= buffer_var_free_space); + + return TILEDB_RS_OK; } void ReadState::compute_tile_search_range() { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int coords_type = array_schema->coords_type(); + int coords_type = array_schema_->coords_type(); // Applicable only to sparse fragments if(fragment_->dense()) @@ -1114,8 +1179,7 @@ void ReadState::compute_tile_search_range() { template void ReadState::compute_tile_search_range() { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int cell_order = array_schema->cell_order(); + int cell_order = array_schema_->cell_order(); // Initialize the tile search range if(cell_order == TILEDB_HILBERT) // HILBERT CELL ORDER @@ -1133,9 +1197,8 @@ void ReadState::compute_tile_search_range() { template void ReadState::compute_tile_search_range_col_or_row() { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int dim_num = array_schema->dim_num(); - const T* subarray = static_cast(fragment_->array()->subarray()); + int dim_num = array_schema_->dim_num(); + const T* subarray = static_cast(array_->subarray()); int64_t tile_num = book_keeping_->tile_num(); const std::vector& bounding_coords = book_keeping_->bounding_coords(); @@ -1164,11 +1227,11 @@ void ReadState::compute_tile_search_range_col_or_row() { tile_end_coords = &(static_cast(bounding_coords[med])[dim_num]); // Calculate precedence - if(array_schema->tile_cell_order_cmp( + if(array_schema_->tile_cell_order_cmp( subarray_min_coords, tile_start_coords) < 0) { // Subarray min precedes MBR max = med-1; 
- } else if(array_schema->tile_cell_order_cmp( + } else if(array_schema_->tile_cell_order_cmp( subarray_min_coords, tile_end_coords) > 0) { // Subarray min succeeds MBR min = med+1; @@ -1202,11 +1265,11 @@ void ReadState::compute_tile_search_range_col_or_row() { tile_end_coords = &(static_cast(bounding_coords[med])[dim_num]); // Calculate precedence - if(array_schema->tile_cell_order_cmp( + if(array_schema_->tile_cell_order_cmp( subarray_max_coords, tile_start_coords) < 0) { // Subarray max precedes MBR max = med-1; - } else if(array_schema->tile_cell_order_cmp( + } else if(array_schema_->tile_cell_order_cmp( subarray_max_coords, tile_end_coords) > 0) { // Subarray max succeeds MBR min = med+1; @@ -1236,9 +1299,8 @@ void ReadState::compute_tile_search_range_col_or_row() { template void ReadState::compute_tile_search_range_hil() { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int dim_num = array_schema->dim_num(); - const T* subarray = static_cast(fragment_->array()->subarray()); + int dim_num = array_schema_->dim_num(); + const T* subarray = static_cast(array_->subarray()); int64_t tile_num = book_keeping_->tile_num(); if(is_unary_subarray(subarray, dim_num)) { // Unary range @@ -1265,11 +1327,11 @@ void ReadState::compute_tile_search_range_hil() { tile_end_coords = &(static_cast(bounding_coords[med])[dim_num]); // Calculate precedence - if(array_schema->tile_cell_order_cmp( + if(array_schema_->tile_cell_order_cmp( subarray_coords, tile_start_coords) < 0) { // Unary subarray precedes MBR max = med-1; - } else if(array_schema->tile_cell_order_cmp( + } else if(array_schema_->tile_cell_order_cmp( subarray_coords, tile_end_coords) > 0) { // Unary subarray succeeds MBR min = med+1; @@ -1301,30 +1363,33 @@ void ReadState::compute_tile_search_range_hil() { } template -int64_t ReadState::get_cell_pos_after(const T* coords) const { +int64_t ReadState::get_cell_pos_after(const T* coords) { // For easy reference - const 
ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - int dim_num = array_schema->dim_num(); - const T* tile = static_cast(tiles_[attribute_num+1]); - int64_t cell_num = book_keeping_->cell_num(fetched_tile_[attribute_num+1]); + int64_t cell_num = book_keeping_->cell_num(fetched_tile_[attribute_num_+1]); // Perform binary search to find the position of coords in the tile int64_t min = 0; int64_t max = cell_num - 1; int64_t med; int cmp; + const void* coords_t; while(min <= max) { med = min + ((max - min) / 2); // Update search range - cmp = array_schema->tile_cell_order_cmp(coords, &tile[med*dim_num]); + if(GET_COORDS_PTR_FROM_SEARCH_TILE(med, coords_t) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + // Compute order + cmp = array_schema_->tile_cell_order_cmp( + coords, + static_cast(coords_t)); if(cmp < 0) max = med-1; else if(cmp > 0) min = med+1; else - break; + break; } // Return @@ -1335,30 +1400,34 @@ int64_t ReadState::get_cell_pos_after(const T* coords) const { } template -int64_t ReadState::get_cell_pos_at_or_after(const T* coords) const { +int64_t ReadState::get_cell_pos_at_or_after(const T* coords) { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - int dim_num = array_schema->dim_num(); - const T* tile = static_cast(tiles_[attribute_num+1]); - int64_t cell_num = book_keeping_->cell_num(fetched_tile_[attribute_num+1]); + int64_t cell_num = book_keeping_->cell_num(fetched_tile_[attribute_num_+1]); // Perform binary search to find the position of coords in the tile int64_t min = 0; int64_t max = cell_num - 1; int64_t med; int cmp; + const void* coords_t; while(min <= max) { med = min + ((max - min) / 2); // Update search range - cmp = array_schema->tile_cell_order_cmp(coords, &tile[med*dim_num]); + if(GET_COORDS_PTR_FROM_SEARCH_TILE(med, coords_t) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + // 
Compute order + cmp = array_schema_->tile_cell_order_cmp( + coords, + static_cast(coords_t)); + if(cmp < 0) max = med-1; else if(cmp > 0) min = med+1; else - break; + break; } // Return @@ -1369,30 +1438,33 @@ int64_t ReadState::get_cell_pos_at_or_after(const T* coords) const { } template -int64_t ReadState::get_cell_pos_at_or_before(const T* coords) const { +int64_t ReadState::get_cell_pos_at_or_before(const T* coords) { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - int dim_num = array_schema->dim_num(); - const T* tile = static_cast(tiles_[attribute_num+1]); - int64_t cell_num = book_keeping_->cell_num(fetched_tile_[attribute_num+1]); + int64_t cell_num = book_keeping_->cell_num(fetched_tile_[attribute_num_+1]); // Perform binary search to find the position of coords in the tile int64_t min = 0; int64_t max = cell_num - 1; int64_t med; int cmp; + const void* coords_t; while(min <= max) { med = min + ((max - min) / 2); // Update search range - cmp = array_schema->tile_cell_order_cmp(coords, &tile[med*dim_num]); + if(GET_COORDS_PTR_FROM_SEARCH_TILE(med, coords_t) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + // Compute order + cmp = array_schema_->tile_cell_order_cmp( + coords, + static_cast(coords_t)); if(cmp < 0) max = med-1; else if(cmp > 0) min = med+1; else - break; + break; } // Return @@ -1402,136 +1474,655 @@ int64_t ReadState::get_cell_pos_at_or_before(const T* coords) const { return med; // At } -int ReadState::get_tile_from_disk_cmp_gzip(int attribute_id, int64_t tile_i) { - // Return if the tile has already been fetched - if(tile_i == fetched_tile_[attribute_id]) - return TILEDB_RS_OK; - - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - - // To handle the special case of the search tile - // The real attribute id corresponds to an actual attribute or 
coordinates - int attribute_id_real = - (attribute_id == attribute_num+1) ? attribute_num : attribute_id; - +inline +int ReadState::GET_COORDS_PTR_FROM_SEARCH_TILE( + int64_t i, + const void*& coords) { // For easy reference - size_t cell_size = array_schema->cell_size(attribute_id_real); - size_t full_tile_size = fragment_->tile_size(attribute_id_real); - int64_t cell_num = book_keeping_->cell_num(tile_i); - size_t tile_size = cell_num * cell_size; - const std::vector >& tile_offsets = - book_keeping_->tile_offsets(); - int64_t tile_num = book_keeping_->tile_num(); - - // Allocate space for the tile if needed - if(tiles_[attribute_id] == NULL) - tiles_[attribute_id] = malloc(full_tile_size); - - // Prepare attribute file name - std::string filename = fragment_->fragment_name() + "/" + - array_schema->attribute(attribute_id_real) + - TILEDB_FILE_SUFFIX; + char* tile = static_cast(tiles_[attribute_num_+1]); - // Find file offset where the tile begins - off_t file_offset = tile_offsets[attribute_id_real][tile_i]; - off_t file_size = ::file_size(filename); - size_t tile_compressed_size = - (tile_i == tile_num-1) - ? 
file_size - tile_offsets[attribute_id_real][tile_i] - : tile_offsets[attribute_id_real][tile_i+1] - - tile_offsets[attribute_id_real][tile_i]; + // The tile is in main memory + if(tile != NULL) { + coords = tile + i*coords_size_; + return TILEDB_RS_OK; + } - // Read tile from file - if(READ_TILE_FROM_FILE_CMP_GZIP( - attribute_id, - file_offset, - tile_compressed_size) != TILEDB_RS_OK) - return TILEDB_RS_ERR; + // We need to read from the disk + std::string filename = + fragment_->fragment_name() + "/" + TILEDB_COORDS + TILEDB_FILE_SUFFIX; + int rc = TILEDB_UT_OK; + int read_method = array_->config()->read_method(); + MPI_Comm* mpi_comm = array_->config()->mpi_comm(); + if(read_method == TILEDB_IO_READ) + rc = read_from_file( + filename, + tiles_file_offsets_[attribute_num_+1] + i*coords_size_, + tmp_coords_, + coords_size_); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_from_file( + mpi_comm, + filename, + tiles_file_offsets_[attribute_num_+1] + i*coords_size_, + tmp_coords_, + coords_size_); + + // Get coordinates pointer + coords = tmp_coords_; - // Decompress tile - size_t gunzip_out_size; - if(gunzip( - static_cast(tile_compressed_), - tile_compressed_size, - static_cast(tiles_[attribute_id]), - full_tile_size, - gunzip_out_size) != TILEDB_UT_OK) + // Return + if(rc == TILEDB_UT_OK) + return TILEDB_RS_OK; + else return TILEDB_RS_ERR; +} - // Sanity check - assert(gunzip_out_size == tile_size); +int ReadState::GET_CELL_PTR_FROM_OFFSET_TILE( + int attribute_id, + int64_t i, + const size_t*& offset) { + // For easy reference + char* tile = static_cast(tiles_[attribute_id]); - // Set the tile size - tiles_sizes_[attribute_id] = tile_size; + // The tile is in main memory + if(tile != NULL) { + offset = (const size_t*) (tile + i*sizeof(size_t)); + return TILEDB_RS_OK; + } - // Set tile offset - tiles_offsets_[attribute_id] = 0; + // We need to read from the disk + std::string filename = + fragment_->fragment_name() + "/" + + 
array_schema_->attribute(attribute_id) + + TILEDB_FILE_SUFFIX; + int rc = TILEDB_UT_OK; + int read_method = array_->config()->read_method(); + MPI_Comm* mpi_comm = array_->config()->mpi_comm(); + if(read_method == TILEDB_IO_READ) + rc = read_from_file( + filename, + tiles_file_offsets_[attribute_id] + i*sizeof(size_t), + &tmp_offset_, + sizeof(size_t)); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_from_file( + mpi_comm, + filename, + tiles_file_offsets_[attribute_id] + i*sizeof(size_t), + &tmp_offset_, + sizeof(size_t)); + + // Get coordinates pointer + offset = &tmp_offset_; - // Mark as fetched - fetched_tile_[attribute_id] = tile_i; - - // Success - return TILEDB_RS_OK; + // Return + if(rc == TILEDB_UT_OK) + return TILEDB_RS_OK; + else + return TILEDB_RS_ERR; } -int ReadState::get_tile_from_disk_cmp_none(int attribute_id, int64_t tile_i) { - // Return if the tile has already been fetched - if(tile_i == fetched_tile_[attribute_id]) - return TILEDB_RS_OK; +bool ReadState::is_empty_attribute(int attribute_id) const { + // Special case for search coordinate tiles + if(attribute_id == attribute_num_ + 1) + attribute_id = attribute_num_; - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); + return is_empty_attribute_[attribute_id]; +} +int ReadState::map_tile_from_file_cmp_gzip( + int attribute_id, + off_t offset, + size_t tile_size) { // To handle the special case of the search tile // The real attribute id corresponds to an actual attribute or coordinates int attribute_id_real = - (attribute_id == attribute_num+1) ? attribute_num : attribute_id; + (attribute_id == attribute_num_+1) ? 
attribute_num_ : attribute_id; - // For easy reference - size_t cell_size = array_schema->cell_size(attribute_id_real); - size_t full_tile_size = fragment_->tile_size(attribute_id_real); - int64_t cell_num = book_keeping_->cell_num(tile_i); - size_t tile_size = cell_num * cell_size; + // Unmap + if(map_addr_compressed_ != NULL) { + if(munmap(map_addr_compressed_, map_addr_compressed_length_)) { + PRINT_ERROR("Cannot read tile from file with map; Memory unmap error"); + return TILEDB_RS_ERR; + } + } - // Find file offset where the tile begins - off_t file_offset = tile_i * full_tile_size; + // Prepare attribute file name + std::string filename = + fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id_real) + + TILEDB_FILE_SUFFIX; - // Read tile from file - if(READ_TILE_FROM_FILE_CMP_NONE( - attribute_id, - file_offset, - tile_size) != TILEDB_RS_OK) - return TILEDB_RS_ERR; + // Calculate offset considering the page size + size_t page_size = sysconf(_SC_PAGE_SIZE); + off_t start_offset = (offset / page_size) * page_size; + size_t extra_offset = offset - start_offset; + size_t new_length = tile_size + extra_offset; - // Set the tile size - tiles_sizes_[attribute_id] = tile_size; + // Open file + int fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + munmap(map_addr_compressed_, map_addr_compressed_length_); + map_addr_compressed_ = NULL; + map_addr_compressed_length_ = 0; + tile_compressed_ = NULL; + PRINT_ERROR("Cannot read tile from file; File opening error"); + return TILEDB_RS_ERR; + } - // Set tile offset - tiles_offsets_[attribute_id] = 0; + // Map + map_addr_compressed_ = mmap( + map_addr_compressed_, + new_length, + PROT_READ, + MAP_SHARED, + fd, + start_offset); + if(map_addr_compressed_ == MAP_FAILED) { + map_addr_compressed_ = NULL; + map_addr_compressed_length_ = 0; + tile_compressed_ = NULL; + PRINT_ERROR("Cannot read tile from file; Memory map error"); + return TILEDB_RS_ERR; + } + map_addr_compressed_length_ = new_length; - // 
Mark as fetched - fetched_tile_[attribute_id] = tile_i; - - // Success - return TILEDB_RS_OK; -} + // Set properly the compressed tile pointer + tile_compressed_ = + static_cast(map_addr_compressed_) + extra_offset; -int ReadState::get_tile_from_disk_var_cmp_gzip( - int attribute_id, - int64_t tile_i) { - // Return if the tile has already been fetched - if(tile_i == fetched_tile_[attribute_id]) - return TILEDB_RS_OK; + // Close file + if(close(fd)) { + munmap(map_addr_compressed_, map_addr_compressed_length_); + map_addr_compressed_ = NULL; + map_addr_compressed_length_ = 0; + tile_compressed_ = NULL; + PRINT_ERROR("Cannot read tile from file; File closing error"); + return TILEDB_RS_ERR; + } + + return TILEDB_RS_OK; +} + +int ReadState::map_tile_from_file_var_cmp_gzip( + int attribute_id, + off_t offset, + size_t tile_size) { + // Unmap + if(map_addr_compressed_ != NULL) { + if(munmap(map_addr_compressed_, map_addr_compressed_length_)) { + PRINT_ERROR("Cannot read tile from file with map; Memory unmap error"); + return TILEDB_RS_ERR; + } + } + + // Prepare attribute file name + std::string filename = + fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id) + "_var" + + TILEDB_FILE_SUFFIX; + + // Calculate offset considering the page size + size_t page_size = sysconf(_SC_PAGE_SIZE); + off_t start_offset = (offset / page_size) * page_size; + size_t extra_offset = offset - start_offset; + size_t new_length = tile_size + extra_offset; + + // Open file + int fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + munmap(map_addr_compressed_, map_addr_compressed_length_); + map_addr_compressed_ = NULL; + map_addr_compressed_length_ = 0; + tile_compressed_ = NULL; + PRINT_ERROR("Cannot read tile from file; File opening error"); + return TILEDB_RS_ERR; + } + + // Map + // new_length could be 0 for variable length fields, mmap will fail + // if new_length == 0 + if(new_length > 0u) { + map_addr_compressed_ = mmap( + map_addr_compressed_, + 
new_length, + PROT_READ, + MAP_SHARED, + fd, + start_offset); + if(map_addr_compressed_ == MAP_FAILED) { + map_addr_compressed_ = NULL; + map_addr_compressed_length_ = 0; + tile_compressed_ = NULL; + PRINT_ERROR("Cannot read tile from file; Memory map error"); + return TILEDB_RS_ERR; + } + } else { + map_addr_var_[attribute_id] = 0; + } + map_addr_compressed_length_ = new_length; + + // Set properly the compressed tile pointer + tile_compressed_ = + static_cast(map_addr_compressed_) + extra_offset; + + // Close file + if(close(fd)) { + munmap(map_addr_compressed_, map_addr_compressed_length_); + map_addr_compressed_ = NULL; + map_addr_compressed_length_ = 0; + tile_compressed_ = NULL; + PRINT_ERROR("Cannot read tile from file; File closing error"); + return TILEDB_RS_ERR; + } + + return TILEDB_RS_OK; +} + +int ReadState::map_tile_from_file_cmp_none( + int attribute_id, + off_t offset, + size_t tile_size) { + // To handle the special case of the search tile + // The real attribute id corresponds to an actual attribute or coordinates + int attribute_id_real = + (attribute_id == attribute_num_+1) ? 
attribute_num_ : attribute_id; + + // Unmap + if(map_addr_[attribute_id] != NULL) { + if(munmap(map_addr_[attribute_id], map_addr_lengths_[attribute_id])) { + PRINT_ERROR("Cannot read tile from file with map; Memory unmap error"); + return TILEDB_RS_ERR; + } + } + + // Prepare attribute file name + std::string filename = + fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id_real) + + TILEDB_FILE_SUFFIX; + + // Calculate offset considering the page size + size_t page_size = sysconf(_SC_PAGE_SIZE); + off_t start_offset = (offset / page_size) * page_size; + size_t extra_offset = offset - start_offset; + size_t new_length = tile_size + extra_offset; + + // Open file + int fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + map_addr_[attribute_id] = NULL; + map_addr_lengths_[attribute_id] = 0; + tiles_[attribute_id] = NULL; + tiles_sizes_[attribute_id] = 0; + PRINT_ERROR("Cannot read tile from file; File opening error"); + return TILEDB_RS_ERR; + } + + // Map + bool var_size = array_schema_->var_size(attribute_id_real); + int prot = var_size ? (PROT_READ | PROT_WRITE) : PROT_READ; + int flags = var_size ? 
MAP_PRIVATE : MAP_SHARED; + map_addr_[attribute_id] = + mmap(map_addr_[attribute_id], new_length, prot, flags, fd, start_offset); + if(map_addr_[attribute_id] == MAP_FAILED) { + map_addr_[attribute_id] = NULL; + map_addr_lengths_[attribute_id] = 0; + tiles_[attribute_id] = NULL; + tiles_sizes_[attribute_id] = 0; + PRINT_ERROR("Cannot read tile from file; Memory map error"); + return TILEDB_RS_ERR; + } + map_addr_lengths_[attribute_id] = new_length; + + // Set properly the tile pointer + tiles_[attribute_id] = + static_cast(map_addr_[attribute_id]) + extra_offset; + + // Close file + if(close(fd)) { + munmap(map_addr_[attribute_id], map_addr_lengths_[attribute_id]); + map_addr_[attribute_id] = NULL; + map_addr_lengths_[attribute_id] = 0; + tiles_[attribute_id] = NULL; + tiles_sizes_[attribute_id] = 0; + PRINT_ERROR("Cannot read tile from file; File closing error"); + return TILEDB_RS_ERR; + } + + return TILEDB_RS_OK; +} + +int ReadState::map_tile_from_file_var_cmp_none( + int attribute_id, + off_t offset, + size_t tile_size) { + // Unmap + if(map_addr_var_[attribute_id] != NULL) { + if(munmap( + map_addr_var_[attribute_id], + map_addr_var_lengths_[attribute_id])) { + PRINT_ERROR("Cannot read tile from file with map; Memory unmap error"); + return TILEDB_RS_ERR; + } + } + + // Prepare attribute file name + std::string filename = + fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id) + "_var" + + TILEDB_FILE_SUFFIX; + + // Calculate offset considering the page size + size_t page_size = sysconf(_SC_PAGE_SIZE); + off_t start_offset = (offset / page_size) * page_size; + size_t extra_offset = offset - start_offset; + size_t new_length = tile_size + extra_offset; + + // Open file + int fd = open(filename.c_str(), O_RDONLY); + if(fd == -1) { + map_addr_var_[attribute_id] = NULL; + map_addr_var_lengths_[attribute_id] = 0; + tiles_var_[attribute_id] = NULL; + tiles_var_sizes_[attribute_id] = 0; + PRINT_ERROR("Cannot read tile from file; File opening 
error"); + return TILEDB_RS_ERR; + } + + // Map + // new_length could be 0 for variable length fields, mmap will fail + // if new_length == 0 + if(new_length > 0u) { + map_addr_var_[attribute_id] = mmap( + map_addr_var_[attribute_id], + new_length, + PROT_READ, + MAP_SHARED, + fd, + start_offset); + if(map_addr_var_[attribute_id] == MAP_FAILED) { + map_addr_var_[attribute_id] = NULL; + map_addr_var_lengths_[attribute_id] = 0; + tiles_var_[attribute_id] = NULL; + tiles_var_sizes_[attribute_id] = 0; + PRINT_ERROR("Cannot read tile from file; Memory map error"); + return TILEDB_RS_ERR; + } + } else { + map_addr_var_[attribute_id] = 0; + } + map_addr_var_lengths_[attribute_id] = new_length; + + // Set properly the tile pointer + tiles_var_[attribute_id] = + static_cast(map_addr_var_[attribute_id]) + extra_offset; + tiles_var_sizes_[attribute_id] = tile_size; + + // Close file + if(close(fd)) { + munmap(map_addr_var_[attribute_id], map_addr_var_lengths_[attribute_id]); + map_addr_var_[attribute_id] = NULL; + map_addr_var_lengths_[attribute_id] = 0; + tiles_var_[attribute_id] = NULL; + tiles_var_sizes_[attribute_id] = 0; + PRINT_ERROR("Cannot read tile from file; File closing error"); + return TILEDB_RS_ERR; + } + + // Success + return TILEDB_RS_OK; +} + +int ReadState::mpi_io_read_tile_from_file_cmp_gzip( + int attribute_id, + off_t offset, + size_t tile_size) { + // For easy reference + const MPI_Comm* mpi_comm = array_->config()->mpi_comm(); + + // To handle the special case of the search tile + // The real attribute id corresponds to an actual attribute or coordinates + int attribute_id_real = + (attribute_id == attribute_num_+1) ? 
attribute_num_ : attribute_id; + + // Potentially allocate compressed tile buffer + if(tile_compressed_ == NULL) { + size_t full_tile_size = fragment_->tile_size(attribute_id_real); + size_t tile_max_size = + full_tile_size + 6 + 5*(ceil(full_tile_size/16834.0)); + tile_compressed_ = malloc(tile_max_size); + tile_compressed_allocated_size_ = tile_max_size; + } + + // Prepare attribute file name + std::string filename = + fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id_real) + + TILEDB_FILE_SUFFIX; + + // Read from file + if(mpi_io_read_from_file( + mpi_comm, + filename, + offset, + tile_compressed_, + tile_size) != TILEDB_UT_OK) + return TILEDB_RS_ERR; + else + return TILEDB_RS_OK; +} + +int ReadState::mpi_io_read_tile_from_file_var_cmp_gzip( + int attribute_id, + off_t offset, + size_t tile_size) { + // For easy reference + const MPI_Comm* mpi_comm = array_->config()->mpi_comm(); + + // Potentially allocate compressed tile buffer + if(tile_compressed_ == NULL) { + tile_compressed_ = malloc(tile_size); + tile_compressed_allocated_size_ = tile_size; + } + + // Potentially expand compressed tile buffer + if(tile_compressed_allocated_size_ < tile_size) { + tile_compressed_ = realloc(tile_compressed_, tile_size); + tile_compressed_allocated_size_ = tile_size; + } + + // Prepare attribute file name + std::string filename = + fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id) + "_var" + + TILEDB_FILE_SUFFIX; + + // Read from file + if(mpi_io_read_from_file( + mpi_comm, + filename, + offset, + tile_compressed_, + tile_size) != TILEDB_UT_OK) + return TILEDB_RS_ERR; + else + return TILEDB_RS_OK; +} + +int ReadState::prepare_tile_for_reading( + int attribute_id, + int64_t tile_i) { + // For easy reference + int compression = array_schema_->compression(attribute_id); + + // Invoke the proper function based on the compression type + if(compression == TILEDB_GZIP) + return prepare_tile_for_reading_cmp_gzip(attribute_id, 
tile_i); + else + return prepare_tile_for_reading_cmp_none(attribute_id, tile_i); +} + +int ReadState::prepare_tile_for_reading_var( + int attribute_id, + int64_t tile_i) { + // For easy reference + int compression = array_schema_->compression(attribute_id); + + // Invoke the proper function based on the compression type + if(compression == TILEDB_GZIP) + return prepare_tile_for_reading_var_cmp_gzip(attribute_id, tile_i); + else + return prepare_tile_for_reading_var_cmp_none(attribute_id, tile_i); +} + +int ReadState::prepare_tile_for_reading_cmp_gzip( + int attribute_id, + int64_t tile_i) { + // Return if the tile has already been fetched + if(tile_i == fetched_tile_[attribute_id]) + return TILEDB_RS_OK; + + // To handle the special case of the search tile + // The real attribute id corresponds to an actual attribute or coordinates + int attribute_id_real = + (attribute_id == attribute_num_+1) ? attribute_num_ : attribute_id; + + // For easy reference + size_t cell_size = array_schema_->cell_size(attribute_id_real); + size_t full_tile_size = fragment_->tile_size(attribute_id_real); + int64_t cell_num = book_keeping_->cell_num(tile_i); + size_t tile_size = cell_num * cell_size; + const std::vector >& tile_offsets = + book_keeping_->tile_offsets(); + int64_t tile_num = book_keeping_->tile_num(); + + // Allocate space for the tile if needed + if(tiles_[attribute_id] == NULL) + tiles_[attribute_id] = malloc(full_tile_size); + + // Prepare attribute file name + std::string filename = fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id_real) + + TILEDB_FILE_SUFFIX; + + // Find file offset where the tile begins + off_t file_offset = tile_offsets[attribute_id_real][tile_i]; + off_t file_size = ::file_size(filename); + size_t tile_compressed_size = + (tile_i == tile_num-1) + ? 
file_size - tile_offsets[attribute_id_real][tile_i] + : tile_offsets[attribute_id_real][tile_i+1] - + tile_offsets[attribute_id_real][tile_i]; + + // Read tile from file + int rc = TILEDB_RS_OK; + int read_method = array_->config()->read_method(); + if(read_method == TILEDB_IO_READ) + rc = read_tile_from_file_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + else if(read_method == TILEDB_IO_MMAP) + rc = map_tile_from_file_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_tile_from_file_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + if(rc != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + // Decompress tile + size_t gunzip_out_size; + if(gunzip( + static_cast(tile_compressed_), + tile_compressed_size, + static_cast(tiles_[attribute_id]), + full_tile_size, + gunzip_out_size) != TILEDB_UT_OK) + return TILEDB_RS_ERR; + + // Sanity check + assert(gunzip_out_size == tile_size); + + // Set the tile size + tiles_sizes_[attribute_id] = tile_size; + + // Set tile offset + tiles_offsets_[attribute_id] = 0; + + // Mark as fetched + fetched_tile_[attribute_id] = tile_i; + + // Success + return TILEDB_RS_OK; +} + +int ReadState::prepare_tile_for_reading_cmp_none( + int attribute_id, + int64_t tile_i) { + // Return if the tile has already been fetched + if(tile_i == fetched_tile_[attribute_id]) + return TILEDB_RS_OK; + + // To handle the special case of the search tile + // The real attribute id corresponds to an actual attribute or coordinates + int attribute_id_real = + (attribute_id == attribute_num_+1) ? 
attribute_num_ : attribute_id; // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); + size_t cell_size = array_schema_->cell_size(attribute_id_real); + size_t full_tile_size = fragment_->tile_size(attribute_id_real); + int64_t cell_num = book_keeping_->cell_num(tile_i); + size_t tile_size = cell_num * cell_size; + + // Find file offset where the tile begins + off_t file_offset = tile_i * full_tile_size; + + // Read tile from file + int rc = TILEDB_RS_OK; + int read_method = array_->config()->read_method(); + if(read_method == TILEDB_IO_READ || + read_method == TILEDB_IO_MPI) + rc = set_tile_file_offset( + attribute_id, + file_offset); + else if(read_method == TILEDB_IO_MMAP) + rc = map_tile_from_file_cmp_none( + attribute_id, + file_offset, + tile_size); + if(rc != TILEDB_RS_OK) + return TILEDB_RS_ERR; + + // Set the tile size + tiles_sizes_[attribute_id] = tile_size; + + // Set tile offset + tiles_offsets_[attribute_id] = 0; + + // Mark as fetched + fetched_tile_[attribute_id] = tile_i; + + // Success + return TILEDB_RS_OK; +} + +int ReadState::prepare_tile_for_reading_var_cmp_gzip( + int attribute_id, + int64_t tile_i) { + // Return if the tile has already been fetched + if(tile_i == fetched_tile_[attribute_id]) + return TILEDB_RS_OK; // Sanity check - assert(attribute_id < attribute_num && array_schema->var_size(attribute_id)); + assert( + attribute_id < attribute_num_ && + array_schema_->var_size(attribute_id)); // For easy reference size_t cell_size = TILEDB_CELL_VAR_OFFSET_SIZE; @@ -1548,7 +2139,7 @@ int ReadState::get_tile_from_disk_var_cmp_gzip( // Prepare attribute file name std::string filename = fragment_->fragment_name() + "/" + - array_schema->attribute(attribute_id) + + array_schema_->attribute(attribute_id) + TILEDB_FILE_SUFFIX; // Find file offset where the tile begins @@ -1564,10 +2155,24 @@ int ReadState::get_tile_from_disk_var_cmp_gzip( 
tiles_[attribute_id] = malloc(full_tile_size); // Read tile from file - if(READ_TILE_FROM_FILE_CMP_GZIP( + int rc = TILEDB_RS_OK; + int read_method = array_->config()->read_method(); + if(read_method == TILEDB_IO_READ) + rc = read_tile_from_file_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + else if(read_method == TILEDB_IO_MMAP) + rc = map_tile_from_file_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_tile_from_file_cmp_gzip( attribute_id, file_offset, - tile_compressed_size) != TILEDB_RS_OK) + tile_compressed_size); + if(rc != TILEDB_RS_OK) return TILEDB_RS_ERR; // Decompress tile @@ -1593,7 +2198,7 @@ int ReadState::get_tile_from_disk_var_cmp_gzip( // Prepare variable attribute file name filename = fragment_->fragment_name() + "/" + - array_schema->attribute(attribute_id) + "_var" + + array_schema_->attribute(attribute_id) + "_var" + TILEDB_FILE_SUFFIX; // Calculate offset and compressed tile size @@ -1622,12 +2227,26 @@ int ReadState::get_tile_from_disk_var_cmp_gzip( tiles_var_allocated_size_[attribute_id] = tile_var_size; } - // Read tile from file - if(READ_TILE_FROM_FILE_VAR_CMP_GZIP( - attribute_id, - file_offset, - tile_compressed_size) != TILEDB_RS_OK) - return TILEDB_RS_ERR; + // Read tile from file + int rc = TILEDB_RS_OK; + int read_method = array_->config()->read_method(); + if(read_method == TILEDB_IO_READ) + rc = read_tile_from_file_var_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + else if(read_method == TILEDB_IO_MMAP) + rc = map_tile_from_file_var_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_tile_from_file_var_cmp_gzip( + attribute_id, + file_offset, + tile_compressed_size); + if(rc != TILEDB_RS_OK) + return TILEDB_RS_ERR; // Decompress tile if(gunzip( @@ -1658,19 +2277,17 @@ int ReadState::get_tile_from_disk_var_cmp_gzip( return TILEDB_RS_OK; } -int 
ReadState::get_tile_from_disk_var_cmp_none( +int ReadState::prepare_tile_for_reading_var_cmp_none( int attribute_id, int64_t tile_i) { // Return if the tile has already been fetched if(tile_i == fetched_tile_[attribute_id]) return TILEDB_RS_OK; - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - // Sanity check - assert(attribute_id < attribute_num && array_schema->var_size(attribute_id)); + assert( + attribute_id < attribute_num_ && + array_schema_->var_size(attribute_id)); // For easy reference size_t full_tile_size = fragment_->tile_size(attribute_id); @@ -1680,10 +2297,19 @@ int ReadState::get_tile_from_disk_var_cmp_none( off_t file_offset = tile_i * full_tile_size; // Read tile from file - if(READ_TILE_FROM_FILE_CMP_NONE( + int rc = TILEDB_RS_OK; + int read_method = array_->config()->read_method(); + if(read_method == TILEDB_IO_READ || + read_method == TILEDB_IO_MPI) + rc = set_tile_file_offset( + attribute_id, + file_offset); + else if(read_method == TILEDB_IO_MMAP) + rc = map_tile_from_file_cmp_none( attribute_id, file_offset, - tile_size) != TILEDB_RS_OK) + tile_size); + if(rc != TILEDB_RS_OK) return TILEDB_RS_ERR; // Set tile size @@ -1691,36 +2317,58 @@ int ReadState::get_tile_from_disk_var_cmp_none( // Calculate the start and end offsets for the variable-sized tile, // as well as the variable tile size - const size_t* tile_s = static_cast(tiles_[attribute_id]); - off_t start_tile_var_offset = tile_s[0]; - off_t end_tile_var_offset; + const size_t* tile_s; + if(GET_CELL_PTR_FROM_OFFSET_TILE( + attribute_id, + 0, + tile_s) != TILEDB_RS_OK) + return TILEDB_RS_ERR; + off_t start_tile_var_offset = *tile_s; + off_t end_tile_var_offset = 0; size_t tile_var_size; std::string filename = fragment_->fragment_name() + "/" + - array_schema->attribute(attribute_id) + + array_schema_->attribute(attribute_id) + TILEDB_FILE_SUFFIX; if(tile_i != tile_num - 1) { // Not the 
last tile - if(read_from_file( - filename, file_offset + full_tile_size, - &end_tile_var_offset, - TILEDB_CELL_VAR_OFFSET_SIZE) != TILEDB_UT_OK) - return TILEDB_RS_ERR; + if(read_method == TILEDB_IO_READ || + read_method == TILEDB_IO_MMAP) { + if(read_from_file( + filename, file_offset + full_tile_size, + &end_tile_var_offset, + TILEDB_CELL_VAR_OFFSET_SIZE) != TILEDB_UT_OK) + return TILEDB_RS_ERR; + } else if(read_method == TILEDB_IO_MPI) { + if(mpi_io_read_from_file( + array_->config()->mpi_comm(), + filename, file_offset + full_tile_size, + &end_tile_var_offset, + TILEDB_CELL_VAR_OFFSET_SIZE) != TILEDB_UT_OK) + return TILEDB_RS_ERR; + } tile_var_size = end_tile_var_offset - tile_s[0]; } else { // Last tile // Prepare variable attribute file name std::string filename = fragment_->fragment_name() + "/" + - array_schema->attribute(attribute_id) + "_var" + + array_schema_->attribute(attribute_id) + "_var" + TILEDB_FILE_SUFFIX; tile_var_size = file_size(filename) - tile_s[0]; } // Read tile from file - if(READ_TILE_FROM_FILE_VAR_CMP_NONE( + if(read_method == TILEDB_IO_READ || + read_method == TILEDB_IO_MPI) + rc = set_tile_var_file_offset( + attribute_id, + start_tile_var_offset); + else if(read_method == TILEDB_IO_MMAP) + rc = map_tile_from_file_var_cmp_none( attribute_id, start_tile_var_offset, - tile_var_size) != TILEDB_RS_OK) + tile_var_size); + if(rc != TILEDB_RS_OK) return TILEDB_RS_ERR; // Set offsets @@ -1731,272 +2379,123 @@ int ReadState::get_tile_from_disk_var_cmp_none( tiles_var_sizes_[attribute_id] = tile_var_size; // Shift starting offsets of variable-sized cells - shift_var_offsets(attribute_id); - - // Mark as fetched - fetched_tile_[attribute_id] = tile_i; - - // Success - return TILEDB_RS_OK; -} - -bool ReadState::is_empty_attribute(int attribute_id) const { - // Prepare attribute file name - std::string filename = - fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id) + - TILEDB_FILE_SUFFIX; - - // Check 
if the attribute file exists - return !is_file(filename); -} - -int ReadState::read_tile_from_file_cmp_gzip( - int attribute_id, - off_t offset, - size_t tile_size) { - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - - // To handle the special case of the search tile - // The real attribute id corresponds to an actual attribute or coordinates - int attribute_id_real = - (attribute_id == attribute_num+1) ? attribute_num : attribute_id; - - // Potentially allocate compressed tile buffer - if(tile_compressed_ == NULL) { - size_t full_tile_size = fragment_->tile_size(attribute_id_real); - size_t tile_max_size = - full_tile_size + 6 + 5*(ceil(full_tile_size/16834.0)); - tile_compressed_ = malloc(tile_max_size); - tile_compressed_allocated_size_ = tile_max_size; - } - - // Prepare attribute file name - std::string filename = - fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id_real) + - TILEDB_FILE_SUFFIX; - - // Read from file - if(read_from_file(filename, offset, tile_compressed_, tile_size) != - TILEDB_UT_OK) - return TILEDB_RS_ERR; - else - return TILEDB_RS_OK; -} - -int ReadState::read_tile_from_file_cmp_none( - int attribute_id, - off_t offset, - size_t tile_size) { - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - - // To handle the special case of the search tile - // The real attribute id corresponds to an actual attribute or coordinates - int attribute_id_real = - (attribute_id == attribute_num+1) ? 
attribute_num : attribute_id; - - // Allocate space for the tile if needed - if(tiles_[attribute_id] == NULL) { - size_t full_tile_size = fragment_->tile_size(attribute_id_real); - tiles_[attribute_id] = malloc(full_tile_size); - } - - // Prepare attribute file name - std::string filename = - fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id_real) + - TILEDB_FILE_SUFFIX; - - // Read from file - if(read_from_file(filename, offset, tiles_[attribute_id], tile_size) != - TILEDB_UT_OK) - return TILEDB_RS_ERR; - else - return TILEDB_RS_OK; -} - -int ReadState::read_tile_from_file_with_mmap_cmp_gzip( - int attribute_id, - off_t offset, - size_t tile_size) { - // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - - // To handle the special case of the search tile - // The real attribute id corresponds to an actual attribute or coordinates - int attribute_id_real = - (attribute_id == attribute_num+1) ? 
attribute_num : attribute_id; - - // Unmap - if(map_addr_compressed_ != NULL) { - if(munmap(map_addr_compressed_, map_addr_compressed_length_)) { - PRINT_ERROR("Cannot read tile from file with map; Memory unmap error"); - return TILEDB_RS_ERR; - } - } - - // Prepare attribute file name - std::string filename = - fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id_real) + - TILEDB_FILE_SUFFIX; - - // Calculate offset considering the page size - size_t page_size = sysconf(_SC_PAGE_SIZE); - off_t start_offset = (offset / page_size) * page_size; - size_t extra_offset = offset - start_offset; - size_t new_length = tile_size + extra_offset; - - // Open file - int fd = open(filename.c_str(), O_RDONLY); - if(fd == -1) { - munmap(map_addr_compressed_, map_addr_compressed_length_); - map_addr_compressed_ = NULL; - map_addr_compressed_length_ = 0; - tile_compressed_ = NULL; - PRINT_ERROR("Cannot read tile from file; File opening error"); - return TILEDB_RS_ERR; - } - - // Map - map_addr_compressed_ = mmap( - map_addr_compressed_, - new_length, - PROT_READ, - MAP_SHARED, - fd, - start_offset); - if(map_addr_compressed_ == MAP_FAILED) { - map_addr_compressed_ = NULL; - map_addr_compressed_length_ = 0; - tile_compressed_ = NULL; - PRINT_ERROR("Cannot read tile from file; Memory map error"); - return TILEDB_RS_ERR; - } - map_addr_compressed_length_ = new_length; - - // Set properly the compressed tile pointer - tile_compressed_ = - static_cast(map_addr_compressed_) + extra_offset; - - // Close file - if(close(fd)) { - munmap(map_addr_compressed_, map_addr_compressed_length_); - map_addr_compressed_ = NULL; - map_addr_compressed_length_ = 0; - tile_compressed_ = NULL; - PRINT_ERROR("Cannot read tile from file; File closing error"); - return TILEDB_RS_ERR; - } + shift_var_offsets(attribute_id); + + // Mark as fetched + fetched_tile_[attribute_id] = tile_i; + // Success return TILEDB_RS_OK; } -int 
ReadState::read_tile_from_file_with_mmap_cmp_none( +int ReadState::READ_FROM_TILE( int attribute_id, - off_t offset, - size_t tile_size) { + void* buffer, + size_t tile_offset, + size_t bytes_to_copy) { // For easy reference - const ArraySchema* array_schema = fragment_->array()->array_schema(); - int attribute_num = array_schema->attribute_num(); - - // To handle the special case of the search tile - // The real attribute id corresponds to an actual attribute or coordinates - int attribute_id_real = - (attribute_id == attribute_num+1) ? attribute_num : attribute_id; + char* tile = static_cast(tiles_[attribute_id]); - // Unmap - if(map_addr_[attribute_id] != NULL) { - if(munmap(map_addr_[attribute_id], map_addr_lengths_[attribute_id])) { - PRINT_ERROR("Cannot read tile from file with map; Memory unmap error"); - return TILEDB_RS_ERR; - } - } + // The tile is in main memory + if(tile != NULL) { + memcpy(buffer, tile + tile_offset, bytes_to_copy); + return TILEDB_RS_OK; + } - // Prepare attribute file name + // We need to read from the disk std::string filename = fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id_real) + + array_schema_->attribute(attribute_id) + TILEDB_FILE_SUFFIX; + int rc = TILEDB_UT_OK; + int read_method = array_->config()->read_method(); + MPI_Comm* mpi_comm = array_->config()->mpi_comm(); + if(read_method == TILEDB_IO_READ) + rc = read_from_file( + filename, + tiles_file_offsets_[attribute_id] + tile_offset, + buffer, + bytes_to_copy); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_from_file( + mpi_comm, + filename, + tiles_file_offsets_[attribute_id] + tile_offset, + buffer, + bytes_to_copy); - // Calculate offset considering the page size - size_t page_size = sysconf(_SC_PAGE_SIZE); - off_t start_offset = (offset / page_size) * page_size; - size_t extra_offset = offset - start_offset; - size_t new_length = tile_size + extra_offset; - - // Open file - int fd = open(filename.c_str(), 
O_RDONLY); - if(fd == -1) { - map_addr_[attribute_id] = NULL; - map_addr_lengths_[attribute_id] = 0; - tiles_[attribute_id] = NULL; - tiles_sizes_[attribute_id] = 0; - PRINT_ERROR("Cannot read tile from file; File opening error"); + // Return + if(rc == TILEDB_UT_OK) + return TILEDB_RS_OK; + else return TILEDB_RS_ERR; - } +} - // Map - bool var_size = - fragment_->array()->array_schema()->var_size(attribute_id_real); - int prot = var_size ? (PROT_READ | PROT_WRITE) : PROT_READ; - int flags = var_size ? MAP_PRIVATE : MAP_SHARED; - map_addr_[attribute_id] = - mmap(map_addr_[attribute_id], new_length, prot, flags, fd, start_offset); - if(map_addr_[attribute_id] == MAP_FAILED) { - map_addr_[attribute_id] = NULL; - map_addr_lengths_[attribute_id] = 0; - tiles_[attribute_id] = NULL; - tiles_sizes_[attribute_id] = 0; - PRINT_ERROR("Cannot read tile from file; Memory map error"); - return TILEDB_RS_ERR; - } - map_addr_lengths_[attribute_id] = new_length; +int ReadState::READ_FROM_TILE_VAR( + int attribute_id, + void* buffer, + size_t tile_offset, + size_t bytes_to_copy) { + // For easy reference + char* tile = static_cast(tiles_var_[attribute_id]); - // Set properly the tile pointer - tiles_[attribute_id] = - static_cast(map_addr_[attribute_id]) + extra_offset; + // The tile is in main memory + if(tile != NULL) { + memcpy(buffer, tile + tile_offset, bytes_to_copy); + return TILEDB_RS_OK; + } - // Close file - if(close(fd)) { - munmap(map_addr_[attribute_id], map_addr_lengths_[attribute_id]); - map_addr_[attribute_id] = NULL; - map_addr_lengths_[attribute_id] = 0; - tiles_[attribute_id] = NULL; - tiles_sizes_[attribute_id] = 0; - PRINT_ERROR("Cannot read tile from file; File closing error"); - return TILEDB_RS_ERR; - } + // We need to read from the disk + std::string filename = + fragment_->fragment_name() + "/" + + array_schema_->attribute(attribute_id) + "_var" + + TILEDB_FILE_SUFFIX; + int rc = TILEDB_UT_OK; + int read_method = array_->config()->read_method(); + 
MPI_Comm* mpi_comm = array_->config()->mpi_comm(); + if(read_method == TILEDB_IO_READ) + rc = read_from_file( + filename, + tiles_var_file_offsets_[attribute_id] + tile_offset, + buffer, + bytes_to_copy); + else if(read_method == TILEDB_IO_MPI) + rc = mpi_io_read_from_file( + mpi_comm, + filename, + tiles_var_file_offsets_[attribute_id] + tile_offset, + buffer, + bytes_to_copy); - return TILEDB_RS_OK; + // Return + if(rc == TILEDB_UT_OK) + return TILEDB_RS_OK; + else + return TILEDB_RS_ERR; } -int ReadState::read_tile_from_file_var_cmp_gzip( +int ReadState::read_tile_from_file_cmp_gzip( int attribute_id, off_t offset, size_t tile_size) { + // To handle the special case of the search tile + // The real attribute id corresponds to an actual attribute or coordinates + int attribute_id_real = + (attribute_id == attribute_num_+1) ? attribute_num_ : attribute_id; + // Potentially allocate compressed tile buffer if(tile_compressed_ == NULL) { - tile_compressed_ = malloc(tile_size); - tile_compressed_allocated_size_ = tile_size; - } - - // Potentially expand compressed tile buffer - if(tile_compressed_allocated_size_ < tile_size) { - tile_compressed_ = realloc(tile_compressed_, tile_size); - tile_compressed_allocated_size_ = tile_size; + size_t full_tile_size = fragment_->tile_size(attribute_id_real); + size_t tile_max_size = + full_tile_size + 6 + 5*(ceil(full_tile_size/16834.0)); + tile_compressed_ = malloc(tile_max_size); + tile_compressed_allocated_size_ = tile_max_size; } // Prepare attribute file name std::string filename = fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id) + "_var" + + array_schema_->attribute(attribute_id_real) + TILEDB_FILE_SUFFIX; // Read from file @@ -2007,190 +2506,51 @@ int ReadState::read_tile_from_file_var_cmp_gzip( return TILEDB_RS_OK; } -int ReadState::read_tile_from_file_var_cmp_none( +int ReadState::read_tile_from_file_var_cmp_gzip( int attribute_id, off_t offset, size_t tile_size) { - // 
Allocate space for the variable tile if needed - if(tiles_var_[attribute_id] == NULL) { - tiles_var_[attribute_id] = malloc(tile_size); - tiles_var_allocated_size_[attribute_id] = tile_size; + // Potentially allocate compressed tile buffer + if(tile_compressed_ == NULL) { + tile_compressed_ = malloc(tile_size); + tile_compressed_allocated_size_ = tile_size; } - // Expand variable tile buffer if necessary - if(tiles_var_allocated_size_[attribute_id] < tile_size) { - tiles_var_[attribute_id] = realloc(tiles_var_[attribute_id], tile_size); - tiles_var_allocated_size_[attribute_id] = tile_size; + // Potentially expand compressed tile buffer + if(tile_compressed_allocated_size_ < tile_size) { + tile_compressed_ = realloc(tile_compressed_, tile_size); + tile_compressed_allocated_size_ = tile_size; } - // Set the actual variable tile size - tiles_var_sizes_[attribute_id] = tile_size; - - // Prepare variable attribute file name + // Prepare attribute file name std::string filename = fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id) + "_var" + + array_schema_->attribute(attribute_id) + "_var" + TILEDB_FILE_SUFFIX; // Read from file - if(read_from_file(filename, offset, tiles_var_[attribute_id], tile_size) != + if(read_from_file(filename, offset, tile_compressed_, tile_size) != TILEDB_UT_OK) return TILEDB_RS_ERR; else - return TILEDB_RS_OK; + return TILEDB_RS_OK; } -int ReadState::read_tile_from_file_with_mmap_var_cmp_gzip( +int ReadState::set_tile_file_offset( int attribute_id, - off_t offset, - size_t tile_size) { - // Unmap - if(map_addr_compressed_ != NULL) { - if(munmap(map_addr_compressed_, map_addr_compressed_length_)) { - PRINT_ERROR("Cannot read tile from file with map; Memory unmap error"); - return TILEDB_RS_ERR; - } - } - - // Prepare attribute file name - std::string filename = - fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id) + "_var" + - TILEDB_FILE_SUFFIX; - - // 
Calculate offset considering the page size - size_t page_size = sysconf(_SC_PAGE_SIZE); - off_t start_offset = (offset / page_size) * page_size; - size_t extra_offset = offset - start_offset; - size_t new_length = tile_size + extra_offset; - - // Open file - int fd = open(filename.c_str(), O_RDONLY); - if(fd == -1) { - munmap(map_addr_compressed_, map_addr_compressed_length_); - map_addr_compressed_ = NULL; - map_addr_compressed_length_ = 0; - tile_compressed_ = NULL; - PRINT_ERROR("Cannot read tile from file; File opening error"); - return TILEDB_RS_ERR; - } - - // Map - // new_length could be 0 for variable length fields, mmap will fail - // if new_length == 0 - if(new_length > 0u) { - map_addr_compressed_ = mmap( - map_addr_compressed_, - new_length, - PROT_READ, - MAP_SHARED, - fd, - start_offset); - if(map_addr_compressed_ == MAP_FAILED) { - map_addr_compressed_ = NULL; - map_addr_compressed_length_ = 0; - tile_compressed_ = NULL; - PRINT_ERROR("Cannot read tile from file; Memory map error"); - return TILEDB_RS_ERR; - } - } else { - map_addr_var_[attribute_id] = 0; - } - map_addr_compressed_length_ = new_length; - - // Set properly the compressed tile pointer - tile_compressed_ = - static_cast(map_addr_compressed_) + extra_offset; - - // Close file - if(close(fd)) { - munmap(map_addr_compressed_, map_addr_compressed_length_); - map_addr_compressed_ = NULL; - map_addr_compressed_length_ = 0; - tile_compressed_ = NULL; - PRINT_ERROR("Cannot read tile from file; File closing error"); - return TILEDB_RS_ERR; - } + off_t offset) { + // Set file offset + tiles_file_offsets_[attribute_id] = offset; + // Success return TILEDB_RS_OK; } -int ReadState::read_tile_from_file_with_mmap_var_cmp_none( +int ReadState::set_tile_var_file_offset( int attribute_id, - off_t offset, - size_t tile_size) { - // Unmap - if(map_addr_var_[attribute_id] != NULL) { - if(munmap( - map_addr_var_[attribute_id], - map_addr_var_lengths_[attribute_id])) { - PRINT_ERROR("Cannot read tile from 
file with map; Memory unmap error"); - return TILEDB_RS_ERR; - } - } - - // Prepare attribute file name - std::string filename = - fragment_->fragment_name() + "/" + - fragment_->array()->array_schema()->attribute(attribute_id) + "_var" + - TILEDB_FILE_SUFFIX; - - // Calculate offset considering the page size - size_t page_size = sysconf(_SC_PAGE_SIZE); - off_t start_offset = (offset / page_size) * page_size; - size_t extra_offset = offset - start_offset; - size_t new_length = tile_size + extra_offset; - - // Open file - int fd = open(filename.c_str(), O_RDONLY); - if(fd == -1) { - map_addr_var_[attribute_id] = NULL; - map_addr_var_lengths_[attribute_id] = 0; - tiles_var_[attribute_id] = NULL; - tiles_var_sizes_[attribute_id] = 0; - PRINT_ERROR("Cannot read tile from file; File opening error"); - return TILEDB_RS_ERR; - } - - // Map - // new_length could be 0 for variable length fields, mmap will fail - // if new_length == 0 - if(new_length > 0u) { - map_addr_var_[attribute_id] = mmap( - map_addr_var_[attribute_id], - new_length, - PROT_READ, - MAP_SHARED, - fd, - start_offset); - if(map_addr_var_[attribute_id] == MAP_FAILED) { - map_addr_var_[attribute_id] = NULL; - map_addr_var_lengths_[attribute_id] = 0; - tiles_var_[attribute_id] = NULL; - tiles_var_sizes_[attribute_id] = 0; - PRINT_ERROR("Cannot read tile from file; Memory map error"); - return TILEDB_RS_ERR; - } - } else { - map_addr_var_[attribute_id] = 0; - } - map_addr_var_lengths_[attribute_id] = new_length; - - // Set properly the tile pointer - tiles_var_[attribute_id] = - static_cast(map_addr_var_[attribute_id]) + extra_offset; - tiles_var_sizes_[attribute_id] = tile_size; - - // Close file - if(close(fd)) { - munmap(map_addr_var_[attribute_id], map_addr_var_lengths_[attribute_id]); - map_addr_var_[attribute_id] = NULL; - map_addr_var_lengths_[attribute_id] = 0; - tiles_var_[attribute_id] = NULL; - tiles_var_sizes_[attribute_id] = 0; - PRINT_ERROR("Cannot read tile from file; File closing error"); - 
return TILEDB_RS_ERR; - } + off_t offset) { + // Set file offset + tiles_var_file_offsets_[attribute_id] = offset; // Success return TILEDB_RS_OK; diff --git a/core/src/fragment/write_state.cc b/core/src/fragment/write_state.cc index 8f674eba..9ae0b891 100644 --- a/core/src/fragment/write_state.cc +++ b/core/src/fragment/write_state.cc @@ -61,7 +61,7 @@ # define PRINT_WARNING(x) do { } while(0) #endif -#ifdef GNU_PARALLEL +#ifdef OPENMP #include #define SORT(first, last, comp) __gnu_parallel::sort((first), (last), (comp)) #else @@ -272,10 +272,20 @@ int WriteState::compress_and_write_tile(int attribute_id) { TILEDB_FILE_SUFFIX; // Write segment to file - if(write_to_file( - filename.c_str(), - tile_compressed_, - tile_compressed_size) != TILEDB_UT_OK) + int rc = TILEDB_UT_OK; + int write_method = fragment_->array()->config()->write_method(); + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + tile_compressed_, + tile_compressed_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + fragment_->array()->config()->mpi_comm(), + filename.c_str(), + tile_compressed_, + tile_compressed_size); + if(rc != TILEDB_UT_OK) return TILEDB_WS_ERR; // Append offset to book-keeping @@ -331,10 +341,20 @@ int WriteState::compress_and_write_tile_var(int attribute_id) { TILEDB_FILE_SUFFIX; // Write segment to file - if(write_to_file( - filename.c_str(), - tile_compressed_, - tile_compressed_size) != TILEDB_UT_OK) + int rc = TILEDB_UT_OK; + int write_method = fragment_->array()->config()->write_method(); + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + tile_compressed_, + tile_compressed_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + fragment_->array()->config()->mpi_comm(), + filename.c_str(), + tile_compressed_, + tile_compressed_size); + if(rc != TILEDB_UT_OK) return TILEDB_WS_ERR; // Append offset to book-keeping @@ -630,7 +650,20 @@ int 
WriteState::write_dense_attr_cmp_none( std::string filename = fragment_->fragment_name() + "/" + array_schema->attribute(attribute_id) + TILEDB_FILE_SUFFIX; - if(write_to_file(filename.c_str(), buffer, buffer_size) != TILEDB_UT_OK) + int rc = TILEDB_UT_OK; + int write_method = fragment_->array()->config()->write_method(); + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + buffer, + buffer_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + fragment_->array()->config()->mpi_comm(), + filename.c_str(), + buffer, + buffer_size); + if(rc != TILEDB_UT_OK) return TILEDB_WS_ERR; else return TILEDB_WS_OK; @@ -746,10 +779,21 @@ int WriteState::write_dense_attr_var_cmp_none( std::string filename = fragment_->fragment_name() + "/" + array_schema->attribute(attribute_id) + "_var" + TILEDB_FILE_SUFFIX; - if(write_to_file( - filename.c_str(), - buffer_var, - buffer_var_size) != TILEDB_UT_OK) + int rc = TILEDB_UT_OK; + int write_method = fragment_->array()->config()->write_method(); + MPI_Comm* mpi_comm = fragment_->array()->config()->mpi_comm(); + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + buffer_var, + buffer_var_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + mpi_comm, + filename.c_str(), + buffer_var, + buffer_var_size); + if(rc != TILEDB_UT_OK) return TILEDB_WS_ERR; // Recalculate offsets @@ -765,9 +809,16 @@ int WriteState::write_dense_attr_var_cmp_none( filename = fragment_->fragment_name() + "/" + array_schema->attribute(attribute_id) + TILEDB_FILE_SUFFIX; - int rc = write_to_file( - filename.c_str(), - shifted_buffer, + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + shifted_buffer, + buffer_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + mpi_comm, + filename.c_str(), + shifted_buffer, buffer_size); // Clean up @@ -1037,9 +1088,23 @@ int WriteState::write_sparse_attr_cmp_none( 
std::string filename = fragment_->fragment_name() + "/" + array_schema->attribute(attribute_id) + TILEDB_FILE_SUFFIX; - if(write_to_file(filename.c_str(), buffer, buffer_size) != TILEDB_UT_OK) + int rc = TILEDB_UT_OK; + int write_method = fragment_->array()->config()->write_method(); + MPI_Comm* mpi_comm = fragment_->array()->config()->mpi_comm(); + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + buffer, + buffer_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + mpi_comm, + filename.c_str(), + buffer, + buffer_size); + if(rc != TILEDB_UT_OK) return TILEDB_WS_ERR; - else + else return TILEDB_WS_OK; } @@ -1162,10 +1227,21 @@ int WriteState::write_sparse_attr_var_cmp_none( std::string filename = fragment_->fragment_name() + "/" + array_schema->attribute(attribute_id) + "_var" + TILEDB_FILE_SUFFIX; - if(write_to_file( - filename.c_str(), - buffer_var, - buffer_var_size) != TILEDB_UT_OK) + int rc = TILEDB_UT_OK; + int write_method = fragment_->array()->config()->write_method(); + MPI_Comm* mpi_comm = fragment_->array()->config()->mpi_comm(); + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + buffer_var, + buffer_var_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + mpi_comm, + filename.c_str(), + buffer_var, + buffer_var_size); + if(rc != TILEDB_UT_OK) return TILEDB_WS_ERR; // Recalculate offsets @@ -1181,9 +1257,16 @@ int WriteState::write_sparse_attr_var_cmp_none( filename = fragment_->fragment_name() + "/" + array_schema->attribute(attribute_id) + TILEDB_FILE_SUFFIX; - int rc = write_to_file( - filename.c_str(), - shifted_buffer, + if(write_method == TILEDB_IO_WRITE) + rc = write_to_file( + filename.c_str(), + shifted_buffer, + buffer_size); + else if(write_method == TILEDB_IO_MPI) + rc = mpi_io_write_to_file( + mpi_comm, + filename.c_str(), + shifted_buffer, buffer_size); // Clean up diff --git a/core/src/metadata/metadata.cc 
b/core/src/metadata/metadata.cc index 5a7cc2aa..ec86c1f2 100644 --- a/core/src/metadata/metadata.cc +++ b/core/src/metadata/metadata.cc @@ -151,7 +151,8 @@ int Metadata::init( const std::vector& book_keeping, int mode, const char** attributes, - int attribute_num) { + int attribute_num, + const Config* config) { // Sanity check on mode if(mode != TILEDB_METADATA_READ && mode != TILEDB_METADATA_WRITE) { @@ -209,7 +210,8 @@ int Metadata::init( array_mode, (const char**) array_attributes, array_attribute_num, - NULL); + NULL, + config); // Clean up for(int i=0; i +inline bool cell_in_subarray(const T* cell, const T* subarray, int dim_num) { for(int i=0; i subarray[2*i+1]) - return false; + if(cell[i] >= subarray[2*i] && cell[i] <= subarray[2*i+1]) + continue; // Inside this dimension domain + + return false; // NOT inside this dimension domain } return true; @@ -298,6 +301,8 @@ bool empty_value(T value) { return value == T(TILEDB_EMPTY_FLOAT32); else if(&typeid(T) == &typeid(double)) return value == T(TILEDB_EMPTY_FLOAT64); + else + return false; } int expand_buffer(void*& buffer, size_t& buffer_allocated_size) { @@ -585,12 +590,119 @@ bool is_workspace(const std::string& dir) { return false; } +int mpi_io_read_from_file( + const MPI_Comm* mpi_comm, + const std::string& filename, + off_t offset, + void* buffer, + size_t length) { + // Sanity check + if(mpi_comm == NULL) { + PRINT_ERROR("Cannot read from file; Invalid MPI communicator"); + return TILEDB_UT_ERR; + } + + // Open file + MPI_File fh; + if(MPI_File_open( + *mpi_comm, + filename.c_str(), + MPI_MODE_RDONLY, + MPI_INFO_NULL, + &fh)) { + PRINT_ERROR("Cannot read from file; File opening error"); + return TILEDB_UT_ERR; + } + + // Read + MPI_File_seek(fh, offset, MPI_SEEK_SET); + MPI_Status mpi_status; + if(MPI_File_read(fh, buffer, length, MPI_CHAR, &mpi_status)) { + PRINT_ERROR("Cannot read from file; File reading error"); + return TILEDB_UT_ERR; + } + + // Close file + if(MPI_File_close(&fh)) { + 
PRINT_ERROR("Cannot read from file; File closing error"); + return TILEDB_UT_ERR; + } + + // Success + return TILEDB_UT_OK; +} + +int mpi_io_write_to_file( + const MPI_Comm* mpi_comm, + const char* filename, + const void* buffer, + size_t buffer_size) { + // Open file + MPI_File fh; + if(MPI_File_open( + *mpi_comm, + filename, + MPI_MODE_WRONLY | MPI_MODE_APPEND | + MPI_MODE_CREATE | MPI_MODE_SEQUENTIAL, + MPI_INFO_NULL, + &fh)) { + PRINT_ERROR(std::string("Cannot write to file '") + filename + + "'; File opening error"); + return TILEDB_UT_ERR; + } + + // Append attribute data to the file + MPI_Status mpi_status; + if(MPI_File_write(fh, buffer, buffer_size, MPI_CHAR, &mpi_status)) { + PRINT_ERROR(std::string("Cannot write to file '") + filename + + "'; File writing error"); + return TILEDB_UT_ERR; + } + + // Sync + if(MPI_File_sync(fh)) { + PRINT_ERROR(std::string("Cannot write to file '") + filename + + "'; File syncing error"); + return TILEDB_UT_ERR; + } + + // Close file + if(MPI_File_close(&fh)) { + PRINT_ERROR(std::string("Cannot write to file '") + filename + + "'; File closing error"); + return TILEDB_UT_ERR; + } + + // Success + return TILEDB_UT_OK; +} + +#ifdef OPENMP int mutex_destroy(omp_lock_t* mtx) { omp_destroy_lock(mtx); return TILEDB_UT_OK; } +int mutex_init(omp_lock_t* mtx) { + omp_init_lock(mtx); + + return TILEDB_UT_OK; +} + +int mutex_lock(omp_lock_t* mtx) { + omp_set_lock(mtx); + + return TILEDB_UT_OK; +} + +int mutex_unlock(omp_lock_t* mtx) { + omp_unset_lock(mtx); + + return TILEDB_UT_OK; +} +#endif + int mutex_destroy(pthread_mutex_t* mtx) { if(pthread_mutex_destroy(mtx) != 0) { PRINT_ERROR("Cannot destroy mutex"); @@ -600,12 +712,6 @@ int mutex_destroy(pthread_mutex_t* mtx) { } } -int mutex_init(omp_lock_t* mtx) { - omp_init_lock(mtx); - - return TILEDB_UT_OK; -} - int mutex_init(pthread_mutex_t* mtx) { if(pthread_mutex_init(mtx, NULL) != 0) { PRINT_ERROR("Cannot initialize mutex"); @@ -615,12 +721,6 @@ int mutex_init(pthread_mutex_t* 
mtx) { } } -int mutex_lock(omp_lock_t* mtx) { - omp_set_lock(mtx); - - return TILEDB_UT_OK; -} - int mutex_lock(pthread_mutex_t* mtx) { if(pthread_mutex_lock(mtx) != 0) { PRINT_ERROR("Cannot lock mutex"); @@ -630,12 +730,6 @@ int mutex_lock(pthread_mutex_t* mtx) { } } -int mutex_unlock(omp_lock_t* mtx) { - omp_unset_lock(mtx); - - return TILEDB_UT_OK; -} - int mutex_unlock(pthread_mutex_t* mtx) { if(pthread_mutex_unlock(mtx) != 0) { PRINT_ERROR("Cannot unlock mutex"); @@ -843,7 +937,7 @@ int write_to_file( // Open file int fd = open( filename, - O_WRONLY | O_APPEND | O_CREAT | O_SYNC, + O_WRONLY | O_APPEND | O_CREAT, S_IRWXU); if(fd == -1) { PRINT_ERROR(std::string("Cannot write to file '") + filename + @@ -859,6 +953,13 @@ int write_to_file( return TILEDB_UT_ERR; } + // Sync + if(fsync(fd)) { + PRINT_ERROR(std::string("Cannot write to file '") + filename + + "'; File syncing error"); + return TILEDB_UT_ERR; + } + // Close file if(close(fd)) { PRINT_ERROR(std::string("Cannot write to file '") + filename + diff --git a/core/src/storage_manager/config.cc b/core/src/storage_manager/config.cc new file mode 100644 index 00000000..fc3b6e25 --- /dev/null +++ b/core/src/storage_manager/config.cc @@ -0,0 +1,133 @@ +/** + * @file config.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file implements the Config class. + */ + + + +#include "config.h" +#include "constants.h" + + + + +/* ****************************** */ +/* MACROS */ +/* ****************************** */ + +#if VERBOSE == 1 +# define PRINT_ERROR(x) std::cerr << "[TileDB] Error: " << x << ".\n" +# define PRINT_WARNING(x) std::cerr << "[TileDB] Warning: " \ + << x << ".\n" +#elif VERBOSE == 2 +# define PRINT_ERROR(x) std::cerr << "[TileDB::StorageManager] Error: " \ + << x << ".\n" +# define PRINT_WARNING(x) std::cerr << "[TileDB::StorageManager] Warning: " \ + << x << ".\n" +#else +# define PRINT_ERROR(x) do { } while(0) +# define PRINT_WARNING(x) do { } while(0) +#endif + + + + +/* ****************************** */ +/* CONSTRUCTORS & DESTRUCTORS */ +/* ****************************** */ + +Config::Config() { + // Default values + home_ = ""; + read_method_ = TILEDB_IO_MMAP; + write_method_ = TILEDB_IO_WRITE; + mpi_comm_ = NULL; +} + +Config::~Config() { +} + + + + +/* ****************************** */ +/* MUTATORS */ +/* ****************************** */ + +void Config::init( + const char* home, + MPI_Comm* mpi_comm, + int read_method, + int write_method) { + // Initialize home + if(home == NULL) + home_ = ""; + else + home_ = home; + + // Initialize MPI communicator + mpi_comm_ = mpi_comm; + + // Initialize read method + read_method_ = read_method; + if(read_method_ != TILEDB_IO_READ && + read_method_ != TILEDB_IO_MMAP && + read_method_ != TILEDB_IO_MPI) + 
read_method_ = TILEDB_IO_MMAP; // Use default + + // Initialize write method + write_method_ = write_method; + if(write_method_ != TILEDB_IO_WRITE && + write_method_ != TILEDB_IO_MPI) + write_method_ = TILEDB_IO_WRITE; // Use default +} + + + + +/* ****************************** */ +/* ACCESSORS */ +/* ****************************** */ + +const std::string& Config::home() const { + return home_; +} + +MPI_Comm* Config::mpi_comm() const { + return mpi_comm_; +} + +int Config::read_method() const { + return read_method_; +} + +int Config::write_method() const { + return write_method_; +} diff --git a/core/src/storage_manager/storage_manager.cc b/core/src/storage_manager/storage_manager.cc index 9717eefd..186c6d24 100755 --- a/core/src/storage_manager/storage_manager.cc +++ b/core/src/storage_manager/storage_manager.cc @@ -59,7 +59,7 @@ # define PRINT_WARNING(x) do { } while(0) #endif -#ifdef GNU_PARALLEL +#ifdef OPENMP #include #define SORT_LIB __gnu_parallel #else @@ -93,33 +93,17 @@ StorageManager::~StorageManager() { /* ****************************** */ int StorageManager::finalize() { + if(config_ != NULL) + delete config_; + return open_array_mtx_destroy(); } -int StorageManager::init(const char* config_filename) { +int StorageManager::init(Config* config) { // Set configuration parameters - if(config_filename == NULL) - config_set_default(); - else if(config_set(config_filename) != TILEDB_SM_OK) + if(config_set(config) != TILEDB_SM_OK) return TILEDB_SM_ERR; - // Set the TileDB home directory - tiledb_home_ = TILEDB_HOME; - if(tiledb_home_ == "") { - auto env_home_ptr = getenv("HOME"); - tiledb_home_ = env_home_ptr ? 
env_home_ptr : ""; - if(tiledb_home_ == "") { - char cwd[1024]; - if(getcwd(cwd, sizeof(cwd)) != NULL) { - tiledb_home_ = cwd; - } else { - PRINT_ERROR("Cannot set TileDB home directory"); - return TILEDB_SM_ERR; - } - } - tiledb_home_ += "/.tiledb"; - } - // Set the master catalog directory master_catalog_dir_ = tiledb_home_ + "/" + TILEDB_SM_MASTER_CATALOG; @@ -128,7 +112,9 @@ int StorageManager::init(const char* config_filename) { if(!is_dir(tiledb_home_)) { if(create_dir(tiledb_home_) != TILEDB_UT_OK) return TILEDB_SM_ERR; + } + if(!is_metadata(master_catalog_dir_)) { if(master_catalog_create() != TILEDB_SM_OK) return TILEDB_SM_ERR; } @@ -518,7 +504,7 @@ int StorageManager::array_init( return TILEDB_SM_ERR; // Open the array - OpenArray* open_array; + OpenArray* open_array = NULL; if(mode == TILEDB_ARRAY_READ) { if(array_open(real_dir(array_dir), open_array) != TILEDB_SM_OK) return TILEDB_SM_ERR; @@ -533,7 +519,8 @@ int StorageManager::array_init( mode, attributes, attribute_num, - subarray) != TILEDB_AR_OK) { + subarray, + config_) != TILEDB_AR_OK) { delete array_schema; delete array; array = NULL; @@ -854,7 +841,7 @@ int StorageManager::metadata_init( return TILEDB_SM_ERR; // Open the array that implements the metadata - OpenArray* open_array; + OpenArray* open_array = NULL; if(mode == TILEDB_METADATA_READ) { if(array_open(real_dir(metadata_dir), open_array) != TILEDB_SM_OK) return TILEDB_SM_ERR; @@ -868,7 +855,8 @@ int StorageManager::metadata_init( open_array->book_keeping_, mode, attributes, - attribute_num); + attribute_num, + config_); // Return if(rc != TILEDB_MT_OK) { @@ -1065,7 +1053,7 @@ int StorageManager::delete_entire(const std::string& dir) { } else if(is_metadata(dir)) { return metadata_delete(dir); } else { - PRINT_ERROR("Clear failed; Invalid directory"); + PRINT_ERROR("Delete failed; Invalid directory"); return TILEDB_SM_ERR; } @@ -1369,14 +1357,34 @@ int StorageManager::array_open( return TILEDB_SM_OK; } -int StorageManager::config_set(const 
char* config_filename) { +int StorageManager::config_set(Config* config) { + // Store config locally + config_ = config; + + // Set the TileDB home directory + tiledb_home_ = config->home(); + if(tiledb_home_ == "") { + auto env_home_ptr = getenv("HOME"); + tiledb_home_ = env_home_ptr ? env_home_ptr : ""; + if(tiledb_home_ == "") { + char cwd[1024]; + if(getcwd(cwd, sizeof(cwd)) != NULL) { + tiledb_home_ = cwd; + } else { + PRINT_ERROR("Cannot set TileDB home directory"); + return TILEDB_SM_ERR; + } + } + tiledb_home_ += "/.tiledb"; + } + + // Get read path + tiledb_home_ = real_dir(tiledb_home_); + // Success return TILEDB_SM_OK; } -void StorageManager::config_set_default() { -} - int StorageManager::consolidation_filelock_create( const std::string& dir) const { std::string filename = dir + "/" + TILEDB_SM_CONSOLIDATION_FILELOCK_NAME; @@ -1828,7 +1836,11 @@ int StorageManager::metadata_move( } int StorageManager::open_array_mtx_destroy() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_destroy(&open_array_omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_destroy(&open_array_pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) @@ -1838,7 +1850,11 @@ int StorageManager::open_array_mtx_destroy() { } int StorageManager::open_array_mtx_init() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_init(&open_array_omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_init(&open_array_pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) @@ -1848,7 +1864,11 @@ int StorageManager::open_array_mtx_init() { } int StorageManager::open_array_mtx_lock() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_lock(&open_array_omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_lock(&open_array_pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) @@ -1858,7 +1878,11 @@ int StorageManager::open_array_mtx_lock() { } int 
StorageManager::open_array_mtx_unlock() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_unlock(&open_array_omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_unlock(&open_array_pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) @@ -2070,7 +2094,11 @@ int StorageManager::workspace_move( } int StorageManager::OpenArray::mutex_destroy() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_destroy(&omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_destroy(&pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) @@ -2080,7 +2108,11 @@ int StorageManager::OpenArray::mutex_destroy() { } int StorageManager::OpenArray::mutex_init() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_init(&omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_init(&pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) @@ -2090,7 +2122,11 @@ int StorageManager::OpenArray::mutex_init() { } int StorageManager::OpenArray::mutex_lock() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_lock(&omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_lock(&pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) @@ -2100,7 +2136,11 @@ int StorageManager::OpenArray::mutex_lock() { } int StorageManager::OpenArray::mutex_unlock() { +#ifdef OPENMP int rc_omp_mtx = ::mutex_unlock(&omp_mtx_); +#else + int rc_omp_mtx = TILEDB_UT_OK; +#endif int rc_pthread_mtx = ::mutex_unlock(&pthread_mtx_); if(rc_pthread_mtx != TILEDB_UT_OK || rc_omp_mtx != TILEDB_UT_OK) diff --git a/examples/src/tiledb_array_aio_read_dense.cc b/examples/src/tiledb_array_aio_read_dense.cc new file mode 100644 index 00000000..37cf37d5 --- /dev/null +++ b/examples/src/tiledb_array_aio_read_dense.cc @@ -0,0 +1,113 @@ +/** + * @file tiledb_array_aio_read_dense_1.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright 
Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * It shows how to read asynchronously from a dense array. 
+ */ + +#include "c_api.h" +#include +#include + +// Simply prints the input string to stdout +void *print_upon_completion(void* s) { + printf("%s\n", (char*) s); + + return NULL; +} + +int main() { + // Initialize context with the default configuration parameters + TileDB_CTX* tiledb_ctx; + tiledb_ctx_init(&tiledb_ctx, NULL); + + // Initialize array + TileDB_Array* tiledb_array; + tiledb_array_init( + tiledb_ctx, // Context + &tiledb_array, // Array object + "my_workspace/dense_arrays/my_array_A", // Array name + TILEDB_ARRAY_READ, // Mode + NULL, // Whole domain + NULL, // All attributes + 0); // Number of attributes + + // Prepare subarray + int64_t subarray[] = { 3, 4, 2, 4 }; // [3,4] on first dim, [2,4] on second + + // Prepare cell buffers + int buffer_a1[16]; + size_t buffer_a2[16]; + char buffer_var_a2[40]; + float buffer_a3[32]; + void* buffers[] = { buffer_a1, buffer_a2, buffer_var_a2, buffer_a3 }; + size_t buffer_sizes[] = + { + sizeof(buffer_a1), + sizeof(buffer_a2), + sizeof(buffer_var_a2), + sizeof(buffer_a3) + }; + + // Prepare AIO request + TileDB_AIO_Request tiledb_aio_request; + memset(&tiledb_aio_request, 0, sizeof(struct TileDB_AIO_Request)); + tiledb_aio_request.buffers_ = buffers; + tiledb_aio_request.buffer_sizes_ = buffer_sizes; + tiledb_aio_request.subarray_ = subarray; + tiledb_aio_request.completion_handle_ = print_upon_completion; + char s[100] = "AIO request completed"; + tiledb_aio_request.completion_data_ = s; + + // Read from array + tiledb_array_aio_read(tiledb_array, &tiledb_aio_request); + + // Wait for AIO to complete + printf("AIO in progress\n"); + while(tiledb_aio_request.status_ != TILEDB_AIO_COMPLETED); + + // Print cell values + int64_t result_num = buffer_sizes[0] / sizeof(int); + printf(" a1\t a2\t (a3.first, a3.second)\n"); + printf("-----------------------------------------\n"); + for(int i=0; i + +// Simply prints the input string to stdout +void *print_upon_completion(void* s) { + printf("%s\n", (char*) s); + + 
return NULL; +} + +int main() { + // Initialize context with the default configuration parameters + TileDB_CTX* tiledb_ctx; + tiledb_ctx_init(&tiledb_ctx, NULL); + + // Initialize array + TileDB_Array* tiledb_array; + tiledb_array_init( + tiledb_ctx, // Context + &tiledb_array, // Array object + "my_workspace/dense_arrays/my_array_A", // Array name + TILEDB_ARRAY_WRITE, // Mode + NULL, // Entire domain + NULL, // All attributes + 0); // Number of attributes + + // Prepare cell buffers + int buffer_a1[] = + { + 0, 1, 2, 3, // Upper left tile + 4, 5, 6, 7, // Upper right tile + 8, 9, 10, 11, // Lower left tile + 12, 13, 14, 15 // Lower right tile + }; + size_t buffer_a2[] = + { + 0, 1, 3, 6, // Upper left tile + 10, 11, 13, 16, // Upper right tile + 20, 21, 23, 26, // Lower left tile + 30, 31, 33, 36 // Lower right tile + }; + char buffer_var_a2[] = + "abbcccdddd" // Upper left tile + "effggghhhh" // Upper right tile + "ijjkkkllll" // Lower left tile + "mnnooopppp"; // Lower right tile + float buffer_a3[] = + { + 0.1, 0.2, 1.1, 1.2, 2.1, 2.2, 3.1, 3.2, // Upper left tile + 4.1, 4.2, 5.1, 5.2, 6.1, 6.2, 7.1, 7.2, // Upper right tile + 8.1, 8.2, 9.1, 9.2, 10.1, 10.2, 11.1, 11.2, // Lower left tile + 12.1, 12.2, 13.1, 13.2, 14.1, 14.2, 15.1, 15.2, // Lower right tile + }; + void* buffers[] = { buffer_a1, buffer_a2, buffer_var_a2, buffer_a3 }; + size_t buffer_sizes[] = + { + sizeof(buffer_a1), + sizeof(buffer_a2), + sizeof(buffer_var_a2)-1, // No need to store the last '\0' character + sizeof(buffer_a3) + }; + + // Prepare AIO request + TileDB_AIO_Request tiledb_aio_request; + // ALWAYS zero out the struct before populating it + memset(&tiledb_aio_request, 0, sizeof(struct TileDB_AIO_Request)); + tiledb_aio_request.buffers_ = buffers; + tiledb_aio_request.buffer_sizes_ = buffer_sizes; + tiledb_aio_request.completion_handle_ = print_upon_completion; + char s[100] = "AIO request completed"; + tiledb_aio_request.completion_data_ = s; + + // Write to array + 
tiledb_array_aio_write(tiledb_array, &tiledb_aio_request); + + // Wait for AIO to complete + printf("AIO in progress\n"); + while(tiledb_aio_request.status_ != TILEDB_AIO_COMPLETED); + + // Finalize array + tiledb_array_finalize(tiledb_array); + + // Finalize context + tiledb_ctx_finalize(tiledb_ctx); + + return 0; +} diff --git a/examples/src/tiledb_array_create_sparse.cc b/examples/src/tiledb_array_create_sparse.cc index 93a8cfc9..6ea8d25b 100644 --- a/examples/src/tiledb_array_create_sparse.cc +++ b/examples/src/tiledb_array_create_sparse.cc @@ -59,6 +59,11 @@ int main() { TILEDB_NO_COMPRESSION, // a3 TILEDB_NO_COMPRESSION // coordinates }; + int64_t tile_extents[] = + { + 2, // d1 + 2 // d2 + }; const int types[] = { TILEDB_INT32, // a1 @@ -75,7 +80,7 @@ int main() { attributes, // Attributes 3, // Number of attributes 2, // Capacity - TILEDB_HILBERT, // Cell order + TILEDB_ROW_MAJOR, // Cell order cell_val_num, // Number of cell values per attribute compression, // Compression 0, // Sparse array @@ -83,9 +88,9 @@ int main() { 2, // Number of dimensions domain, // Domain 4*sizeof(int64_t), // Domain length in bytes - NULL, // Tile extents - 0, // Tile extents length in bytes - 0, // Tile order (will be ingored) + tile_extents, // Tile extents + 2*sizeof(int64_t), // Tile extents length in bytes + TILEDB_ROW_MAJOR, // Tile order types // Types ); diff --git a/examples/src/tiledb_array_parallel_consolidate_sparse.cc b/examples/src/tiledb_array_parallel_consolidate_sparse.cc index 5b50b8da..db52e034 100644 --- a/examples/src/tiledb_array_parallel_consolidate_sparse.cc +++ b/examples/src/tiledb_array_parallel_consolidate_sparse.cc @@ -65,12 +65,12 @@ int main() { // Prepare cell buffers // --- First read --- const int64_t subarray_1[] = { 1, 2, 1, 4 }; - int buffer_a1_1[4]; + int buffer_a1_1[10]; void* buffers_1[] = { buffer_a1_1 }; size_t buffer_sizes_1[] = { sizeof(buffer_a1_1) }; // --- Upper right tile --- const int64_t subarray_2[] = { 3, 4, 1, 4 }; - int 
buffer_a1_2[4]; + int buffer_a1_2[10]; void* buffers_2[] = { buffer_a1_2 }; size_t buffer_sizes_2[] = { sizeof(buffer_a1_2) }; diff --git a/examples/src/tiledb_array_parallel_read_dense_1.cc b/examples/src/tiledb_array_parallel_read_dense_1.cc index 626ecd5e..5bafce0f 100644 --- a/examples/src/tiledb_array_parallel_read_dense_1.cc +++ b/examples/src/tiledb_array_parallel_read_dense_1.cc @@ -84,7 +84,7 @@ int main() { pthread_t threads[4]; thread_data_t thread_data[4]; - // Write in parallel + // Read in parallel for(int i=0; i<4; ++i) { // Populate the thread data thread_data[i].tiledb_ctx = tiledb_ctx; diff --git a/examples/src/tiledb_array_parallel_read_dense_2.cc b/examples/src/tiledb_array_parallel_read_dense_2.cc index cc48abe6..5d2def45 100644 --- a/examples/src/tiledb_array_parallel_read_dense_2.cc +++ b/examples/src/tiledb_array_parallel_read_dense_2.cc @@ -31,11 +31,10 @@ */ #include "c_api.h" -#include #include - - +#ifdef OPENMP +#include // The function to be computed in parallel void parallel_read( @@ -162,3 +161,12 @@ void parallel_read( tiledb_array_finalize(tiledb_array); } +#else + +int main() { + printf("OpenMP not supported."); + + return 0; +} + +#endif diff --git a/examples/src/tiledb_array_parallel_read_mpi_io_dense.cc b/examples/src/tiledb_array_parallel_read_mpi_io_dense.cc new file mode 100644 index 00000000..04986dd1 --- /dev/null +++ b/examples/src/tiledb_array_parallel_read_mpi_io_dense.cc @@ -0,0 +1,126 @@ +/** + * @file tiledb_array_read_mpi_io_dense.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to 
whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * It shows how to read from a dense array in parallel with MPI, activating + * also the MPI-IO read mode (although the latter is optional - the user + * could alternatively use mmap or standard OS read). Note that the case + * of sparse arrays is similar. 
+ */ + +#include "c_api.h" +#include +#include +#include + + + + +int main(int argc, char** argv) { + // Initialize MPI and get rank + MPI_Init(&argc, &argv); + int rank; + MPI_Comm mpi_comm = MPI_COMM_WORLD; + MPI_Comm_rank(mpi_comm, &rank); + + // Properly set the configuration parameters + TileDB_Config tiledb_config; + memset(&tiledb_config, 0, sizeof(struct TileDB_Config)); + tiledb_config.read_method_ = TILEDB_IO_MPI; // Activate MPI-IO + tiledb_config.mpi_comm_ = &mpi_comm; + + // Initialize context with the default configuration parameters + TileDB_CTX* tiledb_ctx; + tiledb_ctx_init(&tiledb_ctx, &tiledb_config); + + // Array name + const char* array_name = "my_workspace/dense_arrays/my_array_A"; + + // Prepare cell buffers + // --- Upper left tile --- + const int64_t subarray_0[] = { 1, 2, 1, 2 }; + // --- Upper right tile --- + const int64_t subarray_1[] = { 1, 2, 3, 4 }; + // --- Lower left tile --- + const int64_t subarray_2[] = { 3, 4, 1, 2 }; + // --- Lower right tile --- + const int64_t subarray_3[] = { 3, 4, 3, 4 }; + + // Set buffers + int buffer[4]; + void* buffers[] = { buffer }; + size_t buffer_sizes[] = { sizeof(buffer) }; + + // Only attribute "a1" is needed + const char* attributes[] = { "a1" }; + + // Choose subarray based on rank + const int64_t* subarray; + if(rank == 0) + subarray = subarray_0; + else if(rank == 1) + subarray = subarray_1; + else if(rank == 2) + subarray = subarray_2; + else if(rank == 3) + subarray = subarray_3; + + // Initialize array + TileDB_Array* tiledb_array; + tiledb_array_init( + tiledb_ctx, // Context + &tiledb_array, // Array object + array_name, // Array name + TILEDB_ARRAY_READ, // Mode + subarray, // Subarray + attributes, // Subset on attributes + 1); // Number of attributes + + // Read from array + tiledb_array_read(tiledb_array, buffers, buffer_sizes); + + // Finalize array + tiledb_array_finalize(tiledb_array); + + // Output result + int total_count = 0; + for(int i=0; i<4; ++i) + if(buffer[i] > 10) + 
++total_count; + printf("Process %d: Number of a1 values greater " + "than 10: %d \n", rank, total_count); + + // Finalize context + tiledb_ctx_finalize(tiledb_ctx); + + // Finalize MPI + MPI_Finalize(); + + return 0; +} + diff --git a/examples/src/tiledb_array_parallel_read_sparse_1.cc b/examples/src/tiledb_array_parallel_read_sparse_1.cc index 50085c3e..956c5711 100644 --- a/examples/src/tiledb_array_parallel_read_sparse_1.cc +++ b/examples/src/tiledb_array_parallel_read_sparse_1.cc @@ -61,12 +61,12 @@ int main() { // Prepare cell buffers // --- First read --- const int64_t subarray_1[] = { 1, 2, 1, 4 }; - int buffer_a1_1[4]; + int buffer_a1_1[10]; void* buffers_1[] = { buffer_a1_1 }; size_t buffer_sizes_1[] = { sizeof(buffer_a1_1) }; // --- Upper right tile --- const int64_t subarray_2[] = { 3, 4, 1, 4 }; - int buffer_a1_2[4]; + int buffer_a1_2[10]; void* buffers_2[] = { buffer_a1_2 }; size_t buffer_sizes_2[] = { sizeof(buffer_a1_2) }; @@ -134,8 +134,8 @@ void *parallel_read(void* args) { data->count = 0; int* a1 = (int*) data->buffers[0]; int num = data->buffer_sizes[0] / sizeof(int); - for(int i=0; i 5) + for(int i=0; i 5) ++data->count; // Finalize array diff --git a/examples/src/tiledb_array_parallel_read_sparse_2.cc b/examples/src/tiledb_array_parallel_read_sparse_2.cc index 4b209e50..3cd1019e 100644 --- a/examples/src/tiledb_array_parallel_read_sparse_2.cc +++ b/examples/src/tiledb_array_parallel_read_sparse_2.cc @@ -31,10 +31,10 @@ */ #include "c_api.h" -#include #include - +#ifdef OPENMP +#include // The function to be computed in parallel @@ -144,3 +144,12 @@ void parallel_read( tiledb_array_finalize(tiledb_array); } +#else + +int main() { + printf("OpenMP not supported."); + + return 0; +} + +#endif diff --git a/examples/src/tiledb_array_parallel_write_dense_2.cc b/examples/src/tiledb_array_parallel_write_dense_2.cc index 9a86d3ad..e9265648 100644 --- a/examples/src/tiledb_array_parallel_write_dense_2.cc +++ 
b/examples/src/tiledb_array_parallel_write_dense_2.cc @@ -31,6 +31,8 @@ */ #include "c_api.h" + +#ifdef OPENMP #include @@ -178,3 +180,14 @@ void parallel_write( tiledb_array_finalize(tiledb_array); } +#else + +#include + +int main() { + printf("OpenMP not supported."); + + return 0; +} + +#endif diff --git a/examples/src/tiledb_array_parallel_write_sparse_1.cc b/examples/src/tiledb_array_parallel_write_sparse_1.cc index c67ab894..1230d04b 100644 --- a/examples/src/tiledb_array_parallel_write_sparse_1.cc +++ b/examples/src/tiledb_array_parallel_write_sparse_1.cc @@ -59,7 +59,7 @@ int main() { size_t buffer_a2_1[] = { 0, 4, 6 }; const char buffer_var_a2_1[] = "hhhhffa"; float buffer_a3_1[] = { 7.1, 7.2, 5.1, 5.2, 0.1, 0.2 }; - int64_t buffer_coords_1[] = { 3, 1, 3, 4, 1, 1 }; + int64_t buffer_coords_1[] = { 3, 4, 4, 2, 1, 1 }; const void* buffers_1[] = { buffer_a1_1, @@ -82,7 +82,7 @@ int main() { const char buffer_var_a2_2[] = "gggeddddbbccc"; float buffer_a3_2[] = { 6.1, 6.2, 4.1, 4.2, 3.1, 3.2, 1.1, 1.2, 2.1, 2.2 }; - int64_t buffer_coords_2[] = { 4, 2, 3, 3, 2, 3, 1, 2, 1, 4 }; + int64_t buffer_coords_2[] = { 3, 3, 3, 1, 2, 3, 1, 2, 1, 4 }; const void* buffers_2[] = { buffer_a1_2, diff --git a/examples/src/tiledb_array_parallel_write_sparse_2.cc b/examples/src/tiledb_array_parallel_write_sparse_2.cc index c4af8955..8ef5fdb0 100644 --- a/examples/src/tiledb_array_parallel_write_sparse_2.cc +++ b/examples/src/tiledb_array_parallel_write_sparse_2.cc @@ -31,8 +31,11 @@ */ #include "c_api.h" + +#ifdef OPENMP #include + // The function to be computed in parallel void parallel_write( const TileDB_CTX* tiledb_ctx, @@ -55,7 +58,7 @@ int main() { size_t buffer_a2_1[] = { 0, 4, 6 }; const char buffer_var_a2_1[] = "hhhhffa"; float buffer_a3_1[] = { 7.1, 7.2, 5.1, 5.2, 0.1, 0.2 }; - int64_t buffer_coords_1[] = { 3, 1, 3, 4, 1, 1 }; + int64_t buffer_coords_1[] = { 3, 4, 4, 2, 1, 1 }; const void* buffers_1[] = { buffer_a1_1, @@ -78,7 +81,7 @@ int main() { const char 
buffer_var_a2_2[] = "gggeddddbbccc"; float buffer_a3_2[] = { 6.1, 6.2, 4.1, 4.2, 3.1, 3.2, 1.1, 1.2, 2.1, 2.2 }; - int64_t buffer_coords_2[] = { 4, 2, 3, 3, 2, 3, 1, 2, 1, 4 }; + int64_t buffer_coords_2[] = { 3, 3, 3, 1, 2, 3, 1, 2, 1, 4 }; const void* buffers_2[] = { buffer_a1_2, @@ -147,3 +150,15 @@ void parallel_write( // Finalize array tiledb_array_finalize(tiledb_array); } + +#else + +#include + +int main() { + printf("OpenMP not supported."); + + return 0; +} + +#endif diff --git a/examples/src/tiledb_array_update_sparse_1.cc b/examples/src/tiledb_array_update_sparse_1.cc index d1a2f076..b21bb3b9 100644 --- a/examples/src/tiledb_array_update_sparse_1.cc +++ b/examples/src/tiledb_array_update_sparse_1.cc @@ -50,12 +50,12 @@ int main() { 0); // Number of attributes // Prepare cell buffers - int buffer_a1[] = { 109, 104, 108, 105 }; - size_t buffer_a2[] = { 0, 1, 2, 6 }; - const char buffer_var_a2[] = "uwvvvvyyy"; + int buffer_a1[] = { 107, 104, 106, 105 }; + size_t buffer_a2[] = { 0, 3, 4, 5 }; + const char buffer_var_a2[] = "yyyuwvvvv"; float buffer_a3[] = - { 109.1, 109.2, 104.1, 104.2, 108.1, 108.2, 105.1, 105.2 }; - int64_t buffer_coords[] = { 3, 2, 3, 3, 4, 1, 3, 4 }; + { 107.1, 107.2, 104.1, 104.2, 106.1, 106.2, 105.1, 105.2 }; + int64_t buffer_coords[] = { 3, 4, 3, 2, 3, 3, 4, 1 }; const void* buffers[] = { buffer_a1, buffer_a2, buffer_var_a2, buffer_a3, buffer_coords }; size_t buffer_sizes[] = diff --git a/examples/src/tiledb_array_update_sparse_2.cc b/examples/src/tiledb_array_update_sparse_2.cc index d5a1ad7e..5d40cd8d 100644 --- a/examples/src/tiledb_array_update_sparse_2.cc +++ b/examples/src/tiledb_array_update_sparse_2.cc @@ -50,16 +50,16 @@ int main() { 0); // Number of attributes // Prepare cell buffers - int buffer_a1[] = { 109, TILEDB_EMPTY_INT32, 108, 105 }; - size_t buffer_a2[] = { 0, 1, 2, 6 }; + int buffer_a1[] = { 107, TILEDB_EMPTY_INT32, 106, 105 }; + size_t buffer_a2[] = { 0, 3, 4, 5 }; const char buffer_var_a2[] = - { 'u', 
TILEDB_EMPTY_CHAR, 'v', 'v', 'v', 'v', 'y', 'y', 'y' }; + { 'y', 'y', 'y', TILEDB_EMPTY_CHAR, 'w', 'v', 'v', 'v', 'v' }; float buffer_a3[] = { - 109.1, 109.2, TILEDB_EMPTY_FLOAT32, TILEDB_EMPTY_FLOAT32, - 108.1, 108.2, 105.1, 105.2 + 107.1, 107.2, TILEDB_EMPTY_FLOAT32, TILEDB_EMPTY_FLOAT32, + 106.1, 106.2, 105.1, 105.2 }; - int64_t buffer_coords[] = { 3, 2, 3, 3, 4, 1, 3, 4 }; + int64_t buffer_coords[] = { 3, 4, 3, 2, 3, 3, 4, 1 }; const void* buffers[] = { buffer_a1, buffer_a2, buffer_var_a2, buffer_a3, buffer_coords }; size_t buffer_sizes[] = diff --git a/examples/src/tiledb_array_write_sparse_1.cc b/examples/src/tiledb_array_write_sparse_1.cc index b4d6306c..36159128 100644 --- a/examples/src/tiledb_array_write_sparse_1.cc +++ b/examples/src/tiledb_array_write_sparse_1.cc @@ -57,7 +57,7 @@ int main() { 0.1, 0.2, 1.1, 1.2, 2.1, 2.2, 3.1, 3.2, 4.1, 4.2, 5.1, 5.2, 6.1, 6.2, 7.1, 7.2 }; - int64_t buffer_coords[] = { 1, 1, 1, 2, 1, 4, 2, 3, 3, 3, 3, 4, 4, 2, 3, 1 }; + int64_t buffer_coords[] = { 1, 1, 1, 2, 1, 4, 2, 3, 3, 1, 4, 2, 3, 3, 3, 4 }; const void* buffers[] = { buffer_a1, buffer_a2, buffer_var_a2, buffer_a3, buffer_coords }; size_t buffer_sizes[] = diff --git a/examples/src/tiledb_array_write_sparse_2.cc b/examples/src/tiledb_array_write_sparse_2.cc index f743188c..db7c81dd 100644 --- a/examples/src/tiledb_array_write_sparse_2.cc +++ b/examples/src/tiledb_array_write_sparse_2.cc @@ -77,7 +77,7 @@ int main() { size_t* buffer_a2_2 = NULL; const char* buffer_var_a2_2 = NULL; float* buffer_a3_2 = NULL; - int64_t buffer_coords_2[] = { 1, 4, 2, 3, 3, 3, 3, 4, 4, 2, 3, 1 }; + int64_t buffer_coords_2[] = { 1, 4, 2, 3, 3, 1, 4, 2, 3, 3, 3, 4 }; const void* buffers_2[] = { buffer_a1_2, diff --git a/examples/src/tiledb_array_write_sparse_3.cc b/examples/src/tiledb_array_write_sparse_3.cc index 5c44bb58..95f792f4 100644 --- a/examples/src/tiledb_array_write_sparse_3.cc +++ b/examples/src/tiledb_array_write_sparse_3.cc @@ -57,7 +57,7 @@ int main() { 7.1, 7.2, 5.1, 5.2, 
0.1, 0.2, 6.1, 6.2, 4.1, 4.2, 3.1, 3.2, 1.1, 1.2, 2.1, 2.2 }; - int64_t buffer_coords[] = { 3, 1, 3, 4, 1, 1, 4, 2, 3, 3, 2, 3, 1, 2, 1, 4 }; + int64_t buffer_coords[] = { 3, 4, 4, 2, 1, 1, 3, 3, 3, 1, 2, 3, 1, 2, 1, 4 }; const void* buffers[] = { buffer_a1, buffer_a2, buffer_var_a2, buffer_a3, buffer_coords }; size_t buffer_sizes[] = diff --git a/examples/src/tiledb_array_write_sparse_4.cc b/examples/src/tiledb_array_write_sparse_4.cc index 4719160e..6fdb5d70 100644 --- a/examples/src/tiledb_array_write_sparse_4.cc +++ b/examples/src/tiledb_array_write_sparse_4.cc @@ -53,7 +53,7 @@ int main() { size_t buffer_a2[] = { 0, 4, 6 }; const char buffer_var_a2[] = "hhhhffa"; float buffer_a3[] = { 7.1, 7.2, 5.1, 5.2, 0.1, 0.2 }; - int64_t buffer_coords[] = { 3, 1, 3, 4, 1, 1 }; + int64_t buffer_coords[] = { 3, 4, 4, 2, 1, 1 }; const void* buffers[] = { buffer_a1, buffer_a2, buffer_var_a2, buffer_a3, buffer_coords }; size_t buffer_sizes[] = @@ -74,7 +74,7 @@ int main() { const char buffer_var_a2_2[] = "gggeddddbbccc"; float buffer_a3_2[] = { 6.1, 6.2, 4.1, 4.2, 3.1, 3.2, 1.1, 1.2, 2.1, 2.2 }; - int64_t buffer_coords_2[] = { 4, 2, 3, 3, 2, 3, 1, 2, 1, 4 }; + int64_t buffer_coords_2[] = { 3, 3, 3, 1, 2, 3, 1, 2, 1, 4 }; const void* buffers_2[] = { buffer_a1_2, diff --git a/examples/src/tiledb_config.cc b/examples/src/tiledb_config.cc new file mode 100644 index 00000000..5e442ea7 --- /dev/null +++ b/examples/src/tiledb_config.cc @@ -0,0 +1,58 @@ +/** + * @file tiledb_config.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2016 MIT and Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software 
is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * It shows how to set the TileDB configuration parameters. + */ + +#include "c_api.h" +#include + +int main() { + /* Create a TileDB configuration. */ + TileDB_Config tiledb_config; + /* + * IMPORTANT: You need to zero out the members of the config structure if you + * are setting only a subset of them, so that the rest can take default + * values. + */ + memset(&tiledb_config, 0, sizeof(struct TileDB_Config)); + tiledb_config.home_ = "."; // TileDB home will be the current directory + tiledb_config.read_method_ = TILEDB_IO_READ; // OS read instead of mmap + + // Initialize context with the default configuration parameters + TileDB_CTX* tiledb_ctx; + tiledb_ctx_init(&tiledb_ctx, &tiledb_config); + + /* --- Your code here --- */ + + /* Finalize context. */ + tiledb_ctx_finalize(tiledb_ctx); + + return 0; +} diff --git a/test/src/c_api/c_api_spec.cc b/test/src/c_api/c_api_spec.cc index 741654dd..efc6450a 100644 --- a/test/src/c_api/c_api_spec.cc +++ b/test/src/c_api/c_api_spec.cc @@ -377,7 +377,6 @@ bool check_buffer( return fail; } TEST_F(TileDBAPITest, DenseArrayRandomUpdates) { - int64_t dim0 = 100; int64_t dim1 = 100; int64_t chunkDim0 = 10;