From ea239473e0881d0be704c131223fd026a89d3fbc Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:47:15 -0500 Subject: [PATCH 01/29] search timestamp works for unstructured logs --- src/clp_ffi_js/ir/StreamReader.cpp | 270 +++++++++--------- src/clp_ffi_js/ir/StreamReader.hpp | 3 + .../ir/StructuredIrStreamReader.cpp | 6 + .../ir/StructuredIrStreamReader.hpp | 3 + .../ir/UnstructuredIrStreamReader.cpp | 24 ++ .../ir/UnstructuredIrStreamReader.hpp | 3 + 6 files changed, 176 insertions(+), 133 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index d12c8761..f08c3d8a 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -26,182 +26,186 @@ #include namespace { -using ClpFfiJsException = clp_ffi_js::ClpFfiJsException; -using IRErrorCode = clp::ffi::ir_stream::IRErrorCode; - -// Function declarations -/** - * Rewinds the reader to the beginning then validates the CLP IR data encoding type. - * @param reader - * @throws ClpFfiJsException if the encoding type couldn't be decoded or the encoding type is - * unsupported. - */ -auto rewind_reader_and_validate_encoding_type(clp::ReaderInterface& reader) -> void; - -/** - * Gets the version of the IR stream. - * @param reader - * @throws ClpFfiJsException if the preamble couldn't be deserialized. - * @return The IR stream's version. - */ -auto get_version(clp::ReaderInterface& reader) -> std::string; - -auto rewind_reader_and_validate_encoding_type(clp::ReaderInterface& reader) -> void { - reader.seek_from_begin(0); - - bool is_four_bytes_encoding{true}; - if (auto const err{clp::ffi::ir_stream::get_encoding_type(reader, is_four_bytes_encoding)}; - IRErrorCode::IRErrorCode_Success != err) - { - throw ClpFfiJsException{ + using ClpFfiJsException = clp_ffi_js::ClpFfiJsException; + using IRErrorCode = clp::ffi::ir_stream::IRErrorCode; + + // Function declarations + /** + * Rewinds the reader to the beginning then validates the CLP IR data encoding type. + * @param reader + * @throws ClpFfiJsException if the encoding type couldn't be decoded or the encoding type is + * unsupported. + */ + auto rewind_reader_and_validate_encoding_type(clp::ReaderInterface &reader) -> void; + + /** + * Gets the version of the IR stream. + * @param reader + * @throws ClpFfiJsException if the preamble couldn't be deserialized. + * @return The IR stream's version. + */ + auto get_version(clp::ReaderInterface &reader) -> std::string; + + auto rewind_reader_and_validate_encoding_type(clp::ReaderInterface &reader) -> void { + reader.seek_from_begin(0); + + bool is_four_bytes_encoding{true}; + if (auto const err{clp::ffi::ir_stream::get_encoding_type(reader, is_four_bytes_encoding)}; + IRErrorCode::IRErrorCode_Success != err) { + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_MetadataCorrupted, __FILENAME__, __LINE__, std::format( - "Failed to decode encoding type: IR error code {}", - clp::enum_to_underlying_type(err) + "Failed to decode encoding type: IR error code {}", + clp::enum_to_underlying_type(err) ) - }; - } - if (false == is_four_bytes_encoding) { - throw ClpFfiJsException{ + }; + } + if (false == is_four_bytes_encoding) { + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Unsupported, __FILENAME__, __LINE__, "IR stream uses unsupported encoding." - }; + }; + } } -} - -auto get_version(clp::ReaderInterface& reader) -> std::string { - // Deserialize metadata bytes from preamble. - clp::ffi::ir_stream::encoded_tag_t metadata_type{}; - std::vector metadata_bytes; - auto const err{clp::ffi::ir_stream::deserialize_preamble(reader, metadata_type, metadata_bytes) - }; - if (IRErrorCode::IRErrorCode_Success != err) { - throw ClpFfiJsException{ + + auto get_version(clp::ReaderInterface &reader) -> std::string { + // Deserialize metadata bytes from preamble. + clp::ffi::ir_stream::encoded_tag_t metadata_type{}; + std::vector metadata_bytes; + auto const err{ + clp::ffi::ir_stream::deserialize_preamble(reader, metadata_type, metadata_bytes) + }; + if (IRErrorCode::IRErrorCode_Success != err) { + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Failure, __FILENAME__, __LINE__, std::format( - "Failed to deserialize preamble: IR error code {}", - clp::enum_to_underlying_type(err) + "Failed to deserialize preamble: IR error code {}", + clp::enum_to_underlying_type(err) ) - }; - } + }; + } - std::string version; - try { - // Deserialize metadata bytes as JSON. - std::string_view const metadata_view{ + std::string version; + try { + // Deserialize metadata bytes as JSON. + std::string_view const metadata_view{ clp::size_checked_pointer_cast(metadata_bytes.data()), metadata_bytes.size() - }; - nlohmann::json const metadata = nlohmann::json::parse(metadata_view); - version = metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey); - } catch (nlohmann::json::exception const& e) { - throw ClpFfiJsException{ + }; + nlohmann::json const metadata = nlohmann::json::parse(metadata_view); + version = metadata.at(clp::ffi::ir_stream::cProtocol::Metadata::VersionKey); + } catch (nlohmann::json::exception const &e) { + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_MetadataCorrupted, __FILENAME__, __LINE__, std::format("Failed to parse stream's metadata: {}", e.what()) - }; + }; + } + + SPDLOG_INFO("IR version is {}", version); + return version; } - SPDLOG_INFO("IR version is {}", version); - return version; -} - -EMSCRIPTEN_BINDINGS(ClpStreamReader) { - // JS types used as inputs - emscripten::register_type("Uint8Array"); - emscripten::register_type("number[] | null"); - emscripten::register_type("{timestampKey: string} | null"); - - // JS types used as outputs - emscripten::enum_("IrStreamType") - .value("STRUCTURED", clp_ffi_js::ir::StreamType::Structured) - .value("UNSTRUCTURED", clp_ffi_js::ir::StreamType::Unstructured); - emscripten::register_type( + EMSCRIPTEN_BINDINGS(ClpStreamReader) { + // JS types used as inputs + emscripten::register_type("Uint8Array"); + emscripten::register_type("number[] | null"); + emscripten::register_type("{timestampKey: string} | null"); + + // JS types used as outputs + emscripten::enum_("IrStreamType") + .value("STRUCTURED", clp_ffi_js::ir::StreamType::Structured) + .value("UNSTRUCTURED", clp_ffi_js::ir::StreamType::Unstructured); + emscripten::register_type( "Array<[string, bigint, number, number]>" - ); - emscripten::register_type("number[] | null"); - emscripten::class_("ClpStreamReader") - .constructor( + ); + emscripten::register_type("number[] | null"); + emscripten::class_("ClpStreamReader") + .constructor( &clp_ffi_js::ir::StreamReader::create, emscripten::return_value_policy::take_ownership() - ) - .function("getIrStreamType", &clp_ffi_js::ir::StreamReader::get_ir_stream_type) - .function( + ) + .function("getIrStreamType", &clp_ffi_js::ir::StreamReader::get_ir_stream_type) + .function( "getNumEventsBuffered", &clp_ffi_js::ir::StreamReader::get_num_events_buffered - ) - .function( + ) + .function( "getFilteredLogEventMap", &clp_ffi_js::ir::StreamReader::get_filtered_log_event_map - ) - .function("filterLogEvents", &clp_ffi_js::ir::StreamReader::filter_log_events) - .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) - .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range); -} -} // namespace + ) + .function("filterLogEvents", &clp_ffi_js::ir::StreamReader::filter_log_events) + .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) + .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) + .function("getLogEventIndexByTimestamp", + &clp_ffi_js::ir::StreamReader::find_timestamp_last_occurrence); + } +} // namespace namespace clp_ffi_js::ir { -auto StreamReader::create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) + auto StreamReader::create(DataArrayTsType const &data_array, + ReaderOptions const &reader_options) -> std::unique_ptr { - auto const length{data_array["length"].as()}; - SPDLOG_INFO("StreamReader::create: got buffer of length={}", length); - - // Copy array from JavaScript to C++. - clp::Array data_buffer{length}; - // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) - emscripten::val::module_property("HEAPU8") - .call("set", data_array, reinterpret_cast(data_buffer.data())); - // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) - - auto zstd_decompressor{std::make_unique()}; - zstd_decompressor->open(data_buffer.data(), length); - - rewind_reader_and_validate_encoding_type(*zstd_decompressor); - - // Validate the stream's version and decide which type of IR stream reader to create. - auto pos = zstd_decompressor->get_pos(); - auto const version{get_version(*zstd_decompressor)}; - try { - auto const version_validation_result{clp::ffi::ir_stream::validate_protocol_version(version) - }; - if (clp::ffi::ir_stream::IRProtocolErrorCode::Supported == version_validation_result) { - zstd_decompressor->seek_from_begin(0); - return std::make_unique(StructuredIrStreamReader::create( + auto const length{data_array["length"].as()}; + SPDLOG_INFO("StreamReader::create: got buffer of length={}", length); + + // Copy array from JavaScript to C++. + clp::Array data_buffer{length}; + // NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast) + emscripten::val::module_property("HEAPU8") + .call("set", data_array, reinterpret_cast(data_buffer.data())); + // NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast) + + auto zstd_decompressor{std::make_unique()}; + zstd_decompressor->open(data_buffer.data(), length); + + rewind_reader_and_validate_encoding_type(*zstd_decompressor); + + // Validate the stream's version and decide which type of IR stream reader to create. + auto pos = zstd_decompressor->get_pos(); + auto const version{get_version(*zstd_decompressor)}; + try { + auto const version_validation_result{ + clp::ffi::ir_stream::validate_protocol_version(version) + }; + if (clp::ffi::ir_stream::IRProtocolErrorCode::Supported == version_validation_result) { + zstd_decompressor->seek_from_begin(0); + return std::make_unique(StructuredIrStreamReader::create( std::move(zstd_decompressor), std::move(data_buffer), reader_options - )); - } - if (clp::ffi::ir_stream::IRProtocolErrorCode::BackwardCompatible - == version_validation_result) - { - zstd_decompressor->seek_from_begin(pos); - return std::make_unique(UnstructuredIrStreamReader::create( - std::move(zstd_decompressor), - std::move(data_buffer) - )); - } - } catch (ZstdDecompressor::OperationFailed const& e) { - throw ClpFfiJsException{ + )); + } + if (clp::ffi::ir_stream::IRProtocolErrorCode::BackwardCompatible + == version_validation_result) { + zstd_decompressor->seek_from_begin(pos); + return std::make_unique( + UnstructuredIrStreamReader::create( + std::move(zstd_decompressor), + std::move(data_buffer) + )); + } + } catch (ZstdDecompressor::OperationFailed const &e) { + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Failure, __FILENAME__, __LINE__, std::format("Unable to rewind zstd decompressor: {}", e.what()) - }; - } + }; + } - throw ClpFfiJsException{ + throw ClpFfiJsException{ clp::ErrorCode::ErrorCode_Unsupported, __FILENAME__, __LINE__, std::format("Unable to create reader for IR stream with version {}.", version) - }; -} -} // namespace clp_ffi_js::ir + }; + } +} // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 06e7c094..ec90520c 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -7,6 +7,7 @@ #include #include +#include namespace clp_ffi_js::ir { // JS types used as inputs @@ -100,6 +101,8 @@ class StreamReader { [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; + [[nodiscard]] virtual auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp) + -> std::ptrdiff_t = 0; protected: explicit StreamReader() = default; }; diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 799da91c..8c69ac55 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -182,6 +182,12 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo return DecodedResultsTsType(results); } +auto StructuredIrStreamReader::find_timestamp_last_occurrence( + clp::ir::epoch_time_ms_t input_timestamp +) -> std::ptrdiff_t { + return 0; +} + StructuredIrStreamReader::StructuredIrStreamReader( StreamReaderDataContext&& stream_reader_data_context, std::shared_ptr> deserialized_log_events diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index e93dee03..b443ddbb 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -171,6 +171,9 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; + [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp) + -> std::ptrdiff_t override; + private: // Constructor explicit StructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 37363fd0..661858b4 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -212,6 +212,30 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, return DecodedResultsTsType(results); } +auto UnstructuredIrStreamReader::find_timestamp_last_occurrence( + clp::ir::epoch_time_ms_t input_timestamp +) -> std::ptrdiff_t { + // Use std::lower_bound with a custom comparator + auto it = std::lower_bound( + m_encoded_log_events.begin(), + m_encoded_log_events.end(), + input_timestamp, + [](const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { + return event.get_timestamp() <= timestamp; + } + ); + + // Adjust the iterator to find the last valid index + if (it == m_encoded_log_events.end() || it->get_timestamp() > input_timestamp) { + if (it == m_encoded_log_events.begin()) { + return -1; // No element satisfies the condition + } + --it; + } + + return std::distance(m_encoded_log_events.begin(), it); +} + UnstructuredIrStreamReader::UnstructuredIrStreamReader( StreamReaderDataContext&& stream_reader_data_context ) diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 20137c39..e14267a8 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -78,6 +78,9 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; + [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp) + -> std::ptrdiff_t override; + private: // Constructor explicit UnstructuredIrStreamReader( From 3ee05a261cc5e4fac0cffd04adb722731d2445f9 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Thu, 19 Dec 2024 14:15:22 -0500 Subject: [PATCH 02/29] fix lint --- src/clp_ffi_js/ir/StreamReader.cpp | 6 ++++-- src/clp_ffi_js/ir/StreamReader.hpp | 8 +++++--- src/clp_ffi_js/ir/StructuredIrStreamReader.hpp | 4 ++-- src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp | 5 ++--- src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp | 4 ++-- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 86447326..bd065ef8 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -146,8 +146,10 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { .function("filterLogEvents", &clp_ffi_js::ir::StreamReader::filter_log_events) .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) - .function("getLogEventIndexByTimestamp", - &clp_ffi_js::ir::StreamReader::find_timestamp_last_occurrence); + .function( + "getLogEventIndexByTimestamp", + &clp_ffi_js::ir::StreamReader::find_timestamp_last_occurrence + ); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index b8c35ad1..9ce68d32 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -15,7 +16,6 @@ #include #include #include -#include #include #include @@ -125,8 +125,10 @@ class StreamReader { [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; - [[nodiscard]] virtual auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp) - -> std::ptrdiff_t = 0; + [[nodiscard]] virtual auto find_timestamp_last_occurrence( + clp::ir::epoch_time_ms_t input_timestamp + ) -> std::ptrdiff_t = 0; + protected: explicit StreamReader() = default; diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index 748cc0b1..1af21d96 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -74,8 +74,8 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp) - -> std::ptrdiff_t override; + [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp + ) -> std::ptrdiff_t override; private: // Constructor diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 478edcec..5e0994f4 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -166,9 +166,8 @@ auto UnstructuredIrStreamReader::find_timestamp_last_occurrence( m_encoded_log_events.begin(), m_encoded_log_events.end(), input_timestamp, - [](const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { - return event.get_timestamp() <= timestamp; - } + [](LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp) { return event.get_timestamp() <= timestamp; } ); // Adjust the iterator to find the last valid index diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 81ee930e..3fffdf6c 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,8 +71,8 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp) - -> std::ptrdiff_t override; + [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp + ) -> std::ptrdiff_t override; private: // Constructor From f1a71a1304e169313c994f46f089b114b76099d4 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Sun, 22 Dec 2024 00:36:19 -0500 Subject: [PATCH 03/29] implement structured logs search by timestamp --- .../ir/StructuredIrStreamReader.cpp | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 01c75bd3..36832845 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -150,7 +150,25 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo auto StructuredIrStreamReader::find_timestamp_last_occurrence( clp::ir::epoch_time_ms_t input_timestamp ) -> std::ptrdiff_t { - return 0; + // Use std::lower_bound with a custom comparator + auto it = std::lower_bound( + m_deserialized_log_events->begin(), + m_deserialized_log_events->end(), + input_timestamp, + [](const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { + return event.get_timestamp() <= timestamp; + } + ); + + // Adjust the iterator to find the last valid index + if (it == m_deserialized_log_events->end() || it->get_timestamp() > input_timestamp) { + if (it == m_deserialized_log_events->begin()) { + return -1; // No element satisfies the condition + } + --it; + } + + return std::distance(m_deserialized_log_events->begin(), it); } StructuredIrStreamReader::StructuredIrStreamReader( From c423ec5bc58298fea9bba6841e04f78ff7b5902f Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:54:08 -0500 Subject: [PATCH 04/29] address partial changes from review --- src/clp_ffi_js/ir/StreamReader.cpp | 3 ++- src/clp_ffi_js/ir/StreamReader.hpp | 16 +++++++++++----- src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 15 +++++++-------- src/clp_ffi_js/ir/StructuredIrStreamReader.hpp | 5 +++-- src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp | 8 ++++---- src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp | 4 ++-- 6 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index bd065ef8..ea015a35 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -129,6 +129,7 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { "Array<[string, bigint, number, number]>" ); emscripten::register_type("number[] | null"); + emscripten::register_type("number | null"); emscripten::class_("ClpStreamReader") .constructor( &clp_ffi_js::ir::StreamReader::create, @@ -148,7 +149,7 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) .function( "getLogEventIndexByTimestamp", - &clp_ffi_js::ir::StreamReader::find_timestamp_last_occurrence + &clp_ffi_js::ir::StreamReader::get_log_event_index_by_timestamp ); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 9ce68d32..ad9a9c7f 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -30,6 +30,7 @@ EMSCRIPTEN_DECLARE_VAL_TYPE(ReaderOptions); // JS types used as outputs EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); +EMSCRIPTEN_DECLARE_VAL_TYPE(LogEventIdxTsType); enum class StreamType : uint8_t { Structured, @@ -124,10 +125,15 @@ class StreamReader { */ [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; - - [[nodiscard]] virtual auto find_timestamp_last_occurrence( - clp::ir::epoch_time_ms_t input_timestamp - ) -> std::ptrdiff_t = 0; + /** + * Retrieves the last index of the log event that matches the given timestamp. + * + * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. + * @return The index of the log event with the specified timestamp, or null value if not found. + */ + [[nodiscard]] virtual auto get_log_event_index_by_timestamp( + clp::ir::epoch_time_ms_t timestamp + ) -> LogEventIdxTsType = 0; protected: explicit StreamReader() = default; diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 36832845..04b4c0f2 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -147,28 +147,27 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo ); } -auto StructuredIrStreamReader::find_timestamp_last_occurrence( - clp::ir::epoch_time_ms_t input_timestamp -) -> std::ptrdiff_t { - // Use std::lower_bound with a custom comparator +auto StructuredIrStreamReader::get_log_event_index_by_timestamp( + clp::ir::epoch_time_ms_t timestamp +) -> LogEventIdxTsType { auto it = std::lower_bound( m_deserialized_log_events->begin(), m_deserialized_log_events->end(), - input_timestamp, + timestamp, [](const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { return event.get_timestamp() <= timestamp; } ); // Adjust the iterator to find the last valid index - if (it == m_deserialized_log_events->end() || it->get_timestamp() > input_timestamp) { + if (it == m_deserialized_log_events->end() || it->get_timestamp() > timestamp) { if (it == m_deserialized_log_events->begin()) { - return -1; // No element satisfies the condition + return LogEventIdxTsType{emscripten::val::null()}; } --it; } - return std::distance(m_deserialized_log_events->begin(), it); + return LogEventIdxTsType{emscripten::val(std::distance(m_deserialized_log_events->begin(), it))}; } StructuredIrStreamReader::StructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index 1af21d96..28a68a0e 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -74,8 +75,8 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp - ) -> std::ptrdiff_t override; + [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp + ) -> LogEventIdxTsType override; private: // Constructor diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 5e0994f4..42574113 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -158,9 +158,9 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, ); } -auto UnstructuredIrStreamReader::find_timestamp_last_occurrence( +auto UnstructuredIrStreamReader::get_log_event_index_by_timestamp( clp::ir::epoch_time_ms_t input_timestamp -) -> std::ptrdiff_t { +) -> LogEventIdxTsType { // Use std::lower_bound with a custom comparator auto it = std::lower_bound( m_encoded_log_events.begin(), @@ -173,12 +173,12 @@ auto UnstructuredIrStreamReader::find_timestamp_last_occurrence( // Adjust the iterator to find the last valid index if (it == m_encoded_log_events.end() || it->get_timestamp() > input_timestamp) { if (it == m_encoded_log_events.begin()) { - return -1; // No element satisfies the condition + return LogEventIdxTsType{emscripten::val::null()}; } --it; } - return std::distance(m_encoded_log_events.begin(), it); + return LogEventIdxTsType{emscripten::val(std::distance(m_encoded_log_events.begin(), it))}; } UnstructuredIrStreamReader::UnstructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 3fffdf6c..d6298615 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,8 +71,8 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto find_timestamp_last_occurrence(clp::ir::epoch_time_ms_t input_timestamp - ) -> std::ptrdiff_t override; + [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t input_timestamp + ) -> LogEventIdxTsType override; private: // Constructor From 467e9984d3b55d19491adc9fb0f42efe33ad8ec3 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Sun, 29 Dec 2024 19:52:25 -0500 Subject: [PATCH 05/29] snapshot: get_timestamp seems to be undefined for std::upper_bound --- src/clp_ffi_js/ir/StreamReader.hpp | 52 ++++++++++++++++++- .../ir/StructuredIrStreamReader.cpp | 23 ++------ .../ir/UnstructuredIrStreamReader.cpp | 23 ++------ .../ir/UnstructuredIrStreamReader.hpp | 4 +- 4 files changed, 63 insertions(+), 39 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index ad9a9c7f..a0b67257 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -174,7 +174,6 @@ class StreamReader { * @tparam LogEvent * @param log_level_filter * @param log_events Derived class's log events. - * @param log_events * @param[out] filtered_log_event_map Returns the filtered log events. */ template @@ -183,6 +182,25 @@ class StreamReader { LogLevelFilterTsType const& log_level_filter, LogEvents const& log_events ) -> void; + + /** + * Retrieves the index of the last log event that matches the given timestamp. + * + * @tparam LogEvent + * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. + * @return The index of the last matched log event, or null value if not found. + */ + template + requires requires(const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { + { + event.get_timestamp() + } -> std::convertible_to; + } + static auto generic_get_log_event_index_by_timestamp( + Iterator begin, + Iterator end, + clp::ir::epoch_time_ms_t timestamp + ) -> LogEventIdxTsType; }; template @@ -269,6 +287,38 @@ auto StreamReader::generic_filter_log_events( } } } +template +requires requires(const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { + { + event.get_timestamp() + } -> std::convertible_to; +} +auto StreamReader::generic_get_log_event_index_by_timestamp( + Iterator begin, + Iterator end, + clp::ir::epoch_time_ms_t timestamp +) -> LogEventIdxTsType { + if (begin == end) { + return LogEventIdxTsType{emscripten::val::null()}; + } + auto it = std::upper_bound( + begin, + end, + timestamp, + [](const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t ts) { + return event.get_timestamp() > ts; + } + ); + + // it points to first element that is larger than timestamp, + // adjust the iterator to find the last valid index. + --it; + if (it->get_timestamp() < timestamp) { + return LogEventIdxTsType{emscripten::val::null()}; + } + + return LogEventIdxTsType{emscripten::val(std::distance(begin, it))}; +} } // namespace clp_ffi_js::ir #endif // CLP_FFI_JS_IR_STREAMREADER_HPP diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 04b4c0f2..8fcdaacd 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -148,26 +148,13 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo } auto StructuredIrStreamReader::get_log_event_index_by_timestamp( - clp::ir::epoch_time_ms_t timestamp + const clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType { - auto it = std::lower_bound( - m_deserialized_log_events->begin(), - m_deserialized_log_events->end(), - timestamp, - [](const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { - return event.get_timestamp() <= timestamp; - } + return generic_get_log_event_index_by_timestamp( + m_deserialized_log_events->begin(), + m_deserialized_log_events->end(), + timestamp ); - - // Adjust the iterator to find the last valid index - if (it == m_deserialized_log_events->end() || it->get_timestamp() > timestamp) { - if (it == m_deserialized_log_events->begin()) { - return LogEventIdxTsType{emscripten::val::null()}; - } - --it; - } - - return LogEventIdxTsType{emscripten::val(std::distance(m_deserialized_log_events->begin(), it))}; } StructuredIrStreamReader::StructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 42574113..40ebb286 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -159,26 +159,13 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, } auto UnstructuredIrStreamReader::get_log_event_index_by_timestamp( - clp::ir::epoch_time_ms_t input_timestamp + const clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType { - // Use std::lower_bound with a custom comparator - auto it = std::lower_bound( - m_encoded_log_events.begin(), - m_encoded_log_events.end(), - input_timestamp, - [](LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp) { return event.get_timestamp() <= timestamp; } + return generic_get_log_event_index_by_timestamp( + m_encoded_log_events.begin(), + m_encoded_log_events.end(), + timestamp ); - - // Adjust the iterator to find the last valid index - if (it == m_encoded_log_events.end() || it->get_timestamp() > input_timestamp) { - if (it == m_encoded_log_events.begin()) { - return LogEventIdxTsType{emscripten::val::null()}; - } - --it; - } - - return LogEventIdxTsType{emscripten::val(std::distance(m_encoded_log_events.begin(), it))}; } UnstructuredIrStreamReader::UnstructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index d6298615..8712baf7 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,8 +71,8 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t input_timestamp - ) -> LogEventIdxTsType override; + [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp) + -> LogEventIdxTsType override; private: // Constructor From c66cb80fdceb7043b63aed4dfdd20adfa20f3636 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Sun, 29 Dec 2024 20:07:50 -0500 Subject: [PATCH 06/29] fix lint --- src/clp_ffi_js/ir/StreamReader.hpp | 18 ++++++++++++------ src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 8 ++++---- .../ir/UnstructuredIrStreamReader.cpp | 8 ++++---- .../ir/UnstructuredIrStreamReader.hpp | 4 ++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index a0b67257..7f972996 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -5,13 +5,13 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include #include @@ -131,8 +131,7 @@ class StreamReader { * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. * @return The index of the log event with the specified timestamp, or null value if not found. */ - [[nodiscard]] virtual auto get_log_event_index_by_timestamp( - clp::ir::epoch_time_ms_t timestamp + [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType = 0; protected: @@ -191,7 +190,10 @@ class StreamReader { * @return The index of the last matched log event, or null value if not found. */ template - requires requires(const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { + requires requires( + LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp + ) { { event.get_timestamp() } -> std::convertible_to; @@ -287,8 +289,12 @@ auto StreamReader::generic_filter_log_events( } } } + template -requires requires(const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t timestamp) { +requires requires( + LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp + ) { { event.get_timestamp() } -> std::convertible_to; @@ -305,7 +311,7 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( begin, end, timestamp, - [](const LogEventWithFilterData& event, clp::ir::epoch_time_ms_t ts) { + [](LogEventWithFilterData const& event, clp::ir::epoch_time_ms_t ts) { return event.get_timestamp() > ts; } ); diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 8fcdaacd..909e73d9 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -148,12 +148,12 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo } auto StructuredIrStreamReader::get_log_event_index_by_timestamp( - const clp::ir::epoch_time_ms_t timestamp + clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { return generic_get_log_event_index_by_timestamp( - m_deserialized_log_events->begin(), - m_deserialized_log_events->end(), - timestamp + m_deserialized_log_events->begin(), + m_deserialized_log_events->end(), + timestamp ); } diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 40ebb286..27d02e98 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -159,12 +159,12 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, } auto UnstructuredIrStreamReader::get_log_event_index_by_timestamp( - const clp::ir::epoch_time_ms_t timestamp + clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { return generic_get_log_event_index_by_timestamp( - m_encoded_log_events.begin(), - m_encoded_log_events.end(), - timestamp + m_encoded_log_events.begin(), + m_encoded_log_events.end(), + timestamp ); } diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 8712baf7..4b6ae92d 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,8 +71,8 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp) - -> LogEventIdxTsType override; + [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp + ) -> LogEventIdxTsType override; private: // Constructor From 89338f7fc409c36b4b2e6beb3c20c7c1cb9ac37e Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Wed, 1 Jan 2025 22:11:53 -0500 Subject: [PATCH 07/29] pass in log_events instead of iterators to generic_get_log_event_index_by_timestamp, use std::ranges::upper_bound instead of std::upper_bound --- src/clp_ffi_js/ir/StreamReader.hpp | 30 +++++++++---------- .../ir/StructuredIrStreamReader.cpp | 3 +- .../ir/UnstructuredIrStreamReader.cpp | 3 +- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 7f972996..d9b94350 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -189,7 +189,7 @@ class StreamReader { * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. * @return The index of the last matched log event, or null value if not found. */ - template + template requires requires( LogEventWithFilterData const& event, clp::ir::epoch_time_ms_t timestamp @@ -199,8 +199,7 @@ class StreamReader { } -> std::convertible_to; } static auto generic_get_log_event_index_by_timestamp( - Iterator begin, - Iterator end, + LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType; }; @@ -290,7 +289,7 @@ auto StreamReader::generic_filter_log_events( } } -template +template requires requires( LogEventWithFilterData const& event, clp::ir::epoch_time_ms_t timestamp @@ -300,22 +299,23 @@ requires requires( } -> std::convertible_to; } auto StreamReader::generic_get_log_event_index_by_timestamp( - Iterator begin, - Iterator end, + LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType { - if (begin == end) { + if (log_events.empty()) { return LogEventIdxTsType{emscripten::val::null()}; } - auto it = std::upper_bound( - begin, - end, - timestamp, - [](LogEventWithFilterData const& event, clp::ir::epoch_time_ms_t ts) { - return event.get_timestamp() > ts; - } + auto it = std::ranges::upper_bound( + log_events, + timestamp, + [](const LogEventWithFilterData& log_event, const clp::ir::epoch_time_ms_t& ts) { + return ts < log_event.get_timestamp(); + } ); + if (it == log_events.begin()) { + return LogEventIdxTsType{emscripten::val::null()}; + } // it points to first element that is larger than timestamp, // adjust the iterator to find the last valid index. --it; @@ -323,7 +323,7 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( return LogEventIdxTsType{emscripten::val::null()}; } - return LogEventIdxTsType{emscripten::val(std::distance(begin, it))}; + return LogEventIdxTsType{emscripten::val(std::distance(log_events.begin(), it))}; } } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 909e73d9..c6a45d60 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -151,8 +151,7 @@ auto StructuredIrStreamReader::get_log_event_index_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { return generic_get_log_event_index_by_timestamp( - m_deserialized_log_events->begin(), - m_deserialized_log_events->end(), + *m_deserialized_log_events, timestamp ); } diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 27d02e98..5ad02b70 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -162,8 +162,7 @@ auto UnstructuredIrStreamReader::get_log_event_index_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { return generic_get_log_event_index_by_timestamp( - m_encoded_log_events.begin(), - m_encoded_log_events.end(), + m_encoded_log_events, timestamp ); } From a99ec2a0e039dbb9cefc891ff5ee79cfdfa42678 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 6 Jan 2025 13:58:08 -0500 Subject: [PATCH 08/29] switch back to std::upper_bound because std::ranges::upper_bound is not suitable here --- src/clp_ffi_js/ir/StreamReader.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index d9b94350..19bc348c 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -305,10 +305,11 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( if (log_events.empty()) { return LogEventIdxTsType{emscripten::val::null()}; } - auto it = std::ranges::upper_bound( - log_events, + auto it = std::upper_bound( + log_events.begin(), + log_events.end(), timestamp, - [](const LogEventWithFilterData& log_event, const clp::ir::epoch_time_ms_t& ts) { + [](clp::ir::epoch_time_ms_t ts, LogEventWithFilterData const& log_event) { return ts < log_event.get_timestamp(); } ); From 9ec039fad74f0812df0462ba0a48a1e8bce7693a Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:12:22 -0500 Subject: [PATCH 09/29] fix lint --- src/clp_ffi_js/ir/StreamReader.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 19bc348c..ba17913d 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -306,12 +306,12 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( return LogEventIdxTsType{emscripten::val::null()}; } auto it = std::upper_bound( - log_events.begin(), - log_events.end(), - timestamp, - [](clp::ir::epoch_time_ms_t ts, LogEventWithFilterData const& log_event) { - return ts < log_event.get_timestamp(); - } + log_events.begin(), + log_events.end(), + timestamp, + [](clp::ir::epoch_time_ms_t ts, LogEventWithFilterData const& log_event) { + return ts < log_event.get_timestamp(); + } ); if (it == log_events.begin()) { From 4f125b89eccc09e1fa48c87a67e28c0eeb9f340a Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:32:18 -0500 Subject: [PATCH 10/29] change generic_get_log_event_index_by_timestamp behavior: only returns null when log events are empty, and return index with "best effort" --- src/clp_ffi_js/ir/StreamReader.hpp | 28 ++++++++++--------- .../ir/StructuredIrStreamReader.cpp | 1 + 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index ba17913d..92235f6a 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -126,10 +126,14 @@ class StreamReader { [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; /** - * Retrieves the last index of the log event that matches the given timestamp. + * Retrieves the index of the last log event that matches the given timestamp. * + * @tparam LogEvent * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. - * @return The index of the log event with the specified timestamp, or null value if not found. + * @return The index of the last matched log event. + * @return null value if log events are empty. + * @return first index greater than the timestamp, or the last index smaller than the timestamp + * if no exact timestamp match. */ [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType = 0; @@ -183,11 +187,14 @@ class StreamReader { ) -> void; /** - * Retrieves the index of the last log event that matches the given timestamp. + * Templated implementation of `get_log_event_index_by_timestamp`. * * @tparam LogEvent * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. - * @return The index of the last matched log event, or null value if not found. + * @return The index of the last matched log event. + * @return null value if log events are empty. + * @return first index greater than the timestamp, or the last index smaller than the timestamp + * if no exact timestamp match. */ template requires requires( @@ -314,16 +321,11 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( } ); - if (it == log_events.begin()) { - return LogEventIdxTsType{emscripten::val::null()}; + // If the iterator is not pointing to the beginning, decrement it so that it points to + // first element that is larger than timestamp. + if (it != log_events.begin()) { + --it; } - // it points to first element that is larger than timestamp, - // adjust the iterator to find the last valid index. - --it; - if (it->get_timestamp() < timestamp) { - return LogEventIdxTsType{emscripten::val::null()}; - } - return LogEventIdxTsType{emscripten::val(std::distance(log_events.begin(), it))}; } } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index c6a45d60..64aa1ba9 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include From 52215881710f39717193db2292fbf89bffd3cc13 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:42:39 -0500 Subject: [PATCH 11/29] edit docstring for get_log_event_index_by_timestamp --- src/clp_ffi_js/ir/StreamReader.hpp | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 92235f6a..246fd61d 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -62,10 +62,9 @@ class StreamReader { * @return The created instance. * @throw ClpFfiJsException if any error occurs. */ - [[nodiscard]] static auto create( - DataArrayTsType const& data_array, - ReaderOptions const& reader_options - ) -> std::unique_ptr; + [[nodiscard]] static auto + create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) + -> std::unique_ptr; // Destructor virtual ~StreamReader() = default; @@ -124,7 +123,8 @@ class StreamReader { * @throw ClpFfiJsException if a message cannot be decoded. */ [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const - -> DecodedResultsTsType = 0; + -> DecodedResultsTsType + = 0; /** * Retrieves the index of the last log event that matches the given timestamp. * @@ -132,11 +132,12 @@ class StreamReader { * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. * @return The index of the last matched log event. * @return null value if log events are empty. - * @return first index greater than the timestamp, or the last index smaller than the timestamp - * if no exact timestamp match. + * @return the last index smaller than the timestamp if no exact timestamp match, unless all log + * event timestamps are larger than the target. In that case, return the first log event index. */ - [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp - ) -> LogEventIdxTsType = 0; + [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp) + -> LogEventIdxTsType + = 0; protected: explicit StreamReader() = default; @@ -193,14 +194,14 @@ class StreamReader { * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. * @return The index of the last matched log event. * @return null value if log events are empty. - * @return first index greater than the timestamp, or the last index smaller than the timestamp - * if no exact timestamp match. + * @return the last index smaller than the timestamp if no exact timestamp match, unless all log + * event timestamps are larger than the target. In that case, return the first log event index. */ template requires requires( - LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp - ) { + LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp + ) { { event.get_timestamp() } -> std::convertible_to; @@ -298,9 +299,9 @@ auto StreamReader::generic_filter_log_events( template requires requires( - LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp - ) { + LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp +) { { event.get_timestamp() } -> std::convertible_to; From abeb4f8a93f0decc32f539d478662ff11cbafa30 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 6 Jan 2025 17:10:45 -0500 Subject: [PATCH 12/29] fix lint --- src/clp_ffi_js/ir/StreamReader.hpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 246fd61d..e7736b00 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -62,9 +62,10 @@ class StreamReader { * @return The created instance. * @throw ClpFfiJsException if any error occurs. */ - [[nodiscard]] static auto - create(DataArrayTsType const& data_array, ReaderOptions const& reader_options) - -> std::unique_ptr; + [[nodiscard]] static auto create( + DataArrayTsType const& data_array, + ReaderOptions const& reader_options + ) -> std::unique_ptr; // Destructor virtual ~StreamReader() = default; @@ -123,8 +124,7 @@ class StreamReader { * @throw ClpFfiJsException if a message cannot be decoded. */ [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const - -> DecodedResultsTsType - = 0; + -> DecodedResultsTsType = 0; /** * Retrieves the index of the last log event that matches the given timestamp. * @@ -135,9 +135,8 @@ class StreamReader { * @return the last index smaller than the timestamp if no exact timestamp match, unless all log * event timestamps are larger than the target. In that case, return the first log event index. */ - [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp) - -> LogEventIdxTsType - = 0; + [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp + ) -> LogEventIdxTsType = 0; protected: explicit StreamReader() = default; @@ -199,9 +198,9 @@ class StreamReader { */ template requires requires( - LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp - ) { + LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp + ) { { event.get_timestamp() } -> std::convertible_to; @@ -299,9 +298,9 @@ auto StreamReader::generic_filter_log_events( template requires requires( - LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp -) { + LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp + ) { { event.get_timestamp() } -> std::convertible_to; From 6647fbd56b1d2c043f78b9acc01cd978a043b4a9 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Wed, 8 Jan 2025 14:18:05 -0500 Subject: [PATCH 13/29] Apply suggestions from code review Co-authored-by: Junhao Liao --- src/clp_ffi_js/ir/StreamReader.hpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index e7736b00..d5477f45 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -126,14 +126,13 @@ class StreamReader { [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; /** - * Retrieves the index of the last log event that matches the given timestamp. + * Finds the index of the last log event that matches or next to the given timestamp. * * @tparam LogEvent * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. - * @return The index of the last matched log event. - * @return null value if log events are empty. - * @return the last index smaller than the timestamp if no exact timestamp match, unless all log - * event timestamps are larger than the target. In that case, return the first log event index. + * @return The last index of the log event whose timestamp is smaller than or equal to the `timestamp`. + * @return `0` if all log event timestamps are larger than the target. + * @return null if no log event exists in the stream. */ [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType = 0; @@ -190,10 +189,7 @@ class StreamReader { * Templated implementation of `get_log_event_index_by_timestamp`. * * @tparam LogEvent - * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. - * @return The index of the last matched log event. - * @return null value if log events are empty. - * @return the last index smaller than the timestamp if no exact timestamp match, unless all log + * @param timestamp * event timestamps are larger than the target. In that case, return the first log event index. */ template @@ -312,6 +308,7 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( if (log_events.empty()) { return LogEventIdxTsType{emscripten::val::null()}; } + auto it = std::upper_bound( log_events.begin(), log_events.end(), @@ -321,12 +318,14 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( } ); - // If the iterator is not pointing to the beginning, decrement it so that it points to - // first element that is larger than timestamp. - if (it != log_events.begin()) { - --it; + if (upper == log_events.begin()) { + return LogEventIdxTsType{0}; } - return LogEventIdxTsType{emscripten::val(std::distance(log_events.begin(), it))}; + + size_t upper_index{std::distance(log_events.begin(), upper)}; + const auto index{upper_index - 1}; + + return LogEventIdxTsType{emscripten::val(index)}; } } // namespace clp_ffi_js::ir From 046191e5773c1aec944d5f7f3fca55ef2797fd7e Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Wed, 8 Jan 2025 18:20:45 -0500 Subject: [PATCH 14/29] use concept to shorten function definition; minor change to generic_get_log_event_index_by_timestamp --- src/clp_ffi_js/ir/StreamReader.hpp | 68 +++++++++++++----------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index d5477f45..d3258364 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -46,6 +46,23 @@ using LogEvents = std::vector>; */ using FilteredLogEventsMap = std::optional>; +template +concept GetLogEventIdxInterface = requires( + LogEventWithFilterData const& event, + clp::ir::epoch_time_ms_t timestamp +) { + { + event.get_timestamp() + } -> std::convertible_to; +}; + +template +concept DecodeRangeInterface = requires(ToStringFunc func, LogEvent const& log_event) { + { + func(log_event) + } -> std::convertible_to; +}; + /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. @@ -130,7 +147,8 @@ class StreamReader { * * @tparam LogEvent * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. - * @return The last index of the log event whose timestamp is smaller than or equal to the `timestamp`. + * @return The last index of the log event whose timestamp is smaller than or equal to the + * `timestamp`. * @return `0` if all log event timestamps are larger than the target. * @return null if no log event exists in the stream. */ @@ -155,12 +173,7 @@ class StreamReader { * @return See `decode_range`. * @throws Propagates `ToStringFunc`'s exceptions. */ - template - requires requires(ToStringFunc func, LogEvent const& log_event) { - { - func(log_event) - } -> std::convertible_to; - } + template static auto generic_decode_range( size_t begin_idx, size_t end_idx, @@ -192,27 +205,14 @@ class StreamReader { * @param timestamp * event timestamps are larger than the target. In that case, return the first log event index. */ - template - requires requires( - LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp - ) { - { - event.get_timestamp() - } -> std::convertible_to; - } - static auto generic_get_log_event_index_by_timestamp( - LogEvents const& log_events, + template + auto generic_get_log_event_index_by_timestamp( + std::vector> const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType; }; -template -requires requires(ToStringFunc func, LogEvent const& log_event) { - { - func(log_event) - } -> std::convertible_to; -} +template auto StreamReader::generic_decode_range( size_t begin_idx, size_t end_idx, @@ -292,15 +292,7 @@ auto StreamReader::generic_filter_log_events( } } -template -requires requires( - LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp - ) { - { - event.get_timestamp() - } -> std::convertible_to; -} +template auto StreamReader::generic_get_log_event_index_by_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp @@ -309,21 +301,21 @@ auto StreamReader::generic_get_log_event_index_by_timestamp( return LogEventIdxTsType{emscripten::val::null()}; } - auto it = std::upper_bound( + auto upper{std::upper_bound( log_events.begin(), log_events.end(), timestamp, [](clp::ir::epoch_time_ms_t ts, LogEventWithFilterData const& log_event) { return ts < log_event.get_timestamp(); } - ); + )}; if (upper == log_events.begin()) { - return LogEventIdxTsType{0}; + return LogEventIdxTsType{emscripten::val(0)}; } - size_t upper_index{std::distance(log_events.begin(), upper)}; - const auto index{upper_index - 1}; + auto const upper_index{std::distance(log_events.begin(), upper)}; + auto const index{upper_index - 1}; return LogEventIdxTsType{emscripten::val(index)}; } From c40036133f305551cca18f6eda3a41a2430e4909 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Wed, 8 Jan 2025 19:55:07 -0500 Subject: [PATCH 15/29] fix lint & syntax --- src/clp_ffi_js/ir/StreamReader.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index d3258364..13ba01a6 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -173,7 +173,8 @@ class StreamReader { * @return See `decode_range`. * @throws Propagates `ToStringFunc`'s exceptions. */ - template + template + requires DecodeRangeInterface static auto generic_decode_range( size_t begin_idx, size_t end_idx, @@ -212,7 +213,8 @@ class StreamReader { ) -> LogEventIdxTsType; }; -template +template +requires DecodeRangeInterface auto StreamReader::generic_decode_range( size_t begin_idx, size_t end_idx, From ca4a616ddeb1ff5b45eee35037c932700b8f252b Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:58:00 -0500 Subject: [PATCH 16/29] rename get_log_event_index_by_timestamp to get_log_event_idx_by_timestamp --- src/clp_ffi_js/ir/StreamReader.cpp | 2 +- src/clp_ffi_js/ir/StreamReader.hpp | 8 ++++---- src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 4 ++-- src/clp_ffi_js/ir/StructuredIrStreamReader.hpp | 2 +- src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp | 4 ++-- src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index ea015a35..4df84aaa 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -149,7 +149,7 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) .function( "getLogEventIndexByTimestamp", - &clp_ffi_js::ir::StreamReader::get_log_event_index_by_timestamp + &clp_ffi_js::ir::StreamReader::get_log_event_idx_by_timestamp ); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 13ba01a6..449389bd 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -152,7 +152,7 @@ class StreamReader { * @return `0` if all log event timestamps are larger than the target. * @return null if no log event exists in the stream. */ - [[nodiscard]] virtual auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp + [[nodiscard]] virtual auto get_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType = 0; protected: @@ -200,14 +200,14 @@ class StreamReader { ) -> void; /** - * Templated implementation of `get_log_event_index_by_timestamp`. + * Templated implementation of `get_log_event_idx_by_timestamp`. * * @tparam LogEvent * @param timestamp * event timestamps are larger than the target. In that case, return the first log event index. */ template - auto generic_get_log_event_index_by_timestamp( + auto generic_get_log_event_idx_by_timestamp( std::vector> const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType; @@ -295,7 +295,7 @@ auto StreamReader::generic_filter_log_events( } template -auto StreamReader::generic_get_log_event_index_by_timestamp( +auto StreamReader::generic_get_log_event_idx_by_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType { diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 64aa1ba9..74d5a9f1 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -148,10 +148,10 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo ); } -auto StructuredIrStreamReader::get_log_event_index_by_timestamp( +auto StructuredIrStreamReader::get_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { - return generic_get_log_event_index_by_timestamp( + return generic_get_log_event_idx_by_timestamp( *m_deserialized_log_events, timestamp ); diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index 28a68a0e..8abb5223 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -75,7 +75,7 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp + [[nodiscard]] auto get_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType override; private: diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 5ad02b70..187ed425 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -158,10 +158,10 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, ); } -auto UnstructuredIrStreamReader::get_log_event_index_by_timestamp( +auto UnstructuredIrStreamReader::get_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { - return generic_get_log_event_index_by_timestamp( + return generic_get_log_event_idx_by_timestamp( m_encoded_log_events, timestamp ); diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 4b6ae92d..2fb8bc26 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,7 +71,7 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_index_by_timestamp(clp::ir::epoch_time_ms_t timestamp + [[nodiscard]] auto get_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType override; private: From 9443ba31d559cb61d00a1e78a0d4f9597961aeed Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:57:02 -0500 Subject: [PATCH 17/29] revert comments and plan to fix in the next pr --- src/clp_ffi_js/ir/StreamReader.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 449389bd..90006686 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -190,6 +190,7 @@ class StreamReader { * @tparam LogEvent * @param log_level_filter * @param log_events Derived class's log events. + * @param log_events * @param[out] filtered_log_event_map Returns the filtered log events. */ template From db60efdbc835ad1065dc4b89f452b45c3ad7e1fb Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Fri, 10 Jan 2025 16:00:02 -0500 Subject: [PATCH 18/29] add back the missing space --- src/clp_ffi_js/ir/StreamReader.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 90006686..4e7a9d73 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -190,7 +190,7 @@ class StreamReader { * @tparam LogEvent * @param log_level_filter * @param log_events Derived class's log events. - * @param log_events + * @param log_events * @param[out] filtered_log_event_map Returns the filtered log events. */ template From 0e3e21b67a9be34c566168eed4858feaf0b2a237 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Fri, 10 Jan 2025 17:22:30 -0500 Subject: [PATCH 19/29] revert decode_range (without creating concept) --- src/clp_ffi_js/ir/StreamReader.hpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 4e7a9d73..04f430d3 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -56,13 +56,6 @@ concept GetLogEventIdxInterface = requires( } -> std::convertible_to; }; -template -concept DecodeRangeInterface = requires(ToStringFunc func, LogEvent const& log_event) { - { - func(log_event) - } -> std::convertible_to; -}; - /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. @@ -174,7 +167,11 @@ class StreamReader { * @throws Propagates `ToStringFunc`'s exceptions. */ template - requires DecodeRangeInterface + requires requires(ToStringFunc func, LogEvent const& log_event) { + { + func(log_event) + } -> std::convertible_to; + } static auto generic_decode_range( size_t begin_idx, size_t end_idx, @@ -215,7 +212,11 @@ class StreamReader { }; template -requires DecodeRangeInterface +requires requires(ToStringFunc func, LogEvent const& log_event) { + { + func(log_event) + } -> std::convertible_to; +} auto StreamReader::generic_decode_range( size_t begin_idx, size_t end_idx, From 412b96e2bbae03e1aad83f0a6e80477adbd59f32 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:02:57 -0500 Subject: [PATCH 20/29] address changes from Marco's review --- src/clp_ffi_js/ir/StreamReader.cpp | 2 +- src/clp_ffi_js/ir/StreamReader.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 4df84aaa..caf27422 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -148,7 +148,7 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) .function( - "getLogEventIndexByTimestamp", + "getLogEventIdxByTimestamp", &clp_ffi_js::ir::StreamReader::get_log_event_idx_by_timestamp ); } diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 04f430d3..87920b8c 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -202,11 +202,11 @@ class StreamReader { * * @tparam LogEvent * @param timestamp - * event timestamps are larger than the target. In that case, return the first log event index. + * @return the best matched log event index. */ template auto generic_get_log_event_idx_by_timestamp( - std::vector> const& log_events, + LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType; }; From 54c7df1a527c4c7bb50c9bcec5cb932f43c4352f Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:44:50 -0500 Subject: [PATCH 21/29] remove unnecessary require statement for get_log_event_idx_by_timestamp --- src/clp_ffi_js/ir/StreamReader.hpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 87920b8c..fff78c19 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -46,16 +46,6 @@ using LogEvents = std::vector>; */ using FilteredLogEventsMap = std::optional>; -template -concept GetLogEventIdxInterface = requires( - LogEventWithFilterData const& event, - clp::ir::epoch_time_ms_t timestamp -) { - { - event.get_timestamp() - } -> std::convertible_to; -}; - /** * Class to deserialize and decode Zstandard-compressed CLP IR streams as well as format decoded * log events. @@ -204,7 +194,7 @@ class StreamReader { * @param timestamp * @return the best matched log event index. */ - template + template auto generic_get_log_event_idx_by_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp @@ -296,7 +286,7 @@ auto StreamReader::generic_filter_log_events( } } -template +template auto StreamReader::generic_get_log_event_idx_by_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp From eff1849c080277806a22c8b91158cf6c30f1a70e Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:01:37 -0500 Subject: [PATCH 22/29] address the rest of the comments --- src/clp_ffi_js/ir/StreamReader.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index fff78c19..759ce96c 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -295,7 +295,7 @@ auto StreamReader::generic_get_log_event_idx_by_timestamp( return LogEventIdxTsType{emscripten::val::null()}; } - auto upper{std::upper_bound( + auto first_greater_it{std::upper_bound( log_events.begin(), log_events.end(), timestamp, @@ -304,14 +304,13 @@ auto StreamReader::generic_get_log_event_idx_by_timestamp( } )}; - if (upper == log_events.begin()) { + if (first_greater_it == log_events.begin()) { return LogEventIdxTsType{emscripten::val(0)}; } - auto const upper_index{std::distance(log_events.begin(), upper)}; - auto const index{upper_index - 1}; + auto const first_greater_idx{std::distance(log_events.begin(), first_greater_it)}; - return LogEventIdxTsType{emscripten::val(index)}; + return LogEventIdxTsType{emscripten::val(first_greater_idx - 1)}; } } // namespace clp_ffi_js::ir From 00f89d465a08f7b36d8b68318b4cebc42b893785 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:12:41 -0500 Subject: [PATCH 23/29] address code review changes --- src/clp_ffi_js/ir/StreamReader.hpp | 2 +- src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 2 +- src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 759ce96c..b03decc6 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -128,7 +128,6 @@ class StreamReader { /** * Finds the index of the last log event that matches or next to the given timestamp. * - * @tparam LogEvent * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. * @return The last index of the log event whose timestamp is smaller than or equal to the * `timestamp`. @@ -191,6 +190,7 @@ class StreamReader { * Templated implementation of `get_log_event_idx_by_timestamp`. * * @tparam LogEvent + * @param log_events * @param timestamp * @return the best matched log event index. */ diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 74d5a9f1..f519c253 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -151,7 +151,7 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo auto StructuredIrStreamReader::get_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { - return generic_get_log_event_idx_by_timestamp( + return generic_get_log_event_idx_by_timestamp( *m_deserialized_log_events, timestamp ); diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 187ed425..a968859c 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -161,7 +161,7 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, auto UnstructuredIrStreamReader::get_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { - return generic_get_log_event_idx_by_timestamp( + return generic_get_log_event_idx_by_timestamp( m_encoded_log_events, timestamp ); From 122f1cf383ff0c6eaae3ec8a1b761251cfea44bf Mon Sep 17 00:00:00 2001 From: Henry <50559854+Henry8192@users.noreply.github.com> Date: Wed, 15 Jan 2025 12:36:36 -0500 Subject: [PATCH 24/29] fix lint --- src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 5 +---- src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index f519c253..6bd8c425 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -151,10 +151,7 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo auto StructuredIrStreamReader::get_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { - return generic_get_log_event_idx_by_timestamp( - *m_deserialized_log_events, - timestamp - ); + return generic_get_log_event_idx_by_timestamp(*m_deserialized_log_events, timestamp); } StructuredIrStreamReader::StructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index a968859c..727b10b7 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -161,10 +161,7 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, auto UnstructuredIrStreamReader::get_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const timestamp ) -> LogEventIdxTsType { - return generic_get_log_event_idx_by_timestamp( - m_encoded_log_events, - timestamp - ); + return generic_get_log_event_idx_by_timestamp(m_encoded_log_events, timestamp); } UnstructuredIrStreamReader::UnstructuredIrStreamReader( From f4a0207e05aecd53455d5fb76a731b9f5dc6317d Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Wed, 29 Jan 2025 21:08:48 +0800 Subject: [PATCH 25/29] resolve the rest of the conflicts --- CMakeLists.txt | 6 +++++- src/clp_ffi_js/ir/StreamReader.cpp | 4 ++-- src/clp_ffi_js/ir/StreamReader.hpp | 20 +++++++++---------- .../ir/StructuredIrStreamReader.cpp | 6 +++--- .../ir/StructuredIrStreamReader.hpp | 2 +- .../ir/UnstructuredIrStreamReader.cpp | 6 +++--- .../ir/UnstructuredIrStreamReader.hpp | 2 +- 7 files changed, 25 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 19685032..44f0556f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,11 @@ boost_${CLP_FFI_JS_BOOST_ARCHIVE_VERSION_PART}.tar.gz" URL_MD5 "53aeccc3167909ee770e34469f8dd592" ) message(STATUS "Fetching Boost.") -FetchContent_MakeAvailable(Boost) +set(FETCHCONTENT_QUIET OFF) +FetchContent_Declare( + Boost + URL "${CMAKE_FIND_PACKAGE_REDIRECTS_DIR/boost_1_85_0.tar.gz}" +) message("Boost sources successfully fetched into ${boost_SOURCE_DIR}") set(CMAKE_EXECUTABLE_SUFFIX ".js" CACHE STRING "Binary type to be generated by Emscripten.") diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index caf27422..2c96c95a 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -148,8 +148,8 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) .function( - "getLogEventIdxByTimestamp", - &clp_ffi_js::ir::StreamReader::get_log_event_idx_by_timestamp + "getLogEventIdxWithNearestTimestamp", + &clp_ffi_js::ir::StreamReader::get_log_event_idx_with_nearest_timestamp ); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index b03decc6..8d82109c 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -126,15 +126,15 @@ class StreamReader { [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; /** - * Finds the index of the last log event that matches or next to the given timestamp. - * - * @param timestamp The timestamp to search for, in milliseconds since the Unix epoch. - * @return The last index of the log event whose timestamp is smaller than or equal to the - * `timestamp`. - * @return `0` if all log event timestamps are larger than the target. + * Finds the log event with the timestamp that's nearest to the `target_ts`. + * @param target_ts + * @return The index of the log event with: + * - the largest timestamp less than or equal to `target_ts`, + * - or the index `0` if all timestamps are greater than `target_ts`. * @return null if no log event exists in the stream. */ - [[nodiscard]] virtual auto get_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t timestamp + [[nodiscard]] virtual auto get_log_event_idx_with_nearest_timestamp( + clp::ir::epoch_time_ms_t target_ts ) -> LogEventIdxTsType = 0; protected: @@ -187,7 +187,7 @@ class StreamReader { ) -> void; /** - * Templated implementation of `get_log_event_idx_by_timestamp`. + * Templated implementation of `get_log_event_idx_with_nearest_timestamp`. * * @tparam LogEvent * @param log_events @@ -195,7 +195,7 @@ class StreamReader { * @return the best matched log event index. */ template - auto generic_get_log_event_idx_by_timestamp( + auto generic_get_log_event_idx_with_nearest_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType; @@ -287,7 +287,7 @@ auto StreamReader::generic_filter_log_events( } template -auto StreamReader::generic_get_log_event_idx_by_timestamp( +auto StreamReader::generic_get_log_event_idx_with_nearest_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t timestamp ) -> LogEventIdxTsType { diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index 6bd8c425..a684bc04 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -148,10 +148,10 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo ); } -auto StructuredIrStreamReader::get_log_event_idx_by_timestamp( - clp::ir::epoch_time_ms_t const timestamp +auto StructuredIrStreamReader::get_log_event_idx_with_nearest_timestamp( + clp::ir::epoch_time_ms_t const target_ts ) -> LogEventIdxTsType { - return generic_get_log_event_idx_by_timestamp(*m_deserialized_log_events, timestamp); + return generic_get_log_event_idx_with_nearest_timestamp(*m_deserialized_log_events, target_ts); } StructuredIrStreamReader::StructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index 8abb5223..38db17a2 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -75,7 +75,7 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t timestamp + [[nodiscard]] auto get_log_event_idx_with_nearest_timestamp(clp::ir::epoch_time_ms_t target_ts ) -> LogEventIdxTsType override; private: diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 727b10b7..19d4c403 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -158,10 +158,10 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, ); } -auto UnstructuredIrStreamReader::get_log_event_idx_by_timestamp( - clp::ir::epoch_time_ms_t const timestamp +auto UnstructuredIrStreamReader::get_log_event_idx_with_nearest_timestamp( + clp::ir::epoch_time_ms_t const target_ts ) -> LogEventIdxTsType { - return generic_get_log_event_idx_by_timestamp(m_encoded_log_events, timestamp); + return generic_get_log_event_idx_with_nearest_timestamp(m_encoded_log_events, target_ts); } UnstructuredIrStreamReader::UnstructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 2fb8bc26..7a104097 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,7 +71,7 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t timestamp + [[nodiscard]] auto get_log_event_idx_with_nearest_timestamp(clp::ir::epoch_time_ms_t target_ts ) -> LogEventIdxTsType override; private: From 46fa81f42082567a45e13b682b909387192a39e5 Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Thu, 30 Jan 2025 22:39:16 +0800 Subject: [PATCH 26/29] revert the CMakeList Change --- CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 44f0556f..19685032 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,11 +72,7 @@ boost_${CLP_FFI_JS_BOOST_ARCHIVE_VERSION_PART}.tar.gz" URL_MD5 "53aeccc3167909ee770e34469f8dd592" ) message(STATUS "Fetching Boost.") -set(FETCHCONTENT_QUIET OFF) -FetchContent_Declare( - Boost - URL "${CMAKE_FIND_PACKAGE_REDIRECTS_DIR/boost_1_85_0.tar.gz}" -) +FetchContent_MakeAvailable(Boost) message("Boost sources successfully fetched into ${boost_SOURCE_DIR}") set(CMAKE_EXECUTABLE_SUFFIX ".js" CACHE STRING "Binary type to be generated by Emscripten.") From 8c8c42e23ff82062cbc17da14ac1f628068bb4fa Mon Sep 17 00:00:00 2001 From: Henry8192 <50559854+Henry8192@users.noreply.github.com> Date: Sat, 1 Feb 2025 22:18:16 +0800 Subject: [PATCH 27/29] address kirk's comments in the code review --- src/clp_ffi_js/ir/StreamReader.cpp | 6 +-- src/clp_ffi_js/ir/StreamReader.hpp | 50 +++++++++++-------- .../ir/StructuredIrStreamReader.cpp | 6 +-- .../ir/StructuredIrStreamReader.hpp | 4 +- .../ir/UnstructuredIrStreamReader.cpp | 6 +-- .../ir/UnstructuredIrStreamReader.hpp | 4 +- 6 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 2c96c95a..48d652ff 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -129,7 +129,7 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { "Array<[string, bigint, number, number]>" ); emscripten::register_type("number[] | null"); - emscripten::register_type("number | null"); + emscripten::register_type("number | null"); emscripten::class_("ClpStreamReader") .constructor( &clp_ffi_js::ir::StreamReader::create, @@ -148,8 +148,8 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) .function( - "getLogEventIdxWithNearestTimestamp", - &clp_ffi_js::ir::StreamReader::get_log_event_idx_with_nearest_timestamp + "findNearestLogEventIdxByTimestamp", + &clp_ffi_js::ir::StreamReader::find_nearest_log_event_idx_by_timestamp ); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 8d82109c..232c0be2 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -30,7 +30,7 @@ EMSCRIPTEN_DECLARE_VAL_TYPE(ReaderOptions); // JS types used as outputs EMSCRIPTEN_DECLARE_VAL_TYPE(DecodedResultsTsType); EMSCRIPTEN_DECLARE_VAL_TYPE(FilteredLogEventMapTsType); -EMSCRIPTEN_DECLARE_VAL_TYPE(LogEventIdxTsType); +EMSCRIPTEN_DECLARE_VAL_TYPE(NullableLogEventIdx); enum class StreamType : uint8_t { Structured, @@ -125,17 +125,24 @@ class StreamReader { */ [[nodiscard]] virtual auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType = 0; + /** - * Finds the log event with the timestamp that's nearest to the `target_ts`. + * Finds the log event, L, where if we assume: + * + * - the collection of log events is sorted in ascending timestamp order; + * - and we insert a marker log event, M, with timestamp `target_ts` into the collection (if log + * events with timestamp `target_ts` already exist in the collection, M should be inserted + * after them). + * + * L is the event just before M, if M is not the first event in the collection; otherwise L is + * the event just after M. + * * @param target_ts - * @return The index of the log event with: - * - the largest timestamp less than or equal to `target_ts`, - * - or the index `0` if all timestamps are greater than `target_ts`. - * @return null if no log event exists in the stream. + * @return The index of the log event L. */ - [[nodiscard]] virtual auto get_log_event_idx_with_nearest_timestamp( + [[nodiscard]] virtual auto find_nearest_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t target_ts - ) -> LogEventIdxTsType = 0; + ) -> NullableLogEventIdx = 0; protected: explicit StreamReader() = default; @@ -187,18 +194,18 @@ class StreamReader { ) -> void; /** - * Templated implementation of `get_log_event_idx_with_nearest_timestamp`. + * Templated implementation of `find_nearest_log_event_idx_by_timestamp`. * * @tparam LogEvent * @param log_events - * @param timestamp - * @return the best matched log event index. + * @param target_ts + * @return See `find_nearest_log_event_idx_by_timestamp`. */ template - auto generic_get_log_event_idx_with_nearest_timestamp( + auto generic_find_nearest_log_event_idx_by_timestamp( LogEvents const& log_events, - clp::ir::epoch_time_ms_t timestamp - ) -> LogEventIdxTsType; + clp::ir::epoch_time_ms_t target_ts + ) -> NullableLogEventIdx; }; template @@ -287,30 +294,31 @@ auto StreamReader::generic_filter_log_events( } template -auto StreamReader::generic_get_log_event_idx_with_nearest_timestamp( +auto StreamReader::generic_find_nearest_log_event_idx_by_timestamp( LogEvents const& log_events, - clp::ir::epoch_time_ms_t timestamp -) -> LogEventIdxTsType { + clp::ir::epoch_time_ms_t target_ts +) -> NullableLogEventIdx { if (log_events.empty()) { - return LogEventIdxTsType{emscripten::val::null()}; + return NullableLogEventIdx{emscripten::val::null()}; } + // Find the log event whose timestamp is just after `target_ts` auto first_greater_it{std::upper_bound( log_events.begin(), log_events.end(), - timestamp, + target_ts, [](clp::ir::epoch_time_ms_t ts, LogEventWithFilterData const& log_event) { return ts < log_event.get_timestamp(); } )}; if (first_greater_it == log_events.begin()) { - return LogEventIdxTsType{emscripten::val(0)}; + return NullableLogEventIdx{emscripten::val(0)}; } auto const first_greater_idx{std::distance(log_events.begin(), first_greater_it)}; - return LogEventIdxTsType{emscripten::val(first_greater_idx - 1)}; + return NullableLogEventIdx{emscripten::val(first_greater_idx - 1)}; } } // namespace clp_ffi_js::ir diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index a684bc04..b7997d19 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -148,10 +148,10 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo ); } -auto StructuredIrStreamReader::get_log_event_idx_with_nearest_timestamp( +auto StructuredIrStreamReader::find_nearest_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const target_ts -) -> LogEventIdxTsType { - return generic_get_log_event_idx_with_nearest_timestamp(*m_deserialized_log_events, target_ts); +) -> NullableLogEventIdx { + return generic_find_nearest_log_event_idx_by_timestamp(*m_deserialized_log_events, target_ts); } StructuredIrStreamReader::StructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index 38db17a2..cf1c0179 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -75,8 +75,8 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_idx_with_nearest_timestamp(clp::ir::epoch_time_ms_t target_ts - ) -> LogEventIdxTsType override; + [[nodiscard]] auto find_nearest_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t target_ts + ) -> NullableLogEventIdx override; private: // Constructor diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 19d4c403..0f288d5b 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -158,10 +158,10 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, ); } -auto UnstructuredIrStreamReader::get_log_event_idx_with_nearest_timestamp( +auto UnstructuredIrStreamReader::find_nearest_log_event_idx_by_timestamp( clp::ir::epoch_time_ms_t const target_ts -) -> LogEventIdxTsType { - return generic_get_log_event_idx_with_nearest_timestamp(m_encoded_log_events, target_ts); +) -> NullableLogEventIdx { + return generic_find_nearest_log_event_idx_by_timestamp(m_encoded_log_events, target_ts); } UnstructuredIrStreamReader::UnstructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index 7a104097..a22d168c 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,8 +71,8 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto get_log_event_idx_with_nearest_timestamp(clp::ir::epoch_time_ms_t target_ts - ) -> LogEventIdxTsType override; + [[nodiscard]] auto find_nearest_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t target_ts + ) -> NullableLogEventIdx override; private: // Constructor From a984d7ba75689570cddc7aced6a5ad70e982d820 Mon Sep 17 00:00:00 2001 From: Henry <50559854+Henry8192@users.noreply.github.com> Date: Mon, 3 Feb 2025 14:17:26 +0800 Subject: [PATCH 28/29] revert function name to find_nearest_log_event_by_timestamp --- src/clp_ffi_js/ir/StreamReader.cpp | 4 ++-- src/clp_ffi_js/ir/StreamReader.hpp | 10 +++++----- src/clp_ffi_js/ir/StructuredIrStreamReader.cpp | 4 ++-- src/clp_ffi_js/ir/StructuredIrStreamReader.hpp | 2 +- src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp | 4 ++-- src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/clp_ffi_js/ir/StreamReader.cpp b/src/clp_ffi_js/ir/StreamReader.cpp index 48d652ff..0359000f 100644 --- a/src/clp_ffi_js/ir/StreamReader.cpp +++ b/src/clp_ffi_js/ir/StreamReader.cpp @@ -148,8 +148,8 @@ EMSCRIPTEN_BINDINGS(ClpStreamReader) { .function("deserializeStream", &clp_ffi_js::ir::StreamReader::deserialize_stream) .function("decodeRange", &clp_ffi_js::ir::StreamReader::decode_range) .function( - "findNearestLogEventIdxByTimestamp", - &clp_ffi_js::ir::StreamReader::find_nearest_log_event_idx_by_timestamp + "findNearestLogEventByTimestamp", + &clp_ffi_js::ir::StreamReader::find_nearest_log_event_by_timestamp ); } } // namespace diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 232c0be2..50fbc277 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -140,7 +140,7 @@ class StreamReader { * @param target_ts * @return The index of the log event L. */ - [[nodiscard]] virtual auto find_nearest_log_event_idx_by_timestamp( + [[nodiscard]] virtual auto find_nearest_log_event_by_timestamp( clp::ir::epoch_time_ms_t target_ts ) -> NullableLogEventIdx = 0; @@ -194,15 +194,15 @@ class StreamReader { ) -> void; /** - * Templated implementation of `find_nearest_log_event_idx_by_timestamp`. + * Templated implementation of `find_nearest_log_event_by_timestamp`. * * @tparam LogEvent * @param log_events * @param target_ts - * @return See `find_nearest_log_event_idx_by_timestamp`. + * @return See `find_nearest_log_event_by_timestamp`. */ template - auto generic_find_nearest_log_event_idx_by_timestamp( + auto generic_find_nearest_log_event_by_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t target_ts ) -> NullableLogEventIdx; @@ -294,7 +294,7 @@ auto StreamReader::generic_filter_log_events( } template -auto StreamReader::generic_find_nearest_log_event_idx_by_timestamp( +auto StreamReader::generic_find_nearest_log_event_by_timestamp( LogEvents const& log_events, clp::ir::epoch_time_ms_t target_ts ) -> NullableLogEventIdx { diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp index b7997d19..47a47732 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.cpp @@ -148,10 +148,10 @@ auto StructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, bo ); } -auto StructuredIrStreamReader::find_nearest_log_event_idx_by_timestamp( +auto StructuredIrStreamReader::find_nearest_log_event_by_timestamp( clp::ir::epoch_time_ms_t const target_ts ) -> NullableLogEventIdx { - return generic_find_nearest_log_event_idx_by_timestamp(*m_deserialized_log_events, target_ts); + return generic_find_nearest_log_event_by_timestamp(*m_deserialized_log_events, target_ts); } StructuredIrStreamReader::StructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp index cf1c0179..0d48aee0 100644 --- a/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/StructuredIrStreamReader.hpp @@ -75,7 +75,7 @@ class StructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto find_nearest_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t target_ts + [[nodiscard]] auto find_nearest_log_event_by_timestamp(clp::ir::epoch_time_ms_t target_ts ) -> NullableLogEventIdx override; private: diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp index 0f288d5b..7b4c1cf3 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.cpp @@ -158,10 +158,10 @@ auto UnstructuredIrStreamReader::decode_range(size_t begin_idx, size_t end_idx, ); } -auto UnstructuredIrStreamReader::find_nearest_log_event_idx_by_timestamp( +auto UnstructuredIrStreamReader::find_nearest_log_event_by_timestamp( clp::ir::epoch_time_ms_t const target_ts ) -> NullableLogEventIdx { - return generic_find_nearest_log_event_idx_by_timestamp(m_encoded_log_events, target_ts); + return generic_find_nearest_log_event_by_timestamp(m_encoded_log_events, target_ts); } UnstructuredIrStreamReader::UnstructuredIrStreamReader( diff --git a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp index a22d168c..f9ece434 100644 --- a/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp +++ b/src/clp_ffi_js/ir/UnstructuredIrStreamReader.hpp @@ -71,7 +71,7 @@ class UnstructuredIrStreamReader : public StreamReader { [[nodiscard]] auto decode_range(size_t begin_idx, size_t end_idx, bool use_filter) const -> DecodedResultsTsType override; - [[nodiscard]] auto find_nearest_log_event_idx_by_timestamp(clp::ir::epoch_time_ms_t target_ts + [[nodiscard]] auto find_nearest_log_event_by_timestamp(clp::ir::epoch_time_ms_t target_ts ) -> NullableLogEventIdx override; private: From 47881579a7435826a552cf95f2992c6f918d73a7 Mon Sep 17 00:00:00 2001 From: Henry <50559854+Henry8192@users.noreply.github.com> Date: Wed, 5 Feb 2025 00:12:47 +0800 Subject: [PATCH 29/29] amend find_nearest_log_event_by_timestamp's comments, warning this function ONLY works with timestamps in chronological order. --- src/clp_ffi_js/ir/StreamReader.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clp_ffi_js/ir/StreamReader.hpp b/src/clp_ffi_js/ir/StreamReader.hpp index 50fbc277..25ddcc57 100644 --- a/src/clp_ffi_js/ir/StreamReader.hpp +++ b/src/clp_ffi_js/ir/StreamReader.hpp @@ -129,7 +129,7 @@ class StreamReader { /** * Finds the log event, L, where if we assume: * - * - the collection of log events is sorted in ascending timestamp order; + * - the collection of log events is sorted in chronological order, or the search won't work; * - and we insert a marker log event, M, with timestamp `target_ts` into the collection (if log * events with timestamp `target_ts` already exist in the collection, M should be inserted * after them).