Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -402,12 +402,6 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/SchemaWriter.hpp
src/clp_s/search/AddTimestampConditions.cpp
src/clp_s/search/AddTimestampConditions.hpp
src/clp_s/search/clp_search/EncodedVariableInterpreter.cpp
src/clp_s/search/clp_search/EncodedVariableInterpreter.hpp
src/clp_s/search/clp_search/Grep.cpp
src/clp_s/search/clp_search/Grep.hpp
src/clp_s/search/clp_search/Query.cpp
src/clp_s/search/clp_search/Query.hpp
src/clp_s/search/EvaluateRangeIndexFilters.cpp
src/clp_s/search/EvaluateRangeIndexFilters.hpp
src/clp_s/search/EvaluateTimestampIndex.cpp
Expand All @@ -429,10 +423,6 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/TimestampEntry.hpp
src/clp_s/Utils.cpp
src/clp_s/Utils.hpp
src/clp_s/VariableDecoder.cpp
src/clp_s/VariableDecoder.hpp
src/clp_s/VariableEncoder.cpp
src/clp_s/VariableEncoder.hpp
src/clp_s/ZstdCompressor.cpp
src/clp_s/ZstdCompressor.hpp
src/clp_s/ZstdDecompressor.cpp
Expand Down
2 changes: 2 additions & 0 deletions components/core/cmake/Options/options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ function(set_clp_s_clp_dependencies_dependencies)
CLP_NEED_BOOST
CLP_NEED_CURL
CLP_NEED_FMT
CLP_NEED_LOG_SURGEON
CLP_NEED_MSGPACKCXX
CLP_NEED_NLOHMANN_JSON
CLP_NEED_OPENSSL
Expand Down Expand Up @@ -311,6 +312,7 @@ endfunction()
function(set_clp_s_search_dependencies)
set_clp_need_flags(
CLP_NEED_ABSL
CLP_NEED_LOG_SURGEON
CLP_NEED_SIMDJSON
CLP_NEED_SPDLOG
)
Expand Down
2 changes: 0 additions & 2 deletions components/core/src/clp/StringReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#include <cassert>
#include <cerrno>

#include <boost/filesystem.hpp>

using std::string;

namespace clp {
Expand Down
6 changes: 0 additions & 6 deletions components/core/src/clp/ir/parsing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ bool is_delim(signed char c) {
|| ('A' <= c && c <= 'Z') || '\\' == c || '_' == c || ('a' <= c && c <= 'z'));
}

bool is_variable_placeholder(char c) {
return (enum_to_underlying_type(VariablePlaceholder::Integer) == c)
|| (enum_to_underlying_type(VariablePlaceholder::Dictionary) == c)
|| (enum_to_underlying_type(VariablePlaceholder::Float) == c);
}

bool is_var(std::string_view value) {
size_t begin_pos = 0;
size_t end_pos = 0;
Expand Down
11 changes: 10 additions & 1 deletion components/core/src/clp/ir/parsing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#include <string>
#include <string_view>

#include "../type_utils.hpp"
#include "types.hpp"

namespace clp::ir {
/**
* Checks if the given character is a delimiter
Expand All @@ -23,10 +26,16 @@ namespace clp::ir {
bool is_delim(signed char c);

/**
* NOTE: This method is marked inline for a ~50% performance improvement to
* `append_constant_to_logtype`.
* @param c
* @return Whether the character is a variable placeholder
*/
bool is_variable_placeholder(char c);
inline bool is_variable_placeholder(char c) {
return (enum_to_underlying_type(VariablePlaceholder::Integer) == c)
|| (enum_to_underlying_type(VariablePlaceholder::Dictionary) == c)
|| (enum_to_underlying_type(VariablePlaceholder::Float) == c);
}

/**
* NOTE: This method is marked inline for a 1-2% performance improvement
Expand Down
28 changes: 23 additions & 5 deletions components/core/src/clp_s/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,22 @@ set(
../clp/cli_utils.cpp
../clp/cli_utils.hpp
../clp/Defs.h
../clp/EncodedVariableInterpreter.cpp
../clp/EncodedVariableInterpreter.hpp
../clp/ErrorCode.hpp
../clp/ffi/encoding_methods.cpp
../clp/ffi/encoding_methods.hpp
../clp/ffi/encoding_methods.inc
../clp/ffi/ir_stream/byteswap.hpp
../clp/ffi/ir_stream/decoding_methods.cpp
../clp/ffi/ir_stream/decoding_methods.hpp
../clp/ffi/ir_stream/decoding_methods.inc
../clp/ffi/ir_stream/Deserializer.hpp
../clp/ffi/ir_stream/encoding_methods.cpp
../clp/ffi/ir_stream/encoding_methods.hpp
../clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp
../clp/ffi/ir_stream/ir_unit_deserialization_methods.hpp
../clp/ffi/ir_stream/protocol_constants.hpp
../clp/ffi/ir_stream/Serializer.cpp
../clp/ffi/ir_stream/Serializer.hpp
../clp/ffi/ir_stream/search/AstEvaluationResult.hpp
Expand All @@ -48,17 +56,28 @@ set(
../clp/FileDescriptor.hpp
../clp/FileReader.cpp
../clp/FileReader.hpp
../clp/GrepCore.cpp
../clp/GrepCore.hpp
../clp/hash_utils.cpp
../clp/hash_utils.hpp
../clp/ir/constants.hpp
../clp/ir/EncodedTextAst.cpp
../clp/ir/EncodedTextAst.hpp
../clp/ir/LogEvent.hpp
../clp/ir/parsing.cpp
../clp/ir/parsing.hpp
../clp/ir/parsing.inc
../clp/ir/types.hpp
../clp/LogSurgeonReader.cpp
../clp/LogSurgeonReader.hpp
../clp/NetworkReader.cpp
../clp/NetworkReader.hpp
../clp/networking/socket_utils.cpp
../clp/networking/socket_utils.hpp
../clp/Query.cpp
../clp/Query.hpp
../clp/QueryToken.cpp
../clp/QueryToken.hpp
../clp/ReaderInterface.cpp
../clp/ReaderInterface.hpp
../clp/ReadOnlyMemoryMappedFile.cpp
Expand All @@ -69,10 +88,12 @@ set(
../clp/streaming_archive/Constants.hpp
../clp/streaming_compression/zstd/Decompressor.cpp
../clp/streaming_compression/zstd/Decompressor.hpp
../clp/StringReader.cpp
../clp/StringReader.hpp
../clp/Thread.cpp
../clp/Thread.hpp
../clp/TraceableException.hpp
../clp/time_types.hpp
../clp/TraceableException.hpp
../clp/type_utils.hpp
../clp/utf8_utils.cpp
../clp/utf8_utils.hpp
Expand All @@ -92,6 +113,7 @@ if(CLP_BUILD_CLP_S_CLP_DEPENDENCIES)
clp_s_clp_dependencies
PUBLIC
clp::string_utils
log_surgeon::log_surgeon
ystdlib::containers
PRIVATE
Boost::regex
Expand Down Expand Up @@ -223,8 +245,6 @@ set(
TraceableException.hpp
Utils.cpp
Utils.hpp
VariableEncoder.cpp
VariableEncoder.hpp
)

if(CLP_BUILD_CLP_S_ARCHIVEWRITER)
Expand Down Expand Up @@ -286,8 +306,6 @@ set(
TraceableException.hpp
Utils.cpp
Utils.hpp
VariableDecoder.cpp
VariableDecoder.hpp
)

if(CLP_BUILD_CLP_S_ARCHIVEREADER)
Expand Down
13 changes: 9 additions & 4 deletions components/core/src/clp_s/ColumnReader.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#include "ColumnReader.hpp"

#include "../clp/EncodedVariableInterpreter.hpp"
#include "BufferViewReader.hpp"
#include "ColumnWriter.hpp"
#include "Utils.hpp"
#include "VariableDecoder.hpp"

namespace clp_s {
void Int64ColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
Expand Down Expand Up @@ -113,9 +113,14 @@ ClpStringColumnReader::extract_string_value_into_buffer(uint64_t cur_message, st
}

int64_t encoded_vars_offset = ClpStringColumnWriter::get_encoded_offset(value);
auto encoded_vars = m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_vars());
auto encoded_vars = m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_variables());

VariableDecoder::decode_variables_into_message(entry, *m_var_dict, encoded_vars, buffer);
clp::EncodedVariableInterpreter::decode_variables_into_message(
entry,
*m_var_dict,
encoded_vars,
buffer
);
}

void ClpStringColumnReader::extract_escaped_string_value_into_buffer(
Expand Down Expand Up @@ -149,7 +154,7 @@ UnalignedMemSpan<int64_t> ClpStringColumnReader::get_encoded_vars(uint64_t cur_m

int64_t encoded_vars_offset = ClpStringColumnWriter::get_encoded_offset(value);

return m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_vars());
return m_encoded_vars.sub_span(encoded_vars_offset, entry.get_num_variables());
}

void VariableStringColumnReader::load(BufferViewReader& reader, uint64_t num_messages) {
Expand Down
34 changes: 21 additions & 13 deletions components/core/src/clp_s/ColumnWriter.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
#include "ColumnWriter.hpp"

#include <cstdint>
#include <variant>

#include "../clp/Defs.h"
#include "../clp/EncodedVariableInterpreter.hpp"
#include "ParsedMessage.hpp"
#include "ZstdCompressor.hpp"

namespace clp_s {
size_t Int64ColumnWriter::add_value(ParsedMessage::variable_t& value) {
m_values.push_back(std::get<int64_t>(value));
Expand Down Expand Up @@ -49,15 +57,16 @@ void BooleanColumnWriter::store(ZstdCompressor& compressor) {
}

size_t ClpStringColumnWriter::add_value(ParsedMessage::variable_t& value) {
std::string string_var = std::get<std::string>(value);
uint64_t id;
uint64_t offset = m_encoded_vars.size();
VariableEncoder::encode_and_add_to_dictionary(
string_var,
uint64_t offset{m_encoded_vars.size()};
std::vector<clp::variable_dictionary_id_t> temp_var_dict_ids;
clp::EncodedVariableInterpreter::encode_and_add_to_dictionary(
std::get<std::string>(value),
m_logtype_entry,
*m_var_dict,
m_encoded_vars
m_encoded_vars,
temp_var_dict_ids
);
clp::logtype_dictionary_id_t id{};
m_log_dict->add_entry(m_logtype_entry, id);
auto encoded_id = encode_log_dict_id(id, offset);
m_logtypes.push_back(encoded_id);
Expand All @@ -74,16 +83,15 @@ void ClpStringColumnWriter::store(ZstdCompressor& compressor) {
}

size_t VariableStringColumnWriter::add_value(ParsedMessage::variable_t& value) {
std::string string_var = std::get<std::string>(value);
uint64_t id;
m_var_dict->add_entry(string_var, id);
m_variables.push_back(id);
return sizeof(int64_t);
clp::variable_dictionary_id_t id{};
m_var_dict->add_entry(std::get<std::string>(value), id);
m_var_dict_ids.push_back(id);
return sizeof(clp::variable_dictionary_id_t);
}

void VariableStringColumnWriter::store(ZstdCompressor& compressor) {
size_t size = m_variables.size() * sizeof(int64_t);
compressor.write(reinterpret_cast<char const*>(m_variables.data()), size);
auto size{m_var_dict_ids.size() * sizeof(clp::variable_dictionary_id_t)};
compressor.write(reinterpret_cast<char const*>(m_var_dict_ids.data()), size);
}

size_t DateStringColumnWriter::add_value(ParsedMessage::variable_t& value) {
Expand Down
28 changes: 16 additions & 12 deletions components/core/src/clp_s/ColumnWriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
#include <utility>
#include <variant>

#include "../clp/Defs.h"
#include "DictionaryWriter.hpp"
#include "FileWriter.hpp"
#include "ParsedMessage.hpp"
#include "TimestampDictionaryWriter.hpp"
#include "VariableEncoder.hpp"
#include "ZstdCompressor.hpp"

namespace clp_s {
Expand Down Expand Up @@ -117,6 +117,9 @@ class BooleanColumnWriter : public BaseColumnWriter {

class ClpStringColumnWriter : public BaseColumnWriter {
public:
// Types
using encoded_log_dict_id_t = uint64_t;

Comment on lines +120 to +122
Copy link
Contributor

@coderabbitai coderabbitai bot Aug 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Consider hoisting the encoded_log_dict_id_t alias to a common header

encoded_log_dict_id_t is introduced here as a class-scoped alias, but the same type is likely needed by multiple compilation units (e.g., readers and tests). Defining it in a shared header (perhaps Defs.h, alongside the other strong typedefs) would avoid repetitive aliases and ensure a single source of truth.

🤖 Prompt for AI Agents
In components/core/src/clp_s/ColumnWriter.hpp around lines 120 to 122, the type
alias encoded_log_dict_id_t is currently defined locally but should be moved to
a common shared header file such as Defs.h. This change will allow multiple
compilation units like readers and tests to use the same alias consistently. To
fix, remove the alias from ColumnWriter.hpp and add it to the shared header
alongside other typedefs, ensuring a single source of truth for this type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This header is currently already included in all places this type needs to be used.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gibber9809, thank you for the clarification! You're right - if ColumnWriter.hpp is already included in all the places where encoded_log_dict_id_t is needed, then the current placement as a class-scoped alias is appropriate and avoids unnecessary coupling. I appreciate you providing this context about the project's include structure.


✏️ Learnings added
Learnt from: gibber9809
PR: y-scope/clp#1163
File: components/core/src/clp_s/ColumnWriter.hpp:120-122
Timestamp: 2025-08-06T23:57:49.720Z
Learning: In the CLP codebase, the `encoded_log_dict_id_t` type alias in `components/core/src/clp_s/ColumnWriter.hpp` is appropriately placed as a class-scoped alias because the ColumnWriter.hpp header is already included in all places where this type is used, eliminating the need to move it to a common header.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: LinZhihao-723
PR: y-scope/clp#557
File: components/core/src/clp/ffi/ir_stream/utils.hpp:0-0
Timestamp: 2024-10-18T02:31:18.595Z
Learning: In `components/core/src/clp/ffi/ir_stream/utils.hpp`, the function `size_dependent_encode_and_serialize_schema_tree_node_id` assumes that the caller checks that `node_id` fits within the range of `encoded_node_id_t` before casting.

Learnt from: LinZhihao-723
PR: y-scope/clp#557
File: components/core/tests/test-ir_encoding_methods.cpp:1216-1286
Timestamp: 2024-10-13T09:27:43.408Z
Learning: In the unit test case `ffi_ir_stream_serialize_schema_tree_node_id` in `test-ir_encoding_methods.cpp`, suppressing the `readability-function-cognitive-complexity` warning is acceptable due to the expansion of Catch2 macros in C++ tests, and such test cases may not have readability issues.

Learnt from: jackluo923
PR: y-scope/clp#1054
File: components/core/src/glt/SQLitePreparedStatement.hpp:4-4
Timestamp: 2025-07-01T14:49:07.333Z
Learning: In the CLP codebase, standard headers like `<cstdint>` and `<string>` in alphabetical order (as seen in components/core/src/glt/SQLitePreparedStatement.hpp) are correctly ordered and do not need adjustment.

Learnt from: LinZhihao-723
PR: y-scope/clp#486
File: components/core/tests/test-error_handling.cpp:44-91
Timestamp: 2024-11-26T19:16:19.933Z
Learning: In `components/core/tests/test-error_handling.cpp` (C++), when specializing template methods of `ErrorCategory` via type aliases (e.g., `AlwaysSuccessErrorCategory`), it's acceptable to define these specializations in the current namespace, even though the primary template is in the `clp::error_handling` namespace, due to the use of type aliases and template instantiation.

Learnt from: LinZhihao-723
PR: y-scope/clp#558
File: components/core/tests/test-ffi_KeyValuePairLogEvent.cpp:85-103
Timestamp: 2024-10-14T03:42:53.145Z
Learning: The function `assert_kv_pair_log_event_creation_failure` is correctly placed within the anonymous namespace in `test-ffi_KeyValuePairLogEvent.cpp`.

Learnt from: LinZhihao-723
PR: y-scope/clp#558
File: components/core/tests/test-ffi_KeyValuePairLogEvent.cpp:14-14
Timestamp: 2024-10-14T03:42:10.355Z
Learning: In the file `components/core/tests/test-ffi_KeyValuePairLogEvent.cpp`, including `<json/single_include/nlohmann/json.hpp>` is consistent with the project's coding standards.

Learnt from: haiqi96
PR: y-scope/clp#523
File: components/core/src/clp/BufferedFileReader.cpp:96-106
Timestamp: 2024-10-24T14:45:26.265Z
Learning: In `components/core/src/clp/BufferedFileReader.cpp`, refactoring the nested error handling conditions may not apply due to the specific logic in the original code.

Learnt from: AVMatthews
PR: y-scope/clp#543
File: components/core/src/clp_s/clp-s.cpp:196-265
Timestamp: 2024-10-07T20:10:08.254Z
Learning: In `clp-s.cpp`, the `run_serializer` function interleaves serialization and writing of IR files, making it difficult to restructure it into separate functions.

Learnt from: AVMatthews
PR: y-scope/clp#543
File: components/core/src/clp_s/clp-s.cpp:196-265
Timestamp: 2024-10-08T15:52:50.753Z
Learning: In `clp-s.cpp`, the `run_serializer` function interleaves serialization and writing of IR files, making it difficult to restructure it into separate functions.

Learnt from: gibber9809
PR: y-scope/clp#955
File: components/core/src/clp_s/search/sql/CMakeLists.txt:8-26
Timestamp: 2025-06-02T18:22:24.060Z
Learning: In the clp project, ANTLR code generation at build time is being removed by another PR. When reviewing CMake files, be aware that some temporary suboptimal configurations may exist to reduce merge conflicts between concurrent PRs, especially around ANTLR_TARGET calls.

Learnt from: gibber9809
PR: y-scope/clp#630
File: components/core/src/clp_s/JsonParser.cpp:702-703
Timestamp: 2024-12-10T16:03:08.691Z
Learning: In `components/core/src/clp_s/JsonParser.cpp`, within the `get_archive_node_id` function, validation and exception throwing for UTF-8 compliance of `curr_node.get_key_name()` are unnecessary and should be omitted.

Learnt from: gibber9809
PR: y-scope/clp#630
File: components/core/src/clp_s/JsonParser.cpp:702-703
Timestamp: 2024-12-10T16:03:13.322Z
Learning: In `components/core/src/clp_s/JsonParser.cpp`, validation and exception throwing are unnecessary in the `get_archive_node_id` method when processing nodes, and should not be added.

Learnt from: Bill-hbrhbr
PR: y-scope/clp#1122
File: components/core/src/clp/clp/CMakeLists.txt:175-195
Timestamp: 2025-07-23T09:54:45.185Z
Learning: In the CLP project, when reviewing CMakeLists.txt changes that introduce new compression library dependencies (BZip2, LibLZMA, LZ4, ZLIB), the team prefers to address conditional linking improvements in separate PRs rather than expanding the scope of focused migration PRs like the LibArchive task-based installation migration.

Learnt from: haiqi96
PR: y-scope/clp#651
File: components/clp-package-utils/clp_package_utils/scripts/compress.py:0-0
Timestamp: 2025-01-16T16:58:43.190Z
Learning: In the clp-package compression flow, path validation and error handling is performed at the scheduler level rather than in the compress.py script to maintain simplicity and avoid code duplication.

// Constructor
ClpStringColumnWriter(
int32_t id,
Expand All @@ -141,16 +144,16 @@ class ClpStringColumnWriter : public BaseColumnWriter {
* @param encoded_id
* @return the encoded log dict id
*/
static int64_t get_encoded_log_dict_id(uint64_t encoded_id) {
return (int64_t)encoded_id & cLogDictIdMask;
static clp::logtype_dictionary_id_t get_encoded_log_dict_id(encoded_log_dict_id_t encoded_id) {
return static_cast<clp::logtype_dictionary_id_t>(encoded_id & cLogDictIdMask);
}

/**
* @param encoded_id
* @return The encoded offset
*/
static int64_t get_encoded_offset(uint64_t encoded_id) {
return ((int64_t)encoded_id & cOffsetMask) >> cOffsetBitPosition;
static uint64_t get_encoded_offset(encoded_log_dict_id_t encoded_id) {
return (encoded_id & cOffsetMask) >> cOffsetBitPosition;
}

private:
Expand All @@ -160,20 +163,21 @@ class ClpStringColumnWriter : public BaseColumnWriter {
* @param offset
* @return The encoded log dict id
*/
static int64_t encode_log_dict_id(uint64_t id, uint64_t offset) {
return ((int64_t)id) | ((int64_t)offset) << cOffsetBitPosition;
static encoded_log_dict_id_t
encode_log_dict_id(clp::logtype_dictionary_id_t id, uint64_t offset) {
return static_cast<encoded_log_dict_id_t>(id) | (offset << cOffsetBitPosition);
}

static constexpr int cOffsetBitPosition = 24;
static constexpr int64_t cLogDictIdMask = ~(-1ULL << cOffsetBitPosition);
static constexpr int64_t cOffsetMask = ~cLogDictIdMask;
static constexpr uint64_t cLogDictIdMask = (1ULL << cOffsetBitPosition) - 1;
static constexpr uint64_t cOffsetMask = ~cLogDictIdMask;

std::shared_ptr<VariableDictionaryWriter> m_var_dict;
std::shared_ptr<LogTypeDictionaryWriter> m_log_dict;
LogTypeDictionaryEntry m_logtype_entry;

std::vector<int64_t> m_logtypes;
std::vector<int64_t> m_encoded_vars;
std::vector<encoded_log_dict_id_t> m_logtypes;
std::vector<clp::encoded_variable_t> m_encoded_vars;
};

class VariableStringColumnWriter : public BaseColumnWriter {
Expand All @@ -193,7 +197,7 @@ class VariableStringColumnWriter : public BaseColumnWriter {

private:
std::shared_ptr<VariableDictionaryWriter> m_var_dict;
std::vector<int64_t> m_variables;
std::vector<clp::variable_dictionary_id_t> m_var_dict_ids;
};

class DateStringColumnWriter : public BaseColumnWriter {
Expand Down
Loading
Loading