Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into escape-char-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
gibber9809 committed Jan 8, 2025
2 parents 84a1403 + 5d3b671 commit 0a00281
Show file tree
Hide file tree
Showing 44 changed files with 1,588 additions and 453 deletions.
2 changes: 1 addition & 1 deletion components/core/.clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ IncludeCategories:
# NOTE: A header is grouped by first matching regex
# Library headers. Update when adding new libraries.
# NOTE: clang-format retains leading white-space on a line in violation of the YAML spec.
- Regex: "<(absl|antlr4|archive|boost|bsoncxx|catch2|curl|date|fmt|json|log_surgeon|mongocxx\
- Regex: "<(absl|antlr4|archive|boost|bsoncxx|catch2|curl|date|fmt|json|log_surgeon|lzma|mongocxx\
|msgpack|mysql|openssl|outcome|regex_utils|simdjson|spdlog|sqlite3|string_utils|yaml-cpp|zstd)"
Priority: 3
# C system headers
Expand Down
30 changes: 27 additions & 3 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Set general compressor
set(GENERAL_COMPRESSOR "zstd" CACHE STRING "The general-purpose compressor used as the 2nd-stage compressor")
set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS passthrough zstd)
if ("${GENERAL_COMPRESSOR}" STREQUAL "passthrough")
set_property(CACHE GENERAL_COMPRESSOR PROPERTY STRINGS lzma passthrough zstd)
if ("${GENERAL_COMPRESSOR}" STREQUAL "lzma")
add_definitions(-DUSE_LZMA_COMPRESSION=1)
message(STATUS "Using Lempel–Ziv–Markov chain Algorithm compression")
elseif ("${GENERAL_COMPRESSOR}" STREQUAL "passthrough")
add_definitions(-DUSE_PASSTHROUGH_COMPRESSION=1)
message(STATUS "Using passthrough compression")
elseif ("${GENERAL_COMPRESSOR}" STREQUAL "zstd")
Expand Down Expand Up @@ -224,6 +227,21 @@ else()
message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for ZStd")
endif()

# Locate the LZMA library and add its header directory to this directory's include path.
# On success the version, library location and include directory are reported; on failure
# configuration aborts.
# TODO: Add a script in ./cmake/Modules to properly import LZMA in find_package()'s module mode
# NOTE(review): LIBLZMA_USE_STATIC_LIBS is presumably meant for the custom find module described
# in the TODO above -- confirm whether the find module currently in use honors it.
if(CLP_USE_STATIC_LIBS)
    set(LIBLZMA_USE_STATIC_LIBS ON)
endif()
find_package(LibLZMA REQUIRED)
if(NOT LIBLZMA_FOUND)
    message(FATAL_ERROR "Could not find ${CLP_LIBS_STRING} libraries for Lzma")
endif()
message(STATUS "Found Lzma ${LIBLZMA_VERSION_STRING}")
message(STATUS "Lzma library location: ${LIBLZMA_LIBRARIES}")
message(STATUS "Lzma Include Dir: ${LIBLZMA_INCLUDE_DIRS}")
include_directories(${LIBLZMA_INCLUDE_DIRS})

# sqlite dependencies
set(sqlite_DYNAMIC_LIBS "dl;m;pthread")
include(cmake/Modules/FindLibraryDependencies.cmake)
Expand Down Expand Up @@ -257,6 +275,8 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/FileReader.hpp
src/clp_s/FileWriter.cpp
src/clp_s/FileWriter.hpp
src/clp_s/InputConfig.cpp
src/clp_s/InputConfig.hpp
src/clp_s/JsonConstructor.cpp
src/clp_s/JsonConstructor.hpp
src/clp_s/JsonFileIterator.cpp
Expand Down Expand Up @@ -516,6 +536,9 @@ set(SOURCE_FILES_unitTest
src/clp/streaming_compression/Compressor.hpp
src/clp/streaming_compression/Constants.hpp
src/clp/streaming_compression/Decompressor.hpp
src/clp/streaming_compression/lzma/Compressor.cpp
src/clp/streaming_compression/lzma/Compressor.hpp
src/clp/streaming_compression/lzma/Constants.hpp
src/clp/streaming_compression/passthrough/Compressor.cpp
src/clp/streaming_compression/passthrough/Compressor.hpp
src/clp/streaming_compression/passthrough/Decompressor.cpp
Expand Down Expand Up @@ -592,7 +615,7 @@ target_include_directories(unitTest
target_link_libraries(unitTest
PRIVATE
absl::flat_hash_map
Boost::filesystem Boost::iostreams Boost::program_options Boost::regex
Boost::filesystem Boost::iostreams Boost::program_options Boost::regex Boost::url
${CURL_LIBRARIES}
fmt::fmt
kql
Expand All @@ -608,6 +631,7 @@ target_link_libraries(unitTest
clp::regex_utils
clp::string_utils
yaml-cpp::yaml-cpp
${LIBLZMA_LIBRARIES}
ZStd::ZStd
)
target_compile_features(unitTest
Expand Down
51 changes: 0 additions & 51 deletions components/core/src/clp/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,57 +88,6 @@ ErrorCode create_directory_structure(string const& path, mode_t mode) {
return ErrorCode_Success;
}

// Returns the parent directory of `path`. The path is first normalized with
// get_unambiguous_path; the result is "." when the normalized path contains no '/', "/" when its
// only '/' is the leading one, and otherwise everything before the last '/'.
string get_parent_directory_path(string const& path) {
    string const normalized_path = get_unambiguous_path(path);

    auto const separator_pos = normalized_path.find_last_of('/');
    if (string::npos == separator_pos) {
        // No directory component at all, so the parent is the current directory
        return ".";
    }
    if (0 == separator_pos) {
        // The only separator is the root itself
        return "/";
    }
    return normalized_path.substr(0, separator_pos);
}

// Normalizes `path` by dropping "." components and empty components (from repeated or trailing
// '/'), and by resolving each ".." against the nearest preceding regular component. A ".." that
// has no preceding component to cancel is discarded. A leading '/' is preserved. Returns an empty
// string when `path` is empty.
string get_unambiguous_path(string const& path) {
    if (path.empty()) {
        return {};
    }

    // Tokenize on '/'
    vector<string> tokens;
    boost::split(tokens, path, boost::is_any_of("/"), boost::token_compress_on);

    // Walk the tokens from last to first so that every ".." can cancel the next regular token
    // encountered (i.e., the component that precedes it in the path).
    list<string> kept_tokens;
    size_t num_pending_parent_refs{0};
    for (auto token_it = tokens.crbegin(); tokens.crend() != token_it; ++token_it) {
        auto const& token = *token_it;
        if (".." == token) {
            ++num_pending_parent_refs;
            continue;
        }
        if (token.empty() || "." == token) {
            continue;
        }
        if (num_pending_parent_refs > 0) {
            --num_pending_parent_refs;
            continue;
        }
        kept_tokens.push_front(token);
    }

    // Rebuild the path: optional root followed by the surviving components
    string normalized_path;
    if ('/' == path.front()) {
        normalized_path.push_back('/');
    }
    if (false == kept_tokens.empty()) {
        normalized_path += boost::join(kept_tokens, "/");
    }
    return normalized_path;
}

ErrorCode read_list_of_paths(string const& list_path, vector<string>& paths) {
unique_ptr<FileReader> file_reader;
try {
Expand Down
22 changes: 0 additions & 22 deletions components/core/src/clp/Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,6 @@ ErrorCode create_directory(std::string const& path, mode_t mode, bool exist_ok);
*/
ErrorCode create_directory_structure(std::string const& path, mode_t mode);

/**
 * Gets the parent directory path for a given path. The path is first normalized with
 * get_unambiguous_path, so ".", ".." and repeated "/" are resolved before the last component is
 * dropped.
 * Corner cases:
 * - get_parent_directory_path("") = "."
 * - get_parent_directory_path("abc") = "."
 * - get_parent_directory_path(".") = "."
 * - get_parent_directory_path("..") = "."
 * - get_parent_directory_path("/") = "/"
 * - get_parent_directory_path("/.") = "/"
 * - get_parent_directory_path("/..") = "/"
 * - get_parent_directory_path("/abc") = "/"
 * @param path
 * @return Parent directory path
 */
std::string get_parent_directory_path(std::string const& path);

/**
 * Removes ".", "..", and consecutive "/" from a given path and returns the result.
 * - Each ".." also removes the nearest preceding regular component (e.g., "a/../b" becomes "b").
 * - A ".." with no preceding component left to remove is simply dropped (e.g., "../a" becomes
 *   "a").
 * - A leading "/" is preserved; trailing "/" are removed.
 * @param path The given path
 * @return The unambiguous path; an empty string if the given path is empty or if a relative path
 * has no components left after normalization
 */
std::string get_unambiguous_path(std::string const& path);

/**
* Read a list of paths from a file
* @param list_path
Expand Down
Loading

0 comments on commit 0a00281

Please sign in to comment.