diff --git a/userspace/libsinsp/test/sinsp_utils.ut.cpp b/userspace/libsinsp/test/sinsp_utils.ut.cpp index 8779e0b0fe..6c685d85b0 100644 --- a/userspace/libsinsp/test/sinsp_utils.ut.cpp +++ b/userspace/libsinsp/test/sinsp_utils.ut.cpp @@ -25,12 +25,6 @@ TEST(sinsp_utils_test, concatenate_paths) // Some tests were motivated by this resource: // https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap04.html#tag_04_11 - // PLEASE NOTE: - // * current impl does not support unicode. - // * current impl does not sanitize path1 - // * current impl expects path1 to end with '/' - // * current impl skips path1 altogether if path2 is absolute - std::string path1, path2, res; res = sinsp_utils::concatenate_paths("", ""); @@ -39,57 +33,12 @@ TEST(sinsp_utils_test, concatenate_paths) path1 = ""; path2 = "../"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("", res); - - path1 = ""; - path2 = ".."; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("", res); - - path1 = "/"; - path2 = "../"; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("/", res); - - path1 = "a"; - path2 = "../"; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("a..", res); // since the helper does not add any "/" between path1 and path2, we end up with this. - - path1 = "a/"; - path2 = "../"; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("", res); - - path1 = ""; - path2 = "/foo"; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("/foo", res); - - path1 = "foo/"; - path2 = "..//a"; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("a", res); // path2 has been sanitized, plus we moved up a folder because of ".." - - path1 = "/foo/"; - path2 = "..//a"; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("/a", res); // path2 has been sanitized, plus we moved up a folder because of ".." 
- - path1 = "heolo"; - path2 = "w////////////..//////.////////r.|"; - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("r.|", res); // since the helper does not add any "/" between path1 and path2, we end up with this. - - path1 = "heolo"; - path2 = "w/////////////..//"; // heolow/////////////..// > heolow/..// -> / - res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("", res); // since the helper does not add any "/" between path1 and path2, we end up with this, ie a folder up from "heolow/" + EXPECT_EQ("..", res); path1 = ""; path2 = "./"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("", res); + EXPECT_EQ(".", res); path1 = ""; path2 = "dir/term"; @@ -144,28 +93,27 @@ TEST(sinsp_utils_test, concatenate_paths) path1 = "./app"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("./appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields + EXPECT_EQ("app/custom/term", res); path1 = "/app"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("/appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields + EXPECT_EQ("/app/custom/term", res); path1 = "app"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields + EXPECT_EQ("app/custom/term", res); - path1 = "app/"; + path1 = "app//"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); EXPECT_EQ("app/custom/term", res); - // We don't support sanitizing path1 path1 = "app/////"; path2 = "custom////term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("app/////custom/term", res); + EXPECT_EQ("app/custom/term", res); path1 = "/"; path2 = "/app/custom/dir/././././../../term/"; @@ -177,7 +125,6 @@ 
TEST(sinsp_utils_test, concatenate_paths) res = sinsp_utils::concatenate_paths(path1, path2); EXPECT_EQ("/app", res); - /* No unicode support path1 = "/root/"; path2 = "../😉"; res = sinsp_utils::concatenate_paths(path1, path2); @@ -196,7 +143,7 @@ TEST(sinsp_utils_test, concatenate_paths) path1 = "/root"; path2 = "c:/hello/world/"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("/root/c:/hello/world", res); */ + EXPECT_EQ("/root/c:/hello/world", res); } TEST(sinsp_utils_test, sinsp_split) diff --git a/userspace/libsinsp/utils.cpp b/userspace/libsinsp/utils.cpp index 77a59869b9..0bd0551452 100644 --- a/userspace/libsinsp/utils.cpp +++ b/userspace/libsinsp/utils.cpp @@ -53,6 +53,7 @@ limitations under the License. #include #include #include +#include #ifndef PATH_MAX #define PATH_MAX 4096 @@ -594,187 +595,36 @@ bool sinsp_utils::sockinfo_to_str(sinsp_sockinfo* sinfo, scap_fd_type stype, cha return true; } -// -// Helper function to move a directory up in a path string -// -static inline void rewind_to_parent_path(const char* targetbase, char** tc, const char** pc, uint32_t delta) +std::string sinsp_utils::concatenate_paths(std::string_view path1, std::string_view path2, size_t max_len) { - if(*tc <= targetbase + 1) - { - (*pc) += delta; - return; - } - - (*tc)--; - - while((*tc) >= targetbase + 1 && *((*tc) - 1) != '/') - { - (*tc)--; - } + auto p1 = std::filesystem::path(path1, std::filesystem::path::format::generic_format); + auto p2 = std::filesystem::path(path2, std::filesystem::path::format::generic_format); - (*pc) += delta; -} - -// -// Args: -// - target: the string where we are supposed to start copying -// - targetbase: the base of the path, i.e.
the furthest we can go back when -// following parent directories -// - path: the path to copy -// -static inline void copy_and_sanitize_path(char* target, char* targetbase, const char *path, char separator) -{ - char* tc = target; - const char* pc = path; - g_invalidchar ic; - const bool empty_base = target == targetbase; - - while(true) - { - if(*pc == 0) - { - *tc = 0; - - // - // If the path ends with a separator, remove it, as the OS does. - // Properly manage case where path is just "/". - // - if((tc > (targetbase + 1)) && (*(tc - 1) == separator)) - { - *(tc - 1) = 0; - } - - return; - } - - if(ic(*pc)) - { - // - // Invalid char, substitute with a '.' - // - *tc = '.'; - tc++; - pc++; - } - else - { - // - // If path begins with '.' or '.' is the first char after a '/' - // - if(*pc == '.' && (tc == targetbase || *(tc - 1) == separator)) - { - // - // '../', rewind to the previous separator - // - if(*(pc + 1) == '.' && *(pc + 2) == separator) - { - rewind_to_parent_path(targetbase, &tc, &pc, 3); - } - // - // '..', with no separator. - // This is valid if we are at the end of the string, and in that case we rewind. - // - else if(*(pc + 1) == '.' && *(pc + 2) == 0) - { - rewind_to_parent_path(targetbase, &tc, &pc, 2); - } - // - // './', just skip it - // - else if(*(pc + 1) == separator) - { - pc += 2; - } - // - // '.', with no separator. - // This is valid if we are at the end of the string, and in that case we rewind. - // - else if(*(pc + 1) == 0) - { - pc++; - } - // - // Otherwise, we leave the string intact. - // - else - { - *tc = *pc; - pc++; - tc++; - } - } - else if(*pc == separator) - { - // - // separator: - // * if the last char is already a separator, skip it - // * if we are back at targetbase but targetbase was not empty before, it means we - // fully rewinded back to targetbase and the string is now empty. Skip separator.
- // Example: "/foo/../a" -> "/a" BUT "foo/../a" -> "a" - // -> Otherwise: "foo/../a" -> "/a" - // - if((tc > targetbase && *(tc - 1) == separator) || (tc == targetbase && !empty_base)) - { - pc++; - } - else - { - *tc = *pc; - tc++; - pc++; - } - } - else - { - // - // Normal char, copy it - // - *tc = *pc; - tc++; - pc++; - } - } - } -} +#ifdef _WIN32 + // Windows-only workaround: this function only deals with unix-like paths, so make sure root names (e.g. "c:/", "//?/") are not interpreted as such by std::filesystem. + p1 = workaround_win_root_name(p1); + p2 = workaround_win_root_name(p2); +#endif // _WIN32 + // Note: if p2 happens to be an absolute path, p1 / p2 == p2, i.e. path1 is ignored altogether. + auto path_concat = (p1 / p2).lexically_normal(); + std::string result = path_concat.generic_string(); -/* - * Return false if path2 is an absolute path. - * path1 MUST be '/' terminated. - * path1 is not sanitized. - * If path2 is absolute, we only account for it. - */ -static inline bool concatenate_paths_(char* target, uint32_t targetlen, const char* path1, uint32_t len1, - const char* path2, uint32_t len2) -{ - if(targetlen < (len1 + len2 + 1)) + // If the path ends with a separator, remove it, as the OS does. + // The length() > 1 guard keeps a result that is just "/" intact.
+ // (The max_len check below therefore applies to the already-trimmed string.) + if(result.length() > 1 && result.back() == '/') { - strlcpy(target, "/PATH_TOO_LONG", targetlen); - return false; + result.pop_back(); } - if(len2 != 0 && path2[0] != '/') + if(result.length() > max_len) { - memcpy(target, path1, len1); - copy_and_sanitize_path(target + len1, target, path2, '/'); - return true; - } - else - { - target[0] = 0; - copy_and_sanitize_path(target, target, path2, '/'); - return false; + return "/PATH_TOO_LONG"; } -} -std::string sinsp_utils::concatenate_paths(std::string_view path1, std::string_view path2) -{ - char fullpath[SCAP_MAX_PATH_SIZE]; - concatenate_paths_(fullpath, SCAP_MAX_PATH_SIZE, path1.data(), (uint32_t)path1.length(), path2.data(), - path2.size()); - return std::string(fullpath); + return result; } - bool sinsp_utils::is_ipv4_mapped_ipv6(uint8_t* paddr) { if(paddr[0] == 0 && paddr[1] == 0 && paddr[2] == 0 && paddr[3] == 0 && paddr[4] == 0 && diff --git a/userspace/libsinsp/utils.h b/userspace/libsinsp/utils.h index 4eeaf3cb20..d78b602fe5 100644 --- a/userspace/libsinsp/utils.h +++ b/userspace/libsinsp/utils.h @@ -116,7 +116,7 @@ class sinsp_utils // If path2 is absolute, the result will be equivalent to path2. // If the result would be too long, the output will contain the string "/PATH_TOO_LONG" instead. // - static std::string concatenate_paths(std::string_view path1, std::string_view path2); + static std::string concatenate_paths(std::string_view path1, std::string_view path2, size_t max_len=SCAP_MAX_PATH_SIZE-1); // // Determines if an IPv6 address is IPv4-mapped