From cbde67c609f32cb2d3da3762f52b19721fac4c23 Mon Sep 17 00:00:00 2001 From: Yonggang Luo Date: Tue, 17 Dec 2024 03:06:53 +0800 Subject: [PATCH] Introduce cpj_path_join_and_normalize Make sure the test case more consistence with nodejs --- CMakeLists.txt | 3 +- include/cpj.h | 190 ++++- src/cpj.c | 1675 ++++++++++++++++++----------------------- test/join_test.c | 39 +- test/normalize_test.c | 21 +- test/relative_test.c | 16 +- 6 files changed, 954 insertions(+), 990 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4846658..5858d18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9.2) +cmake_minimum_required(VERSION 3.10) # set project name project(cpj @@ -175,6 +175,7 @@ if(ENABLE_TESTS) create_test(DEFAULT normalize mixed) create_test(DEFAULT normalize overlap) create_test(DEFAULT normalize empty) + create_test(DEFAULT normalize zero_length) create_test(DEFAULT normalize only_separators) create_test(DEFAULT normalize back_after_root) create_test(DEFAULT normalize forward_slashes) diff --git a/include/cpj.h b/include/cpj.h index fb905d1..d45f63c 100644 --- a/include/cpj.h +++ b/include/cpj.h @@ -5,6 +5,7 @@ #include #include +#include #if defined(_WIN32) || defined(__CYGWIN__) #define CPJ_EXPORT __declspec(dllexport) @@ -34,17 +35,25 @@ extern "C" typedef char cpj_char_t; typedef size_t cpj_size_t; +#define CPJ_SIZE_MAX SIZE_MAX /** * Description of a JerryScript string for arguments passing */ typedef struct { - const cpj_char_t - *ptr; /**< pointer to the zero-terminated ASCII/UTF-8/CESU-8 string */ + /**< pointer to the zero-terminated ASCII/UTF-8/CESU-8 string */ + const cpj_char_t *ptr; cpj_size_t size; /**< size of the string, excluding '\0' terminator */ } cpj_string_t; +typedef struct +{ + cpj_size_t segment_count_base; + cpj_size_t segment_count_other; + cpj_size_t equal_segment; +} cpj_path_intersection_t; + /** * A segment represents a single component of a path. 
For instance, on linux a * path might look like this "/var/log/", which consists of two segments "var" @@ -56,7 +65,7 @@ struct cpj_segment const cpj_char_t *segments; const cpj_char_t *begin; const cpj_char_t *end; - size_t size; + cpj_size_t size; }; /** @@ -96,6 +105,69 @@ typedef enum #define CPJ_ZSTR_ARG(str) \ ((CPJ_ZSTR_LITERAL(str))), ((cpj_size_t)(sizeof(str) - 1)) +/** + * @brief Checks whether the submitted pointer points to a separator. + * + * This function simply checks whether the submitted pointer points to a + * separator, which has to be null-terminated (but not necessarily after the + * separator). The function will return true if it is a separator, or false + * otherwise. + * + * @param path_style Style depending on the operating system. So this should + * detect whether we should use windows or unix paths. + * @param ch A character + * @return Returns true if it is a separator, or false otherwise. + */ +CPJ_PUBLIC bool +cpj_path_is_separator(cpj_path_style_t style, const cpj_char_t ch); + +/** + * Join and normalize the `path_list_p`. + * + * @note + * - If is_resolve is true. The given sequence of paths is processed from right + * to left, with each subsequent path prepended until an absolute path is + * constructed. For instance, given the sequence of path segments: /foo, /bar, + * baz, calling path.resolve('/foo', '/bar', 'baz') would return /bar/baz + * because 'baz' is not an absolute path but + * '/bar' + '/' + 'baz' is. + * - If is_resolve is false. 
All paths are joined + * + * @return The size of the joined path, excluding the '\0' terminator + */ +CPJ_PUBLIC +cpj_size_t cpj_path_join_and_normalize( + cpj_path_style_t path_style, /**< The style of the path list */ + bool is_resolve, /**< Join path in resolve mode */ + bool remove_trailing_slash, /**< Whether to remove the trailing slash */ + const cpj_string_t *path_list_p, /**< Path list */ + cpj_size_t path_list_count, /**< Path list count */ + cpj_char_t *buffer_p, /**< The buffer to store the joined path */ + cpj_size_t buffer_size /**< The size of the buffer_p */ +); + +CPJ_PUBLIC +cpj_path_intersection_t cpj_path_get_intersection_segments( + cpj_path_style_t path_style, const cpj_string_t *path_base, + const cpj_string_t *path_other, cpj_size_t path_count +); + +CPJ_PUBLIC +cpj_size_t cpj_path_relative_to( + cpj_path_style_t path_style, const cpj_string_t *cwd_directory, + const cpj_string_t *path_directory, const cpj_string_t *path, + cpj_char_t *buffer, cpj_size_t buffer_size +); + +CPJ_PUBLIC cpj_size_t cpj_strlen(const cpj_char_t *str); /**< exported: called by the static inline wrappers in this header */ + +static inline cpj_string_t +cpj_string_create(const cpj_char_t *ptr, cpj_size_t size) +{ + cpj_string_t str = {ptr, size}; + return str; +} + /** * @brief Generates an absolute path based on a base. * @@ -116,10 +188,25 @@ typedef enum * @param buffer_size The size of the result buffer. * @return Returns the total amount of characters of the new absolute path. */ -CPJ_PUBLIC cpj_size_t cpj_path_get_absolute( +static inline cpj_size_t cpj_path_get_absolute( cpj_path_style_t path_style, const cpj_char_t *base, const cpj_char_t *path, cpj_char_t *buffer, cpj_size_t buffer_size -); +) +{ + cpj_string_t paths[3]; + + // The basename should be an absolute path if the caller is using the API + // correctly. However, he might not and in that case we will append a fake + // root at the beginning.
+ paths[0] = cpj_string_create(CPJ_ZSTR_ARG("/")); + paths[1] = cpj_string_create(base, cpj_strlen(base)); + paths[2] = cpj_string_create(path, cpj_strlen(path)); + + // Finally join everything together and normalize it. + return cpj_path_join_and_normalize( + path_style, true, true, paths, 3, buffer, buffer_size + ); +} /** * @brief Generates a relative path based on a base. @@ -166,18 +253,31 @@ CPJ_PUBLIC cpj_size_t cpj_path_get_relative( * @param buffer_size The size of the result buffer. * @return Returns the total amount of characters of the full, combined path. */ -CPJ_PUBLIC cpj_size_t cpj_path_join( +static inline cpj_size_t cpj_path_join( cpj_path_style_t path_style, const cpj_char_t *path_a, const cpj_char_t *path_b, cpj_char_t *buffer, cpj_size_t buffer_size -); +) +{ + cpj_string_t paths[2]; + + // This is simple. We will just create an array with the two paths which we + // wish to join. + paths[0] = cpj_string_create(path_a, cpj_strlen(path_a)); + paths[1] = cpj_string_create(path_b, cpj_strlen(path_b)); + + // And then call the join and normalize function which will do the hard work + // for us. + return cpj_path_join_and_normalize( + path_style, false, true, paths, 2, buffer, buffer_size + ); +} /** - * @brief Joins multiple paths together. + * @brief Joins two paths together. * - * This function generates a new path by joining multiple paths together. It + * This function generates a new path by combining the two submitted paths. It * will remove double separators, and unlike cpj_path_get_absolute it permits - * the use of multiple relative paths to combine. The last path of the - * submitted string array must be set to NULL. The result will be written to a + * the use of two relative paths to combine. The result will be written to a * buffer, which might be truncated if the buffer is not large enough to hold * the full path. However, the truncated result will always be * null-terminated. 
The returned value is the amount of characters which the @@ -186,15 +286,26 @@ CPJ_PUBLIC cpj_size_t cpj_path_join( * * @param path_style Style depending on the operating system. So this should * detect whether we should use windows or unix paths. - * @param paths An array of paths which will be joined. + * @param path_a The first path which comes first. + * @param path_b The second path which comes after the first. * @param buffer The buffer where the result will be written to. * @param buffer_size The size of the result buffer. * @return Returns the total amount of characters of the full, combined path. */ -CPJ_PUBLIC cpj_size_t cpj_path_join_multiple( - cpj_path_style_t path_style, const cpj_char_t **paths, cpj_char_t *buffer, - cpj_size_t buffer_size -); +static inline cpj_size_t cpj_path_join_module( + cpj_path_style_t path_style, const cpj_char_t *path_a, + const cpj_char_t *path_b, cpj_char_t *buffer, cpj_size_t buffer_size +) +{ + cpj_string_t path_list[2] = { + {(const cpj_char_t *)path_a, (cpj_size_t)cpj_strlen(path_a)}, + {(const cpj_char_t *)path_b, (cpj_size_t)cpj_strlen(path_b)}, + }; + return cpj_path_join_and_normalize( + path_style, true, true, path_list, sizeof(path_list) / sizeof(path_list[0]), + buffer, buffer_size + ); +} /** * @brief Determines the root of a path. @@ -246,8 +357,13 @@ CPJ_PUBLIC cpj_size_t cpj_path_change_root( * @param path The path which will be checked. * @return Returns true if the path is absolute or false otherwise. */ -CPJ_PUBLIC bool -cpj_path_is_absolute(cpj_path_style_t path_style, const cpj_char_t *path); +static inline bool +cpj_path_is_absolute(cpj_path_style_t path_style, const cpj_char_t *path) +{ + cpj_size_t length = cpj_path_get_root(path_style, path); + return length > 0 ? cpj_path_is_separator(path_style, path[length - 1]) + : false; +} /** * @brief Determine whether the path is relative or not. 
@@ -260,8 +376,11 @@ cpj_path_is_absolute(cpj_path_style_t path_style, const cpj_char_t *path); * @param path The path which will be checked. * @return Returns true if the path is relative or false otherwise. */ -CPJ_PUBLIC bool -cpj_path_is_relative(cpj_path_style_t path_style, const cpj_char_t *path); +static inline bool +cpj_path_is_relative(cpj_path_style_t path_style, const cpj_char_t *path) +{ + return !cpj_path_is_absolute(path_style, path); +} /** * @brief Gets the basename of a file path. @@ -416,10 +535,21 @@ CPJ_PUBLIC cpj_size_t cpj_path_change_extension( * @return The size which the complete normalized path has if it was not * truncated. */ -CPJ_PUBLIC cpj_size_t cpj_path_normalize( +static inline cpj_size_t cpj_path_normalize( cpj_path_style_t path_style, const cpj_char_t *path, cpj_char_t *buffer, cpj_size_t buffer_size -); +) +{ + cpj_string_t paths[1]; + + // Now we initialize the paths which we will normalize. Since this function + // only supports submitting a single path, we will only add that one. + paths[0] = cpj_string_create(path, cpj_strlen(path)); + + return cpj_path_join_and_normalize( + path_style, false, true, paths, 1, buffer, buffer_size + ); +} /** * @brief Finds common portions in two paths. @@ -548,22 +678,6 @@ CPJ_PUBLIC cpj_size_t cpj_path_change_segment( const cpj_char_t *value, cpj_char_t *buffer, cpj_size_t buffer_size ); -/** - * @brief Checks whether the submitted pointer points to a separator. - * - * This function simply checks whether the submitted pointer points to a - * separator, which has to be null-terminated (but not necessarily after the - * separator). The function will return true if it is a separator, or false - * otherwise. - * - * @param path_style Style depending on the operating system. So this should - * detect whether we should use windows or unix paths. - * @param str A pointer to a string. - * @return Returns true if it is a separator, or false otherwise. 
- */ -CPJ_PUBLIC bool -cpj_path_is_separator(cpj_path_style_t path_style, const cpj_char_t *str); - /** * @brief Guesses the path style. * diff --git a/src/cpj.c b/src/cpj.c index ddfbbdd..4075752 100644 --- a/src/cpj.c +++ b/src/cpj.c @@ -4,532 +4,67 @@ #include #include -/** - * This is a list of separators used in different styles. Windows can read - * multiple separators, but it generally outputs just a backslash. The output - * will always use the first character for the output. - */ -static const cpj_char_t *separators[] = { - "\\/", // CPJ_STYLE_WINDOWS - "/" // CPJ_STYLE_UNIX -}; - -/** - * A joined path represents multiple path strings which are concatenated, but - * not (necessarily) stored in contiguous memory. The joined path allows to - * iterate over the segments as if it was one piece of path. - */ -struct cpj_segment_joined -{ - struct cpj_segment segment; - const cpj_char_t **paths; - cpj_size_t path_index; -}; - -static cpj_size_t cpj_path_output_sized( - cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position, - const cpj_char_t *str, cpj_size_t length -) -{ - cpj_size_t amount_written; - - // First we determine the amount which we can write to the buffer. There are - // three cases. In the first case we have enough to store the whole string in - // it. In the second one we can only store a part of it, and in the third we - // have no space left. - if (buffer_size > position + length) { - amount_written = length; - } else if (buffer_size > position) { - amount_written = buffer_size - position; - } else { - amount_written = 0; - } - - // If we actually want to write out something we will do that here. We will - // always append a '\0', this way we are guaranteed to have a valid string at - // all times. - if (amount_written > 0) { - memmove(&buffer[position], str, amount_written); - } - - // Return the theoretical length which would have been written when everything - // would have fit in the buffer. 
- return length; -} - -static cpj_size_t cpj_path_output_current( - cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position -) -{ - // We output a "current" directory, which is a single character. This - // character is currently not style dependant. - return cpj_path_output_sized(buffer, buffer_size, position, ".", 1); -} - -static cpj_size_t cpj_path_output_back( - cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position -) -{ - // We output a "back" directory, which ahs two characters. This - // character is currently not style dependant. - return cpj_path_output_sized(buffer, buffer_size, position, "..", 2); -} - -static cpj_size_t cpj_path_output_separator( - cpj_path_style_t path_style, cpj_char_t *buffer, cpj_size_t buffer_size, - cpj_size_t position -) -{ - // We output a separator, which is a single character. - return cpj_path_output_sized( - buffer, buffer_size, position, separators[path_style], 1 - ); -} - -static cpj_size_t cpj_path_output_dot( - cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position -) +typedef struct { - // We output a dot, which is a single character. This is used for extensions. - return cpj_path_output_sized(buffer, buffer_size, position, ".", 1); -} + const cpj_string_t *path_list_p; + cpj_size_t path_list_count; + cpj_size_t root_length; + bool root_is_absolute; -static cpj_size_t cpj_path_output( - cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position, - const cpj_char_t *str -) -{ + bool end_with_separator; + cpj_size_t list_pos; + cpj_size_t pos; cpj_size_t length; + cpj_size_t segment_eat_count; + cpj_size_t segment_count; +} cpj_segment_iterator_t; - // This just does a sized output internally, but first measuring the - // null-terminated string. 
- length = strlen(str); - return cpj_path_output_sized(buffer, buffer_size, position, str, length); -} - -static void cpj_path_terminate_output( - cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t pos -) +bool cpj_path_is_separator(cpj_path_style_t style, const cpj_char_t ch) { - if (buffer_size > 0) { - if (pos >= buffer_size) { - buffer[buffer_size - 1] = '\0'; - } else { - buffer[pos] = '\0'; - } - } -} - -static bool cpj_path_is_string_equal( - cpj_path_style_t path_style, const cpj_char_t *first, - const cpj_char_t *second, cpj_size_t first_size, cpj_size_t second_size -) -{ - bool are_both_separators; - - // The two strings are not equal if the sizes are not equal. - if (first_size != second_size) { - return false; - } - - // If the path style is UNIX, we will compare case sensitively. This can be - // done easily using strncmp. - if (path_style == CPJ_STYLE_UNIX) { - return strncmp(first, second, first_size) == 0; - } - - // However, if this is windows we will have to compare case insensitively. - // Since there is no standard method to do that we will have to do it on our - // own. - while (first_size > 0) { - int a = *first; - int b = *second; - if (!(a && b)) { - break; - } - - // We can consider the string to be not equal if the two lowercase - // characters are not equal. The two chars may also be separators, which - // means they would be equal. - are_both_separators = strchr(separators[path_style], a) != NULL && - strchr(separators[path_style], b) != NULL; - - if (tolower(a) != tolower(b) && !are_both_separators) { - return false; - } - - first++; - second++; - - --first_size; + if (style == CPJ_STYLE_WINDOWS) { + return ch == '/' || ch == '\\'; + } else { + return ch == '/'; } - - // The string must be equal since they both have the same length and all the - // characters are the same. 
- return true; -} +} /* cpj_path_is_separator */ static const cpj_char_t * cpj_path_find_next_stop(cpj_path_style_t path_style, const cpj_char_t *c) { // We just move forward until we find a '\0' or a separator, which will be our // next "stop". - while (*c != '\0' && !cpj_path_is_separator(path_style, c)) { + while (*c != '\0' && !cpj_path_is_separator(path_style, *c)) { ++c; } // Return the pointer of the next stop. return c; -} +} /* cpj_path_find_next_stop */ -static const cpj_char_t *cpj_path_find_previous_stop( - cpj_path_style_t path_style, const cpj_char_t *begin, const cpj_char_t *c -) -{ - // We just move back until we find a separator or reach the beginning of the - // path, which will be our previous "stop". - while (c > begin && !cpj_path_is_separator(path_style, c)) { - --c; - } - - // Return the pointer to the previous stop. We have to return the first - // character after the separator, not on the separator itself. - if (cpj_path_is_separator(path_style, c)) { - return c + 1; - } else { - return c; - } -} - -static bool cpj_path_get_first_segment_without_root( - cpj_path_style_t path_style, const cpj_char_t *path, - const cpj_char_t *segments, struct cpj_segment *segment -) -{ - // Let's remember the path. We will move the path pointer afterwards, that's - // why this has to be done first. - segment->path = path; - segment->segments = segments; - segment->begin = segments; - segment->end = segments; - segment->size = 0; - - // Now let's check whether this is an empty string. An empty string has no - // segment it could use. - if (*segments == '\0') { - return false; - } - - // If the string starts with separators, we will jump over those. If there is - // only a slash and a '\0' after it, we can't determine the first segment - // since there is none. - while (cpj_path_is_separator(path_style, segments)) { - ++segments; - if (*segments == '\0') { - return false; - } - } - - // So this is the beginning of our segment. 
- segment->begin = segments; - - // Now let's determine the end of the segment, which we do by moving the path - // pointer further until we find a separator. - segments = cpj_path_find_next_stop(path_style, segments); - - // And finally, calculate the size of the segment by subtracting the position - // from the end. - segment->size = (cpj_size_t)(segments - segment->begin); - segment->end = segments; - - // Tell the caller that we found a segment. - return true; -} - -static bool cpj_path_get_last_segment_without_root( - cpj_path_style_t path_style, const cpj_char_t *path, - struct cpj_segment *segment -) -{ - // Now this is fairly similar to the normal algorithm, however, it will assume - // that there is no root in the path. So we grab the first segment at this - // position, assuming there is no root. - if (!cpj_path_get_first_segment_without_root( - path_style, path, path, segment - )) { - return false; - } - - // Now we find our last segment. The segment struct of the caller - // will contain the last segment, since the function we call here will not - // change the segment struct when it reaches the end. - while (cpj_path_get_next_segment(path_style, segment)) { - // We just loop until there is no other segment left. - } - - return true; -} - -static bool cpj_path_get_first_segment_joined( - cpj_path_style_t path_style, const cpj_char_t **paths, - struct cpj_segment_joined *sj -) -{ - bool result; - - // Prepare the first segment. We position the joined segment on the first path - // and assign the path array to the struct. - sj->path_index = 0; - sj->paths = paths; - - // We loop through all paths until we find one which has a segment. The result - // is stored in a variable, so we can let the caller know whether we found one - // or not. 
- result = false; - while (paths[sj->path_index] != NULL && - (result = cpj_path_get_first_segment( - path_style, paths[sj->path_index], &sj->segment - )) == false) { - ++sj->path_index; - } - - return result; -} - -static bool cpj_path_get_next_segment_joined( - cpj_path_style_t path_style, struct cpj_segment_joined *sj -) -{ - bool result; - - if (sj->paths[sj->path_index] == NULL) { - // We reached already the end of all paths, so there is no other segment - // left. - return false; - } else if (cpj_path_get_next_segment(path_style, &sj->segment)) { - // There was another segment on the current path, so we are good to - // continue. - return true; - } - - // We try to move to the next path which has a segment available. We must at - // least move one further since the current path reached the end. - result = false; - - do { - ++sj->path_index; - - // And we obviously have to stop this loop if there are no more paths left. - if (sj->paths[sj->path_index] == NULL) { - break; - } - - // Grab the first segment of the next path and determine whether this path - // has anything useful in it. There is one more thing we have to consider - // here - for the first time we do this we want to skip the root, but - // afterwards we will consider that to be part of the segments. - result = cpj_path_get_first_segment_without_root( - path_style, sj->paths[sj->path_index], sj->paths[sj->path_index], - &sj->segment - ); - - } while (!result); - - // Finally, report the result back to the caller. - return result; -} - -static bool cpj_path_get_previous_segment_joined( - cpj_path_style_t path_style, struct cpj_segment_joined *sj -) -{ - bool result; - - if (*sj->paths == NULL) { - // It's possible that there is no initialized segment available in the - // struct since there are no paths. In that case we can return false, since - // there is no previous segment. 
- return false; - } else if (cpj_path_get_previous_segment(path_style, &sj->segment)) { - // Now we try to get the previous segment from the current path. If we can - // do that successfully, we can let the caller know that we found one. - return true; - } - - result = false; - - do { - // We are done once we reached index 0. In that case there are no more - // segments left. - if (sj->path_index == 0) { - break; - } - - // There is another path which we have to inspect. So we decrease the path - // index. - --sj->path_index; - - // If this is the first path we will have to consider that this path might - // include a root, otherwise we just treat is as a segment. - if (sj->path_index == 0) { - result = cpj_path_get_last_segment( - path_style, sj->paths[sj->path_index], &sj->segment - ); - } else { - result = cpj_path_get_last_segment_without_root( - path_style, sj->paths[sj->path_index], &sj->segment - ); - } - - } while (!result); - - return result; -} - -static bool cpj_path_segment_back_will_be_removed( - cpj_path_style_t path_style, struct cpj_segment_joined *sj -) -{ - enum cpj_segment_type type; - int counter; - - // We are handling back segments here. We must verify how many back segments - // and how many normal segments come before this one to decide whether we keep - // or remove it. - - // The counter determines how many normal segments are our current segment, - // which will popped off before us. If the counter goes above zero it means - // that our segment will be popped as well. - counter = 0; - - // We loop over all previous segments until we either reach the beginning, - // which means our segment will not be dropped or the counter goes above zero. - while (cpj_path_get_previous_segment_joined(path_style, sj)) { - - // Now grab the type. The type determines whether we will increase or - // decrease the counter. We don't handle a CPJ_CURRENT frame here since it - // has no influence. 
- type = cpj_path_get_segment_type(&sj->segment); - if (type == CPJ_NORMAL) { - // This is a normal segment. The normal segment will increase the counter - // since it neutralizes one back segment. If we go above zero we can - // return immediately. - ++counter; - if (counter > 0) { - return true; - } - } else if (type == CPJ_BACK) { - // A CPJ_BACK segment will reduce the counter by one. We can not remove a - // back segment as long we are not above zero since we don't have the - // opposite normal segment which we would remove. - --counter; - } - } - - // We never got a count larger than zero, so we will keep this segment alive. - return false; -} - -static bool cpj_path_segment_normal_will_be_removed( - cpj_path_style_t path_style, struct cpj_segment_joined *sj -) -{ - enum cpj_segment_type type; - int counter; - - // The counter determines how many segments are above our current segment, - // which will popped off before us. If the counter goes below zero it means - // that our segment will be popped as well. - counter = 0; - - // We loop over all following segments until we either reach the end, which - // means our segment will not be dropped or the counter goes below zero. - while (cpj_path_get_next_segment_joined(path_style, sj)) { - - // First, grab the type. The type determines whether we will increase or - // decrease the counter. We don't handle a CPJ_CURRENT frame here since it - // has no influence. - type = cpj_path_get_segment_type(&sj->segment); - if (type == CPJ_NORMAL) { - // This is a normal segment. The normal segment will increase the counter - // since it will be removed by a "../" before us. - ++counter; - } else if (type == CPJ_BACK) { - // A CPJ_BACK segment will reduce the counter by one. If we are below zero - // we can return immediately. - --counter; - if (counter < 0) { - return true; - } - } - } - - // We never got a negative count, so we will keep this segment alive. 
- return false; -} - -static bool cpj_path_segment_will_be_removed( - cpj_path_style_t path_style, const struct cpj_segment_joined *sj, - bool absolute -) -{ - enum cpj_segment_type type; - struct cpj_segment_joined sjc; - - // We copy the joined path so we don't need to modify it. - sjc = *sj; - - // First we check whether this is a CPJ_CURRENT or CPJ_BACK segment, since - // those will always be dropped. - type = cpj_path_get_segment_type(&sj->segment); - if (type == CPJ_CURRENT || (type == CPJ_BACK && absolute)) { - return true; - } else if (type == CPJ_BACK) { - return cpj_path_segment_back_will_be_removed(path_style, &sjc); - } else { - return cpj_path_segment_normal_will_be_removed(path_style, &sjc); - } -} - -static bool cpj_path_segment_joined_skip_invisible( - cpj_path_style_t path_style, struct cpj_segment_joined *sj, bool absolute -) -{ - while (cpj_path_segment_will_be_removed(path_style, sj, absolute)) { - if (!cpj_path_get_next_segment_joined(path_style, sj)) { - return false; - } - } - - return true; -} - -static void -cpj_path_get_root_windows(const cpj_char_t *path, cpj_size_t *length) +static cpj_size_t cpj_path_get_root_windows(const cpj_char_t *path) { const cpj_char_t *c; - bool is_device_path; - + cpj_size_t length = 0; // We can not determine the root if this is an empty string. So we set the // root to NULL and the length to zero and cancel the whole thing. c = path; - *length = 0; if (!*c) { - return; + return length; } // Now we have to verify whether this is a windows network path (UNC), which // we will consider our root. - if (cpj_path_is_separator(CPJ_STYLE_WINDOWS, c)) { + if (cpj_path_is_separator(CPJ_STYLE_WINDOWS, *c)) { + bool is_device_path; ++c; // Check whether the path starts with a single backslash, which means this // is not a network path - just a normal path starting with a backslash. 
- if (!cpj_path_is_separator(CPJ_STYLE_WINDOWS, c)) { + if (!cpj_path_is_separator(CPJ_STYLE_WINDOWS, *c)) { // Okay, this is not a network path but we still use the backslash as a // root. - ++(*length); - return; + ++length; + return length; } // A device path is a path which starts with "\\." or "\\?". A device path @@ -541,13 +76,13 @@ cpj_path_get_root_windows(const cpj_char_t *path, cpj_size_t *length) // anyway. ++c; is_device_path = (*c == '?' || *c == '.') && - cpj_path_is_separator(CPJ_STYLE_WINDOWS, ++c); + cpj_path_is_separator(CPJ_STYLE_WINDOWS, *(++c)); if (is_device_path) { // That's a device path, and the root must be either "\\.\" or "\\?\" // which is 4 characters long. (at least that's how Windows // GetFullPathName behaves.) - *length = 4; - return; + length = 4; + return length; } // We will grab anything up to the next stop. The next stop might be a '\0' @@ -556,7 +91,7 @@ cpj_path_get_root_windows(const cpj_char_t *path, cpj_size_t *length) // If this is a separator and not the end of a string we wil have to include // it. However, if this is a '\0' we must not skip it. - while (cpj_path_is_separator(CPJ_STYLE_WINDOWS, c)) { + while (cpj_path_is_separator(CPJ_STYLE_WINDOWS, *c)) { ++c; } @@ -566,239 +101,645 @@ cpj_path_get_root_windows(const cpj_char_t *path, cpj_size_t *length) // Then there might be a separator at the end. We will include that as well, // it will mark the path as absolute. - if (cpj_path_is_separator(CPJ_STYLE_WINDOWS, c)) { + if (cpj_path_is_separator(CPJ_STYLE_WINDOWS, *c)) { ++c; } // Finally, calculate the size of the root. - *length = (cpj_size_t)(c - path); - return; + length = (cpj_size_t)(c - path); + return length; } // Move to the next and check whether this is a colon. if (*++c == ':') { - *length = 2; + length = 2; // Now check whether this is a backslash (or slash). If it is not, we could // assume that the next character is a '\0' if it is a valid path. 
However, // we will not assume that - since ':' is not valid in a path it must be a // mistake by the caller than. We will try to understand it anyway. - if (cpj_path_is_separator(CPJ_STYLE_WINDOWS, ++c)) { - *length = 3; + if (cpj_path_is_separator(CPJ_STYLE_WINDOWS, *(++c))) { + length = 3; } } -} + return length; +} /* cpj_path_get_root_windows */ -static void cpj_path_get_root_unix(const cpj_char_t *path, cpj_size_t *length) +static cpj_size_t cpj_path_get_root_unix(const cpj_char_t *path) { // The slash of the unix path represents the root. There is no root if there // is no slash. - if (cpj_path_is_separator(CPJ_STYLE_UNIX, path)) { - *length = 1; + return cpj_path_is_separator(CPJ_STYLE_UNIX, *path) ? 1 : 0; +} /* cpj_path_get_root_unix */ + +cpj_size_t +cpj_path_get_root(cpj_path_style_t path_style, const cpj_char_t *path) +{ + // We use a different implementation here based on the configuration of the + // library. + return path_style == CPJ_STYLE_WINDOWS ? cpj_path_get_root_windows(path) + : cpj_path_get_root_unix(path); +} /* cpj_path_get_root */ + +static bool cpj_path_iterator_is_head(cpj_segment_iterator_t *it) +{ + return it->segment_eat_count == 0 && it->list_pos == 0 && + it->pos == CPJ_SIZE_MAX && it->length == 0; +} /* cpj_path_iterator_is_head */ + +static bool cpj_path_iterator_before_root(cpj_segment_iterator_t *it) +{ + return it->list_pos == 0 && ((it->pos + 1) <= it->root_length); +} /* cpj_path_iterator_before_root */ + +static cpj_char_t cpj_path_char_get(cpj_segment_iterator_t *it) +{ + if (cpj_path_iterator_before_root(it)) { + return 0; + } + if (it->pos == CPJ_SIZE_MAX) { + /** + * The tail '\0' treat as '/' that is a path separator for both + * POSIX/WINDOWS + */ + return '/'; + } + return it->path_list_p[it->list_pos].ptr[it->pos]; +} /* cpj_path_char_get */ + +static void cpj_path_char_iter_prev(cpj_segment_iterator_t *it) +{ + if (cpj_path_iterator_before_root(it)) { + return; + } + if (it->pos == CPJ_SIZE_MAX) { + it->list_pos -= 
1; + it->pos = it->path_list_p[it->list_pos].size - 1; } else { - *length = 0; + --it->pos; } -} +} /* cpj_path_char_iter_prev */ + +/** + * Get the path separator according to `path_style` + * + * @return The path separator + */ +static cpj_char_t cpj_path_get_separator(cpj_path_style_t path_style +) /**< The path style */ +{ + return path_style == CPJ_STYLE_UNIX ? '/' : '\\'; +} /* cpj_path_get_separator */ + +static void cpj_path_get_prev_segment_detail( + cpj_path_style_t path_style, cpj_segment_iterator_t *it +) +{ + if (it->segment_eat_count > 0) { + --it->segment_eat_count; + if (it->segment_eat_count > 0) { + return; + } + } + if (it->list_pos == 0 && it->pos == CPJ_SIZE_MAX && + it->length == it->root_length) { + /* To the head */ + it->length = 0; + return; + } + for (;;) { + cpj_char_t ch; + cpj_size_t segment_length = 0; + for (;;) { + ch = cpj_path_char_get(it); + if (cpj_path_is_separator(path_style, ch)) { + cpj_path_char_iter_prev(it); + continue; + } + if (ch != 0) { + segment_length += 1; + } + break; + } + for (;;) { + cpj_path_char_iter_prev(it); + ch = cpj_path_char_get(it); + if (ch == 0 || cpj_path_is_separator(path_style, ch)) { + break; + } + segment_length += 1; + } + const cpj_string_t *path_current = it->path_list_p + it->list_pos; + if (segment_length == 1) { + if (path_current->ptr[it->pos + 1] == '.') { + continue; + } + } else if (segment_length == 2) { /* a back segment only if BOTH characters are dots */ + if (path_current->ptr[it->pos + 1] == '.' && + path_current->ptr[it->pos + 2] == '.') { + it->segment_eat_count += 1; + continue; + } + } + if (it->segment_eat_count > 0 && segment_length > 0) { + it->segment_eat_count -= 1; + continue; + } + if (segment_length > 0) { + /* segment_eat_count must be zero, this is a normal segment */ + it->length = segment_length; + return; + } + it->length = 0; + if (ch == 0 && it->root_is_absolute) { + /* Dropping segment eat when the root segment is absolute */ + it->segment_eat_count = 0; + } + if (it->segment_eat_count > 0) { + /* Generating ..
segment for relative path */ + return; + } + + if (ch == 0) { + if (it->segment_count == 0 && !it->root_is_absolute) { + /* Path like `C:` `C:abc\..` `` `abc\..` `.` should place a . as the + * path component */ + return; + } + + /* Return the root segment or head depending on `root_length` */ + it->pos = CPJ_SIZE_MAX; + it->length = it->root_length; + } + return; + } /* for (;;) */ +} /* cpj_path_get_prev_segment_detail */ -static bool cpj_path_is_root_absolute( - cpj_path_style_t path_style, const cpj_char_t *path, cpj_size_t length +static bool cpj_path_get_prev_segment( + cpj_path_style_t path_style, cpj_segment_iterator_t *it ) { - // This is definitely not absolute if there is no root. - if (length == 0) { + cpj_path_get_prev_segment_detail(path_style, it); + if (cpj_path_iterator_is_head(it) && it->segment_count > 0) { return false; } + if (it->list_pos == 0 && it->pos == SIZE_MAX && it->root_length > 0) { + /* Root has no trailing separator */ + it->end_with_separator = false; + } else if (it->segment_count > 0) { + it->end_with_separator = true; + } else { + /* The last segment preserves the original separator. */ + } - // If there is a separator at the end of the root, we can safely consider this - // to be an absolute path. - return cpj_path_is_separator(path_style, &path[length - 1]); + it->segment_count += 1; + return true; } -static void cpj_path_fix_root( - cpj_path_style_t path_style, cpj_char_t *buffer, cpj_size_t buffer_size, - cpj_size_t length -) +cpj_size_t cpj_path_get_segment_length(cpj_segment_iterator_t *it) { - cpj_size_t i; + if (it->segment_eat_count > 0) { + return 2; + } + if (it->length > 0) { + return it->length; + } + return 1; +} - // This only affects windows. 
- if (path_style != CPJ_STYLE_WINDOWS) { - return; +const cpj_char_t *cpj_path_get_segment_ptr(cpj_segment_iterator_t *it) +{ + if (it->segment_eat_count > 0) { + return CPJ_ZSTR_LITERAL(".."); } + if (it->length > 0) { + cpj_size_t pos = it->pos + 1; + return it->path_list_p[it->list_pos].ptr + pos; + } + return CPJ_ZSTR_LITERAL("."); +} - // Make sure we are not writing further than we are actually allowed to. - if (length > buffer_size) { - length = buffer_size; +static void cpj_path_push_front( + cpj_path_style_t path_style, cpj_char_t *buffer_p, cpj_size_t buffer_size, + cpj_size_t *buffer_index, cpj_char_t ch +) +{ + *buffer_index -= 1; + cpj_size_t buffer_index_current = *buffer_index; + if (buffer_index_current < buffer_size) { + if (buffer_index_current == (buffer_size - 1)) { + buffer_p[buffer_index_current] = '\0'; + } else if (cpj_path_is_separator(path_style, ch)) { + buffer_p[buffer_index_current] = cpj_path_get_separator(path_style); + } else { + buffer_p[buffer_index_current] = ch; + } } +} /* cpj_path_push_front */ - // Replace all forward slashes with backwards slashes. Since this is windows - // we can't have any forward slashes in the root. 
- for (i = 0; i < length; ++i) { - if (cpj_path_is_separator(CPJ_STYLE_WINDOWS, &buffer[i])) { - buffer[i] = *separators[CPJ_STYLE_WINDOWS]; +static const cpj_string_t path_list_empty = {CPJ_ZSTR_ARG("")}; + +/** + * @note + */ +cpj_segment_iterator_t cpj_path_interator_init( + cpj_path_style_t path_style, /**< The style of the path list */ + bool is_resolve, /**< If do path resolve */ + bool remove_trailing_slash, /**< If remove the trailing slash symbol */ + const cpj_string_t *path_list_p, /**< Path list */ + cpj_size_t path_list_count +) +{ + cpj_segment_iterator_t it = {0}; + cpj_size_t path_list_i; + uint8_t end_with_separator = UINT8_MAX; + if (path_list_count == 0) { + path_list_count = 1; + path_list_p = &path_list_empty; + } + path_list_i = path_list_count; + it.path_list_p = path_list_p; + it.path_list_count = path_list_count; + for (; path_list_i > 0;) { + const cpj_string_t *path_list_current = path_list_p + (--path_list_i); + if (end_with_separator == UINT8_MAX && path_list_current->size > 0) { + end_with_separator = cpj_path_is_separator( + path_style, + path_list_current->ptr[path_list_current->size - 1] + ) + ? 
1 + : 0; } + if (it.root_length == 0 && (is_resolve || path_list_i == 0)) { + /* Find the first root path from right to left when `is_resolve` is + * `true` */ + it.root_length = cpj_path_get_root(path_style, path_list_current->ptr); + if (it.root_length > 0) { + it.path_list_p += path_list_i; + it.path_list_count -= path_list_i; + } + } + } + it.root_is_absolute = it.root_length > 0 && + cpj_path_is_separator( + path_style, it.path_list_p[0].ptr[it.root_length - 1] + ); + it.list_pos = it.path_list_count; + it.pos = CPJ_SIZE_MAX; + if (remove_trailing_slash) { + it.end_with_separator = false; + } else { + it.end_with_separator = end_with_separator == 1; } + return it; } -static cpj_size_t cpj_path_join_and_normalize_multiple( - cpj_path_style_t path_style, const cpj_char_t **paths, cpj_char_t *buffer, - cpj_size_t buffer_size +/** + * Join and normalize the `path_list_p`. + * + * @note + * - If is_resolve is true. The given sequence of paths is processed from right + * to left, with each subsequent path prepended until an absolute path is + * constructed. For instance, given the sequence of path segments: /foo, /bar, + * baz, calling path.resolve('/foo', '/bar', 'baz') would return /bar/baz + * because 'baz' is not an absolute path but + * '/bar' + '/' + 'baz' is. + * - If is_resolve is false. 
All paths are joined + * + * @return The size of the joined path, excluding the '\0' terminator + */ +cpj_size_t cpj_path_join_and_normalize( + cpj_path_style_t path_style, /**< The style of the path list */ + bool is_resolve, /**< Join path in resolve mode */ + bool remove_trailing_slash, /**< If remove the trailing slash symbol */ + const cpj_string_t *path_list_p, /**< Path list */ + cpj_size_t path_list_count, /**< Path list count */ + cpj_char_t *buffer_p, /**< The buffer to store the joined path */ + cpj_size_t buffer_size /**< The size of the buffer_p */ ) { - cpj_size_t pos; - bool absolute, has_segment_output; - struct cpj_segment_joined sj; - - // We initialize the position after the root, which should get us started. - pos = cpj_path_get_root(path_style, paths[0]); + cpj_size_t buffer_size_calculated = 0; + for (;;) { + cpj_segment_iterator_t it = cpj_path_interator_init( + path_style, is_resolve, remove_trailing_slash, path_list_p, + path_list_count + ); + cpj_size_t buffer_index = CPJ_SIZE_MAX; + cpj_char_t *buffer_p_used; - // Determine whether the path is absolute or not. We need that to determine - // later on whether we can remove superfluous "../" or not. 
- absolute = cpj_path_is_root_absolute(path_style, paths[0], pos); + if (buffer_size_calculated == 0) { + buffer_p_used = NULL; + /* For calculating the path size */ + buffer_index = CPJ_SIZE_MAX; + } else { + buffer_p_used = buffer_p; + if (buffer_size_calculated > buffer_size) { + /** + * When `buffer_p` exists and `buffer_size_calculated > buffer_size`, + * that means there is not enough buffer to store the final generated + * path, so set `buffer_index` to `buffer_size_calculated` to + * ensure only the head part of the generated path is stored into + * `buffer_p` + */ + buffer_index = buffer_size_calculated; + } else { + if (buffer_p == it.path_list_p[0].ptr && it.path_list_count == 1) { + /** + * The input path and output buffer are the same, storing the + * generated path at the tail of `buffer_p`; so that when normalizing + * the path in place, the path won't be corrupted. + */ + buffer_index = buffer_size; + } else { + buffer_index = buffer_size_calculated; + } + } + } + cpj_path_push_front( + path_style, buffer_p_used, buffer_size, &buffer_index, '\0' + ); + for (; cpj_path_get_prev_segment(path_style, &it);) { + if (it.end_with_separator) { + cpj_path_push_front( + path_style, buffer_p_used, buffer_size, &buffer_index, '/' + ); + } + cpj_size_t segment_length = cpj_path_get_segment_length(&it); + const cpj_char_t *segment_ptr = cpj_path_get_segment_ptr(&it); + cpj_size_t segment_i = segment_length; + for (; segment_i > 0;) { + segment_i -= 1; + cpj_path_push_front( + path_style, buffer_p_used, buffer_size, &buffer_index, + segment_ptr[segment_i] + ); + } + } + if (buffer_size_calculated == 0) { + buffer_size_calculated = CPJ_SIZE_MAX - buffer_index; + if (!buffer_p) { + return buffer_size_calculated; + } + continue; + } + if (buffer_p) { + if (buffer_index > 0) { + memmove(buffer_p, buffer_p + buffer_index, buffer_size_calculated); + } + } + return buffer_size_calculated - 1; + } +} /* cpj_path_join_and_normalize */ - // First copy the root to 
the output. After copying, we will normalize the - // root. - cpj_path_output_sized(buffer, buffer_size, 0, paths[0], pos); - cpj_path_fix_root(path_style, buffer, buffer_size, pos); +static bool cpj_path_is_string_equal( + cpj_path_style_t path_style, const cpj_char_t *first, + const cpj_char_t *second, cpj_size_t first_size, cpj_size_t second_size +) +{ + bool are_both_separators; - // So we just grab the first segment. If there is no segment we will always - // output a "/", since we currently only support absolute paths here. - if (!cpj_path_get_first_segment_joined(path_style, paths, &sj)) { - goto done; + // The two strings are not equal if the sizes are not equal. + if (first_size != second_size) { + return false; } - // Let's assume that we don't have any segment output for now. We will toggle - // this flag once there is some output. - has_segment_output = false; + // If the path style is UNIX, we will compare case sensitively. This can be + // done easily using strncmp. + if (path_style == CPJ_STYLE_UNIX) { + return strncmp(first, second, first_size) == 0; + } - do { - // Check whether we have to drop this segment because of resolving a - // relative path or because it is a CPJ_CURRENT segment. - if (cpj_path_segment_will_be_removed(path_style, &sj, absolute)) { - continue; + // However, if this is windows we will have to compare case insensitively. + // Since there is no standard method to do that we will have to do it on our + // own. + while (first_size > 0) { + int a = *first; + int b = *second; + if (!(a && b)) { + break; } - // We add a separator if we previously wrote a segment. The last segment - // must not have a trailing separator. This must happen before the segment - // output, since we would override the null terminating character with - // reused buffers if this was done afterwards. 
- if (has_segment_output) { - pos += cpj_path_output_separator(path_style, buffer, buffer_size, pos); + // We can consider the string to be not equal if the two lowercase + // characters are not equal. The two chars may also be separators, which + // means they would be equal. + are_both_separators = cpj_path_is_separator(path_style, a) && + cpj_path_is_separator(path_style, b); + + if (tolower(a) != tolower(b) && !are_both_separators) { + return false; } - // Remember that we have segment output, so we can handle the trailing slash - // later on. This is necessary since we might have segments but they are all - // removed. - has_segment_output = true; - - // Write out the segment but keep in mind that we need to follow the - // buffer size limitations. That's why we use the path output functions - // here. - pos += cpj_path_output_sized( - buffer, buffer_size, pos, sj.segment.begin, sj.segment.size - ); - } while (cpj_path_get_next_segment_joined(path_style, &sj)); + first++; + second++; - // Remove the trailing slash, but only if we have segment output. We don't - // want to remove anything from the root. - if (!has_segment_output && pos == 0) { - // This may happen if the path is absolute and all segments have been - // removed. We can not have an empty output - and empty output means we stay - // in the current directory. So we will output a ".". - assert(absolute == false); - pos += cpj_path_output_current(buffer, buffer_size, pos); + --first_size; } - // We must append a '\0' in any case, unless the buffer size is zero. If the - // buffer size is zero, which means we can not. -done: - cpj_path_terminate_output(buffer, buffer_size, pos); - - // And finally let our caller know about the total size of the normalized - // path. - return pos; + // The string must be equal since they both have the same length and all the + // characters are the same. 
+ return true; } -cpj_size_t cpj_path_get_absolute( - cpj_path_style_t path_style, const cpj_char_t *base, const cpj_char_t *path, - cpj_char_t *buffer, cpj_size_t buffer_size +cpj_path_intersection_t cpj_path_get_intersection_segments( + cpj_path_style_t path_style, const cpj_string_t *path_base, + const cpj_string_t *path_other, cpj_size_t path_count ) { - cpj_size_t i; - const cpj_char_t *paths[4]; - - // The basename should be an absolute path if the caller is using the API - // correctly. However, he might not and in that case we will append a fake - // root at the beginning. - if (cpj_path_is_absolute(path_style, base)) { - i = 0; - } else if (path_style == CPJ_STYLE_WINDOWS) { - paths[0] = "\\"; - i = 1; + cpj_path_intersection_t intersection; + cpj_segment_iterator_t it_base = + cpj_path_interator_init(path_style, true, true, path_base, path_count); + cpj_segment_iterator_t it_other = + cpj_path_interator_init(path_style, true, true, path_other, path_count); + cpj_size_t k; + intersection.equal_segment = 0; + while (cpj_path_get_prev_segment(path_style, &it_base)) { + } + while (cpj_path_get_prev_segment(path_style, &it_other)) { + } + intersection.segment_count_base = it_base.segment_count; + intersection.segment_count_other = it_other.segment_count; + + it_base = + cpj_path_interator_init(path_style, true, true, path_base, path_count); + it_other = + cpj_path_interator_init(path_style, true, true, path_other, path_count); + if (intersection.segment_count_base > intersection.segment_count_other) { + for (k = intersection.segment_count_base; + k > intersection.segment_count_other;) { + k -= 1; + cpj_path_get_prev_segment(path_style, &it_base); + } + } else if (intersection.segment_count_base < + intersection.segment_count_other) { + for (k = intersection.segment_count_other; + k > intersection.segment_count_base;) { + k -= 1; + cpj_path_get_prev_segment(path_style, &it_other); + } } else { - paths[0] = "/"; - i = 1; + k = intersection.segment_count_base; + 
} + for (; k > 0;) { + k -= 1; + cpj_path_get_prev_segment(path_style, &it_base); + cpj_path_get_prev_segment(path_style, &it_other); + cpj_size_t segment_length_base = cpj_path_get_segment_length(&it_base); + cpj_size_t segment_length_other = cpj_path_get_segment_length(&it_other); + if (segment_length_base != segment_length_other) { + intersection.equal_segment = 0; + continue; + } + const cpj_char_t *segment_ptr_base = cpj_path_get_segment_ptr(&it_base); + const cpj_char_t *segment_ptr_other = cpj_path_get_segment_ptr(&it_other); + if (!cpj_path_is_string_equal( + path_style, segment_ptr_base, segment_ptr_other, segment_length_base, + segment_length_other + )) { + intersection.equal_segment = 0; + continue; + } + intersection.equal_segment += 1; } + return intersection; +} - if (cpj_path_is_absolute(path_style, path)) { - // If the submitted path is not relative the base path becomes irrelevant. - // We will only normalize the submitted path instead. - paths[i++] = path; - paths[i] = NULL; +static cpj_size_t cpj_path_relative_generate( + cpj_path_style_t path_style, cpj_path_intersection_t *intersection, + cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t buffer_index +) +{ + cpj_size_t segment_eat_count; + if (intersection->equal_segment == intersection->segment_count_base) { + if (intersection->equal_segment == intersection->segment_count_other) { + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '\0'); + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '.'); + segment_eat_count = 2; + } else { + segment_eat_count = 0; + } } else { - // Otherwise we append the relative path to the base path and normalize it. - // The result will be a new absolute path. 
- paths[i++] = base; - paths[i++] = path; - paths[i] = NULL; + cpj_size_t k = intersection->segment_count_base; + segment_eat_count = 0; + if (intersection->equal_segment == intersection->segment_count_other) { + --k; + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '\0'); + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '.'); + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '.'); + segment_eat_count += 3; + } + for (; k > intersection->equal_segment;) { + --k; + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '/'); + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '.'); + cpj_path_push_front(path_style, buffer, buffer_size, &buffer_index, '.'); + segment_eat_count += 3; + } } - - // Finally join everything together and normalize it. - return cpj_path_join_and_normalize_multiple( - path_style, paths, buffer, buffer_size - ); + return segment_eat_count; } -static void cpj_path_skip_segments_until_diverge( - cpj_path_style_t path_style, struct cpj_segment_joined *bsj, - struct cpj_segment_joined *osj, bool absolute, bool *base_available, - bool *other_available +cpj_size_t cpj_path_relative_to( + cpj_path_style_t path_style, const cpj_string_t *cwd_directory, + const cpj_string_t *path_directory, const cpj_string_t *path, + cpj_char_t *buffer, cpj_size_t buffer_size ) { - // Now looping over all segments until they start to diverge. A path may - // diverge if two segments are not equal or if one path reaches the end. - do { - - // Check whether there is anything available after we skip everything which - // is invisible. We do that for both paths, since we want to let the caller - // know which path has some trailing segments after they diverge. 
- *base_available = - cpj_path_segment_joined_skip_invisible(path_style, bsj, absolute); - *other_available = - cpj_path_segment_joined_skip_invisible(path_style, osj, absolute); - - // We are done if one or both of those paths reached the end. They either - // diverge or both reached the end - but in both cases we can not continue - // here. - if (!*base_available || !*other_available) { - break; + const cpj_string_t path_base_original[] = {*cwd_directory, *path_directory}; + const cpj_string_t path_other_original[] = {*cwd_directory, *path}; + const cpj_string_t *path_base = path_base_original + 2 - 1; + const cpj_string_t *path_other = path_other_original + 2 - 1; + cpj_size_t path_count = 1; + cpj_path_intersection_t intersection = cpj_path_get_intersection_segments( + path_style, path_base, path_other, path_count + ); + if (intersection.equal_segment == 0) { + path_base = path_base_original; + path_other = path_other_original; + path_count = 2; + intersection = cpj_path_get_intersection_segments( + path_style, path_base, path_other, path_count + ); + if (intersection.equal_segment == 0) { + return cpj_path_join_and_normalize( + path_style, true, true, path_other, path_count, buffer, buffer_size + ); } - - // Compare the content of both segments. We are done if they are not equal, - // since they diverge. 
- if (!cpj_path_is_string_equal( - path_style, bsj->segment.begin, osj->segment.begin, bsj->segment.size, - osj->segment.size - )) { - break; + } + { + cpj_segment_iterator_t it_other = + cpj_path_interator_init(path_style, true, true, path_other, path_count); + cpj_size_t segment_eat_count = + cpj_path_relative_generate(path_style, &intersection, NULL, 0, 0); + cpj_size_t other_path_length; + if (intersection.equal_segment == intersection.segment_count_other) { + other_path_length = 0; + } else { + cpj_size_t k = intersection.segment_count_other; + for (; k > intersection.equal_segment;) { + k -= 1; + cpj_path_get_prev_segment(path_style, &it_other); + } + const cpj_char_t *segment_ptr = cpj_path_get_segment_ptr(&it_other); + cpj_string_t path_other_normalize[2]; + cpj_size_t path_other_normalize_count; + path_other_normalize[0] = it_other.path_list_p[it_other.list_pos]; + path_other_normalize[0].ptr = segment_ptr; + path_other_normalize[0] + .size = it_other.path_list_p[it_other.list_pos].ptr + + it_other.path_list_p[it_other.list_pos].size - segment_ptr; + if (it_other.list_pos == 0 && it_other.path_list_count == 2) { + path_other_normalize_count = 2; + path_other_normalize[1] = it_other.path_list_p[1]; + } else { + path_other_normalize_count = 1; + } + cpj_char_t *buffer_other = NULL; + cpj_size_t buffer_size_other = 0; + if (buffer && buffer_size > segment_eat_count) { + buffer_other = buffer + segment_eat_count; + buffer_size_other = buffer_size - segment_eat_count; + } + other_path_length = cpj_path_join_and_normalize( + path_style, true, true, path_other_normalize, + path_other_normalize_count, buffer_other, buffer_size_other + ); } + if (buffer) { + cpj_path_relative_generate( + path_style, &intersection, buffer, buffer_size, segment_eat_count + ); + } + return other_path_length == 0 ? segment_eat_count - 1 + : segment_eat_count + other_path_length; + } + return 0; +} - // We keep going until one of those segments reached the end. 
The next - // segment might be invisible, but we will check for that in the beginning - // of the loop once again. - *base_available = cpj_path_get_next_segment_joined(path_style, bsj); - *other_available = cpj_path_get_next_segment_joined(path_style, osj); - } while (*base_available && *other_available); +cpj_size_t cpj_path_get_intersection( + cpj_path_style_t path_style, const cpj_char_t *path_base, + const cpj_char_t *path_other +) +{ + cpj_string_t + path_base_str = cpj_string_create(path_base, cpj_strlen(path_base)); + cpj_string_t + path_other_str = cpj_string_create(path_other, cpj_strlen(path_other)); + cpj_path_intersection_t intersection = cpj_path_get_intersection_segments( + path_style, &path_base_str, &path_other_str, 1 + ); + cpj_segment_iterator_t it_base = + cpj_path_interator_init(path_style, false, true, &path_base_str, 1); + cpj_size_t k; + if (intersection.equal_segment == 0) { + return 0; + } + for (k = intersection.segment_count_base; k >= intersection.equal_segment;) { + k -= 1; + cpj_path_get_prev_segment(path_style, &it_base); + } + cpj_size_t segment_length = cpj_path_get_segment_length(&it_base); + const cpj_char_t *segment_ptr = cpj_path_get_segment_ptr(&it_base); + return segment_ptr - path_base + segment_length; } cpj_size_t cpj_path_get_relative( @@ -806,158 +747,155 @@ cpj_size_t cpj_path_get_relative( const cpj_char_t *path, cpj_char_t *buffer, cpj_size_t buffer_size ) { - cpj_size_t pos, base_root_length, path_root_length; - bool absolute, base_available, other_available, has_output; - const cpj_char_t *base_paths[2], *other_paths[2]; - struct cpj_segment_joined bsj, osj; - - pos = 0; - - // First we compare the roots of those two paths. If the roots are not equal - // we can't continue, since there is no way to get a relative path from - // different roots. 
- base_root_length = cpj_path_get_root(path_style, base_directory); - path_root_length = cpj_path_get_root(path_style, path); - if (base_root_length != path_root_length || - !cpj_path_is_string_equal( - path_style, base_directory, path, base_root_length, path_root_length - )) { - cpj_path_terminate_output(buffer, buffer_size, pos); - return pos; - } - - // Verify whether this is an absolute path. We need to know that since we can - // remove all back-segments if it is. - absolute = - cpj_path_is_root_absolute(path_style, base_directory, base_root_length); - - // Initialize our joined segments. This will allow us to use the internal - // functions to skip until diverge and invisible. We only have one path in - // them though. - base_paths[0] = base_directory; - base_paths[1] = NULL; - other_paths[0] = path; - other_paths[1] = NULL; - cpj_path_get_first_segment_joined(path_style, base_paths, &bsj); - cpj_path_get_first_segment_joined(path_style, other_paths, &osj); - - // Okay, now we skip until the segments diverge. We don't have anything to do - // with the segments which are equal. - cpj_path_skip_segments_until_diverge( - path_style, &bsj, &osj, absolute, &base_available, &other_available + cpj_string_t base_directory_str = + cpj_string_create(base_directory, cpj_strlen(base_directory)); + cpj_string_t path_str = cpj_string_create(path, cpj_strlen(path)); + cpj_string_t path_cwd = {CPJ_ZSTR_ARG("/")}; + return cpj_path_relative_to( + path_style, &path_cwd, &base_directory_str, &path_str, buffer, buffer_size ); +} - // Assume there is no output until we have got some. We will need this - // information later on to remove trailing slashes or alternatively output a - // current-segment. - has_output = false; - - // So if we still have some segments left in the base path we will now output - // a back segment for all of them. - if (base_available) { - do { - // Skip any invisible segment. 
We don't care about those and we don't need - // to navigate back because of them. - if (!cpj_path_segment_joined_skip_invisible(path_style, &bsj, absolute)) { - break; - } +/** + * This is a list of separators used in different styles. Windows can read + * multiple separators, but it generally outputs just a backslash. The output + * will always use the first character for the output. + */ +static const cpj_char_t *separators[] = { + "\\/", // CPJ_STYLE_WINDOWS + "/" // CPJ_STYLE_UNIX +}; - // Toggle the flag if we have output. We need to remember that, since we - // want to remove the trailing slash. - has_output = true; +static cpj_size_t cpj_path_output_sized( + cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position, + const cpj_char_t *str, cpj_size_t length +) +{ + cpj_size_t amount_written; - // Output the back segment and a separator. No need to worry about the - // superfluous segment since it will be removed later on. - pos += cpj_path_output_back(buffer, buffer_size, pos); - pos += cpj_path_output_separator(path_style, buffer, buffer_size, pos); - } while (cpj_path_get_next_segment_joined(path_style, &bsj)); + // First we determine the amount which we can write to the buffer. There are + // three cases. In the first case we have enough to store the whole string in + // it. In the second one we can only store a part of it, and in the third we + // have no space left. + if (buffer_size > position + length) { + amount_written = length; + } else if (buffer_size > position) { + amount_written = buffer_size - position; + } else { + amount_written = 0; } - // And if we have some segments available of the target path we will output - // all of those. - if (other_available) { - do { - // Again, skip any invisible segments since we don't need to navigate into - // them. - if (!cpj_path_segment_joined_skip_invisible(path_style, &osj, absolute)) { - break; - } - - // Toggle the flag if we have output. 
We need to remember that, since we - // want to remove the trailing slash. - has_output = true; - - // Output the current segment and a separator. No need to worry about the - // superfluous segment since it will be removed later on. - pos += cpj_path_output_sized( - buffer, buffer_size, pos, osj.segment.begin, osj.segment.size - ); - pos += cpj_path_output_separator(path_style, buffer, buffer_size, pos); - } while (cpj_path_get_next_segment_joined(path_style, &osj)); + // If we actually want to write out something we will do that here. We will + // always append a '\0', this way we are guaranteed to have a valid string at + // all times. + if (amount_written > 0) { + memmove(&buffer[position], str, amount_written); } - // If we have some output by now we will have to remove the trailing slash. We - // simply do that by moving back one character. The terminate output function - // will then place the '\0' on this position. Otherwise, if there is no - // output, we will have to output a "current directory", since the target path - // points to the base path. - if (has_output) { - --pos; - } else { - pos += cpj_path_output_current(buffer, buffer_size, pos); - } + // Return the theoretical length which would have been written when everything + // would have fit in the buffer. + return length; +} - // Finally, we can terminate the output - which means we place a '\0' at the - // current position or at the end of the buffer. - cpj_path_terminate_output(buffer, buffer_size, pos); +static cpj_size_t cpj_path_output_dot( + cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position +) +{ + // We output a dot, which is a single character. This is used for extensions. 
+ return cpj_path_output_sized(buffer, buffer_size, position, ".", 1); +} - return pos; +cpj_size_t cpj_strlen(const cpj_char_t *str) +{ + return (cpj_size_t)strlen((const char *)str); } -cpj_size_t cpj_path_join( - cpj_path_style_t path_style, const cpj_char_t *path_a, - const cpj_char_t *path_b, cpj_char_t *buffer, cpj_size_t buffer_size +static cpj_size_t cpj_path_output( + cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t position, + const cpj_char_t *str ) { - const cpj_char_t *paths[3]; - - // This is simple. We will just create an array with the two paths which we - // wish to join. - paths[0] = path_a; - paths[1] = path_b; - paths[2] = NULL; - - // And then call the join and normalize function which will do the hard work - // for us. - return cpj_path_join_and_normalize_multiple( - path_style, paths, buffer, buffer_size - ); + // This just does a sized output internally, but first measuring the + // null-terminated string. + cpj_size_t length = cpj_strlen(str); + return cpj_path_output_sized(buffer, buffer_size, position, str, length); } -cpj_size_t cpj_path_join_multiple( - cpj_path_style_t path_style, const cpj_char_t **paths, cpj_char_t *buffer, - cpj_size_t buffer_size +static void cpj_path_terminate_output( + cpj_char_t *buffer, cpj_size_t buffer_size, cpj_size_t pos ) { - // We can just call the internal join and normalize function for this one, - // since it will handle everything. - return cpj_path_join_and_normalize_multiple( - path_style, paths, buffer, buffer_size - ); + if (buffer_size > 0) { + if (pos >= buffer_size) { + buffer[buffer_size - 1] = '\0'; + } else { + buffer[pos] = '\0'; + } + } } -cpj_size_t -cpj_path_get_root(cpj_path_style_t path_style, const cpj_char_t *path) +static const cpj_char_t *cpj_path_find_previous_stop( + cpj_path_style_t path_style, const cpj_char_t *begin, const cpj_char_t *c +) { - cpj_size_t length; - // We use a different implementation here based on the configuration of the - // library. 
- if (path_style == CPJ_STYLE_WINDOWS) { - cpj_path_get_root_windows(path, &length); + // We just move back until we find a separator or reach the beginning of the + // path, which will be our previous "stop". + while (c > begin && !cpj_path_is_separator(path_style, *c)) { + --c; + } + + // Return the pointer to the previous stop. We have to return the first + // character after the separator, not on the separator itself. + if (cpj_path_is_separator(path_style, *c)) { + return c + 1; } else { - cpj_path_get_root_unix(path, &length); + return c; } - return length; +} + +static bool cpj_path_get_first_segment_without_root( + cpj_path_style_t path_style, const cpj_char_t *path, + const cpj_char_t *segments, struct cpj_segment *segment +) +{ + // Let's remember the path. We will move the path pointer afterwards, that's + // why this has to be done first. + segment->path = path; + segment->segments = segments; + segment->begin = segments; + segment->end = segments; + segment->size = 0; + + // Now let's check whether this is an empty string. An empty string has no + // segment it could use. + if (*segments == '\0') { + return false; + } + + // If the string starts with separators, we will jump over those. If there is + // only a slash and a '\0' after it, we can't determine the first segment + // since there is none. + while (cpj_path_is_separator(path_style, *segments)) { + ++segments; + if (*segments == '\0') { + return false; + } + } + + // So this is the beginning of our segment. + segment->begin = segments; + + // Now let's determine the end of the segment, which we do by moving the path + // pointer further until we find a separator. + segments = cpj_path_find_next_stop(path_style, segments); + + // And finally, calculate the size of the segment by subtracting the position + // from the end. + segment->size = (cpj_size_t)(segments - segment->begin); + segment->end = segments; + + // Tell the caller that we found a segment. 
+ return true; } cpj_size_t cpj_path_change_root( @@ -975,8 +913,8 @@ cpj_size_t cpj_path_change_root( // Now we determine the sizes of the new root and the path. We need that to // determine the size of the part after the root (the tail). - new_root_length = strlen(new_root); - path_length = strlen(path); + new_root_length = cpj_strlen(new_root); + path_length = cpj_strlen(path); // Okay, now we calculate the position of the tail and the length of it. tail = path + root_length; @@ -998,24 +936,6 @@ cpj_size_t cpj_path_change_root( return new_path_size; } -bool cpj_path_is_absolute(cpj_path_style_t path_style, const cpj_char_t *path) -{ - cpj_size_t length; - - // We grab the root of the path. This root does not include the first - // separator of a path. - length = cpj_path_get_root(path_style, path); - - // Now we can determine whether the root is absolute or not. - return cpj_path_is_root_absolute(path_style, path, length); -} - -bool cpj_path_is_relative(cpj_path_style_t path_style, const cpj_char_t *path) -{ - // The path is relative if it is not absolute. - return !cpj_path_is_absolute(path_style, path); -} - void cpj_path_get_basename( cpj_path_style_t path_style, const cpj_char_t *path, const cpj_char_t **basename, cpj_size_t *length @@ -1062,7 +982,7 @@ cpj_size_t cpj_path_change_basename( // We have to trim the separators from the beginning of the new basename. // This is quite easy to do. - while (cpj_path_is_separator(path_style, new_basename)) { + while (cpj_path_is_separator(path_style, *new_basename)) { ++new_basename; } @@ -1077,7 +997,7 @@ cpj_size_t cpj_path_change_basename( // the first valid character. while ( new_basename_size > 0 && - cpj_path_is_separator(path_style, &new_basename[new_basename_size - 1]) + cpj_path_is_separator(path_style, new_basename[new_basename_size - 1]) ) { --new_basename_size; } @@ -1213,7 +1133,7 @@ cpj_size_t cpj_path_change_extension( // output the trail - which is any part of the path coming after the // extension. 
We must output this first, since the buffer may overlap with the // submitted path - and it would be overridden by longer extensions. - new_extension_size = strlen(new_extension) + 1; + new_extension_size = cpj_strlen(new_extension) + 1; trail_size = cpj_path_output(buffer, buffer_size, pos + new_extension_size, segment.end); @@ -1232,98 +1152,6 @@ cpj_size_t cpj_path_change_extension( return pos; } -cpj_size_t cpj_path_normalize( - cpj_path_style_t path_style, const cpj_char_t *path, cpj_char_t *buffer, - cpj_size_t buffer_size -) -{ - const cpj_char_t *paths[2]; - - // Now we initialize the paths which we will normalize. Since this function - // only supports submitting a single path, we will only add that one. - paths[0] = path; - paths[1] = NULL; - - return cpj_path_join_and_normalize_multiple( - path_style, paths, buffer, buffer_size - ); -} - -cpj_size_t cpj_path_get_intersection( - cpj_path_style_t path_style, const cpj_char_t *path_base, - const cpj_char_t *path_other -) -{ - bool absolute; - cpj_size_t base_root_length, other_root_length; - const cpj_char_t *end; - const cpj_char_t *paths_base[2], *paths_other[2]; - struct cpj_segment_joined base, other; - - // We first compare the two roots. We just return zero if they are not equal. - // This will also happen to return zero if the paths are mixed relative and - // absolute. - base_root_length = cpj_path_get_root(path_style, path_base); - other_root_length = cpj_path_get_root(path_style, path_other); - if (!cpj_path_is_string_equal( - path_style, path_base, path_other, base_root_length, other_root_length - )) { - return 0; - } - - // Configure our paths. We just have a single path in here for now. - paths_base[0] = path_base; - paths_base[1] = NULL; - paths_other[0] = path_other; - paths_other[1] = NULL; - - // So we get the first segment of both paths. If one of those paths don't have - // any segment, we will return 0. 
- if (!cpj_path_get_first_segment_joined(path_style, paths_base, &base) || - !cpj_path_get_first_segment_joined(path_style, paths_other, &other)) { - return base_root_length; - } - - // We now determine whether the path is absolute or not. This is required - // because if will ignore removed segments, and this behaves differently if - // the path is absolute. However, we only need to check the base path because - // we are guaranteed that both paths are either relative or absolute. - absolute = cpj_path_is_root_absolute(path_style, path_base, base_root_length); - - // We must keep track of the end of the previous segment. Initially, this is - // set to the beginning of the path. This means that 0 is returned if the - // first segment is not equal. - end = path_base + base_root_length; - - // Now we loop over both segments until one of them reaches the end or their - // contents are not equal. - do { - // We skip all segments which will be removed in each path, since we want to - // know about the true path. - if (!cpj_path_segment_joined_skip_invisible(path_style, &base, absolute) || - !cpj_path_segment_joined_skip_invisible(path_style, &other, absolute)) { - break; - } - - if (!cpj_path_is_string_equal( - path_style, base.segment.begin, other.segment.begin, - base.segment.size, other.segment.size - )) { - // So the content of those two segments are not equal. We will return the - // size up to the beginning. - return (cpj_size_t)(end - path_base); - } - - // Remember the end of the previous segment before we go to the next one. - end = base.segment.end; - } while (cpj_path_get_next_segment_joined(path_style, &base) && - cpj_path_get_next_segment_joined(path_style, &other)); - - // Now we calculate the length up to the last point where our paths pointed to - // the same place. 
- return (cpj_size_t)(end - path_base); -} - bool cpj_path_get_first_segment( cpj_path_style_t path_style, const cpj_char_t *path, struct cpj_segment *segment @@ -1381,10 +1209,10 @@ bool cpj_path_get_next_segment( // Now we skip all separator until we reach something else. We are not yet // guaranteed to have a segment, since the string could just end afterwards. - assert(cpj_path_is_separator(path_style, c)); + assert(cpj_path_is_separator(path_style, *c)); do { ++c; - } while (cpj_path_is_separator(path_style, c)); + } while (cpj_path_is_separator(path_style, *c)); // If the string ends here, we can safely assume that there is no other // segment after this one. @@ -1428,7 +1256,7 @@ bool cpj_path_get_previous_segment( // false and don't change the segment structure submitted by the caller. return false; } - } while (cpj_path_is_separator(path_style, c)); + } while (cpj_path_is_separator(path_style, *c)); // We are guaranteed now that there is another segment, since we moved before // the previous separator and did not reach the segment path beginning. @@ -1454,23 +1282,6 @@ cpj_path_get_segment_type(const struct cpj_segment *segment) return CPJ_NORMAL; } -bool cpj_path_is_separator(cpj_path_style_t path_style, const cpj_char_t *str) -{ - const cpj_char_t *c; - - // We loop over all characters in the read symbols. - c = separators[path_style]; - while (*c) { - if (*c == *str) { - return true; - } - - ++c; - } - - return false; -} - cpj_size_t cpj_path_change_segment( cpj_path_style_t path_style, struct cpj_segment *segment, const cpj_char_t *value, cpj_char_t *buffer, cpj_size_t buffer_size @@ -1487,7 +1298,7 @@ cpj_size_t cpj_path_change_segment( // In order to trip the submitted value, we will skip any separator at the // beginning of it and behave as if it was never there. 
- while (cpj_path_is_separator(path_style, value)) { + while (cpj_path_is_separator(path_style, *value)) { ++value; } @@ -1502,13 +1313,13 @@ cpj_size_t cpj_path_change_segment( // have to subtract from the size until there are either no more characters // left or the last character is no separator. while (value_size > 0 && - cpj_path_is_separator(path_style, &value[value_size - 1])) { + cpj_path_is_separator(path_style, value[value_size - 1])) { --value_size; } // We also have to determine the tail size, which is the part of the string // following the current segment. This part will not change. - tail_size = strlen(segment->end); + tail_size = cpj_strlen(segment->end); // Now we output the tail. We have to do that, because if the buffer and the // source are overlapping we would override the tail if the value is @@ -1534,12 +1345,13 @@ cpj_path_style_t cpj_path_guess_style(const cpj_char_t *path) { const cpj_char_t *c; cpj_size_t root_length; - struct cpj_segment segment; + const cpj_char_t *basename; + cpj_size_t basename_length; // First we determine the root. Only windows roots can be longer than a single // slash, so if we can determine that it starts with something like "C:", we // know that this is a windows path. - cpj_path_get_root_windows(path, &root_length); + root_length = cpj_path_get_root_windows(path); if (root_length > 1) { return CPJ_STYLE_WINDOWS; } @@ -1559,20 +1371,21 @@ cpj_path_style_t cpj_path_guess_style(const cpj_char_t *path) // actually must be the first one), and determine whether the segment starts // with a dot. A dot is a hidden folder or file in the UNIX world, in that // case we assume the path to have UNIX style. - if (!cpj_path_get_last_segment(CPJ_STYLE_UNIX, path, &segment)) { + cpj_path_get_basename(CPJ_STYLE_UNIX, path, &basename, &basename_length); + if (basename == NULL) { // We couldn't find any segments, so we default to a UNIX path style since // there is no way to make any assumptions. 
return CPJ_STYLE_UNIX; } - if (*segment.begin == '.') { + if (*basename == '.') { return CPJ_STYLE_UNIX; } // And finally we check whether the last segment contains a dot. If it // contains a dot, that might be an extension. Windows is more likely to have // file names with extensions, so our guess would be windows. - for (c = segment.begin; *c; ++c) { + for (c = basename; *c; ++c) { if (*c == '.') { return CPJ_STYLE_WINDOWS; } diff --git a/test/join_test.c b/test/join_test.c index a31bad6..8ff7dc2 100644 --- a/test/join_test.c +++ b/test/join_test.c @@ -7,20 +7,22 @@ int join_multiple(void) { cpj_char_t buffer[FILENAME_MAX]; - const cpj_char_t *paths[3]; + cpj_string_t paths[2]; cpj_size_t length; + const cpj_char_t *expected; - paths[0] = "hello/there"; - paths[1] = "../world"; - paths[2] = NULL; + paths[0] = cpj_string_create(CPJ_ZSTR_ARG("hello/there")); + paths[1] = cpj_string_create(CPJ_ZSTR_ARG("../world/")); - length = cpj_path_join_multiple(CPJ_STYLE_UNIX, paths, buffer, sizeof(buffer)); - - if (length != 11) { + expected = "hello/world"; + length = cpj_path_join_and_normalize(CPJ_STYLE_UNIX, false, true, paths, 2, buffer, sizeof(buffer)); + if (length != strlen(expected) || strcmp(buffer, expected) != 0) { return EXIT_FAILURE; } - if (strcmp(buffer, "hello/world") != 0) { + expected = "hello/world/"; + length = cpj_path_join_and_normalize(CPJ_STYLE_UNIX, false, false, paths, 2, buffer, sizeof(buffer)); + if (length != strlen(expected) || strcmp(buffer, expected) != 0) { return EXIT_FAILURE; } @@ -62,6 +64,17 @@ int join_back_after_root(void) return EXIT_FAILURE; } + length = cpj_path_join_module(CPJ_STYLE_WINDOWS, "this\\", "C:\\..\\..\\is\\a\\test\\", buffer, + sizeof(buffer)); + + if (length != 12) { + return EXIT_FAILURE; + } + + if (strcmp(buffer, "C:\\is\\a\\test") != 0) { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; } @@ -118,6 +131,16 @@ int join_two_absolute(void) return EXIT_FAILURE; } + length = cpj_path_join_module(CPJ_STYLE_UNIX, 
"/first", "/second", buffer, sizeof(buffer)); + + if (length != 7) { + return EXIT_FAILURE; + } + + if (strcmp(buffer, "/second") != 0) { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; } diff --git a/test/normalize_test.c b/test/normalize_test.c index ed65855..f8f2fbe 100644 --- a/test/normalize_test.c +++ b/test/normalize_test.c @@ -57,6 +57,23 @@ int normalize_only_separators(void) return EXIT_SUCCESS; } +int normalize_zero_length(void) +{ + cpj_size_t count; + cpj_char_t result[FILENAME_MAX]; + cpj_char_t *input, *expected; + + input = ""; + strcpy(result, input); + expected = "."; + count = cpj_path_normalize(CPJ_STYLE_UNIX, input, result, sizeof(result)); + if (count != strlen(expected) || strcmp(result, expected) != 0) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + int normalize_empty(void) { cpj_size_t count; @@ -188,13 +205,13 @@ int normalize_relative_too_far(void) return EXIT_FAILURE; } - expected = "C:"; + expected = "C:."; count = cpj_path_normalize(CPJ_STYLE_WINDOWS, CPJ_ZSTR_LITERAL("C:rel/../"), result, sizeof(result)); if (count != strlen(expected) || strcmp(result, expected) != 0) { return EXIT_FAILURE; } - expected = "C:"; + expected = "C:."; count = cpj_path_normalize(CPJ_STYLE_WINDOWS, CPJ_ZSTR_LITERAL("C:"), result, sizeof(result)); if (count != strlen(expected) || strcmp(result, expected) != 0) { return EXIT_FAILURE; diff --git a/test/relative_test.c b/test/relative_test.c index b7c647e..e21f586 100755 --- a/test/relative_test.c +++ b/test/relative_test.c @@ -87,16 +87,14 @@ int relative_relative_and_absolute(void) { cpj_char_t result[FILENAME_MAX]; cpj_size_t length; + cpj_char_t *expected; *result = 1; length = cpj_path_get_relative(CPJ_STYLE_UNIX, "./foo", "/bar", result, sizeof(result)); - if (length != 0) { - return EXIT_FAILURE; - } - - if (*result != '\0') { + expected = "../bar"; + if (length != strlen(expected) || strcmp(result, expected) != 0) { return EXIT_FAILURE; } @@ -107,17 +105,15 @@ int 
relative_different_roots(void) { cpj_char_t result[FILENAME_MAX]; cpj_size_t length; + cpj_char_t *expected; *result = 1; length = cpj_path_get_relative(CPJ_STYLE_WINDOWS, "C:/path/same", "D:/path/same", result, sizeof(result)); - if (length != 0) { - return EXIT_FAILURE; - } - - if (*result != '\0') { + expected = "D:\\path\\same"; + if (length != strlen(expected) || strcmp(result, expected) != 0) { return EXIT_FAILURE; }