From fc455e5f3210c9a4ba406e724e8b9433826a12e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Riikka=20Kalliom=C3=A4ki?= Date: Wed, 25 Mar 2015 21:01:21 +0200 Subject: [PATCH] Include normalize method --- CHANGES.md | 6 ++ README.md | 109 ++++++++++++-------------- composer.json | 4 +- src/Path.php | 163 +++++++++++++++++++++++++++------------ tests/tests/PathTest.php | 33 +++++++- 5 files changed, 205 insertions(+), 110 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index cfedac2..140f89d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,11 @@ # Changelog # +## v1.1.0 (2015-03-25) ## + + * Added `Path::normalize()` method for normalizing a single path. + * The `Path::join()` method now correctly returns '.' instead of an empty + path, similar to the `dirname()` function. + ## v1.0.1 (2015-01-24) ## * Improvements in code quality and documentation diff --git a/README.md b/README.md index 16fdd52..76c9d28 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,19 @@ # Path joiner and normalizer # -*PathJoin* is a PHP library for joining file systems paths and resolving parent -`..` and current `.` directory references. In order to support both Windows and -Unix platforms, the library treats both forward `/` and backward `\` slashes as -directory separators. +*PathJoin* is a PHP library for normalizing and joining file system paths. The +purpose of this library is to make it easier to work with file system paths +regardless of the platform and the system directory separator. -This library makes it easier to join file system paths, since you can ignore the -fact that different directory separators exist. Joining paths together is also -easier, because you don't have to remember if paths are followed by a directory -separator or not. +The purpose of file path normalization is to provide a single consistent file +path representation. 
In other words, the normalization in this library will +resolve `.` and `..` directory references and also condense multiple directory +separators into one. This makes it much easier to avoid common problems when +comparing paths against each other. -In some cases this library can also provide additional security, since only the -first path can denote an absolute path and by resolving parent directory -references, it's easier to tell if you're about to access allowed paths. +While PHP provides a built in function `realpath()`, it is not usable in every +case since it works by using the file system. This library simply combines and +normalizes the paths using string handling. There is no requirement for the +files or directories to be readable or even exist. The API documentation, which can be generated using Apigen, can be read online at: http://kit.riimu.net/api/pathjoin/ @@ -59,75 +60,67 @@ the provided `src/autoload.php` file. ## Usage ## -This library provides exactly one method: `Path::join($path, ...)`. This method -can either take paths as multiple arguments or as a single array. The returned -path uses the system directory separators. For example: +This library provides two convenient methods, `Path::normalize()` and +`Path::join()`. Both of these methods work in a very similar fashion. The main +difference is that while the `join()` method can accept multiple paths to join, +the `normalize()` will only accept a single path. Both of the methods will +return a normalized path as the result. -```php - * @copyright Copyright (c) 2014, Riikka Kalliomäki * @license http://opensource.org/licenses/mit-license.php MIT License @@ -11,69 +11,135 @@ class Path { /** - * Joins and normalizes file systems paths. + * Normalizes the provided file system path. * - * The method can take either multiple string arguments or an array of - * strings. 
The paths will be joined together using a directory separator - * and any parent and current directory references will be resolved. The - * resulting path may, however, begin with parent directory references if - * it is not an absolute path. Only the first path may denote an absolute - * path, however, since all following paths are relative to the first path. + * Normalizing file system paths means that all forward and backward + * slashes in the path will be replaced with the system directory separator + * and multiple directory separators will be condensed into one. + * Additionally, all `.` and `..` directory references will be resolved in + * the returned path. * - * In order to support multiple different platforms, this method will treat - * all forward and backslashes as directory separators. The resulting path - * will only contain system directory separators, however. + * Note that if the normalized path is not an absolute path, the resulting + * path may begin with `..` directory references if it is not possible to + * resolve them simply by using string handling. You should also note that + * if the resulting path would result in an empty string, this method will + * return `.` instead. * - * It is possible to simple provide a single path to this function in order - * to normalize that path. + * If the `$prependDrive` option is enabled, the normalized path will be + * prepended with the drive name on Windows platforms using the current + * working directory, if the path is an absolute path that does not include + * a drive name. 
* - * @param string[]|string $path Paths to join and normalize - * @return string The joined and normalized path - * @throws \InvalidArgumentException If the path contains invalid characters + * @param string $path File system path to normalize + * @param bool $prependDrive True to prepend drive name to absolute paths + * @return string The normalized file system path */ - public static function join($path) + public static function normalize($path, $prependDrive = true) { - $paths = self::canonize(is_array($path) ? $path : func_get_args(), $absolute); - $parts = self::normalize($paths, $absolute); - return $absolute && count($parts) === 1 - ? reset($parts) . DIRECTORY_SEPARATOR - : implode(DIRECTORY_SEPARATOR, $parts); + $path = self::join((string) $path); + + if ($path[0] === DIRECTORY_SEPARATOR && $prependDrive) { + return strstr(getcwd(), DIRECTORY_SEPARATOR, true) . $path; + } + + return $path; } /** - * Canonizes the path into separate parts regardless of directory separator. - * @param string[] $args Array of paths - * @param boolean $absolute Will be set to true if the path is absolute - * @return string[] Parts in the paths separated into a single array + * Joins the provided file system paths together and normalizes the result. + * + * The paths can be provided either as multiple arguments to this method + * or as an array. The paths will be joined using the system directory + * separator and the result will be normalized similar to the normalization + * method (the drive letter will not be prepended however). + * + * Note that unless the first path in the list is an absolute path, the + * entire resulting path will be treated as a relative path. 
+ * + * @param string[]|string $paths File system paths to join + * @return string The joined file system paths */ - private static function canonize(array $args, & $absolute) + public static function join($paths) { - $args = array_map('trim', $args); - $paths = explode('/', str_replace('\\', '/', implode('/', $args))); - $absolute = $args[0] !== '' && ($paths[0] === '' || substr($paths[0], -1) === ':'); - return $paths; + $paths = self::getPaths(func_get_args()); + $parts = self::getParts($paths); + + $absolute = self::isAbsolute($paths[0]); + $root = $absolute ? array_shift($parts) . DIRECTORY_SEPARATOR : ''; + $parts = self::resolve($parts, $absolute); + + if ($parts === []) { + return $root ?: '.'; + } + + return $root . implode(DIRECTORY_SEPARATOR, $parts); } /** - * Normalizes that parent directory references and removes redundant ones. - * @param string[] $paths List of parts in the the path + * Returns the paths from the arguments list. + * @param array $args The arguments list + * @return string[] Paths from the arguments list + * @throws \InvalidArgumentException If the path array is empty + */ + private static function getPaths($args) + { + if (is_array($args[0])) { + $args = $args[0]; + + if ($args === []) { + throw new \InvalidArgumentException('You must provide at least one path'); + } + } + + return $args; + } + + /** + * Merges the paths and returns the individual parts. + * @param string[] $paths Array of paths + * @return string[] Parts in the paths merged into a single array + */ + private static function getParts(array $paths) + { + return array_map('trim', explode('/', str_replace('\\', '/', implode('/', $paths)))); + } + + /** + * Tells if the path is an absolute path. 
+ * @param string $path The file system path to test + * @return bool True if the path is an absolute path, false if not + */ + private static function isAbsolute($path) + { + $path = trim($path); + + if ($path === '') { + return false; + } + + $length = strcspn($path, '/\\'); + return $length === 0 || $path[$length - 1] === ':'; + } + + /** + * Resolves parent directory references and removes redundant entries. + * @param string[] $parts List of parts in the path * @param boolean $absolute Whether the path is an absolute path or not - * @return string[] Normalized list of paths + * @return string[] Resolved list of parts in the path */ - private static function normalize(array $paths, $absolute) + private static function resolve(array $parts, $absolute) { - $parts = $absolute ? [array_shift($paths)] : []; - $paths = array_filter($paths, [__CLASS__, 'isValidPath']); - - foreach ($paths as $part) { - if ($part === '..') { - self::resolveParent($parts, $absolute); - } else { - $parts[] = $part; + $resolved = []; + + foreach ($parts as $path) { + if ($path === '..') { + self::resolveParent($resolved, $absolute); + } elseif (self::isValidPath($path)) { + $resolved[] = $path; } } - return $parts; + return $resolved; } /** @@ -95,13 +161,14 @@ private static function isValidPath($path) * Resolves the relative parent directory for the path. 
* @param string[] $parts Path parts to modify * @param boolean $absolute True if dealing with absolute path, false if not + * @return string|null The removed parent or null if nothing was removed */ private static function resolveParent(& $parts, $absolute) { - if (in_array(end($parts), ['..', false], true)) { - $parts[] = '..'; - } elseif (count($parts) > 1 || !$absolute) { - array_pop($parts); + if ($absolute || !in_array(end($parts), ['..', false], true)) { + return array_pop($parts); } + + $parts[] = '..'; } } diff --git a/tests/tests/PathTest.php b/tests/tests/PathTest.php index 56e7db4..1985903 100644 --- a/tests/tests/PathTest.php +++ b/tests/tests/PathTest.php @@ -9,6 +9,12 @@ */ class PathTest extends \PHPUnit_Framework_TestCase { + public function testEmptyPathArray() + { + $this->setExpectedException('InvalidArgumentException'); + Path::join([]); + } + public function testArgumentVariations() { $this->assertPath(['foo', 'bar', 'baz'], Path::join('foo', 'bar', 'baz')); @@ -17,6 +23,14 @@ public function testArgumentVariations() $this->assertPath(['foo', 'bar', 'baz'], Path::join(['foo', 'bar/baz'])); } + public function testEmptyAbsolutePaths() + { + $this->assertPath(['', ''], Path::join('/', '')); + $this->assertPath(['', ''], Path::join('\\', '')); + $this->assertPath(['C:', ''], Path::join('C:\\', '')); + $this->assertPath(['C:', ''], Path::join('C:', '')); + } + public function testAbsolutePaths() { $this->assertPath(['', 'foo', 'bar'], Path::join('/foo', 'bar')); @@ -25,7 +39,7 @@ public function testAbsolutePaths() public function testRelativePaths() { - $this->assertPath([''], Path::join('', '/', '//')); + $this->assertPath(['.'], Path::join('', '/', '//')); $this->assertPath(['foo', 'bar'], Path::join('foo', '/bar')); } @@ -49,7 +63,7 @@ public function testSpecialDirectories() public function testRelativeBacktracking() { - $this->assertPath([''], Path::join('foo/bar', '..', '/..')); + $this->assertPath(['.'], Path::join('foo/bar', '..', '/..')); 
$this->assertPath(['..', 'baz'], Path::join('foo/bar', '..', '/../../', 'baz')); } @@ -64,6 +78,21 @@ public function testWindowsAbsolutePaths() $this->assertPath(['C:', 'baz'], Path::join('C:/foo/bar', '..', '/../../', 'baz')); } + public function testNormalization() + { + $this->assertPath(['foo', 'bar'], Path::normalize('foo/bar')); + } + + public function testDriveNormalization() + { + $this->assertPath([strstr(getcwd(), DIRECTORY_SEPARATOR, true), 'foo', 'bar'], Path::normalize('/foo/bar')); + } + + public function testEmptyPath() + { + $this->assertSame('.', Path::normalize('')); + } + private function assertPath(array $expected, $actual) { $this->assertSame(implode(DIRECTORY_SEPARATOR, $expected), $actual);