Skip to content

Commit

Permalink
Canonicalizer
Browse files Browse the repository at this point in the history
  • Loading branch information
bpolaszek committed Dec 6, 2017
1 parent b9c679d commit 417512f
Show file tree
Hide file tree
Showing 4 changed files with 273 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.idea
vendor
composer.lock
12 changes: 12 additions & 0 deletions src/Helper/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace BenTools\UriFactory\Helper;

use BenTools\UriFactory\UriCanonicalizer;
use BenTools\UriFactory\UriFactory;
use BenTools\UriFactory\UriFactoryInterface;
use Psr\Http\Message\UriInterface;
Expand All @@ -19,8 +20,19 @@ function uri(string $uri, UriFactoryInterface $factory = null): UriInterface
/**
 * Returns the URI produced by UriFactory::createUriFromCurrentLocation().
 *
 * This is a thin functional shortcut: it fetches the shared UriFactory via
 * UriFactory::factory() and forwards the optional $factory argument untouched.
 *
 * @param UriFactoryInterface|null $factory Optional factory forwarded to the UriFactory;
 *                                          null lets the UriFactory decide (presumably it
 *                                          picks a default adapter - confirm in UriFactory).
 * @return UriInterface
 * @throws \RuntimeException
 */
function current_location(?UriFactoryInterface $factory = null): UriInterface
{
    // Explicit "?Type" instead of relying on the "= null" default to imply nullability.
    return UriFactory::factory()->createUriFromCurrentLocation($factory);
}

/**
 * Functional shortcut for UriCanonicalizer::canonicalize().
 *
 * @param UriInterface $uri The URI to canonicalize.
 * @return UriInterface The value returned by UriCanonicalizer::canonicalize().
 * @throws \InvalidArgumentException
 */
function canonicalize(UriInterface $uri): UriInterface
{
    $canonicalUri = UriCanonicalizer::canonicalize($uri);

    return $canonicalUri;
}
165 changes: 165 additions & 0 deletions src/UriCanonicalizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
<?php

namespace BenTools\UriFactory;

use Psr\Http\Message\UriInterface;

/**
 * Stateless helper that rewrites a PSR-7 URI into a canonical form.
 *
 * Every method is static, takes a UriInterface (or a plain string) and returns
 * a new value obtained through the URI's with*() methods.
 */
final class UriCanonicalizer
{
    /**
     * Characters stripped from URI components by removeUnwantedChars():
     * horizontal tab, line feed, vertical tab and carriage return.
     */
    private const UNWANTED_CHARS = ["\x09", "\x0A", "\x0B", "\x0D"];

    /**
     * Runs the full canonicalization pipeline on the given URI.
     *
     * The steps are applied in this order: percent-unescape host and path,
     * default the scheme to "http", strip tab / line-break characters, drop
     * the fragment, trim and collapse dots in the host, normalize the host
     * name, then normalize the path.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function canonicalize(UriInterface $uri): UriInterface
    {
        $uri = self::ensureIsPercentUnescaped($uri);
        $uri = self::ensureSchemeIsNotBlank($uri);
        $uri = self::removeUnwantedChars($uri);
        $uri = self::removeFragment($uri);
        $uri = self::removeLeadingAndTrailingDots($uri);
        $uri = self::replaceConsecutiveDotsWithASingleDot($uri);
        $uri = self::normalizeHostname($uri);
        $uri = self::normalizePath($uri);

        return $uri;
    }

    /**
     * Replaces host and path with their fully percent-unescaped versions.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function ensureIsPercentUnescaped(UriInterface $uri): UriInterface
    {
        return $uri
            ->withHost(self::percentUnescape($uri->getHost()))
            ->withPath(self::percentUnescape($uri->getPath()));
    }

    /**
     * Repeatedly URL-decodes a string until decoding no longer changes it.
     *
     * NOTE(review): this relies on urldecode(), which also turns "+" into a
     * space; rawurldecode() would keep a literal "+". Left unchanged because
     * the existing expectations depend on the current behaviour.
     *
     * @param string $string
     * @return string
     */
    public static function percentUnescape(string $string): string
    {
        while ($string !== ($decoded = urldecode($string))) {
            $string = $decoded;
        }

        return $string;
    }

    /**
     * Sets the scheme to "http" when the URI has an empty scheme.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function ensureSchemeIsNotBlank(UriInterface $uri): UriInterface
    {
        if ('' === $uri->getScheme()) {
            return $uri->withScheme('http');
        }

        return $uri;
    }

    /**
     * Returns the URI with an empty fragment.
     *
     * @param UriInterface $uri
     * @return UriInterface
     */
    public static function removeFragment(UriInterface $uri): UriInterface
    {
        return $uri->withFragment('');
    }

    /**
     * Strips tab, line feed, vertical tab and carriage return characters from
     * the user info, host, path, query and fragment.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function removeUnwantedChars(UriInterface $uri): UriInterface
    {
        // getUserInfo() yields "user[:password]" while withUserInfo() expects the two
        // parts separately; handing over the combined string would make the password
        // part of the user name (and may get the colon percent-encoded).
        $credentials = explode(':', self::stripUnwantedChars((string) $uri->getUserInfo()), 2);

        return $uri
            ->withUserInfo($credentials[0], $credentials[1] ?? null)
            ->withHost(self::stripUnwantedChars((string) $uri->getHost()))
            ->withPath(self::stripUnwantedChars((string) $uri->getPath()))
            ->withQuery(self::stripUnwantedChars((string) $uri->getQuery()))
            ->withFragment(self::stripUnwantedChars((string) $uri->getFragment()));
    }

    /**
     * Removes leading and trailing dots from the host.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function removeLeadingAndTrailingDots(UriInterface $uri): UriInterface
    {
        return $uri->withHost(trim($uri->getHost(), '.'));
    }

    /**
     * Collapses every run of two or more dots in the host into a single dot.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function replaceConsecutiveDotsWithASingleDot(UriInterface $uri): UriInterface
    {
        $host = $uri->getHost();

        // preg_replace() returns null on a PCRE failure; keep the host as-is in that case.
        return $uri->withHost(preg_replace('/\.{2,}/', '.', $host) ?? $host);
    }

    /**
     * Lower-cases the host and rewrites IPv4 hosts in dotted-quad notation.
     *
     * A host made only of decimal digits whose value fits in 32 bits is read
     * as an integer IPv4 address (e.g. "3279880203" becomes "195.127.0.11").
     * Any other host accepted by ip2long() is re-rendered through long2ip().
     * Everything else is only lower-cased.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function normalizeHostname(UriInterface $uri): UriInterface
    {
        $hostname = strtolower($uri->getHost());

        // Only pure digit strings count as integer addresses: is_numeric() would also
        // accept values such as "1e3", "1.5" or "+5" and turn them into bogus addresses.
        if (1 === preg_match('/^[0-9]+$/D', $hostname)) {
            $address = (int) $hostname;
            if ($address <= 0xFFFFFFFF) {
                $hostname = long2ip($address);
            }

            return $uri->withHost($hostname);
        }

        $address = ip2long($hostname);
        if (false !== $address) {
            $hostname = long2ip($address);
        }

        return $uri->withHost($hostname);
    }

    /**
     * Resolves "." and ".." segments, collapses repeated slashes and makes
     * sure the resulting path starts with a slash.
     *
     * @param UriInterface $uri
     * @return UriInterface
     * @throws \InvalidArgumentException
     */
    public static function normalizePath(UriInterface $uri): UriInterface
    {
        $parts = [];
        foreach (explode('/', $uri->getPath()) as $segment) {
            if ('.' === $segment) {
                // "Current directory" segments carry no information.
                continue;
            }

            if ('..' === $segment) {
                // Step back one level; popping an empty stack is a no-op.
                array_pop($parts);
                continue;
            }

            $parts[] = $segment;
        }

        $path = implode('/', $parts);
        // preg_replace() returns null on a PCRE failure; keep the joined path in that case.
        $path = preg_replace('#/{2,}#', '/', $path) ?? $path;

        if (0 !== strpos($path, '/')) {
            $path = '/' . $path;
        }

        return $uri->withPath($path);
    }

    /**
     * Removes every character listed in UNWANTED_CHARS from the given string.
     *
     * @param string $string
     * @return string
     */
    private static function stripUnwantedChars(string $string): string
    {
        return str_replace(self::UNWANTED_CHARS, '', $string);
    }
}
95 changes: 95 additions & 0 deletions tests/UriCanonicalizerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
<?php

namespace BenTools\UriFactory\Tests;

use BenTools\UriFactory\Adapter\GuzzleAdapter;
use function BenTools\UriFactory\Helper\canonicalize;
use function BenTools\UriFactory\Helper\uri;
use BenTools\UriFactory\UriCanonicalizer;
use PHPUnit\Framework\TestCase;

/**
 * Exercises each UriCanonicalizer step on its own, then the whole pipeline
 * through the canonicalize() helper function.
 *
 * Assertions use assertSame() so that string results are compared strictly
 * (assertEquals() would compare numeric-looking strings loosely).
 */
class UriCanonicalizerTest extends TestCase
{
    public function testEnsureSchemeIsNotBlank(): void
    {
        $uri = uri('www.example.org');
        $this->assertSame('http', UriCanonicalizer::ensureSchemeIsNotBlank($uri)->getScheme());
    }

    public function testRemoveFragment(): void
    {
        $uri = uri('http://example.org/foo#bar');
        $this->assertSame('', UriCanonicalizer::removeFragment($uri)->getFragment());
    }

    public function testRemoveLeadingAndTrailingDots(): void
    {
        $uri = uri('http://...www.example.org.../foo');
        $this->assertSame('www.example.org', UriCanonicalizer::removeLeadingAndTrailingDots($uri)->getHost());
    }

    public function testReplaceConsecutiveDotsWithASingleDot(): void
    {
        $uri = uri('http://www..example...org/foo');
        $this->assertSame('www.example.org', UriCanonicalizer::replaceConsecutiveDotsWithASingleDot($uri)->getHost());
    }

    public function testNormalizeHostname(): void
    {
        $uri = uri('http://WWW.EXAMPLE.ORG');
        $this->assertSame('www.example.org', UriCanonicalizer::normalizeHostname($uri)->getHost());
    }

    public function testNormalizePath(): void
    {
        $uri = uri('http://example.org/apple/../../cherry/banana/strawberry/../../orange/./pear//pineapple/');
        $this->assertSame('/cherry/orange/pear/pineapple/', UriCanonicalizer::normalizePath($uri)->getPath());
        $uri = uri('http://example.org//');
        $this->assertSame('/', UriCanonicalizer::normalizePath($uri)->getPath());
        $uri = uri('http://example.org/../');
        $this->assertSame('/', UriCanonicalizer::normalizePath($uri)->getPath());
        $uri = uri('http://example.org/');
        $this->assertSame('/', UriCanonicalizer::normalizePath($uri)->getPath());
        $uri = uri('http://example.org');
        $this->assertSame('/', UriCanonicalizer::normalizePath($uri)->getPath());
    }

    /**
     * @dataProvider dataProvider
     */
    public function testCanonicalize(string $testedUrl, string $expectedUrl): void
    {
        $result = (string) canonicalize(uri($testedUrl, GuzzleAdapter::factory()));
        $this->assertSame($expectedUrl, $result);
    }

    /**
     * Pairs of [tested URL, expected canonical URL], keyed by a short description.
     *
     * @return array<string, array{0: string, 1: string}>
     */
    public static function dataProvider(): array
    {
        return [
            'double-escaped percent' => ['http://host/%25%32%35', 'http://host/%25'],
            'two double-escaped percents' => ['http://host/%25%32%35%25%32%35', 'http://host/%25%25'],
            'deeply nested escaping' => ['http://host/%2525252525252525', 'http://host/%25'],
            'escaped percent inside text' => ['http://host/asdf%25%32%35asd', 'http://host/asdf%25asd'],
            'bare and escaped percents' => ['http://host/%%%25%32%35asd%%', 'http://host/%25%25%25asd%25%25'],
            'already canonical' => ['http://www.google.com/', 'http://www.google.com/'],
            'escaped host and path' => ['http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77%77%77%2E%65%62%61%79%2E%63%6F%6D/', 'http://168.188.99.26/.secure/www.ebay.com/'],
            'escaped spaces are kept' => ['http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/', 'http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/'],
            'escaped punctuation' => ['http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A22%252833%252944_55%252B', 'http://host.com/~a!b@c%23d$e%25f%5E00&11*22(33)44_55%20'],
            'integer host' => ['http://3279880203/blah', 'http://195.127.0.11/blah'],
            'parent directory segment' => ['http://www.google.com/blah/..', 'http://www.google.com/'],
            'fragment removed' => ['http://www.evil.com/blah#frag', 'http://www.evil.com/blah'],
            'mixed-case host' => ['http://www.GOOgle.com/', 'http://www.google.com/'],
            'trailing dots in host' => ['http://www.google.com.../', 'http://www.google.com/'],
            'query ending with question mark' => ['http://www.google.com/q?r?', 'http://www.google.com/q?r?'],
            'query containing question mark' => ['http://www.google.com/q?r?s', 'http://www.google.com/q?r?s'],
            'fragment containing hash' => ['http://evil.com/foo#bar#baz', 'http://evil.com/foo'],
            'path ending with semicolon' => ['http://evil.com/foo;', 'http://evil.com/foo;'],
            'query ending with semicolon' => ['http://evil.com/foo?bar;', 'http://evil.com/foo?bar;'],
            'missing trailing slash' => ['http://notrailingslash.com', 'http://notrailingslash.com/'],
            'https scheme kept' => ['https://www.securesite.com/', 'https://www.securesite.com/'],
            'escaped hash in path' => ['http://host.com/ab%23cd', 'http://host.com/ab%23cd'],
            'double slashes in path and query' => ['http://host.com//twoslashes?more//slashes', 'http://host.com/twoslashes?more//slashes'],
        ];
    }
}

0 comments on commit 417512f

Please sign in to comment.