diff --git a/.travis.yml b/.travis.yml index 04e5d81..6946ed7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,6 @@ language: php php: - - 7.1 - - 7.2 - - 7.3 - - 7.4 - - 8.0snapshot + - 8.0 before_install: - sudo apt-get -qq update diff --git a/CHANGELOG.md b/CHANGELOG.md index 4284f7f..125848f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,21 @@ # Change Log -All notable changes to this project will be documented in this file. -This project adheres to [Semantic Versioning] (http://semver.org/). -For change log format, use [Keep a Changelog] (http://keepachangelog.com/). + +All notable changes to this project will be documented in this file. This project adheres +to [Semantic Versioning] (http://semver.org/). For change log format, +use [Keep a Changelog] (http://keepachangelog.com/). + +## [2.0.0-alpha1] - 2021-03-22 + +### Added + +- `HtmlSelector` class to init and manage relations +- `XpathSolver` class to solve a selector to xpath +- Extensions to add pseudo classes + +### Changed + +- Refactoring ## [1.0.0] - 2020-11-05 + First version diff --git a/README.md b/README.md index 76557dd..345e4c0 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ $ composer require berlioz/html-selector ### Dependencies -- **PHP** ^7.1 || ^8.0 +- **PHP** ^8.0 - PHP libraries: - **dom** - **libxml** diff --git a/composer.json b/composer.json index 17af151..b655d9d 100644 --- a/composer.json +++ b/composer.json @@ -20,7 +20,7 @@ } }, "require": { - "php": "^7.1 || ^8.0", + "php": "^8.0", "ext-dom": "*", "ext-libxml": "*", "ext-mbstring": "*", @@ -29,6 +29,6 @@ }, "require-dev": { "berlioz/http-message": "^1.0", - "phpunit/phpunit": "^7.0 || ^8.0 || ^9.0" + "phpunit/phpunit": "^9.3" } } diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 52a9130..6b979d3 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,13 +1,16 @@ - + + + + src + + ./tests - - - src - - - \ No newline at end of file + diff --git a/src/CssSelector/CssSelector.php 
b/src/CssSelector/CssSelector.php new file mode 100644 index 0000000..1ecc0ca --- /dev/null +++ b/src/CssSelector/CssSelector.php @@ -0,0 +1,113 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\CssSelector; + +/** + * Class CssSelector. + */ +class CssSelector +{ + protected ?NextCssSelector $next = null; + + public function __construct( + protected string $selector, + protected ?string $type = null, + protected ?string $id = null, + protected array $classes = [], + protected array $attributes = [], + protected array $pseudoClasses = [], + ) { + } + + /** + * __toString() PHP magic method. + * + * @return string + */ + public function __toString(): string + { + return $this->selector; + } + + /** + * Get type. + * + * @return string|null + */ + public function getType(): ?string + { + return $this->type; + } + + /** + * Get id. + * + * @return string|null + */ + public function getId(): ?string + { + return $this->id; + } + + /** + * Get classes. + * + * @return array + */ + public function getClasses(): array + { + return $this->classes; + } + + /** + * Get attributes. + * + * @return array + */ + public function getAttributes(): array + { + return $this->attributes; + } + + /** + * Get pseudo classes. + * + * @return array + */ + public function getPseudoClasses(): array + { + return $this->pseudoClasses; + } + + /** + * Get next. + * + * @return NextCssSelector|null + */ + public function getNext(): ?NextCssSelector + { + return $this->next; + } + + /** + * Set next. 
+ * + * @param NextCssSelector|null $next + */ + public function setNext(?NextCssSelector $next): void + { + $this->next = $next; + } +} \ No newline at end of file diff --git a/src/CssSelector/CssSelectorParser.php b/src/CssSelector/CssSelectorParser.php new file mode 100644 index 0000000..6eb37fa --- /dev/null +++ b/src/CssSelector/CssSelectorParser.php @@ -0,0 +1,262 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\CssSelector; + +/** + * Class CssSelectorParser. + */ +class CssSelectorParser +{ + public const REGEX_DECLARATIONS = <<<'EOD' +(?(DEFINE) + (? '(?>[^'\\]++|\\.)*' | "(?>[^"\\]++|\\.)*" ) + + (? \w+ | \* ) + (? \#(?:[\w\-]+) ) + (? \.(?:[\w\-]+) ) + (? \g+ ) + (? \[ \s* [\w\-]+ (?: \s* (?: = | \^= | \$= | \*= | != | \~= | \|= ) \s* (\g|[^\]]+))? \s* \] ) + (? \g+ ) + (? :([\w\-]+ (?: \( \s* (\g | \g | [^)]*) \s* \) )? ) ) + (? \g+ ) + + (? \g? \g? \g? \g? \g? ) + (? \g \s* ( \s* ([+>\~] | >> )? \s* \g )* ) + (? \g \s* ( , \s* \g )* ) +) +EOD; + + /** + * Parse. + * + * @param string $selector + * + * @return CssSelectorSet + */ + public function parse(string $selector): CssSelectorSet + { + return $this->parseSelectors($selector); + } + + /** + * Parse selectors from a multiple selector. + * + * Like ".class, .class2[attribute]" > 2 selectors: ".class" and ".class2[attribute]". + * + * @param string $selector + * + * @return CssSelectorSet + */ + private function parseSelectors(string $selector): CssSelectorSet + { + $selectors = []; + + // Regex + $regex = + '~' . + static::REGEX_DECLARATIONS . + '(? \g )' . 
+ '~xis'; + + $matches = []; + if (false !== preg_match_all($regex, $selector, $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL)) { + $matches = array_filter(array_column($matches, 'selector')); + + foreach ($matches as $expression) { + $selectors[] = $this->parseExpressions($expression); + } + } + + $selectors = array_filter($selectors); + + return new CssSelectorSet(...$selectors); + } + + /** + * Parse expressions from a selector. + * + * Like ".class[attribute] .class2" > 2 expressions: ".class[attribute]" and ".class2". + * Each parsed expression is linked to the following one through setNext(), + * so only the head of the chain is returned. + * + * @param string $selector + * + * @return CssSelector|null First expression of the chain, null if no expression was found + */ + private function parseExpressions(string $selector): ?CssSelector + { + $expressions = []; + + // Regex + $regex = + '~' . + static::REGEX_DECLARATIONS . + '(? [+>\~] | >> )? \s* (? \g )' . + '~xis'; + + $matches = []; + if (false !== preg_match_all($regex, $selector, $matches, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL)) { + $lastExpression = null; + + foreach ($matches as $match) { + if (empty(trim($match[0]))) { + continue; + } + + $expression = $this->parseExpression($match['expression']); + + if ($lastExpression) { + $lastExpression->setNext( + new NextCssSelector( + selector: $expression, + predecessor: $match['predecessor'] ?? null + ) + ); + } + + $expressions[] = $lastExpression = $expression; + } + } + + // reset() returns false (not null) on an empty array: "??" would let false through + // and break the ?CssSelector return type, so use "?:" instead. + return reset($expressions) ?: null; + } + + /** + * Parse expression into parameters.
+ * + * Example of result for expression "select#toto.class.class2[attribute1][attribute2^="value"]:disabled:eq(1)": + * ['type' => 'select', + * 'id' => 'toto', + * 'classes' => ['class', 'class2'], + * 'attributes' => [['name' => 'attribute1', + * 'comparison' => null, + * 'value' => null], + * ['name' => 'attribute2', + * 'comparison' => '^=', + * 'value' => 'value']]], + * 'filters' => ['disabled' => null, + * 'eq' => '1']] + * + * @param string $expression + * + * @return CssSelector + */ + private function parseExpression(string $expression): CssSelector + { + $regex = + '~' . + static::REGEX_DECLARATIONS . + '^ \s* (? \g)? (? \g)? (? \g)? (? \g)? (? \g)? \s* $' . + '~xis'; + + $match = []; + if (1 !== preg_match($regex, $expression, $match, PREG_UNMATCHED_AS_NULL)) { + return new CssSelector($expression); + } + + // Classes + { + $classes = []; + + if (!empty($match['classes'])) { + $regexClass = + '~' . + static::REGEX_DECLARATIONS . + '\.(? [\w\-]+ )' . + '~xis'; + + $matchesClass = []; + if (preg_match_all( + $regexClass, + $match['classes'], + $matchesClass, + PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL + )) { + foreach ($matchesClass as $matchClass) { + $classes[] = $matchClass['class']; + } + } + } + } + + // Attributes + { + $attributes = []; + + if (!empty($match['attributes'])) { + $regexAttribute = + '~' . + static::REGEX_DECLARATIONS . + '\[ \s* (? [\w\-]+ ) (?: \s* (? = | \^= | \$= | \*= | != | \~= | \|= ) \s* (?: (? \g) | (? [^\]]+) ) )? \s* \]' . + '~xis'; + + $matchesAttribute = []; + if (preg_match_all( + $regexAttribute, + $match['attributes'], + $matchesAttribute, + PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL + )) { + foreach ($matchesAttribute as $matchAttribute) { + $attributes[] = [ + 'name' => $matchAttribute['name'], + 'comparison' => $matchAttribute['comparison'] ?? null, + 'value' => + !empty($matchAttribute['quotes']) ? + stripslashes(substr($matchAttribute['quotes'], 1, -1)) : + (!empty($matchAttribute['value']) ? 
+ $matchAttribute['value'] : + null) + ]; + } + } + } + } + + // Filters + { + $filters = []; + + if (!empty($match['filters'])) { + $regexFilter = + '~' . + static::REGEX_DECLARATIONS . + ':(:? (? [\w\-]+ ) (?: \( \s* (? \g | \g | [^)]*) \s* \) )? )' . + '~xis'; + + $matchesFilter = []; + if (preg_match_all( + $regexFilter, + $match['filters'], + $matchesFilter, + PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL + )) { + foreach ($matchesFilter as $matchFilter) { + $filters[$matchFilter['name']] = $matchFilter['value'] ?? null; + } + } + } + } + + // Definition + return + new CssSelector( + $expression, + type: $match['type'] ?? null, + id: isset($match['id']) ? substr($match['id'], 1) : null, + classes: $classes, + attributes: $attributes, + pseudoClasses: $filters, + ); + } +} \ No newline at end of file diff --git a/src/CssSelector/CssSelectorSet.php b/src/CssSelector/CssSelectorSet.php new file mode 100644 index 0000000..2e2c102 --- /dev/null +++ b/src/CssSelector/CssSelectorSet.php @@ -0,0 +1,58 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\CssSelector; + +use Countable; + +/** + * Class CssSelectorSet. + */ +class CssSelectorSet implements Countable +{ + protected array $selectors = []; + + public function __construct(CssSelector ...$selector) + { + array_push($this->selectors, ...$selector); + } + + /** + * @inheritDoc + */ + public function count(): int + { + return count($this->selectors); + } + + /** + * __toString() PHP magic method. + * + * @return string + */ + public function __toString(): string + { + return implode(', ', array_map(fn(CssSelector $selector) => (string)$selector, $this->selectors)); + } + + /** + * Get selectors. 
+ * + * @return CssSelector[] + */ + public function all(): array + { + return $this->selectors; + } +} \ No newline at end of file diff --git a/src/CssSelector/NextCssSelector.php b/src/CssSelector/NextCssSelector.php new file mode 100644 index 0000000..d4f5dff --- /dev/null +++ b/src/CssSelector/NextCssSelector.php @@ -0,0 +1,44 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\CssSelector; + +class NextCssSelector +{ + public function __construct( + protected CssSelector $selector, + protected ?string $predecessor + ) { + } + + /** + * Get selector. + * + * @return CssSelector + */ + public function getSelector(): CssSelector + { + return $this->selector; + } + + /** + * Get predecessor. + * + * @return string|null + */ + public function getPredecessor(): ?string + { + return $this->predecessor; + } +} \ No newline at end of file diff --git a/src/Exception/HtmlSelectorException.php b/src/Exception/HtmlSelectorException.php index 10eaeae..e3a5f49 100644 --- a/src/Exception/HtmlSelectorException.php +++ b/src/Exception/HtmlSelectorException.php @@ -1,9 +1,9 @@ * * For the full copyright and license information, please view the LICENSE diff --git a/src/Exception/LoaderException.php b/src/Exception/LoaderException.php new file mode 100644 index 0000000..90a7ba6 --- /dev/null +++ b/src/Exception/LoaderException.php @@ -0,0 +1,19 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. 
+ */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\Exception; + +class LoaderException extends HtmlSelectorException +{ +} \ No newline at end of file diff --git a/src/Exception/QueryException.php b/src/Exception/QueryException.php index 1e7c1a0..fb88978 100644 --- a/src/Exception/QueryException.php +++ b/src/Exception/QueryException.php @@ -1,9 +1,9 @@ * * For the full copyright and license information, please view the LICENSE diff --git a/src/Exception/SelectorException.php b/src/Exception/SelectorException.php index f3c9d65..3ab5097 100644 --- a/src/Exception/SelectorException.php +++ b/src/Exception/SelectorException.php @@ -1,9 +1,9 @@ * * For the full copyright and license information, please view the LICENSE @@ -14,6 +14,20 @@ namespace Berlioz\HtmlSelector\Exception; +use Berlioz\HtmlSelector\CssSelector\CssSelector; + class SelectorException extends HtmlSelectorException { + /** + * Unknown pseudo class. + * + * @param string $name + * @param CssSelector $selector + * + * @return static + */ + public static function unknownPseudoClass(string $name, CssSelector $selector): static + { + return new static(sprintf('Invalid "%s" in selector "%s"', $name, (string)$selector)); + } } \ No newline at end of file diff --git a/src/Extension/CssExtension.php b/src/Extension/CssExtension.php new file mode 100644 index 0000000..9a456ee --- /dev/null +++ b/src/Extension/CssExtension.php @@ -0,0 +1,370 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\Extension; + +use Berlioz\HtmlSelector\CssSelector\CssSelector; +use Berlioz\HtmlSelector\Exception\SelectorException; +use Berlioz\HtmlSelector\HtmlSelector; +use Berlioz\HtmlSelector\PseudoClass\Nth; +use Berlioz\HtmlSelector\PseudoClass\PseudoClass; +use Berlioz\HtmlSelector\XpathSolver; + +/** + * Class CssExtension. 
+ */ +class CssExtension implements ExtensionInterface +{ + public function __construct(protected HtmlSelector $htmlSelector) + { + } + + /** + * @inheritDoc + */ + public function getPseudoClasses(): array + { + return [ + new PseudoClass('any', [$this, 'any']), + new PseudoClass('any-link', [$this, 'anyLink']), + new PseudoClass('blank', [$this, 'blank']), + new PseudoClass('checked', [$this, 'checked']), + new PseudoClass('dir', [$this, 'dir']), + new PseudoClass('disabled', [$this, 'disabled']), + new PseudoClass('empty', [$this, 'empty']), + new PseudoClass('enabled', [$this, 'enabled']), + new PseudoClass('first', [$this, 'first']), + new PseudoClass('first-child', [$this, 'firstChild']), + new PseudoClass('first-of-type', [$this, 'firstOfType'], true), + new PseudoClass('has', [$this, 'has']), + new PseudoClass('lang', [$this, 'lang']), + new PseudoClass('last-child', [$this, 'lastChild']), + new PseudoClass('last-of-type', [$this, 'lastOfType'], true), + new PseudoClass('not', [$this, 'not']), + new Nth('nth-child', $this->htmlSelector), + new Nth('nth-last-child', $this->htmlSelector), + new Nth('nth-of-type', $this->htmlSelector), + new Nth('nth-last-of-type', $this->htmlSelector), + new PseudoClass('only-child', [$this, 'onlyChild']), + new PseudoClass('only-of-type', [$this, 'onlyOfType'], true), + new PseudoClass('optional', [$this, 'optional']), + new PseudoClass('read-only', [$this, 'readOnly']), + new PseudoClass('read-write', [$this, 'readWrite']), + new PseudoClass('required', [$this, 'required']), + new PseudoClass('root', [$this, 'root']), + ]; + } + + /** + * :any(selector) + * + * @param string $xpath + * @param string $arguments + * + * @return string + * @throws SelectorException + */ + public function any(string $xpath, string $arguments): string + { + $subXpath = $this->htmlSelector->solveXpath($arguments ?? 
'*', XpathSolver::CONTEXT_SELF); + + return sprintf('%s[%s]', $xpath, $subXpath); + } + + /** + * :any-link + * + * @param string $xpath + * + * @return string + */ + public function anyLink(string $xpath): string + { + return $xpath . '[( name() = "a" or name() = "area" or name() = "link" ) and @href]'; + } + + /** + * :blank + * + * @param string $xpath + * + * @return string + */ + public function blank(string $xpath): string + { + return $xpath . '[count(child::*) = 0 and not(normalize-space())]'; + } + + /** + * :checked + * + * @param string $xpath + * + * @return string + */ + public function checked(string $xpath): string + { + return $xpath . '[( name() = "input" and ( @type = "checkbox" or @type = "radio" ) and @checked ) or ( name() = "option" and @selected )]'; + } + + /** + * :dir(...) + * + * @param string $xpath + * @param string $arguments + * + * @return string + */ + public function dir(string $xpath, string $arguments): string + { + if (!in_array(trim($arguments), ['ltr', 'rtl'])) { + return $xpath; + } + + return $xpath . sprintf('[(ancestor-or-self::*[@dir])[last()][@dir = "%s"]]', trim($arguments)); + } + + /** + * :disabled + * + * @param string $xpath + * + * @return string + */ + public function disabled(string $xpath): string + { + return $xpath . '[( name() = "button" or name() = "input" or name() = "optgroup" or name() = "option" or name() = "select" or name() = "textarea" or name() = "menuitem" or name() = "fieldset" ) and @disabled]'; + } + + /** + * :empty + * + * @param string $xpath + * + * @return string + */ + public function empty(string $xpath): string + { + return $xpath . '[count(child::*) = 0]'; + } + + /** + * :enabled + * + * @param string $xpath + * + * @return string + */ + public function enabled(string $xpath): string + { + return $xpath . 
'[( name() = "button" or name() = "input" or name() = "optgroup" or name() = "option" or name() = "select" or name() = "textarea" ) and not( @disabled )]'; + } + + /** + * :first + * + * @param string $xpath + * + * @return string + */ + public function first(string $xpath): string + { + return sprintf('(%s)[1]', $xpath); + } + + /** + * :first-child + * + * @param string $xpath + * + * @return string + */ + public function firstChild(string $xpath): string + { + return $xpath . '[../*[1] = node()]'; + } + + /** + * :first-of-type + * + * Keeps the first sibling matched by the given xpath step (positional predicate "[1]"), + * mirror of :last-of-type which uses "[last()]". + * + * @param string $xpath + * @param CssSelector $selector + * + * @return string + * @throws SelectorException if selector has no explicit type (or "*") + */ + public function firstOfType(string $xpath, CssSelector $selector): string + { + if (null !== $selector->getType() && '*' !== $selector->getType()) { + return $xpath . '[1]'; + } + + throw new SelectorException('"*:first-of-type" isn\'t implemented'); + } + + /** + * :has(selector) + * + * @param string $xpath + * @param string $arguments + * + * @return string + * @throws SelectorException + */ + public function has(string $xpath, string $arguments): string + { + $subXpath = $this->htmlSelector->solveXpath($arguments ?? '*', XpathSolver::CONTEXT_CHILD); + + return sprintf('%s[%s]', $xpath, $subXpath); + } + + /** + * :lang(...) + * + * @param string $xpath + * @param string $arguments + * + * @return string + */ + public function lang(string $xpath, string $arguments): string + { + return $xpath . sprintf('[@lang = "%1$s" or starts-with(@lang, "%1$s")]', addslashes($arguments)); + } + + /** + * :last-child + * + * @param string $xpath + * + * @return string + */ + public function lastChild(string $xpath): string + { + return $xpath . '[../*[last()] = node()]'; + } + + /** + * :last-of-type(...)
+ * + * @param string $xpath + * @param CssSelector $selector + * + * @return string + * @throws SelectorException + */ + public function lastOfType(string $xpath, CssSelector $selector): string + { + if (null !== $selector->getType() && '*' !== $selector->getType()) { + return $xpath . '[last()]'; + } + + throw new SelectorException('"*:last-of-type" isn\'t implemented'); + } + + /** + * :only-child + * + * @param string $xpath + * + * @return string + */ + public function onlyChild(string $xpath): string + { + return $xpath . '[last() = 1]'; + } + + /** + * :only-of-type + * + * @param string $xpath + * @param CssSelector $selector + * + * @return string + */ + public function onlyOfType(string $xpath, CssSelector $selector): string + { + return $xpath . sprintf('[count(../%s)=1]', $selector->getType() ?? '*'); + } + + /** + * :optional + * + * @param string $xpath + * + * @return string + */ + public function optional(string $xpath): string + { + return $xpath . '[name() = "input" or name() = "textarea" or name() = "select"][not( @required )]'; + } + + /** + * :read-only + * + * @param string $xpath + * + * @return string + */ + public function readOnly(string $xpath): string + { + return $xpath . + '[( not(@contenteditable) or @contenteditable = "false" ) and ' . + ' not( ( name() = "input" or name() = "textarea" or name() = "select" ) and not(@readonly) and not(@disabled) )]'; + } + + /** + * :read-write + * + * @param string $xpath + * + * @return string + */ + public function readWrite(string $xpath): string + { + return $xpath . + '[( @contenteditable and ( @contenteditable = "true" or not(normalize-space(@contenteditable)) ) ) or ' . + ' ( ( name() = "input" or name() = "textarea" or name() = "select" ) and not(@readonly) and not(@disabled) )]'; + } + + /** + * :required + * + * @param string $xpath + * + * @return string + */ + public function required(string $xpath): string + { + return $xpath . 
'[name() = "input" or name() = "textarea" or name() = "select"][@required]'; + } + + /** + * :root + * + * @param string $xpath + * + * @return string + */ + public function root(string $xpath): string + { + return sprintf('(%s/ancestor::*)[1]/*[1]', $xpath); + } + + public function not(string $xpath, string $arguments): string + { + $subXpath = $this->htmlSelector->solveXpath($arguments ?? '*', XpathSolver::CONTEXT_SELF); + + return sprintf('%s[not(%s)]', $xpath, $subXpath); + } +} \ No newline at end of file diff --git a/src/Extension/ExtensionInterface.php b/src/Extension/ExtensionInterface.php new file mode 100644 index 0000000..8a8a5e1 --- /dev/null +++ b/src/Extension/ExtensionInterface.php @@ -0,0 +1,30 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\Extension; + +use Berlioz\HtmlSelector\PseudoClass\PseudoClassInterface; + +/** + * Interface ExtensionInterface. + */ +interface ExtensionInterface +{ + /** + * Get pseudo classes. + * + * @return PseudoClassInterface[] + */ + public function getPseudoClasses(): array; +} \ No newline at end of file diff --git a/src/Extension/QueryExtension.php b/src/Extension/QueryExtension.php new file mode 100644 index 0000000..b640007 --- /dev/null +++ b/src/Extension/QueryExtension.php @@ -0,0 +1,187 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\Extension; + +use Berlioz\HtmlSelector\HtmlSelector; +use Berlioz\HtmlSelector\PseudoClass\PseudoClass; + +/** + * Class QueryExtension. 
+ */ +class QueryExtension implements ExtensionInterface +{ + public function __construct(protected HtmlSelector $htmlSelector) + { + } + + public function getPseudoClasses(): array + { + return [ + new PseudoClass('button', [$this, 'button']), + new PseudoClass('checkbox', [$this, 'checkbox']), + new PseudoClass('contains', [$this, 'contains']), +// new PseudoClass('count', [$this, 'count']), + new PseudoClass('eq', [$this, 'eq']), + new PseudoClass('even', [$this, 'even']), + new PseudoClass('file', [$this, 'file']), + new PseudoClass('gt', [$this, 'gt']), + new PseudoClass('gte', [$this, 'gte']), + new PseudoClass('header', [$this, 'header']), + new PseudoClass('image', [$this, 'image']), + new PseudoClass('input', [$this, 'input']), + new PseudoClass('last', [$this, 'last']), + new PseudoClass('lt', [$this, 'lt']), + new PseudoClass('lte', [$this, 'lte']), + new PseudoClass('odd', [$this, 'odd']), + new PseudoClass('parent', [$this, 'parent']), + new PseudoClass('password', [$this, 'password']), + new PseudoClass('radio', [$this, 'radio']), + new PseudoClass('reset', [$this, 'reset']), + new PseudoClass('selected', [$this, 'selected']), + new PseudoClass('submit', [$this, 'submit']), + new PseudoClass('text', [$this, 'text']), + ]; + } + + public function button(string $xpath): string + { + return $xpath . '[( name() = "button" and @type != "submit" ) or ( name() = "input" and @type = "button" )]'; + } + + public function checkbox(string $xpath): string + { + return $xpath . '[@type = "checkbox"]'; + } + + public function contains(string $xpath, string $arguments): string + { + return $xpath . 
sprintf('[contains(text(), "%s")]', addslashes($arguments)); + } + + public function eq(string $xpath, string $arguments): string + { + if (intval($arguments) >= 0) { + return sprintf('(%s)[position() = %d]', $xpath, intval($arguments) + 1); + } + + return sprintf('(%s)[last() - position() = %d]', $xpath, abs(intval($arguments) + 1)); + } + + public function even(string $xpath): string + { + return sprintf('(%s)[position() mod 2 != 1]', $xpath); + } + + public function file(string $xpath): string + { + return $xpath . '[@type="file"]'; + } + + public function gt(string $xpath, string $arguments): string + { + if (intval($arguments) >= 0) { + return sprintf('(%s)[position() > %d]', $xpath, intval($arguments) + 1); + } + + return sprintf('(%s)[last() - position() < %d]', $xpath, abs(intval($arguments) + 1)); + } + + public function gte(string $xpath, string $arguments): string + { + if (intval($arguments) >= 0) { + return sprintf('(%s)[position() >= %d]', $xpath, intval($arguments) + 1); + } + + return sprintf('(%s)[last() - position() <= %d]', $xpath, abs(intval($arguments) + 1)); + } + + public function header(string $xpath): string + { + return $xpath . '[name() = "h1" or name() = "h2" or name() = "h3" or name() = "h4" or name() = "h5" or name() = "h6"]'; + } + + public function image(string $xpath): string + { + return $xpath . '[@type="image"]'; + } + + public function input(string $xpath): string + { + return $xpath . 
'[name() = "input" or name() = "textarea" or name() = "select" or name() = "button"]'; + } + + public function last(string $xpath): string + { + return sprintf('(%s)[last()]', $xpath); + } + + public function lt(string $xpath, string $arguments): string + { + if (intval($arguments) >= 0) { + return sprintf('(%s)[position() < %d]', $xpath, intval($arguments) + 1); + } + + return sprintf('(%s)[last() - position() > %d]', $xpath, abs(intval($arguments) + 1)); + } + + public function lte(string $xpath, string $arguments): string + { + if (intval($arguments) >= 0) { + return sprintf('(%s)[position() <= %d]', $xpath, intval($arguments) + 1); + } + + return sprintf('(%s)[last() - position() >= %d]', $xpath, abs(intval($arguments) + 1)); + } + + public function odd(string $xpath): string + { + return sprintf('(%s)[position() mod 2 = 1]', $xpath); + } + + public function parent(string $xpath): string + { + return $xpath . '[normalize-space()]'; + } + + public function password(string $xpath): string + { + return $xpath . '[@type="password"]'; + } + + public function radio(string $xpath): string + { + return $xpath . '[@type="radio"]'; + } + + public function reset(string $xpath): string + { + return $xpath . '[@type="reset"]'; + } + + public function selected(string $xpath): string + { + return $xpath . '[name() = "option" and @selected]'; + } + + public function submit(string $xpath): string + { + return $xpath . '[( name() = "button" or name() = "input" ) and @type = "submit"]'; + } + + public function text(string $xpath): string + { + return $xpath . '[name() = "input" and ( @type="text" or not( @type ) )]'; + } +} \ No newline at end of file diff --git a/src/HtmlLoader.php b/src/HtmlLoader.php new file mode 100644 index 0000000..7acaf9c --- /dev/null +++ b/src/HtmlLoader.php @@ -0,0 +1,155 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. 
+ */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector; + +use Berlioz\HtmlSelector\Exception\LoaderException; +use DOMDocument; +use SimpleXMLElement; + +/** + * Class HtmlLoader. + */ +class HtmlLoader +{ + private SimpleXMLElement $xml; + + /** + * HtmlLoader constructor. + * + * @param SimpleXMLElement|string $contents + * @param bool $contentsIsFile + * @param string|null $encoding + * + * @throws LoaderException + */ + public function __construct( + SimpleXMLElement|string $contents, + bool $contentsIsFile = false, + ?string $encoding = null + ) { + if (is_string($contents)) { + if (true === $contentsIsFile) { + $contents = $this->loadFile($contents); + } + + $contents = $this->loadSimpleXml($contents, $encoding); + } + + $this->xml = $contents; + } + + /** + * Get XML. + * + * @return SimpleXMLElement + */ + public function getXml(): SimpleXMLElement + { + return $this->xml; + } + + /** + * Load file. + * + * @param string $filename + * + * @return string + * @throws LoaderException + */ + private function loadFile(string $filename): string + { + if (false === ($content = @file_get_contents($filename))) { + throw new LoaderException(sprintf('Unable to load file "%s"', $filename)); + } + + return $content; + } + + /** + * Load SimpleXML. + * + * @param string $contents + * @param string|null $encoding + * + * @return SimpleXMLElement + * @throws LoaderException + */ + private function loadSimpleXml(string $contents, ?string $encoding = null): SimpleXMLElement + { + // Encoding + $encoding = $encoding ?? 
(mb_detect_encoding($contents) ?: 'ASCII'); + + // Empty string + if (empty($contents)) { + return new SimpleXMLElement(''); + } + + // Prepare html + $contents = str_replace([' ', chr(13)], [' ', ''], $contents); + $contents = $this->stripInvalidXml($contents); + + // Convert HTML string to \DOMDocument + libxml_use_internal_errors(true); + $domHtml = new DOMDocument('1.0', $encoding); + if (!$domHtml->loadHTML(mb_convert_encoding($contents, 'HTML-ENTITIES', $encoding), LIBXML_COMPACT)) { + throw new LoaderException('Unable to parse HTML data.'); + } + + // Add 'document' root node + $nodeDocument = $domHtml->createElement('document'); + $nodeDocument->setAttribute('dir', 'ltr'); + while (isset($domHtml->childNodes[0])) { + $nodeDocument->appendChild($domHtml->childNodes[0]); + } + $domHtml->appendChild($nodeDocument); + + // Convert \DOMDocument to \SimpleXMLElement object + return simplexml_import_dom($domHtml); + } + + /** + * Strip invalid xml. + * + * @param $xml + * + * @return string + */ + private function stripInvalidXml($xml) + { + if (empty($xml)) { + return ''; + } + + $result = ''; + $length = strlen($xml); + for ($i = 0; $i < $length; $i++) { + $current = ord($xml[$i]); + + if ((0x9 == $current) || + (0xA == $current) || + (0xD == $current) || + (($current >= 0x20) && ($current <= 0xD7FF)) || + (($current >= 0xE000) && ($current <= 0xFFFD)) || + (($current >= 0x10000) && ($current <= 0x10FFFF)) + ) { + $result .= chr($current); + } else { + $result .= " "; + } + } + + return $result; + } +} \ No newline at end of file diff --git a/src/HtmlSelector.php b/src/HtmlSelector.php new file mode 100644 index 0000000..8035478 --- /dev/null +++ b/src/HtmlSelector.php @@ -0,0 +1,139 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. 
+ */
+
+declare(strict_types=1);
+
+namespace Berlioz\HtmlSelector;
+
+use Berlioz\HtmlSelector\CssSelector\CssSelectorParser;
+use Berlioz\HtmlSelector\Extension\CssExtension;
+use Berlioz\HtmlSelector\Extension\ExtensionInterface;
+use Berlioz\HtmlSelector\Extension\QueryExtension;
+use Berlioz\HtmlSelector\PseudoClass\PseudoClassSet;
+use Berlioz\HtmlSelector\Query\Query;
+use Psr\Http\Message\ResponseInterface;
+use SimpleXMLElement;
+
+/**
+ * Class HtmlSelector.
+ *
+ * Holds the pseudo class set, the xpath solver and the CSS selector parser, and creates
+ * Query objects from strings, SimpleXMLElement objects or PSR-7 responses.
+ */
+class HtmlSelector
+{
+    protected PseudoClassSet $pseudoClasses;
+    protected XpathSolver $xpathSolver;
+    protected CssSelectorParser $selectorParser;
+
+    /**
+     * HtmlSelector constructor.
+     *
+     * Builds the pseudo class set, shares it with the xpath solver, then registers the
+     * CssExtension and QueryExtension pseudo classes.
+     */
+    public function __construct()
+    {
+        $this->pseudoClasses = new PseudoClassSet();
+        $this->xpathSolver = new XpathSolver($this->pseudoClasses);
+        $this->selectorParser = new CssSelectorParser();
+
+        $this->addExtension(
+            new CssExtension($this),
+            new QueryExtension($this)
+        );
+    }
+
+    /**
+     * Add extension.
+     *
+     * Every pseudo class exposed by each extension is added to the pseudo class set.
+     *
+     * @param ExtensionInterface ...$extension
+     */
+    public function addExtension(ExtensionInterface ...$extension): void
+    {
+        foreach ($extension as $anExtension) {
+            $this->pseudoClasses->add(...$anExtension->getPseudoClasses());
+        }
+    }
+
+    /**
+     * Get pseudo classes.
+     *
+     * @return PseudoClassSet
+     */
+    public function getPseudoClasses(): PseudoClassSet
+    {
+        return $this->pseudoClasses;
+    }
+
+    /**
+     * Solve xpath.
+     *
+     * Delegates to the xpath solver.
+     *
+     * @param string $selector
+     * @param string|null $context
+     *
+     * @return string
+     * @throws Exception\SelectorException
+     */
+    public function solveXpath(string $selector, ?string $context = XpathSolver::CONTEXT_ALL): string
+    {
+        return $this->xpathSolver->solve($selector, $context);
+    }
+
+    /**
+     * Get selector parser.
+     *
+     * @return CssSelectorParser
+     */
+    public function getSelectorParser(): CssSelectorParser
+    {
+        return $this->selectorParser;
+    }
+
+    /**
+     * Query from response.
+     *
+     * When no encoding is given, the charset of the "Content-Type" header (if any) is used.
+     *
+     * @param ResponseInterface $response
+     * @param string|null $encoding
+     *
+     * @return Query
+     * @throws Exception\LoaderException
+     */
+    public function queryFromResponse(ResponseInterface $response, ?string $encoding = null): Query
+    {
+        if (null === $encoding) {
+            if ($contentType = $response->getHeader('Content-Type')) {
+                $contentType = implode(' ; ', $contentType);
+                $matches = [];
+
+                // The group has to be named: the value is read back as $matches['charset'].
+                if (1 === preg_match('/charset\s*=\s*(?<charset>[\w-]+)/i', $contentType, $matches)) {
+                    $encoding = $matches['charset'];
+                }
+            }
+        }
+
+        $contents = $response->getBody()->getContents();
+
+        return $this->query($contents, encoding: $encoding);
+    }
+
+    /**
+     * Create query.
+     *
+     * A string is handed to HtmlLoader together with $contentsIsFile and $encoding; a
+     * SimpleXMLElement is used as given, and both flags are then ignored.
+     *
+     * @param SimpleXMLElement|string $contents
+     * @param bool $contentsIsFile
+     * @param string|null $encoding
+     *
+     * @return Query
+     * @throws Exception\LoaderException
+     */
+    public function query(
+        SimpleXMLElement|string $contents,
+        bool $contentsIsFile = false,
+        ?string $encoding = null
+    ): Query {
+        if (is_string($contents)) {
+            // Only the loader has getXml(); calling it on a SimpleXMLElement would be a fatal error.
+            $contents = (new HtmlLoader($contents, $contentsIsFile, $encoding))->getXml();
+        }
+
+        return new Query([$contents], null, $this);
+    }
+}
\ No newline at end of file
diff --git a/src/PseudoClass/Nth.php b/src/PseudoClass/Nth.php
new file mode 100644
index 0000000..2e85d9e
--- /dev/null
+++ b/src/PseudoClass/Nth.php
@@ -0,0 +1,179 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code, to the root.
+ */
+
+declare(strict_types=1);
+
+namespace Berlioz\HtmlSelector\PseudoClass;
+
+use Berlioz\HtmlSelector\CssSelector\CssSelector;
+use Berlioz\HtmlSelector\CssSelector\CssSelectorParser;
+use Berlioz\HtmlSelector\Exception\SelectorException;
+use Berlioz\HtmlSelector\HtmlSelector;
+use Berlioz\HtmlSelector\XpathSolver;
+
+/**
+ * Pseudo class turning an "odd" / "even" / "an+b" / index argument into xpath position predicates.
+ *
+ * The behaviour is derived from the name given at construction: a name containing "type"
+ * selects the "of type" handling, a name containing "last" counts from the end.
+ */
+class Nth implements PseudoClassInterface
+{
+    public function __construct(
+        protected string $name,
+        protected HtmlSelector $htmlSelector
+    ) {
+    }
+
+    /**
+     * @inheritDoc
+     */
+    public function getName(): string
+    {
+        return $this->name;
+    }
+
+    /**
+     * @inheritDoc
+     * @throws SelectorException if the arguments are missing or do not match the expected syntax
+     */
+    public function buildXpath(string $xpath, ?string $arguments, CssSelector $selector): string
+    {
+        // A pseudo class used without arguments must fail as a syntax error, not as a TypeError.
+        $arguments = $this->parseArguments($arguments ?? '');
+
+        if ($this->isOfType()) {
+            $xpath .= '/../*';
+        }
+
+        // Has selector ("... of <selector>")
+        if ($arguments['selector']) {
+            $xpath = sprintf(
+                '%s[%s]',
+                $xpath,
+                $this->htmlSelector->solveXpath($arguments['selector'] ?? '*', XpathSolver::CONTEXT_SELF)
+            );
+        }
+
+        $xpath = $this->getExpression($xpath, $arguments);
+
+        // Outside the "of type" variants, re-apply the element type of the selector (if any).
+        if (false === $this->isOfType()) {
+            if (null !== $selector->getType() && $selector->getType() !== '*') {
+                $xpath = sprintf('%s[name() = "%s"]', $xpath, $selector->getType());
+            }
+        }
+
+        return $xpath;
+    }
+
+    /**
+     * Parse arguments.
+     *
+     * @param string $arguments
+     *
+     * @return array Matches of the argument pattern; unmatched groups are null
+     * @throws SelectorException if the arguments do not match the pattern
+     */
+    protected function parseArguments(string $arguments): array
+    {
+        // Regex
+        // NOTE(review): the group names look stripped from the pattern in this copy, while the code
+        // reads 'value_oddEven', 'value_a', 'value_b', 'value_d' and 'selector' - restore before use.
+        $regex = '~' .
+            CssSelectorParser::REGEX_DECLARATIONS .
+            "^ \s* (?: (? odd | even ) | (? [-+]? \d+ )? \s* n \s* (? [-+] \s* \d+ )? | (? [-|+]? \d+ ) ) ( \s+ of \s+ (? \g ) )? \s* $" .
+            "~x";
+        $matches = [];
+
+        if (1 !== preg_match($regex, $arguments, $matches, PREG_UNMATCHED_AS_NULL)) {
+            throw new SelectorException(sprintf('Bad syntax "%s" for :%s', $arguments, $this->name));
+        }
+
+        return $matches;
+    }
+
+    /**
+     * Is NTH of type.
+     *
+     * @return bool True if the name contains "type" (case-insensitive)
+     */
+    protected function isOfType(): bool
+    {
+        return false !== stripos($this->name, 'type');
+    }
+
+    /**
+     * Is NTH last.
+     *
+     * @return bool True if the name contains "last" (case-insensitive)
+     */
+    protected function isLast(): bool
+    {
+        return false !== stripos($this->name, 'last');
+    }
+
+    /**
+     * Treat expression.
+     *
+     * Appends the position predicate(s) matching the parsed arguments to the xpath.
+     *
+     * @param string $xpath
+     * @param array $arguments Matches returned by parseArguments()
+     *
+     * @return string
+     */
+    protected function getExpression(string $xpath, array $arguments): string
+    {
+        if (isset($arguments['value_oddEven'])) {
+            if ($arguments['value_oddEven'] === 'odd') {
+                if ($this->isLast()) {
+                    return $xpath . '[(last() - position() + 1) mod 2 = 1]';
+                }
+
+                return $xpath . '[position() mod 2 = 1]';
+            }
+
+            if ($this->isLast()) {
+                return $xpath . '[(last() - position() + 1) mod 2 = 0]';
+            }
+
+            return $xpath . '[position() mod 2 = 0]';
+        }
+
+        if (isset($arguments['value_d'])) {
+            // NOTE(review): xpath positions start at 1, so the "- 1" looks off by one - confirm.
+            return $xpath . sprintf('[%d]', intval($arguments['value_d']) - 1);
+        }
+
+        $nth_val_a = isset($arguments['value_a']) && is_numeric($arguments['value_a']) ? intval(
+            $arguments['value_a']
+        ) : 1;
+        // The pattern allows whitespace between the sign and the digits ("+ 1"), on which
+        // intval() would return 0: drop the whitespace before converting.
+        $nth_val_b = isset($arguments['value_b']) ? intval(preg_replace('/\s+/', '', $arguments['value_b'])) : 0;
+
+        if ($nth_val_a >= 0) {
+            if ($this->isLast()) {
+                $xpath = sprintf('%s[(last() - position() + 1) > %d]', $xpath, $nth_val_b - $nth_val_a);
+            } else {
+                $xpath = sprintf('%s[position() > %d]', $xpath, $nth_val_b - $nth_val_a);
+            }
+
+            if ($nth_val_a > 0) {
+                // NOTE(review): compared with the negative-step case below, these two branches look
+                // swapped (the "last" variant is the one counting from position()) - confirm.
+                if ($this->isLast()) {
+                    return sprintf('(%s)[(position() - %d) mod %d = 0]', $xpath, $nth_val_b, $nth_val_a);
+                }
+
+                return sprintf('(%s)[((last() - position() + 1) - %d) mod %d = 0]', $xpath, $nth_val_b, $nth_val_a);
+            }
+
+            return $xpath;
+        }
+
+
+        if (!$this->isLast()) {
+            $xpath = sprintf('%s[position() <= %d]', $xpath, $nth_val_b);
+
+            return sprintf('(%s)[(last() - position()) mod %d = 0]', $xpath, abs($nth_val_a));
+        }
+
+        $xpath = sprintf('%s[(last() - position() + 1) <= %d]', $xpath, $nth_val_b);
+
+        return sprintf('(%s)[(last() - (last() - position() + 1)) mod %d = 0]', $xpath, abs($nth_val_a));
+    }
+}
\ No newline at end of file
diff --git a/src/PseudoClass/PseudoClass.php b/src/PseudoClass/PseudoClass.php
new file mode 100644
index 0000000..a39ae8b
--- /dev/null
+++ b/src/PseudoClass/PseudoClass.php
@@ -0,0 +1,61 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code, to the root.
+ */
+
+declare(strict_types=1);
+
+namespace Berlioz\HtmlSelector\PseudoClass;
+
+use Berlioz\HtmlSelector\CssSelector\CssSelector;
+
+/**
+ * Class PseudoClass.
+ */
+class PseudoClass implements PseudoClassInterface
+{
+    /** @var callable Callback building the xpath, called with named arguments */
+    protected $callback;
+
+    public function __construct(
+        protected string $name,
+        callable $callback,
+        protected bool $withSelector = false
+    ) {
+        $this->callback = $callback;
+    }
+
+    /**
+     * @inheritDoc
+     */
+    public function getName(): string
+    {
+        return $this->name;
+    }
+
+    /**
+     * @inheritDoc
+     */
+    public function buildXpath(string $xpath, ?string $arguments, CssSelector $selector): string
+    {
+        // Named arguments for the callback: "arguments" only when not null,
+        // "selector" only when the pseudo class was declared with a selector.
+        $namedArguments = ['xpath' => $xpath];
+
+        if (null !== $arguments) {
+            $namedArguments['arguments'] = $arguments;
+        }
+
+        if ($this->withSelector) {
+            $namedArguments['selector'] = $selector;
+        }
+
+        return call_user_func_array($this->callback, $namedArguments);
+    }
+}
\ No newline at end of file
diff --git a/src/PseudoClass/PseudoClassInterface.php b/src/PseudoClass/PseudoClassInterface.php
new file mode 100644
index 0000000..8f892bc
--- /dev/null
+++ b/src/PseudoClass/PseudoClassInterface.php
@@ -0,0 +1,41 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code, to the root.
+ */
+
+declare(strict_types=1);
+
+namespace Berlioz\HtmlSelector\PseudoClass;
+
+use Berlioz\HtmlSelector\CssSelector\CssSelector;
+
+/**
+ * Interface PseudoClassInterface.
+ */
+interface PseudoClassInterface
+{
+    /**
+     * Get name.
+     *
+     * @return string
+     */
+    public function getName(): string;
+
+    /**
+     * Build xpath.
+     *
+     * @param string $xpath
+     * @param string|null $arguments
+     * @param CssSelector $selector
+     *
+     * @return string
+     */
+    public function buildXpath(string $xpath, ?string $arguments, CssSelector $selector): string;
+}
\ No newline at end of file
diff --git a/src/PseudoClass/PseudoClassSet.php b/src/PseudoClass/PseudoClassSet.php
new file mode 100644
index 0000000..cceb0f8
--- /dev/null
+++ b/src/PseudoClass/PseudoClassSet.php
@@ -0,0 +1,81 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code, to the root.
+ */
+
+declare(strict_types=1);
+
+namespace Berlioz\HtmlSelector\PseudoClass;
+
+use Berlioz\HtmlSelector\CssSelector\CssSelector;
+use Berlioz\HtmlSelector\Exception\SelectorException;
+
+/**
+ * Class PseudoClassSet.
+ *
+ * Ordered list of pseudo classes, looked up by name when building the xpath of a selector.
+ */
+class PseudoClassSet
+{
+    private array $pseudoClasses = [];
+
+    /**
+     * PseudoClassSet constructor.
+     *
+     * @param PseudoClassInterface[] $pseudoClasses Pseudo classes to register immediately
+     */
+    public function __construct(array $pseudoClasses = [])
+    {
+        // The argument used to be silently dropped; go through add() so that each
+        // element is checked against PseudoClassInterface.
+        $this->add(...array_values($pseudoClasses));
+    }
+
+    /**
+     * Add pseudo class.
+     *
+     * @param PseudoClassInterface ...$pseudoClass
+     */
+    public function add(PseudoClassInterface ...$pseudoClass)
+    {
+        array_push($this->pseudoClasses, ...$pseudoClass);
+    }
+
+    /**
+     * Get pseudo class.
+     *
+     * @param string $name
+     *
+     * @return PseudoClassInterface|null First pseudo class added under that exact name, null if none
+     */
+    public function get(string $name): ?PseudoClassInterface
+    {
+        /** @var PseudoClassInterface $pseudoClass */
+        foreach ($this->pseudoClasses as $pseudoClass) {
+            if ($pseudoClass->getName() === $name) {
+                return $pseudoClass;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Build xpath.
+     *
+     * Applies every pseudo class of the selector, in order, each one receiving the xpath
+     * produced by the previous one.
+     *
+     * @param string $xpath
+     * @param CssSelector $selector
+     *
+     * @return string
+     * @throws SelectorException if a pseudo class of the selector is not in the set
+     */
+    public function buildXpath(string $xpath, CssSelector $selector): string
+    {
+        foreach ($selector->getPseudoClasses() as $name => $arguments) {
+            if (null === ($pseudoClass = $this->get($name))) {
+                throw SelectorException::unknownPseudoClass($name, $selector);
+            }
+
+            $xpath = $pseudoClass->buildXpath($xpath, $arguments, $selector);
+        }
+
+        return $xpath;
+    }
+}
\ No newline at end of file
diff --git a/src/Query.php b/src/Query.php
deleted file mode 100644
index dd57eb4..0000000
--- a/src/Query.php
+++ /dev/null
@@ -1,993 +0,0 @@
-
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code, to the root.
- */
-
-declare(strict_types=1);
-
-namespace Berlioz\HtmlSelector;
-
-use Berlioz\HtmlSelector\Exception\QueryException;
-use Psr\Http\Message\ResponseInterface;
-
-/**
- * Class Query.
- *
- * @package Berlioz\HtmlSelector
- */
-class Query implements \IteratorAggregate, \Countable
-{
-    /** @var \Berlioz\HtmlSelector\Selector Selector */
-    private $selector;
-    /** @var int Selector context */
-    private $selectorContext = Selector::CONTEXT_ALL;
-    /** @var \SimpleXMLElement[] Simple XML Element */
-    private $simpleXml;
-    /** @var callable[] Dynamics functions */
-    private static $functions;
-
-    /**
-     * Query constructor.
-     *
-     * @param Query|\SimpleXMLElement|\SimpleXMLElement[] $element Element
-     * @param Selector|string $selector Selector
-     * @param int $selectorContext Context of selector
-     *
-     * @throws \InvalidArgumentException if bad arguments given.
- * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function __construct($element, $selector = null, int $selectorContext = Selector::CONTEXT_ALL) - { - // Element - /** @var \SimpleXMLElement[] $elements */ - if ($element instanceof \SimpleXMLElement) { - $elements = [$element]; - } else { - // Array of \SimpleXMLElement - if (is_array($element)) { - array_walk( - $element, - function ($v) { - if (!$v instanceof \SimpleXMLElement) { - throw new \InvalidArgumentException(sprintf('Element parameter must be a \SimpleXmlElement object (or array of this) or Query object, "%s" given', gettype($v))); - } - } - ); - - $elements = $element; - } else { - // Query object - if ($element instanceof Query) { - $elements = $element->get(); - } else { - throw new \InvalidArgumentException(sprintf('Element parameter must be a \SimpleXmlElement object (or array of this) or Query object, "%s" given', gettype($element))); - } - } - } - - // Selector - if (!is_null($selector)) { - if ($selector instanceof Selector) { - $this->selector = $selector; - } else { - if (is_string($selector)) { - $this->selector = new Selector($selector); - } else { - throw new \InvalidArgumentException(sprintf('Selector parameter must be a string or Selector object, "%s" given', gettype($selector))); - } - } - } - $this->selectorContext = $selectorContext; - - // Perform selection - if (!is_null($this->getSelector())) { - $this->simpleXml = []; - foreach ($elements as $simpleXml) { - if (($result = $simpleXml->xpath($this->getSelector()->xpath($this->selectorContext))) !== false) { - $this->simpleXml = array_merge(($this->simpleXml ?? []), $result); - } - } - } else { - $this->simpleXml = $elements; - } - } - - /** - * __sleep() magic method. 
- * - * @throws \Berlioz\HtmlSelector\Exception\QueryException - */ - public function __sleep() - { - throw new QueryException('It\'s not possible to serialize Query object.'); - } - - /** - * __call magic method. - * - * @param string $name Name - * @param array $arguments Arguments - * - * @return mixed - * @throws \Berlioz\HtmlSelector\Exception\QueryException if function not declared - */ - public function __call($name, $arguments) - { - if (isset(self::$functions[$name])) { - return call_user_func_array(self::$functions[$name], array_merge([$this], $arguments)); - } else { - throw new QueryException(sprintf('Function "%s" not declared', $name)); - } - } - - /** - * Add user defined function. - * - * Must be a function, the first argument given during call is the Query object. - * The others arguments, are the arguments given by user. - * - * @param string $name Name - * @param callable $callback Callback - */ - public static function addFunction(string $name, callable $callback): void - { - self::$functions[$name] = $callback; - } - - /** - * Create new iterator. - * - * @return \Berlioz\HtmlSelector\QueryIterator - */ - public function getIterator(): QueryIterator - { - return new QueryIterator($this); - } - - /** - * Load HTML from ResponseInterface. - * - * @param \Psr\Http\Message\ResponseInterface Response - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public static function loadResponse(ResponseInterface $response) - { - $encoding = null; - $contentType = $response->getHeader('Content-Type'); - if (($contentType = reset($contentType)) !== false) { - if (preg_match('/charset=([\w\-]+)/i', $contentType, $matches) === 1) { - $encoding = $matches[1]; - } - } - - return static::loadHtml($response->getBody()->getContents(), false, $encoding); - } - - /** - * Load HTML file. - * - * @param string $html HTML string. 
- * @param bool $isFile If first parameter is filename (default: false) - * @param string|null $encoding Force encoding - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public static function loadHtml(string $html, bool $isFile = false, string $encoding = null): Query - { - // Load file - if ($isFile) { - if (($html = @file_get_contents($html)) === false) { - throw new QueryException(sprintf('Unable to load file "%s"', $html)); - } - } - - // Encoding - $encoding = $encoding ?? (mb_detect_encoding($html) ?: 'ASCII'); - - // Empty string - if (empty($html)) { - return new Query(new \SimpleXMLElement('')); - } - - // Prepare html - $html = str_replace([' ', chr(13)], [' ', ''], $html); - $html = static::stripInvalidXml($html); - - // Convert HTML string to \DOMDocument - libxml_use_internal_errors(true); - $domHtml = new \DOMDocument('1.0', $encoding); - if (!$domHtml->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', $encoding), LIBXML_COMPACT)) { - throw new QueryException('Unable to parse HTML data.'); - } else { - // Add 'document' root node - $nodeDocument = $domHtml->createElement('document'); - $nodeDocument->setAttribute('dir', 'ltr'); - while (isset($domHtml->childNodes[0])) { - $nodeDocument->appendChild($domHtml->childNodes[0]); - } - $domHtml->appendChild($nodeDocument); - - // Convert \DOMDocument to \SimpleXMLElement object - $simpleXml = simplexml_import_dom($domHtml); - - return new Query($simpleXml); - } - } - - /** - * Strip invalid XML for init method. 
- * - * @param string $xml XML file - * - * @return string - */ - private static function stripInvalidXml($xml) - { - $ret = ""; - - if (empty($xml)) { - return $ret; - } - - $length = strlen($xml); - for ($i = 0; $i < $length; $i++) { - $current = ord($xml[$i]); - - if ((0x9 == $current) || - (0xA == $current) || - (0xD == $current) || - (($current >= 0x20) && ($current <= 0xD7FF)) || - (($current >= 0xE000) && ($current <= 0xFFFD)) || - (($current >= 0x10000) && ($current <= 0x10FFFF)) - ) { - $ret .= chr($current); - } else { - $ret .= " "; - } - } - - return $ret; - } - - /** - * Get selector. - * - * @return \Berlioz\HtmlSelector\Selector|null - */ - public function getSelector(): ?Selector - { - return $this->selector; - } - - /** - * Count direct elements in query. - * - * @return int - */ - public function count() - { - return count($this->simpleXml); - } - - /** - * Isset SimpleXMLElement ? - * - * @param int $key - * - * @return bool - */ - public function isset(int $key): bool - { - return isset($this->simpleXml[$key]); - } - - /** - * Get SimpleXMLElements. - * - * @param int|null $key - * - * @return \SimpleXMLElement|\SimpleXMLElement[] - * @throws \Berlioz\HtmlSelector\Exception\QueryException if element not found - */ - public function get(?int $key = null) - { - if (is_null($key)) { - return $this->simpleXml; - } else { - if (isset($this->simpleXml[$key])) { - return $this->simpleXml[$key]; - } else { - throw new QueryException(sprintf('Element %d not found in DOM', $key)); - } - } - } - - /** - * Get index of first element in selector. 
- * - * @param string|Query|null $selector Selector - * - * @return int - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function index($selector = null): int - { - if (empty($selector)) { - if (isset($this->simpleXml[0])) { - return count($this->simpleXml[0]->xpath('./preceding-sibling::*')); - } - } else { - if (!$selector instanceof Query) { - // Make selector - $selector = new Query($this, $selector ?? '*', Selector::CONTEXT_ROOT); - } - - if ($selector->isset(0) && ($result = array_search($selector->get(0), $this->get())) !== false) { - return intval($result); - } - } - - return -1; - } - - /** - * Find child elements with selector. - * - * @param string $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function find(string $selector): Query - { - return new Query($this, $selector); - } - - /** - * Filter current elements with selector. - * - * @param string $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function filter(string $selector): Query - { - return new Query($this, new Selector($selector), Selector::CONTEXT_SELF); - } - - /** - * Check if elements valid the selector specified or if elements are in Query elements given. 
- * - * @param string|Query $selector Selector - * - * @return bool - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function is($selector): bool - { - // Selector - if (!$selector instanceof Query) { - $selector = new Selector($selector); - } - - foreach ($this->simpleXml as $simpleXml) { - if ($selector instanceof Query) { - if (in_array($simpleXml, $selector->get())) { - return true; - } - } else { - if (count($simpleXml->xpath(sprintf('self::*[%s]', $selector->xpath(Selector::CONTEXT_SELF)))) == 1) { - return true; - } - } - } - - return false; - } - - /** - * Not elements of selector in current elements. - * - * @param string $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function not(string $selector): Query - { - return new Query($this, new Selector(sprintf(':not(%s)', $selector)), Selector::CONTEXT_SELF); - } - - /** - * Get parent of currents elements. - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function parent(): Query - { - $parents = []; - - foreach ($this->simpleXml as $simpleXml) { - $parents = array_merge($parents, $simpleXml->xpath('./..')); - } - - return new Query($parents); - } - - /** - * Get all parents of currents elements. - * - * @param string|null $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function parents(?string $selector = null): Query - { - $parents = []; - - // Selector - $selector = new Selector($selector ?? 
'*'); - - foreach ($this->simpleXml as $simpleXml) { - $parents = array_merge($parents, $simpleXml->xpath($selector->xpath(Selector::CONTEXT_PARENTS))); - } - - return new Query($parents); - } - - /** - * Get children of current elements. - * - * @param string|null $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function children(?string $selector = null): Query - { - $children = []; - - // Selector - if (!is_null($selector)) { - $selector = new Selector($selector); - } - - foreach ($this->simpleXml as $simpleXml) { - if (!is_null($selector)) { - $children = array_merge($children, $simpleXml->xpath('./child::*[boolean(' . $selector->xpath(Selector::CONTEXT_SELF) . ')]')); - } else { - $children = array_merge($children, $simpleXml->xpath('./child::*')); - } - } - - return new Query($children); - } - - /** - * Get html of the first element. - * - * @return string - */ - public function html(): string - { - if (isset($this->simpleXml[0])) { - $regex = <<<'EOD' -~ -(?(DEFINE) -(? '(?>[^'\\]++|\\.)*' | "(?>[^"\\]++|\\.)*" ) - (? \g | [^>]+ ) - (? < \g+ > ) - (? <\/ \g > ) -) - -^ \s* \g (? .*) \g \s* $ -~ixs -EOD; - - if (preg_match($regex, (string)$this->simpleXml[0]->asXML(), $matches) === 1) { - return $matches['html'] ?? ''; - } - } - - return ''; - } - - /** - * Get text of elements and children elements. - * - * @param bool $withChildren With children (default: true) - * - * @return string - */ - public function text(bool $withChildren = true): string - { - $str = ''; - - /** @var \SimpleXMLElement $simpleXml */ - foreach ($this->simpleXml as $simpleXml) { - if ($withChildren) { - $str .= strip_tags((string)$simpleXml->asXML()); - } else { - $str .= (string)$simpleXml; - } - } - - return $str; - } - - /** - * Get/Set attribute value of the first element, null if attribute undefined. 
- * - * @param string $name Name - * @param string|null $value Value - * - * @return null|string|\Berlioz\HtmlSelector\Query - */ - public function attr(string $name, string $value = null) - { - if (isset($this->simpleXml[0])) { - if (!is_null($value)) { - if (isset($this->simpleXml[0]->attributes()->{$name})) { - $this->simpleXml[0]->attributes()->{$name} = $value; - } else { - $this->simpleXml[0]->addAttribute($name, $value); - } - - return $this; - } else { - if ($this->simpleXml[0]->attributes()->{$name}) { - return (string)$this->simpleXml[0]->attributes()->{$name}; - } else { - return null; - } - } - } else { - if (!is_null($value)) { - return $this; - } else { - return null; - } - } - } - - /** - * Get/Set property value of attribute of the first element, false if attribute undefined. - * - * @param string $name Name - * @param bool|null $value Value - * - * @return bool|\Berlioz\HtmlSelector\Query - */ - public function prop(string $name, bool $value = null) - { - if (isset($this->simpleXml[0])) { - if (!is_null($value)) { - if ($value === true) { - if (isset($this->simpleXml[0]->attributes()->{$name})) { - $this->simpleXml[0]->attributes()->{$name} = $name; - } else { - $this->simpleXml[0]->addAttribute($name, $name); - } - } else { - unset($this->simpleXml[0]->attributes()->{$name}); - } - - return $this; - } else { - return isset($this->simpleXml[0]->attributes()->{$name}); - } - } else { - if (!is_null($value)) { - return $this; - } else { - return false; - } - } - } - - /** - * Get data value. - * - * @param string $name Name of data with camelCase syntax - * @param string|null $value Value - * - * @return null|string|\Berlioz\HtmlSelector\Query - */ - public function data(string $name, string $value = null) - { - $name = mb_strtolower(preg_replace('/([a-z0-9])([A-Z])/', '\\1-\\2', $name)); - - return $this->attr(sprintf('data-%s', $name), $value); - } - - /** - * Has class? 
- * - * @param string $classes Classes separated by space - * - * @return bool - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function hasClass(string $classes) - { - $classes = explode(' ', $classes); - - // Filter values - $classes = array_map('trim', $classes); - $classes = array_filter($classes); - - if (count($classes) > 0) { - // Make selector - $selector = implode( - '', - array_map( - function ($class) { - return sprintf('[class~="%s"]', $class); - }, - $classes - ) - ); - $selector = new Selector($selector); - - // Check all elements - foreach ($this->simpleXml as $simpleXml) { - if (count($simpleXml->xpath($selector->xpath(Selector::CONTEXT_SELF))) > 0) { - return true; - } - } - } - - return false; - } - - /** - * Add class. - * - * @param string $classes Classes separated by space - * - * @return static - */ - public function addClass(string $classes): Query - { - $classes = explode(' ', $classes); - $classes = array_map('trim', $classes); - - foreach ($this->simpleXml as $simpleXml) { - $elClasses = (string)($simpleXml->attributes()->class ?? ''); - $elClasses = explode(' ', $elClasses); - $elClasses = array_map('trim', $elClasses); - $elClasses = array_merge($elClasses, $classes); - $elClasses = array_unique($elClasses); - - if (is_null($simpleXml->attributes()->class)) { - $simpleXml->addAttribute('class', ''); - } - $simpleXml->attributes()->class = implode(' ', $elClasses); - } - - return $this; - } - - /** - * Remove class. - * - * @param string $classes Classes separated by space - * - * @return static - */ - public function removeClass(string $classes): Query - { - $classes = explode(' ', $classes); - $classes = array_map('trim', $classes); - - foreach ($this->simpleXml as $simpleXml) { - $elClasses = (string)($simpleXml->attributes()->class ?? 
''); - $elClasses = explode(' ', $elClasses); - $elClasses = array_map('trim', $elClasses); - $elClasses = array_diff($elClasses, $classes); - $elClasses = array_unique($elClasses); - - if (!is_null($simpleXml->attributes()->class)) { - $simpleXml->attributes()->class = implode(' ', $elClasses); - } - } - - return $this; - } - - /** - * Toggle class. - * - * @param string $classes Classes separated by space - * @param bool|null $test - * - * @return \Berlioz\HtmlSelector\Query - */ - public function toggleClass(string $classes, bool $test = null): Query - { - if (!is_null($test)) { - if ($test === false) { - return $this->removeClass($classes); - } - - return $this->addClass($classes); - } - - $classes = explode(' ', $classes); - $classes = array_map('trim', $classes); - - foreach ($this->simpleXml as $simpleXml) { - $elClasses = (string)($simpleXml->attributes()->class ?? ''); - $elClasses = explode(' ', $elClasses); - $elClasses = array_map('trim', $elClasses); - - foreach ($classes as $class) { - if (($foundClass = array_search($class, $elClasses)) === false) { - $elClasses[] = $class; - continue; - } - - unset($elClasses[$foundClass]); - } - - if (is_null($simpleXml->attributes()->class)) { - $simpleXml->addAttribute('class', ''); - } - $simpleXml->attributes()->class = implode(' ', $elClasses); - } - - return $this; - } - - /** - * Get strictly immediately next element. - * - * @param string|null $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function next(string $selector = null): Query - { - $next = []; - - // Selector - $selector = new Selector($selector ?? '*'); - - foreach ($this->simpleXml as $simpleXml) { - $next = array_merge($next, $simpleXml->xpath($selector->xpath(Selector::CONTEXT_NEXT))); - } - - return new Query($next); - } - - /** - * Get all next elements. 
- * - * @param string|null $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function nextAll(string $selector = null): Query - { - $nextAll = []; - - // Selector - $selector = new Selector($selector ?? '*'); - - foreach ($this->simpleXml as $simpleXml) { - $nextAll = array_merge($nextAll, $simpleXml->xpath($selector->xpath(Selector::CONTEXT_NEXT_ALL))); - } - - return new Query($nextAll); - } - - /** - * Get strictly immediately prev element. - * - * @param string|null $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function prev(string $selector = null): Query - { - $prev = []; - - // Selector - $selector = new Selector($selector ?? '*'); - - foreach ($this->simpleXml as $simpleXml) { - $prev = array_merge($prev, $simpleXml->xpath($selector->xpath(Selector::CONTEXT_PREV))); - } - - return new Query($prev); - } - - /** - * Get all prev elements. - * - * @param string|null $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function prevAll(string $selector = null): Query - { - $prevAll = []; - - // Selector - $selector = new Selector($selector ?? '*'); - - foreach ($this->simpleXml as $simpleXml) { - $prevAll = array_merge($prevAll, $simpleXml->xpath($selector->xpath(Selector::CONTEXT_PREV_ALL))); - } - - return new Query($prevAll); - } - - /** - * Get value of a form element. - * - * @return array|null|string - */ - public function val() - { - if (isset($this->simpleXml[0])) { - switch ($this->simpleXml[0]->getName()) { - case 'button': - case 'input': - switch ($this->simpleXml[0]->attributes()->{'type'} ?? 
'text') { - case 'checkbox': - return (string)$this->simpleXml[0]->attributes()->{'value'} ?? null; - case 'radio': - return (string)$this->simpleXml[0]->attributes()->{'value'} ?? 'on'; - default: - return (string)$this->simpleXml[0]->attributes()->{'value'} ?? ''; - } - break; - case 'select': - $allSelected = $this->simpleXml[0]->xpath('./option[@selected]'); - $values = []; - - if (empty($allSelected)) { - $options = $this->simpleXml[0]->xpath('./option'); - - if (!empty($options)) { - array_push($allSelected, $this->simpleXml[0]->xpath('./option')[0]); - } - } - - foreach ($allSelected as $selected) { - if (isset($selected->attributes()->{'value'})) { - if (isset($selected->attributes()->{'value'})) { - $values[] = (string)$selected->attributes()->{'value'}; - } else { - $values[] = (string)$selected; - } - } else { - $values[] = (string)$selected; - } - } - - if (!isset($this->simpleXml[0]->attributes()->{'multiple'})) { - if (($value = end($values)) !== false) { - return $value; - } else { - return null; - } - } else { - return $values; - } - case 'textarea': - return (string)$this->simpleXml[0]; - default: - return null; - } - } - - return null; - } - - /** - * Serialize values of forms elements in an array. - * - * Typically, the function is called on main form elements, but can be called on input elements. - * - * @return array - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function serializeArray() - { - $result = []; - - $query = $this->filter('form :input, :input') - ->filter('[name]:enabled:not(:button, :submit, [type=reset], [type="checkbox"]:not(:checked), [type="radio"]:not(:checked))'); - - foreach ($query as $element) { - foreach ((array)$element->val() as $value) { - $result[] = [ - 'name' => $element->attr('name'), - 'value' => $value, - ]; - } - } - - return $result; - } - - /** - * Encode form elements as a string for HTTP submission. 
- * - * @return string - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function serialize() - { - $arraySerialized = $this->serializeArray(); - $queryStrings = []; - - foreach ($arraySerialized as $element) { - $queryStrings[] = sprintf('%s=%s', urlencode($element['name']), urlencode($element['value'])); - } - - return implode('&', $queryStrings); - } - - /** - * Remove elements. - * - * @param string|null $selector Selector - * - * @return \Berlioz\HtmlSelector\Query - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - */ - public function remove(string $selector = null): Query - { - if (!is_null($selector)) { - $query = $this->filter($selector); - } else { - $query = $this; - } - - /** @var \SimpleXMLElement $simpleXml */ - foreach ($this->simpleXml as $i => $simpleXml) { - $domNode = dom_import_simplexml($simpleXml); - $domNode->parentNode->removeChild($domNode); - } - - return $query; - } -} diff --git a/src/Query/Query.php b/src/Query/Query.php new file mode 100644 index 0000000..eceb422 --- /dev/null +++ b/src/Query/Query.php @@ -0,0 +1,747 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +declare(strict_types=1); + +namespace Berlioz\HtmlSelector\Query; + +use Berlioz\HtmlSelector\CssSelector\CssSelector; +use Berlioz\HtmlSelector\Exception\QueryException; +use Berlioz\HtmlSelector\Exception\SelectorException; +use Berlioz\HtmlSelector\HtmlSelector; +use Berlioz\HtmlSelector\XpathSolver; +use Countable; +use IteratorAggregate; +use SimpleXMLElement; + +/** + * Class Query. 
+ */
+class Query implements Countable, IteratorAggregate
+{
+    /**
+     * Query constructor.
+     *
+     * @param array $html Elements of the result set; entries that are not a SimpleXMLElement are dropped
+     * @param CssSelector|string|null $selector Selector the result set comes from, if any
+     * @param HtmlSelector $htmlSelector Used to solve selectors to xpath
+     */
+    public function __construct(
+        protected array $html,
+        protected CssSelector|string|null $selector,
+        protected HtmlSelector $htmlSelector,
+    ) {
+        // array_filter() preserves keys: re-index so that positional access (isset(0), get(0),
+        // iteration by position) keeps working when an entry has been dropped.
+        $this->html = array_values(
+            array_filter($this->html, fn($value) => $value instanceof SimpleXMLElement)
+        );
+    }
+
+    /**
+     * Get an iterator over the elements; each iteration yields a Query of one element.
+     *
+     * @return QueryIterator
+     */
+    public function getIterator(): QueryIterator
+    {
+        return new QueryIterator($this, $this->htmlSelector);
+    }
+
+    /**
+     * Solve a selector in the given context and wrap the result in a new query.
+     *
+     * @param string $selector Selector
+     * @param string $context Context of resolution (XpathSolver::CONTEXT_* constant)
+     *
+     * @return static
+     * @throws SelectorException
+     */
+    protected function query(string $selector, string $context = XpathSolver::CONTEXT_ALL): static
+    {
+        // "new static" so that the declared "static" return type also holds for child classes
+        return new static($this->selector($selector, $context), $selector, $this->htmlSelector);
+    }
+
+    /**
+     * Solve a selector in the given context and return the matching elements.
+     *
+     * @param string $selector Selector
+     * @param string $context Context of resolution (XpathSolver::CONTEXT_* constant)
+     *
+     * @return SimpleXMLElement[]
+     * @throws SelectorException
+     */
+    protected function selector(string $selector, string $context = XpathSolver::CONTEXT_ALL): array
+    {
+        $xpath = $this->htmlSelector->solveXpath($selector, $context);
+
+        return $this->xpath($xpath);
+    }
+
+    /**
+     * Run an xpath expression on every element and concatenate the results.
+     *
+     * @param string $xpath Xpath expression
+     *
+     * @return SimpleXMLElement[]
+     * @throws SelectorException if the xpath expression can not be evaluated
+     */
+    protected function xpath(string $xpath): array
+    {
+        $result = [];
+
+        /** @var SimpleXMLElement $element */
+        foreach ($this->html as $element) {
+            $elementResult = $element->xpath($xpath);
+
+            // SimpleXMLElement::xpath() reports a failure with false or null
+            if (!is_array($elementResult)) {
+                throw new SelectorException(sprintf('Xpath error "%s"', $xpath));
+            }
+
+            array_push($result, ...$elementResult);
+        }
+
+        return $result;
+    }
+
+    /**
+     * Get selector.
+     *
+     * @return CssSelector|string|null
+     */
+    public function getSelector(): CssSelector|string|null
+    {
+        return $this->selector;
+    }
+
+    /**
+     * Count the elements of the query.
+     *
+     * @return int
+     */
+    public function count(): int
+    {
+        return count($this->html);
+    }
+
+    /**
+     * Isset?
+     *
+     * @param int $key
+     *
+     * @return bool
+     */
+    public function isset(int $key): bool
+    {
+        return isset($this->html[$key]);
+    }
+
+    /**
+     * Get elements.
+     *
+     * @param int|null $key
+     *
+     * @return SimpleXMLElement|SimpleXMLElement[]
+     * @throws QueryException if element not found
+     */
+    public function get(?int $key = null): SimpleXMLElement|array
+    {
+        if (null === $key) {
+            return $this->html;
+        }
+
+        if (isset($this->html[$key])) {
+            return $this->html[$key];
+        }
+
+        throw new QueryException(sprintf('Element %d not found in DOM', $key));
+    }
+
+    /**
+     * Get index of first element in selector.
+     *
+     * Without argument, the index is the position of the first element among its siblings.
+     * With a selector or a query, it is the position, in the current elements, of the first
+     * element of that selector/query. -1 is returned when no index can be given.
+     *
+     * @param Query|string|null $selector Selector
+     *
+     * @return int
+     * @throws QueryException
+     */
+    public function index(Query|string|null $selector = null): int
+    {
+        if (empty($selector)) {
+            if (isset($this->html[0])) {
+                return count($this->html[0]->xpath('./preceding-sibling::*'));
+            }
+
+            return -1;
+        }
+
+        if (is_string($selector)) {
+            $elements = $this->selector($selector, XpathSolver::CONTEXT_PARENTS);
+
+            // Without this guard, reset() gives false on an empty result and the loose search
+            // below could match a childless element instead of reporting "not found".
+            if ([] === $elements) {
+                return -1;
+            }
+
+            $needle = reset($elements);
+        } else {
+            $needle = $selector->get(0);
+        }
+
+        // NOTE(review): the comparison is kept loose as in the previous implementation, presumably
+        // because two SimpleXMLElement wrappers of the same node are not identical; confirm before
+        // switching to a strict search.
+        $index = array_search($needle, $this->get());
+
+        return false !== $index ? intval($index) : -1;
+    }
+
+    /**
+     * Find child elements with selector.
+     *
+     * @param string $selector Selector
+     *
+     * @return static
+     * @throws SelectorException
+     */
+    public function find(string $selector): static
+    {
+        // "new static" so that the declared "static" return type also holds for child classes
+        return new static($this->selector($selector), $selector, $this->htmlSelector);
+    }
+
+    /**
+     * Filter current elements with selector.
+     *
+     * @param string $selector Selector
+     *
+     * @return static
+     * @throws SelectorException
+     */
+    public function filter(string $selector): static
+    {
+        return new static($this->selector($selector, XpathSolver::CONTEXT_SELF), $selector, $this->htmlSelector);
+    }
+
+    /**
+     * Check if elements valid the selector specified or if elements are in Query elements given.
+     *
+     * @param string|Query $selector Selector
+     *
+     * @return bool
+     * @throws QueryException
+     */
+    public function is(Query|string $selector): bool
+    {
+        // Selector: at least one of the current elements must match it itself
+        if (!$selector instanceof Query) {
+            return count($this->selector($selector, XpathSolver::CONTEXT_SELF)) > 0;
+        }
+
+        // Query: at least one of the current elements must be part of it
+        $candidates = $selector->get();
+
+        foreach ($this->html as $simpleXml) {
+            if (in_array($simpleXml, $candidates)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Not elements of selector in current elements.
+     *
+     * @param string $selector Selector
+     *
+     * @return static
+     */
+    public function not(string $selector): static
+    {
+        return $this->query(sprintf(':not(%s)', $selector), XpathSolver::CONTEXT_SELF);
+    }
+
+    /**
+     * Get parent of currents elements.
+     *
+     * @return static
+     * @throws SelectorException
+     */
+    public function parent(): static
+    {
+        // "new static" so that the declared "static" return type also holds for child classes
+        return new static($this->xpath('./..'), null, $this->htmlSelector);
+    }
+
+    /**
+     * Get all parents of currents elements.
+     *
+     * @param string|null $selector Selector
+     *
+     * @return static
+     */
+    public function parents(?string $selector = null): static
+    {
+        return $this->query($selector ?? '*', XpathSolver::CONTEXT_PARENTS);
+    }
+
+    /**
+     * Get children of current elements.
+     *
+     * @param string|null $selector Selector
+     *
+     * @return static
+     * @throws SelectorException
+     */
+    public function children(?string $selector = null): static
+    {
+        if (null === $selector) {
+            return new static($this->xpath('./child::*'), null, $this->htmlSelector);
+        }
+
+        // The predicate needs the xpath *expression* of the selector, not the elements it matches
+        $condition = $this->htmlSelector->solveXpath($selector, XpathSolver::CONTEXT_SELF);
+
+        return new static(
+            $this->xpath(sprintf('./child::*[boolean(%s)]', $condition)),
+            null,
+            $this->htmlSelector
+        );
+    }
+
+    /**
+     * Get html of the first element.
+     *
+     * The element is serialized, then its own opening and closing tags are stripped.
+     *
+     * @return string Inner markup, empty string if there is no element or no match
+     */
+    public function html(): string
+    {
+        if (!isset($this->html[0])) {
+            return '';
+        }
+
+        $regex = <<<'EOD'
+~
+(?(DEFINE)
+    (?<quotes> '(?>[^'\\]++|\\.)*' | "(?>[^"\\]++|\\.)*" )
+    (?<tagContent> \g<quotes> | [^>]+ )
+    (?<tagOpen> < \g<tagContent>+ > )
+    (?<tagClose> <\/ \g<tagContent> > )
+)
+
+^ \s* \g<tagOpen> (?<html> .*) \g<tagClose> \s* $
+~ixs
+EOD;
+
+        if (preg_match($regex, (string)$this->html[0]->asXML(), $matches) === 1) {
+            return $matches['html'] ?? '';
+        }
+
+        return '';
+    }
+
+    /**
+     * Get text of elements and children elements.
+     *
+     * @param bool $withChildren With children (default: true)
+     *
+     * @return string
+     */
+    public function text(bool $withChildren = true): string
+    {
+        $str = '';
+
+        /** @var SimpleXMLElement $simpleXml */
+        foreach ($this->html as $simpleXml) {
+            if ($withChildren) {
+                // Serialize the whole sub tree, then drop the markup
+                $str .= strip_tags((string)$simpleXml->asXML());
+                continue;
+            }
+
+            $str .= (string)$simpleXml;
+        }
+
+        return $str;
+    }
+
+    /**
+     * Get/Set attribute value of the first element, null if attribute undefined.
+     *
+     * @param string $name Name
+     * @param string|null $value Value
+     *
+     * @return static|string|null Current query in setter mode, value of attribute in getter mode
+     */
+    public function attr(string $name, ?string $value = null): static|string|null
+    {
+        $first = $this->html[0] ?? null;
+
+        // Setter
+        if (null !== $value) {
+            if (null !== $first) {
+                if (isset($first->attributes()->{$name})) {
+                    $first->attributes()->{$name} = $value;
+                } else {
+                    $first->addAttribute($name, $value);
+                }
+            }
+
+            return $this;
+        }
+
+        // Getter: isset() and not truthiness, an attribute with an empty value is defined too
+        if (null !== $first && isset($first->attributes()->{$name})) {
+            return (string)$first->attributes()->{$name};
+        }
+
+        return null;
+    }
+
+    /**
+     * Get/Set property value of attribute of the first element, false if attribute undefined.
+     *
+     * @param string $name Name
+     * @param bool|null $value Value
+     *
+     * @return bool|Query
+     */
+    public function prop(string $name, bool $value = null): static|bool
+    {
+        $first = $this->html[0] ?? null;
+
+        // No element: nothing to read, nothing to write
+        if (null === $first) {
+            return null !== $value ? $this : false;
+        }
+
+        // Getter
+        if (null === $value) {
+            return isset($first->attributes()->{$name});
+        }
+
+        // Unset
+        if (false === $value) {
+            unset($first->attributes()->{$name});
+
+            return $this;
+        }
+
+        // Set: the property is written as an attribute valued with its own name
+        if (isset($first->attributes()->{$name})) {
+            $first->attributes()->{$name} = $name;
+        } else {
+            $first->addAttribute($name, $name);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Get/Set value of a "data-*" attribute of the first element.
+     *
+     * @param string $name Name of data with camelCase syntax
+     * @param string|null $value Value
+     *
+     * @return static|string|null
+     */
+    public function data(string $name, string $value = null): static|string|null
+    {
+        // camelCase to dash-case: "fooBar" gives "foo-bar"
+        $dashed = preg_replace('/([a-z0-9])([A-Z])/', '\\1-\\2', $name);
+
+        return $this->attr(sprintf('data-%s', mb_strtolower($dashed)), $value);
+    }
+
+    /**
+     * Check if at least one element has all the given classes.
+     *
+     * @param string $classes Classes separated by space
+     *
+     * @return bool
+     */
+    public function hasClass(string $classes): bool
+    {
+        // Split, trim and drop empty values
+        $names = array_filter(array_map('trim', explode(' ', $classes)));
+
+        if ([] === $names) {
+            return false;
+        }
+
+        // One attribute condition per class, all chained in a single selector
+        $selector = '';
+        foreach ($names as $name) {
+            $selector .= sprintf('[class~="%s"]', $name);
+        }
+
+        return [] !== $this->selector($selector, XpathSolver::CONTEXT_SELF);
+    }
+
+    /**
+     * Add class.
+     *
+     * @param string $classes Classes separated by space
+     *
+     * @return static
+     */
+    public function addClass(string $classes): static
+    {
+        // Requested classes: split, trimmed, without empty values nor duplicates
+        $toAdd = array_unique(array_filter(array_map('trim', explode(' ', $classes))));
+
+        foreach ($this->html as $element) {
+            $current = array_filter(
+                array_map('trim', explode(' ', (string)($element->attributes()->class ?? '')))
+            );
+            $merged = implode(' ', array_unique(array_merge($current, $toAdd)));
+
+            // Existing attribute is overwritten, missing attribute is created
+            if (null !== $element->attributes()->class) {
+                $element->attributes()->class = $merged;
+                continue;
+            }
+
+            $element->addAttribute('class', $merged);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Remove the given classes from every element.
+     *
+     * @param string $classes Classes separated by space
+     *
+     * @return static
+     */
+    public function removeClass(string $classes): static
+    {
+        // Requested classes: split, trimmed, without empty values nor duplicates
+        $toRemove = array_unique(array_filter(array_map('trim', explode(' ', $classes))));
+
+        foreach ($this->html as $element) {
+            // Nothing to remove on an element without class attribute
+            if (null === $element->attributes()->class) {
+                continue;
+            }
+
+            $current = array_filter(
+                array_map('trim', explode(' ', (string)($element->attributes()->class ?? '')))
+            );
+            $remaining = array_unique(array_diff($current, $toRemove));
+
+            $element->attributes()->class = implode(' ', $remaining);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Toggle class.
+     *
+     * @param string $classes Classes separated by space
+     * @param bool|callable|null $test If given: true (or truthy callable result) adds the classes, false removes them
+     *
+     * @return static
+     */
+    public function toggleClass(string $classes, bool|callable|null $test = null): static
+    {
+        // With test parameter
+        if (null !== $test) {
+            if (is_callable($test)) {
+                $test = !!$test();
+            }
+
+            if ($test === false) {
+                return $this->removeClass($classes);
+            }
+
+            return $this->addClass($classes);
+        }
+
+        $classes = explode(' ', $classes);
+        $classes = array_map('trim', $classes);
+        $classes = array_filter($classes);
+        $classes = array_unique($classes);
+
+        foreach ($this->html as $simpleXml) {
+            $elClasses = (string)($simpleXml->attributes()->class ?? '');
+            $elClasses = explode(' ', $elClasses);
+            $elClasses = array_map('trim', $elClasses);
+            $elClasses = array_filter($elClasses);
+            $elClasses = array_unique($elClasses);
+
+            foreach ($classes as $class) {
+                // Strict search: class names are plain strings, "10" must not be taken for "1e1"
+                if (($foundClass = array_search($class, $elClasses, true)) === false) {
+                    $elClasses[] = $class;
+                    continue;
+                }
+
+                unset($elClasses[$foundClass]);
+            }
+
+            // Missing attribute is created, existing attribute is overwritten
+            if (null === $simpleXml->attributes()->class) {
+                $simpleXml->addAttribute('class', implode(' ', $elClasses));
+                continue;
+            }
+
+            $simpleXml->attributes()->class = implode(' ', $elClasses);
+        }
+
+        return $this;
+    }
+
+    /**
+     * Get strictly immediately next element.
+     *
+     * @param string|null $selector Selector
+     *
+     * @return static
+     */
+    public function next(?string $selector = null): static
+    {
+        // "new static" so that the declared "static" return type also holds for child classes
+        return new static($this->selector($selector ?? '*', XpathSolver::CONTEXT_NEXT), null, $this->htmlSelector);
+    }
+
+    /**
+     * Get all next elements.
+     *
+     * @param string|null $selector Selector
+     *
+     * @return static
+     */
+    public function nextAll(?string $selector = null): static
+    {
+        return new static(
+            $this->selector($selector ?? '*', XpathSolver::CONTEXT_NEXT_ALL),
+            null,
+            $this->htmlSelector
+        );
+    }
+
+    /**
+     * Get strictly immediately prev element.
+     *
+     * @param string|null $selector Selector
+     *
+     * @return static
+     */
+    public function prev(?string $selector = null): static
+    {
+        // "new static" so that the declared "static" return type also holds for child classes
+        return new static($this->selector($selector ?? '*', XpathSolver::CONTEXT_PREV), null, $this->htmlSelector);
+    }
+
+    /**
+     * Get all prev elements.
+     *
+     * @param string|null $selector Selector
+     *
+     * @return static
+     */
+    public function prevAll(?string $selector = null): static
+    {
+        return new static(
+            $this->selector($selector ?? '*', XpathSolver::CONTEXT_PREV_ALL),
+            null,
+            $this->htmlSelector
+        );
+    }
+
+    /**
+     * Get value of a form element.
+     *
+     * Only the first element is read:
+     * - button/input: "value" attribute; without it, null for a checkbox, "on" for a radio, "" otherwise
+     * - select: value (or text) of selected options, first option if none is selected;
+     *   an array for a multiple select, the last value (or null) otherwise
+     * - textarea: its text
+     * - any other element: null
+     *
+     * @return array|string|null
+     */
+    public function val(): array|string|null
+    {
+        $element = $this->html[0] ?? null;
+
+        if (null === $element) {
+            return null;
+        }
+
+        switch ($element->getName()) {
+            case 'button':
+            case 'input':
+                // Null when the attribute is missing; test it *before* casting to string
+                $value = $element->attributes()->{'value'};
+
+                // match() compares strictly: the type must be a string, not a SimpleXMLElement
+                return match ((string)($element->attributes()->{'type'} ?? 'text')) {
+                    'checkbox' => null !== $value ? (string)$value : null,
+                    'radio' => null !== $value ? (string)$value : 'on',
+                    default => (string)($value ?? ''),
+                };
+            case 'select':
+                $allSelected = $element->xpath('./option[@selected]') ?: [];
+
+                if ([] === $allSelected) {
+                    // No explicit selection: fall back on the first option, if any
+                    $allSelected = array_slice($element->xpath('./option') ?: [], 0, 1);
+                }
+
+                $values = [];
+                foreach ($allSelected as $selected) {
+                    // "value" attribute when defined, text of the option otherwise
+                    $values[] = (string)($selected->attributes()->{'value'} ?? $selected);
+                }
+
+                if (isset($element->attributes()->{'multiple'})) {
+                    return $values;
+                }
+
+                // Single select: the last selected option wins
+                return [] === $values ? null : end($values);
+            case 'textarea':
+                return (string)$element;
+            default:
+                return null;
+        }
+    }
+
+    /**
+     * Serialize values of forms elements in an array.
+     *
+     * Typically, the function is called on main form elements, but can be called on input elements.
+     *
+     * @return array List of ['name' => ..., 'value' => ...] entries, one per value
+     */
+    public function serializeArray(): array
+    {
+        $result = [];
+
+        // Named and enabled inputs, except buttons and not checked checkboxes/radios
+        $query =
+            $this
+                ->filter('form :input, :input')
+                ->filter(
+                    '[name]:enabled:not(:button, :submit, [type=reset], [type="checkbox"]:not(:checked), [type="radio"]:not(:checked))'
+                );
+
+        foreach ($query as $element) {
+            // The array cast gives one entry per value of a multiple select, and none for a null value
+            foreach ((array)$element->val() as $value) {
+                $result[] = [
+                    'name' => $element->attr('name'),
+                    'value' => $value,
+                ];
+            }
+        }
+
+        return $result;
+    }
+
+    /**
+     * Encode form elements as a string for HTTP submission.
+     *
+     * @return string
+     */
+    public function serialize(): string
+    {
+        $queryStrings = [];
+
+        foreach ($this->serializeArray() as $element) {
+            // Explicit casts: a name can be reported as null and urlencode() only accepts strings
+            $queryStrings[] = sprintf(
+                '%s=%s',
+                urlencode((string)$element['name']),
+                urlencode((string)$element['value'])
+            );
+        }
+
+        return implode('&', $queryStrings);
+    }
+
+    /**
+     * Remove elements.
+     *
+     * Elements are detached from their document; with a selector, only the current elements
+     * matching it are removed.
+     *
+     * @param string|null $selector Selector
+     *
+     * @return static Query of the removed elements
+     * @throws QueryException
+     */
+    public function remove(?string $selector = null): static
+    {
+        $query = $this;
+        if (!is_null($selector)) {
+            $query = $this->filter($selector);
+        }
+
+        /** @var SimpleXMLElement $simpleXml */
+        foreach ($query->get() as $simpleXml) {
+            $domNode = dom_import_simplexml($simpleXml);
+
+            // A node already detached (removed twice, listed twice) has no parent anymore
+            $domNode?->parentNode?->removeChild($domNode);
+        }
+
+        return $query;
+    }
+}
\ No newline at end of file
diff --git a/src/Query/QueryIterator.php b/src/Query/QueryIterator.php
new file mode 100644
index 0000000..a58afd2
--- /dev/null
+++ b/src/Query/QueryIterator.php
@@ -0,0 +1,106 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code, to the root.
+ */
+declare(strict_types=1);
+
+namespace Berlioz\HtmlSelector\Query;
+
+use Berlioz\HtmlSelector\Exception\QueryException;
+use Berlioz\HtmlSelector\HtmlSelector;
+use Countable;
+use OutOfBoundsException;
+use SeekableIterator;
+
+/**
+ * Class QueryIterator.
+ *
+ * Iterates over a Query by position; each step yields a new Query of a single element.
+ */
+class QueryIterator implements SeekableIterator, Countable
+{
+    private int $position = 0;
+
+    /**
+     * QueryIterator constructor.
+     *
+     * @param Query $query
+     * @param HtmlSelector $htmlSelector
+     */
+    public function __construct(
+        protected Query $query,
+        protected HtmlSelector $htmlSelector
+    ) {
+    }
+
+    /**
+     * Get a query of the single element at the current position.
+     *
+     * @return Query
+     * @throws QueryException if there is no element at the current position
+     */
+    public function current(): Query
+    {
+        return new Query(
+            [$this->query->get($this->position)],
+            $this->query->getSelector(),
+            $this->htmlSelector
+        );
+    }
+
+    /**
+     * Move to the next position.
+     *
+     * The position is allowed to go one step past the last element: this is how valid()
+     * reports the end of the iteration, so seek() and its bounds check are not used here.
+     */
+    public function next(): void
+    {
+        $this->position++;
+    }
+
+    /**
+     * Get the current position.
+     *
+     * @return int
+     */
+    public function key(): int
+    {
+        return $this->position;
+    }
+
+    /**
+     * Check if an element exists at the current position.
+     *
+     * @return bool
+     */
+    public function valid(): bool
+    {
+        return $this->query->isset($this->position);
+    }
+
+    /**
+     * Go back to the first position.
+     */
+    public function rewind(): void
+    {
+        $this->position = 0;
+    }
+
+    /**
+     * Count the elements of the query.
+     *
+     * @return int
+     */
+    public function count(): int
+    {
+        return count($this->query);
+    }
+
+    /**
+     * Seek to a position.
+     *
+     * @param int $position The position to seek to
+     *
+     * @throws OutOfBoundsException if there is no element at the given position
+     */
+    public function seek(int $position): void
+    {
+        // Validate the requested position, not the current one; the position is left
+        // untouched when the seek fails.
+        if (!$this->query->isset($position)) {
+            throw new OutOfBoundsException(sprintf('Invalid seek position (%d)', $position));
+        }
+
+        $this->position = $position;
+    }
+}
\ No newline at end of file
diff --git a/src/QueryIterator.php b/src/QueryIterator.php
deleted file mode 100644
index 6ed8d31..0000000
--- a/src/QueryIterator.php
+++ /dev/null
@@ -1,125 +0,0 @@
-
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code, to the root.
- */
-
-declare(strict_types=1);
-
-namespace Berlioz\HtmlSelector;
-
-/**
- * Class QueryIterator.
- * - * @package Berlioz\HtmlSelector - */ -class QueryIterator implements \SeekableIterator, \Countable -{ - /** @var int Position */ - private $position; - /** @var \Berlioz\HtmlSelector\Query Query */ - private $query; - - /** - * QueryIterator constructor. - * - * @param \Berlioz\HtmlSelector\Query $query - */ - public function __construct(Query $query) - { - $this->position = 0; - $this->query = $query; - } - - /** - * Return the current element. - * - * @return Query Can return any type. - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - * @link http://php.net/manual/en/iterator.current.php - */ - public function current() - { - return new Query($this->query->get($this->position)); - } - - /** - * Move forward to next element. - * - * @return void - * @link http://php.net/manual/en/iterator.next.php - */ - public function next() - { - $this->seek($this->position + 1); - } - - /** - * Return the key of the current element. - * - * @return mixed Scalar on success, or null on failure. - * @link http://php.net/manual/en/iterator.key.php - */ - public function key() - { - return $this->position; - } - - /** - * Checks if current position is valid - * - * @return bool - * @link http://php.net/manual/en/iterator.valid.php - */ - public function valid() - { - return $this->query->isset($this->position); - } - - /** - * Rewind the Iterator to the first element. - * - * @return void Any returned value is ignored. - * @link http://php.net/manual/en/iterator.rewind.php - */ - public function rewind() - { - $this->position = 0; - } - - /** - * Count elements of an object. - * - * @return int - * @throws \Berlioz\HtmlSelector\Exception\QueryException - * @link http://php.net/manual/en/countable.count.php - */ - public function count() - { - return count($this->query->get()); - } - - /** - * Seeks to a position. - * - * @param int $position The position to seek to. 
- * - * @return void - * @link http://php.net/manual/en/seekableiterator.seek.php - */ - public function seek($position) - { - if (!$this->query->isset($this->position)) { - throw new \OutOfBoundsException(sprintf('Invalid seek position (%d)', $position)); - } - - $this->position = $position; - } -} \ No newline at end of file diff --git a/src/Selector.php b/src/Selector.php deleted file mode 100644 index 917e08c..0000000 --- a/src/Selector.php +++ /dev/null @@ -1,730 +0,0 @@ - - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code, to the root. - */ - -declare(strict_types=1); - -namespace Berlioz\HtmlSelector; - -use Berlioz\HtmlSelector\Exception\SelectorException; - -/** - * Class Selector. - * - * @package Berlioz\HtmlSelector - */ -class Selector -{ - /** Context definitions */ - const CONTEXT_ROOT = 0; - const CONTEXT_ALL = 1; - const CONTEXT_CHILD = 2; - const CONTEXT_SELF = 3; - const CONTEXT_PARENTS = 4; - const CONTEXT_NEXT = 5; - const CONTEXT_NEXT_ALL = 6; - const CONTEXT_PREV = 7; - const CONTEXT_PREV_ALL = 8; - /** Regex declarations */ - const REGEX_DECLARATIONS = <<<'EOD' -(?(DEFINE) - (? '(?>[^'\\]++|\\.)*' | "(?>[^"\\]++|\\.)*" ) - - (? \w+ | \* ) - (? \#(?:[\w\-]+) ) - (? \.(?:[\w\-]+) ) - (? \g+ ) - (? \[ \s* [\w\-]+ (?: \s* (?: = | \^= | \$= | \*= | != | \~= | \|= ) \s* (\g|[^\]]+))? \s* \] ) - (? \g+ ) - (? :([\w\-]+ (?: \( \s* (\g | \g | [^)]*) \s* \) )? ) ) - (? \g+ ) - - (? \g? \g? \g? \g? \g? ) - (? \g \s* ( \s* ([+>\~] | >> )? \s* \g )* ) - (? \g \s* ( , \s* \g )* ) -) - -EOD; - /** @var string Selector */ - private $selector; - /** @var string Xpath */ - private $xpath; - - /** - * Selector constructor. - * - * @param string $selector - * - * @throws \InvalidArgumentException if it's an invalid selector. - */ - public function __construct(string $selector) - { - // Check selector - $regex = "~" . - static::REGEX_DECLARATIONS . - "^ \g $" . 
- "~xis"; - - if (preg_match($regex, $selector) == 1) { - $this->selector = $selector; - } else { - throw new \InvalidArgumentException(sprintf('Invalid selector "%s" format', $selector)); - } - } - - /** - * __toString() magic method. - * - * @return string - */ - public function __toString() - { - return $this->selector; - } - - /** - * Extract selectors from a multiple selector. - * - * Like ".class, .class2[attribute]" > 2 selectors: ".class" and ".class2[attribute]". - * - * @param string $pSelector - * - * @return array - */ - private function extractSelectors(string $pSelector): array - { - $selectors = []; - - // Regex - $regex = - '~' . - static::REGEX_DECLARATIONS . - '(? \g )' . - '~xis'; - - $matches = []; - if (preg_match_all($regex, $pSelector, $matches, PREG_SET_ORDER) !== false) { - $matches = array_filter(array_column($matches, 'selector')); - - foreach ($matches as $selector) { - $selectors[] = $this->extractExpressions($selector); - } - } - - return $selectors; - } - - /** - * Extract expressions from a selector. - * - * Like ".class[attribute] .class2" > 2 expressions: ".class[attribute]" and ".class2". - * - * @param string $selector - * - * @return array - */ - private function extractExpressions(string $selector): array - { - $expressions = []; - - // Regex - $regex = - '~' . - static::REGEX_DECLARATIONS . - '(? [+>\~] | >> )? \s* (? \g )' . - '~xis'; - - $matches = []; - if (preg_match_all($regex, $selector, $matches, PREG_SET_ORDER) !== false) { - foreach ($matches as $match) { - if (!empty($match[0])) { - if (!empty($expression = $this->extractExpression($match['expression']))) { - $expression['predecessor'] = $match['predecessor']; - - $expressions[] = $expression; - } - } - } - } - - return $expressions; - } - - /** - * Extract expression into parameters. 
- * - * Example of result for expression "select#toto.class.class2[attribute1][attribute2^="value"]:disabled:eq(1)": - * ['type' => 'select', - * 'id' => 'toto', - * 'classes' => ['class', 'class2'], - * 'attributes' => [['name' => 'attribute1', - * 'comparison' => null, - * 'value' => null], - * ['name' => 'attribute2', - * 'comparison' => '^=', - * 'value' => 'value']]], - * 'filters' => ['disabled' => null, - * 'eq' => '1']] - * - * @param string $expression - * - * @return array - */ - private function extractExpression(string $expression): array - { - $expressionDef = []; - $regex = - '~' . - static::REGEX_DECLARATIONS . - '^ \s* (? \g)? (? \g)? (? \g)? (? \g)? (? \g)? \s* $' . - '~xis'; - - $match = []; - if (preg_match($regex, $expression, $match) !== false) { - if (!empty($match[0])) { - // Classes - { - $classes = []; - - if (!empty($match['classes'])) { - $regexClass = - '~' . - static::REGEX_DECLARATIONS . - '\.(? [\w\-]+ )' . - '~xis'; - - $matchesClass = []; - if (preg_match_all($regexClass, $match['classes'], $matchesClass, PREG_SET_ORDER)) { - foreach ($matchesClass as $matchClass) { - $classes[] = $matchClass['class']; - } - } - } - } - - // Attributes - { - $attributes = []; - - if (!empty($match['attributes'])) { - $regexAttribute = - '~' . - static::REGEX_DECLARATIONS . - '\[ \s* (? [\w\-]+ ) (?: \s* (? = | \^= | \$= | \*= | != | \~= | \|= ) \s* (?: (? \g) | (? [^\]]+) ) )? \s* \]' . - '~xis'; - - $matchesAttribute = []; - if (preg_match_all($regexAttribute, $match['attributes'], $matchesAttribute, PREG_SET_ORDER)) { - foreach ($matchesAttribute as $matchAttribute) { - $attributes[] = ['name' => $matchAttribute['name'], - 'comparison' => $matchAttribute['comparison'] ?? null, - 'value' => - !empty($matchAttribute['quotes']) ? - stripslashes(substr($matchAttribute['quotes'], 1, -1)) : - (!empty($matchAttribute['value']) ? 
- $matchAttribute['value'] : - null)]; - } - } - } - } - - // Filters - { - $filters = []; - - if (!empty($match['filters'])) { - $regexFilter = - '~' . - static::REGEX_DECLARATIONS . - ':(:? (? [\w\-]+ ) (?: \( \s* (? \g | \g | [^)]*) \s* \) )? )' . - '~xis'; - - $matchesFilter = []; - if (preg_match_all($regexFilter, $match['filters'], $matchesFilter, PREG_SET_ORDER)) { - foreach ($matchesFilter as $matchFilter) { - $filters[$matchFilter['name']] = $matchFilter['value'] ?? null; - } - } - } - } - - // Definition - $expressionDef = ['type' => $match['type'] ?? null, - 'id' => isset($match['id']) ? substr($match['id'], 1) : null, - 'classes' => $classes, - 'attributes' => $attributes, - 'filters' => $filters]; - } - } - - return $expressionDef; - } - - /** - * Convert selector to an xpath selector. - * - * "%CONTEXT%" special variable is inserted in Xpath selector to define context. - * She will be replaced by the good context like './/' for all children, referred to the class constants. - * - * Not implemented CSS pseudo classes: - * - :default - * - :fullscreen - * - :focus - * - :hover - * - :in-range - * - :indeterminate - * - :invalid - * - :left - * - :link - * - :matches() - * - :nth-column() - * - :nth-last-column() - * - :out-of-range - * - :right - * - :root - * - :scope - * - :target - * - :valid - * - :visited - * - * @param string $pSelector - * - * @return string - * @throws \Berlioz\HtmlSelector\Exception\SelectorException if a filter hasn't good value. 
- * - * @link http://erwy.developpez.com/tutoriels/xml/xpath-langage-selection-xml/ - * @link http://erwy.developpez.com/tutoriels/xml/xpath-liste-fonctions/ - */ - private function xpathConversion(string $pSelector) - { - $xpath = ''; - - $iSelector = 0; - foreach ($this->extractSelectors($pSelector) as $selector) { - $anXpath = '%CONTEXT%'; - - $iExpression = 0; - foreach ($selector as $expression) { - // Predecessor - if ($iExpression > 0) { - switch ($expression['predecessor']) { - case '>': - $anXpath .= '/'; - break; - case '+': - $anXpath .= '/following-sibling::*[1]/self::'; - break; - case '~': - $anXpath .= '/following-sibling::'; - break; - default: - $anXpath .= '//'; - } - } - - // Type - $expression['type'] = !empty($expression['type']) ? $expression['type'] : '*'; - $anXpath .= $expression['type']; - - // ID - if (!empty($expression['id'])) { - $anXpath .= '[@id="' . addslashes($expression['id']) . '"]'; - } - - // Classes - foreach ($expression['classes'] as $class) { - $anXpath .= '[contains(concat(" ", @class, " "), " ' . addslashes($class) . ' ")]'; - } - - // Attributes - foreach ($expression['attributes'] as $attribute) { - switch ($attribute['comparison']) { - case '=': - $anXpath .= '[@' . $attribute['name'] . '="' . addslashes($attribute['value']) . '"]'; - break; - case '^=': - $anXpath .= '[starts-with(@' . $attribute['name'] . ', "' . addslashes($attribute['value']) . '")]'; - break; - case '$=': - $anXpath .= '[\'' . addslashes($attribute['value']) . '\' = substring(@' . $attribute['name'] . ', string-length(@' . $attribute['name'] . ') - string-length(\'' . addslashes($attribute['value']) . '\') +1)]'; - break; - case '*=': - $anXpath .= '[contains(@' . $attribute['name'] . ', "' . addslashes($attribute['value']) . '")]'; - break; - case '!=': - $anXpath .= '[@' . $attribute['name'] . '!="' . addslashes($attribute['value']) . 
'"]'; - break; - case '~=': - $anXpath .= sprintf('[contains(concat(" ", @%s, " "), " %s ")]', $attribute['name'], addslashes($attribute['value'])); - break; - case '|=': - $anXpath .= sprintf('[@%1$s = \'%2$s\' or starts-with(@%1$s, \'%2$s\')]', $attribute['name'], addslashes($attribute['value'])); - break; - default: - $anXpath .= '[@' . $attribute['name'] . ']'; - } - } - - // Filters - foreach ($expression['filters'] as $filterName => $filter) { - switch ($filterName) { - // CSS Pseudo Classes - case 'any': - $subSelector = new Selector($filter ?? '*'); - $anXpath .= sprintf('[%s]', $subSelector->xpath(Selector::CONTEXT_SELF)); - break; - case 'any-link': - $anXpath .= '[( name() = "a" or name() = "area" or name() = "link" ) and @href]'; - break; - case 'blank': - $anXpath .= '[count(child::*) = 0 and not(normalize-space())]'; - break; - case 'checked': - $anXpath .= '[( name() = "input" and ( @type = "checkbox" or @type = "radio" ) and @checked ) or ( name() = "option" and @selected )]'; - break; - case 'dir': - if (in_array(trim($filter), ['ltr', 'rtl'])) { - $anXpath .= sprintf('[(ancestor-or-self::*[@dir])[last()][@dir = "%s"]]', trim($filter)); - } - break; - case 'disabled': - $anXpath .= '[( name() = "button" or name() = "input" or name() = "optgroup" or name() = "option" or name() = "select" or name() = "textarea" or name() = "menuitem" or name() = "fieldset" ) and @disabled]'; - break; - case 'empty': - $anXpath .= '[count(child::*) = 0]'; - break; - case 'enabled': - $anXpath .= '[( name() = "button" or name() = "input" or name() = "optgroup" or name() = "option" or name() = "select" or name() = "textarea" ) and not( @disabled )]'; - break; - case 'first': - $anXpath = sprintf('(%s)[1]', $anXpath); - break; - case 'first-child': - $anXpath .= '[../*[1] = node()]'; - break; - case 'first-of-type': - if ($expression['type'] != '*') { - $anXpath .= '[1]'; - } else { - throw new SelectorException('"*:first-of-type" isn\'t implemented'); - } - break; - 
case 'has': - $subSelector = new Selector($filter ?? '*'); - $anXpath .= sprintf('[%s]', $subSelector->xpath(Selector::CONTEXT_CHILD)); - break; - case 'lang': - $anXpath .= sprintf('[@lang = \'%1$s\' or starts-with(@lang, \'%1$s\')]', addslashes($filter)); - break; - case 'last-child': - $anXpath .= '[../*[last()] = node()]'; - break; - case 'last-of-type': - if ($expression['type'] != '*') { - $anXpath .= '[last()]'; - } else { - throw new SelectorException('"*:last-of-type" isn\'t implemented'); - } - break; - case 'not': - $subSelector = new Selector($filter ?? '*'); - $anXpath .= sprintf('[not(%s)]', $subSelector->xpath(Selector::CONTEXT_SELF)); - break; - case 'nth-child': - case 'nth-last-child': - case 'nth-of-type': - case 'nth-last-of-type': - //$filter = preg_replace("/\s+/", '', $filter); - $nth_type = in_array($filterName, ['nth-of-type', 'nth-last-of-type']); - $nth_last = in_array($filterName, ['nth-last-of-type', 'nth-last-child']); - - // Not implemented ? - if ($nth_type && $expression['type'] == '*') { - throw new SelectorException(sprintf('"*:%s" isn\'t implemented', $nth_last ? 'nth-last-of-type' : 'nth-of-type')); - } - - // Regex - $nth_regex = '~' . - static::REGEX_DECLARATIONS . - "^ \s* (?: (? odd | even ) | (? [-+]? \d+ )? \s* n \s* (? [-+] \s* \d+ )? | (? [-|+]? \d+ ) ) ( \s+ of \s+ (? \g ) )? \s* $" . - "~x"; - $nth_matches = []; - - if (preg_match($nth_regex, $filter, $nth_matches)) { - if ($nth_type === false) { - $anXpath .= '/../*'; - } - - // Selector ? - if (!empty($nth_matches['selector'])) { - $subSelector = new Selector($nth_matches['selector'] ?? 
'*'); - $anXpath .= sprintf('[%s]', $subSelector->xpath(Selector::CONTEXT_SELF)); - } - - if (isset($nth_matches['value_oddEven']) && $nth_matches['value_oddEven'] == 'odd') { - if (!$nth_last) { - $anXpath .= '[position() mod 2 = 1]'; - } else { - $anXpath .= '[(last() - position() + 1) mod 2 = 1]'; - } - } else { - if (isset($nth_matches['value_oddEven']) && $nth_matches['value_oddEven'] == 'even') { - if (!$nth_last) { - $anXpath .= '[position() mod 2 = 0]'; - } else { - $anXpath .= '[(last() - position() + 1) mod 2 = 0]'; - } - } else { - if (isset($nth_matches['value_d']) && is_numeric($nth_matches['value_d'])) { - $anXpath .= sprintf('[%d]', intval($nth_matches['value_d']) - 1); - } else { - $nth_val_a = isset($nth_matches['value_a']) && is_numeric($nth_matches['value_a']) ? intval($nth_matches['value_a']) : 1; - $nth_val_b = isset($nth_matches['value_b']) ? intval($nth_matches['value_b']) : 0; - - if ($nth_val_a >= 0) { - if (!$nth_last) { - $anXpath = sprintf('%s[position() > %d]', $anXpath, $nth_val_b - $nth_val_a); - } else { - $anXpath = sprintf('%s[(last() - position() + 1) > %d]', $anXpath, $nth_val_b - $nth_val_a); - } - - if ($nth_val_a > 0) { - if (!$nth_last) { - $anXpath = sprintf('(%s)[(position() - %d) mod %d = 0]', $anXpath, $nth_val_b, $nth_val_a); - } else { - $anXpath = sprintf('(%s)[((last() - position() + 1) - %d) mod %d = 0]', $anXpath, $nth_val_b, $nth_val_a); - } - } - } else { - if (!$nth_last) { - $anXpath = sprintf('%s[position() <= %d]', $anXpath, $nth_val_b); - $anXpath = sprintf('(%s)[(last() - position()) mod %d = 0]', $anXpath, abs($nth_val_a)); - } else { - $anXpath = sprintf('%s[(last() - position() + 1) <= %d]', $anXpath, $nth_val_b); - $anXpath = sprintf('(%s)[(last() - (last() - position() + 1)) mod %d = 0]', $anXpath, abs($nth_val_a)); - } - } - } - } - } - - if ($nth_type === false) { - if ($expression['type'] != '*') { - $anXpath = sprintf('%s[name() = "%s"]', $anXpath, $expression['type']); - } - } - } - break; - case 
'only-child': - $anXpath .= '[last() = 1]'; - break; - case 'only-of-type': - $anXpath .= sprintf('[count(../%s)=1]', $expression['type']); - break; - case 'optional': - $anXpath .= '[name() = "input" or name() = "textarea" or name() = "select"][not( @required )]'; - break; - case 'read-only': - $anXpath .= '[( not(@contenteditable) or @contenteditable = "false" ) and ' . - ' not( ( name() = "input" or name() = "textarea" or name() = "select" ) and not(@readonly) and not(@disabled) )]'; - break; - case 'read-write': - $anXpath .= '[( @contenteditable and ( @contenteditable = "true" or not(normalize-space(@contenteditable)) ) ) or ' . - ' ( ( name() = "input" or name() = "textarea" or name() = "select" ) and not(@readonly) and not(@disabled) )]'; - break; - case 'required': - $anXpath .= '[name() = "input" or name() = "textarea" or name() = "select"][@required]'; - break; - case 'root': - $anXpath = sprintf('(%s/ancestor::*)[1]/*[1]', $anXpath); - break; - - // Additional pseudo classes (not in CSS specifications) from jQuery library - case 'button': - $anXpath .= '[( name() = "button" and @type != "submit" ) or ( name() = "input" and @type = "button" )]'; - break; - case 'checkbox': - $anXpath .= '[@type = "checkbox"]'; - break; - case 'contains': - $anXpath .= sprintf('[contains(text(), \'%s\')]', addslashes($filter)); - break; - case 'eq': - if (intval($filter) >= 0) { - $anXpath = sprintf('(%s)[position() = %d]', $anXpath, intval($filter) + 1); - } else { - $anXpath = sprintf('(%s)[last() - position() = %d]', $anXpath, abs(intval($filter) + 1)); - } - break; - case 'even': - $anXpath = sprintf('(%s)[position() mod 2 != 1]', $anXpath); - break; - case 'file': - $anXpath .= '[@type="file"]'; - break; - case 'gt': - if (intval($filter) >= 0) { - $anXpath = sprintf('(%s)[position() > %d]', $anXpath, intval($filter) + 1); - } else { - $anXpath = sprintf('(%s)[last() - position() < %d]', $anXpath, abs(intval($filter) + 1)); - } - break; - case 'gte': - if 
(intval($filter) >= 0) { - $anXpath = sprintf('(%s)[position() >= %d]', $anXpath, intval($filter) + 1); - } else { - $anXpath = sprintf('(%s)[last() - position() <= %d]', $anXpath, abs(intval($filter) + 1)); - } - break; - case 'header': - $anXpath .= '[name() = "h1" or name() = "h2" or name() = "h3" or name() = "h4" or name() = "h5" or name() = "h6"]'; - break; - case 'image': - $anXpath .= '[@type="image"]'; - break; - case 'input': - $anXpath .= '[name() = "input" or name() = "textarea" or name() = "select" or name() = "button"]'; - break; - case 'last': - $anXpath = sprintf('(%s)[last()]', $anXpath); - break; - case 'lt': - if (intval($filter) >= 0) { - $anXpath = sprintf('(%s)[position() < %d]', $anXpath, intval($filter) + 1); - } else { - $anXpath = sprintf('(%s)[last() - position() > %d]', $anXpath, abs(intval($filter) + 1)); - } - break; - case 'lte': - if (intval($filter) >= 0) { - $anXpath = sprintf('(%s)[position() <= %d]', $anXpath, intval($filter) + 1); - } else { - $anXpath = sprintf('(%s)[last() - position() >= %d]', $anXpath, abs(intval($filter) + 1)); - } - break; - case 'odd': - $anXpath = sprintf('(%s)[position() mod 2 = 1]', $anXpath); - break; - case 'parent': - $anXpath .= '[normalize-space()]'; - break; - case 'password': - $anXpath .= '[@type="password"]'; - break; - case 'radio': - $anXpath .= '[@type="radio"]'; - break; - case 'reset': - $anXpath .= '[@type="reset"]'; - break; - case 'selected': - $anXpath .= '[name() = "option" and @selected]'; - break; - case 'submit': - $anXpath .= '[( name() = "button" or name() = "input" ) and @type = "submit"]'; - break; - case 'text': - $anXpath .= '[name() = "input" and ( @type="text" or not( @type ) )]'; - break; - - // Additional pseudo classes (not in CSS specifications) - case 'count': - switch (substr($filter, 0, 2)) { - case '>=': - $anXpath .= sprintf('[last() >= %d]', intval(substr($filter, 2))); - break; - case '<=': - $anXpath .= sprintf('[last() <= %d]', intval(substr($filter, 2))); - 
break; - default: - switch (substr($filter, 0, 1)) { - case '>': - $anXpath .= sprintf('[last() > %d]', intval(substr($filter, 1))); - break; - case '<': - $anXpath .= sprintf('[last() < %d]', intval(substr($filter, 1))); - break; - case '=': - $anXpath .= sprintf('[last() = %d]', intval(substr($filter, 1))); - break; - default: - $anXpath .= sprintf('[last() = %d]', intval($filter)); - } - } - break; - - default: - throw new SelectorException(sprintf('Filter "%s" is not valid in selector "%s"', $filterName, $this->selector)); - } - } - - $iExpression++; - } - - // Concat all xpath - $xpath .= ($iSelector == 0 ? '' : ' | ') . $anXpath; - $iSelector++; - } - - return $xpath; - } - - /** - * Get xpath with a context defined. - * - * @param int $context Context (checks constants) - * - * @return string - * @throws \Berlioz\HtmlSelector\Exception\SelectorException - * @throws \InvalidArgumentException if a bad context chosen. - */ - public function xpath(int $context = Selector::CONTEXT_ALL): string - { - if (is_null($this->xpath)) { - $this->xpath = $this->xpathConversion($this->selector); - } - - switch ($context) { - case self::CONTEXT_ROOT: - $contextValue = '//'; - break; - case self::CONTEXT_ALL: - $contextValue = './/'; - break; - case self::CONTEXT_CHILD: - $contextValue = './'; - break; - case self::CONTEXT_SELF: - $contextValue = 'self::'; - break; - case self::CONTEXT_PARENTS: - $contextValue = 'ancestor::'; - break; - case self::CONTEXT_NEXT: - $contextValue = 'following-sibling::*[1]/self::'; - break; - case self::CONTEXT_NEXT_ALL: - $contextValue = 'following-sibling::'; - break; - case self::CONTEXT_PREV: - $contextValue = 'preceding-sibling::*[last()]/self::'; - break; - case self::CONTEXT_PREV_ALL: - $contextValue = 'preceding-sibling::'; - break; - default: - throw new \InvalidArgumentException('Bad context chosen, checks Selector class constants'); - } - - return str_replace('%CONTEXT%', $contextValue, $this->xpath); - } -} \ No newline at end of 
/**
 * Class XpathSolver.
 *
 * Converts a CSS selector string into an XPath 1.0 expression.
 *
 * The selector is parsed by {@see CssSelectorParser}; each selector of the
 * resulting set is converted on its own and the non empty results are joined
 * with the XPath union operator (" | ").
 */
class XpathSolver
{
    // Context prefixes: the chosen value is placed in front of the first
    // node test of every generated expression.
    public const CONTEXT_ROOT = '//';
    public const CONTEXT_ALL = './/';
    public const CONTEXT_CHILD = './';
    public const CONTEXT_SELF = 'self::';
    public const CONTEXT_PARENTS = 'ancestor::';
    public const CONTEXT_NEXT = 'following-sibling::*[1]/self::';
    public const CONTEXT_NEXT_ALL = 'following-sibling::';
    public const CONTEXT_PREV = 'preceding-sibling::*[last()]/self::';
    public const CONTEXT_PREV_ALL = 'preceding-sibling::';

    protected CssSelectorParser $parser;

    /**
     * XpathSolver constructor.
     *
     * @param PseudoClassSet $pseudoClasses Set used to translate the pseudo classes of each selector
     */
    public function __construct(protected PseudoClassSet $pseudoClasses)
    {
        $this->parser = new CssSelectorParser();
    }

    /**
     * Solve a CSS selector string to an xpath expression.
     *
     * @param string $selector CSS selector (may hold several selectors)
     * @param string|null $context Prefix of the expression, see CONTEXT_* constants
     *
     * @return string
     * @throws SelectorException
     */
    public function solve(string $selector, ?string $context = self::CONTEXT_ALL): string
    {
        $selectors = $this->parser->parse($selector);

        return $this->solveMultiple($selectors, $context);
    }

    /**
     * Solve multiple.
     *
     * Every selector of the set is solved with the same context; empty
     * results are dropped before the union is built.
     *
     * @param CssSelectorSet $selectors
     * @param string|null $context
     *
     * @return string
     * @throws SelectorException
     */
    protected function solveMultiple(CssSelectorSet $selectors, ?string $context = self::CONTEXT_ALL): string
    {
        $xpaths = array_map(fn(CssSelector $selector) => $this->solveUnique($selector, $context), $selectors->all());
        $xpaths = array_filter($xpaths);

        return implode(' | ', $xpaths);
    }

    /**
     * Solve a selector.
     *
     * Builds, in this order: context + node test, id predicate, class
     * predicates, attribute predicates, pseudo classes, and finally the
     * chained selector (if any), for which the expression built so far plus
     * the combinator axis is used as context.
     *
     * @param CssSelector $selector
     * @param string|null $context Prefix placed before the node test; null means no prefix
     *
     * @return string
     * @throws SelectorException
     */
    protected function solveUnique(CssSelector $selector, ?string $context = self::CONTEXT_ALL): string
    {
        // Type
        $xpath = ($context ?? '') . ($selector->getType() ?: '*');

        // ID
        if (null !== ($id = $selector->getId())) {
            $xpath .= sprintf('[@id=%s]', self::xpathLiteral($id));
        }

        // Classes: whole-token match inside the space separated @class value
        foreach ($selector->getClasses() as $class) {
            $xpath .= sprintf('[contains(concat(" ", @class, " "), %s)]', self::xpathLiteral(' ' . $class . ' '));
        }

        // Attributes
        foreach ($selector->getAttributes() as $attribute) {
            $xpath .= self::solveAttribute($attribute);
        }

        // Pseudo classes: the returned value replaces the expression built so far
        $xpath = $this->pseudoClasses->buildXpath($xpath, $selector);

        // Next?
        if (null !== ($next = $selector->getNext())) {
            $xpath .= match ($next->getPredecessor()) {
                '>' => '/',
                '+' => '/following-sibling::*[1]/self::',
                '~' => '/following-sibling::',
                default => '//',
            };
            $xpath = $this->solveUnique($next->getSelector(), $xpath);
        }

        return $xpath;
    }

    /**
     * Build the xpath predicate of one attribute filter.
     *
     * @param array $attribute Array with keys "name", "comparison" and "value"
     *
     * @return string Predicate including the surrounding brackets
     */
    private static function solveAttribute(array $attribute): string
    {
        $name = $attribute['name'];
        $value = (string)($attribute['value'] ?? '');
        $literal = self::xpathLiteral($value);

        return match ($attribute['comparison'] ?? null) {
            '=' => sprintf('[@%s=%s]', $name, $literal),
            '^=' => sprintf('[starts-with(@%s, %s)]', $name, $literal),
            // XPath 1.0 has no ends-with(): compare the tail of the attribute with the value
            '$=' => sprintf(
                '[%2$s = substring(@%1$s, string-length(@%1$s) - string-length(%2$s) + 1)]',
                $name,
                $literal
            ),
            '*=' => sprintf('[contains(@%s, %s)]', $name, $literal),
            '!=' => sprintf('[@%s!=%s]', $name, $literal),
            '~=' => sprintf(
                '[contains(concat(" ", @%s, " "), %s)]',
                $name,
                self::xpathLiteral(' ' . $value . ' ')
            ),
            // Exact value, or value immediately followed by a hyphen
            '|=' => sprintf(
                '[@%1$s = %2$s or starts-with(@%1$s, %3$s)]',
                $name,
                $literal,
                self::xpathLiteral($value . '-')
            ),
            // No (or unknown) comparison: only test the presence of the attribute
            default => sprintf('[@%s]', $name),
        };
    }

    /**
     * Encode a string as an XPath 1.0 literal.
     *
     * XPath 1.0 literals have no escape sequence, so the delimiter is chosen
     * among the quote characters not used by the value; when the value holds
     * both, it is split on double quotes and rebuilt with concat().
     *
     * @param string $value
     *
     * @return string
     */
    private static function xpathLiteral(string $value): string
    {
        if (!str_contains($value, '"')) {
            return '"' . $value . '"';
        }

        if (!str_contains($value, "'")) {
            return "'" . $value . "'";
        }

        $parts = array_map(
            static fn(string $part): string => '"' . $part . '"',
            explode('"', $value)
        );

        return 'concat(' . implode(', \'"\', ', $parts) . ')';
    }
}
/**
 * Checks that CssSelectorParser turns selector strings into the expected
 * CssSelector structures (type, id, classes, attributes, pseudo classes and
 * chained selectors).
 */
class CssSelectorParserTest extends TestCase
{
    /**
     * Data sets: a selector string and, per parsed selector, the expected parts.
     *
     * @return array
     */
    public function provider(): array
    {
        $typeIsText = ['name' => 'type', 'comparison' => '=', 'value' => 'text'];

        // Chain expected for "[role=main] ul:eq(0) > li", built from the tail
        $listItem = [
            'predecessor' => '>',
            'type' => 'li',
            'id' => null,
            'classes' => [],
            'attributes' => [],
            'pseudoClasses' => [],
        ];
        $firstList = [
            'predecessor' => null,
            'type' => 'ul',
            'id' => null,
            'classes' => [],
            'attributes' => [],
            'pseudoClasses' => ['eq' => '0'],
            'next' => $listItem,
        ];

        $cases = [];

        $cases[] = [
            'selectorStr' => 'input[type=text][required]:disabled',
            'expected' => [
                [
                    'type' => 'input',
                    'id' => null,
                    'classes' => [],
                    'attributes' => [
                        $typeIsText,
                        ['name' => 'required', 'comparison' => null, 'value' => null],
                    ],
                    'pseudoClasses' => ['disabled' => null],
                ],
            ],
        ];

        $cases[] = [
            'selectorStr' => '#foo.bar.baz:nth-child(n+1)',
            'expected' => [
                [
                    'type' => null,
                    'id' => 'foo',
                    'classes' => ['bar', 'baz'],
                    'attributes' => [],
                    'pseudoClasses' => ['nth-child' => 'n+1'],
                ],
            ],
        ];

        $cases[] = [
            'selectorStr' => '#foo.bar, input.baz[type=text]',
            'expected' => [
                [
                    'type' => null,
                    'id' => 'foo',
                    'classes' => ['bar'],
                    'attributes' => [],
                    'pseudoClasses' => [],
                ],
                [
                    'type' => 'input',
                    'id' => null,
                    'classes' => ['baz'],
                    'attributes' => [$typeIsText],
                    'pseudoClasses' => [],
                ],
            ],
        ];

        $cases[] = [
            'selectorStr' => '[role=main] ul:eq(0) > li',
            'expected' => [
                [
                    'type' => null,
                    'id' => null,
                    'classes' => [],
                    'attributes' => [['name' => 'role', 'comparison' => '=', 'value' => 'main']],
                    'pseudoClasses' => [],
                    'next' => $firstList,
                ],
            ],
        ];

        return $cases;
    }

    /**
     * @dataProvider provider
     *
     * @param string $selectorStr
     * @param array $expected
     */
    public function testParse(string $selectorStr, array $expected)
    {
        $parsed = (new CssSelectorParser())->parse($selectorStr);

        $this->assertCount(count($expected), $parsed);

        foreach ($parsed->all() as $index => $parsedSelector) {
            $this->subtestSelector($parsedSelector, $expected[$index]);
        }
    }

    /**
     * Compare one parsed selector with its expectation, then follow the
     * "next" chain when the expectation describes one.
     *
     * @param CssSelector $selector
     * @param array $expected
     */
    public function subtestSelector(CssSelector $selector, array $expected)
    {
        $this->assertEquals($expected['type'], $selector->getType());
        $this->assertEquals($expected['id'], $selector->getId());
        $this->assertEquals($expected['classes'], $selector->getClasses());
        $this->assertEquals($expected['attributes'], $selector->getAttributes());
        $this->assertEquals($expected['pseudoClasses'], $selector->getPseudoClasses());

        if (!isset($expected['next'])) {
            return;
        }

        $expectedNext = $expected['next'];

        $this->assertEquals($expectedNext['predecessor'], $selector->getNext()?->getPredecessor());
        $this->subtestSelector($selector->getNext()->getSelector(), $expectedNext);
    }
}
/**
 * Covers string conversion, counting and retrieval of a CssSelectorSet.
 */
class CssSelectorSetTest extends TestCase
{
    /**
     * The string form of a set is compared with "#foo, #bar".
     */
    public function test__toString()
    {
        $first = new CssSelector('#foo');
        $second = new CssSelector('#bar');
        $rendered = (string)new CssSelectorSet($first, $second);

        $this->assertEquals('#foo, #bar', $rendered);
    }

    /**
     * all() must hand back the very same selector instances, in order.
     */
    public function testAll()
    {
        $given = [new CssSelector('#foo'), new CssSelector('#bar')];
        $set = new CssSelectorSet(...$given);

        $this->assertCount(2, $set);
        $this->assertSame($given, $set->all());
    }
}
/**
 * Covers the accessors of CssSelector and its "next" link.
 */
class CssSelectorTest extends TestCase
{
    /**
     * Every constructor argument must be returned unchanged by its getter.
     */
    public function test()
    {
        $selectorStr = 'input#foo.bar.baz[qux="value"]["baz" != "value"][bar]:pseudo(1):pseudo2';
        $type = 'input';
        $id = '#foo';
        $classes = ['bar', 'baz'];
        $attributes = [
            ['name' => 'qux', 'comparison' => '=', 'value' => 'value'],
            ['name' => 'baz', 'comparison' => '!=', 'value' => 'value'],
            ['name' => 'bar', 'comparison' => null, 'value' => null],
        ];
        $pseudoClasses = ['pseudo' => '1', 'pseudo2' => null];

        $selector = new CssSelector($selectorStr, $type, $id, $classes, $attributes, $pseudoClasses);

        $this->assertEquals($selectorStr, (string)$selector);
        $this->assertEquals($type, $selector->getType());
        $this->assertEquals($id, $selector->getId());
        $this->assertEquals($classes, $selector->getClasses());
        $this->assertEquals($attributes, $selector->getAttributes());
        $this->assertEquals($pseudoClasses, $selector->getPseudoClasses());
    }

    /**
     * A new selector has no next selector; once set, the same instance is returned.
     */
    public function testNext()
    {
        $selector = new CssSelector('');

        $this->assertNull($selector->getNext());

        $next = new NextCssSelector(new CssSelector(''), '>');
        $selector->setNext($next);

        $this->assertSame($next, $selector->getNext());
    }
}
+ */ + +namespace Berlioz\HtmlSelector\Tests\CssSelector; + +use Berlioz\HtmlSelector\CssSelector\CssSelector; +use Berlioz\HtmlSelector\CssSelector\NextCssSelector; +use PHPUnit\Framework\TestCase; + +class NextCssSelectorTest extends TestCase +{ + public function test() + { + $next = new NextCssSelector($selector = new CssSelector(''), $predecessor = '>'); + + $this->assertSame($selector, $next->getSelector()); + $this->assertEquals($predecessor, $next->getPredecessor()); + } +} diff --git a/tests/Extension/CssExtensionTest.php b/tests/Extension/CssExtensionTest.php new file mode 100644 index 0000000..8341b8b --- /dev/null +++ b/tests/Extension/CssExtensionTest.php @@ -0,0 +1,301 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +namespace Berlioz\HtmlSelector\Tests\Extension; + +use Berlioz\HtmlSelector\CssSelector\CssSelector; +use Berlioz\HtmlSelector\Exception\SelectorException; +use Berlioz\HtmlSelector\Extension\CssExtension; +use Berlioz\HtmlSelector\HtmlSelector; +use Berlioz\HtmlSelector\PseudoClass\PseudoClassInterface; +use PHPUnit\Framework\TestCase; + +class CssExtensionTest extends TestCase +{ + public function testGetPseudoClasses() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertCount(27, $extension->getPseudoClasses()); + $this->assertContainsOnlyInstancesOf(PseudoClassInterface::class, $extension->getPseudoClasses()); + } + + public function testDisabled() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( name() = "button" or name() = "input" or name() = "optgroup" or name() = "option" or name() = "select" or name() = "textarea" or name() = "menuitem" or name() = "fieldset" ) and @disabled]', + $extension->disabled('XPATH') + ); + } + + public function testEmpty() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[count(child::*) = 
0]', + $extension->empty('XPATH') + ); + } + + public function testFirstOfType() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[last()]', + $extension->firstOfType('XPATH', new CssSelector(selector: 'FOO', type: 'foo')) + ); + } + + public function testFirstOfType_notSpecified() + { + $this->expectException(SelectorException::class); + + $extension = new CssExtension(new HtmlSelector()); + $extension->firstOfType('XPATH', new CssSelector(selector: 'FOO', type: null)); + } + + public function testFirstOfType_all() + { + $this->expectException(SelectorException::class); + + $extension = new CssExtension(new HtmlSelector()); + $extension->firstOfType('XPATH', new CssSelector(selector: 'FOO', type: '*')); + } + + public function testLastChild() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[../*[last()] = node()]', + $extension->lastChild('XPATH') + ); + } + + public function testOnlyOfType() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[count(../foo)=1]', + $extension->onlyOfType('XPATH', new CssSelector(selector: 'FOO', type: 'foo')) + ); + } + + public function testOptional() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[name() = "input" or name() = "textarea" or name() = "select"][not( @required )]', + $extension->optional('XPATH') + ); + } + + public function testRequired() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[name() = "input" or name() = "textarea" or name() = "select"][@required]', + $extension->required('XPATH') + ); + } + + public function testRoot() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH/ancestor::*)[1]/*[1]', + $extension->root('XPATH') + ); + } + + public function testFirst() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[1]', + 
$extension->first('XPATH') + ); + } + + public function testLastOfType() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[last()]', + $extension->lastOfType('XPATH', new CssSelector(selector: 'FOO', type: 'foo')) + ); + } + + public function testLastOfType_notSpecified() + { + $this->expectException(SelectorException::class); + + $extension = new CssExtension(new HtmlSelector()); + $extension->lastOfType('XPATH', new CssSelector(selector: 'FOO', type: null)); + } + + public function testLastOfType_all() + { + $this->expectException(SelectorException::class); + + $extension = new CssExtension(new HtmlSelector()); + $extension->lastOfType('XPATH', new CssSelector(selector: 'FOO', type: '*')); + } + + public function testBlank() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[count(child::*) = 0 and not(normalize-space())]', + $extension->blank('XPATH') + ); + } + + public function testHas() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[./SELECTOR]', + $extension->has('XPATH', 'SELECTOR') + ); + } + + public function testReadOnly() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( not(@contenteditable) or @contenteditable = "false" ) and not( ( name() = "input" or name() = "textarea" or name() = "select" ) and not(@readonly) and not(@disabled) )]', + $extension->readOnly('XPATH') + ); + } + + public function testAnyLink() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( name() = "a" or name() = "area" or name() = "link" ) and @href]', + $extension->anyLink('XPATH') + ); + } + + public function testEnabled() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( name() = "button" or name() = "input" or name() = "optgroup" or name() = "option" or name() = "select" or name() = "textarea" ) and not( @disabled )]', + 
$extension->enabled('XPATH') + ); + } + + public function testOnlyChild() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[last() = 1]', + $extension->onlyChild('XPATH') + ); + } + + public function testAny() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[self::ARGUMENTS]', + $extension->any('XPATH', 'ARGUMENTS') + ); + } + + public function testReadWrite() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( @contenteditable and ( @contenteditable = "true" or not(normalize-space(@contenteditable)) ) ) or ( ( name() = "input" or name() = "textarea" or name() = "select" ) and not(@readonly) and not(@disabled) )]', + $extension->readWrite('XPATH') + ); + } + + public function testDir() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[(ancestor-or-self::*[@dir])[last()][@dir = "ltr"]]', + $extension->dir('XPATH', 'ltr') + ); + $this->assertEquals( + 'XPATH[(ancestor-or-self::*[@dir])[last()][@dir = "rtl"]]', + $extension->dir('XPATH', 'rtl') + ); + $this->assertEquals( + 'XPATH', + $extension->dir('XPATH', 'invalid') + ); + } + + public function testFirstChild() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[../*[1] = node()]', + $extension->firstChild('XPATH') + ); + } + + public function testLang() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[@lang = "ARGUMENTS" or starts-with(@lang, "ARGUMENTS")]', + $extension->lang('XPATH', 'ARGUMENTS') + ); + } + + public function testChecked() + { + $extension = new CssExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( name() = "input" and ( @type = "checkbox" or @type = "radio" ) and @checked ) or ( name() = "option" and @selected )]', + $extension->checked('XPATH') + ); + } + + public function testNot() + { + $extension = new CssExtension(new 
HtmlSelector()); + + $this->assertEquals( + 'XPATH[not(self::ARGUMENTS)]', + $extension->not('XPATH', 'ARGUMENTS') + ); + } +} diff --git a/tests/Extension/QueryExtensionTest.php b/tests/Extension/QueryExtensionTest.php new file mode 100644 index 0000000..86be460 --- /dev/null +++ b/tests/Extension/QueryExtensionTest.php @@ -0,0 +1,269 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +namespace Berlioz\HtmlSelector\Tests\Extension; + +use Berlioz\HtmlSelector\Extension\QueryExtension; +use Berlioz\HtmlSelector\HtmlSelector; +use Berlioz\HtmlSelector\PseudoClass\PseudoClassInterface; +use PHPUnit\Framework\TestCase; + +class QueryExtensionTest extends TestCase +{ + public function testGetPseudoClasses() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertCount(22, $extension->getPseudoClasses()); + $this->assertContainsOnlyInstancesOf(PseudoClassInterface::class, $extension->getPseudoClasses()); + } + + public function testInput() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[name() = "input" or name() = "textarea" or name() = "select" or name() = "button"]', + $extension->input('XPATH') + ); + } + + public function testEq() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[position() = 2]', + $extension->eq('XPATH', 1) + ); + $this->assertEquals( + '(XPATH)[last() - position() = 0]', + $extension->eq('XPATH', -1) + ); + } + + public function testLt() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[position() < 2]', + $extension->lt('XPATH', 1) + ); + $this->assertEquals( + '(XPATH)[last() - position() > 0]', + $extension->lt('XPATH', -1) + ); + } + + public function testLte() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[position() <= 2]', + 
$extension->lte('XPATH', 1) + ); + $this->assertEquals( + '(XPATH)[last() - position() >= 0]', + $extension->lte('XPATH', -1) + ); + } + + public function testGt() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[position() > 2]', + $extension->gt('XPATH', 1) + ); + $this->assertEquals( + '(XPATH)[last() - position() < 0]', + $extension->gt('XPATH', -1) + ); + } + + public function testGte() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[position() >= 2]', + $extension->gte('XPATH', 1) + ); + $this->assertEquals( + '(XPATH)[last() - position() <= 0]', + $extension->gte('XPATH', -1) + ); + } + + public function testReset() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[@type="reset"]', + $extension->reset('XPATH') + ); + } + + public function testSubmit() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( name() = "button" or name() = "input" ) and @type = "submit"]', + $extension->submit('XPATH') + ); + } + + public function testSelected() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[name() = "option" and @selected]', + $extension->selected('XPATH') + ); + } + + public function testLast() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[last()]', + $extension->last('XPATH') + ); + } + + public function testOdd() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[position() mod 2 = 1]', + $extension->odd('XPATH') + ); + } + + public function testFile() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[@type="file"]', + $extension->file('XPATH') + ); + } + + public function testCheckbox() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[@type = "checkbox"]', + 
$extension->checkbox('XPATH') + ); + } + + public function testRadio() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[@type="radio"]', + $extension->radio('XPATH') + ); + } + + public function testPassword() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[@type="password"]', + $extension->password('XPATH') + ); + } + + public function testEven() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + '(XPATH)[position() mod 2 != 1]', + $extension->even('XPATH') + ); + } + + public function testContains() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[contains(text(), "ARGUMENTS")]', + $extension->contains('XPATH', 'ARGUMENTS') + ); + } + + public function testText() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[name() = "input" and ( @type="text" or not( @type ) )]', + $extension->text('XPATH') + ); + } + + public function testButton() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[( name() = "button" and @type != "submit" ) or ( name() = "input" and @type = "button" )]', + $extension->button('XPATH') + ); + } + + public function testHeader() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[name() = "h1" or name() = "h2" or name() = "h3" or name() = "h4" or name() = "h5" or name() = "h6"]', + $extension->header('XPATH') + ); + } + + public function testParent() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[normalize-space()]', + $extension->parent('XPATH') + ); + } + + public function testImage() + { + $extension = new QueryExtension(new HtmlSelector()); + + $this->assertEquals( + 'XPATH[@type="image"]', + $extension->image('XPATH') + ); + } +} diff --git a/tests/Query/QueryIteratorTest.php 
b/tests/Query/QueryIteratorTest.php new file mode 100644 index 0000000..edb29a1 --- /dev/null +++ b/tests/Query/QueryIteratorTest.php @@ -0,0 +1,43 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +namespace Berlioz\HtmlSelector\Tests\Query; + +use Berlioz\HtmlSelector\HtmlSelector; +use PHPUnit\Framework\TestCase; + +class QueryIteratorTest extends TestCase +{ + public function test() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + $result = $query->find('footer ul:first :nth-child(2n)'); + $count = 0; + $values = []; + + // Count and get elements individually + foreach ($result as $value) { + $count++; + $values[] = $value; + } + + // Count elements + $this->assertEquals(7, $count); + + // Compare elements + foreach ($values as $key => $value) { + $this->assertEquals((string)$result->get($key), $value->text()); + $this->assertEquals($result->get($key), $value->get(0)); + } + } +} diff --git a/tests/Query/QueryTest.php b/tests/Query/QueryTest.php new file mode 100644 index 0000000..7ef8bd5 --- /dev/null +++ b/tests/Query/QueryTest.php @@ -0,0 +1,342 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +namespace Berlioz\HtmlSelector\Tests\Query; + +use Berlioz\HtmlSelector\HtmlSelector; +use PHPUnit\Framework\TestCase; + +class QueryTest extends TestCase +{ + public function testIndex() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . 
'/../files/test.html', true); + + $result = $query->find('li:eq(2)'); + $this->assertEquals(2, (string)$result->index(), (string)$result->getSelector()); + + $result = $query->find('li'); + $this->assertEquals( + 4, + (string)$result->index('[role=main] ul:eq(0) > li:lt(2)'), + (string)$result->getSelector() + ); + + $result2 = $query->find('li:eq(2)'); + $this->assertEquals(2, (string)$result->index($result2), (string)$result->getSelector()); + } + + public function testFilter() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('[role=main] ul:eq(0) > li'); + $result = $result->filter('.second'); + + $this->assertCount(1, $result, (string)$result->getSelector()); + $this->assertEquals('Second element of list 1', (string)$result->get(0), (string)$result->getSelector()); + } + + public function testNot() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('[role=main] ul:eq(0) > li'); + $result = $result->not('.second'); + + $this->assertCount(2, $result, (string)$result->getSelector()); + $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); + $this->assertEquals('Third element of list 1', (string)$result->get(1), (string)$result->getSelector()); + } + + public function testParent() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('h1')->parent(); + + $this->assertCount(1, $result, (string)$result->getSelector()); + $this->assertEquals('div', $result->get(0)->getName(), (string)$result->getSelector()); + $this->assertEquals( + 'starter-template', + $result->get(0)->attributes()->{'class'}, + (string)$result->getSelector() + ); + } + + public function testChildren() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . 
'/../files/test.html', true); + + $result = $query->find('[aria-labelledby="dropdown01"]'); + $result = $result->children(); + + $this->assertCount(3, $result, (string)$result->getSelector()); + } + + public function testAttr() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('main p:first'); + $this->assertEquals('en-us', $result->attr('lang'), (string)$result->getSelector()); + $this->assertEquals('center', $result->attr('align'), (string)$result->getSelector()); + + $result->attr('align', 'left'); + $this->assertEquals('left', $result->attr('align'), (string)$result->getSelector()); + $this->assertNull($result->attr('test'), (string)$result->getSelector()); + + $result->attr('valign', 'top'); + $this->assertEquals('top', $result->attr('valign'), (string)$result->getSelector()); + } + + public function testProp() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . 
'/../files/test.html', true); + + $result = $query->find('#formTest [name=checkbox1]'); + $this->assertFalse($result->prop('checked'), (string)$result->getSelector()); + + $result = $query->find('#formTest [name=checkbox2]'); + $this->assertTrue($result->prop('checked'), (string)$result->getSelector()); + + $result = $query->find('#formTest [name=checkbox3]'); + $this->assertTrue($result->prop('checked'), (string)$result->getSelector()); + + $result = $query->find('#formTest [name=checkbox4]'); + $this->assertTrue($result->prop('required'), (string)$result->getSelector()); + + $result = $query->find('#formTest [name=checkbox5]'); + $this->assertTrue($result->prop('disabled'), (string)$result->getSelector()); + + $result->prop('disabled', false); + $this->assertFalse($result->prop('disabled'), (string)$result->getSelector()); + + $result->prop('disabled', true); + $this->assertTrue($result->prop('disabled'), (string)$result->getSelector()); + } + + public function testData() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('#formTest'); + + $this->assertEquals('valueTest', $result->data('testTest2Test3'), (string)$result->getSelector()); + } + + public function testText() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('p:lang(en-us)'); + $this->assertEquals( + "\n Usé this document as a way to\n quickly start any new project. All you get is this text and a mostly barebones HTML document.\n ", + $result->text(), + (string)$result->getSelector() + ); + + $result = $query->find('p:lang(en-us)'); + $this->assertEquals( + "\n Usé this document as a way to\n any new project. 
All you get is this text and a mostly barebones HTML document.\n ", + $result->text(false), + (string)$result->getSelector() + ); + } + + public function testHtml() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('p:lang(en-us)'); + + $this->assertEquals( + "\n Usé this document as a way to\n quickly start any new project.
All you get is this text and a mostly barebones HTML document.\n ", + $result->html(), + (string)$result->getSelector() + ); + $this->assertStringStartsWith( + "\n\n ", + $query->html() + ); + } + + public function testHasClass() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('[role=main] p'); + + $this->assertCount(3, $result, (string)$result->getSelector()); + $this->assertTrue($result->hasClass('lead'), (string)$result->getSelector()); + $this->assertFalse($result->hasClass('test'), (string)$result->getSelector()); + } + + public function testClass_AddRemoveToggle() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('#list1 li'); + $result->addClass('classAdded1 classAdded2'); + + $this->assertTrue($result->hasClass('classAdded1 classAdded2'), (string)$result->getSelector()); + + $result->removeClass('classAdded2'); + + $this->assertTrue($result->hasClass('classAdded1'), (string)$result->getSelector()); + $this->assertFalse($result->hasClass('classAdded2'), (string)$result->getSelector()); + + $result->toggleClass('classToggled'); + $this->assertTrue($result->hasClass('classToggled'), (string)$result->getSelector()); + + $result->toggleClass('classToggled', false); + $this->assertFalse($result->hasClass('classToggled'), (string)$result->getSelector()); + + $result->toggleClass('classToggled', true); + $this->assertTrue($result->hasClass('classToggled'), (string)$result->getSelector()); + + $result->toggleClass('classToggled', true); + $this->assertTrue($result->hasClass('classToggled'), (string)$result->getSelector()); + } + + public function testNext() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . 
'/../files/test.html', true); + + $result = $query->find('footer > div:last :first-child'); + $result2 = $result->next(); + + $this->assertCount(1, $result2, (string)$result2->getSelector()); + $this->assertEquals('Contact 5', (string)$result2->get(0), (string)$result2->getSelector()); + + $result2 = $result->next('button'); + + $this->assertCount(0, $result2, (string)$result2->getSelector()); + + $result = $query->find('footer > ul:last :eq(1)'); + $result = $result->next(); + + $this->assertCount(1, $result, (string)$result2->getSelector()); + $this->assertEquals('Link 4.3', (string)$result->get(0), (string)$result->getSelector()); + } + + public function testNextAll() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('footer > div:last :first-child'); + $result2 = $result->nextAll(); + + $this->assertCount(7, $result2, (string)$result2->getSelector()); + $this->assertEquals('Contact 5', (string)$result2->get(0), (string)$result2->getSelector()); + $this->assertEquals('Contact 6', (string)$result2->get(1), (string)$result2->getSelector()); + + $result2 = $result->next('button'); + + $this->assertCount(0, $result2, (string)$result2->getSelector()); + + $result = $query->find('footer > ul:last :first-child'); + $result = $result->nextAll(); + + $this->assertCount(2, $result, (string)$result2->getSelector()); + $this->assertEquals('Link 4.2', (string)$result->get(0), (string)$result->getSelector()); + $this->assertEquals('Link 4.3', (string)$result->get(1), (string)$result->getSelector()); + } + + public function testPrev() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . 
'/../files/test.html', true); + + $result = $query->find('footer > div:last :last-child'); + $result2 = $result->prev(); + + $this->assertCount(1, $result2, (string)$result2->getSelector()); + $this->assertEquals('Contact 4', (string)$result2->get(0), (string)$result2->getSelector()); + + $result2 = $result->prev('span'); + + $this->assertCount(0, $result2, (string)$result2->getSelector()); + + $result = $query->find('footer > ul:last :eq(1)'); + $result = $result->prev(); + + $this->assertCount(1, $result, (string)$result2->getSelector()); + $this->assertEquals('Link 4.1', (string)$result->get(0), (string)$result->getSelector()); + } + + public function testPrevAll() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('footer > div:last :last-child'); + $result2 = $result->prevAll(); + + $this->assertCount(7, $result2, (string)$result2->getSelector()); + $this->assertEquals('Contact 4', (string)$result2->get(0), (string)$result2->getSelector()); + $this->assertEquals('Contact 5', (string)$result2->get(1), (string)$result2->getSelector()); + + $result2 = $result->prev('span'); + + $this->assertCount(0, $result2, (string)$result2->getSelector()); + + $result = $query->find('footer > ul:last :last-child'); + $result = $result->prevAll(); + + $this->assertCount(2, $result, (string)$result2->getSelector()); + $this->assertEquals('Link 4.1', (string)$result->get(0), (string)$result->getSelector()); + $this->assertEquals('Link 4.2', (string)$result->get(1), (string)$result->getSelector()); + } + + public function testSerializeArray() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . 
'/../files/test.html', true); + + $result = $query->find('form#formTest'); + + $this->assertCount(11, $result->serializeArray(), '"form#formTest".serializeArray()'); + + $result = $query->find('form#formTest select'); + + $this->assertCount(2, $result->serializeArray(), '"form#formTest select".serializeArray()'); + } + + public function testSerialize() + { + $htmlSelector = new HtmlSelector(); + $query = $htmlSelector->query(__DIR__ . '/../files/test.html', true); + + $result = $query->find('form#formTest'); + + $this->assertEquals( + 'text1=&password1=&text2=&checkbox2=&checkbox3=&radio=radio2&select1%5B%5D=option2&select1%5B%5D=Option+3&textarea1=Text+inside.&file1%5B%5D=&image1=', + $result->serialize(), + '"form#formTest".serialize()' + ); + } +} \ No newline at end of file diff --git a/tests/QueryTest.php b/tests/QueryTest.php deleted file mode 100644 index f20a54b..0000000 --- a/tests/QueryTest.php +++ /dev/null @@ -1,760 +0,0 @@ - - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code, to the root. - */ - -namespace Berlioz\HtmlSelector\Tests; - - -use Berlioz\HtmlSelector\Query; -use Berlioz\Http\Message\Response; -use Berlioz\Http\Message\Stream; -use PHPUnit\Framework\TestCase; - -class QueryTest extends TestCase -{ - public function testUtf8() - { - $query = Query::loadHtml(__DIR__ . '/files/test_utf8.html', true); - $result = $query->find('head > title'); - - $this->assertEquals('Test éèà', $result->text()); - } - - public function testLoadResponseInterface() - { - $body = new Stream(fopen(__DIR__ . 
'/files/test_encoding.html', 'r')); - $response = new Response($body, 200, ['Content-Type' => 'text/html; charset=ISO-8859-1']); - $query = Query::loadResponse($response); - - $result = $query->find('h1'); - $this->assertEquals('Ceci est un test avec des accents éàèô', $result->text()); - - $result = $query->find('head > meta[name=description]'); - $this->assertEquals('Accès à l\'espace', $result->attr('content')); - } - - /** - * Provider to test HTML files. - * - * @return array - */ - public function htmlFilesDataProvider() - { - return [ - ['files/test1.html'], - ['files/test2.html'], - ['files/test3.html'], - ['files/test4.html'], - ['files/test5.html'], - ]; - } - - /** - * Test query init with files. - * - * @param string $file File name - * - * @dataProvider htmlFilesDataProvider - */ - public function testQueryInit($file) - { - $this->assertInstanceOf(Query::class, Query::loadHtml(__DIR__ . '/' . $file, true)); - } - - /** - * Test query selector with HTML files. - * - * @param string $file File name - * - * @dataProvider htmlFilesDataProvider - */ - public function testQuerySelector($file) - { - $query = Query::loadHtml(__DIR__ . '/' . $file, true); - $result = $query->find('h1'); - $this->assertCount(1, $result); - } - - /** - * Test query selector with different depth. - */ - public function testQueryDepth() - { - $query = Query::loadHtml(__DIR__ . 
'/files/test.html', true); - - // Descendants - $result = $query->find('[role=main] ul:eq(1)'); - $this->assertCount(1, $result); - - // Descendants of query - $result = $result->find('li.second'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Second element of list 2', (string)$result->get(0), (string)$result->getSelector()); - - // Children - $result = $query->find('body > main'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $result = $query->find('body > ul'); - $this->assertCount(0, $result, (string)$result->getSelector()); - - // Next - $result = $query->find('#myId > :eq(0) + i'); - $this->assertCount(0, $result, (string)$result->getSelector()); - $result = $query->find('#myId > :eq(0) + span'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // Next all - $result = $query->find('#myId > :eq(0) ~ i'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $result = $query->find('#myId > :eq(0) ~ span'); - $this->assertCount(2, $result, (string)$result->getSelector()); - } - - /** - * Test query methods. - */ - public function testQueryMethods() - { - $query = Query::loadHtml(__DIR__ . 
'/files/test.html', true); - - // index() - $result = $query->find('li:eq(2)'); - $this->assertEquals(2, (string)$result->index(), (string)$result->getSelector()); - $result = $query->find('li'); - $this->assertEquals(4, (string)$result->index('[role=main] ul:eq(0) > li:lt(2)'), (string)$result->getSelector()); - $result2 = $query->find('li:eq(2)'); - $this->assertEquals(2, (string)$result->index($result2), (string)$result->getSelector()); - - // filter() - $result = $query->find('[role=main] ul:eq(0) > li'); - $result = $result->filter('.second'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Second element of list 1', (string)$result->get(0), (string)$result->getSelector()); - - // not() - $result = $query->find('[role=main] ul:eq(0) > li'); - $result = $result->not('.second'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(1), (string)$result->getSelector()); - - // parent() - $result = $query->find('h1')->parent(); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('div', $result->get(0)->getName(), (string)$result->getSelector()); - $this->assertEquals('starter-template', $result->get(0)->attributes()->{'class'}, (string)$result->getSelector()); - - // children() - $result = $query->find('[aria-labelledby="dropdown01"]'); - $result = $result->children(); - $this->assertCount(3, $result, (string)$result->getSelector()); - - // attr() - $result = $query->find('main p:first'); - $this->assertEquals('en-us', $result->attr('lang'), (string)$result->getSelector()); - $this->assertEquals('center', $result->attr('align'), (string)$result->getSelector()); - $result->attr('align', 'left'); - $this->assertEquals('left', $result->attr('align'), (string)$result->getSelector()); - 
$this->assertNull($result->attr('test'), (string)$result->getSelector()); - $result->attr('valign', 'top'); - $this->assertEquals('top', $result->attr('valign'), (string)$result->getSelector()); - - // prop() - $result = $query->find('#formTest [name=checkbox1]'); - $this->assertFalse($result->prop('checked'), (string)$result->getSelector()); - $result = $query->find('#formTest [name=checkbox2]'); - $this->assertTrue($result->prop('checked'), (string)$result->getSelector()); - $result = $query->find('#formTest [name=checkbox3]'); - $this->assertTrue($result->prop('checked'), (string)$result->getSelector()); - $result = $query->find('#formTest [name=checkbox4]'); - $this->assertTrue($result->prop('required'), (string)$result->getSelector()); - $result = $query->find('#formTest [name=checkbox5]'); - $this->assertTrue($result->prop('disabled'), (string)$result->getSelector()); - $result->prop('disabled', false); - $this->assertFalse($result->prop('disabled'), (string)$result->getSelector()); - $result->prop('disabled', true); - $this->assertTrue($result->prop('disabled'), (string)$result->getSelector()); - - // data() - $result = $query->find('#formTest'); - $this->assertEquals('valueTest', $result->data('testTest2Test3'), (string)$result->getSelector()); - - // text() - $result = $query->find('p:lang(en-us)'); - $this->assertEquals("\n Usé this document as a way to\n quickly start any new project. All you get is this text and a mostly barebones HTML document.\n ", $result->text(), (string)$result->getSelector()); - $result = $query->find('p:lang(en-us)'); - $this->assertEquals("\n Usé this document as a way to\n any new project. All you get is this text and a mostly barebones HTML document.\n ", $result->text(false), (string)$result->getSelector()); - - // html() - $result = $query->find('p:lang(en-us)'); - $this->assertEquals("\n Usé this document as a way to\n quickly start any new project.
All you get is this text and a mostly barebones HTML document.\n ", $result->html(), (string)$result->getSelector()); - $this->assertStringStartsWith("\n\n ", $query->html()); - - // hasClass() - $result = $query->find('[role=main] p'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertTrue($result->hasClass('lead'), (string)$result->getSelector()); - $this->assertFalse($result->hasClass('test'), (string)$result->getSelector()); - - // addClass() - $result = $query->find('#list1 li'); - $result->addClass('classAdded1 classAdded2'); - $this->assertTrue($result->hasClass('classAdded1 classAdded2'), (string)$result->getSelector()); - - // removeClass() - $result->removeClass('classAdded2'); - $this->assertTrue($result->hasClass('classAdded1'), (string)$result->getSelector()); - $this->assertFalse($result->hasClass('classAdded2'), (string)$result->getSelector()); - - // toggleClass() - $result->toggleClass('classToggled'); - $this->assertTrue($result->hasClass('classToggled'), (string)$result->getSelector()); - $result->toggleClass('classToggled', false); - $this->assertFalse($result->hasClass('classToggled'), (string)$result->getSelector()); - $result->toggleClass('classToggled', true); - $this->assertTrue($result->hasClass('classToggled'), (string)$result->getSelector()); - $result->toggleClass('classToggled', true); - $this->assertTrue($result->hasClass('classToggled'), (string)$result->getSelector()); - - // next() - $result = $query->find('footer > div:last :first-child'); - $result2 = $result->next(); - $this->assertCount(1, $result2, (string)$result2->getSelector()); - $this->assertEquals('Contact 5', (string)$result2->get(0), (string)$result2->getSelector()); - $result2 = $result->next('button'); - $this->assertCount(0, $result2, (string)$result2->getSelector()); - $result = $query->find('footer > ul:last :eq(1)'); - $result = $result->next(); - $this->assertCount(1, $result, (string)$result2->getSelector()); - 
$this->assertEquals('Link 4.3', (string)$result->get(0), (string)$result->getSelector()); - - // nextAll() - $result = $query->find('footer > div:last :first-child'); - $result2 = $result->nextAll(); - $this->assertCount(7, $result2, (string)$result2->getSelector()); - $this->assertEquals('Contact 5', (string)$result2->get(0), (string)$result2->getSelector()); - $this->assertEquals('Contact 6', (string)$result2->get(1), (string)$result2->getSelector()); - $result2 = $result->next('button'); - $this->assertCount(0, $result2, (string)$result2->getSelector()); - $result = $query->find('footer > ul:last :first-child'); - $result = $result->nextAll(); - $this->assertCount(2, $result, (string)$result2->getSelector()); - $this->assertEquals('Link 4.2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 4.3', (string)$result->get(1), (string)$result->getSelector()); - - // prev() - $result = $query->find('footer > div:last :last-child'); - $result2 = $result->prev(); - $this->assertCount(1, $result2, (string)$result2->getSelector()); - $this->assertEquals('Contact 4', (string)$result2->get(0), (string)$result2->getSelector()); - $result2 = $result->prev('span'); - $this->assertCount(0, $result2, (string)$result2->getSelector()); - $result = $query->find('footer > ul:last :eq(1)'); - $result = $result->prev(); - $this->assertCount(1, $result, (string)$result2->getSelector()); - $this->assertEquals('Link 4.1', (string)$result->get(0), (string)$result->getSelector()); - - // prevAll() - $result = $query->find('footer > div:last :last-child'); - $result2 = $result->prevAll(); - $this->assertCount(7, $result2, (string)$result2->getSelector()); - $this->assertEquals('Contact 4', (string)$result2->get(0), (string)$result2->getSelector()); - $this->assertEquals('Contact 5', (string)$result2->get(1), (string)$result2->getSelector()); - $result2 = $result->prev('span'); - $this->assertCount(0, $result2, (string)$result2->getSelector()); - $result = 
$query->find('footer > ul:last :last-child'); - $result = $result->prevAll(); - $this->assertCount(2, $result, (string)$result2->getSelector()); - $this->assertEquals('Link 4.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 4.2', (string)$result->get(1), (string)$result->getSelector()); - - // serializeArray() - $result = $query->find('form#formTest'); - $this->assertCount(11, $result->serializeArray(), '"form#formTest".serializeArray()'); - $result = $query->find('form#formTest select'); - $this->assertCount(2, $result->serializeArray(), '"form#formTest select".serializeArray()'); - - // serialize() - $result = $query->find('form#formTest'); - $this->assertEquals( - 'text1=&password1=&text2=&checkbox2=&checkbox3=&radio=radio2&select1%5B%5D=option2&select1%5B%5D=Option+3&textarea1=Text+inside.&file1%5B%5D=&image1=', - $result->serialize(), - '"form#formTest".serialize()' - ); - } - - /** - * Test query with selector who have filters. - * Test all filters. - */ - public function testQuerySelectorWithFiltersResult() - { - $query = Query::loadHtml(__DIR__ . 
'/files/test.html', true); - - // :any - $result = $query->find('body :any(ul, p)'); - $this->assertCount(11, $result, (string)$result->getSelector()); - $result = $query->find('body :any(ul, p) li'); - $this->assertCount(33, $result, (string)$result->getSelector()); - - // :any-link - $result = $query->find('body :any-link'); - $this->assertCount(8, $result, (string)$result->getSelector()); - - // :dir() - $result = $query->find('main[role=main] .starter-template p:dir(ltr)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $result = $query->find('main[role=main] .starter-template p:dir(rtl)'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // :empty - $result = $query->find('main[role=main] .starter-template :blank'); - $this->assertCount(2, $result, (string)$result->getSelector()); - - // :button - $result = $query->find('body :button'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('Toggle navigation', (string)$result->get(0)->attributes()->{'aria-label'}, (string)$result->getSelector()); - $this->assertEquals('form-button', (string)$result->get(1)->attributes()->{'name'}, (string)$result->getSelector()); - - // :checkbox - $result = $query->find('form :checkbox'); - $this->assertCount(5, $result, (string)$result->getSelector()); - $this->assertEquals('checkbox1', (string)$result->get(0)->attributes()->{'name'}, (string)$result->getSelector()); - $this->assertEquals('checkbox2', (string)$result->get(1)->attributes()->{'name'}, (string)$result->getSelector()); - $this->assertEquals('checkbox3', (string)$result->get(2)->attributes()->{'name'}, (string)$result->getSelector()); - $this->assertEquals('checkbox4', (string)$result->get(3)->attributes()->{'name'}, (string)$result->getSelector()); - $this->assertEquals('checkbox5', (string)$result->get(4)->attributes()->{'name'}, (string)$result->getSelector()); - - // :checked - $result = $query->find('form :checked'); - 
$this->assertCount(5, $result, (string)$result->getSelector()); - $this->assertEquals('checkbox2', (string)$result->get(0)->attributes()->{'name'}, (string)$result->getSelector()); - $this->assertEquals('checkbox3', (string)$result->get(1)->attributes()->{'name'}, (string)$result->getSelector()); - $this->assertEquals('radio2', (string)$result->get(2)->attributes()->{'value'}, (string)$result->getSelector()); - - // :checkbox:checked - $result = $query->find('form :checkbox:checked'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('checkbox2', (string)$result->get(0)->attributes()->{'name'}, (string)$result->getSelector()); - $this->assertEquals('checkbox3', (string)$result->get(1)->attributes()->{'name'}, (string)$result->getSelector()); - - // :contains - $result = $query->find('main[role=main] :contains(document as a way)'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // :count - $result = $query->find('footer li:count(3)'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('Link 4.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 4.2', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 4.3', (string)$result->get(2), (string)$result->getSelector()); - $result = $query->find('footer ul:has(li:count(3))'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $result = $query->find('footer ul:has(li:count(>=3))'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $result = $query->find('footer ul:has(li:count(=3))'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $result = $query->find('footer ul:has(li:count(>3))'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $result = $query->find('footer ul:has(li:count(<3))'); - $this->assertCount(2, $result, (string)$result->getSelector()); - - // :disabled - $result = 
$query->find('main[role=main] :disabled'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // :empty - $result = $query->find('main[role=main] .starter-template :empty'); - $this->assertCount(7, $result, (string)$result->getSelector()); - - // :enabled - $result = $query->find('main[role=main] form > :enabled'); - $this->assertCount(17, $result, (string)$result->getSelector()); - - // :eq - $result = $query->find('main[role=main] li:eq(4)'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Second element of list 2', (string)$result->get(0), (string)$result->getSelector()); - $result = $query->find('main[role=main] li:eq(-2)'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Second element of list 3', (string)$result->get(0), (string)$result->getSelector()); - - // :even - $result = $query->find('main[role=main] li:even'); - $this->assertCount(4, $result, (string)$result->getSelector()); - $this->assertEquals('Second element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('First element of list 2', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Third element of list 2', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Second element of list 3', (string)$result->get(3), (string)$result->getSelector()); - - // :file - $result = $query->find('main[role=main] form > :file'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // :first - $result = $query->find('main[role=main] li:first'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); - // :first-child - $result = $query->find('main[role=main] li:first-child'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 
1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('First element of list 2', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('First element of list 3', (string)$result->get(2), (string)$result->getSelector()); - // :first-of-type - $result = $query->find('footer div:last button:first-of-type'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 4', (string)$result->get(0), (string)$result->getSelector()); - $result = $query->find('footer div:last span:first-of-type'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 5', (string)$result->get(0), (string)$result->getSelector()); - $result = $query->find('footer div:last button:first-of-type'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 4', (string)$result->get(0), (string)$result->getSelector()); - - // :gt - $result = $query->find('main[role=main] li:gt(6)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('Second element of list 3', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Third element of list 3', (string)$result->get(1), (string)$result->getSelector()); - $result = $query->find('main[role=main] li:gt(-2)'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Third element of list 3', (string)$result->get(0), (string)$result->getSelector()); - - // :gt:lt - $result = $query->find('footer ul:first :gt(2):lt(5)'); - $this->assertCount(5, $result, (string)$result->getSelector()); - $result = $query->find('footer ul:first :lt(5):gt(2)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - - // :gte - $result = $query->find('main[role=main] li:gte(6)'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 3', 
(string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Second element of list 3', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Third element of list 3', (string)$result->get(2), (string)$result->getSelector()); - $result = $query->find('main[role=main] li:gte(-2)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('Second element of list 3', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Third element of list 3', (string)$result->get(1), (string)$result->getSelector()); - - // :has - $result = $query->find('main[role=main] p:has(span:contains(foo))'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('p', $result->get(0)->getName(), (string)$result->getSelector()); - - // :header - $result = $query->find('main[role=main] :header'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Bootstrap starter template', (string)$result->get(0), (string)$result->getSelector()); - - // :image - $result = $query->find('main[role=main] form > :image'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // :input - $result = $query->find('main[role=main] :input'); - $this->assertCount(18, $result, (string)$result->getSelector()); - - // :lang - $result = $query->find(':lang(en)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('html', $result->get(0)->getName(), (string)$result->getSelector()); - $this->assertEquals('p', $result->get(1)->getName(), (string)$result->getSelector()); - - // :last - $result = $query->find('main[role=main] li:last'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Third element of list 3', (string)$result->get(0), (string)$result->getSelector()); - // :last-child - $result = $query->find('main[role=main] li:last-child'); - $this->assertCount(3, 
$result, (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Third element of list 2', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Third element of list 3', (string)$result->get(2), (string)$result->getSelector()); - // :last-of-type - $result = $query->find('footer div:last button:last-of-type'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 11', (string)$result->get(0), (string)$result->getSelector()); - $result = $query->find('footer div:last span:last-of-type'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 10', (string)$result->get(0), (string)$result->getSelector()); - - // :lt - $result = $query->find('main[role=main] li:lt(3)'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Second element of list 1', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(2), (string)$result->getSelector()); - $result = $query->find('main[role=main] li:lt(-5)'); - $this->assertCount(4, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Second element of list 1', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('First element of list 2', (string)$result->get(3), (string)$result->getSelector()); - - // :lte - $result = $query->find('main[role=main] li:lte(3)'); - $this->assertCount(4, $result, (string)$result->getSelector()); - $this->assertEquals('First 
element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Second element of list 1', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('First element of list 2', (string)$result->get(3), (string)$result->getSelector()); - $result = $query->find('main[role=main] li:lte(-5)'); - $this->assertCount(5, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Second element of list 1', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('First element of list 2', (string)$result->get(3), (string)$result->getSelector()); - $this->assertEquals('Second element of list 2', (string)$result->get(4), (string)$result->getSelector()); - - // not() - $result = $query->find('[role=main] ul:eq(0) > li:not(.second)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(1), (string)$result->getSelector()); - - // :nth-child - $result = $query->find('footer ul:first :nth-child(2n)'); - $this->assertCount(7, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.4', (string)$result->get(1), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(2n of .important)'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.3', (string)$result->get(0), (string)$result->getSelector()); - 
$this->assertEquals('Link 1.8', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.10', (string)$result->get(2), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(n+5)'); - $this->assertCount(10, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.5', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.14', (string)$result->get(9), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(3n+3)'); - $this->assertCount(4, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.3', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.6', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.9', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Link 1.12', (string)$result->get(3), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(3n-2)'); - $this->assertCount(5, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.4', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.7', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Link 1.10', (string)$result->get(3), (string)$result->getSelector()); - $this->assertEquals('Link 1.13', (string)$result->get(4), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(-3n+8)'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.5', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.8', (string)$result->get(2), (string)$result->getSelector()); - $result = 
$query->find('footer ul:first :nth-child(-2n+7)'); - $this->assertCount(4, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.3', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.5', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Link 1.7', (string)$result->get(3), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(odd)'); - $this->assertCount(7, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.13', (string)$result->get(6), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(even)'); - $this->assertCount(7, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.14', (string)$result->get(6), (string)$result->getSelector()); - - // :nth-last-child - $result = $query->find('footer ul:first :nth-last-child(3n+2)'); - $this->assertCount(5, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.4', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.7', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Link 1.10', (string)$result->get(3), (string)$result->getSelector()); - $this->assertEquals('Link 1.13', (string)$result->get(4), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-last-child(3n-2)'); - $this->assertCount(5, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.5', 
(string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.8', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Link 1.11', (string)$result->get(3), (string)$result->getSelector()); - $this->assertEquals('Link 1.14', (string)$result->get(4), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-last-child(-3n+8)'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.7', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.10', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.13', (string)$result->get(2), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-last-child(odd)'); - $this->assertCount(7, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.14', (string)$result->get(6), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-last-child(even)'); - $this->assertCount(7, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.13', (string)$result->get(6), (string)$result->getSelector()); - - // :nth-of-type - $result = $query->find('footer ul:first :nth-child(3n+3)'); - $this->assertCount(4, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.3', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.6', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.9', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Link 1.12', (string)$result->get(3), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(3n-2)'); - $this->assertCount(5, $result, 
(string)$result->getSelector()); - $this->assertEquals('Link 1.1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.4', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.7', (string)$result->get(2), (string)$result->getSelector()); - $this->assertEquals('Link 1.10', (string)$result->get(3), (string)$result->getSelector()); - $this->assertEquals('Link 1.13', (string)$result->get(4), (string)$result->getSelector()); - $result = $query->find('footer ul:first :nth-child(-3n+8)'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $this->assertEquals('Link 1.2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Link 1.5', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Link 1.8', (string)$result->get(2), (string)$result->getSelector()); - $result = $query->find('footer div:last span:nth-child(2n+2)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 5', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Contact 9', (string)$result->get(1), (string)$result->getSelector()); - $result = $query->find('footer div:last span:nth-of-type(2n+2)'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 8', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Contact 10', (string)$result->get(1), (string)$result->getSelector()); - - // :odd - $result = $query->find('main[role=main] li:odd'); - $this->assertCount(5, $result, (string)$result->getSelector()); - $this->assertEquals('First element of list 1', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Third element of list 1', (string)$result->get(1), (string)$result->getSelector()); - $this->assertEquals('Second element of list 2', (string)$result->get(2), (string)$result->getSelector()); - 
$this->assertEquals('First element of list 3', (string)$result->get(3), (string)$result->getSelector()); - $this->assertEquals('Third element of list 3', (string)$result->get(4), (string)$result->getSelector()); - - // :only-child - $result = $query->find('footer li:only-child'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Link 2.1', (string)$result->get(0), (string)$result->getSelector()); - - // :optional - $result = $query->find('form :input:optional'); - $this->assertCount(16, $result, (string)$result->getSelector()); - $result = $query->find('form input:optional'); - $this->assertCount(14, $result, (string)$result->getSelector()); - - // :only-of-type - $result = $query->find('footer button:only-of-type'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('Contact 3', (string)$result->get(0), (string)$result->getSelector()); - - // :parent - $result = $query->find('main[role=main] .starter-template :parent'); - $this->assertCount(8, $result, (string)$result->getSelector()); - $this->assertEquals('h1', (string)$result->get(0)->getName(), (string)$result->getSelector()); - $this->assertEquals('p', $result->get(1)->getName(), (string)$result->getSelector()); - $this->assertEquals('strong', $result->get(2)->getName(), (string)$result->getSelector()); - $this->assertEquals('p', $result->get(3)->getName(), (string)$result->getSelector()); - $this->assertEquals('span', $result->get(4)->getName(), (string)$result->getSelector()); - $this->assertEquals('p', $result->get(5)->getName(), (string)$result->getSelector()); - $this->assertEquals('span', $result->get(6)->getName(), (string)$result->getSelector()); - $this->assertEquals('span', $result->get(7)->getName(), (string)$result->getSelector()); - - // :password - $result = $query->find('body :password'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // :radio - $result = $query->find('main[role=main] :radio'); - 
$this->assertCount(3, $result, (string)$result->getSelector()); - - // :read-only / :read-write - $result = $query->find('main[role=main] *'); - $this->assertCount(55, $result, (string)$result->getSelector()); - $result = $query->find('main[role=main] :read-only'); - $this->assertCount(37, $result, (string)$result->getSelector()); - $result = $query->find('main[role=main] :read-write'); - $this->assertCount(18, $result, (string)$result->getSelector()); - - // :required - $result = $query->find('form :input:required'); - $this->assertCount(3, $result, (string)$result->getSelector()); - $result = $query->find('form input:required'); - $this->assertCount(3, $result, (string)$result->getSelector()); - - // :reset - $result = $query->find('main[role=main] :reset'); - $this->assertCount(1, $result, (string)$result->getSelector()); - - // :root - $result = $query->find(':root'); - $this->assertCount(1, $result, (string)$result->getSelector()); - $this->assertEquals('html', (string)$result->get(0)->getName(), (string)$result->getSelector()); - - // :selected - $result = $query->find('main[role=main] :selected'); - $this->assertCount(2, $result, (string)$result->getSelector()); - $this->assertEquals('Option 2', (string)$result->get(0), (string)$result->getSelector()); - $this->assertEquals('Option 3', (string)$result->get(1), (string)$result->getSelector()); - - // :submit - $result = $query->find('body :submit'); - $this->assertCount(2, $result, (string)$result->getSelector()); - - // :text - $result = $query->find(':text'); - $this->assertCount(3, $result, (string)$result->getSelector()); - } - - /** - * Test query iterator. - */ - public function testQueryIterator() - { - $query = Query::loadHtml(__DIR__ . 
'/files/test.html', true); - $result = $query->find('footer ul:first :nth-child(2n)'); - $count = 0; - $values = []; - - // Count and get elements individually - foreach ($result as $value) { - $count++; - $values[] = $value; - } - - // Count elements - $this->assertEquals(7, $count); - - // Compare elements - foreach ($values as $key => $value) { - $this->assertEquals((string)$result->get($key), $value->text()); - $this->assertEquals($result->get($key), $value->get(0)); - } - } - - /** - * Test user defined functions. - */ - public function testUserDefinedFunctions() - { - $self = $this; - - // Define function - Query::addFunction( - 'test', - function (Query $query, $arg1 = null, $arg2 = null) use ($self) { - $self->assertEquals('ul', $query->get(0)->getName()); - $self->assertEquals('Argument 1', $arg1); - $self->assertEquals('Argument 2', $arg2); - } - ); - - $query = Query::loadHtml(__DIR__ . '/files/test.html', true); - $result = $query->find('footer ul:first'); - $result->{'test'}('Argument 1', 'Argument 2'); - } -} diff --git a/tests/SelectorTest.php b/tests/SelectorTest.php deleted file mode 100644 index 3f7cc02..0000000 --- a/tests/SelectorTest.php +++ /dev/null @@ -1,80 +0,0 @@ - - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code, to the root. - */ - -namespace Berlioz\HtmlSelector\Tests; - - -use Berlioz\HtmlSelector\Exception\SelectorException; -use Berlioz\HtmlSelector\Selector; -use PHPUnit\Framework\TestCase; - -class SelectorTest extends TestCase -{ - /** - * Provider to test conversions. 
- * - * @return array - */ - public function selectorDataProvider() - { - return [// Element with class - ['select.class', - './/select[contains(concat(" ", @class, " "), " class ")]'], - // Element with 2 classes and a not comparison attribute - ['select.class.class2[attr1 != "test"]', - './/select[contains(concat(" ", @class, " "), " class ")][contains(concat(" ", @class, " "), " class2 ")][@attr1!="test"]'], - // Element with class and direct children element with attribute comparison - ['select.class > option[value="test"]', - './/select[contains(concat(" ", @class, " "), " class ")]/option[@value="test"]'], - // Class with not direct element with attribute comparison - ['.class option[value="test"]', - './/*[contains(concat(" ", @class, " "), " class ")]//option[@value="test"]'], - // Class with not direct element with just attribute name - ['.class option[value]', - './/*[contains(concat(" ", @class, " "), " class ")]//option[@value]']]; - } - - /** - * Test constructor with not valid selector. - */ - public function testConstructorNotValidSelector() - { - $this->expectException(\InvalidArgumentException::class); - new Selector('$select.class.class2[attr1 != "test"]'); - } - - /** - * Test xpath conversion with not valid selector. - */ - public function testXpathConversionNotValidSelector() - { - $this->expectException(SelectorException::class); - (new Selector('select.class.class2[attr1 != "test"]:notvalid'))->xpath(); - } - - /** - * Test Xpath conversion. 
- * - * @param string $selector - * @param string $xpath - * - * @dataProvider selectorDataProvider - */ - public function testXpathConversion($selector, $xpath) - { - $selector = new Selector($selector); - - $this->assertEquals($xpath, - $selector->xpath(), - sprintf('Invalid xpath conversion for %s', $selector)); - } -} diff --git a/tests/XpathSolverTest.php b/tests/XpathSolverTest.php new file mode 100644 index 0000000..d4f27a3 --- /dev/null +++ b/tests/XpathSolverTest.php @@ -0,0 +1,80 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code, to the root. + */ + +namespace Berlioz\HtmlSelector\Tests; + +use Berlioz\HtmlSelector\Exception\SelectorException; +use Berlioz\HtmlSelector\HtmlSelector; +use Berlioz\HtmlSelector\XpathSolver; +use PHPUnit\Framework\TestCase; + +class XpathSolverTest extends TestCase +{ + public function provider(): array + { + return [ + [ + 'selector' => '*', + 'xpath' => './/*' + ], + [ + 'selector' => '*[data-foo="bar"]', + 'xpath' => './/*[@data-foo="bar"]' + ], + [ + 'selector' => '#id', + 'xpath' => './/*[@id="id"]' + ], + [ + 'selector' => '.foo.bar', + 'xpath' => './/*[contains(concat(" ", @class, " "), " foo ")][contains(concat(" ", @class, " "), " bar ")]' + ], + [ + 'selector' => '[foo="value"][bar="value2"]', + 'xpath' => './/*[@foo="value"][@bar="value2"]' + ], + [ + 'selector' => '[role=main] ul:eq(0) > li', + 'xpath' => '(.//*[@role="main"]//ul)[position() = 1]/li' + ], + [ + 'selector' => 'footer button:only-of-type', + 'xpath' => './/footer//button[count(../button)=1]' + ], + [ + 'selector' => 'main[role=main] .starter-template :parent', + 'xpath' => './/main[@role="main"]//*[contains(concat(" ", @class, " "), " starter-template ")]//*[normalize-space()]' + ], + [ + 'selector' => 'footer ul:first :nth-child(2n of .foo)', + 'xpath' => '((.//footer//ul)[1]//*[self::*[contains(concat(" ", @class, " "), " foo ")]][position() > -2])[((last() - 
position() + 1) - 0) mod 2 = 0]' + ], + ]; + } + + /** + * Test xpath. + * + * @param string $selector + * @param string $xpath + * + * @throws SelectorException + * @dataProvider provider + */ + public function testSolve(string $selector, string $xpath) + { + $htmlSelector = new HtmlSelector(); + $xpathSolver = new XpathSolver($htmlSelector->getPseudoClasses()); + + $this->assertEquals($xpath, $xpathSolver->solve($selector)); + } +} diff --git a/tests/bootstrap.php b/tests/bootstrap.php index 06dbced..b919281 100644 --- a/tests/bootstrap.php +++ b/tests/bootstrap.php @@ -1,9 +1,9 @@ * * For the full copyright and license information, please view the LICENSE diff --git a/tests/files/test.html b/tests/files/test.html index 586adae..fde0317 100644 --- a/tests/files/test.html +++ b/tests/files/test.html @@ -1,3 +1,14 @@ + + diff --git a/tests/files/test1.html b/tests/files/test1.html index 660f521..4941f6a 100644 --- a/tests/files/test1.html +++ b/tests/files/test1.html @@ -1,3 +1,14 @@ + + diff --git a/tests/files/test2.html b/tests/files/test2.html index 0be50f8..b99f597 100644 --- a/tests/files/test2.html +++ b/tests/files/test2.html @@ -1,3 +1,14 @@ + + diff --git a/tests/files/test3.html b/tests/files/test3.html index a6a64cd..d2b8fcb 100644 --- a/tests/files/test3.html +++ b/tests/files/test3.html @@ -1,3 +1,14 @@ + + diff --git a/tests/files/test4.html b/tests/files/test4.html index 19ef465..72d5e1b 100644 --- a/tests/files/test4.html +++ b/tests/files/test4.html @@ -1,3 +1,14 @@ + + diff --git a/tests/files/test5.html b/tests/files/test5.html index 611bb62..38ea792 100644 --- a/tests/files/test5.html +++ b/tests/files/test5.html @@ -1,3 +1,14 @@ + +

Only HTML, not complete page !

  • First element of list 1
  • diff --git a/tests/files/test_encoding.html b/tests/files/test_encoding.html index 8732561..e6ca2d7 100644 --- a/tests/files/test_encoding.html +++ b/tests/files/test_encoding.html @@ -1,11 +1,22 @@ + + - + -

    Ceci est un test avec des accents éàèô

    +

    Ceci est un test avec des accents ����

    diff --git a/tests/files/test_utf8.html b/tests/files/test_utf8.html index 59ff2a9..a13ac88 100644 --- a/tests/files/test_utf8.html +++ b/tests/files/test_utf8.html @@ -1,3 +1,14 @@ + + Test éèÃ