From 8f7a8760f81c26c079762f8d454cb95d7b4bf627 Mon Sep 17 00:00:00 2001 From: Paul Crovella Date: Mon, 5 Mar 2018 02:43:45 -0800 Subject: [PATCH] Simplify parser interface, refactor tree building --- composer.json | 1 - src/JsonReader.php | 121 +++++----- src/Parser/JsonParser.php | 217 ++++++++++++++++++ src/Parser/Parser.php | 204 +--------------- test/JsonReaderTest.php | 26 ++- .../{ParserTest.php => JsonParserTest.php} | 23 +- 6 files changed, 309 insertions(+), 283 deletions(-) create mode 100644 src/Parser/JsonParser.php rename test/Parser/{ParserTest.php => JsonParserTest.php} (94%) diff --git a/composer.json b/composer.json index 7b055d5..ec65e65 100644 --- a/composer.json +++ b/composer.json @@ -19,7 +19,6 @@ "require": { "php": "^7.0", "ext-intl": "*", - "pcrov/iteratorstackiterator": "^1", "pcrov/unicode": "^0.1" }, "require-dev": { diff --git a/src/JsonReader.php b/src/JsonReader.php index d16e413..202c779 100644 --- a/src/JsonReader.php +++ b/src/JsonReader.php @@ -2,39 +2,39 @@ namespace pcrov\JsonReader; -use pcrov\IteratorStackIterator; use pcrov\JsonReader\InputStream\IOException; use pcrov\JsonReader\InputStream\Stream; use pcrov\JsonReader\InputStream\Uri; use pcrov\JsonReader\InputStream\StringInput; -use pcrov\JsonReader\Parser\Parser; +use pcrov\JsonReader\Parser\JsonParser; use pcrov\JsonReader\Parser\Lexer; +use pcrov\JsonReader\Parser\Parser; class JsonReader { /* Node types */ - const NONE = 0; - const STRING = 1; - const NUMBER = 2; - const BOOL = 3; - const NULL = 4; - const ARRAY = 5; - const END_ARRAY = 6; - const OBJECT = 7; - const END_OBJECT = 8; + const NONE = "NONE"; + const STRING = "STRING"; + const NUMBER = "NUMBER"; + const BOOL = "BOOL"; + const NULL = "NULL"; + const ARRAY = "ARRAY"; + const END_ARRAY = "END_ARRAY"; + const OBJECT = "OBJECT"; + const END_OBJECT = "END_OBJECT"; /* Options */ - const FLOAT_AS_STRING = 1; + const FLOAT_AS_STRING = 0b00000001; /** - * @var IteratorStackIterator|null + * @var Parser|null */ private $parser; /** * @var array[] Tuples from the parser, cached during tree building. */ - private $parseCache = []; + private $cache = []; /** * @var int bit field of reader options @@ -42,7 +42,7 @@ class JsonReader private $options; /** - * @var int + * @var string */ private $type = self::NONE; @@ -69,13 +69,10 @@ public function __construct(int $options = 0) /** * @return void */ - public function init(\Traversable $parser) + public function init(Parser $parser) { $this->close(); - $stack = new IteratorStackIterator(); - $stack->push(new \IteratorIterator($parser)); - $stack->rewind(); - $this->parser = $stack; + $this->parser = $parser; } /** @@ -83,7 +80,7 @@ public function init(\Traversable $parser) */ public function json(string $json) { - $this->init(new Parser(new Lexer(new StringInput($json)))); + $this->init(new JsonParser(new Lexer(new StringInput($json)))); } /** @@ -93,7 +90,7 @@ public function json(string $json) */ public function open(string $uri) { - $this->init(new Parser(new Lexer(new Uri($uri)))); + $this->init(new JsonParser(new Lexer(new Uri($uri)))); } /** @@ -104,13 +101,13 @@ public function open(string $uri) */ public function stream($stream) { - $this->init(new Parser(new Lexer(new Stream($stream)))); + $this->init(new JsonParser(new Lexer(new Stream($stream)))); } /** - * @return int One of the JsonReader node constants. + * @return string One of the JsonReader node constants. */ - public function type(): int + public function type(): string { return $this->type; } @@ -128,19 +125,18 @@ public function name() */ public function value() { - $type = $this->type(); + $type = $this->type; + $value = &$this->value; - if ($this->value === null && ($type === self::ARRAY || $type === self::OBJECT)) { - $this->value = $this->buildTree($type); - $this->parser->push(new \ArrayIterator($this->parseCache)); - $this->parseCache = []; + if ($value === null && ($type === self::ARRAY || $type === self::OBJECT)) { + $value = $this->buildTree($type, empty($this->cache)); } if ($type === self::NUMBER) { - return $this->castNumber($this->value); + return $this->castNumber($value); } - return $this->value; + return $value; } public function depth(): int @@ -151,32 +147,32 @@ public function depth(): int /** * @throws Exception */ - public function next(string $name = null): bool + public function next(string $target = null): bool { if ($this->parser === null) { throw new Exception("Load data before trying to read."); } - $depth = $this->depth(); - $end = $this->getEndType($this->type()); + $currentDepth = $this->depth; + $endType = $this->getEndType($this->type); while ($result = $this->read()) { - if ($this->depth() <= $depth) { + if ($this->depth <= $currentDepth) { break; } } // If we were on an object or array when called, we want to skip its end node. - if ($end !== self::NONE && - $this->depth() === $depth && - $this->type() === $end + if ($endType !== self::NONE && + $this->depth === $currentDepth && + $this->type === $endType ) { $result = $this->read(); } - if ($name !== null) { + if ($target !== null) { do { - if ($this->name() === $name) { + if ($this->name === $target) { break; } } while ($result = $this->next()); @@ -188,7 +184,7 @@ public function next(string $name = null): bool /** * @throws Exception */ - public function read(string $name = null): bool + public function read(string $target = null): bool { $parser = $this->parser; @@ -196,26 +192,28 @@ public function read(string $name = null): bool throw new Exception("Load data before trying to read."); } - if (!$parser->valid()) { + if (empty($this->cache)) { + $node = $parser->read(); + } else { + $node = \array_shift($this->cache); + } + + if ($node === null) { $this->resetNode(); return false; } - //@formatter:off silly ide list ( $this->type, $this->name, $this->value, $this->depth - ) = $parser->current(); - //@formatter:on - - $parser->next(); + ) = $node; $result = true; - if ($name !== null) { + if ($target !== null) { do { - if ($this->name() === $name) { + if ($this->name === $target) { break; } } while ($result = $this->read()); @@ -233,26 +231,31 @@ public function close() $this->parser = null; } - private function buildTree(int $type): array + private function buildTree(string $type, bool $writeCache): array { \assert($type === self::ARRAY || $type === self::OBJECT); $parser = $this->parser; + $cache = &$this->cache; $end = $this->getEndType($type); $result = []; while (true) { - $current = $parser->current(); - $this->parseCache[] = $current; - list ($type, $name, $value) = $current; - $parser->next(); + if ($writeCache) { + $node = $parser->read(); + $cache[] = $node; + } else { + $node = \current($cache); + \next($cache); + } + list ($type, $name, $value) = $node; if ($type === $end) { break; } if ($type === self::ARRAY || $type === self::OBJECT) { - $value = $this->buildTree($type); + $value = $this->buildTree($type, $writeCache); } if ($type === self::NUMBER) { @@ -269,6 +272,9 @@ private function buildTree(int $type): array return $result; } + /** + * @return int|float|string + */ private function castNumber(string $number) { $cast = +$number; @@ -278,7 +284,7 @@ private function castNumber(string $number) return $cast; } - private function getEndType(int $type): int + private function getEndType(string $type): string { switch ($type) { case self::ARRAY: @@ -296,5 +302,6 @@ private function resetNode() $this->name = null; $this->value = null; $this->depth = 0; + $this->cache = []; } } diff --git a/src/Parser/JsonParser.php b/src/Parser/JsonParser.php new file mode 100644 index 0000000..e054c77 --- /dev/null +++ b/src/Parser/JsonParser.php @@ -0,0 +1,217 @@ + JsonReader::STRING, + Tokenizer::T_NUMBER => JsonReader::NUMBER, + Tokenizer::T_TRUE => JsonReader::BOOL, + Tokenizer::T_FALSE => JsonReader::BOOL, + Tokenizer::T_NULL => JsonReader::NULL, + Tokenizer::T_BEGIN_ARRAY => JsonReader::ARRAY, + Tokenizer::T_END_ARRAY => JsonReader::END_ARRAY, + Tokenizer::T_BEGIN_OBJECT => JsonReader::OBJECT, + Tokenizer::T_END_OBJECT => JsonReader::END_OBJECT + ]; + + /** + * @var Tokenizer + */ + private $tokenizer; + + /** + * @var int + */ + private $depth = 0; + + /** + * @var string|null Name of the current object pair. + */ + private $name; + + /** + * @var \Generator + */ + private $nodeGenerator; + + public function __construct(Tokenizer $tokenizer) + { + $this->tokenizer = $tokenizer; + $this->nodeGenerator = $this->getNodeGenerator(); + } + + /** + * @throws ParseException + */ + public function read() + { + $this->nodeGenerator->next(); + return $this->nodeGenerator->current(); + } + + /** + * @throws ParseException + */ + private function getNodeGenerator(): \Generator + { + yield; // skipped by first read() + + $tokenizer = $this->tokenizer; + yield from $this->parseValue($tokenizer->read()); + + $token = $tokenizer->read(); + if ($token[0] !== Tokenizer::T_EOF) { + throw new ParseException($this->getExceptionMessage($token)); + } + } + + private function getExceptionMessage(array $token): string + { + list ($tokenType, , $tokenLine) = $token; + + if ($tokenType === Tokenizer::T_EOF) { + return \sprintf( + "Line %d: Unexpected end of file.", + $tokenLine + ); + } + + return \sprintf( + "Line %d: Unexpected token %s.", + $tokenLine, + $tokenType + ); + } + + /** + * @throws ParseException + */ + private function parseArray(): \Generator + { + $tokenizer = $this->tokenizer; + $depth = &$this->depth; + $name = &$this->name; + + $arrayName = $name; + yield [JsonReader::ARRAY, $arrayName, null, $depth]; + + $name = null; + $depth++; + $token = $tokenizer->read(); + $tokenType = $token[0]; + + if ($tokenType !== Tokenizer::T_END_ARRAY) { + yield from $this->parseValue($token); + $token = $tokenizer->read(); + $tokenType = $token[0]; + + while ($tokenType === Tokenizer::T_COMMA) { + yield from $this->parseValue($tokenizer->read()); + $token = $tokenizer->read(); + $tokenType = $token[0]; + } + } + + if ($tokenType !== Tokenizer::T_END_ARRAY) { + throw new ParseException($this->getExceptionMessage($token)); + } + + $depth--; + yield [JsonReader::END_ARRAY, $arrayName, null, $depth]; + } + + /** + * @throws ParseException + */ + private function parseObject(): \Generator + { + $tokenizer = $this->tokenizer; + $depth = &$this->depth; + + $objectName = $this->name; + yield [JsonReader::OBJECT, $objectName, null, $depth]; + + $depth++; + $token = $tokenizer->read(); + $tokenType = $token[0]; + + // name:value property pairs + if ($tokenType === Tokenizer::T_STRING) { + yield from $this->parsePair($token); + $token = $tokenizer->read(); + $tokenType = $token[0]; + + while ($tokenType === Tokenizer::T_COMMA) { + yield from $this->parsePair($tokenizer->read()); + $token = $tokenizer->read(); + $tokenType = $token[0]; + } + } + + if ($tokenType !== Tokenizer::T_END_OBJECT) { + throw new ParseException($this->getExceptionMessage($token)); + } + + $depth--; + yield [JsonReader::END_OBJECT, $objectName, null, $depth]; + } + + /** + * @throws ParseException + */ + private function parsePair(array $token): \Generator + { + $tokenizer = $this->tokenizer; + $name = &$this->name; + + // name + list($tokenType, $tokenValue) = $token; + if ($tokenType !== Tokenizer::T_STRING) { + throw new ParseException($this->getExceptionMessage($token)); + } + $name = $tokenValue; + + $token = $tokenizer->read(); + // : + if ($token[0] !== Tokenizer::T_COLON) { + throw new ParseException($this->getExceptionMessage($token)); + } + + // value + yield from $this->parseValue($tokenizer->read()); + $name = null; + } + + /** + * @throws ParseException + */ + private function parseValue(array $token): \Generator + { + list($tokenType, $tokenValue) = $token; + + switch ($tokenType) { + case Tokenizer::T_STRING: + case Tokenizer::T_NUMBER: + case Tokenizer::T_TRUE: + case Tokenizer::T_FALSE: + case Tokenizer::T_NULL: + yield [self::$tokenTypeMap[$tokenType], $this->name, $tokenValue, $this->depth]; + break; + case Tokenizer::T_BEGIN_ARRAY: + yield from $this->parseArray(); + break; + case Tokenizer::T_BEGIN_OBJECT: + yield from $this->parseObject(); + break; + default: + throw new ParseException($this->getExceptionMessage($token)); + } + } +} diff --git a/src/Parser/Parser.php b/src/Parser/Parser.php index 81809e1..13348d5 100644 --- a/src/Parser/Parser.php +++ b/src/Parser/Parser.php @@ -2,208 +2,10 @@ namespace pcrov\JsonReader\Parser; -use pcrov\JsonReader\JsonReader; - -final class Parser implements \IteratorAggregate +interface Parser { /** - * @var array Map of tokens to node types. - */ - private $tokenTypeMap = [ - Tokenizer::T_STRING => JsonReader::STRING, - Tokenizer::T_NUMBER => JsonReader::NUMBER, - Tokenizer::T_TRUE => JsonReader::BOOL, - Tokenizer::T_FALSE => JsonReader::BOOL, - Tokenizer::T_NULL => JsonReader::NULL, - Tokenizer::T_BEGIN_ARRAY => JsonReader::ARRAY, - Tokenizer::T_END_ARRAY => JsonReader::END_ARRAY, - Tokenizer::T_BEGIN_OBJECT => JsonReader::OBJECT, - Tokenizer::T_END_OBJECT => JsonReader::END_OBJECT - ]; - - /** - * @var Tokenizer - */ - private $tokenizer; - - /** - * @var int - */ - private $depth; - - /** - * @var string|null Name of the current object pair. - */ - private $name; - - public function __construct(Tokenizer $tokenizer) - { - $this->tokenizer = $tokenizer; - } - - /** - * Generates tuples in the form of: - * [$type, $name, $value, $depth] - * - * Objects and arrays will have a value of null. The consumer should use a - * tree builder to flesh these if desired. - * - * @return \Generator - * @throws ParseException - */ - public function getIterator(): \Generator - { - $this->name = null; - $this->depth = 0; - $tokenizer = $this->tokenizer; - - yield from $this->parseValue($tokenizer->read()); - - $token = $tokenizer->read(); - if ($token[0] !== Tokenizer::T_EOF) { - throw new ParseException($this->getExceptionMessage($token)); - } - } - - private function getExceptionMessage(array $token): string - { - list ($tokenType, , $tokenLine) = $token; - - if ($tokenType === Tokenizer::T_EOF) { - return \sprintf( - "Line %d: Unexpected end of file.", - $tokenLine - ); - } - - return \sprintf( - "Line %d: Unexpected token %s.", - $tokenLine, - $tokenType - ); - } - - /** - * @throws ParseException + * @return array|null Tuples in the form of [$type, $name, $value, $depth], null when finished. */ - private function parseArray(): \Generator - { - $tokenizer = $this->tokenizer; - $depth = &$this->depth; - - $name = $this->name; - yield [JsonReader::ARRAY, $name, null, $depth]; - - $this->name = null; - $depth++; - $token = $tokenizer->read(); - $tokenType = $token[0]; - - if ($tokenType !== Tokenizer::T_END_ARRAY) { - yield from $this->parseValue($token); - $token = $tokenizer->read(); - $tokenType = $token[0]; - - while ($tokenType === Tokenizer::T_COMMA) { - yield from $this->parseValue($tokenizer->read()); - $token = $tokenizer->read(); - $tokenType = $token[0]; - } - } - - if ($tokenType !== Tokenizer::T_END_ARRAY) { - throw new ParseException($this->getExceptionMessage($token)); - } - - $depth--; - yield [JsonReader::END_ARRAY, $name, null, $depth]; - } - - /** - * @throws ParseException - */ - private function parseObject(): \Generator - { - $tokenizer = $this->tokenizer; - $depth = &$this->depth; - - $name = $this->name; - yield [JsonReader::OBJECT, $name, null, $depth]; - - $depth++; - $token = $tokenizer->read(); - $tokenType = $token[0]; - - // name:value property pairs - if ($tokenType === Tokenizer::T_STRING) { - yield from $this->parsePair($token); - $token = $tokenizer->read(); - $tokenType = $token[0]; - - while ($tokenType === Tokenizer::T_COMMA) { - yield from $this->parsePair($tokenizer->read()); - $token = $tokenizer->read(); - $tokenType = $token[0]; - } - } - - if ($tokenType !== Tokenizer::T_END_OBJECT) { - throw new ParseException($this->getExceptionMessage($token)); - } - - $depth--; - yield [JsonReader::END_OBJECT, $name, null, $depth]; - } - - /** - * @throws ParseException - */ - private function parsePair(array $token): \Generator - { - $tokenizer = $this->tokenizer; - - // name - list($tokenType, $tokenValue) = $token; - if ($tokenType !== Tokenizer::T_STRING) { - throw new ParseException($this->getExceptionMessage($token)); - } - $this->name = $tokenValue; - - $token = $tokenizer->read(); - $tokenType = $token[0]; - // : - if ($tokenType !== Tokenizer::T_COLON) { - throw new ParseException($this->getExceptionMessage($token)); - } - - // value - yield from $this->parseValue($tokenizer->read()); - $this->name = null; - } - - /** - * @throws ParseException - */ - private function parseValue(array $token): \Generator - { - list($tokenType, $tokenValue) = $token; - - switch ($tokenType) { - case Tokenizer::T_STRING: - case Tokenizer::T_NUMBER: - case Tokenizer::T_TRUE: - case Tokenizer::T_FALSE: - case Tokenizer::T_NULL: - yield [$this->tokenTypeMap[$tokenType], $this->name, $tokenValue, $this->depth]; - break; - case Tokenizer::T_BEGIN_ARRAY: - yield from $this->parseArray(); - break; - case Tokenizer::T_BEGIN_OBJECT: - yield from $this->parseObject(); - break; - default: - throw new ParseException($this->getExceptionMessage($token)); - } - } + public function read(); } diff --git a/test/JsonReaderTest.php b/test/JsonReaderTest.php index 676bfde..cfe89fd 100644 --- a/test/JsonReaderTest.php +++ b/test/JsonReaderTest.php @@ -2,6 +2,7 @@ namespace pcrov\JsonReader; +use pcrov\JsonReader\Parser\Parser; use PHPUnit\Framework\TestCase; class JsonReaderTest extends TestCase @@ -15,7 +16,7 @@ class JsonReaderTest extends TestCase public function setUp() { $this->reader = new JsonReader(); - $this->parser = new class implements \IteratorAggregate + $this->parser = new class implements Parser { private $nodes = [ [JsonReader::OBJECT, null, null, 0], @@ -37,9 +38,16 @@ public function setUp() [JsonReader::END_OBJECT, null, null, 0], ]; - public function getIterator(): \Generator + public function read() { - yield from $this->nodes; + $nodes = &$this->nodes; + + if (($current = \current($nodes)) === false) { + return null; + } + next($nodes); + + return $current; } }; } @@ -86,7 +94,7 @@ public function testInitialState() { $reader = $this->reader; $this->assertSame(0, $reader->depth()); - $this->assertSame(0, $reader->type()); + $this->assertSame(JsonReader::NONE, $reader->type()); $this->assertNull($reader->name()); $this->assertNull($reader->value()); } @@ -96,7 +104,7 @@ public function testStateFollowingInit() $reader = $this->reader; $reader->init($this->parser); $this->assertSame(0, $reader->depth()); - $this->assertSame(0, $reader->type()); + $this->assertSame(JsonReader::NONE, $reader->type()); $this->assertNull($reader->name()); $this->assertNull($reader->value()); } @@ -148,7 +156,7 @@ public function testRead() } } - public function testReadWithOptionFloatsAsStrings() + public function testReadWithOptionFloatAsString() { $expected = [ [ @@ -224,7 +232,7 @@ public function testStateFollowingReadCompletion() $reader->init($this->parser); while ($reader->read()); $this->assertSame(0, $reader->depth()); - $this->assertSame(0, $reader->type()); + $this->assertSame(JsonReader::NONE, $reader->type()); $this->assertNull($reader->name()); $this->assertNull($reader->value()); } @@ -307,7 +315,7 @@ public function testStateFollowingNextCompletion() $reader->init($this->parser); while ($reader->next()); $this->assertSame(0, $reader->depth()); - $this->assertSame(0, $reader->type()); + $this->assertSame(JsonReader::NONE, $reader->type()); $this->assertNull($reader->name()); $this->assertNull($reader->value()); } @@ -319,7 +327,7 @@ public function testClose() $reader->read(); $reader->close(); $this->assertSame(0, $reader->depth()); - $this->assertSame(0, $reader->type()); + $this->assertSame(JsonReader::NONE, $reader->type()); $this->assertNull($reader->name()); $this->assertNull($reader->value()); } diff --git a/test/Parser/ParserTest.php b/test/Parser/JsonParserTest.php similarity index 94% rename from test/Parser/ParserTest.php rename to test/Parser/JsonParserTest.php index 22c2340..bb6dce0 100644 --- a/test/Parser/ParserTest.php +++ b/test/Parser/JsonParserTest.php @@ -5,7 +5,7 @@ use pcrov\JsonReader\JsonReader; use PHPUnit\Framework\TestCase; -class ParserTest extends TestCase +class JsonParserTest extends TestCase { /** @var Tokenizer */ @@ -16,17 +16,12 @@ public function testParser($input, $expected) { $tokenizer = $this->tokenizer; $tokenizer->setTokens($input); + $parser = new JsonParser($tokenizer); - $parser = new \IteratorIterator(new Parser($tokenizer)); - $parser->rewind(); - $this->assertTrue($parser->valid()); - - $iterator = new \MultipleIterator(\MultipleIterator::MIT_NEED_ANY | \MultipleIterator::MIT_KEYS_ASSOC); - $iterator->attachIterator($parser, "parser"); - $iterator->attachIterator(new \ArrayIterator($expected), "expected"); - - foreach ($iterator as $tuple) { - $this->assertSame($tuple["expected"], $tuple["parser"]); + $i = 0; + while (($node = $parser->read()) !== null) { + self::assertSame($expected[$i], $node); + $i++; } } @@ -38,11 +33,9 @@ public function testParserError($input, $expectedMessage) $tokenizer = $this->tokenizer; $tokenizer->setTokens($input); + $parser = new JsonParser($tokenizer); - $parser = new \IteratorIterator(new Parser($tokenizer)); - $parser->rewind(); - - foreach ($parser as $_) { + while ($parser->read() !== null) { ; } }