Skip to content

Commit

Permalink
Performance boost via isset() fast lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
halaxa committed Jan 4, 2022
1 parent abd39f3 commit 7e44e5d
Showing 1 changed file with 38 additions and 29 deletions.
67 changes: 38 additions & 29 deletions src/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,18 @@ public function __construct($jsonChunks)
#[\ReturnTypeWillChange]
public function getIterator()
{
// init ASCII byte map as variable variables for the fastest lookup
// init the map of JSON-structure (in)significant bytes as local variable variables for the fastest lookup
foreach (range(0,255) as $ord) {
${chr($ord)} = ! in_array(
if (! in_array(
chr($ord),
["\\", '"', "\xEF", "\xBB", "\xBF", ' ', "\n", "\r", "\t", '{', '}', '[', ']', ':', ',']
);
)) {
${chr($ord)} = true;
}
}

$boundary = $this->mapOfBoundaryBytes();
$tokenBoundaries = $this->tokenBoundaries();
$colonCommaBracket = $this->colonCommaBracketTokenBoundaries();

$inString = false;
$tokenBuffer = '';
Expand All @@ -41,13 +44,14 @@ public function getIterator()
$bytesLength = strlen($jsonChunk);
for ($i = 0; $i < $bytesLength; ++$i) {
$byte = $jsonChunk[$i];

if ($escaping) {
$escaping = false;
$tokenBuffer .= $byte;
continue;
}

if ($$byte) { // is non-significant byte
if (isset($$byte)) { // is a JSON-structure insignificant byte
$tokenBuffer .= $byte;
continue;
}
Expand All @@ -62,18 +66,16 @@ public function getIterator()
continue;
}

if (isset($boundary[$byte])) { // if byte is any token boundary
if (isset($tokenBoundaries[$byte])) {
if ($tokenBuffer != '') {
yield $tokenBuffer;
$tokenBuffer = '';
}
if ($boundary[$byte]) { // if byte is not whitespace token boundary
if (isset($colonCommaBracket[$byte])) {
yield $byte;
}
} else {
if ($byte == '"') {
$inString = true;
}
} else { // else branch matches `"` but also `\` outside of a string literal which is an error anyway but strictly speaking not correctly parsed token
$inString = true;
$tokenBuffer .= $byte;
}
}
Expand All @@ -83,29 +85,36 @@ public function getIterator()
}
}

private function mapOfBoundaryBytes(): array
private function tokenBoundaries()
{
$utf8bom1 = "\xEF";
$utf8bom2 = "\xBB";
$utf8bom3 = "\xBF";

$boundary = [];
$boundary[$utf8bom1] = false;
$boundary[$utf8bom2] = false;
$boundary[$utf8bom3] = false;
$boundary[' '] = false;
$boundary["\n"] = false;
$boundary["\r"] = false;
$boundary["\t"] = false;

$boundary['{'] = true;
$boundary['}'] = true;
$boundary['['] = true;
$boundary[']'] = true;
$boundary[':'] = true;
$boundary[','] = true;

return $boundary;
return array_merge(
[
$utf8bom1 => true,
$utf8bom2 => true,
$utf8bom3 => true,
' ' => true,
"\n" => true,
"\r" => true,
"\t" => true,
],
$this->colonCommaBracketTokenBoundaries()
);
}

/**
 * Builds the lookup map of single-byte structural JSON tokens.
 *
 * The keys are the bytes `{`, `}`, `[`, `]`, `:` and `,` (in that order) and
 * every value is `true`, so a byte can be tested for membership with
 * `isset($map[$byte])` instead of a linear search.
 *
 * @return array<string, true>
 */
private function colonCommaBracketTokenBoundaries(): array
{
    $structuralBytes = ['{', '}', '[', ']', ':', ','];

    return array_fill_keys($structuralBytes, true);
}

public function getPosition(): int
Expand Down

0 comments on commit 7e44e5d

Please sign in to comment.