From 0690568fca769d304cefada6663cedcb5b9c31c9 Mon Sep 17 00:00:00 2001
From: tsctx <91457664+tsctx@users.noreply.github.com>
Date: Fri, 8 Dec 2023 17:58:21 +0900
Subject: [PATCH] perf(util/parseHeaders): If the header name is buffer (#2501)

* initial implementation
* test: fix
* compatible API
* fix: tree
* add benchmark
* fix: lint
* fix: benchmark
* perf
* use number key
* remove unsafe
* format & add comment
* fix: benchmark import path
* better benchmark
* better benchmark
* perf: rewrite tree
* test: fuzz test
* fix test
* test
* test: remove tree
* refactor
* refactor
* suggested change
* test: refactor
* add use strict
* test: refactor
* add type comment
* check length
* test: perf
* improve type
* fix: type
---
 benchmarks/parseHeaders.mjs | 105 +++++++++++++++++++++++++++++
 lib/core/constants.js       |   2 +
 lib/core/tree.js            | 129 ++++++++++++++++++++++++++++++++++++
 lib/core/util.js            |  32 ++++++---
 package.json                |   1 +
 test/tree.js                |  40 +++++++++++
 test/util.js                |   5 +-
 7 files changed, 303 insertions(+), 11 deletions(-)
 create mode 100644 benchmarks/parseHeaders.mjs
 create mode 100644 lib/core/tree.js
 create mode 100644 test/tree.js

diff --git a/benchmarks/parseHeaders.mjs b/benchmarks/parseHeaders.mjs
new file mode 100644
index 00000000000..6fb898062b3
--- /dev/null
+++ b/benchmarks/parseHeaders.mjs
@@ -0,0 +1,105 @@
+import { bench, group, run } from 'mitata'
+import { parseHeaders } from '../lib/core/util.js'
+
+const target = [
+  {
+    'Content-Type': 'application/json',
+    Date: 'Wed, 01 Nov 2023 00:00:00 GMT',
+    'Powered-By': 'NodeJS',
+    'Content-Encoding': 'gzip',
+    'Set-Cookie': '__Secure-ID=123; Secure; Domain=example.com',
+    'Content-Length': '150',
+    Vary: 'Accept-Encoding, Accept, X-Requested-With'
+  },
+  {
+    'Content-Type': 'text/html; charset=UTF-8',
+    'Content-Length': '1234',
+    Date: 'Wed, 06 Dec 2023 12:47:57 GMT',
+    Server: 'Bing'
+  },
+  {
+    'Content-Type': 'image/jpeg',
+    'Content-Length': '56789',
+    Date: 'Wed, 06 Dec 2023 12:48:12 GMT',
+    Server: 'Bing',
+    ETag: '"a1b2c3d4e5f6g7h8i9j0"'
+  },
+  {
+    Cookie: 'session_id=1234567890abcdef',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
+    Host: 'www.bing.com',
+    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Language': 'en-US,en;q=0.5',
+    'Accept-Encoding': 'gzip, deflate, br'
+  },
+  {
+    Location: 'https://www.bing.com/search?q=bing',
+    Status: '302 Found',
+    Date: 'Wed, 06 Dec 2023 12:48:27 GMT',
+    Server: 'Bing',
+    'Content-Type': 'text/html; charset=UTF-8',
+    'Content-Length': '0'
+  },
+  {
+    'Content-Type':
+      'multipart/form-data; boundary=----WebKitFormBoundary1234567890',
+    'Content-Length': '98765',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
+    Host: 'www.bing.com',
+    Accept: '*/*',
+    'Accept-Language': 'en-US,en;q=0.5',
+    'Accept-Encoding': 'gzip, deflate, br'
+  },
+  {
+    'Content-Type': 'application/json; charset=UTF-8',
+    'Content-Length': '2345',
+    Date: 'Wed, 06 Dec 2023 12:48:42 GMT',
+    Server: 'Bing',
+    Status: '200 OK',
+    'Cache-Control': 'no-cache, no-store, must-revalidate'
+  },
+  {
+    Host: 'www.example.com',
+    Connection: 'keep-alive',
+    Accept: 'text/html, application/xhtml+xml, application/xml;q=0.9,;q=0.8',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
+  }
+]
+
+const headers = Array.from(target, (x) =>
+  Object.entries(x)
+    .flat()
+    .map((c) => Buffer.from(c))
+)
+
+const headersIrregular = Array.from(
+  target,
+  (x) => Object.entries(x)
+    .flat()
+    .map((c) => Buffer.from(c.toUpperCase()))
+)
+
+// avoid JIT bias
+bench('noop', () => {})
+bench('noop', () => {})
+bench('noop', () => {})
+bench('noop', () => {})
+bench('noop', () => {})
+bench('noop', () => {})
+
+group('parseHeaders', () => {
+  bench('parseHeaders', () => {
+    for (let i = 0; i < headers.length; ++i) {
+      parseHeaders(headers[i])
+    }
+  })
+  bench('parseHeaders (irregular)', () => {
+    for (let i = 0; i < headersIrregular.length; ++i) {
+      parseHeaders(headersIrregular[i])
+    }
+  })
+})
+
+await new Promise((resolve) => setTimeout(resolve, 7000))
+
+await run()
diff --git a/lib/core/constants.js b/lib/core/constants.js
index 0f827cc4ae0..6ec770dd533 100644
--- a/lib/core/constants.js
+++ b/lib/core/constants.js
@@ -1,3 +1,5 @@
+'use strict'
+
 /** @type {Record} */
 const headerNameLowerCasedRecord = {}
 
diff --git a/lib/core/tree.js b/lib/core/tree.js
new file mode 100644
index 00000000000..aa1641d217f
--- /dev/null
+++ b/lib/core/tree.js
@@ -0,0 +1,129 @@
+'use strict'
+
+const { wellknownHeaderNames } = require('./constants')
+
+class TstNode {
+  /** @type {any} */
+  value = null
+  /** @type {null | TstNode} */
+  left = null
+  /** @type {null | TstNode} */
+  middle = null
+  /** @type {null | TstNode} */
+  right = null
+  /** @type {number} */
+  code
+  /**
+   * @param {Uint8Array} key
+   * @param {any} value
+   */
+  constructor (key, value) {
+    if (key.length === 0) {
+      throw new TypeError('Unreachable')
+    }
+    this.code = key[0]
+    if (key.length > 1) {
+      this.middle = new TstNode(key.subarray(1), value)
+    } else {
+      this.value = value
+    }
+  }
+
+  /**
+   * @param {Uint8Array} key
+   * @param {any} value
+   */
+  add (key, value) {
+    if (key.length === 0) {
+      throw new TypeError('Unreachable')
+    }
+    const code = key[0]
+    if (this.code === code) {
+      if (key.length === 1) {
+        this.value = value
+      } else if (this.middle !== null) {
+        this.middle.add(key.subarray(1), value)
+      } else {
+        this.middle = new TstNode(key.subarray(1), value)
+      }
+    } else if (this.code < code) {
+      if (this.left !== null) {
+        this.left.add(key, value)
+      } else {
+        this.left = new TstNode(key, value)
+      }
+    } else {
+      if (this.right !== null) {
+        this.right.add(key, value)
+      } else {
+        this.right = new TstNode(key, value)
+      }
+    }
+  }
+
+  /**
+   * @param {Uint8Array} key
+   * @return {TstNode | null}
+   */
+  search (key) {
+    const keylength = key.length
+    let index = 0
+    let node = this
+    while (node !== null && index < keylength) {
+      let code = key[index]
+      // A-Z
+      if (code >= 0x41 && code <= 0x5a) {
+        // Lowercase for uppercase.
+        code |= 32
+      }
+      while (node !== null) {
+        if (code === node.code) {
+          if (keylength === ++index) {
+            // Returns Node since it is the last key.
+            return node
+          }
+          node = node.middle
+          break
+        }
+        node = node.code < code ? node.left : node.right
+      }
+    }
+    return null
+  }
+}
+
+class TernarySearchTree {
+  /** @type {TstNode | null} */
+  node = null
+
+  /**
+   * @param {Uint8Array} key
+   * @param {any} value
+   * */
+  insert (key, value) {
+    if (this.node === null) {
+      this.node = new TstNode(key, value)
+    } else {
+      this.node.add(key, value)
+    }
+  }
+
+  /**
+   * @param {Uint8Array} key
+   */
+  lookup (key) {
+    return this.node?.search(key)?.value ?? null
+  }
+}
+
+const tree = new TernarySearchTree()
+
+for (let i = 0; i < wellknownHeaderNames.length; ++i) {
+  const key = wellknownHeaderNames[i].toLowerCase()
+  tree.insert(Buffer.from(key), key)
+}
+
+module.exports = {
+  TernarySearchTree,
+  tree
+}
diff --git a/lib/core/util.js b/lib/core/util.js
index d221e98673b..75d31888221 100644
--- a/lib/core/util.js
+++ b/lib/core/util.js
@@ -10,6 +10,7 @@ const { Blob } = require('buffer')
 const nodeUtil = require('util')
 const { stringify } = require('querystring')
 const { headerNameLowerCasedRecord } = require('./constants')
+const { tree } = require('./tree')
 
 const [nodeMajor, nodeMinor] = process.versions.node.split('.').map(v => Number(v))
 
@@ -219,26 +220,40 @@ function parseKeepAliveTimeout (val) {
   return m ? parseInt(m[1], 10) * 1000 : null
 }
 
-function parseHeaders (headers, obj = {}) {
+/**
+ * @param {string | Buffer} value
+ */
+function headerNameToString (value) {
+  return typeof value === 'string'
+    ? headerNameLowerCasedRecord[value] ?? value.toLowerCase()
+    : tree.lookup(value) ?? value.toString().toLowerCase()
+}
+
+/**
+ * @param {Record | (Buffer | string | (Buffer | string)[])[]} headers
+ * @param {Record} [obj]
+ * @returns {Record}
+ */
+function parseHeaders (headers, obj) {
   // For H2 support
   if (!Array.isArray(headers)) return headers
 
+  if (obj === undefined) obj = {}
   for (let i = 0; i < headers.length; i += 2) {
-    const key = headers[i].toString()
-    const lowerCasedKey = headerNameLowerCasedRecord[key] ?? key.toLowerCase()
-    let val = obj[lowerCasedKey]
+    const key = headerNameToString(headers[i])
+    let val = obj[key]
 
     if (!val) {
       const headersValue = headers[i + 1]
       if (typeof headersValue === 'string') {
-        obj[lowerCasedKey] = headersValue
+        obj[key] = headersValue
       } else {
-        obj[lowerCasedKey] = Array.isArray(headersValue) ? headersValue.map(x => x.toString('utf8')) : headersValue.toString('utf8')
+        obj[key] = Array.isArray(headersValue) ? headersValue.map(x => x.toString('utf8')) : headersValue.toString('utf8')
       }
     } else {
-      if (!Array.isArray(val)) {
+      if (typeof val === 'string') {
         val = [val]
-        obj[lowerCasedKey] = val
+        obj[key] = val
       }
       val.push(headers[i + 1].toString('utf8'))
     }
@@ -461,6 +476,7 @@ module.exports = {
   isIterable,
   isAsyncIterable,
   isDestroyed,
+  headerNameToString,
   parseRawHeaders,
   parseHeaders,
   parseKeepAliveTimeout,
diff --git a/package.json b/package.json
index 0933d911830..05aa2050474 100644
--- a/package.json
+++ b/package.json
@@ -117,6 +117,7 @@
     "jest": "^29.0.2",
     "jsdom": "^23.0.0",
     "jsfuzz": "^1.0.15",
+    "mitata": "^0.1.6",
     "mocha": "^10.0.0",
     "mockttp": "^3.9.2",
     "p-timeout": "^3.2.0",
diff --git a/test/tree.js b/test/tree.js
new file mode 100644
index 00000000000..2a2342a1961
--- /dev/null
+++ b/test/tree.js
@@ -0,0 +1,40 @@
+'use strict'
+
+const { TernarySearchTree } = require('../lib/core/tree')
+const { test } = require('tap')
+
+test('Ternary Search Tree', (t) => {
+  t.plan(1)
+  const characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
+  const charactersLength = characters.length
+
+  function generateAsciiString (length) {
+    let result = ''
+    for (let i = 0; i < length; ++i) {
+      result += characters[Math.floor(Math.random() * charactersLength)]
+    }
+    return result
+  }
+  const tst = new TernarySearchTree()
+
+  const LENGTH = 5000
+
+  /** @type {string[]} */
+  const random = new Array(LENGTH)
+  /** @type {Buffer[]} */
+  const randomBuffer = new Array(LENGTH)
+
+  for (let i = 0; i < LENGTH; ++i) {
+    const key = generateAsciiString((Math.random() * 100 + 5) | 0)
+    const lowerCasedKey = random[i] = key.toLowerCase()
+    randomBuffer[i] = Buffer.from(key)
+    tst.insert(Buffer.from(lowerCasedKey), lowerCasedKey)
+  }
+
+  t.test('all', (t) => {
+    t.plan(LENGTH)
+    for (let i = 0; i < LENGTH; ++i) {
+      t.equal(tst.lookup(randomBuffer[i]), random[i])
+    }
+  })
+})
diff --git a/test/util.js b/test/util.js
index 75a4d8c1617..71a63f5c8af 100644
--- a/test/util.js
+++ b/test/util.js
@@ -1,7 +1,6 @@
 'use strict'
 
-const t = require('tap')
-const { test } = t
+const { test } = require('tap')
 const { Stream } = require('stream')
 const { EventEmitter } = require('events')
 
@@ -125,5 +124,5 @@ test('buildURL', (t) => {
 
 test('headerNameLowerCasedRecord', (t) => {
   t.plan(1)
-  t.ok(typeof headerNameLowerCasedRecord.hasOwnProperty === 'undefined')
+  t.ok(typeof headerNameLowerCasedRecord.hasOwnProperty !== 'function')
 })