From 728aad89bb4b5d47c604da9b6568a5dc1c242eda Mon Sep 17 00:00:00 2001 From: tsctx <91457664+tsctx@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:04:55 +0900 Subject: [PATCH] perf: improve parsing form-data (#2944) * perf: improve parsing form-data * apply suggestions from code review * apply suggestions from code review --- lib/core/util.js | 12 ++++++++- lib/web/fetch/formdata-parser.js | 42 ++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/lib/core/util.js b/lib/core/util.js index cbb6d7495e5..a62396e23e0 100644 --- a/lib/core/util.js +++ b/lib/core/util.js @@ -440,7 +440,8 @@ function addAbortListener (signal, listener) { return () => signal.removeListener('abort', listener) } -const hasToWellFormed = !!String.prototype.toWellFormed +const hasToWellFormed = typeof String.prototype.toWellFormed === 'function' +const hasIsWellFormed = typeof String.prototype.isWellFormed === 'function' /** * @param {string} val @@ -449,6 +450,14 @@ function toUSVString (val) { return hasToWellFormed ? `${val}`.toWellFormed() : nodeUtil.toUSVString(val) } +/** + * @param {string} val + */ +// TODO: move this to webidl +function isUSVString (val) { + return hasIsWellFormed ? `${val}`.isWellFormed() : toUSVString(val) === `${val}` +} + /** * @see https://tools.ietf.org/html/rfc7230#section-3.2.6 * @param {number} c @@ -538,6 +547,7 @@ module.exports = { isErrored, isReadable, toUSVString, + isUSVString, isReadableAborted, isBlobLike, parseOrigin, diff --git a/lib/web/fetch/formdata-parser.js b/lib/web/fetch/formdata-parser.js index 8269dce42e9..b889fbf898d 100644 --- a/lib/web/fetch/formdata-parser.js +++ b/lib/web/fetch/formdata-parser.js @@ -1,12 +1,12 @@ 'use strict' -const { webidl } = require('./webidl') +const { toUSVString, isUSVString, bufferToLowerCasedHeaderName } = require('../../core/util') const { utf8DecodeBytes } = require('./util') const { HTTP_TOKEN_CODEPOINTS, isomorphicDecode } = require('./data-url') const { isFileLike, File: UndiciFile } = require('./file') const { makeEntry } = require('./formdata') const assert = require('node:assert') -const { isAscii, File: NodeFile } = require('node:buffer') +const { File: NodeFile } = require('node:buffer') const File = globalThis.File ?? NodeFile ?? UndiciFile @@ -15,6 +15,18 @@ const filenameBuffer = Buffer.from('; filename') const dd = Buffer.from('--') const ddcrlf = Buffer.from('--\r\n') +/** + * @param {string} chars + */ +function isAsciiString (chars) { + for (let i = 0; i < chars.length; ++i) { + if ((chars.charCodeAt(i) & ~0x7F) !== 0) { + return false + } + } + return true +} + /** * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary * @param {string} boundary @@ -30,7 +42,7 @@ function validateBoundary (boundary) { // - it is composed by bytes in the ranges 0x30 to 0x39, 0x41 to 0x5A, or // 0x61 to 0x7A, inclusive (ASCII alphanumeric), or which are 0x27 ('), // 0x2D (-) or 0x5F (_). - for (let i = 0; i < boundary.length; i++) { + for (let i = 0; i < length; ++i) { const cp = boundary.charCodeAt(i) if (!( @@ -58,12 +70,12 @@ function escapeFormDataName (name, encoding = 'utf-8', isFilename = false) { // 1. If isFilename is true: if (isFilename) { // 1.1. Set name to the result of converting name into a scalar value string. - name = webidl.converters.USVString(name) + name = toUSVString(name) } else { // 2. Otherwise: // 2.1. Assert: name is a scalar value string. - assert(name === webidl.converters.USVString(name)) + assert(isUSVString(name)) // 2.2. Replace every occurrence of U+000D (CR) not followed by U+000A (LF), // and every occurrence of U+000A (LF) not preceded by U+000D (CR), in @@ -94,14 +106,16 @@ function multipartFormDataParser (input, mimeType) { // 1. Assert: mimeType’s essence is "multipart/form-data". assert(mimeType !== 'failure' && mimeType.essence === 'multipart/form-data') + const boundaryString = mimeType.parameters.get('boundary') + // 2. If mimeType’s parameters["boundary"] does not exist, return failure. // Otherwise, let boundary be the result of UTF-8 decoding mimeType’s // parameters["boundary"]. - if (!mimeType.parameters.has('boundary')) { + if (boundaryString === undefined) { return 'failure' } - const boundary = Buffer.from(`--${mimeType.parameters.get('boundary')}`, 'utf8') + const boundary = Buffer.from(`--${boundaryString}`, 'utf8') // 3. Let entry list be an empty entry list. const entryList = [] @@ -200,7 +214,10 @@ function multipartFormDataParser (input, mimeType) { contentType ??= 'text/plain' // 5.10.2. If contentType is not an ASCII string, set contentType to the empty string. - if (!isAscii(Buffer.from(contentType))) { + + // Note: `buffer.isAscii` can be used at zero-cost, but converting a string to a buffer is a high overhead. + // Content-Type is a relatively small string, so it is faster to use `String#charCodeAt`. + if (!isAsciiString(contentType)) { contentType = '' } @@ -214,8 +231,8 @@ function multipartFormDataParser (input, mimeType) { } // 5.12. Assert: name is a scalar value string and value is either a scalar value string or a File object. - assert(name === webidl.converters.USVString(name)) - assert((typeof value === 'string' && value === webidl.converters.USVString(value)) || isFileLike(value)) + assert(isUSVString(name)) + assert((typeof value === 'string' && isUSVString(value)) || isFileLike(value)) // 5.13. Create an entry with name and value, and append it to entry list. entryList.push(makeEntry(name, value, filename)) @@ -280,7 +297,7 @@ function parseMultipartFormDataHeaders (input, position) { ) // 2.8. Byte-lowercase header name and switch on the result: - switch (new TextDecoder().decode(headerName).toLowerCase()) { + switch (bufferToLowerCasedHeaderName(headerName)) { case 'content-disposition': { // 1. Set name and filename to null. name = filename = null @@ -428,10 +445,9 @@ function parseMultipartFormDataName (input, position) { */ function collectASequenceOfBytes (condition, input, position) { const result = [] - let index = 0 while (position.position < input.length && condition(input[position.position])) { - result[index++] = input[position.position] + result.push(input[position.position]) position.position++ }