Skip to content

Commit

Permalink
perf: improve parsing form-data (nodejs#2944)
Browse files Browse the repository at this point in the history
* perf: improve parsing form-data

* apply suggestions from code review

* apply suggestions from code review
  • Loading branch information
tsctx authored and KhafraDev committed Mar 14, 2024
1 parent 318f3d3 commit 728aad8
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
12 changes: 11 additions & 1 deletion lib/core/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,8 @@ function addAbortListener (signal, listener) {
return () => signal.removeListener('abort', listener)
}

const hasToWellFormed = !!String.prototype.toWellFormed
const hasToWellFormed = typeof String.prototype.toWellFormed === 'function'
const hasIsWellFormed = typeof String.prototype.isWellFormed === 'function'

/**
* @param {string} val
Expand All @@ -449,6 +450,14 @@ function toUSVString (val) {
return hasToWellFormed ? `${val}`.toWellFormed() : nodeUtil.toUSVString(val)
}

/**
* @param {string} val
*/
// TODO: move this to webidl
function isUSVString (val) {
return hasIsWellFormed ? `${val}`.isWellFormed() : toUSVString(val) === `${val}`
}

/**
* @see https://tools.ietf.org/html/rfc7230#section-3.2.6
* @param {number} c
Expand Down Expand Up @@ -538,6 +547,7 @@ module.exports = {
isErrored,
isReadable,
toUSVString,
isUSVString,
isReadableAborted,
isBlobLike,
parseOrigin,
Expand Down
42 changes: 29 additions & 13 deletions lib/web/fetch/formdata-parser.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
'use strict'

const { webidl } = require('./webidl')
const { toUSVString, isUSVString, bufferToLowerCasedHeaderName } = require('../../core/util')
const { utf8DecodeBytes } = require('./util')
const { HTTP_TOKEN_CODEPOINTS, isomorphicDecode } = require('./data-url')
const { isFileLike, File: UndiciFile } = require('./file')
const { makeEntry } = require('./formdata')
const assert = require('node:assert')
const { isAscii, File: NodeFile } = require('node:buffer')
const { File: NodeFile } = require('node:buffer')

const File = globalThis.File ?? NodeFile ?? UndiciFile

Expand All @@ -15,6 +15,18 @@ const filenameBuffer = Buffer.from('; filename')
const dd = Buffer.from('--')
const ddcrlf = Buffer.from('--\r\n')

/**
* @param {string} chars
*/
function isAsciiString (chars) {
for (let i = 0; i < chars.length; ++i) {
if ((chars.charCodeAt(i) & ~0x7F) !== 0) {
return false
}
}
return true
}

/**
* @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary
* @param {string} boundary
Expand All @@ -30,7 +42,7 @@ function validateBoundary (boundary) {
// - it is composed by bytes in the ranges 0x30 to 0x39, 0x41 to 0x5A, or
// 0x61 to 0x7A, inclusive (ASCII alphanumeric), or which are 0x27 ('),
// 0x2D (-) or 0x5F (_).
for (let i = 0; i < boundary.length; i++) {
for (let i = 0; i < length; ++i) {
const cp = boundary.charCodeAt(i)

if (!(
Expand Down Expand Up @@ -58,12 +70,12 @@ function escapeFormDataName (name, encoding = 'utf-8', isFilename = false) {
// 1. If isFilename is true:
if (isFilename) {
// 1.1. Set name to the result of converting name into a scalar value string.
name = webidl.converters.USVString(name)
name = toUSVString(name)
} else {
// 2. Otherwise:

// 2.1. Assert: name is a scalar value string.
assert(name === webidl.converters.USVString(name))
assert(isUSVString(name))

// 2.2. Replace every occurrence of U+000D (CR) not followed by U+000A (LF),
// and every occurrence of U+000A (LF) not preceded by U+000D (CR), in
Expand Down Expand Up @@ -94,14 +106,16 @@ function multipartFormDataParser (input, mimeType) {
// 1. Assert: mimeType’s essence is "multipart/form-data".
assert(mimeType !== 'failure' && mimeType.essence === 'multipart/form-data')

const boundaryString = mimeType.parameters.get('boundary')

// 2. If mimeType’s parameters["boundary"] does not exist, return failure.
// Otherwise, let boundary be the result of UTF-8 decoding mimeType’s
// parameters["boundary"].
if (!mimeType.parameters.has('boundary')) {
if (boundaryString === undefined) {
return 'failure'
}

const boundary = Buffer.from(`--${mimeType.parameters.get('boundary')}`, 'utf8')
const boundary = Buffer.from(`--${boundaryString}`, 'utf8')

// 3. Let entry list be an empty entry list.
const entryList = []
Expand Down Expand Up @@ -200,7 +214,10 @@ function multipartFormDataParser (input, mimeType) {
contentType ??= 'text/plain'

// 5.10.2. If contentType is not an ASCII string, set contentType to the empty string.
if (!isAscii(Buffer.from(contentType))) {

// Note: `buffer.isAscii` can be used at zero-cost, but converting a string to a buffer is a high overhead.
// Content-Type is a relatively small string, so it is faster to use `String#charCodeAt`.
if (!isAsciiString(contentType)) {
contentType = ''
}

Expand All @@ -214,8 +231,8 @@ function multipartFormDataParser (input, mimeType) {
}

// 5.12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
assert(name === webidl.converters.USVString(name))
assert((typeof value === 'string' && value === webidl.converters.USVString(value)) || isFileLike(value))
assert(isUSVString(name))
assert((typeof value === 'string' && isUSVString(value)) || isFileLike(value))

// 5.13. Create an entry with name and value, and append it to entry list.
entryList.push(makeEntry(name, value, filename))
Expand Down Expand Up @@ -280,7 +297,7 @@ function parseMultipartFormDataHeaders (input, position) {
)

// 2.8. Byte-lowercase header name and switch on the result:
switch (new TextDecoder().decode(headerName).toLowerCase()) {
switch (bufferToLowerCasedHeaderName(headerName)) {
case 'content-disposition': {
// 1. Set name and filename to null.
name = filename = null
Expand Down Expand Up @@ -428,10 +445,9 @@ function parseMultipartFormDataName (input, position) {
*/
function collectASequenceOfBytes (condition, input, position) {
const result = []
let index = 0

while (position.position < input.length && condition(input[position.position])) {
result[index++] = input[position.position]
result.push(input[position.position])

position.position++
}
Expand Down

0 comments on commit 728aad8

Please sign in to comment.