From bb21c9dc14b52ff275e5c4ab3e2c3c7de9dd63f8 Mon Sep 17 00:00:00 2001 From: Blaine Bublitz Date: Sun, 7 Apr 2024 21:29:20 -0700 Subject: [PATCH] fix: Avoid blowing the call stack when processing many files (#133) --- .gitignore | 3 ++ index.js | 97 ++++++++++++++++++++++----------------------------- package.json | 1 - test/index.js | 45 ++++++++++++++++-------- 4 files changed, 75 insertions(+), 71 deletions(-) diff --git a/.gitignore b/.gitignore index 58a757a..bcaf63c 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,6 @@ typings/ # Test results test.xunit + +# Massive, generated directory +test/fixtures/too-many/ diff --git a/index.js b/index.js index 426e8cd..977b284 100644 --- a/index.js +++ b/index.js @@ -12,7 +12,6 @@ var globParent = require('glob-parent'); var normalizePath = require('normalize-path'); var isNegatedGlob = require('is-negated-glob'); var toAbsoluteGlob = require('@gulpjs/to-absolute-glob'); -var mapSeries = require('now-and-later').mapSeries; var globErrMessage1 = 'File not found with singular glob: '; var globErrMessage2 = ' (if this was purposeful, use `allowEmpty` option)'; @@ -24,21 +23,6 @@ function isFound(glob) { return isGlob(glob); } -function getSymlinkInfo(filepath, cb) { - fs.realpath(filepath, function (err, realPath) { - if (err) return cb(err); - - fs.lstat(realPath, function (err, lstat) { - if (err) return cb(err); - - cb(null, { - destinationPath: realPath, - destinationStat: lstat, - }); - }); - }); -} - function walkdir() { var readdirOpts = { withFileTypes: true, @@ -46,7 +30,7 @@ function walkdir() { var ee = new EventEmitter(); - var queue = fastq(process, 1); + var queue = fastq(onAction, 1); queue.drain = function () { ee.emit('end'); }; @@ -69,10 +53,7 @@ function walkdir() { }; ee.walk = walk; ee.exists = exists; - - function isDefined(value) { - return typeof value !== 'undefined'; - } + ee.resolve = resolve; function walk(path) { queue.push({ action: 'walk', path: path }); @@ -82,11 +63,41 @@ function walkdir() { queue.push({ action: 'exists', path: path }); } - function process(data, cb) { + function resolve(path) { + queue.push({ action: 'resolve', path: path }); + } + + function resolveSymlink(symlinkPath, cb) { + fs.realpath(symlinkPath, function (err, realpath) { + if (err) { + return cb(err); + } + + fs.lstat(realpath, function (err, stat) { + if (err) { + return cb(err); + } + + if (stat.isDirectory() && !symlinkPath.startsWith(realpath + path.sep)) { + walk(symlinkPath); + } + + cb(); + }) + }); + } + + function onAction(data, cb) { if (data.action === 'walk') { - fs.readdir(data.path, readdirOpts, onReaddir); - } else { - fs.stat(data.path, onStat); + return fs.readdir(data.path, readdirOpts, onReaddir); + } + + if (data.action === 'exists') { + return fs.stat(data.path, onStat); + } + + if (data.action === 'resolve') { + return resolveSymlink(data.path, cb); } function onStat(err, stat) { @@ -106,48 +117,22 @@ function walkdir() { return cb(err); } - mapSeries(dirents, processDirents, function (err, dirs) { - if (err) { - return cb(err); - } + dirents.forEach(processDirent); - dirs.filter(isDefined).forEach(walk); - - cb(); - }); + cb(); } - function processDirents(dirent, key, cb) { + function processDirent(dirent) { var nextpath = path.join(data.path, dirent.name); ee.emit('path', nextpath, dirent); if (dirent.isDirectory()) { - cb(null, nextpath); - - return; + return walk(nextpath); } if (dirent.isSymbolicLink()) { - // If it's a symlink, check if the symlink points to a directory - getSymlinkInfo(nextpath, function (err, info) { - if (err) { - return cb(err); - } - - if ( - info.destinationStat.isDirectory() && - !nextpath.startsWith(info.destinationPath + path.sep) // don't follow circular symlinks - ) { - cb(null, nextpath); - } else { - cb(); - } - }); - - return; + return resolve(nextpath); } - - cb(); } } diff --git a/package.json b/package.json index 1d28c6f..9dce4a1 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,6 @@ "is-glob": "^4.0.3", "is-negated-glob": "^1.0.0", "normalize-path": "^3.0.0", - "now-and-later": "^3.0.0", "streamx": "^2.12.5" }, "devDependencies": { diff --git a/test/index.js b/test/index.js index 5325a0e..4943370 100644 --- a/test/index.js +++ b/test/index.js @@ -370,9 +370,37 @@ function suite(moduleName) { stream.pipeline([globStream(globs, { cwd: dir }), concat(assert)], done); }); + // By default, we only run this in non-Windows CI since it takes so long + it('does not stack overflow if there are an insane amount of files', function (done) { + if (process.env.CI !== "true" || os.platform() === 'win32') { + this.skip(); + } + + this.timeout(0); + var largeDir = path.join(dir, 'fixtures/too-many'); + fs.mkdirSync(largeDir, { recursive: true }); + + for (var i = 0; i < 100000; i++) { + fs.writeFileSync(path.join(largeDir, 'file-' + i + '.txt'), "test-" + i) + } + + function assert(pathObjs) { + for (var i = 0; i < 100000; i++) { + fs.unlinkSync(path.join(largeDir, 'file-' + i + '.txt')) + } + fs.rmdirSync(largeDir); + + expect(pathObjs.length).toEqual(100000); + } + + var glob = deWindows(largeDir) + '/*.txt'; + + stream.pipeline([globStream(glob), concat(assert)], done); + }); + it('emits all objects (unordered) when given multiple absolute paths and no cwd', function (done) { - var testFile = path.join(os.tmpdir(), "glob-stream-test.txt"); - fs.writeFileSync(testFile, "test"); + var testFile = path.join(os.tmpdir(), 'glob-stream-test.txt'); + fs.writeFileSync(testFile, 'test'); var tmp = deWindows(os.tmpdir()); @@ -408,7 +436,7 @@ function suite(moduleName) { ]; function assert(pathObjs) { - fs.unlinkSync(testFile, "test"); + fs.unlinkSync(testFile, 'test'); expect(pathObjs.length).toEqual(4); expect(pathObjs).toContainEqual(expected[0]); expect(pathObjs).toContainEqual(expected[1]); @@ -818,7 +846,6 @@ function suite(moduleName) { dir + '/fixtures/symlinks/symlink-dest/hey/isaidhey/whatsgoingon/test.txt', }, - { cwd: dir, base: dir + '/fixtures/symlinks', @@ -829,7 +856,6 @@ function suite(moduleName) { base: dir + '/fixtures/symlinks', path: dir + '/fixtures/symlinks/folder-b/folder-b-file.txt', }, - // It should follow these circular symlinks, but not infinitely { cwd: dir, @@ -841,15 +867,6 @@ function suite(moduleName) { base: dir + '/fixtures/symlinks', path: dir + '/fixtures/symlinks/folder-b/link-to-a/folder-a-file.txt', }, - - // And it should follow a symlink to a parent directory (circular symlink) without blowing up - { - cwd: dir, - base: dir + '/fixtures/symlinks', - path: - dir + - '/fixtures/symlinks/symlink-dest/hey/isaidhey/whatsgoingon/test.txt', - }, ]; function assert(pathObjs) {