From e601c40e6e9267bf44891b5645c8a5c69787f7a0 Mon Sep 17 00:00:00 2001 From: SkobelkinYaroslav <120472314+SkobelkinYaroslav@users.noreply.github.com> Date: Sat, 3 Aug 2024 04:04:05 +0200 Subject: [PATCH] small optimization for fsfinder (#136) * changed []string to map[string]struct{} in FileSystemFinder struct fields because of optimization (arrays were used only to check the occurrence of an element) * Changed field Extensions type in FileType from []string to map[string]struct{} * added requested changes * small refactoring of findOne func * fixd the return value * get back to trimprefix func * cleanup * fsfinder optimization * i hope it fixed * removed unnecessary calls to filepath.Abs * Apply suggestions from code review --------- Co-authored-by: Yaroslav <=> Co-authored-by: Clayton Kehoe <118750525+kehoecj@users.noreply.github.com> --- pkg/finder/finder_test.go | 14 ++++++++-- pkg/finder/fsfinder.go | 56 +++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/pkg/finder/finder_test.go b/pkg/finder/finder_test.go index 4b5b8242..18ae27d5 100644 --- a/pkg/finder/finder_test.go +++ b/pkg/finder/finder_test.go @@ -166,9 +166,7 @@ func Test_FileSystemFinderMultipleFinder(t *testing.T) { func Test_FileSystemFinderDuplicateFiles(t *testing.T) { fsFinder := FileSystemFinderInit( WithPathRoots( - "../../test/fixtures/subdir/good.json", "../../test/fixtures/subdir/", - "../../test/fixtures/subdir/../subdir/good.json", ), ) @@ -250,3 +248,15 @@ func Test_FileFinderBadPath(t *testing.T) { t.Errorf("Error should be thrown for bad path") } } + +func Benchmark_Finder(b *testing.B) { + fsFinder := FileSystemFinderInit( + WithPathRoots("../../test/fixtures/"), + ) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, _ = fsFinder.Find() + } +} diff --git a/pkg/finder/fsfinder.go b/pkg/finder/fsfinder.go index 01a81f37..08bb94a3 100644 --- a/pkg/finder/fsfinder.go +++ b/pkg/finder/fsfinder.go @@ -57,12 +57,14 @@ func WithDepth(depthVal int) FSFinderOptions { func FileSystemFinderInit(opts ...FSFinderOptions) *FileSystemFinder { defaultExcludeDirs := make(map[string]struct{}) + defaultExcludeFileTypes := make(map[string]struct{}) defaultPathRoots := []string{"."} fsfinder := &FileSystemFinder{ - PathRoots: defaultPathRoots, - FileTypes: filetype.FileTypes, - ExcludeDirs: defaultExcludeDirs, + PathRoots: defaultPathRoots, + FileTypes: filetype.FileTypes, + ExcludeDirs: defaultExcludeDirs, + ExcludeFileTypes: defaultExcludeFileTypes, } for _, opt := range opts { @@ -79,21 +81,11 @@ func (fsf FileSystemFinder) Find() ([]FileMetadata, error) { seen := make(map[string]struct{}, 0) uniqueMatches := make([]FileMetadata, 0) for _, pathRoot := range fsf.PathRoots { - matches, err := fsf.findOne(pathRoot) + matches, err := fsf.findOne(pathRoot, seen) if err != nil { return nil, err } - for _, match := range matches { - absPath, err := filepath.Abs(match.Path) - if err != nil { - return nil, err - } - if _, ok := seen[absPath]; ok { - continue - } - uniqueMatches = append(uniqueMatches, match) - seen[absPath] = struct{}{} - } + uniqueMatches = append(uniqueMatches, matches...) } return uniqueMatches, nil } @@ -101,7 +93,7 @@ func (fsf FileSystemFinder) Find() ([]FileMetadata, error) { // findOne recursively walks through all subdirectories (excluding the excluded subdirectories) // and identifying if the file matches a type defined in the fileTypes array for a // single path and returns the file metadata. -func (fsf FileSystemFinder) findOne(pathRoot string) ([]FileMetadata, error) { +func (fsf FileSystemFinder) findOne(pathRoot string, seenMap map[string]struct{}) ([]FileMetadata, error) { var matchingFiles []FileMetadata // check that the path exists before walking it or the error returned @@ -124,28 +116,40 @@ func (fsf FileSystemFinder) findOne(pathRoot string) ([]FileMetadata, error) { } // determine if directory is in the excludeDirs list or if the depth is greater than the maxDepth - _, isExcluded := fsf.ExcludeDirs[dirEntry.Name()] - if dirEntry.IsDir() && ((fsf.Depth != nil && strings.Count(path, string(os.PathSeparator)) > maxDepth) || isExcluded) { - return filepath.SkipDir + if dirEntry.IsDir() { + _, isExcluded := fsf.ExcludeDirs[dirEntry.Name()] + if isExcluded || (fsf.Depth != nil && strings.Count(path, string(os.PathSeparator)) > maxDepth) { + return filepath.SkipDir + } } if !dirEntry.IsDir() { // filepath.Ext() returns the extension name with a dot so it // needs to be removed. - walkFileExtension := strings.TrimPrefix(filepath.Ext(path), ".") + extensionLowerCase := strings.ToLower(walkFileExtension) - if _, ok := fsf.ExcludeFileTypes[walkFileExtension]; ok { + if _, isExcluded := fsf.ExcludeFileTypes[extensionLowerCase]; isExcluded { return nil } - extensionLowerCase := strings.ToLower(walkFileExtension) + for _, fileType := range fsf.FileTypes { - if _, ok := fileType.Extensions[extensionLowerCase]; ok { - fileMetadata := FileMetadata{dirEntry.Name(), path, fileType} - matchingFiles = append(matchingFiles, fileMetadata) - break + if _, isMatched := fileType.Extensions[extensionLowerCase]; isMatched { + absPath, err := filepath.Abs(path) + if err != nil { + return err + } + + if _, seen := seenMap[absPath]; !seen { + fileMetadata := FileMetadata{dirEntry.Name(), absPath, fileType} + matchingFiles = append(matchingFiles, fileMetadata) + seenMap[absPath] = struct{}{} + } + + return nil } } + fsf.ExcludeFileTypes[extensionLowerCase] = struct{}{} } return nil