Merge pull request #1500 from flanksource/feat/recursive-folder-check
feat: recursive folder check
moshloop authored Dec 21, 2023
2 parents 1238cb7 + e438bd9 commit 841f039
Showing 31 changed files with 523 additions and 952 deletions.
15 changes: 9 additions & 6 deletions api/v1/checks.go
@@ -152,9 +152,9 @@ type Bucket struct {
}

type S3Check struct {
Description `yaml:",inline" json:",inline"`
connection.AWSConnection `yaml:",inline" json:",inline"`
BucketName string `yaml:"bucketName" json:"bucketName,omitempty"`
Description `yaml:",inline" json:",inline"`
connection.S3Connection `yaml:",inline" json:",inline"`
BucketName string `yaml:"bucketName" json:"bucketName,omitempty"`
}

func (c S3Check) GetEndpoint() string {
@@ -797,11 +797,14 @@ type FolderCheck struct {
Description `yaml:",inline" json:",inline"`
Templatable `yaml:",inline" json:",inline"`
// Path to folder or object storage, e.g. `s3://<bucket-name>`, `gcs://<bucket-name>`, `/path/to/folder`
Path string `yaml:"path" json:"path"`
Path string `yaml:"path" json:"path"`
// Recursive when set to true will recursively scan the folder to list the files in it.
// However, symlinks are simply listed but not traversed.
Recursive bool `yaml:"recursive,omitempty" json:"recursive,omitempty"`
Filter FolderFilter `yaml:"filter,omitempty" json:"filter,omitempty"`
FolderTest `yaml:",inline" json:",inline"`
*connection.AWSConnection `yaml:"awsConnection,omitempty" json:"awsConnection,omitempty"`
*connection.GCPConnection `yaml:"gcpConnection,omitempty" json:"gcpConnection,omitempty"`
*connection.S3Connection `yaml:"awsConnection,omitempty" json:"awsConnection,omitempty"`
*connection.GCSConnection `yaml:"gcpConnection,omitempty" json:"gcpConnection,omitempty"`
*connection.SMBConnection `yaml:"smbConnection,omitempty" json:"smbConnection,omitempty"`
*connection.SFTPConnection `yaml:"sftpConnection,omitempty" json:"sftpConnection,omitempty"`
}
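Note on the FolderCheck change above: the check now embeds typed S3Connection/GCSConnection values instead of the generic AWS/GCP connections, and gains a Recursive flag. A minimal, hypothetical sketch of setting the new field (the path is a made-up value, the connection, filter, and test fields are left at their zero values, and the api/v1 package is assumed to be imported as v1):

// Hypothetical usage only; not part of this PR.
check := v1.FolderCheck{
	// Object-storage paths keep their scheme prefix; plain local paths also work.
	Path: "s3://my-bucket/backups",
	// New field: descend into sub-folders. Symlinks are listed but not traversed.
	Recursive: true,
}

With Recursive left at its default (false), the check behaves as before and only the top level of the folder is listed.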
3 changes: 2 additions & 1 deletion api/v1/common.go
@@ -86,6 +86,7 @@ type FolderFilterContext struct {
FolderFilter
minAge, maxAge *time.Duration
minSize, maxSize *int64
AllowDir bool // Allow directories to support recursive folder checks
Since *time.Time
// kubebuilder:object:generate=false
regex *regexp.Regexp
@@ -157,7 +158,7 @@ func tryParse(s string) (time.Time, error) {
}

func (f *FolderFilterContext) Filter(i fs.FileInfo) bool {
if i.IsDir() {
if i.IsDir() && !f.AllowDir {
return false
}
if f.maxAge != nil && time.Since(i.ModTime()) > *f.maxAge {
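The AllowDir flag above is what makes recursion workable: Filter used to reject every directory outright, so a recursive walk could never keep or descend into sub-folders. A trimmed-down sketch of the intent (this is not the real FolderFilterContext, which also applies age, size, and regex filters), assuming io/fs is imported:

// Illustrative sketch only.
type filterCtx struct {
	AllowDir bool // set when the folder check is recursive
}

func (f *filterCtx) Filter(info fs.FileInfo) bool {
	if info.IsDir() && !f.AllowDir {
		return false // non-recursive checks ignore directories entirely
	}
	// ... age, size, and regex filters would follow here ...
	return true
}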
14 changes: 7 additions & 7 deletions api/v1/zz_generated.deepcopy.go

Generated file; diff not rendered.

110 changes: 65 additions & 45 deletions checks/folder.go
@@ -2,6 +2,7 @@ package checks

import (
"fmt"
"io/fs"
"os"
"strings"

@@ -12,6 +13,7 @@ import (
"github.com/flanksource/canary-checker/api/external"
v1 "github.com/flanksource/canary-checker/api/v1"
"github.com/flanksource/canary-checker/pkg"
"github.com/flanksource/duty/models"
)

const SizeNotSupported = -1
@@ -89,7 +91,14 @@ func checkLocalFolder(ctx *context.Context, check v1.FolderCheck) pkg.Results {
result := pkg.Success(check, ctx.Canary)
var results pkg.Results
results = append(results, result)
folders, err := getLocalFolderCheck(check.Path, check.Filter)

// Form a dummy connection to get a local filesystem
localFS, err := artifacts.GetFSForConnection(ctx.Duty(), models.Connection{Type: models.ConnectionTypeFolder})
if err != nil {
return results.ErrorMessage(err)
}

folders, err := genericFolderCheck(localFS, check.Path, check.Recursive, check.Filter)
result.AddDetails(folders)

if err != nil {
@@ -102,80 +111,91 @@ func checkLocalFolder(ctx *context.Context, check v1.FolderCheck) pkg.Results {
return results
}

func getLocalFolderCheck(path string, filter v1.FolderFilter) (FolderCheck, error) {
func genericFolderCheck(dirFS artifacts.Filesystem, path string, recursive bool, filter v1.FolderFilter) (FolderCheck, error) {
return _genericFolderCheck(true, dirFS, path, recursive, filter)
}

// genericFolderCheckWithoutPrecheck is used for those filesystems that do not support fetching the stat of a directory.
// Eg: s3, gcs.
// It will not pre check whether the given path is a directory.
func genericFolderCheckWithoutPrecheck(dirFS artifacts.Filesystem, path string, recursive bool, filter v1.FolderFilter) (FolderCheck, error) {
return _genericFolderCheck(false, dirFS, path, recursive, filter)
}

func _genericFolderCheck(supportsDirStat bool, dirFS artifacts.Filesystem, path string, recursive bool, filter v1.FolderFilter) (FolderCheck, error) {
result := FolderCheck{}
_filter, err := filter.New()
if err != nil {
return result, err
}
if dir, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
return result, nil
_filter.AllowDir = recursive

var fileInfo os.FileInfo
if supportsDirStat {
fileInfo, err := dirFS.Stat(path)
if err != nil {
if os.IsNotExist(err) {
return result, nil
}
return result, err
} else if !fileInfo.IsDir() {
return result, fmt.Errorf("%s is not a directory", path)
}
return result, err
} else if !dir.IsDir() {
return result, fmt.Errorf("%s is not a directory", path)
}
files, err := os.ReadDir(path)

files, err := getFolderContents(dirFS, path, _filter)
if err != nil {
return result, err
}

if len(files) == 0 {
// directory is empty. returning duration of directory
info, err := os.Stat(path)
if err != nil {
return result, err
if fileInfo == nil {
return FolderCheck{}, nil
}

// directory is empty. returning duration of directory
return FolderCheck{
Oldest: newFile(info),
Newest: newFile(info),
Oldest: newFile(fileInfo),
Newest: newFile(fileInfo),
AvailableSize: SizeNotSupported,
TotalSize: SizeNotSupported}, nil
}

for _, file := range files {
info, err := file.Info()
if err != nil {
return result, err
}
if file.IsDir() || !_filter.Filter(info) {
continue
}

result.Append(info)
result.Append(file)
}

return result, err
}

func getGenericFolderCheck(fs artifacts.Filesystem, dir string, filter v1.FolderFilter) (*FolderCheck, error) {
result := FolderCheck{}
_filter, err := filter.New()
if err != nil {
return nil, err
}
files, err := fs.ReadDir(dir)
// getFolderContents walks the folder and returns all files.
// Also supports recursively fetching contents
func getFolderContents(dirFs artifacts.Filesystem, path string, filter *v1.FolderFilterContext) ([]fs.FileInfo, error) {
files, err := dirFs.ReadDir(path)
if err != nil {
return nil, err
}

if len(files) == 0 {
// directory is empty. returning duration of directory
info, err := fs.Stat(dir)
if err != nil {
return nil, err
}
return &FolderCheck{
Oldest: newFile(info),
Newest: newFile(info),
AvailableSize: SizeNotSupported,
TotalSize: SizeNotSupported}, nil
return nil, nil
}

for _, file := range files {
if file.IsDir() || !_filter.Filter(file) {
var result []fs.FileInfo
for _, info := range files {
if !filter.Filter(info) {
continue
}

result.Append(file)
result = append(result, info)
if info.IsDir() { // This excludes even directory symlinks
subFiles, err := getFolderContents(dirFs, path+"/"+info.Name(), filter)
if err != nil {
return nil, err
}

result = append(result, subFiles...)
}
}
return &result, nil

return result, err
}
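The walk in getFolderContents terminates on its own because ReadDir reports symlinks with IsDir() == false, so symlinked directories are listed but never descended into. A self-contained sketch of the same strategy against the local filesystem, using only the standard library (illustrative only; the real implementation goes through artifacts.Filesystem and applies the folder filter before appending or descending):

package main

import (
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
)

// listRecursive returns every entry under path, descending into real
// directories but not into symlinks, mirroring the PR's behaviour.
func listRecursive(path string) ([]fs.FileInfo, error) {
	entries, err := os.ReadDir(path)
	if err != nil {
		return nil, err
	}

	var result []fs.FileInfo
	for _, entry := range entries {
		info, err := entry.Info()
		if err != nil {
			return nil, err
		}
		result = append(result, info)

		if info.IsDir() { // false for symlinked directories
			sub, err := listRecursive(filepath.Join(path, entry.Name()))
			if err != nil {
				return nil, err
			}
			result = append(result, sub...)
		}
	}
	return result, nil
}

func main() {
	files, err := listRecursive(".")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(len(files), "entries found")
}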
1 change: 1 addition & 0 deletions checks/folder_check.go
@@ -35,6 +35,7 @@ func newFile(file os.FileInfo) *File {
IsDir: file.IsDir(),
}
}

func (f *FolderCheck) Append(osFile os.FileInfo) {
file := newFile(osFile)
if f.Oldest == nil || f.Oldest.Modified.After(osFile.ModTime()) {
80 changes: 37 additions & 43 deletions checks/folder_gcs.go
@@ -1,13 +1,15 @@
package checks

import (
"errors"
"strings"

gcs "cloud.google.com/go/storage"
"github.com/flanksource/artifacts/clients/gcp"
"github.com/flanksource/artifacts"
"github.com/flanksource/canary-checker/api/context"
v1 "github.com/flanksource/canary-checker/api/v1"
"github.com/flanksource/canary-checker/pkg"
"github.com/flanksource/duty/models"
)

type GCS struct {
@@ -20,62 +22,54 @@ func CheckGCSBucket(ctx *context.Context, check v1.FolderCheck) pkg.Results {
var results pkg.Results
results = append(results, result)

if err := check.GCPConnection.HydrateConnection(ctx); err != nil {
return results.Failf("failed to populate GCP connection: %v", err)
if check.GCSConnection == nil {
return results.ErrorMessage(errors.New("missing GCS connection"))
}

cfg, err := gcp.NewSession(ctx.Duty(), check.GCPConnection)
var bucket string
bucket, check.Path = parseGCSPath(check.Path)

connection, err := ctx.HydrateConnectionByURL(check.GCPConnection.ConnectionName)
if err != nil {
return results.ErrorMessage(err)
return results.Failf("failed to populate GCS connection: %v", err)
} else if connection == nil {
connection = &models.Connection{Type: models.ConnectionTypeGCS}
if check.GCSConnection.Bucket == "" {
check.GCSConnection.Bucket = bucket
}

connection, err = connection.Merge(ctx, check.GCSConnection)
if err != nil {
return results.Failf("failed to populate GCS connection: %v", err)
}
}
client := GCS{
BucketName: getGCSBucketName(check.Path),
Client: cfg,

fs, err := artifacts.GetFSForConnection(ctx.Duty(), *connection)
if err != nil {
return results.ErrorMessage(err)
}
folders, err := client.CheckFolder(ctx, check.Filter)

folders, err := genericFolderCheckWithoutPrecheck(fs, check.Path, check.Recursive, check.Filter)
if err != nil {
return results.ErrorMessage(err)
}
result.AddDetails(folders)

if test := folders.Test(check.FolderTest); test != "" {
results.Failf(test)
return results.Failf(test)
}

return results
}

func (conn *GCS) CheckFolder(ctx *context.Context, filter v1.FolderFilter) (*FolderCheck, error) {
result := FolderCheck{}
bucket := conn.Bucket(conn.BucketName)
objs := bucket.Objects(ctx, nil)
_filter, err := filter.New()
if err != nil {
return nil, err
}
obj, err := objs.Next()
// empty bucket
if obj == nil {
return &result, nil
}
if err != nil {
return nil, nil
// parseGCSPath returns the bucket name and the actual path stripping of the gcs:// prefix and the bucket name.
// The path is expected to be in the format "gcs://bucket_name/<actual_path>"
func parseGCSPath(fullpath string) (bucket, path string) {
trimmed := strings.TrimPrefix(fullpath, "gcs://")
splits := strings.SplitN(trimmed, "/", 2)
if len(splits) != 2 {
return splits[0], ""
}
for {
file := gcp.GCSFileInfo{Object: obj}
if file.IsDir() || !_filter.Filter(file) {
continue
}

result.Append(file)
obj, err = objs.Next()
if obj == nil {
return &result, nil
}
if err != nil {
return nil, err
}
}
}

func getGCSBucketName(bucket string) string {
return strings.TrimPrefix(bucket, "gcs://")
return splits[0], splits[1]
}
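For illustration, parseGCSPath splits a full path as follows (made-up values; this would run inside the checks package):

bucket, path := parseGCSPath("gcs://my-bucket/backups/2023")
// bucket == "my-bucket", path == "backups/2023"

bucket, path = parseGCSPath("gcs://my-bucket")
// bucket == "my-bucket", path == "" (no object prefix given)

The bucket parsed from the path is only used as a fallback when GCSConnection.Bucket is not already set (see the hydration logic above).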