Skip to content

Commit

Permalink
Cache bucket content by prefix
Browse files Browse the repository at this point in the history
When a publish operation uses a publish prefix, instead of listing the contents
of the whole bucket under the storage prefix, only list the contents of
the bucket under the storage prefix and publish prefix, and cache it by
publish prefix.
This speeds up publish operations under a prefix.
  • Loading branch information
cavedon authored and neolynx committed Jan 20, 2024
1 parent 740dce7 commit 5541ac9
Show file tree
Hide file tree
Showing 9 changed files with 136 additions and 55 deletions.
2 changes: 1 addition & 1 deletion aptly/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ type PublishedStorage interface {
// Remove removes single file under public path
Remove(path string) error
// LinkFromPool links package file from pool to dist's pool location
LinkFromPool(prefix string, path string, fileName string, sourcePool PackagePool, sourcePath string, sourceChecksums utils.ChecksumInfo, force bool) error
LinkFromPool(publishedPrefix, publishedRelPath, fileName string, sourcePool PackagePool, sourcePath string, sourceChecksums utils.ChecksumInfo, force bool) error
// Filelist returns list of files under prefix
Filelist(prefix string) ([]string, error)
// RenameFile renames (moves) file
Expand Down
30 changes: 18 additions & 12 deletions azure/public.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
type PublishedStorage struct {
container azblob.ContainerURL
prefix string
pathCache map[string]string
pathCache map[string]map[string]string
}

// Check interface
Expand Down Expand Up @@ -174,32 +174,38 @@ func (storage *PublishedStorage) Remove(path string) error {

// LinkFromPool links package file from pool to dist's pool location
//
// publishedDirectory is desired location in pool (like prefix/pool/component/liba/libav/)
// publishedPrefix is desired prefix for the location in the pool.
// publishedRelPath is desired location in pool (like pool/component/liba/libav/)
// sourcePool is instance of aptly.PackagePool
// sourcePath is filepath to package file in package pool
//
// LinkFromPool returns relative path for the published file to be included in package index
func (storage *PublishedStorage) LinkFromPool(prefix string, path string, fileName string, sourcePool aptly.PackagePool,
func (storage *PublishedStorage) LinkFromPool(publishedPrefix, publishedRelPath, fileName string, sourcePool aptly.PackagePool,
sourcePath string, sourceChecksums utils.ChecksumInfo, force bool) error {

publishedDirectory := filepath.Join(prefix, path)
relPath := filepath.Join(publishedDirectory, fileName)
poolPath := filepath.Join(storage.prefix, relPath)
relFilePath := filepath.Join(publishedRelPath, fileName)
prefixRelFilePath := filepath.Join(publishedPrefix, relFilePath)
poolPath := filepath.Join(storage.prefix, prefixRelFilePath)

if storage.pathCache == nil {
paths, md5s, err := storage.internalFilelist("")
storage.pathCache = make(map[string]map[string]string)
}
pathCache := storage.pathCache[publishedPrefix]
if pathCache == nil {
paths, md5s, err := storage.internalFilelist(publishedPrefix)
if err != nil {
return fmt.Errorf("error caching paths under prefix: %s", err)
}

storage.pathCache = make(map[string]string, len(paths))
pathCache = make(map[string]string, len(paths))

for i := range paths {
storage.pathCache[paths[i]] = md5s[i]
pathCache[paths[i]] = md5s[i]
}
storage.pathCache[publishedPrefix] = pathCache
}

destinationMD5, exists := storage.pathCache[relPath]
destinationMD5, exists := pathCache[relFilePath]
sourceMD5 := sourceChecksums.MD5

if exists {
Expand All @@ -222,9 +228,9 @@ func (storage *PublishedStorage) LinkFromPool(prefix string, path string, fileNa
}
defer source.Close()

err = storage.putFile(relPath, source, sourceMD5)
err = storage.putFile(prefixRelFilePath, source, sourceMD5)
if err == nil {
storage.pathCache[relPath] = sourceMD5
pathCache[relFilePath] = sourceMD5
} else {
err = errors.Wrap(err, fmt.Sprintf("error uploading %s to %s: %s", sourcePath, storage, poolPath))
}
Expand Down
14 changes: 7 additions & 7 deletions azure/public_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,45 +300,45 @@ func (s *PublishedStorageSuite) TestLinkFromPool(c *C) {
c.Assert(err, IsNil)

// first link from pool
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
err = s.storage.LinkFromPool("", filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
c.Check(err, IsNil)

c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))

// duplicate link from pool
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
c.Check(err, IsNil)

c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))

// link from pool with conflict
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, false)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, false)
c.Check(err, ErrorMatches, ".*file already exists and is different.*")

c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))

// link from pool with conflict and force
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, true)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src2, cksum2, true)
c.Check(err, IsNil)

c.Check(s.GetFile(c, "pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Spam"))

// for prefixed storage:
// first link from pool
err = s.prefixedStorage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
err = s.prefixedStorage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, src1, cksum1, false)
c.Check(err, IsNil)

// 2nd link from pool, providing wrong path for source file
//
// this test should check that file already exists in Azure storage and skip upload (which would fail if not skipped)
s.prefixedStorage.pathCache = nil
err = s.prefixedStorage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, "wrong-looks-like-pathcache-doesnt-work", cksum1, false)
err = s.prefixedStorage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, "wrong-looks-like-pathcache-doesnt-work", cksum1, false)
c.Check(err, IsNil)

c.Check(s.GetFile(c, "lala/pool/main/m/mars-invaders/mars-invaders_1.03.deb"), DeepEquals, []byte("Contents"))

// link from pool with nested file name
err = s.storage.LinkFromPool("dists/jessie/non-free/installer-i386/current/images", "netboot/boot.img.gz", pool, src3, cksum3, false)
err = s.storage.LinkFromPool("", "dists/jessie/non-free/installer-i386/current/images", "netboot/boot.img.gz", pool, src3, cksum3, false)
c.Check(err, IsNil)

c.Check(s.GetFile(c, "dists/jessie/non-free/installer-i386/current/images/netboot/boot.img.gz"), DeepEquals, []byte("Contents"))
Expand Down
8 changes: 4 additions & 4 deletions files/public.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,17 +118,17 @@ func (storage *PublishedStorage) RemoveDirs(path string, progress aptly.Progress

// LinkFromPool links package file from pool to dist's pool location
//
// publishedDirectory is desired location in pool (like prefix/pool/component/liba/libav/)
// publishedPrefix is desired prefix for the location in the pool.
// publishedRelPath is desired location in pool (like pool/component/liba/libav/)
// sourcePool is instance of aptly.PackagePool
// sourcePath is a relative path to package file in package pool
//
// LinkFromPool returns relative path for the published file to be included in package index
func (storage *PublishedStorage) LinkFromPool(prefix string, path string, fileName string, sourcePool aptly.PackagePool,
func (storage *PublishedStorage) LinkFromPool(publishedPrefix, publishedRelPath, fileName string, sourcePool aptly.PackagePool,
sourcePath string, sourceChecksums utils.ChecksumInfo, force bool) error {

publishedDirectory := filepath.Join(prefix, path)
baseName := filepath.Base(fileName)
poolPath := filepath.Join(storage.rootPath, publishedDirectory, filepath.Dir(fileName))
poolPath := filepath.Join(storage.rootPath, publishedPrefix, publishedRelPath, filepath.Dir(fileName))

err := os.MkdirAll(poolPath, 0777)
if err != nil {
Expand Down
22 changes: 11 additions & 11 deletions files/public_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ func (s *PublishedStorageSuite) TestLinkFromPool(c *C) {
c.Assert(err, IsNil)

// Test using hardlinks
err = s.storage.LinkFromPool(filepath.Join(t.prefix, t.publishedDirectory), t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
err = s.storage.LinkFromPool(t.prefix, t.publishedDirectory, t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
c.Assert(err, IsNil)

st, err := os.Stat(filepath.Join(s.storage.rootPath, t.prefix, t.expectedFilename))
Expand All @@ -243,7 +243,7 @@ func (s *PublishedStorageSuite) TestLinkFromPool(c *C) {
c.Check(int(info.Nlink), Equals, 3)

// Test using symlinks
err = s.storageSymlink.LinkFromPool(filepath.Join(t.prefix, t.publishedDirectory), t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
err = s.storageSymlink.LinkFromPool(t.prefix, t.publishedDirectory, t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
c.Assert(err, IsNil)

st, err = os.Lstat(filepath.Join(s.storageSymlink.rootPath, t.prefix, t.expectedFilename))
Expand All @@ -254,7 +254,7 @@ func (s *PublishedStorageSuite) TestLinkFromPool(c *C) {
c.Check(int(info.Mode&syscall.S_IFMT), Equals, int(syscall.S_IFLNK))

// Test using copy with checksum verification
err = s.storageCopy.LinkFromPool(filepath.Join(t.prefix, t.publishedDirectory), t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
err = s.storageCopy.LinkFromPool(t.prefix, t.publishedDirectory, t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
c.Assert(err, IsNil)

st, err = os.Stat(filepath.Join(s.storageCopy.rootPath, t.prefix, t.expectedFilename))
Expand All @@ -264,7 +264,7 @@ func (s *PublishedStorageSuite) TestLinkFromPool(c *C) {
c.Check(int(info.Nlink), Equals, 1)

// Test using copy with size verification
err = s.storageCopySize.LinkFromPool(filepath.Join(t.prefix, t.publishedDirectory), t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
err = s.storageCopySize.LinkFromPool(t.prefix, t.publishedDirectory, t.sourcePath, pool, srcPoolPath, sourceChecksum, false)
c.Assert(err, IsNil)

st, err = os.Stat(filepath.Join(s.storageCopySize.rootPath, t.prefix, t.expectedFilename))
Expand All @@ -289,37 +289,37 @@ func (s *PublishedStorageSuite) TestLinkFromPool(c *C) {
c.Assert(err, IsNil)
nlinks := int(st.Sys().(*syscall.Stat_t).Nlink)

err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
c.Check(err, ErrorMatches, ".*file already exists and is different")

st, err = pool.Stat(srcPoolPath)
c.Assert(err, IsNil)
c.Check(int(st.Sys().(*syscall.Stat_t).Nlink), Equals, nlinks)

// linking with force
err = s.storage.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, true)
err = s.storage.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, true)
c.Check(err, IsNil)

st, err = pool.Stat(srcPoolPath)
c.Assert(err, IsNil)
c.Check(int(st.Sys().(*syscall.Stat_t).Nlink), Equals, nlinks+1)

// Test using symlinks
err = s.storageSymlink.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
err = s.storageSymlink.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
c.Check(err, ErrorMatches, ".*file already exists and is different")

err = s.storageSymlink.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, true)
err = s.storageSymlink.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, true)
c.Check(err, IsNil)

// Test using copy with checksum verification
err = s.storageCopy.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
err = s.storageCopy.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
c.Check(err, ErrorMatches, ".*file already exists and is different")

err = s.storageCopy.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, true)
err = s.storageCopy.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, true)
c.Check(err, IsNil)

// Test using copy with size verification (this will NOT detect the difference)
err = s.storageCopySize.LinkFromPool(filepath.Join("", "pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
err = s.storageCopySize.LinkFromPool("", filepath.Join("pool", "main", "m/mars-invaders"), "mars-invaders_1.03.deb", pool, srcPoolPath, sourceChecksum, false)
c.Check(err, IsNil)
}

Expand Down
9 changes: 5 additions & 4 deletions s3/public.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,20 +305,21 @@ func (storage *PublishedStorage) RemoveDirs(path string, _ aptly.Progress) error

// LinkFromPool links package file from pool to dist's pool location
//
// publishedDirectory is desired location in pool (like prefix/pool/component/liba/libav/)
// publishedPrefix is desired prefix for the location in the pool.
// publishedRelPath is desired location in pool (like pool/component/liba/libav/)
// sourcePool is instance of aptly.PackagePool
// sourcePath is filepath to package file in package pool
//
// LinkFromPool returns relative path for the published file to be included in package index
func (storage *PublishedStorage) LinkFromPool(prefix string, path string, fileName string, sourcePool aptly.PackagePool,
func (storage *PublishedStorage) LinkFromPool(publishedPrefix, publishedRelPath, fileName string, sourcePool aptly.PackagePool,
sourcePath string, sourceChecksums utils.ChecksumInfo, force bool) error {

publishedDirectory := filepath.Join(prefix, path)
publishedDirectory := filepath.Join(publishedPrefix, publishedRelPath)
relPath := filepath.Join(publishedDirectory, fileName)
poolPath := filepath.Join(storage.prefix, relPath)

if storage.pathCache == nil {
paths, md5s, err := storage.internalFilelist(filepath.Join(storage.prefix, prefix, "pool"), true)
paths, md5s, err := storage.internalFilelist(filepath.Join(storage.prefix, publishedPrefix, "pool"), true)
if err != nil {
return errors.Wrap(err, "error caching paths under prefix")
}
Expand Down
Loading

0 comments on commit 5541ac9

Please sign in to comment.