Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More archive formats #47

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions ar.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package xtractr

import (
"errors"
"fmt"
"io"
"os"
"strings"

"github.com/peterebden/ar"
)

// ExtractAr unpacks a raw ar archive, the container used by debian (.deb)
// packages. It opens xFile.FilePath and passes the open file to unAr,
// returning unAr's size, file list and error unchanged. If the archive
// cannot be opened the error is wrapped with an "os.Open" prefix.
func ExtractAr(xFile *XFile) (int64, []string, error) {
	archive, openErr := os.Open(xFile.FilePath)
	if openErr != nil {
		return 0, nil, fmt.Errorf("os.Open: %w", openErr)
	}
	// Released only after unAr has finished reading the stream.
	defer archive.Close()

	return xFile.unAr(archive)
}

// unAr walks every entry of the ar stream in reader and writes each one
// through writeFile. It returns the running byte total and the list of
// written paths; on any failure the totals gathered so far are returned
// together with the error. io.EOF from the ar reader ends the walk normally.
func (x *XFile) unAr(reader io.Reader) (int64, []string, error) {
	var (
		entries = ar.NewReader(reader)
		written = []string{}
		total   int64
	)

	for {
		entry, err := entries.Next()
		if errors.Is(err, io.EOF) {
			// End of archive: report everything that was written.
			return total, written, nil
		}

		if err != nil {
			return total, written, fmt.Errorf("%s: arReader.Next: %w", x.FilePath, err)
		}

		if entry == nil {
			return total, written, fmt.Errorf("%w: %s", ErrInvalidHead, x.FilePath)
		}

		target := x.clean(entry.Name)
		if !strings.HasPrefix(target, x.OutputDir) {
			// The entry resolves to a path that does not start with the output
			// directory. Possibly a malicious archive; stop here.
			return total, written, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, target, entry.Name)
		}

		// The ar format holds a flat list of files, so there are no
		// directory entries to create before writing.
		count, err := writeFile(target, entries, os.FileMode(entry.Mode), x.DirMode)
		if err != nil {
			return total, written, err
		}

		total += count
		written = append(written, target)
	}
}
99 changes: 99 additions & 0 deletions cpio.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package xtractr

import (
"compress/gzip"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/cavaliergopher/cpio"
)

// ExtractCPIOGzip unpacks a gzip-compressed cpio archive (cpgz).
// The file at xFile.FilePath is opened, wrapped in a gzip reader, and the
// decompressed stream is handed to uncpio. Errors from opening the file or
// creating the gzip reader are wrapped with the name of the failing call.
func ExtractCPIOGzip(xFile *XFile) (int64, []string, error) {
	source, err := os.Open(xFile.FilePath)
	if err != nil {
		return 0, nil, fmt.Errorf("os.Open: %w", err)
	}
	defer source.Close()

	gunzip, err := gzip.NewReader(source)
	if err != nil {
		return 0, nil, fmt.Errorf("gzip.NewReader: %w", err)
	}
	// Deferred calls run last-in first-out, so the gzip reader is closed
	// before the underlying file.
	defer gunzip.Close()

	return xFile.uncpio(gunzip)
}

// ExtractCPIO unpacks an uncompressed .cpio file.
// It opens xFile.FilePath and passes the open file to uncpio, returning
// uncpio's results unchanged. An open failure is wrapped with "os.Open".
func ExtractCPIO(xFile *XFile) (int64, []string, error) {
	archive, openErr := os.Open(xFile.FilePath)
	if openErr != nil {
		return 0, nil, fmt.Errorf("os.Open: %w", openErr)
	}
	// Kept open until uncpio has read the whole stream.
	defer archive.Close()

	return xFile.uncpio(archive)
}

// uncpio reads every entry from the cpio stream in reader and extracts it
// with uncpioFile. It returns the total number of bytes written and the
// list of extracted paths (x.OutputDir joined with each entry name).
//
// On any failure the size and file list accumulated so far are returned
// alongside the error, so callers can see what was already extracted.
// io.EOF from the cpio reader marks the normal end of the archive.
func (x *XFile) uncpio(reader io.Reader) (int64, []string, error) {
	zipReader := cpio.NewReader(reader)
	files := []string{}
	size := int64(0)

	for {
		zipFile, err := zipReader.Next()

		switch {
		case errors.Is(err, io.EOF):
			return size, files, nil
		case err != nil:
			// Report partial progress and the archive path, matching the
			// other error return in this loop.
			return size, files, fmt.Errorf("%s: cpio Next() failed: %w", x.FilePath, err)
		}

		fSize, err := x.uncpioFile(zipFile, zipReader)
		if err != nil {
			return size, files, fmt.Errorf("%s: %w", x.FilePath, err)
		}

		files = append(files, filepath.Join(x.OutputDir, zipFile.Name))
		size += fSize
	}
}

// uncpioFile extracts a single cpio entry described by cpioFile, reading
// its contents from cpioReader. It returns the number of bytes written;
// directories and links report zero.
//
// The entry is rejected with ErrInvalidPath when its cleaned destination
// does not start with x.OutputDir.
func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (int64, error) {
	target := x.clean(cpioFile.Name)
	if !strings.HasPrefix(target, x.OutputDir) {
		// The entry resolves to a path outside of the base path. Malicious archive?
		return 0, fmt.Errorf("%s: %w: %s (from: %s)", cpioFile.FileInfo().Name(), ErrInvalidPath, target, cpioFile.Name)
	}

	switch {
	case cpioFile.Mode.IsDir() || cpioFile.FileInfo().IsDir():
		// Directory entry: create it (and any parents) and move on.
		if err := os.MkdirAll(target, x.DirMode); err != nil {
			return 0, fmt.Errorf("making cpio dir: %w", err)
		}

		return 0, nil
	case cpioFile.Linkname != "":
		// Any entry carrying a link name becomes a symlink; this turns
		// hard links into symlinks too.
		// NOTE(review): the link target is used as-is and is not checked
		// against x.OutputDir — confirm whether that is intended.
		if err := os.Symlink(cpioFile.Linkname, target); err != nil {
			return 0, fmt.Errorf("%s symlink: %w: %s (from: %s)", cpioFile.FileInfo().Name(), err, target, cpioFile.Name)
		}

		return 0, nil
	}

	// Everything else is written as a file with x.FileMode. This should turn
	// non-regular files (sockets, block, character and fifo devices) into
	// empty files.
	written, err := writeFile(target, cpioReader, x.FileMode, x.DirMode)
	if err != nil {
		return written, fmt.Errorf("%s: %w: %s (from: %s)", cpioFile.FileInfo().Name(), err, target, cpioFile.Name)
	}

	return written, nil
}
49 changes: 49 additions & 0 deletions decompress.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/pierrec/lz4/v4"
lzw "github.com/sshaman1101/dcompress"
"github.com/therootcompany/xz"
"github.com/ulikunitz/xz/lzma"
)

// ExtractXZ extracts an XZ-compressed file. A single file.
Expand Down Expand Up @@ -65,6 +66,54 @@ func ExtractZlib(xFile *XFile) (int64, []string, error) {
return size, []string{wfile}, nil
}

// ExtractLZMA decompresses a single lzma-compressed file.
// The output path is produced by xFile.clean from xFile.FilePath with
// ".lzma", ".lz" and ".lzip" passed as its trim arguments. On success it
// returns the number of bytes written and a one-element list holding the
// output path.
func ExtractLZMA(xFile *XFile) (int64, []string, error) {
	source, err := os.Open(xFile.FilePath)
	if err != nil {
		return 0, nil, fmt.Errorf("os.Open: %w", err)
	}
	defer source.Close()

	decoder, err := lzma.NewReader(source)
	if err != nil {
		return 0, nil, fmt.Errorf("lzma.NewReader: %w", err)
	}

	// Destination for the decompressed data.
	target := xFile.clean(xFile.FilePath, ".lzma", ".lz", ".lzip")

	written, err := writeFile(target, decoder, xFile.FileMode, xFile.DirMode)
	if err != nil {
		return written, nil, err
	}

	return written, []string{target}, nil
}

// ExtractLZMA2 decompresses a single lzma2-compressed file.
// The output path is produced by xFile.clean from xFile.FilePath with
// ".lzma" and ".lzma2" passed as its trim arguments. On success it returns
// the number of bytes written and a one-element list holding the output path.
func ExtractLZMA2(xFile *XFile) (int64, []string, error) {
	source, err := os.Open(xFile.FilePath)
	if err != nil {
		return 0, nil, fmt.Errorf("os.Open: %w", err)
	}
	defer source.Close()

	decoder, err := lzma.NewReader2(source)
	if err != nil {
		return 0, nil, fmt.Errorf("lzma.NewReader2: %w", err)
	}

	// Destination for the decompressed data.
	target := xFile.clean(xFile.FilePath, ".lzma", ".lzma2")

	written, err := writeFile(target, decoder, xFile.FileMode, xFile.DirMode)
	if err != nil {
		return written, nil, err
	}

	return written, []string{target}, nil
}

// ExtractZstandard extracts a Zstandard-compressed file. A single file.
func ExtractZstandard(xFile *XFile) (int64, []string, error) {
compressedFile, err := os.Open(xFile.FilePath)
Expand Down
23 changes: 19 additions & 4 deletions files.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,35 +27,48 @@ type Interface func(*XFile) (int64, []string, []string, error)

// https://github.com/golift/xtractr/issues/44
//
// This list of archive types is used in a few places as extension lists.
//
//nolint:gochecknoglobals
var extension2function = []archive{
{Extension: ".tar.bz2", Extract: ChngInt(ExtractTarBzip)},
{Extension: ".cpio.gz", Extract: ChngInt(ExtractCPIOGzip)},
{Extension: ".tar.gz", Extract: ChngInt(ExtractTarGzip)},
{Extension: ".tar.xz", Extract: ChngInt(ExtractTarXZ)},
{Extension: ".tar.z", Extract: ChngInt(ExtractTarZ)},
// The ones with double extensions that match a single (below) need to come first.
{Extension: ".7z", Extract: Extract7z},
{Extension: ".7z.001", Extract: Extract7z},
{Extension: ".z", Extract: ChngInt(ExtractLZW)}, // everything is lowercase...
{Extension: ".ar", Extract: ChngInt(ExtractAr)},
{Extension: ".br", Extract: ChngInt(ExtractBrotli)},
{Extension: ".brotli", Extract: ChngInt(ExtractBrotli)},
{Extension: ".bz2", Extract: ChngInt(ExtractBzip)},
{Extension: ".cpgz", Extract: ChngInt(ExtractCPIOGzip)},
{Extension: ".cpio", Extract: ChngInt(ExtractCPIO)},
{Extension: ".deb", Extract: ChngInt(ExtractAr)},
{Extension: ".gz", Extract: ChngInt(ExtractGzip)},
{Extension: ".gzip", Extract: ChngInt(ExtractGzip)},
{Extension: ".iso", Extract: ChngInt(ExtractISO)},
{Extension: ".lz4", Extract: ChngInt(ExtractLZ4)},
{Extension: ".lz", Extract: ChngInt(ExtractLZMA)},
{Extension: ".lzip", Extract: ChngInt(ExtractLZMA)},
{Extension: ".lzma", Extract: ChngInt(ExtractLZMA)},
{Extension: ".lzma2", Extract: ChngInt(ExtractLZMA2)},
{Extension: ".r00", Extract: ExtractRAR},
{Extension: ".rar", Extract: ExtractRAR},
{Extension: ".s2", Extract: ChngInt(ExtractS2)},
{Extension: ".rpm", Extract: ChngInt(ExtractRPM)},
{Extension: ".snappy", Extract: ChngInt(ExtractSnappy)},
{Extension: ".sz", Extract: ChngInt(ExtractSnappy)},
{Extension: ".tar", Extract: ChngInt(ExtractTar)},
{Extension: ".tbz", Extract: ChngInt(ExtractTarBzip)},
{Extension: ".tbz2", Extract: ChngInt(ExtractTarBzip)},
{Extension: ".tgz", Extract: ChngInt(ExtractTarGzip)},
{Extension: ".tlz", Extract: ChngInt(ExtractTarLzip)},
{Extension: ".txz", Extract: ChngInt(ExtractTarXZ)},
{Extension: ".tz", Extract: ChngInt(ExtractTarZ)},
{Extension: ".xz", Extract: ChngInt(ExtractXZ)},
{Extension: ".z", Extract: ChngInt(ExtractLZW)}, // everything is lowercase...
{Extension: ".zip", Extract: ChngInt(ExtractZIP)},
{Extension: ".zlib", Extract: ChngInt(ExtractZlib)},
{Extension: ".zst", Extract: ChngInt(ExtractZstandard)},
Expand Down Expand Up @@ -430,19 +443,21 @@ func (x *XFile) clean(filePath string, trim ...string) string {
// Returns a list of supported extensions minus the ones provided.
// Extensions for like-types such as .rar and .r00 need to both be provided.
// Same for .tar.gz and .tgz variants.
func AllExcept(onlyThese []string) Exclude {
func AllExcept(onlyThese ...string) Exclude {
// Start by excluding everything.
output := SupportedExtensions()

// Loop through the extensions we want to keep.
for _, str := range onlyThese {
idx := 0

// Remove each one from the output list.
for _, ext := range output {
if !strings.EqualFold(ext, str) {
output[idx] = ext
idx++
}
}

// Truncate the output to the size of items kept.
output = output[:idx]
}

Expand Down
2 changes: 1 addition & 1 deletion files_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func TestAllExcept(t *testing.T) {
t.Parallel()

includeOnlyThese := []string{".rar", ".zip", ".7z"}
allExcept := xtractr.AllExcept(includeOnlyThese)
allExcept := xtractr.AllExcept(includeOnlyThese...)

assert.Len(t, allExcept, len(xtractr.SupportedExtensions())-len(includeOnlyThese),
"we should have 3 fewer entries that the total supported extensions")
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ go 1.19
require (
github.com/andybalholm/brotli v1.1.0
github.com/bodgit/sevenzip v1.5.1
github.com/cavaliergopher/cpio v1.0.1
github.com/kdomanski/iso9660 v0.4.0
github.com/klauspost/compress v1.17.9
github.com/nwaples/rardecode v1.1.3
github.com/peterebden/ar v0.0.0-20230524111245-4f7c7b065694
github.com/pierrec/lz4/v4 v4.1.21
github.com/sshaman1101/dcompress v0.0.0-20200109162717-50436a6332de
github.com/stretchr/testify v1.9.0
Expand All @@ -17,6 +19,7 @@ require (
require (
github.com/bodgit/plumbing v1.3.0 // indirect
github.com/bodgit/windows v1.0.1 // indirect
github.com/cavaliergopher/rpm v1.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dsnet/compress v0.0.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
Expand All @@ -25,6 +28,7 @@ require (
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/ulikunitz/xz v0.5.12 // indirect
go4.org v0.0.0-20230225012048-214862532bf5 // indirect
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 // indirect
golang.org/x/text v0.16.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ github.com/bodgit/sevenzip v1.5.1 h1:rVj0baZsooZFy64DJN0zQogPzhPrT8BQ8TTRd1H4WHw
github.com/bodgit/sevenzip v1.5.1/go.mod h1:Q3YMySuVWq6pyGEolyIE98828lOfEoeWg5zeH6x22rc=
github.com/bodgit/windows v1.0.1 h1:tF7K6KOluPYygXa3Z2594zxlkbKPAOvqr97etrGNIz4=
github.com/bodgit/windows v1.0.1/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM=
github.com/cavaliergopher/cpio v1.0.1 h1:KQFSeKmZhv0cr+kawA3a0xTQCU4QxXF1vhU7P7av2KM=
github.com/cavaliergopher/cpio v1.0.1/go.mod h1:pBdaqQjnvXxdS/6CvNDwIANIFSP0xRKI16PX4xejRQc=
github.com/cavaliergopher/rpm v1.2.0 h1:s0h+QeVK252QFTolkhGiMeQ1f+tMeIMhGl8B1HUmGUc=
github.com/cavaliergopher/rpm v1.2.0/go.mod h1:R0q3vTqa7RUvPofAZYrnjJ63hh2vngjFfphuXiExVos=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
Expand Down Expand Up @@ -91,6 +95,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/peterebden/ar v0.0.0-20230524111245-4f7c7b065694 h1:pDBk3JWSIjS3gNxwEk1RjGdyZLsyTW4pOHaShBs9FK8=
github.com/peterebden/ar v0.0.0-20230524111245-4f7c7b065694/go.mod h1:hpFkyhCgB5Rm8FK+ISypOE+9UyrCuL6MNcjPMB1s1ec=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand Down Expand Up @@ -125,6 +131,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
Expand Down
Loading
Loading