Added ZIP64 Support to zip-stream.js #259

magnus-rattlehead · 2021-12-14T22:56:27Z

Feel free to test. I used the Sintel movie and a 10GB file created by fallocate. Testing shows that the zip works. For some reason on my zip archiver (Ark from KDE), it complains about a corrupted zip, but is able to read and extract successfully. zip -T archive.zip confirms that it is valid. Feel free to test it.

… for testing

magnus-rattlehead · 2021-12-20T17:22:55Z

Never mind: the archiving software doesn't complain about corrupted zips.

FallingHazard · 2021-12-20T23:10:00Z

It would be awesome if this was built into a transform stream like in: https://github.com/transcend-io/conflux/blob/master/src/write.js

PidgeyBE · 2024-04-18T10:31:01Z

FYI, the solution here gave me corrupt zip files.
I asked ChatGPT to rewrite the original one and did a bit of cleanup to end up with a working file...
Tested with 10.000 files which result in a total zip file of 9GB.

zip-stream64.js


/**
 * Incremental CRC-32 accumulator (reflected polynomial 0xEDB88320).
 *
 * Feed byte sequences with `append()` in any number of calls, then read the
 * checksum with `get()`. The value is kept and returned as a signed 32-bit
 * integer.
 */
class Crc32 {
    constructor() {
        // The running register starts with every bit set.
        this.crc = -1
    }

    /**
     * Folds the bytes of `data` into the running checksum.
     * @param {ArrayLike<number>} data Indexable sequence of byte values.
     */
    append(data) {
        const lookup = this.table
        const total = data.length | 0
        let register = this.crc | 0
        let position = 0
        while (position < total) {
            const slot = (register ^ data[position]) & 0xFF
            register = (register >>> 8) ^ lookup[slot]
            position += 1
        }
        this.crc = register
    }

    /**
     * @return {number} The checksum so far: the bitwise complement of the
     *                  running register, as a signed 32-bit integer.
     */
    get() {
        return ~this.crc
    }
}

// 256-entry lookup table shared by every instance: entry i is the register
// value obtained by pushing the byte i through eight shift/xor steps.
Crc32.prototype.table = Array.from({ length: 256 }, (_, seed) => {
    let entry = seed
    for (let bit = 0; bit < 8; bit += 1) {
        const shifted = entry >>> 1
        entry = (entry & 1) ? shifted ^ 0xEDB88320 : shifted
    }
    return entry
})

/**
 * Allocates a zero-filled buffer of `byteLength` bytes and exposes it two
 * ways: as a byte array (for bulk copies) and as a DataView over the same
 * memory (for multi-byte field writes).
 * @param  {number} byteLength Size of the buffer in bytes.
 * @return {{array: Uint8Array, view: DataView}} Two views of one buffer.
 */
const getDataHelper = (byteLength) => {
    const array = new Uint8Array(byteLength)
    const view = new DataView(array.buffer)
    return { array, view }
}

/**
 * Reads one chunk from the entry's source reader and forwards it unchanged
 * to the archive stream, updating the entry's CRC and byte counters. When
 * the source is exhausted the entry's footer is written instead.
 * @param  {Object} zipObj Entry state exposing `reader`, `crc`, `ctrl`,
 *                         the two length counters and `writeFooter()`.
 * @return {Promise} Settles once the chunk (or the footer) has been handled;
 *                   resolves with whatever `writeFooter()` returned on the
 *                   final call, otherwise with `undefined`.
 */
const pump = (zipObj) => {
    const handleResult = (result) => {
        if (result.done) {
            return zipObj.writeFooter()
        }
        const bytes = result.value
        zipObj.crc.append(bytes)
        // Entries are stored without compression, so both counters advance
        // by the same amount.
        zipObj.uncompressedLength += bytes.length
        zipObj.compressedLength += bytes.length
        zipObj.ctrl.enqueue(bytes)
    }
    return zipObj.reader.read().then(handleResult)
}

/**
 * Creates a ReadableStream that emits a ZIP archive assembled from the
 * entries queued on a `zipWriter` controller. Entries are stored without
 * compression and every entry is described with ZIP64 (8-byte) sizes and
 * offsets, so neither single files nor the archive are limited to 4 GiB.
 *
 * The controller passed to the callbacks offers:
 *   - `enqueue(fileLike)`: queue an entry. `fileLike.name` is required;
 *     `lastModified`, `comment` and `directory` are optional; non-directory
 *     entries supply `stream()`, which must return a ReadableStream of bytes.
 *   - `close()`: finish the archive once all queued entries are written.
 *
 * @param  {Object}   underlyingSource         Callbacks that feed the archive.
 * @param  {Function} [underlyingSource.start] Called once with `zipWriter`.
 * @param  {Function} [underlyingSource.pull]  Called with `zipWriter` whenever
 *                                             a pull did not result in a
 *                                             pending read of an entry.
 * @return {ReadableStream} Stream of Uint8Array chunks forming the archive.
 */
function createWriter(underlyingSource) {
    const files = Object.create(null)
    const filenames = []
    const encoder = new TextEncoder()
    let offset = 0
    let activeZipIndex = 0
    let ctrl
    let activeZipObject, closed

    // Advances to the next queued entry, or finishes the archive when the
    // queue is drained and close() has been requested.
    function next() {
        activeZipIndex++
        activeZipObject = files[filenames[activeZipIndex]]
        if (activeZipObject) processNextChunk()
        else if (closed) closeZip()
    }

    const zipWriter = {
        enqueue(fileLike) {
            if (closed) throw new TypeError('Cannot enqueue a chunk into a readable stream that is closed or has been requested to be closed')

            let name = fileLike.name.trim()
            const date = new Date(typeof fileLike.lastModified === 'undefined' ? Date.now() : fileLike.lastModified)

            if (fileLike.directory && !name.endsWith('/')) name += '/'
            if (files[name]) throw new Error('File already exists.')

            const nameBuf = encoder.encode(name)
            filenames.push(name)

            const zipObject = files[name] = {
                level: 0,
                ctrl,
                directory: !!fileLike.directory,
                nameBuf,
                comment: encoder.encode(fileLike.comment || ''),
                compressedLength: 0,
                uncompressedLength: 0,
                extraArray: null,

                // Emits the local file header. `header` holds the 26 bytes
                // that follow the signature; the same bytes are reused for
                // the central directory entry in closeZip().
                writeHeader() {
                    const header = getDataHelper(26)
                    const data = getDataHelper(30 + nameBuf.length)

                    zipObject.header = header
                    zipObject.offset = offset
                    if (zipObject.level !== 0 && !zipObject.directory) {
                        // Compression method 8, written big-endian so the
                        // bytes land as little-endian 0x0008.
                        header.view.setUint16(4, 0x0800)
                    }
                    // Bytes 2-3: general purpose flags 0x0808 (sizes/CRC follow
                    // in a data descriptor, UTF-8 names). Bytes 0-1 are
                    // replaced on the next line.
                    header.view.setUint32(0, 0x14000808)
                    // Version needed to extract: 4.5 (ZIP64).
                    header.view.setUint16(0, 45, true)

                    header.view.setUint16(6, (((date.getHours() << 6) | date.getMinutes()) << 5) | date.getSeconds() / 2, true)
                    header.view.setUint16(8, ((((date.getFullYear() - 1980) << 4) | (date.getMonth() + 1)) << 5) | date.getDate(), true)
                    header.view.setUint16(22, nameBuf.length, true)
                    data.view.setUint32(0, 0x504b0304)
                    data.array.set(header.array, 4)
                    data.array.set(nameBuf, 30)
                    offset += data.array.length
                    ctrl.enqueue(data.array)
                },

                // Emits the ZIP64 data descriptor and prepares the fields the
                // central directory needs (CRC, size sentinels, ZIP64 extra).
                writeFooter() {
                    // The version-needed field was already written (little
                    // endian) by writeHeader(); it must not be rewritten here
                    // in big-endian byte order.
                    const footer = getDataHelper(24)
                    footer.view.setUint32(0, 0x504b0708)

                    if (zipObject.crc) {
                        zipObject.header.view.setUint32(10, zipObject.crc.get(), true)
                        footer.view.setUint32(4, zipObject.crc.get(), true)
                    }

                    const compressedSize = BigInt(zipObject.compressedLength)
                    const uncompressedSize = BigInt(zipObject.uncompressedLength)

                    // 32-bit sizes become the ZIP64 sentinel; these header
                    // bytes are only used for the central directory copy.
                    zipObject.header.view.setUint32(14, 0xffffffff, true)
                    zipObject.header.view.setUint32(18, 0xffffffff, true)
                    footer.view.setBigUint64(8, compressedSize, true)
                    footer.view.setBigUint64(16, uncompressedSize, true)

                    // ZIP64 extended information: id 0x0001, 24 data bytes =
                    // uncompressed size, compressed size, local header offset.
                    const zip64Extra = getDataHelper(28)
                    zip64Extra.view.setUint16(0, 0x0001, true)
                    zip64Extra.view.setUint16(2, 24, true)
                    zip64Extra.view.setBigUint64(4, uncompressedSize, true)
                    zip64Extra.view.setBigUint64(12, compressedSize, true)
                    zip64Extra.view.setBigUint64(20, BigInt(zipObject.offset), true)
                    zipObject.extraArray = zip64Extra.array

                    ctrl.enqueue(footer.array)
                    offset += zipObject.compressedLength + footer.array.length
                    next()
                },
                fileLike
            }

            if (!activeZipObject) {
                activeZipObject = zipObject
                processNextChunk()
            }
        },
        close() {
            if (closed) throw new TypeError('Cannot close a readable stream that has already been requested to be closed')
            if (!activeZipObject) closeZip()
            closed = true
        }
    }

    // Writes the central directory, the ZIP64 end-of-central-directory record
    // and locator, and the classic end-of-central-directory record, then
    // closes the stream.
    function closeZip() {
        const totalEntries = filenames.length
        let length = 0
        for (const filename of filenames) {
            const file = files[filename]
            length += 46 + file.nameBuf.length + file.comment.length
            if (file.extraArray) {
                length += file.extraArray.length
            }
        }
        const cdOffset = offset

        const data = getDataHelper(length + 56 + 20 + 22)
        let index = 0
        for (const filename of filenames) {
            const file = files[filename]
            const extraLength = file.extraArray ? file.extraArray.length : 0

            data.view.setUint32(index, 0x504b0102)
            // Version made by: 4.5, matching the ZIP64 records below.
            data.view.setUint16(index + 4, 45, true)
            data.array.set(file.header.array, index + 6)
            data.view.setUint16(index + 30, extraLength, true)
            data.view.setUint16(index + 32, file.comment.length, true)
            if (file.directory) {
                data.view.setUint8(index + 38, 0x10)
            }
            // The ZIP64 extra field always carries the 8-byte local header
            // offset, and a ZIP64 member may only be present when its 32-bit
            // counterpart holds the sentinel, so the sentinel is written
            // regardless of how small the real offset is.
            data.view.setUint32(index + 42, file.extraArray ? 0xffffffff : file.offset, true)

            data.array.set(file.nameBuf, index + 46)
            if (file.extraArray) {
                data.array.set(file.extraArray, index + 46 + file.nameBuf.length)
            }
            data.array.set(file.comment, index + 46 + file.nameBuf.length + extraLength)
            index += 46 + file.nameBuf.length + file.comment.length + extraLength
        }

        // Zip64 End of Central Directory record
        // 0: Signature
        data.view.setUint32(index, 0x504b0606)
        // 4: Size of zip64 EOCD (record size minus the first 12 bytes)
        data.view.setBigUint64(index + 4, BigInt(44), true)
        // 12: Version made by
        data.view.setUint16(index + 12, 45, true)
        // 14: Version needed to extract
        data.view.setUint16(index + 14, 45, true)
        // 16: Number of this disk (left 0)
        // 20: Number of the disk with the start of CD (left 0)
        // 24: Total number of entries in the central directory on this disk
        data.view.setBigUint64(index + 24, BigInt(totalEntries), true)
        // 32: Total number of entries in the central directory
        data.view.setBigUint64(index + 32, BigInt(totalEntries), true)
        // 40: Size of the central directory
        data.view.setBigUint64(index + 40, BigInt(length), true)
        // 48: Offset of start of central directory
        data.view.setBigUint64(index + 48, BigInt(cdOffset), true)
        index += 56

        // Zip64 End of Central Directory locator
        // 0: Signature
        data.view.setUint32(index, 0x504b0607)
        // 4: Number of the disk with the zip64 EOCD (left 0)
        // 8: Offset of the zip64 EOCD
        data.view.setBigUint64(index + 8, BigInt(cdOffset + length), true)
        // 16: Total number of disks
        data.view.setUint32(index + 16, 1, true)
        index += 20

        // Classic EOCD: entry counts and CD offset carry the sentinels that
        // send readers to the ZIP64 records above.
        data.view.setUint32(index, 0x504b0506)
        data.view.setUint16(index + 8, 0xffff, true)
        data.view.setUint16(index + 10, 0xffff, true)
        // Clamp so an oversized directory yields the sentinel, not a wrapped value.
        data.view.setUint32(index + 12, Math.min(length, 0xffffffff), true)
        data.view.setUint32(index + 16, 0xffffffff, true)
        ctrl.enqueue(data.array)
        ctrl.close()
    }

    // Drives the active entry one step: directories are written in one go,
    // files get their header on the first call and one source chunk per
    // later call. Returns a promise only while a source read is pending.
    function processNextChunk() {
        if (!activeZipObject) return
        if (activeZipObject.directory) return activeZipObject.writeFooter(activeZipObject.writeHeader())
        if (activeZipObject.reader) return pump(activeZipObject)
        if (activeZipObject.fileLike.stream) {
            activeZipObject.crc = new Crc32()
            activeZipObject.reader = activeZipObject.fileLike.stream().getReader()
            activeZipObject.writeHeader()
        } else next()
        // NOTE(review): an entry that is neither a directory nor has
        // `stream` is skipped here without a header, which closeZip() does
        // not expect — confirm whether such entries should be rejected.
    }

    return new ReadableStream({
        start: c => {
            ctrl = c
            underlyingSource.start && Promise.resolve(underlyingSource.start(zipWriter))
        },
        pull() {
            return processNextChunk() || (
                underlyingSource.pull &&
                Promise.resolve(underlyingSource.pull(zipWriter))
            )
        }
    })
}

// Publish the archive stream factory as the global `ZIP` on `window`.
window.ZIP = createWriter

aleimu · 2024-09-19T09:12:23Z

The file is damaged because the variable zip64 is not switched to true in a timely manner, resulting in some files being in zip32 format and some in zip64 format. However, unzip cannot correctly recognize this. To solve the file damage and "Header error", I set zip64 to be true by default. This way, it's okay!

magnus-rattlehead and others added 8 commits October 19, 2021 15:07

Initial commit for zip64

48c461f

Fixed some syntax errors

3435860

Made code look better, updated example to include large file over 4GB…

e74aad8

… for testing

Potential bug fix for broken downlaod >4.2GB

0e6b2f4

cleanup

dac46ce

syntax error fix

5454823

style cleanup

dbe2798

zip64 condition edit

0987f3d

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Added ZIP64 Support to zip-stream.js #259

Added ZIP64 Support to zip-stream.js #259

magnus-rattlehead commented Dec 14, 2021

magnus-rattlehead commented Dec 20, 2021

FallingHazard commented Dec 20, 2021 •

edited

Loading

PidgeyBE commented Apr 18, 2024 •

edited

Loading

aleimu commented Sep 19, 2024

Added ZIP64 Support to zip-stream.js #259

Are you sure you want to change the base?

Added ZIP64 Support to zip-stream.js #259

Conversation

magnus-rattlehead commented Dec 14, 2021

magnus-rattlehead commented Dec 20, 2021

FallingHazard commented Dec 20, 2021 • edited Loading

PidgeyBE commented Apr 18, 2024 • edited Loading

aleimu commented Sep 19, 2024

FallingHazard commented Dec 20, 2021 •

edited

Loading

PidgeyBE commented Apr 18, 2024 •

edited

Loading