diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf13ff..030ef63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 0.6.4 +- Made streaming inflate consume all data possible +- Optimized use of values near 32-bit boundary ## 0.6.3 - Patch exports of async functions - Fix streaming unzip diff --git a/package.json b/package.json index fc98e39..73dc5f6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fflate", - "version": "0.6.3", + "version": "0.6.4", "description": "High performance (de)compression in an 8kB package", "main": "./lib/index.cjs", "module": "./esm/browser.js", diff --git a/rs/fflate/Cargo.toml b/rs/fflate/Cargo.toml index 6a3ffb1..f78c37e 100644 --- a/rs/fflate/Cargo.toml +++ b/rs/fflate/Cargo.toml @@ -21,9 +21,10 @@ edition = "2018" [dependencies] lazy_static = "1.4" +miniz_oxide = "*" [profile.release] -opt-level = "s" +opt-level = 3 lto = true [features] diff --git a/rs/fflate/src/lib.rs b/rs/fflate/src/lib.rs index 9ee716f..a6569fd 100644 --- a/rs/fflate/src/lib.rs +++ b/rs/fflate/src/lib.rs @@ -5,11 +5,11 @@ // Instead of trying to read this code, check out the TypeScript version #![allow(non_upper_case_globals)] -#![cfg_attr(not(feature = "std"), no_std)] +// #![cfg_attr(not(feature = "std"), no_std)] use lazy_static::lazy_static; -#[cfg(feature = "std")] -use std::{vec::Vec, io::{Read, Write, Error, ErrorKind}}; +// #[cfg(feature = "std")] +use std::{vec::Vec, io::{Read, Write, Error, ErrorKind}, ops::Range}; const fleb: [usize; 32] = [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 0, @@ -63,11 +63,11 @@ fn freb(b: &[u16], r: &mut [u32]) { // hmap base fn hmb(cd: &[u8], mb: u8, le: &mut [u16]) { let t = (mb + 1) as usize; - for i in 1..t { - le[i] = 0; - } + le.iter_mut().for_each(|v| *v = 0); for &cl in cd { - le[cl as usize] += 1; + if cl != 0 { + le[cl as usize] += 1; + } } let mut v = 0; for i in 1..t { @@ -93,17 +93,12 @@ fn hrmap(cd: &[u8], mb: u8, co: &mut [u16], le: &mut [u16]) { let mbu = mb as usize; for i in 0..cd.len() { let cl = cd[i] as usize; - // TODO: remove cond if cl != 0 { let r = mbu - cl; let v = (le[cl] << r) as usize; + le[cl] += 1; let m = v + (1 << r); - let sv = if cl != 0 { - le[cl] += 1; - ((i as u16) << 4) | cl as u16 - } else { - 0 - }; + let sv = ((i as u16) << 4) | cl as u16; for j in v..m { co[rev[j] >> rvb] = sv; } @@ -112,7 +107,7 @@ fn hrmap(cd: &[u8], mb: u8, co: &mut [u16], le: &mut [u16]) { } lazy_static! { - static ref revfl: [u32; 261]= { + static ref revfl: [u32; 261] = { let mut v = [0u32; 261]; freb(&fl, &mut v); v[258] = 28; @@ -156,26 +151,27 @@ lazy_static! { } #[inline(always)] -fn byte(dat: &[u8], bpos: usize) -> u8 { - if bpos < dat.len() { - dat[bpos] - } else { - 0 - } +fn mbits(dat: &[u8], pos: usize, mask: u8) -> u8 { + (dat[pos >> 3] >> (pos & 7)) & mask +} + +fn mbits16(dat: &[u8], pos: usize, mask: u16) -> u16 { + let b = pos >> 3; + ((dat[b] as u16 | ((dat[b + 1] as u16) << 8)) >> (pos & 7)) & mask } #[inline(always)] fn bits(dat: &[u8], pos: usize, mask: u8) -> u8 { let b = pos >> 3; - ((byte(dat, b) as u16 | ((byte(dat, b + 1) as u16) << 8)) >> (pos & 7)) as u8 & mask + ((dat[b] as u16 | ((dat[b + 1] as u16) << 8)) >> (pos & 7)) as u8 & mask } #[inline(always)] fn bits16(dat: &[u8], pos: usize, mask: u16) -> u16 { let b = pos >> 3; - ((byte(dat, b) as u32 - | ((byte(dat, b + 1) as u32) << 8) - | ((byte(dat, b + 2) as u32) << 16)) + ((dat[b] as u32 + | ((dat[b + 1] as u32) << 8) + | ((dat[b + 2] as u32) << 16)) >> (pos & 7)) as u16 & mask } @@ -234,7 +230,7 @@ pub enum InflateError { InvalidDistance } -#[cfg(feature = "std")] +// #[cfg(feature = "std")] impl From for Error { fn from(error: InflateError) -> Self { Error::new(match error { @@ -249,6 +245,16 @@ impl From for Error { } } +fn max(dat: &[u8]) -> u8 { + let mut m = 0; + for &v in dat { + if v > m { + m = v; + } + } + m +} + pub trait OutputBuffer { fn write(&mut self, value: u8); fn write_all(&mut self, slice: &[u8]) { @@ -260,18 +266,18 @@ pub trait OutputBuffer { fn back(&self, back: usize) -> u8; } -#[cfg(feature = "std")] +// #[cfg(feature = "std")] impl OutputBuffer for Vec { #[inline(always)] - fn w(&mut self, value: u8) { + fn write(&mut self, value: u8) { self.push(value); } #[inline(always)] - fn wall(&mut self, slice: &[u8]) { + fn write_all(&mut self, slice: &[u8]) { self.extend(slice.iter()); } #[inline(always)] - fn palloc(&mut self, extra_bytes: usize) { + fn pre_alloc(&mut self, extra_bytes: usize) { self.reserve(extra_bytes); } #[inline(always)] @@ -325,6 +331,8 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul let sl = dat.len(); if sl == 0 || (st.head && sl < 5) { return Ok(()); } let tbts = sl << 3; + let tbts1 = tbts - 8; + let tbts2 = tbts1 - 8; loop { if st.head { st.bfinal = bits(dat, pos, 1) != 0; @@ -333,8 +341,7 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul match btype { 0 => { let s = shft(pos) + 4; - let l = dat[s - 4] as u16 | ((dat[s - 3] as u16) << 8); - let t = s + l as usize; + let t = s + (dat[s - 4] as u16 | ((dat[s - 3] as u16) << 8)) as usize; if t > dat.len() { if st.last { return Err(InflateError::UnexpectedEOF); @@ -362,7 +369,7 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul for i in hclen..19 { st.clt[clim[i]] = 0; } - let clb = *st.clt.iter().max().unwrap(); + let clb = max(&st.clt); let clbmsk = (1 << clb) - 1; if !st.last && pos + tl * (clb + 7) as usize > tbts { break; @@ -405,8 +412,8 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul } let lt = &st.ldt[0..hlit]; let dt = &st.ldt[hlit..tl]; - st.lbits = *lt.iter().max().unwrap(); - st.dbits = *dt.iter().max().unwrap(); + st.lbits = max(lt); + st.dbits = max(dt); hrmap(lt, st.lbits, &mut st.lmap, &mut st.le); hrmap(dt, st.dbits, &mut st.dmap, &mut st.le); } @@ -419,15 +426,32 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul } } st.head = false; - let lms = (1 << st.lbits) - 1; - let dms = (1 << st.dbits) - 1; - let mxa = (st.lbits + st.dbits + 18) as usize; - while st.last || pos + mxa < tbts { - let c = st.lmap[bits16(dat, pos, lms) as usize]; + let lms = (1u16 << st.lbits) - 1; + let lms8 = lms as u8; + let dms = (1u16 << st.dbits) - 1; + let dms8 = dms as u8; + let topl = tbts - st.lbits as usize; + let topd = tbts - st.dbits as usize; + let top = tbts - (st.lbits + st.dbits + 18) as usize; + while st.last || pos < top { + let c = st.lmap[ + if pos > topl { + return Err(InflateError::UnexpectedEOF); + } else if st.lbits < 10 { + if pos > tbts1 { + mbits(dat, pos, lms8) as usize + } else { + bits(dat, pos, lms8) as usize + } + } else { + if pos > tbts2 { + mbits16(dat, pos, lms) as usize + } else { + bits16(dat, pos, lms) as usize + } + } + ]; pos += (c & 15) as usize; - if pos > tbts { - return Err(InflateError::UnexpectedEOF); - } if c == 0 { return Err(InflateError::InvalidLengthOrLiteral); } @@ -440,12 +464,28 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul } else { let mut add = sym - 254; if add > 10 { - let i = (add as usize) - 3; + let i = add as usize - 3; let b = fleb[i]; - add = bits(dat, pos, (1 << b) - 1) as u16 + fl[i as usize]; + add = bits(dat, pos, (1 << b) - 1) as u16 + fl[i]; pos += b; } - let d = st.dmap[bits16(dat, pos, dms) as usize]; + let d = st.dmap[ + if pos > topd { + return Err(InflateError::UnexpectedEOF); + } else if st.dbits < 10 { + if pos > tbts1 { + mbits(dat, pos, dms8) as usize + } else { + bits(dat, pos, dms8) as usize + } + } else { + if pos > tbts2 { + mbits16(dat, pos, dms) as usize + } else { + bits16(dat, pos, dms) as usize + } + } + ]; if d == 0 { return Err(InflateError::InvalidDistance); } @@ -475,46 +515,45 @@ fn inflt(dat: &[u8], buf: &mut dyn OutputBuffer, st: &mut InflateState) -> Resul } pub fn inflate(dat: &[u8], out: &mut dyn OutputBuffer) -> Result<(), InflateError> { - out.pre_alloc(dat.len() * 3); let mut st = InflateState::new(); st.last = true; inflt(dat, out, &mut st)?; Ok(()) } -pub struct Inflate<'a> { - pub sink: &'a mut dyn OutputBuffer, - state: InflateState -} - -impl<'a> Inflate<'a> { - pub fn push(&mut self, data: &[u8]) -> Result { - inflt(data, self.sink, &mut self.state)?; - let bytes = self.state.pos >> 3; - self.state.pos &= 7; - Ok(bytes) - } - pub fn end(&mut self) -> Result<(), InflateError> { - self.state.last = true; - self.push(&et)?; - Ok(()) - } - pub fn new(sink: &'a mut dyn OutputBuffer) -> Inflate<'a> { - Inflate { - state: InflateState::new(), - sink: sink - } - } -} - -#[cfg(feature = "std")] -impl<'a> Write for Inflate<'a> { - #[inline(always)] - fn write(&mut self, data: &[u8]) -> Result { - Ok(self.push(data)?) - } - #[inline(always)] - fn flush(&mut self) -> Result<(), Error> { - Ok(self.end()?) - } -} \ No newline at end of file +// // pub struct Inflate<'a> { +// // pub sink: &'a mut dyn OutputBuffer, +// // state: InflateState +// // } + +// // impl<'a> Inflate<'a> { +// // pub fn push(&mut self, data: &[u8]) -> Result { +// // inflt(data, self.sink, &mut self.state)?; +// // let bytes = self.state.pos >> 3; +// // self.state.pos &= 7; +// // Ok(bytes) +// // } +// // pub fn end(&mut self) -> Result<(), InflateError> { +// // self.state.last = true; +// // self.push(&et)?; +// // Ok(()) +// // } +// // pub fn new(sink: &'a mut dyn OutputBuffer) -> Inflate<'a> { +// // Inflate { +// // state: InflateState::new(), +// // sink: sink +// // } +// // } +// // } + +// #[cfg(feature = "std")] +// impl<'a> Write for Inflate<'a> { +// #[inline(always)] +// fn write(&mut self, data: &[u8]) -> Result { +// Ok(self.push(data)?) +// } +// #[inline(always)] +// fn flush(&mut self) -> Result<(), Error> { +// Ok(self.end()?) +// } +// } \ No newline at end of file diff --git a/src/index.ts b/src/index.ts index 6706c7e..1ff81f4 100644 --- a/src/index.ts +++ b/src/index.ts @@ -240,7 +240,6 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => { pos += hcLen * 3; // code lengths bits const clb = max(clt), clbmsk = (1 << clb) - 1; - if (!noSt && pos + tl * (clb + 7) > tbts) break; // code lengths map const clm = hMap(clt, clb, 1); for (let i = 0; i < tl;) { @@ -270,25 +269,30 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => { lm = hMap(lt, lbt, 1); dm = hMap(dt, dbt, 1); } else throw 'invalid block type'; - if (pos > tbts) throw 'unexpected EOF'; + if (pos > tbts) { + if (noSt) throw 'unexpected EOF'; + break; + } } // Make sure the buffer can hold this + the largest possible addition // Maximum chunk size (practically, theoretically infinite) is 2^17; if (noBuf) cbuf(bt + 131072); const lms = (1 << lbt) - 1, dms = (1 << dbt) - 1; - const mxa = lbt + dbt + 18; - while (noSt || pos + mxa < tbts) { + let lpos = pos; + for (;; lpos = pos) { // bits read, code const c = lm[bits16(dat, pos) & lms], sym = c >>> 4; pos += c & 15; - if (pos > tbts) throw 'unexpected EOF'; + if (pos > tbts) { + if (noSt) throw 'unexpected EOF'; + break; + } if (!c) throw 'invalid length/literal'; if (sym < 256) buf[bt++] = sym; else if (sym == 256) { - lm = null; + lpos = pos, lm = null; break; - } - else { + } else { let add = sym - 254; // no extra bits needed if less if (sym > 264) { @@ -306,7 +310,10 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => { const b = fdeb[dsym]; dt += bits16(dat, pos) & ((1 << b) - 1), pos += b; } - if (pos > tbts) throw 'unexpected EOF'; + if (pos > tbts) { + if (noSt) throw 'unexpected EOF'; + break; + } if (noBuf) cbuf(bt + 131072); const end = bt + add; for (; bt < end; bt += 4) { @@ -318,7 +325,7 @@ const inflt = (dat: Uint8Array, buf?: Uint8Array, st?: InflateState) => { bt = end; } } - st.l = lm, st.p = pos, st.b = bt; + st.l = lm, st.p = lpos, st.b = bt; if (lm) final = 1, st.m = lbt, st.d = dm, st.n = dbt; } while (!final) return bt == buf.length ? buf : slc(buf, 0, bt); @@ -706,7 +713,7 @@ const adler = (): CRCV => { }, d() { a %= 65521, b %= 65521; - return ((a >>> 8) << 16 | (b & 255) << 8 | (b >>> 8)) + ((a & 255) << 23) * 2; + return (a & 255) << 24 | (a >>> 8) << 16 | (b & 255) << 8 | (b >>> 8); } } } @@ -983,9 +990,9 @@ const astrmify = (fns: (() => unknown[])[], strm: Astrm, opts: T | 0, init: ( const b2 = (d: Uint8Array, b: number) => d[b] | (d[b + 1] << 8); // read 4 bytes -const b4 = (d: Uint8Array, b: number) => (d[b] | (d[b + 1] << 8) | (d[b + 2] << 16)) + (d[b + 3] << 23) * 2; +const b4 = (d: Uint8Array, b: number) => (d[b] | (d[b + 1] << 8) | (d[b + 2] << 16) | (d[b + 3] << 24)) >>> 0; -const b8 = (d: Uint8Array, b: number) => b4(d, b) | (b4(d, b) * 4294967296); +const b8 = (d: Uint8Array, b: number) => b4(d, b) + (b4(d, b + 4) * 4294967296); // write bytes const wbytes = (d: Uint8Array, b: number, v: number) => { @@ -1018,7 +1025,7 @@ const gzs = (d: Uint8Array) => { // gzip length const gzl = (d: Uint8Array) => { const l = d.length; - return (d[l - 4] | d[l - 3] << 8 | d[l - 2] << 16) + (2 * (d[l - 1] << 23)); + return ((d[l - 4] | d[l - 3] << 8 | d[l - 2] << 16) | (d[l - 1] << 24)) >>> 0; } // gzip header length @@ -2081,7 +2088,10 @@ export class DecodeUTF8 { push(chunk: Uint8Array, final?: boolean) { if (!this.ondata) throw 'no callback'; if (!final) final = false; - if (this.t) return this.ondata(this.t.decode(chunk, { stream: !final }), final); + if (this.t) { + this.ondata(this.t.decode(chunk, { stream: true }), false); + if (final) this.ondata(this.t.decode(), true); + } const dat = new u8(this.p.length + chunk.length); dat.set(this.p); dat.set(chunk, this.p.length); @@ -2227,7 +2237,7 @@ const wzh = (d: Uint8Array, b: number, f: ZHF, fn: Uint8Array, u: boolean, c?: n d[b++] = f.compression & 255, d[b++] = f.compression >> 8; const dt = new Date(f.mtime == null ? Date.now() : f.mtime), y = dt.getFullYear() - 1980; if (y < 0 || y > 119) throw 'date not in range 1980-2099'; - wbytes(d, b, ((y << 24) * 2) | ((dt.getMonth() + 1) << 21) | (dt.getDate() << 16) | (dt.getHours() << 11) | (dt.getMinutes() << 5) | (dt.getSeconds() >>> 1)), b += 4; + wbytes(d, b, (y << 25) | ((dt.getMonth() + 1) << 21) | (dt.getDate() << 16) | (dt.getHours() << 11) | (dt.getMinutes() << 5) | (dt.getSeconds() >>> 1)), b += 4; if (c != null) { wbytes(d, b, f.crc); wbytes(d, b + 4, c);