diff --git a/Makefile b/Makefile index 34d425e0995..7c3fab5ab9e 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ # ################################################################ # Version number -export VERSION := 0.3.3 +export VERSION := 0.3.4 PRGDIR = programs ZSTDDIR = lib diff --git a/NEWS b/NEWS index ee8c47bea62..1c7facdcc1a 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +v0.3.4 +Faster fast cLevels + v0.3.3 Small compression ratio improvement diff --git a/README.md b/README.md index 54fc53bf4d7..b8ff8783e49 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ It is provided as a BSD-license package, hosted on Github. |master | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=master)](https://travis-ci.org/Cyan4973/zstd) | |dev | [![Build Status](https://travis-ci.org/Cyan4973/zstd.svg?branch=dev)](https://travis-ci.org/Cyan4973/zstd) | -For a taste of its performance, here are a few benchmark numbers from a number of compression codecs suitable for real-time. The test was completed on a Core i7-5600U @ 2.6 GHz, using [fsbench 0.14.3](http://encode.ru/threads/1371-Filesystem-benchmark?p=34029&viewfull=1#post34029), an open-source benchmark program by m^2. +For a taste of its performance, here are a few benchmark numbers from a number of compression codecs suitable for real-time. The test was completed on a Core i7-5600U @ 2.6 GHz, using m^2's [fsbench 0.14.3](http://encode.ru/threads/1371-Filesystem-benchmark?p=34029&viewfull=1#post34029) compiled with gcc 4.8.4, on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia). |Name | Ratio | C.speed | D.speed | |-----------------|-------|--------:|--------:| @@ -23,7 +23,7 @@ For a taste of its performance, here are a few benchmark numbers from a number o [zlib]:http://www.zlib.net/ [LZ4]:http://www.lz4.org/ -Zstd can also offer stronger compression ratio at the cost of compression speed. 
Speed / Ratio trade-off is configurable by small increment, to fit different situations. Note however that decompression speed is preserved and remain roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib]. The following test is run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by inikep, on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia) +Zstd can also offer stronger compression ratio at the cost of compression speed. Speed / Ratio trade-off is configurable in small increments, to fit different situations. Note however that decompression speed is preserved and remains roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib]. The following test is run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by inikep compiled with gcc 5.2.1, on the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia). [lzbench]:https://github.com/inikep/lzbench @@ -32,9 +32,9 @@ Compression Speed vs Ratio | Decompression Speed ![Compression Speed vs Ratio](images/CSpeed.png "Compression Speed vs Ratio") | ![Decompression Speed](images/DSpeed.png "Decompression Speed") -Zstd entropy stage is provided by [Huff0 and FSE, from Finite State Entrop library](https://github.com/Cyan4973/FiniteStateEntropy). +Zstd entropy stage is provided by [Huff0 and FSE, from the Finite State Entropy library](https://github.com/Cyan4973/FiniteStateEntropy). -Its memory requirement can also be configured to fit into low-memory hardware configurations, or servers handling multiple connections/contexts in parallel. +Its memory requirement can be configured to fit into low-memory hardware configurations, or servers handling multiple connections/contexts in parallel. Zstd has not yet reached "stable format" status. 
It doesn't guarantee yet that its current compressed format will remain stable and supported in future versions. During this period, it can still change to adapt new optimizations still being investigated. "Stable Format" is projected sometimes early 2016. diff --git a/images/CSpeed.png b/images/CSpeed.png old mode 100644 new mode 100755 index 88d302c8a1d..338c27b2729 Binary files a/images/CSpeed.png and b/images/CSpeed.png differ diff --git a/images/DSpeed.png b/images/DSpeed.png old mode 100644 new mode 100755 index b252ab65166..b503b5f837a Binary files a/images/DSpeed.png and b/images/DSpeed.png differ diff --git a/lib/zstd.c b/lib/zstd.c index 776a5bca529..a76d111f3de 100644 --- a/lib/zstd.c +++ b/lib/zstd.c @@ -125,7 +125,6 @@ static const U32 g_maxDistance = 4 * BLOCKSIZE; static const U32 g_maxLimit = 1 GB; -static const U32 g_searchStrength = 8; #define WORKPLACESIZE (BLOCKSIZE*3) #define MINMATCH 4 @@ -524,8 +523,6 @@ static U32 ZSTD_hashPtr(const void* p) { return ( (MEM_read64(p) * prime7bytes //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); } -static void ZSTD_addPtr(U32* table, const BYTE* p, const BYTE* start) { table[ZSTD_hashPtr(p)] = (U32)(p-start); } - static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start) { U32 h = ZSTD_hashPtr(p); @@ -540,6 +537,8 @@ static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip) return MEM_read32(match) == MEM_read32(ip); } +static void ZSTD_addPtr(U32* table, const BYTE* p, const BYTE* start) { table[ZSTD_hashPtr(p)] = (U32)(p-start); } + static size_t ZSTD_compressBlock(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { diff --git a/lib/zstd.h b/lib/zstd.h index d79410d1517..115046df872 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -48,7 +48,7 @@ extern "C" { ***************************************/ #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ #define ZSTD_VERSION_MINOR 3 /* for new 
(non-breaking) interface capabilities */ -#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_RELEASE 4 /* for tweaks, bug-fixes, or development */ #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) unsigned ZSTD_versionNumber (void); diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index ee0f67359ae..dffa1fe684a 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -206,6 +206,8 @@ typedef struct { void ZSTD_resetSeqStore(seqStore_t* ssPtr); +static const U32 g_searchStrength = 8; + #define REPCODE_STARTVALUE 4 #define MLbits 7 #define LLbits 6 @@ -217,7 +219,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr); #define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/) #define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE) - /** ZSTD_storeSeq Store a sequence (literal length, literals, offset code and match length) into seqStore_t @offsetCode : distance to match, or 0 == repCode diff --git a/lib/zstdhc.c b/lib/zstdhc.c index fb10b4b101f..d9b587b96bb 100644 --- a/lib/zstdhc.c +++ b/lib/zstdhc.c @@ -91,7 +91,7 @@ struct ZSTD_HC_CCtx_s seqStore_t seqStore; /* sequences storage ptrs */ U32* hashTable; - U32* chainTable; + U32* contentTable; }; @@ -113,7 +113,7 @@ size_t ZSTD_HC_freeCCtx(ZSTD_HC_CCtx* cctx) optimize for srcSize if srcSize > 0 */ void ZSTD_HC_validateParams(ZSTD_HC_parameters* params, size_t srcSize) { - const U32 chainplus = (params->strategy == ZSTD_HC_btlazy2); + const U32 btPlus = (params->strategy == ZSTD_HC_btlazy2); /* validate params */ if (params->windowLog > ZSTD_HC_WINDOWLOG_MAX) params->windowLog = ZSTD_HC_WINDOWLOG_MAX; @@ -126,8 +126,8 @@ void ZSTD_HC_validateParams(ZSTD_HC_parameters* params, size_t srcSize) if (params->windowLog > srcLog) params->windowLog = srcLog; } - if (params->chainLog > params->windowLog + chainplus) params->chainLog = params->windowLog+chainplus; /* <= 
ZSTD_HC_CHAINLOG_MAX */ - if (params->chainLog < ZSTD_HC_CHAINLOG_MIN) params->chainLog = ZSTD_HC_CHAINLOG_MIN; + if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_HC_CONTENTLOG_MAX */ + if (params->contentLog < ZSTD_HC_CONTENTLOG_MIN) params->contentLog = ZSTD_HC_CONTENTLOG_MIN; if (params->hashLog > ZSTD_HC_HASHLOG_MAX) params->hashLog = ZSTD_HC_HASHLOG_MAX; if (params->hashLog < ZSTD_HC_HASHLOG_MIN) params->hashLog = ZSTD_HC_HASHLOG_MIN; if (params->searchLog > ZSTD_HC_SEARCHLOG_MAX) params->searchLog = ZSTD_HC_SEARCHLOG_MAX; @@ -135,7 +135,6 @@ void ZSTD_HC_validateParams(ZSTD_HC_parameters* params, size_t srcSize) if (params->searchLength> ZSTD_HC_SEARCHLENGTH_MAX) params->searchLength = ZSTD_HC_SEARCHLENGTH_MAX; if (params->searchLength< ZSTD_HC_SEARCHLENGTH_MIN) params->searchLength = ZSTD_HC_SEARCHLENGTH_MIN; if ((U32)params->strategy>(U32)ZSTD_HC_btlazy2) params->strategy = ZSTD_HC_btlazy2; - if ((int)params->strategy<(int)ZSTD_HC_greedy) params->strategy = ZSTD_HC_greedy; } @@ -146,7 +145,8 @@ static size_t ZSTD_HC_resetCCtx_advanced (ZSTD_HC_CCtx* zc, /* reserve table memory */ { - const size_t tableSpace = ((1 << params.chainLog) + (1 << params.hashLog)) * sizeof(U32); + const U32 contentLog = params.strategy == ZSTD_HC_fast ? 
1 : params.contentLog; + const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32); const size_t neededSpace = tableSpace + WORKPLACESIZE; if (zc->workSpaceSize < neededSpace) { @@ -155,10 +155,10 @@ static size_t ZSTD_HC_resetCCtx_advanced (ZSTD_HC_CCtx* zc, zc->workSpace = malloc(neededSpace); if (zc->workSpace == NULL) return ERROR(memory_allocation); } - zc->hashTable = (U32*)zc->workSpace; - zc->chainTable = zc->hashTable + ((size_t)1 << params.hashLog); - zc->seqStore.buffer = (void*) (zc->chainTable + ((size_t)1 << params.chainLog)); - memset(zc->hashTable, 0, tableSpace ); + memset(zc->workSpace, 0, tableSpace ); + zc->hashTable = (U32*)(zc->workSpace); + zc->contentTable = zc->hashTable + ((size_t)1 << params.hashLog); + zc->seqStore.buffer = (void*) (zc->contentTable + ((size_t)1 << contentLog)); } zc->nextToUpdate = 0; @@ -195,6 +195,10 @@ static const U64 prime6bytes = 227718039650203ULL; static size_t ZSTD_HC_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } static size_t ZSTD_HC_hash6Ptr(const void* p, U32 h) { return ZSTD_HC_hash6(MEM_read64(p), h); } +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_HC_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } /* drops the top 8 bits of the product, so only the low 56 bits of u contribute; keeps h bits */ +static size_t ZSTD_HC_hash7Ptr(const void* p, U32 h) { return ZSTD_HC_hash7(MEM_read64(p), h); } + static size_t ZSTD_HC_hashPtr(const void* p, U32 hBits, U32 mls) { switch(mls) @@ -203,10 +207,108 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 hBits, U32 mls) case 4: return ZSTD_HC_hash4Ptr(p, hBits); case 5: return ZSTD_HC_hash5Ptr(p, hBits); case 6: return ZSTD_HC_hash6Ptr(p, hBits); + case 7: return ZSTD_HC_hash7Ptr(p, hBits); } } -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] +/* ************************************* +* Fast Scan +***************************************/ + +FORCE_INLINE +size_t ZSTD_HC_compressBlock_fast_generic(ZSTD_HC_CCtx* ctx, + void* dst, size_t 
maxDstSize, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* hashTable = ctx->hashTable; + const U32 hBits = ctx->params.hashLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const size_t maxDist = ((size_t)1 << ctx->params.windowLog); + + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const lowest = (size_t)(istart-base) > maxDist ? istart-maxDist : base; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + + size_t offset_2=4, offset_1=4; + + + /* init : at the very start of the window, index positions 1..3 and begin searching at base+4 */ + if (ip == base) + { + hashTable[ZSTD_HC_hashPtr(base+1, hBits, mls)] = 1; + hashTable[ZSTD_HC_hashPtr(base+2, hBits, mls)] = 2; + hashTable[ZSTD_HC_hashPtr(base+3, hBits, mls)] = 3; + ip = base+4; + } + ZSTD_resetSeqStore(seqStorePtr); + + /* Main Search Loop */ + while (ip < ilimit) /* < instead of <=, because of the unconditional hash-table insertion at ip+1 */ + { + const size_t h = ZSTD_HC_hashPtr(ip, hBits, mls); + const BYTE* match = base + hashTable[h]; + hashTable[h] = (U32)(ip-base); + + if (MEM_read32(ip-offset_2) == MEM_read32(ip)) match = ip-offset_2; /* repeat-offset candidate overrides the hash-table candidate */ + if ( (match < lowest) || + (MEM_read32(match) != MEM_read32(ip)) ) + { ip += ((ip-anchor) >> g_searchStrength) + 1; offset_2 = offset_1; continue; } /* no match : skip ahead, step grows with distance from anchor */ + while ((ip>anchor) && (match>base) && (ip[-1] == match[-1])) { ip--; match--; } /* catch up */ + + { + size_t litLength = ip-anchor; + size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend); + size_t offsetCode = ip-match; + if (offsetCode == offset_2) offsetCode = 0; /* 0 == repCode */ + offset_2 = offset_1; + offset_1 = ip-match; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength); + + /* Fill Table */ + hashTable[ZSTD_HC_hashPtr(ip+1, hBits, mls)] = (U32)(ip+1-base); + ip += matchLength + MINMATCH; + anchor = ip; + if (ip < ilimit) /* same test as loop, for speed */ + hashTable[ZSTD_HC_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + } + } + + /* 
Last Literals : copy the unmatched tail into the literal buffer */ + { + size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } + + /* Final compression stage */ + return ZSTD_compressSequences((BYTE*)dst, maxDstSize, + seqStorePtr, srcSize); +} + + +size_t ZSTD_HC_compressBlock_fast(ZSTD_HC_CCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.searchLength; /* dispatched on below so each call passes a literal mls to the FORCE_INLINE generic body */ + switch(mls) + { + default: /* any other value falls back to mls==4 */ + case 4 : + return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 4); + case 5 : + return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 5); + case 6 : + return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 6); + case 7 : + return ZSTD_HC_compressBlock_fast_generic(ctx, dst, maxDstSize, src, srcSize, 7); + } +} /* ************************************* @@ -219,8 +321,8 @@ static void ZSTD_HC_insertBt1(ZSTD_HC_CCtx* zc, const BYTE* const ip, const U32 U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; const size_t h = ZSTD_HC_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - const U32 btLog = zc->params.chainLog - 1; + U32* const bt = zc->contentTable; + const U32 btLog = zc->params.contentLog - 1; const U32 btMask= (1 << btLog) - 1; U32 matchIndex = hashTable[h]; size_t commonLengthSmaller=0, commonLengthLarger=0; @@ -280,8 +382,8 @@ size_t ZSTD_HC_insertBtAndFindBestMatch ( U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; const size_t h = ZSTD_HC_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - const U32 btLog = zc->params.chainLog - 1; + U32* const bt = zc->contentTable; + const U32 btLog = zc->params.contentLog - 1; const U32 btMask= (1 << btLog) - 1; U32 matchIndex = hashTable[h]; size_t commonLengthSmaller=0, commonLengthLarger=0; @@ -385,138 +487,19 @@ FORCE_INLINE size_t ZSTD_HC_BtFindBestMatch_selectMLS ( } -size_t 
ZSTD_HC_compressBlock_btLazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - const U32 maxSearches = 1 << ctx->params.searchLog; - const U32 mls = ctx->params.searchLength; - - /* init */ - ZSTD_resetSeqStore(seqStorePtr); - if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; - - /* Match Loop */ - while (ip <= ilimit) - { - size_t matchLength; - size_t offset=999999; - const BYTE* start; - - /* try to find a first match */ - if (MEM_read32(ip) == MEM_read32(ip - offset_2)) - { - /* repcode : we take it*/ - size_t offtmp = offset_2; - size_t litLength = ip - anchor; - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - offset_2 = offset_1; - offset_1 = offtmp; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); - ip += matchLength+MINMATCH; - anchor = ip; - continue; - } - - offset_2 = offset_1; - matchLength = ZSTD_HC_BtFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); - if (!matchLength) { ip++; continue; } - - /* let's try to find a better solution */ - start = ip; - - while (ip gain1) - matchLength = ml2, offset = 0, start = ip; - } - { - size_t offset2=999999; - size_t ml2 = ZSTD_HC_BtFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); - if (gain2 > gain1) - { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } - } - - /* let's find an even better one */ - if (ip gain1) - matchLength = ml2, offset = 0, start = ip; - } - { - size_t 
offset2=999999; - size_t ml2 = ZSTD_HC_BtFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7); - if (gain2 > gain1) - { - matchLength = ml2, offset = offset2, start = ip; - continue; - } - } - } - break; /* nothing found : store previous solution */ - } - - /* store sequence */ - { - size_t litLength = start - anchor; - if (offset) offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - ip = start + matchLength; - anchor = ip; - } - - } - - /* Last Literals */ - { - size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } - - /* Final compression stage */ - return ZSTD_compressSequences((BYTE*)dst, maxDstSize, - seqStorePtr, srcSize); -} - - - /* *********************** * Hash Chain *************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + /* Update chains up to ip (excluded) */ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls) { U32* const hashTable = zc->hashTable; const U32 hashLog = zc->params.hashLog; - U32* const chainTable = zc->chainTable; - const U32 chainMask = (1 << zc->params.chainLog) - 1; + U32* const chainTable = zc->contentTable; + const U32 chainMask = (1 << zc->params.contentLog) - 1; const BYTE* const base = zc->base; const U32 target = (U32)(ip - base); U32 idx = zc->nextToUpdate; @@ -535,14 +518,14 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ -size_t ZSTD_HC_insertAndFindBestMatch ( +size_t ZSTD_HC_HcFindBestMatch ( ZSTD_HC_CCtx* zc, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, size_t* offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) { - U32* 
const chainTable = zc->chainTable; - const U32 chainSize = (1 << zc->params.chainLog); + U32* const chainTable = zc->contentTable; + const U32 chainSize = (1 << zc->params.contentLog); const U32 chainMask = chainSize-1; const BYTE* const base = zc->base; const BYTE* const dictBase = zc->dictBase; @@ -598,7 +581,7 @@ size_t ZSTD_HC_insertAndFindBestMatch ( } -FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( +FORCE_INLINE size_t ZSTD_HC_HcFindBestMatch_selectMLS ( ZSTD_HC_CCtx* zc, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, size_t* offsetPtr, @@ -607,14 +590,18 @@ FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS ( switch(matchLengthSearch) { default : - case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + case 4 : return ZSTD_HC_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_HC_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_HC_HcFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); } } -size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +/* common lazy function, to be inlined */ +FORCE_INLINE +size_t ZSTD_HC_compressBlock_lazy_generic(ZSTD_HC_CCtx* ctx, + void* dst, size_t maxDstSize, const void* src, size_t srcSize, + const U32 searchMethod, const U32 deep) /* 0 : hc; 1 : bt */ { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -627,6 +614,11 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs const U32 maxSearches = 1 << ctx->params.searchLog; const U32 mls = ctx->params.searchLength; + typedef size_t (*searchMax_f)(ZSTD_HC_CCtx* zc, const BYTE* ip, const 
BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? ZSTD_HC_BtFindBestMatch_selectMLS : ZSTD_HC_HcFindBestMatch_selectMLS; + /* init */ ZSTD_resetSeqStore(seqStorePtr); if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; @@ -654,7 +646,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs } offset_2 = offset_1; - matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); + matchLength = searchMax(ctx, ip, iend, &offset, maxSearches, mls); if (!matchLength) { ip++; continue; } /* let's try to find a better solution */ @@ -673,9 +665,9 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs } { size_t offset2=999999; - size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4); + size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int gain2 = (int)(ml2*(3+deep) - ZSTD_highbit((U32)offset2+1)); /* raw approx */ + int gain1 = (int)(matchLength*(3+deep) - ZSTD_highbit((U32)offset+1) + (3+deep)); if (gain2 > gain1) { matchLength = ml2, offset = offset2, start = ip; @@ -684,7 +676,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs } /* let's find an even better one */ - if (ip gain1) @@ -733,103 +725,19 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs seqStorePtr, srcSize); } - -size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - 
const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - const U32 maxSearches = 1 << ctx->params.searchLog; - const U32 mls = ctx->params.searchLength; - - /* init */ - ZSTD_resetSeqStore(seqStorePtr); - if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; - - /* Match Loop */ - while (ip <= ilimit) - { - size_t matchLength; - size_t offset=0; - const BYTE* start; - - /* try to find a first match */ - if (MEM_read32(ip) == MEM_read32(ip - offset_2)) - { - /* repcode : we take it*/ - size_t offtmp = offset_2; - size_t litLength = ip - anchor; - matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend); - offset_2 = offset_1; - offset_1 = offtmp; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength); - ip += matchLength+MINMATCH; - anchor = ip; - continue; - } - - offset_2 = offset_1; - matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); - if (!matchLength) { ip++; continue; } - - /* let's try to find a better solution */ - start = ip; - - while (ip gain1) - { - matchLength = ml2, offset = 0, start = ip; - - } - } - { - size_t offset2=999999; - size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls); - int gain2 = (int)(ml2*3 - ZSTD_highbit((U32)offset2+1)); /* raw approx */ - int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 3); - if (gain2 > gain1) - { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } - } - - break; /* nothing found : store previous one */ - } - - /* store sequence */ - { - size_t litLength = start - anchor; - if (offset) offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH); - ip = start + matchLength; - anchor = ip; - } - - } + return 
ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 1); +} - /* Last Literals */ - { - size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } +size_t ZSTD_HC_compressBlock_lazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1); +} - /* Final compression stage */ - return ZSTD_compressSequences((BYTE*)dst, maxDstSize, - seqStorePtr, srcSize); +size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0); } @@ -884,8 +792,9 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS /* search */ { size_t offset=999999; - size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); + size_t matchLength = ZSTD_HC_HcFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls); if (!matchLength) { ip++; continue; } + while ((ip>anchor) && (ip-offset>ctx->base) && (ip[-1] == ip[-1-offset])) { ip--; } /* catch up */ /* store sequence */ { size_t litLength = ip-anchor; @@ -913,20 +822,21 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS typedef size_t (*ZSTD_HC_blockCompressor) (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); - static ZSTD_HC_blockCompressor ZSTD_HC_selectBlockCompressor(ZSTD_HC_strategy strat) { switch(strat) { default : + case ZSTD_HC_fast: + return ZSTD_HC_compressBlock_fast; case ZSTD_HC_greedy: return ZSTD_HC_compressBlock_greedy; case ZSTD_HC_lazy: return ZSTD_HC_compressBlock_lazy; - case ZSTD_HC_lazydeep: - return ZSTD_HC_compressBlock_lazydeep; + case ZSTD_HC_lazy2: + return ZSTD_HC_compressBlock_lazy2; case ZSTD_HC_btlazy2: - return 
ZSTD_HC_compressBlock_btLazy2; + return ZSTD_HC_compressBlock_btlazy2; } } @@ -992,7 +902,7 @@ size_t ZSTD_HC_compressContinue (ZSTD_HC_CCtx* ctxPtr, if (ip != ctxPtr->end) { if (ctxPtr->end != NULL) - ZSTD_HC_resetCCtx_advanced(ctxPtr, ctxPtr->params); /* just reset, but no need to re-alloc */ + ZSTD_HC_resetCCtx_advanced(ctxPtr, ctxPtr->params); ctxPtr->base = ip; } @@ -1048,9 +958,12 @@ size_t ZSTD_HC_compress_advanced (ZSTD_HC_CCtx* ctx, size_t oSize; /* correct params, to use less memory */ - U32 srcLog = ZSTD_highbit((U32)srcSize-1) + 1; - if (params.windowLog > srcLog) params.windowLog = srcLog; - if (params.chainLog > srcLog) params.chainLog = srcLog; + { + U32 srcLog = ZSTD_highbit((U32)srcSize-1) + 1; + U32 contentBtPlus = (ctx->params.strategy == ZSTD_HC_btlazy2); + if (params.windowLog > srcLog) params.windowLog = srcLog; + if (params.contentLog > srcLog+contentBtPlus) params.contentLog = srcLog+contentBtPlus; + } /* Header */ oSize = ZSTD_HC_compressBegin_advanced(ctx, dst, maxDstSize, params); diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h index 0cf7476b7b3..52e9bb88df2 100644 --- a/lib/zstdhc_static.h +++ b/lib/zstdhc_static.h @@ -45,27 +45,29 @@ extern "C" { /* ************************************* * Types ***************************************/ -typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_lazydeep, ZSTD_HC_btlazy2 } ZSTD_HC_strategy; +/** from faster to stronger */ +typedef enum { ZSTD_HC_fast, ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_lazy2, ZSTD_HC_btlazy2 } ZSTD_HC_strategy; + typedef struct { U32 windowLog; /* largest match distance : impact decompression buffer size */ - U32 chainLog; /* full search distance : larger == more compression, slower, more memory*/ + U32 contentLog; /* full search segment : larger == more compression, slower, more memory (useless for fast) */ U32 hashLog; /* dispatch table : larger == more memory, faster*/ U32 searchLog; /* nb of searches : larger == more compression, slower*/ U32 searchLength; /* size 
of matches : larger == faster decompression */ - ZSTD_HC_strategy strategy; /* greedy, lazy, lazydeep */ + ZSTD_HC_strategy strategy; } ZSTD_HC_parameters; /* parameters boundaries */ #define ZSTD_HC_WINDOWLOG_MAX 26 #define ZSTD_HC_WINDOWLOG_MIN 18 -#define ZSTD_HC_CHAINLOG_MAX (ZSTD_HC_WINDOWLOG_MAX+1) -#define ZSTD_HC_CHAINLOG_MIN 4 +#define ZSTD_HC_CONTENTLOG_MAX (ZSTD_HC_WINDOWLOG_MAX+1) +#define ZSTD_HC_CONTENTLOG_MIN 4 #define ZSTD_HC_HASHLOG_MAX 28 #define ZSTD_HC_HASHLOG_MIN 4 -#define ZSTD_HC_SEARCHLOG_MAX (ZSTD_HC_CHAINLOG_MAX-1) +#define ZSTD_HC_SEARCHLOG_MAX (ZSTD_HC_CONTENTLOG_MAX-1) #define ZSTD_HC_SEARCHLOG_MIN 1 -#define ZSTD_HC_SEARCHLENGTH_MAX 6 +#define ZSTD_HC_SEARCHLENGTH_MAX 7 #define ZSTD_HC_SEARCHLENGTH_MIN 4 @@ -96,32 +98,30 @@ size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize); /* ************************************* * Pre-defined compression levels ***************************************/ -#define ZSTD_HC_MAX_CLEVEL 22 +#define ZSTD_HC_MAX_CLEVEL 20 static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] = { /* W, C, H, S, L, strat */ - { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */ - { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards zstd fast */ - { 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */ - { 19, 14, 18, 2, 5, ZSTD_HC_greedy }, /* level 3 */ - { 20, 17, 19, 3, 5, ZSTD_HC_greedy }, /* level 4 */ - { 20, 18, 19, 2, 5, ZSTD_HC_lazy }, /* level 5 */ - { 21, 18, 20, 3, 5, ZSTD_HC_lazy }, /* level 6 */ - { 21, 20, 20, 3, 5, ZSTD_HC_lazy }, /* level 7 */ - { 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 8 */ - { 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 9 */ - { 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */ - { 21, 20, 20, 5, 5, ZSTD_HC_lazydeep }, /* level 11 */ - { 22, 20, 22, 5, 5, ZSTD_HC_lazydeep }, /* level 12 */ - { 22, 20, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 13 */ - { 22, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 14 */ - { 22, 
21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 15 */ - { 22, 21, 22, 5, 5, ZSTD_HC_btlazy2 }, /* level 16 */ - { 22, 22, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 17 */ - { 23, 23, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 18 */ - { 25, 25, 22, 5, 5, ZSTD_HC_btlazy2 }, /* level 19 */ - { 25, 25, 23, 8, 5, ZSTD_HC_btlazy2 }, /* level 20 */ - { 25, 26, 23, 9, 5, ZSTD_HC_btlazy2 }, /* level 21 */ - { 25, 26, 23, 9, 5, ZSTD_HC_btlazy2 }, /* level 22 */ + { 18, 12, 12, 1, 4, ZSTD_HC_fast }, /* level 0 - never used */ + { 18, 14, 14, 1, 7, ZSTD_HC_fast }, /* level 1 - in fact redirected towards zstd fast */ + { 19, 15, 16, 1, 6, ZSTD_HC_fast }, /* level 2 */ + { 20, 18, 20, 1, 6, ZSTD_HC_fast }, /* level 3 */ + { 21, 19, 21, 1, 6, ZSTD_HC_fast }, /* level 4 */ + { 19, 14, 19, 2, 5, ZSTD_HC_greedy }, /* level 5 */ + { 20, 17, 19, 3, 5, ZSTD_HC_greedy }, /* level 6 */ + { 21, 17, 20, 3, 5, ZSTD_HC_lazy }, /* level 7 */ + { 21, 19, 20, 3, 5, ZSTD_HC_lazy }, /* level 8 */ + { 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 9 */ + { 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */ + { 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 11 */ + { 22, 20, 22, 5, 5, ZSTD_HC_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, ZSTD_HC_lazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, ZSTD_HC_lazy2 }, /* level 14 */ + { 22, 21, 22, 6, 5, ZSTD_HC_lazy2 }, /* level 15 */ + { 22, 21, 22, 4, 5, ZSTD_HC_btlazy2 }, /* level 16 */ + { 23, 23, 23, 4, 5, ZSTD_HC_btlazy2 }, /* level 17 */ + { 25, 24, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 18 */ + { 25, 26, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 19 */ + { 26, 27, 24, 6, 5, ZSTD_HC_btlazy2 }, /* level 20 */ }; diff --git a/programs/Makefile b/programs/Makefile index 8c700323235..c3a250e57ae 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -30,7 +30,7 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ########################################################################## -VERSION?= 0.3.3 +VERSION?= 0.3.4 DESTDIR?= PREFIX ?= /usr/local diff 
--git a/programs/paramgrill.c b/programs/paramgrill.c index 40033a6a341..520acc1e107 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -280,7 +280,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); U32 Wlog = params.windowLog; - U32 Clog = params.chainLog; + U32 Clog = params.contentLog; U32 Hlog = params.hashLog; U32 Slog = params.searchLog; U32 Slength = params.searchLength; @@ -429,14 +429,18 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, } -const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy ", "ZSTD_HC_lazydeep", "ZSTD_HC_btlazy2 " }; +const char* g_stratName[] = { "ZSTD_HC_fast ", + "ZSTD_HC_greedy ", + "ZSTD_HC_lazy ", + "ZSTD_HC_lazy2 ", + "ZSTD_HC_btlazy2" }; static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize) { DISPLAY("\r%79s\r", ""); fprintf(f," {%3u,%3u,%3u,%3u,%3u, %s }, ", - params.windowLog, params.chainLog, params.hashLog, params.searchLog, params.searchLength, - g_stratName[params.strategy]); + params.windowLog, params.contentLog, params.hashLog, params.searchLog, params.searchLength, + g_stratName[(U32)(params.strategy)]); fprintf(f, "/* level %2u */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.); @@ -483,7 +487,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_HC_parameters params, BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); - for (cLevel = 2; cLevel <= ZSTD_HC_MAX_CLEVEL; cLevel++) + for (cLevel = 1; cLevel <= ZSTD_HC_MAX_CLEVEL; cLevel++) { if (testResult.cSpeed < g_cSpeedTarget[cLevel]) continue; /* not fast enough for this level */ @@ -509,8 +513,10 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_HC_parameters params, double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); 
double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); - size_t W_CMemUsed = (1 << params.windowLog) + 4 * (1 << params.hashLog) + 4 * (1 << params.chainLog); - size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + 4 * (1 << winners[cLevel].params.hashLog) + 4 * (1 << winners[cLevel].params.chainLog); + size_t W_CMemUsed = (1 << params.windowLog) + 4 * (1 << params.hashLog) + + ((params.strategy==ZSTD_HC_fast) ? 0 : 4 * (1 << params.contentLog)); + size_t O_CMemUsed = (1 << winners[cLevel].params.windowLog) + 4 * (1 << winners[cLevel].params.hashLog) + + ((winners[cLevel].params.strategy==ZSTD_HC_fast) ? 0 : 4 * (1 << winners[cLevel].params.contentLog)); double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); @@ -576,15 +582,15 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_HC_parameters params, #define MAX(a,b) ( (a) > (b) ? (a) : (b) ) static BYTE g_alreadyTested[ZSTD_HC_WINDOWLOG_MAX+1-ZSTD_HC_WINDOWLOG_MIN] - [ZSTD_HC_CHAINLOG_MAX+1-ZSTD_HC_CHAINLOG_MIN] + [ZSTD_HC_CONTENTLOG_MAX+1-ZSTD_HC_CONTENTLOG_MIN] [ZSTD_HC_HASHLOG_MAX+1-ZSTD_HC_HASHLOG_MIN] [ZSTD_HC_SEARCHLOG_MAX+1-ZSTD_HC_SEARCHLOG_MIN] [ZSTD_HC_SEARCHLENGTH_MAX+1-ZSTD_HC_SEARCHLENGTH_MIN] - [4 /* strategy */ ] = {}; /* init to zero */ + [ZSTD_HC_btlazy2+1 /* strategy */ ] = {}; /* init to zero */ #define NB_TESTS_PLAYED(p) \ g_alreadyTested[p.windowLog-ZSTD_HC_WINDOWLOG_MIN] \ - [p.chainLog-ZSTD_HC_CHAINLOG_MIN] \ + [p.contentLog-ZSTD_HC_CONTENTLOG_MIN] \ [p.hashLog-ZSTD_HC_HASHLOG_MIN] \ [p.searchLog-ZSTD_HC_SEARCHLOG_MIN] \ [p.searchLength-ZSTD_HC_SEARCHLENGTH_MIN] \ @@ -611,9 +617,9 @@ static void playAround(FILE* f, winnerInfo_t* winners, switch(changeID) { case 0: - p.chainLog++; break; + p.contentLog++; break; case 1: - p.chainLog--; break; + p.contentLog--; break; case 2: p.hashLog++; break; case 3: @@ -665,17 +671,17 @@ static void 
BMK_selectRandomStart( const void* srcBuffer, size_t srcSize, ZSTD_HC_CCtx* ctx) { - U32 id = (FUZ_rand(&g_rand) % ZSTD_HC_MAX_CLEVEL) + 1; - if ((id<2) || (winners[id].params.windowLog==0)) + U32 id = (FUZ_rand(&g_rand) % (ZSTD_HC_MAX_CLEVEL+1)); + if ((id==0) || (winners[id].params.windowLog==0)) { /* totally random entry */ ZSTD_HC_parameters p; - p.chainLog = FUZ_rand(&g_rand) % (ZSTD_HC_CHAINLOG_MAX+1 - ZSTD_HC_CHAINLOG_MIN) + ZSTD_HC_CHAINLOG_MIN; + p.contentLog = FUZ_rand(&g_rand) % (ZSTD_HC_CONTENTLOG_MAX+1 - ZSTD_HC_CONTENTLOG_MIN) + ZSTD_HC_CONTENTLOG_MIN; p.hashLog = FUZ_rand(&g_rand) % (ZSTD_HC_HASHLOG_MAX+1 - ZSTD_HC_HASHLOG_MIN) + ZSTD_HC_HASHLOG_MIN; p.searchLog = FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLOG_MAX+1 - ZSTD_HC_SEARCHLOG_MIN) + ZSTD_HC_SEARCHLOG_MIN; p.windowLog = FUZ_rand(&g_rand) % (ZSTD_HC_WINDOWLOG_MAX+1 - ZSTD_HC_WINDOWLOG_MIN) + ZSTD_HC_WINDOWLOG_MIN; p.searchLength=FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLENGTH_MAX+1 - ZSTD_HC_SEARCHLENGTH_MIN) + ZSTD_HC_SEARCHLENGTH_MIN; - p.strategy = (ZSTD_HC_strategy) (FUZ_rand(&g_rand) % 4); + p.strategy = (ZSTD_HC_strategy) (FUZ_rand(&g_rand) % (ZSTD_HC_btlazy2+1)); playAround(f, winners, p, srcBuffer, srcSize, ctx); } else @@ -708,32 +714,35 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } if (g_target) - g_cSpeedTarget[2] = g_target * 1000; + g_cSpeedTarget[1] = g_target * 1000; else { - /* baseline config for level 2 */ + /* baseline config for level 1 */ BMK_result_t testResult; - params = g_seedParams[2]; - params.windowLog = MIN(srcLog, params.windowLog); - params.chainLog = MIN(params.windowLog, params.chainLog); - params.searchLog = MIN(params.chainLog, params.searchLog); + params.windowLog = MIN(srcLog, 18); + params.hashLog = 14; + params.contentLog = 1; + params.searchLog = 1; + params.searchLength = 7; + params.strategy = ZSTD_HC_fast; BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); - 
g_cSpeedTarget[2] = (testResult.cSpeed * 15) >> 4; + g_cSpeedTarget[1] = (testResult.cSpeed * 15) >> 4; } - /* establish speed objectives (relative to level 2) */ - for (i=3; i<=ZSTD_HC_MAX_CLEVEL; i++) - g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 13) >> 4; + /* establish speed objectives (relative to level 1) */ + for (i=2; i<=ZSTD_HC_MAX_CLEVEL; i++) + g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) >> 5; /* populate initial solution */ { - const int maxSeeds = g_noSeed ? 2 : ZSTD_HC_MAX_CLEVEL; - for (i=2; i<=maxSeeds; i++) + const int maxSeeds = g_noSeed ? 1 : ZSTD_HC_MAX_CLEVEL; + for (i=1; i<=maxSeeds; i++) { + const U32 btPlus = (g_seedParams[i].strategy == ZSTD_HC_btlazy2); /* btlazy2 seeds may keep contentLog up to windowLog+1; test this level's seed because params is not yet loaded for level i at this point */ params = g_seedParams[i]; params.windowLog = MIN(srcLog, params.windowLog); - params.chainLog = MIN(params.windowLog, params.chainLog); - params.searchLog = MIN(params.chainLog, params.searchLog); + params.contentLog = MIN(params.windowLog+btPlus, params.contentLog); + params.searchLog = MIN(params.contentLog, params.searchLog); BMK_seed(winners, params, srcBuffer, srcSize, ctx); } } @@ -951,10 +960,10 @@ int main(int argc, char** argv) g_params.windowLog *= 10, g_params.windowLog += *argument++ - '0'; continue; case 'c': - g_params.chainLog = 0; + g_params.contentLog = 0; argument++; while ((*argument>= '0') && (*argument<='9')) - g_params.chainLog *= 10, g_params.chainLog += *argument++ - '0'; + g_params.contentLog *= 10, g_params.contentLog += *argument++ - '0'; continue; case 'h': g_params.hashLog = 0; @@ -975,7 +984,7 @@ int main(int argc, char** argv) g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0'; continue; case 't': /* strategy */ - g_params.strategy = ZSTD_HC_greedy; + g_params.strategy = (ZSTD_HC_strategy)0; argument++; while ((*argument>= '0') && (*argument<='9')) { @@ -989,7 +998,7 @@ int main(int argc, char** argv) argument++; while ((*argument>= '0') && (*argument<='9')) cLevel *= 10, cLevel += *argument++ - '0'; - if (cLevel < 2) cLevel = 2; + if (cLevel <
1) cLevel = 1; if (cLevel > ZSTD_HC_MAX_CLEVEL) cLevel = ZSTD_HC_MAX_CLEVEL; g_params = g_seedParams[cLevel]; continue; @@ -1000,7 +1009,7 @@ int main(int argc, char** argv) } break; - /* target level2 speed objective, in MB/s */ + /* target level1 speed objective, in MB/s */ case 'T': argument++; g_target = 0; diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 4c8460d8a2c..2cb59c76b6e 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -70,7 +70,7 @@ **************************************/ #define COMPRESSOR_NAME "zstd command line interface" #ifndef ZSTD_VERSION -# define ZSTD_VERSION "v0.3.3" +# define ZSTD_VERSION "v0.3.4" #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__