281 lines
10 KiB
C++
281 lines
10 KiB
C++
/*
|
|
LZ4 - Fast LZ compression algorithm
|
|
Copyright (C) 2011-2015, Yann Collet.
|
|
|
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above
|
|
copyright notice, this list of conditions and the following disclaimer
|
|
in the documentation and/or other materials provided with the
|
|
distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
You can contact the author at :
|
|
- LZ4 source repository : https://github.com/Cyan4973/lz4
|
|
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
|
*/
|
|
|
|
|
|
/**************************************
|
|
* Reading and writing into memory
|
|
**************************************/
|
|
|
|
/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
|
|
static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
|
|
{
|
|
BYTE* d = (BYTE*)dstPtr;
|
|
const BYTE* s = (const BYTE*)srcPtr;
|
|
BYTE* const e = (BYTE*)dstEnd;
|
|
|
|
#if 0
|
|
const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
|
|
LZ4_copy8(d,s); if (d>e-9) return;
|
|
d+=l2; s+=l2;
|
|
#endif /* join to align */
|
|
|
|
do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
|
|
}
|
|
|
|
|
|
/**************************************
|
|
* Common Constants
|
|
**************************************/
|
|
/* Value added to every decoded match length. */
#define MINMATCH 4

/* Size of the region that must stay writable for 8-byte-step copies. */
#define WILDCOPYLENGTH 8
/* Number of trailing bytes of a block that must be encoded as literals. */
#define LASTLITERALS 5
/* Safety margin kept before the end of the output buffer (8 + 4 = 12). */
#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
static const int LZ4_minLength = (MFLIMIT+1);

/* Postfix size multipliers : `64 KB` expands to `64 *(1 <<10)`. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

/* Largest value representable on MAXD_LOG bits : 65535. */
#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

/* Token layout : the low ML_BITS bits hold the match length,
 * the remaining RUN_BITS high bits hold the literal run length.
 * A field equal to its mask means extra length bytes follow. */
#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
|
|
|
|
|
|
/**************************************
|
|
* Local Structures and types
|
|
**************************************/
|
|
/* Dictionary mode, passed as the `dict` argument of the generic decoder. */
typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;

/* Stop condition, passed as the `endOnInput` argument of the generic decoder. */
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;

/* Full or partial decoding, passed as the `partialDecoding` argument. */
typedef enum { full = 0, partial = 1 } earlyEnd_directive;
|
|
|
|
|
|
|
|
/*******************************
|
|
* Decompression functions
|
|
*******************************/
|
|
/*
|
|
* This generic decompression function cover all use cases.
|
|
* It shall be instantiated several times, using different sets of directives
|
|
* Note that it is essential this generic function is really inlined,
|
|
* in order to remove useless branches during compilation optimization.
|
|
*/
|
|
FORCE_INLINE int LZ4_decompress_generic(
|
|
const char* const source,
|
|
char* const dest,
|
|
int inputSize,
|
|
int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
|
|
|
|
int endOnInput, /* endOnOutputSize, endOnInputSize */
|
|
int partialDecoding, /* full, partial */
|
|
int targetOutputSize, /* only used if partialDecoding==partial */
|
|
int dict, /* noDict, withPrefix64k, usingExtDict */
|
|
const BYTE* const lowPrefix, /* == dest if dict == noDict */
|
|
const BYTE* const dictStart, /* only if dict==usingExtDict */
|
|
const size_t dictSize /* note : = 0 if noDict */
|
|
)
|
|
{
|
|
/* Local Variables */
|
|
const BYTE* ip = (const BYTE*) source;
|
|
const BYTE* const iend = ip + inputSize;
|
|
|
|
BYTE* op = (BYTE*) dest;
|
|
BYTE* const oend = op + outputSize;
|
|
BYTE* cpy;
|
|
BYTE* oexit = op + targetOutputSize;
|
|
const BYTE* const lowLimit = lowPrefix - dictSize;
|
|
|
|
const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
|
|
const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
|
|
const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
|
|
|
|
const int safeDecode = (endOnInput==endOnInputSize);
|
|
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
|
|
const int inPlaceDecode = ((ip >= op) && (ip < oend));
|
|
|
|
|
|
/* Special cases */
|
|
if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */
|
|
if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
|
|
if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
|
|
|
|
|
|
/* Main Loop */
|
|
while (1)
|
|
{
|
|
unsigned token;
|
|
size_t length;
|
|
const BYTE* match;
|
|
size_t offset;
|
|
|
|
if (unlikely((inPlaceDecode) && (op + WILDCOPYLENGTH > ip))) goto _output_error; /* output stream ran over input stream */
|
|
|
|
/* get literal length */
|
|
token = *ip++;
|
|
if ((length=(token>>ML_BITS)) == RUN_MASK)
|
|
{
|
|
unsigned s;
|
|
do
|
|
{
|
|
s = *ip++;
|
|
length += s;
|
|
}
|
|
while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) && (s==255) );
|
|
if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* overflow detection */
|
|
if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* overflow detection */
|
|
}
|
|
|
|
/* copy literals */
|
|
cpy = op+length;
|
|
if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
|
|
|| ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)))
|
|
{
|
|
if (partialDecoding)
|
|
{
|
|
if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */
|
|
if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */
|
|
}
|
|
else
|
|
{
|
|
if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */
|
|
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */
|
|
}
|
|
memmove(op, ip, length);
|
|
ip += length;
|
|
op += length;
|
|
break; /* Necessarily EOF, due to parsing restrictions */
|
|
}
|
|
LZ4_wildCopy(op, ip, cpy);
|
|
ip += length; op = cpy;
|
|
|
|
/* get offset */
|
|
offset = LZ4_readLE16(ip); ip+=2;
|
|
match = op - offset;
|
|
if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside buffers */
|
|
|
|
/* get matchlength */
|
|
length = token & ML_MASK;
|
|
if (length == ML_MASK)
|
|
{
|
|
unsigned s;
|
|
do
|
|
{
|
|
if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
|
|
s = *ip++;
|
|
length += s;
|
|
} while (s==255);
|
|
if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* overflow detection */
|
|
}
|
|
length += MINMATCH;
|
|
|
|
/* check external dictionary */
|
|
if ((dict==usingExtDict) && (match < lowPrefix))
|
|
{
|
|
if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */
|
|
|
|
if (length <= (size_t)(lowPrefix-match))
|
|
{
|
|
/* match can be copied as a single segment from external dictionary */
|
|
match = dictEnd - (lowPrefix-match);
|
|
memmove(op, match, length); op += length;
|
|
}
|
|
else
|
|
{
|
|
/* match encompass external dictionary and current block */
|
|
size_t copySize = (size_t)(lowPrefix-match);
|
|
memcpy(op, dictEnd - copySize, copySize);
|
|
op += copySize;
|
|
copySize = length - copySize;
|
|
if (copySize > (size_t)(op-lowPrefix)) /* overlap copy */
|
|
{
|
|
BYTE* const endOfMatch = op + copySize;
|
|
const BYTE* copyFrom = lowPrefix;
|
|
while (op < endOfMatch) *op++ = *copyFrom++;
|
|
}
|
|
else
|
|
{
|
|
memcpy(op, lowPrefix, copySize);
|
|
op += copySize;
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* copy match within block */
|
|
cpy = op + length;
|
|
if (unlikely(offset<8))
|
|
{
|
|
const int dec64 = dec64table[offset];
|
|
op[0] = match[0];
|
|
op[1] = match[1];
|
|
op[2] = match[2];
|
|
op[3] = match[3];
|
|
match += dec32table[offset];
|
|
memcpy(op+4, match, 4);
|
|
match -= dec64;
|
|
} else { LZ4_copy8(op, match); match+=8; }
|
|
op += 8;
|
|
|
|
if (unlikely(cpy>oend-12))
|
|
{
|
|
BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
|
|
if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
|
|
if (op < oCopyLimit)
|
|
{
|
|
LZ4_wildCopy(op, match, oCopyLimit);
|
|
match += oCopyLimit - op;
|
|
op = oCopyLimit;
|
|
}
|
|
while (op<cpy) *op++ = *match++;
|
|
}
|
|
else
|
|
LZ4_wildCopy(op, match, cpy);
|
|
op=cpy; /* correction */
|
|
}
|
|
|
|
/* end of decoding */
|
|
if (endOnInput)
|
|
return (int) (((char*)op)-dest); /* Nb of output bytes decoded */
|
|
else
|
|
return (int) (((const char*)ip)-source); /* Nb of input bytes read */
|
|
|
|
/* Overflow error detected */
|
|
_output_error:
|
|
return (int) (-(((const char*)ip)-source))-1;
|
|
}
|