d91f3a4eaf
The decompression is critical for speed of boot. So we sacrifice some generated code size to optimize for speed. This change speeds up the LZMA decompression between 3% and 6% at a cost of just over 2k of additional code space. BUG=b:223985641 TEST=Majolica The test is done on Majolica and the result is listed below. Time saved: We tested the boot time with each flag for 10 times. The duration of each decompression process is listed as below. Load FSP-M Load ramstage Load payload Ofast Os Ofast Os Ofast Os ------------------------------------------ 62543 62959 20585 22458 9945 10626 62548 62967 20587 22461 9951 10637 62560 62980 20588 22478 9951 10641 62561 62988 20596 22478 9954 10643 62569 62993 20596 22479 9954 10643 62574 63000 20605 22492 9958 10647 62575 63026 20615 22495 9959 10647 62576 63038 20743 22614 9960 10647 62587 63044 20758 22625 9961 10647 62592 63045 20769 22637 9961 10647 ----------------------------------------- average 62568 63004 20644 22521 9955 10642 (unit: microseconds) Size sacrificed: The size of object file with -Os: ./build/ramstage/lib/lzmadecode.o: file format elf32-i386 4 .text.LzmaDecode 00000d84 00000000 00000000 00000076 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE The size of object file with -Ofast: ./build/ramstage/lib/lzmadecode.o: file format elf32-i386 4 .text.LzmaDecode 00001719 00000000 00000000 00000080 2**4 CONTENTS, ALLOC, LOAD, READONLY, CODE (Output by running "objdump -h ./build/ramstage/lib/lzmadecode.o") We can see that size is increased from 3460 bytes to 5913 bytes, a change of 2453 bytes or 171%. Change-Id: Ie003164e2e93ba8ed3ccd207f3af31c6acf1c5e2 Signed-off-by: Zheng Bao <fishbaozi@gmail.com> Reviewed-on: https://review.coreboot.org/c/coreboot/+/66392 Tested-by: build bot (Jenkins) <no-reply@coreboot.org> Reviewed-by: Martin Roth <martin.roth@amd.corp-partner.google.com>
441 lines
10 KiB
C
441 lines
10 KiB
C
/*
|
|
LzmaDecode.c
|
|
LZMA Decoder (optimized for Speed version)
|
|
|
|
LZMA SDK 4.40 Copyright (c) 1999-2006 Igor Pavlov (2006-05-01)
|
|
http://www.7-zip.org/
|
|
|
|
LZMA SDK is licensed under two licenses:
|
|
1) GNU Lesser General Public License (GNU LGPL)
|
|
2) Common Public License (CPL)
|
|
It means that you can select one of these two licenses and
|
|
follow rules of that license.
|
|
|
|
SPECIAL EXCEPTION:
|
|
Igor Pavlov, as the author of this Code, expressly permits you to
|
|
statically or dynamically link your Code (or bind by name) to the
|
|
interfaces of this file without subjecting your linked Code to the
|
|
terms of the CPL or GNU LGPL. Any modifications or additions
|
|
to this file, however, are subject to the LGPL or CPL terms.
|
|
*/
|
|
|
|
/*
 * Optional per-function optimization override for the decoder.
 * With DECOMPRESS_OFAST selected, functions tagged with this attribute are
 * compiled with "Ofast"; otherwise the tag expands to nothing.
 */
#if !CONFIG(DECOMPRESS_OFAST)
#define __lzma_attribute_Ofast__
#else
#define __lzma_attribute_Ofast__ __attribute__((optimize("Ofast")))
#endif
|
|
|
|
#include "lzmadecode.h"
|
|
#include <types.h>
|
|
|
|
/* The range coder refills once Range drops below kTopValue (2^24). */
#define kNumTopBits		24
#define kTopValue		((UInt32)1 << kNumTopBits)

/* Bit probabilities are scaled to kBitModelTotal (2^11). */
#define kNumBitModelTotalBits	11
#define kBitModelTotal		(1 << kNumBitModelTotalBits)

/* Each probability update moves by (distance to the limit) >> kNumMoveBits. */
#define kNumMoveBits		5
|
|
|
|
/*
 * Fetch the next input byte (expression macro).
 *
 * Use 32-bit reads whenever possible to avoid bad flash performance. Fall back
 * to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim
 * is *reached* (not surpassed!), meaning we can't allow that to happen while
 * there are still bytes to decode from the algorithm's point of view.
 *
 * Expects these locals in the expanding scope:
 *   Buffer, BufferLim - current read position and end of the input
 *   look_ahead        - union of Byte raw[4] and UInt32 dw holding the last
 *                       32-bit read
 *   look_ahead_ptr    - index of the next unread byte in look_ahead.raw;
 *                       4 means the look-ahead is empty
 *
 * Note that a 32-bit read advances Buffer by 4 at once, so Buffer can be up
 * to 3 bytes ahead of the bytes actually handed out.
 */
#define RC_READ_BYTE \
	(look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
	 : ((((uintptr_t)Buffer & 3) \
	     || ((SizeT)(BufferLim - Buffer) <= 4)) ? (*Buffer++) \
	    : ((look_ahead.dw = *(const UInt32 *)Buffer), (Buffer += 4), \
	       (look_ahead_ptr = 1), look_ahead.raw[0])))
|
|
|
|
/*
 * Make the enclosing function return LZMA_RESULT_DATA_ERROR once the read
 * position has reached the end of the input.
 */
#define RC_TEST \
	do { \
		if (Buffer == BufferLim) \
			return LZMA_RESULT_DATA_ERROR; \
	} while (0)

/*
 * Reset the range coder and prime Code with the first five input bytes.
 * Code is 32 bits wide, so the first of the five bytes is shifted out again.
 * May return from the enclosing function through RC_TEST.
 */
#define RC_INIT2 \
	do { \
		int i; \
		\
		Code = 0; \
		Range = 0xFFFFFFFF; \
		for (i = 0; i < 5; i++) { \
			RC_TEST; \
			Code = (Code << 8) | RC_READ_BYTE; \
		} \
	} while (0)

/*
 * Point the range coder at an input buffer of bufferSize bytes and
 * initialize it. Each argument is evaluated exactly once.
 */
#define RC_INIT(buffer, bufferSize) \
	do { \
		Buffer = (buffer); \
		BufferLim = Buffer + (bufferSize); \
		RC_INIT2; \
	} while (0)
|
|
|
|
|
|
/*
 * Refill step: once Range has dropped below kTopValue, scale Range by 256 and
 * shift the next input byte into Code. May return through RC_TEST.
 *
 * This is intentionally a bare "if" statement; do not follow it with "else".
 */
#define RC_NORMALIZE \
	if (Range < kTopValue) { \
		RC_TEST; \
		Range <<= 8; \
		Code = (Code << 8) | RC_READ_BYTE; \
	}

/*
 * Opens the "decoded bit is 0" branch for probability *p. Must be followed by
 * a braced block and may take an "else" for the "bit is 1" case. Leaves the
 * split point in the local "bound".
 */
#define IfBit0(p) \
	RC_NORMALIZE; \
	bound = *(p) * (Range >> kNumBitModelTotalBits); \
	if (bound > Code)

/* Bit was 0: keep the lower part of the range and raise *p. */
#define UpdateBit0(p) \
	Range = bound; \
	*(p) += (kBitModelTotal - *(p)) >> kNumMoveBits

/* Bit was 1: keep the upper part of the range and lower *p. */
#define UpdateBit1(p) \
	Code -= bound; \
	Range -= bound; \
	*(p) -= (*(p)) >> kNumMoveBits

/*
 * Decode one bit with probability *p and append it to mi (mi = 2 * mi + bit).
 * A0 runs after a 0 bit, A1 after a 1 bit; callers may pass statements such
 * as "break" here, so the expansion must not be wrapped in a loop.
 */
#define RC_GET_BIT2(p, mi, A0, A1) \
	IfBit0(p) { \
		UpdateBit0(p); \
		mi <<= 1; \
		A0; \
	} else { \
		UpdateBit1(p); \
		mi = (mi << 1) + 1; \
		A1; \
	}

/* RC_GET_BIT2 without per-bit actions. */
#define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ;, ;)
|
|
|
|
/*
 * Decode a numLevels-bit symbol with a binary probability tree.
 *
 * probs     - base of the tree's probability array; index 1 is the root and
 *             node n has children 2n and 2n + 1
 * numLevels - number of bits to decode (must be at least 1; evaluated twice)
 * res       - lvalue receiving the symbol, in the range 0 .. 2^numLevels - 1
 *
 * Expands to a braced block; relies on the range coder locals used by
 * RC_GET_BIT.
 */
#define RangeDecoderBitTreeDecode(probs, numLevels, res) \
	{ \
		int i = (numLevels); \
		\
		res = 1; \
		do { \
			CProb *cp = (probs) + res; \
			RC_GET_BIT(cp, res) \
		} while (--i != 0); \
		/* drop the leading 1 that marked the tree root */ \
		res -= (1 << (numLevels)); \
	}
|
|
|
|
|
|
/*
 * Layout of the probability array.
 *
 * These stay preprocessor macros on purpose: the layout is checked with #if
 * further down, and enum constants would evaluate to 0 there.
 */

/* Upper bound on the number of position-state bits and states. */
#define kNumPosBitsMax		4
#define kNumPosStatesMax	(1 << kNumPosBitsMax)

/* Length coder: low and mid trees per position state, one shared high tree. */
#define kLenNumLowBits		3
#define kLenNumLowSymbols	(1 << kLenNumLowBits)
#define kLenNumMidBits		3
#define kLenNumMidSymbols	(1 << kLenNumMidBits)
#define kLenNumHighBits		8
#define kLenNumHighSymbols	(1 << kLenNumHighBits)

/* Offsets inside one length coder's block of probabilities. */
#define LenChoice		0
#define LenChoice2		(LenChoice + 1)
#define LenLow			(LenChoice2 + 1)
#define LenMid			(LenLow + (kNumPosStatesMax * kLenNumLowSymbols))
#define LenHigh			(LenMid + (kNumPosStatesMax * kLenNumMidSymbols))
#define kNumLenProbs		(LenHigh + kLenNumHighSymbols)

/* Decoder state machine; states below kNumLitStates follow a literal. */
#define kNumStates		12
#define kNumLitStates		7

/* Distance (position) coding parameters. */
#define kStartPosModelIndex	4
#define kEndPosModelIndex	14
#define kNumFullDistances	(1 << (kEndPosModelIndex >> 1))

#define kNumPosSlotBits		6
#define kNumLenToPosStates	4

#define kNumAlignBits		4
#define kAlignTableSize		(1 << kNumAlignBits)

/* Shortest match length; decoded lengths are offsets from it. */
#define kMatchMinLen		2

/* Offsets of the individual models inside the probability array. */
#define IsMatch			0
#define IsRep			(IsMatch + (kNumStates * kNumPosStatesMax))
#define IsRepG0			(IsRep + kNumStates)
#define IsRepG1			(IsRepG0 + kNumStates)
#define IsRepG2			(IsRepG1 + kNumStates)
#define IsRep0Long		(IsRepG2 + kNumStates)
#define PosSlot			(IsRep0Long + (kNumStates * kNumPosStatesMax))
#define SpecPos			(PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define Align			(SpecPos + kNumFullDistances - kEndPosModelIndex)
#define LenCoder		(Align + kAlignTableSize)
#define RepLenCoder		(LenCoder + kNumLenProbs)
#define Literal			(RepLenCoder + kNumLenProbs)
|
|
|
|
/*
 * The fixed part of the probability array (everything before the literal
 * coder) must be exactly LZMA_BASE_SIZE entries, since callers size the
 * array from that constant (defined outside this file).
 */
#if Literal != LZMA_BASE_SIZE
#error "LZMA probability layout (Literal) does not match LZMA_BASE_SIZE"
#endif
|
|
|
|
/*
 * Decode the LZMA properties header into *propsRes.
 *
 * Only propsData[0] is interpreted here. It packs the three model parameters
 * as (pb * 5 + lp) * 9 + lc, so valid values are 0 .. 224.
 *
 * propsRes  - receives lc, lp and pb
 * propsData - raw properties bytes
 * size      - number of bytes available at propsData
 *
 * Returns LZMA_RESULT_OK on success. Returns LZMA_RESULT_DATA_ERROR, leaving
 * *propsRes untouched, when size is below LZMA_PROPERTIES_SIZE or the first
 * byte is out of range.
 */
int LzmaDecodeProperties(CLzmaProperties *propsRes,
	const unsigned char *propsData, int size)
{
	unsigned char prop0;

	if (size < LZMA_PROPERTIES_SIZE)
		return LZMA_RESULT_DATA_ERROR;

	prop0 = propsData[0];
	if (prop0 >= (9 * 5 * 5))
		return LZMA_RESULT_DATA_ERROR;

	propsRes->pb = prop0 / (9 * 5);
	prop0 %= (9 * 5);
	propsRes->lp = prop0 / 9;
	propsRes->lc = prop0 % 9;

	return LZMA_RESULT_OK;
}
|
|
|
|
#define kLzmaStreamWasFinishedId (-1)
|
|
|
|
__lzma_attribute_Ofast__
|
|
int LzmaDecode(CLzmaDecoderState *vs,
|
|
const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed,
|
|
unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed)
|
|
{
|
|
CProb *p = vs->Probs;
|
|
SizeT nowPos = 0;
|
|
Byte previousByte = 0;
|
|
UInt32 posStateMask = (1 << (vs->Properties.pb)) - 1;
|
|
UInt32 literalPosMask = (1 << (vs->Properties.lp)) - 1;
|
|
int lc = vs->Properties.lc;
|
|
|
|
|
|
int state = 0;
|
|
UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
|
|
int len = 0;
|
|
const Byte *Buffer;
|
|
const Byte *BufferLim;
|
|
int look_ahead_ptr = 4;
|
|
union {
|
|
Byte raw[4];
|
|
UInt32 dw;
|
|
} look_ahead;
|
|
UInt32 Range;
|
|
UInt32 Code;
|
|
|
|
*inSizeProcessed = 0;
|
|
*outSizeProcessed = 0;
|
|
|
|
{
|
|
UInt32 i;
|
|
UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc
|
|
+ vs->Properties.lp));
|
|
for (i = 0; i < numProbs; i++)
|
|
p[i] = kBitModelTotal >> 1;
|
|
}
|
|
|
|
RC_INIT(inStream, inSize);
|
|
|
|
|
|
while (nowPos < outSize) {
|
|
CProb *prob;
|
|
UInt32 bound;
|
|
int posState = (int)((nowPos)&posStateMask);
|
|
|
|
prob = p + IsMatch + (state << kNumPosBitsMax) + posState;
|
|
IfBit0(prob) {
|
|
int symbol = 1;
|
|
UpdateBit0(prob);
|
|
prob = p + Literal + (LZMA_LIT_SIZE *
|
|
((((nowPos) & literalPosMask) << lc)
|
|
+ (previousByte >> (8 - lc))));
|
|
|
|
if (state >= kNumLitStates) {
|
|
int matchByte;
|
|
matchByte = outStream[nowPos - rep0];
|
|
do {
|
|
int bit;
|
|
CProb *probLit;
|
|
matchByte <<= 1;
|
|
bit = (matchByte & 0x100);
|
|
probLit = prob + 0x100 + bit + symbol;
|
|
RC_GET_BIT2(probLit, symbol,
|
|
if (bit != 0)
|
|
break,
|
|
if (bit == 0)
|
|
break)
|
|
} while (symbol < 0x100);
|
|
}
|
|
while (symbol < 0x100) {
|
|
CProb *probLit = prob + symbol;
|
|
RC_GET_BIT(probLit, symbol)
|
|
}
|
|
previousByte = (Byte)symbol;
|
|
|
|
outStream[nowPos++] = previousByte;
|
|
if (state < 4)
|
|
state = 0;
|
|
else if (state < 10)
|
|
state -= 3;
|
|
else
|
|
state -= 6;
|
|
} else {
|
|
UpdateBit1(prob);
|
|
prob = p + IsRep + state;
|
|
IfBit0(prob) {
|
|
UpdateBit0(prob);
|
|
rep3 = rep2;
|
|
rep2 = rep1;
|
|
rep1 = rep0;
|
|
state = state < kNumLitStates ? 0 : 3;
|
|
prob = p + LenCoder;
|
|
} else {
|
|
UpdateBit1(prob);
|
|
prob = p + IsRepG0 + state;
|
|
IfBit0(prob) {
|
|
UpdateBit0(prob);
|
|
prob = p + IsRep0Long
|
|
+ (state << kNumPosBitsMax)
|
|
+ posState;
|
|
IfBit0(prob) {
|
|
UpdateBit0(prob);
|
|
|
|
if (nowPos == 0)
|
|
return LZMA_RESULT_DATA_ERROR;
|
|
|
|
state = state < kNumLitStates
|
|
? 9 : 11;
|
|
previousByte = outStream[nowPos
|
|
- rep0];
|
|
outStream[nowPos++] =
|
|
previousByte;
|
|
|
|
continue;
|
|
} else {
|
|
UpdateBit1(prob);
|
|
}
|
|
} else {
|
|
UInt32 distance;
|
|
UpdateBit1(prob);
|
|
prob = p + IsRepG1 + state;
|
|
IfBit0(prob) {
|
|
UpdateBit0(prob);
|
|
distance = rep1;
|
|
} else {
|
|
UpdateBit1(prob);
|
|
prob = p + IsRepG2 + state;
|
|
IfBit0(prob) {
|
|
UpdateBit0(prob);
|
|
distance = rep2;
|
|
} else {
|
|
UpdateBit1(prob);
|
|
distance = rep3;
|
|
rep3 = rep2;
|
|
}
|
|
rep2 = rep1;
|
|
}
|
|
rep1 = rep0;
|
|
rep0 = distance;
|
|
}
|
|
state = state < kNumLitStates ? 8 : 11;
|
|
prob = p + RepLenCoder;
|
|
}
|
|
{
|
|
int numBits, offset;
|
|
CProb *probLen = prob + LenChoice;
|
|
IfBit0(probLen) {
|
|
UpdateBit0(probLen);
|
|
probLen = prob + LenLow
|
|
+ (posState << kLenNumLowBits);
|
|
offset = 0;
|
|
numBits = kLenNumLowBits;
|
|
} else {
|
|
UpdateBit1(probLen);
|
|
probLen = prob + LenChoice2;
|
|
IfBit0(probLen) {
|
|
UpdateBit0(probLen);
|
|
probLen = prob + LenMid
|
|
+ (posState <<
|
|
kLenNumMidBits);
|
|
offset = kLenNumLowSymbols;
|
|
numBits = kLenNumMidBits;
|
|
} else {
|
|
UpdateBit1(probLen);
|
|
probLen = prob + LenHigh;
|
|
offset = kLenNumLowSymbols
|
|
+ kLenNumMidSymbols;
|
|
numBits = kLenNumHighBits;
|
|
}
|
|
}
|
|
RangeDecoderBitTreeDecode(probLen, numBits,
|
|
len);
|
|
len += offset;
|
|
}
|
|
|
|
if (state < 4) {
|
|
int posSlot;
|
|
state += kNumLitStates;
|
|
prob = p + PosSlot +
|
|
((len < kNumLenToPosStates ? len :
|
|
kNumLenToPosStates - 1) <<
|
|
kNumPosSlotBits);
|
|
RangeDecoderBitTreeDecode(prob, kNumPosSlotBits,
|
|
posSlot);
|
|
if (posSlot >= kStartPosModelIndex) {
|
|
int numDirectBits = ((posSlot >> 1)
|
|
- 1);
|
|
rep0 = (2 | ((UInt32)posSlot & 1));
|
|
if (posSlot < kEndPosModelIndex) {
|
|
rep0 <<= numDirectBits;
|
|
prob = p + SpecPos + rep0
|
|
- posSlot - 1;
|
|
} else {
|
|
numDirectBits -= kNumAlignBits;
|
|
do {
|
|
RC_NORMALIZE
|
|
Range >>= 1;
|
|
rep0 <<= 1;
|
|
if (Code >= Range) {
|
|
Code -= Range;
|
|
rep0 |= 1;
|
|
}
|
|
} while (--numDirectBits != 0);
|
|
prob = p + Align;
|
|
rep0 <<= kNumAlignBits;
|
|
numDirectBits = kNumAlignBits;
|
|
}
|
|
{
|
|
int i = 1;
|
|
int mi = 1;
|
|
do {
|
|
CProb *prob3 = prob
|
|
+ mi;
|
|
RC_GET_BIT2(prob3, mi,
|
|
;, rep0 |= i);
|
|
i <<= 1;
|
|
} while (--numDirectBits != 0);
|
|
}
|
|
} else
|
|
rep0 = posSlot;
|
|
if (++rep0 == (UInt32)(0)) {
|
|
/* it's for stream version */
|
|
len = kLzmaStreamWasFinishedId;
|
|
break;
|
|
}
|
|
}
|
|
|
|
len += kMatchMinLen;
|
|
if (rep0 > nowPos)
|
|
return LZMA_RESULT_DATA_ERROR;
|
|
|
|
|
|
do {
|
|
previousByte = outStream[nowPos - rep0];
|
|
len--;
|
|
outStream[nowPos++] = previousByte;
|
|
} while (len != 0 && nowPos < outSize);
|
|
}
|
|
}
|
|
RC_NORMALIZE;
|
|
/*
|
|
* Tell static analysis we know len can have a dead assignment.
|
|
*/
|
|
(void)len;
|
|
|
|
|
|
*inSizeProcessed = (SizeT)(Buffer - inStream);
|
|
*outSizeProcessed = nowPos;
|
|
return LZMA_RESULT_OK;
|
|
}
|