coreboot-kgpe-d16/src/lib/lzmadecode.c
Zheng Bao d91f3a4eaf lib/lzma: Build the source for decompression with flag -Ofast
The decompression is critical for speed of boot. So we sacrifice some
generated code size to optimize for speed.
This change speeds up the LZMA decompression between 3% and 6% at a
cost of just over 2k of additional code space.

BUG=b:223985641
TEST=Majolica

The test is done on Majolica and the result is listed below.

Time saved:
We tested the boot time with each flag for 10 times. The duration of
each decompression process is listed as below.
        Load FSP-M    Load ramstage  Load payload
        Ofast Os      Ofast Os       Ofast Os
        ------------------------------------------
        62543 62959   20585 22458    9945  10626
        62548 62967   20587 22461    9951  10637
        62560 62980   20588 22478    9951  10641
        62561 62988   20596 22478    9954  10643
        62569 62993   20596 22479    9954  10643
        62574 63000   20605 22492    9958  10647
        62575 63026   20615 22495    9959  10647
        62576 63038   20743 22614    9960  10647
        62587 63044   20758 22625    9961  10647
        62592 63045   20769 22637    9961  10647
        -----------------------------------------
average 62568 63004   20644 22521    9955  10642
                             (unit: microseconds)

Size sacrificed:
The size of object file with -Os:
  ./build/ramstage/lib/lzmadecode.o:     file format elf32-i386
    4 .text.LzmaDecode 00000d84  00000000  00000000  00000076  2**0
                  CONTENTS, ALLOC, LOAD, READONLY, CODE
The size of object file with -Ofast:
  ./build/ramstage/lib/lzmadecode.o:     file format elf32-i386
    4 .text.LzmaDecode 00001719  00000000  00000000  00000080  2**4
                  CONTENTS, ALLOC, LOAD, READONLY, CODE
(Output by running "objdump -h ./build/ramstage/lib/lzmadecode.o")

We can see that size is increased from 3460 bytes to 5913 bytes, a
change of 2453 bytes or 171%.

Change-Id: Ie003164e2e93ba8ed3ccd207f3af31c6acf1c5e2
Signed-off-by: Zheng Bao <fishbaozi@gmail.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/66392
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Martin Roth <martin.roth@amd.corp-partner.google.com>
2022-10-12 16:55:58 +00:00

441 lines
10 KiB
C

/*
LzmaDecode.c
LZMA Decoder (optimized for Speed version)
LZMA SDK 4.40 Copyright (c) 1999-2006 Igor Pavlov (2006-05-01)
http://www.7-zip.org/
LZMA SDK is licensed under two licenses:
1) GNU Lesser General Public License (GNU LGPL)
2) Common Public License (CPL)
It means that you can select one of these two licenses and
follow rules of that license.
SPECIAL EXCEPTION:
Igor Pavlov, as the author of this Code, expressly permits you to
statically or dynamically link your Code (or bind by name) to the
interfaces of this file without subjecting your linked Code to the
terms of the CPL or GNU LGPL. Any modifications or additions
to this file, however, are subject to the LGPL or CPL terms.
*/
#if CONFIG(DECOMPRESS_OFAST)
#define __lzma_attribute_Ofast__ __attribute__((optimize("Ofast")))
#else
#define __lzma_attribute_Ofast__
#endif
#include "lzmadecode.h"
#include <types.h>
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back
* to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim
* is *reached* (not surpassed!), meaning we can't allow that to happen while
* there are still bytes to decode from the algorithm's point of view. */
#define RC_READ_BYTE \
(look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
: ((((uintptr_t) Buffer & 3) \
|| ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \
: ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \
(look_ahead_ptr = 1), look_ahead.raw[0])))
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
{ \
int i; \
\
for (i = 0; i < 5; i++) { \
RC_TEST; \
Code = (Code << 8) | RC_READ_BYTE; \
} \
}
#define RC_TEST { if (Buffer == BufferLim) return LZMA_RESULT_DATA_ERROR; }
#define RC_INIT(buffer, bufferSize) Buffer = buffer; \
BufferLim = buffer + bufferSize; RC_INIT2
#define RC_NORMALIZE \
if (Range < kTopValue) { \
RC_TEST; \
Range <<= 8; \
Code = (Code << 8) | RC_READ_BYTE; \
}
#define IfBit0(p) \
RC_NORMALIZE; \
bound = (Range >> kNumBitModelTotalBits) * *(p); \
if (Code < bound)
#define UpdateBit0(p) \
Range = bound; \
*(p) += (kBitModelTotal - *(p)) >> kNumMoveBits
#define UpdateBit1(p) \
Range -= bound; \
Code -= bound; \
*(p) -= (*(p)) >> kNumMoveBits
#define RC_GET_BIT2(p, mi, A0, A1) \
IfBit0(p) { \
UpdateBit0(p); \
mi <<= 1; \
A0; \
} else { \
UpdateBit1(p); \
mi = (mi + mi) + 1; \
A1; \
}
#define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ;, ;)
#define RangeDecoderBitTreeDecode(probs, numLevels, res) \
{ \
int i = numLevels; \
\
res = 1; \
do { \
CProb *cp = probs + res; \
RC_GET_BIT(cp, res) \
} while (--i != 0); \
res -= (1 << numLevels); \
}
#define kNumPosBitsMax 4
#define kNumPosStatesMax (1 << kNumPosBitsMax)
#define kLenNumLowBits 3
#define kLenNumLowSymbols (1 << kLenNumLowBits)
#define kLenNumMidBits 3
#define kLenNumMidSymbols (1 << kLenNumMidBits)
#define kLenNumHighBits 8
#define kLenNumHighSymbols (1 << kLenNumHighBits)
#define LenChoice 0
#define LenChoice2 (LenChoice + 1)
#define LenLow (LenChoice2 + 1)
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
#define kNumStates 12
#define kNumLitStates 7
#define kStartPosModelIndex 4
#define kEndPosModelIndex 14
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
#define kNumPosSlotBits 6
#define kNumLenToPosStates 4
#define kNumAlignBits 4
#define kAlignTableSize (1 << kNumAlignBits)
#define kMatchMinLen 2
#define IsMatch 0
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
#define IsRepG0 (IsRep + kNumStates)
#define IsRepG1 (IsRepG0 + kNumStates)
#define IsRepG2 (IsRepG1 + kNumStates)
#define IsRep0Long (IsRepG2 + kNumStates)
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
#define LenCoder (Align + kAlignTableSize)
#define RepLenCoder (LenCoder + kNumLenProbs)
#define Literal (RepLenCoder + kNumLenProbs)
#if Literal != LZMA_BASE_SIZE
StopCompilingDueBUG
#endif
int LzmaDecodeProperties(CLzmaProperties *propsRes,
const unsigned char *propsData, int size)
{
unsigned char prop0;
if (size < LZMA_PROPERTIES_SIZE)
return LZMA_RESULT_DATA_ERROR;
prop0 = propsData[0];
if (prop0 >= (9 * 5 * 5))
return LZMA_RESULT_DATA_ERROR;
{
for (propsRes->pb = 0; prop0 >= (9 * 5);
propsRes->pb++, prop0 -= (9 * 5))
;
for (propsRes->lp = 0; prop0 >= 9; propsRes->lp++, prop0 -= 9)
;
propsRes->lc = prop0;
/*
* unsigned char remainder = (unsigned char)(prop0 / 9);
* propsRes->lc = prop0 % 9;
* propsRes->pb = remainder / 5;
* propsRes->lp = remainder % 5;
*/
}
return LZMA_RESULT_OK;
}
#define kLzmaStreamWasFinishedId (-1)
__lzma_attribute_Ofast__
int LzmaDecode(CLzmaDecoderState *vs,
const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed,
unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed)
{
CProb *p = vs->Probs;
SizeT nowPos = 0;
Byte previousByte = 0;
UInt32 posStateMask = (1 << (vs->Properties.pb)) - 1;
UInt32 literalPosMask = (1 << (vs->Properties.lp)) - 1;
int lc = vs->Properties.lc;
int state = 0;
UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
int len = 0;
const Byte *Buffer;
const Byte *BufferLim;
int look_ahead_ptr = 4;
union {
Byte raw[4];
UInt32 dw;
} look_ahead;
UInt32 Range;
UInt32 Code;
*inSizeProcessed = 0;
*outSizeProcessed = 0;
{
UInt32 i;
UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc
+ vs->Properties.lp));
for (i = 0; i < numProbs; i++)
p[i] = kBitModelTotal >> 1;
}
RC_INIT(inStream, inSize);
while (nowPos < outSize) {
CProb *prob;
UInt32 bound;
int posState = (int)((nowPos)&posStateMask);
prob = p + IsMatch + (state << kNumPosBitsMax) + posState;
IfBit0(prob) {
int symbol = 1;
UpdateBit0(prob);
prob = p + Literal + (LZMA_LIT_SIZE *
((((nowPos) & literalPosMask) << lc)
+ (previousByte >> (8 - lc))));
if (state >= kNumLitStates) {
int matchByte;
matchByte = outStream[nowPos - rep0];
do {
int bit;
CProb *probLit;
matchByte <<= 1;
bit = (matchByte & 0x100);
probLit = prob + 0x100 + bit + symbol;
RC_GET_BIT2(probLit, symbol,
if (bit != 0)
break,
if (bit == 0)
break)
} while (symbol < 0x100);
}
while (symbol < 0x100) {
CProb *probLit = prob + symbol;
RC_GET_BIT(probLit, symbol)
}
previousByte = (Byte)symbol;
outStream[nowPos++] = previousByte;
if (state < 4)
state = 0;
else if (state < 10)
state -= 3;
else
state -= 6;
} else {
UpdateBit1(prob);
prob = p + IsRep + state;
IfBit0(prob) {
UpdateBit0(prob);
rep3 = rep2;
rep2 = rep1;
rep1 = rep0;
state = state < kNumLitStates ? 0 : 3;
prob = p + LenCoder;
} else {
UpdateBit1(prob);
prob = p + IsRepG0 + state;
IfBit0(prob) {
UpdateBit0(prob);
prob = p + IsRep0Long
+ (state << kNumPosBitsMax)
+ posState;
IfBit0(prob) {
UpdateBit0(prob);
if (nowPos == 0)
return LZMA_RESULT_DATA_ERROR;
state = state < kNumLitStates
? 9 : 11;
previousByte = outStream[nowPos
- rep0];
outStream[nowPos++] =
previousByte;
continue;
} else {
UpdateBit1(prob);
}
} else {
UInt32 distance;
UpdateBit1(prob);
prob = p + IsRepG1 + state;
IfBit0(prob) {
UpdateBit0(prob);
distance = rep1;
} else {
UpdateBit1(prob);
prob = p + IsRepG2 + state;
IfBit0(prob) {
UpdateBit0(prob);
distance = rep2;
} else {
UpdateBit1(prob);
distance = rep3;
rep3 = rep2;
}
rep2 = rep1;
}
rep1 = rep0;
rep0 = distance;
}
state = state < kNumLitStates ? 8 : 11;
prob = p + RepLenCoder;
}
{
int numBits, offset;
CProb *probLen = prob + LenChoice;
IfBit0(probLen) {
UpdateBit0(probLen);
probLen = prob + LenLow
+ (posState << kLenNumLowBits);
offset = 0;
numBits = kLenNumLowBits;
} else {
UpdateBit1(probLen);
probLen = prob + LenChoice2;
IfBit0(probLen) {
UpdateBit0(probLen);
probLen = prob + LenMid
+ (posState <<
kLenNumMidBits);
offset = kLenNumLowSymbols;
numBits = kLenNumMidBits;
} else {
UpdateBit1(probLen);
probLen = prob + LenHigh;
offset = kLenNumLowSymbols
+ kLenNumMidSymbols;
numBits = kLenNumHighBits;
}
}
RangeDecoderBitTreeDecode(probLen, numBits,
len);
len += offset;
}
if (state < 4) {
int posSlot;
state += kNumLitStates;
prob = p + PosSlot +
((len < kNumLenToPosStates ? len :
kNumLenToPosStates - 1) <<
kNumPosSlotBits);
RangeDecoderBitTreeDecode(prob, kNumPosSlotBits,
posSlot);
if (posSlot >= kStartPosModelIndex) {
int numDirectBits = ((posSlot >> 1)
- 1);
rep0 = (2 | ((UInt32)posSlot & 1));
if (posSlot < kEndPosModelIndex) {
rep0 <<= numDirectBits;
prob = p + SpecPos + rep0
- posSlot - 1;
} else {
numDirectBits -= kNumAlignBits;
do {
RC_NORMALIZE
Range >>= 1;
rep0 <<= 1;
if (Code >= Range) {
Code -= Range;
rep0 |= 1;
}
} while (--numDirectBits != 0);
prob = p + Align;
rep0 <<= kNumAlignBits;
numDirectBits = kNumAlignBits;
}
{
int i = 1;
int mi = 1;
do {
CProb *prob3 = prob
+ mi;
RC_GET_BIT2(prob3, mi,
;, rep0 |= i);
i <<= 1;
} while (--numDirectBits != 0);
}
} else
rep0 = posSlot;
if (++rep0 == (UInt32)(0)) {
/* it's for stream version */
len = kLzmaStreamWasFinishedId;
break;
}
}
len += kMatchMinLen;
if (rep0 > nowPos)
return LZMA_RESULT_DATA_ERROR;
do {
previousByte = outStream[nowPos - rep0];
len--;
outStream[nowPos++] = previousByte;
} while (len != 0 && nowPos < outSize);
}
}
RC_NORMALIZE;
/*
* Tell static analysis we know len can have a dead assignment.
*/
(void)len;
*inSizeProcessed = (SizeT)(Buffer - inStream);
*outSizeProcessed = nowPos;
return LZMA_RESULT_OK;
}