coreboot-kgpe-d16/src/lib/lzmadecode.c

442 lines
10 KiB
C
Raw Normal View History

/*
LzmaDecode.c
LZMA Decoder (optimized for Speed version)
LZMA SDK 4.40 Copyright (c) 1999-2006 Igor Pavlov (2006-05-01)
http://www.7-zip.org/
LZMA SDK is licensed under two licenses:
1) GNU Lesser General Public License (GNU LGPL)
2) Common Public License (CPL)
It means that you can select one of these two licenses and
follow rules of that license.
SPECIAL EXCEPTION:
Igor Pavlov, as the author of this Code, expressly permits you to
statically or dynamically link your Code (or bind by name) to the
interfaces of this file without subjecting your linked Code to the
terms of the CPL or GNU LGPL. Any modifications or additions
to this file, however, are subject to the LGPL or CPL terms.
*/
lib/lzma: Build the source for decompression with flag -Ofast The decompression is critical for speed of boot. So we sacrifice some generated code size to optimize for speed. This change speeds up the LZMA decompression between 3% and 6% at a cost of just over 2k of additional code space. BUG=b:223985641 TEST=Majolica The test is done on Majolica and the result is listed below. Time saved: We tested the boot time with each flag for 10 times. The duration of each decompression process is listed as below. Load FSP-M Load ramstage Load payload Ofast Os Ofast Os Ofast Os ------------------------------------------ 62543 62959 20585 22458 9945 10626 62548 62967 20587 22461 9951 10637 62560 62980 20588 22478 9951 10641 62561 62988 20596 22478 9954 10643 62569 62993 20596 22479 9954 10643 62574 63000 20605 22492 9958 10647 62575 63026 20615 22495 9959 10647 62576 63038 20743 22614 9960 10647 62587 63044 20758 22625 9961 10647 62592 63045 20769 22637 9961 10647 ----------------------------------------- average 62568 63004 20644 22521 9955 10642 (unit: microseconds) Size sacrificed: The size of object file with -Os: ./build/ramstage/lib/lzmadecode.o: file format elf32-i386 4 .text.LzmaDecode 00000d84 00000000 00000000 00000076 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE The size of object file with -Ofast: ./build/ramstage/lib/lzmadecode.o: file format elf32-i386 4 .text.LzmaDecode 00001719 00000000 00000000 00000080 2**4 CONTENTS, ALLOC, LOAD, READONLY, CODE (Output by running "objdump -h ./build/ramstage/lib/lzmadecode.o") We can see that size is increased from 3460 bytes to 5913 bytes, a change of 2453 bytes or 171%. Change-Id: Ie003164e2e93ba8ed3ccd207f3af31c6acf1c5e2 Signed-off-by: Zheng Bao <fishbaozi@gmail.com> Reviewed-on: https://review.coreboot.org/c/coreboot/+/66392 Tested-by: build bot (Jenkins) <no-reply@coreboot.org> Reviewed-by: Martin Roth <martin.roth@amd.corp-partner.google.com>
2022-08-03 11:57:47 +02:00
#if CONFIG(DECOMPRESS_OFAST)
#define __lzma_attribute_Ofast__ __attribute__((optimize("Ofast")))
#else
#define __lzma_attribute_Ofast__
#endif
#include "lzmadecode.h"
#include <types.h>
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back
* to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim
* is *reached* (not surpassed!), meaning we can't allow that to happen while
* there are still bytes to decode from the algorithm's point of view. */
#define RC_READ_BYTE \
(look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
: ((((uintptr_t) Buffer & 3) \
|| ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \
: ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \
(look_ahead_ptr = 1), look_ahead.raw[0])))
src/lib: Fix spacing Fix the following errors and warnings detected by checkpatch.pl: ERROR: spaces required around that '?' (ctx:WxV) ERROR: spaces required around that '=' (ctx:VxV) ERROR: spaces required around that '<' (ctx:VxV) ERROR: spaces required around that '+=' (ctx:VxV) ERROR: space required after that ',' (ctx:VxV) ERROR: space required before the open brace '{' ERROR: space required after that close brace '}' ERROR: need consistent spacing around '+' (ctx:WxV) ERROR: need consistent spacing around '*' (ctx:WxV) ERROR: need consistent spacing around '&' (ctx:VxW) ERROR: spaces required around that '?' (ctx:VxW) ERROR: spaces required around that ':' (ctx:VxW) ERROR: trailing whitespace ERROR: space prohibited before that '++' (ctx:WxO) ERROR: space prohibited before that ',' (ctx:WxW) ERROR: space prohibited after that '!' (ctx:BxW) ERROR: spaces prohibited around that '->' (ctx:VxW) ERROR: space prohibited after that '-' (ctx:WxW) WARNING: space prohibited before semicolon WARNING: unnecessary whitespace before a quoted newline WARNING: missing space after return type Note that lib/libgcov.c and lib/lzmadecode.c are providing false positives for ERROR: need consistent spacing around '*' (ctx:WxV) An example is: void __gcov_merge_add(gcov_type *counters __attribute__ ((unused)), unsigned int n_counters __attribute__ ((unused))) {} TEST=Build and run on Galileo Gen2 Change-Id: I0016327a5754018eaeb25bedf42338291632c7c1 Signed-off-by: Lee Leahy <Leroy.P.Leahy@intel.com> Reviewed-on: https://review.coreboot.org/18733 Tested-by: build bot (Jenkins) Reviewed-by: Martin Roth <martinroth@google.com>
2017-03-10 02:35:28 +01:00
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
{ \
int i; \
\
for (i = 0; i < 5; i++) { \
RC_TEST; \
Code = (Code << 8) | RC_READ_BYTE; \
} \
}
#define RC_TEST { if (Buffer == BufferLim) return LZMA_RESULT_DATA_ERROR; }
#define RC_INIT(buffer, bufferSize) Buffer = buffer; \
BufferLim = buffer + bufferSize; RC_INIT2
#define RC_NORMALIZE \
if (Range < kTopValue) { \
RC_TEST; \
Range <<= 8; \
Code = (Code << 8) | RC_READ_BYTE; \
}
#define IfBit0(p) \
RC_NORMALIZE; \
bound = (Range >> kNumBitModelTotalBits) * *(p); \
if (Code < bound)
#define UpdateBit0(p) \
Range = bound; \
*(p) += (kBitModelTotal - *(p)) >> kNumMoveBits
#define UpdateBit1(p) \
Range -= bound; \
Code -= bound; \
*(p) -= (*(p)) >> kNumMoveBits
#define RC_GET_BIT2(p, mi, A0, A1) \
IfBit0(p) { \
UpdateBit0(p); \
mi <<= 1; \
A0; \
} else { \
UpdateBit1(p); \
mi = (mi + mi) + 1; \
A1; \
}
src/lib: Fix spacing Fix the following errors and warnings detected by checkpatch.pl: ERROR: spaces required around that '?' (ctx:WxV) ERROR: spaces required around that '=' (ctx:VxV) ERROR: spaces required around that '<' (ctx:VxV) ERROR: spaces required around that '+=' (ctx:VxV) ERROR: space required after that ',' (ctx:VxV) ERROR: space required before the open brace '{' ERROR: space required after that close brace '}' ERROR: need consistent spacing around '+' (ctx:WxV) ERROR: need consistent spacing around '*' (ctx:WxV) ERROR: need consistent spacing around '&' (ctx:VxW) ERROR: spaces required around that '?' (ctx:VxW) ERROR: spaces required around that ':' (ctx:VxW) ERROR: trailing whitespace ERROR: space prohibited before that '++' (ctx:WxO) ERROR: space prohibited before that ',' (ctx:WxW) ERROR: space prohibited after that '!' (ctx:BxW) ERROR: spaces prohibited around that '->' (ctx:VxW) ERROR: space prohibited after that '-' (ctx:WxW) WARNING: space prohibited before semicolon WARNING: unnecessary whitespace before a quoted newline WARNING: missing space after return type Note that lib/libgcov.c and lib/lzmadecode.c are providing false positives for ERROR: need consistent spacing around '*' (ctx:WxV) An example is: void __gcov_merge_add(gcov_type *counters __attribute__ ((unused)), unsigned int n_counters __attribute__ ((unused))) {} TEST=Build and run on Galileo Gen2 Change-Id: I0016327a5754018eaeb25bedf42338291632c7c1 Signed-off-by: Lee Leahy <Leroy.P.Leahy@intel.com> Reviewed-on: https://review.coreboot.org/18733 Tested-by: build bot (Jenkins) Reviewed-by: Martin Roth <martinroth@google.com>
2017-03-10 02:35:28 +01:00
#define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ;, ;)
#define RangeDecoderBitTreeDecode(probs, numLevels, res) \
{ \
int i = numLevels; \
\
res = 1; \
do { \
CProb *cp = probs + res; \
RC_GET_BIT(cp, res) \
} while (--i != 0); \
res -= (1 << numLevels); \
}
#define kNumPosBitsMax 4
#define kNumPosStatesMax (1 << kNumPosBitsMax)
#define kLenNumLowBits 3
#define kLenNumLowSymbols (1 << kLenNumLowBits)
#define kLenNumMidBits 3
#define kLenNumMidSymbols (1 << kLenNumMidBits)
#define kLenNumHighBits 8
#define kLenNumHighSymbols (1 << kLenNumHighBits)
#define LenChoice 0
#define LenChoice2 (LenChoice + 1)
#define LenLow (LenChoice2 + 1)
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
#define kNumStates 12
#define kNumLitStates 7
#define kStartPosModelIndex 4
#define kEndPosModelIndex 14
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
#define kNumPosSlotBits 6
#define kNumLenToPosStates 4
#define kNumAlignBits 4
#define kAlignTableSize (1 << kNumAlignBits)
#define kMatchMinLen 2
#define IsMatch 0
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
#define IsRepG0 (IsRep + kNumStates)
#define IsRepG1 (IsRepG0 + kNumStates)
#define IsRepG2 (IsRepG1 + kNumStates)
#define IsRep0Long (IsRepG2 + kNumStates)
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
#define LenCoder (Align + kAlignTableSize)
#define RepLenCoder (LenCoder + kNumLenProbs)
#define Literal (RepLenCoder + kNumLenProbs)
#if Literal != LZMA_BASE_SIZE
StopCompilingDueBUG
#endif
int LzmaDecodeProperties(CLzmaProperties *propsRes,
const unsigned char *propsData, int size)
{
unsigned char prop0;
if (size < LZMA_PROPERTIES_SIZE)
return LZMA_RESULT_DATA_ERROR;
prop0 = propsData[0];
if (prop0 >= (9 * 5 * 5))
return LZMA_RESULT_DATA_ERROR;
{
for (propsRes->pb = 0; prop0 >= (9 * 5);
propsRes->pb++, prop0 -= (9 * 5))
;
for (propsRes->lp = 0; prop0 >= 9; propsRes->lp++, prop0 -= 9)
;
propsRes->lc = prop0;
/*
* unsigned char remainder = (unsigned char)(prop0 / 9);
* propsRes->lc = prop0 % 9;
* propsRes->pb = remainder / 5;
* propsRes->lp = remainder % 5;
*/
}
return LZMA_RESULT_OK;
}
#define kLzmaStreamWasFinishedId (-1)
lib/lzma: Build the source for decompression with flag -Ofast The decompression is critical for speed of boot. So we sacrifice some generated code size to optimize for speed. This change speeds up the LZMA decompression between 3% and 6% at a cost of just over 2k of additional code space. BUG=b:223985641 TEST=Majolica The test is done on Majolica and the result is listed below. Time saved: We tested the boot time with each flag for 10 times. The duration of each decompression process is listed as below. Load FSP-M Load ramstage Load payload Ofast Os Ofast Os Ofast Os ------------------------------------------ 62543 62959 20585 22458 9945 10626 62548 62967 20587 22461 9951 10637 62560 62980 20588 22478 9951 10641 62561 62988 20596 22478 9954 10643 62569 62993 20596 22479 9954 10643 62574 63000 20605 22492 9958 10647 62575 63026 20615 22495 9959 10647 62576 63038 20743 22614 9960 10647 62587 63044 20758 22625 9961 10647 62592 63045 20769 22637 9961 10647 ----------------------------------------- average 62568 63004 20644 22521 9955 10642 (unit: microseconds) Size sacrificed: The size of object file with -Os: ./build/ramstage/lib/lzmadecode.o: file format elf32-i386 4 .text.LzmaDecode 00000d84 00000000 00000000 00000076 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE The size of object file with -Ofast: ./build/ramstage/lib/lzmadecode.o: file format elf32-i386 4 .text.LzmaDecode 00001719 00000000 00000000 00000080 2**4 CONTENTS, ALLOC, LOAD, READONLY, CODE (Output by running "objdump -h ./build/ramstage/lib/lzmadecode.o") We can see that size is increased from 3460 bytes to 5913 bytes, a change of 2453 bytes or 171%. Change-Id: Ie003164e2e93ba8ed3ccd207f3af31c6acf1c5e2 Signed-off-by: Zheng Bao <fishbaozi@gmail.com> Reviewed-on: https://review.coreboot.org/c/coreboot/+/66392 Tested-by: build bot (Jenkins) <no-reply@coreboot.org> Reviewed-by: Martin Roth <martin.roth@amd.corp-partner.google.com>
2022-08-03 11:57:47 +02:00
__lzma_attribute_Ofast__
int LzmaDecode(CLzmaDecoderState *vs,
const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed,
unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed)
{
CProb *p = vs->Probs;
SizeT nowPos = 0;
Byte previousByte = 0;
UInt32 posStateMask = (1 << (vs->Properties.pb)) - 1;
UInt32 literalPosMask = (1 << (vs->Properties.lp)) - 1;
int lc = vs->Properties.lc;
int state = 0;
UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
int len = 0;
const Byte *Buffer;
const Byte *BufferLim;
int look_ahead_ptr = 4;
union {
Byte raw[4];
UInt32 dw;
} look_ahead;
UInt32 Range;
UInt32 Code;
*inSizeProcessed = 0;
*outSizeProcessed = 0;
{
UInt32 i;
UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc
+ vs->Properties.lp));
for (i = 0; i < numProbs; i++)
p[i] = kBitModelTotal >> 1;
}
RC_INIT(inStream, inSize);
while (nowPos < outSize) {
CProb *prob;
UInt32 bound;
src/lib: Fix spacing Fix the following errors and warnings detected by checkpatch.pl: ERROR: spaces required around that '?' (ctx:WxV) ERROR: spaces required around that '=' (ctx:VxV) ERROR: spaces required around that '<' (ctx:VxV) ERROR: spaces required around that '+=' (ctx:VxV) ERROR: space required after that ',' (ctx:VxV) ERROR: space required before the open brace '{' ERROR: space required after that close brace '}' ERROR: need consistent spacing around '+' (ctx:WxV) ERROR: need consistent spacing around '*' (ctx:WxV) ERROR: need consistent spacing around '&' (ctx:VxW) ERROR: spaces required around that '?' (ctx:VxW) ERROR: spaces required around that ':' (ctx:VxW) ERROR: trailing whitespace ERROR: space prohibited before that '++' (ctx:WxO) ERROR: space prohibited before that ',' (ctx:WxW) ERROR: space prohibited after that '!' (ctx:BxW) ERROR: spaces prohibited around that '->' (ctx:VxW) ERROR: space prohibited after that '-' (ctx:WxW) WARNING: space prohibited before semicolon WARNING: unnecessary whitespace before a quoted newline WARNING: missing space after return type Note that lib/libgcov.c and lib/lzmadecode.c are providing false positives for ERROR: need consistent spacing around '*' (ctx:WxV) An example is: void __gcov_merge_add(gcov_type *counters __attribute__ ((unused)), unsigned int n_counters __attribute__ ((unused))) {} TEST=Build and run on Galileo Gen2 Change-Id: I0016327a5754018eaeb25bedf42338291632c7c1 Signed-off-by: Lee Leahy <Leroy.P.Leahy@intel.com> Reviewed-on: https://review.coreboot.org/18733 Tested-by: build bot (Jenkins) Reviewed-by: Martin Roth <martinroth@google.com>
2017-03-10 02:35:28 +01:00
int posState = (int)((nowPos)&posStateMask);
prob = p + IsMatch + (state << kNumPosBitsMax) + posState;
IfBit0(prob) {
int symbol = 1;
UpdateBit0(prob);
prob = p + Literal + (LZMA_LIT_SIZE *
((((nowPos) & literalPosMask) << lc)
+ (previousByte >> (8 - lc))));
if (state >= kNumLitStates) {
int matchByte;
matchByte = outStream[nowPos - rep0];
do {
int bit;
CProb *probLit;
matchByte <<= 1;
bit = (matchByte & 0x100);
probLit = prob + 0x100 + bit + symbol;
RC_GET_BIT2(probLit, symbol,
if (bit != 0)
break,
if (bit == 0)
break)
} while (symbol < 0x100);
}
while (symbol < 0x100) {
CProb *probLit = prob + symbol;
RC_GET_BIT(probLit, symbol)
}
previousByte = (Byte)symbol;
outStream[nowPos++] = previousByte;
if (state < 4)
state = 0;
else if (state < 10)
state -= 3;
else
state -= 6;
} else {
UpdateBit1(prob);
prob = p + IsRep + state;
IfBit0(prob) {
UpdateBit0(prob);
rep3 = rep2;
rep2 = rep1;
rep1 = rep0;
state = state < kNumLitStates ? 0 : 3;
prob = p + LenCoder;
} else {
UpdateBit1(prob);
prob = p + IsRepG0 + state;
IfBit0(prob) {
UpdateBit0(prob);
prob = p + IsRep0Long
+ (state << kNumPosBitsMax)
+ posState;
IfBit0(prob) {
UpdateBit0(prob);
if (nowPos == 0)
return LZMA_RESULT_DATA_ERROR;
state = state < kNumLitStates
? 9 : 11;
previousByte = outStream[nowPos
- rep0];
outStream[nowPos++] =
previousByte;
continue;
} else {
UpdateBit1(prob);
}
} else {
UInt32 distance;
UpdateBit1(prob);
prob = p + IsRepG1 + state;
IfBit0(prob) {
UpdateBit0(prob);
distance = rep1;
} else {
UpdateBit1(prob);
prob = p + IsRepG2 + state;
IfBit0(prob) {
UpdateBit0(prob);
distance = rep2;
} else {
UpdateBit1(prob);
distance = rep3;
rep3 = rep2;
}
rep2 = rep1;
}
rep1 = rep0;
rep0 = distance;
}
state = state < kNumLitStates ? 8 : 11;
prob = p + RepLenCoder;
}
{
int numBits, offset;
CProb *probLen = prob + LenChoice;
IfBit0(probLen) {
UpdateBit0(probLen);
probLen = prob + LenLow
+ (posState << kLenNumLowBits);
offset = 0;
numBits = kLenNumLowBits;
} else {
UpdateBit1(probLen);
probLen = prob + LenChoice2;
IfBit0(probLen) {
UpdateBit0(probLen);
probLen = prob + LenMid
+ (posState <<
kLenNumMidBits);
offset = kLenNumLowSymbols;
numBits = kLenNumMidBits;
} else {
UpdateBit1(probLen);
probLen = prob + LenHigh;
offset = kLenNumLowSymbols
+ kLenNumMidSymbols;
numBits = kLenNumHighBits;
}
}
RangeDecoderBitTreeDecode(probLen, numBits,
len);
len += offset;
}
if (state < 4) {
int posSlot;
state += kNumLitStates;
prob = p + PosSlot +
((len < kNumLenToPosStates ? len :
kNumLenToPosStates - 1) <<
kNumPosSlotBits);
RangeDecoderBitTreeDecode(prob, kNumPosSlotBits,
posSlot);
if (posSlot >= kStartPosModelIndex) {
int numDirectBits = ((posSlot >> 1)
- 1);
rep0 = (2 | ((UInt32)posSlot & 1));
if (posSlot < kEndPosModelIndex) {
rep0 <<= numDirectBits;
prob = p + SpecPos + rep0
- posSlot - 1;
} else {
numDirectBits -= kNumAlignBits;
do {
RC_NORMALIZE
Range >>= 1;
rep0 <<= 1;
if (Code >= Range) {
Code -= Range;
rep0 |= 1;
}
} while (--numDirectBits != 0);
prob = p + Align;
rep0 <<= kNumAlignBits;
numDirectBits = kNumAlignBits;
}
{
int i = 1;
int mi = 1;
do {
CProb *prob3 = prob
+ mi;
RC_GET_BIT2(prob3, mi,
;, rep0 |= i);
i <<= 1;
} while (--numDirectBits != 0);
}
} else
rep0 = posSlot;
if (++rep0 == (UInt32)(0)) {
/* it's for stream version */
len = kLzmaStreamWasFinishedId;
break;
}
}
len += kMatchMinLen;
if (rep0 > nowPos)
return LZMA_RESULT_DATA_ERROR;
do {
previousByte = outStream[nowPos - rep0];
len--;
outStream[nowPos++] = previousByte;
} while (len != 0 && nowPos < outSize);
}
}
RC_NORMALIZE;
/*
* Tell static analysis we know len can have a dead assignment.
*/
(void)len;
*inSizeProcessed = (SizeT)(Buffer - inStream);
*outSizeProcessed = nowPos;
return LZMA_RESULT_OK;
}