load_payload: Use 32-bit accesses to speed up decompression.

Flash prefers 32-bit sequential access. On some platforms ROM is
not cached due to, among other things, MTRR shortage. Moreover, ROM
caching is not currently enabled by default. With this patch, payload
decompression is sped up by a theoretical factor of 4.

Tested on X201, with caching disabled:

Before:
  90:load payload                  4,470,841 (24,505)
  99:selfboot jump                 6,073,812 (1,602,971)

After:
  90:load payload                  4,530,979 (17,728)
  99:selfboot jump                 5,103,408 (572,429)

Change-Id: Id17e61316dbbf73f4a837bf173f88bf26c01c62b
Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
Reviewed-on: http://review.coreboot.org/5144
Reviewed-by: Aaron Durbin <adurbin@google.com>
Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net>
Tested-by: build bot (Jenkins)
This commit is contained in:
Vladimir Serbinenko 2014-02-05 17:00:40 +01:00
parent 79c712cb9e
commit 3d6ffe76f8
2 changed files with 16 additions and 1 deletions

View File

@ -202,6 +202,12 @@ int cbfs_decompress(int algo, void *src, void *dst, int len)
{ {
switch (algo) { switch (algo) {
case CBFS_COMPRESS_NONE: case CBFS_COMPRESS_NONE:
/* Reads need to be aligned at 4 bytes to avoid
poor flash performance. */
while (len && ((u32)src & 3)) {
*(u8*)dst++ = *(u8*)src++;
len--;
}
memmove(dst, src, len); memmove(dst, src, len);
return len; return len;
#ifdef CBFS_CORE_WITH_LZMA #ifdef CBFS_CORE_WITH_LZMA

View File

@ -28,7 +28,10 @@
#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5 #define kNumMoveBits 5
#define RC_READ_BYTE (*Buffer++) /* Use 32-bit reads whenever possible to avoid bad flash performance. */
#define RC_READ_BYTE (look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
: ((((UInt32) Buffer & 3) || ((SizeT) (BufferLim - Buffer) < 4)) ? (*Buffer++) \
: ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), (look_ahead_ptr = 1), look_ahead.raw[0])))
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \ #define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
{ int i; for(i = 0; i < 5; i++) { RC_TEST; Code = (Code << 8) | RC_READ_BYTE; }} { int i; for(i = 0; i < 5; i++) { RC_TEST; Code = (Code << 8) | RC_READ_BYTE; }}
@ -149,6 +152,12 @@ int LzmaDecode(CLzmaDecoderState *vs,
int len = 0; int len = 0;
const Byte *Buffer; const Byte *Buffer;
const Byte *BufferLim; const Byte *BufferLim;
int look_ahead_ptr = 4;
union
{
Byte raw[4];
UInt32 dw;
} look_ahead;
UInt32 Range; UInt32 Range;
UInt32 Code; UInt32 Code;