cbfstool: Replace C++ code with C code

cbfstool was using a C++ wrapper around the C written LZMA functions. And a C wrapper around those C++ functions. Drop the mess and rewrite the functions to be all C. Change-Id: Ieb6645a42f19efcc857be323ed8bdfcd9f48ee7c Signed-off-by: Stefan Reinauer <reinauer@google.com> Reviewed-on: http://review.coreboot.org/3010 Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net> Tested-by: build bot (Jenkins) Reviewed-by: Ronald G. Minnich <rminnich@gmail.com>
2013-03-28 16:51:45 -07:00 · 2013-03-28 16:51:45 -07:00 · aa3f7ba36e
parent 60a4a73fcd
commit aa3f7ba36e
8 changed files with 208 additions and 1151 deletions
--- a/util/cbfstool/Makefile
+++ b/util/cbfstool/Makefile
@ -1,6 +1,5 @@
 obj ?= $(shell pwd)
 HOSTCXX  ?= g++
 HOSTCC   ?= gcc
 CFLAGS   ?= -g -Wall -Werror
 CFLAGS   += -D_7ZIP_ST
@ -20,9 +19,6 @@ all: dep $(BINARY)
 $(obj)/%.o: %.c
 	$(HOSTCC) $(CFLAGS) -c -o $@ $<
 $(obj)/%.o: %.cc
 	$(HOSTCXX) $(CFLAGS) -c -o $@ $<
 clean:
 	rm -f $(COMMON) $(BINARY)
@ -30,11 +26,11 @@ tags:
 	ctags *.[ch]
 $(obj)/cbfstool:$(COMMON)
-	$(HOSTCXX) $(CFLAGS) -o $@ $^
+	$(HOSTCC) $(CFLAGS) -o $@ $^
 dep:
 	@$(HOSTCC) $(CFLAGS) -MM *.c > .dependencies
-	@$(HOSTCC) $(CFLAGS) -MM lzma/*.cc >> .dependencies
+	@$(HOSTCC) $(CFLAGS) -MM lzma/*.c >> .dependencies
 	@$(HOSTCC) $(CFLAGS) -MM lzma/C/*.c >> .dependencies
 -include .dependencies
--- a/util/cbfstool/Makefile.inc
+++ b/util/cbfstool/Makefile.inc
@ -25,15 +25,15 @@ $(objutil)/cbfstool/%.o: $(top)/util/cbfstool/%.c
 	printf "    HOSTCC     $(subst $(objutil)/,,$(@))\n"
 	$(HOSTCC) $(CBFSTOOLFLAGS) $(HOSTCFLAGS) -c -o $@ $<
-$(objutil)/cbfstool/%.o: $(top)/util/cbfstool/lzma/%.cc
+$(objutil)/cbfstool/%.o: $(top)/util/cbfstool/lzma/%.c
-	printf "    HOSTCXX    $(subst $(objutil)/,,$(@))\n"
+	printf "    HOSTCC     $(subst $(objutil)/,,$(@))\n"
-	$(HOSTCXX) $(CBFSTOOLFLAGS) $(HOSTCXXFLAGS) -c -o $@ $<
+	$(HOSTCC) $(CBFSTOOLFLAGS) $(HOSTCFLAGS) -c -o $@ $<
 $(objutil)/cbfstool/%.o: $(top)/util/cbfstool/lzma/C/%.c
 	printf "    HOSTCC     $(subst $(objutil)/,,$(@))\n"
 	$(HOSTCC) $(CBFSTOOLFLAGS) $(HOSTCFLAGS) -c -o $@ $<
 $(objutil)/cbfstool/cbfstool: $(objutil)/cbfstool $(addprefix $(objutil)/cbfstool/,$(cbfsobj))
-	printf "    HOSTCXX    $(subst $(objutil)/,,$(@)) (link)\n"
+	printf "    HOSTCC     $(subst $(objutil)/,,$(@)) (link)\n"
-	$(HOSTCXX) $(CBFSTOOLFLAGS) -o $@ $(addprefix $(objutil)/cbfstool/,$(cbfsobj))
+	$(HOSTCC) $(CBFSTOOLFLAGS) -o $@ $(addprefix $(objutil)/cbfstool/,$(cbfsobj))
--- a/util/cbfstool/compress.c
+++ b/util/cbfstool/compress.c
@ -26,14 +26,14 @@
 #include <stdio.h>
 #include "common.h"
-extern void do_lzma_compress(char *in, int in_len, char *out, int *out_len);
+void do_lzma_compress(char *in, int in_len, char *out, int *out_len);
-void lzma_compress(char *in, int in_len, char *out, int *out_len)
+static void lzma_compress(char *in, int in_len, char *out, int *out_len)
 {
 	do_lzma_compress(in, in_len, out, out_len);
 }
-void none_compress(char *in, int in_len, char *out, int *out_len)
+static void none_compress(char *in, int in_len, char *out, int *out_len)
 {
 	memcpy(out, in, in_len);
 	*out_len = in_len;
--- a/util/cbfstool/lzma/ORIGIN
+++ b/util/cbfstool/lzma/ORIGIN
@ -1,8 +0,0 @@
 The contents of this directory are extracted from
 the official LZMA SDK, version 9.12, for the use in
 mkcromfs (cromfs 1.5.10.1).
 However, cromfs does not include all files from that
 archive. Basically, only those which are required in
 compiling cromfs, plus a few text files.
--- a/util/cbfstool/lzma/endian.hh
+++ b/util/cbfstool/lzma/endian.hh
@ -1,178 +0,0 @@
 #ifndef bqtEndianHH
 #define bqtEndianHH
 #ifndef __STDC_CONSTANT_MACROS
 #define __STDC_CONSTANT_MACROS /* for UINT16_C etc */
 #endif
 #include <stdint.h>
 #if defined(__x86_64)||defined(__i386)
 #define LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
 #else
 #undef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
 #endif
 #ifdef WIN32
 # define LL_FMT "I64"
 #else
 # define LL_FMT "ll"
 #endif
 /* Read the single byte at p and return it widened to uint_fast16_t. */
 static inline uint_fast16_t get_8(const void* p)
 {
    return *(const unsigned char*)p;
 }
 /* Read a 16-bit value stored least-significant byte first at p. */
 static inline uint_fast16_t get_16(const void* p)
 {
  #ifndef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
    /* Portable path: assemble the value one byte at a time. */
    const unsigned char* bytes = (const unsigned char*)p;
    const uint_fast16_t low  = get_8(bytes);
    const uint_fast16_t high = get_8(bytes+1);
    return low | (high << UINT16_C(8));
  #else
    /* Fast path: the host layout already matches, load directly. */
    return *(const uint_least16_t*)p;
  #endif
 }
 /* Read a 16-bit value stored most-significant byte first at p. */
 static inline uint_fast16_t R16r(const void* p)
 {
  #ifndef BIG_ENDIAN_AND_UNALIGNED_ACCESS_OK
    /* Portable path: the first byte supplies the high half. */
    const unsigned char* bytes = (const unsigned char*)p;
    const uint_fast16_t high = get_8(bytes);
    const uint_fast16_t low  = get_8(bytes+1);
    return low | (high << UINT16_C(8));
  #else
    /* Fast path: the host layout already matches, load directly. */
    return *(const uint_least16_t*)p;
  #endif
 }
 /* Read a 24-bit value stored least-significant byte first at p.
  * Only the three bytes p[0..2] are accessed.
  */
 static inline uint_fast32_t R24(const void* p)
 {
    /* Note: This might be faster if implemented through a 32-bit read and a
     * bitwise and, but we cannot do that because such a read would also touch
     * the byte following the three we need, and we don't know if that byte is
     * a valid memory location.
     * NOTE(review): the original wording said "third byte"; presumably the
     * byte at offset 3 was meant - confirm.
     */
    const unsigned char* data = (const unsigned char*)p;
    return get_16(data) | (get_8(data+2) << UINT32_C(16));
 }
 /* Read a 24-bit value stored most-significant byte first at p.
  * Only the three bytes p[0..2] are accessed.
  *
  * The previous implementation combined the first byte with get_16(data+1),
  * which reads its two bytes least-significant first; the low 16 bits of the
  * result therefore came out byte-swapped. The bytes are now combined
  * explicitly in most-significant-first order, matching R16r.
  */
 static inline uint_fast32_t R24r(const void* p)
 {
    const unsigned char* data = (const unsigned char*)p;
    return ((uint_fast32_t)data[0] << 16)
         | ((uint_fast32_t)data[1] << 8)
         |  (uint_fast32_t)data[2];
 }
 /* Read a 32-bit value stored least-significant byte first at p. */
 static inline uint_fast32_t get_32(const void* p)
 {
  #ifndef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
    /* Portable path: join two 16-bit halves, low half first. */
    const unsigned char* bytes = (const unsigned char*)p;
    const uint_fast16_t low  = get_16(bytes);
    const uint_fast16_t high = get_16(bytes+2);
    return low | (high << UINT32_C(16));
  #else
    /* Fast path: the host layout already matches, load directly. */
    return *(const uint_least32_t*)p;
  #endif
 }
 /* Read a 32-bit value stored most-significant byte first at p.
  *
  * The portable path used to join two get_16() results. get_16() reads its
  * bytes least-significant first, so the bytes inside each 16-bit half were
  * swapped. The four bytes are now combined explicitly.
  */
 static inline uint_fast32_t R32r(const void* p)
 {
  #ifdef BIG_ENDIAN_AND_UNALIGNED_ACCESS_OK
    return *(const uint_least32_t*)p;
  #else
    const unsigned char* data = (const unsigned char*)p;
    return ((uint_fast32_t)data[0] << 24)
         | ((uint_fast32_t)data[1] << 16)
         | ((uint_fast32_t)data[2] << 8)
         |  (uint_fast32_t)data[3];
  #endif
 }
 #define L (uint_fast64_t)
 static inline uint_fast64_t get_64(const void* p)
 {
  #ifdef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
    return *(const uint_least64_t*)p;
  #else
    const unsigned char* data = (const unsigned char*)p;
    return (L get_32(data)) | ((L get_32(data+4)) << UINT64_C(32));
  #endif
 }
 /* Read a 64-bit value stored most-significant byte first at p.
  *
  * The portable path used to join two get_32() results. get_32() reads its
  * bytes least-significant first, so the bytes inside each 32-bit half were
  * reversed. The eight bytes are now folded together explicitly.
  */
 static inline uint_fast64_t R64r(const void* p)
 {
  #ifdef BIG_ENDIAN_AND_UNALIGNED_ACCESS_OK
    return *(const uint_least64_t*)p;
  #else
    const unsigned char* data = (const unsigned char*)p;
    uint_fast64_t value = 0;
    for(unsigned i = 0; i < 8; ++i)
        value = (value << 8) | data[i];
    return value;
  #endif
 }
 #undef L
 /* Read a value of `bytes` bytes (1..8) stored least-significant byte first
  * at p. Any other width yields 0.
  */
 static inline uint_fast64_t get_n(const void* p, unsigned bytes)
 {
    const unsigned char* src = (const unsigned char*)p;
    /* Widths 2, 4 and 8 map straight onto the fixed-size readers. */
    if(bytes == 8) return get_64(p);
    if(bytes == 4) return get_32(p);
    if(bytes == 2) return get_16(p);
    /* Remaining widths (1, 3, 5, 6, 7): collect the pieces from the top down. */
    const int wide = (bytes >= 5 && bytes <= 7);
    uint_fast64_t value = 0;
    if(bytes == 7)
        value |= ((uint_fast64_t)get_8(src+6)) << 48;
    if(bytes == 6 || bytes == 7)
        value |= ((uint_fast64_t)get_8(src+5)) << 40;
    if(wide)
        value |= ((uint_fast64_t)get_16(src+3)) << 24;
    if(wide || bytes == 3)
        value |= ((uint_fast64_t)get_16(src+1)) << 8;
    if(wide || bytes == 3 || bytes == 1)
        value |= get_8(src);
    return value;
 }
 /* Store value as the single byte at p. */
 static void put_8(void* p, uint_fast8_t value)
 {
    *(unsigned char*)p = value;
 }
 /* Store value at p as two bytes, least-significant byte first. */
 static void put_16(void* p, uint_fast16_t value)
 {
  #ifndef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
    /* Portable path: emit the low byte, then the next one up. */
    unsigned char* out = (unsigned char*)p;
    put_8(out,   value   );
    put_8(out+1, value>>8);
  #else
    /* Fast path: the host layout already matches, store directly. */
    *(uint_least16_t*)p = value;
  #endif
 }
 /* Store value at p as three bytes, least-significant byte first. */
 static void W24(void* p, uint_fast32_t value)
 {
    unsigned char* out = (unsigned char*)p;
    const uint_fast32_t top = value >> UINT32_C(16);
    put_16(out, value);
    put_8(out+2, top);
 }
 /* Store value at p as four bytes, least-significant byte first. */
 static void put_32(void* p, uint_fast32_t value)
 {
  #ifndef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
    /* Portable path: write the low 16-bit half, then the high half. */
    unsigned char* out = (unsigned char*)p;
    put_16(out,   value);
    put_16(out+2, value >> UINT32_C(16));
  #else
    /* Fast path: the host layout already matches, store directly. */
    *(uint_least32_t*)p = value;
  #endif
 }
 /* Store value at p as eight bytes, least-significant byte first. */
 static void put_64(void* p, uint_fast64_t value)
 {
  #ifndef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
    /* Portable path: write the low 32-bit half, then the high half. */
    unsigned char* out = (unsigned char*)p;
    put_32(out,   value);
    put_32(out+4, value >> UINT64_C(32));
  #else
    /* Fast path: the host layout already matches, store directly. */
    *(uint_least64_t*)p = value;
  #endif
 }
 /* Store the low `bytes` bytes (1..8) of value at p, least-significant byte
  * first. Any other width writes nothing.
  */
 static inline void put_n(void* p, uint_fast64_t value, unsigned bytes)
 {
    unsigned char* out = (unsigned char*)p;
    if(bytes == 8)
    {
        put_64(p, value);
        return;
    }
    if(bytes >= 5 && bytes <= 7)
    {
        /* Upper bytes first (highest offset down to offset 4),
         * then the low four bytes in one go. */
        if(bytes == 7) put_8(out+6, value>>48);
        if(bytes >= 6) put_8(out+5, value>>40);
        put_8(out+4, value>>32);
        put_32(p, value);
        return;
    }
    if(bytes == 4)      put_32(p, value);
    else if(bytes == 3) W24(p, value);
    else if(bytes == 2) put_16(p, value);
    else if(bytes == 1) put_8(p, value);
 }
 #endif
--- a/util/cbfstool/lzma/lzma.c
+++ b/util/cbfstool/lzma/lzma.c
@ -0,0 +1,198 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include "../common.h"
 #include "C/LzmaDec.h"
 #include "C/LzmaEnc.h"
 /* Endianness / unaligned memory access handling */
 #if defined(__x86_64__) || defined(__i386__)
 #define LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
 #else
 #undef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
 #endif
 #define L (uint64_t)
 /* Load a 64-bit value stored least-significant byte first at p. */
 static inline uint64_t get_64(const void *p)
 {
 #ifndef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
 	/* Portable path: fold the eight bytes together, highest offset first. */
 	const unsigned char *bytes = (const unsigned char *)p;
 	uint64_t value = 0;
 	int i;
 	for (i = 7; i >= 0; i--)
 		value = (value << 8) | bytes[i];
 	return value;
 #else
 	/* Fast path: the host layout already matches, load directly. */
 	return *(const uint64_t *)p;
 #endif
 }
 /* Store value at p as eight bytes, least-significant byte first. */
 static void put_64(void *p, uint64_t value)
 {
 #ifndef LITTLE_ENDIAN_AND_UNALIGNED_ACCESS_OK
 	/* Portable path: peel off one byte per iteration, lowest first. */
 	unsigned char *out = (unsigned char *)p;
 	int i;
 	for (i = 0; i < 8; i++) {
 		out[i] = value & 0xff;
 		value >>= 8;
 	}
 #else
 	/* Fast path: the host layout already matches, store directly. */
 	*(uint64_t *) p = value;
 #endif
 }
 /* Memory Allocation API */
 /* Allocation hook for the LZMA library: a thin wrapper over malloc().
  * The context pointer is not used.
  */
 static void *SzAlloc(void *unused, size_t size)
 {
 	void *block = malloc(size);
 	(void)unused;
 	return block;
 }
 /* Release hook for the LZMA library: a thin wrapper over free().
  * The context pointer is not used.
  */
 static void SzFree(void *unused, void *address)
 {
 	(void)unused;
 	free(address);
 }
 /* Allocator table handed to the LZMA encoder and decoder calls in this file;
  * it routes their allocations through SzAlloc/SzFree above.
  */
 static ISzAlloc LZMAalloc = { SzAlloc, SzFree };
 /* Streaming API */
 /* Cursor over a caller-supplied memory buffer:
  *   p    - start of the buffer
  *   pos  - offset of the next byte to read or write
  *   size - total number of bytes available in the buffer
  */
 typedef struct vector {
 	char *p;
 	size_t pos;
 	size_t size;
 } vector_t;
 /* File-scope input and output buffers used by the Read()/Write() callbacks
  * below. Because they are shared statics, only one compression can be in
  * progress at a time.
  */
 static vector_t instream, outstream;
 /* Input callback for the LZMA encoder: copy up to *size bytes from the
  * current position of instream into buf, shrink *size to the number of bytes
  * actually copied and advance the cursor. Always reports SZ_OK; end of input
  * is signalled by *size becoming 0.
  */
 static SRes Read(void *unused, void *buf, size_t *size)
 {
 	const size_t remaining = instream.size - instream.pos;
 	if (*size > remaining)
 		*size = remaining;
 	memcpy(buf, instream.p + instream.pos, *size);
 	instream.pos += *size;
 	return SZ_OK;
 }
 /* Output callback for the LZMA encoder: append up to size bytes from buf to
  * outstream, clamped to the space that is left, and advance the cursor.
  * Returns the number of bytes actually stored, which is smaller than size
  * when the output buffer is full.
  */
 static size_t Write(void *unused, const void *buf, size_t size)
 {
 	const size_t room = outstream.size - outstream.pos;
 	if (size > room)
 		size = room;
 	memcpy(outstream.p + outstream.pos, buf, size);
 	outstream.pos += size;
 	return size;
 }
 /* Stream descriptors passed to LzmaEnc_Encode(). Their callbacks ignore the
  * context pointer and operate on the file-scope instream/outstream buffers.
  */
 static ISeqInStream is = { Read };
 static ISeqOutStream os = { Write };
 /**
  * Compress a buffer with LZMA.
  *
  * The output starts with a header of LZMA_PROPS_SIZE property bytes followed
  * by the uncompressed length as an 8-byte value (least-significant byte
  * first), then the encoded stream. Output is capped at in_len bytes: the
  * Write() callback refuses to store more than that.
  *
  * On any failure an error is logged and *out_len is left untouched.
  *
  * Fixes relative to the previous version:
  *  - the shared instream/outstream cursors are reset on every call, so the
  *    function can be called more than once per process;
  *  - the encoder handle is released with LzmaEnc_Destroy() on every path;
  *  - a failed LzmaEnc_Create() is reported instead of being dereferenced.
  *
  * @param in a pointer to the buffer
  * @param in_len the length in bytes
  * @param out a pointer to a buffer of at least size in_len
  * @param out_len a pointer to the compressed length of in
  */
 void do_lzma_compress(char *in, int in_len, char *out, int *out_len)
 {
 	CLzmaEncProps props;
 	CLzmaEncHandle p;
 	unsigned char propsEncoded[LZMA_PROPS_SIZE + 8];
 	size_t propsSize = sizeof propsEncoded;
 	int res;
 	if (in_len == 0) {
 		ERROR("LZMA: Input length is zero.\n");
 		return;
 	}
 	LzmaEncProps_Init(&props);
 	props.dictSize = in_len;
 	props.pb = 0; /* PosStateBits, default: 2, range: 0..4 */
 	props.lp = 0; /* LiteralPosStateBits, default: 0, range: 0..4 */
 	props.lc = 1; /* LiteralContextBits, default: 3, range: 0..8 */
 	props.fb = 273; /* NumFastBytes */
 	props.mc = 0; /* MatchFinderCycles, default: 0 */
 	props.algo = 1; /* AlgorithmNo, apparently, 0 and 1 are valid values. 0 = fast mode */
 	props.numThreads = 1;
 	switch (props.algo) {
 	case 0:	// quick: HC4
 		props.btMode = 0;
 		props.level = 1;
 		break;
 	case 1:	// full: BT4
 	default:
 		props.level = 9;
 		props.btMode = 1;
 		props.numHashBytes = 4;
 		break;
 	}
 	p = LzmaEnc_Create(&LZMAalloc);
 	if (p == NULL) {
 		ERROR("LZMA: LzmaEnc_Create failed.\n");
 		return;
 	}
 	res = LzmaEnc_SetProps(p, &props);
 	if (res != SZ_OK) {
 		ERROR("LZMA: LzmaEnc_SetProps failed.\n");
 		goto out;
 	}
 	res = LzmaEnc_WriteProperties(p, propsEncoded, &propsSize);
 	if (res != SZ_OK) {
 		ERROR("LZMA: LzmaEnc_WriteProperties failed.\n");
 		goto out;
 	}
 	/* The stream cursors are file-scope statics: rewind them so that a
 	 * second call does not continue where the previous one stopped.
 	 */
 	instream.p = in;
 	instream.pos = 0;
 	instream.size = in_len;
 	outstream.p = out;
 	outstream.pos = 0;
 	outstream.size = in_len;
 	/* Header: encoded properties followed by the uncompressed length. */
 	put_64(propsEncoded + LZMA_PROPS_SIZE, in_len);
 	Write(&os, propsEncoded, LZMA_PROPS_SIZE+8);
 	res = LzmaEnc_Encode(p, &os, &is, 0, &LZMAalloc, &LZMAalloc);
 	if (res != SZ_OK) {
 		ERROR("LZMA: LzmaEnc_Encode failed %d.\n", res);
 		goto out;
 	}
 	*out_len = outstream.pos;
 out:
 	/* Release the encoder on success and on every error path alike. */
 	LzmaEnc_Destroy(p, &LZMAalloc, &LZMAalloc);
 }
 /**
  * Decompress an LZMA buffer produced by do_lzma_compress().
  *
  * The input must start with LZMA_PROPS_SIZE property bytes followed by the
  * 8-byte uncompressed length. Nothing is written to dst when the input is
  * too short or when the stored length exceeds dst_len. Failures are logged
  * only; no status is returned to the caller.
  *
  * @param dst destination buffer
  * @param dst_len capacity of dst in bytes
  * @param src compressed input, including the header
  * @param src_len length of src in bytes
  */
 void do_lzma_uncompress(char *dst, int dst_len, char *src, int src_len)
 {
 	ELzmaStatus status;
 	uint64_t declared_size;
 	size_t out_bytes;
 	size_t in_bytes;
 	int rc;
 	/* The header alone takes LZMA_PROPS_SIZE + 8 bytes. */
 	if (src_len <= LZMA_PROPS_SIZE + 8) {
 		ERROR("LZMA: Input length is too small.\n");
 		return;
 	}
 	declared_size = get_64(&src[LZMA_PROPS_SIZE]);
 	if (declared_size > (size_t) dst_len) {
 		ERROR("Not copying %d bytes to %d-byte buffer!\n",
 			(unsigned int)declared_size, dst_len);
 		return;
 	}
 	out_bytes = declared_size;
 	in_bytes = src_len - (LZMA_PROPS_SIZE + 8);
 	rc = LzmaDecode((Byte *) dst, &out_bytes,
 			(Byte *) &src[LZMA_PROPS_SIZE + 8], &in_bytes,
 			(Byte *) &src[0], LZMA_PROPS_SIZE,
 			LZMA_FINISH_END,
 			&status,
 			&LZMAalloc);
 	if (rc != SZ_OK) {
 		ERROR("Error while decompressing.\n");
 	}
 }
--- a/util/cbfstool/lzma/lzma.cc
+++ b/util/cbfstool/lzma/lzma.cc
@ -1,842 +0,0 @@
 #include "endian.hh" /* For R64 */
 extern "C" {
 #include "C/LzmaDec.h"
 #include "C/LzmaEnc.h"
 }
 #include "lzma.hh"
 #include <algorithm> // min,max,swap
 #include <vector>
 #include <string>
 #include <cstring> // std::memcpy
 #include <cstdio>
 #include <stdint.h>
 /* We don't want threads */
 #ifdef linux
 #include <sched.h>
 #define ForceSwitchThread() sched_yield()
 #else
 #define ForceSwitchThread()
 #endif
 int LZMA_verbose = 0;
 // -fb
 unsigned LZMA_NumFastBytes = 273;
 /*from lzma.txt:
          Set number of fast bytes - [5, 273], default: 273
          Usually big number gives a little bit better compression ratio
          and slower compression process.
  from anonymous:
 This one is hard to explain... To my knowledge (please correct me if I
 am wrong), this refers to the optimal parsing algorithm. The algorithm
 tries many different combinations of matches to find the best one. If a
 match is found that is over the fb value, then it will not be optimised,
 and will just be used straight.
 This speeds up corner cases such as pic.
 */
 /* apparently, 0 and 1 are valid values. 0 = fast mode */
 unsigned LZMA_AlgorithmNo  = 1;
 unsigned LZMA_MatchFinderCycles = 0; // default: 0
 // -pb
 unsigned LZMA_PosStateBits = 0; // default: 2, range: 0..4
 /*from lzma.txt:
          pb switch is intended for periodical data
          when period is equal 2^N.
 */
 // -lp
 unsigned LZMA_LiteralPosStateBits = 0; // default: 0, range: 0..4
 /*from lzma.txt:
          lp switch is intended for periodical data when period is
          equal 2^N. For example, for 32-bit (4 bytes)
          periodical data you can use lp=2.
          Often it's better to set lc0, if you change lp switch.
 */
 // -lc
 unsigned LZMA_LiteralContextBits = 1; // default: 3, range: 0..8
 /*from lzma.txt:
          Sometimes lc=4 gives gain for big files.
  from anonymous:
 The context for the literal coder is 2^(lc) long. The longer it is, the
 better the statistics, but also the slower it adapts. A tradeoff, which
 is why 3 or 4 is recommended.
 */
 /*
 Discoveries:
 INODES:
    Best LZMA for raw_inotab_inode(40->48): pb0 lp0 lc0
    Best LZMA for raw_root_inode(28->32): pb0 lp0 lc0
    Start LZMA(rootdir, 736 bytes)
    Yay result with pb0 lp0 lc0: 218
    Yay result with pb0 lp0 lc1: 217
    Best LZMA for rootdir(736->217): pb0 lp0 lc1
    Start LZMA(inotab, 379112 bytes)
    Yay result with pb0 lp0 lc0: 24504
    Best LZMA for inotab(379112->24504): pb0 lp0 lc0
 BLKTAB:
    Best LZMA for raw_blktab(10068->2940): pb2 lp2 lc0
    ---with fastbytes=128---
    Start LZMA(blktab, 12536608 bytes)
    Yay result with pb0 lp0 lc0: 1386141
    Yay result with pb0 lp1 lc0: 1308137
    Yay result with pb0 lp2 lc0: 1305403
    Yay result with pb0 lp3 lc0: 1303072
    Yay result with pb1 lp1 lc0: 1238990
    Yay result with pb1 lp2 lc0: 1227973
    Yay result with pb1 lp3 lc0: 1221205
    Yay result with pb2 lp1 lc0: 1197035
    Yay result with pb2 lp2 lc0: 1188979
    Yay result with pb2 lp3 lc0: 1184531
    Yay result with pb3 lp1 lc0: 1183866
    Yay result with pb3 lp2 lc0: 1172994
    Yay result with pb3 lp3 lc0: 1169048
    Best LZMA for blktab(12536608->1169048): pb3 lp3 lc0
    It seems, lc=0 and pb=lp=N is a wise choice,
    where N is 2 for packed blktab and 3 for unpacked.
 FBLOCKS:
    For SPC sound+code data, the best results
     are between:
      pb0 lp0 lc0 (10%)
      pb0 lp0 lc1 (90%)
     For inotab, these were observed:
      pb1 lp0 lc1
      pb2 lp0 lc0
      pb1 lp1 lc0
      pb3 lp1 lc0
      pb1 lp2 lc0
      pb2 lp1 lc0
    For C source code data, the best results
     are between:
      pb1 lp0 lc3 (10%)
      pb0 lp0 lc3 (90%)
     Occasionally:
      pb0 lp1 lc0
      pb0 lp0 lc3 (mostly)
      pb0 lp0 lc2
      pb0 lp0 lc4
     Occasionally 2:
      pb0 lp0 lc8
      pb0 lp0 lc4
    BUT:
    Best LZMA for fblock(204944->192060): pb0 lp4 lc8 -- surprise! (INOTAB PROBABLY)
 */
 /* Choose the LZMA dictionary size for an input of `datasize` bytes.
  *
  * Active implementation (#if 1): use the input size itself, capped at 1 GiB
  * (1 << 30).
  *
  * The #else branch holds an older stepped table that is currently compiled
  * out. Its non-GNU fallback contained the misspelled keyword `reutrn`, which
  * would break the build as soon as that branch was enabled; the typo is
  * fixed here.
  * NOTE(review): the GNU switch and the fallback if-chain disagree for inputs
  * of 513..4096 bytes (2048/8192 versus 1024/4096) - confirm which table is
  * intended before enabling either.
  */
 static UInt32 SelectDictionarySizeFor(unsigned datasize)
 {
   #if 1
    if(datasize >= (1 << 30U)) return 1 << 30U;
    return datasize;
   #else
 #ifdef __GNUC__
    /* gnu c can optimize this switch statement into a fast binary
     * search, but it cannot do so for the list of the if statements.
     */
    switch(datasize)
    {
        case 0 ... 512 : return 512;
        case 513 ... 1024: return 2048;
        case 1025 ... 4096: return 8192;
        case 4097 ... 16384: return 32768;
        case 16385 ... 65536: return 528288;
        case 65537 ... 528288: return 1048576*4;
        case 528289 ... 786432: return 1048576*16;
        default: return 1048576*32;
    }
 #else
    if(datasize <= 512) return 512;
    if(datasize <= 1024) return 1024;
    if(datasize <= 4096) return 4096;
    if(datasize <= 16384) return 32768;
    if(datasize <= 65536) return 528288;
    if(datasize <= 528288) return 1048576*4;
    if(datasize <= 786432) return 1048576*16;
    return 32*1048576;
 #endif
   #endif
 }
 // Allocation hook for the LZMA library: hands out a new[]-allocated byte
 // array of the requested size. The context pointer is not used.
 static void *SzAlloc(void*, size_t size)
 {
    unsigned char* block = new unsigned char[size];
    return block;
 }
 // Release hook for the LZMA library: frees a byte array obtained from
 // SzAlloc with delete[]. The context pointer is not used.
 static void SzFree(void*, void *address)
 {
    delete[] static_cast<unsigned char*>(address);
 }
 // Allocator table handed to the LZMA encoder and decoder calls in this file;
 // it routes their allocations through SzAlloc/SzFree above.
 static ISzAlloc LZMAalloc = { SzAlloc, SzFree };
 // Input stream for the LZMA encoder that reads from a fixed memory block.
 // The block is not copied; `pos` tracks how many bytes have been consumed.
 class MemReader: public ISeqInStream
 {
 public:
    const unsigned char* const indata;
    const size_t         inlength;
    size_t pos;
 public:
    MemReader(const unsigned char* d, size_t l)
        : ISeqInStream(), indata(d), inlength(l), pos(0)
    {
        // Install the static callback into the C-style stream interface.
        Read = ReadMethod;
    }
    // Copy up to *size bytes from the current position into buf, store the
    // number of bytes actually copied in *size and advance the position.
    // Always reports SZ_OK.
    static SRes ReadMethod(void *pp, void *buf, size_t *size)
    {
        MemReader* self = static_cast<MemReader*>(pp);
        const size_t available = self->inlength - self->pos;
        const size_t count = std::min(*size, available);
        std::memcpy(buf, self->indata + self->pos, count);
        self->pos += count;
        *size = count;
        return SZ_OK;
    }
 };
 // Output stream for the LZMA encoder that appends everything it receives
 // to a growable byte vector.
 class MemWriter: public ISeqOutStream
 {
 public:
    std::vector<unsigned char> buf;
 public:
    MemWriter(): ISeqOutStream(), buf()
    {
        // Install the static callback into the C-style stream interface.
        Write = WriteMethod;
    }
    // Append `size` bytes starting at `from` to buf; always accepts all of
    // them and returns `size`.
    static size_t WriteMethod(void*pp, const void* from, size_t size)
    {
        MemWriter* self = static_cast<MemWriter*>(pp);
        const unsigned char* first = static_cast<const unsigned char*>(from);
        const unsigned char* last = first + size;
        self->buf.insert(self->buf.end(), first, last);
        return size;
    }
 };
 const std::vector<unsigned char> LZMACompress(const unsigned char* data, size_t length,
    unsigned pb,
    unsigned lp,
    unsigned lc)
 {
    return LZMACompress(data,length, pb,lp,lc,
        SelectDictionarySizeFor(length));
 }
 const std::vector<unsigned char> LZMACompress(
    const unsigned char* data, size_t length,
    unsigned pb,
    unsigned lp,
    unsigned lc,
    unsigned dictionarysize)
 {
    if(!length) return std::vector<unsigned char>();
    CLzmaEncProps props;
    LzmaEncProps_Init(&props);
    props.dictSize = dictionarysize;
    props.pb       = pb;
    props.lp       = lp;
    props.lc       = lc;
    props.fb       = LZMA_NumFastBytes;
    props.mc       = LZMA_MatchFinderCycles;
    props.algo     = LZMA_AlgorithmNo;
    props.numThreads = 1;
    switch(LZMA_AlgorithmNo)
    {
        case 0: // quick: HC4
            props.btMode = 0;
            props.level = 1;
            break;
        case 1: // full: BT4
        default:
            props.level = 9;
            props.btMode       = 1;
            props.numHashBytes = 4;
            break;
    }
    CLzmaEncHandle p = LzmaEnc_Create(&LZMAalloc);
    struct AutoReleaseLzmaEnc
    {
        AutoReleaseLzmaEnc(CLzmaEncHandle pp) : p(pp) { }
        ~AutoReleaseLzmaEnc()
        { LzmaEnc_Destroy(p, &LZMAalloc, &LZMAalloc); }
        CLzmaEncHandle p;
        AutoReleaseLzmaEnc(const AutoReleaseLzmaEnc&);
        void operator=(const AutoReleaseLzmaEnc&);
    } AutoReleaser(p); // Create a destructor that ensures
    // that the CLzmaEncHandle is not leaked, even if an
    // exception happens
    int res = LzmaEnc_SetProps(p, &props);
    if(res != SZ_OK)
    {
    Error:
        return std::vector<unsigned char> ();
    }
    unsigned char propsEncoded[LZMA_PROPS_SIZE + 8];
    size_t propsSize = sizeof propsEncoded;
    res = LzmaEnc_WriteProperties(p, propsEncoded, &propsSize);
    if(res != SZ_OK) goto Error;
    MemReader is(data, length);
    MemWriter os;
    put_64(propsEncoded+LZMA_PROPS_SIZE, length);
    os.buf.insert(os.buf.end(), propsEncoded, propsEncoded+LZMA_PROPS_SIZE+8);
    res = LzmaEnc_Encode(p, &os, &is, 0, &LZMAalloc, &LZMAalloc);
    if(res != SZ_OK) goto Error;
    return os.buf;
 }
 const std::vector<unsigned char> LZMACompress(const unsigned char* data, size_t length)
 {
    return LZMACompress(data, length,
        LZMA_PosStateBits,
        LZMA_LiteralPosStateBits,
        LZMA_LiteralContextBits);
 }
 #undef RC_NORMALIZE
 const std::vector<unsigned char> LZMADeCompress
    (const unsigned char* data, size_t length, bool& ok)
 {
    if(length <= LZMA_PROPS_SIZE+8)
    {
    /*clearly_not_ok:*/
        ok = false;
        return std::vector<unsigned char> ();
    }
    uint_least64_t out_sizemax = get_64(&data[LZMA_PROPS_SIZE]);
    /*if(out_sizemax >= (size_t)~0ULL)
    {
        // cannot even allocate a vector this large.
        goto clearly_not_ok;
    }*/
    std::vector<unsigned char> result(out_sizemax);
    ELzmaStatus status;
    SizeT destlen = result.size();
    SizeT srclen = length-(LZMA_PROPS_SIZE+8);
    int res = LzmaDecode(
        &result[0], &destlen,
        &data[LZMA_PROPS_SIZE+8], &srclen,
        &data[0], LZMA_PROPS_SIZE,
        LZMA_FINISH_END,
        &status,
        &LZMAalloc);
    /*
    std::fprintf(stderr, "res=%d, status=%d, in_done=%d (buf=%d), out_done=%d (max=%d)\n",
        res,
        (int)status,
        (int)srclen, (int)length,
        (int)destlen, (int)out_sizemax);
    */
    ok = res == SZ_OK && (status == LZMA_STATUS_FINISHED_WITH_MARK
                       || status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
      && srclen == (length-(LZMA_PROPS_SIZE+8))
      && destlen == out_sizemax;
    return result;
 }
 // Convenience overload: decompress and discard the success flag.
 const std::vector<unsigned char> LZMADeCompress
    (const unsigned char* data, size_t length)
 {
    bool ignored = false;
    return LZMADeCompress(data, length, ignored);
 }
 /* Disabled test driver: reads up to 4 MiB (2048*2048 bytes) from stdin,
  * decompresses it and writes the result to stdout.
  * NOTE(review): it passes a std::vector to LZMADeCompress, which matches
  * neither overload defined above (both take a pointer and a length), so this
  * block would presumably not compile if enabled as-is.
  */
 #if 0
 #include <stdio.h>
 int main(void)
 {
    char Buf[2048*2048];
    int s = fread(Buf,1,sizeof(Buf),stdin);
    std::vector<unsigned char> result = LZMADeCompress(std::vector<unsigned char>(Buf,Buf+s));
    fwrite(&result[0],1,result.size(),stdout);
 }
 #endif
 /* Exhaustive parameter search: compress the input once for every
  * combination of pb (0..4), lp (0..4) and lc (0..8) - 225 runs in total -
  * and return the smallest result.
  *
  * data, length - the input to compress
  * why          - label used only in the progress messages on stderr
  *
  * Progress output depends on LZMA_verbose: level 1 reports each new best
  * result and the final winner; level 2 additionally reports non-improving
  * results and redraws a character map of all sizes seen so far.
  */
 const std::vector<unsigned char> LZMACompressHeavy(const unsigned char* data, size_t length,
    const char* why)
 {
    std::vector<unsigned char> bestresult;
    // Textual description of the parameters that produced bestresult.
    char best[512];
    bool first = true;
    if(LZMA_verbose >= 1)
    {
        std::fprintf(stderr, "Start LZMA(%s, %u bytes)\n", why, (unsigned)length);
        std::fflush(stderr);
    }
    unsigned minresultsize=0, maxresultsize=0;
    // Compressed size per [pb][lp][lc]; 0 means "not tried yet".
    unsigned sizemap[5][5][9] = {{{0}}};
    // Never set to true in this function, so the 4096-byte dictionary
    // variant below is currently never taken.
    bool use_small_dict = false;
    for(int compress_mode = 0; compress_mode < (5*5*9); ++compress_mode)
    {
        // Decode the flat loop counter into the three parameters.
        const unsigned pb = compress_mode % 5;
        const unsigned lp = (compress_mode / 5) % 5;
        const unsigned lc = (compress_mode / 5 / 5) % 9;
        std::vector<unsigned char>
            result = use_small_dict
                ? LZMACompress(data,length,pb,lp,lc, 4096)
                : LZMACompress(data,length,pb,lp,lc);
       {
        sizemap[pb][lp][lc] = result.size();
        if(first || result.size() < minresultsize) minresultsize = result.size();
        if(first || result.size() > maxresultsize) maxresultsize = result.size();
        if(first || result.size() < bestresult.size())
        {
            sprintf(best, "pb%u lp%u lc%u",
                pb,lp,lc);
            if(LZMA_verbose >= 1)
                std::fprintf(stderr, "Yay result with %s: %u\n", best, (unsigned)result.size());
            // Keep the new winner without copying it.
            bestresult.swap(result);
            first = false;
        }
        else
        {
            char tmp[512];
            sprintf(tmp, "pb%u lp%u lc%u",
                pb,lp,lc);
            if(LZMA_verbose >= 2)
                std::fprintf(stderr, "Blaa result with %s: %u\n", tmp, (unsigned)result.size());
        }
        if(LZMA_verbose >= 2)
        {
            std::fprintf(stderr, "%*s\n", (5 * (4+9+2)), "");
            /* Visualize the size map: */
            // One header line plus one line per lp value; each pb value
            // contributes a column block to every line.
            std::string lines[6] = {};
            for(unsigned pbt = 0; pbt <= 4; ++pbt)
            {
                char buf[64]; sprintf(buf, "pb%u:%11s", pbt,"");
                lines[0] += buf;
                for(unsigned lpt = 0; lpt <= 4; ++lpt)
                {
                    char buf[64]; sprintf(buf, "lp%u:", lpt);
                    std::string line;
                    line += buf;
                    for(unsigned lct = 0; lct <= 8; ++lct)
                    {
                        unsigned s = sizemap[pbt][lpt][lct];
                        char c;
                        // '.' for untried combinations, otherwise a letter
                        // 'a'..'z' scaled between the smallest and largest
                        // size seen so far.
                        if(!s) c = '.';
                        else c = 'a' + ('z'-'a'+1)
                                     * (s - minresultsize)
                                     / (maxresultsize-minresultsize+1);
                        line += c;
                    }
                    lines[1 + lpt] += line + "  ";
                }
            }
            for(unsigned a=0; a<6; ++a) std::fprintf(stderr, "%s\n", lines[a].c_str());
            // Escape sequence that moves the cursor up 7 lines, so the next
            // iteration overwrites the map just printed.
            std::fprintf(stderr, "\33[%uA", 7);
        }
       }
    }
    if(LZMA_verbose >= 2)
        std::fprintf(stderr, "\n\n\n\n\n\n\n\n");
    if(LZMA_verbose >= 1)
    {
        std::fprintf(stderr, "Best LZMA for %s(%u->%u): %s\n",
            why,
            (unsigned)length,
            (unsigned)bestresult.size(),
            best);
    }
    std::fflush(stderr);
    return bestresult;
 }
 /*
 The LZMA compression power is controlled by these parameters:
  Dictionary size (we use the maximum)
  Compression algorithm (we use BT4, the heaviest available)
  Number of fast bytes (we use the maximum)
  pb (0..4), lp (0..4) and lc (0..8) -- the effect of these depends on data.
 Since the only parameters whose effect depends on the data to be compressed
 are the three (pb, lp, lc), the "auto" and "full" compression algorithms
 only try to find the optimal values for those.
 The "auto" LZMA compression algorithm is based on these two assumptions:
  - It is possible to find the best value for each component (pb, lp, lc)
    by individually testing the most effective one of them while keeping
    the others static.
    I.e.,    step 1: pb=<find best>, lp=0, lc=0
             step 2: pb=<use result>, lp=<find best>, lc=0
             step 3: pb=<use result>, lp=<use result>, lc=<find best>
             final: pb=<use result>, lp=<use result>, lc=<use result>
  - That the effect of each of these components forms a parabolic function
    that has a starting point, ending point, and possibly a mountain or a
    valley somewhere in the middle, but never a valley _and_ a mountain, nor
    two valleys nor two mountains.
 These assumptions are not always true, but it gets very close to the optimum.
 The ParabolicFinder class below finds the lowest point in a parabolic curve
 with a small number of tests, determining the shape of the curve by sampling
 a few cue values as needed.
 The algorithm is like this:
  Never check any value more than once.
  Check the first two values.
  If they differ, then check the last in sequence.
    If not, then check everything in sequential order.
  If the first two values and the last form an ascending sequence, accept the first value.
    If they form a descending sequence, start Focus Mode
    such that the focus lower limit is index 2 and upper
    limit is the second last. Then check the second last.
      If they don't, then check the third value of sequence,
      and everything else in sequential order.
  If in Focus Mode, check if being in the lower or upper end of the focus.
    If in upper end, check if the current value is bigger than the next one.
      If it is, end the process, because the smallest value has already been found.
        If not, next check the value at focus_low, and increase focus_low.
    If in lower end, check if the current value is bigger than the previous one.
      If it is, end the process, because the smallest value has already been found.
        If not, next check the value at focus_high, and decrease focus_high.
 For any sample space, it generally does 3 tests, but if it detects a curve
 forming a valley, it may do more.
 Note that ParabolicFinder does not _indicate_ the lowest value. It leaves that
 to the caller. It just stops searching when it thinks that no lower value will
 be found.
 Note: The effect of pb, lp and lc depend also on the dictionary size setting
 and compression algorithm. You cannot estimate the optimal value for those
 parameters reliably using different compression settings than in the actual case.
 */
 class ParabolicFinder
 {
 public:
    enum QueryState      { Unknown, Pending, Done };
    enum InstructionType { HereYouGo, WaitingResults, End };
 public:
    ParabolicFinder(unsigned Start, unsigned End)
        : begin(Start),
          results(End-Start+1, 0),
          state  (End-Start+1, Unknown),
          LeftRightSwap(false)
    {
    }
    InstructionType GetNextInstruction(unsigned& attempt)
    {
      InstructionType result = End;
      const int Last  = begin + results.size()-1;
      #define RetIns(n) do{ result = (n); goto DoneCrit; }while(0)
      #define RetVal(n) do{ state[attempt = (n)] = Pending; RetIns(HereYouGo); }while(0)
      {
        /*
        std::fprintf(stderr, "NextInstruction...");
        for(unsigned a=0; a<state.size(); ++a)
            std::fprintf(stderr, " %u=%s", a,
                state[a]==Unknown?"??"
               :state[a]==Done?"Ok"
               :"..");
        std::fprintf(stderr, "\n");*/
        if(CountUnknown() == 0)
        {
            // No unassigned slots remain. Don't need more workers.
            RetIns(End);
        }
        if(1) // scope for local variables
        {
            // Alternate which side to do next if both are available.
            bool LeftSideFirst = LeftRightSwap ^= 1;
            // Check left side descend type
            int LeftSideNext = -1; bool LeftSideDoable = false;
            for(int c=0; c<=Last; ++c)
                switch(state[c])
                {
                    case Unknown: LeftSideNext = c; LeftSideDoable = true; goto ExitLeftSideFor;
                    case Pending: LeftSideNext = c; LeftSideDoable = false; goto ExitLeftSideFor;
                    case Done:
                        if(c == 0) continue;
                        if(results[c] > results[c-1])
                        {
                            // Left side stopped descending.
                            if(state[Last] != Unknown) RetIns(End);
                            goto ExitLeftSideFor;
                        }
                        else if(results[c] == results[c-1])
                            LeftSideFirst = true;
                }
        ExitLeftSideFor: ;
            // Check right side descend type
            int RightSideNext = -1; bool RightSideDoable = false;
            for(int c=Last; c>=0; --c)
                switch(state[c])
                {
                    case Unknown: RightSideNext = c; RightSideDoable = true; goto ExitRightSideFor;
                    case Pending: RightSideNext = c; RightSideDoable = false; goto ExitRightSideFor;
                    case Done:
                        if(c == Last) continue;
                        if(results[c] > results[c+1])
                        {
                            // Right side stopped descending.
                            if(state[0] != Unknown) RetIns(End);
                            goto ExitRightSideFor;
                        }
                        else if(results[c] == results[c+1])
                            LeftSideFirst = false;
                }
        ExitRightSideFor: ;
            if(!LeftSideFirst)
                 { std::swap(LeftSideDoable, RightSideDoable);
                   std::swap(LeftSideNext,   RightSideNext); }
            if(LeftSideDoable) RetVal(LeftSideNext);
            if(RightSideDoable) RetVal(RightSideNext);
            // If we have excess threads and work to do, give them something
            if(CountHandled() > 2) if(LeftSideNext >= 0) RetVal(LeftSideNext);
            if(CountHandled() > 3) if(RightSideNext >= 0) RetVal(RightSideNext);
            RetIns(WaitingResults);
        }
      DoneCrit: ;
      }
      return result;
    }
    void GotResult(unsigned attempt, unsigned value)
    {
      {
        results[attempt] = value;
        state[attempt]   = Done;
      }
    }
 private:
    unsigned CountUnknown() const
    {
        unsigned result=0;
        for(size_t a=0, b=state.size(); a<b; ++a)
            if(state[a] == Unknown) ++result;
        return result;
    }
    unsigned CountHandled() const
    {
        return state.size() - CountUnknown();
    }
 private:
    unsigned begin;
    std::vector<unsigned>   results;
    std::vector<QueryState> state;
    bool LeftRightSwap;
 };
 static void LZMACompressAutoHelper(
    const unsigned char* data, size_t length,
    bool use_small_dict,
    const char* why,
    unsigned& pb, unsigned& lp, unsigned& lc,
    unsigned& which_iterate, ParabolicFinder& finder,
    bool&first, std::vector<unsigned char>& bestresult)
 {
    for(;;)
    {
        unsigned t=0;
        switch(finder.GetNextInstruction(t))
        {
            case ParabolicFinder::End:
                return;
            case ParabolicFinder::HereYouGo:
                break;
            case ParabolicFinder::WaitingResults:
                ForceSwitchThread();
                continue;
        }
        const unsigned try_pb = &which_iterate == &pb ? t : pb;
        const unsigned try_lp = &which_iterate == &lp ? t : lp;
        const unsigned try_lc = &which_iterate == &lc ? t : lc;
        if(LZMA_verbose >= 2)
            std::fprintf(stderr, "%s:Trying pb%u lp%u lc%u\n",
                why,try_pb,try_lp,try_lc);
        std::vector<unsigned char> result = use_small_dict
            ? LZMACompress(data,length,try_pb,try_lp,try_lc, 65536)
            : LZMACompress(data,length,try_pb,try_lp,try_lc);
        if(LZMA_verbose >= 2)
            std::fprintf(stderr, "%s:       pb%u lp%u lc%u -> %u\n",
                why,try_pb,try_lp,try_lc, (unsigned)result.size());
        finder.GotResult(t, result.size());
      {
        if(first || result.size() <= bestresult.size())
        {
            first    = false;
            bestresult.swap(result);
            which_iterate = t;
        }
      }
    }
 }
 const std::vector<unsigned char> LZMACompressAuto(const unsigned char* data, size_t length,
    const char* why)
 {
    if(LZMA_verbose >= 1)
    {
        std::fprintf(stderr, "Start LZMA(%s, %u bytes)\n", why, (unsigned)length);
        std::fflush(stderr);
    }
    unsigned backup_algorithm = LZMA_AlgorithmNo;
    bool use_small_dict = false;//length >= 1048576;
    if(use_small_dict) LZMA_AlgorithmNo = 0;
    unsigned pb=0, lp=0, lc=0;
    std::vector<unsigned char> bestresult;
  {
    ParabolicFinder pb_finder(0,4);
    ParabolicFinder lp_finder(0,4);
    ParabolicFinder lc_finder(0,8);
    bool first=true;
   {
    /* Using parallelism here. However, we need barriers after
     * each step, because the comparisons are made based on the
     * result size, and if the pb/lp/lc values other than the
     * one being focused change, it won't work. Only one parameter
     * must change in the loop.
     */
    /* step 1: find best value in pb axis */
    LZMACompressAutoHelper(data,length,use_small_dict,why,
        pb, lp, lc,
        pb, pb_finder, first, bestresult);
    lp_finder.GotResult(lp, bestresult.size());
    /* step 2: find best value in lp axis */
    LZMACompressAutoHelper(data,length,use_small_dict,why,
        pb, lp, lc,
        lp, lp_finder, first, bestresult);
    lc_finder.GotResult(lc, bestresult.size());
    /* step 3: find best value in lc axis */
    LZMACompressAutoHelper(data,length,use_small_dict,why,
        pb, lp, lc,
        lc, lc_finder, first, bestresult);
   }
  }
    if(use_small_dict || LZMA_AlgorithmNo != backup_algorithm)
    {
        LZMA_AlgorithmNo = backup_algorithm;
        bestresult = LZMACompress(data,length, pb,lp,lc);
    }
    if(LZMA_verbose >= 1)
    {
        std::fprintf(stderr, "Best LZMA for %s(%u->%u): pb%u lp%u lc%u\n",
            why,
            (unsigned)length,
            (unsigned)bestresult.size(),
            pb,lp,lc);
    }
    std::fflush(stderr);
    return bestresult;
 }
 const std::vector<unsigned char>
    DoLZMACompress(int HeavyLevel,
        const unsigned char* data, size_t length,
        const char* why)
 {
    if(HeavyLevel >= 2) return LZMACompressHeavy(data,length, why);
    if(HeavyLevel >= 1) return LZMACompressAuto(data,length, why);
    return LZMACompress(data,length);
 }
 extern "C" {
 /**
 * Compress a buffer with lzma
 * Don't copy the result back if it is too large.
 * @param in a pointer to the buffer
 * @param in_len the length in bytes
 * @param out a pointer to a buffer of at least size in_len
 * @param out_len a pointer to the compressed length of in
 */
 void do_lzma_compress(char *in, int in_len, char *out, int *out_len) {
 	std::vector<unsigned char> result;
 	result = LZMACompress(std::vector<unsigned char>(in, in + in_len));
 	*out_len = result.size();
 	if (*out_len < in_len)
 		std::memcpy(out, &result[0], *out_len);
 }
 void do_lzma_uncompress(char *dst, int dst_len, char *src, int src_len) {
 	std::vector<unsigned char> result;
 	result = LZMADeCompress(std::vector<unsigned char>(src, src + src_len));
 	if (result.size() <= (SizeT)dst_len)
 		std::memcpy(dst, &result[0], result.size());
 	else
 	{
 		fprintf(stderr, "Not copying %d bytes to %d-byte buffer!\n",
 			(unsigned int)result.size(), dst_len);
 		exit(1);
 	}
 }
 }
--- a/util/cbfstool/lzma/lzma.hh
+++ b/util/cbfstool/lzma/lzma.hh
@ -1,109 +0,0 @@
 #ifndef HHlzmaHH
 #define HHlzmaHH
 #include <vector>
 extern int LZMA_verbose;
 extern unsigned LZMA_NumFastBytes;
 extern unsigned LZMA_AlgorithmNo;
 extern unsigned LZMA_PosStateBits;
 extern unsigned LZMA_LiteralPosStateBits;
 extern unsigned LZMA_LiteralContextBits;
 /* decompress LZMA-compressed data. */
 const std::vector<unsigned char> LZMADeCompress
    (const unsigned char* data, std::size_t length);
 const std::vector<unsigned char> LZMADeCompress
    (const unsigned char* data, std::size_t length, bool& ok);
 static inline const std::vector<unsigned char> LZMADeCompress
    (const std::vector<unsigned char>& buf)
    { return LZMADeCompress(&buf[0], buf.size()); }
 static inline const std::vector<unsigned char> LZMADeCompress
    (const std::vector<unsigned char>& buf, bool& ok)
    { return LZMADeCompress(&buf[0], buf.size(), ok); }
 /* LZMA-compress data with current settings. */
 const std::vector<unsigned char> LZMACompress
    (const unsigned char* data, std::size_t length);
 static inline const std::vector<unsigned char> LZMACompress
    (const std::vector<unsigned char>& buf)
        { return LZMACompress(&buf[0], buf.size()); }
 /* LZMA-compress data with given settings. */
 const std::vector<unsigned char> LZMACompress
    (const unsigned char* data, std::size_t length,
     unsigned pb,
     unsigned lp,
     unsigned lc);
 static inline const std::vector<unsigned char> LZMACompress
    (const std::vector<unsigned char>& buf,
     unsigned pb,
     unsigned lp,
     unsigned lc)
     { return LZMACompress(&buf[0], buf.size(), pb,lp,lc); }
 const std::vector<unsigned char> LZMACompress(
    const unsigned char* data, std::size_t length,
    unsigned pb,
    unsigned lp,
    unsigned lc,
    unsigned dictionarysize);
 static inline const std::vector<unsigned char> LZMACompress(
    const std::vector<unsigned char>& buf,
    unsigned pb,
    unsigned lp,
    unsigned lc,
    unsigned dictionarysize)
    { return LZMACompress(&buf[0], buf.size(), pb,lp,lc,dictionarysize); }
 /* LZMA-compress data with every settings (5*5*9 times), taking the best.
 * It will consume a lot of time and output useful statistics,
 * so a context parameter ("why") is also given.
 */
 const std::vector<unsigned char> LZMACompressHeavy
    (const unsigned char* data, std::size_t length,
     const char* why = "?");
 const std::vector<unsigned char> LZMACompressAuto
    (const unsigned char* data, std::size_t length,
     const char* why = "?");
 static inline const std::vector<unsigned char> LZMACompressHeavy
    (const std::vector<unsigned char>& buf,
     const char* why = "?")
     { return LZMACompressHeavy(&buf[0],buf.size(),why); }
 static inline const std::vector<unsigned char> LZMACompressAuto
    (const std::vector<unsigned char>& buf,
     const char* why = "?")
     { return LZMACompressAuto(&buf[0],buf.size(),why); }
 const std::vector<unsigned char>
    DoLZMACompress(int HeavyLevel,
        const unsigned char* data,
        std::size_t          length,
        const char* why = "?");
 static inline const std::vector<unsigned char>
    DoLZMACompress(int HeavyLevel,
        const std::vector<unsigned char>& data, const char* why = "?")
    { return DoLZMACompress(HeavyLevel, &data[0], data.size(), why); }
 /*
 LZMA compressed file format
 ---------------------------
 Offset Size Description
   0     1   Special LZMA properties for compressed data
   1     4   Dictionary size (little endian)
   5     8   Uncompressed size (little endian). -1 means unknown size
  13         Compressed data
 */
 #endif