cbfs: Add LZ4 in-place decompression support for pre-RAM stages

This patch ports the LZ4 decompression code that debuted in libpayload last year to coreboot for use in CBFS stages (upgrading the base algorithm to LZ4's dev branch to access the new in-place decompression checks). This is especially useful for pre-RAM stages in constrained SRAM-based systems, which previously could not be compressed due to the size requirements of the LZMA scratchpad and bounce buffer. The LZ4 algorithm offers a very lean decompressor function and in-place decompression support to achieve roughly the same boot speed gains (trading compression ratio for decompression time) with nearly no memory overhead. For now we only activate it for the stages that had previously not been compressed at all on non-XIP (read: non-x86) boards. In the future we may also consider replacing LZMA completely for certain boards, since which algorithm wins out on boot speed depends on board-specific parameters (architecture, processor speed, SPI transfer rate, etc.). BRANCH=None BUG=None TEST=Built and booted Oak, Jerry, Nyan and Falco. Measured boot time on Oak to be about ~20ms faster (cutting load times for affected stages almost in half). Change-Id: Iec256c0e6d585d1b69985461939884a54e3ab900 Signed-off-by: Julius Werner <jwerner@chromium.org> Reviewed-on: https://review.coreboot.org/13638 Tested-by: build bot (Jenkins) Reviewed-by: Aaron Durbin <adurbin@chromium.org>
2015-09-29 13:51:35 -07:00 · 2015-09-29 13:51:35 -07:00 · 09f2921b5d
parent 0e3d7de741
commit 09f2921b5d
45 changed files with 7795 additions and 113 deletions
--- a/Makefile.inc
+++ b/Makefile.inc
@ -308,6 +308,11 @@ ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZMA),y)
 CBFS_PAYLOAD_COMPRESS_FLAG:=LZMA
 endif

+CBFS_PRERAM_COMPRESS_FLAG:=none
+ifeq ($(CONFIG_COMPRESS_PRERAM_STAGES),y)
+CBFS_PRERAM_COMPRESS_FLAG:=LZ4
+endif
+
 ifneq ($(CONFIG_LOCALVERSION),"")
 export COREBOOT_EXTRA_VERSION := -$(call strip_quotes,$(CONFIG_LOCALVERSION))
 endif
@ -773,7 +778,7 @@ endif
 cbfs-files-y += $(CONFIG_CBFS_PREFIX)/romstage
 $(CONFIG_CBFS_PREFIX)/romstage-file := $(objcbfs)/romstage.elf
 $(CONFIG_CBFS_PREFIX)/romstage-type := stage
-$(CONFIG_CBFS_PREFIX)/romstage-compression := none
+$(CONFIG_CBFS_PREFIX)/romstage-compression := $(CBFS_PRERAM_COMPRESS_FLAG)
 ifeq ($(CONFIG_ARCH_ROMSTAGE_ARM),y)
 $(CONFIG_CBFS_PREFIX)/romstage-options := -b 0
 endif
--- a/payloads/libpayload/include/cbfs_core.h
+++ b/payloads/libpayload/include/cbfs_core.h
@ -58,6 +58,7 @@

 #define CBFS_COMPRESS_NONE  0
 #define CBFS_COMPRESS_LZMA  1
+#define CBFS_COMPRESS_LZ4   2

 /** These are standard component types for well known
    components (i.e - those that coreboot needs to consume.
--- a/payloads/libpayload/include/lz4.h
+++ b/payloads/libpayload/include/lz4.h
@ -36,7 +36,10 @@

 /* Decompresses an LZ4F image (multiple LZ4 blocks with frame header) from src
 * to dst, ensuring that it doesn't read more than srcn bytes and doesn't write
- * more than dstn. Buffer sizes must stay below 2GB.
+ * more than dstn. Buffer sizes must stay below 2GB. Can decompress files loaded
+ * to the end of a buffer in-place, as long as buffer is larger than the final
+ * output size. (Usually just a few bytes, but may be up to (8 + dstn/255) in
+ * worst case. Will reliably return an error if buffer was too small.)
 * Returns amount of decompressed bytes, or 0 on error.
 */
 size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
@ -44,4 +47,4 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
 /* Same as ulz4fn() but does not perform any bounds checks. */
 size_t ulz4f(const void *src, void *dst);

-#endif /* __LZO_H_ */
+#endif /* __LZ4_H_ */
--- a/payloads/libpayload/libcbfs/cbfs.c
+++ b/payloads/libpayload/libcbfs/cbfs.c
@ -35,11 +35,16 @@
 #  include <lzma.h>
 #  define CBFS_CORE_WITH_LZMA
 # endif
+# if IS_ENABLED(CONFIG_LP_LZ4)
+#  include <lz4.h>
+#  define CBFS_CORE_WITH_LZ4
+# endif
 # define CBFS_MINI_BUILD
 #elif defined(__SMM__)
 # define CBFS_MINI_BUILD
 #else
 # define CBFS_CORE_WITH_LZMA
+# define CBFS_CORE_WITH_LZ4
 # include <lib.h>
 #endif

--- a/payloads/libpayload/libcbfs/cbfs_core.c
+++ b/payloads/libpayload/libcbfs/cbfs_core.c
@ -34,6 +34,9 @@
 * CBFS_CORE_WITH_LZMA (must be #define)
 *      if defined, ulzma() must exist for decompression of data streams
 *
+ * CBFS_CORE_WITH_LZ4 (must be #define)
+ *      if defined, ulz4f() must exist for decompression of data streams
+ *
 * ERROR(x...)
 *      print an error message x (in printf format)
 *
@ -329,6 +332,10 @@ int cbfs_decompress(int algo, void *src, void *dst, int len)
 #ifdef CBFS_CORE_WITH_LZMA
 		case CBFS_COMPRESS_LZMA:
 			return ulzma(src, dst);
+#endif
+#ifdef CBFS_CORE_WITH_LZ4
+		case CBFS_COMPRESS_LZ4:
+			return ulz4f(src, dst);
 #endif
 		default:
 			ERROR("tried to decompress %d bytes with algorithm #%x,"
--- a/payloads/libpayload/liblz4/lz4.c.inc
+++ b/payloads/libpayload/liblz4/lz4.c.inc
@ -37,12 +37,19 @@
 *  Reading and writing into memory
 **************************************/

-/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */
+/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
 static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
-    BYTE* e = (BYTE*)dstEnd;
+    BYTE* const e = (BYTE*)dstEnd;
+
+#if 0
+    const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
+    LZ4_copy8(d,s); if (d>e-9) return;
+    d+=l2; s+=l2;
+#endif /* join to align */
+
    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
 }

@ -52,9 +59,9 @@ static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
 **************************************/
 #define MINMATCH 4

-#define COPYLENGTH 8
+#define WILDCOPYLENGTH 8
 #define LASTLITERALS 5
-#define MFLIMIT (COPYLENGTH+MINMATCH)
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
 static const int LZ4_minLength = (MFLIMIT+1);

 #define KB *(1 <<10)
@ -114,11 +121,12 @@ FORCE_INLINE int LZ4_decompress_generic(
    const BYTE* const lowLimit = lowPrefix - dictSize;

    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
-    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
-    const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+    const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};

    const int safeDecode = (endOnInput==endOnInputSize);
    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+    const int inPlaceDecode = ((ip >= op) && (ip < oend));


    /* Special cases */
@ -133,6 +141,9 @@ FORCE_INLINE int LZ4_decompress_generic(
        unsigned token;
        size_t length;
        const BYTE* match;
+        size_t offset;
+
+        if (unlikely((inPlaceDecode) && (op + WILDCOPYLENGTH > ip))) goto _output_error;   /* output stream ran over input stream */

        /* get literal length */
        token = *ip++;
@ -144,7 +155,7 @@ FORCE_INLINE int LZ4_decompress_generic(
                s = *ip++;
                length += s;
            }
-            while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255));
+            while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) && (s==255) );
            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
        }
@ -152,7 +163,7 @@ FORCE_INLINE int LZ4_decompress_generic(
        /* copy literals */
        cpy = op+length;
        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
-            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)))
        {
            if (partialDecoding)
            {
@ -164,7 +175,7 @@ FORCE_INLINE int LZ4_decompress_generic(
                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
            }
-            memcpy(op, ip, length);
+            memmove(op, ip, length);
            ip += length;
            op += length;
            break;     /* Necessarily EOF, due to parsing restrictions */
@ -173,8 +184,9 @@ FORCE_INLINE int LZ4_decompress_generic(
        ip += length; op = cpy;

        /* get offset */
-        match = cpy - LZ4_readLE16(ip); ip+=2;
-        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside destination buffer */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */

        /* get matchlength */
        length = token & ML_MASK;
@ -204,12 +216,12 @@ FORCE_INLINE int LZ4_decompress_generic(
            }
            else
            {
-                /* match encompass external dictionary and current segment */
+                /* match encompass external dictionary and current block */
                size_t copySize = (size_t)(lowPrefix-match);
                memcpy(op, dictEnd - copySize, copySize);
                op += copySize;
                copySize = length - copySize;
-                if (copySize > (size_t)(op-lowPrefix))   /* overlap within current segment */
+                if (copySize > (size_t)(op-lowPrefix))   /* overlap copy */
                {
                    BYTE* const endOfMatch = op + copySize;
                    const BYTE* copyFrom = lowPrefix;
@ -224,28 +236,30 @@ FORCE_INLINE int LZ4_decompress_generic(
            continue;
        }

-        /* copy repeated sequence */
+        /* copy match within block */
        cpy = op + length;
-        if (unlikely((op-match)<8))
+        if (unlikely(offset<8))
        {
-            const size_t dec64 = dec64table[op-match];
+            const int dec64 = dec64table[offset];
            op[0] = match[0];
            op[1] = match[1];
            op[2] = match[2];
            op[3] = match[3];
-            match += dec32table[op-match];
-            LZ4_copy4(op+4, match);
-            op += 8; match -= dec64;
-        } else { LZ4_copy8(op, match); op+=8; match+=8; }
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;

        if (unlikely(cpy>oend-12))
        {
-            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals */
-            if (op < oend-8)
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit)
            {
-                LZ4_wildCopy(op, match, oend-8);
-                match += (oend-8) - op;
-                op = oend-8;
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
            }
            while (op<cpy) *op++ = *match++;
        }
--- a/payloads/libpayload/liblz4/lz4_wrapper.c
+++ b/payloads/libpayload/liblz4/lz4_wrapper.c
@ -29,7 +29,6 @@
 * SUCH DAMAGE.
 */

-#include <assert.h>
 #include <endian.h>
 #include <libpayload.h>
 #include <lz4.h>
@ -38,9 +37,28 @@
 * seem to be very inefficient in practice (at least on ARM64). Since libpayload
 * knows about endinaness and allows some basic assumptions (such as unaligned
 * access support), we can easily write the ones we need ourselves. */
-static u16 LZ4_readLE16(const void *src) { return le16toh(*(u16 *)src); }
-static void LZ4_copy4(void *dst, const void *src) { *(u32 *)dst = *(u32 *)src; }
-static void LZ4_copy8(void *dst, const void *src) { *(u64 *)dst = *(u64 *)src; }
+static uint16_t LZ4_readLE16(const void *src)
+{
+	return le16toh(*(uint16_t *)src);
+}
+static void LZ4_copy8(void *dst, const void *src)
+{
+/* ARM32 needs to be a special snowflake to prevent GCC from coalescing the
+ * access into LDRD/STRD (which don't support unaligned accesses). */
+#ifdef __arm__
+	uint32_t x0, x1;
+	asm volatile (
+		"ldr %[x0], [%[src]]\n\t"
+		"ldr %[x1], [%[src], #4]\n\t"
+		"str %[x0], [%[dst]]\n\t"
+		"str %[x1], [%[dst], #4]\n\t"
+		: [x0]"=r"(x0), [x1]"=r"(x1)
+		: [src]"r"(src), [dst]"r"(dst)
+		: "memory" );
+#else
+	*(uint64_t *)dst = *(const uint64_t *)src;
+#endif
+}

 typedef  uint8_t BYTE;
 typedef uint16_t U16;
@ -52,58 +70,59 @@ typedef uint64_t U64;
 #define likely(expr) __builtin_expect((expr) != 0, 1)
 #define unlikely(expr) __builtin_expect((expr) != 0, 0)

-/* Unaltered (except removing unrelated code) from github.com/Cyan4973/lz4. */
-#include "lz4.c"	/* #include for inlining, do not link! */
+/* Unaltered (just removed unrelated code) from github.com/Cyan4973/lz4/dev. */
+#include "lz4.c.inc"	/* #include for inlining, do not link! */

 #define LZ4F_MAGICNUMBER 0x184D2204

 struct lz4_frame_header {
-	u32 magic;
+	uint32_t magic;
 	union {
-		u8 flags;
+		uint8_t flags;
 		struct {
-			u8 reserved0		: 2;
-			u8 has_content_checksum	: 1;
-			u8 has_content_size	: 1;
-			u8 has_block_checksum	: 1;
-			u8 independent_blocks	: 1;
-			u8 version		: 2;
+			uint8_t reserved0		: 2;
+			uint8_t has_content_checksum	: 1;
+			uint8_t has_content_size	: 1;
+			uint8_t has_block_checksum	: 1;
+			uint8_t independent_blocks	: 1;
+			uint8_t version			: 2;
 		};
 	};
 	union {
-		u8 block_descriptor;
+		uint8_t block_descriptor;
 		struct {
-			u8 reserved1		: 4;
-			u8 max_block_size	: 3;
-			u8 reserved2		: 1;
+			uint8_t reserved1		: 4;
+			uint8_t max_block_size		: 3;
+			uint8_t reserved2		: 1;
 		};
 	};
-	/* + u64 content_size iff has_content_size is set */
-	/* + u8 header_checksum */
+	/* + uint64_t content_size iff has_content_size is set */
+	/* + uint8_t header_checksum */
 } __attribute__((packed));

 struct lz4_block_header {
 	union {
-		u32 raw;
+		uint32_t raw;
 		struct {
-			u32 size		: 31;
-			u32 not_compressed	: 1;
+			uint32_t size		: 31;
+			uint32_t not_compressed	: 1;
 		};
 	};
 	/* + size bytes of data */
-	/* + u32 block_checksum iff has_block_checksum is set */
+	/* + uint32_t block_checksum iff has_block_checksum is set */
 } __attribute__((packed));

 size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
 {
 	const void *in = src;
 	void *out = dst;
+	size_t out_size = 0;
 	int has_block_checksum;

 	{ /* With in-place decompression the header may become invalid later. */
 		const struct lz4_frame_header *h = in;

-		if (srcn < sizeof(*h) + sizeof(u64) + sizeof(u8))
+		if (srcn < sizeof(*h) + sizeof(uint64_t) + sizeof(uint8_t))
 			return 0;	/* input overrun */

 		/* We assume there's always only a single, standard frame. */
@ -117,25 +136,27 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)

 		in += sizeof(*h);
 		if (h->has_content_size)
-			in += sizeof(u64);
-		in += sizeof(u8);
+			in += sizeof(uint64_t);
+		in += sizeof(uint8_t);
 	}

 	while (1) {
-		struct lz4_block_header b = { .raw = le32toh(*(u32 *)in) };
+		struct lz4_block_header b = { .raw = le32toh(*(uint32_t *)in) };
 		in += sizeof(struct lz4_block_header);

-		if (in - src + b.size > srcn)
-			return 0;		/* input overrun */
+		if ((size_t)(in - src) + b.size > srcn)
+			break;			/* input overrun */

-		if (!b.size)
-			return out - dst;	/* decompression successful */
+		if (!b.size) {
+			out_size = out - dst;
+			break;			/* decompression successful */
+		}

 		if (b.not_compressed) {
-			size_t size = MIN((u32)b.size, dst + dstn - out);
+			size_t size = MIN((uint32_t)b.size, dst + dstn - out);
 			memcpy(out, in, size);
 			if (size < b.size)
-				return 0;	/* output overrun */
+				break;		/* output overrun */
 			else
 				out += size;
 		} else {
@ -144,15 +165,17 @@ size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
 					dst + dstn - out, endOnInputSize,
 					full, 0, noDict, out, NULL, 0);
 			if (ret < 0)
-				return 0;	/* decompression error */
+				break;		/* decompression error */
 			else
 				out += ret;
 		}

 		in += b.size;
 		if (has_block_checksum)
-			in += sizeof(u32);
+			in += sizeof(uint32_t);
 	}
+
+	return out_size;
 }

 size_t ulz4f(const void *src, void *dst)
--- a/src/Kconfig
+++ b/src/Kconfig
@ -156,6 +156,17 @@ config COMPRESS_RAMSTAGE
 	  that decompression might slow down booting if the boot flash
 	  is connected through a slow link (i.e. SPI).

+config COMPRESS_PRERAM_STAGES
+	bool "Compress romstage and verstage with LZ4"
+	default y if !ARCH_X86
+	default n
+	help
+	  Compress romstage and (if it exists) verstage with LZ4 to save flash
+	  space and speed up boot, since the time for reading the image from SPI
+	  (and in the vboot case verifying it) is usually much greater than the
+	  time spent decompressing. Doesn't work for XIP stages (assume all
+	  ARCH_X86 for now) for obvious reasons.
+
 config INCLUDE_CONFIG_FILE
 	bool "Include the coreboot .config file into the ROM image"
 	default y
--- a/src/commonlib/Makefile.inc
+++ b/src/commonlib/Makefile.inc
@ -16,3 +16,8 @@ verstage-y += cbfs.c
 romstage-y += cbfs.c
 ramstage-y += cbfs.c
 smm-y += cbfs.c
+
+bootblock-y += lz4_wrapper.c
+verstage-y += lz4_wrapper.c
+romstage-y += lz4_wrapper.c
+ramstage-y += lz4_wrapper.c
--- a/src/commonlib/include/commonlib/cbfs_serialized.h
+++ b/src/commonlib/include/commonlib/cbfs_serialized.h
@ -56,6 +56,7 @@

 #define CBFS_COMPRESS_NONE  0
 #define CBFS_COMPRESS_LZMA  1
+#define CBFS_COMPRESS_LZ4   2

 /** These are standard component types for well known
    components (i.e - those that coreboot needs to consume.
--- a/src/commonlib/include/commonlib/compression.h
+++ b/src/commonlib/include/commonlib/compression.h
@ -0,0 +1,34 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright 2016 Google Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _COMMONLIB_COMPRESSION_H_
+#define _COMMONLIB_COMPRESSION_H_
+
+#include <stddef.h>
+
+/* Decompresses an LZ4F image (multiple LZ4 blocks with frame header) from src
+ * to dst, ensuring that it doesn't read more than srcn bytes and doesn't write
+ * more than dstn. Buffer sizes must stay below 2GB. Can decompress files loaded
+ * to the end of a buffer in-place, as long as buffer is larger than the final
+ * output size. (Usually just a few bytes, but may be up to (8 + dstn/255) in
+ * worst case. Will reliably return an error if buffer was too small.)
+ * Returns amount of decompressed bytes, or 0 on error.
+ */
+size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn);
+
+/* Same as ulz4fn() but does not perform any bounds checks. */
+size_t ulz4f(const void *src, void *dst);
+
+#endif	/* _COMMONLIB_COMPRESSION_H_ */
--- a/src/commonlib/include/commonlib/timestamp_serialized.h
+++ b/src/commonlib/include/commonlib/timestamp_serialized.h
@ -47,6 +47,8 @@ enum timestamp_id {
 	TS_END_COPYROM = 14,
 	TS_START_ULZMA = 15,
 	TS_END_ULZMA = 16,
+	TS_START_ULZ4F = 17,
+	TS_END_ULZ4F = 18,
 	TS_DEVICE_ENUMERATE = 30,
 	TS_DEVICE_CONFIGURE = 40,
 	TS_DEVICE_ENABLE = 50,
--- a/src/commonlib/lz4.c.inc
+++ b/src/commonlib/lz4.c.inc
@ -0,0 +1,280 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+/**************************************
+*  Reading and writing into memory
+**************************************/
+
+/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
+static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = (BYTE*)dstPtr;
+    const BYTE* s = (const BYTE*)srcPtr;
+    BYTE* const e = (BYTE*)dstEnd;
+
+#if 0
+    const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1));
+    LZ4_copy8(d,s); if (d>e-9) return;
+    d+=l2; s+=l2;
+#endif /* join to align */
+
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+
+/**************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/**************************************
+*  Local Structures and types
+**************************************/
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+
+
+/*******************************
+*  Decompression functions
+*******************************/
+/*
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is essential this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+FORCE_INLINE int LZ4_decompress_generic(
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest if dict == noDict */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    /* Local Variables */
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const unsigned dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+    const int inPlaceDecode = ((ip >= op) && (ip < oend));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                         /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+
+    /* Main Loop */
+    while (1)
+    {
+        unsigned token;
+        size_t length;
+        const BYTE* match;
+        size_t offset;
+
+        if (unlikely((inPlaceDecode) && (op + WILDCOPYLENGTH > ip))) goto _output_error;   /* output stream ran over input stream */
+
+        /* get literal length */
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK)
+        {
+            unsigned s;
+            do
+            {
+                s = *ip++;
+                length += s;
+            }
+            while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) && (s==255) );
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)))
+        {
+            if (partialDecoding)
+            {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            }
+            else
+            {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memmove(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        offset = LZ4_readLE16(ip); ip+=2;
+        match = op - offset;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside buffers */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK)
+        {
+            unsigned s;
+            do
+            {
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                s = *ip++;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix))
+        {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match))
+            {
+                /* match can be copied as a single segment from external dictionary */
+                match = dictEnd - (lowPrefix-match);
+                memmove(op, match, length); op += length;
+            }
+            else
+            {
+                /* match encompass external dictionary and current block */
+                size_t copySize = (size_t)(lowPrefix-match);
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                copySize = length - copySize;
+                if (copySize > (size_t)(op-lowPrefix))   /* overlap copy */
+                {
+                    BYTE* const endOfMatch = op + copySize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                }
+                else
+                {
+                    memcpy(op, lowPrefix, copySize);
+                    op += copySize;
+                }
+            }
+            continue;
+        }
+
+        /* copy match within block */
+        cpy = op + length;
+        if (unlikely(offset<8))
+        {
+            const int dec64 = dec64table[offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[offset];
+            memcpy(op+4, match, 4);
+            match -= dec64;
+        } else { LZ4_copy8(op, match); match+=8; }
+        op += 8;
+
+        if (unlikely(cpy>oend-12))
+        {
+            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+            if (op < oCopyLimit)
+            {
+                LZ4_wildCopy(op, match, oCopyLimit);
+                match += oCopyLimit - op;
+                op = oCopyLimit;
+            }
+            while (op<cpy) *op++ = *match++;
+        }
+        else
+            LZ4_wildCopy(op, match, cpy);
+        op=cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+    else
+       return (int) (((const char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
+_output_error:
+    return (int) (-(((const char*)ip)-source))-1;
+}
--- a/src/commonlib/lz4_wrapper.c
+++ b/src/commonlib/lz4_wrapper.c
@ -0,0 +1,193 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <commonlib/compression.h>
+#include <commonlib/endian.h>
+#include <commonlib/helpers.h>
+#include <stdint.h>
+#include <string.h>
+
+/* LZ4 comes with its own supposedly portable memory access functions, but they
+ * seem to be very inefficient in practice (at least on ARM64). Since coreboot
+ * knows about endinaness and allows some basic assumptions (such as unaligned
+ * access support), we can easily write the ones we need ourselves. */
+static uint16_t LZ4_readLE16(const void *src)
+{
+	return read_le16(src);
+}
+static void LZ4_copy8(void *dst, const void *src)
+{
+/* ARM32 needs to be a special snowflake to prevent GCC from coalescing the
+ * access into LDRD/STRD (which don't support unaligned accesses). */
+#ifdef __arm__	/* ARMv < 6 doesn't support unaligned accesses at all. */
+	#if defined(__COREBOOT_ARM_ARCH__) && __COREBOOT_ARM_ARCH__ < 6
+		int i;
+		for (i = 0; i < 8; i++)
+			((uint8_t *)dst)[i] = ((uint8_t *)src)[i];
+	#else
+		uint32_t x0, x1;
+		asm volatile (
+			"ldr %[x0], [%[src]]\n\t"
+			"ldr %[x1], [%[src], #4]\n\t"
+			"str %[x0], [%[dst]]\n\t"
+			"str %[x1], [%[dst], #4]\n\t"
+			: [x0]"=r"(x0), [x1]"=r"(x1)
+			: [src]"r"(src), [dst]"r"(dst)
+			: "memory" );
+	#endif
+#else
+	*(uint64_t *)dst = *(const uint64_t *)src;
+#endif
+}
+
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+
+#define FORCE_INLINE static inline __attribute__((always_inline))
+#define likely(expr) __builtin_expect((expr) != 0, 1)
+#define unlikely(expr) __builtin_expect((expr) != 0, 0)
+
+/* Unaltered (just removed unrelated code) from github.com/Cyan4973/lz4/dev. */
+#include "lz4.c.inc"	/* #include for inlining, do not link! */
+
+#define LZ4F_MAGICNUMBER 0x184D2204
+
+struct lz4_frame_header {
+	uint32_t magic;
+	union {
+		uint8_t flags;
+		struct {
+			uint8_t reserved0		: 2;
+			uint8_t has_content_checksum	: 1;
+			uint8_t has_content_size	: 1;
+			uint8_t has_block_checksum	: 1;
+			uint8_t independent_blocks	: 1;
+			uint8_t version			: 2;
+		};
+	};
+	union {
+		uint8_t block_descriptor;
+		struct {
+			uint8_t reserved1		: 4;
+			uint8_t max_block_size		: 3;
+			uint8_t reserved2		: 1;
+		};
+	};
+	/* + uint64_t content_size iff has_content_size is set */
+	/* + uint8_t header_checksum */
+} __attribute__((packed));
+
+struct lz4_block_header {
+	union {
+		uint32_t raw;
+		struct {
+			uint32_t size		: 31;
+			uint32_t not_compressed	: 1;
+		};
+	};
+	/* + size bytes of data */
+	/* + uint32_t block_checksum iff has_block_checksum is set */
+} __attribute__((packed));
+
+size_t ulz4fn(const void *src, size_t srcn, void *dst, size_t dstn)
+{
+	const void *in = src;
+	void *out = dst;
+	size_t out_size = 0;
+	int has_block_checksum;
+
+	{ /* With in-place decompression the header may become invalid later. */
+		const struct lz4_frame_header *h = in;
+
+		if (srcn < sizeof(*h) + sizeof(uint64_t) + sizeof(uint8_t))
+			return 0;	/* input overrun */
+
+		/* We assume there's always only a single, standard frame. */
+		if (read_le32(&h->magic) != LZ4F_MAGICNUMBER || h->version != 1)
+			return 0;	/* unknown format */
+		if (h->reserved0 || h->reserved1 || h->reserved2)
+			return 0;	/* reserved must be zero */
+		if (!h->independent_blocks)
+			return 0;	/* we don't support block dependency */
+		has_block_checksum = h->has_block_checksum;
+
+		in += sizeof(*h);
+		if (h->has_content_size)
+			in += sizeof(uint64_t);
+		in += sizeof(uint8_t);
+	}
+
+	while (1) {
+		struct lz4_block_header b = { .raw = read_le32(in) };
+		in += sizeof(struct lz4_block_header);
+
+		if ((size_t)(in - src) + b.size > srcn)
+			break;			/* input overrun */
+
+		if (!b.size) {
+			out_size = out - dst;
+			break;			/* decompression successful */
+		}
+
+		if (b.not_compressed) {
+			size_t size = MIN((uint32_t)b.size, dst + dstn - out);
+			memcpy(out, in, size);
+			if (size < b.size)
+				break;		/* output overrun */
+			else
+				out += size;
+		} else {
+			/* constant folding essential, do not touch params! */
+			int ret = LZ4_decompress_generic(in, out, b.size,
+					dst + dstn - out, endOnInputSize,
+					full, 0, noDict, out, NULL, 0);
+			if (ret < 0)
+				break;		/* decompression error */
+			else
+				out += ret;
+		}
+
+		in += b.size;
+		if (has_block_checksum)
+			in += sizeof(uint32_t);
+	}
+
+	return out_size;
+}
+
+size_t ulz4f(const void *src, void *dst)
+{
+	/* LZ4 uses signed size parameters, so can't just use ((u32)-1) here. */
+	return ulz4fn(src, 1*GiB, dst, 1*GiB);
+}
--- a/src/include/cbfs.h
+++ b/src/include/cbfs.h
@ -35,6 +35,14 @@ int cbfs_boot_locate(struct cbfsf *fh, const char *name, uint32_t *type);
 * the mapping and sets the size of the file. */
 void *cbfs_boot_map_with_leak(const char *name, uint32_t type, size_t *size);

+/* Load |in_size| bytes from |rdev| at |offset| to the |buffer_size| bytes
+ * large |buffer|, decompressing it according to |compression| in the process.
+ * Returns the decompressed file size, or 0 on error.
+ * LZMA files will be mapped for decompression. LZ4 files will be decompressed
+ * in-place with the buffer size requirements outlined in compression.h. */
+size_t cbfs_load_and_decompress(const struct region_device *rdev, size_t offset,
+	size_t in_size, void *buffer, size_t buffer_size, uint32_t compression);
+
 /* Load stage into memory filling in prog. Return 0 on success. < 0 on error. */
 int cbfs_prog_stage_load(struct prog *prog);

--- a/src/lib/cbfs.c
+++ b/src/lib/cbfs.c
@ -19,9 +19,11 @@
 #include <stdlib.h>
 #include <boot_device.h>
 #include <cbfs.h>
+#include <commonlib/compression.h>
 #include <endian.h>
 #include <lib.h>
 #include <symbols.h>
+#include <timestamp.h>

 #define ERROR(x...) printk(BIOS_ERR, "CBFS: " x)
 #define LOG(x...) printk(BIOS_INFO, "CBFS: " x)
@ -68,13 +70,57 @@ void *cbfs_boot_map_with_leak(const char *name, uint32_t type, size_t *size)
 	return rdev_mmap(&fh.data, 0, fsize);
 }

-static size_t inflate(void *src, void *dst)
+size_t cbfs_load_and_decompress(const struct region_device *rdev, size_t offset,
+	size_t in_size, void *buffer, size_t buffer_size, uint32_t compression)
 {
+	size_t out_size;
+
+	switch (compression) {
+	case CBFS_COMPRESS_NONE:
+		if (rdev_readat(rdev, buffer, offset, in_size) != in_size)
+			return 0;
+		return in_size;
+
+	case CBFS_COMPRESS_LZ4:
+		if ((ENV_BOOTBLOCK || ENV_VERSTAGE) &&
+		    !IS_ENABLED(CONFIG_COMPRESS_PRERAM_STAGES))
+			return 0;
+
+		/* Load the compressed image to the end of the available memory
+		 * area for in-place decompression. It is the responsibility of
+		 * the caller to ensure that buffer_size is large enough
+		 * (see compression.h, guaranteed by cbfstool for stages). */
+		void *compr_start = buffer + buffer_size - in_size;
+		if (rdev_readat(rdev, compr_start, offset, in_size) != in_size)
+			return 0;
+
+		timestamp_add_now(TS_START_ULZ4F);
+		out_size = ulz4fn(compr_start, in_size, buffer, buffer_size);
+		timestamp_add_now(TS_END_ULZ4F);
+		return out_size;
+
+	case CBFS_COMPRESS_LZMA:
 		if (ENV_BOOTBLOCK || ENV_VERSTAGE)
 			return 0;
 		if (ENV_ROMSTAGE && !IS_ENABLED(CONFIG_COMPRESS_RAMSTAGE))
 			return 0;
-	return ulzma(src, dst);
+
+		void *map = rdev_mmap(rdev, offset, in_size);
+		if (map == NULL)
+			return 0;
+
+		/* Note: timestamp not useful for memory-mapped media (x86) */
+		timestamp_add_now(TS_START_ULZMA);
+		out_size = ulzman(map, in_size, buffer, buffer_size);
+		timestamp_add_now(TS_END_ULZMA);
+
+		rdev_munmap(rdev, map);
+
+		return out_size;
+
+	default:
+		return 0;
+	}
 }

 static inline int tohex4(unsigned int c)
@ -152,23 +198,10 @@ int cbfs_prog_stage_load(struct prog *pstage)
 			goto out;
 	}

-	if (stage.compression == CBFS_COMPRESS_NONE) {
-		if (rdev_readat(fh, load, foffset, fsize) != fsize)
-			return -1;
-	} else if (stage.compression == CBFS_COMPRESS_LZMA) {
-		void *map = rdev_mmap(fh, foffset, fsize);
-
-		if (map == NULL)
-			return -1;
-
-		fsize = inflate(map, load);
-
-		rdev_munmap(fh, map);
-
+	fsize = cbfs_load_and_decompress(fh, foffset, fsize, load,
+					 stage.memlen, stage.compression);
 	if (!fsize)
 		return -1;
-	} else
-		return -1;

 	/* Clear area not covered by file. */
 	memset(&load[fsize], 0, stage.memlen - fsize);
--- a/src/lib/lzma.c
+++ b/src/lib/lzma.c
@ -29,8 +29,6 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
 	MAYBE_STATIC unsigned char scratchpad[15980];
 	const unsigned char *cp;

-	/* Note: these timestamps aren't useful for memory-mapped media (x86) */
-	timestamp_add_now(TS_START_ULZMA);
 	memcpy(properties, src, LZMA_PROPERTIES_SIZE);
 	/* The outSize in LZMA stream is a 64bit integer stored in little-endian
 	 * (ref: lzma.cc@LZMACompress: put_64). To prevent accessing by
@ -55,7 +53,6 @@ size_t ulzman(const void *src, size_t srcn, void *dst, size_t dstn)
 		printk(BIOS_WARNING, "lzma: Decoding error = %d\n", res);
 		return 0;
 	}
-	timestamp_add_now(TS_END_ULZMA);
 	return outProcessed;
 }

--- a/src/lib/rmodule.c
+++ b/src/lib/rmodule.c
@ -259,8 +259,6 @@ int rmodule_stage_load(struct rmod_stage_load *rsl)
 	struct cbfs_stage stage;
 	void *rmod_loc;
 	struct region_device *fh;
-	const int use_lzma = ENV_RAMSTAGE
-		|| (ENV_ROMSTAGE && IS_ENABLED(CONFIG_COMPRESS_RAMSTAGE));

 	if (rsl->prog == NULL || prog_name(rsl->prog) == NULL)
 		return -1;
@ -284,24 +282,8 @@ int rmodule_stage_load(struct rmod_stage_load *rsl)
 	printk(BIOS_INFO, "Decompressing stage %s @ 0x%p (%d bytes)\n",
 	       prog_name(rsl->prog), rmod_loc, stage.memlen);

-	if (stage.compression == CBFS_COMPRESS_NONE) {
-		if (rdev_readat(fh, rmod_loc, sizeof(stage), stage.len) !=
-		    stage.len)
-			return -1;
-	} else if (use_lzma && (stage.compression == CBFS_COMPRESS_LZMA)) {
-		size_t fsize;
-		void *map = rdev_mmap(fh, sizeof(stage), stage.len);
-
-		if (map == NULL)
-			return -1;
-
-		fsize = ulzman(map, stage.len, rmod_loc, stage.memlen);
-
-		rdev_munmap(fh, map);
-
-		if (!fsize)
-			return -1;
-	} else
+	if (!cbfs_load_and_decompress(fh, sizeof(stage), stage.len, rmod_loc,
+				      stage.memlen, stage.compression))
 		return -1;

 	if (rmodule_parse(rmod_loc, &rmod_stage))
--- a/src/lib/selfboot.c
+++ b/src/lib/selfboot.c
@ -14,6 +14,7 @@
 * GNU General Public License for more details.
 */

+#include <commonlib/compression.h>
 #include <console/console.h>
 #include <cpu/cpu.h>
 #include <endian.h>
@ -25,6 +26,7 @@
 #include <lib.h>
 #include <bootmem.h>
 #include <program_loading.h>
+#include <timestamp.h>

 static const unsigned long lb_start = (unsigned long)&_program;
 static const unsigned long lb_end = (unsigned long)&_eprogram;
@ -386,7 +388,18 @@ static int load_self_segments(
 			switch(ptr->compression) {
 				case CBFS_COMPRESS_LZMA: {
 					printk(BIOS_DEBUG, "using LZMA\n");
+					timestamp_add_now(TS_START_ULZMA);
 					len = ulzman(src, len, dest, memsz);
+					timestamp_add_now(TS_END_ULZMA);
+					if (!len) /* Decompression Error. */
+						return 0;
+					break;
+				}
+				case CBFS_COMPRESS_LZ4: {
+					printk(BIOS_DEBUG, "using LZ4\n");
+					timestamp_add_now(TS_START_ULZ4F);
+					len = ulz4fn(src, len, dest, memsz);
+					timestamp_add_now(TS_END_ULZ4F);
 					if (!len) /* Decompression Error. */
 						return 0;
 					break;
--- a/src/vendorcode/google/chromeos/vboot2/Makefile.inc
+++ b/src/vendorcode/google/chromeos/vboot2/Makefile.inc
@ -72,6 +72,7 @@ ifeq ($(CONFIG_SEPARATE_VERSTAGE),y)
 cbfs-files-$(CONFIG_SEPARATE_VERSTAGE) += $(CONFIG_CBFS_PREFIX)/verstage
 $(CONFIG_CBFS_PREFIX)/verstage-file := $(objcbfs)/verstage.elf
 $(CONFIG_CBFS_PREFIX)/verstage-type := stage
+$(CONFIG_CBFS_PREFIX)/verstage-compression := $(CBFS_PRERAM_COMPRESS_FLAG)
 # Verstage on x86 expected to be xip.
 ifeq ($(CONFIG_ARCH_VERSTAGE_X86_32)$(CONFIG_ARCH_VERSTAGE_X86_64),y)
 $(CONFIG_CBFS_PREFIX)/verstage-options := -a 64 --xip -S ".car.data"
--- a/util/cbfstool/Makefile.inc
+++ b/util/cbfstool/Makefile.inc
@ -2,19 +2,21 @@ cbfsobj :=
 cbfsobj += cbfstool.o
 cbfsobj += common.o
 cbfsobj += compress.o
-cbfsobj += cbfs.o
 cbfsobj += cbfs_hash.o
 cbfsobj += cbfs_image.o
 cbfsobj += cbfs-mkstage.o
 cbfsobj += cbfs-mkpayload.o
 cbfsobj += elfheaders.o
-cbfsobj += mem_pool.o
-cbfsobj += region.o
 cbfsobj += rmodule.o
 cbfsobj += xdr.o
 cbfsobj += fit.o
 cbfsobj += partitioned_file.o
+# COMMONLIB
+cbfsobj += cbfs.o
 cbfsobj += fsp1_1_relocate.o
+cbfsobj += lz4_wrapper.o
+cbfsobj += mem_pool.o
+cbfsobj += region.o
 # LZMA
 cbfsobj += lzma.o
 cbfsobj += LzFind.o
@ -25,6 +27,11 @@ cbfsobj += 2sha_utility.o
 cbfsobj += 2sha1.o
 cbfsobj += 2sha256.o
 cbfsobj += 2sha512.o
+# LZ4
+cbfsobj += lz4.o
+cbfsobj += lz4hc.o
+cbfsobj += lz4frame.o
+cbfsobj += xxhash.o
 # FMAP
 cbfsobj += fmap.o
 cbfsobj += kv_pair.o
@ -110,6 +117,10 @@ $(objutil)/cbfstool/%.o: $(top)/src/commonlib/%.c
 	printf "    HOSTCC     $(subst $(objutil)/,,$(@))\n"
 	$(HOSTCC) $(TOOLCPPFLAGS) $(TOOLCFLAGS) $(HOSTCFLAGS) -c -o $@ $<

+$(objutil)/cbfstool/%.o: $(top)/util/cbfstool/lz4/lib/%.c
+	printf "    HOSTCC     $(subst $(objutil)/,,$(@))\n"
+	$(HOSTCC) $(TOOLCPPFLAGS) $(TOOLCFLAGS) $(HOSTCFLAGS) -c -o $@ $<
+
 $(objutil)/cbfstool/cbfstool: $(addprefix $(objutil)/cbfstool/,$(cbfsobj))
 	printf "    HOSTCC     $(subst $(objutil)/,,$(@)) (link)\n"
 	$(HOSTCC) $(TOOLLDFLAGS) -o $@ $(addprefix $(objutil)/cbfstool/,$(cbfsobj))
@ -137,6 +148,8 @@ $(objutil)/cbfstool/2sha1.o: TOOLCFLAGS += -Wno-cast-qual
 $(objutil)/cbfstool/region.o: TOOLCFLAGS += -Wno-sign-compare -Wno-cast-qual
 $(objutil)/cbfstool/cbfs.o: TOOLCFLAGS += -Wno-sign-compare -Wno-cast-qual
 $(objutil)/cbfstool/mem_pool.o: TOOLCFLAGS += -Wno-sign-compare -Wno-cast-qual
+# Tolerate lz4 warnings
+$(objutil)/cbfstool/lz4.o: TOOLCFLAGS += -Wno-missing-prototypes

 $(objutil)/cbfstool/fmd.o: $(objutil)/cbfstool/fmd_parser.h
 $(objutil)/cbfstool/fmd.o: $(objutil)/cbfstool/fmd_scanner.h
--- a/util/cbfstool/cbfs-mkstage.c
+++ b/util/cbfstool/cbfs-mkstage.c
@ -26,6 +26,8 @@
 #include "cbfs.h"
 #include "rmodule.h"

+#include <commonlib/compression.h>
+
 /* Checks if program segment contains the ignored section */
 static int is_phdr_ignored(Elf64_Phdr *phdr, Elf64_Shdr *shdr)
 {
@ -267,8 +269,44 @@ int parse_elf_to_stage(const struct buffer *input, struct buffer *output,
 		     "- disabled.\n");
 		memcpy(output->data + sizeof(struct cbfs_stage),
 		       buffer, data_end - data_start);
+		outlen = data_end - data_start;
 		algo = CBFS_COMPRESS_NONE;
 	}
+
+	/* Check for enough BSS scratch space to decompress LZ4 in-place. */
+	if (algo == CBFS_COMPRESS_LZ4) {
+		size_t result;
+		size_t memlen = mem_end - data_start;
+		size_t compressed_size = outlen;
+		char *compare_buffer = malloc(memlen);
+		char *start = compare_buffer + memlen - compressed_size;
+
+		if (compare_buffer == NULL) {
+			ERROR("Can't allocate memory!\n");
+			free(buffer);
+			goto err;
+		}
+
+		memcpy(start, output->data + sizeof(struct cbfs_stage),
+		       compressed_size);
+		result = ulz4fn(start, compressed_size, compare_buffer, memlen);
+
+		if (result == 0) {
+			ERROR("Not enough scratch space to decompress LZ4 in-place -- increase BSS size or disable compression!\n");
+			free(compare_buffer);
+			free(buffer);
+			goto err;
+		}
+		if (result != data_end - data_start ||
+		    memcmp(compare_buffer, buffer, data_end - data_start)) {
+			ERROR("LZ4 compression BUG! Report to mailing list.\n");
+			free(compare_buffer);
+			free(buffer);
+			goto err;
+		}
+		free(compare_buffer);
+	}
+
 	free(buffer);

 	/* Set up for output marshaling. */
--- a/util/cbfstool/cbfs_image.c
+++ b/util/cbfstool/cbfs_image.c
@ -54,7 +54,8 @@
 static const struct typedesc_t types_cbfs_compression[] = {
 	{CBFS_COMPRESS_NONE, "none"},
 	{CBFS_COMPRESS_LZMA, "LZMA"},
-	{0, NULL}
+	{CBFS_COMPRESS_LZ4, "LZ4"},
+	{0, NULL},
 };

 static const char *lookup_name_by_type(const struct typedesc_t *desc, uint32_t type,
--- a/util/cbfstool/common.h
+++ b/util/cbfstool/common.h
@ -165,7 +165,11 @@ typedef int (*comp_func_ptr) (char *in, int in_len, char *out, int *out_len);
 typedef int (*decomp_func_ptr) (char *in, int in_len, char *out, int out_len,
 				size_t *actual_size);

-enum comp_algo { CBFS_COMPRESS_NONE = 0, CBFS_COMPRESS_LZMA = 1 };
+enum comp_algo {
+	CBFS_COMPRESS_NONE = 0,
+	CBFS_COMPRESS_LZMA = 1,
+	CBFS_COMPRESS_LZ4 = 2,
+};

 comp_func_ptr compression_function(enum comp_algo algo);
 decomp_func_ptr decompression_function(enum comp_algo algo);
--- a/util/cbfstool/compress.c
+++ b/util/cbfstool/compress.c
@ -20,7 +20,42 @@

 #include <string.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include "common.h"
+#include "lz4/lib/lz4frame.h"
+#include <commonlib/compression.h>
+
+static int lz4_compress(char *in, int in_len, char *out, int *out_len)
+{
+	LZ4F_preferences_t prefs = {
+		.compressionLevel = 20,
+		.frameInfo = {
+			.blockSizeID = max4MB,
+			.blockMode = blockIndependent,
+			.contentChecksumFlag = noContentChecksum,
+		},
+	};
+	size_t worst_size = LZ4F_compressFrameBound(in_len, &prefs);
+	void *bounce = malloc(worst_size);
+	if (!bounce)
+		return -1;
+	*out_len = LZ4F_compressFrame(bounce, worst_size, in, in_len, &prefs);
+	if (LZ4F_isError(*out_len) || *out_len >= in_len)
+		return -1;
+	memcpy(out, bounce, *out_len);
+	return 0;
+}
+
+static int lz4_decompress(char *in, int in_len, char *out, int out_len,
+			  size_t *actual_size)
+{
+	size_t result = ulz4fn(in, in_len, out, out_len);
+	if (result == 0)
+		return -1;
+	if (actual_size != NULL)
+		*actual_size = result;
+	return 0;
+}

 static int lzma_compress(char *in, int in_len, char *out, int *out_len)
 {
@ -58,6 +93,9 @@ comp_func_ptr compression_function(enum comp_algo algo)
 	case CBFS_COMPRESS_LZMA:
 		compress = lzma_compress;
 		break;
+	case CBFS_COMPRESS_LZ4:
+		compress = lz4_compress;
+		break;
 	default:
 		ERROR("Unknown compression algorithm %d!\n", algo);
 		return NULL;
@ -75,6 +113,9 @@ decomp_func_ptr decompression_function(enum comp_algo algo)
 	case CBFS_COMPRESS_LZMA:
 		decompress = lzma_decompress;
 		break;
+	case CBFS_COMPRESS_LZ4:
+		decompress = lz4_decompress;
+		break;
 	default:
 		ERROR("Unknown compression algorithm %d!\n", algo);
 		return NULL;
--- a/util/cbfstool/lz4/Makefile
+++ b/util/cbfstool/lz4/Makefile
@ -0,0 +1,119 @@
+# ################################################################
+# LZ4 - Makefile
+# Copyright (C) Yann Collet 2011-2015
+# All rights reserved.
+#
+# BSD license
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# You can contact the author at :
+#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version number
+export VERSION=132
+export RELEASE=r$(VERSION)
+
+DESTDIR?=
+PREFIX ?= /usr/local
+
+LIBDIR ?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+PRGDIR  = programs
+LZ4DIR  = lib
+
+
+# Define nul output
+ifneq (,$(filter Windows%,$(OS)))
+VOID = nul
+else
+VOID = /dev/null
+endif
+
+
+.PHONY: default all lib lz4programs clean test versionsTest examples
+
+default: lz4programs
+
+all: lib lz4programs
+
+lib:
+	@$(MAKE) -C $(LZ4DIR) all
+
+lz4programs:
+	@$(MAKE) -C $(PRGDIR)
+
+clean:
+	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
+	@$(MAKE) -C $(LZ4DIR) $@ > $(VOID)
+	@$(MAKE) -C examples $@ > $(VOID)
+	@$(MAKE) -C versionsTest $@ > $(VOID)
+	@echo Cleaning completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+install:
+	@$(MAKE) -C $(LZ4DIR) $@
+	@$(MAKE) -C $(PRGDIR) $@
+
+uninstall:
+	@$(MAKE) -C $(LZ4DIR) $@
+	@$(MAKE) -C $(PRGDIR) $@
+
+travis-install:
+	sudo $(MAKE) install
+
+test:
+	$(MAKE) -C $(PRGDIR) test
+
+cmake:
+	@cd cmake_unofficial; cmake CMakeLists.txt; $(MAKE)
+
+gpptest: clean
+	$(MAKE) all CC=g++ CFLAGS="-O3 -I../lib -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+
+clangtest: clean
+	CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) all CC=clang
+
+sanitize: clean
+	CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test CC=clang FUZZER_TIME="-T1mn" NB_LOOPS=-i1
+
+staticAnalyze: clean
+	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
+
+armtest: clean
+	CFLAGS="-O3 -Werror" $(MAKE) -C $(LZ4DIR) all CC=arm-linux-gnueabi-gcc
+	CFLAGS="-O3 -Werror" $(MAKE) -C $(PRGDIR) bins CC=arm-linux-gnueabi-gcc
+
+versionsTest: clean
+	$(MAKE) -C versionsTest
+
+examples:
+	$(MAKE) -C $(LZ4DIR)
+	$(MAKE) -C $(PRGDIR) lz4
+	$(MAKE) -C examples test
+
+endif
--- a/util/cbfstool/lz4/NEWS
+++ b/util/cbfstool/lz4/NEWS
@ -0,0 +1,185 @@
+r132
+Improved: Small decompression speed boost (+4%)
+Improved: Performance on ARMv6 and ARMv7
+Added : Debianization, by Evgeniy Polyakov
+Makefile: Safely protects lz4 version when selecting custom CFLAGS
+Makefile: Generates intermediate object files (*.o) for faster compilation on low power systems
+
+r131
+New    : Dos/DJGPP target, thanks to Louis Santillan (#114)
+Added  : Example using lz4frame library, by Zbigniew Jędrzejewski-Szmek (#118)
+Changed: xxhash symbols are modified (namespace emulation) within liblz4
+
+r130:
+Fixed  : incompatibility sparse mode vs console, reported by Yongwoon Cho (#105)
+Fixed  : LZ4IO exits too early when frame crc not present, reported by Yongwoon Cho (#106)
+Fixed  : incompatibility sparse mode vs append mode, reported by Takayuki Matsuoka (#110)
+Performance fix : big compression speed boost for clang (+30%)
+New    : cross-version test, by Takayuki Matsuoka
+
+r129:
+Added  : LZ4_compress_fast(), LZ4_compress_fast_continue()
+Added  : LZ4_compress_destSize()
+Changed: New lz4 and lz4hc compression API. Previous function prototypes still supported.
+Changed: Sparse file support enabled by default
+New    : LZ4 CLI improved performance compressing/decompressing multiple files (#86, kind contribution from Kyle J. Harper & Takayuki Matsuoka)
+Fixed  : GCC 4.9+ optimization bug - Reported by Markus Trippelsdorf, Greg Slazinski & Evan Nemerson
+Changed: Enums converted to LZ4F_ namespace convention - by Takayuki Matsuoka
+Added  : AppVeyor CI environment, for Visual tests - Suggested by Takayuki Matsuoka
+Modified:Obsolete functions generate warnings - Suggested by Evan Nemerson, contributed by Takayuki Matsuoka
+Fixed  : Bug #75 (unfinished stream), reported by Yongwoon Cho
+Updated: Documentation converted to MarkDown format
+
+r128:
+New    : lz4cli sparse file support (Requested by Neil Wilson, and contributed by Takayuki Matsuoka)
+New    : command -m, to compress multiple files in a single command (suggested by Kyle J. Harper)
+Fixed  : Restored lz4hc compression ratio (slightly lower since r124)
+New    : lz4 cli supports long commands (suggested by Takayuki Matsuoka)
+New    : lz4frame & lz4cli frame content size support
+New    : lz4frame supports skippable frames, as requested by Sergey Cherepanov
+Changed: Default "make install" directory is /usr/local, as notified by Ron Johnson
+New    : lz4 cli supports "pass-through" mode, requested by Neil Wilson
+New    : datagen can generate sparse files
+New    : scan-build tests, thanks to kind help by Takayuki Matsuoka
+New    : g++ compatibility tests
+New    : arm cross-compilation test, thanks to kind help by Takayuki Matsuoka
+Fixed  : Fuzzer + frametest compatibility with NetBSD (issue #48, reported by Thomas Klausner)
+Added  : Visual project directory
+Updated: Man page & Specification
+
+r127:
+N/A   : added a file on SVN
+
+r126:
+New   : lz4frame API is now integrated into liblz4
+Fixed : GCC 4.9 bug on highest performance settings, reported by Greg Slazinski
+Fixed : bug within LZ4 HC streaming mode, reported by James Boyle
+Fixed : older compiler don't like nameless unions, reported by Cheyi Lin
+Changed : lz4 is C90 compatible
+Changed : added -pedantic option, fixed a few mminor warnings
+
+r125:
+Changed : endian and alignment code
+Changed : directory structure : new "lib" directory
+Updated : lz4io, now uses lz4frame
+Improved: slightly improved decoding speed
+Fixed : LZ4_compress_limitedOutput(); Special thanks to Christopher Speller !
+Fixed : some alignment warnings under clang
+Fixed : deprecated function LZ4_slideInputBufferHC()
+
+r124:
+New : LZ4 HC streaming mode
+Fixed : LZ4F_compressBound() using null preferencesPtr
+Updated : xxHash to r38
+Updated library number, to 1.4.0
+
+r123:
+Added : experimental lz4frame API, thanks to Takayuki Matsuoka and Christopher Jackson for testings
+Fix : s390x support, thanks to Nobuhiro Iwamatsu
+Fix : test mode (-t) no longer requires confirmation, thanks to Thary Nguyen
+
+r122:
+Fix : AIX & AIX64 support (SamG)
+Fix : mips 64-bits support (lew van)
+Added : Examples directory, using code examples from Takayuki Matsuoka
+Updated : Framing specification, to v1.4.1
+Updated : xxHash, to r36
+
+r121:
+Added : Makefile : install for kFreeBSD and Hurd (Nobuhiro Iwamatsu)
+Fix : Makefile : install for OS-X and BSD, thanks to Takayuki Matsuoka
+
+r120:
+Modified : Streaming API, using strong types
+Added : LZ4_versionNumber(), thanks to Takayuki Matsuoka
+Fix : OS-X : library install name, thanks to Clemens Lang
+Updated : Makefile : synchronize library version number with lz4.h, thanks to Takayuki Matsuoka
+Updated : Makefile : stricter compilation flags
+Added : pkg-config, thanks to Zbigniew Jędrzejewski-Szmek (issue 135)
+Makefile : lz4-test only test native binaries, as suggested by Michał Górny (issue 136)
+Updated : xxHash to r35
+
+r119:
+Fix : Issue 134 : extended malicious address space overflow in 32-bits mode for some specific configurations
+
+r118:
+New : LZ4 Streaming API (Fast version), special thanks to Takayuki Matsuoka
+New : datagen : parametrable synthetic data generator for tests
+Improved : fuzzer, support more test cases, more parameters, ability to jump to specific test
+fix : support ppc64le platform (issue 131)
+fix : Issue 52 (malicious address space overflow in 32-bits mode when using large custom format)
+fix : Makefile : minor issue 130 : header files permissions
+
+r117:
+Added : man pages for lz4c and lz4cat
+Added : automated tests on Travis, thanks to Takayuki Matsuoka !
+fix : block-dependency command line (issue 127)
+fix : lz4fullbench (issue 128)
+
+r116:
+hotfix (issue 124 & 125)
+
+r115:
+Added : lz4cat utility, installed on POSX systems (issue 118)
+OS-X compatible compilation of dynamic library (issue 115)
+
+r114:
+Makefile : library correctly compiled with -O3 switch (issue 114)
+Makefile : library compilation compatible with clang
+Makefile : library is versioned and linked (issue 119)
+lz4.h : no more static inline prototypes (issue 116)
+man : improved header/footer (issue 111)
+Makefile : Use system default $(CC) & $(MAKE) variables (issue 112)
+xxhash : updated to r34
+
+r113:
+Large decompression speed improvement for GCC 32-bits. Thanks to Valery Croizier !
+LZ4HC : Compression Level is now a programmable parameter (CLI from 4 to 9)
+Separated IO routines from command line (lz4io.c)
+Version number into lz4.h (suggested by Francesc Alted)
+
+r112:
+quickfix
+
+r111 :
+Makefile : added capability to install libraries
+Modified Directory tree, to better separate libraries from programs.
+
+r110 :
+lz4 & lz4hc : added capability to allocate state & stream state with custom allocator (issue 99)
+fuzzer & fullbench : updated to test new functions
+man : documented -l command (Legacy format, for Linux kernel compression) (issue 102)
+cmake : improved version by Mika Attila, building programs and libraries (issue 100)
+xxHash : updated to r33
+Makefile : clean also delete local package .tar.gz
+
+r109 :
+lz4.c : corrected issue 98 (LZ4_compress_limitedOutput())
+Makefile : can specify version number from makefile
+
+r108 :
+lz4.c : corrected compression efficiency issue 97 in 64-bits chained mode (-BD) for streams > 4 GB (thanks Roman Strashkin for reporting)
+
+r107 :
+Makefile : support DESTDIR for staged installs. Thanks Jorge Aparicio.
+Makefile : make install installs both lz4 and lz4c (Jorge Aparicio)
+Makefile : removed -Wno-implicit-declaration compilation switch
+lz4cli.c : include <stduni.h> for isatty() (Luca Barbato)
+lz4.h : introduced LZ4_MAX_INPUT_SIZE constant (Shay Green)
+lz4.h : LZ4_compressBound() : unified macro and inline definitions (Shay Green)
+lz4.h : LZ4_decompressSafe_partial() : clarify comments (Shay Green)
+lz4.c : LZ4_compress() verify input size condition (Shay Green)
+bench.c : corrected a bug in free memory size evaluation
+cmake : install into bin/ directory (Richard Yao)
+cmake : check for just C compiler (Elan Ruusamae)
+
+r106 :
+Makefile : make dist modify text files in the package to respect Unix EoL convention
+lz4cli.c : corrected small display bug in HC mode
+
+r105 :
+Makefile : New install script and man page, contributed by Prasad Pandit
+lz4cli.c : Minor modifications, for easier extensibility
+COPYING  : added license file
+LZ4_Streaming_Format.odt : modified file name to remove white space characters
+Makefile : .exe suffix now properly added only for Windows target
--- a/util/cbfstool/lz4/README.md
+++ b/util/cbfstool/lz4/README.md
@ -0,0 +1,90 @@
+LZ4 - Extremely fast compression
+================================
+
+LZ4 is lossless compression algorithm,
+providing compression speed at 400 MB/s per core,
+scalable with multi-cores CPU.
+It features an extremely fast decoder,
+with speed in multiple GB/s per core,
+typically reaching RAM speed limits on multi-core systems.
+
+Speed can be tuned dynamically, selecting an "acceleration" factor
+which trades compression ratio for more speed up.
+On the other end, a high compression derivative, LZ4_HC, is also provided,
+trading CPU time for improved compression ratio.
+All versions feature the same decompression speed.
+
+LZ4 library is provided as open-source software using BSD license.s
+
+
+|Branch      |Status   |
+|------------|---------|
+|master      | [![Build Status][travisMasterBadge]][travisLink] [![Build status][AppveyorMasterBadge]][AppveyorLink] [![coverity][coverBadge]][coverlink] |
+|dev         | [![Build Status][travisDevBadge]][travisLink]    [![Build status][AppveyorDevBadge]][AppveyorLink]                                         |
+
+[travisMasterBadge]: https://travis-ci.org/Cyan4973/lz4.svg?branch=master "Continuous Integration test suite"
+[travisDevBadge]: https://travis-ci.org/Cyan4973/lz4.svg?branch=dev "Continuous Integration test suite"
+[travisLink]: https://ci.appveyor.com/project/YannCollet/lz4
+[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/v6kxv9si529477cq/branch/master?svg=true "Visual test suite"
+[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/v6kxv9si529477cq/branch/dev?svg=true "Visual test suite"
+[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/lz4
+[coverBadge]: https://scan.coverity.com/projects/4735/badge.svg "Static code analysis of Master branch"
+[coverlink]: https://scan.coverity.com/projects/4735
+
+> **Branch Policy:**
+
+> - The "master" branch is considered stable, at all times.
+> - The "dev" branch is the one where all contributions must be merged
+    before being promoted to master.
+>   + If you plan to propose a patch, please commit into the "dev" branch,
+      or its own feature branch.
+      Direct commit to "master" are not permitted.
+
+Benchmarks
+-------------------------
+
+The benchmark uses the [Open-Source Benchmark program by m^2 (v0.14.3)]
+compiled with GCC v4.8.2 on Linux Mint 64-bits v17.
+The reference system uses a Core i5-4300U @1.9GHz.
+Benchmark evaluates the compression of reference [Silesia Corpus]
+in single-thread mode.
+
+|  Compressor          | Ratio   | Compression | Decompression |
+|  ----------          | -----   | ----------- | ------------- |
+|  memcpy              |  1.000  | 4200 MB/s   |   4200 MB/s   |
+|**LZ4 fast 17 (r129)**|  1.607  |**690 MB/s** | **2220 MB/s** |
+|**LZ4 default (r129)**|**2.101**|**385 MB/s** | **1850 MB/s** |
+|  LZO 2.06            |  2.108  |  350 MB/s   |    510 MB/s   |
+|  QuickLZ 1.5.1.b6    |  2.238  |  320 MB/s   |    380 MB/s   |
+|  Snappy 1.1.0        |  2.091  |  250 MB/s   |    960 MB/s   |
+|  LZF v3.6            |  2.073  |  175 MB/s   |    500 MB/s   |
+|  zlib 1.2.8 -1       |  2.730  |   59 MB/s   |    250 MB/s   |
+|**LZ4 HC (r129)**     |**2.720**|   22 MB/s   | **1830 MB/s** |
+|  zlib 1.2.8 -6       |  3.099  |   18 MB/s   |    270 MB/s   |
+
+
+Documentation
+-------------------------
+
+The raw LZ4 block compression format is detailed within [lz4_Block_format].
+
+To compress an arbitrarily long file or data stream, multiple blocks are required.
+Organizing these blocks and providing a common header format to handle their content
+is the purpose of the Frame format, defined into [lz4_Frame_format].
+Interoperable versions of LZ4 must respect this frame format.
+
+
+Other source versions
+-------------------------
+
+Beyond the C reference source,
+many contributors have created versions of lz4 in multiple languages
+(Java, C#, Python, Perl, Ruby, etc.).
+A list of known source ports is maintained on the [LZ4 Homepage].
+
+
+[Open-Source Benchmark program by m^2 (v0.14.3)]: http://encode.ru/threads/1371-Filesystem-benchmark?p=34029&viewfull=1#post34029
+[Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
+[lz4_Block_format]: lz4_Block_format.md
+[lz4_Frame_format]: lz4_Frame_format.md
+[LZ4 Homepage]: http://www.lz4.org
--- a/util/cbfstool/lz4/lib/LICENSE
+++ b/util/cbfstool/lz4/lib/LICENSE
@ -0,0 +1,24 @@
+LZ4 Library
+Copyright (c) 2011-2015, Yann Collet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/util/cbfstool/lz4/lib/Makefile
+++ b/util/cbfstool/lz4/lib/Makefile
@ -0,0 +1,118 @@
+# ################################################################
+# LZ4 library - Makefile
+# Copyright (C) Yann Collet 2011-2015
+# All rights reserved.
+#
+# BSD license
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# You can contact the author at :
+#  - LZ4 source repository : https://github.com/Cyan4973/lz4
+#  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
+
+# Version numbers
+VERSION?= 132
+LIBVER_MAJOR:=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER_MINOR:=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER_PATCH:=`sed -n '/define LZ4_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
+
+DESTDIR?=
+PREFIX ?= /usr/local
+CPPFLAGS= -DXXH_NAMESPACE=LZ4_
+CFLAGS ?= -O3
+CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wcast-qual -Wstrict-prototypes -pedantic
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+LIBDIR?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+
+
+# OS X linker doesn't support -soname, and use different extension
+# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
+ifeq ($(shell uname), Darwin)
+	SHARED_EXT = dylib
+	SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
+	SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
+	SONAME_FLAGS = -install_name $(PREFIX)/lib/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+else
+	SONAME_FLAGS = -Wl,-soname=liblz4.$(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT = so
+	SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
+endif
+
+default: liblz4
+
+all: liblz4
+
+liblz4: lz4.c lz4hc.c lz4frame.c xxhash.c   # need to compile once with -fPIC, and once without -fPIC
+	@echo compiling static library
+	@$(AR) rcs $@.a $^
+	@echo compiling dynamic library $(LIBVER)
+	@$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
+	@echo creating versioned links
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
+
+clean:
+	@rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* liblz4.pc
+	@echo Cleaning library completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+liblz4.pc: liblz4.pc.in Makefile
+	@echo creating pkgconfig
+	@sed -e 's|@PREFIX@|$(PREFIX)|' \
+            -e 's|@LIBDIR@|$(LIBDIR)|' \
+            -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+            -e 's|@VERSION@|$(VERSION)|' \
+             $< >$@
+
+install: liblz4 liblz4.pc
+	@install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/
+	@install -m 755 liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)
+	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
+	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
+	@install -m 644 liblz4.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
+	@install -m 644 liblz4.a $(DESTDIR)$(LIBDIR)/liblz4.a
+	@install -m 644 lz4.h $(DESTDIR)$(INCLUDEDIR)/lz4.h
+	@install -m 644 lz4hc.h $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
+	@install -m 644 lz4frame.h $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+	@echo lz4 static and shared libraries installed
+
+uninstall:
+	@rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
+	@rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
+	@rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/liblz4.pc
+	@[ -x $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER)
+	@[ -f $(DESTDIR)$(LIBDIR)/liblz4.a ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.a
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4hc.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4frame.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+	@echo lz4 libraries successfully uninstalled
+
+endif
--- a/util/cbfstool/lz4/lib/README.md
+++ b/util/cbfstool/lz4/lib/README.md
@ -0,0 +1,21 @@
+LZ4 - Library Files
+================================
+
+The __lib__ directory contains several files, but you don't necessarily need them all.
+
+To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**".
+
+For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly.
+
+If you want to produce files or data streams compatible with `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides error detection algorithm.
+(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.)
+
+A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***.
+
+The other files are not source code. There are :
+
+ - LICENSE : contains the BSD license text
+ - Makefile : script to compile or install lz4 library (static or dynamic)
+ - liblz4.pc.in : for pkg-config (make install)
+
+[official interoperable frame format]: ../lz4_Frame_format.md
--- a/util/cbfstool/lz4/lib/liblz4.pc.in
+++ b/util/cbfstool/lz4/lib/liblz4.pc.in
@ -0,0 +1,14 @@
+#   LZ4 - Fast LZ compression algorithm
+#   Copyright (C) 2011-2014, Yann Collet.
+#   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=@INCLUDEDIR@
+
+Name: lz4
+Description: fast lossless compression algorithm library
+URL: http://lz4.org/
+Version: @VERSION@
+Libs: -L@LIBDIR@ -llz4
+Cflags: -I@INCLUDEDIR@
--- a/util/cbfstool/lz4/lib/lz4.c
+++ b/util/cbfstool/lz4/lib/lz4.c
--- a/util/cbfstool/lz4/lib/lz4.h
+++ b/util/cbfstool/lz4/lib/lz4.h
@ -0,0 +1,360 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * lz4.h provides block compression functions, and gives full buffer control to programmer.
+ * If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
+ * and can let the library handle its own memory, please use lz4frame.h instead.
+*/
+
+/**************************************
+*  Version
+**************************************/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+int LZ4_versionNumber (void);
+
+/**************************************
+*  Tuning parameter
+**************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 14
+
+
+/**************************************
+*  Simple Functions
+**************************************/
+
+int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
+int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+
+/*
+LZ4_compress_default() :
+    Compresses 'sourceSize' bytes from buffer 'source'
+    into already allocated 'dest' buffer of size 'maxDestSize'.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'source' into a more limited 'dest' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dest' content is not valid.
+    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
+        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
+        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
+              or 0 if compression fails
+
+LZ4_decompress_safe() :
+    compressedSize : is the precise full size of the compressed block.
+    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
+             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
+*/
+
+
+/**************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*
+LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+*/
+int LZ4_compressBound(int inputSize);
+
+/*
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+*/
+int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
+
+
+/*
+LZ4_compress_fast_extState() :
+    Same compression function, just using an externally allocated memory space to store compression state.
+    Use LZ4_sizeofState() to know how much memory must be allocated,
+    and allocate it on 8-bytes boundaries (using malloc() typically).
+    Then, provide it as 'void* state' to compression function.
+*/
+int LZ4_sizeofState(void);
+int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+
+
+/*
+LZ4_compress_destSize() :
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
+    into already allocated buffer 'dest' of size 'targetDestSize'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
+    or fill 'dest' buffer completely with as much data as possible from 'source'.
+        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
+                         New value is necessarily <= old value.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+              or 0 if compression fails
+*/
+int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
+
+
+/*
+LZ4_decompress_fast() :
+    originalSize : is the original and therefore uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+    note : This function fully respect memory boundaries for properly formed compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
+
+/*
+LZ4_decompress_safe_partial() :
+    This function decompress a compressed block of size 'compressedSize' at position 'source'
+    into destination buffer 'dest' of size 'maxDecompressedSize'.
+    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
+       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/***********************************************
+*  Streaming Compression Functions
+***********************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
+/*
+ * LZ4_stream_t
+ * information structure to track an LZ4 stream.
+ * important : init this structure content before first use !
+ * note : only allocated directly the structure if you are statically linking LZ4
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
+ */
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
+
+/*
+ * LZ4_resetStream
+ * Use this function to init an allocated LZ4_stream_t structure
+ */
+void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+/*
+ * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
+ * LZ4_freeStream releases its memory.
+ * In the context of a DLL (liblz4), please use these methods rather than the static struct.
+ * They are more future proof, in case of a change of LZ4_stream_t size.
+ */
+LZ4_stream_t* LZ4_createStream(void);
+int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*
+ * LZ4_loadDict
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ * Loading a size of 0 is allowed.
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*
+ * LZ4_compress_fast_continue
+ * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still be present and unmodified !
+ * 'dst' buffer must be already allocated.
+ * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict
+ * If previously compressed data block is not guaranteed to remain available at its memory location
+ * save it into a safer place (char* safeBuffer)
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ *        dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
+ * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
+ */
+int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+
+
+/************************************************
+*  Streaming Decompression Functions
+************************************************/
+
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
+/*
+ * LZ4_streamDecode_t
+ * information structure to track an LZ4 stream.
+ * init this structure content using LZ4_setStreamDecode or memset() before first use !
+ *
+ * In the context of a DLL (liblz4) please prefer usage of construction methods below.
+ * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
+ * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
+ * LZ4_freeStreamDecode releases its memory.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*
+ * LZ4_setStreamDecode
+ * Use this function to instruct where to find the dictionary.
+ * Setting a size of 0 is allowed (same effect as reset).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
+    In the case of a ring buffers, decoding buffer must be either :
+    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
+      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
+    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including small ones ( < 64 KB).
+    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including larger than decoding buffer.
+    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
+    and indicate where it is saved using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as
+    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
+    They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
+*/
+int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
+int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+
+
+/**************************************
+*  Obsolete Functions
+**************************************/
+/* Deprecate Warnings */
+/* Should these warnings messages be a problem,
+   it is generally possible to disable them,
+   with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
+   Otherwise, you can also define LZ4_DISABLE_DEPRECATE_WARNINGS */
+#define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+#  define LZ4_DEPRECATED()   /* disable deprecation warnings */
+#else
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/* Obsolete compression functions */
+/* These functions will generate warnings in a future release */
+int LZ4_compress               (const char* source, char* dest, int sourceSize);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete decompression functions */
+/* These function names are completely deprecated and must no longer be used.
+   They are only provided in lz4.c for compatibility with older programs.
+    - LZ4_uncompress is the same as LZ4_decompress_fast
+    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
+   These function prototypes are now disabled; uncomment them only if you really need them.
+   It is highly recommended to stop using these prototypes and migrate to maintained ones */
+/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
+/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
+
+/* Obsolete streaming functions; use new streaming interface whenever possible */
+LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+
+/* Obsolete streaming decoding functions */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
--- a/util/cbfstool/lz4/lib/lz4frame.c
+++ b/util/cbfstool/lz4/lib/lz4frame.c
--- a/util/cbfstool/lz4/lib/lz4frame.h
+++ b/util/cbfstool/lz4/lib/lz4frame.h
@ -0,0 +1,303 @@
+/*
+   LZ4 auto-framing library
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API to create LZ4-compressed frames
+ * conformant with specification v1.5.1.
+ * All related operations, including memory management, are handled internally by the library.
+ * You don't need lz4.h when using lz4frame.h.
+ * */
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**************************************
+*  Includes
+**************************************/
+#include <stddef.h>   /* size_t */
+
+
+/**************************************
+*  Error management
+**************************************/
+typedef size_t LZ4F_errorCode_t;
+
+unsigned    LZ4F_isError(LZ4F_errorCode_t code);
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /* return error code string; useful for debugging */
+
+
+/**************************************
+*  Frame compression types
+**************************************/
+//#define LZ4F_DISABLE_OBSOLETE_ENUMS
+#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS
+#  define LZ4F_OBSOLETE_ENUM(x) ,x
+#else
+#  define LZ4F_OBSOLETE_ENUM(x)
+#endif
+
+typedef enum {
+    LZ4F_default=0,
+    LZ4F_max64KB=4,
+    LZ4F_max256KB=5,
+    LZ4F_max1MB=6,
+    LZ4F_max4MB=7
+    LZ4F_OBSOLETE_ENUM(max64KB = LZ4F_max64KB)
+    LZ4F_OBSOLETE_ENUM(max256KB = LZ4F_max256KB)
+    LZ4F_OBSOLETE_ENUM(max1MB = LZ4F_max1MB)
+    LZ4F_OBSOLETE_ENUM(max4MB = LZ4F_max4MB)
+} LZ4F_blockSizeID_t;
+
+typedef enum {
+    LZ4F_blockLinked=0,
+    LZ4F_blockIndependent
+    LZ4F_OBSOLETE_ENUM(blockLinked = LZ4F_blockLinked)
+    LZ4F_OBSOLETE_ENUM(blockIndependent = LZ4F_blockIndependent)
+} LZ4F_blockMode_t;
+
+typedef enum {
+    LZ4F_noContentChecksum=0,
+    LZ4F_contentChecksumEnabled
+    LZ4F_OBSOLETE_ENUM(noContentChecksum = LZ4F_noContentChecksum)
+    LZ4F_OBSOLETE_ENUM(contentChecksumEnabled = LZ4F_contentChecksumEnabled)
+} LZ4F_contentChecksum_t;
+
+typedef enum {
+    LZ4F_frame=0,
+    LZ4F_skippableFrame
+    LZ4F_OBSOLETE_ENUM(skippableFrame = LZ4F_skippableFrame)
+} LZ4F_frameType_t;
+
+#ifndef LZ4F_DISABLE_OBSOLETE_ENUMS
+typedef LZ4F_blockSizeID_t blockSizeID_t;
+typedef LZ4F_blockMode_t blockMode_t;
+typedef LZ4F_frameType_t frameType_t;
+typedef LZ4F_contentChecksum_t contentChecksum_t;
+#endif
+
+typedef struct {
+  LZ4F_blockSizeID_t     blockSizeID;           /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
+  LZ4F_blockMode_t       blockMode;             /* blockLinked, blockIndependent ; 0 == default */
+  LZ4F_contentChecksum_t contentChecksumFlag;   /* noContentChecksum, contentChecksumEnabled ; 0 == default  */
+  LZ4F_frameType_t       frameType;             /* LZ4F_frame, skippableFrame ; 0 == default */
+  unsigned long long     contentSize;           /* Size of uncompressed (original) content ; 0 == unknown */
+  unsigned               reserved[2];           /* must be zero for forward compatibility */
+} LZ4F_frameInfo_t;
+
+typedef struct {
+  LZ4F_frameInfo_t frameInfo;
+  int      compressionLevel;       /* 0 == default (fast mode); values above 16 count as 16; values below 0 count as 0 */
+  unsigned autoFlush;              /* 1 == always flush (reduce need for tmp buffer) */
+  unsigned reserved[4];            /* must be zero for forward compatibility */
+} LZ4F_preferences_t;
+
+
+/***********************************
+*  Simple compression function
+***********************************/
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+/* LZ4F_compressFrame()
+ * Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.1
+ * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+ * You can get the minimum value of dstMaxSize by using LZ4F_compressFrameBound()
+ * If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode)
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * The result of the function is the number of bytes written into dstBuffer.
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+
+
+/**********************************
+*  Advanced compression functions
+**********************************/
+typedef struct LZ4F_cctx_s* LZ4F_compressionContext_t;   /* must be aligned on 8-bytes */
+
+typedef struct {
+  unsigned stableSrc;    /* 1 == src content will remain available on future calls to LZ4F_compress(); avoid saving src content within tmp buffer as future dictionary */
+  unsigned reserved[3];
+} LZ4F_compressOptions_t;
+
+/* Resource Management */
+
+#define LZ4F_VERSION 100
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* cctxPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t cctx);
+/* LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries.
+ * The function will provide a pointer to a fully allocated LZ4F_compressionContext_t object.
+ * If the result LZ4F_errorCode_t is not zero, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeCompressionContext();
+ */
+
+
+/* Compression */
+
+size_t LZ4F_compressBegin(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const LZ4F_preferences_t* prefsPtr);
+/* LZ4F_compressBegin() :
+ * will write the frame header into dstBuffer.
+ * dstBuffer must be large enough to accommodate a header (dstMaxSize). Maximum header size is 15 bytes.
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * The result of the function is the number of bytes written into dstBuffer for the header
+ * or an error code (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
+/* LZ4F_compressBound() :
+ * Provides the minimum size of Dst buffer given srcSize to handle worst case situations.
+ * Different preferences can produce different results.
+ * prefsPtr is optional : you can provide NULL as argument, all preferences will then be set to cover worst case.
+ * This function includes frame termination cost (4 bytes, or 8 if frame checksum is enabled)
+ */
+
+size_t LZ4F_compressUpdate(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* cOptPtr);
+/* LZ4F_compressUpdate()
+ * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+ * You can get the minimum value of dstMaxSize by using LZ4F_compressBound().
+ * If this condition is not respected, LZ4F_compress() will fail (result is an errorCode).
+ * LZ4F_compressUpdate() doesn't guarantee error recovery, so you have to reset compression context when an error occurs.
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * The result of the function is the number of bytes written into dstBuffer : it can be zero, meaning input data was just buffered.
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_flush(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* cOptPtr);
+/* LZ4F_flush()
+ * Should you need to generate compressed data immediately, without waiting for the current block to be filled,
+ * you can call LZ4_flush(), which will immediately compress any remaining data buffered within cctx.
+ * Note that dstMaxSize must be large enough to ensure the operation will be successful.
+ * LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * The result of the function is the number of bytes written into dstBuffer
+ * (it can be zero, this means there was no data left within cctx)
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_compressEnd(LZ4F_compressionContext_t cctx, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* cOptPtr);
+/* LZ4F_compressEnd()
+ * When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ * It will flush whatever data remained within compressionContext (like LZ4_flush())
+ * but also properly finalize the frame, with an endMark and a checksum.
+ * The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled)
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * A successful call to LZ4F_compressEnd() makes cctx available again for next compression task.
+ */
+
+
+/***********************************
+*  Decompression functions
+***********************************/
+
+typedef struct LZ4F_dctx_s* LZ4F_decompressionContext_t;   /* must be aligned on 8-bytes */
+
+typedef struct {
+  unsigned stableDst;       /* guarantee that decompressed data will still be there on next function calls (avoid storage into tmp buffers) */
+  unsigned reserved[3];
+} LZ4F_decompressOptions_t;
+
+
+/* Resource management */
+
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_decompressionContext_t* dctxPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_decompressionContext_t dctx);
+/* LZ4F_createDecompressionContext() :
+ * The first thing to do is to create an LZ4F_decompressionContext_t object, which will be used in all decompression operations.
+ * This is achieved using LZ4F_createDecompressionContext().
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential breaking differences between different versions.
+ * The function will provide a pointer to a fully allocated and initialized LZ4F_decompressionContext_t object.
+ * The result is an errorCode, which can be tested using LZ4F_isError().
+ * dctx memory can be released using LZ4F_freeDecompressionContext();
+ * The result of LZ4F_freeDecompressionContext() is indicative of the current state of decompressionContext when being released.
+ * That is, it should be == 0 if decompression has been completed fully and correctly.
+ */
+
+
+/* Decompression */
+
+size_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t dctx,
+                         LZ4F_frameInfo_t* frameInfoPtr,
+                         const void* srcBuffer, size_t* srcSizePtr);
+/* LZ4F_getFrameInfo()
+ * This function decodes frame header information (such as max blockSize, frame checksum, etc.).
+ * Its usage is optional. The objective is to extract frame header information, typically for allocation purposes.
+ * A header size is variable and can be from 7 to 15 bytes. It's also possible to input more bytes than that.
+ * The number of bytes read from srcBuffer will be updated within *srcSizePtr (necessarily <= original value).
+ * (note that LZ4F_getFrameInfo() can also be used anytime *after* starting decompression, in this case 0 input byte is enough)
+ * Frame header info is *copied into* an already allocated LZ4F_frameInfo_t structure.
+ * The function result is an hint about how many srcSize bytes LZ4F_decompress() expects for next call,
+ *                        or an error code which can be tested using LZ4F_isError()
+ *                        (typically, when there is not enough src bytes to fully decode the frame header)
+ * Decompression is expected to resume from where it stopped (srcBuffer + *srcSizePtr)
+ */
+
+size_t LZ4F_decompress(LZ4F_decompressionContext_t dctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* dOptPtr);
+/* LZ4F_decompress()
+ * Call this function repetitively to regenerate data compressed within srcBuffer.
+ * The function will attempt to decode *srcSizePtr bytes from srcBuffer, into dstBuffer of maximum size *dstSizePtr.
+ *
+ * The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ * The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ * If number of bytes read is < number of bytes provided, then decompression operation is not completed.
+ * It typically happens when dstBuffer is not large enough to contain all decoded data.
+ * LZ4F_decompress() must be called again, starting from where it stopped (srcBuffer + *srcSizePtr)
+ * The function will check this condition, and refuse to continue if it is not respected.
+ *
+ * dstBuffer is supposed to be flushed between each call to the function, since its content will be overwritten.
+ * dst arguments can be changed at will with each consecutive call to the function.
+ *
+ * The function result is an hint of how many srcSize bytes LZ4F_decompress() expects for next call.
+ * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ * Respecting the hint provides some boost to performance, since it does skip intermediate buffers.
+ * This is just a hint, you can always provide any srcSize you want.
+ * When a frame is fully decoded, the function result will be 0 (no more data expected).
+ * If decompression failed, function result is an error code, which can be tested using LZ4F_isError().
+ *
+ * After a frame is fully decoded, dctx can be used again to decompress another frame.
+ */
+
+
+#if defined (__cplusplus)
+}
+#endif
--- a/util/cbfstool/lz4/lib/lz4frame_static.h
+++ b/util/cbfstool/lz4/lib/lz4frame_static.h
@ -0,0 +1,81 @@
+/*
+   LZ4 auto-framing library
+   Header File for static linking only
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* lz4frame_static.h should be used solely in the context of static linking.
+ * It contains definitions which may still change overtime.
+ * Never use it in the context of DLL linking.
+ * */
+
+
+/**************************************
+*  Includes
+**************************************/
+#include "lz4frame.h"
+
+
+/**************************************
+ * Error management
+ * ************************************/
+#define LZ4F_LIST_ERRORS(ITEM) \
+        ITEM(OK_NoError) ITEM(ERROR_GENERIC) \
+        ITEM(ERROR_maxBlockSize_invalid) ITEM(ERROR_blockMode_invalid) ITEM(ERROR_contentChecksumFlag_invalid) \
+        ITEM(ERROR_compressionLevel_invalid) \
+        ITEM(ERROR_headerVersion_wrong) ITEM(ERROR_blockChecksum_unsupported) ITEM(ERROR_reservedFlag_set) \
+        ITEM(ERROR_allocation_failed) \
+        ITEM(ERROR_srcSize_tooLarge) ITEM(ERROR_dstMaxSize_tooSmall) \
+        ITEM(ERROR_frameHeader_incomplete) ITEM(ERROR_frameType_unknown) ITEM(ERROR_frameSize_wrong) \
+        ITEM(ERROR_srcPtr_wrong) \
+        ITEM(ERROR_decompressionFailed) \
+        ITEM(ERROR_headerChecksum_invalid) ITEM(ERROR_contentChecksum_invalid) \
+        ITEM(ERROR_maxCode)
+
+//#define LZ4F_DISABLE_OLD_ENUMS
+#ifndef LZ4F_DISABLE_OLD_ENUMS
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM, ENUM = LZ4F_##ENUM,
+#else
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
+#endif
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;  /* enum is exposed, to handle specific errors; compare function result to -enum value */
+
+
+#if defined (__cplusplus)
+}
+#endif
--- a/util/cbfstool/lz4/lib/lz4hc.c
+++ b/util/cbfstool/lz4/lib/lz4hc.c
@ -0,0 +1,748 @@
+/*
+    LZ4 HC - High Compression Mode of LZ4
+    Copyright (C) 2011-2015, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - LZ4 source repository : https://github.com/Cyan4973/lz4
+       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+
+/* *************************************
+*  Tuning Parameter
+***************************************/
+static const int LZ4HC_compressionLevel_default = 9;
+
+/*!
+ * HEAPMODE :
+ * Select how default compression function will allocate workplace memory,
+ * in stack (0:fastest), or in heap (1:requires malloc()).
+ * Since workplace is rather large, heap mode is recommended.
+ */
+#define LZ4HC_HEAPMODE 0
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include "lz4hc.h"
+
+
+/* *************************************
+*  Local Compiler Options
+***************************************/
+#if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined (__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+
+/* *************************************
+*  Common LZ4 definition
+***************************************/
+#define LZ4_COMMONDEFS_ONLY
+#include "lz4.c"
+
+
+/* *************************************
+*  Local Constants
+***************************************/
+#define DICTIONARY_LOGSIZE 16
+#define MAXD (1<<DICTIONARY_LOGSIZE)
+#define MAXD_MASK (MAXD - 1)
+
+#define HASH_LOG (DICTIONARY_LOGSIZE-1)
+#define HASHTABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASHTABLESIZE - 1)
+
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+
+static const int g_maxCompressionLevel = 16;
+
+
+/**************************************
+*  Local Types
+**************************************/
+typedef struct
+{
+    U32   hashTable[HASHTABLESIZE];
+    U16   chainTable[MAXD];
+    const BYTE* end;        /* next block here to continue on current prefix */
+    const BYTE* base;       /* All index relative to this position */
+    const BYTE* dictBase;   /* alternate base for extDict */
+    BYTE* inputBuffer;      /* deprecated */
+    U32   dictLimit;        /* below that point, need extDict */
+    U32   lowLimit;         /* below that point, no more dict */
+    U32   nextToUpdate;     /* index from which to continue dictionary update */
+    U32   compressionLevel;
+} LZ4HC_Data_Structure;
+
+
+/**************************************
+*  Local Macros
+**************************************/
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
+//#define DELTANEXTU16(p)        chainTable[(p) & MAXD_MASK]   /* flexible, MAXD dependent */
+#define DELTANEXTU16(p)        chainTable[(U16)(p)]   /* faster */
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+
+
+/**************************************
+*  HC Compression
+**************************************/
+static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
+{
+    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+    hc4->nextToUpdate = 64 KB;
+    hc4->base = start - 64 KB;
+    hc4->end = start;
+    hc4->dictBase = start - 64 KB;
+    hc4->dictLimit = 64 KB;
+    hc4->lowLimit = 64 KB;
+}
+
+
+/* Update chains up to ip (excluded) */
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+{
+    U16* chainTable = hc4->chainTable;
+    U32* HashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = hc4->nextToUpdate;
+
+    while(idx < target)
+    {
+        U32 h = LZ4HC_hashPtr(base+idx);
+        size_t delta = idx - HashTable[h];
+        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+        DELTANEXTU16(idx) = (U16)delta;
+        HashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4,   /* Index table will be updated */
+                                               const BYTE* ip, const BYTE* const iLimit,
+                                               const BYTE** matchpos,
+                                               const int maxNbAttempts)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const BYTE* const dictBase = hc4->dictBase;
+    const U32 dictLimit = hc4->dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    U32 matchIndex;
+    const BYTE* match;
+    int nbAttempts=maxNbAttempts;
+    size_t ml=0;
+
+    /* HC4 match finder */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)
+        {
+            match = base + matchIndex;
+            if (*(match+ml) == *(ip+ml)
+                && (LZ4_read32(match) == LZ4_read32(ip)))
+            {
+                size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml) { ml = mlt; *matchpos = match; }
+            }
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            if (LZ4_read32(match) == LZ4_read32(ip))
+            {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
+                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos */
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return (int)ml;
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+    LZ4HC_Data_Structure* hc4,
+    const BYTE* const ip,
+    const BYTE* const iLowLimit,
+    const BYTE* const iHighLimit,
+    int longest,
+    const BYTE** matchpos,
+    const BYTE** startpos,
+    const int maxNbAttempts)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 dictLimit = hc4->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    const BYTE* const dictBase = hc4->dictBase;
+    U32   matchIndex;
+    int nbAttempts = maxNbAttempts;
+    int delta = (int)(ip-iLowLimit);
+
+
+    /* First Match */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)
+        {
+            const BYTE* matchPtr = base + matchIndex;
+            if (*(iLowLimit + longest) == *(matchPtr - delta + longest))
+                if (LZ4_read32(matchPtr) == LZ4_read32(ip))
+                {
+                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    int back = 0;
+
+                    while ((ip+back>iLowLimit)
+                           && (matchPtr+back > lowPrefixPtr)
+                           && (ip[back-1] == matchPtr[back-1]))
+                            back--;
+
+                    mlt -= back;
+
+                    if (mlt > longest)
+                    {
+                        longest = (int)mlt;
+                        *matchpos = matchPtr+back;
+                        *startpos = ip+back;
+                    }
+                }
+        }
+        else
+        {
+            const BYTE* matchPtr = dictBase + matchIndex;
+            if (LZ4_read32(matchPtr) == LZ4_read32(ip))
+            {
+                size_t mlt;
+                int back=0;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
+                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--;
+                mlt -= back;
+                if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
+            }
+        }
+        matchIndex -= DELTANEXTU16(matchIndex);
+    }
+
+    return longest;
+}
+
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+
+#define LZ4HC_DEBUG 0
+#if LZ4HC_DEBUG
+static unsigned debug = 0;
+#endif
+
+FORCE_INLINE int LZ4HC_encodeSequence (
+    const BYTE** ip,
+    BYTE** op,
+    const BYTE** anchor,
+    int matchLength,
+    const BYTE* const match,
+    limitedOutput_directive limitedOutputBuffer,
+    BYTE* oend)
+{
+    int length;
+    BYTE* token;
+
+#if LZ4HC_DEBUG
+    if (debug) printf("literal : %u  --  match : %u  --  offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
+#endif
+
+    /* Encode Literal length */
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
+    else *token = (BYTE)(length<<ML_BITS);
+
+    /* Copy Literals */
+    LZ4_wildCopy(*op, *anchor, (*op) + length);
+    *op += length;
+
+    /* Encode Offset */
+    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+
+    /* Encode MatchLength */
+    length = (int)(matchLength-MINMATCH);
+    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
+    else *token += (BYTE)(length);
+
+    /* Prepare next loop */
+    *ip += matchLength;
+    *anchor = *ip;
+
+    return 0;
+}
+
+
+static int LZ4HC_compress_generic (
+    void* ctxvoid,
+    const char* source,
+    char* dest,
+    int inputSize,
+    int maxOutputSize,
+    int compressionLevel,
+    limitedOutput_directive limit
+    )
+{
+    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    unsigned maxNbAttempts;
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref=NULL;
+    const BYTE* start2=NULL;
+    const BYTE* ref2=NULL;
+    const BYTE* start3=NULL;
+    const BYTE* ref3=NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+
+    /* init */
+    if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel;
+    if (compressionLevel < 1) compressionLevel = LZ4HC_compressionLevel_default;
+    maxNbAttempts = 1 << (compressionLevel-1);
+    ctx->end += inputSize;
+
+    ip++;
+
+    /* Main Loop */
+    while (ip < mflimit)
+    {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
+        if (!ml) { ip++; continue; }
+
+        /* saved, in case we would skip too much */
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+        else ml2 = ml;
+
+        if (ml2 == ml)  /* No better match */
+        {
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start0 < ip)
+        {
+            if (start2 < ip + ml0)   /* empirical */
+            {
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3)   /* First Match too small : removed */
+        {
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        /*
+        * Currently we have :
+        * ml2 > ml1, and
+        * ip1+3 <= ip2 (usually < ip1+ml1)
+        */
+        if ((start2 - ip) < OPTIMAL_ML)
+        {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0)
+            {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) /* No better match : 2 sequences to encode */
+        {
+            /* ip & ref are known; Now for ml */
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            /* Now, encode 2 sequences */
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            ip = start2;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
+        {
+            if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+            {
+                if (start2 < ip+ml)
+                {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH)
+                    {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        /*
+        * OK, now we have 3 ascending matches; let's write at least the first one
+        * ip & ref are known; Now for ml
+        */
+        if (start2 < ip+ml)
+        {
+            if ((start2 - ip) < (int)ML_MASK)
+            {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0)
+                {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            }
+            else
+            {
+                ml = (int)(start2 - ip);
+            }
+        }
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+    }
+
+    /* Encode Last Literals */
+    {
+        int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
+{
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)src);
+    if (maxDstSize < LZ4_compressBound(srcSize))
+        return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput);
+    else
+        return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit);
+}
+
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel)
+{
+#if LZ4HC_HEAPMODE==1
+    LZ4HC_Data_Structure* statePtr = malloc(sizeof(LZ4HC_Data_Structure));
+#else
+    LZ4HC_Data_Structure state;
+    LZ4HC_Data_Structure* const statePtr = &state;
+#endif
+    int cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel);
+#if LZ4HC_HEAPMODE==1
+    free(statePtr);
+#endif
+    return cSize;
+}
+
+
+
+/**************************************
+*  Streaming Functions
+**************************************/
+/* allocation */
+LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
+int             LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; }
+
+
+/* initialization */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= sizeof(LZ4_streamHC_t));   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
+    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
+}
+
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
+{
+    LZ4HC_Data_Structure* ctxPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr;
+    if (dictSize > 64 KB)
+    {
+        dictionary += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
+    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
+    ctxPtr->end = (const BYTE*)dictionary + dictSize;
+    return dictSize;
+}
+
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
+{
+    if (ctxPtr->end >= ctxPtr->base + 4)
+        LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+    ctxPtr->dictBase  = ctxPtr->base;
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
+}
+
+static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
+                                            const char* source, char* dest,
+                                            int inputSize, int maxOutputSize, limitedOutput_directive limit)
+{
+    /* auto-init if forgotten */
+    if (ctxPtr->base == NULL)
+        LZ4HC_init (ctxPtr, (const BYTE*) source);
+
+    /* Check overflow */
+    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB)
+    {
+        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+        if (dictSize > 64 KB) dictSize = 64 KB;
+
+        LZ4_loadDictHC((LZ4_streamHC_t*)ctxPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+    }
+
+    /* Check if blocks follow each other */
+    if ((const BYTE*)source != ctxPtr->end)
+        LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
+
+    /* Check overlapping input/dictionary space */
+    {
+        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        const BYTE* dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+        const BYTE* dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
+        if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd))
+        {
+            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
+        }
+    }
+
+    return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    if (maxOutputSize < LZ4_compressBound(inputSize))
+        return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
+    else
+        return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit);
+}
+
+
+/* dictionary saving */
+
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
+{
+    LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr;
+    int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+    if (dictSize > 64 KB) dictSize = 64 KB;
+    if (dictSize < 4) dictSize = 0;
+    if (dictSize > prefixSize) dictSize = prefixSize;
+    memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+    {
+        U32 endIndex = (U32)(streamPtr->end - streamPtr->base);
+        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+        streamPtr->base = streamPtr->end - endIndex;
+        streamPtr->dictLimit = endIndex - dictSize;
+        streamPtr->lowLimit = endIndex - dictSize;
+        if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
+    }
+    return dictSize;
+}
+
+
+/***********************************
+*  Deprecated Functions
+***********************************/
+/* Deprecated compression functions */
+/* These functions are planned to start generate warnings by r131 approximately */
+int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+/* Deprecated streaming functions */
+/* These functions currently generate deprecation warnings */
+int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
+
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+{
+    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+    LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
+    ((LZ4HC_Data_Structure*)state)->inputBuffer = (BYTE*)inputBuffer;
+    return 0;
+}
+
+void* LZ4_createHC (char* inputBuffer)
+{
+    void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure));
+    if (hc4 == NULL) return NULL;   /* not enough memory */
+    LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
+    ((LZ4HC_Data_Structure*)hc4)->inputBuffer = (BYTE*)inputBuffer;
+    return hc4;
+}
+
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+    FREEMEM(LZ4HC_Data);
+    return (0);
+}
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+{
+    LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
+    int dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
+    return (char*)(hc4->inputBuffer + dictSize);
+}
--- a/util/cbfstool/lz4/lib/lz4hc.h
+++ b/util/cbfstool/lz4/lib/lz4hc.h
@ -0,0 +1,189 @@
+/*
+   LZ4 HC - High Compression Mode of LZ4
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*****************************
+*  Includes
+*****************************/
+#include <stddef.h>   /* size_t */
+
+
+/**************************************
+*  Block Compression
+**************************************/
+int LZ4_compress_HC (const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+/*
+LZ4_compress_HC :
+    Destination buffer 'dst' must be already allocated.
+    Compression completion is guaranteed if 'dst' buffer is sized to handle worst circumstances (data not compressible)
+    Worst size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
+      srcSize  : Max supported value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+      compressionLevel : Recommended values are between 4 and 9, although any value between 0 and 16 will work.
+                         0 means "use default value" (see lz4hc.c).
+                         Values >16 behave the same as 16.
+      return : the number of bytes written into buffer 'dst'
+            or 0 if compression fails.
+*/
+
+
+/* Note :
+   Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
+*/
+
+
+int LZ4_sizeofStateHC(void);
+int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+/*
+LZ4_compress_HC_extStateHC() :
+   Use this function if you prefer to manually allocate memory for compression tables.
+   To know how much memory must be allocated for the compression tables, use :
+      int LZ4_sizeofStateHC();
+
+   Allocated memory must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
+
+   The allocated memory can then be provided to the compression functions using 'void* state' parameter.
+   LZ4_compress_HC_extStateHC() is equivalent to previously described function.
+   It just uses externally allocated memory for stateHC.
+*/
+
+
+/**************************************
+*  Streaming Compression
+**************************************/
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+typedef struct { size_t table[LZ4_STREAMHCSIZE_SIZET]; } LZ4_streamHC_t;
+/*
+  LZ4_streamHC_t
+  This structure allows static allocation of LZ4 HC streaming state.
+  State must then be initialized using LZ4_resetStreamHC() before first use.
+
+  Static allocation should only be used in combination with static linking.
+  If you want to use LZ4 as a DLL, please use construction functions below, which are future-proof.
+*/
+
+
+LZ4_streamHC_t* LZ4_createStreamHC(void);
+int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
+/*
+  These functions create and release memory for LZ4 HC streaming state.
+  Newly created states are already initialized.
+  Existing state space can be re-used anytime using LZ4_resetStreamHC().
+  If you use LZ4 as a DLL, use these functions instead of static structure allocation,
+  to avoid size mismatch between different versions.
+*/
+
+void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
+
+int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+
+/*
+  These functions compress data in successive blocks of any size, using previous blocks as dictionary.
+  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+  There is an exception for ring buffers, which can be smaller 64 KB.
+  Such case is automatically detected and correctly handled by LZ4_compress_HC_continue().
+
+  Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
+  A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
+
+  Then, use LZ4_compress_HC_continue() to compress each successive block.
+  It works like LZ4_compress_HC(), but use previous memory blocks as dictionary to improve compression.
+  Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
+  As a reminder, size 'dst' buffer to handle worst cases, using LZ4_compressBound(), to ensure success of compression operation.
+
+  If, for any reason, previous data blocks can't be preserved unmodified in memory during next compression block,
+  you must save it to a safer memory space, using LZ4_saveDictHC().
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
+*/
+
+
+
+/**************************************
+*  Deprecated Functions
+**************************************/
+/* Deprecate Warnings */
+/* Should these warnings messages be a problem,
+   it is generally possible to disable them,
+   with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
+   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
+#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif // LZ4_DEPRECATE_WARNING_DEFBLOCK
+
+/* compression functions */
+/* these functions are planned to trigger warning messages by r131 approximately */
+int LZ4_compressHC                (const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput  (const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compressHC2               (const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Streaming functions following the older model; should no longer be used */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead")     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")   int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int   LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_resetStreamHC() instead")  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
+#if defined (__cplusplus)
+}
+#endif
--- a/util/cbfstool/lz4/lib/xxhash.c
+++ b/util/cbfstool/lz4/lib/xxhash.c
@ -0,0 +1,962 @@
+/*
+xxHash - Fast Hash algorithm
+Copyright (C) 2012-2015, Yann Collet
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/**************************************
+*  Tuning parameters
+**************************************/
+/* XXH_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ *            It can generate buggy code on targets which generate assembly depending on alignment.
+ *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#    define XXH_FORCE_MEMORY_ACCESS 2
+#  elif defined(__INTEL_COMPILER) || \
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+#    define XXH_FORCE_MEMORY_ACCESS 1
+#  endif
+#endif
+
+/* XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/* XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independance be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#define XXH_FORCE_NATIVE_FORMAT 0
+
+/* XXH_USELESS_ALIGN_BRANCH :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : don't make a test between aligned/unaligned, because performance will be the same.
+ * It saves one initial branch per hash.
+ */
+#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USELESS_ALIGN_BRANCH 1
+#endif
+
+
+/**************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/**************************************
+*  Includes & Memory related functions
+***************************************/
+#include "xxhash.h"
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/* for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+
+/**************************************
+*  Basic Types
+***************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+  typedef uint8_t  BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char      BYTE;
+  typedef unsigned short     U16;
+  typedef unsigned int       U32;
+  typedef   signed int       S32;
+  typedef unsigned long long U64;
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
+
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U32 XXH_read32(const void* memPtr)
+{
+    U32 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+static U64 XXH_read64(const void* memPtr)
+{
+    U64 val;
+    memcpy(&val, memPtr, sizeof(val));
+    return val;
+}
+
+#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS
+
+
+/******************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+static U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/***************************************
+*  Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example one the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+    static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
+#endif
+
+
+/*****************************
+*  Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+    else
+        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+
+/***************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
+
+
+/***************************************
+*  Constants
+***************************************/
+#define PRIME32_1   2654435761U
+#define PRIME32_2   2246822519U
+#define PRIME32_3   3266489917U
+#define PRIME32_4    668265263U
+#define PRIME32_5    374761393U
+
+#define PRIME64_1 11400714785074694791ULL
+#define PRIME64_2 14029467366897019727ULL
+#define PRIME64_3  1609587929392839161ULL
+#define PRIME64_4  9650029242287828579ULL
+#define PRIME64_5  2870177450012600261ULL
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do
+        {
+            v1 += XXH_get32bits(p) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_get32bits(p) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_get32bits(p) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_get32bits(p) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    }
+    else
+    {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USELESS_ALIGN_BRANCH)
+    if ((((size_t)input) & 3) == 0)   /* Input is 4-bytes aligned, leverage the speed benefit */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_get64bits(p);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USELESS_ALIGN_BRANCH)
+    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/****************************************************
+*  Advanced Hash Functions
+****************************************************/
+
+/*** Allocation ***/
+typedef struct
+{
+    U64 total_len;
+    U32 seed;
+    U32 v1;
+    U32 v2;
+    U32 v3;
+    U32 v4;
+    U32 mem32[4];   /* defined as U32 for alignment */
+    U32 memsize;
+} XXH_istate32_t;
+
+typedef struct
+{
+    U64 total_len;
+    U64 seed;
+    U64 v1;
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    U64 mem64[4];   /* defined as U64 for alignment */
+    U32 memsize;
+} XXH_istate64_t;
+
+
+XXH32_state_t* XXH32_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH64_state_t* XXH64_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+
+/*** Hash feed ***/
+
+XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed)
+{
+    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME32_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   /* fill in tmp buffer */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {
+            const U32* p32 = state->mem32;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v1 = XXH_rotl32(state->v1, 13);
+            state->v1 *= PRIME32_1;
+            p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v2 = XXH_rotl32(state->v2, 13);
+            state->v2 *= PRIME32_1;
+            p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v3 = XXH_rotl32(state->v3, 13);
+            state->v3 *= PRIME32_1;
+            p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v4 = XXH_rotl32(state->v4, 13);
+            state->v4 *= PRIME32_1;
+            p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate32_t* state = (const XXH_istate32_t*) state_in;
+    const BYTE * p = (const BYTE*)state->mem32;
+    const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16)
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32  = state->seed + PRIME32_5;
+    }
+
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        {
+            const U64* p64 = state->mem64;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            p+=8;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            p+=8;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            p+=8;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+            p+=8;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate64_t * state = (const XXH_istate64_t *) state_in;
+    const BYTE * p = (const BYTE*)state->mem64;
+    const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_readLE64(p, endian);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
--- a/util/cbfstool/lz4/lib/xxhash.h
+++ b/util/cbfstool/lz4/lib/xxhash.h
@ -0,0 +1,192 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MumurHash 3a    2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*****************************
+*  Definitions
+*****************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*****************************
+*  Namespace Emulation
+*****************************/
+/* Motivations :
+
+If you need to include xxHash into your library,
+but wish to avoid xxHash symbols to be present on your library interface
+in an effort to avoid potential name collision if another library also includes xxHash,
+
+you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash
+with the value of XXH_NAMESPACE (so avoid to keep it NULL, and avoid numeric values).
+
+Note that no change is required within the calling program :
+it can still call xxHash functions using their regular name.
+They will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#endif
+
+
+/*****************************
+*  Simple Hash Functions
+*****************************/
+
+unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
+unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*
+XXH32() :
+    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+    Faster on 64-bits systems. Slower on 32-bits systems.
+*/
+
+
+
+/*****************************
+*  Advanced Hash Functions
+*****************************/
+typedef struct { long long ll[ 6]; } XXH32_state_t;
+typedef struct { long long ll[11]; } XXH64_state_t;
+
+/*
+These structures allow static allocation of XXH states.
+States must then be initialized using XXHnn_reset() before first use.
+
+If you prefer dynamic allocation, please refer to functions below.
+*/
+
+XXH32_state_t* XXH32_createState(void);
+XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH64_state_t* XXH64_createState(void);
+XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*
+These functions create and release memory for XXH state.
+States must then be initialized using XXHnn_reset() before first use.
+*/
+
+
+XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
+XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions calculate the xxHash of an input provided in multiple smaller packets,
+as opposed to an input provided as a single block.
+
+XXH state space must first be allocated, using either static or dynamic method provided above.
+
+Start a new hash by initializing state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, input must be valid, meaning allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, you can produce a hash anytime, by using XXHnn_digest().
+This function returns the final nn-bits hash.
+You can nonetheless continue feeding the hash state with more input,
+and therefore get some new hashes, by calling again XXHnn_digest().
+
+When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
--- a/util/cbfstool/lz4/lz4_Block_format.md
+++ b/util/cbfstool/lz4/lz4_Block_format.md
@ -0,0 +1,127 @@
+LZ4 Block Format Description
+============================
+Last revised: 2015-05-07.
+Author : Yann Collet
+
+
+This specification is intended for developers
+willing to produce LZ4-compatible compressed data blocks
+using any programming language.
+
+LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
+There is no entropy encoder back-end nor framing layer.
+The latter is assumed to be handled by other parts of the system (see [LZ4 Frame format]).
+This design is assumed to favor simplicity and speed.
+It helps later on for optimizations, compactness, and features.
+
+This document describes only the block format,
+not how the compressor nor decompressor actually work.
+The correctness of the decompressor should not depend
+on implementation details of the compressor, and vice versa.
+
+[LZ4 Frame format]: lz4_Frame_format.md
+
+
+
+Compressed block format
+-----------------------
+An LZ4 compressed block is composed of sequences.
+A sequence is a suite of literals (not-compressed bytes),
+followed by a match copy.
+
+Each sequence starts with a token.
+The token is a one byte value, separated into two 4-bits fields.
+Therefore each field ranges from 0 to 15.
+
+
+The first field uses the 4 high-bits of the token.
+It provides the length of literals to follow.
+
+If the field value is 0, then there is no literal.
+If it is 15, then we need to add some more bytes to indicate the full length.
+Each additional byte then represent a value from 0 to 255,
+which is added to the previous value to produce a total length.
+When the byte value is 255, another byte is output.
+There can be any number of bytes following the token. There is no "size limit".
+(Side note : this is why a not-compressible input block is expanded by 0.4%).
+
+Example 1 : A length of 48 will be represented as :
+
+  - 15 : value for the 4-bits High field
+  - 33 : (=48-15) remaining length to reach 48
+
+Example 2 : A length of 280 will be represented as :
+
+  - 15  : value for the 4-bits High field
+  - 255 : following byte is maxed, since 280-15 >= 255
+  - 10  : (=280 - 15 - 255) ) remaining length to reach 280
+
+Example 3 : A length of 15 will be represented as :
+
+  - 15 : value for the 4-bits High field
+  - 0  : (=15-15) yes, the zero must be output
+
+Following the token and optional length bytes, are the literals themselves.
+They are exactly as numerous as previously decoded (length of literals).
+It's possible that there are zero literal.
+
+
+Following the literals is the match copy operation.
+
+It starts by the offset.
+This is a 2 bytes value, in little endian format
+(the 1st byte is the "low" byte, the 2nd one is the "high" byte).
+
+The offset represents the position of the match to be copied from.
+1 means "current position - 1 byte".
+The maximum offset value is 65535, 65536 cannot be coded.
+Note that 0 is an invalid value, not used.
+
+Then we need to extract the match length.
+For this, we use the second token field, the low 4-bits.
+Value, obviously, ranges from 0 to 15.
+However here, 0 means that the copy operation will be minimal.
+The minimum length of a match, called minmatch, is 4.
+As a consequence, a 0 value means 4 bytes, and a value of 15 means 19+ bytes.
+Similar to literal length, on reaching the highest possible value (15),
+we output additional bytes, one at a time, with values ranging from 0 to 255.
+They are added to total to provide the final match length.
+A 255 value means there is another byte to read and add.
+There is no limit to the number of optional bytes that can be output this way.
+(This points towards a maximum achievable compression ratio of about 250).
+
+With the offset and the matchlength,
+the decoder can now proceed to copy the data from the already decoded buffer.
+On decoding the matchlength, we reach the end of the compressed sequence,
+and therefore start another one.
+
+
+Parsing restrictions
+-----------------------
+There are specific parsing rules to respect in order to remain compatible
+with assumptions made by the decoder :
+
+1. The last 5 bytes are always literals
+2. The last match must start at least 12 bytes before end of block.
+   Consequently, a block with less than 13 bytes cannot be compressed.
+
+These rules are in place to ensure that the decoder
+will never read beyond the input buffer, nor write beyond the output buffer.
+
+Note that the last sequence is also incomplete,
+and stops right after literals.
+
+
+Additional notes
+-----------------------
+There is no assumption nor limits to the way the compressor
+searches and selects matches within the source data block.
+It could be a fast scan, a multi-probe, a full search using BST,
+standard hash chains or MMC, well whatever.
+
+Advanced parsing strategies can also be implemented, such as lazy match,
+or full optimal parsing.
+
+All these trade-off offer distinctive speed/memory/compression advantages.
+Whatever the method used by the compressor, its result will be decodable
+by any LZ4 decoder if it follows the format specification described above.
--- a/util/cbfstool/lz4/lz4_Frame_format.md
+++ b/util/cbfstool/lz4/lz4_Frame_format.md
@ -0,0 +1,385 @@
+LZ4 Frame Format Description
+============================
+
+###Notices
+
+Copyright (c) 2013-2015 Yann Collet
+
+Permission is granted to copy and distribute this document
+for any  purpose and without charge,
+including translations into other  languages
+and incorporation into compilations,
+provided that the copyright notice and this notice are preserved,
+and that any substantive changes or deletions from the original
+are clearly marked.
+Distribution of this document is unlimited.
+
+###Version
+
+1.5.1 (31/03/2015)
+
+
+Introduction
+------------
+
+The purpose of this document is to define a lossless compressed data format,
+that is independent of CPU type, operating system,
+file system and character set, suitable for
+File compression, Pipe and streaming compression
+using the [LZ4 algorithm](http://www.lz4.org).
+
+The data can be produced or consumed,
+even for an arbitrarily long sequentially presented input data stream,
+using only an a priori bounded amount of intermediate storage,
+and hence can be used in data communications.
+The format uses the LZ4 compression method,
+and optional [xxHash-32 checksum method](https://github.com/Cyan4973/xxHash),
+for detection of data corruption.
+
+The data format defined by this specification
+does not attempt to allow random access to compressed data.
+
+This specification is intended for use by implementers of software
+to compress data into LZ4 format and/or decompress data from LZ4 format.
+The text of the specification assumes a basic background in programming
+at the level of bits and other primitive data representations.
+
+Unless otherwise indicated below,
+a compliant compressor must produce data sets
+that conform to the specifications presented here.
+It doesn’t need to support all options though.
+
+A compliant decompressor must be able to decompress
+at least one working set of parameters
+that conforms to the specifications presented here.
+It may also ignore checksums.
+Whenever it does not support a specific parameter within the compressed stream,
+it must produce a non-ambiguous error code
+and associated error message explaining which parameter is unsupported.
+
+
+General Structure of LZ4 Frame format
+-------------------------------------
+
+| MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
+|:-------:|:-------------:| ----- | ----- | ------- | ----------- |
+| 4 bytes |  3-11 bytes   |       |       | 4 bytes | 0-4 bytes   |
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0x184D2204
+
+__Frame Descriptor__
+
+3 to 11 Bytes, to be detailed in the next part.
+Most important part of the spec.
+
+__Data Blocks__
+
+To be detailed later on.
+That’s where compressed data is stored.
+
+__EndMark__
+
+The flow of blocks ends when the last data block has a size of “0”.
+The size is expressed as a 32-bits value.
+
+__Content Checksum__
+
+Content Checksum verify that the full content has been decoded correctly.
+The content checksum is the result
+of [xxh32() hash function](https://github.com/Cyan4973/xxHash)
+digesting the original (decoded) data as input, and a seed of zero.
+Content checksum is only present when its associated flag
+is set in the frame descriptor.
+Content Checksum validates the result,
+that all blocks were fully transmitted in the correct order and without error,
+and also that the encoding/decoding process itself generated no distortion.
+Its usage is recommended.
+
+__Frame Concatenation__
+
+In some circumstances, it may be preferable to append multiple frames,
+for example in order to add new data to an existing compressed file
+without re-framing it.
+
+In such case, each frame has its own set of descriptor flags.
+Each frame is considered independent.
+The only relation between frames is their sequential order.
+
+The ability to decode multiple concatenated frames
+within a single stream or file
+is left outside of this specification.
+As an example, the reference lz4 command line utility behavior is
+to decode all concatenated frames in their sequential order.
+
+
+Frame Descriptor
+----------------
+
+| FLG     | BD      | (Content Size) | HC      |
+| ------- | ------- |:--------------:| ------- |
+| 1 byte  | 1 byte  |  0 - 8 bytes   | 1 byte  |
+
+The descriptor uses a minimum of 3 bytes,
+and up to 11 bytes depending on optional parameters.
+
+__FLG byte__
+
+|  BitNb  |   7-6   |    5    |     4     |   3     |     2     |    1-0   |
+| ------- | ------- | ------- | --------- | ------- | --------- | -------- |
+|FieldName| Version | B.Indep | B.Checksum| C.Size  | C.Checksum|*Reserved*|
+
+
+__BD byte__
+
+|  BitNb  |     7    |     6-5-4    |  3-2-1-0 |
+| ------- | -------- | ------------ | -------- |
+|FieldName|*Reserved*| Block MaxSize|*Reserved*|
+
+In the tables, bit 7 is highest bit, while bit 0 is lowest.
+
+__Version Number__
+
+2-bits field, must be set to “01”.
+Any other value cannot be decoded by this version of the specification.
+Other version numbers will use different flag layouts.
+
+__Block Independence flag__
+
+If this flag is set to “1”, blocks are independent.
+If this flag is set to “0”, each block depends on previous ones
+(up to LZ4 window size, which is 64 KB).
+In such case, it’s necessary to decode all blocks in sequence.
+
+Block dependency improves compression ratio, especially for small blocks.
+On the other hand, it makes direct jumps or multi-threaded decoding impossible.
+
+__Block checksum flag__
+
+If this flag is set, each data block will be followed by a 4-bytes checksum,
+calculated by using the xxHash-32 algorithm on the raw (compressed) data block.
+The intention is to detect data corruption (storage or transmission errors)
+immediately, before decoding.
+Block checksum usage is optional.
+
+__Content Size flag__
+
+If this flag is set, the uncompressed size of data included within the frame
+will be present as an 8 bytes unsigned little endian value, after the flags.
+Content Size usage is optional.
+
+__Content checksum flag__
+
+If this flag is set, a content checksum will be appended after the EndMark.
+
+Recommended value : “1” (content checksum is present)
+
+__Block Maximum Size__
+
+This information is intended to help the decoder allocate memory.
+Size here refers to the original (uncompressed) data size.
+Block Maximum Size is one value among the following table :
+
+|  0  |  1  |  2  |  3  |   4   |   5    |  6   |  7   |
+| --- | --- | --- | --- | ----- | ------ | ---- | ---- |
+| N/A | N/A | N/A | N/A | 64 KB | 256 KB | 1 MB | 4 MB |
+
+The decoder may refuse to allocate block sizes above a (system-specific) size.
+Unused values may be used in a future revision of the spec.
+A decoder conformant to the current version of the spec
+is only able to decode blocksizes defined in this spec.
+
+__Reserved bits__
+
+Value of reserved bits **must** be 0 (zero).
+Reserved bit might be used in a future version of the specification,
+typically enabling new optional features.
+If this happens, a decoder respecting the current version of the specification
+shall not be able to decode such a frame.
+
+__Content Size__
+
+This is the original (uncompressed) size.
+This information is optional, and only present if the associated flag is set.
+Content size is provided using unsigned 8 Bytes, for a maximum of 16 HexaBytes.
+Format is Little endian.
+This value is informational, typically for display or memory allocation.
+It can be skipped by a decoder, or used to validate content correctness.
+
+__Header Checksum__
+
+One-byte checksum of combined descriptor fields, including optional ones.
+The value is the second byte of xxh32() : ` (xxh32()>>8) & 0xFF `
+using zero as a seed,
+and the full Frame Descriptor as an input
+(including optional fields when they are present).
+A wrong checksum indicates an error in the descriptor.
+Header checksum is informational and can be skipped.
+
+
+Data Blocks
+-----------
+
+| Block Size |  data  | (Block Checksum) |
+|:----------:| ------ |:----------------:|
+|  4 bytes   |        |   0 - 4 bytes    |
+
+
+__Block Size__
+
+This field uses 4-bytes, format is little-endian.
+
+The highest bit is “1” if data in the block is uncompressed.
+
+The highest bit is “0” if data in the block is compressed by LZ4.
+
+All other bits give the size, in bytes, of the following data block
+(the size does not include the block checksum if present).
+
+Block Size shall never be larger than Block Maximum Size.
+Such a thing could happen for incompressible source data.
+In such case, such a data block shall be passed in uncompressed format.
+
+__Data__
+
+Where the actual data to decode stands.
+It might be compressed or not, depending on previous field indications.
+Uncompressed size of Data can be any size, up to “block maximum size”.
+Note that data block is not necessarily full :
+an arbitrary “flush” may happen anytime. Any block can be “partially filled”.
+
+__Block checksum__
+
+Only present if the associated flag is set.
+This is a 4-bytes checksum value, in little endian format,
+calculated by using the xxHash-32 algorithm on the raw (undecoded) data block,
+and a seed of zero.
+The intention is to detect data corruption (storage or transmission errors)
+before decoding.
+
+Block checksum is cumulative with Content checksum.
+
+
+Skippable Frames
+----------------
+
+| Magic Number | Frame Size | User Data |
+|:------------:|:----------:| --------- |
+|   4 bytes    |  4 bytes   |           |
+
+Skippable frames allow the integration of user-defined data
+into a flow of concatenated frames.
+Its design is pretty straightforward,
+with the sole objective to allow the decoder to quickly skip
+over user-defined data and continue decoding.
+
+For the purpose of facilitating identification,
+it is discouraged to start a flow of concatenated frames with a skippable frame.
+If there is a need to start such a flow with some user data
+encapsulated into a skippable frame,
+it’s recommended to start with a zero-byte LZ4 frame
+followed by a skippable frame.
+This will make it easier for file type identifiers.
+
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0x184D2A5X, which means any value from 0x184D2A50 to 0x184D2A5F.
+All 16 values are valid to identify a skippable frame.
+
+__Frame Size__
+
+This is the size, in bytes, of the following User Data
+(without including the magic number nor the size field itself).
+4 Bytes, Little endian format, unsigned 32-bits.
+This means User Data can’t be bigger than (2^32-1) Bytes.
+
+__User Data__
+
+User Data can be anything. Data will just be skipped by the decoder.
+
+
+Legacy frame
+------------
+
+The Legacy frame format was defined into the initial versions of “LZ4Demo”.
+Newer compressors should not use this format anymore, as it is too restrictive.
+
+Main characteristics of the legacy format :
+
+- Fixed block size : 8 MB.
+- All blocks must be completely filled, except the last one.
+- All blocks are always compressed, even when compression is detrimental.
+- The last block is detected either because
+  it is followed by the “EOF” (End of File) mark,
+  or because it is followed by a known Frame Magic Number.
+- No checksum
+- Convention is Little endian
+
+| MagicNb | B.CSize | CData | B.CSize | CData |  (...)  | EndMark |
+| ------- | ------- | ----- | ------- | ----- | ------- | ------- |
+| 4 bytes | 4 bytes | CSize | 4 bytes | CSize | x times |   EOF   |
+
+
+__Magic Number__
+
+4 Bytes, Little endian format.
+Value : 0x184C2102
+
+__Block Compressed Size__
+
+This is the size, in bytes, of the following compressed data block.
+4 Bytes, Little endian format.
+
+__Data__
+
+Where the actual compressed data stands.
+Data is always compressed, even when compression is detrimental.
+
+__EndMark__
+
+End of legacy frame is implicit only.
+It must be followed by a standard EOF (End Of File) signal,
+wether it is a file or a stream.
+
+Alternatively, if the frame is followed by a valid Frame Magic Number,
+it is considered completed.
+It makes legacy frames compatible with frame concatenation.
+
+Any other value will be interpreted as a block size,
+and trigger an error if it does not fit within acceptable range.
+
+
+Version changes
+---------------
+
+1.5.1 : changed format to MarkDown compatible
+
+1.5 : removed Dictionary ID from specification
+
+1.4.1 : changed wording from “stream” to “frame”
+
+1.4 : added skippable streams, re-added stream checksum
+
+1.3 : modified header checksum
+
+1.2 : reduced choice of “block size”, to postpone decision on “dynamic size of BlockSize Field”.
+
+1.1 : optional fields are now part of the descriptor
+
+1.0 : changed “block size” specification, adding a compressed/uncompressed flag
+
+0.9 : reduced scale of “block maximum size” table
+
+0.8 : removed : high compression flag
+
+0.7 : removed : stream checksum
+
+0.6 : settled : stream size uses 8 bytes, endian convention is little endian
+
+0.5: added copyright notice
+
+0.4 : changed format to Google Doc compatible OpenDocument
--- a/util/cbmem/cbmem.c
+++ b/util/cbmem/cbmem.c
@ -500,6 +500,8 @@ static const struct timestamp_id_to_name {
 	{ TS_END_COPYROM,	"finished loading romstage" },
 	{ TS_START_ULZMA,	"starting LZMA decompress (ignore for x86)" },
 	{ TS_END_ULZMA,		"finished LZMA decompress (ignore for x86)" },
+	{ TS_START_ULZ4F,	"starting LZ4 decompress (ignore for x86)" },
+	{ TS_END_ULZ4F,		"finished LZ4 decompress (ignore for x86)" },
 	{ TS_DEVICE_ENUMERATE,	"device enumeration" },
 	{ TS_DEVICE_CONFIGURE,	"device configuration" },
 	{ TS_DEVICE_ENABLE,	"device enable" },
--- a/util/nvramtool/cbfs.h
+++ b/util/nvramtool/cbfs.h
@ -59,6 +59,7 @@ typedef uint8_t u8;

 #define CBFS_COMPRESS_NONE  0
 #define CBFS_COMPRESS_LZMA  1
+#define CBFS_COMPRESS_LZ4   2

 /** These are standard component types for well known
    components (i.e - those that coreboot needs to consume.