From 0655f78041ef617844f436306fa5431e211f4431 Mon Sep 17 00:00:00 2001
From: Julius Werner <jwerner@chromium.org>
Date: Wed, 11 Dec 2019 16:19:48 -0800
Subject: [PATCH] commonlib/bsd: Add new CBFS core implementation

This patch adds a new CBFS implementation that is intended to replace
the existing commonlib/cbfs.c. The new implementation is designed to
meet a bunch of current and future goals that in aggregate make it
easier to start from scratch than to adapt the existing implementation:

1. Be BSD-licensed so it can eventually be shared with libpayload.
2. Allow generating/verifying a metadata hash for future CBFS per-file
   verification (see [1][2]).
3. Be very careful about reading (not mmapping) all data only once, to be
   suitable for eventual TOCTOU-safe verification.
4. Make it possible to efficiently implement all current and future
   firmware use cases (both with and without verification).

The main primitive is the cbfs_walk() function which will traverse a
CBFS and call a callback for every file. cbfs_lookup() uses this to
implement the most common use case of finding a file so that it can be
read. A host application using this code (e.g. coreboot, libpayload,
cbfstool) will need to provide a <cbfs_glue.h> header to provide the
glue to access the respective CBFS storage backend implementation.

This patch merely adds the code, the next patch will integrate it into
coreboot.

[1]: https://www.youtube.com/watch?v=Hs_EhewBgtM
[2]: https://osfc.io/uploads/talk/paper/47/The_future_of_firmware_verification_in_coreboot.pdf
(Note: In early discussions the metadata hash was called "master hash".)

Change-Id: Ica64c1751fa37686814c0247460c399261d5814c
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/38421
Reviewed-by: Aaron Durbin <adurbin@chromium.org>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
---
 MAINTAINERS                                   |   9 +-
 src/commonlib/bsd/cbfs_private.c              | 161 ++++++++++++++++++
 .../bsd/include/commonlib/bsd/cb_err.h        |   5 +
 .../bsd/include/commonlib/bsd/cbfs_private.h  | 116 +++++++++++++
 .../include/commonlib/bsd/cbfs_serialized.h   |   6 +-
 5 files changed, 292 insertions(+), 5 deletions(-)
 create mode 100644 src/commonlib/bsd/cbfs_private.c
 create mode 100644 src/commonlib/bsd/include/commonlib/bsd/cbfs_private.h

diff --git a/MAINTAINERS b/MAINTAINERS
index d867c78465..ba88813509 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -694,8 +694,13 @@ OPTION ROM EXECUTION & X86EMU
 F:	src/device/oprom/
 
 CBFS
-F:	src/include/cbfs.h
-F:	src/commonlib/bsd/include/commonlib/bsd/cbfs_serialized.h
+M:	Julius Werner <jwerner@chromium.org>
+F:	src/include/cbfs*
+F:	src/commonlib/bsd/include/commonlib/bsd/cbfs*
+F:	src/commonlib/bsd/cbfs*
+F:	src/lib/cbfs.c
+
+CBFSTOOL
 F:	util/cbfstool/
 
 CBMEM
diff --git a/src/commonlib/bsd/cbfs_private.c b/src/commonlib/bsd/cbfs_private.c
new file mode 100644
index 0000000000..035684b91e
--- /dev/null
+++ b/src/commonlib/bsd/cbfs_private.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
+
+#include <commonlib/bsd/cbfs_private.h>
+#include <assert.h>
+
+static cb_err_t read_next_header(cbfs_dev_t dev, size_t *offset, struct cbfs_file *buffer)
+{
+	/* Probe every CBFS_ALIGNMENT boundary from |*offset| on until a file magic shows up. */
+	const size_t end = cbfs_dev_size(dev);
+	const size_t hdr_size = sizeof(*buffer);
+
+	DEBUG("Looking for next file @%#zx...\n", *offset);
+	for (*offset = ALIGN_UP(*offset, CBFS_ALIGNMENT); *offset + hdr_size < end;
+	     *offset += CBFS_ALIGNMENT) {
+		if (cbfs_dev_read(dev, buffer, *offset, hdr_size) != hdr_size)
+			return CB_CBFS_IO;
+		if (!memcmp(buffer->magic, CBFS_FILE_MAGIC, sizeof(buffer->magic)))
+			return CB_SUCCESS;
+	}
+
+	DEBUG("End of CBFS reached\n");
+	return CB_CBFS_NOT_FOUND;
+}
+
+/* Traverse the CBFS on |dev|. The full contract is documented in cbfs_private.h. */
+cb_err_t cbfs_walk(cbfs_dev_t dev, cb_err_t (*walker)(cbfs_dev_t dev, size_t offset,
+						      const union cbfs_mdata *mdata,
+						      size_t already_read, void *arg),
+		   void *arg, struct vb2_hash *metadata_hash, enum cbfs_walk_flags flags)
+{
+	const bool do_hash = CBFS_ENABLE_HASHING && metadata_hash;
+	struct vb2_digest_context dc;
+	vb2_error_t vbrv;
+
+	assert(CBFS_ENABLE_HASHING || (!metadata_hash && !(flags & CBFS_WALK_WRITEBACK_HASH)));
+	if (do_hash && (vbrv = vb2_digest_init(&dc, metadata_hash->algo))) {
+		ERROR("Metadata hash digest (%d) init error: %#x\n", metadata_hash->algo, vbrv);
+		return CB_ERR_ARG;
+	}
+
+	size_t offset = 0;
+	cb_err_t ret_header;
+	cb_err_t ret_walker = CB_CBFS_NOT_FOUND;
+	union cbfs_mdata mdata;
+	while ((ret_header = read_next_header(dev, &offset, &mdata.h)) == CB_SUCCESS) {
+		const uint32_t attr_offset = be32toh(mdata.h.attributes_offset);
+		const uint32_t data_offset = be32toh(mdata.h.offset);
+		const uint32_t data_length = be32toh(mdata.h.len);
+		const uint32_t type = be32toh(mdata.h.type);
+		const bool empty = (type == CBFS_TYPE_DELETED || type == CBFS_TYPE_DELETED2);
+		const size_t remaining = cbfs_dev_size(dev) - offset;
+
+		DEBUG("Found CBFS header @%#zx (type %d, attr +%#x, data +%#x, length %#x)\n",
+		      offset, type, attr_offset, data_offset, data_length);
+		if (data_offset > sizeof(mdata)) {
+			ERROR("File metadata @%#zx too large\n", offset);
+			goto next_file;
+		}
+		if (empty && !(flags & CBFS_WALK_INCLUDE_EMPTY))
+			goto next_file;
+		/* When hashing we need to read everything. Otherwise skip the attributes.
+		   attr_offset may be 0, which means there are no attributes. */
+		ssize_t todo;
+		if (do_hash || attr_offset == 0)
+			todo = data_offset - sizeof(mdata.h);
+		else
+			todo = attr_offset - sizeof(mdata.h);
+		if (todo <= 0 || data_offset < attr_offset) {
+			ERROR("Corrupt file header @%#zx\n", offset);
+			goto next_file;
+		}
+		/* Read the rest of the metadata (filename, and possibly attributes). */
+		assert(todo > 0 && todo <= sizeof(mdata) - sizeof(mdata.h));
+		if (cbfs_dev_read(dev, mdata.raw + sizeof(mdata.h),
+				  offset + sizeof(mdata.h), todo) != todo)
+			return CB_CBFS_IO;
+		/* Flash content may lack the \0: only print from the bytes we just read. */
+		DEBUG("File name: '%.*s'\n", (int)todo, mdata.filename);
+		if (do_hash && !empty && vb2_digest_extend(&dc, mdata.raw, data_offset))
+			return CB_ERR;
+		if (walker && ret_walker == CB_CBFS_NOT_FOUND)
+			ret_walker = walker(dev, offset, &mdata, sizeof(mdata.h) + todo, arg);
+		/* Return IO errors immediately. For others, finish the hash first if needed. */
+		if (ret_walker == CB_CBFS_IO || (ret_walker != CB_CBFS_NOT_FOUND && !do_hash))
+			return ret_walker;
+next_file:
+		/* Sizes are untrusted: do not let them stall the walk or wrap |offset|. */
+		if (data_length > remaining || data_offset > remaining - data_length)
+			offset += remaining;		/* File overruns |dev|: end the walk. */
+		else if (data_offset == 0 && data_length == 0)
+			offset += CBFS_ALIGNMENT;	/* Zero-sized: step past this header. */
+		else
+			offset += (size_t)data_offset + data_length;
+	}
+	if (ret_header != CB_CBFS_NOT_FOUND)
+		return ret_header;
+
+	if (do_hash) {
+		uint8_t real_hash[VB2_MAX_DIGEST_SIZE];
+		size_t hash_size = vb2_digest_size(metadata_hash->algo);
+		if (vb2_digest_finalize(&dc, real_hash, hash_size))
+			return CB_ERR;
+		if (flags & CBFS_WALK_WRITEBACK_HASH)
+			memcpy(metadata_hash->raw, real_hash, hash_size);
+		else if (memcmp(metadata_hash->raw, real_hash, hash_size) != 0)
+			return CB_CBFS_HASH_MISMATCH;
+	}
+	return ret_walker;
+}
+
+cb_err_t cbfs_copy_fill_metadata(union cbfs_mdata *dst, const union cbfs_mdata *src,
+				 size_t already_read, cbfs_dev_t dev, size_t offset)
+{
+	/* Whatever cbfs_walk() has buffered already can be taken over as-is. */
+	memcpy(dst, src, already_read);
+	/* Anything beyond that (only in the non-hashing case) still has to come from |dev|. */
+	const size_t rest = be32toh(src->h.offset) - already_read;
+	assert(rest <= sizeof(*dst) - already_read);
+	if (rest == 0)
+		return CB_SUCCESS;
+	if (cbfs_dev_read(dev, dst->raw + already_read, offset + already_read, rest) != rest)
+		return CB_CBFS_IO;
+	return CB_SUCCESS;
+}
+
+struct cbfs_lookup_args {	/* Handed from cbfs_lookup() to lookup_walker() via |arg|. */
+	union cbfs_mdata *mdata_out;	/* Receives the metadata of the matching file. */
+	const char *name;		/* File name to look for. */
+	size_t namesize;		/* Size of |name| in bytes, including the trailing \0. */
+	size_t *data_offset_out;	/* Receives the offset of the matching file's data. */
+};
+
+static cb_err_t lookup_walker(cbfs_dev_t dev, size_t offset, const union cbfs_mdata *mdata,
+			      size_t already_read, void *arg)
+{
+	const struct cbfs_lookup_args *const want = arg;
+	const size_t name_room = already_read - offsetof(union cbfs_mdata, filename);
+	/* Only memcmp() once we know the wanted name (incl. \0) fits into what was read. */
+	if (want->namesize > name_room)
+		return CB_CBFS_NOT_FOUND;
+	if (memcmp(want->name, mdata->filename, want->namesize) != 0)
+		return CB_CBFS_NOT_FOUND;
+
+	LOG("Found '%s' @%#zx size %#x\n", want->name, offset, be32toh(mdata->h.len));
+	if (cbfs_copy_fill_metadata(want->mdata_out, mdata, already_read, dev, offset))
+		return CB_CBFS_IO;
+	*want->data_offset_out = offset + be32toh(mdata->h.offset);
+	return CB_SUCCESS;
+}
+
+cb_err_t cbfs_lookup(cbfs_dev_t dev, const char *name, union cbfs_mdata *mdata_out,
+		     size_t *data_offset_out, struct vb2_hash *metadata_hash)
+{
+	/* The \0 is part of the compared size, so lookup_walker() only accepts exact names. */
+	const size_t name_bytes = strlen(name) + 1;
+	struct cbfs_lookup_args lookup = {
+		.name = name, .namesize = name_bytes,
+		.mdata_out = mdata_out, .data_offset_out = data_offset_out,
+	};
+	return cbfs_walk(dev, lookup_walker, &lookup, metadata_hash, 0);
+}
diff --git a/src/commonlib/bsd/include/commonlib/bsd/cb_err.h b/src/commonlib/bsd/include/commonlib/bsd/cb_err.h
index ab419a7709..e5aa852617 100644
--- a/src/commonlib/bsd/include/commonlib/bsd/cb_err.h
+++ b/src/commonlib/bsd/include/commonlib/bsd/cb_err.h
@@ -34,6 +34,11 @@ enum cb_err {
 	CB_I2C_PROTOCOL_ERROR	= -302,	/**< Data lost or spurious slave
 					     device response, try again? */
 	CB_I2C_TIMEOUT		= -303, /**< Transmission timed out */
+
+	/* CBFS errors */
+	CB_CBFS_IO		= -400, /**< Underlying I/O error */
+	CB_CBFS_NOT_FOUND	= -401, /**< File not found in directory */
+	CB_CBFS_HASH_MISMATCH	= -402, /**< Metadata hash validation failed */
 };
 
 /* Don't typedef the enum directly, so the size is unambiguous for serialization. */
diff --git a/src/commonlib/bsd/include/commonlib/bsd/cbfs_private.h b/src/commonlib/bsd/include/commonlib/bsd/cbfs_private.h
new file mode 100644
index 0000000000..aaee62f4c3
--- /dev/null
+++ b/src/commonlib/bsd/include/commonlib/bsd/cbfs_private.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
+
+#ifndef _COMMONLIB_BSD_CBFS_PRIVATE_H_
+#define _COMMONLIB_BSD_CBFS_PRIVATE_H_
+
+
+#include <commonlib/bsd/cb_err.h>
+#include <commonlib/bsd/cbfs_serialized.h>
+#include <endian.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vb2_sha.h>
+
+/*
+ * This header implements low-level CBFS access APIs that can be shared across different
+ * host applications (e.g. coreboot, libpayload, cbfstool). For verification purposes it
+ * implements the metadata hashing part but not the file hashing part, so the host application
+ * will need to verify file hashes itself after loading each file. Host applications that use
+ * verification should implement wrapper APIs that combine the lookup, loading and hashing steps
+ * into a single, safe function call and outside of the code implementing those APIs should not
+ * be accessing the low-level APIs in this file directly (e.g. coreboot SoC/driver code should
+ * never directly #include this file, and always use the higher level APIs in src/lib/cbfs.c).
+ *
+ * <cbfs_glue.h> needs to be provided by the host application using this CBFS library. It must
+ * define the following type, macros and functions:
+ *
+ * cbfs_dev_t		An opaque type representing a CBFS storage backend.
+ * CBFS_ENABLE_HASHING	Should be 0 to avoid linking hashing features, 1 otherwise. (Only for
+ *			metadata hashing. Host application needs to check file hashes itself.)
+ * ERROR(...)		printf-style macro to print errors.
+ * LOG(...)		printf-style macro to print normal-operation log messages.
+ * DEBUG(...)		printf-style macro to print detailed debug output.
+ *
+ * ssize_t cbfs_dev_read(cbfs_dev_t dev, void *buffer, size_t offset, size_t size);
+ *			Read |size| bytes starting at |offset| from |dev| into |buffer|.
+ *			Returns amount of bytes read on success and < 0 on error.
+ *			This function *MUST* sanity-check offset/size on its own.
+ *
+ * size_t cbfs_dev_size(cbfs_dev_t dev);
+ *			Return the total size in bytes of the CBFS storage (actual CBFS area).
+ */
+#include <cbfs_glue.h>
+
+/*
+ * Helper structure to allocate space for a blob of metadata on the stack.
+ * NOTE: The fields in any union cbfs_mdata or any of its substructures from cbfs_serialized.h
+ * should always remain in the same byte order as they are stored on flash (= big endian). To
+ * avoid byte-order confusion, fields should always and only be converted to host byte order at
+ * exactly the time they are read from one of these structures into their own separate variable.
+ */
+#define CBFS_METADATA_MAX_SIZE 256
+union cbfs_mdata {
+	struct {
+		struct cbfs_file h;	/* Fixed-size file header. */
+		char filename[];	/* File name, starting right behind the header. */
+	};
+	uint8_t raw[CBFS_METADATA_MAX_SIZE];	/* Same bytes, untyped; bounds the size. */
+};
+
+/* Flags that modify behavior of cbfs_walk(). */
+enum cbfs_walk_flags {
+	/* Write the calculated hash back out to |metadata_hash->raw| rather than comparing it.
+	   |metadata_hash->algo| must still have been initialized by the caller. */
+	CBFS_WALK_WRITEBACK_HASH	= (1 << 0),
+	/* Call |walker| for empty file entries (i.e. entries with one of the CBFS_TYPE_DELETED
+	   types that mark free space in the CBFS). Otherwise, those entries will be skipped.
+	   Either way, these entries are never included in the metadata_hash calculation. */
+	CBFS_WALK_INCLUDE_EMPTY		= (1 << 1),
+};
+
+/*
+ * Traverse a CBFS and call a |walker| callback function for every file. Can additionally
+ * calculate a hash over the metadata of all files in the CBFS. If |metadata_hash| is NULL,
+ * hashing is disabled. If |walker| is NULL, will just traverse and hash the CBFS without
+ * invoking any callbacks (and always return CB_CBFS_NOT_FOUND unless there was another error).
+ *
+ * |arg| and |dev| will be passed through to |walker| unmodified. |offset| is the absolute
+ * offset in |dev| at which the current file metadata starts. |mdata| is a temporary buffer
+ * (only valid for the duration of this call to |walker|) containing already read metadata from
+ * the current file, up to |already_read| bytes. This will always at least contain the header
+ * fields and filename, but may contain more (i.e. attributes), depending on whether hashing is
+ * enabled. |walker| should call into cbfs_copy_fill_metadata() to copy the metadata of a file
+ * to a persistent buffer and automatically load remaining metadata from |dev| as needed based
+ * on the value of |already_read|.
+ *
+ * |walker| should return CB_CBFS_NOT_FOUND to keep being called for further files. Any other
+ * code becomes the final return code of cbfs_walk(), which then returns immediately unless it
+ * is hashing, in which case it still traverses the rest of the CBFS without calling |walker|.
+ *
+ * Returns, from highest to lowest priority:
+ * CB_CBFS_IO		- There was an IO error with the CBFS device (always considered fatal)
+ * CB_ERR_ARG/CB_ERR	- Setting up the |metadata_hash| algorithm / hashing itself failed
+ * CB_CBFS_HASH_MISMATCH - |metadata_hash| was provided and did not match the CBFS
+ * CB_SUCCESS/<other>	- First non-CB_CBFS_NOT_FOUND code returned by walker()
+ * CB_CBFS_NOT_FOUND	- walker() returned CB_CBFS_NOT_FOUND for every file in the CBFS
+ */
+cb_err_t cbfs_walk(cbfs_dev_t dev, cb_err_t (*walker)(cbfs_dev_t dev, size_t offset,
+						      const union cbfs_mdata *mdata,
+						      size_t already_read, void *arg),
+		   void *arg, struct vb2_hash *metadata_hash, enum cbfs_walk_flags flags);
+
+/*
+ * Helper for |walker| callbacks of cbfs_walk() to copy a file's metadata into a permanent
+ * buffer: copies the |already_read| bytes from |src| to |dst| and loads the rest from |dev|.
+ * |offset| is the one passed to |walker|. Returns CB_SUCCESS, or CB_CBFS_IO if a read fails.
+ */
+cb_err_t cbfs_copy_fill_metadata(union cbfs_mdata *dst, const union cbfs_mdata *src,
+				 size_t already_read, cbfs_dev_t dev, size_t offset);
+
+/* Find a file named |name| in the CBFS on |dev|. Copy its metadata (including attributes)
+ * into |mdata_out| and pass out the offset to the file data on |dev| in |data_offset_out|.
+ * Verify the metadata with |metadata_hash| if provided. Returns what cbfs_walk() returns. */
+cb_err_t cbfs_lookup(cbfs_dev_t dev, const char *name, union cbfs_mdata *mdata_out,
+		     size_t *data_offset_out, struct vb2_hash *metadata_hash);
+
+#endif	/* _COMMONLIB_BSD_CBFS_PRIVATE_H_ */
diff --git a/src/commonlib/bsd/include/commonlib/bsd/cbfs_serialized.h b/src/commonlib/bsd/include/commonlib/bsd/cbfs_serialized.h
index 3c76a49f55..7171634c8e 100644
--- a/src/commonlib/bsd/include/commonlib/bsd/cbfs_serialized.h
+++ b/src/commonlib/bsd/include/commonlib/bsd/cbfs_serialized.h
@@ -4,6 +4,7 @@
 #define _CBFS_SERIALIZED_H_
 
 #include <stdint.h>
+#include <vb2_sha.h>
 
 /** These are standard values for the known compression
     algorithms that coreboot knows about for stages and
@@ -124,12 +125,11 @@ struct cbfs_file_attr_compression {
 	uint32_t decompressed_size;
 } __packed;
 
+/* Actual size in CBFS may be larger/smaller than struct size! */
 struct cbfs_file_attr_hash {
 	uint32_t tag;
 	uint32_t len;
-	uint32_t hash_type;
-	/* hash_data is len - sizeof(struct) bytes */
-	uint8_t  hash_data[];
+	struct vb2_hash hash;
 } __packed;
 
 struct cbfs_file_attr_position {