lynxpoint: Add cbfs_load_payload() implementation
SPI accesses can be slow depending on the setup and the access pattern. The current SPI hardware is set up to cache and prefetch. The alternative cbfs_load_payload() function takes advantage of the caching in the CPU, because the ROM is cached as write-protected, as well as the SPI hardware's caching/prefetching implementation. The CPU will fetch consecutive aligned cachelines which will hit the ROM as cacheline-aligned addresses. Once the payload is mirrored into RAM the segment loading can take place by reading RAM instead of ROM. With the alternative cbfs_load_payload() the boot time on a baskingridge board is reduced by ~100ms. These savings are observed using cbmem.py after performing warm reboots and looking at TS_SELFBOOT_JUMP (99) entries. This is booting with a depthcharge payload whose payload file fits within the SMM_DEFAULT_SIZE (0x10000 bytes). Datapoints with TS_LOAD_PAYLOAD (90) & TS_SELFBOOT_JUMP (99) cbmem entries: Baseline Alt -------- -------- 90:3,859,310 (473) 90:3,863,647 (454) 99:3,989,578 (130,268) 99:3,888,709 (25,062) 90:3,899,450 (477) 90:3,860,926 (463) 99:4,029,459 (130,008) 99:3,890,583 (29,657) 90:3,834,600 (466) 90:3,890,564 (465) 99:3,964,535 (129,934) 99:3,920,213 (29,649) Booted baskingridge many times and observed 100ms reduction in TS_SELFBOOT_JUMP times (time to load payload). Change-Id: I27b2dec59ecd469a4906b4179b39928e9201db81 Signed-off-by: Aaron Durbin <adurbin@chromium.org> Reviewed-on: http://review.coreboot.org/2783 Tested-by: build bot (Jenkins) Reviewed-by: Stefan Reinauer <stefan.reinauer@coreboot.org>
This commit is contained in:
parent
633f11274f
commit
94998c4d3f
|
@ -31,6 +31,7 @@ config SOUTH_BRIDGE_OPTIONS # dummy
|
||||||
select PCIEXP_ASPM
|
select PCIEXP_ASPM
|
||||||
select PCIEXP_COMMON_CLOCK
|
select PCIEXP_COMMON_CLOCK
|
||||||
select SPI_FLASH
|
select SPI_FLASH
|
||||||
|
select ALT_CBFS_LOAD_PAYLOAD
|
||||||
|
|
||||||
config INTEL_LYNXPOINT_LP
|
config INTEL_LYNXPOINT_LP
|
||||||
bool
|
bool
|
||||||
|
|
|
@ -37,6 +37,7 @@ ramstage-y += me_status.c
|
||||||
ramstage-y += reset.c
|
ramstage-y += reset.c
|
||||||
ramstage-y += watchdog.c
|
ramstage-y += watchdog.c
|
||||||
ramstage-y += acpi.c
|
ramstage-y += acpi.c
|
||||||
|
ramstage-$(CONFIG_ALT_CBFS_LOAD_PAYLOAD) += spi_loading.c
|
||||||
|
|
||||||
ramstage-$(CONFIG_ELOG) += elog.c
|
ramstage-$(CONFIG_ELOG) += elog.c
|
||||||
ramstage-y += spi.c
|
ramstage-y += spi.c
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
* This file is part of the coreboot project.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013 ChromeOS Authors
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; version 2 of the License.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <arch/byteorder.h>
|
||||||
|
#include <cbfs.h>
|
||||||
|
#include <console/console.h>
|
||||||
|
#include <cpu/x86/smm.h>
|
||||||
|
|
||||||
|
/* Cacheline size in bytes used for the alignment arithmetic in this file. */
#define CACHELINE_SIZE 64
|
||||||
|
/* Mask selecting the offset-within-cacheline bits of an address. */
#define INTRA_CACHELINE_MASK (CACHELINE_SIZE - 1)
|
||||||
|
/* Mask clearing those bits, i.e. rounding an address down to the start of
 * its cacheline. */
#define CACHELINE_MASK (~INTRA_CACHELINE_MASK)
|
||||||
|
|
||||||
|
/* Mirror the payload file to the default SMM location if it is small enough.
|
||||||
|
* The default SMM region can be used since no one is using the memory at this
|
||||||
|
* location at this stage in the boot. */
|
||||||
|
static inline void *spi_mirror(void *file_start, int file_len)
|
||||||
|
{
|
||||||
|
int alignment_diff;
|
||||||
|
char *src;
|
||||||
|
char *dest = (void *)SMM_DEFAULT_BASE;
|
||||||
|
|
||||||
|
alignment_diff = (INTRA_CACHELINE_MASK & (long)file_start);
|
||||||
|
|
||||||
|
/* Adjust file length so that the start and end points are aligned to a
|
||||||
|
* cacheline. Coupled with the ROM caching in the CPU the SPI hardware
|
||||||
|
* will read and cache full length cachelines. It will also prefetch
|
||||||
|
* data as well. Once things are mirrored in memory all accesses should
|
||||||
|
* hit the CPUs cache. */
|
||||||
|
file_len += alignment_diff;
|
||||||
|
file_len = ALIGN(file_len, CACHELINE_SIZE);
|
||||||
|
|
||||||
|
printk(BIOS_DEBUG, "Payload aligned size: 0x%x\n", file_len);
|
||||||
|
|
||||||
|
/* Just pass back the pointer to ROM space if the file is larger
|
||||||
|
* than the RAM mirror region. */
|
||||||
|
if (file_len > SMM_DEFAULT_SIZE)
|
||||||
|
return file_start;
|
||||||
|
|
||||||
|
src = (void *)(CACHELINE_MASK & (long)file_start);
|
||||||
|
/* Note that if mempcy is not using 32-bit moves the performance will
|
||||||
|
* degrade because the SPI hardware prefetchers look for
|
||||||
|
* cacheline-aligned 32-bit accesses to kick in. */
|
||||||
|
memcpy(dest, src, file_len);
|
||||||
|
|
||||||
|
/* Provide pointer into mirrored space. */
|
||||||
|
return &dest[alignment_diff];
|
||||||
|
}
|
||||||
|
|
||||||
|
void *cbfs_load_payload(struct cbfs_media *media, const char *name)
|
||||||
|
{
|
||||||
|
int file_len;
|
||||||
|
void *file_start;
|
||||||
|
struct cbfs_file *file = cbfs_get_file(media, name);
|
||||||
|
|
||||||
|
if (file == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (ntohl(file->type) != CBFS_TYPE_PAYLOAD)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
file_len = ntohl(file->len);
|
||||||
|
|
||||||
|
file_start = CBFS_SUBHEADER(file);
|
||||||
|
|
||||||
|
return spi_mirror(file_start, file_len);
|
||||||
|
}
|
Loading…
Reference in New Issue