haswell: Parallel AP bringup

This patch parallelizes the AP startup for Haswell-based devices. It
does not touch the generic secondary startup code. Instead it provides
its own MP support matching up with the Haswell BWG. It seemed to be too
much trouble to support the old startup way and this new way. Because of
that parallel loading is the only thing supported.

A couple of things to note:
1. Microcode needs to be loaded twice. Once before MTRR and caching is
   enabled. And a second time after SMM relocation.
2. The sipi_vector is entirely self-contained. Once it is loaded and
   written back to RAM the APs do not access memory outside of the
   sipi_vector load location until a sync up in ramstage.
3. SMM relocation is kicked off by an IPI to self w/ SMI set as the
   destination mode.

The following are timings from cbmem with dev mode disabled and recovery mode
enabled to boot directly into the kernel. This was done on the
baskingridge CRB with a 4-core 8-thread CPU and 2 DIMMs 1GiB each. The
kernel has console enabled on the serial port. Entry 70 is the device
initialization, and that is where the APs are brought up. With these two
examples it looks to shave off ~200 ms of boot time.

Before:
   1:55,382
   2:57,606 (2,223)
   3:3,108,983 (3,051,377)
   4:3,110,084 (1,101)
   8:3,113,109 (3,024)
   9:3,156,694 (43,585)
  10:3,156,815 (120)
  30:3,157,110 (295)
  40:3,158,180 (1,069)
  50:3,160,157 (1,977)
  60:3,160,366 (208)
  70:4,221,044 (1,060,677)
  75:4,221,062 (18)
  80:4,227,185 (6,122)
  90:4,227,669 (484)
  99:4,265,596 (37,927)
1000:4,267,822 (2,225)
1001:4,268,507 (685)
1002:4,268,780 (272)
1003:4,398,676 (129,896)
1004:4,398,979 (303)
1100:7,477,601 (3,078,621)
1101:7,480,210 (2,608)

After:
   1:49,518
   2:51,778 (2,259)
   3:3,081,186 (3,029,407)
   4:3,082,252 (1,066)
   8:3,085,137 (2,884)
   9:3,130,339 (45,202)
  10:3,130,518 (178)
  30:3,130,544 (26)
  40:3,131,125 (580)
  50:3,133,023 (1,897)
  60:3,133,278 (255)
  70:4,009,259 (875,980)
  75:4,009,273 (13)
  80:4,015,947 (6,674)
  90:4,016,430 (482)
  99:4,056,265 (39,835)
1000:4,058,492 (2,226)
1001:4,059,176 (684)
1002:4,059,450 (273)
1003:4,189,333 (129,883)
1004:4,189,770 (436)
1100:7,262,358 (3,072,588)
1101:7,263,926 (1,567)

Booted the baskingridge board as noted above. Also analyzed serial
messages with pcserial enabled.

Change-Id: Ifedc7f787953647c228b11afdb725686e38c4098
Signed-off-by: Aaron Durbin <adurbin@chromium.org>
Reviewed-on: http://review.coreboot.org/2779
Tested-by: build bot (Jenkins)
Reviewed-by: Ronald G. Minnich <rminnich@gmail.com>
This commit is contained in:
Aaron Durbin 2013-01-15 08:27:05 -06:00 committed by Ronald G. Minnich
parent 98ffb426f4
commit 305b1f0d30
8 changed files with 844 additions and 103 deletions

View File

@ -11,6 +11,7 @@ config CPU_SPECIFIC_OPTIONS
select UDELAY_LAPIC select UDELAY_LAPIC
select SMM_TSEG select SMM_TSEG
select SMM_MODULES select SMM_MODULES
select RELOCATABLE_MODULES
select CPU_MICROCODE_IN_CBFS select CPU_MICROCODE_IN_CBFS
#select AP_IN_SIPI_WAIT #select AP_IN_SIPI_WAIT
select TSC_SYNC_MFENCE select TSC_SYNC_MFENCE

View File

@ -1,5 +1,6 @@
ramstage-y += haswell_init.c ramstage-y += haswell_init.c
subdirs-y += ../../x86/name subdirs-y += ../../x86/name
ramstage-y += mp_init.c
romstage-y += romstage.c romstage-y += romstage.c
ramstage-$(CONFIG_GENERATE_ACPI_TABLES) += acpi.c ramstage-$(CONFIG_GENERATE_ACPI_TABLES) += acpi.c
@ -10,3 +11,25 @@ cpu_microcode-$(CONFIG_CPU_MICROCODE_CBFS_GENERATE) += microcode_blob.c
smm-$(CONFIG_HAVE_SMI_HANDLER) += finalize.c smm-$(CONFIG_HAVE_SMI_HANDLER) += finalize.c
cpu_incs += $(src)/cpu/intel/haswell/cache_as_ram.inc cpu_incs += $(src)/cpu/intel/haswell/cache_as_ram.inc
# AP startup stub
SIPI_ELF=$(obj)/cpu/intel/haswell/sipi_vector.elf
SIPI_BIN=$(SIPI_ELF:.elf=)
SIPI_DOTO=$(SIPI_ELF:.elf=.o)
ramstage-srcs += $(SIPI_BIN)
rmodules-y += sipi_vector.S
rmodules-y += sipi_header.c
$(SIPI_DOTO): $(dir $(SIPI_ELF))sipi_vector.rmodules.o $(dir $(SIPI_ELF))sipi_header.rmodules.o
$(CC) $(LDFLAGS) -nostdlib -r -o $@ $^
$(eval $(call rmodule_link,$(SIPI_ELF), $(SIPI_ELF:.elf=.o), 0))
$(SIPI_BIN): $(SIPI_ELF)
$(OBJCOPY) -O binary $< $@
$(SIPI_BIN).ramstage.o: $(SIPI_BIN)
@printf " OBJCOPY $(subst $(obj)/,,$(@))\n"
cd $(dir $@); $(OBJCOPY) -I binary $(notdir $<) -O elf32-i386 -B i386 $(notdir $@)

View File

@ -141,8 +141,17 @@ void intel_cpu_haswell_finalize_smm(void);
/* Configure power limits for turbo mode */ /* Configure power limits for turbo mode */
void set_power_limits(u8 power_limit_1_time); void set_power_limits(u8 power_limit_1_time);
int cpu_config_tdp_levels(void); int cpu_config_tdp_levels(void);
/* Returns 0 on success, < 0 on failure. */
int smm_initialize(void);
void smm_initiate_relocation(void);
struct bus; struct bus;
void bsp_init_and_start_aps(struct bus *cpu_bus); void bsp_init_and_start_aps(struct bus *cpu_bus);
/* Returns 0 on success. < 0 on failure. */
int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
const void *microcode_patch);
/* Returns 0 on success, < 0 on failure. */
int start_aps(struct bus *cpu_bus, int max_cpus);
void release_aps_for_smm_relocation(void);
#endif #endif
#endif #endif

View File

@ -442,71 +442,30 @@ static void configure_mca(void)
static unsigned ehci_debug_addr; static unsigned ehci_debug_addr;
#endif #endif
/* static void bsp_init_before_ap_bringup(struct bus *cpu_bus)
* Initialize any extra cores/threads in this package.
*/
static void intel_cores_init(device_t cpu)
{
struct cpuid_result result;
unsigned threads_per_package, threads_per_core, i;
/* Logical processors (threads) per core */
result = cpuid_ext(0xb, 0);
threads_per_core = result.ebx & 0xffff;
/* Logical processors (threads) per package */
result = cpuid_ext(0xb, 1);
threads_per_package = result.ebx & 0xffff;
/* Only initialize extra cores from BSP */
if (cpu->path.apic.apic_id)
return;
printk(BIOS_DEBUG, "CPU: %u has %u cores, %u threads per core\n",
cpu->path.apic.apic_id, threads_per_package/threads_per_core,
threads_per_core);
for (i = 1; i < threads_per_package; ++i) {
struct device_path cpu_path;
device_t new;
/* Build the cpu device path */
cpu_path.type = DEVICE_PATH_APIC;
cpu_path.apic.apic_id =
cpu->path.apic.apic_id + i;
/* Update APIC ID if no hyperthreading */
if (threads_per_core == 1)
cpu_path.apic.apic_id <<= 1;
/* Allocate the new cpu device structure */
new = alloc_dev(cpu->bus, &cpu_path);
if (!new)
continue;
printk(BIOS_DEBUG, "CPU: %u has core %u\n",
cpu->path.apic.apic_id,
new->path.apic.apic_id);
#if CONFIG_SMP && CONFIG_MAX_CPUS > 1
/* Start the new cpu */
if (!start_cpu(new)) {
/* Record the error in cpu? */
printk(BIOS_ERR, "CPU %u would not start!\n",
new->path.apic.apic_id);
}
#endif
}
}
static void bsp_init_before_ap_bringup(void)
{ {
struct device_path cpu_path;
struct cpu_info *info;
char processor_name[49]; char processor_name[49];
/* Print processor name */ /* Print processor name */
fill_processor_name(processor_name); fill_processor_name(processor_name);
printk(BIOS_INFO, "CPU: %s.\n", processor_name); printk(BIOS_INFO, "CPU: %s.\n", processor_name);
/* Ensure the local apic is enabled */
enable_lapic();
/* Set the device path of the boot cpu. */
cpu_path.type = DEVICE_PATH_APIC;
cpu_path.apic.apic_id = lapicid();
/* Find the device structure for the boot cpu. */
info = cpu_info();
info->cpu = alloc_find_dev(cpu_bus, &cpu_path);
if (info->index != 0)
printk(BIOS_CRIT, "BSP index(%d) != 0!\n", info->index);
#if CONFIG_USBDEBUG #if CONFIG_USBDEBUG
// Is this caution really needed? // Is this caution really needed?
if(!ehci_debug_addr) if(!ehci_debug_addr)
@ -523,23 +482,12 @@ static void bsp_init_before_ap_bringup(void)
set_ehci_debug(ehci_debug_addr); set_ehci_debug(ehci_debug_addr);
#endif #endif
enable_lapic(); /* Call through the cpu driver's initialization. */
cpu_initialize(0);
} }
static void ap_init(device_t cpu) /* All CPUs including BSP will run the following function. */
{ static void haswell_init(device_t cpu)
/* Microcode needs to be loaded before caching is enabled. */
intel_update_microcode_from_cbfs();
/* Turn on caching if we haven't already */
x86_enable_cache();
x86_setup_fixed_mtrrs();
x86_setup_var_mtrrs(cpuid_eax(0x80000008) & 0xff, 2);
enable_lapic();
}
static void cpu_common_init(device_t cpu)
{ {
/* Clear out pending MCEs */ /* Clear out pending MCEs */
configure_mca(); configure_mca();
@ -572,33 +520,40 @@ static void cpu_common_init(device_t cpu)
void bsp_init_and_start_aps(struct bus *cpu_bus) void bsp_init_and_start_aps(struct bus *cpu_bus)
{ {
int max_cpus;
int num_aps;
const void *microcode_patch;
/* Perform any necesarry BSP initialization before APs are brought up. /* Perform any necesarry BSP initialization before APs are brought up.
* This call alos allows the BSP to prepare for any secondary effects * This call alos allows the BSP to prepare for any secondary effects
* from calling cpu_initialize() such as smm_init(). */ * from calling cpu_initialize() such as smm_init(). */
bsp_init_before_ap_bringup(); bsp_init_before_ap_bringup(cpu_bus);
/* microcode_patch = intel_microcode_find();
* This calls into the gerneic initialize_cpus() which attempts to
* start APs on the APIC bus in the devicetree. No APs get started
* because there is only the BSP and a placeholder (disabled) in the
* devicetree. initialize_cpus() also does SMM initialization by way
* of smm_init(). It will eventually call cpu_initialize(0) which calls
* dev_ops->init(). For Haswell the dev_ops->init() starts up the APs
* by way of intel_cores_init().
*/
initialize_cpus(cpu_bus);
}
static void haswell_init(device_t cpu) /* This needs to be called after the mtrr setup so the BSP mtrrs
{ * can be mirrored by the APs. */
if (cpu->path.apic.apic_id == 0) { if (setup_ap_init(cpu_bus, &max_cpus, microcode_patch)) {
cpu_common_init(cpu); printk(BIOS_CRIT, "AP setup initialization failed. "
/* Start up extra cores */ "No APs will be brought up.\n");
intel_cores_init(cpu); return;
} else {
ap_init(cpu);
cpu_common_init(cpu);
} }
num_aps = max_cpus - 1;
if (start_aps(cpu_bus, num_aps)) {
printk(BIOS_CRIT, "AP startup failed. Trying to continue.\n");
}
if (smm_initialize()) {
printk(BIOS_CRIT, "SMM Initialiazation failed...\n");
return;
}
/* Release APs to perform SMM relocation. */
release_aps_for_smm_relocation();
/* After SMM relocation a 2nd microcode load is required. */
intel_microcode_load_unlocked(microcode_patch);
} }
static struct device_operations cpu_dev_ops = { static struct device_operations cpu_dev_ops = {

View File

@ -0,0 +1,550 @@
/*
* This file is part of the coreboot project.
*
* Copyright (C) 2013 ChromeOS Authors
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; version 2 of
* the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
* MA 02110-1301 USA
*/
#include <console/console.h>
#include <stdint.h>
#include <rmodule.h>
#include <arch/cpu.h>
#include <cpu/cpu.h>
#include <cpu/intel/microcode.h>
#include <cpu/x86/cache.h>
#include <cpu/x86/lapic.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/mtrr.h>
#include <cpu/x86/smm.h>
#include <delay.h>
#include <device/device.h>
#include <device/path.h>
#include <lib.h>
#include <smp/atomic.h>
#include <smp/spinlock.h>
#include "haswell.h"
/* This needs to match the layout of the .module_parameters section in
 * sipi_vector.S. */
struct sipi_params {
	u16 gdtlimit;		/* GDT limit, loaded by the APs' lgdt */
	u32 gdt;		/* Pointer to the BSP's GDT */
	u16 unused;
	u32 idt_ptr;		/* Pointer to the IDT descriptor */
	u32 stack_top;		/* Top of the region carved into per-CPU stacks */
	u32 stack_size;		/* Size of each CPU's stack */
	u32 microcode_ptr;	/* Microcode patch to load; 0 means none */
	u32 msr_table_ptr;	/* Array of struct saved_msr mirrored from BSP */
	u32 msr_count;		/* Number of entries at msr_table_ptr */
	u32 c_handler;		/* C entry point the stub calls (ap_init) */
	u32 c_handler_arg;	/* Argument passed to c_handler */
	u8 apic_to_cpu_num[CONFIG_MAX_CPUS];	/* APIC id -> cpu number map */
} __attribute__((packed));
/* This also needs to match the assembly code for saved MSR encoding. */
struct saved_msr {
	u32 index;	/* MSR address */
	u32 lo;		/* Value bits 31:0 */
	u32 hi;		/* Value bits 63:32 */
} __attribute__((packed));
/* The sipi vector rmodule is included in the ramstage using 'objcopy -B'. */
extern char _binary_sipi_vector_start[];
/* These symbols are defined in c_start.S. */
extern char gdt[];
extern char gdt_limit[];
extern char idtarg[];
/* This table keeps track of each CPU's APIC id. */
static u8 apic_id_table[CONFIG_MAX_CPUS];
static device_t cpu_devs[CONFIG_MAX_CPUS];
/* Number of APs that have checked in. */
static atomic_t num_aps;
/* Barrier to stop APs from performing SMM relocation. */
static int smm_relocation_barrier_begin __attribute__ ((aligned (64)));
static inline void wait_for_barrier(volatile int *barrier)
{
while (*barrier == 0) {
asm ("pause");
}
}
/* Release any CPUs spinning in wait_for_barrier() on this barrier. */
static inline void release_barrier(volatile int *barrier)
{
	*barrier = 1;
}
/* Block the calling AP until the BSP signals that SMM relocation may
 * begin (see release_aps_for_smm_relocation()). */
static void ap_wait_for_smm_relocation_begin(void)
{
	wait_for_barrier(&smm_relocation_barrier_begin);
}
/* Returns 1 if timeout waiting for APs. 0 if target aps found. */
static int wait_for_aps(int target, int total_delay, int delay_step)
{
	int timeout = 0;
	int delayed = 0;

	/* Poll the AP check-in counter in delay_step (us) increments until
	 * it reaches target or total_delay microseconds have elapsed. */
	while (atomic_read(&num_aps) != target) {
		udelay(delay_step);
		delayed += delay_step;
		if (delayed >= total_delay) {
			timeout = 1;
			break;
		}
	}

	return timeout;
}
/* Called by the BSP once SMM infrastructure is set up; lets the APs
 * (parked in ap_wait_for_smm_relocation_begin()) proceed with their own
 * SMM relocation. */
void release_aps_for_smm_relocation(void)
{
	release_barrier(&smm_relocation_barrier_begin);
}
/* The mtrr code sets up ROM caching on the BSP, but not the others. However,
 * the boot loader payload disables this. In order for Linux not to complain
 * ensure the caching is disabled for the APs before going to sleep. */
static void cleanup_rom_caching(void)
{
#if CONFIG_CACHE_ROM
	msr_t msr;
	unsigned int last_var_mtrr;

	/* ROM caching, when enabled, occupies the highest variable MTRR. */
	msr = rdmsr(MTRRcap_MSR);
	last_var_mtrr = (msr.lo & 0xff) - 1;

	/* Check if the MTRR is valid. */
	msr = rdmsr(MTRRphysMask_MSR(last_var_mtrr));
	if ((msr.lo & MTRRphysMaskValid) == 0)
		return;
	msr = rdmsr(MTRRphysBase_MSR(last_var_mtrr));
	/* Assume that if the MTRR is of write protected type, the MTRR is used
	 * to cache the ROM. */
	if ((msr.lo & MTRR_NUM_TYPES) == MTRR_TYPE_WRPROT) {
		msr.lo = msr.hi = 0;
		/* Caches must be disabled while clearing the MTRR pair. */
		disable_cache();
		wrmsr(MTRRphysMask_MSR(last_var_mtrr), msr);
		wrmsr(MTRRphysBase_MSR(last_var_mtrr), msr);
		enable_cache();
	}
#endif
}
/* By the time APs call ap_init() caching has been setup, and microcode has
 * been loaded (both done by the SIPI vector stub). This is the c_handler
 * invoked from sipi_vector.S with the cpu number and the microcode pointer
 * as arguments. */
static void ap_init(unsigned int cpu, void *microcode_ptr)
{
	struct cpu_info *info;

	/* Signal that the AP has arrived. */
	atomic_inc(&num_aps);

	/* Ensure the local apic is enabled */
	enable_lapic();

	/* Wire this AP's cpu_info to the device the BSP allocated and record
	 * the APIC id for later apic-id -> cpu-number lookups. */
	info = cpu_info();
	info->index = cpu;
	info->cpu = cpu_devs[cpu];
	apic_id_table[info->index] = lapicid();
	info->cpu->path.apic.apic_id = apic_id_table[info->index];

	/* Call through the cpu driver's initialization. */
	cpu_initialize(info->index);

	/* Wait for the BSP's go-ahead, then send the self-SMI that relocates
	 * this AP's SMM base. */
	ap_wait_for_smm_relocation_begin();

	smm_initiate_relocation();

	/* After SMM relocation a 2nd microcode load is required. */
	intel_microcode_load_unlocked(microcode_ptr);

	/* Cleanup ROM caching. */
	cleanup_rom_caching();

	/* FIXME(adurbin): park CPUs properly -- preferably somewhere in a
	 * reserved part of memory that the OS cannot get to. */
	stop_this_cpu();
}
/* Fill in the SIPI parameter block with the BSP's GDT/IDT, the per-CPU
 * stack carve-out, and an identity APIC-id -> cpu-number map. */
static void setup_default_sipi_vector_params(struct sipi_params *sp)
{
	int i;

	/* gdt, gdt_limit, and idtarg are provided by c_start.S. */
	sp->gdt = (u32)&gdt;
	sp->gdtlimit = (u32)&gdt_limit;
	sp->idt_ptr = (u32)&idtarg;
	sp->stack_size = CONFIG_STACK_SIZE;
	sp->stack_top = (u32)&_estack;
	/* Adjust the stack top to take into account cpu_info. */
	sp->stack_top -= sizeof(struct cpu_info);
	/* Default to linear APIC id space. */
	for (i = 0; i < CONFIG_MAX_CPUS; i++)
		sp->apic_to_cpu_num[i] = i;
}
#define NUM_FIXED_MTRRS 11
/* The fixed MTRRs that save_bsp_msrs() mirrors from the BSP to each AP. */
static unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRRfix64K_00000_MSR, MTRRfix16K_80000_MSR, MTRRfix16K_A0000_MSR,
	MTRRfix4K_C0000_MSR, MTRRfix4K_C8000_MSR, MTRRfix4K_D0000_MSR,
	MTRRfix4K_D8000_MSR, MTRRfix4K_E0000_MSR, MTRRfix4K_E8000_MSR,
	MTRRfix4K_F0000_MSR, MTRRfix4K_F8000_MSR,
};
/* Read MSR index into *entry and return a pointer to the slot after it,
 * so callers can chain saves into a contiguous table. */
static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
{
	msr_t value = rdmsr(index);

	entry->index = index;
	entry->lo = value.lo;
	entry->hi = value.hi;

	/* Hand back the next free slot. */
	return entry + 1;
}
/* Mirror the BSP's fixed and variable MTRRs (plus MTRRdefType) into the
 * buffer at start so the SIPI stub can replay them on each AP.
 * Returns the number of MSR entries written, or -1 if size is too small. */
static int save_bsp_msrs(char *start, int size)
{
	int msr_count;
	int num_var_mtrrs;
	struct saved_msr *msr_entry;
	int i;
	msr_t msr;

	/* Determine number of MTRRs need to be saved. */
	msr = rdmsr(MTRRcap_MSR);
	num_var_mtrrs = msr.lo & 0xff;

	/* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE. */
	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;
	if ((msr_count * sizeof(struct saved_msr)) > size) {
		printk(BIOS_CRIT, "Cannot mirror all %d msrs.\n", msr_count);
		return -1;
	}

	msr_entry = (void *)start;
	for (i = 0; i < NUM_FIXED_MTRRS; i++) {
		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);
	}

	for (i = 0; i < num_var_mtrrs; i++) {
		msr_entry = save_msr(MTRRphysBase_MSR(i), msr_entry);
		msr_entry = save_msr(MTRRphysMask_MSR(i), msr_entry);
	}

	msr_entry = save_msr(MTRRdefType_MSR, msr_entry);

	return msr_count;
}
/* The SIPI vector is loaded at the SMM_DEFAULT_BASE. The reason is that the
 * memory range is already reserved so the OS cannot use it. That region is
 * free to use for AP bringup before SMM is initialized. */
static u32 sipi_vector_location = SMM_DEFAULT_BASE;
static int sipi_vector_location_size = SMM_DEFAULT_SIZE;
/* Parse, relocate, and load the SIPI rmodule at sipi_vector_location,
 * fill in its parameter block, and flush it to RAM so APs can execute it
 * with caches disabled. Returns 0 on success, < 0 on failure. */
static int load_sipi_vector(const void *microcode_patch)
{
	struct rmodule sipi_mod;
	int module_size;
	int num_msrs;
	struct sipi_params *sp;
	char *mod_loc = (void *)sipi_vector_location;
	const int loc_size = sipi_vector_location_size;

	if (rmodule_parse(&_binary_sipi_vector_start, &sipi_mod)) {
		printk(BIOS_CRIT, "Unable to parse sipi module.\n");
		return -1;
	}

	/* ap_start must be at offset 0 so the 4K-aligned SIPI vector lands
	 * directly on the entry point. */
	if (rmodule_entry_offset(&sipi_mod) != 0) {
		printk(BIOS_CRIT, "SIPI module entry offset is not 0!\n");
		return -1;
	}

	if (rmodule_load_alignment(&sipi_mod) != 4096) {
		printk(BIOS_CRIT, "SIPI module load alignment(%d) != 4096.\n",
		       rmodule_load_alignment(&sipi_mod));
		return -1;
	}

	module_size = rmodule_memory_size(&sipi_mod);

	/* Align to 4 bytes. */
	module_size += 3;
	module_size &= ~3;

	if (module_size > loc_size) {
		printk(BIOS_CRIT, "SIPI module size (%d) > region size (%d).\n",
		       module_size, loc_size);
		return -1;
	}

	/* Mirror the BSP's MSRs into the space just after the module. */
	num_msrs = save_bsp_msrs(&mod_loc[module_size], loc_size - module_size);

	if (num_msrs < 0) {
		printk(BIOS_CRIT, "Error mirroring BSP's msrs.\n");
		return -1;
	}

	if (rmodule_load(mod_loc, &sipi_mod)) {
		printk(BIOS_CRIT, "Unable to load SIPI module.\n");
		return -1;
	}

	sp = rmodule_parameters(&sipi_mod);
	if (sp == NULL) {
		printk(BIOS_CRIT, "SIPI module has no parameters.\n");
		return -1;
	}

	setup_default_sipi_vector_params(sp);
	/* Setup MSR table. */
	sp->msr_table_ptr = (u32)&mod_loc[module_size];
	sp->msr_count = num_msrs;
	/* Provide pointer to microcode patch. */
	sp->microcode_ptr = (u32)microcode_patch;
	/* The microcode pointer is passed on through to the c handler so
	 * that it can be loaded again after SMM relocation. */
	sp->c_handler_arg = (u32)microcode_patch;
	sp->c_handler = (u32)&ap_init;

	/* Make sure SIPI vector hits RAM so the APs that come up will see
	 * the startup code even if the caches are disabled. */
	wbinvd();

	return 0;
}
/* Create device structures for each expected hardware thread; slot 0 is
 * the running BSP. Returns the number of cpu devices available (capped at
 * CONFIG_MAX_CPUS) and reports the raw hardware thread count through
 * total_hw_threads. */
static int allocate_cpu_devices(struct bus *cpu_bus, int *total_hw_threads)
{
	int i;
	int num_threads;
	int num_cores;
	int max_cpus;
	struct cpu_info *info;
	msr_t msr;

	/* The BSP's device is already known. */
	info = cpu_info();
	cpu_devs[info->index] = info->cpu;
	apic_id_table[info->index] = info->cpu->path.apic.apic_id;

	msr = rdmsr(CORE_THREAD_COUNT_MSR);
	num_threads = (msr.lo >> 0) & 0xffff;
	num_cores = (msr.lo >> 16) & 0xffff;
	printk(BIOS_DEBUG, "CPU has %u cores, %u threads enabled.\n",
	       num_cores, num_threads);

	max_cpus = num_threads;
	*total_hw_threads = num_threads;
	if (num_threads > CONFIG_MAX_CPUS) {
		printk(BIOS_CRIT, "CPU count(%d) exceeds CONFIG_MAX_CPUS(%d)\n",
		       num_threads, CONFIG_MAX_CPUS);
		max_cpus = CONFIG_MAX_CPUS;
	}

	for (i = 1; i < max_cpus; i++) {
		struct device_path cpu_path;
		device_t new;

		/* Build the cpu device path */
		cpu_path.type = DEVICE_PATH_APIC;
		cpu_path.apic.apic_id = info->cpu->path.apic.apic_id + i;

		/* Allocate the new cpu device structure */
		new = alloc_find_dev(cpu_bus, &cpu_path);
		if (new == NULL) {
			printk(BIOS_CRIT, "Could not allocate cpu device\n");
			max_cpus--;
			/* Leave cpu_devs[i] untouched (NULL) rather than
			 * storing the failed allocation. */
			continue;
		}
		cpu_devs[i] = new;
	}

	return max_cpus;
}
/* Prepare for AP startup: allocate cpu devices and stage the SIPI vector
 * (with mirrored BSP MSRs and microcode pointer) at its fixed location.
 * On success returns 0 and sets *max_cpus to the number of CPUs that will
 * be brought up; returns -1 on failure. */
int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
                  const void *microcode_patch)
{
	int num_cpus;
	int hw_threads;

	/* Default to currently running CPU. */
	num_cpus = allocate_cpu_devices(cpu_bus, &hw_threads);

	/* Load the SIPI vector. */
	if (load_sipi_vector(microcode_patch))
		return -1;

	/* Fail if fewer cpu devices could be allocated than there are
	 * hardware threads to start. */
	*max_cpus = num_cpus;
	if (num_cpus < hw_threads) {
		printk(BIOS_CRIT,
		       "ERROR: More HW threads (%d) than support (%d).\n",
		       hw_threads, num_cpus);
		return -1;
	}

	return 0;
}
/* Poll the local APIC ICR until the delivery-status (busy) bit clears.
 * Returns 1 for timeout. 0 on success. */
static int apic_wait_timeout(int total_delay, int delay_step)
{
	int elapsed = 0;

	while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
		udelay(delay_step);
		elapsed += delay_step;
		if (elapsed >= total_delay)
			return 1;
	}

	return 0;
}
/* Bring up ap_count APs with the INIT / SIPI / SIPI sequence (broadcast
 * to all-but-self) and wait for them to check in via ap_init().
 * Returns 0 on success, < 0 on failure. */
int start_aps(struct bus *cpu_bus, int ap_count)
{
	int sipi_vector;

	if (ap_count == 0)
		return 0;

	/* The vector is sent as a 4k aligned address in one byte. */
	sipi_vector = sipi_vector_location >> 12;

	/* The SIPI vector field is a single byte, so values above 255
	 * cannot be encoded. (Was '> 256', an off-by-one.) */
	if (sipi_vector >= 256) {
		printk(BIOS_CRIT, "SIPI vector too large! 0x%08x\n",
		       sipi_vector);
		return -1;
	}

	printk(BIOS_DEBUG, "Attempting to start %d APs\n", ap_count);

	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return -1;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	/* Send INIT IPI to all but self. */
	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
	                   LAPIC_DM_INIT);
	printk(BIOS_DEBUG, "Waiting for INIT to complete...");
	/* Wait for 10 ms to complete. */
	if (apic_wait_timeout(10000 /* 10 ms */, 100 /* us */)) {
		printk(BIOS_DEBUG, "timed out. Bailing. \n");
		return -1;
	} else {
		printk(BIOS_DEBUG, "done.\n");
	}

	/* Send 1st SIPI */
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return -1;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
	                   LAPIC_DM_STARTUP | sipi_vector);
	printk(BIOS_DEBUG, "Waiting for 1st SIPI to complete...");
	if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
		printk(BIOS_DEBUG, "timed out.\n");
		return -1;
	} else {
		printk(BIOS_DEBUG, "done.\n");
	}
	/* Wait for CPUs to check in up to 200 us. */
	wait_for_aps(ap_count, 200 /* us */, 15 /* us */);

	/* Send 2nd SIPI */
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return -1;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
	                   LAPIC_DM_STARTUP | sipi_vector);
	printk(BIOS_DEBUG, "Waiting for 2nd SIPI to complete...");
	if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
		printk(BIOS_DEBUG, "timed out.\n");
		return -1;
	} else {
		printk(BIOS_DEBUG, "done.\n");
	}

	/* Wait for CPUs to check in. */
	if (wait_for_aps(ap_count, 10000 /* 10 ms */, 50 /* us */)) {
		printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n",
		       atomic_read(&num_aps), ap_count);
		return -1;
	}

	return 0;
}
DECLARE_SPIN_LOCK(smm_relocation_lock);
/* Trigger SMM relocation for the running CPU by sending a self-IPI with
 * SMI delivery mode. Serialized with a spinlock since CPUs relocate one
 * at a time through the handler at the default SMM base. */
void smm_initiate_relocation(void)
{
	spin_lock(&smm_relocation_lock);

	/* Wait for any pending IPI delivery to finish before touching
	 * the ICR. */
	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			spin_unlock(&smm_relocation_lock);
			return;
		} else
			printk(BIOS_DEBUG, "done.\n");
	}

	/* IPI-to-self with SMI delivery mode. */
	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(lapicid()));
	lapic_write_around(LAPIC_ICR, LAPIC_INT_ASSERT | LAPIC_DM_SMI);

	if (apic_wait_timeout(1000 /* 1 ms */, 100 /* us */)) {
		printk(BIOS_DEBUG, "SMI Relocation timed out.\n");
	} else
		printk(BIOS_DEBUG, "Relocation complete.\n");

	spin_unlock(&smm_relocation_lock);
}

View File

@ -0,0 +1,6 @@
#include <rmodule.h>

/* ap_start is the 16-bit entry point defined in sipi_vector.S. */
extern void *ap_start;

/* Emit the rmodule header that lets ramstage parse, relocate, and load
 * the SIPI vector blob (see load_sipi_vector() in mp_init.c). */
DEFINE_RMODULE_HEADER(sipi_vector_header, ap_start, RMODULE_TYPE_SIPI_VECTOR);

View File

@ -0,0 +1,190 @@
/*
* This file is part of the coreboot project.
*
* Copyright (C) 2013 ChromeOS Authors
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; version 2 of
* the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
* MA 02110-1301 USA
*/
/* The SIPI vector is responsible for initializing the APs in the system. It
 * loads microcode, sets up MSRs, and enables caching before calling into
 * C code. */

/* These segment selectors need to match the gdt entries in c_start.S. */
#define CODE_SEG 0x10
#define DATA_SEG 0x18

/* Microcode update MSRs: IA32_BIOS_UPDT_TRIG / IA32_BIOS_SIGN_ID. */
#define IA32_UPDT_TRIG 0x79
#define IA32_BIOS_SIGN_ID 0x8b

/* Parameter block filled in by the BSP before the SIPI is sent; the layout
 * must match struct sipi_params in mp_init.c. */
.section ".module_parameters", "aw", @progbits
ap_start_params:
gdtaddr:
	.word 0 /* limit */
	.long 0 /* table */
	.word 0 /* unused */
idt_ptr:
	.long 0
stack_top:
	.long 0
stack_size:
	.long 0
microcode_ptr:
	.long 0
msr_table_ptr:
	.long 0
msr_count:
	.long 0
c_handler:
	.long 0
c_handler_arg:
	.long 0
apic_to_cpu_num:
	.fill CONFIG_MAX_CPUS,1,0xff

.text
.code16
.global ap_start
ap_start:
	cli
	xorl %eax, %eax
	movl %eax, %cr3 /* Invalidate TLB*/

	/* On hyper threaded cpus, invalidating the cache here is
	 * very very bad. Don't.
	 */

	/* setup the data segment */
	movw %cs, %ax
	movw %ax, %ds

	/* The gdtaddr needs to be relative to the data segment in order
	 * to properly dereference it. The .text section comes first in an
	 * rmodule so ap_start can be used as a proxy for the load address. */
	movl $(gdtaddr), %ebx
	sub $(ap_start), %ebx
	data32 lgdt (%ebx)

	/* Enter protected mode with caching disabled (CD/NW set). */
	movl %cr0, %eax
	andl $0x7FFAFFD1, %eax /* PG,AM,WP,NE,TS,EM,MP = 0 */
	orl $0x60000001, %eax /* CD, NW, PE = 1 */
	movl %eax, %cr0

	ljmpl $CODE_SEG, $1f

1:
	.code32
	movw $DATA_SEG, %ax
	movw %ax, %ds
	movw %ax, %es
	movw %ax, %ss
	movw %ax, %fs
	movw %ax, %gs

	/* Load the Interrupt descriptor table */
	mov idt_ptr, %ebx
	lidt (%ebx)

	/* The CPU number is calculated by reading the initial APIC id. */
	mov $1, %eax
	cpuid
	/* Default APIC id in ebx[31:24]. Move it to bl. */
	bswap %ebx
	/* Linear search of the apic_to_cpu_num table for this APIC id. */
	mov $(apic_to_cpu_num), %eax
	xor %ecx, %ecx

1:
	cmp (%eax, %ecx, 1), %bl
	je 1f
	inc %ecx
	cmp $CONFIG_MAX_CPUS, %ecx
	jne 1b
	/* This is bad. No CPU number found. However, the BSP should have setup
	 * the AP handler properly. Just park the CPU after signalling 0xdead
	 * on port 0x80. */
	mov $0x80, %dx
	movw $0xdead, %ax
	outw %ax, %dx
	jmp halt_jump
1:
	/* Setup stacks for each CPU. CPU N's stack starts stack_size * N
	 * below stack_top. */
	movl stack_size, %eax
	mul %ecx
	movl stack_top, %edx
	subl %eax, %edx
	mov %edx, %esp
	/* Save cpu number. */
	mov %ecx, %esi

	/* Determine if one should check microcode versions. */
	mov microcode_ptr, %edi
	test %edi, %edi
	jz 1f /* Bypass if no microcode exists. */

	/* Get the Microcode version. */
	mov $1, %eax
	cpuid
	mov $IA32_BIOS_SIGN_ID, %ecx
	rdmsr
	/* If something already loaded skip loading again. */
	test %edx, %edx
	jnz 1f

	/* Load new microcode. */
	mov $IA32_UPDT_TRIG, %ecx
	xor %edx, %edx
	mov %edi, %eax
	/* The microcode pointer is passed in pointing to the header. Adjust
	 * pointer to reflect the payload (header size is 48 bytes). */
	add $48, %eax
	pusha
	wrmsr
	popa

1:
	/*
	 * Load MSRs. Each entry in the table consists of:
	 * 0: index,
	 * 4: value[31:0]
	 * 8: value[63:32]
	 */
	mov msr_table_ptr, %edi
	mov msr_count, %ebx
	test %ebx, %ebx
	jz 1f
load_msr:
	mov (%edi), %ecx
	mov 4(%edi), %eax
	mov 8(%edi), %edx
	wrmsr
	add $12, %edi
	dec %ebx
	jnz load_msr

1:
	/* Enable caching. */
	mov %cr0, %eax
	and $0x9fffffff, %eax /* CD, NW = 0 */
	mov %eax, %cr0

	/* c_handler(cpu_num, *c_handler_arg) */
	push c_handler_arg
	push %esi /* cpu_num */
	mov c_handler, %eax
	call *%eax
halt_jump:
	hlt
	jmp halt_jump

View File

@ -23,6 +23,7 @@
#include <device/pci.h> #include <device/pci.h>
#include <cpu/cpu.h> #include <cpu/cpu.h>
#include <cpu/x86/cache.h> #include <cpu/x86/cache.h>
#include <cpu/x86/lapic.h>
#include <cpu/x86/msr.h> #include <cpu/x86/msr.h>
#include <cpu/x86/mtrr.h> #include <cpu/x86/mtrr.h>
#include <cpu/x86/smm.h> #include <cpu/x86/smm.h>
@ -297,24 +298,30 @@ static int cpu_smm_setup(void)
return 0; return 0;
} }
void smm_init(void) int smm_initialize(void)
{ {
/* Return early if CPU SMM setup failed. */ /* Return early if CPU SMM setup failed. */
if (cpu_smm_setup()) if (cpu_smm_setup())
return; return -1;
southbridge_smm_init(); southbridge_smm_init();
/* Initiate first SMI to kick off SMM-context relocation. Note: this /* Run the relocation handler. */
* SMI being triggered here queues up an SMI in the APs which are in smm_initiate_relocation();
* wait-for-SIPI state. Once an AP gets an SIPI it will service the SMI
* at the SMM_DEFAULT_BASE before jumping to startup vector. */
southbridge_trigger_smi();
printk(BIOS_DEBUG, "Relocation complete.\n");
/* Lock down the SMRAM space. */ /* Lock down the SMRAM space. */
smm_lock(); smm_lock();
return 0;
}
void smm_init(void)
{
/* smm_init() is normally called from initialize_cpus() in
* lapic_cpu_init.c. However, that path is no longer used. Don't reuse
* the function name because that would cause confusion.
* The smm_initialize() function above is used to setup SMM at the
* appropriate time. */
} }
void smm_lock(void) void smm_lock(void)