haswell: support for parallel SMM relocation

The haswell processors support the ability to save their SMM state
into MSR space instead of the memory. This feature allows for parallel
SMM relocation handlers as well as setting the same SMBASE for each
CPU since the save state memory area is not used.

The catch is that in order to determine if this feature is available the
CPU needs to be in SMM context. In order to implement parallel SMM
relocation the BSP enters the relocation handler twice. The first time
is to determine if that feature is available. If it is, then that
feature is enabled and the BSP exits the relocation handler without
relocating SMBASE. It then releases the APs to run the SMM relocation
handler. After the APs have completed the relocation the BSP will
re-enter the SMM relocation handler to relocate its own SMBASE to the
final location.  If the parallel SMM feature is not available the BSP
relocates its SMBASE as it did before.

This change also introduces the BSP waiting for the APs to relocate
their SMBASE before proceeding with the remainder of the boot process.

Ensured both the parallel path and the serial path still continue
to work on cold, warm, and S3 resume paths.

Change-Id: Iea24fd8f9561f1b194393cdb77c79adb48039ea2
Signed-off-by: Aaron Durbin <adurbin@chromium.org>
Reviewed-on: http://review.coreboot.org/2801
Tested-by: build bot (Jenkins)
Reviewed-by: Ronald G. Minnich <rminnich@gmail.com>
This commit is contained in:
Aaron Durbin 2013-02-13 11:22:25 -06:00 committed by Ronald G. Minnich
parent 159f2ef03a
commit 738af675d1
4 changed files with 162 additions and 36 deletions

View File

@ -144,6 +144,7 @@ int cpu_config_tdp_levels(void);
/* Returns 0 on success, < 0 on failure. */ /* Returns 0 on success, < 0 on failure. */
int smm_initialize(void); int smm_initialize(void);
void smm_initiate_relocation(void); void smm_initiate_relocation(void);
void smm_initiate_relocation_parallel(void);
struct bus; struct bus;
void bsp_init_and_start_aps(struct bus *cpu_bus); void bsp_init_and_start_aps(struct bus *cpu_bus);
/* Returns 0 on success. < 0 on failure. */ /* Returns 0 on success. < 0 on failure. */
@ -151,7 +152,7 @@ int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
const void *microcode_patch); const void *microcode_patch);
/* Returns 0 on success, < 0 on failure. */ /* Returns 0 on success, < 0 on failure. */
int start_aps(struct bus *cpu_bus, int max_cpus); int start_aps(struct bus *cpu_bus, int max_cpus);
void release_aps_for_smm_relocation(void); void release_aps_for_smm_relocation(int do_parallel_relocation);
#endif #endif
#endif #endif

View File

@ -549,9 +549,6 @@ void bsp_init_and_start_aps(struct bus *cpu_bus)
return; return;
} }
/* Release APs to perform SMM relocation. */
release_aps_for_smm_relocation();
/* After SMM relocation a 2nd microcode load is required. */ /* After SMM relocation a 2nd microcode load is required. */
intel_microcode_load_unlocked(microcode_patch); intel_microcode_load_unlocked(microcode_patch);
} }

View File

@ -75,9 +75,16 @@ static device_t cpu_devs[CONFIG_MAX_CPUS];
/* Number of APs checked that have checked in. */ /* Number of APs checked that have checked in. */
static atomic_t num_aps; static atomic_t num_aps;
/* Number of APs that have relocated their SMM handler. */
static atomic_t num_aps_relocated_smm;
/* Barrier to stop APs from performing SMM relocation. */ /* Barrier to stop APs from performing SMM relocation. */
static int smm_relocation_barrier_begin __attribute__ ((aligned (64))); static int smm_relocation_barrier_begin __attribute__ ((aligned (64)));
static inline void mfence(void)
{
__asm__ __volatile__("mfence\t\n": : :"memory");
}
static inline void wait_for_barrier(volatile int *barrier) static inline void wait_for_barrier(volatile int *barrier)
{ {
while (*barrier == 0) { while (*barrier == 0) {
@ -95,13 +102,18 @@ static void ap_wait_for_smm_relocation_begin(void)
wait_for_barrier(&smm_relocation_barrier_begin); wait_for_barrier(&smm_relocation_barrier_begin);
} }
/* This function pointer is used by the non-BSP CPUs to initiate relocation. It
* points to either a serial or parallel SMM initiation. */
static void (*ap_initiate_smm_relocation)(void) = &smm_initiate_relocation;
/* Returns 1 if timeout waiting for APs. 0 if target aps found. */ /* Returns 1 if timeout waiting for APs. 0 if target aps found. */
static int wait_for_aps(int target, int total_delay, int delay_step) static int wait_for_aps(atomic_t *val, int target, int total_delay,
int delay_step)
{ {
int timeout = 0; int timeout = 0;
int delayed = 0; int delayed = 0;
while (atomic_read(&num_aps) != target) { while (atomic_read(val) != target) {
udelay(delay_step); udelay(delay_step);
delayed += delay_step; delayed += delay_step;
if (delayed >= total_delay) { if (delayed >= total_delay) {
@ -113,9 +125,19 @@ static int wait_for_aps(int target, int total_delay, int delay_step)
return timeout; return timeout;
} }
void release_aps_for_smm_relocation(void) void release_aps_for_smm_relocation(int do_parallel)
{ {
/* Change the AP SMM initiation function, and ensure it is visible
* before releasing the APs. */
if (do_parallel) {
ap_initiate_smm_relocation = &smm_initiate_relocation_parallel;
mfence();
}
release_barrier(&smm_relocation_barrier_begin); release_barrier(&smm_relocation_barrier_begin);
/* Wait for CPUs to relocate their SMM handler up to 100ms. */
if (wait_for_aps(&num_aps_relocated_smm, atomic_read(&num_aps),
100000 /* 100 ms */, 200 /* us */))
printk(BIOS_DEBUG, "Timed out waiting for AP SMM relocation\n");
} }
/* The mtrr code sets up ROM caching on the BSP, but not the others. However, /* The mtrr code sets up ROM caching on the BSP, but not the others. However,
@ -172,7 +194,10 @@ ap_init(unsigned int cpu, void *microcode_ptr)
ap_wait_for_smm_relocation_begin(); ap_wait_for_smm_relocation_begin();
smm_initiate_relocation(); ap_initiate_smm_relocation();
/* Indicate that SMM relocation has occurred on this thread. */
atomic_inc(&num_aps_relocated_smm);
/* After SMM relocation a 2nd microcode load is required. */ /* After SMM relocation a 2nd microcode load is required. */
intel_microcode_load_unlocked(microcode_ptr); intel_microcode_load_unlocked(microcode_ptr);
@ -483,7 +508,7 @@ int start_aps(struct bus *cpu_bus, int ap_count)
printk(BIOS_DEBUG, "done.\n"); printk(BIOS_DEBUG, "done.\n");
} }
/* Wait for CPUs to check in up to 200 us. */ /* Wait for CPUs to check in up to 200 us. */
wait_for_aps(ap_count, 200 /* us */, 15 /* us */); wait_for_aps(&num_aps, ap_count, 200 /* us */, 15 /* us */);
/* Send 2nd SIPI */ /* Send 2nd SIPI */
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
@ -507,7 +532,7 @@ int start_aps(struct bus *cpu_bus, int ap_count)
} }
/* Wait for CPUs to check in. */ /* Wait for CPUs to check in. */
if (wait_for_aps(ap_count, 10000 /* 10 ms */, 50 /* us */)) { if (wait_for_aps(&num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) {
printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n", printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n",
atomic_read(&num_aps), ap_count); atomic_read(&num_aps), ap_count);
return -1; return -1;
@ -516,17 +541,12 @@ int start_aps(struct bus *cpu_bus, int ap_count)
return 0; return 0;
} }
DECLARE_SPIN_LOCK(smm_relocation_lock); void smm_initiate_relocation_parallel(void)
void smm_initiate_relocation(void)
{ {
spin_lock(&smm_relocation_lock);
if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) { if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
printk(BIOS_DEBUG, "Waiting for ICR not to be busy..."); printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
if (apic_wait_timeout(1000 /* 1 ms */, 50)) { if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
printk(BIOS_DEBUG, "timed out. Aborting.\n"); printk(BIOS_DEBUG, "timed out. Aborting.\n");
spin_unlock(&smm_relocation_lock);
return; return;
} else } else
printk(BIOS_DEBUG, "done.\n"); printk(BIOS_DEBUG, "done.\n");
@ -539,6 +559,14 @@ void smm_initiate_relocation(void)
} else } else
printk(BIOS_DEBUG, "Relocation complete.\n"); printk(BIOS_DEBUG, "Relocation complete.\n");
}
DECLARE_SPIN_LOCK(smm_relocation_lock);
void smm_initiate_relocation(void)
{
spin_lock(&smm_relocation_lock);
smm_initiate_relocation_parallel();
spin_unlock(&smm_relocation_lock); spin_unlock(&smm_relocation_lock);
} }

View File

@ -36,6 +36,14 @@
#define EMRRphysMask_MSR 0x1f5 #define EMRRphysMask_MSR 0x1f5
#define UNCORE_EMRRphysBase_MSR 0x2f4 #define UNCORE_EMRRphysBase_MSR 0x2f4
#define UNCORE_EMRRphysMask_MSR 0x2f5 #define UNCORE_EMRRphysMask_MSR 0x2f5
#define SMM_MCA_CAP_MSR 0x17d
#define SMM_CPU_SVRSTR_BIT 57
#define SMM_CPU_SVRSTR_MASK (1 << (SMM_CPU_SVRSTR_BIT - 32))
#define SMM_FEATURE_CONTROL_MSR 0x4e0
#define SMM_CPU_SAVE_EN (1 << 1)
/* SMM save state MSRs */
#define SMBASE_MSR 0xc20
#define IEDBASE_MSR 0xc22
#define SMRR_SUPPORTED (1<<11) #define SMRR_SUPPORTED (1<<11)
#define EMRR_SUPPORTED (1<<12) #define EMRR_SUPPORTED (1<<12)
@ -51,6 +59,10 @@ struct smm_relocation_params {
msr_t emrr_mask; msr_t emrr_mask;
msr_t uncore_emrr_base; msr_t uncore_emrr_base;
msr_t uncore_emrr_mask; msr_t uncore_emrr_mask;
/* The smm_save_state_in_msrs field indicates if SMM save state
* locations live in MSRs. This indicates to the CPUs how to adjust
* the SMBASE and IEDBASE. */
int smm_save_state_in_msrs;
}; };
/* This gets filled in and used during relocation. */ /* This gets filled in and used during relocation. */
@ -82,13 +94,79 @@ static inline void write_uncore_emrr(struct smm_relocation_params *relo_params)
wrmsr(UNCORE_EMRRphysMask_MSR, relo_params->uncore_emrr_mask); wrmsr(UNCORE_EMRRphysMask_MSR, relo_params->uncore_emrr_mask);
} }
static void update_save_state(int cpu,
struct smm_relocation_params *relo_params,
const struct smm_runtime *runtime)
{
u32 smbase;
u32 iedbase;
/* The relocated handler runs with all CPUs concurrently. Therefore
* stagger the entry points adjusting SMBASE downwards by save state
* size * CPU num. */
smbase = relo_params->smram_base - cpu * runtime->save_state_size;
iedbase = relo_params->ied_base;
printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x\n",
smbase, iedbase);
/* All threads need to set IEDBASE and SMBASE to the relocated
* handler region. However, the save state location depends on the
* smm_save_state_in_msrs field in the relocation parameters. If
* smm_save_state_in_msrs is non-zero then the CPUs are relocating
* the SMM handler in parallel, and each CPU's save state area is
* located in their respective MSR space. If smm_save_state_in_msrs
* is zero then the SMM relocation is happening serially so the
* save state is at the same default location for all CPUs. */
if (relo_params->smm_save_state_in_msrs) {
msr_t smbase_msr;
msr_t iedbase_msr;
smbase_msr.lo = smbase;
smbase_msr.hi = 0;
/* According to the BWG the IEDBASE MSR is in bits 63:32. It's
* not clear why it differs from the SMBASE MSR. */
iedbase_msr.lo = 0;
iedbase_msr.hi = iedbase;
wrmsr(SMBASE_MSR, smbase_msr);
wrmsr(IEDBASE_MSR, iedbase_msr);
} else {
em64t101_smm_state_save_area_t *save_state;
save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE -
runtime->save_state_size);
save_state->smbase = smbase;
save_state->iedbase = iedbase;
}
}
/* Returns 1 if SMM MSR save state was set. */
static int bsp_setup_msr_save_state(struct smm_relocation_params *relo_params)
{
msr_t smm_mca_cap;
smm_mca_cap = rdmsr(SMM_MCA_CAP_MSR);
if (smm_mca_cap.hi & SMM_CPU_SVRSTR_MASK) {
msr_t smm_feature_control;
smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
smm_feature_control.hi = 0;
smm_feature_control.lo |= SMM_CPU_SAVE_EN;
wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
relo_params->smm_save_state_in_msrs = 1;
}
return relo_params->smm_save_state_in_msrs;
}
/* The relocation work is actually performed in SMM context, but the code /* The relocation work is actually performed in SMM context, but the code
* resides in the ramstage module. This occurs by trampolining from the default * resides in the ramstage module. This occurs by trampolining from the default
* SMRAM entry point to here. */ * SMRAM entry point to here. */
static void __attribute__((cdecl)) static void __attribute__((cdecl))
cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime) cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
{ {
em64t101_smm_state_save_area_t *save_state;
msr_t mtrr_cap; msr_t mtrr_cap;
struct smm_relocation_params *relo_params = arg; struct smm_relocation_params *relo_params = arg;
@ -100,21 +178,32 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
printk(BIOS_DEBUG, "In relocation handler: cpu %d\n", cpu); printk(BIOS_DEBUG, "In relocation handler: cpu %d\n", cpu);
/* All threads need to set IEDBASE and SMBASE in the save state area. /* Determine if the processor supports saving state in MSRs. If so,
* Since one thread runs at a time during the relocation the save state * enable it before the non-BSPs run so that SMM relocation can occur
* is the same for all cpus. */ * in parallel in the non-BSP CPUs. */
save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE - if (cpu == 0) {
runtime->save_state_size); /* If smm_save_state_in_msrs is 1 then that means this is the
* 2nd time through the relocation handler for the BSP.
* Parallel SMM handler relocation is taking place. However,
* it is desired to access other CPUs save state in the real
* SMM handler. Therefore, disable the SMM save state in MSRs
* feature. */
if (relo_params->smm_save_state_in_msrs) {
msr_t smm_feature_control;
/* The relocated handler runs with all CPUs concurrently. Therefore smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
* stagger the entry points adjusting SMBASE downwards by save state smm_feature_control.lo &= ~SMM_CPU_SAVE_EN;
* size * CPU num. */ wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
save_state->smbase = relo_params->smram_base - } else if (bsp_setup_msr_save_state(relo_params))
cpu * runtime->save_state_size; /* Just return from relocation handler if MSR save
save_state->iedbase = relo_params->ied_base; * state is enabled. In that case the BSP will come
* back into the relocation handler to setup the new
* SMBASE as well as disabling SMM save state in MSRs. */
return;
}
printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x @ %p\n", /* Make appropriate changes to the save state map. */
save_state->smbase, save_state->iedbase, save_state); update_save_state(cpu, relo_params, runtime);
/* Write EMRR and SMRR MSRs based on indicated support. */ /* Write EMRR and SMRR MSRs based on indicated support. */
mtrr_cap = rdmsr(MTRRcap_MSR); mtrr_cap = rdmsr(MTRRcap_MSR);
@ -128,8 +217,6 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
if (cpu == 0) if (cpu == 0)
write_uncore_emrr(relo_params); write_uncore_emrr(relo_params);
} }
southbridge_clear_smi_status();
} }
static u32 northbridge_get_base_reg(device_t dev, int reg) static u32 northbridge_get_base_reg(device_t dev, int reg)
@ -199,10 +286,12 @@ static void fill_in_relocation_params(device_t dev,
static int install_relocation_handler(int num_cpus, static int install_relocation_handler(int num_cpus,
struct smm_relocation_params *relo_params) struct smm_relocation_params *relo_params)
{ {
/* The default SMM entry happens serially at the default location. /* The default SMM entry can happen in parallel or serially. If the
* Therefore, there is only 1 concurrent save state area. Set the * default SMM entry is done in parallel the BSP has already set up
* stack size to the save state size, and call into the * the save state to each CPU's MSRs. At least one save state size
* do_relocation handler. */ * is required for the initial SMM entry for the BSP to determine if
* parallel SMM relocation is even feasible. Set the stack size to
* the save state size, and call into the do_relocation handler. */
int save_state_size = sizeof(em64t101_smm_state_save_area_t); int save_state_size = sizeof(em64t101_smm_state_save_area_t);
struct smm_loader_params smm_params = { struct smm_loader_params smm_params = {
.per_cpu_stack_size = save_state_size, .per_cpu_stack_size = save_state_size,
@ -309,6 +398,17 @@ int smm_initialize(void)
/* Run the relocation handler. */ /* Run the relocation handler. */
smm_initiate_relocation(); smm_initiate_relocation();
/* If smm_save_state_in_msrs is non-zero then parallel SMM relocation
* shall take place. Run the relocation handler a second time to do
* the final move. */
if (smm_reloc_params.smm_save_state_in_msrs) {
printk(BIOS_DEBUG, "Doing parallel SMM relocation.\n");
release_aps_for_smm_relocation(1);
smm_initiate_relocation_parallel();
} else {
release_aps_for_smm_relocation(0);
}
/* Lock down the SMRAM space. */ /* Lock down the SMRAM space. */
smm_lock(); smm_lock();