diff --git a/src/cpu/intel/haswell/haswell.h b/src/cpu/intel/haswell/haswell.h
index 26807e9f3a..a510e7de58 100644
--- a/src/cpu/intel/haswell/haswell.h
+++ b/src/cpu/intel/haswell/haswell.h
@@ -144,6 +144,7 @@ int cpu_config_tdp_levels(void);
 /* Returns 0 on success, < 0 on failure. */
 int smm_initialize(void);
 void smm_initiate_relocation(void);
+void smm_initiate_relocation_parallel(void);
 struct bus;
 void bsp_init_and_start_aps(struct bus *cpu_bus);
 /* Returns 0 on succes. < 0 on failure. */
@@ -151,7 +152,7 @@ int setup_ap_init(struct bus *cpu_bus, int *max_cpus,
 	const void *microcode_patch);
 /* Returns 0 on success, < 0 on failure. */
 int start_aps(struct bus *cpu_bus, int max_cpus);
-void release_aps_for_smm_relocation(void);
+void release_aps_for_smm_relocation(int do_parallel_relocation);
 #endif
 
 #endif
diff --git a/src/cpu/intel/haswell/haswell_init.c b/src/cpu/intel/haswell/haswell_init.c
index 82430b750d..c7f89ee646 100644
--- a/src/cpu/intel/haswell/haswell_init.c
+++ b/src/cpu/intel/haswell/haswell_init.c
@@ -549,9 +549,6 @@ void bsp_init_and_start_aps(struct bus *cpu_bus)
 		return;
 	}
 
-	/* Release APs to perform SMM relocation. */
-	release_aps_for_smm_relocation();
-
 	/* After SMM relocation a 2nd microcode load is required. */
 	intel_microcode_load_unlocked(microcode_patch);
 }
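The release call removed above does not disappear: it moves into smm_initialize() (last hunk of this patch), which lets the BSP relocate first and only then decides whether the APs may relocate in parallel. A condensed, illustrative sketch of that ordering, using the prototypes from haswell.h; the save_state_in_msrs parameter stands in for the smm_reloc_params.smm_save_state_in_msrs flag introduced in smmrelocate.c and is not itself part of the patch:

/* Prototypes as declared in haswell.h above. */
void smm_initiate_relocation(void);
void smm_initiate_relocation_parallel(void);
void release_aps_for_smm_relocation(int do_parallel_relocation);

/* Sketch only: mirrors the ordering implemented in smm_initialize(). */
static void smm_relocation_flow_sketch(int save_state_in_msrs)
{
	/* BSP relocates itself first through the serial, spin-locked path. */
	smm_initiate_relocation();

	if (save_state_in_msrs) {
		/* MSR-based save states are available: release the APs in
		 * parallel mode, then run the BSP's second pass. */
		release_aps_for_smm_relocation(1);
		smm_initiate_relocation_parallel();
	} else {
		/* Otherwise the APs still relocate one at a time. */
		release_aps_for_smm_relocation(0);
	}
}

The timeout-bounded wait added to release_aps_for_smm_relocation() in the next file is what keeps this sequence bounded even if an AP never reports back.
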
diff --git a/src/cpu/intel/haswell/mp_init.c b/src/cpu/intel/haswell/mp_init.c
index 7f15c391d7..c8bd5c22be 100644
--- a/src/cpu/intel/haswell/mp_init.c
+++ b/src/cpu/intel/haswell/mp_init.c
@@ -75,9 +75,16 @@ static device_t cpu_devs[CONFIG_MAX_CPUS];
 
 /* Number of APs checked that have checked in. */
 static atomic_t num_aps;
+/* Number of APs that have relocated their SMM handler. */
+static atomic_t num_aps_relocated_smm;
 /* Barrier to stop APs from performing SMM relcoation. */
 static int smm_relocation_barrier_begin __attribute__ ((aligned (64)));
 
+static inline void mfence(void)
+{
+	__asm__ __volatile__("mfence\t\n": : :"memory");
+}
+
 static inline void wait_for_barrier(volatile int *barrier)
 {
 	while (*barrier == 0) {
@@ -95,13 +102,18 @@ static void ap_wait_for_smm_relocation_begin(void)
 	wait_for_barrier(&smm_relocation_barrier_begin);
 }
 
+/* This function pointer is used by the non-BSP CPUs to initiate relocation. It
+ * points to either a serial or parallel SMM initiation. */
+static void (*ap_initiate_smm_relocation)(void) = &smm_initiate_relocation;
+
 /* Returns 1 if timeout waiting for APs. 0 if target aps found. */
-static int wait_for_aps(int target, int total_delay, int delay_step)
+static int wait_for_aps(atomic_t *val, int target, int total_delay,
+			int delay_step)
 {
 	int timeout = 0;
 	int delayed = 0;
 
-	while (atomic_read(&num_aps) != target) {
+	while (atomic_read(val) != target) {
 		udelay(delay_step);
 		delayed += delay_step;
 		if (delayed >= total_delay) {
@@ -113,9 +125,19 @@ static int wait_for_aps(int target, int total_delay, int delay_step)
 	return timeout;
 }
 
-void release_aps_for_smm_relocation(void)
+void release_aps_for_smm_relocation(int do_parallel)
 {
+	/* Change the AP SMM initiation function, and ensure it is visible
+	 * before releasing the APs. */
+	if (do_parallel) {
+		ap_initiate_smm_relocation = &smm_initiate_relocation_parallel;
+		mfence();
+	}
 	release_barrier(&smm_relocation_barrier_begin);
+	/* Wait for CPUs to relocate their SMM handler up to 100ms. */
+	if (wait_for_aps(&num_aps_relocated_smm, atomic_read(&num_aps),
+			 100000 /* 100 ms */, 200 /* us */))
+		printk(BIOS_DEBUG, "Timed out waiting for AP SMM relocation\n");
 }
 
 /* The mtrr code sets up ROM caching on the BSP, but not the others. However,
@@ -172,7 +194,10 @@ ap_init(unsigned int cpu, void *microcode_ptr)
 
 	ap_wait_for_smm_relocation_begin();
 
-	smm_initiate_relocation();
+	ap_initiate_smm_relocation();
+
+	/* Indicate that SMM relocation has occured on this thread. */
+	atomic_inc(&num_aps_relocated_smm);
 
 	/* After SMM relocation a 2nd microcode load is required. */
 	intel_microcode_load_unlocked(microcode_ptr);
@@ -483,7 +508,7 @@ int start_aps(struct bus *cpu_bus, int ap_count)
 			printk(BIOS_DEBUG, "done.\n");
 	}
 	/* Wait for CPUs to check in up to 200 us. */
-	wait_for_aps(ap_count, 200 /* us */, 15 /* us */);
+	wait_for_aps(&num_aps, ap_count, 200 /* us */, 15 /* us */);
 
 	/* Send 2nd SIPI */
 	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
@@ -507,7 +532,7 @@
 	}
 
 	/* Wait for CPUs to check in. */
-	if (wait_for_aps(ap_count, 10000 /* 10 ms */, 50 /* us */)) {
+	if (wait_for_aps(&num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) {
 		printk(BIOS_DEBUG, "Not all APs checked in: %d/%d.\n",
 		       atomic_read(&num_aps), ap_count);
 		return -1;
@@ -516,17 +541,12 @@ int start_aps(struct bus *cpu_bus, int ap_count)
 	return 0;
 }
 
-DECLARE_SPIN_LOCK(smm_relocation_lock);
-
-void smm_initiate_relocation(void)
+void smm_initiate_relocation_parallel(void)
 {
-	spin_lock(&smm_relocation_lock);
-
 	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
 		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
 		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
 			printk(BIOS_DEBUG, "timed out. Aborting.\n");
-			spin_unlock(&smm_relocation_lock);
 			return;
 		} else
 			printk(BIOS_DEBUG, "done.\n");
@@ -539,6 +559,14 @@
 	} else
 		printk(BIOS_DEBUG, "Relocation complete.\n");
 
+}
+
+DECLARE_SPIN_LOCK(smm_relocation_lock);
+
+void smm_initiate_relocation(void)
+{
+	spin_lock(&smm_relocation_lock);
+	smm_initiate_relocation_parallel();
 	spin_unlock(&smm_relocation_lock);
 }
 
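The mfence() helper above exists to order the store to ap_initiate_smm_relocation ahead of the barrier release, so an AP woken by release_barrier() can never call through a stale pointer. A minimal, self-contained sketch of that publish-then-release pattern; all names here (relocate_fn, release_flag, bsp_release, ap_side) are illustrative stand-ins, not code from the patch:

static void serial_relocate(void) { /* e.g. smm_initiate_relocation() */ }
static void parallel_relocate(void) { /* e.g. smm_initiate_relocation_parallel() */ }

static void (*relocate_fn)(void) = serial_relocate;
static volatile int release_flag;

static inline void fence(void)
{
	__asm__ __volatile__("mfence" : : : "memory");
}

/* BSP side: publish the chosen initiation routine, then release the APs. */
static void bsp_release(int parallel)
{
	if (parallel) {
		relocate_fn = parallel_relocate;
		fence();	/* pointer store must be visible before the release */
	}
	release_flag = 1;
}

/* AP side: spin until released, then call through the published pointer. */
static void ap_side(void)
{
	while (release_flag == 0)
		__asm__ __volatile__("pause");
	relocate_fn();
}

The patch applies the same idea with release_barrier()/wait_for_barrier(), and additionally has the BSP poll num_aps_relocated_smm so it knows when every AP has finished its handler move.
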
diff --git a/src/cpu/intel/haswell/smmrelocate.c b/src/cpu/intel/haswell/smmrelocate.c
index 2bf304ebbf..2a322a7f9a 100644
--- a/src/cpu/intel/haswell/smmrelocate.c
+++ b/src/cpu/intel/haswell/smmrelocate.c
@@ -36,6 +36,14 @@
 #define EMRRphysMask_MSR 0x1f5
 #define UNCORE_EMRRphysBase_MSR 0x2f4
 #define UNCORE_EMRRphysMask_MSR 0x2f5
+#define SMM_MCA_CAP_MSR 0x17d
+#define SMM_CPU_SVRSTR_BIT 57
+#define SMM_CPU_SVRSTR_MASK (1 << (SMM_CPU_SVRSTR_BIT - 32))
+#define SMM_FEATURE_CONTROL_MSR 0x4e0
+#define SMM_CPU_SAVE_EN (1 << 1)
+/* SMM save state MSRs */
+#define SMBASE_MSR 0xc20
+#define IEDBASE_MSR 0xc22
 
 #define SMRR_SUPPORTED (1<<11)
 #define EMRR_SUPPORTED (1<<12)
@@ -51,6 +59,10 @@
 	msr_t emrr_mask;
 	msr_t uncore_emrr_base;
 	msr_t uncore_emrr_mask;
+	/* The smm_save_state_in_msrs field indicates if SMM save state
+	 * locations live in MSRs. This indicates to the CPUs how to adjust
+	 * the SMMBASE and IEDBASE */
+	int smm_save_state_in_msrs;
 };
 
 /* This gets filled in and used during relocation. */
@@ -82,13 +94,79 @@ static inline void write_uncore_emrr(struct smm_relocation_params *relo_params)
 	wrmsr(UNCORE_EMRRphysMask_MSR, relo_params->uncore_emrr_mask);
 }
 
+static void update_save_state(int cpu,
+			      struct smm_relocation_params *relo_params,
+			      const struct smm_runtime *runtime)
+{
+	u32 smbase;
+	u32 iedbase;
+
+	/* The relocated handler runs with all CPUs concurrently. Therefore
+	 * stagger the entry points adjusting SMBASE downwards by save state
+	 * size * CPU num. */
+	smbase = relo_params->smram_base - cpu * runtime->save_state_size;
+	iedbase = relo_params->ied_base;
+
+	printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x\n",
+	       smbase, iedbase);
+
+	/* All threads need to set IEDBASE and SMBASE to the relocated
+	 * handler region. However, the save state location depends on the
+	 * smm_save_state_in_msrs field in the relocation parameters. If
+	 * smm_save_state_in_msrs is non-zero then the CPUs are relocating
+	 * the SMM handler in parallel, and each CPUs save state area is
+	 * located in their respective MSR space. If smm_save_state_in_msrs
+	 * is zero then the SMM relocation is happening serially so the
+	 * save state is at the same default location for all CPUs. */
+	if (relo_params->smm_save_state_in_msrs) {
+		msr_t smbase_msr;
+		msr_t iedbase_msr;
+
+		smbase_msr.lo = smbase;
+		smbase_msr.hi = 0;
+
+		/* According the BWG the IEDBASE MSR is in bits 63:32. It's
+		 * not clear why it differs from the SMBASE MSR. */
+		iedbase_msr.lo = 0;
+		iedbase_msr.hi = iedbase;
+
+		wrmsr(SMBASE_MSR, smbase_msr);
+		wrmsr(IEDBASE_MSR, iedbase_msr);
+	} else {
+		em64t101_smm_state_save_area_t *save_state;
+
+		save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE -
+				      runtime->save_state_size);
+
+		save_state->smbase = smbase;
+		save_state->iedbase = iedbase;
+	}
+}
+
+/* Returns 1 if SMM MSR save state was set. */
+static int bsp_setup_msr_save_state(struct smm_relocation_params *relo_params)
+{
+	msr_t smm_mca_cap;
+
+	smm_mca_cap = rdmsr(SMM_MCA_CAP_MSR);
+	if (smm_mca_cap.hi & SMM_CPU_SVRSTR_MASK) {
+		msr_t smm_feature_control;
+
+		smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
+		smm_feature_control.hi = 0;
+		smm_feature_control.lo |= SMM_CPU_SAVE_EN;
+		wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
+		relo_params->smm_save_state_in_msrs = 1;
+	}
+	return relo_params->smm_save_state_in_msrs;
+}
+
 /* The relocation work is actually performed in SMM context, but the code
  * resides in the ramstage module. This occurs by trampolining from the default
  * SMRAM entry point to here. */
 static void __attribute__((cdecl))
 cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
 {
-	em64t101_smm_state_save_area_t *save_state;
 	msr_t mtrr_cap;
 	struct smm_relocation_params *relo_params = arg;
 
@@ -100,21 +178,32 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
 
 	printk(BIOS_DEBUG, "In relocation handler: cpu %d\n", cpu);
 
-	/* All threads need to set IEDBASE and SMBASE in the save state area.
-	 * Since one thread runs at a time during the relocation the save state
-	 * is the same for all cpus. */
-	save_state = (void *)(runtime->smbase + SMM_DEFAULT_SIZE -
-			      runtime->save_state_size);
+	/* Determine if the processor supports saving state in MSRs. If so,
+	 * enable it before the non-BSPs run so that SMM relocation can occur
+	 * in parallel in the non-BSP CPUs. */
+	if (cpu == 0) {
+		/* If smm_save_state_in_msrs is 1 then that means this is the
+		 * 2nd time through the relocation handler for the BSP.
+		 * Parallel SMM handler relocation is taking place. However,
+		 * it is desired to access other CPUs save state in the real
+		 * SMM handler. Therefore, disable the SMM save state in MSRs
+		 * feature. */
+		if (relo_params->smm_save_state_in_msrs) {
+			msr_t smm_feature_control;
 
-	/* The relocated handler runs with all CPUs concurrently. Therefore
-	 * stagger the entry points adjusting SMBASE downwards by save state
-	 * size * CPU num. */
-	save_state->smbase = relo_params->smram_base -
-			     cpu * runtime->save_state_size;
-	save_state->iedbase = relo_params->ied_base;
+			smm_feature_control = rdmsr(SMM_FEATURE_CONTROL_MSR);
+			smm_feature_control.lo &= ~SMM_CPU_SAVE_EN;
+			wrmsr(SMM_FEATURE_CONTROL_MSR, smm_feature_control);
+		} else if (bsp_setup_msr_save_state(relo_params))
+			/* Just return from relocation handler if MSR save
+			 * state is enabled. In that case the BSP will come
+			 * back into the relocation handler to setup the new
+			 * SMBASE as well disabling SMM save state in MSRs. */
+			return;
+	}
 
-	printk(BIOS_DEBUG, "New SMBASE=0x%08x IEDBASE=0x%08x @ %p\n",
-	       save_state->smbase, save_state->iedbase, save_state);
+	/* Make appropriate changes to the save state map. */
+	update_save_state(cpu, relo_params, runtime);
 
 	/* Write EMRR and SMRR MSRs based on indicated support. */
 	mtrr_cap = rdmsr(MTRRcap_MSR);
@@ -128,8 +217,6 @@ cpu_smm_do_relocation(void *arg, int cpu, const struct smm_runtime *runtime)
 		if (cpu == 0)
 			write_uncore_emrr(relo_params);
 	}
-
-	southbridge_clear_smi_status();
 }
 
 static u32 northbridge_get_base_reg(device_t dev, int reg)
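update_save_state() above packs the two bases asymmetrically (SMBASE in bits 31:0 of MSR 0xc20, IEDBASE in bits 63:32 of MSR 0xc22, per the comment citing the BWG) and staggers SMBASE downward by one save state size per CPU so concurrent SMM entries get disjoint save state areas. A small, runnable illustration of that arithmetic; the base addresses and save state size are made-up example values, not values from the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t smram_base = 0x80000000u;	/* example value only */
	const uint32_t ied_base = 0x80400000u;		/* example value only */
	const uint32_t save_state_size = 0x400u;	/* example value only */

	for (int cpu = 0; cpu < 4; cpu++) {
		/* Each CPU's SMBASE drops by save_state_size, as in
		 * update_save_state() above. */
		uint32_t smbase = smram_base - cpu * save_state_size;
		/* SMBASE_MSR (0xc20): value in bits 31:0.
		 * IEDBASE_MSR (0xc22): value in bits 63:32. */
		uint64_t smbase_msr = (uint64_t)smbase;
		uint64_t iedbase_msr = (uint64_t)ied_base << 32;

		printf("cpu %d: SMBASE=0x%08x SMBASE_MSR=0x%016llx IEDBASE_MSR=0x%016llx\n",
		       cpu, smbase,
		       (unsigned long long)smbase_msr,
		       (unsigned long long)iedbase_msr);
	}
	return 0;
}
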
@@ -199,10 +286,12 @@ static void fill_in_relocation_params(device_t dev,
 static int install_relocation_handler(int num_cpus,
 				      struct smm_relocation_params *relo_params)
 {
-	/* The default SMM entry happens serially at the default location.
-	 * Therefore, there is only 1 concurrent save state area. Set the
-	 * stack size to the save state size, and call into the
-	 * do_relocation handler. */
+	/* The default SMM entry can happen in parallel or serially. If the
+	 * default SMM entry is done in parallel the BSP has already setup
+	 * the saving state to each CPU's MSRs. At least one save state size
+	 * is required for the initial SMM entry for the BSP to determine if
+	 * parallel SMM relocation is even feasible. Set the stack size to
+	 * the save state size, and call into the do_relocation handler. */
 	int save_state_size = sizeof(em64t101_smm_state_save_area_t);
 	struct smm_loader_params smm_params = {
 		.per_cpu_stack_size = save_state_size,
@@ -309,6 +398,17 @@ int smm_initialize(void)
 
 	/* Run the relocation handler. */
 	smm_initiate_relocation();
 
+	/* If smm_save_state_in_msrs is non-zero then parallel SMM relocation
+	 * shall take place. Run the relocation handler a second time to do
+	 * the final move. */
+	if (smm_reloc_params.smm_save_state_in_msrs) {
+		printk(BIOS_DEBUG, "Doing parallel SMM relocation.\n");
+		release_aps_for_smm_relocation(1);
+		smm_initiate_relocation_parallel();
+	} else {
+		release_aps_for_smm_relocation(0);
+	}
+
 	/* Lock down the SMRAM space. */
 	smm_lock();
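The parallel path in smm_initialize() above is only taken when the BSP's first pass through the relocation handler found and enabled MSR-based save states. That probe is the SMM_MCA_CAP_MSR / SMM_FEATURE_CONTROL_MSR handshake defined earlier in smmrelocate.c: bit 57 of MSR 0x17d advertises the feature, and SMM_CPU_SAVE_EN (bit 1) of MSR 0x4e0 switches it on. A compact sketch of that probe; rdmsr64() and wrmsr64() are hypothetical 64-bit MSR accessors used only here, not coreboot functions:

#include <stdint.h>

/* Hypothetical 64-bit MSR accessors, used only for this sketch. */
uint64_t rdmsr64(uint32_t msr);
void wrmsr64(uint32_t msr, uint64_t value);

#define SMM_MCA_CAP_MSR		0x17d
#define SMM_CPU_SVRSTR_BIT	57
#define SMM_FEATURE_CONTROL_MSR	0x4e0
#define SMM_CPU_SAVE_EN		(1u << 1)

/* Returns 1 if MSR-based SMM save states were advertised and enabled. */
static int enable_msr_save_state(void)
{
	if (!(rdmsr64(SMM_MCA_CAP_MSR) & (1ULL << SMM_CPU_SVRSTR_BIT)))
		return 0;

	/* Mirror the patch: keep only the low word and set SMM_CPU_SAVE_EN. */
	uint64_t ctl = rdmsr64(SMM_FEATURE_CONTROL_MSR);
	wrmsr64(SMM_FEATURE_CONTROL_MSR, (uint32_t)ctl | SMM_CPU_SAVE_EN);
	return 1;
}

On the BSP's second pass through the relocation handler the patch clears SMM_CPU_SAVE_EN again, so the permanent SMM handler can keep reaching every CPU's save state through the in-memory map.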