northbridge/amd/pi/00730F01: enable PARALLEL_MP

Disable LEGACY_SMP_INIT to enable PARALLEL_MP.
Also remove a large amount of APIC code that is now unnecessary.

TEST=Boot on PC Engines apu3

Boot time reduced from 1.707 seconds to 1.620 seconds average across
5 coldboots.

Inspired by CB:59693

Change-Id: Ib49e7d3f5956ac7831664d50db5f233b70aa54db
Signed-off-by: Michał Kopeć <michal.kopec@3mdeb.com>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/59808
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Angel Pons <th3fanbus@gmail.com>
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
This commit is contained in:
Michał Kopeć 2021-11-30 17:40:52 +01:00 committed by Patrick Georgi
parent 5b334b88a6
commit dc35d2a693
3 changed files with 27 additions and 127 deletions

View File

@ -23,21 +23,6 @@ static void model_16_init(struct device *dev)
msr_t msr;
u32 siblings;
/*
* All cores are initialized sequentially, so the solution for APs will be created
* before they start.
*/
x86_setup_mtrrs_with_detect();
/*
* Enable ROM caching on BSP we just lost when creating MTRR solution, for faster
* execution of e.g. AmdInitLate
*/
if (boot_cpu()) {
mtrr_use_temp_range(OPTIMAL_CACHE_ROM_BASE, OPTIMAL_CACHE_ROM_SIZE,
MTRR_TYPE_WRPROT);
}
x86_mtrr_check();
/* zero the machine check error status registers */
mca_clear_status();

View File

@ -2,7 +2,6 @@
config NORTHBRIDGE_AMD_PI_00730F01
bool
select LEGACY_SMP_INIT
if NORTHBRIDGE_AMD_PI_00730F01

View File

@ -13,7 +13,7 @@
#include <string.h>
#include <stdlib.h>
#include <lib.h>
#include <cpu/cpu.h>
#include <cpu/x86/mp.h>
#include <Porting.h>
#include <AGESA.h>
#include <Topology.h>
@ -864,22 +864,7 @@ static void sysconf_init(struct device *dev) // first node
static void cpu_bus_scan(struct device *dev)
{
struct bus *cpu_bus;
struct device *dev_mc;
int i,j;
int coreid_bits;
int core_max = 0;
unsigned int ApicIdCoreIdSize;
unsigned int core_nums;
int siblings = 0;
unsigned int family;
u32 modules = 0;
int ioapic_count = 0;
/* For binaryPI there is no multiprocessor configuration, the number of
* modules will always be 1. */
modules = 1;
ioapic_count = CONFIG_NUM_OF_IOAPICS;
dev_mc = pcidev_on_root(DEV_CDB, 0);
if (!dev_mc) {
@ -887,109 +872,40 @@ static void cpu_bus_scan(struct device *dev)
die("");
}
sysconf_init(dev_mc);
/* Get Max Number of cores(MNC) */
coreid_bits = (cpuid_ecx(0x80000008) & 0x0000F000) >> 12;
core_max = 1 << (coreid_bits & 0x000F); //mnc
ApicIdCoreIdSize = ((cpuid_ecx(0x80000008)>>12) & 0xF);
if (ApicIdCoreIdSize) {
core_nums = (1 << ApicIdCoreIdSize) - 1;
} else {
core_nums = 3; //quad core
}
/* Find which cpus are present */
cpu_bus = dev->link_list;
for (i = 0; i < node_nums; i++) {
struct device *cdb_dev;
unsigned int devn;
struct bus *pbus;
devn = DEV_CDB + i;
pbus = dev_mc->bus;
/* Find the cpu's pci device */
cdb_dev = pcidev_on_root(devn, 0);
if (!cdb_dev) {
/* If I am probing things in a weird order
* ensure all of the cpu's pci devices are found.
*/
int fn;
for (fn = 0; fn <= 5; fn++) { //FBDIMM?
cdb_dev = pci_probe_dev(NULL, pbus,
PCI_DEVFN(devn, fn));
}
cdb_dev = pcidev_on_root(devn, 0);
} else {
/* Ok, We need to set the links for that device.
* otherwise the device under it will not be scanned
*/
add_more_links(cdb_dev, 4);
}
family = cpuid_eax(1);
family = (family >> 20) & 0xFF;
if (family == 1) { //f10
u32 dword;
cdb_dev = pcidev_on_root(devn, 3);
dword = pci_read_config32(cdb_dev, 0xe8);
siblings = ((dword & BIT15) >> 13) | ((dword & (BIT13 | BIT12)) >> 12);
} else if (family == 7) {//f16
cdb_dev = pcidev_on_root(devn, 5);
if (cdb_dev && cdb_dev->enabled) {
siblings = pci_read_config32(cdb_dev, 0x84);
siblings &= 0xFF;
}
} else {
siblings = 0; //default one core
}
int enable_node = cdb_dev && cdb_dev->enabled;
printk(BIOS_SPEW, "%s family%xh, core_max = 0x%x, core_nums = 0x%x, siblings = 0x%x\n",
dev_path(cdb_dev), 0x0f + family, core_max, core_nums, siblings);
for (j = 0; j <= siblings; j++) {
u32 lapicid_start = 0;
/*
* APIC ID calculation is tightly coupled with AGESA v5 code.
* This calculation MUST match the assignment calculation done
* in LocalApicInitializationAtEarly() function.
* And reference GetLocalApicIdForCore()
*
* Apply APIC enumeration rules
* For systems with >= 16 APICs, put the IO-APICs at 0..n and
* put the local-APICs at m..z
*
* This is needed because many IO-APIC devices only have 4 bits
* for their APIC id and therefore must reside at 0..15
*/
if ((node_nums * core_max) + ioapic_count >= 0x10) {
lapicid_start = (ioapic_count - 1) / core_max;
lapicid_start = (lapicid_start + 1) * core_max;
printk(BIOS_SPEW, "lpaicid_start = 0x%x ", lapicid_start);
}
u32 apic_id = (lapicid_start * (i/modules + 1)) + ((i % modules) ? (j + (siblings + 1)) : j);
printk(BIOS_SPEW, "node 0x%x core 0x%x apicid = 0x%x\n",
i, j, apic_id);
struct device *cpu = add_cpu_device(cpu_bus, apic_id, enable_node);
if (cpu)
amd_cpu_topology(cpu, i, j);
} //j
}
}
static void cpu_bus_init(struct device *dev)
static void pre_mp_init(void)
{
initialize_cpus(dev->link_list);
x86_setup_mtrrs_with_detect();
x86_mtrr_check();
}
static int get_cpu_count(void)
{
uint8_t siblings = cpuid_ecx(0x80000008) & 0xff;
return siblings + 1;
}
static const struct mp_ops mp_ops = {
.pre_mp_init = pre_mp_init,
.get_cpu_count = get_cpu_count,
};
void mp_init_cpus(struct bus *cpu_bus)
{
/* TODO: Handle mp_init_with_smm failure? */
mp_init_with_smm(cpu_bus, &mp_ops);
/* The flash is now no longer cacheable. Reset to WP for performance. */
mtrr_use_temp_range(OPTIMAL_CACHE_ROM_BASE, OPTIMAL_CACHE_ROM_SIZE,
MTRR_TYPE_WRPROT);
}
static struct device_operations cpu_bus_ops = {
.read_resources = noop_read_resources,
.set_resources = noop_set_resources,
.init = cpu_bus_init,
.init = mp_cpu_bus_init,
.scan_bus = cpu_bus_scan,
};