soc/intel/xeon_sp: Don't sort struct device cpus for numa

Currently the xeon_sp code reassigns the apic_id of the CPU struct devices so
that SRAT entries can be added in a certain order.

This is not a good idea, as it breaks thread-local storage, which holds a
pointer to each thread's struct device cpu.
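
To illustrate, a minimal sketch of the invariant the reassignment violated (the check_cpu_device_binding() helper is hypothetical and not part of the commit; cpu_info(), lapicid(), cpu_index() and printk() are existing coreboot helpers): each AP's per-CPU (TLS) data keeps the struct device it was matched with during MP init, so that device's apic_id has to keep describing the LAPIC the thread actually runs on.

#include <console/console.h>
#include <cpu/cpu.h>
#include <cpu/x86/lapic.h>
#include <device/device.h>

/* Hypothetical sanity check, for illustration only. */
static void check_cpu_device_binding(void)
{
	struct cpu_info *info = cpu_info();	/* per-thread (TLS) data */
	struct device *cpu = info->cpu;		/* bound during MP init */

	/*
	 * If ramstage later rewrites cpu->path.apic.apic_id (as the old
	 * xeonsp_init_cpu_config() did to get sorted SRAT entries), this
	 * no longer holds: the devtree entry stops matching the LAPIC
	 * this thread is actually running on.
	 */
	if (cpu->path.apic.apic_id != lapicid())
		printk(BIOS_ERR, "cpu %lu: devtree apic_id 0x%x != lapicid 0x%x\n",
		       cpu_index(), cpu->path.apic.apic_id, lapicid());
}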

This moves the sorting of the LAPIC IDs into the SRAT table generation and
adds the NUMA node ID to each core init log entry. As a bonus, it is now
done in parallel too.

Change-Id: I372bcea1932d28e9bf712cc712f19a76fe3199b1
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/68912
Reviewed-by: Patrick Rudolph <siro@das-labor.org>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Authored by Arthur Heymans on 2022-10-27 15:11:05 +02:00; committed by Lean Sheng Tan
parent 4e498e169e
commit 36e6f9bc04
5 changed files with 39 additions and 120 deletions

View File

@@ -78,8 +78,9 @@ static void each_cpu_init(struct device *cpu)
 {
 	msr_t msr;
-	printk(BIOS_SPEW, "%s dev: %s, cpu: %lu, apic_id: 0x%x\n",
-	       __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id);
+	printk(BIOS_SPEW, "%s dev: %s, cpu: %lu, apic_id: 0x%x, package_id: 0x%x\n",
+	       __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id,
+	       cpu->path.apic.package_id);
 	/*
 	 * Set HWP base feature, EPP reg enumeration, lock thermal and msr
@@ -227,7 +228,4 @@ void mp_init_cpus(struct bus *bus)
 	 * rest of the CPU devices do not have chip_info updated.
 	 */
 	chip_config = bus->dev->chip_info;
-	/* update numa domain for all cpu devices */
-	xeonsp_init_cpu_config();
 }

View File

@@ -13,7 +13,6 @@ msr_t read_msr_ppin(void);
 int get_platform_thread_count(void);
 const IIO_UDS *get_iio_uds(void);
 unsigned int soc_get_num_cpus(void);
-void xeonsp_init_cpu_config(void);
 void set_bios_init_completion(void);
 uint8_t soc_get_iio_ioapicid(int socket, int stack);

View File

@@ -59,8 +59,9 @@ static void xeon_sp_core_init(struct device *cpu)
 {
 	msr_t msr;
-	printk(BIOS_INFO, "%s dev: %s, cpu: %lu, apic_id: 0x%x\n",
-	       __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id);
+	printk(BIOS_INFO, "%s dev: %s, cpu: %lu, apic_id: 0x%x, package_id: 0x%x\n",
+	       __func__, dev_path(cpu), cpu_index(), cpu->path.apic.apic_id,
+	       cpu->path.apic.package_id);
 	assert(chip_config);
 	/* set MSR_PKG_CST_CONFIG_CONTROL - scope per core*/
@@ -247,8 +248,5 @@ void mp_init_cpus(struct bus *bus)
 	/* TODO: Handle mp_init_with_smm failure? */
 	mp_init_with_smm(bus, &mp_ops);
-	/* update numa domain for all cpu devices */
-	xeonsp_init_cpu_config();
 	FUNC_EXIT();
 }

View File

@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <cbmem.h>
 #include <cpu/x86/lapic.h>
+#include <commonlib/sort.h>
 #include <device/mmio.h>
 #include <device/pci.h>
 #include <device/pciexp.h>
@@ -22,29 +23,54 @@
 /* NUMA related ACPI table generation. SRAT, SLIT, etc */
+/* Increase if necessary. Currently all x86 CPUs only have 2 SMP threads */
+#define MAX_THREAD 2
 unsigned long acpi_create_srat_lapics(unsigned long current)
 {
 	struct device *cpu;
-	unsigned int cpu_index = 0;
+	unsigned int num_cpus = 0;
+	int apic_ids[CONFIG_MAX_CPUS] = {};
-	for (cpu = all_devices; cpu; cpu = cpu->next) {
-		if (!is_enabled_cpu(cpu))
+	unsigned int sort_start = 0;
+	for (unsigned int thread_id = 0; thread_id < MAX_THREAD; thread_id++) {
+		for (cpu = all_devices; cpu; cpu = cpu->next) {
+			if (!is_enabled_cpu(cpu))
+				continue;
+			if (num_cpus >= ARRAY_SIZE(apic_ids))
+				break;
+			if (cpu->path.apic.thread_id != thread_id)
+				continue;
+			apic_ids[num_cpus++] = cpu->path.apic.apic_id;
+		}
+		bubblesort(&apic_ids[sort_start], num_cpus - sort_start, NUM_ASCENDING);
+		sort_start = num_cpus;
+	}
+	for (unsigned int i = 0; i < num_cpus; i++) {
+		/* Match the sorted apic_ids to a struct device */
+		for (cpu = all_devices; cpu; cpu = cpu->next) {
+			if (!is_enabled_cpu(cpu))
+				continue;
+			if (cpu->path.apic.apic_id == apic_ids[i])
+				break;
+		}
+		if (!cpu)
 			continue;
 		if (is_x2apic_mode()) {
-			printk(BIOS_DEBUG, "SRAT: x2apic cpu_index=%08x, node_id=%02x, apic_id=%08x\n",
-			       cpu_index, cpu->path.apic.node_id, cpu->path.apic.apic_id);
+			printk(BIOS_DEBUG, "SRAT: x2apic cpu_index=%04x, node_id=%02x, apic_id=%08x\n",
+			       i, cpu->path.apic.node_id, cpu->path.apic.apic_id);
 			current += acpi_create_srat_x2apic((acpi_srat_x2apic_t *)current,
 				cpu->path.apic.node_id, cpu->path.apic.apic_id);
 		} else {
 			printk(BIOS_DEBUG, "SRAT: lapic cpu_index=%02x, node_id=%02x, apic_id=%02x\n",
-			       cpu_index, cpu->path.apic.node_id, cpu->path.apic.apic_id);
+			       i, cpu->path.apic.node_id, cpu->path.apic.apic_id);
 			current += acpi_create_srat_lapic((acpi_srat_lapic_t *)current,
 				cpu->path.apic.node_id, cpu->path.apic.apic_id);
 		}
-		cpu_index++;
 	}
 	return current;
 }
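
To make the resulting order concrete, here is a small standalone sketch (plain C, not coreboot code: qsort() stands in for bubblesort()/NUM_ASCENDING, and the fake_cpu array with its two-socket, two-thread topology is made up) that mirrors the loop above, grouping CPUs by SMT thread and sorting each group by ascending APIC ID:

#include <stdio.h>
#include <stdlib.h>

#define MAX_THREAD 2

struct fake_cpu {
	unsigned int apic_id;
	unsigned int thread_id;	/* SMT sibling: 0 or 1 */
};

static int cmp_uint(const void *a, const void *b)
{
	unsigned int x = *(const unsigned int *)a;
	unsigned int y = *(const unsigned int *)b;
	return (x > y) - (x < y);
}

int main(void)
{
	/* Made-up topology: two sockets, two cores each, two threads per core. */
	const struct fake_cpu cpus[] = {
		{ 0x00, 0 }, { 0x01, 1 }, { 0x02, 0 }, { 0x03, 1 },
		{ 0x40, 0 }, { 0x41, 1 }, { 0x42, 0 }, { 0x43, 1 },
	};
	const unsigned int total = sizeof(cpus) / sizeof(cpus[0]);
	unsigned int order[sizeof(cpus) / sizeof(cpus[0])];
	unsigned int num_cpus = 0;

	/* One pass per thread_id, sorting each group in ascending order. */
	for (unsigned int thread_id = 0; thread_id < MAX_THREAD; thread_id++) {
		unsigned int sort_start = num_cpus;
		for (unsigned int i = 0; i < total; i++)
			if (cpus[i].thread_id == thread_id)
				order[num_cpus++] = cpus[i].apic_id;
		qsort(&order[sort_start], num_cpus - sort_start,
		      sizeof(order[0]), cmp_uint);
	}

	/* Prints: 0x0 0x2 0x40 0x42 0x1 0x3 0x41 0x43 */
	for (unsigned int i = 0; i < num_cpus; i++)
		printf("0x%x ", order[i]);
	printf("\n");
	return 0;
}

The SRAT entries are thus emitted thread-0 CPUs first, then thread-1 CPUs, without ever rewriting the apic_id stored in the struct device entries.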

View File

@@ -125,108 +125,6 @@ unsigned int soc_get_num_cpus(void)
}
#if ENV_RAMSTAGE /* Setting devtree variables is only allowed in ramstage. */
static void get_core_thread_bits(uint32_t *core_bits, uint32_t *thread_bits)
{
register int ecx;
struct cpuid_result cpuid_regs;
/* get max index of CPUID */
cpuid_regs = cpuid(0);
assert(cpuid_regs.eax >= 0xb); /* cpuid_regs.eax is max input value for cpuid */
*thread_bits = *core_bits = 0;
ecx = 0;
while (1) {
cpuid_regs = cpuid_ext(0xb, ecx);
if (ecx == 0) {
*thread_bits = (cpuid_regs.eax & 0x1f);
} else {
*core_bits = (cpuid_regs.eax & 0x1f) - *thread_bits;
break;
}
ecx++;
}
}
static void get_cpu_info_from_apicid(uint32_t apicid, uint32_t core_bits, uint32_t thread_bits,
uint8_t *package, uint8_t *core, uint8_t *thread)
{
if (package)
*package = (apicid >> (thread_bits + core_bits));
if (core)
*core = (uint32_t)((apicid >> thread_bits) & ~((~0) << core_bits));
if (thread)
*thread = (uint32_t)(apicid & ~((~0) << thread_bits));
}
void xeonsp_init_cpu_config(void)
{
struct device *dev;
int apic_ids[CONFIG_MAX_CPUS] = {0}, apic_ids_by_thread[CONFIG_MAX_CPUS] = {0};
int num_apics = 0;
uint32_t core_bits, thread_bits;
unsigned int core_count, thread_count;
unsigned int num_sockets;
/*
* sort APIC ids in ascending order to identify apicid ranges for
* each numa domain
*/
for (dev = all_devices; dev; dev = dev->next) {
if ((dev->path.type != DEVICE_PATH_APIC) ||
(dev->bus->dev->path.type != DEVICE_PATH_CPU_CLUSTER)) {
continue;
}
if (!dev->enabled)
continue;
if (num_apics >= ARRAY_SIZE(apic_ids))
break;
apic_ids[num_apics++] = dev->path.apic.apic_id;
}
if (num_apics > 1)
bubblesort(apic_ids, num_apics, NUM_ASCENDING);
num_sockets = soc_get_num_cpus();
cpu_read_topology(&core_count, &thread_count);
assert(num_apics == (num_sockets * thread_count));
/* sort them by thread i.e., all cores with thread 0 and then thread 1 */
int index = 0;
for (int id = 0; id < num_apics; ++id) {
int apic_id = apic_ids[id];
if (apic_id & 0x1) { /* 2nd thread */
apic_ids_by_thread[index + (num_apics/2) - 1] = apic_id;
} else { /* 1st thread */
apic_ids_by_thread[index++] = apic_id;
}
}
/* update apic_id, node_id in sorted order */
num_apics = 0;
get_core_thread_bits(&core_bits, &thread_bits);
for (dev = all_devices; dev; dev = dev->next) {
uint8_t package;
if ((dev->path.type != DEVICE_PATH_APIC) ||
(dev->bus->dev->path.type != DEVICE_PATH_CPU_CLUSTER)) {
continue;
}
if (!dev->enabled)
continue;
if (num_apics >= ARRAY_SIZE(apic_ids))
break;
dev->path.apic.apic_id = apic_ids_by_thread[num_apics];
get_cpu_info_from_apicid(dev->path.apic.apic_id, core_bits, thread_bits,
&package, NULL, NULL);
dev->path.apic.node_id = package;
printk(BIOS_DEBUG, "CPU %d apic_id: 0x%x (%d), node_id: 0x%x\n",
num_apics, dev->path.apic.apic_id,
dev->path.apic.apic_id, dev->path.apic.node_id);
++num_apics;
}
}
/* return true if command timed out else false */
static bool wait_for_bios_cmd_cpl(pci_devfn_t dev, uint32_t reg, uint32_t mask,
uint32_t target)
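
For reference, the bit arithmetic performed by the removed get_cpu_info_from_apicid(), as a standalone sketch with one worked value (decode_apicid(), the APIC ID 0x43 and the 5/1 core/thread bit widths are illustrative, not taken from the commit):

#include <stdint.h>
#include <stdio.h>

/* APIC ID layout assumed by the removed helper: [ package | core | thread ] */
static void decode_apicid(uint32_t apicid, uint32_t core_bits, uint32_t thread_bits,
			  uint32_t *package, uint32_t *core, uint32_t *thread)
{
	*package = apicid >> (thread_bits + core_bits);
	*core = (apicid >> thread_bits) & ((1u << core_bits) - 1);
	*thread = apicid & ((1u << thread_bits) - 1);
}

int main(void)
{
	uint32_t package, core, thread;

	/* e.g. CPUID leaf 0xb reporting thread_bits = 1 and core_bits = 5 */
	decode_apicid(0x43, 5, 1, &package, &core, &thread);

	/* 0x43 = 0b1000011 -> package 1, core 1, thread 1 */
	printf("package %u, core %u, thread %u\n", package, core, thread);
	return 0;
}

Since the SRAT code now reads node_id and thread_id straight from each CPU's device path, this per-APIC-ID decoding and the sorting built on top of it are no longer needed here.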