From d4db36e672644ac7f528d12c5ce3539725456085 Mon Sep 17 00:00:00 2001
From: Rocky Phagura
Date: Sat, 3 Apr 2021 08:49:32 -0700
Subject: [PATCH] src/intel/xeon_sp: add hardware error support (HEST)

This patch adds the ACPI hardware error source table (HEST) support.
This involves a few different parts: (1) The ACPI HEST table which is
filled with the appropriate fields (2) Reserved memory which is used by
runtime SW to provide error information. OS will not accept a HEST table
with this memory set to 0.

The ASL code to enable APEI bit will be submitted in a separate patch.

Tested on DeltaLake mainboard with following options enabled
SOC_INTEL_XEON_RAS

After boot to Linux, the following will show in dmesg:
HEST: Table parsing has been initialized

Change-Id: If76b2af153616182cc053ca878f30fe056e9c8bd
Signed-off-by: Rocky Phagura
Reviewed-on: https://review.coreboot.org/c/coreboot/+/52090
Tested-by: build bot (Jenkins)
Reviewed-by: Arthur Heymans
---
 src/commonlib/include/commonlib/cbmem_id.h    |   2 +
 .../common/block/acpi/acpi/globalnvs.asl      |   1 +
 .../common/block/include/intelblocks/nvs.h    |   1 +
 src/soc/intel/xeon_sp/Kconfig                 |   6 ++
 src/soc/intel/xeon_sp/Makefile.inc            |   2 +
 src/soc/intel/xeon_sp/include/soc/hest.h      |  40 +++++++
 src/soc/intel/xeon_sp/nb_acpi.c               |   4 +
 src/soc/intel/xeon_sp/ras/Kconfig             |  20 ++++
 src/soc/intel/xeon_sp/ras/Makefile.inc        |   3 +
 src/soc/intel/xeon_sp/ras/hest.c              | 128 ++++++++++++++++++
 10 files changed, 207 insertions(+)
 create mode 100644 src/soc/intel/xeon_sp/include/soc/hest.h
 create mode 100644 src/soc/intel/xeon_sp/ras/Kconfig
 create mode 100644 src/soc/intel/xeon_sp/ras/Makefile.inc
 create mode 100644 src/soc/intel/xeon_sp/ras/hest.c

diff --git a/src/commonlib/include/commonlib/cbmem_id.h b/src/commonlib/include/commonlib/cbmem_id.h
index ae644de27c..84d0a313a6 100644
--- a/src/commonlib/include/commonlib/cbmem_id.h
+++ b/src/commonlib/include/commonlib/cbmem_id.h
@@ -6,6 +6,7 @@
 #define CBMEM_ID_ACPI		0x41435049
 #define CBMEM_ID_ACPI_BERT	0x42455254
 #define CBMEM_ID_ACPI_GNVS	0x474e5653
+#define CBMEM_ID_ACPI_HEST	0x48455354
 #define CBMEM_ID_ACPI_UCSI	0x55435349
 #define CBMEM_ID_AFTER_CAR	0xc4787a93
 #define CBMEM_ID_AGESA_RUNTIME	0x41474553
@@ -81,6 +82,7 @@
 	{ CBMEM_ID_ACPI,		"ACPI " }, \
 	{ CBMEM_ID_ACPI_BERT,		"ACPI BERT " }, \
 	{ CBMEM_ID_ACPI_GNVS,		"ACPI GNVS " }, \
+	{ CBMEM_ID_ACPI_HEST,		"ACPI HEST " }, \
 	{ CBMEM_ID_ACPI_UCSI,		"ACPI UCSI " }, \
 	{ CBMEM_ID_AGESA_RUNTIME,	"AGESA RSVD " }, \
 	{ CBMEM_ID_AFTER_CAR,		"AFTER CAR " }, \
diff --git a/src/soc/intel/common/block/acpi/acpi/globalnvs.asl b/src/soc/intel/common/block/acpi/acpi/globalnvs.asl
index 161381f5e5..75215f8789 100644
--- a/src/soc/intel/common/block/acpi/acpi/globalnvs.asl
+++ b/src/soc/intel/common/block/acpi/acpi/globalnvs.asl
@@ -24,4 +24,5 @@ Field (GNVS, ByteAcc, NoLock, Preserve)
 	UIOR,	8,	// 0x2f - UART debug controller init on S3 resume
 	A4GB,	64,	// 0x30 - 0x37 Base of above 4GB MMIO Resource
 	A4GS,	64,	// 0x38 - 0x3f Length of above 4GB MMIO Resource
+	,	64,	// 0x40 - 0x47 Hest log buffer (used in SMM, not ASL code)
 }
diff --git a/src/soc/intel/common/block/include/intelblocks/nvs.h b/src/soc/intel/common/block/include/intelblocks/nvs.h
index c98fa012d0..89b682e95e 100644
--- a/src/soc/intel/common/block/include/intelblocks/nvs.h
+++ b/src/soc/intel/common/block/include/intelblocks/nvs.h
@@ -26,6 +26,7 @@ struct __packed global_nvs {
 	u8	uior; /* 0x2f - UART debug controller init on S3 resume */
 	u64	a4gb; /* 0x30 - 0x37 Base of above 4GB MMIO Resource */
 	u64	a4gs; /* 0x38 - 0x3f Length of above 4GB MMIO Resource */
+	u64	hest_log_addr; /* 0x40 - 0x47 err log addr (used in SMM, not ASL code) */
 };
 
 #endif
diff --git a/src/soc/intel/xeon_sp/Kconfig b/src/soc/intel/xeon_sp/Kconfig
index 09f72a343a..a2876b5ae3 100644
--- a/src/soc/intel/xeon_sp/Kconfig
+++ b/src/soc/intel/xeon_sp/Kconfig
@@ -2,6 +2,7 @@
 
 source "src/soc/intel/xeon_sp/skx/Kconfig"
 source "src/soc/intel/xeon_sp/cpx/Kconfig"
+source "src/soc/intel/xeon_sp/ras/Kconfig"
 
 config XEON_SP_COMMON_BASE
 	bool
@@ -114,4 +115,9 @@ config HEAP_SIZE
 	hex
 	default 0x80000
 
+config SOC_INTEL_XEON_RAS
+	bool
+	select SOC_ACPI_HEST
+	select SOC_RAS_ELOG
+
 endif ## SOC_INTEL_XEON_SP
diff --git a/src/soc/intel/xeon_sp/Makefile.inc b/src/soc/intel/xeon_sp/Makefile.inc
index 89e43fcc8b..4c351a41e3 100644
--- a/src/soc/intel/xeon_sp/Makefile.inc
+++ b/src/soc/intel/xeon_sp/Makefile.inc
@@ -16,6 +16,8 @@ ramstage-$(CONFIG_HAVE_SMI_HANDLER) += smmrelocate.c
 smm-y += smihandler.c pmutil.c
 postcar-y += spi.c
 
+subdirs-$(CONFIG_SOC_INTEL_XEON_RAS) += ras
+
 CPPFLAGS_common += -I$(src)/soc/intel/xeon_sp/include
 CPPFLAGS_common += -I$(CONFIG_FSP_HEADER_PATH)
 
diff --git a/src/soc/intel/xeon_sp/include/soc/hest.h b/src/soc/intel/xeon_sp/include/soc/hest.h
new file mode 100644
index 0000000000..ad79d45fdf
--- /dev/null
+++ b/src/soc/intel/xeon_sp/include/soc/hest.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _HEST_H_
+#define _HEST_H_
+#include
+
+#define MCE_ERR_POLL_MS_INTERVAL 1000
+#define HEST_PCIE_RP_AER_DESC_TYPE 6
+#define HEST_GHES_DESC_TYPE 9
+#define GHES_MAX_RAW_DATA_LENGTH (((CONFIG_ERROR_LOG_BUFFER_SIZE) >> 1) - 8)
+#define GHEST_ERROR_STATUS_BLOCK_LENGTH ((CONFIG_ERROR_LOG_BUFFER_SIZE) >> 1)
+#define GHEST_ASSIST (1 << 2)
+#define FIRMWARE_FIRST (1 << 0)
+#define MEM_VALID_BITS 0x66ff
+#define PCIE_VALID_BITS 0xef
+#define QWORD_ACCESS 4
+#define NOTIFY_TYPE_SCI 3
+
+/* Generic Error Source Descriptor */
+typedef struct acpi_ghes_esd {
+	u16 type;
+	u16 source_id;
+	u16 related_src_id;
+	u8 flags;
+	u8 enabled;
+	u32 prealloc_erecords;
+	u32 max_section_per_record;
+} __packed acpi_ghes_esd_t;
+
+typedef struct ghes_record {
+	acpi_ghes_esd_t esd;
+	u32 max_raw_data_length;
+	acpi_addr64_t sts_addr;
+	acpi_hest_hen_t notify;
+	u32 err_sts_blk_len;
+} __packed ghes_record_t;
+
+unsigned long hest_create(unsigned long current, struct acpi_rsdp *rsdp);
+
+#endif
diff --git a/src/soc/intel/xeon_sp/nb_acpi.c b/src/soc/intel/xeon_sp/nb_acpi.c
index 19c3921ce7..0c1c5ab30d 100644
--- a/src/soc/intel/xeon_sp/nb_acpi.c
+++ b/src/soc/intel/xeon_sp/nb_acpi.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -449,5 +450,8 @@ unsigned long northbridge_write_acpi_tables(const struct device *device,
 		acpi_add_table(rsdp, dmar);
 	}
 
+	if (CONFIG(SOC_ACPI_HEST))
+		current = hest_create(current, rsdp);
+
 	return current;
 }
diff --git a/src/soc/intel/xeon_sp/ras/Kconfig b/src/soc/intel/xeon_sp/ras/Kconfig
new file mode 100644
index 0000000000..79c599c2fb
--- /dev/null
+++ b/src/soc/intel/xeon_sp/ras/Kconfig
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+config SOC_ACPI_HEST
+	def_bool n
+	depends on HAVE_ACPI_TABLES
+	help
+	  This variable provides control for ACPI hardware error source table (HEST)
+
+config SOC_RAS_ELOG
+	def_bool n
+	depends on SOC_ACPI_HEST
+	help
+	  This variable provides enhanced error logging support used with HEST
+
+config ERROR_LOG_BUFFER_SIZE
+	hex
+	default 0x4000
+	depends on SOC_RAS_ELOG
+	help
+	  This variable allows a configurable error log based on system requirements
diff --git a/src/soc/intel/xeon_sp/ras/Makefile.inc b/src/soc/intel/xeon_sp/ras/Makefile.inc
new file mode 100644
index 0000000000..93c8705f94
--- /dev/null
+++ b/src/soc/intel/xeon_sp/ras/Makefile.inc
@@ -0,0 +1,3 @@
+## SPDX-License-Identifier: GPL-2.0-or-later
+
+ramstage-$(CONFIG_SOC_ACPI_HEST) += hest.c
diff --git a/src/soc/intel/xeon_sp/ras/hest.c b/src/soc/intel/xeon_sp/ras/hest.c
new file mode 100644
index 0000000000..6164edfe19
--- /dev/null
+++ b/src/soc/intel/xeon_sp/ras/hest.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Return the runtime error log address that hest_create() stored in GNVS,
+ * or 0 when GNVS cannot be obtained.
+ */
+static u64 hest_get_elog_addr(void)
+{
+	/* The elog address comes from reserved memory */
+	struct global_nvs *gnvs = acpi_get_gnvs();
+
+	if (!gnvs) {
+		printk(BIOS_ERR, "Unable to get gnvs\n");
+		return 0;
+	}
+
+	/* Runtime logging address */
+	printk(BIOS_DEBUG, "\t status blk start addr = %llx\n", gnvs->hest_log_addr);
+	printk(BIOS_DEBUG, "\t size = %x\n", CONFIG_ERROR_LOG_BUFFER_SIZE);
+	return gnvs->hest_log_addr;
+}
+
+/*
+ * Write one Generic Hardware Error Source (GHES) record at 'current'.
+ * Returns the number of bytes written, i.e. sizeof(ghes_record_t).
+ */
+static u32 acpi_hest_add_ghes(void *current)
+{
+	ghes_record_t *rec = (ghes_record_t *)current;
+	u32 size = sizeof(ghes_record_t);
+
+	/* Fill GHES error source descriptor */
+	memset(rec, 0, size);
+	rec->esd.type = HEST_GHES_DESC_TYPE;
+	rec->esd.source_id = 0; /* 0 for MCE check exception source */
+	rec->esd.enabled = 1;
+	rec->esd.related_src_id = 0xffff;
+	rec->esd.prealloc_erecords = 1;
+	rec->esd.max_section_per_record = 0xf;
+	rec->max_raw_data_length = GHES_MAX_RAW_DATA_LENGTH;
+
+	/* Add error_status_address */
+	rec->sts_addr.space_id = 0;
+	rec->sts_addr.bit_width = 0x40;
+	rec->sts_addr.bit_offset = 0;
+	rec->sts_addr.access_size = QWORD_ACCESS;
+
+	/* Add notification structure */
+	rec->notify.type = NOTIFY_TYPE_SCI;
+	rec->notify.length = sizeof(acpi_hest_hen_t);
+	rec->err_sts_blk_len = GHEST_ERROR_STATUS_BLOCK_LENGTH;
+
+	/* error status block entries start address */
+	if (CONFIG(SOC_ACPI_HEST))
+		rec->sts_addr.addr = hest_get_elog_addr();
+
+	return size;
+}
+
+/*
+ * acpi_write_hest() callback: append a single GHES record right after the
+ * fixed HEST header and return the updated table length.
+ */
+static unsigned long acpi_fill_hest(acpi_hest_t *hest)
+{
+	acpi_header_t *header = &(hest->header);
+	u8 *start = (u8 *)hest;
+	u8 *next = (u8 *)(hest + 1);
+
+	next += acpi_hest_add_ghes(next);
+	hest->error_source_count += 1;
+	/*
+	 * NOTE(review): this adds the distance from the table start, which
+	 * includes sizeof(*hest); confirm acpi_write_hest() has not already
+	 * counted the fixed header in header->length.
+	 */
+	header->length += next - start;
+	return header->length;
+}
+
+/*
+ * Reserve the error log buffer in CBMEM, publish its address through GNVS
+ * and append the HEST table at 'current'.
+ *
+ * Returns the address following the new table, or 'current' unchanged when
+ * the buffer or GNVS is unavailable (no HEST table is added then).
+ */
+unsigned long hest_create(unsigned long current, struct acpi_rsdp *rsdp)
+{
+	struct global_nvs *gnvs;
+	acpi_hest_t *hest;
+
+	/* Reserve memory for Enhanced error logging */
+	void *mem = cbmem_add(CBMEM_ID_ACPI_HEST, CONFIG_ERROR_LOG_BUFFER_SIZE);
+	if (!mem) {
+		printk(BIOS_ERR, "Unable to allocate HEST memory\n");
+		return current;
+	}
+
+	/*
+	 * Clear the buffer: its previous contents are unknown and must not be
+	 * picked up as logged error data.
+	 */
+	memset(mem, 0, CONFIG_ERROR_LOG_BUFFER_SIZE);
+	printk(BIOS_DEBUG, "HEST memory created: %p\n", mem);
+	gnvs = acpi_get_gnvs();
+	if (!gnvs) {
+		printk(BIOS_ERR, "Unable to get gnvs\n");
+		return current;
+	}
+	gnvs->hest_log_addr = (uintptr_t)mem;
+	printk(BIOS_DEBUG, "elog_addr: %llx, size:%x\n", gnvs->hest_log_addr,
+			CONFIG_ERROR_LOG_BUFFER_SIZE);
+
+	current = ALIGN(current, 8);
+	hest = (acpi_hest_t *)current;
+	acpi_write_hest(hest, acpi_fill_hest);
+	acpi_add_table(rsdp, (void *)current);
+	current += hest->header.length;
+	return current;
+}