diff --git a/hardware_init_review.pdf b/hardware_init_review.pdf index 8e11c54..124166f 100644 Binary files a/hardware_init_review.pdf and b/hardware_init_review.pdf differ diff --git a/hardware_init_review.tex b/hardware_init_review.tex index c8220cd..521ebf5 100644 --- a/hardware_init_review.tex +++ b/hardware_init_review.tex @@ -49,6 +49,10 @@ A copy of the license is included in the section entitled "GNU Free Documentation License". + Source code included in this document is licensed under the GNU General + Public License version 2 or later. You can find a copy of this license + at \url{https://www.gnu.org/licenses/}. + \newpage % ------------------------------------------------------------------------------ @@ -1920,7 +1924,9 @@ Thanks, I guess ? (TODO) impedance matching is essential for maintaining signal integrity, reducing signal reflections, and ensuring reliable data communication between the memory controller and the - memory modules. + memory modules. It is important to note that ZQ calibration + is done directly by the memory controller, and that the firmware + is simply triggering it. \begin{itemize} \item \textbf{Sending ZQCL commands}: The BIOS initiates @@ -2227,7 +2233,8 @@ Thanks, I guess ? (TODO) The memory modules must be initialized. All modules present on valid nodes are configured with 1.5V voltage - (lst. \ref{lst:mctAutoInitMCT_D_3}). \\ + (lst. \ref{lst:mctAutoInitMCT_D_3}). The ZQ calibration + is triggered at this stage. \\ \begin{listing} \begin{adjustwidth}{0.5cm}{0.5cm} @@ -2339,9 +2346,279 @@ Thanks, I guess ? (TODO) are enabled, and the function ends by activating power-saving features if requested by the user. \\ - \subsubsection{Details on the DQS training implementation [WIP]} + \subsubsection{Details on the DQS training function} - TODO study \path{DQSTiming_D} \\ + The \path{DQSTiming_D} function is a critical part of the + firmware responsible for initializing and training the system's + memory. 
+ The function primarily handles the DQS timing, which is + essential for ensuring data integrity and synchronization + between the memory controller and the DRAM. Proper DQS training + is crucial to align the data signals correctly with the clock + signals. + + The function begins by declaring local variables, which are + used throughout the function for various operations. It also + includes an early exit condition to bypass DQS training if a + specific status flag (\path{GSB_EnDIMMSpareNW}) is set, + indicating that a DIMM spare feature is enabled + (lst. \ref{lst:var_decl_and_exit}). \\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +uint8_t Node; +u8 nv_DQSTrainCTL; +uint8_t retry_requested; + +if (pMCTstat->GStatus & (1 << GSB_EnDIMMSpareNW)) { + return; +} + \end{minted} + \end{adjustwidth} + \caption{Initial variable declarations and early exit check.} + \label{lst:var_decl_and_exit} + \end{listing} + + Next, the function initializes the TCWL (CAS Write Latency) + offset to zero for each node and DCT (DRAM Controller). + This ensures that the memory write latency is properly aligned + before the DQS training begins + (lst. \ref{lst:set_tcwl_offset}). \\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + uint8_t dct; + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + for (dct = 0; dct < 2; dct++) + pDCTstat->tcwl_delay[dct] = 0; +} + \end{minted} + \end{adjustwidth} + \caption{Setting initial TCWL offset to zero for all nodes and DCTs, + extract from the + \protect\path{DQSTiming_D} function in + \protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}} + \label{lst:set_tcwl_offset} + \end{listing} + + A retry mechanism is introduced to handle potential errors + during DQS training. 
The \path{nv_DQSTrainCTL} variable is + set based on the \path{allow_config_restore} parameter, + determining whether to restore a previous configuration or + proceed with fresh training, although restoring does not work + in the current implementation for the ASUS KGPE-D16 + (lst. \ref{lst:mctAutoInitMCT_D_fixme}). \\ + + Then, the pre-training functions are called + (lst. \ref{lst:retry_pre_training}). \\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +retry_dqs_training_and_levelization: + nv_DQSTrainCTL = !allow_config_restore; + + mct_BeforeDQSTrain_D(pMCTstat, pDCTstatA); + phyAssistedMemFnceTraining(pMCTstat, pDCTstatA, -1); + \end{minted} + \end{adjustwidth} + \caption{Retry mechanism initialization and pre-training operations, + extract from the + \protect\path{DQSTiming_D} function in + \protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}} + \label{lst:retry_pre_training} + \end{listing} + + For AMD's Fam15h processors, additional PHY compensation is + performed for each node and valid DCT + (lst. \ref{lst:phy_compensation_init}). This is necessary to + fine-tune the electrical characteristics of the memory + interface. For more information about the PHY training, see + the earlier sections about the RAM training algorithm. 
\\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +if (is_fam15h()) { + struct DCTStatStruc *pDCTstat; + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + if (pDCTstat->NodePresent) { + if (pDCTstat->DIMMValidDCT[0]) + InitPhyCompensation(pMCTstat, pDCTstat, 0); + if (pDCTstat->DIMMValidDCT[1]) + InitPhyCompensation(pMCTstat, pDCTstat, 1); + } + } +} + \end{minted} + \end{adjustwidth} + \caption{Family-specific PHY compensation initialization for Fam15h processors, + extract from the + \protect\path{DQSTiming_D} function in + \protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}} + \label{lst:phy_compensation_init} + \end{listing} + + Before proceeding with the main DQS training, the function + invokes a hook function that allows for additional + configurations or custom operations. \\ + + If \path{nv_DQSTrainCTL} indicates that fresh training should + proceed, the function performs the main DQS training in multiple + passes, including receiver enable training with + \path{TrainReceiverEn_D} and DQS position + training with \path{mct_TrainDQSPos_D} + (lst. \ref{lst:dqs_training_process}). The process is + repeated in different modes to achieve optimal timing. \\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +if (nv_DQSTrainCTL) { + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, FirstPass); + + if (is_fam15h()) { + TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); + } + + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, SecondPass); + + if (is_fam15h()) { + TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); + } else { + TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); + } + + mct_TrainDQSPos_D(pMCTstat, pDCTstatA); + [...] 
+} + \end{minted} + \end{adjustwidth} + \caption{Main DQS training process in multiple passes, + extract from the + \protect\path{DQSTiming_D} function in + \protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}} + \label{lst:dqs_training_process} + \end{listing} + + The function checks for any errors during the DQS training. If + errors are detected, it may request a retrain, reset certain + parameters, and restart the training process and even restart + the whole system if needed (lst. \ref{lst:error_handling}). \\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +retry_requested = 0; +for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->NodePresent) { + if (pDCTstat->TrainErrors & (1 << SB_FatalError)) { + printk(BIOS_ERR, "DIMM training FAILED! Restarting system..."); + soft_reset(); + } + if (pDCTstat->TrainErrors & (1 << SB_RetryConfigTrain)) { + retry_requested = 1; + + pDCTstat->TrainErrors &= ~(1 << SB_RetryConfigTrain); + pDCTstat->TrainErrors &= ~(1 << SB_NODQSPOS); + pDCTstat->ErrStatus &= ~(1 << SB_RetryConfigTrain); + pDCTstat->ErrStatus &= ~(1 << SB_NODQSPOS); + } + } +} + \end{minted} + \end{adjustwidth} + \caption{Error detection and retry mechanism during DQS training, + extract from the + \protect\path{DQSTiming_D} function in + \protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}} + \label{lst:error_handling} + \end{listing} + + Once the training is successfully completed without errors, the + function finalizes the process by setting the maximum read + latency and exiting the training mode. For systems with + \path{allow_config_restore} enabled, it restores the previous + configuration from NVRAM instead of performing a fresh training + (lst. \ref{lst:finalization_exit}). 
\\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +TrainMaxRdLatency_En_D(pMCTstat, pDCTstatA); + +if (is_fam15h()) + exit_training_mode_fam15(pMCTstat, pDCTstatA); +else + mctSetEccDQSRcvrEn_D(pMCTstat, pDCTstatA); +} else { + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, FirstPass); + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, SecondPass); + +#if CONFIG(HAVE_ACPI_RESUME) + printk(BIOS_DEBUG, "mctAutoInitMCT_D: Restoring DIMM training configuration from NVRAM\n"); + if (restore_mct_information_from_nvram(1) != 0) + printk(BIOS_CRIT, "%s: ERROR: Unable to restore DCT configuration from NVRAM\n", __func__); +#endif + + if (is_fam15h()) + exit_training_mode_fam15(pMCTstat, pDCTstatA); + + pMCTstat->GStatus |= 1 << GSB_ConfigRestored; +} + \end{minted} + \end{adjustwidth} + \caption{Finalization of DQS training and configuration restoration, + extract from the + \protect\path{DQSTiming_D} function in + \protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}} + \label{lst:finalization_exit} + \end{listing} + + Finally, the function performs a cleanup operation specific to + Fam15h processors, where it switches the DCT control register + as required by a known erratum from AMD for the BKDG + (Erratum 505). This is followed by a post-training hook that + allows for any additional necessary actions + (lst. \ref{lst:post_training_cleanup}). 
\\ + + \begin{listing} + \begin{adjustwidth}{0.5cm}{0.5cm} + \begin{minted}[linenos]{c} +if (is_fam15h()) { + struct DCTStatStruc *pDCTstat; + + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + if (pDCTstat->NodePresent) { + fam15h_switch_dct(pDCTstat->dev_map, 0); + } + } +} + +/* FIXME - currently uses calculated value TrainMaxReadLatency_D(pMCTstat, pDCTstatA); */ +mctHookAfterAnyTraining(); + \end{minted} + \end{adjustwidth} + \caption{Post-training cleanup and final hook execution} + \label{lst:post_training_cleanup} + \end{listing} + + \subsubsection{Details on the DQS receiver training function} + + TODO study \path{TrainReceiverEn_D} \\ + + \subsubsection{Details on the DQS position training function} + + TODO study \path{mct_TrainDQSPos_D} \\ \subsection{Potential enhancements [WIP]} \begin{itemize} @@ -2365,7 +2642,7 @@ Thanks, I guess ? (TODO) and absolutely not adapted to every DIMM module in the market. \\ See \path{TrainDQSRdWrPos_D_Fam15} in - \path{src/drivers/amd/amdmct/mct/mct_ddr3/mctdqs_d.c} : allowed + \path{src/northbridge/amd/amdmct/mct/mct_ddr3/mctdqs_d.c} : allowed to have negative DQS ("Attempting to continue but your system may be unstable"). This kind of value should be discarded and calculation done again. 
\\ diff --git a/hardware_init_review.toc b/hardware_init_review.toc index e560f02..17d4b35 100644 --- a/hardware_init_review.toc +++ b/hardware_init_review.toc @@ -36,18 +36,20 @@ \contentsline {subsubsection}{\numberline {4.2.2.3}Write leveling process}{36}{subsubsection.4.2.2.3}% \contentsline {section}{\numberline {4.3}Current implementation and potential improvements}{37}{section.4.3}% \contentsline {subsection}{\numberline {4.3.1}Current implementation in coreboot on the KGPE-D16}{37}{subsection.4.3.1}% -\contentsline {subsubsection}{\numberline {4.3.1.1}Details on the DQS training implementation [WIP]}{47}{subsubsection.4.3.1.1}% -\contentsline {subsection}{\numberline {4.3.2}Potential enhancements [WIP]}{47}{subsection.4.3.2}% -\contentsline {chapter}{\numberline {5}Virtualization of the operating system through firmware abstraction}{48}{chapter.5}% -\contentsline {section}{\numberline {5.1}ACPI and abstraction of hardware control}{48}{section.5.1}% -\contentsline {section}{\numberline {5.2}SMM as a hidden execution layer}{49}{section.5.2}% -\contentsline {section}{\numberline {5.3}UEFI and persistence}{49}{section.5.3}% -\contentsline {subsection}{\numberline {5.3.1}Memory Management}{50}{subsection.5.3.1}% -\contentsline {subsection}{\numberline {5.3.2}File System Management}{50}{subsection.5.3.2}% -\contentsline {subsection}{\numberline {5.3.3}Device Drivers}{50}{subsection.5.3.3}% -\contentsline {subsection}{\numberline {5.3.4}Power Management}{50}{subsection.5.3.4}% -\contentsline {section}{\numberline {5.4}Intel and AMD: control beyond the OS}{50}{section.5.4}% -\contentsline {section}{\numberline {5.5}The OS as a virtualized environment}{51}{section.5.5}% -\contentsline {chapter}{Conclusion}{52}{chapter*.4}% -\contentsline {chapter}{Bibliography}{53}{chapter*.4}% -\contentsline {chapter}{GNU Free Documentation License}{60}{chapter*.6}% +\contentsline {subsubsection}{\numberline {4.3.1.1}Details on the DQS training function}{47}{subsubsection.4.3.1.1}% 
+\contentsline {subsubsection}{\numberline {4.3.1.2}Details on the DQS receiver training function}{48}{subsubsection.4.3.1.2}% +\contentsline {subsubsection}{\numberline {4.3.1.3}Details on the DQS position training function}{48}{subsubsection.4.3.1.3}% +\contentsline {subsection}{\numberline {4.3.2}Potential enhancements [WIP]}{48}{subsection.4.3.2}% +\contentsline {chapter}{\numberline {5}Virtualization of the operating system through firmware abstraction}{52}{chapter.5}% +\contentsline {section}{\numberline {5.1}ACPI and abstraction of hardware control}{52}{section.5.1}% +\contentsline {section}{\numberline {5.2}SMM as a hidden execution layer}{53}{section.5.2}% +\contentsline {section}{\numberline {5.3}UEFI and persistence}{53}{section.5.3}% +\contentsline {subsection}{\numberline {5.3.1}Memory Management}{54}{subsection.5.3.1}% +\contentsline {subsection}{\numberline {5.3.2}File System Management}{54}{subsection.5.3.2}% +\contentsline {subsection}{\numberline {5.3.3}Device Drivers}{54}{subsection.5.3.3}% +\contentsline {subsection}{\numberline {5.3.4}Power Management}{54}{subsection.5.3.4}% +\contentsline {section}{\numberline {5.4}Intel and AMD: control beyond the OS}{54}{section.5.4}% +\contentsline {section}{\numberline {5.5}The OS as a virtualized environment}{55}{section.5.5}% +\contentsline {chapter}{Conclusion}{56}{chapter*.4}% +\contentsline {chapter}{Bibliography}{57}{chapter*.4}% +\contentsline {chapter}{GNU Free Documentation License}{64}{chapter*.6}% diff --git a/listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_DQSTiming_D.c b/listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_DQSTiming_D.c new file mode 100644 index 0000000..a09a20e --- /dev/null +++ b/listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_DQSTiming_D.c @@ -0,0 +1,165 @@ +static void DQSTiming_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA, + uint8_t allow_config_restore) +{ /* Runs write levelization plus DQS training when allow_config_restore is zero; otherwise takes the restore branch near the end. */ + uint8_t Node; + u8 nv_DQSTrainCTL; /* non-zero selects the training branch, zero the restore branch */ + uint8_t retry_requested; /* only assigned and read inside the training branch */ + + if (pMCTstat->GStatus 
& (1 << GSB_EnDIMMSpareNW)) { + return; /* nothing below runs when this global status bit is set */ + } + + /* Set initial TCWL offset to zero (both DCT slots of every node, present or not) */ + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + uint8_t dct; + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + for (dct = 0; dct < 2; dct++) + pDCTstat->tcwl_delay[dct] = 0; + } + +retry_dqs_training_and_levelization: /* re-entry point for the goto issued when a node requests a retrain */ + nv_DQSTrainCTL = !allow_config_restore; + + mct_BeforeDQSTrain_D(pMCTstat, pDCTstatA); + phyAssistedMemFnceTraining(pMCTstat, pDCTstatA, -1); + + if (is_fam15h()) { + struct DCTStatStruc *pDCTstat; + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + if (pDCTstat->NodePresent) { + if (pDCTstat->DIMMValidDCT[0]) + InitPhyCompensation(pMCTstat, pDCTstat, 0); + if (pDCTstat->DIMMValidDCT[1]) + InitPhyCompensation(pMCTstat, pDCTstat, 1); + } + } + } + + mctHookBeforeAnyTraining(pMCTstat, pDCTstatA); + if (!is_fam15h()) { + /* TODO: should be in mctHookBeforeAnyTraining */ + _WRMSR(MTRR_FIX_4K_E0000, 0x04040404, 0x04040404); + _WRMSR(MTRR_FIX_4K_E8000, 0x04040404, 0x04040404); + _WRMSR(MTRR_FIX_4K_F0000, 0x04040404, 0x04040404); + _WRMSR(MTRR_FIX_4K_F8000, 0x04040404, 0x04040404); + } + + if (nv_DQSTrainCTL) { + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, FirstPass); + + if (is_fam15h()) { + /* Receiver Enable Training Pass 1 */ + TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); + } + + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, SecondPass); + + if (is_fam15h()) { + + /* TODO: + * Determine why running TrainReceiverEn_D in SecondPass + * mode yields less stable training values than when run + * in FirstPass mode as in the HACK below. 
+ */ + TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); + } else { + TrainReceiverEn_D(pMCTstat, pDCTstatA, FirstPass); /* same FirstPass call as the Fam15h branch above */ + } + + mct_TrainDQSPos_D(pMCTstat, pDCTstatA); + + /* Determine if DQS training requested a retrain attempt */ + retry_requested = 0; + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + + if (pDCTstat->NodePresent) { + if (pDCTstat->TrainErrors & (1 << SB_FatalError)) { + printk(BIOS_ERR, "DIMM training FAILED! Restarting system..."); /* NOTE(review): no trailing newline, unlike the other printk calls in this function */ + soft_reset(); + } + if (pDCTstat->TrainErrors & (1 << SB_RetryConfigTrain)) { + retry_requested = 1; + + /* Clear previous errors (retry and no-DQS-position bits, in both TrainErrors and ErrStatus) */ + pDCTstat->TrainErrors &= ~(1 << SB_RetryConfigTrain); + pDCTstat->TrainErrors &= ~(1 << SB_NODQSPOS); + pDCTstat->ErrStatus &= ~(1 << SB_RetryConfigTrain); + pDCTstat->ErrStatus &= ~(1 << SB_NODQSPOS); + } + } + } + + /* Retry training and levelization if requested */ + if (retry_requested) { + printk(BIOS_DEBUG, "%s: Restarting training on algorithm request\n", __func__); + /* Reset frequency to minimum (NV_MIN_MEMCLK); TargetFreq and DIMMAutoSpeed are put back afterwards, Speed is not */ + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + if (pDCTstat->NodePresent) { + uint8_t original_target_freq = pDCTstat->TargetFreq; + uint8_t original_auto_speed = pDCTstat->DIMMAutoSpeed; + pDCTstat->TargetFreq = mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK)); + pDCTstat->Speed = pDCTstat->DIMMAutoSpeed = pDCTstat->TargetFreq; + SetTargetFreq(pMCTstat, pDCTstatA, Node); + pDCTstat->TargetFreq = original_target_freq; + pDCTstat->DIMMAutoSpeed = original_auto_speed; + } + } + /* Apply any DIMM timing changes (DCT 1 only when not ganged and it has valid DIMMs) */ + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + struct DCTStatStruc *pDCTstat; + pDCTstat = pDCTstatA + Node; + if (pDCTstat->NodePresent) { + AutoCycTiming_D(pMCTstat, pDCTstat, 0); + if (!pDCTstat->GangedMode) + if (pDCTstat->DIMMValidDCT[1] > 0) + AutoCycTiming_D(pMCTstat, pDCTstat, 1); + } + } + goto 
retry_dqs_training_and_levelization; + } + + TrainMaxRdLatency_En_D(pMCTstat, pDCTstatA); + + if (is_fam15h()) + exit_training_mode_fam15(pMCTstat, pDCTstatA); + else + mctSetEccDQSRcvrEn_D(pMCTstat, pDCTstatA); + } else { + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, FirstPass); + + mct_WriteLevelization_HW(pMCTstat, pDCTstatA, SecondPass); + +#if CONFIG(HAVE_ACPI_RESUME) + printk(BIOS_DEBUG, "mctAutoInitMCT_D: Restoring DIMM training configuration from NVRAM\n"); + if (restore_mct_information_from_nvram(1) != 0) + printk(BIOS_CRIT, "%s: ERROR: Unable to restore DCT configuration from NVRAM\n", __func__); +#endif + + if (is_fam15h()) + exit_training_mode_fam15(pMCTstat, pDCTstatA); + + pMCTstat->GStatus |= 1 << GSB_ConfigRestored; /* set unconditionally: not gated on HAVE_ACPI_RESUME or on the restore result */ + } + + if (is_fam15h()) { + struct DCTStatStruc *pDCTstat; + + /* Switch DCT control register to DCT 0 per Erratum 505 */ + for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) { + pDCTstat = pDCTstatA + Node; + if (pDCTstat->NodePresent) { + fam15h_switch_dct(pDCTstat->dev_map, 0); + } + } + } + + /* FIXME - currently uses calculated value TrainMaxReadLatency_D(pMCTstat, pDCTstatA); */ + mctHookAfterAnyTraining(); +}