WIP: Chapter 4 (progressing well)
This commit is contained in:
parent
1a232d42dd
commit
e74c77ba4d
2
Makefile
2
Makefile
|
@ -10,7 +10,7 @@ clean:
|
|||
rm -rf *.log *.bak *.out *.xml *.gz *.aux *.bcf *.blg
|
||||
|
||||
distclean: clean
|
||||
rm -rf *.bbl *.lof *.lol *.pdf *.toc $(DOC).bibready
|
||||
rm -rf *.bbl *.lof *.lol *.pdf *.toc $(DOC).bibready _minted-hardware_init_review
|
||||
|
||||
$(DOC).bibready:
|
||||
$(XELATEX) $(DOC).tex
|
||||
|
|
|
@ -1214,3 +1214,11 @@ note = "[Online; accessed 17-August-2024]"
|
|||
note = {Accessed: 2024-08-24},
|
||||
url = {https://cdrdv2.intel.com/v1/dl/getContent/772726}
|
||||
}
|
||||
|
||||
@misc{coreboot_4_11,
|
||||
author = {{Coreboot Project}},
|
||||
title = {coreboot repository, tag 4.11},
|
||||
year = {2019},
|
||||
note = {Accessed: 2024-08-24},
|
||||
url = {https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/tags/4.11}
|
||||
}
|
||||
|
|
|
@ -700,6 +700,7 @@
|
|||
\strng{authorbibnamehash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\strng{authornamehash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\strng{authorfullhash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\field{extraname}{1}
|
||||
\field{sortinit}{C}
|
||||
\field{sortinithash}{4d103a86280481745c9c897c925753c0}
|
||||
\field{labelnamesource}{author}
|
||||
|
@ -714,6 +715,33 @@
|
|||
\verb https://doc.coreboot.org/memory-map.html
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{coreboot_4_11}{misc}{}
|
||||
\name{author}{1}{}{%
|
||||
{{hash=9d0db915bc81244c5474dc57d9fb132a}{%
|
||||
family={{Coreboot Project}},
|
||||
familyi={C\bibinitperiod}}}%
|
||||
}
|
||||
\strng{namehash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\strng{fullhash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\strng{bibnamehash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\strng{authorbibnamehash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\strng{authornamehash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\strng{authorfullhash}{9d0db915bc81244c5474dc57d9fb132a}
|
||||
\field{extraname}{2}
|
||||
\field{sortinit}{C}
|
||||
\field{sortinithash}{4d103a86280481745c9c897c925753c0}
|
||||
\field{labelnamesource}{author}
|
||||
\field{labeltitlesource}{title}
|
||||
\field{note}{Accessed: 2024-08-24}
|
||||
\field{title}{coreboot repository, tag 4.11}
|
||||
\field{year}{2019}
|
||||
\verb{urlraw}
|
||||
\verb https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/tags/4.11
|
||||
\endverb
|
||||
\verb{url}
|
||||
\verb https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/tags/4.11
|
||||
\endverb
|
||||
\endentry
|
||||
\entry{intel_acpi_programming_2023}{manual}{}
|
||||
\name{author}{1}{}{%
|
||||
{{hash=42af28f239d9ce2a4d0f9a032741150e}{%
|
||||
|
@ -737,7 +765,6 @@
|
|||
\field{note}{Accessed: 2024-08-24}
|
||||
\field{title}{ACPI Programming Reference}
|
||||
\field{year}{2023}
|
||||
\true{nocite}
|
||||
\verb{urlraw}
|
||||
\verb https://cdrdv2.intel.com/v1/dl/getContent/772726
|
||||
\endverb
|
||||
|
@ -828,7 +855,6 @@
|
|||
\field{note}{Accessed: 2024-08-24}
|
||||
\field{title}{Introduction to ACPI}
|
||||
\field{year}{2023}
|
||||
\true{nocite}
|
||||
\verb{urlraw}
|
||||
\verb https://cdrdv2.intel.com/v1/dl/getContent/772721
|
||||
\endverb
|
||||
|
@ -2526,7 +2552,6 @@
|
|||
\field{note}{[Online; accessed 25-August-2024]}
|
||||
\field{title}{{UEFI - OSDev Wiki}}
|
||||
\field{year}{2024}
|
||||
\true{nocite}
|
||||
\verb{urlraw}
|
||||
\verb https://wiki.osdev.org/UEFI#Memory
|
||||
\endverb
|
||||
|
|
Binary file not shown.
|
@ -745,8 +745,13 @@ Thanks, I guess ? (TODO)
|
|||
|
||||
The firmware of the ASUS KGPE-D16 is crucial in ensuring the proper
|
||||
functioning and optimization of the mainboard's hardware components.
|
||||
For this to be done efficiently, \textit{coreboot} is organized in
|
||||
different stages (fig. \ref{fig:coreboot_stages}) \cite{coreboot_docs}.
|
||||
In this chapter and for the rest of this document, we're basing our
|
||||
study on the 4.11 version of \textit{coreboot} \cite{coreboot_4_11},
|
||||
which is the last version that supported the ASUS KGPE-D16 mainboard. \\
|
||||
|
||||
For the firmware tasks to be done efficiently, \textit{coreboot} is
|
||||
organized in different stages (fig. \ref{fig:coreboot_stages})
|
||||
\cite{coreboot_docs}.
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
|
@ -807,36 +812,36 @@ Thanks, I guess ? (TODO)
|
|||
|>{\centering\arraybackslash}p{0.35\textwidth}
|
||||
|>{\centering\arraybackslash}p{0.5\textwidth}|}
|
||||
\hline
|
||||
\texttt{0x00000 - 0x9FFFF}
|
||||
\path{0x00000 - 0x9FFFF}
|
||||
& Low memory (first 640KB). Never used. \\
|
||||
\hline
|
||||
\texttt{0xA0000 - 0xAFFFF}
|
||||
\path{0xA0000 - 0xAFFFF}
|
||||
& VGA graphics address range. \\
|
||||
\hline
|
||||
\texttt{0xB0000 - 0xB7FFF}
|
||||
\path{0xB0000 - 0xB7FFF}
|
||||
& Monochrome text mode address range.
|
||||
Few motherboards use
|
||||
it, but the KGPE-D16 does. \\
|
||||
\hline
|
||||
\texttt{0xB8000 - 0xBFFFF}
|
||||
\path{0xB8000 - 0xBFFFF}
|
||||
& Text mode address range. \\
|
||||
\hline
|
||||
\texttt{0xFEC00000}
|
||||
\path{0xFEC00000}
|
||||
& IOAPIC address. \\
|
||||
\hline
|
||||
\texttt{0xFED44000 - 0xFED4FFFF}
|
||||
\path{0xFED44000 - 0xFED4FFFF}
|
||||
& Address range for TPM registers. \\
|
||||
\hline
|
||||
\texttt{0xFF000000 - 0xFFFFFFFF}
|
||||
\path{0xFF000000 - 0xFFFFFFFF}
|
||||
& 16 MB ROM mapping address range. \\
|
||||
\hline
|
||||
\texttt{0xFF800000 - 0xFFFFFFFF}
|
||||
\path{0xFF800000 - 0xFFFFFFFF}
|
||||
& 8 MB ROM mapping address range. \\
|
||||
\hline
|
||||
\texttt{0xFFC00000 - 0xFFFFFFFF}
|
||||
\path{0xFFC00000 - 0xFFFFFFFF}
|
||||
& 4 MB ROM mapping address range. \\
|
||||
\hline
|
||||
\texttt{0xFEC00000 - DEVICE MEM HIGH}
|
||||
\path{0xFEC00000 - DEVICE MEM HIGH}
|
||||
& Reserved area for OS use. \\
|
||||
\hline
|
||||
\end{tabular}}
|
||||
|
@ -844,7 +849,7 @@ Thanks, I guess ? (TODO)
|
|||
\label{tab:memmap}
|
||||
\end{table}
|
||||
|
||||
\subsection{Bootblock stage}
|
||||
\subsection{Bootblock}
|
||||
|
||||
The bootblock is the first stage executed after the CPU reset. The
|
||||
beginning of this stage is written in assembly language, and its
|
||||
|
@ -870,9 +875,9 @@ Thanks, I guess ? (TODO)
|
|||
processor architecture and, for our board, is stored in the
|
||||
architecture-specific sources for x86 within \textit{coreboot}.
|
||||
The entry point into \textit{coreboot} code is defined in two files
|
||||
in the \path{src/cpu/x86/16bit/} directory: \texttt{reset16.inc}
|
||||
and \texttt{entry16.inc}. The first file serves as a jump to the
|
||||
\texttt{\_start16bit} procedure defined in the second. Due to space
|
||||
in the \path{src/cpu/x86/16bit/} directory: \path{reset16.inc}
|
||||
and \path{entry16.inc}. The first file serves as a jump to the
|
||||
\path{_start16bit} procedure defined in the second. Due to space
|
||||
constraints this function must remain below the 1MB address space
|
||||
because the IOMMU has not yet been configured to allow anything
|
||||
else. \\
|
||||
|
@ -889,7 +894,7 @@ Thanks, I guess ? (TODO)
|
|||
slightest exception, the BSC will halt. The code then switches to
|
||||
32-bit protected mode by mapping the first 4 GB of address space for
|
||||
code and data, and finally jumps to the 32-bit reset code labeled
|
||||
\texttt{\_protected\_start}. \\
|
||||
\path{_protected_start}. \\
|
||||
|
||||
|
||||
Once in protected mode, which constitutes the ``normal'' operating
|
||||
|
@ -901,12 +906,12 @@ Thanks, I guess ? (TODO)
|
|||
stack, transitions to long mode (64-bit addressing) with paging
|
||||
enabled, and sets up a proper exception vector table. The execution
|
||||
then jumps to chipset-specific code via the
|
||||
\texttt{bootblock\_pre\_c\_entry} procedure.
|
||||
\path{bootblock_pre_c_entry} procedure.
|
||||
Once these steps are completed, the bootblock has a minimal C
|
||||
environment. The procedure now involves allocating
|
||||
memory for the BSS, and decompressing and loading the next stage. \\
|
||||
|
||||
The jump to \texttt{\_bootblock\_pre\_entry} leads to the code files
|
||||
The jump to \path{bootblock_pre_c_entry} leads to the code files
|
||||
\path{src/soc/amd/common/block/cpu/car/cache_as_ram.S} and
|
||||
\path{src/vendorcode/amd/agesa/f15tn/gcccar.inc}, which are specific
|
||||
to AMD chipsets. It's worth noting that these files were developed by
|
||||
|
@ -926,7 +931,7 @@ Thanks, I guess ? (TODO)
|
|||
All cores except the BSC are halted and will restart during the
|
||||
romstage. Finally, the execution jumps to the entry point of the
|
||||
\textit{bootblock} written in C, labeled
|
||||
\texttt{bootblock\_c\_entry}.
|
||||
\path{bootblock_c_entry}.
|
||||
This entry point is located in
|
||||
\path{src/soc/amd/stoneyridge/bootblock/bootblock.c} and is
|
||||
specific to AMD processors. It is the first C routine executed, and
|
||||
|
@ -936,28 +941,28 @@ Thanks, I guess ? (TODO)
|
|||
|
||||
We are now in the file \path{src/lib/bootblock.c}, written by
|
||||
Google's team, and entering the
|
||||
\texttt{bootblock\_main\_with\_basetime} function, which immediately
|
||||
calls \texttt{bootblock\_main\_with\_timestamp}. At this stage, the
|
||||
\path{bootblock_main_with_basetime} function, which immediately
|
||||
calls \path{bootblock_main_with_timestamp}. At this stage, the
|
||||
goal is to start the romstage, but a few more tasks need to be
|
||||
completed.
|
||||
|
||||
The \texttt{bootblock\_soc\_early\_init} function is called to
|
||||
The \path{bootblock_soc_early_init} function is called to
|
||||
initialize the I2C bus of the southbridge. The
|
||||
\texttt{bootblock\_fch\_early\_init} function is invoked to
|
||||
\path{bootblock_fch_early_init} function is invoked to
|
||||
initialize the SPI buses (including the one for the ROM) and the
|
||||
serial and ``legacy'' buses of the southbridge. The CMOS clock is then
|
||||
initialized, followed by the pre-initialization of the serial
|
||||
console.
|
||||
The code then calls the \texttt{bootblock\_mainboard\_init}
|
||||
The code then calls the \path{bootblock_mainboard_init}
|
||||
function, which enters, for the first time, the files specific to
|
||||
the ASUS KGPE-D16 motherboard:
|
||||
\path{src/mainboard/asus/kgpe-d16/bootblock.c}.
|
||||
This code performs the northbridge initialization via the
|
||||
\texttt{bootblock\_northbridge\_init} function found in
|
||||
\path{bootblock_northbridge_init} function found in
|
||||
\path{src/northbridge/amd/amdfam10/bootblock.c}. This involves
|
||||
locating the HyperTransport bus and enabling the discovery of
|
||||
devices connected to it (e.g., processors). The southbridge is
|
||||
initialized using the \texttt{bootblock\_southbridge\_init}
|
||||
initialized using the \path{bootblock_southbridge_init}
|
||||
function from \path{src/southbridge/amd/sb700/bootblock.c}.
|
||||
This function, largely programmed by Timothy Pearson from Raptor
|
||||
Engineering, who performed the first coreboot port for the ASUS
|
||||
|
@ -965,18 +970,18 @@ Thanks, I guess ? (TODO)
|
|||
to the ROM memory via SuperIO. The state of a recovery jumper is
|
||||
then checked (this jumper is intended to reset the CMOS content,
|
||||
although it is not fully functional at the moment, as indicated by
|
||||
the \texttt{FIXME} comment in the code). Control then returns to
|
||||
\texttt{bootblock\_main} in \path{src/lib/bootblock.c}. \\
|
||||
the \path{FIXME} comment in the code). Control then returns to
|
||||
\path{bootblock_main} in \path{src/lib/bootblock.c}. \\
|
||||
|
||||
At this point, everything is ready to enter the romstage.
|
||||
\textit{coreboot} has successfully started and can now continue its
|
||||
execution by calling the \texttt{run\_romstage} function from
|
||||
execution by calling the \path{run_romstage} function from
|
||||
\path{src/lib/prog_loaders.c}. This function begins by locating
|
||||
the corresponding segment in the ROM via the southbridge and SPI
|
||||
bus using \texttt{prog\_locate}, which utilizes the SPI driver in
|
||||
bus using \path{prog_locate}, which utilizes the SPI driver in
|
||||
\path{src/drivers/cbfs_spi.c}. The contents of the romstage are
|
||||
then copied into the cache-as-ram by
|
||||
\texttt{cbfs\_prog\_stage\_load}. Finally, the \texttt{prog\_run}
|
||||
\path{cbfs_prog_stage_load}. Finally, the \path{prog_run}
|
||||
function transitions to the romstage after switching back to
|
||||
32-bit mode.
|
||||
|
||||
|
@ -999,18 +1004,18 @@ Thanks, I guess ? (TODO)
|
|||
northbridge, ensuring smooth data flow between these components. \\
|
||||
|
||||
The \textit{romstage} begins with a call to the
|
||||
\texttt{\_start} function, defined in
|
||||
\path{_start} function, defined in
|
||||
\path{src/cpu/x86/32bit/entry32.inc} via
|
||||
\path{src/arch/x86/assembly_entry.S}. We then enter the
|
||||
\texttt{cache\_as\_ram\_setup} procedure, written in assembly
|
||||
\path{cache_as_ram_setup} procedure, written in assembly
|
||||
language, located in \path{src/cpu/amd/car/cache_as_ram.inc}. This
|
||||
procedure configures the cache to load the future \textit{ramstage}
|
||||
and initialize memory based on the number of processors and cores
|
||||
present. Once this is completed, the code calls
|
||||
\texttt{cache\_as\_ram\_main} in
|
||||
\path{cache_as_ram_main} in
|
||||
\path{src/mainboard/asus/kgpe-d16/romstage.c}, which serves as the
|
||||
main function of the \textit{romstage}.
|
||||
In the \texttt{cache\_as\_ram\_main} function, after reducing the
|
||||
In the \path{cache_as_ram_main} function, after reducing the
|
||||
speed of the HyperTransport bus, only the Bootstrap Core (BSC)
|
||||
initializes the spinlocks for the serial console, the CMOS storage
|
||||
memory (used for saving parameters), and the ROM. At this point, the
|
||||
|
@ -1028,7 +1033,7 @@ Thanks, I guess ? (TODO)
|
|||
\textit{bootblock}, shows no anomalies, all cores of all nodes are
|
||||
configured, and they are placed back into sleep mode (except for the
|
||||
Core 0s). If everything goes well, the code 0x32 is sent, and the
|
||||
process continues. Using the \texttt{enable\_sr5650\_dev8} function,
|
||||
process continues. Using the \path{enable_sr5650_dev8} function,
|
||||
the southbridge’s P2P bridge is activated. Additionally, a check is
|
||||
performed to ensure that the number of physical processors detected
|
||||
does not exceed the number of sockets available on the board. If any
|
||||
|
@ -1038,27 +1043,27 @@ Thanks, I guess ? (TODO)
|
|||
constructed, and the microcode of the physical processors is updated
|
||||
if necessary. If everything proceeds correctly, the codes 0x33 and
|
||||
then 0x34 are sent, and the process continues. The information about
|
||||
the physical processors is retrieved using \texttt{amd\_ht\_init},
|
||||
the physical processors is retrieved using \path{amd_ht_init},
|
||||
and communication between the two sockets is configured via
|
||||
\texttt{amd\_ht\_fixup}. This process includes disabling any
|
||||
\path{amd_ht_fixup}. This process includes disabling any
|
||||
defective HT links (one per socket in this AMD Family 15h chipset).
|
||||
If everything is working as expected, the code 0x35 is sent, and
|
||||
the boot process continues.
|
||||
With the \texttt{finalize\_node\_setup} function, the PCI bus is
|
||||
With the \path{finalize_node_setup} function, the PCI bus is
|
||||
initialized, and a mapping is created
|
||||
(\texttt{setup\_mb\_resource\_map}). If all goes well, the code 0x36
|
||||
(\path{setup_mb_resource_map}). If all goes well, the code 0x36
|
||||
is sent. This is done in parallel across all Core 0s, so the system
|
||||
waits for all cores to finish using the
|
||||
\texttt{wait\_all\_core0\_started} function. The communication
|
||||
\path{wait_all_core0_started} function. The communication
|
||||
between the northbridge and southbridge is prepared using
|
||||
\texttt{sr5650\_early\_setup} and
|
||||
\texttt{sb7xx\_51xx\_early\_setup}, followed by the activation of
|
||||
\path{sr5650_early_setup} and
|
||||
\path{sb7xx_51xx_early_setup}, followed by the activation of
|
||||
all cores on all nodes, with the system waiting for all cores to be
|
||||
fully initialized. If everything is successful, the code 0x38 is
|
||||
sent. \\
|
||||
|
||||
At this point, the timer is activated, and a warm reset is performed
|
||||
via the \texttt{soft\_reset} function to validate all configuration
|
||||
via the \path{soft_reset} function to validate all configuration
|
||||
changes to the HT, PCI buses, and voltage/power settings of the
|
||||
processors and buses. This results in a system reboot, passing again
|
||||
through the \textit{bootblock}, but much faster this time since the
|
||||
|
@ -1077,16 +1082,16 @@ Thanks, I guess ? (TODO)
|
|||
in detail during the next chapter. \\
|
||||
|
||||
After memory initialization, the process returns to the
|
||||
\texttt{cache\_as\_ram\_main} function, where a memory test is
|
||||
\path{cache_as_ram_main} function, where a memory test is
|
||||
performed. This involves writing predefined values to specific
|
||||
memory locations and then verifying that the values can be read
|
||||
back correctly.
|
||||
If everything passes successfully, the CBMEM is initialized and
|
||||
one sends code \texttt{0x41}. At this point, the configuration of
|
||||
the code \path{0x41} is sent. At this point, the configuration of
|
||||
the PCI bus is prepared, which will be completed during the ramstage
|
||||
by configuring the PCI bridges. The system then exits
|
||||
\texttt{cache\_as\_ram\_main} and returns to
|
||||
\texttt{cache\_as\_ram\_setup} to finalize the process.
|
||||
\path{cache_as_ram_main} and returns to
|
||||
\path{cache_as_ram_setup} to finalize the process.
|
||||
|
||||
\textit{coreboot} then transitions to the next stage, known as the
|
||||
postcar stage, where it exits the cache-as-RAM mode and
|
||||
|
@ -1371,10 +1376,23 @@ Thanks, I guess ? (TODO)
|
|||
|
||||
\section{Importance of DDR3 Memory Initialization}
|
||||
|
||||
Memory modules are designed solely for storing data. The only valid
|
||||
operations on a memory device are reading data stored in the device,
|
||||
writing (or storing) data into the device, and refreshing the data.
|
||||
Memory modules consist of large rectangular arrays of memory cells,
|
||||
including circuits used to read and write data into the arrays, and
|
||||
refresh circuits to maintain the integrity of the stored data. The
|
||||
memory arrays are organized into rows and columns of memory cells,
|
||||
known as word lines and bit lines, respectively. Each memory cell
|
||||
has a unique location or address defined by the intersection of a
|
||||
row and a column. A DRAM memory cell is a capacitor that is charged
|
||||
to produce a 1 or a 0. \\
|
||||
|
||||
DDR3 (Double Data Rate Type 3) is a widely used type of
|
||||
SDRAM (Synchronous Dynamic Random-Access Memory) that offers
|
||||
significant performance improvements over its predecessors,
|
||||
DDR and DDR2. Key features of DDR3 include higher data rates,
|
||||
DDR and DDR2. A DDR3 DIMM module contains 240 contacts.
|
||||
Key features of DDR3 include higher data rates,
|
||||
lower power consumption, and increased memory capacity, making
|
||||
it essential for high-performance computing environments
|
||||
\cite{DDR3_wiki}. One of the critical aspects of DDR3 is its
|
||||
|
@ -1386,15 +1404,44 @@ Thanks, I guess ? (TODO)
|
|||
available in larger capacities, allowing systems to handle larger
|
||||
datasets and more complex computing tasks \cite{altera2008}.
|
||||
However, the advanced features of DDR3 come with increased
|
||||
complexity in its initialization and operation. For example,
|
||||
DDR3 uses a fly-by topology (fig. \ref{fig:fly-by}) for routing the
|
||||
address, command, and clock signals.
|
||||
complexity in its initialization and operation.
|
||||
The DDR3 memory interface, used by the ASUS KGPE-D16, is
|
||||
source-synchronous. Each memory module generates a Data Strobe
|
||||
(DQS) pulse simultaneously with the data (DQ) it sends during
|
||||
a memory read operation. Similarly, a DQS must be generated
|
||||
with its DQ information when writing to memory. The DQS differs
|
||||
between write and read operations. Specifically, the DQS generated
|
||||
by the system for a write operation is centered in the data bit
|
||||
period, while the DQS provided by the memory during a read operation
|
||||
is aligned with the edge of the data period \cite{samsung_ddr3}. \\
|
||||
|
||||
Due to this edge alignment, the read DQS timing can be adjusted
|
||||
to meet the setup and hold requirements of the registers capturing
|
||||
the read data. To improve timing margins or reduce simultaneous
|
||||
switching noise in the system, the DDR3 memory interface also allows
|
||||
various other timing parameters to be adjusted. If the system uses
|
||||
dual-inline memory modules (DIMMs), as in our case, the interface
|
||||
provides write leveling: a timing adjustment that compensates for
|
||||
variations in signal travel time \cite{micron_ddr3}.
|
||||
To reduce simultaneous switching noise, DIMM modules feature a
|
||||
fly-by architecture for routing the address, command, and clock
|
||||
signals, which causes command signals to reach the
|
||||
different memory devices with a delay. The fly-by topology has a
|
||||
``daisy-chain'' structure with either very short stubs or no stubs
|
||||
at all. This structure results in fewer branches and point-to-point
|
||||
connections: everything originates from the controller, passing
|
||||
through each module on the node, thereby increasing the throughput.
|
||||
In this topology, signals are routed sequentially
|
||||
from the memory controller to each DRAM chip, reducing signal
|
||||
reflections and improving overall signal integrity. This design
|
||||
is essential for maintaining stability at the high speeds DDR3
|
||||
operates at, but it also introduces timing challenges, such as
|
||||
timing skew, that must be carefully managed \cite{micron_ddr3}. \\
|
||||
reflections and improving overall signal integrity.
|
||||
This means that routing is done in the order of byte lane numbers,
|
||||
and the data byte lanes are routed on the same layer. Routing can be
|
||||
simplified by swapping data bits within a byte lane if necessary.
|
||||
The fly-by topology contrasts with the dual-T topology
|
||||
(fig. \ref{fig:fly-by}). This design is essential for maintaining
|
||||
stability at the high speeds DDR3 operates at, but it also
|
||||
introduces timing challenges, such as timing skew, that must be
|
||||
carefully managed \cite{micron_ddr3}. \\
|
||||
|
||||
\begin{figure}[H]
|
||||
\centering
|
||||
|
@ -1411,7 +1458,6 @@ Thanks, I guess ? (TODO)
|
|||
\label{fig:fly-by}
|
||||
\end{figure}
|
||||
|
||||
|
||||
Proper memory initialization ensures that the memory controller
|
||||
and the memory modules are correctly configured to work together,
|
||||
allowing for efficient data transfer and reliable operation. The
|
||||
|
@ -1511,7 +1557,8 @@ Thanks, I guess ? (TODO)
|
|||
best possible performance. Voltage settings, such as DRAM voltage
|
||||
(typically 1.5V for DDR3) and termination voltage (VTT), are also
|
||||
configured to maintain stable operation, especially under varying
|
||||
conditions such as temperature fluctuations \cite{micron_ddr3}.
|
||||
conditions such as temperature fluctuations \cite{micron_ddr3}. \\
|
||||
|
||||
Training and calibration are among the most complex and crucial
|
||||
stages of DDR3 memory initialization. The fly-by topology used
|
||||
for address, command, and clock signals in DDR3 modules enhances
|
||||
|
@ -1775,7 +1822,7 @@ Thanks, I guess ? (TODO)
|
|||
The initialization of DDR3 memory begins with configuring the DDR
|
||||
supply voltage regulator, which ensures that the memory modules
|
||||
receive the correct power levels. Following this, the Northbridge
|
||||
(NB) P-state is forced to \texttt{NBP0}, a state that guarantees
|
||||
(NB) P-state is forced to \path{NBP0}, a state that guarantees
|
||||
stable operation during the initial configuration phases. Once these
|
||||
preliminary steps are completed, the initialization of the DDR
|
||||
physical layer (PHY) begins, which is critical for setting up
|
||||
|
@ -1803,9 +1850,9 @@ Thanks, I guess ? (TODO)
|
|||
\item \textbf{Enable DRAM initialization}: The process
|
||||
begins by
|
||||
enabling DRAM initialization. This is done
|
||||
by setting the \texttt{EnDramInit} bit in
|
||||
the \texttt{D18F2x7C\_dct} register to 1. The
|
||||
\texttt{D18F2x7C\_dct} register is a specific
|
||||
by setting the \path{EnDramInit} bit in
|
||||
the \path{D18F2x7C_dct} register to 1. The
|
||||
\path{D18F2x7C_dct} register is a specific
|
||||
configuration register within the memory
|
||||
controller that controls various aspects of the
|
||||
DRAM initialization process. Enabling this bit
|
||||
|
@ -1816,10 +1863,10 @@ Thanks, I guess ? (TODO)
|
|||
|
||||
\item \textbf{Deassert memory reset}: Next, the memory
|
||||
reset
|
||||
signal, known as \texttt{MemRstX}, is deasserted
|
||||
by setting the \texttt{DeassertMemRstX} bit in the
|
||||
\texttt{D18F2x7C\_dct} register to 1. Deasserting
|
||||
\texttt{MemRstX} effectively takes the memory
|
||||
signal, known as \path{MemRstX}, is deasserted
|
||||
by setting the \path{DeassertMemRstX} bit in the
|
||||
\path{D18F2x7C_dct} register to 1. Deasserting
|
||||
\path{MemRstX} effectively takes the memory
|
||||
components out of their reset state, allowing them
|
||||
to begin normal operation. The system then waits
|
||||
for an additional 500 microseconds to ensure that
|
||||
|
@ -1828,12 +1875,12 @@ Thanks, I guess ? (TODO)
|
|||
|
||||
\item \textbf{Assert clock enable (CKE)}: The next
|
||||
step involves asserting the clock enable signal, known as
|
||||
`CKE`, by setting the \texttt{AssertCke} bit in the
|
||||
\texttt{D18F2x7C\_dct} register to 1. The \texttt{CKE}
|
||||
\path{CKE}, by setting the \path{AssertCke} bit in the
|
||||
\path{D18F2x7C_dct} register to 1. The \path{CKE}
|
||||
signal is critical because it enables the clocking
|
||||
of the DRAM modules, allowing them to synchronize
|
||||
with the memory controller. The system must wait
|
||||
for 360 nanoseconds after asserting \texttt{CKE}
|
||||
for 360 nanoseconds after asserting \path{CKE}
|
||||
to ensure that the clocking is correctly established.
|
||||
|
||||
\item \textbf{Registered DIMMs and LRDIMMs initialization}:
|
||||
|
@ -1843,8 +1890,8 @@ Thanks, I guess ? (TODO)
|
|||
buffering mechanisms that reduce electrical loading
|
||||
and improve signal integrity, especially in systems
|
||||
with multiple memory modules. During initialization,
|
||||
the BIOS programs the \texttt{ParEn} bit in the
|
||||
\texttt{D18F2x90\_dct} register based on whether
|
||||
the BIOS programs the \path{ParEn} bit in the
|
||||
\path{D18F2x90_dct} register based on whether
|
||||
the DIMM is buffered or unbuffered. For RDIMMs,
|
||||
specific Register Control (RC) commands, such as RC0
|
||||
through RC7, are sent to initialize the memory module's
|
||||
|
@ -1885,8 +1932,8 @@ Thanks, I guess ? (TODO)
|
|||
process compensates for variations due to manufacturing
|
||||
differences, voltage fluctuations, and temperature
|
||||
changes. To send a ZQCL command, the BIOS programs the
|
||||
\texttt{SendZQCmd} bit in the \texttt{D18F2x7C\_dct}
|
||||
register to 1 and sets the \texttt{MrsAddress[10]} bit to 1,
|
||||
\path{SendZQCmd} bit in the \path{D18F2x7C_dct}
|
||||
register to 1 and sets the \path{MrsAddress[10]} bit to 1,
|
||||
indicating that the ZQCL command should be sent to the
|
||||
memory module.
|
||||
|
||||
|
@ -1902,8 +1949,8 @@ Thanks, I guess ? (TODO)
|
|||
|
||||
\item \textbf{Finalization of initialization}: Once the
|
||||
ZQ calibration is complete, the BIOS deactivates the DRAM
|
||||
initialization process by setting the \texttt{EnDramInit}
|
||||
bit in the \texttt{D18F2x7C\_dct} register to 0. For
|
||||
initialization process by setting the \path{EnDramInit}
|
||||
bit in the \path{D18F2x7C_dct} register to 0. For
|
||||
LRDIMMs, additional configuration steps are required to
|
||||
finalize the initialization process. These steps include
|
||||
programming the DCT registers to monitor for errors and
|
||||
|
@ -1958,11 +2005,11 @@ Thanks, I guess ? (TODO)
|
|||
(see fig. \ref{fig:ddr3_state_machine}). Mode registers in DDR3
|
||||
memory are used to configure various operational parameters such
|
||||
as latency settings, burst length, and write leveling. One of
|
||||
the key mode registers is \texttt{MR1\_dct}, which is specific to
|
||||
the key mode registers is \path{MR1_dct}, which is specific to
|
||||
DDR3 and controls certain features of the memory module,
|
||||
including write leveling. \texttt{MR1\_dct} is used to enable or
|
||||
including write leveling. \path{MR1_dct} is used to enable or
|
||||
disable specific functions such as write leveling and output
|
||||
driver settings. The \texttt{dct} suffix refers to the Data
|
||||
driver settings. The \path{dct} suffix refers to the Data
|
||||
Control Timing that is specific to this register's function in
|
||||
managing the timing and control of data operations within the
|
||||
memory module. For RDIMMs, a 4-rank module is treated as two
|
||||
|
@ -1980,16 +2027,16 @@ Thanks, I guess ? (TODO)
|
|||
\begin{itemize}
|
||||
\item For the first rank (target):
|
||||
\begin{itemize}
|
||||
\item Set \texttt{MR1\_dct[1:0][Level] = 1}
|
||||
\item Set \path{MR1_dct[1:0][Level] = 1}
|
||||
to enable write leveling.
|
||||
\item Set \texttt{MR1\_dct[1:0][Qoff] = 0}
|
||||
\item Set \path{MR1_dct[1:0][Qoff] = 0}
|
||||
to ensure the output drivers are active.
|
||||
\end{itemize}
|
||||
\item For other ranks:
|
||||
\begin{itemize}
|
||||
\item Set \texttt{MR1\_dct[1:0][Level] = 1}
|
||||
\item Set \path{MR1_dct[1:0][Level] = 1}
|
||||
to prepare for write leveling.
|
||||
\item Set \texttt{MR1\_dct[1:0][Qoff] = 1}
|
||||
\item Set \path{MR1_dct[1:0][Qoff] = 1}
|
||||
to deactivate the output drivers for
|
||||
ranks that are not currently being
|
||||
leveled.
|
||||
|
@ -1998,8 +2045,8 @@ Thanks, I guess ? (TODO)
|
|||
or if there is one DIMM per three channels:
|
||||
\begin{itemize}
|
||||
\item Program the target rank’s
|
||||
\texttt{RttNom} (nominal termination
|
||||
resistance value) for \texttt{RttWr}
|
||||
\path{RttNom} (nominal termination
|
||||
resistance value) for \path{RttWr}
|
||||
termination, which helps in managing signal
|
||||
integrity during the write process by
|
||||
ensuring the correct impedance matching.
|
||||
|
@ -2010,7 +2057,7 @@ Thanks, I guess ? (TODO)
|
|||
operation:}
|
||||
\begin{itemize}
|
||||
\item After the initial configuration, the
|
||||
\texttt{RttNom} values for the non-target ranks
|
||||
\path{RttNom} values for the non-target ranks
|
||||
are set to their normal operating states.
|
||||
\item A wait time of 40 MEMCLKs is observed to
|
||||
ensure the configuration settings are stable
|
||||
|
@ -2049,8 +2096,8 @@ Thanks, I guess ? (TODO)
|
|||
\item \textbf{Step 6: Program the DIMM to normal operation:}
|
||||
\begin{itemize}
|
||||
\item Finally, the DIMM is reprogrammed to its
|
||||
normal operational state, resetting \texttt{Qoff}
|
||||
and \texttt{Level} to \texttt{0} to conclude the
|
||||
normal operational state, resetting \path{Qoff}
|
||||
and \path{Level} to \path{0} to conclude the
|
||||
write leveling process and return to standard
|
||||
operation.
|
||||
\end{itemize}
|
||||
|
@ -2078,18 +2125,21 @@ Thanks, I guess ? (TODO)
|
|||
as the system stabilizes.
|
||||
\end{itemize}
|
||||
|
||||
\section{Current implementation and potential improvements [WIP]}
|
||||
\subsection{Current implementation in coreboot on the KGPE-D16 [WIP]}
|
||||
\begin{itemize}
|
||||
\item Overview of the current DDR3 initialization process in
|
||||
\textit{coreboot} on the KGPE-D16
|
||||
\item Analysis of strengths and weaknesses of the current
|
||||
implementation
|
||||
\end{itemize}
|
||||
\section{Current implementation and potential improvements}
|
||||
|
||||
The process starts by calling the \texttt{fill\_mem\_ctrl}
|
||||
function from
|
||||
\path{src/northbridge/amd/amdfam10/raminit_sysinfo_in_ram.c}.
|
||||
\subsection{Current implementation in coreboot on the KGPE-D16}
|
||||
|
||||
In this part, as in the rest of this document, we are basing our
|
||||
study on the 4.11 version of \textit{coreboot} \cite{coreboot_4_11},
|
||||
which is the last version that supported the ASUS KGPE-D16
|
||||
mainboard. \\
|
||||
|
||||
The process starts in
|
||||
\path{src/mainboard/asus/kgpe-d16/romstage.c}, in the
|
||||
\path{cache_as_ram_main} function by calling
|
||||
\path{fill_mem_ctrl} from
|
||||
\path{src/northbridge/amd/amdfam10/raminit_sysinfo_in_ram.c}
|
||||
(lst. \ref{lst:fill_mem_ctrl}).
|
||||
At this step, only the BSC is running the firmware code.
|
||||
This function iterates over all memory controllers (one per
|
||||
node) and initializes their corresponding structures with the
|
||||
|
@ -2097,19 +2147,32 @@ Thanks, I guess ? (TODO)
|
|||
the addresses of PCI nodes (important for DMA operations) and
|
||||
SPD addresses, which are internal ROMs in each memory slot
|
||||
containing crucial information for detecting and initializing
|
||||
memory modules. If successful, the system posts codes
|
||||
\texttt{0x3D} and then \texttt{0x40}. The
|
||||
\texttt{raminit\_amdmct} function from
|
||||
\path{src/northbridge/amd/amdfam10/raminit\_amdmct.c} is then
|
||||
called. This function, in turn, calls \texttt{mctAutoInitMCT\_D}
|
||||
from \path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c},
|
||||
memory modules. \\
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdfam10_raminit_sysinfo_in_ram.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
\protect\path{fill_mem_ctrl()}, extract from
|
||||
\protect\path{src/northbridge/amd/amdfam10/raminit_sysinfo_in_ram.c}}
|
||||
\label{lst:fill_mem_ctrl}
|
||||
\end{listing}
|
||||
|
||||
If successful, the system posts codes \path{0x3D} and then
|
||||
\path{0x40}. The \path{raminit_amdmct} function from
|
||||
\path{src/northbridge/amd/amdfam10/raminit_amdmct.c} is then
|
||||
called. This function, in turn, calls \path{mctAutoInitMCT_D}
|
||||
(lst. \ref{lst:mctAutoInitMCT_D_1}) from
|
||||
\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c},
|
||||
which is responsible for the initial memory initialization,
|
||||
predominantly written by Raptor Engineering.
|
||||
|
||||
At this stage, it is assumed that memory has been pre-mapped
|
||||
contiguously from address 0 to 4GB and that the previous code
|
||||
has correctly mapped non-cacheable I/O areas below 4GB for the
|
||||
PCI bus and Local APIC access for processor cores.
|
||||
PCI bus and Local APIC access for processor cores. \\
|
||||
|
||||
The following prerequisites must be in place from the previous
|
||||
steps:
|
||||
|
@ -2125,13 +2188,25 @@ Thanks, I guess ? (TODO)
|
|||
verified with checksums.
|
||||
\end{itemize}
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_1.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
Beginning of
|
||||
\protect\path{mctAutoInitMCT_D()}, extract from
|
||||
\protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}}
|
||||
\label{lst:mctAutoInitMCT_D_1}
|
||||
\end{listing}
|
||||
|
||||
The memory controller for the BSP is queried to check if it can
|
||||
manage ECC memory, which is a type of memory that includes
|
||||
error-correcting code to detect and correct common types of data
|
||||
corruption.
|
||||
corruption (lst. \ref{lst:mctAutoInitMCT_D_2}).
|
||||
|
||||
For each node available in the system, the memory controllers
|
||||
are identified and initialized using a \texttt{DCTStatStruc}
|
||||
are identified and initialized using a \path{DCTStatStruc}
|
||||
structure defined in
|
||||
\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.h}. This
|
||||
structure contains all necessary fields for managing a memory
|
||||
|
@ -2140,35 +2215,133 @@ Thanks, I guess ? (TODO)
|
|||
\begin{itemize}
|
||||
\item Retrieving the corresponding field in the sysinfo
|
||||
structure for the node.
|
||||
\item Clearing fields with \texttt{zero}.
|
||||
\item Clearing fields with \path{zero}.
|
||||
\item Initializing basic fields.
|
||||
\item Initializing the controller linked to the current node.
|
||||
\item Verifying the presence of the node (checking if the
|
||||
processor associated with this controller is present).
|
||||
If yes, the SMBus is informed.
|
||||
\item Pre-initializing the memory module controller for this
|
||||
node using \texttt{mct\_preInitDCT}.
|
||||
node using \path{mct_preInitDCT}.
|
||||
\end{itemize}
|
||||
|
||||
The memory modules must be initialized. All modules present on
|
||||
valid nodes are configured with 1.5V voltage
|
||||
(lst. \ref{lst:mctAutoInitMCT_D_3}). \\
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_2.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
DIMM initialization in
|
||||
\protect\path{mctAutoInitMCT_D()}, extract from
|
||||
\protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}}
|
||||
\label{lst:mctAutoInitMCT_D_2}
|
||||
\end{listing}
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_3.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
Voltage control in
|
||||
\protect\path{mctAutoInitMCT_D()}, extract from
|
||||
\protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}}
|
||||
\label{lst:mctAutoInitMCT_D_3}
|
||||
\end{listing}
|
||||
|
||||
Now, present memory modules are detected using \path{mct_initDCT}
|
||||
(lst. \ref{lst:mctAutoInitMCT_D_4}). The memory modules' existence
|
||||
is checked and the machine halts immediately after displaying a
|
||||
message if there is no memory.
|
||||
\textit{coreboot} waits for all modules to be available using
|
||||
\path{SyncDCTsReady_D}. \\
|
||||
|
||||
The firmware maps the physical memory address ranges into the
|
||||
address space with \path{HTMemMapInit_D} as contiguously as possible
|
||||
while also constructing the physical memory map. If there is an
|
||||
area occupied by something else, it is ignored, and a memory hole is
|
||||
created. \\
|
||||
|
||||
Mapping the address ranges into the cache is done with
|
||||
\path{CPUMemTyping_D} either as WriteBack (cacheable) or
|
||||
Uncacheable, depending on whether the area corresponds to physical
|
||||
memory or a memory hole. \\
|
||||
|
||||
The external northbridge is notified of this new memory
|
||||
configuration. \\
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_fixme.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
\protect\path{mctAutoInitMCT_D()} does not allow restoring
|
||||
previous training values, extract from
|
||||
\protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}}
|
||||
\label{lst:mctAutoInitMCT_D_fixme}
|
||||
\end{listing}
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_4.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
Preparing SMBus, DCTs and NB in
|
||||
\protect\path{mctAutoInitMCT_D()}, extract from
|
||||
\protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}}
|
||||
\label{lst:mctAutoInitMCT_D_4}
|
||||
\end{listing}
|
||||
|
||||
The \textit{coreboot} code compensates for the delay between DQS
|
||||
and DQ signals, as well as between CMD and DQ. This is handled in
|
||||
the \texttt{DQSTiming\_D} function.
|
||||
and DQ signals, as well as between CMD and DQ. This is handled by
|
||||
the \path{DQSTiming_D} function (lst. \ref{lst:mctAutoInitMCT_D_5}).
|
||||
The initialization can be done again if needed after that, otherwise
|
||||
the channels and nodes are interleaved and ECC is enabled (if
|
||||
supported by every module). \\
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_5.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
Get DQS, reset and activate ECC in
|
||||
\protect\path{mctAutoInitMCT_D()}, extract from
|
||||
\protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}}
|
||||
\label{lst:mctAutoInitMCT_D_5}
|
||||
\end{listing}
|
||||
|
||||
Once this is done, the DRAM can be mapped into the address
|
||||
space with cacheability, and the init process finishes with
|
||||
validation of every populated DCT node
|
||||
(lst. \ref{lst:mctAutoInitMCT_D_6}). \\
|
||||
|
||||
\begin{listing}
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{
|
||||
listings/src_northbridge_amd_amdmct_mct_ddr3_mct_d_6.c}
|
||||
\end{adjustwidth}
|
||||
\caption{
|
||||
Mapping DRAM with cache, validating DCT nodes
|
||||
and finishing the init process in
|
||||
\protect\path{mctAutoInitMCT_D()}, extract from
|
||||
\protect\path{src/northbridge/amd/amdmct/mct_ddr3/mct_d.c}}
|
||||
\label{lst:mctAutoInitMCT_D_6}
|
||||
\end{listing}
|
||||
|
||||
Finally, if the RAM is of the ECC type, error-correcting codes
|
||||
are enabled, and the function ends by activating power-saving
|
||||
features if requested by the user.
|
||||
features if requested by the user. \\
|
||||
|
||||
TODO (continue notes from PROJET)
|
||||
\subsubsection{Details on the DQS training implementation [WIP]}
|
||||
|
||||
\begin{listing}[H]
|
||||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{c}{listings/test.c}
|
||||
\end{adjustwidth}
|
||||
\caption{\textit{Example C code}}
|
||||
\label{lst:c_code}
|
||||
\end{listing}
|
||||
|
||||
We saw that in (lst. \ref{lst:c_code}).
|
||||
TODO study \path{DQSTiming_D} \\
|
||||
|
||||
\subsection{Potential enhancements [WIP]}
|
||||
\begin{itemize}
|
||||
|
@ -2180,6 +2353,23 @@ Thanks, I guess ? (TODO)
|
|||
systems using \textit{coreboot}
|
||||
\end{itemize}
|
||||
|
||||
FIXME (lst. \ref{lst:mctAutoInitMCT_D_fixme}) \\
|
||||
|
||||
It seems that the seeds used for DQS training should be
|
||||
extensively determined for each motherboard, and the BKDG
|
||||
\cite{BKDG} does not say otherwise. Moreover, seeds can be
|
||||
configured uniquely for every possible socket, channel, DIMM module,
|
||||
and even byte lane combination. The current implementation of
|
||||
\path{DQSTiming_D} code is only using the recommended seeds from
|
||||
Table 99 of the BKDG \cite{BKDG}, which is not sufficient
|
||||
and certainly not suited to every DIMM module on the market. \\
|
||||
|
||||
See \path{TrainDQSRdWrPos_D_Fam15} in
|
||||
\path{src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c}: allowed
|
||||
to have negative DQS ("Attempting to continue but your system may
|
||||
be unstable"). This kind of value should be discarded and
|
||||
calculation done again. \\
|
||||
|
||||
% ------------------------------------------------------------------------------
|
||||
% CHAPTER 5: Virtualization of the operating system through firmware abstraction
|
||||
% ------------------------------------------------------------------------------
|
||||
|
@ -2231,7 +2421,7 @@ Thanks, I guess ? (TODO)
|
|||
\begin{adjustwidth}{0.5cm}{0.5cm}
|
||||
\inputminted{sh}{listings/acpica_size.sh}
|
||||
\end{adjustwidth}
|
||||
\caption{\textit{How to estimate the impact of ACPICA in Linux}}
|
||||
\caption{How to estimate the impact of ACPICA in Linux}
|
||||
\label{lst:acpica_in_linux}
|
||||
\end{listing}
|
||||
|
||||
|
@ -2517,7 +2707,7 @@ Thanks, I guess ? (TODO)
|
|||
|
||||
\bigskip
|
||||
|
||||
\texttt{<https://fsf.org/>}
|
||||
\path{<https://fsf.org/>}
|
||||
|
||||
\bigskip
|
||||
|
||||
|
@ -2952,7 +3142,7 @@ The Free Software Foundation may publish new, revised versions
|
|||
of the GNU Free Documentation License from time to time. Such new
|
||||
versions will be similar in spirit to the present version, but may
|
||||
differ in detail to address new problems or concerns. See
|
||||
\texttt{https://www.gnu.org/licenses/}.
|
||||
\path{https://www.gnu.org/licenses/}.
|
||||
|
||||
Each version of the License is given a distinguishing version number.
|
||||
If the Document specifies that a particular numbered version of this
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
\contentsline {section}{\numberline {2.4}Baseboard Management Controller}{19}{section.2.4}%
|
||||
\contentsline {chapter}{\numberline {3}Key components in modern firmware}{21}{chapter.3}%
|
||||
\contentsline {section}{\numberline {3.1}General structure of coreboot}{21}{section.3.1}%
|
||||
\contentsline {subsection}{\numberline {3.1.1}Bootblock stage}{22}{subsection.3.1.1}%
|
||||
\contentsline {subsection}{\numberline {3.1.1}Bootblock}{22}{subsection.3.1.1}%
|
||||
\contentsline {subsection}{\numberline {3.1.2}Romstage}{24}{subsection.3.1.2}%
|
||||
\contentsline {subsection}{\numberline {3.1.3}Ramstage}{25}{subsection.3.1.3}%
|
||||
\contentsline {subsubsection}{\numberline {3.1.3.1}Advanced Configuration and Power Interface}{25}{subsubsection.3.1.3.1}%
|
||||
|
@ -28,25 +28,26 @@
|
|||
\contentsline {chapter}{\numberline {4}Memory initialization and training}{29}{chapter.4}%
|
||||
\contentsline {section}{\numberline {4.1}Importance of DDR3 Memory Initialization}{29}{section.4.1}%
|
||||
\contentsline {subsection}{\numberline {4.1.1}General steps for DDR3 configuration}{30}{subsection.4.1.1}%
|
||||
\contentsline {section}{\numberline {4.2}Memory initialization techniques}{32}{section.4.2}%
|
||||
\contentsline {subsection}{\numberline {4.2.1}Memory training algorithms}{32}{subsection.4.2.1}%
|
||||
\contentsline {subsection}{\numberline {4.2.2}BIOS and Kernel Developer Guide (BKDG) recommendations}{33}{subsection.4.2.2}%
|
||||
\contentsline {subsubsection}{\numberline {4.2.2.1}DDR3 initialization procedure}{34}{subsubsection.4.2.2.1}%
|
||||
\contentsline {subsubsection}{\numberline {4.2.2.2}ZQ calibration process}{34}{subsubsection.4.2.2.2}%
|
||||
\contentsline {subsubsection}{\numberline {4.2.2.3}Write leveling process}{35}{subsubsection.4.2.2.3}%
|
||||
\contentsline {section}{\numberline {4.3}Current implementation and potential improvements [WIP]}{36}{section.4.3}%
|
||||
\contentsline {subsection}{\numberline {4.3.1}Current implementation in coreboot on the KGPE-D16 [WIP]}{36}{subsection.4.3.1}%
|
||||
\contentsline {subsection}{\numberline {4.3.2}Potential enhancements [WIP]}{37}{subsection.4.3.2}%
|
||||
\contentsline {chapter}{\numberline {5}Virtualization of the operating system through firmware abstraction}{38}{chapter.5}%
|
||||
\contentsline {section}{\numberline {5.1}ACPI and abstraction of hardware control}{38}{section.5.1}%
|
||||
\contentsline {section}{\numberline {5.2}SMM as a hidden execution layer}{39}{section.5.2}%
|
||||
\contentsline {section}{\numberline {5.3}UEFI and persistence}{39}{section.5.3}%
|
||||
\contentsline {subsection}{\numberline {5.3.1}Memory Management}{40}{subsection.5.3.1}%
|
||||
\contentsline {subsection}{\numberline {5.3.2}File System Management}{40}{subsection.5.3.2}%
|
||||
\contentsline {subsection}{\numberline {5.3.3}Device Drivers}{40}{subsection.5.3.3}%
|
||||
\contentsline {subsection}{\numberline {5.3.4}Power Management}{40}{subsection.5.3.4}%
|
||||
\contentsline {section}{\numberline {5.4}Intel and AMD: control beyond the OS}{40}{section.5.4}%
|
||||
\contentsline {section}{\numberline {5.5}The OS as a virtualized environment}{41}{section.5.5}%
|
||||
\contentsline {chapter}{Conclusion}{42}{chapter*.4}%
|
||||
\contentsline {chapter}{Bibliography}{43}{chapter*.4}%
|
||||
\contentsline {chapter}{GNU Free Documentation License}{50}{chapter*.6}%
|
||||
\contentsline {section}{\numberline {4.2}Memory initialization techniques}{33}{section.4.2}%
|
||||
\contentsline {subsection}{\numberline {4.2.1}Memory training algorithms}{33}{subsection.4.2.1}%
|
||||
\contentsline {subsection}{\numberline {4.2.2}BIOS and Kernel Developer Guide (BKDG) recommendations}{34}{subsection.4.2.2}%
|
||||
\contentsline {subsubsection}{\numberline {4.2.2.1}DDR3 initialization procedure}{35}{subsubsection.4.2.2.1}%
|
||||
\contentsline {subsubsection}{\numberline {4.2.2.2}ZQ calibration process}{35}{subsubsection.4.2.2.2}%
|
||||
\contentsline {subsubsection}{\numberline {4.2.2.3}Write leveling process}{36}{subsubsection.4.2.2.3}%
|
||||
\contentsline {section}{\numberline {4.3}Current implementation and potential improvements}{37}{section.4.3}%
|
||||
\contentsline {subsection}{\numberline {4.3.1}Current implementation in coreboot on the KGPE-D16}{37}{subsection.4.3.1}%
|
||||
\contentsline {subsubsection}{\numberline {4.3.1.1}Details on the DQS training implementation [WIP]}{47}{subsubsection.4.3.1.1}%
|
||||
\contentsline {subsection}{\numberline {4.3.2}Potential enhancements [WIP]}{47}{subsection.4.3.2}%
|
||||
\contentsline {chapter}{\numberline {5}Virtualization of the operating system through firmware abstraction}{48}{chapter.5}%
|
||||
\contentsline {section}{\numberline {5.1}ACPI and abstraction of hardware control}{48}{section.5.1}%
|
||||
\contentsline {section}{\numberline {5.2}SMM as a hidden execution layer}{49}{section.5.2}%
|
||||
\contentsline {section}{\numberline {5.3}UEFI and persistence}{49}{section.5.3}%
|
||||
\contentsline {subsection}{\numberline {5.3.1}Memory Management}{50}{subsection.5.3.1}%
|
||||
\contentsline {subsection}{\numberline {5.3.2}File System Management}{50}{subsection.5.3.2}%
|
||||
\contentsline {subsection}{\numberline {5.3.3}Device Drivers}{50}{subsection.5.3.3}%
|
||||
\contentsline {subsection}{\numberline {5.3.4}Power Management}{50}{subsection.5.3.4}%
|
||||
\contentsline {section}{\numberline {5.4}Intel and AMD: control beyond the OS}{50}{section.5.4}%
|
||||
\contentsline {section}{\numberline {5.5}The OS as a virtualized environment}{51}{section.5.5}%
|
||||
\contentsline {chapter}{Conclusion}{52}{chapter*.4}%
|
||||
\contentsline {chapter}{Bibliography}{53}{chapter*.4}%
|
||||
\contentsline {chapter}{GNU Free Documentation License}{60}{chapter*.6}%
|
||||
|
|
|
@ -1,67 +1,29 @@
|
|||
#include <device/pci_ops.h>
|
||||
#include <northbridge/amd/amdfam10/raminit.h>
|
||||
#include <northbridge/amd/amdfam10/amdfam10.h>
|
||||
#include <delay.h>
|
||||
|
||||
static void set_htic_bit(u8 i, u32 val, u8 bit)
|
||||
void fill_mem_ctrl(u32 controllers,
|
||||
struct mem_controller *ctrl_a,
|
||||
const u8 *spd_addr)
|
||||
{
|
||||
u32 dword;
|
||||
dword = pci_read_config32(NODE_PCI(i, 0), HT_INIT_CONTROL);
|
||||
dword &= ~(1<<bit);
|
||||
dword |= ((val & 1) <<bit);
|
||||
pci_write_config32(NODE_PCI(i, 0), HT_INIT_CONTROL, dword);
|
||||
}
|
||||
|
||||
#ifdef UNUSED_CODE
|
||||
static u32 get_htic_bit(u8 i, u8 bit)
|
||||
{
|
||||
u32 dword;
|
||||
dword = pci_read_config32(NODE_PCI(i, 0), HT_INIT_CONTROL);
|
||||
dword &= (1<<bit);
|
||||
return dword;
|
||||
}
|
||||
|
||||
static void wait_till_sysinfo_in_ram(void)
|
||||
{
|
||||
while (1) {
|
||||
/* give the NB a break, many CPUs spinning on one bit makes a
|
||||
* lot of traffic and time is not too important to APs.
|
||||
*/
|
||||
udelay(1000);
|
||||
if (get_htic_bit(0, 9)) return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void fill_mem_ctrl(u32 controllers, struct mem_controller *ctrl_a, const u8 *spd_addr)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
int index = 0;
|
||||
struct mem_controller *ctrl;
|
||||
for (i = 0; i < controllers; i++) {
|
||||
ctrl = &ctrl_a[i];
|
||||
ctrl->node_id = i;
|
||||
ctrl->f0 = NODE_PCI(i, 0);
|
||||
ctrl->f1 = NODE_PCI(i, 1);
|
||||
ctrl->f2 = NODE_PCI(i, 2);
|
||||
ctrl->f3 = NODE_PCI(i, 3);
|
||||
ctrl->f4 = NODE_PCI(i, 4);
|
||||
ctrl->f5 = NODE_PCI(i, 5);
|
||||
|
||||
if (spd_addr == (void *)0) continue;
|
||||
|
||||
ctrl->spd_switch_addr = spd_addr[index++];
|
||||
|
||||
for (j = 0; j < 8; j++) {
|
||||
ctrl->spd_addr[j] = spd_addr[index++];
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void set_sysinfo_in_ram(u32 val)
|
||||
{
|
||||
set_htic_bit(0, val, 9);
|
||||
int i;
|
||||
int j;
|
||||
int index = 0;
|
||||
struct mem_controller *ctrl;
|
||||
for (i = 0; i < controllers; i++) {
|
||||
ctrl = &ctrl_a[i];
|
||||
ctrl->node_id = i;
|
||||
ctrl->f0 = NODE_PCI(i, 0);
|
||||
ctrl->f1 = NODE_PCI(i, 1);
|
||||
ctrl->f2 = NODE_PCI(i, 2);
|
||||
ctrl->f3 = NODE_PCI(i, 3);
|
||||
ctrl->f4 = NODE_PCI(i, 4);
|
||||
ctrl->f5 = NODE_PCI(i, 5);
|
||||
|
||||
if (spd_addr == (void *)0) continue;
|
||||
|
||||
ctrl->spd_switch_addr = spd_addr[index++];
|
||||
|
||||
for (j = 0; j < 8; j++) {
|
||||
ctrl->spd_addr[j] = spd_addr[index++];
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,332 +0,0 @@
|
|||
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
/*
|
||||
* Memory may be mapped contiguously all the way up to 4GB (depending on setup
|
||||
* options). It is the responsibility of PCI subsystem to create an uncacheable
|
||||
* IO region below 4GB and to adjust TOP_MEM downward prior to any IO mapping or
|
||||
* accesses. It is the same responsibility of the CPU sub-system prior to
|
||||
* accessing LAPIC.
|
||||
*
|
||||
* Slot Number is an external convention, and is determined by OEM with accompanying
|
||||
* silk screening. OEM may choose to use Slot number convention which is consistent
|
||||
* with DIMM number conventions. All AMD engineering platforms do.
|
||||
*
|
||||
* Build Requirements:
|
||||
* 1. MCT_SEG0_START and MCT_SEG0_END macros to begin and end the code segment,
|
||||
* defined in mcti.inc.
|
||||
*
|
||||
* Run-Time Requirements:
|
||||
* 1. Complete Hypertransport Bus Configuration
|
||||
* 2. SMBus Controller Initialized
|
||||
* 1. BSP in Big Real Mode
|
||||
* 2. Stack at SS:SP, located somewhere between A000:0000 and F000:FFFF
|
||||
* 3. Checksummed or Valid NVRAM bits
|
||||
* 4. MCG_CTL = -1, MC4_CTL_EN = 0 for all CPUs
|
||||
* 5. MCi_STS from shutdown/warm reset recorded (if desired) prior to entry
|
||||
* 6. All var MTRRs reset to zero
|
||||
* 7. State of NB_CFG.DisDatMsk set properly on all CPUs
|
||||
* 8. All CPUs at 2GHz Speed (unless DQS training is not installed).
|
||||
* 9. All cHT links at max Speed/Width (unless DQS training is not installed).
|
||||
*
|
||||
*
|
||||
* Global relationship between index values and item values:
|
||||
*
|
||||
* pDCTstat.CASL pDCTstat.Speed
|
||||
* j CL(j) k F(k)
|
||||
* --------------------------
|
||||
* 0 2.0 - -
|
||||
* 1 3.0 1 200 MHz
|
||||
* 2 4.0 2 266 MHz
|
||||
* 3 5.0 3 333 MHz
|
||||
* 4 6.0 4 400 MHz
|
||||
* 5 7.0 5 533 MHz
|
||||
* 6 8.0 6 667 MHz
|
||||
* 7 9.0 7 800 MHz
|
||||
*/
|
||||
u8 Node, NodesWmem;
|
||||
u32 node_sys_base;
|
||||
uint8_t dimm;
|
||||
uint8_t nvram;
|
||||
uint8_t enable_cc6;
|
||||
uint8_t ecc_enabled;
|
||||
uint8_t allow_config_restore;
|
||||
|
||||
uint8_t s3resume = acpi_is_wakeup_s3();
|
||||
|
||||
restartinit:
|
||||
|
||||
if (!mctGet_NVbits(NV_ECC_CAP) || !mctGet_NVbits(NV_ECC))
|
||||
pMCTstat->try_ecc = 0;
|
||||
else
|
||||
pMCTstat->try_ecc = 1;
|
||||
|
||||
mctInitMemGPIOs_A_D(); /* Set any required GPIOs*/
|
||||
if (s3resume) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_En_Fam15\n");
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
mct_ForceNBPState0_En_Fam15(pMCTstat, pDCTstat);
|
||||
}
|
||||
|
||||
#if CONFIG(HAVE_ACPI_RESUME)
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: Restoring DCT configuration from NVRAM\n");
|
||||
if (restore_mct_information_from_nvram(0) != 0)
|
||||
printk(BIOS_CRIT, "%s: ERROR: Unable to restore DCT configuration from NVRAM\n", __func__);
|
||||
pMCTstat->GStatus |= 1 << GSB_ConfigRestored;
|
||||
#endif
|
||||
|
||||
if (is_fam15h()) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
NodesWmem = 0;
|
||||
node_sys_base = 0;
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
/* Zero out data structures to avoid false detection of DIMMs */
|
||||
memset(pDCTstat, 0, sizeof(struct DCTStatStruc));
|
||||
|
||||
/* Initialize data structures */
|
||||
pDCTstat->Node_ID = Node;
|
||||
pDCTstat->dev_host = PA_HOST(Node);
|
||||
pDCTstat->dev_map = PA_MAP(Node);
|
||||
pDCTstat->dev_dct = PA_DCT(Node);
|
||||
pDCTstat->dev_nbmisc = PA_NBMISC(Node);
|
||||
pDCTstat->dev_link = PA_LINK(Node);
|
||||
pDCTstat->dev_nbctl = PA_NBCTL(Node);
|
||||
pDCTstat->NodeSysBase = node_sys_base;
|
||||
|
||||
if (mctGet_NVbits(NV_PACK_TYPE) == PT_GR) {
|
||||
uint32_t dword;
|
||||
pDCTstat->Dual_Node_Package = 1;
|
||||
|
||||
/* Get the internal node number */
|
||||
dword = Get_NB32(pDCTstat->dev_nbmisc, 0xe8);
|
||||
dword = (dword >> 30) & 0x3;
|
||||
pDCTstat->Internal_Node_ID = dword;
|
||||
} else {
|
||||
pDCTstat->Dual_Node_Package = 0;
|
||||
}
|
||||
|
||||
printk(BIOS_DEBUG, "%s: mct_init Node %d\n", __func__, Node);
|
||||
mct_init(pMCTstat, pDCTstat);
|
||||
mctNodeIDDebugPort_D();
|
||||
pDCTstat->NodePresent = NodePresent_D(Node);
|
||||
if (pDCTstat->NodePresent) {
|
||||
pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node);
|
||||
|
||||
printk(BIOS_DEBUG, "%s: mct_InitialMCT_D\n", __func__);
|
||||
mct_InitialMCT_D(pMCTstat, pDCTstat);
|
||||
|
||||
printk(BIOS_DEBUG, "%s: mctSMBhub_Init\n", __func__);
|
||||
mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node */
|
||||
|
||||
printk(BIOS_DEBUG, "%s: mct_preInitDCT\n", __func__);
|
||||
mct_preInitDCT(pMCTstat, pDCTstat);
|
||||
}
|
||||
node_sys_base = pDCTstat->NodeSysBase;
|
||||
node_sys_base += (pDCTstat->NodeSysLimit + 2) & ~0x0F;
|
||||
}
|
||||
|
||||
/* If the boot fails make sure training is attempted after reset */
|
||||
nvram = 0;
|
||||
set_option("allow_spd_nvram_cache_restore", &nvram);
|
||||
|
||||
#if CONFIG(DIMM_VOLTAGE_SET_SUPPORT)
|
||||
printk(BIOS_DEBUG, "%s: DIMMSetVoltage\n", __func__);
|
||||
DIMMSetVoltages(pMCTstat, pDCTstatA); /* Set the DIMM voltages (mainboard specific) */
|
||||
#endif
|
||||
if (!CONFIG(DIMM_VOLTAGE_SET_SUPPORT)) {
|
||||
/* Assume 1.5V operation */
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (!pDCTstat->NodePresent)
|
||||
continue;
|
||||
|
||||
for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm++) {
|
||||
if (pDCTstat->DIMMValid & (1 << dimm))
|
||||
pDCTstat->DimmConfiguredVoltage[dimm] = 0x1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If DIMM configuration has not changed since last boot restore training values */
|
||||
allow_config_restore = 1;
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (pDCTstat->NodePresent)
|
||||
if (!pDCTstat->spd_data.nvram_spd_match)
|
||||
allow_config_restore = 0;
|
||||
}
|
||||
|
||||
/* FIXME
|
||||
* Stability issues have arisen on multiple Family 15h systems
|
||||
* when configuration restoration is enabled. In all cases these
|
||||
* stability issues resolved by allowing the RAM to go through a
|
||||
* full training cycle.
|
||||
*
|
||||
* Debug and reenable this!
|
||||
*/
|
||||
allow_config_restore = 0;
|
||||
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (pDCTstat->NodePresent) {
|
||||
printk(BIOS_DEBUG, "%s: mctSMBhub_Init\n", __func__);
|
||||
mctSMBhub_Init(Node); /* Switch SMBUS crossbar to proper node*/
|
||||
|
||||
printk(BIOS_DEBUG, "%s: mct_initDCT\n", __func__);
|
||||
mct_initDCT(pMCTstat, pDCTstat);
|
||||
if (pDCTstat->ErrCode == SC_FatalErr) {
|
||||
goto fatalexit; /* any fatal errors?*/
|
||||
} else if (pDCTstat->ErrCode < SC_StopError) {
|
||||
NodesWmem++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (NodesWmem == 0) {
|
||||
printk(BIOS_ALERT, "Unable to detect valid memory on any nodes. Halting!\n");
|
||||
goto fatalexit;
|
||||
}
|
||||
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: SyncDCTsReady_D\n");
|
||||
SyncDCTsReady_D(pMCTstat, pDCTstatA); /* Make sure DCTs are ready for accesses.*/
|
||||
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: HTMemMapInit_D\n");
|
||||
HTMemMapInit_D(pMCTstat, pDCTstatA); /* Map local memory into system address space.*/
|
||||
mctHookAfterHTMap();
|
||||
|
||||
if (!is_fam15h()) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: CPUMemTyping_D\n");
|
||||
CPUMemTyping_D(pMCTstat, pDCTstatA); /* Map dram into WB/UC CPU cacheability */
|
||||
}
|
||||
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctHookAfterCPU\n");
|
||||
mctHookAfterCPU(); /* Setup external northbridge(s) */
|
||||
|
||||
/* FIXME
|
||||
* Previous training values should only be used if the current desired
|
||||
* speed is the same as the speed used in the previous boot.
|
||||
* How to get the desired speed at this point in the code?
|
||||
*/
|
||||
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: DQSTiming_D\n");
|
||||
DQSTiming_D(pMCTstat, pDCTstatA, allow_config_restore); /* Get Receiver Enable and DQS signal timing*/
|
||||
|
||||
if (!is_fam15h()) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: UMAMemTyping_D\n");
|
||||
UMAMemTyping_D(pMCTstat, pDCTstatA); /* Fix up for UMA sizing */
|
||||
}
|
||||
|
||||
if (!allow_config_restore) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: :OtherTiming\n");
|
||||
mct_OtherTiming(pMCTstat, pDCTstatA);
|
||||
}
|
||||
|
||||
if (ReconfigureDIMMspare_D(pMCTstat, pDCTstatA)) { /* RESET# if 1st pass of DIMM spare enabled*/
|
||||
goto restartinit;
|
||||
}
|
||||
|
||||
InterleaveNodes_D(pMCTstat, pDCTstatA);
|
||||
InterleaveChannels_D(pMCTstat, pDCTstatA);
|
||||
|
||||
ecc_enabled = 1;
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (pDCTstat->NodePresent)
|
||||
if (!is_ecc_enabled(pMCTstat, pDCTstat))
|
||||
ecc_enabled = 0;
|
||||
}
|
||||
|
||||
if (ecc_enabled) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: ECCInit_D\n");
|
||||
if (!ECCInit_D(pMCTstat, pDCTstatA)) { /* Setup ECC control and ECC check-bits*/
|
||||
/* Memory was not cleared during ECC setup */
|
||||
/* mctDoWarmResetMemClr_D(); */
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: MCTMemClr_D\n");
|
||||
MCTMemClr_D(pMCTstat,pDCTstatA);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_fam15h()) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: CPUMemTyping_D\n");
|
||||
CPUMemTyping_D(pMCTstat, pDCTstatA); /* Map dram into WB/UC CPU cacheability */
|
||||
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: UMAMemTyping_D\n");
|
||||
UMAMemTyping_D(pMCTstat, pDCTstatA); /* Fix up for UMA sizing */
|
||||
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_fam15h()) {
|
||||
enable_cc6 = 0;
|
||||
if (get_option(&nvram, "cpu_cc6_state") == CB_SUCCESS)
|
||||
enable_cc6 = !!nvram;
|
||||
|
||||
if (enable_cc6) {
|
||||
uint8_t num_nodes;
|
||||
|
||||
num_nodes = 0;
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (pDCTstat->NodePresent)
|
||||
num_nodes++;
|
||||
}
|
||||
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (pDCTstat->NodePresent)
|
||||
set_up_cc6_storage_fam15(pMCTstat, pDCTstat, num_nodes);
|
||||
}
|
||||
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (pDCTstat->NodePresent) {
|
||||
set_cc6_save_enable(pMCTstat, pDCTstat, 1);
|
||||
lock_dram_config(pMCTstat, pDCTstat);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mct_FinalMCT_D(pMCTstat, pDCTstatA);
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
fatalexit:
|
||||
die("mct_d: fatalexit");
|
||||
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
/* Listing excerpt: documented preconditions of mctAutoInitMCT_D. The function body is elided ([...]). */
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
/*
|
||||
* Memory may be mapped contiguously all the way up to 4GB
|
||||
* (depending on setup options). It is the responsibility of PCI
|
||||
* subsystem to create an uncacheable IO region below 4GB and to adjust
|
||||
* TOP_MEM downward prior to any IO mapping or accesses. It is the same
|
||||
* responsibility of the CPU sub-system prior to accessing LAPIC.
|
||||
*
|
||||
* Slot Number is an external convention, and is determined by OEM with
|
||||
* accompanying silk screening. OEM may choose to use Slot number
|
||||
* convention which is consistent with DIMM number conventions.
|
||||
* All AMD engineering platforms do.
|
||||
*
|
||||
* Build Requirements:
|
||||
* 1. MCT_SEG0_START and MCT_SEG0_END macros to begin and end the code
|
||||
* segment, defined in mcti.inc.
|
||||
*
|
||||
* Run-Time Requirements:
|
||||
* 1. Complete Hypertransport Bus Configuration
|
||||
* 2. SMBus Controller Initialized
|
||||
* 3. BSP in Big Real Mode
|
||||
* 4. Stack at SS:SP, located somewhere between A000:0000 and F000:FFFF
|
||||
* 5. Checksummed or Valid NVRAM bits
|
||||
* 6. MCG_CTL = -1, MC4_CTL_EN = 0 for all CPUs
|
||||
* 7. MCi_STS from shutdown/warm reset recorded (if desired) prior to entry
|
||||
* 8. All var MTRRs reset to zero
|
||||
* 9. State of NB_CFG.DisDatMsk set properly on all CPUs
|
||||
* 10. All CPUs at 2GHz Speed (unless DQS training is not installed).
|
||||
* 11. All cHT links at max Speed/Width (unless DQS training is not
|
||||
* installed).
|
||||
*
|
||||
* Global relationship between index values and item values:
|
||||
*
|
||||
* pDCTstat.CASL pDCTstat.Speed
|
||||
* j CL(j) k F(k)
|
||||
* --------------------------
|
||||
* 0 2.0 - -
|
||||
* 1 3.0 1 200 MHz
|
||||
* 2 4.0 2 266 MHz
|
||||
* 3 5.0 3 333 MHz
|
||||
* 4 6.0 4 400 MHz
|
||||
* 5 7.0 5 533 MHz
|
||||
* 6 8.0 6 667 MHz
|
||||
* 7 9.0 7 800 MHz
|
||||
*/
|
||||
[...]
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
 * Listing excerpt: ECC policy selection at the restartinit label and the
 * per-node setup loop. Elided parts of the function are marked [...].
 */
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
[...]
|
||||
restartinit:
|
||||
/* try_ecc is set only when both the NV_ECC_CAP and NV_ECC NV bits are non-zero */
if (!mctGet_NVbits(NV_ECC_CAP) || !mctGet_NVbits(NV_ECC))
|
||||
pMCTstat->try_ecc = 0;
|
||||
else
|
||||
pMCTstat->try_ecc = 1;
|
||||
[...]
|
||||
/* pDCTstatA is indexed as an array with one DCTStatStruc per node */
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
/* Zero out data structures to avoid false detection of DIMMs */
|
||||
memset(pDCTstat, 0, sizeof(struct DCTStatStruc));
|
||||
/* Initialize data structures */
|
||||
pDCTstat->Node_ID = Node;
|
||||
pDCTstat->dev_host = PA_HOST(Node);
|
||||
pDCTstat->dev_map = PA_MAP(Node);
|
||||
pDCTstat->dev_dct = PA_DCT(Node);
|
||||
pDCTstat->dev_nbmisc = PA_NBMISC(Node);
|
||||
pDCTstat->dev_link = PA_LINK(Node);
|
||||
pDCTstat->dev_nbctl = PA_NBCTL(Node);
|
||||
pDCTstat->NodeSysBase = node_sys_base;
|
||||
if (mctGet_NVbits(NV_PACK_TYPE) == PT_GR) {
|
||||
uint32_t dword;
|
||||
pDCTstat->Dual_Node_Package = 1;
|
||||
/* Get the internal node number */
|
||||
dword = Get_NB32(pDCTstat->dev_nbmisc, 0xe8);
|
||||
/* Keep only bits 31:30 of the value read above */
dword = (dword >> 30) & 0x3;
|
||||
pDCTstat->Internal_Node_ID = dword;
|
||||
} else {
|
||||
pDCTstat->Dual_Node_Package = 0;
|
||||
}
|
||||
printk(BIOS_DEBUG, "%s: mct_init Node %d\n", __func__, Node);
|
||||
mct_init(pMCTstat, pDCTstat);
|
||||
mctNodeIDDebugPort_D();
|
||||
pDCTstat->NodePresent = NodePresent_D(Node);
|
||||
/* The remaining per-node setup runs only for nodes reported present */
if (pDCTstat->NodePresent) {
|
||||
pDCTstat->LogicalCPUID = mctGetLogicalCPUID_D(Node);
|
||||
printk(BIOS_DEBUG, "%s: mct_InitialMCT_D\n", __func__);
|
||||
mct_InitialMCT_D(pMCTstat, pDCTstat);
|
||||
printk(BIOS_DEBUG, "%s: mctSMBhub_Init\n", __func__);
|
||||
/* Switch SMBUS crossbar to proper node */
|
||||
mctSMBhub_Init(Node);
|
||||
printk(BIOS_DEBUG, "%s: mct_preInitDCT\n", __func__);
|
||||
mct_preInitDCT(pMCTstat, pDCTstat);
|
||||
}
|
||||
/*
 * Running base for the next node: this node's NodeSysBase plus
 * (NodeSysLimit + 2) with the low four bits cleared.
 */
node_sys_base = pDCTstat->NodeSysBase;
|
||||
node_sys_base += (pDCTstat->NodeSysLimit + 2) & ~0x0F;
|
||||
}
|
||||
[...]
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
/*
 * Listing excerpt: clearing of the SPD/NVRAM cache-restore option and DIMM
 * voltage selection. Elided parts of the function are marked [...].
 */
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
[...]
|
||||
/* If the boot fails make sure training is attempted after reset */
|
||||
/* Writes 0 to the "allow_spd_nvram_cache_restore" option */
nvram = 0;
|
||||
set_option("allow_spd_nvram_cache_restore", &nvram);
|
||||
|
||||
#if CONFIG(DIMM_VOLTAGE_SET_SUPPORT)
|
||||
/*
 * NOTE(review): the log text below says "DIMMSetVoltage" while the function
 * called is DIMMSetVoltages - confirm whether the message should match.
 */
printk(BIOS_DEBUG, "%s: DIMMSetVoltage\n", __func__);
|
||||
/* Set the DIMM voltages (mainboard specific) */
|
||||
DIMMSetVoltages(pMCTstat, pDCTstatA);
|
||||
#endif
|
||||
/* Fallback when DIMM_VOLTAGE_SET_SUPPORT is not configured */
if (!CONFIG(DIMM_VOLTAGE_SET_SUPPORT)) {
|
||||
/* Assume 1.5V operation */
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
if (!pDCTstat->NodePresent)
|
||||
continue;
|
||||
/* Each set bit in DIMMValid selects a DIMM index that receives the value 0x1 */
for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm++) {
|
||||
if (pDCTstat->DIMMValid & (1 << dimm))
|
||||
pDCTstat->DimmConfiguredVoltage[dimm] = 0x1;
|
||||
}
|
||||
}
|
||||
}
|
||||
[...]
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/*
 * Listing excerpt: per-node DCT initialization, the check that at least one
 * node has usable memory, and the calls that follow it. Elided parts of the
 * function are marked [...].
 */
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
[...]
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
if (pDCTstat->NodePresent) {
|
||||
printk(BIOS_DEBUG, "%s: mctSMBhub_Init\n", __func__);
|
||||
/* Switch SMBUS crossbar to proper node*/
|
||||
mctSMBhub_Init(Node);
|
||||
|
||||
printk(BIOS_DEBUG, "%s: mct_initDCT\n", __func__);
|
||||
mct_initDCT(pMCTstat, pDCTstat);
|
||||
/*
 * SC_FatalErr on any node aborts immediately; a node is counted in
 * NodesWmem only when its ErrCode is below SC_StopError.
 */
if (pDCTstat->ErrCode == SC_FatalErr) {
|
||||
goto fatalexit; /* any fatal errors?*/
|
||||
} else if (pDCTstat->ErrCode < SC_StopError) {
|
||||
NodesWmem++;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* No node was counted above: take the fatal path */
if (NodesWmem == 0) {
|
||||
printk(BIOS_ALERT, "Unable to detect valid memory on any nodes. Halting!\n");
|
||||
goto fatalexit;
|
||||
}
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: SyncDCTsReady_D\n");
|
||||
/* Make sure DCTs are ready for accesses.*/
|
||||
SyncDCTsReady_D(pMCTstat, pDCTstatA);
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: HTMemMapInit_D\n");
|
||||
/* Map local memory into system address space.*/
|
||||
HTMemMapInit_D(pMCTstat, pDCTstatA);
|
||||
mctHookAfterHTMap();
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: mctHookAfterCPU\n");
|
||||
/* Setup external northbridge(s) */
|
||||
mctHookAfterCPU();
|
||||
[...]
|
||||
return;
|
||||
/* Reached only through the goto statements above; calls die() with a fixed message */
fatalexit:
|
||||
die("mct_d: fatalexit");
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
 * Listing excerpt: DQS timing, optional restart for DIMM sparing, interleave
 * setup and ECC initialization. Elided parts of the function are marked [...].
 */
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
[...]
|
||||
/* FIXME
|
||||
* Previous training values should only be used if the current desired
|
||||
* speed is the same as the speed used in the previous boot.
|
||||
* How to get the desired speed at this point in the code?
|
||||
*/
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: DQSTiming_D\n");
|
||||
/* Get Receiver Enable and DQS signal timing*/
|
||||
DQSTiming_D(pMCTstat, pDCTstatA, allow_config_restore);
|
||||
/* mct_OtherTiming is called only when allow_config_restore is zero */
if (!allow_config_restore) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: :OtherTiming\n");
|
||||
mct_OtherTiming(pMCTstat, pDCTstatA);
|
||||
}
|
||||
/* RESET# if 1st pass of DIMM spare enabled*/
|
||||
/* The restartinit label is not part of this excerpt; it lies in an elided section */
if (ReconfigureDIMMspare_D(pMCTstat, pDCTstatA)) {
|
||||
goto restartinit;
|
||||
}
|
||||
InterleaveNodes_D(pMCTstat, pDCTstatA);
|
||||
InterleaveChannels_D(pMCTstat, pDCTstatA);
|
||||
/* ecc_enabled stays 1 only if is_ecc_enabled() is non-zero for every present node */
ecc_enabled = 1;
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
if (pDCTstat->NodePresent)
|
||||
if (!is_ecc_enabled(pMCTstat, pDCTstat))
|
||||
ecc_enabled = 0;
|
||||
}
|
||||
if (ecc_enabled) {
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: ECCInit_D\n");
|
||||
/* Setup ECC control and ECC check-bits*/
|
||||
/* A zero return from ECCInit_D triggers an explicit MCTMemClr_D call */
if (!ECCInit_D(pMCTstat, pDCTstatA)) {
|
||||
/* Memory was not cleared during ECC setup */
|
||||
/* mctDoWarmResetMemClr_D(); */
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: MCTMemClr_D\n");
|
||||
MCTMemClr_D(pMCTstat,pDCTstatA);
|
||||
}
|
||||
}
|
||||
[...]
|
||||
return;
|
||||
/* No goto in this excerpt targets fatalexit; any jumps to it are in elided code */
fatalexit:
|
||||
die("mct_d: fatalexit");
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
 * Listing excerpt: memory typing calls, the per-node
 * mct_ForceNBPState0_Dis_Fam15 call, optional CC6 setup and the final MCT
 * step. Elided parts of the function are marked [...].
 */
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
[...]
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: CPUMemTyping_D\n");
|
||||
/* Map dram into WB/UC CPU cacheability */
|
||||
CPUMemTyping_D(pMCTstat, pDCTstatA);
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: UMAMemTyping_D\n");
|
||||
/* Fix up for UMA sizing */
|
||||
UMAMemTyping_D(pMCTstat, pDCTstatA);
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D: mct_ForceNBPState0_Dis_Fam15\n");
|
||||
/* Called for every node index; unlike the loops below there is no NodePresent check */
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
mct_ForceNBPState0_Dis_Fam15(pMCTstat, pDCTstat);
|
||||
}
|
||||
/*
 * enable_cc6 defaults to 0 and takes the (normalized to 0/1) value of the
 * "cpu_cc6_state" option only when get_option() returns CB_SUCCESS.
 */
enable_cc6 = 0;
|
||||
if (get_option(&nvram, "cpu_cc6_state") == CB_SUCCESS)
|
||||
enable_cc6 = !!nvram;
|
||||
if (enable_cc6) {
|
||||
uint8_t num_nodes;
|
||||
/* Pass 1: count the present nodes */
num_nodes = 0;
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
if (pDCTstat->NodePresent)
|
||||
num_nodes++;
|
||||
}
|
||||
/* Pass 2: call set_up_cc6_storage_fam15 on each present node with that count */
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
if (pDCTstat->NodePresent)
|
||||
set_up_cc6_storage_fam15(pMCTstat, pDCTstat, num_nodes);
|
||||
}
|
||||
/* Pass 3: set_cc6_save_enable(..., 1) followed by lock_dram_config on each present node */
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
if (pDCTstat->NodePresent) {
|
||||
set_cc6_save_enable(pMCTstat, pDCTstat, 1);
|
||||
lock_dram_config(pMCTstat, pDCTstat);
|
||||
}
|
||||
}
|
||||
}
|
||||
mct_FinalMCT_D(pMCTstat, pDCTstatA);
|
||||
printk(BIOS_DEBUG, "mctAutoInitMCT_D Done: Global Status: %x\n", pMCTstat->GStatus);
|
||||
return;
|
||||
/* No goto in this excerpt targets fatalexit; any jumps to it are in elided code */
fatalexit:
|
||||
die("mct_d: fatalexit");
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
/*
 * Listing excerpt: decision whether previously stored training values may be
 * restored. Elided parts of the function are marked [...].
 */
void mctAutoInitMCT_D(struct MCTStatStruc *pMCTstat,
|
||||
struct DCTStatStruc *pDCTstatA)
|
||||
{
|
||||
[...]
|
||||
/* If DIMM configuration has not changed since last boot, restore
|
||||
* training values */
|
||||
/* Starts at 1 and is cleared as soon as one present node reports no SPD match */
allow_config_restore = 1;
|
||||
for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
|
||||
struct DCTStatStruc *pDCTstat;
|
||||
pDCTstat = pDCTstatA + Node;
|
||||
|
||||
if (pDCTstat->NodePresent)
|
||||
if (!pDCTstat->spd_data.nvram_spd_match)
|
||||
allow_config_restore = 0;
|
||||
}
|
||||
/* FIXME
|
||||
* Stability issues have arisen on multiple Family 15h systems
|
||||
* when configuration restoration is enabled. In all cases these
|
||||
* stability issues resolved by allowing the RAM to go through a
|
||||
* full training cycle.
|
||||
*
|
||||
* Debug and reenable this!
|
||||
*/
|
||||
/* Unconditionally overrides the value computed by the loop above */
allow_config_restore = 0;
|
||||
[...]
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue