Improving BKDG implementation of P-states,
CPU and northbridge frequency and voltage handling for Fam 10 in SVI mode. In fact I changed coreDelay before deleting the code in fidvid that called it. But there are still a couple of calls from src/northbridge/amd/amdmct/wrappers/mcti_d.c. Since the comment encouraged fixing something, I parametrized it with the delay time in microseconds and paranoidly tried to avoid an overflow at pathological moments. Signed-off-by: Xavi Drudis Ferran <xdrudis@tinet.cat> Acked-by: Marc Jones <marcj303@gmail.com> git-svn-id: svn://svn.coreboot.org/coreboot/trunk@6408 2b7e53f0-3cfb-0310-b3e9-8179ed1497e1
This commit is contained in:
parent
6276b6f151
commit
c3132105bd
|
@ -77,9 +77,9 @@ static void applyBoostFIDOffset( device_t dev ) {
|
||||||
u32 cpuFid = msr.lo & PS_CPU_FID_MASK;
|
u32 cpuFid = msr.lo & PS_CPU_FID_MASK;
|
||||||
cpuFid = cpuFid + asymetricBoostThisCore;
|
cpuFid = cpuFid + asymetricBoostThisCore;
|
||||||
msr.lo &= ~PS_CPU_FID_MASK;
|
msr.lo &= ~PS_CPU_FID_MASK;
|
||||||
msr.lo |= cpuFid ;
|
msr.lo |= cpuFid ;
|
||||||
wrmsr(PS_REG_BASE , msr);
|
wrmsr(PS_REG_BASE , msr);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -104,7 +104,7 @@ static void enableNbPState1( device_t dev ) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static u8 setPStateMaxVal( device_t dev ) {
|
static u8 setPStateMaxVal( device_t dev ) {
|
||||||
u8 i,maxpstate=0;
|
u8 i,maxpstate=0;
|
||||||
for (i = 0; i < NM_PS_REG; i++) {
|
for (i = 0; i < NM_PS_REG; i++) {
|
||||||
msr_t msr = rdmsr(PS_REG_BASE + i);
|
msr_t msr = rdmsr(PS_REG_BASE + i);
|
||||||
if (msr.hi & PS_IDD_VALUE_MASK) {
|
if (msr.hi & PS_IDD_VALUE_MASK) {
|
||||||
|
@ -141,8 +141,8 @@ static void dualPlaneOnly( device_t dev ) {
|
||||||
msr.lo=0; msr.hi=0;
|
msr.lo=0; msr.hi=0;
|
||||||
wrmsr(0xC0010064, rdmsr(0xC0010068) );
|
wrmsr(0xC0010064, rdmsr(0xC0010068) );
|
||||||
wrmsr(0xC0010068, msr );
|
wrmsr(0xC0010068, msr );
|
||||||
}
|
}
|
||||||
|
|
||||||
//FIXME: CPTC2 and HTC_REG should get max per node, not per core ?
|
//FIXME: CPTC2 and HTC_REG should get max per node, not per core ?
|
||||||
u8 maxpstate = setPStateMaxVal(dev);
|
u8 maxpstate = setPStateMaxVal(dev);
|
||||||
|
|
||||||
|
@ -155,15 +155,15 @@ static void dualPlaneOnly( device_t dev ) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Convert a VID code to the voltage it selects.
 *
 * See BKDG #31116 rev 3.48, section 2.4.1.6.
 *
 * @param vid VID code; codes 0x7c and above yield 0.
 * @return Voltage in tenths of a millivolt (hundreds of microvolts):
 *         15500 - 125 * vid for codes below 0x7c, otherwise 0.
 */
static int vidTo100uV(u8 vid)
{
	/* Codes from 0x7c upwards do not select a voltage on the linear scale. */
	if (vid >= 0x7c)
		return 0;

	/* 1.5500 V at code 0, dropping 12.5 mV per step, in units of 100 uV. */
	return 15500 - (125 * vid);
}
|
||||||
|
|
||||||
|
@ -193,12 +193,12 @@ static void recalculateVsSlamTimeSettingOnCorePre(device_t dev)
|
||||||
|
|
||||||
/* This function calculates the VsSlamTime using the range of possible
|
/* This function calculates the VsSlamTime using the range of possible
|
||||||
* voltages instead of a hardcoded 200us.
|
* voltages instead of a hardcoded 200us.
|
||||||
* Note: this function is called only from prep_fid_change,
|
* Note: this function is called only from prep_fid_change,
|
||||||
* and that from init_cpus.c finalize_node_setup()
|
* and that from init_cpus.c finalize_node_setup()
|
||||||
* (after set AMD MSRs and init ht )
|
* (after set AMD MSRs and init ht )
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* BKDG r31116 2010-04-22 2.4.1.7 step b F3xD8[VSSlamTime] */
|
/* BKDG r31116 2010-04-22 2.4.1.7 step b F3xD8[VSSlamTime] */
|
||||||
/* Calculate Slam Time
|
/* Calculate Slam Time
|
||||||
* Vslam = (mobileCPU?0.2:0.4)us/mV * (Vp0 - (lowest out of Vpmin or Valt)) mV
|
* Vslam = (mobileCPU?0.2:0.4)us/mV * (Vp0 - (lowest out of Vpmin or Valt)) mV
|
||||||
* In our case, we will scale the values by 100 to avoid
|
* In our case, we will scale the values by 100 to avoid
|
||||||
|
@ -222,7 +222,7 @@ static void recalculateVsSlamTimeSettingOnCorePre(device_t dev)
|
||||||
highVoltageVid = (u8) ((msr.lo >> PS_CPU_VID_SHFT) & 0x7F);
|
highVoltageVid = (u8) ((msr.lo >> PS_CPU_VID_SHFT) & 0x7F);
|
||||||
if (!(msr.hi & 0x80000000)) {
|
if (!(msr.hi & 0x80000000)) {
|
||||||
printk(BIOS_ERR,"P-state info in MSRC001_0064 is invalid !!!\n");
|
printk(BIOS_ERR,"P-state info in MSRC001_0064 is invalid !!!\n");
|
||||||
highVoltageVid = (u8) ((pci_read_config32(dev, 0x1E0)
|
highVoltageVid = (u8) ((pci_read_config32(dev, 0x1E0)
|
||||||
>> PS_CPU_VID_SHFT) & 0x7F);
|
>> PS_CPU_VID_SHFT) & 0x7F);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -238,13 +238,13 @@ static void recalculateVsSlamTimeSettingOnCorePre(device_t dev)
|
||||||
/* Get PSmax's index */
|
/* Get PSmax's index */
|
||||||
msr = rdmsr(0xC0010061);
|
msr = rdmsr(0xC0010061);
|
||||||
bValue = (u8) ((msr.lo >> PS_MAX_VAL_SHFT) & BIT_MASK_3);
|
bValue = (u8) ((msr.lo >> PS_MAX_VAL_SHFT) & BIT_MASK_3);
|
||||||
|
|
||||||
/* Get PSmax's VID */
|
/* Get PSmax's VID */
|
||||||
msr = rdmsr(0xC0010064 + bValue);
|
msr = rdmsr(0xC0010064 + bValue);
|
||||||
lowVoltageVid = (u8) ((msr.lo >> PS_CPU_VID_SHFT) & 0x7F);
|
lowVoltageVid = (u8) ((msr.lo >> PS_CPU_VID_SHFT) & 0x7F);
|
||||||
if (!(msr.hi & 0x80000000)) {
|
if (!(msr.hi & 0x80000000)) {
|
||||||
printk(BIOS_ERR,"P-state info in MSR%8x is invalid !!!\n",0xC0010064 + bValue);
|
printk(BIOS_ERR,"P-state info in MSR%8x is invalid !!!\n",0xC0010064 + bValue);
|
||||||
lowVoltageVid = (u8) ((pci_read_config32(dev, 0x1E0+(bValue*4))
|
lowVoltageVid = (u8) ((pci_read_config32(dev, 0x1E0+(bValue*4))
|
||||||
>> PS_CPU_VID_SHFT) & 0x7F);
|
>> PS_CPU_VID_SHFT) & 0x7F);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,7 +266,7 @@ static void recalculateVsSlamTimeSettingOnCorePre(device_t dev)
|
||||||
if (lowVoltageVid < bValue)
|
if (lowVoltageVid < bValue)
|
||||||
lowVoltageVid = bValue;
|
lowVoltageVid = bValue;
|
||||||
|
|
||||||
u8 mobileFlag = get_platform_type() & AMD_PTYPE_MOB;
|
u8 mobileFlag = get_platform_type() & AMD_PTYPE_MOB;
|
||||||
minimumSlamTime = (mobileFlag?2:4) * (vidTo100uV(highVoltageVid) - vidTo100uV(lowVoltageVid)); /* * 0.01 us */
|
minimumSlamTime = (mobileFlag?2:4) * (vidTo100uV(highVoltageVid) - vidTo100uV(lowVoltageVid)); /* * 0.01 us */
|
||||||
|
|
||||||
|
|
||||||
|
@ -819,7 +819,7 @@ static void init_fidvid_bsp_stage1(u32 ap_apicid, void *gp)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void fixPsNbVidAfterWR(u32 newNbVid, u8 NbVidUpdatedAll,u8 pviMode)
|
static void fixPsNbVidAfterWR(u32 newNbVid, u8 NbVidUpdatedAll,u8 pviMode)
|
||||||
{
|
{
|
||||||
msr_t msr;
|
msr_t msr;
|
||||||
u8 i;
|
u8 i;
|
||||||
|
@ -837,7 +837,7 @@ static void fixPsNbVidAfterWR(u32 newNbVid, u8 NbVidUpdatedAll,u8 pviMode)
|
||||||
for (i = 0; i < 5; i++) {
|
for (i = 0; i < 5; i++) {
|
||||||
msr = rdmsr(0xC0010064 + i);
|
msr = rdmsr(0xC0010064 + i);
|
||||||
/* NbDid (bit 22 of P-state Reg) == 0 or NbVidUpdatedAll = 1 */
|
/* NbDid (bit 22 of P-state Reg) == 0 or NbVidUpdatedAll = 1 */
|
||||||
if ( (msr.hi & PS_IDD_VALUE_MASK)
|
if ( (msr.hi & PS_IDD_VALUE_MASK)
|
||||||
&& (msr.hi & PS_EN_MASK)
|
&& (msr.hi & PS_EN_MASK)
|
||||||
&&(((msr.lo & PS_NB_DID_MASK) == 0) || NbVidUpdatedAll)) {
|
&&(((msr.lo & PS_NB_DID_MASK) == 0) || NbVidUpdatedAll)) {
|
||||||
msr.lo &= PS_NB_VID_M_OFF;
|
msr.lo &= PS_NB_VID_M_OFF;
|
||||||
|
@ -855,7 +855,7 @@ static void fixPsNbVidAfterWR(u32 newNbVid, u8 NbVidUpdatedAll,u8 pviMode)
|
||||||
/* For each core in the system, transition all cores to StartupPstate */
|
/* For each core in the system, transition all cores to StartupPstate */
|
||||||
msr = rdmsr(0xC0010071);
|
msr = rdmsr(0xC0010071);
|
||||||
StartupPstate = msr.hi & 0x07;
|
StartupPstate = msr.hi & 0x07;
|
||||||
|
|
||||||
/* Set and wait for StartupPstate to set. */
|
/* Set and wait for StartupPstate to set. */
|
||||||
set_pstate(StartupPstate);
|
set_pstate(StartupPstate);
|
||||||
|
|
||||||
|
|
|
@ -291,7 +291,10 @@
|
||||||
|
|
||||||
#define TSC_MSR 0x10
|
#define TSC_MSR 0x10
|
||||||
#define CUR_PSTATE_MSR 0xc0010063
|
#define CUR_PSTATE_MSR 0xc0010063
|
||||||
|
#define TSC_FREQ_SEL_SHIFT 24
|
||||||
|
|
||||||
|
#define TSC_FREQ_SEL_MASK (1 << TSC_FREQ_SEL_SHIFT)
|
||||||
|
|
||||||
#define WAIT_PSTATE_TIMEOUT 80000000 /* 0.1 s , unit : 1.25 ns */
|
#define WAIT_PSTATE_TIMEOUT 80000000 /* 0.1 s , unit : 1.25 ns */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -340,6 +340,39 @@ static void mctHookAfterDramInit(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
|
#if (CONFIG_DIMM_SUPPORT & 0x000F)==0x0005 /* AMD_FAM10_DDR3 */
|
||||||
|
/**
 * Busy-wait for at least the requested time by polling the time stamp counter.
 *
 * The tick budget is computed as 800 ticks per microsecond (1.25 ns ticks).
 * When TSC_FREQ_SEL_MASK is clear in HWCR and the current P-state register
 * does not have NB_DID_M_ON set, the budget is doubled.
 *
 * This is still a local hack; it would be nice to have a central delay
 * function instead.
 *
 * @param microseconds Minimum delay in microseconds. The tick budget is held
 *        in 32 bits, so the product wraps for very large requests (roughly
 *        2.6 million microseconds once the doubling applies); callers are
 *        expected to pass small values.
 */
static void coreDelay(u32 microseconds)
{
	msr_t now;
	msr_t end;
	u32 cycles;

	cycles = (microseconds * 100) << 3;  /* x8 (number of 1.25ns ticks) */

	if (!(rdmsr(HWCR).lo & TSC_FREQ_SEL_MASK)) {
		msr_t pstate_msr = rdmsr(CUR_PSTATE_MSR);
		/* Only the low three bits hold the current P-state number; mask
		 * them so stray upper bits can never select an unrelated MSR. */
		if (!(rdmsr(0xC0010064 + (pstate_msr.lo & 0x07)).lo & NB_DID_M_ON)) {
			cycles = cycles << 1; /* half freq, double cycles */
		}
	}
	/* TODO: when TSC_FREQ_SEL_MASK is set, should the P0 frequency at the
	 * time it was set be kept somewhere and checked here? */

	now = rdmsr(TSC_MSR);
	/* Avoid overflow when called near 2^32 tick (~5.3 s) boundaries:
	 * carry into the high dword when the low dword would wrap. */
	if (0xffffffff - cycles >= now.lo) {
		end.hi = now.hi;
		end.lo = now.lo + cycles;
	} else {
		end.hi = now.hi + 1;
		end.lo = cycles - (1 + (0xffffffff - now.lo));
	}

	/* Spin until the 64-bit counter reaches the computed deadline. */
	do {
		now = rdmsr(TSC_MSR);
	} while ((now.hi < end.hi) || ((now.hi == end.hi) && (now.lo < end.lo)));
}
|
||||||
|
|
||||||
/* Erratum 350 */
|
/* Erratum 350 */
|
||||||
static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat)
|
static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat)
|
||||||
{
|
{
|
||||||
|
@ -385,7 +418,7 @@ static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs
|
||||||
|
|
||||||
print_t("vErrata350: step 3\n");
|
print_t("vErrata350: step 3\n");
|
||||||
/* 3. Wait at least 300 nanoseconds. */
|
/* 3. Wait at least 300 nanoseconds. */
|
||||||
coreDelay();
|
coreDelay(1);
|
||||||
|
|
||||||
print_t("vErrata350: step 4\n");
|
print_t("vErrata350: step 4\n");
|
||||||
/* 4. Write 0000_0000h to register F2x[1, 0]9C_xD080F0C. */
|
/* 4. Write 0000_0000h to register F2x[1, 0]9C_xD080F0C. */
|
||||||
|
@ -398,7 +431,7 @@ static void vErrata350(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTs
|
||||||
|
|
||||||
print_t("vErrata350: step 5\n");
|
print_t("vErrata350: step 5\n");
|
||||||
/* 5. Wait at least 2 microseconds. */
|
/* 5. Wait at least 2 microseconds. */
|
||||||
coreDelay();
|
coreDelay(2);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue