From 845b00ce3344d1483d98cddcf59f317a1b96da64 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Fri, 7 Aug 2015 19:05:29 -0500 Subject: [PATCH] amd/amdmct/mct_ddr3: Fix poor performance on Family 15h CPUs Change-Id: Ib6bc197e43e40ba2b923b1eb1229bacafc8be360 Signed-off-by: Timothy Pearson Reviewed-on: http://review.coreboot.org/12029 Tested-by: build bot (Jenkins) Reviewed-by: Stefan Reinauer --- src/northbridge/amd/amdmct/mct_ddr3/mct_d.c | 371 +++++++++++++++--- src/northbridge/amd/amdmct/mct_ddr3/mct_d.h | 1 + .../amd/amdmct/mct_ddr3/mctdqs_d.c | 65 ++- src/northbridge/amd/amdmct/mct_ddr3/mctproc.c | 49 ++- src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c | 216 +++++++++- src/northbridge/amd/amdmct/mct_ddr3/mctwl.c | 4 + 6 files changed, 614 insertions(+), 92 deletions(-) diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c index d76eea0e2a..aad813a056 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.c @@ -32,6 +32,8 @@ * supported. 
*/ +// #define DEBUG_DIMM_SPD 1 + static u8 ReconfigureDIMMspare_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); static void DQSTiming_D(struct MCTStatStruc *pMCTstat, @@ -168,7 +170,8 @@ static u32 mct_MR1Odt_RDimm(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct, u32 MrsChipSel); static u32 mct_DramTermDyn_RDimm(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dimm); -static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2); +static u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, + uint8_t dct, uint32_t misc2, uint32_t DramControl); static void mct_BeforeDQSTrainSamp(struct DCTStatStruc *pDCTstat); static void mct_WriteLevelization_HW(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, uint8_t Pass); @@ -1366,6 +1369,8 @@ static uint8_t fam15h_slow_access_mode(struct DCTStatStruc *pDCTstat, uint8_t dc static void set_2t_configuration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 dct) { + printk(BIOS_DEBUG, "%s: Start\n", __func__); + uint32_t dev; uint32_t reg; uint32_t dword; @@ -1388,6 +1393,8 @@ static void set_2t_configuration(struct MCTStatStruc *pMCTstat, else dword &= ~(0x1 << 20); /* Clear 2T CMD mode */ Set_NB32_DCT(dev, dct, reg, dword); + + printk(BIOS_DEBUG, "%s: Done\n", __func__); } static void precise_ndelay_fam15(struct MCTStatStruc *pMCTstat, uint32_t nanoseconds) { @@ -2019,6 +2026,8 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, /* Disable training mode */ uint8_t lane; uint8_t dimm; + int16_t max_cdd_we_delta; /* signed: compared against 0 before use */ + int16_t cdd_trwtto_we_delta; /* signed: compared against 0 before use */ uint8_t receiver; uint8_t max_lane; uint8_t ecc_enabled; @@ -2033,21 +2042,37 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, uint16_t twrwrdd; uint16_t cdd_twrwrdd; uint16_t twrrd; + uint16_t cdd_twrrd; + uint16_t cdd_trwtto; uint16_t trwtto; uint8_t first_dimm; uint16_t delay; uint16_t delay2; + uint8_t min_value; + uint8_t write_early; 
uint8_t read_odt_delay; uint8_t write_odt_delay; + uint8_t buffer_data_delay; + int16_t latency_difference; uint16_t difference; uint16_t current_total_delay_1[MAX_BYTE_LANES]; uint16_t current_total_delay_2[MAX_BYTE_LANES]; + uint8_t ddr_voltage_index; + uint8_t max_dimms_installable; /* FIXME * This should be platform configurable */ uint8_t dimm_event_l_pin_support = 0; + /* Only query the DIMM voltage when this DCT has valid DIMMs */ + if (pDCTstat->DIMMValidDCT[dct] == 0) + ddr_voltage_index = 1; + else + ddr_voltage_index = dct_ddr_voltage_index(pDCTstat, dct); + + max_dimms_installable = mctGet_NVbits(NV_MAX_DIMMS_PER_CH); + ecc_enabled = !!(pMCTstat->GStatus & 1 << GSB_ECCDIMMs); if (ecc_enabled) max_lane = 9; @@ -2081,6 +2106,24 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, else write_odt_delay = 0; + dword = (Get_NB32_DCT(dev, dct, 0xa8) >> 24) & 0x3; + write_early = dword / 2; + + latency_difference = Get_NB32_DCT(dev, dct, 0x200) & 0x1f; + dword = Get_NB32_DCT(dev, dct, 0x20c) & 0x1f; + latency_difference -= dword; + + if (pDCTstat->Status & (1 << SB_LoadReduced)) { + /* LRDIMM */ + buffer_data_delay = 0; /* placeholder so later reads are defined; real value is still TODO */ + /* TODO + * Implement LRDIMM support + * See Fam15h BKDG Rev. 3.14 section 2.10.5.5 + */ + } else { + buffer_data_delay = 0; + } + /* TODO: * Adjust trdrdsddc if four-rank DIMMs are installed per * section 2.10.5.5.1 of the Family 15h BKDG. 
@@ -2116,7 +2159,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, } /* Convert the difference to MEMCLKs */ - cdd_trdrddd = (((cdd_trdrddd >> 5) & 0x1f) + 1) / 2; + cdd_trdrddd = (((cdd_trdrddd + (1 << 6) - 1) >> 6) & 0xf); /* Calculate Trdrddd */ delay = (read_odt_delay + 3) * 2; @@ -2162,7 +2205,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, } /* Convert the difference to MEMCLKs */ - cdd_twrwrdd = (((cdd_twrwrdd >> 5) & 0x1f) + 1) / 2; + cdd_twrwrdd = (((cdd_twrwrdd + (1 << 6) - 1) >> 6) & 0xf); /* Calculate Twrwrdd */ delay = (write_odt_delay + 3) * 2; @@ -2181,6 +2224,107 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, dword &= ~(0x1 << 18); /* DisAutoRefresh = 0 */ Set_NB32_DCT(dev, dct, 0x8c, dword); /* DRAM Timing High */ + /* Configure power saving options */ + dword = Get_NB32_DCT(dev, dct, 0xa8); /* Dram Miscellaneous 2 */ + dword |= (0x1 << 22); /* PrtlChPDEnhEn = 0x1 */ + dword |= (0x1 << 21); /* AggrPDEn = 0x1 */ + Set_NB32_DCT(dev, dct, 0xa8, dword); /* Dram Miscellaneous 2 */ + + /* Configure partial power down delay */ + dword = Get_NB32(dev, 0x244); /* DRAM Controller Miscellaneous 3 */ + dword &= ~0xf; /* PrtlChPDDynDly = 0x2 */ + dword |= 0x2; + Set_NB32(dev, 0x244, dword); /* DRAM Controller Miscellaneous 3 */ + + /* Configure power save delays */ + delay = 0xa; + delay2 = 0x3; + + /* Family 15h BKDG Table 214 */ + if ((pDCTstat->Status & (1 << SB_Registered)) + || (pDCTstat->Status & (1 << SB_LoadReduced))) { + if (memclk_index <= 0x6) { + if (ddr_voltage_index < 0x4) + /* 1.5 or 1.35V */ + delay2 = 0x3; + else + /* 1.25V */ + delay2 = 0x4; + } + else if ((memclk_index == 0xa) + || (memclk_index == 0xe)) + delay2 = 0x4; + else if (memclk_index == 0x12) + delay2 = 0x5; + else if (memclk_index == 0x16) + delay2 = 0x6; + } else { + if (memclk_index <= 0x6) + delay2 = 0x3; + else if ((memclk_index == 0xa) + || (memclk_index == 0xe)) + delay2 = 0x4; + else if (memclk_index == 
0x12) + delay2 = 0x5; + else if (memclk_index == 0x16) + delay2 = 0x6; + } + + /* Family 15h BKDG Table 215 */ + if (memclk_index <= 0x6) + delay = 0xa; + else if (memclk_index == 0xa) + delay = 0xd; + else if (memclk_index == 0xe) + delay = 0x10; + else if (memclk_index == 0x12) + delay = 0x14; + else if (memclk_index == 0x16) + delay = 0x17; + + dword = Get_NB32_DCT(dev, dct, 0x248); /* Dram Power Management 0 */ + dword &= ~(0x3f << 24); /* AggrPDDelay = 0x0 */ + dword &= ~(0x3f << 16); /* PchgPDEnDelay = 0x1 */ + dword |= (0x1 << 16); + dword &= ~(0x1f << 8); /* Txpdll = delay */ + dword |= ((delay & 0x1f) << 8); + dword &= ~0xf; /* Txp = delay2 */ + dword |= delay2 & 0xf; + Set_NB32_DCT(dev, dct, 0x248, dword); /* Dram Power Management 0 */ + + /* Family 15h BKDG Table 216 */ + if (memclk_index <= 0x6) { + delay = 0x5; + delay2 = 0x3; + } + else if (memclk_index == 0xa) { + delay = 0x6; + delay2 = 0x3; + } + else if (memclk_index == 0xe) { + delay = 0x7; + delay2 = 0x4; + } + else if (memclk_index == 0x12) { + delay = 0x8; + delay2 = 0x4; + } + else if (memclk_index == 0x16) { + delay = 0xa; + delay2 = 0x5; + } + + dword = Get_NB32_DCT(dev, dct, 0x24c); /* Dram Power Management 1 */ + dword &= ~(0x3f << 24); /* Tcksrx = delay */ + dword |= ((delay & 0x3f) << 24); + dword &= ~(0x3f << 16); /* Tcksre = delay */ + dword |= ((delay & 0x3f) << 16); + dword &= ~(0x3f << 8); /* Tckesr = delay2 + 1 */ + dword |= (((delay2 + 1) & 0x3f) << 8); + dword &= ~0xf; /* Tpd = delay2 */ + dword |= delay2 & 0xf; + Set_NB32_DCT(dev, dct, 0x24c, dword); /* Dram Power Management 1 */ + dword = Get_NB32_DCT(dev, dct, 0x94); /* DRAM Configuration High */ dword |= (0xf << 24); /* DcqBypassMax = 0xf */ dword |= (0x1 << 22); /* BankSwizzleMode = 1 */ @@ -2233,15 +2377,98 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, } } - /* TODO - * Calculate Twrrd per section 2.10.5.5.3 of the Family 15h BKDG - */ - twrrd = 0xb; + /* Calculate the Critical Delay Difference for 
Twrrd */ + cdd_twrrd = 0; + for (receiver = 0; receiver < 8; receiver += 2) { + dimm = (receiver >> 1); - /* TODO - * Calculate TrwtTO per section 2.10.5.5.4 of the Family 15h BKDG - */ - trwtto = 0x16; + if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver)) + continue; + + read_dqs_write_timing_control_registers(current_total_delay_1, dev, dct, dimm, index_reg); + read_dqs_receiver_enable_control_registers(current_total_delay_2, dev, dct, dimm, index_reg); + + for (lane = 0; lane < max_lane; lane++) { + if (current_total_delay_1[lane] > current_total_delay_2[lane]) + difference = current_total_delay_1[lane] - current_total_delay_2[lane]; + else + difference = current_total_delay_2[lane] - current_total_delay_1[lane]; + + if (difference > cdd_twrrd) + cdd_twrrd = difference; + } + } + + /* Convert the difference to MEMCLKs */ + cdd_twrrd = (((cdd_twrrd + (1 << 6) - 1) >> 6) & 0xf); + + /* Fam15h BKDG section 2.10.5.5.3 */ + if (pDCTstat->Status & (1 << SB_LoadReduced)) { + /* LRDIMM */ + + /* TODO + * Implement LRDIMM support + * See Fam15h BKDG Rev. 
3.14 section 2.10.5.5 + */ + twrrd = 0xb; + } else { + max_cdd_we_delta = (((int16_t)cdd_twrrd + 1 - ((int16_t)write_early * 2)) + 1) / 2; + if (max_cdd_we_delta < 0) + max_cdd_we_delta = 0; + if (((uint16_t)max_cdd_we_delta) > write_odt_delay) + dword = max_cdd_we_delta; + else + dword = write_odt_delay; + dword += 3; + if (latency_difference < dword) { + dword -= latency_difference; + if (dword < 1) + twrrd = 1; + else + twrrd = dword; + } else { + twrrd = 1; + } + } + + /* Calculate the Critical Delay Difference for TrwtTO */ + cdd_trwtto = 0; + for (receiver = 0; receiver < 8; receiver += 2) { + dimm = (receiver >> 1); + + if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, dct, receiver)) + continue; + + read_dqs_receiver_enable_control_registers(current_total_delay_1, dev, dct, dimm, index_reg); + read_dqs_write_timing_control_registers(current_total_delay_2, dev, dct, dimm, index_reg); + + for (lane = 0; lane < max_lane; lane++) { + if (current_total_delay_1[lane] > current_total_delay_2[lane]) + difference = current_total_delay_1[lane] - current_total_delay_2[lane]; + else + difference = current_total_delay_2[lane] - current_total_delay_1[lane]; + + if (difference > cdd_trwtto) + cdd_trwtto = difference; + } + } + + /* Convert the difference to MEMCLKs */ + cdd_trwtto = (((cdd_trwtto + (1 << 6) - 1) >> 6) & 0xf); + + /* Fam15h BKDG section 2.10.5.5.4 */ + if (max_dimms_installable == 1) + min_value = 0; + else + min_value = read_odt_delay + buffer_data_delay; + cdd_trwtto_we_delta = (((int16_t)cdd_trwtto - 1 + ((int16_t)write_early * 2)) + 1) / 2; + cdd_trwtto_we_delta += latency_difference + 3; + if (cdd_trwtto_we_delta < 0) + cdd_trwtto_we_delta = 0; + if ((cdd_trwtto_we_delta) > min_value) + trwtto = cdd_trwtto_we_delta; + else + trwtto = min_value; dword = Get_NB32_DCT(dev, dct, 0xa4); /* DRAM Controller Temperature Throttle */ dword &= ~(0x1 << 11); /* BwCapEn = 0 */ @@ -2252,6 +2479,7 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, 
dword = Get_NB32_DCT(dev, dct, 0x110); /* DRAM Controller Select Low */ dword &= ~(0x1 << 2); /* DctSelIntLvEn = interleave_channels */ dword |= (interleave_channels & 0x1) << 2; + dword |= (0x3 << 6); /* DctSelIntLvAddr = 0x3 */ Set_NB32_DCT(dev, dct, 0x110, dword); /* DRAM Controller Select Low */ /* NOTE @@ -2259,22 +2487,6 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, * otherwise semi-random lockups will occur due to misconfigured scrubbing hardware! */ - /* FIXME - * The BKDG-recommended settings cause memory corruption on the ASUS KGPE-D16. - * Investigate and fix... - */ -#if 0 - /* Fam15h BKDG section 2.10.5.5.1 */ - dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ - dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */ - dword |= (0x1 << 24); - dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */ - dword |= ((trdrdsddc & 0xf) << 16); - dword &= ~(0xf); /* TrdrdDd = trdrddd */ - dword |= (trdrddd & 0xf); - Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ -#endif - /* Fam15h BKDG section 2.10.5.5.2 */ dword = Get_NB32_DCT(dev, dct, 0x214); /* DRAM Timing 4 */ dword &= ~(0xf << 16); /* TwrwrSdSc = 0x1 */ @@ -2287,8 +2499,14 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, /* Fam15h BKDG section 2.10.5.5.3 */ dword = Get_NB32_DCT(dev, dct, 0x218); /* DRAM Timing 5 */ + dword &= ~(0xf << 24); /* TrdrdSdSc = 0x1 */ + dword |= (0x1 << 24); + dword &= ~(0xf << 16); /* TrdrdSdDc = trdrdsddc */ + dword |= ((trdrdsddc & 0xf) << 16); dword &= ~(0xf << 8); /* Twrrd = twrrd */ dword |= ((twrrd & 0xf) << 8); + dword &= ~(0xf); /* TrdrdDd = trdrddd */ + dword |= (trdrddd & 0xf); Set_NB32_DCT(dev, dct, 0x218, dword); /* DRAM Timing 5 */ /* Fam15h BKDG section 2.10.5.5.4 */ @@ -2299,12 +2517,6 @@ static void fam15EnableTrainingMode(struct MCTStatStruc *pMCTstat, dword |= ((((dword >> 8) & 0x1f) + 1) << 16); Set_NB32_DCT(dev, dct, 0x21c, dword); /* DRAM Timing 6 */ - /* Configure partial power down delay */ - dword = 
Get_NB32(dev, 0x244); /* DRAM Controller Miscellaneous 3 */ - dword &= ~0xf; /* PrtlChPDDynDly = 0x2 */ - dword |= 0x2; - Set_NB32(dev, 0x244, dword); /* DRAM Controller Miscellaneous 3 */ - /* Enable prefetchers */ dword = Get_NB32(dev, 0x11c); /* Memory Controller Configuration High */ dword &= ~(0x1 << 13); /* PrefIoDis = 0 */ @@ -2393,6 +2605,8 @@ static void DQSTiming_D(struct MCTStatStruc *pMCTstat, mct_TrainDQSPos_D(pMCTstat, pDCTstatA); + TrainMaxRdLatency_En_D(pMCTstat, pDCTstatA); + if (is_fam15h()) exit_training_mode_fam15(pMCTstat, pDCTstatA); else @@ -2971,6 +3185,13 @@ static void ClearDCT_D(struct MCTStatStruc *pMCTstat, } while(reg < reg_end) { + if ((reg & 0xFF) == 0x84) { + if (is_fam15h()) { + val = Get_NB32_DCT(dev, dct, reg); + val &= ~(0x1 << 23); /* Clear PchgPDModeSel */ + val &= ~0x3; /* Clear BurstCtrl */ + } + } if ((reg & 0xFF) == 0x90) { if (pDCTstat->LogicalCPUID & AMD_DR_Dx) { val = Get_NB32_DCT(dev, dct, reg); /* get DRAMConfigLow */ @@ -3089,14 +3310,30 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, /* Convert DRAM CycleTiming values and store into DCT structure */ byte = pDCTstat->DIMMAutoSpeed; - if (byte == 7) - tCK16x = 20; - else if (byte == 6) - tCK16x = 24; - else if (byte == 5) - tCK16x = 30; - else - tCK16x = 40; + if (is_fam15h()) { + if (byte == 0x16) + tCK16x = 17; + else if (byte == 0x12) + tCK16x = 20; + else if (byte == 0xe) + tCK16x = 24; + else if (byte == 0xa) + tCK16x = 30; + else if (byte == 0x6) + tCK16x = 40; + else + tCK16x = 48; + } + else { + if (byte == 7) + tCK16x = 20; + else if (byte == 6) + tCK16x = 24; + else if (byte == 5) + tCK16x = 30; + else + tCK16x = 40; + } /* Notes: 1. All secondary time values given in SPDs are in binary with units of ns. 
@@ -3129,7 +3366,7 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, val = Max_TrpT; pDCTstat->Trp = val; - /*Trrd*/ + /* Trrd */ pDCTstat->DIMMTrrd = Trrd; val = Trrd / tCK16x; if (Trrd % tCK16x) { /* round up number of busclocks */ @@ -3247,21 +3484,31 @@ static void SPD2ndTiming(struct MCTStatStruc *pMCTstat, dword = Get_NB32_DCT(dev, dct, 0x200); /* DRAM Timing 0 */ dword &= ~(0x3f1f1f1f); - dword |= ((pDCTstat->Tras + 0xf) & 0x3f) << 24; /* Tras */ - dword |= ((pDCTstat->Trp + 0x5) & 0x1f) << 16; /* Trp */ - dword |= ((pDCTstat->Trcd + 0x5) & 0x1f) << 8; /* Trcd */ + dword |= (pDCTstat->Tras & 0x3f) << 24; /* Tras */ + val = pDCTstat->Trp; + val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); + dword |= (val & 0x1f) << 16; /* Trp */ + dword |= (pDCTstat->Trcd & 0x1f) << 8; /* Trcd */ dword |= (pDCTstat->CASL & 0x1f); /* Tcl */ Set_NB32_DCT(dev, dct, 0x200, dword); /* DRAM Timing 0 */ dword = Get_NB32_DCT(dev, dct, 0x204); /* DRAM Timing 1 */ dword &= ~(0x0f3f0f3f); - dword |= ((pDCTstat->Trtp + 0x4) & 0xf) << 24; /* Trtp */ - if (pDCTstat->Tfaw != 0) - dword |= ((((pDCTstat->Tfaw - 0x1) * 2) + 0x10) & 0x3f) << 16; /* FourActWindow */ - dword |= ((pDCTstat->Trrd + 0x4) & 0xf) << 8; /* Trrd */ - dword |= ((pDCTstat->Trc + 0xb) & 0x3f); /* Trc */ + dword |= (pDCTstat->Trtp & 0xf) << 24; /* Trtp */ + if (pDCTstat->Tfaw != 0) { + val = pDCTstat->Tfaw; + val = mct_AdjustSPDTimings(pMCTstat, pDCTstat, val); + if ((val > 0x5) && (val < 0x2b)) + dword |= (val & 0x3f) << 16; /* FourActWindow */ + } + dword |= (pDCTstat->Trrd & 0xf) << 8; /* Trrd */ + dword |= (pDCTstat->Trc & 0x3f); /* Trc */ Set_NB32_DCT(dev, dct, 0x204, dword); /* DRAM Timing 1 */ + /* Trfc0-Trfc3 */ + for (i=0; i<4; i++) + if (pDCTstat->Trfc[i] == 0x0) + pDCTstat->Trfc[i] = 0x4; dword = Get_NB32_DCT(dev, dct, 0x208); /* DRAM Timing 2 */ dword &= ~(0x07070707); dword |= (pDCTstat->Trfc[3] & 0x7) << 24; /* Trfc3 */ @@ -3272,14 +3519,14 @@ static void SPD2ndTiming(struct MCTStatStruc 
*pMCTstat, dword = Get_NB32_DCT(dev, dct, 0x20c); /* DRAM Timing 3 */ dword &= ~(0x00000f00); - dword |= ((pDCTstat->Twtr + 0x4) & 0xf) << 8; /* Twtr */ + dword |= (pDCTstat->Twtr & 0xf) << 8; /* Twtr */ dword &= ~(0x0000001f); dword |= (Tcwl & 0x1f); /* Tcwl */ Set_NB32_DCT(dev, dct, 0x20c, dword); /* DRAM Timing 3 */ dword = Get_NB32_DCT(dev, dct, 0x22c); /* DRAM Timing 10 */ dword &= ~(0x0000001f); - dword |= ((pDCTstat->Twr + 0x4) & 0x1f); /* Twr */ + dword |= (pDCTstat->Twr & 0x1f); /* Twr */ Set_NB32_DCT(dev, dct, 0x22c, dword); /* DRAM Timing 10 */ if (pDCTstat->Speed > mhz_to_memclk_config(mctGet_NVbits(NV_MIN_MEMCLK))) { @@ -3875,6 +4122,8 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, } } + DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2, DramControl); + printk(BIOS_DEBUG, "AutoConfig_D: DramControl: %08x\n", DramControl); printk(BIOS_DEBUG, "AutoConfig_D: DramTimingLo: %08x\n", DramTimingLo); printk(BIOS_DEBUG, "AutoConfig_D: DramConfigMisc: %08x\n", DramConfigMisc); @@ -3886,7 +4135,6 @@ static u8 AutoConfig_D(struct MCTStatStruc *pMCTstat, Set_NB32_DCT(dev, dct, 0x78, DramControl); Set_NB32_DCT(dev, dct, 0x88, DramTimingLo); Set_NB32_DCT(dev, dct, 0xa0, DramConfigMisc); - DramConfigMisc2 = mct_SetDramConfigMisc2(pDCTstat, dct, DramConfigMisc2); Set_NB32_DCT(dev, dct, 0xa8, DramConfigMisc2); Set_NB32_DCT(dev, dct, 0x90, DramConfigLo); ProgDramMRSReg_D(pMCTstat, pDCTstat, dct); @@ -5257,6 +5505,16 @@ static void mct_PhyController_Config(struct MCTStatStruc *pMCTstat, u32 dev = pDCTstat->dev_dct; if (pDCTstat->LogicalCPUID & (AMD_DR_DAC2_OR_C3 | AMD_RB_C3 | AMD_FAM15_ALL)) { + if (is_fam15h()) { + /* Set F2x[1, 0]98_x0D0F0F13 DllDisEarlyU and DllDisEarlyL to save power */ + for (index = 0; index < 0x9; index++) { + dword = Get_NB32_index_wait_DCT(dev, dct, index_reg, 0x0d0f0013 | (index << 8)); + dword |= (0x1 << 1); /* DllDisEarlyU = 1 */ + dword |= 0x1; /* DllDisEarlyL = 1 */ + Set_NB32_index_wait_DCT(dev, dct, 
index_reg, 0x0d0f0013 | (index << 8), dword); + } + } + if (pDCTstat->Dimmx4Present == 0) { /* Set bit7 RxDqsUDllPowerDown to register F2x[1, 0]98_x0D0F0F13 for * additional power saving when x4 DIMMs are not present. @@ -5301,8 +5559,9 @@ static void mct_FinalMCT_D(struct MCTStatStruc *pMCTstat, mct_ExtMCTConfig_Dx(pDCTstat); } else { /* Family 15h CPUs */ - val = 0x0ce00f00 | 0x1 << 29; /* FlushWrOnStpGnt */ - val |= 0x10 << 2; /* MctWrLimit = 16 */ + val = 0x0ce00f00; /* FlushWrOnStpGnt = 0x0 */ + val |= 0x10 << 2; /* MctWrLimit = 0x10 */ + val |= 0x1; /* DctWrLimit = 0x1 */ Set_NB32(pDCTstat->dev_dct, 0x11c, val); val = Get_NB32(pDCTstat->dev_dct, 0x1b0); @@ -6543,8 +6802,8 @@ void ProgDramMRSReg_D(struct MCTStatStruc *pMCTstat, dword = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x84); if (is_fam15h()) { - dword |= DramMRS; dword &= ~0x00800003; + dword |= DramMRS; } else { dword &= ~0x00fc2f8f; dword |= DramMRS; diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h index eb4c74e309..b72b9da59a 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h +++ b/src/northbridge/amd/amdmct/mct_ddr3/mct_d.h @@ -984,6 +984,7 @@ void UMAMemTyping_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat uint64_t mctGetLogicalCPUID(u32 Node); u8 ECCInit_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA, u8 Pass); +void TrainMaxRdLatency_En_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); void mct_TrainDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); void TrainMaxReadLatency_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstatA); diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c index 3615616cd5..06597e23bb 100644 --- 
a/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctdqs_d.c @@ -20,6 +20,9 @@ static void write_dqs_receiver_enable_control_registers(uint16_t* current_total_ static void read_read_dqs_timing_control_registers(uint16_t* current_total_delay, uint32_t dev, uint8_t dct, uint8_t dimm, uint32_t index_reg); +static void dqsTrainMaxRdLatency_SW_Fam15(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat); + static void CalcEccDQSPos_D(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u16 like, u8 scale, u8 ChipSel); @@ -214,6 +217,27 @@ void TrainReceiverEn_D(struct MCTStatStruc *pMCTstat, } } +void TrainMaxRdLatency_En_D(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstatA) +{ + uint8_t node; + struct DCTStatStruc *pDCTstat; + + for (node = 0; node < MAX_NODES_SUPPORTED; node++) { + pDCTstat = pDCTstatA + node; + + if (pDCTstat->DCTSysLimit) { + if (is_fam15h()) { + dqsTrainMaxRdLatency_SW_Fam15(pMCTstat, pDCTstat); + } else { + /* FIXME + * Implement Family 10h MaxRdLatency training + */ + } + } + } +} + static void SetEccDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat, u8 ChipSel) { @@ -894,7 +918,7 @@ static void TrainDQSRdWrPos_D_Fam10(struct MCTStatStruc *pMCTstat, * Algorithm detailed in the Fam15h BKDG Rev. 
3.14 section 2.10.5.8.5 */ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, - struct DCTStatStruc *pDCTstat, uint8_t dct) + struct DCTStatStruc *pDCTstat, uint8_t dct, uint8_t calc_min) { uint8_t dimm; uint8_t lane; @@ -938,7 +962,8 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, p += (9 - dword); /* 2.10.5.8.5 (4) */ - p += 5; + if (!calc_min) + p += 5; /* 2.10.5.8.5 (5) */ dword = Get_NB32_DCT(dev, dct, 0xa8); @@ -965,7 +990,8 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, p += (max_delay >> 5); /* 2.10.5.8.5 (8) */ - p += 5; + if (!calc_min) + p += 5; /* 2.10.5.8.5 (9) */ t += 800; @@ -976,13 +1002,16 @@ static void Calc_SetMaxRdLatency_D_Fam15(struct MCTStatStruc *pMCTstat, n = (((((uint64_t)p * 1000000000000ULL)/(((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL) * 2)) + ((uint64_t)t)) * ((uint64_t)nb_clk * 1000)) / 1000000000ULL; /* 2.10.5.8.5 (11) */ - n -= 1; + if (!calc_min) + n -= 1; /* 2.10.5.8.5 (12) */ - dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210); - dword &= ~(0x3ff << 22); - dword |= (((n - 1) & 0x3ff) << 22); - Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword); + if (!calc_min) { + dword = Get_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210); + dword &= ~(0x3ff << 22); + dword |= (((n - 1) & 0x3ff) << 22); + Set_NB32_DCT_NBPstate(dev, dct, nb_pstate, 0x210, dword); + } /* Save result for later use */ pDCTstat->CH_MaxRdLat[dct] = n - 1; @@ -1103,6 +1132,9 @@ static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, } else if (lane < 8) { Set_NB32_DCT(dev, dct, 0x274, ~0x0); Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8))); + } else if (lane == 0xff) { + Set_NB32_DCT(dev, dct, 0x274, ~0xffffffff); + Set_NB32_DCT(dev, dct, 0x278, ~0xffffffff); } else { Set_NB32_DCT(dev, dct, 0x274, ~0x0); Set_NB32_DCT(dev, dct, 0x278, ~0x0); @@ -1110,8 +1142,9 @@ static void read_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, dword = 
Get_NB32_DCT(dev, dct, 0x27c); dword &= ~(0xff); /* EccMask = 0 */ - if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) - dword |= 0xff; /* EccMask = 0xff */ + if (lane != 0xff) + if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) + dword |= 0xff; /* EccMask = 0xff */ Set_NB32_DCT(dev, dct, 0x27c, dword); dword = Get_NB32_DCT(dev, dct, 0x270); @@ -1180,6 +1213,9 @@ static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, } else if (lane < 8) { Set_NB32_DCT(dev, dct, 0x274, ~0x0); Set_NB32_DCT(dev, dct, 0x278, ~(0xff << (lane * 8))); + } else if (lane == 0xff) { + Set_NB32_DCT(dev, dct, 0x274, ~0xffffffff); + Set_NB32_DCT(dev, dct, 0x278, ~0xffffffff); } else { Set_NB32_DCT(dev, dct, 0x274, ~0x0); Set_NB32_DCT(dev, dct, 0x278, ~0x0); @@ -1187,8 +1223,9 @@ static void write_dram_dqs_training_pattern_fam15(struct MCTStatStruc *pMCTstat, dword = Get_NB32_DCT(dev, dct, 0x27c); dword &= ~(0xff); /* EccMask = 0 */ - if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) - dword |= 0xff; /* EccMask = 0xff */ + if (lane != 0xff) + if ((lane != 8) || (pDCTstat->DimmECCPresent == 0)) + dword |= 0xff; /* EccMask = 0xff */ Set_NB32_DCT(dev, dct, 0x27c, dword); dword = Get_NB32_DCT(dev, dct, 0x270); @@ -1274,7 +1311,7 @@ static uint8_t TrainDQSRdWrPos_D_Fam15(struct MCTStatStruc *pMCTstat, uint32_t dev = pDCTstat->dev_dct; /* Calculate and program MaxRdLatency */ - Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct); + Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct, 0); Errors = 0; dual_rank = 0; @@ -1632,7 +1669,7 @@ static void TrainDQSReceiverEnCyc_D_Fam15(struct MCTStatStruc *pMCTstat, write_dqs_receiver_enable_control_registers(current_phy_phase_delay, dev, dct, dimm, index_reg); /* Calculate and program MaxRdLatency */ - Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct); + Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, dct, 0); /* 2.10.5.8.3 (4 B) */ dqs_results_array[current_phy_phase_delay[lane]] = TrainDQSRdWrPos_D_Fam15(pMCTstat, 
pDCTstat, dct, Receiver, Receiver + 2, lane, lane + 1); diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c index 09221935a3..cf13b40b31 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctproc.c @@ -15,7 +15,8 @@ */ /* mct_SetDramConfigMisc2_Cx & mct_SetDramConfigMisc2_Dx */ -u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) +u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, + uint8_t dct, uint32_t misc2, uint32_t DramControl) { u32 val; @@ -24,17 +25,47 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) if (pDCTstat->LogicalCPUID & AMD_FAM15_ALL) { uint8_t cs_mux_45; uint8_t cs_mux_67; + uint32_t f2x80; - /* BKDG v3.14 Table 200 / Table 201 */ - if (MaxDimmsInstallable < 3) { - cs_mux_45 = 1; - cs_mux_67 = 1; - } else { + misc2 &= ~(0x1 << 28); /* FastSelfRefEntryDis = 0x0 */ + if (MaxDimmsInstallable == 3) { + /* FIXME 3 DIMMS per channel unimplemented */ cs_mux_45 = 0; + } else { + uint32_t f2x60 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x60); + f2x80 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x80); + if ((((f2x80 & 0xf) == 0x7) || ((f2x80 & 0xf) == 0x9)) + && ((f2x60 & 0x3) == 0x3)) + cs_mux_45 = 1; + else if ((((f2x80 & 0xf) == 0xa) || ((f2x80 & 0xf) == 0xb)) + && ((f2x60 & 0x3) > 0x1)) + cs_mux_45 = 1; /* low nibble of F2x80 is 0xa or 0xb */ + else + cs_mux_45 = 0; + } + + if (MaxDimmsInstallable == 1) { + cs_mux_67 = 0; + } else if (MaxDimmsInstallable == 2) { + uint32_t f2x64 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x64); + f2x80 = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x80); + if (((((f2x80 >> 4) & 0xf) == 0x7) || (((f2x80 >> 4) & 0xf) == 0x9)) + && ((f2x64 & 0x3) == 0x3)) + cs_mux_67 = 1; + else if (((((f2x80 >> 4) & 0xf) == 0xa) || (((f2x80 >> 4) & 0xf) == 0xb)) + && ((f2x64 & 0x3) > 0x1)) + cs_mux_67 = 1; /* second nibble of F2x80 is 0xa or 0xb */ + else + cs_mux_67 = 0; + } else { + /* FIXME 3 DIMMS per channel unimplemented */ cs_mux_67 = 0; } - misc2 |= (cs_mux_45 & 
0x1) << 26; - misc2 |= (cs_mux_67 & 0x1) << 27; + + misc2 &= ~(0x1 << 27); /* CsMux67 = cs_mux_67 */ + misc2 |= ((cs_mux_67 & 0x1) << 27); + misc2 &= ~(0x1 << 26); /* CsMux45 = cs_mux_45 */ + misc2 |= ((cs_mux_45 & 0x1) << 26); } else if (pDCTstat->LogicalCPUID & (AMD_DR_Dx | AMD_DR_Cx)) { if (pDCTstat->Status & (1 << SB_Registered)) { misc2 |= 1 << SubMemclkRegDly; @@ -46,8 +77,8 @@ u32 mct_SetDramConfigMisc2(struct DCTStatStruc *pDCTstat, u8 dct, u32 misc2) if (pDCTstat->LogicalCPUID & AMD_DR_Cx) misc2 |= 1 << OdtSwizzle; - val = Get_NB32_DCT(pDCTstat->dev_dct, dct, 0x78); + val = DramControl; val &= 7; val = ((~val) & 0xff) + 1; val += 6; diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c index 19b1b8f1e7..b36ecae32f 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctsrc.c @@ -541,9 +541,8 @@ static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat, u32 dev; u32 index_reg; u32 ch_start, ch_end, ch; - u32 msr; + msr_t msr; u32 cr4; - u32 lo, hi; uint32_t dword; uint8_t dimm; @@ -594,15 +593,14 @@ static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat, cr4 |= (1 << 9); /* OSFXSR enable SSE2 */ write_cr4(cr4); - msr = HWCR; - _RDMSR(msr, &lo, &hi); + msr = rdmsr(HWCR); /* FIXME: Why use SSEDIS */ - if(lo & (1 << 17)) { /* save the old value */ + if(msr.lo & (1 << 17)) { /* save the old value */ _Wrap32Dis = 1; } - lo |= (1 << 17); /* HWCR.wrap32dis */ - lo &= ~(1 << 15); /* SSEDIS */ - _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */ + msr.lo |= (1 << 17); /* HWCR.wrap32dis */ + msr.lo &= ~(1 << 15); /* SSEDIS */ + wrmsr(HWCR, msr); /* Setting wrap32dis allows 64-bit memory references in real mode */ _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); @@ -935,10 +933,9 @@ static void dqsTrainRcvrEn_SW_Fam10(struct MCTStatStruc *pMCTstat, } if(!_Wrap32Dis) { - msr = HWCR; - _RDMSR(msr, &lo, 
&hi); - lo &= ~(1<<17); /* restore HWCR.wrap32dis */ - _WRMSR(msr, lo, hi); + msr = rdmsr(HWCR); + msr.lo &= ~(1<<17); /* restore HWCR.wrap32dis */ + wrmsr(HWCR, msr); } if(!_SSE2){ cr4 = read_cr4(); @@ -1420,7 +1417,7 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, } /* Calculate and program MaxRdLatency */ - Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel); + Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel, 0); if(_DisableDramECC) { mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); @@ -1483,6 +1480,199 @@ static void dqsTrainRcvrEn_SW_Fam15(struct MCTStatStruc *pMCTstat, printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n"); } +static void write_max_read_latency_to_registers(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat, uint8_t dct, uint16_t latency) +{ + uint32_t dword; + uint8_t nb_pstate; + + for (nb_pstate = 0; nb_pstate < 2; nb_pstate++) { + dword = Get_NB32_DCT_NBPstate(pDCTstat->dev_dct, dct, nb_pstate, 0x210); + dword &= ~(0x3ff << 22); + dword |= ((latency & 0x3ff) << 22); + Set_NB32_DCT_NBPstate(pDCTstat->dev_dct, dct, nb_pstate, 0x210, dword); + } +} + +/* DQS MaxRdLatency Training (Family 15h) + * Algorithm detailed in: + * The Fam15h BKDG Rev. 3.14 section 2.10.5.8.5.1 + * This algorithm runs at the highest supported MEMCLK. 
+ */ +static void dqsTrainMaxRdLatency_SW_Fam15(struct MCTStatStruc *pMCTstat, + struct DCTStatStruc *pDCTstat) +{ + u8 Channel; + u8 Addl_Index = 0; + u8 Receiver; + u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0; + u32 Errors; + + u32 dev; + u32 index_reg; + u32 ch_start, ch_end; + u32 msr; + u32 cr4; + u32 lo, hi; + + uint32_t dword; + uint8_t dimm; + uint8_t lane; + uint8_t mem_clk; + uint32_t nb_clk; + uint8_t nb_pstate; + uint16_t current_total_delay[MAX_BYTE_LANES]; + uint16_t current_rdqs_total_delay[MAX_BYTE_LANES]; + uint8_t current_worst_case_total_delay_dimm; + uint16_t current_worst_case_total_delay_value; + + uint16_t fam15h_freq_tab[] = {0, 0, 0, 0, 333, 0, 400, 0, 0, 0, 533, 0, 0, 0, 667, 0, 0, 0, 800, 0, 0, 0, 933}; + + print_debug_dqs("\nTrainMaxRdLatency: Node", pDCTstat->Node_ID, 0); + + dev = pDCTstat->dev_dct; + index_reg = 0x98; + ch_start = 0; + ch_end = 2; + + cr4 = read_cr4(); + if(cr4 & ( 1 << 9)) { /* save the old value */ + _SSE2 = 1; + } + cr4 |= (1 << 9); /* OSFXSR enable SSE2 */ + write_cr4(cr4); + + msr = HWCR; + _RDMSR(msr, &lo, &hi); + /* FIXME: Why use SSEDIS */ + if(lo & (1 << 17)) { /* save the old value */ + _Wrap32Dis = 1; + } + lo |= (1 << 17); /* HWCR.wrap32dis */ + lo &= ~(1 << 15); /* SSEDIS */ + _WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */ + + _DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat); + + Errors = 0; + dev = pDCTstat->dev_dct; + + for (Channel = 0; Channel < 2; Channel++) { + print_debug_dqs("\tTrainMaxRdLatency51: Node ", pDCTstat->Node_ID, 1); + print_debug_dqs("\tTrainMaxRdLatency51: Channel ", Channel, 1); + pDCTstat->Channel = Channel; + + if (pDCTstat->DIMMValidDCT[Channel] == 0) + continue; + + mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f; + + Receiver = mct_InitReceiver_D(pDCTstat, Channel); + + /* Find DIMM with worst case receiver enable delays */ + current_worst_case_total_delay_dimm = 0; + current_worst_case_total_delay_value = 0; + + /* 
There are four receiver pairs, loosely associated with chipselects. + * This is essentially looping over each DIMM. + */ + for (; Receiver < 8; Receiver += 2) { + Addl_Index = (Receiver >> 1) * 3 + 0x10; + dimm = (Receiver >> 1); + + print_debug_dqs("\t\tTrainMaxRdLatency52: index ", Addl_Index, 2); + + if (!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) { + continue; + } + + /* Retrieve the total delay values from pass 1 of DQS receiver enable training */ + read_dqs_receiver_enable_control_registers(current_total_delay, dev, Channel, dimm, index_reg); + read_read_dqs_timing_control_registers(current_rdqs_total_delay, dev, Channel, dimm, index_reg); + + for (lane = 0; lane < 8; lane++) { + current_total_delay[lane] += current_rdqs_total_delay[lane]; + if (current_total_delay[lane] > current_worst_case_total_delay_value) { + current_worst_case_total_delay_dimm = dimm; + current_worst_case_total_delay_value = current_total_delay[lane]; + } + } + +#if DQS_TRAIN_DEBUG > 0 + for (lane = 0; lane < 8; lane++) + print_debug_dqs_pair("\t\tTrainMaxRdLatency56: Lane ", lane, " current_total_delay ", current_total_delay[lane], 2); +#endif + } + + /* 2.10.5.8.5.1.1 */ + Calc_SetMaxRdLatency_D_Fam15(pMCTstat, pDCTstat, Channel, 1); + + /* 2.10.5.8.5.1.[2,3] + * Write the DRAM training pattern to the test address + */ + write_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, Channel, current_worst_case_total_delay_dimm << 1, 0xff); + + /* 2.10.5.8.5.1.4 + * Incrementally test each MaxRdLatency candidate + */ + for (; pDCTstat->CH_MaxRdLat[Channel] < 0x3ff; pDCTstat->CH_MaxRdLat[Channel]++) { + write_max_read_latency_to_registers(pMCTstat, pDCTstat, Channel, pDCTstat->CH_MaxRdLat[Channel]); + read_dram_dqs_training_pattern_fam15(pMCTstat, pDCTstat, Channel, current_worst_case_total_delay_dimm << 1, 0xff); + dword = Get_NB32_DCT(dev, Channel, 0x268) & 0x3ffff; + if (!dword) + break; + Set_NB32_index_wait_DCT(dev, Channel, index_reg, 0x00000050, 0x13131313); + } + + 
/* 2.10.5.8.5.1.5 */ + nb_pstate = 0; + mem_clk = Get_NB32_DCT(dev, Channel, 0x94) & 0x1f; + if (fam15h_freq_tab[mem_clk] == 0) { + return; + } + dword = Get_NB32(pDCTstat->dev_nbctl, (0x160 + (nb_pstate * 4))); /* Retrieve NbDid, NbFid */ + nb_clk = (200 * (((dword >> 1) & 0x1f) + 0x4)) / (((dword >> 7) & 0x1)?2:1); + + pDCTstat->CH_MaxRdLat[Channel]++; + pDCTstat->CH_MaxRdLat[Channel] += ((((uint64_t)15 * 100000000000ULL) / ((uint64_t)fam15h_freq_tab[mem_clk] * 1000000ULL)) + * ((uint64_t)nb_clk * 1000)) / 1000000000ULL; + + write_max_read_latency_to_registers(pMCTstat, pDCTstat, Channel, pDCTstat->CH_MaxRdLat[Channel]); + } + + if(_DisableDramECC) { + mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC); + } + + if(!_Wrap32Dis) { + msr = HWCR; + _RDMSR(msr, &lo, &hi); + lo &= ~(1<<17); /* restore HWCR.wrap32dis */ + _WRMSR(msr, lo, hi); + } + if(!_SSE2){ + cr4 = read_cr4(); + cr4 &= ~(1<<9); /* restore cr4.OSFXSR */ + write_cr4(cr4); + } + +#if DQS_TRAIN_DEBUG > 0 + { + u8 ChannelDTD; + printk(BIOS_DEBUG, "TrainMaxRdLatency: CH_MaxRdLat:\n"); + for(ChannelDTD = 0; ChannelDTD<2; ChannelDTD++) { + printk(BIOS_DEBUG, "Channel:%x: %x\n", + ChannelDTD, pDCTstat->CH_MaxRdLat[ChannelDTD]); + } + } +#endif + + printk(BIOS_DEBUG, "TrainMaxRdLatency: Status %x\n", pDCTstat->Status); + printk(BIOS_DEBUG, "TrainMaxRdLatency: ErrStatus %x\n", pDCTstat->ErrStatus); + printk(BIOS_DEBUG, "TrainMaxRdLatency: ErrCode %x\n", pDCTstat->ErrCode); + printk(BIOS_DEBUG, "TrainMaxRdLatency: Done\n\n"); +} + u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct) { if (pDCTstat->DIMMValidDCT[dct] == 0 ) { diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c index 4bfcc401ff..b354d923f8 100644 --- a/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c +++ b/src/northbridge/amd/amdmct/mct_ddr3/mctwl.c @@ -168,6 +168,8 @@ static void EnterSelfRefresh(struct MCTStatStruc *pMCTstat, static void ChangeMemClk(struct MCTStatStruc *pMCTstat, 
struct DCTStatStruc *pDCTstat) { + printk(BIOS_DEBUG, "%s: Start\n", __func__); + uint8_t DCT0Present; uint8_t DCT1Present; uint32_t dword; @@ -309,6 +311,8 @@ static void ChangeMemClk(struct MCTStatStruc *pMCTstat, mct_Wait(15000); /* Wait for 750us */ } } + + printk(BIOS_DEBUG, "%s: Done\n", __func__); } /*