diff --git a/src/cpu/amd/dualcore/amd_sibling.c b/src/cpu/amd/dualcore/amd_sibling.c index da5928ab14..c74b282cb6 100644 --- a/src/cpu/amd/dualcore/amd_sibling.c +++ b/src/cpu/amd/dualcore/amd_sibling.c @@ -1,4 +1,5 @@ /* 2004.12 yhlu add dual core support */ +/* 24 June 2005 Cleaned up dual core support Eric Biederman */ #include #include @@ -14,59 +15,87 @@ static int first_time = 1; static int disable_siblings = !CONFIG_LOGICAL_CPUS; - - -int is_e0_later_in_bsp(int nodeid) +void amd_sibling_init(device_t cpu, struct node_core_id id) { - uint32_t val; - uint32_t val_old; - int e0_later; - if(nodeid==0) { // we don't need to do that for node 0 in core0/node0 - return !is_cpu_pre_e0(); + unsigned long i; + unsigned siblings, max_siblings; + + /* On the bootstrap processor see if I want sibling cpus enabled */ + if (first_time) { + first_time = 0; + get_option(&disable_siblings, "dual_core"); } - // d0 will be treated as e0 with this methods, but the d0 nb_cfg_54 always 0 - device_t dev; - dev = dev_find_slot(0, PCI_DEVFN(0x18+nodeid,2)); - if(!dev) return 0; - val_old = pci_read_config32(dev, 0x80); - val = val_old; - val |= (1<<3); - pci_write_config32(dev, 0x80, val); - val = pci_read_config32(dev, 0x80); - e0_later = !!(val & (1<<3)); - if(e0_later) { // pre_e0 bit 3 always be 0 and can not be changed - pci_write_config32(dev, 0x80, val_old); // restore it + + siblings = cpuid_ecx(0x80000008) & 0xff; + printk_debug("%d Sibling Cores found\n", siblings); + + /* For now assume all cpus have the same number of siblings */ + max_siblings = siblings + 1; + + /* Wishlist? 
make dual cores look like hyperthreading */ + + /* See if I am a sibling cpu */ + if (disable_siblings && (id.coreid != 0)) { + cpu->enabled = 0; + } + + if (id.coreid == 0) { + /* On the primary cpu find the siblings */ + for (i = 1; i <= siblings; i++) { + struct device_path cpu_path; + device_t new; + /* Build the cpu device path */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.u.apic.apic_id = + (0x10 + i*0x10 + id.nodeid); + new = alloc_dev(cpu->bus, &cpu_path); + if (!new) { + continue; + } + /* Report what I have done */ + printk_debug("CPU: %s %s\n", + dev_path(new), new->enabled?"enabled":"disabled"); + } } - - return e0_later; } -unsigned int read_nb_cfg_54(void) +struct node_core_id get_node_core_id(void) { - msr_t msr; - msr = rdmsr(NB_CFG_MSR); - return ( ( msr.hi >> (54-32)) & 1); -} - -struct node_core_id get_node_core_id(unsigned int nb_cfg_54) { struct node_core_id id; - // get the apicid via cpuid(1) ebx[27:24] - if(nb_cfg_54) { - // when NB_CFG[54] is set, nodid = ebx[27:25], coreid = ebx[24] - id.coreid = (cpuid_ebx(1) >> 24) & 0xf; - id.nodeid = (id.coreid>>1); - id.coreid &= 1; - } else { // single core should be here too + unsigned siblings; + /* Get the apicid at reset */ + id.nodeid = (cpuid_ebx(1) >> 24) & 0xff; + id.coreid = 0; + /* Find out how many siblings we have */ + siblings = cpuid_ecx(0x80000008) & 0xff; + if (siblings) { + unsigned bits; + msr_t msr; + bits = 0; + while ((1 << bits) <= siblings) + bits++; + + msr = rdmsr(NB_CFG_MSR); + if ((msr.hi >> (54-32)) & 1) { + // when NB_CFG[54] is set, nodeid = ebx[27:25], coreid = ebx[24] + id.coreid = id.nodeid & ((1 << bits) - 1); + id.nodeid >>= bits; + } else { // when NB_CFG[54] is clear, nodeid = ebx[26:24], coreid = ebx[27] - id.nodeid = (cpuid_ebx(1) >> 24) & 0xf; - id.coreid = (id.nodeid>>3); - id.nodeid &= 7; + id.coreid = id.nodeid >> 3; + id.nodeid &= 7; + } + } else { + if (!is_cpu_pre_e0()) { + id.nodeid >>= 1; + } } - return id; - - + return id; } + + +#if 0 static int 
get_max_siblings(int nodes) { device_t dev; @@ -169,76 +198,5 @@ unsigned get_apicid_base(unsigned ioapic_num) return apicid_base; } -#if 0 -void amd_sibling_init(device_t cpu) -{ - unsigned i, siblings; - struct cpuid_result result; - unsigned nb_cfg_54; - struct node_core_id id; - /* On the bootstrap processor see if I want sibling cpus enabled */ - if (first_time) { - first_time = 0; - get_option(&disable_siblings, "dual_core"); - } - result = cpuid(0x80000008); - /* See how many sibling cpus we have */ - /* Is dualcore supported */ - siblings = (result.ecx & 0xff); - if ( siblings < 1) { - return; - } - -#if 1 - printk_debug("CPU: %u %d siblings\n", - cpu->path.u.apic.apic_id, - siblings); #endif - - nb_cfg_54 = read_nb_cfg_54(); -#if 1 - id = get_node_core_id(nb_cfg_54); // pre e0 nb_cfg_54 can not be set - - /* See if I am a sibling cpu */ - //if ((cpu->path.u.apic.apic_id>>(nb_cfg_54?0:3)) & siblings ) { // siblings = 1, 3, 7, 15,.... - //if ( ( (cpu->path.u.apic.apic_id>>(nb_cfg_54?0:3)) % (siblings+1) ) != 0 ) { - if(id.coreid != 0) { - if (disable_siblings) { - cpu->enabled = 0; - } - return; - } -#endif - - /* I am the primary cpu start up my siblings */ - - for(i = 1; i <= siblings; i++) { - struct device_path cpu_path; - device_t new; - /* Build the cpu device path */ - cpu_path.type = DEVICE_PATH_APIC; - cpu_path.u.apic.apic_id = cpu->path.u.apic.apic_id + i * (nb_cfg_54?1:8); - - /* See if I can find the cpu */ - new = find_dev_path(cpu->bus, &cpu_path); - /* Allocate the new cpu device structure */ - if(!new) { - new = alloc_dev(cpu->bus, &cpu_path); - new->enabled = 1; - new->initialized = 0; - } - -#if 1 - printk_debug("CPU: %u has sibling %u\n", - cpu->path.u.apic.apic_id, - new->path.u.apic.apic_id); -#endif - /* Start the new cpu */ - if(new->enabled && !new->initialized) - start_cpu(new); - } - -} -#endif - diff --git a/src/cpu/amd/dualcore/dualcore.c b/src/cpu/amd/dualcore/dualcore.c index 1c3b2751a5..9c292b823c 100644 --- 
a/src/cpu/amd/dualcore/dualcore.c +++ b/src/cpu/amd/dualcore/dualcore.c @@ -1,99 +1,74 @@ /* 2004.12 yhlu add dual core support */ - -#ifndef SET_NB_CFG_54 -#define SET_NB_CFG_54 1 -#endif - #include "cpu/amd/dualcore/dualcore_id.c" -static inline unsigned get_core_num_in_bsp(unsigned nodeid) +static void do_k8_init_and_stop_secondaries(void) { - return ((pci_read_config32(PCI_DEV(0, 0x18+nodeid, 3), 0xe8)>>12) & 3); -} - -static inline -#if SET_NB_CFG_54 == 1 - uint8_t -#else - void -#endif - set_apicid_cpuid_lo(void) { -#if SET_NB_CFG_54 - //for pre_e0, even we set nb_cfg_54, but it will still be 0 - //for e0 later you should use get_node_id(read_nb_cfg_54()) even for single core cpu - //get siblings via cpuid(0x80000008) ecx[7:0] - #if CONFIG_MAX_PHYSICAL_CPUS != 8 - if( get_core_num_in_bsp(0) == 0) { - /*first node only has one core, pre_e0 - all e0 single core installed don't need enable lo too, - So if mixing e0 single core and dual core, - don't put single core in first socket */ - return 0; - } - #endif + struct node_core_id id; + device_t dev; + unsigned apicid; + unsigned max_siblings; + msr_t msr; - if(read_option(CMOS_VSTART_dual_core, CMOS_VLEN_dual_core, 0) != 0) { // disable dual_core - return 0; + /* Skip this if there was a built in self test failure */ + + if (is_cpu_pre_e0()) { + id.nodeid = lapicid() & 0x7; + id.coreid = 0; + } else { + /* Which cpu are we on? */ + id = get_node_core_id_x(); + + /* Set NB_CFG_MSR + * Linux expects the core to be in the least significant bits. + */ + msr = rdmsr(NB_CFG_MSR); + msr.hi |= (1<<(54-32)); // InitApicIdCpuIdLo + wrmsr(NB_CFG_MSR, msr); } - // set the NB_CFG[54]=1; why the OS will be happy with that ??? 
- msr_t msr; - msr = rdmsr(NB_CFG_MSR); - msr.hi |= (1<<(54-32)); // InitApicIdCpuIdLo - wrmsr(NB_CFG_MSR, msr); + /* For now assume all cpus have the same number of siblings */ + max_siblings = (cpuid_ecx(0x80000008) & 0xff) + 1; - return 1; + /* Enable extended apic ids */ + device_t dev_f0 = PCI_DEV(0, 0x18+id.nodeid, 0); + unsigned val = pci_read_config32(dev_f0, 0x68); + val |= (1 << 18) | (1 << 17); + pci_write_config32(dev_f0, 0x68, val); -#endif + /* Set the lapicid */ + lapic_write(LAPIC_ID,(0x10 + id.coreid*0x10 + id.nodeid) << 24); + /* Remember the cpuid */ + if (id.coreid == 0) { + dev = PCI_DEV(0, 0x18 + id.nodeid, 2); + pci_write_config32(dev, 0x9c, cpuid_eax(1)); + } + + /* Maybe call distinguish_cpu_resets only on the last core? */ + distinguish_cpu_resets(id.nodeid); + if (!boot_cpu()) { + stop_this_cpu(); + } } -static inline void real_start_other_core(unsigned nodeid) +static void k8_init_and_stop_secondaries(void) { - uint32_t dword; - // set PCI_DEV(0, 0x18+nodeid, 3), 0x44 bit 27 to redirect all MC4 accesses and error logging to core0 - dword = pci_read_config32(PCI_DEV(0, 0x18+nodeid, 3), 0x44); - dword |= 1<<27; // NbMcaToMstCpuEn bit - pci_write_config32(PCI_DEV(0, 0x18+nodeid, 3), 0x44, dword); - // set PCI_DEV(0, 0x18+nodeid, 0), 0x68 bit 5 to start core1 - dword = pci_read_config32(PCI_DEV(0, 0x18+nodeid, 0), 0x68); - dword |= 1<<5; - pci_write_config32(PCI_DEV(0, 0x18+nodeid, 0), 0x68, dword); -} + /* This doesn't work with Cache As Ram because it messes with + the MTRR state, which breaks the init detection. + do_k8_init_and_stop_secondaries should be usable by CAR code. 
+ */ -//it is running on core0 of every node -static inline void start_other_core(unsigned nodeid) { + int init_detected; - if(read_option(CMOS_VSTART_dual_core, CMOS_VLEN_dual_core, 0) != 0) { // disable dual_core - return; - } + init_detected = early_mtrr_init_detected(); + amd_early_mtrr_init(); - if( get_core_num() >0) { // defined in dualcore_id.c - real_start_other_core(nodeid); - } -} - -static inline unsigned get_nodes(void) -{ - return ((pci_read_config32(PCI_DEV(0, 0x18, 0), 0x60)>>4) & 7) + 1; -} - -//it is running on core0 of node0 -static inline void start_other_cores(void) { - unsigned nodes; - unsigned nodeid; - - if(read_option(CMOS_VSTART_dual_core, CMOS_VLEN_dual_core, 0) != 0) { // disable dual_core - return; - } - - nodes = get_nodes(); - - for(nodeid=0; nodeid 0) { - real_start_other_core(nodeid); - } + enable_lapic(); + init_timer(); + if (init_detected) { + asm volatile ("jmp __cpu_reset"); } + do_k8_init_and_stop_secondaries(); } diff --git a/src/cpu/amd/dualcore/dualcore_id.c b/src/cpu/amd/dualcore/dualcore_id.c index feab682851..75d977133a 100644 --- a/src/cpu/amd/dualcore/dualcore_id.c +++ b/src/cpu/amd/dualcore/dualcore_id.c @@ -11,8 +11,8 @@ static inline unsigned int read_nb_cfg_54(void) } struct node_core_id { - unsigned nodeid; - unsigned coreid; + unsigned nodeid:8; + unsigned coreid:8; }; static inline struct node_core_id get_node_core_id(unsigned nb_cfg_54) { diff --git a/src/cpu/amd/model_fxx/model_fxx_init.c b/src/cpu/amd/model_fxx/model_fxx_init.c index 3c526e78ca..bd3a1cea08 100644 --- a/src/cpu/amd/model_fxx/model_fxx_init.c +++ b/src/cpu/amd/model_fxx/model_fxx_init.c @@ -21,10 +21,7 @@ #include #include #include - -#if CONFIG_LOGICAL_CPUS==1 #include -#endif #include "model_fxx_msr.h" @@ -152,9 +149,6 @@ static void set_init_ecc_mtrrs(void) static void init_ecc_memory(unsigned node_id) { unsigned long startk, begink, endk; -#if K8_E0_MEM_HOLE_SIZEK != 0 - unsigned long hole_startk = 0, hole_endk = 0; -#endif unsigned long 
basek; struct mtrr_state mtrr_state; device_t f1_dev, f2_dev, f3_dev; @@ -199,25 +193,13 @@ static void init_ecc_memory(unsigned node_id) startk = (pci_read_config32(f1_dev, 0x40 + (node_id*8)) & 0xffff0000) >> 2; endk = ((pci_read_config32(f1_dev, 0x44 + (node_id*8)) & 0xffff0000) >> 2) + 0x4000; -#if K8_E0_MEM_HOLE_SIZEK != 0 - if (!is_cpu_pre_e0()) { - uint32_t val; - val = pci_read_config32(f1_dev, 0xf0); - if((val & 1)==1) { - hole_startk = ((val & (0xff<<24)) >> 10); - hole_endk = ((val & (0xff<<8))<<(16-10)) - startk; - hole_endk += hole_startk; - } - } -#endif - /* Don't start too early */ begink = startk; if (begink < CONFIG_LB_MEM_TOPK) { begink = CONFIG_LB_MEM_TOPK; } - printk_debug("Clearing memory %uK - %uK: ", startk, endk); + printk_debug("Clearing memory %uK - %uK: ", begink, endk); /* Save the normal state */ save_mtrr_state(&mtrr_state); @@ -234,9 +216,6 @@ static void init_ecc_memory(unsigned node_id) unsigned long size; void *addr; -#if K8_E0_MEM_HOLE_SIZEK != 0 - if ((basek >= hole_startk) && (basek < hole_endk)) continue; -#endif /* Report every 64M */ if ((basek % (64*1024)) == 0) { /* Restore the normal state */ @@ -340,6 +319,7 @@ static inline void k8_errata(void) /* Erratum 91 prefetch miss is handled in the kernel */ + /* Erratum 106 ... */ msr = rdmsr_amd(LS_CFG_MSR); msr.lo |= 1 << 25; @@ -350,7 +330,7 @@ static inline void k8_errata(void) msr.hi |= 1 << (43 - 32); wrmsr_amd(BU_CFG_MSR, msr); - if(is_cpu_d0()) { + if (is_cpu_pre_e0() && !is_cpu_pre_d0()) { /* Erratum 110 ...*/ msr = rdmsr_amd(CPU_ID_HYPER_EXT_FEATURES); msr.hi |=1; @@ -362,26 +342,34 @@ static inline void k8_errata(void) msr = rdmsr_amd(CPU_ID_EXT_FEATURES_MSR); msr.hi |=1; wrmsr_amd(CPU_ID_EXT_FEATURES_MSR, msr); + + /* Erratum 113 ... 
*/ + msr = rdmsr_amd(BU_CFG_MSR); + msr.hi |= (1 << 16); + wrmsr_amd(BU_CFG_MSR, msr); } /* Erratum 122 */ - msr = rdmsr(HWCR_MSR); - msr.lo |= 1 << 6; - wrmsr(HWCR_MSR, msr); + if (!is_cpu_pre_c0()) { + msr = rdmsr(HWCR_MSR); + msr.lo |= 1 << 6; + wrmsr(HWCR_MSR, msr); + } + + /* Erratum 123? dual core deadlock? */ + + /* Erratum 131 */ + msr = rdmsr(NB_CFG_MSR); + msr.lo |= 1 << 20; + wrmsr(NB_CFG_MSR, msr); } -void model_fxx_init(device_t dev) +void model_fxx_init(device_t cpu) { unsigned long i; msr_t msr; -#if CONFIG_LOGICAL_CPUS struct node_core_id id; - unsigned siblings; - id.coreid=0; -#else - unsigned nodeid; -#endif /* Turn on caching if we haven't already */ x86_enable_cache(); @@ -404,43 +392,18 @@ void model_fxx_init(device_t dev) /* Enable the local cpu apics */ setup_lapic(); -#if CONFIG_LOGICAL_CPUS == 1 - siblings = cpuid_ecx(0x80000008) & 0xff; + /* Find our node and core */ + id = get_node_core_id(); - id = get_node_core_id(read_nb_cfg_54()); // pre e0 nb_cfg_54 can not be set - - if(siblings>0) { - msr = rdmsr_amd(CPU_ID_FEATURES_MSR); - msr.lo |= 1 << 28; - wrmsr_amd(CPU_ID_FEATURES_MSR, msr); - - msr = rdmsr_amd(LOGICAL_CPUS_NUM_MSR); - msr.lo = (siblings+1)<<16; - wrmsr_amd(LOGICAL_CPUS_NUM_MSR, msr); - - msr = rdmsr_amd(CPU_ID_EXT_FEATURES_MSR); - msr.hi |= 1<<(33-32); - wrmsr_amd(CPU_ID_EXT_FEATURES_MSR, msr); - } - - - /* Is this a bad location? In particular can another node prefecth + /* Is this a bad location? In particular can another node prefetch * data from this node before we have initialized it? */ - if (id.coreid == 0) init_ecc_memory(id.nodeid); // only do it for core 0 -#else - /* Is this a bad location? In particular can another node prefecth - * data from this node before we have initialized it? 
- */ - nodeid = lapicid() & 0xf; - init_ecc_memory(nodeid); -#endif - -#if CONFIG_LOGICAL_CPUS==1 - /* Start up my cpu siblings */ -// if(id.coreid==0) amd_sibling_init(dev); // Don't need core1 is already be put in the CPU BUS in bus_cpu_scan -#endif + if (id.coreid == 0) { + init_ecc_memory(id.nodeid); // only do it for core 0 + } + /* Deal with sibling cpus */ + amd_sibling_init(cpu, id); } static struct device_operations cpu_dev_ops = { @@ -451,7 +414,7 @@ static struct cpu_device_id cpu_table[] = { { X86_VENDOR_AMD, 0xf51 }, /* SH7-B3 */ { X86_VENDOR_AMD, 0xf58 }, /* SH7-C0 */ { X86_VENDOR_AMD, 0xf48 }, -#if 1 + { X86_VENDOR_AMD, 0xf5A }, /* SH7-CG */ { X86_VENDOR_AMD, 0xf4A }, { X86_VENDOR_AMD, 0xf7A }, @@ -483,7 +446,6 @@ static struct cpu_device_id cpu_table[] = { { X86_VENDOR_AMD, 0x20fc2 }, { X86_VENDOR_AMD, 0x20f12 }, /* JH-E6 */ { X86_VENDOR_AMD, 0x20f32 }, -#endif { 0, 0 }, }; diff --git a/src/include/cpu/amd/dualcore.h b/src/include/cpu/amd/dualcore.h index a8c49475ec..a38565b01f 100644 --- a/src/include/cpu/amd/dualcore.h +++ b/src/include/cpu/amd/dualcore.h @@ -2,18 +2,13 @@ #define CPU_AMD_DUALCORE_H struct device; -void amd_sibling_init(struct device *cpu); - -int is_e0_later_in_bsp(int nodeid); -unsigned int read_nb_cfg_54(void); struct node_core_id { unsigned nodeid; unsigned coreid; }; -// it can be used to get unitid and coreid it running only -struct node_core_id get_node_core_id(unsigned int nb_cfg_54); -unsigned get_apicid_base(unsigned ioapic_num); +void amd_sibling_init(struct device *cpu, struct node_core_id id); +struct node_core_id get_node_core_id(void); #endif /* CPU_AMD_DUALCORE_H */ diff --git a/src/northbridge/amd/amdk8/amdk8.h b/src/northbridge/amd/amdk8/amdk8.h index ca8e8dc3d2..89c03fc16b 100644 --- a/src/northbridge/amd/amdk8/amdk8.h +++ b/src/northbridge/amd/amdk8/amdk8.h @@ -136,6 +136,7 @@ #define DCL_DisInRcvrs (1<<24) #define DCL_BypMax_SHIFT 25 #define DCL_En2T (1<<28) +#define DCL_UpperCSMap (1<<29) #define 
DRAM_CONFIG_HIGH 0x94 #define DCH_ASYNC_LAT_SHIFT 0 #define DCH_ASYNC_LAT_MASK 0xf diff --git a/src/northbridge/amd/amdk8/coherent_ht.c b/src/northbridge/amd/amdk8/coherent_ht.c index c79a432ab5..db646a5265 100644 --- a/src/northbridge/amd/amdk8/coherent_ht.c +++ b/src/northbridge/amd/amdk8/coherent_ht.c @@ -155,23 +155,6 @@ static void disable_probes(void) } -#ifndef ENABLE_APIC_EXT_ID -#define ENABLE_APIC_EXT_ID 0 -#endif - -static void enable_apic_ext_id(u8 node) -{ -#if ENABLE_APIC_EXT_ID==1 -#warning "FIXME Is the right place to enable apic ext id here?" - - u32 val; - - val = pci_read_config32(NODE_HT(node), 0x68); - val |= (HTTC_APIC_EXT_SPUR | HTTC_APIC_EXT_ID | HTTC_APIC_EXT_BRD_CST); - pci_write_config32(NODE_HT(node), 0x68, val); -#endif -} - static void enable_routing(u8 node) { u32 val; @@ -292,20 +275,18 @@ static int verify_connection(u8 dest) return 1; } -static uint16_t read_freq_cap(device_t dev, uint8_t pos) +static unsigned read_freq_cap(device_t dev, unsigned pos) { /* Handle bugs in valid hypertransport frequency reporting */ - uint16_t freq_cap; + unsigned freq_cap; uint32_t id; freq_cap = pci_read_config16(dev, pos); freq_cap &= ~(1 << HT_FREQ_VENDOR); /* Ignore Vendor HT frequencies */ -#if K8_HT_FREQ_1G_SUPPORT == 1 if (!is_cpu_pre_e0()) { return freq_cap; } -#endif id = pci_read_config32(dev, 0); @@ -339,8 +320,10 @@ static int optimize_connection(device_t node1, uint8_t link1, device_t node2, ui /* See if I am changing the link freqency */ old_freq = pci_read_config8(node1, link1 + PCI_HT_CAP_HOST_FREQ); + old_freq &= 0x0f; needs_reset |= old_freq != freq; old_freq = pci_read_config8(node2, link2 + PCI_HT_CAP_HOST_FREQ); + old_freq &= 0x0f; needs_reset |= old_freq != freq; /* Set the Calulcated link frequency */ @@ -382,7 +365,6 @@ static int optimize_connection(device_t node1, uint8_t link1, device_t node2, ui /* Set node2's widths */ pci_write_config8(node2, link2 + PCI_HT_CAP_HOST_WIDTH + 1, width); - return needs_reset; } @@ -1625,9 
+1607,9 @@ static void clear_dead_routes(unsigned nodes) } #endif /* CONFIG_MAX_PHYSICAL_CPUS > 1 */ -#if CONFIG_LOGICAL_CPUS==1 -static unsigned verify_dualcore(unsigned nodes) +static unsigned count_cpus(unsigned nodes) { +#if CONFIG_LOGICAL_CPUS==1 unsigned node, totalcpus, tmp; totalcpus = 0; @@ -1637,25 +1619,21 @@ static unsigned verify_dualcore(unsigned nodes) } return totalcpus; +#else + return nodes; +#endif } -#endif static void coherent_ht_finalize(unsigned nodes) { + unsigned total_cpus; + unsigned cpu_node_count; unsigned node; int rev_a0; -#if CONFIG_LOGICAL_CPUS==1 - unsigned total_cpus; + total_cpus = count_cpus(nodes); + cpu_node_count = ((total_cpus -1)<<16)|((nodes - 1) << 4); - if(read_option(CMOS_VSTART_dual_core, CMOS_VLEN_dual_core, 0) == 0) { /* dual_core */ - total_cpus = verify_dualcore(nodes); - } - else { - total_cpus = nodes; - } -#endif - /* set up cpu count and node count and enable Limit * Config Space Range for all available CPUs. * Also clear non coherent hypertransport bus range @@ -1672,11 +1650,7 @@ static void coherent_ht_finalize(unsigned nodes) /* Set the Total CPU and Node count in the system */ val = pci_read_config32(dev, 0x60); val &= (~0x000F0070); -#if CONFIG_LOGICAL_CPUS==1 - val |= ((total_cpus-1)<<16)|((nodes-1)<<4); -#else - val |= ((nodes-1)<<16)|((nodes-1)<<4); -#endif + val |= cpu_node_count; pci_write_config32(dev, 0x60, val); /* Only respond to real cpu pci configuration cycles @@ -1786,6 +1760,33 @@ static int optimize_link_read_pointers(unsigned nodes, int needs_reset) return needs_reset; } +static void startup_other_cores(unsigned nodes) +{ + unsigned node; + for(node = 0; node < nodes; node++) { + device_t dev; + unsigned siblings; + dev = NODE_MC(node); + siblings = (pci_read_config32(dev, 0xe8) >> 12) & 0x3; + + if (siblings) { + device_t dev_f0; + unsigned val; + /* Redirect all MC4 accesses and error logging to core0 */ + val = pci_read_config32(dev, 0x44); + val |= (1 << 27); //NbMcaToMstCpuEn bit + 
pci_write_config32(dev, 0x44, val); + + /* Enable the second core */ + dev_f0 = NODE_HT(node); + val = pci_read_config32(dev_f0, 0x68); + val |= ( 1 << 5); + pci_write_config32(dev_f0, 0x68, val); + } + } +} + + static int setup_coherent_ht_domain(void) { struct setup_smp_result result; @@ -1799,15 +1800,15 @@ static int setup_coherent_ht_domain(void) enable_bsp_routing(); #if CONFIG_MAX_PHYSICAL_CPUS > 1 - result = setup_smp(); - result.nodes = verify_mp_capabilities(result.nodes); - clear_dead_routes(result.nodes); + result = setup_smp(); #endif - + result.nodes = verify_mp_capabilities(result.nodes); + clear_dead_routes(result.nodes); if (result.nodes == 1) { setup_uniprocessor(); } coherent_ht_finalize(result.nodes); + startup_other_cores(result.nodes); result.needs_reset = apply_cpu_errata_fixes(result.nodes, result.needs_reset); result.needs_reset = optimize_link_read_pointers(result.nodes, result.needs_reset); return result.needs_reset; diff --git a/src/northbridge/amd/amdk8/northbridge.c b/src/northbridge/amd/amdk8/northbridge.c index e45aff8242..7ef1266130 100644 --- a/src/northbridge/amd/amdk8/northbridge.c +++ b/src/northbridge/amd/amdk8/northbridge.c @@ -17,9 +17,9 @@ #include #include +#include #if CONFIG_LOGICAL_CPUS==1 -#include #include #endif @@ -27,10 +27,7 @@ #include "root_complex/chip.h" #include "northbridge.h" #include "amdk8.h" - -#if K8_E0_MEM_HOLE_SIZEK != 0 -#include "./cpu_rev.c" -#endif +#include "cpu_rev.c" #define FX_DEVS 8 static device_t __f0_dev[FX_DEVS]; @@ -640,6 +637,41 @@ static uint32_t find_pci_tolm(struct bus *bus) return tolm; } +static uint32_t hoist_memory(unsigned long mmio_basek, int i) +{ + int ii; + uint32_t carry_over; + device_t dev; + uint32_t base, limit; + uint32_t basek; + uint32_t hoist; + + carry_over = (4*1024*1024) - mmio_basek; + for(ii=7;ii>i;ii--) { + + base = f1_read_config32(0x40 + (ii << 3)); + limit = f1_read_config32(0x44 + (ii << 3)); + if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) { + continue; 
+ } + f1_write_config32(0x44 + (ii << 3),limit + (carry_over << 2)); + f1_write_config32(0x40 + (ii << 3),base + (carry_over << 2)); + } + limit = f1_read_config32(0x44 + (i << 3)); + f1_write_config32(0x44 + (i << 3),limit + (carry_over << 2)); + dev = __f1_dev[i]; + base = pci_read_config32(dev, 0x40 + (i << 3)); + basek = (pci_read_config32(dev, 0x40 + (i << 3)) & 0xffff0000) >> 2; + hoist = /* hole start address */ + ((mmio_basek << 10) & 0xff000000) + + /* hole address to memory controller address */ + (((basek + carry_over) >> 6) & 0x0000ff00) + + /* enable */ + 1; + pci_write_config32(dev, 0xf0, hoist); + return carry_over; +} + static void pci_domain_set_resources(device_t dev) { unsigned long mmio_basek; @@ -648,41 +680,23 @@ static void pci_domain_set_resources(device_t dev) pci_tolm = find_pci_tolm(&dev->link[0]); + /* Work around for NUMA bug in all kernels before 2.6.13. + If pci memory hole is too small, the kernel memory to NUMA + node mapping will fail to initialize and system will run in + non-NUMA mode. 
+ */ + if(pci_tolm > 0xf8000000) pci_tolm = 0xf8000000; + #warning "FIXME handle interleaved nodes" mmio_basek = pci_tolm >> 10; /* Round mmio_basek to something the processor can support */ mmio_basek &= ~((1 << 6) -1); -#if 1 -#warning "FIXME improve mtrr.c so we don't use up all of the mtrrs with a 64M MMIO hole" - /* Round the mmio hold to 64M */ - mmio_basek &= ~((64*1024) - 1); -#endif - -#if K8_E0_MEM_HOLE_SIZEK != 0 - if (!is_cpu_pre_e0()) - for (i = 0; i < 8; i++) { - uint32_t base; - base = f1_read_config32(0x40 + (i << 3)); - if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) { - continue; - } - - base = pci_read_config32(__f1_dev[i], 0xf0); - if((base & 1)==0) continue; - base &= 0xff<<24; - base >>= 10; - if (mmio_basek > base) { - mmio_basek = base; - } - break; // only one hole - } -#endif - idx = 10; for(i = 0; i < 8; i++) { uint32_t base, limit; unsigned basek, limitk, sizek; + base = f1_read_config32(0x40 + (i << 3)); limit = f1_read_config32(0x44 + (i << 3)); if ((base & ((1<<1)|(1<<0))) != ((1<<1)|(1<<0))) { @@ -708,6 +722,9 @@ static void pci_domain_set_resources(device_t dev) pre_sizek = mmio_basek - basek; ram_resource(dev, idx++, basek, pre_sizek); sizek -= pre_sizek; + if(! 
is_cpu_pre_e0() ) { + sizek += hoist_memory(mmio_basek,i); + } basek = mmio_basek; } if ((basek + sizek) <= 4*1024*1024) { @@ -767,54 +784,16 @@ static struct device_operations pci_domain_ops = { .ops_pci_bus = &pci_cf8_conf1, }; -#define APIC_ID_OFFSET 0x10 - static unsigned int cpu_bus_scan(device_t dev, unsigned int max) { struct bus *cpu_bus; device_t dev_mc; - int bsp_apic_id; - int apic_id_offset; int i,j; - unsigned nb_cfg_54; - int enable_apic_ext_id; - unsigned siblings; -#if CONFIG_LOGICAL_CPUS == 1 - int e0_later_single_core; - int disable_siblings; -#endif - nb_cfg_54 = 0; - enable_apic_ext_id = 0; - siblings = 0; - - /* Find the bootstrap processors apicid */ - bsp_apic_id = lapicid(); - - /* See if I will enable extended ids' */ - apic_id_offset = bsp_apic_id; - -#if CONFIG_LOGICAL_CPUS == 1 - disable_siblings = !CONFIG_LOGICAL_CPUS; - get_option(&disable_siblings, "dual_core"); - - // for pre_e0, nb_cfg_54 can not be set, ( even set, when you read it still be 0) - // How can I get the nb_cfg_54 of every node' nb_cfg_54 in bsp??? 
and differ d0 and e0 single core - - nb_cfg_54 = read_nb_cfg_54(); -#endif dev_mc = dev_find_slot(0, PCI_DEVFN(0x18, 0)); if (!dev_mc) { die("0:18.0 not found?"); } - if (pci_read_config32(dev_mc, 0x68) & (HTTC_APIC_EXT_ID|HTTC_APIC_EXT_BRD_CST)) - { - enable_apic_ext_id = 1; - if (apic_id_offset == 0) { - /* bsp apic id is not changed */ - apic_id_offset = APIC_ID_OFFSET; - } - } /* Find which cpus are present */ cpu_bus = &dev->link[0]; @@ -834,82 +813,36 @@ static unsigned int cpu_bus_scan(device_t dev, unsigned int max) PCI_DEVFN(0x18 + i, j)); } } + + /* Build the cpu device path */ + cpu_path.type = DEVICE_PATH_APIC; + cpu_path.u.apic.apic_id = 0x10 + i; -#if CONFIG_LOGICAL_CPUS == 1 - e0_later_single_core = 0; - if ((!disable_siblings) && dev && dev->enabled) { - j = (pci_read_config32(dev, 0xe8) >> 12) & 3; // dev is func 3 - printk_debug(" %s siblings=%d\r\n", dev_path(dev), j); + /* See if I can find the cpu */ + cpu = find_dev_path(cpu_bus, &cpu_path); - if(nb_cfg_54) { - // For e0 single core if nb_cfg_54 is set, apicid will be 0, 2, 4.... 
- // ----> you can mixed single core e0 and dual core e0 at any sequence - // That is the typical case - - if(j == 0 ){ - e0_later_single_core = is_e0_later_in_bsp(i); // single core - } else { - e0_later_single_core = 0; - } - if(e0_later_single_core) { - printk_debug("\tFound e0 single core\r\n"); - j=1; - } - - if(siblings > j ) { - //actually we can't be here, because d0 nb_cfg_54 can not be set - //even worse is_e0_later_in_bsp() can not find out if it is d0 or e0 - - die("When NB_CFG_54 is set, if you want to mix e0 (single core and dual core) and single core(pre e0) CPUs, you need to put all the single core (pre e0) CPUs before all the (e0 single or dual core) CPUs\r\n"); - } - else { - siblings = j; - } - } else { - siblings = j; - } - } -#endif -#if CONFIG_LOGICAL_CPUS==1 - for (j = 0; j <= (e0_later_single_core?0:siblings); j++ ) { -#else - for (j = 0; j <= siblings; j++ ) { -#endif - /* Build the cpu device path */ - cpu_path.type = DEVICE_PATH_APIC; - cpu_path.u.apic.apic_id = i * (nb_cfg_54?(siblings+1):1) + j * (nb_cfg_54?1:8); - - /* See if I can find the cpu */ - cpu = find_dev_path(cpu_bus, &cpu_path); - - /* Enable the cpu if I have the processor */ - if (dev && dev->enabled) { - if (!cpu) { - cpu = alloc_dev(cpu_bus, &cpu_path); - } - if (cpu) { - cpu->enabled = 1; - } + /* Enable the cpu if I have the processor */ + if (dev && dev->enabled) { + if (!cpu) { + cpu = alloc_dev(cpu_bus, &cpu_path); } - - /* Disable the cpu if I don't have the processor */ - if (cpu && (!dev || !dev->enabled)) { - cpu->enabled = 0; - } - - /* Report what I have done */ if (cpu) { - if(enable_apic_ext_id) { - if(cpu->path.u.apic.apic_idpath.u.apic.apic_id > siblings) || (bsp_apic_id!=0) ) - cpu->path.u.apic.apic_id += apic_id_offset; - } - } - printk_debug("CPU: %s %s\n", - dev_path(cpu), cpu->enabled?"enabled":"disabled"); + cpu->enabled = 1; } - } //j + } + + /* Disable the cpu if I don't have the processor */ + if (cpu && (!dev || !dev->enabled)) { + cpu->enabled = 
0; + } + + /* Report what I have done */ + if (cpu) { + printk_debug("CPU: %s %s\n", + dev_path(cpu), cpu->enabled?"enabled":"disabled"); + } } + return max; } diff --git a/src/northbridge/amd/amdk8/raminit.c b/src/northbridge/amd/amdk8/raminit.c index 5d9c320637..74e432d084 100644 --- a/src/northbridge/amd/amdk8/raminit.c +++ b/src/northbridge/amd/amdk8/raminit.c @@ -585,6 +585,16 @@ static void hw_enable_ecc(const struct mem_controller *ctrl) } +static void e_step_cpu(const struct mem_controller *ctrl) +{ + uint32_t dcl,data32; + + /* set bit 29 (upper cs map) of function 2 offset 0x90 */ + dcl = pci_read_config32(ctrl->f2, DRAM_CONFIG_LOW); + dcl |= DCL_UpperCSMap; + pci_write_config32(ctrl->f2, DRAM_CONFIG_LOW, dcl); +} + static int is_dual_channel(const struct mem_controller *ctrl) { uint32_t dcl; @@ -714,28 +724,14 @@ hw_err: return sz; } -static const unsigned cs_map_aa[15] = { - /* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */ - 0, 1, 3, 6, 0, - 0, 2, 4, 7, 9, - 0, 0, 5, 8,10, -}; - static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz, unsigned index) { - uint32_t base0, base1, map; + uint32_t base0, base1; uint32_t dch; if (sz.side1 != sz.side2) { sz.side2 = 0; } - map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP); - map &= ~(0xf << (index * 4)); -#if K8_4RANK_DIMM_SUPPORT == 1 - if(sz.rank == 4) { - map &= ~(0xf << ( (index + 2) * 4)); - } -#endif /* For each base register. * Place the dimm size in 32 MB quantities in the bits 31 - 21. 
@@ -747,22 +743,6 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz /* Make certain side1 of the dimm is at least 32MB */ if (sz.side1 >= (25 +3)) { - if(is_cpu_pre_d0()) { - map |= (sz.side1 - (25 + 3)) << (index *4); -#if K8_4RANK_DIMM_SUPPORT == 1 - if(sz.rank == 4) { - map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4); - } -#endif - } - else { - map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << (index*4); -#if K8_4RANK_DIMM_SUPPORT == 1 - if(sz.rank == 4) { - map |= cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ] << ( (index + 2) * 4); - } -#endif - } base0 = (1 << ((sz.side1 - (25 + 3)) + 21)) | 1; } @@ -791,8 +771,6 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz } #endif - pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map); - /* Enable the memory clocks for this DIMM */ if (base0) { dch = pci_read_config32(ctrl->f2, DRAM_CONFIG_HIGH); @@ -806,6 +784,52 @@ static void set_dimm_size(const struct mem_controller *ctrl, struct dimm_size sz } } + +static void set_dimm_map(const struct mem_controller *ctrl, + struct dimm_size sz, unsigned index) +{ + static const unsigned cs_map_aa[15] = { + /* (row=12, col=8)(14, 12) ---> (0, 0) (2, 4) */ + 0, 1, 3, 6, 0, + 0, 2, 4, 7, 9, + 0, 0, 5, 8,10, + }; + uint32_t map; + int row,col; + + map = pci_read_config32(ctrl->f2, DRAM_BANK_ADDR_MAP); + map &= ~(0xf << (index * 4)); + +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + map &= ~(0xf << ( (index + 2) * 4)); + } +#endif + + if (is_cpu_pre_d0()) { + map |= (sz.side1 - (25 + 3)) << (index *4); +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + map |= (sz.side1 - (25 + 3)) << ( (index + 2) * 4); + } +#endif + } else { + unsigned val; + val = cs_map_aa[(sz.rows - 12) * 5 + (sz.col - 8) ]; + if(val == 0) { + print_err("Invalid Column or Row count\r\n"); + val = 7; + } + map |= val << (index*4); +#if K8_4RANK_DIMM_SUPPORT == 1 + if(sz.rank == 4) { + map |= val << ( (index + 2) * 4); + } 
+#endif + } + pci_write_config32(ctrl->f2, DRAM_BANK_ADDR_MAP, map); +} + static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask) { int i; @@ -820,6 +844,7 @@ static long spd_set_ram_size(const struct mem_controller *ctrl, long dimm_mask) return -1; /* Report SPD error */ } set_dimm_size(ctrl, sz, i); + set_dimm_map(ctrl, sz, i); } return dimm_mask; } @@ -971,7 +996,7 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl) if(is_dual_channel(ctrl)) { /* Also we run out of address mask bits if we try and interleave 8 4GB dimms */ if ((bits == 3) && (common_size == (1 << (32 - 3)))) { -// print_debug("8 4GB chip selects cannot be interleaved\r\n"); + print_spew("8 4GB chip selects cannot be interleaved\r\n"); return 0; } csbase_inc <<=1; @@ -981,7 +1006,7 @@ static unsigned long interleave_chip_selects(const struct mem_controller *ctrl) csbase_inc = csbase_low_d0[common_cs_mode]; if(is_dual_channel(ctrl)) { if( (bits==3) && (common_cs_mode > 8)) { -// print_debug("8 cs_mode>8 chip selects cannot be interleaved\r\n"); + print_spew("8 cs_mode>8 chip selects cannot be interleaved\r\n"); return 0; } csbase_inc <<=1; @@ -1100,25 +1125,6 @@ unsigned long memory_end_k(const struct mem_controller *ctrl, int max_node_id) return end_k; } -#if K8_E0_MEM_HOLE_SIZEK != 0 -#define K8_E0_MEM_HOLE_LIMITK 4*1024*1024 -#define K8_E0_MEM_HOLE_BASEK (K8_E0_MEM_HOLE_LIMITK - K8_E0_MEM_HOLE_SIZEK ) - -static void set_e0_mem_hole(const struct mem_controller *ctrl, unsigned base_k) -{ - /* Route the addresses to the controller node */ - unsigned val; - - val = pci_read_config32(ctrl->f1,0xf0); - - val &= 0x00ff00fe; - val = (K8_E0_MEM_HOLE_BASEK << 10) | ((K8_E0_MEM_HOLE_SIZEK+base_k)>>(16-10)) | 1; - - pci_write_config32(ctrl->f1, 0xf0, val); -} - -#endif - static void order_dimms(const struct mem_controller *ctrl) { unsigned long tom_k, base_k; @@ -1135,14 +1141,6 @@ static void order_dimms(const struct mem_controller *ctrl) /* Compute 
the memory base address */ base_k = memory_end_k(ctrl, ctrl->node_id); tom_k += base_k; -#if K8_E0_MEM_HOLE_SIZEK != 0 - if(!is_cpu_pre_e0()) { - /* See if I need to check the range cover hole */ - if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (tom_k > K8_E0_MEM_HOLE_BASEK)) { - tom_k += K8_E0_MEM_HOLE_SIZEK; - } - } -#endif route_dram_accesses(ctrl, base_k, tom_k); set_top_mem(tom_k); } @@ -2145,12 +2143,11 @@ static void sdram_set_spd_registers(const struct mem_controller *ctrl) struct spd_set_memclk_result result; const struct mem_param *param; long dimm_mask; -#if 1 + if (!controller_present(ctrl)) { -// print_debug("No memory controller present\r\n"); + print_debug("No memory controller present\r\n"); return; } -#endif hw_enable_ecc(ctrl); activate_spd_rom(ctrl); dimm_mask = spd_detect_dimms(ctrl); @@ -2176,6 +2173,10 @@ static void sdram_set_spd_registers(const struct mem_controller *ctrl) if (dimm_mask < 0) goto hw_spd_err; order_dimms(ctrl); + if( !is_cpu_pre_e0() ) { + print_debug("E step CPU\r\n"); + // e_step_cpu(ctrl); // Socket 939 only. + } return; hw_spd_err: /* Unrecoverable error reading SPD data */ @@ -2280,22 +2281,6 @@ static void sdram_enable(int controllers, const struct mem_controller *ctrl) } while(((dcl & DCL_MemClrStatus) == 0) || ((dcl & DCL_DramEnable) == 0) ); } - // init e0 mem hole here -#if K8_E0_MEM_HOLE_SIZEK != 0 - if (!is_cpu_pre_e0()) { - uint32_t base, limit; - unsigned base_k, limit_k; - base = pci_read_config32(ctrl->f1, 0x40 + (i << 3)); - limit = pci_read_config32(ctrl->f1, 0x44 + (i << 3)); - base_k = (base & 0xffff0000) >> 2; - limit_k = ((limit + 0x00010000) & 0xffff0000) >> 2; - if ((base_k <= K8_E0_MEM_HOLE_BASEK) && (limit_k > K8_E0_MEM_HOLE_BASEK)) { - set_e0_mem_hole(ctrl+i, base_k); - } - } - -#endif - print_debug(" done\r\n"); }