arch/arm64/armv8/mmu: Add support for 48bit VA
The VA space needs to be extended to 48 bits, as on Cavium SoCs the MMIO
starts at 1 << 47. The following changes were done to both coreboot and
libpayload:

* Use page table level 0
* Increase VA bits to 48
* Enable 256TB in the MMU controller
* Add additional asserts

Tested on Cavium SoC and two ARM64 Chromebooks.

Change-Id: I89e6a4809b6b725c3945bad7fce82b0dfee7c262
Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Reviewed-on: https://review.coreboot.org/24970
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Julius Werner <jwerner@chromium.org>
commit 57afc5e0f2
parent 3d9462a07f
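For readers skimming the diff, here is a minimal standalone sketch (not part of the patch) of how a 48-bit VA is split across the four lookup levels with a 4KB granule. The shift and index arithmetic mirrors the L0..L3 macros added in the headers below; the LVL_ADDR_SHIFT/LVL_INDEX helpers are invented here for brevity, and the example address is the Cavium-style MMIO base at 1 << 47 from the commit message.

/* Standalone illustration only: split a 48-bit VA into per-level indices. */
#include <stdint.h>
#include <stdio.h>

#define GRANULE_SIZE_SHIFT      12
#define BITS_RESOLVED_PER_LVL   (GRANULE_SIZE_SHIFT - 3)        /* 9 */
#define LVL_ADDR_SHIFT(lvl)     (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * (3 - (lvl)))
#define LVL_INDEX(va, lvl)      (((va) >> LVL_ADDR_SHIFT(lvl)) & ((1UL << BITS_RESOLVED_PER_LVL) - 1))

int main(void)
{
        uint64_t va = 1ULL << 47;       /* start of the Cavium MMIO window */

        for (int lvl = 0; lvl <= 3; lvl++)
                printf("L%d index = %llu (shift %d)\n", lvl,
                       (unsigned long long)LVL_INDEX(va, lvl), LVL_ADDR_SHIFT(lvl));
        /* Prints L0 index = 256 and 0 for L1..L3: a single L1-rooted walk only
         * covers 512 GiB, so an address this high needs the level-0 table. */
        return 0;
}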
@@ -172,6 +172,7 @@ static uint64_t init_xlat_table(uint64_t base_addr,
                                 uint64_t size,
                                 uint64_t tag)
 {
+        uint64_t l0_index = (base_addr & L0_ADDR_MASK) >> L0_ADDR_SHIFT;
         uint64_t l1_index = (base_addr & L1_ADDR_MASK) >> L1_ADDR_SHIFT;
         uint64_t l2_index = (base_addr & L2_ADDR_MASK) >> L2_ADDR_SHIFT;
         uint64_t l3_index = (base_addr & L3_ADDR_MASK) >> L3_ADDR_SHIFT;
@@ -179,10 +180,10 @@ static uint64_t init_xlat_table(uint64_t base_addr,
         uint64_t desc;
         uint64_t attr = get_block_attr(tag);
 
-        /* L1 table lookup
-         * If VA has bits more than L2 can resolve, lookup starts at L1
-         * Assumption: we don't need L0 table in coreboot */
-        if (BITS_PER_VA > L1_ADDR_SHIFT) {
+        /* L0 entry stores a table descriptor (doesn't support blocks) */
+        table = get_next_level_table(&table[l0_index], L1_XLAT_SIZE);
+
+        /* L1 table lookup */
         if ((size >= L1_XLAT_SIZE) &&
             IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) {
                 /* If block address is aligned and size is greater than
@@ -193,12 +194,11 @@ static uint64_t init_xlat_table(uint64_t base_addr,
                 /* L2 lookup is not required */
                 return L1_XLAT_SIZE;
         }
-        table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
-        }
 
-        /* L2 table lookup
-         * If lookup was performed at L1, L2 table addr is obtained from L1 desc
-         * else, lookup starts at ttbr address */
+        /* L1 entry stores a table descriptor */
+        table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
+
+        /* L2 table lookup */
         if ((size >= L2_XLAT_SIZE) &&
             IS_ALIGNED(base_addr, (1UL << L2_ADDR_SHIFT))) {
                 /* If block address is aligned and size is greater than
@@ -226,6 +226,7 @@ static void sanity_check(uint64_t addr, uint64_t size)
 {
         assert(!(addr & GRANULE_SIZE_MASK) &&
                !(size & GRANULE_SIZE_MASK) &&
+               (addr + size < (1UL << BITS_PER_VA)) &&
                size >= GRANULE_SIZE);
 }
 
@@ -344,7 +345,7 @@ void mmu_enable(void)
 
         /* Initialize TCR flags */
         raw_write_tcr_current(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
-                              TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB |
+                              TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_256TB |
                               TCR_TBI_USED);
 
         /* Initialize TTBR */
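A quick, illustrative sanity check on the TCR change above (assuming TCR_TOSZ and TCR_PS_256TB follow the standard ARMv8 TCR encoding, where T0SZ = 64 - VA bits and PS selects the physical address range):

/* Illustrative arithmetic only; not part of the patch. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
        const unsigned bits_per_va = 48;            /* new BITS_PER_VA */
        const uint64_t cavium_mmio = 1ULL << 47;    /* MMIO base from the commit message */

        assert(64 - bits_per_va == 16);             /* T0SZ implied by a 48-bit VA */
        assert(cavium_mmio >= (1ULL << 36));        /* beyond the old 64GB (2^36) PA range */
        assert(cavium_mmio < (1ULL << 48));         /* inside the new 256TB (2^48) range */
        return 0;
}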
@@ -83,7 +83,7 @@ extern char _start[], _end[];
 /* XLAT Table Init Attributes */
 
 #define VA_START                0x0
-#define BITS_PER_VA             33
+#define BITS_PER_VA             48
 #define MIN_64_BIT_ADDR         (1UL << 32)
 /* Granule size of 4KB is being used */
 #define GRANULE_SIZE_SHIFT      12
@@ -92,14 +92,12 @@ extern char _start[], _end[];
 #define GRANULE_SIZE_MASK       ((1 << GRANULE_SIZE_SHIFT) - 1)
 
 #define BITS_RESOLVED_PER_LVL   (GRANULE_SIZE_SHIFT - 3)
+#define L0_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 3)
 #define L1_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2)
 #define L2_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1)
 #define L3_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0)
 
-#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL
-#error "BITS_PER_VA too large (we don't have L0 table support)"
-#endif
-
+#define L0_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L0_ADDR_SHIFT)
 #define L1_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT)
 #define L2_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT)
 #define L3_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT)
@@ -109,6 +107,7 @@ extern char _start[], _end[];
 #define L3_XLAT_SIZE            (1UL << L3_ADDR_SHIFT)
 #define L2_XLAT_SIZE            (1UL << L2_ADDR_SHIFT)
 #define L1_XLAT_SIZE            (1UL << L1_ADDR_SHIFT)
+#define L0_XLAT_SIZE            (1UL << L0_ADDR_SHIFT)
 
 /* Block indices required for MAIR */
 #define BLOCK_INDEX_MEM_DEV_NGNRNE      0
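For reference, the new level-0 macros expand as follows (values computed directly from the definitions in this header):

/* GRANULE_SIZE_SHIFT = 12, BITS_RESOLVED_PER_LVL = 9, so:
 *   L0_ADDR_SHIFT = 12 + 9 * 3 = 39  ->  L0_XLAT_SIZE = 1UL << 39 (512 GiB per L0 entry)
 *   L1_ADDR_SHIFT = 12 + 9 * 2 = 30  ->  L1_XLAT_SIZE = 1UL << 30 (1 GiB per L1 entry)
 *   L2_ADDR_SHIFT = 12 + 9 * 1 = 21  ->  L2_XLAT_SIZE = 1UL << 21 (2 MiB per L2 entry)
 *   L3_ADDR_SHIFT = 12 + 9 * 0 = 12  ->  L3_XLAT_SIZE = 1UL << 12 (4 KiB per L3 entry)
 * A 48-bit VA leaves 48 - 39 = 9 bits for L0, i.e. a single 512-entry L0 table,
 * which is why the old "#if BITS_PER_VA > L1_ADDR_SHIFT + ..." guard can be dropped. */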
@@ -141,6 +141,7 @@ static uint64_t init_xlat_table(uint64_t base_addr,
                                 uint64_t size,
                                 uint64_t tag)
 {
+        uint64_t l0_index = (base_addr & L0_ADDR_MASK) >> L0_ADDR_SHIFT;
         uint64_t l1_index = (base_addr & L1_ADDR_MASK) >> L1_ADDR_SHIFT;
         uint64_t l2_index = (base_addr & L2_ADDR_MASK) >> L2_ADDR_SHIFT;
         uint64_t l3_index = (base_addr & L3_ADDR_MASK) >> L3_ADDR_SHIFT;
@@ -148,10 +149,10 @@ static uint64_t init_xlat_table(uint64_t base_addr,
         uint64_t desc;
         uint64_t attr = get_block_attr(tag);
 
-        /* L1 table lookup
-         * If VA has bits more than L2 can resolve, lookup starts at L1
-         * Assumption: we don't need L0 table in coreboot */
-        if (BITS_PER_VA > L1_ADDR_SHIFT) {
+        /* L0 entry stores a table descriptor (doesn't support blocks) */
+        table = get_next_level_table(&table[l0_index], L1_XLAT_SIZE);
+
+        /* L1 table lookup */
         if ((size >= L1_XLAT_SIZE) &&
             IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) {
                 /* If block address is aligned and size is greater than
@@ -162,12 +163,11 @@ static uint64_t init_xlat_table(uint64_t base_addr,
                 /* L2 lookup is not required */
                 return L1_XLAT_SIZE;
         }
-        table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
-        }
 
-        /* L2 table lookup
-         * If lookup was performed at L1, L2 table addr is obtained from L1 desc
-         * else, lookup starts at ttbr address */
+        /* L1 entry stores a table descriptor */
+        table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
+
+        /* L2 table lookup */
         if ((size >= L2_XLAT_SIZE) &&
             IS_ALIGNED(base_addr, (1UL << L2_ADDR_SHIFT))) {
                 /* If block address is aligned and size is greater than
@@ -195,6 +195,7 @@ static void sanity_check(uint64_t addr, uint64_t size)
 {
         assert(!(addr & GRANULE_SIZE_MASK) &&
                !(size & GRANULE_SIZE_MASK) &&
+               (addr + size < (1UL << BITS_PER_VA)) &&
                size >= GRANULE_SIZE);
 }
 
@@ -202,7 +203,7 @@ static void sanity_check(uint64_t addr, uint64_t size)
  * Desc : Returns the page table entry governing a specific address. */
 static uint64_t get_pte(void *addr)
 {
-        int shift = BITS_PER_VA > L1_ADDR_SHIFT ? L1_ADDR_SHIFT : L2_ADDR_SHIFT;
+        int shift = L0_ADDR_SHIFT;
         uint64_t *pte = (uint64_t *)_ttb;
 
         while (1) {
@@ -257,8 +258,8 @@ void mmu_init(void)
         for (; _ettb - (u8 *)table > 0; table += GRANULE_SIZE/sizeof(*table))
                 table[0] = UNUSED_DESC;
 
-        /* Initialize the root table (L1) to be completely unmapped. */
-        uint64_t *root = setup_new_table(INVALID_DESC, L1_XLAT_SIZE);
+        /* Initialize the root table (L0) to be completely unmapped. */
+        uint64_t *root = setup_new_table(INVALID_DESC, L0_XLAT_SIZE);
         assert((u8 *)root == _ttb);
 
         /* Initialize TTBR */
@@ -269,7 +270,7 @@ void mmu_init(void)
 
         /* Initialize TCR flags */
         raw_write_tcr_el3(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
-                          TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB |
+                          TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_256TB |
                           TCR_TBI_USED);
 }
 
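The get_pte() change above only moves the starting point of the walk: with BITS_PER_VA = 48 the lookup always begins at level 0 (shift 39) instead of choosing between L1 and L2. A rough, hypothetical sketch of such a descend is below; the real loop body is not shown in this hunk, and walk_sketch() plus the inline constants are assumptions for illustration only.

/* Hypothetical 4-level walk starting at level 0, matching the new
 * "int shift = L0_ADDR_SHIFT;" starting point. Names are made up. */
#include <stdint.h>

#define GRANULE_SIZE_SHIFT      12
#define BITS_RESOLVED_PER_LVL   9
#define L0_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 3)

static uint64_t walk_sketch(const uint64_t *root, uint64_t va)
{
        const uint64_t *table = root;
        int shift = L0_ADDR_SHIFT;              /* start at level 0: bit 39 */

        while (1) {
                uint64_t desc = table[(va >> shift) &
                                      ((1UL << BITS_RESOLVED_PER_LVL) - 1)];

                /* Bits [1:0] == 0b11 mark a table descriptor at levels 0-2;
                 * anything else (block/invalid), or reaching level 3, ends
                 * the walk. */
                if (shift <= GRANULE_SIZE_SHIFT || (desc & 3) != 3)
                        return desc;

                /* Next-level table address lives in bits [47:12]. */
                table = (const uint64_t *)(uintptr_t)(desc & 0x0000fffffffff000ULL);
                shift -= BITS_RESOLVED_PER_LVL;
        }
}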
@@ -69,7 +69,7 @@
 /* XLAT Table Init Attributes */
 
 #define VA_START                0x0
-#define BITS_PER_VA             33
+#define BITS_PER_VA             48
 /* Granule size of 4KB is being used */
 #define GRANULE_SIZE_SHIFT      12
 #define GRANULE_SIZE            (1 << GRANULE_SIZE_SHIFT)
@@ -77,14 +77,12 @@
 #define GRANULE_SIZE_MASK       ((1 << GRANULE_SIZE_SHIFT) - 1)
 
 #define BITS_RESOLVED_PER_LVL   (GRANULE_SIZE_SHIFT - 3)
+#define L0_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 3)
 #define L1_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2)
 #define L2_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1)
 #define L3_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0)
 
-#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL
-#error "BITS_PER_VA too large (we don't have L0 table support)"
-#endif
-
+#define L0_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L0_ADDR_SHIFT)
 #define L1_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT)
 #define L2_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT)
 #define L3_ADDR_MASK            (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT)
@@ -94,6 +92,7 @@
 #define L3_XLAT_SIZE            (1UL << L3_ADDR_SHIFT)
 #define L2_XLAT_SIZE            (1UL << L2_ADDR_SHIFT)
 #define L1_XLAT_SIZE            (1UL << L1_ADDR_SHIFT)
+#define L0_XLAT_SIZE            (1UL << L0_ADDR_SHIFT)
 
 /* Block indices required for MAIR */
 #define BLOCK_INDEX_MEM_DEV_NGNRNE      0