diff --git a/payloads/libpayload/drivers/usb/ehci.c b/payloads/libpayload/drivers/usb/ehci.c index af7daf1999..129b9b825e 100644 --- a/payloads/libpayload/drivers/usb/ehci.c +++ b/payloads/libpayload/drivers/usb/ehci.c @@ -30,6 +30,7 @@ //#define USB_DEBUG #include +#include #include "ehci.h" #include "ehci_private.h" @@ -358,9 +359,10 @@ static int ehci_process_async_schedule( return result; } -static int ehci_bulk (endpoint_t *ep, int size, u8 *data, int finalize) +static int ehci_bulk (endpoint_t *ep, int size, u8 *src, int finalize) { int result = 0; + u8 *end = src + size; int remaining = size; int endp = ep->endpoint & 0xf; int pid = (ep->direction==IN)?EHCI_IN:EHCI_OUT; @@ -372,30 +374,42 @@ static int ehci_bulk (endpoint_t *ep, int size, u8 *data, int finalize) return -1; } - qtd_t *head = memalign(64, sizeof(qtd_t)); + if (!dma_coherent(src)) { + end = EHCI_INST(ep->dev->controller)->dma_buffer + size; + if (size > DMA_SIZE) { + usb_debug("EHCI bulk transfer too large for DMA buffer: %d\n", size); + return -1; + } + if (pid == EHCI_OUT) + memcpy(end - size, src, size); + } + + ehci_qh_t *qh = dma_memalign(64, sizeof(ehci_qh_t)); + qtd_t *head = dma_memalign(64, sizeof(qtd_t)); qtd_t *cur = head; + if (!qh || !head) + goto oom; while (1) { memset((void *)cur, 0, sizeof(qtd_t)); cur->token = QTD_ACTIVE | (pid << QTD_PID_SHIFT) | (0 << QTD_CERR_SHIFT); - u32 chunk = fill_td(cur, data, remaining); - remaining -= chunk; - data += chunk; + remaining -= fill_td(cur, end - remaining, remaining); cur->alt_next_qtd = QTD_TERMINATE; - if (remaining == 0) { + if (remaining <= 0) { cur->next_qtd = virt_to_phys(0) | QTD_TERMINATE; break; } else { - qtd_t *next = memalign(64, sizeof(qtd_t)); + qtd_t *next = dma_memalign(64, sizeof(qtd_t)); + if (!next) + goto oom; cur->next_qtd = virt_to_phys(next); cur = next; } } /* create QH */ - ehci_qh_t *qh = memalign(64, sizeof(ehci_qh_t)); memset((void *)qh, 0, sizeof(ehci_qh_t)); qh->horiz_link_ptr = virt_to_phys(qh) | QH_QH; qh->epchar = ep->dev->address | @@ -415,21 +429,31 @@ static int ehci_bulk (endpoint_t *ep, int size, u8 *data, int finalize) result = ehci_process_async_schedule( EHCI_INST(ep->dev->controller), qh, head); - if (result >= 0) + if (result >= 0) { result = size - result; + if (pid == EHCI_IN && end != src + size) + memcpy(src, end - size, result); + } ep->toggle = (cur->token & QTD_TOGGLE_MASK) >> QTD_TOGGLE_SHIFT; free_qh_and_tds(qh, head); return result; + +oom: + usb_debug("Not enough DMA memory for EHCI control structures!\n"); + free_qh_and_tds(qh, head); + return -1; } /* FIXME: Handle control transfers as 3 QHs, so the 2nd stage can be >0x4000 bytes */ -static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq, - int dalen, u8 *data) +static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *setup, + int dalen, u8 *src) { + u8 *data = src; + u8 *devreq = setup; int endp = 0; // this is control. always 0 (for now) int toggle = 0; int mlen = dev->endpoints[0].maxpacketsize; @@ -443,9 +467,26 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq non_hs_ctrl_ep = 1; } + if (!dma_coherent(setup)) { + devreq = EHCI_INST(dev->controller)->dma_buffer; + memcpy(devreq, setup, drlen); + } + if (dalen > 0 && !dma_coherent(src)) { + data = EHCI_INST(dev->controller)->dma_buffer + drlen; + if (drlen + dalen > DMA_SIZE) { + usb_debug("EHCI control transfer too large for DMA buffer: %d\n", drlen + dalen); + return -1; + } + if (dir == OUT) + memcpy(data, src, dalen); + } + /* create qTDs */ - qtd_t *head = memalign(64, sizeof(qtd_t)); + qtd_t *head = dma_memalign(64, sizeof(qtd_t)); + ehci_qh_t *qh = dma_memalign(64, sizeof(ehci_qh_t)); qtd_t *cur = head; + if (!qh || !head) + goto oom; memset((void *)cur, 0, sizeof(qtd_t)); cur->token = QTD_ACTIVE | (toggle?QTD_TOGGLE_DATA1:0) | @@ -454,9 +495,11 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq if (fill_td(cur, devreq, drlen) != drlen) { usb_debug("ERROR: couldn't send the entire device request\n"); } - qtd_t *next = memalign(64, sizeof(qtd_t)); + qtd_t *next = dma_memalign(64, sizeof(qtd_t)); cur->next_qtd = virt_to_phys(next); cur->alt_next_qtd = QTD_TERMINATE; + if (!next) + goto oom; /* FIXME: We're limited to 16-20K (depending on alignment) for payload for now. * Figure out, how toggle can be set sensibly in this scenario */ @@ -471,7 +514,9 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq if (fill_td(cur, data, dalen) != dalen) { usb_debug("ERROR: couldn't send the entire control payload\n"); } - next = memalign(64, sizeof(qtd_t)); + next = dma_memalign(64, sizeof(qtd_t)); + if (!next) + goto oom; cur->next_qtd = virt_to_phys(next); cur->alt_next_qtd = QTD_TERMINATE; } @@ -488,7 +533,6 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq cur->alt_next_qtd = QTD_TERMINATE; /* create QH */ - ehci_qh_t *qh = memalign(64, sizeof(ehci_qh_t)); memset((void *)qh, 0, sizeof(ehci_qh_t)); qh->horiz_link_ptr = virt_to_phys(qh) | QH_QH; qh->epchar = dev->address | @@ -506,11 +550,19 @@ static int ehci_control (usbdev_t *dev, direction_t dir, int drlen, void *devreq result = ehci_process_async_schedule( EHCI_INST(dev->controller), qh, head); - if (result >= 0) + if (result >= 0) { result = dalen - result; + if (dir == IN && data != src) + memcpy(src, data, result); + } free_qh_and_tds(qh, head); return result; + +oom: + usb_debug("Not enough DMA memory for EHCI control structures!\n"); + free_qh_and_tds(qh, head); + return -1; } @@ -581,13 +633,12 @@ static void *ehci_create_intr_queue( return NULL; } - intr_queue_t *const intrq = - (intr_queue_t *)memalign(64, sizeof(intr_queue_t)); + intr_queue_t *const intrq = (intr_queue_t *)malloc(sizeof(intr_queue_t)); /* * reqcount data chunks * plus one more spare, which we'll leave out of queue */ - u8 *data = (u8 *)malloc(reqsize * (reqcount + 1)); + u8 *data = (u8 *)dma_malloc(reqsize * (reqcount + 1)); if (!intrq || !data) fatal("Not enough memory to create USB interrupt queue.\n"); intrq->data = data; @@ -595,7 +646,7 @@ static void *ehci_create_intr_queue( intrq->reqsize = reqsize; /* create #reqcount transfer descriptors (qTDs) */ - intrq->head = (intr_qtd_t *)memalign(64, sizeof(intr_qtd_t)); + intrq->head = (intr_qtd_t *)dma_memalign(64, sizeof(intr_qtd_t)); intr_qtd_t *cur_td = intrq->head; for (i = 0; i < reqcount; ++i) { fill_intr_queue_td(intrq, cur_td, data); @@ -603,7 +654,7 @@ static void *ehci_create_intr_queue( if (i < reqcount - 1) { /* create one more qTD */ intr_qtd_t *const next_td = - (intr_qtd_t *)memalign(64, sizeof(intr_qtd_t)); + (intr_qtd_t *)dma_memalign(64, sizeof(intr_qtd_t)); cur_td->td.next_qtd = virt_to_phys(&next_td->td); cur_td->next = next_td; cur_td = next_td; @@ -612,7 +663,7 @@ static void *ehci_create_intr_queue( intrq->tail = cur_td; /* create spare qTD */ - intrq->spare = (intr_qtd_t *)memalign(64, sizeof(intr_qtd_t)); + intrq->spare = (intr_qtd_t *)dma_memalign(64, sizeof(intr_qtd_t)); fill_intr_queue_td(intrq, intrq->spare, data); /* initialize QH */ @@ -780,16 +831,22 @@ ehci_init (unsigned long physical_bar) /* Initialize periodic frame list */ /* 1024 32-bit pointers, 4kb aligned */ - u32 *const periodic_list = (u32 *)memalign(4096, 1024 * sizeof(u32)); + u32 *const periodic_list = (u32 *)dma_memalign(4096, 1024 * sizeof(u32)); if (!periodic_list) fatal("Not enough memory creating EHCI periodic frame list.\n"); + if (dma_initialized()) { + EHCI_INST(controller)->dma_buffer = dma_memalign(4096, DMA_SIZE); + if (!EHCI_INST(controller)->dma_buffer) + fatal("Not enough DMA memory for EHCI bounce buffer.\n"); + } + /* * Insert dummy QH in periodic frame list * This helps with broken host controllers * and doesn't violate the standard. */ - EHCI_INST(controller)->dummy_qh = (ehci_qh_t *)memalign(64, sizeof(ehci_qh_t)); + EHCI_INST(controller)->dummy_qh = (ehci_qh_t *)dma_memalign(64, sizeof(ehci_qh_t)); memset((void *)EHCI_INST(controller)->dummy_qh, 0, sizeof(*EHCI_INST(controller)->dummy_qh)); EHCI_INST(controller)->dummy_qh->horiz_link_ptr = QH_TERMINATE; diff --git a/payloads/libpayload/drivers/usb/ehci_private.h b/payloads/libpayload/drivers/usb/ehci_private.h index 986ee791bf..c9fa6e07da 100644 --- a/payloads/libpayload/drivers/usb/ehci_private.h +++ b/payloads/libpayload/drivers/usb/ehci_private.h @@ -135,13 +135,14 @@ typedef struct ehci { hc_cap_t *capabilities; hc_op_t *operation; ehci_qh_t *dummy_qh; +#define DMA_SIZE (64 * 1024) + void *dma_buffer; } ehci_t; #define PS_TERMINATE 1 #define PS_TYPE_QH 1 << 1 #define PS_PTR_MASK ~0x1f - #define EHCI_INST(controller) ((ehci_t*)((controller)->instance)) #endif diff --git a/payloads/libpayload/include/stdlib.h b/payloads/libpayload/include/stdlib.h index 8c8c200873..d3448b96cc 100644 --- a/payloads/libpayload/include/stdlib.h +++ b/payloads/libpayload/include/stdlib.h @@ -115,6 +115,8 @@ void *memalign(size_t align, size_t size); void init_dma_memory(void *start, u32 size); void *dma_malloc(size_t size); void *dma_memalign(size_t align, size_t size); +int dma_initialized(void); +int dma_coherent(void *ptr); /** @} */ /** diff --git a/payloads/libpayload/include/x86/arch/cache.h b/payloads/libpayload/include/x86/arch/cache.h index 73849b7348..4a8073616c 100644 --- a/payloads/libpayload/include/x86/arch/cache.h +++ b/payloads/libpayload/include/x86/arch/cache.h @@ -32,9 +32,13 @@ #ifndef __ARCH_CACHE_H__ #define __ARCH_CACHE_H__ -/* These are noops, needed by the USB stack on ARM */ +/* NOOPs mirroring ARM's cache API, since x86 devices usually cache snoop */ #define dmb() #define dsb() +#define dcache_clean_all() +#define dcache_clean_by_mva(addr, len) +#define dcache_invalidate_all() +#define dcache_invalidate_by_mva(addr, len) #define dcache_clean_invalidate_all() #define dcache_clean_invalidate_by_mva(addr, len) diff --git a/payloads/libpayload/libc/malloc.c b/payloads/libpayload/libc/malloc.c index 618d8b3b93..11dfeefadd 100644 --- a/payloads/libpayload/libc/malloc.c +++ b/payloads/libpayload/libc/malloc.c @@ -83,7 +83,7 @@ static int minimal_free = 0; void init_dma_memory(void *start, u32 size) { #ifdef CONFIG_LP_DEBUG_MALLOC - if (dma != heap) { + if (dma_initialized()) { printf("WARNING: %s called twice!\n"); return; } @@ -97,6 +97,17 @@ void init_dma_memory(void *start, u32 size) dma->align_regions = NULL; } +int dma_initialized() +{ + return dma != heap; +} + +/* For boards that don't initialize DMA we assume all locations are coherent */ +int dma_coherent(void *ptr) +{ + return !dma_initialized() || (dma->start <= ptr && dma->end > ptr); +} + static void setup(hdrtype_t volatile *start, int size) { *start = FREE_BLOCK(size); @@ -120,7 +131,7 @@ static void *alloc(int len, struct memory_type *type) /* Make sure the region is setup correctly. */ if (!HAS_MAGIC(*ptr)) - setup(ptr, (int)((&_eheap - &_heap) - HDRSIZE)); + setup(ptr, (int)((type->end - type->start) - HDRSIZE)); /* Find some free space. */ do { @@ -470,6 +481,6 @@ void print_malloc_map(void) if (free_memory && (minimal_free > free_memory)) minimal_free = free_memory; printf("Maximum memory consumption: %d bytes\n", - (unsigned int)(&_eheap - &_heap) - HDRSIZE - minimal_free); + (unsigned int)(heap->end - heap->start) - HDRSIZE - minimal_free); } #endif