?? ccio-dma.c
字號:
/*
** ccio-dma.c:
**	DMA management routines for first generation cache-coherent machines.
**	Program U2/Uturn in "Virtual Mode" and use the I/O MMU.
**
**	(c) Copyright 2000 Grant Grundler
**	(c) Copyright 2000 Ryan Bradetich
**	(c) Copyright 2000 Hewlett-Packard Company
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
**
**  "Real Mode" operation refers to U2/Uturn chip operation.
**  U2/Uturn were designed to perform coherency checks w/o using
**  the I/O MMU - basically what x86 does.
**
**  Philipp Rumpf has a "Real Mode" driver for PCX-W machines at:
**      CVSROOT=:pserver:anonymous@198.186.203.37:/cvsroot/linux-parisc
**      cvs -z3 co linux/arch/parisc/kernel/dma-rm.c
**
**  I've rewritten his code to work under TPG's tree. See ccio-rm-dma.c.
**
**  Drawbacks of using Real Mode are:
**	o outbound DMA is slower - U2 won't prefetch data (GSC+ XQL signal).
**	o Inbound DMA less efficient - U2 can't use DMA_FAST attribute.
**	o Ability to do scatter/gather in HW is lost.
**	o Doesn't work under PCX-U/U+ machines since they didn't follow
**	  the coherency design originally worked out. Only PCX-W does.
*/

#include <linux/config.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/string.h>
#define PCI_DEBUG
#include <linux/pci.h>
#undef PCI_DEBUG

#include <asm/byteorder.h>
#include <asm/cache.h>		/* for L1_CACHE_BYTES */
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/dma.h>
#include <asm/io.h>
#include <asm/gsc.h>		/* for gsc_writeN()... */
#include <asm/hardware.h>	/* for register_module() */

/*
** Choose "ccio" since that's what HP-UX calls it.
** Make it easier for folks to migrate from one to the other :^)
*/
#define MODULE_NAME "ccio"

/* Compile-time debug switches: change an #undef to #define to enable. */
#undef DEBUG_CCIO_RES
#undef DEBUG_CCIO_RUN
#undef DEBUG_CCIO_INIT
#undef DEBUG_CCIO_RUN_SG

#include <linux/proc_fs.h>
#include <asm/runway.h>		/* for proc_runway_root */

#ifdef DEBUG_CCIO_INIT
#define DBG_INIT(x...)	printk(x)
#else
#define DBG_INIT(x...)
#endif

#ifdef DEBUG_CCIO_RUN
#define DBG_RUN(x...)	printk(x)
#else
#define DBG_RUN(x...)
#endif

#ifdef DEBUG_CCIO_RES
#define DBG_RES(x...)	printk(x)
#else
#define DBG_RES(x...)
#endif

#ifdef DEBUG_CCIO_RUN_SG
#define DBG_RUN_SG(x...)	printk(x)
#else
#define DBG_RUN_SG(x...)
#endif

#define CCIO_INLINE	/* inline */
#define WRITE_U32(value, addr) gsc_writel(value, (u32 *)(addr))
#define READ_U32(addr) gsc_readl((u32 *)(addr))

#define U2_IOA_RUNWAY 0x580
#define U2_BC_GSC     0x501
#define UTURN_IOA_RUNWAY 0x581
#define UTURN_BC_GSC     0x502

#define IOA_NORMAL_MODE      0x00020080 /* IO_CONTROL to turn on CCIO        */
#define CMD_TLB_DIRECT_WRITE 35         /* IO_COMMAND for I/O TLB Writes     */
#define CMD_TLB_PURGE        33         /* IO_COMMAND to Purge I/O TLB entry */

/*
** Register layout of the I/O adapter. The "Offset N" remarks count
** 32-bit words from the start of each register set.
*/
struct ioa_registers {
	/* Runway Supervisory Set */
	volatile int32_t    unused1[12];
	volatile uint32_t   io_command;             /* Offset 12 */
	volatile uint32_t   io_status;              /* Offset 13 */
	volatile uint32_t   io_control;             /* Offset 14 */
	volatile int32_t    unused2[1];

	/* Runway Auxiliary Register Set */
	volatile uint32_t   io_err_resp;            /* Offset  0 */
	volatile uint32_t   io_err_info;            /* Offset  1 */
	volatile uint32_t   io_err_req;             /* Offset  2 */
	volatile uint32_t   io_err_resp_hi;         /* Offset  3 */
	volatile uint32_t   io_tlb_entry_m;         /* Offset  4 */
	volatile uint32_t   io_tlb_entry_l;         /* Offset  5 */
	volatile uint32_t   unused3[1];
	volatile uint32_t   io_pdir_base;           /* Offset  7 */
	volatile uint32_t   io_io_low_hv;           /* Offset  8 */
	volatile uint32_t   io_io_high_hv;          /* Offset  9 */
	volatile uint32_t   unused4[1];
	volatile uint32_t   io_chain_id_mask;       /* Offset 11 */
	volatile uint32_t   unused5[2];
	volatile uint32_t   io_io_low;              /* Offset 14 */
	volatile uint32_t   io_io_high;             /* Offset 15 */
};

/* Per-I/O-controller state: resource map, I/O pdir and bookkeeping. */
struct ioc {
	struct ioa_registers *ioc_hpa;  /* I/O MMU base address */
	u8  *res_map;	                /* resource map, bit == pdir entry */
	u64 *pdir_base;	                /* physical base address */
	u32 res_hint;	                /* next available IOVP -
					   circular search */
	u32 res_size;		    	/* size of resource map in bytes */
	spinlock_t res_lock;

#ifdef CONFIG_PROC_FS
	/*
	 * Must stay a power of two: avg_idx is wrapped with
	 * "& (CCIO_SEARCH_SAMPLE - 1)".
	 */
#define CCIO_SEARCH_SAMPLE 0x100
	unsigned long avg_search[CCIO_SEARCH_SAMPLE];
	unsigned long avg_idx;		  /* current index into avg_search */
	unsigned long used_pages;
	unsigned long msingle_calls;
	unsigned long msingle_pages;
	unsigned long msg_calls;
	unsigned long msg_pages;
	unsigned long usingle_calls;
	unsigned long usingle_pages;
	unsigned long usg_calls;
	unsigned long usg_pages;

	unsigned short cujo20_bug;
#endif

	/* STUFF We don't need in performance path */
	u32 pdir_size; 			/* in bytes, determined by IOV Space size */
	u32 chainid_shift; 		/* specify bit location of chain_id */
	struct ioc *next;		/* Linked list of discovered iocs */
	const char *name;		/* device name from firmware */
	unsigned int hw_path;           /* the hardware path this ioc is associated with */
	struct pci_dev *fake_pci_dev;   /* the fake pci_dev for non-pci devs */
	struct resource mmio_region[2]; /* The "routed" MMIO regions */
};

/* Ratio of Host MEM to IOV Space size */
static unsigned long ccio_mem_ratio = 4;
static struct ioc *ioc_list;
static int ioc_count;

/**************************************************************
*
*   I/O Pdir Resource Management
*
*   Bits set in the resource map are in use.
*   Each bit can represent a number of pages.
*   LSbs represent lower addresses (IOVA's).
*
*   This was copied from sba_iommu.c. Don't try to unify
*   the two resource managers unless a way to have different
*   allocation policies is also adjusted. We'd like to avoid
*   I/O TLB thrashing by having resource allocation policy
*   match the I/O TLB replacement policy.
*
***************************************************************/
#define IOVP_SIZE PAGE_SIZE
#define IOVP_SHIFT PAGE_SHIFT
#define IOVP_MASK PAGE_MASK

/* Convert from IOVP to IOVA and vice versa. */
#define CCIO_IOVA(iovp,offset) ((iovp) | (offset))
#define CCIO_IOVP(iova) ((iova) & IOVP_MASK)

/* Every macro argument is parenthesized so expression arguments are safe. */
#define PDIR_INDEX(iovp)    ((iovp)>>IOVP_SHIFT)
#define MKIOVP(pdir_idx)    ((long)(pdir_idx) << IOVP_SHIFT)
#define MKIOVA(iovp,offset) ((dma_addr_t)((long)(iovp) | (long)(offset)))
#define ROUNDUP(x,y) (((x) + ((y)-1)) & ~((y)-1))

/*
** Build the resource-map mask for a run of @pages pages: the @pages
** most-significant bits of an unsigned long are set.
**
** A shift by the full width of the type is undefined in C, so the
** pages == BITS_PER_LONG case (permitted by the callers' ASSERTs) is
** answered explicitly with an all-ones mask.
*/
static CCIO_INLINE unsigned long
ccio_range_mask(unsigned long pages)
{
	if(pages >= BITS_PER_LONG)
		return ~0UL;

	return ~(~0UL >> pages);
}

/*
** Don't worry about the 150% average search length on a miss.
** If the search wraps around, and passes the res_hint, it will
** cause the kernel to panic anyhow.
**
** CCIO_SEARCH_LOOP scans [res_ptr, res_end) for an element with none of
** the mask bits set, claims it, records its byte offset in res_idx,
** advances ioc->res_hint past it and jumps to the resource_found label.
** It expects res_ptr, res_end and that label to exist in the
** expanding scope.
*/
#define CCIO_SEARCH_LOOP(ioc, res_idx, mask_ptr, size) \
	for(; res_ptr < res_end; ++res_ptr) { \
		if(0 == (*res_ptr & *(mask_ptr))) { \
			*res_ptr |= *(mask_ptr); \
			res_idx = (int)((unsigned long)res_ptr - (unsigned long)(ioc)->res_map); \
			(ioc)->res_hint = res_idx + ((size) >> 3); \
			goto resource_found; \
		} \
	}

/*
** CCIO_FIND_FREE_MAPPING declares its own locals, so it must be the
** first thing in a block. It searches from the (size-aligned) hint to
** the end of the map and then once more from the start of the map.
**
** NOTE: the mask is read through a narrower pointer, so only the
** lowest-addressed bytes of the unsigned long are used. Those hold the
** set (most-significant) bits only on a big-endian machine.
*/
#define CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, size) \
	u##size *res_ptr = (u##size *)&((ioc)->res_map[(ioc)->res_hint & ~(((size) >> 3) - 1)]); \
	u##size *res_end = (u##size *)&(ioc)->res_map[(ioc)->res_size]; \
	u##size *mask_ptr = (u##size *)&(mask); \
	CCIO_SEARCH_LOOP(ioc, res_idx, mask_ptr, size); \
	res_ptr = (u##size *)&(ioc)->res_map[0]; \
	CCIO_SEARCH_LOOP(ioc, res_idx, mask_ptr, size);

/*
** Find available bit in this ioa's resource map.
** Use a "circular" search:
**   o Most IOVA's are "temporary" - avg search time should be small.
** o keep a history of what happened for debugging
** o KISS.
**
** Perf optimizations:
** o search for log2(size) bits at a time.
** o search for available resource bits using byte/word/whatever.
** o use different search for "large" (eg > 4 pages) or "very large"
**   (eg > 16 pages) mappings.
*/

/**
 * ccio_alloc_range - Allocate pages in the ioc's resource map.
 * @ioc: The I/O Controller.
 * @pages_needed: The requested number of pages to be mapped into the
 * I/O Pdir...
 *
 * This function searches the resource map of the ioc to locate a range
 * of available pages for the requested size.
 *
 * Returns the bit address of the range in the resource map (the byte
 * offset of the claimed element times eight). The function does not
 * return on failure: it panics when the request is larger than the
 * widest supported search or when no free range is left.
 */
static int
ccio_alloc_range(struct ioc *ioc, unsigned long pages_needed)
{
	int res_idx;
	unsigned long mask;
#ifdef CONFIG_PROC_FS
	/* control register 16, used here as a free-running tick counter */
	unsigned long cr_start = mfctl(16);
#endif

	ASSERT(pages_needed);
	ASSERT((pages_needed * IOVP_SIZE) <= DMA_CHUNK_SIZE);
	ASSERT(pages_needed <= BITS_PER_LONG);

	mask = ccio_range_mask(pages_needed);

	DBG_RES("%s() pages_needed %lu mask 0x%08lx\n",
		__FUNCTION__, pages_needed, mask);

	/*
	** "seek and ye shall find"...praying never hurts either...
	** ggg sacrifices another 710 to the computer gods.
	*/

	/* Search with the narrowest element type that can hold the run. */
	if(pages_needed <= 8) {
		CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, 8);
	} else if(pages_needed <= 16) {
		CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, 16);
	} else if(pages_needed <= 32) {
		CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, 32);
#ifdef __LP64__
	} else if(pages_needed <= 64) {
		CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, 64);
#endif
	} else {
		panic(__FILE__ ": %s() Too many pages to map. pages_needed: %ld\n",
		      __FUNCTION__, pages_needed);
	}

	/* Both search passes fell through without jumping to the label. */
	panic(__FILE__ ": %s() I/O MMU is out of mapping resources.\n",
	      __FUNCTION__);

resource_found:

	DBG_RES("%s() res_idx %d mask 0x%08lx res_hint: %d\n",
		__FUNCTION__, res_idx, mask, ioc->res_hint);

#ifdef CONFIG_PROC_FS
	/*
	** Unsigned subtraction already yields the elapsed ticks across a
	** counter roll over, so no extra correction is applied.
	*/
	ioc->avg_search[ioc->avg_idx++] = mfctl(16) - cr_start;
	ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;

	ioc->used_pages += pages_needed;
#endif

	/*
	** return the bit address.
	*/
	return res_idx << 3;
}

/*
** CCIO_FREE_MAPPINGS clears the mask bits in the element at byte offset
** res_idx after asserting that all of them are currently set. It
** declares locals, so it must be the first thing in a block.
*/
#define CCIO_FREE_MAPPINGS(ioc, res_idx, mask, size) \
	u##size *res_ptr = (u##size *)&((ioc)->res_map[res_idx]); \
	u##size *mask_ptr = (u##size *)&(mask); \
	ASSERT((*res_ptr & *mask_ptr) == *mask_ptr); \
	*res_ptr &= ~(*mask_ptr);

/**
 * ccio_free_range - Free pages from the ioc's resource map.
 * @ioc: The I/O Controller.
 * @iova: The I/O Virtual Address.
 * @pages_mapped: The requested number of pages to be freed from the
 * I/O Pdir.
 *
 * This function frees the resources allocated for the iova. It panics
 * when @pages_mapped is larger than the widest supported element.
 */
static void
ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
{
	unsigned long mask;
	unsigned long iovp = CCIO_IOVP(iova);
	/* pdir index -> byte offset in the resource map (8 bits per byte) */
	unsigned int res_idx = PDIR_INDEX(iovp) >> 3;

	ASSERT(pages_mapped);
	ASSERT((pages_mapped * IOVP_SIZE) <= DMA_CHUNK_SIZE);
	ASSERT(pages_mapped <= BITS_PER_LONG);

	mask = ccio_range_mask(pages_mapped);

	DBG_RES("%s(): res_idx: %u pages_mapped %lu mask 0x%08lx\n",
		__FUNCTION__, res_idx, pages_mapped, mask);

#ifdef CONFIG_PROC_FS
	ioc->used_pages -= pages_mapped;
#endif

	/* Use the same element width the allocation used for this size. */
	if(pages_mapped <= 8) {
		CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 8);
	} else if(pages_mapped <= 16) {
		CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 16);
	} else if(pages_mapped <= 32) {
		CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 32);
#ifdef __LP64__
	} else if(pages_mapped <= 64) {
		CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 64);
#endif
	} else {
		panic(__FILE__ ":%s() Too many pages to unmap.\n",
		      __FUNCTION__);
	}
}

/****************************************************************
**
**          CCIO dma_ops support routines
**
*****************************************************************/

typedef unsigned long space_t;
#define KERNEL_SPACE 0

/*
** DMA "Page Type" and Hints
** o if SAFE_DMA isn't set, mapping is for FAST_DMA. SAFE_DMA should be
**   set for subcacheline DMA transfers since we don't want to damage the
**   other part of a cacheline.
** o SAFE_DMA must be set for "memory" allocated via pci_alloc_consistent().
**   This bit tells U2 to do R/M/W for partial cachelines. "Streaming"
**   data can avoid this if the mapping covers full cache lines.
** o STOP_MOST is needed for atomicity across cachelines.
**   Apparently only "some EISA devices" need this.
**   Using CONFIG_ISA is a hack. Only the IOA with EISA under it needs
**   to use this hint iff the EISA devices needs this feature.
**   According to the U2 ERS, STOP_MOST enabled pages hurt performance.
** o PREFETCH should *not* be set for cases like Multiple PCI devices
**   behind GSCtoPCI (dino) bus converter. Only one cacheline per GSC
**   device can be fetched and multiple DMA streams will thrash the
**   prefetch buffer and burn memory bandwidth. See 6.7.3 "Prefetch Rules
**   and Invalidation of Prefetch Entries".
**
** FIXME: the default hints need to be per GSC device - not global.
**
** HP-UX dorks: linux device driver programming model is totally different
**    than HP-UX's. HP-UX always sets HINT_PREFETCH since its drivers
**    do special things to work on non-coherent platforms...linux has to
**    be much more careful with this.
*/
#define IOPDIR_VALID    0x01UL
#define HINT_SAFE_DMA   0x02UL	/* used for pci_alloc_consistent() pages */
#ifdef CONFIG_ISA	/* EISA support really */
#define HINT_STOP_MOST  0x04UL	/* LSL support */
#else
#define HINT_STOP_MOST  0x00UL	/* only needed for "some EISA devices" */
#endif
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -