?? ccio-dma.c
字號:
/*** ccio-dma.c:** DMA management routines for first generation cache-coherent machines.** Program U2/Uturn in "Virtual Mode" and use the I/O MMU.**** (c) Copyright 2000 Grant Grundler** (c) Copyright 2000 Ryan Bradetich** (c) Copyright 2000 Hewlett-Packard Company**** This program is free software; you can redistribute it and/or modify** it under the terms of the GNU General Public License as published by** the Free Software Foundation; either version 2 of the License, or** (at your option) any later version.****** "Real Mode" operation refers to U2/Uturn chip operation.** U2/Uturn were designed to perform coherency checks w/o using** the I/O MMU - basically what x86 does.**** Philipp Rumpf has a "Real Mode" driver for PCX-W machines at:** CVSROOT=:pserver:anonymous@198.186.203.37:/cvsroot/linux-parisc** cvs -z3 co linux/arch/parisc/kernel/dma-rm.c**** I've rewritten his code to work under TPG's tree. See ccio-rm-dma.c.**** Drawbacks of using Real Mode are:** o outbound DMA is slower - U2 won't prefetch data (GSC+ XQL signal).** o Inbound DMA less efficient - U2 can't use DMA_FAST attribute.** o Ability to do scatter/gather in HW is lost.** o Doesn't work under PCX-U/U+ machines since they didn't follow** the coherency design originally worked out. 
Only PCX-W does.*/#include <linux/types.h>#include <linux/kernel.h>#include <linux/init.h>#include <linux/mm.h>#include <linux/spinlock.h>#include <linux/slab.h>#include <linux/string.h>#include <linux/pci.h>#include <linux/reboot.h>#include <linux/proc_fs.h>#include <linux/seq_file.h>#include <linux/scatterlist.h>#include <asm/byteorder.h>#include <asm/cache.h> /* for L1_CACHE_BYTES */#include <asm/uaccess.h>#include <asm/page.h>#include <asm/dma.h>#include <asm/io.h>#include <asm/hardware.h> /* for register_module() */#include <asm/parisc-device.h>/* ** Choose "ccio" since that's what HP-UX calls it.** Make it easier for folks to migrate from one to the other :^)*/#define MODULE_NAME "ccio"#undef DEBUG_CCIO_RES#undef DEBUG_CCIO_RUN#undef DEBUG_CCIO_INIT#undef DEBUG_CCIO_RUN_SG#ifdef CONFIG_PROC_FS/* * CCIO_SEARCH_TIME can help measure how fast the bitmap search is. * impacts performance though - ditch it if you don't use it. */#define CCIO_SEARCH_TIME#undef CCIO_MAP_STATS#else#undef CCIO_SEARCH_TIME#undef CCIO_MAP_STATS#endif#include <linux/proc_fs.h>#include <asm/runway.h> /* for proc_runway_root */#ifdef DEBUG_CCIO_INIT#define DBG_INIT(x...) printk(x)#else#define DBG_INIT(x...)#endif#ifdef DEBUG_CCIO_RUN#define DBG_RUN(x...) printk(x)#else#define DBG_RUN(x...)#endif#ifdef DEBUG_CCIO_RES#define DBG_RES(x...) printk(x)#else#define DBG_RES(x...)#endif#ifdef DEBUG_CCIO_RUN_SG#define DBG_RUN_SG(x...) 
printk(x)
#else
#define DBG_RUN_SG(x...)
#endif

#define CCIO_INLINE	inline

/* 32-bit register accessors; thin wrappers around __raw_writel()/__raw_readl(). */
#define WRITE_U32(value, addr) __raw_writel(value, addr)
#define READ_U32(addr) __raw_readl(addr)

/*
** NOTE(review): presumably the hardware IDs used to recognise the U2 and
** Uturn I/O adapters and their GSC bus converters - confirm against the
** probe code, which is not part of this section.
*/
#define U2_IOA_RUNWAY 0x580
#define U2_BC_GSC     0x501
#define UTURN_IOA_RUNWAY 0x581
#define UTURN_BC_GSC     0x502

/*
** Assuming bit 0 is the most significant bit (as in the IO_CONTROL bit
** table in the comment that follows this struct), 0x00020080 sets bit 14
** and bit 24, i.e. TLB field = 2 ("Normal") and mode field = 1 ("Include").
*/
#define IOA_NORMAL_MODE      0x00020080 /* IO_CONTROL to turn on CCIO        */
#define CMD_TLB_DIRECT_WRITE 35         /* IO_COMMAND for I/O TLB Writes     */
#define CMD_TLB_PURGE        33         /* IO_COMMAND to Purge I/O TLB entry */

/*
** Register layout of the I/O adapter, accessed through the __iomem
** pointer held in struct ioc (ioc_regs).
**
** Every member is 32 bits wide, so the field order *is* the register map:
** do not reorder, insert or remove members.
**
** The "Offset N" notes count 32-bit words from the start of each register
** set. The byte offsets from the start of the struct are added alongside;
** they agree with the +0x38/+0x60/+0x64/+0x78/+0x7c values quoted in the
** register description that follows this struct.
*/
struct ioa_registers {
	/* Runway Supervisory Set (bytes 0x00 - 0x3f) */
	int32_t    unused1[12];		/* bytes 0x00 - 0x2f */
	uint32_t   io_command;		/* Offset 12, byte 0x30 */
	uint32_t   io_status;		/* Offset 13, byte 0x34 */
	uint32_t   io_control;		/* Offset 14, byte 0x38 */
	int32_t    unused2[1];		/* byte 0x3c */

	/* Runway Auxiliary Register Set (bytes 0x40 - 0x7f) */
	uint32_t   io_err_resp;		/* Offset  0, byte 0x40 */
	uint32_t   io_err_info;		/* Offset  1, byte 0x44 */
	uint32_t   io_err_req;		/* Offset  2, byte 0x48 */
	uint32_t   io_err_resp_hi;	/* Offset  3, byte 0x4c */
	uint32_t   io_tlb_entry_m;	/* Offset  4, byte 0x50 */
	uint32_t   io_tlb_entry_l;	/* Offset  5, byte 0x54 */
	uint32_t   unused3[1];		/* byte 0x58 */
	uint32_t   io_pdir_base;	/* Offset  7, byte 0x5c */
	uint32_t   io_io_low_hv;	/* Offset  8, byte 0x60 */
	uint32_t   io_io_high_hv;	/* Offset  9, byte 0x64 */
	uint32_t   unused4[1];		/* byte 0x68 */
	uint32_t   io_chain_id_mask;	/* Offset 11, byte 0x6c */
	uint32_t   unused5[2];		/* bytes 0x70 - 0x77 */
	uint32_t   io_io_low;		/* Offset 14, byte 0x78 */
	uint32_t   io_io_high;		/* Offset 15, byte 0x7c */
};

/*
** IOA Registers
** -------------
**
** Runway IO_CONTROL Register (+0x38)
**
** The Runway IO_CONTROL register controls the forwarding of transactions.
**
** | 0 ... 13 | 14 15 | 16 ... 21 | 22 | 23 24 | 25 ... 
31 |
** | HV | TLB | reserved | HV | mode | reserved |
**
** o mode field indicates the address translation of transactions
**   forwarded from Runway to GSC+:
**       Mode Name       Value    Definition
**       Off (default)     0      Opaque to matching addresses.
**       Include           1      Transparent for matching addresses.
**       Peek              3      Map matching addresses.
**
**       + "Off" mode: Runway transactions which match the I/O range
**         specified by the IO_IO_LOW/IO_IO_HIGH registers will be ignored.
**       + "Include" mode: all addresses within the I/O range specified
**         by the IO_IO_LOW and IO_IO_HIGH registers are transparently
**         forwarded. This is the I/O Adapter's normal operating mode.
**       + "Peek" mode: used during system configuration to initialize the
**         GSC+ bus. Runway Write_Shorts in the address range specified by
**         IO_IO_LOW and IO_IO_HIGH are forwarded through the I/O Adapter
**         *AND* the GSC+ address is remapped to the Broadcast Physical
**         Address space by setting the 14 high order address bits of the
**         32 bit GSC+ address to ones.
**
** o TLB field affects transactions which are forwarded from GSC+ to Runway.
**   "Real" mode is the power-on default.
**
**       TLB Mode  Value  Description
**       Real        0    No TLB translation. Address is directly mapped and the
**                        virtual address is composed of selected physical bits.
**       Error       1    Software fills the TLB manually.
**       Normal      2    IOA fetches IO TLB misses from IO PDIR (in host memory).
**
**
** IO_IO_LOW_HV    +0x60 (HV dependent)
** IO_IO_HIGH_HV   +0x64 (HV dependent)
** IO_IO_LOW       +0x78 (Architected register)
** IO_IO_HIGH      +0x7c (Architected register)
**
** IO_IO_LOW and IO_IO_HIGH set the lower and upper bounds of the
** I/O Adapter address space, respectively.
**
** 0  ... 7 | 8 ... 15 |  16   ...   31 |
** 11111111 | 11111111 |      address   |
**
** Each LOW/HIGH pair describes a disjoint address space region.
** (2 per GSC+ port). Each incoming Runway transaction address is compared
** with both sets of LOW/HIGH registers. 
If the address is in the range
** greater than or equal to IO_IO_LOW and less than IO_IO_HIGH the transaction
** is forwarded to the respective GSC+ bus.
** Specify IO_IO_LOW equal to or greater than IO_IO_HIGH to avoid specifying
** an address space region.
**
** In order for a Runway address to reside within GSC+ extended address space:
**	Runway Address [0:7]    must identically compare to 8'b11111111
**	Runway Address [8:11]   must be equal to IO_IO_LOW(_HV)[16:19]
**	Runway Address [12:23]  must be greater than or equal to
**	           IO_IO_LOW(_HV)[20:31] and less than IO_IO_HIGH(_HV)[20:31].
**	Runway Address [24:39]  is not used in the comparison.
**
** When the Runway transaction is forwarded to GSC+, the GSC+ address is
** as follows:
**	GSC+ Address[0:3]	4'b1111
**	GSC+ Address[4:29]	Runway Address[12:37]
**	GSC+ Address[30:31]	2'b00
**
** All 4 Low/High registers must be initialized (by PDC) once the lower bus
** is interrogated and address space is defined. The operating system will
** modify the architectural IO_IO_LOW and IO_IO_HIGH registers following
** the PDC initialization. However, the hardware version dependent IO_IO_LOW
** and IO_IO_HIGH registers should not be subsequently altered by the OS.
**
** Writes to both sets of registers will take effect immediately, bypassing
** the queues, which ensures that subsequent Runway transactions are checked
** against the updated bounds values. However reads are queued, introducing
** the possibility of a read being bypassed by a subsequent write to the same
** register. 
This sequence can be avoided by having software wait for read
** returns before issuing subsequent writes.
*/

/*
** Per-I/O-controller state: the register window, the I/O Pdir and the
** bitmap ("resource map") that tracks which Pdir entries are in use.
**
** Members above the "STUFF We don't need in performance path" marker are
** the ones the mapping code touches; keep the ordering as it is.
*/
struct ioc {
	struct ioa_registers __iomem *ioc_regs;  /* I/O MMU base address */
	u8  *res_map;			/* resource map, bit == pdir entry */
	u64 *pdir_base;			/* physical base address */
	u32 pdir_size;			/* bytes, function of IOV Space size */
	u32 res_hint;			/* next available IOVP - circular search.
					 * The search macros use it as a byte
					 * index into res_map. */
	u32 res_size;			/* size of resource map in bytes */
	spinlock_t res_lock;		/* NOTE(review): presumably guards res_map
					 * and res_hint - confirm against the
					 * map/unmap paths. */

#ifdef CCIO_SEARCH_TIME
	/* Only present when bitmap search timing is compiled in. */
#define CCIO_SEARCH_SAMPLE 0x100
	unsigned long avg_search[CCIO_SEARCH_SAMPLE];
	unsigned long avg_idx;		/* current index into avg_search */
#endif
#ifdef CCIO_MAP_STATS
	/*
	 * Only present when mapping statistics are compiled in.
	 * NOTE(review): the names suggest call/page counters for map-single,
	 * map-sg, unmap-single and unmap-sg - confirm against their users.
	 */
	unsigned long used_pages;
	unsigned long msingle_calls;
	unsigned long msingle_pages;
	unsigned long msg_calls;
	unsigned long msg_pages;
	unsigned long usingle_calls;
	unsigned long usingle_pages;
	unsigned long usg_calls;
	unsigned long usg_pages;
#endif
	unsigned short cujo20_bug;	/* NOTE(review): not referenced in this
					 * section; looks like a hardware
					 * workaround flag - confirm. */

	/* STUFF We don't need in performance path */
	u32 chainid_shift;		/* specify bit location of chain_id */
	struct ioc *next;		/* Linked list of discovered iocs */
	const char *name;		/* device name from firmware */
	unsigned int hw_path;		/* the hardware path this ioc is associated with */
	struct pci_dev *fake_pci_dev;	/* the fake pci_dev for non-pci devs */
	struct resource mmio_region[2];	/* The "routed" MMIO regions */
};

/* Head of the list of discovered iocs, chained through ioc->next. */
static struct ioc *ioc_list;
/* NOTE(review): presumably the number of entries on ioc_list - confirm. */
static int ioc_count;

/**************************************************************
*
*   I/O Pdir Resource Management
*
*   Bits set in the resource map are in use.
*   Each bit can represent a number of pages.
*   LSbs represent lower addresses (IOVA's).
*
*   This was copied from sba_iommu.c. Don't try to unify
*   the two resource managers unless a way to have different
*   allocation policies is also adjusted. 
We'd like to avoid* I/O TLB thrashing by having resource allocation policy* match the I/O TLB replacement policy.****************************************************************/#define IOVP_SIZE PAGE_SIZE#define IOVP_SHIFT PAGE_SHIFT#define IOVP_MASK PAGE_MASK/* Convert from IOVP to IOVA and vice versa. */#define CCIO_IOVA(iovp,offset) ((iovp) | (offset))#define CCIO_IOVP(iova) ((iova) & IOVP_MASK)#define PDIR_INDEX(iovp) ((iovp)>>IOVP_SHIFT)#define MKIOVP(pdir_idx) ((long)(pdir_idx) << IOVP_SHIFT)#define MKIOVA(iovp,offset) (dma_addr_t)((long)iovp | (long)offset)/*** Don't worry about the 150% average search length on a miss.** If the search wraps around, and passes the res_hint, it will** cause the kernel to panic anyhow.*/#define CCIO_SEARCH_LOOP(ioc, res_idx, mask, size) \ for(; res_ptr < res_end; ++res_ptr) { \ if(0 == (*res_ptr & mask)) { \ *res_ptr |= mask; \ res_idx = (unsigned int)((unsigned long)res_ptr - (unsigned long)ioc->res_map); \ ioc->res_hint = res_idx + (size >> 3); \ goto resource_found; \ } \ }#define CCIO_FIND_FREE_MAPPING(ioa, res_idx, mask, size) \ u##size *res_ptr = (u##size *)&((ioc)->res_map[ioa->res_hint & ~((size >> 3) - 1)]); \ u##size *res_end = (u##size *)&(ioc)->res_map[ioa->res_size]; \ CCIO_SEARCH_LOOP(ioc, res_idx, mask, size); \ res_ptr = (u##size *)&(ioc)->res_map[0]; \ CCIO_SEARCH_LOOP(ioa, res_idx, mask, size);/*** Find available bit in this ioa's resource map.** Use a "circular" search:** o Most IOVA's are "temporary" - avg search time should be small.** o keep a history of what happened for debugging** o KISS.**** Perf optimizations:** o search for log2(size) bits at a time.** o search for available resource bits using byte/word/whatever.** o use different search for "large" (eg > 4 pages) or "very large"** (eg > 16 pages) mappings.*//** * ccio_alloc_range - Allocate pages in the ioc's resource map. * @ioc: The I/O Controller. * @pages_needed: The requested number of pages to be mapped into the * I/O Pdir... 
* * This function searches the resource map of the ioc to locate a range * of available pages for the requested size. */static intccio_alloc_range(struct ioc *ioc, size_t size){ unsigned int pages_needed = size >> IOVP_SHIFT; unsigned int res_idx;#ifdef CCIO_SEARCH_TIME unsigned long cr_start = mfctl(16);#endif BUG_ON(pages_needed == 0); BUG_ON((pages_needed * IOVP_SIZE) > DMA_CHUNK_SIZE); DBG_RES("%s() size: %d pages_needed %d\n", __FUNCTION__, size, pages_needed); /* ** "seek and ye shall find"...praying never hurts either... ** ggg sacrifices another 710 to the computer gods. */ if (pages_needed <= 8) { /* * LAN traffic will not thrash the TLB IFF the same NIC * uses 8 adjacent pages to map seperate payload data. * ie the same byte in the resource bit map. */#if 0 /* FIXME: bit search should shift it's way through * an unsigned long - not byte at a time. As it is now, * we effectively allocate this byte to this mapping. */ unsigned long mask = ~(~0UL >> pages_needed); CCIO_FIND_FREE_MAPPING(ioc, res_idx, mask, 8);#else CCIO_FIND_FREE_MAPPING(ioc, res_idx, 0xff, 8);#endif } else if (pages_needed <= 16) { CCIO_FIND_FREE_MAPPING(ioc, res_idx, 0xffff, 16); } else if (pages_needed <= 32) { CCIO_FIND_FREE_MAPPING(ioc, res_idx, ~(unsigned int)0, 32);#ifdef __LP64__ } else if (pages_needed <= 64) { CCIO_FIND_FREE_MAPPING(ioc, res_idx, ~0UL, 64);#endif } else { panic("%s: %s() Too many pages to map. pages_needed: %u\n", __FILE__, __FUNCTION__, pages_needed); } panic("%s: %s() I/O MMU is out of mapping resources.\n", __FILE__, __FUNCTION__); resource_found: DBG_RES("%s() res_idx %d res_hint: %d\n", __FUNCTION__, res_idx, ioc->res_hint);#ifdef CCIO_SEARCH_TIME {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -