/* ref.c */
}
#endif /* !D4CUSTOM || D4_OPT (walloc_never) */

#if !D4CUSTOM || D4_OPT (walloc_nofetch)
/*
 * Write allocate if no fetch is required
 * (write exactly fills an integral number of subblocks)
 */
D4_INLINE
int
d4walloc_nofetch (d4cache *c, d4memref m)
{
    return m.size == (D4REFNSB(c,m) << D4VAL (c, lg2subblocksize));
}
#endif /* !D4CUSTOM || D4_OPT (walloc_nofetch) */

#if !D4CUSTOM || D4_OPT (wback_always)
/*
 * Always write back
 */
D4_INLINE
int
d4wback_always (d4cache *c, d4memref m, int setnumber, d4stacknode *ptr, int walloc)
{
    return 1;
}
#endif /* !D4CUSTOM || D4_OPT (wback_always) */

#if !D4CUSTOM || D4_OPT (wback_never)
/*
 * Never write back (i.e., always write through)
 */
D4_INLINE
int
d4wback_never (d4cache *c, d4memref m, int setnumber, d4stacknode *ptr, int walloc)
{
    return 0;
}
#endif /* !D4CUSTOM || D4_OPT (wback_never) */

#if !D4CUSTOM || D4_OPT (wback_nofetch)
/*
 * Write back if no fetch is required
 * The actual test is for every affected subblock to be valid or
 * for the write to completely cover all affected subblocks.
 */
D4_INLINE
int
d4wback_nofetch (d4cache *c, d4memref m, int setnumber, d4stacknode *ptr, int walloc)
{
    return (D4ADDR2SBMASK(c,m) & ~ptr->valid) == 0 ||
           m.size == (D4REFNSB(c,m) << D4VAL (c, lg2subblocksize));
}
#endif /* !D4CUSTOM || D4_OPT (wback_nofetch) */

#if !D4CUSTOM || D4_OPT (ccc)
/*
 * This function implements an infinite-sized cache, used
 * when classifying cache misses into compulsory, capacity,
 * and conflict misses.
 *
 * Return value:
 *  -1 if at least 1 affected subblock (but not the whole block)
 *     misses in the infinite cache
 *   0 if all affected subblocks hit in the infinite cache
 *   1 if the whole block misses in the infinite cache
 * Note we require that the number of subblocks per block be a
 * divisor of D4_BITMAP_RSIZE, so blocks are not split across bitmaps
 */
static int
d4infcache (d4cache *c, d4memref m)
{
    const unsigned int sbsize = 1 << D4VAL (c, lg2subblocksize);
    const d4addr sbaddr = D4ADDR2SUBBLOCK (c, m.address);
    const int nsb = D4REFNSB (c, m);
    unsigned int bitoff;    /* offset of bit in bitmap */
    int hi, lo, i, b;
    static int totranges = 0, totbitmaps = 0;

    bitoff = (sbaddr & (D4_BITMAP_RSIZE-1)) / sbsize;

    /* binary search for range containing our address */
    hi = c->nranges - 1;
    lo = 0;
    while (lo <= hi) {
        i = lo + (hi-lo)/2;
        if (c->ranges[i].addr + D4_BITMAP_RSIZE <= sbaddr)
            lo = i + 1;     /* need to look higher */
        else if (c->ranges[i].addr > sbaddr)
            hi = i - 1;     /* need to look lower */
        else {              /* found the right range */
            const int sbpb = 1 << (D4VAL (c, lg2blocksize) - D4VAL (c, lg2subblocksize));
            int nb;         /* count affected bits we've seen */
            for (nb = 0, b = 0;  b < nsb;  b++)
                nb += ((c->ranges[i].bitmap[(bitoff+b)/CHAR_BIT] &
                        (1<<((bitoff+b)%CHAR_BIT))) != 0);
            if (nb == nsb)
                return 0;   /* we've seen it all before */
            /* consider the whole block */
            if (sbpb != 1 && nsb != sbpb) {
                unsigned int bbitoff = (D4ADDR2BLOCK (c, m.address) & (D4_BITMAP_RSIZE-1)) / sbsize;
                for (nb = 0, b = 0;  b < sbpb;  b++)
                    nb += ((c->ranges[i].bitmap[(bbitoff+b)/CHAR_BIT] &
                            (1<<((bbitoff+b)%CHAR_BIT))) != 0);
            }
            /* set the bits */
            for (b = 0;  b < nsb;  b++)
                c->ranges[i].bitmap[(bitoff+b)/CHAR_BIT] |= (1<<((bitoff+b)%CHAR_BIT));
            return nb==0 ? 1 : -1;
        }
    }

    /* lo > hi: range not found; find position and insert new range */
    if (c->nranges >= c->maxranges-1) {
        /* ran out of range pointers; allocate some more */
        int oldmaxranges = c->maxranges;
        c->maxranges = (c->maxranges + 10) * 2;
        if (c->ranges == NULL)  /* don't trust realloc(NULL,...) */
            c->ranges = malloc (c->maxranges * sizeof(*c->ranges));
        else
            c->ranges = realloc (c->ranges, c->maxranges * sizeof(*c->ranges));
        if (c->ranges == NULL) {
            fprintf (stderr, "DineroIV: can't allocate more "
                     "bitmap pointers for cache %s (%d so far, total %d)\n",
                     c->name, oldmaxranges, totranges);
            exit(1);
        }
        totranges++;
    }
    for (i = c->nranges++ - 1;  i >= 0;  i--) {
        if (c->ranges[i].addr < sbaddr)
            break;
        c->ranges[i+1] = c->ranges[i];
    }
    c->ranges[i+1].addr = sbaddr & ~(D4_BITMAP_RSIZE-1);
    c->ranges[i+1].bitmap = calloc ((((D4_BITMAP_RSIZE + sbsize - 1) / sbsize) + CHAR_BIT - 1) / CHAR_BIT, 1);
    if (c->ranges[i+1].bitmap == NULL) {
        fprintf (stderr, "DineroIV: can't allocate another bitmap "
                 "(currently %d, total %d, each mapping 0x%x bytes)\n",
                 c->nranges-1, totbitmaps, D4_BITMAP_RSIZE);
        exit(1);
    }
    totbitmaps++;
    for (b = 0;  b < nsb;  b++, bitoff++)
        c->ranges[i+1].bitmap[bitoff/CHAR_BIT] |= (1<<(bitoff%CHAR_BIT));
    return 1;   /* we've not seen it before */
}
#endif /* !D4CUSTOM || D4_OPT (ccc) */
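/*
 * Editor's note: a condensed restatement (not additional code) of how
 * d4ref below consumes d4infcache's return value once a reference has
 * already missed in the real cache:
 *
 *  if (!fullmiss)                   // hit in the fully associative cache
 *      ...count a conflict miss...
 *  else if (d4infcache (c, m) != 0) // 1 or -1: some part never seen before
 *      ...count a compulsory miss...
 *  else                             // seen before, yet fully assoc missed
 *      ...count a capacity miss...
 */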
/*
 * Split a memory reference if it crosses a block boundary.
 * The remainder, if any, is queued for processing later.
 */
D4_INLINE
d4memref
d4_splitm (d4cache *c, d4memref mr, d4addr ba)
{
    const int bsize = 1 << D4VAL (c, lg2blocksize);
    const int bmask = bsize - 1;
    int newsize;
    d4pendstack *pf;

    if (ba == D4ADDR2BLOCK (c, mr.address + mr.size - 1))
        return mr;
    pf = d4get_mref();
    pf->m.address = ba + bsize;
    pf->m.accesstype = mr.accesstype | D4_MULTIBLOCK;
    newsize = bsize - (mr.address & bmask);
    pf->m.size = mr.size - newsize;
    pf->next = c->pending;
    c->pending = pf;
    c->multiblock++;
    mr.size = newsize;
    return mr;
}
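/*
 * Editor's sketch of d4_splitm in action (assumed parameters, not part
 * of the simulator; the helper name is hypothetical).  With 32-byte
 * blocks, a 16-byte reference at 0x1c is trimmed to the 4 bytes left
 * in block 0x00, and the 12-byte remainder at 0x20 is queued on
 * c->pending with D4_MULTIBLOCK set.
 */
#if 0
static void
d4_splitm_example (d4cache *c)  /* assumes D4VAL (c, lg2blocksize) == 5 */
{
    d4memref mr, first;
    mr.address = 0x1c;
    mr.size = 16;
    mr.accesstype = D4XREAD;
    first = d4_splitm (c, mr, D4ADDR2BLOCK (c, mr.address));
    assert (first.size == 4);                   /* bytes 0x1c..0x1f */
    assert (c->pending->m.address == 0x20);     /* 12-byte remainder */
    assert ((c->pending->m.accesstype & D4_MULTIBLOCK) != 0);
}
#endif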
/*
 * Handle a memory reference for the given cache.
 * The user calls this function for the cache closest to
 * the processor; other caches are handled automatically.
 */
void
d4ref (d4cache *c, d4memref mr)
{
    /* special cases first */
    if ((D4VAL (c, flags) & D4F_MEM) != 0)  /* Special case for simulated memory */
        c->fetch[(int)mr.accesstype]++;
    else if (mr.accesstype == D4XCOPYB || mr.accesstype == D4XINVAL) {
        d4memref m = mr;    /* dumb compilers might de-optimize if we take addr of mr */
        if (m.accesstype == D4XCOPYB)
            d4copyback (c, &m, 1);
        else
            d4invalidate (c, &m, 1);
    }
    else {  /* Everything else */
        const d4addr blockaddr = D4ADDR2BLOCK (c, mr.address);
        const d4memref m = d4_splitm (c, mr, blockaddr);
        const int atype = D4BASIC_ATYPE (m.accesstype);
        const int setnumber = D4ADDR2SET (c, m.address);
        const int ronly = D4CUSTOM && (D4VAL (c, flags) & D4F_RO) != 0;  /* conservative */
        const int walloc = !ronly && atype == D4XWRITE && D4VAL (c, wallocf) (c, m);
        const int sbbits = D4ADDR2SBMASK (c, m);
        int miss, blockmiss, wback;
        d4stacknode *ptr;

        if ((D4VAL (c, flags) & D4F_RO) != 0 && atype == D4XWRITE) {
            fprintf (stderr, "Dinero IV: write to read-only cache %d (%s)\n",
                     c->cacheid, c->name);
            exit (9);
        }

        /*
         * Find address in the cache.
         * Quickly check for top of stack.
         */
        ptr = c->stack[setnumber].top;
        if (ptr->blockaddr == blockaddr && ptr->valid != 0)
            ;   /* found it */
        else if (!D4CUSTOM || D4VAL (c, assoc) > 1)
            ptr = d4_find (c, setnumber, blockaddr);
        else
            ptr = NULL;
        blockmiss = (ptr == NULL);
        miss = blockmiss || (sbbits & ptr->valid) != sbbits;

        /*
         * Prefetch on reads and instruction fetches, but not on
         * writes, misc, and prefetch references.
         * Optionally, some percentage may be thrown away.
         */
        if ((!D4CUSTOM || !D4_OPT (prefetch_none)) &&
            (m.accesstype == D4XREAD || m.accesstype == D4XINSTRN)) {
            d4pendstack *pf = D4VAL (c, prefetchf) (c, m, miss, ptr);
            if (pf != NULL) {
                /* Note: 0 <= random() <= 2^31-1
                 * and 0 <= random()/(INT_MAX/100) < 100.
                 */
                if (D4VAL (c, prefetch_abortpercent) > 0 &&
                    random()/(INT_MAX/100) < D4VAL (c, prefetch_abortpercent))
                    d4put_mref (pf);    /* throw it away */
                else {
                    pf->next = c->pending;  /* add to pending list */
                    c->pending = pf;
                }
            }
        }

        /*
         * Update the cache
         * Don't do it for non-write-allocate misses
         */
        wback = 0;
        if (ronly || atype != D4XWRITE || !blockmiss || walloc) {
            /*
             * Adjust priority stack as necessary
             */
            ptr = D4VAL (c, replacementf) (c, setnumber, m, ptr);

            /*
             * Update state bits
             */
            if (blockmiss) {
                assert (ptr->valid == 0);
                ptr->referenced = 0;
                ptr->dirty = 0;
            }
            ptr->valid |= sbbits;
            if ((m.accesstype & D4PREFETCH) == 0)
                ptr->referenced |= sbbits;

            /*
             * For writes, decide if write-back or write-through.
             * Set the dirty bits if write-back is going to be used.
             */
            wback = !ronly && (atype == D4XWRITE) &&
                    D4VAL (c, wbackf) (c, m, setnumber, ptr, walloc);
            if (wback)
                ptr->dirty |= sbbits;

            /*
             * Take care of replaced block
             * including write-back if necessary
             */
            if (blockmiss) {
                d4stacknode *rptr = c->stack[setnumber].top->up;
                if (rptr->valid != 0) {
                    if (!ronly && (rptr->valid & rptr->dirty) != 0)
                        d4_wbblock (c, rptr, D4VAL (c, lg2subblocksize));
                    if (c->stack[setnumber].n > D4HASH_THRESH)
                        d4_unhash (c, setnumber, rptr);
                    rptr->valid = 0;
                }
            }
        }

        /*
         * Prepare reference for downstream cache.
         * We do this for write-throughs, read-type misses,
         * and fetches for incompletely written subblocks
         * when a write misses and write-allocate is being used.
         * In some cases, a write can generate two downstream references:
         * a fetch to load the complete subblock and a write-through store.
         */
        if (!ronly && atype == D4XWRITE && !wback) {
            d4pendstack *newm = d4get_mref();
            newm->m = m;
            newm->next = c->pending;
            c->pending = newm;
        }
        if (miss &&
            (ronly || atype != D4XWRITE ||
             (walloc && m.size != (D4REFNSB (c, m) << D4VAL (c, lg2subblocksize))))) {
            d4pendstack *newm = d4get_mref();
            /* note, we drop prefetch attribute */
            newm->m.accesstype = (atype == D4XWRITE) ? D4XREAD : atype;
            newm->m.address = D4ADDR2SUBBLOCK (c, m.address);
            newm->m.size = D4REFNSB (c, m) << D4VAL (c, lg2subblocksize);
            newm->next = c->pending;
            c->pending = newm;
        }
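        /*
         * Editor's worked example (assumed parameters, not original
         * source text): a 2-byte write-allocate write into a 16-byte
         * subblock that misses under a write-through policy (wback == 0)
         * triggers both queues above: the store itself goes downstream,
         * and because m.size (2) != D4REFNSB(c,m) << lg2subblocksize (16),
         * a 16-byte D4XREAD fetch of the enclosing subblock is queued as
         * well -- the "two downstream references" case mentioned above.
         */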
        /*
         * Do fully associative and infinite sized caches too.
         * This allows classifying misses into {compulsory,capacity,conflict}.
         * An extra "set" is provided (==c->numsets) for the fully associative
         * simulation.
         */
        if ((D4CUSTOM && D4_OPT (ccc)) ||
            (!D4CUSTOM && (c->flags & D4F_CCC) != 0)) {
            /* set to use for fully assoc cache */
            const int fullset = D4VAL(c,numsets);
            /* number of blocks in fully assoc cache */
            /* like miss and blockmiss, but for fully assoc cache */
            int fullmiss, fullblockmiss;

            ptr = c->stack[fullset].top;
            if (ptr->blockaddr != blockaddr)
                ptr = d4_find (c, fullset, blockaddr);
            else if (ptr->valid == 0)
                ptr = NULL;
            fullblockmiss = (ptr == NULL);
            fullmiss = fullblockmiss || (sbbits & ptr->valid) != sbbits;

            /* take care of stack update */
            if (ronly || atype != D4XWRITE || !fullblockmiss || walloc) {
                ptr = D4VAL (c, replacementf) (c, fullset, m, ptr);
                assert (!fullblockmiss || ptr->valid == 0);
                ptr->valid |= sbbits;
            }

            /* classify misses */
            if (miss) {
                int infmiss = 0;    /* assume hit in infinite cache */
                if (!fullmiss)      /* hit in fully assoc: conflict miss */
                    c->conf_miss[(int)m.accesstype]++;
                else {
                    infmiss = d4infcache (c, m);
                    if (infmiss != 0)   /* first miss: compulsory */
                        c->comp_miss[(int)m.accesstype]++;
                    else                /* hit in infinite cache: capacity miss */
                        c->cap_miss[(int)m.accesstype]++;
                }
                if (blockmiss) {
                    if (!fullblockmiss)     /* block hit in full assoc */
                        c->conf_blockmiss[(int)m.accesstype]++;
                    else if (infmiss == 1)  /* block miss in full and inf */
                        c->comp_blockmiss[(int)m.accesstype]++;
                    else                    /* part of block hit in infinite cache */
                        c->cap_blockmiss[(int)m.accesstype]++;
                }
            }

            /* take care of replaced block */
            if (fullblockmiss) {
                d4stacknode *rptr = c->stack[fullset].top->up;
                if (rptr->valid != 0) {
                    if (c->stack[fullset].n > D4HASH_THRESH)
                        d4_unhash (c, fullset, rptr);
                    rptr->valid = 0;
                }
            }
        }

        /*
         * Update non-ccc metrics.
         */
        c->fetch[(int)m.accesstype]++;
        if (miss) {
            c->miss[(int)m.accesstype]++;
            if (blockmiss)
                c->blockmiss[(int)m.accesstype]++;
        }

        /*
         * Now make recursive calls for pending references
         */
        if (c->pending)
            d4_dopending (c, c->pending);
    }
}
#endif /* !D4CUSTOM || D4_REF_ONCE>1 */

#undef D4_REF_ONCE
#define D4_REF_ONCE 2   /* from now on, skip the first stuff and do the rest */
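/*
 * Editor's usage sketch (not from the original file; assumes the public
 * Dinero IV entry points d4new and d4setup from d4.h, and that the L1
 * cache's geometry and policy function pointers have been filled in):
 *
 *  d4cache *mem = d4new (NULL);    // simulated memory (D4F_MEM)
 *  d4cache *l1  = d4new (mem);     // cache closest to the processor
 *  ...set l1 parameters (lg2blocksize, assoc, wallocf, wbackf, ...)...
 *  if (d4setup () != 0)
 *      ...report configuration error...
 *
 *  d4memref r;
 *  r.address = 0x1000;
 *  r.size = 4;
 *  r.accesstype = D4XREAD;
 *  d4ref (l1, r);  // downstream references are generated automatically
 */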