fetch.cc
    // cross to the next one
    Addr blockBaseAddr = icacheBlockAlignPC(xc->regs.pc);

    do {
#if FULL_SYSTEM
        // do PC-based annotations for the *next* PC here, now that
        // we've updated the PC.  This lets us magically transition to
        // a totally different instruction with zero overhead (e.g.,
        // if the annotation modifies pc).
        if (!xc->spec_mode) {
            Addr oldpc;
            do {
                oldpc = xc->regs.pc;
                system->pcEventQueue.service(xc);
            } while (oldpc != xc->regs.pc);
        }
#endif

        pair<DynInst *, Fault> r = fetchOneInst(thread_number);
        DynInst *inst = r.first;
        Fault fault = r.second;

        if (inst != NULL)
            num_fetched++;

        // inst == NULL signals failure to fetch for some reason (like
        // refusal to fetch a speculative uncached instruction)
        if (fault != No_Fault || inst == NULL) {
            if (fault != No_Fault) {
                fetch_fault_count[thread_number]++;
            }
            return make_pair(num_fetched, false);
        }

        xc->regs.pc = inst->Pred_PC;

        // if we're entering the asynchronous interrupt handler, mark
        // the first instruction as "serializing" to flush the ROB
        // before dispatching it.  Otherwise we're likely to
        // underestimate the overhead of entering the handler.
        if (entering_interrupt) {
            inst->serializing_inst = true;
            entering_interrupt = false;    // just flag first one
        }

        /*
         * Now, figure out if we need to stop fetching...
         */

        // did we exceed the per-cycle instruction limit?
        if (num_fetched >= max_to_fetch)
            return make_pair(num_fetched, false);

        // is the fetch queue full?
        if ((mt_frontend && ifq[thread_number].num_total() == ifq_size) ||
            (!mt_frontend && ifq[0].num_total() == ifq_size)) {
            floss_state.fetch_end_cause[thread_number] = FLOSS_FETCH_QFULL;
            return make_pair(num_fetched, false);
        }

        if (inst->isControl()) {
            branch_cnt++;
            fetched_branch[thread_number]++;

            /* if we've exceeded our branch count, then we're */
            /* done... */
            if (branch_cnt >= fetch_branches) {
                floss_state.fetch_end_cause[thread_number] =
                    FLOSS_FETCH_BRANCH_LIMIT;
                return make_pair(num_fetched, false);
            } else if (inst->Pred_PC != inst->PC + sizeof(MachInst)) {
                /* otherwise... */
                /* if this is a predicted-taken branch, discontinue */
                /* getting instructions from this block, move on to */
                /* the next one. */
                return make_pair(num_fetched, true);
            }
        }

        // did we fall through to the next cache line?
    } while (icacheBlockAlignPC(xc->regs.pc) == blockBaseAddr);

    return make_pair(num_fetched, true);
}


// For debugging purposes
static Addr uncompressedBlockAddress = 0;


/**
 * Do fetch for one thread.
 *
 * @param thread_number Thread ID to fetch from.
 * @param max_to_fetch Maximum number of instructions to fetch.
 * @return Number of instructions fetched.
 */
int
FullCPU::fetchOneThread(int thread_number, int max_to_fetch)
{
    SpecExecContext *xc = thread[thread_number];
    int fetched_this_thread = 0;
    int branch_cnt = 0;

    // Track fetched blocks so we don't fetch the same one twice in
    // the same cycle.
    // (This is relatively expensive... we should find a way to do
    // without it -- Steve)
    std::set<Addr> fetchedAddresses;

#if FULL_SYSTEM
    bool entering_interrupt = false;

    // Check for interrupts here.  We may want to do this sooner in
    // SMT full system (up in fetch(), before we do the thread
    // selection), but for a single-threaded processor it should be OK
    // here.
    if (!xc->spec_mode && checkInterrupts && check_interrupts() &&
        !xc->inPalMode()) {
        int ipl = 0;
        int summary = 0;
        checkInterrupts = false;
        IntReg *ipr = xc->regs.ipr;

        if (xc->regs.ipr[AlphaISA::IPR_SIRR]) {
            for (int i = AlphaISA::INTLEVEL_SOFTWARE_MIN;
                 i < AlphaISA::INTLEVEL_SOFTWARE_MAX; i++) {
                if (ipr[AlphaISA::IPR_SIRR] & (ULL(1) << i)) {
                    // See table 4-19 of 21164 hardware reference
                    ipl = (i - AlphaISA::INTLEVEL_SOFTWARE_MIN) + 1;
                    summary |= (ULL(1) << i);
                }
            }
        }

        uint64_t interrupts = xc->cpu->intr_status();
        for (int i = AlphaISA::INTLEVEL_EXTERNAL_MIN;
             i < AlphaISA::INTLEVEL_EXTERNAL_MAX; i++) {
            if (interrupts & (ULL(1) << i)) {
                // See table 4-19 of 21164 hardware reference
                ipl = i;
                summary |= (ULL(1) << i);
            }
        }

        if (ipr[AlphaISA::IPR_ASTRR])
            panic("asynchronous traps not implemented\n");

        if (ipl && ipl > xc->regs.ipr[AlphaISA::IPR_IPLR]) {
            ipr[AlphaISA::IPR_ISR] = summary;
            ipr[AlphaISA::IPR_INTID] = ipl;
            xc->ev5_trap(Interrupt_Fault);
            entering_interrupt = true;

            DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
                    ipr[AlphaISA::IPR_IPLR], ipl, summary);
        }
    }
#else
    const bool entering_interrupt = false;
#endif

    // Fetch up to the maximum number of lines per cycle allowed
    for (int fetchedLines = 0; fetchedLines < lines_to_fetch; ++fetchedLines) {

        /* is this a bogus text address? (can happen on mis-spec path) */
        if (!xc->validInstAddr(xc->regs.pc)) {
            floss_state.fetch_end_cause[thread_number] =
                FLOSS_FETCH_INVALID_PC;
            break;
        }

        // remember index & seq. number of first inst in this line for
        // cache fetch later
        int first_inst_index = icache_output_buffer[thread_number]->tail;
        InstSeqNum first_inst_seq_num = next_fetch_seq;

        uncompressedBlockAddress = xc->regs.pc;

        /* Mask lower bits to get block starting address */
        Addr blockAddress = icacheBlockAlignPC(xc->regs.pc);

#if FULL_SYSTEM
        bool pal_pc = xc->inPalMode();
#endif

        pair<int, bool> r = fetchOneLine(thread_number,
                                         max_to_fetch - fetched_this_thread,
                                         branch_cnt, entering_interrupt);
        int fetched_this_line = r.first;
        bool keep_fetching = r.second;

        fetched_this_thread += fetched_this_line;

        /*
         * Fetch the entire cache block containing the instruction
         * at "start_address"
         */
        if (fetched_this_line > 0 &&
            (fetchedAddresses.find(blockAddress) == fetchedAddresses.end())) {
            MemAccessResult mem_access_result;

            assert(!icacheInterface->isBlocked());

            MemReqPtr req = new MemReq(blockAddress, xc, icache_block_size);
            req->flags |= INST_READ;
            req->cmd = Read;
            req->asid = thread[thread_number]->getInstAsid();
            req->thread_num = thread_number;
            req->time = curTick;
            req->data = new uint8_t[req->size];
            req->xc = xc;
            req->pc = xc->regs.pc;

            Event *ev = new FetchCompleteEvent(this, thread_number,
                                               first_inst_index,
                                               fetched_this_line,
                                               first_inst_seq_num, req);
            req->completionEvent = ev;

#if FULL_SYSTEM
            // ugly hack!
            if (pal_pc)
                req->paddr = req->vaddr;
            else
                req->paddr = vtophys(xc, blockAddress);

            req->paddr &= EV5::PAddrImplMask;
#else
            Fault fetch_fault = xc->translateInstReq(req);
            if (fetch_fault != No_Fault)
                fatal("Bad translation on instruction fetch, vaddr = 0x%x",
                      req->vaddr);
#endif

            mem_access_result = icacheInterface->access(req);

            if (mem_access_result != MA_HIT) {
                /* if we missed in the I-cache, stop fetching after
                 * this block.
                 */
                floss_state.fetch_end_cause[thread_number] =
                    FLOSS_FETCH_IMISS;
                floss_state.fetch_mem_result[thread_number] =
                    mem_access_result;
                break;
            }
        }

        if (!keep_fetching)
            break;

        /*
         * fetch_branches == 0, fetch one cache line per thread
         */
        if (fetch_branches == 0) {
            floss_state.fetch_end_cause[thread_number] =
                FLOSS_FETCH_BRANCH_LIMIT;
            break;
        }
    }

    if (fetched_this_thread) {
        thread_info[thread_number].last_fetch = curTick;
    }

    /*
     * STATISTICS (per-thread)
     */
    fetch_nisn_dist_[thread_number].sample(fetched_this_thread);
    fetched_inst[thread_number] += fetched_this_thread;
    thread_info[thread_number].fetch_counter += fetched_this_thread;

    return fetched_this_thread;
}


/*****************************************************************************/
/*  fetch up as many instructions as one branch prediction and one cache    */
/*  line access will support without overflowing the IFETCH -> DISPATCH     */
/*  QUEUE                                                                    */
/*                                                                           */
/*  This function calls choose_next_thread() to determine which thread will */
/*  fetch next.                                                              */
/*    => choose_next_thread() calls the individual policy routines          */
/*       based on the setting of "fetch_policy"                             */
/*                                                                           */
/*****************************************************************************/
void
FullCPU::fetch()
{
    int fetched_this_cycle = 0;
    int fetched_this_thread;
    int ports_used = 0;
    int thread_fetched[number_of_threads];

    /*
     * Reset the number of instrs fetched for each thread
     */
    icache_ports_used_last_fetch = 0;
    for (int i = 0; i < number_of_threads; i++) {
        thread_fetched[i] = 0;

#if 0
        if (curTick > 10000 &&
            thread_info[i].last_fetch < curTick - 2000) {
            stringstream s;
            s << "Thread " << i << " hasn't fetched since cycle "
              << thread_info[i].last_fetch << ends;
            exitNow(s.str(), 1);
        }
#endif
    }

    /* always update icounts... we use them for bias adjustment even
     * if we don't need them for scheduling this cycle */
    update_icounts();

    /*
     * For each thread, set/clear the thread_info[].blocked flag.
     * If set, also set floss_state.fetch_end_cause[] to indicate why.
     */
    for (int thread_number = 0; thread_number < number_of_threads;
         thread_number++) {
        ExecContext *xc = thread[thread_number];

        /* assume the worst until proven otherwise */
        thread_info[thread_number].blocked = true;

        /* Unless we fetch a full fetch_width of instructions, this
         * should get set to indicate why we didn't */
        floss_state.fetch_end_cause[thread_number] = FLOSS_FETCH_NONE;

        //
        // Now: check all the reasons we could be blocked... if none of
        // them are true, then mark as not blocked
        //

        // if (!thread_info[thread_number].active) continue;

        if (xc->status() != ExecContext::Active) {
#if FULL_SYSTEM
            if (xc->status() == ExecContext::Suspended &&
                check_interrupts()) {
                xc->activate();
            } else
#endif // FULL_SYSTEM
            {
                continue;
            }
        }

        //
        // The case where the IFQ is full, but all slots are reserved
        // (i.e. no real instructions present) indicates a cache miss.
        // This will be detected and handled later.
        //
        int flag = 0;
        if (mt_frontend) {
            FetchQueue &q = ifq[thread_number];

            if (q.num_total() == q.size && q.num_reserved < q.num_total()) {
                floss_state.fetch_end_cause[thread_number] =
                    FLOSS_FETCH_QFULL;
                flag = 1;
            }
        } else {
            //
            // For the non-MT case...
            //
            FetchQueue &q = ifq[0];

            if (q.num_total() == ifq_size && q.num_reserved < q.num_total()) {
                floss_state.fetch_end_cause[thread_number] =
                    FLOSS_FETCH_QFULL;

                if (thread_number == 0)
                    flag = 1;      // First time through, we collect stats...
                else
                    continue;      // After that, we just keep going...
            }
        }

        if (flag) {
            //
            // We can't fetch for this thread...
            //
            for (int i = 0; i < number_of_threads; ++i) {
                unsigned c = IQNumInstructions(i);
                qfull_iq_occupancy[i] += c;
                qfull_rob_occupancy[i] += ROB.num_thread(i);
                qfull_iq_occ_dist_[i].sample(c);
                qfull_rob_occ_dist_[i].sample(ROB.num_thread(i));
            }

            continue;
        }

        if (fetch_stall[thread_number] != 0) {
            /* fetch loss cause for this thread is fid_cause value */
            floss_state.fetch_end_cause[thread_number] =
                fid_cause[thread_number];
            continue;
        }

        if (fetch_fault_count[thread_number] != 0) {
            // pending faults...
            floss_state.fetch_end_cause[thread_number] =
                FLOSS_FETCH_FAULT_FLUSH;
            continue;
        }

        /* if icache_output_buffer is still full (due to icache miss,
           or multi-cycle hit) then stall */
        if (icache_output_buffer[thread_number]->free_slots() < fetch_width) {
            floss_state.fetch_end_cause[thread_number] = FLOSS_FETCH_IMISS;
            floss_state.fetch_mem_result[thread_number] = MA_CACHE_MISS;
            continue;
        }

        thread_info[thread_number].blocked = false;
    }

    /*
     * We need to block threads that have been assigned zero priority
     * Check for all blocked while we're at it...
     */
    bool all_threads_blocked = true;
    for (int i = 0; i < number_of_threads; i++) {
        if (thread_info[i].priority == 0)
            thread_info[i].blocked = true;

        if (!thread_info[i].blocked)
            all_threads_blocked = false;
    }

    if (all_threads_blocked) {
        flossRecord(&floss_state, thread_fetched);
        fetch_idle_cycles++;
        // check_counters();
        return;
    }

    /* Add our static biases into the current icounts */
    /* ==> these will be removed after the choose_next_thread() function */
    for (int i = 0; i < number_of_threads; i++)
        thread_info[i].current_icount += static_icount_bias[i];

    /*
     * This function takes the contents of thread_info[] into account
     * and may change fetch_list[].blocked
     */
    choose_next_thread(fetch_list);

    /* Remove our static biases from the current icounts */
    for (int i = 0; i < number_of_threads; i++)
        thread_info[i].current_icount -= static_icount_bias[i];

    //
    // Assert blocked flag for threads with active ROB or IQ caps
    //
    for (int i = 0; i < number_of_threads; i++) {
        int thread_number = fetch_list[i].thread_number;

        /* Handle IQ and ROB caps */
        if (iq_cap_active[thread_number] || rob_cap_active[thread_number])
            fetch_list[i].blocked = true;
    }

    /*
     * Are all threads blocked?
     *   => Need to check again, because the fetch policy may block a thread
     *
     * scan by fetch_list[] index to find threads not blocked by cache miss
     * or by fetch policy
     */
    all_threads_blocked = true;
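
/*
 * Illustrative sketch: the fetch loops above compare
 * icacheBlockAlignPC(xc->regs.pc) against a saved block base address to
 * decide whether sequential fetch has crossed into the next I-cache block.
 * The snippet below assumes the helper simply masks the low-order PC bits
 * down to the block's starting address; the 64-byte block size, the Addr
 * stand-in, and the constant name here are hypothetical, not taken from
 * the simulator's configuration.
 */
#if 0
#include <cstdint>
#include <cassert>

typedef uint64_t Addr;                     // stand-in for the simulator's Addr

static const Addr kICacheBlockSize = 64;   // hypothetical power-of-two block size

static Addr icacheBlockAlignPC(Addr pc)
{
    // Clear the low-order bits so the PC points at the start of its block.
    return pc & ~(kICacheBlockSize - 1);
}

int main()
{
    Addr blockBaseAddr = icacheBlockAlignPC(0x12004a);        // -> 0x120040

    // Sequential fetch keeps going while the next PC maps to the same
    // block; crossing into the next block (or a taken branch) ends it.
    assert(icacheBlockAlignPC(0x12004e) == blockBaseAddr);    // same block
    assert(icacheBlockAlignPC(0x120080) != blockBaseAddr);    // next block
    return 0;
}
#endif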