dispatch.cc
    //
    //  for load/stores:
    //    idep #0     - store operand (value that is stored)
    //    idep #1, #2 - eff addr computation inputs (addr of access)
    //
    //  resulting IQ/LSQ operation pair:
    //    IQ (effective address computation operation):
    //      idep #0, #1 - eff addr computation inputs (addr of access)
    //    LSQ (memory access operation):
    //      idep #0 - operand input (value that is stored)
    //      idep #1 - eff addr computation result (from IQ op)
    //
    //  effective address computation is transferred via the reserved
    //  name DTMP
    //
    RegInfoElement reginfo[TheISA::MaxInstDestRegs];

    //////////////////////////////////////////////////////////////////
    //
    //  Allocate an ROB entry for this instruction
    //
    //////////////////////////////////////////////////////////////////
    ROBStation *rob = ROB.new_tail(thread);
    rob->init(inst, dispatch_seq, numIQueues);

    //////////////////////////////////////////////////////////////////
    //
    //  Determine the chaining information for this instruction
    //
    //////////////////////////////////////////////////////////////////
    NewChainInfo new_chain;

    if (clusterSharedInfo->ci_table != 0) {
        new_chain = choose_chain(inst, iq_idx);

        if (new_chain.suggested_cluster >= 0)
            iq_idx = new_chain.suggested_cluster;

        // DPRINTF(Dispatch, "DISP: chain_info: (clust %d) (head %d)\n",
        //         iq_idx, new_chain.head_of_chain);
    } else {
        // if we're not using chains, don't let the lack of them be
        // a problem...
        new_chain.out_of_chains = false;
    }

    if (new_chain.out_of_chains) {
        ROB.remove(rob);
        ++chains_insuf[thread];
        return 0;
    }

    //////////////////////////////////////////////////////////////////
    //
    //  Place the instruction into the IQ
    //
    //  The ROB has been (almost) completely initialized
    //
    //////////////////////////////////////////////////////////////////
    BaseIQ::iterator rs = 0;

    //  Send the instruction to the Instruction Queue
    //  (memory barriers excepted: they go to LSQ only)
    if (!inst->isMemBarrier()) {
        rs = IQ[iq_idx]->add(inst, dispatch_seq, rob, reginfo, &new_chain);

        if (rs.isnull()) {
            // de-allocate the ROB & LSQ entries...
            ROB.remove(rob);

            // we're done for this cycle
            return 0;
        }

        rs->dispatch_timestamp = curTick;
    }

    rob->iq_entry = rs;

    //////////////////////////////////////////////////////////////////
    //
    //  Add this instruction to the LSQ, as necessary
    //
    //////////////////////////////////////////////////////////////////
    if (inst->isMemRef() || inst->isMemBarrier()) {
        //  Remember to link in the iq_entry!!
        //
        BaseIQ::iterator lsq = LSQ->add(inst, dispatch_seq + 1, rob, 0, 0);

        // Check for resource allocation failure
        if (lsq.isnull()) {
            if (rs.notnull()) {
                // we have to clean-up dep-links...
                for (int i = 0; i < rs->num_ideps; ++i) {
                    if (rs->idep_ptr[i]) {
                        delete rs->idep_ptr[i];
                        rs->idep_ptr[i] = 0;
                    }
                }
            }

            // de-allocate the ROB entry
            ROB.remove(rob);

            // de-allocate the IQ entry
            if (rs.notnull())
                IQ[iq_idx]->squash(rs);

            // We're done for this cycle
            return 0;
        }

        lsq->dispatch_timestamp = curTick;
        lsq->iq_entry = rs;

        if (rs.notnull()) {
            IQ[iq_idx]->registerLSQ(rs, lsq);
            // rs->lsq_entry = lsq;
        }

        // Mark this ROB entry as being a memory operation
        // (changes the ROB-entry sequence number to match the LSQ entry)
        rob->setMemOp(lsq);

        // memory barriers don't require an EA computation
        if (inst->isMemBarrier()) {
            rob->eaCompPending = false;
        }

        // We know this instruction has dispatched... add one to
        // the sequence counter (for the LSQ entry)
        ++dispatch_seq;
    }
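
    //
    //  A memory reference thus occupies two queue entries: the IQ entry
    //  (sequence number dispatch_seq) holds the effective-address
    //  computation, while the LSQ entry (dispatch_seq + 1) holds the
    //  access itself, so the sequence counter was bumped once above for
    //  the LSQ entry and is bumped once more below for the IQ entry.
    //  Memory barriers skip the IQ and occupy only an LSQ slot.
    //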
    // We've dispatched... count one for the IQ
    ++dispatch_seq;

    //---------------------------------------------------------
    //
    //  Now that we know that we're going to USE the specified
    //  chain...
    //
    if (new_chain.head_of_chain) {
        ++chain_heads[thread];
        ++chain_heads_in_rob;

        clusterSharedInfo->ci_table->claim(new_chain.head_chain, thread,
                                           rob->seq);

        if (chainWires != 0) {
            chainWires->allocateWire(iq_idx, new_chain.head_chain);
        }
    }

    // Annotate the ROB entry
    rob->queue_num = iq_idx;

    //
    //  Inform all other clusters that an instruction has dispatched
    //
    for (unsigned i = 0; i < numIQueues; ++i)
        if (i != iq_idx)
            IQ[i]->inform_dispatch(rs);

    //
    //  1) install outputs after inputs to prevent self reference
    //  2) Update the register information table
    //
    rob->num_outputs = inst->numDestRegs();

    for (int i = 0; i < rob->num_outputs; ++i) {
        TheISA::RegIndex reg = inst->destRegIdx(i);

        rob->onames[i] = reg;

        create_vector[thread].set_entry(reg, rob, i, inst->spec_mode);

        reginfo[i].setCluster(iq_idx);
        (*clusterSharedInfo->ri_table)[thread][reg] = reginfo[i];
    }

    //
    //  Store off the use_spec_cv bitmap and the spec_create
    //  vector entries
    //
    if (inst->recover_inst) {
        // rob->spec_state = new CreateVecSpecState(thread);
    }

    ////////////////////////////////////////////////////////////
    //
    //  Now that we know that this instruction has made
    //  it into the IQ/LSQ/ROB... count it as dispatched
    //
    //  NOTE: we include EA-comp instructions in the distribution
    //
    ++dispatch_count[thread];
    ++dispatch_count_stat[thread];

    ++dispatched_ops[thread];
    if (inst->isMemRef())
        ++dispatched_ops[thread];

    if (inst->isSerializing())
        ++dispatched_serializing[thread];

    //
    //  Add to the pipetrace...
    //
    if (ptrace)
        ptrace->moveInst(inst, PipeTrace::Dispatch, 0, 0, 0);

    /*
     *  Physical registers...
     */
    unsigned num_fp_regs = inst->numFPDestRegs();
    unsigned num_int_regs = inst->numIntDestRegs();

    free_fp_physical_regs -= num_fp_regs;
    free_int_physical_regs -= num_int_regs;

    used_fp_physical_regs[thread] += num_fp_regs;
    used_int_physical_regs[thread] += num_int_regs;

    return rob;
}

//
//  Return the number of a thread which can decode instructions into the
//  Decode/Dispatch queue.  This requires that the thread have instructions
//  in the fetch queue and that there is space available for these
//  instructions in the decode queue
//
int
FullCPU::choose_decode_thread()
{
    int rv = -1;

    // Use a Round-Robin approach to decide where to start
    unsigned t = first_decode_thread;
    first_decode_thread = (first_decode_thread + 1) % number_of_threads;
    unsigned first = t;

    unsigned low_count = UINT_MAX;

    switch (fetch_policy) {
      case IC:
        first = 0;
        t = 0;
        do {
            unsigned cnt = decodeQueue->count(t) + IQNumInstructions(t);

            if (ifq[t].num_available()) {
                if (cnt < low_count) {
                    low_count = cnt;
                    rv = t;
                }
            }

            t = (t + 1) % number_of_threads;
        } while (first != t);
        break;

      default:
        do {
            if (ifq[t].num_available()) {
                rv = t;
                break;
            } else {
                t = (t + 1) % number_of_threads;
            }
        } while (first != t);
        break;
    }

    return rv;
}
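
//
//  Worked example for choose_decode_thread(): under the IC fetch policy
//  with two threads, if thread 0 has 12 instructions in its decode queue
//  plus IQ and thread 1 has 7, and both have fetched instructions
//  available, thread 1 is selected because its count is lower.  Under any
//  other policy, the first thread (in round-robin order starting at
//  first_decode_thread) with fetched instructions available is selected.
//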
void
FullCPU::start_decode()
{
    // if we don't have a place to put new instructions, bail
    if (!decodeQueue->loadable())
        return;

    int thread = choose_decode_thread();

    if (thread < 0) {
        // if we can't decode anything this cycle...
        return;
    }

    FetchQueue *fq = &(ifq[thread]);

    // as long as there are instructions, and we have bandwidth
    while (decodeQueue->addBW(thread) &&
           (fq->num_valid + fq->num_squashed) > 0) {
        DynInst *inst = fq->pull();

        if (inst) {
            decodeQueue->add(inst);

            if (inst->btb_miss() && inst->recover_inst)
                fixup_btb_miss(inst);
        } else {
            // instruction was squashed earlier...
            // drop it on the floor
        }
    }
}

void
FullCPU::fixup_btb_miss(DynInst *inst)
{
    // For absolute and PC-relative (i.e. direct, not indirect)
    // control instructions that were predicted taken, the BTB
    // may have predicted the target address incorrectly or not
    // at all.  Since these addresses by definition can be
    // calculated without executing the instruction, fix that up
    // here.
    //
    // Note that indirect jumps (that jump to addresses stored in
    // registers) need to be executed to get the target, so we
    // can't fix those up yet.
    //
    // The F_DIRJMP flag indicates a direct control transfer instruction.
    //
    int thread_number = inst->thread_number;

    // if we had a BTB miss that put us onto the wrong path
    if (inst->isDirectCtrl() && inst->btb_miss() && inst->recover_inst) {
        assert(inst->xc->spec_mode > 0);

        fetch_squash(thread_number);

        inst->recover_inst = false;
        inst->xc->spec_mode--;

        // Correct the PC for the BTB miss
        inst->xc->regs.pc = inst->branchTarget();

        //
        // If we've transferred completely out of spec-mode...
        //
        if (inst->xc->spec_mode == 0) {
            // reset use_spec_? reg maps and speculative memory state
            inst->xc->reset_spec_state();
        }

        // Make sure that we don't apply the fixup on THIS cycle:
        // have to schedule event since fetch is simulated after dispatch
        // within each cycle
        fetch_stall[thread_number] |= BRANCH_STALL;

        Event *ev = new ClearFetchStallEvent(this, thread_number,
                                             BRANCH_STALL);
        ev->schedule(curTick + cycles(1));

        fid_cause[thread_number] = FLOSS_FETCH_BRANCH_RECOVERY;
    }
}

void
FullCPU::dispatch_init()
{
    if (IQ[0]->type() == BaseIQ::Segmented)
        chainWires = new ChainWireInfo(max_chains, max_wires, numIQueues,
                                       chainWirePolicy);
    else
        chainWires = 0;
}
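
//
//  Statistics setup: dispatchRegStats() registers the raw per-thread vector
//  stats (one entry per thread, with flags(total) providing a summed total),
//  and dispatchRegFormulas() derives rates from them, e.g.
//  dispatch_rate = dispatch_count_stat / numCycles.
//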
void
FullCPU::dispatchRegStats()
{
    using namespace Stats;

    dispatch_count.resize(number_of_threads);
    for (int i = 0; i < number_of_threads; ++i)
        dispatch_count[i] = 0;

    dispatch_count_stat
        .init(number_of_threads)
        .name(name() + ".DIS:count")
        .desc("cumulative count of dispatched insts")
        .flags(total)
        ;

    dispatched_serializing
        .init(number_of_threads)
        .name(name() + ".DIS:serializing_insts")
        .desc("count of serializing insts dispatched")
        .flags(total)
        ;

    dispatch_serialize_stall_cycles
        .init(number_of_threads)
        .name(name() + ".DIS:serialize_stall_cycles")
        .desc("count of cycles dispatch stalled for serializing inst")
        .flags(total)
        ;

    //
    //  Chaining stats
    //
    chain_heads
        .init(number_of_threads)
        .name(name() + ".DIS:chain_heads")
        .desc("number insts that are chain heads")
        .flags(total)
        ;

    chains_insuf
        .init(number_of_threads)
        .name(name() + ".DIS:chains_insuf")
        .desc("number of times thread had insuf chains")
        .flags(total)
        ;

    dispatched_ops
        .init(number_of_threads)
        .name(name() + ".DIS:op_count")
        .desc("number of operations dispatched")
        .flags(total)
        ;

    rob_cap_events
        .init(number_of_threads)
        .name(name() + ".ROB:cap_events")
        .desc("number of cycles where ROB cap was active")
        .flags(total)
        ;

    rob_cap_inst_count
        .init(number_of_threads)
        .name(name() + ".ROB:cap_inst")
        .desc("number of instructions held up by ROB cap")
        .flags(total)
        ;

    iq_cap_events
        .init(number_of_threads)
        .name(name() + ".IQ:cap_events")
        .desc("number of cycles where IQ cap was active")
        .flags(total)
        ;

    iq_cap_inst_count
        .init(number_of_threads)
        .name(name() + ".IQ:cap_inst")
        .desc("number of instructions held up by IQ cap")
        .flags(total)
        ;

    mod_n_disp_stalls.init(number_of_threads);
    mod_n_disp_stall_free.init(number_of_threads);

    if (dispatch_policy == MODULO_N) {
        mod_n_disp_stalls
            .name(name() + ".DIS:mod_n_stalls")
            .desc("cycles where dispatch stalled due to mod-n")
            .flags(total)
            ;

        mod_n_disp_stall_free
            .name(name() + ".DIS:mod_n_stall_free")
            .desc("free slots when dispatch stalled due to mod-n")
            .flags(total)
            ;
    }

    reg_int_full
        .name(name() + ".REG:int:full")
        .desc("number of cycles where there were no INT registers")
        ;

    reg_fp_full
        .name(name() + ".REG:fp:full")
        .desc("number of cycles where there were no FP registers")
        ;

    insufficient_chains
        .name(name() + ".DIS:insufficient_chains")
        .desc("Number of instances where dispatch stopped")
        ;

    secondChoiceCluster
        .name(name() + ".DIS:second_choice_clust")
        .desc("Number of instructions dispatched to second-choice cluster");

    secondChoiceStall
        .name(name() + ".DIS:second_choice_stall")
        .desc("Number of instructions stalled when first choice not available");

    //
    //  Two input instruction stats
    //
    two_op_inst_count
        .init(number_of_threads)
        .name(name() + ".DIS:two_input_insts")
        .desc("Number of two input instructions queued")
        .flags(total)
        ;

    one_rdy_inst_count
        .init(number_of_threads)
        .name(name() + ".DIS:one_rdy_insts")
        .desc("number of 2-op insts w/ one rdy op")
        .flags(total)
        ;

    chain_create_dist
        .init(NUM_CHAIN_CR_CLASSES)
        .name(name() + ".DIS:chain_creation")
        .desc("Reason that chain head was created")
        .flags(pdf | dist)
        ;
    for (int i = 0; i < NUM_CHAIN_CR_CLASSES; ++i) {
        chain_create_dist.subname(i, chain_cr_class_desc[i]);
    }

    inst_class_dist
        .init(NUM_INSN_CLASSES)
        .name(name() + "inst_class_dist")
        .desc("Operand status at dispatch")
        .flags(pdf | dist)
        ;
    for (int i = 0; i < NUM_INSN_CLASSES; ++i) {
        inst_class_dist.subname(i, dispatchInstClassDesc[i]);
    }
}

void
FullCPU::dispatchRegFormulas()
{
    using namespace Stats;

    chain_head_frac
        .name(name() + ".DIS:chain_head_frac")
        .desc("fraction of insts that are chain heads")
        .flags(total)
        ;
    chain_head_frac = 100 * chain_heads / dispatch_count_stat;

    chains_insuf_rate
        .name(name() + ".DIS:chains_insuf_rate")
        .desc("rate that thread had insuf chains")
        .flags(total)
        ;
    chains_insuf_rate = chains_insuf / numCycles;

    dispatched_op_rate
        .name(name() + ".DIS:op_rate")
        .desc("dispatched operations per cycle")
        .flags(total)
        ;
    dispatched_op_rate = dispatched_ops / numCycles;

    dispatch_rate
        .name(name() + ".DIS:rate")
        .desc("dispatched_insts per cycle")
        .flags(total)
        ;
    dispatch_rate = dispatch_count_stat / numCycles;

    if (dispatch_policy == MODULO_N) {
        mod_n_stall_avg_free
            .name(name() + ".DIS:mod_n_stall_avg_free")
            .desc("avg free slots per cycle")
            .flags(total)
            ;
        mod_n_stall_avg_free = mod_n_disp_stall_free / mod_n_disp_stalls;

        mod_n_stall_frac
            .name(name() + ".DIS:mod_n_stall_frac")
            .desc("avg stalls per cycle")
            .flags(total)
            ;
        mod_n_stall_frac = mod_n_disp_stalls / numCycles;
    }

    reg_int_occ_rate
        .name(name() + ".REG:int:occ_rate")
        .desc("Average INT register usage")
        .flags(total)