?? dispatch.cc
字號:
.precision(0) ; reg_int_occ_rate = reg_int_thrd_occ / numCycles; reg_fp_occ_rate .name(name() + ".REG:fp:occ_rate") .desc("Average FP register usage") .flags(total) .precision(0) ; reg_fp_occ_rate = reg_fp_thrd_occ / numCycles; two_input_ratio .name(name() + ".DIS:two_input_ratio") .desc("fraction of all insts having 2 inputs") .flags(total) ; two_input_ratio = two_op_inst_count / dispatch_count_stat; one_rdy_ratio .name(name() + ".DIS:one_rdy_ratio") .desc("fraction of 2-op insts w/ one ready op") .flags(total) ; one_rdy_ratio = one_rdy_inst_count / two_op_inst_count;}//// Determine which chain this instruction should belong to//NewChainInfoFullCPU::choose_chain(DynInst *inst, unsigned cluster){ NewChainInfo rv; unsigned chained_ideps = 0, st_ideps = 0; int head_num = -1; int suggested_cluster = -1; bool inst_is_load = false; unsigned thread = inst->thread_number; // unsigned max_latency_depth = 0; unsigned ops_pred_ready_time = 0; // // Shared info... // RegInfoTable * rit = clusterSharedInfo->ri_table; ChainInfoTableBase *chain_info = clusterSharedInfo->ci_table; GenericPredictor *hm_predictor = clusterSharedInfo->hm_predictor; GenericPredictor *lr_predictor = clusterSharedInfo->lr_predictor; if (DTRACE(Chains)) { string s; inst->dump(s); DPRINTF(Chains,"Chains: fetch_seq %d : %s\n", inst->fetch_seq, s); } if (inst->isLoad()) { inst_is_load = true; if (use_hm_predictor) { // If we predict a hit if (hm_predictor->predict(inst->PC >> 2) == 1) { rv.hm_prediction = MA_HIT; DPRINTF(Chains, "Chains: load predicted HIT\n"); } else { rv.hm_prediction = MA_CACHE_MISS; DPRINTF(Chains, "Chains: load predicted MISS\n"); } } else { rv.hm_prediction = MA_NOT_PREDICTED; } if (hmp_func == HMP_BOTH && rv.hm_prediction == MA_HIT) { inst_is_load = false; // actually, just don't create HEAD DPRINTF(Chains, "Chains: HMP-Both: load isn't head\n"); } if (hmp_func == HMP_HEAD_SEL && rv.hm_prediction == MA_HIT && chain_heads_in_rob > (clusterSharedInfo->total_chains * 0.75)) { inst_is_load = false; // actually, just don't create HEAD DPRINTF(Chains, "Chains: HMP-Head-Sel: load isn't head\n"); } } else rv.hm_prediction = MA_NOT_PREDICTED; // // Check all IDEPS // // We want to find the register with the largest latency value (chained // or unchained) and treat this instruction as following that register // for (int i = 0; i < inst->numSrcRegs(); ++i) { unsigned reg = inst->srcRegIdx(i); // Get the predicted ready time for this operand unsigned cmp_time = (*rit)[thread][reg].predReady(); // Earliest an op can become ready is _this_ cycle if (cmp_time == 0) cmp_time = curTick; // Is this operand going to arrive later than the others? if (ops_pred_ready_time < cmp_time) { ops_pred_ready_time = cmp_time; rv.pred_last_op_index = i; } // We need these regardless... we may over-write them below rv.idep_info[i].chained = (*rit)[thread][reg].isChained(); rv.idep_info[i].delay = (*rit)[thread][reg].latency(); rv.idep_info[i].op_pred_ready_time = cmp_time; // Get cluster where this operand is being produced rv.idep_info[i].source_cluster = (*rit)[thread][reg].cluster(); // // Registers can be in one of two states: // (1) Chained -- this inst should follow another // (2) Self-Timed -- this inst should find its own way based // on the delay value. Note that this delay // value will be zeroed when the producing // instruction writes-back // if (rv.idep_info[i].chained) { rv.idep_info[i].follows_chain = (*rit)[thread][reg].chainNum(); rv.idep_info[i].chain_depth = (*rit)[thread][reg].chainDepth(); ++chained_ideps; } else { // we should only count this value as pending if it // hasn't written back if (rv.idep_info[i].delay) ++st_ideps; } } // // Check for and remove duplicate chain entries in rv... // // We only want to connect this instruction to a chain once // for (int i = 0; i < TheISA::MaxInstSrcRegs - 1; ++i) { if (!rv.idep_info[i].chained) continue; // // If we have duplicate input operands, treat all but the first // as if they were ready // for (int j = i + 1; j < TheISA::MaxInstSrcRegs; ++j) { // // if both of these i-deps follow the same chain // if (rv.idep_info[j].chained && rv.idep_info[i].follows_chain == rv.idep_info[j].follows_chain) { // we only want to follow _one_ of these (and the // i-th one is easier) // make sure we follow at the // longer delay value if (rv.idep_info[i].delay < rv.idep_info[j].delay) rv.idep_info[i].delay = rv.idep_info[j].delay; // remove the later entry rv.idep_info[j].chained = false; rv.idep_info[j].delay = 0; // fix this counter --chained_ideps; } } } int pending_ideps = chained_ideps + st_ideps; DPRINTF(Chains, "Chains: %d pending ideps (%d self-timed)\n", pending_ideps, st_ideps); // // We always assume that a self-timed instruction will finish before a // chained instruction // // if less than two operands are pending, it's not really a prediction! unsigned chained_idep_index = rv.pred_last_op_index; if (pending_ideps < 2) { // we use this flag in SegmentedIQ::writeback() rv.pred_last_op_index = -1; } // // Last-Op-Prediction: // // Only try to predict the last op if we have more than one // instruction chained // // FIXME: This code assumes that there are only TWO input deps! rv.lr_prediction = -1; if (chained_ideps > 1) { if (use_lat_predictor) { int other = 1 - rv.pred_last_op_index; // This input op will appear to the scheduling logic as // if the dependence has already been met... // -> The real dependence mechanism is unaffected by this rv.idep_info[other].chained = false; rv.idep_info[other].delay = 0; --chained_ideps; } if (use_lr_predictor) { rv.pred_last_op_index = lr_predictor->predict(inst->PC >> 2); rv.lr_prediction = rv.pred_last_op_index; for (int other = 0; other < TheISA::MaxInstSrcRegs; ++other) { /** * @todo This code only works correctly for 2-input * instructions 3-input instructions never chain * their 3rd input... */ if (other != rv.pred_last_op_index) { // This input op will appear to the scheduling logic as // if the dependence has already been met... // -> The real dependence mechanism is unaffected by this rv.idep_info[other].chained = false; rv.idep_info[other].delay = 0; --chained_ideps; } } } } // FIXME: We don't do chaining among clusters for more than one // chained ideps!!!// assert(chained_ideps < 2); unsigned producing_cluster; if (chained_ideps > 1) { // // Special case... // // FIXME: only looks at the first two ideps! // producing_cluster = rv.idep_info[0].source_cluster; unsigned c = rv.idep_info[1].source_cluster; // if the two cluster id's don't match, we have to pick one... if (producing_cluster != c) { bool g0 = ((IQ[producing_cluster]->free_slots() > 0) && !IQ[producing_cluster]->cap_met(thread)); bool g1 = ((IQ[c]->free_slots() > 0) && !IQ[c]->cap_met(thread)); if (g0 && g1) { // both good... // use the "other" cluster if it has lower occupancy... if (IQ[c]->free_slots() < IQ[producing_cluster]->free_slots()) { producing_cluster = c; } } else if (g0) { // use 'producing_cluster' } else { // only one choice, actually producing_cluster = c; // we actually get here for the !g0 && !g1 case, but // the result doesn't actually matter, since the instruction // won't dispatch in this case. } } else { // they are the same... good to go! } } else { if ((chained_ideps == 1) || pending_ideps) { // // We have an input in the queue // producing_cluster = rv.idep_info[chained_idep_index].source_cluster; } else { // // We don't have a producing cluster... choose the least-full cluster // producing_cluster = IQLeastFull(); } } // // Look for instructions that have to make a decision about which // chain to follow // if (inst->numSrcRegs() > 1) { ++two_op_inst_count[inst->thread_number]; // count the number of these with exactly one ready idep if (pending_ideps == 1) { ++one_rdy_inst_count[inst->thread_number]; } } if (pending_ideps == 0) inst_class_dist[INSN_CLASS_ALL_RDY] += 1; else if (pending_ideps == 1) inst_class_dist[INSN_CLASS_ONE_NOT_RDY] += 1; else if (chained_ideps > 1) inst_class_dist[INSN_CLASS_MULT_CHAINS] += 1; else if (chained_ideps == 1) inst_class_dist[INSN_CLASS_ONE_CHAINED] += 1; else inst_class_dist[INSN_CLASS_ALL_SELF_TIMED] += 1; // // This instruction is the head of a chain if: // (1) It is self-timed (and generates an output) // (2) It is following more than 1 chain // (3) It is a load // (4) The chain depth reported is greater than max_chain_depth // if (CHAIN_HEAD_IND_INSTS && pending_ideps == 0 && inst->numDestRegs() > 0 || chained_ideps > 1 || inst_is_load#if 0 || max_latency_depth > max_chain_depth#endif ) { rv.head_of_chain = true; rv.out_of_chains = chain_info->chainsFree() == 0; // // Bail out if we don't have any free chains... // if (rv.out_of_chains) { ++insufficient_chains; DPRINTF(Chains, "Chains: out of Chain Wires\n"); return rv; } } ////////////////////////////////////////////////////////////// // // Chain Wire Policies don't matter unless we have more than // one cluster // if (!rv.out_of_chains && (numIQueues > 1) && (chainWires != 0)) { // // Chain Wire Policies: // // OneToOne: Each cluster has enough wires for all chains // --> use originally-specified cluster // Static: Each cluster handles a subset of chains // --> change iq_idx as appropriate // Dynamic: Each cluster can allocate wires to chains // --> if cluster hosting chain is not available, // assign instruction to cluster that can // allocate a new chain // bool stall = false; switch (chainWirePolicy) { case ChainWireInfo::OneToOne: // Use the originally specified cluster if (checkClusterForDispatch(cluster, rv.head_of_chain)) { // we're good to go... suggested_cluster = cluster; head_num = chain_info->find_free(); } else { ++secondChoiceStall; stall = true; } break; case ChainWireInfo::Static: if (checkClusterForDispatch(producing_cluster, rv.head_of_chain)) { // we're good to go... suggested_cluster = producing_cluster; head_num = chainWires->findFreeWire(suggested_cluster); } else { suggested_cluster = IQLeastFull(); // we must mark this instruction as a chain-head if // we dispatch it to a second-choice cluster if (producing_cluster != suggested_cluster) { // if our second-choice is ok... if (checkClusterForDispatch(suggested_cluster, true)) { rv.head_of_chain = true; // unchain inputs that aren't in this cluster for (int i = 0; i < TheISA::MaxInstSrcRegs; ++i) { if (rv.idep_info[i].chained) { if (!chainWires->chainMapped(suggested_cluster, rv.idep_info[i].follows_chain)) { rv.idep_info[i].chained = false; } } } head_num = chainWires->findFreeWire(suggested_cluster); ++secondChoiceCluster; } else { ++secondChoiceStall; stall = true; } } else { ++secondChoiceStall; stall = true; } } break; case ChainWireInfo::StaticStall: if (checkClusterForDispatch(producing_cluster, rv.head_of_chain)) { // we're good to go... suggested_cluster = producing_cluster; head_num = chainWires->findFreeWire(suggested_cluster); } else { ++secondChoiceStall; stall = true; } break; // // this may or may not actually work... // case ChainWireInfo::Dynamic: if (checkClusterForDispatch(producing_cluster, rv.head_of_chain)) { // we're good to go... suggested_cluster = producing_cluster; // dynamic policy can use ANY free chain number head_num = chain_info->find_free(); } else { suggested_cluster = IQLeastFull(); // we must mark this instruction as a chain-head if // we dispatch it to a second-choice cluster if (producing_cluster != suggested_cluster) { // if our second-choice is ok... if (checkClusterForDispatch(suggested_cluster, true)) { rv.head_of_chain = true; // unchain inputs that aren't in this cluster for (int i = 0; i < TheISA::MaxInstSrcRegs; ++i) { if (rv.idep_info[i].chained) { if (!chainWires->chainMapped(suggested_cluster, rv.idep_info[i].follows_chain)) { rv.idep_info[i].chained = false; } } } head_num = chain_info->find_free(); ++secondChoiceCluster; } else { ++secondChoiceStall; stall = true; } } else { ++secondChoiceStall; stall = true; } } break; } // // bail out... // if (stall) { rv.out_of_chains = true; ++insufficient_chains; DPRINTF(Chains, "Chains: out of Chain Wires\n"); return rv; } } else { // // Single cluster... // if (rv.head_of_chain) { // // Find a free chain // (returns -1 for no chains) // head_num = chain_info->find_free(); } } rv.suggested_cluster = suggested_cluster; if (rv.head_of_chain) { // // Did we find a free chain? // if (head_num >= 0) { DPRINTF(Chains, "Chains: head of chain %d\n", head_num); // // Yup... We collect stats for this chain in writeback, // just before we init() the chain //#if 0 if (max_latency_depth > max_chain_depth) chain_create_dist[CHAIN_CR_DEPTH] += 1; else#endif if (inst_is_load) chain_create_dist[CHAIN_CR_LOAD] += 1; else if (pending_ideps==0) chain_create_dist[CHAIN_CR_NO_IDEPS] += 1; else chain_create_dist[CHAIN_CR_MULT_IDEPS] += 1; rv.head_chain = head_num; } else { // // Nope. We can't dispatch this inst... // rv.out_of_chains = true; ++insufficient_chains; DPRINTF(Chains, "Chains: insufficient chains\n"); } } DPRINTF(Chains, "Chains: %s", rv.str_dump());#if DUMP_CHAIN_INFO cout << "@" << curTick << endl; inst->dump(); rv.dump();#endif return rv;}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -