?? bpred.cc
字號:
.desc("PVP: Prob of correct pred given HC") .precision(4) ; conf_pvp = conf_chc / (conf_chc + conf_ihc); conf_spec .name(name() + ".conf.spec") .desc("Spec: \% incorr preds that were LC") .precision(4) ; conf_spec = conf_ilc / (conf_ihc + conf_ilc); conf_pvn .name(name() + ".conf.pvn") .desc("PVN: Prob of incorrect pred given LC") .precision(4) ; conf_pvn = conf_ilc / (conf_clc + conf_ilc); }}voiddecode_state(unsigned input, char *output){ int i; unsigned work; output[5] = '\0'; output[1] = output[3] = ' '; for (i = 0; i < 3; i++) { work = (input >> (2 * i)) & 0x03; output[2 * (2 - i)] = '0' + work; }}#if 0voidbpred_print_pred_state(struct bpred_t *p){ int i; Counter total_brs = 0; int pred_state; char s[10]; if (!p) return; for (i = 0; i < SMT_MAX_THREADS; ++i) { total_brs += p->cond_predicted[i]; } if (conf_pred_enable) { ccprintf(cerr, "%s.pred_state.begin\n", nameStr); ccprintf(cerr," state(M,L,G) correct incorrect " "total acc frac %% H-C acc %% L-C acc\n"); for (pred_state = 0; pred_state < p->pred_state_table_size; pred_state++) { Counter low_conf = p->pred_state[PS_LOW_CONF][pred_state]; Counter high_conf = p->pred_state[PS_HIGH_CONF][pred_state]; Counter correct = p->pred_state[PS_CORRECT][pred_state]; Counter incorrect = p->pred_state[PS_INCORRECT][pred_state]; Counter hc_correct = p->pred_state[PS_HC_COR][pred_state]; Counter lc_incorrect = p->pred_state[PS_LC_INCOR][pred_state]; Counter total = correct + incorrect; decode_state(pred_state, s); ccprintf(cerr, " %02x (%s) %12.0f %12.0f %12.0f %6.2f " "%6.2f %6.2f %6.2f %6.2f %6.2f\n", pred_state, s, (double) correct, (double) incorrect, (double) total, 100.0 * (double) correct / (double) total, 100.0 * (double) total / (double) total_brs, 100.0 * (double) high_conf / (double) total, 100.0 * (double) hc_correct / (double) correct, 100.0 * (double) low_conf / (double) total, 100.0 * (double) lc_incorrect / (double) incorrect); } ccprintf(cerr, "%s.pred_state.end\n", nameStr); }}#endif#if 0 /* doesn't work any more */voidconf_pred_print_state(struct bpred_t *p){ if (!p->conf_pred_index_bits) return; ccprintf(cerr, "conf.pred_state.begin\n"); for (int i = 0; i < (1 << p->conf_pred_index_bits) / 16; i += 16) { ccprintf(cerr, " %04X : ", i); for (int j = 0; j < 16; j++) ccprintf(cerr, "%02X ", p->conf_pred_table[i + j]); ccprintf(cerr, "\n"); } ccprintf(cerr, "conf.pred_state.end\n");}#endif/* * Calculate a 'pred_index_bits'-long predictor table index given branch * index bits (bindex) and 'hist_bits' bits of history (hist). 'xor' * specifies whether the bindex and hist bits should be xored or * concatenated. */unsigned intpred_index(unsigned int bindex, unsigned int hist, unsigned int hist_bits, unsigned int pred_index_bits, bool _xor){ /* bindex bits needed to fill out pred_index */ unsigned needed_bindex_bits = pred_index_bits - hist_bits; if (needed_bindex_bits > 0) { /* move hist_bits up to make room for bindex bits; this also * guarantees that any non-xored bindex bits come from the * low-order part of bindex */ hist <<= needed_bindex_bits; /* if we're concatenating (not xoring) bindex bits, clear the * unused bits */ if (!_xor) bindex &= NBIT_MASK(needed_bindex_bits); } else { if (!_xor) { /* don't need any bindex bits... */ bindex = 0; } } /* if !xor, the bindex & hist bits are disjoint, so XOR == OR */ return ((hist ^ bindex) & NBIT_MASK(pred_index_bits));}/* probe a predictor for a next fetch address, the predictor is probed with branch address BADDR, the branch target is BTARGET (used for static predictors), and OP is the instruction opcode (used to simulate predecode bits; a pointer to the predictor state entry (or null for jumps) is returned in *DIR_UPDATE_PTR (used for updating predictor state), and the non-speculative top-of-stack is returned in stack_recover_idx (used for recovering ret-addr stack after mis-predict). */BranchPred::LookupResultBranchPred::lookup(int thread, Addr baddr, /* branch address */ const StaticInstBasePtr &brInst, /* static instruction */ Addr *pred_target_ptr, BPredUpdateRec * brstate, /* state pointer for update/recovery */ enum conf_pred * confidence){ int i, index; bool pred_taken = false; unsigned int local_pred_ctr = 0, global_pred_ctr = 0, meta_dir_ctr = 0; /* if this is not a branch, return not-taken */ if (!brInst->isControl()) return Predict_Not_Taken; lookups[thread]++; /* if we got this far, we going to make a prediction... */ brstate->used_predictor = true; /* we'll set these later if necessary */ brstate->used_btb = 0; brstate->used_ras = 0;#if BP_VERBOSE ccprintf(cerr, "BR: %#08X (cycle %n) ", baddr, curTick);#endif /* if unconditional, predict taken, else do a direction prediction */ if (brInst->isUncondCtrl()) { pred_taken = 1; /* Branch could be misspeculated if it's an indirect jump and we * get the wrong target out of the BTB. Need to snapshot state * so we can undo potential (mis-)updates in bpred_recover() */ brstate->global_hist = global_hist_reg[thread]; brstate->ras_tos = retAddrStack[thread].tos; brstate->ras_value = retAddrStack[thread].stack[brstate->ras_tos];#if BP_VERBOSE ccprintf(cerr, "UNCOND ");#endif } else { /* branch bits used as index */ unsigned int bindex = baddr >> BranchPredAddrShiftAmt; unsigned global_hist = global_hist_reg[thread]; brstate->pred_state = 0;#if BP_VERBOSE ccprintf(cerr, "COND ");#endif /*************************************/ /* generate local prediction, if any */ /*************************************/ if (local_pred_table) { /* local predictor */ unsigned local_bindex; unsigned local_hist; unsigned pidx; local_bindex = bindex; local_bindex &= (num_local_hist_regs - 1); local_hist = local_hist_regs[local_bindex]; pidx = pred_index(bindex, local_hist, local_hist_bits, local_pred_index_bits, local_xor); local_pred_ctr = local_pred_table[pidx]; /* Local predictor result... */ pred_taken = (local_pred_ctr >= 2); /* save index for state update at commit */ brstate->local_pidx = pidx; /* for statistics */ brstate->pred_state = local_pred_ctr;#if BP_VERBOSE ccprintf(cerr, "LH=%#08X IX=%#08X CT=%1d ", local_hist, pidx, local_pred_ctr);#endif } /**************************************/ /* generate global prediction, if any */ /**************************************/ if (global_pred_table) { unsigned pidx = pred_index(bindex, global_hist, global_hist_bits, global_pred_index_bits, global_xor); global_pred_ctr = global_pred_table[pidx]; /* Global prediction is... */ pred_taken = (global_pred_ctr >= 2); /* save index for state update at commit */ brstate->global_pidx = pidx; /* for statistics */ brstate->pred_state <<= 2; /* move local state over (if any) */ brstate->pred_state |= global_pred_ctr;#if BP_VERBOSE ccprintf(cerr, "GH=%#08X IX=%#08X CT=%1d ", global_hist, pidx, global_pred_ctr);#endif } /*************************************************************/ /* if we're using a hybrid predictor, use it to choose local */ /* vs. global prediction. if not, we will use the value of */ /* pred_taken set by either the local or global predictor */ /* above (not both, since if we're not doing hybird only one */ /* will exist) */ /*************************************************************/ if (meta_pred_table) { unsigned pidx = pred_index(bindex, global_hist, global_hist_bits, meta_pred_index_bits, meta_xor); meta_dir_ctr = meta_pred_table[pidx]; /* meta > 2 --> use local */ pred_taken = (((meta_dir_ctr >= 2) ? local_pred_ctr : global_pred_ctr) >= 2); /* for statistics */ brstate->meta_pidx = pidx; brstate->pred_state |= (meta_dir_ctr << 4);#if BP_VERBOSE ccprintf(cerr, "META=%1d ", meta_dir_ctr);#endif } /*******************************************/ /* speculatively update global history reg */ /*******************************************/ global_hist = ((global_hist << 1) | pred_taken) & NBIT_MASK(global_hist_bits); global_hist_reg[thread] = global_hist; /* Just in case we got it wrong... generate the opposite update; * this value will be placed in the history reg if we misspeculate */ brstate->global_hist = global_hist ^ 0x01; /* save RAS TOS index and value for speculation recovery also */ brstate->ras_tos = retAddrStack[thread].tos; brstate->ras_value = retAddrStack[thread].stack[brstate->ras_tos]; /******************************/ /* Look up confidence value */ /******************************/ brstate->conf_result = CONF_NULL; if (confidence) { if (conf_pred_table) { unsigned pidx = pred_index(bindex, global_hist, global_hist_bits, conf_pred_index_bits, conf_pred_xor); brstate->conf_pidx = pidx; brstate->conf_value = conf_pred_table[pidx]; if (conf_pred_table[pidx] >= conf_pred_ctr_thresh) *confidence = CONF_HIGH; else *confidence = CONF_LOW; } else if (conf_pred_ctr_thresh < 0) { // static ctr confidence *confidence = conf_table[brstate->pred_state] ? CONF_HIGH : CONF_LOW; } else { // dynamic ctr confidence, small table if (conf_table[brstate->pred_state] > conf_pred_ctr_thresh) *confidence = CONF_HIGH; else *confidence = CONF_LOW; } brstate->conf_result = *confidence; } } /* * If branch is predicted not taken, there's no need to check the * BTB for a target. Note that this assumes that a BTB lookup does * not affect the state of the BTB (e.g. the replacement policy). */ if (!pred_taken) {#if BP_VERBOSE ccprintf(cerr, "<NT>\n");#endif return Predict_Not_Taken; }#if BP_VERBOSE else { ccprintf(cerr, "<T> "); }#endif /* * If we get here, branch is predicted taken (incl. unconditionals). * Try to get a target address from the RAS or the BTB. */ if (ras_size) { ReturnAddrStack *ras = &retAddrStack[thread]; if (brInst->isReturn()) { /* if this is a return, pop return-address stack and go */ Addr target = ras->stack[ras->tos]; DPRINTF(BPredRAS, "RAS ret %#x idx %d tgt %#x\n", baddr, ras->tos, target); ras->tos--; if (ras->tos < 0) ras->tos = ras_size - 1; brstate->used_ras = 1;#if BP_VERBOSE ccprintf(cerr, "RAS= %#08X\n", target);#endif brstate->ras_tos = ras->tos; brstate->ras_value = ras->stack[ras->tos]; *pred_target_ptr = target; return Predict_Taken_With_Target; } else if (brInst->isCall()) { /* if function call, push return address onto stack */ ras->tos++; if (ras->tos == ras_size) ras->tos = 0; ras->stack[ras->tos] = baddr + sizeof(MachInst); brstate->ras_tos = ras->tos; brstate->ras_value = ras->stack[ras->tos]; DPRINTF(BPredRAS, "RAS call %#x idx %d tgt %#x\n", baddr, ras->tos, baddr + sizeof(MachInst));#if BP_VERBOSE ccprintf(cerr, "PUSH %#08X ", ras->stack[ras->tos]);#endif } } /* predicted taken, not a return: do BTB lookup */ index = ((baddr >> BranchPredAddrShiftAmt) & (btb.sets - 1)) * btb.assoc; /* Now we know the set; look for a PC match */ btb_lookups[thread]++; for (i = index; i < (index + btb.assoc); i++) { if (btb.btb_data[i].addr == baddr) { /* match (BTB hit): return target */ brstate->used_btb = 1; btb_hits[thread]++;#if BP_VERBOSE ccprintf(cerr, "BTB=%#08X\n", btb.btb_data[i].target);#endif *pred_target_ptr = btb.btb_data[i].target; return Predict_Taken_With_Target; } }#if BP_VERBOSE ccprintf(cerr, "BTB missed\n");#endif /* BTB miss: just return predicted direction (taken) */ return Predict_Taken_No_Target;}/* Speculative execution can corrupt the ret-addr stack. So for each * lookup we return the top-of-stack (TOS) at that point; a mispredicted * branch, as part of its recovery, restores the TOS using this value -- * hopefully this uncorrupts the stack. */voidBranchPred::recover(int thread, Addr baddr, /* branch address */ BPredUpdateRec *brstate){ /* pred state pointer */#if BP_VERBOSE ccprintf(cerr, "RE: %#08X (cycle %n) ", baddr, curTick);#endif#if BP_VERBOSE ccprintf(cerr, "RAS_TOS=%02d GH=%#08X\n", brstate->ras_tos, brstate->global_hist);#endif if (DTRACE(BPredRAS) && (retAddrStack[thread].tos != brstate->ras_tos || retAddrStack[thread].stack[brstate->ras_tos] != brstate->ras_value)) DPRINTFN("RAS recover %#x %d %#x\n", baddr, brstate->ras_tos, brstate->ras_value); /* if we didn't use the predictor on this branch (for * leading-thread predictions) don't update */ if (!brstate->used_predictor) return; retAddrStack[thread].tos = brstate->ras_tos; retAddrStack[thread].stack[brstate->ras_tos] = brstate->ras_value; global_hist_reg[thread] = brstate->global_hist;}/* * Update 2-bit counter */static voidupdate_ctr(uint8_t * ctrp, bool incr)
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -