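// File: e_coshl.s
// NOTE(review): the original first two lines ("?? e_coshl.s" and a "font size:" label) look like
// code-viewer page residue rather than assembly source; kept here only as a comment.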
// ---------------------------------------------------------------------
// Remainder of coshl as visible in this section (the entry point and the
// earlier setup are outside this section):
//   - merged TBL / EXP path, predicated on p6 (TBL) and p7 (EXP)
//   - COSH_BY_POLY
//   - COSH_DENORM
//   - COSH_HUGE
// followed by the __libm_error_region stub.
// ---------------------------------------------------------------------

// Get the remaining A,B coefficients
{ .mmi
      ldfe            f_A3 = [r_ad3],16
      nop.m           0
      nop.i           0
}
;;

// Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
// |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
{ .mfi
      nop.m           0
      fma.s1          f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
      mov             r_signexp_0_5 = 0x0fffe // signexp of +0.5
}
;;

// Test for |x| >= overflow limit
{ .mfi
      ldfe            f_B1 = [r_ad3],16
      fcmp.ge.s1      p6,p0 = f_ABS_X, f_smlst_oflow_input
      nop.i           0
}
;;

{ .mfi
      ldfe            f_B2 = [r_ad3],16
      nop.f           0
      mov             r_exp_32 = 0x10004
}
;;

// Subtract RSHF constant to get rounded M as a floating point value
// M_temp * 2^(63-6) - 2^63
{ .mfb
      ldfe            f_B3 = [r_ad3],16
      fms.s1          f_M = f_M_temp, f_2TOM57, f_RSHF
(p6)  br.cond.spnt    COSH_HUGE // Branch if result will overflow
}
;;

// From here on p6/p7 are redefined: p7 = EXP path, p6 = TBL path
{ .mfi
      getf.sig        r_M = f_M_temp
      nop.f           0
      cmp.ge          p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
}
;;

// Calculate j. j is the signed extension of the six lsb of M. It
// has a range of -32 thru 31.
// Calculate R
// ax - M*log2by64_hi
// R = (ax - M*log2by64_hi) - M*log2by64_lo
{ .mfi
      nop.m           0
      fnma.s1         f_R_temp = f_M, f_log2by64_hi, f_ABS_X
      and             r_j = 0x3f, r_M
}
;;

// Move the 6-bit field up to bits 7:2 so sxt1 can sign extend it from bit 7
{ .mii
      nop.m           0
      shl             r_jshf = r_j, 0x2 // Shift j so can sign extend it
;;
      sxt1            r_jshf = r_jshf
}
;;

{ .mii
      nop.m           0
      shr             r_j = r_jshf, 0x2 // Now j has range -32 to 31
      nop.i           0
}
;;

{ .mmi
      shladd          r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
      sub             r_Mmj = r_M, r_j          // M-j
      sub             r_mj = r0, r_j            // Form -j
}
;;

// The TBL and EXP branches are merged and predicated
// If TBL, p6 true, 0.25 <= |x| < 32
// If EXP, p7 true, 32 <= |x| < overflow_limit
//
// N = (M-j)/64
{ .mfi
      ldfe            f_Tjhi = [r_ad_J_hi]
      fnma.s1         f_R = f_M, f_log2by64_lo, f_R_temp
      shr             r_N = r_Mmj, 0x6            // N = (M-j)/64
}
{ .mfi
      shladd          r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
      nop.f           0
      shladd          r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
}
;;

{ .mfi
      sub             r_2mNm1 = r_signexp_0_5, r_N // signexp 2^(-N-1)
      nop.f           0
      shladd          r_ad_J_lo = r_j, 2, r_ad5    // pointer to Tjlo
}
{ .mfi
      ldfe            f_Tmjhi = [r_ad_mJ_hi]
      nop.f           0
      add             r_2Nm1 = r_signexp_0_5, r_N  // signexp 2^(N-1)
}
;;

{ .mmf
      ldfs            f_Tmjlo = [r_ad_mJ_lo]
      setf.exp        f_sneg = r_2mNm1 // Form 2^(-N-1)
      nop.f           0
}
;;

{ .mmf
      ldfs            f_Tjlo = [r_ad_J_lo]
      setf.exp        f_spos = r_2Nm1 // Form 2^(N-1)
      nop.f           0
}
;;

// ******************************************************
// STEP 2 (TBL and EXP)
// ******************************************************
// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
{ .mmf
      nop.m           0
      nop.m           0
      fma.s1          f_Rsq = f_R, f_R, f0
}
;;

// Calculate p_even
// B_2 + Rsq *B_3
// B_1 + Rsq * (B_2 + Rsq *B_3)
// p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
{ .mfi
      nop.m           0
      fma.s1          f_peven_temp1 = f_Rsq, f_B3, f_B2
      nop.i           0
}
// Calculate p_odd
// A_2 + Rsq *A_3
// A_1 + Rsq * (A_2 + Rsq *A_3)
// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
{ .mfi
      nop.m           0
      fma.s1          f_podd_temp1 = f_Rsq, f_A3, f_A2
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_Rcub = f_Rsq, f_R, f0
      nop.i           0
}
;;

//
// If TBL,
// Calculate S_hi and S_lo, and C_hi
// SC_hi_temp = sneg * Tmjhi
// S_hi = spos * Tjhi - SC_hi_temp
// S_hi = spos * Tjhi - (sneg * Tmjhi)
// C_hi = spos * Tjhi + SC_hi_temp
// C_hi = spos * Tjhi + (sneg * Tmjhi)
{ .mfi
      nop.m           0
(p6)  fma.s1          f_SC_hi_temp = f_sneg, f_Tmjhi, f0
      nop.i           0
}
;;

// If TBL,
// C_lo_temp3 = sneg * Tmjlo
// C_lo_temp4 = spos * Tjlo + C_lo_temp3
// C_lo_temp4 = spos * Tjlo + (sneg * Tmjlo)
{ .mfi
      nop.m           0
(p6)  fma.s1          f_C_lo_temp3 = f_sneg, f_Tmjlo, f0
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
      nop.i           0
}
{ .mfi
      nop.m           0
      fma.s1          f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
      nop.i           0
}
;;

// If EXP,
// Compute 2^(N-1) * Tjhi and 2^(N-1) * Tjlo
{ .mfi
      nop.m           0
(p7)  fma.s1          f_Tjhi_spos = f_Tjhi, f_spos, f0
      nop.i           0
}
{ .mfi
      nop.m           0
(p7)  fma.s1          f_Tjlo_spos = f_Tjlo, f_spos, f0
      nop.i           0
}
;;

{ .mfi
      nop.m           0
(p6)  fma.s1          f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
      nop.i           0
}
;;

{ .mfi
      nop.m           0
(p6)  fms.s1          f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
      nop.i           0
}
{ .mfi
      nop.m           0
(p6)  fma.s1          f_C_lo_temp4 = f_spos, f_Tjlo, f_C_lo_temp3
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_peven = f_Rsq, f_peven_temp2, f0
      nop.i           0
}
{ .mfi
      nop.m           0
      fma.s1          f_podd = f_podd_temp2, f_Rcub, f_R
      nop.i           0
}
;;

// If TBL,
// C_lo_temp1 = spos * Tjhi - C_hi
// C_lo_temp2 = sneg * Tmjhi + C_lo_temp1
// C_lo_temp2 = sneg * Tmjhi + (spos * Tjhi - C_hi)
// (the multiplicand is Tmjhi, matching the instruction below)
{ .mfi
      nop.m           0
(p6)  fms.s1          f_C_lo_temp1 = f_spos, f_Tjhi, f_C_hi
      nop.i           0
}
;;

{ .mfi
      nop.m           0
(p6)  fma.s1          f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1
      nop.i           0
}
;;

// If EXP,
// Y_hi = 2^(N-1) * Tjhi
// Y_lo = 2^(N-1) * Tjhi * (p_odd + p_even) + 2^(N-1) * Tjlo
{ .mfi
      nop.m           0
(p7)  fma.s1          f_Y_lo_temp = f_peven, f1, f_podd
      nop.i           0
}
;;

// If TBL,
// C_lo = C_lo_temp4 + C_lo_temp2
{ .mfi
      nop.m           0
(p6)  fma.s1          f_C_lo = f_C_lo_temp4, f1, f_C_lo_temp2
      nop.i           0
}
;;

// If TBL,
// Y_hi = C_hi
// Y_lo = S_hi*p_odd + (C_hi*p_even + C_lo)
{ .mfi
      nop.m           0
(p6)  fma.s1          f_Y_lo_temp = f_C_hi, f_peven, f_C_lo
      nop.i           0
}
;;

{ .mfi
      nop.m           0
(p7)  fma.s1          f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
      nop.i           0
}
;;

// Dummy multiply to generate inexact
{ .mfi
      nop.m           0
      fmpy.s0         f_tmp = f_B2, f_B2
      nop.i           0
}
{ .mfi
      nop.m           0
(p6)  fma.s1          f_Y_lo = f_S_hi, f_podd, f_Y_lo_temp
      nop.i           0
}
;;

// f8 = answer = Y_hi + Y_lo   (EXP path)
{ .mfi
      nop.m           0
(p7)  fma.s0          f8 = f_Y_lo, f1, f_Tjhi_spos
      nop.i           0
}
;;

// f8 = answer = Y_hi + Y_lo   (TBL path)
{ .mfb
      nop.m           0
(p6)  fma.s0          f8 = f_Y_lo, f1, f_C_hi
      br.ret.sptk     b0 // Exit for COSH_BY_TBL and COSH_BY_EXP
}
;;


// Here if 0 < |x| < 0.25
// Result is formed as 1 + X2 * (P1 + X2*P2 + X4*(P3 + X2*P4 + X4*(P5 + X2*P6))).
// f_X2 is not computed in this section; it must already be set on entry.
COSH_BY_POLY:
{ .mmf
      ldfe            f_P6 = [r_ad2e],16
      ldfe            f_P5 = [r_ad2o],16
      nop.f           0
}
;;

{ .mmi
      ldfe            f_P4 = [r_ad2e],16
      ldfe            f_P3 = [r_ad2o],16
      nop.i           0
}
;;

{ .mmi
      ldfe            f_P2 = [r_ad2e],16
      ldfe            f_P1 = [r_ad2o],16
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_X3 = f_NORM_X, f_X2, f0
      nop.i           0
}
{ .mfi
      nop.m           0
      fma.s1          f_X4 = f_X2, f_X2, f0
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_poly65 = f_X2, f_P6, f_P5
      nop.i           0
}
{ .mfi
      nop.m           0
      fma.s1          f_poly43 = f_X2, f_P4, f_P3
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_poly21 = f_X2, f_P2, f_P1
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_poly6543 = f_X4, f_poly65, f_poly43
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          f_poly6to1 = f_X4, f_poly6543, f_poly21
      nop.i           0
}
;;

// Dummy multiply to generate inexact
{ .mfi
      nop.m           0
      fmpy.s0         f_tmp = f_P6, f_P6
      nop.i           0
}
{ .mfb
      nop.m           0
      fma.s0          f8 = f_poly6to1, f_X2, f1
      br.ret.sptk     b0 // Exit COSH_BY_POLY
}
;;


// Here if x denorm or unorm
COSH_DENORM:
// Determine if x really a denorm and not a unorm
{ .mmf
      getf.exp        r_signexp_x = f_NORM_X
      mov             r_exp_denorm = 0x0c001 // Real denorms have exp < this
      fmerge.s        f_ABS_X = f0, f_NORM_X
}
;;

{ .mfi
      nop.m           0
      fcmp.eq.s0      p10,p0 = f8, f0 // Set denorm flag
      nop.i           0
}
;;

// Set p8 if really a denorm
{ .mmi
      and             r_exp_x = r_exp_mask, r_signexp_x
;;
      cmp.lt          p8,p9 = r_exp_x, r_exp_denorm
      nop.i           0
}
;;

// Identify denormal operands.
{ .mfb
      nop.m           0
(p8)  fma.s0          f8 = f8,f8,f1 // If x denorm, result=1+x^2
(p9)  br.cond.sptk    COSH_COMMON   // Return to main path if x unorm
}
;;

{ .mfb
      nop.m           0
      nop.f           0
      br.ret.sptk     b0 // Exit if x denorm
}
;;


// Here if |x| >= overflow limit
COSH_HUGE:
// for COSH_HUGE, put 24000 in exponent; take sign from input
// NOTE(review): nothing in this block reads the sign of x; the
// "take sign from input" wording may be stale -- confirm.
{ .mmi
      mov             r_exp_huge = 0x15dbf
;;
      setf.exp        f_huge = r_exp_huge
      nop.i           0
}
;;

{ .mfi
      alloc           r32 = ar.pfs,0,5,4,0
      fma.s1          f_signed_hi_lo = f_huge, f1, f1
      nop.i           0
}
;;

// f_pre_result = (huge + 1) * huge, computed in s0 so status flags are raised
{ .mfi
      nop.m           0
      fma.s0          f_pre_result = f_signed_hi_lo, f_huge, f0
      mov             GR_Parameter_TAG = 63
}
;;

// NOTE(review): COSH_HUGE ends without a branch; presumably execution
// continues into __libm_error_region below -- confirm against the
// GLOBAL_IEEE754_END / LOCAL_LIBM_ENTRY macro expansions.
GLOBAL_IEEE754_END(coshl)


// ---------------------------------------------------------------------
// __libm_error_region
//
// Creates a 64-byte stack frame, stores three extended values on it,
// calls __libm_error_support, then reloads f8 from the frame and returns.
// Offsets relative to the new sp, derived from the adds/stores below:
//   sp+16 : f8            (Parameter 1, address in GR_Parameter_X)
//   sp+32 : f0            (Parameter 2, address in GR_Parameter_Y at call)
//   sp+48 : f_pre_result  (Parameter 3, address in GR_Parameter_RESULT)
// ar.pfs, gp and b0 are saved in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0
// across the call and restored before returning.
// ---------------------------------------------------------------------
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// GR_Parameter_Y is formed from the old sp: no stop separates this bundle
// from the one that moves sp.
{ .mfi
      add             GR_Parameter_Y=-32,sp // Parameter 2 value
      nop.f           0
.save ar.pfs,GR_SAVE_PFS
      mov             GR_SAVE_PFS=ar.pfs    // Save ar.pfs
}
{ .mfi
.fframe 64
      add             sp=-64,sp             // Create new stack
      nop.f           0
      mov             GR_SAVE_GP=gp         // Save gp
}
;;

{ .mmi
      stfe            [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
      add             GR_Parameter_X = 16,sp   // Parameter 1 address
.save b0, GR_SAVE_B0
      mov             GR_SAVE_B0=b0            // Save b0
}
;;

.body
{ .mib
      stfe            [GR_Parameter_X] = f8                  // STORE Parameter 1 on stack
      add             GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
      nop.b           0
}
{ .mib
      stfe            [GR_Parameter_Y] = f_pre_result        // STORE Parameter 3 on stack
      add             GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk    b0=__libm_error_support#               // Call error handling function
}
;;

// Recompute the result address from sp after the call
{ .mmi
      add             GR_Parameter_RESULT = 48,sp
      nop.m           0
      nop.i           0
}
;;

{ .mmi
      ldfe            f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
      add             sp = 64,sp                 // Restore stack pointer
      mov             b0 = GR_SAVE_B0            // Restore return address
}
;;

{ .mib
      mov             gp = GR_SAVE_GP      // Restore gp
      mov             ar.pfs = GR_SAVE_PFS // Restore ar.pfs
      br.ret.sptk     b0                   // Return
}
;;

LOCAL_LIBM_END(__libm_error_region)

.type   __libm_error_support#,@function
.global __libm_error_support#
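// NOTE(review): the text below appears to be keyboard-shortcut help from a web
// code viewer, not part of the assembly source; kept only as a comment.
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -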