?? libm_sincos_large.s
字號:
//
// Floating-point register aliases (tail of the list).
// Names used by the code below that are not defined here (FR_Input_X,
// FR_N_float, FR_Two_to_24, ...) are presumably defined earlier in the
// file -- TODO confirm against the full source.
// Several aliases deliberately share one physical register
// (e.g. FR_PP_n / FR_QQ_n, FR_Two_to_M3 / FR_Neg_Two_to_63).
//
FR_V              = f75
FR_V_hi           = f75
FR_V_lo           = f76
FR_U_hi           = f77
FR_U_lo           = f78
FR_U_hiabs        = f79
FR_V_hiabs        = f80
FR_PP_8           = f81
FR_QQ_8           = f81
FR_PP_7           = f82
FR_QQ_7           = f82
FR_PP_6           = f83
FR_QQ_6           = f83
FR_PP_5           = f84
FR_QQ_5           = f84
FR_PP_4           = f85
FR_QQ_4           = f85
FR_PP_3           = f86
FR_QQ_3           = f86
FR_PP_2           = f87
FR_QQ_2           = f87
FR_QQ_1           = f88
FR_N_0_fix        = f89
FR_Inv_P_0        = f90
FR_corr           = f91
FR_poly           = f92
FR_Neg_Two_to_M3  = f93
FR_Two_to_M3      = f94
FR_Neg_Two_to_63  = f94
FR_P_0            = f95
FR_C_lo           = f96
FR_PP_1           = f97
FR_PP_1_lo        = f98
FR_ArgPrime       = f99

// GR
GR_Table_Base     = r32
GR_Table_Base1    = r33
GR_i_0            = r34
GR_i_1            = r35
GR_N_Inc          = r36
GR_Sin_or_Cos     = r37
GR_SAVE_B0        = r39
GR_SAVE_GP        = r40
GR_SAVE_PFS       = r41

// sincos combined routine registers
// GR
GR_SINCOS_SAVE_PFS = r32
GR_SINCOS_SAVE_B0  = r33
GR_SINCOS_SAVE_GP  = r34
// FR
FR_SINCOS_ARG      = f100
FR_SINCOS_RES_SIN  = f101

.section .text

//---------------------------------------------------------------------
// __libm_sincos_large
//
// Combined entry point: calls __libm_sin_large and then
// __libm_cos_large on the same argument.
//
// In:   f8 = argument
// Out:  f9 = value __libm_sin_large left in f8
//       f8 = value __libm_cos_large left in f8
// Uses: three local stacked registers (r32-r34) to hold ar.pfs, b0 and
//       gp across the two calls; f100/f101 to hold the argument and the
//       sin result.  These FP registers lie above every FR_* alias
//       visible in this file section (highest is f99).
//---------------------------------------------------------------------
GLOBAL_LIBM_ENTRY(__libm_sincos_large)
{ .mfi
        alloc   GR_SINCOS_SAVE_PFS = ar.pfs,0,3,0,0
        fma.s1  FR_SINCOS_ARG = f8, f1, f0          // Save argument for sin and cos
        mov     GR_SINCOS_SAVE_B0 = b0
};;
{ .mfb
        mov     GR_SINCOS_SAVE_GP = gp
        nop.f   0
        br.call.sptk b0 = __libm_sin_large          // Call sin
};;
{ .mfi
        nop.m   0
        fma.s1  FR_SINCOS_RES_SIN = f8, f1, f0      // Save sin result
        nop.i   0
};;
{ .mfb
        nop.m   0
        fma.s1  f8 = FR_SINCOS_ARG, f1, f0          // Arg for cos
        br.call.sptk b0 = __libm_cos_large          // Call cos
};;
{ .mfi
        mov     gp = GR_SINCOS_SAVE_GP
        fma.s1  f9 = FR_SINCOS_RES_SIN, f1, f0      // Out sin result
        mov     b0 = GR_SINCOS_SAVE_B0
};;
{ .mib
        nop.m   0
        mov     ar.pfs = GR_SINCOS_SAVE_PFS
        br.ret.sptk b0                              // sincos_large exit
};;
GLOBAL_LIBM_END(__libm_sincos_large)

//---------------------------------------------------------------------
// __libm_sin_large
//
// Sets GR_Sin_or_Cos = 0, loads the address of FSINCOS_CONSTANTS
// through its linkage-table entry, and joins the shared body at
// SINCOS_CONTINUE.
// The alloc deposits the previous ar.pfs in GR_Table_Base, which is
// immediately overwritten; ar.pfs is read again at SINCOS_CONTINUE.
//---------------------------------------------------------------------
GLOBAL_LIBM_ENTRY(__libm_sin_large)
{ .mlx
        alloc   GR_Table_Base = ar.pfs,0,12,2,0
        movl    GR_Sin_or_Cos = 0x0 ;;
}
{ .mmi
        nop.m   999
        addl    GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
        nop.i   999
};;
{ .mmi
        ld8     GR_Table_Base = [GR_Table_Base]
        nop.m   999
        nop.i   999
};;
{ .mib
        nop.m   999
        nop.i   999
        br.cond.sptk SINCOS_CONTINUE ;;
}
GLOBAL_LIBM_END(__libm_sin_large)

//---------------------------------------------------------------------
// __libm_cos_large
//
// Same set-up as __libm_sin_large but with GR_Sin_or_Cos = 1; falls
// straight through into SINCOS_CONTINUE.
//
// Shared body (SINCOS_CONTINUE):
//   * saves ar.pfs, gp and b0 in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0
//   * branches to SINCOS_SPECIAL / SINCOS_ZERO / SINCOS_ARG_TOO_LARGE /
//     SINCOS_LARGER_ARG / SINCOS_SMALL_R / SINCOS_NORMAL_R according to
//     the fclass/fcmp results (most of those labels are outside this
//     view)
//   * the path that stays inline writes its result to FR_Input_X and
//     returns with br.ret.
//
// NOTE(review): many of the original block comments appear to describe
// a neighbouring instruction rather than the one directly below them;
// they are kept in their original positions.
//---------------------------------------------------------------------
GLOBAL_LIBM_ENTRY(__libm_cos_large)
{ .mlx
        alloc   GR_Table_Base = ar.pfs,0,12,2,0
        movl    GR_Sin_or_Cos = 0x1 ;;
}
{ .mmi
        nop.m   999
        addl    GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
        nop.i   999
};;
{ .mmi
        ld8     GR_Table_Base = [GR_Table_Base]
        nop.m   999
        nop.i   999
};;
//
// Load Table Address
//
SINCOS_CONTINUE:
{ .mmi
        add     GR_Table_Base1 = 96, GR_Table_Base  // second pointer, 96 bytes into the table
        ldfs    FR_Two_to_24 = [GR_Table_Base], 4
        nop.i   999
};;
{ .mmi
        nop.m   999
//
// Load 2**24, load 2**63.
//
        ldfs    FR_Neg_Two_to_24 = [GR_Table_Base], 12
        mov     GR_SAVE_PFS = ar.pfs ;;             // GR_SAVE_PFS is r41
}
{ .mfi
        ldfs    FR_Two_to_63 = [GR_Table_Base1], 4
//
// Check for unnormals - unsupported operands. We do not want
// to generate denormal exception
// Check for NatVals, QNaNs, SNaNs, +/-Infs
// Check for EM unsupporteds
// Check for Zero
//
        fclass.m.unc p6, p8 = FR_Input_X, 0x1E3
        mov     GR_SAVE_GP = gp ;;                  // GR_SAVE_GP is r40
}
{ .mfi
        nop.m   999
        fclass.nm.unc p8, p0 = FR_Input_X, 0x1FF
// GR_Sin_or_Cos denotes
        mov     GR_SAVE_B0 = b0                     // GR_SAVE_B0 is r39
}
{ .mfb
        ldfs    FR_Neg_Two_to_63 = [GR_Table_Base1], 12
        fclass.m.unc p10, p0 = FR_Input_X, 0x007
(p6)    br.cond.spnt SINCOS_SPECIAL ;;
}
{ .mib
        nop.m   999
        nop.i   999
(p8)    br.cond.spnt SINCOS_SPECIAL ;;
}
{ .mib
        nop.m   999
        nop.i   999
//
// Branch if +/- NaN, Inf.
// Load -2**24, load -2**63.
//
(p10)   br.cond.spnt SINCOS_ZERO ;;
}
{ .mmb
        ldfe    FR_Inv_pi_by_2 = [GR_Table_Base], 16
        ldfe    FR_Inv_P_0 = [GR_Table_Base1], 16
        nop.b   999 ;;
}
{ .mmb
        nop.m   999
        ldfe    FR_d_1 = [GR_Table_Base1], 16
        nop.b   999 ;;
}
//
// Raise possible denormal operand flag with useful fcmp
// Is x <= -2**63
// Load Inv_P_0 for pre-reduction
// Load Inv_pi_by_2
//
{ .mmb
        ldfe    FR_P_0 = [GR_Table_Base], 16
        ldfe    FR_d_2 = [GR_Table_Base1], 16
        nop.b   999 ;;
}
//
// Load P_0
// Load d_1
// Is x >= 2**63
// Is x <= -2**24?
//
{ .mmi
        ldfe    FR_P_1 = [GR_Table_Base], 16 ;;
//
// Load P_1
// Load d_2
// Is x >= 2**24?
//
        ldfe    FR_P_2 = [GR_Table_Base], 16
        nop.i   999 ;;
}
{ .mmf
        nop.m   999
        ldfe    FR_P_3 = [GR_Table_Base], 16
        fcmp.le.unc.s1 p7, p8 = FR_Input_X, FR_Neg_Two_to_24
}
{ .mfi
        nop.m   999
//
// Branch if +/- zero.
// Decide about the paths to take:
// If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2
// OTHERWISE - CASE 3 OR 4
//
        fcmp.le.unc.s1 p10, p11 = FR_Input_X, FR_Neg_Two_to_63
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p8)    fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
        nop.i   999
}
{ .mfi
        ldfe    FR_Pi_by_4 = [GR_Table_Base1], 16
(p11)   fcmp.ge.s1 p10, p0 = FR_Input_X, FR_Two_to_63
        nop.i   999 ;;
}
{ .mmi
        ldfe    FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
        ldfs    FR_Two_to_M3 = [GR_Table_Base1], 4
        nop.i   999 ;;
}
{ .mib
        ldfs    FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
        nop.i   999
//
// Load P_2
// Load P_3
// Load pi_by_4
// Load neg_pi_by_4
// Load 2**(-3)
// Load -2**(-3).
//
(p10)   br.cond.spnt SINCOS_ARG_TOO_LARGE ;;
}
{ .mib
        nop.m   999
        nop.i   999
//
// Branch out if x >= 2**63. Use Payne-Hanek Reduction
//
(p7)    br.cond.spnt SINCOS_LARGER_ARG ;;
}
{ .mfi
        nop.m   999
//
// Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
//
        fma.s1  FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
        fcmp.lt.unc.s1 p6, p7 = FR_Input_X, FR_Pi_by_4
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// Select the case when |Arg| < pi/4
// Else Select the case when |Arg| >= pi/4
//
        fcvt.fx.s1 FR_N_fix = FR_N_float
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// N = Arg * 2/pi
// Check if Arg < pi/4
//
(p6)    fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
        nop.i   999 ;;
}
//
// Case 2: Convert integer N_fix back to normalized floating-point value.
// Case 1: p8 is only affected when p6 is set
//
{ .mfi
(p7)    ldfs    FR_Two_to_M33 = [GR_Table_Base1], 4
//
// Grab the integer part of N and call it N_fix
//
(p6)    fmerge.se FR_r = FR_Input_X, FR_Input_X
// If |x| < pi/4, r = x and c = 0
// If |x| < pi/4, is x < 2**(-3).
// r = Arg
// c = 0
(p6)    mov     GR_N_Inc = GR_Sin_or_Cos ;;
}
{ .mmf
        nop.m   999
(p7)    ldfs    FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
(p6)    fmerge.se FR_c = f0, f0
}
{ .mfi
        nop.m   999
(p6)    fcmp.lt.unc.s1 p8, p9 = FR_Input_X, FR_Two_to_M3
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// If |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.
// If |x| >= pi/4,
// Create the right N for |x| < pi/4 and otherwise
// Case 2: Place integer part of N in GP register
//
(p7)    fcvt.xf FR_N_float = FR_N_fix
        nop.i   999 ;;
}
{ .mmf
        nop.m   999
(p7)    getf.sig GR_N_Inc = FR_N_fix
(p8)    fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
}
{ .mib
        nop.m   999
        nop.i   999
//
// Load 2**(-33), -2**(-33)
//
(p8)    br.cond.spnt SINCOS_SMALL_R ;;
}
{ .mib
        nop.m   999
        nop.i   999
(p6)    br.cond.sptk SINCOS_NORMAL_R ;;
}
//
// if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
//
//
// In this branch, |x| >= pi/4.
//
{ .mfi
        ldfs    FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
//
// Load -2**(-67)
//
        fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X
//
// w = N * P_2
// s = -N * P_1 + Arg
//
        add     GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
}
{ .mfi
        nop.m   999
        fma.s1  FR_w = FR_N_float, FR_P_2, f0
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// Adjust N_fix by N_inc to determine whether sine or
// cosine is being calculated
//
        fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p7)    fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
// Remember x >= pi/4.
// Is s <= -2**(-33) or s >= 2**(-33) (p6)
// or -2**(-33) < s < 2**(-33) (p7)
(p6)    fms.s1  FR_r = FR_s, f1, FR_w
        nop.i   999
}
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_w = FR_N_float, FR_P_3, f0
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_U_1 = FR_N_float, FR_P_2, FR_w
        nop.i   999
}
{ .mfi
        nop.m   999
(p6)    fms.s1  FR_c = FR_s, f1, FR_r
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// For big s: r = s - w: No further reduction is necessary
// For small s: w = N * P_3 (change sign) More reduction
//
(p6)    fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p8)    fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p7)    fms.s1  FR_r = FR_s, f1, FR_U_1
        nop.i   999
}
{ .mfb
        nop.m   999
//
// For big s: Is |r| < 2**(-3)?
// For big s: c = S - r
// For small s: U_1 = N * P_2 + w
//
// If p8 is set, prepare to branch to Small_R.
// If p9 is set, prepare to branch to Normal_R.
// For big s, r is complete here.
//
(p6)    fms.s1  FR_c = FR_c, f1, FR_w
//
// For big s: c = c + w (w has not been negated.)
// For small s: r = S - U_1
//
(p8)    br.cond.spnt SINCOS_SMALL_R ;;
}
{ .mib
        nop.m   999
        nop.i   999
(p9)    br.cond.sptk SINCOS_NORMAL_R ;;
}
{ .mfi
(p7)    add     GR_Table_Base1 = 224, GR_Table_Base1
//
// Branch to SINCOS_SMALL_R or SINCOS_NORMAL_R
//
(p7)    fms.s1  FR_U_2 = FR_N_float, FR_P_2, FR_U_1
//
// c = S - U_1
// r = S_1 * r
//
//
(p7)    extr.u  GR_i_1 = GR_N_Inc, 0, 1
}
{ .mmi
        nop.m   999 ;;
//
// Get [i_0,i_1] - two lsb of N_fix_gr.
// Do dummy fmpy so inexact is always set.
//
(p7)    cmp.eq.unc p9, p10 = 0x0, GR_i_1
(p7)    extr.u  GR_i_0 = GR_N_Inc, 1, 1 ;;
}
//
// For small s: U_2 = N * P_2 - U_1
// S_1 stored constant - grab the one stored with the
// coefficients.
//
{ .mfi
(p7)    ldfe    FR_S_1 = [GR_Table_Base1], 16
//
// Check if i_1 and i_0 != 0
//
(p10)   fma.s1  FR_poly = f0, f1, FR_Neg_Two_to_M67
(p7)    cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
}
{ .mfi
        nop.m   999
(p7)    fms.s1  FR_s = FR_s, f1, FR_r
        nop.i   999
}
{ .mfi
        nop.m   999
//
// S = S - r
// U_2 = U_2 + w
// load S_1
//
(p7)    fma.s1  FR_rsq = FR_r, FR_r, f0
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_U_2 = FR_U_2, f1, FR_w
        nop.i   999
}
{ .mfi
        nop.m   999
//(p7) fmerge.se FR_Input_X = FR_r, FR_r
(p7)    fmerge.se FR_prelim = FR_r, FR_r
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//(p10) fma.s1 FR_Input_X = f0, f1, f1
(p10)   fma.s1  FR_prelim = f0, f1, f1
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// FR_rsq = r * r
// Save r as the result.
//
(p7)    fms.s1  FR_c = FR_s, f1, FR_U_1
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// if ( i_1 ==0) poly = c + S_1*r*r*r
// else Result = 1
//
//(p12) fnma.s1 FR_Input_X = FR_Input_X, f1, f0
(p12)   fnma.s1 FR_prelim = FR_prelim, f1, f0
        nop.i   999
}
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_r = FR_S_1, FR_r, f0
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p7)    fma.d.s1 FR_S_1 = FR_S_1, FR_S_1, f0
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// If i_1 != 0, poly = 2**(-67)
//
(p7)    fms.s1  FR_c = FR_c, f1, FR_U_2
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// c = c - U_2
//
(p9)    fma.s1  FR_poly = FR_r, FR_rsq, FR_c
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
//
// i_0 != 0, so Result = -Result
//
(p11)   fma.s1  FR_Input_X = FR_prelim, f1, FR_poly
        nop.i   999 ;;
}
{ .mfb
        nop.m   999
(p12)   fms.s1  FR_Input_X = FR_prelim, f1, FR_poly
//
// if (i_0 == 0), Result = Result + poly
// else Result = Result - poly
//
        br.ret.sptk b0 ;;
}
SINCOS_LARGER_ARG:
{ .mfi
        nop.m   999
        fma.s1  FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
        nop.i   999
};;
// This path for argument > 2**24
// Adjust table_ptr1 to beginning of table.
//
// NOTE(review): the visible source ends inside the bundle below; the
// rest of SINCOS_LARGER_ARG is not available in this view.
{ .mmi
        nop.m   999
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -