?? s_tanl.s
字號:
data4 0x25378A63, 0x00000000
data8 0x3FF200BDB16523F6
data4 0x247BB2E0, 0x00000000
data8 0x3FF172358CE27778
data4 0x24446538, 0x00000000
data8 0x3FF0E873FDEFE692
data4 0x2514638F, 0x00000000
data8 0x3FF0632C33154062
data4 0x24A7FC27, 0x00000000
data8 0x3FEFC42EB3EF115F
data4 0x248FD0FE, 0x00000000
data8 0x3FEEC9E8135D26F6
data4 0x2385C719, 0x00000000
LOCAL_OBJECT_END(tanl_table_cm1)

LOCAL_OBJECT_START(tanl_table_scim2)
//
// Entries SC_inv in Swapped IEEE format (extended)
// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
//
data8 0x839D6D4A1BF30C9E, 0x00004001
data8 0x80092804554B0EB0, 0x00004001
data8 0xF959F94CA1CF0DE9, 0x00004000
data8 0xF3086BA077378677, 0x00004000
data8 0xED154515CCD4723C, 0x00004000
data8 0xE77909441C27CF25, 0x00004000
data8 0xE22D037D8DDACB88, 0x00004000
data8 0xDD2B2D8A89C73522, 0x00004000
data8 0xD86E1A23BB2C1171, 0x00004000
data8 0xD3F0E288DFF5E0F9, 0x00004000
data8 0xCFAF16B1283BEBD5, 0x00004000
data8 0xCBA4AFAA0D88DD53, 0x00004000
data8 0xC7CE03CCCA67C43D, 0x00004000
data8 0xC427BC820CA0DDB0, 0x00004000
data8 0xC0AECD57F13D8CAB, 0x00004000
data8 0xBD606C3871ECE6B1, 0x00004000
data8 0xBA3A0A96A44C4929, 0x00004000
data8 0xB7394F6FE5CCCEC1, 0x00004000
data8 0xB45C12039637D8BC, 0x00004000
data8 0xB1A0552892CB051B, 0x00004000
data8 0xAF04432B6BA2FFD0, 0x00004000
data8 0xAC862A237221235F, 0x00004000
data8 0xAA2478AF5F00A9D1, 0x00004000
data8 0xA7DDBB0C81E082BF, 0x00004000
data8 0xA5B0987D45684FEE, 0x00004000
data8 0xA39BD0F5627A8F53, 0x00004000
data8 0xA19E3B036EC5C8B0, 0x00004000
data8 0x9FB6C1F091CD7C66, 0x00004000
data8 0x9DE464101FA3DF8A, 0x00004000
data8 0x9C263139A8F6B888, 0x00004000
data8 0x9A7B4968C27B0450, 0x00004000
data8 0x98E2DB7E5EE614EE, 0x00004000
LOCAL_OBJECT_END(tanl_table_scim2)

LOCAL_OBJECT_START(tanl_table_scim1)
//
// Entries SC_inv in Swapped IEEE format (extended)
// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
//
data8 0x969F335C13B2B5BA, 0x00004000
data8 0x93D446D9D4C0F548, 0x00004000
data8 0x9147094F61B798AF, 0x00004000
data8 0x8EF317CC758787AC, 0x00004000
data8 0x8CD498B3B99EEFDB, 0x00004000
data8 0x8AE82A7DDFF8BC37, 0x00004000
data8 0x892AD546E3C55D42, 0x00004000
data8 0x8799FEA9D15573C1, 0x00004000
data8 0x86335F88435A4B4C, 0x00004000
data8 0x84F4FB6E3E93A87B, 0x00004000
data8 0x83DD195280A382FB, 0x00004000
data8 0x82EA3D7FA4CB8C9E, 0x00004000
data8 0x821B247C6861D0A8, 0x00004000
data8 0x816EBED163E8D244, 0x00004000
data8 0x80E42D9127E4CFC6, 0x00004000
data8 0x807ABF8D28E64AFD, 0x00004000
data8 0x8031EF26863B4FD8, 0x00004000
data8 0x800960ADAE8C11FD, 0x00004000
data8 0x8000E1475FDBEC21, 0x00004000
data8 0x80186650A07791FA, 0x00004000
LOCAL_OBJECT_END(tanl_table_scim1)

//
// Floating-point register aliases.
// Several names deliberately share one register (e.g. Arg, Save_Norm_Arg,
// Result and r are all f8; P2_n and P1_n overlap on f45-f47).
//
Arg                 = f8
Save_Norm_Arg       = f8            // For input to reduction routine
Result              = f8
r                   = f8            // For output from reduction routine
c                   = f9            // For output from reduction routine
U_2                 = f10
rsq                 = f11
C_hi                = f12
C_lo                = f13
T_hi                = f14
T_lo                = f15

d_1                 = f33
N_0                 = f34
tail                = f35
tanx                = f36
Cx                  = f37
Sx                  = f38
sgn_r               = f39
CORR                = f40
P                   = f41
D                   = f42
ArgPrime            = f43
P_0                 = f44

P2_1                = f45
P2_2                = f46
P2_3                = f47

P1_1                = f45
P1_2                = f46
P1_3                = f47

P1_4                = f48
P1_5                = f49
P1_6                = f50
P1_7                = f51
P1_8                = f52
P1_9                = f53

x                   = f56
xsq                 = f57
Tx                  = f58
Tx1                 = f59
Set                 = f60
poly1               = f61
poly2               = f62
Poly                = f63
Poly1               = f64
Poly2               = f65
r_to_the_8          = f66
B                   = f67
SC_inv              = f68
Pos_r               = f69
N_0_fix             = f70
d_2                 = f71
PI_BY_4             = f72
TWO_TO_NEG14        = f74
TWO_TO_NEG33        = f75
NEGTWO_TO_NEG14     = f76
NEGTWO_TO_NEG33     = f77
two_by_PI           = f78
N                   = f79
N_fix               = f80
P_1                 = f81
P_2                 = f82
P_3                 = f83
s_val               = f84
w                   = f85
B_mask1             = f86
B_mask2             = f87
w2                  = f88
A                   = f89
a                   = f90
t                   = f91
U_1                 = f92
NEGTWO_TO_NEG2      = f93
TWO_TO_NEG2         = f94
Q1_1                = f95
Q1_2                = f96
Q1_3                = f97
Q1_4                = f98
Q1_5                = f99
Q1_6                = f100
Q1_7                = f101
Q1_8                = f102
S_hi                = f103
S_lo                = f104
V_hi                = f105
V_lo                = f106
U_hi                = f107
U_lo                = f108
U_hiabs             = f109
V_hiabs             = f110
V                   = f111
Inv_P_0             = f112

FR_inv_pi_2to63     = f113
FR_rshf_2to64       = f114
FR_2tom64           = f115
FR_rshf             = f116
Norm_Arg            = f117
Abs_Arg             = f118
TWO_TO_NEG65        = f119
fp_tmp              = f120
mOne                = f121

//
// General register aliases.
//
GR_SAVE_B0          = r33
GR_SAVE_GP          = r34
GR_SAVE_PFS         = r35

table_base          = r36
table_ptr1          = r37
table_ptr2          = r38
table_ptr3          = r39
lookup              = r40
N_fix_gr            = r41
GR_exp_2tom2        = r42
GR_exp_2tom65       = r43
exp_r               = r44
sig_r               = r45
bmask1              = r46
table_offset        = r47
bmask2              = r48
gr_tmp              = r49
cot_flag            = r50

GR_sig_inv_pi       = r51
GR_rshf_2to64       = r52
GR_exp_2tom64       = r53
GR_rshf             = r54
GR_exp_2_to_63      = r55
GR_exp_2_to_24      = r56
GR_signexp_x        = r57
GR_exp_x            = r58
GR_exp_mask         = r59
GR_exp_2tom14       = r60
GR_exp_m2tom14      = r61
GR_exp_2tom33       = r62
GR_exp_m2tom33      = r63

// NOTE(review): GR_SAVE_B0/PFS/GP are assigned a second time here
// (r33-r35 above, r64-r66 below). Kept exactly as in the original.
GR_SAVE_B0          = r64
GR_SAVE_PFS         = r65
GR_SAVE_GP          = r66

GR_Parameter_X      = r67
GR_Parameter_Y      = r68
GR_Parameter_RESULT = r69
GR_Parameter_Tag    = r70

.section .text
.global __libm_tanl#
.global __libm_cotl#

.proc __libm_cotl#
__libm_cotl:
.endp __libm_cotl#

//
// cotl entry: sets cot_flag = 1, performs the same setup as the tanl entry
// and branches to COMMON_PATH.
//
LOCAL_LIBM_ENTRY(cotl)

{ .mlx
      alloc r32 = ar.pfs, 0,35,4,0
      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
      mov GR_exp_mask = 0x1ffff            // Exponent mask
      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
}
;;

// Check for NatVals, Infs , NaNs, and Zeros
{ .mfi
      getf.exp GR_signexp_x = Arg          // Get sign and exponent of x
      fclass.m p6,p0 = Arg, 0x1E7          // Test for natval, nan, inf, zero
      mov cot_flag = 0x1
}
{ .mfb
      addl table_base = @ltoff(TANL_BASE_CONSTANTS), gp // Pointer to table ptr
      fnorm.s1 Norm_Arg = Arg              // Normalize x
      br.cond.sptk COMMON_PATH
}
;;

LOCAL_LIBM_END(cotl)

.proc __libm_tanl#
__libm_tanl:
.endp __libm_tanl#

//
// tanl entry: sets cot_flag = 0, performs the same setup as the cotl entry
// and falls through into COMMON_PATH.
//
GLOBAL_IEEE754_ENTRY(tanl)

{ .mlx
      alloc r32 = ar.pfs, 0,35,4,0
      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
      mov GR_exp_mask = 0x1ffff            // Exponent mask
      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
}
;;

// Check for NatVals, Infs , NaNs, and Zeros
// p6 must be set on this path too: COMMON_PATH branches on (p6) to
// TANL_SPECIAL, and the cotl entry performs the identical test.
{ .mfi
      getf.exp GR_signexp_x = Arg          // Get sign and exponent of x
      fclass.m p6,p0 = Arg, 0x1E7          // Test for natval, nan, inf, zero
      mov cot_flag = 0x0
}
{ .mfi
      addl table_base = @ltoff(TANL_BASE_CONSTANTS), gp // Pointer to table ptr
      fnorm.s1 Norm_Arg = Arg              // Normalize x
      nop.i 0
}
;;

// Common path for both tanl and cotl
COMMON_PATH:
{ .mfi
      setf.sig FR_inv_pi_2to63 = GR_sig_inv_pi // Form 1/pi * 2^63
      fclass.m p9, p0 = Arg, 0x0b          // Test x denormal
      mov GR_exp_2tom64 = 0xffff - 64      // Scaling constant to compute N
}
{ .mlx
      setf.d FR_rshf_2to64 = GR_rshf_2to64 // Form const 1.1000 * 2^(63+64)
      movl GR_rshf = 0x43e8000000000000    // Form const 1.1000 * 2^63
}
;;

// Check for everything - if false, then must be pseudo-zero or pseudo-nan.
// Branch out to deal with special values.
{ .mfi
      addl gr_tmp = -1,r0
      fclass.nm p7,p0 = Arg, 0x1FF         // Test x unsupported
      mov GR_exp_2_to_63 = 0xffff + 63     // Exponent of 2^63
}
{ .mfb
      ld8 table_base = [table_base]        // Get pointer to constant table
      fms.s1 mOne = f0, f0, f1
(p6)  br.cond.spnt TANL_SPECIAL            // Branch if x natval, nan, inf, zero
}
;;

{ .mmb
      setf.sig fp_tmp = gr_tmp             // Make a constant so fmpy produces inexact
      mov GR_exp_2_to_24 = 0xffff + 24     // Exponent of 2^24
(p9)  br.cond.spnt TANL_DENORMAL           // Branch if x denormal
}
;;

TANL_COMMON:
// Return to here if x denormal
//
// Do fcmp to generate Denormal exception
// - can't do FNORM (will generate Underflow when U is unmasked!)
// Branch out to deal with unsupporteds values.
{ .mfi
      setf.exp FR_2tom64 = GR_exp_2tom64   // Form 2^-64 for scaling N_float
      fcmp.eq.s0 p0, p6 = Arg, f1          // Dummy to flag denormals
      add table_ptr1 = 0, table_base       // Point to tanl_table_1
}
{ .mib
      setf.d FR_rshf = GR_rshf             // Form right shift const 1.1000 * 2^63
      add table_ptr2 = 80, table_base      // Point to tanl_table_2
(p7)  br.cond.spnt TANL_UNSUPPORTED        // Branch if x unsupported type
}
;;

{ .mfi
      and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
      fmpy.s1 Save_Norm_Arg = Norm_Arg, f1 // Save x if large arg reduction
      dep.z bmask1 = 0x7c, 56, 8           // Form mask to get 5 msb of r
                                           // bmask1 = 0x7c00000000000000
}
;;

//
// Decide about the paths to take:
// Set PR_6 if |Arg| >= 2**63
// Set PR_9 if |Arg| < 2**24 - CASE 1 OR 2
// OTHERWISE Set PR_8 - CASE 3 OR 4
//
// Branch out if the magnitude of the input argument is >= 2^63
// - do this branch before the next.
{ .mfi
      ldfe two_by_PI = [table_ptr1],16     // Load 2/pi
      nop.f 999
      dep.z bmask2 = 0x41, 57, 7           // Form mask to OR to produce B
                                           // bmask2 = 0x8200000000000000
}
{ .mib
      ldfe PI_BY_4 = [table_ptr2],16       // Load pi/4
      cmp.ge p6,p0 = GR_exp_x, GR_exp_2_to_63 // Is |x| >= 2^63
(p6)  br.cond.spnt TANL_ARG_TOO_LARGE      // Branch if |x| >= 2^63
}
;;

{ .mmi
      ldfe P_0 = [table_ptr1],16           // Load P_0
      ldfe Inv_P_0 = [table_ptr2],16       // Load Inv_P_0
      nop.i 999
}
;;

{ .mfi
      ldfe P_1 = [table_ptr1],16           // Load P_1
      fmerge.s Abs_Arg = f0, Norm_Arg      // Get |x|
      mov GR_exp_m2tom33 = 0x2ffff - 33    // Form signexp of -2^-33
}
{ .mfi
      ldfe d_1 = [table_ptr2],16           // Load d_1 for 2^24 <= |x| < 2^63
      nop.f 999
      mov GR_exp_2tom33 = 0xffff - 33      // Form signexp of 2^-33
}
;;

{ .mmi
      ldfe P_2 = [table_ptr1],16           // Load P_2
      ldfe d_2 = [table_ptr2],16           // Load d_2 for 2^24 <= |x| < 2^63
      cmp.ge p8,p0 = GR_exp_x, GR_exp_2_to_24 // Is |x| >= 2^24
}
;;

// Use special scaling to right shift so N=Arg * 2/pi is in rightmost bits
// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
{ .mfb
      ldfe P_3 = [table_ptr1],16           // Load P_3
      fma.s1 N_fix = Norm_Arg, FR_inv_pi_2to63, FR_rshf_2to64
(p8)  br.cond.spnt TANL_LARGER_ARG         // Branch if 2^24 <= |x| < 2^63
}
;;

// Here if 0 < |x| < 2^24
// ARGUMENT REDUCTION CODE - CASE 1 and 2
//
{ .mmf
      setf.exp TWO_TO_NEG33 = GR_exp_2tom33      // Form 2^-33
      setf.exp NEGTWO_TO_NEG33 = GR_exp_m2tom33  // Form -2^-33
      fmerge.s r = Norm_Arg,Norm_Arg       // Assume r=x, ok if |x| < pi/4
}
;;

//
// If |Arg| < pi/4, set PR_8, else pi/4 <=|Arg| < 2^24 - set PR_9.
//
// Case 2: Convert integer N_fix back to normalized floating-point value.
{ .mfi
      getf.sig sig_r = Norm_Arg            // Get sig_r if 1/4 <= |x| < pi/4
      fcmp.lt.s1 p8,p9= Abs_Arg,PI_BY_4    // Test |x| < pi/4
      mov GR_exp_2tom2 = 0xffff - 2        // Form signexp of 2^-2
}
{ .mfi
      ldfps TWO_TO_NEG2, NEGTWO_TO_NEG2 = [table_ptr2] // Load 2^-2, -2^-2
      fms.s1 N = N_fix, FR_2tom64, FR_rshf // Use scaling to get N floated
      mov N_fix_gr = r0                    // Assume N=0, ok if |x| < pi/4
}
;;

//
// Case 1: Is |r| < 2**(-2).
// Arg is the same as r in this case.
// r = Arg
// c = 0
//
// Case 2: Place integer part of N in GP register.
{ .mfi
(p9)  getf.sig N_fix_gr = N_fix
      fmerge.s c = f0, f0                  // Assume c=0, ok if |x| < pi/4
      cmp.lt p10, p0 = GR_exp_x, GR_exp_2tom2 // Test if |x| < 1/4
}
;;

{ .mfi
      setf.sig B_mask1 = bmask1            // Form mask to get 5 msb of r
      nop.f 999
      mov exp_r = GR_exp_x                 // Get exp_r if 1/4 <= |x| < pi/4
}
{ .mbb
      setf.sig B_mask2 = bmask2            // Form mask to form B from r
(p10) br.cond.spnt TANL_SMALL_R            // Branch if 0 < |x| < 1/4
(p8)  br.cond.spnt TANL_NORMAL_R           // Branch if 1/4 <= |x| < pi/4
}
;;

// Here if pi/4 <= |x| < 2^24
//
// Case 1: PR_3 is only affected when PR_1 is set.
//
//
// Case 2: w = N * P_2
// Case 2: s_val = -N * P_1 + Arg
//
{ .mfi
      nop.m 999
      fnma.s1 s_val = N, P_1, Norm_Arg
      nop.i 999
}
{ .mfi
      nop.m 999
      fmpy.s1 w = N, P_2                   // w = N * P_2 for |s| >= 2^-33
      nop.i 999
}
;;

// Case 2_reduce: w = N * P_3 (change sign)
{ .mfi
      nop.m 999
      fmpy.s1 w2 = N, P_3                  // w = N * P_3 for |s| < 2^-33
      nop.i 999
}
;;

// Case 1_reduce: r = s + w (change sign)
{ .mfi
      nop.m 999
      fsub.s1 r = s_val, w                 // r = s_val - w for |s| >= 2^-33
// NOTE(review): the visible source is truncated here, inside an open bundle;
// the remainder of this bundle and of the routine is not available.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -