?? e_atan2f.s
字號:
.file "atan2f.s"// Copyright (c) 2000 - 2003, Intel Corporation// All rights reserved.//// Contributed 2000 by the Intel Numerics Group, Intel Corporation//// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.//// * Redistributions in binary form must reproduce the above copyright// notice, this list of conditions and the following disclaimer in the// documentation and/or other materials provided with the distribution.//// * The name of Intel Corporation may not be used to endorse or promote// products derived from this software without specific prior written// permission.// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 06/01/00 Initial version
// 08/15/00 Bundle added after call to __libm_error_support to properly
//          set [the previously overwritten] GR_Parameter_RESULT.
// 08/17/00 Changed predicate register macro-usage to direct predicate
//          names due to an assembler bug.
// 01/05/01 Fixed flag settings for denormal input.
// 01/19/01 Added documentation
// 01/30/01 Improved speed
// 02/06/02 Corrected .section statement
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
//
// Description
//=========================================
// The atan2 function computes the principal value of the arc tangent of y/x using
// the signs of both arguments to determine the quadrant of the return value.
// A domain error may occur if both arguments are zero.
// The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
//..
//..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
//..v and u can be negative. We state the relationship between atan2(y,x) and
//..atan(v/u).
//..
//..Let swap = false if v = y, and swap = true if v = x.
//..Define C according to the matrix
//..
//..                            TABLE FOR C
//..                              x +ve         x -ve
//..   no swap (swap = false)    sgn(y)*0      sgn(y)*pi
//..   swap    (swap = true )    sgn(y)*pi/2   sgn(y)*pi/2
//..
//..   atan2(y,x) =  C +  atan(v/u)  if no swap
//..   atan2(y,x) =  C -  atan(v/u)  if swap
//..
//..This relationship is more efficient to compute as we accommodate signs in v and u,
//..saving the need to obtain the absolute value before computation can proceed.
//..
//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
//..A = y * frcpa(x)    (so A = (y/x)(1 - beta))
//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
//..a correction.
//..atan(A) is approximated by a polynomial
//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
//..atan(G) is approximated as follows:
//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + p1 * g^3
//..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
//..
//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
//..Z = x * frcpa(y)    (so Z = (x/y)(1 - beta))
//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
//..a correction.
//..atan(Z) is approximated by a polynomial
//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
//..atan(T) is approximated as follows:
//..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + p1 * t^3
//..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
//..
//..
//..A = y * frcpa(x)
//..atan(A) ~=~ A + p1 A^3 + ...
//..                         + p10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
//..poly_A1 = poly_A2 + A4 * poly_A1
//..poly_A1 = poly_A3 + A4 * poly_A1
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
//..
//..atan(G) is approximated as follows:
//..G_numer = y - A*x, G_denom = x + A*y
//..H1 = frcpa(G_denom)
//..H_beta = 1 - H1 * G_denom
//..H2 = H1 + H1 * H_beta
//..H_beta2 = H_beta*H_beta
//..H3 = H2 + H2*H_beta2
//..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
//..atan_G = G_numer*H3 + atan_G
//..
//..
//..A = y * frcpa(x)
//..atan(A) ~=~ A + p1 A^3 + ... + p10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
//..poly_A1 = poly_A2 + A4 * poly_A1
//..poly_A1 = poly_A3 + A4 * poly_A1
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
//..
//..
//..====================================================================
//..	COEFFICIENTS USED IN THE COMPUTATION
//..====================================================================
//coef_pj, j = 1,2,...,10;  atan(A) ~=~ A + p1 A^3 + p2 A^5 + ...
//                                         + p10 A^21
//
//  coef_p1          =      -.3333332707155439167401311806315789E+00
//  coef_p1   in dbl = BFD5 5555 1219 1621
//
//  coef_p2          =       .1999967670926658391827857030875748E+00
//  coef_p2   in dbl = 3FC9 997E 7AFB FF4E
//
//  coef_p3          =      -.1427989384500152360161563301087296E+00
//  coef_p3   in dbl = BFC2 473C 5145 EE38
//
//  coef_p4          =       .1105852823460720770079031213661163E+00
//  coef_p4   in dbl = 3FBC 4F51 2B18 65F5
//
//  coef_p5          =      -.8811839915595312348625710228448363E-01
//  coef_p5   in dbl = BFB6 8EED 6A8C FA32
//
//  coef_p6          =       .6742329836955067042153645159059714E-01
//  coef_p6   in dbl = 3FB1 42A7 3D7C 54E3
//
//  coef_p7          =      -.4468571068774672908561591262231909E-01
//  coef_p7   in dbl = BFA6 E10B A401 393F
//
//  coef_p8          =       .2252333246746511135532726960586493E-01
//  coef_p8   in dbl = 3F97 105B 4160 F86B
//
//  coef_p9          =      -.7303884867007574742501716845542314E-02
//  coef_p9   in dbl = BF7D EAAD AA33 6451
//
//  coef_p10         =       .1109686868355312093949039454619058E-02
//  coef_p10  in dbl = 3F52 2E5D 33BC 9BAA
//
// Special values
//==============================================================
//              Y                 x          Result
//             +number           +inf        +0
//             -number           +inf        -0
//             +number           -inf        +pi
//             -number           -inf        -pi
//
//             +inf              +number     +pi/2
//             -inf              +number     -pi/2
//             +inf              -number     +pi/2
//             -inf              -number     -pi/2
//
//             +inf              +inf        +pi/4
//             -inf              +inf        -pi/4
//             +inf              -inf        +3pi/4
//             -inf              -inf        -3pi/4
//
//             +1                +1          +pi/4
//             -1                +1          -pi/4
//             +1                -1          +3pi/4
//             -1                -1          -3pi/4
//
//             +number           +0          +pi/2    // does not raise DBZ
//             -number           +0          -pi/2    // does not raise DBZ
//             +number           -0          +pi/2    // does not raise DBZ
//             -number           -0          -pi/2    // does not raise DBZ
//
//             +0                +number     +0
//             -0                +number     -0
//             +0                -number     +pi
//             -0                -number     -pi
//
//             +0                +0          +0      // does not raise invalid
//             -0                +0          -0      // does not raise invalid
//             +0                -0          +pi     // does not raise invalid
//             -0                -0          -pi     // does not raise invalid
//
//             NaN               anything    quiet Y
//             anything          NaN         quiet X
// atan2(+-0/+-0) sets double error tag to 37
// atan2f(+-0/+-0) sets single error tag to 38
// These are domain errors.
//
// Assembly macros
//=========================================
// integer
// registers
//
// General-register names.  The entry bundle of atan2f executes
//     alloc r32 = ar.pfs,1,5,4,0
// i.e. 1 input, 5 locals, 4 outputs, 0 rotating, so r33-r37 are the locals
// and r38-r41 are the output registers of this frame.
// Only atan2f_GR_Addr_1 / atan2f_GR_Addr_2 are referenced by the code visible
// here; the GR_SAVE_* and GR_Parameter_* names are presumably used around the
// __libm_error_support call mentioned in the History notes -- confirm against
// the rest of the routine.
atan2f_GR_Addr_1              = r33
atan2f_GR_Addr_2              = r34
GR_SAVE_B0                    = r35

GR_SAVE_PFS                   = r36
GR_SAVE_GP                    = r37

GR_Parameter_X                = r38
GR_Parameter_Y                = r39
GR_Parameter_RESULT           = r40
GR_Parameter_TAG              = r41

// floating point registers
//
// The coefficient and constant names are paired exactly as the ldfpd
// instructions below load them (two adjacent doubles per load).
atan2f_coef_p1                = f32
atan2f_coef_p10               = f33
atan2f_coef_p7                = f34
atan2f_coef_p6                = f35

atan2f_coef_p3                = f36
atan2f_coef_p2                = f37
atan2f_coef_p9                = f38
atan2f_coef_p8                = f39
atan2f_coef_p5                = f40

atan2f_coef_p4                = f41
atan2f_const_piby2            = f42
atan2f_const_pi               = f43
atan2f_const_piby4            = f44
atan2f_const_3piby4           = f45

atan2f_xsq                    = f46
atan2f_ysq                    = f47
atan2f_xy                     = f48
atan2f_const_1                = f49
atan2f_sgn_Y                  = f50

atan2f_Z0                     = f51
atan2f_A0                     = f52
atan2f_Z                      = f53
atan2f_A                      = f54
atan2f_C                      = f55

atan2f_U                      = f56
atan2f_Usq                    = f57
atan2f_U4                     = f58
atan2f_U6                     = f59
atan2f_U8                     = f60

atan2f_poly_u109              = f61
atan2f_poly_u87               = f62
atan2f_poly_u65               = f63
atan2f_poly_u43               = f64
atan2f_poly_u21               = f65

atan2f_poly_u10to7            = f66
atan2f_poly_u6to3             = f67
atan2f_poly_u10to3            = f68
atan2f_poly_u10to0            = f69
atan2f_poly_u210              = f70

atan2f_T_numer                = f71
atan2f_T_denom                = f72
atan2f_G_numer                = f73
atan2f_G_denom                = f74
atan2f_p1rnum                 = f75

atan2f_R_denom                = f76
atan2f_R_numer                = f77
atan2f_pR                     = f78
atan2f_pRC                    = f79
atan2f_pQRC                   = f80

atan2f_Q1                     = f81
atan2f_Q_beta                 = f82
atan2f_Q2                     = f83
atan2f_Q_beta2                = f84
atan2f_Q3                     = f85

atan2f_r                      = f86
atan2f_rsq                    = f87
atan2f_poly_atan_U            = f88


// predicate registers
// (kept as comments only: per the History notes the code names the predicates
//  directly because of an assembler bug)
//atan2f_Pred_Swap     = p6 // |y| >  |x|
//atan2f_Pred_noSwap   = p7 // |y| <= |x|
//atan2f_Pred_Xpos     = p8 //  x  >=  0
//atan2f_Pred_Xneg     = p9 //  x  <   0


RODATA

.align 16

// Table 1: six doubles (0x30 bytes), stored in the order the code loads them
// pairwise through atan2f_GR_Addr_1.
LOCAL_OBJECT_START(atan2f_coef_table1)
data8 0xBFD5555512191621 // p1
data8 0x3F522E5D33BC9BAA // p10
data8 0xBFA6E10BA401393F // p7
data8 0x3FB142A73D7C54E3 // p6
data8 0xBFC2473C5145EE38 // p3
data8 0x3FC9997E7AFBFF4E // p2
LOCAL_OBJECT_END(atan2f_coef_table1)

// Table 2: remaining coefficients followed by the pi constants, loaded
// pairwise through atan2f_GR_Addr_2.  The code computes that pointer as
// (address of table 1) + 0x30, so this table must stay immediately after
// table 1 with no padding in between.
LOCAL_OBJECT_START(atan2f_coef_table2)
data8 0xBF7DEAADAA336451 // p9
data8 0x3F97105B4160F86B // p8
data8 0xBFB68EED6A8CFA32 // p5
data8 0x3FBC4F512B1865F5 // p4
data8 0x3ff921fb54442d18 // pi/2
data8 0x400921fb54442d18 // pi
data8 0x3fe921fb54442d18 // pi/4
data8 0x4002d97c7f3321d2 // 3pi/4
LOCAL_OBJECT_END(atan2f_coef_table2)


// atan2f entry.
// Per the inline comments, f8 holds y and f9 holds x on entry.
// The opening bundles overlap three things:
//   * loading the polynomial coefficients and pi constants from the tables;
//   * forming x*x, y*y, x*y and the frcpa reciprocal approximations
//     Z0 ~ 1/y and A0 ~ 1/x, from which both candidate quotients
//     (Z = Z0*x, A = A0*y) and their correction numerators/denominators
//     are started before it is known which of |x|, |y| is larger;
//   * classifying x and y so that zero / infinity / NaN inputs branch to
//     ATAN2F_XY_INF_NAN_ZERO.
// f0 and f1 are the fixed IA-64 registers holding +0.0 and +1.0.
.section .text
GLOBAL_IEEE754_ENTRY(atan2f)

{ .mfi
      alloc      r32 = ar.pfs,1,5,4,0
      frcpa.s1   atan2f_Z0,p0 = f1,f8                 // Approx to 1/y
      nop.i      999
}
{ .mfi
      addl       atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp
      fma.s1     atan2f_xsq = f9,f9,f0                // x*x
      nop.i      999 ;;
}

{ .mfi
      ld8        atan2f_GR_Addr_1 = [atan2f_GR_Addr_1] // table 1 address from its linkage-table slot
      frcpa.s1   atan2f_A0,p0 = f1,f9                 // Approx to 1/x
      nop.i      999
}
{ .mfi
      nop.m      999
      fma.s1     atan2f_ysq = f8,f8,f0                // y*y
      nop.i      999 ;;
}

{ .mfi
      nop.m      999
      fcmp.ge.s1 p8,p9 = f9,f0                        // Set p8 if x>=0, p9 if x<0
      nop.i      999
}
{ .mfi
      nop.m      999
      fma.s1     atan2f_xy = f9,f8,f0                 // x*y
      nop.i      999 ;;
}

{ .mfi
      add        atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1 // table 2 = table 1 + 6 doubles
      fmerge.s   atan2f_sgn_Y = f8,f1                 // 1.0 carrying the sign of y
      nop.i      999 ;;
}

{ .mmf
      ldfpd      atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
      ldfpd      atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
      fclass.m   p10,p0 = f9,0xe7                     // Test x @inf|@snan|@qnan|@zero
}
;;

{ .mfi
      ldfpd      atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
      fma.s1     atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8   // y + Z0*x*x
      nop.i      999
}
{ .mfi
      ldfpd      atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
      fma.s1     atan2f_Z = atan2f_Z0,f9,f0           // Z = x * Z0
      nop.i      999 ;;
}

{ .mfi
      ldfpd      atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
      fma.s1     atan2f_G_denom = atan2f_A0,atan2f_ysq,f9   // x + A0*y*y
      nop.i      999
}
{ .mfi
      ldfpd      atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
      fma.s1     atan2f_A = atan2f_A0,f8,f0           // A = y * A0
      nop.i      999 ;;
}

{ .mfi
      ldfpd      atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
      fclass.m   p11,p0 = f8,0xe7                     // Test y @inf|@snan|@qnan|@zero
      nop.i      999
}
{ .mfb
      nop.m      999
      fnma.s1    atan2f_T_numer = atan2f_Z0,atan2f_xy,f9    // x - Z0*x*y
(p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;;          // Branch on x nan,inf,zero
}

// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
{ .mfi
      nop.m      999
      fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
      nop.i      999
}
{ .mfb
      nop.m      999
      fnma.s1    atan2f_G_numer = atan2f_A0,atan2f_xy,f8    // y - A0*x*y
(p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;;          // Branch on y nan,inf,zero
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -