?? libm_sincosf.s
字號:
.file "libm_sincosf.s"


// Copyright (c) 2002 - 2005, Intel Corporation
// All rights reserved.
//
// Contributed 2002 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/01/02 Initial version
// 02/18/02 Large arguments processing routine is excluded.
//          External interface entry points are added
// 02/26/02 Added temporary return of results in r8, r9
// 03/13/02 Corrected restore of predicate registers
// 03/19/02 Added stack unwind around call to __libm_cisf_large
// 09/05/02 Work range is widened by reduction strengthen (2 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 02/11/04 cisf is moved to the separate file.
// 03/31/05 Reformatted delimiters between data tables

// API
//==============================================================
// 1) void sincosf(float, float*s, float*c)
// 2) __libm_sincosf - internal LIBM function, that accepts
//    argument in f8 and returns cosine through f8, sine through f9
//
// Overview of operation
//==============================================================
//
// Step 1
// ======
// Reduce x to region -1/2*pi/2^k ===== 0 ===== +1/2*pi/2^k where k=4
//    divide x by pi/2^k.
//    Multiply by 2^k/pi.
//    nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
//    Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k)
//    for increased accuracy.
//    pi/2^k is stored as two numbers that when added make pi/2^k.
//       pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
//    HIGH part is rounded to zero, LOW - to nearest
//
// x = (nfloat * pi/2^k) + r
//    r is small enough that we can use a polynomial approximation
//    and is referred to as the reduced argument.
//
// Step 3
// ======
// Take the unreduced part and remove the multiples of 2pi.
// So nfloat = nfloat (with lower k+1 bits cleared) + lower k+1 bits
//
//    nfloat (with lower k+1 bits cleared) is a multiple of 2^(k+1)
//    N * 2^(k+1)
//    nfloat * pi/2^k = N * 2^(k+1) * pi/2^k + (lower k+1 bits) * pi/2^k
//    nfloat * pi/2^k = N * 2 * pi + (lower k+1 bits) * pi/2^k
//    nfloat * pi/2^k = N2pi + M * pi/2^k
//
//
// Sin(x) = Sin((nfloat * pi/2^k) + r)
//        = Sin(nfloat * pi/2^k) * Cos(r) + Cos(nfloat * pi/2^k) * Sin(r)
//
//          Sin(nfloat * pi/2^k) = Sin(N2pi + Mpi/2^k)
//                               = Sin(N2pi)Cos(Mpi/2^k) + Cos(N2pi)Sin(Mpi/2^k)
//                               = Sin(Mpi/2^k)
//
//          Cos(nfloat * pi/2^k) = Cos(N2pi + Mpi/2^k)
//                               = Cos(N2pi)Cos(Mpi/2^k) - Sin(N2pi)Sin(Mpi/2^k)
//                               = Cos(Mpi/2^k)
//
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
//
// Step 4
// ======
// 0 <= M < 2^(k+1)
// There are 2^(k+1) Sin entries in a table.
// There are 2^(k+1) Cos entries in a table.
//
// Get Sin(Mpi/2^k) and Cos(Mpi/2^k) by table lookup.
//
//
// Step 5
// ======
// Calculate Cos(r) and Sin(r) by polynomial approximation.
//
// Cos(r) = 1 + r^2 q1 + r^4 q2 = Series for Cos
// Sin(r) = r + r^3 p1 + r^5 p2 = Series for Sin
//
// and the coefficients q1, q2 and p1, p2 are stored in a table
//
//
// Calculate
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
// as follows
//
//    S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
//    rsq = r*r
//
//
//    P = p1 + r^2p2
//    Q = q1 + r^2q2
//
//       rcub = r * rsq
//       Sin(r) = r + rcub * P
//              = r + r^3p1 + r^5p2 = Sin(r)
//
//            P = r + rcub * P
//
//    Answer = S[m] Cos(r) + C[m] P
//
//       Cos(r) = 1 + rsq Q
//       Cos(r) = 1 + r^2 Q
//       Cos(r) = 1 + r^2 (q1 + r^2q2)
//       Cos(r) = 1 + r^2q1 + r^4q2
//
//       S[m] Cos(r) = S[m](1 + rsq Q)
//       S[m] Cos(r) = S[m] + S[m] rsq Q
//       S[m] Cos(r) = S[m] + s_rsq Q
//       Q           = S[m] + s_rsq Q
//
// Then,
//
//    Answer = Q + C[m] P


// Registers used
//==============================================================
// general input registers:
// r14 -> r19
// r32 -> r52
//
// predicate registers used:
// p6 -> p14
//
// floating-point registers used
// f9 -> f15
// f32 -> f100


// Assembly macros
//==============================================================
// Symbolic names for the floating-point registers.

cisf_Arg                     = f8    // input argument (see API above)
cisf_Sin_res                 = f9    // sine result
cisf_Cos_res                 = f8    // cosine result; same register as cisf_Arg

cisf_NORM_f8                 = f10
cisf_W                       = f11
cisf_int_Nfloat              = f12
cisf_Nfloat                  = f13

cisf_r                       = f14
cisf_r_exact                 = f68
cisf_rsq                     = f15
cisf_rcub                    = f32

cisf_Inv_Pi_by_16            = f33
cisf_Pi_by_16_hi             = f34
cisf_Pi_by_16_lo             = f35

cisf_Inv_Pi_by_64            = f36
cisf_Pi_by_64_hi             = f37
cisf_Pi_by_64_lo             = f38

cisf_P1                      = f39
cisf_Q1                      = f40
cisf_P2                      = f41
cisf_Q2                      = f42
cisf_P3                      = f43
cisf_Q3                      = f44
cisf_P4                      = f45
cisf_Q4                      = f46

cisf_P_temp1                 = f47
cisf_P_temp2                 = f48

cisf_Q_temp1                 = f49
cisf_Q_temp2                 = f50

cisf_P                       = f51

cisf_SIG_INV_PI_BY_16_2TO61  = f52
cisf_RSHF_2TO61              = f53
cisf_RSHF                    = f54
cisf_2TOM61                  = f55
cisf_NFLOAT                  = f56
cisf_W_2TO61_RSH             = f57

cisf_tmp                     = f58

cisf_Sm_sin                  = f59
cisf_Cm_sin                  = f60

cisf_Sm_cos                  = f61
cisf_Cm_cos                  = f62

cisf_srsq_sin                = f63
cisf_srsq_cos                = f64

cisf_Q_sin                   = f65
cisf_Q_cos                   = f66
cisf_Q                       = f67

/////////////////////////////////////////////////////////////
// Symbolic names for the general registers.

cisf_pResSin                 = r33
cisf_pResCos                 = r34

cisf_exp_limit               = r35
cisf_r_signexp               = r36
cisf_AD_beta_table           = r37
cisf_r_sincos                = r38

cisf_r_exp                   = r39
cisf_r_17_ones               = r40

cisf_GR_sig_inv_pi_by_16     = r14
cisf_GR_rshf_2to61           = r15
cisf_GR_rshf                 = r16
cisf_GR_exp_2tom61           = r17
cisf_GR_n                    = r18

cisf_GR_n_sin                = r19
cisf_GR_m_sin                = r41
cisf_GR_32m_sin              = r41   // same register as cisf_GR_m_sin

cisf_GR_n_cos                = r42
cisf_GR_m_cos                = r43
cisf_GR_32m_cos              = r43   // same register as cisf_GR_m_cos

cisf_AD_2_sin                = r44
cisf_AD_2_cos                = r45

cisf_gr_tmp                  = r46
GR_SAVE_B0                   = r47
GR_SAVE_GP                   = r48
rB0_SAVED                    = r49
GR_SAVE_PFS                  = r50
GR_SAVE_PR                   = r51
cisf_AD_1                    = r52

RODATA

.align 16
// Pi/16 parts
// Each constant is two data8 words: a 64-bit significand followed by a
// sign/exponent word (presumably loaded as an extended-precision value;
// confirm against the load instructions in the code section).
LOCAL_OBJECT_START(double_cisf_pi)
   data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
   data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
LOCAL_OBJECT_END(double_cisf_pi)

// Coefficients for polynomials
// (p1, p2, q1, q2 of the Sin(r) / Cos(r) series described in Step 5)
LOCAL_OBJECT_START(double_cisf_pq_k4)
   data8 0x3F810FABB668E9A2 // P2
   data8 0x3FA552E3D6DE75C9 // Q2
   data8 0xBFC555554447BC7F // P1
   data8 0xBFDFFFFFC447610A // Q1
LOCAL_OBJECT_END(double_cisf_pq_k4)

// Sincos table (S[m], C[m])
// One (sin, cos) pair of doubles per multiple m of Pi/16.
LOCAL_OBJECT_START(double_sin_cos_beta_k4)
   data8 0x0000000000000000 // sin ( 0 Pi / 16 )
   data8 0x3FF0000000000000 // cos ( 0 Pi / 16 )
//
   data8 0x3FC8F8B83C69A60B // sin ( 1 Pi / 16 )
   data8 0x3FEF6297CFF75CB0 // cos ( 1 Pi / 16 )
//
   data8 0x3FD87DE2A6AEA963 // sin ( 2 Pi / 16 )
   data8 0x3FED906BCF328D46 // cos ( 2 Pi / 16 )
//
   data8 0x3FE1C73B39AE68C8 // sin ( 3 Pi / 16 )
   data8 0x3FEA9B66290EA1A3 // cos ( 3 Pi / 16 )
//
   data8 0x3FE6A09E667F3BCD // sin ( 4 Pi / 16 )
   data8 0x3FE6A09E667F3BCD // cos ( 4 Pi / 16 )
//
   data8 0x3FEA9B66290EA1A3 // sin ( 5 Pi / 16 )
   data8 0x3FE1C73B39AE68C8 // cos ( 5 Pi / 16 )
//
   data8 0x3FED906BCF328D46 // sin ( 6 Pi / 16 )
   data8 0x3FD87DE2A6AEA963 // cos ( 6 Pi / 16 )
//
   data8 0x3FEF6297CFF75CB0 // sin ( 7 Pi / 16 )
   data8 0x3FC8F8B83C69A60B // cos ( 7 Pi / 16 )
//
   data8 0x3FF0000000000000 // sin ( 8 Pi / 16 )
   data8 0x0000000000000000 // cos ( 8 Pi / 16 )
//
   data8 0x3FEF6297CFF75CB0 // sin ( 9 Pi / 16 )
   data8 0xBFC8F8B83C69A60B // cos ( 9 Pi / 16 )
//
   data8 0x3FED906BCF328D46 // sin ( 10 Pi / 16 )
   data8 0xBFD87DE2A6AEA963 // cos ( 10 Pi / 16 )
//
   data8 0x3FEA9B66290EA1A3 // sin ( 11 Pi / 16 )
   data8 0xBFE1C73B39AE68C8 // cos ( 11 Pi / 16 )
//
   data8 0x3FE6A09E667F3BCD // sin ( 12 Pi / 16 )
   data8 0xBFE6A09E667F3BCD // cos ( 12 Pi / 16 )
//
   data8 0x3FE1C73B39AE68C8 // sin ( 13 Pi / 16 )
   data8 0xBFEA9B66290EA1A3 // cos ( 13 Pi / 16 )
//
   data8 0x3FD87DE2A6AEA963 // sin ( 14 Pi / 16 )
   data8 0xBFED906BCF328D46 // cos ( 14 Pi / 16 )
//
   data8 0x3FC8F8B83C69A60B // sin ( 15 Pi / 16 )
   data8 0xBFEF6297CFF75CB0 // cos ( 15 Pi / 16 )
//
   data8 0x0000000000000000 // sin ( 16 Pi / 16 )
   data8 0xBFF0000000000000 // cos ( 16 Pi / 16 )
//
   data8 0xBFC8F8B83C69A60B // sin ( 17 Pi / 16 )
   data8 0xBFEF6297CFF75CB0 // cos ( 17 Pi / 16 )
//
   data8 0xBFD87DE2A6AEA963 // sin ( 18 Pi / 16 )
   data8 0xBFED906BCF328D46 // cos ( 18 Pi / 16 )
//
   data8 0xBFE1C73B39AE68C8 // sin ( 19 Pi / 16 )
   data8 0xBFEA9B66290EA1A3 // cos ( 19 Pi / 16 )
//
   data8 0xBFE6A09E667F3BCD // sin ( 20 Pi / 16 )
   data8 0xBFE6A09E667F3BCD // cos ( 20 Pi / 16 )
//
   data8 0xBFEA9B66290EA1A3 // sin ( 21 Pi / 16 )
   data8 0xBFE1C73B39AE68C8 // cos ( 21 Pi / 16 )
//
   data8 0xBFED906BCF328D46 // sin ( 22 Pi / 16 )
   data8 0xBFD87DE2A6AEA963 // cos ( 22 Pi / 16 )
//
   data8 0xBFEF6297CFF75CB0 // sin ( 23 Pi / 16 )
   data8 0xBFC8F8B83C69A60B // cos ( 23 Pi / 16 )
//
   data8 0xBFF0000000000000 // sin ( 24 Pi / 16 )
   data8 0x0000000000000000 // cos ( 24 Pi / 16 )
//
   data8 0xBFEF6297CFF75CB0 // sin ( 25 Pi / 16 )
   data8 0x3FC8F8B83C69A60B // cos ( 25 Pi / 16 )
// NOTE(review): the available source ends here. The remaining table
// entries and the matching LOCAL_OBJECT_END(double_sin_cos_beta_k4) are
// not present in this copy and must be restored from the upstream file.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -