?? s_cosf.s
字號:
.file "sincosf.s"

// NOTE(review): this copy had lost its original line breaks, so every
// directive following a `//` on the same physical line was being treated as
// comment text.  The statements below are restored one per line.

// Copyright (c) 2000 - 2005, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 04/02/00 Unwind support added.
// 06/16/00 Updated tables to enforce symmetry
// 08/31/00 Saved 2 cycles in main path, and 9 in other paths.
// 09/20/00 The updated tables regressed to an old version, so reinstated them
// 10/18/00 Changed one table entry to ensure symmetry
// 01/03/01 Improved speed, fixed flag settings for small arguments.
// 02/18/02 Large arguments processing routine excluded
// 05/20/02 Cleaned up namespace and sf0 syntax
// 06/03/02 Insure inexact flag set for large arg result
// 09/05/02 Single precision version is made using double precision one as base
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 03/31/05 Reformatted delimiters between data tables
//
// API
//==============================================================
// float sinf( float x);
// float cosf( float x);
//
// Overview of operation
//==============================================================
//
// Step 1
// ======
// Reduce x to region -1/2*pi/2^k ===== 0 ===== +1/2*pi/2^k where k=4
//    divide x by pi/2^k.
//    Multiply by 2^k/pi.
//    nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
//    Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k)
//    for increased accuracy.
//    pi/2^k is stored as two numbers that when added make pi/2^k.
//       pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
//    HIGH part is rounded to zero, LOW - to nearest
//
// x = (nfloat * pi/2^k) + r
//    r is small enough that we can use a polynomial approximation
//    and is referred to as the reduced argument.
//
// Step 3
// ======
// Take the unreduced part and remove the multiples of 2pi.
// So nfloat = nfloat (with lower k+1 bits cleared) + lower k+1 bits
//
//    nfloat (with lower k+1 bits cleared) is a multiple of 2^(k+1)
//    N * 2^(k+1)
//    nfloat * pi/2^k = N * 2^(k+1) * pi/2^k + (lower k+1 bits) * pi/2^k
//    nfloat * pi/2^k = N * 2 * pi + (lower k+1 bits) * pi/2^k
//    nfloat * pi/2^k = N2pi + M * pi/2^k
//
//
// Sin(x) = Sin((nfloat * pi/2^k) + r)
//        = Sin(nfloat * pi/2^k) * Cos(r) + Cos(nfloat * pi/2^k) * Sin(r)
//
//    Sin(nfloat * pi/2^k) = Sin(N2pi + Mpi/2^k)
//                         = Sin(N2pi)Cos(Mpi/2^k) + Cos(N2pi)Sin(Mpi/2^k)
//                         = Sin(Mpi/2^k)
//
//    Cos(nfloat * pi/2^k) = Cos(N2pi + Mpi/2^k)
//                         = Cos(N2pi)Cos(Mpi/2^k) - Sin(N2pi)Sin(Mpi/2^k)
//                         = Cos(Mpi/2^k)
//
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
//
// Step 4
// ======
// 0 <= M < 2^(k+1)
// There are 2^(k+1) Sin entries in a table.
// There are 2^(k+1) Cos entries in a table.
//
// Get Sin(Mpi/2^k) and Cos(Mpi/2^k) by table lookup.
//
//
// Step 5
// ======
// Calculate Cos(r) and Sin(r) by polynomial approximation.
//
// Cos(r) = 1 + r^2 q1 + r^4 q2 = Series for Cos
// Sin(r) = r + r^3 p1 + r^5 p2 = Series for Sin
//
// and the coefficients q1, q2 and p1, p2 are stored in a table
//
//
// Calculate
// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
//
// as follows
//
//    S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
//    rsq = r*r
//
//
//    P = P1 + r^2*P2
//    Q = Q1 + r^2*Q2
//
//       rcub = r * rsq
//       Sin(r) = r + rcub * P
//              = r + r^3p1 + r^5p2 = Sin(r)
//
//            The coefficients are not exactly these values, but almost.
//
//            p1 = -1/6 = -1/3!
//            p2 = 1/120 = 1/5!
//            p3 = -1/5040 = -1/7!
//            p4 = 1/362880 = 1/9!
//
//       P = r + r^3 * P
//
//    Answer = S[m] Cos(r) + C[m] P
//
//       Cos(r) = 1 + rsq Q
//       Cos(r) = 1 + r^2 Q
//       Cos(r) = 1 + r^2 (q1 + r^2q2)
//       Cos(r) = 1 + r^2q1 + r^4q2
//
//       S[m] Cos(r) = S[m](1 + rsq Q)
//       S[m] Cos(r) = S[m] + S[m] rsq Q
//       S[m] Cos(r) = S[m] + s_rsq Q
//       Q = S[m] + s_rsq Q
//
// Then,
//
//    Answer = Q + C[m] P

// Registers used
//==============================================================
// general input registers:
// r14 -> r19
// r32 -> r45

// predicate registers used:
// p6 -> p14

// floating-point registers used
// f9 -> f15
// f32 -> f61

// Assembly macros
//==============================================================
// Symbolic names for the floating-point registers.
sincosf_NORM_f8                = f9
sincosf_W                      = f10
sincosf_int_Nfloat             = f11
sincosf_Nfloat                 = f12

sincosf_r                      = f13
sincosf_rsq                    = f14
sincosf_rcub                   = f15
sincosf_save_tmp               = f15   // shares f15 with sincosf_rcub

sincosf_Inv_Pi_by_16           = f32
sincosf_Pi_by_16_1             = f33
sincosf_Pi_by_16_2             = f34

sincosf_Inv_Pi_by_64           = f35

sincosf_Pi_by_16_3             = f36

sincosf_r_exact                = f37

sincosf_Sm                     = f38
sincosf_Cm                     = f39

sincosf_P1                     = f40
sincosf_Q1                     = f41
sincosf_P2                     = f42
sincosf_Q2                     = f43
sincosf_P3                     = f44
sincosf_Q3                     = f45
sincosf_P4                     = f46
sincosf_Q4                     = f47

sincosf_P_temp1                = f48
sincosf_P_temp2                = f49

sincosf_Q_temp1                = f50
sincosf_Q_temp2                = f51

sincosf_P                      = f52
sincosf_Q                      = f53

sincosf_srsq                   = f54

sincosf_SIG_INV_PI_BY_16_2TO61 = f55
sincosf_RSHF_2TO61             = f56
sincosf_RSHF                   = f57
sincosf_2TOM61                 = f58
sincosf_NFLOAT                 = f59
sincosf_W_2TO61_RSH            = f60

fp_tmp                         = f61

/////////////////////////////////////////////////////////////
// Symbolic names for the general registers.

sincosf_AD_1                   = r33
sincosf_AD_2                   = r34
sincosf_exp_limit              = r35
sincosf_r_signexp              = r36
sincosf_AD_beta_table          = r37
sincosf_r_sincos               = r38

sincosf_r_exp                  = r39
sincosf_r_17_ones              = r40

sincosf_GR_sig_inv_pi_by_16    = r14
sincosf_GR_rshf_2to61          = r15
sincosf_GR_rshf                = r16
sincosf_GR_exp_2tom61          = r17
sincosf_GR_n                   = r18
sincosf_GR_m                   = r19
sincosf_GR_32m                 = r19   // shares r19 with sincosf_GR_m
sincosf_GR_all_ones            = r19   // shares r19 with sincosf_GR_m

gr_tmp                         = r41
GR_SAVE_PFS                    = r41   // shares r41 with gr_tmp
GR_SAVE_B0                     = r42
GR_SAVE_GP                     = r43

RODATA
.align 16

// Pi/16 parts
// NOTE(review): each entry is a pair of 64-bit words; presumably a 64-bit
// significand followed by a sign/exponent word -- confirm against the code
// that loads this table (not visible in this excerpt).
LOCAL_OBJECT_START(double_sincosf_pi)
   data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
   data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
LOCAL_OBJECT_END(double_sincosf_pi)

// Coefficients for polynomials
LOCAL_OBJECT_START(double_sincosf_pq_k4)
   data8 0x3F810FABB668E9A2 // P2
   data8 0x3FA552E3D6DE75C9 // Q2
   data8 0xBFC555554447BC7F // P1
   data8 0xBFDFFFFFC447610A // Q1
LOCAL_OBJECT_END(double_sincosf_pq_k4)

// Sincos table (S[m], C[m])
LOCAL_OBJECT_START(double_sin_cos_beta_k4)
   data8 0x0000000000000000 // sin ( 0 Pi / 16 )
   data8 0x3FF0000000000000 // cos ( 0 Pi / 16 )
//
   data8 0x3FC8F8B83C69A60B // sin ( 1 Pi / 16 )
   data8 0x3FEF6297CFF75CB0 // cos ( 1 Pi / 16 )
//
   data8 0x3FD87DE2A6AEA963 // sin ( 2 Pi / 16 )
   data8 0x3FED906BCF328D46 // cos ( 2 Pi / 16 )
//
   data8 0x3FE1C73B39AE68C8 // sin ( 3 Pi / 16 )
   data8 0x3FEA9B66290EA1A3 // cos ( 3 Pi / 16 )
//
   data8 0x3FE6A09E667F3BCD // sin ( 4 Pi / 16 )
   data8 0x3FE6A09E667F3BCD // cos ( 4 Pi / 16 )
//
   data8 0x3FEA9B66290EA1A3 // sin ( 5 Pi / 16 )
   data8 0x3FE1C73B39AE68C8 // cos ( 5 Pi / 16 )
//
   data8 0x3FED906BCF328D46 // sin ( 6 Pi / 16 )
   data8 0x3FD87DE2A6AEA963 // cos ( 6 Pi / 16 )
//
   data8 0x3FEF6297CFF75CB0 // sin ( 7 Pi / 16 )
   data8 0x3FC8F8B83C69A60B // cos ( 7 Pi / 16 )
//
   data8 0x3FF0000000000000 // sin ( 8 Pi / 16 )
   data8 0x0000000000000000 // cos ( 8 Pi / 16 )
//
   data8 0x3FEF6297CFF75CB0 // sin ( 9 Pi / 16 )
   data8 0xBFC8F8B83C69A60B // cos ( 9 Pi / 16 )
//
   data8 0x3FED906BCF328D46 // sin ( 10 Pi / 16 )
   data8 0xBFD87DE2A6AEA963 // cos ( 10 Pi / 16 )
//
   data8 0x3FEA9B66290EA1A3 // sin ( 11 Pi / 16 )
   data8 0xBFE1C73B39AE68C8 // cos ( 11 Pi / 16 )
//
   data8 0x3FE6A09E667F3BCD // sin ( 12 Pi / 16 )
   data8 0xBFE6A09E667F3BCD // cos ( 12 Pi / 16 )
//
   data8 0x3FE1C73B39AE68C8 // sin ( 13 Pi / 16 )
   data8 0xBFEA9B66290EA1A3 // cos ( 13 Pi / 16 )
//
   data8 0x3FD87DE2A6AEA963 // sin ( 14 Pi / 16 )
   data8 0xBFED906BCF328D46 // cos ( 14 Pi / 16 )
//
   data8 0x3FC8F8B83C69A60B // sin ( 15 Pi / 16 )
   data8 0xBFEF6297CFF75CB0 // cos ( 15 Pi / 16 )
//
   data8 0x0000000000000000 // sin ( 16 Pi / 16 )
   data8 0xBFF0000000000000 // cos ( 16 Pi / 16 )
//
   data8 0xBFC8F8B83C69A60B // sin ( 17 Pi / 16 )
   data8 0xBFEF6297CFF75CB0 // cos ( 17 Pi / 16 )
//
   data8 0xBFD87DE2A6AEA963 // sin ( 18 Pi / 16 )
   data8 0xBFED906BCF328D46 // cos ( 18 Pi / 16 )
//
   data8 0xBFE1C73B39AE68C8 // sin ( 19 Pi / 16 )
   data8 0xBFEA9B66290EA1A3 // cos ( 19 Pi / 16 )
//
   data8 0xBFE6A09E667F3BCD // sin ( 20 Pi / 16 )
   data8 0xBFE6A09E667F3BCD // cos ( 20 Pi / 16 )
//
   data8 0xBFEA9B66290EA1A3 // sin ( 21 Pi / 16 )
   data8 0xBFE1C73B39AE68C8 // cos ( 21 Pi / 16 )
//
   data8 0xBFED906BCF328D46 // sin ( 22 Pi / 16 )
   data8 0xBFD87DE2A6AEA963 // cos ( 22 Pi / 16 )
//
   data8 0xBFEF6297CFF75CB0 // sin ( 23 Pi / 16 )
   data8 0xBFC8F8B83C69A60B // cos ( 23 Pi / 16 )
//
   data8 0xBFF0000000000000 // sin ( 24 Pi / 16 )
   data8 0x0000000000000000 // cos ( 24 Pi / 16 )
//
   data8 0xBFEF6297CFF75CB0 // sin ( 25 Pi / 16 )
   data8 0x3FC8F8B83C69A60B // cos ( 25 Pi / 16 )
//
// NOTE(review): this excerpt ends here.  The remaining table entries and the
// matching LOCAL_OBJECT_END(double_sin_cos_beta_k4) are not present in this
// copy and must be restored from the complete source before assembling.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -