?? s_tanhf.s
字號:
.file "tanhf.s"


// Copyright (c) 2001 - 2005, Intel Corporation
// All rights reserved.
//
// Contributed 2001 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 05/30/01 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 03/31/05 Reformatted delimiters between data tables
//
// API
//==============================================================
// float tanhf(float)
//
// Overview of operation
//==============================================================
// Background
//
//
// There are 9 paths:
// 1. x = +/-0.0
//    Return tanhf(x) = +/-0.0
//
// 2. 0.0 < |x| < 0.3125
//    Return tanhf(x) = x + x^3*Pol3(x^2),
//    where Pol3(x^2) = C3*x^6 + C2*x^4 + C1*x^2 + C0
//
// 3. 0.3125 <= |x| < 8.0
//    Return tanhf(x) = sign(x)*PolD(x)*PolC(|x|) + sign(x)*PolA(|x|),
//    where sign(x)*PolD(x) = sign(x)*(|x|^7 + D2*x^6 + D1*|x|^5 + D0*x^4),
//    PolC(|x|) = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0,
//    PolA(|x|) = A3|x|^3 + A2*x^2 + A1*|x| + A0
//
//    Actually range 0.3125<=|x|< 8.0 is split to 5 subranges.
//    For each subrange there is particular set of coefficients.
//    Below is the list of subranges:
//    3.1 0.3125 <= |x| < 0.5
//    3.2 0.5 <= |x| < 1.0
//    3.3 1.0 <= |x| < 2.0
//    3.4 2.0 <= |x| < 4.0
//    3.5 4.0 <= |x| < 8.0
//
// 4. 8.0 <= |x| < 9.125
//    Return tanhf(x) = sign(x)*(A3|x|^3 + A2*x^2 + A1*|x| + A0)
//
// 5. 9.125 <= |x| < +INF
//    Return tanhf(x) = sign(x)*(1.0d - 2^(-52))
//
// 6. |x| = INF
//    Return tanhf(x) = sign(x) * 1.0
//
// 7. x = [S,Q]NaN
//    Return tanhf(x) = QNaN
//
// 8. x is positive denormal
//    Return tanhf(x) = x - x^2
//
// 9. x is negative denormal
//    Return tanhf(x) = x + x^2
//
// Registers used
//==============================================================
// Floating Point registers used:
// f8, input
// f32 -> f59

// General registers used:
// r32 -> r46, r2, r3

// Predicate registers used:
// p0, p6 -> p15
// p6 to filter out case when x = [Q,S]NaN or +/-0
// p7 to filter out case when x = denormal
// p8 set if |x| >= 0.3125, used also to process denormal input
// p9 to filter out case when |x| = inf
// p10 to filter out case when |x| < 0.3125
// p11 to filter out case when 0.3125 <= |x| < 9.125
// p12 to filter out case when |x| >= 9.125
// p13 to filter out case when 8.0 <= |x| < 9.125
// p14 set to 1 for positive x
// p15 set to 1 for negative x

// Assembly macros
//==============================================================
// Symbolic names for the general registers used by tanhf.
rDataPtr           = r2
rDataPtr1          = r3

rBias              = r33
rCoeffAddr3        = r34
rNearSaturation    = r35
rCoeffAddr1        = r36
rCoeffAddr2        = r37
rOffset2           = r38
rBias2             = r39
rMask              = r40
rArg               = r41
rBound             = r42
rSignBit           = r43
rAbsArg            = r44
rDataPtr2          = r45
rSaturation        = r46

//==============================================================
// Symbolic names for the floating-point registers used by tanhf.
fA0                = f32
fA1                = f33
fA2                = f34
fA3                = f35
fC0                = f36
fC1                = f37
fC2                = f38
fC3                = f39
fD0                = f40
fD1                = f41
fD2                = f42
fB0                = f43
fArgSqr            = f44
fAbsArg            = f45
fSignumX           = f46
fArg4              = f47
fArg4Sgn           = f48
fArg3              = f49
fArg3Sgn           = f50
fArg7Sgn           = f51
fArg6Sgn           = f52
fPolC              = f53
fPolCTmp           = f54
fPolA              = f55
fPolATmp           = f56
fPolD              = f57
fPolDTmp           = f58
fArgSqrSgn         = f59

// Data tables
//==============================================================
//
// Layout of tanhf_data, in order:
//   - five groups of eight data8 entries (C0, C1, C2, C3, D0, D1, D2, B0),
//     one group per subrange 3.1 .. 3.5 listed in the overview;
//   - seven groups of four data8 entries (A0, A1, A2, A3), for
//     |x| < 0.3125, the five subranges 3.1 .. 3.5, and 8.0 <= |x| <= 9.125;
//   - one final data8 entry labelled "1.0 - epsilon".
// The order of the entries is significant; do not rearrange them.

RODATA

.align 16

LOCAL_OBJECT_START(tanhf_data)
// Polynomial coefficients for the tanh(x), 0.3125 <= |x| < 0.5
data8 0x3F9BEEDFDD177D7B // C0
data8 0x3F970D10C7F32458 // C1
data8 0x3F766D6B051F3A38 // C2
data8 0xBF732F2001B23402 // C3
data8 0xBF854BE1CE1ED499 // D0
data8 0x4013C944F3999A16 // D1
data8 0xC01106C6975222C0 // D2
data8 0x3F783D5ACCF9EBE8 // B0
// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0
data8 0xBF5D631440786869 // C0
data8 0xBF575D79A0D52069 // C1
data8 0xBF7E2237B7EFC705 // C2
data8 0x3F6A7ACBC273041F // C3
data8 0xC040E32EA52D91EB // D0
data8 0x403D19463E5DB4D7 // D1
data8 0xC02216F61F759F39 // D2
data8 0xBF55B4EA0B844BE7 // B0
// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0
data8 0x3F8637DBE5B3E690 // C0
data8 0xBF7F7FEC158C07F5 // C1
data8 0x3F711C586706838A // C2
data8 0xBF50EF7EF605554E // C3
data8 0xC054D45448354E25 // D0
data8 0x404ADFEEA282E730 // D1
data8 0xC028AEE456D59549 // D2
data8 0x3F25232D1BED59A8 // B0
// Polynomial coefficients for the tanh(x), 2.0 <= |x| < 4.0
data8 0xBF52602285F2D06C // C0
data8 0x3F2E57C298FFE1E0 // C1
data8 0xBF15ED575DB3C811 // C2
data8 0x3EE428878A08525C // C3
data8 0xC0895A26849039C1 // D0
data8 0x406E3C60BBFBB575 // D1
data8 0xC03A06F62867C75A // D2
data8 0xBEB114C70F1C723E // B0
// Polynomial coefficients for the tanh(x), 4.0 <= |x| < 8.0
data8 0x3EF4B22BD17039A3 // C0
data8 0xBEB704ADC040C57F // C1
data8 0x3E937A98288AFE1A // C2
data8 0xBE4F33B2C9FFE7E7 // C3
data8 0xC0BE48CFADE2431E // D0
data8 0x4090E74249760FDD // D1
data8 0xC04B6F537FCF2F1E // D2
data8 0x3E0DCD879C91ADEA // B0
// Polynomial coefficients for the tanh(x), -0.3125 < x < 0.3125
data8 0xBFD555551E8245B7 // A0
data8 0x3FC110E63F52E689 // A1
data8 0xBFAB8CD6A5B7BAFA // A2
data8 0x3F945D467FCEB553 // A3
// Polynomial coefficients for the tanh(x), 0.3125 <= |x| < 0.5
data8 0xBE3DCC92FCAECBB6 // A0
data8 0x3FF0000043B7D267 // A1
data8 0xBED18BF28ACFC4B1 // A2
data8 0xBFD554A56F82837E // A3
// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0
data8 0x3EFD6054758539F9 // A0
data8 0x3FEFFBFC77198EBE // A1
data8 0x3F700327CA98D237 // A2
data8 0xBFD68955F5BB2FA1 // A3
// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0
data8 0xBF71A53F229DF01B // A0
data8 0x3FF0AECFD730DE50 // A1
data8 0xBFC882F88E5DF3BA // A2
data8 0x3FC6EDF212CA2A8D // A3
// Polynomial coefficients for the tanh(x), 2.0 <= |x| < 4.0
data8 0xBFAF0B712E9EDA47 // A0
data8 0x3FF1C208080BEA64 // A1
data8 0x3FC3D29B20C8946E // A2
data8 0xBFF04514ED900A6A // A3
// Polynomial coefficients for the tanh(x), 4.0 <= |x| < 8.0
data8 0xBFB1DEA49A831CBC // A0
data8 0x3FFA729FC7085674 // A1
data8 0xBFF2F44D923A8FA4 // A2
data8 0x3FE092FC5712227E // A3
// Polynomial coefficients for the tanh(x), 8.0 <= |x| <= 9.125
data8 0x3FEFFF5769EE3041 // A0
data8 0x3EFBBF148D850891 // A1
data8 0xBEC86BCEF0F5C2FE // A2
data8 0x3E7CBA4F3A885A5C // A3
//
// NOTE(review): restored as a live table entry (an empty "//" separator line
// followed by data8); confirm against the upstream file that this entry was
// not intentionally commented out.
data8 0x3FEFFFFFFFFFFFFF // 1.0 - epsilon
LOCAL_OBJECT_END(tanhf_data)

.section .text
GLOBAL_LIBM_ENTRY(tanhf)

{ .mfi
      alloc          r32 = ar.pfs, 1, 14, 0, 0
      fmerge.s       fAbsArg = f1, f8             // |x|
      addl           rMask = 0x806, r0
}
{ .mfi
      addl           rDataPtr = @ltoff(tanhf_data), gp
      fma.s1         fArgSqr = f8, f8, f0         // x^2
      adds           rSignBit = 0x1, r0
}
;;

{ .mfi
      getf.s         rArg = f8                    // x in GR
      fclass.m       p7,p0 = f8, 0x0b             // is x denormal ?
      // sign bit and 2 most bits in significand
      shl            rMask = rMask, 20            // rMask = 0x806 << 20
}
{ .mfi
      ld8            rDataPtr = [rDataPtr]        // address of tanhf_data
      nop.f          0
      adds           rBias2 = 0x1F4, r0
}
;;

{ .mfi
      adds           rNearSaturation = 0x14, r0
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -