?? w_tgammal.s
字號:
.file "tgammal.s"// Copyright (c) 2002 - 2005, Intel Corporation// All rights reserved.//// Contributed 2002 by the Intel Numerics Group, Intel Corporation//// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.//// * Redistributions in binary form must reproduce the above copyright// notice, this list of conditions and the following disclaimer in the// documentation and/or other materials provided with the distribution.//// * The name of Intel Corporation may not be used to endorse or promote// products derived from this software without specific prior written// permission.// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Intel Corporation is the author of this code, and requests that all// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm.//// History//==============================================================// 01/16/02 Initial version// 05/20/02 Cleaned up namespace and sf0 syntax// 02/10/03 Reordered header: .section, .global, .proc, .align;// used data8 for long double table values// 03/17/03 Moved tgammal_libm_err label into .proc region// 04/10/03 Changed error codes for overflow and negative integers// 03/31/05 Reformatted delimiters between data tables//// API//==============================================================// long double tgammal(long double)//// Resources Used://// Floating-Point Registers: f8-f15// f32-f127//// General Purpose Registers: r32-r67 //// Predicate Registers: p6-p15////*********************************************************************//// IEEE Special Conditions://// tgammal(+inf) = +inf// tgammal(-inf) = QNaN // tgammal(+/-0) = +/-inf // tgammal(x<0, x - integer) = QNaN// tgammal(SNaN) = QNaN// tgammal(QNaN) = QNaN////*********************************************************************// Overview of operation//==============================================================//// Algorithm description// ---------------------//// There are 3 main paths in the implementation // (and additional special values branches)//// 1) |X| >= 13 - Stirling formula computation// a) Positive arguments:// TGAMMAL(X) = exp((X-0.5)*ln(X) - X + C + S(Z)), // where C = 0.5*ln(2*Pi) , Z = 1/Z, S(Z) - Bernulli polynomial // (up to 'B18' term).// Some of these calculation done in multiprecision. // Ln returns multiprecision result too // and exp also accepts and returns pair of values.// // b) Negative arguments// TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).// (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.// Here we use polynomial of 9th degree with 2 multiprecision steps.// Argument range reduction is: // N = [x] with round to nearest, r = x - N, -0.5 <= r < 0.5// After ((X-0.5)*ln(X) - X + C + S(Z)) completed we just invert// its result and compute exp with negative argument (1/exp(x)=exp(-x))// Then we multiply exp result to PI/(X*sin(PI*X)).//// 2) 1 <= |X| < 13 - Polynomial part// a) Positive arguments:// All values are splitted to such intervals as:// #0->[2;3], #1->[3,4], #2->[5,6]...// For even intervals we just use polynomial computation with degree 20// and first 6 multiprecision computations.// Range reduction looks like// N = [x] with truncate, r = x - N - 0.5, -0.5 <= r < 0.5// For odd intervals we use reccurent formula: // TGAMMAL(X) = TGAMMA(X-1)*(X-1)// [1;2] interval is splitted to 3 subranges: // [1;1.25], [1.25;1.75], [1.75;2] with the same polynomial forms//// b) Negative arguments// TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)).// (X*sin(PI*X))/PI calculated in parallel with TGAMMAL.// After multiplication by TGAMMAL(X) result we calculate reciprocal// and get final result.//// 3) 0 < |X| < 1 - Near 0 part// a) Here we use reccurent formula TGAMMAL(X) = TGAMMAL(X+1)/X// TGAMMAL(X+1) calculated as shown above, // 1/X result obtained in parallel. Then we just multiply these values.// There is only additional separated subrange: [0;0.125] with specific// polynomial constants set.//// b) Negative arguments// TGAMMAL(-X) = PI/(TGAMMAL(X+1)*sin(PI*X)).// There is no need to compute 1/X.RODATA.align 16LOCAL_OBJECT_START(Constants_Tgammal_log_80_Q)// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1 data4 0x00000000,0xB1721800,0x00003FFE,0x00000000data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000data4 0x00000000,0x80000000,0x0000BFFE,0x00000000 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Q).align 64LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h1)// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double data4 0x00008000,0x3F800000,0x00000000,0x00000000data4 0x00000000,0x00000000,0x00000000,0x00000000 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000 data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000 data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000 data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000 data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000 data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000 data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000 data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000 data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000 data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000 data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000 data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000 data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000 data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000 data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000 data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000 data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h1).align 64LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h2)// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE doubledata4 0x00008000,0x3F800000,0x00000000,0x00000000 data4 0x00000000,0x00000000,0x00000000,0x00000000 data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000 data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000 data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000 data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000 data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000 data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000 data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000 data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000 data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000 data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000 data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000 data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000 data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000 data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000 data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000 data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000 data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000 data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000 data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000 data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000 data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000 data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000 data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000 data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h2).align 64LOCAL_OBJECT_START(Constants_Tgammal_log_80_h3_G_H)// h3 IEEE double extended, H3 and G3 IEEE single data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00 data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400 data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08 data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408 data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10 data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410 data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20 data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428 data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30 data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438 data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40 data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448 data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50 data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458 data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68 data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470 data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90 data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0 data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8 data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8 data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8 data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8 data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0 data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0 data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766 data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6 data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620 data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D LOCAL_OBJECT_END(Constants_Tgammal_log_80_h3_G_H).align 64 LOCAL_OBJECT_START(Constants_Tgammal_stirling)//0.5*ln(2*Pi)=9.1893853320467266954096885e-01 + 7.2239360881843238220057778e-17data8 0x3FED67F1C864BEB4, 0x3C94D252F2400510// Bernulli numbers data8 0xAAAAAAAAAAAAAAAB, 0x00003FFB //B2 = 8.3333333333333333333333333333e-02data8 0xBF66C16C16C16C17 //B4 = -2.7777777777777777777777777778e-03data8 0x3F4A01A01A01A01A //B6 = 7.9365079365079365079365079365e-04data8 0xBF43813813813814 //B8 = -5.9523809523809523809523809524e-04data8 0x3F4B951E2B18FF23 //B10 = 8.4175084175084175084175084175e-04data8 0xBF5F6AB0D9993C7D //B12 = -1.9175269175269175269175269175e-03data8 0x3F7A41A41A41A41A //B14 = 6.4102564102564102564102564103e-03data8 0xBF9E4286CB0F5398 //B16 = -2.9550653594771241830065359477e-02data8 0x3FC6FE96381E0680 //B18 = 1.7964437236883057316493849002e-01data8 0x3FE0000000000000 // 0.5LOCAL_OBJECT_END(Constants_Tgammal_stirling).align 64 LOCAL_OBJECT_START(Constants_Tgammal_sin)// Polynomial coefficients for the sin(Pi*x)/Pi, 0 <= |x| < 0.5 //A2 = 8.1174242528335360802316245099e-01 + 5.1302254650266899774269946201e-18data8 0x3FE9F9CB402BC46C, 0x3C57A8B3819B7CEC//A1 = -1.6449340668482264060656916627e+00 + -3.0210280454695477893051351574e-17data8 0xBFFA51A6625307D3, 0xBC816A402079D0EFdata8 0xF3AEF1FFCCE6C813, 0x0000BFE3 //A9 = -7.0921197799923779127089910470e-09data8 0x87D54408E6D4BB9D, 0x00003FE9 //A8 = 2.5300880778252693946712766029e-07data8 0xEA12033DCE7B8ED9, 0x0000BFED //A7 = -6.9758403885461690048189307819e-06data8 0x9BA38C952A59D1A8, 0x00003FF2 //A6 = 1.4842878710882320255092707181e-04data8 0x99C0B55178FF0E38, 0x0000BFF6 //A5 = -2.3460810348048124421268761990e-03data8 0xD63402E798FEC896, 0x00003FF9 //A4 = 2.6147847817611456327417812320e-02data8 0xC354723906D95E92, 0x0000BFFC //A3 = -1.9075182412208257558294507774e-01LOCAL_OBJECT_END(Constants_Tgammal_sin).align 64 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_Arg)data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi = hi part log(2)/2^12data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo = lo part log(2)/2^12LOCAL_OBJECT_END(Constants_Tgammal_exp_64_Arg)LOCAL_OBJECT_START(Constants_Tgammal_exp_64_A)data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000 // A3data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000 // A2data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A1LOCAL_OBJECT_END(Constants_Tgammal_exp_64_A)LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T1)data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DCdata4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942Ddata4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDAdata4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3Adata4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5Bdata4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CDdata4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35Bdata4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396Adata4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -