s_erf.s
字號:
// NOTE(review): this chunk opens part-way through an instruction bundle. The
// two slots and the closing brace below complete a bundle whose "{ .xxx"
// header and first slot lie outside the visible source.
//
// Layout restored to one directive / label / instruction per line. In the
// collapsed form every "//" comment swallowed the instructions, braces, stops,
// ".align" directives and labels that followed it on the same physical line.
// The instruction stream itself (opcodes, operands, order, bundles, stops) is
// unchanged.
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      ldfe           fA7 = [rCoeffAddr1], 32
      // fArgAbs = fArgAbs - fThreeAndQ; the erf_3q_4 path evaluates its
      // polynomial in this shifted fArgAbs.
      fms.s1         fArgAbs = fArgAbs, f1, fThreeAndQ
      nop.i          0
}
{ .mfb
      ldfe           fA6 = [rCoeffAddr2], 32
      nop.f          0
(p8)  br.cond.spnt   erf_3q_4 // branch out if 3.25 < |x| < 4.0
}
;;
{ .mfi
      ldfe           fA5 = [rCoeffAddr1], 32
      fma.s1         fTDeg3 = fArgAbsNorm, fTSqr, f0   // y * fTSqr
      nop.i          0
}
{ .mfi
      ldfe           fA4 = [rCoeffAddr2], 32
      fma.s1         fTQuadr = fTSqr, fTSqr, f0        // fTSqr^2
      nop.i          0
}
;;

// Path #3 Polynomial Pol19(y) computation; y = fArgAbsNorm
//
// fTSqr is set before this chunk (presumably y^2 -- TODO confirm). Assuming
// that, the sequence below builds
//     fRes = A19*y^18 + A18*y^17 + ... + A2*y + A1
// from independent partial sums, and the final fma/fms forms
//     fRes * (y * fSignumX) +/- A0.
{ .mfi
      ldfe           fA3 = [rCoeffAddr3], 32
      fma.s1         fArgAbsNormSgn = fArgAbsNorm, fSignumX, f0 // y * fSignumX
      nop.i          0
}
{ .mfi
      ldfe           fA2 = [rCoeffAddr4], 32
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      ldfe           fA1 = [rCoeffAddr3], 32
      fma.s1         fRes = fA19, fArgAbsNorm, fA18    // A19*y + A18
      nop.i          0
}
{ .mfi
      ldfe           fA0 = [rCoeffAddr4], 32
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA17 = fA17, fArgAbsNorm, fA16    // A17*y + A16
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA15 = fA15, fArgAbsNorm, fA14    // A15*y + A14
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fTDeg7 = fTDeg3, fTQuadr, f0      // fTDeg3 * fTQuadr
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA13 = fA13, fArgAbsNorm, fA12    // A13*y + A12
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA11 = fA11, fArgAbsNorm, fA10    // A11*y + A10
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA9 = fA9, fArgAbsNorm, fA8       // A9*y + A8
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTSqr, fA17          // terms A19..A16
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA7 = fA7, fArgAbsNorm, fA6       // A7*y + A6
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      // A5*y only: A4 is paired with A3 below, so the addend here is zero.
      fma.s1         fA5 = fA5, fArgAbsNorm, f0
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA15 = fA15, fTSqr, fA13          // terms A15..A12
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA4 = fA4, fArgAbsNorm, fA3       // A4*y + A3
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA2 = fA2, fArgAbsNorm, fA1       // A2*y + A1
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA11 = fA11, fTSqr, fA9           // terms A11..A8
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA7 = fA7, fTSqr, fA5             // terms A7..A5
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTQuadr, fA15        // terms A19..A12
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA4 = fA4, fTSqr, fA2             // terms A4..A1
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTQuadr, fA11        // terms A19..A8
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA4 = fA7, fTDeg3, fA4            // terms A7..A1
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTDeg7, fA4          // terms A19..A1
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      // result for negative argument
(p15) fms.d.s0       f8 = fRes, fArgAbsNormSgn, fA0
      nop.i          0
}
{ .mfb
      nop.m          0
      // result for positive argument
(p14) fma.d.s0       f8 = fRes, fArgAbsNormSgn, fA0
      br.ret.sptk    b0
}

// Here if 3.25 < |x| < 4.0
//
// Entered from the (p8) branch above, with fArgAbs already reduced by
// fThreeAndQ and fA19..fA6 already loaded. With y = fArgAbs this path builds
//     fRes = A19*y^13 + A18*y^12 + ... + A7*y + A6
// and returns fRes * (y * fSignumX) +/- A5.
.align 32
erf_3q_4:
.pred.rel "mutex", p14, p15
{ .mfi
      ldfe           fA5 = [rCoeffAddr1], 32
      fma.s1         fTSqr = fArgAbs, fArgAbs, f0      // y^2
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fRes = fA19, fArgAbs, fA18        // A19*y + A18
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA17 = fA17, fArgAbs, fA16        // A17*y + A16
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA15 = fA15, fArgAbs, fA14        // A15*y + A14
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA13 = fA13, fArgAbs, fA12        // A13*y + A12
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA11 = fA11, fArgAbs, fA10        // A11*y + A10
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA9 = fA9, fArgAbs, fA8           // A9*y + A8
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fArgAbsNormSgn = fArgAbs, fSignumX, f0 // y * fSignumX
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fTQuadr = fTSqr, fTSqr, f0        // y^4
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTSqr, fA17          // terms A19..A16
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA15 = fA15, fTSqr, fA13          // terms A15..A12
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA11 = fA11, fTSqr, fA9           // terms A11..A8
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA7 = fA7, fArgAbs, fA6           // A7*y + A6
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      // Despite the register name, this is y^6 on this path (y^4 * y^2).
      fma.s1         fTDeg7 = fTQuadr, fTSqr, f0
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTQuadr, fA15        // terms A19..A12
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA11 = fA11, fTSqr, fA7           // terms A11..A6
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTDeg7, fA11         // terms A19..A6
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      // result for negative argument
(p15) fms.d.s0       f8 = fRes, fArgAbsNormSgn, fA5
      nop.i          0
}
{ .mfb
      nop.m          0
      // result for positive argument
(p14) fma.d.s0       f8 = fRes, fArgAbsNormSgn, fA5
      br.ret.sptk    b0
}
;;

// Here if |x| < 0.5
//
// With s = fArgSqr (set before this chunk; the "x^4" note below implies
// s = x^2) this path returns
//     x * (A9*s^9 + A8*s^8 + ... + A1*s + A0).
// A9..A2 are fetched as double-precision pairs (ldfpd); A1 and A0 are fetched
// with ldfe.
.align 32
erf_near_zero:
{ .mfi
      adds           rCoeffAddr1 = 1280, rDataPtr // address of A9
      fma.s1         fTSqr = fArgSqr, fArgSqr, f0 // x^4
      nop.i          0
}
{ .mfi
      adds           rCoeffAddr2 = 1328, rDataPtr // address of A7
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      ldfpd          fA9, fA8 = [rCoeffAddr1], 16
      nop.f          0
      nop.i          0
}
{ .mfi
      ldfpd          fA7, fA6 = [rCoeffAddr2], 16
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      ldfpd          fA5, fA4 = [rCoeffAddr1], 16
      nop.f          0
      nop.i          0
}
{ .mfi
      ldfpd          fA3, fA2 = [rCoeffAddr2], 16
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      ldfe           fA1 = [rCoeffAddr1]
      nop.f          0
      nop.i          0
}
{ .mfi
      ldfe           fA0 = [rCoeffAddr2]
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fTQuadr = fTSqr, fTSqr, f0        // s^4
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fA9, fArgSqr, fA8          // A9*s + A8
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA7 = fA7, fArgSqr, fA6           // A7*s + A6
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA3 = fA3, fArgSqr, fA2           // A3*s + A2
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fA5 = fA5, fArgSqr, fA4           // A5*s + A4
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA1 = fA1, fArgSqr, fA0           // A1*s + A0
      nop.i          0
}
{ .mfi
      nop.m          0
      fma.s1         fTQuadrSgn = fTQuadr, f8, f0      // s^4 * x
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTSqr, fA7           // terms A9..A6
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA1 = fA3, fTSqr, fA1             // terms A3..A0
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fRes = fRes, fTSqr, fA5           // terms A9..A4
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA1 = fA1, f8, f0                 // x * (terms A3..A0)
      nop.i          0
}
;;
{ .mfb
      nop.m          0
      fma.d.s0       f8 = fRes, fTQuadrSgn, fA1 // x*Pol9(x^2)
      br.ret.sptk    b0                         // Exit for |x| < 0.5
}
;;

// Here if 5.90625 <= |x| < +inf
//
// Returns the table constant at rDataPtr + 1376 multiplied by fSignumX.
.align 32
erf_saturation:
{ .mfi
      adds           rDataPtr = 1376, rDataPtr // address of A0
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      ldfe           fA0 = [rDataPtr]
      nop.f          0
      nop.i          0
}
;;
{ .mfb
      nop.m          0
      fma.d.s0       f8 = fA0, fSignumX, f0 // sign(x)*(1.0 - 2^(-63))
      // Exit for 5.90625 <= |x| < +inf
      br.ret.sptk    b0                     // Exit for 5.90625 <=|x|< +inf
}
;;

// Here if x is double precision denormal
//
// Returns A0*x + x*x for a negative denormal (p7) and A0*x - x*x for a
// positive one (p8), where A0 is the table constant at rDataPtr + 1632.
.align 32
erf_denormal:
{ .mfi
      adds           rDataPtr = 1632, rDataPtr // address of A0
      fclass.m       p7,p8 = f8, 0x0a          // is x -denormal ?
      nop.i          0
}
;;
{ .mfi
      ldfe           fA0 = [rDataPtr]          // A0
      nop.f          0
      nop.i          0
}
;;
{ .mfi
      nop.m          0
      fma.s1         fA0 = fA0,f8,f0           // A0*x
      nop.i          0
}
;;
{ .mfi
      nop.m          0
(p7)  fma.d.s0       f8 = f8,f8,fA0            // -denormal
      nop.i          0
}
{ .mfb
      nop.m          0
(p8)  fnma.d.s0      f8 = f8,f8,fA0            // +denormal
      br.ret.sptk    b0                        // Exit for denormal
}
;;
GLOBAL_LIBM_END(erf)
快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -