?? libm_reduce.s
字號:
// Exact, regardless ...of rounding direction// A_lo := x*p_3 - A_hi ...fma, exact// Endif{ .mfi nop.m 999 fmpy.s3 FR_Tmp_C = FR_X,FR_p_1 nop.i 999};;{ .mfi mov GR_TEMP3 = 0x0FF3F fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2 nop.i 999};;{ .mmf setf.exp FR_ScaleP4 = GR_TEMP3 mov GR_TEMP4 = 0x10045 fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3};;{ .mfi nop.m 999 fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C // For Tmp_C < sigma_C case nop.i 999};;{ .mmf setf.exp FR_Tmp2_C = GR_TEMP4 nop.m 999 fmpy.s3 FR_Tmp_B = FR_X,FR_p_2};;{ .mfi addl GR_BASE = @ltoff(Constants_Bits_of_pi_by_2#), gp fcmp.ge.s1 p12, p9 = FR_Tmp_C,FR_sigma_C nop.i 999}{ .mfi nop.m 999 fmpy.s3 FR_Tmp_A = FR_X,FR_p_3 nop.i 99};;{ .mfi ld8 GR_BASE = [GR_BASE](p12) mov FR_C_hi = FR_Tmp_C nop.i 999}{ .mfi nop.m 999(p9) fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C nop.i 999};;// End If// Step 3. Get reduced argument// If sgn_x == 0 (that is original x is positive)// D_hi := Pi_by_2_hi// D_lo := Pi_by_2_lo// Load from table// Else// D_hi := neg_Pi_by_2_hi// D_lo := neg_Pi_by_2_lo// Load from table// End If{ .mfi nop.m 999 fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4 nop.i 999}{ .mfi nop.m 999 fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B // For Tmp_B < sigma_B case nop.i 999};;{ .mfi nop.m 999 fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A // For Tmp_A < sigma_A case nop.i 999};;{ .mfi nop.m 999 fcmp.ge.s1 p13, p10 = FR_Tmp_B,FR_sigma_B nop.i 999}{ .mfi nop.m 999 fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi nop.i 999};;{ .mfi ldfe FR_D_hi = [GR_BASE],16 fcmp.ge.s1 p14, p11 = FR_Tmp_A,FR_sigma_A nop.i 999};;{ .mfi ldfe FR_D_lo = [GR_BASE](p13) mov FR_B_hi = FR_Tmp_B nop.i 999}{ .mfi nop.m 999(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B nop.i 999};;{ .mfi nop.m 999(p14) mov FR_A_hi = FR_Tmp_A nop.i 999}{ .mfi nop.m 999(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A nop.i 999};;// Note that C_hi is of integer value. We need only the// last few bits. 
Thus we can ensure C_hi is never a big// integer, freeing us from overflow worry.// Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);// Tmp_C is the upper portion of C_hi{ .mfi nop.m 999 fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C tbit.z p12,p9 = GR_Exp_x, 17};;{ .mfi nop.m 999 fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi nop.i 999}{ .mfi nop.m 999 fadd.s3 FR_A = FR_B_hi,FR_C_lo nop.i 999};;{ .mfi nop.m 999 fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi nop.i 999};;{ .mfi nop.m 999 fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C nop.i 999};;// *******************// Step 2. Get N and f// *******************// We have all the components to obtain// S_0, S_1, S_2, S_3 and thus N and f. We start by adding// C_lo and B_hi. This sum together with C_hi estimates// N and f well.// A := fadd.fpsr3( B_hi, C_lo )// B := max( B_hi, C_lo )// b := min( B_hi, C_lo ){ .mfi nop.m 999 fmax.s1 FR_B = FR_B_hi,FR_C_lo nop.i 999};;// We use a right-shift trick to get the integer part of A into the rightmost// bits of the significand by adding 1.1000..00 * 2^63. This operation is good// if |A| < 2^61, which it is in this case. We are doing this to save a few// cycles over using fcvt.fx followed by fnorm. The second step of the trick// is to subtract the same constant to float the rounded integer into a fp reg.{ .mfi nop.m 999// N := round_to_nearest_integer_value( A ); fma.s1 FR_N_fix = FR_A, f1, FR_RSHF nop.i 999};;{ .mfi nop.m 999 fmin.s1 FR_b = FR_B_hi,FR_C_lo nop.i 999}{ .mfi nop.m 999// C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7 fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C nop.i 999};;{ .mfi nop.m 999// a := (B - A) + b: Exact - note that a is either 0 or 2^(-64). 
fsub.s1 FR_a = FR_B,FR_A nop.i 999};;{ .mfi nop.m 999 fms.s1 FR_N = FR_N_fix, f1, FR_RSHF nop.i 999};;{ .mfi nop.m 999 fadd.s1 FR_a = FR_a,FR_b nop.i 999};;// f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.// N := convert to integer format( C_hi + N );// M := P_0 * x_lo;// N := N + M;{ .mfi nop.m 999 fsub.s1 FR_f = FR_A,FR_N nop.i 999}{ .mfi nop.m 999 fadd.s1 FR_N = FR_N,FR_C_hi nop.i 999};;{ .mfi nop.m 999(p9) fsub.s1 FR_D_hi = f0, FR_D_hi nop.i 999}{ .mfi nop.m 999(p9) fsub.s1 FR_D_lo = f0, FR_D_lo nop.i 999};;{ .mfi nop.m 999 fadd.s1 FR_g = FR_A_hi,FR_B_lo // For Case 1, g=A_hi+B_lo nop.i 999}{ .mfi nop.m 999 fadd.s3 FR_A = FR_A_hi,FR_B_lo // For Case 2, A=A_hi+B_lo w/ sf3 nop.i 999};;{ .mfi mov GR_Temp = 0x0FFCD // For Case 2, exponent of 2^-50 fmax.s1 FR_B = FR_A_hi,FR_B_lo // For Case 2, B=max(A_hi,B_lo) nop.i 999};;// f = f + a Exact because a is 0 or 2^(-64);// the msb of the sum is <= 1/2 and lsb >= 2^(-64).{ .mfi setf.exp FR_TWOM50 = GR_Temp // For Case 2, form 2^-50 fcvt.fx.s1 FR_N = FR_N nop.i 999}{ .mfi nop.m 999 fadd.s1 FR_f = FR_f,FR_a nop.i 999};;{ .mfi nop.m 999 fmin.s1 FR_b = FR_A_hi,FR_B_lo // For Case 2, b=min(A_hi,B_lo) nop.i 999};;{ .mfi nop.m 999 fsub.s1 FR_a = FR_B,FR_A // For Case 2, a=B-A nop.i 999};;{ .mfi nop.m 999 fadd.s1 FR_s_hi = FR_f,FR_g // For Case 1, s_hi=f+g nop.i 999}{ .mfi nop.m 999 fadd.s1 FR_f_hi = FR_A,FR_f // For Case 2, f_hi=A+f nop.i 999};;{ .mfi nop.m 999 fabs FR_f_abs = FR_f nop.i 999};;{ .mfi getf.sig GR_N = FR_N fsetc.s3 0x7F,0x40 // Reset sf3 to user settings + td nop.i 999};;{ .mfi nop.m 999 fsub.s1 FR_s_lo = FR_f,FR_s_hi // For Case 1, s_lo=f-s_hi nop.i 999}{ .mfi nop.m 999 fsub.s1 FR_f_lo = FR_f,FR_f_hi // For Case 2, f_lo=f-f_hi nop.i 999};;{ .mfi nop.m 999 fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi // For Case 1, r_hi=s_hi*D_hi nop.i 999}{ .mfi nop.m 999 fadd.s1 FR_a = FR_a,FR_b // For Case 2, a=a+b nop.i 999};;// If sgn_x == 1 (that is original x was negative)// N := 2^10 - N// this maintains N to be 
non-negative, but still// equivalent to the (negated N) mod 4.// End If{ .mfi add GR_N = GR_N,GR_M fcmp.ge.s1 p13, p10 = FR_f_abs,FR_TWOM33 mov GR_Temp = 0x00400};;{ .mfi(p9) sub GR_N = GR_Temp,GR_N fadd.s1 FR_s_lo = FR_s_lo,FR_g // For Case 1, s_lo=s_lo+g nop.i 999}{ .mfi nop.m 999 fadd.s1 FR_f_lo = FR_f_lo,FR_A // For Case 2, f_lo=f_lo+A nop.i 999};;// a := (B - A) + b Exact.// Note that a is either 0 or 2^(-128).// f_hi := A + f;// f_lo := (f - f_hi) + A// f_lo=f-f_hi is exact because either |f| >= |A|, in which// case f-f_hi is clearly exact; or otherwise, 0<|f|<|A|// means msb(f) <= msb(A) = 2^(-64) => |f| = 2^(-64).// If f = 2^(-64), f-f_hi involves cancellation and is// exact. If f = -2^(-64), then A + f is exact. Hence// f-f_hi is -A exactly, giving f_lo = 0.// f_lo := f_lo + a;// If |f| >= 2^(-33)// Case 1// CASE := 1// g := A_hi + B_lo;// s_hi := f + g;// s_lo := (f - s_hi) + g;// Else// Case 2// CASE := 2// A := fadd.fpsr3( A_hi, B_lo )// B := max( A_hi, B_lo )// b := min( A_hi, B_lo ){ .mfi nop.m 999(p10) fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50 nop.i 999}{ .mfi nop.m 999(p13) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi //For Case 1, r_lo=s_hi*D_hi+r_hi nop.i 999};;// If |f| >= 2^(-50) then// s_hi := f_hi;// s_lo := f_lo;// Else// f_lo := (f_lo + A_lo) + x*p_4// s_hi := f_hi + f_lo// s_lo := (f_hi - s_hi) + f_lo// End If{ .mfi nop.m 999(p14) mov FR_s_hi = FR_f_hi nop.i 999}{ .mfi nop.m 999(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_a nop.i 999};;{ .mfi nop.m 999(p14) mov FR_s_lo = FR_f_lo nop.i 999}{ .mfi nop.m 999(p11) fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo nop.i 999};;{ .mfi nop.m 999(p11) fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo nop.i 999};;{ .mfi nop.m 999(p13) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo //For Case 1, r_lo=s_hi*D_lo+r_lo nop.i 999}{ .mfi nop.m 999(p11) fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo nop.i 999};;// r_hi := s_hi*D_hi// r_lo := s_hi*D_hi - r_hi with fma// r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi{ .mfi nop.m 999(p10) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi 
nop.i 999}{ .mfi nop.m 999(p11) fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi nop.i 999};;{ .mfi nop.m 999(p10) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi nop.i 999}{ .mfi nop.m 999(p11) fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo nop.i 999};;{ .mfi nop.m 999(p10) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo nop.i 999};;// Return N, r_hi, r_lo// We do not return CASE{ .mfb nop.m 999 fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo br.ret.sptk b0};;.endp __libm_pi_by_2_reduce#
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號(hào)
Ctrl + -