?? ssin.s
字號:
/* ssin.s - Motorola 68040 FP sine routines (EXC) *//* Copyright 1991-1993 Wind River Systems, Inc. */ .data .globl _copyright_wind_river .long _copyright_wind_river/*modification history--------------------01f,21jul93,kdl added .text (SPR #2372).01e,23aug92,jcf changed bxxx to jxx.01d,26may92,rrr the tree shuffle01c,10jan92,kdl added modification history; general cleanup.01b,17dec91,kdl put in changes from Motorola v3.3 (from FPSP 2.1): reduce argument by one step before general reduction loop.01a,15aug91,kdl original version, from Motorola FPSP v2.0.*//*DESCRIPTION ssinsa 3.2 12/18/90 WIND RIVER MODIFICATION HISTORY 01a,31jul91,kdl from Motorola FPSP v2.0. The entry point sSIN computes the sine of an input argument sCOS computes the cosine, and sSINCOS computes both. The corresponding entry points with a "d" computes the same corresponding function values for denormalized inputs. Input: Double-extended number X in location pointed to by address register a0. Output: The funtion value sin(X) or cos(X) returned in Fp0 if SIN or COS is requested. Otherwise, for SINCOS, sin(X) is returned in Fp0, and cos(X) is returned in Fp1. Modifies: Fp0 for SIN or COS| both Fp0 and Fp1 for SINCOS. Accuracy and Monotonicity: The returned result is within 1 ulp in 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the result is subsequently rounded to double precision. The result is provably monotonic in double precision. Speed: The programs sSIN and sCOS take approximately 150 cycles for input argument X such that |X| < 15Pi, which is the the usual situation. The speed for sSINCOS is approximately 190 cycles. Algorithm: SIN and COS: 1. If SIN is invoked, set AdjN := 0| otherwise, set AdjN := 1. 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let k = N mod 4, so in particular, k = 0,1,2,or 3. Overwirte k by k := k + AdjN. 4. If k is even, go to 6. 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r) where cos(r) is approximated by an even polynomial in r, 1 + r*r*(B1+s*(B2+ |... + s*B8)), s = r*r. Exit. 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) where sin(r) is approximated by an odd polynomial in r r + r*s*(A1+s*(A2+ |... + s*A7)), s = r*r. Exit. 7. If |X| > 1, go to 9. 8. (|X|<2**(-40)) If SIN is invoked, return X| otherwise return 1. 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. SINCOS: 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let k = N mod 4, so in particular, k = 0,1,2,or 3. 3. If k is even, go to 5. 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. j1 exclusive or with the ls.b. of k. sgn1 := (-1)**j1, sgn2 := (-1)**j2. SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where sin(r) and cos(r) are computed as odd and even polynomials in r, respectively. Exit 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where sin(r) and cos(r) are computed as odd and even polynomials in r, respectively. Exit 6. If |X| > 1, go to 8. 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. Copyright (C) Motorola, Inc. 1990 All Rights Reserved THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA The copyright notice above does not evidence any actual or intended publication of such source code.SSIN idnt 2,1 Motorola 040 Floating Point Software Package section 8NOMANUAL*/#include "fpsp040E.h"BOUNDS1: .long 0x3FD78000,0x4004BC7ETWOBYPI: .long 0x3FE45F30,0x6DC9C883SINA7: .long 0xBD6AAA77,0xCCC994F5SINA6: .long 0x3DE61209,0x7AAE8DA1SINA5: .long 0xBE5AE645,0x2A118AE4SINA4: .long 0x3EC71DE3,0xA5341531SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000COSB8: .long 0x3D2AC4D0,0xD6011EE3COSB7: .long 0xBDA9396F,0x9F45AC19COSB6: .long 0x3E21EED9,0x0612C972COSB5: .long 0xBE927E4F,0xB79D9FCFCOSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5ECOSB1: .long 0xBF000000INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152ATWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000| xref __x_PITBL#define INARG FP_SCR4#define X FP_SCR5#define XDCARE X+2#define XFRAC X+4#define RPRIME FP_SCR1#define SPRIME FP_SCR2#define POSNEG1 L_SCR1#define TWOTO63 L_SCR1#define ENDFLAG L_SCR2#define N L_SCR2#define ADJN L_SCR3| xref __x_t_frcinx| xref __x_t_extdnrm| xref __x_sto_cos .text .globl __x_ssind__x_ssind:|--SIN(X) = X FOR DENORMALIZED X jra __x_t_extdnrm .globl __x_scosd__x_scosd:|--COS(X) = 1 FOR DENORMALIZED X/* fmoves &0x3F800000,fp0 */ .long 0xf23c4400,0x3f800000|| 9D25B Fix: Sometimes the previous fmoves sets fpsr bits| fmovel #0,fpsr| jra __x_t_frcinx .globl __x_ssin__x_ssin:|--SET ADJN TO 0 movel #0,a6@(ADJN) jra SINBGN .globl __x_scos__x_scos:|--SET ADJN TO 1 movel #1,a6@(ADJN)SINBGN:|--SAVE fpcr, FP1. CHECK IF |X| IS TOO SMALL OR LARGE fmovex a0@,fp0 |...lOAD INPUT movel A0@,d0 movew A0@(4),d0 fmovex fp0,a6@(X) andil #0x7FFFFFFF,d0 |...COMPACTIFY X cmpil #0x3FD78000,d0 |...|X| >= 2**(-40)? jge SOK1 jra SINSMSOK1: cmpil #0x4004BC7E,d0 |...|X| < 15 PI? jlt SINMAIN jra REDUCEXSINMAIN:|--THIS IS THE USUAL CASE, |X| <= 15 PI.|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. fmovex fp0,fp1 fmuld TWOBYPI,fp1 |...X*2/PI|--HIDE THE NEXT THREE INSTRUCTIONS lea __x_PITBL+0x200,a1 |...TABLE OF N*PI/2, N = -32,...,32|--FP1 IS NOW READY fmovel fp1,a6@(N) |...CONVERT TO INTEGER movel a6@(N),d0 asll #4,d0 addal d0,a1 |...A1 IS THE ADDRESS OF N*PIBY2| |...wHICH IS IN TWO PIECES Y1 # Y2 fsubx A1@+,fp0 |...X-Y1|--HIDE THE NEXT ONE fsubs A1@,fp0 |...FP0 IS R = (X-Y1)-Y2SINCONT:|--continuation from REDUCEX|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED movel a6@(N),d0 addl a6@(ADJN),d0 |...SEE IF d0 IS ODD OR EVEN rorl #1,d0 |...D0 WAS ODD IFF d0 IS NEGATIVE cmpil #0,d0 jlt COSPOLYSINPOLY:|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.|--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY/* |--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + |... + SA7)))), WHERE *//* |--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS *//* |--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) */|--WHERE T=S*S.|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. fmovex fp0,a6@(X) |...X IS R fmulx fp0,fp0 |...FP0 IS S|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 fmoved SINA7,fp3 fmoved SINA6,fp2|--FP0 IS NOW READY fmovex fp0,fp1 fmulx fp1,fp1 |...FP1 IS T|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 rorl #1,d0 andil #0x80000000,d0| |...lEAST SIG. BIT OF D0 IN SIGN POSITION eorl d0,a6@(X) /* |...X IS NOW R'= SGN*R */ fmulx fp1,fp3 |...TA7 fmulx fp1,fp2 |...TA6 faddd SINA5,fp3 |...A5+TA7 faddd SINA4,fp2 |...A4+TA6 fmulx fp1,fp3 |...T(A5+TA7) fmulx fp1,fp2 |...T(A4+TA6) faddd SINA3,fp3 |...A3+T(A5+TA7) faddx SINA2,fp2 |...A2+T(A4+TA6) fmulx fp3,fp1 |...T(A3+T(A5+TA7)) fmulx fp0,fp2 |...S(A2+T(A4+TA6)) faddx SINA1,fp1 |...A1+T(A3+T(A5+TA7)) fmulx a6@(X),fp0 /* |...R'*S */ faddx fp2,fp1 |...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING|--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING fmulx fp1,fp0 /* |...SIN(R')-R' */|--FP1 RELEASED. fmovel d1,fpcr | restore users exceptions faddx a6@(X),fp0 | last inst - possible exception set jra __x_t_frcinxCOSPOLY:|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.|--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY/* |--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + |... + SB8)))), WHERE *//* |--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS *//* |--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) */|--WHERE T=S*S.|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2|--AND IS THEREFORE STORED AS SINGLE PRECISION. fmulx fp0,fp0 |...FP0 IS S|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 fmoved COSB8,fp2 fmoved COSB7,fp3|--FP0 IS NOW READY fmovex fp0,fp1 fmulx fp1,fp1 |...FP1 IS T|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 fmovex fp0,a6@(X) |...X IS S rorl #1,d0 andil #0x80000000,d0| |...lEAST SIG. BIT OF D0 IN SIGN POSITION fmulx fp1,fp2 |...TB8|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU eorl d0,a6@(X) /* |...X IS NOW S'= SGN*S */ andil #0x80000000,d0 fmulx fp1,fp3 |...TB7|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU oril #0x3F800000,d0 |...D0 IS SGN IN SINGLE movel d0,a6@(POSNEG1) faddd COSB6,fp2 |...B6+TB8 faddd COSB5,fp3 |...B5+TB7 fmulx fp1,fp2 |...T(B6+TB8) fmulx fp1,fp3 |...T(B5+TB7) faddd COSB4,fp2 |...B4+T(B6+TB8) faddx COSB3,fp3 |...B3+T(B5+TB7) fmulx fp1,fp2 |...T(B4+T(B6+TB8)) fmulx fp3,fp1 |...T(B3+T(B5+TB7)) faddx COSB2,fp2 |...B2+T(B4+T(B6+TB8)) fadds COSB1,fp1 |...B1+T(B3+T(B5+TB7)) fmulx fp2,fp0 |...S(B2+T(B4+T(B6+TB8)))|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING|--FP2 RELEASED. faddx fp1,fp0|--FP1 RELEASED fmulx a6@(X),fp0 fmovel d1,fpcr | restore users exceptions fadds a6@(POSNEG1),fp0 | last inst - possible exception set jra __x_t_frcinxSINBORS:|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.|--IF |X| < 2**(-40), RETURN X OR 1. cmpil #0x3FFF8000,d0 jgt REDUCEXSINSM: movel a6@(ADJN),d0
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -