?? ppc_fmul.s
字號:
/* fpopt/ppc_fmul.S, pl_FPE_common, pl_linux 11/24/03 16:17:34 */
/*----------------------------------------------------------------------------- */
/* Copyright (c) 2003, IBM Corporation */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* * Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* * Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* * Neither the name of IBM nor the names of its contributors */
/* may be used to endorse or promote products derived from this */
/* software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, */
/* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */
/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */
/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY */
/* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE */
/* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* */
/*----------------------------------------------------------------------------- */
/* */
/* Function: multiply two double floating point values. frt = fpa * fpb */
/* Input: r3,r4(fpa) */
/* r5,r6(fpb) */
/* Output: r3,r4(frt) */
/* Notes: 1. No stack frame is created for the normal case of this function, */
/* so the following registers must be preserved, as required by */
/* ABI specification: */
/* LR, CR0, R1, R2, R13-R31 */
/* 2. For the full function case, a stack frame is created, following */
/* ABI specification, and non-volatile registers are saved and */
/* restored on the stack. */
/* 3. operation performed according to IEEE754-1985 standard with */
/* rounding mode = nearest even. */
/* */
/*----------------------------------------------------------------------------- */
#include <ppc4xx.inc>
#include "fpeLib.inc"
function_prolog(__muldf3)
mfcr r0 /* save cr */
mtctr r13 /* save r13 */
/* load fpa into r8,r9,r10 and cr6. load fpb into r11, r12, r13 and cr7 */
mr r9,r3 /* load fpa.exp, fpa.S, fpa.hi */
mr r12,r5 /* load fpb.exp, fpb.S, fpb.hi */
rlwinm r8,r9,32-20,0x7ff /* isolate exponent of fpa into r8 */
rlwinm r11,r12,32-20,0x7ff /* isolate exponent of fpb into r11 */
mr r10,r4 /* load fpa.lo */
/* load fpb.lo (already in r6) */
/* test for normal: */
cmpwi cr0,r8,0x7ff /* if (fpa.exp == DEXPMAX) */
cmpwi cr1,r8,0 /* if (fpa.exp == 0) */
cmpwi cr2,r11,0x7ff /* if (fpb.exp == DEXPMAX) */
cmpwi cr3,r11,0 /* if (fpb.exp == 0) */
beq full /*fpa.exp is max */
beq cr2,full /* fpb.exp is max */
beq cr1,tst0a /* fpa.exp is 0 */
beq cr3,tst0b /* fpb.exp is 0 */
/* normal case only: */
oris r9,r9,0x0010 /* fpa.hi |= 0x00100000; */
oris r12,r12,0x0010 /* fpb.hi |= 0x00100000; */
/* Calculate exponent */
add r8,r8,r11 /* result.expt = fpa.exp + fpb.exp; */
/* ------- note: r11 is now free */
addic. r8,r8,-1022 /* result.exp -= DEXPBIAS; check for negative */
/* result.exp += 1 (for multiplying left-just */
/* numbers if result is left-just) */
cmpwi cr2,r8,0x7fd /* if (result.exp >= DEXPMAX-2) */
ble full
bge cr2,full
/* Multiply the numbers: */
/* Start by shifting everything left 11 bits, so that the MSW is full: */
rlwinm r9,r9,11,0,20 /* r9 = fpa.hi */
rlwimi r9,r10,11,21,31
rlwinm r10,r10,11,0,20 /* r10 = fpa.lo */
rlwinm r12,r12,11,0,20 /* r12 = fpb.hi */
rlwimi r12,r6,11,21,31
rlwinm r13,r6,11,0,20 /* r13 = fpb.lo */
/* Multiply a-high and b-high: */
mulhwu r7,r9,r12 /* r7 gets high word of result */
mullw r11,r9,r12 /* r11 gets low word of result */
/* Multiply a-high and b-low (upper half only): */
mulhwu r13,r9,r13
addc r13,r11,r13
addze r7,r7
/* Multiply a-low and b-high (upper half only): */
mulhwu r10,r10,r12
addc r13,r13,r10
addze. r7,r7
/* r7/r13 now have result hi/lo */
/* ------- note: r9-r12 are now free */
/* There are two possibilities for the result: */
/* Either (bit 0 is 1) or (bit 0 is 0 and bit 1 is 1). */
/* If bit 0 is 0, then shift left 1 bit to normalize: */
blt msb_is_bit0
addc r13,r13,r13 /* shift r7/r13 left 1 bit */
adde r7,r7,r7 /* */
addic. r8,r8,-1 /* shifted case: exponent is 1 less */
/* than non-shifted */
beq full /* if exponent out of range, go to full handler */
msb_is_bit0:
/* See if we need to round. (NOTE: the exact .5 case needs more precision */
/* than what we have calculated; in that case, go to the full handler.) */
/* 53 bits (bits 0-52 of the 2-word result) are significant. The sticky bit */
/* is bit 53 = bit (53-32= 21) of the low word (r13). If it is 0, then no */
/* rounding. */
/* BUT we don't really know the sticky bit for sure yet -- the uncalculated */
/* low-order 64 bits of the 128-bit product could conceivably cause the low */
/* bits of our LSW to round up, changing the state of bit 21. */
/* The round-up is the result of a carry-out from 3 neglected summands, */
/* so at most it adds 2 to the low word. But we have already */
/* shifted left one bit, just above, so by now the carry-out could add */
/* as much as 4. The addition of a 3-bit number can */
/* propagate up to bit 21 ONLY if bits 22-29 are all ones: */
rlwinm. r10,r13,0,22,31
cmpwi cr2,r10,0x3fc /* special case if bits 22-29 all 1's */
rlwinm r11,r13,0,20,21 /* test LSb and sticky bit */
bge cr2,full
cmpwi cr3,r11,0x400 /* special rounding if LSb=0 and sticky=1 */
addic r13,r13,0x400 /* standard rounding = add 1 to sticky */
bne+ cr3,finish_std_round
beq full /* if LSb=0 and sticky=1 and 22-31 all */
/* zeros, then special case */
finish_std_round:
addze. r7,r7 /* add carry-out of low word to high */
/* -- if this overflowed us, then we */
/* need to shift back right 1 bit */
/* and increment the exponent */
/* to reflect this shift: */
bne+ std_done
addi r8,r8,1
rlwinm r13,r13,32-1,1,31
rlwimi r13,r7,32-1,0,0
rlwinm r7,r7,32-1,1,31
std_done:
/* We're done! Just assemble the result back into IEEE format: */
/* r3 = IEEE MSW, r4 = IEEE LSW */
xor r9,r3,r5 /* get product sign */
/* Shift right 11 bits (= left 21 bits) to put the MSb into bit 11, but */
/* mask it off since it's implied: */
rlwinm r3,r7,32-11,12,31
/* Bits 0-10 of the IEEE LSW represent bits 21-31 of the result, so they */
/* come from bits 21-31 of the high word (r7): */
rlwinm r4,r7,21,0,10
/* Bits 11-31 of the IEEE LSW represent bits 32-52 of the result, so they */
/* come from bits 0-20 of the low word (r13): */
rlwimi r4,r13,32-11,11,31
/* Put the exponent into bits 1-11 of the MSW: */
rlwimi r3,r8,20,1,11
/* Insert sign bit: */
rlwimi r3,r9,0,0,0
mtcr r0 /* restore cr */
mfctr r13 /* restore r13 */
blr /* return; */
tst0a:
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -