! File: sub_n.s
! (Code-viewer page residue, not part of the source: font size control label.)
! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
!
! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! MA 02111-1307, USA.

/* Contract and register roles, as established by the code below.

     In:   RES_PTR (%o0)  address where the difference limbs are stored
	   S1_PTR  (%o1)  address of the minuend limbs
	   S2_PTR  (%o2)  address of the subtrahend limbs
	   SIZE    (%o3)  number of 32-bit limbs (> 0 per the summary above)
     Out:  %o0 = borrow out of the most significant limb (0 or 1)
     Uses: %g1-%g4, %o4, %o5 and the integer condition codes as scratch.
	   The routine returns with retl and never issues save/restore.

   The result is RES[i] = S1[i] - S2[i] - borrow, least significant limb
   first.  The borrow lives in the carry flag (cy) and is chained with
   subxcc.

   Strategy: ldd/std move two limbs at once but need a doubleword-aligned
   address, so the routine looks at bit 2 of the three pointers and picks
   one of three variants:

     V1a  S2_PTR and RES_PTR agree in bit 2: ldd on S2, std on RES,
	  single-word ld on S1.
     V1b  S1_PTR and RES_PTR agree in bit 2: ldd on S1, std on RES,
	  single-word ld on S2.
     V2   neither of the above, hence S1_PTR and S2_PTR agree: ldd on both
	  sources, single-word st on RES.

   Each variant first peels one limb if the doubleword-accessed pointers
   have bit 2 set, then runs an 8-limb unrolled loop, a 2-limb loop and a
   final single-limb tail.

   Bookkeeping conventions used throughout:
     - Instructions that count or compare SIZE destroy cy, so cy is parked
       in %o4 with "addx %g0,%g0,%o4" and re-created with
       "subcc %g0,%o4,%g0" (0 - %o4 borrows exactly when %o4 is 1).
     - The instruction following each branch sits in its delay slot and is
       executed whether or not the branch is taken; the "restore cy"
       instructions are placed there on purpose.
     - SIZE is kept biased by even amounts only (-10, -8, +6, -2), so the
       sign of each addcc result decides whether another block fits while
       the low bit of SIZE still tells whether an odd limb remains.

   NOTE(review): only bit 2 of each pointer is inspected, so the pointers
   are presumably at least word aligned -- confirm against callers.  */

! INPUT PARAMETERS
#define RES_PTR	%o0
#define S1_PTR	%o1
#define S2_PTR	%o2
#define SIZE	%o3

#include <sysdep.h>

ENTRY(__mpn_sub_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(1)			! branch if alignment differs
	nop

! **  V1a  **
/* S2_PTR and RES_PTR share their doubleword alignment.  */
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4		! first limb: no incoming borrow
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

/* Preload the first pair: S1 limbs in %g4/%g1, S2 limbs in %g2/%g3.  */
	ld	[S1_PTR+0],%g4
	addcc	SIZE,-10,SIZE		! 8 for a block + 2 already preloaded
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract blocks of 8 limbs until fewer than 8 limbs remain.  Each step
   consumes the preloaded pair and loads the next pair before storing.  */
LOC(loop1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

LOC(fin1):
	addcc	SIZE,8-2,SIZE		! switch the bias from 8-limb to 2-limb steps
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
LOC(loope1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Drain the pair that is still held in registers.  */
LOC(end1):
	subxcc	%g4,%g2,%o4
	subxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! odd limb left over?
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract last limb; it sits just past the pair stored above.  */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0	! return borrow-out from most significant limb

LOC(1):
	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(2)
	nop

! **  V1b  **
/* S1_PTR and RES_PTR share their doubleword alignment.  Same structure as
   V1a with the roles of the two sources swapped: S1 limbs are in %g2/%g3
   and S2 limbs in %g4/%g1, so the subxcc operand order is swapped too.  */
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1b)		! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
	ld	[S2_PTR],%g4
	add	S2_PTR,4,S2_PTR
	ld	[S1_PTR],%g2
	add	S1_PTR,4,S1_PTR
	add	SIZE,-1,SIZE
	subcc	%g2,%g4,%o4		! first limb: no incoming borrow
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1b):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

/* Preload the first pair: S2 limbs in %g4/%g1, S1 limbs in %g2/%g3.  */
	ld	[S2_PTR+0],%g4
	addcc	SIZE,-10,SIZE		! 8 for a block + 2 already preloaded
	ld	[S2_PTR+4],%g1
	ldd	[S1_PTR+0],%g2
	blt	LOC(fin1b)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract blocks of 8 limbs until fewer than 8 limbs remain */
LOC(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+20],%g1
	ldd	[S1_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+28],%g1
	ldd	[S1_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+36],%g1
	ldd	[S1_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1b)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

LOC(fin1b):
	addcc	SIZE,8-2,SIZE		! switch the bias from 8-limb to 2-limb steps
	blt	LOC(end1b)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
LOC(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1b)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Drain the pair that is still held in registers.  */
LOC(end1b):
	subxcc	%g2,%g4,%o4
	subxcc	%g3,%g1,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! odd limb left over?
	be	LOC(ret1b)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract last limb; it sits just past the pair stored above.  */
	ld	[S2_PTR+8],%g4
	ld	[S1_PTR+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1b):
	retl
	addx	%g0,%g0,%o0	! return borrow-out from most significant limb

! **  V2  **
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of S1_PTR and S2_PTR are the same.  */

LOC(2):
	cmp	SIZE,1			! equal operands leave cy=0 for the tail
	be	LOC(jone)		! a single limb goes straight to the tail
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4		! first limb: no incoming borrow
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract blocks of 8 limbs until fewer than 8 limbs remain.  Nothing is
   preloaded here: S1 pairs go to %g2/%g3, S2 pairs to %o4/%o5, and the
   differences are stored one word at a time.  */
LOC(loop2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)

LOC(fin2):
	addcc	SIZE,8-2,SIZE		! switch the bias from 8-limb to 2-limb steps
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract blocks of 2 limbs until fewer than 2 limbs remain */
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Shared tail: also reached from V1a and V1b when fewer than 2 limbs are
   left after the alignment step.  %o4 holds the saved cy on every path
   that arrives here.  */
LOC(end2):
	andcc	SIZE,1,%g0		! odd limb left over?
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy (delay slot)
/* Subtract last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0	! return borrow-out from most significant limb

END(__mpn_sub_n)
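! Code-viewer page residue (not part of the assembly source), translated:
!   Keyboard shortcut help
!   Copy code              Ctrl + C
!   Search code            Ctrl + F
!   Full-screen mode       F11
!   Toggle theme           Ctrl + Shift + D
!   Show shortcuts         ?
!   Increase font size     Ctrl + =
!   Decrease font size     Ctrl + -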