?? ppc_fpu.c
字號:
/* * PearPC * ppc_fpu.cc * * Copyright (C) 2003, 2004 Sebastian Biallas (sb@biallas.net) * Copyright (C) 2003 Stefan Weyergraf * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ //#include "debug/tracers.h"#include <stdlib.h>#include "ppc_cpu.h"#include "ppc_dec.h"#include "ppc_fpu.h"#include "tracers.h"// .121#define PPC_FPR_TYPE2(a,b) (((a)<<8)|(b))inline void ppc_fpu_add(ppc_double *res, ppc_double *a, ppc_double *b){ switch (PPC_FPR_TYPE2(a->type, b->type)) { case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { int diff = a->e - b->e; if (diff<0) { diff = -diff; if (diff <= 56) { a->m >>= diff; } else if (a->m != 0) { a->m = 1; } else { a->m = 0; } res->e = b->e; } else { if (diff <= 56) { b->m >>= diff; } else if (b->m != 0) { b->m = 1; } else { b->m = 0; } res->e = a->e; } res->type = ppc_fpr_norm; if (a->s == b->s) { res->s = a->s; res->m = a->m + b->m; if (res->m & (1ULL<<56)) { res->m >>= 1; res->e++; } } else { res->s = a->s; res->m = a->m - b->m; if (!res->m) { if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) { res->s |= b->s; } else { res->s &= b->s; } res->type = ppc_fpr_zero; } else { if ((sint64)res->m < 0) { res->m = b->m - a->m; res->s = b->s; } diff = ppc_fpu_normalize(res) - 8; res->e -= diff; res->m <<= diff; } } break; } case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): res->s = a->s; res->type = ppc_fpr_NaN; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): res->e = a->e; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): res->s = a->s; res->m = a->m; res->type = a->type; break; case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): res->e = b->e; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): res->s = b->s; res->m = b->m; res->type = b->type; break; case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): if (a->s != b->s) { // +oo + -oo == NaN res->s = a->s ^ b->s; res->type = ppc_fpr_NaN; break; } // fall-thru case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): res->s = a->s; res->type = a->type; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): res->s = b->s; res->type = b->type; break; case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): // round bla res->type = ppc_fpr_zero; res->s = a->s && b->s; break; }}inline void ppc_fpu_quadro_mshr(ppc_quadro *q, int exp){ if (exp >= 64) { q->m1 = q->m0; q->m0 = 0; exp -= 64; } uint64 t = q->m0 & ((1ULL<<exp)-1); q->m0 >>= exp; q->m1 >>= exp; q->m1 |= t<<(64-exp);}inline void ppc_fpu_quadro_mshl(ppc_quadro *q, int exp){ if (exp >= 64) { q->m0 = q->m1; q->m1 = 0; exp -= 64; } uint64 t = (q->m1 >> (64-exp)) & ((1ULL<<exp)-1); q->m0 <<= exp; q->m1 <<= exp; q->m0 |= t;}inline void ppc_fpu_add_quadro_m(ppc_quadro *res, const ppc_quadro *a, const ppc_quadro *b){ res->m1 = a->m1+b->m1; if (res->m1 < a->m1) { res->m0 = a->m0+b->m0+1; } else { res->m0 = a->m0+b->m0; }}inline void ppc_fpu_sub_quadro_m(ppc_quadro *res, const ppc_quadro *a, const ppc_quadro *b){ res->m1 = a->m1-b->m1; if (a->m1 < b->m1) { res->m0 = a->m0-b->m0-1; } else { res->m0 = a->m0-b->m0; }}// res has 107 significant bits. a, b have 106 significant bits each.inline void ppc_fpu_add_quadro(ppc_quadro *res, ppc_quadro *a, ppc_quadro *b){ // treat as 107 bit mantissa if (a->type == ppc_fpr_norm) ppc_fpu_quadro_mshl(a, 1); if (b->type == ppc_fpr_norm) ppc_fpu_quadro_mshl(b, 1); switch (PPC_FPR_TYPE2(a->type, b->type)) { case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { int diff = a->e - b->e; if (diff < 0) { diff = -diff; if (diff <= 107) { // FIXME: may set x_prime ppc_fpu_quadro_mshr(a, diff); } else if (a->m0 || a->m1) { a->m0 = 0; a->m1 = 1; } else { a->m0 = 0; a->m1 = 0; } res->e = b->e; } else { if (diff <= 107) { // FIXME: may set x_prime ppc_fpu_quadro_mshr(b, diff); } else if (b->m0 || b->m1) { b->m0 = 0; b->m1 = 1; } else { b->m0 = 0; b->m1 = 0; } res->e = a->e; } res->type = ppc_fpr_norm; if (a->s == b->s) { res->s = a->s; ppc_fpu_add_quadro_m(res, a, b); int X_prime = res->m1 & 1; if (res->m0 & (1ULL<<(107-64))) { ppc_fpu_quadro_mshr(res, 1); res->e++; } // res = [107] res->m1 = (res->m1 & 0xfffffffffffffffeULL) | X_prime; } else { res->s = a->s; int cmp; if (a->m0 < b->m0) { cmp = -1; } else if (a->m0 > b->m0) { cmp = +1; } else { if (a->m1 < b->m1) { cmp = -1; } else if (a->m1 > b->m1) { cmp = +1; } else { cmp = 0; } } if (!cmp) { if (FPSCR_RN(gCPU.fpscr) == FPSCR_RN_MINF) { res->s |= b->s; } else { res->s &= b->s; } res->type = ppc_fpr_zero; } else { if (cmp < 0) { ppc_fpu_sub_quadro_m(res, b, a); res->s = b->s; } else { ppc_fpu_sub_quadro_m(res, a, b); } diff = ppc_fpu_normalize_quadro(res) - (128-107); int X_prime = res->m1 & 1; res->m1 &= 0xfffffffffffffffeULL; ppc_fpu_quadro_mshl(res, diff); res->e -= diff; res->m1 |= X_prime; } // res = [107] } break; } case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): res->s = a->s; res->type = ppc_fpr_NaN; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): res->e = a->e; res->s = a->s; res->m0 = a->m0; res->m1 = a->m1; res->type = a->type; break; case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): res->e = b->e; res->s = b->s; res->m0 = b->m0; res->m1 = b->m1; res->type = b->type; break; case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): if (a->s != b->s) { // +oo + -oo == NaN res->s = a->s ^ b->s; res->type = ppc_fpr_NaN; break; } // fall-thru case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): res->s = a->s; res->type = a->type; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): res->s = b->s; res->type = b->type; break; case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): // round bla res->type = ppc_fpr_zero; res->s = a->s && b->s; break; }}inline void ppc_fpu_add_uint64_carry(uint64 *a, uint64 b, uint64 *carry){ *carry = (*a+b < *a) ? 1 : 0; *a += b;}// 'res' has 56 significant bits on return, a + b have 56 significant bits eachinline void ppc_fpu_mul(ppc_double *res, const ppc_double *a, const ppc_double *b){ res->s = a->s ^ b->s; switch (PPC_FPR_TYPE2(a->type, b->type)) { case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { res->type = ppc_fpr_norm; res->e = a->e + b->e;// printf("new exp: %d\n", res.e);// ht_printf("MUL:\na.m: %qb\nb.m: %qb\n", a.m, b.m); uint64 fH, fM1, fM2, fL; fL = (a->m & 0xffffffff) * (b->m & 0xffffffff); // [32] * [32] = [63,64] fM1 = (a->m >> 32) * (b->m & 0xffffffff); // [24] * [32] = [55,56] fM2 = (a->m & 0xffffffff) * (b->m >> 32); // [32] * [24] = [55,56] fH = (a->m >> 32) * (b->m >> 32); // [24] * [24] = [47,48]// ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL); // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL uint64 rL, rH; rL = fL; // rL = rH = [63,64] rH = fH; // rH = fH = [47,48] uint64 split; split = fM1 + fM2; uint64 carry; ppc_fpu_add_uint64_carry(&rL, (split & 0xffffffff) << 32, &carry); // rL = [63,64] rH += carry; // rH = [0 .. 2^48] rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set // res.m = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_55] // [---------------------------------------------------------] // bit = [63 62 .. 58 | 57 56 .. 9 | 8 7 0 ] // [---------------------------------------------------------] // [15 bits zero | 49 bits rH | 8 most sign.bits rL ] res->m = rH << 9; res->m |= rL >> (64-9); // res.m = [58]// ht_printf("fH: %qx fM1: %qx fM2: %qx fL: %qx\n", fH, fM1, fM2, fL); if (res->m & (1ULL << 57)) { res->m >>= 2; res->e += 2; } else if (res->m & (1ULL << 56)) { res->m >>= 1; res->e++; } // res.m = [56] break; } case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_NaN): res->type = a->type; res->e = a->e; break; case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_NaN, ppc_fpr_zero): res->s = a->s; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_norm): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_zero): res->type = a->type; break; case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_NaN): case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_NaN): res->s = b->s; // fall-thru case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_zero): res->type = b->type; break; case PPC_FPR_TYPE2(ppc_fpr_zero, ppc_fpr_Inf): case PPC_FPR_TYPE2(ppc_fpr_Inf, ppc_fpr_zero): res->type = ppc_fpr_NaN; break; }}// 'res' has 'prec' significant bits on return, a + b have 56 significant bits each// for 111 >= prec >= 64inline void ppc_fpu_mul_quadro(ppc_quadro *res, ppc_double *a, ppc_double *b, int prec){ res->s = a->s ^ b->s; switch (PPC_FPR_TYPE2(a->type, b->type)) { case PPC_FPR_TYPE2(ppc_fpr_norm, ppc_fpr_norm): { res->type = ppc_fpr_norm; res->e = a->e + b->e;// printf("new exp: %d\n", res.e);// ht_printf("MUL:\na.m: %016qx\nb.m: %016qx\n", a.m, b.m); uint64 fH, fM1, fM2, fL; fL = (a->m & 0xffffffff) * (b->m & 0xffffffff); // [32] * [32] = [63,64] fM1 = (a->m >> 32) * (b->m & 0xffffffff); // [24] * [32] = [55,56] fM2 = (a->m & 0xffffffff) * (b->m >> 32); // [32] * [24] = [55,56] fH = (a->m >> 32) * (b->m >> 32); // [24] * [24] = [47,48]// ht_printf("fH: %016qx fM1: %016qx fM2: %016qx fL: %016qx\n", fH, fM1, fM2, fL); // calulate fH * 2^64 + (fM1 + fM2) * 2^32 + fL uint64 rL, rH; rL = fL; // rL = rH = [63,64] rH = fH; // rH = fH = [47,48] uint64 split; split = fM1 + fM2; uint64 carry; ppc_fpu_add_uint64_carry(&rL, (split & 0xffffffff) << 32, &carry); // rL = [63,64] rH += carry; // rH = [0 .. 2^48] rH += split >> 32; // rH = [0:48], where 46, 47 or 48 set // res.m0 = [0 0 .. 0 | rH_48 rH_47 .. rH_0 | rL_63 rL_62 .. rL_0] // [-----------------------------------------------------------] // log.bit= [127 126 .. 113 | 112 64 | 63 62 0 ] // [-----------------------------------------------------------] // [ 15 bits zero | 49 bits rH | 64 bits rL ] res->m0 = rH; res->m1 = rL; // res.m0|res.m1 = [111,112,113]// ht_printf("res = %016qx%016qx\n", res.m0, res.m1); if (res->m0 & (1ULL << 48)) { ppc_fpu_quadro_mshr(res, 2+(111-prec)); res->e += 2; } else if (res->m0 & (1ULL << 47)) { ppc_fpu_quadro_mshr(res, 1+(111-prec)); res->e += 1; } else { ppc_fpu_quadro_mshr(res, 111-prec); } // res.m0|res.m1 = [prec] break; }
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -