?? hb_64.c
字號:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:06:39 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h *//* * This function contains 1038 FP additions, 644 FP multiplications, * (or, 520 additions, 126 multiplications, 518 fused multiply/add), * 231 stack variables, 15 constants, and 256 memory accesses */#include "hb.h"static void hb_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP980785280, 
+0.980785280403230449126182236134239036973933731); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(rs)) { E Tcx, Tcw, Tcv; { E Thy, Tv, T7n, T5B, TfP, Tey, Tkl, TjB, T6U, T2k, T7o, T2H, TiH, Tia, Tk8; E Tj8, T6V, T5E, Tbz, T9N, Tb7, T9Q, Tgh, Tev, Tb6, T8G, TbA, T8N, TfO, TcU; E Tgi, Td5, Ti3, T10, TjC, Tje, TiI, ThF, TeA, Tds, TjD, Tjb, TeB, Tdh, Tgl; E TfT, Tgk, TfW, T6Z, T7r, T5H, T39, Tbb, TbC, T9S, T8V, T72, T7q, T5G, T3A; E Tbe, TbD, T9T, T92, ThH, T1w, Tke, Tjq, Tkf, Tjt, TiK, ThO, Tgb, TgT, Tfc; E Tec, Tg8, TgU, Tfd, Tel, T77, T83, T6i, T5a, T7a, T82, T6j, T5n, Tbj, Tcc; E Tas, T9f, Tbm, Tcb, Tar, T9m, ThQ, T21, Tkb, Tjj, Tkc, Tjm, TiL, ThX, Tg4; E TgW, Tf9, TdL, Tg1, TgX, Tfa, TdU, T7e, T80, T6f, T4h, T9q, Tbr, T7h, T7Z; E T6g, T4u, T9D, T9C, Tbo, T9B, Tbp, T9x; { E T3v, T8Z, T8W, T90, T8X, T3y, T3q, T70; { E TcQ, TcT, Td4, TcZ; { E T24, T5t, T7, T27, T5w, Ti4, Tet, T2i, T5z, Te, Teu, Ti5, T5y, T2d, T8H; E T2u, Td0, Tm, Ti7, Td3, T8I, T2p, Tq, T2w, Tp, TcV, T2E, Tr, T2x, T2y; E Tes, Ter; { E T1, T2, T4, T5, T5u, T5v; T1 = cr[0]; T2 = ci[WS(rs, 31)]; T4 = cr[WS(rs, 16)]; T5 = ci[WS(rs, 15)]; { E T25, T3, T6, T26; T25 = ci[WS(rs, 47)]; T24 = T1 - T2; T3 = T1 + T2; T5t = T4 - T5; T6 = T4 + T5; T26 = cr[WS(rs, 48)]; T5u = ci[WS(rs, 63)]; T5v = cr[WS(rs, 32)]; TcQ = T3 - T6; T7 = T3 + T6; Tes = T25 - T26; T27 = T25 + T26; } Ter = T5u - T5v; T5w = T5u + T5v; } { E Ta, T29, Tb, TcR, T2h, Tc, T2a, T2b; { E T2f, T2g, T8, T9; T8 = cr[WS(rs, 8)]; T9 = ci[WS(rs, 23)]; Ti4 = Ter + Tes; Tet = Ter - 
Tes; T2f = ci[WS(rs, 39)]; T2g = cr[WS(rs, 56)]; Ta = T8 + T9; T29 = T8 - T9; Tb = ci[WS(rs, 7)]; TcR = T2f - T2g; T2h = T2f + T2g; Tc = cr[WS(rs, 24)]; T2a = ci[WS(rs, 55)]; T2b = cr[WS(rs, 40)]; } { E Tj, T2l, Ti, Td1, T2t, Tk, T2m, T2n; { E Tg, Th, T2r, T2s; Tg = cr[WS(rs, 4)]; { E T2e, Td, TcS, T2c; T2e = Tb - Tc; Td = Tb + Tc; TcS = T2a - T2b; T2c = T2a + T2b; T2i = T2e - T2h; T5z = T2e + T2h; Te = Ta + Td; Teu = Ta - Td; TcT = TcR - TcS; Ti5 = TcS + TcR; T5y = T29 + T2c; T2d = T29 - T2c; Th = ci[WS(rs, 27)]; } T2r = ci[WS(rs, 59)]; T2s = cr[WS(rs, 36)]; Tj = cr[WS(rs, 20)]; T2l = Tg - Th; Ti = Tg + Th; Td1 = T2r - T2s; T2t = T2r + T2s; Tk = ci[WS(rs, 11)]; T2m = ci[WS(rs, 43)]; T2n = cr[WS(rs, 52)]; } { E Tn, To, T2C, T2D; Tn = ci[WS(rs, 3)]; { E T2q, Tl, Td2, T2o; T2q = Tj - Tk; Tl = Tj + Tk; Td2 = T2m - T2n; T2o = T2m + T2n; T8H = T2t - T2q; T2u = T2q + T2t; Td0 = Ti - Tl; Tm = Ti + Tl; Ti7 = Td1 + Td2; Td3 = Td1 - Td2; T8I = T2l + T2o; T2p = T2l - T2o; To = cr[WS(rs, 28)]; } T2C = ci[WS(rs, 35)]; T2D = cr[WS(rs, 60)]; Tq = cr[WS(rs, 12)]; T2w = Tn - To; Tp = Tn + To; TcV = T2C - T2D; T2E = T2C + T2D; Tr = ci[WS(rs, 19)]; T2x = ci[WS(rs, 51)]; T2y = cr[WS(rs, 44)]; } } } { E Tj6, T8K, T8L, T9L, T8F, Ti6, T8E, T9M, T5C, T5D, Ti9, Tj7; { E T2F, Ti8, T2A, TjA, Tew, Tex, Tjz; { E Tf, TcY, TcX, Tu, T5x, T5A; Tj6 = T7 - Te; Tf = T7 + Te; { E T2B, Ts, TcW, T2z, Tt; T2B = Tq - Tr; Ts = Tq + Tr; TcW = T2x - T2y; T2z = T2x + T2y; T8K = T2B + T2E; T2F = T2B - T2E; TcY = Tp - Ts; Tt = Tp + Ts; TcX = TcV - TcW; Ti8 = TcV + TcW; T8L = T2w + T2z; T2A = T2w - T2z; Tu = Tm + Tt; TjA = Tm - Tt; } T9L = T5w - T5t; T5x = T5t + T5w; T5A = T5y - T5z; T8F = T5y + T5z; Td4 = Td0 + Td3; Tew = Td0 - Td3; Thy = Tf - Tu; Tv = Tf + Tu; T7n = FNMS(KP707106781, T5A, T5x); T5B = FMA(KP707106781, T5A, T5x); Tex = TcY + TcX; TcZ = TcX - TcY; Ti6 = Ti4 + Ti5; Tjz = Ti4 - Ti5; } { E T28, T2j, T2v, T2G; T8E = T24 + T27; T28 = T24 - T27; TfP = Tew + Tex; Tey = Tew - Tex; Tkl = TjA + Tjz; TjB = 
Tjz - TjA; T2j = T2d + T2i; T9M = T2d - T2i; T5C = FMA(KP414213562, T2p, T2u); T2v = FNMS(KP414213562, T2u, T2p); T2G = FMA(KP414213562, T2F, T2A); T5D = FNMS(KP414213562, T2A, T2F); T6U = FNMS(KP707106781, T2j, T28); T2k = FMA(KP707106781, T2j, T28); T7o = T2v - T2G; T2H = T2v + T2G; Ti9 = Ti7 + Ti8; Tj7 = Ti8 - Ti7; } } { E T8J, T9O, T9P, T8M; TiH = Ti6 + Ti9; Tia = Ti6 - Ti9; Tk8 = Tj6 + Tj7; Tj8 = Tj6 - Tj7; T8J = FNMS(KP414213562, T8I, T8H); T9O = FMA(KP414213562, T8H, T8I); T6V = T5D - T5C; T5E = T5C + T5D; Tbz = FNMS(KP707106781, T9M, T9L); T9N = FMA(KP707106781, T9M, T9L); T9P = FMA(KP414213562, T8K, T8L); T8M = FNMS(KP414213562, T8L, T8K); Tb7 = T9O + T9P; T9Q = T9O - T9P; Tgh = Teu + Tet; Tev = Tet - Teu; Tb6 = FMA(KP707106781, T8F, T8E); T8G = FNMS(KP707106781, T8F, T8E); TbA = T8M - T8J; T8N = T8J + T8M; } } } { E T8S, TC, Tdn, Tdk, ThC, T3e, T8P, T36, T2X, Tda, TY, ThA, Tdf, T35, T2S; E T3x, T3o, Tdl, TJ, ThD, Tdq, T3w, T3j, T34, TR, Tdc, Td9, Thz, T2N; { E TV, T2O, TU, Tdd, T2W, TW, T2P, T2Q; { E Tz, T3r, Ty, Tdj, T3u, TA, T3b, T3c; { E Tw, Tx, T3s, T3t; Tw = cr[WS(rs, 2)]; TfO = TcQ + TcT; TcU = TcQ - TcT; Tgi = Td4 + TcZ; Td5 = TcZ - Td4; Tx = ci[WS(rs, 29)]; T3s = ci[WS(rs, 45)]; T3t = cr[WS(rs, 50)]; Tz = cr[WS(rs, 18)]; T3r = Tw - Tx; Ty = Tw + Tx; Tdj = T3s - T3t; T3u = T3s + T3t; TA = ci[WS(rs, 13)]; T3b = ci[WS(rs, 61)]; T3c = cr[WS(rs, 34)]; } { E T3a, TB, Tdi, T3d; T8S = T3r + T3u; T3v = T3r - T3u; T3a = Tz - TA; TB = Tz + TA; Tdi = T3b - T3c; T3d = T3b + T3c; TC = Ty + TB; Tdn = Ty - TB; Tdk = Tdi - Tdj; ThC = Tdi + Tdj; T3e = T3a + T3d; T8P = T3d - T3a; } } { E TS, TT, T2U, T2V; TS = cr[WS(rs, 6)]; TT = ci[WS(rs, 25)]; T2U = ci[WS(rs, 41)]; T2V = cr[WS(rs, 54)]; TV = ci[WS(rs, 9)]; T2O = TS - TT; TU = TS + TT; Tdd = T2U - T2V; T2W = T2U + T2V; TW = cr[WS(rs, 22)]; T2P = ci[WS(rs, 57)]; T2Q = cr[WS(rs, 38)]; } { E TG, T3f, TF, Tdo, T3n, TH, T3g, T3h; { E TD, TE, T3l, T3m; TD = cr[WS(rs, 10)]; { E T2T, TX, Tde, T2R; T2T = TV - TW; TX = TV + 
TW; Tde = T2P - T2Q; T2R = T2P + T2Q; T36 = T2T - T2W; T2X = T2T + T2W; Tda = TU - TX; TY = TU + TX; ThA = Tde + Tdd; Tdf = Tdd - Tde; T35 = T2O - T2R; T2S = T2O + T2R; TE = ci[WS(rs, 21)]; } T3l = ci[WS(rs, 37)]; T3m = cr[WS(rs, 58)]; TG = ci[WS(rs, 5)]; T3f = TD - TE; TF = TD + TE; Tdo = T3l - T3m; T3n = T3l + T3m; TH = cr[WS(rs, 26)]; T3g = ci[WS(rs, 53)]; T3h = cr[WS(rs, 42)]; } { E TO, T30, TN, Td8, T33, TP, T2K, T2L; { E TL, TM, T31, T32; TL = ci[WS(rs, 1)]; { E T3k, TI, Tdp, T3i; T3k = TG - TH; TI = TG + TH; Tdp = T3g - T3h; T3i = T3g + T3h; T3x = T3k - T3n; T3o = T3k + T3n; Tdl = TF - TI; TJ = TF + TI; ThD = Tdp + Tdo; Tdq = Tdo - Tdp; T3w = T3f - T3i; T3j = T3f + T3i; TM = cr[WS(rs, 30)]; } T31 = ci[WS(rs, 49)]; T32 = cr[WS(rs, 46)]; TO = cr[WS(rs, 14)]; T30 = TL - TM; TN = TL + TM; Td8 = T31 - T32; T33 = T31 + T32; TP = ci[WS(rs, 17)]; T2K = ci[WS(rs, 33)]; T2L = cr[WS(rs, 62)]; } { E T2J, TQ, Td7, T2M; T8Z = T30 + T33; T34 = T30 - T33; T2J = TO - TP; TQ = TO + TP; Td7 = T2K - T2L; T2M = T2K + T2L; TR = TN + TQ; Tdc = TN - TQ; Td9 = Td7 - Td8; Thz = Td7 + Td8; T2N = T2J - T2M; T8W = T2J + T2M; } } } } { E Tja, Tj9, TfU, TfV, TfR, Tdb, Tdg, TfS; { E ThE, ThB, Tdm, Tdr; { E Tjc, TK, TZ, Tjd; Tjc = TC - TJ; TK = TC + TJ; TZ = TR + TY; Tja = TR - TY; Tjd = ThC - ThD; ThE = ThC + ThD; Tj9 = Thz - ThA; ThB = Thz + ThA; Ti3 = TK - TZ; T10 = TK + TZ; TjC = Tjc - Tjd; Tje = Tjc + Tjd; } TfU = Tdl + Tdk; Tdm = Tdk - Tdl; Tdr = Tdn - Tdq; TfV = Tdn + Tdq; TiI = ThE + ThB; ThF = ThB - ThE; TeA = FMA(KP414213562, Tdm, Tdr); Tds = FNMS(KP414213562, Tdr, Tdm); TfR = Tda + Td9; Tdb = Td9 - Tda; Tdg = Tdc - Tdf; TfS = Tdc + Tdf; } { E T2Z, T6X, T37, T2Y;
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -