hc2cbdft2_32.c
字號:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:12:25 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft2_32 -include hc2cb.h *//* * This function contains 498 FP additions, 260 FP multiplications, * (or, 300 additions, 62 multiplications, 198 fused multiply/add), * 165 stack variables, 7 constants, and 128 memory accesses */#include "hc2cb.h"static void hc2cbdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT m; for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, 
Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) { E T8e, T8h, T7S, T8l, T8f, T84, T8c, T8k, T8g, T86, T82, T8m, T8i; { E T4B, T3h, T3K, Tv, T8Y, T6T, T8L, T7i, T8X, T7f, T4Y, T1G, T4K, T1j, T4X; E T2M, T8C, T6d, T8o, T66, T8K, T6M, T4L, T2P, T4C, T3o, T5q, T4q, T8p, T6C; E T8B, T6z, T72, T2u, T75, T10, T3P, T3a, T3L, T4t, T4E, T8F, T8t, T4F, T4w; E T8E, T8w, T6E, T6l, T6F, T6s, T76, T4P, T51, T2R, T28, T8P, T90, T7k, T71; E T2p, T4R, T2x, T73, T6x, T6y; { E T3l, T16, T3m, T2H, T2E, T13, T64, T7, T3i, T2J, T1c, T3j, T1h, T2K, Te; E T1z, T6R, T6a, Tt, T3g, T6b, T1E, T6Q, Tj, T1p, Ti, T3b, T1n, Tk, T1q; E T1r; { E T1, T2, T4, T5; { E T14, T15, T2F, T2G; T14 = Ip[0]; T15 = Im[WS(rs, 15)]; T2F = Ip[WS(rs, 8)]; T2G = Im[WS(rs, 7)]; T1 = Rp[0]; T3l = T14 - T15; T16 = T14 + T15; T3m = T2F - T2G; T2H = T2F + T2G; T2 = Rm[WS(rs, 15)]; T4 = Rp[WS(rs, 8)]; T5 = Rm[WS(rs, 7)]; } { E T1b, T1e, T18, Ta, T1f, Tb, Tc, T8, T9, T1g, T1d, Td; { E T19, T3, T6, T1a; T19 = Ip[WS(rs, 4)]; T2E = T1 - T2; T3 = T1 + T2; T13 = T4 - T5; T6 = T4 + T5; T1a = Im[WS(rs, 11)]; T8 = Rp[WS(rs, 4)]; T9 = Rm[WS(rs, 11)]; T64 = T3 - T6; T7 = T3 + T6; T1b = T19 + T1a; T3i = T19 - T1a; } T1e = Im[WS(rs, 3)]; T18 = T8 - T9; Ta = T8 + T9; T1f = Ip[WS(rs, 12)]; Tb = Rm[WS(rs, 3)]; Tc = Rp[WS(rs, 12)]; T2J = T18 - T1b; T1c = T18 + T1b; T1g = T1e + T1f; T3j = T1f - T1e; T1d = Tb - Tc; Td = Tb + Tc; T1h = T1d + T1g; T2K = T1d - T1g; T6x = Ta - Td; Te = Ta + Td; } { E Tq, T1A, Tp, T3e, T1y, Tr, T1B, T1C; { E Tn, To, T1w, T1x; Tn = Rm[WS(rs, 1)]; To = Rp[WS(rs, 14)]; T1w = Im[WS(rs, 1)]; T1x = Ip[WS(rs, 14)]; Tq = Rp[WS(rs, 6)]; T1A = Tn - To; Tp = Tn + To; T3e = T1x - T1w; T1y = T1w + T1x; Tr = Rm[WS(rs, 9)]; T1B = Ip[WS(rs, 6)]; T1C = Im[WS(rs, 9)]; } { E Tg, Th, T1l, T1m; Tg = Rp[WS(rs, 2)]; { E T1v, Ts, T3f, T1D; T1v = Tq - Tr; Ts = Tq + Tr; T3f = T1B - T1C; T1D = T1B + T1C; T1z = T1v - T1y; T6R = T1v + T1y; T6a = Tp - Ts; Tt = Tp + Ts; T3g = T3e + T3f; T6b = T3e 
- T3f; T1E = T1A - T1D; T6Q = T1A + T1D; Th = Rm[WS(rs, 13)]; } T1l = Ip[WS(rs, 2)]; T1m = Im[WS(rs, 13)]; Tj = Rp[WS(rs, 10)]; T1p = Tg - Th; Ti = Tg + Th; T3b = T1l - T1m; T1n = T1l + T1m; Tk = Rm[WS(rs, 5)]; T1q = Ip[WS(rs, 10)]; T1r = Im[WS(rs, 5)]; } } } { E T4o, T67, T68, T4p, T2I, T1i, T2N, T1u, T1F, T2O, T6K, T17; { E Tf, T1o, T1t, Tu, T7g, T6P, T6S, T7h, T7d, T7e; { E T6O, T6N, T1k, Tl; T4o = T7 - Te; Tf = T7 + Te; T1k = Tj - Tk; Tl = Tj + Tk; { E T3c, T1s, Tm, T3d; T3c = T1q - T1r; T1s = T1q + T1r; T1o = T1k + T1n; T6O = T1n - T1k; T67 = Ti - Tl; Tm = Ti + Tl; T3d = T3b + T3c; T68 = T3b - T3c; T1t = T1p - T1s; T6N = T1p + T1s; T4B = Tm - Tt; Tu = Tm + Tt; T4p = T3g - T3d; T3h = T3d + T3g; } T7g = FNMS(KP414213562, T6N, T6O); T6P = FMA(KP414213562, T6O, T6N); T6S = FMA(KP414213562, T6R, T6Q); T7h = FNMS(KP414213562, T6Q, T6R); } T3K = Tf - Tu; Tv = Tf + Tu; T8Y = T6P + T6S; T6T = T6P - T6S; T2I = T2E - T2H; T7d = T2E + T2H; T7e = T1c + T1h; T1i = T1c - T1h; T2N = FNMS(KP414213562, T1o, T1t); T1u = FMA(KP414213562, T1t, T1o); T8L = T7h - T7g; T7i = T7g + T7h; T8X = FMA(KP707106781, T7e, T7d); T7f = FNMS(KP707106781, T7e, T7d); T1F = FNMS(KP414213562, T1E, T1z); T2O = FMA(KP414213562, T1z, T1E); T6K = T16 - T13; T17 = T13 + T16; } { E T6L, T6A, T6B, T65, T3k, T2L, T69, T6c, T3n; T4Y = T1F - T1u; T1G = T1u + T1F; T4K = FNMS(KP707106781, T1i, T17); T1j = FMA(KP707106781, T1i, T17); T2L = T2J + T2K; T6L = T2J - T2K; T6A = T67 + T68; T69 = T67 - T68; T6c = T6a + T6b; T6B = T6b - T6a; T4X = FNMS(KP707106781, T2L, T2I); T2M = FMA(KP707106781, T2L, T2I); T8C = T69 - T6c; T6d = T69 + T6c; T65 = T3j - T3i; T3k = T3i + T3j; T8o = T64 - T65; T66 = T64 + T65; T8K = FNMS(KP707106781, T6L, T6K); T6M = FMA(KP707106781, T6L, T6K); T3n = T3l + T3m; T6y = T3l - T3m; T4L = T2N - T2O; T2P = T2N + T2O; T4C = T3n - T3k; T3o = T3k + T3n; T5q = T4o - T4p; T4q = T4o + T4p; T8p = T6B - T6A; T6C = T6A + T6B; } } } { E T1M, T6V, T6f, TC, T31, T6j, T23, T6Y, T2v, T2i, TY, T6p, T6n, T35, 
T2n; E T2w, T24, T1R, TJ, T6i, T6g, T2Y, T1W, T25, T2q, TN, T2r, T36, T2c, T29; E TQ, T2s; { E TU, T2k, T33, T2j, TX, T2l, T2m, T34; { E T1Z, Ty, T20, T2Z, T1L, T1I, TB, T21, T2e, T2h; { E T1J, T1K, Tw, Tx, Tz, TA; Tw = Rp[WS(rs, 1)]; Tx = Rm[WS(rs, 14)]; T1J = Ip[WS(rs, 1)]; T8B = T6y - T6x; T6z = T6x + T6y; T1Z = Tw - Tx; Ty = Tw + Tx; T1K = Im[WS(rs, 14)]; Tz = Rp[WS(rs, 9)]; TA = Rm[WS(rs, 6)]; T20 = Ip[WS(rs, 9)]; T2Z = T1J - T1K; T1L = T1J + T1K; T1I = Tz - TA; TB = Tz + TA; T21 = Im[WS(rs, 6)]; } { E T2f, T2g, TV, TW; { E TS, T30, T22, TT; TS = Rp[WS(rs, 3)]; T1M = T1I + T1L; T6V = T1L - T1I; T6f = Ty - TB; TC = Ty + TB; T30 = T20 - T21; T22 = T20 + T21; TT = Rm[WS(rs, 12)]; T2f = Ip[WS(rs, 3)]; T31 = T2Z + T30; T6j = T2Z - T30; T23 = T1Z - T22; T6Y = T1Z + T22; T2e = TS - TT; TU = TS + TT; T2g = Im[WS(rs, 12)]; } TV = Rm[WS(rs, 4)]; TW = Rp[WS(rs, 11)]; T2k = Im[WS(rs, 4)]; T33 = T2f - T2g; T2h = T2f + T2g; T2j = TV - TW; TX = TV + TW; T2l = Ip[WS(rs, 11)]; } T2v = T2e - T2h; T2i = T2e + T2h; } TY = TU + TX; T6p = TU - TX; T2m = T2k + T2l; T34 = T2l - T2k; { E TF, T1T, T2W, T1S, TI, T1U, T1N, T1Q, T1V, T2X; { E T1O, T1P, TD, TE, TG, TH; TD = Rp[WS(rs, 5)]; TE = Rm[WS(rs, 10)]; T6n = T34 - T33; T35 = T33 + T34; T2n = T2j + T2m; T2w = T2j - T2m; T1N = TD - TE; TF = TD + TE; T1O = Ip[WS(rs, 5)]; T1P = Im[WS(rs, 10)]; TG = Rm[WS(rs, 2)]; TH = Rp[WS(rs, 13)]; T1T = Im[WS(rs, 2)]; T2W = T1O - T1P; T1Q = T1O + T1P; T1S = TG - TH; TI = TG + TH; T1U = Ip[WS(rs, 13)]; } T24 = T1N - T1Q; T1R = T1N + T1Q; TJ = TF + TI; T6i = TF - TI; T1V = T1T + T1U; T2X = T1U - T1T; { E T2a, T2b, TL, TM, TO, TP; TL = Rm[0]; TM = Rp[WS(rs, 15)]; T6g = T2X - T2W; T2Y = T2W + T2X; T1W = T1S + T1V; T25 = T1S - T1V; T2q = TL - TM; TN = TL + TM; T2a = Im[0]; T2b = Ip[WS(rs, 15)]; TO = Rp[WS(rs, 7)]; TP = Rm[WS(rs, 8)]; T2r = Ip[WS(rs, 7)]; T36 = T2b - T2a; T2c = T2a + T2b; T29 = TO - TP; TQ = TO + TP; T2s = Im[WS(rs, 8)]; } } } { E T2d, T4u, T4v, T6r, T6o, T6k, T8u, T8v, T6h; { E T4r, T6m, 
T32, T4s, T6q, T39, T8r, T8s; { E TK, TR, T37, T2t, TZ, T38; T4r = TC - TJ; TK = TC + TJ; T2d = T29 - T2c; T72 = T29 + T2c; T6m = TN - TQ; TR = TN + TQ; T37 = T2r - T2s; T2t = T2r + T2s; T32 = T2Y + T31; T4s = T31 - T2Y; T4u = TR - TY; TZ = TR + TY; T38 = T36 + T37; T6q = T36 - T37; T2u = T2q - T2t; T75 = T2q + T2t; T10 = TK + TZ; T3P = TK - TZ; T4v = T38 - T35; T39 = T35 + T38; } T8r = T6q - T6p; T6r = T6p + T6q; T3a = T32 + T39; T3L = T39 - T32; T8s = T6m - T6n; T6o = T6m + T6n; T4t = T4r - T4s; T4E = T4r + T4s; T8F = FNMS(KP414213562, T8r, T8s); T8t = FMA(KP414213562, T8s, T8r); T6k = T6i + T6j; T8u = T6j - T6i; T8v = T6f - T6g; T6h = T6f + T6g; } { E T6Z, T1Y, T4O, T26, T6W, T1X, T2o, T4N, T27; T4F = T4v - T4u; T4w = T4u + T4v; T8E = FMA(KP414213562, T8u, T8v); T8w = FNMS(KP414213562, T8v, T8u); T6Z = T1R + T1W; T1X = T1R - T1W; T6E = FMA(KP414213562, T6h, T6k); T6l = FNMS(KP414213562, T6k, T6h); T6F = FNMS(KP414213562, T6o, T6r); T6s = FMA(KP414213562, T6r, T6o); T1Y = FMA(KP707106781, T1X, T1M); T4O = FNMS(KP707106781, T1X, T1M); T26 = T24 + T25; T6W = T25 - T24; T76 = T2i + T2n; T2o = T2i - T2n; T4N = FNMS(KP707106781, T26, T23); T27 = FMA(KP707106781, T26, T23); { E T8O, T6X, T8N, T70; T8O = FMA(KP707106781, T6W, T6V); T6X = FNMS(KP707106781, T6W, T6V); T8N = FMA(KP707106781, T6Z, T6Y); T70 = FNMS(KP707106781, T6Z, T6Y); T4P = FMA(KP668178637, T4O, T4N); T51 = FNMS(KP668178637, T4N, T4O); T2R = FNMS(KP198912367, T1Y, T27); T28 = FMA(KP198912367, T27, T1Y); T8P = FMA(KP198912367, T8O, T8N); T90 = FNMS(KP198912367, T8N, T8O); T7k = FNMS(KP668178637, T6X, T70); T71 = FMA(KP668178637, T70, T6X); T2p = FMA(KP707106781, T2o, T2d); T4R = FNMS(KP707106781, T2o, T2d); } T2x = T2v + T2w; T73 = T2v - T2w; } } } { E T8S, T91, T7l, T78, T5U, T5X, T5y, T61, T5V, T5K, T5S, T60, T5W, T5M, T5I; { E T4S, T50, T4e, T4h, T3S, T4l, T4f, T44, T4c, T4k, T4g, T46, T42; { E T3Q, T3U, T40, T3Z, T3V, T3A, T3D, T3H, T3B, T3y, T3G, T3C; { E T11, T3t, T3w, T3q, T3x, T3v, T3F, T12, T2B, 
T2U, T3z, T2C; { E T3u, T2S, T2z, T3p, T4Q, T2y; T3u = Tv - T10; T11 = Tv + T10; T4Q = FNMS(KP707106781, T2x, T2u); T2y = FMA(KP707106781, T2x, T2u); { E T8R, T74, T8Q, T77; T8R = FMA(KP707106781, T73, T72); T74 = FNMS(KP707106781, T73, T72); T8Q = FMA(KP707106781, T76, T75); T77 = FNMS(KP707106781, T76, T75); T4S = FNMS(KP668178637, T4R, T4Q); T50 = FMA(KP668178637, T4Q, T4R); T2S = FMA(KP198912367, T2p, T2y); T2z = FNMS(KP198912367, T2y, T2p); T8S = FMA(KP198912367, T8R, T8Q); T91 = FNMS(KP198912367, T8Q, T8R); T7l = FNMS(KP668178637, T74, T77); T78 = FMA(KP668178637, T77, T74);
快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -