hc2cb_32.c
字號:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:10:49 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2c -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cb_32 -include hc2cb.h *//* * This function contains 434 FP additions, 260 FP multiplications, * (or, 236 additions, 62 multiplications, 198 fused multiply/add), * 137 stack variables, 7 constants, and 128 memory accesses */#include "hc2cb.h"static void hc2cb_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + 
ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) { E T5o, T5r, T5q, T5n, T5s, T5p; { E T5K, Tf, T8k, T7k, T8x, T7N, T3i, T1i, T3v, T2L, T5f, T4v, T6T, T6m, T52; E T42, TZ, T6X, T3p, T1X, T8B, T8p, T3o, T26, T58, T4n, T7T, T7z, T59, T4k; E T6p, T6a, TK, T6W, T8s, T8A, T2o, T3m, T3l, T2x, T55, T4g, T7S, T7G, T56; E T4d, T6o, T61, T5Q, T5N, T6f, Tu, T8y, T7r, T8l, T7Q, T3w, T1F, T45, T48; E T3j, T2O, T53, T4y, T62, T69; { E T6l, T6i, T40, T41; { E T12, T3, T6g, T2G, T2D, T6, T6h, T15, Td, T6k, T1g, T2J, Ta, T17, T1a; E T6j; { E T4, T5, T13, T14; { E T1, T2, T2E, T2F; T1 = Rp[0]; T2 = Rm[WS(rs, 15)]; T2E = Ip[0]; T2F = Im[WS(rs, 15)]; T4 = Rp[WS(rs, 8)]; T12 = T1 - T2; T3 = T1 + T2; T6g = T2E - T2F; T2G = T2E + T2F; T5 = Rm[WS(rs, 7)]; } T13 = Ip[WS(rs, 8)]; T14 = Im[WS(rs, 7)]; { E Tb, Tc, T1d, T1e; Tb = Rm[WS(rs, 3)]; T2D = T4 - T5; T6 = T4 + T5; T6h = T13 - T14; T15 = T13 + T14; Tc = Rp[WS(rs, 12)]; T1d = Ip[WS(rs, 12)]; T1e = Im[WS(rs, 3)]; { E T8, T1c, T1f, T9, T18, T19; T8 = Rp[WS(rs, 4)]; Td = Tb + Tc; T1c = Tb - Tc; T6k = T1d - T1e; T1f = T1d + T1e; T9 = Rm[WS(rs, 11)]; T18 = Ip[WS(rs, 4)]; T19 = Im[WS(rs, 11)]; T1g = T1c - T1f; T2J = T1c + T1f; Ta = T8 + T9; T17 = T8 - T9; T1a = T18 + T19; T6j = T18 - T19; } } } { E T2I, T7M, T7L, T16, T1h, T4u, T4t, T2H, T2K; { E T7i, T7, T1b, Te, T7j; T7i = T3 - T6; T7 = T3 + T6; T2I = T17 + T1a; T1b = T17 - T1a; Te = Ta + Td; T7M = Ta - Td; T7j = T6k - T6j; T6l = T6j + T6k; T6i = T6g + T6h; T7L = T6g - T6h; T5K = T7 - Te; Tf = T7 + Te; T8k = T7i + T7j; T7k = T7i - T7j; T40 = T12 + T15; T16 = T12 - T15; T1h = T1b + T1g; T4u = T1b - T1g; } T4t = T2G - T2D; T2H = T2D + T2G; T8x = T7M + T7L; T7N = T7L - T7M; T3i = FMA(KP707106781, T1h, T16); T1i = FNMS(KP707106781, T1h, T16); T2K = T2I - T2J; T41 = T2I + T2J; T3v = FMA(KP707106781, T2K, T2H); T2L = FNMS(KP707106781, T2K, T2H); T5f = FNMS(KP707106781, T4u, T4t); T4v = FMA(KP707106781, T4u, T4t); } } { E T1Y, T1H, TR, T7w, T1K, T21, T65, T7t, 
TU, T66, T23, T1Q, T1R, TX, T67; E T1U, TY, T7u; { E TL, TM, TO, TP, T63, T64; TL = Rm[0]; T6T = T6i + T6l; T6m = T6i - T6l; T52 = FMA(KP707106781, T41, T40); T42 = FNMS(KP707106781, T41, T40); TM = Rp[WS(rs, 15)]; TO = Rp[WS(rs, 7)]; TP = Rm[WS(rs, 8)]; { E T1I, TN, TQ, T1J, T1Z, T20; T1I = Ip[WS(rs, 15)]; T1Y = TL - TM; TN = TL + TM; T1H = TO - TP; TQ = TO + TP; T1J = Im[0]; T1Z = Ip[WS(rs, 7)]; T20 = Im[WS(rs, 8)]; TR = TN + TQ; T7w = TN - TQ; T1K = T1I + T1J; T63 = T1I - T1J; T64 = T1Z - T20; T21 = T1Z + T20; } { E TV, T1M, T1P, TW, T1S, T1T; { E TS, TT, T1N, T1O; TS = Rp[WS(rs, 3)]; T65 = T63 + T64; T7t = T63 - T64; TT = Rm[WS(rs, 12)]; T1N = Ip[WS(rs, 3)]; T1O = Im[WS(rs, 12)]; TV = Rm[WS(rs, 4)]; T1M = TS - TT; TU = TS + TT; T66 = T1N - T1O; T1P = T1N + T1O; TW = Rp[WS(rs, 11)]; T1S = Ip[WS(rs, 11)]; T1T = Im[WS(rs, 4)]; } T23 = T1M - T1P; T1Q = T1M + T1P; T1R = TV - TW; TX = TV + TW; T67 = T1S - T1T; T1U = T1S + T1T; } } TY = TU + TX; T7u = TU - TX; { E T7x, T68, T1V, T24; T7x = T67 - T66; T68 = T66 + T67; T1V = T1R + T1U; T24 = T1R - T1U; { E T4l, T1L, T1W, T4j, T7v, T8n, T8o, T7y; T62 = TR - TY; TZ = TR + TY; T6X = T65 + T68; T69 = T65 - T68; T4l = T1H + T1K; T1L = T1H - T1K; T1W = T1Q - T1V; T4j = T1Q + T1V; T7v = T7t - T7u; T8n = T7u + T7t; T8o = T7w + T7x; T7y = T7w - T7x; { E T4i, T22, T25, T4m; T4i = T1Y + T21; T22 = T1Y - T21; T3p = FMA(KP707106781, T1W, T1L); T1X = FNMS(KP707106781, T1W, T1L); T8B = FMA(KP414213562, T8n, T8o); T8p = FNMS(KP414213562, T8o, T8n); T25 = T23 + T24; T4m = T23 - T24; T3o = FMA(KP707106781, T25, T22); T26 = FNMS(KP707106781, T25, T22); T58 = FMA(KP707106781, T4m, T4l); T4n = FNMS(KP707106781, T4m, T4l); T7T = FNMS(KP414213562, T7v, T7y); T7z = FMA(KP414213562, T7y, T7v); T59 = FMA(KP707106781, T4j, T4i); T4k = FNMS(KP707106781, T4j, T4i); } } } } } { E T5T, T60, T4c, T4b; { E T2p, T28, T2b, T7D, TC, T2s, T7A, T5W, TF, T2j, T5X, T2i, TI, T2k, T2u; E T2h; { E Tz, Ty, TA, Tw, Tx; Tw = Rp[WS(rs, 1)]; Tx = Rm[WS(rs, 14)]; Tz = 
Rp[WS(rs, 9)]; T6p = T69 - T62; T6a = T62 + T69; Ty = Tw + Tx; T2p = Tw - Tx; TA = Rm[WS(rs, 6)]; { E T5U, T5V, T2d, T2g; { E T2q, T2r, T29, T2a, TB; T29 = Ip[WS(rs, 1)]; T2a = Im[WS(rs, 14)]; TB = Tz + TA; T28 = Tz - TA; T2q = Ip[WS(rs, 9)]; T5U = T29 - T2a; T2b = T29 + T2a; T2r = Im[WS(rs, 6)]; T7D = Ty - TB; TC = Ty + TB; T2s = T2q + T2r; T5V = T2q - T2r; } { E T2e, T2f, TD, TE, TG, TH; TD = Rp[WS(rs, 5)]; TE = Rm[WS(rs, 10)]; T7A = T5U - T5V; T5W = T5U + T5V; T2e = Ip[WS(rs, 5)]; T2d = TD - TE; TF = TD + TE; T2f = Im[WS(rs, 10)]; TG = Rm[WS(rs, 2)]; TH = Rp[WS(rs, 13)]; T2j = Ip[WS(rs, 13)]; T5X = T2e - T2f; T2g = T2e + T2f; T2i = TG - TH; TI = TG + TH; T2k = Im[WS(rs, 2)]; } T2u = T2d - T2g; T2h = T2d + T2g; } } { E TJ, T7B, T2l, T5Y; TJ = TF + TI; T7B = TF - TI; T2l = T2j + T2k; T5Y = T2j - T2k; { E T4e, T2c, T2v, T8q, T7C, T7F, T8r, T2n, T7E, T2m, T5Z, T4f, T2t, T2w; T4e = T2b - T28; T2c = T28 + T2b; TK = TC + TJ; T5T = TC - TJ; T7E = T5Y - T5X; T5Z = T5X + T5Y; T2m = T2i + T2l; T2v = T2i - T2l; T60 = T5W - T5Z; T6W = T5W + T5Z; T8q = T7B + T7A; T7C = T7A - T7B; T7F = T7D - T7E; T8r = T7D + T7E; T2n = T2h - T2m; T4c = T2h + T2m; T4b = T2p + T2s; T2t = T2p - T2s; T2w = T2u + T2v; T4f = T2v - T2u; T8s = FMA(KP414213562, T8r, T8q); T8A = FNMS(KP414213562, T8q, T8r); T2o = FNMS(KP707106781, T2n, T2c); T3m = FMA(KP707106781, T2n, T2c); T3l = FMA(KP707106781, T2w, T2t); T2x = FNMS(KP707106781, T2w, T2t); T55 = FMA(KP707106781, T4f, T4e); T4g = FNMS(KP707106781, T4f, T4e); T7S = FMA(KP414213562, T7C, T7F); T7G = FNMS(KP414213562, T7F, T7C); } } } { E T43, T1y, T7o, Tm, T7p, T44, T1D, Tq, T1o, Tp, T5L, T1m, Tr, T1p, T1q; { E Tj, T1z, Ti, T5O, T1x, Tk, T1A, T1B; { E Tg, Th, T1v, T1w; Tg = Rp[WS(rs, 2)]; T56 = FMA(KP707106781, T4c, T4b); T4d = FNMS(KP707106781, T4c, T4b); T6o = T5T + T60; T61 = T5T - T60; Th = Rm[WS(rs, 13)]; T1v = Ip[WS(rs, 2)]; T1w = Im[WS(rs, 13)]; Tj = Rp[WS(rs, 10)]; T1z = Tg - Th; Ti = Tg + Th; T5O = T1v - T1w; T1x = T1v + T1w; Tk = Rm[WS(rs, 
5)]; T1A = Ip[WS(rs, 10)]; T1B = Im[WS(rs, 5)]; } { E Tn, To, T1k, T1l; Tn = Rm[WS(rs, 1)]; { E T1u, Tl, T5P, T1C; T1u = Tj - Tk; Tl = Tj + Tk; T5P = T1A - T1B; T1C = T1A + T1B; T43 = T1x - T1u; T1y = T1u + T1x; T7o = Ti - Tl; Tm = Ti + Tl; T5Q = T5O + T5P; T7p = T5O - T5P; T44 = T1z + T1C; T1D = T1z - T1C; To = Rp[WS(rs, 14)]; } T1k = Ip[WS(rs, 14)]; T1l = Im[WS(rs, 1)]; Tq = Rp[WS(rs, 6)]; T1o = Tn - To; Tp = Tn + To; T5L = T1k - T1l; T1m = T1k + T1l; Tr = Rm[WS(rs, 9)]; T1p = Ip[WS(rs, 6)]; T1q = Im[WS(rs, 9)]; } } { E T46, T47, T7P, T7O, T2N, T1t, T1E, T2M, T4w, T4x; { E T1n, Tt, T1s, T7n, T7q, T7m, T7l; { E T1j, Ts, T5M, T1r; T1j = Tq - Tr; Ts = Tq + Tr; T5M = T1p - T1q; T1r = T1p + T1q; T46 = T1j + T1m; T1n = T1j - T1m; T7m = Tp - Ts; Tt = Tp + Ts; T5N = T5L + T5M; T7l = T5L - T5M; T47 = T1o + T1r; T1s = T1o - T1r; } T7P = T7m + T7l; T7n = T7l - T7m; T7q = T7o + T7p; T7O = T7o - T7p; T6f = Tm - Tt; Tu = Tm + Tt; T8y = T7q + T7n; T7r = T7n - T7q; T2N = FMA(KP414213562, T1n, T1s); T1t = FNMS(KP414213562, T1s, T1n); T1E = FMA(KP414213562, T1D, T1y); T2M = FNMS(KP414213562, T1y, T1D); } T8l = T7O + T7P; T7Q = T7O - T7P; T3w = T1E + T1t; T1F = T1t - T1E; T45 = FNMS(KP414213562, T44, T43); T4w = FMA(KP414213562, T43, T44); T4x = FMA(KP414213562, T46, T47); T48 = FNMS(KP414213562, T47, T46); T3j = T2M + T2N; T2O = T2M - T2N; T53 = T4w + T4x; T4y = T4w - T4x; } } } {
快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -