hb2_32.c
字號:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:07:56 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hb2_32 -include hb.h *//* * This function contains 488 FP additions, 350 FP multiplications, * (or, 236 additions, 98 multiplications, 252 fused multiply/add), * 204 stack variables, 7 constants, and 128 memory accesses */#include "hb.h"static void hb2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, 
cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) { E T5u, T6b, T6e, T5I, T66, T60, T5U, T5R, T67, T5L, T61, T5x, T5A, T5D, T5O; E T62, T5V, T5P; { E T11, T14, T12, T37, T17, T1b, T39, T15, T7C, T8P, T8S, T7I, T98, T7e, T78; E T8V, T3d, T3x, T3a, T3v, T9s, T3G, T4p, T5X, T16, T9m, T3y, T4b, T3C, T4g; E T5Z, T1a, T4r, T3J, T2O, T1c, T4W, T4s, T3Y, T3K, T3l, T3e, T3i, T3q, T8K; E T8E, T8m, T7S, T5k, T5e; { E T13, T3c, T38, T3F, T7B, T9l, T77, T7d, T9r, T7H; T11 = W[2]; T14 = W[3]; T12 = W[4]; T37 = W[0]; T17 = W[6]; T1b = W[7]; T13 = T11 * T12; T3c = T37 * T14; T38 = T37 * T11; T3F = T37 * T12; T7B = T11 * T17; T9l = T12 * T17; T77 = T37 * T17; T7d = T37 * T1b; T9r = T12 * T1b; T7H = T11 * T1b; T39 = W[1]; T15 = W[5]; { E T3I, T19, T5d, T3b, T18, T2N; T7C = FMA(T14, T1b, T7B); T8P = FNMS(T14, T1b, T7B); T8S = FMA(T14, T17, T7H); T7I = FNMS(T14, T17, T7H); T98 = FNMS(T39, T17, T7d); T7e = FMA(T39, T17, T7d); T78 = FNMS(T39, T1b, T77); T8V = FMA(T39, T1b, T77); T3d = FMA(T39, T11, T3c); T3x = FNMS(T39, T11, T3c); T3a = FNMS(T39, T14, T38); T3v = FMA(T39, T14, T38); T9s = FNMS(T15, T17, T9r); T3G = FNMS(T39, T15, T3F); T4p = FMA(T39, T15, T3F); T5X = FNMS(T14, T15, T13); T16 = FMA(T14, T15, T13); T3I = T37 * T15; T19 = T11 * T15; T5d = T3v * T12; T3b = T3a * T12; T9m = FMA(T15, T1b, T9l); { E T3w, T3B, T5t, T5H; T3w = T3v * T17; T3B = T3v * T1b; T5t = T3a * T17; T5H = T3a * T1b; T3y = FNMS(T3x, T1b, T3w); T4b = FMA(T3x, T1b, T3w); T3C = FMA(T3x, T17, T3B); T4g = FNMS(T3x, T17, T3B); T5u = FMA(T3d, T1b, T5t); T6b = FNMS(T3d, T1b, T5t); T6e = FMA(T3d, T17, T5H); T5I = FNMS(T3d, T17, T5H); T18 = T16 * T17; T2N = T16 * T1b; T5Z = FMA(T14, T12, T19); T1a = FNMS(T14, T12, T19); } { E T3H, T3X, T4q, T4V, T5Y, T65; T4q = T4p * T17; T4V = T4p * T1b; T4r = FNMS(T39, T12, T3I); T3J = FMA(T39, T12, T3I); T2O = FNMS(T1a, T17, T2N); T1c = FMA(T1a, T1b, T18); T3H = T3G * T17; T4W = FNMS(T4r, T17, T4V); T4s = FMA(T4r, T1b, T4q); T3X = T3G * T1b; T5Y = T5X * T17; T65 = 
T5X * T1b; T3Y = FNMS(T3J, T17, T3X); T3K = FMA(T3J, T1b, T3H); { E T8J, T8D, T3h, T5j, T8l, T7R; T3h = T3a * T15; T66 = FNMS(T5Z, T17, T65); T60 = FMA(T5Z, T1b, T5Y); T3l = FNMS(T3d, T15, T3b); T3e = FMA(T3d, T15, T3b); T3i = FNMS(T3d, T12, T3h); T3q = FMA(T3d, T12, T3h); T8J = T3l * T1b; T8D = T3l * T17; T5j = T3v * T15; T8l = T3e * T1b; T7R = T3e * T17; T8K = FNMS(T3q, T17, T8J); T8E = FMA(T3q, T1b, T8D); T8m = FNMS(T3i, T17, T8l); T7S = FMA(T3i, T1b, T7R); T5U = FNMS(T3x, T12, T5j); T5k = FMA(T3x, T12, T5j); T5e = FNMS(T3x, T15, T5d); T5R = FMA(T3x, T15, T5d); } } } } { E T6O, T6i, T7s, T7o, T6j, Tf, T8W, T7V, T99, T8p, T3L, T1t, T3Z, T2X, T5J; E T4Z, T7t, T6W, T5v, T4v, TZ, T7x, T91, T9d, T28, T3S, T3R, T2h, T5B, T4Q; E T8v, T8a, T5C, T4N, T6Z, T6J, TK, T7w, T2z, T3P, T94, T9c, T3O, T2I, T5y; E T4J, T8u, T8h, T5z, T4G, T6Y, T6A, T6p, T6m, T6P, Tu, T9a, T82, T8X, T8s; E T40, T1Q, T4y, T4B, T3M, T30, T5w, T52; { E T6B, T6I, T4L, T4M, T4t, T4u; { E T1d, T3, T2P, T6, T6Q, T2S, T6R, T1g, Td, T6U, T1i, Ta, T2V, T1r, T6T; E T1l; { E T4, T5, T2Q, T2R, T1, T2, T1e, T1f; T1 = cr[0]; T2 = ci[WS(rs, 15)]; { E T6N, T6h, T7r, T7n; T6N = T5R * T1b; T6h = T5R * T17; T7r = T5e * T1b; T7n = T5e * T17; T6O = FNMS(T5U, T17, T6N); T6i = FMA(T5U, T1b, T6h); T7s = FNMS(T5k, T17, T7r); T7o = FMA(T5k, T1b, T7n); T1d = T1 - T2; T3 = T1 + T2; } T4 = cr[WS(rs, 8)]; T5 = ci[WS(rs, 7)]; T2Q = ci[WS(rs, 31)]; T2R = cr[WS(rs, 16)]; T1e = ci[WS(rs, 23)]; T2P = T4 - T5; T6 = T4 + T5; T6Q = T2Q - T2R; T2S = T2Q + T2R; T1f = cr[WS(rs, 24)]; { E T1o, T1n, T1p, Tb, Tc; Tb = ci[WS(rs, 3)]; Tc = cr[WS(rs, 12)]; T1o = ci[WS(rs, 19)]; T6R = T1e - T1f; T1g = T1e + T1f; T1n = Tb - Tc; Td = Tb + Tc; T1p = cr[WS(rs, 28)]; { E T1j, T1k, T8, T9, T1q; T8 = cr[WS(rs, 4)]; T9 = ci[WS(rs, 11)]; T1q = T1o + T1p; T6U = T1o - T1p; T1j = ci[WS(rs, 27)]; T1i = T8 - T9; Ta = T8 + T9; T1k = cr[WS(rs, 20)]; T2V = T1n + T1q; T1r = T1n - T1q; T6T = T1j - T1k; T1l = T1j + T1k; } } } { E T2U, T6V, T6S, T1h, T1s, T4Y, T4X, 
T2T, T2W; { E T7T, T8o, T1m, T7U, T7, Te, T8n; T7T = T3 - T6; T7 = T3 + T6; Te = Ta + Td; T8o = Ta - Td; T1m = T1i - T1l; T2U = T1i + T1l; T6j = T7 - Te; Tf = T7 + Te; T7U = T6U - T6T; T6V = T6T + T6U; T6S = T6Q + T6R; T8n = T6Q - T6R; T4t = T1d + T1g; T1h = T1d - T1g; T8W = T7T + T7U; T7V = T7T - T7U; T99 = T8o + T8n; T8p = T8n - T8o; T1s = T1m + T1r; T4Y = T1m - T1r; } T4X = T2S - T2P; T2T = T2P + T2S; T2W = T2U - T2V; T4u = T2U + T2V; T3L = FMA(KP707106781, T1s, T1h); T1t = FNMS(KP707106781, T1s, T1h); T3Z = FMA(KP707106781, T2W, T2T); T2X = FNMS(KP707106781, T2W, T2T); T5J = FNMS(KP707106781, T4Y, T4X); T4Z = FMA(KP707106781, T4Y, T4X); T7t = T6S + T6V; T6W = T6S - T6V; } } { E T29, T1S, T1V, T87, TR, T2c, T84, T6E, T1X, TU, T1Y, T6G, T25, T22, TX; E T1Z; { E TO, TN, TP, TL, TM, T6C, T6D; TL = ci[0]; TM = cr[WS(rs, 15)]; TO = cr[WS(rs, 7)]; T5v = FMA(KP707106781, T4u, T4t); T4v = FNMS(KP707106781, T4u, T4t); TN = TL + TM; T29 = TL - TM; TP = ci[WS(rs, 8)]; { E T2a, T2b, T1T, T1U, TQ; T1T = ci[WS(rs, 16)]; T1U = cr[WS(rs, 31)]; TQ = TO + TP; T1S = TO - TP; T2a = ci[WS(rs, 24)]; T6C = T1T - T1U; T1V = T1T + T1U; T2b = cr[WS(rs, 23)]; T87 = TN - TQ; TR = TN + TQ; T2c = T2a + T2b; T6D = T2a - T2b; } { E T23, T24, TS, TT, TV, TW; TS = cr[WS(rs, 3)]; TT = ci[WS(rs, 12)]; T84 = T6C - T6D; T6E = T6C + T6D; T23 = ci[WS(rs, 20)]; T1X = TS - TT; TU = TS + TT; T24 = cr[WS(rs, 27)]; TV = ci[WS(rs, 4)]; TW = cr[WS(rs, 11)]; T1Y = ci[WS(rs, 28)]; T6G = T23 - T24; T25 = T23 + T24; T22 = TV - TW; TX = TV + TW; T1Z = cr[WS(rs, 19)]; } } { E T4O, T1W, T2f, T26, T8Z, T86, T2e, T21, T89, T90; { E T85, TY, T6F, T20, T6H, T88; T4O = T1S + T1V; T1W = T1S - T1V; T2f = T22 - T25; T26 = T22 + T25; T85 = TU - TX; TY = TU + TX; T6F = T1Y - T1Z; T20 = T1Y + T1Z; T8Z = T85 + T84; T86 = T84 - T85; T6B = TR - TY; TZ = TR + TY; T6H = T6F + T6G; T88 = T6G - T6F; T2e = T1X - T20; T21 = T1X + T20; T7x = T6E + T6H; T6I = T6E - T6H; T89 = T87 - T88; T90 = T87 + T88; } { E T4P, T2d, T27, T2g; T2d = 
T29 - T2c; T4L = T29 + T2c; T4M = T21 + T26; T27 = T21 - T26; T2g = T2e + T2f; T4P = T2e - T2f; T91 = FNMS(KP414213562, T90, T8Z); T9d = FMA(KP414213562, T8Z, T90); T28 = FNMS(KP707106781, T27, T1W); T3S = FMA(KP707106781, T27, T1W); T3R = FMA(KP707106781, T2g, T2d); T2h = FNMS(KP707106781, T2g, T2d); T5B = FMA(KP707106781, T4P, T4O); T4Q = FNMS(KP707106781, T4P, T4O); T8v = FNMS(KP414213562, T86, T89); T8a = FMA(KP414213562, T89, T86); } } } { E T6s, T6z, T4F, T4E; { E T2A, T2j, TC, T8e, T2m, T2D, T6v, T8b, TG, T2o, TF, T6x, T2w, TH, T2p; E T2q; { E Tw, Tx, Tz, TA, T6t, T6u; Tw = cr[WS(rs, 1)]; T5C = FMA(KP707106781, T4M, T4L); T4N = FNMS(KP707106781, T4M, T4L); T6Z = T6I - T6B; T6J = T6B + T6I; Tx = ci[WS(rs, 14)]; Tz = cr[WS(rs, 9)]; TA = ci[WS(rs, 6)]; { E T2k, Ty, TB, T2l, T2B, T2C; T2k = ci[WS(rs, 30)]; T2A = Tw - Tx; Ty = Tw + Tx; T2j = Tz - TA; TB = Tz + TA; T2l = cr[WS(rs, 17)]; T2B = ci[WS(rs, 22)]; T2C = cr[WS(rs, 25)]; TC = Ty + TB; T8e = Ty - TB; T2m = T2k + T2l; T6t = T2k - T2l; T6u = T2B - T2C; T2D = T2B + T2C; } { E TD, TE, T2u, T2v; TD = cr[WS(rs, 5)]; T6v = T6t + T6u; T8b = T6t - T6u; TE = ci[WS(rs, 10)]; T2u = ci[WS(rs, 18)]; T2v = cr[WS(rs, 29)]; TG = ci[WS(rs, 2)]; T2o = TD - TE; TF = TD + TE; T6x = T2u - T2v; T2w = T2u + T2v; TH = cr[WS(rs, 13)]; T2p = ci[WS(rs, 26)]; T2q = cr[WS(rs, 21)]; } } { E T4H, T2n, T2G, T2F, T92, T8d, T2y, T93, T8g, T4I, T2E, T2H; { E T2x, T8c, T8f, T2s, T2t, TI; T4H = T2m - T2j; T2n = T2j + T2m; T2t = TG - TH; TI = TG + TH; { E T6w, T2r, TJ, T6y; T6w = T2p - T2q; T2r = T2p + T2q; T2G = T2t - T2w; T2x = T2t + T2w; T8c = TF - TI; TJ = TF + TI; T6y = T6w + T6x; T8f = T6x - T6w; T2F = T2o - T2r; T2s = T2o + T2r; TK = TC + TJ; T6s = TC - TJ; T6z = T6v - T6y; T7w = T6v + T6y; } T92 = T8c + T8b; T8d = T8b - T8c; T4F = T2s + T2x; T2y = T2s - T2x; T93 = T8e + T8f; T8g = T8e - T8f; } T4E = T2A + T2D; T2E = T2A - T2D; T2H = T2F + T2G; T4I = T2G - T2F; T2z = FNMS(KP707106781, T2y, T2n); T3P = FMA(KP707106781, T2y, T2n); T94 = 
FMA(KP414213562, T93, T92); T9c = FNMS(KP414213562, T92, T93); T3O = FMA(KP707106781, T2H, T2E); T2I = FNMS(KP707106781, T2H, T2E); T5y = FMA(KP707106781, T4I, T4H); T4J = FNMS(KP707106781, T4I, T4H); T8u = FMA(KP414213562, T8d, T8g); T8h = FNMS(KP414213562, T8g, T8d); } } { E T4x, T1O, Tm, T7Z, T80, T4w, T1J, T4A, T1D, Tt, T7X, T7W, T4z, T1y; { E Tj, T1K, Ti, T6o, T1N, Tk, T1G, T1H; { E Tg, Th, T1L, T1M;
快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -