hc2cbdft2_32.c
字號:
#else /* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2cdft -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hc2cbdft2_32 -include hc2cb.h *//* * This function contains 498 FP additions, 208 FP multiplications, * (or, 404 additions, 114 multiplications, 94 fused multiply/add), * 102 stack variables, 7 constants, and 128 memory accesses */#include "hc2cb.h"static void hc2cbdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP555570233, +0.555570233019602224742830813948532874374937191); DK(KP195090322, +0.195090322016128267848284868477022240927691618); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP382683432, +0.382683432365089771728459984030398866761344562); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) { E Tf, T4a, T6h, T7Z, T6P, T8e, T1j, T4v, T2R, T4L, T5C, T7E, T6a, T7U, T3n; E T4q, TZ, T38, T2p, T4B, T7M, T7R, T2y, T4C, T5Y, T63, T6C, T86, T4i, T4n; E T6z, T85, TK, T31, T1Y, T4y, T7J, T7Q, T27, T4z, T5R, T62, T6v, T83, T4f; E T4m, T6s, T82, Tu, T4p, T6o, T8f, T6M, T80, T1G, T4K, T2I, T4w, T5J, T7T; E T67, T7F, T3g, T4b; { E T3, T2M, T16, T3k, T6, T13, T2P, T3l, Td, T3i, T1h, T2K, Ta, T3h, T1c; E T2J; { E T1, T2, T2N, T2O; T1 = Rp[0]; T2 = Rm[WS(rs, 15)]; T3 = T1 + T2; T2M = T1 - T2; { E T14, T15, T4, T5; T14 = Ip[0]; T15 = Im[WS(rs, 15)]; T16 = T14 + T15; T3k = T14 - T15; T4 = Rp[WS(rs, 8)]; T5 = Rm[WS(rs, 7)]; T6 = T4 + T5; T13 = T4 - T5; } T2N = Ip[WS(rs, 8)]; T2O = Im[WS(rs, 7)]; T2P = T2N + T2O; T3l = T2N - T2O; { E Tb, Tc, T1d, T1e, T1f, T1g; Tb = Rm[WS(rs, 3)]; Tc = Rp[WS(rs, 12)]; T1d = Tb - Tc; T1e = Im[WS(rs, 3)]; T1f = Ip[WS(rs, 12)]; T1g = T1e + T1f; Td = Tb + 
Tc; T3i = T1f - T1e; T1h = T1d + T1g; T2K = T1d - T1g; } { E T8, T9, T18, T19, T1a, T1b; T8 = Rp[WS(rs, 4)]; T9 = Rm[WS(rs, 11)]; T18 = T8 - T9; T19 = Ip[WS(rs, 4)]; T1a = Im[WS(rs, 11)]; T1b = T19 + T1a; Ta = T8 + T9; T3h = T19 - T1a; T1c = T18 + T1b; T2J = T18 - T1b; } } { E T7, Te, T6f, T6g; T7 = T3 + T6; Te = Ta + Td; Tf = T7 + Te; T4a = T7 - Te; T6f = T16 - T13; T6g = KP707106781 * (T2J - T2K); T6h = T6f + T6g; T7Z = T6f - T6g; } { E T6N, T6O, T17, T1i; T6N = T2M + T2P; T6O = KP707106781 * (T1c + T1h); T6P = T6N - T6O; T8e = T6O + T6N; T17 = T13 + T16; T1i = KP707106781 * (T1c - T1h); T1j = T17 + T1i; T4v = T17 - T1i; } { E T2L, T2Q, T5A, T5B; T2L = KP707106781 * (T2J + T2K); T2Q = T2M - T2P; T2R = T2L + T2Q; T4L = T2Q - T2L; T5A = T3 - T6; T5B = T3i - T3h; T5C = T5A + T5B; T7E = T5A - T5B; } { E T68, T69, T3j, T3m; T68 = Ta - Td; T69 = T3k - T3l; T6a = T68 + T69; T7U = T69 - T68; T3j = T3h + T3i; T3m = T3k + T3l; T3n = T3j + T3m; T4q = T3m - T3j; } } { E TR, T5S, T29, T2t, T2c, T5W, T2w, T37, TY, T5T, T5V, T2i, T2n, T2r, T34; E T2q, T6A, T6B; { E TL, TM, TN, TO, TP, TQ; TL = Rm[0]; TM = Rp[WS(rs, 15)]; TN = TL + TM; TO = Rp[WS(rs, 7)]; TP = Rm[WS(rs, 8)]; TQ = TO + TP; TR = TN + TQ; T5S = TN - TQ; T29 = TO - TP; T2t = TL - TM; } { E T2a, T2b, T35, T2u, T2v, T36; T2a = Im[0]; T2b = Ip[WS(rs, 15)]; T35 = T2b - T2a; T2u = Ip[WS(rs, 7)]; T2v = Im[WS(rs, 8)]; T36 = T2u - T2v; T2c = T2a + T2b; T5W = T35 - T36; T2w = T2u + T2v; T37 = T35 + T36; } { E TU, T2e, T2h, T32, TX, T2j, T2m, T33; { E TS, TT, T2f, T2g; TS = Rp[WS(rs, 3)]; TT = Rm[WS(rs, 12)]; TU = TS + TT; T2e = TS - TT; T2f = Ip[WS(rs, 3)]; T2g = Im[WS(rs, 12)]; T2h = T2f + T2g; T32 = T2f - T2g; } { E TV, TW, T2k, T2l; TV = Rm[WS(rs, 4)]; TW = Rp[WS(rs, 11)]; TX = TV + TW; T2j = TV - TW; T2k = Im[WS(rs, 4)]; T2l = Ip[WS(rs, 11)]; T2m = T2k + T2l; T33 = T2l - T2k; } TY = TU + TX; T5T = T33 - T32; T5V = TU - TX; T2i = T2e + T2h; T2n = T2j + T2m; T2r = T2j - T2m; T34 = T32 + T33; T2q = T2e - T2h; } TZ = TR + 
TY; T38 = T34 + T37; { E T2d, T2o, T7K, T7L; T2d = T29 - T2c; T2o = KP707106781 * (T2i - T2n); T2p = T2d + T2o; T4B = T2d - T2o; T7K = T5S - T5T; T7L = T5W - T5V; T7M = FMA(KP382683432, T7K, KP923879532 * T7L); T7R = FNMS(KP923879532, T7K, KP382683432 * T7L); } { E T2s, T2x, T5U, T5X; T2s = KP707106781 * (T2q + T2r); T2x = T2t - T2w; T2y = T2s + T2x; T4C = T2x - T2s; T5U = T5S + T5T; T5X = T5V + T5W; T5Y = FMA(KP923879532, T5U, KP382683432 * T5X); T63 = FNMS(KP382683432, T5U, KP923879532 * T5X); } T6A = T2t + T2w; T6B = KP707106781 * (T2i + T2n); T6C = T6A - T6B; T86 = T6B + T6A; { E T4g, T4h, T6x, T6y; T4g = TR - TY; T4h = T37 - T34; T4i = T4g + T4h; T4n = T4h - T4g; T6x = KP707106781 * (T2q - T2r); T6y = T29 + T2c; T6z = T6x - T6y; T85 = T6y + T6x; } } { E TC, T5L, T1I, T22, T1L, T5P, T25, T30, TJ, T5M, T5O, T1R, T1W, T20, T2X; E T1Z, T6t, T6u; { E Tw, Tx, Ty, Tz, TA, TB; Tw = Rp[WS(rs, 1)]; Tx = Rm[WS(rs, 14)]; Ty = Tw + Tx; Tz = Rp[WS(rs, 9)]; TA = Rm[WS(rs, 6)]; TB = Tz + TA; TC = Ty + TB; T5L = Ty - TB; T1I = Tz - TA; T22 = Tw - Tx; } { E T1J, T1K, T2Y, T23, T24, T2Z; T1J = Ip[WS(rs, 1)]; T1K = Im[WS(rs, 14)]; T2Y = T1J - T1K; T23 = Ip[WS(rs, 9)]; T24 = Im[WS(rs, 6)]; T2Z = T23 - T24; T1L = T1J + T1K; T5P = T2Y - T2Z; T25 = T23 + T24; T30 = T2Y + T2Z; } { E TF, T1N, T1Q, T2V, TI, T1S, T1V, T2W; { E TD, TE, T1O, T1P; TD = Rp[WS(rs, 5)]; TE = Rm[WS(rs, 10)]; TF = TD + TE; T1N = TD - TE; T1O = Ip[WS(rs, 5)]; T1P = Im[WS(rs, 10)]; T1Q = T1O + T1P; T2V = T1O - T1P; } { E TG, TH, T1T, T1U; TG = Rm[WS(rs, 2)]; TH = Rp[WS(rs, 13)]; TI = TG + TH; T1S = TG - TH; T1T = Im[WS(rs, 2)]; T1U = Ip[WS(rs, 13)]; T1V = T1T + T1U; T2W = T1U - T1T; } TJ = TF + TI; T5M = T2W - T2V; T5O = TF - TI; T1R = T1N + T1Q; T1W = T1S + T1V; T20 = T1S - T1V; T2X = T2V + T2W; T1Z = T1N - T1Q; } TK = TC + TJ; T31 = T2X + T30; { E T1M, T1X, T7H, T7I; T1M = T1I + T1L; T1X = KP707106781 * (T1R - T1W); T1Y = T1M + T1X; T4y = T1M - T1X; T7H = T5L - T5M; T7I = T5P - T5O; T7J = FNMS(KP923879532, T7I, 
KP382683432 * T7H); T7Q = FMA(KP923879532, T7H, KP382683432 * T7I); } { E T21, T26, T5N, T5Q; T21 = KP707106781 * (T1Z + T20); T26 = T22 - T25; T27 = T21 + T26; T4z = T26 - T21; T5N = T5L + T5M; T5Q = T5O + T5P; T5R = FNMS(KP382683432, T5Q, KP923879532 * T5N); T62 = FMA(KP382683432, T5N, KP923879532 * T5Q); } T6t = T22 + T25; T6u = KP707106781 * (T1R + T1W); T6v = T6t - T6u; T83 = T6u + T6t; { E T4d, T4e, T6q, T6r; T4d = TC - TJ; T4e = T30 - T2X; T4f = T4d - T4e; T4m = T4d + T4e; T6q = T1L - T1I; T6r = KP707106781 * (T1Z - T20); T6s = T6q + T6r; T82 = T6q - T6r; } } { E Ti, T3a, Tl, T3b, T1o, T1t, T6j, T6i, T5E, T5D, Tp, T3d, Ts, T3e, T1z; E T1E, T6m, T6l, T5H, T5G; { E T1p, T1n, T1k, T1s; { E Tg, Th, T1l, T1m; Tg = Rp[WS(rs, 2)]; Th = Rm[WS(rs, 13)]; Ti = Tg + Th; T1p = Tg - Th; T1l = Ip[WS(rs, 2)]; T1m = Im[WS(rs, 13)]; T1n = T1l + T1m; T3a = T1l - T1m; } { E Tj, Tk, T1q, T1r; Tj = Rp[WS(rs, 10)]; Tk = Rm[WS(rs, 5)]; Tl = Tj + Tk; T1k = Tj - Tk; T1q = Ip[WS(rs, 10)]; T1r = Im[WS(rs, 5)]; T1s = T1q + T1r; T3b = T1q - T1r; } T1o = T1k + T1n; T1t = T1p - T1s; T6j = T1p + T1s; T6i = T1n - T1k; T5E = T3a - T3b; T5D = Ti - Tl; } { E T1A, T1y, T1v, T1D; { E Tn, To, T1w, T1x; Tn = Rm[WS(rs, 1)]; To = Rp[WS(rs, 14)]; Tp = Tn + To; T1A = Tn - To; T1w = Im[WS(rs, 1)]; T1x = Ip[WS(rs, 14)]; T1y = T1w + T1x; T3d = T1x - T1w; } { E Tq, Tr, T1B, T1C; Tq = Rp[WS(rs, 6)]; Tr = Rm[WS(rs, 9)]; Ts = Tq + Tr; T1v = Tq - Tr; T1B = Ip[WS(rs, 6)]; T1C = Im[WS(rs, 9)]; T1D = T1B + T1C; T3e = T1B - T1C; } T1z = T1v - T1y; T1E = T1A - T1D; T6m = T1A + T1D; T6l = T1v + T1y; T5H = T3d - T3e; T5G = Tp - Ts; } { E Tm, Tt, T6k, T6n; Tm = Ti + Tl; Tt = Tp + Ts; Tu = Tm + Tt; T4p = Tm - Tt; T6k = FMA(KP382683432, T6i, KP923879532 * T6j); T6n = FMA(KP382683432, T6l, KP923879532 * T6m); T6o = T6k - T6n; T8f = T6k + T6n; } { E T6K, T6L, T1u, T1F; T6K = FNMS(KP923879532, T6i, KP382683432 * T6j); T6L = FNMS(KP923879532, T6l, KP382683432 * T6m); T6M = T6K + T6L; T80 = T6K - T6L; T1u = FMA(KP923879532, 
T1o, KP382683432 * T1t); T1F = FNMS(KP382683432, T1E, KP923879532 * T1z); T1G = T1u + T1F; T4K = T1F - T1u; } { E T2G, T2H, T5F, T5I; T2G = FNMS(KP382683432, T1o, KP923879532 * T1t); T2H = FMA(KP382683432, T1z, KP923879532 * T1E); T2I = T2G + T2H; T4w = T2G - T2H; T5F = T5D - T5E; T5I = T5G + T5H; T5J = KP707106781 * (T5F + T5I); T7T = KP707106781 * (T5F - T5I); } { E T65, T66, T3c, T3f; T65 = T5D + T5E; T66 = T5H - T5G; T67 = KP707106781 * (T65 + T66); T7F = KP707106781 * (T66 - T65); T3c = T3a + T3b; T3f = T3d + T3e; T3g = T3c + T3f; T4b = T3f - T3c; } } { E T11, T3s, T3p, T3u, T3K, T40, T3G, T3Y, T2T, T43, T3z, T3P, T2B, T45, T3x; E T3T; { E Tv, T10, T3E, T3F; Tv = Tf + Tu; T10 = TK + TZ; T11 = Tv + T10; T3s = Tv - T10; { E T39, T3o, T3I, T3J; T39 = T31 + T38; T3o = T3g + T3n; T3p = T39 + T3o; T3u = T3o - T39; T3I = TK - TZ; T3J = T3n - T3g; T3K = T3I + T3J; T40 = T3J - T3I; } T3E = Tf - Tu; T3F = T38 - T31; T3G = T3E + T3F; T3Y = T3E - T3F; { E T2S, T3N, T2F, T3O, T2D, T2E; T2S = T2I + T2R; T3N = T1j - T1G; T2D = FNMS(KP195090322, T1Y, KP980785280 * T27); T2E = FMA(KP195090322, T2p, KP980785280 * T2y); T2F = T2D + T2E; T3O = T2D - T2E; T2T = T2F + T2S; T43 = T3N - T3O; T3z = T2S - T2F;
快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -