?? r2cbiii_64.c
字號:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:09:09 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_r2cb -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include r2cbIII.h *//* * This function contains 434 FP additions, 260 FP multiplications, * (or, 238 additions, 64 multiplications, 196 fused multiply/add), * 165 stack variables, 36 constants, and 128 memory accesses */#include "r2cbIII.h"static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs){ DK(KP357805721, +0.357805721314524104672487743774474392487532769); DK(KP1_883088130, +1.883088130366041556825018805199004714371179592); DK(KP472964775, +0.472964775891319928124438237972992463904131113); DK(KP1_807978586, +1.807978586246886663172400594461074097420264050); DK(KP049126849, +0.049126849769467254105343321271313617079695752); DK(KP1_997590912, +1.997590912410344785429543209518201388886407229); DK(KP906347169, +0.906347169019147157946142717268914412664134293); DK(KP1_481902250, 
+1.481902250709918182351233794990325459457910619); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP250486960, +0.250486960191305461595702160124721208578685568); DK(KP1_940062506, +1.940062506389087985207968414572200502913731924); DK(KP599376933, +0.599376933681923766271389869014404232837890546); DK(KP1_715457220, +1.715457220000544139804539968569540274084981599); DK(KP148335987, +0.148335987538347428753676511486911367000625355); DK(KP1_978353019, +1.978353019929561946903347476032486127967379067); DK(KP741650546, +0.741650546272035369581266691172079863842265220); DK(KP1_606415062, +1.606415062961289819613353025926283847759138854); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + 
ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T43, T4b, T49, T4e, T3T, T46, T40, T4a; { E T3t, T15, T2E, T3U, T6b, Tf, T6Q, T6u, T5J, T4L, T3V, T1g, T5U, T5q, T3u; E T2H, T6v, Tu, T5r, T4V, T6R, T6e, T2K, T1s, T2J, T1D, T3X, T3B, T5s, T4Q; E T3Y, T3y, T6g, TK, T5M, T57, T6N, T6j, T35, T1W, T34, T25, T4i, T3J, T5N; E T52, T4j, T3G, T6l, TZ, T3L, T5P, T5i, T6M, T6o, T3M, T38, T2n, T37, T2w; E T4l, T3Q, T5Q, T5d; { E T3x, T3w, T3E, T3F; { E T5p, T5o, T2G, T2F; { E T11, T3, T5m, T2D, T2A, T6, T5n, T14, Tb, T16, Ta, T4I, T19, Tc, T1c; E T1d; { E T4, T5, T12, T13; { E T1, T2, T2B, T2C; T1 = Cr[0]; T2 = Cr[WS(csr, 31)]; T2B = Ci[0]; T2C = Ci[WS(csi, 31)]; T4 = Cr[WS(csr, 16)]; T11 = T1 - T2; T3 = T1 + T2; T5m = T2C - T2B; T2D = T2B + T2C; T5 = Cr[WS(csr, 15)]; T12 = Ci[WS(csi, 16)]; T13 = Ci[WS(csi, 15)]; } { E T8, T9, T17, T18; T8 = Cr[WS(csr, 8)]; T2A = T4 - T5; T6 = T4 + T5; T5n = T13 - T12; T14 = T12 + T13; T9 = Cr[WS(csr, 23)]; T17 = Ci[WS(csi, 8)]; T18 = Ci[WS(csi, 23)]; Tb = Cr[WS(csr, 7)]; T16 = T8 - T9; Ta = T8 + T9; T4I = T18 - T17; T19 = T17 + T18; Tc = Cr[WS(csr, 24)]; T1c = Ci[WS(csi, 7)]; T1d = Ci[WS(csi, 24)]; } } { E T1b, T4J, T1e, T4H, T7, Te, Td; T3t = T11 + T14; T15 = T11 - T14; T1b = Tb - Tc; Td = Tb + Tc; T4J = T1c - T1d; T1e = T1c + T1d; T2E = T2A + T2D; T3U = T2A - T2D; T4H = T3 - T6; T7 = T3 + T6; Te = Ta + Td; T5p = Ta - Td; { E T4K, T6s, T6t, T1a, T1f; T5o = T5m - T5n; T6s = T5n + T5m; T6t = T4I + T4J; T4K = T4I - T4J; T6b = T7 - Te; Tf = T7 + Te; T6Q = T6t + T6s; T6u = T6s - T6t; T2G = T16 + T19; T1a = T16 - T19; T1f = T1b - T1e; T2F = T1b + T1e; T5J = T4H - T4K; T4L = T4H + T4K; T3V = T1a - T1f; T1g = T1a + T1f; } } } { E T1i, Ti, T4O, T1q, T1n, Tl, T4N, T1l, Tq, T1t, Tp, T4T, T1A, Tr, T1u; E T1v; { E Tj, Tk, T1j, T1k; { E Tg, Th, T1o, T1p; Tg = Cr[WS(csr, 4)]; T5U = T5p + T5o; T5q = T5o - T5p; T3u = T2G + T2F; T2H = T2F - T2G; Th = Cr[WS(csr, 27)]; T1o = 
Ci[WS(csi, 4)]; T1p = Ci[WS(csi, 27)]; Tj = Cr[WS(csr, 20)]; T1i = Tg - Th; Ti = Tg + Th; T4O = T1p - T1o; T1q = T1o + T1p; Tk = Cr[WS(csr, 11)]; T1j = Ci[WS(csi, 20)]; T1k = Ci[WS(csi, 11)]; } { E Tn, To, T1y, T1z; Tn = Cr[WS(csr, 3)]; T1n = Tj - Tk; Tl = Tj + Tk; T4N = T1k - T1j; T1l = T1j + T1k; To = Cr[WS(csr, 28)]; T1y = Ci[WS(csi, 3)]; T1z = Ci[WS(csi, 28)]; Tq = Cr[WS(csr, 12)]; T1t = Tn - To; Tp = Tn + To; T4T = T1y - T1z; T1A = T1y + T1z; Tr = Cr[WS(csr, 19)]; T1u = Ci[WS(csi, 12)]; T1v = Ci[WS(csi, 19)]; } } { E T4M, T1B, T1w, T4P, T1m, T1r, Tm, Ts, T4S; T4M = Ti - Tl; Tm = Ti + Tl; T1B = Tq - Tr; Ts = Tq + Tr; T4S = T1v - T1u; T1w = T1u + T1v; { E T6c, Tt, T4R, T6d, T4U; T6c = T4N + T4O; T4P = T4N - T4O; Tt = Tp + Ts; T4R = Tp - Ts; T6d = T4S + T4T; T4U = T4S - T4T; T3x = T1i + T1l; T1m = T1i - T1l; T6v = Tm - Tt; Tu = Tm + Tt; T5r = T4R - T4U; T4V = T4R + T4U; T6R = T6c + T6d; T6e = T6c - T6d; T1r = T1n + T1q; T3w = T1n - T1q; } { E T3A, T3z, T1x, T1C; T3A = T1t + T1w; T1x = T1t - T1w; T1C = T1A - T1B; T3z = T1B + T1A; T2K = FMA(KP414213562, T1m, T1r); T1s = FNMS(KP414213562, T1r, T1m); T2J = FMA(KP414213562, T1x, T1C); T1D = FNMS(KP414213562, T1C, T1x); T3X = FMA(KP414213562, T3z, T3A); T3B = FNMS(KP414213562, T3A, T3z); T5s = T4M + T4P; T4Q = T4M - T4P; } } } } { E T1G, Ty, T54, T20, T1X, TB, T53, T1J, TI, T4Z, T1L, TF, T22, T1U, T50; E T1O; { E T1Y, T1Z, Tz, TA, Tw, Tx, T1H, T1I; Tw = Cr[WS(csr, 2)]; Tx = Cr[WS(csr, 29)]; T1Y = Ci[WS(csi, 2)]; T3Y = FNMS(KP414213562, T3w, T3x); T3y = FMA(KP414213562, T3x, T3w); T1G = Tw - Tx; Ty = Tw + Tx; T1Z = Ci[WS(csi, 29)]; Tz = Cr[WS(csr, 18)]; TA = Cr[WS(csr, 13)]; T1H = Ci[WS(csi, 18)]; T54 = T1Y - T1Z; T20 = T1Y + T1Z; T1X = Tz - TA; TB = Tz + TA; T1I = Ci[WS(csi, 13)]; { E T1R, T1Q, T1S, TG, TH; TG = Cr[WS(csr, 5)]; TH = Cr[WS(csr, 26)]; T1R = Ci[WS(csi, 5)]; T53 = T1H - T1I; T1J = T1H + T1I; T1Q = TG - TH; TI = TG + TH; T1S = Ci[WS(csi, 26)]; { E T1M, T1N, TD, TE, T1T; TD = Cr[WS(csr, 10)]; TE = Cr[WS(csr, 
21)]; T1T = T1R + T1S; T4Z = T1S - T1R; T1M = Ci[WS(csi, 10)]; T1L = TD - TE; TF = TD + TE; T1N = Ci[WS(csi, 21)]; T22 = T1Q + T1T; T1U = T1Q - T1T; T50 = T1M - T1N; T1O = T1M + T1N; } } } { E T4Y, T23, T51, T1K, T1V, T3I, T3H, T21, T24; { E T56, T1P, T6h, T55, TC, TJ, T6i; T4Y = Ty - TB; TC = Ty + TB; TJ = TF + TI; T56 = TF - TI; T1P = T1L - T1O; T23 = T1L + T1O; T6h = T53 + T54; T55 = T53 - T54; T6g = TC - TJ; TK = TC + TJ; T6i = T50 + T4Z; T51 = T4Z - T50; T3E = T1G + T1J; T1K = T1G - T1J; T5M = T56 + T55; T57 = T55 - T56; T6N = T6i + T6h; T6j = T6h - T6i; T1V = T1P + T1U; T3I = T1P - T1U; } T3H = T1X - T20; T21 = T1X + T20; T24 = T22 - T23; T3F = T23 + T22; T35 = FNMS(KP707106781, T1V, T1K); T1W = FMA(KP707106781, T1V, T1K); T34 = FMA(KP707106781, T24, T21); T25 = FNMS(KP707106781, T24, T21); T4i = FMA(KP707106781, T3I, T3H); T3J = FNMS(KP707106781, T3I, T3H); T5N = T4Y - T51; T52 = T4Y + T51; } } { E T27, TN, T5f, T2q, T2r, TQ, T5e, T2a, TX, T5a, T2c, TU, T2t, T2l, T5b; E T2f; { E T2o, T2p, TO, TP, TL, TM, T28, T29; TL = Cr[WS(csr, 1)]; TM = Cr[WS(csr, 30)]; T2o = Ci[WS(csi, 1)]; T4j = FMA(KP707106781, T3F, T3E); T3G = FNMS(KP707106781, T3F, T3E); T27 = TL - TM; TN = TL + TM; T2p = Ci[WS(csi, 30)]; TO = Cr[WS(csr, 14)]; TP = Cr[WS(csr, 17)]; T28 = Ci[WS(csi, 14)]; T5f = T2p - T2o; T2q = T2o + T2p; T2r = TO - TP; TQ = TO + TP; T29 = Ci[WS(csi, 17)]; { E T2i, T2h, T2j, TV, TW; TV = Cr[WS(csr, 9)]; TW = Cr[WS(csr, 22)]; T2i = Ci[WS(csi, 9)]; T5e = T28 - T29; T2a = T28 + T29; T2h = TV - TW; TX = TV + TW; T2j = Ci[WS(csi, 22)]; { E T2d, T2e, TS, TT, T2k; TS = Cr[WS(csr, 6)]; TT = Cr[WS(csr, 25)]; T2k = T2i + T2j; T5a = T2j - T2i; T2d = Ci[WS(csi, 6)]; T2c = TS - TT; TU = TS + TT; T2e = Ci[WS(csi, 25)]; T2t = T2h + T2k; T2l = T2h - T2k; T5b = T2d - T2e; T2f = T2d + T2e; } } }
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -