n1bv_64.c
字號:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:44:33 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_notw_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 64 -name n1bv_64 -include n1b.h *//* * This function contains 456 FP additions, 258 FP multiplications, * (or, 198 additions, 0 multiplications, 258 fused multiply/add), * 168 stack variables, 15 constants, and 128 memory accesses */#include "n1b.h"static void n1bv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs){ DVK(KP773010453, +0.773010453362736960810906609758469800971041293); DVK(KP820678790, +0.820678790828660330972281985331011598767386482); DVK(KP881921264, +0.881921264348355029712756863660388349508442621); DVK(KP534511135, +0.534511135950791641089685961295362908582039528); DVK(KP995184726, +0.995184726672196886244836953109479921575474869); DVK(KP098491403, +0.098491403357164253077197521291327432293052451); DVK(KP980785280, +0.980785280403230449126182236134239036973933731); DVK(KP956940335, 
+0.956940335732208864935797886980269969482849206); DVK(KP303346683, +0.303346683607342391675883946941299872384187453); DVK(KP831469612, +0.831469612302545237078788377617905756738560812); DVK(KP923879532, +0.923879532511286756128183189396788286822416626); DVK(KP668178637, +0.668178637919298919997757686523080761552472251); DVK(KP198912367, +0.198912367379658006911597622644676228597850501); DVK(KP414213562, +0.414213562373095048801688724209698078569671875); DVK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; const R *xi; R *xo; xi = ii; xo = io; for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) { V T5T, T5S, T5X, T65, T5Z, T5R, T67, T63, T5U, T64; { V T7, T26, T5k, T6A, T47, T69, T2V, T3z, T6B, T4e, T6a, T5n, T3M, T2Y, T27; V Tm, T3A, T3i, T29, TC, T5p, T4o, T6D, T6e, T3l, T3B, TR, T2a, T4x, T5q; V T6h, T6E, T39, T3H, T3I, T3c, T5N, T57, T72, T6w, T5O, T5e, T71, T6t, T2y; V T1W, T2x, T1N, T33, T34, T3E, T32, T1p, T2v, T1g, T2u, T4M, T5K, T6p, T6Z; V T6m, T6Y, T5L, T4T; { V T4g, T4l, T3g, Tu, Tx, T4h, TA, T4i; { V T1, T2, T23, T24, T4, T5, T20, T21; T1 = LD(&(xi[0]), ivs, &(xi[0])); T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0])); T23 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0])); T24 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0])); T4 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0])); T5 = LD(&(xi[WS(is, 48)]), ivs, &(xi[0])); T20 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0])); T21 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0])); { V Ta, T48, Tk, T4c, T49, Td, Tf, Tg; { V T8, T43, T3, T45, T25, T5i, T6, T44, T22, T9, Ti, Tj, Tb, Tc; T8 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0])); T43 = VSUB(T1, T2); T3 = VADD(T1, T2); T45 = VSUB(T23, T24); T25 = VADD(T23, T24); T5i = VSUB(T4, T5); T6 = VADD(T4, T5); T44 = VSUB(T20, T21); T22 = VADD(T20, T21); T9 = LD(&(xi[WS(is, 36)]), ivs, &(xi[0])); Ti = LD(&(xi[WS(is, 12)]), ivs, &(xi[0])); Tj = LD(&(xi[WS(is, 44)]), ivs, &(xi[0])); Tb = LD(&(xi[WS(is, 20)]), ivs, &(xi[0])); Tc = 
LD(&(xi[WS(is, 52)]), ivs, &(xi[0])); { V T2T, T46, T5j, T2U; T7 = VSUB(T3, T6); T2T = VADD(T3, T6); T46 = VADD(T44, T45); T5j = VSUB(T44, T45); T26 = VSUB(T22, T25); T2U = VADD(T22, T25); Ta = VADD(T8, T9); T48 = VSUB(T8, T9); Tk = VADD(Ti, Tj); T4c = VSUB(Tj, Ti); T5k = VFMA(LDK(KP707106781), T5j, T5i); T6A = VFNMS(LDK(KP707106781), T5j, T5i); T47 = VFMA(LDK(KP707106781), T46, T43); T69 = VFNMS(LDK(KP707106781), T46, T43); T2V = VADD(T2T, T2U); T3z = VSUB(T2T, T2U); T49 = VSUB(Tb, Tc); Td = VADD(Tb, Tc); } Tf = LD(&(xi[WS(is, 60)]), ivs, &(xi[0])); Tg = LD(&(xi[WS(is, 28)]), ivs, &(xi[0])); } { V Te, T2W, T5l, T4a, Tq, Tt, Tv, Tw, T5m, T4d, Tl, T2X, Ty, Tz, To; V Tp; To = LD(&(xi[WS(is, 2)]), ivs, &(xi[0])); Tp = LD(&(xi[WS(is, 34)]), ivs, &(xi[0])); { V Th, T4b, Tr, Ts; Tr = LD(&(xi[WS(is, 18)]), ivs, &(xi[0])); Ts = LD(&(xi[WS(is, 50)]), ivs, &(xi[0])); Te = VSUB(Ta, Td); T2W = VADD(Ta, Td); T5l = VFMA(LDK(KP414213562), T48, T49); T4a = VFNMS(LDK(KP414213562), T49, T48); Th = VADD(Tf, Tg); T4b = VSUB(Tf, Tg); Tq = VADD(To, Tp); T4g = VSUB(To, Tp); T4l = VSUB(Tr, Ts); Tt = VADD(Tr, Ts); Tv = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); Tw = LD(&(xi[WS(is, 42)]), ivs, &(xi[0])); T5m = VFMA(LDK(KP414213562), T4b, T4c); T4d = VFNMS(LDK(KP414213562), T4c, T4b); Tl = VSUB(Th, Tk); T2X = VADD(Th, Tk); Ty = LD(&(xi[WS(is, 58)]), ivs, &(xi[0])); Tz = LD(&(xi[WS(is, 26)]), ivs, &(xi[0])); } T3g = VADD(Tq, Tt); Tu = VSUB(Tq, Tt); Tx = VADD(Tv, Tw); T4h = VSUB(Tv, Tw); T6B = VSUB(T4a, T4d); T4e = VADD(T4a, T4d); T6a = VADD(T5l, T5m); T5n = VSUB(T5l, T5m); T3M = VSUB(T2W, T2X); T2Y = VADD(T2W, T2X); T27 = VSUB(Te, Tl); Tm = VADD(Te, Tl); TA = VADD(Ty, Tz); T4i = VSUB(Ty, Tz); } } } { V TK, T4p, T4u, T4k, T6d, T4n, T6c, TL, TN, TO, T3j, TJ, TF, TI; { V TD, TE, TG, TH; TD = LD(&(xi[WS(is, 62)]), ivs, &(xi[0])); TE = LD(&(xi[WS(is, 30)]), ivs, &(xi[0])); TG = LD(&(xi[WS(is, 14)]), ivs, &(xi[0])); TH = LD(&(xi[WS(is, 46)]), ivs, &(xi[0])); TK = LD(&(xi[WS(is, 54)]), ivs, &(xi[0])); { 
V T3h, TB, T4j, T4m; T3h = VADD(Tx, TA); TB = VSUB(Tx, TA); T4j = VADD(T4h, T4i); T4m = VSUB(T4h, T4i); T4p = VSUB(TD, TE); TF = VADD(TD, TE); T4u = VSUB(TH, TG); TI = VADD(TG, TH); T3A = VSUB(T3g, T3h); T3i = VADD(T3g, T3h); T29 = VFMA(LDK(KP414213562), Tu, TB); TC = VFNMS(LDK(KP414213562), TB, Tu); T4k = VFMA(LDK(KP707106781), T4j, T4g); T6d = VFNMS(LDK(KP707106781), T4j, T4g); T4n = VFMA(LDK(KP707106781), T4m, T4l); T6c = VFNMS(LDK(KP707106781), T4m, T4l); TL = LD(&(xi[WS(is, 22)]), ivs, &(xi[0])); } TN = LD(&(xi[WS(is, 6)]), ivs, &(xi[0])); TO = LD(&(xi[WS(is, 38)]), ivs, &(xi[0])); } T3j = VADD(TF, TI); TJ = VSUB(TF, TI); { V T3a, T1E, T52, T5b, T1x, T4Z, T6r, T6u, T5a, T1U, T55, T5c, T1L, T3b; { V T4V, T1t, T58, T1w, T1Q, T1T, T1I, T4Y, T59, T1J, T53, T1H; { V T1r, TM, T4r, TP, T4q, T1s, T1u, T1v; T1r = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)])); T5p = VFMA(LDK(KP198912367), T4k, T4n); T4o = VFNMS(LDK(KP198912367), T4n, T4k); T6D = VFMA(LDK(KP668178637), T6c, T6d); T6e = VFNMS(LDK(KP668178637), T6d, T6c); TM = VADD(TK, TL); T4r = VSUB(TK, TL); TP = VADD(TN, TO); T4q = VSUB(TN, TO); T1s = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)])); T1u = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); T1v = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)])); { V T1R, T4X, T6g, T4t, T6f, T4w, T1S, T1O, T1P; T1O = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)])); T1P = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)])); T1R = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)])); { V T3k, TQ, T4s, T4v; T3k = VADD(TP, TM); TQ = VSUB(TM, TP); T4s = VADD(T4q, T4r); T4v = VSUB(T4r, T4q); T4V = VSUB(T1r, T1s); T1t = VADD(T1r, T1s); T58 = VSUB(T1v, T1u); T1w = VADD(T1u, T1v); T4X = VSUB(T1O, T1P); T1Q = VADD(T1O, T1P); T3l = VADD(T3j, T3k); T3B = VSUB(T3j, T3k); TR = VFNMS(LDK(KP414213562), TQ, TJ); T2a = VFMA(LDK(KP414213562), TJ, TQ); T6g = VFNMS(LDK(KP707106781), T4s, T4p); T4t = VFMA(LDK(KP707106781), T4s, T4p); T6f = VFNMS(LDK(KP707106781), T4v, T4u); T4w = VFMA(LDK(KP707106781), T4v, T4u); T1S = 
LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)])); } { V T4W, T1A, T50, T51, T1D, T1F, T1G; { V T1y, T1z, T1B, T1C; T1y = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)])); T1z = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)])); T1B = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)])); T1C = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)])); T4x = VFNMS(LDK(KP198912367), T4w, T4t); T5q = VFMA(LDK(KP198912367), T4t, T4w); T6h = VFNMS(LDK(KP668178637), T6g, T6f); T6E = VFMA(LDK(KP668178637), T6f, T6g); T4W = VSUB(T1R, T1S); T1T = VADD(T1R, T1S); T1A = VADD(T1y, T1z); T50 = VSUB(T1y, T1z); T51 = VSUB(T1C, T1B); T1D = VADD(T1B, T1C); } T1F = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)])); T1G = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)])); T1I = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)])); T4Y = VADD(T4W, T4X); T59 = VSUB(T4X, T4W); T1J = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)])); T3a = VADD(T1A, T1D); T1E = VSUB(T1A, T1D); T52 = VFMA(LDK(KP414213562), T51, T50); T5b = VFNMS(LDK(KP414213562), T50, T51); T53 = VSUB(T1F, T1G); T1H = VADD(T1F, T1G); } } } { V T37, T54, T1K, T38; T1x = VSUB(T1t, T1w); T37 = VADD(T1t, T1w); T4Z = VFMA(LDK(KP707106781), T4Y, T4V); T6r = VFNMS(LDK(KP707106781), T4Y, T4V); T54 = VSUB(T1J, T1I); T1K = VADD(T1I, T1J); T6u = VFNMS(LDK(KP707106781), T59, T58); T5a = VFMA(LDK(KP707106781), T59, T58); T38 = VADD(T1T, T1Q); T1U = VSUB(T1Q, T1T); T55 = VFNMS(LDK(KP414213562), T54, T53); T5c = VFMA(LDK(KP414213562), T53, T54); T1L = VSUB(T1H, T1K); T3b = VADD(T1H, T1K); T39 = VADD(T37, T38); T3H = VSUB(T37, T38); } } { V T4A, TW, T4N, TZ, T1j, T1m, T4O, T4D, T13, T4F, T16, T4G, T1a, T4I, T4J; V T1d; { V TU, TV, TX, TY, T56, T6v; TU = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)])); T56 = VADD(T52, T55); T6v = VSUB(T55, T52); { V T5d, T6s, T1V, T1M; T5d = VADD(T5b, T5c); T6s = VSUB(T5c, T5b); T1V = VSUB(T1L, T1E); T1M = VADD(T1E, T1L); T3I = VSUB(T3b, T3a); T3c = VADD(T3a, T3b); T5N = VFNMS(LDK(KP923879532), T56, T4Z); T57 = VFMA(LDK(KP923879532), T56, T4Z); T72 = 
VFNMS(LDK(KP923879532), T6v, T6u); T6w = VFMA(LDK(KP923879532), T6v, T6u); T5O = VFNMS(LDK(KP923879532), T5d, T5a); T5e = VFMA(LDK(KP923879532), T5d, T5a); T71 = VFMA(LDK(KP923879532), T6s, T6r); T6t = VFNMS(LDK(KP923879532), T6s, T6r); T2y = VFNMS(LDK(KP707106781), T1V, T1U); T1W = VFMA(LDK(KP707106781), T1V, T1U); T2x = VFNMS(LDK(KP707106781), T1M, T1x); T1N = VFMA(LDK(KP707106781), T1M, T1x); TV = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)])); } TX = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)])); TY = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)])); { V T1h, T1i, T1k, T1l; T1h = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)])); T1i = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)])); T1k = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)])); T1l = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)])); { V T11, T4B, T4C, T12, T14, T15; T11 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); T4A = VSUB(TU, TV); TW = VADD(TU, TV); T4N = VSUB(TX, TY); TZ = VADD(TX, TY); T1j = VADD(T1h, T1i); T4B = VSUB(T1h, T1i); T1m = VADD(T1k, T1l); T4C = VSUB(T1k, T1l); T12 = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)])); T14 = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)])); T15 = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)])); { V T18, T19, T1b, T1c; T18 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)])); T19 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)])); T1b = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)])); T1c = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)])); T4O = VSUB(T4B, T4C); T4D = VADD(T4B, T4C); T13 = VADD(T11, T12); T4F = VSUB(T11, T12); T16 = VADD(T14, T15); T4G = VSUB(T14, T15); T1a = VADD(T18, T19); T4I = VSUB(T18, T19); T4J = VSUB(T1b, T1c); T1d = VADD(T1b, T1c); } } } } { V T30, T10, T6k, T4E, T4Q, T4H, T17, T6n, T4P, T1e, T4K, T4R, T1n, T31; T30 = VADD(TW, TZ); T10 = VSUB(TW, TZ); T6k = VFNMS(LDK(KP707106781), T4D, T4A); T4E = VFMA(LDK(KP707106781), T4D, T4A); T4Q = VFMA(LDK(KP414213562), T4F, T4G); T4H = VFNMS(LDK(KP414213562), T4G, T4F); T33 = VADD(T13, T16); T17 = VSUB(T13, T16); T6n = 
VFNMS(LDK(KP707106781), T4O, T4N); T4P = VFMA(LDK(KP707106781), T4O, T4N); T34 = VADD(T1a, T1d); T1e = VSUB(T1a, T1d); T4K = VFMA(LDK(KP414213562), T4J, T4I); T4R = VFNMS(LDK(KP414213562), T4I, T4J); T1n = VSUB(T1j, T1m); T31 = VADD(T1j, T1m); { V T1f, T1o, T6o, T4L, T4S, T6l; T1f = VADD(T17, T1e);
快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -