n2sv_64.c
字號:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:46:47 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_notw -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2sv_64 -with-ostride 1 -include n2s.h -store-multiple 4 *//* * This function contains 912 FP additions, 392 FP multiplications, * (or, 520 additions, 0 multiplications, 392 fused multiply/add), * 310 stack variables, 15 constants, and 288 memory accesses */#include "n2s.h"static void n2sv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs){ DVK(KP881921264, +0.881921264348355029712756863660388349508442621); DVK(KP956940335, +0.956940335732208864935797886980269969482849206); DVK(KP534511135, +0.534511135950791641089685961295362908582039528); DVK(KP303346683, +0.303346683607342391675883946941299872384187453); DVK(KP773010453, +0.773010453362736960810906609758469800971041293); DVK(KP995184726, +0.995184726672196886244836953109479921575474869); DVK(KP820678790, +0.820678790828660330972281985331011598767386482); DVK(KP098491403, 
+0.098491403357164253077197521291327432293052451); DVK(KP831469612, +0.831469612302545237078788377617905756738560812); DVK(KP980785280, +0.980785280403230449126182236134239036973933731); DVK(KP668178637, +0.668178637919298919997757686523080761552472251); DVK(KP198912367, +0.198912367379658006911597622644676228597850501); DVK(KP923879532, +0.923879532511286756128183189396788286822416626); DVK(KP707106781, +0.707106781186547524400844362104849039284835938); DVK(KP414213562, +0.414213562373095048801688724209698078569671875); INT i; for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) { V TeJ, TeK, TeP, TeQ, TfH, TfI, TfJ, TfK, Tgj, Tgk, Tgv, Tgw, T9a, T99, T9e; V T9b; { V T7B, T37, T5Z, T8F, TbB, TcB, Tf, Td9, T62, T7C, T2i, TdH, Tcb, Tah, T8G; V T3e, Tak, TbC, T65, T3m, TdI, Tu, Tda, T2x, TbD, Tan, T8I, T7G, T8J, T7J; V T64, T3t, Tas, Tce, TK, Tdd, Tav, Tcf, Tdc, T2N, T3G, T6G, T9k, T7O, T9l; V T7R, T6H, T3N, T1L, TdA, Tdx, Teo, Tbs, Tct, T5Q, T6V, T8y, T9z, T5j, T6Y; V Tbb, Tcw, T8n, T9C, Tch, Taz, Tdf, TZ, Tdg, T32, Tci, TaC, T6J, T3Z, T9n; V T7V, T9o, T7Y, T6K, T46, Tdp, T1g, Tej, Tdm, Tcm, Tb1, Tcp, TaK, T6O, T4X; V T9s, T8f, T6R, T4q, T9v, T84, Tdn, T1v, Tek, Tds, Tcn, TaV, Tcq, Tb4, T9t; V T8b, T9w, T8i, T6S, T50, T6P, T4N, T5k, T1V, T1S, TdB, Tbi, T5s, Tbt, Tbg; V T5F, T5R, T5p, T1Y, Tbj, T5n, T8z, T8q; { V Tba, T57, T8l, Tb7, T5M, T8w, T8m, T5P, T8x, T5i; { V T2p, T7F, T7E, Tal, T2w, Tam, T3s, T7H, T7I, T3p, T3d, T3a; { V T8, T35, T3, T5Y, T26, T5X, T6, T36, T29, T9, T2b, T2c, Tb, Tc, T2e; V T2f; { V T1, T2, T24, T25, T4, T5, T27, T28; T1 = LD(&(ri[0]), ivs, &(ri[0])); T2 = LD(&(ri[WS(is, 32)]), ivs, &(ri[0])); T24 = LD(&(ii[0]), ivs, &(ii[0])); T25 = LD(&(ii[WS(is, 32)]), ivs, &(ii[0])); T4 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0])); T5 = LD(&(ri[WS(is, 48)]), ivs, &(ri[0])); T27 = LD(&(ii[WS(is, 16)]), ivs, 
&(ii[0])); T28 = LD(&(ii[WS(is, 48)]), ivs, &(ii[0])); T8 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); T35 = VSUB(T1, T2); T3 = VADD(T1, T2); T5Y = VSUB(T24, T25); T26 = VADD(T24, T25); T5X = VSUB(T4, T5); T6 = VADD(T4, T5); T36 = VSUB(T27, T28); T29 = VADD(T27, T28); T9 = LD(&(ri[WS(is, 40)]), ivs, &(ri[0])); T2b = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); T2c = LD(&(ii[WS(is, 40)]), ivs, &(ii[0])); Tb = LD(&(ri[WS(is, 56)]), ivs, &(ri[0])); Tc = LD(&(ri[WS(is, 24)]), ivs, &(ri[0])); T2e = LD(&(ii[WS(is, 56)]), ivs, &(ii[0])); T2f = LD(&(ii[WS(is, 24)]), ivs, &(ii[0])); } { V T39, Ta, T38, T2d, T3b, Td, T3c, T2g, Taf, T7; T7B = VADD(T35, T36); T37 = VSUB(T35, T36); T39 = VSUB(T8, T9); Ta = VADD(T8, T9); T38 = VSUB(T2b, T2c); T2d = VADD(T2b, T2c); T3b = VSUB(Tb, Tc); Td = VADD(Tb, Tc); T3c = VSUB(T2e, T2f); T2g = VADD(T2e, T2f); T5Z = VADD(T5X, T5Y); T8F = VSUB(T5Y, T5X); Taf = VSUB(T3, T6); T7 = VADD(T3, T6); { V TbA, T2a, Te, Tbz, T60, T61, T2h, Tag; TbA = VSUB(T26, T29); T2a = VADD(T26, T29); Te = VADD(Ta, Td); Tbz = VSUB(Td, Ta); T3d = VADD(T3b, T3c); T60 = VSUB(T3b, T3c); T61 = VADD(T39, T38); T3a = VSUB(T38, T39); T2h = VADD(T2d, T2g); Tag = VSUB(T2d, T2g); TbB = VADD(Tbz, TbA); TcB = VSUB(TbA, Tbz); Tf = VADD(T7, Te); Td9 = VSUB(T7, Te); T62 = VSUB(T60, T61); T7C = VADD(T61, T60); T2i = VADD(T2a, T2h); TdH = VSUB(T2a, T2h); Tcb = VSUB(Taf, Tag); Tah = VADD(Taf, Tag); } } } { V T3j, Ti, T3h, T2l, T3g, Tl, T2t, T3k, T2o, T3q, Tp, T3o, T2s, T3n, Ts; V T2u, T2m, T2n; { V Tg, Th, T2j, T2k, Tj, Tk; Tg = LD(&(ri[WS(is, 4)]), ivs, &(ri[0])); Th = LD(&(ri[WS(is, 36)]), ivs, &(ri[0])); T2j = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); T2k = LD(&(ii[WS(is, 36)]), ivs, &(ii[0])); Tj = LD(&(ri[WS(is, 20)]), ivs, &(ri[0])); Tk = LD(&(ri[WS(is, 52)]), ivs, &(ri[0])); T2m = LD(&(ii[WS(is, 20)]), ivs, &(ii[0])); T8G = VADD(T3a, T3d); T3e = VSUB(T3a, T3d); T3j = VSUB(Tg, Th); Ti = VADD(Tg, Th); T3h = VSUB(T2j, T2k); T2l = VADD(T2j, T2k); T3g = VSUB(Tj, Tk); Tl = VADD(Tj, Tk); T2n = 
LD(&(ii[WS(is, 52)]), ivs, &(ii[0])); } { V Tn, To, T2q, T2r, Tq, Tr; Tn = LD(&(ri[WS(is, 60)]), ivs, &(ri[0])); To = LD(&(ri[WS(is, 28)]), ivs, &(ri[0])); T2q = LD(&(ii[WS(is, 60)]), ivs, &(ii[0])); T2r = LD(&(ii[WS(is, 28)]), ivs, &(ii[0])); Tq = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); Tr = LD(&(ri[WS(is, 44)]), ivs, &(ri[0])); T2t = LD(&(ii[WS(is, 12)]), ivs, &(ii[0])); T3k = VSUB(T2m, T2n); T2o = VADD(T2m, T2n); T3q = VSUB(Tn, To); Tp = VADD(Tn, To); T3o = VSUB(T2q, T2r); T2s = VADD(T2q, T2r); T3n = VSUB(Tq, Tr); Ts = VADD(Tq, Tr); T2u = LD(&(ii[WS(is, 44)]), ivs, &(ii[0])); } { V Tai, Tm, Taj, T3r; Tai = VSUB(Ti, Tl); Tm = VADD(Ti, Tl); T2p = VADD(T2l, T2o); Taj = VSUB(T2l, T2o); { V T3i, T3l, Tt, T2v; T7F = VSUB(T3h, T3g); T3i = VADD(T3g, T3h); T3l = VSUB(T3j, T3k); T7E = VADD(T3j, T3k); Tt = VADD(Tp, Ts); Tal = VSUB(Tp, Ts); T2v = VADD(T2t, T2u); T3r = VSUB(T2t, T2u); Tak = VADD(Tai, Taj); TbC = VSUB(Taj, Tai); T65 = VFNMS(LDK(KP414213562), T3i, T3l); T3m = VFMA(LDK(KP414213562), T3l, T3i); TdI = VSUB(Tt, Tm); Tu = VADD(Tm, Tt); T2w = VADD(T2s, T2v); Tam = VSUB(T2s, T2v); } T3s = VSUB(T3q, T3r); T7H = VADD(T3q, T3r); T7I = VSUB(T3o, T3n); T3p = VADD(T3n, T3o); } } { V T7M, T7Q, T7N, T3M, T3J, T7P; { V TG, T3H, Ty, T3x, T2B, T3w, TB, T3I, T2E, TH, T2J, T2K, TD, TE, T2G; V T2H; { V Tw, Tx, T2z, T2A, Tz, TA, T2C, T2D; Tw = LD(&(ri[WS(is, 2)]), ivs, &(ri[0])); Tda = VSUB(T2p, T2w); T2x = VADD(T2p, T2w); TbD = VADD(Tal, Tam); Tan = VSUB(Tal, Tam); T8I = VFNMS(LDK(KP414213562), T7E, T7F); T7G = VFMA(LDK(KP414213562), T7F, T7E); T8J = VFMA(LDK(KP414213562), T7H, T7I); T7J = VFNMS(LDK(KP414213562), T7I, T7H); T64 = VFMA(LDK(KP414213562), T3p, T3s); T3t = VFNMS(LDK(KP414213562), T3s, T3p); Tx = LD(&(ri[WS(is, 34)]), ivs, &(ri[0])); T2z = LD(&(ii[WS(is, 2)]), ivs, &(ii[0])); T2A = LD(&(ii[WS(is, 34)]), ivs, &(ii[0])); Tz = LD(&(ri[WS(is, 18)]), ivs, &(ri[0])); TA = LD(&(ri[WS(is, 50)]), ivs, &(ri[0])); T2C = LD(&(ii[WS(is, 18)]), ivs, &(ii[0])); T2D = LD(&(ii[WS(is, 
50)]), ivs, &(ii[0])); TG = LD(&(ri[WS(is, 58)]), ivs, &(ri[0])); T3H = VSUB(Tw, Tx); Ty = VADD(Tw, Tx); T3x = VSUB(T2z, T2A); T2B = VADD(T2z, T2A); T3w = VSUB(Tz, TA); TB = VADD(Tz, TA); T3I = VSUB(T2C, T2D); T2E = VADD(T2C, T2D); TH = LD(&(ri[WS(is, 26)]), ivs, &(ri[0])); T2J = LD(&(ii[WS(is, 58)]), ivs, &(ii[0])); T2K = LD(&(ii[WS(is, 26)]), ivs, &(ii[0])); TD = LD(&(ri[WS(is, 10)]), ivs, &(ri[0])); TE = LD(&(ri[WS(is, 42)]), ivs, &(ri[0])); T2G = LD(&(ii[WS(is, 10)]), ivs, &(ii[0])); T2H = LD(&(ii[WS(is, 42)]), ivs, &(ii[0])); } { V Tat, TC, Tar, T2F, T3K, T3E, TJ, Taq, T2M, Tau, T3B, T3L, T3y, T3F; { V TI, T3C, T2L, T3D, TF, T3z, T2I, T3A; Tat = VSUB(Ty, TB); TC = VADD(Ty, TB); TI = VADD(TG, TH); T3C = VSUB(TG, TH); T2L = VADD(T2J, T2K); T3D = VSUB(T2J, T2K); TF = VADD(TD, TE); T3z = VSUB(TD, TE); T2I = VADD(T2G, T2H); T3A = VSUB(T2G, T2H); Tar = VSUB(T2B, T2E); T2F = VADD(T2B, T2E); T3K = VADD(T3C, T3D); T3E = VSUB(T3C, T3D); TJ = VADD(TF, TI); Taq = VSUB(TI, TF); T2M = VADD(T2I, T2L); Tau = VSUB(T2I, T2L); T3B = VADD(T3z, T3A); T3L = VSUB(T3A, T3z); } T7M = VSUB(T3x, T3w); T3y = VADD(T3w, T3x); Tas = VADD(Taq, Tar); Tce = VSUB(Tar, Taq); TK = VADD(TC, TJ); Tdd = VSUB(TC, TJ); Tav = VADD(Tat, Tau); Tcf = VSUB(Tat, Tau); T7Q = VADD(T3B, T3E); T3F = VSUB(T3B, T3E); Tdc = VSUB(T2F, T2M); T2N = VADD(T2F, T2M); T7N = VADD(T3L, T3K); T3M = VSUB(T3K, T3L); T3J = VSUB(T3H, T3I); T7P = VADD(T3H, T3I); T3G = VFNMS(LDK(KP707106781), T3F, T3y); T6G = VFMA(LDK(KP707106781), T3F, T3y); } } { V T1H, T5I, T1z, Tb8, T56, T53, T1C, Tb9, T5L, T1I, T5e, T5f, T1E, T1F, T59; V T5a; { V T1x, T1y, T54, T55, T1A, T1B, T5J, T5K; T1x = LD(&(ri[WS(is, 63)]), ivs, &(ri[WS(is, 1)])); T9k = VFNMS(LDK(KP707106781), T7N, T7M); T7O = VFMA(LDK(KP707106781), T7N, T7M); T9l = VFNMS(LDK(KP707106781), T7Q, T7P); T7R = VFMA(LDK(KP707106781), T7Q, T7P); T6H = VFMA(LDK(KP707106781), T3M, T3J); T3N = VFNMS(LDK(KP707106781), T3M, T3J); T1y = LD(&(ri[WS(is, 31)]), ivs, &(ri[WS(is, 1)])); T54 = 
LD(&(ii[WS(is, 63)]), ivs, &(ii[WS(is, 1)])); T55 = LD(&(ii[WS(is, 31)]), ivs, &(ii[WS(is, 1)])); T1A = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)])); T1B = LD(&(ri[WS(is, 47)]), ivs, &(ri[WS(is, 1)])); T5J = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)])); T5K = LD(&(ii[WS(is, 47)]), ivs, &(ii[WS(is, 1)])); T1H = LD(&(ri[WS(is, 55)]), ivs, &(ri[WS(is, 1)])); T5I = VSUB(T1x, T1y); T1z = VADD(T1x, T1y); Tb8 = VADD(T54, T55); T56 = VSUB(T54, T55); T53 = VSUB(T1A, T1B); T1C = VADD(T1A, T1B); Tb9 = VADD(T5J, T5K); T5L = VSUB(T5J, T5K); T1I = LD(&(ri[WS(is, 23)]), ivs, &(ri[WS(is, 1)])); T5e = LD(&(ii[WS(is, 55)]), ivs, &(ii[WS(is, 1)])); T5f = LD(&(ii[WS(is, 23)]), ivs, &(ii[WS(is, 1)])); T1E = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)])); T1F = LD(&(ri[WS(is, 39)]), ivs, &(ri[WS(is, 1)])); T59 = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)])); T5a = LD(&(ii[WS(is, 39)]), ivs, &(ii[WS(is, 1)])); } { V Tbo, T1D, Tdv, T5h, T5N, T1K, Tdw, Tbr, T5O, T5c; { V T1J, T5d, Tbq, T5g, T1G, T58, Tbp, T5b; Tbo = VSUB(T1z, T1C); T1D = VADD(T1z, T1C); T1J = VADD(T1H, T1I); T5d = VSUB(T1H, T1I); Tbq = VADD(T5e, T5f); T5g = VSUB(T5e, T5f);
快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -