n1fv_25.c
字號:
T3y = VSUB(T3o, T3p); } { V T1Z, T25, T1P, T22, T1X, TG, T1b, T28, T1t, T1y, T1x, T1E, T1Q, T1Y; { V T26, T1L, T1T, TF, T1f, T1W, T3m, T3g, T2M, T2G, T39, T3j, T21, T1O, T20; V T27; T26 = VFMA(LDK(KP867381224), T1K, T1J); T1L = VFNMS(LDK(KP867381224), T1K, T1J); T20 = VFNMS(LDK(KP958953096), T1S, T1R); T1T = VFMA(LDK(KP958953096), T1S, T1R); { V T2R, T2Y, T2e, T2v, T1N, T1V; T2R = VFNMS(LDK(KP494780565), T2c, T2d); T2Y = VFMA(LDK(KP447533225), T2d, T2c); T2e = VFMA(LDK(KP120146378), T2d, T2c); T2v = VFNMS(LDK(KP132830569), T2c, T2d); TF = VFNMS(LDK(KP667278218), TE, Tx); T1f = VFMA(LDK(KP603558818), Tx, TE); T1N = VFMA(LDK(KP869845200), TE, Tx); T1V = VFNMS(LDK(KP786782374), Tx, TE); { V T3A, T3C, T3w, T3u; T3A = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3z, T3y)); T3C = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3y, T3z)); T3w = VSUB(T3q, T3t); T3u = VADD(T3q, T3t); { V T2B, T2x, T2H, T2i; T2B = VFMA(LDK(KP734762448), T2w, T2v); T2x = VFNMS(LDK(KP734762448), T2w, T2v); T2H = VFNMS(LDK(KP734762448), T2h, T2e); T2i = VFMA(LDK(KP734762448), T2h, T2e); { V T30, T35, T3c, T2S, T3v; T30 = VFNMS(LDK(KP921078979), T2Z, T2Y); T35 = VFMA(LDK(KP921078979), T2Z, T2Y); T3c = VFMA(LDK(KP982009705), T2R, T2Q); T2S = VFNMS(LDK(KP982009705), T2R, T2Q); T1W = VFMA(LDK(KP912575812), T1V, T1U); T1Z = VFNMS(LDK(KP912575812), T1V, T1U); T1O = VFMA(LDK(KP912575812), T1N, T1M); T25 = VFNMS(LDK(KP912575812), T1N, T1M); ST(&(xo[0]), VADD(T3u, T3n), ovs, &(xo[0])); T3v = VFNMS(LDK(KP250000000), T3u, T3n); { V T2y, T2J, T2q, T2D; T2y = VFMA(LDK(KP945422727), T2x, T2u); T2J = VFMA(LDK(KP522616830), T2x, T2I); T2q = VFMA(LDK(KP956723877), T2p, T2i); T2D = VFNMS(LDK(KP522616830), T2i, T2C); { V T3e, T31, T36, T2T; T3e = VFMA(LDK(KP906616052), T30, T2X); T31 = VFNMS(LDK(KP906616052), T30, T2X); T36 = VFNMS(LDK(KP923225144), T2S, T2P); T2T = VFMA(LDK(KP923225144), T2S, T2P); { V T3k, T3d, T3x, T3B; T3k = VFNMS(LDK(KP669429328), T3b, T3c); T3d = VFMA(LDK(KP570584518), T3c, T3b); T3x = 
VFMA(LDK(KP559016994), T3w, T3v); T3B = VFNMS(LDK(KP559016994), T3w, T3v); { V T2A, T2K, T2r, T2E; T2A = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2z, T2y)); T2K = VFNMS(LDK(KP690983005), T2J, T2u); T2r = VFMA(LDK(KP992114701), T2q, T2b); T2E = VFMA(LDK(KP763932022), T2D, T2p); { V T32, T3a, T37, T3h; T32 = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2z, T31)); T3a = VFMA(LDK(KP262346850), T31, T2z); T37 = VFNMS(LDK(KP997675361), T36, T35); T3h = VFNMS(LDK(KP904508497), T36, T34); { V T2U, T33, T3l, T3f; T2U = VFMA(LDK(KP949179823), T2T, T2b); T33 = VFNMS(LDK(KP237294955), T2T, T2b); T3l = VFNMS(LDK(KP669429328), T3e, T3k); T3f = VFMA(LDK(KP618033988), T3e, T3d); ST(&(xo[WS(os, 20)]), VFMAI(T3A, T3x), ovs, &(xo[0])); ST(&(xo[WS(os, 5)]), VFNMSI(T3A, T3x), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 15)]), VFNMSI(T3C, T3B), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 10)]), VFMAI(T3C, T3B), ovs, &(xo[0])); { V T2L, T2F, T38, T3i; T2L = VFMA(LDK(KP855719849), T2K, T2H); ST(&(xo[WS(os, 22)]), VFMAI(T2A, T2r), ovs, &(xo[0])); ST(&(xo[WS(os, 3)]), VFNMSI(T2A, T2r), ovs, &(xo[WS(os, 1)])); T2F = VFNMS(LDK(KP855719849), T2E, T2B); T38 = VFMA(LDK(KP560319534), T37, T34); T3i = VFNMS(LDK(KP681693190), T3h, T35); ST(&(xo[WS(os, 23)]), VFMAI(T32, T2U), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 2)]), VFNMSI(T32, T2U), ovs, &(xo[0])); T3m = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3l, T3a)); T3g = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3f, T3a)); T2M = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2L, T2z)); T2G = VFMA(LDK(KP897376177), T2F, T2b); T39 = VFNMS(LDK(KP949179823), T38, T33); T3j = VFNMS(LDK(KP860541664), T3i, T33); T21 = VFMA(LDK(KP447417479), T1O, T20); } } } } } } } } } } } T1P = VFNMS(LDK(KP809385824), T1O, T1L); ST(&(xo[WS(os, 17)]), VFMAI(T2M, T2G), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 8)]), VFNMSI(T2M, T2G), ovs, &(xo[0])); ST(&(xo[WS(os, 12)]), VFMAI(T3g, T39), ovs, &(xo[0])); ST(&(xo[WS(os, 13)]), VFNMSI(T3g, T39), ovs, &(xo[WS(os, 1)])); 
ST(&(xo[WS(os, 7)]), VFMAI(T3m, T3j), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 18)]), VFNMSI(T3m, T3j), ovs, &(xo[0])); T22 = VFMA(LDK(KP690983005), T21, T1L); T27 = VFMA(LDK(KP447417479), T1W, T26); T1X = VFMA(LDK(KP894834959), T1W, T1T); { V T1r, T1s, T1v, T1w; T1r = VFNMS(LDK(KP916574801), T1f, T1e); T1g = VFMA(LDK(KP916574801), T1f, T1e); T1k = VFNMS(LDK(KP831864738), T1j, T1i); T1s = VFMA(LDK(KP831864738), T1j, T1i); T1v = VFNMS(LDK(KP829049696), TF, Tq); TG = VFMA(LDK(KP829049696), TF, Tq); T1b = VFMA(LDK(KP831864738), T1a, TV); T1w = VFNMS(LDK(KP831864738), T1a, TV); T28 = VFNMS(LDK(KP763932022), T27, T1T); T1t = VFMA(LDK(KP904730450), T1s, T1r); T1y = VFNMS(LDK(KP904730450), T1s, T1r); T1x = VFMA(LDK(KP559154169), T1w, T1v); T1E = VFNMS(LDK(KP683113946), T1v, T1w); } } T1Q = VFNMS(LDK(KP992114701), T1P, Tb); T1Y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T1X, T1q)); { V T1u, T1F, T1z, T1h, T1c, T23, T29; T23 = VFNMS(LDK(KP999544308), T22, T1Z); T29 = VFNMS(LDK(KP999544308), T28, T25); T1I = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1t, T1q)); T1u = VFNMS(LDK(KP242145790), T1t, T1q); T1F = VFMA(LDK(KP617882369), T1y, T1E); T1z = VFMA(LDK(KP559016994), T1y, T1x); T1h = VFNMS(LDK(KP904730450), T1b, TG); T1c = VFMA(LDK(KP904730450), T1b, TG); ST(&(xo[WS(os, 21)]), VFNMSI(T1Y, T1Q), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 4)]), VFMAI(T1Y, T1Q), ovs, &(xo[0])); T24 = VFNMS(LDK(KP803003575), T23, Tb); T2a = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T29, T1q)); T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T1F, T1u)); T1A = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1z, T1u)); T1l = VFNMS(LDK(KP904730450), T1k, T1h); T1B = VADD(T1g, T1h); T1H = VFMA(LDK(KP968583161), T1c, Tb); T1d = VFNMS(LDK(KP242145790), T1c, Tb); } } } } ST(&(xo[WS(os, 9)]), VFMAI(T2a, T24), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 16)]), VFNMSI(T2a, T24), ovs, &(xo[0])); { V T1m, T1C, T1n, T1D; T1m = VFNMS(LDK(KP618033988), T1l, T1g); T1C = VFNMS(LDK(KP683113946), T1B, 
T1k); ST(&(xo[WS(os, 24)]), VFMAI(T1I, T1H), ovs, &(xo[0])); ST(&(xo[WS(os, 1)]), VFNMSI(T1I, T1H), ovs, &(xo[WS(os, 1)])); T1n = VFNMS(LDK(KP876091699), T1m, T1d); T1D = VFMA(LDK(KP792626838), T1C, T1d); ST(&(xo[WS(os, 19)]), VFMAI(T1A, T1n), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 6)]), VFNMSI(T1A, T1n), ovs, &(xo[0])); ST(&(xo[WS(os, 14)]), VFMAI(T1G, T1D), ovs, &(xo[0])); ST(&(xo[WS(os, 11)]), VFNMSI(T1G, T1D), ovs, &(xo[WS(os, 1)])); } }}/* Descriptor for this codelet, passed to X(kdft_register) below: first field is 25, name string is "n1fv_25", and &GENUS refers to a symbol defined outside this chunk. NOTE(review): the {43, 12, 181, 0} tuple is presumably an operation-count record and the trailing zeros presumably stride constraints -- confirm against the kdft_desc declaration. */static const kdft_desc desc = { 25, "n1fv_25", {43, 12, 181, 0}, &GENUS, 0, 0, 0, 0 };/* Registration entry point: hands the n1fv_25 kernel and its descriptor to the planner p through X(kdft_register). */void X(codelet_n1fv_25) (planner *p) { X(kdft_register) (p, n1fv_25, &desc);}#else /* HAVE_FMA *//* Generated by: ../../../genfft/gen_notw_c -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name n1fv_25 -include n1f.h *//* * This function contains 224 FP additions, 140 FP multiplications, * (or, 146 additions, 62 multiplications, 78 fused multiply/add), * 115 stack variables, 40 constants, and 50 memory accesses */#include "n1f.h"static void n1fv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs){ DVK(KP809016994, +0.809016994374947424102293417182819058860154590); DVK(KP309016994, +0.309016994374947424102293417182819058860154590); DVK(KP770513242, +0.770513242775789230803009636396177847271667672); DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); DVK(KP992114701, +0.992114701314477831049793042785778521453036709); DVK(KP250666467, +0.250666467128608490746237519633017587885836494); DVK(KP637423989, +0.637423989748689710176712811676016195434917298); DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); DVK(KP125333233, +0.125333233564304245373118759816508793942918247); DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); DVK(KP248689887, +0.248689887164854788242283746006447968417567406); DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806); DVK(KP497379774, +0.497379774329709576484567492012895936835134813); DVK(KP968583161, 
+0.968583161128631119490168375464735813836012403); DVK(KP904827052, +0.904827052466019527713668647932697593970413911); DVK(KP851558583, +0.851558583130145297725004891488503407959946084); DVK(KP425779291, +0.425779291565072648862502445744251703979973042); DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822); DVK(KP844327925, +0.844327925502015078548558063966681505381659241); DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691); DVK(KP481753674, +0.481753674101715274987191502872129653528542010); DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); DVK(KP535826794, +0.535826794978996618271308767867639978063575346); DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483); DVK(KP963507348, +0.963507348203430549974383005744259307057084020); DVK(KP876306680, +0.876306680043863587308115903922062583399064238); DVK(KP998026728, +0.998026728428271561952336806863450553336905220); DVK(KP125581039, +0.125581039058626752152356449131262266244969664); DVK(KP684547105, +0.684547105928688673732283357621209269889519233); DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); DVK(KP062790519, +0.062790519529313376076178224565631133122484832); DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); DVK(KP728968627, +0.728968627421411523146730319055259111372571664); DVK(KP293892626, +0.293892626146236564584352977319536384298826219); DVK(KP475528258, +0.475528258147576786058219666689691071702849317); DVK(KP250000000, +0.250000000000000000000000000000000000000000000); DVK(KP587785252, +0.587785252292473129168705954639072768597652438); DVK(KP951056516, +0.951056516295153572116439333379382143405698634); DVK(KP559016994, +0.559016994374947424102293417182819058860154590); INT i; const R *xi; R *xo; xi = ri; xo = ro; for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), 
MAKE_VOLATILE_STRIDE(os)) { V T7, T1g, T26, Ta, T2R, T2N, T2O, T2P, T19, T1Y, T16, T1Z, T1a, T2v, T1l; V T2m, TU, T21, TR, T22, TV, T2u, T1k, T2l, T2K, T2L, T2M, TE, T1R, TB; V T1S, TF, T2r, T1i, T2j, Tp, T1U, Tm, T1V, Tq, T2s, T1h, T2i; { V T8, T6, T1f, T3, T1e, T25, T9; T8 = LD(&(xi[0]), ivs, &(xi[0])); { V T4, T5, T1, T2; T4 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0])); T5 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)])); T6 = VADD(T4, T5); T1f = VSUB(T4, T5); T1 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)])); T2 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -