n2sv_64.c
字號:
STM4(&(io[3]), Tgh, ovs, &(io[1])); Tgi = VFNMS(LDK(KP956940335), T6C, T6B); STM4(&(io[35]), Tgi, ovs, &(io[1])); { V T6l, T6x, T6d, T6b; T6l = VFMA(LDK(KP831469612), T6k, T6j); T6x = VFNMS(LDK(KP831469612), T6k, T6j); T6d = VFMA(LDK(KP831469612), T6a, T67); T6b = VFNMS(LDK(KP831469612), T6a, T67); { V T6g, T6i, T5W, T6c; T6g = VSUB(T6e, T6f); T6i = VADD(T6f, T6e); T5W = VSUB(T52, T5V); T6c = VADD(T52, T5V); Tgj = VFMA(LDK(KP956940335), T6w, T6v); STM4(&(io[19]), Tgj, ovs, &(io[1])); Tgk = VFNMS(LDK(KP956940335), T6w, T6v); STM4(&(io[51]), Tgk, ovs, &(io[1])); Tgl = VFMA(LDK(KP956940335), T6s, T6l); STM4(&(ro[3]), Tgl, ovs, &(ro[1])); Tgm = VFNMS(LDK(KP956940335), T6s, T6l); STM4(&(ro[35]), Tgm, ovs, &(ro[1])); Tgn = VFMA(LDK(KP881921264), T6i, T6h); STM4(&(ro[59]), Tgn, ovs, &(ro[1])); Tgo = VFNMS(LDK(KP881921264), T6i, T6h); STM4(&(ro[27]), Tgo, ovs, &(ro[1])); Tgp = VFMA(LDK(KP881921264), T6g, T6d); STM4(&(io[11]), Tgp, ovs, &(io[1])); Tgq = VFNMS(LDK(KP881921264), T6g, T6d); STM4(&(io[43]), Tgq, ovs, &(io[1])); Tgr = VFMA(LDK(KP881921264), T6c, T6b); STM4(&(io[59]), Tgr, ovs, &(io[1])); Tgs = VFNMS(LDK(KP881921264), T6c, T6b); STM4(&(io[27]), Tgs, ovs, &(io[1])); Tgt = VFMA(LDK(KP881921264), T5W, T49); STM4(&(ro[11]), Tgt, ovs, &(ro[1])); Tgu = VFNMS(LDK(KP881921264), T5W, T49); STM4(&(ro[43]), Tgu, ovs, &(ro[1])); Tgv = VFNMS(LDK(KP956940335), T6A, T6x); STM4(&(ro[51]), Tgv, ovs, &(ro[1])); Tgw = VFMA(LDK(KP956940335), T6A, T6x); STM4(&(ro[19]), Tgw, ovs, &(ro[1])); } } } } { V T8j, T8c, T8C, T8v, T8N, T8M, T8X, T7L, T9c, T92, T9d, T95, T98, T80; { V T90, T91, T93, T94, T7S, T7Z; T8j = VFNMS(LDK(KP923879532), T8i, T8f); T90 = VFMA(LDK(KP923879532), T8i, T8f); T91 = VFMA(LDK(KP923879532), T8b, T84); T8c = VFNMS(LDK(KP923879532), T8b, T84); T8C = VFNMS(LDK(KP923879532), T8B, T8y); T93 = VFMA(LDK(KP923879532), T8B, T8y); T94 = VFMA(LDK(KP923879532), T8u, T8n); T8v = VFNMS(LDK(KP923879532), T8u, T8n); T8N = VFMA(LDK(KP198912367), T7O, T7R); T7S = 
VFNMS(LDK(KP198912367), T7R, T7O); T7Z = VFMA(LDK(KP198912367), T7Y, T7V); T8M = VFNMS(LDK(KP198912367), T7V, T7Y); T8X = VFMA(LDK(KP923879532), T7K, T7D); T7L = VFNMS(LDK(KP923879532), T7K, T7D); T9c = VFNMS(LDK(KP098491403), T90, T91); T92 = VFMA(LDK(KP098491403), T91, T90); T9d = VFMA(LDK(KP098491403), T93, T94); T95 = VFNMS(LDK(KP098491403), T94, T93); T98 = VADD(T7S, T7Z); T80 = VSUB(T7S, T7Z); } { V T8V, T81, T8T, T8k, T97, T8L, T8Y, T8O, T8S, T8D; T8V = VFNMS(LDK(KP980785280), T80, T7L); T81 = VFMA(LDK(KP980785280), T80, T7L); T8T = VFNMS(LDK(KP820678790), T8c, T8j); T8k = VFMA(LDK(KP820678790), T8j, T8c); T97 = VFMA(LDK(KP923879532), T8K, T8H); T8L = VFNMS(LDK(KP923879532), T8K, T8H); T8Y = VADD(T8N, T8M); T8O = VSUB(T8M, T8N); T8S = VFMA(LDK(KP820678790), T8v, T8C); T8D = VFNMS(LDK(KP820678790), T8C, T8v); { V T8R, T8P, T8U, T8W, T8E, T8Q; { V T96, T9f, T9g, T8Z; T9a = VSUB(T95, T92); T96 = VADD(T92, T95); T9f = VFMA(LDK(KP980785280), T98, T97); T99 = VFNMS(LDK(KP980785280), T98, T97); T9e = VSUB(T9c, T9d); T9g = VADD(T9c, T9d); T8Z = VFMA(LDK(KP980785280), T8Y, T8X); T9b = VFNMS(LDK(KP980785280), T8Y, T8X); T8R = VFMA(LDK(KP980785280), T8O, T8L); T8P = VFNMS(LDK(KP980785280), T8O, T8L); T8U = VSUB(T8S, T8T); T8W = VADD(T8T, T8S); T8E = VSUB(T8k, T8D); T8Q = VADD(T8k, T8D); { V Tgx, Tgy, Tgz, TgA; Tgx = VFNMS(LDK(KP995184726), T9g, T9f); STM4(&(io[33]), Tgx, ovs, &(io[1])); STN4(&(io[32]), TeO, Tgx, TfG, Tgi, ovs); Tgy = VFMA(LDK(KP995184726), T96, T8Z); STM4(&(ro[1]), Tgy, ovs, &(ro[1])); STN4(&(ro[0]), TeL, Tgy, Tfv, Tgl, ovs); Tgz = VFNMS(LDK(KP995184726), T96, T8Z); STM4(&(ro[33]), Tgz, ovs, &(ro[1])); STN4(&(ro[32]), TeM, Tgz, Tfw, Tgm, ovs); TgA = VFMA(LDK(KP995184726), T9g, T9f); STM4(&(io[1]), TgA, ovs, &(io[1])); STN4(&(io[0]), TeN, TgA, TfF, Tgh, ovs); } } { V TgB, TgC, TgD, TgE; TgB = VFMA(LDK(KP773010453), T8W, T8V); STM4(&(ro[57]), TgB, ovs, &(ro[1])); STN4(&(ro[56]), TeS, TgB, Tfx, Tgn, ovs); TgC = VFNMS(LDK(KP773010453), T8W, T8V); 
STM4(&(ro[25]), TgC, ovs, &(ro[1])); STN4(&(ro[24]), TeR, TgC, Tfy, Tgo, ovs); TgD = VFMA(LDK(KP773010453), T8U, T8R); STM4(&(io[9]), TgD, ovs, &(io[1])); STN4(&(io[8]), TeT, TgD, Tfz, Tgp, ovs); TgE = VFNMS(LDK(KP773010453), T8U, T8R); STM4(&(io[41]), TgE, ovs, &(io[1])); STN4(&(io[40]), TeU, TgE, TfA, Tgq, ovs); { V TgF, TgG, TgH, TgI; TgF = VFMA(LDK(KP773010453), T8Q, T8P); STM4(&(io[57]), TgF, ovs, &(io[1])); STN4(&(io[56]), TeW, TgF, TfB, Tgr, ovs); TgG = VFNMS(LDK(KP773010453), T8Q, T8P); STM4(&(io[25]), TgG, ovs, &(io[1])); STN4(&(io[24]), TeV, TgG, TfC, Tgs, ovs); TgH = VFMA(LDK(KP773010453), T8E, T81); STM4(&(ro[9]), TgH, ovs, &(ro[1])); STN4(&(ro[8]), TeX, TgH, TfD, Tgt, ovs); TgI = VFNMS(LDK(KP773010453), T8E, T81); STM4(&(ro[41]), TgI, ovs, &(ro[1])); STN4(&(ro[40]), TeY, TgI, TfE, Tgu, ovs); } } } } } } } } } } { V TgJ, TgK, TgL, TgM; TgJ = VFMA(LDK(KP995184726), T9a, T99); STM4(&(io[17]), TgJ, ovs, &(io[1])); STN4(&(io[16]), TeQ, TgJ, TfI, Tgj, ovs); TgK = VFNMS(LDK(KP995184726), T9a, T99); STM4(&(io[49]), TgK, ovs, &(io[1])); STN4(&(io[48]), TeP, TgK, TfH, Tgk, ovs); TgL = VFMA(LDK(KP995184726), T9e, T9b); STM4(&(ro[17]), TgL, ovs, &(ro[1])); STN4(&(ro[16]), TeK, TgL, TfJ, Tgw, ovs); TgM = VFNMS(LDK(KP995184726), T9e, T9b); STM4(&(ro[49]), TgM, ovs, &(ro[1])); STN4(&(ro[48]), TeJ, TgM, TfK, Tgv, ovs); } }}
/*
 * Descriptor handed to X(kdft_register) together with the n2sv_64 kernel.
 * It carries the literal 64, the name string "n2sv_64", the tuple
 * {520, 0, 392, 0}, a pointer to GENUS, and four trailing integers.
 * NOTE(review): 64 is presumably the transform size and the tuple presumably
 * an operation count for the HAVE_FMA variant -- the field meanings are not
 * visible here; confirm against the kdft_desc declaration.
 */
static const kdft_desc desc = { 64, "n2sv_64", {520, 0, 392, 0}, &GENUS, 0, 1, 0, 0 };
/*
 * Registration entry point for this codelet: forwards the planner `p`, the
 * n2sv_64 kernel and the address of `desc` to X(kdft_register).
 * It does nothing else and returns no value.
 */
void X(codelet_n2sv_64) (planner *p) { X(kdft_register) (p, n2sv_64, &desc);}#else /* HAVE_FMA *//* Generated by: ../../../genfft/gen_notw -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2sv_64 -with-ostride 1 -include n2s.h -store-multiple 4 *//* * This function contains 912 FP additions, 248 FP multiplications, * (or, 808 additions, 144 multiplications, 104 fused multiply/add), * 260 stack variables, 15 constants, and 288 memory accesses */#include "n2s.h"static void n2sv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT 
ivs, INT ovs){ DVK(KP773010453, +0.773010453362736960810906609758469800971041293); DVK(KP634393284, +0.634393284163645498215171613225493370675687095); DVK(KP098017140, +0.098017140329560601994195563888641845861136673); DVK(KP995184726, +0.995184726672196886244836953109479921575474869); DVK(KP881921264, +0.881921264348355029712756863660388349508442621); DVK(KP471396736, +0.471396736825997648556387625905254377657460319); DVK(KP290284677, +0.290284677254462367636192375817395274691476278); DVK(KP956940335, +0.956940335732208864935797886980269969482849206); DVK(KP831469612, +0.831469612302545237078788377617905756738560812); DVK(KP555570233, +0.555570233019602224742830813948532874374937191); DVK(KP195090322, +0.195090322016128267848284868477022240927691618); DVK(KP980785280, +0.980785280403230449126182236134239036973933731); DVK(KP923879532, +0.923879532511286756128183189396788286822416626); DVK(KP382683432, +0.382683432365089771728459984030398866761344562); DVK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) { V T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e; V T8G, Tu, TdI, Tak, TbD, Tan, TbC, T2x, Tda, T3m, T65, T7G, T8J, T7J, T8I; V T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R; V T9l, T3N, T6H, T1L, Tdv, Tbs, Tcw, TdC, Teo, T5j, T6V, T5Q, T6Y, T8y, T9C; V Tbb, Tct, T8n, T9z, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V; V T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O; V T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50; V T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, TdD, Tbv, Tcu, Tdy, Tep, T5G, T6Z; V T5T, T6W, T8B, T9A, Tbm, Tcx, T8u, T9D; { V T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g; V T3c; { V T1, T2, T24, 
T25; T1 = LD(&(ri[0]), ivs, &(ri[0])); T2 = LD(&(ri[WS(is, 32)]), ivs, &(ri[0])); T3 = VADD(T1, T2); T35 = VSUB(T1, T2); T24 = LD(&(ii[0]), ivs, &(ii[0])); T25 = LD(&(ii[WS(is, 32)]), ivs, &(ii[0])); T26 = VADD(T24, T25); T5Y = VSUB(T24, T25); } { V T4, T5, T27, T28; T4 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0])); T5 = LD(&(ri[WS(is, 48)]), ivs, &(ri[0])); T6 = VADD(T4, T5); T5X = VSUB(T4, T5); T27 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0])); T28 = LD(&(ii[WS(is, 48)]), ivs, &(ii[0])); T29 = VADD(T27, T28); T36 = VSUB(T27, T28); } { V T8, T9, T2b, T2c; T8 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0])); T9 = LD(&(ri[WS(is, 40)]), ivs, &(ri[0])); Ta = VADD(T8, T9); T39 = VSUB(T8, T9); T2b = LD(&(ii[WS(is, 8)]), ivs, &(ii[0])); T2c = LD(&(ii[WS(is, 40)]), ivs, &(ii[0])); T2d = VADD(T2b, T2c); T38 = VSUB(T2b, T2c); } { V Tb, Tc, T2e, T2f; Tb = LD(&(ri[WS(is, 56)]), ivs, &(ri[0])); Tc = LD(&(ri[WS(is, 24)]), ivs, &(ri[0])); Td = VADD(Tb, Tc); T3b = VSUB(Tb, Tc); T2e = LD(&(ii[WS(is, 56)]), ivs, &(ii[0])); T2f = LD(&(ii[WS(is, 24)]), ivs, &(ii[0])); T2g = VADD(T2e, T2f); T3c = VSUB(T2e, T2f); } { V T7, Te, T2a, T2h; T37 = VSUB(T35, T36); T7B = VADD(T35, T36); T8F = VSUB(T5Y, T5X); T5Z = VADD(T5X, T5Y); T7 = VADD(T3, T6); Te = VADD(Ta, Td); Tf = VADD(T7, Te); Td9 = VSUB(T7, Te); { V Tbz, TbA, T60, T61; Tbz = VSUB(T26, T29); TbA = VSUB(Td, Ta); TbB = VSUB(Tbz, TbA); TcB = VADD(TbA, Tbz); T60 = VSUB(T3b, T3c); T61 = VADD(T39, T38); T62 = VMUL(LDK(KP707106781), VSUB(T60, T61)); T7C = VMUL(LDK(KP707106781), VADD(T61, T60)); } T2a = VADD(T26, T29); T2h = VADD(T2d, T2g); T2i = VADD(T2a, T2h); TdH = VSUB(T2a, T2h); { V Taf, Tag, T3a, T3d; Taf = VSUB(T3, T6); Tag = VSUB(T2d, T2g); Tah = VSUB(Taf, Tag); Tcb = VADD(Taf, Tag); T3a = VSUB(T38, T39); T3d = VADD(T3b, T3c); T3e = VMUL(LDK(KP707106781), VSUB(T3a, T3d)); T8G = VMUL(LDK(KP707106781), VADD(T3a, T3d)); } } } { V Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v; V T3r; { V Tg, Th, T2j, T2k; Tg = LD(&(ri[WS(is, 4)]), ivs, 
&(ri[0])); Th = LD(&(ri[WS(is, 36)]), ivs, &(ri[0])); Ti = VADD(Tg, Th); T3j = VSUB(Tg, Th); T2j = LD(&(ii[WS(is, 4)]), ivs, &(ii[0])); T2k = LD(&(ii[WS(is, 36)]), ivs, &(ii[0])); T2l = VADD(T2j, T2k); T3h = VSUB(T2j, T2k); } { V Tj, Tk, T2m, T2n; Tj = LD(&(ri[WS(is, 20)]), ivs, &(ri[0])); Tk = LD(&(ri[WS(is, 52)]), ivs, &(ri[0])); Tl = VADD(Tj, Tk); T3g = VSUB(Tj, Tk); T2m = LD(&(ii[WS(is, 20)]), ivs, &(ii[0])); T2n = LD(&(ii[WS(is, 52)]), ivs, &(ii[0])); T2o = VADD(T2m, T2n); T3k = VSUB(T2m, T2n); } { V Tn, To, T2q, T2r; Tn = LD(&(ri[WS(is, 60)]), ivs, &(ri[0])); To = LD(&(ri[WS(is, 28)]), ivs, &(ri[0])); Tp = VADD(Tn, To); T3q = VSUB(Tn, To); T2q = LD(&(ii[WS(is, 60)]), ivs, &(ii[0])); T2r = LD(&(ii[WS(is, 28)]), ivs, &(ii[0])); T2s = VADD(T2q, T2r); T3o = VSUB(T2q, T2r); } { V Tq, Tr, T2t, T2u; Tq = LD(&(ri[WS(is, 12)]), ivs, &(ri[0])); Tr = LD(&(ri[WS(is, 44)]), ivs, &(ri[0])); Ts = VADD(Tq, Tr); T3n = VSUB(Tq, Tr); T2t = LD(&(ii[WS(is, 12)]), ivs, &(ii[0]));
快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -