?? n1bv_64.c
字號:
T31 = VADD(T1j, T1m); { V T1f, T1o, T6o, T4L, T4S, T6l; T1f = VADD(T17, T1e); T1o = VSUB(T17, T1e); T6o = VSUB(T4H, T4K); T4L = VADD(T4H, T4K); T4S = VADD(T4Q, T4R); T6l = VSUB(T4Q, T4R); T3E = VSUB(T30, T31); T32 = VADD(T30, T31); T1p = VFMA(LDK(KP707106781), T1o, T1n); T2v = VFNMS(LDK(KP707106781), T1o, T1n); T1g = VFMA(LDK(KP707106781), T1f, T10); T2u = VFNMS(LDK(KP707106781), T1f, T10); T4M = VFMA(LDK(KP923879532), T4L, T4E); T5K = VFNMS(LDK(KP923879532), T4L, T4E); T6p = VFMA(LDK(KP923879532), T6o, T6n); T6Z = VFNMS(LDK(KP923879532), T6o, T6n); T6m = VFNMS(LDK(KP923879532), T6l, T6k); T6Y = VFMA(LDK(KP923879532), T6l, T6k); T5L = VFNMS(LDK(KP923879532), T4S, T4P); T4T = VFMA(LDK(KP923879532), T4S, T4P); } } } } } } { V T6b, T6F, T7f, T6X, T70, T79, T7a, T73, T6C, T76, T77, T6i; { V T2Z, T3r, T3s, T3m, T3d, T3v; T2Z = VSUB(T2V, T2Y); T3r = VADD(T2V, T2Y); T3s = VADD(T3i, T3l); T3m = VSUB(T3i, T3l); T3d = VSUB(T39, T3c); T3v = VADD(T39, T3c); { V T3x, T3t, T3Q, T3J, T3D, T3V, T3G, T3P, T3u, T36, T3O, T3Y, T6V, T6W; { V T3N, T3C, T3F, T35; T3N = VSUB(T3A, T3B); T3C = VADD(T3A, T3B); T3F = VSUB(T33, T34); T35 = VADD(T33, T34); T3x = VADD(T3r, T3s); T3t = VSUB(T3r, T3s); T3Q = VFMA(LDK(KP414213562), T3H, T3I); T3J = VFNMS(LDK(KP414213562), T3I, T3H); T3D = VFMA(LDK(KP707106781), T3C, T3z); T3V = VFNMS(LDK(KP707106781), T3C, T3z); T3G = VFNMS(LDK(KP414213562), T3F, T3E); T3P = VFMA(LDK(KP414213562), T3E, T3F); T3u = VADD(T32, T35); T36 = VSUB(T32, T35); T3O = VFMA(LDK(KP707106781), T3N, T3M); T3Y = VFNMS(LDK(KP707106781), T3N, T3M); } T6b = VFNMS(LDK(KP923879532), T6a, T69); T6V = VFMA(LDK(KP923879532), T6a, T69); T6W = VADD(T6D, T6E); T6F = VSUB(T6D, T6E); { V T3R, T3W, T3K, T3Z; T3R = VSUB(T3P, T3Q); T3W = VADD(T3P, T3Q); T3K = VADD(T3G, T3J); T3Z = VSUB(T3G, T3J); { V T3e, T3n, T3w, T3y; T3e = VADD(T36, T3d); T3n = VSUB(T36, T3d); T3w = VSUB(T3u, T3v); T3y = VADD(T3u, T3v); { V T41, T3X, T3S, T3U; T41 = VFMA(LDK(KP923879532), T3W, T3V); T3X = 
VFNMS(LDK(KP923879532), T3W, T3V); T3S = VFNMS(LDK(KP923879532), T3R, T3O); T3U = VFMA(LDK(KP923879532), T3R, T3O); { V T42, T40, T3L, T3T; T42 = VFNMS(LDK(KP923879532), T3Z, T3Y); T40 = VFMA(LDK(KP923879532), T3Z, T3Y); T3L = VFNMS(LDK(KP923879532), T3K, T3D); T3T = VFMA(LDK(KP923879532), T3K, T3D); { V T3o, T3q, T3f, T3p; T3o = VFNMS(LDK(KP707106781), T3n, T3m); T3q = VFMA(LDK(KP707106781), T3n, T3m); T3f = VFNMS(LDK(KP707106781), T3e, T2Z); T3p = VFMA(LDK(KP707106781), T3e, T2Z); ST(&(xo[WS(os, 32)]), VSUB(T3x, T3y), ovs, &(xo[0])); ST(&(xo[0]), VADD(T3x, T3y), ovs, &(xo[0])); ST(&(xo[WS(os, 16)]), VFMAI(T3w, T3t), ovs, &(xo[0])); ST(&(xo[WS(os, 48)]), VFNMSI(T3w, T3t), ovs, &(xo[0])); ST(&(xo[WS(os, 44)]), VFNMSI(T40, T3X), ovs, &(xo[0])); ST(&(xo[WS(os, 20)]), VFMAI(T40, T3X), ovs, &(xo[0])); ST(&(xo[WS(os, 52)]), VFMAI(T42, T41), ovs, &(xo[0])); ST(&(xo[WS(os, 12)]), VFNMSI(T42, T41), ovs, &(xo[0])); ST(&(xo[WS(os, 4)]), VFMAI(T3U, T3T), ovs, &(xo[0])); ST(&(xo[WS(os, 60)]), VFNMSI(T3U, T3T), ovs, &(xo[0])); ST(&(xo[WS(os, 36)]), VFMAI(T3S, T3L), ovs, &(xo[0])); ST(&(xo[WS(os, 28)]), VFNMSI(T3S, T3L), ovs, &(xo[0])); ST(&(xo[WS(os, 56)]), VFNMSI(T3q, T3p), ovs, &(xo[0])); ST(&(xo[WS(os, 8)]), VFMAI(T3q, T3p), ovs, &(xo[0])); ST(&(xo[WS(os, 40)]), VFMAI(T3o, T3f), ovs, &(xo[0])); ST(&(xo[WS(os, 24)]), VFNMSI(T3o, T3f), ovs, &(xo[0])); T7f = VFNMS(LDK(KP831469612), T6W, T6V); T6X = VFMA(LDK(KP831469612), T6W, T6V); } } } } } T70 = VFMA(LDK(KP303346683), T6Z, T6Y); T79 = VFNMS(LDK(KP303346683), T6Y, T6Z); T7a = VFNMS(LDK(KP303346683), T71, T72); T73 = VFMA(LDK(KP303346683), T72, T71); T6C = VFMA(LDK(KP923879532), T6B, T6A); T76 = VFNMS(LDK(KP923879532), T6B, T6A); T77 = VSUB(T6e, T6h); T6i = VADD(T6e, T6h); } } { V T2r, T2D, T2C, T2s, T5H, T5o, T5v, T5D, T5r, T5I, T5x, T5h, T5F, T5B; { V TT, T2f, T2n, T1Y, T28, T2b, T2l, T2p, T2j, T2k; { V T1q, T2d, T7h, T7l, T2e, T1X, T75, T7d, T7m, T7k, T7c, T7e, Tn, TS; T2r = VFNMS(LDK(KP707106781), Tm, T7); Tn = 
VFMA(LDK(KP707106781), Tm, T7); TS = VADD(TC, TR); T2D = VSUB(TC, TR); { V T7b, T7j, T74, T7i, T78, T7g; T1q = VFNMS(LDK(KP198912367), T1p, T1g); T2d = VFMA(LDK(KP198912367), T1g, T1p); T7g = VADD(T79, T7a); T7b = VSUB(T79, T7a); T7j = VSUB(T70, T73); T74 = VADD(T70, T73); T7i = VFNMS(LDK(KP831469612), T77, T76); T78 = VFMA(LDK(KP831469612), T77, T76); T2j = VFNMS(LDK(KP923879532), TS, Tn); TT = VFMA(LDK(KP923879532), TS, Tn); T7h = VFMA(LDK(KP956940335), T7g, T7f); T7l = VFNMS(LDK(KP956940335), T7g, T7f); T2e = VFMA(LDK(KP198912367), T1N, T1W); T1X = VFNMS(LDK(KP198912367), T1W, T1N); T75 = VFNMS(LDK(KP956940335), T74, T6X); T7d = VFMA(LDK(KP956940335), T74, T6X); T7m = VFMA(LDK(KP956940335), T7j, T7i); T7k = VFNMS(LDK(KP956940335), T7j, T7i); T7c = VFNMS(LDK(KP956940335), T7b, T78); T7e = VFMA(LDK(KP956940335), T7b, T78); } T2k = VADD(T2d, T2e); T2f = VSUB(T2d, T2e); ST(&(xo[WS(os, 45)]), VFMAI(T7k, T7h), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 19)]), VFNMSI(T7k, T7h), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 51)]), VFNMSI(T7m, T7l), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 13)]), VFMAI(T7m, T7l), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 61)]), VFMAI(T7e, T7d), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 3)]), VFNMSI(T7e, T7d), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 29)]), VFMAI(T7c, T75), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 35)]), VFNMSI(T7c, T75), ovs, &(xo[WS(os, 1)])); T2n = VSUB(T1q, T1X); T1Y = VADD(T1q, T1X); T2C = VFNMS(LDK(KP707106781), T27, T26); T28 = VFMA(LDK(KP707106781), T27, T26); T2b = VSUB(T29, T2a); T2s = VADD(T29, T2a); } T2l = VFNMS(LDK(KP980785280), T2k, T2j); T2p = VFMA(LDK(KP980785280), T2k, T2j); { V T5z, T4z, T5A, T5g; { V T4f, T4y, T1Z, T2h, T4U, T5t, T2m, T2c, T5u, T5f; T5H = VFNMS(LDK(KP923879532), T4e, T47); T4f = VFMA(LDK(KP923879532), T4e, T47); T4y = VADD(T4o, T4x); T5T = VSUB(T4o, T4x); T1Z = VFNMS(LDK(KP980785280), T1Y, TT); T2h = VFMA(LDK(KP980785280), T1Y, TT); T4U = VFNMS(LDK(KP098491403), T4T, T4M); T5t = VFMA(LDK(KP098491403), T4M, 
T4T); T2m = VFNMS(LDK(KP923879532), T2b, T28); T2c = VFMA(LDK(KP923879532), T2b, T28); T5u = VFMA(LDK(KP098491403), T57, T5e); T5f = VFNMS(LDK(KP098491403), T5e, T57); T5z = VFNMS(LDK(KP980785280), T4y, T4f); T4z = VFMA(LDK(KP980785280), T4y, T4f); T5S = VFNMS(LDK(KP923879532), T5n, T5k); T5o = VFMA(LDK(KP923879532), T5n, T5k); { V T2o, T2q, T2i, T2g; T2o = VFMA(LDK(KP980785280), T2n, T2m); T2q = VFNMS(LDK(KP980785280), T2n, T2m); T2i = VFMA(LDK(KP980785280), T2f, T2c); T2g = VFNMS(LDK(KP980785280), T2f, T2c); T5A = VADD(T5t, T5u); T5v = VSUB(T5t, T5u); T5D = VSUB(T4U, T5f); T5g = VADD(T4U, T5f); ST(&(xo[WS(os, 46)]), VFNMSI(T2o, T2l), ovs, &(xo[0])); ST(&(xo[WS(os, 18)]), VFMAI(T2o, T2l), ovs, &(xo[0])); ST(&(xo[WS(os, 50)]), VFMAI(T2q, T2p), ovs, &(xo[0])); ST(&(xo[WS(os, 14)]), VFNMSI(T2q, T2p), ovs, &(xo[0])); ST(&(xo[WS(os, 2)]), VFMAI(T2i, T2h), ovs, &(xo[0])); ST(&(xo[WS(os, 62)]), VFNMSI(T2i, T2h), ovs, &(xo[0])); ST(&(xo[WS(os, 34)]), VFMAI(T2g, T1Z), ovs, &(xo[0])); ST(&(xo[WS(os, 30)]), VFNMSI(T2g, T1Z), ovs, &(xo[0])); T5r = VSUB(T5p, T5q); T5I = VADD(T5p, T5q); } } T5x = VFMA(LDK(KP995184726), T5g, T4z); T5h = VFNMS(LDK(KP995184726), T5g, T4z); T5F = VFMA(LDK(KP995184726), T5A, T5z); T5B = VFNMS(LDK(KP995184726), T5A, T5z); } } { V T6J, T6R, T6L, T6z, T6T, T6P; { V T6N, T6j, T6O, T6y; { V T6q, T6H, T5C, T5s, T6I, T6x; T6q = VFNMS(LDK(KP534511135), T6p, T6m); T6H = VFMA(LDK(KP534511135), T6m, T6p); T5C = VFNMS(LDK(KP980785280), T5r, T5o); T5s = VFMA(LDK(KP980785280), T5r, T5o); T6I = VFMA(LDK(KP534511135), T6t, T6w); T6x = VFNMS(LDK(KP534511135), T6w, T6t); T6N = VFMA(LDK(KP831469612), T6i, T6b); T6j = VFNMS(LDK(KP831469612), T6i, T6b); { V T5E, T5G, T5y, T5w; T5E = VFMA(LDK(KP995184726), T5D, T5C); T5G = VFNMS(LDK(KP995184726), T5D, T5C); T5y = VFMA(LDK(KP995184726), T5v, T5s); T5w = VFNMS(LDK(KP995184726), T5v, T5s); T6O = VADD(T6H, T6I); T6J = VSUB(T6H, T6I); T6R = VSUB(T6q, T6x); T6y = VADD(T6q, T6x); ST(&(xo[WS(os, 47)]), VFNMSI(T5E, T5B), ovs, 
&(xo[WS(os, 1)])); ST(&(xo[WS(os, 17)]), VFMAI(T5E, T5B), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 49)]), VFMAI(T5G, T5F), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 15)]), VFNMSI(T5G, T5F), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 1)]), VFMAI(T5y, T5x), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 63)]), VFNMSI(T5y, T5x), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 33)]), VFMAI(T5w, T5h), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 31)]), VFNMSI(T5w, T5h), ovs, &(xo[WS(os, 1)])); } } T6L = VFMA(LDK(KP881921264), T6y, T6j); T6z = VFNMS(LDK(KP881921264), T6y, T6j); T6T = VFMA(LDK(KP881921264), T6O, T6N); T6P = VFNMS(LDK(KP881921264), T6O, T6N); } { V T2H, T2P, T2J, T2B, T2R, T2N; { V T2L, T2t, T2M, T2A; { V T2w, T2F, T6Q, T6G, T2G, T2z; T2w = VFMA(LDK(KP668178637), T2v, T2u); T2F = VFNMS(LDK(KP668178637), T2u, T2v); T6Q = VFNMS(LDK(KP831469612), T6F, T6C); T6G = VFMA(LDK(KP831469612), T6F, T6C); T2G = VFNMS(LDK(KP668178637), T2x, T2y); T2z = VFMA(LDK(KP668178637), T2y, T2x); T2L = VFNMS(LDK(KP923879532), T2s, T2r); T2t = VFMA(LDK(KP923879532), T2s, T2r); { V T6S, T6U, T6M, T6K; T6S = VFMA(LDK(KP881921264), T6R, T6Q); T6U = VFNMS(LDK(KP881921264), T6R, T6Q); T6M = VFMA(LDK(KP881921264), T6J, T6G); T6K = VFNMS(LDK(KP881921264), T6J, T6G); T2M = VADD(T2F, T2G); T2H = VSUB(T2F, T2G); T2P = VSUB(T2w, T2z); T2A = VADD(T2w, T2z); ST(&(xo[WS(os, 43)]), VFNMSI(T6S, T6P), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 21)]), VFMAI(T6S, T6P), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 53)]), VFMAI(T6U, T6T), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 11)]), VFNMSI(T6U, T6T), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 5)]), VFMAI(T6M, T6L), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 59)]), VFNMSI(T6M, T6L), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 37)]), VFMAI(T6K, T6z), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 27)]), VFNMSI(T6K, T6z), ovs, &(xo[WS(os, 1)])); } } T2J = VFMA(LDK(KP831469612), T2A, T2t); T2B = VFNMS(LDK(KP831469612), T2A, T2t); T2R = VFNMS(LDK(KP831469612), T2M, T2L); T2N = VFMA(LDK(KP831469612), T2M, T2L); } { V 
T61, T5J, T62, T5Q; { V T5M, T5V, T2O, T2E, T5W, T5P; T5M = VFMA(LDK(KP820678790), T5L, T5K); T5V = VFNMS(LDK(KP820678790), T5K, T5L); T2O = VFMA(LDK(KP923879532), T2D, T2C); T2E = VFNMS(LDK(KP923879532), T2D, T2C); T5W = VFNMS(LDK(KP820678790), T5N, T5O); T5P = VFMA(LDK(KP820678790), T5O, T5N); T61 = VFNMS(LDK(KP980785280), T5I, T5H); T5J = VFMA(LDK(KP980785280), T5I, T5H); { V T2Q, T2S, T2K, T2I; T2Q = VFNMS(LDK(KP831469612), T2P, T2O); T2S = VFMA(LDK(KP831469612), T2P, T2O); T2K = VFMA(LDK(KP831469612), T2H, T2E); T2I = VFNMS(LDK(KP831469612), T2H, T2E); T62 = VADD(T5V, T5W); T5X = VSUB(T5V, T5W); T65 = VSUB(T5M, T5P); T5Q = VADD(T5M, T5P); ST(&(xo[WS(os, 42)]), VFMAI(T2Q, T2N), ovs, &(xo[0])); ST(&(xo[WS(os, 22)]), VFNMSI(T2Q, T2N), ovs, &(xo[0])); ST(&(xo[WS(os, 54)]), VFNMSI(T2S, T2R), ovs, &(xo[0])); ST(&(xo[WS(os, 10)]), VFMAI(T2S, T2R), ovs, &(xo[0])); ST(&(xo[WS(os, 58)]), VFMAI(T2K, T2J), ovs, &(xo[0])); ST(&(xo[WS(os, 6)]), VFNMSI(T2K, T2J), ovs, &(xo[0])); ST(&(xo[WS(os, 26)]), VFMAI(T2I, T2B), ovs, &(xo[0])); ST(&(xo[WS(os, 38)]), VFNMSI(T2I, T2B), ovs, &(xo[0])); } } T5Z = VFMA(LDK(KP773010453), T5Q, T5J); T5R = VFNMS(LDK(KP773010453), T5Q, T5J); T67 = VFNMS(LDK(KP773010453), T62, T61); T63 = VFMA(LDK(KP773010453), T62, T61); } } } } } } T5U = VFNMS(LDK(KP980785280), T5T, T5S); T64 = VFMA(LDK(KP980785280), T5T, T5S); { V T68, T66, T5Y, T60; T68 = VFMA(LDK(KP773010453), T65, T64); T66 = VFNMS(LDK(KP773010453), T65, T64); T5Y = VFNMS(LDK(KP773010453), T5X, T5U); T60 = VFMA(LDK(KP773010453), T5X, T5U); ST(&(xo[WS(os, 41)]), VFMAI(T66, T63), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 23)]), VFNMSI(T66, T63), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 55)]), VFNMSI(T68, T67), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 9)]), VFMAI(T68, T67), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 57)]), VFMAI(T60, T5Z), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 7)]), VFNMSI(T60, T5Z), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 25)]), VFMAI(T5Y, T5R), ovs, &(xo[WS(os, 1)])); ST(&(xo[WS(os, 
39)]), VFNMSI(T5Y, T5R), ovs, &(xo[WS(os, 1)])); } }}/* Descriptor passed by address to X(kdft_register) in the function that follows; its initializer holds 64, the name string "n1bv_64", the tuple {198, 0, 258, 0}, &GENUS and four zeros. NOTE(review): 64 looks like the transform size and the tuple like per-call operation counts -- confirm against the kdft_desc declaration. */static const kdft_desc desc = { 64, "n1bv_64", {198, 0, 258, 0}, &GENUS, 0, 0, 0, 0 };/* Entry point taking a planner: forwards p, the n1bv_64 function and &desc to X(kdft_register). */void X(codelet_n1bv_64) (planner *p) { X(kdft_register) (p, n1bv_64, &desc);}/* NOTE(review): line breaks appear to have been lost in this copy -- the #else and #include below sit mid-line, where they are not recognized as preprocessor directives; restore the newlines before compiling. */#else /* HAVE_FMA *//* Generated by: ../../../genfft/gen_notw_c -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 64 -name n1bv_64 -include n1b.h *//* * This function contains 456 FP additions, 124 FP multiplications, * (or, 404 additions, 72 multiplications, 52 fused multiply/add), * 108 stack variables, and 128 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_notw_c.ml,v 1.16 2006-01-05 03:04:27 stevenj Exp $ */#include "n1b.h"static void n1bv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs){ DVK(KP634393284, +0.634393284163645498215171613225493370675687095); DVK(KP773010453, +0.773010453362736960810906609758469800971041293);
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -