?? gf2x.cpp
字號:
_ntl_ulong d0[2], d1[2], d2[2], d01[2], d02[2], d12[2];
mul1_IL(d0, a[0], b[0]);
mul1_IL(d1, a[1], b[1]);
mul1_IL(d2, a[2], b[2]);
mul1_IL(d01, a[0]^a[1], b[0]^b[1]);
mul1_IL(d02, a[0]^a[2], b[0]^b[2]);
mul1_IL(d12, a[1]^a[2], b[1]^b[2]);
c[0] = d0[0];
c[1] = d0[1] ^ d01[0] ^ d1[0] ^ d0[0];
c[2] = d01[1] ^ d1[1] ^ d0[1] ^ d02[0] ^ d2[0] ^ d0[0] ^ d1[0];
c[3] = d02[1] ^ d2[1] ^ d0[1] ^ d1[1] ^ d12[0] ^ d1[0] ^ d2[0];
c[4] = d12[1] ^ d1[1] ^ d2[1] ^ d2[0];
c[5] = d2[1];
}
static
void mul4(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
_ntl_ulong hs0[2], hs1[2];
_ntl_ulong hl2[4];
hs0[0] = a[0] ^ a[2];
hs0[1] = a[1] ^ a[3];
hs1[0] = b[0] ^ b[2];
hs1[1] = b[1] ^ b[3];
mul2(c, a, b);
mul2(c+4, a+2, b+2);
mul2(hl2, hs0, hs1);
hl2[0] = hl2[0] ^ c[0] ^ c[4];
hl2[1] = hl2[1] ^ c[1] ^ c[5];
hl2[2] = hl2[2] ^ c[2] ^ c[6];
hl2[3] = hl2[3] ^ c[3] ^ c[7];
c[2] ^= hl2[0];
c[3] ^= hl2[1];
c[4] ^= hl2[2];
c[5] ^= hl2[3];
}
static
void mul5 (_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
_ntl_ulong hs0[3], hs1[3];
_ntl_ulong hl2[6];
hs0[0] = a[0] ^ a[3];
hs0[1] = a[1] ^ a[4];
hs0[2] = a[2];
hs1[0] = b[0] ^ b[3];
hs1[1] = b[1] ^ b[4];
hs1[2] = b[2];
mul3(c, a, b);
mul3(hl2, hs0, hs1);
mul2(c+6, a+3, b+3);
hl2[0] = hl2[0] ^ c[0] ^ c[6];
hl2[1] = hl2[1] ^ c[1] ^ c[7];
hl2[2] = hl2[2] ^ c[2] ^ c[8];
hl2[3] = hl2[3] ^ c[3] ^ c[9];
hl2[4] = hl2[4] ^ c[4];
hl2[5] = hl2[5] ^ c[5];
c[3] ^= hl2[0];
c[4] ^= hl2[1];
c[5] ^= hl2[2];
c[6] ^= hl2[3];
c[7] ^= hl2[4];
c[8] ^= hl2[5];
}
static
void mul6(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
_ntl_ulong hs0[3], hs1[3];
_ntl_ulong hl2[6];
hs0[0] = a[0] ^ a[3];
hs0[1] = a[1] ^ a[4];
hs0[2] = a[2] ^ a[5];
hs1[0] = b[0] ^ b[3];
hs1[1] = b[1] ^ b[4];
hs1[2] = b[2] ^ b[5];
mul3(c, a, b);
mul3(c+6, a+3, b+3);
mul3(hl2, hs0, hs1);
hl2[0] = hl2[0] ^ c[0] ^ c[6];
hl2[1] = hl2[1] ^ c[1] ^ c[7];
hl2[2] = hl2[2] ^ c[2] ^ c[8];
hl2[3] = hl2[3] ^ c[3] ^ c[9];
hl2[4] = hl2[4] ^ c[4] ^ c[10];
hl2[5] = hl2[5] ^ c[5] ^ c[11];
c[3] ^= hl2[0];
c[4] ^= hl2[1];
c[5] ^= hl2[2];
c[6] ^= hl2[3];
c[7] ^= hl2[4];
c[8] ^= hl2[5];
}
static
void mul7(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
_ntl_ulong hs0[4], hs1[4];
_ntl_ulong hl2[8];
hs0[0] = a[0] ^ a[4];
hs0[1] = a[1] ^ a[5];
hs0[2] = a[2] ^ a[6];
hs0[3] = a[3];
hs1[0] = b[0] ^ b[4];
hs1[1] = b[1] ^ b[5];
hs1[2] = b[2] ^ b[6];
hs1[3] = b[3];
mul4(c, a, b);
mul4(hl2, hs0, hs1);
mul3(c+8, a+4, b+4);
hl2[0] = hl2[0] ^ c[0] ^ c[8];
hl2[1] = hl2[1] ^ c[1] ^ c[9];
hl2[2] = hl2[2] ^ c[2] ^ c[10];
hl2[3] = hl2[3] ^ c[3] ^ c[11];
hl2[4] = hl2[4] ^ c[4] ^ c[12];
hl2[5] = hl2[5] ^ c[5] ^ c[13];
hl2[6] = hl2[6] ^ c[6];
hl2[7] = hl2[7] ^ c[7];
c[4] ^= hl2[0];
c[5] ^= hl2[1];
c[6] ^= hl2[2];
c[7] ^= hl2[3];
c[8] ^= hl2[4];
c[9] ^= hl2[5];
c[10] ^= hl2[6];
c[11] ^= hl2[7];
}
static
void mul8(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
_ntl_ulong hs0[4], hs1[4];
_ntl_ulong hl2[8];
hs0[0] = a[0] ^ a[4];
hs0[1] = a[1] ^ a[5];
hs0[2] = a[2] ^ a[6];
hs0[3] = a[3] ^ a[7];
hs1[0] = b[0] ^ b[4];
hs1[1] = b[1] ^ b[5];
hs1[2] = b[2] ^ b[6];
hs1[3] = b[3] ^ b[7];
mul4(c, a, b);
mul4(c+8, a+4, b+4);
mul4(hl2, hs0, hs1);
hl2[0] = hl2[0] ^ c[0] ^ c[8];
hl2[1] = hl2[1] ^ c[1] ^ c[9];
hl2[2] = hl2[2] ^ c[2] ^ c[10];
hl2[3] = hl2[3] ^ c[3] ^ c[11];
hl2[4] = hl2[4] ^ c[4] ^ c[12];
hl2[5] = hl2[5] ^ c[5] ^ c[13];
hl2[6] = hl2[6] ^ c[6] ^ c[14];
hl2[7] = hl2[7] ^ c[7] ^ c[15];
c[4] ^= hl2[0];
c[5] ^= hl2[1];
c[6] ^= hl2[2];
c[7] ^= hl2[3];
c[8] ^= hl2[4];
c[9] ^= hl2[5];
c[10] ^= hl2[6];
c[11] ^= hl2[7];
}
static
void KarMul(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b,
long len, _ntl_ulong *stk)
{
if (len <= 8) {
switch (len) {
case 1: mul1(c, a[0], b[0]); break;
case 2: mul2(c, a, b); break;
case 3: mul3(c, a, b); break;
case 4: mul4(c, a, b); break;
case 5: mul5(c, a, b); break;
case 6: mul6(c, a, b); break;
case 7: mul7(c, a, b); break;
case 8: mul8(c, a, b); break;
}
return;
}
long ll, lh, i, ll2, lh2;
const _ntl_ulong *a0, *a1, *b0, *b1;
_ntl_ulong *a01, *b01, *h;
lh = len >> 1;
ll = (len+1) >> 1;
ll2 = ll << 1;
lh2 = lh << 1;
a01 = stk; stk += ll+1;
b01 = stk; stk += ll+1;
h = stk; stk += ll2+1;
a0 = a;
a1 = a+ll;
b0 = b;
b1 = b+ll;
KarMul(c, a0, b0, ll, stk);
KarMul(c+ll2, a1, b1, lh, stk);
for (i = 0; i < lh; i++) {
a01[i] = a[i] ^ a[i+ll];
b01[i] = b[i] ^ b[i+ll];
}
if (lh < ll) {
a01[lh] = a[lh];
b01[lh] = b[lh];
}
KarMul(h, a01, b01, ll, stk);
for (i = 0; i < ll2; i++)
h[i] ^= c[i];
for (i = 0; i < lh2; i++)
h[i] ^= c[i+ll2];
for (i = 0; i < ll2; i++)
c[i+ll] ^= h[i];
}
void mul(GF2X& c, const GF2X& a, const GF2X& b)
{
long sa = a.xrep.length();
long sb = b.xrep.length();
if (sa <= 0 || sb <= 0) {
clear(c);
return;
}
_ntl_ulong a0 = a.xrep[0];
_ntl_ulong b0 = b.xrep[0];
if (sb == 1 && b0 == 1) {
c = a;
return;
}
if (sa == 1 && a0 == 1) {
c = b;
return;
}
if (&a == &b) {
sqr(c, a);
return;
}
if (sa == sb && sa <= 8) {
// we treat these cases specially for efficiency reasons
switch (sa) {
case 1: {
_ntl_ulong v[2];
if (!(a0 >> NTL_BITS_PER_LONG/2))
mul_half(v, b0, a0);
else if (!(b0 >> NTL_BITS_PER_LONG/2))
mul_half(v, a0, b0);
else
mul1(v, a0, b0);
if (v[1]) {
c.xrep.SetLength(2);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
}
else {
c.xrep.SetLength(1);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
}
}
return;
case 2: {
_ntl_ulong v[4];
mul2(v, &a.xrep[0], &b.xrep[0]);
if (v[3]) {
c.xrep.SetLength(4);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
}
else {
c.xrep.SetLength(3);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
}
}
return;
case 3: {
_ntl_ulong v[6];
mul3(v, &a.xrep[0], &b.xrep[0]);
if (v[5]) {
c.xrep.SetLength(6);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
}
else {
c.xrep.SetLength(5);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
}
}
return;
case 4: {
_ntl_ulong v[8];
mul4(v, &a.xrep[0], &b.xrep[0]);
if (v[7]) {
c.xrep.SetLength(8);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
}
else {
c.xrep.SetLength(7);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
}
}
return;
case 5: {
_ntl_ulong v[10];
mul5(v, &a.xrep[0], &b.xrep[0]);
if (v[9]) {
c.xrep.SetLength(10);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
cp[8] = v[8];
cp[9] = v[9];
}
else {
c.xrep.SetLength(9);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
cp[8] = v[8];
}
}
return;
case 6: {
_ntl_ulong v[12];
mul6(v, &a.xrep[0], &b.xrep[0]);
if (v[11]) {
c.xrep.SetLength(12);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
cp[8] = v[8];
cp[9] = v[9];
cp[10] = v[10];
cp[11] = v[11];
}
else {
c.xrep.SetLength(11);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
cp[8] = v[8];
cp[9] = v[9];
cp[10] = v[10];
}
}
return;
case 7: {
_ntl_ulong v[14];
mul7(v, &a.xrep[0], &b.xrep[0]);
if (v[13]) {
c.xrep.SetLength(14);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
cp[8] = v[8];
cp[9] = v[9];
cp[10] = v[10];
cp[11] = v[11];
cp[12] = v[12];
cp[13] = v[13];
}
else {
c.xrep.SetLength(13);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[8] = v[8];
cp[9] = v[9];
cp[10] = v[10];
cp[11] = v[11];
cp[12] = v[12];
}
}
return;
case 8: {
_ntl_ulong v[16];
mul8(v, &a.xrep[0], &b.xrep[0]);
if (v[15]) {
c.xrep.SetLength(16);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
cp[8] = v[8];
cp[9] = v[9];
cp[10] = v[10];
cp[11] = v[11];
cp[12] = v[12];
cp[13] = v[13];
cp[14] = v[14];
cp[15] = v[15];
}
else {
c.xrep.SetLength(15);
_ntl_ulong *cp = &c.xrep[0];
cp[0] = v[0];
cp[1] = v[1];
cp[2] = v[2];
cp[3] = v[3];
cp[4] = v[4];
cp[5] = v[5];
cp[6] = v[6];
cp[7] = v[7];
cp[8] = v[8];
cp[9] = v[9];
cp[10] = v[10];
cp[11] = v[11];
cp[12] = v[12];
cp[13] = v[13];
cp[14] = v[14];
}
}
return;
}
}
// another special case: one of the two inputs
// has length 1 (or less).
if (sa == 1) {
c.xrep.SetLength(sb + 1);
_ntl_ulong *cp = c.xrep.elts();
const _ntl_ulong *bp = b.xrep.elts();
if (a0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1))
Mul1(cp, bp, sb, a0);
else
Mul1_short(cp, bp, sb, a0);
c.normalize();
return;
}
if (sb == 1) {
c.xrep.SetLength(sa + 1);
_ntl_ulong *cp = c.xrep.elts();
const _ntl_ulong *ap = a.xrep.elts();
if (b0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1))
Mul1(cp, ap, sa, b0);
else
Mul1_short(cp, ap, sa, b0);
c.normalize();
return;
}
// finally: the general case
static WordVector mem;
static WordVector stk;
static WordVector vec;
const _ntl_ulong *ap, *bp;
_ntl_ulong *cp;
long sc = sa + sb;
long in_mem = 0;
if (&a == &c || &b == &c) {
mem.SetLength(sc);
cp = mem.elts();
in_mem = 1;
}
else {
c.xrep.SetLength(sc);
cp = c.xrep.elts();
}
long n, hn, sp;
n = min(sa, sb);
sp = 0;
while (n > 8) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -