?? fpgenerator.java
字號:
package st.ata.util;import java.util.Hashtable;/**<p> This class provides methods that construct fingerprints of stringsof bytes via operations in <i>GF[2^d]</i> for <i>0 < d <= 64</i>.<i>GF[2^d]</i> is represented as the set of polynomials of degree<i>d</i> with coefficients in <i>Z(2)</i>, modulo an irreduciblepolynomial <i>P</i> of degree <i>d</i>. The representation ofpolynomials is as an unsigned binary number in which the leastsignificant exponent is kept in the most significant bit.<p> Let S be a string of bytes and <i>g(S)</i> the string obtained bytaking the byte <code>0x01</code> followed by eight <code>0x00</code>bytes followed by <code>S</code>. Let <i>f(S)</i> be the polynomialassociated to the string <i>S</i> viewed as a polynomial withcoefficients in the field <i>Z(2)</i>. The fingerprint of S is simplythe value <i>f(g(S))</i> modulo <i>P</i>. Because polynomials arerepresented with the least significant coefficient in the mostsignificant bit, fingerprints of degree <i>d</i> are stored in the<code>d</code> <strong>most</code> significant bits of a long word.<p> Fingerprints can be used as a probably unique id for the inputstring. More precisely, if <i>P</i> is chosen at random amongirreducible polynomials of degree <i>d</i>, then the probability thatany two strings <i>A</i> and <i>B</i> have the same fingerprint isless than <i>max(|A|,|B|)/2^(d+1)</i> where <i>|A|</i> is the lengthof A in bits.<p> The routines named <code>extend[8]</code> and <code>fp[8]</code>return reduced results, while <code>extend_[byte/char/int/long]</code>do not. An <em>un</em>reduced result is a number that is equal (mod</code>polynomial</code> to the desired fingerprint but may havedegree <code>degree</code> or higher. The method <code>reduce</code>reduces such a result to a polynomial of degree less than<code>degree</code>. Obtaining reduced results takes longer thanobtaining unreduced results; thus, when fingerprinting long strings,it's better to obtain irreduced results inside the fingerprinting loopand use <code>reduce</code> to reduce to a fingerprint after the loop.*/// Tested by: TestFPGeneratorpublic final class FPGenerator { /** Return a fingerprint generator. The fingerprints generated will have degree <code>degree</code> and will be generated by <code>polynomial</code>. If a generator based on <code>polynomial</code> has already been created, it will be returned. Requires that <code>polynomial</code> is an irreducible polynomial of degree <code>degree</code> (the array <code>polynomials</code> contains some irreducible polynomials). */ public static FPGenerator make(long polynomial, int degree) { Long l = new Long(polynomial); FPGenerator fpgen = (FPGenerator) generators.get(l); if (fpgen == null) { fpgen = new FPGenerator(polynomial, degree); generators.put(l, fpgen); } return fpgen; } private static final Hashtable generators = new Hashtable(10); private static final long zero = 0; private static final long one = 0x8000000000000000L; /** Return a value equal (mod <code>polynomial</code>) to <code>fp</code> and of degree less than <code>degree</code>. */ public long reduce(long fp) { int N = (8 - degree/8); long local = (N == 8 ? 0 : fp & (-1L << 8*N)); long temp = zero; for (int i = 0; i < N; i++) { temp ^= ByteModTable[8+i][((int)fp) & 0xff]; fp >>>= 8; }; return local ^ temp; } /** Extends <code>f</code> with lower eight bits of <code>v</code> with<em>out</em> full reduction. In other words, returns a polynomial that is equal (mod <code>polynomial</code>) to the desired fingerprint but may be of higher degree than the desired fingerprint. */ public long extend_byte(long f, int v) { f ^= (0xff & v); int i = (int)f; long result = (f>>>8); result ^= ByteModTable[7][i & 0xff]; return result; } /** Extends <code>f</code> with lower sixteen bits of <code>v</code>. Does not reduce. */ public long extend_char(long f, int v) { f ^= (0xffff & v); int i = (int)f; long result = (f>>>16); result ^= ByteModTable[6][i & 0xff]; i >>>= 8; result ^= ByteModTable[7][i & 0xff]; return result; } /** Extends <code>f</code> with (all bits of) <code>v</code>. Does not reduce. */ public long extend_int(long f, int v) { f ^= (0xffffffffL & v); int i = (int)f; long result = (f>>>32); result ^= ByteModTable[4][i & 0xff]; i >>>= 8; result ^= ByteModTable[5][i & 0xff]; i >>>= 8; result ^= ByteModTable[6][i & 0xff]; i >>>= 8; result ^= ByteModTable[7][i & 0xff]; return result; } /** Extends <code>f</code> with <code>v</code>. Does not reduce. */ public long extend_long(long f, long v) { f ^= v; long result = ByteModTable[0][(int)(f & 0xff)]; f >>>= 8; result ^= ByteModTable[1][(int)(f & 0xff)]; f >>>= 8; result ^= ByteModTable[2][(int)(f & 0xff)]; f >>>= 8; result ^= ByteModTable[3][(int)(f & 0xff)]; f >>>= 8; result ^= ByteModTable[4][(int)(f & 0xff)]; f >>>= 8; result ^= ByteModTable[5][(int)(f & 0xff)]; f >>>= 8; result ^= ByteModTable[6][(int)(f & 0xff)]; f >>>= 8; result ^= ByteModTable[7][(int)(f & 0xff)]; return result; } /** Compute fingerprint of "n" bytes of "buf" starting from "buf[start]". Requires "[start, start+n)" is in bounds. */ public long fp(byte[] buf, int start, int n) { return extend(empty, buf, start, n); } /** Compute fingerprint of (all bits of) "n" characters of "buf" starting from "buf[i]". Requires "[i, i+n)" is in bounds. */ public long fp(char[] buf, int start, int n) { return extend(empty, buf, start, n); }// COMMENTED OUT TO REMOVE Dependency on st.ata.util.Text// /** Compute fingerprint of (all bits of) <code>t</code> */// public long fp(Text t) {// return extend(empty, t);// } /** Compute fingerprint of (all bits of) the characters of "s". */ public long fp(CharSequence s) { return extend(empty, s); } /** Compute fingerprint of (all bits of) "n" characters of "buf" starting from "buf[i]". Requires "[i, i+n)" is in bounds. */ public long fp(int[] buf, int start, int n) { return extend(empty, buf, start, n); } /** Compute fingerprint of (all bits of) "n" characters of "buf" starting from "buf[i]". Requires "[i, i+n)" is in bounds. */ public long fp(long[] buf, int start, int n) { return extend(empty, buf, start, n); } /** Compute fingerprint of the lower eight bits of the characters of "s". */ public long fp8(String s) { return extend8(empty, s); } /** Compute fingerprint of the lower eight bits of "n" characters of "buf" starting from "buf[i]". Requires "[i, i+n)" is in bounds. */ public long fp8(char[] buf, int start, int n) { return extend8(empty, buf, start, n); } /** Extends fingerprint <code>f</code> by adding the low eight bits of "b". */ public long extend(long f, byte v) { return reduce(extend_byte(f, v)); } /** Extends fingerprint <code>f</code> by adding (all bits of) "v". */ public long extend(long f, char v) { return reduce(extend_char(f, v)); } /** Extends fingerprint <code>f</code> by adding (all bits of) "v". */ public long extend(long f, int v) { return reduce(extend_int(f, v)); } /** Extends fingerprint <code>f</code> by adding (all bits of) "v". */ public long extend(long f, long v) { return reduce(extend_long(f, v)); } /** Extends fingerprint <code>f</code> by adding "n" bytes of "buf" starting from "buf[start]". Result is reduced. Requires "[i, i+n)" is in bounds. */ public long extend(long f, byte[] buf, int start, int n) { for (int i = 0; i < n; i++) { f = extend_byte(f, buf[start+i]); } return reduce(f); } /** Extends fingerprint <code>f</code> by adding (all bits of) "n" characters of "buf" starting from "buf[i]". Result is reduced. Requires "[i, i+n)" is in bounds. */ public long extend(long f, char[] buf, int start, int n) {
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -