?? arabicshaping.java
字號:
length = w - start; } else { // spaces at beginning while (w < e) { dest[w++] = '\u0020'; } } } } return length; } /* * Name : expandLamAlef * Function: LamAlef needs special handling as the LamAlef is * one character while expanding it will give two * characters Lam + Alef, so we need to expand the LamAlef * in near or far spaces according to the options the user * specifies or increase the buffer size. * Dest has enough room for the expansion if we are growing. * lamalef are normalized to the 'special characters' */ private int expandLamAlef(char[] dest, int start, int length, int lacount) { int lenOptions = options & LENGTH_MASK; if (!isLogical) { switch (lenOptions) { case LENGTH_FIXED_SPACES_AT_BEGINNING: lenOptions = LENGTH_FIXED_SPACES_AT_END; break; case LENGTH_FIXED_SPACES_AT_END: lenOptions = LENGTH_FIXED_SPACES_AT_BEGINNING; break; default: break; } } switch (lenOptions) { case LENGTH_GROW_SHRINK: { for (int r = start + length, w = r + lacount; --r >= start;) { char ch = dest[r]; if (isNormalizedLamAlefChar(ch)) { dest[--w] = '\u0644'; dest[--w] = convertNormalizedLamAlef[ch - '\u065C']; } else { dest[--w] = ch; } } } length += lacount; break; case LENGTH_FIXED_SPACES_NEAR: { if (isNormalizedLamAlefChar(dest[start])) { throw new RuntimeException("no space for lamalef"); } for (int i = start + length; --i > start;) { // don't check start, already checked char ch = dest[i]; if (isNormalizedLamAlefChar(ch)) { if (dest[i-1] == '\u0020') { dest[i] = '\u0644'; dest[--i] = convertNormalizedLamAlef[ch - '\u065C']; } else { throw new RuntimeException("no space for lamalef"); } } } } break; case LENGTH_FIXED_SPACES_AT_END: { if (lacount > countSpacesLeft(dest, start, length)) { throw new RuntimeException("no space for lamalef"); } for (int r = start + lacount, w = start, e = start + length; r < e; ++r) { char ch = dest[r]; if (isNormalizedLamAlefChar(ch)) { dest[w++] = convertNormalizedLamAlef[ch - '\u065C']; dest[w++] = '\u0644'; } else { dest[w++] = ch; } } } break; case LENGTH_FIXED_SPACES_AT_BEGINNING: { if (lacount > countSpacesRight(dest, start, length)) { throw new RuntimeException("no space for lamalef"); } for (int r = start + length - lacount, w = start + length; --r >= start;) { char ch = dest[r]; if (isNormalizedLamAlefChar(ch)) { dest[--w] = '\u0644'; dest[--w] = convertNormalizedLamAlef[ch - '\u065C']; } else { dest[--w] = ch; } } } break; } return length; } /* Convert the input buffer from FExx Range into 06xx Range * to put all characters into the 06xx range * even the lamalef is converted to the special region in * the 06xx range. Return the number of lamalef chars found. */ private int normalize(char[] dest, int start, int length) { int lacount = 0; for (int i = start, e = i + length; i < e; ++i) { char ch = dest[i]; if (ch >= '\uFE70' && ch <= '\uFEFC') { if (isLamAlefChar(ch)) { ++lacount; } dest[i] = (char)convertFEto06[ch - '\uFE70']; } } return lacount; } /* * Name : shapeUnicode * Function: Converts an Arabic Unicode buffer in 06xx Range into a shaped * arabic Unicode buffer in FExx Range */ private int shapeUnicode(char[] dest, int start, int length, int destSize, int tashkeelFlag) { normalize(dest, start, length); // resolve the link between the characters. // Arabic characters have four forms: Isolated, Initial, Medial and Final. // Tashkeel characters have two, isolated or medial, and sometimes only isolated. // tashkeelFlag == 0: shape normally, 1: shape isolated, 2: don't shape boolean lamalef_found = false; int i = start + length - 1; int currLink = getLink(dest[i]); int nextLink = 0; int prevLink = 0; int lastLink = 0; int prevPos = i; int lastPos = i; int nx = -2; int nw = 0; while (i >= 0) { // If high byte of currLink > 0 then there might be more than one shape if ((currLink & '\uFF00') > 0 || isTashkeelChar(dest[i])) { nw = i - 1; nx = -2; while (nx < 0) { // we need to know about next char if (nw == -1) { nextLink = 0; nx = Integer.MAX_VALUE; } else { nextLink = getLink(dest[nw]); if ((nextLink & IRRELEVANT) == 0) { nx = nw; } else { --nw; } } } if (((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0)) { lamalef_found = true; char wLamalef = changeLamAlef(dest[i]); // get from 0x065C-0x065f if (wLamalef != '\u0000') { // replace alef by marker, it will be removed later dest[i] = '\uffff'; dest[lastPos] = wLamalef; i = lastPos; } lastLink = prevLink; currLink = getLink(wLamalef); // requires '\u0000', unfortunately } // get the proper shape according to link ability of neighbors // and of character; depends on the order of the shapes // (isolated, initial, middle, final) in the compatibility area int flag = specialChar(dest[i]); int shape = shapeTable[nextLink & LINK_MASK] [lastLink & LINK_MASK] [currLink & LINK_MASK]; if (flag == 1) { shape &= 0x1; } else if (flag == 2) { if (tashkeelFlag == 0 && ((lastLink & LINKL) != 0) && ((nextLink & LINKR) != 0) && dest[i] != '\u064C' && dest[i] != '\u064D' && !((nextLink & ALEFTYPE) == ALEFTYPE && (lastLink & LAMTYPE) == LAMTYPE)) { shape = 1; } else { shape = 0; } } if (flag == 2) { if (tashkeelFlag < 2) { dest[i] = (char)('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape); } // else leave tashkeel alone } else { dest[i] = (char)('\uFE70' + (currLink >> 8) + shape); } } // move one notch forward if ((currLink & IRRELEVANT) == 0) { prevLink = lastLink; lastLink = currLink; prevPos = lastPos; lastPos = i; } --i; if (i == nx) { currLink = nextLink; nx = -2; } else if (i != -1) { currLink = getLink(dest[i]); } } // If we found a lam/alef pair in the buffer // call removeLamAlefSpaces to remove the spaces that were added if (lamalef_found) { destSize = removeLamAlefSpaces(dest, start, length); } else { destSize = length; } return destSize; } /* * Name : deShapeUnicode * Function: Converts an Arabic Unicode buffer in FExx Range into unshaped * arabic Unicode buffer in 06xx Range */ private int deShapeUnicode(char[] dest, int start, int length, int destSize) { int lamalef_count = normalize(dest, start, length); // If there was a lamalef in the buffer call expandLamAlef if (lamalef_count != 0) { // need to adjust dest to fit expanded buffer... !!! destSize = expandLamAlef(dest, start, length, lamalef_count); } else { destSize = length; } return destSize; } private int internalShape(char[] source, int sourceStart, int sourceLength, char[] dest, int destStart, int destSize) { if (sourceLength == 0) { return 0; } if (destSize == 0) { if (((options & LETTERS_MASK) != LETTERS_NOOP) && ((options & LENGTH_MASK) == LENGTH_GROW_SHRINK)) { return calculateSize(source, sourceStart, sourceLength); } else { return sourceLength; // by definition } } // always use temp buffer char[] temp = new char[sourceLength * 2]; // all lamalefs requiring expansion System.arraycopy(source, sourceStart, temp, 0, sourceLength); if (isLogical) { invertBuffer(temp, 0, sourceLength); } int outputSize = sourceLength; switch (options & LETTERS_MASK) { case LETTERS_SHAPE_TASHKEEL_ISOLATED: outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 1); break; case LETTERS_SHAPE: outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 0); break; case LETTERS_UNSHAPE: outputSize = deShapeUnicode(temp, 0, sourceLength, destSize); break; default: break; } if (outputSize > destSize) { throw new RuntimeException("not enough room for result data"); } if ((options & DIGITS_MASK) != DIGITS_NOOP) { char digitBase = '\u0030'; // European digits switch (options & DIGIT_TYPE_MASK) { case DIGIT_TYPE_AN: digitBase = '\u0660'; // Arabic-Indic digits break; case DIGIT_TYPE_AN_EXTENDED: digitBase = '\u06f0'; // Eastern Arabic-Indic digits (Persian and Urdu) break; default: break; } switch (options & DIGITS_MASK) { case DIGITS_EN2AN: { int digitDelta = digitBase - '\u0030'; for (int i = 0; i < outputSize; ++i) { char ch = temp[i]; if (ch <= '\u0039' && ch >= '\u0030') { temp[i] += digitDelta; } } } break; case DIGITS_AN2EN: { char digitTop = (char)(digitBase + 9); int digitDelta = '\u0030' - digitBase; for (int i = 0; i < outputSize; ++i) { char ch = temp[i]; if (ch <= digitTop && ch >= digitBase) { temp[i] += digitDelta; } } } break; case DIGITS_EN2AN_INIT_LR: shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, false); break; case DIGITS_EN2AN_INIT_AL: shapeToArabicDigitsWithContext(temp, 0, outputSize, digitBase, true); break; default: break; } } if (isLogical) { invertBuffer(temp, 0, outputSize); } System.arraycopy(temp, 0, dest, destStart, outputSize); return outputSize; }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -