?? recompiler.java
字號:
case RE.E_NALNUM: case RE.E_SPACE: case RE.E_NSPACE: case RE.E_DIGIT: case RE.E_NDIGIT: return ESC_CLASS; case 'u': case 'x': { // Exact required hex digits for escape type int hexDigits = (escapeChar == 'u' ? 4 : 2); // Parse up to hexDigits characters from input int val = 0; for ( ; idx < len && hexDigits-- > 0; idx++) { // Get char char c = pattern.charAt(idx); // If it's a hexadecimal digit (0-9) if (c >= '0' && c <= '9') { // Compute new value val = (val << 4) + c - '0'; } else { // If it's a hexadecimal letter (a-f) c = Character.toLowerCase(c); if (c >= 'a' && c <= 'f') { // Compute new value val = (val << 4) + (c - 'a') + 10; } else { // If it's not a valid digit or hex letter, the escape must be invalid // because hexDigits of input have not been absorbed yet. syntaxError("Expected " + hexDigits + " hexadecimal digits after \\" + escapeChar); } } } return val; } case 't': return '\t'; case 'n': return '\n'; case 'r': return '\r'; case 'f': return '\f'; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': // An octal escape starts with a 0 or has two digits in a row if ((idx < len && Character.isDigit(pattern.charAt(idx))) || escapeChar == '0') { // Handle \nnn octal escapes int val = escapeChar - '0'; if (idx < len && Character.isDigit(pattern.charAt(idx))) { val = ((val << 3) + (pattern.charAt(idx++) - '0')); if (idx < len && Character.isDigit(pattern.charAt(idx))) { val = ((val << 3) + (pattern.charAt(idx++) - '0')); } } return val; } // It's actually a backreference (\[1-9]), not an escape return ESC_BACKREF; default: // Simple quoting of a character return escapeChar; } } /** * Compile a character class * @return Index of class node * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 
*/
    int characterClass() throws RESyntaxException {
        // Overview of what this method does:
        //  - It must be entered with idx positioned on the opening '['.
        //  - A class of the form "[:name:]" (colon directly after the '[') is treated as a
        //    POSIX class: the lowercase name is looked up in hashPOSIX and a single
        //    OP_POSIXCLASS node is returned.
        //  - Anything else is parsed as a bracket class: an OP_ANYOF node is created,
        //    the members are accumulated in an RERange, and finally the number of ranges
        //    followed by each (min, max) pair is written out.
        // NOTE(review): the code relies on syntaxError()/internalError() not returning
        // normally (e.g. the ESC_BACKREF case would otherwise fall into ESC_CLASS) —
        // confirm against their definitions.

        // Check for bad calling or empty class
        if (pattern.charAt(idx) != '[') {
            internalError();
        }

        // Check for unterminated or empty class.
        // Note the ++idx side effect: when there is a next character, idx now points at
        // the first character after '['.
        if ((idx + 1) >= len || pattern.charAt(++idx) == ']') {
            syntaxError("Empty or unterminated class");
        }

        // Check for POSIX character class
        if (idx < len && pattern.charAt(idx) == ':') {
            // Skip colon
            idx++;

            // POSIX character classes are denoted with lowercase ASCII strings
            int idxStart = idx;
            while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z') {
                idx++;
            }

            // Should be a ":]" to terminate the POSIX character class
            if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']') {
                // Get character class name (the text between the two colons)
                String charClass = pattern.substring(idxStart, idx);

                // Select the POSIX class id; a null result means the name is unknown
                Character i = (Character)hashPOSIX.get(charClass);
                if (i != null) {
                    // Move past colon and right bracket
                    idx += 2;

                    // Return new POSIX character class node
                    return node(RE.OP_POSIXCLASS, i.charValue());
                }
                syntaxError("Invalid POSIX character class '" + charClass + "'");
            }
            syntaxError("Invalid POSIX character class syntax");
        }

        // Try to build a class.  Create OP_ANYOF node
        int ret = node(RE.OP_ANYOF, 0);

        // Parse class declaration.
        // CHAR_INVALID doubles as the "no previous character" marker for 'last', so a
        // literal Character.MAX_VALUE member is indistinguishable from "none".
        char CHAR_INVALID = Character.MAX_VALUE;
        char last = CHAR_INVALID;             // previous simple char (candidate range start)
        char simpleChar;                      // simple char produced by the current iteration
        boolean include = true;               // passed to range.include(); toggled by '^'
        boolean definingRange = false;        // true after a '-' until the range end is seen
        int idxFirst = idx;                   // index of the first character inside the class
        char rangeStart = Character.MIN_VALUE;
        char rangeEnd;
        RERange range = new RERange();
        while (idx < len && pattern.charAt(idx) != ']') {

            // The label lets the nested escape switch break out of this outer switch and
            // continue with the "simple character" handling below it.
            switchOnCharacter:

            // Switch on character
            switch (pattern.charAt(idx)) {
                case '^':
                    // Every '^' flips the include flag, not only a leading one
                    include = !include;
                    if (idx == idxFirst) {
                        // Leading '^': start from the full character range.
                        // presumably later include(..., false) calls remove members from it —
                        // confirm against RERange.include
                        range.include(Character.MIN_VALUE, Character.MAX_VALUE, true);
                    }
                    idx++;
                    continue;

                case '\\': {
                    // Escape always advances the stream
                    int c;
                    switch (c = escape ()) {
                        case ESC_COMPLEX:
                        case ESC_BACKREF:

                            // Word boundaries and backrefs not allowed in a character class!
                            syntaxError("Bad character class");

                        case ESC_CLASS:

                            // Classes can't be an endpoint of a range
                            if (definingRange) {
                                syntaxError("Bad character class");
                            }

                            // Handle specific type of class (some are ok).
                            // idx - 1 is read as the escape's class letter; this relies on
                            // escape() having left idx just past it.
                            switch (pattern.charAt(idx - 1)) {
                                case RE.E_NSPACE:
                                    // Everything except the six characters listed under
                                    // E_SPACE below (codes 8, 9, 10, 12, 13 and 32)
                                    range.include(Character.MIN_VALUE, 7, include);   // [Min - \b )
                                    range.include((char) 11, include);                // ( \n - \f )
                                    range.include(14, 31, include);                   // ( \r - ' ')
                                    range.include(33, Character.MAX_VALUE, include);  // (' ' - Max]
                                    break;

                                case RE.E_NALNUM:
                                    // Everything except 0-9, A-Z, '_' and a-z
                                    range.include(Character.MIN_VALUE, '/', include); // [Min - '0')
                                    range.include(':', '@', include);                 // ('9' - 'A')
                                    range.include('[', '^', include);                 // ('Z' - '_')
                                    range.include('`', include);                      // ('_' - 'a')
                                    range.include('{', Character.MAX_VALUE, include); // ('z' - Max]
                                    break;

                                case RE.E_NDIGIT:
                                    // Everything except 0-9
                                    range.include(Character.MIN_VALUE, '/', include); // [Min - '0')
                                    range.include(':', Character.MAX_VALUE, include); // ('9' - Max]
                                    break;

                                case RE.E_SPACE:
                                    // Note that backspace ('\b') is part of this set
                                    range.include('\t', include);
                                    range.include('\r', include);
                                    range.include('\f', include);
                                    range.include('\n', include);
                                    range.include('\b', include);
                                    range.include(' ', include);
                                    break;

                                case RE.E_ALNUM:
                                    range.include('a', 'z', include);
                                    range.include('A', 'Z', include);
                                    range.include('_', include);

                                    // Fall through! (digits are also part of the alnum set)

                                case RE.E_DIGIT:
                                    range.include('0', '9', include);
                                    break;
                            }

                            // Make last char invalid (can't be a range start)
                            last = CHAR_INVALID;
                            break;

                        default:

                            // Escape is simple so treat as a simple char.
                            // Leaves the OUTER switch so the simple-char handling below runs.
                            simpleChar = (char) c;
                            break switchOnCharacter;
                    }
                }
                // Reached only after the ESC_CLASS case: nothing more to do for this escape
                continue;

                case '-':

                    // Start a range if one isn't already started
                    if (definingRange) {
                        syntaxError("Bad class range");
                    }
                    definingRange = true;

                    // If no last character, start of range is 0
                    rangeStart = (last == CHAR_INVALID ? 0 : last);

                    // Premature end of range. define up to Character.MAX_VALUE
                    // The ++idx below also steps past the '-' when ']' does not follow.
                    // If '-' is the very last pattern character idx is NOT advanced, so the
                    // next loop iteration sees '-' again with definingRange already set and
                    // reports "Bad class range".
                    if ((idx + 1) < len && pattern.charAt(++idx) == ']') {
                        simpleChar = Character.MAX_VALUE;
                        break;
                    }
                    continue;

                default:
                    simpleChar = pattern.charAt(idx++);
                    break;
            }

            // Handle simple character simpleChar
            if (definingRange) {
                // if we are defining a range make it now
                rangeEnd = simpleChar;

                // Actually create a range if the range is ok.
                // The >= comparison also rejects a range whose two ends are equal.
                if (rangeStart >= rangeEnd) {
                    syntaxError("Bad character class");
                }
                range.include(rangeStart, rangeEnd, include);

                // We are done defining the range
                last = CHAR_INVALID;
                definingRange = false;
            } else {
                // If simple character and not start of range, include it.
                // When a '-' follows, the char is held in 'last' and only becomes part of
                // the class through the range created on a later iteration.
                if (idx >= len || pattern.charAt(idx) != '-') {
                    range.include(simpleChar, include);
                }
                last = simpleChar;
            }
        }

        // Shouldn't be out of input
        if (idx == len) {
            syntaxError("Unterminated character class");
        }

        // Absorb the ']' end of class marker
        idx++;

        // Emit character class definition: the range count goes into the node's operand
        // slot, followed by one (min, max) pair per range
        instruction[ret + RE.offsetOpdata] = (char)range.num;
        for (int i = 0; i < range.num; i++) {
            emit((char)range.minRange[i]);
            emit((char)range.maxRange[i]);
        }
        return ret;
    }

    /**
     * Absorb an atomic character string.  This method is a little tricky because
     * it can un-include the last character of string if a closure operator follows.
     * This is correct because *+? have higher precedence than concatentation (thus
     * ABC* means AB(C*) and NOT (ABC)*).
     * @return Index of new atom node
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
     */
    int atom() throws RESyntaxException {
        // Create a string node
        int ret = node(RE.OP_ATOM, 0);

        // Length of atom
        int lenAtom = 0;

        // Loop while we've got input

        atomLoop:

        while (idx < len) {
            // Is there a next char?
            if ((idx + 1) < len) {
                char c = pattern.charAt(idx + 1);

                // If the next 'char' is an escape, look past the whole escape
                if (pattern.charAt(idx) == '\\') {
                    int idxEscape = idx;
                    escape();
                    if (idx < len) {
                        c = pattern.charAt(idx);
                    }
                    idx = idxEscape;
                }

                // Switch on next char
                switch (c)
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -