?? nativeregexp.java
字號:
return (c == '\u0020' || c == '\u0009' || c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029 || c == '\u000C' || c == '\u000B' || c == '\u00A0' || Character.getType((char)c) == Character.SPACE_SEPARATOR); } /* * * 1. If IgnoreCase is false, return ch. * 2. Let u be ch converted to upper case as if by calling * String.prototype.toUpperCase on the one-character string ch. * 3. If u does not consist of a single character, return ch. * 4. Let cu be u's character. * 5. If ch's code point value is greater than or equal to decimal 128 and cu's * code point value is less than decimal 128, then return ch. * 6. Return cu. */ private static char upcase(char ch) { if (ch < 128) { if ('a' <= ch && ch <= 'z') { return (char)(ch + ('A' - 'a')); } return ch; } char cu = Character.toUpperCase(ch); if ((ch >= 128) && (cu < 128)) return ch; return cu; } private static char downcase(char ch) { if (ch < 128) { if ('A' <= ch && ch <= 'Z') { return (char)(ch + ('a' - 'A')); } return ch; } char cl = Character.toLowerCase(ch); if ((ch >= 128) && (cl < 128)) return ch; return cl; }/* * Validates and converts hex ascii value. */ private static int toASCIIHexDigit(int c) { if (c < '0') return -1; if (c <= '9') { return c - '0'; } c |= 0x20; if ('a' <= c && c <= 'f') { return c - 'a' + 10; } return -1; }/* * Top-down regular expression grammar, based closely on Perl4. * * regexp: altern A regular expression is one or more * altern '|' regexp alternatives separated by vertical bar. */ private static boolean parseDisjunction(CompilerState state) { if (!parseAlternative(state)) return false; char[] source = state.cpbegin; int index = state.cp; if (index != source.length && source[index] == '|') { RENode altResult; ++state.cp; altResult = new RENode(REOP_ALT); altResult.kid = state.result; if (!parseDisjunction(state)) return false; altResult.kid2 = state.result; state.result = altResult; /* ALT, <next>, ..., JUMP, <end> ... JUMP <end> */ state.progLength += 9; } return true; }/* * altern: item An alternative is one or more items, * item altern concatenated together. */ private static boolean parseAlternative(CompilerState state) { RENode headTerm = null; RENode tailTerm = null; char[] source = state.cpbegin; while (true) { if (state.cp == state.cpend || source[state.cp] == '|' || (state.parenNesting != 0 && source[state.cp] == ')')) { if (headTerm == null) { state.result = new RENode(REOP_EMPTY); } else state.result = headTerm; return true; } if (!parseTerm(state)) return false; if (headTerm == null) headTerm = state.result; else { if (tailTerm == null) { headTerm.next = state.result; tailTerm = state.result; while (tailTerm.next != null) tailTerm = tailTerm.next; } else { tailTerm.next = state.result; tailTerm = tailTerm.next; while (tailTerm.next != null) tailTerm = tailTerm.next; } } } } /* calculate the total size of the bitmap required for a class expression */ private static boolean calculateBitmapSize(CompilerState state, RENode target, char[] src, int index, int end) { char rangeStart = 0; char c; int n; int digit; int nDigits; int i; int max = 0; boolean inRange = false; target.bmsize = 0; if (index == end) return true; if (src[index] == '^') ++index; while (index != end) { int localMax = 0; nDigits = 2; switch (src[index]) { case '\\': ++index; c = src[index++]; switch (c) { case 'b': localMax = 0x8; break; case 'f': localMax = 0xC; break; case 'n': localMax = 0xA; break; case 'r': localMax = 0xD; break; case 't': localMax = 0x9; break; case 'v': localMax = 0xB; break; case 'c': if (((index + 1) < end) && Character.isLetter(src[index + 1])) localMax = (char)(src[index++] & 0x1F); else localMax = '\\'; break; case 'u': nDigits += 2; // fall thru... case 'x': n = 0; for (i = 0; (i < nDigits) && (index < end); i++) { c = src[index++]; n = Kit.xDigitToInt(c, n); if (n < 0) { // Back off to accepting the original // '\' as a literal index -= (i + 1); n = '\\'; break; } } localMax = n; break; case 'd': if (inRange) { reportError("msg.bad.range", ""); return false; } localMax = '9'; break; case 'D': case 's': case 'S': case 'w': case 'W': if (inRange) { reportError("msg.bad.range", ""); return false; } target.bmsize = 65535; return true; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* * This is a non-ECMA extension - decimal escapes (in this * case, octal!) are supposed to be an error inside class * ranges, but supported here for backwards compatibility. * */ n = (c - '0'); c = src[index]; if ('0' <= c && c <= '7') { index++; n = 8 * n + (c - '0'); c = src[index]; if ('0' <= c && c <= '7') { index++; i = 8 * n + (c - '0'); if (i <= 0377) n = i; else index--; } } localMax = n; break; default: localMax = c; break; } break; default: localMax = src[index++]; break; } if (inRange) { if (rangeStart > localMax) { reportError("msg.bad.range", ""); return false; } inRange = false; } else { if (index < (end - 1)) { if (src[index] == '-') { ++index; inRange = true; rangeStart = (char)localMax; continue; } } } if ((state.flags & JSREG_FOLD) != 0){ char cu = upcase((char)localMax); char cd = downcase((char)localMax); localMax = (cu >= cd) ? cu : cd; } if (localMax > max) max = localMax; } target.bmsize = max; return true; } /* * item: assertion An item is either an assertion or * quantatom a quantified atom. * * assertion: '^' Assertions match beginning of string * (or line if the class static property * RegExp.multiline is true). * '$' End of string (or line if the class * static property RegExp.multiline is * true). * '\b' Word boundary (between \w and \W). * '\B' Word non-boundary. * * quantatom: atom An unquantified atom. * quantatom '{' n ',' m '}' * Atom must occur between n and m times. * quantatom '{' n ',' '}' Atom must occur at least n times. * quantatom '{' n '}' Atom must occur exactly n times. * quantatom '*' Zero or more times (same as {0,}). * quantatom '+' One or more times (same as {1,}). * quantatom '?' Zero or one time (same as {0,1}). * * any of which can be optionally followed by '?' for ungreedy * * atom: '(' regexp ')' A parenthesized regexp (what matched * can be addressed using a backreference, * see '\' n below). * '.' Matches any char except '\n'. * '[' classlist ']' A character class. * '[' '^' classlist ']' A negated character class. * '\f' Form Feed. * '\n' Newline (Line Feed). * '\r' Carriage Return. * '\t' Horizontal Tab. * '\v' Vertical Tab. * '\d' A digit (same as [0-9]). * '\D' A non-digit. * '\w' A word character, [0-9a-z_A-Z]. * '\W' A non-word character. * '\s' A whitespace character, [ \b\f\n\r\t\v]. * '\S' A non-whitespace character. * '\' n A backreference to the nth (n decimal * and positive) parenthesized expression. * '\' octal An octal escape sequence (octal must be * two or three digits long, unless it is * 0 for the null character). * '\x' hex A hex escape (hex must be two digits). * '\c' ctrl A control character, ctrl is a letter. * '\' literalatomchar Any character except one of the above * that follow '\' in an atom. * otheratomchar Any character not first among the other * atom right-hand sides. */ private static void doFlat(CompilerState state, char c) { state.result = new RENode(REOP_FLAT); state.result.chr = c; state.result.length = 1; state.result.flatIndex = -1; state.progLength += 3; } private static int getDecimalValue(char c, CompilerState state, int maxValue, String overflowMessageId) { boolean overflow = false;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -