?? recompiler.java
字號:
{
                    case '{':
                    case '?':
                    case '*':
                    case '+':
                        // If the next character is a closure operator and our atom is non-empty, the
                        // current character should bind to the closure operator rather than the atom
                        if (lenAtom != 0) {
                            break atomLoop;
                        }
                }
            }

            // Switch on current char
            switch (pattern.charAt(idx)) {
                case ']':
                case '^':
                case '$':
                case '.':
                case '[':
                case '(':
                case ')':
                case '|':
                    // None of these characters is added to the atom; stop collecting here
                    break atomLoop;

                case '{':
                case '?':
                case '*':
                case '+':
                    // We should have an atom by now
                    if (lenAtom == 0) {
                        // No atom before closure
                        syntaxError("Missing operand to closure");
                    }
                    break atomLoop;

                case '\\': {
                    // Get the escaped character (advances input automatically)
                    int idxBeforeEscape = idx;
                    int c = escape();

                    // Check if it's a simple escape (as opposed to, say, a backreference)
                    if ((c & ESC_MASK) == ESC_MASK) {
                        // Not a simple escape, so backup to where we were before the escape.
                        idx = idxBeforeEscape;
                        break atomLoop;
                    }

                    // Add escaped char to atom
                    emit((char) c);
                    lenAtom++;
                }
                break;

                default:
                    // Add normal character to atom
                    emit(pattern.charAt(idx++));
                    lenAtom++;
                    break;
            }
        }

        // This "shouldn't" happen
        if (lenAtom == 0) {
            internalError();
        }

        // Emit the atom length into the program
        instruction[ret + RE.offsetOpdata] = (char)lenAtom;
        return ret;
    }

    /**
     * Match a terminal node.
     *
     * <p>Dispatches on the pattern character at the current position {@code idx}:
     * <ul>
     *   <li>{@code RE.OP_EOL}, {@code RE.OP_BOL}, {@code RE.OP_ANY}: consumes the character and
     *       returns a node created with that character as {@code node()}'s first argument.</li>
     *   <li>{@code '['}: delegates to {@code characterClass()}.</li>
     *   <li>{@code '('}: delegates to {@code expr(flags)}.</li>
     *   <li>{@code ')'}, {@code ']'}, NUL, and a leading closure operator
     *       ({@code ? + { *}): reported through {@code syntaxError()}; {@code '|'} is reported
     *       through {@code internalError()}.</li>
     *   <li>{@code '\\'}: class/complex escapes become an {@code RE.OP_ESCAPE} node,
     *       backreferences an {@code RE.OP_BACKREF} node; any other escape is rewound and
     *       handled as part of an atom.</li>
     *   <li>anything else: handled by {@code atom()}.</li>
     * </ul>
     *
     * @param flags Flags, passed by reference; only {@code flags[0]} is read and written here
     * @return Index of terminal node (closeable)
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
     */
    int terminal(int[] flags) throws RESyntaxException {
        switch (pattern.charAt(idx)) {
            case RE.OP_EOL:
            case RE.OP_BOL:
            case RE.OP_ANY:
                return node(pattern.charAt(idx++), 0);

            case '[':
                return characterClass();

            case '(':
                return expr(flags);

            // NOTE(review): the error cases below have no break/return. They rely on
            // syntaxError() and internalError() never returning normally (presumably they
            // always throw - confirm); otherwise control would fall into the next case.
            case ')':
                syntaxError("Unexpected close paren");

            case '|':
                internalError();

            case ']':
                syntaxError("Mismatched class");

            case 0:
                syntaxError("Unexpected end of input");

            case '?':
            case '+':
            case '{':
            case '*':
                syntaxError("Missing operand to closure");

            case '\\': {
                // Don't forget, escape() advances the input stream!
                int idxBeforeEscape = idx;

                // Switch on escaped character
                switch (escape()) {
                    case ESC_CLASS:
                    case ESC_COMPLEX:
                        flags[0] &= ~NODE_NULLABLE;
                        // The operand is the character just before the new idx, i.e. the last
                        // character escape() consumed
                        return node(RE.OP_ESCAPE, pattern.charAt(idx - 1));

                    case ESC_BACKREF: {
                        // Numeric value of the last consumed character (presumably the
                        // backreference digit - confirm against escape())
                        char backreference = (char)(pattern.charAt(idx - 1) - '0');
                        if (parens <= backreference) {
                            syntaxError("Bad backreference");
                        }
                        flags[0] |= NODE_NULLABLE;
                        return node(RE.OP_BACKREF, backreference);
                    }

                    default:
                        // We had a simple escape and we want to have it end up in
                        // an atom, so we back up and fall through to the default handling
                        idx = idxBeforeEscape;
                        flags[0] &= ~NODE_NULLABLE;
                        break;
                }
            }
        }

        // Every case above either fails or returns, except the simple-escape case,
        // which rewinds idx and breaks out to here.
        // If it wasn't one of the above, it must be the start of an atom.
        flags[0] &= ~NODE_NULLABLE;
        return atom();
    }

    /**
     * Compile a possibly closured terminal.
     *
     * <p>Compiles one terminal with {@code terminal()} and, if the next pattern character is a
     * closure operator ({@code ?}, {@code *}, {@code +} or <code>{</code>), wraps the terminal
     * in the corresponding closure nodes. A {@code ?} directly after {@code ?}, {@code *} or
     * {@code +} selects the reluctant (non-greedy) opcodes. If the terminal ends the pattern,
     * or is followed by any other character, the terminal is returned unchanged.
     *
     * @param flags Flags passed by reference; the terminal's flags are OR-ed into
     *              {@code flags[0]}, and {@code NODE_NULLABLE} is set for {@code ?} and {@code *}
     * @return Index of closured node (the index that {@code terminal()} returned)
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
     */
    int closure(int[] flags) throws RESyntaxException {
        // Before terminal: remembered so the {m,n} case can rewind and re-compile the terminal
        int idxBeforeTerminal = idx;

        // Values to pass by reference to terminal()
        int[] terminalFlags = { NODE_NORMAL };

        // Get terminal symbol
        int ret = terminal(terminalFlags);

        // Or in flags from terminal symbol
        flags[0] |= terminalFlags[0];

        // Advance input, set NODE_NULLABLE flag and do sanity checks
        if (idx >= len) {
            return ret;
        }

        boolean greedy = true;
        char closureType = pattern.charAt(idx);
        // The cases below fall through on purpose: '?' and '*' also run the '+' and '{'
        // steps, and '+' also runs the '{' step.
        switch (closureType) {
            case '?':
            case '*':
                // The current node can be null
                flags[0] |= NODE_NULLABLE;
                // Drop through

            case '+':
                // Eat closure character
                idx++;
                // Drop through

            case '{':
                // Don't allow blatant stupidity
                int opcode = instruction[ret /* + RE.offsetOpcode */];
                if (opcode == RE.OP_BOL || opcode == RE.OP_EOL) {
                    syntaxError("Bad closure operand");
                }
                if ((terminalFlags[0] & NODE_NULLABLE) != 0) {
                    syntaxError("Closure operand can't be nullable");
                }
        }

        // If the next character is a '?', make the closure non-greedy (reluctant).
        // For '{' idx was not advanced above, so this test sees the '{' itself and greedy
        // stays true; the reluctant switch below accordingly has no '{' case.
        if (idx < len && pattern.charAt(idx) == '?') {
            idx++;
            greedy = false;
        }

        // NOTE(review): in the cases below the operand is addressed as ret + RE.nodeSize after
        // nodeInsert(..., ret) - presumably nodeInsert places the new node at ret and shifts
        // the operand up by one node; confirm against nodeInsert().
        if (greedy) {
            // Actually do the closure now
            switch (closureType) {
                case '{': {
                    // bracket() is followed by reads of idx, this.bracketMin and
                    // this.bracketOpt - presumably it parses the {m,n} expression and sets
                    // them; confirm against bracket()
                    bracket();
                    int bracketEnd = idx;
                    int bracketMin = this.bracketMin;
                    int bracketOpt = this.bracketOpt;

                    // Pointer to the last terminal
                    int pos = ret;

                    // Process min first
                    for (int c = 0; c < bracketMin; c++) {
                        // Rewind stream and run it through again - more matchers coming
                        idx = idxBeforeTerminal;
                        // Java evaluates arguments left to right: the first argument is the
                        // previous terminal, the second the terminal compiled just now
                        setNextOfEnd(pos, pos = terminal(terminalFlags));
                    }

                    // Do the right thing for maximum ({m,})
                    if (bracketOpt == bracketUnbounded) {
                        // Drop through now and closure expression.
                        // We are done with the {m,} expr, so skip rest
                        idx = bracketEnd;
                        nodeInsert(RE.OP_STAR, 0, pos);
                        setNextOfEnd(pos + RE.nodeSize, pos);
                        break;
                    } else if (bracketOpt > 0) {
                        // opt[0..bracketOpt-1] hold the MAYBE nodes; opt[bracketOpt] holds the
                        // closing NOTHING node
                        int opt[] = new int[bracketOpt + 1];
                        // Surround first optional terminal with MAYBE
                        nodeInsert(RE.OP_MAYBE, 0, pos);
                        opt[0] = pos;

                        // Add all the rest optional terminals with preceding MAYBEs
                        for (int c = 1; c < bracketOpt; c++) {
                            opt[c] = node(RE.OP_MAYBE, 0);
                            // Rewind stream and run it through again - more matchers coming
                            idx = idxBeforeTerminal;
                            terminal(terminalFlags);
                        }

                        // Tie ends together
                        int end = opt[bracketOpt] = node(RE.OP_NOTHING, 0);
                        for (int c = 0; c < bracketOpt; c++) {
                            setNextOfEnd(opt[c], end);
                            setNextOfEnd(opt[c] + RE.nodeSize, opt[c + 1]);
                        }
                    } else {
                        // Rollback terminal - no opt matchers present
                        lenInstruction = pos;
                        node(RE.OP_NOTHING, 0);
                    }

                    // We are done. Skip the remainder of {m,n} expr
                    idx = bracketEnd;
                    break;
                }

                case '?': {
                    nodeInsert(RE.OP_MAYBE, 0, ret);
                    int n = node(RE.OP_NOTHING, 0);
                    setNextOfEnd(ret, n);
                    setNextOfEnd(ret + RE.nodeSize, n);
                    break;
                }

                case '*': {
                    nodeInsert(RE.OP_STAR, 0, ret);
                    setNextOfEnd(ret + RE.nodeSize, ret);
                    break;
                }

                case '+': {
                    nodeInsert(RE.OP_CONTINUE, 0, ret);
                    int n = node(RE.OP_PLUS, 0);
                    setNextOfEnd(ret + RE.nodeSize, n);
                    setNextOfEnd(n, ret);
                    break;
                }
            }
        } else {
            // Actually do the closure now (reluctant variants)
            switch (closureType) {
                case '?': {
                    nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
                    int n = node(RE.OP_NOTHING, 0);
                    setNextOfEnd(ret, n);
                    setNextOfEnd(ret + RE.nodeSize, n);
                    break;
                }

                case '*': {
                    nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
                    setNextOfEnd(ret + RE.nodeSize, ret);
                    break;
                }

                case '+': {
                    nodeInsert(RE.OP_CONTINUE, 0, ret);
                    int n = node(RE.OP_RELUCTANTPLUS, 0);
                    setNextOfEnd(n, ret);
                    setNextOfEnd(ret + RE.nodeSize, n);
                    break;
                }
            }
        }

        return ret;
    }

    /**
     * Compile body of one branch of an or operator (implements concatenation)
     *
     * @param flags Flags passed by reference
     * @return Pointer to first node in the branch
     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
     */
    int branch(int[] flags) throws RESyntaxException {
        // Get each possibly closured piece and concat
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -