?? recompiler.java
字號:
int node; int ret = -1; int chain = -1; int[] closureFlags = new int[1]; boolean nullable = true; while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')') { // Get new node closureFlags[0] = NODE_NORMAL; node = closure(closureFlags); if (closureFlags[0] == NODE_NORMAL) { nullable = false; } // If there's a chain, append to the end if (chain != -1) { setNextOfEnd(chain, node); } // Chain starts at current chain = node; if (ret == -1) { ret = node; } } // If we don't run loop, make a nothing node if (ret == -1) { ret = node(RE.OP_NOTHING, 0); } // Set nullable flag for this branch if (nullable) { flags[0] |= NODE_NULLABLE; } return ret; } /** * Compile an expression with possible parens around it. Paren matching * is done at this level so we can tie the branch tails together. * * @param flags Flag value passed by reference * @return Node index of expression in instruction array * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int expr(int[] flags) throws RESyntaxException { // Create open paren node unless we were called from the top level (which has no parens) int paren = -1; int ret = -1; int closeParens = parens; if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(') { // if its a cluster ( rather than a proper subexpression ie with backrefs ) if (idx + 2 < len && pattern.charAt(idx + 1) == '?' && pattern.charAt(idx + 2) == ':') { paren = 2; idx += 3; ret = node(RE.OP_OPEN_CLUSTER, 0); } else { paren = 1; idx++; ret = node(RE.OP_OPEN, parens++); } } flags[0] &= ~NODE_TOPLEVEL; // Process contents of first branch node boolean open = false; int branch = branch(flags); if (ret == -1) { ret = branch; } else { setNextOfEnd(ret, branch); } // Loop through branches while (idx < len && pattern.charAt(idx) == '|') { // Now open the first branch since there are more than one if (!open) { nodeInsert(RE.OP_BRANCH, 0, branch); open = true; } idx++; setNextOfEnd(branch, branch = node(RE.OP_BRANCH, 0)); branch(flags); } // Create an ending node (either a close paren or an OP_END) int end; if (paren > 0) { if (idx < len && pattern.charAt(idx) == ')') { idx++; } else { syntaxError("Missing close paren"); } if (paren == 1) { end = node(RE.OP_CLOSE, closeParens); } else { end = node(RE.OP_CLOSE_CLUSTER, 0); } } else { end = node(RE.OP_END, 0); } // Append the ending node to the ret nodelist setNextOfEnd(ret, end); // Hook the ends of each branch to the end node int currentNode = ret; int nextNodeOffset = instruction[currentNode + RE.offsetNext]; // while the next node o while (nextNodeOffset != 0 && currentNode < lenInstruction) { // If branch, make the end of the branch's operand chain point to the end node. if (instruction[currentNode /* + RE.offsetOpcode */] == RE.OP_BRANCH) { setNextOfEnd(currentNode + RE.nodeSize, end); } nextNodeOffset = instruction[currentNode + RE.offsetNext]; currentNode += nextNodeOffset; } // Return the node list return ret; } /** * Compiles a regular expression pattern into a program runnable by the pattern * matcher class 'RE'. * @param pattern Regular expression pattern to compile (see RECompiler class * for details). * @return A compiled regular expression program. * @exception RESyntaxException Thrown if the regular expression has invalid syntax. * @see RECompiler * @see RE */ public REProgram compile(String pattern) throws RESyntaxException { // Initialize variables for compilation this.pattern = pattern; // Save pattern in instance variable len = pattern.length(); // Precompute pattern length for speed idx = 0; // Set parsing index to the first character lenInstruction = 0; // Set emitted instruction count to zero parens = 1; // Set paren level to 1 (the implicit outer parens) // Initialize pass by reference flags value int[] flags = { NODE_TOPLEVEL }; // Parse expression expr(flags); // Should be at end of input if (idx != len) { if (pattern.charAt(idx) == ')') { syntaxError("Unmatched close paren"); } syntaxError("Unexpected input remains"); } // Return the result char[] ins = new char[lenInstruction]; System.arraycopy(instruction, 0, ins, 0, lenInstruction); return new REProgram(parens, ins); } /** * Local, nested class for maintaining character ranges for character classes. */ class RERange { int size = 16; // Capacity of current range arrays int[] minRange = new int[size]; // Range minima int[] maxRange = new int[size]; // Range maxima int num = 0; // Number of range array elements in use /** * Deletes the range at a given index from the range lists * @param index Index of range to delete from minRange and maxRange arrays. */ void delete(int index) { // Return if no elements left or index is out of range if (num == 0 || index >= num) { return; } // Move elements down while (++index < num) { if (index - 1 >= 0) { minRange[index-1] = minRange[index]; maxRange[index-1] = maxRange[index]; } } // One less element now num--; } /** * Merges a range into the range list, coalescing ranges if possible. * @param min Minimum end of range * @param max Maximum end of range */ void merge(int min, int max) { // Loop through ranges for (int i = 0; i < num; i++) { // Min-max is subsumed by minRange[i]-maxRange[i] if (min >= minRange[i] && max <= maxRange[i]) { return; } // Min-max subsumes minRange[i]-maxRange[i] else if (min <= minRange[i] && max >= maxRange[i]) { delete(i); merge(min, max); return; } // Min is in the range, but max is outside else if (min >= minRange[i] && min <= maxRange[i]) { min = minRange[i]; delete(i); merge(min, max); return; } // Max is in the range, but min is outside else if (max >= minRange[i] && max <= maxRange[i]) { max = maxRange[i]; delete(i); merge(min, max); return; } } // Must not overlap any other ranges if (num >= size) { size *= 2; int[] newMin = new int[size]; int[] newMax = new int[size]; System.arraycopy(minRange, 0, newMin, 0, num); System.arraycopy(maxRange, 0, newMax, 0, num); minRange = newMin; maxRange = newMax; } minRange[num] = min; maxRange[num] = max; num++; } /** * Removes a range by deleting or shrinking all other ranges * @param min Minimum end of range * @param max Maximum end of range */ void remove(int min, int max) { // Loop through ranges for (int i = 0; i < num; i++) { // minRange[i]-maxRange[i] is subsumed by min-max if (minRange[i] >= min && maxRange[i] <= max) { delete(i); return; } // min-max is subsumed by minRange[i]-maxRange[i] else if (min >= minRange[i] && max <= maxRange[i]) { int minr = minRange[i]; int maxr = maxRange[i]; delete(i); if (minr < min) { merge(minr, min - 1); } if (max < maxr) { merge(max + 1, maxr); } return; } // minRange is in the range, but maxRange is outside else if (minRange[i] >= min && minRange[i] <= max) { minRange[i] = max + 1; return; } // maxRange is in the range, but minRange is outside else if (maxRange[i] >= min && maxRange[i] <= max) { maxRange[i] = min - 1; return; } } } /** * Includes (or excludes) the range from min to max, inclusive. * @param min Minimum end of range * @param max Maximum end of range * @param include True if range should be included. False otherwise. */ void include(int min, int max, boolean include) { if (include) { merge(min, max); } else { remove(min, max); } } /** * Includes a range with the same min and max * @param minmax Minimum and maximum end of range (inclusive) * @param include True if range should be included. False otherwise. */ void include(char minmax, boolean include) { include(minmax, minmax, include); } }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -