?? nativeregexp.java
字號:
/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is Rhino code, released * May 6, 1998. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1997-1999 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): * Norris Boyd * Igor Bukanov * Brendan Eich * Matthias Radestock * * Alternatively, the contents of this file may be used under the * terms of the GNU Public License (the "GPL"), in which case the * provisions of the GPL are applicable instead of those above. * If you wish to allow use of your version of this file only * under the terms of the GPL and not to allow others to use your * version of this file under the NPL, indicate your decision by * deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete * the provisions above, a recipient may use your version of this * file under either the NPL or the GPL. */package org.mozilla.javascript.regexp;import java.io.Serializable;import org.mozilla.javascript.*;/** * This class implements the RegExp native object. * * Revision History: * Implementation in C by Brendan Eich * Initial port to Java by Norris Boyd from jsregexp.c version 1.36 * Merged up to version 1.38, which included Unicode support. * Merged bug fixes in version 1.39. * Merged JSFUN13_BRANCH changes up to 1.32.2.13 * * @author Brendan Eich * @author Norris Boyd */public class NativeRegExp extends IdScriptableObject implements Function{ static final long serialVersionUID = 4965263491464903264L; private static final Object REGEXP_TAG = new Object(); public static final int JSREG_GLOB = 0x1; // 'g' flag: global public static final int JSREG_FOLD = 0x2; // 'i' flag: fold public static final int JSREG_MULTILINE = 0x4; // 'm' flag: multiline //type of match to perform public static final int TEST = 0; public static final int MATCH = 1; public static final int PREFIX = 2; private static final boolean debug = false; private static final byte REOP_EMPTY = 0; /* match rest of input against rest of r.e. */ private static final byte REOP_ALT = 1; /* alternative subexpressions in kid and next */ private static final byte REOP_BOL = 2; /* beginning of input (or line if multiline) */ private static final byte REOP_EOL = 3; /* end of input (or line if multiline) */ private static final byte REOP_WBDRY = 4; /* match "" at word boundary */ private static final byte REOP_WNONBDRY = 5; /* match "" at word non-boundary */ private static final byte REOP_QUANT = 6; /* quantified atom: atom{1,2} */ private static final byte REOP_STAR = 7; /* zero or more occurrences of kid */ private static final byte REOP_PLUS = 8; /* one or more occurrences of kid */ private static final byte REOP_OPT = 9; /* optional subexpression in kid */ private static final byte REOP_LPAREN = 10; /* left paren bytecode: kid is u.num'th sub-regexp */ private static final byte REOP_RPAREN = 11; /* right paren bytecode */ private static final byte REOP_DOT = 12; /* stands for any character */ private static final byte REOP_CCLASS = 13; /* character class: [a-f] */ private static final byte REOP_DIGIT = 14; /* match a digit char: [0-9] */ private static final byte REOP_NONDIGIT = 15; /* match a non-digit char: [^0-9] */ private static final byte REOP_ALNUM = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */ private static final byte REOP_NONALNUM = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */ private static final byte REOP_SPACE = 18; /* match a whitespace char */ private static final byte REOP_NONSPACE = 19; /* match a non-whitespace char */ private static final byte REOP_BACKREF = 20; /* back-reference (e.g., \1) to a parenthetical */ private static final byte REOP_FLAT = 21; /* match a flat string */ private static final byte REOP_FLAT1 = 22; /* match a single char */ private static final byte REOP_JUMP = 23; /* for deoptimized closure loops */ private static final byte REOP_DOTSTAR = 24; /* optimize .* to use a single opcode */ private static final byte REOP_ANCHOR = 25; /* like .* but skips left context to unanchored r.e. */ private static final byte REOP_EOLONLY = 26; /* $ not preceded by any pattern */ private static final byte REOP_UCFLAT = 27; /* flat Unicode string; len immediate counts chars */ private static final byte REOP_UCFLAT1 = 28; /* single Unicode char */ private static final byte REOP_UCCLASS = 29; /* Unicode character class, vector of chars to match */ private static final byte REOP_NUCCLASS = 30; /* negated Unicode character class */ private static final byte REOP_BACKREFi = 31; /* case-independent REOP_BACKREF */ private static final byte REOP_FLATi = 32; /* case-independent REOP_FLAT */ private static final byte REOP_FLAT1i = 33; /* case-independent REOP_FLAT1 */ private static final byte REOP_UCFLATi = 34; /* case-independent REOP_UCFLAT */ private static final byte REOP_UCFLAT1i = 35; /* case-independent REOP_UCFLAT1 */ private static final byte REOP_ANCHOR1 = 36; /* first-char discriminating REOP_ANCHOR */ private static final byte REOP_NCCLASS = 37; /* negated 8-bit character class */ private static final byte REOP_DOTSTARMIN = 38; /* ungreedy version of REOP_DOTSTAR */ private static final byte REOP_LPARENNON = 39; /* non-capturing version of REOP_LPAREN */ private static final byte REOP_RPARENNON = 40; /* non-capturing version of REOP_RPAREN */ private static final byte REOP_ASSERT = 41; /* zero width positive lookahead assertion */ private static final byte REOP_ASSERT_NOT = 42; /* zero width negative lookahead assertion */ private static final byte REOP_ASSERTTEST = 43; /* sentinel at end of assertion child */ private static final byte REOP_ASSERTNOTTEST = 44; /* sentinel at end of !assertion child */ private static final byte REOP_MINIMALSTAR = 45; /* non-greedy version of * */ private static final byte REOP_MINIMALPLUS = 46; /* non-greedy version of + */ private static final byte REOP_MINIMALOPT = 47; /* non-greedy version of ? */ private static final byte REOP_MINIMALQUANT = 48; /* non-greedy version of {} */ private static final byte REOP_ENDCHILD = 49; /* sentinel at end of quantifier child */ private static final byte REOP_CLASS = 50; /* character class with index */ private static final byte REOP_REPEAT = 51; /* directs execution of greedy quantifier */ private static final byte REOP_MINIMALREPEAT = 52; /* directs execution of non-greedy quantifier */ private static final byte REOP_END = 53; public static void init(Context cx, Scriptable scope, boolean sealed) { NativeRegExp proto = new NativeRegExp(); proto.re = (RECompiled)compileRE("", null, false); proto.activatePrototypeMap(MAX_PROTOTYPE_ID); proto.setParentScope(scope); proto.setPrototype(getObjectPrototype(scope)); NativeRegExpCtor ctor = new NativeRegExpCtor(); ScriptRuntime.setFunctionProtoAndParent(ctor, scope); ctor.setImmunePrototypeProperty(proto); if (sealed) { proto.sealObject(); ctor.sealObject(); } defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM); } NativeRegExp(Scriptable scope, Object regexpCompiled) { this.re = (RECompiled)regexpCompiled; this.lastIndex = 0; ScriptRuntime.setObjectProtoAndParent(this, scope); } public String getClassName() { return "RegExp"; } public Object call(Context cx, Scriptable scope, Scriptable thisObj, Object[] args) { return execSub(cx, scope, args, MATCH); } public Scriptable construct(Context cx, Scriptable scope, Object[] args) { return (Scriptable)execSub(cx, scope, args, MATCH); } Scriptable compile(Context cx, Scriptable scope, Object[] args) { if (args.length > 0 && args[0] instanceof NativeRegExp) { if (args.length > 1 && args[1] != Undefined.instance) { // report error throw ScriptRuntime.typeError0("msg.bad.regexp.compile"); } NativeRegExp thatObj = (NativeRegExp) args[0]; this.re = thatObj.re; this.lastIndex = thatObj.lastIndex; return this; } String s = args.length == 0 ? "" : ScriptRuntime.toString(args[0]); String global = args.length > 1 && args[1] != Undefined.instance ? ScriptRuntime.toString(args[1]) : null; this.re = (RECompiled)compileRE(s, global, false); this.lastIndex = 0; return this; } public String toString() { StringBuffer buf = new StringBuffer(); buf.append('/'); if (re.source.length != 0) { buf.append(re.source); } else { // See bugzilla 226045 buf.append("(?:)"); } buf.append('/'); if ((re.flags & JSREG_GLOB) != 0) buf.append('g'); if ((re.flags & JSREG_FOLD) != 0) buf.append('i'); if ((re.flags & JSREG_MULTILINE) != 0) buf.append('m'); return buf.toString(); } NativeRegExp() { } private static RegExpImpl getImpl(Context cx) { return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx); } private Object execSub(Context cx, Scriptable scopeObj, Object[] args, int matchType) { RegExpImpl reImpl = getImpl(cx); String str; if (args.length == 0) { str = reImpl.input; if (str == null) { reportError("msg.no.re.input.for", toString()); } } else { str = ScriptRuntime.toString(args[0]); } double d = ((re.flags & JSREG_GLOB) != 0) ? lastIndex : 0; Object rval; if (d < 0 || str.length() < d) { lastIndex = 0; rval = null; } else { int indexp[] = { (int)d }; rval = executeRegExp(cx, scopeObj, reImpl, str, indexp, matchType); if ((re.flags & JSREG_GLOB) != 0) { lastIndex = (rval == null || rval == Undefined.instance) ? 0 : indexp[0]; } } return rval; } static Object compileRE(String str, String global, boolean flat) { RECompiled regexp = new RECompiled(); regexp.source = str.toCharArray(); int length = str.length(); int flags = 0; if (global != null) { for (int i = 0; i < global.length(); i++) { char c = global.charAt(i); if (c == 'g') { flags |= JSREG_GLOB; } else if (c == 'i') { flags |= JSREG_FOLD; } else if (c == 'm') { flags |= JSREG_MULTILINE; } else { reportError("msg.invalid.re.flag", String.valueOf(c)); } } } regexp.flags = flags; CompilerState state = new CompilerState(regexp.source, length, flags); if (flat && length > 0) {if (debug) {System.out.println("flat = \"" + str + "\"");} state.result = new RENode(REOP_FLAT); state.result.chr = state.cpbegin[0]; state.result.length = length; state.result.flatIndex = 0; state.progLength += 5; } else if (!parseDisjunction(state)) return null; regexp.program = new byte[state.progLength + 1]; if (state.classCount != 0) { regexp.classList = new RECharSet[state.classCount]; regexp.classCount = state.classCount; } int endPC = emitREBytecode(state, regexp, 0, state.result); regexp.program[endPC++] = REOP_END;if (debug) {System.out.println("Prog. length = " + endPC);for (int i = 0; i < endPC; i++) { System.out.print(regexp.program[i]); if (i < (endPC - 1)) System.out.print(", ");}System.out.println();} regexp.parenCount = state.parenCount; // If re starts with literal, init anchorCh accordingly switch (regexp.program[0]) { case REOP_UCFLAT1: case REOP_UCFLAT1i: regexp.anchorCh = (char)getIndex(regexp.program, 1); break; case REOP_FLAT1: case REOP_FLAT1i: regexp.anchorCh = (char)(regexp.program[1] & 0xFF); break; case REOP_FLAT: case REOP_FLATi: int k = getIndex(regexp.program, 1); regexp.anchorCh = regexp.source[k]; break; }if (debug) {if (regexp.anchorCh >= 0) { System.out.println("Anchor ch = '" + (char)regexp.anchorCh + "'");}} return regexp; } static boolean isDigit(char c) { return '0' <= c && c <= '9'; } private static boolean isWord(char c) { return Character.isLetter(c) || isDigit(c) || c == '_'; } private static boolean isLineTerm(char c) { return ScriptRuntime.isJSLineTerminator(c); } private static boolean isREWhiteSpace(int c) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -