?? research.cxx.svn-base
字號:
mask = '\377';
i++;
p++;
} else
mask = 0;
if (*p == '-') { /* real dash */
i++;
prevChar = *p;
ChSet(*p++);
}
if (*p == ']') { /* real brace */
i++;
prevChar = *p;
ChSet(*p++);
}
while (*p && *p != ']') {
if (*p == '-') {
if (prevChar < 0) {
// Previous def. was a char class like \d, take dash literally
prevChar = *p;
ChSet(*p);
} else if (*(p+1)) {
if (*(p+1) != ']') {
c1 = prevChar + 1;
i++;
c2 = *++p;
if (c2 == '\\') {
if (!*(p+1)) // End of RE
return badpat("Missing ]");
else {
i++;
p++;
int incr;
c2 = GetBackslashExpression(p, incr);
i += incr;
p += incr;
if (c2 >= 0) {
// Convention: \c (c is any char) is case sensitive, whatever the option
ChSet(static_cast<unsigned char>(c2));
prevChar = c2;
} else {
// bittab is already changed
prevChar = -1;
}
}
}
if (prevChar < 0) {
// Char after dash is char class like \d, take dash literally
prevChar = '-';
ChSet('-');
} else {
// Put all chars between c1 and c2 included in the char set
while (c1 <= c2) {
ChSetWithCase(static_cast<unsigned char>(c1++), caseSensitive);
}
}
} else {
// Dash before the ], take it literally
prevChar = *p;
ChSet(*p);
}
} else {
return badpat("Missing ]");
}
} else if (*p == '\\' && *(p+1)) {
i++;
p++;
int incr;
int c = GetBackslashExpression(p, incr);
i += incr;
p += incr;
if (c >= 0) {
// Convention: \c (c is any char) is case sensitive, whatever the option
ChSet(static_cast<unsigned char>(c));
prevChar = c;
} else {
// bittab is already changed
prevChar = -1;
}
} else {
prevChar = *p;
ChSetWithCase(*p, caseSensitive);
}
i++;
p++;
}
if (!*p)
return badpat("Missing ]");
for (n = 0; n < BITBLK; bittab[n++] = 0)
*mp++ = static_cast<char>(mask ^ bittab[n]);
break;
case '*': /* match 0 or more... */
case '+': /* match 1 or more... */
if (p == pat)
return badpat("Empty closure");
lp = sp; /* previous opcode */
if (*lp == CLO) /* equivalence... */
break;
switch (*lp) {
case BOL:
case BOT:
case EOT:
case BOW:
case EOW:
case REF:
return badpat("Illegal closure");
default:
break;
}
if (*p == '+')
for (sp = mp; lp < sp; lp++)
*mp++ = *lp;
*mp++ = END;
*mp++ = END;
sp = mp;
while (--mp > lp)
*mp = mp[-1];
*mp = CLO;
mp = sp;
break;
case '\\': /* tags, backrefs... */
i++;
switch (*++p) {
case '<':
*mp++ = BOW;
break;
case '>':
if (*sp == BOW)
return badpat("Null pattern inside \\<\\>");
*mp++ = EOW;
break;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
n = *p-'0';
if (tagi > 0 && tagstk[tagi] == n)
return badpat("Cyclical reference");
if (tagc > n) {
*mp++ = static_cast<char>(REF);
*mp++ = static_cast<char>(n);
} else
return badpat("Undetermined reference");
break;
default:
if (!posix && *p == '(') {
if (tagc < MAXTAG) {
tagstk[++tagi] = tagc;
*mp++ = BOT;
*mp++ = static_cast<char>(tagc++);
} else
return badpat("Too many \\(\\) pairs");
} else if (!posix && *p == ')') {
if (*sp == BOT)
return badpat("Null pattern inside \\(\\)");
if (tagi > 0) {
*mp++ = static_cast<char>(EOT);
*mp++ = static_cast<char>(tagstk[tagi--]);
} else
return badpat("Unmatched \\)");
} else {
int incr;
int c = GetBackslashExpression(p, incr);
i += incr;
p += incr;
if (c >= 0) {
*mp++ = CHR;
*mp++ = static_cast<unsigned char>(c);
} else {
*mp++ = CCL;
mask = 0;
for (n = 0; n < BITBLK; bittab[n++] = 0)
*mp++ = static_cast<char>(mask ^ bittab[n]);
}
}
}
break;
default : /* an ordinary char */
if (posix && *p == '(') {
if (tagc < MAXTAG) {
tagstk[++tagi] = tagc;
*mp++ = BOT;
*mp++ = static_cast<char>(tagc++);
} else
return badpat("Too many () pairs");
} else if (posix && *p == ')') {
if (*sp == BOT)
return badpat("Null pattern inside ()");
if (tagi > 0) {
*mp++ = static_cast<char>(EOT);
*mp++ = static_cast<char>(tagstk[tagi--]);
} else
return badpat("Unmatched )");
} else {
unsigned char c = *p;
if (!c) // End of RE
c = '\\'; // We take it as raw backslash
if (caseSensitive || !iswordc(c)) {
*mp++ = CHR;
*mp++ = c;
} else {
*mp++ = CCL;
mask = 0;
ChSetWithCase(c, false);
for (n = 0; n < BITBLK; bittab[n++] = 0)
*mp++ = static_cast<char>(mask ^ bittab[n]);
}
}
break;
}
sp = lp;
}
if (tagi > 0)
return badpat((posix ? "Unmatched (" : "Unmatched \\("));
*mp = END;
sta = OKP;
return 0;
}
/*
* RESearch::Execute:
* execute nfa to find a match.
*
* special cases: (nfa[0])
* BOL
* Match only once, starting from the
* beginning.
* CHR
* First locate the character without
* calling PMatch, and if found, call
* PMatch for the remaining string.
* END
* RESearch::Compile failed, poor luser did not
* check for it. Fail fast.
*
* If a match is found, bopat[0] and eopat[0] are set
* to the beginning and the end of the matched fragment,
* respectively.
*
*/
int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {
    // ci   : source of the characters being searched (read through CharAt).
    // lp   : position at which the search starts; also recorded as bol.
    // endp : position at which the search stops (exclusive for the scans below).
    // Returns 1 and sets bopat[0]/eopat[0] on a match, 0 otherwise.
    unsigned char c;
    int ep = NOTFOUND;
    char *ap = nfa;

    bol = lp;
    failure = 0;

    Clear();

    switch (*ap) {

    case BOL:   /* anchored: match from BOL only */
        ep = PMatch(ci, lp, endp, ap);
        break;

    case EOL:   /* just searching for end of line normal path doesn't work */
        if (*(ap+1) == END) {
            // Pattern is a lone end-of-line anchor: report an empty match at endp.
            lp = endp;
            ep = lp;
            break;
        } else {
            return 0;
        }

    case CHR:   /* ordinary char: locate it fast */
        c = *(ap+1);
        // Compare as unsigned char on both sides. CharAt's return type is not
        // visible here; if it is a plain (possibly signed) char, a byte >= 0x80
        // would promote to a negative int and never equal c, making the scan
        // miss every such character. The cast is a no-op if it is already unsigned.
        while ((lp < endp) && (static_cast<unsigned char>(ci.CharAt(lp)) != c))
            lp++;
        if (lp >= endp)     /* if EOS, fail, else fall thru. */
            return 0;
        /* fall through */

    default:    /* regular matching all the way. */
        // Try the automaton at each successive start position until one matches.
        while (lp < endp) {
            ep = PMatch(ci, lp, endp, ap);
            if (ep != NOTFOUND)
                break;
            lp++;
        }
        break;

    case END:   /* munged automaton. fail always */
        return 0;
    }

    if (ep == NOTFOUND)
        return 0;

    bopat[0] = lp;
    eopat[0] = ep;
    return 1;
}
/*
* PMatch: internal routine for the hard part
*
* This code is partly snarfed from an early grep written by
* David Conroy. The backref and tag stuff, and various other
* innovations are by oz.
*
* special case optimizations: (nfa[n], nfa[n+1])
* CLO ANY
* We KNOW .* will match everything up to the
* end of line. Thus, directly go to the end of
* line, without recursive PMatch calls. As in
* the other closure cases, the remaining pattern
* must be matched by moving backwards on the
* string recursively, to find a match for xy
* (x is ".*" and y is the remaining pattern)
* where the match satisfies the LONGEST match for
* x followed by a match for y.
* CLO CHR
* We can again scan the string forward for the
* single char and at the point of failure, we
* execute the remaining nfa recursively, same as
* above.
*
* At the end of a successful match, bopat[n] and eopat[n]
* are set to the beginning and end of subpatterns matched
* by tagged expressions (n = 1 to 9).
*/
/*
* External failure hook. Within the code visible here it is only referenced
* from commented-out calls in PMatch.
*/
extern void re_fail(char *,char);
/*
* isinset(x,y): non-zero when the bit for character y is set in the bit
* table x. (y)&BLKIND shifted right by 3 selects the byte, and
* bitarr[(y)&BITIND] supplies the mask for the bit within that byte.
* BLKIND, BITIND and bitarr are defined elsewhere in the file.
*/
#define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])
/*
* skip values for CLO XXX to skip past the closure
*
* Each value is the number of nfa bytes PMatch adds to ap (which then points
* at the closure's operand opcode) to reach the code following the closure.
*/
#define ANYSKIP 2 /* [CLO] ANY END */
#define CHRSKIP 3 /* [CLO] CHR chr END */
#define CCLSKIP 34 /* [CLO] CCL 32 bytes END */
/*
 * Run the compiled pattern starting at ap against the text starting at lp.
 *
 * ci   : source of the characters being matched (read through CharAt).
 * lp   : position in the text at which to start matching.
 * endp : end position of the searchable range.
 * ap   : pointer to the first opcode of the (sub)pattern to run.
 *
 * Returns the position just past the matched text, or NOTFOUND.
 * Records tag boundaries in bopat[]/eopat[] as BOT/EOT opcodes are met, and
 * sets failure when a closure is applied to an unsupported opcode.
 */
int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) {
    int op, c, n;
    int e;      /* extra pointer for CLO */
    int bp;     /* beginning of subpat... */
    int ep;     /* ending of subpat... */
    int are;    /* to save the line ptr. */

    while ((op = *ap++) != END)
        switch (op) {

        case CHR:
            // NOTE(review): no lp < endp test here, unlike ANY and CCL; whether
            // a literal may legitimately match at endp depends on callers - confirm.
            if (ci.CharAt(lp++) != *ap++)
                return NOTFOUND;
            break;
        case ANY:
            if (lp++ >= endp)
                return NOTFOUND;
            break;
        case CCL:
            // A character class must consume a character inside the range.
            // Without this test a negated set could accept the character at
            // or beyond endp.
            if (lp >= endp)
                return NOTFOUND;
            c = ci.CharAt(lp++);
            if (!isinset(ap,c))
                return NOTFOUND;
            ap += BITBLK;
            break;
        case BOL:
            if (lp != bol)
                return NOTFOUND;
            break;
        case EOL:
            if (lp < endp)
                return NOTFOUND;
            break;
        case BOT:
            bopat[*ap++] = lp;
            break;
        case EOT:
            eopat[*ap++] = lp;
            break;
        case BOW:
            // Fails when the previous character is a word character (unless at
            // bol) or when the current character is not a word character.
            if ((lp != bol && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp)))
                return NOTFOUND;
            break;
        case EOW:
            // Fails at bol, when the previous character is not a word
            // character, or when the current character still is one.
            if (lp==bol || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp)))
                return NOTFOUND;
            break;
        case REF:
            // Back-reference: the text at lp must repeat the text previously
            // recorded for tag n.
            n = *ap++;
            bp = bopat[n];
            ep = eopat[n];
            while (bp < ep)
                if (ci.CharAt(bp++) != ci.CharAt(lp++))
                    return NOTFOUND;
            break;
        case CLO:
            are = lp;   // leftmost position the closure may back off to
            // Greedily consume as many repetitions of the operand as possible.
            switch (*ap) {

            case ANY:
                while (lp < endp)
                    lp++;
                n = ANYSKIP;
                break;
            case CHR:
                c = *(ap+1);
                while ((lp < endp) && (c == ci.CharAt(lp)))
                    lp++;
                n = CHRSKIP;
                break;
            case CCL:
                while ((lp < endp) && isinset(ap+1,ci.CharAt(lp)))
                    lp++;
                n = CCLSKIP;
                break;
            default:
                failure = true;
                //re_fail("closure: bad nfa.", *ap);
                return NOTFOUND;
            }

            ap += n;

            // Back off one position at a time until the rest of the pattern
            // matches, which yields the longest closure that still succeeds.
            while (lp >= are) {
                if ((e = PMatch(ci, lp, endp, ap)) != NOTFOUND)
                    return e;
                --lp;
            }
            return NOTFOUND;
        default:
            //re_fail("RESearch::Execute: bad nfa.", static_cast<char>(op));
            return NOTFOUND;
        }
    return lp;
}
/*
* RESearch::Substitute:
* substitute the matched portions of the src in dst.
*
* & substitute the entire matched pattern.
*
* \digit substitute a subpattern, with the given tag number.
* Tags are numbered from 1 to 9. If the particular
* tagged subpattern does not exist, null is substituted.
*/
int RESearch::Substitute(CharacterIndexer &ci, char *src, char *dst) {
    // ci  : source of the matched text (read through CharAt).
    // src : NUL-terminated replacement template; '&' and '\digit' are expanded.
    // dst : output buffer; receives the expanded, NUL-terminated result.
    //       No size is passed, so the caller must supply a large enough buffer.
    // Returns 1 on success, 0 if src is empty, bopat[0] is zero, or a tagged
    // range could not be copied completely.
    unsigned char c;
    int pin;
    int bp;
    int ep;

    // NOTE(review): this test (and the != 0 tests below) treats position 0 as
    // "nothing matched", so a match or tag starting at offset 0 is skipped -
    // confirm against the sentinel value Clear() stores.
    if (!*src || !bopat[0])
        return 0;

    while ((c = *src++) != 0) {
        switch (c) {

        case '&':
            pin = 0;
            break;

        case '\\':
            c = *src++;
            if (c >= '0' && c <= '9') {
                pin = c - '0';
                break;
            }
            if (!c) {
                // Backslash was the last character of src. Take it as a raw
                // backslash and step back onto the terminator so the loop ends
                // there instead of reading past the end of the string.
                c = '\\';
                src--;
            }
            /* fall through: escaped non-digit is copied literally */

        default:
            *dst++ = c;
            continue;
        }

        // Copy the text of the selected tag (0 = whole match) into dst.
        if ((bp = bopat[pin]) != 0 && (ep = eopat[pin]) != 0) {
            // Test the bound before fetching so the character at ep is never read.
            while (bp < ep && ci.CharAt(bp))
                *dst++ = ci.CharAt(bp++);
            if (bp < ep)    // stopped early on a NUL character inside the range
                return 0;
        }
    }
    *dst = '\0';
    return 1;
}
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -