?? tclregexp.c
字號:
/* * tclRegexp.c -- * * This file contains the public interfaces to the Tcl regular * expression mechanism. * * Copyright (c) 1998 by Sun Microsystems, Inc. * Copyright (c) 1998-1999 by Scriptics Corporation. * * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * * RCS: @(#) $Id: tclRegexp.c,v 1.14 2002/01/17 03:03:12 dgp Exp $ */#include "tclInt.h"#include "tclPort.h"#include "tclRegexp.h"/* *---------------------------------------------------------------------- * The routines in this file use Henry Spencer's regular expression * package contained in the following additional source files: * * regc_color.c regc_cvec.c regc_lex.c * regc_nfa.c regcomp.c regcustom.h * rege_dfa.c regerror.c regerrs.h * regex.h regexec.c regfree.c * regfronts.c regguts.h * * Copyright (c) 1998 Henry Spencer. All rights reserved. * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author * thanks all of them. * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * *** NOTE: this code has been altered slightly for use in Tcl: *** * *** 1. Names have been changed, e.g. from re_comp to *** * *** TclRegComp, to avoid clashes with other *** * *** regexp implementations used by applications. *** *//* * Thread local storage used to maintain a per-thread cache of compiled * regular expressions. */#define NUM_REGEXPS 30typedef struct ThreadSpecificData { int initialized; /* Set to 1 when the module is initialized. */ char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled * regular expression patterns. NULL * means that this slot isn't used. * Malloc-ed. */ int patLengths[NUM_REGEXPS];/* Number of non-null characters in * corresponding entry in patterns. * -1 means entry isn't used. */ struct TclRegexp *regexps[NUM_REGEXPS]; /* Compiled forms of above strings. Also * malloc-ed, or NULL if not in use yet. */} ThreadSpecificData;static Tcl_ThreadDataKey dataKey;/* * Declarations for functions used only in this file. */static TclRegexp * CompileRegexp _ANSI_ARGS_((Tcl_Interp *interp, CONST char *pattern, int length, int flags));static void DupRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *srcPtr, Tcl_Obj *copyPtr));static void FinalizeRegexp _ANSI_ARGS_((ClientData clientData));static void FreeRegexp _ANSI_ARGS_((TclRegexp *regexpPtr));static void FreeRegexpInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));static int RegExpExecUniChar _ANSI_ARGS_((Tcl_Interp *interp, Tcl_RegExp re, CONST Tcl_UniChar *uniString, int numChars, int nmatches, int flags));static int SetRegexpFromAny _ANSI_ARGS_((Tcl_Interp *interp, Tcl_Obj *objPtr));/* * The regular expression Tcl object type. This serves as a cache * of the compiled form of the regular expression. */Tcl_ObjType tclRegexpType = { "regexp", /* name */ FreeRegexpInternalRep, /* freeIntRepProc */ DupRegexpInternalRep, /* dupIntRepProc */ NULL, /* updateStringProc */ SetRegexpFromAny /* setFromAnyProc */};/* *---------------------------------------------------------------------- * * Tcl_RegExpCompile -- * * Compile a regular expression into a form suitable for fast * matching. This procedure is DEPRECATED in favor of the * object version of the command. * * Results: * The return value is a pointer to the compiled form of string, * suitable for passing to Tcl_RegExpExec. This compiled form * is only valid up until the next call to this procedure, so * don't keep these around for a long time! If an error occurred * while compiling the pattern, then NULL is returned and an error * message is left in the interp's result. * * Side effects: * Updates the cache of compiled regexps. * *---------------------------------------------------------------------- */Tcl_RegExpTcl_RegExpCompile(interp, string) Tcl_Interp *interp; /* For use in error reporting and * to access the interp regexp cache. */ CONST char *string; /* String for which to produce * compiled regular expression. */{ return (Tcl_RegExp) CompileRegexp(interp, string, (int) strlen(string), REG_ADVANCED);}/* *---------------------------------------------------------------------- * * Tcl_RegExpExec -- * * Execute the regular expression matcher using a compiled form * of a regular expression and save information about any match * that is found. * * Results: * If an error occurs during the matching operation then -1 * is returned and the interp's result contains an error message. * Otherwise the return value is 1 if a matching range is * found and 0 if there is no matching range. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_RegExpExec(interp, re, string, start) Tcl_Interp *interp; /* Interpreter to use for error reporting. */ Tcl_RegExp re; /* Compiled regular expression; must have * been returned by previous call to * Tcl_GetRegExpFromObj. */ CONST char *string; /* String against which to match re. */ CONST char *start; /* If string is part of a larger string, * this identifies beginning of larger * string, so that "^" won't match. */{ int flags, result, numChars; TclRegexp *regexp = (TclRegexp *)re; Tcl_DString ds; CONST Tcl_UniChar *ustr; /* * If the starting point is offset from the beginning of the buffer, * then we need to tell the regexp engine not to match "^". */ if (string > start) { flags = REG_NOTBOL; } else { flags = 0; } /* * Remember the string for use by Tcl_RegExpRange(). */ regexp->string = string; regexp->objPtr = NULL; /* * Convert the string to Unicode and perform the match. */ Tcl_DStringInit(&ds); ustr = Tcl_UtfToUniCharDString(string, -1, &ds); numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); result = RegExpExecUniChar(interp, re, ustr, numChars, -1 /* nmatches */, flags); Tcl_DStringFree(&ds); return result;}/* *--------------------------------------------------------------------------- * * Tcl_RegExpRange -- * * Returns pointers describing the range of a regular expression match, * or one of the subranges within the match. * * Results: * The variables at *startPtr and *endPtr are modified to hold the * addresses of the endpoints of the range given by index. If the * specified range doesn't exist then NULLs are returned. * * Side effects: * None. * *--------------------------------------------------------------------------- */voidTcl_RegExpRange(re, index, startPtr, endPtr) Tcl_RegExp re; /* Compiled regular expression that has * been passed to Tcl_RegExpExec. */ int index; /* 0 means give the range of the entire * match, > 0 means give the range of * a matching subrange. */ CONST char **startPtr; /* Store address of first character in * (sub-) range here. */ CONST char **endPtr; /* Store address of character just after last * in (sub-) range here. */{ TclRegexp *regexpPtr = (TclRegexp *) re; CONST char *string; if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = *endPtr = NULL; } else if (regexpPtr->matches[index].rm_so < 0) { *startPtr = *endPtr = NULL; } else { if (regexpPtr->objPtr) { string = Tcl_GetString(regexpPtr->objPtr); } else { string = regexpPtr->string; } *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); }}/* *--------------------------------------------------------------------------- * * RegExpExecUniChar -- * * Execute the regular expression matcher using a compiled form of a * regular expression and save information about any match that is * found. * * Results: * If an error occurs during the matching operation then -1 is * returned and an error message is left in interp's result. * Otherwise the return value is 1 if a matching range was found or * 0 if there was no matching range. * * Side effects: * None. * *---------------------------------------------------------------------- */static intRegExpExecUniChar(interp, re, wString, numChars, nmatches, flags) Tcl_Interp *interp; /* Interpreter to use for error reporting. */ Tcl_RegExp re; /* Compiled regular expression; returned by * a previous call to Tcl_GetRegExpFromObj */ CONST Tcl_UniChar *wString; /* String against which to match re. */ int numChars; /* Length of Tcl_UniChar string (must * be >= 0). */ int nmatches; /* How many subexpression matches (counting * the whole match as subexpression 0) are * of interest. -1 means "don't know". */ int flags; /* Regular expression flags. */{ int status; TclRegexp *regexpPtr = (TclRegexp *) re; size_t last = regexpPtr->re.re_nsub + 1; size_t nm = last; if (nmatches >= 0 && (size_t) nmatches < nm) { nm = (size_t) nmatches; } status = TclReExec(®expPtr->re, wString, (size_t) numChars, ®expPtr->details, nm, regexpPtr->matches, flags); /* * Check for errors. */ if (status != REG_OKAY) { if (status == REG_NOMATCH) { return 0; } if (interp != NULL) { TclRegError(interp, "error while matching regular expression: ", status); } return -1; } return 1;}/* *--------------------------------------------------------------------------- * * TclRegExpRangeUniChar -- * * Returns pointers describing the range of a regular expression match, * or one of the subranges within the match, or the hypothetical range * represented by the rm_extend field of the rm_detail_t. * * Results: * The variables at *startPtr and *endPtr are modified to hold the * offsets of the endpoints of the range given by index. If the * specified range doesn't exist then -1s are supplied. * * Side effects: * None. * *--------------------------------------------------------------------------- */voidTclRegExpRangeUniChar(re, index, startPtr, endPtr) Tcl_RegExp re; /* Compiled regular expression that has * been passed to Tcl_RegExpExec. */ int index; /* 0 means give the range of the entire * match, > 0 means give the range of * a matching subrange, -1 means the * range of the rm_extend field. */ int *startPtr; /* Store address of first character in * (sub-) range here. */ int *endPtr; /* Store address of character just after last * in (sub-) range here. */{ TclRegexp *regexpPtr = (TclRegexp *) re; if ((regexpPtr->flags®_EXPECT) && index == -1) { *startPtr = regexpPtr->details.rm_extend.rm_so; *endPtr = regexpPtr->details.rm_extend.rm_eo; } else if ((size_t) index > regexpPtr->re.re_nsub) { *startPtr = -1; *endPtr = -1; } else { *startPtr = regexpPtr->matches[index].rm_so; *endPtr = regexpPtr->matches[index].rm_eo; }}/* *---------------------------------------------------------------------- * * Tcl_RegExpMatch -- * * See if a string matches a regular expression. * * Results: * If an error occurs during the matching operation then -1 * is returned and the interp's result contains an error message. * Otherwise the return value is 1 if "string" matches "pattern" * and 0 otherwise. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_RegExpMatch(interp, string, pattern) Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ CONST char *string; /* String. */ CONST char *pattern; /* Regular expression to match against * string. */{ Tcl_RegExp re; re = Tcl_RegExpCompile(interp, pattern); if (re == NULL) { return -1; } return Tcl_RegExpExec(interp, re, string, string);}/* *---------------------------------------------------------------------- * * Tcl_RegExpExecObj -- * * Execute a precompiled regexp against the given object. * * Results: * If an error occurs during the matching operation then -1 * is returned and the interp's result contains an error message. * Otherwise the return value is 1 if "string" matches "pattern" * and 0 otherwise. * * Side effects: * Converts the object to a Unicode object. * *---------------------------------------------------------------------- */intTcl_RegExpExecObj(interp, re, objPtr, offset, nmatches, flags) Tcl_Interp *interp; /* Interpreter to use for error reporting. */ Tcl_RegExp re; /* Compiled regular expression; must have * been returned by previous call to * Tcl_GetRegExpFromObj. */ Tcl_Obj *objPtr; /* String against which to match re. */ int offset; /* Character index that marks where matching * should begin. */ int nmatches; /* How many subexpression matches (counting * the whole match as subexpression 0) are * of interest. -1 means all of them. */ int flags; /* Regular expression execution flags. */{ TclRegexp *regexpPtr = (TclRegexp *) re; Tcl_UniChar *udata; int length; /* * Save the target object so we can extract strings from it later. */ regexpPtr->string = NULL; regexpPtr->objPtr = objPtr; udata = Tcl_GetUnicodeFromObj(objPtr, &length); if (offset > length) { offset = length; } udata += offset; length -= offset; return RegExpExecUniChar(interp, re, udata, length, nmatches, flags);}/* *---------------------------------------------------------------------- * * Tcl_RegExpMatchObj -- * * See if an object matches a regular expression. * * Results: * If an error occurs during the matching operation then -1 * is returned and the interp's result contains an error message. * Otherwise the return value is 1 if "string" matches "pattern" * and 0 otherwise. * * Side effects: * Changes the internal rep of the pattern and string objects. * *---------------------------------------------------------------------- */intTcl_RegExpMatchObj(interp, stringObj, patternObj) Tcl_Interp *interp; /* Used for error reporting. May be NULL. */ Tcl_Obj *stringObj; /* Object containing the String to search. */ Tcl_Obj *patternObj; /* Regular expression to match against * string. */{ Tcl_RegExp re; re = Tcl_GetRegExpFromObj(interp, patternObj, TCL_REG_ADVANCED | TCL_REG_NOSUB); if (re == NULL) { return -1; } return Tcl_RegExpExecObj(interp, re, stringObj, 0 /* offset */, 0 /* nmatches */, 0 /* flags */);}/* *---------------------------------------------------------------------- * * Tcl_RegExpGetInfo -- * * Retrieve information about the current match. * * Results: * None. *
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -