regex.c
字號:
goto normal_char; } break; case '+': case '?': if ((syntax & RE_BK_PLUS_QM) || (syntax & RE_LIMITED_OPS)) goto normal_char; handle_plus: case '*': /* If there is no previous pattern... */ if (!laststart) { if (syntax & RE_CONTEXT_INVALID_OPS) FREE_STACK_RETURN (REG_BADRPT); else if (!(syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; } { /* Are we optimizing this jump? */ boolean keep_string_p = false; /* 1 means zero (many) matches is allowed. */ char zero_times_ok = 0, many_times_ok = 0; /* If there is a sequence of repetition chars, collapse it down to just one (the right one). We can't combine interval operators with these because of, e.g., `a{2}*', which should only match an even number of `a's. */ for (;;) { zero_times_ok |= c != '+'; many_times_ok |= c != '?'; if (p == pend) break; PATFETCH (c); if (c == '*' || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) ; else if (syntax & RE_BK_PLUS_QM && c == '\\') { if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); PATFETCH (c1); if (!(c1 == '+' || c1 == '?')) { PATUNFETCH; PATUNFETCH; break; } c = c1; } else { PATUNFETCH; break; } /* If we get here, we found another repeat character. */ } /* Star, etc. applied to an empty pattern is equivalent to an empty pattern. */ if (!laststart) break; /* Now we know whether or not zero matches is allowed and also whether or not two or more matches is allowed. */ if (many_times_ok) { /* More than one repetition is allowed, so put in at the end a backward relative jump from `b' to before the next jump we're going to put in below (which jumps from laststart to after this jump). But if we are at the `*' in the exact sequence `.*\n', insert an unconditional jump backwards to the ., instead of the beginning of the loop. This way we only push a failure point once, instead of every time through the loop. */ assert (p - 1 > pattern); /* Allocate the space for the jump. 
*/ GET_BUFFER_SPACE (3); /* We know we are not at the first character of the pattern, because laststart was nonzero. And we've already incremented `p', by the way, to be the character after the `*'. Do we have to do something analogous here for null bytes, because of RE_DOT_NOT_NULL? */ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') && zero_times_ok && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') && !(syntax & RE_DOT_NEWLINE)) { /* We have .*\n. */ STORE_JUMP (jump, b, laststart); keep_string_p = true; } else /* Anything else. */ STORE_JUMP (maybe_pop_jump, b, laststart - 3); /* We've added more stuff to the buffer. */ b += 3; } /* On failure, jump from laststart to b + 3, which will be the end of the buffer after this jump is inserted. */ GET_BUFFER_SPACE (3); INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump : on_failure_jump, laststart, b + 3); pending_exact = 0; b += 3; if (!zero_times_ok) { /* At least one repetition is required, so insert a `dummy_failure_jump' before the initial `on_failure_jump' instruction of the loop. This effects a skip over that instruction the first time we hit that loop. */ GET_BUFFER_SPACE (3); INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); b += 3; } } break; case '.': laststart = b; BUF_PUSH (anychar); break; case '[': { boolean had_char_class = false; if (p == pend) FREE_STACK_RETURN (REG_EBRACK); /* Ensure that we have enough space to push a charset: the opcode, the length count, and the bitset; 34 bytes in all. */ GET_BUFFER_SPACE (34); laststart = b; /* We test `*p == '^' twice, instead of using an if statement, so we only need one BUF_PUSH. */ BUF_PUSH (*p == '^' ? charset_not : charset); if (*p == '^') p++; /* Remember the first position in the bracket expression. */ p1 = p; /* Push the number of bytes in the bitmap. */ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); /* Clear the whole map. */ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); /* charset_not matches newline according to a syntax bit. 
*/ if ((re_opcode_t) b[-2] == charset_not && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) SET_LIST_BIT ('\n'); /* Read in characters and ranges, setting map bits. */ for (;;) { if (p == pend) FREE_STACK_RETURN (REG_EBRACK); PATFETCH (c); /* \ might escape characters inside [...] and [^...]. */ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') { if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); PATFETCH (c1); SET_LIST_BIT (c1); continue; } /* Could be the end of the bracket expression. If it's not (i.e., when the bracket expression is `[]' so far), the ']' character bit gets set way below. */ if (c == ']' && p != p1 + 1) break; /* Look ahead to see if it's a range when the last thing was a character class. */ if (had_char_class && c == '-' && *p != ']') FREE_STACK_RETURN (REG_ERANGE); /* Look ahead to see if it's a range when the last thing was a character: if this is a hyphen not at the beginning or the end of a list, then it's the range operator. */ if (c == '-' && !(p - 2 >= pattern && p[-2] == '[') && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') && *p != ']') { reg_errcode_t ret = compile_range (&p, pend, translate, syntax, b); if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } else if (p[0] == '-' && p[1] != ']') { /* This handles ranges made up of characters only. */ reg_errcode_t ret; /* Move past the `-'. */ PATFETCH (c1); ret = compile_range (&p, pend, translate, syntax, b); if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } /* See if we're at the beginning of a possible character class. */ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') { /* Leave room for the null. */ char str[CHAR_CLASS_MAX_LENGTH + 1]; PATFETCH (c); c1 = 0; /* If pattern is `[[:'. 
*/ if (p == pend) FREE_STACK_RETURN (REG_EBRACK); for (;;) { PATFETCH (c); if (c == ':' || c == ']' || p == pend || c1 == CHAR_CLASS_MAX_LENGTH) break; str[c1++] = c; } str[c1] = '\0'; /* If isn't a word bracketed by `[:' and:`]': undo the ending character, the letters, and leave the leading `:' and `[' (but set bits for them). */ if (c == ':' && *p == ']') { int ch; boolean is_alnum = STREQ (str, "alnum"); boolean is_alpha = STREQ (str, "alpha"); boolean is_blank = STREQ (str, "blank"); boolean is_cntrl = STREQ (str, "cntrl"); boolean is_digit = STREQ (str, "digit"); boolean is_graph = STREQ (str, "graph"); boolean is_lower = STREQ (str, "lower"); boolean is_print = STREQ (str, "print"); boolean is_punct = STREQ (str, "punct"); boolean is_space = STREQ (str, "space"); boolean is_upper = STREQ (str, "upper"); boolean is_xdigit = STREQ (str, "xdigit"); if (!IS_CHAR_CLASS (str)) FREE_STACK_RETURN (REG_ECTYPE); /* Throw away the ] at the end of the character class. */ PATFETCH (c); if (p == pend) FREE_STACK_RETURN (REG_EBRACK); for (ch = 0; ch < 1 << BYTEWIDTH; ch++) { /* This was split into 3 if's to avoid an arbitrary limit in some compiler. */ if ( (is_alnum && ISALNUM (ch)) || (is_alpha && ISALPHA (ch)) || (is_blank && ISBLANK (ch)) || (is_cntrl && ISCNTRL (ch))) SET_LIST_BIT (ch); if ( (is_digit && ISDIGIT (ch)) || (is_graph && ISGRAPH (ch)) || (is_lower && ISLOWER (ch)) || (is_print && ISPRINT (ch))) SET_LIST_BIT (ch); if ( (is_punct && ISPUNCT (ch)) || (is_space && ISSPACE (ch)) || (is_upper && ISUPPER (ch)) || (is_xdigit && ISXDIGIT (ch))) SET_LIST_BIT (ch); } had_char_class = true; } else { c1++; while (c1--) PATUNFETCH; SET_LIST_BIT ('['); SET_LIST_BIT (':'); had_char_class = false; } } else { had_char_class = false; SET_LIST_BIT (c); } } /* Discard any (non)matching list bytes that are all 0 at the end of the map. Decrease the map-length byte too. 
*/ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; b += b[-1]; } break; case '(': if (syntax & RE_NO_BK_PARENS) goto handle_open; else goto normal_char; case ')': if (syntax & RE_NO_BK_PARENS) goto handle_close; else goto normal_char; case '\n': if (syntax & RE_NEWLINE_ALT) goto handle_alt; else goto normal_char; case '|': if (syntax & RE_NO_BK_VBAR) goto handle_alt; else goto normal_char; case '{': if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) goto handle_interval; else goto normal_char; case '\\': if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); /* Do not translate the character after the \, so that we can distinguish, e.g., \B from \b, even if we normally would translate, e.g., B to b. */ PATFETCH_RAW (c); switch (c) { case '(': if (syntax & RE_NO_BK_PARENS) goto normal_backslash; handle_open: bufp->re_nsub++; regnum++; if (COMPILE_STACK_FULL) { RETALLOC (compile_stack.stack, compile_stack.size << 1, compile_stack_elt_t); if (compile_stack.stack == NULL) return REG_ESPACE; compile_stack.size <<= 1; } /* These are the values to restore when we hit end of this group. They are all relative offsets, so that if the whole pattern moves because of realloc, they will still be valid. */ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; COMPILE_STACK_TOP.fixup_alt_jump = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; COMPILE_STACK_TOP.regnum = regnum; /* We will eventually replace the 0 with the number of groups inner to this one. But do not push a start_memory for groups beyond the last one we can represent in the compiled pattern. */ if (regnum <= MAX_REGNUM) { COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; BUF_PUSH_3 (start_memory, regnum, 0); } compile_stack.avail++; fixup_alt_jump = 0; laststart = 0; begalt = b; /* If we've reached MAX_REGNUM groups, then this open won't actually generate any code, so we'll have to clear pending_exact explicitly. 
*/ pending_exact = 0; break; case ')': if (syntax & RE_NO_BK_PARENS) goto normal_backslash; if (COMPILE_STACK_EMPTY) if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_backslash; else FREE_STACK_RETURN (REG_ERPAREN); handle_close: if (fixup_alt_jump) { /* Push a dummy failure point at the end of the alternative for a possible future `pop_failure_jump' to pop. See comments at `push_dummy_failure' in `re_match_2'. */ BUF_PUSH (push_dummy_failure); /* We allocated space for this
快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -