?? regular-expressions.zc
字號:
//[of]:description
//[c]Tests if a string matches with a regular expression.
//[cf]
//[of]:imports
import "base/types"
import "base/memory-allocator"
import "collection/collection"
import "collection/vector"
import "text/string-buffer"
import "text/string"
//[cf]
//[of]:definitions
//[c]
typedef char map = [256] byte
equ max groups = 32
//[c]
public struct regex
private
nodes: local vector
states: local vector
rules: local vector
groups: int
group mask: dword
group starts: [max groups] string
group stops: [max groups] string
initial: state
final: state
match beginning: bool
match ending: bool
firsts: char map
// for compilation only
is case sensitive: bool
end
//[cf]
//[of]:error codes
public enum regex code
regex ok
regex missing cpar
regex empty
regex invalid range
end
//[c]
//[cf]
//[c]
//[of]:initialize - release
//[of]:initialize
//[c]
public func initialize (m: regex)
initialize (states (m), 256)
initialize (nodes (m), 256)
initialize (rules (m), 256)
groups (m) = 0
match beginning (m) = false
match ending (m) = false
end
//[cf]
//[of]:release
//[c]
public func release (m: regex)
delete all (m)
release (states (m))
release (nodes (m))
release (rules (m))
end
//[cf]
//[cf]
//[of]:compiling
//[of]:compile (source,case sensitive)
//[c]Compile a regular expression
//[c]
public func compile (m: regex, s: string, is case sensitive: bool)
is case sensitive (m) = is case sensitive
delete all (m)
remove all (nodes (m))
remove all (states (m))
remove all (rules (m))
groups (m) = 0
group mask (m) = 1
match beginning (m) = false
match ending (m) = false
def p = s
if p[] == $^
match beginning (m) = true
p += 1
end
def parser: local regex parser
source (parser) = p
// 'firsts' is cleared before returning so an compile
// error will make fail any search without an extra
// test. So the object is valid and the match function
// can be used even if the compilation fails.
initialize (firsts (m))
def node = new or node (m)
def code = get union (m, parser, node)
if code <> regex ok
return code
end
def s1 = new state (m)
def s2 = new state (m)
expand (m, node, s1, s2)
initial (m) = s1
final (m) = s2
// compute firsts
scan (m, s1)
return code
end
//[c]
//[c]Structures:
//[c]
struct regex parser
source: string
node: node
end
//[c]
//[c]Subfunctions:
//[of]:scan
//[c]Scan all chars accessibles from this state: all epsilon transitions
//[c]are followed recursively.
//[c]
func scan (m: regex, s: state) : void
if mark (s)
return
end
mark (s) = true
each (rules (s)) ? r
equ rule = r: rule
if epsilon (rule)
scan (m, state (rule))
else
def i = 0
def p = chars (rule)
def q = firsts (m)
while i < 256
q [i] |= p [i]
i += 1
end
end
end
end
//[cf]
//[of]:expand
//[c]
func expand (m: regex, node: node, s1: state, s2: state) : void
switch (type (node))
case nt rules
equ rn = node : rule node
add rule (s1, s2, rule (rn))
case nt or
equ on = node : or node
def mask = group mask (m)
group start (s1) |= mask
group stop (s2) |= mask
group mask (m) <<= 1
groups (m) += 1
each (sequences (on)) ? ns
equ nodes = ns : node
def ss1 = s1
each (nodes) ? n
equ subnode = n : node
def ss2 = is last (subnode) -> s2, new state (m)
expand (m, subnode, ss1, ss2)
ss1 = ss2
end
end
case nt zero or many
equ rn = node : rep node
def t1 = new state (m)
def t2 = new state (m)
expand (m, node (rn), t1, t2)
add epsilon (m, s1, s2)
add epsilon (m, s1, t1)
add epsilon (m, t2, s2)
add epsilon (m, t2, t1)
case nt one or many
equ rn = node : rep node
def t1 = new state (m)
def t2 = new state (m)
expand (m, node (rn), t1, t2)
add epsilon (m, s1, t1)
add epsilon (m, t2, s2)
add epsilon (m, t2, t1)
case nt zero or one
equ rn = node : rep node
expand (m, node (rn), s1, s2)
add epsilon (m, s1, s2)
end
end
//[cf]
//[of]:get union
func get union (m: regex, parser: regex parser, or node: or node) : regex code
def sequences = sequences (or node)
repeat
def nodes: local collection
def code = get sequence (m, parser, nodes)
if code <> regex ok
return code
end
def node = first (nodes)
if is nil (node)
return regex empty
end
add (sequences, node)
def p = source (parser)
if p[] <> $|
break
end
source (parser) = p + 1
end
return regex ok
end
//[cf]
//[of]:get sequence
//[c]
func get sequence (m: regex, parser: regex parser, nodes: collection) : regex code
initialize (nodes)
repeat
def code = get node (m, parser)
if code <> regex ok
return code
end
def node = node (parser)
if is nil (node)
break
end
add (nodes, node)
end
return regex ok
end
//[cf]
//[of]:get node
func get node (m: regex, parser: regex parser)
def p = source (parser)
def node = nil : node
def c = p[]
switch c
//[of]: case nul char, ), |
case nul char, $), $|
// empty
//[cf]
//[of]: case $
case $$
p += 1
if is nul (p[])
match ending (m) = true
else
node = new char node (m, c)
end
//[cf]
//[of]: case (
case $(
def or node = new or node (m)
source (parser) = p+1
def code = get union (m, parser, or node)
if code <> regex ok
return code
end
p = source (parser)
if p[] <> $)
return regex missing cpar
end
node = or node
p += 1
//[cf]
//[of]: case [
case $[
def rule = new char rule (m)
def rule node = new rule node (m, rule)
p += 1
p = get ranges (m, rule, p)
if is nil (p) || p[] <> $]
source (parser) = p
return regex invalid range
end
node = rule node
p += 1
//[cf]
//[of]: case \\
case $\
p += 1
c = get escape char (p[])
node = new char node (m, c)
p += 1
//[cf]
//[of]: case .
case $.
node = new rule node (m, new any rule (m))
p += 1
//[cf]
//[of]: else
else
node = new char node (m, c)
p += 1
//[cf]
end
// Check repeaters
if not nil (node)
c = p[]
if c == $*
p += 1
node = new repeat node (m, node, nt zero or many)
elsif c == $+
p += 1
node = new repeat node (m, node, nt one or many)
elsif c == $?
p += 1
node = new repeat node (m, node, nt zero or one)
end
end
source (parser) = p
node (parser) = node
return regex ok
end
//[c]
//[of]:get ranges
func get ranges (m: regex, rule: rule, s: string)
def p = s
def invert = false
if p[] == $^
invert = true
p += 1
end
repeat
def c = p []
if is nul (c) || c == $]
break
end
p += 1
def d = p []
if c == $\ && d <> nul char
c = get escape char (d)
p += 1
d = p []
end
if d <> $- || p [1] == $]
set (rule, c, is case sensitive (m))
else
p += 1
d = p []
if d == $\ && p [1] <> nul char
d = get escape char (d)
p += 1
end
if d <> nul char
set (rule, c, d, is case sensitive (m))
p += 1
end
end
end
if invert
invert (rule)
end
return p
end
//[cf]
//[of]:new char node
//[c]
func new char node (m: regex, c: char)
return new rule node (m, new char rule (m, c))
end
//[cf]
//[cf]
//[cf]
//[cf]
//[of]:matching
//[of]:match (s)
//[c]Returns the end position or nil if no match
//[c]
public func match (m: regex, string: string)
// quick test
if firsts (m) [string[0]:byte:int] == 0:byte
return nil
end
// test without group (faster)
if is nil (test without group (m, string))
return nil
end
// test with groups (slower)
return test with groups (m, string)
end
//[c]
equ stack size = 1024
//[c]
public func test without group (m: regex, string: string)
def stack: [stack size] local scan
def top = stack + stack size
def sp = top
def final = final (m)
def s = initial (m)
def p = string
repeat
if s == final
return p
end
def c = p[]
each (rules (s)) ? r
equ rule = r : rule
if epsilon (rule)
if sp == stack
return nil // stack overflow
end
sp -= 1
s (sp []) = state (rule)
p (sp []) = p
elsif match (rule, c)
if sp == stack
return nil // stack overflow
end
sp -= 1
s (sp []) = state (rule)
p (sp []) = p + 1
end
end
// pop next
if sp == top
break
end
s = s (sp [])
p = p (sp [])
sp += 1
end
return nil
end
//[c]
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -