1
1
# This file is a part of Julia. License is MIT: http://julialang.org/license
2
2
3
- # # low-level pcre interface ##
3
+ # # low-level pcre2 interface ##
4
4
5
5
module PCRE
6
6
7
7
include (" pcre_h.jl" )
8
8
9
- const VERSION = bytestring ( ccall (( :pcre_version , :libpcre ), Ptr{UInt8}, ()))
9
+ const PCRE_LIB = " libpcre2-8 "
10
10
11
11
global JIT_STACK = C_NULL
12
+ global MATCH_CONTEXT = C_NULL
13
+
12
14
# Module initializer: creates the JIT stack and the match context that `exec`
# hands to every match, then assigns the stack to that context.
function __init__()
    JIT_STACK_START_SIZE = 32768
    JIT_STACK_MAX_SIZE = 1048576
    # pcre2_jit_stack_create takes both sizes as PCRE2_SIZE (size_t), so they
    # are declared as Csize_t to match the C prototype on every platform.
    global JIT_STACK = ccall((:pcre2_jit_stack_create_8, PCRE_LIB), Ptr{Void},
                             (Csize_t, Csize_t, Ptr{Void}),
                             JIT_STACK_START_SIZE, JIT_STACK_MAX_SIZE, C_NULL)
    global MATCH_CONTEXT = ccall((:pcre2_match_context_create_8, PCRE_LIB),
                                 Ptr{Void}, (Ptr{Void},), C_NULL)
    # A NULL callback makes the third argument the JIT stack itself.
    ccall((:pcre2_jit_stack_assign_8, PCRE_LIB), Void,
          (Ptr{Void}, Ptr{Void}, Ptr{Void}), MATCH_CONTEXT, C_NULL, JIT_STACK)
end
18
25
19
26
# supported options for different use cases
@@ -25,7 +32,6 @@ const COMPILE_MASK =
25
32
DOTALL |
26
33
EXTENDED |
27
34
FIRSTLINE |
28
- JAVASCRIPT_COMPAT |
29
35
MULTILINE |
30
36
NEWLINE_ANY |
31
37
NEWLINE_ANYCRLF |
@@ -34,9 +40,9 @@ const COMPILE_MASK =
34
40
NEWLINE_LF |
35
41
NO_AUTO_CAPTURE |
36
42
NO_START_OPTIMIZE |
37
- NO_UTF8_CHECK |
43
+ NO_UTF_CHECK |
38
44
UNGREEDY |
39
- UTF8
45
+ UTF
40
46
41
47
const EXECUTE_MASK =
42
48
NEWLINE_ANY |
@@ -49,20 +55,20 @@ const EXECUTE_MASK =
49
55
NOTEMPTY_ATSTART |
50
56
NOTEOL |
51
57
NO_START_OPTIMIZE |
52
- NO_UTF8_CHECK |
58
+ NO_UTF_CHECK |
53
59
PARTIAL_HARD |
54
60
PARTIAL_SOFT
55
61
62
+
56
63
const OPTIONS_MASK = COMPILE_MASK | EXECUTE_MASK
57
64
58
- function info {T} (
59
- regex:: Ptr{Void} ,
60
- extra:: Ptr{Void} , what:: Integer , :: Type{T}
61
- )
65
+ const UNSET = ~ Csize_t (0 ) # Indicates that an output vector element is unset
66
+
67
+ function info (regex:: Ptr{Void} , what:: Integer , T)
62
68
buf = zeros (UInt8,sizeof (T))
63
- ret = ccall ((:pcre_fullinfo , :libpcre ), Int32,
64
- (Ptr{Void}, Ptr{Void}, Int32, Ptr{UInt8}),
65
- regex, extra, what, buf)
69
+ ret = ccall ((:pcre2_pattern_info_8 , PCRE_LIB ), Int32,
70
+ (Ptr{Void}, Int32, Ptr{UInt8}),
71
+ regex, what, buf)
66
72
if ret != 0
67
73
error (ret == ERROR_NULL ? " NULL regex object" :
68
74
ret == ERROR_BADMAGIC ? " invalid regex object" :
@@ -72,83 +78,68 @@ function info{T}(
72
78
reinterpret (T,buf)[1 ]
73
79
end
74
80
75
- function config {T} (what:: Integer , :: Type{T} )
76
- buf = zeros (UInt8, sizeof (T))
77
- ret = ccall ((:pcre_config , :libpcre ), Int32,
78
- (Int32, Ptr{UInt8}),
79
- what, buf)
80
-
81
- if ret != 0
82
- error (" PCRE.config error code $n " )
83
- end
84
- reinterpret (T,buf)[1 ]
81
# Wrap the output vector stored inside `match_data` as a Julia array of
# Csize_t holding two entries per ovector pair. The memory is not copied.
function get_ovec(match_data)
    npairs = ccall((:pcre2_get_ovector_count_8, PCRE_LIB), UInt32,
                   (Ptr{Void},), match_data)
    ovec_ptr = ccall((:pcre2_get_ovector_pointer_8, PCRE_LIB), Ptr{Csize_t},
                     (Ptr{Void},), match_data)
    # `false`: Julia must not take ownership of (and later free) this memory.
    return pointer_to_array(ovec_ptr, 2npairs, false)
end
86
88
87
89
# Compile `pattern` with the given option bits and return the pointer to the
# compiled code. Throws an error containing the PCRE2 message and the error
# offset when compilation fails.
function compile(pattern::AbstractString, options::Integer)
    errno = Ref{Cint}(0)
    # The error offset is a PCRE2_SIZE (size_t) out-parameter; a narrower cell
    # would be overrun by the library on 64-bit platforms.
    erroff = Ref{Csize_t}(0)
    # The pattern length is passed explicitly (also as PCRE2_SIZE), so the
    # pattern goes in as a plain byte pointer rather than a NUL-terminated
    # Cstring.
    re_ptr = ccall((:pcre2_compile_8, PCRE_LIB), Ptr{Void},
                   (Ptr{UInt8}, Csize_t, UInt32, Ref{Cint}, Ref{Csize_t}, Ptr{Void}),
                   pattern, sizeof(pattern), options, errno, erroff, C_NULL)
    re_ptr == C_NULL && error("PCRE compilation error: $(err_message(errno[])) at offset $(erroff[])")
    re_ptr
end
102
98
103
- function study (regex:: Ptr{Void} , options:: Integer )
104
- # NOTE: options should always be zero in current PCRE
105
- errstr = Array (Ptr{UInt8},1 )
106
- errstr[1 ] = C_NULL
107
- extra = ccall ((:pcre_study , :libpcre ), Ptr{Void},
108
- (Ptr{Void}, Int32, Ptr{Ptr{UInt8}}),
109
- regex, options, errstr)
110
- if errstr[1 ] != C_NULL
111
- error (" $(bytestring (errstr[1 ])) " )
112
- end
113
-
114
- ccall ((:pcre_assign_jit_stack , :libpcre ), Void,
115
- (Ptr{Void}, Ptr{Void}, Ptr{Void}),
116
- extra, C_NULL , JIT_STACK)
117
- extra
99
# JIT-compile the compiled pattern `regex` with the JIT_COMPLETE option.
# Evaluates to `true` on success and throws an error carrying the PCRE2
# message otherwise.
function jit_compile(regex::Ptr{Void})
    # pcre2_jit_compile returns a C int that is negative on failure and takes
    # its options as uint32_t. Reading the result as an unsigned value would
    # turn error codes into huge positive numbers that cannot be passed on to
    # err_message.
    errno = ccall((:pcre2_jit_compile_8, PCRE_LIB), Cint,
                  (Ptr{Void}, UInt32),
                  regex, JIT_COMPLETE)
    errno == 0 || error("PCRE JIT error: $(err_message(errno))")
end
119
105
120
- study (re:: Ptr{Void} ) = study (re, Int32 (0 ))
106
# Release a match data block through pcre2_match_data_free.
function free_match_data(match_data)
    ccall((:pcre2_match_data_free_8, PCRE_LIB), Void, (Ptr{Void},), match_data)
end
108
+
109
# Release a compiled pattern through pcre2_code_free.
function free_re(re)
    ccall((:pcre2_code_free_8, PCRE_LIB), Void, (Ptr{Void},), re)
end
111
+
112
# Release a JIT stack through pcre2_jit_stack_free.
function free_jit_stack(stack)
    ccall((:pcre2_jit_stack_free_8, PCRE_LIB), Void, (Ptr{Void},), stack)
end
121
114
122
- free_study (extra:: Ptr{Void} ) =
123
- ccall ((:pcre_free_study , :libpcre ), Void, (Ptr{Void},), extra)
124
- free (regex:: Ptr{Void} ) =
125
- ccall (unsafe_load (cglobal ((:pcre_free , :libpcre ),Ptr{Void})), Void, (Ptr{Void},), regex)
115
# Release a match context through pcre2_match_context_free.
function free_match_context(context)
    ccall((:pcre2_match_context_free_8, PCRE_LIB), Void, (Ptr{Void},), context)
end
126
117
127
- function exec (regex:: Ptr{Void} , extra:: Ptr{Void} , str:: SubString , offset:: Integer ,
128
- options:: Integer , ovec:: Vector{Int32} )
129
- return exec (regex, extra, str. string, str. offset, offset, sizeof (str),
130
- options, ovec)
118
# Return the PCRE2 error message text for the error code `errno`.
function err_message(errno)
    # Zero-filled so the buffer is NUL-terminated even if the library leaves
    # it untouched (e.g. for an error code it does not recognise).
    buffer = zeros(UInt8, 256)
    # The buffer length parameter is a PCRE2_SIZE (size_t) and the function
    # returns an int; the return value is not needed here.
    ccall((:pcre2_get_error_message_8, PCRE_LIB), Cint,
          (Cint, Ptr{UInt8}, Csize_t), errno, buffer, sizeof(buffer))
    bytestring(pointer(buffer))
end
132
124
133
- function exec (regex:: Ptr{Void} , extra:: Ptr{Void} , str:: ByteString , offset:: Integer ,
134
- options:: Integer , ovec:: Vector{Int32} )
135
- return exec (regex, extra, str, 0 , offset, sizeof (str), options, ovec)
125
# Match the compiled pattern `re` against `subject`, starting at byte `offset`
# and using the given option bits. Results are written into `match_data`.
# Returns `true` when the library reports a match (rc >= 0) and `false` for
# "no match" / "partial match"; any other negative code raises an error.
function exec(re, subject, offset, options, match_data)
    # The subject length is passed explicitly, so the subject goes in as a
    # plain byte pointer; a Cstring argument would reject subjects that
    # contain NUL bytes.
    rc = ccall((:pcre2_match_8, PCRE_LIB), Cint,
               (Ptr{Void}, Ptr{UInt8}, Csize_t, Csize_t, Cuint, Ptr{Void}, Ptr{Void}),
               re, subject, sizeof(subject), offset, options, match_data, MATCH_CONTEXT)
    # rc == -1 means no match, -2 means partial match.
    rc < -2 && error("PCRE.exec error: $(err_message(rc))")
    rc >= 0
end
137
133
138
- function exec (regex:: Ptr{Void} , extra:: Ptr{Void} ,
139
- str:: ByteString , shift:: Integer , offset:: Integer ,
140
- len:: Integer , options:: Integer ,
141
- ovec:: Vector{Int32} )
142
- if offset < 0 || len < offset || len+ shift > sizeof (str)
143
- throw (BoundsError ())
144
- end
145
- n = ccall ((:pcre_exec , :libpcre ), Int32,
146
- (Ptr{Void}, Ptr{Void}, Ptr{UInt8}, Int32,
147
- Int32, Int32, Ptr{Int32}, Int32),
148
- regex, extra, pointer (str. data,shift+ 1 ), len,
149
- offset, options, ovec, length (ovec))
150
- n < - 1 && error (" PCRE.exec error code $n " )
151
- return n > - 1
134
# Create a match data block for the compiled pattern `re` via
# pcre2_match_data_create_from_pattern, with a NULL general context.
create_match_data(re) =
    ccall((:pcre2_match_data_create_from_pattern_8, PCRE_LIB),
          Ptr{Void}, (Ptr{Void}, Ptr{Void}), re, C_NULL)
138
+
139
# Return the Cint result of pcre2_substring_number_from_name for the group
# called `name` in the compiled pattern `re`.
substring_number_from_name(re, name) =
    ccall((:pcre2_substring_number_from_name_8, PCRE_LIB), Cint,
          (Ptr{Void}, Cstring), re, name)
153
143
144
+
154
145
end # module
0 commit comments