diff --git a/doc/gcc44-performance.html b/doc/gcc44-performance.html new file mode 100644 index 000000000..80f38d7ff --- /dev/null +++ b/doc/gcc44-performance.html @@ -0,0 +1,142 @@ + + + Regular Expression Performance Comparison + + + + + + + +

Regular Expression Performance Comparison

+

+ The following tables provide comparisons between the following regular + expression libraries:

+

GRETA.

+

The Boost regex library.

+

Henry Spencer's regular expression library + - this is provided for comparison as a typical non-backtracking implementation.

+

Philip Hazel's PCRE library.

+

Details

+

Machine: Intel Xeon E5405 2.0GHz Server.

+

Compiler: GNU C++ version 4.4.7 20120313 (Red Hat 4.4.7-4).

+

C++ Standard Library: GNU libstdc++ version 20120313.

+

OS: CentOS 6.4.

+

Boost version: 1.56.0.

+

PCRE version: 8.37.

+

+ As ever, care should be taken in interpreting the results: only sensible regular + expressions (rather than pathological cases) are given; most are taken from the + Boost regex examples, or from the Library of + Regular Expressions. In addition, some variation in the relative + performance of these libraries can be expected on other machines, as memory + access and processor caching effects can be quite large for most finite state + machine algorithms.

+

Averages

+

The following are the average relative scores for all the tests: the perfect + regular expression library would score 1; in practice, anything less than 2 + is pretty good.

+

+ + + + + + + + + + + +
GRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic Xpressivegoogle RE2
5.015048.364695.777555.7310712.80164.812981.422273.867123.74945
+

+

Comparison 1: Long Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a long English language text was measured + (mtent12.txt + from Project Gutenberg, 19Mb). 

+

+ + + + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
Twain3.64
(0.0256s)
3.64
(0.0256s)
4.98
(0.035s)
5.16
(0.0362s)
3.96
(0.0278s)
4.8
(0.0338s)
2.84
(0.02s)
3.64
(0.0256s)
1
(0.00703s)
Huck[[:alpha:]]+4.96
(0.0259s)
4.96
(0.0259s)
6.45
(0.0338s)
6.33
(0.0331s)
4.9
(0.0256s)
6.09
(0.0319s)
3.7
(0.0194s)
4.78
(0.025s)
1
(0.00523s)
[[:alpha:]]+ing10.4
(1.46s)
19
(2.66s)
3.5
(0.49s)
3.57
(0.5s)
8.43
(1.18s)
14.2
(1.99s)
4.5
(0.63s)
3.11
(0.435s)
1
(0.14s)
^[^ +]*?Twain5.61
(0.47s)
20.4
(1.71s)
2.69
(0.225s)
2.69
(0.225s)
NA4.96
(0.415s)
1.24
(0.104s)
2.84
(0.237s)
1
(0.0838s)
Tom|Sawyer|Huckleberry|Finn6.94
(0.23s)
13.4
(0.445s)
1.38
(0.0456s)
1.36
(0.045s)
1
(0.0331s)
2
(0.0663s)
1.6
(0.0531s)
1.45
(0.0481s)
2.53
(0.0838s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)5.71
(0.45s)
6.86
(0.54s)
1.44
(0.114s)
1.48
(0.116s)
1.43
(0.113s)
2.6
(0.205s)
1
(0.0788s)
1.3
(0.102s)
1.05
(0.0825s)
+

+

Comparison 2: Medium Sized Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a medium sized English language text was + measured (the first 50K from mtent12.txt - up to the end of Chapter 1). 

+

+ + + + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
Twain1.53
(5.98e-05s)
1.56
(6.1e-05s)
3.62
(0.000142s)
3.62
(0.000142s)
3.31
(0.000129s)
3.31
(0.000129s)
1.19
(4.64e-05s)
1.69
(6.59e-05s)
1
(3.91e-05s)
Huck[[:alpha:]]+3.48
(6.59e-05s)
3.48
(6.59e-05s)
6.06
(0.000115s)
5.81
(0.00011s)
4.77
(9.03e-05s)
5.35
(0.000101s)
2.52
(4.76e-05s)
3.35
(6.35e-05s)
1
(1.89e-05s)
[[:alpha:]]+ing12
(0.00375s)
22.2
(0.00695s)
3.44
(0.00107s)
3.5
(0.00109s)
9.25
(0.00289s)
15.5
(0.00484s)
5.19
(0.00162s)
3.56
(0.00111s)
1
(0.000313s)
^[^ +]*?Twain5.57
(0.00121s)
16.5
(0.00359s)
2.88
(0.000625s)
2.88
(0.000625s)
NA5.03
(0.00109s)
1.24
(0.000269s)
2.97
(0.000645s)
1
(0.000217s)
Tom|Sawyer|Huckleberry|Finn5.87
(0.000674s)
11.7
(0.00135s)
2.77
(0.000317s)
2.77
(0.000317s)
2.02
(0.000232s)
3.7
(0.000425s)
1
(0.000115s)
2.55
(0.000293s)
1.85
(0.000212s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)3.77
(0.000791s)
8.09
(0.0017s)
2.93
(0.000615s)
2.93
(0.000615s)
2.07
(0.000435s)
3.86
(0.000811s)
1.19
(0.000249s)
2.21
(0.000464s)
1
(0.00021s)
+

+

Comparison 3: C++ Code Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within the C++ source file + boost/crc.hpp was measured. 

+

+ + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([ ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{)22.9
(0.00162s)
22.9
(0.00162s)
1.47
(0.000104s)
1.48
(0.000105s)
NA4.41
(0.000313s)
1
(7.08e-05s)
1.66
(0.000117s)
1.93
(0.000137s)
(^[ ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\>6.98
(0.0043s)
6.6
(0.00406s)
6.73
(0.00414s)
6.6
(0.00406s)
NA11.8
(0.00727s)
1
(0.000615s)
NANA
^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>)6.32
(0.000479s)
21.7
(0.00164s)
2.13
(0.000161s)
2.13
(0.000161s)
NA3.29
(0.000249s)
1
(7.57e-05s)
1.97
(0.000149s)
1.87
(0.000142s)
^[ ]*#[ ]*include[ ]+("boost/[^"]+"|<boost/[^>]+>)6.32
(0.000479s)
22.5
(0.0017s)
2.13
(0.000161s)
2.13
(0.000161s)
NA3.29
(0.000249s)
1
(7.57e-05s)
1.94
(0.000146s)
1.84
(0.000139s)
+

+

+

Comparison 4: HTML Document Search

+ +

For each of the following regular expressions the time taken to find all + occurrences of the expression within the html file libs/libraries.htm + was measured. 

+

+ + + + + + + +
ExpressionGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
beman|john|dave4.38
(0.000791s)
8.76
(0.00158s)
1.73
(0.000313s)
1.73
(0.000313s)
2.97
(0.000537s)
2.19
(0.000396s)
1
(0.000181s)
2.62
(0.000474s)
1.49
(0.000269s)
<a[^>]+href=("[^"]*"|[^[:space:]]+)[^>]*>2.45
(0.000425s)
3.49
(0.000605s)
3.44
(0.000596s)
3.44
(0.000596s)
51.4
(0.00891s)
2.76
(0.000479s)
1
(0.000173s)
4.51
(0.000781s)
32.9
(0.0057s)
<img[^>]+src=("[^"]*"|[^[:space:]]+)[^>]*>1.12
(6.47e-05s)
1.14
(6.59e-05s)
3.71
(0.000215s)
3.66
(0.000212s)
8.17
(0.000474s)
3.07
(0.000178s)
1
(5.8e-05s)
3.41
(0.000198s)
2.02
(0.000117s)
<p>.*?</p>1.21
(9.03e-05s)
1.26
(9.4e-05s)
2.85
(0.000212s)
2.85
(0.000212s)
NA2.52
(0.000188s)
1
(7.45e-05s)
3.41
(0.000254s)
6.82
(0.000508s)
<h[12345678][^>]*>.*?</h[12345678]>1.73
(0.000139s)
1.97
(0.000159s)
2.76
(0.000222s)
2.73
(0.00022s)
NA2.48
(0.0002s)
1
(8.06e-05s)
4.85
(0.000391s)
6.3
(0.000508s)
<font[^>]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?</font>1.27
(7.2e-05s)
1.31
(7.45e-05s)
3.74
(0.000212s)
3.96
(0.000225s)
NA3.18
(0.000181s)
1
(5.68e-05s)
3.1
(0.000176s)
1.02
(5.8e-05s)
+

+

Comparison 5: Simple Matches

+

+ For each of the following regular expressions the time taken to match against + the text indicated was measured. 

+

+ + + + + + + + + + + + + + + +
ExpressionTextGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCREPCRE JITDynamic XpressiveRE2
abcabc3.12
(9.66e-08s)
4.62
(1.43e-07s)
11.5
(3.58e-07s)
11.7
(3.62e-07s)
4.54
(1.41e-07s)
5.08
(1.57e-07s)
1
(3.1e-08s)
6.46
(2e-07s)
3.77
(1.17e-07s)
^([0-9]+)(\-| |$)(.*)$100- this is a line of ftp response which contains a message string1.81
(2.77e-07s)
3.38
(5.15e-07s)
4.19
(6.39e-07s)
4.25
(6.48e-07s)
95
(1.45e-05s)
2.41
(3.67e-07s)
1
(1.53e-07s)
2.91
(4.43e-07s)
3.5
(5.34e-07s)
([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}1234-5678-1234-4564.95
(3.48e-07s)
8
(5.63e-07s)
12.5
(8.77e-07s)
13
(9.16e-07s)
3.32
(2.34e-07s)
5.36
(3.77e-07s)
1
(7.03e-08s)
6.64
(4.67e-07s)
4.54
(3.19e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$john@johnmaddock.co.uk5.64
(1.18e-06s)
5.64
(1.18e-06s)
7.18
(1.51e-06s)
7.27
(1.53e-06s)
30.2
(6.33e-06s)
4.18
(8.77e-07s)
1
(2.1e-07s)
5
(1.05e-06s)
1.61
(3.39e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$foo12@foo.edu6.86
(1.03e-06s)
6.86
(1.03e-06s)
8.76
(1.32e-06s)
9.27
(1.39e-06s)
29.5
(4.43e-06s)
4.89
(7.34e-07s)
1
(1.5e-07s)
6.1
(9.16e-07s)
2.03
(3.05e-07s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$bob.smith@foo.tv7.1
(1.05e-06s)
7.1
(1.05e-06s)
8.9
(1.32e-06s)
8.65
(1.28e-06s)
31.5
(4.65e-06s)
4.84
(7.15e-07s)
1
(1.48e-07s)
6.32
(9.35e-07s)
2.13
(3.15e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$EH10 2QQ3.9
(1.81e-07s)
5.95
(2.77e-07s)
11.7
(5.44e-07s)
10.9
(5.05e-07s)
4.21
(1.96e-07s)
4.62
(2.15e-07s)
1
(4.65e-08s)
5.33
(2.48e-07s)
6.05
(2.81e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$G1 1AA3.54
(1.65e-07s)
5.85
(2.72e-07s)
10.7
(4.96e-07s)
10.9
(5.05e-07s)
3.9
(1.81e-07s)
4.62
(2.15e-07s)
1
(4.65e-08s)
5.54
(2.57e-07s)
5.95
(2.77e-07s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$SW1 1ZZ3.85
(1.81e-07s)
5.87
(2.77e-07s)
10.5
(4.96e-07s)
10.1
(4.77e-07s)
3.95
(1.86e-07s)
4.56
(2.15e-07s)
1
(4.71e-08s)
5.27
(2.48e-07s)
5.97
(2.81e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$4/1/20013.32
(1.74e-07s)
5.09
(2.67e-07s)
8.73
(4.58e-07s)
8.91
(4.67e-07s)
3.68
(1.93e-07s)
4.14
(2.17e-07s)
1
(5.25e-08s)
4.64
(2.43e-07s)
2.91
(1.53e-07s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$12/12/20013.27
(1.72e-07s)
5
(2.62e-07s)
9.27
(4.86e-07s)
9.64
(5.05e-07s)
3.91
(2.05e-07s)
4.14
(2.17e-07s)
1
(5.25e-08s)
5.36
(2.81e-07s)
5.55
(2.91e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$1233.03
(1.34e-07s)
5.03
(2.22e-07s)
10.3
(4.53e-07s)
10.2
(4.48e-07s)
8.11
(3.58e-07s)
4.97
(2.19e-07s)
1
(4.41e-08s)
5.3
(2.34e-07s)
5.84
(2.57e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$+3.141593
(1.57e-07s)
4.64
(2.43e-07s)
10.5
(5.53e-07s)
9.45
(4.96e-07s)
10.2
(5.34e-07s)
4.55
(2.38e-07s)
1
(5.25e-08s)
4.64
(2.43e-07s)
5.36
(2.81e-07s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$-3.141592.97
(1.57e-07s)
4.58
(2.43e-07s)
10.4
(5.53e-07s)
9.35
(4.96e-07s)
10.1
(5.34e-07s)
4.49
(2.38e-07s)
1
(5.3e-08s)
4.94
(2.62e-07s)
5.39
(2.86e-07s)
+

+
+

© Copyright John Maddock 2003

+

Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt)

+ + + + diff --git a/doc/html/boost_regex/background_information/performance.html b/doc/html/boost_regex/background_information/performance.html index 283e19029..f20fffabc 100644 --- a/doc/html/boost_regex/background_information/performance.html +++ b/doc/html/boost_regex/background_information/performance.html @@ -42,6 +42,10 @@ Gcc 3.2 (cygwin) (non-recursive Boost.Regex implementation). +
  • + Gcc 4.4 (CentOS 6) (non-recursive + Boost.Regex implementation). +
  • diff --git a/performance/Jamfile.v2 b/performance/Jamfile.v2 index ca47cb9ee..60eb1fbff 100644 --- a/performance/Jamfile.v2 +++ b/performance/Jamfile.v2 @@ -3,12 +3,14 @@ # (See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt. -SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_dynamic_xpressive time_posix time_safe_greta ; +SOURCES = command_line main time_boost time_greta time_localised_boost time_pcre time_pcre_jit time_dynamic_xpressive time_posix time_safe_greta time_re2 ; local HS_REGEX_PATH = [ modules.peek : HS_REGEX_PATH ] ; local USE_POSIX = [ modules.peek : USE_POSIX ] ; local PCRE_PATH = [ modules.peek : PCRE_PATH ] ; local USE_PCRE = [ modules.peek : USE_PCRE ] ; +local GRETA_PATH = [ modules.peek : GRETA_PATH ] ; +local USE_RE2 = [ modules.peek : USE_RE2 ] ; if $(HS_REGEX_PATH) { @@ -20,31 +22,51 @@ else if $(USE_POSIX) POSIX_OPTS = BOOST_HAS_POSIX=1 ; } -lib pcre : : pcre ; +lib pcre : : pcre /usr/local/lib ; if $(PCRE_PATH) { +# currently pcre have more source files PCRE_SOURCES = $(PCRE_PATH)/chartables.c $(PCRE_PATH)/get.c $(PCRE_PATH)/pcre.c $(PCRE_PATH)/study.c ; PCRE_OPTS = BOOST_HAS_PCRE=1 $(PCRE_PATH) ; } else if $(USE_PCRE) { - PCRE_OPTS = BOOST_HAS_PCRE=1 ; + PCRE_OPTS = BOOST_HAS_PCRE=1 BOOST_HAS_PCRE_JIT=1 ; PCRE_SOURCES = pcre ; } +if $(GRETA_PATH) +{ + GRETA_SOURCES = $(GRETA_PATH)/regexpr2.cpp $(GRETA_PATH)/syntax2.cpp ; + GRETA_OPTS = BOOST_HAS_GRETA=1 $(GRETA_PATH) ; +} + +lib re2 : : re2 ; + +if $(USE_RE2) +{ + RE2_OPTS = BOOST_HAS_RE2=1 ; + RE2_SOURCES = re2 ; +} + exe regex_comparison : $(SOURCES).cpp $(HS_SOURCES) $(PCRE_SOURCES) + $(GRETA_SOURCES) + $(RE2_SOURCES) ../build//boost_regex ../../test/build//boost_prg_exec_monitor/static : BOOST_REGEX_NO_LIB=1 BOOST_REGEX_STATIC_LINK=1 + BOOST_HAS_XPRESSIVE=1 $(POSIX_OPTS) $(PCRE_OPTS) + $(GRETA_OPTS) + $(RE2_OPTS) ; diff --git a/performance/command_line.cpp b/performance/command_line.cpp index 
2d2ac7ba1..dac2bcc4d 100644 --- a/performance/command_line.cpp +++ b/performance/command_line.cpp @@ -33,7 +33,9 @@ bool time_greta = false; bool time_safe_greta = false; bool time_posix = false; bool time_pcre = false; +bool time_pcre_jit = false; bool time_xpressive = false; +bool time_re2 = false; bool time_std = false; bool test_matches = false; @@ -55,7 +57,9 @@ double boost_total = 0; double locale_boost_total = 0; double posix_total = 0; double pcre_total = 0; +double pcre_jit_total = 0; double xpressive_total = 0; +double re2_total = 0; double std_total = 0; unsigned greta_test_count = 0; unsigned safe_greta_test_count = 0; @@ -63,7 +67,9 @@ unsigned boost_test_count = 0; unsigned locale_boost_test_count = 0; unsigned posix_test_count = 0; unsigned pcre_test_count = 0; +unsigned pcre_jit_test_count = 0; unsigned xpressive_test_count = 0; +unsigned re2_test_count = 0; unsigned std_test_count = 0; int handle_argument(const std::string& what) @@ -86,10 +92,18 @@ int handle_argument(const std::string& what) else if(what == "-pcre") time_pcre = true; #endif +#ifdef BOOST_HAS_PCRE_JIT + else if(what == "-pcrejit") + time_pcre_jit = true; +#endif #ifdef BOOST_HAS_XPRESSIVE else if(what == "-xpressive" || what == "-dxpr") time_xpressive = true; #endif +#ifdef BOOST_HAS_RE2 + else if(what == "-re2") + time_re2 = true; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX else if(what == "-std") time_std = true; @@ -108,9 +122,15 @@ int handle_argument(const std::string& what) #ifdef BOOST_HAS_PCRE time_pcre = true; #endif +#ifdef BOOST_HAS_PCRE_JIT + time_pcre_jit = true; +#endif #ifdef BOOST_HAS_XPRESSIVE time_xpressive = true; #endif +#ifdef BOOST_HAS_RE2 + time_re2 = true; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX time_std = true; #endif @@ -174,9 +194,15 @@ int show_usage() #ifdef BOOST_HAS_PCRE " -pcre Apply tests to PCRE library\n" #endif +#ifdef BOOST_HAS_PCRE_JIT + " -pcrejit Apply tests to PCRE library (int JIT mode)\n" +#endif #ifdef BOOST_HAS_XPRESSIVE " -dxpr Apply 
tests to dynamic xpressive library\n" #endif +#ifdef BOOST_HAS_RE2 + " -re2 Apply tests to google RE2 library\n" +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX " -std Apply tests to std::regex.\n" #endif @@ -283,10 +309,18 @@ void output_html_results(bool show_description, const std::string& tagname) if(time_pcre == true) os << ""; #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + os << ""; +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) os << ""; #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + os << ""; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) os << ""; @@ -362,6 +396,17 @@ void output_html_results(bool show_description, const std::string& tagname) } } #endif +#if defined(BOOST_HAS_PCRE_JIT) + if(time_pcre_jit == true) + { + print_result(os, first->pcre_jit_time, first->factor); + if(first->pcre_jit_time > 0) + { + pcre_jit_total += first->pcre_jit_time / first->factor; + ++pcre_jit_test_count; + } + } +#endif #if defined(BOOST_HAS_XPRESSIVE) if(time_xpressive == true) { @@ -373,6 +418,17 @@ void output_html_results(bool show_description, const std::string& tagname) } } #endif +#if defined(BOOST_HAS_RE2) + if(time_re2 == true) + { + print_result(os, first->re2_time, first->factor); + if(first->re2_time > 0) + { + re2_total += first->re2_time / first->factor; + ++re2_test_count; + } + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -450,12 +506,24 @@ std::string get_averages_table() os << ""; } #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + { + os << ""; + } +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) { os << ""; } #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + { + os << ""; + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -473,25 +541,31 @@ std::string get_averages_table() os << "\n"; if(time_safe_greta == true) os << "\n"; -#endif -#if defined(BOOST_HAS_POSIX) - if(time_boost == true) - os << "\n"; #endif if(time_boost == true) os 
<< "\n"; if(time_localised_boost == true) os << "\n"; +#if defined(BOOST_HAS_POSIX) if(time_posix == true) os << "\n"; +#endif #if defined(BOOST_HAS_PCRE) if(time_pcre == true) os << "\n"; #endif +#if defined(BOOST_HAS_PCRE_JIT) + if(time_pcre_jit == true) + os << "\n"; +#endif #if defined(BOOST_HAS_XPRESSIVE) if(time_xpressive == true) os << "\n"; #endif +#if defined(BOOST_HAS_RE2) + if(time_re2 == true) + os << "\n"; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) os << "\n"; diff --git a/performance/input.html b/performance/input.html index 425dedebc..706229392 100644 --- a/performance/input.html +++ b/performance/input.html @@ -60,7 +60,7 @@

    Comparison 4: HTML Document Search

    occurrences of the expression within the html file libs/libraries.htm was measured. 

    %html_search%

    -

    Comparison 3: Simple Matches

    +

    Comparison 5: Simple Matches

    For each of the following regular expressions the time taken to match against the text indicated was measured. 

    diff --git a/performance/main.cpp b/performance/main.cpp index b7ba8a526..0bdd52e17 100644 --- a/performance/main.cpp +++ b/performance/main.cpp @@ -66,6 +66,14 @@ void test_match(const std::string& re, const std::string& text, const std::strin std::cout << "\tPCRE regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + { + time = pcrj::time_match(re, text, icase); + r.pcre_jit_time = time; + std::cout << "\tPCRE JIT regex: " << time << "s\n"; + } +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) { @@ -74,6 +82,14 @@ void test_match(const std::string& re, const std::string& text, const std::strin std::cout << "\txpressive regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + { + time = gre2::time_match(re, text, icase); + r.re2_time = time; + std::cout << "\tRE2 regex: " << time << "s\n"; + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -135,6 +151,14 @@ void test_find_all(const std::string& re, const std::string& text, const std::st std::cout << "\tPCRE regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + { + time = pcrj::time_find_all(re, text, icase); + r.pcre_jit_time = time; + std::cout << "\tPCRE JIT regex: " << time << "s\n"; + } +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) { @@ -143,6 +167,14 @@ void test_find_all(const std::string& re, const std::string& text, const std::st std::cout << "\txpressive regex: " << time << "s\n"; } #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + { + time = gre2::time_find_all(re, text, icase); + r.re2_time = time; + std::cout << "\tRE2 regex: " << time << "s\n"; + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -226,10 +258,13 @@ int cpp_main(int argc, char * argv[]) const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|]+>)"; + bool time_posix_orig = time_posix; + time_posix = false; 
test_find_all(class_expression, file_contents); test_find_all(highlight_expression, file_contents); test_find_all(include_expression, file_contents); test_find_all(boost_include_expression, file_contents); + time_posix = time_posix_orig; } output_html_results(false, "%code_search%"); @@ -237,11 +272,15 @@ int cpp_main(int argc, char * argv[]) { load_file(file_contents, "../../../libs/libraries.htm"); test_find_all("beman|john|dave", file_contents, true); - test_find_all("

    .*?

    ", file_contents, true); test_find_all("]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); - test_find_all("]*>.*?", file_contents, true); test_find_all("]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); + bool time_posix_orig = time_posix; + time_posix = false; + // POSIX-Extended unspport Non greedy repeats + test_find_all("

    .*?

    ", file_contents, true); + test_find_all("]*>.*?", file_contents, true); test_find_all("]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?", file_contents, true); + time_posix = time_posix_orig; } output_html_results(false, "%html_search%"); @@ -252,7 +291,10 @@ int cpp_main(int argc, char * argv[]) test_find_all("Twain", file_contents); test_find_all("Huck[[:alpha:]]+", file_contents); test_find_all("[[:alpha:]]+ing", file_contents); + bool time_posix_orig = time_posix; + time_posix = false; test_find_all("^[^\n]*?Twain", file_contents); + time_posix = time_posix_orig; test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); } @@ -260,16 +302,17 @@ int cpp_main(int argc, char * argv[]) if(test_long_twain) { - load_file(file_contents, "mtent13.txt"); + load_file(file_contents, "mtent12.txt"); test_find_all("Twain", file_contents); test_find_all("Huck[[:alpha:]]+", file_contents); test_find_all("[[:alpha:]]+ing", file_contents); - test_find_all("^[^\n]*?Twain", file_contents); - test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); + bool time_posix_orig = time_posix; time_posix = false; + test_find_all("^[^\n]*?Twain", file_contents); // POSIX-Extended: the escape character is not "special" inside a character class declaration + time_posix = time_posix_orig; + test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); - time_posix = true; } output_html_results(false, "%long_twain_search%"); diff --git a/performance/regex_comparison.hpp b/performance/regex_comparison.hpp index 4ed968fd8..db819cf95 100644 --- a/performance/regex_comparison.hpp +++ b/performance/regex_comparison.hpp @@ -26,7 +26,9 @@ extern bool time_greta; extern bool time_safe_greta; extern bool time_posix; extern bool time_pcre; +extern bool time_pcre_jit; 
extern bool time_xpressive; +extern bool time_re2; extern bool time_std; extern bool test_matches; @@ -55,7 +57,9 @@ struct results double safe_greta_time; double posix_time; double pcre_time; + double pcre_jit_time; double xpressive_time; + double re2_time; double std_time; double factor; std::string expression; @@ -67,7 +71,9 @@ struct results safe_greta_time(-1), posix_time(-1), pcre_time(-1), + pcre_jit_time(-1), xpressive_time(-1), + re2_time(-1), std_time(-1), factor((std::numeric_limits::max)()), expression(ex), @@ -87,8 +93,12 @@ struct results factor = posix_time; if((pcre_time >= 0) && (pcre_time < factor)) factor = pcre_time; + if((pcre_jit_time >= 0) && (pcre_jit_time < factor)) + factor = pcre_jit_time; if((xpressive_time >= 0) && (xpressive_time < factor)) factor = xpressive_time; + if((re2_time >= 0) && (re2_time < factor)) + factor = re2_time; if((std_time >= 0) && (std_time < factor)) factor = std_time; } @@ -114,6 +124,12 @@ namespace pcr { double time_match(const std::string& re, const std::string& text, bool icase); double time_find_all(const std::string& re, const std::string& text, bool icase); +} +namespace pcrj { +// pcre jit tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + } namespace g { // greta tests: @@ -138,8 +154,13 @@ namespace dxpr { double time_match(const std::string& re, const std::string& text, bool icase); double time_find_all(const std::string& re, const std::string& text, bool icase); } +namespace gre2 { +// re2 tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); +} namespace stdr { -// xpressive tests: +// C11 tests: double time_match(const std::string& re, const std::string& text, bool icase); double time_find_all(const std::string& re, const std::string& text, bool icase); }
    PCREPCRE JITDynamic XpressiveRE2std::regexPCREPCRE JITDynamic Xpressivegoogle RE2" << (greta_total / greta_test_count) << "" << (safe_greta_total / safe_greta_test_count) << "" << (boost_total / boost_test_count) << "" << (boost_total / boost_test_count) << "" << (locale_boost_total / locale_boost_test_count) << "" << (posix_total / posix_test_count) << "" << (pcre_total / pcre_test_count) << "" << (pcre_jit_total / pcre_jit_test_count) << "" << (xpressive_total / xpressive_test_count) << "" << (re2_total / re2_test_count) << "" << (std_total / std_test_count) << "