diff --git a/doc/gcc44-performance.html b/doc/gcc44-performance.html new file mode 100644 index 000000000..80f38d7ff --- /dev/null +++ b/doc/gcc44-performance.html @@ -0,0 +1,142 @@ + +
++ The tables below compare the following regular + expression libraries:
+ + +Henry Spencer's regular expression library + - this is provided for comparison as a typical non-backtracking implementation.
+Philip Hazel's PCRE library.
+Machine: Intel Xeon E5405 2.0GHz Server.
+Compiler: GNU C++ version 4.4.7 20120313 (Red Hat 4.4.7-4).
+C++ Standard Library: GNU libstdc++ version 20120313.
+OS: CentOS 6.4.
+Boost version: 1.56.0.
+PCRE version: 8.37.
++ As ever, care should be taken in interpreting the results: only sensible regular + expressions (rather than pathological cases) are given, and most are taken from the + Boost regex examples or from the Library of + Regular Expressions. In addition, some variation in the relative + performance of these libraries can be expected on other machines, as memory + access and processor caching effects can be quite large for most finite state + machine algorithms.
+The following are the average relative scores for all the tests: the perfect + regular expression library would score 1; in practice, anything less than 2 + is pretty good.
+GRETA | GRETA (non-recursive mode) | Boost | Boost + C++ locale | POSIX | PCRE | PCRE JIT | Dynamic Xpressive | google RE2 |
5.01504 | +8.36469 | +5.77755 | +5.73107 | +12.8016 | +4.81298 | +1.42227 | +3.86712 | +3.74945 | +
For each of the following regular expressions the time taken to find all + occurrences of the expression within a long English language text was measured + (mtent12.txt + from Project Gutenberg, 19MB).
+Expression | GRETA | GRETA (non-recursive mode) | Boost | Boost + C++ locale | POSIX | PCRE | PCRE JIT | Dynamic Xpressive | RE2 |
Twain | 3.64 (0.0256s) | 3.64 (0.0256s) | 4.98 (0.035s) | 5.16 (0.0362s) | 3.96 (0.0278s) | 4.8 (0.0338s) | 2.84 (0.02s) | 3.64 (0.0256s) | 1 (0.00703s) |
Huck[[:alpha:]]+ | 4.96 (0.0259s) | 4.96 (0.0259s) | 6.45 (0.0338s) | 6.33 (0.0331s) | 4.9 (0.0256s) | 6.09 (0.0319s) | 3.7 (0.0194s) | 4.78 (0.025s) | 1 (0.00523s) |
[[:alpha:]]+ing | 10.4 (1.46s) | 19 (2.66s) | 3.5 (0.49s) | 3.57 (0.5s) | 8.43 (1.18s) | 14.2 (1.99s) | 4.5 (0.63s) | 3.11 (0.435s) | 1 (0.14s) |
^[^
+]*?Twain | 5.61 (0.47s) | 20.4 (1.71s) | 2.69 (0.225s) | 2.69 (0.225s) | NA | 4.96 (0.415s) | 1.24 (0.104s) | 2.84 (0.237s) | 1 (0.0838s) |
Tom|Sawyer|Huckleberry|Finn | 6.94 (0.23s) | 13.4 (0.445s) | 1.38 (0.0456s) | 1.36 (0.045s) | 1 (0.0331s) | 2 (0.0663s) | 1.6 (0.0531s) | 1.45 (0.0481s) | 2.53 (0.0838s) |
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn) | 5.71 (0.45s) | 6.86 (0.54s) | 1.44 (0.114s) | 1.48 (0.116s) | 1.43 (0.113s) | 2.6 (0.205s) | 1 (0.0788s) | 1.3 (0.102s) | 1.05 (0.0825s) |
For each of the following regular expressions the time taken to find all + occurrences of the expression within a medium-sized English language text was + measured (the first 50K from mtent12.txt - up to the end of Chapter 1).
+Expression | GRETA | GRETA (non-recursive mode) | Boost | Boost + C++ locale | POSIX | PCRE | PCRE JIT | Dynamic Xpressive | RE2 |
Twain | 1.53 (5.98e-05s) | 1.56 (6.1e-05s) | 3.62 (0.000142s) | 3.62 (0.000142s) | 3.31 (0.000129s) | 3.31 (0.000129s) | 1.19 (4.64e-05s) | 1.69 (6.59e-05s) | 1 (3.91e-05s) |
Huck[[:alpha:]]+ | 3.48 (6.59e-05s) | 3.48 (6.59e-05s) | 6.06 (0.000115s) | 5.81 (0.00011s) | 4.77 (9.03e-05s) | 5.35 (0.000101s) | 2.52 (4.76e-05s) | 3.35 (6.35e-05s) | 1 (1.89e-05s) |
[[:alpha:]]+ing | 12 (0.00375s) | 22.2 (0.00695s) | 3.44 (0.00107s) | 3.5 (0.00109s) | 9.25 (0.00289s) | 15.5 (0.00484s) | 5.19 (0.00162s) | 3.56 (0.00111s) | 1 (0.000313s) |
^[^
+]*?Twain | 5.57 (0.00121s) | 16.5 (0.00359s) | 2.88 (0.000625s) | 2.88 (0.000625s) | NA | 5.03 (0.00109s) | 1.24 (0.000269s) | 2.97 (0.000645s) | 1 (0.000217s) |
Tom|Sawyer|Huckleberry|Finn | 5.87 (0.000674s) | 11.7 (0.00135s) | 2.77 (0.000317s) | 2.77 (0.000317s) | 2.02 (0.000232s) | 3.7 (0.000425s) | 1 (0.000115s) | 2.55 (0.000293s) | 1.85 (0.000212s) |
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn) | 3.77 (0.000791s) | 8.09 (0.0017s) | 2.93 (0.000615s) | 2.93 (0.000615s) | 2.07 (0.000435s) | 3.86 (0.000811s) | 1.19 (0.000249s) | 2.21 (0.000464s) | 1 (0.00021s) |
For each of the following regular expressions the time taken to find all + occurrences of the expression within the C++ source file + boost/crc.hpp was measured.
+Expression | GRETA | GRETA (non-recursive mode) | Boost | Boost + C++ locale | POSIX | PCRE | PCRE JIT | Dynamic Xpressive | RE2 |
^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([ ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{) | 22.9 (0.00162s) | 22.9 (0.00162s) | 1.47 (0.000104s) | 1.48 (0.000105s) | NA | 4.41 (0.000313s) | 1 (7.08e-05s) | 1.66 (0.000117s) | 1.93 (0.000137s) |
(^[ ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\> | 6.98 (0.0043s) | 6.6 (0.00406s) | 6.73 (0.00414s) | 6.6 (0.00406s) | NA | 11.8 (0.00727s) | 1 (0.000615s) | NA | NA |
^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>) | 6.32 (0.000479s) | 21.7 (0.00164s) | 2.13 (0.000161s) | 2.13 (0.000161s) | NA | 3.29 (0.000249s) | 1 (7.57e-05s) | 1.97 (0.000149s) | 1.87 (0.000142s) |
^[ ]*#[ ]*include[ ]+("boost/[^"]+"|<boost/[^>]+>) | 6.32 (0.000479s) | 22.5 (0.0017s) | 2.13 (0.000161s) | 2.13 (0.000161s) | NA | 3.29 (0.000249s) | 1 (7.57e-05s) | 1.94 (0.000146s) | 1.84 (0.000139s) |
For each of the following regular expressions the time taken to find all + occurrences of the expression within the html file libs/libraries.htm + was measured.
+Expression | GRETA | GRETA (non-recursive mode) | Boost | Boost + C++ locale | POSIX | PCRE | PCRE JIT | Dynamic Xpressive | RE2 |
beman|john|dave | 4.38 (0.000791s) | 8.76 (0.00158s) | 1.73 (0.000313s) | 1.73 (0.000313s) | 2.97 (0.000537s) | 2.19 (0.000396s) | 1 (0.000181s) | 2.62 (0.000474s) | 1.49 (0.000269s) |
<a[^>]+href=("[^"]*"|[^[:space:]]+)[^>]*> | 2.45 (0.000425s) | 3.49 (0.000605s) | 3.44 (0.000596s) | 3.44 (0.000596s) | 51.4 (0.00891s) | 2.76 (0.000479s) | 1 (0.000173s) | 4.51 (0.000781s) | 32.9 (0.0057s) |
<img[^>]+src=("[^"]*"|[^[:space:]]+)[^>]*> | 1.12 (6.47e-05s) | 1.14 (6.59e-05s) | 3.71 (0.000215s) | 3.66 (0.000212s) | 8.17 (0.000474s) | 3.07 (0.000178s) | 1 (5.8e-05s) | 3.41 (0.000198s) | 2.02 (0.000117s) |
<p>.*?</p> | 1.21 (9.03e-05s) | 1.26 (9.4e-05s) | 2.85 (0.000212s) | 2.85 (0.000212s) | NA | 2.52 (0.000188s) | 1 (7.45e-05s) | 3.41 (0.000254s) | 6.82 (0.000508s) |
<h[12345678][^>]*>.*?</h[12345678]> | 1.73 (0.000139s) | 1.97 (0.000159s) | 2.76 (0.000222s) | 2.73 (0.00022s) | NA | 2.48 (0.0002s) | 1 (8.06e-05s) | 4.85 (0.000391s) | 6.3 (0.000508s) |
<font[^>]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?</font> | 1.27 (7.2e-05s) | 1.31 (7.45e-05s) | 3.74 (0.000212s) | 3.96 (0.000225s) | NA | 3.18 (0.000181s) | 1 (5.68e-05s) | 3.1 (0.000176s) | 1.02 (5.8e-05s) |
+ For each of the following regular expressions the time taken to match against + the text indicated was measured.
+Expression | Text | GRETA | GRETA (non-recursive mode) | Boost | Boost + C++ locale | POSIX | PCRE | PCRE JIT | Dynamic Xpressive | RE2 |
abc | abc | 3.12 (9.66e-08s) | 4.62 (1.43e-07s) | 11.5 (3.58e-07s) | 11.7 (3.62e-07s) | 4.54 (1.41e-07s) | 5.08 (1.57e-07s) | 1 (3.1e-08s) | 6.46 (2e-07s) | 3.77 (1.17e-07s) |
^([0-9]+)(\-| |$)(.*)$ | 100- this is a line of ftp response which contains a message string | 1.81 (2.77e-07s) | 3.38 (5.15e-07s) | 4.19 (6.39e-07s) | 4.25 (6.48e-07s) | 95 (1.45e-05s) | 2.41 (3.67e-07s) | 1 (1.53e-07s) | 2.91 (4.43e-07s) | 3.5 (5.34e-07s) |
([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4} | 1234-5678-1234-456 | 4.95 (3.48e-07s) | 8 (5.63e-07s) | 12.5 (8.77e-07s) | 13 (9.16e-07s) | 3.32 (2.34e-07s) | 5.36 (3.77e-07s) | 1 (7.03e-08s) | 6.64 (4.67e-07s) | 4.54 (3.19e-07s) |
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$ | john@johnmaddock.co.uk | 5.64 (1.18e-06s) | 5.64 (1.18e-06s) | 7.18 (1.51e-06s) | 7.27 (1.53e-06s) | 30.2 (6.33e-06s) | 4.18 (8.77e-07s) | 1 (2.1e-07s) | 5 (1.05e-06s) | 1.61 (3.39e-07s) |
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$ | foo12@foo.edu | 6.86 (1.03e-06s) | 6.86 (1.03e-06s) | 8.76 (1.32e-06s) | 9.27 (1.39e-06s) | 29.5 (4.43e-06s) | 4.89 (7.34e-07s) | 1 (1.5e-07s) | 6.1 (9.16e-07s) | 2.03 (3.05e-07s) |
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$ | bob.smith@foo.tv | 7.1 (1.05e-06s) | 7.1 (1.05e-06s) | 8.9 (1.32e-06s) | 8.65 (1.28e-06s) | 31.5 (4.65e-06s) | 4.84 (7.15e-07s) | 1 (1.48e-07s) | 6.32 (9.35e-07s) | 2.13 (3.15e-07s) |
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$ | EH10 2QQ | 3.9 (1.81e-07s) | 5.95 (2.77e-07s) | 11.7 (5.44e-07s) | 10.9 (5.05e-07s) | 4.21 (1.96e-07s) | 4.62 (2.15e-07s) | 1 (4.65e-08s) | 5.33 (2.48e-07s) | 6.05 (2.81e-07s) |
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$ | G1 1AA | 3.54 (1.65e-07s) | 5.85 (2.72e-07s) | 10.7 (4.96e-07s) | 10.9 (5.05e-07s) | 3.9 (1.81e-07s) | 4.62 (2.15e-07s) | 1 (4.65e-08s) | 5.54 (2.57e-07s) | 5.95 (2.77e-07s) |
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$ | SW1 1ZZ | 3.85 (1.81e-07s) | 5.87 (2.77e-07s) | 10.5 (4.96e-07s) | 10.1 (4.77e-07s) | 3.95 (1.86e-07s) | 4.56 (2.15e-07s) | 1 (4.71e-08s) | 5.27 (2.48e-07s) | 5.97 (2.81e-07s) |
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$ | 4/1/2001 | 3.32 (1.74e-07s) | 5.09 (2.67e-07s) | 8.73 (4.58e-07s) | 8.91 (4.67e-07s) | 3.68 (1.93e-07s) | 4.14 (2.17e-07s) | 1 (5.25e-08s) | 4.64 (2.43e-07s) | 2.91 (1.53e-07s) |
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$ | 12/12/2001 | 3.27 (1.72e-07s) | 5 (2.62e-07s) | 9.27 (4.86e-07s) | 9.64 (5.05e-07s) | 3.91 (2.05e-07s) | 4.14 (2.17e-07s) | 1 (5.25e-08s) | 5.36 (2.81e-07s) | 5.55 (2.91e-07s) |
^[-+]?[[:digit:]]*\.?[[:digit:]]*$ | 123 | 3.03 (1.34e-07s) | 5.03 (2.22e-07s) | 10.3 (4.53e-07s) | 10.2 (4.48e-07s) | 8.11 (3.58e-07s) | 4.97 (2.19e-07s) | 1 (4.41e-08s) | 5.3 (2.34e-07s) | 5.84 (2.57e-07s) |
^[-+]?[[:digit:]]*\.?[[:digit:]]*$ | +3.14159 | 3 (1.57e-07s) | 4.64 (2.43e-07s) | 10.5 (5.53e-07s) | 9.45 (4.96e-07s) | 10.2 (5.34e-07s) | 4.55 (2.38e-07s) | 1 (5.25e-08s) | 4.64 (2.43e-07s) | 5.36 (2.81e-07s) |
^[-+]?[[:digit:]]*\.?[[:digit:]]*$ | -3.14159 | 2.97 (1.57e-07s) | 4.58 (2.43e-07s) | 10.4 (5.53e-07s) | 9.35 (4.96e-07s) | 10.1 (5.34e-07s) | 4.49 (2.38e-07s) | 1 (5.3e-08s) | 4.94 (2.62e-07s) | 5.39 (2.86e-07s) |
© Copyright John Maddock 2003
+Use, modification and distribution are subject to the Boost Software License, + Version 1.0. (See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt)
+ + + + diff --git a/doc/html/boost_regex/background_information/performance.html b/doc/html/boost_regex/background_information/performance.html index 283e19029..f20fffabc 100644 --- a/doc/html/boost_regex/background_information/performance.html +++ b/doc/html/boost_regex/background_information/performance.html @@ -42,6 +42,10 @@ Gcc 3.2 (cygwin) (non-recursive Boost.Regex implementation). +PCRE | "; #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + os << "PCRE JIT | "; +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) os << "Dynamic Xpressive | "; #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + os << "RE2 | "; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) os << "std::regex | "; @@ -362,6 +396,17 @@ void output_html_results(bool show_description, const std::string& tagname) } } #endif +#if defined(BOOST_HAS_PCRE_JIT) + if(time_pcre_jit == true) + { + print_result(os, first->pcre_jit_time, first->factor); + if(first->pcre_jit_time > 0) + { + pcre_jit_total += first->pcre_jit_time / first->factor; + ++pcre_jit_test_count; + } + } +#endif #if defined(BOOST_HAS_XPRESSIVE) if(time_xpressive == true) { @@ -373,6 +418,17 @@ void output_html_results(bool show_description, const std::string& tagname) } } #endif +#if defined(BOOST_HAS_RE2) + if(time_re2 == true) + { + print_result(os, first->re2_time, first->factor); + if(first->re2_time > 0) + { + re2_total += first->re2_time / first->factor; + ++re2_test_count; + } + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -450,12 +506,24 @@ std::string get_averages_table() os << "PCRE | "; } #endif +#ifdef BOOST_HAS_PCRE_JIT + if(time_pcre_jit == true) + { + os << "PCRE JIT | "; + } +#endif #ifdef BOOST_HAS_XPRESSIVE if(time_xpressive == true) { os << "Dynamic Xpressive | "; } #endif +#ifdef BOOST_HAS_RE2 + if(time_re2 == true) + { + os << "google RE2 | "; + } +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) { @@ -473,25 +541,31 @@ std::string 
get_averages_table() os << "" << (greta_total / greta_test_count) << " | \n"; if(time_safe_greta == true) os << "" << (safe_greta_total / safe_greta_test_count) << " | \n"; -#endif -#if defined(BOOST_HAS_POSIX) - if(time_boost == true) - os << "" << (boost_total / boost_test_count) << " | \n"; #endif if(time_boost == true) os << "" << (boost_total / boost_test_count) << " | \n"; if(time_localised_boost == true) os << "" << (locale_boost_total / locale_boost_test_count) << " | \n"; +#if defined(BOOST_HAS_POSIX) if(time_posix == true) os << "" << (posix_total / posix_test_count) << " | \n"; +#endif #if defined(BOOST_HAS_PCRE) if(time_pcre == true) os << "" << (pcre_total / pcre_test_count) << " | \n"; #endif +#if defined(BOOST_HAS_PCRE_JIT) + if(time_pcre_jit == true) + os << "" << (pcre_jit_total / pcre_jit_test_count) << " | \n"; +#endif #if defined(BOOST_HAS_XPRESSIVE) if(time_xpressive == true) os << "" << (xpressive_total / xpressive_test_count) << " | \n"; #endif +#if defined(BOOST_HAS_RE2) + if(time_re2 == true) + os << "" << (re2_total / re2_test_count) << " | \n"; +#endif #ifndef BOOST_NO_CXX11_HDR_REGEX if(time_std == true) os << "" << (std_total / std_test_count) << " | \n"; diff --git a/performance/input.html b/performance/input.html index 425dedebc..706229392 100644 --- a/performance/input.html +++ b/performance/input.html @@ -60,7 +60,7 @@