/*
 *
 * Copyright (c) 2002
 * John Maddock
 *
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
---|
11 | |
---|
12 | #include <iostream> |
---|
13 | #include <fstream> |
---|
14 | #include <iterator> |
---|
15 | #include <cassert> |
---|
16 | #include <boost/test/execution_monitor.hpp> |
---|
17 | #include "regex_comparison.hpp" |
---|
18 | |
---|
19 | |
---|
20 | void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase) |
---|
21 | { |
---|
22 | double time; |
---|
23 | results r(re, description); |
---|
24 | |
---|
25 | std::cout << "Testing: \"" << re << "\" against \"" << description << "\"" << std::endl; |
---|
26 | |
---|
27 | #ifdef BOOST_HAS_GRETA |
---|
28 | if(time_greta == true) |
---|
29 | { |
---|
30 | time = g::time_match(re, text, icase); |
---|
31 | r.greta_time = time; |
---|
32 | std::cout << "\tGRETA regex: " << time << "s\n"; |
---|
33 | } |
---|
34 | if(time_safe_greta == true) |
---|
35 | { |
---|
36 | time = gs::time_match(re, text, icase); |
---|
37 | r.safe_greta_time = time; |
---|
38 | std::cout << "\tSafe GRETA regex: " << time << "s\n"; |
---|
39 | } |
---|
40 | #endif |
---|
41 | if(time_boost == true) |
---|
42 | { |
---|
43 | time = b::time_match(re, text, icase); |
---|
44 | r.boost_time = time; |
---|
45 | std::cout << "\tBoost regex: " << time << "s\n"; |
---|
46 | } |
---|
47 | if(time_localised_boost == true) |
---|
48 | { |
---|
49 | time = bl::time_match(re, text, icase); |
---|
50 | r.localised_boost_time = time; |
---|
51 | std::cout << "\tBoost regex (C++ locale): " << time << "s\n"; |
---|
52 | } |
---|
53 | #ifdef BOOST_HAS_POSIX |
---|
54 | if(time_posix == true) |
---|
55 | { |
---|
56 | time = posix::time_match(re, text, icase); |
---|
57 | r.posix_time = time; |
---|
58 | std::cout << "\tPOSIX regex: " << time << "s\n"; |
---|
59 | } |
---|
60 | #endif |
---|
61 | #ifdef BOOST_HAS_PCRE |
---|
62 | if(time_pcre == true) |
---|
63 | { |
---|
64 | time = pcr::time_match(re, text, icase); |
---|
65 | r.pcre_time = time; |
---|
66 | std::cout << "\tPCRE regex: " << time << "s\n"; |
---|
67 | } |
---|
68 | #endif |
---|
69 | #ifdef BOOST_HAS_XPRESSIVE |
---|
70 | if(time_xpressive == true) |
---|
71 | { |
---|
72 | time = dxpr::time_match(re, text, icase); |
---|
73 | r.xpressive_time = time; |
---|
74 | std::cout << "\txpressive regex: " << time << "s\n"; |
---|
75 | } |
---|
76 | #endif |
---|
77 | r.finalise(); |
---|
78 | result_list.push_back(r); |
---|
79 | } |
---|
80 | |
---|
81 | void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase) |
---|
82 | { |
---|
83 | std::cout << "Testing: " << re << std::endl; |
---|
84 | |
---|
85 | double time; |
---|
86 | results r(re, description); |
---|
87 | |
---|
88 | #ifdef BOOST_HAS_GRETA |
---|
89 | if(time_greta == true) |
---|
90 | { |
---|
91 | time = g::time_find_all(re, text, icase); |
---|
92 | r.greta_time = time; |
---|
93 | std::cout << "\tGRETA regex: " << time << "s\n"; |
---|
94 | } |
---|
95 | if(time_safe_greta == true) |
---|
96 | { |
---|
97 | time = gs::time_find_all(re, text, icase); |
---|
98 | r.safe_greta_time = time; |
---|
99 | std::cout << "\tSafe GRETA regex: " << time << "s\n"; |
---|
100 | } |
---|
101 | #endif |
---|
102 | if(time_boost == true) |
---|
103 | { |
---|
104 | time = b::time_find_all(re, text, icase); |
---|
105 | r.boost_time = time; |
---|
106 | std::cout << "\tBoost regex: " << time << "s\n"; |
---|
107 | } |
---|
108 | if(time_localised_boost == true) |
---|
109 | { |
---|
110 | time = bl::time_find_all(re, text, icase); |
---|
111 | r.localised_boost_time = time; |
---|
112 | std::cout << "\tBoost regex (C++ locale): " << time << "s\n"; |
---|
113 | } |
---|
114 | #ifdef BOOST_HAS_POSIX |
---|
115 | if(time_posix == true) |
---|
116 | { |
---|
117 | time = posix::time_find_all(re, text, icase); |
---|
118 | r.posix_time = time; |
---|
119 | std::cout << "\tPOSIX regex: " << time << "s\n"; |
---|
120 | } |
---|
121 | #endif |
---|
122 | #ifdef BOOST_HAS_PCRE |
---|
123 | if(time_pcre == true) |
---|
124 | { |
---|
125 | time = pcr::time_find_all(re, text, icase); |
---|
126 | r.pcre_time = time; |
---|
127 | std::cout << "\tPCRE regex: " << time << "s\n"; |
---|
128 | } |
---|
129 | #endif |
---|
130 | #ifdef BOOST_HAS_XPRESSIVE |
---|
131 | if(time_xpressive == true) |
---|
132 | { |
---|
133 | time = dxpr::time_find_all(re, text, icase); |
---|
134 | r.xpressive_time = time; |
---|
135 | std::cout << "\txpressive regex: " << time << "s\n"; |
---|
136 | } |
---|
137 | #endif |
---|
138 | r.finalise(); |
---|
139 | result_list.push_back(r); |
---|
140 | } |
---|
141 | |
---|
142 | int cpp_main(int argc, char * argv[]) |
---|
143 | { |
---|
144 | // start by processing the command line args: |
---|
145 | if(argc < 2) |
---|
146 | return show_usage(); |
---|
147 | int result = 0; |
---|
148 | for(int c = 1; c < argc; ++c) |
---|
149 | { |
---|
150 | result += handle_argument(argv[c]); |
---|
151 | } |
---|
152 | if(result) |
---|
153 | return result; |
---|
154 | |
---|
155 | if(test_matches) |
---|
156 | { |
---|
157 | // start with a simple test, this is basically a measure of the minimal overhead |
---|
158 | // involved in calling a regex matcher: |
---|
159 | test_match("abc", "abc"); |
---|
160 | // these are from the regex docs: |
---|
161 | test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string"); |
---|
162 | test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456"); |
---|
163 | // these are from http://www.regxlib.com/ |
---|
164 | test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk"); |
---|
165 | test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu"); |
---|
166 | test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv"); |
---|
167 | test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ"); |
---|
168 | test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA"); |
---|
169 | test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ"); |
---|
170 | test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001"); |
---|
171 | test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001"); |
---|
172 | test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123"); |
---|
173 | test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159"); |
---|
174 | test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159"); |
---|
175 | } |
---|
176 | output_html_results(true, "%short_matches%"); |
---|
177 | |
---|
178 | std::string file_contents; |
---|
179 | |
---|
180 | if(test_code) |
---|
181 | { |
---|
182 | load_file(file_contents, "../../../boost/crc.hpp"); |
---|
183 | |
---|
184 | const char* highlight_expression = // preprocessor directives: index 1 |
---|
185 | "(^[ \t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|" |
---|
186 | // comment: index 2 |
---|
187 | "(//[^\\n]*|/\\*.*?\\*/)|" |
---|
188 | // literals: index 3 |
---|
189 | "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|" |
---|
190 | // string literals: index 4 |
---|
191 | "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|" |
---|
192 | // keywords: index 5 |
---|
193 | "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import" |
---|
194 | "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall" |
---|
195 | "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool" |
---|
196 | "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete" |
---|
197 | "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto" |
---|
198 | "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected" |
---|
199 | "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast" |
---|
200 | "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned" |
---|
201 | "|using|virtual|void|volatile|wchar_t|while)\\>" |
---|
202 | ; |
---|
203 | |
---|
204 | const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" |
---|
205 | "(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?" |
---|
206 | "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" |
---|
207 | "(\\{|:[^;\\{()]*\\{)"; |
---|
208 | |
---|
209 | const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)"; |
---|
210 | const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)"; |
---|
211 | |
---|
212 | |
---|
213 | test_find_all(class_expression, file_contents); |
---|
214 | test_find_all(highlight_expression, file_contents); |
---|
215 | test_find_all(include_expression, file_contents); |
---|
216 | test_find_all(boost_include_expression, file_contents); |
---|
217 | } |
---|
218 | output_html_results(false, "%code_search%"); |
---|
219 | |
---|
220 | if(test_html) |
---|
221 | { |
---|
222 | load_file(file_contents, "../../../libs/libraries.htm"); |
---|
223 | test_find_all("beman|john|dave", file_contents, true); |
---|
224 | test_find_all("<p>.*?</p>", file_contents, true); |
---|
225 | test_find_all("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); |
---|
226 | test_find_all("<h[12345678][^>]*>.*?</h[12345678]>", file_contents, true); |
---|
227 | test_find_all("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); |
---|
228 | test_find_all("<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents, true); |
---|
229 | } |
---|
230 | output_html_results(false, "%html_search%"); |
---|
231 | |
---|
232 | if(test_short_twain) |
---|
233 | { |
---|
234 | load_file(file_contents, "short_twain.txt"); |
---|
235 | |
---|
236 | test_find_all("Twain", file_contents); |
---|
237 | test_find_all("Huck[[:alpha:]]+", file_contents); |
---|
238 | test_find_all("[[:alpha:]]+ing", file_contents); |
---|
239 | test_find_all("^[^\n]*?Twain", file_contents); |
---|
240 | test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); |
---|
241 | test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); |
---|
242 | } |
---|
243 | output_html_results(false, "%short_twain_search%"); |
---|
244 | |
---|
245 | if(test_long_twain) |
---|
246 | { |
---|
247 | load_file(file_contents, "mtent13.txt"); |
---|
248 | |
---|
249 | test_find_all("Twain", file_contents); |
---|
250 | test_find_all("Huck[[:alpha:]]+", file_contents); |
---|
251 | test_find_all("[[:alpha:]]+ing", file_contents); |
---|
252 | test_find_all("^[^\n]*?Twain", file_contents); |
---|
253 | test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); |
---|
254 | time_posix = false; |
---|
255 | test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); |
---|
256 | time_posix = true; |
---|
257 | } |
---|
258 | output_html_results(false, "%long_twain_search%"); |
---|
259 | |
---|
260 | output_final_html(); |
---|
261 | return 0; |
---|
262 | } |
---|
263 | |
---|
264 | #include <boost/test/included/prg_exec_monitor.hpp> |
---|
265 | |
---|