1 | /* |
---|
2 | * |
---|
3 | * Copyright (c) 1998-2002 |
---|
4 | * John Maddock |
---|
5 | * |
---|
6 | * Use, modification and distribution are subject to the |
---|
7 | * Boost Software License, Version 1.0. (See accompanying file |
---|
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
---|
9 | * |
---|
10 | */ |
---|
11 | |
---|
12 | #include <string> |
---|
13 | #include <algorithm> |
---|
14 | #include <deque> |
---|
15 | #include <iterator> |
---|
16 | |
---|
17 | #ifdef BOOST_RE_OLD_IOSTREAM |
---|
18 | #include <iostream.h> |
---|
19 | #include <fstream.h> |
---|
20 | #else |
---|
21 | #include <iostream> |
---|
22 | #include <fstream> |
---|
23 | using std::cout; |
---|
24 | using std::cin; |
---|
25 | using std::cerr; |
---|
26 | using std::istream; |
---|
27 | using std::ostream; |
---|
28 | using std::endl; |
---|
29 | using std::ifstream; |
---|
30 | using std::streambuf; |
---|
31 | using std::getline; |
---|
32 | #endif |
---|
33 | |
---|
34 | #include <boost/config.hpp> |
---|
35 | #include <boost/regex.hpp> |
---|
36 | #include <boost/cregex.hpp> |
---|
37 | #include <boost/timer.hpp> |
---|
38 | #include <boost/smart_ptr.hpp> |
---|
39 | |
---|
40 | #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) |
---|
41 | #include <windows.h> |
---|
42 | #endif |
---|
43 | |
---|
44 | #if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi) |
---|
45 | // maybe no Koenig lookup, use using declaration instead: |
---|
46 | using namespace boost; |
---|
47 | #endif |
---|
48 | |
---|
49 | #ifndef BOOST_NO_WREGEX |
---|
//
// Stream a wide string to a narrow ostream, one character at a time.
// Each wchar_t is narrowed to char by plain conversion (no locale
// translation), which is all this example needs for echoing results.
//
ostream& operator << (ostream& os, const std::wstring& s)
{
   for(std::wstring::const_iterator pos = s.begin(); pos != s.end(); ++pos)
      os.put(static_cast<char>(*pos));
   return os;
}
---|
62 | #endif |
---|
63 | |
---|
//
// Output iterator that appends every value assigned through it to a
// string-like container S (anything with append(count, value) and a
// value_type, e.g. std::string or std::wstring).
//
// The iterator holds a pointer to the target, so the target must outlive
// every copy of the iterator.
//
template <class S>
class string_out_iterator
{
   S* out;   // destination string; never null
public:
   // Iterator traits are spelled out as public members rather than
   // inherited from std::iterator: that base is deprecated since C++17,
   // and the previous fallback typedefs were private, which hid them
   // from std::iterator_traits.
   typedef std::output_iterator_tag iterator_category;
   typedef void value_type;
   typedef void difference_type;
   typedef void pointer;
   typedef void reference;

   string_out_iterator(S& s) : out(&s) {}

   // Increment and dereference are no-ops: all the work happens in
   // the assignment below, as is usual for insert-style iterators.
   string_out_iterator& operator++() { return *this; }
   string_out_iterator& operator++(int) { return *this; }
   string_out_iterator& operator*() { return *this; }

   // Append a single character to the destination.
   string_out_iterator& operator=(typename S::value_type v)
   {
      out->append(1, v);
      return *this;
   }
};
---|
90 | |
---|
namespace boost{
#if defined(BOOST_MSVC) || (defined(__BORLANDC__) && (__BORLANDC__ == 0x550)) || defined(__SGI_STL_PORT)
//
// problem with std::getline under MSVC6sp3
// and C++ Builder 5.5, is this really that hard?
//
// Hand-rolled replacement: reads characters into s up to (and consuming,
// but not storing) the next '\n'.  Returns is so the caller can test the
// stream state.
//
istream& getline(istream& is, std::string& s)
{
   s.erase();
   char c;
   // Stop as soon as extraction fails; comparing the result of get()
   // against '\n' alone never terminates once end of input is reached.
   while(is.get(c))
   {
      if(c == '\n')
         return is;
      s.append(1, c);
   }
   // Ran out of input before seeing a newline.  std::getline only reports
   // failure when nothing at all was extracted, so for a final unterminated
   // line drop failbit and leave just eofbit set.
   if(!s.empty() && is.eof())
      is.clear(istream::eofbit);
   return is;
}
#elif defined(__CYGWIN__)
//
// Cygwin: text may arrive with DOS line endings, so strip a trailing '\r'
// from whatever std::getline produced.
//
istream& getline(istream& is, std::string& s)
{
   std::getline(is, s);
   if(s.size() && (s[s.size() -1] == '\r'))
      s.erase(s.size() - 1);
   return is;
}
#else
// Everywhere else the standard implementation is used as-is.
using std::getline;
#endif
}
---|
119 | |
---|
120 | |
---|
121 | int main(int argc, char**argv) |
---|
122 | { |
---|
123 | ifstream ifs; |
---|
124 | streambuf* pbuf = 0; |
---|
125 | if(argc == 2) |
---|
126 | { |
---|
127 | ifs.open(argv[1]); |
---|
128 | if(ifs.bad()) |
---|
129 | { |
---|
130 | cout << "Bad filename: " << argv[1] << endl; |
---|
131 | return -1; |
---|
132 | } |
---|
133 | pbuf = cin.rdbuf(ifs.rdbuf()); |
---|
134 | } |
---|
135 | |
---|
136 | boost::regex ex; |
---|
137 | boost::match_results<std::string::const_iterator> sm; |
---|
138 | #ifndef BOOST_NO_WREGEX |
---|
139 | std::wstring ws1, ws2; |
---|
140 | boost::wregex wex; |
---|
141 | boost::match_results<std::wstring::const_iterator> wsm; |
---|
142 | #endif |
---|
143 | boost::match_results<std::deque<char>::iterator> dm; |
---|
144 | std::string s1, s2, ts; |
---|
145 | std::deque<char> ds; |
---|
146 | boost::regex_t r; |
---|
147 | boost::scoped_array<boost::regmatch_t> matches; |
---|
148 | std::size_t nsubs; |
---|
149 | boost::timer t; |
---|
150 | double tim; |
---|
151 | bool result; |
---|
152 | int iters = 100; |
---|
153 | double wait_time = (std::min)(t.elapsed_min() * 1000, 1.0); |
---|
154 | |
---|
155 | while(true) |
---|
156 | { |
---|
157 | cout << "Enter expression (or \"quit\" to exit): "; |
---|
158 | boost::getline(cin, s1); |
---|
159 | if(argc == 2) |
---|
160 | cout << endl << s1 << endl; |
---|
161 | if(s1 == "quit") |
---|
162 | break; |
---|
163 | #ifndef BOOST_NO_WREGEX |
---|
164 | ws1.erase(); |
---|
165 | std::copy(s1.begin(), s1.end(), string_out_iterator<std::wstring>(ws1)); |
---|
166 | #endif |
---|
167 | try{ |
---|
168 | ex.assign(s1); |
---|
169 | #ifndef BOOST_NO_WREGEX |
---|
170 | wex.assign(ws1); |
---|
171 | #endif |
---|
172 | } |
---|
173 | catch(std::exception& e) |
---|
174 | { |
---|
175 | cout << "Error in expression: \"" << e.what() << "\"" << endl; |
---|
176 | continue; |
---|
177 | } |
---|
178 | int code = regcomp(&r, s1.c_str(), boost::REG_PERL); |
---|
179 | if(code != 0) |
---|
180 | { |
---|
181 | char buf[256]; |
---|
182 | regerror(code, &r, buf, 256); |
---|
183 | cout << "regcomp error: \"" << buf << "\"" << endl; |
---|
184 | continue; |
---|
185 | } |
---|
186 | nsubs = r.re_nsub + 1; |
---|
187 | matches.reset(new boost::regmatch_t[nsubs]); |
---|
188 | |
---|
189 | while(true) |
---|
190 | { |
---|
191 | cout << "Enter string to search (or \"quit\" to exit): "; |
---|
192 | boost::getline(cin, s2); |
---|
193 | if(argc == 2) |
---|
194 | cout << endl << s2 << endl; |
---|
195 | if(s2 == "quit") |
---|
196 | break; |
---|
197 | |
---|
198 | #ifndef BOOST_NO_WREGEX |
---|
199 | ws2.erase(); |
---|
200 | std::copy(s2.begin(), s2.end(), string_out_iterator<std::wstring>(ws2)); |
---|
201 | #endif |
---|
202 | ds.erase(ds.begin(), ds.end()); |
---|
203 | std::copy(s2.begin(), s2.end(), std::back_inserter(ds)); |
---|
204 | |
---|
205 | int i; |
---|
206 | iters = 10; |
---|
207 | tim = 1.1; |
---|
208 | |
---|
209 | #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) |
---|
210 | MSG msg; |
---|
211 | PeekMessage(&msg, 0, 0, 0, 0); |
---|
212 | Sleep(0); |
---|
213 | #endif |
---|
214 | |
---|
215 | // cache load: |
---|
216 | regex_search(s2, sm, ex); |
---|
217 | |
---|
218 | // measure time interval for basic_regex<char> |
---|
219 | do{ |
---|
220 | iters *= (tim > 0.001) ? (1.1/tim) : 100; |
---|
221 | t.restart(); |
---|
222 | for(i =0; i < iters; ++i) |
---|
223 | { |
---|
224 | result = regex_search(s2, sm, ex); |
---|
225 | } |
---|
226 | tim = t.elapsed(); |
---|
227 | }while(tim < wait_time); |
---|
228 | |
---|
229 | cout << "regex time: " << (tim * 1000000 / iters) << "us" << endl; |
---|
230 | if(result) |
---|
231 | { |
---|
232 | for(i = 0; i < sm.size(); ++i) |
---|
233 | { |
---|
234 | ts = sm[i]; |
---|
235 | cout << "\tmatch " << i << ": \""; |
---|
236 | cout << ts; |
---|
237 | cout << "\" (matched=" << sm[i].matched << ")" << endl; |
---|
238 | } |
---|
239 | cout << "\tmatch $`: \""; |
---|
240 | cout << std::string(sm[-1]); |
---|
241 | cout << "\" (matched=" << sm[-1].matched << ")" << endl; |
---|
242 | cout << "\tmatch $': \""; |
---|
243 | cout << std::string(sm[-2]); |
---|
244 | cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl; |
---|
245 | } |
---|
246 | |
---|
247 | #ifndef BOOST_NO_WREGEX |
---|
248 | // measure time interval for boost::wregex |
---|
249 | iters = 10; |
---|
250 | tim = 1.1; |
---|
251 | // cache load: |
---|
252 | regex_search(ws2, wsm, wex); |
---|
253 | do{ |
---|
254 | iters *= (tim > 0.001) ? (1.1/tim) : 100; |
---|
255 | t.restart(); |
---|
256 | for(i = 0; i < iters; ++i) |
---|
257 | { |
---|
258 | result = regex_search(ws2, wsm, wex); |
---|
259 | } |
---|
260 | tim = t.elapsed(); |
---|
261 | }while(tim < wait_time); |
---|
262 | cout << "wregex time: " << (tim * 1000000 / iters) << "us" << endl; |
---|
263 | if(result) |
---|
264 | { |
---|
265 | std::wstring tw; |
---|
266 | for(i = 0; i < wsm.size(); ++i) |
---|
267 | { |
---|
268 | tw.erase(); |
---|
269 | std::copy(wsm[i].first, wsm[i].second, string_out_iterator<std::wstring>(tw)); |
---|
270 | cout << "\tmatch " << i << ": \"" << tw; |
---|
271 | cout << "\" (matched=" << sm[i].matched << ")" << endl; |
---|
272 | } |
---|
273 | cout << "\tmatch $`: \""; |
---|
274 | tw.erase(); |
---|
275 | std::copy(wsm[-1].first, wsm[-1].second, string_out_iterator<std::wstring>(tw)); |
---|
276 | cout << tw; |
---|
277 | cout << "\" (matched=" << sm[-1].matched << ")" << endl; |
---|
278 | cout << "\tmatch $': \""; |
---|
279 | tw.erase(); |
---|
280 | std::copy(wsm[-2].first, wsm[-2].second, string_out_iterator<std::wstring>(tw)); |
---|
281 | cout << tw; |
---|
282 | cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl; |
---|
283 | } |
---|
284 | #endif |
---|
285 | |
---|
286 | // measure time interval for basic_regex<char> using a deque |
---|
287 | iters = 10; |
---|
288 | tim = 1.1; |
---|
289 | // cache load: |
---|
290 | regex_search(ds.begin(), ds.end(), dm, ex); |
---|
291 | do{ |
---|
292 | iters *= (tim > 0.001) ? (1.1/tim) : 100; |
---|
293 | t.restart(); |
---|
294 | for(i = 0; i < iters; ++i) |
---|
295 | { |
---|
296 | result = regex_search(ds.begin(), ds.end(), dm, ex); |
---|
297 | } |
---|
298 | tim = t.elapsed(); |
---|
299 | }while(tim < wait_time); |
---|
300 | cout << "regex time (search over std::deque<char>): " << (tim * 1000000 / iters) << "us" << endl; |
---|
301 | |
---|
302 | if(result) |
---|
303 | { |
---|
304 | for(i = 0; i < dm.size(); ++i) |
---|
305 | { |
---|
306 | ts.erase(); |
---|
307 | std::copy(dm[i].first, dm[i].second, string_out_iterator<std::string>(ts)); |
---|
308 | cout << "\tmatch " << i << ": \"" << ts; |
---|
309 | cout << "\" (matched=" << sm[i].matched << ")" << endl; |
---|
310 | } |
---|
311 | cout << "\tmatch $`: \""; |
---|
312 | ts.erase(); |
---|
313 | std::copy(dm[-1].first, dm[-1].second, string_out_iterator<std::string>(ts)); |
---|
314 | cout << ts; |
---|
315 | cout << "\" (matched=" << sm[-1].matched << ")" << endl; |
---|
316 | cout << "\tmatch $': \""; |
---|
317 | ts.erase(); |
---|
318 | std::copy(dm[-2].first, dm[-2].second, string_out_iterator<std::string>(ts)); |
---|
319 | cout << ts; |
---|
320 | cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl; |
---|
321 | } |
---|
322 | |
---|
323 | // measure time interval for POSIX matcher: |
---|
324 | iters = 10; |
---|
325 | tim = 1.1; |
---|
326 | // cache load: |
---|
327 | regexec(&r, s2.c_str(), nsubs, matches.get(), 0); |
---|
328 | do{ |
---|
329 | iters *= (tim > 0.001) ? (1.1/tim) : 100; |
---|
330 | t.restart(); |
---|
331 | for(i = 0; i < iters; ++i) |
---|
332 | { |
---|
333 | result = regexec(&r, s2.c_str(), nsubs, matches.get(), 0); |
---|
334 | } |
---|
335 | tim = t.elapsed(); |
---|
336 | }while(tim < wait_time); |
---|
337 | cout << "POSIX regexec time: " << (tim * 1000000 / iters) << "us" << endl; |
---|
338 | |
---|
339 | if(result == 0) |
---|
340 | { |
---|
341 | for(i = 0; i < nsubs; ++i) |
---|
342 | { |
---|
343 | if(matches[i].rm_so >= 0) |
---|
344 | { |
---|
345 | ts.assign(s2.begin() + matches[i].rm_so, s2.begin() + matches[i].rm_eo); |
---|
346 | cout << "\tmatch " << i << ": \"" << ts << "\" (matched=" << (matches[i].rm_so != -1) << ")"<< endl; |
---|
347 | } |
---|
348 | else |
---|
349 | cout << "\tmatch " << i << ": \"\" (matched=" << (matches[i].rm_so != -1) << ")" << endl; // no match |
---|
350 | } |
---|
351 | cout << "\tmatch $`: \""; |
---|
352 | ts.erase(); |
---|
353 | ts.assign(s2.begin(), s2.begin() + matches[0].rm_so); |
---|
354 | cout << ts; |
---|
355 | cout << "\" (matched=" << (matches[0].rm_so != 0) << ")" << endl; |
---|
356 | cout << "\tmatch $': \""; |
---|
357 | ts.erase(); |
---|
358 | ts.assign(s2.begin() + matches[0].rm_eo, s2.end()); |
---|
359 | cout << ts; |
---|
360 | cout << "\" (matched=" << (matches[0].rm_eo != s2.size()) << ")" << endl << endl; |
---|
361 | } |
---|
362 | } |
---|
363 | regfree(&r); |
---|
364 | } |
---|
365 | |
---|
366 | if(pbuf) |
---|
367 | { |
---|
368 | cin.rdbuf(pbuf); |
---|
369 | ifs.close(); |
---|
370 | } |
---|
371 | |
---|
372 | return 0; |
---|
373 | } |
---|
374 | |
---|
#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
// main() calls PeekMessage in this configuration, which lives in user32.lib.
// "#pragma comment(lib, ...)" asks the linker to pull that library in;
// "#pragma message" would only print text at compile time.
#pragma comment(lib, "user32.lib")
#endif
---|
378 | |
---|
379 | |
---|
380 | |
---|
381 | |
---|
382 | |
---|
383 | |
---|
384 | |
---|
385 | |
---|
386 | |
---|
387 | |
---|