Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: downloads/boost_1_33_1/libs/regex/performance/main.cpp @ 13

Last change on this file since 13 was 12, checked in by landauf, 17 years ago

added boost

File size: 9.8 KB
Line 
1/*
2 *
3 * Copyright (c) 2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12#include <iostream>
13#include <fstream>
14#include <iterator>
15#include <cassert>
16#include <boost/test/execution_monitor.hpp>
17#include "regex_comparison.hpp"
18
19
20void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase)
21{
22   double time;
23   results r(re, description);
24
25   std::cout << "Testing: \"" << re << "\" against \"" << description << "\"" << std::endl;
26
27#ifdef BOOST_HAS_GRETA
28   if(time_greta == true)
29   {
30      time = g::time_match(re, text, icase);
31      r.greta_time = time;
32      std::cout << "\tGRETA regex: " << time << "s\n";
33   }
34   if(time_safe_greta == true)
35   {
36      time = gs::time_match(re, text, icase);
37      r.safe_greta_time = time;
38      std::cout << "\tSafe GRETA regex: " << time << "s\n";
39   }
40#endif
41   if(time_boost == true)
42   {
43      time = b::time_match(re, text, icase);
44      r.boost_time = time;
45      std::cout << "\tBoost regex: " << time << "s\n";
46   }
47   if(time_localised_boost == true)
48   {
49      time = bl::time_match(re, text, icase);
50      r.localised_boost_time = time;
51      std::cout << "\tBoost regex (C++ locale): " << time << "s\n";
52   }
53#ifdef BOOST_HAS_POSIX
54   if(time_posix == true)
55   {
56      time = posix::time_match(re, text, icase);
57      r.posix_time = time;
58      std::cout << "\tPOSIX regex: " << time << "s\n";
59   }
60#endif
61#ifdef BOOST_HAS_PCRE
62   if(time_pcre == true)
63   {
64      time = pcr::time_match(re, text, icase);
65      r.pcre_time = time;
66      std::cout << "\tPCRE regex: " << time << "s\n";
67   }
68#endif
69#ifdef BOOST_HAS_XPRESSIVE
70   if(time_xpressive == true)
71   {
72      time = dxpr::time_match(re, text, icase);
73      r.xpressive_time = time;
74      std::cout << "\txpressive regex: " << time << "s\n";
75   }
76#endif
77   r.finalise();
78   result_list.push_back(r);
79}
80
81void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase)
82{
83   std::cout << "Testing: " << re << std::endl;
84
85   double time;
86   results r(re, description);
87
88#ifdef BOOST_HAS_GRETA
89   if(time_greta == true)
90   {
91      time = g::time_find_all(re, text, icase);
92      r.greta_time = time;
93      std::cout << "\tGRETA regex: " << time << "s\n";
94   }
95   if(time_safe_greta == true)
96   {
97      time = gs::time_find_all(re, text, icase);
98      r.safe_greta_time = time;
99      std::cout << "\tSafe GRETA regex: " << time << "s\n";
100   }
101#endif
102   if(time_boost == true)
103   {
104      time = b::time_find_all(re, text, icase);
105      r.boost_time = time;
106      std::cout << "\tBoost regex: " << time << "s\n";
107   }
108   if(time_localised_boost == true)
109   {
110      time = bl::time_find_all(re, text, icase);
111      r.localised_boost_time = time;
112      std::cout << "\tBoost regex (C++ locale): " << time << "s\n";
113   }
114#ifdef BOOST_HAS_POSIX
115   if(time_posix == true)
116   {
117      time = posix::time_find_all(re, text, icase);
118      r.posix_time = time;
119      std::cout << "\tPOSIX regex: " << time << "s\n";
120   }
121#endif
122#ifdef BOOST_HAS_PCRE
123   if(time_pcre == true)
124   {
125      time = pcr::time_find_all(re, text, icase);
126      r.pcre_time = time;
127      std::cout << "\tPCRE regex: " << time << "s\n";
128   }
129#endif
130#ifdef BOOST_HAS_XPRESSIVE
131   if(time_xpressive == true)
132   {
133      time = dxpr::time_find_all(re, text, icase);
134      r.xpressive_time = time;
135      std::cout << "\txpressive regex: " << time << "s\n";
136   }
137#endif
138   r.finalise();
139   result_list.push_back(r);
140}
141
142int cpp_main(int argc, char * argv[])
143{
144   // start by processing the command line args:
145   if(argc < 2)
146      return show_usage();
147   int result = 0;
148   for(int c = 1; c < argc; ++c)
149   {
150      result += handle_argument(argv[c]);
151   }
152   if(result)
153      return result;
154
155   if(test_matches)
156   {
157      // start with a simple test, this is basically a measure of the minimal overhead
158      // involved in calling a regex matcher:
159      test_match("abc", "abc");
160      // these are from the regex docs:
161      test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string");
162      test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456");
163      // these are from http://www.regxlib.com/
164      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk");
165      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu");
166      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv");
167      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ");
168      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA");
169      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ");
170      test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001");
171      test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001");
172      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123");
173      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159");
174      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159");
175   }
176   output_html_results(true, "%short_matches%");
177
178   std::string file_contents;
179
180   if(test_code)
181   {
182      load_file(file_contents, "../../../boost/crc.hpp");
183
184      const char* highlight_expression = // preprocessor directives: index 1
185                              "(^[ \t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|"
186                              // comment: index 2
187                              "(//[^\\n]*|/\\*.*?\\*/)|"
188                              // literals: index 3
189                              "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
190                              // string literals: index 4
191                              "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
192                              // keywords: index 5
193                              "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
194                              "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
195                              "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
196                              "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
197                              "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
198                              "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
199                              "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
200                              "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
201                              "|using|virtual|void|volatile|wchar_t|while)\\>"
202                              ;
203
204      const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" 
205                   "(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?" 
206                   "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
207                   "(\\{|:[^;\\{()]*\\{)";
208
209      const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)";
210      const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)";
211
212
213      test_find_all(class_expression, file_contents);
214      test_find_all(highlight_expression, file_contents);
215      test_find_all(include_expression, file_contents);
216      test_find_all(boost_include_expression, file_contents);
217   }
218   output_html_results(false, "%code_search%");
219
220   if(test_html)
221   {
222      load_file(file_contents, "../../../libs/libraries.htm");
223      test_find_all("beman|john|dave", file_contents, true);
224      test_find_all("<p>.*?</p>", file_contents, true);
225      test_find_all("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
226      test_find_all("<h[12345678][^>]*>.*?</h[12345678]>", file_contents, true);
227      test_find_all("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
228      test_find_all("<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents, true);
229   }
230   output_html_results(false, "%html_search%");
231
232   if(test_short_twain)
233   {
234      load_file(file_contents, "short_twain.txt");
235
236      test_find_all("Twain", file_contents);
237      test_find_all("Huck[[:alpha:]]+", file_contents);
238      test_find_all("[[:alpha:]]+ing", file_contents);
239      test_find_all("^[^\n]*?Twain", file_contents);
240      test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
241      test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
242   }
243   output_html_results(false, "%short_twain_search%");
244
245   if(test_long_twain)
246   {
247      load_file(file_contents, "mtent13.txt");
248
249      test_find_all("Twain", file_contents);
250      test_find_all("Huck[[:alpha:]]+", file_contents);
251      test_find_all("[[:alpha:]]+ing", file_contents);
252      test_find_all("^[^\n]*?Twain", file_contents);
253      test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
254      time_posix = false;
255      test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
256      time_posix = true;
257   }   
258   output_html_results(false, "%long_twain_search%");
259
260   output_final_html();
261   return 0;
262}
263
264#include <boost/test/included/prg_exec_monitor.hpp>
265
Note: See TracBrowser for help on using the repository browser.