Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: downloads/boost_1_34_1/libs/regex/example/snippets/icu_example.cpp @ 30

Last change on this file since 30 was 29, checked in by landauf, 17 years ago

updated boost from 1_33_1 to 1_34_1

File size: 5.0 KB
Line 
1/*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13  *   LOCATION:    see http://www.boost.org for most recent version.
14  *   FILE         icu_example.cpp
15  *   VERSION      see <boost/version.hpp>
16  *   DESCRIPTION: examples of using Boost.Regex with ICU string types (UnicodeString) and UTF-8 encoded std::string.
17  */
18
19#include <boost/regex/config.hpp>
20
21#ifdef BOOST_HAS_ICU
22
#include <boost/regex/icu.hpp>
#include <assert.h>
#include <iostream>
#include <stdexcept>
#include <string>
26
27//
28// Find out if *password* meets our password requirements,
29// as defined by the regular expression *requirements*.
30//
31bool is_valid_password(const UnicodeString& password, const UnicodeString& requirements)
32{
33   return boost::u32regex_match(password, boost::make_u32regex(requirements));
34}
35
36//
37// Extract filename part of a path from a UTF-8 encoded std::string and return the result
38// as another std::string:
39//
40std::string get_filename(const std::string& path)
41{
42   boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
43   boost::smatch what;
44   if(boost::u32regex_match(path, what, r))
45   {
46      // extract $1 as a CString:
47      return what.str(1);
48   }
49   else
50   {
51      throw std::runtime_error("Invalid pathname");
52   }
53}
54
55UnicodeString extract_greek(const UnicodeString& text)
56{
57   // searches through some UTF-16 encoded text for a block encoded in Greek,
58   // this expression is imperfect, but the best we can do for now - searching
59   // for specific scripts is actually pretty hard to do right.
60   boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
61   boost::u16match what;
62   if(boost::u32regex_search(text, what, r))
63   {
64      // extract $0 as a CString:
65      return UnicodeString(what[0].first, what.length(0));
66   }
67   else
68   {
69      throw std::runtime_error("No Greek found!");
70   }
71}
72
73void enumerate_currencies(const std::string& text)
74{
75   // enumerate and print all the currency symbols, along
76   // with any associated numeric values:
77   const char* re = 
78      "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
79      "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
80      "(?(1)"
81         "|(?(2)"
82            "[[:Cf:][:Cc:][:Z*:]]*"
83         ")"
84         "[[:Sc:]]"
85      ")";
86   boost::u32regex r = boost::make_u32regex(re);
87   boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
88   while(i != j)
89   {
90      std::cout << (*i)[0] << std::endl;
91      ++i;
92   }
93}
94
95void enumerate_currencies2(const std::string& text)
96{
97   // enumerate and print all the currency symbols, along
98   // with any associated numeric values:
99   const char* re = 
100      "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
101      "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
102      "(?(1)"
103         "|(?(2)"
104            "[[:Cf:][:Cc:][:Z*:]]*"
105         ")"
106         "[[:Sc:]]"
107      ")";
108   boost::u32regex r = boost::make_u32regex(re);
109   boost::u32regex_token_iterator<std::string::const_iterator> 
110      i(boost::make_u32regex_token_iterator(text, r, 1)), j;
111   while(i != j)
112   {
113      std::cout << *i << std::endl;
114      ++i;
115   }
116}
117
118
119//
120// Take a credit card number as a string of digits,
121// and reformat it as a human readable string with "-"
122// separating each group of four digit;,
123// note that we're mixing a UTF-32 regex, with a UTF-16
124// string and a UTF-8 format specifier, and it still all
125// just works:
126//
127const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
128const char* human_format = "$1-$2-$3-$4";
129
130UnicodeString human_readable_card_number(const UnicodeString& s)
131{
132   return boost::u32regex_replace(s, e, human_format);
133}
134
135
136int main()
137{
138   // password checks using u32regex_match:
139   UnicodeString pwd = "abcDEF---";
140   UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
141   bool b = is_valid_password(pwd, pwd_check);
142   assert(b);
143   pwd = "abcD-";
144   b = is_valid_password(pwd, pwd_check);
145   assert(!b);
146   // filename extraction with u32regex_match:
147   std::string file = "abc.hpp";
148   file = get_filename(file);
149   assert(file == "abc.hpp");
150   file = "c:\\a\\b\\c\\d.h";
151   file = get_filename(file);
152   assert(file == "d.h");
153
154   // Greek text extraction with u32regex_search:
155   UnicodeString text = L"Some where in \x0391\x039D\x0395\x0398\x0391 2004";
156   UnicodeString greek = extract_greek(text);
157   assert(greek == L"\x0391\x039D\x0395\x0398\x0391 2004");
158
159   // extract currency symbols with associated value, use iterator interface:
160   std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the £ sign encoded in UTF-8
161   enumerate_currencies(text2);
162   enumerate_currencies2(text2);
163
164   UnicodeString credit_card_number = "1234567887654321";
165   credit_card_number = human_readable_card_number(credit_card_number);
166   assert(credit_card_number == "1234-5678-8765-4321");
167   return 0;
168}
169
170#else
171
172#include <iostream>
173
//
// Fallback entry point used when Boost.Regex was built without ICU
// support: prints a notice to std::cout and exits successfully.
//
int main()
{
   static const char notice[] = "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
   std::cout << notice;
   return 0;
}
179
180
181#endif
182
Note: See TracBrowser for help on using the repository browser.