1 | #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
---|
2 | #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ |
---|
3 | |
---|
4 | /* Copyright (c) 2004-2005 CrystalClear Software, Inc. |
---|
5 | * Use, modification and distribution is subject to the |
---|
6 | * Boost Software License, Version 1.0. (See accompanying |
---|
7 | * file LICENSE-1.0 or http://www.boost.org/LICENSE-1.0) |
---|
8 | * Author: Jeff Garland, Bart Garst |
---|
9 | * $Date: 2005/07/01 02:58:57 $ |
---|
10 | */ |
---|
11 | |
---|
12 | |
---|
13 | #include "boost/lexical_cast.hpp" //error without? |
---|
14 | #include "boost/algorithm/string/case_conv.hpp" |
---|
15 | #include <map> |
---|
16 | #include <string> |
---|
17 | #include <vector> |
---|
18 | #include <algorithm> |
---|
19 | |
---|
20 | namespace boost { namespace date_time { |
---|
21 | |
---|
22 | |
---|
23 | template<typename charT> |
---|
24 | struct parse_match_result |
---|
25 | { |
---|
26 | parse_match_result() : |
---|
27 | match_depth(0), |
---|
28 | current_match(-1)// -1 is match_not-found value |
---|
29 | {} |
---|
30 | typedef std::basic_string<charT> string_type; |
---|
31 | string_type remaining() const |
---|
32 | { |
---|
33 | if (match_depth == cache.size()) { |
---|
34 | return string_type(); |
---|
35 | } |
---|
36 | if (current_match == -1) { |
---|
37 | return cache; |
---|
38 | } |
---|
39 | //some of the cache was used return the rest |
---|
40 | return string_type(cache, match_depth); |
---|
41 | } |
---|
42 | charT last_char() const |
---|
43 | { |
---|
44 | return cache[cache.size()-1]; |
---|
45 | } |
---|
46 | //! Returns true if more characters were parsed than was necessary |
---|
47 | /*! Should be used in conjunction with last_char() |
---|
48 | * to get the remaining character. */ |
---|
49 | bool has_remaining() const |
---|
50 | { |
---|
51 | return (cache.size() > match_depth); |
---|
52 | } |
---|
53 | |
---|
54 | // cache will hold characters that have been read from the stream |
---|
55 | string_type cache; |
---|
56 | unsigned short match_depth; |
---|
57 | short current_match; |
---|
58 | static const short PARSE_ERROR; |
---|
59 | }; |
---|
60 | template<typename charT> |
---|
61 | const short parse_match_result<charT>::PARSE_ERROR = -1; |
---|
62 | |
---|
63 | //for debug -- really only char streams... |
---|
64 | template<typename charT> |
---|
65 | std::basic_ostream<charT>& |
---|
66 | operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr) |
---|
67 | { |
---|
68 | os << "cm: " << mr.current_match |
---|
69 | << " C: '" << mr.cache |
---|
70 | << "' md: " << mr.match_depth |
---|
71 | << " R: " << mr.remaining(); |
---|
72 | return os; |
---|
73 | } |
---|
74 | |
---|
75 | |
---|
76 | |
---|
77 | //! Recursive data structure to allow efficient parsing of various strings |
---|
78 | /*! This class provides a quick lookup by building what amounts to a |
---|
79 | * tree data structure. It also features a match function which can |
---|
80 | * can handle nasty input interators by caching values as it recurses |
---|
81 | * the tree so that it can backtrack as needed. |
---|
82 | */ |
---|
83 | template<typename charT> |
---|
84 | struct string_parse_tree |
---|
85 | { |
---|
86 | typedef std::multimap<charT, string_parse_tree> ptree_coll; |
---|
87 | typedef typename ptree_coll::value_type value_type; |
---|
88 | typedef typename ptree_coll::iterator iterator; |
---|
89 | typedef typename ptree_coll::const_iterator const_iterator; |
---|
90 | typedef std::basic_string<charT> string_type; |
---|
91 | typedef std::vector<std::basic_string<charT> > collection_type; |
---|
92 | typedef parse_match_result<charT> parse_match_result_type; |
---|
93 | |
---|
94 | /*! Parameter "starting_point" desingates where the numbering begins. |
---|
95 | * A starting_point of zero will start the numbering at zero |
---|
96 | * (Sun=0, Mon=1, ...) were a starting_point of one starts the |
---|
97 | * numbering at one (Jan=1, Feb=2, ...). The default is zero, |
---|
98 | * negative vaules are not allowed */ |
---|
99 | string_parse_tree(collection_type names, unsigned int starting_point=0) |
---|
100 | { |
---|
101 | // iterate thru all the elements and build the tree |
---|
102 | unsigned short index = 0; |
---|
103 | while (index != names.size() ) { |
---|
104 | string_type s = boost::algorithm::to_lower_copy(names[index]); |
---|
105 | insert(s, static_cast<unsigned short>(index + starting_point)); |
---|
106 | index++; |
---|
107 | } |
---|
108 | //set the last tree node = index+1 indicating a value |
---|
109 | index++; |
---|
110 | } |
---|
111 | |
---|
112 | |
---|
113 | string_parse_tree(short value = -1) : |
---|
114 | m_value(value) |
---|
115 | {} |
---|
116 | ptree_coll m_next_chars; |
---|
117 | short m_value; |
---|
118 | |
---|
119 | void insert(const string_type& s, unsigned short value) |
---|
120 | { |
---|
121 | unsigned int i = 0; |
---|
122 | iterator ti; |
---|
123 | while(i < s.size()) { |
---|
124 | if (i==0) { |
---|
125 | if (i == (s.size()-1)) { |
---|
126 | ti = m_next_chars.insert(value_type(s[i], |
---|
127 | string_parse_tree<charT>(value))); |
---|
128 | } |
---|
129 | else { |
---|
130 | ti = m_next_chars.insert(value_type(s[i], |
---|
131 | string_parse_tree<charT>())); |
---|
132 | } |
---|
133 | } |
---|
134 | else { |
---|
135 | if (i == (s.size()-1)) { |
---|
136 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
---|
137 | string_parse_tree<charT>(value))); |
---|
138 | } |
---|
139 | |
---|
140 | else { |
---|
141 | ti = ti->second.m_next_chars.insert(value_type(s[i], |
---|
142 | string_parse_tree<charT>())); |
---|
143 | } |
---|
144 | |
---|
145 | } |
---|
146 | i++; |
---|
147 | } |
---|
148 | } |
---|
149 | |
---|
150 | |
---|
151 | //! Recursive function that finds a matching string in the tree. |
---|
152 | /*! Must check match_results::has_remaining() after match() is |
---|
153 | * called. This is required so the user can determine if |
---|
154 | * stream iterator is already pointing to the expected |
---|
155 | * character or not (match() might advance sitr to next char in stream). |
---|
156 | * |
---|
157 | * A parse_match_result that has been returned from a failed match |
---|
158 | * attempt can be sent in to the match function of a different |
---|
159 | * string_parse_tree to attempt a match there. Use the iterators |
---|
160 | * for the partially consumed stream, the parse_match_result object, |
---|
161 | * and '0' for the level parameter. */ |
---|
162 | short |
---|
163 | match(std::istreambuf_iterator<charT>& sitr, |
---|
164 | std::istreambuf_iterator<charT>& stream_end, |
---|
165 | parse_match_result_type& result, |
---|
166 | unsigned int& level) const |
---|
167 | { |
---|
168 | |
---|
169 | level++; |
---|
170 | charT c; |
---|
171 | // if we conditionally advance sitr, we won't have |
---|
172 | // to consume the next character past the input |
---|
173 | bool adv_itr = true; |
---|
174 | if (level > result.cache.size()) { |
---|
175 | if (sitr == stream_end) return 0; //bail - input exhausted |
---|
176 | c = static_cast<charT>(std::tolower(*sitr)); |
---|
177 | //result.cache += c; |
---|
178 | //sitr++; |
---|
179 | } |
---|
180 | else { |
---|
181 | // if we're looking for characters from the cache, |
---|
182 | // we don't want to increment sitr |
---|
183 | adv_itr = false; |
---|
184 | c = static_cast<charT>(std::tolower(result.cache[level-1])); |
---|
185 | } |
---|
186 | const_iterator litr = m_next_chars.lower_bound(c); |
---|
187 | const_iterator uitr = m_next_chars.upper_bound(c); |
---|
188 | while (litr != uitr) { // equal if not found |
---|
189 | if(adv_itr) { |
---|
190 | sitr++; |
---|
191 | result.cache += c; |
---|
192 | } |
---|
193 | if (litr->second.m_value != -1) { // -1 is default value |
---|
194 | if (result.match_depth < level) { |
---|
195 | result.current_match = litr->second.m_value; |
---|
196 | result.match_depth = static_cast<unsigned short>(level); |
---|
197 | } |
---|
198 | litr->second.match(sitr, stream_end, |
---|
199 | result, level); |
---|
200 | level--; |
---|
201 | } |
---|
202 | else { |
---|
203 | litr->second.match(sitr, stream_end, |
---|
204 | result, level); |
---|
205 | level--; |
---|
206 | } |
---|
207 | |
---|
208 | if(level <= result.cache.size()) { |
---|
209 | adv_itr = false; |
---|
210 | } |
---|
211 | |
---|
212 | litr++; |
---|
213 | } |
---|
214 | return result.current_match; |
---|
215 | |
---|
216 | } |
---|
217 | |
---|
218 | /*! Must check match_results::has_remaining() after match() is |
---|
219 | * called. This is required so the user can determine if |
---|
220 | * stream iterator is already pointing to the expected |
---|
221 | * character or not (match() might advance sitr to next char in stream). |
---|
222 | */ |
---|
223 | parse_match_result_type |
---|
224 | match(std::istreambuf_iterator<charT>& sitr, |
---|
225 | std::istreambuf_iterator<charT>& stream_end) const |
---|
226 | { |
---|
227 | // lookup to_lower of char in tree. |
---|
228 | unsigned int level = 0; |
---|
229 | // string_type cache; |
---|
230 | parse_match_result_type result; |
---|
231 | match(sitr, stream_end, result, level); |
---|
232 | return result; |
---|
233 | } |
---|
234 | |
---|
235 | void printme(std::ostream& os, int& level) |
---|
236 | { |
---|
237 | level++; |
---|
238 | iterator itr = m_next_chars.begin(); |
---|
239 | iterator end = m_next_chars.end(); |
---|
240 | // os << "starting level: " << level << std::endl; |
---|
241 | while (itr != end) { |
---|
242 | os << "level: " << level |
---|
243 | << " node: " << itr->first |
---|
244 | << " value: " << itr->second.m_value |
---|
245 | << std::endl; |
---|
246 | itr->second.printme(os, level); |
---|
247 | itr++; |
---|
248 | } |
---|
249 | level--; |
---|
250 | } |
---|
251 | |
---|
252 | void print(std::ostream& os) |
---|
253 | { |
---|
254 | int level = 0; |
---|
255 | printme(os, level); |
---|
256 | } |
---|
257 | |
---|
258 | void printmatch(std::ostream& os, charT c) |
---|
259 | { |
---|
260 | iterator litr = m_next_chars.lower_bound(c); |
---|
261 | iterator uitr = m_next_chars.upper_bound(c); |
---|
262 | os << "matches for: " << c << std::endl; |
---|
263 | while (litr != uitr) { |
---|
264 | os << " node: " << litr->first |
---|
265 | << " value: " << litr->second.m_value |
---|
266 | << std::endl; |
---|
267 | litr++; |
---|
268 | } |
---|
269 | } |
---|
270 | |
---|
271 | }; |
---|
272 | |
---|
273 | |
---|
274 | } } //namespace |
---|
275 | #endif |
---|