Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: downloads/boost_1_34_1/boost/xpressive/detail/dynamic/parser_traits.hpp @ 44

Last change on this file since 44 was 29, checked in by landauf, 17 years ago
updated boost from 1_33_1 to 1_34_1
File size: 14.7 KB

Line
1	///////////////////////////////////////////////////////////////////////////////
2	// detail/dynamic/parser_traits.hpp
3	//
4	// Copyright 2004 Eric Niebler. Distributed under the Boost
5	// Software License, Version 1.0. (See accompanying file
6	// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7
8	#ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
9	#define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
10
11	// MS compatible compilers support #pragma once
12	#if defined(_MSC_VER) && (_MSC_VER >= 1020)
13	# pragma once
14	#endif
15
16	#include <string>
17	#include <climits>
18	#include <boost/assert.hpp>
19	#include <boost/xpressive/regex_error.hpp>
20	#include <boost/xpressive/regex_traits.hpp>
21	#include <boost/xpressive/detail/detail_fwd.hpp>
22	#include <boost/xpressive/detail/dynamic/matchable.hpp>
23	#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
24	#include <boost/xpressive/detail/utility/literals.hpp>
25	#include <boost/xpressive/detail/utility/algorithm.hpp>
26
27	namespace boost { namespace xpressive
28	{
29
30	///////////////////////////////////////////////////////////////////////////////
31	// compiler_traits
32	// this works for char and wchar_t. it must be specialized for anything else.
33	//
34	template<typename RegexTraits>
35	struct compiler_traits
36	{
37	typedef typename RegexTraits::char_type char_type;
38	typedef std::basic_string<char_type> string_type;
39	typedef typename string_type::const_iterator iterator_type;
40	typedef RegexTraits regex_traits;
41	typedef typename RegexTraits::locale_type locale_type;
42
43	///////////////////////////////////////////////////////////////////////////////
44	// constructor
45	explicit compiler_traits(RegexTraits const &traits = RegexTraits())
46	: traits_(traits)
47	, flags_(regex_constants::ECMAScript)
48	, space_(lookup_classname(traits_, "space"))
49	{
50	BOOST_ASSERT(0 != this->space_);
51	}
52
53	///////////////////////////////////////////////////////////////////////////////
54	// flags
55	regex_constants::syntax_option_type flags() const
56	{
57	return this->flags_;
58	}
59
60	///////////////////////////////////////////////////////////////////////////////
61	// flags
62	void flags(regex_constants::syntax_option_type flags)
63	{
64	this->flags_ = flags;
65	}
66
67	///////////////////////////////////////////////////////////////////////////////
68	// traits
69	regex_traits &traits()
70	{
71	return this->traits_;
72	}
73
74	regex_traits const &traits() const
75	{
76	return this->traits_;
77	}
78
79	///////////////////////////////////////////////////////////////////////////////
80	// imbue
81	locale_type imbue(locale_type const &loc)
82	{
83	locale_type oldloc = this->traits().imbue(loc);
84	this->space_ = lookup_classname(this->traits(), "space");
85	BOOST_ASSERT(0 != this->space_);
86	return oldloc;
87	}
88
89	///////////////////////////////////////////////////////////////////////////////
90	// getloc
91	locale_type getloc() const
92	{
93	return this->traits().getloc();
94	}
95
96	///////////////////////////////////////////////////////////////////////////////
97	// get_token
98	// get a token and advance the iterator
99	regex_constants::compiler_token_type get_token(iterator_type &begin, iterator_type end)
100	{
101	using namespace regex_constants;
102	if(this->eat_ws_(begin, end) == end)
103	{
104	return regex_constants::token_end_of_pattern;
105	}
106
107	switch(*begin)
108	{
109	case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end);
110	case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any;
111	case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line;
112	case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line;
113	case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin;
114	case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
115	case BOOST_XPR_CHAR_(char_type, '\|'): ++begin; return token_alternate;
116	case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
117	case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
118
119	case BOOST_XPR_CHAR_(char_type, '*'):
120	case BOOST_XPR_CHAR_(char_type, '+'):
121	case BOOST_XPR_CHAR_(char_type, '?'):
122	return token_invalid_quantifier;
123
124	case BOOST_XPR_CHAR_(char_type, '{'):
125	default:
126	return token_literal;
127	}
128	}
129
130	///////////////////////////////////////////////////////////////////////////////
131	// get_quant_spec
132	bool get_quant_spec(iterator_type &begin, iterator_type end, detail::quant_spec &spec)
133	{
134	using namespace regex_constants;
135	iterator_type old_begin;
136
137	if(this->eat_ws_(begin, end) == end)
138	{
139	return false;
140	}
141
142	switch(*begin)
143	{
144	case BOOST_XPR_CHAR_(char_type, '*'):
145	spec.min_ = 0;
146	spec.max_ = (std::numeric_limits<unsigned int>::max)();
147	break;
148
149	case BOOST_XPR_CHAR_(char_type, '+'):
150	spec.min_ = 1;
151	spec.max_ = (std::numeric_limits<unsigned int>::max)();
152	break;
153
154	case BOOST_XPR_CHAR_(char_type, '?'):
155	spec.min_ = 0;
156	spec.max_ = 1;
157	break;
158
159	case BOOST_XPR_CHAR_(char_type, '{'):
160	old_begin = this->eat_ws_(++begin, end);
161	spec.min_ = spec.max_ = detail::toi(begin, end, this->traits());
162	detail::ensure
163	(
164	begin != old_begin && begin != end, error_brace, "invalid quantifier"
165	);
166
167	if(*begin == BOOST_XPR_CHAR_(char_type, ','))
168	{
169	old_begin = this->eat_ws_(++begin, end);
170	spec.max_ = detail::toi(begin, end, this->traits());
171	detail::ensure
172	(
173	begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin
174	, error_brace, "invalid quantifier"
175	);
176
177	if(begin == old_begin)
178	{
179	spec.max_ = (std::numeric_limits<unsigned int>::max)();
180	}
181	else
182	{
183	detail::ensure
184	(
185	spec.min_ <= spec.max_, error_badbrace, "invalid quantification range"
186	);
187	}
188	}
189	else
190	{
191	detail::ensure
192	(
193	BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier"
194	);
195	}
196	break;
197
198	default:
199	return false;
200	}
201
202	spec.greedy_ = true;
203	if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
204	{
205	++begin;
206	spec.greedy_ = false;
207	}
208
209	return true;
210	}
211
212	///////////////////////////////////////////////////////////////////////////
213	// get_group_type
214	regex_constants::compiler_token_type get_group_type(iterator_type &begin, iterator_type end)
215	{
216	using namespace regex_constants;
217	if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
218	{
219	this->eat_ws_(++begin, end);
220	detail::ensure(begin != end, error_paren, "incomplete extension");
221
222	switch(*begin)
223	{
224	case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark;
225	case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression;
226	case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment;
227	case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead;
228	case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead;
229	case BOOST_XPR_CHAR_(char_type, '<'):
230	this->eat_ws_(++begin, end);
231	detail::ensure(begin != end, error_paren, "incomplete extension");
232	switch(*begin)
233	{
234	case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind;
235	case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind;
236	default:
237	throw regex_error(error_badbrace, "unrecognized extension");
238	}
239
240	case BOOST_XPR_CHAR_(char_type, 'i'):
241	case BOOST_XPR_CHAR_(char_type, 'm'):
242	case BOOST_XPR_CHAR_(char_type, 's'):
243	case BOOST_XPR_CHAR_(char_type, 'x'):
244	case BOOST_XPR_CHAR_(char_type, '-'):
245	return this->parse_mods_(begin, end);
246
247	default:
248	throw regex_error(error_badbrace, "unrecognized extension");
249	}
250	}
251
252	return token_literal;
253	}
254
255	//////////////////////////////////////////////////////////////////////////
256	// get_charset_token
257	// NOTE: white-space is never ignored in a charset.
258	regex_constants::compiler_token_type get_charset_token(iterator_type &begin, iterator_type end)
259	{
260	using namespace regex_constants;
261	BOOST_ASSERT(begin != end);
262	switch(*begin)
263	{
264	case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert;
265	case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen;
266	case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
267	case BOOST_XPR_CHAR_(char_type, '['):
268	{
269	iterator_type next = begin; ++next;
270	if(next != end && *next == BOOST_XPR_CHAR_(char_type, ':'))
271	{
272	begin = ++next;
273	return token_posix_charset_begin;
274	}
275	}
276	break;
277	case BOOST_XPR_CHAR_(char_type, ':'):
278	{
279	iterator_type next = begin; ++next;
280	if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']'))
281	{
282	begin = ++next;
283	return token_posix_charset_end;
284	}
285	}
286	break;
287	case BOOST_XPR_CHAR_(char_type, '\\'):
288	if(++begin != end)
289	{
290	switch(*begin)
291	{
292	case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace;
293	default:;
294	}
295	}
296	return token_escape;
297	default:;
298	}
299	return token_literal;
300	}
301
302	//////////////////////////////////////////////////////////////////////////
303	// get_escape_token
304	regex_constants::compiler_token_type get_escape_token(iterator_type &begin, iterator_type end)
305	{
306	using namespace regex_constants;
307	if(begin != end)
308	{
309	switch(*begin)
310	{
311	//case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell;
312	//case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control;
313	//case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape;
314	//case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed;
315	//case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline;
316	//case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab;
317	//case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab;
318	case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence;
319	case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary;
320	case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary;
321	case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end;
322	case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin;
323	case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence;
324	// Non-standard extension to ECMAScript syntax
325	case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin;
326	case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end;
327	default:; // fall-through
328	}
329	}
330
331	return token_escape;
332	}
333
334	private:
335
336	//////////////////////////////////////////////////////////////////////////
337	// parse_mods_
338	regex_constants::compiler_token_type parse_mods_(iterator_type &begin, iterator_type end)
339	{
340	using namespace regex_constants;
341	bool set = true;
342	do switch(*begin)
343	{
344	case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break;
345	case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break;
346	case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break;
347	case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break;
348	case BOOST_XPR_CHAR_(char_type, ':'): ++begin; // fall-through
349	case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark;
350	case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; // else fall-through
351	default: throw regex_error(error_paren, "unknown pattern modifier");
352	}
353	while(detail::ensure(++begin != end, error_paren, "incomplete extension"));
354	return token_no_mark;
355	}
356
357	///////////////////////////////////////////////////////////////////////////////
358	// flag_
359	void flag_(bool set, regex_constants::syntax_option_type flag)
360	{
361	this->flags_ = set ? (this->flags_ \| flag) : (this->flags_ & ~flag);
362	}
363
364	///////////////////////////////////////////////////////////////////////////
365	// is_space_
366	bool is_space_(char_type ch) const
367	{
368	return this->traits().isctype(ch, this->space_);
369	}
370
371	///////////////////////////////////////////////////////////////////////////////
372	// eat_ws_
373	iterator_type &eat_ws_(iterator_type &begin, iterator_type end)
374	{
375	if(0 != (regex_constants::ignore_white_space & this->flags()))
376	{
377	while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == begin \|\| this->is_space_(begin)))
378	{
379	if(BOOST_XPR_CHAR_(char_type, '#') == *begin++)
380	{
381	while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {}
382	}
383	else
384	{
385	for(; end != begin && this->is_space_(*begin); ++begin) {}
386	}
387	}
388	}
389
390	return begin;
391	}
392
393	regex_traits traits_;
394	regex_constants::syntax_option_type flags_;
395	typename regex_traits::char_class_type space_;
396	};
397
398	}} // namespace boost::xpressive
399
400	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: