Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: code/branches/doc/src/libraries/util/SubString.cc @ 7331

Last change on this file since 7331 was 7327, checked in by landauf, 15 years ago
added documentation
Property svn:eol-style set to `native`
File size: 19.2 KB

Line
1	/*
2	* ORXONOX - the hottest 3D action shooter ever to exist
3	* > www.orxonox.net <
4	*
5	*
6	* License notice:
7	*
8	* This program is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU General Public License
10	* as published by the Free Software Foundation; either version 2
11	* of the License, or (at your option) any later version.
12	*
13	* This program is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	* GNU General Public License for more details.
17	*
18	* You should have received a copy of the GNU General Public License
19	* along with this program; if not, write to the Free Software
20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21	*
22	* Author:
23	* Christian Meyer
24	* Co-authors:
25	* Benjamin Grauer
26	*
27
28	//
29	// splitLine
30	// STL string tokenizer
31	//
32	// Created by Clemens Wacha.
33	// Version 1.0
34	// Copyright (c) 2005 Clemens Wacha. All rights reserved.
35	//
36
37	* Extended by Fabian 'x3n' Landau by the SL_PARENTHESES mode.
38	*/
39
40	#include "SubString.h"
41	#include <cstdio>
42	#include "Debug.h"
43
44	namespace orxonox
45	{
46	const std::string SubString::WhiteSpaces = " \n\t";
47	const std::string SubString::WhiteSpacesWithComma = " \n\t,";
48	const SubString SubString::NullSubString = SubString();
49
50	/**
51	@brief Default constructor.
52	*/
53	SubString::SubString()
54	{
55	}
56
57	/**
58	@brief Splits a string into multiple tokens.
59	@param line The line to split
60	@param delimiters Multiple characters at which to split the line
61	@param delimiterNeighbours Neighbours of the delimiters that will be erased as well (for example white-spaces)
62	@param bAllowEmptyEntries If true, empty tokens are also added to the SubString (if there are two delimiters without a char in between)
63	@param escapeChar The escape character that is used to escape safemode chars (for example if you want to use a quotation mark between two other quotation marks).
64	@param bRemoveEscapeChar If true, the escape char is removed from the tokens
65	@param safemodeChar Within these characters splitting won't happen (usually the quotation marks)
66	@param bRemoveSafemodeChar Removes the safemodeChar from the beginning and the ending of a token
67	@param openparenthesisChar The beginning of a safemode is marked with this (usually an opening brace)
68	@param closeparenthesisChar The ending of a safemode is marked with this (usually a closing brace)
69	@param bRemoveParenthesisChars Removes the parenthesis chars from the beginning and the ending of a token
70	@param commentChar The comment character (used to ignore the part of the line after the comment char).
71	*/
72	SubString::SubString(const std::string& line,
73	const std::string& delimiters, const std::string& delimiterNeighbours, bool bAllowEmptyEntries,
74	char escapeChar, bool bRemoveEscapeChar, char safemodeChar, bool bRemoveSafemodeChar,
75	char openparenthesisChar, char closeparenthesisChar, bool bRemoveParenthesisChars, char commentChar)
76	{
77	SubString::splitLine(this->tokens_, this->bTokenInSafemode_, line, delimiters, delimiterNeighbours, bAllowEmptyEntries, escapeChar, bRemoveEscapeChar, safemodeChar, bRemoveSafemodeChar, openparenthesisChar, closeparenthesisChar, bRemoveParenthesisChars, commentChar);
78	}
79
80	/**
81	@brief creates a new SubString based on a subset of an other SubString.
82	@param other The other SubString
83	@param begin The beginning of the subset
84
85	The subset ranges from the token with index @a begin to the end of the tokens.
86	If @a begin is greater than the greatest index, the new SubString will be empty.
87	*/
88	SubString::SubString(const SubString& other, unsigned int begin)
89	{
90	for (unsigned int i = begin; i < other.size(); ++i)
91	{
92	this->tokens_.push_back(other[i]);
93	this->bTokenInSafemode_.push_back(other.isInSafemode(i));
94	}
95	}
96
97	/**
98	@brief creates a new SubString based on a subset of an other SubString.
99	@param other The other SubString
100	@param begin The beginning of the subset
101	@param end The end of the subset
102
103	The subset ranges from the token with index @a begin until (but not including) the token with index @a end.
104	If @a begin or @a end are beyond the allowed index, the resulting SubString will be empty.
105	*/
106	SubString::SubString(const SubString& other, unsigned int begin, unsigned int end)
107	{
108	for (unsigned int i = begin; i < std::min(other.size(), end); ++i)
109	{
110	this->tokens_.push_back(other[i]);
111	this->bTokenInSafemode_.push_back(other.isInSafemode(i));
112	}
113	}
114
115	/**
116	@brief Creates a SubString from a count and values set.
117	@param argc The number of arguments
118	@param argv An array of pointers to the arguments
119	*/
120	SubString::SubString(unsigned int argc, const char** argv)
121	{
122	for(unsigned int i = 0; i < argc; ++i)
123	{
124	this->tokens_.push_back(std::string(argv[i]));
125	this->bTokenInSafemode_.push_back(false);
126	}
127	}
128
129	/**
130	@brief Destructor
131	*/
132	SubString::~SubString()
133	{ }
134
135	/**
136	@brief Stores the tokens of @a other in this SubString
137	@return This SubString.
138	*/
139	SubString& SubString::operator=(const SubString& other)
140	{
141	this->tokens_ = other.tokens_;
142	this->bTokenInSafemode_ = other.bTokenInSafemode_;
143	return *this;
144	}
145
146	/**
147	@brief Compares this SubString to another SubString and returns true if they contain the same values.
148	*/
149	bool SubString::operator==(const SubString& other) const
150	{
151	return ((this->tokens_ == other.tokens_) && (this->bTokenInSafemode_ == other.bTokenInSafemode_));
152	}
153
154	/**
155	@copydoc operator==
156	*/
157	bool SubString::compare(const SubString& other) const
158	{
159	return (*this == other);
160	}
161
162	/**
163	@brief Compares this SubString to another SubString and returns true if the first @a length values match.
164	@param other The other SubString
165	@param length How many tokens to compare
166	*/
167	bool SubString::compare(const SubString& other, unsigned int length) const
168	{
169	if (length > this->size() \|\| length > other.size())
170	return false;
171
172	for (unsigned int i = 0; i < length; ++i)
173	if ((this->tokens_[i] != other.tokens_[i]) \|\| (this->bTokenInSafemode_[i] != other.bTokenInSafemode_[i]))
174	return false;
175	return true;
176	}
177
178	/**
179	@brief Concatenates the tokens of two SubStrings and returns the resulting new SubString
180	@return A new SubString that contains the tokens of this and the other SubString
181	*/
182	SubString SubString::operator+(const SubString& other) const
183	{
184	return SubString(*this) += other;
185	}
186
187	/**
188	@brief Appends the tokens of @a other to this SubString
189	@return This SubString
190	*/
191	SubString& SubString::operator+=(const SubString& other)
192	{
193	for (unsigned int i = 0; i < other.size(); ++i)
194	{
195	this->tokens_.push_back(other[i]);
196	this->bTokenInSafemode_.push_back(other.isInSafemode(i));
197	}
198	return *this;
199	}
200
201	/**
202	@copydoc SubString(const std::string&,const std::string&,const std::string&,bool,char,bool,char,bool,char,char,bool,char)
203	*/
204	unsigned int SubString::split(const std::string& line,
205	const std::string& delimiters, const std::string& delimiterNeighbours, bool bAllowEmptyEntries,
206	char escapeChar, bool bRemoveEscapeChar, char safemodeChar, bool bRemoveSafemodeChar,
207	char openparenthesisChar, char closeparenthesisChar, bool bRemoveParenthesisChars, char commentChar)
208	{
209	this->tokens_.clear();
210	this->bTokenInSafemode_.clear();
211	SubString::splitLine(this->tokens_, this->bTokenInSafemode_, line, delimiters, delimiterNeighbours, bAllowEmptyEntries, escapeChar, bRemoveEscapeChar, safemodeChar, bRemoveSafemodeChar, openparenthesisChar, closeparenthesisChar, bRemoveParenthesisChars, commentChar);
212	return this->tokens_.size();
213	}
214
215	/**
216	@brief Joins the tokens of this SubString using the given delimiter and returns a string.
217	@param delimiter This delimiter will be placed between each two tokens
218	@return The joined string.
219	*/
220	std::string SubString::join(const std::string& delimiter) const
221	{
222	if (!this->tokens_.empty())
223	{
224	std::string retVal = this->tokens_[0];
225	for (unsigned int i = 1; i < this->tokens_.size(); ++i)
226	retVal += delimiter + this->tokens_[i];
227	return retVal;
228	}
229	else
230	return "";
231	}
232
233	/**
234	@brief Creates a subset of this SubString.
235	@param begin The beginning of the subset
236	@return A new SubString containing the defined subset.
237
238	The subset ranges from the token with index @a begin to the end of the tokens.
239	If @a begin is greater than the greatest index, the new SubString will be empty.
240
241	This function is added for your convenience, and does the same as
242	SubString::SubString(const SubString& other, unsigned int begin)
243	*/
244	SubString SubString::subSet(unsigned int begin) const
245	{
246	return SubString(*this, begin);
247	}
248
249	/**
250	@brief Creates a subset of this SubString.
251	@param begin The beginning of the subset
252	@param end The ending of the subset
253	@return A new SubString containing the defined subset.
254
255	The subset ranges from the token with index @a begin until (but not including) the token with index @a end.
256	If @a begin or @a end are beyond the allowed index, the resulting SubString will be empty.
257
258	This function is added for your convenience, and does the same as
259	SubString::SubString(const SubString& other, unsigned int begin, unsigned int end)
260	*/
261	SubString SubString::subSet(unsigned int begin, unsigned int end) const
262	{
263	return SubString(*this, begin, end);
264	}
265
266	/**
267	@copydoc SubString(const std::string&,const std::string&,const std::string&,bool,char,bool,char,bool,char,char,bool,char)
268	@param tokens The array, where the splitted strings will be stored in
269	@param bTokenInSafemode A vector wich stores for each character of the string if it is in safemode or not
270	@param start_state The internal state of the parser
271
272	This is the actual splitting algorithm from Clemens Wacha.
273	Supports delimiters, escape characters, ignores special characters between safemodeChar and between commentChar and line end "\n".
274
275	Extended by Orxonox to support parenthesis as additional safe-mode.
276	*/
277	SubString::SPLIT_LINE_STATE
278	SubString::splitLine(std::vector<std::string>& tokens,
279	std::vector<bool>& bTokenInSafemode,
280	const std::string& line,
281	const std::string& delimiters,
282	const std::string& delimiterNeighbours,
283	bool bAllowEmptyEntries,
284	char escapeChar,
285	bool bRemoveEscapeChar,
286	char safemodeChar,
287	bool bRemoveSafemodeChar,
288	char openparenthesisChar,
289	char closeparenthesisChar,
290	bool bRemoveParenthesisChars,
291	char commentChar,
292	SPLIT_LINE_STATE start_state)
293	{
294	SPLIT_LINE_STATE state = start_state;
295	unsigned int i = 0;
296	unsigned int fallBackNeighbours = 0;
297
298	std::string token;
299	bool inSafemode = false;
300
301	if(start_state != SL_NORMAL && tokens.size() > 0)
302	{
303	token = tokens[tokens.size()-1];
304	tokens.pop_back();
305	}
306	if(start_state != SL_NORMAL && bTokenInSafemode.size() > 0)
307	{
308	inSafemode = bTokenInSafemode[bTokenInSafemode.size()-1];
309	bTokenInSafemode.pop_back();
310	}
311
312	while(i < line.size())
313	{
314	switch(state)
315	{
316	case SL_NORMAL:
317	if(line[i] == escapeChar)
318	{
319	state = SL_ESCAPE;
320	if (!bRemoveEscapeChar)
321	token += line[i];
322	}
323	else if(line[i] == safemodeChar)
324	{
325	state = SL_SAFEMODE;
326	inSafemode = true;
327	if (!bRemoveSafemodeChar)
328	token += line[i];
329	}
330	else if(line[i] == openparenthesisChar)
331	{
332	state = SL_PARENTHESES;
333	inSafemode = true;
334	if (!bRemoveParenthesisChars)
335	token += line[i];
336	}
337	else if(line[i] == commentChar)
338	{
339	if (fallBackNeighbours > 0)
340	token = token.substr(0, token.size() - fallBackNeighbours);
341	// FINISH
342	if(bAllowEmptyEntries \|\| token.size() > 0)
343	{
344	tokens.push_back(token);
345	token.clear();
346	bTokenInSafemode.push_back(inSafemode);
347	inSafemode = false;
348	}
349	token += line[i]; // EAT
350	state = SL_COMMENT;
351	}
352	else if(delimiters.find(line[i]) != std::string::npos)
353	{
354	// line[i] is a delimiter
355	if (fallBackNeighbours > 0)
356	token = token.substr(0, token.size() - fallBackNeighbours);
357	// FINISH
358	if(bAllowEmptyEntries \|\| token.size() > 0)
359	{
360	tokens.push_back(token);
361	token.clear();
362	bTokenInSafemode.push_back(inSafemode);
363	inSafemode = false;
364	}
365	state = SL_NORMAL;
366	}
367	else
368	{
369	if (delimiterNeighbours.find(line[i]) != std::string::npos)
370	{
371	if (token.size() > 0)
372	++fallBackNeighbours;
373	else
374	{
375	++i;
376	continue;
377	}
378	}
379	else
380	fallBackNeighbours = 0;
381	token += line[i]; // EAT
382	}
383	break;
384	case SL_ESCAPE:
385	if (!bRemoveSafemodeChar)
386	token += line[i];
387	else
388	{
389	if(line[i] == 'n') token += '\n';
390	else if(line[i] == 't') token += '\t';
391	else if(line[i] == 'v') token += '\v';
392	else if(line[i] == 'b') token += '\b';
393	else if(line[i] == 'r') token += '\r';
394	else if(line[i] == 'f') token += '\f';
395	else if(line[i] == 'a') token += '\a';
396	else if(line[i] == '?') token += '\?';
397	else token += line[i]; // EAT
398	}
399	state = SL_NORMAL;
400	break;
401	case SL_SAFEMODE:
402	if(line[i] == safemodeChar)
403	{
404	state = SL_NORMAL;
405	if (!bRemoveSafemodeChar)
406	token += line[i];
407	}
408	else if(line[i] == escapeChar)
409	{
410	state = SL_SAFEESCAPE;
411	}
412	else
413	{
414	token += line[i]; // EAT
415	}
416	break;
417
418	case SL_SAFEESCAPE:
419	if(line[i] == 'n') token += '\n';
420	else if(line[i] == 't') token += '\t';
421	else if(line[i] == 'v') token += '\v';
422	else if(line[i] == 'b') token += '\b';
423	else if(line[i] == 'r') token += '\r';
424	else if(line[i] == 'f') token += '\f';
425	else if(line[i] == 'a') token += '\a';
426	else if(line[i] == '?') token += '\?';
427	else token += line[i]; // EAT
428	state = SL_SAFEMODE;
429	break;
430
431	case SL_PARENTHESES:
432	if(line[i] == closeparenthesisChar)
433	{
434	state = SL_NORMAL;
435	if (!bRemoveParenthesisChars)
436	token += line[i];
437	}
438	else if(line[i] == escapeChar)
439	{
440	state = SL_PARENTHESESESCAPE;
441	}
442	else
443	{
444	token += line[i]; // EAT
445	}
446	break;
447
448	case SL_PARENTHESESESCAPE:
449	if(line[i] == 'n') token += '\n';
450	else if(line[i] == 't') token += '\t';
451	else if(line[i] == 'v') token += '\v';
452	else if(line[i] == 'b') token += '\b';
453	else if(line[i] == 'r') token += '\r';
454	else if(line[i] == 'f') token += '\f';
455	else if(line[i] == 'a') token += '\a';
456	else if(line[i] == '?') token += '\?';
457	else token += line[i]; // EAT
458	state = SL_PARENTHESES;
459	break;
460
461	case SL_COMMENT:
462	if(line[i] == '\n')
463	{
464	// FINISH
465	if(token.size() > 0)
466	{
467	tokens.push_back(token);
468	token.clear();
469	bTokenInSafemode.push_back(inSafemode);
470	inSafemode = false;
471	}
472	state = SL_NORMAL;
473	}
474	else
475	{
476	token += line[i]; // EAT
477	}
478	break;
479
480	default:
481	// nothing
482	break;
483	}
484	++i;
485	}
486
487	// FINISH
488	if (fallBackNeighbours > 0)
489	token = token.substr(0, token.size() - fallBackNeighbours);
490	if(bAllowEmptyEntries \|\| token.size() > 0)
491	{
492	tokens.push_back(token);
493	token.clear();
494	bTokenInSafemode.push_back(inSafemode);
495	inSafemode = false;
496	}
497	return(state);
498	}
499
500	/**
501	@brief Some nice debug information about this SubString.
502	*/
503	void SubString::debug() const
504	{
505	COUT(0) << "Substring-information::count=" << this->tokens_.size() << " ::";
506	for (unsigned int i = 0; i < this->tokens_.size(); ++i)
507	COUT(0) << "s" << i << "='" << this->tokens_[i].c_str() << "'::";
508	COUT(0) << std::endl;
509	}
510	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: