Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: code/trunk/src/libraries/util/SubString.cc @ 11456

Last change on this file since 11456 was 11071, checked in by landauf, 9 years ago
merged branch cpp11_v3 back to trunk
Property svn:eol-style set to `native`
File size: 18.2 KB

Line
1	/*
2	* ORXONOX - the hottest 3D action shooter ever to exist
3	* > www.orxonox.net <
4	*
5	*
6	* License notice:
7	*
8	* This program is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU General Public License
10	* as published by the Free Software Foundation; either version 2
11	* of the License, or (at your option) any later version.
12	*
13	* This program is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	* GNU General Public License for more details.
17	*
18	* You should have received a copy of the GNU General Public License
19	* along with this program; if not, write to the Free Software
20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21	*
22	* Author:
23	* Christian Meyer
24	* Co-authors:
25	* Benjamin Grauer
26	*
27
28	//
29	// splitLine
30	// STL string tokenizer
31	//
32	// Created by Clemens Wacha.
33	// Version 1.0
34	// Copyright (c) 2005 Clemens Wacha. All rights reserved.
35	//
36
37	* Extended by Fabian 'x3n' Landau by the SL_PARENTHESES mode.
38	*/
39
40	/**
41	@file
42	@brief Implementation of the SubString class.
43	*/
44
45	#include "SubString.h"
46	#include <cstdio>
47	#include "Output.h"
48
49	namespace orxonox
50	{
51	const std::string SubString::WhiteSpaces = " \n\t";
52	const std::string SubString::WhiteSpacesWithComma = " \n\t,";
53	const SubString SubString::NullSubString = SubString();
54
55	/**
56	@brief Default constructor.
57	*/
58	SubString::SubString()
59	{
60	}
61
62	/**
63	@brief Splits a string into multiple tokens.
64	@param line The line to split
65	@param delimiters Multiple characters at which to split the line
66	@param delimiterNeighbours Neighbours of the delimiters that will be erased as well (for example white-spaces)
67	@param bAllowEmptyEntries If true, empty tokens are also added to the SubString (if there are two delimiters without a char in between)
68	@param escapeChar The escape character that is used to escape safemode chars (for example if you want to use a quotation mark between two other quotation marks).
69	@param bRemoveEscapeChar If true, the escape char is removed from the tokens
70	@param safemodeChar Within these characters splitting won't happen (usually the quotation marks)
71	@param bRemoveSafemodeChar Removes the safemodeChar from the beginning and the ending of a token
72	@param openparenthesisChar The beginning of a safemode is marked with this (usually an opening brace)
73	@param closeparenthesisChar The ending of a safemode is marked with this (usually a closing brace)
74	@param bRemoveParenthesisChars Removes the parenthesis chars from the beginning and the ending of a token
75	@param commentChar The comment character (used to ignore the part of the line after the comment char).
76	*/
77	SubString::SubString(const std::string& line,
78	const std::string& delimiters, const std::string& delimiterNeighbours, bool bAllowEmptyEntries,
79	char escapeChar, bool bRemoveEscapeChar, char safemodeChar, bool bRemoveSafemodeChar,
80	char openparenthesisChar, char closeparenthesisChar, bool bRemoveParenthesisChars, char commentChar)
81	{
82	SubString::splitLine(this->tokens_, this->bTokenInSafemode_, line, delimiters, delimiterNeighbours, bAllowEmptyEntries, escapeChar, bRemoveEscapeChar, safemodeChar, bRemoveSafemodeChar, openparenthesisChar, closeparenthesisChar, bRemoveParenthesisChars, commentChar);
83	}
84
85	/**
86	@brief creates a new SubString based on a subset of an other SubString.
87	@param other The other SubString
88	@param begin The beginning of the subset
89	@param length The length of the subset
90
91	The subset ranges from the token with index @a begin and contains @a length elements.
92	*/
93	SubString::SubString(const SubString& other, size_t begin, size_t length)
94	{
95	for (size_t i = 0; i < length; ++i)
96	{
97	if (begin + i >= other.size())
98	break;
99
100	this->tokens_.push_back(other[begin + i]);
101	this->bTokenInSafemode_.push_back(other.isInSafemode(begin + i));
102	}
103	}
104
105	/**
106	@brief Creates a SubString from a count and values set.
107	@param argc The number of arguments
108	@param argv An array of pointers to the arguments
109	*/
110	SubString::SubString(size_t argc, const char** argv)
111	{
112	for (size_t i = 0; i < argc; ++i)
113	{
114	this->tokens_.emplace_back(argv[i]);
115	this->bTokenInSafemode_.push_back(false);
116	}
117	}
118
119	/**
120	@brief Destructor
121	*/
122	SubString::~SubString()
123	{ }
124
125	/**
126	@brief Stores the tokens of @a other in this SubString
127	@return This SubString.
128	*/
129	SubString& SubString::operator=(const SubString& other)
130	{
131	this->tokens_ = other.tokens_;
132	this->bTokenInSafemode_ = other.bTokenInSafemode_;
133	return *this;
134	}
135
136	/**
137	@brief Compares this SubString to another SubString and returns true if they contain the same values.
138	*/
139	bool SubString::operator==(const SubString& other) const
140	{
141	return ((this->tokens_ == other.tokens_) && (this->bTokenInSafemode_ == other.bTokenInSafemode_));
142	}
143
144	/**
145	@brief Compares this SubString to another SubString and returns true if the first @a length values match.
146	@param other The other SubString
147	@param length How many tokens to compare
148	*/
149	bool SubString::compare(const SubString& other, size_t length) const
150	{
151	if (std::min(length, this->size()) != std::min(length, other.size()))
152	return false;
153
154	for (size_t i = 0; i < std::min(length, this->size()); ++i)
155	if ((this->tokens_[i] != other.tokens_[i]) \|\| (this->bTokenInSafemode_[i] != other.bTokenInSafemode_[i]))
156	return false;
157
158	return true;
159	}
160
161	/**
162	@brief Concatenates the tokens of two SubStrings and returns the resulting new SubString
163	@return A new SubString that contains the tokens of this and the other SubString
164	*/
165	SubString SubString::operator+(const SubString& other) const
166	{
167	return SubString(*this) += other;
168	}
169
170	/**
171	@brief Appends the tokens of @a other to this SubString
172	@return This SubString
173	*/
174	SubString& SubString::operator+=(const SubString& other)
175	{
176	for (size_t i = 0; i < other.size(); ++i)
177	{
178	this->tokens_.push_back(other[i]);
179	this->bTokenInSafemode_.push_back(other.isInSafemode(i));
180	}
181	return *this;
182	}
183
184	/**
185	@copydoc SubString(const std::string&,const std::string&,const std::string&,bool,char,bool,char,bool,char,char,bool,char)
186	*/
187	size_t SubString::split(const std::string& line,
188	const std::string& delimiters, const std::string& delimiterNeighbours, bool bAllowEmptyEntries,
189	char escapeChar, bool bRemoveEscapeChar, char safemodeChar, bool bRemoveSafemodeChar,
190	char openparenthesisChar, char closeparenthesisChar, bool bRemoveParenthesisChars, char commentChar)
191	{
192	this->tokens_.clear();
193	this->bTokenInSafemode_.clear();
194	SubString::splitLine(this->tokens_, this->bTokenInSafemode_, line, delimiters, delimiterNeighbours, bAllowEmptyEntries, escapeChar, bRemoveEscapeChar, safemodeChar, bRemoveSafemodeChar, openparenthesisChar, closeparenthesisChar, bRemoveParenthesisChars, commentChar);
195	return this->tokens_.size();
196	}
197
198	/**
199	@brief Joins the tokens of this SubString using the given delimiter and returns a string.
200	@param delimiter This delimiter will be placed between each two tokens
201	@return The joined string.
202	*/
203	std::string SubString::join(const std::string& delimiter) const
204	{
205	if (!this->tokens_.empty())
206	{
207	std::string retVal = this->tokens_[0];
208	for (size_t i = 1; i < this->tokens_.size(); ++i)
209	retVal += delimiter + this->tokens_[i];
210	return retVal;
211	}
212	else
213	return "";
214	}
215
216	/**
217	@brief Creates a subset of this SubString.
218	@param begin The beginning of the subset
219	@param length The length of the subset
220	@return A new SubString containing the defined subset.
221
222	The subset ranges from the token with index @a begin and contains @a length elements.
223
224	This function is added for your convenience, and does the same as
225	SubString::SubString(const SubString& other, size_t begin, size_t length)
226	*/
227	SubString SubString::subSet(size_t begin, size_t length) const
228	{
229	return SubString(*this, begin, length);
230	}
231
232	/**
233	@copydoc SubString(const std::string&,const std::string&,const std::string&,bool,char,bool,char,bool,char,char,bool,char)
234	@param tokens The array, where the splitted strings will be stored in
235	@param bTokenInSafemode A vector wich stores for each character of the string if it is in safemode or not
236	@param start_state The internal state of the parser
237
238	This is the actual splitting algorithm from Clemens Wacha.
239	Supports delimiters, escape characters, ignores special characters between safemodeChar and between commentChar and line end "\n".
240
241	Extended by Orxonox to support parenthesis as additional safe-mode.
242	*/
243	SubString::SPLIT_LINE_STATE
244	SubString::splitLine(std::vector<std::string>& tokens,
245	std::vector<bool>& bTokenInSafemode,
246	const std::string& line,
247	const std::string& delimiters,
248	const std::string& delimiterNeighbours,
249	bool bAllowEmptyEntries,
250	char escapeChar,
251	bool bRemoveEscapeChar,
252	char safemodeChar,
253	bool bRemoveSafemodeChar,
254	char openparenthesisChar,
255	char closeparenthesisChar,
256	bool bRemoveParenthesisChars,
257	char commentChar,
258	SPLIT_LINE_STATE start_state)
259	{
260	SPLIT_LINE_STATE state = start_state;
261	size_t i = 0;
262	size_t fallBackNeighbours = 0;
263
264	std::string token;
265	bool inSafemode = false;
266
267	if(start_state != SPLIT_LINE_STATE::NORMAL && tokens.size() > 0)
268	{
269	token = tokens[tokens.size()-1];
270	tokens.pop_back();
271	}
272	if(start_state != SPLIT_LINE_STATE::NORMAL && bTokenInSafemode.size() > 0)
273	{
274	inSafemode = bTokenInSafemode[bTokenInSafemode.size()-1];
275	bTokenInSafemode.pop_back();
276	}
277
278	while(i < line.size())
279	{
280	switch(state)
281	{
282	case SPLIT_LINE_STATE::NORMAL:
283	if(line[i] == escapeChar)
284	{
285	state = SPLIT_LINE_STATE::ESCAPE;
286	if (!bRemoveEscapeChar)
287	token += line[i];
288	fallBackNeighbours = 0;
289	}
290	else if(line[i] == safemodeChar)
291	{
292	state = SPLIT_LINE_STATE::SAFEMODE;
293	inSafemode = true;
294	if (!bRemoveSafemodeChar)
295	token += line[i];
296	fallBackNeighbours = 0;
297	}
298	else if(line[i] == openparenthesisChar)
299	{
300	state = SPLIT_LINE_STATE::PARENTHESES;
301	inSafemode = true;
302	if (!bRemoveParenthesisChars)
303	token += line[i];
304	fallBackNeighbours = 0;
305	}
306	else if(line[i] == commentChar)
307	{
308	if (fallBackNeighbours > 0)
309	token = token.substr(0, token.size() - fallBackNeighbours);
310	fallBackNeighbours = 0;
311	// FINISH
312	if(bAllowEmptyEntries \|\| token.size() > 0)
313	{
314	tokens.push_back(token);
315	token.clear();
316	bTokenInSafemode.push_back(inSafemode);
317	inSafemode = false;
318	}
319	token += line[i]; // EAT
320	state = SPLIT_LINE_STATE::COMMENT;
321	}
322	else if(delimiters.find(line[i]) != std::string::npos)
323	{
324	// line[i] is a delimiter
325	if (fallBackNeighbours > 0)
326	token = token.substr(0, token.size() - fallBackNeighbours);
327	fallBackNeighbours = 0;
328	// FINISH
329	if(bAllowEmptyEntries \|\| token.size() > 0)
330	{
331	tokens.push_back(token);
332	token.clear();
333	bTokenInSafemode.push_back(inSafemode);
334	inSafemode = false;
335	}
336	state = SPLIT_LINE_STATE::NORMAL;
337	}
338	else
339	{
340	if (delimiterNeighbours.find(line[i]) != std::string::npos)
341	{
342	if (token.size() > 0)
343	++fallBackNeighbours;
344	else
345	{
346	++i;
347	continue;
348	}
349	}
350	else
351	fallBackNeighbours = 0;
352	token += line[i]; // EAT
353	}
354	break;
355	case SPLIT_LINE_STATE::ESCAPE:
356	if (!bRemoveSafemodeChar)
357	token += line[i];
358	else
359	{
360	if(line[i] == 'n') token += '\n';
361	else if(line[i] == 't') token += '\t';
362	else if(line[i] == 'v') token += '\v';
363	else if(line[i] == 'b') token += '\b';
364	else if(line[i] == 'r') token += '\r';
365	else if(line[i] == 'f') token += '\f';
366	else if(line[i] == 'a') token += '\a';
367	else if(line[i] == '?') token += '\?';
368	else token += line[i]; // EAT
369	}
370	state = SPLIT_LINE_STATE::NORMAL;
371	break;
372	case SPLIT_LINE_STATE::SAFEMODE:
373	if(line[i] == safemodeChar)
374	{
375	state = SPLIT_LINE_STATE::NORMAL;
376	if (!bRemoveSafemodeChar)
377	token += line[i];
378	}
379	else if(line[i] == escapeChar)
380	{
381	state = SPLIT_LINE_STATE::SAFEESCAPE;
382	}
383	else
384	{
385	token += line[i]; // EAT
386	}
387	break;
388
389	case SPLIT_LINE_STATE::SAFEESCAPE:
390	if(line[i] == 'n') token += '\n';
391	else if(line[i] == 't') token += '\t';
392	else if(line[i] == 'v') token += '\v';
393	else if(line[i] == 'b') token += '\b';
394	else if(line[i] == 'r') token += '\r';
395	else if(line[i] == 'f') token += '\f';
396	else if(line[i] == 'a') token += '\a';
397	else if(line[i] == '?') token += '\?';
398	else token += line[i]; // EAT
399	state = SPLIT_LINE_STATE::SAFEMODE;
400	break;
401
402	case SPLIT_LINE_STATE::PARENTHESES:
403	if(line[i] == closeparenthesisChar)
404	{
405	state = SPLIT_LINE_STATE::NORMAL;
406	if (!bRemoveParenthesisChars)
407	token += line[i];
408	}
409	else if(line[i] == escapeChar)
410	{
411	state = SPLIT_LINE_STATE::PARENTHESESESCAPE;
412	}
413	else
414	{
415	token += line[i]; // EAT
416	}
417	break;
418
419	case SPLIT_LINE_STATE::PARENTHESESESCAPE:
420	if(line[i] == 'n') token += '\n';
421	else if(line[i] == 't') token += '\t';
422	else if(line[i] == 'v') token += '\v';
423	else if(line[i] == 'b') token += '\b';
424	else if(line[i] == 'r') token += '\r';
425	else if(line[i] == 'f') token += '\f';
426	else if(line[i] == 'a') token += '\a';
427	else if(line[i] == '?') token += '\?';
428	else token += line[i]; // EAT
429	state = SPLIT_LINE_STATE::PARENTHESES;
430	break;
431
432	case SPLIT_LINE_STATE::COMMENT:
433	if(line[i] == '\n')
434	{
435	// FINISH
436	if(token.size() > 0)
437	{
438	tokens.push_back(token);
439	token.clear();
440	bTokenInSafemode.push_back(inSafemode);
441	inSafemode = false;
442	}
443	state = SPLIT_LINE_STATE::NORMAL;
444	}
445	else
446	{
447	token += line[i]; // EAT
448	}
449	break;
450
451	default:
452	// nothing
453	break;
454	}
455	++i;
456	}
457
458	// FINISH
459	if (fallBackNeighbours > 0)
460	token = token.substr(0, token.size() - fallBackNeighbours);
461	if(bAllowEmptyEntries \|\| token.size() > 0)
462	{
463	tokens.push_back(token);
464	token.clear();
465	bTokenInSafemode.push_back(inSafemode);
466	inSafemode = false;
467	}
468	return(state);
469	}
470
471	/**
472	@brief Some nice debug information about this SubString.
473	*/
474	void SubString::debug() const
475	{
476	orxout(debug_output) << "Substring-information::count=" << this->tokens_.size() << " ::";
477	for (size_t i = 0; i < this->tokens_.size(); ++i)
478	orxout(debug_output) << "s" << i << "='" << this->tokens_[i].c_str() << "'::";
479	orxout(debug_output) << endl;
480	}
481	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: