Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

tinyxmlparser.cpp @ 9

Last change on this file since 9 was 6, checked in by anonymous, 17 years ago
=…
File size: 35.3 KB

Rev	Line
[6]	1	/*
	2	www.sourceforge.net/projects/tinyxml
	3	Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
	4
	5	This software is provided 'as-is', without any express or implied
	6	warranty. In no event will the authors be held liable for any
	7	damages arising from the use of this software.
	8
	9	Permission is granted to anyone to use this software for any
	10	purpose, including commercial applications, and to alter it and
	11	redistribute it freely, subject to the following restrictions:
	12
	13	1. The origin of this software must not be misrepresented; you must
	14	not claim that you wrote the original software. If you use this
	15	software in a product, an acknowledgment in the product documentation
	16	would be appreciated but is not required.
	17
	18	2. Altered source versions must be plainly marked as such, and
	19	must not be misrepresented as being the original software.
	20
	21	3. This notice may not be removed or altered from any source
	22	distribution.
	23	*/
	24
	25	#include <ctype.h>
	26	#include <stddef.h>
	27
	28	#include "tinyxml.h"
	29
	30	//#define DEBUG_PARSER
	31	#if defined( DEBUG_PARSER )
	32	# if defined( DEBUG ) && defined( _MSC_VER )
	33	# include <windows.h>
	34	# define TIXML_LOG OutputDebugString
	35	# else
	36	# define TIXML_LOG printf
	37	# endif
	38	#endif
	39
	40	// Note tha "PutString" hardcodes the same list. This
	41	// is less flexible than it appears. Changing the entries
	42	// or order will break putstring.
	43	TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
	44	{
	45	{ "&", 5, '&' },
	46	{ "<", 4, '<' },
	47	{ ">", 4, '>' },
	48	{ """, 6, '\"' },
	49	{ "'", 6, '\'' }
	50	};
	51
	52	// Bunch of unicode info at:
	53	// http://www.unicode.org/faq/utf_bom.html
	54	// Including the basic of this table, which determines the #bytes in the
	55	// sequence from the lead byte. 1 placed for invalid sequences --
	56	// although the result will be junk, pass it through as much as possible.
	57	// Beware of the non-characters in UTF-8:
	58	// ef bb bf (Microsoft "lead bytes")
	59	// ef bf be
	60	// ef bf bf
	61
	62	const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
	63	const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
	64	const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
	65
	66	const int TiXmlBase::utf8ByteTable[256] =
	67	{
	68	// 0 1 2 3 4 5 6 7 8 9 a b c d e f
	69	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
	70	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
	71	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
	72	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
	73	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
	74	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
	75	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
	76	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
	77	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
	78	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
	79	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
	80	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
	81	1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
	82	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
	83	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
	84	4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
	85	};
	86
	87
	88	void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
	89	{
	90	const unsigned long BYTE_MASK = 0xBF;
	91	const unsigned long BYTE_MARK = 0x80;
	92	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	93
	94	if (input < 0x80)
	95	*length = 1;
	96	else if ( input < 0x800 )
	97	*length = 2;
	98	else if ( input < 0x10000 )
	99	*length = 3;
	100	else if ( input < 0x200000 )
	101	*length = 4;
	102	else
	103	{ *length = 0; return; } // This code won't covert this correctly anyway.
	104
	105	output += *length;
	106
	107	// Scary scary fall throughs.
	108	switch (*length)
	109	{
	110	case 4:
	111	--output;
	112	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
	113	input >>= 6;
	114	case 3:
	115	--output;
	116	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
	117	input >>= 6;
	118	case 2:
	119	--output;
	120	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
	121	input >>= 6;
	122	case 1:
	123	--output;
	124	output = (char)(input \| FIRST_BYTE_MARK[length]);
	125	}
	126	}
	127
	128
	129	/static/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /encoding/ )
	130	{
	131	// This will only work for low-ascii, everything else is assumed to be a valid
	132	// letter. I'm not sure this is the best approach, but it is quite tricky trying
	133	// to figure out alhabetical vs. not across encoding. So take a very
	134	// conservative approach.
	135
	136	// if ( encoding == TIXML_ENCODING_UTF8 )
	137	// {
	138	if ( anyByte < 127 )
	139	return isalpha( anyByte );
	140	else
	141	return 1; // What else to do? The unicode set is huge...get the english ones right.
	142	// }
	143	// else
	144	// {
	145	// return isalpha( anyByte );
	146	// }
	147	}
	148
	149
	150	/static/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /encoding/ )
	151	{
	152	// This will only work for low-ascii, everything else is assumed to be a valid
	153	// letter. I'm not sure this is the best approach, but it is quite tricky trying
	154	// to figure out alhabetical vs. not across encoding. So take a very
	155	// conservative approach.
	156
	157	// if ( encoding == TIXML_ENCODING_UTF8 )
	158	// {
	159	if ( anyByte < 127 )
	160	return isalnum( anyByte );
	161	else
	162	return 1; // What else to do? The unicode set is huge...get the english ones right.
	163	// }
	164	// else
	165	// {
	166	// return isalnum( anyByte );
	167	// }
	168	}
	169
	170
	171	class TiXmlParsingData
	172	{
	173	friend class TiXmlDocument;
	174	public:
	175	void Stamp( const char* now, TiXmlEncoding encoding );
	176
	177	const TiXmlCursor& Cursor() { return cursor; }
	178
	179	private:
	180	// Only used by the document!
	181	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
	182	{
	183	assert( start );
	184	stamp = start;
	185	tabsize = _tabsize;
	186	cursor.row = row;
	187	cursor.col = col;
	188	}
	189
	190	TiXmlCursor cursor;
	191	const char* stamp;
	192	int tabsize;
	193	};
	194
	195
	196	void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
	197	{
	198	assert( now );
	199
	200	// Do nothing if the tabsize is 0.
	201	if ( tabsize < 1 )
	202	{
	203	return;
	204	}
	205
	206	// Get the current row, column.
	207	int row = cursor.row;
	208	int col = cursor.col;
	209	const char* p = stamp;
	210	assert( p );
	211
	212	while ( p < now )
	213	{
	214	// Treat p as unsigned, so we have a happy compiler.
	215	const unsigned char* pU = (const unsigned char*)p;
	216
	217	// Code contributed by Fletcher Dunn: (modified by lee)
	218	switch (*pU) {
	219	case 0:
	220	// We should never get here, but in case we do, don't
	221	// advance past the terminating null character, ever
	222	return;
	223
	224	case '\r':
	225	// bump down to the next line
	226	++row;
	227	col = 0;
	228	// Eat the character
	229	++p;
	230
	231	// Check for \r\n sequence, and treat this as a single character
	232	if (*p == '\n') {
	233	++p;
	234	}
	235	break;
	236
	237	case '\n':
	238	// bump down to the next line
	239	++row;
	240	col = 0;
	241
	242	// Eat the character
	243	++p;
	244
	245	// Check for \n\r sequence, and treat this as a single
	246	// character. (Yes, this bizarre thing does occur still
	247	// on some arcane platforms...)
	248	if (*p == '\r') {
	249	++p;
	250	}
	251	break;
	252
	253	case '\t':
	254	// Eat the character
	255	++p;
	256
	257	// Skip to next tab stop
	258	col = (col / tabsize + 1) * tabsize;
	259	break;
	260
	261	case TIXML_UTF_LEAD_0:
	262	if ( encoding == TIXML_ENCODING_UTF8 )
	263	{
	264	if ( (p+1) && (p+2) )
	265	{
	266	// In these cases, don't advance the column. These are
	267	// 0-width spaces.
	268	if ( (pU+1)==TIXML_UTF_LEAD_1 && (pU+2)==TIXML_UTF_LEAD_2 )
	269	p += 3;
	270	else if ( (pU+1)==0xbfU && (pU+2)==0xbeU )
	271	p += 3;
	272	else if ( (pU+1)==0xbfU && (pU+2)==0xbfU )
	273	p += 3;
	274	else
	275	{ p +=3; ++col; } // A normal character.
	276	}
	277	}
	278	else
	279	{
	280	++p;
	281	++col;
	282	}
	283	break;
	284
	285	default:
	286	if ( encoding == TIXML_ENCODING_UTF8 )
	287	{
	288	// Eat the 1 to 4 byte utf8 character.
	289	int step = TiXmlBase::utf8ByteTable[((const unsigned char)p)];
	290	if ( step == 0 )
	291	step = 1; // Error case from bad encoding, but handle gracefully.
	292	p += step;
	293
	294	// Just advance one column, of course.
	295	++col;
	296	}
	297	else
	298	{
	299	++p;
	300	++col;
	301	}
	302	break;
	303	}
	304	}
	305	cursor.row = row;
	306	cursor.col = col;
	307	assert( cursor.row >= -1 );
	308	assert( cursor.col >= -1 );
	309	stamp = p;
	310	assert( stamp );
	311	}
	312
	313
	314	const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
	315	{
	316	if ( !p \|\| !*p )
	317	{
	318	return 0;
	319	}
	320	if ( encoding == TIXML_ENCODING_UTF8 )
	321	{
	322	while ( *p )
	323	{
	324	const unsigned char* pU = (const unsigned char*)p;
	325
	326	// Skip the stupid Microsoft UTF-8 Byte order marks
	327	if ( *(pU+0)==TIXML_UTF_LEAD_0
	328	&& *(pU+1)==TIXML_UTF_LEAD_1
	329	&& *(pU+2)==TIXML_UTF_LEAD_2 )
	330	{
	331	p += 3;
	332	continue;
	333	}
	334	else if(*(pU+0)==TIXML_UTF_LEAD_0
	335	&& *(pU+1)==0xbfU
	336	&& *(pU+2)==0xbeU )
	337	{
	338	p += 3;
	339	continue;
	340	}
	341	else if(*(pU+0)==TIXML_UTF_LEAD_0
	342	&& *(pU+1)==0xbfU
	343	&& *(pU+2)==0xbfU )
	344	{
	345	p += 3;
	346	continue;
	347	}
	348
	349	if ( IsWhiteSpace( p ) \|\| p == '\n' \|\| *p =='\r' ) // Still using old rules for white space.
	350	++p;
	351	else
	352	break;
	353	}
	354	}
	355	else
	356	{
	357	while ( p && IsWhiteSpace( p ) \|\| p == '\n' \|\| p =='\r' )
	358	++p;
	359	}
	360
	361	return p;
	362	}
	363
	364	#ifdef TIXML_USE_STL
	365	/static/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
	366	{
	367	for( ;; )
	368	{
	369	if ( !in->good() ) return false;
	370
	371	int c = in->peek();
	372	// At this scope, we can't get to a document. So fail silently.
	373	if ( !IsWhiteSpace( c ) \|\| c <= 0 )
	374	return true;
	375
	376	*tag += (char) in->get();
	377	}
	378	}
	379
	380	/static/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
	381	{
	382	//assert( character > 0 && character < 128 ); // else it won't work in utf-8
	383	while ( in->good() )
	384	{
	385	int c = in->peek();
	386	if ( c == character )
	387	return true;
	388	if ( c <= 0 ) // Silent failure: can't get document at this scope
	389	return false;
	390
	391	in->get();
	392	*tag += (char) c;
	393	}
	394	return false;
	395	}
	396	#endif
	397
	398	// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
	399	// "assign" optimization removes over 10% of the execution time.
	400	//
	401	const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
	402	{
	403	// Oddly, not supported on some comilers,
	404	//name->clear();
	405	// So use this:
	406	*name = "";
	407	assert( p );
	408
	409	// Names start with letters or underscores.
	410	// Of course, in unicode, tinyxml has no idea what a letter is. The
	411	// algorithm is generous.
	412	//
	413	// After that, they can be letters, underscores, numbers,
	414	// hyphens, or colons. (Colons are valid ony for namespaces,
	415	// but tinyxml can't tell namespaces from names.)
	416	if ( p && *p
	417	&& ( IsAlpha( (unsigned char) p, encoding ) \|\| p == '_' ) )
	418	{
	419	const char* start = p;
	420	while( p && *p
	421	&& ( IsAlphaNum( (unsigned char ) *p, encoding )
	422	\|\| *p == '_'
	423	\|\| *p == '-'
	424	\|\| *p == '.'
	425	\|\| *p == ':' ) )
	426	{
	427	//(name) += p; // expensive
	428	++p;
	429	}
	430	if ( p-start > 0 ) {
	431	name->assign( start, p-start );
	432	}
	433	return p;
	434	}
	435	return 0;
	436	}
	437
	438	const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
	439	{
	440	// Presume an entity, and pull it out.
	441	TIXML_STRING ent;
	442	int i;
	443	*length = 0;
	444
	445	if ( (p+1) && (p+1) == '#' && *(p+2) )
	446	{
	447	unsigned long ucs = 0;
	448	ptrdiff_t delta = 0;
	449	unsigned mult = 1;
	450
	451	if ( *(p+2) == 'x' )
	452	{
	453	// Hexadecimal.
	454	if ( !*(p+3) ) return 0;
	455
	456	const char* q = p+3;
	457	q = strchr( q, ';' );
	458
	459	if ( !q \|\| !*q ) return 0;
	460
	461	delta = q-p;
	462	--q;
	463
	464	while ( *q != 'x' )
	465	{
	466	if ( q >= '0' && q <= '9' )
	467	ucs += mult * (*q - '0');
	468	else if ( q >= 'a' && q <= 'f' )
	469	ucs += mult * (*q - 'a' + 10);
	470	else if ( q >= 'A' && q <= 'F' )
	471	ucs += mult * (*q - 'A' + 10 );
	472	else
	473	return 0;
	474	mult *= 16;
	475	--q;
	476	}
	477	}
	478	else
	479	{
	480	// Decimal.
	481	if ( !*(p+2) ) return 0;
	482
	483	const char* q = p+2;
	484	q = strchr( q, ';' );
	485
	486	if ( !q \|\| !*q ) return 0;
	487
	488	delta = q-p;
	489	--q;
	490
	491	while ( *q != '#' )
	492	{
	493	if ( q >= '0' && q <= '9' )
	494	ucs += mult * (*q - '0');
	495	else
	496	return 0;
	497	mult *= 10;
	498	--q;
	499	}
	500	}
	501	if ( encoding == TIXML_ENCODING_UTF8 )
	502	{
	503	// convert the UCS to UTF-8
	504	ConvertUTF32ToUTF8( ucs, value, length );
	505	}
	506	else
	507	{
	508	*value = (char)ucs;
	509	*length = 1;
	510	}
	511	return p + delta + 1;
	512	}
	513
	514	// Now try to match it.
	515	for( i=0; i<NUM_ENTITY; ++i )
	516	{
	517	if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
	518	{
	519	assert( strlen( entity[i].str ) == entity[i].strLength );
	520	*value = entity[i].chr;
	521	*length = 1;
	522	return ( p + entity[i].strLength );
	523	}
	524	}
	525
	526	// So it wasn't an entity, its unrecognized, or something like that.
	527	value = p; // Don't put back the last one, since we return it!
	528	//*length = 1; // Leave unrecognized entities - this doesn't really work.
	529	// Just writes strange XML.
	530	return p+1;
	531	}
	532
	533
	534	bool TiXmlBase::StringEqual( const char* p,
	535	const char* tag,
	536	bool ignoreCase,
	537	TiXmlEncoding encoding )
	538	{
	539	assert( p );
	540	assert( tag );
	541	if ( !p \|\| !*p )
	542	{
	543	assert( 0 );
	544	return false;
	545	}
	546
	547	const char* q = p;
	548
	549	if ( ignoreCase )
	550	{
	551	while ( q && tag && ToLower( q, encoding ) == ToLower( tag, encoding ) )
	552	{
	553	++q;
	554	++tag;
	555	}
	556
	557	if ( *tag == 0 )
	558	return true;
	559	}
	560	else
	561	{
	562	while ( q && tag && q == tag )
	563	{
	564	++q;
	565	++tag;
	566	}
	567
	568	if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
	569	return true;
	570	}
	571	return false;
	572	}
	573
	574	const char* TiXmlBase::ReadText( const char* p,
	575	TIXML_STRING * text,
	576	bool trimWhiteSpace,
	577	const char* endTag,
	578	bool caseInsensitive,
	579	TiXmlEncoding encoding )
	580	{
	581	*text = "";
	582	if ( !trimWhiteSpace // certain tags always keep whitespace
	583	\|\| !condenseWhiteSpace ) // if true, whitespace is always kept
	584	{
	585	// Keep all the white space.
	586	while ( p && *p
	587	&& !StringEqual( p, endTag, caseInsensitive, encoding )
	588	)
	589	{
	590	int len;
	591	char cArr[4] = { 0, 0, 0, 0 };
	592	p = GetChar( p, cArr, &len, encoding );
	593	text->append( cArr, len );
	594	}
	595	}
	596	else
	597	{
	598	bool whitespace = false;
	599
	600	// Remove leading white space:
	601	p = SkipWhiteSpace( p, encoding );
	602	while ( p && *p
	603	&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
	604	{
	605	if ( p == '\r' \|\| p == '\n' )
	606	{
	607	whitespace = true;
	608	++p;
	609	}
	610	else if ( IsWhiteSpace( *p ) )
	611	{
	612	whitespace = true;
	613	++p;
	614	}
	615	else
	616	{
	617	// If we've found whitespace, add it before the
	618	// new character. Any whitespace just becomes a space.
	619	if ( whitespace )
	620	{
	621	(*text) += ' ';
	622	whitespace = false;
	623	}
	624	int len;
	625	char cArr[4] = { 0, 0, 0, 0 };
	626	p = GetChar( p, cArr, &len, encoding );
	627	if ( len == 1 )
	628	(*text) += cArr[0]; // more efficient
	629	else
	630	text->append( cArr, len );
	631	}
	632	}
	633	}
	634	if ( p )
	635	p += strlen( endTag );
	636	return p;
	637	}
	638
	639	#ifdef TIXML_USE_STL
	640
	641	void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
	642	{
	643	// The basic issue with a document is that we don't know what we're
	644	// streaming. Read something presumed to be a tag (and hope), then
	645	// identify it, and call the appropriate stream method on the tag.
	646	//
	647	// This "pre-streaming" will never read the closing ">" so the
	648	// sub-tag can orient itself.
	649
	650	if ( !StreamTo( in, '<', tag ) )
	651	{
	652	SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	653	return;
	654	}
	655
	656	while ( in->good() )
	657	{
	658	int tagIndex = (int) tag->length();
	659	while ( in->good() && in->peek() != '>' )
	660	{
	661	int c = in->get();
	662	if ( c <= 0 )
	663	{
	664	SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	665	break;
	666	}
	667	(*tag) += (char) c;
	668	}
	669
	670	if ( in->good() )
	671	{
	672	// We now have something we presume to be a node of
	673	// some sort. Identify it, and call the node to
	674	// continue streaming.
	675	TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
	676
	677	if ( node )
	678	{
	679	node->StreamIn( in, tag );
	680	bool isElement = node->ToElement() != 0;
	681	delete node;
	682	node = 0;
	683
	684	// If this is the root element, we're done. Parsing will be
	685	// done by the >> operator.
	686	if ( isElement )
	687	{
	688	return;
	689	}
	690	}
	691	else
	692	{
	693	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
	694	return;
	695	}
	696	}
	697	}
	698	// We should have returned sooner.
	699	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
	700	}
	701
	702	#endif
	703
	704	const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
	705	{
	706	ClearError();
	707
	708	// Parse away, at the document level. Since a document
	709	// contains nothing but other tags, most of what happens
	710	// here is skipping white space.
	711	if ( !p \|\| !*p )
	712	{
	713	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	714	return 0;
	715	}
	716
	717	// Note that, for a document, this needs to come
	718	// before the while space skip, so that parsing
	719	// starts from the pointer we are given.
	720	location.Clear();
	721	if ( prevData )
	722	{
	723	location.row = prevData->cursor.row;
	724	location.col = prevData->cursor.col;
	725	}
	726	else
	727	{
	728	location.row = 0;
	729	location.col = 0;
	730	}
	731	TiXmlParsingData data( p, TabSize(), location.row, location.col );
	732	location = data.Cursor();
	733
	734	if ( encoding == TIXML_ENCODING_UNKNOWN )
	735	{
	736	// Check for the Microsoft UTF-8 lead bytes.
	737	const unsigned char* pU = (const unsigned char*)p;
	738	if ( (pU+0) && (pU+0) == TIXML_UTF_LEAD_0
	739	&& (pU+1) && (pU+1) == TIXML_UTF_LEAD_1
	740	&& (pU+2) && (pU+2) == TIXML_UTF_LEAD_2 )
	741	{
	742	encoding = TIXML_ENCODING_UTF8;
	743	useMicrosoftBOM = true;
	744	}
	745	}
	746
	747	p = SkipWhiteSpace( p, encoding );
	748	if ( !p )
	749	{
	750	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	751	return 0;
	752	}
	753
	754	while ( p && *p )
	755	{
	756	TiXmlNode* node = Identify( p, encoding );
	757	if ( node )
	758	{
	759	p = node->Parse( p, &data, encoding );
	760	LinkEndChild( node );
	761	}
	762	else
	763	{
	764	break;
	765	}
	766
	767	// Did we get encoding info?
	768	if ( encoding == TIXML_ENCODING_UNKNOWN
	769	&& node->ToDeclaration() )
	770	{
	771	TiXmlDeclaration* dec = node->ToDeclaration();
	772	const char* enc = dec->Encoding();
	773	assert( enc );
	774
	775	if ( *enc == 0 )
	776	encoding = TIXML_ENCODING_UTF8;
	777	else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
	778	encoding = TIXML_ENCODING_UTF8;
	779	else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
	780	encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
	781	else
	782	encoding = TIXML_ENCODING_LEGACY;
	783	}
	784
	785	p = SkipWhiteSpace( p, encoding );
	786	}
	787
	788	// Was this empty?
	789	if ( !firstChild ) {
	790	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
	791	return 0;
	792	}
	793
	794	// All is well.
	795	return p;
	796	}
	797
	798	void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
	799	{
	800	// The first error in a chain is more accurate - don't set again!
	801	if ( error )
	802	return;
	803
	804	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
	805	error = true;
	806	errorId = err;
	807	errorDesc = errorString[ errorId ];
	808
	809	errorLocation.Clear();
	810	if ( pError && data )
	811	{
	812	data->Stamp( pError, encoding );
	813	errorLocation = data->Cursor();
	814	}
	815	}
	816
	817
	818	TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
	819	{
	820	TiXmlNode* returnNode = 0;
	821
	822	p = SkipWhiteSpace( p, encoding );
	823	if( !p \|\| !p \|\| p != '<' )
	824	{
	825	return 0;
	826	}
	827
	828	TiXmlDocument* doc = GetDocument();
	829	p = SkipWhiteSpace( p, encoding );
	830
	831	if ( !p \|\| !*p )
	832	{
	833	return 0;
	834	}
	835
	836	// What is this thing?
	837	// - Elements start with a letter or underscore, but xml is reserved.
	838	// - Comments: <!--
	839	// - Decleration: <?xml
	840	// - Everthing else is unknown to tinyxml.
	841	//
	842
	843	const char* xmlHeader = { "<?xml" };
	844	const char* commentHeader = { "<!--" };
	845	const char* dtdHeader = { "<!" };
	846	const char* cdataHeader = { "<![CDATA[" };
	847
	848	if ( StringEqual( p, xmlHeader, true, encoding ) )
	849	{
	850	#ifdef DEBUG_PARSER
	851	TIXML_LOG( "XML parsing Declaration\n" );
	852	#endif
	853	returnNode = new TiXmlDeclaration();
	854	}
	855	else if ( StringEqual( p, commentHeader, false, encoding ) )
	856	{
	857	#ifdef DEBUG_PARSER
	858	TIXML_LOG( "XML parsing Comment\n" );
	859	#endif
	860	returnNode = new TiXmlComment();
	861	}
	862	else if ( StringEqual( p, cdataHeader, false, encoding ) )
	863	{
	864	#ifdef DEBUG_PARSER
	865	TIXML_LOG( "XML parsing CDATA\n" );
	866	#endif
	867	TiXmlText* text = new TiXmlText( "" );
	868	text->SetCDATA( true );
	869	returnNode = text;
	870	}
	871	else if ( StringEqual( p, dtdHeader, false, encoding ) )
	872	{
	873	#ifdef DEBUG_PARSER
	874	TIXML_LOG( "XML parsing Unknown(1)\n" );
	875	#endif
	876	returnNode = new TiXmlUnknown();
	877	}
	878	else if ( IsAlpha( *(p+1), encoding )
	879	\|\| *(p+1) == '_' )
	880	{
	881	#ifdef DEBUG_PARSER
	882	TIXML_LOG( "XML parsing Element\n" );
	883	#endif
	884	returnNode = new TiXmlElement( "" );
	885	}
	886	else
	887	{
	888	#ifdef DEBUG_PARSER
	889	TIXML_LOG( "XML parsing Unknown(2)\n" );
	890	#endif
	891	returnNode = new TiXmlUnknown();
	892	}
	893
	894	if ( returnNode )
	895	{
	896	// Set the parent, so it can report errors
	897	returnNode->parent = this;
	898	}
	899	else
	900	{
	901	if ( doc )
	902	doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
	903	}
	904	return returnNode;
	905	}
	906
	907	#ifdef TIXML_USE_STL
	908
	909	void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
	910	{
	911	// We're called with some amount of pre-parsing. That is, some of "this"
	912	// element is in "tag". Go ahead and stream to the closing ">"
	913	while( in->good() )
	914	{
	915	int c = in->get();
	916	if ( c <= 0 )
	917	{
	918	TiXmlDocument* document = GetDocument();
	919	if ( document )
	920	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	921	return;
	922	}
	923	(*tag) += (char) c ;
	924
	925	if ( c == '>' )
	926	break;
	927	}
	928
	929	if ( tag->length() < 3 ) return;
	930
	931	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
	932	// If not, identify and stream.
	933
	934	if ( tag->at( tag->length() - 1 ) == '>'
	935	&& tag->at( tag->length() - 2 ) == '/' )
	936	{
	937	// All good!
	938	return;
	939	}
	940	else if ( tag->at( tag->length() - 1 ) == '>' )
	941	{
	942	// There is more. Could be:
	943	// text
	944	// cdata text (which looks like another node)
	945	// closing tag
	946	// another node.
	947	for ( ;; )
	948	{
	949	StreamWhiteSpace( in, tag );
	950
	951	// Do we have text?
	952	if ( in->good() && in->peek() != '<' )
	953	{
	954	// Yep, text.
	955	TiXmlText text( "" );
	956	text.StreamIn( in, tag );
	957
	958	// What follows text is a closing tag or another node.
	959	// Go around again and figure it out.
	960	continue;
	961	}
	962
	963	// We now have either a closing tag...or another node.
	964	// We should be at a "<", regardless.
	965	if ( !in->good() ) return;
	966	assert( in->peek() == '<' );
	967	int tagIndex = (int) tag->length();
	968
	969	bool closingTag = false;
	970	bool firstCharFound = false;
	971
	972	for( ;; )
	973	{
	974	if ( !in->good() )
	975	return;
	976
	977	int c = in->peek();
	978	if ( c <= 0 )
	979	{
	980	TiXmlDocument* document = GetDocument();
	981	if ( document )
	982	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	983	return;
	984	}
	985
	986	if ( c == '>' )
	987	break;
	988
	989	*tag += (char) c;
	990	in->get();
	991
	992	// Early out if we find the CDATA id.
	993	if ( c == '[' && tag->size() >= 9 )
	994	{
	995	size_t len = tag->size();
	996	const char* start = tag->c_str() + len - 9;
	997	if ( strcmp( start, "<![CDATA[" ) == 0 ) {
	998	assert( !closingTag );
	999	break;
	1000	}
	1001	}
	1002
	1003	if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
	1004	{
	1005	firstCharFound = true;
	1006	if ( c == '/' )
	1007	closingTag = true;
	1008	}
	1009	}
	1010	// If it was a closing tag, then read in the closing '>' to clean up the input stream.
	1011	// If it was not, the streaming will be done by the tag.
	1012	if ( closingTag )
	1013	{
	1014	if ( !in->good() )
	1015	return;
	1016
	1017	int c = in->get();
	1018	if ( c <= 0 )
	1019	{
	1020	TiXmlDocument* document = GetDocument();
	1021	if ( document )
	1022	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1023	return;
	1024	}
	1025	assert( c == '>' );
	1026	*tag += (char) c;
	1027
	1028	// We are done, once we've found our closing tag.
	1029	return;
	1030	}
	1031	else
	1032	{
	1033	// If not a closing tag, id it, and stream.
	1034	const char* tagloc = tag->c_str() + tagIndex;
	1035	TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
	1036	if ( !node )
	1037	return;
	1038	node->StreamIn( in, tag );
	1039	delete node;
	1040	node = 0;
	1041
	1042	// No return: go around from the beginning: text, closing tag, or node.
	1043	}
	1044	}
	1045	}
	1046	}
	1047	#endif
	1048
	1049	const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1050	{
	1051	p = SkipWhiteSpace( p, encoding );
	1052	TiXmlDocument* document = GetDocument();
	1053
	1054	if ( !p \|\| !*p )
	1055	{
	1056	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
	1057	return 0;
	1058	}
	1059
	1060	if ( data )
	1061	{
	1062	data->Stamp( p, encoding );
	1063	location = data->Cursor();
	1064	}
	1065
	1066	if ( *p != '<' )
	1067	{
	1068	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
	1069	return 0;
	1070	}
	1071
	1072	p = SkipWhiteSpace( p+1, encoding );
	1073
	1074	// Read the name.
	1075	const char* pErr = p;
	1076
	1077	p = ReadName( p, &value, encoding );
	1078	if ( !p \|\| !*p )
	1079	{
	1080	if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
	1081	return 0;
	1082	}
	1083
	1084	TIXML_STRING endTag ("</");
	1085	endTag += value;
	1086	endTag += ">";
	1087
	1088	// Check for and read attributes. Also look for an empty
	1089	// tag or an end tag.
	1090	while ( p && *p )
	1091	{
	1092	pErr = p;
	1093	p = SkipWhiteSpace( p, encoding );
	1094	if ( !p \|\| !*p )
	1095	{
	1096	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
	1097	return 0;
	1098	}
	1099	if ( *p == '/' )
	1100	{
	1101	++p;
	1102	// Empty tag.
	1103	if ( *p != '>' )
	1104	{
	1105	if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
	1106	return 0;
	1107	}
	1108	return (p+1);
	1109	}
	1110	else if ( *p == '>' )
	1111	{
	1112	// Done with attributes (if there were any.)
	1113	// Read the value -- which can include other
	1114	// elements -- read the end tag, and return.
	1115	++p;
	1116	p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
	1117	if ( !p \|\| !*p )
	1118	return 0;
	1119
	1120	// We should find the end tag now
	1121	if ( StringEqual( p, endTag.c_str(), false, encoding ) )
	1122	{
	1123	p += endTag.length();
	1124	return p;
	1125	}
	1126	else
	1127	{
	1128	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
	1129	return 0;
	1130	}
	1131	}
	1132	else
	1133	{
	1134	// Try to read an attribute:
	1135	TiXmlAttribute* attrib = new TiXmlAttribute();
	1136	if ( !attrib )
	1137	{
	1138	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
	1139	return 0;
	1140	}
	1141
	1142	attrib->SetDocument( document );
	1143	pErr = p;
	1144	p = attrib->Parse( p, data, encoding );
	1145
	1146	if ( !p \|\| !*p )
	1147	{
	1148	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
	1149	delete attrib;
	1150	return 0;
	1151	}
	1152
	1153	// Handle the strange case of double attributes:
	1154	#ifdef TIXML_USE_STL
	1155	TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
	1156	#else
	1157	TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
	1158	#endif
	1159	if ( node )
	1160	{
	1161	node->SetValue( attrib->Value() );
	1162	delete attrib;
	1163	return 0;
	1164	}
	1165
	1166	attributeSet.Add( attrib );
	1167	}
	1168	}
	1169	return p;
	1170	}
	1171
	1172
	1173	const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1174	{
	1175	TiXmlDocument* document = GetDocument();
	1176
	1177	// Read in text and elements in any order.
	1178	const char* pWithWhiteSpace = p;
	1179	p = SkipWhiteSpace( p, encoding );
	1180
	1181	while ( p && *p )
	1182	{
	1183	if ( *p != '<' )
	1184	{
	1185	// Take what we have, make a text element.
	1186	TiXmlText* textNode = new TiXmlText( "" );
	1187
	1188	if ( !textNode )
	1189	{
	1190	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
	1191	return 0;
	1192	}
	1193
	1194	if ( TiXmlBase::IsWhiteSpaceCondensed() )
	1195	{
	1196	p = textNode->Parse( p, data, encoding );
	1197	}
	1198	else
	1199	{
	1200	// Special case: we want to keep the white space
	1201	// so that leading spaces aren't removed.
	1202	p = textNode->Parse( pWithWhiteSpace, data, encoding );
	1203	}
	1204
	1205	if ( !textNode->Blank() )
	1206	LinkEndChild( textNode );
	1207	else
	1208	delete textNode;
	1209	}
	1210	else
	1211	{
	1212	// We hit a '<'
	1213	// Have we hit a new element or an end tag? This could also be
	1214	// a TiXmlText in the "CDATA" style.
	1215	if ( StringEqual( p, "</", false, encoding ) )
	1216	{
	1217	return p;
	1218	}
	1219	else
	1220	{
	1221	TiXmlNode* node = Identify( p, encoding );
	1222	if ( node )
	1223	{
	1224	p = node->Parse( p, data, encoding );
	1225	LinkEndChild( node );
	1226	}
	1227	else
	1228	{
	1229	return 0;
	1230	}
	1231	}
	1232	}
	1233	pWithWhiteSpace = p;
	1234	p = SkipWhiteSpace( p, encoding );
	1235	}
	1236
	1237	if ( !p )
	1238	{
	1239	if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
	1240	}
	1241	return p;
	1242	}
	1243
	1244
	#ifdef TIXML_USE_STL
	// Appends characters from `in` to `tag` up to and including the first
	// '>'. If a character value <= 0 is read (the stream could not deliver a
	// character, or the input contains an embedded null),
	// TIXML_ERROR_EMBEDDED_NULL is recorded on the owning document, if any,
	// and reading stops. Also stops, silently, once the stream is no longer
	// good().
	void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		for ( ; in->good(); )
		{
			const int ch = in->get();
			if ( ch > 0 )
			{
				(*tag) += (char) ch;
				if ( ch == '>' )
					return;		// The tag is complete.
				continue;
			}

			// Nothing usable was read: report it and give up.
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}
	}
	#endif
	1268
	1269
	1270	const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1271	{
	1272	TiXmlDocument* document = GetDocument();
	1273	p = SkipWhiteSpace( p, encoding );
	1274
	1275	if ( data )
	1276	{
	1277	data->Stamp( p, encoding );
	1278	location = data->Cursor();
	1279	}
	1280	if ( !p \|\| !p \|\| p != '<' )
	1281	{
	1282	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
	1283	return 0;
	1284	}
	1285	++p;
	1286	value = "";
	1287
	1288	while ( p && p && p != '>' )
	1289	{
	1290	value += *p;
	1291	++p;
	1292	}
	1293
	1294	if ( !p )
	1295	{
	1296	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
	1297	}
	1298	if ( *p == '>' )
	1299	return p+1;
	1300	return p;
	1301	}
	1302
	#ifdef TIXML_USE_STL
	// Appends characters from `in` to `tag` up to and including the "-->"
	// that ends a comment. If a character value <= 0 is read,
	// TIXML_ERROR_EMBEDDED_NULL is recorded on the owning document, if any,
	// and reading stops. Also stops, silently, once the stream is no longer
	// good().
	void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		while ( in->good() )
		{
			int c = in->get();
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;

			// Only look back for "--" when the tag is long enough to hold
			// it. Without the length check, length() - 3 wraps around on a
			// short tag and at() throws. This matches the size() >= 3 guard
			// used by TiXmlText::StreamIn.
			const size_t len = tag->length();
			if (    c == '>'
				 && len >= 3
				 && tag->at( len - 2 ) == '-'
				 && tag->at( len - 3 ) == '-' )
			{
				// All is well.
				return;
			}
		}
	}
	#endif
	1329
	1330
	1331	const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1332	{
	1333	TiXmlDocument* document = GetDocument();
	1334	value = "";
	1335
	1336	p = SkipWhiteSpace( p, encoding );
	1337
	1338	if ( data )
	1339	{
	1340	data->Stamp( p, encoding );
	1341	location = data->Cursor();
	1342	}
	1343	const char* startTag = "<!--";
	1344	const char* endTag = "-->";
	1345
	1346	if ( !StringEqual( p, startTag, false, encoding ) )
	1347	{
	1348	document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
	1349	return 0;
	1350	}
	1351	p += strlen( startTag );
	1352	p = ReadText( p, &value, false, endTag, false, encoding );
	1353	return p;
	1354	}
	1355
	1356
	1357	const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1358	{
	1359	p = SkipWhiteSpace( p, encoding );
	1360	if ( !p \|\| !*p ) return 0;
	1361
	1362	// int tabsize = 4;
	1363	// if ( document )
	1364	// tabsize = document->TabSize();
	1365
	1366	if ( data )
	1367	{
	1368	data->Stamp( p, encoding );
	1369	location = data->Cursor();
	1370	}
	1371	// Read the name, the '=' and the value.
	1372	const char* pErr = p;
	1373	p = ReadName( p, &name, encoding );
	1374	if ( !p \|\| !*p )
	1375	{
	1376	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
	1377	return 0;
	1378	}
	1379	p = SkipWhiteSpace( p, encoding );
	1380	if ( !p \|\| !p \|\| p != '=' )
	1381	{
	1382	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1383	return 0;
	1384	}
	1385
	1386	++p; // skip '='
	1387	p = SkipWhiteSpace( p, encoding );
	1388	if ( !p \|\| !*p )
	1389	{
	1390	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1391	return 0;
	1392	}
	1393
	1394	const char* end;
	1395	const char SINGLE_QUOTE = '\'';
	1396	const char DOUBLE_QUOTE = '\"';
	1397
	1398	if ( *p == SINGLE_QUOTE )
	1399	{
	1400	++p;
	1401	end = "\'"; // single quote in string
	1402	p = ReadText( p, &value, false, end, false, encoding );
	1403	}
	1404	else if ( *p == DOUBLE_QUOTE )
	1405	{
	1406	++p;
	1407	end = "\""; // double quote in string
	1408	p = ReadText( p, &value, false, end, false, encoding );
	1409	}
	1410	else
	1411	{
	1412	// All attribute values should be in single or double quotes.
	1413	// But this is such a common error that the parser will try
	1414	// its best, even without them.
	1415	value = "";
	1416	while ( p && *p // existence
	1417	&& !IsWhiteSpace( p ) && p != '\n' && *p != '\r' // whitespace
	1418	&& p != '/' && p != '>' ) // tag end
	1419	{
	1420	if ( p == SINGLE_QUOTE \|\| p == DOUBLE_QUOTE ) {
	1421	// [ 1451649 ] Attribute values with trailing quotes not handled correctly
	1422	// We did not have an opening quote but seem to have a
	1423	// closing one. Give up and throw an error.
	1424	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1425	return 0;
	1426	}
	1427	value += *p;
	1428	++p;
	1429	}
	1430	}
	1431	return p;
	1432	}
	1433
	#ifdef TIXML_USE_STL
	// Appends text characters from `in` to `tag`.
	//
	// For ordinary text (`cdata` false), reading stops before the next '<',
	// which is left in the stream. For CDATA text, reading stops after the
	// "]]>" terminator has been appended. If a character value <= 0 is
	// peeked, TIXML_ERROR_EMBEDDED_NULL is recorded on the owning document,
	// if any, and reading stops. Also stops, silently, once the stream is no
	// longer good().
	void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		while ( in->good() )
		{
			int c = in->peek();
			if ( !cdata && (c == '<' ) )
			{
				return;
			}
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;
			in->get();	// "commits" the peek made above

			if ( cdata && c == '>' && tag->size() >= 3 ) {
				size_t len = tag->size();
				// Index into the string itself. `tag` is a pointer, so
				// (tag)[i] would do pointer arithmetic, not a character
				// lookup.
				if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
					// terminator of cdata.
					return;
				}
			}
		}
	}
	#endif
	1465
	1466	const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1467	{
	1468	value = "";
	1469	TiXmlDocument* document = GetDocument();
	1470
	1471	if ( data )
	1472	{
	1473	data->Stamp( p, encoding );
	1474	location = data->Cursor();
	1475	}
	1476
	1477	const char* const startTag = "<![CDATA[";
	1478	const char* const endTag = "]]>";
	1479
	1480	if ( cdata \|\| StringEqual( p, startTag, false, encoding ) )
	1481	{
	1482	cdata = true;
	1483
	1484	if ( !StringEqual( p, startTag, false, encoding ) )
	1485	{
	1486	document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
	1487	return 0;
	1488	}
	1489	p += strlen( startTag );
	1490
	1491	// Keep all the white space, ignore the encoding, etc.
	1492	while ( p && *p
	1493	&& !StringEqual( p, endTag, false, encoding )
	1494	)
	1495	{
	1496	value += *p;
	1497	++p;
	1498	}
	1499
	1500	TIXML_STRING dummy;
	1501	p = ReadText( p, &dummy, false, endTag, false, encoding );
	1502	return p;
	1503	}
	1504	else
	1505	{
	1506	bool ignoreWhite = true;
	1507
	1508	const char* end = "<";
	1509	p = ReadText( p, &value, ignoreWhite, end, false, encoding );
	1510	if ( p )
	1511	return p-1; // don't truncate the '<'
	1512	return 0;
	1513	}
	1514	}
	1515
	#ifdef TIXML_USE_STL
	// Appends characters from `in` to `tag` up to and including the first
	// '>'. If a character value <= 0 is read, TIXML_ERROR_EMBEDDED_NULL is
	// recorded on the owning document, if any, and reading stops. Also
	// stops, silently, once the stream is no longer good().
	void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		while ( in->good() )
		{
			const int next = in->get();

			if ( next <= 0 )
			{
				// Nothing usable was read: report it and give up.
				if ( TiXmlDocument* document = GetDocument() )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) next;

			// The closing '>' completes the declaration.
			if ( next == '>' )
				return;
		}
	}
	#endif
	1539
	1540	const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
	1541	{
	1542	p = SkipWhiteSpace( p, _encoding );
	1543	// Find the beginning, find the end, and look for
	1544	// the stuff in-between.
	1545	TiXmlDocument* document = GetDocument();
	1546	if ( !p \|\| !*p \|\| !StringEqual( p, "<?xml", true, _encoding ) )
	1547	{
	1548	if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
	1549	return 0;
	1550	}
	1551	if ( data )
	1552	{
	1553	data->Stamp( p, _encoding );
	1554	location = data->Cursor();
	1555	}
	1556	p += 5;
	1557
	1558	version = "";
	1559	encoding = "";
	1560	standalone = "";
	1561
	1562	while ( p && *p )
	1563	{
	1564	if ( *p == '>' )
	1565	{
	1566	++p;
	1567	return p;
	1568	}
	1569
	1570	p = SkipWhiteSpace( p, _encoding );
	1571	if ( StringEqual( p, "version", true, _encoding ) )
	1572	{
	1573	TiXmlAttribute attrib;
	1574	p = attrib.Parse( p, data, _encoding );
	1575	version = attrib.Value();
	1576	}
	1577	else if ( StringEqual( p, "encoding", true, _encoding ) )
	1578	{
	1579	TiXmlAttribute attrib;
	1580	p = attrib.Parse( p, data, _encoding );
	1581	encoding = attrib.Value();
	1582	}
	1583	else if ( StringEqual( p, "standalone", true, _encoding ) )
	1584	{
	1585	TiXmlAttribute attrib;
	1586	p = attrib.Parse( p, data, _encoding );
	1587	standalone = attrib.Value();
	1588	}
	1589	else
	1590	{
	1591	// Read over whatever it is.
	1592	while( p && p && p != '>' && !IsWhiteSpace( *p ) )
	1593	++p;
	1594	}
	1595	}
	1596	return 0;
	1597	}
	1598
	1599	bool TiXmlText::Blank() const
	1600	{
	1601	for ( unsigned i=0; i<value.length(); i++ )
	1602	if ( !IsWhiteSpace( value[i] ) )
	1603	return false;
	1604	return true;
	1605	}
	1606

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: downloads/Tools/XMLConverter/src/tinyxmlparser.cpp @ 9

Download in other formats: