Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: code/trunk/src/tinyxml/tinyxmlparser.cpp @ 3008

Last change on this file since 3008 was 2710, checked in by rgrieder, 16 years ago
Merged buildsystem3 containing buildsystem2 containing Adi's buildsystem branch back to the trunk. Please update the media directory if you were not using buildsystem3 before.
Property svn:eol-style set to `native`
File size: 38.2 KB

Rev	Line
[471]	1	/*
	2	www.sourceforge.net/projects/tinyxml
	3	Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
	4
[738]	5	This software is provided 'as-is', without any express or implied
	6	warranty. In no event will the authors be held liable for any
[471]	7	damages arising from the use of this software.
	8
[738]	9	Permission is granted to anyone to use this software for any
	10	purpose, including commercial applications, and to alter it and
[471]	11	redistribute it freely, subject to the following restrictions:
	12
[738]	13	1. The origin of this software must not be misrepresented; you must
[471]	14	not claim that you wrote the original software. If you use this
	15	software in a product, an acknowledgment in the product documentation
	16	would be appreciated but is not required.
	17
[738]	18	2. Altered source versions must be plainly marked as such, and
[471]	19	must not be misrepresented as being the original software.
	20
[738]	21	3. This notice may not be removed or altered from any source
[471]	22	distribution.
	23	*/
	24
	25	#include <ctype.h>
	26	#include <stddef.h>
	27
[738]	28	#include "tinyxml.h"
	29
[471]	30	//#define DEBUG_PARSER
[738]	31	#if defined( DEBUG_PARSER )
	32	# if defined( DEBUG ) && defined( _MSC_VER )
	33	# include <windows.h>
	34	# define TIXML_LOG OutputDebugString
	35	# else
	36	# define TIXML_LOG printf
	37	# endif
	38	#endif
[471]	39
	40	// Note tha "PutString" hardcodes the same list. This
	41	// is less flexible than it appears. Changing the entries
[738]	42	// or order will break putstring.
	43	TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
[471]	44	{
	45	{ "&", 5, '&' },
	46	{ "<", 4, '<' },
	47	{ ">", 4, '>' },
	48	{ """, 6, '\"' },
	49	{ "'", 6, '\'' }
	50	};
	51
	52	// Bunch of unicode info at:
	53	// http://www.unicode.org/faq/utf_bom.html
	54	// Including the basic of this table, which determines the #bytes in the
	55	// sequence from the lead byte. 1 placed for invalid sequences --
	56	// although the result will be junk, pass it through as much as possible.
[738]	57	// Beware of the non-characters in UTF-8:
[471]	58	// ef bb bf (Microsoft "lead bytes")
	59	// ef bf be
[738]	60	// ef bf bf
[471]	61
	62	const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
	63	const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
	64	const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
	65
[738]	66	const int TiXmlBase::utf8ByteTable[256] =
[471]	67	{
	68	// 0 1 2 3 4 5 6 7 8 9 a b c d e f
	69	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
	70	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
	71	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
	72	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
	73	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
	74	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
	75	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
	76	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
	77	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
[738]	78	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
	79	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
	80	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
[471]	81	1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
	82	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
	83	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
	84	4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
	85	};
	86
	87
	88	void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
	89	{
	90	const unsigned long BYTE_MASK = 0xBF;
	91	const unsigned long BYTE_MARK = 0x80;
	92	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	93
[738]	94	if (input < 0x80)
[471]	95	*length = 1;
	96	else if ( input < 0x800 )
	97	*length = 2;
	98	else if ( input < 0x10000 )
	99	*length = 3;
	100	else if ( input < 0x200000 )
	101	*length = 4;
	102	else
	103	{ *length = 0; return; } // This code won't covert this correctly anyway.
	104
	105	output += *length;
	106
	107	// Scary scary fall throughs.
[738]	108	switch (*length)
[471]	109	{
	110	case 4:
[738]	111	--output;
	112	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
[471]	113	input >>= 6;
	114	case 3:
[738]	115	--output;
	116	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
[471]	117	input >>= 6;
	118	case 2:
[738]	119	--output;
	120	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
[471]	121	input >>= 6;
	122	case 1:
[738]	123	--output;
[471]	124	output = (char)(input \| FIRST_BYTE_MARK[length]);
	125	}
	126	}
	127
	128
	129	/static/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /encoding/ )
	130	{
	131	// This will only work for low-ascii, everything else is assumed to be a valid
	132	// letter. I'm not sure this is the best approach, but it is quite tricky trying
[738]	133	// to figure out alhabetical vs. not across encoding. So take a very
[471]	134	// conservative approach.
	135
	136	// if ( encoding == TIXML_ENCODING_UTF8 )
	137	// {
	138	if ( anyByte < 127 )
	139	return isalpha( anyByte );
	140	else
	141	return 1; // What else to do? The unicode set is huge...get the english ones right.
	142	// }
	143	// else
	144	// {
	145	// return isalpha( anyByte );
	146	// }
	147	}
	148
	149
	150	/static/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /encoding/ )
	151	{
	152	// This will only work for low-ascii, everything else is assumed to be a valid
	153	// letter. I'm not sure this is the best approach, but it is quite tricky trying
[738]	154	// to figure out alhabetical vs. not across encoding. So take a very
[471]	155	// conservative approach.
	156
	157	// if ( encoding == TIXML_ENCODING_UTF8 )
	158	// {
	159	if ( anyByte < 127 )
	160	return isalnum( anyByte );
	161	else
	162	return 1; // What else to do? The unicode set is huge...get the english ones right.
	163	// }
	164	// else
	165	// {
	166	// return isalnum( anyByte );
	167	// }
	168	}
	169
	170
	171	class TiXmlParsingData
	172	{
	173	friend class TiXmlDocument;
	174	public:
	175	void Stamp( const char* now, TiXmlEncoding encoding );
	176
	177	const TiXmlCursor& Cursor() { return cursor; }
	178
	179	private:
	180	// Only used by the document!
	181	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
	182	{
	183	assert( start );
	184	stamp = start;
	185	tabsize = _tabsize;
	186	cursor.row = row;
	187	cursor.col = col;
	188	}
	189
	190	TiXmlCursor cursor;
	191	const char* stamp;
	192	int tabsize;
	193	};
	194
	195
	196	void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
	197	{
	198	assert( now );
	199
	200	// Do nothing if the tabsize is 0.
	201	if ( tabsize < 1 )
	202	{
	203	return;
	204	}
	205
	206	// Get the current row, column.
	207	int row = cursor.row;
	208	int col = cursor.col;
	209	const char* p = stamp;
	210	assert( p );
	211
	212	while ( p < now )
	213	{
	214	// Treat p as unsigned, so we have a happy compiler.
	215	const unsigned char* pU = (const unsigned char*)p;
	216
	217	// Code contributed by Fletcher Dunn: (modified by lee)
	218	switch (*pU) {
	219	case 0:
	220	// We should never get here, but in case we do, don't
	221	// advance past the terminating null character, ever
	222	return;
	223
	224	case '\r':
	225	// bump down to the next line
	226	++row;
[738]	227	col = 0;
[471]	228	// Eat the character
	229	++p;
	230
	231	// Check for \r\n sequence, and treat this as a single character
	232	if (*p == '\n') {
	233	++p;
	234	}
	235	break;
	236
	237	case '\n':
	238	// bump down to the next line
	239	++row;
	240	col = 0;
	241
	242	// Eat the character
	243	++p;
	244
	245	// Check for \n\r sequence, and treat this as a single
	246	// character. (Yes, this bizarre thing does occur still
	247	// on some arcane platforms...)
	248	if (*p == '\r') {
	249	++p;
	250	}
	251	break;
	252
	253	case '\t':
	254	// Eat the character
	255	++p;
	256
	257	// Skip to next tab stop
	258	col = (col / tabsize + 1) * tabsize;
	259	break;
	260
	261	case TIXML_UTF_LEAD_0:
	262	if ( encoding == TIXML_ENCODING_UTF8 )
	263	{
	264	if ( (p+1) && (p+2) )
	265	{
	266	// In these cases, don't advance the column. These are
	267	// 0-width spaces.
	268	if ( (pU+1)==TIXML_UTF_LEAD_1 && (pU+2)==TIXML_UTF_LEAD_2 )
[738]	269	p += 3;
[471]	270	else if ( (pU+1)==0xbfU && (pU+2)==0xbeU )
[738]	271	p += 3;
[471]	272	else if ( (pU+1)==0xbfU && (pU+2)==0xbfU )
[738]	273	p += 3;
[471]	274	else
	275	{ p +=3; ++col; } // A normal character.
	276	}
	277	}
	278	else
	279	{
	280	++p;
	281	++col;
	282	}
	283	break;
	284
	285	default:
	286	if ( encoding == TIXML_ENCODING_UTF8 )
	287	{
	288	// Eat the 1 to 4 byte utf8 character.
[738]	289	int step = TiXmlBase::utf8ByteTable[((const unsigned char)p)];
[471]	290	if ( step == 0 )
	291	step = 1; // Error case from bad encoding, but handle gracefully.
	292	p += step;
	293
	294	// Just advance one column, of course.
	295	++col;
	296	}
	297	else
	298	{
	299	++p;
	300	++col;
	301	}
	302	break;
	303	}
	304	}
	305	cursor.row = row;
	306	cursor.col = col;
	307	assert( cursor.row >= -1 );
	308	assert( cursor.col >= -1 );
	309	stamp = p;
	310	assert( stamp );
	311	}
	312
	313
	314	const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
	315	{
	316	if ( !p \|\| !*p )
	317	{
	318	return 0;
	319	}
	320	if ( encoding == TIXML_ENCODING_UTF8 )
	321	{
	322	while ( *p )
	323	{
	324	const unsigned char* pU = (const unsigned char*)p;
[738]	325
[471]	326	// Skip the stupid Microsoft UTF-8 Byte order marks
	327	if ( *(pU+0)==TIXML_UTF_LEAD_0
[738]	328	&& *(pU+1)==TIXML_UTF_LEAD_1
[471]	329	&& *(pU+2)==TIXML_UTF_LEAD_2 )
	330	{
	331	p += 3;
	332	continue;
	333	}
	334	else if(*(pU+0)==TIXML_UTF_LEAD_0
	335	&& *(pU+1)==0xbfU
	336	&& *(pU+2)==0xbeU )
	337	{
	338	p += 3;
	339	continue;
	340	}
	341	else if(*(pU+0)==TIXML_UTF_LEAD_0
	342	&& *(pU+1)==0xbfU
	343	&& *(pU+2)==0xbfU )
	344	{
	345	p += 3;
	346	continue;
	347	}
	348
	349	if ( IsWhiteSpace( p ) \|\| p == '\n' \|\| *p =='\r' ) // Still using old rules for white space.
	350	++p;
	351	else
	352	break;
	353	}
	354	}
	355	else
	356	{
	357	while ( p && IsWhiteSpace( p ) \|\| p == '\n' \|\| p =='\r' )
	358	++p;
	359	}
	360
	361	return p;
	362	}
	363
	364	#ifdef TIXML_USE_STL
[738]	365	/static/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
[471]	366	{
	367	for( ;; )
	368	{
	369	if ( !in->good() ) return false;
	370
	371	int c = in->peek();
	372	// At this scope, we can't get to a document. So fail silently.
	373	if ( !IsWhiteSpace( c ) \|\| c <= 0 )
	374	return true;
	375
	376	*tag += (char) in->get();
	377	}
	378	}
	379
[738]	380	/static/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
[471]	381	{
	382	//assert( character > 0 && character < 128 ); // else it won't work in utf-8
	383	while ( in->good() )
	384	{
	385	int c = in->peek();
	386	if ( c == character )
	387	return true;
	388	if ( c <= 0 ) // Silent failure: can't get document at this scope
	389	return false;
	390
	391	in->get();
	392	*tag += (char) c;
	393	}
	394	return false;
	395	}
	396	#endif
	397
[738]	398	// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
	399	// "assign" optimization removes over 10% of the execution time.
	400	//
[471]	401	const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
	402	{
[738]	403	// Oddly, not supported on some comilers,
	404	//name->clear();
	405	// So use this:
[471]	406	*name = "";
	407	assert( p );
	408
	409	// Names start with letters or underscores.
	410	// Of course, in unicode, tinyxml has no idea what a letter is. The
	411	// algorithm is generous.
	412	//
	413	// After that, they can be letters, underscores, numbers,
	414	// hyphens, or colons. (Colons are valid ony for namespaces,
	415	// but tinyxml can't tell namespaces from names.)
[738]	416	if ( p && *p
[471]	417	&& ( IsAlpha( (unsigned char) p, encoding ) \|\| p == '_' ) )
	418	{
[738]	419	const char* start = p;
[471]	420	while( p && *p
[738]	421	&& ( IsAlphaNum( (unsigned char ) *p, encoding )
[471]	422	\|\| *p == '_'
	423	\|\| *p == '-'
	424	\|\| *p == '.'
	425	\|\| *p == ':' ) )
	426	{
[738]	427	//(name) += p; // expensive
[471]	428	++p;
	429	}
[738]	430	if ( p-start > 0 ) {
	431	name->assign( start, p-start );
	432	}
[471]	433	return p;
	434	}
	435	return 0;
	436	}
	437
	438	const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
	439	{
	440	// Presume an entity, and pull it out.
	441	TIXML_STRING ent;
	442	int i;
	443	*length = 0;
	444
	445	if ( (p+1) && (p+1) == '#' && *(p+2) )
	446	{
	447	unsigned long ucs = 0;
	448	ptrdiff_t delta = 0;
	449	unsigned mult = 1;
	450
	451	if ( *(p+2) == 'x' )
	452	{
	453	// Hexadecimal.
	454	if ( !*(p+3) ) return 0;
	455
	456	const char* q = p+3;
	457	q = strchr( q, ';' );
	458
	459	if ( !q \|\| !*q ) return 0;
	460
	461	delta = q-p;
	462	--q;
	463
	464	while ( *q != 'x' )
	465	{
	466	if ( q >= '0' && q <= '9' )
	467	ucs += mult * (*q - '0');
	468	else if ( q >= 'a' && q <= 'f' )
	469	ucs += mult * (*q - 'a' + 10);
	470	else if ( q >= 'A' && q <= 'F' )
	471	ucs += mult * (*q - 'A' + 10 );
[738]	472	else
[471]	473	return 0;
	474	mult *= 16;
	475	--q;
	476	}
	477	}
	478	else
	479	{
	480	// Decimal.
	481	if ( !*(p+2) ) return 0;
	482
	483	const char* q = p+2;
	484	q = strchr( q, ';' );
	485
	486	if ( !q \|\| !*q ) return 0;
	487
	488	delta = q-p;
	489	--q;
	490
	491	while ( *q != '#' )
	492	{
	493	if ( q >= '0' && q <= '9' )
	494	ucs += mult * (*q - '0');
[738]	495	else
[471]	496	return 0;
	497	mult *= 10;
	498	--q;
	499	}
	500	}
	501	if ( encoding == TIXML_ENCODING_UTF8 )
	502	{
	503	// convert the UCS to UTF-8
	504	ConvertUTF32ToUTF8( ucs, value, length );
	505	}
	506	else
	507	{
	508	*value = (char)ucs;
	509	*length = 1;
	510	}
	511	return p + delta + 1;
	512	}
	513
	514	// Now try to match it.
	515	for( i=0; i<NUM_ENTITY; ++i )
	516	{
	517	if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
	518	{
	519	assert( strlen( entity[i].str ) == entity[i].strLength );
	520	*value = entity[i].chr;
	521	*length = 1;
	522	return ( p + entity[i].strLength );
	523	}
	524	}
	525
	526	// So it wasn't an entity, its unrecognized, or something like that.
	527	value = p; // Don't put back the last one, since we return it!
[738]	528	//*length = 1; // Leave unrecognized entities - this doesn't really work.
	529	// Just writes strange XML.
[471]	530	return p+1;
	531	}
	532
	533
	534	bool TiXmlBase::StringEqual( const char* p,
	535	const char* tag,
	536	bool ignoreCase,
	537	TiXmlEncoding encoding )
	538	{
	539	assert( p );
	540	assert( tag );
	541	if ( !p \|\| !*p )
	542	{
	543	assert( 0 );
	544	return false;
	545	}
	546
	547	const char* q = p;
	548
	549	if ( ignoreCase )
	550	{
	551	while ( q && tag && ToLower( q, encoding ) == ToLower( tag, encoding ) )
	552	{
	553	++q;
	554	++tag;
	555	}
	556
	557	if ( *tag == 0 )
	558	return true;
	559	}
	560	else
	561	{
	562	while ( q && tag && q == tag )
	563	{
	564	++q;
	565	++tag;
	566	}
	567
	568	if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
	569	return true;
	570	}
	571	return false;
	572	}
	573
[738]	574	const char* TiXmlBase::ReadText( const char* p,
	575	TIXML_STRING * text,
	576	bool trimWhiteSpace,
	577	const char* endTag,
[471]	578	bool caseInsensitive,
	579	TiXmlEncoding encoding )
	580	{
	581	*text = "";
	582	if ( !trimWhiteSpace // certain tags always keep whitespace
	583	\|\| !condenseWhiteSpace ) // if true, whitespace is always kept
	584	{
	585	// Keep all the white space.
	586	while ( p && *p
	587	&& !StringEqual( p, endTag, caseInsensitive, encoding )
	588	)
	589	{
	590	int len;
	591	char cArr[4] = { 0, 0, 0, 0 };
	592	p = GetChar( p, cArr, &len, encoding );
	593	text->append( cArr, len );
	594	}
	595	}
	596	else
	597	{
	598	bool whitespace = false;
	599
	600	// Remove leading white space:
	601	p = SkipWhiteSpace( p, encoding );
	602	while ( p && *p
	603	&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
	604	{
	605	if ( p == '\r' \|\| p == '\n' )
	606	{
	607	whitespace = true;
	608	++p;
	609	}
	610	else if ( IsWhiteSpace( *p ) )
	611	{
	612	whitespace = true;
	613	++p;
	614	}
	615	else
	616	{
	617	// If we've found whitespace, add it before the
	618	// new character. Any whitespace just becomes a space.
	619	if ( whitespace )
	620	{
	621	(*text) += ' ';
	622	whitespace = false;
	623	}
	624	int len;
	625	char cArr[4] = { 0, 0, 0, 0 };
	626	p = GetChar( p, cArr, &len, encoding );
	627	if ( len == 1 )
	628	(*text) += cArr[0]; // more efficient
	629	else
	630	text->append( cArr, len );
	631	}
	632	}
	633	}
[738]	634	if ( p )
	635	p += strlen( endTag );
	636	return p;
[471]	637	}
	638
	639	#ifdef TIXML_USE_STL
	640
[738]	641	void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
[471]	642	{
	643	// The basic issue with a document is that we don't know what we're
	644	// streaming. Read something presumed to be a tag (and hope), then
	645	// identify it, and call the appropriate stream method on the tag.
	646	//
	647	// This "pre-streaming" will never read the closing ">" so the
	648	// sub-tag can orient itself.
	649
[738]	650	if ( !StreamTo( in, '<', tag ) )
[471]	651	{
	652	SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	653	return;
	654	}
	655
	656	while ( in->good() )
	657	{
	658	int tagIndex = (int) tag->length();
	659	while ( in->good() && in->peek() != '>' )
	660	{
	661	int c = in->get();
	662	if ( c <= 0 )
	663	{
	664	SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	665	break;
	666	}
	667	(*tag) += (char) c;
	668	}
	669
	670	if ( in->good() )
	671	{
[738]	672	// We now have something we presume to be a node of
[471]	673	// some sort. Identify it, and call the node to
	674	// continue streaming.
	675	TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
	676
	677	if ( node )
	678	{
	679	node->StreamIn( in, tag );
	680	bool isElement = node->ToElement() != 0;
	681	delete node;
	682	node = 0;
	683
	684	// If this is the root element, we're done. Parsing will be
	685	// done by the >> operator.
	686	if ( isElement )
	687	{
	688	return;
	689	}
	690	}
	691	else
	692	{
	693	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
	694	return;
	695	}
	696	}
	697	}
	698	// We should have returned sooner.
	699	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
	700	}
	701
	702	#endif
	703
	704	const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
	705	{
	706	ClearError();
	707
	708	// Parse away, at the document level. Since a document
	709	// contains nothing but other tags, most of what happens
	710	// here is skipping white space.
	711	if ( !p \|\| !*p )
	712	{
	713	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	714	return 0;
	715	}
	716
	717	// Note that, for a document, this needs to come
	718	// before the while space skip, so that parsing
	719	// starts from the pointer we are given.
	720	location.Clear();
	721	if ( prevData )
	722	{
	723	location.row = prevData->cursor.row;
	724	location.col = prevData->cursor.col;
	725	}
	726	else
	727	{
	728	location.row = 0;
	729	location.col = 0;
	730	}
	731	TiXmlParsingData data( p, TabSize(), location.row, location.col );
	732	location = data.Cursor();
	733
	734	if ( encoding == TIXML_ENCODING_UNKNOWN )
	735	{
	736	// Check for the Microsoft UTF-8 lead bytes.
	737	const unsigned char* pU = (const unsigned char*)p;
	738	if ( (pU+0) && (pU+0) == TIXML_UTF_LEAD_0
	739	&& (pU+1) && (pU+1) == TIXML_UTF_LEAD_1
	740	&& (pU+2) && (pU+2) == TIXML_UTF_LEAD_2 )
	741	{
	742	encoding = TIXML_ENCODING_UTF8;
	743	useMicrosoftBOM = true;
	744	}
	745	}
	746
	747	p = SkipWhiteSpace( p, encoding );
	748	if ( !p )
	749	{
	750	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	751	return 0;
	752	}
	753
	754	while ( p && *p )
	755	{
	756	TiXmlNode* node = Identify( p, encoding );
	757	if ( node )
	758	{
	759	p = node->Parse( p, &data, encoding );
	760	LinkEndChild( node );
	761	}
	762	else
	763	{
	764	break;
	765	}
	766
	767	// Did we get encoding info?
	768	if ( encoding == TIXML_ENCODING_UNKNOWN
	769	&& node->ToDeclaration() )
	770	{
	771	TiXmlDeclaration* dec = node->ToDeclaration();
	772	const char* enc = dec->Encoding();
	773	assert( enc );
	774
	775	if ( *enc == 0 )
	776	encoding = TIXML_ENCODING_UTF8;
	777	else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
	778	encoding = TIXML_ENCODING_UTF8;
	779	else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
	780	encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
[738]	781	else
[471]	782	encoding = TIXML_ENCODING_LEGACY;
	783	}
	784
	785	p = SkipWhiteSpace( p, encoding );
	786	}
	787
	788	// Was this empty?
	789	if ( !firstChild ) {
	790	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
	791	return 0;
	792	}
	793
	794	// All is well.
	795	return p;
	796	}
	797
	798	void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
[738]	799	{
[471]	800	// The first error in a chain is more accurate - don't set again!
	801	if ( error )
	802	return;
	803
	804	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
	805	error = true;
	806	errorId = err;
	807	errorDesc = errorString[ errorId ];
	808
	809	errorLocation.Clear();
	810	if ( pError && data )
	811	{
	812	data->Stamp( pError, encoding );
	813	errorLocation = data->Cursor();
	814	}
	815	}
	816
	817
	818	TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
	819	{
	820	TiXmlNode* returnNode = 0;
	821
	822	p = SkipWhiteSpace( p, encoding );
	823	if( !p \|\| !p \|\| p != '<' )
	824	{
	825	return 0;
	826	}
	827
	828	TiXmlDocument* doc = GetDocument();
	829	p = SkipWhiteSpace( p, encoding );
	830
	831	if ( !p \|\| !*p )
	832	{
	833	return 0;
	834	}
	835
[738]	836	// What is this thing?
[471]	837	// - Elements start with a letter or underscore, but xml is reserved.
	838	// - Comments: <!--
	839	// - Decleration: <?xml
[740]	840	// - StylesheetReference <?xml-stylesheet
[471]	841	// - Everthing else is unknown to tinyxml.
	842	//
	843
	844	const char* xmlHeader = { "<?xml" };
[740]	845	const char* xmlSSHeader = { "<?xml-stylesheet" };
[471]	846	const char* commentHeader = { "<!--" };
	847	const char* dtdHeader = { "<!" };
	848	const char* cdataHeader = { "<![CDATA[" };
	849
[740]	850	if ( StringEqual( p, xmlSSHeader, true, encoding ) )
[471]	851	{
	852	#ifdef DEBUG_PARSER
[740]	853	TIXML_LOG( "XML parsing Stylesheet Reference\n" );
	854	#endif
	855	returnNode = new TiXmlStylesheetReference();
	856	}
	857	else if ( StringEqual( p, xmlHeader, true, encoding ) )
	858	{
	859	#ifdef DEBUG_PARSER
[471]	860	TIXML_LOG( "XML parsing Declaration\n" );
	861	#endif
	862	returnNode = new TiXmlDeclaration();
	863	}
	864	else if ( StringEqual( p, commentHeader, false, encoding ) )
	865	{
	866	#ifdef DEBUG_PARSER
	867	TIXML_LOG( "XML parsing Comment\n" );
	868	#endif
	869	returnNode = new TiXmlComment();
	870	}
	871	else if ( StringEqual( p, cdataHeader, false, encoding ) )
	872	{
	873	#ifdef DEBUG_PARSER
	874	TIXML_LOG( "XML parsing CDATA\n" );
	875	#endif
	876	TiXmlText* text = new TiXmlText( "" );
	877	text->SetCDATA( true );
	878	returnNode = text;
	879	}
	880	else if ( StringEqual( p, dtdHeader, false, encoding ) )
	881	{
	882	#ifdef DEBUG_PARSER
	883	TIXML_LOG( "XML parsing Unknown(1)\n" );
	884	#endif
	885	returnNode = new TiXmlUnknown();
	886	}
	887	else if ( IsAlpha( *(p+1), encoding )
	888	\|\| *(p+1) == '_' )
	889	{
	890	#ifdef DEBUG_PARSER
	891	TIXML_LOG( "XML parsing Element\n" );
	892	#endif
	893	returnNode = new TiXmlElement( "" );
	894	}
	895	else
	896	{
	897	#ifdef DEBUG_PARSER
	898	TIXML_LOG( "XML parsing Unknown(2)\n" );
	899	#endif
	900	returnNode = new TiXmlUnknown();
	901	}
	902
	903	if ( returnNode )
	904	{
	905	// Set the parent, so it can report errors
	906	returnNode->parent = this;
	907	}
	908	else
	909	{
	910	if ( doc )
	911	doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
	912	}
	913	return returnNode;
	914	}
	915
	916	#ifdef TIXML_USE_STL
	917
[738]	918	void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
[471]	919	{
	920	// We're called with some amount of pre-parsing. That is, some of "this"
	921	// element is in "tag". Go ahead and stream to the closing ">"
	922	while( in->good() )
	923	{
	924	int c = in->get();
	925	if ( c <= 0 )
	926	{
	927	TiXmlDocument* document = GetDocument();
	928	if ( document )
	929	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	930	return;
	931	}
	932	(*tag) += (char) c ;
[738]	933
[471]	934	if ( c == '>' )
	935	break;
	936	}
	937
	938	if ( tag->length() < 3 ) return;
	939
	940	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
	941	// If not, identify and stream.
	942
[738]	943	if ( tag->at( tag->length() - 1 ) == '>'
[471]	944	&& tag->at( tag->length() - 2 ) == '/' )
	945	{
	946	// All good!
	947	return;
	948	}
	949	else if ( tag->at( tag->length() - 1 ) == '>' )
	950	{
	951	// There is more. Could be:
	952	// text
[738]	953	// cdata text (which looks like another node)
[471]	954	// closing tag
	955	// another node.
	956	for ( ;; )
	957	{
	958	StreamWhiteSpace( in, tag );
	959
	960	// Do we have text?
[738]	961	if ( in->good() && in->peek() != '<' )
[471]	962	{
	963	// Yep, text.
	964	TiXmlText text( "" );
	965	text.StreamIn( in, tag );
	966
	967	// What follows text is a closing tag or another node.
	968	// Go around again and figure it out.
	969	continue;
	970	}
	971
	972	// We now have either a closing tag...or another node.
	973	// We should be at a "<", regardless.
	974	if ( !in->good() ) return;
	975	assert( in->peek() == '<' );
	976	int tagIndex = (int) tag->length();
	977
	978	bool closingTag = false;
	979	bool firstCharFound = false;
	980
	981	for( ;; )
	982	{
	983	if ( !in->good() )
	984	return;
	985
	986	int c = in->peek();
	987	if ( c <= 0 )
	988	{
	989	TiXmlDocument* document = GetDocument();
	990	if ( document )
	991	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	992	return;
	993	}
[738]	994
[471]	995	if ( c == '>' )
	996	break;
	997
	998	*tag += (char) c;
	999	in->get();
	1000
[738]	1001	// Early out if we find the CDATA id.
	1002	if ( c == '[' && tag->size() >= 9 )
	1003	{
	1004	size_t len = tag->size();
	1005	const char* start = tag->c_str() + len - 9;
	1006	if ( strcmp( start, "<![CDATA[" ) == 0 ) {
	1007	assert( !closingTag );
	1008	break;
	1009	}
	1010	}
	1011
[471]	1012	if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
	1013	{
	1014	firstCharFound = true;
	1015	if ( c == '/' )
	1016	closingTag = true;
	1017	}
	1018	}
	1019	// If it was a closing tag, then read in the closing '>' to clean up the input stream.
	1020	// If it was not, the streaming will be done by the tag.
	1021	if ( closingTag )
	1022	{
	1023	if ( !in->good() )
	1024	return;
	1025
	1026	int c = in->get();
	1027	if ( c <= 0 )
	1028	{
	1029	TiXmlDocument* document = GetDocument();
	1030	if ( document )
	1031	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1032	return;
	1033	}
	1034	assert( c == '>' );
	1035	*tag += (char) c;
	1036
	1037	// We are done, once we've found our closing tag.
	1038	return;
	1039	}
	1040	else
	1041	{
	1042	// If not a closing tag, id it, and stream.
	1043	const char* tagloc = tag->c_str() + tagIndex;
	1044	TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
	1045	if ( !node )
	1046	return;
	1047	node->StreamIn( in, tag );
	1048	delete node;
	1049	node = 0;
	1050
	1051	// No return: go around from the beginning: text, closing tag, or node.
	1052	}
	1053	}
	1054	}
	1055	}
	1056	#endif
	1057
	1058	const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1059	{
	1060	p = SkipWhiteSpace( p, encoding );
	1061	TiXmlDocument* document = GetDocument();
	1062
	1063	if ( !p \|\| !*p )
	1064	{
	1065	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
	1066	return 0;
	1067	}
	1068
	1069	if ( data )
	1070	{
	1071	data->Stamp( p, encoding );
	1072	location = data->Cursor();
	1073	}
	1074
	1075	if ( *p != '<' )
	1076	{
	1077	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
	1078	return 0;
	1079	}
	1080
	1081	p = SkipWhiteSpace( p+1, encoding );
	1082
	1083	// Read the name.
	1084	const char* pErr = p;
	1085
	1086	p = ReadName( p, &value, encoding );
	1087	if ( !p \|\| !*p )
	1088	{
	1089	if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
	1090	return 0;
	1091	}
	1092
	1093	TIXML_STRING endTag ("</");
	1094	endTag += value;
	1095	endTag += ">";
	1096
	1097	// Check for and read attributes. Also look for an empty
	1098	// tag or an end tag.
	1099	while ( p && *p )
	1100	{
	1101	pErr = p;
	1102	p = SkipWhiteSpace( p, encoding );
	1103	if ( !p \|\| !*p )
	1104	{
	1105	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
	1106	return 0;
	1107	}
	1108	if ( *p == '/' )
	1109	{
	1110	++p;
	1111	// Empty tag.
	1112	if ( *p != '>' )
	1113	{
[738]	1114	if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
[471]	1115	return 0;
	1116	}
	1117	return (p+1);
	1118	}
	1119	else if ( *p == '>' )
	1120	{
	1121	// Done with attributes (if there were any.)
	1122	// Read the value -- which can include other
	1123	// elements -- read the end tag, and return.
	1124	++p;
	1125	p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
[738]	1126	if ( !p \|\| !*p ) {
	1127	// We were looking for the end tag, but found nothing.
	1128	// Fix for [ 1663758 ] Failure to report error on bad XML
	1129	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
[471]	1130	return 0;
[738]	1131	}
[471]	1132
	1133	// We should find the end tag now
	1134	if ( StringEqual( p, endTag.c_str(), false, encoding ) )
	1135	{
	1136	p += endTag.length();
	1137	return p;
	1138	}
	1139	else
	1140	{
	1141	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
	1142	return 0;
	1143	}
	1144	}
	1145	else
	1146	{
	1147	// Try to read an attribute:
	1148	TiXmlAttribute* attrib = new TiXmlAttribute();
	1149	if ( !attrib )
	1150	{
	1151	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
	1152	return 0;
	1153	}
	1154
	1155	attrib->SetDocument( document );
[738]	1156	pErr = p;
[471]	1157	p = attrib->Parse( p, data, encoding );
	1158
	1159	if ( !p \|\| !*p )
	1160	{
	1161	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
	1162	delete attrib;
	1163	return 0;
	1164	}
	1165
	1166	// Handle the strange case of double attributes:
[738]	1167	#ifdef TIXML_USE_STL
	1168	TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
	1169	#else
[471]	1170	TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
[738]	1171	#endif
[471]	1172	if ( node )
	1173	{
	1174	node->SetValue( attrib->Value() );
	1175	delete attrib;
	1176	return 0;
	1177	}
	1178
	1179	attributeSet.Add( attrib );
	1180	}
	1181	}
	1182	return p;
	1183	}
	1184
	1185
	1186	const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1187	{
	1188	TiXmlDocument* document = GetDocument();
	1189
	1190	// Read in text and elements in any order.
	1191	const char* pWithWhiteSpace = p;
	1192	p = SkipWhiteSpace( p, encoding );
	1193
	1194	while ( p && *p )
	1195	{
	1196	if ( *p != '<' )
	1197	{
	1198	// Take what we have, make a text element.
	1199	TiXmlText* textNode = new TiXmlText( "" );
	1200
	1201	if ( !textNode )
	1202	{
	1203	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
	1204	return 0;
	1205	}
	1206
	1207	if ( TiXmlBase::IsWhiteSpaceCondensed() )
	1208	{
	1209	p = textNode->Parse( p, data, encoding );
	1210	}
	1211	else
	1212	{
	1213	// Special case: we want to keep the white space
	1214	// so that leading spaces aren't removed.
	1215	p = textNode->Parse( pWithWhiteSpace, data, encoding );
	1216	}
	1217
	1218	if ( !textNode->Blank() )
	1219	LinkEndChild( textNode );
	1220	else
	1221	delete textNode;
[738]	1222	}
	1223	else
[471]	1224	{
	1225	// We hit a '<'
	1226	// Have we hit a new element or an end tag? This could also be
	1227	// a TiXmlText in the "CDATA" style.
	1228	if ( StringEqual( p, "</", false, encoding ) )
	1229	{
	1230	return p;
	1231	}
	1232	else
	1233	{
	1234	TiXmlNode* node = Identify( p, encoding );
	1235	if ( node )
	1236	{
	1237	p = node->Parse( p, data, encoding );
	1238	LinkEndChild( node );
[738]	1239	}
[471]	1240	else
	1241	{
	1242	return 0;
	1243	}
	1244	}
	1245	}
	1246	pWithWhiteSpace = p;
	1247	p = SkipWhiteSpace( p, encoding );
	1248	}
	1249
	1250	if ( !p )
	1251	{
	1252	if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
[738]	1253	}
[471]	1254	return p;
	1255	}
	1256
	1257
	1258	#ifdef TIXML_USE_STL
[738]	1259	void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
[471]	1260	{
	1261	while ( in->good() )
	1262	{
[738]	1263	int c = in->get();
[471]	1264	if ( c <= 0 )
	1265	{
	1266	TiXmlDocument* document = GetDocument();
	1267	if ( document )
	1268	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1269	return;
	1270	}
	1271	(*tag) += (char) c;
	1272
	1273	if ( c == '>' )
	1274	{
	1275	// All is well.
[738]	1276	return;
[471]	1277	}
	1278	}
	1279	}
	1280	#endif
	1281
	1282
	1283	const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1284	{
	1285	TiXmlDocument* document = GetDocument();
	1286	p = SkipWhiteSpace( p, encoding );
	1287
	1288	if ( data )
	1289	{
	1290	data->Stamp( p, encoding );
	1291	location = data->Cursor();
	1292	}
	1293	if ( !p \|\| !p \|\| p != '<' )
	1294	{
	1295	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
	1296	return 0;
	1297	}
	1298	++p;
	1299	value = "";
	1300
	1301	while ( p && p && p != '>' )
	1302	{
	1303	value += *p;
	1304	++p;
	1305	}
	1306
	1307	if ( !p )
	1308	{
	1309	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
	1310	}
	1311	if ( *p == '>' )
	1312	return p+1;
	1313	return p;
	1314	}
	1315
	1316	#ifdef TIXML_USE_STL
[738]	1317	void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
[471]	1318	{
	1319	while ( in->good() )
	1320	{
[738]	1321	int c = in->get();
[471]	1322	if ( c <= 0 )
	1323	{
	1324	TiXmlDocument* document = GetDocument();
	1325	if ( document )
	1326	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1327	return;
	1328	}
	1329
	1330	(*tag) += (char) c;
	1331
[738]	1332	if ( c == '>'
[471]	1333	&& tag->at( tag->length() - 2 ) == '-'
	1334	&& tag->at( tag->length() - 3 ) == '-' )
	1335	{
	1336	// All is well.
[738]	1337	return;
[471]	1338	}
	1339	}
	1340	}
	1341	#endif
	1342
	1343
	1344	const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1345	{
	1346	TiXmlDocument* document = GetDocument();
	1347	value = "";
	1348
	1349	p = SkipWhiteSpace( p, encoding );
	1350
	1351	if ( data )
	1352	{
	1353	data->Stamp( p, encoding );
	1354	location = data->Cursor();
	1355	}
	1356	const char* startTag = "<!--";
	1357	const char* endTag = "-->";
	1358
	1359	if ( !StringEqual( p, startTag, false, encoding ) )
	1360	{
	1361	document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
	1362	return 0;
	1363	}
	1364	p += strlen( startTag );
[738]	1365
	1366	// [ 1475201 ] TinyXML parses entities in comments
	1367	// Oops - ReadText doesn't work, because we don't want to parse the entities.
	1368	// p = ReadText( p, &value, false, endTag, false, encoding );
	1369	//
	1370	// from the XML spec:
	1371	/*
	1372	[Definition: Comments may appear anywhere in a document outside other markup; in addition,
	1373	they may appear within the document type declaration at places allowed by the grammar.
	1374	They are not part of the document's character data; an XML processor MAY, but need not,
	1375	make it possible for an application to retrieve the text of comments. For compatibility,
	1376	the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
	1377	references MUST NOT be recognized within comments.
	1378
	1379	An example of a comment:
	1380
	1381	<!-- declarations for <head> & <body> -->
	1382	*/
	1383
	1384	value = "";
	1385	// Keep all the white space.
	1386	while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
	1387	{
	1388	value.append( p, 1 );
	1389	++p;
	1390	}
	1391	if ( p )
	1392	p += strlen( endTag );
	1393
[471]	1394	return p;
	1395	}
	1396
	1397
	1398	const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1399	{
	1400	p = SkipWhiteSpace( p, encoding );
	1401	if ( !p \|\| !*p ) return 0;
	1402
[738]	1403	// int tabsize = 4;
	1404	// if ( document )
	1405	// tabsize = document->TabSize();
[471]	1406
	1407	if ( data )
	1408	{
	1409	data->Stamp( p, encoding );
	1410	location = data->Cursor();
	1411	}
	1412	// Read the name, the '=' and the value.
	1413	const char* pErr = p;
	1414	p = ReadName( p, &name, encoding );
	1415	if ( !p \|\| !*p )
	1416	{
	1417	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
	1418	return 0;
	1419	}
	1420	p = SkipWhiteSpace( p, encoding );
	1421	if ( !p \|\| !p \|\| p != '=' )
	1422	{
	1423	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1424	return 0;
	1425	}
	1426
	1427	++p; // skip '='
	1428	p = SkipWhiteSpace( p, encoding );
	1429	if ( !p \|\| !*p )
	1430	{
	1431	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1432	return 0;
	1433	}
[738]	1434
[471]	1435	const char* end;
[738]	1436	const char SINGLE_QUOTE = '\'';
	1437	const char DOUBLE_QUOTE = '\"';
[471]	1438
[738]	1439	if ( *p == SINGLE_QUOTE )
[471]	1440	{
	1441	++p;
[738]	1442	end = "\'"; // single quote in string
[471]	1443	p = ReadText( p, &value, false, end, false, encoding );
	1444	}
[738]	1445	else if ( *p == DOUBLE_QUOTE )
[471]	1446	{
	1447	++p;
[738]	1448	end = "\""; // double quote in string
[471]	1449	p = ReadText( p, &value, false, end, false, encoding );
	1450	}
	1451	else
	1452	{
	1453	// All attribute values should be in single or double quotes.
	1454	// But this is such a common error that the parser will try
	1455	// its best, even without them.
	1456	value = "";
[738]	1457	while ( p && *p // existence
[471]	1458	&& !IsWhiteSpace( p ) && p != '\n' && *p != '\r' // whitespace
[738]	1459	&& p != '/' && p != '>' ) // tag end
[471]	1460	{
[738]	1461	if ( p == SINGLE_QUOTE \|\| p == DOUBLE_QUOTE ) {
	1462	// [ 1451649 ] Attribute values with trailing quotes not handled correctly
	1463	// We did not have an opening quote but seem to have a
	1464	// closing one. Give up and throw an error.
	1465	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1466	return 0;
	1467	}
[471]	1468	value += *p;
	1469	++p;
	1470	}
	1471	}
	1472	return p;
	1473	}
	1474
	1475	#ifdef TIXML_USE_STL
[738]	1476	void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
[471]	1477	{
[738]	1478	while ( in->good() )
[471]	1479	{
[738]	1480	int c = in->peek();
	1481	if ( !cdata && (c == '<' ) )
	1482	{
	1483	return;
	1484	}
[471]	1485	if ( c <= 0 )
	1486	{
	1487	TiXmlDocument* document = GetDocument();
	1488	if ( document )
	1489	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1490	return;
	1491	}
	1492
	1493	(*tag) += (char) c;
[738]	1494	in->get(); // "commits" the peek made above
[471]	1495
[738]	1496	if ( cdata && c == '>' && tag->size() >= 3 ) {
	1497	size_t len = tag->size();
	1498	if ( (tag)[len-2] == ']' && (tag)[len-3] == ']' ) {
	1499	// terminator of cdata.
[471]	1500	return;
	1501	}
	1502	}
	1503	}
	1504	}
	1505	#endif
	1506
	1507	const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1508	{
	1509	value = "";
	1510	TiXmlDocument* document = GetDocument();
	1511
	1512	if ( data )
	1513	{
	1514	data->Stamp( p, encoding );
	1515	location = data->Cursor();
	1516	}
	1517
	1518	const char* const startTag = "<![CDATA[";
	1519	const char* const endTag = "]]>";
	1520
	1521	if ( cdata \|\| StringEqual( p, startTag, false, encoding ) )
	1522	{
	1523	cdata = true;
	1524
	1525	if ( !StringEqual( p, startTag, false, encoding ) )
	1526	{
	1527	document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
	1528	return 0;
	1529	}
	1530	p += strlen( startTag );
	1531
	1532	// Keep all the white space, ignore the encoding, etc.
	1533	while ( p && *p
	1534	&& !StringEqual( p, endTag, false, encoding )
	1535	)
	1536	{
	1537	value += *p;
	1538	++p;
	1539	}
	1540
[738]	1541	TIXML_STRING dummy;
[471]	1542	p = ReadText( p, &dummy, false, endTag, false, encoding );
	1543	return p;
	1544	}
	1545	else
	1546	{
	1547	bool ignoreWhite = true;
	1548
	1549	const char* end = "<";
	1550	p = ReadText( p, &value, ignoreWhite, end, false, encoding );
	1551	if ( p )
	1552	return p-1; // don't truncate the '<'
	1553	return 0;
	1554	}
	1555	}
	1556
	1557	#ifdef TIXML_USE_STL
[738]	1558	void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
[471]	1559	{
	1560	while ( in->good() )
	1561	{
	1562	int c = in->get();
	1563	if ( c <= 0 )
	1564	{
	1565	TiXmlDocument* document = GetDocument();
	1566	if ( document )
	1567	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1568	return;
	1569	}
	1570	(*tag) += (char) c;
	1571
	1572	if ( c == '>' )
	1573	{
	1574	// All is well.
	1575	return;
	1576	}
	1577	}
	1578	}
	1579	#endif
	1580
	1581	const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
	1582	{
	1583	p = SkipWhiteSpace( p, _encoding );
	1584	// Find the beginning, find the end, and look for
	1585	// the stuff in-between.
	1586	TiXmlDocument* document = GetDocument();
	1587	if ( !p \|\| !*p \|\| !StringEqual( p, "<?xml", true, _encoding ) )
	1588	{
	1589	if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
	1590	return 0;
	1591	}
	1592	if ( data )
	1593	{
	1594	data->Stamp( p, _encoding );
	1595	location = data->Cursor();
	1596	}
	1597	p += 5;
	1598
	1599	version = "";
	1600	encoding = "";
	1601	standalone = "";
	1602
	1603	while ( p && *p )
	1604	{
	1605	if ( *p == '>' )
	1606	{
	1607	++p;
	1608	return p;
	1609	}
	1610
	1611	p = SkipWhiteSpace( p, _encoding );
	1612	if ( StringEqual( p, "version", true, _encoding ) )
	1613	{
	1614	TiXmlAttribute attrib;
[738]	1615	p = attrib.Parse( p, data, _encoding );
[471]	1616	version = attrib.Value();
	1617	}
	1618	else if ( StringEqual( p, "encoding", true, _encoding ) )
	1619	{
	1620	TiXmlAttribute attrib;
[738]	1621	p = attrib.Parse( p, data, _encoding );
[471]	1622	encoding = attrib.Value();
	1623	}
	1624	else if ( StringEqual( p, "standalone", true, _encoding ) )
	1625	{
	1626	TiXmlAttribute attrib;
[738]	1627	p = attrib.Parse( p, data, _encoding );
[471]	1628	standalone = attrib.Value();
	1629	}
	1630	else
	1631	{
	1632	// Read over whatever it is.
	1633	while( p && p && p != '>' && !IsWhiteSpace( *p ) )
	1634	++p;
	1635	}
	1636	}
	1637	return 0;
	1638	}
	1639
	1640	bool TiXmlText::Blank() const
	1641	{
	1642	for ( unsigned i=0; i<value.length(); i++ )
	1643	if ( !IsWhiteSpace( value[i] ) )
	1644	return false;
	1645	return true;
	1646	}
	1647
[740]	1648	#ifdef TIXML_USE_STL
	1649	void TiXmlStylesheetReference::StreamIn( std::istream * in, TIXML_STRING * tag )
	1650	{
	1651	while ( in->good() )
	1652	{
	1653	int c = in->get();
	1654	if ( c <= 0 )
	1655	{
	1656	TiXmlDocument* document = GetDocument();
	1657	if ( document )
	1658	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1659	return;
	1660	}
	1661	(*tag) += (char) c;
	1662
	1663	if ( c == '>' )
	1664	{
	1665	// All is well.
	1666	return;
	1667	}
	1668	}
	1669	}
	1670	#endif
	1671
	1672	const char* TiXmlStylesheetReference::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
	1673	{
	1674	p = SkipWhiteSpace( p, _encoding );
	1675	// Find the beginning, find the end, and look for
	1676	// the stuff in-between.
	1677	TiXmlDocument* document = GetDocument();
	1678	if ( !p \|\| !*p \|\| !StringEqual( p, "<?xml-stylesheet", true, _encoding ) )
	1679	{
	1680	if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
	1681	return 0;
	1682	}
	1683	if ( data )
	1684	{
	1685	data->Stamp( p, _encoding );
	1686	location = data->Cursor();
	1687	}
	1688	p += 5;
	1689
	1690	type = "";
	1691	href = "";
	1692
	1693	while ( p && *p )
	1694	{
	1695	if ( *p == '>' )
	1696	{
	1697	++p;
	1698	return p;
	1699	}
	1700
	1701	p = SkipWhiteSpace( p, _encoding );
	1702	if ( StringEqual( p, "type", true, _encoding ) )
	1703	{
	1704	TiXmlAttribute attrib;
	1705	p = attrib.Parse( p, data, _encoding );
	1706	type = attrib.Value();
	1707	}
	1708	else if ( StringEqual( p, "href", true, _encoding ) )
	1709	{
	1710	TiXmlAttribute attrib;
	1711	p = attrib.Parse( p, data, _encoding );
	1712	href = attrib.Value();
	1713	}
	1714	else
	1715	{
	1716	// Read over whatever it is.
	1717	while( p && p && p != '>' && !IsWhiteSpace( *p ) )
	1718	++p;
	1719	}
	1720	}
	1721	return 0;
	1722	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: