Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: code/trunk/src/tinyxml/tinyxmlparser.cc @ 2122

Last change on this file since 2122 was 1505, checked in by rgrieder, 16 years ago
f* svn: It doesn't even inform you if you attempt to set a non existing property. It is svn:eol-style and not eol-style when using the command by the way…
Property svn:eol-style set to `native`
File size: 38.2 KB

Rev	Line
[471]	1	/*
	2	www.sourceforge.net/projects/tinyxml
	3	Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
	4
[738]	5	This software is provided 'as-is', without any express or implied
	6	warranty. In no event will the authors be held liable for any
[471]	7	damages arising from the use of this software.
	8
[738]	9	Permission is granted to anyone to use this software for any
	10	purpose, including commercial applications, and to alter it and
[471]	11	redistribute it freely, subject to the following restrictions:
	12
[738]	13	1. The origin of this software must not be misrepresented; you must
[471]	14	not claim that you wrote the original software. If you use this
	15	software in a product, an acknowledgment in the product documentation
	16	would be appreciated but is not required.
	17
[738]	18	2. Altered source versions must be plainly marked as such, and
[471]	19	must not be misrepresented as being the original software.
	20
[738]	21	3. This notice may not be removed or altered from any source
[471]	22	distribution.
	23	*/
	24
	25	#include <ctype.h>
	26	#include <stddef.h>
	27
[738]	28	#include "tinyxml.h"
	29
[471]	30	//#define DEBUG_PARSER
[738]	31	#if defined( DEBUG_PARSER )
	32	# if defined( DEBUG ) && defined( _MSC_VER )
	33	# include <windows.h>
	34	# define TIXML_LOG OutputDebugString
	35	# else
	36	# define TIXML_LOG printf
	37	# endif
	38	#endif
[471]	39
	40	// Note tha "PutString" hardcodes the same list. This
	41	// is less flexible than it appears. Changing the entries
[738]	42	// or order will break putstring.
	43	TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
[471]	44	{
	45	{ "&", 5, '&' },
	46	{ "<", 4, '<' },
	47	{ ">", 4, '>' },
	48	{ """, 6, '\"' },
	49	{ "'", 6, '\'' }
	50	};
	51
	52	// Bunch of unicode info at:
	53	// http://www.unicode.org/faq/utf_bom.html
	54	// Including the basic of this table, which determines the #bytes in the
	55	// sequence from the lead byte. 1 placed for invalid sequences --
	56	// although the result will be junk, pass it through as much as possible.
[738]	57	// Beware of the non-characters in UTF-8:
[471]	58	// ef bb bf (Microsoft "lead bytes")
	59	// ef bf be
[738]	60	// ef bf bf
[471]	61
	62	const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
	63	const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
	64	const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
	65
[738]	66	const int TiXmlBase::utf8ByteTable[256] =
[471]	67	{
	68	// 0 1 2 3 4 5 6 7 8 9 a b c d e f
	69	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
	70	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
	71	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
	72	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
	73	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
	74	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
	75	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
	76	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
	77	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
[738]	78	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
	79	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
	80	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
[471]	81	1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
	82	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
	83	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
	84	4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
	85	};
	86
	87
	88	void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
	89	{
	90	const unsigned long BYTE_MASK = 0xBF;
	91	const unsigned long BYTE_MARK = 0x80;
	92	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	93
[738]	94	if (input < 0x80)
[471]	95	*length = 1;
	96	else if ( input < 0x800 )
	97	*length = 2;
	98	else if ( input < 0x10000 )
	99	*length = 3;
	100	else if ( input < 0x200000 )
	101	*length = 4;
	102	else
	103	{ *length = 0; return; } // This code won't covert this correctly anyway.
	104
	105	output += *length;
	106
	107	// Scary scary fall throughs.
[738]	108	switch (*length)
[471]	109	{
	110	case 4:
[738]	111	--output;
	112	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
[471]	113	input >>= 6;
	114	case 3:
[738]	115	--output;
	116	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
[471]	117	input >>= 6;
	118	case 2:
[738]	119	--output;
	120	*output = (char)((input \| BYTE_MARK) & BYTE_MASK);
[471]	121	input >>= 6;
	122	case 1:
[738]	123	--output;
[471]	124	output = (char)(input \| FIRST_BYTE_MARK[length]);
	125	}
	126	}
	127
	128
	129	/static/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /encoding/ )
	130	{
	131	// This will only work for low-ascii, everything else is assumed to be a valid
	132	// letter. I'm not sure this is the best approach, but it is quite tricky trying
[738]	133	// to figure out alhabetical vs. not across encoding. So take a very
[471]	134	// conservative approach.
	135
	136	// if ( encoding == TIXML_ENCODING_UTF8 )
	137	// {
	138	if ( anyByte < 127 )
	139	return isalpha( anyByte );
	140	else
	141	return 1; // What else to do? The unicode set is huge...get the english ones right.
	142	// }
	143	// else
	144	// {
	145	// return isalpha( anyByte );
	146	// }
	147	}
	148
	149
	150	/static/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /encoding/ )
	151	{
	152	// This will only work for low-ascii, everything else is assumed to be a valid
	153	// letter. I'm not sure this is the best approach, but it is quite tricky trying
[738]	154	// to figure out alhabetical vs. not across encoding. So take a very
[471]	155	// conservative approach.
	156
	157	// if ( encoding == TIXML_ENCODING_UTF8 )
	158	// {
	159	if ( anyByte < 127 )
	160	return isalnum( anyByte );
	161	else
	162	return 1; // What else to do? The unicode set is huge...get the english ones right.
	163	// }
	164	// else
	165	// {
	166	// return isalnum( anyByte );
	167	// }
	168	}
	169
	170
	171	class TiXmlParsingData
	172	{
	173	friend class TiXmlDocument;
	174	public:
	175	void Stamp( const char* now, TiXmlEncoding encoding );
	176
	177	const TiXmlCursor& Cursor() { return cursor; }
	178
	179	private:
	180	// Only used by the document!
	181	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
	182	{
	183	assert( start );
	184	stamp = start;
	185	tabsize = _tabsize;
	186	cursor.row = row;
	187	cursor.col = col;
	188	}
	189
	190	TiXmlCursor cursor;
	191	const char* stamp;
	192	int tabsize;
	193	};
	194
	195
	196	void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
	197	{
	198	assert( now );
	199
	200	// Do nothing if the tabsize is 0.
	201	if ( tabsize < 1 )
	202	{
	203	return;
	204	}
	205
	206	// Get the current row, column.
	207	int row = cursor.row;
	208	int col = cursor.col;
	209	const char* p = stamp;
	210	assert( p );
	211
	212	while ( p < now )
	213	{
	214	// Treat p as unsigned, so we have a happy compiler.
	215	const unsigned char* pU = (const unsigned char*)p;
	216
	217	// Code contributed by Fletcher Dunn: (modified by lee)
	218	switch (*pU) {
	219	case 0:
	220	// We should never get here, but in case we do, don't
	221	// advance past the terminating null character, ever
	222	return;
	223
	224	case '\r':
	225	// bump down to the next line
	226	++row;
[738]	227	col = 0;
[471]	228	// Eat the character
	229	++p;
	230
	231	// Check for \r\n sequence, and treat this as a single character
	232	if (*p == '\n') {
	233	++p;
	234	}
	235	break;
	236
	237	case '\n':
	238	// bump down to the next line
	239	++row;
	240	col = 0;
	241
	242	// Eat the character
	243	++p;
	244
	245	// Check for \n\r sequence, and treat this as a single
	246	// character. (Yes, this bizarre thing does occur still
	247	// on some arcane platforms...)
	248	if (*p == '\r') {
	249	++p;
	250	}
	251	break;
	252
	253	case '\t':
	254	// Eat the character
	255	++p;
	256
	257	// Skip to next tab stop
	258	col = (col / tabsize + 1) * tabsize;
	259	break;
	260
	261	case TIXML_UTF_LEAD_0:
	262	if ( encoding == TIXML_ENCODING_UTF8 )
	263	{
	264	if ( (p+1) && (p+2) )
	265	{
	266	// In these cases, don't advance the column. These are
	267	// 0-width spaces.
	268	if ( (pU+1)==TIXML_UTF_LEAD_1 && (pU+2)==TIXML_UTF_LEAD_2 )
[738]	269	p += 3;
[471]	270	else if ( (pU+1)==0xbfU && (pU+2)==0xbeU )
[738]	271	p += 3;
[471]	272	else if ( (pU+1)==0xbfU && (pU+2)==0xbfU )
[738]	273	p += 3;
[471]	274	else
	275	{ p +=3; ++col; } // A normal character.
	276	}
	277	}
	278	else
	279	{
	280	++p;
	281	++col;
	282	}
	283	break;
	284
	285	default:
	286	if ( encoding == TIXML_ENCODING_UTF8 )
	287	{
	288	// Eat the 1 to 4 byte utf8 character.
[738]	289	int step = TiXmlBase::utf8ByteTable[((const unsigned char)p)];
[471]	290	if ( step == 0 )
	291	step = 1; // Error case from bad encoding, but handle gracefully.
	292	p += step;
	293
	294	// Just advance one column, of course.
	295	++col;
	296	}
	297	else
	298	{
	299	++p;
	300	++col;
	301	}
	302	break;
	303	}
	304	}
	305	cursor.row = row;
	306	cursor.col = col;
	307	assert( cursor.row >= -1 );
	308	assert( cursor.col >= -1 );
	309	stamp = p;
	310	assert( stamp );
	311	}
	312
	313
	314	const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
	315	{
	316	if ( !p \|\| !*p )
	317	{
	318	return 0;
	319	}
	320	if ( encoding == TIXML_ENCODING_UTF8 )
	321	{
	322	while ( *p )
	323	{
	324	const unsigned char* pU = (const unsigned char*)p;
[738]	325
[471]	326	// Skip the stupid Microsoft UTF-8 Byte order marks
	327	if ( *(pU+0)==TIXML_UTF_LEAD_0
[738]	328	&& *(pU+1)==TIXML_UTF_LEAD_1
[471]	329	&& *(pU+2)==TIXML_UTF_LEAD_2 )
	330	{
	331	p += 3;
	332	continue;
	333	}
	334	else if(*(pU+0)==TIXML_UTF_LEAD_0
	335	&& *(pU+1)==0xbfU
	336	&& *(pU+2)==0xbeU )
	337	{
	338	p += 3;
	339	continue;
	340	}
	341	else if(*(pU+0)==TIXML_UTF_LEAD_0
	342	&& *(pU+1)==0xbfU
	343	&& *(pU+2)==0xbfU )
	344	{
	345	p += 3;
	346	continue;
	347	}
	348
	349	if ( IsWhiteSpace( p ) \|\| p == '\n' \|\| *p =='\r' ) // Still using old rules for white space.
	350	++p;
	351	else
	352	break;
	353	}
	354	}
	355	else
	356	{
	357	while ( p && IsWhiteSpace( p ) \|\| p == '\n' \|\| p =='\r' )
	358	++p;
	359	}
	360
	361	return p;
	362	}
	363
	364	#ifdef TIXML_USE_STL
[738]	365	/static/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
[471]	366	{
	367	for( ;; )
	368	{
	369	if ( !in->good() ) return false;
	370
	371	int c = in->peek();
	372	// At this scope, we can't get to a document. So fail silently.
	373	if ( !IsWhiteSpace( c ) \|\| c <= 0 )
	374	return true;
	375
	376	*tag += (char) in->get();
	377	}
	378	}
	379
[738]	380	/static/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
[471]	381	{
	382	//assert( character > 0 && character < 128 ); // else it won't work in utf-8
	383	while ( in->good() )
	384	{
	385	int c = in->peek();
	386	if ( c == character )
	387	return true;
	388	if ( c <= 0 ) // Silent failure: can't get document at this scope
	389	return false;
	390
	391	in->get();
	392	*tag += (char) c;
	393	}
	394	return false;
	395	}
	396	#endif
	397
[738]	398	// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
	399	// "assign" optimization removes over 10% of the execution time.
	400	//
[471]	401	const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
	402	{
[738]	403	// Oddly, not supported on some comilers,
	404	//name->clear();
	405	// So use this:
[471]	406	*name = "";
	407	assert( p );
	408
	409	// Names start with letters or underscores.
	410	// Of course, in unicode, tinyxml has no idea what a letter is. The
	411	// algorithm is generous.
	412	//
	413	// After that, they can be letters, underscores, numbers,
	414	// hyphens, or colons. (Colons are valid ony for namespaces,
	415	// but tinyxml can't tell namespaces from names.)
[738]	416	if ( p && *p
[471]	417	&& ( IsAlpha( (unsigned char) p, encoding ) \|\| p == '_' ) )
	418	{
[738]	419	const char* start = p;
[471]	420	while( p && *p
[738]	421	&& ( IsAlphaNum( (unsigned char ) *p, encoding )
[471]	422	\|\| *p == '_'
	423	\|\| *p == '-'
	424	\|\| *p == '.'
	425	\|\| *p == ':' ) )
	426	{
[738]	427	//(name) += p; // expensive
[471]	428	++p;
	429	}
[738]	430	if ( p-start > 0 ) {
	431	name->assign( start, p-start );
	432	}
[471]	433	return p;
	434	}
	435	return 0;
	436	}
	437
	438	const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
	439	{
	440	// Presume an entity, and pull it out.
	441	TIXML_STRING ent;
	442	int i;
	443	*length = 0;
	444
	445	if ( (p+1) && (p+1) == '#' && *(p+2) )
	446	{
	447	unsigned long ucs = 0;
	448	ptrdiff_t delta = 0;
	449	unsigned mult = 1;
	450
	451	if ( *(p+2) == 'x' )
	452	{
	453	// Hexadecimal.
	454	if ( !*(p+3) ) return 0;
	455
	456	const char* q = p+3;
	457	q = strchr( q, ';' );
	458
	459	if ( !q \|\| !*q ) return 0;
	460
	461	delta = q-p;
	462	--q;
	463
	464	while ( *q != 'x' )
	465	{
	466	if ( q >= '0' && q <= '9' )
	467	ucs += mult * (*q - '0');
	468	else if ( q >= 'a' && q <= 'f' )
	469	ucs += mult * (*q - 'a' + 10);
	470	else if ( q >= 'A' && q <= 'F' )
	471	ucs += mult * (*q - 'A' + 10 );
[738]	472	else
[471]	473	return 0;
	474	mult *= 16;
	475	--q;
	476	}
	477	}
	478	else
	479	{
	480	// Decimal.
	481	if ( !*(p+2) ) return 0;
	482
	483	const char* q = p+2;
	484	q = strchr( q, ';' );
	485
	486	if ( !q \|\| !*q ) return 0;
	487
	488	delta = q-p;
	489	--q;
	490
	491	while ( *q != '#' )
	492	{
	493	if ( q >= '0' && q <= '9' )
	494	ucs += mult * (*q - '0');
[738]	495	else
[471]	496	return 0;
	497	mult *= 10;
	498	--q;
	499	}
	500	}
	501	if ( encoding == TIXML_ENCODING_UTF8 )
	502	{
	503	// convert the UCS to UTF-8
	504	ConvertUTF32ToUTF8( ucs, value, length );
	505	}
	506	else
	507	{
	508	*value = (char)ucs;
	509	*length = 1;
	510	}
	511	return p + delta + 1;
	512	}
	513
	514	// Now try to match it.
	515	for( i=0; i<NUM_ENTITY; ++i )
	516	{
	517	if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
	518	{
	519	assert( strlen( entity[i].str ) == entity[i].strLength );
	520	*value = entity[i].chr;
	521	*length = 1;
	522	return ( p + entity[i].strLength );
	523	}
	524	}
	525
	526	// So it wasn't an entity, its unrecognized, or something like that.
	527	value = p; // Don't put back the last one, since we return it!
[738]	528	//*length = 1; // Leave unrecognized entities - this doesn't really work.
	529	// Just writes strange XML.
[471]	530	return p+1;
	531	}
	532
	533
	534	bool TiXmlBase::StringEqual( const char* p,
	535	const char* tag,
	536	bool ignoreCase,
	537	TiXmlEncoding encoding )
	538	{
	539	assert( p );
	540	assert( tag );
	541	if ( !p \|\| !*p )
	542	{
	543	assert( 0 );
	544	return false;
	545	}
	546
	547	const char* q = p;
	548
	549	if ( ignoreCase )
	550	{
	551	while ( q && tag && ToLower( q, encoding ) == ToLower( tag, encoding ) )
	552	{
	553	++q;
	554	++tag;
	555	}
	556
	557	if ( *tag == 0 )
	558	return true;
	559	}
	560	else
	561	{
	562	while ( q && tag && q == tag )
	563	{
	564	++q;
	565	++tag;
	566	}
	567
	568	if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
	569	return true;
	570	}
	571	return false;
	572	}
	573
[738]	574	const char* TiXmlBase::ReadText( const char* p,
	575	TIXML_STRING * text,
	576	bool trimWhiteSpace,
	577	const char* endTag,
[471]	578	bool caseInsensitive,
	579	TiXmlEncoding encoding )
	580	{
	581	*text = "";
	582	if ( !trimWhiteSpace // certain tags always keep whitespace
	583	\|\| !condenseWhiteSpace ) // if true, whitespace is always kept
	584	{
	585	// Keep all the white space.
	586	while ( p && *p
	587	&& !StringEqual( p, endTag, caseInsensitive, encoding )
	588	)
	589	{
	590	int len;
	591	char cArr[4] = { 0, 0, 0, 0 };
	592	p = GetChar( p, cArr, &len, encoding );
	593	text->append( cArr, len );
	594	}
	595	}
	596	else
	597	{
	598	bool whitespace = false;
	599
	600	// Remove leading white space:
	601	p = SkipWhiteSpace( p, encoding );
	602	while ( p && *p
	603	&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
	604	{
	605	if ( p == '\r' \|\| p == '\n' )
	606	{
	607	whitespace = true;
	608	++p;
	609	}
	610	else if ( IsWhiteSpace( *p ) )
	611	{
	612	whitespace = true;
	613	++p;
	614	}
	615	else
	616	{
	617	// If we've found whitespace, add it before the
	618	// new character. Any whitespace just becomes a space.
	619	if ( whitespace )
	620	{
	621	(*text) += ' ';
	622	whitespace = false;
	623	}
	624	int len;
	625	char cArr[4] = { 0, 0, 0, 0 };
	626	p = GetChar( p, cArr, &len, encoding );
	627	if ( len == 1 )
	628	(*text) += cArr[0]; // more efficient
	629	else
	630	text->append( cArr, len );
	631	}
	632	}
	633	}
[738]	634	if ( p )
	635	p += strlen( endTag );
	636	return p;
[471]	637	}
	638
	639	#ifdef TIXML_USE_STL
	640
[738]	641	void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
[471]	642	{
	643	// The basic issue with a document is that we don't know what we're
	644	// streaming. Read something presumed to be a tag (and hope), then
	645	// identify it, and call the appropriate stream method on the tag.
	646	//
	647	// This "pre-streaming" will never read the closing ">" so the
	648	// sub-tag can orient itself.
	649
[738]	650	if ( !StreamTo( in, '<', tag ) )
[471]	651	{
	652	SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	653	return;
	654	}
	655
	656	while ( in->good() )
	657	{
	658	int tagIndex = (int) tag->length();
	659	while ( in->good() && in->peek() != '>' )
	660	{
	661	int c = in->get();
	662	if ( c <= 0 )
	663	{
	664	SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	665	break;
	666	}
	667	(*tag) += (char) c;
	668	}
	669
	670	if ( in->good() )
	671	{
[738]	672	// We now have something we presume to be a node of
[471]	673	// some sort. Identify it, and call the node to
	674	// continue streaming.
	675	TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
	676
	677	if ( node )
	678	{
	679	node->StreamIn( in, tag );
	680	bool isElement = node->ToElement() != 0;
	681	delete node;
	682	node = 0;
	683
	684	// If this is the root element, we're done. Parsing will be
	685	// done by the >> operator.
	686	if ( isElement )
	687	{
	688	return;
	689	}
	690	}
	691	else
	692	{
	693	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
	694	return;
	695	}
	696	}
	697	}
	698	// We should have returned sooner.
	699	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
	700	}
	701
	702	#endif
	703
	704	const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
	705	{
	706	ClearError();
	707
	708	// Parse away, at the document level. Since a document
	709	// contains nothing but other tags, most of what happens
	710	// here is skipping white space.
	711	if ( !p \|\| !*p )
	712	{
	713	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	714	return 0;
	715	}
	716
	717	// Note that, for a document, this needs to come
	718	// before the while space skip, so that parsing
	719	// starts from the pointer we are given.
	720	location.Clear();
	721	if ( prevData )
	722	{
	723	location.row = prevData->cursor.row;
	724	location.col = prevData->cursor.col;
	725	}
	726	else
	727	{
	728	location.row = 0;
	729	location.col = 0;
	730	}
	731	TiXmlParsingData data( p, TabSize(), location.row, location.col );
	732	location = data.Cursor();
	733
	734	if ( encoding == TIXML_ENCODING_UNKNOWN )
	735	{
	736	// Check for the Microsoft UTF-8 lead bytes.
	737	const unsigned char* pU = (const unsigned char*)p;
	738	if ( (pU+0) && (pU+0) == TIXML_UTF_LEAD_0
	739	&& (pU+1) && (pU+1) == TIXML_UTF_LEAD_1
	740	&& (pU+2) && (pU+2) == TIXML_UTF_LEAD_2 )
	741	{
	742	encoding = TIXML_ENCODING_UTF8;
	743	useMicrosoftBOM = true;
	744	}
	745	}
	746
	747	p = SkipWhiteSpace( p, encoding );
	748	if ( !p )
	749	{
	750	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
	751	return 0;
	752	}
	753
	754	while ( p && *p )
	755	{
	756	TiXmlNode* node = Identify( p, encoding );
	757	if ( node )
	758	{
	759	p = node->Parse( p, &data, encoding );
	760	LinkEndChild( node );
	761	}
	762	else
	763	{
	764	break;
	765	}
	766
	767	// Did we get encoding info?
	768	if ( encoding == TIXML_ENCODING_UNKNOWN
	769	&& node->ToDeclaration() )
	770	{
	771	TiXmlDeclaration* dec = node->ToDeclaration();
	772	const char* enc = dec->Encoding();
	773	assert( enc );
	774
	775	if ( *enc == 0 )
	776	encoding = TIXML_ENCODING_UTF8;
	777	else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
	778	encoding = TIXML_ENCODING_UTF8;
	779	else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
	780	encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
[738]	781	else
[471]	782	encoding = TIXML_ENCODING_LEGACY;
	783	}
	784
	785	p = SkipWhiteSpace( p, encoding );
	786	}
	787
	788	// Was this empty?
	789	if ( !firstChild ) {
	790	SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
	791	return 0;
	792	}
	793
	794	// All is well.
	795	return p;
	796	}
	797
	798	void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
[738]	799	{
[471]	800	// The first error in a chain is more accurate - don't set again!
	801	if ( error )
	802	return;
	803
	804	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
	805	error = true;
	806	errorId = err;
	807	errorDesc = errorString[ errorId ];
	808
	809	errorLocation.Clear();
	810	if ( pError && data )
	811	{
	812	data->Stamp( pError, encoding );
	813	errorLocation = data->Cursor();
	814	}
	815	}
	816
	817
	818	TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
	819	{
	820	TiXmlNode* returnNode = 0;
	821
	822	p = SkipWhiteSpace( p, encoding );
	823	if( !p \|\| !p \|\| p != '<' )
	824	{
	825	return 0;
	826	}
	827
	828	TiXmlDocument* doc = GetDocument();
	829	p = SkipWhiteSpace( p, encoding );
	830
	831	if ( !p \|\| !*p )
	832	{
	833	return 0;
	834	}
	835
[738]	836	// What is this thing?
[471]	837	// - Elements start with a letter or underscore, but xml is reserved.
	838	// - Comments: <!--
	839	// - Decleration: <?xml
[740]	840	// - StylesheetReference <?xml-stylesheet
[471]	841	// - Everthing else is unknown to tinyxml.
	842	//
	843
	844	const char* xmlHeader = { "<?xml" };
[740]	845	const char* xmlSSHeader = { "<?xml-stylesheet" };
[471]	846	const char* commentHeader = { "<!--" };
	847	const char* dtdHeader = { "<!" };
	848	const char* cdataHeader = { "<![CDATA[" };
	849
[740]	850	if ( StringEqual( p, xmlSSHeader, true, encoding ) )
[471]	851	{
	852	#ifdef DEBUG_PARSER
[740]	853	TIXML_LOG( "XML parsing Stylesheet Reference\n" );
	854	#endif
	855	returnNode = new TiXmlStylesheetReference();
	856	}
	857	else if ( StringEqual( p, xmlHeader, true, encoding ) )
	858	{
	859	#ifdef DEBUG_PARSER
[471]	860	TIXML_LOG( "XML parsing Declaration\n" );
	861	#endif
	862	returnNode = new TiXmlDeclaration();
	863	}
	864	else if ( StringEqual( p, commentHeader, false, encoding ) )
	865	{
	866	#ifdef DEBUG_PARSER
	867	TIXML_LOG( "XML parsing Comment\n" );
	868	#endif
	869	returnNode = new TiXmlComment();
	870	}
	871	else if ( StringEqual( p, cdataHeader, false, encoding ) )
	872	{
	873	#ifdef DEBUG_PARSER
	874	TIXML_LOG( "XML parsing CDATA\n" );
	875	#endif
	876	TiXmlText* text = new TiXmlText( "" );
	877	text->SetCDATA( true );
	878	returnNode = text;
	879	}
	880	else if ( StringEqual( p, dtdHeader, false, encoding ) )
	881	{
	882	#ifdef DEBUG_PARSER
	883	TIXML_LOG( "XML parsing Unknown(1)\n" );
	884	#endif
	885	returnNode = new TiXmlUnknown();
	886	}
	887	else if ( IsAlpha( *(p+1), encoding )
	888	\|\| *(p+1) == '_' )
	889	{
	890	#ifdef DEBUG_PARSER
	891	TIXML_LOG( "XML parsing Element\n" );
	892	#endif
	893	returnNode = new TiXmlElement( "" );
	894	}
	895	else
	896	{
	897	#ifdef DEBUG_PARSER
	898	TIXML_LOG( "XML parsing Unknown(2)\n" );
	899	#endif
	900	returnNode = new TiXmlUnknown();
	901	}
	902
	903	if ( returnNode )
	904	{
	905	// Set the parent, so it can report errors
	906	returnNode->parent = this;
	907	}
	908	else
	909	{
	910	if ( doc )
	911	doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
	912	}
	913	return returnNode;
	914	}
	915
	916	#ifdef TIXML_USE_STL
	917
[738]	918	void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
[471]	919	{
	920	// We're called with some amount of pre-parsing. That is, some of "this"
	921	// element is in "tag". Go ahead and stream to the closing ">"
	922	while( in->good() )
	923	{
	924	int c = in->get();
	925	if ( c <= 0 )
	926	{
	927	TiXmlDocument* document = GetDocument();
	928	if ( document )
	929	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	930	return;
	931	}
	932	(*tag) += (char) c ;
[738]	933
[471]	934	if ( c == '>' )
	935	break;
	936	}
	937
	938	if ( tag->length() < 3 ) return;
	939
	940	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
	941	// If not, identify and stream.
	942
[738]	943	if ( tag->at( tag->length() - 1 ) == '>'
[471]	944	&& tag->at( tag->length() - 2 ) == '/' )
	945	{
	946	// All good!
	947	return;
	948	}
	949	else if ( tag->at( tag->length() - 1 ) == '>' )
	950	{
	951	// There is more. Could be:
	952	// text
[738]	953	// cdata text (which looks like another node)
[471]	954	// closing tag
	955	// another node.
	956	for ( ;; )
	957	{
	958	StreamWhiteSpace( in, tag );
	959
	960	// Do we have text?
[738]	961	if ( in->good() && in->peek() != '<' )
[471]	962	{
	963	// Yep, text.
	964	TiXmlText text( "" );
	965	text.StreamIn( in, tag );
	966
	967	// What follows text is a closing tag or another node.
	968	// Go around again and figure it out.
	969	continue;
	970	}
	971
	972	// We now have either a closing tag...or another node.
	973	// We should be at a "<", regardless.
	974	if ( !in->good() ) return;
	975	assert( in->peek() == '<' );
	976	int tagIndex = (int) tag->length();
	977
	978	bool closingTag = false;
	979	bool firstCharFound = false;
	980
	981	for( ;; )
	982	{
	983	if ( !in->good() )
	984	return;
	985
	986	int c = in->peek();
	987	if ( c <= 0 )
	988	{
	989	TiXmlDocument* document = GetDocument();
	990	if ( document )
	991	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	992	return;
	993	}
[738]	994
[471]	995	if ( c == '>' )
	996	break;
	997
	998	*tag += (char) c;
	999	in->get();
	1000
[738]	1001	// Early out if we find the CDATA id.
	1002	if ( c == '[' && tag->size() >= 9 )
	1003	{
	1004	size_t len = tag->size();
	1005	const char* start = tag->c_str() + len - 9;
	1006	if ( strcmp( start, "<![CDATA[" ) == 0 ) {
	1007	assert( !closingTag );
	1008	break;
	1009	}
	1010	}
	1011
[471]	1012	if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
	1013	{
	1014	firstCharFound = true;
	1015	if ( c == '/' )
	1016	closingTag = true;
	1017	}
	1018	}
	1019	// If it was a closing tag, then read in the closing '>' to clean up the input stream.
	1020	// If it was not, the streaming will be done by the tag.
	1021	if ( closingTag )
	1022	{
	1023	if ( !in->good() )
	1024	return;
	1025
	1026	int c = in->get();
	1027	if ( c <= 0 )
	1028	{
	1029	TiXmlDocument* document = GetDocument();
	1030	if ( document )
	1031	document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	1032	return;
	1033	}
	1034	assert( c == '>' );
	1035	*tag += (char) c;
	1036
	1037	// We are done, once we've found our closing tag.
	1038	return;
	1039	}
	1040	else
	1041	{
	1042	// If not a closing tag, id it, and stream.
	1043	const char* tagloc = tag->c_str() + tagIndex;
	1044	TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
	1045	if ( !node )
	1046	return;
	1047	node->StreamIn( in, tag );
	1048	delete node;
	1049	node = 0;
	1050
	1051	// No return: go around from the beginning: text, closing tag, or node.
	1052	}
	1053	}
	1054	}
	1055	}
	1056	#endif
	1057
	1058	const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1059	{
	1060	p = SkipWhiteSpace( p, encoding );
	1061	TiXmlDocument* document = GetDocument();
	1062
	1063	if ( !p \|\| !*p )
	1064	{
	1065	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
	1066	return 0;
	1067	}
	1068
	1069	if ( data )
	1070	{
	1071	data->Stamp( p, encoding );
	1072	location = data->Cursor();
	1073	}
	1074
	1075	if ( *p != '<' )
	1076	{
	1077	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
	1078	return 0;
	1079	}
	1080
	1081	p = SkipWhiteSpace( p+1, encoding );
	1082
	1083	// Read the name.
	1084	const char* pErr = p;
	1085
	1086	p = ReadName( p, &value, encoding );
	1087	if ( !p \|\| !*p )
	1088	{
	1089	if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
	1090	return 0;
	1091	}
	1092
	1093	TIXML_STRING endTag ("</");
	1094	endTag += value;
	1095	endTag += ">";
	1096
	1097	// Check for and read attributes. Also look for an empty
	1098	// tag or an end tag.
	1099	while ( p && *p )
	1100	{
	1101	pErr = p;
	1102	p = SkipWhiteSpace( p, encoding );
	1103	if ( !p \|\| !*p )
	1104	{
	1105	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
	1106	return 0;
	1107	}
	1108	if ( *p == '/' )
	1109	{
	1110	++p;
	1111	// Empty tag.
	1112	if ( *p != '>' )
	1113	{
[738]	1114	if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
[471]	1115	return 0;
	1116	}
	1117	return (p+1);
	1118	}
	1119	else if ( *p == '>' )
	1120	{
	1121	// Done with attributes (if there were any.)
	1122	// Read the value -- which can include other
	1123	// elements -- read the end tag, and return.
	1124	++p;
	1125	p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
[738]	1126	if ( !p \|\| !*p ) {
	1127	// We were looking for the end tag, but found nothing.
	1128	// Fix for [ 1663758 ] Failure to report error on bad XML
	1129	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
[471]	1130	return 0;
[738]	1131	}
[471]	1132
	1133	// We should find the end tag now
	1134	if ( StringEqual( p, endTag.c_str(), false, encoding ) )
	1135	{
	1136	p += endTag.length();
	1137	return p;
	1138	}
	1139	else
	1140	{
	1141	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
	1142	return 0;
	1143	}
	1144	}
	1145	else
	1146	{
	1147	// Try to read an attribute:
	1148	TiXmlAttribute* attrib = new TiXmlAttribute();
	1149	if ( !attrib )
	1150	{
	1151	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
	1152	return 0;
	1153	}
	1154
	1155	attrib->SetDocument( document );
[738]	1156	pErr = p;
[471]	1157	p = attrib->Parse( p, data, encoding );
	1158
	1159	if ( !p \|\| !*p )
	1160	{
	1161	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
	1162	delete attrib;
	1163	return 0;
	1164	}
	1165
	1166	// Handle the strange case of double attributes:
[738]	1167	#ifdef TIXML_USE_STL
	1168	TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
	1169	#else
[471]	1170	TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
[738]	1171	#endif
[471]	1172	if ( node )
	1173	{
	1174	node->SetValue( attrib->Value() );
	1175	delete attrib;
	1176	return 0;
	1177	}
	1178
	1179	attributeSet.Add( attrib );
	1180	}
	1181	}
	1182	return p;
	1183	}
	1184
	1185
	1186	const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1187	{
	1188	TiXmlDocument* document = GetDocument();
	1189
	1190	// Read in text and elements in any order.
	1191	const char* pWithWhiteSpace = p;
	1192	p = SkipWhiteSpace( p, encoding );
	1193
	1194	while ( p && *p )
	1195	{
	1196	if ( *p != '<' )
	1197	{
	1198	// Take what we have, make a text element.
	1199	TiXmlText* textNode = new TiXmlText( "" );
	1200
	1201	if ( !textNode )
	1202	{
	1203	if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
	1204	return 0;
	1205	}
	1206
	1207	if ( TiXmlBase::IsWhiteSpaceCondensed() )
	1208	{
	1209	p = textNode->Parse( p, data, encoding );
	1210	}
	1211	else
	1212	{
	1213	// Special case: we want to keep the white space
	1214	// so that leading spaces aren't removed.
	1215	p = textNode->Parse( pWithWhiteSpace, data, encoding );
	1216	}
	1217
	1218	if ( !textNode->Blank() )
	1219	LinkEndChild( textNode );
	1220	else
	1221	delete textNode;
[738]	1222	}
	1223	else
[471]	1224	{
	1225	// We hit a '<'
	1226	// Have we hit a new element or an end tag? This could also be
	1227	// a TiXmlText in the "CDATA" style.
	1228	if ( StringEqual( p, "</", false, encoding ) )
	1229	{
	1230	return p;
	1231	}
	1232	else
	1233	{
	1234	TiXmlNode* node = Identify( p, encoding );
	1235	if ( node )
	1236	{
	1237	p = node->Parse( p, data, encoding );
	1238	LinkEndChild( node );
[738]	1239	}
[471]	1240	else
	1241	{
	1242	return 0;
	1243	}
	1244	}
	1245	}
	1246	pWithWhiteSpace = p;
	1247	p = SkipWhiteSpace( p, encoding );
	1248	}
	1249
	1250	if ( !p )
	1251	{
	1252	if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
[738]	1253	}
[471]	1254	return p;
	1255	}
	1256
	1257
	#ifdef TIXML_USE_STL
	// Appends characters read from 'in' to '*tag', stopping after the first
	// '>' has been appended or when the stream is no longer good. A read that
	// yields a value <= 0 reports TIXML_ERROR_EMBEDDED_NULL on the owning
	// document (if there is one) and stops.
	void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		for ( ; in->good(); )
		{
			const int ch = in->get();
			if ( ch <= 0 )
			{
				if ( TiXmlDocument* doc = GetDocument() )
					doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) ch;
			if ( ch == '>' )
				return;		// Tag is complete.
		}
	}
	#endif
	1281
	1282
	1283	const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1284	{
	1285	TiXmlDocument* document = GetDocument();
	1286	p = SkipWhiteSpace( p, encoding );
	1287
	1288	if ( data )
	1289	{
	1290	data->Stamp( p, encoding );
	1291	location = data->Cursor();
	1292	}
	1293	if ( !p \|\| !p \|\| p != '<' )
	1294	{
	1295	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
	1296	return 0;
	1297	}
	1298	++p;
	1299	value = "";
	1300
	1301	while ( p && p && p != '>' )
	1302	{
	1303	value += *p;
	1304	++p;
	1305	}
	1306
	1307	if ( !p )
	1308	{
	1309	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
	1310	}
	1311	if ( *p == '>' )
	1312	return p+1;
	1313	return p;
	1314	}
	1315
	#ifdef TIXML_USE_STL
	// Appends characters read from 'in' to '*tag' until the text accumulated
	// in '*tag' ends with "-->", or the stream is no longer good. A read that
	// yields a value <= 0 reports TIXML_ERROR_EMBEDDED_NULL on the owning
	// document (if there is one) and stops.
	void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		while ( in->good() )
		{
			int c = in->get();
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;

			// Only look back for the two dashes when the tag is long enough;
			// otherwise length() - 3 would wrap around and at() would throw.
			const size_t len = tag->length();
			if ( c == '>'
				 && len >= 3
				 && tag->at( len - 2 ) == '-'
				 && tag->at( len - 3 ) == '-' )
			{
				// All is well.
				return;
			}
		}
	}
	#endif
	1342
	1343
	1344	const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1345	{
	1346	TiXmlDocument* document = GetDocument();
	1347	value = "";
	1348
	1349	p = SkipWhiteSpace( p, encoding );
	1350
	1351	if ( data )
	1352	{
	1353	data->Stamp( p, encoding );
	1354	location = data->Cursor();
	1355	}
	1356	const char* startTag = "<!--";
	1357	const char* endTag = "-->";
	1358
	1359	if ( !StringEqual( p, startTag, false, encoding ) )
	1360	{
	1361	document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
	1362	return 0;
	1363	}
	1364	p += strlen( startTag );
[738]	1365
	1366	// [ 1475201 ] TinyXML parses entities in comments
	1367	// Oops - ReadText doesn't work, because we don't want to parse the entities.
	1368	// p = ReadText( p, &value, false, endTag, false, encoding );
	1369	//
	1370	// from the XML spec:
	1371	/*
	1372	[Definition: Comments may appear anywhere in a document outside other markup; in addition,
	1373	they may appear within the document type declaration at places allowed by the grammar.
	1374	They are not part of the document's character data; an XML processor MAY, but need not,
	1375	make it possible for an application to retrieve the text of comments. For compatibility,
	1376	the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
	1377	references MUST NOT be recognized within comments.
	1378
	1379	An example of a comment:
	1380
	1381	<!-- declarations for <head> & <body> -->
	1382	*/
	1383
	1384	value = "";
	1385	// Keep all the white space.
	1386	while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
	1387	{
	1388	value.append( p, 1 );
	1389	++p;
	1390	}
	1391	if ( p )
	1392	p += strlen( endTag );
	1393
[471]	1394	return p;
	1395	}
	1396
	1397
	1398	const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1399	{
	1400	p = SkipWhiteSpace( p, encoding );
	1401	if ( !p \|\| !*p ) return 0;
	1402
[738]	1403	// int tabsize = 4;
	1404	// if ( document )
	1405	// tabsize = document->TabSize();
[471]	1406
	1407	if ( data )
	1408	{
	1409	data->Stamp( p, encoding );
	1410	location = data->Cursor();
	1411	}
	1412	// Read the name, the '=' and the value.
	1413	const char* pErr = p;
	1414	p = ReadName( p, &name, encoding );
	1415	if ( !p \|\| !*p )
	1416	{
	1417	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
	1418	return 0;
	1419	}
	1420	p = SkipWhiteSpace( p, encoding );
	1421	if ( !p \|\| !p \|\| p != '=' )
	1422	{
	1423	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1424	return 0;
	1425	}
	1426
	1427	++p; // skip '='
	1428	p = SkipWhiteSpace( p, encoding );
	1429	if ( !p \|\| !*p )
	1430	{
	1431	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1432	return 0;
	1433	}
[738]	1434
[471]	1435	const char* end;
[738]	1436	const char SINGLE_QUOTE = '\'';
	1437	const char DOUBLE_QUOTE = '\"';
[471]	1438
[738]	1439	if ( *p == SINGLE_QUOTE )
[471]	1440	{
	1441	++p;
[738]	1442	end = "\'"; // single quote in string
[471]	1443	p = ReadText( p, &value, false, end, false, encoding );
	1444	}
[738]	1445	else if ( *p == DOUBLE_QUOTE )
[471]	1446	{
	1447	++p;
[738]	1448	end = "\""; // double quote in string
[471]	1449	p = ReadText( p, &value, false, end, false, encoding );
	1450	}
	1451	else
	1452	{
	1453	// All attribute values should be in single or double quotes.
	1454	// But this is such a common error that the parser will try
	1455	// its best, even without them.
	1456	value = "";
[738]	1457	while ( p && *p // existence
[471]	1458	&& !IsWhiteSpace( p ) && p != '\n' && *p != '\r' // whitespace
[738]	1459	&& p != '/' && p != '>' ) // tag end
[471]	1460	{
[738]	1461	if ( p == SINGLE_QUOTE \|\| p == DOUBLE_QUOTE ) {
	1462	// [ 1451649 ] Attribute values with trailing quotes not handled correctly
	1463	// We did not have an opening quote but seem to have a
	1464	// closing one. Give up and throw an error.
	1465	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
	1466	return 0;
	1467	}
[471]	1468	value += *p;
	1469	++p;
	1470	}
	1471	}
	1472	return p;
	1473	}
	1474
	#ifdef TIXML_USE_STL
	// Appends text read from 'in' to '*tag'.
	//
	// For ordinary text, reading stops (without consuming it) at the next '<'.
	// For CDATA text, reading stops once the text accumulated in '*tag' ends
	// with "]]>". A peeked value <= 0 reports TIXML_ERROR_EMBEDDED_NULL on the
	// owning document (if there is one) and stops.
	void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		while ( in->good() )
		{
			int c = in->peek();
			if ( !cdata && (c == '<' ) )
			{
				return;
			}
			if ( c <= 0 )
			{
				TiXmlDocument* document = GetDocument();
				if ( document )
					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) c;
			in->get();	// "commits" the peek made above

			if ( cdata && c == '>' && tag->size() >= 3 ) {
				size_t len = tag->size();
				// Index the string itself (not the pointer) to look back
				// at the two characters preceding '>'.
				if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
					// terminator of cdata.
					return;
				}
			}
		}
	}
	#endif
	1506
	1507	const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
	1508	{
	1509	value = "";
	1510	TiXmlDocument* document = GetDocument();
	1511
	1512	if ( data )
	1513	{
	1514	data->Stamp( p, encoding );
	1515	location = data->Cursor();
	1516	}
	1517
	1518	const char* const startTag = "<![CDATA[";
	1519	const char* const endTag = "]]>";
	1520
	1521	if ( cdata \|\| StringEqual( p, startTag, false, encoding ) )
	1522	{
	1523	cdata = true;
	1524
	1525	if ( !StringEqual( p, startTag, false, encoding ) )
	1526	{
	1527	document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
	1528	return 0;
	1529	}
	1530	p += strlen( startTag );
	1531
	1532	// Keep all the white space, ignore the encoding, etc.
	1533	while ( p && *p
	1534	&& !StringEqual( p, endTag, false, encoding )
	1535	)
	1536	{
	1537	value += *p;
	1538	++p;
	1539	}
	1540
[738]	1541	TIXML_STRING dummy;
[471]	1542	p = ReadText( p, &dummy, false, endTag, false, encoding );
	1543	return p;
	1544	}
	1545	else
	1546	{
	1547	bool ignoreWhite = true;
	1548
	1549	const char* end = "<";
	1550	p = ReadText( p, &value, ignoreWhite, end, false, encoding );
	1551	if ( p )
	1552	return p-1; // don't truncate the '<'
	1553	return 0;
	1554	}
	1555	}
	1556
	#ifdef TIXML_USE_STL
	// Appends characters read from 'in' to '*tag', stopping after the first
	// '>' has been appended or when the stream is no longer good. A read that
	// yields a value <= 0 reports TIXML_ERROR_EMBEDDED_NULL on the owning
	// document (if there is one) and stops.
	void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
	{
		for ( ; in->good(); )
		{
			const int ch = in->get();
			if ( ch <= 0 )
			{
				if ( TiXmlDocument* doc = GetDocument() )
					doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
				return;
			}

			(*tag) += (char) ch;
			if ( ch == '>' )
				return;		// Declaration is complete.
		}
	}
	#endif
	1580
	1581	const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
	1582	{
	1583	p = SkipWhiteSpace( p, _encoding );
	1584	// Find the beginning, find the end, and look for
	1585	// the stuff in-between.
	1586	TiXmlDocument* document = GetDocument();
	1587	if ( !p \|\| !*p \|\| !StringEqual( p, "<?xml", true, _encoding ) )
	1588	{
	1589	if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
	1590	return 0;
	1591	}
	1592	if ( data )
	1593	{
	1594	data->Stamp( p, _encoding );
	1595	location = data->Cursor();
	1596	}
	1597	p += 5;
	1598
	1599	version = "";
	1600	encoding = "";
	1601	standalone = "";
	1602
	1603	while ( p && *p )
	1604	{
	1605	if ( *p == '>' )
	1606	{
	1607	++p;
	1608	return p;
	1609	}
	1610
	1611	p = SkipWhiteSpace( p, _encoding );
	1612	if ( StringEqual( p, "version", true, _encoding ) )
	1613	{
	1614	TiXmlAttribute attrib;
[738]	1615	p = attrib.Parse( p, data, _encoding );
[471]	1616	version = attrib.Value();
	1617	}
	1618	else if ( StringEqual( p, "encoding", true, _encoding ) )
	1619	{
	1620	TiXmlAttribute attrib;
[738]	1621	p = attrib.Parse( p, data, _encoding );
[471]	1622	encoding = attrib.Value();
	1623	}
	1624	else if ( StringEqual( p, "standalone", true, _encoding ) )
	1625	{
	1626	TiXmlAttribute attrib;
[738]	1627	p = attrib.Parse( p, data, _encoding );
[471]	1628	standalone = attrib.Value();
	1629	}
	1630	else
	1631	{
	1632	// Read over whatever it is.
	1633	while( p && p && p != '>' && !IsWhiteSpace( *p ) )
	1634	++p;
	1635	}
	1636	}
	1637	return 0;
	1638	}
	1639
	1640	bool TiXmlText::Blank() const
	1641	{
	1642	for ( unsigned i=0; i<value.length(); i++ )
	1643	if ( !IsWhiteSpace( value[i] ) )
	1644	return false;
	1645	return true;
	1646	}
	1647
#ifdef TIXML_USE_STL
// Appends characters read from 'in' to '*tag', stopping after the first
// '>' has been appended or when the stream is no longer good. A read that
// yields a value <= 0 reports TIXML_ERROR_EMBEDDED_NULL on the owning
// document (if there is one) and stops.
void TiXmlStylesheetReference::StreamIn( std::istream * in, TIXML_STRING * tag )
{
	for ( ; in->good(); )
	{
		const int ch = in->get();
		if ( ch <= 0 )
		{
			if ( TiXmlDocument* doc = GetDocument() )
				doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += (char) ch;
		if ( ch == '>' )
			return;		// Reference is complete.
	}
}
#endif
	1671
	1672	const char* TiXmlStylesheetReference::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
	1673	{
	1674	p = SkipWhiteSpace( p, _encoding );
	1675	// Find the beginning, find the end, and look for
	1676	// the stuff in-between.
	1677	TiXmlDocument* document = GetDocument();
	1678	if ( !p \|\| !*p \|\| !StringEqual( p, "<?xml-stylesheet", true, _encoding ) )
	1679	{
	1680	if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
	1681	return 0;
	1682	}
	1683	if ( data )
	1684	{
	1685	data->Stamp( p, _encoding );
	1686	location = data->Cursor();
	1687	}
	1688	p += 5;
	1689
	1690	type = "";
	1691	href = "";
	1692
	1693	while ( p && *p )
	1694	{
	1695	if ( *p == '>' )
	1696	{
	1697	++p;
	1698	return p;
	1699	}
	1700
	1701	p = SkipWhiteSpace( p, _encoding );
	1702	if ( StringEqual( p, "type", true, _encoding ) )
	1703	{
	1704	TiXmlAttribute attrib;
	1705	p = attrib.Parse( p, data, _encoding );
	1706	type = attrib.Value();
	1707	}
	1708	else if ( StringEqual( p, "href", true, _encoding ) )
	1709	{
	1710	TiXmlAttribute attrib;
	1711	p = attrib.Parse( p, data, _encoding );
	1712	href = attrib.Value();
	1713	}
	1714	else
	1715	{
	1716	// Read over whatever it is.
	1717	while( p && p && p != '>' && !IsWhiteSpace( *p ) )
	1718	++p;
	1719	}
	1720	}
	1721	return 0;
	1722	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: