[3] | 1 | /* |
---|
| 2 | ----------------------------------------------------------------------------- |
---|
| 3 | This source file is part of OGRE |
---|
| 4 | (Object-oriented Graphics Rendering Engine) |
---|
| 5 | For the latest info, see http://www.ogre3d.org |
---|
| 6 | |
---|
| 7 | Copyright (c) 2000-2006 Torus Knot Software Ltd |
---|
| 8 | Also see acknowledgements in Readme.html |
---|
| 9 | |
---|
| 10 | This program is free software; you can redistribute it and/or modify it under |
---|
| 11 | the terms of the GNU Lesser General Public License as published by the Free Software |
---|
| 12 | Foundation; either version 2 of the License, or (at your option) any later |
---|
| 13 | version. |
---|
| 14 | |
---|
| 15 | This program is distributed in the hope that it will be useful, but WITHOUT |
---|
| 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
---|
| 17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. |
---|
| 18 | |
---|
| 19 | You should have received a copy of the GNU Lesser General Public License along with |
---|
| 20 | this program; if not, write to the Free Software Foundation, Inc., 59 Temple |
---|
| 21 | Place - Suite 330, Boston, MA 02111-1307, USA, or go to |
---|
| 22 | http://www.gnu.org/copyleft/lesser.txt. |
---|
| 23 | |
---|
| 24 | You may alternatively use this source under the terms of a specific version of |
---|
| 25 | the OGRE Unrestricted License provided you have obtained such a license from |
---|
| 26 | Torus Knot Software Ltd. |
---|
| 27 | ----------------------------------------------------------------------------- |
---|
| 28 | */ |
---|
| 29 | #include "OgreStableHeaders.h" |
---|
| 30 | #include "OgreCompiler2Pass.h" |
---|
| 31 | #include "OgreLogManager.h" |
---|
| 32 | #include "OgreException.h" |
---|
| 33 | #include "OgreStringConverter.h" |
---|
| 34 | |
---|
| 35 | namespace Ogre { |
---|
| 36 | //----------------------------------------------------------------------- |
---|
| 37 | // instantiate static members |
---|
| 38 | Compiler2Pass::TokenState Compiler2Pass::mBNFTokenState; |
---|
| 39 | Compiler2Pass::TokenStateContainer Compiler2Pass::mClientTokenStates; |
---|
| 40 | //----------------------------------------------------------------------- |
---|
| 41 | Compiler2Pass::Compiler2Pass() |
---|
| 42 | : mActiveTokenState(&mBNFTokenState) |
---|
| 43 | , mSource(0) |
---|
| 44 | , mSourceName("system") |
---|
| 45 | { |
---|
| 46 | // reserve some memory space in the containers being used |
---|
| 47 | mBNFTokenState.tokenQue.reserve(100); |
---|
| 48 | mBNFTokenState.lexemeTokenDefinitions.reserve(50); |
---|
| 49 | |
---|
| 50 | initBNFCompiler(); |
---|
| 51 | } |
---|
| 52 | //----------------------------------------------------------------------- |
---|
| 53 | void Compiler2Pass::initBNFCompiler(void) |
---|
| 54 | { |
---|
| 55 | if (mBNFTokenState.lexemeTokenDefinitions.empty()) |
---|
| 56 | { |
---|
| 57 | /* Every Token ID must be manually generated during the compiler bootstrap phase |
---|
| 58 | since the rule base is manually defined. |
---|
| 59 | */ |
---|
| 60 | |
---|
| 61 | addLexemeToken("UNKNOWN", BNF_UNKOWN); |
---|
| 62 | addLexemeToken("syntax", BNF_SYNTAX); |
---|
| 63 | addLexemeToken("rule", BNF_RULE); |
---|
| 64 | addLexemeToken("identifier", BNF_IDENTIFIER); |
---|
| 65 | addLexemeToken("identifier_right", BNF_IDENTIFIER_RIGHT); |
---|
| 66 | addLexemeToken("identifier_characters", BNF_IDENTIFIER_CHARACTERS); |
---|
| 67 | addLexemeToken("<", BNF_ID_BEGIN, false, true); |
---|
| 68 | addLexemeToken(">", BNF_ID_END, false, true); |
---|
| 69 | addLexemeToken("<#", BNF_CONSTANT_BEGIN, false, true); |
---|
| 70 | addLexemeToken("::=", BNF_SET_RULE, false, true); |
---|
| 71 | addLexemeToken("expression", BNF_EXPRESSION); |
---|
| 72 | addLexemeToken("and_term", BNF_AND_TERM); |
---|
| 73 | addLexemeToken("or_term", BNF_OR_TERM); |
---|
| 74 | addLexemeToken("term", BNF_TERM); |
---|
| 75 | addLexemeToken("term_id", BNF_TERM_ID); |
---|
| 76 | addLexemeToken("constant", BNF_CONSTANT); |
---|
| 77 | addLexemeToken("|", BNF_OR, false, true); |
---|
| 78 | addLexemeToken("terminal_symbol", BNF_TERMINAL_SYMBOL); |
---|
| 79 | addLexemeToken("terminal_start", BNF_TERMINAL_START); |
---|
| 80 | addLexemeToken("repeat_expression", BNF_REPEAT_EXPRESSION); |
---|
| 81 | addLexemeToken("not_expression", BNF_NOT_EXPRESSION); |
---|
| 82 | addLexemeToken("{", BNF_REPEAT_BEGIN, false, true); |
---|
| 83 | addLexemeToken("}", BNF_REPEAT_END, false, true); |
---|
| 84 | addLexemeToken("set", BNF_SET); |
---|
| 85 | addLexemeToken("(", BNF_SET_BEGIN, false, true); |
---|
| 86 | addLexemeToken(")", BNF_SET_END, false, true); |
---|
| 87 | addLexemeToken("set_end_exc", BNF_SET_END_EXC); |
---|
| 88 | addLexemeToken("optional_expression", BNF_OPTIONAL_EXPRESSION); |
---|
| 89 | addLexemeToken("[", BNF_OPTIONAL_BEGIN, false, true); |
---|
| 90 | addLexemeToken("]", BNF_OPTIONAL_END, false, true); |
---|
| 91 | addLexemeToken("not_test", BNF_NOT_TEST); |
---|
| 92 | addLexemeToken("not_chk", BNF_NOT_CHK); |
---|
| 93 | addLexemeToken("(?!", BNF_NOT_TEST_BEGIN, false, true); |
---|
| 94 | addLexemeToken("'", BNF_SINGLEQUOTE, false, true); |
---|
| 95 | addLexemeToken(":", BNF_CONDITIONAL_TOKEN_INSERT, false, true); |
---|
| 96 | addLexemeToken("-'", BNF_NO_TOKEN_START, false, true); |
---|
| 97 | addLexemeToken("any_character", BNF_ANY_CHARACTER); |
---|
| 98 | addLexemeToken("single_quote_exc", BNF_SINGLE_QUOTE_EXC); |
---|
| 99 | addLexemeToken("white_space_chk", BNF_WHITE_SPACE_CHK); |
---|
| 100 | addLexemeToken("special_characters1", BNF_SPECIAL_CHARACTERS1); |
---|
| 101 | addLexemeToken("special_characters2", BNF_SPECIAL_CHARACTERS2); |
---|
| 102 | |
---|
| 103 | addLexemeToken("letter", BNF_LETTER); |
---|
| 104 | addLexemeToken("letter_digit", BNF_LETTER_DIGIT); |
---|
| 105 | addLexemeToken("digit", BNF_DIGIT); |
---|
| 106 | addLexemeToken("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", BNF_ALPHA_SET, false, true); |
---|
| 107 | addLexemeToken("0123456789", BNF_NUMBER_SET, false, true); |
---|
| 108 | addLexemeToken("`~!@#$%^&*(-_=+\\|[]{}:;\"<>,.?/\n\r\t", BNF_SPECIAL_CHARACTER_SET2, false, true); |
---|
| 109 | addLexemeToken("$_", BNF_SPECIAL_CHARACTER_SET1, false, true); |
---|
| 110 | addLexemeToken(" ", BNF_WHITE_SPACE, false, true); |
---|
| 111 | addLexemeToken("?!", BNF_NOT_CHARS, false, true); |
---|
| 112 | } |
---|
| 113 | |
---|
| 114 | if (mBNFTokenState.rootRulePath.empty()) |
---|
| 115 | { |
---|
| 116 | // first entry is set to unknown in order to trap rule id's not set for non-terminal tokens |
---|
| 117 | mBNFTokenState.rootRulePath.resize(1); |
---|
| 118 | // used by bootstrap BNF text parser |
---|
| 119 | // <> - non-terminal token |
---|
| 120 | // () - set of |
---|
| 121 | // ::= - rule definition |
---|
| 122 | #define _rule_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otRULE, id)); |
---|
| 123 | #define _is_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otAND, id)); |
---|
| 124 | // - blank space is an implied "AND" meaning the token is required |
---|
| 125 | #define _and_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otAND, id)); |
---|
| 126 | // | - or |
---|
| 127 | #define _or_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otOR, id)); |
---|
| 128 | // [] - optional |
---|
| 129 | #define _optional_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otOPTIONAL, id)); |
---|
| 130 | // {} - repeat 0 or more times until fail or rule does not progress |
---|
| 131 | #define _repeat_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otREPEAT, id)); |
---|
| 132 | #define _data_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otDATA, id)); |
---|
| 133 | // (?! ) - lookahead negative (not test) |
---|
| 134 | #define _not_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otNOT_TEST, id)); |
---|
| 135 | #define _end_ mBNFTokenState.rootRulePath.push_back(TokenRule(otEND,0)); |
---|
| 136 | |
---|
| 137 | // <syntax> ::= { rule } |
---|
| 138 | _rule_(BNF_SYNTAX) _repeat_(BNF_RULE) _end_ |
---|
| 139 | |
---|
| 140 | // <rule> ::= <identifier> "::=" <expression> |
---|
| 141 | _rule_(BNF_RULE) |
---|
| 142 | _is_(BNF_IDENTIFIER) |
---|
| 143 | _and_(BNF_SET_RULE) |
---|
| 144 | _and_(BNF_EXPRESSION) |
---|
| 145 | _end_ |
---|
| 146 | |
---|
| 147 | // <expression> ::= <and_term> { <or_term> } |
---|
| 148 | _rule_(BNF_EXPRESSION) |
---|
| 149 | _is_(BNF_AND_TERM) |
---|
| 150 | _repeat_(BNF_OR_TERM) |
---|
| 151 | _end_ |
---|
| 152 | // <or_term> ::= "|" <and_term> |
---|
| 153 | _rule_(BNF_OR_TERM) |
---|
| 154 | _is_(BNF_OR) |
---|
| 155 | _and_(BNF_AND_TERM) |
---|
| 156 | _end_ |
---|
| 157 | // <and_term> ::= <term> { <term> } |
---|
| 158 | _rule_(BNF_AND_TERM) |
---|
| 159 | _is_(BNF_TERM) |
---|
| 160 | _repeat_(BNF_TERM) |
---|
| 161 | _end_ |
---|
| 162 | // <term> ::= <term_id> | <repeat_expression> | <optional_expression> | <not_expression> |
---|
| 163 | _rule_(BNF_TERM) |
---|
| 164 | _is_(BNF_TERM_ID) |
---|
| 165 | _or_(BNF_REPEAT_EXPRESSION) |
---|
| 166 | _or_(BNF_OPTIONAL_EXPRESSION) |
---|
| 167 | _or_(BNF_NOT_EXPRESSION) |
---|
| 168 | _end_ |
---|
| 169 | |
---|
| 170 | // <term_id> ::= <constant> | <identifier_right> | <terminal_symbol> | <set> |
---|
| 171 | _rule_(BNF_TERM_ID) |
---|
| 172 | _is_(BNF_CONSTANT) |
---|
| 173 | _or_(BNF_IDENTIFIER_RIGHT) |
---|
| 174 | _or_(BNF_TERMINAL_SYMBOL) |
---|
| 175 | _or_(BNF_SET) |
---|
| 176 | _end_ |
---|
| 177 | |
---|
| 178 | // <repeat_expression> ::= "{" <term_id> "}" |
---|
| 179 | _rule_(BNF_REPEAT_EXPRESSION) |
---|
| 180 | _is_(BNF_REPEAT_BEGIN) |
---|
| 181 | _and_(BNF_TERM_ID) |
---|
| 182 | _and_(BNF_REPEAT_END) |
---|
| 183 | _end_ |
---|
| 184 | |
---|
| 185 | // <optional_expression> ::= "[" <term_id> "]" |
---|
| 186 | _rule_(BNF_OPTIONAL_EXPRESSION) |
---|
| 187 | _is_(BNF_OPTIONAL_BEGIN) |
---|
| 188 | _and_(BNF_TERM_ID) |
---|
| 189 | _and_(BNF_OPTIONAL_END) |
---|
| 190 | _end_ |
---|
| 191 | |
---|
| 192 | // <not_expression> ::= "(?!" <term_id> ")" |
---|
| 193 | _rule_(BNF_NOT_EXPRESSION) |
---|
| 194 | _is_(BNF_NOT_TEST_BEGIN) |
---|
| 195 | _and_(BNF_TERM_ID) |
---|
| 196 | _and_(BNF_SET_END) |
---|
| 197 | _end_ |
---|
| 198 | |
---|
| 199 | // <identifier_right> ::= <indentifier> (?!"::=") |
---|
| 200 | _rule_(BNF_IDENTIFIER_RIGHT) |
---|
| 201 | _is_(BNF_IDENTIFIER) |
---|
| 202 | _not_(BNF_SET_RULE) |
---|
| 203 | _end_ |
---|
| 204 | |
---|
| 205 | // <identifier> ::= "<" <letter> {<identifier_characters>} ">" |
---|
| 206 | _rule_(BNF_IDENTIFIER) |
---|
| 207 | _is_(BNF_ID_BEGIN) |
---|
| 208 | _and_(BNF_LETTER) |
---|
| 209 | _repeat_(BNF_IDENTIFIER_CHARACTERS) |
---|
| 210 | _and_(BNF_ID_END) |
---|
| 211 | _end_ |
---|
| 212 | |
---|
| 213 | // <identifier_characters> ::= <letter_digit> | <special_characters1> |
---|
| 214 | _rule_(BNF_IDENTIFIER_CHARACTERS) |
---|
| 215 | _is_(BNF_LETTER_DIGIT) |
---|
| 216 | _or_(BNF_SPECIAL_CHARACTERS1) |
---|
| 217 | _end_ |
---|
| 218 | |
---|
| 219 | // <terminal_symbol> ::= <terminal_start> @{ <any_character> } "'" [":"] |
---|
| 220 | _rule_(BNF_TERMINAL_SYMBOL) |
---|
| 221 | _is_(BNF_TERMINAL_START) |
---|
| 222 | _and_(_no_space_skip_) |
---|
| 223 | _repeat_(BNF_ANY_CHARACTER) |
---|
| 224 | _and_(BNF_SINGLEQUOTE) |
---|
| 225 | _optional_(BNF_CONDITIONAL_TOKEN_INSERT) |
---|
| 226 | _end_ |
---|
| 227 | |
---|
| 228 | // <terminal_start> ::= "-'" | "'" |
---|
| 229 | _rule_(BNF_TERMINAL_START) |
---|
| 230 | _is_(BNF_NO_TOKEN_START) |
---|
| 231 | _or_(BNF_SINGLEQUOTE) |
---|
| 232 | _end_ |
---|
| 233 | |
---|
| 234 | |
---|
| 235 | // <constant> ::= "<#" <letter> {<identifier_characters>} ">" |
---|
| 236 | _rule_(BNF_CONSTANT) |
---|
| 237 | _is_(BNF_CONSTANT_BEGIN) |
---|
| 238 | _and_(BNF_LETTER) |
---|
| 239 | _repeat_(BNF_IDENTIFIER_CHARACTERS) |
---|
| 240 | _and_(BNF_ID_END) |
---|
| 241 | _end_ |
---|
| 242 | |
---|
| 243 | // <set> ::= "(" (?!<not_chk>) @{<any_character>} ")" |
---|
| 244 | _rule_(BNF_SET) |
---|
| 245 | _is_(BNF_SET_BEGIN) |
---|
| 246 | _not_(BNF_NOT_CHK) |
---|
| 247 | _and_(_no_space_skip_) |
---|
| 248 | _repeat_(BNF_ANY_CHARACTER) |
---|
| 249 | _and_(BNF_SET_END) |
---|
| 250 | _end_ |
---|
| 251 | |
---|
| 252 | // <any_character> ::= <letter_digit> | <special_characters2> |
---|
| 253 | _rule_(BNF_ANY_CHARACTER) |
---|
| 254 | _is_(BNF_LETTER_DIGIT) |
---|
| 255 | _or_(BNF_SPECIAL_CHARACTERS2) |
---|
| 256 | _end_ |
---|
| 257 | |
---|
| 258 | // <letter_digit> ::= <letter> | <digit> |
---|
| 259 | _rule_(BNF_LETTER_DIGIT) |
---|
| 260 | _is_(BNF_LETTER) |
---|
| 261 | _or_(BNF_DIGIT) |
---|
| 262 | _end_ |
---|
| 263 | |
---|
| 264 | // <letter> ::= (abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ) |
---|
| 265 | _rule_(BNF_LETTER) |
---|
| 266 | _is_(_character_) |
---|
| 267 | _data_(BNF_ALPHA_SET)// "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" |
---|
| 268 | _end_ |
---|
| 269 | |
---|
| 270 | // <digit> ::= (0123456789) |
---|
| 271 | _rule_(BNF_DIGIT) |
---|
| 272 | _is_(_character_) |
---|
| 273 | _data_(BNF_NUMBER_SET) |
---|
| 274 | _end_ |
---|
| 275 | |
---|
| 276 | // <special_characters1> ::= ($_) |
---|
| 277 | _rule_(BNF_SPECIAL_CHARACTERS1) |
---|
| 278 | _is_(_character_) |
---|
| 279 | _data_(BNF_SPECIAL_CHARACTER_SET1) |
---|
| 280 | _end_ |
---|
| 281 | |
---|
| 282 | // <special_characters2> ::= (`~!@#$%^&*(-_=+\|[]{}:;"<>,.?/) | <single_quote_exc> |
---|
| 283 | // | <white_space_chk> | <set_end_exc> |
---|
| 284 | _rule_(BNF_SPECIAL_CHARACTERS2) |
---|
| 285 | _is_(_character_) |
---|
| 286 | _data_(BNF_SPECIAL_CHARACTER_SET2) |
---|
| 287 | _or_(BNF_WHITE_SPACE_CHK) |
---|
| 288 | _or_(BNF_SINGLE_QUOTE_EXC) |
---|
| 289 | _or_(BNF_SET_END_EXC) |
---|
| 290 | _end_ |
---|
| 291 | |
---|
| 292 | // <single_quote_exc> ::= "'" (?!" ") |
---|
| 293 | _rule_(BNF_SINGLE_QUOTE_EXC) |
---|
| 294 | _is_(_character_) |
---|
| 295 | _data_(BNF_SINGLEQUOTE) |
---|
| 296 | _not_(BNF_WHITE_SPACE_CHK) |
---|
| 297 | _end_ |
---|
| 298 | |
---|
| 299 | // <set_end_exc> ::= ")" (?!" ") |
---|
| 300 | _rule_(BNF_SET_END_EXC) |
---|
| 301 | _is_(_character_) |
---|
| 302 | _data_(BNF_SET_END) |
---|
| 303 | _not_(BNF_WHITE_SPACE_CHK) |
---|
| 304 | _end_ |
---|
| 305 | |
---|
| 306 | // <white_space_chk> ::= ( ) |
---|
| 307 | _rule_(BNF_WHITE_SPACE_CHK) |
---|
| 308 | _is_(_character_) |
---|
| 309 | _data_(BNF_WHITE_SPACE) |
---|
| 310 | _end_ |
---|
| 311 | // <not_chk> ::= (?!) |
---|
| 312 | _rule_(BNF_NOT_CHK) |
---|
| 313 | _is_(BNF_NOT_CHARS) |
---|
| 314 | //_data_(BNF_NOT_CHARS) |
---|
| 315 | _end_ |
---|
| 316 | |
---|
| 317 | // now that all the rules are added, update token definitions with rule links |
---|
| 318 | verifyTokenRuleLinks("system"); |
---|
| 319 | } |
---|
| 320 | // switch to client state |
---|
| 321 | mActiveTokenState = mClientTokenState; |
---|
| 322 | } |
---|
| 323 | |
---|
| 324 | //----------------------------------------------------------------------- |
---|
| 325 | void Compiler2Pass::verifyTokenRuleLinks(const String& grammerName) |
---|
| 326 | { |
---|
| 327 | size_t token_ID; |
---|
| 328 | |
---|
| 329 | // scan through all the rules and initialize index to rules for non-terminal tokens |
---|
| 330 | const size_t ruleCount = mActiveTokenState->rootRulePath.size(); |
---|
| 331 | for (size_t i = 0; i < ruleCount; ++i) |
---|
| 332 | { |
---|
| 333 | // make sure token definition holds valid token |
---|
| 334 | if (mActiveTokenState->rootRulePath[i].operation == otRULE) |
---|
| 335 | { |
---|
| 336 | token_ID = mActiveTokenState->rootRulePath[i].tokenID; |
---|
| 337 | // system token id's can never have a rule assigned to them so no need to check if token is system token |
---|
| 338 | // but do make sure the id is within defined bounds |
---|
| 339 | if (token_ID >= mActiveTokenState->lexemeTokenDefinitions.size()) |
---|
| 340 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "For grammer: " + grammerName + |
---|
| 341 | ", a token ID was out of token definition range.", |
---|
| 342 | "Compiler2Pass::verifyTokenRuleLinks"); |
---|
| 343 | |
---|
| 344 | LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions[token_ID]; |
---|
| 345 | if (tokenDef.ID != token_ID) |
---|
| 346 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "For grammer: " + grammerName + |
---|
| 347 | ", lexeme non-terminal token definition: " + |
---|
| 348 | tokenDef.lexeme + " is corrupted and does not match its assigned rule.", |
---|
| 349 | "Compiler2Pass::verifyTokenRuleLinks"); |
---|
| 350 | // if operation is a rule then update token definition |
---|
| 351 | tokenDef.ruleID = i; |
---|
| 352 | tokenDef.isNonTerminal = true; |
---|
| 353 | } |
---|
| 354 | } // end for |
---|
| 355 | |
---|
| 356 | // test all non terminals for valid rule ID |
---|
| 357 | const size_t definitionCount = mActiveTokenState->lexemeTokenDefinitions.size(); |
---|
| 358 | bool errorsFound = false; |
---|
| 359 | // report all non-terminals that don't have a rule then throw an exception |
---|
| 360 | for (token_ID = 0; token_ID < definitionCount; ++token_ID) |
---|
| 361 | { |
---|
| 362 | const LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions[token_ID]; |
---|
| 363 | if (tokenDef.isNonTerminal && (tokenDef.ruleID == 0)) |
---|
| 364 | { |
---|
| 365 | errorsFound = true; |
---|
| 366 | LogManager::getSingleton().logMessage( |
---|
| 367 | "For grammer: " + grammerName + |
---|
| 368 | ", lexeme non-terminal token definition: " + tokenDef.lexeme + |
---|
| 369 | " found with no rule definition or corrupted." |
---|
| 370 | ); |
---|
| 371 | } |
---|
| 372 | } |
---|
| 373 | if (errorsFound) |
---|
| 374 | { |
---|
| 375 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "For grammer: " + grammerName + |
---|
| 376 | ", lexeme non-terminal token definition(s) found with no rule definition or corrupted.", |
---|
| 377 | "Compiler2Pass::verifyTokenRuleLinks"); |
---|
| 378 | } |
---|
| 379 | } |
---|
| 380 | |
---|
| 381 | //----------------------------------------------------------------------- |
---|
| 382 | bool Compiler2Pass::compile(const String& source, const String& sourceName) |
---|
| 383 | { |
---|
| 384 | // make sure BNF compiler is setup to compile BNF grammer if required |
---|
| 385 | initBNFCompiler(); |
---|
| 386 | // compile the client's BNF grammer |
---|
| 387 | setClientBNFGrammer(); |
---|
| 388 | |
---|
| 389 | bool Passed = false; |
---|
| 390 | |
---|
| 391 | mSource = &source; |
---|
| 392 | mSourceName = sourceName; |
---|
| 393 | mActiveTokenState = mClientTokenState; |
---|
| 394 | // start compiling if there is a rule base to work with |
---|
| 395 | if (mActiveTokenState->rootRulePath.size() > 1) |
---|
| 396 | { |
---|
| 397 | Passed = doPass1(); |
---|
| 398 | |
---|
| 399 | if (Passed) |
---|
| 400 | { |
---|
| 401 | Passed = doPass2(); |
---|
| 402 | } |
---|
| 403 | } |
---|
| 404 | |
---|
| 405 | return Passed; |
---|
| 406 | } |
---|
| 407 | |
---|
| 408 | //----------------------------------------------------------------------- |
---|
| 409 | bool Compiler2Pass::doPass1() |
---|
| 410 | { |
---|
| 411 | // scan through Source string and build a token list using TokenInstructions |
---|
| 412 | // this is a simple brute force lexical scanner/analyzer that also parses the formed |
---|
| 413 | // token for proper semantics and context in one pass |
---|
| 414 | |
---|
| 415 | mCurrentLine = 1; |
---|
| 416 | mCharPos = 0; |
---|
| 417 | // reset position in Constants container |
---|
| 418 | mConstants.clear(); |
---|
| 419 | mLabels.clear(); |
---|
| 420 | // there is no active label when first starting pass 1 |
---|
| 421 | mLabelIsActive = false; |
---|
| 422 | mActiveLabelKey = 0; |
---|
| 423 | mEndOfSource = mSource->length(); |
---|
| 424 | |
---|
| 425 | // start with a clean slate |
---|
| 426 | mActiveTokenState->tokenQue.clear(); |
---|
| 427 | mPass2TokenQuePosition = 0; |
---|
| 428 | mPreviousActionQuePosition = 0; |
---|
| 429 | mNextActionQuePosition = 0; |
---|
| 430 | mNoTerminalToken = false; |
---|
| 431 | mNoSpaceSkip = false; |
---|
| 432 | mErrorCharPos = 0; |
---|
| 433 | mInsertTokenID = 0; |
---|
| 434 | // tokenize and check semantics untill an error occurs or end of source is reached |
---|
| 435 | // assume RootRulePath has pointer to rules so start at index + 1 for first rule path |
---|
| 436 | // first rule token would be a rule definition so skip over it |
---|
| 437 | bool passed = false; |
---|
| 438 | |
---|
| 439 | try |
---|
| 440 | { |
---|
| 441 | passed = processRulePath(1); |
---|
| 442 | // if a lexeme in source still exists then the end of source was not reached and there was a problem some where |
---|
| 443 | if (positionToNextLexeme()) passed = false; |
---|
| 444 | if (passed) |
---|
| 445 | { |
---|
| 446 | // special condition at end of script. The last action needs to be triggered if |
---|
| 447 | // parsing reached the end of the source. |
---|
| 448 | activatePreviousTokenAction(); |
---|
| 449 | } |
---|
| 450 | else if (mCharPos != mEndOfSource && mErrorCharPos == 0) |
---|
| 451 | { |
---|
| 452 | LogManager::getSingleton().logMessage( |
---|
| 453 | "*** ERROR *** : in " + getClientGrammerName() + |
---|
| 454 | " Source: " + mSourceName + |
---|
| 455 | "\nUnknown token found on line " + StringConverter::toString(mCurrentLine) + |
---|
| 456 | "\nFound: >>>" + mSource->substr(mCharPos, 20) + |
---|
| 457 | "<<<\n" |
---|
| 458 | ); |
---|
| 459 | |
---|
| 460 | } |
---|
| 461 | |
---|
| 462 | } |
---|
| 463 | catch (Exception& e) |
---|
| 464 | { |
---|
| 465 | LogManager::getSingleton().logMessage( "Exception caught " |
---|
| 466 | " while trying to parse " |
---|
| 467 | + getClientGrammerName() |
---|
| 468 | + ": " |
---|
| 469 | + mSourceName |
---|
| 470 | + ". Exception was '" |
---|
| 471 | + e.getFullDescription() |
---|
| 472 | + "'. Pass 1 terminated" |
---|
| 473 | ); |
---|
| 474 | } |
---|
| 475 | catch (...) |
---|
| 476 | { |
---|
| 477 | LogManager::getSingleton().logMessage( "Unkown exception while trying to parse: " |
---|
| 478 | + getClientGrammerName() |
---|
| 479 | + ": " |
---|
| 480 | + mSourceName |
---|
| 481 | ); |
---|
| 482 | } |
---|
| 483 | |
---|
| 484 | return passed; |
---|
| 485 | |
---|
| 486 | } |
---|
| 487 | |
---|
| 488 | //----------------------------------------------------------------------- |
---|
| 489 | bool Compiler2Pass::doPass2() |
---|
| 490 | { |
---|
| 491 | bool passed = true; |
---|
| 492 | // step through tokens container and execute until end found or error occurs |
---|
| 493 | |
---|
| 494 | return passed; |
---|
| 495 | } |
---|
| 496 | //----------------------------------------------------------------------- |
---|
| 497 | const Compiler2Pass::TokenInst& Compiler2Pass::getCurrentToken(const size_t expectedTokenID) const |
---|
| 498 | { |
---|
| 499 | if (mPass2TokenQuePosition <= mActiveTokenState->tokenQue.size() - 1) |
---|
| 500 | { |
---|
| 501 | const TokenInst& tokenInst = mActiveTokenState->tokenQue[mPass2TokenQuePosition]; |
---|
| 502 | |
---|
| 503 | if (expectedTokenID > 0 && (tokenInst.tokenID != expectedTokenID)) |
---|
| 504 | { |
---|
| 505 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, getClientGrammerName() + ":" + mSourceName |
---|
| 506 | + ", expected token ID not found" , |
---|
| 507 | "Compiler2Pass::getCurrentToken"); |
---|
| 508 | } |
---|
| 509 | |
---|
| 510 | return tokenInst; |
---|
| 511 | } |
---|
| 512 | else |
---|
| 513 | { |
---|
| 514 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, getClientGrammerName() + ":" + mSourceName + |
---|
| 515 | ", Line " + StringConverter::toString(mActiveTokenState->tokenQue.back().line) + |
---|
| 516 | "\n no token available, all pass 2 tokens processed" , |
---|
| 517 | "Compiler2Pass::getCurrentToken"); |
---|
| 518 | } |
---|
| 519 | } |
---|
| 520 | //----------------------------------------------------------------------- |
---|
| 521 | bool Compiler2Pass::testNextTokenID(const size_t expectedTokenID) const |
---|
| 522 | { |
---|
| 523 | const size_t nextTokenIndex = mPass2TokenQuePosition + 1; |
---|
| 524 | |
---|
| 525 | if (nextTokenIndex < mActiveTokenState->tokenQue.size()) |
---|
| 526 | return mActiveTokenState->tokenQue[nextTokenIndex].tokenID == expectedTokenID; |
---|
| 527 | |
---|
| 528 | return false; |
---|
| 529 | } |
---|
| 530 | //----------------------------------------------------------------------- |
---|
| 531 | void Compiler2Pass::skipToken(void) const |
---|
| 532 | { |
---|
| 533 | if (mPass2TokenQuePosition < mActiveTokenState->tokenQue.size() - 1) |
---|
| 534 | { |
---|
| 535 | ++mPass2TokenQuePosition; |
---|
| 536 | } |
---|
| 537 | else |
---|
| 538 | { |
---|
| 539 | // no more tokens left for pass 2 processing |
---|
| 540 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, getClientGrammerName() + ":" + mSourceName |
---|
| 541 | + ", no more tokens available for pass 2 processing" , |
---|
| 542 | "Compiler2Pass::skipToken"); |
---|
| 543 | } |
---|
| 544 | } |
---|
| 545 | //----------------------------------------------------------------------- |
---|
| 546 | void Compiler2Pass::replaceToken(void) |
---|
| 547 | { |
---|
| 548 | // move instruction que index back one position |
---|
| 549 | if (mPass2TokenQuePosition > 0) |
---|
| 550 | --mPass2TokenQuePosition; |
---|
| 551 | } |
---|
| 552 | //----------------------------------------------------------------------- |
---|
| 553 | float Compiler2Pass::getCurrentTokenValue(void) const |
---|
| 554 | { |
---|
| 555 | // get float value from current token instruction |
---|
| 556 | const TokenInst& token = getCurrentToken(); |
---|
| 557 | if ( token.tokenID == _value_) |
---|
| 558 | { |
---|
| 559 | std::map<size_t, float>::const_iterator i = mConstants.find(mPass2TokenQuePosition); |
---|
| 560 | if (i != mConstants.end()) |
---|
| 561 | { |
---|
| 562 | return i->second; |
---|
| 563 | } |
---|
| 564 | else |
---|
| 565 | { |
---|
| 566 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, "In " + mSourceName + |
---|
| 567 | ", on line " + StringConverter::toString(token.line) + |
---|
| 568 | ", no value was found in : >>>" + mSource->substr(token.pos, 20) + |
---|
| 569 | "<<<", |
---|
| 570 | "Compiler2Pass::getCurrentTokenValue"); |
---|
| 571 | } |
---|
| 572 | } |
---|
| 573 | else |
---|
| 574 | { |
---|
| 575 | // if token is not for a value then throw an exception |
---|
| 576 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, "In " + mSourceName + |
---|
| 577 | ", on line " + StringConverter::toString(token.line) + |
---|
| 578 | ", token is not for a value. Found: >>>" + mSource->substr(token.pos, 20) + |
---|
| 579 | "<<<", |
---|
| 580 | "Compiler2Pass::getCurrentTokenValue"); |
---|
| 581 | } |
---|
| 582 | } |
---|
| 583 | //----------------------------------------------------------------------- |
---|
| 584 | const String& Compiler2Pass::getCurrentTokenLabel(void) const |
---|
| 585 | { |
---|
| 586 | // get label from current token instruction |
---|
| 587 | const TokenInst& token = getCurrentToken(); |
---|
| 588 | if (token.tokenID == _character_) |
---|
| 589 | { |
---|
| 590 | std::map<size_t, String>::const_iterator i = mLabels.find(mPass2TokenQuePosition); |
---|
| 591 | if (i != mLabels.end()) |
---|
| 592 | { |
---|
| 593 | return i->second; |
---|
| 594 | } |
---|
| 595 | else |
---|
| 596 | { |
---|
| 597 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, "In " + mSourceName + |
---|
| 598 | ", on line " + StringConverter::toString(token.line) + |
---|
| 599 | ", no Label was found in : >>>" + mSource->substr(token.pos, 20) + |
---|
| 600 | "<<<", |
---|
| 601 | "Compiler2Pass::getCurrentTokenLabel"); |
---|
| 602 | } |
---|
| 603 | } |
---|
| 604 | else |
---|
| 605 | { |
---|
| 606 | // if token is not for a label then throw an exception |
---|
| 607 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, "In " + mSourceName + |
---|
| 608 | ", on line " + StringConverter::toString(token.line) + |
---|
| 609 | ", token is not for a label. Found: >>>" + mSource->substr(token.pos, 20) + |
---|
| 610 | "<<<", |
---|
| 611 | "Compiler2Pass::getCurrentTokenLabel"); |
---|
| 612 | } |
---|
| 613 | } |
---|
| 614 | //----------------------------------------------------------------------- |
---|
| 615 | const String& Compiler2Pass::getCurrentTokenLexeme(void) const |
---|
| 616 | { |
---|
| 617 | // get label from current token instruction |
---|
| 618 | const TokenInst& token = getCurrentToken(); |
---|
| 619 | if (token.tokenID < SystemTokenBase) |
---|
| 620 | return mActiveTokenState->lexemeTokenDefinitions[token.tokenID].lexeme; |
---|
| 621 | else |
---|
| 622 | { |
---|
| 623 | // if token is for system use then throw an exception |
---|
| 624 | OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, "In " + mSourceName + |
---|
| 625 | ", on line " + StringConverter::toString(token.line) + |
---|
| 626 | ", token is for system use only. Found: >>>" + mSource->substr(token.pos, 20) + |
---|
| 627 | "<<<", |
---|
| 628 | "Compiler2Pass::getCurrentTokenLexeme"); |
---|
| 629 | } |
---|
| 630 | |
---|
| 631 | } |
---|
| 632 | //----------------------------------------------------------------------- |
---|
| 633 | size_t Compiler2Pass::getPass2TokenQueCount(void) const |
---|
| 634 | { |
---|
| 635 | // calculate number of tokens between current token instruction and next token with action |
---|
| 636 | if(mActiveTokenState->tokenQue.size() > mPass2TokenQuePosition) |
---|
| 637 | return mActiveTokenState->tokenQue.size() - 1 - mPass2TokenQuePosition; |
---|
| 638 | else |
---|
| 639 | return 0; |
---|
| 640 | } |
---|
| 641 | //----------------------------------------------------------------------- |
---|
| 642 | size_t Compiler2Pass::getRemainingTokensForAction(void) const |
---|
| 643 | { |
---|
| 644 | size_t remaingingTokens = 0; |
---|
| 645 | if (mNextActionQuePosition > mPass2TokenQuePosition) |
---|
| 646 | { |
---|
| 647 | // don't count next action nor the current position |
---|
| 648 | remaingingTokens = mNextActionQuePosition - mPass2TokenQuePosition - 1; |
---|
| 649 | } |
---|
| 650 | |
---|
| 651 | return remaingingTokens; |
---|
| 652 | } |
---|
| 653 | //----------------------------------------------------------------------- |
---|
| 654 | bool Compiler2Pass::setNextActionQuePosition(size_t pos, const bool search) |
---|
| 655 | { |
---|
| 656 | const size_t lastPos = mActiveTokenState->tokenQue.size(); |
---|
| 657 | |
---|
| 658 | if (pos >= lastPos) |
---|
| 659 | return false; |
---|
| 660 | |
---|
| 661 | bool nextActionFound = false; |
---|
| 662 | |
---|
| 663 | // if searching then assume no next action will be found so set position to end of que |
---|
| 664 | if (search) |
---|
| 665 | mNextActionQuePosition = lastPos; |
---|
| 666 | |
---|
| 667 | while (!nextActionFound && (pos < lastPos)) |
---|
| 668 | { |
---|
| 669 | const size_t tokenID = mActiveTokenState->tokenQue[pos].tokenID; |
---|
| 670 | |
---|
| 671 | if ((tokenID < SystemTokenBase) && |
---|
| 672 | mActiveTokenState->lexemeTokenDefinitions.at(tokenID).hasAction) |
---|
| 673 | { |
---|
| 674 | mNextActionQuePosition = pos; |
---|
| 675 | nextActionFound = true; |
---|
| 676 | } |
---|
| 677 | |
---|
| 678 | if (search) |
---|
| 679 | ++pos; |
---|
| 680 | else |
---|
| 681 | pos = lastPos; |
---|
| 682 | } |
---|
| 683 | |
---|
| 684 | return nextActionFound; |
---|
| 685 | } |
---|
| 686 | //----------------------------------------------------------------------- |
---|
| 687 | void Compiler2Pass::setPass2TokenQuePosition(size_t pos, const bool activateAction) |
---|
| 688 | { |
---|
| 689 | if (pos < mActiveTokenState->tokenQue.size()) |
---|
| 690 | { |
---|
| 691 | mPass2TokenQuePosition = pos; |
---|
| 692 | ++pos; |
---|
| 693 | // find the next token with an action |
---|
| 694 | setNextActionQuePosition(pos, true); |
---|
| 695 | |
---|
| 696 | // activate action if token has one and it was requested |
---|
| 697 | if (activateAction) |
---|
| 698 | { |
---|
| 699 | const size_t tokenID = mActiveTokenState->tokenQue.at(mPass2TokenQuePosition).tokenID; |
---|
| 700 | if ((tokenID < SystemTokenBase) && |
---|
| 701 | mActiveTokenState->lexemeTokenDefinitions.at(tokenID).hasAction) |
---|
| 702 | { |
---|
| 703 | // assume that pass 2 processing will use tokens downstream |
---|
| 704 | executeTokenAction(tokenID); |
---|
| 705 | } |
---|
| 706 | } |
---|
| 707 | } |
---|
| 708 | } |
---|
| 709 | //----------------------------------------------------------------------- |
---|
| 710 | void Compiler2Pass::setClientBNFGrammer(void) |
---|
| 711 | { |
---|
| 712 | // switch to internal BNF Containers |
---|
| 713 | // clear client containers |
---|
| 714 | mClientTokenState = &mClientTokenStates[getClientGrammerName()]; |
---|
| 715 | // attempt to compile the grammer into a rule base if no rules exist |
---|
| 716 | if (mClientTokenState->rootRulePath.size() == 0) |
---|
| 717 | { |
---|
| 718 | mClientTokenState->tokenQue.reserve(100); |
---|
| 719 | mClientTokenState->lexemeTokenDefinitions.reserve(100); |
---|
| 720 | // first entry in rule path is set as a bad entry and no token should reference it |
---|
| 721 | mClientTokenState->rootRulePath.resize(1); |
---|
| 722 | // allow the client to setup token definitions prior to |
---|
| 723 | // compiling the BNF grammer |
---|
| 724 | // ensure token definitions are added to the client state |
---|
| 725 | mActiveTokenState = mClientTokenState; |
---|
| 726 | // get client to setup token definitions and actions it wants to know about |
---|
| 727 | setupTokenDefinitions(); |
---|
| 728 | // make sure active token state is for BNF compiling |
---|
| 729 | mActiveTokenState = &mBNFTokenState; |
---|
| 730 | mSource = &getClientBNFGrammer(); |
---|
| 731 | |
---|
| 732 | if (doPass1()) |
---|
| 733 | { |
---|
| 734 | buildClientBNFRulePaths(); |
---|
| 735 | } |
---|
| 736 | else |
---|
| 737 | { |
---|
| 738 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "BNF Grammar compilation failed for " + |
---|
| 739 | getClientGrammerName(), "Compiler2Pass::setClientBNFGrammer"); |
---|
| 740 | } |
---|
| 741 | // change token state to client data after compiling grammer |
---|
| 742 | mActiveTokenState = mClientTokenState; |
---|
| 743 | // verify the client rule paths and associated terminal and non-terminal lexemes |
---|
| 744 | verifyTokenRuleLinks(getClientGrammerName()); |
---|
| 745 | } |
---|
| 746 | } |
---|
| 747 | |
---|
| 748 | //----------------------------------------------------------------------- |
---|
| 749 | bool Compiler2Pass::processRulePath( size_t rulepathIDX) |
---|
| 750 | { |
---|
| 751 | // rule path determines what tokens and therefore what lexemes are acceptable from the source |
---|
| 752 | // it is assumed that the tokens with the longest similar lexemes are arranged first so |
---|
| 753 | // if a match is found it is accepted and no further searching is done |
---|
| 754 | |
---|
| 755 | if (rulepathIDX >= mActiveTokenState->rootRulePath.size()) |
---|
| 756 | { |
---|
| 757 | // This is very bad and no way to recover so raise exception |
---|
| 758 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "rule ID exceeds rule base bounds.", "Compiler2Pass::processRulePath"); |
---|
| 759 | } |
---|
| 760 | // record position of last token in container |
---|
| 761 | // to be used as the rollback position if a valid token is not found |
---|
| 762 | const size_t TokenContainerOldSize = mActiveTokenState->tokenQue.size(); |
---|
| 763 | const size_t OldCharPos = mCharPos; |
---|
| 764 | const size_t OldLinePos = mCurrentLine; |
---|
| 765 | const bool OldLabelIsActive = mLabelIsActive; |
---|
| 766 | const size_t OldActiveLabelKey = mActiveLabelKey; |
---|
| 767 | const String OldLabel = mLabels[OldActiveLabelKey]; |
---|
| 768 | |
---|
| 769 | // keep track of what non-terminal token activated the rule |
---|
| 770 | size_t ActiveNTTRule = mActiveTokenState->rootRulePath[rulepathIDX].tokenID; |
---|
| 771 | // start rule path at next position for definition |
---|
| 772 | ++rulepathIDX; |
---|
| 773 | |
---|
| 774 | // assume the rule will pass |
---|
| 775 | bool passed = true; |
---|
| 776 | bool tokenFound = false; |
---|
| 777 | bool endFound = false; |
---|
| 778 | bool clearInsertTokenID = false; |
---|
| 779 | |
---|
| 780 | // keep following rulepath until the end of the rule or the end of the source is reached |
---|
| 781 | while (!(endFound || isEndOfSource())) |
---|
| 782 | { |
---|
| 783 | switch (mActiveTokenState->rootRulePath[rulepathIDX].operation) |
---|
| 784 | { |
---|
| 785 | |
---|
| 786 | case otAND: |
---|
| 787 | // only validate if the previous rule passed |
---|
| 788 | if (passed) |
---|
| 789 | passed = ValidateToken(rulepathIDX, ActiveNTTRule); |
---|
| 790 | // log error message if a previouse token was found in this rule path and current token failed |
---|
| 791 | if (tokenFound && (mCharPos != mErrorCharPos) && !passed) |
---|
| 792 | { |
---|
| 793 | mErrorCharPos = mCharPos; |
---|
| 794 | LogManager::getSingleton().logMessage( |
---|
| 795 | "*** ERROR *** : in " + getClientGrammerName() + |
---|
| 796 | " Source: " + mSourceName + |
---|
| 797 | "\nUnknown token found on line " + StringConverter::toString(mCurrentLine) + |
---|
| 798 | "\nFound: >>>" + mSource->substr(mCharPos, 20) + |
---|
| 799 | "<<<\nbut was expecting form: " + getBNFGrammerTextFromRulePath(rulepathIDX, 2) + |
---|
| 800 | "\nwhile in rule path: <" + mActiveTokenState->lexemeTokenDefinitions[ActiveNTTRule].lexeme + |
---|
| 801 | ">" |
---|
| 802 | ); |
---|
| 803 | // log last valid token found |
---|
| 804 | const TokenInst& tokenInst = mActiveTokenState->tokenQue.back(); |
---|
| 805 | LogManager::getSingleton().logMessage( |
---|
| 806 | "Last valid token found was on line " + StringConverter::toString(tokenInst.line)); |
---|
| 807 | LogManager::getSingleton().logMessage( |
---|
| 808 | "source hint: >>>" + mSource->substr(tokenInst.pos, 20) + "<<<"); |
---|
| 809 | } |
---|
| 810 | |
---|
| 811 | break; |
---|
| 812 | |
---|
| 813 | case otOR: |
---|
| 814 | // only validate if the previous rule failed |
---|
| 815 | if ( passed == false ) |
---|
| 816 | { |
---|
| 817 | // clear previous tokens from entry and try again |
---|
| 818 | mActiveTokenState->tokenQue.resize(TokenContainerOldSize); |
---|
| 819 | passed = ValidateToken(rulepathIDX, ActiveNTTRule); |
---|
| 820 | } |
---|
| 821 | else |
---|
| 822 | { |
---|
| 823 | // path passed up to this point therefore finished so pretend end marker found |
---|
| 824 | endFound = true; |
---|
| 825 | } |
---|
| 826 | break; |
---|
| 827 | |
---|
| 828 | case otOPTIONAL: |
---|
| 829 | // if previous passed then try this rule but it does not effect succes of rule since its optional |
---|
| 830 | if(passed) ValidateToken(rulepathIDX, ActiveNTTRule); |
---|
| 831 | break; |
---|
| 832 | |
---|
| 833 | case otREPEAT: |
---|
| 834 | // repeat until called rule fails or cursor does not advance |
---|
| 835 | // repeat is 0 or more times |
---|
| 836 | if (passed) |
---|
| 837 | { |
---|
| 838 | // keep calling until failure or no change in cursor position |
---|
| 839 | size_t prevPos = mCharPos; |
---|
| 840 | while ( ValidateToken(rulepathIDX, ActiveNTTRule)) |
---|
| 841 | { |
---|
| 842 | if (mCharPos > prevPos) |
---|
| 843 | { |
---|
| 844 | prevPos = mCharPos; |
---|
| 845 | } |
---|
| 846 | else |
---|
| 847 | { |
---|
| 848 | // repeat failed to advance the cursor position so time to quit since the repeating rule |
---|
| 849 | // path isn't finding anything |
---|
| 850 | // this can happen if the rule being called only has _optional_ rules |
---|
| 851 | // this checking of the cursor positions prevents infinite loop from occuring |
---|
| 852 | break; |
---|
| 853 | } |
---|
| 854 | } |
---|
| 855 | } |
---|
| 856 | break; |
---|
| 857 | |
---|
| 858 | case otDATA: |
---|
| 859 | // skip it, should have been handled by previous operation. |
---|
| 860 | break; |
---|
| 861 | |
---|
| 862 | case otNOT_TEST: |
---|
| 863 | // only validate if the previous rule passed |
---|
| 864 | if (passed) |
---|
| 865 | { |
---|
| 866 | |
---|
| 867 | // perform look ahead and test if rule production fails |
---|
| 868 | const size_t la_TokenContainerOldSize = mActiveTokenState->tokenQue.size(); |
---|
| 869 | const size_t la_OldCharPos = mCharPos; |
---|
| 870 | const size_t la_OldLinePos = mCurrentLine; |
---|
| 871 | const bool la_OldLabelIsActive = mLabelIsActive; |
---|
| 872 | const size_t la_OldActiveLabelKey = mActiveLabelKey; |
---|
| 873 | const String la_OldLabel = mLabels[la_OldActiveLabelKey]; |
---|
| 874 | |
---|
| 875 | passed = !ValidateToken(rulepathIDX, ActiveNTTRule); |
---|
| 876 | |
---|
| 877 | // only wanted to take a peek as to what was ahead so now restore back to current position |
---|
| 878 | mActiveTokenState->tokenQue.resize(la_TokenContainerOldSize); |
---|
| 879 | mCharPos = la_OldCharPos; |
---|
| 880 | mCurrentLine = la_OldLinePos; |
---|
| 881 | // restor label state if it was active before not test |
---|
| 882 | if (la_OldLabelIsActive) |
---|
| 883 | { |
---|
| 884 | mActiveLabelKey = la_OldActiveLabelKey; |
---|
| 885 | mLabelIsActive = la_OldLabelIsActive; |
---|
| 886 | mActiveLabel = &mLabels[mActiveLabelKey]; |
---|
| 887 | *mActiveLabel = la_OldLabel; |
---|
| 888 | } |
---|
| 889 | // only perform full rollback if tokens found |
---|
| 890 | if (!passed) |
---|
| 891 | { |
---|
| 892 | // the rule did not validate so get rid of tokens decoded |
---|
| 893 | // roll back the token container end position to what it was when rule started |
---|
| 894 | // this will get rid of all tokens that had been pushed on the container while |
---|
| 895 | // trying to validating this rule |
---|
| 896 | mActiveTokenState->tokenQue.resize(TokenContainerOldSize); |
---|
| 897 | //mConstants.resize(OldConstantsSize); |
---|
| 898 | mCharPos = OldCharPos; |
---|
| 899 | mCurrentLine = OldLinePos; |
---|
| 900 | // restor label state if it was active before not test |
---|
| 901 | if (OldLabelIsActive) |
---|
| 902 | { |
---|
| 903 | mActiveLabelKey = OldActiveLabelKey; |
---|
| 904 | mLabelIsActive = OldLabelIsActive; |
---|
| 905 | mActiveLabel = &mLabels[mActiveLabelKey]; |
---|
| 906 | *mActiveLabel = OldLabel; |
---|
| 907 | } |
---|
| 908 | |
---|
| 909 | // terminate rule production processing |
---|
| 910 | endFound = true; |
---|
| 911 | } |
---|
| 912 | } |
---|
| 913 | break; |
---|
| 914 | |
---|
| 915 | case otINSERT_TOKEN: |
---|
| 916 | mInsertTokenID = mActiveTokenState->rootRulePath[rulepathIDX].tokenID; |
---|
| 917 | clearInsertTokenID = true; |
---|
| 918 | break; |
---|
| 919 | |
---|
| 920 | case otEND: |
---|
| 921 | // end of rule found so time to return |
---|
| 922 | endFound = true; |
---|
| 923 | // only rollback if no tokens found |
---|
| 924 | if (!passed && !tokenFound) |
---|
| 925 | { |
---|
| 926 | // the rule did not validate so get rid of tokens decoded |
---|
| 927 | // roll back the token container end position to what it was when rule started |
---|
| 928 | // this will get rid of all tokens that had been pushed on the container while |
---|
| 929 | // trying to validating this rule |
---|
| 930 | mActiveTokenState->tokenQue.resize(TokenContainerOldSize); |
---|
| 931 | //mConstants.resize(OldConstantsSize); |
---|
| 932 | mCharPos = OldCharPos; |
---|
| 933 | mCurrentLine = OldLinePos; |
---|
| 934 | } |
---|
| 935 | else |
---|
| 936 | { |
---|
| 937 | // if the rule path was partially completed, one or more tokens found then mark it as passed |
---|
| 938 | if (!passed && tokenFound && !mLabelIsActive) |
---|
| 939 | { |
---|
| 940 | passed = true; |
---|
| 941 | } |
---|
| 942 | } |
---|
| 943 | break; |
---|
| 944 | |
---|
| 945 | default: |
---|
| 946 | // an exception should be raised since the code should never get here |
---|
| 947 | passed = false; |
---|
| 948 | endFound = true; |
---|
| 949 | break; |
---|
| 950 | |
---|
| 951 | } // end switch |
---|
| 952 | // prevent rollback from occuring if a token was found but later part of rule fails |
---|
| 953 | // this allows pass2 to either fix the problem or report the error and continue on. |
---|
| 954 | // Don't do this for _no_token_ since its a special system token and has nothing todo with |
---|
| 955 | // a successfull parse of the source. Can check this by looking at mNoTerminalToken state. |
---|
| 956 | // if _no_token had just been validated then mNoTerminalToken will be true. |
---|
| 957 | if (passed && !mNoTerminalToken && !mInsertTokenID) |
---|
| 958 | tokenFound = true; |
---|
| 959 | // move on to the next rule in the path |
---|
| 960 | ++rulepathIDX; |
---|
| 961 | } // end while |
---|
| 962 | |
---|
| 963 | // if this rule production requested a token insert, make sure its reset so it does not affect |
---|
| 964 | // the parent rule |
---|
| 965 | if (clearInsertTokenID) |
---|
| 966 | mInsertTokenID = 0; |
---|
| 967 | |
---|
| 968 | return passed; |
---|
| 969 | } |
---|
| 970 | |
---|
| 971 | //----------------------------------------------------------------------- |
---|
| 972 | bool Compiler2Pass::isCharacterLabel(const size_t rulepathIDX) |
---|
| 973 | { |
---|
| 974 | if (isEndOfSource()) |
---|
| 975 | return false; |
---|
| 976 | |
---|
| 977 | // assume the test is going to fail |
---|
| 978 | bool Passed = false; |
---|
| 979 | |
---|
| 980 | // get token from next rule operation. |
---|
| 981 | // token string is list of valid or invalid single characters. |
---|
| 982 | // If the token string starts with a ! then the set is for invalid characters. |
---|
| 983 | // compare character at current cursor position in script to characters in list for a match |
---|
| 984 | // if match found then add character to active label |
---|
| 985 | // _character_ will not have a token definition but the next rule operation should be |
---|
| 986 | // DATA and have the token ID required to get the character set. |
---|
| 987 | const TokenRule& rule = mActiveTokenState->rootRulePath[rulepathIDX + 1]; |
---|
| 988 | if (rule.operation == otDATA) |
---|
| 989 | { |
---|
| 990 | const size_t TokenID = rule.tokenID; |
---|
| 991 | // check for ! as first character in character set indicating that an input character is |
---|
| 992 | // accepted if its not in the character set. |
---|
| 993 | // Otherwise a pass occurs if the input character is found in the character set. |
---|
| 994 | const String& characterSet = mActiveTokenState->lexemeTokenDefinitions[TokenID].lexeme; |
---|
| 995 | if ((characterSet.size() > 1) && characterSet[0] == '!') |
---|
| 996 | Passed = characterSet.find((*mSource)[mCharPos], 1) == String::npos; |
---|
| 997 | else |
---|
| 998 | Passed = characterSet.find((*mSource)[mCharPos]) != String::npos; |
---|
| 999 | |
---|
| 1000 | if (Passed) |
---|
| 1001 | { |
---|
| 1002 | // is a new label starting? |
---|
| 1003 | // if mLabelIsActive is false then starting a new label so need a new mActiveLabelKey |
---|
| 1004 | if (!mLabelIsActive) |
---|
| 1005 | { |
---|
| 1006 | // mActiveLabelKey will be the end of the instruction container ie the size of mTokenInstructions |
---|
| 1007 | mActiveLabelKey = mActiveTokenState->tokenQue.size(); |
---|
| 1008 | // if a token insert is pending then use next key |
---|
| 1009 | if (mInsertTokenID) |
---|
| 1010 | ++mActiveLabelKey; |
---|
| 1011 | mLabelIsActive = true; |
---|
| 1012 | mNoSpaceSkip = true; |
---|
| 1013 | // reset the contents of the label since it might have been used prior to a rollback |
---|
| 1014 | // and cach string location so don't have to look it up for the rest of the label processing |
---|
| 1015 | mActiveLabel = &mLabels[mActiveLabelKey]; |
---|
| 1016 | mActiveLabel->clear(); |
---|
| 1017 | } |
---|
| 1018 | // add the single character to the end of the active label |
---|
| 1019 | *mActiveLabel += (*mSource)[mCharPos]; |
---|
| 1020 | } |
---|
| 1021 | } |
---|
| 1022 | |
---|
| 1023 | return Passed; |
---|
| 1024 | } |
---|
| 1025 | //----------------------------------------------------------------------- |
---|
| 1026 | bool Compiler2Pass::ValidateToken(const size_t rulepathIDX, const size_t activeRuleID) |
---|
| 1027 | { |
---|
| 1028 | size_t tokenlength = 0; |
---|
| 1029 | // assume the test is going to fail |
---|
| 1030 | bool Passed = false; |
---|
| 1031 | size_t tokenID = mActiveTokenState->rootRulePath[rulepathIDX].tokenID; |
---|
| 1032 | // if terminal token then compare text of lexeme with what is in source |
---|
| 1033 | if ( (tokenID >= SystemTokenBase) || |
---|
| 1034 | !mActiveTokenState->lexemeTokenDefinitions[tokenID].isNonTerminal ) |
---|
| 1035 | { |
---|
| 1036 | if (tokenID != _character_) |
---|
| 1037 | { |
---|
| 1038 | mLabelIsActive = false; |
---|
| 1039 | // allow spaces to be skipped for next lexeme processing |
---|
| 1040 | mNoSpaceSkip = false; |
---|
| 1041 | } |
---|
| 1042 | |
---|
| 1043 | if (tokenID == _no_space_skip_) |
---|
| 1044 | { |
---|
| 1045 | // don't skip spaces to get to next lexeme |
---|
| 1046 | mNoSpaceSkip = true; |
---|
| 1047 | // move on to next rule |
---|
| 1048 | Passed = true; |
---|
| 1049 | } |
---|
| 1050 | else if (tokenID == _no_token_) |
---|
| 1051 | { |
---|
| 1052 | // turn on no terminal token processing for next rule |
---|
| 1053 | mNoTerminalToken = true; |
---|
| 1054 | // move on to next rule |
---|
| 1055 | Passed = true; |
---|
| 1056 | } |
---|
| 1057 | // if label processing is active ie previous token was _character_ |
---|
| 1058 | // and current token is supposed to be a _character_ then don't |
---|
| 1059 | // position to next lexeme in source |
---|
| 1060 | else if (mNoSpaceSkip || positionToNextLexeme()) |
---|
| 1061 | { |
---|
| 1062 | // if Token is supposed to be a number then check if its a numerical constant |
---|
| 1063 | if (tokenID == _value_) |
---|
| 1064 | { |
---|
| 1065 | float constantvalue = 0.0f; |
---|
| 1066 | if (Passed = isFloatValue(constantvalue, tokenlength)) |
---|
| 1067 | { |
---|
| 1068 | // key is the next instruction index |
---|
| 1069 | size_t key = mActiveTokenState->tokenQue.size(); |
---|
| 1070 | // if a token insert is pending then use next key |
---|
| 1071 | if (mInsertTokenID) |
---|
| 1072 | ++key; |
---|
| 1073 | mConstants[key] = constantvalue; |
---|
| 1074 | } |
---|
| 1075 | } |
---|
| 1076 | else // check if user label or valid keyword token |
---|
| 1077 | { |
---|
| 1078 | if (tokenID == _character_) |
---|
| 1079 | { |
---|
| 1080 | if (Passed = isCharacterLabel(rulepathIDX)) |
---|
| 1081 | // only one character was processed |
---|
| 1082 | tokenlength = 1; |
---|
| 1083 | } |
---|
| 1084 | else |
---|
| 1085 | { |
---|
| 1086 | // compare token lexeme text with source text |
---|
| 1087 | if (Passed = isLexemeMatch(mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme, mActiveTokenState->lexemeTokenDefinitions[tokenID].isCaseSensitive)) |
---|
| 1088 | { |
---|
| 1089 | tokenlength = mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme.length(); |
---|
| 1090 | // check if terminal token should be ignored ie not put in instruction que |
---|
| 1091 | if (mNoTerminalToken) |
---|
| 1092 | tokenID = _no_token_; |
---|
| 1093 | } |
---|
| 1094 | // always clear no terminal token flag. it only works for one pending terminal token. |
---|
| 1095 | mNoTerminalToken = false; |
---|
| 1096 | } |
---|
| 1097 | } |
---|
| 1098 | |
---|
| 1099 | // turn off label processing if token ID was not for _character_ |
---|
| 1100 | if (tokenID == _character_) |
---|
| 1101 | { |
---|
| 1102 | // _character_ token being processed |
---|
| 1103 | // turn off generation of a new token instruction if this is not |
---|
| 1104 | // the first _character_ in a sequence of _character_ terminal tokens. |
---|
| 1105 | // Only want one _character_ token which Identifies a label |
---|
| 1106 | |
---|
| 1107 | if (mActiveTokenState->tokenQue.size() > mActiveLabelKey) |
---|
| 1108 | { |
---|
| 1109 | // this token is not the first _character_ in the label sequence |
---|
| 1110 | // so turn off the token by turning TokenID into _no_token_ |
---|
| 1111 | tokenID = _no_token_; |
---|
| 1112 | } |
---|
| 1113 | } |
---|
| 1114 | |
---|
| 1115 | // if valid terminal token was found then add it to the instruction container for pass 2 processing |
---|
| 1116 | if (Passed) |
---|
| 1117 | { |
---|
| 1118 | if (tokenID != _no_token_) |
---|
| 1119 | { |
---|
| 1120 | TokenInst newtoken; |
---|
| 1121 | // push token onto end of container |
---|
| 1122 | newtoken.NTTRuleID = activeRuleID; |
---|
| 1123 | newtoken.line = mCurrentLine; |
---|
| 1124 | newtoken.pos = mCharPos; |
---|
| 1125 | newtoken.found = true; |
---|
| 1126 | |
---|
| 1127 | // check to see if a terminal token is waiting to be inserted based on the next |
---|
| 1128 | // token being found |
---|
| 1129 | if (mInsertTokenID) |
---|
| 1130 | { |
---|
| 1131 | newtoken.tokenID = mInsertTokenID; |
---|
| 1132 | mActiveTokenState->tokenQue.push_back(newtoken); |
---|
| 1133 | // token action processing |
---|
| 1134 | // if the token has an action then fire previous token action |
---|
| 1135 | checkTokenActionTrigger(); |
---|
| 1136 | // reset the token ID that was inserted so that it will not get inserted until set again |
---|
| 1137 | mInsertTokenID = 0; |
---|
| 1138 | } |
---|
| 1139 | |
---|
| 1140 | newtoken.tokenID = tokenID; |
---|
| 1141 | mActiveTokenState->tokenQue.push_back(newtoken); |
---|
| 1142 | // token action processing |
---|
| 1143 | // if the token has an action then fire previous token action |
---|
| 1144 | checkTokenActionTrigger(); |
---|
| 1145 | } |
---|
| 1146 | |
---|
| 1147 | // update source position |
---|
| 1148 | mCharPos += tokenlength; |
---|
| 1149 | } // end if |
---|
| 1150 | } // end else if |
---|
| 1151 | |
---|
| 1152 | } |
---|
| 1153 | // else a non terminal token was found |
---|
| 1154 | else |
---|
| 1155 | { |
---|
| 1156 | // execute rule for non-terminal |
---|
| 1157 | // get rule_ID for index into rulepath to be called |
---|
| 1158 | Passed = processRulePath(mActiveTokenState->lexemeTokenDefinitions[tokenID].ruleID); |
---|
| 1159 | } |
---|
| 1160 | |
---|
| 1161 | return Passed; |
---|
| 1162 | } |
---|
| 1163 | |
---|
| 1164 | //----------------------------------------------------------------------- |
---|
| 1165 | bool Compiler2Pass::isFloatValue(float& fvalue, size_t& charsize) const |
---|
| 1166 | { |
---|
| 1167 | if (isEndOfSource()) |
---|
| 1168 | return false; |
---|
| 1169 | |
---|
| 1170 | // check to see if it is a numeric float value |
---|
| 1171 | bool valuefound = false; |
---|
| 1172 | |
---|
| 1173 | const char* startptr = mSource->c_str() + mCharPos; |
---|
| 1174 | char* endptr = NULL; |
---|
| 1175 | |
---|
| 1176 | fvalue = static_cast<float>(strtod(startptr, &endptr)); |
---|
| 1177 | // if a valid float was found then endptr will have the pointer to the first invalid character |
---|
| 1178 | if (endptr) |
---|
| 1179 | { |
---|
| 1180 | if (endptr > startptr) |
---|
| 1181 | { |
---|
| 1182 | // a valid value was found so process it |
---|
| 1183 | charsize = endptr - startptr; |
---|
| 1184 | valuefound = true; |
---|
| 1185 | } |
---|
| 1186 | } |
---|
| 1187 | |
---|
| 1188 | return valuefound; |
---|
| 1189 | } |
---|
| 1190 | |
---|
| 1191 | //----------------------------------------------------------------------- |
---|
| 1192 | bool Compiler2Pass::isLexemeMatch(const String& lexeme, const bool caseSensitive) const |
---|
| 1193 | { |
---|
| 1194 | // compare text at source+charpos with the lexeme : limit testing to lexeme size |
---|
| 1195 | if (!caseSensitive) |
---|
| 1196 | { |
---|
| 1197 | String testItem = mSource->substr(mCharPos, lexeme.length()); |
---|
| 1198 | StringUtil::toLowerCase(testItem); |
---|
| 1199 | return (testItem.compare(lexeme) == 0); |
---|
| 1200 | } |
---|
| 1201 | else |
---|
| 1202 | { |
---|
| 1203 | return (mSource->compare(mCharPos, lexeme.length(), lexeme) == 0); |
---|
| 1204 | } |
---|
| 1205 | } |
---|
| 1206 | |
---|
| 1207 | //----------------------------------------------------------------------- |
---|
| 1208 | bool Compiler2Pass::positionToNextLexeme() |
---|
| 1209 | { |
---|
| 1210 | bool validlexemefound = false; |
---|
| 1211 | size_t oldCharPos = mCharPos; |
---|
| 1212 | |
---|
| 1213 | while (!validlexemefound && !isEndOfSource()) |
---|
| 1214 | { |
---|
| 1215 | skipWhiteSpace(); |
---|
| 1216 | skipEOL(); |
---|
| 1217 | skipComments(); |
---|
| 1218 | // have we reached the end of the source? |
---|
| 1219 | if (!isEndOfSource()) |
---|
| 1220 | { |
---|
| 1221 | // if ASCII > space then assume valid character is found |
---|
| 1222 | if (static_cast<uchar>((*mSource)[mCharPos]) > static_cast<uchar>(' ')) |
---|
| 1223 | { |
---|
| 1224 | validlexemefound = true; |
---|
| 1225 | } |
---|
| 1226 | else // maybe a control character has been encountered? |
---|
| 1227 | { |
---|
| 1228 | // check if the char pos advanced in this iteration. |
---|
| 1229 | // If it didn't then we have found a char that |
---|
| 1230 | // is not relevent to the parse so skip it so that we don't |
---|
| 1231 | // end up in an infinite loop. |
---|
| 1232 | if (oldCharPos == mCharPos) |
---|
| 1233 | ++mCharPos; |
---|
| 1234 | |
---|
| 1235 | // endofsource will get checked on next iteration of this loop so no need to check it here |
---|
| 1236 | // need to update oldCharPos so that position advancement can be varified on the next iteration |
---|
| 1237 | oldCharPos = mCharPos; |
---|
| 1238 | } |
---|
| 1239 | } |
---|
| 1240 | }// end of while |
---|
| 1241 | |
---|
| 1242 | return validlexemefound; |
---|
| 1243 | } |
---|
| 1244 | |
---|
| 1245 | //----------------------------------------------------------------------- |
---|
| 1246 | void Compiler2Pass::skipComments() |
---|
| 1247 | { |
---|
| 1248 | if (isEndOfSource()) |
---|
| 1249 | return; |
---|
| 1250 | // if current char and next are // then search for EOL |
---|
| 1251 | if (mSource->compare(mCharPos, 2, "//") == 0) |
---|
| 1252 | findEOL(); |
---|
| 1253 | } |
---|
| 1254 | |
---|
| 1255 | //----------------------------------------------------------------------- |
---|
| 1256 | void Compiler2Pass::findEOL() |
---|
| 1257 | { |
---|
| 1258 | if (isEndOfSource()) |
---|
| 1259 | return; |
---|
| 1260 | // find eol charter and move to this position |
---|
| 1261 | mCharPos = mSource->find('\n', mCharPos); |
---|
| 1262 | } |
---|
| 1263 | |
---|
| 1264 | //----------------------------------------------------------------------- |
---|
| 1265 | void Compiler2Pass::skipEOL() |
---|
| 1266 | { |
---|
| 1267 | if (isEndOfSource()) |
---|
| 1268 | return; |
---|
| 1269 | |
---|
| 1270 | if (((*mSource)[mCharPos] == '\n') || ((*mSource)[mCharPos] == '\r')) |
---|
| 1271 | { |
---|
| 1272 | mCurrentLine++; |
---|
| 1273 | mCharPos++; |
---|
| 1274 | if (mCharPos >= mEndOfSource) |
---|
| 1275 | return; |
---|
| 1276 | if (((*mSource)[mCharPos] == '\n') || ((*mSource)[mCharPos] == '\r')) |
---|
| 1277 | { |
---|
| 1278 | mCharPos++; |
---|
| 1279 | } |
---|
| 1280 | } |
---|
| 1281 | } |
---|
| 1282 | |
---|
| 1283 | //----------------------------------------------------------------------- |
---|
| 1284 | void Compiler2Pass::skipWhiteSpace() |
---|
| 1285 | { |
---|
| 1286 | if (isEndOfSource()) |
---|
| 1287 | return; |
---|
| 1288 | |
---|
| 1289 | mCharPos = mSource->find_first_not_of(" \t", mCharPos); |
---|
| 1290 | } |
---|
| 1291 | |
---|
| 1292 | //----------------------------------------------------------------------- |
---|
| 1293 | size_t Compiler2Pass::addLexemeToken(const String& lexeme, const size_t token, const bool hasAction, const bool caseSensitive) |
---|
| 1294 | { |
---|
| 1295 | size_t newTokenID = token; |
---|
| 1296 | // if token ID is zero then auto-generate a new token ID |
---|
| 1297 | if (newTokenID == 0) |
---|
| 1298 | { |
---|
| 1299 | // assume BNF system bootstrap is current state |
---|
| 1300 | size_t autoTokenIDStart = BNF_AUTOTOKENSTART; |
---|
| 1301 | // if in client state then get auto token start position from the client |
---|
| 1302 | if (mActiveTokenState != &mBNFTokenState) |
---|
| 1303 | autoTokenIDStart = getAutoTokenIDStart(); |
---|
| 1304 | // make sure new auto gen id starts at autoTokenIDStart or greater |
---|
| 1305 | newTokenID = (mActiveTokenState->lexemeTokenDefinitions.size() <= autoTokenIDStart ) ? autoTokenIDStart : newTokenID = mActiveTokenState->lexemeTokenDefinitions.size(); |
---|
| 1306 | } |
---|
| 1307 | |
---|
| 1308 | if (newTokenID >= mActiveTokenState->lexemeTokenDefinitions.size()) |
---|
| 1309 | { |
---|
| 1310 | mActiveTokenState->lexemeTokenDefinitions.resize(newTokenID + 1); |
---|
| 1311 | } |
---|
| 1312 | // since resizing guarentees the token definition will exist, just assign values to members |
---|
| 1313 | LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions[newTokenID]; |
---|
| 1314 | if (tokenDef.ID != 0) |
---|
| 1315 | { |
---|
| 1316 | OGRE_EXCEPT(Exception::ERR_DUPLICATE_ITEM, "In " + getClientGrammerName() + |
---|
| 1317 | ", lexeme >>>" + |
---|
| 1318 | lexeme + "<<< already exists in lexeme token definitions", |
---|
| 1319 | "Compiler2Pass::addLexemeToken"); |
---|
| 1320 | } |
---|
| 1321 | tokenDef.ID = newTokenID; |
---|
| 1322 | tokenDef.lexeme = lexeme; |
---|
| 1323 | if (!caseSensitive) |
---|
| 1324 | StringUtil::toLowerCase(tokenDef.lexeme); |
---|
| 1325 | tokenDef.hasAction = hasAction; |
---|
| 1326 | tokenDef.isCaseSensitive = caseSensitive; |
---|
| 1327 | |
---|
| 1328 | mActiveTokenState->lexemeTokenMap[lexeme] = newTokenID; |
---|
| 1329 | |
---|
| 1330 | return newTokenID; |
---|
| 1331 | } |
---|
| 1332 | |
---|
| 1333 | //----------------------------------------------------------------------- |
---|
| 1334 | void Compiler2Pass::checkTokenActionTrigger(void) |
---|
| 1335 | { |
---|
| 1336 | size_t lastTokenQuePos = mActiveTokenState->tokenQue.size(); |
---|
| 1337 | // if there are no token instructions in the que then there is nothing todo |
---|
| 1338 | if (lastTokenQuePos == 0) |
---|
| 1339 | return; |
---|
| 1340 | |
---|
| 1341 | --lastTokenQuePos; |
---|
| 1342 | |
---|
| 1343 | if (lastTokenQuePos == mPreviousActionQuePosition) |
---|
| 1344 | return; |
---|
| 1345 | |
---|
| 1346 | // check action trigger if last token has an action |
---|
| 1347 | if (setNextActionQuePosition(lastTokenQuePos)) |
---|
| 1348 | { |
---|
| 1349 | // only activate the action belonging to the token found previously |
---|
| 1350 | activatePreviousTokenAction(); |
---|
| 1351 | // current token action now becomes the previous one |
---|
| 1352 | mPreviousActionQuePosition = lastTokenQuePos; |
---|
| 1353 | } |
---|
| 1354 | } |
---|
| 1355 | |
---|
| 1356 | //----------------------------------------------------------------------- |
---|
| 1357 | String Compiler2Pass::getBNFGrammerTextFromRulePath(size_t ruleID, const size_t level) |
---|
| 1358 | { |
---|
| 1359 | |
---|
| 1360 | String grammerText; |
---|
| 1361 | |
---|
| 1362 | // default to using Client rule path |
---|
| 1363 | // check if index is inbounds |
---|
| 1364 | if (ruleID >= mActiveTokenState->rootRulePath.size()) |
---|
| 1365 | { |
---|
| 1366 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "rule ID exceeds client rule path bounds.", "Compiler2Pass::getBNFGrammerRulePathText"); |
---|
| 1367 | } |
---|
| 1368 | // iterate through rule path and get terminal and non-terminal strings |
---|
| 1369 | const TokenRuleContainer& rulePath = mActiveTokenState->rootRulePath; |
---|
| 1370 | |
---|
| 1371 | while (rulePath[ruleID].operation != otEND) |
---|
| 1372 | { |
---|
| 1373 | // rule text processing - the op instructions, system tokens |
---|
| 1374 | switch (rulePath[ruleID].operation) |
---|
| 1375 | { |
---|
| 1376 | // rule lexeme ::= |
---|
| 1377 | case otRULE: |
---|
| 1378 | grammerText += "\n" + getLexemeText(ruleID, level) + " ::="; |
---|
| 1379 | break; |
---|
| 1380 | // no special processing for AND op |
---|
| 1381 | case otAND: |
---|
| 1382 | grammerText += " " + getLexemeText(ruleID, level); |
---|
| 1383 | break; |
---|
| 1384 | // or | lexeme |
---|
| 1385 | case otOR: |
---|
| 1386 | grammerText += " | " + getLexemeText(ruleID, level); |
---|
| 1387 | break; |
---|
| 1388 | // optional [lexeme] |
---|
| 1389 | case otOPTIONAL: |
---|
| 1390 | grammerText += " [" + getLexemeText(ruleID, level) + "]"; |
---|
| 1391 | break; |
---|
| 1392 | // repeat {lexeme} |
---|
| 1393 | case otREPEAT: |
---|
| 1394 | grammerText += " {" + getLexemeText(ruleID, level) + "}"; |
---|
| 1395 | break; |
---|
| 1396 | // not test (?!lexeme) |
---|
| 1397 | case otNOT_TEST: |
---|
| 1398 | grammerText += " (?!" + getLexemeText(ruleID, level) + ")"; |
---|
| 1399 | break; |
---|
| 1400 | default: |
---|
| 1401 | grammerText += "*** Unknown Operation ***"; |
---|
| 1402 | } |
---|
| 1403 | // lexeme/token text procesing |
---|
| 1404 | ++ruleID; |
---|
| 1405 | } |
---|
| 1406 | |
---|
| 1407 | return grammerText; |
---|
| 1408 | } |
---|
| 1409 | |
---|
| 1410 | //----------------------------------------------------------------------- |
---|
| 1411 | |
---|
| 1412 | //----------------------------------------------------------------------- |
---|
| 1413 | // Private Methods |
---|
| 1414 | //----------------------------------------------------------------------- |
---|
| 1415 | //----------------------------------------------------------------------- |
---|
| 1416 | String Compiler2Pass::getLexemeText(size_t& ruleID, const size_t level) |
---|
| 1417 | { |
---|
| 1418 | if (ruleID >= mActiveTokenState->rootRulePath.size()) |
---|
| 1419 | { |
---|
| 1420 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, |
---|
| 1421 | "rule ID exceeds client rule path bounds.", "Compiler2Pass::getLexemeText" |
---|
| 1422 | ); |
---|
| 1423 | } |
---|
| 1424 | |
---|
| 1425 | String lexeme; |
---|
| 1426 | |
---|
| 1427 | const TokenRuleContainer& rulePath = mActiveTokenState->rootRulePath; |
---|
| 1428 | const size_t tokenID = rulePath[ruleID].tokenID; |
---|
| 1429 | |
---|
| 1430 | if ( tokenID < SystemTokenBase) |
---|
| 1431 | { |
---|
| 1432 | // non-terminal tokens |
---|
| 1433 | if (mActiveTokenState->lexemeTokenDefinitions[tokenID].isNonTerminal) |
---|
| 1434 | { |
---|
| 1435 | // allow expansion of non-terminals into terminals |
---|
| 1436 | if (level > 0) |
---|
| 1437 | { |
---|
| 1438 | size_t subRuleID = mActiveTokenState->lexemeTokenDefinitions[tokenID].ruleID + 1; |
---|
| 1439 | lexeme = getBNFGrammerTextFromRulePath(subRuleID, level - 1); |
---|
| 1440 | } |
---|
| 1441 | else |
---|
| 1442 | { |
---|
| 1443 | lexeme = "<" + mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme + ">"; |
---|
| 1444 | } |
---|
| 1445 | } |
---|
| 1446 | else // terminal tokens |
---|
| 1447 | { |
---|
| 1448 | lexeme = "'" + mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme + "'"; |
---|
| 1449 | } |
---|
| 1450 | } |
---|
| 1451 | else // system token processing |
---|
| 1452 | { |
---|
| 1453 | switch (rulePath[ruleID].tokenID) |
---|
| 1454 | { |
---|
| 1455 | case _character_: |
---|
| 1456 | // need to get next rule instruction for data |
---|
| 1457 | ++ruleID; |
---|
| 1458 | // data for _character_ is always a set so put () around text string |
---|
| 1459 | lexeme = "(" + mActiveTokenState->lexemeTokenDefinitions[rulePath[ruleID].tokenID].lexeme + ")"; |
---|
| 1460 | break; |
---|
| 1461 | case _value_: |
---|
| 1462 | // <#> - need name of label? |
---|
| 1463 | lexeme = "<#Number>"; |
---|
| 1464 | break; |
---|
| 1465 | } |
---|
| 1466 | } |
---|
| 1467 | |
---|
| 1468 | return lexeme; |
---|
| 1469 | } |
---|
| 1470 | //----------------------------------------------------------------------- |
---|
| 1471 | void Compiler2Pass::activatePreviousTokenAction(void) |
---|
| 1472 | { |
---|
| 1473 | const size_t previousTokenID = mActiveTokenState->tokenQue.at(mPreviousActionQuePosition).tokenID; |
---|
| 1474 | const LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions.at(previousTokenID); |
---|
| 1475 | if (tokenDef.hasAction) |
---|
| 1476 | { |
---|
| 1477 | // set the current pass 2 token que position to previous action que position |
---|
| 1478 | // assume that pass 2 processing will use tokens downstream |
---|
| 1479 | mPass2TokenQuePosition = mPreviousActionQuePosition; |
---|
| 1480 | executeTokenAction(previousTokenID); |
---|
| 1481 | } |
---|
| 1482 | } |
---|
| 1483 | //----------------------------------------------------------------------- |
---|
| 1484 | void Compiler2Pass::buildClientBNFRulePaths(void) |
---|
| 1485 | { |
---|
| 1486 | bool isFirstToken = true; |
---|
| 1487 | OperationType pendingRuleOp = otAND; |
---|
| 1488 | |
---|
| 1489 | // convert tokens in BNF token que to rule paths |
---|
| 1490 | while (getPass2TokenQueCount() > 0) |
---|
| 1491 | { |
---|
| 1492 | // get a pass 2 token |
---|
| 1493 | // if this is the first time getting a token then get the current token |
---|
| 1494 | const TokenInst& currentToken = isFirstToken ? getCurrentToken() : getNextToken(); |
---|
| 1495 | isFirstToken = false; |
---|
| 1496 | // only process the token if its valid |
---|
| 1497 | if (currentToken.found) |
---|
| 1498 | { |
---|
| 1499 | // a valid token has been found, convert to a rule |
---|
| 1500 | switch (currentToken.tokenID) |
---|
| 1501 | { |
---|
| 1502 | case BNF_ID_BEGIN: // < |
---|
| 1503 | extractNonTerminal(pendingRuleOp); |
---|
| 1504 | pendingRuleOp = otAND; |
---|
| 1505 | break; |
---|
| 1506 | |
---|
| 1507 | |
---|
| 1508 | case BNF_CONSTANT_BEGIN: // <# |
---|
| 1509 | extractNumericConstant(pendingRuleOp); |
---|
| 1510 | pendingRuleOp = otAND; |
---|
| 1511 | break; |
---|
| 1512 | |
---|
| 1513 | case BNF_OR: // | |
---|
| 1514 | pendingRuleOp = otOR; |
---|
| 1515 | break; |
---|
| 1516 | |
---|
| 1517 | case BNF_REPEAT_BEGIN: // { |
---|
| 1518 | pendingRuleOp = otREPEAT; |
---|
| 1519 | break; |
---|
| 1520 | |
---|
| 1521 | case BNF_NO_TOKEN_START: // -' |
---|
| 1522 | extractTerminal(pendingRuleOp, true); |
---|
| 1523 | pendingRuleOp = otAND; |
---|
| 1524 | break; |
---|
| 1525 | |
---|
| 1526 | case BNF_SINGLEQUOTE: // ' |
---|
| 1527 | extractTerminal(pendingRuleOp); |
---|
| 1528 | pendingRuleOp = otAND; |
---|
| 1529 | break; |
---|
| 1530 | |
---|
| 1531 | case BNF_OPTIONAL_BEGIN: // [ |
---|
| 1532 | pendingRuleOp = otOPTIONAL; |
---|
| 1533 | break; |
---|
| 1534 | |
---|
| 1535 | case BNF_NOT_TEST_BEGIN: // (?! |
---|
| 1536 | pendingRuleOp = otNOT_TEST; |
---|
| 1537 | break; |
---|
| 1538 | |
---|
| 1539 | case BNF_SET_BEGIN: // ( |
---|
| 1540 | extractSet(pendingRuleOp); |
---|
| 1541 | pendingRuleOp = otAND; |
---|
| 1542 | break; |
---|
| 1543 | |
---|
| 1544 | case BNF_CONDITIONAL_TOKEN_INSERT: |
---|
| 1545 | setConditionalTokenInsert(); |
---|
| 1546 | break; |
---|
| 1547 | |
---|
| 1548 | default: |
---|
| 1549 | // trap closings ie ] } ) |
---|
| 1550 | break; |
---|
| 1551 | } // end switch |
---|
| 1552 | } // end if |
---|
| 1553 | } // end while |
---|
| 1554 | } |
---|
| 1555 | |
---|
| 1556 | //----------------------------------------------------------------------- |
---|
| 1557 | void Compiler2Pass::modifyLastRule(const OperationType pendingRuleOp, const size_t tokenID) |
---|
| 1558 | { |
---|
| 1559 | // add operation using this token ID to the current rule expression |
---|
| 1560 | size_t lastIndex = mClientTokenState->rootRulePath.size(); |
---|
| 1561 | if (lastIndex == 0) |
---|
| 1562 | { |
---|
| 1563 | // throw exception since there should have been at least one rule existing |
---|
| 1564 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "BNF Grammar build rules failed: no previous rule op defined", "Compiler2Pass::modifyLastRule"); |
---|
| 1565 | } |
---|
| 1566 | --lastIndex; |
---|
| 1567 | mClientTokenState->rootRulePath[lastIndex].operation = pendingRuleOp; |
---|
| 1568 | mClientTokenState->rootRulePath[lastIndex].tokenID = tokenID; |
---|
| 1569 | // add new end op token rule |
---|
| 1570 | mClientTokenState->rootRulePath.push_back(TokenRule(otEND, 0)); |
---|
| 1571 | } |
---|
| 1572 | |
---|
| 1573 | //----------------------------------------------------------------------- |
---|
| 1574 | size_t Compiler2Pass::getClientLexemeTokenID(const String& lexeme, const bool isCaseSensitive) |
---|
| 1575 | { |
---|
| 1576 | size_t tokenID = mClientTokenState->lexemeTokenMap[lexeme]; |
---|
| 1577 | |
---|
| 1578 | if (tokenID == 0) |
---|
| 1579 | { |
---|
| 1580 | // lexeme not found so a new entry is made by the system |
---|
| 1581 | // note that all lexemes added by the system will not/can not have an action |
---|
| 1582 | tokenID = mClientTokenState->lexemeTokenDefinitions.size(); |
---|
| 1583 | // add identifier to client lexeme tokens |
---|
| 1584 | mActiveTokenState = mClientTokenState; |
---|
| 1585 | addLexemeToken(lexeme, tokenID, false, isCaseSensitive); |
---|
| 1586 | mActiveTokenState = &mBNFTokenState; |
---|
| 1587 | } |
---|
| 1588 | |
---|
| 1589 | return tokenID; |
---|
| 1590 | } |
---|
| 1591 | //----------------------------------------------------------------------- |
---|
| 1592 | void Compiler2Pass::extractNonTerminal(const OperationType pendingRuleOp) |
---|
| 1593 | { |
---|
| 1594 | // begining of identifier |
---|
| 1595 | // next token should be for a label |
---|
| 1596 | const String& identifierLabel = getNextTokenLabel(); |
---|
| 1597 | // next token should be id end |
---|
| 1598 | getNextToken(BNF_ID_END); |
---|
| 1599 | // add identifier to lexeme token definitions but keep case sensitivity |
---|
| 1600 | const size_t tokenID = getClientLexemeTokenID(identifierLabel, true); |
---|
| 1601 | LexemeTokenDef& tokenDef = mClientTokenState->lexemeTokenDefinitions[tokenID]; |
---|
| 1602 | |
---|
| 1603 | // peek at the next token isntruction to see if this |
---|
| 1604 | // identifier is for a new rule or is part of the current rule |
---|
| 1605 | if (testNextTokenID(BNF_SET_RULE)) |
---|
| 1606 | { |
---|
| 1607 | // consume set rule |
---|
| 1608 | getNextToken(BNF_SET_RULE); |
---|
| 1609 | // check to make sure this is the first time this rule is being setup by |
---|
| 1610 | // verifying rule id is 0 |
---|
| 1611 | if (tokenDef.ruleID != 0) |
---|
| 1612 | { |
---|
| 1613 | // this is not the first time for this identifier to be set up as a rule |
---|
| 1614 | // since duplicate rules can not exist, throw an exception |
---|
| 1615 | OGRE_EXCEPT(Exception::ERR_DUPLICATE_ITEM, "while parsing BNF grammer for: " + |
---|
| 1616 | getClientGrammerName() + |
---|
| 1617 | ", an attempt was made to assign a rule to identifier: " + |
---|
| 1618 | tokenDef.lexeme + ", that already had a rule assigned", |
---|
| 1619 | "Compiler2Pass::extractNonTerminal"); |
---|
| 1620 | } |
---|
| 1621 | // add new rule to end of rule path |
---|
| 1622 | mClientTokenState->rootRulePath.push_back(TokenRule(otRULE, tokenID)); |
---|
| 1623 | tokenDef.ruleID = mClientTokenState->rootRulePath.size() - 1; |
---|
| 1624 | // add new end op token rule |
---|
| 1625 | mClientTokenState->rootRulePath.push_back(TokenRule(otEND, 0)); |
---|
| 1626 | } |
---|
| 1627 | else // just a reference to a non-terminal |
---|
| 1628 | { |
---|
| 1629 | modifyLastRule(pendingRuleOp, tokenID); |
---|
| 1630 | } |
---|
| 1631 | |
---|
| 1632 | tokenDef.isNonTerminal = true; |
---|
| 1633 | } |
---|
| 1634 | //----------------------------------------------------------------------- |
---|
| 1635 | void Compiler2Pass::extractTerminal(const OperationType pendingRuleOp, const bool notoken) |
---|
| 1636 | { |
---|
| 1637 | // begining of label |
---|
| 1638 | // next token should be for a label |
---|
| 1639 | const String& terminalLabel = getNextTokenLabel(); |
---|
| 1640 | // next token should be single quote end |
---|
| 1641 | getNextToken(BNF_SINGLEQUOTE); |
---|
| 1642 | // add terminal to lexeme token definitions |
---|
| 1643 | // note that if label not in the map it is automatically added |
---|
| 1644 | const size_t tokenID = getClientLexemeTokenID(terminalLabel); |
---|
| 1645 | if (notoken) |
---|
| 1646 | modifyLastRule(otAND, _no_token_); |
---|
| 1647 | modifyLastRule(pendingRuleOp, tokenID); |
---|
| 1648 | } |
---|
| 1649 | //----------------------------------------------------------------------- |
---|
| 1650 | void Compiler2Pass::setConditionalTokenInsert(void) |
---|
| 1651 | { |
---|
| 1652 | // get position of rule just before end rule |
---|
| 1653 | size_t lastIndex = mClientTokenState->rootRulePath.size(); |
---|
| 1654 | if (lastIndex <= 1) |
---|
| 1655 | { |
---|
| 1656 | // throw exception since there should have been at least one rule existing |
---|
| 1657 | OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "BNF Grammar build rules failed: no previous terminal token rule defined", |
---|
| 1658 | "Compiler2Pass::setConditionalTokenInsert"); |
---|
| 1659 | } |
---|
| 1660 | lastIndex -= 2; |
---|
| 1661 | mClientTokenState->rootRulePath[lastIndex].operation = otINSERT_TOKEN; |
---|
| 1662 | } |
---|
| 1663 | //----------------------------------------------------------------------- |
---|
| 1664 | void Compiler2Pass::extractSet(const OperationType pendingRuleOp) |
---|
| 1665 | { |
---|
| 1666 | const String& setLabel = getNextTokenLabel(); |
---|
| 1667 | // next token should be ) |
---|
| 1668 | getNextToken(BNF_SET_END); |
---|
| 1669 | // add set to lexeme token definitions but keep case sensitivity |
---|
| 1670 | const size_t tokenID = getClientLexemeTokenID(setLabel, true); |
---|
| 1671 | // add operation using this token ID to the current rule expression |
---|
| 1672 | modifyLastRule(pendingRuleOp, _character_); |
---|
| 1673 | // add the data required by the character lookup operation |
---|
| 1674 | modifyLastRule(otDATA, tokenID); |
---|
| 1675 | } |
---|
| 1676 | //----------------------------------------------------------------------- |
---|
| 1677 | void Compiler2Pass::extractNumericConstant(const OperationType pendingRuleOp) |
---|
| 1678 | { |
---|
| 1679 | // consume label for constant, don't need it for anything |
---|
| 1680 | getNextTokenLabel(); |
---|
| 1681 | |
---|
| 1682 | getNextToken(BNF_ID_END); // > |
---|
| 1683 | // add operation using this token ID to the current rule expression |
---|
| 1684 | modifyLastRule(pendingRuleOp, _value_); |
---|
| 1685 | } |
---|
| 1686 | |
---|
| 1687 | |
---|
| 1688 | } |
---|