1 | /* |
---|
2 | ** $Id: llex.c,v 1.119 2003/03/24 12:39:34 roberto Exp $ |
---|
3 | ** Lexical Analyzer |
---|
4 | ** See Copyright Notice in lua.h |
---|
5 | */ |
---|
6 | |
---|
7 | |
---|
8 | #include <ctype.h> |
---|
9 | #include <string.h> |
---|
10 | |
---|
11 | #define llex_c |
---|
12 | |
---|
13 | #include "lua.h" |
---|
14 | |
---|
15 | #include "ldo.h" |
---|
16 | #include "llex.h" |
---|
17 | #include "lobject.h" |
---|
18 | #include "lparser.h" |
---|
19 | #include "lstate.h" |
---|
20 | #include "lstring.h" |
---|
21 | #include "lzio.h" |
---|
22 | |
---|
23 | |
---|
24 | |
---|
/*
** Advance the lexer by one character: read the next char from the input
** stream `z' with zgetc and store it in `current'.  The parameter is
** parenthesized so that any pointer-valued expression (not only a plain
** identifier) can be passed safely.
*/
#define next(LS)	((LS)->current = zgetc((LS)->z))
---|
26 | |
---|
27 | |
---|
28 | |
---|
/*
** Printable form of every token, indexed by (token - FIRST_RESERVED).
** The first NUM_RESERVED entries are the reserved words installed by
** luaX_init.
*/
/* ORDER RESERVED */
static const char *const token2string [] = {
  /* reserved words */
  "and", "break", "do",
  "else", "elseif", "end",
  "false", "for", "function",
  "if", "in", "local",
  "nil", "not", "or",
  "repeat", "return", "then",
  "true", "until", "while",
  /* other terminal symbols */
  "*name",
  "..", "...",
  "==", ">=", "<=", "~=",
  "*number", "*string",
  "<eof>"
};
---|
38 | |
---|
39 | |
---|
40 | void luaX_init (lua_State *L) { |
---|
41 | int i; |
---|
42 | for (i=0; i<NUM_RESERVED; i++) { |
---|
43 | TString *ts = luaS_new(L, token2string[i]); |
---|
44 | luaS_fix(ts); /* reserved words are never collected */ |
---|
45 | lua_assert(strlen(token2string[i])+1 <= TOKEN_LEN); |
---|
46 | ts->tsv.reserved = cast(lu_byte, i+1); /* reserved word */ |
---|
47 | } |
---|
48 | } |
---|
49 | |
---|
50 | |
---|
/* size of the buffer that receives the chunk id used in error messages */
#define MAXSRC          80
---|
52 | |
---|
53 | |
---|
54 | void luaX_checklimit (LexState *ls, int val, int limit, const char *msg) { |
---|
55 | if (val > limit) { |
---|
56 | msg = luaO_pushfstring(ls->L, "too many %s (limit=%d)", msg, limit); |
---|
57 | luaX_syntaxerror(ls, msg); |
---|
58 | } |
---|
59 | } |
---|
60 | |
---|
61 | |
---|
62 | void luaX_errorline (LexState *ls, const char *s, const char *token, int line) { |
---|
63 | lua_State *L = ls->L; |
---|
64 | char buff[MAXSRC]; |
---|
65 | luaO_chunkid(buff, getstr(ls->source), MAXSRC); |
---|
66 | luaO_pushfstring(L, "%s:%d: %s near `%s'", buff, line, s, token); |
---|
67 | luaD_throw(L, LUA_ERRSYNTAX); |
---|
68 | } |
---|
69 | |
---|
70 | |
---|
71 | static void luaX_error (LexState *ls, const char *s, const char *token) { |
---|
72 | luaX_errorline(ls, s, token, ls->linenumber); |
---|
73 | } |
---|
74 | |
---|
75 | |
---|
76 | void luaX_syntaxerror (LexState *ls, const char *msg) { |
---|
77 | const char *lasttoken; |
---|
78 | switch (ls->t.token) { |
---|
79 | case TK_NAME: |
---|
80 | lasttoken = getstr(ls->t.seminfo.ts); |
---|
81 | break; |
---|
82 | case TK_STRING: |
---|
83 | case TK_NUMBER: |
---|
84 | lasttoken = luaZ_buffer(ls->buff); |
---|
85 | break; |
---|
86 | default: |
---|
87 | lasttoken = luaX_token2str(ls, ls->t.token); |
---|
88 | break; |
---|
89 | } |
---|
90 | luaX_error(ls, msg, lasttoken); |
---|
91 | } |
---|
92 | |
---|
93 | |
---|
94 | const char *luaX_token2str (LexState *ls, int token) { |
---|
95 | if (token < FIRST_RESERVED) { |
---|
96 | lua_assert(token == (unsigned char)token); |
---|
97 | return luaO_pushfstring(ls->L, "%c", token); |
---|
98 | } |
---|
99 | else |
---|
100 | return token2string[token-FIRST_RESERVED]; |
---|
101 | } |
---|
102 | |
---|
103 | |
---|
104 | static void luaX_lexerror (LexState *ls, const char *s, int token) { |
---|
105 | if (token == TK_EOS) |
---|
106 | luaX_error(ls, s, luaX_token2str(ls, token)); |
---|
107 | else |
---|
108 | luaX_error(ls, s, luaZ_buffer(ls->buff)); |
---|
109 | } |
---|
110 | |
---|
111 | |
---|
112 | static void inclinenumber (LexState *LS) { |
---|
113 | next(LS); /* skip `\n' */ |
---|
114 | ++LS->linenumber; |
---|
115 | luaX_checklimit(LS, LS->linenumber, MAX_INT, "lines in a chunk"); |
---|
116 | } |
---|
117 | |
---|
118 | |
---|
119 | void luaX_setinput (lua_State *L, LexState *LS, ZIO *z, TString *source) { |
---|
120 | LS->L = L; |
---|
121 | LS->lookahead.token = TK_EOS; /* no look-ahead token */ |
---|
122 | LS->z = z; |
---|
123 | LS->fs = NULL; |
---|
124 | LS->linenumber = 1; |
---|
125 | LS->lastline = 1; |
---|
126 | LS->source = source; |
---|
127 | next(LS); /* read first char */ |
---|
128 | if (LS->current == '#') { |
---|
129 | do { /* skip first line */ |
---|
130 | next(LS); |
---|
131 | } while (LS->current != '\n' && LS->current != EOZ); |
---|
132 | } |
---|
133 | } |
---|
134 | |
---|
135 | |
---|
136 | |
---|
137 | /* |
---|
138 | ** ======================================================= |
---|
139 | ** LEXICAL ANALYZER |
---|
140 | ** ======================================================= |
---|
141 | */ |
---|
142 | |
---|
143 | |
---|
/* use buffer to store names, literal strings and numbers */

/* extra space to allocate when growing buffer */
#define EXTRABUFF	32

/* maximum number of chars that can be read without checking buffer size */
#define MAXNOCHECK	5

/*
** Make sure the buffer has room for `len' chars plus MAXNOCHECK more;
** when it does not, ask luaZ_openspace for `len'+EXTRABUFF.
** The body is wrapped in do/while(0) so that the macro expands to exactly
** one statement: a bare `if' would silently capture a following `else'.
** Use it as a statement, terminated by `;'.
*/
#define checkbuffer(LS, len)	\
    do { \
      if (((len)+MAXNOCHECK)*sizeof(char) > luaZ_sizebuffer((LS)->buff)) \
        luaZ_openspace((LS)->L, (LS)->buff, (len)+EXTRABUFF); \
    } while (0)

/*
** Store char `c' at position `l' of the buffer and increment `l'
** (which therefore must be a modifiable lvalue).  No size check is done.
*/
#define save(LS, c, l)	\
	(luaZ_buffer((LS)->buff)[(l)++] = cast(char, c))

/* store the current char in the buffer and advance to the next one */
#define save_and_next(LS, l)	(save(LS, (LS)->current, l), next(LS))
---|
159 | |
---|
160 | |
---|
161 | static size_t readname (LexState *LS) { |
---|
162 | size_t l = 0; |
---|
163 | checkbuffer(LS, l); |
---|
164 | do { |
---|
165 | checkbuffer(LS, l); |
---|
166 | save_and_next(LS, l); |
---|
167 | } while (isalnum(LS->current) || LS->current == '_'); |
---|
168 | save(LS, '\0', l); |
---|
169 | return l-1; |
---|
170 | } |
---|
171 | |
---|
172 | |
---|
173 | /* LUA_NUMBER */ |
---|
174 | static void read_numeral (LexState *LS, int comma, SemInfo *seminfo) { |
---|
175 | size_t l = 0; |
---|
176 | checkbuffer(LS, l); |
---|
177 | if (comma) save(LS, '.', l); |
---|
178 | while (isdigit(LS->current)) { |
---|
179 | checkbuffer(LS, l); |
---|
180 | save_and_next(LS, l); |
---|
181 | } |
---|
182 | if (LS->current == '.') { |
---|
183 | save_and_next(LS, l); |
---|
184 | if (LS->current == '.') { |
---|
185 | save_and_next(LS, l); |
---|
186 | save(LS, '\0', l); |
---|
187 | luaX_lexerror(LS, |
---|
188 | "ambiguous syntax (decimal point x string concatenation)", |
---|
189 | TK_NUMBER); |
---|
190 | } |
---|
191 | } |
---|
192 | while (isdigit(LS->current)) { |
---|
193 | checkbuffer(LS, l); |
---|
194 | save_and_next(LS, l); |
---|
195 | } |
---|
196 | if (LS->current == 'e' || LS->current == 'E') { |
---|
197 | save_and_next(LS, l); /* read `E' */ |
---|
198 | if (LS->current == '+' || LS->current == '-') |
---|
199 | save_and_next(LS, l); /* optional exponent sign */ |
---|
200 | while (isdigit(LS->current)) { |
---|
201 | checkbuffer(LS, l); |
---|
202 | save_and_next(LS, l); |
---|
203 | } |
---|
204 | } |
---|
205 | save(LS, '\0', l); |
---|
206 | if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r)) |
---|
207 | luaX_lexerror(LS, "malformed number", TK_NUMBER); |
---|
208 | } |
---|
209 | |
---|
210 | |
---|
211 | static void read_long_string (LexState *LS, SemInfo *seminfo) { |
---|
212 | int cont = 0; |
---|
213 | size_t l = 0; |
---|
214 | checkbuffer(LS, l); |
---|
215 | save(LS, '[', l); /* save first `[' */ |
---|
216 | save_and_next(LS, l); /* pass the second `[' */ |
---|
217 | if (LS->current == '\n') /* string starts with a newline? */ |
---|
218 | inclinenumber(LS); /* skip it */ |
---|
219 | for (;;) { |
---|
220 | checkbuffer(LS, l); |
---|
221 | switch (LS->current) { |
---|
222 | case EOZ: |
---|
223 | save(LS, '\0', l); |
---|
224 | luaX_lexerror(LS, (seminfo) ? "unfinished long string" : |
---|
225 | "unfinished long comment", TK_EOS); |
---|
226 | break; /* to avoid warnings */ |
---|
227 | case '[': |
---|
228 | save_and_next(LS, l); |
---|
229 | if (LS->current == '[') { |
---|
230 | cont++; |
---|
231 | save_and_next(LS, l); |
---|
232 | } |
---|
233 | continue; |
---|
234 | case ']': |
---|
235 | save_and_next(LS, l); |
---|
236 | if (LS->current == ']') { |
---|
237 | if (cont == 0) goto endloop; |
---|
238 | cont--; |
---|
239 | save_and_next(LS, l); |
---|
240 | } |
---|
241 | continue; |
---|
242 | case '\n': |
---|
243 | save(LS, '\n', l); |
---|
244 | inclinenumber(LS); |
---|
245 | if (!seminfo) l = 0; /* reset buffer to avoid wasting space */ |
---|
246 | continue; |
---|
247 | default: |
---|
248 | save_and_next(LS, l); |
---|
249 | } |
---|
250 | } endloop: |
---|
251 | save_and_next(LS, l); /* skip the second `]' */ |
---|
252 | save(LS, '\0', l); |
---|
253 | if (seminfo) |
---|
254 | seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 2, l - 5); |
---|
255 | } |
---|
256 | |
---|
257 | |
---|
258 | static void read_string (LexState *LS, int del, SemInfo *seminfo) { |
---|
259 | size_t l = 0; |
---|
260 | checkbuffer(LS, l); |
---|
261 | save_and_next(LS, l); |
---|
262 | while (LS->current != del) { |
---|
263 | checkbuffer(LS, l); |
---|
264 | switch (LS->current) { |
---|
265 | case EOZ: |
---|
266 | save(LS, '\0', l); |
---|
267 | luaX_lexerror(LS, "unfinished string", TK_EOS); |
---|
268 | break; /* to avoid warnings */ |
---|
269 | case '\n': |
---|
270 | save(LS, '\0', l); |
---|
271 | luaX_lexerror(LS, "unfinished string", TK_STRING); |
---|
272 | break; /* to avoid warnings */ |
---|
273 | case '\\': |
---|
274 | next(LS); /* do not save the `\' */ |
---|
275 | switch (LS->current) { |
---|
276 | case 'a': save(LS, '\a', l); next(LS); break; |
---|
277 | case 'b': save(LS, '\b', l); next(LS); break; |
---|
278 | case 'f': save(LS, '\f', l); next(LS); break; |
---|
279 | case 'n': save(LS, '\n', l); next(LS); break; |
---|
280 | case 'r': save(LS, '\r', l); next(LS); break; |
---|
281 | case 't': save(LS, '\t', l); next(LS); break; |
---|
282 | case 'v': save(LS, '\v', l); next(LS); break; |
---|
283 | case '\n': save(LS, '\n', l); inclinenumber(LS); break; |
---|
284 | case EOZ: break; /* will raise an error next loop */ |
---|
285 | default: { |
---|
286 | if (!isdigit(LS->current)) |
---|
287 | save_and_next(LS, l); /* handles \\, \", \', and \? */ |
---|
288 | else { /* \xxx */ |
---|
289 | int c = 0; |
---|
290 | int i = 0; |
---|
291 | do { |
---|
292 | c = 10*c + (LS->current-'0'); |
---|
293 | next(LS); |
---|
294 | } while (++i<3 && isdigit(LS->current)); |
---|
295 | if (c > UCHAR_MAX) { |
---|
296 | save(LS, '\0', l); |
---|
297 | luaX_lexerror(LS, "escape sequence too large", TK_STRING); |
---|
298 | } |
---|
299 | save(LS, c, l); |
---|
300 | } |
---|
301 | } |
---|
302 | } |
---|
303 | break; |
---|
304 | default: |
---|
305 | save_and_next(LS, l); |
---|
306 | } |
---|
307 | } |
---|
308 | save_and_next(LS, l); /* skip delimiter */ |
---|
309 | save(LS, '\0', l); |
---|
310 | seminfo->ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff) + 1, l - 3); |
---|
311 | } |
---|
312 | |
---|
313 | |
---|
314 | int luaX_lex (LexState *LS, SemInfo *seminfo) { |
---|
315 | for (;;) { |
---|
316 | switch (LS->current) { |
---|
317 | |
---|
318 | case '\n': { |
---|
319 | inclinenumber(LS); |
---|
320 | continue; |
---|
321 | } |
---|
322 | case '-': { |
---|
323 | next(LS); |
---|
324 | if (LS->current != '-') return '-'; |
---|
325 | /* else is a comment */ |
---|
326 | next(LS); |
---|
327 | if (LS->current == '[' && (next(LS), LS->current == '[')) |
---|
328 | read_long_string(LS, NULL); /* long comment */ |
---|
329 | else /* short comment */ |
---|
330 | while (LS->current != '\n' && LS->current != EOZ) |
---|
331 | next(LS); |
---|
332 | continue; |
---|
333 | } |
---|
334 | case '[': { |
---|
335 | next(LS); |
---|
336 | if (LS->current != '[') return '['; |
---|
337 | else { |
---|
338 | read_long_string(LS, seminfo); |
---|
339 | return TK_STRING; |
---|
340 | } |
---|
341 | } |
---|
342 | case '=': { |
---|
343 | next(LS); |
---|
344 | if (LS->current != '=') return '='; |
---|
345 | else { next(LS); return TK_EQ; } |
---|
346 | } |
---|
347 | case '<': { |
---|
348 | next(LS); |
---|
349 | if (LS->current != '=') return '<'; |
---|
350 | else { next(LS); return TK_LE; } |
---|
351 | } |
---|
352 | case '>': { |
---|
353 | next(LS); |
---|
354 | if (LS->current != '=') return '>'; |
---|
355 | else { next(LS); return TK_GE; } |
---|
356 | } |
---|
357 | case '~': { |
---|
358 | next(LS); |
---|
359 | if (LS->current != '=') return '~'; |
---|
360 | else { next(LS); return TK_NE; } |
---|
361 | } |
---|
362 | case '"': |
---|
363 | case '\'': { |
---|
364 | read_string(LS, LS->current, seminfo); |
---|
365 | return TK_STRING; |
---|
366 | } |
---|
367 | case '.': { |
---|
368 | next(LS); |
---|
369 | if (LS->current == '.') { |
---|
370 | next(LS); |
---|
371 | if (LS->current == '.') { |
---|
372 | next(LS); |
---|
373 | return TK_DOTS; /* ... */ |
---|
374 | } |
---|
375 | else return TK_CONCAT; /* .. */ |
---|
376 | } |
---|
377 | else if (!isdigit(LS->current)) return '.'; |
---|
378 | else { |
---|
379 | read_numeral(LS, 1, seminfo); |
---|
380 | return TK_NUMBER; |
---|
381 | } |
---|
382 | } |
---|
383 | case EOZ: { |
---|
384 | return TK_EOS; |
---|
385 | } |
---|
386 | default: { |
---|
387 | if (isspace(LS->current)) { |
---|
388 | next(LS); |
---|
389 | continue; |
---|
390 | } |
---|
391 | else if (isdigit(LS->current)) { |
---|
392 | read_numeral(LS, 0, seminfo); |
---|
393 | return TK_NUMBER; |
---|
394 | } |
---|
395 | else if (isalpha(LS->current) || LS->current == '_') { |
---|
396 | /* identifier or reserved word */ |
---|
397 | size_t l = readname(LS); |
---|
398 | TString *ts = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), l); |
---|
399 | if (ts->tsv.reserved > 0) /* reserved word? */ |
---|
400 | return ts->tsv.reserved - 1 + FIRST_RESERVED; |
---|
401 | seminfo->ts = ts; |
---|
402 | return TK_NAME; |
---|
403 | } |
---|
404 | else { |
---|
405 | int c = LS->current; |
---|
406 | if (iscntrl(c)) |
---|
407 | luaX_error(LS, "invalid control char", |
---|
408 | luaO_pushfstring(LS->L, "char(%d)", c)); |
---|
409 | next(LS); |
---|
410 | return c; /* single-char tokens (+ - / ...) */ |
---|
411 | } |
---|
412 | } |
---|
413 | } |
---|
414 | } |
---|
415 | } |
---|
416 | |
---|
417 | #undef next |
---|