1 | /* |
---|
2 | * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. |
---|
3 | * |
---|
4 | * This file is part of Jam - see jam.c for Copyright information. |
---|
5 | */ |
---|
6 | |
---|
# include "jam.h"
# include "lists.h"
# include "parse.h"
# include "scan.h"
# include "jamgram.h"
# include "jambase.h"
# include "newstr.h"

# include <stdlib.h>
---|
14 | |
---|
15 | /* |
---|
16 | * scan.c - the jam yacc scanner |
---|
17 | * |
---|
18 | * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk. |
---|
19 | * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc. |
---|
20 | * Also handle tokens abutting EOF by remembering |
---|
21 | * to return EOF no matter how many times yylex() |
---|
22 | * reinvokes yyline(). |
---|
23 | * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT. |
---|
24 | * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is |
---|
25 | * defined before Linux's yacc tries to redefine it. |
---|
26 | */ |
---|
27 | |
---|
28 | struct keyword { |
---|
29 | char *word; |
---|
30 | int type; |
---|
31 | } keywords[] = { |
---|
32 | # include "jamgramtab.h" |
---|
33 | { 0, 0 } |
---|
34 | } ; |
---|
35 | |
---|
/*
 * One record per input source being scanned.  Records are chained
 * through 'next'; the source currently being read is the head of the
 * chain.
 */

struct include {
    /* next serial include file */
    struct include *next;

    /* pointer into current line */
    char *string;

    /* for yyfparse() -- text to parse */
    char **strings;

    /* for yyfparse() -- file being read */
    FILE *file;

    /* for yyfparse() -- file name */
    char *fname;

    /* line counter for error messages */
    int line;

    /* for yyfparse() -- line buffer */
    char buf[ 512 ];
};
---|
45 | |
---|
46 | static struct include *incp = 0; /* current file; head of chain */ |
---|
47 | |
---|
48 | static int scanmode = SCAN_NORMAL; |
---|
49 | static int anyerrors = 0; |
---|
50 | static char *symdump( YYSTYPE *s ); |
---|
51 | |
---|
52 | # define BIGGEST_TOKEN 10240 /* no single token can be larger */ |
---|
53 | |
---|
54 | /* |
---|
55 | * Set parser mode: normal, string, or keyword |
---|
56 | */ |
---|
57 | |
---|
58 | void |
---|
59 | yymode( int n ) |
---|
60 | { |
---|
61 | scanmode = n; |
---|
62 | } |
---|
63 | |
---|
64 | void |
---|
65 | yyerror( char *s ) |
---|
66 | { |
---|
67 | if( incp ) |
---|
68 | printf( "%s:%d: ", incp->fname, incp->line ); |
---|
69 | |
---|
70 | printf( "%s at %s\n", s, symdump( &yylval ) ); |
---|
71 | |
---|
72 | ++anyerrors; |
---|
73 | } |
---|
74 | |
---|
75 | int |
---|
76 | yyanyerrors() |
---|
77 | { |
---|
78 | return anyerrors != 0; |
---|
79 | } |
---|
80 | |
---|
81 | void |
---|
82 | yyfparse( char *s ) |
---|
83 | { |
---|
84 | struct include *i = (struct include *)malloc( sizeof( *i ) ); |
---|
85 | |
---|
86 | /* Push this onto the incp chain. */ |
---|
87 | |
---|
88 | i->string = ""; |
---|
89 | i->strings = 0; |
---|
90 | i->file = 0; |
---|
91 | i->fname = copystr( s ); |
---|
92 | i->line = 0; |
---|
93 | i->next = incp; |
---|
94 | incp = i; |
---|
95 | |
---|
96 | /* If the filename is "+", it means use the internal jambase. */ |
---|
97 | |
---|
98 | if( !strcmp( s, "+" ) ) |
---|
99 | i->strings = jambase; |
---|
100 | } |
---|
101 | |
---|
102 | /* |
---|
103 | * yyline() - read new line and return first character |
---|
104 | * |
---|
105 | * Fabricates a continuous stream of characters across include files, |
---|
106 | * returning EOF at the bitter end. |
---|
107 | */ |
---|
108 | |
---|
109 | int |
---|
110 | yyline() |
---|
111 | { |
---|
112 | struct include *i = incp; |
---|
113 | |
---|
114 | if( !incp ) |
---|
115 | return EOF; |
---|
116 | |
---|
117 | /* Once we start reading from the input stream, we reset the */ |
---|
118 | /* include insertion point so that the next include file becomes */ |
---|
119 | /* the head of the list. */ |
---|
120 | |
---|
121 | /* If there is more data in this line, return it. */ |
---|
122 | |
---|
123 | if( *i->string ) |
---|
124 | return *i->string++; |
---|
125 | |
---|
126 | /* If we're reading from an internal string list, go to the */ |
---|
127 | /* next string. */ |
---|
128 | |
---|
129 | if( i->strings ) |
---|
130 | { |
---|
131 | if( !*i->strings ) |
---|
132 | goto next; |
---|
133 | |
---|
134 | i->line++; |
---|
135 | i->string = *(i->strings++); |
---|
136 | return *i->string++; |
---|
137 | } |
---|
138 | |
---|
139 | /* If necessary, open the file */ |
---|
140 | |
---|
141 | if( !i->file ) |
---|
142 | { |
---|
143 | FILE *f = stdin; |
---|
144 | |
---|
145 | if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) ) |
---|
146 | perror( i->fname ); |
---|
147 | |
---|
148 | i->file = f; |
---|
149 | } |
---|
150 | |
---|
151 | /* If there's another line in this file, start it. */ |
---|
152 | |
---|
153 | if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) ) |
---|
154 | { |
---|
155 | i->line++; |
---|
156 | i->string = i->buf; |
---|
157 | return *i->string++; |
---|
158 | } |
---|
159 | |
---|
160 | next: |
---|
161 | /* This include is done. */ |
---|
162 | /* Free it up and return EOF so yyparse() returns to parse_file(). */ |
---|
163 | |
---|
164 | incp = i->next; |
---|
165 | |
---|
166 | /* Close file, free name */ |
---|
167 | |
---|
168 | if( i->file && i->file != stdin ) |
---|
169 | fclose( i->file ); |
---|
170 | freestr( i->fname ); |
---|
171 | free( (char *)i ); |
---|
172 | |
---|
173 | return EOF; |
---|
174 | } |
---|
175 | |
---|
/*
 * yylex() - set yylval to current token; return its type
 *
 * Macros to move things along:
 *
 *  yychar() - return and advance character; invalid after EOF
 *  yyprev() - back up one character; invalid before yychar()
 *
 * yychar() returns a continuous stream of characters, until it hits
 * the EOF of the current include file.  While the current line still
 * has data the character is taken straight from it; otherwise yyline()
 * is asked for more.
 *
 * The character is converted through unsigned char so that a byte above
 * 0x7f is never returned as a negative int, which could otherwise
 * compare equal to EOF.
 */

# define yychar() ( *incp->string ? (unsigned char)*incp->string++ : yyline() )
# define yyprev() ( incp->string-- )
---|
190 | |
---|
191 | int |
---|
192 | yylex() |
---|
193 | { |
---|
194 | int c; |
---|
195 | char buf[BIGGEST_TOKEN]; |
---|
196 | char *b = buf; |
---|
197 | |
---|
198 | if( !incp ) |
---|
199 | goto eof; |
---|
200 | |
---|
201 | /* Get first character (whitespace or of token) */ |
---|
202 | |
---|
203 | c = yychar(); |
---|
204 | |
---|
205 | if( scanmode == SCAN_STRING ) |
---|
206 | { |
---|
207 | /* If scanning for a string (action's {}'s), look for the */ |
---|
208 | /* closing brace. We handle matching braces, if they match! */ |
---|
209 | |
---|
210 | int nest = 1; |
---|
211 | |
---|
212 | while( c != EOF && b < buf + sizeof( buf ) ) |
---|
213 | { |
---|
214 | if( c == '{' ) |
---|
215 | nest++; |
---|
216 | |
---|
217 | if( c == '}' && !--nest ) |
---|
218 | break; |
---|
219 | |
---|
220 | *b++ = c; |
---|
221 | |
---|
222 | c = yychar(); |
---|
223 | |
---|
224 | /* turn trailing "\r\n" sequences into plain "\n" |
---|
225 | * for Cygwin |
---|
226 | */ |
---|
227 | if (c == '\n' && b[-1] == '\r') |
---|
228 | --b; |
---|
229 | } |
---|
230 | |
---|
231 | /* We ate the ending brace -- regurgitate it. */ |
---|
232 | |
---|
233 | if( c != EOF ) |
---|
234 | yyprev(); |
---|
235 | |
---|
236 | /* Check obvious errors. */ |
---|
237 | |
---|
238 | if( b == buf + sizeof( buf ) ) |
---|
239 | { |
---|
240 | yyerror( "action block too big" ); |
---|
241 | goto eof; |
---|
242 | } |
---|
243 | |
---|
244 | if( nest ) |
---|
245 | { |
---|
246 | yyerror( "unmatched {} in action block" ); |
---|
247 | goto eof; |
---|
248 | } |
---|
249 | |
---|
250 | *b = 0; |
---|
251 | yylval.type = STRING; |
---|
252 | yylval.string = newstr( buf ); |
---|
253 | yylval.file = incp->fname; |
---|
254 | yylval.line = incp->line; |
---|
255 | |
---|
256 | } |
---|
257 | else |
---|
258 | { |
---|
259 | char *b = buf; |
---|
260 | struct keyword *k; |
---|
261 | int inquote = 0; |
---|
262 | int notkeyword; |
---|
263 | |
---|
264 | /* Eat white space */ |
---|
265 | |
---|
266 | for( ;; ) |
---|
267 | { |
---|
268 | /* Skip past white space */ |
---|
269 | |
---|
270 | while( c != EOF && isspace( c ) ) |
---|
271 | c = yychar(); |
---|
272 | |
---|
273 | /* Not a comment? Swallow up comment line. */ |
---|
274 | |
---|
275 | if( c != '#' ) |
---|
276 | break; |
---|
277 | while( ( c = yychar() ) != EOF && c != '\n' ) |
---|
278 | ; |
---|
279 | } |
---|
280 | |
---|
281 | /* c now points to the first character of a token. */ |
---|
282 | |
---|
283 | if( c == EOF ) |
---|
284 | goto eof; |
---|
285 | |
---|
286 | yylval.file = incp->fname; |
---|
287 | yylval.line = incp->line; |
---|
288 | |
---|
289 | /* While scanning the word, disqualify it for (expensive) */ |
---|
290 | /* keyword lookup when we can: $anything, "anything", \anything */ |
---|
291 | |
---|
292 | notkeyword = c == '$'; |
---|
293 | |
---|
294 | /* look for white space to delimit word */ |
---|
295 | /* "'s get stripped but preserve white space */ |
---|
296 | /* \ protects next character */ |
---|
297 | |
---|
298 | while( |
---|
299 | c != EOF && |
---|
300 | b < buf + sizeof( buf ) && |
---|
301 | ( inquote || !isspace( c ) ) ) |
---|
302 | { |
---|
303 | if( c == '"' ) |
---|
304 | { |
---|
305 | /* begin or end " */ |
---|
306 | inquote = !inquote; |
---|
307 | notkeyword = 1; |
---|
308 | } |
---|
309 | else if( c != '\\' ) |
---|
310 | { |
---|
311 | /* normal char */ |
---|
312 | *b++ = c; |
---|
313 | } |
---|
314 | else if( ( c = yychar()) != EOF ) |
---|
315 | { |
---|
316 | /* \c */ |
---|
317 | *b++ = c; |
---|
318 | notkeyword = 1; |
---|
319 | } |
---|
320 | else |
---|
321 | { |
---|
322 | /* \EOF */ |
---|
323 | break; |
---|
324 | } |
---|
325 | |
---|
326 | c = yychar(); |
---|
327 | } |
---|
328 | |
---|
329 | /* Check obvious errors. */ |
---|
330 | |
---|
331 | if( b == buf + sizeof( buf ) ) |
---|
332 | { |
---|
333 | yyerror( "string too big" ); |
---|
334 | goto eof; |
---|
335 | } |
---|
336 | |
---|
337 | if( inquote ) |
---|
338 | { |
---|
339 | yyerror( "unmatched \" in string" ); |
---|
340 | goto eof; |
---|
341 | } |
---|
342 | |
---|
343 | /* We looked ahead a character - back up. */ |
---|
344 | |
---|
345 | if( c != EOF ) |
---|
346 | yyprev(); |
---|
347 | |
---|
348 | /* scan token table */ |
---|
349 | /* don't scan if it's obviously not a keyword or if its */ |
---|
350 | /* an alphabetic when were looking for punctuation */ |
---|
351 | |
---|
352 | *b = 0; |
---|
353 | yylval.type = ARG; |
---|
354 | |
---|
355 | if( !notkeyword && !( isalpha( *buf ) && scanmode == SCAN_PUNCT ) ) |
---|
356 | { |
---|
357 | for( k = keywords; k->word; k++ ) |
---|
358 | if( *buf == *k->word && !strcmp( k->word, buf ) ) |
---|
359 | { |
---|
360 | yylval.type = k->type; |
---|
361 | yylval.string = k->word; /* used by symdump */ |
---|
362 | break; |
---|
363 | } |
---|
364 | } |
---|
365 | |
---|
366 | if( yylval.type == ARG ) |
---|
367 | yylval.string = newstr( buf ); |
---|
368 | } |
---|
369 | |
---|
370 | if( DEBUG_SCAN ) |
---|
371 | printf( "scan %s\n", symdump( &yylval ) ); |
---|
372 | |
---|
373 | return yylval.type; |
---|
374 | |
---|
375 | eof: |
---|
376 | yylval.file = "end-of-input"; /* just in case */ |
---|
377 | yylval.line = 0; |
---|
378 | |
---|
379 | yylval.type = EOF; |
---|
380 | return yylval.type; |
---|
381 | } |
---|
382 | |
---|
383 | static char * |
---|
384 | symdump( YYSTYPE *s ) |
---|
385 | { |
---|
386 | static char buf[ BIGGEST_TOKEN + 20 ]; |
---|
387 | |
---|
388 | switch( s->type ) |
---|
389 | { |
---|
390 | case EOF: |
---|
391 | sprintf( buf, "EOF" ); |
---|
392 | break; |
---|
393 | case 0: |
---|
394 | sprintf( buf, "unknown symbol %s", s->string ); |
---|
395 | break; |
---|
396 | case ARG: |
---|
397 | sprintf( buf, "argument %s", s->string ); |
---|
398 | break; |
---|
399 | case STRING: |
---|
400 | sprintf( buf, "string \"%s\"", s->string ); |
---|
401 | break; |
---|
402 | default: |
---|
403 | sprintf( buf, "keyword %s", s->string ); |
---|
404 | break; |
---|
405 | } |
---|
406 | return buf; |
---|
407 | } |
---|
408 | |
---|
409 | /* Get information about the current file and line, for those epsilon |
---|
410 | * transitions that produce a parse |
---|
411 | */ |
---|
412 | void yyinput_stream( char** name, int* line ) |
---|
413 | { |
---|
414 | if (incp) |
---|
415 | { |
---|
416 | *name = incp->fname; |
---|
417 | *line = incp->line; |
---|
418 | } |
---|
419 | else |
---|
420 | { |
---|
421 | *name = "(builtin)"; |
---|
422 | *line = -1; |
---|
423 | } |
---|
424 | } |
---|