/* Copyright 2002 Rene Rivera.
** Distributed under the Boost Software License, Version 1.0.
** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
*/
---|
5 | |
---|
6 | #include <stdio.h> |
---|
7 | #include <string.h> |
---|
8 | #include <ctype.h> |
---|
9 | #include <stdlib.h> |
---|
10 | |
---|
/*
# yyacc - yacc wrapper
#
# Allows tokens to be written as `literal` and then automatically
# substituted with #defined tokens.
#
# Usage:
#    yyacc file.y filetab.h file.yy
#
# inputs:
#    file.yy      yacc grammar with ` literals
#
# outputs:
#    file.y       yacc grammar
#    filetab.h    array of string <-> token mappings
#
# 3-13-93
#    Documented and p moved in sed command (for some reason,
#    s/x/y/p doesn't work).
# 10-12-93
#    Take basename as second argument.
# 12-31-96
#    reversed order of args to be compatible with GenFile rule
# 11-20-2002
#    Reimplemented as a C program for portability. (Rene Rivera)
*/
---|
37 | |
---|
/* Print the command-line usage text to stderr. */
void print_usage(void);

/*
 * Return a newly allocated, NUL-terminated copy of at most the first `l`
 * characters of `s`. The caller owns the result and releases it with free().
 */
char * copy_string(char * s, int l);

/*
 * Return a newly allocated token name for the literal `s`: known punctuation
 * is replaced by a symbolic name, the text is upper-cased and "_t" is
 * appended. The caller owns the result and releases it with free().
 */
char * tokenize_string(char * s);

/*
 * qsort()/bsearch() comparison callback: orders two `literal` records by
 * their `string` member using strcmp().
 */
int cmp_literal(const void * a, const void * b);
---|
42 | |
---|
/* A grammar literal paired with the name of the token that replaces it. */
typedef struct {
    char * string;  /* text found between a pair of backquotes */
    char * token;   /* token name produced by tokenize_string() */
} literal;
---|
48 | |
---|
49 | int main(int argc, char ** argv) |
---|
50 | { |
---|
51 | int result = 0; |
---|
52 | if (argc != 4) |
---|
53 | { |
---|
54 | print_usage(); |
---|
55 | result = 1; |
---|
56 | } |
---|
57 | else |
---|
58 | { |
---|
59 | FILE * token_output_f = 0; |
---|
60 | FILE * grammar_output_f = 0; |
---|
61 | FILE * grammar_source_f = 0; |
---|
62 | |
---|
63 | grammar_source_f = fopen(argv[3],"r"); |
---|
64 | if (grammar_source_f == 0) { result = 1; } |
---|
65 | if (result == 0) |
---|
66 | { |
---|
67 | literal literals[1024]; |
---|
68 | int t = 0; |
---|
69 | char l[2048]; |
---|
70 | while (1) |
---|
71 | { |
---|
72 | if (fgets(l,2048,grammar_source_f) != 0) |
---|
73 | { |
---|
74 | char * c = l; |
---|
75 | while (1) |
---|
76 | { |
---|
77 | char * c1 = strchr(c,'`'); |
---|
78 | if (c1 != 0) |
---|
79 | { |
---|
80 | char * c2 = strchr(c1+1,'`'); |
---|
81 | if (c2 != 0) |
---|
82 | { |
---|
83 | literals[t].string = copy_string(c1+1,c2-c1-1); |
---|
84 | literals[t].token = tokenize_string(literals[t].string); |
---|
85 | t += 1; |
---|
86 | c = c2+1; |
---|
87 | } |
---|
88 | else |
---|
89 | break; |
---|
90 | } |
---|
91 | else |
---|
92 | break; |
---|
93 | } |
---|
94 | } |
---|
95 | else |
---|
96 | { |
---|
97 | break; |
---|
98 | } |
---|
99 | } |
---|
100 | literals[t].string = 0; |
---|
101 | literals[t].token = 0; |
---|
102 | qsort(literals,t,sizeof(literal),cmp_literal); |
---|
103 | { |
---|
104 | int p = 1; |
---|
105 | int i = 1; |
---|
106 | while (literals[i].string != 0) |
---|
107 | { |
---|
108 | if (strcmp(literals[p-1].string,literals[i].string) != 0) |
---|
109 | { |
---|
110 | literals[p] = literals[i]; |
---|
111 | p += 1; |
---|
112 | } |
---|
113 | i += 1; |
---|
114 | } |
---|
115 | literals[p].string = 0; |
---|
116 | literals[p].token = 0; |
---|
117 | t = p; |
---|
118 | } |
---|
119 | token_output_f = fopen(argv[2],"w"); |
---|
120 | if (token_output_f != 0) |
---|
121 | { |
---|
122 | int i = 0; |
---|
123 | while (literals[i].string != 0) |
---|
124 | { |
---|
125 | fprintf(token_output_f," { \"%s\", %s },\n",literals[i].string,literals[i].token); |
---|
126 | i += 1; |
---|
127 | } |
---|
128 | fclose(token_output_f); |
---|
129 | } |
---|
130 | else |
---|
131 | result = 1; |
---|
132 | if (result == 0) |
---|
133 | { |
---|
134 | grammar_output_f = fopen(argv[1],"w"); |
---|
135 | if (grammar_output_f != 0) |
---|
136 | { |
---|
137 | int i = 0; |
---|
138 | while (literals[i].string != 0) |
---|
139 | { |
---|
140 | fprintf(grammar_output_f,"%%token %s\n",literals[i].token); |
---|
141 | i += 1; |
---|
142 | } |
---|
143 | rewind(grammar_source_f); |
---|
144 | while (1) |
---|
145 | { |
---|
146 | if (fgets(l,2048,grammar_source_f) != 0) |
---|
147 | { |
---|
148 | char * c = l; |
---|
149 | while (1) |
---|
150 | { |
---|
151 | char * c1 = strchr(c,'`'); |
---|
152 | if (c1 != 0) |
---|
153 | { |
---|
154 | char * c2 = strchr(c1+1,'`'); |
---|
155 | if (c2 != 0) |
---|
156 | { |
---|
157 | literal key; |
---|
158 | literal * replacement = 0; |
---|
159 | key.string = copy_string(c1+1,c2-c1-1); |
---|
160 | key.token = 0; |
---|
161 | replacement = (literal*)bsearch( |
---|
162 | &key,literals,t,sizeof(literal),cmp_literal); |
---|
163 | *c1 = 0; |
---|
164 | fprintf(grammar_output_f,"%s%s",c,replacement->token); |
---|
165 | c = c2+1; |
---|
166 | } |
---|
167 | else |
---|
168 | { |
---|
169 | fprintf(grammar_output_f,"%s",c); |
---|
170 | break; |
---|
171 | } |
---|
172 | } |
---|
173 | else |
---|
174 | { |
---|
175 | fprintf(grammar_output_f,"%s",c); |
---|
176 | break; |
---|
177 | } |
---|
178 | } |
---|
179 | } |
---|
180 | else |
---|
181 | { |
---|
182 | break; |
---|
183 | } |
---|
184 | } |
---|
185 | fclose(grammar_output_f); |
---|
186 | } |
---|
187 | else |
---|
188 | result = 1; |
---|
189 | } |
---|
190 | } |
---|
191 | if (result != 0) |
---|
192 | { |
---|
193 | perror("yyacc"); |
---|
194 | } |
---|
195 | } |
---|
196 | return result; |
---|
197 | } |
---|
198 | |
---|
/* Usage text, one output line per entry; a null pointer marks the end. */
static const char * const usage[] = {
    "yyacc <grammar output.y> <token table output.h> <grammar source.yy>",
    0 };

/* Write every usage line, each followed by a newline, to stderr. */
void print_usage(void)
{
    const char * const * line;
    for (line = usage; *line != 0; ++line)
    {
        fputs(*line, stderr);
        putc('\n', stderr);
    }
}
---|
211 | |
---|
/*
 * Return a newly allocated, NUL-terminated copy of at most the first `l`
 * characters of `s`. A negative `l` is treated as 0 and yields an empty
 * string. The caller owns the result and releases it with free().
 *
 * Running out of memory is fatal: a diagnostic is printed and the program
 * exits, so the function never returns a null pointer.
 */
char * copy_string(char * s, int l)
{
    size_t length = l > 0 ? (size_t)l : 0;
    char * result = (char*)malloc(length + 1);

    if (result == 0)
    {
        perror("yyacc");
        exit(EXIT_FAILURE);
    }
    /* strncpy stops reading at a NUL in `s`, so a too-large `l` never reads
       past the end of the source string; the terminator is written below. */
    strncpy(result, s, length);
    result[length] = 0;
    return result;
}
---|
219 | |
---|
/*
 * Build the token name for the grammar literal `s`.
 *
 * Punctuation literals listed in the table below are replaced by a symbolic
 * name (":" becomes "_colon"); any other literal is used as is. The name is
 * then upper-cased and "_t" is appended, so "if" gives "IF_t" and ":" gives
 * "_COLON_t".
 *
 * Returns a newly allocated string owned by the caller (release with free()).
 * Running out of memory is fatal: a diagnostic is printed and the program
 * exits, so the function never returns a null pointer.
 */
char * tokenize_string(char * s)
{
    static const struct
    {
        const char * text;
        const char * name;
    } punctuation[] = {
        { ":",  "_colon" },
        { "!",  "_bang" },
        { "!=", "_bang_equals" },
        { "&&", "_amperamper" },
        { "&",  "_amper" },
        { "+",  "_plus" },
        { "+=", "_plus_equals" },
        { "||", "_barbar" },
        { "|",  "_bar" },
        { ";",  "_semic" },
        { "-",  "_minus" },
        { "<",  "_langle" },
        { "<=", "_langle_equals" },
        { ">",  "_rangle" },
        { ">=", "_rangle_equals" },
        { ".",  "_period" },
        { "?",  "_question" },
        { "?=", "_question_equals" },
        { "=",  "_equals" },
        { ",",  "_comma" },
        { "[",  "_lbracket" },
        { "]",  "_rbracket" },
        { "{",  "_lbrace" },
        { "}",  "_rbrace" },
        { "(",  "_lparen" },
        { ")",  "_rparen" }
    };
    const char * name = s;
    char * result;
    size_t length;
    size_t i;

    for (i = 0; i < sizeof punctuation / sizeof punctuation[0]; ++i)
    {
        if (strcmp(s, punctuation[i].text) == 0)
        {
            name = punctuation[i].name;
            break;
        }
    }

    length = strlen(name);
    /* Room for the name, the "_t" suffix and the terminating NUL. */
    result = (char*)malloc(length + 3);
    if (result == 0)
    {
        perror("yyacc");
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < length; ++i)
    {
        /* toupper() requires a value representable as unsigned char (or EOF);
           passing a negative plain char is undefined behavior. */
        result[i] = (char)toupper((unsigned char)name[i]);
    }
    result[length] = '_';
    result[length + 1] = 't';
    result[length + 2] = 0;
    return result;
}
---|
264 | |
---|
265 | int cmp_literal(const void * a, const void * b) |
---|
266 | { |
---|
267 | return strcmp(((const literal *)a)->string,((const literal *)b)->string); |
---|
268 | } |
---|