1 | /* |
---|
2 | * tclScan.c -- |
---|
3 | * |
---|
4 | * This file contains the implementation of the "scan" command. |
---|
5 | * |
---|
6 | * Copyright (c) 1998 by Scriptics Corporation. |
---|
7 | * |
---|
8 | * See the file "license.terms" for information on usage and redistribution of |
---|
9 | * this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
10 | * |
---|
11 | * RCS: @(#) $Id: tclScan.c,v 1.27 2007/12/13 15:23:20 dgp Exp $ |
---|
12 | */ |
---|
13 | |
---|
14 | #include "tclInt.h" |
---|
15 | |
---|
16 | /* |
---|
17 | * Flag values used by Tcl_ScanObjCmd. |
---|
18 | */ |
---|
19 | |
---|
20 | #define SCAN_NOSKIP 0x1 /* Don't skip blanks. */ |
---|
21 | #define SCAN_SUPPRESS 0x2 /* Suppress assignment. */ |
---|
22 | #define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */ |
---|
23 | #define SCAN_WIDTH 0x8 /* A width value was supplied. */ |
---|
24 | |
---|
25 | #define SCAN_LONGER 0x400 /* Asked for a wide value. */ |
---|
26 | #define SCAN_BIG 0x800 /* Asked for a bignum value. */ |
---|
27 | |
---|
28 | /* |
---|
29 | * The following structure contains the information associated with a |
---|
30 | * character set. |
---|
31 | */ |
---|
32 | |
---|
33 | typedef struct CharSet { |
---|
34 | int exclude; /* 1 if this is an exclusion set. */ |
---|
35 | int nchars; |
---|
36 | Tcl_UniChar *chars; |
---|
37 | int nranges; |
---|
38 | struct Range { |
---|
39 | Tcl_UniChar start; |
---|
40 | Tcl_UniChar end; |
---|
41 | } *ranges; |
---|
42 | } CharSet; |
---|
43 | |
---|
44 | /* |
---|
45 | * Declarations for functions used only in this file. |
---|
46 | */ |
---|
47 | |
---|
48 | static char * BuildCharSet(CharSet *cset, char *format); |
---|
49 | static int CharInSet(CharSet *cset, int ch); |
---|
50 | static void ReleaseCharSet(CharSet *cset); |
---|
51 | static int ValidateFormat(Tcl_Interp *interp, char *format, |
---|
52 | int numVars, int *totalVars); |
---|
53 | |
---|
54 | /* |
---|
55 | *---------------------------------------------------------------------- |
---|
56 | * |
---|
57 | * BuildCharSet -- |
---|
58 | * |
---|
59 | * This function examines a character set format specification and builds |
---|
60 | * a CharSet containing the individual characters and character ranges |
---|
61 | * specified. |
---|
62 | * |
---|
63 | * Results: |
---|
64 | * Returns the next format position. |
---|
65 | * |
---|
66 | * Side effects: |
---|
67 | * Initializes the charset. |
---|
68 | * |
---|
69 | *---------------------------------------------------------------------- |
---|
70 | */ |
---|
71 | |
---|
72 | static char * |
---|
73 | BuildCharSet( |
---|
74 | CharSet *cset, |
---|
75 | char *format) /* Points to first char of set. */ |
---|
76 | { |
---|
77 | Tcl_UniChar ch, start; |
---|
78 | int offset, nranges; |
---|
79 | char *end; |
---|
80 | |
---|
81 | memset(cset, 0, sizeof(CharSet)); |
---|
82 | |
---|
83 | offset = Tcl_UtfToUniChar(format, &ch); |
---|
84 | if (ch == '^') { |
---|
85 | cset->exclude = 1; |
---|
86 | format += offset; |
---|
87 | offset = Tcl_UtfToUniChar(format, &ch); |
---|
88 | } |
---|
89 | end = format + offset; |
---|
90 | |
---|
91 | /* |
---|
92 | * Find the close bracket so we can overallocate the set. |
---|
93 | */ |
---|
94 | |
---|
95 | if (ch == ']') { |
---|
96 | end += Tcl_UtfToUniChar(end, &ch); |
---|
97 | } |
---|
98 | nranges = 0; |
---|
99 | while (ch != ']') { |
---|
100 | if (ch == '-') { |
---|
101 | nranges++; |
---|
102 | } |
---|
103 | end += Tcl_UtfToUniChar(end, &ch); |
---|
104 | } |
---|
105 | |
---|
106 | cset->chars = (Tcl_UniChar *) |
---|
107 | ckalloc(sizeof(Tcl_UniChar) * (end - format - 1)); |
---|
108 | if (nranges > 0) { |
---|
109 | cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges); |
---|
110 | } else { |
---|
111 | cset->ranges = NULL; |
---|
112 | } |
---|
113 | |
---|
114 | /* |
---|
115 | * Now build the character set. |
---|
116 | */ |
---|
117 | |
---|
118 | cset->nchars = cset->nranges = 0; |
---|
119 | format += Tcl_UtfToUniChar(format, &ch); |
---|
120 | start = ch; |
---|
121 | if (ch == ']' || ch == '-') { |
---|
122 | cset->chars[cset->nchars++] = ch; |
---|
123 | format += Tcl_UtfToUniChar(format, &ch); |
---|
124 | } |
---|
125 | while (ch != ']') { |
---|
126 | if (*format == '-') { |
---|
127 | /* |
---|
128 | * This may be the first character of a range, so don't add it |
---|
129 | * yet. |
---|
130 | */ |
---|
131 | |
---|
132 | start = ch; |
---|
133 | } else if (ch == '-') { |
---|
134 | /* |
---|
135 | * Check to see if this is the last character in the set, in which |
---|
136 | * case it is not a range and we should add the previous character |
---|
137 | * as well as the dash. |
---|
138 | */ |
---|
139 | |
---|
140 | if (*format == ']') { |
---|
141 | cset->chars[cset->nchars++] = start; |
---|
142 | cset->chars[cset->nchars++] = ch; |
---|
143 | } else { |
---|
144 | format += Tcl_UtfToUniChar(format, &ch); |
---|
145 | |
---|
146 | /* |
---|
147 | * Check to see if the range is in reverse order. |
---|
148 | */ |
---|
149 | |
---|
150 | if (start < ch) { |
---|
151 | cset->ranges[cset->nranges].start = start; |
---|
152 | cset->ranges[cset->nranges].end = ch; |
---|
153 | } else { |
---|
154 | cset->ranges[cset->nranges].start = ch; |
---|
155 | cset->ranges[cset->nranges].end = start; |
---|
156 | } |
---|
157 | cset->nranges++; |
---|
158 | } |
---|
159 | } else { |
---|
160 | cset->chars[cset->nchars++] = ch; |
---|
161 | } |
---|
162 | format += Tcl_UtfToUniChar(format, &ch); |
---|
163 | } |
---|
164 | return format; |
---|
165 | } |
---|
166 | |
---|
167 | /* |
---|
168 | *---------------------------------------------------------------------- |
---|
169 | * |
---|
170 | * CharInSet -- |
---|
171 | * |
---|
172 | * Check to see if a character matches the given set. |
---|
173 | * |
---|
174 | * Results: |
---|
175 | * Returns non-zero if the character matches the given set. |
---|
176 | * |
---|
177 | * Side effects: |
---|
178 | * None. |
---|
179 | * |
---|
180 | *---------------------------------------------------------------------- |
---|
181 | */ |
---|
182 | |
---|
183 | static int |
---|
184 | CharInSet( |
---|
185 | CharSet *cset, |
---|
186 | int c) /* Character to test, passed as int because of |
---|
187 | * non-ANSI prototypes. */ |
---|
188 | { |
---|
189 | Tcl_UniChar ch = (Tcl_UniChar) c; |
---|
190 | int i, match = 0; |
---|
191 | |
---|
192 | for (i = 0; i < cset->nchars; i++) { |
---|
193 | if (cset->chars[i] == ch) { |
---|
194 | match = 1; |
---|
195 | break; |
---|
196 | } |
---|
197 | } |
---|
198 | if (!match) { |
---|
199 | for (i = 0; i < cset->nranges; i++) { |
---|
200 | if ((cset->ranges[i].start <= ch) && (ch <= cset->ranges[i].end)) { |
---|
201 | match = 1; |
---|
202 | break; |
---|
203 | } |
---|
204 | } |
---|
205 | } |
---|
206 | return (cset->exclude ? !match : match); |
---|
207 | } |
---|
208 | |
---|
209 | /* |
---|
210 | *---------------------------------------------------------------------- |
---|
211 | * |
---|
212 | * ReleaseCharSet -- |
---|
213 | * |
---|
214 | * Free the storage associated with a character set. |
---|
215 | * |
---|
216 | * Results: |
---|
217 | * None. |
---|
218 | * |
---|
219 | * Side effects: |
---|
220 | * None. |
---|
221 | * |
---|
222 | *---------------------------------------------------------------------- |
---|
223 | */ |
---|
224 | |
---|
225 | static void |
---|
226 | ReleaseCharSet( |
---|
227 | CharSet *cset) |
---|
228 | { |
---|
229 | ckfree((char *)cset->chars); |
---|
230 | if (cset->ranges) { |
---|
231 | ckfree((char *)cset->ranges); |
---|
232 | } |
---|
233 | } |
---|
234 | |
---|
235 | /* |
---|
236 | *---------------------------------------------------------------------- |
---|
237 | * |
---|
238 | * ValidateFormat -- |
---|
239 | * |
---|
240 | * Parse the format string and verify that it is properly formed and that |
---|
241 | * there are exactly enough variables on the command line. |
---|
242 | * |
---|
243 | * Results: |
---|
244 | * A standard Tcl result. |
---|
245 | * |
---|
246 | * Side effects: |
---|
247 | * May place an error in the interpreter result. |
---|
248 | * |
---|
249 | *---------------------------------------------------------------------- |
---|
250 | */ |
---|
251 | |
---|
252 | static int |
---|
253 | ValidateFormat( |
---|
254 | Tcl_Interp *interp, /* Current interpreter. */ |
---|
255 | char *format, /* The format string. */ |
---|
256 | int numVars, /* The number of variables passed to the scan |
---|
257 | * command. */ |
---|
258 | int *totalSubs) /* The number of variables that will be |
---|
259 | * required. */ |
---|
260 | { |
---|
261 | int gotXpg, gotSequential, value, i, flags; |
---|
262 | char *end; |
---|
263 | Tcl_UniChar ch; |
---|
264 | int objIndex, xpgSize, nspace = numVars; |
---|
265 | int *nassign = (int *) TclStackAlloc(interp, nspace * sizeof(int)); |
---|
266 | char buf[TCL_UTF_MAX+1]; |
---|
267 | |
---|
268 | /* |
---|
269 | * Initialize an array that records the number of times a variable is |
---|
270 | * assigned to by the format string. We use this to detect if a variable |
---|
271 | * is multiply assigned or left unassigned. |
---|
272 | */ |
---|
273 | |
---|
274 | for (i = 0; i < nspace; i++) { |
---|
275 | nassign[i] = 0; |
---|
276 | } |
---|
277 | |
---|
278 | xpgSize = objIndex = gotXpg = gotSequential = 0; |
---|
279 | |
---|
280 | while (*format != '\0') { |
---|
281 | format += Tcl_UtfToUniChar(format, &ch); |
---|
282 | |
---|
283 | flags = 0; |
---|
284 | |
---|
285 | if (ch != '%') { |
---|
286 | continue; |
---|
287 | } |
---|
288 | format += Tcl_UtfToUniChar(format, &ch); |
---|
289 | if (ch == '%') { |
---|
290 | continue; |
---|
291 | } |
---|
292 | if (ch == '*') { |
---|
293 | flags |= SCAN_SUPPRESS; |
---|
294 | format += Tcl_UtfToUniChar(format, &ch); |
---|
295 | goto xpgCheckDone; |
---|
296 | } |
---|
297 | |
---|
298 | if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
---|
299 | /* |
---|
300 | * Check for an XPG3-style %n$ specification. Note: there must |
---|
301 | * not be a mixture of XPG3 specs and non-XPG3 specs in the same |
---|
302 | * format string. |
---|
303 | */ |
---|
304 | |
---|
305 | value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */ |
---|
306 | if (*end != '$') { |
---|
307 | goto notXpg; |
---|
308 | } |
---|
309 | format = end+1; |
---|
310 | format += Tcl_UtfToUniChar(format, &ch); |
---|
311 | gotXpg = 1; |
---|
312 | if (gotSequential) { |
---|
313 | goto mixedXPG; |
---|
314 | } |
---|
315 | objIndex = value - 1; |
---|
316 | if ((objIndex < 0) || (numVars && (objIndex >= numVars))) { |
---|
317 | goto badIndex; |
---|
318 | } else if (numVars == 0) { |
---|
319 | /* |
---|
320 | * In the case where no vars are specified, the user can |
---|
321 | * specify %9999$ legally, so we have to consider special |
---|
322 | * rules for growing the assign array. 'value' is guaranteed |
---|
323 | * to be > 0. |
---|
324 | */ |
---|
325 | xpgSize = (xpgSize > value) ? xpgSize : value; |
---|
326 | } |
---|
327 | goto xpgCheckDone; |
---|
328 | } |
---|
329 | |
---|
330 | notXpg: |
---|
331 | gotSequential = 1; |
---|
332 | if (gotXpg) { |
---|
333 | mixedXPG: |
---|
334 | Tcl_SetResult(interp, |
---|
335 | "cannot mix \"%\" and \"%n$\" conversion specifiers", |
---|
336 | TCL_STATIC); |
---|
337 | goto error; |
---|
338 | } |
---|
339 | |
---|
340 | xpgCheckDone: |
---|
341 | /* |
---|
342 | * Parse any width specifier. |
---|
343 | */ |
---|
344 | |
---|
345 | if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
---|
346 | value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */ |
---|
347 | flags |= SCAN_WIDTH; |
---|
348 | format += Tcl_UtfToUniChar(format, &ch); |
---|
349 | } |
---|
350 | |
---|
351 | /* |
---|
352 | * Handle any size specifier. |
---|
353 | */ |
---|
354 | |
---|
355 | switch (ch) { |
---|
356 | case 'l': |
---|
357 | if (*format == 'l') { |
---|
358 | flags |= SCAN_BIG; |
---|
359 | format += 1; |
---|
360 | format += Tcl_UtfToUniChar(format, &ch); |
---|
361 | break; |
---|
362 | } |
---|
363 | case 'L': |
---|
364 | flags |= SCAN_LONGER; |
---|
365 | case 'h': |
---|
366 | format += Tcl_UtfToUniChar(format, &ch); |
---|
367 | } |
---|
368 | |
---|
369 | if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { |
---|
370 | goto badIndex; |
---|
371 | } |
---|
372 | |
---|
373 | /* |
---|
374 | * Handle the various field types. |
---|
375 | */ |
---|
376 | |
---|
377 | switch (ch) { |
---|
378 | case 'c': |
---|
379 | if (flags & SCAN_WIDTH) { |
---|
380 | Tcl_SetResult(interp, |
---|
381 | "field width may not be specified in %c conversion", |
---|
382 | TCL_STATIC); |
---|
383 | goto error; |
---|
384 | } |
---|
385 | /* |
---|
386 | * Fall through! |
---|
387 | */ |
---|
388 | case 'n': |
---|
389 | case 's': |
---|
390 | if (flags & (SCAN_LONGER|SCAN_BIG)) { |
---|
391 | invalidFieldSize: |
---|
392 | buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; |
---|
393 | Tcl_AppendResult(interp, |
---|
394 | "field size modifier may not be specified in %", buf, |
---|
395 | " conversion", NULL); |
---|
396 | goto error; |
---|
397 | } |
---|
398 | /* |
---|
399 | * Fall through! |
---|
400 | */ |
---|
401 | case 'd': |
---|
402 | case 'e': |
---|
403 | case 'f': |
---|
404 | case 'g': |
---|
405 | case 'i': |
---|
406 | case 'o': |
---|
407 | case 'x': |
---|
408 | break; |
---|
409 | case 'u': |
---|
410 | if (flags & SCAN_BIG) { |
---|
411 | Tcl_SetResult(interp, |
---|
412 | "unsigned bignum scans are invalid", TCL_STATIC); |
---|
413 | goto error; |
---|
414 | } |
---|
415 | break; |
---|
416 | /* |
---|
417 | * Bracket terms need special checking |
---|
418 | */ |
---|
419 | case '[': |
---|
420 | if (flags & (SCAN_LONGER|SCAN_BIG)) { |
---|
421 | goto invalidFieldSize; |
---|
422 | } |
---|
423 | if (*format == '\0') { |
---|
424 | goto badSet; |
---|
425 | } |
---|
426 | format += Tcl_UtfToUniChar(format, &ch); |
---|
427 | if (ch == '^') { |
---|
428 | if (*format == '\0') { |
---|
429 | goto badSet; |
---|
430 | } |
---|
431 | format += Tcl_UtfToUniChar(format, &ch); |
---|
432 | } |
---|
433 | if (ch == ']') { |
---|
434 | if (*format == '\0') { |
---|
435 | goto badSet; |
---|
436 | } |
---|
437 | format += Tcl_UtfToUniChar(format, &ch); |
---|
438 | } |
---|
439 | while (ch != ']') { |
---|
440 | if (*format == '\0') { |
---|
441 | goto badSet; |
---|
442 | } |
---|
443 | format += Tcl_UtfToUniChar(format, &ch); |
---|
444 | } |
---|
445 | break; |
---|
446 | badSet: |
---|
447 | Tcl_SetResult(interp, "unmatched [ in format string", |
---|
448 | TCL_STATIC); |
---|
449 | goto error; |
---|
450 | default: |
---|
451 | { |
---|
452 | char buf[TCL_UTF_MAX+1]; |
---|
453 | |
---|
454 | buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; |
---|
455 | Tcl_AppendResult(interp, "bad scan conversion character \"", |
---|
456 | buf, "\"", NULL); |
---|
457 | goto error; |
---|
458 | } |
---|
459 | } |
---|
460 | if (!(flags & SCAN_SUPPRESS)) { |
---|
461 | if (objIndex >= nspace) { |
---|
462 | /* |
---|
463 | * Expand the nassign buffer. If we are using XPG specifiers, |
---|
464 | * make sure that we grow to a large enough size. xpgSize is |
---|
465 | * guaranteed to be at least one larger than objIndex. |
---|
466 | */ |
---|
467 | |
---|
468 | value = nspace; |
---|
469 | if (xpgSize) { |
---|
470 | nspace = xpgSize; |
---|
471 | } else { |
---|
472 | nspace += 16; /* formerly STATIC_LIST_SIZE */ |
---|
473 | } |
---|
474 | nassign = (int *) TclStackRealloc(interp, nassign, |
---|
475 | nspace * sizeof(int)); |
---|
476 | for (i = value; i < nspace; i++) { |
---|
477 | nassign[i] = 0; |
---|
478 | } |
---|
479 | } |
---|
480 | nassign[objIndex]++; |
---|
481 | objIndex++; |
---|
482 | } |
---|
483 | } |
---|
484 | |
---|
485 | /* |
---|
486 | * Verify that all of the variable were assigned exactly once. |
---|
487 | */ |
---|
488 | |
---|
489 | if (numVars == 0) { |
---|
490 | if (xpgSize) { |
---|
491 | numVars = xpgSize; |
---|
492 | } else { |
---|
493 | numVars = objIndex; |
---|
494 | } |
---|
495 | } |
---|
496 | if (totalSubs) { |
---|
497 | *totalSubs = numVars; |
---|
498 | } |
---|
499 | for (i = 0; i < numVars; i++) { |
---|
500 | if (nassign[i] > 1) { |
---|
501 | Tcl_SetResult(interp, |
---|
502 | "variable is assigned by multiple \"%n$\" conversion specifiers", |
---|
503 | TCL_STATIC); |
---|
504 | goto error; |
---|
505 | } else if (!xpgSize && (nassign[i] == 0)) { |
---|
506 | /* |
---|
507 | * If the space is empty, and xpgSize is 0 (means XPG wasn't used, |
---|
508 | * and/or numVars != 0), then too many vars were given |
---|
509 | */ |
---|
510 | |
---|
511 | Tcl_SetResult(interp, |
---|
512 | "variable is not assigned by any conversion specifiers", |
---|
513 | TCL_STATIC); |
---|
514 | goto error; |
---|
515 | } |
---|
516 | } |
---|
517 | |
---|
518 | TclStackFree(interp, nassign); |
---|
519 | return TCL_OK; |
---|
520 | |
---|
521 | badIndex: |
---|
522 | if (gotXpg) { |
---|
523 | Tcl_SetResult(interp, "\"%n$\" argument index out of range", |
---|
524 | TCL_STATIC); |
---|
525 | } else { |
---|
526 | Tcl_SetResult(interp, |
---|
527 | "different numbers of variable names and field specifiers", |
---|
528 | TCL_STATIC); |
---|
529 | } |
---|
530 | |
---|
531 | error: |
---|
532 | TclStackFree(interp, nassign); |
---|
533 | return TCL_ERROR; |
---|
534 | } |
---|
535 | |
---|
536 | /* |
---|
537 | *---------------------------------------------------------------------- |
---|
538 | * |
---|
539 | * Tcl_ScanObjCmd -- |
---|
540 | * |
---|
541 | * This function is invoked to process the "scan" Tcl command. See the |
---|
542 | * user documentation for details on what it does. |
---|
543 | * |
---|
544 | * Results: |
---|
545 | * A standard Tcl result. |
---|
546 | * |
---|
547 | * Side effects: |
---|
548 | * See the user documentation. |
---|
549 | * |
---|
550 | *---------------------------------------------------------------------- |
---|
551 | */ |
---|
552 | |
---|
553 | /* ARGSUSED */ |
---|
554 | int |
---|
555 | Tcl_ScanObjCmd( |
---|
556 | ClientData dummy, /* Not used. */ |
---|
557 | Tcl_Interp *interp, /* Current interpreter. */ |
---|
558 | int objc, /* Number of arguments. */ |
---|
559 | Tcl_Obj *CONST objv[]) /* Argument objects. */ |
---|
560 | { |
---|
561 | char *format; |
---|
562 | int numVars, nconversions, totalVars = -1; |
---|
563 | int objIndex, offset, i, result, code; |
---|
564 | long value; |
---|
565 | CONST char *string, *end, *baseString; |
---|
566 | char op = 0; |
---|
567 | int width, underflow = 0; |
---|
568 | Tcl_WideInt wideValue; |
---|
569 | Tcl_UniChar ch, sch; |
---|
570 | Tcl_Obj **objs = NULL, *objPtr = NULL; |
---|
571 | int flags; |
---|
572 | char buf[513]; /* Temporary buffer to hold scanned number |
---|
573 | * strings before they are passed to |
---|
574 | * strtoul. */ |
---|
575 | |
---|
576 | if (objc < 3) { |
---|
577 | Tcl_WrongNumArgs(interp, 1, objv, |
---|
578 | "string format ?varName varName ...?"); |
---|
579 | return TCL_ERROR; |
---|
580 | } |
---|
581 | |
---|
582 | format = Tcl_GetStringFromObj(objv[2], NULL); |
---|
583 | numVars = objc-3; |
---|
584 | |
---|
585 | /* |
---|
586 | * Check for errors in the format string. |
---|
587 | */ |
---|
588 | |
---|
589 | if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) { |
---|
590 | return TCL_ERROR; |
---|
591 | } |
---|
592 | |
---|
593 | /* |
---|
594 | * Allocate space for the result objects. |
---|
595 | */ |
---|
596 | |
---|
597 | if (totalVars > 0) { |
---|
598 | objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars); |
---|
599 | for (i = 0; i < totalVars; i++) { |
---|
600 | objs[i] = NULL; |
---|
601 | } |
---|
602 | } |
---|
603 | |
---|
604 | string = Tcl_GetStringFromObj(objv[1], NULL); |
---|
605 | baseString = string; |
---|
606 | |
---|
607 | /* |
---|
608 | * Iterate over the format string filling in the result objects until we |
---|
609 | * reach the end of input, the end of the format string, or there is a |
---|
610 | * mismatch. |
---|
611 | */ |
---|
612 | |
---|
613 | objIndex = 0; |
---|
614 | nconversions = 0; |
---|
615 | while (*format != '\0') { |
---|
616 | int parseFlag = TCL_PARSE_NO_WHITESPACE; |
---|
617 | format += Tcl_UtfToUniChar(format, &ch); |
---|
618 | |
---|
619 | flags = 0; |
---|
620 | |
---|
621 | /* |
---|
622 | * If we see whitespace in the format, skip whitespace in the string. |
---|
623 | */ |
---|
624 | |
---|
625 | if (Tcl_UniCharIsSpace(ch)) { |
---|
626 | offset = Tcl_UtfToUniChar(string, &sch); |
---|
627 | while (Tcl_UniCharIsSpace(sch)) { |
---|
628 | if (*string == '\0') { |
---|
629 | goto done; |
---|
630 | } |
---|
631 | string += offset; |
---|
632 | offset = Tcl_UtfToUniChar(string, &sch); |
---|
633 | } |
---|
634 | continue; |
---|
635 | } |
---|
636 | |
---|
637 | if (ch != '%') { |
---|
638 | literal: |
---|
639 | if (*string == '\0') { |
---|
640 | underflow = 1; |
---|
641 | goto done; |
---|
642 | } |
---|
643 | string += Tcl_UtfToUniChar(string, &sch); |
---|
644 | if (ch != sch) { |
---|
645 | goto done; |
---|
646 | } |
---|
647 | continue; |
---|
648 | } |
---|
649 | |
---|
650 | format += Tcl_UtfToUniChar(format, &ch); |
---|
651 | if (ch == '%') { |
---|
652 | goto literal; |
---|
653 | } |
---|
654 | |
---|
655 | /* |
---|
656 | * Check for assignment suppression ('*') or an XPG3-style assignment |
---|
657 | * ('%n$'). |
---|
658 | */ |
---|
659 | |
---|
660 | if (ch == '*') { |
---|
661 | flags |= SCAN_SUPPRESS; |
---|
662 | format += Tcl_UtfToUniChar(format, &ch); |
---|
663 | } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
---|
664 | char *formatEnd; |
---|
665 | value = strtoul(format-1, &formatEnd, 10);/* INTL: "C" locale. */ |
---|
666 | if (*formatEnd == '$') { |
---|
667 | format = formatEnd+1; |
---|
668 | format += Tcl_UtfToUniChar(format, &ch); |
---|
669 | objIndex = (int) value - 1; |
---|
670 | } |
---|
671 | } |
---|
672 | |
---|
673 | /* |
---|
674 | * Parse any width specifier. |
---|
675 | */ |
---|
676 | |
---|
677 | if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ |
---|
678 | width = (int) strtoul(format-1, &format, 10);/* INTL: "C" locale. */ |
---|
679 | format += Tcl_UtfToUniChar(format, &ch); |
---|
680 | } else { |
---|
681 | width = 0; |
---|
682 | } |
---|
683 | |
---|
684 | /* |
---|
685 | * Handle any size specifier. |
---|
686 | */ |
---|
687 | |
---|
688 | switch (ch) { |
---|
689 | case 'l': |
---|
690 | if (*format == 'l') { |
---|
691 | flags |= SCAN_BIG; |
---|
692 | format += 1; |
---|
693 | format += Tcl_UtfToUniChar(format, &ch); |
---|
694 | break; |
---|
695 | } |
---|
696 | case 'L': |
---|
697 | flags |= SCAN_LONGER; |
---|
698 | /* |
---|
699 | * Fall through so we skip to the next character. |
---|
700 | */ |
---|
701 | case 'h': |
---|
702 | format += Tcl_UtfToUniChar(format, &ch); |
---|
703 | } |
---|
704 | |
---|
705 | /* |
---|
706 | * Handle the various field types. |
---|
707 | */ |
---|
708 | |
---|
709 | switch (ch) { |
---|
710 | case 'n': |
---|
711 | if (!(flags & SCAN_SUPPRESS)) { |
---|
712 | objPtr = Tcl_NewIntObj(string - baseString); |
---|
713 | Tcl_IncrRefCount(objPtr); |
---|
714 | objs[objIndex++] = objPtr; |
---|
715 | } |
---|
716 | nconversions++; |
---|
717 | continue; |
---|
718 | |
---|
719 | case 'd': |
---|
720 | op = 'i'; |
---|
721 | parseFlag |= TCL_PARSE_DECIMAL_ONLY; |
---|
722 | break; |
---|
723 | case 'i': |
---|
724 | op = 'i'; |
---|
725 | parseFlag |= TCL_PARSE_SCAN_PREFIXES; |
---|
726 | break; |
---|
727 | case 'o': |
---|
728 | op = 'i'; |
---|
729 | parseFlag |= TCL_PARSE_OCTAL_ONLY | TCL_PARSE_SCAN_PREFIXES; |
---|
730 | break; |
---|
731 | case 'x': |
---|
732 | op = 'i'; |
---|
733 | parseFlag |= TCL_PARSE_HEXADECIMAL_ONLY; |
---|
734 | break; |
---|
735 | case 'u': |
---|
736 | op = 'i'; |
---|
737 | parseFlag |= TCL_PARSE_DECIMAL_ONLY; |
---|
738 | flags |= SCAN_UNSIGNED; |
---|
739 | break; |
---|
740 | |
---|
741 | case 'f': |
---|
742 | case 'e': |
---|
743 | case 'g': |
---|
744 | op = 'f'; |
---|
745 | break; |
---|
746 | |
---|
747 | case 's': |
---|
748 | op = 's'; |
---|
749 | break; |
---|
750 | |
---|
751 | case 'c': |
---|
752 | op = 'c'; |
---|
753 | flags |= SCAN_NOSKIP; |
---|
754 | break; |
---|
755 | case '[': |
---|
756 | op = '['; |
---|
757 | flags |= SCAN_NOSKIP; |
---|
758 | break; |
---|
759 | } |
---|
760 | |
---|
761 | /* |
---|
762 | * At this point, we will need additional characters from the string |
---|
763 | * to proceed. |
---|
764 | */ |
---|
765 | |
---|
766 | if (*string == '\0') { |
---|
767 | underflow = 1; |
---|
768 | goto done; |
---|
769 | } |
---|
770 | |
---|
771 | /* |
---|
772 | * Skip any leading whitespace at the beginning of a field unless the |
---|
773 | * format suppresses this behavior. |
---|
774 | */ |
---|
775 | |
---|
776 | if (!(flags & SCAN_NOSKIP)) { |
---|
777 | while (*string != '\0') { |
---|
778 | offset = Tcl_UtfToUniChar(string, &sch); |
---|
779 | if (!Tcl_UniCharIsSpace(sch)) { |
---|
780 | break; |
---|
781 | } |
---|
782 | string += offset; |
---|
783 | } |
---|
784 | if (*string == '\0') { |
---|
785 | underflow = 1; |
---|
786 | goto done; |
---|
787 | } |
---|
788 | } |
---|
789 | |
---|
790 | /* |
---|
791 | * Perform the requested scanning operation. |
---|
792 | */ |
---|
793 | |
---|
794 | switch (op) { |
---|
795 | case 's': |
---|
796 | /* |
---|
797 | * Scan a string up to width characters or whitespace. |
---|
798 | */ |
---|
799 | |
---|
800 | if (width == 0) { |
---|
801 | width = ~0; |
---|
802 | } |
---|
803 | end = string; |
---|
804 | while (*end != '\0') { |
---|
805 | offset = Tcl_UtfToUniChar(end, &sch); |
---|
806 | if (Tcl_UniCharIsSpace(sch)) { |
---|
807 | break; |
---|
808 | } |
---|
809 | end += offset; |
---|
810 | if (--width == 0) { |
---|
811 | break; |
---|
812 | } |
---|
813 | } |
---|
814 | if (!(flags & SCAN_SUPPRESS)) { |
---|
815 | objPtr = Tcl_NewStringObj(string, end-string); |
---|
816 | Tcl_IncrRefCount(objPtr); |
---|
817 | objs[objIndex++] = objPtr; |
---|
818 | } |
---|
819 | string = end; |
---|
820 | break; |
---|
821 | |
---|
822 | case '[': { |
---|
823 | CharSet cset; |
---|
824 | |
---|
825 | if (width == 0) { |
---|
826 | width = ~0; |
---|
827 | } |
---|
828 | end = string; |
---|
829 | |
---|
830 | format = BuildCharSet(&cset, format); |
---|
831 | while (*end != '\0') { |
---|
832 | offset = Tcl_UtfToUniChar(end, &sch); |
---|
833 | if (!CharInSet(&cset, (int)sch)) { |
---|
834 | break; |
---|
835 | } |
---|
836 | end += offset; |
---|
837 | if (--width == 0) { |
---|
838 | break; |
---|
839 | } |
---|
840 | } |
---|
841 | ReleaseCharSet(&cset); |
---|
842 | |
---|
843 | if (string == end) { |
---|
844 | /* |
---|
845 | * Nothing matched the range, stop processing. |
---|
846 | */ |
---|
847 | goto done; |
---|
848 | } |
---|
849 | if (!(flags & SCAN_SUPPRESS)) { |
---|
850 | objPtr = Tcl_NewStringObj(string, end-string); |
---|
851 | Tcl_IncrRefCount(objPtr); |
---|
852 | objs[objIndex++] = objPtr; |
---|
853 | } |
---|
854 | string = end; |
---|
855 | |
---|
856 | break; |
---|
857 | } |
---|
858 | case 'c': |
---|
859 | /* |
---|
860 | * Scan a single Unicode character. |
---|
861 | */ |
---|
862 | |
---|
863 | string += Tcl_UtfToUniChar(string, &sch); |
---|
864 | if (!(flags & SCAN_SUPPRESS)) { |
---|
865 | objPtr = Tcl_NewIntObj((int)sch); |
---|
866 | Tcl_IncrRefCount(objPtr); |
---|
867 | objs[objIndex++] = objPtr; |
---|
868 | } |
---|
869 | break; |
---|
870 | |
---|
871 | case 'i': |
---|
872 | /* |
---|
873 | * Scan an unsigned or signed integer. |
---|
874 | */ |
---|
875 | objPtr = Tcl_NewLongObj(0); |
---|
876 | Tcl_IncrRefCount(objPtr); |
---|
877 | if (width == 0) { |
---|
878 | width = ~0; |
---|
879 | } |
---|
880 | if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width, |
---|
881 | &end, TCL_PARSE_INTEGER_ONLY | parseFlag)) { |
---|
882 | Tcl_DecrRefCount(objPtr); |
---|
883 | if (width < 0) { |
---|
884 | if (*end == '\0') { |
---|
885 | underflow = 1; |
---|
886 | } |
---|
887 | } else { |
---|
888 | if (end == string + width) { |
---|
889 | underflow = 1; |
---|
890 | } |
---|
891 | } |
---|
892 | goto done; |
---|
893 | } |
---|
894 | string = end; |
---|
895 | if (flags & SCAN_SUPPRESS) { |
---|
896 | Tcl_DecrRefCount(objPtr); |
---|
897 | break; |
---|
898 | } |
---|
899 | if (flags & SCAN_LONGER) { |
---|
900 | if (Tcl_GetWideIntFromObj(NULL, objPtr, &wideValue) != TCL_OK) { |
---|
901 | wideValue = ~(Tcl_WideUInt)0 >> 1; /* WIDE_MAX */ |
---|
902 | if (TclGetString(objPtr)[0] == '-') { |
---|
903 | wideValue++; /* WIDE_MAX + 1 = WIDE_MIN */ |
---|
904 | } |
---|
905 | } |
---|
906 | if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) { |
---|
907 | sprintf(buf, "%" TCL_LL_MODIFIER "u", |
---|
908 | (Tcl_WideUInt)wideValue); |
---|
909 | Tcl_SetStringObj(objPtr, buf, -1); |
---|
910 | } else { |
---|
911 | Tcl_SetWideIntObj(objPtr, wideValue); |
---|
912 | } |
---|
913 | } else if (!(flags & SCAN_BIG)) { |
---|
914 | if (TclGetLongFromObj(NULL, objPtr, &value) != TCL_OK) { |
---|
915 | if (TclGetString(objPtr)[0] == '-') { |
---|
916 | value = LONG_MIN; |
---|
917 | } else { |
---|
918 | value = LONG_MAX; |
---|
919 | } |
---|
920 | } |
---|
921 | if ((flags & SCAN_UNSIGNED) && (value < 0)) { |
---|
922 | sprintf(buf, "%lu", value); /* INTL: ISO digit */ |
---|
923 | Tcl_SetStringObj(objPtr, buf, -1); |
---|
924 | } else { |
---|
925 | Tcl_SetLongObj(objPtr, value); |
---|
926 | } |
---|
927 | } |
---|
928 | objs[objIndex++] = objPtr; |
---|
929 | break; |
---|
930 | |
---|
931 | case 'f': |
---|
932 | /* |
---|
933 | * Scan a floating point number |
---|
934 | */ |
---|
935 | |
---|
936 | objPtr = Tcl_NewDoubleObj(0.0); |
---|
937 | Tcl_IncrRefCount(objPtr); |
---|
938 | if (width == 0) { |
---|
939 | width = ~0; |
---|
940 | } |
---|
941 | if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width, |
---|
942 | &end, TCL_PARSE_DECIMAL_ONLY | TCL_PARSE_NO_WHITESPACE)) { |
---|
943 | Tcl_DecrRefCount(objPtr); |
---|
944 | if (width < 0) { |
---|
945 | if (*end == '\0') { |
---|
946 | underflow = 1; |
---|
947 | } |
---|
948 | } else { |
---|
949 | if (end == string + width) { |
---|
950 | underflow = 1; |
---|
951 | } |
---|
952 | } |
---|
953 | goto done; |
---|
954 | } else if (flags & SCAN_SUPPRESS) { |
---|
955 | Tcl_DecrRefCount(objPtr); |
---|
956 | string = end; |
---|
957 | } else { |
---|
958 | double dvalue; |
---|
959 | if (Tcl_GetDoubleFromObj(NULL, objPtr, &dvalue) != TCL_OK) { |
---|
960 | #ifdef ACCEPT_NAN |
---|
961 | if (objPtr->typePtr == &tclDoubleType) { |
---|
962 | dValue = objPtr->internalRep.doubleValue; |
---|
963 | } else |
---|
964 | #endif |
---|
965 | { |
---|
966 | Tcl_DecrRefCount(objPtr); |
---|
967 | goto done; |
---|
968 | } |
---|
969 | } |
---|
970 | Tcl_SetDoubleObj(objPtr, dvalue); |
---|
971 | objs[objIndex++] = objPtr; |
---|
972 | string = end; |
---|
973 | } |
---|
974 | } |
---|
975 | nconversions++; |
---|
976 | } |
---|
977 | |
---|
978 | done: |
---|
979 | result = 0; |
---|
980 | code = TCL_OK; |
---|
981 | |
---|
982 | if (numVars) { |
---|
983 | /* |
---|
984 | * In this case, variables were specified (classic scan). |
---|
985 | */ |
---|
986 | |
---|
987 | for (i = 0; i < totalVars; i++) { |
---|
988 | if (objs[i] == NULL) { |
---|
989 | continue; |
---|
990 | } |
---|
991 | result++; |
---|
992 | if (Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0) == NULL) { |
---|
993 | Tcl_AppendResult(interp, "couldn't set variable \"", |
---|
994 | TclGetString(objv[i+3]), "\"", NULL); |
---|
995 | code = TCL_ERROR; |
---|
996 | } |
---|
997 | Tcl_DecrRefCount(objs[i]); |
---|
998 | } |
---|
999 | } else { |
---|
1000 | /* |
---|
1001 | * Here no vars were specified, we want a list returned (inline scan) |
---|
1002 | */ |
---|
1003 | |
---|
1004 | objPtr = Tcl_NewObj(); |
---|
1005 | for (i = 0; i < totalVars; i++) { |
---|
1006 | if (objs[i] != NULL) { |
---|
1007 | Tcl_ListObjAppendElement(NULL, objPtr, objs[i]); |
---|
1008 | Tcl_DecrRefCount(objs[i]); |
---|
1009 | } else { |
---|
1010 | /* |
---|
1011 | * More %-specifiers than matching chars, so we just spit out |
---|
1012 | * empty strings for these. |
---|
1013 | */ |
---|
1014 | |
---|
1015 | Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj()); |
---|
1016 | } |
---|
1017 | } |
---|
1018 | } |
---|
1019 | if (objs != NULL) { |
---|
1020 | ckfree((char*) objs); |
---|
1021 | } |
---|
1022 | if (code == TCL_OK) { |
---|
1023 | if (underflow && (nconversions == 0)) { |
---|
1024 | if (numVars) { |
---|
1025 | objPtr = Tcl_NewIntObj(-1); |
---|
1026 | } else { |
---|
1027 | if (objPtr) { |
---|
1028 | Tcl_SetListObj(objPtr, 0, NULL); |
---|
1029 | } else { |
---|
1030 | objPtr = Tcl_NewObj(); |
---|
1031 | } |
---|
1032 | } |
---|
1033 | } else if (numVars) { |
---|
1034 | objPtr = Tcl_NewIntObj(result); |
---|
1035 | } |
---|
1036 | Tcl_SetObjResult(interp, objPtr); |
---|
1037 | } |
---|
1038 | return code; |
---|
1039 | } |
---|
1040 | |
---|
1041 | /* |
---|
1042 | * Local Variables: |
---|
1043 | * mode: c |
---|
1044 | * c-basic-offset: 4 |
---|
1045 | * fill-column: 78 |
---|
1046 | * End: |
---|
1047 | */ |
---|