Initial version of donated sources by Avertec, 3.4p5.
[tas-yagle.git] / distrib / sources / api / tcl / parse.c
1 /* $LAAS: parse.c,v 1.3 2003/07/08 17:07:55 mallet Exp $ */
2
3 /*
4 * Copyright (c) 1997 Sun Microsystems, Inc.
5 * Copyright (c) 1998 by Scriptics Corporation.
6 * Copyright (C) 2001 LAAS/CNRS
7 *
8 * This file contains procedures that parse Tcl scripts. They do so in a
9 * general-purpose fashion that can be used for many different purposes,
10 * including compilation, direct execution, code analysis, etc.
11 *
12 * These procedures are part of the standard Tcl distribution. However,
13 * they have been slightly modified to let them parse incomplete command
14 * lines and for the completion mechanism.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions are
18 * met:
19 *
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
25 * distribution.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 */
40
41 #include <ctype.h>
42 #include <string.h>
43 #include <tcl.h>
44 #include "eltclsh.h"
45 #include "parse.h"
46
/*
 * Character classification table used by the parser.
 *
 * The table has one entry per possible 8-bit character value and is
 * designed to be indexed with either signed or unsigned characters, so it
 * holds 384 entries: the first 128 cover the negative values -128..-1, the
 * next 256 cover 0..255. The last 128 entries are identical to the first
 * 128. The table is always indexed with a 128-entry offset (entry 128
 * corresponds to character value 0); use the CHAR_TYPE macro rather than
 * indexing it directly.
 *
 * CHAR_TYPE(c) yields one of the following values:
 *
 * TYPE_NORMAL -        No special significance to the Tcl parser.
 * TYPE_SPACE -         Whitespace character other than newline.
 * TYPE_COMMAND_END -   Newline or semicolon.
 * TYPE_SUBS -          Character begins a substitution or has other
 *                      special meaning in parseTokens: backslash, dollar
 *                      sign, open bracket, or null.
 * TYPE_QUOTE -         Double quote.
 * TYPE_CLOSE_PAREN -   Right parenthesis.
 * TYPE_CLOSE_BRACK -   Right square bracket.
 * TYPE_BRACE -         Curly brace (either left or right).
 *
 * The values are distinct bits so that callers can build masks of several
 * classes and test them with a single "&".
 */

#define TYPE_NORMAL             0
#define TYPE_SPACE              0x1
#define TYPE_COMMAND_END        0x2
#define TYPE_SUBS               0x4
#define TYPE_QUOTE              0x8
#define TYPE_CLOSE_PAREN        0x10
#define TYPE_CLOSE_BRACK        0x20
#define TYPE_BRACE              0x40

#define CHAR_TYPE(c) ((typeTable + 128)[(int)(c)])

/*
 * Sixteen consecutive TYPE_NORMAL entries; only used to keep the table
 * below readable and undefined again right after it.
 */

#define NORMAL_X16 \
    TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, \
    TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, \
    TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, \
    TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL, TYPE_NORMAL

static const char typeTable[384] = {
    /*
     * Negative character values, from -128 to -1:
     */

    NORMAL_X16, NORMAL_X16, NORMAL_X16, NORMAL_X16,
    NORMAL_X16, NORMAL_X16, NORMAL_X16, NORMAL_X16,

    /*
     * Positive character values, from 0-127:
     */

    /* 0x00: NUL */
    TYPE_SUBS,        TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x08: BS, \t, \n, \v, \f, \r */
    TYPE_NORMAL,      TYPE_SPACE,       TYPE_COMMAND_END, TYPE_SPACE,
    TYPE_SPACE,       TYPE_SPACE,       TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x10 - 0x1f */
    NORMAL_X16,
    /* 0x20: space ! " # $ % & ' */
    TYPE_SPACE,       TYPE_NORMAL,      TYPE_QUOTE,       TYPE_NORMAL,
    TYPE_SUBS,        TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x28: ( ) * + , - . / */
    TYPE_NORMAL,      TYPE_CLOSE_PAREN, TYPE_NORMAL,      TYPE_NORMAL,
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x30: 0 - 7 */
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x38: 8 9 : ; < = > ? */
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_COMMAND_END,
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x40 - 0x4f: @ A - O */
    NORMAL_X16,
    /* 0x50: P - W */
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x58: X Y Z [ \ ] ^ _ */
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_SUBS,
    TYPE_SUBS,        TYPE_CLOSE_BRACK, TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x60 - 0x6f: ` a - o */
    NORMAL_X16,
    /* 0x70: p - w */
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,
    /* 0x78: x y z { | } ~ DEL */
    TYPE_NORMAL,      TYPE_NORMAL,      TYPE_NORMAL,      TYPE_BRACE,
    TYPE_NORMAL,      TYPE_BRACE,       TYPE_NORMAL,      TYPE_NORMAL,

    /*
     * Large unsigned character values, from 128-255:
     */

    NORMAL_X16, NORMAL_X16, NORMAL_X16, NORMAL_X16,
    NORMAL_X16, NORMAL_X16, NORMAL_X16, NORMAL_X16
};

#undef NORMAL_X16
197
198 /*
199 * Prototypes for local procedures defined in this file:
200 */
201
202 static int parseTokens(char *src, int mask, ElTclParse *parsePtr);
203 static void expandTokenArray(ElTclParse *parsePtr);
204 static int parseVarName(char *string, int numBytes,
205 ElTclParse *parsePtr);
206 static int parseBraces(char *string, int numBytes,
207 ElTclParse *parsePtr, char ** termPtr);
208 static int parseQuotedString(char *string, int numBytes,
209 ElTclParse * parsePtr, char **termPtr);
210
211 /*
212 *----------------------------------------------------------------------
213 *
214 * elTclParseCommand --
215 *
216 * Given a string, this procedure parses the first Tcl command in
217 * the string and returns information about the structure of the
218 * command.
219 *
220 * Results:
221 * The return value is TCL_OK if the command was parsed successfully
222 * and TCL_ERROR otherwise. On a successful return, parsePtr is
223 * filled in with information about the command that was
224 * parsed. Note that unlike with the similar Tcl function, an error
225 * is *not* returned if the script in an incomplete command.
226 *
227 * Side effects:
228 * If there is insufficient space in parsePtr to hold all the
229 * information about the command, then additional space is
230 * malloc-ed. If the procedure returns TCL_OK then the caller must
231 * eventually invoke Tcl_FreeParse to release any additional space
232 * that was allocated.
233 *
234 *----------------------------------------------------------------------
235 */
236
237 int
238 elTclParseCommand(string, numBytes, nested, parsePtr)
239 char *string; /* First character of string containing
240 * one or more Tcl commands. The string
241 * must be in writable memory and must
242 * have one additional byte of space at
243 * string[length] where we can
244 * temporarily store a 0 sentinel
245 * character. */
246 int numBytes; /* Total number of bytes in string. If < 0,
247 * the script consists of all bytes up to
248 * the first null character. */
249 int nested; /* Non-zero means this is a nested command:
250 * close bracket should be considered
251 * a command terminator. If zero, then close
252 * bracket has no special meaning. */
253 register ElTclParse *parsePtr;
254 /* Structure to fill in with information
255 * about the parsed command; any previous
256 * information in the structure is
257 * ignored. */
258 {
259 register char *src; /* Points to current character
260 * in the command. */
261 int type; /* Result returned by CHAR_TYPE(*src). */
262 ElTclToken *tokenPtr; /* Pointer to token being filled in. */
263 int wordIndex; /* Index of word token for current word. */
264 char utfBytes[TCL_UTF_MAX]; /* Holds result of backslash substitution. */
265 int terminators; /* CHAR_TYPE bits that indicate the end
266 * of a command. */
267 char *termPtr; /* Set by Tcl_ParseBraces/QuotedString to
268 * point to char after terminating one. */
269 int length, savedChar;
270
271
272 if (numBytes < 0) {
273 numBytes = (string? strlen(string) : 0);
274 }
275
276 parsePtr->commentStart = NULL;
277 parsePtr->commentSize = 0;
278 parsePtr->commandStart = NULL;
279 parsePtr->commandSize = 0;
280 parsePtr->numWords = 0;
281 parsePtr->tokenPtr = parsePtr->staticTokens;
282 parsePtr->numTokens = 0;
283 parsePtr->tokensAvailable = NUM_STATIC_TOKENS;
284 parsePtr->string = string;
285 parsePtr->end = string + numBytes;
286 parsePtr->term = parsePtr->end;
287 parsePtr->incomplete = 0;
288 parsePtr->errorType = TCL_PARSE_SUCCESS;
289
290 if (nested != 0) {
291 terminators = TYPE_COMMAND_END | TYPE_CLOSE_BRACK;
292 } else {
293 terminators = TYPE_COMMAND_END;
294 }
295
296 /*
297 * Temporarily overwrite the character just after the end of the
298 * string with a 0 byte. This acts as a sentinel and reduces the
299 * number of places where we have to check for the end of the
300 * input string. The original value of the byte is restored at
301 * the end of the parse.
302 */
303
304 savedChar = string[numBytes];
305 if (savedChar != 0) {
306 string[numBytes] = 0;
307 }
308
309 /*
310 * Parse any leading space and comments before the first word of the
311 * command.
312 */
313
314 src = string;
315 while (1) {
316 while ((CHAR_TYPE(*src) == TYPE_SPACE) || (*src == '\n')) {
317 src++;
318 }
319 if ((*src == '\\') && (src[1] == '\n')) {
320 /*
321 * Skip backslash-newline sequence: it should be treated
322 * just like white space.
323 */
324
325 if ((src + 2) == parsePtr->end) {
326 parsePtr->incomplete = 1;
327 }
328 src += 2;
329 continue;
330 }
331 if (*src != '#') {
332 break;
333 }
334 if (parsePtr->commentStart == NULL) {
335 parsePtr->commentStart = src;
336 }
337 while (1) {
338 if (src == parsePtr->end) {
339 if (nested) {
340 parsePtr->incomplete = nested;
341 }
342 parsePtr->commentSize = src - parsePtr->commentStart;
343 break;
344 } else if (*src == '\\') {
345 if ((src[1] == '\n') && ((src + 2) == parsePtr->end)) {
346 parsePtr->incomplete = 1;
347 }
348 Tcl_UtfBackslash(src, &length, utfBytes);
349 src += length;
350 } else if (*src == '\n') {
351 src++;
352 parsePtr->commentSize = src - parsePtr->commentStart;
353 break;
354 } else {
355 src++;
356 }
357 }
358 }
359
360 /*
361 * The following loop parses the words of the command, one word
362 * in each iteration through the loop.
363 */
364
365 parsePtr->commandStart = src;
366 while (1) {
367 /*
368 * Create the token for the word.
369 */
370
371 if (parsePtr->numTokens == parsePtr->tokensAvailable) {
372 expandTokenArray(parsePtr);
373 }
374 wordIndex = parsePtr->numTokens;
375 tokenPtr = &parsePtr->tokenPtr[wordIndex];
376 tokenPtr->type = TCL_TOKEN_WORD;
377
378 /*
379 * Skip white space before the word. Also skip a backslash-newline
380 * sequence: it should be treated just like white space.
381 */
382
383 while (1) {
384 type = CHAR_TYPE(*src);
385 if (type == TYPE_SPACE) {
386 src++;
387 continue;
388 } else if ((*src == '\\') && (src[1] == '\n')) {
389 if ((src + 2) == parsePtr->end) {
390 parsePtr->incomplete = 1;
391 }
392 Tcl_UtfBackslash(src, &length, utfBytes);
393 src += length;
394 continue;
395 }
396 break;
397 }
398 if ((type & terminators) != 0) {
399 parsePtr->term = src;
400 src++;
401 break;
402 }
403 if (src == parsePtr->end) {
404 break;
405 }
406 tokenPtr->start = src;
407 parsePtr->numTokens++;
408 parsePtr->numWords++;
409
410 /*
411 * At this point the word can have one of three forms: something
412 * enclosed in quotes, something enclosed in braces, or an
413 * unquoted word (anything else).
414 */
415
416 if (*src == '"') {
417 if (parseQuotedString(src, (parsePtr->end - src),
418 parsePtr, &termPtr) != TCL_OK) {
419 goto error;
420 }
421 src = termPtr;
422 } else if (*src == '{') {
423 if (parseBraces(src, (parsePtr->end - src),
424 parsePtr, &termPtr) != TCL_OK) {
425 goto error;
426 }
427 src = termPtr;
428 } else {
429 /*
430 * This is an unquoted word. Call ParseTokens and let it do
431 * all of the work.
432 */
433
434 if (parseTokens(src, TYPE_SPACE|terminators,
435 parsePtr) != TCL_OK) {
436 goto error;
437 }
438 src = parsePtr->term;
439 }
440
441 /*
442 * Finish filling in the token for the word and check for the
443 * special case of a word consisting of a single range of
444 * literal text.
445 */
446
447 tokenPtr = &parsePtr->tokenPtr[wordIndex];
448 tokenPtr->size = src - tokenPtr->start;
449 tokenPtr->numComponents = parsePtr->numTokens - (wordIndex + 1);
450 if ((tokenPtr->numComponents == 1)
451 && (tokenPtr[1].type == TCL_TOKEN_TEXT)) {
452 tokenPtr->type = TCL_TOKEN_SIMPLE_WORD;
453 }
454
455 /*
456 * Do two additional checks: (a) make sure we're really at the
457 * end of a word (there might have been garbage left after a
458 * quoted or braced word), and (b) check for the end of the
459 * command.
460 */
461
462 type = CHAR_TYPE(*src);
463 if (type == TYPE_SPACE) {
464 src++;
465 continue;
466 } else {
467 /*
468 * Backslash-newline (and any following white space) must be
469 * treated as if it were a space character.
470 */
471
472 if ((*src == '\\') && (src[1] == '\n')) {
473 if ((src + 2) == parsePtr->end) {
474 parsePtr->incomplete = 1;
475 }
476 Tcl_UtfBackslash(src, &length, utfBytes);
477 src += length;
478 continue;
479 }
480 }
481
482 if ((type & terminators) != 0) {
483 parsePtr->term = src;
484 src++;
485 break;
486 }
487 if (src == parsePtr->end) {
488 break;
489 }
490 if (src[-1] == '"') {
491 parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA;
492 } else {
493 parsePtr->errorType = TCL_PARSE_BRACE_EXTRA;
494 }
495 break;
496 }
497
498 parsePtr->commandSize = src - parsePtr->commandStart;
499 if (savedChar != 0) {
500 string[numBytes] = (char) savedChar;
501 }
502 return TCL_OK;
503
504 error:
505 if (savedChar != 0) {
506 string[numBytes] = (char) savedChar;
507 }
508 if (parsePtr->commandStart == NULL) {
509 parsePtr->commandStart = string;
510 }
511 parsePtr->commandSize = parsePtr->term - parsePtr->commandStart;
512 return TCL_OK;
513 }
514
515 /*
516 *----------------------------------------------------------------------
517 *
518 * ParseTokens --
519 *
520 * This procedure forms the heart of the Tcl parser. It parses one
521 * or more tokens from a string, up to a termination point
522 * specified by the caller. This procedure is used to parse
523 * unquoted command words (those not in quotes or braces), words in
524 * quotes, and array indices for variables.
525 *
526 * Results:
527 * Tokens are added to parsePtr and parsePtr->term is filled in
528 * with the address of the character that terminated the parse (the
529 * first one whose CHAR_TYPE matched mask or the character at
530 * parsePtr->end). The return value is TCL_OK if the parse
531 * completed successfully and TCL_ERROR otherwise.
532 *
533 * Side effects:
534 * None.
535 *
536 *----------------------------------------------------------------------
537 */
538
539 static int
540 parseTokens(src, mask, parsePtr)
541 register char *src; /* First character to parse. */
542 int mask; /* Specifies when to stop parsing. The
543 * parse stops at the first unquoted
544 * character whose CHAR_TYPE contains
545 * any of the bits in mask. */
546 ElTclParse *parsePtr; /* Information about parse in progress.
547 * Updated with additional tokens and
548 * termination information. */
549 {
550 int type, originalTokens, varToken;
551 char utfBytes[TCL_UTF_MAX];
552 ElTclToken *tokenPtr;
553 ElTclParse nested;
554
555 /*
556 * Each iteration through the following loop adds one token of
557 * type TCL_TOKEN_TEXT, TCL_TOKEN_BS, TCL_TOKEN_COMMAND, or
558 * TCL_TOKEN_VARIABLE to parsePtr. For TCL_TOKEN_VARIABLE tokens,
559 * additional tokens are added for the parsed variable name.
560 */
561
562 originalTokens = parsePtr->numTokens;
563 while (1) {
564 if (parsePtr->numTokens == parsePtr->tokensAvailable) {
565 expandTokenArray(parsePtr);
566 }
567 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];
568 tokenPtr->start = src;
569 tokenPtr->numComponents = 0;
570
571 type = CHAR_TYPE(*src);
572 if (type & mask) {
573 break;
574 }
575
576 if ((type & TYPE_SUBS) == 0) {
577 /*
578 * This is a simple range of characters. Scan to find the end
579 * of the range.
580 */
581
582 while (1) {
583 src++;
584 if (CHAR_TYPE(*src) & (mask | TYPE_SUBS)) {
585 break;
586 }
587 }
588 tokenPtr->type = TCL_TOKEN_TEXT;
589 tokenPtr->size = src - tokenPtr->start;
590 parsePtr->numTokens++;
591 } else if (*src == '$') {
592 /*
593 * This is a variable reference. Call ParseVarName to do
594 * all the dirty work of parsing the name.
595 */
596
597 varToken = parsePtr->numTokens;
598 if (parseVarName(src, parsePtr->end - src, parsePtr) != TCL_OK) {
599 return TCL_ERROR;
600 }
601 src += parsePtr->tokenPtr[varToken].size;
602 } else if (*src == '[') {
603 /*
604 * Command substitution. Call Tcl_ParseCommand recursively
605 * (and repeatedly) to parse the nested command(s), then
606 * throw away the parse information.
607 */
608
609 src++;
610 while (1) {
611 if (elTclParseCommand(src, parsePtr->end - src,
612 1, &nested) != TCL_OK) {
613 parsePtr->errorType = nested.errorType;
614 parsePtr->term = nested.term;
615 parsePtr->incomplete = nested.incomplete;
616 return TCL_ERROR;
617 }
618
619 src = nested.commandStart + nested.commandSize;
620 if (nested.tokenPtr != nested.staticTokens) {
621 ckfree((char *) nested.tokenPtr);
622 }
623 if ((*nested.term == ']') && !nested.incomplete) {
624 break;
625 }
626 if (src == parsePtr->end) {
627 parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;
628 parsePtr->incomplete = 1;
629 break;
630 }
631 }
632 tokenPtr->type = TCL_TOKEN_COMMAND;
633 tokenPtr->size = src - tokenPtr->start;
634 parsePtr->numTokens++;
635 } else if (*src == '\\') {
636 /*
637 * Backslash substitution.
638 */
639
640 if (src[1] == '\n') {
641 if ((src + 2) == parsePtr->end) {
642 parsePtr->incomplete = 1;
643 }
644
645 /*
646 * Note: backslash-newline is special in that it is
647 * treated the same as a space character would be. This
648 * means that it could terminate the token.
649 */
650
651 if (mask & TYPE_SPACE) {
652 break;
653 }
654 }
655 tokenPtr->type = TCL_TOKEN_BS;
656 Tcl_UtfBackslash(src, &tokenPtr->size, utfBytes);
657 parsePtr->numTokens++;
658 src += tokenPtr->size;
659 } else if (*src == 0) {
660 /*
661 * We encountered a null character. If it is the null
662 * character at the end of the string, then return.
663 * Otherwise generate a text token for the single
664 * character.
665 */
666
667 if (src == parsePtr->end) {
668 break;
669 }
670 tokenPtr->type = TCL_TOKEN_TEXT;
671 tokenPtr->size = 1;
672 parsePtr->numTokens++;
673 src++;
674 } else {
675 elTclFreeParse(parsePtr);
676 return TCL_ERROR;
677 }
678 }
679 if (parsePtr->numTokens == originalTokens) {
680 /*
681 * There was nothing in this range of text. Add an empty token
682 * for the empty range, so that there is always at least one
683 * token added.
684 */
685
686 tokenPtr->type = TCL_TOKEN_TEXT;
687 tokenPtr->size = 0;
688 parsePtr->numTokens++;
689 }
690 parsePtr->term = src;
691 return TCL_OK;
692 }
693
694 /*
695 *----------------------------------------------------------------------
696 *
697 * elTclFreeParse --
698 *
699 * This procedure is invoked to free any dynamic storage that may
700 * have been allocated by a previous call to Tcl_ParseCommand.
701 *
702 * Results:
703 * None.
704 *
705 * Side effects:
706 * If there is any dynamically allocated memory in *parsePtr,
707 * it is freed.
708 *
709 *----------------------------------------------------------------------
710 */
711
712 void
713 elTclFreeParse(parsePtr)
714 ElTclParse *parsePtr; /* Structure that was filled in by a
715 * previous call to Tcl_ParseCommand. */
716 {
717 if (parsePtr->tokenPtr != parsePtr->staticTokens) {
718 ckfree((char *) parsePtr->tokenPtr);
719 parsePtr->tokenPtr = parsePtr->staticTokens;
720 }
721 }
722
723 /*
724 *----------------------------------------------------------------------
725 *
726 * ExpandTokenArray --
727 *
728 * This procedure is invoked when the current space for tokens in
729 * a Tcl_Parse structure fills up; it allocates memory to grow the
730 * token array
731 *
732 * Results:
733 * None.
734 *
735 * Side effects:
736 * Memory is allocated for a new larger token array; the memory
737 * for the old array is freed, if it had been dynamically allocated.
738 *
739 *----------------------------------------------------------------------
740 */
741
742 static void
743 expandTokenArray(parsePtr)
744 ElTclParse *parsePtr; /* Parse structure whose token space
745 * has overflowed. */
746 {
747 int newCount;
748 ElTclToken *newPtr;
749
750 newCount = parsePtr->tokensAvailable*2;
751 newPtr = (ElTclToken *) ckalloc((unsigned)(newCount*sizeof(ElTclToken)));
752 memcpy((VOID *) newPtr, (VOID *) parsePtr->tokenPtr,
753 (size_t) (parsePtr->tokensAvailable * sizeof(ElTclToken)));
754 if (parsePtr->tokenPtr != parsePtr->staticTokens) {
755 ckfree((char *) parsePtr->tokenPtr);
756 }
757 parsePtr->tokenPtr = newPtr;
758 parsePtr->tokensAvailable = newCount;
759 }
760
761 /*
762 *----------------------------------------------------------------------
763 *
764 * ParseVarName --
765 *
766 * Given a string starting with a $ sign, parse off a variable
767 * name and return information about the parse.
768 *
769 * Results:
770 * The return value is TCL_OK if the command was parsed successfully
771 * and TCL_ERROR otherwise. On a successful return, tokenPtr and
772 * numTokens fields of parsePtr are filled in with information about
773 * the variable name that was parsed. The "size" field of the first
774 * new token gives the total number of bytes in the variable name.
775 * Other fields in parsePtr are undefined.
776 *
777 * Side effects:
778 * If there is insufficient space in parsePtr to hold all the
779 * information about the command, then additional space is
780 * malloc-ed. If the procedure returns TCL_OK then the caller must
781 * eventually invoke Tcl_FreeParse to release any additional space
782 * that was allocated.
783 *
784 *----------------------------------------------------------------------
785 */
786
787 static int
788 parseVarName(string, numBytes, parsePtr)
789 char *string; /* String containing variable name. First
790 * character must be "$". */
791 int numBytes; /* Total number of bytes in string. If < 0,
792 * the string consists of all bytes up to the
793 * first null character. */
794 ElTclParse *parsePtr; /* Structure to fill in with information
795 * about the variable name. */
796 {
797 ElTclToken *tokenPtr;
798 char *end, *src;
799 unsigned char c;
800 int varIndex, offset;
801 Tcl_UniChar ch;
802 unsigned array;
803
804 if (numBytes >= 0) {
805 end = string + numBytes;
806 } else {
807 end = string + strlen(string);
808 }
809
810 /*
811 * Generate one token for the variable, an additional token for the
812 * name, plus any number of additional tokens for the index, if
813 * there is one.
814 */
815
816 src = string;
817 if ((parsePtr->numTokens + 2) > parsePtr->tokensAvailable) {
818 expandTokenArray(parsePtr);
819 }
820 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];
821 tokenPtr->type = TCL_TOKEN_VARIABLE;
822 tokenPtr->start = src;
823 varIndex = parsePtr->numTokens;
824 parsePtr->numTokens++;
825 tokenPtr++;
826 src++;
827 if (src >= end) {
828 goto justADollarSign;
829 }
830 tokenPtr->type = TCL_TOKEN_TEXT;
831 tokenPtr->start = src;
832 tokenPtr->numComponents = 0;
833
834 /*
835 * The name of the variable can have three forms:
836 * 1. The $ sign is followed by an open curly brace. Then
837 * the variable name is everything up to the next close
838 * curly brace, and the variable is a scalar variable.
839 * 2. The $ sign is not followed by an open curly brace. Then
840 * the variable name is everything up to the next
841 * character that isn't a letter, digit, or underscore.
842 * :: sequences are also considered part of the variable
843 * name, in order to support namespaces. If the following
844 * character is an open parenthesis, then the information
845 * between parentheses is the array element name.
846 * 3. The $ sign is followed by something that isn't a letter,
847 * digit, or underscore: in this case, there is no variable
848 * name and the token is just "$".
849 */
850
851 if (*src == '{') {
852 src++;
853 tokenPtr->type = TCL_TOKEN_TEXT;
854 tokenPtr->start = src;
855 tokenPtr->numComponents = 0;
856 while (1) {
857 if (src == end) {
858 parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE;
859 parsePtr->incomplete = 1;
860 break;
861 }
862 if (*src == '}') {
863 break;
864 }
865 src++;
866 }
867 tokenPtr->size = src - tokenPtr->start;
868 tokenPtr[-1].size = src - tokenPtr[-1].start;
869 parsePtr->numTokens++;
870 if (src < end) src++;
871 } else {
872 tokenPtr->type = TCL_TOKEN_TEXT;
873 tokenPtr->start = src;
874 tokenPtr->numComponents = 0;
875 while (src != end) {
876 offset = Tcl_UtfToUniChar(src, &ch);
877 c = (unsigned char)(ch);
878 if (isalnum(c) || (c == '_')) { /* INTL: ISO only, UCHAR. */
879 src += offset;
880 continue;
881 }
882 if ((c == ':') && (((src+1) != end) && (src[1] == ':'))) {
883 src += 2;
884 while ((src != end) && (*src == ':')) {
885 src += 1;
886 }
887 continue;
888 }
889 break;
890 }
891
892 /*
893 * Support for empty array names here.
894 */
895 array = ((src != end) && (*src == '('));
896 tokenPtr->size = src - tokenPtr->start;
897 if (tokenPtr->size == 0 && !array) {
898 goto justADollarSign;
899 }
900 parsePtr->numTokens++;
901 if (array) {
902 /*
903 * This is a reference to an array element. Call
904 * ParseTokens recursively to parse the element name,
905 * since it could contain any number of substitutions.
906 */
907
908 if (parseTokens(src+1, TYPE_CLOSE_PAREN, parsePtr)
909 != TCL_OK) {
910 goto error;
911 }
912 if ((parsePtr->term == end) || (*parsePtr->term != ')')) {
913 parsePtr->errorType = TCL_PARSE_MISSING_PAREN;
914 parsePtr->incomplete = 1;
915 src = parsePtr->term;
916 } else
917 src = parsePtr->term + 1;
918 }
919 }
920 tokenPtr = &parsePtr->tokenPtr[varIndex];
921 tokenPtr->size = src - tokenPtr->start;
922 tokenPtr->numComponents = parsePtr->numTokens - (varIndex + 1);
923 return TCL_OK;
924
925 /*
926 * The dollar sign isn't followed by a variable name.
927 * replace the TCL_TOKEN_VARIABLE token with a
928 * TCL_TOKEN_TEXT token for the dollar sign.
929 */
930
931 justADollarSign:
932 tokenPtr = &parsePtr->tokenPtr[varIndex];
933 tokenPtr->type = TCL_TOKEN_TEXT;
934 tokenPtr->size = 1;
935 tokenPtr->numComponents = 0;
936 return TCL_OK;
937
938 error:
939 elTclFreeParse(parsePtr);
940 return TCL_ERROR;
941 }
942
943
944 /*
945 *----------------------------------------------------------------------
946 *
947 * ParseBraces --
948 *
949 * Given a string in braces such as a Tcl command argument or a string
950 * value in a Tcl expression, this procedure parses the string and
951 * returns information about the parse.
952 *
953 * Results:
954 * The return value is TCL_OK if the string was parsed successfully
955 * and TCL_ERROR otherwise. On a successful return, tokenPtr and
956 * numTokens fields of parsePtr are filled in with information about
957 * the string that was parsed. Other fields in parsePtr are
958 * undefined. termPtr is set to point to the character just after
959 * the last one in the braced string.
960 *
961 * Side effects:
962 * If there is insufficient space in parsePtr to hold all the
963 * information about the command, then additional space is
964 * malloc-ed. If the procedure returns TCL_OK then the caller must
965 * eventually invoke Tcl_FreeParse to release any additional space
966 * that was allocated.
967 *
968 *----------------------------------------------------------------------
969 */
970
971 static int
972 parseBraces(string, numBytes, parsePtr, termPtr)
973 char *string; /* String containing the string in braces.
974 * The first character must be '{'. */
975 int numBytes; /* Total number of bytes in string. If < 0,
976 * the string consists of all bytes up to
977 * the first null character. */
978 register ElTclParse *parsePtr;
979 /* Structure to fill in with information
980 * about the string. */
981 char **termPtr; /* If non-NULL, points to word in which to
982 * store a pointer to the character just
983 * after the terminating '}' if the parse
984 * was successful. */
985
986 {
987 char utfBytes[TCL_UTF_MAX]; /* For result of backslash substitution. */
988 ElTclToken *tokenPtr;
989 register char *src, *end;
990 int startIndex, level, length;
991
992 if ((numBytes >= 0) || (string == NULL)) {
993 end = string + numBytes;
994 } else {
995 end = string + strlen(string);
996 }
997
998 src = string+1;
999 startIndex = parsePtr->numTokens;
1000
1001 if (parsePtr->numTokens == parsePtr->tokensAvailable) {
1002 expandTokenArray(parsePtr);
1003 }
1004 tokenPtr = &parsePtr->tokenPtr[startIndex];
1005 tokenPtr->type = TCL_TOKEN_TEXT;
1006 tokenPtr->start = src;
1007 tokenPtr->numComponents = 0;
1008 level = 1;
1009 while (1) {
1010 while (CHAR_TYPE(*src) == TYPE_NORMAL) {
1011 src++;
1012 }
1013 if (*src == '}') {
1014 level--;
1015 if (level == 0) {
1016 break;
1017 }
1018 src++;
1019 } else if (*src == '{') {
1020 level++;
1021 src++;
1022 } else if (*src == '\\') {
1023 Tcl_UtfBackslash(src, &length, utfBytes);
1024 if (src[1] == '\n') {
1025 /*
1026 * A backslash-newline sequence must be collapsed, even
1027 * inside braces, so we have to split the word into
1028 * multiple tokens so that the backslash-newline can be
1029 * represented explicitly.
1030 */
1031
1032 if ((src + 2) == end) {
1033 parsePtr->incomplete = 1;
1034 }
1035 tokenPtr->size = (src - tokenPtr->start);
1036 if (tokenPtr->size != 0) {
1037 parsePtr->numTokens++;
1038 }
1039 if ((parsePtr->numTokens+1) >= parsePtr->tokensAvailable) {
1040 expandTokenArray(parsePtr);
1041 }
1042 tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];
1043 tokenPtr->type = TCL_TOKEN_BS;
1044 tokenPtr->start = src;
1045 tokenPtr->size = length;
1046 tokenPtr->numComponents = 0;
1047 parsePtr->numTokens++;
1048
1049 src += length;
1050 tokenPtr++;
1051 tokenPtr->type = TCL_TOKEN_TEXT;
1052 tokenPtr->start = src;
1053 tokenPtr->numComponents = 0;
1054 } else {
1055 src += length;
1056 }
1057 } else if (src == end) {
1058 parsePtr->errorType = TCL_PARSE_MISSING_BRACE;
1059 parsePtr->incomplete = 1;
1060 break;
1061 } else {
1062 src++;
1063 }
1064 }
1065
1066 /*
1067 * Decide if we need to finish emitting a partially-finished token.
1068 * There are 3 cases:
1069 * {abc \newline xyz} or {xyz} - finish emitting "xyz" token
1070 * {abc \newline} - don't emit token after \newline
1071 * {} - finish emitting zero-sized token
1072 * The last case ensures that there is a token (even if empty) that
1073 * describes the braced string.
1074 */
1075
1076 if ((src != tokenPtr->start)
1077 || (parsePtr->numTokens == startIndex)) {
1078 tokenPtr->size = (src - tokenPtr->start);
1079 parsePtr->numTokens++;
1080 }
1081 if (termPtr != NULL) {
1082 *termPtr = src+1;
1083 }
1084 return TCL_OK;
1085 }
1086
1087
1088 /*
1089 *----------------------------------------------------------------------
1090 *
1091 * ParseQuotedString --
1092 *
1093 * Given a double-quoted string such as a quoted Tcl command argument
1094 * or a quoted value in a Tcl expression, this procedure parses the
1095 * string and returns information about the parse.
1096 *
1097 * Results:
1098 * The return value is TCL_OK if the string was parsed successfully
1099 * and TCL_ERROR otherwise. On a successful return, tokenPtr and
1100 * numTokens fields of parsePtr are filled in with information about
1101 * the string that was parsed. Other fields in parsePtr are
1102 * undefined. termPtr is set to point to the character just after
1103 * the quoted string's terminating close-quote.
1104 *
1105 * Side effects:
1106 * If there is insufficient space in parsePtr to hold all the
1107 * information about the command, then additional space is
1108 * malloc-ed. If the procedure returns TCL_OK then the caller must
1109 * eventually invoke Tcl_FreeParse to release any additional space
1110 * that was allocated.
1111 *
1112 *----------------------------------------------------------------------
1113 */
1114
1115 static int
1116 parseQuotedString(string, numBytes, parsePtr, termPtr)
1117 char *string; /* String containing the quoted string.
1118 * The first character must be '"'. */
1119 int numBytes; /* Total number of bytes in string. If < 0,
1120 * the string consists of all bytes up to
1121 * the first null character. */
1122 register ElTclParse *parsePtr;
1123 /* Structure to fill in with information
1124 * about the string. */
1125 char **termPtr; /* If non-NULL, points to word in which to
1126 * store a pointer to the character just
1127 * after the quoted string's terminating
1128 * close-quote if the parse succeeds. */
1129 {
1130 char *end;
1131
1132 if ((numBytes >= 0) || (string == NULL)) {
1133 end = string + numBytes;
1134 } else {
1135 end = string + strlen(string);
1136 }
1137
1138 if (parseTokens(string+1, TYPE_QUOTE, parsePtr) != TCL_OK) {
1139 goto error;
1140 }
1141 if (*parsePtr->term != '"') {
1142 parsePtr->errorType = TCL_PARSE_MISSING_QUOTE;
1143 parsePtr->incomplete = 1;
1144 if (termPtr != NULL) {
1145 *termPtr = parsePtr->term;
1146 }
1147 } else {
1148 if (termPtr != NULL) {
1149 *termPtr = (parsePtr->term + 1);
1150 }
1151 }
1152 return TCL_OK;
1153
1154 error:
1155 elTclFreeParse(parsePtr);
1156 return TCL_ERROR;
1157 }