1 /* $LAAS: parse.c,v 1.3 2003/07/08 17:07:55 mallet Exp $ */
4 * Copyright (c) 1997 Sun Microsystems, Inc.
5 * Copyright (c) 1998 by Scriptics Corporation.
6 * Copyright (C) 2001 LAAS/CNRS
8 * This file contains procedures that parse Tcl scripts. They do so in a
9 * general-purpose fashion that can be used for many different purposes,
10 * including compilation, direct execution, code analysis, etc.
12 * These procedures are part of the standard Tcl distribution. However,
13 * they have been slightly modified to let them parse incomplete command
14 * lines and for the completion mechanism.
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions are
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
48 * The following table provides parsing information about each possible
49 * 8-bit character. The table is designed to be referenced with either
50 * signed or unsigned characters, so it has 384 entries. The first 128
51 * entries correspond to negative character values, the next 256 correspond
52 * to positive character values. The last 128 entries are identical to the
53 * first 128. The table is always indexed with a 128-byte offset (the 128th
54 * entry corresponds to a character value of 0).
56 * The macro CHAR_TYPE is used to index into the table and return
57 * information about its character argument. The following return
60 * TYPE_NORMAL - All characters that don't have special significance
62 * TYPE_SPACE - The character is a whitespace character other
64 * TYPE_COMMAND_END - Character is newline or semicolon.
65 * TYPE_SUBS - Character begins a substitution or has other
66 * special meaning in ParseTokens: backslash, dollar
67 * sign, open bracket, or null.
68 * TYPE_QUOTE - Character is a double quote.
69 * TYPE_CLOSE_PAREN - Character is a right parenthesis.
70 * TYPE_CLOSE_BRACK - Character is a right square bracket.
71 * TYPE_BRACE - Character is a curly brace (either left or right).
75 #define TYPE_SPACE 0x1
76 #define TYPE_COMMAND_END 0x2
78 #define TYPE_QUOTE 0x8
79 #define TYPE_CLOSE_PAREN 0x10
80 #define TYPE_CLOSE_BRACK 0x20
81 #define TYPE_BRACE 0x40
83 #define CHAR_TYPE(c) (typeTable+128)[(int)(c)]
85 static char typeTable
[] = {
87 * Negative character values, from -128 to -1:
90 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
91 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
92 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
93 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
94 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
95 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
96 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
97 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
98 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
99 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
100 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
101 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
102 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
103 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
104 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
105 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
106 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
107 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
108 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
109 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
110 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
111 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
112 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
113 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
114 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
115 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
116 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
117 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
118 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
119 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
120 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
121 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
124 * Positive character values, from 0-127:
127 TYPE_SUBS
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
128 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
129 TYPE_NORMAL
, TYPE_SPACE
, TYPE_COMMAND_END
, TYPE_SPACE
,
130 TYPE_SPACE
, TYPE_SPACE
, TYPE_NORMAL
, TYPE_NORMAL
,
131 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
132 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
133 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
134 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
135 TYPE_SPACE
, TYPE_NORMAL
, TYPE_QUOTE
, TYPE_NORMAL
,
136 TYPE_SUBS
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
137 TYPE_NORMAL
, TYPE_CLOSE_PAREN
, TYPE_NORMAL
, TYPE_NORMAL
,
138 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
139 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
140 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
141 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_COMMAND_END
,
142 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
143 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
144 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
145 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
146 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
147 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
148 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
149 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_SUBS
,
150 TYPE_SUBS
, TYPE_CLOSE_BRACK
, TYPE_NORMAL
, TYPE_NORMAL
,
151 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
152 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
153 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
154 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
155 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
156 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
157 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_BRACE
,
158 TYPE_NORMAL
, TYPE_BRACE
, TYPE_NORMAL
, TYPE_NORMAL
,
161 * Large unsigned character values, from 128-255:
164 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
165 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
166 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
167 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
168 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
169 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
170 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
171 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
172 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
173 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
174 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
175 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
176 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
177 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
178 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
179 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
180 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
181 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
182 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
183 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
184 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
185 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
186 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
187 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
188 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
189 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
190 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
191 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
192 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
193 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
194 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
195 TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
, TYPE_NORMAL
,
199 * Prototypes for local procedures defined in this file:
202 static int parseTokens(char *src
, int mask
, ElTclParse
*parsePtr
);
203 static void expandTokenArray(ElTclParse
*parsePtr
);
204 static int parseVarName(char *string
, int numBytes
,
205 ElTclParse
*parsePtr
);
206 static int parseBraces(char *string
, int numBytes
,
207 ElTclParse
*parsePtr
, char ** termPtr
);
208 static int parseQuotedString(char *string
, int numBytes
,
209 ElTclParse
* parsePtr
, char **termPtr
);
212 *----------------------------------------------------------------------
214 * elTclParseCommand --
216 * Given a string, this procedure parses the first Tcl command in
217 * the string and returns information about the structure of the
221 * The return value is TCL_OK if the command was parsed successfully
222 * and TCL_ERROR otherwise. On a successful return, parsePtr is
223 * filled in with information about the command that was
224 * parsed. Note that unlike with the similar Tcl function, an error
225 * is *not* returned if the script is an incomplete command.
228 * If there is insufficient space in parsePtr to hold all the
229 * information about the command, then additional space is
230 * malloc-ed. If the procedure returns TCL_OK then the caller must
231 * eventually invoke elTclFreeParse to release any additional space
232 * that was allocated.
234 *----------------------------------------------------------------------
238 elTclParseCommand(string
, numBytes
, nested
, parsePtr
)
239 char *string
; /* First character of string containing
240 * one or more Tcl commands. The string
241 * must be in writable memory and must
242 * have one additional byte of space at
243 * string[length] where we can
244 * temporarily store a 0 sentinel
246 int numBytes
; /* Total number of bytes in string. If < 0,
247 * the script consists of all bytes up to
248 * the first null character. */
249 int nested
; /* Non-zero means this is a nested command:
250 * close bracket should be considered
251 * a command terminator. If zero, then close
252 * bracket has no special meaning. */
253 register ElTclParse
*parsePtr
;
254 /* Structure to fill in with information
255 * about the parsed command; any previous
256 * information in the structure is
259 register char *src
; /* Points to current character
261 int type
; /* Result returned by CHAR_TYPE(*src). */
262 ElTclToken
*tokenPtr
; /* Pointer to token being filled in. */
263 int wordIndex
; /* Index of word token for current word. */
264 char utfBytes
[TCL_UTF_MAX
]; /* Holds result of backslash substitution. */
265 int terminators
; /* CHAR_TYPE bits that indicate the end
267 char *termPtr
; /* Set by Tcl_ParseBraces/QuotedString to
268 * point to char after terminating one. */
269 int length
, savedChar
;
273 numBytes
= (string
? strlen(string
) : 0);
276 parsePtr
->commentStart
= NULL
;
277 parsePtr
->commentSize
= 0;
278 parsePtr
->commandStart
= NULL
;
279 parsePtr
->commandSize
= 0;
280 parsePtr
->numWords
= 0;
281 parsePtr
->tokenPtr
= parsePtr
->staticTokens
;
282 parsePtr
->numTokens
= 0;
283 parsePtr
->tokensAvailable
= NUM_STATIC_TOKENS
;
284 parsePtr
->string
= string
;
285 parsePtr
->end
= string
+ numBytes
;
286 parsePtr
->term
= parsePtr
->end
;
287 parsePtr
->incomplete
= 0;
288 parsePtr
->errorType
= TCL_PARSE_SUCCESS
;
291 terminators
= TYPE_COMMAND_END
| TYPE_CLOSE_BRACK
;
293 terminators
= TYPE_COMMAND_END
;
297 * Temporarily overwrite the character just after the end of the
298 * string with a 0 byte. This acts as a sentinel and reduces the
299 * number of places where we have to check for the end of the
300 * input string. The original value of the byte is restored at
301 * the end of the parse.
304 savedChar
= string
[numBytes
];
305 if (savedChar
!= 0) {
306 string
[numBytes
] = 0;
310 * Parse any leading space and comments before the first word of the
316 while ((CHAR_TYPE(*src
) == TYPE_SPACE
) || (*src
== '\n')) {
319 if ((*src
== '\\') && (src
[1] == '\n')) {
321 * Skip backslash-newline sequence: it should be treated
322 * just like white space.
325 if ((src
+ 2) == parsePtr
->end
) {
326 parsePtr
->incomplete
= 1;
334 if (parsePtr
->commentStart
== NULL
) {
335 parsePtr
->commentStart
= src
;
338 if (src
== parsePtr
->end
) {
340 parsePtr
->incomplete
= nested
;
342 parsePtr
->commentSize
= src
- parsePtr
->commentStart
;
344 } else if (*src
== '\\') {
345 if ((src
[1] == '\n') && ((src
+ 2) == parsePtr
->end
)) {
346 parsePtr
->incomplete
= 1;
348 Tcl_UtfBackslash(src
, &length
, utfBytes
);
350 } else if (*src
== '\n') {
352 parsePtr
->commentSize
= src
- parsePtr
->commentStart
;
361 * The following loop parses the words of the command, one word
362 * in each iteration through the loop.
365 parsePtr
->commandStart
= src
;
368 * Create the token for the word.
371 if (parsePtr
->numTokens
== parsePtr
->tokensAvailable
) {
372 expandTokenArray(parsePtr
);
374 wordIndex
= parsePtr
->numTokens
;
375 tokenPtr
= &parsePtr
->tokenPtr
[wordIndex
];
376 tokenPtr
->type
= TCL_TOKEN_WORD
;
379 * Skip white space before the word. Also skip a backslash-newline
380 * sequence: it should be treated just like white space.
384 type
= CHAR_TYPE(*src
);
385 if (type
== TYPE_SPACE
) {
388 } else if ((*src
== '\\') && (src
[1] == '\n')) {
389 if ((src
+ 2) == parsePtr
->end
) {
390 parsePtr
->incomplete
= 1;
392 Tcl_UtfBackslash(src
, &length
, utfBytes
);
398 if ((type
& terminators
) != 0) {
399 parsePtr
->term
= src
;
403 if (src
== parsePtr
->end
) {
406 tokenPtr
->start
= src
;
407 parsePtr
->numTokens
++;
408 parsePtr
->numWords
++;
411 * At this point the word can have one of three forms: something
412 * enclosed in quotes, something enclosed in braces, or an
413 * unquoted word (anything else).
417 if (parseQuotedString(src
, (parsePtr
->end
- src
),
418 parsePtr
, &termPtr
) != TCL_OK
) {
422 } else if (*src
== '{') {
423 if (parseBraces(src
, (parsePtr
->end
- src
),
424 parsePtr
, &termPtr
) != TCL_OK
) {
430 * This is an unquoted word. Call ParseTokens and let it do
434 if (parseTokens(src
, TYPE_SPACE
|terminators
,
435 parsePtr
) != TCL_OK
) {
438 src
= parsePtr
->term
;
442 * Finish filling in the token for the word and check for the
443 * special case of a word consisting of a single range of
447 tokenPtr
= &parsePtr
->tokenPtr
[wordIndex
];
448 tokenPtr
->size
= src
- tokenPtr
->start
;
449 tokenPtr
->numComponents
= parsePtr
->numTokens
- (wordIndex
+ 1);
450 if ((tokenPtr
->numComponents
== 1)
451 && (tokenPtr
[1].type
== TCL_TOKEN_TEXT
)) {
452 tokenPtr
->type
= TCL_TOKEN_SIMPLE_WORD
;
456 * Do two additional checks: (a) make sure we're really at the
457 * end of a word (there might have been garbage left after a
458 * quoted or braced word), and (b) check for the end of the
462 type
= CHAR_TYPE(*src
);
463 if (type
== TYPE_SPACE
) {
468 * Backslash-newline (and any following white space) must be
469 * treated as if it were a space character.
472 if ((*src
== '\\') && (src
[1] == '\n')) {
473 if ((src
+ 2) == parsePtr
->end
) {
474 parsePtr
->incomplete
= 1;
476 Tcl_UtfBackslash(src
, &length
, utfBytes
);
482 if ((type
& terminators
) != 0) {
483 parsePtr
->term
= src
;
487 if (src
== parsePtr
->end
) {
490 if (src
[-1] == '"') {
491 parsePtr
->errorType
= TCL_PARSE_QUOTE_EXTRA
;
493 parsePtr
->errorType
= TCL_PARSE_BRACE_EXTRA
;
498 parsePtr
->commandSize
= src
- parsePtr
->commandStart
;
499 if (savedChar
!= 0) {
500 string
[numBytes
] = (char) savedChar
;
505 if (savedChar
!= 0) {
506 string
[numBytes
] = (char) savedChar
;
508 if (parsePtr
->commandStart
== NULL
) {
509 parsePtr
->commandStart
= string
;
511 parsePtr
->commandSize
= parsePtr
->term
- parsePtr
->commandStart
;
516 *----------------------------------------------------------------------
520 * This procedure forms the heart of the Tcl parser. It parses one
521 * or more tokens from a string, up to a termination point
522 * specified by the caller. This procedure is used to parse
523 * unquoted command words (those not in quotes or braces), words in
524 * quotes, and array indices for variables.
527 * Tokens are added to parsePtr and parsePtr->term is filled in
528 * with the address of the character that terminated the parse (the
529 * first one whose CHAR_TYPE matched mask or the character at
530 * parsePtr->end). The return value is TCL_OK if the parse
531 * completed successfully and TCL_ERROR otherwise.
536 *----------------------------------------------------------------------
540 parseTokens(src
, mask
, parsePtr
)
541 register char *src
; /* First character to parse. */
542 int mask
; /* Specifies when to stop parsing. The
543 * parse stops at the first unquoted
544 * character whose CHAR_TYPE contains
545 * any of the bits in mask. */
546 ElTclParse
*parsePtr
; /* Information about parse in progress.
547 * Updated with additional tokens and
548 * termination information. */
550 int type
, originalTokens
, varToken
;
551 char utfBytes
[TCL_UTF_MAX
];
552 ElTclToken
*tokenPtr
;
556 * Each iteration through the following loop adds one token of
557 * type TCL_TOKEN_TEXT, TCL_TOKEN_BS, TCL_TOKEN_COMMAND, or
558 * TCL_TOKEN_VARIABLE to parsePtr. For TCL_TOKEN_VARIABLE tokens,
559 * additional tokens are added for the parsed variable name.
562 originalTokens
= parsePtr
->numTokens
;
564 if (parsePtr
->numTokens
== parsePtr
->tokensAvailable
) {
565 expandTokenArray(parsePtr
);
567 tokenPtr
= &parsePtr
->tokenPtr
[parsePtr
->numTokens
];
568 tokenPtr
->start
= src
;
569 tokenPtr
->numComponents
= 0;
571 type
= CHAR_TYPE(*src
);
576 if ((type
& TYPE_SUBS
) == 0) {
578 * This is a simple range of characters. Scan to find the end
584 if (CHAR_TYPE(*src
) & (mask
| TYPE_SUBS
)) {
588 tokenPtr
->type
= TCL_TOKEN_TEXT
;
589 tokenPtr
->size
= src
- tokenPtr
->start
;
590 parsePtr
->numTokens
++;
591 } else if (*src
== '$') {
593 * This is a variable reference. Call ParseVarName to do
594 * all the dirty work of parsing the name.
597 varToken
= parsePtr
->numTokens
;
598 if (parseVarName(src
, parsePtr
->end
- src
, parsePtr
) != TCL_OK
) {
601 src
+= parsePtr
->tokenPtr
[varToken
].size
;
602 } else if (*src
== '[') {
604 * Command substitution. Call Tcl_ParseCommand recursively
605 * (and repeatedly) to parse the nested command(s), then
606 * throw away the parse information.
611 if (elTclParseCommand(src
, parsePtr
->end
- src
,
612 1, &nested
) != TCL_OK
) {
613 parsePtr
->errorType
= nested
.errorType
;
614 parsePtr
->term
= nested
.term
;
615 parsePtr
->incomplete
= nested
.incomplete
;
619 src
= nested
.commandStart
+ nested
.commandSize
;
620 if (nested
.tokenPtr
!= nested
.staticTokens
) {
621 ckfree((char *) nested
.tokenPtr
);
623 if ((*nested
.term
== ']') && !nested
.incomplete
) {
626 if (src
== parsePtr
->end
) {
627 parsePtr
->errorType
= TCL_PARSE_MISSING_BRACKET
;
628 parsePtr
->incomplete
= 1;
632 tokenPtr
->type
= TCL_TOKEN_COMMAND
;
633 tokenPtr
->size
= src
- tokenPtr
->start
;
634 parsePtr
->numTokens
++;
635 } else if (*src
== '\\') {
637 * Backslash substitution.
640 if (src
[1] == '\n') {
641 if ((src
+ 2) == parsePtr
->end
) {
642 parsePtr
->incomplete
= 1;
646 * Note: backslash-newline is special in that it is
647 * treated the same as a space character would be. This
648 * means that it could terminate the token.
651 if (mask
& TYPE_SPACE
) {
655 tokenPtr
->type
= TCL_TOKEN_BS
;
656 Tcl_UtfBackslash(src
, &tokenPtr
->size
, utfBytes
);
657 parsePtr
->numTokens
++;
658 src
+= tokenPtr
->size
;
659 } else if (*src
== 0) {
661 * We encountered a null character. If it is the null
662 * character at the end of the string, then return.
663 * Otherwise generate a text token for the single
667 if (src
== parsePtr
->end
) {
670 tokenPtr
->type
= TCL_TOKEN_TEXT
;
672 parsePtr
->numTokens
++;
675 elTclFreeParse(parsePtr
);
679 if (parsePtr
->numTokens
== originalTokens
) {
681 * There was nothing in this range of text. Add an empty token
682 * for the empty range, so that there is always at least one
686 tokenPtr
->type
= TCL_TOKEN_TEXT
;
688 parsePtr
->numTokens
++;
690 parsePtr
->term
= src
;
695 *----------------------------------------------------------------------
699 * This procedure is invoked to free any dynamic storage that may
700 * have been allocated by a previous call to elTclParseCommand.
706 * If there is any dynamically allocated memory in *parsePtr,
709 *----------------------------------------------------------------------
713 elTclFreeParse(parsePtr
)
714 ElTclParse
*parsePtr
; /* Structure that was filled in by a
715 * previous call to Tcl_ParseCommand. */
717 if (parsePtr
->tokenPtr
!= parsePtr
->staticTokens
) {
718 ckfree((char *) parsePtr
->tokenPtr
);
719 parsePtr
->tokenPtr
= parsePtr
->staticTokens
;
724 *----------------------------------------------------------------------
726 * expandTokenArray --
728 * This procedure is invoked when the current space for tokens in
729 * an ElTclParse structure fills up; it allocates memory to grow the
736 * Memory is allocated for a new larger token array; the memory
737 * for the old array is freed, if it had been dynamically allocated.
739 *----------------------------------------------------------------------
743 expandTokenArray(parsePtr
)
744 ElTclParse
*parsePtr
; /* Parse structure whose token space
750 newCount
= parsePtr
->tokensAvailable
*2;
751 newPtr
= (ElTclToken
*) ckalloc((unsigned)(newCount
*sizeof(ElTclToken
)));
752 memcpy((VOID
*) newPtr
, (VOID
*) parsePtr
->tokenPtr
,
753 (size_t) (parsePtr
->tokensAvailable
* sizeof(ElTclToken
)));
754 if (parsePtr
->tokenPtr
!= parsePtr
->staticTokens
) {
755 ckfree((char *) parsePtr
->tokenPtr
);
757 parsePtr
->tokenPtr
= newPtr
;
758 parsePtr
->tokensAvailable
= newCount
;
762 *----------------------------------------------------------------------
766 * Given a string starting with a $ sign, parse off a variable
767 * name and return information about the parse.
770 * The return value is TCL_OK if the command was parsed successfully
771 * and TCL_ERROR otherwise. On a successful return, tokenPtr and
772 * numTokens fields of parsePtr are filled in with information about
773 * the variable name that was parsed. The "size" field of the first
774 * new token gives the total number of bytes in the variable name.
775 * Other fields in parsePtr are undefined.
778 * If there is insufficient space in parsePtr to hold all the
779 * information about the command, then additional space is
780 * malloc-ed. If the procedure returns TCL_OK then the caller must
781 * eventually invoke elTclFreeParse to release any additional space
782 * that was allocated.
784 *----------------------------------------------------------------------
788 parseVarName(string
, numBytes
, parsePtr
)
789 char *string
; /* String containing variable name. First
790 * character must be "$". */
791 int numBytes
; /* Total number of bytes in string. If < 0,
792 * the string consists of all bytes up to the
793 * first null character. */
794 ElTclParse
*parsePtr
; /* Structure to fill in with information
795 * about the variable name. */
797 ElTclToken
*tokenPtr
;
800 int varIndex
, offset
;
805 end
= string
+ numBytes
;
807 end
= string
+ strlen(string
);
811 * Generate one token for the variable, an additional token for the
812 * name, plus any number of additional tokens for the index, if
817 if ((parsePtr
->numTokens
+ 2) > parsePtr
->tokensAvailable
) {
818 expandTokenArray(parsePtr
);
820 tokenPtr
= &parsePtr
->tokenPtr
[parsePtr
->numTokens
];
821 tokenPtr
->type
= TCL_TOKEN_VARIABLE
;
822 tokenPtr
->start
= src
;
823 varIndex
= parsePtr
->numTokens
;
824 parsePtr
->numTokens
++;
828 goto justADollarSign
;
830 tokenPtr
->type
= TCL_TOKEN_TEXT
;
831 tokenPtr
->start
= src
;
832 tokenPtr
->numComponents
= 0;
835 * The name of the variable can have three forms:
836 * 1. The $ sign is followed by an open curly brace. Then
837 * the variable name is everything up to the next close
838 * curly brace, and the variable is a scalar variable.
839 * 2. The $ sign is not followed by an open curly brace. Then
840 * the variable name is everything up to the next
841 * character that isn't a letter, digit, or underscore.
842 * :: sequences are also considered part of the variable
843 * name, in order to support namespaces. If the following
844 * character is an open parenthesis, then the information
845 * between parentheses is the array element name.
846 * 3. The $ sign is followed by something that isn't a letter,
847 * digit, or underscore: in this case, there is no variable
848 * name and the token is just "$".
853 tokenPtr
->type
= TCL_TOKEN_TEXT
;
854 tokenPtr
->start
= src
;
855 tokenPtr
->numComponents
= 0;
858 parsePtr
->errorType
= TCL_PARSE_MISSING_VAR_BRACE
;
859 parsePtr
->incomplete
= 1;
867 tokenPtr
->size
= src
- tokenPtr
->start
;
868 tokenPtr
[-1].size
= src
- tokenPtr
[-1].start
;
869 parsePtr
->numTokens
++;
870 if (src
< end
) src
++;
872 tokenPtr
->type
= TCL_TOKEN_TEXT
;
873 tokenPtr
->start
= src
;
874 tokenPtr
->numComponents
= 0;
876 offset
= Tcl_UtfToUniChar(src
, &ch
);
877 c
= (unsigned char)(ch
);
878 if (isalnum(c
) || (c
== '_')) { /* INTL: ISO only, UCHAR. */
882 if ((c
== ':') && (((src
+1) != end
) && (src
[1] == ':'))) {
884 while ((src
!= end
) && (*src
== ':')) {
893 * Support for empty array names here.
895 array
= ((src
!= end
) && (*src
== '('));
896 tokenPtr
->size
= src
- tokenPtr
->start
;
897 if (tokenPtr
->size
== 0 && !array
) {
898 goto justADollarSign
;
900 parsePtr
->numTokens
++;
903 * This is a reference to an array element. Call
904 * ParseTokens recursively to parse the element name,
905 * since it could contain any number of substitutions.
908 if (parseTokens(src
+1, TYPE_CLOSE_PAREN
, parsePtr
)
912 if ((parsePtr
->term
== end
) || (*parsePtr
->term
!= ')')) {
913 parsePtr
->errorType
= TCL_PARSE_MISSING_PAREN
;
914 parsePtr
->incomplete
= 1;
915 src
= parsePtr
->term
;
917 src
= parsePtr
->term
+ 1;
920 tokenPtr
= &parsePtr
->tokenPtr
[varIndex
];
921 tokenPtr
->size
= src
- tokenPtr
->start
;
922 tokenPtr
->numComponents
= parsePtr
->numTokens
- (varIndex
+ 1);
926 * The dollar sign isn't followed by a variable name.
927 * replace the TCL_TOKEN_VARIABLE token with a
928 * TCL_TOKEN_TEXT token for the dollar sign.
932 tokenPtr
= &parsePtr
->tokenPtr
[varIndex
];
933 tokenPtr
->type
= TCL_TOKEN_TEXT
;
935 tokenPtr
->numComponents
= 0;
939 elTclFreeParse(parsePtr
);
945 *----------------------------------------------------------------------
949 * Given a string in braces such as a Tcl command argument or a string
950 * value in a Tcl expression, this procedure parses the string and
951 * returns information about the parse.
954 * The return value is TCL_OK if the string was parsed successfully
955 * and TCL_ERROR otherwise. On a successful return, tokenPtr and
956 * numTokens fields of parsePtr are filled in with information about
957 * the string that was parsed. Other fields in parsePtr are
958 * undefined. termPtr is set to point to the character just after
959 * the last one in the braced string.
962 * If there is insufficient space in parsePtr to hold all the
963 * information about the command, then additional space is
964 * malloc-ed. If the procedure returns TCL_OK then the caller must
965 * eventually invoke elTclFreeParse to release any additional space
966 * that was allocated.
968 *----------------------------------------------------------------------
972 parseBraces(string
, numBytes
, parsePtr
, termPtr
)
973 char *string
; /* String containing the string in braces.
974 * The first character must be '{'. */
975 int numBytes
; /* Total number of bytes in string. If < 0,
976 * the string consists of all bytes up to
977 * the first null character. */
978 register ElTclParse
*parsePtr
;
979 /* Structure to fill in with information
980 * about the string. */
981 char **termPtr
; /* If non-NULL, points to word in which to
982 * store a pointer to the character just
983 * after the terminating '}' if the parse
987 char utfBytes
[TCL_UTF_MAX
]; /* For result of backslash substitution. */
988 ElTclToken
*tokenPtr
;
989 register char *src
, *end
;
990 int startIndex
, level
, length
;
992 if ((numBytes
>= 0) || (string
== NULL
)) {
993 end
= string
+ numBytes
;
995 end
= string
+ strlen(string
);
999 startIndex
= parsePtr
->numTokens
;
1001 if (parsePtr
->numTokens
== parsePtr
->tokensAvailable
) {
1002 expandTokenArray(parsePtr
);
1004 tokenPtr
= &parsePtr
->tokenPtr
[startIndex
];
1005 tokenPtr
->type
= TCL_TOKEN_TEXT
;
1006 tokenPtr
->start
= src
;
1007 tokenPtr
->numComponents
= 0;
1010 while (CHAR_TYPE(*src
) == TYPE_NORMAL
) {
1019 } else if (*src
== '{') {
1022 } else if (*src
== '\\') {
1023 Tcl_UtfBackslash(src
, &length
, utfBytes
);
1024 if (src
[1] == '\n') {
1026 * A backslash-newline sequence must be collapsed, even
1027 * inside braces, so we have to split the word into
1028 * multiple tokens so that the backslash-newline can be
1029 * represented explicitly.
1032 if ((src
+ 2) == end
) {
1033 parsePtr
->incomplete
= 1;
1035 tokenPtr
->size
= (src
- tokenPtr
->start
);
1036 if (tokenPtr
->size
!= 0) {
1037 parsePtr
->numTokens
++;
1039 if ((parsePtr
->numTokens
+1) >= parsePtr
->tokensAvailable
) {
1040 expandTokenArray(parsePtr
);
1042 tokenPtr
= &parsePtr
->tokenPtr
[parsePtr
->numTokens
];
1043 tokenPtr
->type
= TCL_TOKEN_BS
;
1044 tokenPtr
->start
= src
;
1045 tokenPtr
->size
= length
;
1046 tokenPtr
->numComponents
= 0;
1047 parsePtr
->numTokens
++;
1051 tokenPtr
->type
= TCL_TOKEN_TEXT
;
1052 tokenPtr
->start
= src
;
1053 tokenPtr
->numComponents
= 0;
1057 } else if (src
== end
) {
1058 parsePtr
->errorType
= TCL_PARSE_MISSING_BRACE
;
1059 parsePtr
->incomplete
= 1;
1067 * Decide if we need to finish emitting a partially-finished token.
1068 * There are 3 cases:
1069 * {abc \newline xyz} or {xyz} - finish emitting "xyz" token
1070 * {abc \newline} - don't emit token after \newline
1071 * {} - finish emitting zero-sized token
1072 * The last case ensures that there is a token (even if empty) that
1073 * describes the braced string.
1076 if ((src
!= tokenPtr
->start
)
1077 || (parsePtr
->numTokens
== startIndex
)) {
1078 tokenPtr
->size
= (src
- tokenPtr
->start
);
1079 parsePtr
->numTokens
++;
1081 if (termPtr
!= NULL
) {
1089 *----------------------------------------------------------------------
1091 * parseQuotedString --
1093 * Given a double-quoted string such as a quoted Tcl command argument
1094 * or a quoted value in a Tcl expression, this procedure parses the
1095 * string and returns information about the parse.
1098 * The return value is TCL_OK if the string was parsed successfully
1099 * and TCL_ERROR otherwise. On a successful return, tokenPtr and
1100 * numTokens fields of parsePtr are filled in with information about
1101 * the string that was parsed. Other fields in parsePtr are
1102 * undefined. termPtr is set to point to the character just after
1103 * the quoted string's terminating close-quote.
1106 * If there is insufficient space in parsePtr to hold all the
1107 * information about the command, then additional space is
1108 * malloc-ed. If the procedure returns TCL_OK then the caller must
1109 * eventually invoke elTclFreeParse to release any additional space
1110 * that was allocated.
1112 *----------------------------------------------------------------------
1116 parseQuotedString(string
, numBytes
, parsePtr
, termPtr
)
1117 char *string
; /* String containing the quoted string.
1118 * The first character must be '"'. */
1119 int numBytes
; /* Total number of bytes in string. If < 0,
1120 * the string consists of all bytes up to
1121 * the first null character. */
1122 register ElTclParse
*parsePtr
;
1123 /* Structure to fill in with information
1124 * about the string. */
1125 char **termPtr
; /* If non-NULL, points to word in which to
1126 * store a pointer to the character just
1127 * after the quoted string's terminating
1128 * close-quote if the parse succeeds. */
1132 if ((numBytes
>= 0) || (string
== NULL
)) {
1133 end
= string
+ numBytes
;
1135 end
= string
+ strlen(string
);
1138 if (parseTokens(string
+1, TYPE_QUOTE
, parsePtr
) != TCL_OK
) {
1141 if (*parsePtr
->term
!= '"') {
1142 parsePtr
->errorType
= TCL_PARSE_MISSING_QUOTE
;
1143 parsePtr
->incomplete
= 1;
1144 if (termPtr
!= NULL
) {
1145 *termPtr
= parsePtr
->term
;
1148 if (termPtr
!= NULL
) {
1149 *termPtr
= (parsePtr
->term
+ 1);
1155 elTclFreeParse(parsePtr
);