cleanup, add example output
[sv2nmigen.git] / lexor.py
1 """
2 %{
3 /*
4 * Copyright (c) 1998-2017 Stephen Williams (steve@icarus.com)
5 *
6 * This source code is free software; you can redistribute it
7 * and/or modify it in source code form under the terms of the GNU
8 * General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
10 * any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 */
21 """
22
# Debug switch: when truthy, t_LITERAL prints every identifier/keyword it
# matches together with the token type it was classified as.
lex_debug = 0
24
25 from ply import lex
26
27 #DOCSTRING REMOVED
28
# Additional PLY lexer states.  'timescale' is exclusive, so while it is
# active only the t_timescale_* rules apply.  The 'module' state is
# currently disabled.
states = (#('module', 'exclusive'),
          ('timescale', 'exclusive'),)
31
32 from parse_tokens import tokens
# Token types that are produced by the lexer rules in this module rather
# than listed in parse_tokens.
# NOTE(review): `+=` extends the imported list object in place, so the
# list held by parse_tokens grows as well - confirm that is intended.
tokens += ['timescale', 'LITERAL', 'IDENTIFIER', 'DEC_NUMBER', 'BASED_NUMBER',
           'UNBASED_NUMBER']
35
# C-style block comment rule.  The raw string below is the PLY regex for the
# rule (it is not documentation).  Nothing is returned, so the comment text
# is discarded; only the line counter is updated.
def t_ccomment(t):
    r'/\*(.|\n)*?\*/'
    lexer = t.lexer
    newline_count = t.value.count('\n')
    # Keep lineno in step with the newlines swallowed by the comment.
    lexer.lineno = lexer.lineno + newline_count
39
# '//' comments run to the end of the line; PLY discards matches of rules
# whose name starts with t_ignore_.
t_ignore_cppcomment = r'//.*'

# Characters skipped between tokens.  Newlines are skipped here as well, so
# they do not advance lexer.lineno.
t_ignore = ' \t\n'

# Multi-character operators.  PLY adds string-defined rules to its master
# regex in order of decreasing regex length, so e.g. ">>>=" is tried before
# ">>>" and ">>".
t_K_PSTAR = r"\(\*"
t_K_STARP = r"\*\)"
t_K_DOTSTAR = r"\.\*"
# The longest alternative must come first: Python's re takes the first
# alternative that matches, so "(<<|<<<)" could never consume a full "<<<".
t_K_LS = r"(<<<|<<)"
t_K_RS = r">>"
t_K_RSS = r">>>"
t_K_POW = r"\*\*"
t_K_LE = r"<="
t_K_GE = r">="
t_K_EG = r"=>"
# Not ported from the flex source yet (kept for reference):
#
#   "+=>"|"-=>" {
#       /*
#        * Resolve the ambiguity between the += assignment
#        * operator and +=> polarity edge path operator
#        *
#        * +=> should be treated as two separate tokens '+' and
#        * '=>' (K_EG), therefore we only consume the first
#        * character of the matched pattern i.e. either + or -
#        * and push back the rest of the matches text (=>) in
#        * the input stream.
#        */
#       yyless(1);
#       return yytext[0];
#   }
#
# With the rules as written here "+=>" matches t_K_PLUS_EQ first.
t_K_SG = r"\*>"
t_K_EQ = r"=="
t_K_NE = r"!="
t_K_CEQ = r"==="
t_K_CNE = r"!=="
t_K_WEQ = r"==\?"
t_K_WNE = r"!=\?"
t_K_LOR = r"\|\|"
t_K_LAND = r"\&\&"
t_K_TAND = r"\&\&\&"
t_K_NOR = r"\~\|"
t_K_NXOR = r"(\~\^|\^\~)"
t_K_NAND = r"\~\&"
t_K_TRIGGER = r"\->"
t_K_PO_POS = r"\+:"
t_K_PO_NEG = r"\-:"
t_K_CONTRIBUTE = r"<\+"
t_K_PLUS_EQ = r"\+="
t_K_MINUS_EQ = r"\-="
t_K_MUL_EQ = r"\*="
t_K_DIV_EQ = r"\/="
t_K_MOD_EQ = r"\%="
t_K_AND_EQ = r"\&="
t_K_OR_EQ = r"\|="
t_K_XOR_EQ = r"\^="
t_K_LS_EQ = r"(<<=|<<<=)"
t_K_RS_EQ = r">>="
t_K_RSS_EQ = r">>>="
t_K_INCR = r"\+\+"
# Was r"\\--", which requires a literal backslash before "--" and therefore
# never matched the decrement operator itself.
t_K_DECR = r"\-\-"
t_K_LP = r"\'\{"
t_K_SCOPE_RES = r"::"
102
# Register the token type of every t_K_* operator rule with PLY.
tokens += [ 'K_PSTAR', 'K_STARP', 'K_DOTSTAR', 'K_LS',
            'K_RS', 'K_RSS', 'K_POW', 'K_LE', 'K_GE', 'K_EG', 'K_SG',
            'K_EQ', 'K_NE', 'K_CEQ', 'K_CNE', 'K_WEQ', 'K_WNE',
            'K_LOR', 'K_LAND', 'K_TAND', 'K_NOR', 'K_NXOR',
            'K_NAND', 'K_TRIGGER', 'K_PO_POS', 'K_PO_NEG', 'K_CONTRIBUTE',
            'K_PLUS_EQ', 'K_MINUS_EQ', 'K_MUL_EQ', 'K_DIV_EQ', 'K_MOD_EQ',
            'K_AND_EQ', 'K_OR_EQ', 'K_XOR_EQ', 'K_LS_EQ', 'K_RS_EQ',
            'K_RSS_EQ', 'K_INCR', 'K_DECR', 'K_LP',
            'K_SCOPE_RES'
          ]
113
# Reserved words recognised by t_LITERAL, in their original table order.
_KEYWORD_NAMES = (
    "above", "abs", "absdelay", "abstol", "accept_on", "access",
    "acos", "acosh", "ac_stim", "alias", "aliasparam",
    "always", "always_comb", "always_ff", "always_latch",
    "analog", "analysis", "and", "asin", "asinh",
    "assert", "assign", "assume", "atan", "atan2", "atanh", "automatic",
    "before", "begin", "bind", "bins", "binsof", "bit", "branch", "break",
    "bool", "buf", "bufif0", "bufif1", "byte",
    "case", "casex", "casez", "ceil", "cell", "chandle", "checker",
    "class", "clocking", "cmos", "config",
    "connect", "connectmodule", "connectrules",
    "const", "constraint", "context", "continue", "continuous",
    "cos", "cosh", "cover", "covergroup", "coverpoint", "cross",
    "ddt", "ddt_nature", "ddx", "deassign", "default", "defparam",
    "design", "disable", "discipline", "discrete", "dist", "do",
    "domain", "driver_update",
    "edge", "else", "end", "endcase", "endchecker", "endconfig",
    "endclass", "endclocking", "endconnectrules", "enddiscipline",
    "endfunction", "endgenerate", "endgroup", "endinterface",
    "endmodule", "endnature", "endpackage", "endparamset",
    "endprimitive", "endprogram", "endproperty", "endspecify",
    "endsequence", "endtable", "endtask",
    "enum", "event", "eventually", "exclude", "exp", "expect",
    "export", "extends", "extern",
    "final", "final_step", "first_match", "flicker_noise", "floor",
    "flow", "for", "foreach", "force", "forever", "fork", "forkjoin",
    "from", "function",
    "generate", "genvar", "global", "ground",
    "highz0", "highz1", "hypot",
    "idt", "idtmod", "idt_nature", "if", "iff", "ifnone",
    "ignore_bins", "illegal_bins", "implies", "implements", "import",
    "incdir", "include", "inf", "initial", "initial_step",
    "inout", "input", "inside", "instance", "int", "integer",
    "interconnect", "interface", "intersect",
    "join", "join_any", "join_none",
    "laplace_nd", "laplace_np", "laplace_zd", "laplace_zp",
    "large", "last_crossing", "let", "liblist", "library", "limexp",
    "ln", "local", "localparam", "log",
    # This is defined by SystemVerilog 1800-2005 and as an Icarus extension.
    "logic",
    "longint",
    "macromodule", "matches", "max", "medium", "merged", "min",
    "modport", "module",
    "nand", "nature", "negedge", "net_resolution", "nettype", "new",
    "nexttime", "nmos", "noise_table", "nor", "noshowcancelled",
    "not", "notif0", "notif1", "null",
    "or", "output",
    "package", "packed", "parameter", "paramset", "pmos", "posedge",
    "potential", "pow", "primitive", "priority", "program", "property",
    "protected", "pull0", "pull1", "pulldown", "pullup",
    "pulsestyle_onevent", "pulsestyle_ondetect", "pure",
    "rand", "randc", "randcase", "randsequence", "rcmos",
    "real", "realtime", "ref", "reg", "reject_on", "release", "repeat",
    "resolveto", "restrict", "return", "rnmos", "rpmos",
    "rtran", "rtranif0", "rtranif1",
    "s_always", "s_eventually", "s_nexttime", "s_until", "s_until_with",
    "scalared", "sequence", "shortint", "shortreal", "showcancelled",
    "signed", "sin", "sinh", "slew", "small", "soft", "solve",
    "specify", "specparam", "split", "sqrt", "static",
    # This is defined by both SystemVerilog 1800-2005 and Verilog-AMS 2.3
    "string",
    "strong", "strong0", "strong1", "struct", "super",
    "supply0", "supply1", "sync_accept_on", "sync_reject_on",
    "table", "tagged", "tan", "tanh", "task", "this", "throughout",
    "time", "timeprecision", "timer", "timeunit",
    "tran", "tranif0", "tranif1", "transition",
    "tri", "tri0", "tri1", "triand", "trior", "trireg",
    "type", "typedef",
    "union", "unique", "unique0", "units",
    # Reserved for future use!
    "unsigned",
    "until", "until_with", "untyped", "use", "uwire",
    "var", "vectored", "virtual", "void",
    "wait", "wait_order", "wand", "weak", "weak0", "weak1", "while",
    "white_noise", "wildcard", "wire", "with", "within",
    # This is the name originally proposed for uwire and is deprecated!
    "wone",
    "wor",
    # This is defined by Verilog-AMS 2.3 and as an Icarus extension.
    "wreal",
    "xnor", "xor",
    "zi_nd", "zi_np", "zi_zd", "zi_zp",
)

# Maps each reserved word to its parser token type, which is the word
# itself prefixed with 'K_'.
lexor_keyword_code = {name: 'K_' + name for name in _KEYWORD_NAMES}

# The single exception: "unique0" shares the K_unique token with "unique".
# NOTE(review): confirm whether a distinct K_unique0 token was intended.
lexor_keyword_code["unique0"] = 'K_unique'
454
# Single-character tokens, one list entry per character of the string below
# ('[' occurs twice, exactly as in the original list).
literals = list('[}{;:[],()#=.@&!?<>%|^~+*/-')
458
459 """
460 def t_module_end(t):
461 r'endmodule'
462 code = t.lexer.lexdata[t.modulestart:t.lexpos]
463 t.type = 'INITIAL'
464 t.value = code
465 t.lexer.lineno += t.value.count('\n')
466 return t
467
468 t_module_ignore = ' \t'
469 """
470
# Identifier / keyword rule.  The raw string below is the PLY regex for the
# rule.  Words found in lexor_keyword_code get their keyword token type;
# every other word becomes an IDENTIFIER.  The token value is left as the
# matched text.
def t_LITERAL(t):
    r'[a-zA-Z_$][a-zA-Z0-9$_]*'
    token_type = lexor_keyword_code.get(t.value, 'IDENTIFIER')
    if lex_debug:
        print ("literal", t.value, token_type)
    #if keyword in ['K_module', 'K_macromodule']:
    #    t.lexer.modulestart = t.lexpos+len(t.value)
    #    t.lexer.begin('module')
    t.type = token_type
    return t
485
# Based decimal literal: apostrophe, optional s/S, d/D, optional blanks or
# tabs, then decimal digits and underscores.  The raw string below is the
# PLY regex for the rule.  Emitted as BASED_NUMBER with the matched text as
# its value.
def t_dec_number(t):
    r'\'[sS]?[dD][ \t]*[0-9][0-9_]*'
    t.type = 'BASED_NUMBER'
    # Value conversion is not done here; the note below names the helper
    # used by the C/flex action.
    #t.value = word # make_unsized_dec(yytext);
    return t
491
# Based decimal literal whose only digit is x, z or ? (either case for the
# letters), optionally followed by underscores.  The raw string below is
# the PLY regex for the rule.  Emitted as BASED_NUMBER with the matched
# text as its value.
def t_undef_highz_dec(t):
    r'\'[sS]?[dD][ \t]*[xzXZ?]_*'
    t.type = 'BASED_NUMBER'
    # Value conversion is not done here; the note below names the helper
    # used by the C/flex action.
    #t.value = word # make_undef_highz_dec(yytext);
    return t
497
# Based binary literal: apostrophe, optional s/S, b/B, optional blanks or
# tabs, then digits drawn from 0, 1, x, z, ? and underscores.  The raw
# string below is the PLY regex for the rule.  Emitted as BASED_NUMBER with
# the matched text as its value.
def t_based_make_unsized_binary(t):
    r'\'[sS]?[bB][ \t]*[0-1xzXZ?][0-1xzXZ?_]*'
    t.type = 'BASED_NUMBER'
    # Value conversion is not done here; the note below names the helper
    # used by the C/flex action.
    #t.value = word # make_unsized_binary(yytext);
    return t
503
# Based octal literal: apostrophe, optional s/S, o/O, optional blanks or
# tabs, then digits drawn from 0-7, x, z, ? and underscores.  The raw
# string below is the PLY regex for the rule.  Emitted as BASED_NUMBER with
# the matched text as its value.
def t_make_unsized_octal(t):
    r'\'[sS]?[oO][ \t]*[0-7xzXZ?][0-7xzXZ?_]*'
    t.type = 'BASED_NUMBER'
    # Value conversion is not done here; the note below names the helper
    # used by the C/flex action.
    #t.value = word # make_unsized_octal(yytext);
    return t
509
# Based hexadecimal literal: apostrophe, optional s/S, h/H, optional blanks
# or tabs, then hex digits, x, z, ? and underscores.  The raw string below
# is the PLY regex for the rule.  Emitted as BASED_NUMBER with the matched
# text as its value.
def t_make_unsized_hex(t):
    r'\'[sS]?[hH][ \t]*[0-9a-fA-FxzXZ?][0-9a-fA-FxzXZ?_]*'
    t.type = 'BASED_NUMBER'
    # Value conversion is not done here; the note below names the helper
    # used by the C/flex action.
    #t.value = word # make_unsized_hex(yytext);
    return t
515
# Unbased literal: an apostrophe followed by exactly one of 0, 1, x, z, X
# or Z.  The raw string below is the PLY regex for the rule.  Emitted as
# UNBASED_NUMBER with the matched text as its value.
def t_unbased_make_unsized_binary(t):
    r'\'[01xzXZ]'
    t.type = 'UNBASED_NUMBER'
    # Value conversion is not done here; the note below names the helper
    # used by the C/flex action.
    #t.value = word # make_unsized_binary(yytext);
    return t
521
522 """
523 /* Decimal numbers are the usual. But watch out for the UDPTABLE
524 mode, where there are no decimal numbers. Reject the match if we
525 are in the UDPTABLE state. */
526 """
# Plain decimal number: a digit followed by digits and underscores.  The
# raw string below is the PLY regex for the rule.  Emitted as DEC_NUMBER
# with the matched text as its value.
def t_make_unsized_dec(t):
    r'[0-9][0-9_]*'
    t.type = 'DEC_NUMBER'
    # Value conversion and the based_size bookkeeping of the C/flex action
    # (see the notes below) are not done here.
    #t.value = word # make_unsized_dec(yytext);
    #based_size = yylval.number->as_ulong();
    return t
533
534 """
535 /* Notice and handle the `timescale directive. */
536 """
537
# Start of a `timescale directive.  The raw string below is the PLY regex
# for the rule.  Records where the directive keyword ends and switches the
# lexer into the 'timescale' state; no token is returned for the keyword.
def t_timescale(t):
    #r'^{W}?`timescale'
    r'`timescale'
    lexer = t.lexer
    end_of_keyword = t.lexpos + len(t.value)
    lexer.timestart = end_of_keyword
    lexer.push_state('timescale')
543
#t_timescale_ignore_toeol = r'.+\n'
# Characters skipped while in the 'timescale' state: blanks and tabs only,
# so the newline stays available to t_timescale_end.
t_timescale_ignore = ' \t'
#t_timescale_ignore_whitespace = r'\s+'
#t_code_ignore = ""
548
# Remainder of a `timescale line, matched while in the 'timescale' state.
# The raw string below is the PLY regex for the rule.  Returns a
# 'timescale' token whose value is the text following the directive
# keyword, up to and including the newline, then leaves the state.
def t_timescale_end(t):
    r'.+\n'
    lexer = t.lexer
    # t.lexpos is where this match starts, so the slice has to extend over
    # the matched text.  Stopping at t.lexpos (as before) captured only the
    # blanks skipped after the keyword, never the directive arguments.
    match_end = t.lexpos + len(t.value)
    code = lexer.lexdata[lexer.timestart:match_end]
    t.type = 'timescale'
    t.value = code
    lexer.pop_state()
    print ("match", code)
    return t
557
# Error handler for the 'timescale' state: report the current line number,
# the offending character and the remaining input, then abort lexing by
# raising RuntimeError.
def t_timescale_error(t):
    line_number = t.lexer.lineno
    bad_char = t.value[0]
    print("%d: Timescale error '%s'" % (line_number, bad_char))
    print(t.value)
    raise RuntimeError
562
563 """
564 def t_module_error(t):
565 print("%d: Module error '%s'" % (t.lexer.lineno, t.value[0]))
566 print(t.value)
567 raise RuntimeError
568 """
569
# Error handler for the default state: report the current line number, the
# offending character and the remaining input, then skip one character so
# lexing can continue.
def t_error(t):
    lexer = t.lexer
    bad_char = t.value[0]
    print("%d: Illegal character '%s'" % (lexer.lineno, bad_char))
    print(t.value)
    lexer.skip(1)
574
# Remove duplicate names from the token list; going through a set means
# the resulting order is unspecified.
tokens = list(set(tokens))

# Build the lexer when this module is imported or run.
lex.lex()

if __name__ == '__main__':
    # Executed as a script: hand over to PLY's lex.runmain() helper.
    lex.runmain()
581