add support for syntax "do i = N to N" equivalent to for
[soc.git] / src / soc / decoder / pseudo / parser.py
1 # Based on GardenSnake - a parser generator demonstration program
2 # GardenSnake was released into the Public Domain by Andrew Dalke.
3
4 # Portions of this work are derived from Python's Grammar definition
5 # and may be covered under the Python copyright and license
6 #
7 # Andrew Dalke / Dalke Scientific Software, LLC
8 # 30 August 2006 / Cape Town, South Africa
9
10 # Modifications for inclusion in PLY distribution
11 from pprint import pprint
12 from ply import lex, yacc
13 import astor
14
15 from soc.decoder.power_decoder import create_pdecode
16 from soc.decoder.pseudo.lexer import IndentLexer
17 from soc.decoder.orderedset import OrderedSet
18
19 # I use the Python AST
20 #from compiler import ast
21 import ast
22
23 # Helper function
24
25
26 def Assign(left, right):
27 names = []
28 print("Assign", left, right)
29 if isinstance(left, ast.Name):
30 # Single assignment on left
31 # XXX when doing IntClass, which will have an "eq" function,
32 # this is how to access it
33 # eq = ast.Attribute(left, "eq") # get eq fn
34 # return ast.Call(eq, [right], []) # now call left.eq(right)
35 return ast.Assign([ast.Name(left.id, ast.Store())], right)
36 elif isinstance(left, ast.Tuple):
37 # List of things - make sure they are Name nodes
38 names = []
39 for child in left.getChildren():
40 if not isinstance(child, ast.Name):
41 raise SyntaxError("that assignment not supported")
42 names.append(child.name)
43 ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
44 return ast.Assign([ast.AssTuple(ass_list)], right)
45 elif isinstance(left, ast.Subscript):
46 return ast.Assign([left], right)
47 # XXX HMMM probably not needed...
48 ls = left.slice
49 if isinstance(ls, ast.Slice):
50 lower, upper, step = ls.lower, ls.upper, ls.step
51 print("slice assign", lower, upper, step)
52 if step is None:
53 ls = (lower, upper, None)
54 else:
55 ls = (lower, upper, step)
56 ls = ast.Tuple(ls)
57 return ast.Call(ast.Name("selectassign"),
58 [left.value, ls, right], [])
59 else:
60 print("Assign fail")
61 raise SyntaxError("Can't do that yet")
62
63
64 # I implemented INDENT / DEDENT generation as a post-processing filter
65
66 # The original lex token stream contains WS and NEWLINE characters.
67 # WS will only occur before any other tokens on a line.
68
69 # I have three filters. One tags tokens by adding two attributes.
70 # "must_indent" is True if the token must be indented from the
71 # previous code. The other is "at_line_start" which is True for WS
72 # and the first non-WS/non-NEWLINE on a line. It flags the check so
73 # see if the new line has changed indication level.
74
75
76 # No using Python's approach because Ply supports precedence
77
78 # comparison: expr (comp_op expr)*
79 # arith_expr: term (('+'|'-') term)*
80 # term: factor (('*'|'/'|'%'|'//') factor)*
81 # factor: ('+'|'-'|'~') factor | power
82 # comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
83
84 def make_le_compare(arg):
85 (left, right) = arg
86 return ast.Compare(left, [ast.LtE()], [right])
87
88
89 def make_ge_compare(arg):
90 (left, right) = arg
91 return ast.Compare(left, [ast.GtE()], [right])
92
93
94 def make_lt_compare(arg):
95 (left, right) = arg
96 return ast.Compare(left, [ast.Lt()], [right])
97
98
99 def make_gt_compare(arg):
100 (left, right) = arg
101 return ast.Compare(left, [ast.Gt()], [right])
102
103
104 def make_eq_compare(arg):
105 (left, right) = arg
106 return ast.Compare(left, [ast.Eq()], [right])
107
108
109 binary_ops = {
110 "^": ast.BitXor(),
111 "&": ast.BitAnd(),
112 "|": ast.BitOr(),
113 "+": ast.Add(),
114 "-": ast.Sub(),
115 "*": ast.Mult(),
116 "/": ast.Div(),
117 "%": ast.Mod(),
118 "<=": make_le_compare,
119 ">=": make_ge_compare,
120 "<": make_lt_compare,
121 ">": make_gt_compare,
122 "=": make_eq_compare,
123 }
124 unary_ops = {
125 "+": ast.UAdd(),
126 "-": ast.USub(),
127 "¬": ast.Invert(),
128 }
129
130
131 def check_concat(node): # checks if the comparison is already a concat
132 print("check concat", node)
133 if not isinstance(node, ast.Call):
134 return [node]
135 print("func", node.func.id)
136 if node.func.id != 'concat':
137 return [node]
138 if node.keywords: # a repeated list-constant, don't optimise
139 return [node]
140 return node.args
141
142
143 # identify SelectableInt pattern
144 def identify_sint_mul_pattern(p):
145 if not isinstance(p[3], ast.Constant):
146 return False
147 if not isinstance(p[1], ast.List):
148 return False
149 l = p[1].elts
150 if len(l) != 1:
151 return False
152 elt = l[0]
153 return isinstance(elt, ast.Constant)
154
155 def apply_trailer(atom, trailer):
156 if trailer[0] == "TLIST":
157 # assume depth of one
158 atom = apply_trailer(atom, trailer[1])
159 trailer = trailer[2]
160 if trailer[0] == "CALL":
161 #p[0] = ast.Expr(ast.Call(p[1], p[2][1], []))
162 return ast.Call(atom, trailer[1], [])
163 # if p[1].id == 'print':
164 # p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)
165 # else:
166 # p[0] = ast.CallFunc(p[1], p[2][1], None, None)
167 else:
168 print("subscript atom", trailer[1])
169 #raise AssertionError("not implemented %s" % p[2][0])
170 subs = trailer[1]
171 if len(subs) == 1:
172 idx = subs[0]
173 else:
174 idx = ast.Slice(subs[0], subs[1], None)
175 return ast.Subscript(atom, idx, ast.Load())
176
177 ########## Parser (tokens -> AST) ######
178
179 # also part of Ply
180 #import yacc
181
182 # https://www.mathcs.emory.edu/~valerie/courses/fall10/155/resources/op_precedence.html
183 # python operator precedence
184 # Highest precedence at top, lowest at bottom.
185 # Operators in the same box evaluate left to right.
186 #
187 # Operator Description
188 # () Parentheses (grouping)
189 # f(args...) Function call
190 # x[index:index] Slicing
191 # x[index] Subscription
192 # x.attribute Attribute reference
193 # ** Exponentiation
194 # ~x Bitwise not
195 # +x, -x Positive, negative
196 # *, /, % mul, div, remainder
197 # +, - Addition, subtraction
198 # <<, >> Bitwise shifts
199 # & Bitwise AND
200 # ^ Bitwise XOR
201 # | Bitwise OR
202 # in, not in, is, is not, <, <=, >, >=, <>, !=, == comp, membership, ident
203 # not x Boolean NOT
204 # and Boolean AND
205 # or Boolean OR
206 # lambda Lambda expression
207
208 class PowerParser:
209
210 precedence = (
211 ("left", "EQ", "GT", "LT", "LE", "GE", "LTU", "GTU"),
212 ("left", "BITOR"),
213 ("left", "BITXOR"),
214 ("left", "BITAND"),
215 ("left", "PLUS", "MINUS"),
216 ("left", "MULT", "DIV", "MOD"),
217 ("left", "INVERT"),
218 )
219
220 def __init__(self):
221 self.gprs = {}
222 for rname in ['RA', 'RB', 'RC', 'RT', 'RS']:
223 self.gprs[rname] = None
224 self.read_regs = OrderedSet()
225 self.uninit_regs = OrderedSet()
226 self.write_regs = OrderedSet()
227
228 # The grammar comments come from Python's Grammar/Grammar file
229
230 # NB: compound_stmt in single_input is followed by extra NEWLINE!
231 # file_input: (NEWLINE | stmt)* ENDMARKER
232
233 def p_file_input_end(self, p):
234 """file_input_end : file_input ENDMARKER"""
235 print("end", p[1])
236 p[0] = p[1]
237
238 def p_file_input(self, p):
239 """file_input : file_input NEWLINE
240 | file_input stmt
241 | NEWLINE
242 | stmt"""
243 if isinstance(p[len(p)-1], str):
244 if len(p) == 3:
245 p[0] = p[1]
246 else:
247 p[0] = [] # p == 2 --> only a blank line
248 else:
249 if len(p) == 3:
250 p[0] = p[1] + p[2]
251 else:
252 p[0] = p[1]
253
254 # funcdef: [decorators] 'def' NAME parameters ':' suite
255 # ignoring decorators
256
257 def p_funcdef(self, p):
258 "funcdef : DEF NAME parameters COLON suite"
259 p[0] = ast.FunctionDef(p[2], p[3], p[5], ())
260
261 # parameters: '(' [varargslist] ')'
262 def p_parameters(self, p):
263 """parameters : LPAR RPAR
264 | LPAR varargslist RPAR"""
265 if len(p) == 3:
266 args = []
267 else:
268 args = p[2]
269 p[0] = ast.arguments(args=args, vararg=None, kwarg=None, defaults=[])
270
271 # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
272 # '**' NAME) |
273 # highly simplified
274
275 def p_varargslist(self, p):
276 """varargslist : varargslist COMMA NAME
277 | NAME"""
278 if len(p) == 4:
279 p[0] = p[1] + p[3]
280 else:
281 p[0] = [p[1]]
282
283 # stmt: simple_stmt | compound_stmt
284 def p_stmt_simple(self, p):
285 """stmt : simple_stmt"""
286 # simple_stmt is a list
287 p[0] = p[1]
288
289 def p_stmt_compound(self, p):
290 """stmt : compound_stmt"""
291 p[0] = [p[1]]
292
293 # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
294 def p_simple_stmt(self, p):
295 """simple_stmt : small_stmts NEWLINE
296 | small_stmts SEMICOLON NEWLINE"""
297 p[0] = p[1]
298
299 def p_small_stmts(self, p):
300 """small_stmts : small_stmts SEMICOLON small_stmt
301 | small_stmt"""
302 if len(p) == 4:
303 p[0] = p[1] + [p[3]]
304 else:
305 p[0] = [p[1]]
306
307 # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
308 # import_stmt | global_stmt | exec_stmt | assert_stmt
309 def p_small_stmt(self, p):
310 """small_stmt : flow_stmt
311 | break_stmt
312 | expr_stmt"""
313 if isinstance(p[1], ast.Call):
314 p[0] = ast.Expr(p[1])
315 else:
316 p[0] = p[1]
317
318 # expr_stmt: testlist (augassign (yield_expr|testlist) |
319 # ('=' (yield_expr|testlist))*)
320 # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
321 # '<<=' | '>>=' | '**=' | '//=')
322 def p_expr_stmt(self, p):
323 """expr_stmt : testlist ASSIGN testlist
324 | testlist """
325 print("expr_stmt", p)
326 if len(p) == 2:
327 # a list of expressions
328 #p[0] = ast.Discard(p[1])
329 p[0] = p[1]
330 else:
331 name = None
332 if isinstance(p[1], ast.Name):
333 name = p[1].id
334 elif isinstance(p[1], ast.Subscript):
335 name = p[1].value.id
336 if name in self.gprs:
337 # add to list of uninitialised
338 self.uninit_regs.add(name)
339 elif isinstance(p[1], ast.Call) and p[1].func.id == 'GPR':
340 print(astor.dump_tree(p[1]))
341 # replace GPR(x) with GPR[x]
342 idx = p[1].args[0]
343 p[1] = ast.Subscript(p[1].func, idx)
344 elif isinstance(p[1], ast.Call) and p[1].func.id == 'MEM':
345 print ("mem assign")
346 print(astor.dump_tree(p[1]))
347 p[1].func.id = "memassign" # change function name to set
348 p[1].args.append(p[3])
349 p[0] = p[1]
350 print ("mem rewrite")
351 print(astor.dump_tree(p[0]))
352 return
353 else:
354 print ("help, help")
355 print(astor.dump_tree(p[1]))
356 print("expr assign", name, p[1])
357 if name and name in self.gprs:
358 self.write_regs.add(name) # add to list of regs to write
359 p[0] = Assign(p[1], p[3])
360
361 def p_flow_stmt(self, p):
362 "flow_stmt : return_stmt"
363 p[0] = p[1]
364
365 # return_stmt: 'return' [testlist]
366 def p_return_stmt(self, p):
367 "return_stmt : RETURN testlist"
368 p[0] = ast.Return(p[2])
369
370 def p_compound_stmt(self, p):
371 """compound_stmt : if_stmt
372 | while_stmt
373 | for_stmt
374 | funcdef
375 """
376 p[0] = p[1]
377
378 def p_break_stmt(self, p):
379 """break_stmt : BREAK
380 """
381 p[0] = ast.Break()
382
383 def p_for_stmt(self, p):
384 """for_stmt : FOR atom EQ test TO test COLON suite
385 | DO atom EQ test TO test COLON suite
386 """
387 # auto-add-one (sigh) due to python range
388 start = p[4]
389 end = ast.BinOp(p[6], ast.Add(), ast.Constant(1))
390 it = ast.Call(ast.Name("range"), [start, end], [])
391 p[0] = ast.For(p[2], it, p[8], [])
392
393 def p_while_stmt(self, p):
394 """while_stmt : DO WHILE test COLON suite ELSE COLON suite
395 | DO WHILE test COLON suite
396 """
397 if len(p) == 6:
398 p[0] = ast.While(p[3], p[5], [])
399 else:
400 p[0] = ast.While(p[3], p[5], p[8])
401
402 def p_if_stmt(self, p):
403 """if_stmt : IF test COLON suite ELSE COLON if_stmt
404 | IF test COLON suite ELSE COLON suite
405 | IF test COLON suite
406 """
407 if len(p) == 8 and isinstance(p[7], ast.If):
408 p[0] = ast.If(p[2], p[4], [p[7]])
409 elif len(p) == 5:
410 p[0] = ast.If(p[2], p[4], [])
411 else:
412 p[0] = ast.If(p[2], p[4], p[7])
413
414 def p_suite(self, p):
415 """suite : simple_stmt
416 | NEWLINE INDENT stmts DEDENT"""
417 if len(p) == 2:
418 p[0] = p[1]
419 else:
420 p[0] = p[3]
421
422 def p_stmts(self, p):
423 """stmts : stmts stmt
424 | stmt"""
425 if len(p) == 3:
426 p[0] = p[1] + p[2]
427 else:
428 p[0] = p[1]
429
430 def p_comparison(self, p):
431 """comparison : comparison PLUS comparison
432 | comparison MINUS comparison
433 | comparison MULT comparison
434 | comparison DIV comparison
435 | comparison MOD comparison
436 | comparison EQ comparison
437 | comparison LE comparison
438 | comparison GE comparison
439 | comparison LTU comparison
440 | comparison GTU comparison
441 | comparison LT comparison
442 | comparison GT comparison
443 | comparison BITOR comparison
444 | comparison BITXOR comparison
445 | comparison BITAND comparison
446 | PLUS comparison
447 | comparison MINUS
448 | INVERT comparison
449 | comparison APPEND comparison
450 | power"""
451 if len(p) == 4:
452 print(list(p))
453 if p[2] == '<u':
454 p[0] = ast.Call(ast.Name("ltu"), (p[1], p[3]), [])
455 elif p[2] == '>u':
456 p[0] = ast.Call(ast.Name("gtu"), (p[1], p[3]), [])
457 elif p[2] == '||':
458 l = check_concat(p[1]) + check_concat(p[3])
459 p[0] = ast.Call(ast.Name("concat"), l, [])
460 elif p[2] in ['<', '>', '=', '<=', '>=']:
461 p[0] = binary_ops[p[2]]((p[1], p[3]))
462 elif identify_sint_mul_pattern(p):
463 keywords=[ast.keyword(arg='repeat', value=p[3])]
464 l = p[1].elts
465 p[0] = ast.Call(ast.Name("concat"), l, keywords)
466 else:
467 p[0] = ast.BinOp(p[1], binary_ops[p[2]], p[3])
468 elif len(p) == 3:
469 if isinstance(p[2], str) and p[2] == '-':
470 p[0] = ast.UnaryOp(unary_ops[p[2]], p[1])
471 else:
472 p[0] = ast.UnaryOp(unary_ops[p[1]], p[2])
473 else:
474 p[0] = p[1]
475
476 # power: atom trailer* ['**' factor]
477 # trailers enables function calls (and subscripts).
478 # so this is 'trailerlist'
479 def p_power(self, p):
480 """power : atom
481 | atom trailerlist"""
482 if len(p) == 2:
483 p[0] = p[1]
484 else:
485 print("power dump atom")
486 print(astor.dump_tree(p[1]))
487 print("power dump trailerlist")
488 print(astor.dump_tree(p[2]))
489 p[0] = apply_trailer(p[1], p[2])
490
491 def p_atom_name(self, p):
492 """atom : NAME"""
493 p[0] = ast.Name(id=p[1], ctx=ast.Load())
494
495 def p_atom_number(self, p):
496 """atom : BINARY
497 | NUMBER
498 | STRING"""
499 p[0] = ast.Constant(p[1])
500
501 # '[' [listmaker] ']' |
502
503 def p_atom_listmaker(self, p):
504 """atom : LBRACK listmaker RBRACK"""
505 p[0] = p[2]
506
507 def p_listmaker(self, p):
508 """listmaker : test COMMA listmaker
509 | test
510 """
511 if len(p) == 2:
512 p[0] = ast.List([p[1]])
513 else:
514 p[0] = ast.List([p[1]] + p[3].nodes)
515
516 def p_atom_tuple(self, p):
517 """atom : LPAR testlist RPAR"""
518 print("tuple", p[2])
519 print("astor dump")
520 print(astor.dump_tree(p[2]))
521
522 if isinstance(p[2], ast.Name):
523 print("tuple name", p[2].id)
524 if p[2].id in self.gprs:
525 self.read_regs.add(p[2].id) # add to list of regs to read
526 #p[0] = ast.Subscript(ast.Name("GPR"), ast.Str(p[2].id))
527 # return
528 p[0] = p[2]
529 elif isinstance(p[2], ast.BinOp):
530 if isinstance(p[2].left, ast.Name) and \
531 isinstance(p[2].right, ast.Constant) and \
532 p[2].right.value == 0 and \
533 p[2].left.id in self.gprs:
534 rid = p[2].left.id
535 self.read_regs.add(rid) # add to list of regs to read
536 # create special call to GPR.getz
537 gprz = ast.Name("GPR")
538 gprz = ast.Attribute(gprz, "getz") # get testzero function
539 # *sigh* see class GPR. we need index itself not reg value
540 ridx = ast.Name("_%s" % rid)
541 p[0] = ast.Call(gprz, [ridx], [])
542 print("tree", astor.dump_tree(p[0]))
543 else:
544 p[0] = p[2]
545 else:
546 p[0] = p[2]
547
548 def p_trailerlist(self, p):
549 """trailerlist : trailer trailerlist
550 | trailer
551 """
552 if len(p) == 2:
553 p[0] = p[1]
554 else:
555 p[0] = ("TLIST", p[1], p[2])
556
557 # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
558 def p_trailer(self, p):
559 """trailer : trailer_arglist
560 | trailer_subscript
561 """
562 p[0] = p[1]
563
564 def p_trailer_arglist(self, p):
565 "trailer_arglist : LPAR arglist RPAR"
566 p[0] = ("CALL", p[2])
567
568 def p_trailer_subscript(self, p):
569 "trailer_subscript : LBRACK subscript RBRACK"
570 p[0] = ("SUBS", p[2])
571
572 # subscript: '.' '.' '.' | test | [test] ':' [test]
573
574 def p_subscript(self, p):
575 """subscript : test COLON test
576 | test
577 """
578 if len(p) == 4:
579 # add one to end
580 if isinstance(p[3], ast.Constant):
581 end = ast.Constant(p[3].value+1)
582 else:
583 end = ast.BinOp(p[3], ast.Add(), ast.Constant(1))
584 p[0] = [p[1], end]
585 else:
586 p[0] = [p[1]]
587
588 # testlist: test (',' test)* [',']
589 # Contains shift/reduce error
590
591 def p_testlist(self, p):
592 """testlist : testlist_multi COMMA
593 | testlist_multi """
594 if len(p) == 2:
595 p[0] = p[1]
596 else:
597 # May need to promote singleton to tuple
598 if isinstance(p[1], list):
599 p[0] = p[1]
600 else:
601 p[0] = [p[1]]
602 # Convert into a tuple?
603 if isinstance(p[0], list):
604 p[0] = ast.Tuple(p[0])
605
606 def p_testlist_multi(self, p):
607 """testlist_multi : testlist_multi COMMA test
608 | test"""
609 if len(p) == 2:
610 # singleton
611 p[0] = p[1]
612 else:
613 if isinstance(p[1], list):
614 p[0] = p[1] + [p[3]]
615 else:
616 # singleton -> tuple
617 p[0] = [p[1], p[3]]
618
619 # test: or_test ['if' or_test 'else' test] | lambdef
620 # as I don't support 'and', 'or', and 'not' this works down to 'comparison'
621
622 def p_test(self, p):
623 "test : comparison"
624 p[0] = p[1]
625
626 # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
627 # | '**' test)
628 # XXX INCOMPLETE: this doesn't allow the trailing comma
629
630 def p_arglist(self, p):
631 """arglist : arglist COMMA argument
632 | argument"""
633 if len(p) == 4:
634 p[0] = p[1] + [p[3]]
635 else:
636 p[0] = [p[1]]
637
638 # argument: test [gen_for] | test '=' test # Really [keyword '='] test
639 def p_argument(self, p):
640 "argument : test"
641 p[0] = p[1]
642
643 def p_error(self, p):
644 # print "Error!", repr(p)
645 raise SyntaxError(p)
646
647
648 class GardenSnakeParser(PowerParser):
649 def __init__(self, lexer=None, debug=False):
650 PowerParser.__init__(self)
651 self.debug = debug
652 if lexer is None:
653 lexer = IndentLexer(debug=0)
654 self.lexer = lexer
655 self.tokens = lexer.tokens
656 self.parser = yacc.yacc(module=self, start="file_input_end",
657 debug=debug, write_tables=False)
658
659 self.sd = create_pdecode()
660
661 def parse(self, code):
662 # self.lexer.input(code)
663 result = self.parser.parse(code, lexer=self.lexer, debug=self.debug)
664 return ast.Module(result)
665
666
667 ###### Code generation ######
668
669 #from compiler import misc, syntax, pycodegen
670
671 class GardenSnakeCompiler(object):
672 def __init__(self, debug=False):
673 self.parser = GardenSnakeParser(debug=debug)
674
675 def compile(self, code, mode="exec", filename="<string>"):
676 tree = self.parser.parse(code)
677 print("snake")
678 pprint(tree)
679 return tree
680 #misc.set_filename(filename, tree)
681 return compile(tree, mode="exec", filename="<string>")
682 # syntax.check(tree)
683 gen = pycodegen.ModuleCodeGenerator(tree)
684 code = gen.getCode()
685 return code