remove extraneous statement
[soc.git] / src / soc / decoder / pseudo / parser.py
1 # Based on GardenSnake - a parser generator demonstration program
2 # GardenSnake was released into the Public Domain by Andrew Dalke.
3
4 # Portions of this work are derived from Python's Grammar definition
5 # and may be covered under the Python copyright and license
6 #
7 # Andrew Dalke / Dalke Scientific Software, LLC
8 # 30 August 2006 / Cape Town, South Africa
9
10 # Modifications for inclusion in PLY distribution
11 from pprint import pprint
12 from ply import lex, yacc
13 import astor
14
15 from soc.decoder.power_decoder import create_pdecode
16 from soc.decoder.pseudo.lexer import IndentLexer
17 from soc.decoder.orderedset import OrderedSet
18
19 # I use the Python AST
20 #from compiler import ast
21 import ast
22
23 # Helper function
24
25
26 def Assign(left, right):
27 names = []
28 print("Assign", left, right)
29 if isinstance(left, ast.Name):
30 # Single assignment on left
31 # XXX when doing IntClass, which will have an "eq" function,
32 # this is how to access it
33 # eq = ast.Attribute(left, "eq") # get eq fn
34 # return ast.Call(eq, [right], []) # now call left.eq(right)
35 return ast.Assign([ast.Name(left.id, ast.Store())], right)
36 elif isinstance(left, ast.Tuple):
37 # List of things - make sure they are Name nodes
38 names = []
39 for child in left.getChildren():
40 if not isinstance(child, ast.Name):
41 raise SyntaxError("that assignment not supported")
42 names.append(child.name)
43 ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
44 return ast.Assign([ast.AssTuple(ass_list)], right)
45 elif isinstance(left, ast.Subscript):
46 return ast.Assign([left], right)
47 # XXX HMMM probably not needed...
48 ls = left.slice
49 if isinstance(ls, ast.Slice):
50 lower, upper, step = ls.lower, ls.upper, ls.step
51 print("slice assign", lower, upper, step)
52 if step is None:
53 ls = (lower, upper, None)
54 else:
55 ls = (lower, upper, step)
56 ls = ast.Tuple(ls)
57 return ast.Call(ast.Name("selectassign"),
58 [left.value, ls, right], [])
59 else:
60 print("Assign fail")
61 raise SyntaxError("Can't do that yet")
62
63
64 # I implemented INDENT / DEDENT generation as a post-processing filter
65
66 # The original lex token stream contains WS and NEWLINE characters.
67 # WS will only occur before any other tokens on a line.
68
69 # I have three filters. One tags tokens by adding two attributes.
70 # "must_indent" is True if the token must be indented from the
71 # previous code. The other is "at_line_start" which is True for WS
72 # and the first non-WS/non-NEWLINE on a line. It flags the check so
73 # see if the new line has changed indication level.
74
75
76 # No using Python's approach because Ply supports precedence
77
78 # comparison: expr (comp_op expr)*
79 # arith_expr: term (('+'|'-') term)*
80 # term: factor (('*'|'/'|'%'|'//') factor)*
81 # factor: ('+'|'-'|'~') factor | power
82 # comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
83
84 def make_le_compare(arg):
85 (left, right) = arg
86 return ast.Compare(left, [ast.LtE()], [right])
87
88
89 def make_ge_compare(arg):
90 (left, right) = arg
91 return ast.Compare(left, [ast.GtE()], [right])
92
93
94 def make_lt_compare(arg):
95 (left, right) = arg
96 return ast.Compare(left, [ast.Lt()], [right])
97
98
99 def make_gt_compare(arg):
100 (left, right) = arg
101 return ast.Compare(left, [ast.Gt()], [right])
102
103
104 def make_eq_compare(arg):
105 (left, right) = arg
106 return ast.Compare(left, [ast.Eq()], [right])
107
108
109 binary_ops = {
110 "^": ast.BitXor(),
111 "&": ast.BitAnd(),
112 "|": ast.BitOr(),
113 "+": ast.Add(),
114 "-": ast.Sub(),
115 "*": ast.Mult(),
116 "/": ast.Div(),
117 "%": ast.Mod(),
118 "<=": make_le_compare,
119 ">=": make_ge_compare,
120 "<": make_lt_compare,
121 ">": make_gt_compare,
122 "=": make_eq_compare,
123 }
124 unary_ops = {
125 "+": ast.UAdd(),
126 "-": ast.USub(),
127 "¬": ast.Invert(),
128 }
129
130
131 def check_concat(node): # checks if the comparison is already a concat
132 print("check concat", node)
133 if not isinstance(node, ast.Call):
134 return [node]
135 print("func", node.func.id)
136 if node.func.id != 'concat':
137 return [node]
138 if node.keywords: # a repeated list-constant, don't optimise
139 return [node]
140 return node.args
141
142
143 # identify SelectableInt pattern
144 def identify_sint_mul_pattern(p):
145 if not isinstance(p[3], ast.Constant):
146 return False
147 if not isinstance(p[1], ast.List):
148 return False
149 l = p[1].elts
150 if len(l) != 1:
151 return False
152 elt = l[0]
153 return isinstance(elt, ast.Constant)
154
155 def apply_trailer(atom, trailer):
156 if trailer[0] == "TLIST":
157 # assume depth of one
158 atom = apply_trailer(atom, trailer[1])
159 trailer = trailer[2]
160 if trailer[0] == "CALL":
161 #p[0] = ast.Expr(ast.Call(p[1], p[2][1], []))
162 return ast.Call(atom, trailer[1], [])
163 # if p[1].id == 'print':
164 # p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)
165 # else:
166 # p[0] = ast.CallFunc(p[1], p[2][1], None, None)
167 else:
168 print("subscript atom", trailer[1])
169 #raise AssertionError("not implemented %s" % p[2][0])
170 subs = trailer[1]
171 if len(subs) == 1:
172 idx = subs[0]
173 else:
174 idx = ast.Slice(subs[0], subs[1], None)
175 return ast.Subscript(atom, idx, ast.Load())
176
177 ########## Parser (tokens -> AST) ######
178
179 # also part of Ply
180 #import yacc
181
182 # https://www.mathcs.emory.edu/~valerie/courses/fall10/155/resources/op_precedence.html
183 # python operator precedence
184 # Highest precedence at top, lowest at bottom.
185 # Operators in the same box evaluate left to right.
186 #
187 # Operator Description
188 # () Parentheses (grouping)
189 # f(args...) Function call
190 # x[index:index] Slicing
191 # x[index] Subscription
192 # x.attribute Attribute reference
193 # ** Exponentiation
194 # ~x Bitwise not
195 # +x, -x Positive, negative
196 # *, /, % mul, div, remainder
197 # +, - Addition, subtraction
198 # <<, >> Bitwise shifts
199 # & Bitwise AND
200 # ^ Bitwise XOR
201 # | Bitwise OR
202 # in, not in, is, is not, <, <=, >, >=, <>, !=, == comp, membership, ident
203 # not x Boolean NOT
204 # and Boolean AND
205 # or Boolean OR
206 # lambda Lambda expression
207
208 class PowerParser:
209
210 precedence = (
211 ("left", "EQ", "GT", "LT", "LE", "GE", "LTU", "GTU"),
212 ("left", "BITOR"),
213 ("left", "BITXOR"),
214 ("left", "BITAND"),
215 ("left", "PLUS", "MINUS"),
216 ("left", "MULT", "DIV", "MOD"),
217 ("left", "INVERT"),
218 )
219
220 def __init__(self):
221 self.gprs = {}
222 for rname in ['RA', 'RB', 'RC', 'RT', 'RS']:
223 self.gprs[rname] = None
224 self.read_regs = OrderedSet()
225 self.uninit_regs = OrderedSet()
226 self.write_regs = OrderedSet()
227
228 # The grammar comments come from Python's Grammar/Grammar file
229
230 # NB: compound_stmt in single_input is followed by extra NEWLINE!
231 # file_input: (NEWLINE | stmt)* ENDMARKER
232
233 def p_file_input_end(self, p):
234 """file_input_end : file_input ENDMARKER"""
235 print("end", p[1])
236 p[0] = p[1]
237
238 def p_file_input(self, p):
239 """file_input : file_input NEWLINE
240 | file_input stmt
241 | NEWLINE
242 | stmt"""
243 if isinstance(p[len(p)-1], str):
244 if len(p) == 3:
245 p[0] = p[1]
246 else:
247 p[0] = [] # p == 2 --> only a blank line
248 else:
249 if len(p) == 3:
250 p[0] = p[1] + p[2]
251 else:
252 p[0] = p[1]
253
254 # funcdef: [decorators] 'def' NAME parameters ':' suite
255 # ignoring decorators
256
257 def p_funcdef(self, p):
258 "funcdef : DEF NAME parameters COLON suite"
259 p[0] = ast.FunctionDef(p[2], p[3], p[5], ())
260
261 # parameters: '(' [varargslist] ')'
262 def p_parameters(self, p):
263 """parameters : LPAR RPAR
264 | LPAR varargslist RPAR"""
265 if len(p) == 3:
266 args = []
267 else:
268 args = p[2]
269 p[0] = ast.arguments(args=args, vararg=None, kwarg=None, defaults=[])
270
271 # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
272 # '**' NAME) |
273 # highly simplified
274
275 def p_varargslist(self, p):
276 """varargslist : varargslist COMMA NAME
277 | NAME"""
278 if len(p) == 4:
279 p[0] = p[1] + p[3]
280 else:
281 p[0] = [p[1]]
282
283 # stmt: simple_stmt | compound_stmt
284 def p_stmt_simple(self, p):
285 """stmt : simple_stmt"""
286 # simple_stmt is a list
287 p[0] = p[1]
288
289 def p_stmt_compound(self, p):
290 """stmt : compound_stmt"""
291 p[0] = [p[1]]
292
293 # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
294 def p_simple_stmt(self, p):
295 """simple_stmt : small_stmts NEWLINE
296 | small_stmts SEMICOLON NEWLINE"""
297 p[0] = p[1]
298
299 def p_small_stmts(self, p):
300 """small_stmts : small_stmts SEMICOLON small_stmt
301 | small_stmt"""
302 if len(p) == 4:
303 p[0] = p[1] + [p[3]]
304 else:
305 p[0] = [p[1]]
306
307 # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
308 # import_stmt | global_stmt | exec_stmt | assert_stmt
309 def p_small_stmt(self, p):
310 """small_stmt : flow_stmt
311 | break_stmt
312 | expr_stmt"""
313 if isinstance(p[1], ast.Call):
314 p[0] = ast.Expr(p[1])
315 else:
316 p[0] = p[1]
317
318 # expr_stmt: testlist (augassign (yield_expr|testlist) |
319 # ('=' (yield_expr|testlist))*)
320 # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
321 # '<<=' | '>>=' | '**=' | '//=')
322 def p_expr_stmt(self, p):
323 """expr_stmt : testlist ASSIGN testlist
324 | testlist """
325 print("expr_stmt", p)
326 if len(p) == 2:
327 # a list of expressions
328 #p[0] = ast.Discard(p[1])
329 p[0] = p[1]
330 else:
331 name = None
332 if isinstance(p[1], ast.Name):
333 name = p[1].id
334 elif isinstance(p[1], ast.Subscript):
335 name = p[1].value.id
336 if name in self.gprs:
337 # add to list of uninitialised
338 self.uninit_regs.add(name)
339 elif isinstance(p[1], ast.Call) and p[1].func.id == 'GPR':
340 print(astor.dump_tree(p[1]))
341 # replace GPR(x) with GPR[x]
342 idx = p[1].args[0]
343 p[1] = ast.Subscript(p[1].func, idx)
344 elif isinstance(p[1], ast.Call) and p[1].func.id == 'MEM':
345 print ("mem assign")
346 print(astor.dump_tree(p[1]))
347 p[1].func.id = "memassign" # change function name to set
348 p[1].args.append(p[3])
349 p[0] = p[1]
350 print ("mem rewrite")
351 print(astor.dump_tree(p[0]))
352 return
353 else:
354 print ("help, help")
355 print(astor.dump_tree(p[1]))
356 print("expr assign", name, p[1])
357 if name and name in self.gprs:
358 self.write_regs.add(name) # add to list of regs to write
359 p[0] = Assign(p[1], p[3])
360
361 def p_flow_stmt(self, p):
362 "flow_stmt : return_stmt"
363 p[0] = p[1]
364
365 # return_stmt: 'return' [testlist]
366 def p_return_stmt(self, p):
367 "return_stmt : RETURN testlist"
368 p[0] = ast.Return(p[2])
369
370 def p_compound_stmt(self, p):
371 """compound_stmt : if_stmt
372 | while_stmt
373 | for_stmt
374 | funcdef
375 """
376 p[0] = p[1]
377
378 def p_break_stmt(self, p):
379 """break_stmt : BREAK
380 """
381 p[0] = ast.Break()
382
383 def p_for_stmt(self, p):
384 """for_stmt : FOR test EQ test TO test COLON suite
385 """
386 # auto-add-one (sigh) due to python range
387 start = p[4]
388 end = ast.BinOp(p[6], ast.Add(), ast.Constant(1))
389 it = ast.Call(ast.Name("range"), [start, end], [])
390 p[0] = ast.For(p[2], it, p[8], [])
391
392 def p_while_stmt(self, p):
393 """while_stmt : DO WHILE test COLON suite ELSE COLON suite
394 | DO WHILE test COLON suite
395 """
396 if len(p) == 6:
397 p[0] = ast.While(p[3], p[5], [])
398 else:
399 p[0] = ast.While(p[3], p[5], p[8])
400
401 def p_if_stmt(self, p):
402 """if_stmt : IF test COLON suite ELSE COLON if_stmt
403 | IF test COLON suite ELSE COLON suite
404 | IF test COLON suite
405 """
406 if len(p) == 8 and isinstance(p[7], ast.If):
407 p[0] = ast.If(p[2], p[4], [p[7]])
408 elif len(p) == 5:
409 p[0] = ast.If(p[2], p[4], [])
410 else:
411 p[0] = ast.If(p[2], p[4], p[7])
412
413 def p_suite(self, p):
414 """suite : simple_stmt
415 | NEWLINE INDENT stmts DEDENT"""
416 if len(p) == 2:
417 p[0] = p[1]
418 else:
419 p[0] = p[3]
420
421 def p_stmts(self, p):
422 """stmts : stmts stmt
423 | stmt"""
424 if len(p) == 3:
425 p[0] = p[1] + p[2]
426 else:
427 p[0] = p[1]
428
429 def p_comparison(self, p):
430 """comparison : comparison PLUS comparison
431 | comparison MINUS comparison
432 | comparison MULT comparison
433 | comparison DIV comparison
434 | comparison MOD comparison
435 | comparison EQ comparison
436 | comparison LE comparison
437 | comparison GE comparison
438 | comparison LTU comparison
439 | comparison GTU comparison
440 | comparison LT comparison
441 | comparison GT comparison
442 | comparison BITOR comparison
443 | comparison BITXOR comparison
444 | comparison BITAND comparison
445 | PLUS comparison
446 | comparison MINUS
447 | INVERT comparison
448 | comparison APPEND comparison
449 | power"""
450 if len(p) == 4:
451 print(list(p))
452 if p[2] == '<u':
453 p[0] = ast.Call(ast.Name("ltu"), (p[1], p[3]), [])
454 elif p[2] == '>u':
455 p[0] = ast.Call(ast.Name("gtu"), (p[1], p[3]), [])
456 elif p[2] == '||':
457 l = check_concat(p[1]) + check_concat(p[3])
458 p[0] = ast.Call(ast.Name("concat"), l, [])
459 elif p[2] in ['<', '>', '=', '<=', '>=']:
460 p[0] = binary_ops[p[2]]((p[1], p[3]))
461 elif identify_sint_mul_pattern(p):
462 keywords=[ast.keyword(arg='repeat', value=p[3])]
463 l = p[1].elts
464 p[0] = ast.Call(ast.Name("concat"), l, keywords)
465 else:
466 p[0] = ast.BinOp(p[1], binary_ops[p[2]], p[3])
467 elif len(p) == 3:
468 if isinstance(p[2], str) and p[2] == '-':
469 p[0] = ast.UnaryOp(unary_ops[p[2]], p[1])
470 else:
471 p[0] = ast.UnaryOp(unary_ops[p[1]], p[2])
472 else:
473 p[0] = p[1]
474
475 # power: atom trailer* ['**' factor]
476 # trailers enables function calls (and subscripts).
477 # so this is 'trailerlist'
478 def p_power(self, p):
479 """power : atom
480 | atom trailerlist"""
481 if len(p) == 2:
482 p[0] = p[1]
483 else:
484 print("power dump atom")
485 print(astor.dump_tree(p[1]))
486 print("power dump trailerlist")
487 print(astor.dump_tree(p[2]))
488 p[0] = apply_trailer(p[1], p[2])
489
490 def p_atom_name(self, p):
491 """atom : NAME"""
492 p[0] = ast.Name(id=p[1], ctx=ast.Load())
493
494 def p_atom_number(self, p):
495 """atom : BINARY
496 | NUMBER
497 | STRING"""
498 p[0] = ast.Constant(p[1])
499
500 # '[' [listmaker] ']' |
501
502 def p_atom_listmaker(self, p):
503 """atom : LBRACK listmaker RBRACK"""
504 p[0] = p[2]
505
506 def p_listmaker(self, p):
507 """listmaker : test COMMA listmaker
508 | test
509 """
510 if len(p) == 2:
511 p[0] = ast.List([p[1]])
512 else:
513 p[0] = ast.List([p[1]] + p[3].nodes)
514
515 def p_atom_tuple(self, p):
516 """atom : LPAR testlist RPAR"""
517 print("tuple", p[2])
518 print("astor dump")
519 print(astor.dump_tree(p[2]))
520
521 if isinstance(p[2], ast.Name):
522 print("tuple name", p[2].id)
523 if p[2].id in self.gprs:
524 self.read_regs.add(p[2].id) # add to list of regs to read
525 #p[0] = ast.Subscript(ast.Name("GPR"), ast.Str(p[2].id))
526 # return
527 p[0] = p[2]
528 elif isinstance(p[2], ast.BinOp):
529 if isinstance(p[2].left, ast.Name) and \
530 isinstance(p[2].right, ast.Constant) and \
531 p[2].right.value == 0 and \
532 p[2].left.id in self.gprs:
533 rid = p[2].left.id
534 self.read_regs.add(rid) # add to list of regs to read
535 # create special call to GPR.getz
536 gprz = ast.Name("GPR")
537 gprz = ast.Attribute(gprz, "getz") # get testzero function
538 # *sigh* see class GPR. we need index itself not reg value
539 ridx = ast.Name("_%s" % rid)
540 p[0] = ast.Call(gprz, [ridx], [])
541 print("tree", astor.dump_tree(p[0]))
542 else:
543 p[0] = p[2]
544 else:
545 p[0] = p[2]
546
547 def p_trailerlist(self, p):
548 """trailerlist : trailer trailerlist
549 | trailer
550 """
551 if len(p) == 2:
552 p[0] = p[1]
553 else:
554 p[0] = ("TLIST", p[1], p[2])
555
556 # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
557 def p_trailer(self, p):
558 """trailer : trailer_arglist
559 | trailer_subscript
560 """
561 p[0] = p[1]
562
563 def p_trailer_arglist(self, p):
564 "trailer_arglist : LPAR arglist RPAR"
565 p[0] = ("CALL", p[2])
566
567 def p_trailer_subscript(self, p):
568 "trailer_subscript : LBRACK subscript RBRACK"
569 p[0] = ("SUBS", p[2])
570
571 # subscript: '.' '.' '.' | test | [test] ':' [test]
572
573 def p_subscript(self, p):
574 """subscript : test COLON test
575 | test
576 """
577 if len(p) == 4:
578 # add one to end
579 if isinstance(p[3], ast.Constant):
580 end = ast.Constant(p[3].value+1)
581 else:
582 end = ast.BinOp(p[3], ast.Add(), ast.Constant(1))
583 p[0] = [p[1], end]
584 else:
585 p[0] = [p[1]]
586
587 # testlist: test (',' test)* [',']
588 # Contains shift/reduce error
589
590 def p_testlist(self, p):
591 """testlist : testlist_multi COMMA
592 | testlist_multi """
593 if len(p) == 2:
594 p[0] = p[1]
595 else:
596 # May need to promote singleton to tuple
597 if isinstance(p[1], list):
598 p[0] = p[1]
599 else:
600 p[0] = [p[1]]
601 # Convert into a tuple?
602 if isinstance(p[0], list):
603 p[0] = ast.Tuple(p[0])
604
605 def p_testlist_multi(self, p):
606 """testlist_multi : testlist_multi COMMA test
607 | test"""
608 if len(p) == 2:
609 # singleton
610 p[0] = p[1]
611 else:
612 if isinstance(p[1], list):
613 p[0] = p[1] + [p[3]]
614 else:
615 # singleton -> tuple
616 p[0] = [p[1], p[3]]
617
618 # test: or_test ['if' or_test 'else' test] | lambdef
619 # as I don't support 'and', 'or', and 'not' this works down to 'comparison'
620
621 def p_test(self, p):
622 "test : comparison"
623 p[0] = p[1]
624
625 # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
626 # | '**' test)
627 # XXX INCOMPLETE: this doesn't allow the trailing comma
628
629 def p_arglist(self, p):
630 """arglist : arglist COMMA argument
631 | argument"""
632 if len(p) == 4:
633 p[0] = p[1] + [p[3]]
634 else:
635 p[0] = [p[1]]
636
637 # argument: test [gen_for] | test '=' test # Really [keyword '='] test
638 def p_argument(self, p):
639 "argument : test"
640 p[0] = p[1]
641
642 def p_error(self, p):
643 # print "Error!", repr(p)
644 raise SyntaxError(p)
645
646
647 class GardenSnakeParser(PowerParser):
648 def __init__(self, lexer=None):
649 PowerParser.__init__(self)
650 if lexer is None:
651 lexer = IndentLexer(debug=0)
652 self.lexer = lexer
653 self.tokens = lexer.tokens
654 self.parser = yacc.yacc(module=self, start="file_input_end",
655 debug=False, write_tables=False)
656
657 self.sd = create_pdecode()
658
659 def parse(self, code):
660 # self.lexer.input(code)
661 result = self.parser.parse(code, lexer=self.lexer, debug=False)
662 return ast.Module(result)
663
664
665 ###### Code generation ######
666
667 #from compiler import misc, syntax, pycodegen
668
669 class GardenSnakeCompiler(object):
670 def __init__(self):
671 self.parser = GardenSnakeParser()
672
673 def compile(self, code, mode="exec", filename="<string>"):
674 tree = self.parser.parse(code)
675 print("snake")
676 pprint(tree)
677 return tree
678 #misc.set_filename(filename, tree)
679 return compile(tree, mode="exec", filename="<string>")
680 # syntax.check(tree)
681 gen = pycodegen.ModuleCodeGenerator(tree)
682 code = gen.getCode()
683 return code