4fb7bba95a6b22f02456c86d21a192a0c75142da
[soc.git] / src / soc / decoder / pseudo / parser.py
1 # Based on GardenSnake - a parser generator demonstration program
2 # GardenSnake was released into the Public Domain by Andrew Dalke.
3
4 # Portions of this work are derived from Python's Grammar definition
5 # and may be covered under the Python copyright and license
6 #
7 # Andrew Dalke / Dalke Scientific Software, LLC
8 # 30 August 2006 / Cape Town, South Africa
9
10 # Modifications for inclusion in PLY distribution
11 from pprint import pprint
12 from ply import lex, yacc
13 import astor
14
15 from soc.decoder.power_decoder import create_pdecode
16 from soc.decoder.pseudo.lexer import IndentLexer
17 from soc.decoder.orderedset import OrderedSet
18
19 # I use the Python AST
20 #from compiler import ast
21 import ast
22
23 # Helper function
24
25
26 def Assign(left, right, iea_mode):
27 names = []
28 print("Assign", left, right)
29 if isinstance(left, ast.Name):
30 # Single assignment on left
31 # XXX when doing IntClass, which will have an "eq" function,
32 # this is how to access it
33 # eq = ast.Attribute(left, "eq") # get eq fn
34 # return ast.Call(eq, [right], []) # now call left.eq(right)
35 return ast.Assign([ast.Name(left.id, ast.Store())], right)
36 elif isinstance(left, ast.Tuple):
37 # List of things - make sure they are Name nodes
38 names = []
39 for child in left.getChildren():
40 if not isinstance(child, ast.Name):
41 raise SyntaxError("that assignment not supported")
42 names.append(child.name)
43 ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
44 return ast.Assign([ast.AssTuple(ass_list)], right)
45 elif isinstance(left, ast.Subscript):
46 return ast.Assign([left], right)
47 # XXX HMMM probably not needed...
48 ls = left.slice
49 if isinstance(ls, ast.Slice):
50 lower, upper, step = ls.lower, ls.upper, ls.step
51 print("slice assign", lower, upper, step)
52 if step is None:
53 ls = (lower, upper, None)
54 else:
55 ls = (lower, upper, step)
56 ls = ast.Tuple(ls)
57 return ast.Call(ast.Name("selectassign"),
58 [left.value, ls, right], [])
59 else:
60 print("Assign fail")
61 raise SyntaxError("Can't do that yet")
62
63
64 # I implemented INDENT / DEDENT generation as a post-processing filter
65
66 # The original lex token stream contains WS and NEWLINE characters.
67 # WS will only occur before any other tokens on a line.
68
69 # I have three filters. One tags tokens by adding two attributes.
70 # "must_indent" is True if the token must be indented from the
71 # previous code. The other is "at_line_start" which is True for WS
72 # and the first non-WS/non-NEWLINE on a line. It flags the check so
73 # see if the new line has changed indication level.
74
75
76 # No using Python's approach because Ply supports precedence
77
78 # comparison: expr (comp_op expr)*
79 # arith_expr: term (('+'|'-') term)*
80 # term: factor (('*'|'/'|'%'|'//') factor)*
81 # factor: ('+'|'-'|'~') factor | power
82 # comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
83
84 def make_le_compare(arg):
85 (left, right) = arg
86 return ast.Compare(left, [ast.LtE()], [right])
87
88
89 def make_ge_compare(arg):
90 (left, right) = arg
91 return ast.Compare(left, [ast.GtE()], [right])
92
93
94 def make_lt_compare(arg):
95 (left, right) = arg
96 return ast.Compare(left, [ast.Lt()], [right])
97
98
99 def make_gt_compare(arg):
100 (left, right) = arg
101 return ast.Compare(left, [ast.Gt()], [right])
102
103
104 def make_eq_compare(arg):
105 (left, right) = arg
106 return ast.Compare(left, [ast.Eq()], [right])
107
108
109 def make_ne_compare(arg):
110 (left, right) = arg
111 return ast.Compare(left, [ast.NotEq()], [right])
112
113
114 binary_ops = {
115 "^": ast.BitXor(),
116 "&": ast.BitAnd(),
117 "|": ast.BitOr(),
118 "+": ast.Add(),
119 "-": ast.Sub(),
120 "*": ast.Mult(),
121 "/": ast.Div(),
122 "%": ast.Mod(),
123 "<=": make_le_compare,
124 ">=": make_ge_compare,
125 "<": make_lt_compare,
126 ">": make_gt_compare,
127 "=": make_eq_compare,
128 "!=": make_ne_compare,
129 }
130 unary_ops = {
131 "+": ast.UAdd(),
132 "-": ast.USub(),
133 "¬": ast.Invert(),
134 }
135
136
137 def check_concat(node): # checks if the comparison is already a concat
138 print("check concat", node)
139 if not isinstance(node, ast.Call):
140 return [node]
141 print("func", node.func.id)
142 if node.func.id != 'concat':
143 return [node]
144 if node.keywords: # a repeated list-constant, don't optimise
145 return [node]
146 return node.args
147
148
149 # identify SelectableInt pattern [something] * N
150 # must return concat(something, repeat=N)
151 def identify_sint_mul_pattern(p):
152 if p[2] != '*': # multiply
153 return False
154 if not isinstance(p[3], ast.Constant): # rhs = Num
155 return False
156 if not isinstance(p[1], ast.List): # lhs is a list
157 return False
158 l = p[1].elts
159 if len(l) != 1: # lhs is a list of length 1
160 return False
161 return True # yippee!
162
163
164 def apply_trailer(atom, trailer):
165 if trailer[0] == "TLIST":
166 # assume depth of one
167 atom = apply_trailer(atom, trailer[1])
168 trailer = trailer[2]
169 if trailer[0] == "CALL":
170 #p[0] = ast.Expr(ast.Call(p[1], p[2][1], []))
171 return ast.Call(atom, trailer[1], [])
172 # if p[1].id == 'print':
173 # p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)
174 # else:
175 # p[0] = ast.CallFunc(p[1], p[2][1], None, None)
176 else:
177 print("subscript atom", trailer[1])
178 #raise AssertionError("not implemented %s" % p[2][0])
179 subs = trailer[1]
180 if len(subs) == 1:
181 idx = subs[0]
182 else:
183 idx = ast.Slice(subs[0], subs[1], None)
184 return ast.Subscript(atom, idx, ast.Load())
185
186 ########## Parser (tokens -> AST) ######
187
188 # also part of Ply
189 #import yacc
190
191 # https://www.mathcs.emory.edu/~valerie/courses/fall10/155/resources/op_precedence.html
192 # python operator precedence
193 # Highest precedence at top, lowest at bottom.
194 # Operators in the same box evaluate left to right.
195 #
196 # Operator Description
197 # () Parentheses (grouping)
198 # f(args...) Function call
199 # x[index:index] Slicing
200 # x[index] Subscription
201 # x.attribute Attribute reference
202 # ** Exponentiation
203 # ~x Bitwise not
204 # +x, -x Positive, negative
205 # *, /, % mul, div, remainder
206 # +, - Addition, subtraction
207 # <<, >> Bitwise shifts
208 # & Bitwise AND
209 # ^ Bitwise XOR
210 # | Bitwise OR
211 # in, not in, is, is not, <, <=, >, >=, <>, !=, == comp, membership, ident
212 # not x Boolean NOT
213 # and Boolean AND
214 # or Boolean OR
215 # lambda Lambda expression
216
217 class PowerParser:
218
219 precedence = (
220 ("left", "EQ", "NE", "GT", "LT", "LE", "GE", "LTU", "GTU"),
221 ("left", "BITOR"),
222 ("left", "BITXOR"),
223 ("left", "BITAND"),
224 ("left", "PLUS", "MINUS"),
225 ("left", "MULT", "DIV", "MOD"),
226 ("left", "INVERT"),
227 )
228
229 def __init__(self):
230 self.gprs = {}
231 for rname in ['RA', 'RB', 'RC', 'RT', 'RS']:
232 self.gprs[rname] = None
233 self.read_regs = OrderedSet()
234 self.uninit_regs = OrderedSet()
235 self.write_regs = OrderedSet()
236
237 # The grammar comments come from Python's Grammar/Grammar file
238
239 # NB: compound_stmt in single_input is followed by extra NEWLINE!
240 # file_input: (NEWLINE | stmt)* ENDMARKER
241
242 def p_file_input_end(self, p):
243 """file_input_end : file_input ENDMARKER"""
244 print("end", p[1])
245 p[0] = p[1]
246
247 def p_file_input(self, p):
248 """file_input : file_input NEWLINE
249 | file_input stmt
250 | NEWLINE
251 | stmt"""
252 if isinstance(p[len(p)-1], str):
253 if len(p) == 3:
254 p[0] = p[1]
255 else:
256 p[0] = [] # p == 2 --> only a blank line
257 else:
258 if len(p) == 3:
259 p[0] = p[1] + p[2]
260 else:
261 p[0] = p[1]
262
263 # funcdef: [decorators] 'def' NAME parameters ':' suite
264 # ignoring decorators
265
266 def p_funcdef(self, p):
267 "funcdef : DEF NAME parameters COLON suite"
268 p[0] = ast.FunctionDef(p[2], p[3], p[5], ())
269
270 # parameters: '(' [varargslist] ')'
271 def p_parameters(self, p):
272 """parameters : LPAR RPAR
273 | LPAR varargslist RPAR"""
274 if len(p) == 3:
275 args = []
276 else:
277 args = p[2]
278 p[0] = ast.arguments(args=args, vararg=None, kwarg=None, defaults=[])
279
280 # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
281 # '**' NAME) |
282 # highly simplified
283
284 def p_varargslist(self, p):
285 """varargslist : varargslist COMMA NAME
286 | NAME"""
287 if len(p) == 4:
288 p[0] = p[1] + p[3]
289 else:
290 p[0] = [p[1]]
291
292 # stmt: simple_stmt | compound_stmt
293 def p_stmt_simple(self, p):
294 """stmt : simple_stmt"""
295 # simple_stmt is a list
296 p[0] = p[1]
297
298 def p_stmt_compound(self, p):
299 """stmt : compound_stmt"""
300 p[0] = [p[1]]
301
302 # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
303 def p_simple_stmt(self, p):
304 """simple_stmt : small_stmts NEWLINE
305 | small_stmts SEMICOLON NEWLINE"""
306 p[0] = p[1]
307
308 def p_small_stmts(self, p):
309 """small_stmts : small_stmts SEMICOLON small_stmt
310 | small_stmt"""
311 if len(p) == 4:
312 p[0] = p[1] + [p[3]]
313 else:
314 p[0] = [p[1]]
315
316 # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
317 # import_stmt | global_stmt | exec_stmt | assert_stmt
318 def p_small_stmt(self, p):
319 """small_stmt : flow_stmt
320 | break_stmt
321 | expr_stmt"""
322 if isinstance(p[1], ast.Call):
323 p[0] = ast.Expr(p[1])
324 else:
325 p[0] = p[1]
326
327 # expr_stmt: testlist (augassign (yield_expr|testlist) |
328 # ('=' (yield_expr|testlist))*)
329 # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
330 # '<<=' | '>>=' | '**=' | '//=')
331 def p_expr_stmt(self, p):
332 """expr_stmt : testlist ASSIGNEA testlist
333 | testlist ASSIGN testlist
334 | testlist """
335 print("expr_stmt", p)
336 if len(p) == 2:
337 # a list of expressions
338 #p[0] = ast.Discard(p[1])
339 p[0] = p[1]
340 else:
341 iea_mode = p[2] == '<-iea'
342 name = None
343 if isinstance(p[1], ast.Name):
344 name = p[1].id
345 elif isinstance(p[1], ast.Subscript):
346 name = p[1].value.id
347 if name in self.gprs:
348 # add to list of uninitialised
349 self.uninit_regs.add(name)
350 elif isinstance(p[1], ast.Call) and p[1].func.id == 'GPR':
351 print(astor.dump_tree(p[1]))
352 # replace GPR(x) with GPR[x]
353 idx = p[1].args[0]
354 p[1] = ast.Subscript(p[1].func, idx)
355 elif isinstance(p[1], ast.Call) and p[1].func.id == 'MEM':
356 print ("mem assign")
357 print(astor.dump_tree(p[1]))
358 p[1].func.id = "memassign" # change function name to set
359 p[1].args.append(p[3])
360 p[0] = p[1]
361 print ("mem rewrite")
362 print(astor.dump_tree(p[0]))
363 return
364 else:
365 print ("help, help")
366 print(astor.dump_tree(p[1]))
367 print("expr assign", name, p[1])
368 if name and name in self.gprs:
369 self.write_regs.add(name) # add to list of regs to write
370 p[0] = Assign(p[1], p[3], iea_mode)
371
372 def p_flow_stmt(self, p):
373 "flow_stmt : return_stmt"
374 p[0] = p[1]
375
376 # return_stmt: 'return' [testlist]
377 def p_return_stmt(self, p):
378 "return_stmt : RETURN testlist"
379 p[0] = ast.Return(p[2])
380
381 def p_compound_stmt(self, p):
382 """compound_stmt : if_stmt
383 | while_stmt
384 | for_stmt
385 | funcdef
386 """
387 p[0] = p[1]
388
389 def p_break_stmt(self, p):
390 """break_stmt : BREAK
391 """
392 p[0] = ast.Break()
393
394 def p_for_stmt(self, p):
395 """for_stmt : FOR atom EQ test TO test COLON suite
396 | DO atom EQ test TO test COLON suite
397 """
398 # auto-add-one (sigh) due to python range
399 start = p[4]
400 end = ast.BinOp(p[6], ast.Add(), ast.Constant(1))
401 it = ast.Call(ast.Name("range"), [start, end], [])
402 p[0] = ast.For(p[2], it, p[8], [])
403
404 def p_while_stmt(self, p):
405 """while_stmt : DO WHILE test COLON suite ELSE COLON suite
406 | DO WHILE test COLON suite
407 """
408 if len(p) == 6:
409 p[0] = ast.While(p[3], p[5], [])
410 else:
411 p[0] = ast.While(p[3], p[5], p[8])
412
413 def p_if_stmt(self, p):
414 """if_stmt : IF test COLON suite ELSE COLON if_stmt
415 | IF test COLON suite ELSE COLON suite
416 | IF test COLON suite
417 """
418 if len(p) == 8 and isinstance(p[7], ast.If):
419 p[0] = ast.If(p[2], p[4], [p[7]])
420 elif len(p) == 5:
421 p[0] = ast.If(p[2], p[4], [])
422 else:
423 p[0] = ast.If(p[2], p[4], p[7])
424
425 def p_suite(self, p):
426 """suite : simple_stmt
427 | NEWLINE INDENT stmts DEDENT"""
428 if len(p) == 2:
429 p[0] = p[1]
430 else:
431 p[0] = p[3]
432
433 def p_stmts(self, p):
434 """stmts : stmts stmt
435 | stmt"""
436 if len(p) == 3:
437 p[0] = p[1] + p[2]
438 else:
439 p[0] = p[1]
440
441 def p_comparison(self, p):
442 """comparison : comparison PLUS comparison
443 | comparison MINUS comparison
444 | comparison MULT comparison
445 | comparison DIV comparison
446 | comparison MOD comparison
447 | comparison EQ comparison
448 | comparison NE comparison
449 | comparison LE comparison
450 | comparison GE comparison
451 | comparison LTU comparison
452 | comparison GTU comparison
453 | comparison LT comparison
454 | comparison GT comparison
455 | comparison BITOR comparison
456 | comparison BITXOR comparison
457 | comparison BITAND comparison
458 | PLUS comparison
459 | comparison MINUS
460 | INVERT comparison
461 | comparison APPEND comparison
462 | power"""
463 if len(p) == 4:
464 print(list(p))
465 if p[2] == '<u':
466 p[0] = ast.Call(ast.Name("ltu"), (p[1], p[3]), [])
467 elif p[2] == '>u':
468 p[0] = ast.Call(ast.Name("gtu"), (p[1], p[3]), [])
469 elif p[2] == '||':
470 l = check_concat(p[1]) + check_concat(p[3])
471 p[0] = ast.Call(ast.Name("concat"), l, [])
472 elif p[2] in ['<', '>', '=', '<=', '>=', '!=']:
473 p[0] = binary_ops[p[2]]((p[1], p[3]))
474 elif identify_sint_mul_pattern(p):
475 keywords=[ast.keyword(arg='repeat', value=p[3])]
476 l = p[1].elts
477 p[0] = ast.Call(ast.Name("concat"), l, keywords)
478 else:
479 p[0] = ast.BinOp(p[1], binary_ops[p[2]], p[3])
480 elif len(p) == 3:
481 if isinstance(p[2], str) and p[2] == '-':
482 p[0] = ast.UnaryOp(unary_ops[p[2]], p[1])
483 else:
484 p[0] = ast.UnaryOp(unary_ops[p[1]], p[2])
485 else:
486 p[0] = p[1]
487
488 # power: atom trailer* ['**' factor]
489 # trailers enables function calls (and subscripts).
490 # so this is 'trailerlist'
491 def p_power(self, p):
492 """power : atom
493 | atom trailerlist"""
494 if len(p) == 2:
495 p[0] = p[1]
496 else:
497 print("power dump atom")
498 print(astor.dump_tree(p[1]))
499 print("power dump trailerlist")
500 print(astor.dump_tree(p[2]))
501 p[0] = apply_trailer(p[1], p[2])
502
503 def p_atom_name(self, p):
504 """atom : NAME"""
505 p[0] = ast.Name(id=p[1], ctx=ast.Load())
506
507 def p_atom_number(self, p):
508 """atom : BINARY
509 | NUMBER
510 | STRING"""
511 p[0] = ast.Constant(p[1])
512
513 # '[' [listmaker] ']' |
514
515 def p_atom_listmaker(self, p):
516 """atom : LBRACK listmaker RBRACK"""
517 p[0] = p[2]
518
519 def p_listmaker(self, p):
520 """listmaker : test COMMA listmaker
521 | test
522 """
523 if len(p) == 2:
524 p[0] = ast.List([p[1]])
525 else:
526 p[0] = ast.List([p[1]] + p[3].nodes)
527
528 def p_atom_tuple(self, p):
529 """atom : LPAR testlist RPAR"""
530 print("tuple", p[2])
531 print("astor dump")
532 print(astor.dump_tree(p[2]))
533
534 if isinstance(p[2], ast.Name):
535 print("tuple name", p[2].id)
536 if p[2].id in self.gprs:
537 self.read_regs.add(p[2].id) # add to list of regs to read
538 #p[0] = ast.Subscript(ast.Name("GPR"), ast.Str(p[2].id))
539 # return
540 p[0] = p[2]
541 elif isinstance(p[2], ast.BinOp):
542 if isinstance(p[2].left, ast.Name) and \
543 isinstance(p[2].right, ast.Constant) and \
544 p[2].right.value == 0 and \
545 p[2].left.id in self.gprs:
546 rid = p[2].left.id
547 self.read_regs.add(rid) # add to list of regs to read
548 # create special call to GPR.getz
549 gprz = ast.Name("GPR")
550 gprz = ast.Attribute(gprz, "getz") # get testzero function
551 # *sigh* see class GPR. we need index itself not reg value
552 ridx = ast.Name("_%s" % rid)
553 p[0] = ast.Call(gprz, [ridx], [])
554 print("tree", astor.dump_tree(p[0]))
555 else:
556 p[0] = p[2]
557 else:
558 p[0] = p[2]
559
560 def p_trailerlist(self, p):
561 """trailerlist : trailer trailerlist
562 | trailer
563 """
564 if len(p) == 2:
565 p[0] = p[1]
566 else:
567 p[0] = ("TLIST", p[1], p[2])
568
569 # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
570 def p_trailer(self, p):
571 """trailer : trailer_arglist
572 | trailer_subscript
573 """
574 p[0] = p[1]
575
576 def p_trailer_arglist(self, p):
577 "trailer_arglist : LPAR arglist RPAR"
578 p[0] = ("CALL", p[2])
579
580 def p_trailer_subscript(self, p):
581 "trailer_subscript : LBRACK subscript RBRACK"
582 p[0] = ("SUBS", p[2])
583
584 # subscript: '.' '.' '.' | test | [test] ':' [test]
585
586 def p_subscript(self, p):
587 """subscript : test COLON test
588 | test
589 """
590 if len(p) == 4:
591 # add one to end
592 if isinstance(p[3], ast.Constant):
593 end = ast.Constant(p[3].value+1)
594 else:
595 end = ast.BinOp(p[3], ast.Add(), ast.Constant(1))
596 p[0] = [p[1], end]
597 else:
598 p[0] = [p[1]]
599
600 # testlist: test (',' test)* [',']
601 # Contains shift/reduce error
602
603 def p_testlist(self, p):
604 """testlist : testlist_multi COMMA
605 | testlist_multi """
606 if len(p) == 2:
607 p[0] = p[1]
608 else:
609 # May need to promote singleton to tuple
610 if isinstance(p[1], list):
611 p[0] = p[1]
612 else:
613 p[0] = [p[1]]
614 # Convert into a tuple?
615 if isinstance(p[0], list):
616 p[0] = ast.Tuple(p[0])
617
618 def p_testlist_multi(self, p):
619 """testlist_multi : testlist_multi COMMA test
620 | test"""
621 if len(p) == 2:
622 # singleton
623 p[0] = p[1]
624 else:
625 if isinstance(p[1], list):
626 p[0] = p[1] + [p[3]]
627 else:
628 # singleton -> tuple
629 p[0] = [p[1], p[3]]
630
631 # test: or_test ['if' or_test 'else' test] | lambdef
632 # as I don't support 'and', 'or', and 'not' this works down to 'comparison'
633
634 def p_test(self, p):
635 "test : comparison"
636 p[0] = p[1]
637
638 # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
639 # | '**' test)
640 # XXX INCOMPLETE: this doesn't allow the trailing comma
641
642 def p_arglist(self, p):
643 """arglist : arglist COMMA argument
644 | argument"""
645 if len(p) == 4:
646 p[0] = p[1] + [p[3]]
647 else:
648 p[0] = [p[1]]
649
650 # argument: test [gen_for] | test '=' test # Really [keyword '='] test
651 def p_argument(self, p):
652 "argument : test"
653 p[0] = p[1]
654
655 def p_error(self, p):
656 # print "Error!", repr(p)
657 raise SyntaxError(p)
658
659
660 class GardenSnakeParser(PowerParser):
661 def __init__(self, lexer=None, debug=False):
662 PowerParser.__init__(self)
663 self.debug = debug
664 if lexer is None:
665 lexer = IndentLexer(debug=0)
666 self.lexer = lexer
667 self.tokens = lexer.tokens
668 self.parser = yacc.yacc(module=self, start="file_input_end",
669 debug=debug, write_tables=False)
670
671 self.sd = create_pdecode()
672
673 def parse(self, code):
674 # self.lexer.input(code)
675 result = self.parser.parse(code, lexer=self.lexer, debug=self.debug)
676 return ast.Module(result)
677
678
679 ###### Code generation ######
680
681 #from compiler import misc, syntax, pycodegen
682
683 class GardenSnakeCompiler(object):
684 def __init__(self, debug=False):
685 self.parser = GardenSnakeParser(debug=debug)
686
687 def compile(self, code, mode="exec", filename="<string>"):
688 tree = self.parser.parse(code)
689 print("snake")
690 pprint(tree)
691 return tree
692 #misc.set_filename(filename, tree)
693 return compile(tree, mode="exec", filename="<string>")
694 # syntax.check(tree)
695 gen = pycodegen.ModuleCodeGenerator(tree)
696 code = gen.getCode()
697 return code