src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17 from nmutil.extend import exts
  18 from nmutil.gtkw import write_gtkw
  19
  20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
  21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
  22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
  23                                         is_engine_pysim)
  24
  25 from soc.decoder.decode2execute1 import Data
  26 from soc.decoder.power_enums import MicrOp, Function, CryIn
  27
  28 from soc.fu.alu.alu_input_record import CompALUOpSubset
  29 from soc.fu.cr.cr_input_record import CompCROpSubset
  30
  31 import operator
  32
  33
  34 class Adder(Elaboratable):
  35     def __init__(self, width):
  36         self.invert_in = Signal()
  37         self.a = Signal(width)
  38         self.b = Signal(width)
  39         self.o = Signal(width, name="add_o")
  40
  41     def elaborate(self, platform):
  42         m = Module()
  43         with m.If(self.invert_in):
  44             m.d.comb += self.o.eq((~self.a) + self.b)
  45         with m.Else():
  46             m.d.comb += self.o.eq(self.a + self.b)
  47         return m
  48
  49
  50 class Subtractor(Elaboratable):
  51     def __init__(self, width):
  52         self.a = Signal(width)
  53         self.b = Signal(width)
  54         self.o = Signal(width, name="sub_o")
  55
  56     def elaborate(self, platform):
  57         m = Module()
  58         m.d.comb += self.o.eq(self.a - self.b)
  59         return m
  60
  61
  62 class Multiplier(Elaboratable):
  63     def __init__(self, width):
  64         self.a = Signal(width)
  65         self.b = Signal(width)
  66         self.o = Signal(width, name="mul_o")
  67
  68     def elaborate(self, platform):
  69         m = Module()
  70         m.d.comb += self.o.eq(self.a * self.b)
  71         return m
  72
  73
  74 class Shifter(Elaboratable):
  75     def __init__(self, width):
  76         self.width = width
  77         self.a = Signal(width)
  78         self.b = Signal(width)
  79         self.o = Signal(width, name="shf_o")
  80
  81     def elaborate(self, platform):
  82         m = Module()
  83         btrunc = Signal(self.width)
  84         m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
  85         m.d.comb += self.o.eq(self.a >> btrunc)
  86         return m
  87
  88
  89 class SignExtend(Elaboratable):
  90     def __init__(self, width):
  91         self.width = width
  92         self.a = Signal(width)
  93         self.o = Signal(width, name="exts_o")
  94
  95     def elaborate(self, platform):
  96         m = Module()
  97         m.d.comb += self.o.eq(exts(self.a, 8, self.width))
  98         return m
  99
 100
 101 class Dummy:
 102     pass
 103
 104
class DummyALU(Elaboratable):
    """Pass-through "ALU" with a valid/ready handshake.

    When ``p.valid_i`` is seen and ``p.ready_o`` has not yet been raised,
    input ``a`` is copied to the output register ``o`` and the counter is
    set to 1.  Inputs ``b``, ``c`` and the ``op`` record are exposed as
    ports but are not read by ``elaborate``.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        # handshake signals: p = input (previous) side, n = output (next) side
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # three operand inputs, available both as an Array and as a/b/c
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        # single output, available both as out[0] and as o
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the handshake logic and the a -> o copy."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # the "operation": register input a as the result
                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        # this block comes later, so its assignments to valid_o, counter
        # and o take priority over the ones above when both are active
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op record's ports, then a, b, c and o."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 179
 180
class ALU(Elaboratable):
    """Test ALU with deliberately different latencies per operation.

    When the sequencer is idle and ``p.valid_i`` is asserted, the output of
    the sub-module selected by ``op.insn_type`` is latched into ``alu_r``
    and the countdown is loaded: 5 for OP_MUL_L64, 3 for OP_ADD, and 1 for
    OP_SHR, OP_EXTS and OP_EXTSWSLI.  Any other micro-op asserts ``go_now``:
    the handshake is passed straight through and the Subtractor output is
    presented combinatorially.

    Outputs are ``o`` (result) and ``cr`` (3-bit condition value), each a
    ``Data`` record with ``data`` and ``ok`` fields.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        # handshake signals: p = input (previous) side, n = output (next) side
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        # two operand inputs, available both as an Array and as a/b
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        # two outputs: the result, and a 3-bit condition value
        out = []
        out.append(Data(width, name="alu_o"))
        out.append(Data(3, name="alu_cr"))
        self.out = Array(out)
        self.o = self.out[0]
        self.cr = self.out[1]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o
        self.n.data_o.cr = self.cr

    def elaborate(self, platform):
        """Build the sub-modules, the countdown sequencer and the outputs."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)
        ext_sign = SignExtend(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub
        m.submodules.ext_sign = ext_sign

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]
        # EXTS sign extends the first input
        with m.If(self.op.insn_type == MicrOp.OP_EXTS):
            m.d.comb += ext_sign.a.eq(self.a)
        # EXTSWSLI sign extends the second input
        with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
            m.d.comb += ext_sign.a.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o is asserted whenever the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # valid_o is asserted while the count is at one (done)
            m.d.comb += self.n.valid_o.eq(alu_done)

        # register holding the result latched when the operation is accepted
        alu_r = Signal(self.width)

        # output masks
        # NOP and ILLEGAL don't output anything
        with m.If((self.op.insn_type != MicrOp.OP_NOP) &
                  (self.op.insn_type != MicrOp.OP_ILLEGAL)):
            m.d.comb += self.o.ok.eq(1)
        # CR is output when rc bit is active
        m.d.comb += self.cr.ok.eq(self.op.rc.rc)

        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += alu_r.eq(ext_sign.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += alu_r.eq(ext_sign.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL: load the countdown with 5
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT to take 1, straight away
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD to take 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # EXTS to take 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += self.counter.eq(1)
                # EXTSWSLI to take 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += self.counter.eq(1)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # not idle: count down.  at the final count (one), only step
            # to zero (idle) once the recipient asserts ready_i
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.data.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.data.eq(alu_r)

        # determine condition register bits based on the data output value
        # 0b001: output is zero, 0b010: top bit set, 0b100: otherwise
        with m.If(~self.o.data.any()):
            m.d.comb += self.cr.data.eq(0b001)
        with m.Elif(self.o.data[-1]):
            m.d.comb += self.cr.data.eq(0b010)
        with m.Else():
            m.d.comb += self.cr.data.eq(0b100)

        return m

    def __iter__(self):
        """Yield op ports, a, b, the ``o`` ports and the handshake signals."""
        # NOTE(review): the ports of self.cr are not yielded here -- confirm
        # whether leaving them out of the port list is intentional
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield from self.o.ports()
        yield self.p.valid_i
        yield self.p.ready_o
        yield self.n.valid_o
        yield self.n.ready_i

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 347
 348
 349 class BranchOp(Elaboratable):
 350     def __init__(self, width, op):
 351         self.a = Signal(width)
 352         self.b = Signal(width)
 353         self.o = Signal(width)
 354         self.op = op
 355
 356     def elaborate(self, platform):
 357         m = Module()
 358         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 359         return m
 360
 361
class BranchALU(Elaboratable):
    """Branch-compare ALU with a valid/ready handshake and a fixed delay.

    The 2-bit ``op`` selects one of four comparisons of ``a`` against
    ``b``: 0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.
    The selected result is registered into ``o`` when the input is
    accepted, and the counter is loaded with 5.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        # handshake signals: p = input (previous) side, n = output (next) side
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        # two operand inputs, available both as an Array and as a/b
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        # single output, available both as out[0] and as o
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        """Build the four comparison units and the handshake sequencer."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every comparison unit sees both operands all the time
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                # case index follows the list order: bgt, blt, beq, bne
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        # this block comes later, so its assignments to valid_o, counter
        # and o take priority over the ones above when both are active
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield op, a, b and o."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 445
 446
 447 def run_op(dut, a, b, op, inv_a=0):
 448     yield dut.a.eq(a)
 449     yield dut.b.eq(b)
 450     yield dut.op.insn_type.eq(op)
 451     yield dut.op.invert_in.eq(inv_a)
 452     yield dut.n.ready_i.eq(0)
 453     yield dut.p.valid_i.eq(1)
 454     yield dut.n.ready_i.eq(1)
 455     yield
 456
 457     # wait for the ALU to accept our input data
 458     while not (yield dut.p.ready_o):
 459         yield
 460
 461     yield dut.p.valid_i.eq(0)
 462     yield dut.a.eq(0)
 463     yield dut.b.eq(0)
 464     yield dut.op.insn_type.eq(0)
 465     yield dut.op.invert_in.eq(0)
 466
 467     # wait for the ALU to present the output data
 468     while not (yield dut.n.valid_o):
 469         yield
 470
 471     # latch the result and lower read_i
 472     result = yield dut.o.data
 473     yield dut.n.ready_i.eq(0)
 474
 475     return result
 476
 477
 478 def alu_sim(dut):
 479     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
 480     print("alu_sim add", result)
 481     assert (result == 8)
 482
 483     result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
 484     print("alu_sim mul", result)
 485     assert (result == 6)
 486
 487     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
 488     print("alu_sim add-inv", result)
 489     assert (result == 65533)
 490
 491     # test zero-delay ALU
 492     # don't have OP_SUB, so use any other
 493     result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
 494     print("alu_sim sub", result)
 495     assert (result == 2)
 496
 497     result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
 498     print("alu_sim shr", result)
 499     assert (result == 3)
 500
 501
 502 def test_alu():
 503     alu = ALU(width=16)
 504     write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
 505     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 506
 507     vl = rtlil.convert(alu, ports=alu.ports())
 508     with open("test_alu.il", "w") as f:
 509         f.write(vl)
 510
 511
def test_alu_parallel():
    """Exercise a 16-bit ALU with separate producer and consumer processes.

    The producer sends operations without waiting for their results; the
    consumer collects and checks the results in the same order.  Writes
    test_alu_parallel.gtkw and test_alu_parallel.vcd.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0, rc=0):
        """Present one operation and wait until the ALU accepts it."""
        # present input data and assert valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.op.rc.rc.eq(rc)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_in.eq(0)
        yield dut.op.rc.rc.eq(0)

    def receive():
        """Wait for one result and return it as a (data, cr) tuple."""
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read results
        result = yield dut.o.data
        cr = yield dut.cr.data
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result, cr

    def producer():
        """Send the test operations, with some idle cycles in between."""
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64, rc=1)
        # (-6) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1, rc=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_CMP)
        yield
        yield
        # NOP
        yield from send(5, 3, MicrOp.OP_NOP)
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)
        # sign extend 13
        yield from send(13, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(0x80, 2, MicrOp.OP_EXTS, rc=1)
        # sign extend -128 (8 bits), this time taken from the second input
        yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)

    def consumer():
        """Receive the results in order and check them."""
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert result[0] == 8
        # 2 * 3 = 6
        result = yield from receive()
        assert result == (6, 0b100)
        yield
        yield
        # (-6) + 3 = -3
        result = yield from receive()
        assert result == (65533, 0b010)  # unsigned equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert result[0] == 2
        yield
        yield
        # NOP
        yield from receive()
        # 13 >> 2 = 3
        result = yield from receive()
        assert result[0] == 3
        # sign extend 13 = 13
        result = yield from receive()
        assert result[0] == 13
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert result == (0xFF80, 0b010)
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert result[0] == 0xFF80

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
    with sim_writer:
        sim.run()
 633
 634
 635 def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
 636                    pysim=True):
 637     """Common function to write the GTKWave documents for this module"""
 638     gtkwave_desc = [
 639         'clk',
 640         'i1[15:0]',
 641         'i2[15:0]',
 642         'op__insn_type' if pysim else 'op__insn_type[6:0]',
 643         'op__invert_in',
 644         'valid_i',
 645         'ready_o',
 646         'valid_o',
 647         'ready_i',
 648         'alu_o[15:0]',
 649         'alu_o_ok',
 650         'alu_cr[2:0]',
 651         'alu_cr_ok'
 652     ]
 653     # determine the module name of the DUT
 654     module = 'top'
 655     if sub_module is not None:
 656         module = nmigen_sim_top_module + sub_module
 657     vcd_name = gtkw_name.replace('.gtkw', '.vcd')
 658     write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
 659                loc=__file__, clk_period=clk_period, base='signed')
 660
 661
if __name__ == "__main__":
    # run both ALU testbenches when this file is executed as a script
    test_alu()
    test_alu_parallel()

    # disabled: RTLIL export of the BranchALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)