src/soc/experiment/alu_hier.py

"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add (unnecessary) delays to
different operations, so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17 from nmutil.extend import exts
  18 from nmutil.gtkw import write_gtkw
  19
  20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
  21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
  22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
  23                                         is_engine_pysim)
  24
  25 from openpower.decoder.decode2execute1 import Data
  26 from openpower.decoder.power_enums import MicrOp, Function, CryIn
  27
  28 from soc.fu.alu.alu_input_record import CompALUOpSubset
  29 from soc.fu.cr.cr_input_record import CompCROpSubset
  30
  31 from soc.fu.pipe_data import FUBaseData
  32 from soc.fu.alu.pipe_data import CommonPipeSpec
  33 from soc.fu.compunits.compunits import FunctionUnitBaseSingle
  34
  35 import operator
  36
  37
  38 class Adder(Elaboratable):
  39     def __init__(self, width):
  40         self.invert_in = Signal()
  41         self.a = Signal(width)
  42         self.b = Signal(width)
  43         self.o = Signal(width, name="add_o")
  44
  45     def elaborate(self, platform):
  46         m = Module()
  47         with m.If(self.invert_in):
  48             m.d.comb += self.o.eq((~self.a) + self.b)
  49         with m.Else():
  50             m.d.comb += self.o.eq(self.a + self.b)
  51         return m
  52
  53
  54 class Subtractor(Elaboratable):
  55     def __init__(self, width):
  56         self.a = Signal(width)
  57         self.b = Signal(width)
  58         self.o = Signal(width, name="sub_o")
  59
  60     def elaborate(self, platform):
  61         m = Module()
  62         m.d.comb += self.o.eq(self.a - self.b)
  63         return m
  64
  65
  66 class Multiplier(Elaboratable):
  67     def __init__(self, width):
  68         self.a = Signal(width)
  69         self.b = Signal(width)
  70         self.o = Signal(width, name="mul_o")
  71
  72     def elaborate(self, platform):
  73         m = Module()
  74         m.d.comb += self.o.eq(self.a * self.b)
  75         return m
  76
  77
class Shifter(Elaboratable):
    """Combinatorial right-shifter: ``o = a >> b`` (signal name "shf_o").

    ``a`` is the value to shift and ``b`` the shift amount; both are
    ``width``-bit signals.
    """
    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width, name="shf_o")

    def elaborate(self, platform):
        m = Module()
        btrunc = Signal(self.width)
        # NOTE(review): this mask has all `width` bits set and b is itself
        # only `width` bits wide, so the AND does not truncate anything.
        # presumably a narrower (log2(width)-bit?) mask was intended -
        # confirm before relying on any limit of the shift amount.
        m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
        m.d.comb += self.o.eq(self.a >> btrunc)
        return m
  91
  92
  93 class SignExtend(Elaboratable):
  94     def __init__(self, width):
  95         self.width = width
  96         self.a = Signal(width)
  97         self.o = Signal(width, name="exts_o")
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101         m.d.comb += self.o.eq(exts(self.a, 8, self.width))
 102         return m
 103
 104
 105 class Dummy:
 106     pass
 107
 108
class DummyALU(Elaboratable):
    """Pass-through "ALU": copies input ``a`` to the output ``o``.

    The handshake signals are arranged (``p.i_valid``, ``p.o_ready``,
    ``n.o_valid``, ``n.i_ready``) to look like the nmutil pipeline API.
    Inputs ``b``, ``c`` and the ``op`` record are declared, but are not
    read by ``elaborate``.
    """
    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.i_data = Dummy()
        self.p.i_data.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.o_data = Dummy()
        # handshake: p = previous stage (input side), n = next (output side)
        self.p.i_valid = Signal()
        self.p.o_ready = Signal()
        self.n.i_ready = Signal()
        self.n.o_valid = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # three operand inputs, also available by index through self.i
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = i
        self.a, self.b, self.c = i[0], i[1], i[2]
        # single output, also available by index through self.out
        self.out = tuple([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.i_data.ctx.op = self.op
        self.p.i_data.a = self.a
        self.p.i_data.b = self.b
        self.p.i_data.c = self.c
        self.n.o_data.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.i_valid):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.o_ready):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.o_ready.eq(1)

                # capture input a as the result, and request the output
                # to fire (go_now) in this same cycle
                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.o_ready.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # flag the output as valid (registered: seen on the next clock)
            m.d.sync += self.n.o_valid.eq(1)
        # NOTE: this block comes after the one above, so when both are
        # active, these later assignments to o_valid take priority
        with m.If(self.n.i_ready & self.n.o_valid):
            m.d.sync += self.n.o_valid.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        # yields the op record's ports, then the three inputs and the output
        # (the handshake signals are not included)
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the signals yielded by ``__iter__``, as a list."""
        return list(self)
 183
#####################
# Converting even this dummy ALU over to the FunctionUnit RegSpecs API.
# Note that the regspecs are totally ignored below, but at least
# the widths are all 64-bit, so it's okay.
#####################
 189
# input data for the ALU: two integer operands
class ALUInputData(FUBaseData):
    """Register specification of the ALU inputs ``a`` and ``b``."""
    regspec = [('INT', 'a', '0:63'), # RA
               ('INT', 'b', '0:63'), # RB/immediate
               ]
    def __init__(self, pspec):
        # NOTE(review): the second argument is False here and True in
        # ALUOutputData - presumably an "is output" flag; confirm against
        # FUBaseData
        super().__init__(pspec, False)
 197
 198
# output data from the ALU: one integer result
class ALUOutputData(FUBaseData):
    """Register specification of the ALU output ``o``."""
    regspec = [('INT', 'o', '0:63'),        # RT
               ]
    def __init__(self, pspec):
        # NOTE(review): the second argument is True here and False in
        # ALUInputData - presumably an "is output" flag; confirm against
        # FUBaseData
        super().__init__(pspec, True)
 205
 206
# ALU pipe specification class
class ALUPipeSpec(CommonPipeSpec):
    """Pipeline specification for the ALU Function Unit."""
    # pair of (input regspec, output regspec)
    regspec = (ALUInputData.regspec, ALUOutputData.regspec)
    # class of the operand subset record
    opsubsetkls = CompALUOpSubset
 211
 212
class ALUFunctionUnit(FunctionUnitBaseSingle):
#class ALUFunctionUnit(FunctionUnitBaseMulti):
    """Function Unit built from ALUPipeSpec and the experimental ALU."""
    fnunit = Function.ALU

    def __init__(self, idx):
        # NOTE(review): idx is accepted but never used: a constant 1 is
        # passed to the base class instead.  confirm against
        # FunctionUnitBaseSingle whether idx should be passed through.
        super().__init__(ALUPipeSpec, ALU, 1)
 219
 220
 221 class ALU(Elaboratable):
 222     def __init__(self, width):
 223         # XXX major temporary hack: attempting to convert
 224         # ALU over to RegSpecs API, FunctionUnitBaseSingle passes in
 225         # a regspec here which we can't cope with.  therefore, errr...
 226         # just throw it away and set the width to 64
 227         if not isinstance(width, int):
 228             width = 64
 229         # TODO, really this should just inherit from ControlBase it would
 230         # be a lot less messy.
 231         self.p = Dummy()  # make look like nmutil pipeline API
 232         self.p.i_data = Dummy()
 233         self.p.i_data.ctx = Dummy()
 234         self.n = Dummy()  # make look like nmutil pipeline API
 235         self.n.o_data = Dummy()
 236         self.p.i_valid = Signal()
 237         self.p.o_ready = Signal()
 238         self.n.i_ready = Signal()
 239         self.n.o_valid = Signal()
 240         self.counter = Signal(4)
 241         self.op = CompALUOpSubset(name="op")
 242         i = []
 243         i.append(Signal(width, name="i1"))
 244         i.append(Signal(width, name="i2"))
 245         self.i = i
 246         self.a, self.b = i[0], i[1]
 247         out = []
 248         out.append(Data(width, name="alu_o"))
 249         out.append(Data(width, name="alu_cr"))
 250         self.out = tuple(out)
 251         self.o = self.out[0]
 252         self.cr = self.out[1]
 253         self.width = width
 254         # more "look like nmutil ControlBase pipeline API" stuff
 255         self.p.i_data.ctx.op = self.op
 256         self.p.i_data.a = self.a
 257         self.p.i_data.b = self.b
 258         self.n.o_data.o = self.o
 259         self.n.o_data.cr = self.cr
 260
 261     def elaborate(self, platform):
 262         m = Module()
 263         add = Adder(self.width)
 264         mul = Multiplier(self.width)
 265         shf = Shifter(self.width)
 266         sub = Subtractor(self.width)
 267         ext_sign = SignExtend(self.width)
 268
 269         m.submodules.add = add
 270         m.submodules.mul = mul
 271         m.submodules.shf = shf
 272         m.submodules.sub = sub
 273         m.submodules.ext_sign = ext_sign
 274
 275         # really should not activate absolutely all ALU inputs like this
 276         for mod in [add, mul, shf, sub]:
 277             m.d.comb += [
 278                 mod.a.eq(self.a),
 279                 mod.b.eq(self.b),
 280             ]
 281         # EXTS sign extends the first input
 282         with m.If(self.op.insn_type == MicrOp.OP_EXTS):
 283             m.d.comb += ext_sign.a.eq(self.a)
 284         # EXTSWSLI sign extends the second input
 285         with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 286             m.d.comb += ext_sign.a.eq(self.b)
 287
 288         # pass invert (and carry later)
 289         m.d.comb += add.invert_in.eq(self.op.invert_in)
 290
 291         go_now = Signal(reset_less=True)  # testing no-delay ALU
 292
 293         # ALU sequencer is idle when the count is zero
 294         alu_idle = Signal(reset_less=True)
 295         m.d.comb += alu_idle.eq(self.counter == 0)
 296
 297         # ALU sequencer is done when the count is one
 298         alu_done = Signal(reset_less=True)
 299         m.d.comb += alu_done.eq(self.counter == 1)
 300
 301         # select handshake handling according to ALU type
 302         with m.If(go_now):
 303             # with a combinatorial, no-delay ALU, just pass through
 304             # the handshake signals to the other side
 305             m.d.comb += self.p.o_ready.eq(self.n.i_ready)
 306             m.d.comb += self.n.o_valid.eq(self.p.i_valid)
 307         with m.Else():
 308             # sequential ALU handshake:
 309             # o_ready responds to i_valid, but only if the ALU is idle
 310             m.d.comb += self.p.o_ready.eq(alu_idle)
 311             # select the internally generated o_valid, above
 312             m.d.comb += self.n.o_valid.eq(alu_done)
 313
 314         # hold the ALU result until o_ready is asserted
 315         alu_r = Signal(self.width)
 316
 317         # output masks
 318         # NOP and ILLEGAL don't output anything
 319         with m.If((self.op.insn_type != MicrOp.OP_NOP) &
 320                   (self.op.insn_type != MicrOp.OP_ILLEGAL)):
 321             m.d.comb += self.o.ok.eq(1)
 322         # CR is output when rc bit is active
 323         m.d.comb += self.cr.ok.eq(self.op.rc.rc)
 324
 325         with m.If(alu_idle):
 326             with m.If(self.p.i_valid):
 327
 328                 # as this is a "fake" pipeline, just grab the output right now
 329                 with m.If(self.op.insn_type == MicrOp.OP_ADD):
 330                     m.d.sync += alu_r.eq(add.o)
 331                 with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
 332                     m.d.sync += alu_r.eq(mul.o)
 333                 with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
 334                     m.d.sync += alu_r.eq(shf.o)
 335                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
 336                     m.d.sync += alu_r.eq(ext_sign.o)
 337                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 338                     m.d.sync += alu_r.eq(ext_sign.o)
 339                 # SUB is zero-delay, no need to register
 340
 341                 # NOTE: all of these are fake, just something to test
 342
 343                 # MUL, to take 5 instructions
 344                 with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
 345                     m.d.sync += self.counter.eq(5)
 346                 # SHIFT to take 1, straight away
 347                 with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
 348                     m.d.sync += self.counter.eq(1)
 349                 # ADD/SUB to take 3
 350                 with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
 351                     m.d.sync += self.counter.eq(3)
 352                 # EXTS to take 1
 353                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
 354                     m.d.sync += self.counter.eq(1)
 355                 # EXTSWSLI to take 1
 356                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 357                     m.d.sync += self.counter.eq(1)
 358                 # others to take no delay
 359                 with m.Else():
 360                     m.d.comb += go_now.eq(1)
 361
 362         with m.Elif(~alu_done | self.n.i_ready):
 363             # decrement the counter while the ALU is neither idle nor finished
 364             m.d.sync += self.counter.eq(self.counter - 1)
 365
 366         # choose between zero-delay output, or registered
 367         with m.If(go_now):
 368             m.d.comb += self.o.data.eq(sub.o)
 369         # only present the result at the last computation cycle
 370         with m.Elif(alu_done):
 371             m.d.comb += self.o.data.eq(alu_r)
 372
 373         # determine condition register bits based on the data output value
 374         with m.If(~self.o.data.any()):
 375             m.d.comb += self.cr.data.eq(0b001)
 376         with m.Elif(self.o.data[-1]):
 377             m.d.comb += self.cr.data.eq(0b010)
 378         with m.Else():
 379             m.d.comb += self.cr.data.eq(0b100)
 380
 381         return m
 382
 383     def __iter__(self):
 384         yield from self.op.ports()
 385         yield self.a
 386         yield self.b
 387         yield from self.o.ports()
 388         yield self.p.i_valid
 389         yield self.p.o_ready
 390         yield self.n.o_valid
 391         yield self.n.i_ready
 392
 393     def ports(self):
 394         return list(self)
 395
 396
 397 class BranchOp(Elaboratable):
 398     def __init__(self, width, op):
 399         self.a = Signal(width)
 400         self.b = Signal(width)
 401         self.o = Signal(width)
 402         self.op = op
 403
 404     def elaborate(self, platform):
 405         m = Module()
 406         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 407         return m
 408
 409
class BranchALU(Elaboratable):
    """*Fake* branch-compare ALU, with a (deliberate) multi-cycle delay.

    The 2-bit ``op`` selects the comparison of ``a`` with ``b``:
    0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.
    The comparison result (1 or 0) is captured into ``o`` when the
    operands are accepted, and ``n.o_valid`` is raised once the
    countdown has reached one.
    """
    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.i_data = Dummy()
        self.p.i_data.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.o_data = Dummy()
        # handshake: p = previous stage (input side), n = next (output side)
        self.p.i_valid = Signal()
        self.p.o_ready = Signal()
        self.n.i_ready = Signal()
        self.n.o_valid = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        # two operand inputs, also available by index through self.i
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = i
        self.a, self.b = i[0], i[1]
        # single output, also available by index through self.out
        self.out = tuple([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        m = Module()
        # one comparator per branch condition
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every comparator sees both operands, all of the time
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # never driven in this ALU (the assignment below is commented
        # out), so the "(counter == 1) | go_now" test relies on the counter
        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.i_valid):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.o_ready):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.o_ready.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                # (op selects the comparator by its position in the list)
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.o_ready.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # flag the output as valid (registered: seen on the next clock)
            m.d.sync += self.n.o_valid.eq(1)
        # NOTE: this block comes after the one above, so when both are
        # active, these later assignments to o_valid take priority
        with m.If(self.n.i_ready & self.n.o_valid):
            m.d.sync += self.n.o_valid.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        # yields the opcode, the two inputs and the output
        # (the handshake signals are not included)
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the signals yielded by ``__iter__``, as a list."""
        return list(self)
 493
 494
 495 def run_op(dut, a, b, op, inv_a=0):
 496     yield dut.a.eq(a)
 497     yield dut.b.eq(b)
 498     yield dut.op.insn_type.eq(op)
 499     yield dut.op.invert_in.eq(inv_a)
 500     yield dut.n.i_ready.eq(0)
 501     yield dut.p.i_valid.eq(1)
 502     yield dut.n.i_ready.eq(1)
 503     yield
 504
 505     # wait for the ALU to accept our input data
 506     while not (yield dut.p.o_ready):
 507         yield
 508
 509     yield dut.p.i_valid.eq(0)
 510     yield dut.a.eq(0)
 511     yield dut.b.eq(0)
 512     yield dut.op.insn_type.eq(0)
 513     yield dut.op.invert_in.eq(0)
 514
 515     # wait for the ALU to present the output data
 516     while not (yield dut.n.o_valid):
 517         yield
 518
 519     # latch the result and lower read_i
 520     result = yield dut.o.data
 521     yield dut.n.i_ready.eq(0)
 522
 523     return result
 524
 525
 526 def alu_sim(dut):
 527     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
 528     print("alu_sim add", result)
 529     assert (result == 8)
 530
 531     result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
 532     print("alu_sim mul", result)
 533     assert (result == 6)
 534
 535     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
 536     print("alu_sim add-inv", result)
 537     assert (result == 65533)
 538
 539     # test zero-delay ALU
 540     # don't have OP_SUB, so use any other
 541     result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
 542     print("alu_sim sub", result)
 543     assert (result == 2)
 544
 545     result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
 546     print("alu_sim shr", result)
 547     assert (result == 3)
 548
 549
 550 def test_alu():
 551     alu = ALU(width=16)
 552     write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
 553     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 554
 555     vl = rtlil.convert(alu, ports=alu.ports())
 556     with open("test_alu.il", "w") as f:
 557         f.write(vl)
 558
 559
def test_alu_parallel():
    """Test a 16-bit ALU with independent producer and consumer processes.

    The producer presents operations without waiting for their results,
    whilst the consumer collects and checks the results, in order.
    Writes a GTKWave document and a VCD trace of the simulation.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0, rc=0):
        """Present one operation to the ALU, and wait for it to be accepted"""
        # present input data and assert i_valid
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.op.rc.rc.eq(rc)
        yield dut.p.i_valid.eq(1)
        yield
        # wait for o_ready to be asserted
        while not (yield dut.p.o_ready):
            yield
        # clear input data and negate i_valid
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.i_valid.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_in.eq(0)
        yield dut.op.rc.rc.eq(0)

    def receive():
        """Wait for one result, and return it as a (data, cr) tuple"""
        # signal readiness to receive data
        yield dut.n.i_ready.eq(1)
        yield
        # wait for o_valid to be asserted
        while not (yield dut.n.o_valid):
            yield
        # read results
        result = yield dut.o.data
        cr = yield dut.cr.data
        # negate i_ready
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.i_ready.eq(0)
        return result, cr

    def producer():
        """Input-side process: sends all of the test operations"""
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64, rc=1)
        # (-6) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1, rc=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_CMP)
        yield
        yield
        # NOP
        yield from send(5, 3, MicrOp.OP_NOP)
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)
        # sign extend 13
        yield from send(13, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(0x80, 2, MicrOp.OP_EXTS, rc=1)
        # sign extend -128 (8 bits), this time taken from the second input
        yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)
        # 5 - 5
        yield from send(5, 5, MicrOp.OP_CMP, rc=1)

    def consumer():
        """Output-side process: receives and checks all of the results"""
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert result[0] == 8
        # 2 * 3 = 6
        # 6 > 0 => CR = 0b100
        result = yield from receive()
        assert result == (6, 0b100)
        yield
        yield
        # (-6) + 3 = -3
        # -3 < 0 => CR = 0b010
        result = yield from receive()
        assert result == (65533, 0b010)  # unsigned equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert result[0] == 2
        yield
        yield
        # NOP
        yield from receive()
        # 13 >> 2 = 3
        result = yield from receive()
        assert result[0] == 3
        # sign extend 13 = 13
        result = yield from receive()
        assert result[0] == 13
        # sign extend -128 (8 bits) = -128 (16 bits)
        # -128 < 0 => CR = 0b010
        result = yield from receive()
        assert result == (0xFF80, 0b010)
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert result[0] == 0xFF80
        # 5 - 5 = 0
        # 0 == 0 => CR = 0b001
        result = yield from receive()
        assert result == (0, 0b001)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
    with sim_writer:
        sim.run()
 690
 691
 692 def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
 693                    pysim=True):
 694     """Common function to write the GTKWave documents for this module"""
 695     gtkwave_desc = [
 696         'clk',
 697         'i1[15:0]',
 698         'i2[15:0]',
 699         'op__insn_type' if pysim else 'op__insn_type[6:0]',
 700         'op__invert_in',
 701         'i_valid',
 702         'o_ready',
 703         'o_valid',
 704         'i_ready',
 705         'alu_o[15:0]',
 706         'alu_o_ok',
 707         'alu_cr[15:0]',
 708         'alu_cr_ok'
 709     ]
 710     # determine the module name of the DUT
 711     module = 'top'
 712     if sub_module is not None:
 713         module = nmigen_sim_top_module + sub_module
 714     vcd_name = gtkw_name.replace('.gtkw', '.vcd')
 715     write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
 716                loc=__file__, clk_period=clk_period, base='signed')
 717
 718
if __name__ == "__main__":
    # run both ALU test-benches when executed as a script
    test_alu()
    test_alu_parallel()

    # disabled: conversion of the BranchALU to RTLIL
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)