memory_dff: Recognize soft transparency logic.
authorMarcelina Kościelnicka <mwk@0x04.net>
Tue, 10 Aug 2021 17:42:10 +0000 (19:42 +0200)
committerMarcelina Kościelnicka <mwk@0x04.net>
Fri, 13 Aug 2021 21:08:32 +0000 (23:08 +0200)
passes/memory/memory_dff.cc
tests/memories/trans_sdp.v [new file with mode: 0644]
tests/memories/trans_sp.v [new file with mode: 0644]
tests/opt/memory_dff_trans.ys [new file with mode: 0644]

index 8ad131c7c1685b07cdc912b8611d5f3a175a2b81..21962c238c92b1d15a3e56cda5c1dc9ac2a10805 100644 (file)
@@ -19,7 +19,9 @@
 
 #include "kernel/yosys.h"
 #include "kernel/sigtools.h"
+#include "kernel/modtools.h"
 #include "kernel/ffinit.h"
+#include "kernel/qcsat.h"
 #include "kernel/mem.h"
 #include "kernel/ff.h"
 #include "kernel/ffmerge.h"
 USING_YOSYS_NAMESPACE
 PRIVATE_NAMESPACE_BEGIN
 
+struct MuxData {
+       int base_idx;
+       int size;
+       bool is_b;
+       SigSpec sig_s;
+       std::vector<SigSpec> sig_other;
+};
+
+struct PortData {
+       bool relevant;
+       std::vector<bool> uncollidable_mask;
+       std::vector<bool> transparency_mask;
+       std::vector<bool> collision_x_mask;
+       bool final_transparency;
+       bool final_collision_x;
+};
+
+// A helper with some caching for transparency-related SAT queries.
+// Bound to a single memory read port in the process of being converted
+// from async to sync..
+struct MemQueryCache
+{
+       QuickConeSat &qcsat;
+       // The memory.
+       Mem &mem;
+       // The port, still async at this point.
+       MemRd &port;
+       // The virtual FF that will end up merged into this port.
+       FfData &ff;
+       // An ezSAT variable that is true when we actually care about the data
+       // read from memory (ie. the FF has enable on and is not in reset).
+       int port_ren;
+       // Some caches.
+       dict<std::pair<int, SigBit>, bool> cache_can_collide_rdwr;
+       dict<std::tuple<int, int, SigBit, SigBit>, bool> cache_can_collide_together;
+       dict<std::tuple<int, SigBit, SigBit, bool>, bool> cache_is_w2rbyp;
+       dict<std::tuple<SigBit, bool>, bool> cache_impossible_with_ren;
+
+       MemQueryCache(QuickConeSat &qcsat, Mem &mem, MemRd &port, FfData &ff) : qcsat(qcsat), mem(mem), port(port), ff(ff) {
+               // port_ren is an upper bound on when we care about the value fetched
+               // from memory this cycle.
+               int ren = ezSAT::CONST_TRUE;
+               if (ff.has_en) {
+                       ren = qcsat.importSigBit(ff.sig_en);
+                       if (!ff.pol_en)
+                               ren = qcsat.ez->NOT(ren);
+               }
+               if (ff.has_srst) {
+                       int nrst = qcsat.importSigBit(ff.sig_srst);
+                       if (ff.pol_srst)
+                               nrst = qcsat.ez->NOT(nrst);
+                       ren = qcsat.ez->AND(ren, nrst);
+               }
+               port_ren = ren;
+       }
+
+       // Returns ezSAT variable that is true iff the two addresses are the same.
+       int addr_eq(SigSpec raddr, SigSpec waddr) {
+               int abits = std::max(GetSize(raddr), GetSize(waddr));
+               raddr.extend_u0(abits);
+               waddr.extend_u0(abits);
+               return qcsat.ez->vec_eq(qcsat.importSig(raddr), qcsat.importSig(waddr));
+       }
+
+       // Returns true if a given write port bit can be active at the same time
+       // as this read port and at the same address.
+       bool can_collide_rdwr(int widx, SigBit wen) {
+               std::pair<int, SigBit> key(widx, wen);
+               auto it = cache_can_collide_rdwr.find(key);
+               if (it != cache_can_collide_rdwr.end())
+                       return it->second;
+               auto &wport = mem.wr_ports[widx];
+               int aeq = addr_eq(port.addr, wport.addr);
+               int wen_sat = qcsat.importSigBit(wen);
+               qcsat.prepare();
+               bool res = qcsat.ez->solve(aeq, wen_sat, port_ren);
+               cache_can_collide_rdwr[key] = res;
+               return res;
+       }
+
+       // Returns true if both given write port bits can be active at the same
+       // time as this read port and at the same address (three-way collision).
+       bool can_collide_together(int widx1, int widx2, int bitidx) {
+               auto &wport1 = mem.wr_ports[widx1];
+               auto &wport2 = mem.wr_ports[widx2];
+               SigBit wen1 = wport1.en[bitidx];
+               SigBit wen2 = wport2.en[bitidx];
+               std::tuple<int, int, SigBit, SigBit> key(widx1, widx2, wen1, wen2);
+               auto it = cache_can_collide_together.find(key);
+               if (it != cache_can_collide_together.end())
+                       return it->second;
+               int aeq1 = addr_eq(port.addr, wport1.addr);
+               int aeq2 = addr_eq(port.addr, wport2.addr);
+               int wen1_sat = qcsat.importSigBit(wen1);
+               int wen2_sat = qcsat.importSigBit(wen2);
+               qcsat.prepare();
+               bool res = qcsat.ez->solve(wen1_sat, wen2_sat, aeq1, aeq2, port_ren);
+               cache_can_collide_together[key] = res;
+               return res;
+       }
+
+       // Returns true if the given mux selection signal is a valid data-bypass
+       // signal in soft transparency logic for a given write port bit.
+       bool is_w2rbyp(int widx, SigBit wen, SigBit sel, bool neg_sel) {
+               std::tuple<int, SigBit, SigBit, bool> key(widx, wen, sel, neg_sel);
+               auto it = cache_is_w2rbyp.find(key);
+               if (it != cache_is_w2rbyp.end())
+                       return it->second;
+               auto &wport = mem.wr_ports[widx];
+               int aeq = addr_eq(port.addr, wport.addr);
+               int wen_sat = qcsat.importSigBit(wen);
+               int sel_expected = qcsat.ez->AND(aeq, wen_sat);
+               int sel_sat = qcsat.importSigBit(sel);
+               if (neg_sel)
+                       sel_sat = qcsat.ez->NOT(sel_sat);
+               qcsat.prepare();
+               bool res = !qcsat.ez->solve(port_ren, qcsat.ez->XOR(sel_expected, sel_sat));
+               cache_is_w2rbyp[key] = res;
+               return res;
+       }
+
+       // Returns true if the given mux selection signal can never be true
+       // when this port is active.
+       bool impossible_with_ren(SigBit sel, bool neg_sel) {
+               std::tuple<SigBit, bool> key(sel, neg_sel);
+               auto it = cache_impossible_with_ren.find(key);
+               if (it != cache_impossible_with_ren.end())
+                       return it->second;
+               int sel_sat = qcsat.importSigBit(sel);
+               if (neg_sel)
+                       sel_sat = qcsat.ez->NOT(sel_sat);
+               qcsat.prepare();
+               bool res = !qcsat.ez->solve(port_ren, sel_sat);
+               cache_impossible_with_ren[key] = res;
+               return res;
+       }
+
+       // Helper for data_eq: walks up a multiplexer when the value of its
+       // sel signal is constant under the assumption that this read port
+       // is active and a given other mux sel signal is true.
+       bool walk_up_mux_cond(SigBit sel, bool neg_sel, SigBit &bit) {
+               auto &drivers = qcsat.modwalker.signal_drivers[qcsat.modwalker.sigmap(bit)];
+               if (GetSize(drivers) != 1)
+                       return false;
+               auto driver = *drivers.begin();
+               if (!driver.cell->type.in(ID($mux), ID($pmux)))
+                       return false;
+               log_assert(driver.port == ID::Y);
+               SigSpec sig_s = driver.cell->getPort(ID::S);
+               int sel_sat = qcsat.importSigBit(sel);
+               if (neg_sel)
+                       sel_sat = qcsat.ez->NOT(sel_sat);
+               bool all_0 = true;
+               int width = driver.cell->parameters.at(ID::WIDTH).as_int();
+               for (int i = 0; i < GetSize(sig_s); i++) {
+                       int sbit = qcsat.importSigBit(sig_s[i]);
+                       qcsat.prepare();
+                       if (!qcsat.ez->solve(port_ren, sel_sat, qcsat.ez->NOT(sbit))) {
+                               bit = driver.cell->getPort(ID::B)[i * width + driver.offset];
+                               return true;
+                       }
+                       if (qcsat.ez->solve(port_ren, sel_sat, sbit))
+                               all_0 = false;
+               }
+               if (all_0) {
+                       bit = driver.cell->getPort(ID::A)[driver.offset];
+                       return true;
+               }
+               return false;
+       }
+
+       // Returns true if a given data signal is equivalent to another, under
+       // the assumption that this read port is active and a given mux sel signal
+       // is true.  Used to match transparency logic data with write port data.
+       // The walk_up_mux_cond part is necessary because write ports in yosys
+       // tend to be connected to things like (wen ? wdata : 'x).
+       bool data_eq(SigBit sel, bool neg_sel, SigBit dbit, SigBit odbit) {
+               if (qcsat.modwalker.sigmap(dbit) == qcsat.modwalker.sigmap(odbit))
+                       return true;
+               while (walk_up_mux_cond(sel, neg_sel, dbit));
+               while (walk_up_mux_cond(sel, neg_sel, odbit));
+               return qcsat.modwalker.sigmap(dbit) == qcsat.modwalker.sigmap(odbit);
+       }
+};
+
 struct MemoryDffWorker
 {
        Module *module;
-       SigMap sigmap;
+       ModWalker modwalker;
        FfInitVals initvals;
        FfMergeHelper merger;
 
-       MemoryDffWorker(Module *module) : module(module), sigmap(module)
+       MemoryDffWorker(Module *module) : module(module), modwalker(module->design)
        {
-               initvals.set(&sigmap, module);
+               modwalker.setup(module);
+               initvals.set(&modwalker.sigmap, module);
                merger.set(&initvals, module);
        }
 
-       void handle_rd_port(Mem &mem, int idx)
+       // Starting from the output of an async read port, as long as the data
+       // signal's only user is a mux data signal, passes through the mux
+       // and remembers information about it.  Conceptually works on every
+       // bit separately, but coalesces the result when possible.
+       SigSpec walk_muxes(SigSpec data, std::vector<MuxData> &res) {
+               bool did_something;
+               do {
+                       did_something = false;
+                       int prev_idx = -1;
+                       Cell *prev_cell = nullptr;
+                       bool prev_is_b = false;
+                       for (int i = 0; i < GetSize(data); i++) {
+                               SigBit bit = modwalker.sigmap(data[i]);
+                               auto &consumers = modwalker.signal_consumers[bit];
+                               if (GetSize(consumers) != 1 || modwalker.signal_outputs.count(bit))
+                                       continue;
+                               auto consumer = *consumers.begin();
+                               bool is_b;
+                               if (consumer.cell->type == ID($mux)) {
+                                       if (consumer.port == ID::A) {
+                                               is_b = false;
+                                       } else if (consumer.port == ID::B) {
+                                               is_b = true;
+                                       } else {
+                                               continue;
+                                       }
+                               } else if (consumer.cell->type == ID($pmux)) {
+                                       if (consumer.port == ID::A) {
+                                               is_b = false;
+                                       } else {
+                                               continue;
+                                       }
+                               } else {
+                                       continue;
+                               }
+                               SigSpec y = consumer.cell->getPort(ID::Y);
+                               int mux_width = GetSize(y);
+                               SigBit ybit = y.extract(consumer.offset);
+                               if (prev_cell != consumer.cell || prev_idx+1 != i || prev_is_b != is_b) {
+                                       MuxData md;
+                                       md.base_idx = i;
+                                       md.size = 0;
+                                       md.is_b = is_b;
+                                       md.sig_s = consumer.cell->getPort(ID::S);
+                                       md.sig_other.resize(GetSize(md.sig_s));
+                                       prev_cell = consumer.cell;
+                                       prev_is_b = is_b;
+                                       res.push_back(md);
+                               }
+                               auto &md = res.back();
+                               md.size++;
+                               for (int j = 0; j < GetSize(md.sig_s); j++) {
+                                       SigBit obit = consumer.cell->getPort(is_b ? ID::A : ID::B).extract(j * mux_width + consumer.offset);
+                                       md.sig_other[j].append(obit);
+                               }
+                               prev_idx = i;
+                               data[i] = ybit;
+                               did_something = true;
+                       }
+               } while (did_something);
+               return data;
+       }
+
+       // Merges FF and possibly soft transparency logic into an asynchronous
+       // read port, making it into a synchronous one.
+       //
+       // There are three moving parts involved here:
+       //
+       // - the async port, which we start from, whose data port is input to...
+       // - an arbitrary chain of $mux and $pmux cells implementing soft transparency
+       //   logic (ie. bypassing write port's data iff the write port is active and
+       //   writing to the same address as this read port), which in turn feeds...
+       // - a final FF
+       //
+       // The async port and the mux chain are not allowed to have any users that
+       // are not part of the above.
+       //
+       // The algorithm is:
+       //
+       // 1. Walk through the muxes.
+       // 2. Recognize the final FF.
+       // 3. Knowing the FF's clock and read enable, make a list of write ports
+       //    that we'll run transparency analysis on.
+       // 4. For every mux bit, recognize it as one of:
+       //    - a transparency bypass mux for some port
+       //    - a bypass mux that feeds 'x instead (this will result in collision
+       //      don't care behavior being recognized)
+       //    - a mux that never selects the other value when read port is active,
+       //      and can thus be skipped (this is necessary because this could
+       //      be a transparency bypass mux for never-colliding port that other
+       //      passes failed to optimize)
+       //    - a mux whose other input is 'x, and can thus be skipped
+       // 5. When recognizing transparency bypasses, take care to preserve priority
+       //    behavior — when two bypasses are sequential muxes on the chain, they
+       //    effectively have priority over one another, and the transform can
+       //    only be performed when either a) their corresponding write ports
+       //    also have priority, or b) there can never be a three-way collision
+       //    between the two write ports and the read port.
+       // 6. Check consistency of per-bit transparency masks, merge them into
+       //    per-port transparency masks
+       // 7. If everything went fine in the previous steps, actually perform
+       //    the merge.
+       void handle_rd_port(Mem &mem, QuickConeSat &qcsat, int idx)
        {
                auto &port = mem.rd_ports[idx];
                log("Checking read port `%s'[%d] in module `%s': ", mem.memid.c_str(), idx, module->name.c_str());
 
+               std::vector<MuxData> muxdata;
+               SigSpec data = walk_muxes(port.data, muxdata);
                FfData ff;
                pool<std::pair<Cell *, int>> bits;
-               if (!merger.find_output_ff(port.data, ff, bits)) {
+               if (!merger.find_output_ff(data, ff, bits)) {
                        log("no output FF found.\n");
                        return;
                }
@@ -60,6 +352,144 @@ struct MemoryDffWorker
                        log("output FF has both set and reset, not supported.\n");
                        return;
                }
+
+               // Construct cache.
+               MemQueryCache cache(qcsat, mem, port, ff);
+
+               // Prepare information structure about all ports, recognize port bits
+               // that can never collide at all and don't need to be checked.
+               std::vector<PortData> portdata;
+               for (int i = 0; i < GetSize(mem.wr_ports); i++) {
+                       PortData pd;
+                       auto &wport = mem.wr_ports[i];
+                       pd.relevant = true;
+                       if (!wport.clk_enable)
+                               pd.relevant = false;
+                       if (wport.clk != ff.sig_clk)
+                               pd.relevant = false;
+                       if (wport.clk_polarity != ff.pol_clk)
+                               pd.relevant = false;
+                       // In theory, we *could* support mismatched width
+                       // ports here.  However, it's not worth it — wide
+                       // ports are recognized *after* memory_dff in
+                       // a normal flow.
+                       if (wport.wide_log2 != port.wide_log2)
+                               pd.relevant = false;
+                       pd.uncollidable_mask.resize(GetSize(port.data));
+                       pd.transparency_mask.resize(GetSize(port.data));
+                       pd.collision_x_mask.resize(GetSize(port.data));
+                       if (pd.relevant) {
+                               // If we got this far, this port is potentially
+                               // transparent and/or has undefined collision
+                               // behavior.  Now, for every bit, check if it can
+                               // ever collide.
+                               for (int j = 0; j < ff.width; j++) {
+                                       if (!cache.can_collide_rdwr(i, wport.en[j])) {
+                                               pd.uncollidable_mask[j] = true;
+                                               pd.collision_x_mask[j] = true;
+                                       }
+                               }
+                       }
+                       portdata.push_back(pd);
+               }
+
+               // Now inspect the mux chain.
+               for (auto &md : muxdata) {
+                       // We only mark transparent bits after processing a complete
+                       // mux, so that the transparency priority validation check
+                       // below sees transparency information as of previous mux.
+                       std::vector<std::pair<PortData&, int>> trans_queue;
+                       for (int sel_idx = 0; sel_idx < GetSize(md.sig_s); sel_idx++) {
+                               SigBit sbit = md.sig_s[sel_idx];
+                               SigSpec &odata = md.sig_other[sel_idx];
+                               for (int bitidx = md.base_idx; bitidx < md.base_idx+md.size; bitidx++) {
+                                       SigBit odbit = odata[bitidx-md.base_idx];
+                                       bool recognized = false;
+                                       for (int pi = 0; pi < GetSize(mem.wr_ports); pi++) {
+                                               auto &pd = portdata[pi];
+                                               auto &wport = mem.wr_ports[pi];
+                                               if (!pd.relevant)
+                                                       continue;
+                                               if (pd.uncollidable_mask[bitidx])
+                                                       continue;
+                                               bool match = cache.is_w2rbyp(pi, wport.en[bitidx], sbit, md.is_b);
+                                               if (!match)
+                                                       continue;
+                                               // If we got here, we recognized this mux sel
+                                               // as valid bypass sel for a given port bit.
+                                               if (odbit == State::Sx) {
+                                                       // 'x, mark collision don't care.
+                                                       pd.collision_x_mask[bitidx] = true;
+                                                       pd.transparency_mask[bitidx] = false;
+                                               } else if (cache.data_eq(sbit, md.is_b, wport.data[bitidx], odbit)) {
+                                                       // Correct data value, mark transparency,
+                                                       // but only after verifying that priority
+                                                       // is fine.
+                                                       for (int k = 0; k < GetSize(mem.wr_ports); k++) {
+                                                               if (portdata[k].transparency_mask[bitidx]) {
+                                                                       if (wport.priority_mask[k])
+                                                                               continue;
+                                                                       if (!cache.can_collide_together(pi, k, bitidx))
+                                                                               continue;
+                                                                       log("FF found, but transparency logic priority doesn't match write priority.\n");
+                                                                       return;
+                                                               }
+                                                       }
+                                                       recognized = true;
+                                                       trans_queue.push_back({pd, bitidx});
+                                                       break;
+                                               } else {
+                                                       log("FF found, but with a mux data input that doesn't seem to correspond to transparency logic.\n");
+                                                       return;
+                                               }
+                                       }
+                                       if (!recognized) {
+                                               // If we haven't positively identified this as
+                                               // a bypass: it's still skippable if the
+                                               // data is 'x, or if the sel cannot actually be
+                                               // active.
+                                               if (odbit == State::Sx)
+                                                       continue;
+                                               if (cache.impossible_with_ren(sbit, md.is_b))
+                                                       continue;
+                                               log("FF found, but with a mux select that doesn't seem to correspond to transparency logic.\n");
+                                               return;
+                                       }
+                               }
+                       }
+                       // Done with this mux, now actually apply the transparencies.
+                       for (auto it : trans_queue) {
+                               it.first.transparency_mask[it.second] = true;
+                               it.first.collision_x_mask[it.second] = false;
+                       }
+               }
+
+               // Final merging and validation of per-bit masks.
+               for (int pi = 0; pi < GetSize(mem.wr_ports); pi++) {
+                       auto &pd = portdata[pi];
+                       if (!pd.relevant)
+                               continue;
+                       bool trans = false;
+                       bool non_trans = false;
+                       for (int i = 0; i < ff.width; i++) {
+                               if (pd.collision_x_mask[i])
+                                       continue;
+                               if (pd.transparency_mask[i])
+                                       trans = true;
+                               else
+                                       non_trans = true;
+                       }
+                       if (trans && non_trans) {
+                               log("FF found, but soft transparency logic is inconsistent for port %d.\n", pi);
+                               return;
+                       }
+                       pd.final_transparency = trans;
+                       pd.final_collision_x = !trans && !non_trans;
+               }
+
+               // OK, it worked.
+               log("merging output FF to cell.\n");
+
                merger.remove_output_ff(bits);
                if (ff.has_en && !ff.pol_en)
                        ff.sig_en = module->LogicNot(NEW_ID, ff.sig_en);
@@ -89,8 +519,21 @@ struct MemoryDffWorker
                }
                port.init_value = ff.val_init;
                port.data = ff.sig_q;
+               for (int pi = 0; pi < GetSize(mem.wr_ports); pi++) {
+                       auto &pd = portdata[pi];
+                       if (!pd.relevant)
+                               continue;
+                       if (pd.final_collision_x) {
+                               log("    Write port %d: don't care on collision.\n", pi);
+                               port.collision_x_mask[pi] = true;
+                       } else if (pd.final_transparency) {
+                               log("    Write port %d: transparent.\n", pi);
+                               port.transparency_mask[pi] = true;
+                       } else {
+                               log("    Write port %d: non-transparent.\n", pi);
+                       }
+               }
                mem.emit();
-               log("merged output FF to cell.\n");
        }
 
        void handle_rd_port_addr(Mem &mem, int idx)
@@ -146,9 +589,10 @@ struct MemoryDffWorker
        {
                std::vector<Mem> memories = Mem::get_selected_memories(module);
                for (auto &mem : memories) {
+                       QuickConeSat qcsat(modwalker);
                        for (int i = 0; i < GetSize(mem.rd_ports); i++) {
                                if (!mem.rd_ports[i].clk_enable)
-                                       handle_rd_port(mem, i);
+                                       handle_rd_port(mem, qcsat, i);
                        }
                }
                for (auto &mem : memories) {
diff --git a/tests/memories/trans_sdp.v b/tests/memories/trans_sdp.v
new file mode 100644 (file)
index 0000000..b89f2cc
--- /dev/null
@@ -0,0 +1,21 @@
+// expect-wr-ports 1
+// expect-rd-ports 1
+// expect-rd-clk \clk
+// expect-rd-en \re
+
+module top(input clk, we, re, input [7:0] ra, wa, wd, output reg [7:0] rd);
+
+reg [7:0] mem[0:255];
+
+always @(posedge clk) begin
+       if (we)
+               mem[wa] <= wd;
+
+       if (re) begin
+               rd <= mem[ra];
+               if (we && ra == wa)
+                       rd <= wd;
+       end
+end
+
+endmodule
diff --git a/tests/memories/trans_sp.v b/tests/memories/trans_sp.v
new file mode 100644 (file)
index 0000000..ddd41a1
--- /dev/null
@@ -0,0 +1,21 @@
+// expect-wr-ports 1
+// expect-rd-ports 1
+// expect-rd-clk \clk
+// expect-rd-en \re
+
+module top(input clk, we, re, input [7:0] addr, wd, output reg [7:0] rd);
+
+reg [7:0] mem[0:255];
+
+always @(posedge clk) begin
+       if (we)
+               mem[addr] <= wd;
+
+       if (re) begin
+               rd <= mem[addr];
+               if (we)
+                       rd <= wd;
+       end
+end
+
+endmodule
diff --git a/tests/opt/memory_dff_trans.ys b/tests/opt/memory_dff_trans.ys
new file mode 100644 (file)
index 0000000..7599949
--- /dev/null
@@ -0,0 +1,862 @@
+# Good case 1: single port.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] addr,
+       input [3:0] wd,
+       input we,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we) begin
+               mem[addr] <= wd;
+               rd <= wd;
+       end else begin
+               rd <= mem[addr];
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=1'b1 r:RD_COLLISION_X_MASK=1'b0 %i %i
+
+design -reset
+
+# Good case 2: single port, exclusive.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] addr,
+       input [3:0] wd,
+       input we,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we) begin
+               mem[addr] <= wd;
+       end else begin
+               rd <= mem[addr];
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=1'b0 r:RD_COLLISION_X_MASK=1'b1 %i %i
+
+design -reset
+
+# Good case 3: proper bypass muxes.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+               if (we2 && wa2 == ra)
+                       rd <= wd2;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b11 r:RD_COLLISION_X_MASK=2'b00 %i %i
+
+design -reset
+
+# Good case 4: proper bypass mux, but only one.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b01 r:RD_COLLISION_X_MASK=2'b00 %i %i
+
+design -reset
+
+# Good case 5: proper bypass mux, but the other one.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we2 && wa2 == ra)
+                       rd <= wd2;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b10 r:RD_COLLISION_X_MASK=2'b00 %i %i
+
+design -reset
+
+# Good case 6: 'x mux.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= 4'hx;
+               if (we2 && wa2 == ra)
+                       rd <= wd2;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b10 r:RD_COLLISION_X_MASK=2'b01 %i %i
+
+design -reset
+
+# Good case 7: uncollidable addresses.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] addr,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+wire [3:0] wa1 = addr;
+wire [3:0] wa2 = addr + 1;
+wire [3:0] ra = addr + 2;
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b00 r:RD_COLLISION_X_MASK=2'b11 %i %i
+
+design -reset
+
+# Good case 8: uncollidable addresses, but still have soft transparency logic.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] addr,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+wire [3:0] wa1 = addr;
+wire [3:0] wa2 = addr + 1;
+wire [3:0] ra = addr + 2;
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+               if (we2 && wa2 == ra)
+                       rd <= wd2;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b00 r:RD_COLLISION_X_MASK=2'b11 %i %i
+
+design -reset
+
+# Bad case 1: broken bypass signal.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+               if (we2 && wa2 == ra && we1)
+                       rd <= wd2;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+logger -expect log "FF found, but with a mux select that doesn't seem to correspond to transparency logic" 1
+memory_dff
+logger -check-expected
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_CLK_ENABLE=1'b0 %i
+
+design -reset
+
+# Bad case 2: bad data signal.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+               if (we2 && wa2 == ra)
+                       rd <= wd1;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+logger -expect log "FF found, but with a mux data input that doesn't seem to correspond to transparency logic" 1
+memory_dff
+logger -check-expected
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_CLK_ENABLE=1'b0 %i
+
+design -reset
+
+# Bad case 3: priority mismatch.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we2 && wa2 == ra)
+                       rd <= wd2;
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+logger -expect log "FF found, but transparency logic priority doesn't match write priority." 1
+memory_dff
+logger -check-expected
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_CLK_ENABLE=1'b0 %i
+
+design -reset
+
+# Good case 10: priority mismatch, but since the second value is 'x, it's still OK.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we2 && wa2 == ra)
+                       rd <= wd2;
+               if (we1 && wa1 == ra)
+                       rd <= 4'hx;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b10 r:RD_COLLISION_X_MASK=2'b01 %i %i
+
+design -reset
+
+# Good case 11: priority mismatch, but since three-way collision cannot happen, it's still OK.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] addr,
+       input [1:0] mode,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] wa1, wa2, ra;
+
+always @* begin
+       case (mode)
+       0: begin
+               wa1 = addr+1;
+               wa2 = addr;
+               ra = addr;
+       end
+       1: begin
+               wa1 = addr;
+               wa2 = addr+1;
+               ra = addr;
+       end
+       2: begin
+               wa1 = addr;
+               wa2 = addr;
+               ra = addr+1;
+       end
+       3: begin
+               wa1 = addr;
+               wa2 = addr+1;
+               ra = addr+2;
+       end
+       endcase
+end
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we2 && wa2 == ra)
+                       rd <= wd2;
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b11 r:RD_COLLISION_X_MASK=2'b00 %i %i
+
+design -reset
+
+# Bad case 4: half of the port is transparent.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2] <= wd2;
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+               if (we2 && wa2 == ra)
+                       rd[3:2] <= wd2[3:2];
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+logger -expect log "FF found, but soft transparency logic is inconsistent for port 1." 1
+memory_dff
+logger -check-expected
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_CLK_ENABLE=1'b0 %i
+
+design -reset
+
+# Good case 12: like above, but the other bits aren't changed by the port anyway.
+
+read_verilog << EOT
+
+module top(
+       input [3:0] ra,
+       input [3:0] wa1,
+       input [3:0] wa2,
+       input [3:0] wd1,
+       input [3:0] wd2,
+       input we1, we2,
+       input re,
+       input clk,
+       output reg [3:0] rd,
+);
+
+reg [3:0] mem[0:15];
+
+always @(posedge clk) begin
+       if (we1)
+               mem[wa1] <= wd1;
+       if (we2)
+               mem[wa2][3:2] <= wd2[3:2];
+       if (re) begin
+               rd <= mem[ra];
+               if (we1 && wa1 == ra)
+                       rd <= wd1;
+               if (we2 && wa2 == ra)
+                       rd[3:2] <= wd2[3:2];
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=2'b11 r:RD_COLLISION_X_MASK=2'b00 %i %i
+
+design -reset
+
+# Good case 13: wide read, narrow write.
+
+read_verilog << EOT
+
+module top(
+       input [7:0] addr,
+       input [7:0] wd,
+       input we,
+       input re,
+       input clk,
+       output reg [31:0] rd,
+);
+
+reg [7:0] mem[0:255];
+
+always @(posedge clk) begin
+       if (we)
+               mem[addr] <= wd;
+       if (re) begin
+               rd[7:0] <= mem[{addr[7:2], 2'b00}];
+               rd[15:8] <= mem[{addr[7:2], 2'b01}];
+               rd[23:16] <= mem[{addr[7:2], 2'b10}];
+               rd[31:24] <= mem[{addr[7:2], 2'b11}];
+               case ({we, addr[1:0]})
+               3'b100: rd[7:0] <= wd;
+               3'b101: rd[15:8] <= wd;
+               3'b110: rd[23:16] <= wd;
+               3'b111: rd[31:24] <= wd;
+               endcase
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+dump
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=4'b1111 r:RD_COLLISION_X_MASK=4'b0000 %i %i
+
+design -reset
+
+# Good case 14: narrow read, wide write.
+
+read_verilog << EOT
+
+module top(
+       input [7:0] addr,
+       input [31:0] wd,
+       input we,
+       input re,
+       input clk,
+       output reg [7:0] rd,
+);
+
+reg [7:0] mem[0:255];
+
+always @(posedge clk) begin
+       if (we) begin
+               mem[{addr[7:2], 2'b00}] <= wd[7:0];
+               mem[{addr[7:2], 2'b01}] <= wd[15:8];
+               mem[{addr[7:2], 2'b10}] <= wd[23:16];
+               mem[{addr[7:2], 2'b11}] <= wd[31:24];
+       end
+       if (re) begin
+               rd <= mem[addr];
+               case ({we, addr[1:0]})
+               3'b100: rd <= wd[7:0];
+               3'b101: rd <= wd[15:8];
+               3'b110: rd <= wd[23:16];
+               3'b111: rd <= wd[31:24];
+               endcase
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+dump
+memory_dff
+memory_collect
+select -assert-count 1 t:$mem_v2
+select -assert-count 1 t:$mem_v2 r:RD_TRANSPARENCY_MASK=4'b1111 r:RD_COLLISION_X_MASK=4'b0000 %i %i
+
+design -reset
+
+# Good case 15: wide read, wide write.
+
+read_verilog << EOT
+
+module top(
+       input [7:0] addr,
+       input [31:0] wd,
+       input we,
+       input re,
+       input clk,
+       output reg [31:0] rd,
+);
+
+reg [7:0] mem[0:255];
+
+always @(posedge clk) begin
+       if (we) begin
+               mem[{addr[7:2], 2'b00}] <= wd[7:0];
+               mem[{addr[7:2], 2'b01}] <= wd[15:8];
+               mem[{addr[7:2], 2'b10}] <= wd[23:16];
+               mem[{addr[7:2], 2'b11}] <= wd[31:24];
+       end
+       if (re) begin
+               rd[7:0] <= mem[{addr[7:2], 2'b00}];
+               rd[15:8] <= mem[{addr[7:2], 2'b01}];
+               rd[23:16] <= mem[{addr[7:2], 2'b10}];
+               rd[31:24] <= mem[{addr[7:2], 2'b11}];
+               if (we)
+                       rd <= wd;
+       end
+end
+
+endmodule
+
+EOT
+
+hierarchy -auto-top
+proc
+opt_dff
+opt_clean
+dump
+memory_dff
+select -assert-count 4 t:$memrd_v2
+select -assert-count 1 t:$memrd_v2 r:TRANSPARENCY_MASK=4'b0001 r:COLLISION_X_MASK=4'b1110 %i %i
+select -assert-count 1 t:$memrd_v2 r:TRANSPARENCY_MASK=4'b0010 r:COLLISION_X_MASK=4'b1101 %i %i
+select -assert-count 1 t:$memrd_v2 r:TRANSPARENCY_MASK=4'b0100 r:COLLISION_X_MASK=4'b1011 %i %i
+select -assert-count 1 t:$memrd_v2 r:TRANSPARENCY_MASK=4'b1000 r:COLLISION_X_MASK=4'b0111 %i %i
+
+design -reset