Add memory_libmap pass.
authorMarcelina Kościelnicka <mwk@0x04.net>
Sun, 6 Feb 2022 09:10:21 +0000 (10:10 +0100)
committerMarcelina Kościelnicka <mwk@0x04.net>
Wed, 18 May 2022 15:32:56 +0000 (17:32 +0200)
kernel/constids.inc
passes/memory/Makefile.inc
passes/memory/memlib.cc [new file with mode: 0644]
passes/memory/memlib.h [new file with mode: 0644]
passes/memory/memlib.md [new file with mode: 0644]
passes/memory/memory_libmap.cc [new file with mode: 0644]

index 566b76217737c7bdea2a6e083fb8141a2658a298..d822c078b6a334dbc59438a522dfaae14a781215 100644 (file)
@@ -29,10 +29,12 @@ X(A_SIGNED)
 X(A_WIDTH)
 X(B)
 X(BI)
+X(BITS_USED)
 X(blackbox)
 X(B_SIGNED)
 X(bugpoint_keep)
 X(B_WIDTH)
+X(BYTE)
 X(C)
 X(cells_not_processed)
 X(CE_OVER_SRST)
@@ -116,6 +118,8 @@ X(keep_hierarchy)
 X(L)
 X(lib_whitebox)
 X(localparam)
+X(logic_block)
+X(lram)
 X(LUT)
 X(lut_keep)
 X(M)
@@ -145,6 +149,9 @@ X(PRIORITY_MASK)
 X(Q)
 X(qwp_position)
 X(R)
+X(ram_block)
+X(ram_style)
+X(ramstyle)
 X(RD_ADDR)
 X(RD_ARST)
 X(RD_ARST_VALUE)
@@ -164,6 +171,9 @@ X(RD_TRANSPARENT)
 X(RD_WIDE_CONTINUATION)
 X(reg)
 X(reprocess_after)
+X(rom_block)
+X(rom_style)
+X(romstyle)
 X(S)
 X(SET)
 X(SET_POLARITY)
@@ -186,6 +196,8 @@ X(STATE_NUM_LOG2)
 X(STATE_RST)
 X(STATE_TABLE)
 X(submod)
+X(syn_ramstyle)
+X(syn_romstyle)
 X(S_WIDTH)
 X(T)
 X(TABLE)
index 5a2c4ecfc1ea9393f0df5cb2e84f29581d5ba9be..9a37394cf146ba4bc02a8ac9ddecc056f2bbb439 100644 (file)
@@ -9,4 +9,6 @@ OBJS += passes/memory/memory_map.o
 OBJS += passes/memory/memory_memx.o
 OBJS += passes/memory/memory_nordff.o
 OBJS += passes/memory/memory_narrow.o
+OBJS += passes/memory/memory_libmap.o
 
+OBJS += passes/memory/memlib.o
diff --git a/passes/memory/memlib.cc b/passes/memory/memlib.cc
new file mode 100644 (file)
index 0000000..8a7adc9
--- /dev/null
@@ -0,0 +1,1101 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2021  Marcelina Kościelnicka <mwk@0x04.net>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "memlib.h"
+
+#include <ctype.h>
+
+USING_YOSYS_NAMESPACE
+
+using namespace MemLibrary;
+
+PRIVATE_NAMESPACE_BEGIN
+
+typedef dict<std::string, Const> Options;
+
+struct ClockDef {
+       ClkPolKind kind;
+       std::string name;
+};
+
+struct RawWrTransDef {
+       WrTransTargetKind target_kind;
+       std::string target_group;
+       WrTransKind kind;
+};
+
+struct PortWidthDef {
+       bool tied;
+       std::vector<int> wr_widths;
+       std::vector<int> rd_widths;
+};
+
+struct SrstDef {
+       ResetValKind val;
+       SrstKind kind;
+       bool block_wr;
+};
+
+struct Empty {};
+
+template<typename T> struct Capability {
+       T val;
+       Options opts, portopts;
+
+       Capability(T val, Options opts, Options portopts) : val(val), opts(opts), portopts(portopts) {}
+};
+
+template<typename T> using Caps = std::vector<Capability<T>>;
+
+struct PortGroupDef {
+       PortKind kind;
+       dict<std::string, pool<Const>> portopts;
+       std::vector<std::string> names;
+       Caps<Empty> forbid;
+       Caps<ClockDef> clock;
+       Caps<Empty> clken;
+       Caps<Empty> wrbe_separate;
+       Caps<PortWidthDef> width;
+       Caps<Empty> rden;
+       Caps<RdWrKind> rdwr;
+       Caps<ResetValKind> rdinit;
+       Caps<ResetValKind> rdarst;
+       Caps<SrstDef> rdsrst;
+       Caps<std::string> wrprio;
+       Caps<RawWrTransDef> wrtrans;
+       Caps<Empty> optional;
+       Caps<Empty> optional_rw;
+};
+
+struct WidthsDef {
+       std::vector<int> widths;
+       WidthMode mode;
+};
+
+struct ResourceDef {
+       std::string name;
+       int count;
+};
+
+struct RamDef {
+       IdString id;
+       dict<std::string, pool<Const>> opts;
+       RamKind kind;
+       Caps<Empty> forbid;
+       Caps<Empty> prune_rom;
+       Caps<PortGroupDef> ports;
+       Caps<int> abits;
+       Caps<WidthsDef> widths;
+       Caps<ResourceDef> resource;
+       Caps<double> cost;
+       Caps<double> widthscale;
+       Caps<int> byte;
+       Caps<MemoryInitKind> init;
+       Caps<std::string> style;
+};
+
+struct Parser {
+       std::string filename;
+       std::ifstream infile;
+       int line_number = 0;
+       Library &lib;
+       const pool<std::string> &defines;
+       pool<std::string> &defines_unused;
+       std::vector<std::string> tokens;
+       int token_idx = 0;
+       bool eof = false;
+
+       std::vector<std::pair<std::string, Const>> option_stack;
+       std::vector<std::pair<std::string, Const>> portoption_stack;
+       RamDef ram;
+       PortGroupDef port;
+       bool active = true;
+
+       Parser(std::string filename, Library &lib, const pool<std::string> &defines, pool<std::string> &defines_unused) : filename(filename), lib(lib), defines(defines), defines_unused(defines_unused) {
+               // Note: this rewrites the filename we're opening, but not
+               // the one we're storing — this is actually correct, so that
+               // we keep the original filename for diagnostics.
+               rewrite_filename(filename);
+               infile.open(filename);
+               if (infile.fail()) {
+                       log_error("failed to open %s\n", filename.c_str());
+               }
+               parse();
+               infile.close();
+       }
+
+       std::string peek_token() {
+               if (eof)
+                       return "";
+
+               if (token_idx < GetSize(tokens))
+                       return tokens[token_idx];
+
+               tokens.clear();
+               token_idx = 0;
+
+               std::string line;
+               while (std::getline(infile, line)) {
+                       line_number++;
+                       for (string tok = next_token(line); !tok.empty(); tok = next_token(line)) {
+                               if (tok[0] == '#')
+                                       break;
+                               if (tok[tok.size()-1] == ';') {
+                                       tokens.push_back(tok.substr(0, tok.size()-1));
+                                       tokens.push_back(";");
+                               } else {
+                                       tokens.push_back(tok);
+                               }
+                       }
+                       if (!tokens.empty())
+                               return tokens[token_idx];
+               }
+
+               eof = true;
+               return "";
+       }
+
+       std::string get_token() {
+               std::string res = peek_token();
+               if (!eof)
+                       token_idx++;
+               return res;
+       }
+
+       void eat_token(std::string expected) {
+               std::string token = get_token();
+               if (token != expected) {
+                       log_error("%s:%d: expected `%s`, got `%s`.\n", filename.c_str(), line_number, expected.c_str(), token.c_str());
+               }
+       }
+
+       IdString get_id() {
+               std::string token = get_token();
+               if (token.empty() || (token[0] != '$' && token[0] != '\\')) {
+                       log_error("%s:%d: expected id string, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+               return IdString(token);
+       }
+
+       std::string get_name() {
+               std::string res = get_token();
+               bool valid = true;
+               // Basic sanity check.
+               if (res.empty() || (!isalpha(res[0]) && res[0] != '_'))
+                       valid = false;
+               for (char c: res)
+                       if (!isalnum(c) && c != '_')
+                               valid = false;
+               if (!valid)
+                       log_error("%s:%d: expected name, got `%s`.\n", filename.c_str(), line_number, res.c_str());
+               return res;
+       }
+
+       std::string get_string() {
+               std::string token = get_token();
+               if (token.size() < 2 || token[0] != '"' || token[token.size()-1] != '"') {
+                       log_error("%s:%d: expected string, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+               return token.substr(1, token.size()-2);
+       }
+
+       bool peek_string() {
+               std::string token = peek_token();
+               return !token.empty() && token[0] == '"';
+       }
+
+       int get_int() {
+               std::string token = get_token();
+               char *endptr;
+               long res = strtol(token.c_str(), &endptr, 0);
+               if (token.empty() || *endptr || res > INT_MAX) {
+                       log_error("%s:%d: expected int, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+               return res;
+       }
+
+       double get_double() {
+               std::string token = get_token();
+               char *endptr;
+               double res = strtod(token.c_str(), &endptr);
+               if (token.empty() || *endptr) {
+                       log_error("%s:%d: expected float, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+               return res;
+       }
+
+       bool peek_int() {
+               std::string token = peek_token();
+               return !token.empty() && isdigit(token[0]);
+       }
+
+       void get_semi() {
+               std::string token = get_token();
+               if (token != ";") {
+                       log_error("%s:%d: expected `;`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+       }
+
+       Const get_value() {
+               std::string token = peek_token();
+               if (!token.empty() && token[0] == '"') {
+                       std::string s = get_string();
+                       return Const(s);
+               } else {
+                       return Const(get_int());
+               }
+       }
+
+       bool enter_ifdef(bool polarity) {
+               bool res = active;
+               std::string name = get_name();
+               defines_unused.erase(name);
+               if (active) {
+                       if (defines.count(name)) {
+                               active = polarity;
+                       } else {
+                               active = !polarity;
+                       }
+               }
+               return res;
+       }
+
+       void enter_else(bool save) {
+               get_token();
+               active = !active && save;
+       }
+
+       void enter_option() {
+               std::string name = get_string();
+               Const val = get_value();
+               if (active) {
+                       ram.opts[name].insert(val);
+               }
+               option_stack.push_back({name, val});
+       }
+
+       void exit_option() {
+               option_stack.pop_back();
+       }
+
+       Options get_options() {
+               Options res;
+               for (auto it: option_stack)
+                       res[it.first] = it.second;
+               return res;
+       }
+
+       void enter_portoption() {
+               std::string name = get_string();
+               Const val = get_value();
+               if (active) {
+                       port.portopts[name].insert(val);
+               }
+               portoption_stack.push_back({name, val});
+       }
+
+       void exit_portoption() {
+               portoption_stack.pop_back();
+       }
+
+       Options get_portoptions() {
+               Options res;
+               for (auto it: portoption_stack)
+                       res[it.first] = it.second;
+               return res;
+       }
+
+       template<typename T> void add_cap(Caps<T> &caps, T val) {
+               if (active)
+                       caps.push_back(Capability<T>(val, get_options(), get_portoptions()));
+       }
+
+       void parse_port_block() {
+               if (peek_token() == "{") {
+                       get_token();
+                       while (peek_token() != "}")
+                               parse_port_item();
+                       get_token();
+               } else {
+                       parse_port_item();
+               }
+       }
+
+       void parse_ram_block() {
+               if (peek_token() == "{") {
+                       get_token();
+                       while (peek_token() != "}")
+                               parse_ram_item();
+                       get_token();
+               } else {
+                       parse_ram_item();
+               }
+       }
+
+       void parse_top_block() {
+               if (peek_token() == "{") {
+                       get_token();
+                       while (peek_token() != "}")
+                               parse_top_item();
+                       get_token();
+               } else {
+                       parse_top_item();
+               }
+       }
+
+       void parse_port_item() {
+               std::string token = get_token();
+               if (token == "ifdef") {
+                       bool save = enter_ifdef(true);
+                       parse_port_block();
+                       if (peek_token() == "else") {
+                               enter_else(save);
+                               parse_port_block();
+                       }
+                       active = save;
+               } else if (token == "ifndef") {
+                       bool save = enter_ifdef(false);
+                       parse_port_block();
+                       if (peek_token() == "else") {
+                               enter_else(save);
+                               parse_port_block();
+                       }
+                       active = save;
+               } else if (token == "option") {
+                       enter_option();
+                       parse_port_block();
+                       exit_option();
+               } else if (token == "portoption") {
+                       enter_portoption();
+                       parse_port_block();
+                       exit_portoption();
+               } else if (token == "clock") {
+                       if (port.kind == PortKind::Ar) {
+                               log_error("%s:%d: `clock` not allowed in async read port.\n", filename.c_str(), line_number);
+                       }
+                       ClockDef def;
+                       token = get_token();
+                       if (token == "anyedge") {
+                               def.kind = ClkPolKind::Anyedge;
+                       } else if (token == "posedge") {
+                               def.kind = ClkPolKind::Posedge;
+                       } else if (token == "negedge") {
+                               def.kind = ClkPolKind::Negedge;
+                       } else {
+                               log_error("%s:%d: expected `posedge`, `negedge`, or `anyedge`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       if (peek_string()) {
+                               def.name = get_string();
+                       }
+                       get_semi();
+                       add_cap(port.clock, def);
+               } else if (token == "clken") {
+                       if (port.kind == PortKind::Ar) {
+                               log_error("%s:%d: `clken` not allowed in async read port.\n", filename.c_str(), line_number);
+                       }
+                       get_semi();
+                       add_cap(port.clken, {});
+               } else if (token == "wrbe_separate") {
+                       if (port.kind == PortKind::Ar || port.kind == PortKind::Sr) {
+                               log_error("%s:%d: `wrbe_separate` not allowed in read port.\n", filename.c_str(), line_number);
+                       }
+                       get_semi();
+                       add_cap(port.wrbe_separate, {});
+               } else if (token == "width") {
+                       PortWidthDef def;
+                       token = peek_token();
+                       bool is_rw = port.kind == PortKind::Srsw || port.kind == PortKind::Arsw;
+                       if (token == "tied") {
+                               get_token();
+                               if (!is_rw)
+                                       log_error("%s:%d: `tied` only makes sense for read+write ports.\n", filename.c_str(), line_number);
+                               while (peek_int())
+                                       def.wr_widths.push_back(get_int());
+                               def.tied = true;
+                       } else if (token == "mix") {
+                               get_token();
+                               if (!is_rw)
+                                       log_error("%s:%d: `mix` only makes sense for read+write ports.\n", filename.c_str(), line_number);
+                               while (peek_int())
+                                       def.wr_widths.push_back(get_int());
+                               def.rd_widths = def.wr_widths;
+                               def.tied = false;
+                       } else if (token == "rd") {
+                               get_token();
+                               if (!is_rw)
+                                       log_error("%s:%d: `rd` only makes sense for read+write ports.\n", filename.c_str(), line_number);
+                               do {
+                                       def.rd_widths.push_back(get_int());
+                               } while (peek_int());
+                               eat_token("wr");
+                               do {
+                                       def.wr_widths.push_back(get_int());
+                               } while (peek_int());
+                               def.tied = false;
+                       } else if (token == "wr") {
+                               get_token();
+                               if (!is_rw)
+                                       log_error("%s:%d: `wr` only makes sense for read+write ports.\n", filename.c_str(), line_number);
+                               do {
+                                       def.wr_widths.push_back(get_int());
+                               } while (peek_int());
+                               eat_token("rd");
+                               do {
+                                       def.rd_widths.push_back(get_int());
+                               } while (peek_int());
+                               def.tied = false;
+                       } else {
+                               do {
+                                       def.wr_widths.push_back(get_int());
+                               } while (peek_int());
+                               def.tied = true;
+                       }
+                       get_semi();
+                       add_cap(port.width, def);
+               } else if (token == "rden") {
+                       if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw)
+                               log_error("%s:%d: `rden` only allowed on sync read ports.\n", filename.c_str(), line_number);
+                       get_semi();
+                       add_cap(port.rden, {});
+               } else if (token == "rdwr") {
+                       if (port.kind != PortKind::Srsw)
+                               log_error("%s:%d: `rdwr` only allowed on sync read+write ports.\n", filename.c_str(), line_number);
+                       RdWrKind kind;
+                       token = get_token();
+                       if (token == "undefined") {
+                               kind = RdWrKind::Undefined;
+                       } else if (token == "no_change") {
+                               kind = RdWrKind::NoChange;
+                       } else if (token == "new") {
+                               kind = RdWrKind::New;
+                       } else if (token == "old") {
+                               kind = RdWrKind::Old;
+                       } else if (token == "new_only") {
+                               kind = RdWrKind::NewOnly;
+                       } else {
+                               log_error("%s:%d: expected `undefined`, `new`, `old`, `new_only`, or `no_change`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       get_semi();
+                       add_cap(port.rdwr, kind);
+               } else if (token == "rdinit") {
+                       if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw)
+                               log_error("%s:%d: `%s` only allowed on sync read ports.\n", filename.c_str(), line_number, token.c_str());
+                       ResetValKind kind;
+                       token = get_token();
+                       if (token == "none") {
+                               kind = ResetValKind::None;
+                       } else if (token == "zero") {
+                               kind = ResetValKind::Zero;
+                       } else if (token == "any") {
+                               kind = ResetValKind::Any;
+                       } else if (token == "no_undef") {
+                               kind = ResetValKind::NoUndef;
+                       } else {
+                               log_error("%s:%d: expected `none`, `zero`, `any`, or `no_undef`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       get_semi();
+                       add_cap(port.rdinit, kind);
+               } else if (token == "rdarst") {
+                       if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw)
+                               log_error("%s:%d: `%s` only allowed on sync read ports.\n", filename.c_str(), line_number, token.c_str());
+                       ResetValKind kind;
+                       token = get_token();
+                       if (token == "none") {
+                               kind = ResetValKind::None;
+                       } else if (token == "zero") {
+                               kind = ResetValKind::Zero;
+                       } else if (token == "any") {
+                               kind = ResetValKind::Any;
+                       } else if (token == "no_undef") {
+                               kind = ResetValKind::NoUndef;
+                       } else if (token == "init") {
+                               kind = ResetValKind::Init;
+                       } else {
+                               log_error("%s:%d: expected `none`, `zero`, `any`, `no_undef`, or `init`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       get_semi();
+                       add_cap(port.rdarst, kind);
+               } else if (token == "rdsrst") {
+                       if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw)
+                               log_error("%s:%d: `%s` only allowed on sync read ports.\n", filename.c_str(), line_number, token.c_str());
+                       SrstDef def;
+                       token = get_token();
+                       if (token == "none") {
+                               def.val = ResetValKind::None;
+                       } else if (token == "zero") {
+                               def.val = ResetValKind::Zero;
+                       } else if (token == "any") {
+                               def.val = ResetValKind::Any;
+                       } else if (token == "no_undef") {
+                               def.val = ResetValKind::NoUndef;
+                       } else if (token == "init") {
+                               def.val = ResetValKind::Init;
+                       } else {
+                               log_error("%s:%d: expected `none`, `zero`, `any`, `no_undef`, or `init`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       if (def.val == ResetValKind::None) {
+                               def.kind = SrstKind::None;
+                       } else {
+                               token = get_token();
+                               if (token == "ungated") {
+                                       def.kind = SrstKind::Ungated;
+                               } else if (token == "gated_clken") {
+                                       def.kind = SrstKind::GatedClkEn;
+                               } else if (token == "gated_rden") {
+                                       def.kind = SrstKind::GatedRdEn;
+                               } else {
+                                       log_error("%s:%d: expected `ungated`, `gated_clken` or `gated_rden`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                               }
+                       }
+                       def.block_wr = false;
+                       if (peek_token() == "block_wr") {
+                               get_token();
+                               def.block_wr = true;
+                       }
+                       get_semi();
+                       add_cap(port.rdsrst, def);
+               } else if (token == "wrprio") {
+                       if (port.kind == PortKind::Ar || port.kind == PortKind::Sr)
+                               log_error("%s:%d: `wrprio` only allowed on write ports.\n", filename.c_str(), line_number);
+                       do {
+                               add_cap(port.wrprio, get_string());
+                       } while (peek_string());
+                       get_semi();
+               } else if (token == "wrtrans") {
+                       if (port.kind == PortKind::Ar || port.kind == PortKind::Sr)
+                               log_error("%s:%d: `wrtrans` only allowed on write ports.\n", filename.c_str(), line_number);
+                       token = peek_token();
+                       RawWrTransDef def;
+                       if (token == "all") {
+                               def.target_kind = WrTransTargetKind::All;
+                               get_token();
+                       } else {
+                               def.target_kind = WrTransTargetKind::Group;
+                               def.target_group = get_string();
+                       }
+                       token = get_token();
+                       if (token == "new") {
+                               def.kind = WrTransKind::New;
+                       } else if (token == "old") {
+                               def.kind = WrTransKind::Old;
+                       } else {
+                               log_error("%s:%d: expected `new` or `old`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       get_semi();
+                       add_cap(port.wrtrans, def);
+               } else if (token == "forbid") {
+                       get_semi();
+                       add_cap(port.forbid, {});
+               } else if (token == "optional") {
+                       get_semi();
+                       add_cap(port.optional, {});
+               } else if (token == "optional_rw") {
+                       get_semi();
+                       add_cap(port.optional_rw, {});
+               } else if (token == "") {
+                       log_error("%s:%d: unexpected EOF while parsing port item.\n", filename.c_str(), line_number);
+               } else {
+                       log_error("%s:%d: unknown port-level item `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+       }
+
+       void parse_ram_item() {
+               std::string token = get_token();
+               if (token == "ifdef") {
+                       bool save = enter_ifdef(true);
+                       parse_ram_block();
+                       if (peek_token() == "else") {
+                               enter_else(save);
+                               parse_ram_block();
+                       }
+                       active = save;
+               } else if (token == "ifndef") {
+                       bool save = enter_ifdef(false);
+                       parse_ram_block();
+                       if (peek_token() == "else") {
+                               enter_else(save);
+                               parse_ram_block();
+                       }
+                       active = save;
+               } else if (token == "option") {
+                       enter_option();
+                       parse_ram_block();
+                       exit_option();
+               } else if (token == "prune_rom") {
+                       get_semi();
+                       add_cap(ram.prune_rom, {});
+               } else if (token == "forbid") {
+                       get_semi();
+                       add_cap(ram.forbid, {});
+               } else if (token == "abits") {
+                       int val = get_int();
+                       if (val < 0)
+                               log_error("%s:%d: abits %d nagative.\n", filename.c_str(), line_number, val);
+                       get_semi();
+                       add_cap(ram.abits, val);
+               } else if (token == "width") {
+                       WidthsDef def;
+                       int w = get_int();
+                       if (w <= 0)
+                               log_error("%s:%d: width %d not positive.\n", filename.c_str(), line_number, w);
+                       def.widths.push_back(w);
+                       def.mode = WidthMode::Single;
+                       get_semi();
+                       add_cap(ram.widths, def);
+               } else if (token == "widths") {
+                       WidthsDef def;
+                       int last = 0;
+                       do {
+                               int w = get_int();
+                               if (w <= 0)
+                                       log_error("%s:%d: width %d not positive.\n", filename.c_str(), line_number, w);
+                               if (w < last * 2)
+                                       log_error("%s:%d: width %d smaller than %d required for progression.\n", filename.c_str(), line_number, w, last * 2);
+                               last = w;
+                               def.widths.push_back(w);
+                       } while(peek_int());
+                       token = get_token();
+                       if (token == "global") {
+                               def.mode = WidthMode::Global;
+                       } else if (token == "per_port") {
+                               def.mode = WidthMode::PerPort;
+                       } else {
+                               log_error("%s:%d: expected `global`, or `per_port`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       get_semi();
+                       add_cap(ram.widths, def);
+               } else if (token == "resource") {
+                       ResourceDef def;
+                       def.name = get_string();
+                       if (peek_int())
+                               def.count = get_int();
+                       else
+                               def.count = 1;
+                       get_semi();
+                       add_cap(ram.resource, def);
+               } else if (token == "cost") {
+                       add_cap(ram.cost, get_double());
+                       get_semi();
+               } else if (token == "widthscale") {
+                       if (peek_int()) {
+                               add_cap(ram.widthscale, get_double());
+                       } else {
+                               add_cap(ram.widthscale, 0.0);
+                       }
+                       get_semi();
+               } else if (token == "byte") {
+                       int val = get_int();
+                       if (val <= 0)
+                               log_error("%s:%d: byte width %d not positive.\n", filename.c_str(), line_number, val);
+                       add_cap(ram.byte, val);
+                       get_semi();
+               } else if (token == "init") {
+                       MemoryInitKind kind;
+                       token = get_token();
+                       if (token == "zero") {
+                               kind = MemoryInitKind::Zero;
+                       } else if (token == "any") {
+                               kind = MemoryInitKind::Any;
+                       } else if (token == "no_undef") {
+                               kind = MemoryInitKind::NoUndef;
+                       } else if (token == "none") {
+                               kind = MemoryInitKind::None;
+                       } else {
+                               log_error("%s:%d: expected `zero`, `any`, `none`, or `no_undef`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       get_semi();
+                       add_cap(ram.init, kind);
+               } else if (token == "style") {
+                       do {
+                               std::string val = get_string();
+                               for (auto &c: val)
+                                       c = std::tolower(c);
+                               add_cap(ram.style, val);
+                       } while (peek_string());
+                       get_semi();
+               } else if (token == "port") {
+                       port = PortGroupDef();
+                       token = get_token();
+                       if (token == "ar") {
+                               port.kind = PortKind::Ar;
+                       } else if (token == "sr") {
+                               port.kind = PortKind::Sr;
+                       } else if (token == "sw") {
+                               port.kind = PortKind::Sw;
+                       } else if (token == "arsw") {
+                               port.kind = PortKind::Arsw;
+                       } else if (token == "srsw") {
+                               port.kind = PortKind::Srsw;
+                       } else {
+                               log_error("%s:%d: expected `ar`, `sr`, `sw`, `arsw`, or `srsw`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       do {
+                               port.names.push_back(get_string());
+                       } while (peek_string());
+                       parse_port_block();
+                       if (active)
+                               add_cap(ram.ports, port);
+               } else if (token == "") {
+                       log_error("%s:%d: unexpected EOF while parsing ram item.\n", filename.c_str(), line_number);
+               } else {
+                       log_error("%s:%d: unknown ram-level item `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+       }
+
+       void parse_top_item() {
+               std::string token = get_token();
+               if (token == "ifdef") {
+                       bool save = enter_ifdef(true);
+                       parse_top_block();
+                       if (peek_token() == "else") {
+                               enter_else(save);
+                               parse_top_block();
+                       }
+                       active = save;
+               } else if (token == "ifndef") {
+                       bool save = enter_ifdef(false);
+                       parse_top_block();
+                       if (peek_token() == "else") {
+                               enter_else(save);
+                               parse_top_block();
+                       }
+                       active = save;
+               } else if (token == "ram") {
+                       int orig_line = line_number;
+                       ram = RamDef();
+                       token = get_token();
+                       if (token == "distributed") {
+                               ram.kind = RamKind::Distributed;
+                       } else if (token == "block") {
+                               ram.kind = RamKind::Block;
+                       } else if (token == "huge") {
+                               ram.kind = RamKind::Huge;
+                       } else {
+                               log_error("%s:%d: expected `distributed`, `block`, or `huge`, got `%s`.\n", filename.c_str(), line_number, token.c_str());
+                       }
+                       ram.id = get_id();
+                       parse_ram_block();
+                       if (active) {
+                               compile_ram(orig_line);
+                       }
+               } else if (token == "") {
+                       log_error("%s:%d: unexpected EOF while parsing top item.\n", filename.c_str(), line_number);
+               } else {
+                       log_error("%s:%d: unknown top-level item `%s`.\n", filename.c_str(), line_number, token.c_str());
+               }
+       }
+
+       bool opts_ok(const Options &def, const Options &var) {
+               for (auto &it: def)
+                       if (var.at(it.first) != it.second)
+                               return false;
+               return true;
+       }
+
+       template<typename T> const T *find_single_cap(const Caps<T> &caps, const Options &opts, const Options &portopts, const char *name) {
+               const T *res = nullptr;
+               for (auto &cap: caps) {
+                       if (!opts_ok(cap.opts, opts))
+                               continue;
+                       if (!opts_ok(cap.portopts, portopts))
+                               continue;
+                       if (res)
+                               log_error("%s:%d: duplicate %s cap.\n", filename.c_str(), line_number, name);
+                       res = &cap.val;
+               }
+               return res;
+       }
+
+       std::vector<Options> make_opt_combinations(const dict<std::string, pool<Const>> &opts) {
+               std::vector<Options> res;
+               res.push_back(Options());
+               for (auto &it: opts) {
+                       std::vector<Options> new_res;
+                       for (auto &val: it.second) {
+                               for (Options o: res) {
+                                       o[it.first] = val;
+                                       new_res.push_back(o);
+                               }
+                       }
+                       res = new_res;
+               }
+               return res;
+       }
+
+       void compile_portgroup(Ram &cram, PortGroupDef &pdef, dict<std::string, int> &clk_ids, const dict<std::string, int> &port_ids, int orig_line) {
+               PortGroup grp;
+               grp.optional = find_single_cap(pdef.optional, cram.options, Options(), "optional");
+               grp.optional_rw = find_single_cap(pdef.optional_rw, cram.options, Options(), "optional_rw");
+               grp.names = pdef.names;
+               for (auto portopts: make_opt_combinations(pdef.portopts)) {
+                       bool forbidden = false;
+                       for (auto &fdef: ram.forbid) {
+                               if (opts_ok(fdef.opts, cram.options) && opts_ok(fdef.portopts, portopts)) {
+                                       forbidden = true;
+                               }
+                       }
+                       if (forbidden)
+                               continue;
+                       PortVariant var;
+                       var.options = portopts;
+                       var.kind = pdef.kind;
+                       if (pdef.kind != PortKind::Ar) {
+                               const ClockDef *cdef = find_single_cap(pdef.clock, cram.options, portopts, "clock");
+                               if (!cdef)
+                                       log_error("%s:%d: missing clock capability.\n", filename.c_str(), orig_line);
+                               var.clk_pol = cdef->kind;
+                               if (cdef->name.empty()) {
+                                       var.clk_shared = -1;
+                               } else {
+                                       auto it = clk_ids.find(cdef->name);
+                                       bool anyedge = cdef->kind == ClkPolKind::Anyedge;
+                                       if (it == clk_ids.end()) {
+                                               clk_ids[cdef->name] = var.clk_shared = GetSize(cram.shared_clocks);
+                                               RamClock clk;
+                                               clk.name = cdef->name;
+                                               clk.anyedge = anyedge;
+                                               cram.shared_clocks.push_back(clk);
+                                       } else {
+                                               var.clk_shared = it->second;
+                                               if (cram.shared_clocks[var.clk_shared].anyedge != anyedge) {
+                                                       log_error("%s:%d: named clock \"%s\" used with both posedge/negedge and anyedge clocks.\n", filename.c_str(), orig_line, cdef->name.c_str());
+                                               }
+                                       }
+                               }
+                               var.clk_en = find_single_cap(pdef.clken, cram.options, portopts, "clken");
+                       }
+                       const PortWidthDef *wdef = find_single_cap(pdef.width, cram.options, portopts, "width");
+                       if (wdef) {
+                               if (cram.width_mode != WidthMode::PerPort)
+                                       log_error("%s:%d: per-port width doesn't make sense for tied dbits.\n", filename.c_str(), orig_line);
+                               compile_widths(var, cram.dbits, *wdef);
+                       } else {
+                               var.width_tied = true;
+                               var.min_wr_wide_log2 = 0;
+                               var.min_rd_wide_log2 = 0;
+                               var.max_wr_wide_log2 = GetSize(cram.dbits) - 1;
+                               var.max_rd_wide_log2 = GetSize(cram.dbits) - 1;
+                       }
+                       if (pdef.kind == PortKind::Srsw || pdef.kind == PortKind::Sr) {
+                               const RdWrKind *rdwr = find_single_cap(pdef.rdwr, cram.options, portopts, "rdwr");
+                               var.rdwr = rdwr ? *rdwr : RdWrKind::Undefined;
+                               var.rd_en = find_single_cap(pdef.rden, cram.options, portopts, "rden");
+                               const ResetValKind *iv = find_single_cap(pdef.rdinit, cram.options, portopts, "rdinit");
+                               var.rdinitval = iv ? *iv : ResetValKind::None;
+                               const ResetValKind *arv = find_single_cap(pdef.rdarst, cram.options, portopts, "rdarst");
+                               var.rdarstval = arv ? *arv : ResetValKind::None;
+                               const SrstDef *srv = find_single_cap(pdef.rdsrst, cram.options, portopts, "rdsrst");
+                               if (srv) {
+                                       var.rdsrstval = srv->val;
+                                       var.rdsrstmode = srv->kind;
+                                       var.rdsrst_block_wr = srv->block_wr;
+                                       if (srv->kind == SrstKind::GatedClkEn && !var.clk_en)
+                                               log_error("%s:%d: `gated_clken` used without `clken`.\n", filename.c_str(), orig_line);
+                                       if (srv->kind == SrstKind::GatedRdEn && !var.rd_en)
+                                               log_error("%s:%d: `gated_rden` used without `rden`.\n", filename.c_str(), orig_line);
+                               } else {
+                                       var.rdsrstval = ResetValKind::None;
+                                       var.rdsrstmode = SrstKind::None;
+                                       var.rdsrst_block_wr = false;
+                               }
+                               if (var.rdarstval == ResetValKind::Init || var.rdsrstval == ResetValKind::Init) {
+                                       if (var.rdinitval != ResetValKind::Any && var.rdinitval != ResetValKind::NoUndef) {
+                                               log_error("%s:%d: reset value `init` has to be paired with `any` or `no_undef` initial value.\n", filename.c_str(), orig_line);
+                                       }
+                               }
+                       }
+                       var.wrbe_separate = find_single_cap(pdef.wrbe_separate, cram.options, portopts, "wrbe_separate");
+                       if (var.wrbe_separate && cram.byte == 0) {
+                               log_error("%s:%d: `wrbe_separate` used without `byte`.\n", filename.c_str(), orig_line);
+                       }
+                       for (auto &def: pdef.wrprio) {
+                               if (!opts_ok(def.opts, cram.options))
+                                       continue;
+                               if (!opts_ok(def.portopts, portopts))
+                                       continue;
+                               var.wrprio.push_back(port_ids.at(def.val));
+                       }
+                       for (auto &def: pdef.wrtrans) {
+                               if (!opts_ok(def.opts, cram.options))
+                                       continue;
+                               if (!opts_ok(def.portopts, portopts))
+                                       continue;
+                               WrTransDef tdef;
+                               tdef.target_kind = def.val.target_kind;
+                               if (def.val.target_kind == WrTransTargetKind::Group)
+                                       tdef.target_group = port_ids.at(def.val.target_group);
+                               tdef.kind = def.val.kind;
+                               var.wrtrans.push_back(tdef);
+                       }
+                       grp.variants.push_back(var);
+               }
+               if (grp.variants.empty()) {
+                       log_error("%s:%d: all port option combinations are forbidden.\n", filename.c_str(), orig_line);
+               }
+               cram.port_groups.push_back(grp);
+       }
+
+       void compile_ram(int orig_line) {
+               if (ram.abits.empty())
+                       log_error("%s:%d: `dims` capability should be specified.\n", filename.c_str(), orig_line);
+               if (ram.widths.empty())
+                       log_error("%s:%d: `widths` capability should be specified.\n", filename.c_str(), orig_line);
+               if (ram.ports.empty())
+                       log_error("%s:%d: at least one port group should be specified.\n", filename.c_str(), orig_line);
+               for (auto opts: make_opt_combinations(ram.opts)) {
+                       bool forbidden = false;
+                       for (auto &fdef: ram.forbid) {
+                               if (opts_ok(fdef.opts, opts)) {
+                                       forbidden = true;
+                               }
+                       }
+                       if (forbidden)
+                               continue;
+                       Ram cram;
+                       cram.id = ram.id;
+                       cram.kind = ram.kind;
+                       cram.options = opts;
+                       cram.prune_rom = find_single_cap(ram.prune_rom, opts, Options(), "prune_rom");
+                       const int *abits = find_single_cap(ram.abits, opts, Options(), "abits");
+                       if (!abits)
+                               continue;
+                       cram.abits = *abits;
+                       const WidthsDef *widths = find_single_cap(ram.widths, opts, Options(), "widths");
+                       if (!widths)
+                               continue;
+                       cram.dbits = widths->widths;
+                       cram.width_mode = widths->mode;
+                       const ResourceDef *resource = find_single_cap(ram.resource, opts, Options(), "resource");
+                       if (resource) {
+                               cram.resource_name = resource->name;
+                               cram.resource_count = resource->count;
+                       } else {
+                               cram.resource_count = 1;
+                       }
+                       cram.cost = 0;
+                       for (auto &cap: ram.cost) {
+                               if (opts_ok(cap.opts, opts))
+                                       cram.cost += cap.val;
+                       }
+                       const double *widthscale = find_single_cap(ram.widthscale, opts, Options(), "widthscale");
+                       if (widthscale)
+                               cram.widthscale = *widthscale ? *widthscale : cram.cost;
+                       else
+                               cram.widthscale = 0;
+                       const int *byte = find_single_cap(ram.byte, opts, Options(), "byte");
+                       cram.byte = byte ? *byte : 0;
+                       if (GetSize(cram.dbits) - 1 > cram.abits)
+                               log_error("%s:%d: abits %d too small for dbits progression.\n", filename.c_str(), line_number, cram.abits);
+                       validate_byte(widths->widths, cram.byte);
+                       const MemoryInitKind *ik = find_single_cap(ram.init, opts, Options(), "init");
+                       cram.init = ik ? *ik : MemoryInitKind::None;
+                       for (auto &sdef: ram.style)
+                               if (opts_ok(sdef.opts, opts))
+                                       cram.style.push_back(sdef.val);
+                       dict<std::string, int> port_ids;
+                       int ctr = 0;
+                       for (auto &pdef: ram.ports) {
+                               if (!opts_ok(pdef.opts, opts))
+                                       continue;
+                               for (auto &name: pdef.val.names)
+                                       port_ids[name] = ctr;
+                               ctr++;
+                       }
+                       dict<std::string, int> clk_ids;
+                       for (auto &pdef: ram.ports) {
+                               if (!opts_ok(pdef.opts, opts))
+                                       continue;
+                               compile_portgroup(cram, pdef.val, clk_ids, port_ids, orig_line);
+                       }
+                       lib.rams.push_back(cram);
+               }
+       }
+
+       void validate_byte(const std::vector<int> &widths, int byte) {
+               if (byte == 0)
+                       return;
+               if (byte >= widths.back())
+                       return;
+               if (widths[0] % byte == 0) {
+                       for (int j = 1; j < GetSize(widths); j++)
+                               if (widths[j] % byte != 0)
+                                       log_error("%s:%d: width progression past byte width %d is not divisible.\n", filename.c_str(), line_number, byte);
+                       return;
+               }
+               for (int i = 0; i < GetSize(widths); i++) {
+                       if (widths[i] == byte) {
+                               for (int j = i + 1; j < GetSize(widths); j++)
+                                       if (widths[j] % byte != 0)
+                                               log_error("%s:%d: width progression past byte width %d is not divisible.\n", filename.c_str(), line_number, byte);
+                               return;
+                       }
+               }
+               log_error("%s:%d: byte width %d invalid for dbits.\n", filename.c_str(), line_number, byte);
+       }
+
+       void compile_widths(PortVariant &var, const std::vector<int> &widths, const PortWidthDef &width) {
+               var.width_tied = width.tied;
+               auto wr_widths = compile_widthdef(widths, width.wr_widths);
+               var.min_wr_wide_log2 = wr_widths.first;
+               var.max_wr_wide_log2 = wr_widths.second;
+               if (width.tied) {
+                       var.min_rd_wide_log2 = wr_widths.first;
+                       var.max_rd_wide_log2 = wr_widths.second;
+               } else {
+                       auto rd_widths = compile_widthdef(widths, width.rd_widths);
+                       var.min_rd_wide_log2 = rd_widths.first;
+                       var.max_rd_wide_log2 = rd_widths.second;
+               }
+       }
+
+       std::pair<int, int> compile_widthdef(const std::vector<int> &dbits, const std::vector<int> &widths) {
+               if (widths.empty())
+                       return {0, GetSize(dbits) - 1};
+               for (int i = 0; i < GetSize(dbits); i++) {
+                       if (dbits[i] == widths[0]) {
+                               for (int j = 0; j < GetSize(widths); j++) {
+                                       if (i+j >= GetSize(dbits) || dbits[i+j] != widths[j]) {
+                                               log_error("%s:%d: port width %d doesn't match dbits progression.\n", filename.c_str(), line_number, widths[j]);
+                                       }
+                               }
+                               return {i, i + GetSize(widths) - 1};
+                       }
+               }
+               log_error("%s:%d: port width %d invalid for dbits.\n", filename.c_str(), line_number, widths[0]);
+       }
+
+       void parse() {
+               while (peek_token() != "")
+                       parse_top_item();
+       }
+};
+
+PRIVATE_NAMESPACE_END
+
+Library MemLibrary::parse_library(const std::vector<std::string> &filenames, const pool<std::string> &defines) {
+       Library res;
+       pool<std::string> defines_unused = defines;
+       for (auto &file: filenames) {
+               Parser(file, res, defines, defines_unused);
+       }
+       for (auto def: defines_unused) {
+               log_warning("define %s not used in the library.\n", def.c_str());
+       }
+       return res;
+}
diff --git a/passes/memory/memlib.h b/passes/memory/memlib.h
new file mode 100644 (file)
index 0000000..c3f7728
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2021  Marcelina Kościelnicka <mwk@0x04.net>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#ifndef MEMLIB_H
+#define MEMLIB_H
+
+#include <string>
+#include <vector>
+
+#include "kernel/yosys.h"
+
+YOSYS_NAMESPACE_BEGIN
+
+namespace MemLibrary {
+
+enum class RamKind {
+       Auto,
+       Logic,
+       NotLogic,
+       Distributed,
+       Block,
+       Huge,
+};
+
+enum class WidthMode {
+       Single,
+       Global,
+       PerPort,
+};
+
+enum class MemoryInitKind {
+       None,
+       Zero,
+       Any,
+       NoUndef,
+};
+
+enum class PortKind {
+       Sr,
+       Ar,
+       Sw,
+       Srsw,
+       Arsw,
+};
+
+enum class ClkPolKind {
+       Anyedge,
+       Posedge,
+       Negedge,
+};
+
+enum class RdWrKind {
+       Undefined,
+       NoChange,
+       New,
+       Old,
+       NewOnly,
+};
+
+enum class ResetValKind {
+       None,
+       Zero,
+       Any,
+       NoUndef,
+       Init,
+};
+
+enum class SrstKind {
+       None,
+       Ungated,
+       GatedClkEn,
+       GatedRdEn,
+};
+
+enum class WrTransTargetKind {
+       All,
+       Group,
+};
+
+enum class WrTransKind {
+       New,
+       Old,
+};
+
+struct WrTransDef {
+       WrTransTargetKind target_kind;
+       int target_group;
+       WrTransKind kind;
+};
+
+struct PortVariant {
+       dict<std::string, Const> options;
+       PortKind kind;
+       int clk_shared;
+       ClkPolKind clk_pol;
+       bool clk_en;
+       bool width_tied;
+       int min_wr_wide_log2;
+       int max_wr_wide_log2;
+       int min_rd_wide_log2;
+       int max_rd_wide_log2;
+       bool rd_en;
+       RdWrKind rdwr;
+       ResetValKind rdinitval;
+       ResetValKind rdarstval;
+       ResetValKind rdsrstval;
+       SrstKind rdsrstmode;
+       bool rdsrst_block_wr;
+       bool wrbe_separate;
+       std::vector<int> wrprio;
+       std::vector<WrTransDef> wrtrans;
+};
+
+struct PortGroup {
+       bool optional;
+       bool optional_rw;
+       std::vector<std::string> names;
+       std::vector<PortVariant> variants;
+};
+
+struct RamClock {
+       std::string name;
+       bool anyedge;
+};
+
+struct Ram {
+       IdString id;
+       RamKind kind;
+       dict<std::string, Const> options;
+       std::vector<PortGroup> port_groups;
+       bool prune_rom;
+       int abits;
+       std::vector<int> dbits;
+       WidthMode width_mode;
+       std::string resource_name;
+       int resource_count;
+       double cost;
+       double widthscale;
+       int byte;
+       MemoryInitKind init;
+       std::vector<std::string> style;
+       std::vector<RamClock> shared_clocks;
+};
+
+struct Library {
+       std::vector<Ram> rams;
+};
+
+Library parse_library(const std::vector<std::string> &filenames, const pool<std::string> &defines);
+
+}
+
+YOSYS_NAMESPACE_END
+
+#endif
diff --git a/passes/memory/memlib.md b/passes/memory/memlib.md
new file mode 100644 (file)
index 0000000..fdc2d4b
--- /dev/null
@@ -0,0 +1,505 @@
+# The `memory_libmap` pass
+
+The `memory_libmap` pass is used to map memories to hardware primitives.  To work,
+it needs a description of available target memories in a custom format.
+
+
+## Basic structure
+
+A basic library could look like this:
+
+    # A distributed-class RAM called $__RAM16X4SDP_
+    ram distributed $__RAM16X4SDP_ {
+        # Has 4 address bits (ie. 16 rows).
+        abits 4;
+        # Has 4 data bits.
+        width 4;
+        # Cost for the selection heuristic.
+        cost 4;
+        # Can be initialized to any value on startup.
+        init any;
+        # Has a synchronous write port called "W"...
+        port sw "W" {
+            # ... with a positive edge clock.
+            clock posedge;
+        }
+        # Has an asynchronous read port called "R".
+        port ar "R" {
+        }
+    }
+
+    # A block-class RAM called $__RAMB9K_
+    ram block $__RAMB9K_ {
+        # Has 13 address bits in the base (most narrow) data width.
+        abits 13;
+        # The available widths are:
+        # - 1 (13 address bits)
+        # - 2 (12 address bits)
+        # - 4 (11 address bits)
+        # - 9 (10 address bits)
+        # - 18 (9 address bits)
+        # The width selection is per-port.
+        widths 1 2 4 9 18 per_port;
+        # Has a write enable signal with 1 bit for every 9 data bits.
+        byte 9;
+        cost 64;
+        init any;
+        # Has two synchronous read+write ports, called "A" and "B".
+        port srsw "A" "B" {
+            clock posedge;
+            # Has a clock enable signal (gates both read and write).
+            clken;
+            # Has three per-port selectable options for handling read+write behavior:
+            portoption "RDWR" "NO_CHANGE" {
+                # When port is writing, reading is not done (output register keeps
+                # its value).
+                rdwr no_change;
+            }
+            portoption "RDWR" "OLD" {
+                # When port is writing, the data read is the old value (before the
+                # write).
+                rdwr old;
+            }
+            portoption "RDWR" "NEW" {
+                # When port is writing, the data read is the new value.
+                rdwr new;
+            }
+        }
+    }
+
+The pass will automatically select between the two available cells and
+the logic fallback (mapping the whole memory to LUTs+FFs) based on required
+capabilities and cost function.  The selected memories will be transformed
+to intermediate `$__RAM16X4SDP_` and `$__RAMB9K_` cells that need to be mapped
+to actual hardware cells by a `techmap` pass, while memories selected for logic
+fallback will be left unmapped and will be later mopped up by `memory_map` pass.
+
+## RAM definition blocks
+
+The syntax for a RAM definition is:
+
+    ram <kind: distributed|block|huge> <name> {
+        <ram properties>
+        <ports>
+    }
+
+The `<name>` is used as the type of the mapped cell that will be passed to `techmap`.
+The memory kind is one of `distributed`, `block`, or `huge`.  It describes the general
+class of the memory and can be matched on by manual selection attributes.
+
+The available ram properties are:
+
+- `abits <address bits>;`
+- `width <width>;`
+- `widths <width 1> <width 2> ... <width n> <global|per_port>;`
+- `byte <width>;`
+- `cost <cost>;`
+- `widthscale [<factor>];`
+- `resource <name> <count>;`
+- `init <none|zero|any|no_undef>;`
+- `style "<name 1>" "<name 2>" "<name 3>" ...;`
+- `prune_rom;`
+
+### RAM dimensions
+
+The memory dimensions are described by `abits` and `width` or `widths` properties.
+
+For a simple memory cell with a fixed width, use `abits` and `width` like this:
+
+    abits 4;
+    width 4;
+
+This will result in a `2**abits × width` memory cell.
+
+Multiple-width memories are also possible, and use the `widths` property instead.
+The rules for multiple-width memories are:
+
+- the widths are given in `widths` property in increasing order
+- the value in the `abits` property corresponds to the most narrow width
+- every width in the list needs to be greater than or equal to twice
+  the previous width (ie. `1 2 4 9 18` is valid, `1 2 4 7 14` is not)
+- it is assumed that, for every width in progression, the word in memory
+  is made of two smaller words, plus optionally some extra bits (eg. in the above
+  list, the 9-bit word is made of two 4-bit words and 1 extra bit), and thus
+  each sequential width in the list corresponds to one fewer usable address bit
+- all addresses connected to memory ports are always `abits` bits wide, with const
+  zero wired to the unused bits corresponding to wide ports
+
+When multiple widths are specified, they can be `per_port` or `global`.
+For the `global` version, the pass has to pick one width for the whole cell,
+and it is set on the resulting cell as the `WIDTH` parameter.  For the `per_port`
+version, the selection is made on per-port basis, and passed using `PORT_*_WIDTH`
+parameters.  When the mode is `per_port`, the width selection can be fine-tuned
+with the port `width` property.
+
+Specifying dimensions is mandatory.
+
+
+### Byte width
+
+If the memory cell has per-byte write enables, the `byte` property can be used
+to define the byte size (ie. how many data bits correspond to one write enable
+bit).
+
+The property is optional.  If not used, it is assumed that there is a single
+write enable signal for each writable port.
+
+The rules for this property are as follows:
+
+- for every available width, the width needs to be a multiple of the byte size,
+  or the byte size needs to be larger than the width
+- if the byte size is larger than the width, the byte enable signel is assumed
+  to be one bit wide and cover the whole port
+- otherwise, the byte enable signal has one bit for every `byte` bits of the
+  data port
+
+The exact kind of byte enable signal is determined by the presence or absence
+of the per-port `wrbe_separate` property.
+
+
+### Cost properties
+
+The `cost` property is used to estimate the cost of using a given mapping.
+This is the cost of using one cell, and will be scaled as appropriate if
+the mapping requires multiple cells.
+
+If the `widthscale` property is specified, the mapping is assumed to be flexible,
+with cost scaling with the percentage of data width actually used.  The value
+of the `widthscale` property is how much of the cost is scalable as such.
+If the value is omitted, all of the cost is assumed to scale.
+Eg. for the following properties:
+
+    width 14;
+    cost 8;
+    widthscale 7;
+
+The cost of a given cell will be assumed to be `(8 - 7) + 7 * (used_bits / 14)`.
+
+If `widthscale` is used, The pass will attach a `BITS_USED` parameter to mapped
+calls, with a bitmask of which data bits of the memory are actually in use.
+The parameter width will be the widest width in the `widths` property, and
+the bit correspondence is defined accordingly.
+
+The `cost` property is mandatory.
+
+
+### `init` property
+
+This property describes the state of the memory at initialization time.  Can have
+one of the following values:
+
+- `none`: the memory contents are unpredictable, memories requiring any sort
+  of initialization will not be mapped to this cell
+- `zero`: the memory contents are zero, memories can be mapped to this cell iff
+  their initialization value is entirely zero or undef
+- `any`: the memory contents can be arbitrarily selected, and the initialization
+  will be passes as the `INIT` parameter to the mapped cell
+- `no_undef`: like `any`, but only 0 and 1 bit values are supported (the pass will
+  convert any x bits to 0)
+
+The `INIT` parameter is always constructed as a concatenation of words corresponding
+to the widest available `widths` setting, so that all available memory cell bits
+are covered.
+
+This property is optional and assumed to be `none` when not present.
+
+
+### `style` property
+
+Provides a name (or names) for this definition that can be passed to the `ram_style`
+or similar attribute to manually select it.  Optional and can be used multiple times.
+
+
+### `prune_rom` property
+
+Specifying this property disqualifies the definition from consideration for source
+memories that have no write ports (ie. ROMs).  Use this on definitions that have
+an obviously superior read-only alternative (eg. LUTRAMs) to make the pass skip
+them over quickly.
+
+
+## Port definition blocks
+
+The syntax for a port group definition is:
+
+    port <ar|sr|sw|arsw|srsw> "NAME 1" "NAME 2" ... {
+        <port properties>
+    }
+
+A port group definition defines a group of ports with identical properties.
+There are as many ports in a group as there are names given.
+
+Ports come in 5 kinds:
+
+- `ar`: asynchronous read port
+- `sr`: synchronous read port
+- `sw`: synchronous write port
+- `arsw`: simultanous synchronous write + asynchronous read with common address (commonly found in LUT RAMs)
+- `srsw`: synchronous write + synchronous read with common address
+
+The port properties available are:
+
+- `width <tied|mix>;`
+- `width <width 1> <width 2> ...;`
+- `width <tied|mix> <width 1> <width 2> ...;`
+- `width rd <width 1> <width 2> ... wr <width 1> <width 2> ...;`
+- `clock <posedge|negedge|anyedge> ["SHARED_NAME"];`
+- `clken;`
+- `rden;`
+- `wrbe_separate;`
+- `rdwr <undefined|no_change|new|old|new_only>;`
+- `rdinit <none|zero|any|no_undef>;`
+- `rdarst <none|zero|any|no_undef|init>;`
+- `rdsrst <none|zero|any|no_undef|init> <ungated|gatec_clken|gated_rden> [block_wr];`
+- `wrprio "NAME" "NAME" ...;`
+- `wrtrans <"NAME"|all> <old|new>;`
+- `optional;`
+- `optional_rw;`
+
+The base signals connected to the mapped cell for ports are:
+
+- `PORT_<name>_ADDR`: the address
+- `PORT_<name>_WR_DATA`: the write data (for `sw`/`arsw`/`srsw` ports only)
+- `PORT_<name>_RD_DATA`: the read data (for `ar`/`sr`/`arsw`/`srsw` ports only)
+- `PORT_<name>_WR_EN`: the write enable or enables (for `sw`/`arsw`/`srsw` ports only)
+
+The address is always `abits` wide.  If a non-narrowest width is used, the appropriate low
+bits will be tied to 0.
+
+
+### Port `width` prooperty
+
+If the RAM has `per_port` widths, the available width selection can be further described
+on per-port basis, by using one of the following properties:
+
+- `width tied;`: any width from the master `widths` list is acceptable, and
+  (for read+write ports) the read and write width has to be the same
+- `width tied <width 1> <width 2> ...;`: like above, but limits the width
+  selection to the given list; the list has to be a contiguous sublist of the
+  master `widths` list
+- `width <width 1> <width 2> ...;`: alias for the above, to be used for read-only
+  or write-only ports
+- `width mix;`: any width from the master `widths` list is acceptable, and
+  read width can be different than write width (only usable for read+write ports)
+- `width mix <width 1> <width 2> ...;`: like above, but limits the width
+  selection to the given list; the list has to be a contiguous sublist of the
+  master `widths` list
+- `width rd <width 1> <width 2> ... wr <width 1> <width 2> ...;`: like above,
+  but the limitted selection can be different for read and write widths
+
+If `per_port` widths are in use and this property is not specified, `width tied;` is assumed.
+
+The parameters attached to the cell in `per_port` widths mode are:
+
+- `PORT_<name>_WIDTH`: the selected width (for `tied` ports)
+- `PORT_<name>_RD_WIDTH`: the selected read width (for `mix` ports)
+- `PORT_<name>_WR_WIDTH`: the selected write width (for `mix` ports)
+
+
+### `clock` property
+
+The `clock` property is used with synchronous ports (and synchronous ports only).
+It is mandatory for them and describes the clock polarity and clock sharing.
+`anyedge` means that both polarities are supported.
+
+If a shared clock name is provided, the port is assumed to have a shared clock signal
+with all other ports using the same shared name.  Otherwise, the port is assumed to
+have its own clock signal.
+
+The port clock is always provided on the memory cell as `PORT_<name>_CLK` signal
+(even if it is also shared).  Shared clocks are also provided as `CLK_<shared_name>`
+signals.
+
+For `anyedge` clocks, the cell gets a `PORT_<name>_CLKPOL` parameter that is set
+to 1 for `posedge` clocks and 0 for `negedge` clocks.  If the clock is shared,
+the same information will also be provided as `CLK_<shared_name>_POL` parameter.
+
+
+### `clken` and `rden`
+
+The `clken` property, if present, means that the port has a clock enable signal
+gating both reads and writes.  Such signal will be provided to the mapped cell
+as `PORT_<name>_CLK_EN`.  It is only applicable to synchronous ports.
+
+The `rden` property, if present, means that the port has a read clock enable signal.
+Such signal will be provided to the mapped cell as `PORT_<name>_RD_EN`.  It is only
+applicable to synchronous read ports (`sr` and `srsw`).
+
+For `sr` ports, both of these options are effectively equivalent.
+
+
+### `wrbe_separate` and the write enables
+
+The `wrbe_separate` property specifies that the write byte enables are provided
+as a separate signal from the main write enable.  It can only be used when the
+RAM-level `byte` property is also specified.
+
+The rules are as follows:
+
+If no `byte` is specified:
+
+- `wrbe_separate` is not allowed
+- `PORT_<name>_WR_EN` signal is single bit
+
+If `byte` is specified, but `wrbe_separate` is not:
+
+- `PORT_<name>_WR_EN` signal has one bit for every data byte
+- `PORT_<name>_WR_EN_WIDTH` parameter is the width of the above (only present for multiple-width cells)
+
+If `byte` is specified and `wrbe_separate` is present:
+
+- `PORT_<name>_WR_EN` signal is single bit
+- `PORT_<name>_WR_BE` signal has one bit for every data byte
+- `PORT_<name>_WR_BE_WIDTH` parameter is the width of the above (only present for multiple-width cells)
+- a given byte is written iff all of `CLK_EN` (if present), `WR_EN`, and the corresponding `WR_BE` bit are one
+
+This property can only be used on write ports.
+
+
+### `rdwr` property
+
+This property is allowed only on `srsw` ports and describes read-write interactions.
+
+The possible values are:
+
+- `no_change`: if write is being performed (any bit of `WR_EN` is set),
+  reading is not performed and the `RD_DATA` keeps its old value
+- `undefined`: all `RD_DATA` bits corresponding to enabled `WR_DATA` bits
+  have undefined value, remaining bits read from memory
+- `old`: all `RD_DATA` bits get the previous value in memory
+- `new`: all `RD_DATA` bits get the new value in memory (transparent write)
+- `new_only`: all `RD_DATA` bits corresponding to enabled `WR_DATA` bits
+  get the new value, all others are undefined
+
+If this property is not found on an `srsw` port, `undefined` is assumed.
+
+
+### Read data initial value and resets
+
+The `rdinit`, `rdarst`, and `rdsrst` are applicable only to synchronous read
+ports.
+
+`rdinit` describes the initial value of the read port data, and can be set to
+one of the following:
+
+- `none`: initial data is indeterminate
+- `zero`: initial data is all-0
+- `any`: initial data is arbitrarily configurable, and the selected value
+  will be attached to the cell as `PORT_<name>_RD_INIT_VALUE` parameter
+- `no_undef`: like `any`, but only 0 and 1 bits are allowed
+
+`rdarst` and `rdsrst` describe the asynchronous and synchronous reset capabilities.
+The values are similar to `rdinit`:
+
+- `none`: no reset
+- `zero`: reset to all-0 data
+- `any`: reset to arbitrary value, the selected value
+  will be attached to the cell as `PORT_<name>_RD_ARST_VALUE` or
+  `PORT_<name>_RD_SRST_VALUE` parameter
+- `no_undef`: like `any`, but only 0 and 1 bits are allowed
+- `init`: reset to the initial value, as specified by `rdinit` (which must be `any`
+  or `no_undef` itself)
+
+If the capability is anything other than `none`, the reset signal
+will be provided as `PORT_<name>_RD_ARST` or `PORT_<name>_RD_SRST`.
+
+For `rdsrst`, the priority must be additionally specified, as one of:
+
+- `ungated`: `RD_SRST` has priority over both `CLK_EN` and `RD_EN` (if present)
+- `gated_clken`: `CLK_EN` has priority over `RD_SRST`; `RD_SRST` has priority over `RD_EN` if present
+- `gated_rden`: `RD_EN` and `CLK_EN` (if present) both have priority over `RD_SRST`
+
+Also, `rdsrst` can optionally have `block_wr` specified, which means that sync reset
+cannot be performed in the same cycle as a write.
+
+If not provided, `none` is assumed for all three properties.
+
+
+### Write priority
+
+The `wrprio` property is only allowed on write ports and defines a priority relationship
+between port — when `wrprio "B";` is used in definition of port `"A"`, and both ports
+simultanously write to the same memory cell, the value written by port `"A"` will have
+precedence.
+
+This property is optional, and can be used multiple times as necessary.  If no relationship
+is described for a pair of write ports, no priority will be assumed.
+
+
+### Write transparency
+
+The `wrtrans` property is only allowed on write ports and defines behavior when
+another synchronous read port reads from the memory cell at the same time as the
+given port writes it.  The values are:
+
+- `old`: the read port will get the old value of the cell
+- `new`: the read port will get the new value of the cell
+
+This property is optional, and can be used multiple times as necessary.  If no relationship
+is described for a pair of ports, the value read is assumed to be indeterminate.
+
+Note that this property is not used to describe the read value on the port itself for `srsw`
+ports — for that purpose, the `rdwr` property is used instead.
+
+
+### Optional ports
+
+The `optional;` property will make the pass attach a `PORT_<name>_USED` parameter
+with a boolean value specifying whether a given port was meaningfully used in
+mapping a given cell.  Likewise, `optional_rw;` will attach `PORT_<name>_RD_USED`
+and `PORT_<name>_WR_USED` the specify whether the read / write part in particular
+was used.  These can be useful if the mapping has some meaningful optimization
+to apply for unused ports, but doesn't otherwise influence the selection process.
+
+
+## Options
+
+For highly configurable cells, multiple variants may be described in one cell description.
+All properties and port definitions within a RAM or port definition can be put inside
+an `option` block as follows:
+
+    option "NAME" <value> {
+        <properties, ports, ...>
+    }
+
+The value and name of an option are arbitrary, and the selected option value
+will be provided to the cell as `OPTION_<name>` parameter.  Values can be
+strings or integers.
+
+
+Likewise, for per-port options, a `portoption` block can be used:
+
+    portoption "NAME" <value> {
+        <properties, ...>
+    }
+
+These options will be provided as `PORT_<pname>_OPTION_<oname>` parameters.
+
+The library parser will simply expand the RAM definition for every possible combination
+of option values mentioned in the RAM body, and likewise for port definitions.
+This can lead to a combinatorial explosion.
+
+If some option values cannot be used together, a `forbid` pseudo-property can be used
+to discard a given combination, eg:
+
+    option "ABC" 1 {
+        portoption "DEF" "GHI" {
+            forbid;
+        }
+    }
+
+will disallow combining the RAM option `ABC = 2` with port option `DEF = "GHI"`.
+
+
+## Ifdefs
+
+To allow reusing a library for multiple FPGA families with slighly differing
+capabilities, `ifdef` (and `ifndef`) blocks are provided:
+
+    ifdef IS_FANCY_FPGA_WITH_CONFIGURABLE_ASYNC_RESET {
+        rdarst any;
+    } else {
+        rdarst zero;
+    }
+
+Such blocks can be enabled by passing the `-D` option to the pass.
diff --git a/passes/memory/memory_libmap.cc b/passes/memory/memory_libmap.cc
new file mode 100644 (file)
index 0000000..ab7bb7b
--- /dev/null
@@ -0,0 +1,2093 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2021  Marcelina Kościelnicka <mwk@0x04.net>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "memlib.h"
+
+#include <ctype.h>
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+#include "kernel/mem.h"
+#include "kernel/qcsat.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+using namespace MemLibrary;
+
+#define FACTOR_MUX 0.5
+#define FACTOR_DEMUX 0.5
+#define FACTOR_EMU 2
+
+struct PassOptions {
+       bool no_auto_distributed;
+       bool no_auto_block;
+       bool no_auto_huge;
+       double logic_cost_rom;
+       double logic_cost_ram;
+};
+
+struct WrPortConfig {
+       // Index of the read port this port is merged with, or -1 if none.
+       int rd_port;
+       // Index of the PortGroup in the Ram.
+       int port_group;
+       int port_variant;
+       const PortVariant *def;
+       // Emulate priority logic for this list of (source) write port indices.
+       std::vector<int> emu_prio;
+       // If true, this port needs to end up with uniform byte enables to work correctly.
+       bool force_uniform;
+
+       WrPortConfig() : rd_port(-1), force_uniform(false) {}
+};
+
+struct RdPortConfig {
+       // Index of the write port this port is merged with, or -1 if none.
+       int wr_port;
+       // Index of the PortGroup in the Ram.
+       int port_group;
+       int port_variant;
+       const PortVariant *def;
+       // If true, this is a sync port mapped into async mem, make an output
+       // register.  Mutually exclusive with the following options.
+       bool emu_sync;
+       // Emulate the EN / ARST / SRST / init value circuitry.
+       bool emu_en;
+       bool emu_arst;
+       bool emu_srst;
+       bool emu_init;
+       // Emulate EN-SRST priority.
+       bool emu_srst_en_prio;
+       // If true, use clk_en as rd_en.
+       bool rd_en_to_clk_en;
+       // Emulate transparency logic for this list of (source) write port indices.
+       std::vector<int> emu_trans;
+
+       RdPortConfig() : wr_port(-1), emu_sync(false), emu_en(false), emu_arst(false), emu_srst(false), emu_init(false), emu_srst_en_prio(false), rd_en_to_clk_en(false) {}
+};
+
+// The named clock and clock polarity assignments.
+struct SharedClockConfig {
+       bool used;
+       SigBit clk;
+       // For anyedge clocks.
+       bool polarity;
+       // For non-anyedge clocks.
+       bool invert;
+};
+
+struct MemConfig {
+       // Reference to the library ram definition
+       const Ram *def;
+       // Port assignments, indexed by Mem port index.
+       std::vector<WrPortConfig> wr_ports;
+       std::vector<RdPortConfig> rd_ports;
+       std::vector<SharedClockConfig> shared_clocks;
+       // Emulate read-first write-read behavior using soft logic.
+       bool emu_read_first;
+       // This many low bits of (target) address are always-0 on all ports.
+       int base_width_log2;
+       int unit_width_log2;
+       std::vector<int> swizzle;
+       int hard_wide_mask;
+       int emu_wide_mask;
+       // How many times the base memory block will need to be duplicated to get more
+       // data bits.
+       int repl_d;
+       // How many times the whole memory array will need to be duplicated to cover
+       // all read ports required.
+       int repl_port;
+       // Emulation score — how much circuitry we need to add for priority / transparency /
+       // reset / initial value emulation.
+       int score_emu;
+       // Mux score — how much circuitry we need to add to manually decode whatever address
+       // bits are not decoded by the memory array itself, for reads.
+       int score_mux;
+       // Demux score — how much circuitry we need to add to manually decode whatever address
+       // bits are not decoded by the memory array itself, for writes.
+       int score_demux;
+       double cost;
+       MemConfig() : emu_read_first(false) {}
+};
+
+typedef std::vector<MemConfig> MemConfigs;
+
+struct MapWorker {
+       Module *module;
+       ModWalker modwalker;
+       SigMap sigmap;
+       SigMap sigmap_xmux;
+       FfInitVals initvals;
+
+       MapWorker(Module *module) : module(module), modwalker(module->design, module), sigmap(module), sigmap_xmux(module), initvals(&sigmap, module) {
+               for (auto cell : module->cells())
+               {
+                       if (cell->type == ID($mux))
+                       {
+                               RTLIL::SigSpec sig_a = sigmap_xmux(cell->getPort(ID::A));
+                               RTLIL::SigSpec sig_b = sigmap_xmux(cell->getPort(ID::B));
+
+                               if (sig_a.is_fully_undef())
+                                       sigmap_xmux.add(cell->getPort(ID::Y), sig_b);
+                               else if (sig_b.is_fully_undef())
+                                       sigmap_xmux.add(cell->getPort(ID::Y), sig_a);
+                       }
+               }
+       }
+};
+
+struct SwizzleBit {
+       bool valid;
+       int mux_idx;
+       int addr;
+       int bit;
+};
+
+struct Swizzle {
+       int addr_shift;
+       int addr_start;
+       int addr_end;
+       std::vector<int> addr_mux_bits;
+       std::vector<std::vector<SwizzleBit>> bits;
+};
+
+struct MemMapping {
+       MapWorker &worker;
+       QuickConeSat qcsat;
+       Mem &mem;
+       const Library &lib;
+       const PassOptions &opts;
+       std::vector<MemConfig> cfgs;
+       bool logic_ok;
+       double logic_cost;
+       RamKind kind;
+       std::string style;
+       dict<int, int> wr_en_cache;
+       dict<std::pair<int, int>, bool> wr_implies_rd_cache;
+       dict<std::pair<int, int>, bool> wr_excludes_rd_cache;
+       dict<std::pair<int, int>, bool> wr_excludes_srst_cache;
+
+       MemMapping(MapWorker &worker, Mem &mem, const Library &lib, const PassOptions &opts) : worker(worker), qcsat(worker.modwalker), mem(mem), lib(lib), opts(opts) {
+               determine_style();
+               logic_ok = determine_logic_ok();
+               if (GetSize(mem.wr_ports) == 0)
+                       logic_cost = mem.width * mem.size * opts.logic_cost_rom;
+               else
+                       logic_cost = mem.width * mem.size * opts.logic_cost_ram;
+               if (kind == RamKind::Logic)
+                       return;
+               for (int i = 0; i < GetSize(lib.rams); i++) {
+                       auto &rdef = lib.rams[i];
+                       if (!check_ram_kind(rdef))
+                               continue;
+                       if (!check_ram_style(rdef))
+                               continue;
+                       if (!check_init(rdef))
+                               continue;
+                       if (rdef.prune_rom && mem.wr_ports.empty())
+                               continue;
+                       MemConfig cfg;
+                       cfg.def = &rdef;
+                       for (auto &cdef: rdef.shared_clocks) {
+                               (void)cdef;
+                               SharedClockConfig clk;
+                               clk.used = false;
+                               cfg.shared_clocks.push_back(clk);
+                       }
+                       cfgs.push_back(cfg);
+               }
+               assign_wr_ports();
+               assign_rd_ports();
+               handle_trans();
+               // If we got this far, the memory is mappable.  The following two can require emulating
+               // some functionality, but cannot cause the mapping to fail.
+               handle_priority();
+               handle_rd_rst();
+               score_emu_ports();
+               // Now it is just a matter of picking geometry.
+               handle_geom();
+               dump_configs(0);
+               prune_post_geom();
+               dump_configs(1);
+       }
+
+       bool addr_compatible(int wpidx, int rpidx) {
+               auto &wport = mem.wr_ports[wpidx];
+               auto &rport = mem.rd_ports[rpidx];
+               int max_wide_log2 = std::max(rport.wide_log2, wport.wide_log2);
+               SigSpec raddr = rport.addr.extract_end(max_wide_log2);
+               SigSpec waddr = wport.addr.extract_end(max_wide_log2);
+               int abits = std::max(GetSize(raddr), GetSize(waddr));
+               raddr.extend_u0(abits);
+               waddr.extend_u0(abits);
+               return worker.sigmap_xmux(raddr) == worker.sigmap_xmux(waddr);
+       }
+
+       int get_wr_en(int wpidx) {
+               auto it = wr_en_cache.find(wpidx);
+               if (it != wr_en_cache.end())
+                       return it->second;
+               int res = qcsat.ez->expression(qcsat.ez->OpOr, qcsat.importSig(mem.wr_ports[wpidx].en));
+               wr_en_cache.insert({wpidx, res});
+               return res;
+       }
+
+       bool get_wr_implies_rd(int wpidx, int rpidx) {
+               auto key = std::make_pair(wpidx, rpidx);
+               auto it = wr_implies_rd_cache.find(key);
+               if (it != wr_implies_rd_cache.end())
+                       return it->second;
+               int wr_en = get_wr_en(wpidx);
+               int rd_en = qcsat.importSigBit(mem.rd_ports[rpidx].en[0]);
+               qcsat.prepare();
+               bool res = !qcsat.ez->solve(wr_en, qcsat.ez->NOT(rd_en));
+               wr_implies_rd_cache.insert({key, res});
+               return res;
+       }
+
+       bool get_wr_excludes_rd(int wpidx, int rpidx) {
+               auto key = std::make_pair(wpidx, rpidx);
+               auto it = wr_excludes_rd_cache.find(key);
+               if (it != wr_excludes_rd_cache.end())
+                       return it->second;
+               int wr_en = get_wr_en(wpidx);
+               int rd_en = qcsat.importSigBit(mem.rd_ports[rpidx].en[0]);
+               qcsat.prepare();
+               bool res = !qcsat.ez->solve(wr_en, rd_en);
+               wr_excludes_rd_cache.insert({key, res});
+               return res;
+       }
+
+       bool get_wr_excludes_srst(int wpidx, int rpidx) {
+               auto key = std::make_pair(wpidx, rpidx);
+               auto it = wr_excludes_srst_cache.find(key);
+               if (it != wr_excludes_srst_cache.end())
+                       return it->second;
+               int wr_en = get_wr_en(wpidx);
+               int srst = qcsat.importSigBit(mem.rd_ports[rpidx].srst);
+               if (mem.rd_ports[rpidx].ce_over_srst) {
+                       int rd_en = qcsat.importSigBit(mem.rd_ports[rpidx].en[0]);
+                       srst = qcsat.ez->AND(srst, rd_en);
+               }
+               qcsat.prepare();
+               bool res = !qcsat.ez->solve(wr_en, srst);
+               wr_excludes_srst_cache.insert({key, res});
+               return res;
+       }
+
+       void dump_configs(int stage);
+       void dump_config(MemConfig &cfg);
+       void determine_style();
+       bool determine_logic_ok();
+       bool check_ram_kind(const Ram &ram);
+       bool check_ram_style(const Ram &ram);
+       bool check_init(const Ram &ram);
+       void assign_wr_ports();
+       void assign_rd_ports();
+       void handle_trans();
+       void handle_priority();
+       void handle_rd_rst();
+       void score_emu_ports();
+       void handle_geom();
+       void prune_post_geom();
+       void emit_port(const MemConfig &cfg, std::vector<Cell*> &cells, const PortVariant &pdef, const char *name, int wpidx, int rpidx, const std::vector<int> &hw_addr_swizzle);
+       void emit(const MemConfig &cfg);
+};
+
+void MemMapping::dump_configs(int stage) {
+       const char *stage_name;
+       switch (stage) {
+               case 0:
+                       stage_name = "post-geometry";
+                       break;
+               case 1:
+                       stage_name = "after post-geometry prune";
+                       break;
+               default:
+                       abort();
+       }
+       log_debug("Memory %s.%s mapping candidates (%s):\n", log_id(mem.module->name), log_id(mem.memid), stage_name);
+       if (logic_ok) {
+               log_debug("- logic fallback\n");
+               log_debug("  - cost: %f\n", logic_cost);
+       }
+       for (auto &cfg: cfgs) {
+               dump_config(cfg);
+       }
+}
+
+void MemMapping::dump_config(MemConfig &cfg) {
+       log_debug("- %s:\n", log_id(cfg.def->id));
+       for (auto &it: cfg.def->options)
+               log_debug("  - option %s %s\n", it.first.c_str(), log_const(it.second));
+       log_debug("  - emulation score: %d\n", cfg.score_emu);
+       log_debug("  - replicates (for ports): %d\n", cfg.repl_port);
+       log_debug("  - replicates (for data): %d\n", cfg.repl_d);
+       log_debug("  - mux score: %d\n", cfg.score_mux);
+       log_debug("  - demux score: %d\n", cfg.score_demux);
+       log_debug("  - cost: %f\n", cfg.cost);
+       std::stringstream os;
+       for (int x: cfg.def->dbits)
+               os << " " << x;
+       std::string dbits_s = os.str();
+       log_debug("  - abits %d dbits%s\n", cfg.def->abits, dbits_s.c_str());
+       if (cfg.def->byte != 0)
+               log_debug("  - byte width %d\n", cfg.def->byte);
+       log_debug("  - chosen base width %d\n", cfg.def->dbits[cfg.base_width_log2]);
+       os.str("");
+       for (int x: cfg.swizzle)
+               if (x == -1)
+                       os << " -";
+               else
+                       os << " " << x;
+       std::string swizzle_s = os.str();
+       log_debug("  - swizzle%s\n", swizzle_s.c_str());
+       os.str("");
+       for (int i = 0; (1 << i) <= cfg.hard_wide_mask; i++)
+               if (cfg.hard_wide_mask & 1 << i)
+                       os << " " << i;
+       std::string wide_s = os.str();
+       if (cfg.hard_wide_mask)
+               log_debug("  - hard wide bits%s\n", wide_s.c_str());
+       if (cfg.emu_read_first)
+               log_debug("  - emulate read-first behavior\n");
+       for (int i = 0; i < GetSize(mem.wr_ports); i++) {
+               auto &pcfg = cfg.wr_ports[i];
+               if (pcfg.rd_port == -1)
+                       log_debug("  - write port %d: port group %s\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str());
+               else
+                       log_debug("  - write port %d: port group %s (shared with read port %d)\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str(), pcfg.rd_port);
+
+               for (auto &it: pcfg.def->options)
+                       log_debug("    - option %s %s\n", it.first.c_str(), log_const(it.second));
+               if (cfg.def->width_mode == WidthMode::PerPort) {
+                       std::stringstream os;
+                       for (int i = pcfg.def->min_wr_wide_log2; i <= pcfg.def->max_wr_wide_log2; i++)
+                               os << " " << cfg.def->dbits[i];
+                       std::string widths_s = os.str();
+                       const char *note = "";
+                       if (pcfg.rd_port != -1)
+                               note = pcfg.def->width_tied ? " (tied)" : " (independent)";
+                       log_debug("    - widths%s%s\n", widths_s.c_str(), note);
+               }
+               for (auto i: pcfg.emu_prio)
+                       log_debug("    - emulate priority over write port %d\n", i);
+       }
+       for (int i = 0; i < GetSize(mem.rd_ports); i++) {
+               auto &pcfg = cfg.rd_ports[i];
+               if (pcfg.wr_port == -1)
+                       log_debug("  - read port %d: port group %s\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str());
+               else
+                       log_debug("  - read port %d: port group %s (shared with write port %d)\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str(), pcfg.wr_port);
+               for (auto &it: pcfg.def->options)
+                       log_debug("    - option %s %s\n", it.first.c_str(), log_const(it.second));
+               if (cfg.def->width_mode == WidthMode::PerPort) {
+                       std::stringstream os;
+                       for (int i = pcfg.def->min_rd_wide_log2; i <= pcfg.def->max_rd_wide_log2; i++)
+                               os << " " << cfg.def->dbits[i];
+                       std::string widths_s = os.str();
+                       const char *note = "";
+                       if (pcfg.wr_port != -1)
+                               note = pcfg.def->width_tied ? " (tied)" : " (independent)";
+                       log_debug("    - widths%s%s\n", widths_s.c_str(), note);
+               }
+               if (pcfg.emu_sync)
+                       log_debug("    - emulate data register\n");
+               if (pcfg.emu_en)
+                       log_debug("    - emulate clock enable\n");
+               if (pcfg.emu_arst)
+                       log_debug("    - emulate async reset\n");
+               if (pcfg.emu_srst)
+                       log_debug("    - emulate sync reset\n");
+               if (pcfg.emu_init)
+                       log_debug("    - emulate init value\n");
+               if (pcfg.emu_srst_en_prio)
+                       log_debug("    - emulate sync reset / enable priority\n");
+               for (auto i: pcfg.emu_trans)
+                       log_debug("    - emulate transparency with write port %d\n", i);
+       }
+}
+
+// Go through memory attributes to determine user-requested mapping style.
+void MemMapping::determine_style() {
+       kind = RamKind::Auto;
+       style = "";
+       if (mem.get_bool_attribute(ID::lram)) {
+               kind = RamKind::Huge;
+               return;
+       }
+       for (auto attr: {ID::ram_block, ID::rom_block, ID::ram_style, ID::rom_style, ID::ramstyle, ID::romstyle, ID::syn_ramstyle, ID::syn_romstyle}) {
+               if (mem.has_attribute(attr)) {
+                       Const val = mem.attributes.at(attr);
+                       if (val == 1) {
+                               kind = RamKind::NotLogic;
+                               return;
+                       }
+                       std::string val_s = val.decode_string();
+                       for (auto &c: val_s)
+                               c = std::tolower(c);
+                       if (val_s == "auto") {
+                               // Nothing.
+                       } else if (val_s == "logic" || val_s == "registers") {
+                               kind = RamKind::Logic;
+                       } else if (val_s == "distributed") {
+                               kind = RamKind::Distributed;
+                       } else if (val_s == "block" || val_s == "block_ram" || val_s == "ebr") {
+                               kind = RamKind::Block;
+                       } else if (val_s == "huge" || val_s == "ultra") {
+                               kind = RamKind::Huge;
+                       } else {
+                               kind = RamKind::NotLogic;
+                               style = val_s;
+                       }
+                       return;
+               }
+       }
+       if (mem.get_bool_attribute(ID::logic_block))
+               kind = RamKind::Logic;
+}
+
+// Determine whether the memory can be mapped entirely to soft logic.
+bool MemMapping::determine_logic_ok() {
+       if (kind != RamKind::Auto && kind != RamKind::Logic)
+               return false;
+       // Memory is mappable entirely to soft logic iff all its write ports are in the same clock domain.
+       if (mem.wr_ports.empty())
+               return true;
+       for (auto &port: mem.wr_ports) {
+               if (!port.clk_enable)
+                       return false;
+               if (port.clk != mem.wr_ports[0].clk)
+                       return false;
+               if (port.clk_polarity != mem.wr_ports[0].clk_polarity)
+                       return false;
+       }
+       return true;
+}
+
+// Apply RAM kind restrictions (logic/distributed/block/huge), if any.
+bool MemMapping::check_ram_kind(const Ram &ram) {
+       if (style != "")
+               return true;
+       if (ram.kind == kind)
+               return true;
+       if (kind == RamKind::Auto || kind == RamKind::NotLogic) {
+               if (ram.kind == RamKind::Distributed && opts.no_auto_distributed)
+                       return false;
+               if (ram.kind == RamKind::Block && opts.no_auto_block)
+                       return false;
+               if (ram.kind == RamKind::Huge && opts.no_auto_huge)
+                       return false;
+               return true;
+       }
+       return false;
+}
+
+// Apply specific RAM style restrictions, if any.
+bool MemMapping::check_ram_style(const Ram &ram) {
+       if (style == "")
+               return true;
+       for (auto &s: ram.style)
+               if (s == style)
+                       return true;
+       return false;
+}
+
+// Handle memory initializer restrictions, if any.
+bool MemMapping::check_init(const Ram &ram) {
+       bool has_nonx = false;
+       bool has_one = false;
+
+       for (auto &init: mem.inits) {
+               if (init.data.is_fully_undef())
+                       continue;
+               has_nonx = true;
+               for (auto bit: init.data)
+                       if (bit == State::S1)
+                               has_one = true;
+       }
+
+       switch (ram.init) {
+               case MemoryInitKind::None:
+                       return !has_nonx;
+               case MemoryInitKind::Zero:
+                       return !has_one;
+               default:
+                       return true;
+       }
+}
+
+bool apply_clock(MemConfig &cfg, const PortVariant &def, SigBit clk, bool clk_polarity) {
+       if (def.clk_shared == -1)
+               return true;
+       auto &cdef = cfg.def->shared_clocks[def.clk_shared];
+       auto &ccfg = cfg.shared_clocks[def.clk_shared];
+       if (cdef.anyedge) {
+               if (!ccfg.used) {
+                       ccfg.used = true;
+                       ccfg.clk = clk;
+                       ccfg.polarity = clk_polarity;
+                       return true;
+               } else {
+                       return ccfg.clk == clk && ccfg.polarity == clk_polarity;
+               }
+       } else {
+               bool invert = clk_polarity ^ (def.clk_pol == ClkPolKind::Posedge);
+               if (!ccfg.used) {
+                       ccfg.used = true;
+                       ccfg.clk = clk;
+                       ccfg.invert = invert;
+                       return true;
+               } else {
+                       return ccfg.clk == clk && ccfg.invert == invert;
+               }
+       }
+}
+
+// Perform write port assignment, validating clock options as we go.
+void MemMapping::assign_wr_ports() {
+       for (auto &port: mem.wr_ports) {
+               if (!port.clk_enable) {
+                       // Async write ports not supported.
+                       cfgs.clear();
+                       return;
+               }
+               MemConfigs new_cfgs;
+               for (auto &cfg: cfgs) {
+                       for (int pgi = 0; pgi < GetSize(cfg.def->port_groups); pgi++) {
+                               auto &pg = cfg.def->port_groups[pgi];
+                               // Make sure the target port group still has a free port.
+                               int used = 0;
+                               for (auto &oport: cfg.wr_ports)
+                                       if (oport.port_group == pgi)
+                                               used++;
+                               if (used >= GetSize(pg.names))
+                                       continue;
+                               for (int pvi = 0; pvi < GetSize(pg.variants); pvi++) {
+                                       auto &def = pg.variants[pvi];
+                                       // Make sure the target is a write port.
+                                       if (def.kind == PortKind::Ar || def.kind == PortKind::Sr)
+                                               continue;
+                                       MemConfig new_cfg = cfg;
+                                       WrPortConfig pcfg;
+                                       pcfg.rd_port = -1;
+                                       pcfg.port_group = pgi;
+                                       pcfg.port_variant = pvi;
+                                       pcfg.def = &def;
+                                       if (!apply_clock(new_cfg, def, port.clk, port.clk_polarity))
+                                               continue;
+                                       new_cfg.wr_ports.push_back(pcfg);
+                                       new_cfgs.push_back(new_cfg);
+                               }
+                       }
+               }
+               cfgs = new_cfgs;
+       }
+}
+
+// Perform read port assignment, validating clock and rden options as we go.
+void MemMapping::assign_rd_ports() {
+       for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) {
+               auto &port = mem.rd_ports[pidx];
+               MemConfigs new_cfgs;
+               for (auto &cfg: cfgs) {
+                       // First pass: read port not shared with a write port.
+                       for (int pgi = 0; pgi < GetSize(cfg.def->port_groups); pgi++) {
+                               auto &pg = cfg.def->port_groups[pgi];
+                               // Make sure the target port group has a port not used up by write ports.
+                               // Overuse by other read ports is not a problem — this will just result
+                               // in memory duplication.
+                               int used = 0;
+                               for (auto &oport: cfg.wr_ports)
+                                       if (oport.port_group == pgi)
+                                               used++;
+                               if (used >= GetSize(pg.names))
+                                       continue;
+                               for (int pvi = 0; pvi < GetSize(pg.variants); pvi++) {
+                                       auto &def = pg.variants[pvi];
+                                       // Make sure the target is a read port.
+                                       if (def.kind == PortKind::Sw)
+                                               continue;
+                                       // If mapping an async port, accept only async defs.
+                                       if (!port.clk_enable) {
+                                               if (def.kind == PortKind::Sr || def.kind == PortKind::Srsw)
+                                                       continue;
+                                       }
+                                       MemConfig new_cfg = cfg;
+                                       RdPortConfig pcfg;
+                                       pcfg.wr_port = -1;
+                                       pcfg.port_group = pgi;
+                                       pcfg.port_variant = pvi;
+                                       pcfg.def = &def;
+                                       if (def.kind == PortKind::Sr || def.kind == PortKind::Srsw) {
+                                               pcfg.emu_sync = false;
+                                               if (!apply_clock(new_cfg, def, port.clk, port.clk_polarity))
+                                                       continue;
+                                               // Decide if rden is usable.
+                                               if (port.en != State::S1) {
+                                                       if (def.clk_en) {
+                                                               pcfg.rd_en_to_clk_en = true;
+                                                       } else {
+                                                               pcfg.emu_en = !def.rd_en;
+                                                       }
+                                               }
+                                       } else {
+                                               pcfg.emu_sync = port.clk_enable;
+                                       }
+                                       new_cfg.rd_ports.push_back(pcfg);
+                                       new_cfgs.push_back(new_cfg);
+                               }
+                       }
+                       // Second pass: read port shared with a write port.
+                       for (int wpidx = 0; wpidx < GetSize(mem.wr_ports); wpidx++) {
+                               auto &wport = mem.wr_ports[wpidx];
+                               auto &wpcfg = cfg.wr_ports[wpidx];
+                               auto &def = *wpcfg.def;
+                               // Make sure the write port is not yet shared.
+                               if (wpcfg.rd_port != -1)
+                                       continue;
+                               // Make sure the target is a read port.
+                               if (def.kind == PortKind::Sw)
+                                       continue;
+                               // Validate address compatibility.
+                               if (!addr_compatible(wpidx, pidx))
+                                       continue;
+                               // Validate clock compatibility, if needed.
+                               if (def.kind == PortKind::Srsw) {
+                                       if (!port.clk_enable)
+                                               continue;
+                                       if (port.clk != wport.clk)
+                                               continue;
+                                       if (port.clk_polarity != wport.clk_polarity)
+                                               continue;
+                               }
+                               // Okay, let's fill it in.
+                               MemConfig new_cfg = cfg;
+                               new_cfg.wr_ports[wpidx].rd_port = pidx;
+                               RdPortConfig pcfg;
+                               pcfg.wr_port = wpidx;
+                               pcfg.port_group = wpcfg.port_group;
+                               pcfg.port_variant = wpcfg.port_variant;
+                               pcfg.def = wpcfg.def;
+                               pcfg.emu_sync = port.clk_enable && def.kind == PortKind::Arsw;
+                               // For srsw, check rden capability.
+                               if (def.kind == PortKind::Srsw) {
+                                       bool trans = port.transparency_mask[wpidx];
+                                       bool col_x = port.collision_x_mask[wpidx];
+                                       if (def.rdwr == RdWrKind::NoChange) {
+                                               if (!get_wr_excludes_rd(wpidx, pidx)) {
+                                                       if (!trans && !col_x)
+                                                               continue;
+                                                       if (trans)
+                                                               pcfg.emu_trans.push_back(wpidx);
+                                                       new_cfg.wr_ports[wpidx].force_uniform = true;
+                                               }
+                                               if (port.en != State::S1) {
+                                                       if (def.clk_en) {
+                                                               pcfg.rd_en_to_clk_en = true;
+                                                       } else {
+                                                               pcfg.emu_en = !def.rd_en;
+                                                       }
+                                               }
+                                       } else {
+                                               if (!col_x && !trans && def.rdwr != RdWrKind::Old)
+                                                       continue;
+                                               if (trans) {
+                                                       if (def.rdwr != RdWrKind::New && def.rdwr != RdWrKind::NewOnly)
+                                                               pcfg.emu_trans.push_back(wpidx);
+                                               }
+                                               if (def.rdwr == RdWrKind::NewOnly) {
+                                                       if (!get_wr_excludes_rd(wpidx, pidx))
+                                                               new_cfg.wr_ports[wpidx].force_uniform = true;
+                                               }
+                                               if (port.en != State::S1) {
+                                                       if (def.clk_en) {
+                                                               if (get_wr_implies_rd(wpidx, pidx)) {
+                                                                       pcfg.rd_en_to_clk_en = true;
+                                                               } else {
+                                                                       pcfg.emu_en = !def.rd_en;
+                                                               }
+                                                       } else {
+                                                               pcfg.emu_en = !def.rd_en;
+                                                       }
+                                               }
+                                       }
+                               }
+                               new_cfg.rd_ports.push_back(pcfg);
+                               new_cfgs.push_back(new_cfg);
+                       }
+               }
+               cfgs = new_cfgs;
+       }
+}
+
+// Validate transparency restrictions, determine where to add soft transparency logic.
+void MemMapping::handle_trans() {
+       if (mem.emulate_read_first_ok()) {
+               MemConfigs new_cfgs;
+               for (auto &cfg: cfgs) {
+                       new_cfgs.push_back(cfg);
+                       bool ok = true;
+                       // Using this trick will break read-write port sharing.
+                       for (auto &pcfg: cfg.rd_ports)
+                               if (pcfg.wr_port != -1)
+                                       ok = false;
+                       if (ok) {
+                               cfg.emu_read_first = true;
+                               new_cfgs.push_back(cfg);
+                       }
+               }
+               cfgs = new_cfgs;
+       }
+       for (int rpidx = 0; rpidx < GetSize(mem.rd_ports); rpidx++) {
+               auto &rport = mem.rd_ports[rpidx];
+               if (!rport.clk_enable)
+                       continue;
+               for (int wpidx = 0; wpidx < GetSize(mem.wr_ports); wpidx++) {
+                       auto &wport = mem.wr_ports[wpidx];
+                       if (!wport.clk_enable)
+                               continue;
+                       if (rport.clk != wport.clk)
+                               continue;
+                       if (rport.clk_polarity != wport.clk_polarity)
+                               continue;
+                       // If we got this far, we have a transparency restriction
+                       // to uphold.
+                       MemConfigs new_cfgs;
+                       for (auto &cfg: cfgs) {
+                               auto &rpcfg = cfg.rd_ports[rpidx];
+                               auto &wpcfg = cfg.wr_ports[wpidx];
+                               // The transparency relation for shared ports already handled while assigning them.
+                               if (rpcfg.wr_port == wpidx) {
+                                       new_cfgs.push_back(cfg);
+                                       continue;
+                               }
+                               if (rport.collision_x_mask[wpidx] && !cfg.emu_read_first) {
+                                       new_cfgs.push_back(cfg);
+                                       continue;
+                               }
+                               bool transparent = rport.transparency_mask[wpidx] || cfg.emu_read_first;
+                               if (rpcfg.emu_sync) {
+                                       // For async read port, just add the transparency logic
+                                       // if necessary.
+                                       if (transparent)
+                                               rpcfg.emu_trans.push_back(wpidx);
+                                       new_cfgs.push_back(cfg);
+                               } else {
+                                       // Otherwise, split through the relevant wrtrans caps.
+                                       // For non-transparent ports, the cap needs to be present.
+                                       // For transparent ports, we can emulate transparency
+                                       // even without a direct cap.
+                                       bool found = false;
+                                       for (auto &tdef: wpcfg.def->wrtrans) {
+                                               // Check if the target matches.
+                                               if (tdef.target_kind == WrTransTargetKind::Group && rpcfg.port_group != tdef.target_group)
+                                                       continue;
+                                               // Check if the transparency kind is acceptable.
+                                               if (transparent) {
+                                                       if (tdef.kind == WrTransKind::Old)
+                                                               continue;
+                                               } else {
+                                                       if (tdef.kind != WrTransKind::Old)
+                                                               continue;
+                                               }
+                                               // Okay, we can use this cap.
+                                               new_cfgs.push_back(cfg);
+                                               found = true;
+                                               break;
+                                       }
+                                       if (!found && transparent) {
+                                               // If the port pair is transparent, but no cap was
+                                               // found, use emulation.
+                                               rpcfg.emu_trans.push_back(wpidx);
+                                               new_cfgs.push_back(cfg);
+                                       }
+                               }
+                       }
+                       cfgs = new_cfgs;
+               }
+       }
+}
+
+// Determine where to add soft priority logic.
+void MemMapping::handle_priority() {
+       for (int p1idx = 0; p1idx < GetSize(mem.wr_ports); p1idx++) {
+               for (int p2idx = 0; p2idx < GetSize(mem.wr_ports); p2idx++) {
+                       auto &port2 = mem.wr_ports[p2idx];
+                       if (!port2.priority_mask[p1idx])
+                               continue;
+                       for (auto &cfg: cfgs) {
+                               auto &p1cfg = cfg.rd_ports[p1idx];
+                               auto &p2cfg = cfg.wr_ports[p2idx];
+                               bool found = false;
+                               for (auto &pgi: p2cfg.def->wrprio) {
+                                       if (pgi == p1cfg.port_group) {
+                                               found = true;
+                                               break;
+                                       }
+                               }
+                               // If no cap was found, emulate.
+                               if (!found)
+                                       p2cfg.emu_prio.push_back(p1idx);
+                       }
+               }
+       }
+}
+
+bool is_all_zero(const Const &val) {
+       for (auto bit: val.bits)
+               if (bit == State::S1)
+                       return false;
+       return true;
+}
+
+// Determine where to add soft init value / reset logic.
+void MemMapping::handle_rd_rst() {
+       for (auto &cfg: cfgs) {
+               for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) {
+                       auto &port = mem.rd_ports[pidx];
+                       auto &pcfg = cfg.rd_ports[pidx];
+                       // Only sync ports are relevant.
+                       // If emulated by async port or we already emulate CE, init will be
+                       // included for free.
+                       if (!port.clk_enable || pcfg.emu_sync || pcfg.emu_en)
+                               continue;
+                       switch (pcfg.def->rdinitval) {
+                               case ResetValKind::None:
+                                       pcfg.emu_init = !port.init_value.is_fully_undef();
+                                       break;
+                               case ResetValKind::Zero:
+                                       pcfg.emu_init = !is_all_zero(port.init_value);
+                                       break;
+                               default:
+                                       break;
+                       }
+                       Const init_val = port.init_value;
+                       if (port.arst != State::S0) {
+                               switch (pcfg.def->rdarstval) {
+                                       case ResetValKind::None:
+                                               pcfg.emu_arst = true;
+                                               break;
+                                       case ResetValKind::Zero:
+                                               pcfg.emu_arst = !is_all_zero(port.arst_value);
+                                               break;
+                                       case ResetValKind::Init:
+                                               if (init_val.is_fully_undef())
+                                                       init_val = port.arst_value;
+                                               pcfg.emu_arst = init_val != port.arst_value;
+                                               break;
+                                       default:
+                                               break;
+                               }
+                       }
+                       if (port.srst != State::S0) {
+                               switch (pcfg.def->rdsrstval) {
+                                       case ResetValKind::None:
+                                               pcfg.emu_srst = true;
+                                               break;
+                                       case ResetValKind::Zero:
+                                               pcfg.emu_srst = !is_all_zero(port.srst_value);
+                                               break;
+                                       case ResetValKind::Init:
+                                               if (init_val.is_fully_undef())
+                                                       init_val = port.srst_value;
+                                               pcfg.emu_srst = init_val != port.srst_value;
+                                               break;
+                                       default:
+                                               break;
+                               }
+                               if (!pcfg.emu_srst && pcfg.def->rdsrst_block_wr && pcfg.wr_port != -1) {
+                                       if (!get_wr_excludes_srst(pcfg.wr_port, pidx))
+                                               pcfg.emu_srst = true;
+                               }
+                               if (!pcfg.emu_srst && port.en != State::S1) {
+                                       if (port.ce_over_srst) {
+                                               switch (pcfg.def->rdsrstmode) {
+                                                       case SrstKind::Ungated:
+                                                               pcfg.emu_srst_en_prio = true;
+                                                               break;
+                                                       case SrstKind::GatedClkEn:
+                                                               pcfg.emu_srst_en_prio = !pcfg.rd_en_to_clk_en;
+                                                               break;
+                                                       case SrstKind::GatedRdEn:
+                                                               break;
+                                                       default:
+                                                               log_assert(0);
+                                               }
+                                       } else {
+                                               switch (pcfg.def->rdsrstmode) {
+                                                       case SrstKind::Ungated:
+                                                               break;
+                                                       case SrstKind::GatedClkEn:
+                                                               if (pcfg.rd_en_to_clk_en) {
+                                                                       if (pcfg.def->rd_en) {
+                                                                               pcfg.rd_en_to_clk_en = false;
+                                                                       } else {
+                                                                               pcfg.emu_srst_en_prio = true;
+                                                                       }
+                                                               }
+                                                               break;
+                                                       case SrstKind::GatedRdEn:
+                                                               pcfg.emu_srst_en_prio = true;
+                                                               break;
+                                                       default:
+                                                               log_assert(0);
+                                               }
+                                       }
+                               }
+                       } else {
+                               if (pcfg.def->rd_en && pcfg.def->rdwr == RdWrKind::NoChange && pcfg.wr_port != -1) {
+                                       pcfg.rd_en_to_clk_en = false;
+                               }
+                       }
+               }
+       }
+}
+
+void MemMapping::score_emu_ports() {
+       for (auto &cfg: cfgs) {
+               std::vector<int> port_usage_wr(cfg.def->port_groups.size());
+               std::vector<int> port_usage_rd(cfg.def->port_groups.size());
+               int score = 0;
+               // 3 points for every write port if we need to do read-first emulation.
+               if (cfg.emu_read_first)
+                       score += 3 * GetSize(cfg.wr_ports);
+               for (auto &pcfg: cfg.wr_ports) {
+                       // 1 point for every priority relation we need to fix up.
+                       // This is just a gate for every distinct wren pair.
+                       score += GetSize(pcfg.emu_prio);
+                       port_usage_wr[pcfg.port_group]++;
+               }
+               for (auto &pcfg: cfg.rd_ports) {
+                       // 3 points for every soft transparency logic instance.  This involves
+                       // registers and other major mess.
+                       score += 3 * GetSize(pcfg.emu_trans);
+                       // 3 points for CE soft logic.  Likewise involves registers.
+                       // If we already do this, subsumes any init/srst/arst emulation.
+                       if (pcfg.emu_en)
+                               score += 3;
+                       // 2 points for soft init value / reset logic: involves single bit
+                       // register and some muxes.
+                       if (pcfg.emu_init)
+                               score += 2;
+                       if (pcfg.emu_arst)
+                               score += 2;
+                       if (pcfg.emu_srst)
+                               score += 2;
+                       // 1 point for wrong srst/en priority (fixed with a single gate).
+                       if (pcfg.emu_srst_en_prio)
+                               score++;
+                       // 1 point for every non-shared read port used, as a tiebreaker
+                       // to prefer single-port configs.
+                       if (pcfg.wr_port == -1) {
+                               score++;
+                               port_usage_rd[pcfg.port_group]++;
+                       }
+               }
+               cfg.score_emu = score;
+               int repl_port = 1;
+               for (int i = 0; i < GetSize(cfg.def->port_groups); i++) {
+                       int space = GetSize(cfg.def->port_groups[i].names) - port_usage_wr[i];
+                       log_assert(space >= 0);
+                       if (port_usage_rd[i] > 0) {
+                               log_assert(space > 0);
+                               int usage = port_usage_rd[i];
+                               int cur = (usage + space - 1) / space;
+                               if (cur > repl_port)
+                                       repl_port = cur;
+                       }
+               }
+               cfg.repl_port = repl_port;
+       }
+}
+
+void MemMapping::handle_geom() {
+       std::vector<int> wren_size;
+       for (auto &port: mem.wr_ports) {
+               SigSpec en = port.en;
+               en.sort_and_unify();
+               wren_size.push_back(GetSize(en));
+       }
+       for (auto &cfg: cfgs) {
+               // First, create a set of "byte boundaries": the bit positions in source memory word
+               // that have write enable different from the previous bit in any write port.
+               // Bit 0 is considered to be a byte boundary as well.
+               // Likewise, create a set of "word boundaries" that are like above, but only for write ports
+               // with the "force uniform" flag set.
+               std::vector<bool> byte_boundary(mem.width, false);
+               std::vector<bool> word_boundary(mem.width, false);
+               byte_boundary[0] = true;
+               for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) {
+                       auto &port = mem.wr_ports[pidx];
+                       auto &pcfg = cfg.wr_ports[pidx];
+                       if (pcfg.force_uniform)
+                               word_boundary[0] = true;
+                       for (int sub = 0; sub < (1 << port.wide_log2); sub++) {
+                               for (int i = 1; i < mem.width; i++) {
+                                       int pos = sub * mem.width + i;
+                                       if (port.en[pos] != port.en[pos-1]) {
+                                               byte_boundary[i] = true;
+                                               if (pcfg.force_uniform)
+                                                       word_boundary[i] = true;
+                                       }
+                               }
+                       }
+               }
+               bool got_config = false;
+               int best_cost = 0;
+               int byte_width_log2 = 0;
+               for (int i = 0; i < GetSize(cfg.def->dbits); i++)
+                       if (cfg.def->byte >= cfg.def->dbits[i])
+                               byte_width_log2 = i;
+               if (cfg.def->byte == 0)
+                       byte_width_log2 = GetSize(cfg.def->dbits) - 1;
+               pool<int> no_wide_bits;
+               // Determine which of the source address bits involved in wide ports
+               // are "uniform".  Bits are considered uniform if, when a port is widened through
+               // them, the write enables are the same for both values of the bit.
+               int max_wr_wide_log2 = 0;
+               for (auto &port: mem.wr_ports)
+                       if (port.wide_log2 > max_wr_wide_log2)
+                               max_wr_wide_log2 = port.wide_log2;
+               int max_wide_log2 = max_wr_wide_log2;
+               for (auto &port: mem.rd_ports)
+                       if (port.wide_log2 > max_wide_log2)
+                               max_wide_log2 = port.wide_log2;
+               int wide_nu_start = max_wide_log2;
+               int wide_nu_end = max_wr_wide_log2;
+               for (int i = 0; i < GetSize(mem.wr_ports); i++) {
+                       auto &port = mem.wr_ports[i];
+                       auto &pcfg = cfg.wr_ports[i];
+                       for (int j = 0; j < port.wide_log2; j++) {
+                               bool uniform = true;
+                               // If write enables don't match, mark bit as non-uniform.
+                               for (int k = 0; k < (1 << port.wide_log2); k += 2 << j)
+                                       if (port.en.extract(k * mem.width, mem.width << j) != port.en.extract((k + (1 << j)) * mem.width, mem.width << j))
+                                               uniform = false;
+                               if (!uniform) {
+                                       if (pcfg.force_uniform) {
+                                               for (int k = j; k < port.wide_log2; k++)
+                                                       no_wide_bits.insert(k);
+                                       }
+                                       if (j < wide_nu_start)
+                                               wide_nu_start = j;
+                                       break;
+                               }
+                       }
+                       if (pcfg.def->width_tied && pcfg.rd_port != -1) {
+                               // If:
+                               //
+                               // - the write port is merged with a read port
+                               // - the read port is wider than the write port
+                               // - read and write widths are tied
+                               //
+                               // then we will have to artificially widen the write
+                               // port to the width of the read port, and emulate
+                               // a narrower write path by use of write enables,
+                               // which will definitely be non-uniform over the added
+                               // bits.
+                               auto &rport = mem.rd_ports[pcfg.rd_port];
+                               if (rport.wide_log2 > port.wide_log2) {
+                                       if (port.wide_log2 < wide_nu_start)
+                                               wide_nu_start = port.wide_log2;
+                                       if (rport.wide_log2 > wide_nu_end)
+                                               wide_nu_end = rport.wide_log2;
+                                       if (pcfg.force_uniform) {
+                                               for (int k = port.wide_log2; k < rport.wide_log2; k++)
+                                                       no_wide_bits.insert(k);
+                                       }
+                               }
+                       }
+               }
+               // Iterate over base widths.
+               for (int base_width_log2 = 0; base_width_log2 < GetSize(cfg.def->dbits); base_width_log2++) {
+                       // Now, see how many data bits we actually have available.
+                       // This is usually dbits[base_width_log2], but could be smaller if we
+                       // ran afoul of a max width limitation.  Configurations where this
+                       // happens are not useful, unless we need it to satisfy a *minimum*
+                       // width limitation.
+                       int unit_width_log2 = base_width_log2;
+                       for (auto &pcfg: cfg.wr_ports)
+                               if (unit_width_log2 > pcfg.def->max_wr_wide_log2)
+                                       unit_width_log2 = pcfg.def->max_wr_wide_log2;
+                       for (auto &pcfg: cfg.rd_ports)
+                               if (unit_width_log2 > pcfg.def->max_rd_wide_log2)
+                                       unit_width_log2 = pcfg.def->max_rd_wide_log2;
+                       if (unit_width_log2 != base_width_log2 && got_config)
+                               break;
+                       int unit_width = cfg.def->dbits[unit_width_log2];
+                       // Also determine effective byte width (the granularity of write enables).
+                       int effective_byte = cfg.def->byte;
+                       if (effective_byte == 0 || effective_byte > unit_width)
+                               effective_byte = unit_width;
+                       if (mem.wr_ports.empty())
+                               effective_byte = 1;
+                       log_assert(unit_width % effective_byte == 0);
+                       // Create the swizzle pattern.
+                       std::vector<int> swizzle;
+                       for (int i = 0; i < mem.width; i++) {
+                               if (word_boundary[i])
+                                       while (GetSize(swizzle) % unit_width)
+                                               swizzle.push_back(-1);
+                               else if (byte_boundary[i])
+                                       while (GetSize(swizzle) % effective_byte)
+                                               swizzle.push_back(-1);
+                               swizzle.push_back(i);
+                       }
+                       if (word_boundary[0])
+                               while (GetSize(swizzle) % unit_width)
+                                       swizzle.push_back(-1);
+                       else
+                               while (GetSize(swizzle) % effective_byte)
+                                       swizzle.push_back(-1);
+                       // Now evaluate the configuration, then keep adding more hard wide bits
+                       // and evaluating.
+                       int hard_wide_mask = 0;
+                       int hard_wide_num = 0;
+                       bool byte_failed = false;
+                       while (1) {
+                               // Check if all min width constraints are satisfied.
+                               // Only check these constraints for write ports with width below
+                               // byte width — for other ports, we can emulate narrow width with
+                               // a larger one.
+                               bool min_width_ok = true;
+                               int min_width_bit = wide_nu_start;
+                               for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) {
+                                       auto &port = mem.wr_ports[pidx];
+                                       int w = base_width_log2;
+                                       for (int i = 0; i < port.wide_log2; i++)
+                                               if (hard_wide_mask & 1 << i)
+                                                       w++;
+                                       if (w < cfg.wr_ports[pidx].def->min_wr_wide_log2 && w < byte_width_log2) {
+                                               min_width_ok = false;
+                                               if (min_width_bit > port.wide_log2)
+                                                       min_width_bit = port.wide_log2;
+                                       }
+                               }
+                               if (min_width_ok) {
+                                       int emu_wide_bits = max_wide_log2 - hard_wide_num;
+                                       int mult_wide = 1 << emu_wide_bits;
+                                       int addrs = 1 << (cfg.def->abits - base_width_log2 + emu_wide_bits);
+                                       int min_addr = mem.start_offset / addrs;
+                                       int max_addr = (mem.start_offset + mem.size - 1) / addrs;
+                                       int mult_a = max_addr - min_addr + 1;
+                                       int bits = mult_a * mult_wide * GetSize(swizzle);
+                                       int repl = (bits + unit_width - 1) / unit_width;
+                                       int score_demux = 0;
+                                       for (int i = 0; i < GetSize(mem.wr_ports); i++) {
+                                               auto &port = mem.wr_ports[i];
+                                               int w = emu_wide_bits;
+                                               for (int i = 0; i < port.wide_log2; i++)
+                                                       if (!(hard_wide_mask & 1 << i))
+                                                               w--;
+                                               if (w || mult_a != 1)
+                                                       score_demux += (mult_a << w) * wren_size[i];
+                                       }
+                                       int score_mux = 0;
+                                       for (auto &port: mem.rd_ports) {
+                                               int w = emu_wide_bits;
+                                               for (int i = 0; i < port.wide_log2; i++)
+                                                       if (!(hard_wide_mask & 1 << i))
+                                                               w--;
+                                               score_mux += ((mult_a << w) - 1) * GetSize(port.data);
+                                       }
+                                       double cost = (cfg.def->cost - cfg.def->widthscale) * repl * cfg.repl_port;
+                                       cost += cfg.def->widthscale * mult_a * mult_wide * mem.width / unit_width * cfg.repl_port;
+                                       cost += score_mux * FACTOR_MUX;
+                                       cost += score_demux * FACTOR_DEMUX;
+                                       cost += cfg.score_emu * FACTOR_EMU;
+                                       if (!got_config || cost < best_cost) {
+                                               cfg.base_width_log2 = base_width_log2;
+                                               cfg.unit_width_log2 = unit_width_log2;
+                                               cfg.swizzle = swizzle;
+                                               cfg.hard_wide_mask = hard_wide_mask;
+                                               cfg.emu_wide_mask = ((1 << max_wide_log2) - 1) & ~hard_wide_mask;
+                                               cfg.repl_d = repl;
+                                               cfg.score_demux = score_demux;
+                                               cfg.score_mux = score_mux;
+                                               cfg.cost = cost;
+                                               best_cost = cost;
+                                               got_config = true;
+                                       }
+                               }
+                               if (cfg.def->width_mode != WidthMode::PerPort)
+                                       break;
+                               // Now, pick the next bit to add to the hard wide mask.
+next_hw:
+                               int scan_from;
+                               int scan_to;
+                               bool retry = false;
+                               if (!min_width_ok) {
+                                       // If we still haven't met the minimum width limits,
+                                       // add the highest one that will be useful for working
+                                       // towards all unmet limits.
+                                       scan_from = min_width_bit;
+                                       scan_to = 0;
+                                       // If the relevant write port is not wide, it's impossible.
+                               } else if (byte_failed) {
+                                       // If we already failed with uniformly-written bits only,
+                                       // go with uniform bits that are only involved in reads.
+                                       scan_from = max_wide_log2;
+                                       scan_to = wide_nu_end;
+                               } else if (base_width_log2 + hard_wide_num < byte_width_log2) {
+                                       // If we still need uniform bits, prefer the low ones.
+                                       scan_from = wide_nu_start;
+                                       scan_to = 0;
+                                       retry = true;
+                               } else {
+                                       scan_from = max_wide_log2;
+                                       scan_to = 0;
+                               }
+                               int bit = scan_from - 1;
+                               while (1) {
+                                       if (bit < scan_to) {
+hw_bit_failed:
+                                               if (retry) {
+                                                       byte_failed = true;
+                                                       goto next_hw;
+                                               } else {
+                                                       goto bw_done;
+                                               }
+                                       }
+                                       if (!(hard_wide_mask & 1 << bit) && !no_wide_bits.count(bit))
+                                               break;
+                                       bit--;
+                               }
+                               int new_hw_mask = hard_wide_mask | 1 << bit;
+                               // Check if all max width constraints are satisfied.
+                               for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) {
+                                       auto &port = mem.wr_ports[pidx];
+                                       int w = base_width_log2;
+                                       for (int i = 0; i < port.wide_log2; i++)
+                                               if (new_hw_mask & 1 << i)
+                                                       w++;
+                                       if (w > cfg.wr_ports[pidx].def->max_wr_wide_log2) {
+                                               goto hw_bit_failed;
+                                       }
+                               }
+                               for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) {
+                                       auto &port = mem.rd_ports[pidx];
+                                       int w = base_width_log2;
+                                       for (int i = 0; i < port.wide_log2; i++)
+                                               if (new_hw_mask & 1 << i)
+                                                       w++;
+                                       if (w > cfg.rd_ports[pidx].def->max_rd_wide_log2) {
+                                               goto hw_bit_failed;
+                                       }
+                               }
+                               // Bit ok, commit.
+                               hard_wide_mask = new_hw_mask;
+                               hard_wide_num++;
+                       }
+bw_done:;
+               }
+               log_assert(got_config);
+       }
+}
+
+void MemMapping::prune_post_geom() {
+       std::vector<bool> keep;
+       dict<std::string, int> rsrc;
+       for (int i = 0; i < GetSize(cfgs); i++) {
+               auto &cfg = cfgs[i];
+               std::string key = cfg.def->resource_name;
+               if (key.empty()) {
+                       switch (cfg.def->kind) {
+                               case RamKind::Distributed:
+                                       key = "[distributed]";
+                                       break;
+                               case RamKind::Block:
+                                       key = "[block]";
+                                       break;
+                               case RamKind::Huge:
+                                       key = "[huge]";
+                                       break;
+                               default:
+                                       break;
+                       }
+               }
+               auto it = rsrc.find(key);
+               if (it == rsrc.end()) {
+                       rsrc[key] = i;
+                       keep.push_back(true);
+               } else {
+                       auto &ocfg = cfgs[it->second];
+                       if (cfg.cost < ocfg.cost) {
+                               keep[it->second] = false;
+                               it->second = i;
+                               keep.push_back(true);
+                       } else {
+                               keep.push_back(false);
+                       }
+               }
+       }
+       MemConfigs new_cfgs;
+       for (int i = 0; i < GetSize(cfgs); i++)
+               if (keep[i])
+                       new_cfgs.push_back(cfgs[i]);
+       cfgs = new_cfgs;
+}
+
+Swizzle gen_swizzle(const Mem &mem, const MemConfig &cfg, int sw_wide_log2, int hw_wide_log2) {
+       Swizzle res;
+
+       std::vector<int> emu_wide_bits;
+       std::vector<int> hard_wide_bits;
+       for (int i = 0; i < ceil_log2(mem.size); i++) {
+               if (cfg.emu_wide_mask & 1 << i)
+                       emu_wide_bits.push_back(i);
+               else if (GetSize(hard_wide_bits) < hw_wide_log2 - cfg.base_width_log2)
+                       hard_wide_bits.push_back(i);
+       }
+       for (int x : hard_wide_bits)
+               if (x >= sw_wide_log2)
+                       res.addr_mux_bits.push_back(x);
+       for (int x : emu_wide_bits)
+               if (x >= sw_wide_log2)
+                       res.addr_mux_bits.push_back(x);
+
+       res.addr_shift = cfg.def->abits - cfg.base_width_log2 + GetSize(emu_wide_bits);
+       res.addr_start = mem.start_offset & ~((1 << res.addr_shift) - 1);
+       res.addr_end = ((mem.start_offset + mem.size - 1) | ((1 << res.addr_shift) - 1)) + 1;
+       int hnum = (res.addr_end - res.addr_start) >> res.addr_shift;
+       int unit_width = cfg.def->dbits[cfg.unit_width_log2];
+
+       for (int rd = 0; rd < cfg.repl_d; rd++) {
+               std::vector<SwizzleBit> bits(cfg.def->dbits[hw_wide_log2]);
+               for (auto &bit: bits)
+                       bit.valid = false;
+               res.bits.push_back(bits);
+       }
+
+       for (int hi = 0; hi < hnum; hi++) {
+               for (int ewi = 0; ewi < (1 << GetSize(emu_wide_bits)); ewi++) {
+                       for (int hwi = 0; hwi < (1 << GetSize(hard_wide_bits)); hwi++) {
+                               int mux_idx = 0;
+                               int sub = 0;
+                               int mib = 0;
+                               int hbit_base = 0;
+                               for (int i = 0; i < GetSize(hard_wide_bits); i++) {
+                                       if (hard_wide_bits[i] < sw_wide_log2) {
+                                               if (hwi & 1 << i)
+                                                       sub |= 1 << hard_wide_bits[i];
+                                       } else {
+                                               if (hwi & 1 << i)
+                                                       mux_idx |= 1 << mib;
+                                               mib++;
+                                       }
+                                       if (hwi & 1 << i)
+                                               hbit_base += cfg.def->dbits[i + cfg.base_width_log2];
+                               }
+                               for (int i = 0; i < GetSize(emu_wide_bits); i++) {
+                                       if (emu_wide_bits[i] < sw_wide_log2) {
+                                               if (ewi & 1 << i)
+                                                       sub |= 1 << emu_wide_bits[i];
+                                       } else {
+                                               if (ewi & 1 << i)
+                                                       mux_idx |= 1 << mib;
+                                               mib++;
+                                       }
+                               }
+                               mux_idx |= hi << mib;
+                               int addr = res.addr_start + (hi << res.addr_shift);
+                               for (int i = 0; i < GetSize(res.addr_mux_bits); i++)
+                                       if (mux_idx & 1 << i)
+                                               addr += 1 << res.addr_mux_bits[i];
+                               for (int bit = 0; bit < GetSize(cfg.swizzle); bit++) {
+                                       if (cfg.swizzle[bit] == -1)
+                                               continue;
+                                       int rbit = bit + GetSize(cfg.swizzle) * (ewi + (hi << GetSize(emu_wide_bits)));
+                                       int rep = rbit / unit_width;
+                                       int hbit = hbit_base + rbit % unit_width;
+                                       auto &swz = res.bits[rep][hbit];
+                                       swz.valid = true;
+                                       swz.addr = addr;
+                                       swz.mux_idx = mux_idx;
+                                       swz.bit = cfg.swizzle[bit] + sub * mem.width;
+                               }
+                       }
+               }
+       }
+
+       return res;
+}
+
+void clean_undef(std::vector<State> &val) {
+       for (auto &bit : val)
+               if (bit != State::S1)
+                       bit = State::S0;
+}
+
+std::vector<SigSpec> generate_demux(Mem &mem, int wpidx, const Swizzle &swz) {
+       auto &port = mem.wr_ports[wpidx];
+       std::vector<SigSpec> res;
+       int hi_bits = ceil_log2(swz.addr_end - swz.addr_start) - swz.addr_shift;
+       auto compressed = port.compress_en();
+       SigSpec sig_a = compressed.first;
+       SigSpec addr = port.addr;
+       if (GetSize(addr) > hi_bits + swz.addr_shift) {
+               int lo = mem.start_offset;
+               int hi = mem.start_offset + mem.size;
+               int bits = ceil_log2(hi);
+               for (int i = 0; i < bits; i++) {
+                       int new_lo = lo;
+                       if (lo & 1 << i)
+                               new_lo -= 1 << i;
+                       int new_hi = hi;
+                       if (hi & 1 << i)
+                               new_hi += 1 << i;
+                       if (new_hi - new_lo > (1 << (hi_bits + swz.addr_shift)))
+                               break;
+                       lo = new_lo;
+                       hi = new_hi;
+               }
+               SigSpec in_range = mem.module->And(NEW_ID, mem.module->Ge(NEW_ID, addr, lo), mem.module->Lt(NEW_ID, addr, hi));
+               sig_a = mem.module->Mux(NEW_ID, Const(State::S0, GetSize(sig_a)), sig_a, in_range);
+       }
+       addr.extend_u0(swz.addr_shift + hi_bits, false);
+       SigSpec sig_s;
+       for (int x : swz.addr_mux_bits)
+               sig_s.append(addr[x]);
+       for (int i = 0; i < hi_bits; i++)
+               sig_s.append(addr[swz.addr_shift + i]);
+       SigSpec sig_y;
+       if (GetSize(sig_s) == 0)
+               sig_y = sig_a;
+       else
+               sig_y = mem.module->Demux(NEW_ID, sig_a, sig_s);
+       for (int i = 0; i < ((swz.addr_end - swz.addr_start) >> swz.addr_shift); i++) {
+               for (int j = 0; j < (1 << GetSize(swz.addr_mux_bits)); j++) {
+                       int hi = ((swz.addr_start >> swz.addr_shift) + i) & ((1 << hi_bits) - 1);
+                       int pos = (hi << GetSize(swz.addr_mux_bits) | j) * GetSize(sig_a);
+                       res.push_back(port.decompress_en(compressed.second, sig_y.extract(pos, GetSize(sig_a))));
+               }
+       }
+       return res;
+}
+
+std::vector<SigSpec> generate_mux(Mem &mem, int rpidx, const Swizzle &swz) {
+       auto &port = mem.rd_ports[rpidx];
+       std::vector<SigSpec> res;
+       int hi_bits = ceil_log2(swz.addr_end - swz.addr_start) - swz.addr_shift;
+       SigSpec sig_s;
+       SigSpec addr = port.addr;
+       addr.extend_u0(swz.addr_shift + hi_bits, false);
+       for (int x : swz.addr_mux_bits)
+               sig_s.append(addr[x]);
+       for (int i = 0; i < hi_bits; i++)
+               sig_s.append(addr[swz.addr_shift + i]);
+       if (GetSize(sig_s) == 0) {
+               return {port.data};
+       }
+       if (port.clk_enable) {
+               SigSpec new_sig_s = mem.module->addWire(NEW_ID, GetSize(sig_s));
+               mem.module->addDffe(NEW_ID, port.clk, port.en, sig_s, new_sig_s, port.clk_polarity);
+               sig_s = new_sig_s;
+       }
+       SigSpec sig_a = Const(State::Sx, GetSize(port.data) << hi_bits << GetSize(swz.addr_mux_bits));
+       for (int i = 0; i < ((swz.addr_end - swz.addr_start) >> swz.addr_shift); i++) {
+               for (int j = 0; j < (1 << GetSize(swz.addr_mux_bits)); j++) {
+                       SigSpec sig = mem.module->addWire(NEW_ID, GetSize(port.data));
+                       int hi = ((swz.addr_start >> swz.addr_shift) + i) & ((1 << hi_bits) - 1);
+                       int pos = (hi << GetSize(swz.addr_mux_bits) | j) * GetSize(port.data);
+                       for (int k = 0; k < GetSize(port.data); k++)
+                               sig_a[pos + k] = sig[k];
+                       res.push_back(sig);
+               }
+       }
+       mem.module->addBmux(NEW_ID, sig_a, sig_s, port.data);
+       return res;
+}
+
+void MemMapping::emit_port(const MemConfig &cfg, std::vector<Cell*> &cells, const PortVariant &pdef, const char *name, int wpidx, int rpidx, const std::vector<int> &hw_addr_swizzle) {
+       for (auto &it: pdef.options)
+               for (auto cell: cells)
+                       cell->setParam(stringf("\\PORT_%s_OPTION_%s", name, it.first.c_str()), it.second);
+       SigSpec addr = Const(State::Sx, cfg.def->abits);
+       int wide_log2 = 0, wr_wide_log2 = 0, rd_wide_log2 = 0;
+       SigSpec clk = State::S0;
+       SigSpec clk_en = State::S0;
+       bool clk_pol = true;
+       if (wpidx != -1) {
+               auto &wport = mem.wr_ports[wpidx];
+               clk = wport.clk;
+               clk_pol = wport.clk_polarity;
+               addr = wport.addr;
+               wide_log2 = wr_wide_log2 = wport.wide_log2;
+               if (rpidx != -1) {
+                       auto &rport = mem.rd_ports[rpidx];
+                       auto &rpcfg = cfg.rd_ports[rpidx];
+                       rd_wide_log2 = rport.wide_log2;
+                       if (rd_wide_log2 > wr_wide_log2)
+                               wide_log2 = rd_wide_log2;
+                       else
+                               addr = rport.addr;
+                       if (pdef.clk_en) {
+                               if (rpcfg.rd_en_to_clk_en) {
+                                       if (pdef.rdwr == RdWrKind::NoChange) {
+                                               clk_en = mem.module->Or(NEW_ID, rport.en, mem.module->ReduceOr(NEW_ID, wport.en));
+                                       } else {
+                                               clk_en = rport.en;
+                                       }
+                               } else {
+                                       clk_en = State::S1;
+                               }
+                       }
+               } else {
+                       if (pdef.clk_en)
+                               clk_en = State::S1;
+               }
+       } else if (rpidx != -1) {
+               auto &rport = mem.rd_ports[rpidx];
+               auto &rpcfg = cfg.rd_ports[rpidx];
+               if (rport.clk_enable) {
+                       clk = rport.clk;
+                       clk_pol = rport.clk_polarity;
+               }
+               addr = rport.addr;
+               wide_log2 = rd_wide_log2 = rport.wide_log2;
+               if (pdef.clk_en) {
+                       if (rpcfg.rd_en_to_clk_en)
+                               clk_en = rport.en;
+                       else
+                               clk_en = State::S1;
+               }
+
+       }
+       addr = worker.sigmap_xmux(addr);
+       if (pdef.kind != PortKind::Ar) {
+               switch (pdef.clk_pol) {
+                       case ClkPolKind::Posedge:
+                               if (!clk_pol)
+                                       clk = mem.module->Not(NEW_ID, clk);
+                               break;
+                       case ClkPolKind::Negedge:
+                               if (clk_pol)
+                                       clk = mem.module->Not(NEW_ID, clk);
+                               break;
+                       case ClkPolKind::Anyedge:
+                               for (auto cell: cells)
+                                       cell->setParam(stringf("\\PORT_%s_CLK_POL", name), clk_pol);
+               }
+               for (auto cell: cells) {
+                       cell->setPort(stringf("\\PORT_%s_CLK", name), clk);
+                       if (pdef.clk_en)
+                               cell->setPort(stringf("\\PORT_%s_CLK_EN", name), clk_en);
+               }
+       }
+
+       // Width determination.
+       if (pdef.width_tied) {
+               rd_wide_log2 = wr_wide_log2 = wide_log2;
+       }
+       int hw_wr_wide_log2 = cfg.base_width_log2;
+       for (int i = 0; i < wr_wide_log2; i++)
+               if (cfg.hard_wide_mask & (1 << i))
+                       hw_wr_wide_log2++;
+       if (hw_wr_wide_log2 < pdef.min_wr_wide_log2)
+               hw_wr_wide_log2 = pdef.min_wr_wide_log2;
+       if (hw_wr_wide_log2 > pdef.max_wr_wide_log2)
+               hw_wr_wide_log2 = pdef.max_wr_wide_log2;
+       int hw_rd_wide_log2 = cfg.base_width_log2;
+       for (int i = 0; i < rd_wide_log2; i++)
+               if (cfg.hard_wide_mask & (1 << i))
+                       hw_rd_wide_log2++;
+       if (hw_rd_wide_log2 < pdef.min_rd_wide_log2)
+               hw_rd_wide_log2 = pdef.min_rd_wide_log2;
+       if (hw_rd_wide_log2 > pdef.max_rd_wide_log2)
+               hw_rd_wide_log2 = pdef.max_rd_wide_log2;
+       if (pdef.width_tied) {
+               // For unused ports, pick max available width,
+               // in case narrow ports require disabling parity
+               // bits etc.
+               if (wpidx == -1 && rpidx == -1) {
+                       hw_wr_wide_log2 = pdef.max_wr_wide_log2;
+                       hw_rd_wide_log2 = pdef.max_rd_wide_log2;
+               }
+       } else {
+               if (wpidx == -1)
+                       hw_wr_wide_log2 = pdef.max_wr_wide_log2;
+               if (rpidx == -1)
+                       hw_rd_wide_log2 = pdef.max_rd_wide_log2;
+       }
+       if (cfg.def->width_mode == WidthMode::PerPort) {
+               for (auto cell: cells) {
+                       if (pdef.width_tied) {
+                               cell->setParam(stringf("\\PORT_%s_WIDTH", name), cfg.def->dbits[hw_wr_wide_log2]);
+                       } else {
+                               if (pdef.kind != PortKind::Ar && pdef.kind != PortKind::Sr)
+                                       cell->setParam(stringf("\\PORT_%s_WR_WIDTH", name), cfg.def->dbits[hw_wr_wide_log2]);
+                               if (pdef.kind != PortKind::Sw)
+                                       cell->setParam(stringf("\\PORT_%s_RD_WIDTH", name), cfg.def->dbits[hw_rd_wide_log2]);
+                       }
+               }
+       }
+
+       // Address determination.
+       SigSpec hw_addr;
+       for (int x: hw_addr_swizzle) {
+               if (x == -1 || x >= GetSize(addr))
+                       hw_addr.append(State::S0);
+               else
+                       hw_addr.append(addr[x]);
+       }
+       for (int i = 0; i < hw_wr_wide_log2 && i < hw_rd_wide_log2; i++)
+               hw_addr[i] = State::S0;
+       for (auto cell: cells)
+               cell->setPort(stringf("\\PORT_%s_ADDR", name), hw_addr);
+
+       // Write part.
+       if (pdef.kind != PortKind::Ar && pdef.kind != PortKind::Sr) {
+               int width = cfg.def->dbits[hw_wr_wide_log2];
+               int effective_byte = cfg.def->byte;
+               if (effective_byte == 0 || effective_byte > width)
+                       effective_byte = width;
+               if (wpidx != -1) {
+                       auto &wport = mem.wr_ports[wpidx];
+                       Swizzle port_swz = gen_swizzle(mem, cfg, wport.wide_log2, hw_wr_wide_log2);
+                       std::vector<SigSpec> big_wren = generate_demux(mem, wpidx, port_swz);
+                       for (int rd = 0; rd < cfg.repl_d; rd++) {
+                               auto cell = cells[rd];
+                               SigSpec hw_wdata;
+                               SigSpec hw_wren;
+                               for (auto &bit : port_swz.bits[rd]) {
+                                       if (!bit.valid) {
+                                               hw_wdata.append(State::Sx);
+                                       } else {
+                                               hw_wdata.append(wport.data[bit.bit]);
+                                       }
+                               }
+                               for (int i = 0; i < GetSize(port_swz.bits[rd]); i += effective_byte) {
+                                       auto &bit = port_swz.bits[rd][i];
+                                       if (!bit.valid) {
+                                               hw_wren.append(State::S0);
+                                       } else {
+                                               hw_wren.append(big_wren[bit.mux_idx][bit.bit]);
+                                       }
+                               }
+                               cell->setPort(stringf("\\PORT_%s_WR_DATA", name), hw_wdata);
+                               if (pdef.wrbe_separate) {
+                                       // TODO make some use of it
+                                       SigSpec en = mem.module->ReduceOr(NEW_ID, hw_wren);
+                                       cell->setPort(stringf("\\PORT_%s_WR_EN", name), en);
+                                       cell->setPort(stringf("\\PORT_%s_WR_BE", name), hw_wren);
+                                       if (cfg.def->width_mode != WidthMode::Single)
+                                               cell->setParam(stringf("\\PORT_%s_WR_BE_WIDTH", name), GetSize(hw_wren));
+                               } else {
+                                       cell->setPort(stringf("\\PORT_%s_WR_EN", name), hw_wren);
+                                       if (cfg.def->byte != 0 && cfg.def->width_mode != WidthMode::Single)
+                                               cell->setParam(stringf("\\PORT_%s_WR_EN_WIDTH", name), GetSize(hw_wren));
+                               }
+                       }
+               } else {
+                       for (auto cell: cells) {
+                               cell->setPort(stringf("\\PORT_%s_WR_DATA", name), Const(State::Sx, width));
+                               SigSpec hw_wren = Const(State::S0, width / effective_byte);
+                               if (pdef.wrbe_separate) {
+                                       cell->setPort(stringf("\\PORT_%s_WR_EN", name), State::S0);
+                                       cell->setPort(stringf("\\PORT_%s_WR_BE", name), hw_wren);
+                                       cell->setParam(stringf("\\PORT_%s_WR_BE_WIDTH", name), GetSize(hw_wren));
+                               } else {
+                                       cell->setPort(stringf("\\PORT_%s_WR_EN", name), hw_wren);
+                                       if (cfg.def->byte != 0)
+                                               cell->setParam(stringf("\\PORT_%s_WR_EN_WIDTH", name), GetSize(hw_wren));
+                               }
+                       }
+               }
+       }
+
+       // Read part.
+       if (pdef.kind != PortKind::Sw) {
+               int width = cfg.def->dbits[hw_rd_wide_log2];
+               if (rpidx != -1) {
+                       auto &rport = mem.rd_ports[rpidx];
+                       auto &rpcfg = cfg.rd_ports[rpidx];
+                       Swizzle port_swz = gen_swizzle(mem, cfg, rport.wide_log2, hw_rd_wide_log2);
+                       std::vector<SigSpec> big_rdata = generate_mux(mem, rpidx, port_swz);
+                       for (int rd = 0; rd < cfg.repl_d; rd++) {
+                               auto cell = cells[rd];
+                               if (pdef.kind == PortKind::Sr || pdef.kind == PortKind::Srsw) {
+                                       if (pdef.rd_en)
+                                               cell->setPort(stringf("\\PORT_%s_RD_EN", name), rpcfg.rd_en_to_clk_en ? State::S1 : rport.en);
+                                       if (pdef.rdarstval != ResetValKind::None)
+                                               cell->setPort(stringf("\\PORT_%s_RD_ARST", name), rport.arst);
+                                       if (pdef.rdsrstval != ResetValKind::None)
+                                               cell->setPort(stringf("\\PORT_%s_RD_SRST", name), rport.srst);
+                                       if (pdef.rdinitval == ResetValKind::Any || pdef.rdinitval == ResetValKind::NoUndef) {
+                                               Const val = rport.init_value;
+                                               if (pdef.rdarstval == ResetValKind::Init && rport.arst != State::S0) {
+                                                       log_assert(val.is_fully_undef() || val == rport.arst_value);
+                                                       val = rport.arst_value;
+                                               }
+                                               if (pdef.rdsrstval == ResetValKind::Init && rport.srst != State::S0) {
+                                                       log_assert(val.is_fully_undef() || val == rport.srst_value);
+                                                       val = rport.srst_value;
+                                               }
+                                               std::vector<State> hw_val;
+                                               for (auto &bit : port_swz.bits[rd]) {
+                                                       if (!bit.valid) {
+                                                               hw_val.push_back(State::Sx);
+                                                       } else {
+                                                               hw_val.push_back(val.bits[bit.bit]);
+                                                       }
+                                               }
+                                               if (pdef.rdinitval == ResetValKind::NoUndef)
+                                                       clean_undef(hw_val);
+                                               cell->setParam(stringf("\\PORT_%s_RD_INIT_VALUE", name), hw_val);
+                                       }
+                                       if (pdef.rdarstval == ResetValKind::Any || pdef.rdarstval == ResetValKind::NoUndef) {
+                                               std::vector<State> hw_val;
+                                               for (auto &bit : port_swz.bits[rd]) {
+                                                       if (!bit.valid) {
+                                                               hw_val.push_back(State::Sx);
+                                                       } else {
+                                                               hw_val.push_back(rport.arst_value.bits[bit.bit]);
+                                                       }
+                                               }
+                                               if (pdef.rdarstval == ResetValKind::NoUndef)
+                                                       clean_undef(hw_val);
+                                               cell->setParam(stringf("\\PORT_%s_RD_ARST_VALUE", name), hw_val);
+                                       }
+                                       if (pdef.rdsrstval == ResetValKind::Any || pdef.rdsrstval == ResetValKind::NoUndef) {
+                                               std::vector<State> hw_val;
+                                               for (auto &bit : port_swz.bits[rd]) {
+                                                       if (!bit.valid) {
+                                                               hw_val.push_back(State::Sx);
+                                                       } else {
+                                                               hw_val.push_back(rport.srst_value.bits[bit.bit]);
+                                                       }
+                                               }
+                                               if (pdef.rdsrstval == ResetValKind::NoUndef)
+                                                       clean_undef(hw_val);
+                                               cell->setParam(stringf("\\PORT_%s_RD_SRST_VALUE", name), hw_val);
+                                       }
+                               }
+                               SigSpec hw_rdata = mem.module->addWire(NEW_ID, width);
+                               cell->setPort(stringf("\\PORT_%s_RD_DATA", name), hw_rdata);
+                               SigSpec lhs;
+                               SigSpec rhs;
+                               for (int i = 0; i < GetSize(hw_rdata); i++) {
+                                       auto &bit = port_swz.bits[rd][i];
+                                       if (bit.valid) {
+                                               lhs.append(big_rdata[bit.mux_idx][bit.bit]);
+                                               rhs.append(hw_rdata[i]);
+                                       }
+                               }
+                               mem.module->connect(lhs, rhs);
+                       }
+               } else {
+                       for (auto cell: cells) {
+                               if (pdef.kind == PortKind::Sr || pdef.kind == PortKind::Srsw) {
+                                       if (pdef.rd_en)
+                                               cell->setPort(stringf("\\PORT_%s_RD_EN", name), State::S0);
+                                       if (pdef.rdarstval != ResetValKind::None)
+                                               cell->setPort(stringf("\\PORT_%s_RD_ARST", name), State::S0);
+                                       if (pdef.rdsrstval != ResetValKind::None)
+                                               cell->setPort(stringf("\\PORT_%s_RD_SRST", name), State::S0);
+                                       if (pdef.rdinitval == ResetValKind::Any)
+                                               cell->setParam(stringf("\\PORT_%s_RD_INIT_VALUE", name), Const(State::Sx, width));
+                                       else if (pdef.rdinitval == ResetValKind::NoUndef)
+                                               cell->setParam(stringf("\\PORT_%s_RD_INIT_VALUE", name), Const(State::S0, width));
+                                       if (pdef.rdarstval == ResetValKind::Any)
+                                               cell->setParam(stringf("\\PORT_%s_RD_ARST_VALUE", name), Const(State::Sx, width));
+                                       else if (pdef.rdarstval == ResetValKind::NoUndef)
+                                               cell->setParam(stringf("\\PORT_%s_RD_ARST_VALUE", name), Const(State::S0, width));
+                                       if (pdef.rdsrstval == ResetValKind::Any)
+                                               cell->setParam(stringf("\\PORT_%s_RD_SRST_VALUE", name), Const(State::Sx, width));
+                                       else if (pdef.rdsrstval == ResetValKind::NoUndef)
+                                               cell->setParam(stringf("\\PORT_%s_RD_SRST_VALUE", name), Const(State::S0, width));
+                               }
+                               SigSpec hw_rdata = mem.module->addWire(NEW_ID, width);
+                               cell->setPort(stringf("\\PORT_%s_RD_DATA", name), hw_rdata);
+                       }
+               }
+       }
+}
+
+void MemMapping::emit(const MemConfig &cfg) {
+       log("mapping memory %s.%s via %s\n", log_id(mem.module->name), log_id(mem.memid), log_id(cfg.def->id));
+       // First, handle emulations.
+       if (cfg.emu_read_first)
+               mem.emulate_read_first(&worker.initvals);
+       for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) {
+               auto &pcfg = cfg.rd_ports[pidx];
+               auto &port = mem.rd_ports[pidx];
+               if (pcfg.emu_sync)
+                       mem.extract_rdff(pidx, &worker.initvals);
+               else if (pcfg.emu_en)
+                       mem.emulate_rden(pidx, &worker.initvals);
+               else {
+                       if (pcfg.emu_srst_en_prio) {
+                               if (port.ce_over_srst)
+                                       mem.emulate_rd_ce_over_srst(pidx);
+                               else
+                                       mem.emulate_rd_srst_over_ce(pidx);
+                       }
+                       mem.emulate_reset(pidx, pcfg.emu_init, pcfg.emu_arst, pcfg.emu_srst, &worker.initvals);
+               }
+       }
+       for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) {
+               auto &pcfg = cfg.wr_ports[pidx];
+               for (int opidx: pcfg.emu_prio) {
+                       mem.emulate_priority(opidx, pidx, &worker.initvals);
+               }
+       }
+       for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) {
+               auto &port = mem.rd_ports[pidx];
+               auto &pcfg = cfg.rd_ports[pidx];
+               for (int opidx: pcfg.emu_trans) {
+                       // The port may no longer be transparent due to transparency being
+                       // nuked as part of emu_sync or emu_prio.
+                       if (port.transparency_mask[opidx])
+                               mem.emulate_transparency(opidx, pidx, &worker.initvals);
+               }
+       }
+
+       // tgt (repl, port group, port) -> mem (wr port, rd port), where -1 means no port.
+       std::vector<std::vector<std::vector<std::pair<int, int>>>> ports(cfg.repl_port);
+       for (int i = 0; i < cfg.repl_port; i++)
+               ports[i].resize(cfg.def->port_groups.size());
+       for (int i = 0; i < GetSize(cfg.wr_ports); i++) {
+               auto &pcfg = cfg.wr_ports[i];
+               for (int j = 0; j < cfg.repl_port; j++) {
+                       if (j == 0) {
+                               ports[j][pcfg.port_group].push_back({i, pcfg.rd_port});
+                       } else {
+                               ports[j][pcfg.port_group].push_back({i, -1});
+                       }
+               }
+       }
+       for (int i = 0; i < GetSize(cfg.rd_ports); i++) {
+               auto &pcfg = cfg.rd_ports[i];
+               if (pcfg.wr_port != -1)
+                       continue;
+               auto &pg = cfg.def->port_groups[pcfg.port_group];
+               int j = 0;
+               while (GetSize(ports[j][pcfg.port_group]) >= GetSize(pg.names))
+                       j++;
+               ports[j][pcfg.port_group].push_back({-1, i});
+       }
+
+       Swizzle init_swz = gen_swizzle(mem, cfg, 0, GetSize(cfg.def->dbits) - 1);
+       Const init_data = mem.get_init_data();
+
+       std::vector<int> hw_addr_swizzle;
+       for (int i = 0; i < cfg.base_width_log2; i++)
+               hw_addr_swizzle.push_back(-1);
+       for (int i = 0; i < init_swz.addr_shift; i++)
+               if (!(cfg.emu_wide_mask & 1 << i))
+                       hw_addr_swizzle.push_back(i);
+       log_assert(GetSize(hw_addr_swizzle) == cfg.def->abits);
+
+       for (int rp = 0; rp < cfg.repl_port; rp++) {
+               std::vector<Cell *> cells;
+               for (int rd = 0; rd < cfg.repl_d; rd++) {
+                       Cell *cell = mem.module->addCell(stringf("%s.%d.%d", mem.memid.c_str(), rp, rd), cfg.def->id);
+                       if (cfg.def->width_mode == WidthMode::Global)
+                               cell->setParam(ID::WIDTH, cfg.def->dbits[cfg.base_width_log2]);
+                       if (cfg.def->widthscale) {
+                               std::vector<State> val;
+                               for (auto &bit: init_swz.bits[rd])
+                                       val.push_back(bit.valid ? State::S1 : State::S0);
+                               cell->setParam(ID::BITS_USED, val);
+                       }
+                       for (auto &it: cfg.def->options)
+                               cell->setParam(stringf("\\OPTION_%s", it.first.c_str()), it.second);
+                       for (int i = 0; i < GetSize(cfg.def->shared_clocks); i++) {
+                               auto &cdef = cfg.def->shared_clocks[i];
+                               auto &ccfg = cfg.shared_clocks[i];
+                               if (cdef.anyedge) {
+                                       cell->setParam(stringf("\\CLK_%s_POL", cdef.name.c_str()), ccfg.used ? ccfg.polarity : true);
+                                       cell->setPort(stringf("\\CLK_%s", cdef.name.c_str()), ccfg.used ? ccfg.clk : State::S0);
+                               } else {
+                                       SigSpec sig = ccfg.used ? ccfg.clk : State::S0;
+                                       if (ccfg.used && ccfg.invert)
+                                               sig = mem.module->Not(NEW_ID, sig);
+                                       cell->setPort(stringf("\\CLK_%s", cdef.name.c_str()), sig);
+                               }
+                       }
+                       if (cfg.def->init == MemoryInitKind::Any || cfg.def->init == MemoryInitKind::NoUndef) {
+                               std::vector<State> initval;
+                               for (int hwa = 0; hwa < (1 << cfg.def->abits); hwa += 1 << (GetSize(cfg.def->dbits) - 1)) {
+                                       for (auto &bit: init_swz.bits[rd]) {
+                                               if (!bit.valid) {
+                                                       initval.push_back(State::Sx);
+                                               } else {
+                                                       int addr = bit.addr;
+                                                       for (int i = GetSize(cfg.def->dbits) - 1; i < cfg.def->abits; i++)
+                                                               if (hwa & 1 << i)
+                                                                       addr += 1 << hw_addr_swizzle[i];
+                                                       if (addr >= mem.start_offset && addr < mem.start_offset + mem.size)
+                                                               initval.push_back(init_data.bits[(addr - mem.start_offset) * mem.width + bit.bit]);
+                                                       else
+                                                               initval.push_back(State::Sx);
+                                               }
+                                       }
+                               }
+                               if (cfg.def->init == MemoryInitKind::NoUndef)
+                                       clean_undef(initval);
+                               cell->setParam(ID::INIT, initval);
+                       }
+                       cells.push_back(cell);
+               }
+               for (int pgi = 0; pgi < GetSize(cfg.def->port_groups); pgi++) {
+                       auto &pg = cfg.def->port_groups[pgi];
+                       for (int pi = 0; pi < GetSize(pg.names); pi++) {
+                               bool used = pi < GetSize(ports[rp][pgi]);
+                               bool used_r = false;
+                               bool used_w = false;
+                               if (used) {
+                                       auto &pd = ports[rp][pgi][pi];
+                                       const PortVariant *pdef;
+                                       if (pd.first != -1)
+                                               pdef = cfg.wr_ports[pd.first].def;
+                                       else
+                                               pdef = cfg.rd_ports[pd.second].def;
+                                       used_w = pd.first != -1;
+                                       used_r = pd.second != -1;
+                                       emit_port(cfg, cells, *pdef, pg.names[pi].c_str(), pd.first, pd.second, hw_addr_swizzle);
+                               } else {
+                                       emit_port(cfg, cells, pg.variants[0], pg.names[pi].c_str(), -1, -1, hw_addr_swizzle);
+                               }
+                               if (pg.optional)
+                                       for (auto cell: cells)
+                                               cell->setParam(stringf("\\PORT_%s_USED", pg.names[pi].c_str()), used);
+                               if (pg.optional_rw)
+                                       for (auto cell: cells) {
+                                               cell->setParam(stringf("\\PORT_%s_RD_USED", pg.names[pi].c_str()), used_r);
+                                               cell->setParam(stringf("\\PORT_%s_WR_USED", pg.names[pi].c_str()), used_w);
+                                       }
+                       }
+               }
+       }
+       mem.remove();
+}
+
+struct MemoryLibMapPass : public Pass {
+       MemoryLibMapPass() : Pass("memory_libmap", "map memories to cells") { }
+       void help() override
+       {
+               //   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+               log("\n");
+               log("    memory_libmap -lib <library_file> [-D <condition>] [selection]\n");
+               log("\n");
+               log("This pass takes a description of available RAM cell types and maps\n");
+               log("all selected memories to one of them, or leaves them  to be mapped to FFs.\n");
+               log("\n");
+               log("  -lib <library_file>\n");
+               log("    Selects a library file containing RAM cell definitions. This option\n");
+               log("    can be passed more than once to select multiple libraries.\n");
+               log("    See passes/memory/memlib.md for description of the library format.\n");
+               log("\n");
+               log("  -D <condition>\n");
+               log("    Enables a condition that can be checked within the library file\n");
+               log("    to eg. select between slightly different hardware variants.\n");
+               log("    This option can be passed any number of times.\n");
+               log("\n");
+               log("  -logic-cost-rom <num>\n");
+               log("  -logic-cost-ram <num>\n");
+               log("    Sets the cost of a single bit for memory lowered to soft logic.\n");
+               log("\n");
+               log("  -no-auto-distributed\n");
+               log("  -no-auto-block\n");
+               log("  -no-auto-huge\n");
+               log("    Disables automatic mapping of given kind of RAMs.  Manual mapping\n");
+               log("    (using ram_style or other attributes) is still supported.\n");
+               log("\n");
+       }
+       void execute(std::vector<std::string> args, RTLIL::Design *design) override
+       {
+               std::vector<std::string> lib_files;
+               pool<std::string> defines;
+               PassOptions opts;
+               opts.no_auto_distributed = false;
+               opts.no_auto_block = false;
+               opts.no_auto_huge = false;
+               opts.logic_cost_ram = 1.0;
+               opts.logic_cost_rom = 1.0/16.0;
+               log_header(design, "Executing MEMORY_LIBMAP pass (mapping memories to cells).\n");
+
+               size_t argidx;
+               for (argidx = 1; argidx < args.size(); argidx++) {
+                       if (args[argidx] == "-lib" && argidx+1 < args.size()) {
+                               lib_files.push_back(args[++argidx]);
+                               continue;
+                       }
+                       if (args[argidx] == "-D" && argidx+1 < args.size()) {
+                               defines.insert(args[++argidx]);
+                               continue;
+                       }
+                       if (args[argidx] == "-no-auto-distributed") {
+                               opts.no_auto_distributed = true;
+                               continue;
+                       }
+                       if (args[argidx] == "-no-auto-block") {
+                               opts.no_auto_block = true;
+                               continue;
+                       }
+                       if (args[argidx] == "-no-auto-huge") {
+                               opts.no_auto_huge = true;
+                               continue;
+                       }
+                       if (args[argidx] == "-logic-cost-rom" && argidx+1 < args.size()) {
+                               opts.logic_cost_rom = strtod(args[++argidx].c_str(), nullptr);
+                               continue;
+                       }
+                       if (args[argidx] == "-logic-cost-ram" && argidx+1 < args.size()) {
+                               opts.logic_cost_ram = strtod(args[++argidx].c_str(), nullptr);
+                               continue;
+                       }
+                       break;
+               }
+               extra_args(args, argidx, design);
+
+               Library lib = parse_library(lib_files, defines);
+
+               for (auto module : design->selected_modules()) {
+                       MapWorker worker(module);
+                       auto mems = Mem::get_selected_memories(module);
+                       for (auto &mem : mems)
+                       {
+                               MemMapping map(worker, mem, lib, opts);
+                               int idx = -1;
+                               int best = map.logic_cost;
+                               if (!map.logic_ok) {
+                                       if (map.cfgs.empty())
+                                               log_error("no valid mapping found for memory %s.%s\n", log_id(module->name), log_id(mem.memid));
+                                       idx = 0;
+                                       best = map.cfgs[0].cost;
+                               }
+                               for (int i = 0; i < GetSize(map.cfgs); i++) {
+                                       if (map.cfgs[i].cost < best) {
+                                               idx = i;
+                                               best = map.cfgs[i].cost;
+                                       }
+                               }
+                               if (idx == -1) {
+                                       log("using FF mapping for memory %s.%s\n", log_id(module->name), log_id(mem.memid));
+                               } else {
+                                       map.emit(map.cfgs[idx]);
+                               }
+                       }
+               }
+       }
+} MemoryLibMapPass;
+
+PRIVATE_NAMESPACE_END