bfd/merge.c

   1 /* SEC_MERGE support.
   2    Copyright (C) 2001-2023 Free Software Foundation, Inc.
   3    Written by Jakub Jelinek <jakub@redhat.com>.
   4
   5    This file is part of BFD, the Binary File Descriptor library.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  20    MA 02110-1301, USA.  */
  21
  22
  23 /* This file contains support for merging duplicate entities within sections,
  24    as used in ELF SHF_MERGE.  */
  25
  26 #include "sysdep.h"
  27 #include <limits.h>
  28 #include "bfd.h"
  29 #include "elf-bfd.h"
  30 #include "libbfd.h"
  31 #include "objalloc.h"
  32 #include "libiberty.h"
  33
  34 /* We partition all mergable input sections into sets of similar
  35    characteristics.  These sets are the unit of merging.  All content
  36    of the input sections is scanned and inserted into a hash table.
  37    We also remember an input-offset to entry mapping per input section, but
  38    the content itself is removed.  After everything is read in we assign
  39    output offsets to all hash entries, and when relocations are processed we
  40    lookup the given input offset per input-section, get the matching entry
  41    and its output offset (possibly adjusted for offset pointing into the
  42    middle of an entry).
  43
  44    The input-offset-to-entry mapping (in map_ofs/map) is sorted, so in principle
  45    we could binary search it, but that's not cache-friendly and it's faster
  46    to add another lookup structure that gets us very near the correct
  47    entry in just one step (that's what ofstolowbound is for) and do a linear
  48    search from there.  */
  49
  50 /* An entry in the section merge hash table.  */
  51
  52 struct sec_merge_hash_entry
  53 {
  54   /* Length of this entry.  This includes the zero terminator.  */
  55   unsigned int len;
  56   /* Start of this string needs to be aligned to
  57      alignment octets (not 1 << align).  */
  58   unsigned int alignment;
  59   union
  60   {
  61     /* Index within the merged section.  */
  62     bfd_size_type index;
  63     /* Entry this is a suffix of (if alignment is 0).  */
  64     struct sec_merge_hash_entry *suffix;
  65   } u;
  66   /* Next entity in the hash table (in order of entering).  */
  67   struct sec_merge_hash_entry *next;
  68   char str[1];
  69 };
  70
  71 /* The section merge hash table.  */
  72
  73 struct sec_merge_hash
  74 {
  75   struct bfd_hash_table table;
  76   /* Next available index.  */
  77   bfd_size_type size;
  78   /* First entity in the SEC_MERGE sections of this type.  */
  79   struct sec_merge_hash_entry *first;
  80   /* Last entity in the SEC_MERGE sections of this type.  */
  81   struct sec_merge_hash_entry *last;
  82   /* Entity size.  */
  83   unsigned int entsize;
  84   /* Are entries fixed size or zero terminated strings?  */
  85   bool strings;
  86   /* struct-of-array variant of all entries in the hash-table: */
  87   unsigned int nbuckets;
  88   /* We keep hash-code and length of entry together in a separate
  89      array in such a way that it can be checked with just a single memory
  90      reference.  In this way we don't need indirect access to the entries
  91      in the normal case.  keys_lens[i] is 'hashcode << 32) | len' for entry
  92      i (which is pointed to be values[i]).  */
  93   uint64_t *key_lens;
  94   struct sec_merge_hash_entry **values;
  95 };
  96
  97 /* True when given NEWCOUNT and NBUCKETS indicate that the hash table needs
  98    resizing.  */
  99 #define NEEDS_RESIZE(newcount, nbuckets) ((newcount) > (nbuckets) / 3 * 2)
 100
 101 struct sec_merge_sec_info;
 102
 103 /* Information per merged blob.  This is the unit of merging and is
 104    related to (multiple) input sections of similar characteristics
 105    (alignment, entity size, strings or blobs).  */
 106 struct sec_merge_info
 107 {
 108   /* Chain of sec_merge_infos.  */
 109   struct sec_merge_info *next;
 110   /* Chain of sec_merge_sec_infos.  This first one will be the representative
 111      section that conceptually collects all merged content.  */
 112   struct sec_merge_sec_info *chain;
 113   struct sec_merge_sec_info **last;
 114   /* A hash table used to hold section content.  */
 115   struct sec_merge_hash *htab;
 116 };
 117
 118 /* Offset into input mergable sections are represented by this type.
 119    Note how doesn't support crazy large mergable sections.  */
 120 typedef uint32_t mapofs_type;
 121
 122 /* Given a sec_merge_sec_info S this gives the input offset of the IDX's
 123    recorded entry.  */
 124 #define MAP_OFS(S,IDX) (S)->map_ofs[IDX]
 125 /* And this gives the output offset (in the merged blob representing
 126    this S.  */
 127 #define MAP_IDX(S,IDX) (S)->map[IDX].idx
 128 /* For quick lookup of output offset given an input offset we store
 129    an array mapping intput-offset / OFSDIV to entry index.
 130    16 is better than 8, 32 is roughly same as 16, but uses less memory, so
 131    we use that. */
 132 #define OFSDIV 32
 133
 134 /* Information per input merge section.  */
 135 struct sec_merge_sec_info
 136 {
 137   /* Chain of sec_merge_sec_infos.  */
 138   struct sec_merge_sec_info *next;
 139   /* The corresponding section.  */
 140   asection *sec;
 141   /* Pointer to merge_info pointing to us.  */
 142   void **psecinfo;
 143   /* The merge entity this is a part of.  */
 144   struct sec_merge_info *sinfo;
 145   /* The section associated with sinfo (i.e. the representative section).
 146      Same as sinfo->chain->sec, but faster to access in the hot function.  */
 147   asection *reprsec;
 148   /* First string in this section.  */
 149   struct sec_merge_hash_entry *first_str;
 150   /* Sparse mapping from input offset to entry covering that offset:  */
 151   unsigned int noffsetmap;  /* Number of these mappings.  */
 152   mapofs_type *map_ofs;     /* Input offset.  */
 153   union {
 154       struct sec_merge_hash_entry *entry;  /* Covering hash entry ... */
 155       bfd_size_type idx;                   /* ... or destination offset.  */
 156   } *map;
 157   /* Quick access: index into map_ofs[].  ofstolowbound[o / OFSDIV]=I is
 158      such that map_ofs[I] is the smallest offset higher that
 159      rounddown(o, OFSDIV) (and hence I-1 is the largest entry whose offset is
 160      smaller or equal to o/OFSDIV*OFSDIV).  */
 161   unsigned int *ofstolowbound;
 162   int fast_state;
 163 };
 164
 165
 166 /* Given a merge hash table TABLE and a number of entries to be
 167    ADDED, possibly resize the table for this to fit without further
 168    resizing.  */
 169
 170 static bool
 171 sec_merge_maybe_resize (struct sec_merge_hash *table, unsigned added)
 172 {
 173   struct bfd_hash_table *bfdtab = &table->table;
 174   if (NEEDS_RESIZE (bfdtab->count + added, table->nbuckets))
 175     {
 176       unsigned i;
 177       unsigned long newnb = table->nbuckets * 2;
 178       struct sec_merge_hash_entry **newv;
 179       uint64_t *newl;
 180       unsigned long alloc;
 181
 182       while (NEEDS_RESIZE (bfdtab->count + added, newnb))
 183         {
 184           newnb *= 2;
 185           if (!newnb)
 186             return false;
 187         }
 188
 189       alloc = newnb * sizeof (newl[0]);
 190       if (alloc / sizeof (newl[0]) != newnb)
 191         return false;
 192       newl = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
 193       if (newl == NULL)
 194         return false;
 195       memset (newl, 0, alloc);
 196       alloc = newnb * sizeof (newv[0]);
 197       if (alloc / sizeof (newv[0]) != newnb)
 198         return false;
 199       newv = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
 200       if (newv == NULL)
 201         return false;
 202       memset (newv, 0, alloc);
 203
 204       for (i = 0; i < table->nbuckets; i++)
 205         {
 206           struct sec_merge_hash_entry *v = table->values[i];
 207           if (v)
 208             {
 209               uint32_t thishash = table->key_lens[i] >> 32;
 210               unsigned idx = thishash & (newnb - 1);
 211               while (newv[idx])
 212                 idx = (idx + 1) & (newnb - 1);
 213               newl[idx] = table->key_lens[i];
 214               newv[idx] = v;
 215             }
 216         }
 217
 218       table->key_lens = newl;
 219       table->values = newv;
 220       table->nbuckets = newnb;
 221     }
 222   return true;
 223 }
 224
 225 /* Insert STRING (actually a byte blob of length LEN, with pre-computed
 226    HASH and bucket _INDEX) into our hash TABLE.  */
 227
 228 static struct sec_merge_hash_entry *
 229 sec_merge_hash_insert (struct sec_merge_hash *table,
 230                  const char *string,
 231                  uint64_t hash, unsigned int len, unsigned int _index)
 232 {
 233   struct bfd_hash_table *bfdtab = &table->table;
 234   struct sec_merge_hash_entry *hashp;
 235
 236   hashp = (struct sec_merge_hash_entry *)
 237       bfd_hash_allocate (bfdtab, len + sizeof (struct sec_merge_hash_entry));
 238   if (hashp == NULL)
 239     return NULL;
 240
 241   memcpy (hashp->str, string, len);
 242   hashp->len = len;
 243   hashp->alignment = 0;
 244   hashp->u.suffix = NULL;
 245   hashp->next = NULL;
 246   // We must not need resizing, otherwise the estimation was wrong
 247   BFD_ASSERT (!NEEDS_RESIZE (bfdtab->count + 1, table->nbuckets));
 248   bfdtab->count++;
 249   table->key_lens[_index] = (hash << 32) | (uint32_t)len;
 250   table->values[_index] = hashp;
 251
 252   return hashp;
 253 }
 254
 255 /* Read four bytes from *STR, interpret it as 32bit unsigned little
 256    endian value and return that.  */
 257
 258 static inline uint32_t
 259 hash_read32 (const char *str)
 260 {
 261   uint32_t i;
 262   /* All reasonable compilers will inline this memcpy and generate optimal
 263      code on architectures that support unaligned (4-byte) accesses.  */
 264   memcpy(&i, str, 4);
 265 #ifdef WORDS_BIGENDIAN
 266   i = (i << 24) | ((i & 0xff00) << 8) | ((i >> 8) & 0xff00) | (i >> 24);
 267 #endif
 268   return i;
 269 }
 270
 271 /* Calculate and return a hashvalue of the bytes at STR[0..LEN-1].
 272    All non-zero lengths and all alignments are supported.
 273
 274    This is somewhat similar to xxh3 (of xxhash), but restricted to 32bit.
 275    On cc1 strings this has quite similar statistic properties, and we
 276    don't need to jump through hoops to get fast 64x64->128 mults,
 277    or 64bit arith on 32 bit hosts.  We also don't care for seeds
 278    or secrets.  They improve mixing very little.  */
 279
 280 static uint32_t
 281 hash_blob (const char *str, unsigned int len)
 282 {
 283   uint32_t ret = 0;
 284   uint32_t mul = (1 << 0) +  (1 << 2) + (1 << 3) + (1 << 5) + (1 << 7);
 285   mul += (1 << 11) + (1 << 13) + (1 << 17) + (0 << 19) + (1 << 23) + (1 << 29);
 286   mul += (1u << 31);
 287   if (len >= 8)
 288     {
 289       uint32_t acc = len * 0x9e3779b1;
 290       while (len >= 8)
 291         {
 292           uint32_t i1 = hash_read32  (str) ^ (0x396cfeb8 + 1*len);
 293           uint32_t i2 = hash_read32  (str + 4) ^ (0xbe4ba423 + 1*len);
 294           str += 8;
 295           len -= 8;
 296           uint64_t m = (uint64_t)i1 * i2;
 297           acc += (uint32_t)m ^ (uint32_t)(m >> 32);
 298         }
 299       acc = acc ^ (acc >> 7);
 300       uint64_t r = (uint64_t)mul * acc;
 301       ret = (uint32_t)r ^ (uint32_t)(r >> 32);
 302       if (len == 0)
 303         goto end;
 304     }
 305   if (len >= 4)
 306     {
 307       uint32_t i1 = hash_read32  (str);
 308       uint32_t i2 = hash_read32  (str + len - 4);
 309       i1 = ((i1 + len) ^ (i1 >> 7));
 310       i2 = i2 ^ (i2 >> 7);
 311       uint64_t r = (uint64_t)mul * i1 + i2;
 312       ret += r ^ (r >> 32);
 313     }
 314   else
 315     {
 316       /* Cleverly read in 1 to 3 bytes without further conditionals.  */
 317       unsigned char c1 = str[0];
 318       unsigned char c2 = str[len >> 1];
 319       unsigned char c3 = str[len - 1];
 320       uint32_t i1 = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24)
 321                      | ((uint32_t) c3) | (len << 8);
 322       i1 = i1 ^ (i1 >> 7);
 323       uint64_t r = (uint64_t)mul * i1;
 324       ret += r ^ (r >> 32);
 325     }
 326 end:
 327   return ret;
 328 }
 329
 330 /* Given a hash TABLE, return the hash of STRING (a blob described
 331    according to info in TABLE, either a character string, or some fixed
 332    size entity) and set *PLEN to the length of this blob.  */
 333
 334 static uint32_t
 335 hashit (struct sec_merge_hash *table, const char *string, unsigned int *plen)
 336 {
 337   const unsigned char *s;
 338   uint32_t hash;
 339   unsigned int len, i;
 340
 341   s = (const unsigned char *) string;
 342   if (table->strings)
 343     {
 344       if (table->entsize == 1)
 345         len = strlen (string) + 1;
 346       else
 347         {
 348           len = 0;
 349           for (;;)
 350             {
 351               for (i = 0; i < table->entsize; ++i)
 352                 if (s[i] != '\0')
 353                   break;
 354               if (i == table->entsize)
 355                 break;
 356               s += table->entsize;
 357               ++len;
 358             }
 359           len *= table->entsize;
 360           len += table->entsize;
 361         }
 362     }
 363   else
 364     len = table->entsize;
 365   hash = hash_blob (string, len);
 366   *plen = len;
 367   return hash;
 368 }
 369
 370 /* Lookup or insert a blob STRING (of length LEN, precomputed HASH and
 371    input ALIGNMENT) into TABLE.  Return the found or new hash table entry.  */
 372
 373 static struct sec_merge_hash_entry *
 374 sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
 375                        unsigned int len, uint64_t hash,
 376                        unsigned int alignment)
 377 {
 378   struct sec_merge_hash_entry *hashp;
 379   unsigned int _index;
 380
 381   /*printf ("YYY insert 0x%x into %u buckets (%s)\n",
 382           (unsigned)hash, (unsigned)table->nbuckets, string);*/
 383   uint64_t *key_lens = table->key_lens;
 384   struct sec_merge_hash_entry **values = table->values;
 385   uint64_t hlen = (hash << 32) | (uint32_t)len;
 386   unsigned int nbuckets = table->nbuckets;
 387   _index = hash & (nbuckets - 1);
 388   while (1)
 389     {
 390       uint64_t candlen = key_lens[_index];
 391       if (candlen == hlen
 392           && !memcmp (values[_index]->str, string, len))
 393         {
 394           hashp = values[_index];
 395           if (hashp->alignment < alignment)
 396             hashp->alignment = alignment;
 397           return hashp;
 398         }
 399       if (!(candlen & (uint32_t)-1))
 400         break;
 401       _index = (_index + 1) & (nbuckets - 1);
 402     }
 403
 404   hashp = sec_merge_hash_insert (table, string, hash, len, _index);
 405   if (hashp == NULL)
 406     return NULL;
 407   hashp->alignment = alignment;
 408
 409   table->size++;
 410   BFD_ASSERT (table->size == table->table.count);
 411   if (table->first == NULL)
 412     table->first = hashp;
 413   else
 414     table->last->next = hashp;
 415   table->last = hashp;
 416
 417   return hashp;
 418 }
 419
 420 /* Create a new hash table.  */
 421
 422 static struct sec_merge_hash *
 423 sec_merge_init (unsigned int entsize, bool strings)
 424 {
 425   struct sec_merge_hash *table;
 426
 427   table = (struct sec_merge_hash *) bfd_malloc (sizeof (struct sec_merge_hash));
 428   if (table == NULL)
 429     return NULL;
 430
 431   if (! bfd_hash_table_init_n (&table->table, NULL,
 432                                sizeof (struct sec_merge_hash_entry), 0x2000))
 433     {
 434       free (table);
 435       return NULL;
 436     }
 437
 438   table->size = 0;
 439   table->first = NULL;
 440   table->last = NULL;
 441   table->entsize = entsize;
 442   table->strings = strings;
 443
 444   table->nbuckets = 0x2000;
 445   table->key_lens = objalloc_alloc ((struct objalloc *) table->table.memory,
 446                                 table->nbuckets * sizeof (table->key_lens[0]));
 447   memset (table->key_lens, 0, table->nbuckets * sizeof (table->key_lens[0]));
 448   table->values = objalloc_alloc ((struct objalloc *) table->table.memory,
 449                                 table->nbuckets * sizeof (table->values[0]));
 450   memset (table->values, 0, table->nbuckets * sizeof (table->values[0]));
 451
 452   return table;
 453 }
 454
 455 /* Append the tuple of input-offset O corresponding
 456    to hash table ENTRY into SECINFO, such that we later may lookup the
 457    entry just by O.  */
 458
 459 static bool
 460 append_offsetmap (struct sec_merge_sec_info *secinfo,
 461                   mapofs_type o,
 462                   struct sec_merge_hash_entry *entry)
 463 {
 464   if ((secinfo->noffsetmap & 2047) == 0)
 465     {
 466       bfd_size_type amt;
 467       amt = (secinfo->noffsetmap + 2048);
 468       secinfo->map_ofs = bfd_realloc (secinfo->map_ofs,
 469                                       amt * sizeof(secinfo->map_ofs[0]));
 470       if (!secinfo->map_ofs)
 471         return false;
 472       secinfo->map = bfd_realloc (secinfo->map, amt * sizeof(secinfo->map[0]));
 473       if (!secinfo->map)
 474         return false;
 475     }
 476   unsigned int i = secinfo->noffsetmap++;
 477   MAP_OFS(secinfo, i) = o;
 478   secinfo->map[i].entry = entry;
 479   return true;
 480 }
 481
 482 /* Prepare the input-offset-to-entry tables after output offsets are
 483    determined.  */
 484
 485 static void
 486 prepare_offsetmap (struct sec_merge_sec_info *secinfo)
 487 {
 488   unsigned int noffsetmap = secinfo->noffsetmap;
 489   unsigned int i, lbi;
 490   bfd_size_type l, sz, amt;
 491
 492   secinfo->fast_state = 1;
 493
 494   for (i = 0; i < noffsetmap; i++)
 495     MAP_IDX(secinfo, i) = secinfo->map[i].entry->u.index;
 496
 497   sz = secinfo->sec->rawsize;
 498   amt = (sz / OFSDIV + 1) * sizeof (secinfo->ofstolowbound[0]);
 499   secinfo->ofstolowbound = bfd_zmalloc (amt);
 500   if (!secinfo->ofstolowbound)
 501     return;
 502   for (l = lbi = 0; l < sz; l += OFSDIV)
 503     {
 504       /* No need for bounds checking on lbi, as we've added a sentinel that's
 505          larger than any offset.  */
 506       while (MAP_OFS(secinfo, lbi) <= l)
 507         lbi++;
 508       //BFD_ASSERT ((l / OFSDIV) <= (i / OFSDIV));
 509       secinfo->ofstolowbound[l / OFSDIV] = lbi;
 510     }
 511   secinfo->fast_state = 2;
 512 }
 513
 514 static bool
 515 sec_merge_emit (bfd *abfd, struct sec_merge_sec_info *secinfo,
 516                 unsigned char *contents)
 517 {
 518   struct sec_merge_hash_entry *entry = secinfo->first_str;
 519   asection *sec = secinfo->sec;
 520   file_ptr offset = sec->output_offset;
 521   char *pad = NULL;
 522   bfd_size_type off = 0;
 523   unsigned int opb = bfd_octets_per_byte (abfd, sec);
 524   int alignment_power = sec->output_section->alignment_power * opb;
 525   bfd_size_type pad_len;  /* Octets.  */
 526
 527   /* FIXME: If alignment_power is 0 then really we should scan the
 528      entry list for the largest required alignment and use that.  */
 529   pad_len = alignment_power ? ((bfd_size_type) 1 << alignment_power) : 16;
 530
 531   pad = (char *) bfd_zmalloc (pad_len);
 532   if (pad == NULL)
 533     return false;
 534
 535   for (; entry != NULL; entry = entry->next)
 536     {
 537       const char *str;
 538       bfd_size_type len;
 539
 540       if (!entry->len)
 541         continue;
 542       BFD_ASSERT (entry->alignment);
 543       len = -off & (entry->alignment - 1);
 544       if (len != 0)
 545         {
 546           BFD_ASSERT (len <= pad_len);
 547           if (contents)
 548             {
 549               memcpy (contents + offset, pad, len);
 550               offset += len;
 551             }
 552           else if (bfd_write (pad, len, abfd) != len)
 553             goto err;
 554           off += len;
 555         }
 556
 557       str = entry->str;
 558       len = entry->len;
 559
 560       if (contents)
 561         {
 562           memcpy (contents + offset, str, len);
 563           offset += len;
 564         }
 565       else if (bfd_write (str, len, abfd) != len)
 566         goto err;
 567
 568       off += len;
 569     }
 570   BFD_ASSERT (!entry);
 571
 572   /* Trailing alignment needed?  */
 573   off = sec->size - off;
 574   if (1 && off != 0)
 575     {
 576       BFD_ASSERT (off <= pad_len);
 577       if (contents)
 578         memcpy (contents + offset, pad, off);
 579       else if (bfd_write (pad, off, abfd) != off)
 580         goto err;
 581     }
 582
 583   free (pad);
 584   return true;
 585
 586  err:
 587   free (pad);
 588   return false;
 589 }
 590
 591 /* Register a SEC_MERGE section as a candidate for merging.
 592    This function is called for all non-dynamic SEC_MERGE input sections.  */
 593
 594 bool
 595 _bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
 596                         void **psecinfo)
 597 {
 598   struct sec_merge_info *sinfo;
 599   struct sec_merge_sec_info *secinfo;
 600   asection *repr;
 601   unsigned int alignment_power;  /* Octets.  */
 602   unsigned int align;            /* Octets.  */
 603   unsigned int opb = bfd_octets_per_byte (abfd, sec);
 604
 605   if ((abfd->flags & DYNAMIC) != 0
 606       || (sec->flags & SEC_MERGE) == 0)
 607     abort ();
 608
 609   if (sec->size == 0
 610       || (sec->flags & SEC_EXCLUDE) != 0
 611       || sec->entsize == 0)
 612     return true;
 613
 614   if (sec->size % sec->entsize != 0)
 615     return true;
 616
 617   if ((sec->flags & SEC_RELOC) != 0)
 618     {
 619       /* We aren't prepared to handle relocations in merged sections.  */
 620       return true;
 621     }
 622
 623   if (sec->size > (mapofs_type)-1)
 624     {
 625       /* Input offsets must be representable by mapofs_type.  */
 626       return true;
 627     }
 628
 629 #ifndef CHAR_BIT
 630 #define CHAR_BIT 8
 631 #endif
 632   alignment_power = sec->alignment_power * opb;
 633   if (alignment_power >= sizeof (align) * CHAR_BIT)
 634     return true;
 635
 636   align = 1u << alignment_power;
 637   if ((sec->entsize < align
 638        && ((sec->entsize & (sec->entsize - 1))
 639            || !(sec->flags & SEC_STRINGS)))
 640       || (sec->entsize > align
 641           && (sec->entsize & (align - 1))))
 642     {
 643       /* Sanity check.  If string character size is smaller than
 644          alignment, then we require character size to be a power
 645          of 2, otherwise character size must be integer multiple
 646          of alignment.  For non-string constants, alignment must
 647          be smaller than or equal to entity size and entity size
 648          must be integer multiple of alignment.  */
 649       return true;
 650     }
 651
 652   /* Initialize the descriptor for this input section.  */
 653
 654   *psecinfo = secinfo = bfd_zalloc (abfd, sizeof (*secinfo));
 655   if (*psecinfo == NULL)
 656     goto error_return;
 657
 658   secinfo->sec = sec;
 659   secinfo->psecinfo = psecinfo;
 660
 661   /* Search for a matching output merged section.  */
 662   for (sinfo = (struct sec_merge_info *) *psinfo; sinfo; sinfo = sinfo->next)
 663     if (sinfo->chain
 664         && (repr = sinfo->chain->sec)
 665         && ! ((repr->flags ^ sec->flags) & (SEC_MERGE | SEC_STRINGS))
 666         && repr->entsize == sec->entsize
 667         && repr->alignment_power == sec->alignment_power
 668         && repr->output_section == sec->output_section)
 669       break;
 670
 671   if (sinfo == NULL)
 672     {
 673       /* Initialize the information we need to keep track of.  */
 674       sinfo = (struct sec_merge_info *)
 675           bfd_alloc (abfd, sizeof (struct sec_merge_info));
 676       if (sinfo == NULL)
 677         goto error_return;
 678       sinfo->next = (struct sec_merge_info *) *psinfo;
 679       sinfo->chain = NULL;
 680       sinfo->last = &sinfo->chain;
 681       *psinfo = sinfo;
 682       sinfo->htab = sec_merge_init (sec->entsize, (sec->flags & SEC_STRINGS));
 683       if (sinfo->htab == NULL)
 684         goto error_return;
 685     }
 686
 687   *sinfo->last = secinfo;
 688   sinfo->last = &secinfo->next;
 689
 690   secinfo->sinfo = sinfo;
 691   secinfo->reprsec = sinfo->chain->sec;
 692
 693   return true;
 694
 695  error_return:
 696   *psecinfo = NULL;
 697   return false;
 698 }
 699
 700 /* Record one whole input section (described by SECINFO) into the hash table
 701    SINFO.  */
 702
 703 static bool
 704 record_section (struct sec_merge_info *sinfo,
 705                 struct sec_merge_sec_info *secinfo)
 706 {
 707   asection *sec = secinfo->sec;
 708   struct sec_merge_hash_entry *entry;
 709   unsigned char *p, *end;
 710   bfd_vma mask, eltalign;
 711   unsigned int align;
 712   bfd_size_type amt;
 713   bfd_byte *contents;
 714   void *tmpptr;
 715
 716   amt = sec->size;
 717   if (sec->flags & SEC_STRINGS)
 718     /* Some versions of gcc may emit a string without a zero terminator.
 719        See http://gcc.gnu.org/ml/gcc-patches/2006-06/msg01004.html
 720        Allocate space for an extra zero.  */
 721     amt += sec->entsize;
 722   contents = bfd_malloc (amt);
 723   if (!contents)
 724     goto error_return;
 725
 726   /* Slurp in all section contents (possibly decompressing it).  */
 727   sec->rawsize = sec->size;
 728   if (sec->flags & SEC_STRINGS)
 729     memset (contents + sec->size, 0, sec->entsize);
 730   if (! bfd_get_full_section_contents (sec->owner, sec, &contents))
 731     goto error_return;
 732
 733   /* Now populate the hash table and offset mapping.  */
 734
 735   /* Presize the hash table for what we're going to add.  We overestimate
 736      quite a bit, but if it turns out to be too much then other sections
 737      merged into this area will make use of that as well.  */
 738   if (!sec_merge_maybe_resize (sinfo->htab, 1 + sec->size / 2))
 739     {
 740       bfd_set_error (bfd_error_no_memory);
 741       goto error_return;
 742     }
 743
 744   /* Walk through the contents, calculate hashes and length of all
 745      blobs (strings or fixed-size entries) we find and fill the
 746      hash and offset tables.  */
 747   align = sec->alignment_power;
 748   mask = ((bfd_vma) 1 << align) - 1;
 749   end = contents + sec->size;
 750   for (p = contents; p < end;)
 751     {
 752       unsigned len;
 753       uint32_t hash = hashit (sinfo->htab, (char*) p, &len);
 754       unsigned int ofs = p - contents;
 755       eltalign = ofs;
 756       eltalign = ((eltalign ^ (eltalign - 1)) + 1) >> 1;
 757       if (!eltalign || eltalign > mask)
 758         eltalign = mask + 1;
 759       entry = sec_merge_hash_lookup (sinfo->htab, (char *) p, len, hash,
 760                                      (unsigned) eltalign);
 761       if (! entry)
 762         goto error_return;
 763       if (! append_offsetmap (secinfo, ofs, entry))
 764         goto error_return;
 765       p += len;
 766     }
 767
 768   /* Add a sentinel element that's conceptually behind all others.  */
 769   append_offsetmap (secinfo, sec->size, NULL);
 770   /* But don't count it.  */
 771   secinfo->noffsetmap--;
 772
 773   free (contents);
 774   contents = NULL;
 775
 776   /* We allocate the ofsmap arrays in blocks of 2048 elements.
 777      In case we have very many small input files/sections,
 778      this might waste large amounts of memory, so reallocate these
 779      arrays here to their true size.  */
 780   amt = secinfo->noffsetmap + 1;
 781   tmpptr = bfd_realloc (secinfo->map, amt * sizeof(secinfo->map[0]));
 782   if (tmpptr)
 783     secinfo->map = tmpptr;
 784   tmpptr = bfd_realloc (secinfo->map_ofs, amt * sizeof(secinfo->map_ofs[0]));
 785   if (tmpptr)
 786     secinfo->map_ofs = tmpptr;
 787
 788   /*printf ("ZZZ %s:%s %u entries\n", sec->owner->filename, sec->name,
 789           (unsigned)secinfo->noffsetmap);*/
 790
 791   return true;
 792
 793  error_return:
 794   free (contents);
 795   contents = NULL;
 796   for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
 797     *secinfo->psecinfo = NULL;
 798   return false;
 799 }
 800
 801 /* qsort comparison function.  Won't ever return zero as all entries
 802    differ, so there is no issue with qsort stability here.  */
 803
 804 static int
 805 strrevcmp (const void *a, const void *b)
 806 {
 807   struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
 808   struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
 809   unsigned int lenA = A->len;
 810   unsigned int lenB = B->len;
 811   const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
 812   const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
 813   int l = lenA < lenB ? lenA : lenB;
 814
 815   while (l)
 816     {
 817       if (*s != *t)
 818         return (int) *s - (int) *t;
 819       s--;
 820       t--;
 821       l--;
 822     }
 823   return lenA - lenB;
 824 }
 825
 826 /* Like strrevcmp, but for the case where all strings have the same
 827    alignment > entsize.  */
 828
 829 static int
 830 strrevcmp_align (const void *a, const void *b)
 831 {
 832   struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
 833   struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
 834   unsigned int lenA = A->len;
 835   unsigned int lenB = B->len;
 836   const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
 837   const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
 838   int l = lenA < lenB ? lenA : lenB;
 839   int tail_align = (lenA & (A->alignment - 1)) - (lenB & (A->alignment - 1));
 840
 841   if (tail_align != 0)
 842     return tail_align;
 843
 844   while (l)
 845     {
 846       if (*s != *t)
 847         return (int) *s - (int) *t;
 848       s--;
 849       t--;
 850       l--;
 851     }
 852   return lenA - lenB;
 853 }
 854
 855 static inline int
 856 is_suffix (const struct sec_merge_hash_entry *A,
 857            const struct sec_merge_hash_entry *B)
 858 {
 859   if (A->len <= B->len)
 860     /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
 861        not to be equal by the hash table.  */
 862     return 0;
 863
 864   return memcmp (A->str + (A->len - B->len),
 865                  B->str, B->len) == 0;
 866 }
 867
 868 /* This is a helper function for _bfd_merge_sections.  It attempts to
 869    merge strings matching suffixes of longer strings.  */
 870 static struct sec_merge_sec_info *
 871 merge_strings (struct sec_merge_info *sinfo)
 872 {
 873   struct sec_merge_hash_entry **array, **a, *e;
 874   struct sec_merge_sec_info *secinfo;
 875   bfd_size_type size, amt;
 876   unsigned int alignment = 0;
 877
 878   /* Now sort the strings */
 879   amt = sinfo->htab->size * sizeof (struct sec_merge_hash_entry *);
 880   array = (struct sec_merge_hash_entry **) bfd_malloc (amt);
 881   if (array == NULL)
 882     return NULL;
 883
 884   for (e = sinfo->htab->first, a = array; e; e = e->next)
 885     if (e->alignment)
 886       {
 887         *a++ = e;
 888         /* Adjust the length to not include the zero terminator.  */
 889         e->len -= sinfo->htab->entsize;
 890         if (alignment != e->alignment)
 891           {
 892             if (alignment == 0)
 893               alignment = e->alignment;
 894             else
 895               alignment = (unsigned) -1;
 896           }
 897       }
 898
 899   sinfo->htab->size = a - array;
 900   if (sinfo->htab->size != 0)
 901     {
 902       qsort (array, (size_t) sinfo->htab->size,
 903              sizeof (struct sec_merge_hash_entry *),
 904              (alignment != (unsigned) -1 && alignment > sinfo->htab->entsize
 905               ? strrevcmp_align : strrevcmp));
 906
 907       /* Loop over the sorted array and merge suffixes */
 908       e = *--a;
 909       e->len += sinfo->htab->entsize;
 910       while (--a >= array)
 911         {
 912           struct sec_merge_hash_entry *cmp = *a;
 913
 914           cmp->len += sinfo->htab->entsize;
 915           if (e->alignment >= cmp->alignment
 916               && !((e->len - cmp->len) & (cmp->alignment - 1))
 917               && is_suffix (e, cmp))
 918             {
 919               cmp->u.suffix = e;
 920               cmp->alignment = 0;
 921             }
 922           else
 923             e = cmp;
 924         }
 925     }
 926
 927   free (array);
 928
 929   /* Now assign positions to the strings we want to keep.  */
 930   size = 0;
 931   secinfo = sinfo->chain;
 932   for (e = sinfo->htab->first; e; e = e->next)
 933     {
 934       if (e->alignment)
 935         {
 936           size = (size + e->alignment - 1) & ~((bfd_vma) e->alignment - 1);
 937           e->u.index = size;
 938           size += e->len;
 939         }
 940     }
 941   secinfo->sec->size = size;
 942
 943   /* And now adjust the rest, removing them from the chain (but not hashtable)
 944      at the same time.  */
 945   for (a = &sinfo->htab->first, e = *a; e; e = e->next)
 946     if (e->alignment)
 947       a = &e->next;
 948     else
 949       {
 950         *a = e->next;
 951         if (e->len)
 952           {
 953             e->alignment = e->u.suffix->alignment;
 954             e->u.index = e->u.suffix->u.index + (e->u.suffix->len - e->len);
 955           }
 956       }
 957
 958   BFD_ASSERT (!secinfo->first_str);
 959   secinfo->first_str = sinfo->htab->first;
 960
 961   return secinfo;
 962 }
 963
 964 /* This function is called once after all SEC_MERGE sections are registered
 965    with _bfd_merge_section.  */
 966
 967 bool
 968 _bfd_merge_sections (bfd *abfd,
 969                      struct bfd_link_info *info ATTRIBUTE_UNUSED,
 970                      void *xsinfo,
 971                      void (*remove_hook) (bfd *, asection *))
 972 {
 973   struct sec_merge_info *sinfo;
 974
 975   for (sinfo = (struct sec_merge_info *) xsinfo; sinfo; sinfo = sinfo->next)
 976     {
 977       struct sec_merge_sec_info *secinfo;
 978       bfd_size_type align;  /* Bytes.  */
 979
 980       if (! sinfo->chain)
 981         continue;
 982
 983       /* Record the sections into the hash table.  */
 984       align = 1;
 985       for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
 986         if (secinfo->sec->flags & SEC_EXCLUDE)
 987           {
 988             *secinfo->psecinfo = NULL;
 989             if (remove_hook)
 990               (*remove_hook) (abfd, secinfo->sec);
 991           }
 992         else
 993           {
 994             if (!record_section (sinfo, secinfo))
 995               return false;
 996             if (align)
 997               {
 998                 unsigned int opb = bfd_octets_per_byte (abfd, secinfo->sec);
 999
1000                 align = (bfd_size_type) 1 << secinfo->sec->alignment_power;
1001                 if (((secinfo->sec->size / opb) & (align - 1)) != 0)
1002                   align = 0;
1003               }
1004           }
1005
1006       if (sinfo->htab->first == NULL)
1007         continue;
1008
1009       if (sinfo->htab->strings)
1010         {
1011           secinfo = merge_strings (sinfo);
1012           if (!secinfo)
1013             return false;
1014         }
1015       else
1016         {
1017           struct sec_merge_hash_entry *e = sinfo->htab->first;
1018           bfd_size_type size = 0;  /* Octets.  */
1019
1020           /* Things are much simpler for non-strings.
1021              Just assign them slots in the section.  */
1022           secinfo = sinfo->chain;
1023           BFD_ASSERT (!secinfo->first_str);
1024           secinfo->first_str = e;
1025           for (e = sinfo->htab->first; e; e = e->next)
1026             {
1027               if (e->alignment)
1028                 {
1029                   size = (size + e->alignment - 1)
1030                          & ~((bfd_vma) e->alignment - 1);
1031                   e->u.index = size;
1032                   size += e->len;
1033                 }
1034             }
1035           secinfo->sec->size = size;
1036         }
1037
1038       /* If the input sections were padded according to their alignments,
1039          then pad the output too.  */
1040       if (align)
1041         secinfo->sec->size = (secinfo->sec->size + align - 1) & -align;
1042
1043       /* Finally remove all input sections which have not made it into
1044          the hash table at all.  */
1045       for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
1046         if (secinfo->first_str == NULL)
1047           secinfo->sec->flags |= SEC_EXCLUDE | SEC_KEEP;
1048     }
1049
1050   return true;
1051 }
1052
1053 /* Write out the merged section.  */
1054
1055 bool
1056 _bfd_write_merged_section (bfd *output_bfd, asection *sec, void *psecinfo)
1057 {
1058   struct sec_merge_sec_info *secinfo;
1059   file_ptr pos;
1060   unsigned char *contents;
1061   Elf_Internal_Shdr *hdr;
1062
1063   secinfo = (struct sec_merge_sec_info *) psecinfo;
1064
1065   if (!secinfo)
1066     return false;
1067
1068   if (secinfo->first_str == NULL)
1069     return true;
1070
1071   /* FIXME: octets_per_byte.  */
1072   hdr = &elf_section_data (sec->output_section)->this_hdr;
1073   if (hdr->sh_offset == (file_ptr) -1)
1074     {
1075       /* We must compress this section.  Write output to the
1076          buffer.  */
1077       contents = hdr->contents;
1078       if (contents == NULL)
1079         abort ();
1080     }
1081   else
1082     {
1083       contents = NULL;
1084       pos = sec->output_section->filepos + sec->output_offset;
1085       if (bfd_seek (output_bfd, pos, SEEK_SET) != 0)
1086         return false;
1087     }
1088
1089   BFD_ASSERT (sec == secinfo->sec);
1090   BFD_ASSERT (secinfo == secinfo->sinfo->chain);
1091   if (! sec_merge_emit (output_bfd, secinfo, contents))
1092     return false;
1093
1094   return true;
1095 }
1096
1097 /* Adjust an address in the SEC_MERGE section.  Given OFFSET within
1098    *PSEC, this returns the new offset in the adjusted SEC_MERGE
1099    section and writes the new section back into *PSEC.  */
1100
1101 bfd_vma
1102 _bfd_merged_section_offset (bfd *output_bfd ATTRIBUTE_UNUSED, asection **psec,
1103                             void *psecinfo, bfd_vma offset)
1104 {
1105   struct sec_merge_sec_info *secinfo;
1106   asection *sec = *psec;
1107
1108   secinfo = (struct sec_merge_sec_info *) psecinfo;
1109
1110   if (!secinfo)
1111     return offset;
1112
1113   if (offset >= sec->rawsize)
1114     {
1115       if (offset > sec->rawsize)
1116         _bfd_error_handler
1117           /* xgettext:c-format */
1118           (_("%pB: access beyond end of merged section (%" PRId64 ")"),
1119            sec->owner, (int64_t) offset);
1120       return secinfo->first_str ? sec->size : 0;
1121     }
1122
1123   if (secinfo->fast_state != 2)
1124     {
1125       if (!secinfo->fast_state)
1126         prepare_offsetmap (secinfo);
1127       if (secinfo->fast_state != 2)
1128         return offset;
1129     }
1130
1131   long lb = secinfo->ofstolowbound[offset / OFSDIV];
1132   *psec = secinfo->reprsec;
1133
1134   /* No need for bounds checking on lb, as we've added a sentinel that's
1135      larger than any offset.  */
1136   while (MAP_OFS(secinfo, lb) <= offset)
1137     lb++;
1138   lb--;
1139
1140   /*printf ("YYY (%s:%s):%u -> (%s):%u\n",
1141           sec->owner->filename, sec->name, (unsigned)offset,
1142           (*psec)->name, (unsigned)lb);*/
1143   return MAP_IDX(secinfo, lb) + offset - MAP_OFS(secinfo, lb);
1144 }
1145
1146 /* Tidy up when done.  */
1147
1148 void
1149 _bfd_merge_sections_free (void *xsinfo)
1150 {
1151   struct sec_merge_info *sinfo;
1152
1153   for (sinfo = (struct sec_merge_info *) xsinfo; sinfo; sinfo = sinfo->next)
1154     {
1155       struct sec_merge_sec_info *secinfo;
1156       for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
1157         {
1158           free (secinfo->ofstolowbound);
1159           free (secinfo->map);
1160           free (secinfo->map_ofs);
1161         }
1162       bfd_hash_table_free (&sinfo->htab->table);
1163       free (sinfo->htab);
1164     }
1165 }