Codebase list asymptote / debian/2.23-2 symbol.cc
debian/2.23-2

Tree @debian/2.23-2 (Download .tar.gz)

symbol.cc @debian/2.23-2raw · history · blame

/*****
 * symbol.cc
 * Andy Hammerlindl 2002/06/18
 *
 * Creates symbols from strings so that multiple calls for a symbol of
 * the same string will return an identical object.
 *****/

#include <cstring>
#include <cstdlib>

using std::strlen;


#include "settings.h"
#include "symbol.h"

namespace sym {

const char USED = 1;
const char SKIP = 2;

struct symbolRecord {
  // When a symbol is entered into the table, its hash is computed.  If the
  // corresponding entry in the table is full, this value is incremented until
  // an empty slot is found.  hashplus stores the end value.
  // Each symbol has a unique hashplus value, even if there is a collision in
  // the original hashing function.
  uint hashplus;

  // Whether the cell of the table is empty, in use, or a "skip" entry due to
  // a resizing of the table.
  unsigned char flag;

  // Pointer to a copy of the string (allocated on the heap).  This string
  // will never be deallocated.  Symbols, in essence, last forever.
  char *s;
};

// The table size must be a power of two so that (h % tableSize) can be
// replaced by (h & tableMask).  1 << 15 was chosen based on the number of
// unique symbols (roughly 4000) which occured in all of the base modules.
const size_t SYMBOL_TABLE_BASE_CAPACITY = 1 << 15;
symbolRecord baseSymbolTable[SYMBOL_TABLE_BASE_CAPACITY];

symbolRecord *table = baseSymbolTable;
size_t tableCapacity = SYMBOL_TABLE_BASE_CAPACITY;
uint tableMask = 0;
size_t tableSize = 0;

symbolRecord &recordByHashplus(uint h)
{
  return table[h & tableMask];
}

GCInit symbol::initialize;
symbol symbol::nullsym;
symbol symbol::initsym;
symbol symbol::castsym;
symbol symbol::ecastsym;

const char *nullsymstr = "<nullsym>";

void initTable() {
  tableMask = (uint)(tableCapacity - 1);
  tableSize = 0;

  // Set every entry to empty.  (Is this faster than memsetting the whole
  // thing?)
  for (size_t i = 0; i < tableCapacity; ++i)
    table[i].flag = 0;

  // The zeroth entry is reserved for the "null" symbol.
  if (table == baseSymbolTable) {
    table[0].flag = USED;
    table[0].s = new char[strlen(nullsymstr) + 1];
    strcpy(table[0].s, nullsymstr);
    ++tableSize;

    symbol::nullsym.hashplus = 0;

    symbol::initsym = symbol::opTrans("init");
    symbol::castsym = symbol::opTrans("cast");
    symbol::ecastsym = symbol::opTrans("ecast");
  }
}

// Hashing constants found experimentally to reduce collision (a little).
const uint A = 25191, B = 16342, C = 1746, D = 18326;

// Hash the string into an integer.  Experimental testing has shown that
// hashing only the first few letters seems to be faster than hashing deeper
// into the string, even though this approach causes more hash collisions.
uint hash(const char *s, size_t len)
{
  uint h = s[0];
  if (len == 2)
    return h;
  h += A*s[1];
  if (len == 3)
    return h;
  h += B*s[2];
  if (len == 4)
    return h;
  h += C*s[3];
  if (len == 5)
    return h;
  h += D*s[4];
  return h+len;
}

/* Under normal circumstances, the initial table should be large enough for
 * all of the symbols used and will never be resized.  Just in case the
 * program encounters a large number of distinct symbols, we implement
 * resizing of the table.
 */
void resizeTable() {
  symbolRecord *oldTable = table;
  size_t oldSize = tableSize;
  size_t oldCapacity = tableCapacity;

  tableCapacity *= 4;
  table = new symbolRecord[tableCapacity];
  initTable();

  // The null symbol is a special case.
  table[0] = oldTable[0];
  ++tableSize;

#if 0
  printf("old:\n");
  for (size_t i = 0; i < oldCapacity; ++i) {
    symbolRecord &r = oldTable[i];

    if (r.flag != USED)
      continue;

    printf("  %u -> %s\n", r.hashplus, r.s);
  }
#endif

  for (size_t i = 1; i < oldCapacity; ++i) {
    symbolRecord &r = oldTable[i];

    if (r.flag != USED)
      continue;

    // Entries that were skipped over when this symbol was entered into the
    // old hash table may not appear in the same spot in the new hash table.
    // Put "SKIP" entries in their place, so that the symbol will still be
    // found.
    for (uint h = hash(r.s, strlen(r.s)+1); h < r.hashplus; ++h) {
      symbolRecord &skipr = recordByHashplus(h);
      if (skipr.flag == 0)
        skipr.flag = SKIP;
    }

    // Enter the symbol in its spot.
    symbolRecord &newr = recordByHashplus(r.hashplus);
    assert(newr.flag != USED);
    newr.flag = USED;
    newr.hashplus = r.hashplus;
    newr.s = r.s;
    ++tableSize;
  }

#if 0
  printf("new:\n");
  for (size_t i = 0; i < tableCapacity; ++i) {
    symbolRecord &r = table[i];

    if (r.flag != USED)
      continue;

    printf("  %u -> %s\n", r.hashplus, r.s);
  }
#endif

  assert(tableSize == oldSize);

  // Debugging resize.
  for (size_t i = 1; i < oldCapacity; ++i) {
    symbolRecord &r = oldTable[i];

    if (r.flag != USED)
      continue;

    symbolRecord &newr = recordByHashplus(r.hashplus);
    assert(newr.hashplus == r.hashplus);
    assert(newr.flag != 0);
    assert(newr.flag != SKIP);
    assert(newr.flag == USED);
    assert(newr.s = r.s);
    if (strncmp(r.s, "gensym", 6) != 0)
      assert(symbol::rawTrans(r.s, strlen(r.s)+1).hashplus == r.hashplus);
  }

#if 0
  // Diagnostics.
  uint empty=0, used=0, skip=0;
  for (size_t i = 0; i < tableCapacity; ++i) {
    symbolRecord &r = table[i];
    if (r.flag == 0) ++empty;
    else if (r.flag == USED) ++used;
    else if (r.flag == SKIP) ++skip;
    else assert("Unknown flag" == 0);
  }

  cout << "Resized symbol table.  "
       << "empty: " << empty
       << "used: " << used
       << "skip: " << skip
       << endl;
#endif
}

symbol symbolize(uint h) {
  symbol s;
  s.hashplus = h;
  return s;
}

// Handles the insertion of a new symbol into a table the has been resized (or
// needs resizing).
symbol advancedInsert(const char *s, size_t len)
{
  if (2*tableSize >= tableCapacity)
    resizeTable();

  uint hashplus = hash(s, len);

#if 1
  assert(s != 0);
  assert(len > 0);
  assert(2*tableSize <= tableCapacity);
#endif

  // We know the symbol is not in the table.  Just search for the first unused
  // entry (either empty or a skip entry) and insert there.
  for (;;) {
    symbolRecord &r = recordByHashplus(hashplus);
    
    if (r.flag != USED) {
      r.flag = USED;
      r.s = new char[len];
      memcpy(r.s, s, len);
      assert(r.s[len-1] == '\0');
      r.hashplus = hashplus;

      ++tableSize;
      assert(2*tableSize <= tableCapacity);

      return symbolize(hashplus);
    }
    
    ++hashplus;
  }

  assert("Unreachable code" == 0);
  return symbol::nullsym;
}

symbol symbol::gensym(string s) {
  // Gensym can be inserted as if it were a normal string not already in the
  // table.  advancedInsert handles this.
  s = "gensym " + s;
  return advancedInsert(s.c_str(), s.size() + 1);
}

symbol symbol::rawTrans(const char *s, size_t len)
{
  uint hashplus = sym::hash(s, len);

#if 1
  assert(s != 0);
  assert(len > 0);
  assert(2*tableSize <= tableCapacity);
#endif

  // Search through the table till we find the symbol already translated or
  // an empty field.
  for (;;) {
    symbolRecord &r = recordByHashplus(hashplus);
    
    // Translating pre-existing symbols is more common, so check for it first.
    if (r.hashplus == hashplus &&
        r.flag == USED &&
        strncmp(r.s, s, len) == 0) {
      return symbolize(hashplus);
    }
    
    // Then check for an empty entry, in which case the entry is added.
    if (r.flag == 0) {
      // Test if the table needs resizing before entering a new symbol, or if
      // the table has already been resized.  In either case, the symbol will
      // be added to a resized table which may contain skip entries, and a
      // more involved insertion routine is needed.
      if (2*tableSize >= SYMBOL_TABLE_BASE_CAPACITY)
        return advancedInsert(s, len);

      r.flag = USED;
      r.s = new char[len];
      memcpy(r.s, s, len);
      assert(r.s[len-1] == '\0');
      r.hashplus = hashplus;

      ++tableSize;
      assert(2*tableSize <= tableCapacity);

      return symbolize(hashplus);
    }
    
    // A case where a different symbol is in the spot, continue along the
    // table.
    ++hashplus;
  }

  assert("Unreachable code" == 0);
  return symbol::nullsym;
}

symbol::operator string () const {
  symbolRecord &r = recordByHashplus(this->hashplus);
  return (string)r.s;
}

ostream& operator<< (ostream& out, const symbol sym)
{
  symbolRecord &r = recordByHashplus(sym.hashplus);
  return out << r.s;
}

} // end namespace sym

/* Define all of operator symbols SYM_PLUS, etc. */
#define OPSYMBOL(str, name) \
  sym::symbol name = sym::symbol::opTrans(str)
#include "opsymbols.h"
#undef OPSYMBOL

/* Define all of the symbols of the type SYM(name) in selected files. */
#define ADDSYMBOL(name) \
  sym::symbol PRETRANSLATED_SYMBOL_##name = sym::symbol::literalTrans(#name)
#include "allsymbols.h"
#undef ADDSYMBOL