/* sqlite/tool/mkkeywordhash.c */

/*
** Compile and run this standalone program in order to generate code that
** implements a function that will translate alphabetic identifiers into
** parser token codes.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
** All the keywords of the SQL language are stored in a hash table
** composed of instances of the following structure.  Only the first
** two fields are given in the static initializers; the rest are
** computed when the program runs.
*/
typedef struct Keyword {
  char *zName;         /* Text of the keyword */
  char *zTokenType;    /* Name of the parser token this keyword maps to */
  int hash;            /* Hash value computed from the keyword text */
  int offset;          /* Where the name begins within the name string */
  int len;             /* Number of characters in zName, excluding the \000 */
  int iNext;           /* aKeywordTable[] index of next entry with same hash */
} Keyword;

/*
** These are the keywords.
**
** Each entry supplies only the keyword text and the name of the parser
** token it maps to.  The remaining fields (hash, offset, len, iNext) are
** left to their implicit zero initial value here and are computed by
** main().  More than one keyword may map to the same token: the join
** modifiers all map to TK_JOIN_KW, and TEMP and TEMPORARY both map to
** TK_TEMP.
**
** The entries do not have to be listed in sorted order because main()
** sorts the table by keyword name before using it.
*/
static Keyword aKeywordTable[] = {
  { "ABORT",            "TK_ABORT",        },
  { "AFTER",            "TK_AFTER",        },
  { "ALL",              "TK_ALL",          },
  { "AND",              "TK_AND",          },
  { "AS",               "TK_AS",           },
  { "ASC",              "TK_ASC",          },
  { "ATTACH",           "TK_ATTACH",       },
  { "BEFORE",           "TK_BEFORE",       },
  { "BEGIN",            "TK_BEGIN",        },
  { "BETWEEN",          "TK_BETWEEN",      },
  { "BY",               "TK_BY",           },
  { "CASCADE",          "TK_CASCADE",      },
  { "CASE",             "TK_CASE",         },
  { "CHECK",            "TK_CHECK",        },
  { "COLLATE",          "TK_COLLATE",      },
  { "COMMIT",           "TK_COMMIT",       },
  { "CONFLICT",         "TK_CONFLICT",     },
  { "CONSTRAINT",       "TK_CONSTRAINT",   },
  { "CREATE",           "TK_CREATE",       },
  { "CROSS",            "TK_JOIN_KW",      },
  { "DATABASE",         "TK_DATABASE",     },
  { "DEFAULT",          "TK_DEFAULT",      },
  { "DEFERRED",         "TK_DEFERRED",     },
  { "DEFERRABLE",       "TK_DEFERRABLE",   },
  { "DELETE",           "TK_DELETE",       },
  { "DESC",             "TK_DESC",         },
  { "DETACH",           "TK_DETACH",       },
  { "DISTINCT",         "TK_DISTINCT",     },
  { "DROP",             "TK_DROP",         },
  { "END",              "TK_END",          },
  { "EACH",             "TK_EACH",         },
  { "ELSE",             "TK_ELSE",         },
  { "EXCEPT",           "TK_EXCEPT",       },
  { "EXCLUSIVE",        "TK_EXCLUSIVE",    },
  { "EXPLAIN",          "TK_EXPLAIN",      },
  { "FAIL",             "TK_FAIL",         },
  { "FOR",              "TK_FOR",          },
  { "FOREIGN",          "TK_FOREIGN",      },
  { "FROM",             "TK_FROM",         },
  { "FULL",             "TK_JOIN_KW",      },
  { "GLOB",             "TK_GLOB",         },
  { "GROUP",            "TK_GROUP",        },
  { "HAVING",           "TK_HAVING",       },
  { "IGNORE",           "TK_IGNORE",       },
  { "IMMEDIATE",        "TK_IMMEDIATE",    },
  { "IN",               "TK_IN",           },
  { "INDEX",            "TK_INDEX",        },
  { "INITIALLY",        "TK_INITIALLY",    },
  { "INNER",            "TK_JOIN_KW",      },
  { "INSERT",           "TK_INSERT",       },
  { "INSTEAD",          "TK_INSTEAD",      },
  { "INTERSECT",        "TK_INTERSECT",    },
  { "INTO",             "TK_INTO",         },
  { "IS",               "TK_IS",           },
  { "ISNULL",           "TK_ISNULL",       },
  { "JOIN",             "TK_JOIN",         },
  { "KEY",              "TK_KEY",          },
  { "LEFT",             "TK_JOIN_KW",      },
  { "LIKE",             "TK_LIKE",         },
  { "LIMIT",            "TK_LIMIT",        },
  { "MATCH",            "TK_MATCH",        },
  { "NATURAL",          "TK_JOIN_KW",      },
  { "NOT",              "TK_NOT",          },
  { "NOTNULL",          "TK_NOTNULL",      },
  { "NULL",             "TK_NULL",         },
  { "OF",               "TK_OF",           },
  { "OFFSET",           "TK_OFFSET",       },
  { "ON",               "TK_ON",           },
  { "OR",               "TK_OR",           },
  { "ORDER",            "TK_ORDER",        },
  { "OUTER",            "TK_JOIN_KW",      },
  { "PRAGMA",           "TK_PRAGMA",       },
  { "PRIMARY",          "TK_PRIMARY",      },
  { "RAISE",            "TK_RAISE",        },
  { "REFERENCES",       "TK_REFERENCES",   },
  { "REPLACE",          "TK_REPLACE",      },
  { "RESTRICT",         "TK_RESTRICT",     },
  { "RIGHT",            "TK_JOIN_KW",      },
  { "ROLLBACK",         "TK_ROLLBACK",     },
  { "ROW",              "TK_ROW",          },
  { "SELECT",           "TK_SELECT",       },
  { "SET",              "TK_SET",          },
  { "STATEMENT",        "TK_STATEMENT",    },
  { "TABLE",            "TK_TABLE",        },
  { "TEMP",             "TK_TEMP",         },
  { "TEMPORARY",        "TK_TEMP",         },
  { "THEN",             "TK_THEN",         },
  { "TRANSACTION",      "TK_TRANSACTION",  },
  { "TRIGGER",          "TK_TRIGGER",      },
  { "UNION",            "TK_UNION",        },
  { "UNIQUE",           "TK_UNIQUE",       },
  { "UPDATE",           "TK_UPDATE",       },
  { "USING",            "TK_USING",        },
  { "VACUUM",           "TK_VACUUM",       },
  { "VALUES",           "TK_VALUES",       },
  { "VIEW",             "TK_VIEW",         },
  { "WHEN",             "TK_WHEN",         },
  { "WHERE",            "TK_WHERE",        },
};

/* Count of entries in aKeywordTable[].  The value has type size_t. */
#define NKEYWORD (sizeof aKeywordTable / sizeof aKeywordTable[0])

/*
** Case-folding lookup table indexed by an unsigned byte value.  The
** entries for 'A' through 'Z' (65..90) hold the matching lower-case
** letter (97..122); every other entry holds its own index.  Each row
** below covers sixteen consecutive byte values.
*/
const unsigned char sqlite3UpperToLower[] = {
  /* 0x00 */   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  /* 0x10 */  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  /* 0x20 */  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  /* 0x30 */  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  /* 0x40 */  64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  /* 0x50 */ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
  /* 0x60 */  96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  /* 0x70 */ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  /* 0x80 */ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  /* 0x90 */ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  /* 0xa0 */ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  /* 0xb0 */ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  /* 0xc0 */ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  /* 0xd0 */ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  /* 0xe0 */ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  /* 0xf0 */ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};

/* Short alias used inside this file */
#define UpperToLower sqlite3UpperToLower

/*
** Comparison callback suitable for qsort().  Both arguments point to
** Keyword records; the records are ordered by their zName text using
** strcmp(), whose result is returned unchanged.
*/
static int keywordCompare(const void *a, const void *b){
  const char *zLeft = ((const Keyword*)a)->zName;
  const char *zRight = ((const Keyword*)b)->zName;
  return strcmp(zLeft, zRight);
}

/*
** Print one integer entry of a generated table on standard output.
**
** *pnOnRow is the number of entries already printed on the current output
** line.  The line is indented before its first entry and is terminated
** (with *pnOnRow reset to zero) once it holds 13 entries.
*/
static void emitTableInt(int *pnOnRow, int value){
  if( *pnOnRow==0 ) printf("    ");
  printf(" %3d,", value);
  (*pnOnRow)++;
  if( *pnOnRow>12 ){
    printf("\n");
    *pnOnRow = 0;
  }
}

/*
** This routine does the work.  The generated code is printed on standard
** output.
**
** Steps:
**   1. Sort aKeywordTable[] by keyword name.
**   2. Compute the length, hash and name-string offset of every keyword.
**      A keyword that is a prefix of the keyword that follows it shares
**      that keyword's text, so its offset is not advanced.
**   3. Try every hash table size from NKEYWORD/2 through 2*NKEYWORD and
**      keep the size with the lowest collision score.
**   4. Build the collision chains (aHash[] and iNext hold index+1 so that
**      zero can mean "end of chain").
**   5. Print the sqlite3KeywordCode() function with its lookup tables.
**
** The command-line arguments are not used.  Returns 0 on success, or 1
** (after a message on standard error, with nothing printed on standard
** output) if there are too many keywords for the generated unsigned char
** tables or for the local scratch array.
*/
int main(int argc, char **argv){
  int i, j, h;
  int bestSize, bestCount;
  int count;
  int nChar;
  int aHash[1000];  /* Scratch buckets; needs at least 2*NKEYWORD slots */
  /* NKEYWORD has type size_t.  Keep an int copy so that it can be passed
  ** to printf("%d") and compared against the int loop counters. */
  const int nKeyword = (int)NKEYWORD;
  const int nHashSlot = (int)(sizeof(aHash)/sizeof(aHash[0]));

  (void)argc;
  (void)argv;

  /* The generated aHash[] and aNext[] tables are unsigned char and hold
  ** keyword index+1, and the size search below touches up to 2*nKeyword
  ** slots of the local aHash[] array. */
  if( nKeyword>255 || 2*nKeyword>nHashSlot ){
    fprintf(stderr, "mkkeywordhash: too many keywords (%d)\n", nKeyword);
    return 1;
  }

  /* Make sure the table is sorted */
  qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare);

  /* Fill in the hash value, length, and offset for all entries */
  nChar = 0;
  for(i=0; i<nKeyword; i++){
    Keyword *p = &aKeywordTable[i];
    p->len = (int)strlen(p->zName);
    /* The hash must match the expression emitted into the generated code
    ** below: first char * 5 + last char * 3 + length, case-folded.  Index
    ** the folding table with unsigned char so the index is never negative. */
    p->hash = UpperToLower[(unsigned char)p->zName[0]]*5 +
              UpperToLower[(unsigned char)p->zName[p->len-1]]*3 + p->len;
    p->offset = nChar;
    if( i<nKeyword-1
     && strncmp(p->zName, aKeywordTable[i+1].zName, (size_t)p->len)==0 ){
      /* This entry is a prefix of the one that follows.  Do not advance
      ** the offset */
    }else{
      nChar += p->len;
    }
  }

  /* Figure out how big to make the hash table in order to minimize the
  ** number of collisions.  Each bucket is scored 2^k-1 for k entries, so
  ** long chains are penalized heavily. */
  bestSize = nKeyword;
  bestCount = nKeyword*nKeyword;
  for(i=nKeyword/2; i<=2*nKeyword; i++){
    for(j=0; j<i; j++) aHash[j] = 0;
    for(j=0; j<nKeyword; j++){
      h = aKeywordTable[j].hash % i;
      aHash[h] *= 2;
      aHash[h]++;
    }
    for(j=count=0; j<i; j++) count += aHash[j];
    if( count<bestCount ){
      bestCount = count;
      bestSize = i;
    }
  }

  /* Compute the hash.  Each new entry is pushed on the front of its
  ** bucket's chain. */
  for(i=0; i<bestSize; i++) aHash[i] = 0;
  for(i=0; i<nKeyword; i++){
    h = aKeywordTable[i].hash % bestSize;
    aKeywordTable[i].iNext = aHash[h];
    aHash[h] = i+1;
  }

  /* Begin generating code */
  printf("int sqlite3KeywordCode(const char *z, int n){\n");

  /* The text of all keywords, omitting any keyword whose text is shared
  ** with the entry that follows it. */
  printf("  static const char zText[%d] =\n", nChar+1);
  for(i=j=0; i<nKeyword; i++){
    Keyword *p = &aKeywordTable[i];
    if( i<nKeyword-1 && p->offset==aKeywordTable[i+1].offset ) continue;
    if( j==0 ) printf("    \"");
    printf("%s", p->zName);
    j += p->len;
    if( j>60 ){
      printf("\"\n");
      j = 0;
    }
  }
  printf("%s;\n", j>0 ? "\"" : "  ");

  printf("  static const unsigned char aHash[%d] = {\n", bestSize);
  for(i=j=0; i<bestSize; i++){
    emitTableInt(&j, aHash[i]);
  }
  printf("%s  };\n", j==0 ? "" : "\n");

  printf("  static const unsigned char aNext[%d] = {\n", nKeyword);
  for(i=j=0; i<nKeyword; i++){
    emitTableInt(&j, aKeywordTable[i].iNext);
  }
  printf("%s  };\n", j==0 ? "" : "\n");

  printf("  static const unsigned char aLen[%d] = {\n", nKeyword);
  for(i=j=0; i<nKeyword; i++){
    emitTableInt(&j, aKeywordTable[i].len);
  }
  printf("%s  };\n", j==0 ? "" : "\n");

  printf("  static const unsigned short int aOffset[%d] = {\n", nKeyword);
  for(i=j=0; i<nKeyword; i++){
    emitTableInt(&j, aKeywordTable[i].offset);
  }
  printf("%s  };\n", j==0 ? "" : "\n");

  /* Token names, five per line, each padded to a 14-character column */
  printf("  static const unsigned char aCode[%d] = {\n", nKeyword);
  for(i=j=0; i<nKeyword; i++){
    char *zToken = aKeywordTable[i].zTokenType;
    if( j==0 ) printf("    ");
    printf("%s,%*s", zToken, 14-(int)strlen(zToken), "");
    j++;
    if( j>=5 ){
      printf("\n");
      j = 0;
    }
  }
  printf("%s  };\n", j==0 ? "" : "\n");

  /* The lookup logic of the generated function */
  printf("  int h, i;\n");
  printf("  if( n<2 ) return TK_ID;\n");
  printf("  h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + \n"
         "      sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +\n"
         "      n) %% %d;\n", bestSize);
  printf("  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n");
  printf("    if( aLen[i]==n &&"
                   " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n");
  printf("      return aCode[i];\n");
  printf("    }\n");
  printf("  }\n");
  printf("  return TK_ID;\n");
  printf("}\n");

  return 0;
}