/* Objective Modula-2 Compiler (objm2c) * * objm2_reserved_words.c * Minimal Perfect Hash for Objective Modula-2 reserved words * * Author: Benjamin Kowarsch * * Copyright (C) 2009 Sunrise Telephone Systems KK. All rights reserved. * * License: * * Permission is hereby granted to review and test this software for the sole * purpose of supporting the effort by the licensor to define and develop the * Objective Modula-2 language. It is not permissible under any circumstances * to use the software for the purpose of creating derivative languages or * dialects. This permission is valid until 31 December 2009, 24:00h GMT. * * Future licensing: * * The licensor undertakes to eventually release this software under a proper * open source license AFTER the Objective Modula-2 language definition has * been finalised and a conforming and working reference compiler completed. * * Version history: * * 2.00 2009-01-31 BK new file */ // --------------------------------------------------------------------------- // ObjM2 project imports // --------------------------------------------------------------------------- #include "common_macros.h" #include "objm2_hash.h" #include "objm2_reserved_words.h" // --------------------------------------------------------------------------- // Base hash algorithm to use for minimal perfect hash // --------------------------------------------------------------------------- // // The minimal perfect hash function has been designed to use the original // stored hash algorithm from objm2_hash.c as a base. If the algorithm for // stored hashes has been replaced, then the macros defined in objm2_hash.h // can no longer be used for the calculation of MPH values. In this case the // MPH function will need to use both algorithms, the original algorithm // to calculate MPH values for the table index, and the replacement algorithm // to calculate stored hashes for input string verification. // // Macro OBJM2_HASH_ALGORITHM_IS_ORIGINAL to be defined or left undefined in // objm2_hash.c determines whether the original algorithm has been replaced. // If it is defined, the original algorithm is in use, otherwise not. // // The MPH function uses its own local macros to calculate the MPH values. If // macro OBJM2_HASH_ALGORITHM_IS_ORIGINAL is defined, these macros will be // mapped to their respective counterparts defined in objm2_hash.h, otherwise // the local macros will be defined using the original algorithm and both // algorithms will then be used. // if storage hash alogrithm is orignal, map to it #ifdef OBJM2_HASH_ALGORITHM_IS_ORIGINAL #define _OBJM2_MPH_INITIAL OBJM2_HASH_INITIAL #define _OBJM2_MPH_NEXT_CHAR(_hash,_ch) OBJM2_HASH_NEXT_CHAR(_hash,_ch) #define _OBJM2_MPH_FINAL(_hash) OBJM2_HASH_FINAL(_hash) #endif // if storage hash algorithm is not original, define locally #ifndef OBJM2_HASH_ALGORITHM_IS_ORIGINAL #define _OBJM2_MPH_INITIAL 0 #define _OBJM2_MPH_NEXT_CHAR(_hash,_ch) \ (_ch + (_hash << 6) + (_hash << 16) - _hash) #define _OBJM2_MPH_FINAL(_hash) (hash & 0x7FFFFFFF) #endif // --------------------------------------------------------------------------- // Reserved word hash table // --------------------------------------------------------------------------- #define _add_reserved_word(_suffix, _hash) _hash, static const cardinal objm2_reserved_word[] = { 0, // leading null-entry // insert hashes from master table #include "objm2_reserved_word_table.h" 0 // trailing null-entry } /* end objm2_reserved_word */; #undef _add_reserved_word // --------------------------------------------------------------------------- // MPH mapping table // --------------------------------------------------------------------------- // // This table maps intermediate perfect hash codes to alphabetically ordered // minimal perfect hash codes between 1 and OBJM2_NUMBER_OF_RESERVED_WORDS - 1, // it further maps all invalid codes to OBJM2_NUMBER_OF_RESERVED_WORDS. #define _N _OBJM2_NUMBER_OF_RESERVED_WORDS static const char _objm2_mph_to_index_map[] = { // 0 1 2 3 4 5 6 7 8 9 /* 00 */ _N, 21, 9, 10, _N, _N, _N, _N, _N, _N, /* 10 */ 50, _N, _N, _N, _N, _N, _N, _N, _N, _N, /* 20 */ _N, _N, _N, _N, _N, _N, _N, _N, _N, _N, /* 30 */ _N, 47, _N, _N, 35, _N, _N, _N, _N, 54, /* 40 */ 23, _N, _N, 5, _N, _N, _N, 7, _N, _N, /* 50 */ _N, _N, 30, 15, 8, _N, _N, _N, _N, _N, /* 60 */ _N, _N, 26, _N, _N, _N, _N, _N, _N, _N, /* 70 */ _N, _N, _N, _N, _N, _N, _N, _N, 13, 56, /* 80 */ _N, _N, _N, 39, _N, 19, 4, _N, _N, _N, /* 90 */ _N, 48, _N, _N, _N, _N, 44, _N, _N, _N, /* 100 */ _N, _N, 33, _N, _N, _N, _N, _N, _N, _N, /* 110 */ 34, _N, 12, _N, 36, _N, _N, _N, _N, _N, /* 120 */ 57, _N, 43, _N, _N, _N, 41, _N, _N, 31, /* 130 */ _N, _N, _N, _N, _N, 53, 3, _N, _N, 38, /* 140 */ _N, _N, _N, _N, _N, _N, _N, 29, _N, _N, /* 150 */ _N, _N, _N, _N, _N, _N, _N, _N, _N, _N, /* 160 */ _N, _N, _N, _N, _N, _N, _N, _N, 52, _N, /* 170 */ _N, _N, _N, _N, _N, _N, _N, _N, _N, _N, /* 180 */ _N, _N, _N, _N, _N, _N, _N, _N, _N, _N, /* 190 */ 37, _N, _N, _N, _N, 40, 20, 16, 32, 55, /* 200 */ 14, _N, _N, _N, _N, 27, 49, _N, _N, _N, /* 210 */ _N, _N, _N, _N, 2, 18, _N, _N, _N, _N, /* 220 */ _N, 11, _N, _N, _N, _N, _N, _N, _N, _N, /* 230 */ 51, _N, 42, _N, 45, _N, _N, _N, 46, _N, /* 240 */ _N, _N, 22, 17, _N, _N, _N, 1, _N, 28, /* 250 */ 25, 6, _N, _N, 24, _N // last index is 255 }; // end _objm2_mph_to_index_map #undef _N // --------------------------------------------------------------------------- // function: objm2_hash_for_reserved_word(index) // --------------------------------------------------------------------------- // // Returns the stored hash value for the reserved word associated with index // . If is greater than or equal to the number of reserved // words as defined by OBJM2_NUMBER_OF_RESERVED_WORDS, then zero is returned. cardinal objm2_hash_for_reserved_word(cardinal index) { if (index >= _OBJM2_NUMBER_OF_RESERVED_WORDS) return 0; else return objm2_reserved_word[index]; } // end objm2_hash_for_reserved_word // --------------------------------------------------------------------------- // function: objm2_index_for_reserved_word(lexeme, hash) // --------------------------------------------------------------------------- // // Returns the hash table index for the reserved word represented by // and . If zero is passed for , the its value will be calculated // from . The function executes slightly faster if it does not need // to calculate the storage hash value. // // Return values are between 1 and OBJM2_NUMBER_OF_RESERVED_WORDS - 1. If the // hash value passed does not represent a valid reserved word, or, if zero // was passed for and the string passed in is not a valid // reserved word, then OBJM2_NUMBER_OF_RESERVED_WORDS is returned. cardinal objm2_index_for_reserved_word(const char *lexeme, cardinal hash) { int storage_hash = OBJM2_HASH_INITIAL; int index_hash = _OBJM2_MPH_INITIAL; int index = 0; if (lexeme == NULL) return _OBJM2_NUMBER_OF_RESERVED_WORDS; if (*lexeme == ASCII_NUL) return _OBJM2_NUMBER_OF_RESERVED_WORDS; // if zero was passed in for hash or algorithm has been replaced if ((OBJM2_HASH_ALGORITHM_IS_ORIGINAL == 0) || (hash == 0)) { // calculate the index hash from the lexeme while (lexeme[index] != 0) { if (index > OBJM2_MAX_KEYWORD_LENGTH) return _OBJM2_NUMBER_OF_KEYWORDS; storage_hash = OBJM2_HASH_NEXT_CHAR(storage_hash, lexeme[index]); #if (OBJM2_HASH_ALGORITHM_IS_ORIGINAL == 0) index_hash = _OBJM2_MPH_NEXT_CHAR(index_hash, lexeme[index]); #endif index++; } // end while storage_hash = OBJM2_HASH_FINAL(storage_hash); index_hash = _OBJM2_MPH_FINAL(index_hash); #if (OBJM2_HASH_ALGORITHM_IS_ORIGINAL == 0) index_hash = ((storage_hash - lexeme[0]) & 0x1FF); #else /* storage hash algorithm has been replaced */ index_hash = ((index_hash - lexeme[0]) & 0x1FF); #endif } else /* neither zero was passed nor has the algorithm been replaced */ { // use the passed in hash storage_hash = hash; index_hash = ((hash - lexeme[0]) & 0x1FF); } // end if // finalise index hash index_hash = index_hash + (lexeme[0] & 1) + (lexeme[1] & 1); // resolve collision between AND and END if (index_hash == 247) { index_hash = index_hash - (lexeme[0] & 4); } // resolve collision between OPTIONAL and OR else if (index_hash == 114) { index_hash = index_hash - (lexeme[1]); } // end if // fold to fit all values into 255 cells if (index_hash > 255) index_hash = index_hash - 259; // retrieve index from mph-to-index mapping table index = _objm2_mph_to_index_map[index_hash]; // compare calculated storage hash with stored hash if (storage_hash == objm2_keyword[index]) { // index is valid if they match return index; } else { // index is invalid if they don't return _OBJM2_NUMBER_OF_RESERVED_WORDS; } // end if } // objm2_index_for_reserved_word // END OF FILE