/* Objective Modula-2 Compiler (objm2c) * * @file objm2_tokenset.c * ObjM2 token set implementation * * Syntax analysis for ObjM2 source files * * Author: Benjamin Kowarsch * * Copyright (C) 2009 The Objective Modula-2 Project. All rights reserved. * * License: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met * * 1) This file, or any part thereof, may NOT be hosted on websites which * contain advertising, unless specific prior written permission has been * obtained. The licensor will grant such permission upon request at its * sole discretion to websites the licensor does NOT consider abusive in * their use of advertising. Small notices in the footer of a website * naming corporate rights holders, infrastructure providers or sponsors * are not considered advertising in the context of this license. * * 2) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 3) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and other materials provided with the distribution. * * 4) Neither the author's name nor the names of any contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * 5) Where this list of conditions or the following disclaimer, in part or * as a whole is overruled or nullified by applicable law, no permission * is granted to use the software. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Version history: * * 2.00 2009-08-10 BK new file * 2009-09-20 BK amended for use with gen_first_and_follow_sets.c * 2009-09-23 BK changed all instances of enumerator to iterator */ #include "objm2_alloc.h" #include "objm2_tokenset.h" #include "objm2_productions.h" #include "objm2_first_follow.h" #include "objm2_build_params.h" #include #include #include // --------------------------------------------------------------------------- // Token set segmentation // --------------------------------------------------------------------------- #define _OBJM2_TOKENSET_BITS_PER_SEGMENT 32 #if (OBJM2_TOKENSET_BITS_PER_SEGMENT != _OBJM2_TOKENSET_BITS_PER_SEGMENT) #error "illegal value for OBJM2_TOKENSET_BITS_PER_SEGMENT" #endif #define _OBJM2_TOKENSET_SEGMENTS_PER_SET \ ((OBJM2_NUMBER_OF_TOKENS / OBJM2_TOKENSET_BITS_PER_SEGMENT) + 1) #if (OBJM2_TOKENSET_SEGMENTS_PER_SET != _OBJM2_TOKENSET_SEGMENTS_PER_SET) #error "illegal value for OBJM2_TOKENSET_SEGMENTS_PER_SET" #endif #define _OBJM2_TOKENSET_SEGMENT_TYPE uint_fast32_t // --------------------------------------------------------------------------- // Token set type // --------------------------------------------------------------------------- typedef _OBJM2_TOKENSET_SEGMENT_TYPE objm2_tokenset_a[_OBJM2_TOKENSET_SEGMENTS_PER_SET]; // --------------------------------------------------------------------------- // Array of FIRST sets // --------------------------------------------------------------------------- #define _add_production(_production, \ _first0, _first1, _first2, _first3, \ _follow0, _follow1, _follow2, _follow3 ) \ { _first0, _first1, _first2, _first3 }, static const objm2_tokenset_a _first_set[] = { #include "objm2_production_table.h" }; // _first_set #undef _add_production // --------------------------------------------------------------------------- // Array of FOLLOW sets // --------------------------------------------------------------------------- #define _add_production(_production, \ _first0, _first1, _first2, _first3, \ _follow0, _follow1, _follow2, _follow3 ) \ { _follow0, _follow1, _follow2, _follow3 }, static const objm2_tokenset_a _follow_set[] = { #include "objm2_production_table.h" }; // _follow_set #undef _add_production // --------------------------------------------------------------------------- // Token set iterator type // --------------------------------------------------------------------------- #define _OBJM2_TOKENSET_ITERATOR_BASE_TYPE uint8_t typedef _OBJM2_TOKENSET_ITERATOR_BASE_TYPE *objm2_tokenset_iterator_a; #define _TOKEN_COUNT(_iterator) _iterator[0] #define _TOKEN_AT_INDEX(_iterator, _index) _iterator[_index + 1] // --------------------------------------------------------------------------- // Iterators for FIRST sets // --------------------------------------------------------------------------- #define _add_production(_production, \ _fi0, _fi1, _fi2, _fi3, _fo0, _fo1, _fo2, _fo3 ) \ static const _OBJM2_TOKENSET_ITERATOR_BASE_TYPE \ _FiSI ## _production[] = { FIRST_LIST_for ## _production }; #include "objm2_production_table.h" #undef _add_production // --------------------------------------------------------------------------- // Array of FIRST set iterators // --------------------------------------------------------------------------- #define _add_production(_production, \ _fi0, _fi1, _fi2, _fi3, _fo0, _fo1, _fo2, _fo3 ) \ (objm2_tokenset_iterator_t) &_FiSI ## _production, static const _OBJM2_TOKENSET_ITERATOR_BASE_TYPE * _first_set_iterator[] = { #include "objm2_production_table.h" }; // _first_set_iterator #undef _add_production // --------------------------------------------------------------------------- // Iterators for FOLLOW sets // --------------------------------------------------------------------------- #define _add_production(_production, \ _fi0, _fi1, _fi2, _fi3, _fo0, _fo1, _fo2, _fo3 ) \ static const _OBJM2_TOKENSET_ITERATOR_BASE_TYPE \ _FoSI ## _production[] = { FOLLOW_LIST_for ## _production }; #include "objm2_production_table.h" #undef _add_production // --------------------------------------------------------------------------- // Array of FOLLOW set iterators // --------------------------------------------------------------------------- #define _add_production(_production, \ _fi0, _fi1, _fi2, _fi3, _fo0, _fo1, _fo2, _fo3 ) \ (objm2_tokenset_iterator_t) &_FoSI ## _production, static const _OBJM2_TOKENSET_ITERATOR_BASE_TYPE * _follow_set_iterator[] = { #include "objm2_production_table.h" }; // _follow_set_iterator #undef _add_production // -------------------------------------------------------------------------- // function: objm2_tokenset_from_list( token_list ) // -------------------------------------------------------------------------- // // Returns a new token set with the tokens passed as parameters included. At // least one token must be passed. Further tokens may be passed as variadic // parameters. The list of tokens must always be terminated by passing zero // as the last argument in the function call. Values passed in that are out- // side of the range of defined tokens are ignored. If zero is passed as the // only argument, an empty token set is returned. objm2_tokenset_t objm2_tokenset_from_list(objm2_token_t first_token, ...) { objm2_tokenset_a *new_set; uint_fast8_t bit, segment = 0; objm2_token_t token; va_list token_list; va_start(token_list, first_token); new_set = (objm2_tokenset_a *) OBJM2_ALLOCATE(sizeof(objm2_tokenset_a)); if (new_set == NULL) return NULL; while (segment < _OBJM2_TOKENSET_SEGMENTS_PER_SET) { (*new_set)[segment] = 0; segment++; } // end while token = first_token; while (token != 0) { if (token < OBJM2_NUMBER_OF_TOKENS) { segment = (uint_fast8_t) (token / _OBJM2_TOKENSET_BITS_PER_SEGMENT); bit = (uint_fast8_t) (token % _OBJM2_TOKENSET_BITS_PER_SEGMENT); (*new_set)[segment] = (*new_set)[segment] | (1 << bit); } // end if token = va_arg(token_list, objm2_token_t); // next token } // end while return (objm2_tokenset_t) new_set; } // end objm2_tokenset_from_list // -------------------------------------------------------------------------- // function: objm2_tokenset_is_element( set, token ) // -------------------------------------------------------------------------- // // Tests membership of token in token set . Returns true if the // token is a member, token ∈ set, returns false otherwise. If a value is // passed that is outside the range of defined tokens, false is returned. bool objm2_tokenset_is_element(objm2_tokenset_t set, objm2_token_t token) { objm2_tokenset_a *_set = (objm2_tokenset_a *) set; uint_fast8_t segment, bit; if (token >= OBJM2_NUMBER_OF_TOKENS) return false; segment = (uint_fast8_t) (token / _OBJM2_TOKENSET_BITS_PER_SEGMENT); bit = (uint_fast8_t) (token % _OBJM2_TOKENSET_BITS_PER_SEGMENT); return ((*_set)[segment] & (1 << bit)) != 0; } // end objm2_tokenset_is_element // -------------------------------------------------------------------------- // function: objm2_tokenset_is_subset( set1, set2 ) // -------------------------------------------------------------------------- // // Returns true if token set is a subset of token set , that is // if all elements of token set are also elements of token set , // set2 ⊆ set1, returns false otherwise. bool objm2_tokenset_is_subset(objm2_tokenset_t set1, objm2_tokenset_t set2) { objm2_tokenset_a *_set1 = (objm2_tokenset_a *) set1; objm2_tokenset_a *_set2 = (objm2_tokenset_a *) set2; uint_fast8_t segment = 0; while (segment < _OBJM2_TOKENSET_SEGMENTS_PER_SET) { if ((((*_set1)[segment] & (*_set2)[segment]) ^ (*_set2)[segment]) == 0) segment++; else return false; } // end while return true; } // end objm2_tokenset_is_subset // -------------------------------------------------------------------------- // function: objm2_tokenset_is_disjunct( set1, set2 ) // -------------------------------------------------------------------------- // // Tests if token sets and are disjunct, Returns true if the // sets are disjunct, set1 ∩ set2 = {}, returns false otherwise. bool objm2_tokenset_is_disjunct(objm2_tokenset_t set1, objm2_tokenset_t set2) { objm2_tokenset_a *_set1 = (objm2_tokenset_a *) set1; objm2_tokenset_a *_set2 = (objm2_tokenset_a *) set2; uint_fast8_t segment = 0; while (segment < _OBJM2_TOKENSET_SEGMENTS_PER_SET) { if (((*_set1)[segment] & (*_set2)[segment]) == 0) segment++; else return false; } // end while return true; } // end objm2_tokenset_is_disjunct // -------------------------------------------------------------------------- // function: objm2_tokenset_incl( set, token ) // -------------------------------------------------------------------------- // // Includes token in token set . Any value passed in that is // outside of the range of defined tokens is ignored. void objm2_tokenset_incl(objm2_tokenset_t set, objm2_token_t token) { objm2_tokenset_a *_set = (objm2_tokenset_a *) set; uint_fast8_t segment, bit; if (token >= OBJM2_NUMBER_OF_TOKENS) return; segment = (uint_fast8_t) (token / _OBJM2_TOKENSET_BITS_PER_SEGMENT); bit = (uint_fast8_t) (token % _OBJM2_TOKENSET_BITS_PER_SEGMENT); (*_set)[segment] = (*_set)[segment] | (1 << bit); return; } // end objm2_tokenset_incl // -------------------------------------------------------------------------- // function: objm2_tokenset_excl( set, token ) // -------------------------------------------------------------------------- // // Excludes token in token set . Any value passed in that is // outside of the range of defined tokens is ignored. void objm2_tokenset_excl(objm2_tokenset_t set, objm2_token_t token) { objm2_tokenset_a (*_set) = (objm2_tokenset_a *) set; uint_fast8_t segment, bit; if (token >= OBJM2_NUMBER_OF_TOKENS) return; segment = (uint_fast8_t) (token / _OBJM2_TOKENSET_BITS_PER_SEGMENT); bit = (uint_fast8_t) (token % _OBJM2_TOKENSET_BITS_PER_SEGMENT); (*_set)[segment] = (*_set)[segment] & ~(1 << bit); return; } // end objm2_tokenset_excl // -------------------------------------------------------------------------- // function: objm2_tokenset_incl_list( set, token_list ) // -------------------------------------------------------------------------- // // Includes the tokens passed as variadic parameters in token set . // The list of tokens must be terminated by passing zero as the last argument // in the function call. Any value passed in that is outside of the range of // defined tokens is ignored. void objm2_tokenset_incl_list(objm2_tokenset_t set, ...) { objm2_tokenset_a *_set = (objm2_tokenset_a *) set; uint_fast8_t segment, bit; objm2_token_t token; va_list token_list; va_start(token_list, set); token = va_arg(token_list, objm2_token_t); while (token != 0) { if (token < OBJM2_NUMBER_OF_TOKENS) { segment = (uint_fast8_t) (token / _OBJM2_TOKENSET_BITS_PER_SEGMENT); bit = (uint_fast8_t) (token % _OBJM2_TOKENSET_BITS_PER_SEGMENT); (*_set)[segment] = (*_set)[segment] | (1 << bit); } // end if token = va_arg(token_list, objm2_token_t); // next token } // end while return; } // end objm2_tokenset_incl_list // -------------------------------------------------------------------------- // function: objm2_tokenset_excl_list( set, token_list ) // -------------------------------------------------------------------------- // // Excludes the tokens passed as variadic parameters from token set . // The list of tokens must be terminated by passing zero as the last argument // in the function call. Any value passed in that is outside of the range of // defined tokens is ignored. void objm2_tokenset_excl_list(objm2_tokenset_t set, ...) { objm2_tokenset_a *_set = (objm2_tokenset_a *) set; uint_fast8_t segment, bit; objm2_token_t token; va_list token_list; va_start(token_list, set); token = va_arg(token_list, objm2_token_t); while (token != 0) { if (token < OBJM2_NUMBER_OF_TOKENS) { segment = (uint_fast8_t) (token / _OBJM2_TOKENSET_BITS_PER_SEGMENT); bit = (uint_fast8_t) (token % _OBJM2_TOKENSET_BITS_PER_SEGMENT); (*_set)[segment] = (*_set)[segment] & ~(1 << bit); } // end if token = va_arg(token_list, objm2_token_t); // next token } // end while return; } // end objm2_tokenset_excl_list // -------------------------------------------------------------------------- // function: objm2_tokenset_union( set1, set2 ) // -------------------------------------------------------------------------- // // Returns the union of token sets and , set1 ∪ set2. objm2_tokenset_t objm2_tokenset_union(objm2_tokenset_t set1, objm2_tokenset_t set2) { objm2_tokenset_a *_set1 = (objm2_tokenset_a *) set1; objm2_tokenset_a *_set2 = (objm2_tokenset_a *) set2; objm2_tokenset_a *new_set; uint_fast8_t segment = 0; new_set = (objm2_tokenset_a *) OBJM2_ALLOCATE(sizeof(objm2_tokenset_a)); if (new_set == NULL) return NULL; while (segment < _OBJM2_TOKENSET_SEGMENTS_PER_SET) { (*new_set)[segment] = (*_set1)[segment] | (*_set2)[segment]; segment++; } // end while return (objm2_tokenset_t) new_set; } // end objm2_tokenset_union // -------------------------------------------------------------------------- // function: objm2_tokenset_intersection( set1, set2 ) // -------------------------------------------------------------------------- // // Returns the intersection of token sets and , set1 ∩ set2. objm2_tokenset_t objm2_tokenset_intersection(objm2_tokenset_t set1, objm2_tokenset_t set2) { objm2_tokenset_a *_set1 = (objm2_tokenset_a *) set1; objm2_tokenset_a *_set2 = (objm2_tokenset_a *) set2; objm2_tokenset_a *new_set; uint_fast8_t segment = 0; new_set = (objm2_tokenset_a *) OBJM2_ALLOCATE(sizeof(objm2_tokenset_a)); if (new_set == NULL) return NULL; while (segment < _OBJM2_TOKENSET_SEGMENTS_PER_SET) { *new_set[segment] = *_set1[segment] & *_set2[segment]; segment++; } // end while return (objm2_tokenset_t) new_set; } // end objm2_tokenset_intersection // -------------------------------------------------------------------------- // function: objm2_tokenset_difference( set1, set2 ) // -------------------------------------------------------------------------- // // Returns the difference of token sets and , set1 \ set 2. objm2_tokenset_t objm2_tokenset_difference(objm2_tokenset_t set1, objm2_tokenset_t set2) { objm2_tokenset_a *_set1 = (objm2_tokenset_a *) set1; objm2_tokenset_a *_set2 = (objm2_tokenset_a *) set2; objm2_tokenset_a *new_set; uint_fast32_t segment = 0; new_set = (objm2_tokenset_a *) OBJM2_ALLOCATE(sizeof(objm2_tokenset_a)); if (new_set == NULL) return NULL; while (segment < _OBJM2_TOKENSET_SEGMENTS_PER_SET) { (*new_set)[segment] = ((*_set1)[segment] | (*_set2)[segment]) & (~ (*_set2)[segment]); segment++; } // end while return (objm2_tokenset_t) new_set; } // end objm2_tokenset_difference // --------------------------------------------------------------------------- // function: objm2_tokenset_first_set( production ) // --------------------------------------------------------------------------- // // Returns the FIRST set of production . objm2_tokenset_t objm2_tokenset_first_set(objm2_production_t production) { if (production < OBJM2_NUMBER_OF_PRODUCTIONS) return (const objm2_tokenset_t) &_first_set[production]; else return NULL; } // end objm2_tokenset_first_set // --------------------------------------------------------------------------- // function: objm2_tokenset_follow_set( production ) // --------------------------------------------------------------------------- // // Returns the FOLLOW set of production . objm2_tokenset_t objm2_tokenset_follow_set(objm2_production_t production) { if (production < OBJM2_NUMBER_OF_PRODUCTIONS) return (const objm2_tokenset_t) &_follow_set[production]; else return NULL; } // end objm2_tokenset_follow_set // --------------------------------------------------------------------------- // function: objm2_tokenset_dispose( set ) // --------------------------------------------------------------------------- // // Disposes of tokenset . void objm2_tokenset_dispose(objm2_tokenset_t set) { if (set == NULL) return; OBJM2_DEALLOCATE(set); } // end objm2_tokenset_dispose // --------------------------------------------------------------------------- // function: objm2_tokenset_iterator( set ) // --------------------------------------------------------------------------- // // Returns a new iterator for tokenset . objm2_tokenset_iterator_t objm2_tokenset_iterator(objm2_tokenset_t set) { objm2_tokenset_a *_set = (objm2_tokenset_a *) set; objm2_token_t token_list[OBJM2_NUMBER_OF_TOKENS]; objm2_token_t token = 0; cardinal index = 0; _OBJM2_TOKENSET_ITERATOR_BASE_TYPE *new_iterator; uint_fast8_t bit, segment; if (set == NULL) return NULL; bit = 0; segment = 0; while (segment < _OBJM2_TOKENSET_SEGMENTS_PER_SET) { while (bit < _OBJM2_TOKENSET_BITS_PER_SEGMENT) { if (((*_set)[segment] & (1 << bit)) != 0) { token_list[index] = token; index++; } // end if token++; bit++; } // end while bit = 0; segment++; } // end while new_iterator = (_OBJM2_TOKENSET_ITERATOR_BASE_TYPE *) OBJM2_ALLOCATE(index * sizeof(_OBJM2_TOKENSET_ITERATOR_BASE_TYPE)); if (new_iterator == NULL) return NULL; _TOKEN_COUNT(new_iterator) = index; index = 0; while (index < _TOKEN_COUNT(new_iterator)) { _TOKEN_AT_INDEX(new_iterator, index) = token_list[index]; index++; } // end while return (objm2_tokenset_iterator_t) new_iterator; } // end objm2_tokenset_iterator // --------------------------------------------------------------------------- // function: objm2_tokenset_iterator_token_count( iterator ) // --------------------------------------------------------------------------- // // Returns the number of tokens in tokenset iterator . cardinal objm2_tokenset_iterator_token_count(objm2_tokenset_iterator_t iterator) { objm2_tokenset_iterator_a _iterator = (objm2_tokenset_iterator_a) iterator; if (iterator == NULL) return 0; return _TOKEN_COUNT(_iterator); } // end objm2_tokenset_iterator_token_count // --------------------------------------------------------------------------- // function: objm2_tokenset_iterator_token_at_index( iterator, index ) // --------------------------------------------------------------------------- // // Returns the token at index in tokenset iterator . objm2_token_t objm2_tokenset_iterator_token_at_index(objm2_tokenset_iterator_t iterator, cardinal index) { objm2_tokenset_iterator_a _iterator = (objm2_tokenset_iterator_a) iterator; if ((iterator == NULL) || (index >= _TOKEN_COUNT(_iterator))) return 0; return _TOKEN_AT_INDEX(_iterator, index); } // end objm2_tokenset_iterator_token_at_index // --------------------------------------------------------------------------- // function: objm2_tokenset_iterator_for_first_set( production ) // --------------------------------------------------------------------------- // // Returns an iterator for the FIRST set of production . objm2_tokenset_iterator_t objm2_tokenset_iterator_for_first_set(objm2_production_t production) { if (production < OBJM2_NUMBER_OF_PRODUCTIONS) return (objm2_tokenset_iterator_t) _first_set_iterator[production]; else return NULL; } // end objm2_tokenset_iterator_for_first_set // --------------------------------------------------------------------------- // function: objm2_tokenset_iterator_follow_set( production ) // --------------------------------------------------------------------------- // // Returns an iterator for the FOLLOW set of production . objm2_tokenset_iterator_t objm2_tokenset_iterator_for_follow_set(objm2_production_t production) { if (production < OBJM2_NUMBER_OF_PRODUCTIONS) return (objm2_tokenset_iterator_t) _follow_set_iterator[production]; else return NULL; } // end objm2_tokenset_iterator_for_follow_set // --------------------------------------------------------------------------- // function: objm2_tokenset_iterator_dispose( iterator ) // --------------------------------------------------------------------------- // // Disposes of tokenset iterator . void objm2_tokenset_iterator_dispose(objm2_tokenset_iterator_t iterator) { if (iterator == NULL) return; OBJM2_DEALLOCATE(iterator); } // end objm2_tokenset_iterator_dispose // END OF FILE