lexer.h

00001 /***************************************************************
00002  *
00003  * Copyright (C) 1990-2007, Condor Team, Computer Sciences Department,
00004  * University of Wisconsin-Madison, WI.
00005  * 
00006  * Licensed under the Apache License, Version 2.0 (the "License"); you
00007  * may not use this file except in compliance with the License.  You may
00008  * obtain a copy of the License at
00009  * 
00010  *    http://www.apache.org/licenses/LICENSE-2.0
00011  * 
00012  * Unless required by applicable law or agreed to in writing, software
00013  * distributed under the License is distributed on an "AS IS" BASIS,
00014  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  * See the License for the specific language governing permissions and
00016  * limitations under the License.
00017  *
00018  ***************************************************************/
00019 
00020 
00021 #ifndef __CLASSAD_LEXER_H__
00022 #define __CLASSAD_LEXER_H__
00023 
00024 #include "classad/common.h"
00025 #include "classad/value.h"
00026 #include "classad/lexerSource.h"
00027 
00028 BEGIN_NAMESPACE( classad )
00029 
00030 
00031 // the lexical analyzer class
00032 class Lexer
00033 {
00034     public:
00035         enum TokenType
00036         {
00037             LEX_TOKEN_ERROR,
00038             LEX_END_OF_INPUT,
00039             LEX_TOKEN_TOO_LONG,
00040             LEX_INTEGER_VALUE,
00041             LEX_REAL_VALUE,
00042             LEX_BOOLEAN_VALUE,
00043             LEX_STRING_VALUE,
00044             LEX_UNDEFINED_VALUE,
00045             LEX_ERROR_VALUE,
00046             LEX_IDENTIFIER,
00047             LEX_SELECTION,
00048             LEX_MULTIPLY,
00049             LEX_DIVIDE,
00050             LEX_MODULUS,
00051             LEX_PLUS,
00052             LEX_MINUS,
00053             LEX_BITWISE_AND,
00054             LEX_BITWISE_OR,
00055             LEX_BITWISE_NOT,
00056             LEX_BITWISE_XOR,
00057             LEX_LEFT_SHIFT,
00058             LEX_RIGHT_SHIFT,
00059             LEX_URIGHT_SHIFT,
00060             LEX_LOGICAL_AND,
00061             LEX_LOGICAL_OR,
00062             LEX_LOGICAL_NOT,
00063             LEX_LESS_THAN,
00064             LEX_LESS_OR_EQUAL,
00065             LEX_GREATER_THAN,
00066             LEX_GREATER_OR_EQUAL,
00067             LEX_EQUAL,
00068             LEX_NOT_EQUAL,
00069             LEX_META_EQUAL,
00070             LEX_META_NOT_EQUAL,
00071             LEX_BOUND_TO,
00072             LEX_QMARK,
00073             LEX_COLON,
00074             LEX_COMMA,
00075             LEX_SEMICOLON,
00076             LEX_OPEN_BOX,
00077             LEX_CLOSE_BOX,
00078             LEX_OPEN_PAREN,
00079             LEX_CLOSE_PAREN,
00080             LEX_OPEN_BRACE,
00081             LEX_CLOSE_BRACE,
00082             LEX_BACKSLASH,
00083             LEX_ABSOLUTE_TIME_VALUE,
00084             LEX_RELATIVE_TIME_VALUE
00085         };
00086 
00087         class TokenValue
00088         {
00089             public:
00090                 TokenValue( ) {
00091                     tt                   = LEX_TOKEN_ERROR;
00092                     factor               = Value::NO_FACTOR;
00093                     intValue             = 0;
00094                     realValue            = 0.0;
00095                     boolValue            = false;
00096                     relative_secs        = 0;
00097                     absolute_secs.secs   = 0;
00098                     absolute_secs.offset = 0;
00099                 }
00100 
00101                 ~TokenValue( ) {
00102                 }
00103 
00104                 void SetTokenType( TokenType t ) {
00105                     tt = t;
00106                 }
00107 
00108                 void SetIntValue( int i, Value::NumberFactor f) {
00109                     intValue = i;
00110                     factor = f;
00111                 }
00112 
00113                 void SetRealValue( double r, Value::NumberFactor f ) {
00114                     realValue = r;
00115                     factor = f;
00116                 }
00117 
00118                 void SetBoolValue( bool b ) {
00119                     boolValue = b;
00120                 }
00121 
00122                 void SetStringValue( const std::string &str) {
00123                     strValue = str;
00124                 }
00125 
00126                 void SetAbsTimeValue( abstime_t asecs ) {
00127                     absolute_secs = asecs;
00128                 }
00129 
00130                 void SetRelTimeValue( double rsecs ) {
00131                     relative_secs = rsecs;
00132                 }
00133 
00134                 TokenType GetTokenType( ) {
00135                     return tt;
00136                 }
00137 
00138                 void GetIntValue( int& i, Value::NumberFactor& f) {
00139                     i = intValue;
00140                     f = factor;
00141                 }
00142 
00143                 void GetRealValue( double& r, Value::NumberFactor& f ) {
00144                     r = realValue;
00145                     f = factor;
00146                 }
00147 
00148                 void GetBoolValue( bool& b ) {
00149                     b = boolValue;
00150                 }
00151 
00152                 void GetStringValue( std::string &str ) {
00153                     str = strValue; 
00154                 }   
00155 
00156                 void GetAbsTimeValue( abstime_t& asecs ) {
00157                     asecs = absolute_secs;
00158                 }
00159 
00160                 void GetRelTimeValue( double& rsecs ) {
00161                     rsecs = relative_secs;
00162                 }
00163 
00164                 void CopyFrom( TokenValue &tv ) {
00165                     tt = tv.tt;
00166                     factor = tv.factor;
00167                     intValue = tv.intValue;
00168                     realValue = tv.realValue;
00169                     boolValue = tv.boolValue;
00170                     relative_secs = tv.relative_secs;
00171                     absolute_secs = tv.absolute_secs;
00172                     strValue = tv.strValue;
00173                 }
00174                     
00175             private:
00176                 TokenType           tt;
00177                 Value::NumberFactor factor;
00178                 int                 intValue;
00179                 double              realValue;
00180                 bool                boolValue;
00181                 std::string         strValue;
00182                 double              relative_secs;
00183                 abstime_t           absolute_secs;
00184         };
00185 
00186         // ctor/dtor
00187         Lexer ();
00188         ~Lexer ();
00189 
00190         // initialize methods
00191         bool Initialize(LexerSource *source);
00192         bool Reinitialize(void);
00193         
00194         bool WasInitialized(void);
00195 
00196         // cleanup function --- purges strings from string space
00197         void FinishedParse();
00198         
00199         // the 'extract token' functions
00200         TokenType PeekToken( TokenValue* = 0 );
00201         TokenType ConsumeToken( TokenValue* = 0 );
00202 
00203         // internal buffer for token accumulation
00204         std::string lexBuffer;                      // the buffer itselfw
00205 
00206         // miscellaneous functions
00207         static const char *strLexToken (int);       // string rep'n of token
00208 
00209         // set debug flag 
00210         void SetDebug( bool d ) { debug = d; }
00211 
00212     private:
00213             // grant access to FunctionCall --- for tokenize{Abs,Rel}Time fns
00214         friend class FunctionCall;
00215         friend class ClassAdXMLParser;
00216 
00217         // The copy constructor and assignment operator are defined
00218         // to be private so we don't have to write them, or worry about
00219         // them being inappropriately used. The day we want them, we can 
00220         // write them. 
00221         Lexer(const Lexer &)            { return;       }
00222         Lexer &operator=(const Lexer &) { return *this; }
00223 
00224         // internal state of lexical analyzer
00225         bool        initialized;
00226         TokenType   tokenType;                  // the integer id of the token
00227         LexerSource *lexSource;
00228         int         markedPos;                  // index of marked character
00229         char        savedChar;                  // stores character when cut
00230         int         ch;                         // the current character
00231         int         lexBufferCount;             // current offset in lexBuffer
00232         bool        inString;                   // lexing a string constant
00233         bool        accumulating;               // are we in a token?
00234         int         debug;                      // debug flag
00235 
00236         // cached last token
00237         TokenValue  yylval;                     // the token itself
00238         bool        tokenConsumed;              // has the token been consumed?
00239 
00240         // internal lexing functions
00241         void        wind(void);                 // consume character from source
00242         void        mark(void);                 // mark()s beginning of a token
00243         void        cut(void);                  // delimits token
00244 
00245         // to tokenize the various tokens
00246         int         tokenizeNumber (void);      // integer or real
00247         int         tokenizeAlphaHead (void);   // identifiers/reserved strings
00248         int         tokenizePunctOperator(void);// punctuation and operators
00249         int         tokenizeString(char delim);//string constants
00250 };
00251 
00252 END_NAMESPACE // classad
00253 
00254 #endif //__CLASSAD_LEXER_H__
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends