2013-02-07 44 views
0

我用 C++ 編寫了一個詞法分析器,它逐一分析文件的內容,並且只顯示令牌(token)、詞位(lexeme),以及對應的 value/valuer/Literal(整數值、浮點值或字符串字面量)。

我相當確定負責分析的代碼可以正常工作,但我寫不出 displayToken 函數,讓它按下面的格式輸出:

令牌詞位價值/評估師/文字

NUMT 1234值

下面是我的代碼示例,這是 lexical.cpp 文件:

#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

#include "lexical.h"


// Scanner state shared between the Lexical member functions and the driver.
// These are the definitions matching the extern declarations in lexical.h.
int value ; // integer value of a numeric token (assigned with atoi in NumToken)
float valuer ; // floating-point value of a numeric token (assigned with atof in NumToken)
char ch ; // character most recently read from the input stream
string lexeme ; // text of the token being scanned
SYMBOL Token ; // kind of the token most recently classified
string Literal ; // characters collected for a string literal

string reswords[15] ; // reserved words, indexed by the SYMBOL values begint..endt

using namespace std; 


//constructor 

// Creates a lexer and fills the reserved-word table.
Lexical::Lexical()
{
    // InitResWords() is private and no other code in this file calls it, so
    // without this call reswords would stay empty and no keyword could ever
    // match. The table is global, so repeating the call for a second
    // instance simply rewrites the same values.
    InitResWords();
}

//destructor 

Lexical::~Lexical()
{
    // Intentionally empty: the class performs no manual cleanup here.
}

void Lexical::GetNextToken() 
{ 
      // getting the lexeme ******************* 
      // ************************************** 

      value = 0 ; 
      valuer = 0.0 ; 

      int i = 1 ; // line tracker 

      while(!(fin.eof())) 
      { 
       lexeme[i] = ch ; 
       fin.get(ch); 
      } 

      if(fin.peek() == '\n') 
      { 
       i++ ; // increment the line ! 
      } 
      if(!fin.eof()) 
      { 
       ProcessToken() ; 
      } 
      else 
      { 
       fin.close() ; 
       Token = eofilet ; // set to end of file 
      } 





    } 







//rules not implemented yet 
void Lexical::displayToken() 
{ 
    cout << "Token" << "   " << "Lexeme" << "   " << " value/valuer/Literal " << endl ; 

    cout << Token << "   " << lexeme << "   " << value ; 

} 



//initialize reserved words 
// Fills the reserved-word table so that reswords[k] is the spelling of the
// keyword whose token code is k (begint .. endt).
//
// Plain assignment is used on purpose: std::string::copy copies characters
// OUT of the string into the buffer passed to it, so the earlier
// reswords[x].copy("WORD", n, 0) calls never stored anything in the table
// (and passed a string literal as the writable destination).
void Lexical::InitResWords()
{
    reswords[begint]     = "BEGIN";
    reswords[programt]   = "PROGRAM";
    reswords[constt]     = "CONST";
    reswords[vart]       = "VAR";
    reswords[proceduret] = "PROCEDURE";
    reswords[ift]        = "IF";
    reswords[whilet]     = "WHILE";
    reswords[thent]      = "THEN";
    reswords[elset]      = "ELSE";
    reswords[realt]      = "REAL";
    reswords[integert]   = "INTEGER";
    reswords[booleant]   = "BOOLEAN";
    reswords[chart]      = "CHAR";
    reswords[arrayt]     = "ARRAY";
    reswords[endt]       = "END";
}

void Lexical::ProcessToken() 
{ 
    lexeme.at(0) = ch ; // 1 character at a time 
    fin.get(ch) ; 

    if((lexeme.at(0) >= 'A' && lexeme.at(0) <= 'Z') || (lexeme.at(0) >= 'a' && lexeme.at(0) <= 'z')) // if alphabets 
    { 
     int counter = 0 ; 
     //match word token 
      if(!isdigit(lexeme.at(0)) && !isalpha(lexeme.at(0)) && lexeme.at(0) != '_') 
      { 
      //*********** Working with reserved words !!! ************************************************** 
         int j = 0 ; 
         bool flag = false ; 
         while(j < endt) 
          { 
           if(lexeme.compare(reswords[j]) == 0) 
           { 
            Token = (SYMBOL)j; 
            displayToken(); 
            flag = true ; 
           } 
          }  
      //********************************************************************************************** 

      // if not a token , then we are alrea 
         Token = idt ;// then an identifier token 
         displayToken(); 
         return ; 
      }// if ends 

      lexeme.at(counter) = ch ;// keep proceeding 
      fin.get(ch) ; 
    } 
    else if (lexeme.at(0) >= '0' && lexeme.at(0) <= '9') // if numbers 
    { 
     NumToken() ; 
    } 
    else if (lexeme.at(0) == '\"') // for string literal 
    { 
     ProcessLiteralToken(); 
    } 
    else if (lexeme.at(0) == '/') // entering comment section 
     { 
       if (ch == '/' || ch == '*') 
       { 
         // MatchComment(); 
         if (ch == '/') // start of a comment maybe ? 
          { 
          //Line comment 
           while(ch != '\n') 
           fin.get(ch); 
          } 
         else if(ch == '*') // end of a comment ? 
          { 
            while(true) 
           { 
            fin.get(ch); 
            if (ch == '*') 
            { 
             char peek_value = fin.peek(); 
             if (peek_value == '/') 
              { 
               fin.get(ch); 
              // fin.get(ch); 
               return; 
              } 
             else 
              continue; 
            } 
           } // while ends 
          } 
         else 
          { 
          cout << "ERROR !!!" ; 
          } 

         GetNextToken(); 
         } // comment analyzer then moves to next token ! 
       else 
       { 
        OpToken(); 
       } 
     } 
    else if ((lexeme.at(0) == '<') || (lexeme.at(0) == '>') || (lexeme.at(0) == '=')) 
     { 
     if (ch == '=') 
      { 
      lexeme.at(1) = ch; 
      Token = relop ; 
      fin.get(ch); 
      } 
     else 
      OpToken() ; // process the final token 
     } 
    else if ((lexeme.at(0)) == ':') 
     { 
      if (ch == '=') 
      { 
      lexeme.at(1) = ch; 
      Token = relop ; 
      fin.get(ch); 
      } 
    } 
    else 
    OpToken(); 
} 
// Classifies an operator or punctuation lexeme and stores the result in
// `Token`.
//
// The word operators OR / DIV / MOD / AND are matched against the whole
// lexeme; everything else is decided by the first character of `lexeme`.
// An unrecognised (or empty) lexeme sets `Token` to `unknownt`, prints
// "Error !!" and terminates the program with a failure status.
void Lexical::OpToken()
{
    // Word operators. These used to be written as multi-character constants
    // such as 'OR' compared against a single char, which can never be equal.
    if (lexeme == "OR")
    {
        Token = addop ;
        return ;
    }
    if (lexeme == "DIV" || lexeme == "MOD" || lexeme == "AND")
    {
        Token = mulop ;
        return ;
    }

    // An empty lexeme has no first character; '\0' falls through to the
    // error case instead of throwing from at(0).
    const char first = lexeme.empty() ? '\0' : lexeme[0] ;

    switch (first)
    {
    case '+':
    case '-':
        Token = addop ;
        return ;
    case '*':
    case '/':
        Token = mulop ;
        return ;
    case '<':
    case '>':
    case '=':
        Token = relop ;
        return ;
    case '(':
        Token = lparen ;
        return ;
    case ')':
        Token = rparen ;
        return ;
    case '{':
        Token = clbrat ;
        return ;
    case '}':
        Token = crbrat ;
        return ;
    case ',':
        Token = comma ;
        return ;
    case ';':
        Token = semicolon ;
        return ;
    case '.':
        Token = period ;
        return ;
    case '~':
        Token = tildat ;
        return ;
    case '[':
        Token = lbrat ;
        return ;
    case ']':
        Token = rbrat ;
        return ;
    case '#':
        Token = nott ;
        return ;
    default:
        Token = unknownt ;
        cout << "Error !!" ;
        // Exit with a failure status; exit(0) would report success.
        exit(EXIT_FAILURE) ;
    }
}

void Lexical::NumToken() 
{ 
int i = 0; 
    while (isdigit(ch)) 
    { 
    //Update Lexeme and keep going 
    lexeme.at(i++) = ch; 
    fin.get(ch); 
    } 

    if (ch == '.') // checking for float 
    { 
    //if ch is a period, then we might be dealing with a float. We need to ensure that the next character is a digit 
    lexeme.at(i++) = ch; 
    fin.get(ch); 
    if (isdigit(ch)) 
    { 
     //ch is a digit, so we are good to go. 
     while (isdigit(ch)) 
     { 
     lexeme.at(i++) = ch; 
     fin.get(ch); 
     } 
     //If we are here, then we have a float and we have just encountered a new token 
     Token = numt; 
     valuer = atof(lexeme.c_str()); 
     return; 
    } 
    //If we are here, then we have a period but no digit after it--an error 
    Token = unknownt ; 
    cout << "Error." << endl; 
    exit(0); 
    } 
    else { 
    //If we are here, then it means that the next char is not a period.... so we have a NUMT int token 
    Token = numt; 
    value = atoi(lexeme.c_str()); 
    return; 
    } 

} 

// Scans the body of a string literal.
//
// On entry the opening quote has been consumed and `ch` is the first
// character after it. Characters up to the closing quote are stored in
// `Literal` (without the quotes), `Token` becomes `literalt`, and `ch` is
// advanced past the closing quote.
//
// A newline or the end of input before the closing quote prints "Error!"
// and terminates the program with a failure status.
void Lexical::ProcessLiteralToken()
{
    Literal.clear() ; // drop any text left over from an earlier literal

    while (ch != '\"')
    {
        // !fin covers end of input: a failed get() leaves `ch` unchanged,
        // so without this check the loop would never terminate.
        if (!fin || ch == '\n')
        {
            Token = unknownt ;
            cout << "Error!" << endl ;
            exit(EXIT_FAILURE) ;
        }
        Literal += ch ; // append; at(i) throws on an empty string
        fin.get(ch) ;
    }

    Token = literalt ;
    if (!fin.get(ch)) // step over the closing quote
    {
        ch = '\0' ;
    }
}

我lexical.h(頭文件)是這樣的:

#ifndef _LEXICAL_H 
#define _LEXICAL_H 

#include <iostream> 
#include <fstream> 


using namespace std ; 


      // Token kinds produced by the lexer. The enumerators keep their
      // implicit values 0, 1, 2, ... in the order listed; the reserved words
      // come first so their values can index the reswords table.
      enum SYMBOL
      {
       // reserved words (begint .. endt)
       begint, programt, constt, vart, proceduret,
       ift, whilet, thent, elset,
       realt, integert, booleant, chart, arrayt,
       endt,
       // word operators
       divt, modt, andt, nott, ort,
       // operator classes
       addop, mulop, assignop,
       // punctuation
       lparen, rparen, comma, semicolon, period,
       // numbers, identifiers, literals and special markers
       numt, idt, literalt, unknownt, eofilet,
       relop,
       // braces, tilde, brackets, colon
       clbrat, crbrat, tildat, lbrat, rbrat, colon
      };

      // Declarations of the scanner state defined in lexical.cpp. The driver
      // reads Token directly to detect the end of the input.
      //extern int size = 15 ;
      extern int value ; // integer value of a numeric token
      extern float valuer ; // floating-point value of a numeric token
      extern char ch ; // character most recently read from the input
      extern string lexeme ; // text of the token being scanned
      extern SYMBOL Token ; // kind of the token most recently classified
      extern string Literal ; // characters collected for a string literal
      extern string reswords[15] ; // reserved words, indexed by begint..endt

// Lexical analyser. Results are reported through the global scanner state
// declared above (Token, lexeme, value, valuer, Literal).
class Lexical
{
public:
    Lexical();   // constructor
    ~Lexical();  // destructor

    // Scans the next token and records its kind in Token.
    void GetNextToken();

    // Writes the current token to standard output.
    void displayToken();

private:
    // Fills the reserved-word table.
    void InitResWords();

    // Classifies the token that starts at the current character.
    void ProcessToken();

    // Classifies operator and punctuation tokens.
    void OpToken();

    // Scans integer and real number tokens.
    void NumToken();

    // Scans string literal tokens.
    void ProcessLiteralToken();

    ifstream fin;  // input file being scanned
};


#endif // !_lexical_H 

以上是全部代碼,供想要逐個函數查看的人參考。

我只是不知道如何正確顯示我的令牌! (displayToken) 有人可以幫助我使用該函數,因此對於每個令牌解析,它顯示的格式如下: 令牌----- Lexeme -------值/評估者/文字 numt ----- 1234 ------值

我只想寫下顯示令牌功能。在處理每個令牌後,您將如何顯示詞法,令牌以及它們是價值/估價師還是文字?

這是我的驅動程序文件

#include "lexical.h" 

#include <iostream> 
#include <fstream> 

using namespace std; 

// Driver: scans tokens one at a time and displays each of them until the
// lexer reports end of file.
int main()
{
    cout << "creating the constructor" << endl << endl ;
    Lexical myLex ;

    // NOTE(review): this stream is local to main; Lexical reads from its own
    // private `fin` member, so opening the file here does not hand it to
    // myLex - confirm how the lexer is meant to receive its input.
    ifstream fin;
    fin.open("test.txt") ;

    // Token is the global updated by GetNextToken().
    for ( ; Token != eofilet ; )
    {
        myLex.GetNextToken();
        myLex.displayToken();
    }

    cout << endl << "success" << endl ;
    fin.close();
    //system("pause");
    return 0;
}
+2

你有什麼問題?你有什麼嘗試? –

+0

您是否嘗試過調試它? – neagoegab

+0

我無法弄清楚在哪裏發送我的顯示令牌功能來獲得我想要的輸出! @sftrabbit – thestralFeather7

回答

2

你用 peek() 查看了 '\n',但我看不出你在哪裡消耗(實際讀取)了這個字符。

實現簡單的詞法分析器/語法分析器,最容易的方法是仿照 Wirth 在某個 Pascal 示例中的做法(具體出處我記不清了,抱歉):

  • 你始終保有一個前瞻字符;在一開始(構造函數中)就先讀入一個字符作爲前瞻。
  • 每次開始處理一個令牌時,先查看前瞻字符,並根據該字符決定要掃描哪一類令牌(例如:數字 -> 數值,字母 -> (暫定爲)標識符,……)。要跳過空白,只需不斷讀取下一個字符,直到它不是 ' '(或 '\t')爲止;遇到 '\n' 時把行號加一,然後讀取下一個字符。
  • 每當你需要一個字符時,就消耗當前的前瞻字符,並讀入下一個作爲新的前瞻。

語法分析器(遞歸下降)同樣用一個變量保存前瞻令牌,並以類似的方式工作。

這樣你就不需要使用 peek(),也不會在讀取字符時搞混。