我用 C++ 寫了一個詞法分析器,它會逐一分析一個檔案的內容,並且只顯示每個記號(token)、詞素(lexeme),以及對應的 value/valuer/Literal(整數、浮點數或字串字面值)。
我相當確定分析部分的程式碼可以運作,但我寫不出 displayToken,讓它以下列格式輸出:
Token Lexeme value/valuer/Literal
NUMT 1234 value
以下是我的程式碼,這是 lexical.cpp 檔案:
#include "lexical.h"

#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
// Definitions of the scanner state declared `extern` in lexical.h.
// The explicit initializers below equal the zero-initialization these
// namespace-scope objects already receive; they are spelled out so the
// starting state is visible at a glance.
int         value  = 0;      // integer attribute (NumToken stores atoi(lexeme) here)
float       valuer = 0.0f;   // floating-point attribute (NumToken stores atof(lexeme) here)
char        ch     = '\0';   // character most recently read from the input stream
std::string lexeme;          // text of the token being scanned
SYMBOL      Token;           // kind of the current token; zero-initialized, i.e. the first enumerator
std::string Literal;         // contents of the current string literal
std::string reswords[15];    // reserved-word spellings, filled in by InitResWords
using namespace std;
// Constructor: prepares the lexer for scanning.
//
// Fills the reserved-word table and opens the input file on the private
// stream `fin`. `fin` is private and no other member opens it, so this is
// the only place the lexer can be connected to its input. The file name
// matches the one used by the driver ("test.txt").
//
// Follows the file's existing error convention (print a message, then exit)
// when the input cannot be opened.
Lexical::Lexical()
{
    InitResWords();

    fin.open("test.txt");
    if (!fin)
    {
        cerr << "Error: cannot open test.txt" << endl;
        exit(EXIT_FAILURE);
    }
}
// Destructor: intentionally empty. The class holds no raw resources; its
// only data member is the std::ifstream `fin`, which is destroyed
// automatically after this body runs.
Lexical::~Lexical()
{
}
void Lexical::GetNextToken()
{
// getting the lexeme *******************
// **************************************
value = 0 ;
valuer = 0.0 ;
int i = 1 ; // line tracker
while(!(fin.eof()))
{
lexeme[i] = ch ;
fin.get(ch);
}
if(fin.peek() == '\n')
{
i++ ; // increment the line !
}
if(!fin.eof())
{
ProcessToken() ;
}
else
{
fin.close() ;
Token = eofilet ; // set to end of file
}
}
//rules not implemented yet
void Lexical::displayToken()
{
cout << "Token" << " " << "Lexeme" << " " << " value/valuer/Literal " << endl ;
cout << Token << " " << lexeme << " " << value ;
}
// Fills the reserved-word table: reswords[t] receives the spelling of the
// reserved word whose token kind is `t`, for every kind from begint to endt.
//
// Plain assignment is used. std::string::copy() goes the other way (it copies
// characters OUT of the string into the buffer passed to it), so it can never
// populate the table.
void Lexical::InitResWords()
{
    reswords[begint]     = "BEGIN";
    reswords[programt]   = "PROGRAM";
    reswords[constt]     = "CONST";
    reswords[vart]       = "VAR";
    reswords[proceduret] = "PROCEDURE";
    reswords[ift]        = "IF";
    reswords[whilet]     = "WHILE";
    reswords[thent]      = "THEN";
    reswords[elset]      = "ELSE";
    reswords[realt]      = "REAL";
    reswords[integert]   = "INTEGER";
    reswords[booleant]   = "BOOLEAN";
    reswords[chart]      = "CHAR";
    reswords[arrayt]     = "ARRAY";
    reswords[endt]       = "END";
}
void Lexical::ProcessToken()
{
lexeme.at(0) = ch ; // 1 character at a time
fin.get(ch) ;
if((lexeme.at(0) >= 'A' && lexeme.at(0) <= 'Z') || (lexeme.at(0) >= 'a' && lexeme.at(0) <= 'z')) // if alphabets
{
int counter = 0 ;
//match word token
if(!isdigit(lexeme.at(0)) && !isalpha(lexeme.at(0)) && lexeme.at(0) != '_')
{
//*********** Working with reserved words !!! **************************************************
int j = 0 ;
bool flag = false ;
while(j < endt)
{
if(lexeme.compare(reswords[j]) == 0)
{
Token = (SYMBOL)j;
displayToken();
flag = true ;
}
}
//**********************************************************************************************
// if not a token , then we are alrea
Token = idt ;// then an identifier token
displayToken();
return ;
}// if ends
lexeme.at(counter) = ch ;// keep proceeding
fin.get(ch) ;
}
else if (lexeme.at(0) >= '0' && lexeme.at(0) <= '9') // if numbers
{
NumToken() ;
}
else if (lexeme.at(0) == '\"') // for string literal
{
ProcessLiteralToken();
}
else if (lexeme.at(0) == '/') // entering comment section
{
if (ch == '/' || ch == '*')
{
// MatchComment();
if (ch == '/') // start of a comment maybe ?
{
//Line comment
while(ch != '\n')
fin.get(ch);
}
else if(ch == '*') // end of a comment ?
{
while(true)
{
fin.get(ch);
if (ch == '*')
{
char peek_value = fin.peek();
if (peek_value == '/')
{
fin.get(ch);
// fin.get(ch);
return;
}
else
continue;
}
} // while ends
}
else
{
cout << "ERROR !!!" ;
}
GetNextToken();
} // comment analyzer then moves to next token !
else
{
OpToken();
}
}
else if ((lexeme.at(0) == '<') || (lexeme.at(0) == '>') || (lexeme.at(0) == '='))
{
if (ch == '=')
{
lexeme.at(1) = ch;
Token = relop ;
fin.get(ch);
}
else
OpToken() ; // process the final token
}
else if ((lexeme.at(0)) == ':')
{
if (ch == '=')
{
lexeme.at(1) = ch;
Token = relop ;
fin.get(ch);
}
}
else
OpToken();
}
void Lexical::OpToken()
{
//Need to detect +, -, ||, *, /, &&, =,(), {}, comma, semicolon, period, quotation("), and []
if(lexeme.at(0) == '+' || lexeme.at(0) == '-' || lexeme.at(0) == 'OR')
{
Token = addop ;
return;
}
else if(lexeme.at(0) == '*' || lexeme.at(0) == '/' || lexeme.at(0) == 'DIV' || lexeme.at(0) == 'MOD' || lexeme.at(0) == 'AND')
{
Token = mulop ;
return;
}
else if(lexeme.at(0) == '<' || lexeme.at(0) == '>' || lexeme.at(0) == '=')
{
Token = relop ;
return;
}
else if(lexeme.at(0) == '(')
{
Token = lparen;
return;
}
else if(lexeme.at(0) == ')')
{
Token = rparen;
return;
}
else if(lexeme.at(0) == '{')
{
Token = clbrat;
return;
}
else if(lexeme.at(0) == '}')
{
Token = crbrat;
return;
}
else if(lexeme.at(0) == ',')
{
Token = comma;
return;
}
else if(lexeme.at(0) == ';')
{
Token = semicolon ;
return;
}
else if(lexeme.at(0) == '.')
{
Token = period ;
return;
}
else if(lexeme.at(0) == '~')
{
Token = tildat;
return;
}
else if(lexeme.at(0) == '[')
{
Token = lbrat;
return;
}
else if(lexeme.at(0) == ']')
{
Token = rbrat;
return;
}
else if(lexeme.at(0) == '#')
{
Token = nott ;
return;
}
else
{
Token = unknownt ;
cout << "Error !!" ;
exit(0);
}
}
// Scans the rest of a numeric token.
//
// On entry `lexeme` already holds the first digit and `ch` is the character
// after it. Further digits are appended to `lexeme`. If a '.' follows, at
// least one digit must come after it: the token is then a float and its value
// is stored in `valuer`; otherwise the token is an integer stored in `value`.
// In both cases `Token` becomes `numt` and `ch` is left on the first
// character after the number ('\0' at end of input).
//
// A '.' that is not followed by a digit sets `Token` to `unknownt`, prints
// "Error." and terminates the program with a failure status.
void Lexical::NumToken()
{
    // Integer part. A failed get() leaves `ch` unchanged, so it is reset to
    // '\0' at end of input to terminate the loop.
    while (isdigit(static_cast<unsigned char>(ch)))
    {
        lexeme += ch;
        if (!fin.get(ch)) ch = '\0';
    }

    if (ch != '.')
    {
        // No decimal point: integer NUMT token.
        Token = numt;
        value = atoi(lexeme.c_str());
        return;
    }

    // Decimal point: a digit must follow for this to be a valid float.
    lexeme += ch;
    if (!fin.get(ch)) ch = '\0';

    if (!isdigit(static_cast<unsigned char>(ch)))
    {
        Token = unknownt;
        cout << "Error." << endl;
        exit(EXIT_FAILURE);
    }

    // Fractional part.
    while (isdigit(static_cast<unsigned char>(ch)))
    {
        lexeme += ch;
        if (!fin.get(ch)) ch = '\0';
    }

    Token = numt;
    valuer = static_cast<float>(atof(lexeme.c_str()));
}
void Lexical::ProcessLiteralToken()
{
int i = 0;
while (ch != '\"')
{
if (ch == '\n')
{
Token = unknownt ;
cout << "Error!" << endl ;
exit(0);
}
Literal.at(i++) = ch ; // advance !
fin.get(ch);
}
//Literal[i++] = ch;
Token = literalt;
fin.get(ch);
return;
}
我的 lexical.h(標頭檔)如下:
// lexical.h -- interface of the lexical analyzer.
//
// The include guard is LEXICAL_H: names that begin with an underscore
// followed by an uppercase letter are reserved for the implementation.
#ifndef LEXICAL_H
#define LEXICAL_H
#include <fstream>
#include <iostream>
#include <string>
// NOTE: a using-directive in a header leaks into every file that includes it.
// It is kept because lexical.cpp declares its `string` globals unqualified
// before issuing its own using-directive.
using namespace std ;

// Token kinds produced by the lexer.
// The reserved words come first (begint .. endt, values 0 .. 14) because
// their values are used directly as indexes into reswords[].
enum SYMBOL
{
    // reserved words
    begint, programt, constt, vart, proceduret, ift, whilet, thent, elset,
    realt, integert, booleant, chart, arrayt, endt,
    // word operators
    divt, modt, andt, nott, ort,
    // operator classes
    addop, mulop, assignop,
    // punctuation
    lparen, rparen, comma, semicolon, period,
    // values and names
    numt, idt, literalt,
    // special
    unknownt, eofilet,
    // remaining operators and brackets
    relop, clbrat, crbrat, tildat, lbrat, rbrat, colon
};

// Scanner state shared between the lexer and its driver (defined in lexical.cpp).
extern int value ;          // integer value of a numeric token
extern float valuer ;       // floating-point value of a numeric token
extern char ch ;            // character most recently read from the input
extern string lexeme ;      // text of the current token
extern SYMBOL Token ;       // kind of the current token
extern string Literal ;     // contents of a string-literal token
extern string reswords[15] ; // reserved-word spellings, indexed by begint..endt

// Lexical analyzer. GetNextToken() scans one token into the globals above;
// displayToken() prints the token most recently scanned.
class Lexical {
public :
    Lexical();   // constructor
    ~Lexical() ; // destructor
    // Scans the next token from the input.
    void GetNextToken() ;
    // Prints the current token.
    void displayToken() ;
private :
    // Fills reswords[] with the reserved-word spellings.
    void InitResWords() ;
    // Classifies the token that starts at the current character.
    void ProcessToken() ;
    // Classifies operator and punctuation tokens.
    void OpToken() ;
    // Scans integer and floating-point numbers.
    void NumToken() ;
    // Scans string literals.
    void ProcessLiteralToken() ;
    ifstream fin ; // input file being scanned
} ;
#endif // LEXICAL_H
以上是完整的程式碼,提供給想查看其中任何函式的人參考。
我只是不知道該如何正確顯示我的記號(displayToken)!有人可以幫我寫這個函式嗎?讓它在每解析出一個記號後,以下列格式顯示: Token ----- Lexeme ------- value/valuer/Literal numt ----- 1234 ------ value
我只想完成 displayToken 函式。在處理完每個記號之後,要如何顯示詞素、記號,以及它對應的是 value、valuer 還是 Literal?
這是我的驅動程式檔案:
#include "lexical.h"
#include <iostream>
#include <fstream>
using namespace std;
// Driver: scans the input token by token and prints each one, stopping after
// the end-of-file token has been displayed.
//
// No stream is opened here. The lexer reads from its own private stream
// member, which a stream local to main() can never reach, so opening one in
// this function would have no effect on the scan.
int main()
{
    cout << "creating the constructor" << endl << endl ;
    Lexical myLex ;

    // do/while: at least one token is always requested, and the loop does not
    // depend on the value `Token` happens to have before the first scan.
    do
    {
        myLex.GetNextToken();
        myLex.displayToken();
    } while (Token != eofilet);

    cout << endl << "success" << endl ;
    //system("pause");
    return 0;
}
你有什麼問題?你有什麼嘗試? –
您是否嘗試過調試它? – neagoegab
我搞不清楚該在哪裡呼叫我的 displayToken 函式,才能得到我想要的輸出! @sftrabbit – thestralFeather7