?? lexicalanalyzer.cpp
字號:
/*
Implementation of the LexicalAnalyzer ADT, which contains a symbol table.
*/
#include "stdafx.h"
using namespace std;
/*
Construct an analyzer that is not yet attached to a source file.
The placeholder pointer and the counters get defined values here so that
destroying an analyzer on which setFilename() was never called does not
delete an uninitialized pointer.
*/
LexicalAnalyzer::LexicalAnalyzer()
{
    noT=0;
    line=0;
    cLength=0;
}
/*
Attach the analyzer to the source file named by a.
Opens the file, initiates the symbol table with the same name and resets
the line counter and the lexeme length.
May be called more than once: a previously opened file is closed first and
the placeholder node is allocated only once.
A file that cannot be opened is not reported here.
*/
void LexicalAnalyzer::setFilename(string a)
{
    filename=a;
    // open() fails on a stream that is already open, so release the old
    // file and drop any stale error/eof flags before opening the new one.
    if(input.is_open())
    {
        input.close();
    }
    input.clear();
    input.open(filename.c_str());
    cTable.initiate(filename);//initiate the symbolTable
    line=0;//first line is 0
    cLength=0;
    // noT is the node yylex() returns for whitespace and comments; keep
    // the existing one on repeated calls instead of leaking it.
    if(noT==0)
    {
        noT=new symbolElement;
    }
}
symbolElement* LexicalAnalyzer::yylex()
{
cLength=0;//the contents in lexemeArr become meaningless.
int state=-1;//state -1 means hasn't found the entry for current lexeme.
char tempC=input.get();//read a character.
inBuffer(buffer,tempC);//inseert a character into buffer.
state=findEntry(tempC);//find entry for current lexeme according to the first character.
while(!DFA(state,tempC))//If can be sure of the token for current lexeme go on looping.
{
if(!input.eof()&&cLength<64)
{
tempC=input.get();
inBuffer(buffer,tempC);
}
else if((input.eof()||cLength>=64)&&(state!=4&&state!=6&&state!=7&&state!=8&&state!=11&&state!=12
&&state!=15&&state!=17&&state!=19&&state!=20&&state!=21&&state!=22&&state!=22
&&state!=23&&state!=25&&state!=26&&state!=27&&state!=29&&state!=30&&state!=34
&&state!=35&&state!=41&&state!=43))
//if meets the end of the source file or the maximum length of an id was reached,
//but the current state for the analyzing lexeme is not an acceptable one.
{
cLength=0;//the string in the buffer is not available.
skip();//skip the rest of the current string that is read.
return 0;//Lexical error occurs.
}
else
{
break;
}
}
if(state==-2)
//If other error occurs such as undifined symbols was met.
{
cLength=0;//the string in the buffer is not available
return 0;//Lexical error occurs.
}
if(cLength>0&&state!=4&&state!=5&&state!=6&&state!=7&&state!=8)
//If the state for current lexeme is not whitespace or comments
{
string a="";
for(int l=0;l<cLength;l++)
{
a+=buffer[l];
}
//read the lexeme from buffer put it into a.
string tok;
switch(state)
{
//these states are acceptable ones.
case 11:
tok="div";break;
case 12:
if(a=="+")tok="plus";
if(a=="-")tok="minus";
if(a=="*")tok="mul";break;
case 14:
if(a=="==")tok="eq";
if(a=="<=")tok="le";
if(a==">=")tok="ge";break;
case 15:
if(a=="=")tok="as";
if(a=="<")tok="lt";
if(a==">")tok="gt";break;
case 17:
tok="ne";break;
case 19:
tok="rp";break;
case 20:
tok="lp";break;
case 21:
tok="lb";break;
case 22:
tok="smc";break;
case 23:
tok="rb";break;
case 25:case 26:case 27:
tok="id";break;
case 34:case 35:case 30:case 37:case 38:case 41:case 43:case 29:
tok="num";break;
default:tok="undefined";
}
//assign the token value for the current lexeme according to its current state.
curT=cTable.insert(a,tok);//insert the current token into symbolTable.
return curT;//return the pointer points to the newly inserted token in symbolTable.
}
else
//if the current token is comments or whitespace.
{
cLength=0;//the contents in lexemeArr become meaningless.
return noT;//noT is the special node for this.
}
}
/*
Skip the rest of the current string: consume characters up to and
including the next ' ' or '\n', or until nothing more can be read.
Increase line by 1 if the string ended with '\n'.
*/
void LexicalAnalyzer::skip()
{
    // Initialised so that the test below is well defined even when the
    // very first read fails.
    char fs='\0';
    // The stream itself is the loop condition: it turns false on any read
    // failure, not only when the eof flag happens to be set.
    while(input.get(fs)&&fs!=' '&&fs!='\n')
    {
    }
    if(fs=='\n')
    {
        line++;
    }
}
/*
Store a at position cLength of buffer, then advance cLength by 1.
No bounds check is made here.
*/
void LexicalAnalyzer::inBuffer(char buffer[],char a)
{
    buffer[cLength]=a;
    ++cLength;
}
int LexicalAnalyzer::findEntry(char a)
//if can't find entry for a, set state -2,means undefined symbol was met.
{
switch (a)
{
case '\n':
case ' ':
case '\t':return 5;
case '/':case '+':case '-':case '*':case '!':case '<':case '>':case '=':
return 9;
case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':case 'h':case 'i':
case 'j':case 'k':case 'l':case 'm':case 'n':case 'o':case 'p':case 'q':case 'r':
case 's':case 't':case 'u':case 'v':case 'w':case 'x':case 'y':case 'z':
case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':case 'H':case 'I':
case 'J':case 'K':case 'L':case 'M':case 'N':case 'O':case 'P':case 'Q':case 'R':
case 'S':case 'T':case 'U':case 'V':case 'W':case 'X':case 'Y':case 'Z':
return 24;
case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':
case '9':
return 28;
case '(':case ')':case '{':case '}':case ';':
return 18;
default:
return -2;
}
}
/*
One step of the lexical DFA: from the current state and the character
tempC, move to the next state.

state  in/out. On return it holds the new state, or -2 when the
       character is not allowed in the current state.
tempC  the character most recently placed in the lexeme buffer.

Returns false when another character is needed, true when the lexeme is
decided (either accepted or rejected).

When tempC does not belong to the accepted lexeme it is removed from the
buffer and pushed back to the input. On rejection the whole buffered
lexeme is pushed back and skip() discards the offending string.
line is increased whenever a '\n' is consumed.
*/
bool LexicalAnalyzer::DFA(int &state, char tempC)
{
    enum Outcome { kNeedMore, kAccept, kAcceptPushBack, kReject };

    // The <cctype> classifiers are undefined for negative arguments, so
    // classify through unsigned char. Only ASCII is classified, which
    // matches the characters findEntry() recognises.
    const unsigned char code=static_cast<unsigned char>(tempC);
    const bool letter=code<128&&isalpha(code)!=0;
    const bool digit=code<128&&isdigit(code)!=0;
    const bool exponentMark=(tempC=='E'||tempC=='e');

    int next=state;
    // Every state/character pair without a transition below is an error.
    Outcome outcome=kReject;

    switch(state)
    {
    case 2:case 3://inside a "//" comment, which runs to the end of the line.
        if(tempC!='\n')
        {
            next=3;
            outcome=kNeedMore;
        }
        else
        {
            next=4;
            line++;
            outcome=kAccept;
        }
        break;
    case 4://comment finished.
        outcome=kAccept;
        break;
    case 5://first whitespace character.
        if(tempC=='\n')
        {
            next=7;
            line++;
            outcome=kNeedMore;
        }
        else if(tempC==' ')
        {
            next=6;
            outcome=kNeedMore;
        }
        else if(tempC=='\t')
        {
            next=8;
            outcome=kNeedMore;
        }
        break;
    case 6:case 7:case 8://run of whitespace: 6 after ' ', 7 after '\n', 8 after '\t'.
        if(tempC==' ')
        {
            next=6;
            outcome=kNeedMore;
        }
        else if(tempC=='\t')
        {
            next=8;
            outcome=kNeedMore;
        }
        else if(tempC=='\n')
        {
            next=7;
            line++;
            outcome=kNeedMore;
        }
        else
        {
            outcome=kAcceptPushBack;
        }
        break;
    case 9://first character of an operator.
        if(tempC=='/')
        {
            next=10;
            outcome=kNeedMore;
        }
        else if(tempC=='+'||tempC=='-'||tempC=='*')
        {
            next=12;
            outcome=kAccept;
        }
        else if(tempC=='='||tempC=='<'||tempC=='>')
        {
            next=13;
            outcome=kNeedMore;
        }
        else if(tempC=='!')
        {
            next=16;
            outcome=kNeedMore;
        }
        break;
    case 10://after '/': a second '/' starts a comment, anything else means division.
        if(tempC=='/')
        {
            next=2;
            outcome=kNeedMore;
        }
        else
        {
            next=11;
            outcome=kAcceptPushBack;
        }
        break;
    case 13://after '=', '<' or '>': a following '=' makes the two-character operator.
        if(tempC=='=')
        {
            next=14;
            outcome=kAccept;
        }
        else
        {
            next=15;
            outcome=kAcceptPushBack;
        }
        break;
    case 16://after '!': only "!=" is allowed.
        if(tempC=='=')
        {
            next=17;
            outcome=kAccept;
        }
        break;
    case 18://single-character punctuation.
        if(tempC=='(')
        {
            next=20;
            outcome=kAccept;
        }
        else if(tempC==')')
        {
            next=19;
            outcome=kAccept;
        }
        else if(tempC=='{')
        {
            next=21;
            outcome=kAccept;
        }
        else if(tempC=='}')
        {
            next=23;
            outcome=kAccept;
        }
        else if(tempC==';')
        {
            next=22;
            outcome=kAccept;
        }
        break;
    case 24://first character of an identifier.
        if(letter)
        {
            next=25;
            outcome=kNeedMore;
        }
        break;
    case 25:case 26:case 27://identifier: letters and digits may follow.
        if(letter)
        {
            next=26;
            outcome=kNeedMore;
        }
        else if(digit)
        {
            next=27;
            outcome=kNeedMore;
        }
        else
        {
            outcome=kAcceptPushBack;
        }
        break;
    case 28://first digit of a number.
        if(digit)
        {
            next=29;
            outcome=kNeedMore;
        }
        break;
    case 29:case 30://integer part.
        if(exponentMark)
        {
            next=31;
            outcome=kNeedMore;
        }
        else if(digit)
        {
            next=30;
            outcome=kNeedMore;
        }
        else if(tempC=='.')
        {
            next=36;
            outcome=kNeedMore;
        }
        else
        {
            outcome=kAcceptPushBack;
        }
        break;
    case 31://after the exponent mark of an integer: sign or digit required.
        if(tempC=='+')
        {
            next=32;
            outcome=kNeedMore;
        }
        else if(tempC=='-')
        {
            next=33;
            outcome=kNeedMore;
        }
        else if(digit)
        {
            next=34;
            outcome=kNeedMore;
        }
        break;
    case 32:case 33://after the exponent sign: digit required.
        if(digit)
        {
            next=34;
            outcome=kNeedMore;
        }
        break;
    case 34:case 35://exponent digits of an integer.
        if(digit)
        {
            next=35;
            outcome=kNeedMore;
        }
        else
        {
            outcome=kAcceptPushBack;
        }
        break;
    case 36://after the decimal point: digit required.
        if(digit)
        {
            next=37;
            outcome=kNeedMore;
        }
        break;
    case 37:case 38://fraction digits.
        if(digit)
        {
            next=38;
            outcome=kNeedMore;
        }
        else if(exponentMark)
        {
            next=39;
            outcome=kNeedMore;
        }
        else
        {
            outcome=kAcceptPushBack;
        }
        break;
    case 39://after the exponent mark of a fraction: sign or digit required.
        if(tempC=='+')
        {
            next=40;
            outcome=kNeedMore;
        }
        else if(digit)
        {
            next=41;
            outcome=kNeedMore;
        }
        else if(tempC=='-')
        {
            next=42;
            outcome=kNeedMore;
        }
        break;
    case 40:case 42://after the exponent sign: digit required.
        if(digit)
        {
            next=41;
            outcome=kNeedMore;
        }
        break;
    case 41:case 43://exponent digits of a fraction.
        if(digit)
        {
            next=43;
            outcome=kNeedMore;
        }
        else
        {
            outcome=kAcceptPushBack;
        }
        break;
    default://state -2 and every state that has no outgoing transition.
        break;
    }

    switch(outcome)
    {
    case kNeedMore:
        state=next;
        return false;
    case kAccept:
        state=next;
        return true;
    case kAcceptPushBack:
        state=next;
        cLength--;//tempC is not part of the lexeme.
        // If the read that produced tempC failed, nothing was taken from
        // the stream, so there is nothing to give back.
        if(!input.fail())
        {
            input.unget();
        }
        return true;
    default:
        break;
    }

    //Error: give the buffered characters back and skip the whole string.
    if(input.fail())
    {
        // The stream can neither take characters back nor be read any
        // further; just drop the buffered lexeme.
        cLength=0;
    }
    else
    {
        for(;cLength>0;cLength--)
        {
            input.unget();
        }
        skip();
    }
    state=-2;
    return true;//be sure of an occurring error.
}
bool LexicalAnalyzer::isEnd()
{
input.get();
if(input.eof())
{
input.unget();
return true;
}
else
{
input.unget();
return false;
}
}
/*
Push the current lexeme back to the input: one unget() for every
character counted by cLength, leaving cLength at 0.
*/
void LexicalAnalyzer::ungetT()
{
    while(cLength>0)
    {
        input.unget();
        cLength--;
    }
}
/*
Release the placeholder node that setFilename() allocates and that
yylex() returns for whitespace and comments.
*/
LexicalAnalyzer::~LexicalAnalyzer()
{
    symbolElement* const placeholder=noT;
    delete placeholder;
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -