// scanner.cpp
/**: scanner.cpp implementation file
 *
 * author: lonelyforest;
 * date: 2006.03.16
 */
#include "scanner.h"
#include <cstdio>
// overload operator =
Token &Token::operator =(const Token &rh)
{
    // Member-wise copy of the token kind and its lexeme text.
    // Assigning an object to itself leaves it unchanged, so that case is skipped.
    if (this != &rh)
    {
        this->type = rh.type;
        this->str = rh.str;
    }
    return *this;
}
/**: construction & destruction
 *
 * author: lonelyforest;
 * date: 2006.03.16
 */
//-----------------------------------------------------------------------------
Scanner::Scanner(const string &filename)
    : Tokenizer(filename)
{
    // Fill the reserved-word table first; it does not depend on the token state.
    build_key_map();

    // Start with no current token and nothing pushed back.
    m_token.type = k_NONE;
    m_pushed = false;
}
// .........
Scanner::~Scanner()
{
    // Intentionally empty: this destructor performs no explicit work.
}
// push current token
//-----------------------------------------------------------------------------
void Scanner::push()
{
m_pushed = true;
}
// initial keyword map
void Scanner::build_key_map()
{
string minic_keywords[] ={"int", "else",
"return", "void", "if","while", "read", "write"};
for (int i = 0; i < 8; ++i)
{
Token temp;
temp.type = tokenType(i);
temp.str = minic_keywords[i];
key_word.push_back( temp);
}
}
/**: getListFile
 *
 * create the trace log file...
 *
 * author: lonelyforest
 * date: 2006.03.16
 */
//-----------------------------------------------------------------------------
bool Scanner::getListFile()
{
if (TraceSource)
{
int pos = source_name.rfind('.');
string listfile(source_name);
listfile.erase(pos, listfile.size()-1);
listfile += ".log";
ofstream listing(listfile.c_str(), ofstream::out);
if (!listing )
{
sprintf(msg_temp, "create source list file \"%s\" fail...",listfile.c_str());
outputMsg(-1,msg_temp );
return false;
}
else
{
for (std::vector< string >::iterator iter = list_msg_.begin();
iter != list_msg_.end();
++iter)
{
listing << *iter;
}
listing <<"\n---------------------------- Done. ----------------------------\n";
listing << " There has " << errCount()<< " error(s) and "
<< warnCount() << " warning(s)\n";
sprintf(msg_temp, "source list file has save to \"%s\"...",listfile.c_str());
outputMsg(-2, msg_temp); // -2 means source file list.
}
return true;
}
else
{
return false;
}
}
// look_up
// if the word is a reserved word, return its tokenType value,
// else return k_ID (an ordinary identifier);
//-----------------------------------------------------------------------------
tokenType Scanner::reservedLookup(const string& word)
{
    // Linear scan of the reserved-word table; the first match wins.
    const std::vector<Token>::size_type total = key_word.size();
    std::vector<Token>::size_type idx = 0;
    while (idx < total)
    {
        if (key_word[idx].str == word)
        {
            return key_word[idx].type;
        }
        ++idx;
    }

    // Not a reserved word: treat it as an ordinary identifier.
    return k_ID;
}
/*: s_state;
 *
 * The states of the scanner's state machine; helper
 * states used by nextToken().
 */
enum stateType
{
    s_START,     // between tokens, nothing recognised yet
    s_INID,      // inside an identifier or reserved word
    s_INNUM,     // inside an integer literal
    s_INCOMMENT, // inside a C-style comment
    s_INASSIGN,  // seen '=', may become "=="
    s_INL,       // seen '<', may become "<="
    s_ING,       // seen '>', may become ">="
    s_DONE       // token complete
};
//-------------------------------------------------------------------------------
/**: nextToken
 *
 * primary interface ......
 * return a Token from source file(list_of_source)
 *
 * author: lonelyforest
 * date: 2006.03.16
 */
//-----------------------------------------------------------------------------
// Character-class helpers for nextToken(). They take the character widened
// to int, reject EOF explicitly, and convert to unsigned char before calling
// the <cctype> classifiers, which have undefined behaviour for negative
// arguments other than EOF.
static bool scan_is_digit(int ch)
{
    return ch != EOF && ::isdigit(static_cast<unsigned char>(ch)) != 0;
}

// True for a letter or an underscore.
static bool scan_is_ident_start(int ch)
{
    return ch != EOF
        && (ch == '_' || ::isalpha(static_cast<unsigned char>(ch)) != 0);
}

static bool scan_is_space(int ch)
{
    return ch != EOF && ::isspace(static_cast<unsigned char>(ch)) != 0;
}

// Scans and returns the next token of the source.
//
// If the current token was pushed back with push(), it is returned again
// unchanged. Otherwise characters are pulled with getNextChar() and run
// through a small state machine (see stateType) until a token is complete.
// Identifiers are checked against the reserved-word table. Unknown symbols
// yield a k_ERROR token and are reported through outputMsg(); when
// TraceSource is set, every token is also appended to the listing.
//
// Returns a reference to the scanner's current token (m_token); the same
// object is overwritten by the next call.
Token& Scanner::nextToken()
{
    if (m_pushed)
    {
        m_pushed = false;
        return m_token;
    }

    // At most this many characters of a lexeme are kept; the rest of an
    // over-long identifier or number is consumed but dropped.
    const int kMaxSavedChars = 43;

    int tokenStringIndex = 0;
    stateType state = s_START;
    m_token.str = "";
    m_token.type = k_NONE;

    while (state != s_DONE)
    {
        // Characters are held in int so that EOF stays distinguishable
        // even on platforms where plain char is unsigned.
        int c = getNextChar();
        int t;             // one-character look-ahead
        bool save = false; // whether c is appended to the lexeme

        switch (state)
        {
        case s_START:
            if (scan_is_digit(c))
            {
                save = true;
                state = s_INNUM;
            }
            else if (scan_is_ident_start(c))
            {
                save = true;
                state = s_INID;
            }
            else if (c == '=')
                state = s_INASSIGN;
            else if (scan_is_space(c))
                state = s_START; /* blanks */
            else if (c == '<')
                state = s_INL;
            else if (c == '>')
                state = s_ING;
            else
            {
                // Single-character tokens, comments, "!=" and errors.
                state = s_DONE;
                switch (c)
                {
                case EOF:
                    m_token.type = k_EOF;
                    m_token.str = "EOF";
                    break;
                case '+':
                    m_token.type = PLUS;
                    m_token.str = "+";
                    break;
                case '-':
                    m_token.type = MINUS;
                    m_token.str = "-";
                    break;
                case '*':
                    m_token.type = TIMES;
                    m_token.str = "*";
                    break;
                case '%':
                    m_token.type = MOD;
                    m_token.str = "%";
                    break;
                case '(':
                    m_token.type = LPARAN;
                    m_token.str = "(";
                    break;
                case ')':
                    m_token.type = RPARAN;
                    m_token.str = ")";
                    break;
                case '{':
                    m_token.type = LBRACE;
                    m_token.str = "{";
                    break;
                case '}':
                    m_token.type = RBRACE;
                    m_token.str = "}";
                    break;
                case ',':
                    m_token.type = COMMA;
                    m_token.str = ",";
                    break;
                case ';':
                    m_token.type = SEMI;
                    m_token.str = ";";
                    break;
                case '[':
                    m_token.type = LSQUARE;
                    m_token.str = "[";
                    break;
                case ']':
                    m_token.type = RSQUARE;
                    m_token.str = "]";
                    break;
                case '/': /* check whether this starts a comment */
                    t = getNextChar();
                    if (t == '*') // C style comment
                    {
                        state = s_INCOMMENT;
                    }
                    else if (t == '/') // C++ style comment: skip to end of line
                    {
                        c = t;
                        while (c != '\n' && c != EOF)
                        {
                            c = getNextChar();
                        }
                        state = s_START;
                    }
                    else
                    { /* not a comment: plain division operator */
                        m_token.type = DIV;
                        m_token.str = "/";
                        unGetNextChar();
                    }
                    break; // end case '/'
                case '!':
                    t = getNextChar();
                    if (t == '=')
                    {
                        m_token.type = NEQ;
                        m_token.str = "!=";
                    }
                    else
                    {
                        // NOTE(review): unlike the default case below, a lone
                        // '!' does not call add_err() - confirm whether that
                        // is intended.
                        m_token.type = k_ERROR;
                        m_token.str = "!";
                        unGetNextChar();
                    }
                    break;
                default:
                    add_err();
                    m_token.type = k_ERROR;
                    m_token.str = static_cast<char>(c);
                    break;
                } // end inner switch
            }
            break; // end case s_START

        case s_INCOMMENT:
            save = false;
            t = getNextChar();
            if ((c != EOF) && (t != EOF))
            { /* the file has not ended inside the comment */
                if ((c == '*') && (t == '/'))
                { // end of the C style comment
                    state = s_START;
                }
                else
                {
                    // Put the look-ahead back so that it becomes the first
                    // character of the next pair.
                    unGetNextChar();
                }
            }
            else
            { /* the file ended before the comment was closed */
                outputMsg(-1,"maybe comment end before code !");
                add_err();
                m_token.type = k_NONE;
                m_token.str = "--> comment unexpected end before code !";
                state = s_DONE;
            }
            break; // end state s_INCOMMENT

        case s_INASSIGN: /* = or == */
            state = s_DONE;
            if (c == '=')
            {
                m_token.type = EQ;
                m_token.str = "==";
            }
            else
            {
                m_token.type = ASSIGN;
                m_token.str = "=";
                unGetNextChar();
            }
            break;

        case s_INL: /* < or <= */
            state = s_DONE;
            if (c == '=')
            {
                m_token.type = NGT;
                m_token.str = "<=";
            }
            else
            {
                m_token.type = LT;
                m_token.str = "<";
                unGetNextChar();
            }
            break;

        case s_ING: /* > or >= */
            state = s_DONE;
            if (c == '=')
            {
                m_token.type = NLT;
                m_token.str = ">="; // previously the text was left as ">"
            }
            else
            {
                m_token.type = GT;
                m_token.str = ">";
                unGetNextChar();
            }
            break;

        case s_INNUM: /* number, integer */
            save = true;
            if (!scan_is_digit(c))
            { /* back up in the input */
                unGetNextChar();
                save = false;
                state = s_DONE;
                m_token.type = k_NUM;
            }
            break;

        case s_INID:
            save = true;
            if (!(scan_is_ident_start(c) || scan_is_digit(c)))
            { /* back up in the input */
                unGetNextChar();
                save = false;
                state = s_DONE;
                m_token.type = k_ID;
            }
            break;

        case s_DONE: /* cannot occur unless the machine or system is faulty */
        default:     /* should never happen */
            sprintf(msg_temp, "Scanner Bug : State = %4d", static_cast<int>(state));
            outputMsg(lineno(),msg_temp);
            add_err();
            m_token.type = k_ERROR;
            m_token.str = msg_temp;
            state = s_DONE;
            break;
        }

        if (save && (tokenStringIndex < kMaxSavedChars))
        {
            ++tokenStringIndex;
            m_token.str += static_cast<char>(c);
        }

        // A finished identifier may actually be a reserved word.
        if (state == s_DONE && m_token.type == k_ID)
        {
            m_token.type = reservedLookup(m_token.str);
        }
    }

    // trace compiler and trace scan
    if (m_token.type == k_ERROR)
    {
        string msg = "unknow or unsuported symbol ----> \'";
        msg += m_token.str+"\'";
        outputMsg(lineno(), msg.c_str());
        if (TraceSource)
        {
            sprintf(msg_temp, "\t%d: Error: unexpected or unsuported symbol--> '%s'\n", lineno(), m_token.str.c_str());
            insert_list(msg_temp);
        }
    }
    else if (TraceSource && (m_token.type != k_EOF))
    {
        // Build one listing line: "<tab><line>: <kind><lexeme>".
        string outmsg;
        sprintf(msg_temp, "\t%d: ", lineno());
        outmsg = msg_temp;
        switch (m_token.type)
        {
        case k_ID:
            outmsg += "ID, name = ";
            break;
        case k_NUM:
            outmsg += "NUM, val = ";
            break;
        case k_ELSE: case k_IF:
        case k_WHILE: case k_READ:
        case k_WRITE: case k_INT:
        case k_RETURN: case k_VOID:
            outmsg += "reserve word: ";
            break;
        case k_ERROR:
            // Not reachable here (k_ERROR is handled by the branch above);
            // kept for completeness, now without falling into k_NONE.
            outmsg += m_token.str;
            outmsg += ", Scanner Bug !";
            break;
        case k_NONE:
            outmsg += "Bug!";
            break;
        default:
            break;
        }
        outmsg = outmsg + m_token.str + '\n';
        Tokenizer::insert_list(outmsg.c_str());
    }
    return m_token;
}
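// ---------------------------------------------------------------------------
// (Residue from the web code viewer this file was copied from; not part of
// the program.)
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -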