?? tokens.cpp
字號:
/* TOKENS.CPP
*/
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#ifdef _MSC_VER
#define _EXPORTING_DLL
#endif
#include "../include/UC/tokens.h"
/// Copy the characters in [start, end) into tok and NUL-terminate the result.
/// @param tok    destination buffer; the caller must supply at least
///               (end - start) + 1 bytes
/// @param start  first character to copy
/// @param end    one past the last character to copy
/// @return tok
static char *copy_str(char *tok, char *start, char *end)
{
    // Use pointer subtraction instead of casting pointers to long (which is
    // narrower than a pointer on LLP64 targets), and treat a reversed range
    // as empty rather than handing strncpy a huge unsigned count.
    size_t sz = (end > start) ? (size_t)(end - start) : 0;
    strncpy(tok,start,sz);
    tok[sz] = '\0';
    return tok;
}
/// Convert the leading digits of buff to an integer in the given base.
/// @param buff  NUL-terminated text to convert
/// @param base  numeric base handed to strtoul (e.g. 8, 10, 16)
/// @return the converted value; text that is not a number yields 0
static long convert_int(char *buff, int base)
{
    // *fix 1.2.1 Use 'strtoul' instead of 'strtol' to input ints larger than MAXINT
    // The end pointer is not needed, so pass NULL rather than a char array
    // disguised as a char**.
    return (long)strtoul(buff,NULL,base);
}
// Not every C library ships this one, so we carry our own.
// Written by Kent Irwin, irwin@leland.stanford.edu.
/// Find the last occurrence of s2 inside s1.
/// @return pointer to the start of the last match, s1 itself when s2 is
///         empty, or NULL when there is no match
char *strrstr(const char *s1,const char *s2)
{
    if (*s2 == '\0')
        return (char *)s1;
    // Walk candidate start positions from the end of s1 back to its start.
    for (const char *cand = s1 + strlen(s1); cand != s1; ) {
        --cand;
        const char *hay = cand;
        const char *needle = s2;
        while (*hay == *needle) {
            ++hay;
            ++needle;
            if (*needle == '\0')
                return (char *)cand;    // matched all of s2
        }
    }
    return NULL;
}
// File-scope scratch buffers shared by every Tokenizer in this translation unit.
// g_sbuff: decoded text of the last string/char constant (see get_string()).
static char g_sbuff[STRSIZE];
// g_obuff: digits of an octal escape sequence while it is being converted.
static char g_obuff[10];
// g_tbuff: text of the last identifier token (see get_token()); also the
// default destination of get_str().
static char g_tbuff[STRSIZE];
//-----------------Tokenizer class------------------------
/// Construct a tokenizer, optionally opening a file straight away.
/// @param fname  file to open, or NULL to start with no input attached
Tokenizer::Tokenizer(const char *fname)
{
    m_inf = NULL;
    m_file = NULL;
    m_flags = 0;
    m_comment = NULL;
    m_search_fn = (SearchFn)&strstr;
    // Start with an empty, terminated line buffer so that scanning before any
    // input is attached sees "end of line" instead of uninitialized memory.
    *m_buff = 0;
    reset();
    m_end_P = m_P;      // empty token range until the first scan
    if(fname) open(fname);
}
/// Point the scanner at the start of str: the start marker, the current
/// position and the token-start marker all refer to it afterwards.
void Tokenizer::set_str(char *str)
{
    m_start_P = str;
    m_P = str;
    m_start = str;
}
/// Closes the input file, if one is open.
Tokenizer::~Tokenizer()
{
    this->close();
}
void Tokenizer::set_flags(int flags)
{
m_flags = flags;
}
void Tokenizer::set_comment(const char *c)
{
m_comment = c;
}
/// Open fname for reading, closing any file that was open before.
/// The line buffer is emptied and the line counter restarted.
/// Only the fname pointer is remembered (rewind() passes it back to open()).
/// @return true when the file could be opened
bool Tokenizer::open(const char *fname)
{
    close();                    // release a previously opened stream first
    m_file = fname;
    m_inf = fopen(fname,"r");
    m_buff[0] = 0;              // nothing has been read yet
    reset();
    return m_inf != NULL;
}
/// Close the input file if one is open; otherwise do nothing.
void Tokenizer::close()
{
    if (! m_inf) return;
    fclose((FILE*)m_inf);
    m_inf = NULL;
}
/// Aim the scanner at the start of the internal line buffer and zero the
/// line counter.
void Tokenizer::reset()
{
    set_str(m_buff);
    m_line = 0;
}
/// Read lines from the input file into the line buffer until one has content.
/// Each line read increments the line counter, is cut at the last occurrence
/// of the comment marker (if one is set) and, when STRIP_LINEFEEDS is set,
/// loses its trailing newline.
/// @param skipws  when true, leading whitespace is skipped, so lines holding
///                only whitespace count as empty
/// @return true with the scan position on the first character of a non-empty
///         line; false when no file is open or nothing more can be read
bool Tokenizer::fetch_line(bool skipws /*= true*/)
{
    do {
        if (! m_inf) return false;
        // Trust fgets' own result: testing feof() after the call discards a
        // final line that has no trailing newline, and on a read error it
        // would reprocess whatever was left in the buffer.
        if (fgets(m_buff,LINESIZE,(FILE*)m_inf) == NULL) return false;
        if (m_flags & STRIP_LINEFEEDS) {
            // Only strip an actual newline; the last line of a file or an
            // over-long line may not end in one.
            size_t len = strlen(m_buff);
            if (len > 0 && m_buff[len-1] == '\n') m_buff[len-1] = 0;
        }
        ++m_line;
        if (m_comment) {
            char *pc = strrstr(m_buff,m_comment);
            if (pc) *pc = 0;
        }
        set_str(m_buff);
        if (skipws) skip_space();
    } while (*m_P == 0);
    return true;
}
/// Grab characters from the current line up to (and optionally including) ch.
/// @param ch          delimiter to look for
/// @param discard_ch  true: the delimiter is skipped and left out of the
///                    result; false: the delimiter is part of the result
/// @return the grabbed text (held in the shared token buffer), or NULL when
///         ch does not occur in the rest of the line
char *Tokenizer::get_upto(char ch, bool discard_ch)
{
    m_start_P = m_P;
    while(*m_P && *m_P != ch) m_P++;
    const bool found_end = (*m_P == ch);
    // Never step over the line terminator: doing so would leave the scan
    // position beyond the end of the current line.
    const bool at_terminator = (*m_P == '\0');
    if (! discard_ch && ! at_terminator) m_P++;
    m_end_P = m_P;
    if (discard_ch && ! at_terminator) m_P++;
    return found_end ? get_str(g_tbuff) : NULL;
}
/// Throw away the rest of the current line and move on to the next
/// non-blank input. The line is truncated in place at the scan position.
void Tokenizer::discard_line()
{
    *m_P = '\0';
    (void)skip_whitespace();    // result deliberately ignored
}
// note:
// getline() trims a trailing linefeed from the copied text when one is present.
// called w/ null arguments, will merely skip to the next line.
/// Copy the rest of the current line into buff, then discard the line.
/// @param buff  destination, or NULL to skip the line without copying
/// @param sz    size of buff in bytes; at most sz-1 characters are copied and
///              the result is always NUL-terminated
/// @return buff
char* Tokenizer::getline(char* buff /*= NULL*/, int sz /*= 0*/)
{
    if (buff != NULL && sz > 0) {
        // strncpy does not terminate when the source fills the buffer, so
        // reserve the last byte for the terminator.
        strncpy(buff,m_P,sz-1);
        buff[sz-1] = '\0';
        size_t len = strlen(buff);
        // guard the empty case: there is no last character to inspect
        if (len > 0 && buff[len-1] == '\n') buff[len-1] = '\0';
    }
    discard_line();
    return buff;
}
/// Return the next raw character, reading a new line (without skipping
/// whitespace) when the current one is exhausted.
/// @return the character, or '\0' when no further input is available
char Tokenizer::getch()
{
    // At end of input stay on the terminator instead of stepping past it, so
    // repeated calls keep returning '\0'.
    if (*m_P == '\0' && ! fetch_line(false)) return '\0';
    return *m_P++;
}
// back to start of file
void Tokenizer::rewind()
{
if (! m_inf) {
open(m_file);
} else {
set_pos(0);
reset();
}
}
// file position
/// @return the current offset in the input file as reported by ftell, or -1
///         when no file is open
long Tokenizer::pos()
{
    if (! m_inf) return -1L;    // ftell must not be given a null stream
    return ftell((FILE*)m_inf);
}
/// Seek the input file to absolute offset p; does nothing when no file is open.
/// The line buffer is not touched.
void Tokenizer::set_pos(long p)
{
    if (! m_inf) return;        // fseek must not be given a null stream
    fseek((FILE*)m_inf,p,SEEK_SET);
}
/// @return non-zero when the input file's end-of-file indicator is set, or
///         when no file is open (there is nothing left to read)
int Tokenizer::eof()
{
    if (! m_inf) return 1;      // feof must not be given a null stream
    return feof((FILE*)m_inf);
}
/// Advance to the next non-whitespace character, pulling in new lines from
/// the file whenever the current one runs out.
/// @return true when positioned on a character; false when no further line
///         could be fetched
bool Tokenizer::skip_whitespace()
{
    for (;;) {
        skip_space();
        if (*m_P != 0) return true;
        if (! fetch_line()) return false;   // EOF will pass through as T_END
    }
}
/// Advance past whitespace on the current line only; stops at the terminator.
void Tokenizer::skip_space()
{
    // <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF), so convert before classifying.
    while(*m_P && isspace((unsigned char)*m_P)) m_P++;
}
/// Advance past a run of decimal digits.
void Tokenizer::skip_digits()
{
    // <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF), so convert before classifying.
    while(isdigit((unsigned char)*m_P)) m_P++;
}
/// Scan the next token, reading further lines from the file as needed.
///
/// Token kinds produced:
///  - T_TOKEN   a word (letters/digits, plus '_' when C_IDEN is set); its
///              text is also copied to the shared token buffer
///  - numbers   T_NUMBER without C_NUMBER; with C_NUMBER one of T_INT, T_HEX
///              or T_OCT; T_DOUBLE whenever a fraction or exponent is present
///  - T_STRING / T_CHAR  a quoted constant, decoded into the shared string
///              buffer; escapes are processed only when C_STRING is set
///  - otherwise the single character itself, cast to TokenType
/// The token-start/token-end markers delimit the token text for get_str().
/// @return the token kind; T_END at end of input or on an unterminated
///         string constant
TokenType Tokenizer::next()
{
    if (! skip_whitespace()) return T_END;  // means: finis, end of file, bail out.
    char ch = *m_P;
    // <ctype.h> functions need values representable as unsigned char.
    // ch is non-zero here, so m_P+1 is at worst the line terminator.
    const unsigned char uch = (unsigned char)ch;
    const unsigned char unext = (unsigned char)*(m_P+1);
    int c_iden = m_flags & C_IDEN;

    if (isalpha(uch) || (ch == '_' && c_iden)) { //--------------------- TOKENS --------------
        m_start_P = m_P;
        while (isalnum((unsigned char)*m_P) || (*m_P == '_' && c_iden)) m_P++;
        m_end_P = m_P;
        // Copy no more than the shared token buffer can hold.
        char *copy_end = m_end_P;
        if ((long)(copy_end - m_start_P) >= (long)STRSIZE)
            copy_end = m_start_P + STRSIZE - 1;
        copy_str(g_tbuff,m_start_P,copy_end);
        return T_TOKEN;
    }

    if (isdigit(uch) || ((ch == '.' || ch == '-') && isdigit(unext))) { //------- NUMBERS ------------------
        int c_num = m_flags & C_NUMBER;
        m_int_type = c_num ? T_INT : T_NUMBER;
        TokenType ntype = m_int_type;
        m_start_P = m_P;
        if (*m_P != '.') {
            if (*m_P == '0' && c_num) {
                // actual verification of hex or octal constants must happen in lexer
                if (*(m_P+1) == 'x' || *(m_P+1) == 'X') {   // hex constant
                    // Step over the "0x" prefix first: 'x' is not a hex digit,
                    // so scanning from the '0' would stop after one character.
                    m_P += 2;
                    while (isxdigit((unsigned char)*m_P)) m_P++;  // a preliminary check!
                    ntype = m_int_type = T_HEX;
                } else if (isdigit(unext)) {                // octal constant
                    skip_digits();
                    ntype = m_int_type = T_OCT;
                } else {
                    skip_digits();                          // plain zero!
                }
            } else {
                m_P++;                                      // skip first - might be '-'
                skip_digits();
            }
        }
        if (*m_P == '.') {                                  // (opt) fractional part
            m_P++;
            skip_digits();
            ntype = T_DOUBLE;
        }
        if (*m_P == 'e' || *m_P == 'E') {                   // (opt) exponent part
            m_P++;
            if (*m_P == '+' || *m_P == '-') m_P++;          // (opt) exp sign
            skip_digits();
            ntype = T_DOUBLE;
        }
        m_end_P = m_P;
        return ntype;
    }

    if (ch == '\"' || ch == '\'') { //------------CHAR OR STRING CONSTANT-------
        char endch = *m_P++;
        char *p = g_sbuff;
        // Last writable slot; one byte is kept back for the terminator.
        char * const p_limit = g_sbuff + STRSIZE - 1;
        int c_str = m_flags & C_STRING;
        m_start_P = g_sbuff;
        while (*m_P && *m_P != endch) {
            if (*m_P == '\\' && c_str) {
                char ech;
                m_P++;
                // A backslash as the last character of the line: stop on the
                // terminator so the constant is reported as unterminated.
                if (*m_P == '\0') break;
                switch(*m_P) {
                case '\\': ech = '\\'; break;
                case 'n':  ech = '\n'; break;
                case 'r':  ech = '\r'; break;
                case 't':  ech = '\t'; break;
                case 'b':  ech = '\b'; break;
                case '\"': ech = '\"'; break;
                case '\'': ech = '\''; break;
                case '0': { //..collecting OCTAL constant
                    char *start_oct = m_P;
                    skip_digits();
                    // Hand over no more digits than the octal scratch buffer holds.
                    char *end_oct = m_P;
                    if ((long)(end_oct - start_oct) >= (long)sizeof(g_obuff))
                        end_oct = start_oct + sizeof(g_obuff) - 1;
                    copy_str(g_obuff,start_oct,end_oct);
                    ech = (char)convert_int(g_obuff,8);
                    m_P--;  // leave us on last digit
                } break;
                default:    // unknown escape: keep the backslash and the character
                    if (p < p_limit) *p++ = '\\';
                    ech = *m_P;
                    break;
                } // switch
                if (p < p_limit) *p++ = ech;
                m_P++;
            } else {
                if (p < p_limit) *p++ = *m_P;
                m_P++;
            }
        }
        // Terminate in every case so the string buffer is always well-formed.
        *p = '\0';
        m_end_P = p;
        if (! *m_P) return T_END; //fatal_error(*this,"Unterminated string constant");
        m_P++;  // skip the endch
        return (endch == '\"' || ! c_str) ? T_STRING : T_CHAR;
    }

    // this is to allow us to use get_str() for ALL token types
    m_start_P = m_P;
    m_P++;
    m_end_P = m_P;
    return (TokenType)ch;
}
/// Look at the character count positions away from the scan position without
/// consuming anything. No bounds checking is performed.
int Tokenizer::peek_ahead(int count)
{
    return m_P[count];
}
/// @return the shared string buffer that next() fills when it scans a
///         string or character constant
char *Tokenizer::get_string()
{
    return &g_sbuff[0];
}
/// Copy the text of the most recent token into tok.
/// @param tok  destination buffer; NULL selects the shared token buffer
/// @return the buffer that received the text
char *Tokenizer::get_str(char *tok)
{
    char *dest = (tok != NULL) ? tok : g_tbuff;
    return copy_str(dest,m_start_P,m_end_P);
}
/// @return the shared token buffer that next() fills when it scans a word
char *Tokenizer::get_token()
{
    return &g_tbuff[0];
}
double Tokenizer::get_float()
{
char buff[20];
return atof(get_str(buff));
}
int Tokenizer::get_int()
{
char buff[20];
return convert_int(get_str(buff),m_int_type == T_INT ? 10 : 16);
}
double Tokenizer::next_float()
{
TokenType t;
do {
t = next();
if (t == T_NUMBER) return get_float();
} while (t != T_END);
return 0.0;
}
/// Test whether the unread text at the scan position begins with str.
/// Nothing is consumed and whitespace is not skipped.
bool Tokenizer::next_is(const char* str)
{
    const size_t wanted = strlen(str);
    return 0 == strncmp(m_P,str,wanted);
}
/// Consume tokens until a word token equal to str has been read.
/// @return true when the word was found (the scanner is then just past it);
///         false when the input ended first
bool Tokenizer::go_to_word(const char* str)
{
    for (TokenType t = next(); t != T_END; t = next()) {
        if (t != T_TOKEN) continue;
        if (strcmp(get_str(),str) == 0) return true;
    }
    return false;
}
/// Search forward, line by line, for str using the configured search function.
/// On success the token markers delimit the match and scanning resumes
/// immediately after it.
/// @return true when str was found; false when the input ran out
bool Tokenizer::go_to(const char* str)
{
    for (;;) {
        char* hit = m_search_fn(m_P,str);
        if (hit != NULL) {
            m_start_P = hit;
            m_end_P = hit + strlen(str);
            m_P = m_end_P;
            return true;
        }
        if (! fetch_line()) return false;   // no more lines to search
    }
}
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -