?? lexer.c
字號:
/* lexer.c
*
* (C) Copyright Apr 15 1995, Edmond J. Breen.
* ALL RIGHTS RESERVED.
* This code may be copied for personal, non-profit use only.
*
*
* History:
* First written V1.0 1984 (E.J.B)
* Revised V1.2 1994 (E.J.B)
* Revised V2.0 1995 (E.J.B)
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include "global.h"
#include "xalloc.h"
#include "error.h"
#include "symbol.h"
#include "lexer.h"
#include "preproc.h"
static void retract(char c);
static int fail(int ival, int c);
static int charliteral(int c);
static void EiC_stringliteral(void);
static void success(int ival);
char nextchar(void);
char *nextproline(void);
void retract(char c);
int fail(int ival, int c);
int charliteral(int c);
void stringliteral(void);
void success(int ival);
#define NEWLINE(ch) (ch == '\n' ? TRUE: FALSE)
#define LETTER(ch) ((ch >= 'a' && ch <= 'z') ? TRUE :\
((ch >= 'A' && ch <= 'Z') ? TRUE :\
((ch == '_' || ch == '$')? TRUE : FALSE)))
#define DIGIT(ch) ((ch >= '0' && ch <= '9') ? TRUE : FALSE)
#define WHITE(ch) ((ch == ' ' || ch == '\t')? TRUE : FALSE)
keyword_t cwords[] =
{
{"__eiclongjmp",eiclongjmpsym,},
{"__eicsetjmp",eicsetjmpsym,},
{"auto", autosym,},
{"break", breaksym,},
{"case", casesym,},
{"char", charsym,},
{"const", constsym,},
{"continue", continuesym,},
{"default", defaultsym,},
{"do", dosym,},
{"double", doublesym,},
{"else", elsesym,},
{"enum", enumsym,},
{"extern", externsym,},
{"float", floatsym,},
{"for", forsym,},
{"goto",gotosym,},
{"if", ifsym,},
{"int", intsym,},
{"long", longsym,},
{"register", registersym,},
{"return", returnsym,},
{"safe", safesym,},
{"short", shortsym,},
{"signed", signedsym,},
{"sizeof", sizeofsym,},
{"static", staticsym,},
{"struct", structsym,},
{"switch", switchsym,},
{"typedef", typedefsym,},
{"union", unionsym,},
{"unsafe", unsafesym,},
{"unsigned", unsignedsym,},
{"void", voidsym,},
{"volatile", volatilesym,},
{"while", whilesym,},
};
#define NOTOKEN 0
char EiC_LEXEM[BSIZE+1];
static token_t TOK;
token_t *token = &TOK;
#ifdef ILOOKAHEAD
token_t EiC_TokenArray[MAX_TOKENS];
int EiC_TokenI = 0, EiC_TokenP = 0, EiC_TokenR=0;
#else
unsigned short STOKEN;
#endif
unsigned lex_lineno, lex_lineprev, lex_linepos, lex_linelen, lex_curpos = 0,
lex_lastpos;
char *lex_buff;
static int state = 0;
static unsigned long lexival;
static double lexfval;
static char Lseen, /* long seen */
Fseen, /* floating point seen */
Useen, /* Unsigned specifier seen */
Hseen; /* Hex or Octal value seen */
#define getoct(x) (((x)>='0'&&(x)<='7')? (x)-'0':-1)
static int gethex(int c)
{
if (c >= '0' && c <= '9')
return c - '0';
if (c >= 'a' && c <= 'f')
return c - 'a' + 10;
if (c >= 'A' && c <= 'F')
return c - 'A' + 10;
return -1;
}
void EiC_initlex(char *str)
{
lex_buff = str;
lex_curpos = 0;
lex_lineprev = 0;
lex_lineno = 1;
#ifdef ILOOKAHEAD
EiC_TokenR = EiC_TokenP = EiC_TokenI = 0;
#else
STOKEN = NOTOKEN;
#endif
}
char EiC_nextchar()
{
char *EiC_nextproline();
if (lex_buff[lex_curpos] == EOF) {
return 0;
}
else if (lex_buff[lex_curpos] == '\0') {
if (lex_curpos != lex_lastpos)
return 0;
lex_buff = EiC_nextproline();
lex_lastpos = lex_curpos = 0;
lex_lineprev = 0;
lex_lineno = 1;
}
if (lex_lineprev != lex_lineno) {
lex_linepos = lex_curpos;
lex_lineprev = lex_lineno;
}
return (lex_buff[lex_curpos++]);
}
static void checkExt(int c)
{ /* check for unsigned and long suffix */
Lseen = Useen = Fseen = 0;
if (c == 'f' || c == 'F')
Fseen = 1;
else if (c == 'u' || c == 'U') {
Useen = 1;
if ((c = EiC_nextchar()) == 'l' || c == 'L')
Lseen = 1;
else
retract(c);
} else if (c == 'l' || c == 'L')
Lseen = 1;
else
retract(c);
}
static void retract(char c)
{
if (c != '\0') {
lex_curpos--;
if (lex_curpos < lex_lastpos)
lex_lastpos = lex_curpos;
}
}
static int fail(int ival, int c)
{
retract(c);
switch (ival) {
case RELOP: return (10);
case ID: return (20);
case FLOAT:
case INT: return (100);
}
return 0;
}
static void setfloatval(void)
{
if (Lseen || lexfval > FLT_MAX || !Fseen ) {
token->Val.dval = lexfval;
token->Tok = DOUBLE;
} else {
/* mandatory conversion to float */
float f = lexfval;
token->Val.dval = f;
token->Tok = FLOAT;
}
}
static void setintval(void)
{
if (Fseen) {
lexfval = lexival;
setfloatval();
return;
}
if (Useen) {
if (Lseen || lexival > UINT_MAX) {
token->Tok = ULONG;
token->Val.ulval = lexival;
} else {
token->Tok = UINT;
token->Val.uival = (unsigned) lexival;
}
} else if (Lseen || lexival > UINT_MAX) {
if (lexival > ULONG_MAX) {
token->Tok = ULONG;
token->Val.ulval = lexival;
} else if (lexival >= ULONG_MAX) {
token->Tok = ULONG;
token->Val.ulval = lexival;
} else {
token->Val.lval = lexival;
token->Tok = LONG;
}
} else {
if (lexival <= INT_MAX) {
token->Val.ival = (int) lexival;
token->Tok = INT;
} else if(Hseen && lexival <= UINT_MAX) {
token->Tok = UINT;
token->Val.uival = (unsigned) lexival;
} else if(lexival <= LONG_MAX) {
token->Val.lval = (long) lexival;
token->Tok = LONG;
} else {
token->Tok = ULONG;
token->Val.ulval = lexival;
}
}
}
static void success(int ival)
{
int i, size;
size = (int) (lex_curpos - lex_lastpos);
memcpy(EiC_LEXEM, &lex_buff[lex_lastpos], size);
EiC_LEXEM[size] = '\0';
if (Lseen) size--;
if (Useen) size--;
if (Fseen) size--;
Hseen = 0;
switch (ival) {
case ID:
if ((token->Tok = EiC_iskeyword(cwords, EiC_LEXEM,
sizeof(cwords) / sizeof(keyword_t))) == 0) {
token->Tok = ID;
/* search for id in various name spaces */
if ((token->Val.sym = EiC_lookup(EiC_work_tab, EiC_LEXEM)) == NULL)
token->Val.sym = EiC_insertLUT(EiC_work_tab, EiC_LEXEM, ID);
if (token->Val.sym)
if (token->Val.sym->sclass == c_typedef)
token->Tok = TYPENAME;
}
break;
case OCTAL:
if (Fseen)
EiC_error("Declaration syntax error");
for (lexival = 0, i = 0; i < size; i++)
lexival = lexival * 8 + getoct(EiC_LEXEM[i]);
Hseen = 1;
setintval();
break;
case HEX:
for (lexival = 0, i = 2; i < size; i++)
lexival = lexival * 16 + gethex(EiC_LEXEM[i]);
Hseen = 1;
setintval();
break;
case INT:
for (lexival = 0, i = 0; i < size; i++)
lexival = lexival * 10 + EiC_LEXEM[i] - '0';
setintval();
break;
case FLOAT:
if (Useen)
EiC_error("Declaration syntax error");
lexfval = atof(EiC_LEXEM);
setfloatval();
break;
case RELOP:
case MISC:
break;
}
}
static int WASLITERAL;
static int charliteral(int c)
{
if (c == '\\') {
switch ((c = EiC_nextchar())) {
case 'n': c = '\n'; break; /* newline */
case 't': c = '\t'; break; /* tabspace */
case 'v': c = '\v'; break; /* vertical tab */
case 'b': c = '\b'; break; /* backspace */
case 'r': c = '\r'; break; /* carriage return */
case 'f': c = '\f'; break; /* formfeed */
case 'a': c = '\a'; break; /* bell */
case '\\': c = '\\'; break; /* backslash */
case '\'': c = '\''; break; /* single quote */
case '"': c = '\"'; break; /* double quote */
case '?': c = '\?'; break; /* question mark */
case 'x': /* string of hex characters */
case 'X':{
int i, val = 0;
while ((i = gethex((c = EiC_nextchar()))) > -1) {
val = val * 16 + i;
}
retract(c);
if (val > 255)
EiC_error("Illegal character hex value");
c = val;
}
break;
default:
if (getoct(c) > -1) { /* octal characters */
int i, val = 0;
while ((i = getoct(c)) > -1) {
val = val * 8 + i;
c = EiC_nextchar();
}
retract(c);
if (val > 255)
EiC_error("Illegal character octal value");
c = val;
} else
EiC_error("Illegal character escape sequence `\\%c'", c);
break;
}
WASLITERAL = 1;
} else
WASLITERAL = 0;
return ((signed char )c);
}
static void EiC_stringliteral(void)
{
unsigned size, lastsize = 0, c;
char *p=NULL;
lex_lastpos = lex_curpos;
do {
for (size = 0; ((c = charliteral(EiC_nextchar())) != '\0' || WASLITERAL) &&
!(c == '"' && !WASLITERAL) && size < BSIZE; size++)
EiC_LEXEM[size] = c;
if (lastsize)
p = (char *) xrealloc(p, lastsize + size + 1);
else
p = (char *) xcalloc(size + 1, sizeof(char));
memcpy(&p[lastsize], EiC_LEXEM, size);
lastsize += size;
if(c != '"' && size == BSIZE) {
p[lastsize++] = c;
continue;
}
if (c != '"')
EiC_error("String literal error");
do {
c = EiC_nextchar();
if (c == '\n')
lex_lastpos++, lex_lineno++;
} while (WHITE(c) || c == '\n');
lex_lastpos = lex_curpos;
if (!c)
do
c = EiC_nextchar();
while (WHITE(c));
} while (c == '"' || size == BSIZE);
retract(c);
p[lastsize] = '\0';
token->Val.p.sp = token->Val.p.p = p;
token->Val.p.ep = p + lastsize + 1;
}
extern int EiC_lexan(void)
{
int t=0, loop; char c=0, EiC_nextchar();
#ifdef ILOOKAHEAD
token = &EiC_TokenArray[EiC_TokenP];
if(EiC_TokenR > 0) {
EiC_TokenR--;
EiC_TokenI++;
EiC_TokenP=(EiC_TokenP+1)%MAX_TOKENS;
return token->Tok;
}
#else
if (STOKEN != NOTOKEN) {
STOKEN = NOTOKEN;
return token->Tok;
}
#endif
loop = 1;
state = 0;
while (loop) {
switch (state) {
case 0: lex_lastpos = lex_curpos; c = EiC_nextchar();
state = (WHITE(c) ? 0 :
(c == '\n' ? lex_lineno++, 0 :
(c == '<' ? t = LT, 1 :
(c == '>' ? t = GT, 2 :
(c == '+' ? t = '+', 3 :
(c == '-' ? t = '-', 4 :
(c == '|' ? t = BOR, 5 :
(c == '&' ? t = AND, 6 :
(c == '\''? 7 :
(c == '"' ? 8 :
(c == '.' ? 9 :
(c == '/' ? t = '/', c = EiC_nextchar(), 50 :
(c == '%' ? t = '%', c = EiC_nextchar(), 50 :
(c == '*' ? t = '*', c = EiC_nextchar(), 50 :
(c == '=' ? t = ASS, c = EiC_nextchar(), 50 :
(c == '!' ? t = NOT, c = EiC_nextchar(), 50 :
(c == '^' ? t = XOR, c = EiC_nextchar(), 50 :
fail(RELOP, c))))))))))))))))));
break;
case 1: /* get <, <= and << */
if ((c = EiC_nextchar()) == '<') t = LSHT;
else state = 50;
break;
case 2: /* get >, >= and >> */
if ((c = EiC_nextchar()) == '>') t = RSHT;
else state = 50;
break;
case 3: c = EiC_nextchar(); /* get +, += or ++ */
if (c == '+') t = INC, state = 60;
else state = 50;
break;
case 4: c = EiC_nextchar(); /* get -, -= -- */
state = 60;
if (c == '-') t = DEC;
else if (c == '>') t = RARROW;
else state = 50;
break;
case 5: c = EiC_nextchar(); /* get |, |= or || */
if (c == '|') t = LOR, state = 60;
else state = 50;
break;
case 6: c = EiC_nextchar(); /* get &, &= or && */
if (c == '&') t = LAND, state = 60;
else state = 50;
break;
case 7:token->Val.ival = charliteral(EiC_nextchar()); /* char_constants */
t = CHAR;
if (EiC_nextchar() != '\'')
EiC_error("Missing single quote '");
state = 60;
break;
case 8: EiC_stringliteral(); /* string literals */
token->Tok = STR;
/*return STR;*/ loop = 0; break;
case 9: c = EiC_nextchar();
t = '.';
if(DIGIT(c))
state = 22;
else
state = 60;
retract(c);
break;
case 10: c = EiC_nextchar(); /* identifiers and keywords */
state = (LETTER(c) ? 11 :
(c == '_' ? 11 : fail(ID, c)));
break;
case 11: c = EiC_nextchar();
state = (LETTER(c) ? 11 :
(DIGIT(c) ? 11 :
(c == '_' ? 11 : 12)));
break;
case 12: retract(c); success(ID); /*return (token->Tok);*/ loop = 0; break;
case 20: c = EiC_nextchar(); /* integers and reals */
state = (c == '0' ? 30 :
(DIGIT(c) ? 21 : fail(INT, c)));
break;
case 21: c = EiC_nextchar();
state = (DIGIT(c) ? 21 :
(c == '.' ? 22 :
(c == 'e' ? 23 :
(c == 'E' ? 23 : 25))));
break;
case 22: c = EiC_nextchar();
state = (DIGIT(c) ? 22 :
(c == 'e' ? 23 :
(c == 'E' ? 23 : 26)));
break;
case 23: c = EiC_nextchar();
state = (c == '+' ? 24 :
(c == '-' ? 24 :
(DIGIT(c) ? 24 : fail(FLOAT, c) /* ??? */ )));
break;
case 24: c = EiC_nextchar();
state = (DIGIT(c) ? 24 : 26);
break;
case 25: checkExt(c); success(INT); /*return (token->Tok);*/ loop = 0; break;
case 26: checkExt(c); success(FLOAT); /*return (token->Tok);*/ loop = 0; break;
case 27: checkExt(c); success(HEX); /*return (token->Tok);*/ loop = 0; break;
case 28: checkExt(c); success(OCTAL); /*return (token->Tok);*/ loop = 0; break;
case 30: /* check for octal and hex numbers */
if ((c = EiC_nextchar()) == 'x' || c == 'X') {
while (gethex((c = EiC_nextchar())) > -1);
state = 27;
break;
}
if (c != '.' && c != 'e' && c != 'E') {
while (getoct(c) > -1)
c = EiC_nextchar();
state = 28;
break;
}
retract(c); state = 21; break;
case 50: /* mix with equal's */
if (c == '=')
switch (t) {
case '+': t = ADDEQ; break; /* += */
case '-': t = SUBEQ; break; /* -= */
case '/': t = DIVEQ; break; /* /= */
case '*': t = MULEQ; break; /* *= */
case '%': t = MODEQ; break; /* %= */
case ASS: t = EQ; break; /* == */
case GT: t = GE; break; /* >= */
case LT: t = LE; break; /* <= */
case NOT: t = NE; break; /* != */
case RSHT:t = RSHTEQ; break; /* >>= */
case LSHT:t = LSHTEQ; break; /* <<= */
case AND: t = ANDEQ; break; /* &= */
case BOR: t = BOREQ; break; /* |= */
case XOR: t = XOREQ; break; /* ^= */
default: retract(c);
} else retract(c);
state = 60;
break;
case 60: success(MISC); token->Tok = t; /*return (token->Tok);*/ loop = 0; break;
case 100: token->Tok = EiC_nextchar(); /*return (token->Tok);*/ loop = 0; break;
}
}
#ifdef ILOOKAHEAD
if(EiC_TokenI<MAX_TOKENS)
EiC_TokenI++;
EiC_TokenP = (EiC_TokenP +1)%MAX_TOKENS;
#endif
return token->Tok;
}
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -