?? scanner.java
字號:
/**
* @(#)Scanner.java 1.36 03/01/23
*
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
*/
package com.sun.tools.javac.v8.parser;
import java.io.*;
import com.sun.tools.javac.v8.util.*;
/**
* This class is the lexical analyzer (scanner) implementation of gjc.
*/
public class Scanner implements Tokens, LayoutCharacters {
/**
* The token, set by nextToken().
*/
int token;
/**
* The token's position. pos = line << Position.LINESHIFT + col.
* Line and column numbers start at 1.
* NOTE(review): presumably meant as (line << Position.LINESHIFT) + col;
* confirm against Position.make.
*/
int pos;
/**
* The last character position of the token.
*/
int endPos;
/**
* The last character position of the previous token.
*/
int prevEndPos;
/**
* The position where a lexical error occurred.
*/
int errPos = Position.NOPOS;
/**
* The name of an identifier or token.
*/
Name name;
/**
* The radix of a numeric literal token.
*/
int radix;
/**
* Has a @deprecated been encountered in last doc comment?
* This needs to be reset by the client.
*/
boolean deprecatedFlag = false;
/**
* A character buffer for literals.
*/
private char[] sbuf = new char[128];
/**
* Index of the next free slot in sbuf; advanced by putChar.
*/
private int sp;
/**
* The input buffer, index of next character to be read,
* index of one past last character in buffer.
*/
private char[] buf;
private int bp;
private int buflen;
/**
* The current character.
*/
private char ch;// The character currently being processed; the line and col fields below record its position, which is convenient when reporting errors.
/**
* The line number position of the current character.
*/
private int line;
/**
* The column number position of the current character.
*/
private int col;
/**
* The buffer index of the last converted unicode character
*/
private int unicodeConversionBp = 0;
/**
* The log to be used for error reporting.
*/
private final Log log;
/**
* The name table.
*/
private final Name.Table names;// Table that stores the names of identifiers.
/**
* The keyword table.
*/
private final Keywords keywords;
/**
* Documentation string of the current token.
*/
String docComment = null;
/**
* Buffer for doc comment.
*/
private char[] buffer;
/**
* Number of characters in doc comment buffer.
*/
private int count;
/**
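* The entry point and initialization of gjc's lexical analyzer are implemented
* in this constructor. It reads the input stream into an in-memory buffer,
* initializes the position of the current character, and calls nextToken()
* to read the first token.
* How the input is read: so that arbitrarily large files can be read and
* compiled, the buffer is allocated dynamically. An initial buffer is allocated;
* whenever it turns out to be full, its size is doubled, and so on until the
* buffer is large enough. This does handle arbitrarily large files, but it can
* waste a considerable amount of space, and the larger the file, the more
* noticeable the waste. How to improve this is still an open question for me:
* it is not yet clear how to strike an ideal balance between saving space and
* running fast.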
*/
public Scanner(Context context, InputStream in, String encoding) {
    super();
    this.log = Log.instance(context);
    this.names = Name.Table.instance(context);
    this.keywords = Keywords.instance(context);
    try {
        // Initial capacity: the stream's own estimate plus one slot for the
        // EOI sentinel. buf is always unset at this point in the constructor.
        buf = new char[in.available() + 1];
        buflen = 0;
        // Decode with the platform default charset when no encoding is given.
        InputStreamReader reader =
            (encoding == null) ? new InputStreamReader(in)
                               : new InputStreamReader(in, encoding);
        while (true) {
            if (buflen == buf.length) {
                // Buffer is full: double it and keep what has been read so far.
                char[] newbuf = new char[buflen * 2];
                System.arraycopy(buf, 0, newbuf, 0, buflen);
                buf = newbuf;
            }
            int nread = reader.read(buf, buflen, buf.length - buflen);
            // Only the reader's end-of-stream signal terminates the loop. A
            // read that returns fewer characters than requested is NOT end of
            // input, so a partially filled buffer must not stop the loop.
            // The requested length is always > 0 here, so a 0 result can only
            // come from a misbehaving reader; stop rather than spin.
            if (nread <= 0)
                break;
            buflen = buflen + nread;
        }
        // The buffer is grown before every read that would find it full, so
        // buflen < buf.length holds here and the sentinel below always fits.
    } catch (UnsupportedEncodingException e) {
        lexError("unsupported.encoding", encoding);
        buf = new char[1];
        buflen = 0;
    } catch (IOException e) {
        lexError("io.exception", e.toString());
        buf = new char[1];
        buflen = 0;
    }
    // Terminate the input with the end-of-input sentinel.
    buf[buflen] = EOI;
    line = 1;
    col = 0;
    bp = -1;
    scanChar();  // load the first character
    nextToken(); // read the first token
}
/**
* Report an error at the given position using the provided argument.
*/
private void lexError(int pos, String msg, String arg) {
    // Emit the diagnostic first, then mark the scanner state as erroneous.
    log.error(pos, msg, arg);
    this.errPos = pos;
    this.token = ERROR;
}
/**
* Report an error at the given position.
*/
private void lexError(int pos, String key) {
    // No message argument for this variant.
    final String noArgument = null;
    lexError(pos, key, noArgument);
}
/**
* Report an error at the current token position.
*/
private void lexError(String key) {
    // Report at the position of the current token, without an argument.
    final int tokenPos = this.pos;
    final String noArgument = null;
    lexError(tokenPos, key, noArgument);
}
/**
* Report an error at the current token position using the provided
* argument.
*/
private void lexError(String key, String arg) {
    // Report at the position of the current token.
    final int tokenPos = this.pos;
    lexError(tokenPos, key, arg);
}
/**
* Report a warning at the given position.
*/
private void lexWarning(int pos, String key) {
    // Warnings go straight to the log; token and errPos are left untouched.
    this.log.warning(pos, key);
}
/**
* Convert the digit represented by the current character into its numeric
* value in the given base.
*/
private int digit(int base) {
    final char current = ch;
    final int value = Character.digit(current, base);
    // Not a digit at all, or a plain ASCII digit: nothing more to do.
    if (value < 0 || current <= 127) {
        return value;
    }
    // A digit outside the ASCII range: warn, then replace the current
    // character with the ASCII character for the same value.
    lexWarning(pos + 1, "illegal.nonascii.digit");
    ch = "0123456789abcdef".charAt(value);
    return value;
}
/**
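* Convert a unicode escape sequence in the input. Because this was a
* compiler-principles lab exercise focused on the lexical analysis algorithm,
* peripheral details like this one were not studied closely; a rough
* understanding of its purpose is enough, and the implementation need not be
* examined in depth.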
*/
private void convertUnicode() {
    // Column of the backslash, kept for the error position.
    final int startcol = col;
    if (ch != '\\') {
        return;
    }
    ch = buf[++bp];
    col++;
    if (ch != 'u') {
        // Not an escape: step back so the backslash is the current character again.
        bp--;
        ch = '\\';
        col--;
        return;
    }
    // Consume this 'u' and any further 'u' characters that follow it.
    do {
        ch = buf[++bp];
        col++;
    } while (ch == 'u');
    // Index of the last of the four hex digits.
    final int limit = bp + 3;
    if (limit < buflen) {
        int d = digit(16);
        int code = d;
        while (bp < limit && d >= 0) {
            ch = buf[++bp];
            col++;
            d = digit(16);
            code = (code << 4) + d;
        }
        if (d >= 0) {
            // All digits were valid: the decoded character becomes current.
            ch = (char) code;
            unicodeConversionBp = bp;
            return;
        }
    }
    // Too close to the end of the buffer, or a non-hex character was found.
    lexError(Position.make(line, startcol), "illegal.unicode.esc");
}
/**
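* scanChar: reads the next input character. Because the input has already
* been loaded into the in-memory buffer by the Scanner constructor, reading a
* character is simple, but the position of the current character must be
* maintained: advance the buffer index, assign the next character to ch, and
* update line and col.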
*/
private void scanChar() {
    ch = buf[++bp];
    if (ch == '\r') {
        col = 0;
        line++;
    } else if (ch == '\n') {
        // A line feed directly after a carriage return belongs to the same
        // line break, which was already counted.
        boolean followsCarriageReturn = bp != 0 && buf[bp - 1] == '\r';
        if (!followsCarriageReturn) {
            col = 0;
            line++;
        }
    } else if (ch == '\t') {
        // Advance to the next tab stop.
        col = (col / TabInc * TabInc) + TabInc;
    } else {
        col++;
        if (ch == '\\') {
            // A backslash may start a unicode escape.
            convertUnicode();
        }
    }
}
/**
* Read the next character of a comment, skipping over the second character
* of a doubled backslash.
*/
private void scanCommentChar() {
    scanChar();
    if (ch != '\\') {
        return;
    }
    // A backslash followed by another backslash, where the current one did not
    // come from a unicode conversion ending at this index.
    boolean doubledBackslash = buf[bp + 1] == '\\' && unicodeConversionBp != bp;
    if (doubledBackslash) {
        // Step over the second backslash.
        bp++;
        col++;
    } else {
        convertUnicode();
    }
}
/**
* Unconditionally double the size of the doc comment buffer.
*/
private void expandCommentBuffer() {
    final int oldLength = buffer.length;
    // Allocate twice the space and carry over the existing contents.
    char[] grown = new char[oldLength * 2];
    System.arraycopy(buffer, 0, grown, 0, oldLength);
    buffer = grown;
}
/**
*
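* Read the next character of a doc comment. For a doubled backslash, the
* first backslash is stored in the doc comment buffer and the second one is
* skipped.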
*/
private void scanDocCommentChar() {
    scanChar();
    if (ch != '\\') {
        return;
    }
    // A backslash followed by another backslash, where the current one did not
    // come from a unicode conversion ending at this index.
    boolean doubledBackslash = buf[bp + 1] == '\\' && unicodeConversionBp != bp;
    if (!doubledBackslash) {
        convertUnicode();
        return;
    }
    // Store the first backslash in the doc comment buffer, growing it if full.
    if (count == buffer.length) {
        expandCommentBuffer();
    }
    buffer[count++] = ch;
    // Step over the second backslash.
    bp++;
    col++;
}
/**
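* Append a character to the literal buffer, growing the buffer when it is
* full. gjc doubles the buffer capacity every time it runs out of space;
* is that really the best solution?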
*/
private void putChar(char ch) {
    final int capacity = sbuf.length;
    if (sp == capacity) {
        // Literal buffer is full: double it, preserving what is stored.
        char[] grown = new char[capacity * 2];
        System.arraycopy(sbuf, 0, grown, 0, capacity);
        sbuf = grown;
    }
    // The parameter shadows the field of the same name; store the argument.
    sbuf[sp++] = ch;
}
/**
* Print the current character, for debugging purposes.
*/
private void dch() {
    // Debug aid: write the current character to standard error.
    System.err.print(ch);
    // Flush the stream that was actually written to, so the character shows
    // up immediately.
    System.err.flush();
}
/**
* Read the next character of a character or string literal, translating
* escape sequences.
*/
private void scanLitChar() {
if (ch == '\\') {
if (buf[bp + 1] == '\\' && unicodeConversionBp != bp) {
bp++;
col++;
putChar('\\');
scanChar();
} else {
scanChar();
switch (ch) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
char leadch = ch;
int oct = digit(8);
scanChar();
if ('0' <= ch && ch <= '7') {
oct = oct * 8 + digit(8);
scanChar();
if (leadch <= '3' && '0' <= ch && ch <= '7') {
oct = oct * 8 + digit(8);
scanChar();
}
}
putChar((char) oct);
break;
case 'b':
putChar('\b');//各種字符串中的表示符號的處理方法
scanChar();
break;
case 't':
putChar('\t');
scanChar();
break;
case 'n':
putChar('\n');
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -