lexanalyzer.java
字號:
import java.io.*;
import java.util.*;

/**
 * Hand-written lexical analyzer for a small language whose keywords are
 * {@code int, if, else, do, while, read, write}.
 *
 * <p>{@link #analysis(String)} turns source text into a list of {@code Word}
 * tokens, each constructed as {@code new Word(lexeme, type, line)}. Problems
 * found while scanning are not thrown; they are collected as messages and can
 * be read afterwards through {@link #getErrorList()}.
 *
 * <p>Token type strings (including the historical spelling {@code "MULTYPLY"})
 * are kept exactly as they were, since code outside this file presumably
 * matches on them.
 *
 * <p>Instances keep per-run state (token list, error list, current line), so a
 * single instance must not be used by several threads at once.
 */
public class LexAnalyzer {

    /** Reserved words. A keyword token uses the keyword text itself as its type. */
    private static final String[] KEYWORDS =
            {"int", "if", "else", "do", "while", "read", "write"};

    /** Tokens produced by the most recent {@link #analysis(String)} call. */
    private ArrayList<Word> wordList = new ArrayList<Word>();

    /** Error messages produced by the most recent {@link #analysis(String)} call. */
    private ArrayList<String> errorList = new ArrayList<String>();

    /** 1-based number of the source line currently being scanned. */
    private int line = 1;

    /**
     * Tells whether {@code word} is one of the reserved words.
     *
     * @param word candidate lexeme
     * @return {@code true} if {@code word} equals a keyword exactly (case-sensitive)
     */
    public boolean isKeyWord(String word) {
        return keywordIndex(word) >= 0;
    }

    /**
     * Tokenizes {@code source}.
     *
     * <p>Every character of the input is scanned, including the last one, and
     * a token that is still open when the input ends is emitted as if it were
     * followed by whitespace. Line numbering restarts at 1 on every call.
     *
     * @param source program text to scan; must not be {@code null}
     * @return a fresh list of tokens, always terminated by the end marker
     *         {@code new Word("", "#", 0)}
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public ArrayList<Word> analysis(String source) {
        if (source == null) {
            throw new NullPointerException("source must not be null");
        }
        wordList = new ArrayList<Word>();
        errorList = new ArrayList<String>();
        line = 1; // each run numbers its own lines, independent of earlier runs

        final int length = source.length();
        int pos = 0;
        while (pos < length) {
            char c = source.charAt(pos);
            if (Character.isLetter(c)) {
                pos = scanIdentifier(source, pos);
            } else if (Character.isDigit(c)) {
                pos = scanNumber(source, pos);
            } else if (isBlank(c)) {
                // '\n' is whitespace according to Character.isWhitespace, so
                // line breaks arrive here and advance the line counter.
                if (c == '\n') {
                    line++;
                }
                pos++;
            } else {
                pos = scanOperator(source, pos);
            }
        }

        wordList.add(new Word("", "#", 0));
        return wordList;
    }

    /**
     * Returns the error messages collected by the most recent
     * {@link #analysis(String)} call.
     *
     * @return the error list; empty (never {@code null}) if nothing was analysed yet
     */
    public ArrayList<String> getErrorList() {
        return errorList;
    }

    /** Returns the position of {@code word} in {@link #KEYWORDS}, or -1 if it is not a keyword. */
    private static int keywordIndex(String word) {
        for (int i = 0; i < KEYWORDS.length; i++) {
            if (KEYWORDS[i].equals(word)) {
                return i;
            }
        }
        return -1;
    }

    /** Whitespace and ISO control characters separate tokens and are otherwise ignored. */
    private static boolean isBlank(char c) {
        return Character.isWhitespace(c) || Character.isISOControl(c);
    }

    /** Characters allowed after the first letter of an identifier. */
    private static boolean isIdentChar(char c) {
        return Character.isLetterOrDigit(c) || c == '_';
    }

    /**
     * Scans the identifier or keyword starting at {@code start} and returns the
     * index of the first character after it.
     *
     * <p>An identifier ending in {@code '_'} is reported as an error and
     * produces no token, whatever character follows it.
     */
    private int scanIdentifier(String source, int start) {
        int end = start + 1;
        while (end < source.length() && isIdentChar(source.charAt(end))) {
            end++;
        }
        String text = source.substring(start, end);
        if (text.charAt(text.length() - 1) == '_') {
            errorList.add("Identifier should not end with '_' in line: " + line);
        } else {
            int k = keywordIndex(text);
            // Use the literal from the keyword table as the type string.
            wordList.add(new Word(text, k >= 0 ? KEYWORDS[k] : "ID", line));
        }
        return end;
    }

    /**
     * Scans the run of digits starting at {@code start} and returns the index
     * of the first character after it.
     *
     * <p>Digits immediately followed by a letter are reported as a malformed
     * identifier and the digits are discarded; the letters are left in the
     * input and are scanned as an identifier by the caller.
     */
    private int scanNumber(String source, int start) {
        int end = start + 1;
        while (end < source.length() && Character.isDigit(source.charAt(end))) {
            end++;
        }
        if (end < source.length() && Character.isLetter(source.charAt(end))) {
            errorList.add("Identifier should start with a letter in line: " + line);
        } else {
            wordList.add(new Word(source.substring(start, end), "NUMBER", line));
        }
        return end;
    }

    /**
     * Scans the operator or punctuation mark at {@code pos} and returns the
     * index of the first character after it. Unknown characters are reported
     * as errors and skipped.
     */
    private int scanOperator(String source, int pos) {
        char c = source.charAt(pos);
        switch (c) {
            case '+':
                return scanPair(source, pos, '+', "AA", "ADD");
            case '-':
                return scanPair(source, pos, '-', "MM", "MINUS");
            case '<':
                // "<>" is the not-equal operator of this language.
                return scanPair(source, pos, '>', "NOTEQUAL", "LESS");
            case '=':
                return scanPair(source, pos, '=', "EQUAL", "ASSIGN");
            default:
                break;
        }

        String type = singleCharType(c);
        if (type != null) {
            wordList.add(new Word(String.valueOf(c), type, line));
        } else {
            errorList.add("Unexpected operator " + c + " in line:" + line);
        }
        return pos + 1;
    }

    /**
     * Emits a two-character token when the character after {@code pos} equals
     * {@code second}, otherwise the one-character token, and returns the index
     * of the first character after the emitted token.
     */
    private int scanPair(String source, int pos, char second, String pairType, String singleType) {
        boolean paired = pos + 1 < source.length() && source.charAt(pos + 1) == second;
        int end = paired ? pos + 2 : pos + 1;
        wordList.add(new Word(source.substring(pos, end), paired ? pairType : singleType, line));
        return end;
    }

    /** Returns the token type of a stand-alone single-character token, or {@code null} if {@code c} is not one. */
    private static String singleCharType(char c) {
        switch (c) {
            case '*': return "MULTYPLY";
            case '/': return "DIVISION";
            case ',': return "COMM";
            case ';': return "SEMICOLON";
            case '(': return "LEFT(";
            case ')': return "RIGHT)";
            case '.': return "DOT";
            case '"': return "DOUBLECOMM";
            case '[': return "LEFT[";
            case ']': return "RIGHT]";
            case '{': return "LEFT{";
            case '}': return "RIGHT}";
            case '>': return "LARGER";
            default:  return null;
        }
    }
}
快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -