// Source file: AbstractDocumentWordTokenizer.java
/*******************************************************************************
* Copyright (c) 2003 Berthold Daum.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Common Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/cpl-v10.html
*
* Contributors:
* Berthold Daum
*******************************************************************************/
package com.bdaum.SpellChecker;
import java.text.BreakIterator;
import javax.swing.text.Segment;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.DocumentEvent;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IDocumentListener;
import com.bdaum.SpellChecker.preferences.SpellCheckerPreferences;
import com.swabunga.spell.event.WordTokenizer;
/**
 * Base class for breaking JFace text documents into single words.
 * In addition, it supports text replacements.
 *
 * After an instance is created, it must be initialized by calling
 * the init() method. init() registers the tokenizer as a document
 * listener; dispose() removes that registration again.
 *
 * Subclasses supply the document-type specific parts through
 * isToBeChecked() and parseCharacter(char).
 */
public abstract class AbstractDocumentWordTokenizer
	implements WordTokenizer, IDocumentListener {

	/** The document **/
	protected IDocument document;
	/** position of selection **/
	protected int selectionOffset = 0;
	/** length of selection (values &lt;= 0 mean: no selection) **/
	protected int selectionLength = 0;
	/** start position of current word in document **/
	protected int currentWordPos = 0;
	/** end position of current word **/
	protected int currentWordEnd = 0;
	/** ignored characters at end of word + 1 **/
	protected int endDistance = 0;
	/** start position of next word, -1 if there is none **/
	protected int nextWordPos = -1;
	/** The character iterator over the document content **/
	protected Segment text;
	/** Sentence iterator operating on the text segment **/
	protected BreakIterator sentenceIterator;
	/** The number of processed words **/
	protected int wordCount = 0;
	/** indicates if there are any more words **/
	protected boolean moreTokens = true;
	/** indicates that the word at currentWordPos has not been delivered yet **/
	protected boolean first = true;
	/** indicates that we are at a sentence start **/
	protected boolean startsSentence = true;
	/** indicates if the current word is part of a CamelCase word **/
	private boolean camelCase = false;
	/** holds next word, null if no word is pending **/
	private String nextWord;
	/** keeps length of current word for later inquiry **/
	private int currentWordLength = 0;
	/** Configuration **/
	private SpellCheckConfiguration config;
	/** options **/
	private String compoundCharacters;
	private boolean ignoreOneLetterWords;

	/**
	 * Method init.
	 * Initialize the tokenizer: remembers document, selection and
	 * configuration, positions the tokenizer on the first word (inside
	 * the selection, if there is one) and registers this instance as
	 * document listener.
	 * @param document - the document to be parsed
	 * @param offset - start position in text
	 * @param len - length of selected area or 0.
	 * @param config - the spell checker configuration
	 */
	public void init(
		IDocument document,
		int offset,
		int len,
		SpellCheckConfiguration config) {
		this.config = config;
		this.document = document;
		// Evaluate selection
		this.selectionOffset = offset;
		this.selectionLength = len;
		// Configure the tokenizer
		configure();
		// Iterator about the text
		sentenceIterator = BreakIterator.getSentenceInstance();
		getTextSegment(document);
		sentenceIterator.setText(text);
		currentWordPos = getNextWordStart(text, 0);
		if (selectionLength > 0) {
			// Skip all words that start before the selection
			while (currentWordPos != -1 && currentWordPos < selectionOffset) {
				currentWordEnd = getNextWordEnd(text, currentWordPos);
				currentWordPos = findStartAfterCurrentWord();
			}
			if (currentWordPos > selectionOffset + selectionLength) {
				currentWordPos = -1;
			}
		}
		if (currentWordPos != -1) {
			currentWordEnd = getNextWordEnd(text, currentWordPos);
			nextWordPos = findStartAfterCurrentWord();
		} else {
			// whole area consists of whitespace
			moreTokens = false;
		}
		// Register as document listener to be
		// informed about document changes
		document.addDocumentListener(this);
	}

	/**
	 * Determines whether the character at which the last word scan
	 * stopped is a letter (CamelCase continuation) and looks for the
	 * start of the following word. Must be called directly after
	 * getNextWordEnd() because it relies on the iterator position and
	 * on endDistance left behind by that call.
	 * For a CamelCase break the search starts at the word end itself,
	 * otherwise behind the break character.
	 * @return start position of the following word or -1
	 */
	private int findStartAfterCurrentWord() {
		camelCase = Character.isLetter(text.current());
		if (camelCase) {
			return getNextWordStart(text, currentWordEnd);
		}
		return getNextWordStart(text, currentWordEnd + endDistance);
	}

	/**
	 * Returns the configuration passed to init().
	 * The misspelled name is kept for existing callers;
	 * new code should prefer getConfiguration().
	 * @return the spell checker configuration
	 */
	protected SpellCheckConfiguration getCofiguration() {
		return config;
	}

	/**
	 * Returns the configuration passed to init().
	 * @return the spell checker configuration
	 */
	protected SpellCheckConfiguration getConfiguration() {
		return config;
	}

	/**
	 * Method configure.
	 * Reads the tokenizer options (compound characters, ignoring of
	 * one-letter words) from the configuration.
	 */
	protected void configure() {
		compoundCharacters =
			config.getString(SpellCheckerPreferences.COMPOUNDCHARACTERS);
		ignoreOneLetterWords =
			config.getBoolean(SpellCheckerPreferences.IGNOREONELETTERWORDS);
	}

	/**
	 * Fetch text segment from document.
	 * Replaces the text field with a fresh segment over the
	 * complete document content.
	 */
	private void getTextSegment(IDocument document) {
		char[] chars = document.get().toCharArray();
		text = new Segment(chars, 0, chars.length);
	}

	/**
	 * Find start position of next word.
	 * Scans forward from startPos until a letter or digit is found for
	 * which isToBeChecked() returns true.
	 * @param text - the segment to scan
	 * @param startPos - position where the scan starts
	 * @return start position of the word (including directly preceding
	 * ignored characters), or -1 if there is no further word or if
	 * startPos lies outside the segment or behind the selection
	 */
	protected int getNextWordStart(Segment text, int startPos) {
		// Segment.setIndex() throws an IllegalArgumentException for
		// positions outside the segment, so these are rejected here.
		if (startPos < text.getBeginIndex()
			|| startPos > text.getEndIndex()) {
			return -1;
		}
		int endPos =
			(selectionLength <= 0)
				? text.getEndIndex()
				: selectionOffset + selectionLength;
		if (startPos > endPos) {
			return -1;
		}
		int ignored = 0;
		for (char ch = text.setIndex(startPos);
			ch != Segment.DONE;
			ch = text.next()) {
			ch = parseAndTranslateCharacter(ch);
			if (ch == 0) {
				// count ignored characters (necessary for character entities in HTML)
				++ignored;
			} else {
				if (Character.isLetterOrDigit(ch) && isToBeChecked()) {
					return text.getIndex() - ignored;
				}
				ignored = 0;
			}
		}
		return -1;
	}

	/**
	 * Method isToBeChecked.
	 * Is called at the start of each word.
	 * @return boolean - true, if the word is to be checked.
	 */
	protected abstract boolean isToBeChecked();

	/**
	 * Method isToBeChecked.
	 * Is called at the end of each word. Rejects missing and empty
	 * words, one-letter words (if so configured) and words containing
	 * one of the configured compound characters.
	 * @param word - the word to be checked, may be null
	 * @return boolean - true, if the word is to be checked.
	 */
	protected boolean isToBeChecked(String word) {
		if (word == null || word.length() == 0) {
			return false;
		}
		if (ignoreOneLetterWords && word.length() <= 1) {
			return false;
		}
		if (compoundCharacters != null && compoundCharacters.length() > 0) {
			for (int i = 0; i < word.length(); i++) {
				if (compoundCharacters.indexOf(word.charAt(i)) >= 0) {
					return false;
				}
			}
		}
		return true;
	}

	/**
	 * Method parseCharacter.
	 * Is called (via parseAndTranslateCharacter) for the characters
	 * visited while scanning for word starts and word ends.
	 * @param ch - the current character
	 */
	protected abstract void parseCharacter(char ch);

	/**
	 * Method parseAndTranslateCharacter.
	 * can be overridden if the current character must be modified.
	 * This implementation passes the character to parseCharacter()
	 * and returns it unchanged.
	 * @param ch - the current character
	 * @return - the modified character, 0 for characters to be ignored
	 */
	protected char parseAndTranslateCharacter(char ch) {
		parseCharacter(ch);
		return ch;
	}

	/**
	 * Computes the end position of the next word.
	 * When a word break is found, endDistance is set to the number of
	 * directly preceding ignored characters + 1 and the iterator stays
	 * on the break character. If the end of the text is reached without
	 * a break, endDistance is left unchanged.
	 * @param text - the segment to scan
	 * @param startPos - start position of the word
	 * @return end position of the word (exclusive)
	 */
	protected int getNextWordEnd(Segment text, int startPos) {
		int ignored = 0;
		boolean notFirst = false;
		for (char ch = text.setIndex(startPos);
			ch != Segment.DONE;
			ch = text.next()) {
			ch = parseAndTranslateCharacter(ch);
			// skip character that should be ignored
			if (ch == 0) {
				++ignored;
				continue;
			}
			// special treatment for some character that appear
			// in word-like constructs: they belong to the word if they
			// are not its first character and a letter or digit follows
			if ((ch == '.' || ch == ':' || ch == '\'' || ch == '@')
				&& notFirst) {
				char ch2 = text.next();
				if (ch2 == Segment.DONE || !Character.isLetterOrDigit(ch2)) {
					// step back onto the special character: the word ends here
					text.previous();
					endDistance = ignored + 1;
					return text.getIndex() - ignored;
				}
				ignored = 0;
				continue;
			}
			// standard check for word end
			if (isWordBreak(ch, notFirst)) {
				endDistance = ignored + 1;
				return text.getIndex() - ignored;
			}
			ignored = 0;
			notFirst = true;
		}
		return text.getEndIndex();
	}

	/**
	 * Decides if the given character terminates the current word.
	 * This implementation treats every character that is neither
	 * letter nor digit as word break and does not evaluate notFirst.
	 * @param ch - the current character
	 * @param notFirst - true, if ch is not the first character of the word
	 * @return true, if the word ends before ch
	 */
	protected boolean isWordBreak(char ch, boolean notFirst) {
		return !Character.isLetterOrDigit(ch);
	}

	/**
	 * Advances to the next word to be checked (each call advances!).
	 * @see com.swabunga.spell.event.WordTokenizer#hasMoreWords()
	 */
	public boolean hasMoreWords() {
		getNextWord();
		return nextWord != null;
	}

	/**
	 * @see com.swabunga.spell.event.WordTokenizer#getCurrentWordPosition()
	 */
	public int getCurrentWordPosition() {
		return currentWordPos;
	}

	/**
	 * @see com.swabunga.spell.event.WordTokenizer#getCurrentWordEnd()
	 */
	public int getCurrentWordEnd() {
		return currentWordEnd;
	}

	/**
	 * Returns the word fetched by the preceding hasMoreWords() call in
	 * display format. Must only be called after hasMoreWords() has
	 * returned true.
	 * @see com.swabunga.spell.event.WordTokenizer#nextWord()
	 */
	public String nextWord() {
		currentWordLength = nextWord.length();
		wordCount++;
		return deSerialize(nextWord);
	}

	/**
	 * Fetches the next word that is to be checked. Words rejected by
	 * isToBeChecked(String) are skipped. Afterwards the pending word is
	 * either a word to be checked or null if no such word is left.
	 */
	public void getNextWord() {
		nextWord = null;
		while (moreTokens) {
			if (!first) {
				currentWordPos = nextWordPos;
				currentWordEnd = getNextWordEnd(text, currentWordPos);
				// We simulate a sentence start because we don't want to
				// ignore word components that start with an upper case letter
				startsSentence = camelCase;
				nextWordPos = findStartAfterCurrentWord();
				if (!camelCase) {
					int current = sentenceIterator.current();
					if (current == currentWordPos) {
						startsSentence = true;
					} else if (currentWordEnd > current) {
						sentenceIterator.next();
					}
				}
			}
			try {
				nextWord =
					document.get(
						currentWordPos,
						currentWordEnd - currentWordPos);
			} catch (BadLocationException ex) {
				// The word positions do not fit the document:
				// there is no word to deliver, stop tokenizing.
				nextWord = null;
				moreTokens = false;
			}
			first = false;
			if ((selectionLength > 0
				&& nextWordPos > selectionOffset + selectionLength)
				|| nextWordPos == -1) {
				moreTokens = false;
			}
			if (nextWord != null && isToBeChecked(nextWord)) {
				return;
			}
			// Rejected word: must not be reported as pending word
			// when the loop ends because no more tokens are left.
			nextWord = null;
		}
	}

	/**
	 * Returns length of current word
	 * @return - length of the word (document format) returned by the
	 * last nextWord() call
	 */
	public int getCurrentWordLength() {
		return currentWordLength;
	}

	/**
	 * Converts a word from document format to display format
	 * Subclasses may override. This implementation returns the word
	 * unchanged.
	 * @param word - the word in document format
	 * @return - the word in display format
	 */
	public String deSerialize(String word) {
		return word;
	}

	/**
	 * Converts a word from display format to document format
	 * Subclasses may override. This implementation returns the word
	 * unchanged.
	 * @param word - the word in display format
	 * @return - the word in document format
	 */
	public String serializeWord(String word) {
		return word;
	}

	/**
	 * @see com.swabunga.spell.event.WordTokenizer#getCurrentWordCount()
	 */
	public int getCurrentWordCount() {
		return wordCount;
	}

	/**
	 * Replaces the current word via the spell checker manager and
	 * repositions the tokenizer on the first word behind the
	 * replacement. That word is delivered by the next
	 * hasMoreWords()/nextWord() cycle.
	 * @see com.swabunga.spell.event.WordTokenizer
	 *      #replaceWord(java.lang.String)
	 */
	public void replaceWord(String newWord) {
		if (currentWordPos != -1) {
			SpellCheckerPlugin.getManager().replaceWord(
				currentWordPos,
				currentWordEnd - currentWordPos,
				newWord);
			// Compute the position after the replaced word
			first = true;
			currentWordPos =
				getNextWordStart(text, currentWordPos + newWord.length());
			if (currentWordPos != -1) {
				currentWordEnd = getNextWordEnd(text, currentWordPos);
				// NOTE(review): unlike init() and getNextWord() the search
				// starts at the break character itself and the camelCase
				// flag is not updated here - confirm that this is intended.
				nextWordPos =
					getNextWordStart(text, currentWordEnd + endDistance - 1);
				sentenceIterator.setText(text);
				sentenceIterator.following(currentWordPos);
			} else {
				moreTokens = false;
			}
		}
	}

	/**
	 * Returns the complete text of the current segment.
	 * @see com.swabunga.spell.event.WordTokenizer#getContext()
	 */
	public String getContext() {
		return text.toString();
	}

	/**
	 * @see com.swabunga.spell.event.WordTokenizer#isNewSentence()
	 */
	public boolean isNewSentence() {
		// BreakIterator doesn't work when the first word in a sentence is not capitalised,
		// but we need to check for capitalisation
		if (startsSentence || currentWordPos < 2) {
			return true;
		}
		String textBefore = null;
		try {
			// the two characters directly in front of the current word
			textBefore = document.get(currentWordPos - 2, 2);
		} catch (BadLocationException ex) {
			return false;
		}
		return textBefore != null && ".".equals(textBefore.trim());
	}

	/**
	 * Nothing to do before the change.
	 * @see org.eclipse.jface.text.IDocumentListener
	 *      #documentAboutToBeChanged(org.eclipse.jface.text.DocumentEvent)
	 */
	public void documentAboutToBeChanged(DocumentEvent event) {
	}

	/**
	 * Refreshes the text segment and shifts the word positions that
	 * lie behind the modification offset by the length difference
	 * caused by the modification.
	 * @see org.eclipse.jface.text.IDocumentListener
	 *      #documentChanged(org.eclipse.jface.text.DocumentEvent)
	 */
	public void documentChanged(DocumentEvent event) {
		// Update segment
		getTextSegment(document);
		// Evaluate event
		int offset = event.getOffset();
		String iText = event.getText();
		int iLen = (iText == null) ? 0 : iText.length();
		int increment = iLen - event.getLength();
		// Update word position
		if (currentWordPos > offset) {
			currentWordPos += increment;
		}
		if (currentWordEnd > offset) {
			currentWordEnd += increment;
		}
		if (nextWordPos > offset) {
			nextWordPos += increment;
		}
	}

	/**
	 * Method dispose.
	 * Stops listening to document changes. Does nothing if the
	 * tokenizer has never been initialized.
	 */
	public void dispose() {
		// We stop to listen
		if (document != null) {
			document.removeDocumentListener(this);
		}
	}
}
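// Code-viewer keyboard shortcuts (web page residue, not part of the source):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -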