// blob: 7f4591baf6a63b721eefc25c1c29121fc2db2c26 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2000, 2006 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.jdt.internal.ui.text.spelling;
import com.ibm.icu.text.BreakIterator;
import java.util.LinkedList;
import java.util.Locale;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IRegion;
import org.eclipse.jface.text.TextUtilities;
import org.eclipse.jdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
import org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
import org.eclipse.jdt.internal.corext.refactoring.nls.NLSElement;
import org.eclipse.jdt.internal.ui.text.javadoc.IHtmlTagConstants;
import org.eclipse.jdt.internal.ui.text.javadoc.IJavaDocTagConstants;
/**
 * Iterator to spell-check javadoc comment regions.
 * <p>
 * The text of the region is split at the word breaks reported by a
 * {@link BreakIterator}. Word segments that consist of letters and digits
 * are returned as tokens; segments that start with the Javadoc tag prefix
 * or that form an HTML tag are returned as one token each. URL-like text,
 * the word following a parameter tag and the rest of the line following a
 * reference tag are skipped.
 * </p>
 * <p>
 * Note that {@link #next()} may return <code>null</code> even though
 * {@link #hasNext()} returned <code>true</code>, namely when the remaining
 * text contains no further candidate for spell-checking.
 * </p>
 *
 * @since 3.0
 */
public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {

    /** The content of the region */
    private final String fContent;

    /** The line delimiter */
    private final String fDelimiter;

    /** The last token */
    private String fLastToken= null;

    /** The next break (exclusive end index of the current token within the content) */
    private int fNext= 1;

    /** The offset of the region */
    private final int fOffset;

    /** The predecessor break */
    private int fPredecessor;

    /** The previous break (begin index of the current token within the content) */
    private int fPrevious= 0;

    /** The sentence breaks */
    private final LinkedList fSentenceBreaks= new LinkedList();

    /** Does the current word start a sentence? */
    private boolean fStartsSentence= false;

    /** The successor break */
    private int fSuccessor;

    /** The word iterator */
    private final BreakIterator fWordIterator;

    /**
     * Creates a new spell check iterator which uses the word break iterator
     * of the specified locale.
     *
     * @param document the document containing the specified partition
     * @param region the region to spell-check
     * @param locale the locale to use for spell-checking
     */
    public SpellCheckIterator(IDocument document, IRegion region, Locale locale) {
        this(document, region, locale, BreakIterator.getWordInstance(locale));
    }

    /**
     * Creates a new spell check iterator.
     * <p>
     * If the text of the region cannot be read from the document, or if it
     * starts with <code>NLSElement.TAG_PREFIX</code>, the iterator works on
     * the empty string. The text of the specified break iterator is replaced
     * by the content of the region.
     * </p>
     *
     * @param document the document containing the specified partition
     * @param region the region to spell-check
     * @param locale the locale to use for spell-checking
     * @param breakIterator the break-iterator
     */
    public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
        fOffset= region.getOffset();
        fWordIterator= breakIterator;
        fDelimiter= TextUtilities.getDefaultLineDelimiter(document);

        String content;
        try {

            content= document.get(region.getOffset(), region.getLength());
            if (content.startsWith(NLSElement.TAG_PREFIX))
                content= ""; //$NON-NLS-1$

        } catch (Exception exception) {
            // Best effort: an unreadable region is treated as empty
            content= ""; //$NON-NLS-1$
        }
        fContent= content;

        fWordIterator.setText(content);
        fPredecessor= fWordIterator.first();
        fSuccessor= fWordIterator.next();

        // Record all sentence boundaries up front; they are consumed in nextToken()
        final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
        iterator.setText(content);

        int offset= iterator.current();
        while (offset != BreakIterator.DONE) {

            fSentenceBreaks.add(new Integer(offset));
            offset= iterator.next();
        }
    }

    /*
     * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getBegin()
     */
    public final int getBegin() {
        return fPrevious + fOffset;
    }

    /*
     * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getEnd()
     */
    public final int getEnd() {
        // fNext is an exclusive index, the returned offset is inclusive
        return fNext + fOffset - 1;
    }

    /*
     * @see java.util.Iterator#hasNext()
     */
    public final boolean hasNext() {
        return fSuccessor != BreakIterator.DONE;
    }

    /**
     * Does the specified token consist of at least one letter and digits
     * only?
     *
     * @param begin the begin index (inclusive)
     * @param end the end index (exclusive)
     * @return <code>true</code> iff the token consists of digits and at
     *         least one letter only, <code>false</code> otherwise
     */
    protected final boolean isAlphaNumeric(final int begin, final int end) {

        char character= 0;

        boolean letter= false;
        for (int index= begin; index < end; index++) {

            character= fContent.charAt(index);
            if (Character.isLetter(character))
                letter= true;

            if (!Character.isLetterOrDigit(character))
                return false;
        }
        return letter;
    }

    /**
     * Was the last token one of the specified Javadoc tags?
     *
     * @param tags the javadoc tags to check
     * @return <code>true</code> iff the last token was one of the specified
     *         Javadoc tags, <code>false</code> otherwise
     */
    protected final boolean isJavadocToken(final String[] tags) {

        if (fLastToken != null) {

            for (int index= 0; index < tags.length; index++) {

                if (fLastToken.equals(tags[index]))
                    return true;
            }
        }
        return false;
    }

    /**
     * Is the current token a single letter token, i.e. a letter which is
     * delimited on both sides by whitespace or by the begin or end of the
     * content?
     *
     * @param begin the begin index
     * @return <code>true</code> iff the token is a single letter token,
     *         <code>false</code> otherwise (also if <code>begin</code> is
     *         not a valid index into the content)
     */
    protected final boolean isSingleLetter(final int begin) {

        final int length= fContent.length();
        if (begin < 0 || begin >= length)
            return false;

        if (!Character.isLetter(fContent.charAt(begin)))
            return false;

        // The begin of the content delimits a word just like whitespace does
        if (begin > 0 && !Character.isWhitespace(fContent.charAt(begin - 1)))
            return false;

        // The same holds for the end of the content
        if (begin < length - 1 && !Character.isWhitespace(fContent.charAt(begin + 1)))
            return false;

        return true;
    }

    /**
     * Does the specified token look like a URL, i.e. does the content at
     * the specified index start with one of
     * <code>DefaultSpellChecker.URL_PREFIXES</code>?
     *
     * @param begin the begin index
     * @return <code>true</code> iff this token looks like a URL,
     *         <code>false</code> otherwise
     */
    protected final boolean isUrlToken(final int begin) {

        for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {

            if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin))
                return true;
        }
        return false;
    }

    /**
     * Does the specified token consist of whitespace only?
     *
     * @param begin the begin index (inclusive)
     * @param end the end index (exclusive)
     * @return <code>true</code> iff the token consists of whitespace
     *         only (which includes the empty token), <code>false</code>
     *         otherwise
     */
    protected final boolean isWhitespace(final int begin, final int end) {

        for (int index= begin; index < end; index++) {

            if (!Character.isWhitespace(fContent.charAt(index)))
                return false;
        }
        return true;
    }

    /*
     * Returns the next token to spell-check, or null if the word breaks
     * are exhausted before another candidate has been found.
     *
     * @see java.util.Iterator#next()
     */
    public final Object next() {

        String token= nextToken();
        // Step over all segments which are no candidates for spell-checking
        while (token == null && fSuccessor != BreakIterator.DONE)
            token= nextToken();

        fLastToken= token;

        return token;
    }

    /**
     * Advances the end index to the next word break.
     */
    protected final void nextBreak() {

        fNext= fSuccessor;
        fPredecessor= fSuccessor;

        fSuccessor= fWordIterator.next();
    }

    /**
     * Returns the next sentence break.
     *
     * @return the next sentence break
     * @throws java.util.NoSuchElementException if there is no sentence
     *         break left
     */
    protected final int nextSentence() {
        return ((Integer) fSentenceBreaks.getFirst()).intValue();
    }

    /**
     * Determines the next token to be spell-checked.
     * <p>
     * As a side effect the begin and end indices reported by
     * {@link #getBegin()} and {@link #getEnd()} as well as the flag
     * reported by {@link #startsSentence()} are updated.
     * </p>
     *
     * @return the next token to be spell-checked, or <code>null</code>
     *         iff the next token is not a candidate for spell-checking.
     */
    protected String nextToken() {

        String token= null;

        // The new segment begins where the previous one ended
        fPrevious= fPredecessor;
        fStartsSentence= false;

        nextBreak();

        boolean update= false;
        if (fNext - fPrevious > 0) {

            if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {

                // Javadoc tag: join the prefix and the following segment to one token
                nextBreak();
                if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
                    update= true;
                    token= fContent.substring(fPrevious, fNext);
                } else
                    fPredecessor= fNext;

            } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {

                // HTML tag: a closing tag needs one additional step to get past the close prefix
                if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
                    nextBreak();

                nextBreak();

                // Only text which is terminated by the tag postfix is reported as a tag
                if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {

                    nextBreak();
                    if (fSuccessor != BreakIterator.DONE) {
                        update= true;
                        token= fContent.substring(fPrevious, fNext);
                    }
                }
            } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {

                if (isUrlToken(fPrevious))
                    // URL: ignore everything up to the next space
                    skipTokens(fPrevious, ' ');
                else if (isJavadocToken(JAVADOC_PARAM_TAGS))
                    // The word following a parameter tag is not checked
                    fLastToken= null;
                else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
                    // The rest of the line following a reference tag is not checked
                    fLastToken= null;
                    skipTokens(fPrevious, fDelimiter.charAt(0));
                } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
                    token= fContent.substring(fPrevious, fNext);

                update= true;
            }
        }

        if (update && fSentenceBreaks.size() > 0) {

            if (fPrevious >= nextSentence()) {

                // Consume all sentence breaks which are not behind the begin of this segment
                while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
                    fSentenceBreaks.removeFirst();

                fStartsSentence= (fLastToken == null) || (token != null);
            }
        }
        return token;
    }

    /*
     * @see java.util.Iterator#remove()
     */
    public final void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Skip the tokens until the stop character is reached.
     * <p>
     * If the stop character does not occur at or after the begin index,
     * the iteration is marked as finished.
     * </p>
     *
     * @param begin the begin index
     * @param stop the stop character
     */
    protected final void skipTokens(final int begin, final char stop) {

        int end= begin;

        while (end < fContent.length() && fContent.charAt(end) != stop)
            end++;

        if (end < fContent.length()) {

            // Resume the iteration at the first word break behind the stop character
            fNext= end;
            fPredecessor= fNext;

            fSuccessor= fWordIterator.following(fNext);
        } else
            fSuccessor= BreakIterator.DONE;
    }

    /*
     * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#startsSentence()
     */
    public final boolean startsSentence() {
        return fStartsSentence;
    }
}