/*******************************************************************************
 * Copyright (c) 2000, 2006 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
| |
| package org.eclipse.jdt.internal.ui.text.spelling; |
| |
| import com.ibm.icu.text.BreakIterator; |
| import java.util.LinkedList; |
| import java.util.Locale; |
| |
| import org.eclipse.jface.text.IDocument; |
| import org.eclipse.jface.text.IRegion; |
| import org.eclipse.jface.text.TextUtilities; |
| |
| import org.eclipse.jdt.internal.ui.text.spelling.engine.DefaultSpellChecker; |
| import org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator; |
| |
| import org.eclipse.jdt.internal.corext.refactoring.nls.NLSElement; |
| import org.eclipse.jdt.internal.ui.text.javadoc.IHtmlTagConstants; |
| import org.eclipse.jdt.internal.ui.text.javadoc.IJavaDocTagConstants; |
| |
| /** |
| * Iterator to spell-check javadoc comment regions. |
| * |
| * @since 3.0 |
| */ |
| public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants { |
| |
| /** The content of the region */ |
| private final String fContent; |
| |
| /** The line delimiter */ |
| private final String fDelimiter; |
| |
| /** The last token */ |
| private String fLastToken= null; |
| |
| /** The next break */ |
| private int fNext= 1; |
| |
| /** The offset of the region */ |
| private final int fOffset; |
| |
| /** The predecessor break */ |
| private int fPredecessor; |
| |
| /** The previous break */ |
| private int fPrevious= 0; |
| |
| /** The sentence breaks */ |
| private final LinkedList fSentenceBreaks= new LinkedList(); |
| |
| /** Does the current word start a sentence? */ |
| private boolean fStartsSentence= false; |
| |
| /** The successor break */ |
| private int fSuccessor; |
| |
| /** The word iterator */ |
| private final BreakIterator fWordIterator; |
| |
| /** |
| * Creates a new spell check iterator. |
| * |
| * @param document the document containing the specified partition |
| * @param region the region to spell-check |
| * @param locale the locale to use for spell-checking |
| */ |
| public SpellCheckIterator(IDocument document, IRegion region, Locale locale) { |
| this(document, region, locale, BreakIterator.getWordInstance(locale)); |
| } |
| |
| /** |
| * Creates a new spell check iterator. |
| * |
| * @param document the document containing the specified partition |
| * @param region the region to spell-check |
| * @param locale the locale to use for spell-checking |
| * @param breakIterator the break-iterator |
| */ |
| public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) { |
| fOffset= region.getOffset(); |
| fWordIterator= breakIterator; |
| fDelimiter= TextUtilities.getDefaultLineDelimiter(document); |
| |
| String content; |
| try { |
| |
| content= document.get(region.getOffset(), region.getLength()); |
| if (content.startsWith(NLSElement.TAG_PREFIX)) |
| content= ""; //$NON-NLS-1$ |
| |
| } catch (Exception exception) { |
| content= ""; //$NON-NLS-1$ |
| } |
| fContent= content; |
| |
| fWordIterator.setText(content); |
| fPredecessor= fWordIterator.first(); |
| fSuccessor= fWordIterator.next(); |
| |
| final BreakIterator iterator= BreakIterator.getSentenceInstance(locale); |
| iterator.setText(content); |
| |
| int offset= iterator.current(); |
| while (offset != BreakIterator.DONE) { |
| |
| fSentenceBreaks.add(new Integer(offset)); |
| offset= iterator.next(); |
| } |
| } |
| |
| /* |
| * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin() |
| */ |
| public final int getBegin() { |
| return fPrevious + fOffset; |
| } |
| |
| /* |
| * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd() |
| */ |
| public final int getEnd() { |
| return fNext + fOffset - 1; |
| } |
| |
| /* |
| * @see java.util.Iterator#hasNext() |
| */ |
| public final boolean hasNext() { |
| return fSuccessor != BreakIterator.DONE; |
| } |
| |
| /** |
| * Does the specified token consist of at least one letter and digits |
| * only? |
| * |
| * @param begin the begin index |
| * @param end the end index |
| * @return <code>true</code> iff the token consists of digits and at |
| * least one letter only, <code>false</code> otherwise |
| */ |
| protected final boolean isAlphaNumeric(final int begin, final int end) { |
| |
| char character= 0; |
| |
| boolean letter= false; |
| for (int index= begin; index < end; index++) { |
| |
| character= fContent.charAt(index); |
| if (Character.isLetter(character)) |
| letter= true; |
| |
| if (!Character.isLetterOrDigit(character)) |
| return false; |
| } |
| return letter; |
| } |
| |
| /** |
| * Was the last token a Javadoc tag tag? |
| * |
| * @param tags the javadoc tags to check |
| * @return <code>true</code> iff the last token was a Javadoc tag, |
| * <code>false</code> otherwise |
| */ |
| protected final boolean isJavadocToken(final String[] tags) { |
| |
| if (fLastToken != null) { |
| |
| for (int index= 0; index < tags.length; index++) { |
| |
| if (fLastToken.equals(tags[index])) |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Is the current token a single letter token surrounded by |
| * non-whitespace characters? |
| * |
| * @param begin the begin index |
| * @return <code>true</code> iff the token is a single letter token, |
| * <code>false</code> otherwise |
| */ |
| protected final boolean isSingleLetter(final int begin) { |
| |
| if (begin > 0 && begin < fContent.length() - 1) |
| return Character.isWhitespace(fContent.charAt(begin - 1)) && Character.isLetter(fContent.charAt(begin)) && Character.isWhitespace(fContent.charAt(begin + 1)); |
| |
| return false; |
| } |
| |
| /** |
| * Does the specified token look like an URL? |
| * |
| * @param begin the begin index |
| * @return <code>true</code> iff this token look like an URL, |
| * <code>false</code> otherwise |
| */ |
| protected final boolean isUrlToken(final int begin) { |
| |
| for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) { |
| |
| if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin)) |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Does the specified token consist of whitespace only? |
| * |
| * @param begin the begin index |
| * @param end the end index |
| * @return <code>true</code> iff the token consists of whitespace |
| * only, <code>false</code> otherwise |
| */ |
| protected final boolean isWhitespace(final int begin, final int end) { |
| |
| for (int index= begin; index < end; index++) { |
| |
| if (!Character.isWhitespace(fContent.charAt(index))) |
| return false; |
| } |
| return true; |
| } |
| |
| /* |
| * @see java.util.Iterator#next() |
| */ |
| public final Object next() { |
| |
| String token= nextToken(); |
| while (token == null && fSuccessor != BreakIterator.DONE) |
| token= nextToken(); |
| |
| fLastToken= token; |
| |
| return token; |
| } |
| |
| /** |
| * Advances the end index to the next word break. |
| */ |
| protected final void nextBreak() { |
| |
| fNext= fSuccessor; |
| fPredecessor= fSuccessor; |
| |
| fSuccessor= fWordIterator.next(); |
| } |
| |
| /** |
| * Returns the next sentence break. |
| * |
| * @return the next sentence break |
| */ |
| protected final int nextSentence() { |
| return ((Integer) fSentenceBreaks.getFirst()).intValue(); |
| } |
| |
| /** |
| * Determines the next token to be spell-checked. |
| * |
| * @return the next token to be spell-checked, or <code>null</code> |
| * iff the next token is not a candidate for spell-checking. |
| */ |
| protected String nextToken() { |
| |
| String token= null; |
| |
| fPrevious= fPredecessor; |
| fStartsSentence= false; |
| |
| nextBreak(); |
| |
| boolean update= false; |
| if (fNext - fPrevious > 0) { |
| |
| if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) { |
| |
| nextBreak(); |
| if (Character.isLetter(fContent.charAt(fPrevious + 1))) { |
| update= true; |
| token= fContent.substring(fPrevious, fNext); |
| } else |
| fPredecessor= fNext; |
| |
| } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) { |
| |
| if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious)) |
| nextBreak(); |
| |
| nextBreak(); |
| |
| if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) { |
| |
| nextBreak(); |
| if (fSuccessor != BreakIterator.DONE) { |
| update= true; |
| token= fContent.substring(fPrevious, fNext); |
| } |
| } |
| } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) { |
| |
| if (isUrlToken(fPrevious)) |
| skipTokens(fPrevious, ' '); |
| else if (isJavadocToken(JAVADOC_PARAM_TAGS)) |
| fLastToken= null; |
| else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) { |
| fLastToken= null; |
| skipTokens(fPrevious, fDelimiter.charAt(0)); |
| } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious)) |
| token= fContent.substring(fPrevious, fNext); |
| |
| update= true; |
| } |
| } |
| |
| if (update && fSentenceBreaks.size() > 0) { |
| |
| if (fPrevious >= nextSentence()) { |
| |
| while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence()) |
| fSentenceBreaks.removeFirst(); |
| |
| fStartsSentence= (fLastToken == null) || (token != null); |
| } |
| } |
| return token; |
| } |
| |
| /* |
| * @see java.util.Iterator#remove() |
| */ |
| public final void remove() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| /** |
| * Skip the tokens until the stop character is reached. |
| * |
| * @param begin the begin index |
| * @param stop the stop character |
| */ |
| protected final void skipTokens(final int begin, final char stop) { |
| |
| int end= begin; |
| |
| while (end < fContent.length() && fContent.charAt(end) != stop) |
| end++; |
| |
| if (end < fContent.length()) { |
| |
| fNext= end; |
| fPredecessor= fNext; |
| |
| fSuccessor= fWordIterator.following(fNext); |
| } else |
| fSuccessor= BreakIterator.DONE; |
| } |
| |
| /* |
| * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence() |
| */ |
| public final boolean startsSentence() { |
| return fStartsSentence; |
| } |
| } |