org.eclipse.jdt.ui/ui/org/eclipse/jdt/internal/ui/text/spelling/SpellCheckIterator.java - jdt/eclipse.jdt.ui - Git at Google

 /*******************************************************************************
  * Copyright (c) 2000, 2006 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/

 package org.eclipse.jdt.internal.ui.text.spelling;

 import com.ibm.icu.text.BreakIterator;
 import java.util.LinkedList;
 import java.util.Locale;

 import org.eclipse.jface.text.IDocument;
 import org.eclipse.jface.text.IRegion;
 import org.eclipse.jface.text.TextUtilities;

 import org.eclipse.jdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
 import org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator;

 import org.eclipse.jdt.internal.corext.refactoring.nls.NLSElement;
 import org.eclipse.jdt.internal.ui.text.javadoc.IHtmlTagConstants;
 import org.eclipse.jdt.internal.ui.text.javadoc.IJavaDocTagConstants;

 /**
  * Iterator to spell-check javadoc comment regions.
  *
  * @since 3.0
  */
 public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {

 	/** The text of the region being checked; empty if it could not be read or is skipped */
 	private final String fContent;

 	/** The default line delimiter of the document */
 	private final String fDelimiter;

 	/** The offset of the region within the document */
 	private final int fOffset;

 	/** The word iterator used to find the token boundaries in the content */
 	private final BreakIterator fWordIterator;

 	/** The sentence break indices (stored as <code>Integer</code>), consumed from the front */
 	private final LinkedList fSentenceBreaks= new LinkedList();

 	/** The begin index of the current token within the content */
 	private int fPrevious;

 	/** The end index (exclusive) of the current token within the content */
 	private int fNext= 1;

 	/** The word break at which the next token will begin */
 	private int fPredecessor;

 	/** The look-ahead word break; <code>BreakIterator.DONE</code> once the content is exhausted */
 	private int fSuccessor;

 	/** The last token handed out, or <code>null</code> if there is none or it has been reset */
 	private String fLastToken;

 	/** Does the current word start a sentence? */
 	private boolean fStartsSentence;

 	/**
 	 * Creates a new spell check iterator that splits the region into words
 	 * with the word break iterator obtained for the given locale.
 	 *
 	 * @param document the document containing the specified partition
 	 * @param region the region to spell-check
 	 * @param locale the locale to use for spell-checking; also selects the
 	 *        word break iterator
 	 */
 	public SpellCheckIterator(IDocument document, IRegion region, Locale locale) {
 		this(document, region, locale, BreakIterator.getWordInstance(locale));
 	}

 	/**
 	 * Creates a new spell check iterator working with the given word break
 	 * iterator.
 	 * <p>
 	 * The region text is read once from the document. If it cannot be read,
 	 * or if it starts with the NLS tag prefix, the iterator operates on the
 	 * empty string and therefore yields no tokens. All sentence breaks of
 	 * the text are computed up front.
 	 * </p>
 	 *
 	 * @param document the document containing the specified partition
 	 * @param region the region to spell-check
 	 * @param locale the locale used to obtain the sentence break iterator
 	 * @param breakIterator the break-iterator used to find word boundaries
 	 */
 	public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
 		fOffset= region.getOffset();
 		fWordIterator= breakIterator;
 		fDelimiter= TextUtilities.getDefaultLineDelimiter(document);

 		// Start from the empty fallback and replace it only by readable,
 		// non-NLS-tag text.
 		String text= ""; //$NON-NLS-1$
 		try {
 			final String raw= document.get(region.getOffset(), region.getLength());
 			if (!raw.startsWith(NLSElement.TAG_PREFIX))
 				text= raw;
 		} catch (Exception exception) {
 			// best effort: keep the empty fallback
 		}
 		fContent= text;

 		// Prime the word cursor with the first two word breaks.
 		fWordIterator.setText(fContent);
 		fPredecessor= fWordIterator.first();
 		fSuccessor= fWordIterator.next();

 		// Record every sentence break, beginning with the iterator's initial position.
 		final BreakIterator sentences= BreakIterator.getSentenceInstance(locale);
 		sentences.setText(fContent);
 		for (int boundary= sentences.current(); boundary != BreakIterator.DONE; boundary= sentences.next())
 			fSentenceBreaks.add(new Integer(boundary));
 	}

 	/**
 	 * Returns the document offset at which the current token begins.
 	 *
 	 * @return the region offset plus the begin index of the current token
 	 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getBegin()
 	 */
 	public final int getBegin() {
 		return fOffset + fPrevious;
 	}

 	/**
 	 * Returns the document offset of the last character of the current
 	 * token (inclusive).
 	 *
 	 * @return the region offset plus the exclusive end index of the current
 	 *         token, minus one
 	 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getEnd()
 	 */
 	public final int getEnd() {
 		return fOffset + fNext - 1;
 	}

 	/**
 	 * Tells whether there are word breaks left to examine. Note that
 	 * {@link #next()} may still return <code>null</code> when none of the
 	 * remaining spans is a candidate for spell-checking.
 	 *
 	 * @return <code>true</code> iff the look-ahead word break is not
 	 *         <code>BreakIterator.DONE</code>
 	 * @see java.util.Iterator#hasNext()
 	 */
 	public final boolean hasNext() {
 		final boolean exhausted= fSuccessor == BreakIterator.DONE;
 		return !exhausted;
 	}

 	/**
 	 * Does the specified span consist of letters and digits only, with at
 	 * least one letter among them?
 	 *
 	 * @param begin the begin index (inclusive)
 	 * @param end the end index (exclusive)
 	 * @return <code>true</code> iff every character of the span is a letter
 	 *         or a digit and at least one of them is a letter,
 	 *         <code>false</code> otherwise (in particular for an empty span)
 	 */
 	protected final boolean isAlphaNumeric(final int begin, final int end) {

 		boolean sawLetter= false;
 		for (int i= begin; i < end; i++) {

 			final char current= fContent.charAt(i);
 			if (!Character.isLetterOrDigit(current))
 				return false;

 			if (!sawLetter && Character.isLetter(current))
 				sawLetter= true;
 		}
 		return sawLetter;
 	}

 	/**
 	 * Was the last token one of the specified Javadoc tags?
 	 *
 	 * @param tags the javadoc tags to check
 	 * @return <code>true</code> iff there is a last token and it equals one
 	 *         of the given tags, <code>false</code> otherwise
 	 */
 	protected final boolean isJavadocToken(final String[] tags) {

 		// Without a last token there is nothing to compare against.
 		if (fLastToken == null)
 			return false;

 		for (int i= 0; i < tags.length; i++) {
 			final String candidate= tags[i];
 			if (fLastToken.equals(candidate))
 				return true;
 		}
 		return false;
 	}

 	/**
 	 * Is the character at the specified index a letter that has a
 	 * whitespace character directly before and directly after it?
 	 *
 	 * @param begin the index of the character to test
 	 * @return <code>true</code> iff the character is a letter enclosed by
 	 *         whitespace; <code>false</code> otherwise, including when it
 	 *         is the first or the last character of the content
 	 */
 	protected final boolean isSingleLetter(final int begin) {

 		// Both neighbours must exist inside the content.
 		final boolean hasNeighbours= begin > 0 && begin < fContent.length() - 1;
 		if (!hasNeighbours)
 			return false;

 		if (!Character.isWhitespace(fContent.charAt(begin - 1)))
 			return false;

 		if (!Character.isLetter(fContent.charAt(begin)))
 			return false;

 		return Character.isWhitespace(fContent.charAt(begin + 1));
 	}

 	/**
 	 * Does the content at the specified index look like the start of a URL?
 	 *
 	 * @param begin the begin index
 	 * @return <code>true</code> iff the content starts with one of
 	 *         <code>DefaultSpellChecker.URL_PREFIXES</code> at that index,
 	 *         <code>false</code> otherwise
 	 */
 	protected final boolean isUrlToken(final int begin) {

 		final String[] prefixes= DefaultSpellChecker.URL_PREFIXES;
 		int i= 0;
 		while (i < prefixes.length) {

 			if (fContent.startsWith(prefixes[i], begin))
 				return true;

 			i++;
 		}
 		return false;
 	}

 	/**
 	 * Does the specified span consist of whitespace only?
 	 *
 	 * @param begin the begin index (inclusive)
 	 * @param end the end index (exclusive)
 	 * @return <code>true</code> iff every character of the span is
 	 *         whitespace (trivially so for an empty span),
 	 *         <code>false</code> otherwise
 	 */
 	protected final boolean isWhitespace(final int begin, final int end) {

 		// Run over the leading whitespace; the span qualifies iff the run
 		// reaches its end.
 		int i= begin;
 		while (i < end && Character.isWhitespace(fContent.charAt(i)))
 			i++;

 		return i >= end;
 	}

 	/**
 	 * Returns the next token to be spell-checked, skipping every span that
 	 * is not a candidate. The result is remembered as the last token.
 	 *
 	 * @return the next token as a <code>String</code>, or <code>null</code>
 	 *         if the word breaks are exhausted before a candidate is found
 	 * @see java.util.Iterator#next()
 	 */
 	public final Object next() {

 		String candidate;
 		do {
 			candidate= nextToken();
 		} while (candidate == null && fSuccessor != BreakIterator.DONE);

 		fLastToken= candidate;
 		return candidate;
 	}

 	/**
 	 * Advances the end index to the next word break: the current look-ahead
 	 * break becomes both the end index and the predecessor break, and a new
 	 * look-ahead break is fetched from the word iterator.
 	 */
 	protected final void nextBreak() {

 		final int reached= fSuccessor;
 		fNext= reached;
 		fPredecessor= reached;

 		fSuccessor= fWordIterator.next();
 	}

 	/**
 	 * Returns the next sentence break, i.e. the first of the remaining
 	 * sentence breaks. Must only be called while there are sentence breaks
 	 * left.
 	 *
 	 * @return the next sentence break
 	 */
 	protected final int nextSentence() {
 		final Integer first= (Integer) fSentenceBreaks.getFirst();
 		return first.intValue();
 	}

 	/**
 	 * Determines the next token to be spell-checked.
 	 * <p>
 	 * The span between the predecessor break and the next word break is
 	 * classified as follows: a Javadoc tag prefix followed by a letter and
 	 * an HTML tag that is closed by the tag postfix are each returned as
 	 * one token; a span of letters and digits is returned as a word unless
 	 * it starts a URL, directly follows a Javadoc parameter or reference
 	 * tag, or is a single character that is not a letter enclosed by
 	 * whitespace. Every other span yields <code>null</code>. The method
 	 * also maintains the flag reported by {@link #startsSentence()}.
 	 * </p>
 	 *
 	 * @return the next token to be spell-checked, or <code>null</code>
 	 *         iff the next token is not a candidate for spell-checking.
 	 */
 	protected String nextToken() {

 		String token= null;

 		// The new span begins where the previous one ended.
 		fPrevious= fPredecessor;
 		fStartsSentence= false;

 		nextBreak();

 		// Set when the sentence bookkeeping at the end has to run for this span.
 		boolean update= false;
 		if (fNext - fPrevious > 0) {

 			if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {

 				// Javadoc tag: extend the span over the break following the prefix.
 				nextBreak();
 				if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
 					update= true;
 					token= fContent.substring(fPrevious, fNext);
 				} else
 					fPredecessor= fNext;

 			} else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {

 				// HTML tag: a closing tag needs one additional break to get
 				// past its close prefix before the tag name is consumed.
 				if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
 					nextBreak();

 				nextBreak();

 				// The tag is only reported if the tag postfix follows right
 				// here and further word breaks remain after it.
 				if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {

 					nextBreak();
 					if (fSuccessor != BreakIterator.DONE) {
 						update= true;
 						token= fContent.substring(fPrevious, fNext);
 					}
 				}
 			} else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {

 				if (isUrlToken(fPrevious))
 					// URL: skip everything up to the next blank.
 					skipTokens(fPrevious, ' ');
 				else if (isJavadocToken(JAVADOC_PARAM_TAGS))
 					// Word right after a parameter tag (presumably the
 					// parameter name): not checked; forget the tag.
 					fLastToken= null;
 				else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
 					// After a reference tag: skip up to the first character
 					// of the line delimiter.
 					fLastToken= null;
 					skipTokens(fPrevious, fDelimiter.charAt(0));
 				} else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
 					// Ordinary word, or a single letter enclosed by whitespace.
 					token= fContent.substring(fPrevious, fNext);

 				update= true;
 			}
 		}

 		if (update && fSentenceBreaks.size() > 0) {

 			if (fPrevious >= nextSentence()) {

 				// Discard all sentence breaks at or before the span's begin.
 				while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
 					fSentenceBreaks.removeFirst();

 				fStartsSentence= (fLastToken == null) || (token != null);
 			}
 		}
 		return token;
 	}

 	/**
 	 * Not supported by this iterator.
 	 *
 	 * @throws UnsupportedOperationException always
 	 * @see java.util.Iterator#remove()
 	 */
 	public final void remove() {
 		throw new UnsupportedOperationException();
 	}

 	/**
 	 * Skips the content until the stop character is reached.
 	 * <p>
 	 * If the stop character occurs at or after the begin index, the end
 	 * index and the predecessor break are moved onto it and the look-ahead
 	 * break becomes the first word break following it. Otherwise the
 	 * iteration is ended.
 	 * </p>
 	 *
 	 * @param begin the begin index
 	 * @param stop the stop character
 	 */
 	protected final void skipTokens(final int begin, final char stop) {

 		final int length= fContent.length();

 		int position= begin;
 		while (position < length && fContent.charAt(position) != stop)
 			position++;

 		// Stop character not found: nothing is left to iterate over.
 		if (position >= length) {
 			fSuccessor= BreakIterator.DONE;
 			return;
 		}

 		fNext= position;
 		fPredecessor= position;

 		fSuccessor= fWordIterator.following(position);
 	}

 	/**
 	 * Tells whether the current word starts a sentence, as determined while
 	 * the current token was computed.
 	 *
 	 * @return the value of the starts-sentence flag
 	 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#startsSentence()
 	 */
 	public final boolean startsSentence() {
 		return fStartsSentence;
 	}
 }
	/*******************************************************************************
	* Copyright (c) 2000, 2006 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	*******************************************************************************/

	package org.eclipse.jdt.internal.ui.text.spelling;

	import com.ibm.icu.text.BreakIterator;
	import java.util.LinkedList;
	import java.util.Locale;

	import org.eclipse.jface.text.IDocument;
	import org.eclipse.jface.text.IRegion;
	import org.eclipse.jface.text.TextUtilities;

	import org.eclipse.jdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
	import org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator;

	import org.eclipse.jdt.internal.corext.refactoring.nls.NLSElement;
	import org.eclipse.jdt.internal.ui.text.javadoc.IHtmlTagConstants;
	import org.eclipse.jdt.internal.ui.text.javadoc.IJavaDocTagConstants;

	/**
	* Iterator to spell-check javadoc comment regions.
	*
	* @since 3.0
	*/
	public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {

	/** The text of the region being checked; empty if it could not be read or is skipped */
	private final String fContent;

	/** The default line delimiter of the document */
	private final String fDelimiter;

	/** The offset of the region within the document */
	private final int fOffset;

	/** The word iterator used to find the token boundaries in the content */
	private final BreakIterator fWordIterator;

	/** The sentence break indices (stored as <code>Integer</code>), consumed from the front */
	private final LinkedList fSentenceBreaks= new LinkedList();

	/** The begin index of the current token within the content */
	private int fPrevious;

	/** The end index (exclusive) of the current token within the content */
	private int fNext= 1;

	/** The word break at which the next token will begin */
	private int fPredecessor;

	/** The look-ahead word break; <code>BreakIterator.DONE</code> once the content is exhausted */
	private int fSuccessor;

	/** The last token handed out, or <code>null</code> if there is none or it has been reset */
	private String fLastToken;

	/** Does the current word start a sentence? */
	private boolean fStartsSentence;

	/**
	 * Creates a new spell check iterator that splits the region into words
	 * with the word break iterator obtained for the given locale.
	 *
	 * @param document the document containing the specified partition
	 * @param region the region to spell-check
	 * @param locale the locale to use for spell-checking; also selects the
	 *        word break iterator
	 */
	public SpellCheckIterator(IDocument document, IRegion region, Locale locale) {
	this(document, region, locale, BreakIterator.getWordInstance(locale));
	}

	/**
	 * Creates a new spell check iterator working with the given word break
	 * iterator.
	 * <p>
	 * The region text is read once from the document. If it cannot be read,
	 * or if it starts with the NLS tag prefix, the iterator operates on the
	 * empty string and therefore yields no tokens. All sentence breaks of
	 * the text are computed up front.
	 * </p>
	 *
	 * @param document the document containing the specified partition
	 * @param region the region to spell-check
	 * @param locale the locale used to obtain the sentence break iterator
	 * @param breakIterator the break-iterator used to find word boundaries
	 */
	public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
		fOffset= region.getOffset();
		fWordIterator= breakIterator;
		fDelimiter= TextUtilities.getDefaultLineDelimiter(document);

		// Start from the empty fallback and replace it only by readable,
		// non-NLS-tag text.
		String text= ""; //$NON-NLS-1$
		try {
			final String raw= document.get(region.getOffset(), region.getLength());
			if (!raw.startsWith(NLSElement.TAG_PREFIX))
				text= raw;
		} catch (Exception exception) {
			// best effort: keep the empty fallback
		}
		fContent= text;

		// Prime the word cursor with the first two word breaks.
		fWordIterator.setText(fContent);
		fPredecessor= fWordIterator.first();
		fSuccessor= fWordIterator.next();

		// Record every sentence break, beginning with the iterator's initial position.
		final BreakIterator sentences= BreakIterator.getSentenceInstance(locale);
		sentences.setText(fContent);
		for (int boundary= sentences.current(); boundary != BreakIterator.DONE; boundary= sentences.next())
			fSentenceBreaks.add(new Integer(boundary));
	}

	/**
	 * Returns the document offset at which the current token begins.
	 *
	 * @return the region offset plus the begin index of the current token
	 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getBegin()
	 */
	public final int getBegin() {
		return fOffset + fPrevious;
	}

	/**
	 * Returns the document offset of the last character of the current
	 * token (inclusive).
	 *
	 * @return the region offset plus the exclusive end index of the current
	 *         token, minus one
	 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getEnd()
	 */
	public final int getEnd() {
		return fOffset + fNext - 1;
	}

	/**
	 * Tells whether there are word breaks left to examine. Note that
	 * {@link #next()} may still return <code>null</code> when none of the
	 * remaining spans is a candidate for spell-checking.
	 *
	 * @return <code>true</code> iff the look-ahead word break is not
	 *         <code>BreakIterator.DONE</code>
	 * @see java.util.Iterator#hasNext()
	 */
	public final boolean hasNext() {
		final boolean exhausted= fSuccessor == BreakIterator.DONE;
		return !exhausted;
	}

	/**
	 * Does the specified span consist of letters and digits only, with at
	 * least one letter among them?
	 *
	 * @param begin the begin index (inclusive)
	 * @param end the end index (exclusive)
	 * @return <code>true</code> iff every character of the span is a letter
	 *         or a digit and at least one of them is a letter,
	 *         <code>false</code> otherwise (in particular for an empty span)
	 */
	protected final boolean isAlphaNumeric(final int begin, final int end) {

		boolean sawLetter= false;
		for (int i= begin; i < end; i++) {

			final char current= fContent.charAt(i);
			if (!Character.isLetterOrDigit(current))
				return false;

			if (!sawLetter && Character.isLetter(current))
				sawLetter= true;
		}
		return sawLetter;
	}

	/**
	 * Was the last token one of the specified Javadoc tags?
	 *
	 * @param tags the javadoc tags to check
	 * @return <code>true</code> iff there is a last token and it equals one
	 *         of the given tags, <code>false</code> otherwise
	 */
	protected final boolean isJavadocToken(final String[] tags) {

		// Without a last token there is nothing to compare against.
		if (fLastToken == null)
			return false;

		for (int i= 0; i < tags.length; i++) {
			final String candidate= tags[i];
			if (fLastToken.equals(candidate))
				return true;
		}
		return false;
	}

	/**
	 * Is the character at the specified index a letter that has a
	 * whitespace character directly before and directly after it?
	 *
	 * @param begin the index of the character to test
	 * @return <code>true</code> iff the character is a letter enclosed by
	 *         whitespace; <code>false</code> otherwise, including when it
	 *         is the first or the last character of the content
	 */
	protected final boolean isSingleLetter(final int begin) {

		// Both neighbours must exist inside the content.
		final boolean hasNeighbours= begin > 0 && begin < fContent.length() - 1;
		if (!hasNeighbours)
			return false;

		if (!Character.isWhitespace(fContent.charAt(begin - 1)))
			return false;

		if (!Character.isLetter(fContent.charAt(begin)))
			return false;

		return Character.isWhitespace(fContent.charAt(begin + 1));
	}

	/**
	 * Does the content at the specified index look like the start of a URL?
	 *
	 * @param begin the begin index
	 * @return <code>true</code> iff the content starts with one of
	 *         <code>DefaultSpellChecker.URL_PREFIXES</code> at that index,
	 *         <code>false</code> otherwise
	 */
	protected final boolean isUrlToken(final int begin) {

		final String[] prefixes= DefaultSpellChecker.URL_PREFIXES;
		int i= 0;
		while (i < prefixes.length) {

			if (fContent.startsWith(prefixes[i], begin))
				return true;

			i++;
		}
		return false;
	}

	/**
	 * Does the specified span consist of whitespace only?
	 *
	 * @param begin the begin index (inclusive)
	 * @param end the end index (exclusive)
	 * @return <code>true</code> iff every character of the span is
	 *         whitespace (trivially so for an empty span),
	 *         <code>false</code> otherwise
	 */
	protected final boolean isWhitespace(final int begin, final int end) {

		// Run over the leading whitespace; the span qualifies iff the run
		// reaches its end.
		int i= begin;
		while (i < end && Character.isWhitespace(fContent.charAt(i)))
			i++;

		return i >= end;
	}

	/**
	 * Returns the next token to be spell-checked, skipping every span that
	 * is not a candidate. The result is remembered as the last token.
	 *
	 * @return the next token as a <code>String</code>, or <code>null</code>
	 *         if the word breaks are exhausted before a candidate is found
	 * @see java.util.Iterator#next()
	 */
	public final Object next() {

		String candidate;
		do {
			candidate= nextToken();
		} while (candidate == null && fSuccessor != BreakIterator.DONE);

		fLastToken= candidate;
		return candidate;
	}

	/**
	 * Advances the end index to the next word break: the current look-ahead
	 * break becomes both the end index and the predecessor break, and a new
	 * look-ahead break is fetched from the word iterator.
	 */
	protected final void nextBreak() {

		final int reached= fSuccessor;
		fNext= reached;
		fPredecessor= reached;

		fSuccessor= fWordIterator.next();
	}

	/**
	 * Returns the next sentence break, i.e. the first of the remaining
	 * sentence breaks. Must only be called while there are sentence breaks
	 * left.
	 *
	 * @return the next sentence break
	 */
	protected final int nextSentence() {
		final Integer first= (Integer) fSentenceBreaks.getFirst();
		return first.intValue();
	}

	/**
	* Determines the next token to be spell-checked.
	*
	* @return the next token to be spell-checked, or <code>null</code>
	* iff the next token is not a candidate for spell-checking.
	*/
	protected String nextToken() {

	String token= null;

	fPrevious= fPredecessor;
	fStartsSentence= false;

	nextBreak();

	boolean update= false;
	if (fNext - fPrevious > 0) {

	if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {

	nextBreak();
	if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
	update= true;
	token= fContent.substring(fPrevious, fNext);
	} else
	fPredecessor= fNext;

	} else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) \|\| fContent.charAt(fNext) == '/')) {

	if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
	nextBreak();

	nextBreak();

	if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {

	nextBreak();
	if (fSuccessor != BreakIterator.DONE) {
	update= true;
	token= fContent.substring(fPrevious, fNext);
	}
	}
	} else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {

	if (isUrlToken(fPrevious))
	skipTokens(fPrevious, ' ');
	else if (isJavadocToken(JAVADOC_PARAM_TAGS))
	fLastToken= null;
	else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
	fLastToken= null;
	skipTokens(fPrevious, fDelimiter.charAt(0));
	} else if (fNext - fPrevious > 1 \|\| isSingleLetter(fPrevious))
	token= fContent.substring(fPrevious, fNext);

	update= true;
	}
	}

	if (update && fSentenceBreaks.size() > 0) {

	if (fPrevious >= nextSentence()) {

	while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
	fSentenceBreaks.removeFirst();

	fStartsSentence= (fLastToken == null) \|\| (token != null);
	}
	}
	return token;
	}

	/**
	 * Not supported by this iterator.
	 *
	 * @throws UnsupportedOperationException always
	 * @see java.util.Iterator#remove()
	 */
	public final void remove() {
	throw new UnsupportedOperationException();
	}

	/**
	 * Skips the content until the stop character is reached.
	 * <p>
	 * If the stop character occurs at or after the begin index, the end
	 * index and the predecessor break are moved onto it and the look-ahead
	 * break becomes the first word break following it. Otherwise the
	 * iteration is ended.
	 * </p>
	 *
	 * @param begin the begin index
	 * @param stop the stop character
	 */
	protected final void skipTokens(final int begin, final char stop) {

		final int length= fContent.length();

		int position= begin;
		while (position < length && fContent.charAt(position) != stop)
			position++;

		// Stop character not found: nothing is left to iterate over.
		if (position >= length) {
			fSuccessor= BreakIterator.DONE;
			return;
		}

		fNext= position;
		fPredecessor= position;

		fSuccessor= fWordIterator.following(position);
	}

	/**
	 * Tells whether the current word starts a sentence, as determined while
	 * the current token was computed.
	 *
	 * @return the value of the starts-sentence flag
	 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#startsSentence()
	 */
	public final boolean startsSentence() {
	return fStartsSentence;
	}
	}