blob: a3d87d4e60901a3ba6c28634ed4f88f9054c19a7 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2009, 2012 Obeo.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Obeo - initial API and implementation
*******************************************************************************/
package org.eclipse.acceleo.internal.traceability.engine;
import java.util.NoSuchElementException;
/**
 * This custom implementation of a String Tokenizer allows access to the start and end offsets of the last
 * returned token. We can't use the basic {@link java.util.StringTokenizer} as it doesn't expose these
 * offsets, and we do not extend it because it was not designed to be overridden.
 * <p>
 * Delimiters are not considered to be tokens themselves.
 * </p>
 * <p>
 * Both the source and the delimiters are scanned code point by code point, so delimiters outside of the
 * Basic Multilingual Plane (encoded as surrogate pairs) are matched as whole characters. All offsets
 * exposed by this class remain <code>char</code> indices into the source string.
 * </p>
 *
 * @author <a href="mailto:laurent.goubet@obeo.fr">Laurent Goubet</a>
 */
public final class TraceabilityTokenizer {
    /** The String we are to tokenize. */
    private final String source;

    /** Delimiters around which to tokenize. */
    private final String delims;

    /** Offset in {@link #source} of the first character of the last token we located. */
    private int lastOffset;

    /**
     * Offset in {@link #source} right after the last token we located; this is where the search for the
     * following token will start.
     */
    private int nextOffset;

    /**
     * This will be set to <code>true</code> whenever we've scanned ahead for the next token, yet haven't
     * returned it yet.
     */
    private boolean soughtNext;

    /**
     * Default constructor.
     *
     * @param source
     *            String that we are to tokenize. Must not be <code>null</code>.
     * @param delims
     *            Delimiters around which to tokenize. Must not be <code>null</code>.
     */
    public TraceabilityTokenizer(String source, String delims) {
        this.source = source;
        this.delims = delims;
    }

    /**
     * Counts the number of tokens this tokenizer's string contains. The whole string is considered,
     * regardless of how many tokens have already been returned, and the state of the tokenizer is left
     * untouched.
     *
     * @return The number of tokens this tokenizer's string contains, <code>0</code> if the string is empty
     *         or only contains delimiters.
     */
    public int countTokens() {
        int tokenCount = 0;
        int tokenOffset = skipDelimiters(0);
        // Only count a token once we know a non-delimiter character actually starts one.
        while (tokenOffset < source.length()) {
            tokenCount++;
            tokenOffset = skipDelimiters(scanToken(tokenOffset));
        }
        return tokenCount;
    }

    /**
     * Returns the offset of the last token's start. Note that {@link #hasMoreTokens()} scans ahead, so
     * after it has returned <code>true</code> this is the start of the token that {@link #nextToken()} is
     * about to return.
     *
     * @return The offset of the last token's start.
     */
    public int getLastOffset() {
        return lastOffset;
    }

    /**
     * Returns the offset at which the last located token ends, which is where the search for the next
     * token's start will begin.
     *
     * @return The offset right after the last located token.
     */
    public int getNextOffset() {
        return nextOffset;
    }

    /**
     * This will return <code>true</code> if there is at least one token after the current. When a token is
     * found, its bounds are computed right away and cached for the following call to {@link #nextToken()}.
     *
     * @return <code>true</code> if there is at least one token after the current in {@link #source},
     *         <code>false</code> otherwise.
     */
    public boolean hasMoreTokens() {
        if (soughtNext) {
            // We already looked ahead and the token we found has not been consumed yet.
            return true;
        }
        final int tokenStart = skipDelimiters(nextOffset);
        if (tokenStart == source.length()) {
            // Nothing but delimiters remain : leave the offsets as they were.
            return false;
        }
        lastOffset = tokenStart;
        nextOffset = scanToken(tokenStart);
        soughtNext = true;
        return true;
    }

    /**
     * Compute and returns the next token.
     *
     * @return The next token found in this tokenizer's string.
     * @throws NoSuchElementException
     *             If there are no more tokens in this tokenizer's string.
     */
    public String nextToken() throws NoSuchElementException {
        if (soughtNext) {
            // Bounds were already computed by hasMoreTokens(), simply consume them.
            soughtNext = false;
        } else {
            lastOffset = skipDelimiters(nextOffset);
            if (lastOffset == source.length()) {
                throw new NoSuchElementException();
            }
            nextOffset = scanToken(lastOffset);
        }
        return source.substring(lastOffset, nextOffset);
    }

    /**
     * Skips ahead from <code>start</code> and returns the index of the next delimiter character encountered,
     * or the tokenizer's string length if it contains no more delimiters.
     *
     * @param start
     *            Offset from which to start searching.
     * @return Index of the next delimiter in this tokenizer's string, the string's length if none.
     */
    private int scanToken(int start) {
        int position = start;
        while (position < source.length()) {
            final int codePoint = source.codePointAt(position);
            if (isDelimiter(codePoint)) {
                break;
            }
            // Step over both halves of a surrogate pair at once.
            position += Character.charCount(codePoint);
        }
        return position;
    }

    /**
     * Skips ahead from <code>start</code> and returns the index of the first non-delimiter character
     * encountered, or the tokenizer's string length if only delimiters remain.
     *
     * @param start
     *            Offset from which to start searching.
     * @return Index of the next non-delimiter character in this tokenizer's string.
     */
    private int skipDelimiters(int start) {
        int position = start;
        while (position < source.length()) {
            final int codePoint = source.codePointAt(position);
            if (!isDelimiter(codePoint)) {
                break;
            }
            // Step over both halves of a surrogate pair at once.
            position += Character.charCount(codePoint);
        }
        return position;
    }

    /**
     * Checks whether the given code point is one of this tokenizer's delimiters.
     *
     * @param codePoint
     *            The Unicode code point to check.
     * @return <code>true</code> if <code>codePoint</code> appears in {@link #delims}.
     */
    private boolean isDelimiter(int codePoint) {
        return delims.indexOf(codePoint) >= 0;
    }
}