blob: 4f8cf94120f4fb99a29f0f68de80b74692d8eba5 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2000, 2015 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Christopher Lenz (cmlenz@gmx.de) - support for line continuation
*******************************************************************************/
package org.eclipse.jface.text.rules;
import java.util.Arrays;
import java.util.Comparator;
import org.eclipse.core.runtime.Assert;
/**
* Standard implementation of <code>IPredicateRule</code>.
* Is is capable of detecting a pattern which begins with a given start
* sequence and ends with a given end sequence. If the end sequence is
* not specified, it can be either end of line, end or file, or both. Additionally,
* the pattern can be constrained to begin in a certain column. The rule can also
* be used to check whether the text to scan covers half of the pattern, i.e. contains
* the end sequence required by the rule.
*/
public class PatternRule implements IPredicateRule {
/**
* Comparator that orders <code>char[]</code> in decreasing array lengths.
*
* @since 3.1
*/
private static class DecreasingCharArrayLengthComparator implements Comparator<char[]> {
@Override
public int compare(char[] o1, char[] o2) {
return o2.length - o1.length;
}
}
/** Internal setting for the un-initialized column constraint */
protected static final int UNDEFINED= -1;
/** The token to be returned on success */
protected IToken fToken;
/** The pattern's start sequence */
protected char[] fStartSequence;
/** The pattern's end sequence */
protected char[] fEndSequence;
/** The pattern's column constrain */
protected int fColumn= UNDEFINED;
/** The pattern's escape character */
protected char fEscapeCharacter;
/**
* Indicates whether the escape character continues a line
* @since 3.0
*/
protected boolean fEscapeContinuesLine;
/** Indicates whether end of line terminates the pattern */
protected boolean fBreaksOnEOL;
/** Indicates whether end of file terminates the pattern */
protected boolean fBreaksOnEOF;
/**
* Line delimiter comparator which orders according to decreasing delimiter length.
* @since 3.1
*/
private Comparator<char[]> fLineDelimiterComparator= new DecreasingCharArrayLengthComparator();
/**
* Cached line delimiters.
* @since 3.1
*/
private char[][] fLineDelimiters;
/**
* Cached sorted {@linkplain #fLineDelimiters}.
* @since 3.1
*/
private char[][] fSortedLineDelimiters;
/**
* Creates a rule for the given starting and ending sequence.
* When these sequences are detected the rule will return the specified token.
* Alternatively, the sequence can also be ended by the end of the line.
* Any character which follows the given escapeCharacter will be ignored.
*
* @param startSequence the pattern's start sequence
* @param endSequence the pattern's end sequence, <code>null</code> is a legal value
* @param token the token which will be returned on success
* @param escapeCharacter any character following this one will be ignored
* @param breaksOnEOL indicates whether the end of the line also terminates the pattern
*/
public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL) {
Assert.isTrue(startSequence != null && startSequence.length() > 0);
Assert.isTrue(endSequence != null || breaksOnEOL);
Assert.isNotNull(token);
fStartSequence= startSequence.toCharArray();
fEndSequence= (endSequence == null ? new char[0] : endSequence.toCharArray());
fToken= token;
fEscapeCharacter= escapeCharacter;
fBreaksOnEOL= breaksOnEOL;
}
/**
* Creates a rule for the given starting and ending sequence.
* When these sequences are detected the rule will return the specified token.
* Alternatively, the sequence can also be ended by the end of the line or the end of the file.
* Any character which follows the given escapeCharacter will be ignored.
*
* @param startSequence the pattern's start sequence
* @param endSequence the pattern's end sequence, <code>null</code> is a legal value
* @param token the token which will be returned on success
* @param escapeCharacter any character following this one will be ignored
* @param breaksOnEOL indicates whether the end of the line also terminates the pattern
* @param breaksOnEOF indicates whether the end of the file also terminates the pattern
* @since 2.1
*/
public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF) {
this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL);
fBreaksOnEOF= breaksOnEOF;
}
/**
* Creates a rule for the given starting and ending sequence.
* When these sequences are detected the rule will return the specified token.
* Alternatively, the sequence can also be ended by the end of the line or the end of the file.
* Any character which follows the given escapeCharacter will be ignored. An end of line
* immediately after the given <code>lineContinuationCharacter</code> will not cause the
* pattern to terminate even if <code>breakOnEOL</code> is set to true.
*
* @param startSequence the pattern's start sequence
* @param endSequence the pattern's end sequence, <code>null</code> is a legal value
* @param token the token which will be returned on success
* @param escapeCharacter any character following this one will be ignored
* @param breaksOnEOL indicates whether the end of the line also terminates the pattern
* @param breaksOnEOF indicates whether the end of the file also terminates the pattern
* @param escapeContinuesLine indicates whether the specified escape character is used for line
* continuation, so that an end of line immediately after the escape character does not
* terminate the pattern, even if <code>breakOnEOL</code> is set
* @since 3.0
*/
public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF, boolean escapeContinuesLine) {
this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL, breaksOnEOF);
fEscapeContinuesLine= escapeContinuesLine;
}
/**
* Sets a column constraint for this rule. If set, the rule's token
* will only be returned if the pattern is detected starting at the
* specified column. If the column is smaller then 0, the column
* constraint is considered removed.
*
* @param column the column in which the pattern starts
*/
public void setColumnConstraint(int column) {
if (column < 0)
column= UNDEFINED;
fColumn= column;
}
/**
* Evaluates this rules without considering any column constraints.
*
* @param scanner the character scanner to be used
* @return the token resulting from this evaluation
*/
protected IToken doEvaluate(ICharacterScanner scanner) {
return doEvaluate(scanner, false);
}
/**
* Evaluates this rules without considering any column constraints. Resumes
* detection, i.e. look sonly for the end sequence required by this rule if the
* <code>resume</code> flag is set.
*
* @param scanner the character scanner to be used
* @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
* @return the token resulting from this evaluation
* @since 2.0
*/
protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
if (resume) {
if (endSequenceDetected(scanner))
return fToken;
} else {
int c= scanner.read();
if (c == fStartSequence[0]) {
if (sequenceDetected(scanner, fStartSequence, false)) {
if (endSequenceDetected(scanner))
return fToken;
}
}
}
scanner.unread();
return Token.UNDEFINED;
}
@Override
public IToken evaluate(ICharacterScanner scanner) {
return evaluate(scanner, false);
}
/**
* Returns whether the end sequence was detected. As the pattern can be considered
* ended by a line delimiter, the result of this method is <code>true</code> if the
* rule breaks on the end of the line, or if the EOF character is read.
*
* @param scanner the character scanner to be used
* @return <code>true</code> if the end sequence has been detected
*/
protected boolean endSequenceDetected(ICharacterScanner scanner) {
char[][] originalDelimiters= scanner.getLegalLineDelimiters();
int count= originalDelimiters.length;
if (fLineDelimiters == null || fLineDelimiters.length != count) {
fSortedLineDelimiters= new char[count][];
} else {
while (count > 0 && Arrays.equals(fLineDelimiters[count - 1], originalDelimiters[count - 1]))
count--;
}
if (count != 0) {
fLineDelimiters= originalDelimiters;
System.arraycopy(fLineDelimiters, 0, fSortedLineDelimiters, 0, fLineDelimiters.length);
Arrays.sort(fSortedLineDelimiters, fLineDelimiterComparator);
}
int readCount= 1;
int c;
while ((c= scanner.read()) != ICharacterScanner.EOF) {
if (c == fEscapeCharacter) {
// Skip escaped character(s)
if (fEscapeContinuesLine) {
c= scanner.read();
for (int i= 0; i < fSortedLineDelimiters.length; i++) {
if (c == fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], fBreaksOnEOF))
break;
}
} else
scanner.read();
} else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
// Check if the specified end sequence has been found.
if (sequenceDetected(scanner, fEndSequence, fBreaksOnEOF))
return true;
} else if (fBreaksOnEOL) {
// Check for end of line since it can be used to terminate the pattern.
for (int i= 0; i < fSortedLineDelimiters.length; i++) {
if (c == fSortedLineDelimiters[i][0] && sequenceDetected(scanner, fSortedLineDelimiters[i], fBreaksOnEOF))
return true;
}
}
readCount++;
}
if (fBreaksOnEOF)
return true;
for (; readCount > 0; readCount--)
scanner.unread();
return false;
}
/**
* Returns whether the next characters to be read by the character scanner
* are an exact match with the given sequence. No escape characters are allowed
* within the sequence. If specified the sequence is considered to be found
* when reading the EOF character.
*
* @param scanner the character scanner to be used
* @param sequence the sequence to be detected
* @param eofAllowed indicated whether EOF terminates the pattern
* @return <code>true</code> if the given sequence has been detected
*/
protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) {
for (int i= 1; i < sequence.length; i++) {
int c= scanner.read();
if (c == ICharacterScanner.EOF && eofAllowed) {
return true;
} else if (c != sequence[i]) {
// Non-matching character detected, rewind the scanner back to the start.
// Do not unread the first character.
scanner.unread();
for (int j= i-1; j > 0; j--)
scanner.unread();
return false;
}
}
return true;
}
@Override
public IToken evaluate(ICharacterScanner scanner, boolean resume) {
if (fColumn == UNDEFINED)
return doEvaluate(scanner, resume);
int c= scanner.read();
scanner.unread();
if (c == fStartSequence[0])
return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
return Token.UNDEFINED;
}
@Override
public IToken getSuccessToken() {
return fToken;
}
}