blob: 3c8e3d2cdd48acbc8c046b23c1994f2a7fbfcaa6 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2010, 2011 IBM Corporation and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* IBM Corporation - initial API and implementation
******************************************************************************/
package org.eclipse.equinox.bidi.internal.consumable;
import org.eclipse.equinox.bidi.advanced.IStructuredTextExpert;
import org.eclipse.equinox.bidi.advanced.StructuredTextEnvironment;
import org.eclipse.equinox.bidi.custom.*;
/**
* Handler for regular expressions.
* Such expressions may span multiple lines.
* <p>
* In applications like an editor where parts of the text might be modified
* while other parts are not, the user may want to call
* {@link IStructuredTextExpert#leanToFullText}
* separately on each line and save the initial state of each line (this is
* the final state of the previous line, which can be retrieved using
* {@link IStructuredTextExpert#getState()}).
* If both the content
* of a line and its initial state have not changed, the user can be sure that
* the last <i>full</i> text computed for this line has not changed either.
*
* @see IStructuredTextExpert explanation of state
*/
public class StructuredTextRegex extends StructuredTextTypeHandler {
    /**
     * Opening strings of the syntactic constructs recognized by this handler,
     * indexed by case number. Index 0 is a dummy entry; the cases handled in
     * {@link #processSpecial} start at 1.
     */
    static final String[] startStrings = {"", /* 0: dummy */ //$NON-NLS-1$
            "(?#", /* 1: comment (?#...) */ //$NON-NLS-1$
            "(?<", /* 2: named group (?<name> */ //$NON-NLS-1$
            "(?'", /* 3: named group (?'name' */ //$NON-NLS-1$
            "(?(<", /* 4: conditional named back reference (?(<name>) */ //$NON-NLS-1$
            "(?('", /* 5: conditional named back reference (?('name') */ //$NON-NLS-1$
            "(?(", /* 6: conditional named back reference (?(name) */ //$NON-NLS-1$
            "(?&", /* 7: named parentheses reference (?&name) */ //$NON-NLS-1$
            "(?P<", /* 8: named group (?P<name> */ //$NON-NLS-1$
            "\\k<", /* 9: named back reference \k<name> */ //$NON-NLS-1$
            "\\k'", /* 10: named back reference \k'name' */ //$NON-NLS-1$
            "\\k{", /* 11: named back reference \k{name} */ //$NON-NLS-1$
            "(?P=", /* 12: named back reference (?P=name) */ //$NON-NLS-1$
            "\\g{", /* 13: named back reference \g{name} */ //$NON-NLS-1$
            "\\g<", /* 14: subroutine call \g<name> */ //$NON-NLS-1$
            "\\g'", /* 15: subroutine call \g'name' */ //$NON-NLS-1$
            "(?(R&", /* 16: named back reference recursion (?(R&name) */ //$NON-NLS-1$
            "\\Q" /* 17: quoted sequence \Q...\E */ //$NON-NLS-1$
    };

    /**
     * Closing character for each case, indexed by case number (0 to 16).
     * Index 0 pairs with the dummy start string. Case 17 has no entry here
     * because its two-character terminator is searched for directly in
     * {@link #processSpecial}.
     */
    static final char[] endChars = {
            // 0    1    2    3     4    5    6    7    8    9    10    11   12   13   14   15    16
            '.', ')', '>', '\'', ')', ')', ')', ')', '>', '>', '\'', '}', ')', '}', '>', '\'', ')'};

    /** Number of entries in {@link #startStrings}, including the dummy entry (18). */
    static final int numberOfStrings = startStrings.length;

    /**
     * Number of special cases reported by {@link #getSpecialsCount}. Cases below
     * this value are string searches; the case equal to this value (18) is the
     * scan for R, AL, AN and EN characters.
     */
    static final int maxSpecial = numberOfStrings;

    // Bidi categories as defined in the Unicode Bidirectional Algorithm
    // ( http://www.unicode.org/reports/tr9/ ).
    /** Left to Right character. */
    static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
    /** Right to Left character. */
    static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
    /** Arabic Letter. */
    static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
    /** Arabic Number. */
    static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER;
    /** European Number. */
    static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER;

    /** State stored in the expert when a comment (case 1) is not closed within the text. */
    private static final Integer STATE_COMMENT = Integer.valueOf(1);
    /** State stored in the expert when a quoted sequence (case 17) is not closed within the text. */
    private static final Integer STATE_QUOTED_SEQUENCE = Integer.valueOf(17);

    /**
     * Retrieves the number of special cases handled by this handler.
     *
     * @param expert not used by this implementation.
     * @return the number of special cases for this handler.
     */
    public int getSpecialsCount(IStructuredTextExpert expert) {
        return maxSpecial;
    }

    /**
     * Locates occurrences of the syntactic strings and of
     * R, AL, EN, AN characters.
     * <p>
     * For case numbers below {@link #numberOfStrings} this is a plain search
     * for the matching entry of {@link #startStrings}. For the remaining case
     * the text is scanned for a character of type R or AL, or for the first
     * digit (EN or AN) of a number that has a R, AL or AN character somewhere
     * before it.
     *
     * @param expert not used by this implementation.
     * @param text the text to search.
     * @param charTypes supplies the bidi type of each character of <code>text</code>.
     * @param offsets not used by this implementation.
     * @param caseNumber number of the special case to look for.
     * @param fromIndex offset in <code>text</code> at which the search starts.
     * @return the offset of the next occurrence, or -1 if there is none.
     */
    public int indexOfSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int fromIndex) {
        if (caseNumber < numberOfStrings) {
            // cases 1 to 17: the construct is identified by its opening string
            return text.indexOf(startStrings[caseNumber], fromIndex);
        }
        // there never is a need for a mark before the first char
        if (fromIndex <= 0)
            fromIndex = 1;
        // look for R, AL, AN, EN which are potentially needing a mark
        for (; fromIndex < text.length(); fromIndex++) {
            byte charType = charTypes.getBidiTypeAt(fromIndex);
            // R and AL will always be examined using processSeparator()
            if (charType == R || charType == AL)
                return fromIndex;
            if (charType == EN || charType == AN) {
                // no need for a mark after the first digit in a number
                if (charTypes.getBidiTypeAt(fromIndex - 1) == charType)
                    continue;
                for (int i = fromIndex - 1; i >= 0; i--) {
                    byte previousType = charTypes.getBidiTypeAt(i);
                    // after a L char, no need for a mark
                    // NOTE(review): this "continue" keeps scanning backwards past
                    // the L char instead of abandoning the scan; confirm whether
                    // stopping here was intended.
                    if (previousType == L)
                        continue;
                    // digit after R or AL or AN need a mark, except for EN
                    // following AN, but this is a contrived case, so we
                    // don't check for it (and calling processSeparator()
                    // for it will do no harm)
                    if (previousType == R || previousType == AL || previousType == AN)
                        return fromIndex;
                }
            }
        }
        return -1;
    }

    /**
     * Processes the special cases.
     * <p>
     * When <code>separLocation</code> is negative, the case number is not taken
     * from the <code>caseNumber</code> argument but from the state of the expert,
     * which is then cleared. Cases 1 (comment) and 17 (quoted sequence) store
     * their case number as the expert state when their terminator is not found
     * in <code>text</code>.
     *
     * @param expert the expert whose state is read, cleared and set.
     * @param text the text being processed.
     * @param charTypes supplies (and for case 17 receives) bidi types of the
     *        characters of <code>text</code>.
     * @param offsets passed through to <code>processSeparator()</code>.
     * @param caseNumber number of the special case found, ignored when
     *        <code>separLocation</code> is negative.
     * @param separLocation offset of the special case in <code>text</code>, or a
     *        negative value to continue a construct recorded in the expert state.
     * @return the offset just past the end of the construct, or
     *         <code>text.length()</code> if its end is not found in
     *         <code>text</code> or the case number is not recognized.
     */
    public int processSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int separLocation) {
        int location;
        if (separLocation < 0) {
            // TBD guard against "undefined": a state which is not an Integer
            // makes this cast fail
            caseNumber = ((Integer) expert.getState()).intValue();
            expert.clearState();
        }
        switch (caseNumber) {
            case 1 : /* comment (?#...) */
                if (separLocation < 0) {
                    // initial state from previous line
                    location = 0;
                } else {
                    StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
                    // skip the opening "(?#"
                    location = separLocation + 3;
                }
                location = text.indexOf(')', location);
                if (location < 0) {
                    // comment is not closed in this text: remember it in the state
                    expert.setState(STATE_COMMENT);
                    return text.length();
                }
                return location + 1;
            case 2 : /* named group (?<name> */
            case 3 : /* named group (?'name' */
            case 4 : /* conditional named back reference (?(<name>) */
            case 5 : /* conditional named back reference (?('name') */
            case 6 : /* conditional named back reference (?(name) */
            case 7 : /* named parentheses reference (?&name) */
                StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
                // deliberate fall through: cases 2 to 7 share the end-character
                // search below.
                // no need for calling processSeparator() for the following cases
                // since the starting string contains a L char
            case 8 : /* named group (?P<name> */
            case 9 : /* named back reference \k<name> */
            case 10 : /* named back reference \k'name' */
            case 11 : /* named back reference \k{name} */
            case 12 : /* named back reference (?P=name) */
            case 13 : /* named back reference \g{name} */
            case 14 : /* subroutine call \g<name> */
            case 15 : /* subroutine call \g'name' */
            case 16 : /* named back reference recursion (?(R&name) */
                // skip the opening string
                location = separLocation + startStrings[caseNumber].length();
                // look for ending character
                location = text.indexOf(endChars[caseNumber], location);
                if (location < 0)
                    return text.length();
                return location + 1;
            case 17 : /* quoted sequence \Q...\E */
                if (separLocation < 0) {
                    // initial state from previous line
                    location = 0;
                } else {
                    StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
                    // skip the opening "\Q"
                    location = separLocation + 2;
                }
                location = text.indexOf("\\E", location); //$NON-NLS-1$
                if (location < 0) {
                    // quoted sequence is not closed in this text: remember it in the state
                    expert.setState(STATE_QUOTED_SEQUENCE);
                    return text.length();
                }
                // set the charType for the "E" to L (Left to Right character)
                charTypes.setBidiTypeAt(location + 1, L);
                return location + 2;
            case 18 : /* R, AL, AN, EN */
                StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
                return separLocation + 1;
        }
        // we should never get here
        return text.length();
    }

    /**
     * Computes the base direction of <code>text</code>, building the character
     * types from <code>expert</code> and <code>text</code> and delegating to
     * {@link #getDirection(IStructuredTextExpert, String, StructuredTextCharTypes)}.
     *
     * @param expert the expert providing the environment.
     * @param text the text whose direction is requested.
     * @return {@link IStructuredTextExpert#DIR_RTL DIR_RTL} or
     *         {@link IStructuredTextExpert#DIR_LTR DIR_LTR}.
     */
    public int getDirection(IStructuredTextExpert expert, String text) {
        return getDirection(expert, text, new StructuredTextCharTypes(expert, text));
    }

    /**
     * Computes the base direction of <code>text</code>.
     *
     * @param expert the expert providing the environment.
     * @param text the text whose direction is requested.
     * @param charTypes supplies the bidi type of each character of <code>text</code>.
     * @return {@link IStructuredTextExpert#DIR_RTL DIR_RTL} if the current
     *         locale (as expressed by the environment language) is Arabic and
     *         one of the following conditions is satisfied:
     *         <ul>
     *           <li>The first strong character has an RTL direction.</li>
     *           <li>There is no strong character in the text and the
     *               GUI is mirrored.</li>
     *         </ul>
     *         Otherwise, returns {@link IStructuredTextExpert#DIR_LTR DIR_LTR}.
     */
    public int getDirection(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes) {
        StructuredTextEnvironment environment = expert.getEnvironment();
        String language = environment.getLanguage();
        // constant-first comparison: a missing language is treated as "not Arabic"
        if (!"ar".equals(language)) //$NON-NLS-1$
            return IStructuredTextExpert.DIR_LTR;
        // the first strong character decides
        for (int i = 0; i < text.length(); i++) {
            byte charType = charTypes.getBidiTypeAt(i);
            if (charType == AL || charType == R)
                return IStructuredTextExpert.DIR_RTL;
            if (charType == L)
                return IStructuredTextExpert.DIR_LTR;
        }
        // no strong character: follow the GUI orientation
        if (environment.getMirrored())
            return IStructuredTextExpert.DIR_RTL;
        return IStructuredTextExpert.DIR_LTR;
    }
}