bundles/org.eclipse.equinox.bidi/src/org/eclipse/equinox/bidi/internal/consumable/StructuredTextRegex.java - equinox/rt.equinox.bundles - Git at Google

 /*******************************************************************************
  * Copyright (c) 2010, 2011 IBM Corporation and others.
  *
  * This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License 2.0
  * which accompanies this distribution, and is available at
  * https://www.eclipse.org/legal/epl-2.0/
  *
  * SPDX-License-Identifier: EPL-2.0
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  ******************************************************************************/
 package org.eclipse.equinox.bidi.internal.consumable;

 import org.eclipse.equinox.bidi.advanced.IStructuredTextExpert;
 import org.eclipse.equinox.bidi.advanced.StructuredTextEnvironment;
 import org.eclipse.equinox.bidi.custom.*;

 /**
  *  Handler for regular expressions.
  *  Such expressions may span multiple lines.
  *  <p>
  *  In applications like an editor where parts of the text might be modified
  *  while other parts are not, the user may want to call
  *  {@link IStructuredTextExpert#leanToFullText}
  *  separately on each line and save the initial state of each line (this is
  *  the final state of the previous line, which can be retrieved using
  *  {@link IStructuredTextExpert#getState()}).
  *  If both the content
  *  of a line and its initial state have not changed, the user can be sure that
  *  the last <i>full</i> text computed for this line has not changed either.
  *  <p>
  *  Special cases are numbered from 1. Cases 1 to 17 are the syntactic
  *  strings listed in {@link #startStrings}; case 18 stands for the R, AL,
  *  AN and EN characters which may need a directional mark.
  *
  *  @see IStructuredTextExpert explanation of state
  */
 public class StructuredTextRegex extends StructuredTextTypeHandler {
 	/**
 	 *  Opening strings of the syntactic constructs, indexed by case number.
 	 *  Index 0 is a dummy so that the case numbers can be used directly.
 	 */
 	static final String[] startStrings = {"", /*  0 *//* dummy *///$NON-NLS-1$
 			"(?#", /*  1 *//* comment (?#...) *///$NON-NLS-1$
 			"(?<", /*  2 *//* named group (?<name> *///$NON-NLS-1$
 			"(?'", /*  3 *//* named group (?'name' *///$NON-NLS-1$
 			"(?(<", /*  4 *//* conditional named back reference (?(<name>) *///$NON-NLS-1$
 			"(?('", /*  5 *//* conditional named back reference (?('name') *///$NON-NLS-1$
 			"(?(", /*  6 *//* conditional named back reference (?(name) *///$NON-NLS-1$
 			"(?&", /*  7 *//* named parentheses reference (?&name) *///$NON-NLS-1$
 			"(?P<", /*  8 *//* named group (?P<name> *///$NON-NLS-1$
 			"\\k<", /*  9 *//* named back reference \k<name> *///$NON-NLS-1$
 			"\\k'", /* 10 *//* named back reference \k'name' *///$NON-NLS-1$
 			"\\k{", /* 11 *//* named back reference \k{name} *///$NON-NLS-1$
 			"(?P=", /* 12 *//* named back reference (?P=name) *///$NON-NLS-1$
 			"\\g{", /* 13 *//* named back reference \g{name} *///$NON-NLS-1$
 			"\\g<", /* 14 *//* subroutine call \g<name> *///$NON-NLS-1$
 			"\\g'", /* 15 *//* subroutine call \g'name' *///$NON-NLS-1$
 			"(?(R&", /* 16 *//* named back reference recursion (?(R&name) *///$NON-NLS-1$
 			"\\Q" /* 17 *//* quoted sequence \Q...\E *///$NON-NLS-1$
 	};
 	/**
 	 *  Closing character of each construct, indexed by case number.
 	 *  Index 0 is a dummy. There is no entry for case 17 because a quoted
 	 *  sequence is closed by the two-character string "\E", which
 	 *  processSpecial() searches for explicitly.
 	 */
 	static final char[] endChars = {
 			// 0    1    2    3     4    5    6    7    8    9    10   11   12   13   14    15   16
 			'.', ')', '>', '\'', ')', ')', ')', ')', '>', '>', '\'', '}', ')', '}', '>', '\'', ')'};
 	static final int numberOfStrings = startStrings.length; /* 18 */
 	/** Number of special cases: the 17 syntactic strings plus the R, AL, AN, EN case. */
 	static final int maxSpecial = numberOfStrings;
 	static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
 	static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
 	static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
 	static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER;
 	static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
 	// States carried over to the next line; the value is the case number of
 	// the construct which was still open at the end of the text.
 	// Integer.valueOf() replaces the deprecated Integer(int) constructor.
 	private static final Integer STATE_COMMENT = Integer.valueOf(1);
 	private static final Integer STATE_QUOTED_SEQUENCE = Integer.valueOf(17);

 	/**
 	 *  Retrieves the number of special cases handled by this handler.
 	 *
 	 *  @param expert not used by this handler.
 	 *
 	 *  @return the number of special cases for this handler.
 	 */
 	public int getSpecialsCount(IStructuredTextExpert expert) {
 		return maxSpecial;
 	}

 	/**
 	 *  Locates occurrences of the syntactic strings and of
 	 *  R, AL, EN, AN characters.
 	 *
 	 *  @param expert not used by this method.
 	 *  @param text the text to search.
 	 *  @param charTypes supplies the bidi type of each character of
 	 *         <code>text</code>.
 	 *  @param offsets not used by this method.
 	 *  @param caseNumber number of the special case to look for. Values
 	 *         smaller than {@link #numberOfStrings} select the matching
 	 *         entry of {@link #startStrings}; any other value selects the
 	 *         search for R, AL, AN and EN characters.
 	 *  @param fromIndex the index from which the search starts.
 	 *
 	 *  @return the index of the next occurrence of the special case, or -1
 	 *          if there is none.
 	 */
 	public int indexOfSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int fromIndex) {
 		// In this method, L, R, AL, AN and EN represent bidi categories
 		// as defined in the Unicode Bidirectional Algorithm
 		// ( http://www.unicode.org/reports/tr9/ ).
 		// L  represents the category Left to Right character.
 		// R  represents the category Right to Left character.
 		// AL represents the category Arabic Letter.
 		// AN represents the category Arabic Number.
 		// EN represents the category European Number.

 		if (caseNumber < numberOfStrings) {
 			/*  1 *//* comment (?#...) */
 			/*  2 *//* named group (?<name> */
 			/*  3 *//* named group (?'name' */
 			/*  4 *//* conditional named back reference (?(<name>) */
 			/*  5 *//* conditional named back reference (?('name') */
 			/*  6 *//* conditional named back reference (?(name) */
 			/*  7 *//* named parentheses reference (?&name) */
 			/*  8 *//* named group (?P<name> */
 			/*  9 *//* named back reference \k<name> */
 			/* 10 *//* named back reference \k'name' */
 			/* 11 *//* named back reference \k{name} */
 			/* 12 *//* named back reference (?P=name) */
 			/* 13 *//* named back reference \g{name} */
 			/* 14 *//* subroutine call \g<name> */
 			/* 15 *//* subroutine call \g'name' */
 			/* 16 *//* named back reference recursion (?(R&name) */
 			/* 17 *//* quoted sequence \Q...\E */
 			return text.indexOf(startStrings[caseNumber], fromIndex);
 		}
 		// there never is a need for a mark before the first char
 		if (fromIndex <= 0)
 			fromIndex = 1;
 		// look for R, AL, AN, EN which are potentially needing a mark
 		for (; fromIndex < text.length(); fromIndex++) {
 			byte charType = charTypes.getBidiTypeAt(fromIndex);
 			// R and AL will always be examined using processSeparator()
 			if (charType == R || charType == AL)
 				return fromIndex;

 			if (charType != EN && charType != AN)
 				continue;

 			// no need for a mark after the first digit in a number
 			if (charTypes.getBidiTypeAt(fromIndex - 1) == charType)
 				continue;

 			// scan backward over everything which precedes the digit
 			for (int i = fromIndex - 1; i >= 0; i--) {
 				byte previousType = charTypes.getBidiTypeAt(i);
 				// NOTE(review): a L char is only skipped here, it does not
 				//   end the backward scan, so a R, AL or AN located anywhere
 				//   before it still causes this digit to be reported.
 				//   Presumably harmless since processSeparator() is the one
 				//   which decides whether a mark is added - confirm before
 				//   turning this into a "stop at L" test.
 				if (previousType == L)
 					continue;

 				// digit after R or AL or AN need a mark, except for EN
 				//   following AN, but this is a contrived case, so we
 				//   don't check for it (and calling processSeparator()
 				//   for it will do no harm)
 				if (previousType == R || previousType == AL || previousType == AN)
 					return fromIndex;
 			}
 		}
 		return -1;
 	}

 	/**
 	 *  Processes the special cases.
 	 *
 	 *  @param expert used to read, clear and set the state which is carried
 	 *         from one line to the next.
 	 *  @param text the text being processed.
 	 *  @param charTypes supplies the bidi type of each character of
 	 *         <code>text</code>; the type of the "E" closing a quoted
 	 *         sequence is set to L.
 	 *  @param offsets passed on to <code>processSeparator()</code>.
 	 *  @param caseNumber number of the special case found at
 	 *         <code>separLocation</code>; ignored when
 	 *         <code>separLocation</code> is negative.
 	 *  @param separLocation index in <code>text</code> where the special
 	 *         case starts. A negative value means that the case number must
 	 *         be taken from the state of <code>expert</code>, i.e. that a
 	 *         construct left open by the previous line is being continued.
 	 *
 	 *  @return the index in <code>text</code> just after the construct, or
 	 *          <code>text.length()</code> if the construct is not closed
 	 *          within <code>text</code>.
 	 */
 	public int processSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int separLocation) {
 		int location;

 		if (separLocation < 0) {
 			// continuation of a construct started on a previous line: the
 			// state is expected to be one of the Integer constants stored by
 			// this handler (a missing state fails on the cast below)
 			caseNumber = ((Integer) expert.getState()).intValue(); // TBD guard against "undefined"
 			expert.clearState();
 		}
 		switch (caseNumber) {
 			case 1 : /* comment (?#...) */
 				if (separLocation < 0) {
 					// initial state from previous line
 					location = 0;
 				} else {
 					StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
 					// skip the opening "(?#"
 					location = separLocation + 3;
 				}
 				location = text.indexOf(')', location);
 				if (location < 0) {
 					// the comment goes on after the end of this text
 					expert.setState(STATE_COMMENT);
 					return text.length();
 				}
 				return location + 1;
 			case 2 : /* named group (?<name> */
 			case 3 : /* named group (?'name' */
 			case 4 : /* conditional named back reference (?(<name>) */
 			case 5 : /* conditional named back reference (?('name') */
 			case 6 : /* conditional named back reference (?(name) */
 			case 7 : /* named parentheses reference (?&name) */
 				StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
 				// no need for calling processSeparator() for the following cases
 				//   since the starting string contains a L char
 				// (intentional fall through to the common code below)
 			case 8 : /* named group (?P<name> */
 			case 9 : /* named back reference \k<name> */
 			case 10 : /* named back reference \k'name' */
 			case 11 : /* named back reference \k{name} */
 			case 12 : /* named back reference (?P=name) */
 			case 13 : /* named back reference \g{name} */
 			case 14 : /* subroutine call \g<name> */
 			case 15 : /* subroutine call \g'name' */
 			case 16 : /* named back reference recursion (?(R&name) */
 				// skip the opening string
 				location = separLocation + startStrings[caseNumber].length();
 				// look for ending character
 				location = text.indexOf(endChars[caseNumber], location);
 				if (location < 0)
 					return text.length();
 				return location + 1;
 			case 17 : /* quoted sequence \Q...\E */
 				if (separLocation < 0) {
 					// initial state from previous line
 					location = 0;
 				} else {
 					StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
 					// skip the opening "\Q"
 					location = separLocation + 2;
 				}
 				location = text.indexOf("\\E", location); //$NON-NLS-1$
 				if (location < 0) {
 					// the quoted sequence goes on after the end of this text
 					expert.setState(STATE_QUOTED_SEQUENCE);
 					return text.length();
 				}
 				// set the charType for the "E" to L (Left to Right character)
 				charTypes.setBidiTypeAt(location + 1, L);
 				return location + 2;
 			case 18 : /* R, AL, AN, EN */
 				StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
 				return separLocation + 1;

 		}
 		// we should never get here
 		return text.length();
 	}

 	/**
 	 *  Computes the base direction of a regular expression, building the
 	 *  bidi types of its characters from <code>expert</code> and
 	 *  <code>text</code>.
 	 *
 	 *  @param expert gives access to the environment.
 	 *  @param text the regular expression.
 	 *
 	 *  @return the result of
 	 *          {@link #getDirection(IStructuredTextExpert, String, StructuredTextCharTypes)}.
 	 */
 	public int getDirection(IStructuredTextExpert expert, String text) {
 		return getDirection(expert, text, new StructuredTextCharTypes(expert, text));
 	}

 	/**
 	 *  Computes the base direction of a regular expression.
 	 *
 	 *  @param expert gives access to the environment (language and
 	 *         mirroring).
 	 *  @param text the regular expression.
 	 *  @param charTypes supplies the bidi type of each character of
 	 *         <code>text</code>.
 	 *
 	 *  @return {@link IStructuredTextExpert#DIR_RTL DIR_RTL} if the current
 	 *          locale (as expressed by the environment language) is Arabic
 	 *          and one of the following conditions is satisfied:
 	 *          <ul>
 	 *            <li>The first strong character has an RTL direction.</li>
 	 *            <li>There is no strong character in the text and the
 	 *                GUI is mirrored.</li>
 	 *          </ul>
 	 *          Otherwise, returns {@link IStructuredTextExpert#DIR_LTR DIR_LTR}.
 	 */
 	public int getDirection(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes) {
 		StructuredTextEnvironment environment = expert.getEnvironment();
 		String language = environment.getLanguage();
 		// constant first, so that a missing language is treated as non Arabic
 		if (!"ar".equals(language)) //$NON-NLS-1$
 			return IStructuredTextExpert.DIR_LTR;
 		// the first strong character decides
 		for (int i = 0; i < text.length(); i++) {
 			byte charType = charTypes.getBidiTypeAt(i);
 			if (charType == AL || charType == R)
 				return IStructuredTextExpert.DIR_RTL;
 			if (charType == L)
 				return IStructuredTextExpert.DIR_LTR;
 		}
 		// no strong character: follow the GUI orientation
 		if (environment.getMirrored())
 			return IStructuredTextExpert.DIR_RTL;
 		return IStructuredTextExpert.DIR_LTR;
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2010, 2011 IBM Corporation and others.
	*
	* This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License 2.0
	* which accompanies this distribution, and is available at
	* https://www.eclipse.org/legal/epl-2.0/
	*
	* SPDX-License-Identifier: EPL-2.0
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	******************************************************************************/
	package org.eclipse.equinox.bidi.internal.consumable;

	import org.eclipse.equinox.bidi.advanced.IStructuredTextExpert;
	import org.eclipse.equinox.bidi.advanced.StructuredTextEnvironment;
	import org.eclipse.equinox.bidi.custom.*;

	/**
	* Handler for regular expressions.
	* Such expressions may span multiple lines.
	* <p>
	* In applications like an editor where parts of the text might be modified
	* while other parts are not, the user may want to call
	* {@link IStructuredTextExpert#leanToFullText}
	* separately on each line and save the initial state of each line (this is
	* the final state of the previous line which can be retrieved using
	* {@link IStructuredTextExpert#getState()}.
	* If both the content
	* of a line and its initial state have not changed, the user can be sure that
	* the last <i>full</i> text computed for this line has not changed either.
	*
	* @see IStructuredTextExpert explanation of state
	*/
	public class StructuredTextRegex extends StructuredTextTypeHandler {
	static final String[] startStrings = {"", /* 0 // dummy *///$NON-NLS-1$
	"(?#", /* 1 // comment (?#...) *///$NON-NLS-1$
	"(?<", /* 2 // named group (?<name> *///$NON-NLS-1$
	"(?'", /* 3 // named group (?'name' *///$NON-NLS-1$
	"(?(<", /* 4 // conditional named back reference (?(<name>) *///$NON-NLS-1$
	"(?('", /* 5 // conditional named back reference (?('name') *///$NON-NLS-1$
	"(?(", /* 6 // conditional named back reference (?(name) *///$NON-NLS-1$
	"(?&", /* 7 // named parentheses reference (?&name) *///$NON-NLS-1$
	"(?P<", /* 8 // named group (?P<name> *///$NON-NLS-1$
	"\\k<", /* 9 // named back reference \k<name> *///$NON-NLS-1$
	"\\k'", /* 10 // named back reference \k'name' *///$NON-NLS-1$
	"\\k{", /* 11 // named back reference \k{name} *///$NON-NLS-1$
	"(?P=", /* 12 // named back reference (?P=name) *///$NON-NLS-1$
	"\\g{", /* 13 // named back reference \g{name} *///$NON-NLS-1$
	"\\g<", /* 14 // subroutine call \g<name> *///$NON-NLS-1$
	"\\g'", /* 15 // subroutine call \g'name' *///$NON-NLS-1$
	"(?(R&", /* 16 // named back reference recursion (?(R&name) *///$NON-NLS-1$
	"\\Q" /* 17 // quoted sequence \Q...\E *///$NON-NLS-1$
	};
	static final char[] endChars = {
	// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
	'.', ')', '>', '\'', ')', ')', ')', ')', '>', '>', '\'', '}', ')', '}', '>', '\'', ')'};
	static final int numberOfStrings = startStrings.length; /* 18 */
	static final int maxSpecial = numberOfStrings;
	static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
	static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
	static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
	static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER;
	static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
	private static final Integer STATE_COMMENT = new Integer(1);
	private static final Integer STATE_QUOTED_SEQUENCE = new Integer(17);

	/**
	* Retrieves the number of special cases handled by this handler.
	*
	* @return the number of special cases for this handler.
	*/
	public int getSpecialsCount(IStructuredTextExpert expert) {
	return maxSpecial;
	}

	/**
	* Locates occurrences of the syntactic strings and of
	* R, AL, EN, AN characters.
	*/
	public int indexOfSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int fromIndex) {
	// In this method, L, R, AL, AN and EN represent bidi categories
	// as defined in the Unicode Bidirectional Algorithm
	// ( http://www.unicode.org/reports/tr9/ ).
	// L represents the category Left to Right character.
	// R represents the category Right to Left character.
	// AL represents the category Arabic Letter.
	// AN represents the category Arabic Number.
	// EN represents the category European Number.
	byte charType;

	if (caseNumber < numberOfStrings) {
	/* 1 // comment (?#...) */
	/* 2 // named group (?<name> */
	/* 3 // named group (?'name' */
	/* 4 // conditional named back reference (?(name) */
	/* 5 // conditional named back reference (?(<name>) */
	/* 6 // conditional named back reference (?('name') */
	/* 7 // named parentheses reference (?&name) */
	/* 8 // named group (?P<name> */
	/* 9 // named back reference \k<name> */
	/* 10 // named back reference \k'name' */
	/* 11 // named back reference \k{name} */
	/* 12 // named back reference (?P=name) */
	/* 13 // named back reference \g{name} */
	/* 14 // subroutine call \g<name> */
	/* 15 // subroutine call \g'name' */
	/* 16 // named back reference recursion (?(R&name) */
	/* 17 // quoted sequence \Q...\E */
	return text.indexOf(startStrings[caseNumber], fromIndex);
	}
	// there never is a need for a mark before the first char
	if (fromIndex <= 0)
	fromIndex = 1;
	// look for R, AL, AN, EN which are potentially needing a mark
	for (; fromIndex < text.length(); fromIndex++) {
	charType = charTypes.getBidiTypeAt(fromIndex);
	// R and AL will always be examined using processSeparator()
	if (charType == R \|\| charType == AL)
	return fromIndex;

	if (charType == EN \|\| charType == AN) {
	// no need for a mark after the first digit in a number
	if (charTypes.getBidiTypeAt(fromIndex - 1) == charType)
	continue;

	for (int i = fromIndex - 1; i >= 0; i--) {
	charType = charTypes.getBidiTypeAt(i);
	// after a L char, no need for a mark
	if (charType == L)
	continue;

	// digit after R or AL or AN need a mark, except for EN
	// following AN, but this is a contrived case, so we
	// don't check for it (and calling processSeparator()
	// for it will do no harm)
	if (charType == R \|\| charType == AL \|\| charType == AN)
	return fromIndex;
	}
	continue;
	}
	}
	return -1;
	}

	/**
	* Processes the special cases.
	*/
	public int processSpecial(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int caseNumber, int separLocation) {
	int location;

	if (separLocation < 0) {
	caseNumber = ((Integer) expert.getState()).intValue(); // TBD guard against "undefined"
	expert.clearState();
	}
	switch (caseNumber) {
	case 1 : /* comment (?#...) */
	if (separLocation < 0) {
	// initial state from previous line
	location = 0;
	} else {
	StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
	// skip the opening "(?#"
	location = separLocation + 3;
	}
	location = text.indexOf(')', location);
	if (location < 0) {
	expert.setState(STATE_COMMENT);
	return text.length();
	}
	return location + 1;
	case 2 : /* named group (?<name> */
	case 3 : /* named group (?'name' */
	case 4 : /* conditional named back reference (?(name) */
	case 5 : /* conditional named back reference (?(<name>) */
	case 6 : /* conditional named back reference (?('name') */
	case 7 : /* named parentheses reference (?&name) */
	StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
	// no need for calling processSeparator() for the following cases
	// since the starting string contains a L char
	case 8 : /* named group (?P<name> */
	case 9 : /* named back reference \k<name> */
	case 10 : /* named back reference \k'name' */
	case 11 : /* named back reference \k{name} */
	case 12 : /* named back reference (?P=name) */
	case 13 : /* named back reference \g{name} */
	case 14 : /* subroutine call \g<name> */
	case 15 : /* subroutine call \g'name' */
	case 16 : /* named back reference recursion (?(R&name) */
	// skip the opening string
	location = separLocation + startStrings[caseNumber].length();
	// look for ending character
	location = text.indexOf(endChars[caseNumber], location);
	if (location < 0)
	return text.length();
	return location + 1;
	case 17 : /* quoted sequence \Q...\E */
	if (separLocation < 0) {
	// initial state from previous line
	location = 0;
	} else {
	StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
	// skip the opening "\Q"
	location = separLocation + 2;
	}
	location = text.indexOf("\\E", location); //$NON-NLS-1$
	if (location < 0) {
	expert.setState(STATE_QUOTED_SEQUENCE);
	return text.length();
	}
	// set the charType for the "E" to L (Left to Right character)
	charTypes.setBidiTypeAt(location + 1, L);
	return location + 2;
	case 18 : /* R, AL, AN, EN */
	StructuredTextTypeHandler.processSeparator(text, charTypes, offsets, separLocation);
	return separLocation + 1;

	}
	// we should never get here
	return text.length();
	}

	public int getDirection(IStructuredTextExpert expert, String text) {
	return getDirection(expert, text, new StructuredTextCharTypes(expert, text));
	}

	/**
	* @return {@link IStructuredTextExpert#DIR_RTL DIR_RTL} if the following
	* conditions are satisfied:
	* <ul>
	* <li>The current locale (as expressed by the environment
	* language) is Arabic.</li>
	* <li>The first strong character has an RTL direction.</li>
	* <li>If there is no strong character in the text, the
	* GUI is mirrored.
	* </ul>
	* Otherwise, returns {@link IStructuredTextExpert#DIR_LTR DIR_LTR}.
	*/
	public int getDirection(IStructuredTextExpert expert, String text, StructuredTextCharTypes charTypes) {
	StructuredTextEnvironment environment = expert.getEnvironment();
	String language = environment.getLanguage();
	if (!language.equals("ar")) //$NON-NLS-1$
	return IStructuredTextExpert.DIR_LTR;
	for (int i = 0; i < text.length(); i++) {
	byte charType = charTypes.getBidiTypeAt(i);
	if (charType == AL \|\| charType == R)
	return IStructuredTextExpert.DIR_RTL;
	if (charType == L)
	return IStructuredTextExpert.DIR_LTR;
	}
	if (environment.getMirrored())
	return IStructuredTextExpert.DIR_RTL;
	return IStructuredTextExpert.DIR_LTR;
	}

	}