blob: e7fe08809ce8fb7df340ec5ec7c70fe1280aca51 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2010, 2011 IBM Corporation and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* IBM Corporation - initial API and implementation
******************************************************************************/
package org.eclipse.equinox.bidi.internal;
import org.eclipse.equinox.bidi.advanced.*;
import org.eclipse.equinox.bidi.custom.*;
/**
* Implementation for IStructuredTextExpert.
*/
public class StructuredTextImpl implements IStructuredTextExpert {
// Returned by fullToLeanText when the full text holds only directional
// formatting characters and no real data.
static final String EMPTY_STRING = ""; //$NON-NLS-1$
// In the following lines, B, L, R, AL, AN and EN represent bidi categories
// as defined in the Unicode Bidirectional Algorithm
// ( http://www.unicode.org/reports/tr9/ ).
// B represents the category Block Separator.
// L represents the category Left to Right character.
// R represents the category Right to Left character.
// AL represents the category Arabic Letter.
// AN represents the category Arabic Number.
// EN represents the category European Number.
static final byte B = Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR;
static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER;
static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
// Unicode directional formatting characters:
// LRM = U+200E LEFT-TO-RIGHT MARK, RLM = U+200F RIGHT-TO-LEFT MARK,
// LRE = U+202A LEFT-TO-RIGHT EMBEDDING, RLE = U+202B RIGHT-TO-LEFT EMBEDDING,
// PDF = U+202C POP DIRECTIONAL FORMATTING.
static final char LRM = 0x200E;
static final char RLM = 0x200F;
static final char LRE = 0x202A;
static final char RLE = 0x202B;
static final char PDF = 0x202C;
// Mark and embedding characters, looked up with a direction value as index
// (see insertMarks and fullToLeanText).
// NOTE(review): this presumes DIR_LTR == 0 and DIR_RTL == 1; those constants
// are declared outside this file - confirm.
static final char[] MARKS = {LRM, RLM};
static final char[] EMBEDS = {LRE, RLE};
// A full prefix is embed + mark and a full suffix is mark + PDF, i.e. two
// characters each; insertMarks adds FIXES_LENGTH characters in total when
// its affix length is 2.
static final int PREFIX_LENGTH = 2;
static final int SUFFIX_LENGTH = 2;
static final int FIXES_LENGTH = PREFIX_LENGTH + SUFFIX_LENGTH;
// Shared result for the methods returning int[] when given an empty text.
static final int[] EMPTY_INT_ARRAY = new int[0];
/**
* The structured text handler utilized by this expert.
* All direction, separator and special-case queries are delegated to it.
*/
protected final StructuredTextTypeHandler handler;
/**
* The environment associated with the expert.
* Its orientation is consulted when deciding which prefix to add to a
* <i>full</i> text.
*/
protected final StructuredTextEnvironment environment;
/**
* Flag which is true if the expert is stateful.
* {@link #setState} and {@link #clearState} only modify {@link #state}
* when this flag is true.
*/
protected final boolean sharedExpert;
/**
* Last state value set by {@link #setState} or {@link #clearState}.
* Remains <code>null</code> as long as neither method has stored a value.
*/
protected Object state;
/**
 * Creates an expert bound to the given handler and environment.
 * This constructor is used in {@link StructuredTextExpertFactory}.
 *
 * @param structuredTextHandler the structured text handler used by this expert.
 * @param environment the environment associated with this expert.
 * @param shared flag which is true if the expert is stateful.
 */
public StructuredTextImpl(StructuredTextTypeHandler structuredTextHandler, StructuredTextEnvironment environment, boolean shared) {
    this.sharedExpert = shared;
    this.environment = environment;
    this.handler = structuredTextHandler;
}
/**
 * @return the structured text handler given to the constructor.
 */
@Override
public StructuredTextTypeHandler getTypeHandler() {
    return this.handler;
}
/**
 * @return the environment given to the constructor.
 */
@Override
public StructuredTextEnvironment getEnvironment() {
    return this.environment;
}
/**
 * Asks the type handler for the base direction of the given text.
 *
 * @param text the text whose direction is requested.
 * @return the direction reported by the handler for this expert and text.
 */
@Override
public int getTextDirection(String text) {
    final int textDirection = handler.getDirection(this, text);
    return textDirection;
}
/**
 * Resets the stored state to <code>null</code>.
 * Does nothing unless the expert was created as stateful.
 */
@Override
public void clearState() {
    if (!sharedExpert) {
        return;
    }
    state = null;
}
/**
 * Stores a new state value.
 * Does nothing unless the expert was created as stateful.
 *
 * @param newState the value to remember; may be <code>null</code>.
 */
@Override
public void setState(Object newState) {
    if (!sharedExpert) {
        return;
    }
    state = newState;
}
/**
 * @return the state value currently stored in this expert, possibly
 *         <code>null</code>.
 */
@Override
public Object getState() {
    return this.state;
}
/**
 * Finds the token (separator or special sequence) closest to
 * <code>curPos</code>.
 * <p>
 * <code>locations</code> caches the last known position of every token:
 * the first entries belong to the separators (one per separator
 * character), the following ones to the special cases. An entry smaller
 * than <code>curPos</code> is stale and is searched again; a token which
 * is not found is recorded at the text length.
 *
 * @param text the text being processed.
 * @param charTypes bidi types of the characters of <code>text</code>.
 * @param offsets offsets collected so far, forwarded to the handler.
 * @param locations per-token cache, updated in place.
 * @param curPos position from which the search starts.
 * @return a packed value: the low 32 bits hold the location of the next
 *         token (the text length when there is none), the high 32 bits
 *         hold the index of that token within <code>locations</code>.
 */
long computeNextLocation(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int[] locations, int curPos) {
    final String separators = handler.getSeparators(this);
    final int separatorCount = separators.length();
    final int specialCount = handler.getSpecialsCount(this);
    final int textLength = text.length();
    int bestLocation = textLength;
    int bestIndex = 0;
    // Special sequences are examined first so that they win over simple
    // separators found at the same place (e.g. slash+asterisk versus slash).
    for (int special = 0; special < specialCount; special++) {
        final int slot = separatorCount + special;
        int candidate = locations[slot];
        if (candidate < curPos) {
            // case numbers handed to the handler start from 1
            final int found = handler.indexOfSpecial(this, text, charTypes, offsets, special + 1, curPos);
            candidate = (found < 0) ? textLength : found;
            locations[slot] = candidate;
        }
        if (candidate < bestLocation) {
            bestLocation = candidate;
            bestIndex = slot;
        }
    }
    for (int slot = 0; slot < separatorCount; slot++) {
        int candidate = locations[slot];
        if (candidate < curPos) {
            final int found = text.indexOf(separators.charAt(slot), curPos);
            candidate = (found < 0) ? textLength : found;
            locations[slot] = candidate;
        }
        if (candidate < bestLocation) {
            bestLocation = candidate;
            bestIndex = slot;
        }
    }
    final long highWord = ((long) bestIndex) << 32;
    return highWord + bestLocation;
}
/**
 * Decides whether a directional mark is needed at a separator and, if so,
 * records the separator location in <code>offsets</code>.
 * <p>
 * The characters before the separator are scanned backwards and the
 * characters from the separator onwards are scanned forwards; the bidi
 * types met first on each side determine the outcome.
 *
 * @param text the text being processed.
 * @param charTypes bidi types of the characters of <code>text</code>.
 * @param offsets receives <code>separLocation</code> when a mark is needed.
 * @param separLocation index of the separator within <code>text</code>.
 * @see StructuredTextTypeHandler#processSeparator StructuredTextTypeHandler.processSeparator
 */
public static void processSeparator(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int separLocation) {
    final int textLength = text.length();
    final int baseDirection = charTypes.getDirection();
    if (baseDirection == DIR_RTL) {
        // RTL base direction: a mark is needed only when the separator
        // sits between L text before it and L or EN text after it.
        for (int before = separLocation - 1; before >= 0; before--) {
            final byte typeBefore = charTypes.getBidiTypeAt(before);
            if (typeBefore == R || typeBefore == AL) {
                return;
            }
            if (typeBefore != L) {
                continue;
            }
            for (int after = separLocation; after < textLength; after++) {
                final byte typeAfter = charTypes.getBidiTypeAt(after);
                if (typeAfter == R || typeAfter == AL) {
                    return;
                }
                if (typeAfter == L || typeAfter == EN) {
                    offsets.insertOffset(charTypes, separLocation);
                    return;
                }
            }
            return;
        }
        return;
    }
    // LTR base direction
    boolean arabicNumberChecked = false;
    for (int before = separLocation - 1; before >= 0; before--) {
        final byte typeBefore = charTypes.getBidiTypeAt(before);
        if (typeBefore == L) {
            return;
        }
        if (typeBefore == R || typeBefore == AL) {
            // RTL text before the separator: a mark is needed when RTL
            // text or a number is met after it before any L character.
            for (int after = separLocation; after < textLength; after++) {
                final byte typeAfter = charTypes.getBidiTypeAt(after);
                if (typeAfter == L) {
                    return;
                }
                if (typeAfter == R || typeAfter == EN || typeAfter == AL || typeAfter == AN) {
                    offsets.insertOffset(charTypes, separLocation);
                    return;
                }
            }
            return;
        }
        if (typeBefore == AN && !arabicNumberChecked) {
            // Arabic number before the separator: look ahead once only;
            // if nothing decisive follows, keep scanning backwards.
            for (int after = separLocation; after < textLength; after++) {
                final byte typeAfter = charTypes.getBidiTypeAt(after);
                if (typeAfter == L) {
                    return;
                }
                if (typeAfter == AL || typeAfter == AN || typeAfter == R) {
                    offsets.insertOffset(charTypes, separLocation);
                    return;
                }
            }
            arabicNumberChecked = true;
        }
    }
}
/**
 * Converts a <i>lean</i> text into a <i>full</i> text by inserting
 * directional marks inside the text and, depending on the orientation of
 * the environment, a prefix and a suffix around it. An empty text is
 * returned unchanged.
 * <ul>
 * <li>When the orientation is <code>ORIENT_LTR</code> and the structured
 * text has a RTL base direction, RLE+RLM is added at the head of the
 * <i>full</i> text and RLM+PDF at its end.</li>
 * <li>When the orientation is <code>ORIENT_RTL</code> and the structured
 * text has a LTR base direction, LRE+LRM is added at the head of the
 * <i>full</i> text and LRM+PDF at its end.</li>
 * <li>When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or
 * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve
 * to a RTL orientation while the structured text has a LTR base
 * direction, LRM is added at the head of the <i>full</i> text.</li>
 * <li>When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or
 * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve
 * to a LTR orientation while the structured text has a RTL base
 * direction, RLM is added at the head of the <i>full</i> text.</li>
 * <li>When the orientation is <code>ORIENT_UNKNOWN</code> and the
 * structured text has a LTR base direction, LRE+LRM is added at the head
 * of the <i>full</i> text and LRM+PDF at its end.</li>
 * <li>When the orientation is <code>ORIENT_UNKNOWN</code> and the
 * structured text has a RTL base direction, RLE+RLM is added at the head
 * of the <i>full</i> text and RLM+PDF at its end.</li>
 * <li>When the orientation is <code>ORIENT_IGNORE</code>, no directional
 * formatting characters are added as either prefix or suffix of the
 * <i>full</i> text.</li>
 * </ul>
 *
 * @param text the <i>lean</i> text to convert.
 * @return the <i>full</i> text.
 * @see IStructuredTextExpert#leanToFullText leanToFullText
 */
@Override
public String leanToFullText(String text) {
    if (text.isEmpty()) {
        return text;
    }
    final StructuredTextCharTypes types = new StructuredTextCharTypes(this, text);
    final StructuredTextOffsets markOffsets = leanToFullCommon(text, types);
    final int affixLength = markOffsets.getPrefixLength();
    final int baseDirection = types.getDirection();
    final int[] positions = markOffsets.getOffsets();
    return insertMarks(text, positions, baseDirection, affixLength);
}
/**
 * Computes, for every character of a <i>lean</i> text, its index in the
 * corresponding <i>full</i> text.
 *
 * @param text the <i>lean</i> text.
 * @return an array as long as <code>text</code> whose entry <code>i</code>
 *         is <code>i</code> plus the prefix length plus the number of
 *         marks inserted at or before position <code>i</code>; an empty
 *         array for an empty text.
 */
@Override
public int[] leanToFullMap(String text) {
    final int leanLength = text.length();
    if (leanLength == 0) {
        return EMPTY_INT_ARRAY;
    }
    final StructuredTextCharTypes types = new StructuredTextCharTypes(this, text);
    final StructuredTextOffsets markOffsets = leanToFullCommon(text, types);
    final int affixLength = markOffsets.getPrefixLength();
    final int[] result = new int[leanLength];
    final int usedOffsets = markOffsets.getCount(); // number of used entries
    int shift = affixLength;
    int nextMark = 0;
    for (int leanPos = 0; leanPos < leanLength; leanPos++) {
        // a mark inserted at this position pushes this character one further
        if (nextMark < usedOffsets && markOffsets.getOffset(nextMark) == leanPos) {
            nextMark++;
            shift++;
        }
        result[leanPos] = leanPos + shift;
    }
    return result;
}
/**
 * Computes the positions of a <i>lean</i> text at which directional marks
 * have to be inserted.
 *
 * @param text the <i>lean</i> text.
 * @return the offsets collected while analysing <code>text</code>; an
 *         empty array for an empty text.
 */
@Override
public int[] leanBidiCharOffsets(String text) {
    if (text.length() == 0) {
        return EMPTY_INT_ARRAY;
    }
    final StructuredTextCharTypes types = new StructuredTextCharTypes(this, text);
    return leanToFullCommon(text, types).getOffsets();
}
/**
 * Shared analysis behind the lean-to-full conversions: collects the
 * offsets where marks are needed and determines the prefix length.
 * <p>
 * Unless the handler asks to skip processing, the text is walked token by
 * token: simple separators go through {@link #processSeparator}, special
 * sequences through the handler's <code>processSpecial</code>. When a
 * state is stored, the handler is first called with case number 0 to
 * resume from it.
 *
 * @param text the <i>lean</i> text.
 * @param charTypes bidi types of the characters of <code>text</code>.
 * @return the collected offsets, with their prefix length set to 0
 *         (no prefix), 1 (a single mark) or 2 (embed + mark).
 */
private StructuredTextOffsets leanToFullCommon(String text, StructuredTextCharTypes charTypes) {
    final int textLength = text.length();
    final int direction = handler.getDirection(this, text, charTypes);
    final StructuredTextOffsets offsets = new StructuredTextOffsets();
    if (!handler.skipProcessing(this, text, charTypes)) {
        // every token location starts as "unknown"
        final int separCount = handler.getSeparators(this).length();
        final int[] locations = new int[separCount + handler.getSpecialsCount(this)];
        for (int slot = locations.length - 1; slot >= 0; slot--) {
            locations[slot] = -1;
        }
        // current position
        int curPos = 0;
        if (state != null) {
            curPos = handler.processSpecial(this, text, charTypes, offsets, 0, -1);
        }
        do {
            final long packed = computeNextLocation(text, charTypes, offsets, locations, curPos);
            // low word: location of the next token to handle
            final int nextLocation = (int) packed;
            if (nextLocation >= textLength) {
                break;
            }
            // high word: index of the next token (if < separCount, this is
            // a separator; otherwise a special case)
            final int tokenIndex = (int) (packed >> 32);
            if (tokenIndex < separCount) {
                processSeparator(text, charTypes, offsets, nextLocation);
                curPos = nextLocation + 1;
            } else {
                // case numbers start from 1
                final int caseNumber = tokenIndex - separCount + 1;
                curPos = handler.processSpecial(this, text, charTypes, offsets, caseNumber, nextLocation);
            }
        } while (curPos < textLength);
    }
    final int orientation = environment.getOrientation();
    int prefixLength = 0;
    if (orientation != StructuredTextEnvironment.ORIENT_IGNORE) {
        final int resolvedOrientation = charTypes.resolveOrientation();
        final boolean orientationMatches = orientation != StructuredTextEnvironment.ORIENT_UNKNOWN && resolvedOrientation == direction;
        if (!orientationMatches) {
            final boolean contextual = (orientation & StructuredTextEnvironment.ORIENT_CONTEXTUAL) != 0;
            prefixLength = contextual ? 1 : 2;
        }
    }
    offsets.setPrefixLength(prefixLength);
    return offsets;
}
/**
 * Converts a <i>full</i> text back into a <i>lean</i> text.
 * <p>
 * The method works in three steps:
 * <ol>
 * <li>leading embeds/marks and trailing PDFs/marks are dropped from
 * <code>full</code>, then every remaining mark of the current direction is
 * removed to obtain a candidate lean text;</li>
 * <li>the candidate is converted back with {@link #leanToFullText} and the
 * prefix and suffix of the result are stripped;</li>
 * <li>if this regenerated text equals the trimmed <code>full</code>, the
 * candidate is the answer; otherwise the two texts are walked in parallel
 * and every mark present in <code>full</code> but not regenerated is put
 * back into the lean text, since it was real data.</li>
 * </ol>
 *
 * @param full the <i>full</i> text.
 * @return the <i>lean</i> text; <code>full</code> itself when it is empty,
 *         and an empty string when it holds no real data.
 * @throws IllegalStateException if the regenerated text and
 *         <code>full</code> differ by something other than marks.
 */
@Override
public String fullToLeanText(String full) {
    if (full.length() == 0)
        return full;
    int dir = handler.getDirection(this, full);
    char curMark = MARKS[dir];
    char curEmbed = EMBEDS[dir];
    int i; // used as loop index
    // remove any prefix and leading mark
    int lenFull = full.length();
    for (i = 0; i < lenFull; i++) {
        char c = full.charAt(i);
        if (c != curEmbed && c != curMark)
            break;
    }
    if (i > 0) { // found at least one prefix or leading mark
        full = full.substring(i);
        lenFull = full.length();
    }
    // remove any suffix and trailing mark
    for (i = lenFull - 1; i >= 0; i--) {
        char c = full.charAt(i);
        if (c != PDF && c != curMark)
            break;
    }
    if (i < 0) // only suffix and trailing marks, no real data
        return EMPTY_STRING;
    if (i < (lenFull - 1)) { // found at least one suffix or trailing mark
        full = full.substring(0, i + 1);
        lenFull = full.length();
    }
    char[] chars = full.toCharArray();
    // remove marks from chars, compacting the array in place
    int cnt = 0;
    for (i = 0; i < lenFull; i++) {
        char c = chars[i];
        if (c == curMark)
            cnt++;
        else if (cnt > 0)
            chars[i - cnt] = c;
    }
    String lean = new String(chars, 0, lenFull - cnt);
    String full2 = leanToFullText(lean);
    // strip prefix and suffix
    int beginIndex = 0, endIndex = full2.length();
    if (full2.charAt(0) == curMark)
        beginIndex = 1;
    else {
        if (full2.charAt(0) == curEmbed) {
            beginIndex = 1;
            // The mark of an embed+mark prefix is the SECOND character.
            // Testing index 0 again could never succeed and left the mark
            // in full2, so the equality shortcut below was always missed.
            if (full2.length() > 1 && full2.charAt(1) == curMark)
                beginIndex = 2;
        }
        if (full2.charAt(endIndex - 1) == PDF) {
            endIndex--;
            if (full2.charAt(endIndex - 1) == curMark)
                endIndex--;
        }
    }
    if (beginIndex > 0 || endIndex < full2.length())
        full2 = full2.substring(beginIndex, endIndex);
    if (full2.equals(full))
        return lean;
    // There are some marks in full which are not in full2 and/or vice versa.
    // We need to add to lean any mark appearing in full and not in full2.
    // The completed lean can never be longer than full itself.
    char[] newChars = new char[lenFull];
    char cFull, cFull2;
    int idxFull, idxFull2, idxLean, newCharsPos;
    int lenFull2 = full2.length();
    idxFull = idxFull2 = idxLean = newCharsPos = 0;
    while (idxFull < lenFull && idxFull2 < lenFull2) {
        cFull2 = full2.charAt(idxFull2);
        cFull = full.charAt(idxFull);
        if (cFull2 == cFull) { /* chars are equal, proceed */
            if (cFull2 != curMark)
                newChars[newCharsPos++] = chars[idxLean++];
            idxFull++;
            idxFull2++;
            continue;
        }
        if (cFull2 == curMark) { /* extra Mark in full2 text */
            idxFull2++;
            continue;
        }
        if (cFull == curMark) { /* extra Mark in source full text */
            idxFull++;
            // idxFull-2 always >= 0 since leading Marks were removed from full
            if (full.charAt(idxFull - 2) == curMark)
                continue; // ignore successive Marks in full after the first one
            newChars[newCharsPos++] = curMark;
            continue;
        }
        // we should never get here (extra char which is not a Mark)
        throw new IllegalStateException("Internal error: extra character not a Mark."); //$NON-NLS-1$
    }
    if (idxFull < lenFull) /* full2 ended before full - this should never happen since
                              we removed all marks and PDFs at the end of full */
        throw new IllegalStateException("Internal error: unexpected EOL."); //$NON-NLS-1$
    lean = new String(newChars, 0, newCharsPos);
    return lean;
}
/**
 * Computes, for every character of a <i>full</i> text, its index in the
 * corresponding <i>lean</i> text.
 *
 * @param full the <i>full</i> text.
 * @return an array as long as <code>full</code>; each entry is the index
 *         of the matching character in the lean text, or -1 for a
 *         character which has no counterpart there. An empty array is
 *         returned for an empty text.
 */
@Override
public int[] fullToLeanMap(String full) {
    final int fullLength = full.length();
    if (fullLength == 0) {
        return EMPTY_INT_ARRAY;
    }
    final String lean = fullToLeanText(full);
    final int leanLength = lean.length();
    final int dir = handler.getDirection(this, lean);
    final char curMark = MARKS[dir];
    final char curEmbed = EMBEDS[dir];
    final int[] map = new int[fullLength];
    int fullPos = 0;
    // skip any prefix and leading mark
    while (fullPos < fullLength) {
        final char c = full.charAt(fullPos);
        if (c != curEmbed && c != curMark) {
            break;
        }
        map[fullPos++] = -1;
    }
    // lean must be a subset of full, so only the lean index bounds the loop
    int leanPos = 0;
    while (leanPos < leanLength) {
        if (full.charAt(fullPos) == lean.charAt(leanPos)) {
            map[fullPos] = leanPos++;
        } else {
            map[fullPos] = -1;
        }
        fullPos++;
    }
    // whatever is left of full has no counterpart in lean
    while (fullPos < fullLength) {
        map[fullPos++] = -1;
    }
    return map;
}
/**
 * Computes the positions within a <i>full</i> text of the characters
 * which do not appear in the corresponding <i>lean</i> text.
 *
 * @param full the <i>full</i> text.
 * @return the offsets, relative to <code>full</code>, of the characters
 *         with no counterpart in the lean text; an empty array for an
 *         empty text.
 */
@Override
public int[] fullBidiCharOffsets(String full) {
    final int fullLength = full.length();
    if (fullLength == 0) {
        return EMPTY_INT_ARRAY;
    }
    final String lean = fullToLeanText(full);
    final StructuredTextOffsets offsets = new StructuredTextOffsets();
    final int leanLength = lean.length();
    int leanPos = 0;
    int fullPos = 0;
    // lean must be a subset of full, so only the lean index bounds the loop
    while (leanPos < leanLength) {
        if (full.charAt(fullPos) == lean.charAt(leanPos)) {
            leanPos++;
        } else {
            offsets.insertOffset(null, fullPos);
        }
        fullPos++;
    }
    // everything after the last lean character was added
    while (fullPos < fullLength) {
        offsets.insertOffset(null, fullPos);
        fullPos++;
    }
    return offsets.getOffsets();
}
/**
 * Inserts directional marks into a text at the given offsets and wraps it
 * with the requested prefix and suffix.
 *
 * @param text the text to complete; returned as is when it is empty or
 *        when there is nothing to add.
 * @param offsets positions in <code>text</code> before which a mark is
 *        inserted; may be <code>null</code>. The positions are consumed in
 *        array order while the text is scanned from left to right.
 * @param direction <code>DIR_LTR</code> or <code>DIR_RTL</code>; selects
 *        the mark and embedding characters.
 * @param affixLength 0 for no affix, 1 for a single leading mark, 2 for
 *        embed + mark at the head and mark + PDF at the end.
 * @return the completed text.
 * @throws IllegalArgumentException if <code>direction</code> or
 *         <code>affixLength</code> is out of range.
 */
@Override
public String insertMarks(String text, int[] offsets, int direction, int affixLength) {
    if (direction != DIR_LTR && direction != DIR_RTL) {
        throw new IllegalArgumentException("Invalid direction"); //$NON-NLS-1$
    }
    if (affixLength < 0 || affixLength > 2) {
        throw new IllegalArgumentException("Invalid affix length"); //$NON-NLS-1$
    }
    final int markCount = (offsets != null) ? offsets.length : 0;
    if (markCount == 0 && affixLength == 0) {
        return text;
    }
    final int textLength = text.length();
    if (textLength == 0) {
        return text;
    }
    int fullLength = textLength + markCount;
    switch (affixLength) {
        case 1 :
            fullLength += 1; /* +1 for a mark char */
            break;
        case 2 :
            fullLength += FIXES_LENGTH;
            break;
        default :
            break;
    }
    final char[] fullChars = new char[fullLength];
    final char curMark = MARKS[direction];
    // copy the text, leaving room for the prefix and adding marks at offsets
    int dest = affixLength;
    int nextOffset = 0;
    for (int src = 0; src < textLength; src++) {
        if (nextOffset < markCount && offsets[nextOffset] == src) {
            fullChars[dest++] = curMark;
            nextOffset++;
        }
        fullChars[dest++] = text.charAt(src);
    }
    if (affixLength == 1) {
        /* contextual orientation */
        fullChars[0] = curMark;
    } else if (affixLength == 2) {
        // When the orientation is RTL, we need to add EMBED at the
        // start of the text and PDF at its end.
        // However, because of a bug in Windows' handling of LRE/RLE/PDF,
        // we add LRM or RLM (according to the direction) after the
        // LRE/RLE and again before the PDF.
        fullChars[0] = EMBEDS[direction];
        fullChars[1] = curMark;
        fullChars[fullLength - 2] = curMark;
        fullChars[fullLength - 1] = PDF;
    }
    return new String(fullChars);
}
/**
 * @return the default object description followed by the description of
 *         the handler used by this expert.
 */
@Override
public String toString() {
    final String base = super.toString();
    final String handlerText = handler.toString();
    return base + " [handler=" + handlerText + "]"; //$NON-NLS-1$ //$NON-NLS-2$
}
}