| /******************************************************************************* |
| * Copyright (c) 2010, 2011 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| ******************************************************************************/ |
| package org.eclipse.equinox.bidi.internal; |
| |
| import org.eclipse.equinox.bidi.advanced.*; |
| import org.eclipse.equinox.bidi.custom.*; |
| |
| /** |
| * Implementation for IStructuredTextExpert. |
| */ |
| public class StructuredTextImpl implements IStructuredTextExpert { |
| |
| static final String EMPTY_STRING = ""; //$NON-NLS-1$ |
| |
| // In the following lines, B, L, R and AL represent bidi categories |
| // as defined in the Unicode Bidirectional Algorithm |
| // ( http://www.unicode.org/reports/tr9/ ). |
| // B represents the category Block Separator. |
| // L represents the category Left to Right character. |
| // R represents the category Right to Left character. |
| // AL represents the category Arabic Letter. |
| // AN represents the category Arabic Number. |
| // EN represents the category European Number. |
| static final byte B = Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR; |
| static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT; |
| static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT; |
| static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC; |
| static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER; |
| static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER; |
| |
| static final char LRM = 0x200E; |
| static final char RLM = 0x200F; |
| static final char LRE = 0x202A; |
| static final char RLE = 0x202B; |
| static final char PDF = 0x202C; |
| static final char[] MARKS = {LRM, RLM}; |
| static final char[] EMBEDS = {LRE, RLE}; |
| static final int PREFIX_LENGTH = 2; |
| static final int SUFFIX_LENGTH = 2; |
| static final int FIXES_LENGTH = PREFIX_LENGTH + SUFFIX_LENGTH; |
| static final int[] EMPTY_INT_ARRAY = new int[0]; |
| |
| /** |
| * The structured text handler utilized by this expert. |
| */ |
| protected final StructuredTextTypeHandler handler; |
| /** |
| * The environment associated with the expert. |
| */ |
| protected final StructuredTextEnvironment environment; |
| /** |
| * Flag which is true if the expert is stateful. |
| */ |
| protected final boolean sharedExpert; |
| /** |
| * Last state value set by {@link #setState} or {@link #clearState}. |
| */ |
| protected Object state; |
| |
/**
 * Creates an expert bound to a handler and an environment.
 * This constructor is used in {@link StructuredTextExpertFactory}.
 *
 * @param structuredTextHandler the structured text handler used by this expert.
 * @param environment the environment associated with this expert.
 * @param shared <code>true</code> if the expert is stateful.
 */
public StructuredTextImpl(StructuredTextTypeHandler structuredTextHandler, StructuredTextEnvironment environment, boolean shared) {
    this.sharedExpert = shared;
    this.environment = environment;
    this.handler = structuredTextHandler;
}
| |
| public StructuredTextTypeHandler getTypeHandler() { |
| return handler; |
| } |
| |
| public StructuredTextEnvironment getEnvironment() { |
| return environment; |
| } |
| |
| public int getTextDirection(String text) { |
| return handler.getDirection(this, text); |
| } |
| |
| public void clearState() { |
| if (sharedExpert) |
| state = null; |
| } |
| |
| public void setState(Object newState) { |
| if (sharedExpert) |
| state = newState; |
| } |
| |
| public Object getState() { |
| return state; |
| } |
| |
| long computeNextLocation(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int[] locations, int curPos) { |
| String separators = handler.getSeparators(this); |
| int separCount = separators.length(); |
| int specialsCount = handler.getSpecialsCount(this); |
| int len = text.length(); |
| int nextLocation = len; |
| int idxLocation = 0; |
| // Start with special sequences to give them precedence over simple |
| // separators. This may apply to cases like slash+asterisk versus slash. |
| for (int i = 0; i < specialsCount; i++) { |
| int location = locations[separCount + i]; |
| if (location < curPos) { |
| location = handler.indexOfSpecial(this, text, charTypes, offsets, i + 1, curPos); |
| if (location < 0) |
| location = len; |
| locations[separCount + i] = location; |
| } |
| if (location < nextLocation) { |
| nextLocation = location; |
| idxLocation = separCount + i; |
| } |
| } |
| for (int i = 0; i < separCount; i++) { |
| int location = locations[i]; |
| if (location < curPos) { |
| location = text.indexOf(separators.charAt(i), curPos); |
| if (location < 0) |
| location = len; |
| locations[i] = location; |
| } |
| if (location < nextLocation) { |
| nextLocation = location; |
| idxLocation = i; |
| } |
| } |
| return nextLocation + (((long) idxLocation) << 32); |
| } |
| |
| /** |
| * @see StructuredTextTypeHandler#processSeparator StructuredTextTypeHandler.processSeparator |
| */ |
| static public void processSeparator(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int separLocation) { |
| int len = text.length(); |
| int direction = charTypes.getDirection(); |
| if (direction == DIR_RTL) { |
| // the structured text base direction is RTL |
| for (int i = separLocation - 1; i >= 0; i--) { |
| byte charType = charTypes.getBidiTypeAt(i); |
| if (charType == R || charType == AL) |
| return; |
| if (charType == L) { |
| for (int j = separLocation; j < len; j++) { |
| charType = charTypes.getBidiTypeAt(j); |
| if (charType == R || charType == AL) |
| return; |
| if (charType == L || charType == EN) { |
| offsets.insertOffset(charTypes, separLocation); |
| return; |
| } |
| } |
| return; |
| } |
| } |
| return; |
| } |
| |
| // the structured text base direction is LTR |
| boolean doneAN = false; |
| for (int i = separLocation - 1; i >= 0; i--) { |
| byte charType = charTypes.getBidiTypeAt(i); |
| if (charType == L) |
| return; |
| if (charType == R || charType == AL) { |
| for (int j = separLocation; j < len; j++) { |
| charType = charTypes.getBidiTypeAt(j); |
| if (charType == L) |
| return; |
| if (charType == R || charType == EN || charType == AL || charType == AN) { |
| offsets.insertOffset(charTypes, separLocation); |
| return; |
| } |
| } |
| return; |
| } |
| if (charType == AN && !doneAN) { |
| for (int j = separLocation; j < len; j++) { |
| charType = charTypes.getBidiTypeAt(j); |
| if (charType == L) |
| return; |
| if (charType == AL || charType == AN || charType == R) { |
| offsets.insertOffset(charTypes, separLocation); |
| return; |
| } |
| } |
| doneAN = true; |
| } |
| } |
| } |
| |
| /** |
| * When the orientation is <code>ORIENT_LTR</code> and the |
| * structured text has a RTL base direction, |
| * {@link IStructuredTextExpert#leanToFullText leanToFullText} |
| * adds RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its |
| * end. |
| * <p> |
| * When the orientation is <code>ORIENT_RTL</code> and the |
| * structured text has a LTR base direction, |
| * {@link IStructuredTextExpert#leanToFullText leanToFullText} |
| * adds LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its |
| * end. |
| * <p> |
| * When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or |
| * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve |
| * to a RTL orientation while the structured text has a LTR base |
| * direction, {@link IStructuredTextExpert#leanToFullText leanToFullText} |
| * adds LRM at the head of the <i>full</i> text. |
| * <p> |
| * When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or |
| * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve |
| * to a LTR orientation while the structured text has a RTL base |
| * direction, {@link IStructuredTextExpert#leanToFullText leanToFullText} |
| * adds RLM at the head of the <i>full</i> text. |
| * <p> |
| * When the orientation is <code>ORIENT_UNKNOWN</code> and the |
| * structured text has a LTR base direction, |
| * {@link IStructuredTextExpert#leanToFullText leanToFullText} |
| * adds LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its |
| * end. |
| * <p> |
| * When the orientation is <code>ORIENT_UNKNOWN</code> and the |
| * structured text has a RTL base direction, |
| * {@link IStructuredTextExpert#leanToFullText leanToFullText} |
| * adds RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its |
| * end. |
| * <p> |
| * When the orientation is <code>ORIENT_IGNORE</code>, |
| * {@link IStructuredTextExpert#leanToFullText leanToFullText} does not add any directional |
| * formatting characters as either prefix or suffix of the <i>full</i> text. |
| * <p> |
| */ |
| public String leanToFullText(String text) { |
| int len = text.length(); |
| if (len == 0) |
| return text; |
| StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); |
| StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); |
| int prefixLength = offsets.getPrefixLength(); |
| int direction = charTypes.getDirection(); |
| return insertMarks(text, offsets.getOffsets(), direction, prefixLength); |
| } |
| |
| public int[] leanToFullMap(String text) { |
| int len = text.length(); |
| if (len == 0) |
| return EMPTY_INT_ARRAY; |
| StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); |
| StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); |
| int prefixLength = offsets.getPrefixLength(); |
| int[] map = new int[len]; |
| int count = offsets.getCount(); // number of used entries |
| int added = prefixLength; |
| for (int pos = 0, i = 0; pos < len; pos++) { |
| if (i < count && pos == offsets.getOffset(i)) { |
| added++; |
| i++; |
| } |
| map[pos] = pos + added; |
| } |
| return map; |
| } |
| |
| public int[] leanBidiCharOffsets(String text) { |
| int len = text.length(); |
| if (len == 0) |
| return EMPTY_INT_ARRAY; |
| StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); |
| StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); |
| return offsets.getOffsets(); |
| } |
| |
| private StructuredTextOffsets leanToFullCommon(String text, StructuredTextCharTypes charTypes) { |
| int len = text.length(); |
| int direction = handler.getDirection(this, text, charTypes); |
| StructuredTextOffsets offsets = new StructuredTextOffsets(); |
| if (!handler.skipProcessing(this, text, charTypes)) { |
| // initialize locations |
| int separCount = handler.getSeparators(this).length(); |
| int[] locations = new int[separCount + handler.getSpecialsCount(this)]; |
| for (int i = 0, k = locations.length; i < k; i++) { |
| locations[i] = -1; |
| } |
| // current position |
| int curPos = 0; |
| if (state != null) { |
| curPos = handler.processSpecial(this, text, charTypes, offsets, 0, -1); |
| } |
| while (true) { |
| // location of next token to handle |
| int nextLocation; |
| // index of next token to handle (if < separCount, this is a separator; otherwise a special case |
| int idxLocation; |
| long res = computeNextLocation(text, charTypes, offsets, locations, curPos); |
| nextLocation = (int) (res & 0x00000000FFFFFFFF); /* low word */ |
| if (nextLocation >= len) |
| break; |
| idxLocation = (int) (res >> 32); /* high word */ |
| if (idxLocation < separCount) { |
| processSeparator(text, charTypes, offsets, nextLocation); |
| curPos = nextLocation + 1; |
| } else { |
| idxLocation -= (separCount - 1); // because caseNumber starts from 1 |
| curPos = handler.processSpecial(this, text, charTypes, offsets, idxLocation, nextLocation); |
| } |
| if (curPos >= len) |
| break; |
| } // end while |
| } // end if (!handler.skipProcessing()) |
| int prefixLength; |
| int orientation = environment.getOrientation(); |
| if (orientation == StructuredTextEnvironment.ORIENT_IGNORE) |
| prefixLength = 0; |
| else { |
| int resolvedOrientation = charTypes.resolveOrientation(); |
| if (orientation != StructuredTextEnvironment.ORIENT_UNKNOWN && resolvedOrientation == direction) |
| prefixLength = 0; |
| else if ((orientation & StructuredTextEnvironment.ORIENT_CONTEXTUAL) != 0) |
| prefixLength = 1; |
| else |
| prefixLength = 2; |
| } |
| offsets.setPrefixLength(prefixLength); |
| return offsets; |
| } |
| |
| public String fullToLeanText(String full) { |
| if (full.length() == 0) |
| return full; |
| int dir = handler.getDirection(this, full); |
| char curMark = MARKS[dir]; |
| char curEmbed = EMBEDS[dir]; |
| int i; // used as loop index |
| // remove any prefix and leading mark |
| int lenFull = full.length(); |
| for (i = 0; i < lenFull; i++) { |
| char c = full.charAt(i); |
| if (c != curEmbed && c != curMark) |
| break; |
| } |
| if (i > 0) { // found at least one prefix or leading mark |
| full = full.substring(i); |
| lenFull = full.length(); |
| } |
| // remove any suffix and trailing mark |
| for (i = lenFull - 1; i >= 0; i--) { |
| char c = full.charAt(i); |
| if (c != PDF && c != curMark) |
| break; |
| } |
| if (i < 0) // only suffix and trailing marks, no real data |
| return EMPTY_STRING; |
| if (i < (lenFull - 1)) { // found at least one suffix or trailing mark |
| full = full.substring(0, i + 1); |
| lenFull = full.length(); |
| } |
| char[] chars = full.toCharArray(); |
| // remove marks from chars |
| int cnt = 0; |
| for (i = 0; i < lenFull; i++) { |
| char c = chars[i]; |
| if (c == curMark) |
| cnt++; |
| else if (cnt > 0) |
| chars[i - cnt] = c; |
| } |
| String lean = new String(chars, 0, lenFull - cnt); |
| String full2 = leanToFullText(lean); |
| // strip prefix and suffix |
| int beginIndex = 0, endIndex = full2.length(); |
| if (full2.charAt(0) == curMark) |
| beginIndex = 1; |
| else { |
| if (full2.charAt(0) == curEmbed) { |
| beginIndex = 1; |
| if (full2.charAt(0) == curMark) |
| beginIndex = 2; |
| } |
| if (full2.charAt(endIndex - 1) == PDF) { |
| endIndex--; |
| if (full2.charAt(endIndex - 1) == curMark) |
| endIndex--; |
| } |
| } |
| if (beginIndex > 0 || endIndex < full2.length()) |
| full2 = full2.substring(beginIndex, endIndex); |
| if (full2.equals(full)) |
| return lean; |
| |
| // There are some marks in full which are not in full2 and/or vice versa. |
| // We need to add to lean any mark appearing in full and not in full2. |
| // The completed lean can never be longer than full itself. |
| char[] newChars = new char[lenFull]; |
| char cFull, cFull2; |
| int idxFull, idxFull2, idxLean, newCharsPos; |
| int lenFull2 = full2.length(); |
| idxFull = idxFull2 = idxLean = newCharsPos = 0; |
| while (idxFull < lenFull && idxFull2 < lenFull2) { |
| cFull2 = full2.charAt(idxFull2); |
| cFull = full.charAt(idxFull); |
| if (cFull2 == cFull) { /* chars are equal, proceed */ |
| if (cFull2 != curMark) |
| newChars[newCharsPos++] = chars[idxLean++]; |
| idxFull++; |
| idxFull2++; |
| continue; |
| } |
| if (cFull2 == curMark) { /* extra Mark in full2 text */ |
| idxFull2++; |
| continue; |
| } |
| if (cFull == curMark) { /* extra Mark in source full text */ |
| idxFull++; |
| // idxFull-2 always >= 0 since leading Marks were removed from full |
| if (full.charAt(idxFull - 2) == curMark) |
| continue; // ignore successive Marks in full after the first one |
| newChars[newCharsPos++] = curMark; |
| continue; |
| } |
| // we should never get here (extra char which is not a Mark) |
| throw new IllegalStateException("Internal error: extra character not a Mark."); //$NON-NLS-1$ |
| } |
| if (idxFull < lenFull) /* full2 ended before full - this should never happen since |
| we removed all marks and PDFs at the end of full */ |
| throw new IllegalStateException("Internal error: unexpected EOL."); //$NON-NLS-1$ |
| |
| lean = new String(newChars, 0, newCharsPos); |
| return lean; |
| } |
| |
| public int[] fullToLeanMap(String full) { |
| int lenFull = full.length(); |
| if (lenFull == 0) |
| return EMPTY_INT_ARRAY; |
| String lean = fullToLeanText(full); |
| int lenLean = lean.length(); |
| int dir = handler.getDirection(this, lean); |
| char curMark = MARKS[dir]; |
| char curEmbed = EMBEDS[dir]; |
| int[] map = new int[lenFull]; |
| int idxFull, idxLean; |
| // skip any prefix and leading mark |
| for (idxFull = 0; idxFull < lenFull; idxFull++) { |
| char c = full.charAt(idxFull); |
| if (c != curEmbed && c != curMark) |
| break; |
| map[idxFull] = -1; |
| } |
| // lean must be a subset of Full, so we only check on iLean < leanLen |
| for (idxLean = 0; idxLean < lenLean; idxFull++) { |
| if (full.charAt(idxFull) == lean.charAt(idxLean)) { |
| map[idxFull] = idxLean; |
| idxLean++; |
| } else |
| map[idxFull] = -1; |
| } |
| for (; idxFull < lenFull; idxFull++) |
| map[idxFull] = -1; |
| return map; |
| } |
| |
| public int[] fullBidiCharOffsets(String full) { |
| int lenFull = full.length(); |
| if (lenFull == 0) |
| return EMPTY_INT_ARRAY; |
| String lean = fullToLeanText(full); |
| StructuredTextOffsets offsets = new StructuredTextOffsets(); |
| int lenLean = lean.length(); |
| int idxLean, idxFull; |
| // lean must be a subset of Full, so we only check on iLean < leanLen |
| for (idxLean = idxFull = 0; idxLean < lenLean; idxFull++) { |
| if (full.charAt(idxFull) == lean.charAt(idxLean)) |
| idxLean++; |
| else |
| offsets.insertOffset(null, idxFull); |
| } |
| for (; idxFull < lenFull; idxFull++) |
| offsets.insertOffset(null, idxFull); |
| return offsets.getOffsets(); |
| } |
| |
| public String insertMarks(String text, int[] offsets, int direction, int affixLength) { |
| if (direction != DIR_LTR && direction != DIR_RTL) |
| throw new IllegalArgumentException("Invalid direction"); //$NON-NLS-1$ |
| if (affixLength < 0 || affixLength > 2) |
| throw new IllegalArgumentException("Invalid affix length"); //$NON-NLS-1$ |
| int count = offsets == null ? 0 : offsets.length; |
| if (count == 0 && affixLength == 0) |
| return text; |
| int textLength = text.length(); |
| if (textLength == 0) |
| return text; |
| int newLen = textLength + count; |
| if (affixLength == 1) |
| newLen++; /* +1 for a mark char */ |
| else if (affixLength == 2) |
| newLen += FIXES_LENGTH; |
| char[] fullChars = new char[newLen]; |
| int added = affixLength; |
| // add marks at offsets |
| char curMark = MARKS[direction]; |
| for (int i = 0, j = 0; i < textLength; i++) { |
| char c = text.charAt(i); |
| if (j < count && i == offsets[j]) { |
| fullChars[i + added] = curMark; |
| added++; |
| j++; |
| } |
| fullChars[i + added] = c; |
| } |
| if (affixLength > 0) { /* add prefix/suffix ? */ |
| if (affixLength == 1) { /* contextual orientation */ |
| fullChars[0] = curMark; |
| } else { |
| // When the orientation is RTL, we need to add EMBED at the |
| // start of the text and PDF at its end. |
| // However, because of a bug in Windows' handling of LRE/RLE/PDF, |
| // we add LRM or RLM (according to the direction) after the |
| // LRE/RLE and again before the PDF. |
| char curEmbed = EMBEDS[direction]; |
| fullChars[0] = curEmbed; |
| fullChars[1] = curMark; |
| fullChars[newLen - 1] = PDF; |
| fullChars[newLen - 2] = curMark; |
| } |
| } |
| return new String(fullChars); |
| } |
| |
| public String toString() { |
| return super.toString() + " [handler=" + handler.toString() + "]"; //$NON-NLS-1$ //$NON-NLS-2$ |
| } |
| } |