| /******************************************************************************* |
| * Copyright (c) 2006, 2012 IBM Corporation and others. |
| * |
| * This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License 2.0 |
| * which accompanies this distribution, and is available at |
| * https://www.eclipse.org/legal/epl-2.0/ |
| * |
| * SPDX-License-Identifier: EPL-2.0 |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.osgi.util; |
| |
| import java.util.Locale; |
| |
/**
 * This class is used to process strings that have special semantic meaning
 * (such as file paths) in RTL-oriented locales so that they render in a way
 * that does not corrupt the semantic meaning of the string but also maintains
 * compliance with the Unicode BiDi algorithm of rendering Bidirectional text.
 * <p>
 * Processing of the string is done by breaking it down into segments that are
 * specified by a set of user provided delimiters. Directional punctuation
 * characters are injected into the string in order to ensure the string retains
 * its semantic meaning and conforms with the Unicode BiDi algorithm within each
 * segment.
 * </p>
 * <p>
 * Whether any processing happens at all is decided once, when this class is
 * initialized, from the default locale and the <code>os.name</code> system
 * property. When processing is not needed every method returns its argument
 * unchanged.
 * </p>
 *
 * @since 3.2
 * @noextend This class is not intended to be subclassed by clients.
 */
public class TextProcessor {

	// commonly used delimiters
	/**
	 * Dot (.) delimiter. Used most often in package names and file extensions.
	 */
	private static final String DOT = "."; //$NON-NLS-1$

	/**
	 * Colon (:) delimiter. Used most often in file paths and URLs.
	 */
	private static final String COLON = ":"; //$NON-NLS-1$

	/**
	 * Forward slash (/) delimiter. Used most often in file paths and URLs.
	 */
	private static final String FILE_SEP_FSLASH = "/"; //$NON-NLS-1$

	/**
	 * Backslash (\) delimiter. Used most often in file paths.
	 */
	private static final String FILE_SEP_BSLASH = "\\"; //$NON-NLS-1$

	/**
	 * The default set of delimiters to use to segment a string.
	 */
	private static final String delimiterString = DOT + COLON + FILE_SEP_FSLASH + FILE_SEP_BSLASH;

	// left to right marker
	private static final char LRM = '\u200e';

	// left to right embedding
	private static final char LRE = '\u202a';

	// pop directional format
	private static final char PDF = '\u202c';

	// Sentinel meaning "no delimiter is currently waiting for an LRM".
	// It can never collide with a real position: the output buffer always
	// starts with LRE, so recorded delimiter indices are always >= 1.
	private static final int INDEX_NOT_SET = -1;

	// whether or not processing is needed; decided once at class initialization
	private static final boolean IS_PROCESSING_NEEDED = isProcessingNeeded();

	/*
	 * Decide whether strings have to be processed: the default locale's
	 * language must be one of Hebrew ("iw"/"he"), Arabic, Farsi or Urdu AND the
	 * operating system name must start with windows, linux, mac or freebsd.
	 */
	private static boolean isProcessingNeeded() {
		String lang = Locale.getDefault().getLanguage();
		boolean isBidiLanguage = "iw".equals(lang) || "he".equals(lang) || "ar".equals(lang) || "fa".equals(lang) || "ur".equals(lang); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
		if (!isBidiLanguage) {
			return false;
		}

		String osName = System.getProperty("os.name"); //$NON-NLS-1$
		if (osName == null) {
			// a missing property must not break class initialization
			return false;
		}
		// lower-case independently of the default locale
		osName = osName.toLowerCase(Locale.ROOT);
		return osName.startsWith("windows") || osName.startsWith("linux") || osName.startsWith("mac") || osName.startsWith("freebsd"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
	}

	/**
	 * Process the given text and return a string with the appropriate
	 * substitution based on the locale. This is equivalent to calling
	 * <code>process(String, String)</code> with the default set of
	 * delimiters.
	 *
	 * @param text
	 *            the text to be processed
	 * @return the manipulated string
	 * @see #process(String, String)
	 * @see #getDefaultDelimiters()
	 */
	public static String process(String text) {
		if (!IS_PROCESSING_NEEDED || text == null || text.length() <= 1) {
			return text;
		}
		return process(text, getDefaultDelimiters());
	}

	/**
	 * Process a string that has a particular semantic meaning to render on BiDi
	 * locales in a way that maintains the semantic meaning of the text, but
	 * differs from the Unicode BiDi algorithm. The text is segmented according
	 * to the provided delimiters. Each segment has the Unicode BiDi algorithm
	 * applied to it, but as a whole, the string is oriented left to right.
	 * <p>
	 * For example a file path such as <code>d:\myFolder\FOLDER\MYFILE.java</code>
	 * (where capital letters indicate RTL text) should render as
	 * <code>d:\myFolder\REDLOF\ELIFYM.java</code> when using the Unicode BiDi
	 * algorithm and segmenting the string according to the specified delimiter
	 * set.
	 * </p>
	 * <p>
	 * The following algorithm is used:
	 * </p>
	 * <ol>
	 * <li>Scan the string to locate the delimiters.</li>
	 * <li>While scanning, note the direction of the last strong character
	 * scanned. Strong characters are characters which have a BiDi
	 * classification of L, R or AL as defined in the Unicode standard.</li>
	 * <li>If the last strong character before a separator is of class R or AL,
	 * add a LRM before the separator. Since LRM itself is a strong L character,
	 * following separators do not need an LRM until a strong R or AL character
	 * is found.</li>
	 * <li>If the component where the pattern is displayed has a RTL basic
	 * direction, add a LRE at the beginning of the pattern and a PDF at its
	 * end. The string is considered to have RTL direction if it contains RTL
	 * characters and the runtime locale is BiDi. There is no need to add
	 * LRE/PDF if the string begins with an LTR letter, contains no RTL letter,
	 * and ends with either a LTR letter or a digit.</li>
	 * </ol>
	 * <p>
	 * NOTE: this method will change the shape of the original string passed in
	 * by inserting punctuation characters into the text in order to make it
	 * render to correctly reflect the semantic meaning of the text. Methods
	 * like <code>String.equals(String)</code> and
	 * <code>String.length()</code> called on the resulting string will not
	 * return the same values as would be returned for the original string.
	 * </p>
	 * <p>
	 * A string that already starts with LRE and ends with PDF is considered
	 * processed and is returned unchanged.
	 * </p>
	 *
	 * @param str
	 *            the text to process, if <code>null</code> return the string
	 *            as it was passed in
	 * @param delimiter
	 *            delimiters by which the string will be segmented, if
	 *            <code>null</code> the default delimiters are used
	 * @return the processed string
	 */
	public static String process(String str, String delimiter) {
		if (!IS_PROCESSING_NEEDED || str == null || str.length() <= 1) {
			return str;
		}

		// do not process a string that has already been processed.
		if (str.charAt(0) == LRE && str.charAt(str.length() - 1) == PDF) {
			return str;
		}

		// String contains RTL characters
		boolean isStringBidi = false;
		// Last strong character is RTL
		boolean isLastRTL = false;
		// Buffer position of the most recent delimiter that followed RTL text
		// and may still need an LRM in front of it
		int delimIndex = INDEX_NOT_SET;

		String delimiters = delimiter == null ? getDefaultDelimiters() : delimiter;

		// room for the text, the LRE/PDF pair and a few LRMs
		StringBuilder target = new StringBuilder(str.length() + 8);
		target.append(LRE);

		for (int i = 0, n = str.length(); i < n; i++) {
			char ch = str.charAt(i);
			if (delimiters.indexOf(ch) != -1) {
				// character is a delimiter; remember where it lands in the
				// buffer, but only when the last strong character was RTL
				if (isLastRTL) {
					delimIndex = target.length();
				}
			} else if (Character.isDigit(ch)) {
				if (delimIndex != INDEX_NOT_SET) {
					// consecutive neutral and weak directional characters
					// explicitly force direction to be LRM
					target.insert(delimIndex, LRM);
					delimIndex = INDEX_NOT_SET;
					// the LRM just inserted is now the last strong character
					isLastRTL = false;
				}
			} else if (Character.isLetter(ch)) {
				if (isRTL(ch)) {
					isStringBidi = true;
					if (delimIndex != INDEX_NOT_SET) {
						// neutral character followed by strong right directional character
						// explicitly force direction to be LRM
						target.insert(delimIndex, LRM);
						delimIndex = INDEX_NOT_SET;
					}
					isLastRTL = true;
				} else {
					// strong LTR character, no LRM will be required
					delimIndex = INDEX_NOT_SET;
					isLastRTL = false;
				}
			}
			target.append(ch);
		}

		/*
		 * TextProcessor is not aware of the orientation of the component owning
		 * the processed string. This point is only reached when the runtime
		 * locale is BiDi, so enclose the string in LRE/PDF when any of the
		 * following holds:
		 * (1) the string contains RTL characters - implying that the string
		 *     appearance depends on the basic orientation
		 * (2) the string does not start with a letter
		 * (3) the string ends with a character that is neither a letter nor a
		 *     digit
		 */
		if (isStringBidi || !Character.isLetter(str.charAt(0)) || isNeutral(str.charAt(str.length() - 1))) {
			target.append(PDF);
			return target.toString();
		}
		// Otherwise, return the original string
		return str;
	}

	/**
	 * Removes directional marker characters in the given string that were inserted by
	 * utilizing the <code>process(String)</code> or <code>process(String, String)</code>
	 * methods. Every LRE, PDF and LRM character is dropped, wherever it occurs
	 * in the string. When processing is not needed for the current runtime the
	 * string is returned unchanged.
	 *
	 * @param str string with directional markers to remove
	 * @return string with no directional markers
	 * @see #process(String)
	 * @see #process(String, String)
	 * @since 3.3
	 */
	public static String deprocess(String str) {
		if (!IS_PROCESSING_NEEDED || str == null || str.length() <= 1) {
			return str;
		}

		StringBuilder buf = new StringBuilder(str.length());
		for (int i = 0, n = str.length(); i < n; i++) {
			char c = str.charAt(i);
			if (c != LRE && c != PDF && c != LRM) {
				buf.append(c);
			}
		}
		return buf.toString();
	}

	/**
	 * Return the string containing all the default delimiter characters to be
	 * used to segment a given string.
	 *
	 * @return delimiter string
	 */
	public static String getDefaultDelimiters() {
		return delimiterString;
	}

	/*
	 * Return whether or not the character is right to left oriented.
	 */
	private static boolean isRTL(char c) {
		/*
		 * Cannot use Character.getDirectionality() since the OSGi library can
		 * be compiled with execution environments that pre-date that API.
		 *
		 * The first range of characters is Unicode Hebrew and Arabic
		 * characters. The second range of characters is Unicode Hebrew and
		 * Arabic presentation forms.
		 *
		 * NOTE: Farsi and Urdu fall within the Arabic scripts.
		 */
		return (((c >= 0x05d0) && (c <= 0x07b1)) || ((c >= 0xfb1d) && (c <= 0xfefc)));
	}

	/*
	 * Return whether or not the given character is treated as neutral here,
	 * i.e. it is neither a digit nor a letter.
	 */
	private static boolean isNeutral(char c) {
		return !(Character.isDigit(c) || Character.isLetter(c));
	}

	/*
	 * Constructor for the class.
	 */
	private TextProcessor() {
		// prevent instantiation
	}
}