| //------------------------------------------------------------------------------ |
| // Copyright (c) 2005, 2006 IBM Corporation and others. |
| // All rights reserved. This program and the accompanying materials |
| // are made available under the terms of the Eclipse Public License v1.0 |
| // which accompanies this distribution, and is available at |
| // http://www.eclipse.org/legal/epl-v10.html |
| // |
| // Contributors: |
| // IBM Corporation - initial implementation |
| //------------------------------------------------------------------------------ |
| package org.eclipse.epf.common.utils; |
| |
| import java.util.List; |
| import java.util.regex.Pattern; |
| |
| import org.eclipse.core.runtime.Platform; |
| |
| import com.ibm.icu.util.StringTokenizer; |
| |
| /** |
| * A helper class for manuipulating strings. |
| * |
| * @author Kelvin Low |
| * @author Jinhua Xi |
| * @since 1.0 |
| */ |
| public class StrUtil { |
| |
| private static final String REGEXP_ANY_SPECIAL_CHARS = "(`|~|!|@|#|\\$|%|\\^|&|\\*|\\(|\\)|\\+|=|\\[|\\]|\\||\\:|\"|<|>|\\?|/|'|\\s|\\\\)+"; //$NON-NLS-1$ |
| |
| private static final String REGEXP_INVALID_PUBLISHED_PATH_CHARS = "(\\[|#|\\*|\\?|\"|<|>|\\||!|%|/|\\])+"; //$NON-NLS-1$ |
| |
| private static final String REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX = "(\\[|#|\\*|\\?|\"|<|>|\\||!|%|\\])+"; //$NON-NLS-1$ |
| |
| private static final String REGEXP_INVALID_FILENAME_CHARS = "(\\[|#|/|\\\\|\\:|\\*|\\?|\"|<|>|\\||\\]|\\s)+"; //$NON-NLS-1$ |
| |
| private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', |
| '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; |
| |
| /** |
| * Private constructor to prevent this class from being instantiated. All |
| * methods in this class should be static. |
| */ |
| private StrUtil() { |
| } |
| |
| /** |
| * Tests for null string. |
| * <p> |
| * A null string is defined as one that has an empty reference or has zero |
| * length. |
| * |
| * @param str |
| * A string. |
| * @return <code>true</code> if the given string is a null string. |
| */ |
| public static boolean isNull(String str) { |
| return str == null || str.length() == 0; |
| } |
| |
| /** |
| * Tests for blank string. |
| * <p> |
| * A blank string is defined as one that has an empty reference or has zero |
| * length after the leading and trailing space characters are trimmed. |
| * |
| * @param str |
| * A string. |
| * @return <code>true</code> if the given string is a blank string. |
| */ |
| public static boolean isBlank(String str) { |
| return str == null || str.trim().length() == 0; |
| } |
| |
| /** |
| * Removes the leading and trailing space characters from a string. |
| * |
| * @param str |
| * A string. |
| * @return A string with no leading and trailing space characters. |
| */ |
| public static String trim(String str) { |
| return str == null ? null : str.trim(); |
| } |
| |
| /** |
| * Removes whitespace characters (TAB, CR, LF and SPACE) from a string. |
| * |
| * @param str |
| * A string. |
| * @return A string with whitespace characters removed. |
| */ |
| public static String removeWhiteSpaceChars(String str) { |
| int len = (str == null) ? 0 : str.length(); |
| for (int i = 0; i < len; i++) { |
| switch (str.charAt(i)) { |
| case '\t': |
| case '\r': |
| case '\n': |
| case ' ': |
| break; |
| default: |
| return str; |
| } |
| } |
| return ""; //$NON-NLS-1$ |
| } |
| |
| /** |
| * Splits a string into an array of string tokens. |
| * |
| * @param str |
| * A string. |
| * @param sep |
| * A string containing the string separators. |
| * @param count |
| * The desired number of string tokens. |
| * @return An array of string tokens. |
| */ |
| public static String[] split(String str, String sep, int count) { |
| if (str == null || count == 0 || count < -1) { |
| return null; |
| } |
| |
| StringTokenizer tokenizer = new StringTokenizer(str, sep, |
| count == -1 ? false : true); |
| |
| if (count == -1) { |
| count = tokenizer.countTokens(); |
| } |
| |
| String[] result = new String[count]; |
| int i = 0; |
| while (tokenizer.hasMoreTokens()) { |
| String t = tokenizer.nextToken(); |
| if (i < count) { |
| if ((t.length() == 1) && (sep.indexOf(t) != -1)) { |
| continue; |
| } |
| result[i++] = t; |
| } else { |
| result[count - 1] += t; |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Splits a string into an array of string tokens. |
| * |
| * @param str |
| * A string. |
| * @param sep |
| * A string containing the string separators. |
| * @return An array of string tokens. |
| */ |
| public static String[] split(String str, String sep) { |
| return split(str, sep, -1); |
| } |
| |
| /** |
| * Replaces a substring within a string with another substring. |
| * <p> |
| * Note: Only the first occurrence of the substring will be replaced. |
| * |
| * @param str |
| * A string. |
| * @param src |
| * The substring to replace. |
| * @param tgt |
| * The substring to use for the replacement. |
| * @return A string with the first substring replaced. |
| * |
| * TODO: Review implementation. Call String.replaceFirst() instead? |
| */ |
| public static String replace(String str, String src, String tgt) { |
| if ((str == null || str.length() == 0) |
| || (src == null || src.length() == 0)) { |
| return str; |
| } |
| |
| String tmpStr = str; |
| int index; |
| while ((index = tmpStr.indexOf(src)) != -1) { |
| tmpStr = tmpStr.substring(0, index) + tgt |
| + tmpStr.substring(index + src.length()); |
| } |
| return tmpStr; |
| } |
| |
| /** |
| * Returns the integer value of a string. |
| * |
| * @param str |
| * A string. |
| * @param defaultValue |
| * A default integer value for the string. |
| * @param The |
| * integer value of the given string. |
| */ |
| public static int getIntValue(String str, int defaultValue) { |
| if (StrUtil.isBlank(str)) { |
| return defaultValue; |
| } |
| |
| try { |
| return Integer.parseInt(str); |
| } catch (NumberFormatException e) { |
| return defaultValue; |
| } |
| } |
| |
| /** |
| * Returns an array of bytes representing the UTF-8 encoding of a string. |
| * |
| * @param str |
| * A string. |
| * @return A byte array containing the UTF-8 encoding of the given string. |
| */ |
| public static byte[] getUTF8Bytes(String str) { |
| char[] c = str.toCharArray(); |
| int len = c.length; |
| int count = 0; |
| for (int i = 0; i < len; i++) { |
| int ch = c[i]; |
| if (ch <= 0x7f) { |
| count++; |
| } else if (ch <= 0x7ff) { |
| count += 2; |
| } else { |
| count += 3; |
| } |
| } |
| |
| byte[] b = new byte[count]; |
| int off = 0; |
| for (int i = 0; i < len; i++) { |
| int ch = c[i]; |
| if (ch <= 0x7f) { |
| b[off++] = (byte) ch; |
| } else if (ch <= 0x7ff) { |
| b[off++] = (byte) ((ch >> 6) | 0xc0); |
| b[off++] = (byte) ((ch & 0x3f) | 0x80); |
| } else { |
| b[off++] = (byte) ((ch >> 12) | 0xe0); |
| b[off++] = (byte) (((ch >> 6) & 0x3f) | 0x80); |
| b[off++] = (byte) ((ch & 0x3f) | 0x80); |
| } |
| } |
| return b; |
| } |
| |
| /** |
| * Returns the hexidecimal character representation for an integer. |
| * |
| * @param value |
| * An integer. |
| * @return The hexidecimal representation. |
| */ |
| private static char toHex(int value) { |
| return HEX_DIGITS[(value & 0xF)]; |
| } |
| |
| /** |
| * Returns the escaped Unicode representation of a string. |
| * |
| * @param str |
| * A string. |
| * @param skipASCII |
| * If true, avoid escaping the ASCII characters. |
| * @return The escaped Unicode representation of the given string. |
| */ |
| public static String toEscapedUnicode(String str, boolean skipASCII) { |
| int len = str.length(); |
| StringBuffer result = new StringBuffer(len * 2); |
| |
| for (int i = 0; i < len; i++) { |
| char ch = str.charAt(i); |
| |
| if (skipASCII && ch < 0x007E) { |
| result.append(ch); |
| } else { |
| result.append("\\u"); //$NON-NLS-1$ |
| result.append(toHex((ch >> 12) & 0xF)); |
| result.append(toHex((ch >> 8) & 0xF)); |
| result.append(toHex((ch >> 4) & 0xF)); |
| result.append(toHex(ch & 0xF)); |
| } |
| } |
| |
| return result.toString(); |
| } |
| |
| /** |
| * Returns the plain text from HTML text. |
| * <p> |
| * Note: All HTML tags will be stripped. |
| * |
| * @param html |
| * The HTML text. |
| * @return The plain text representation of the given HTML text. |
| */ |
| public static String getPlainText(String html) { |
| if (html == null) { |
| return ""; //$NON-NLS-1$ |
| } |
| |
| final Pattern p_plaintext_filter = Pattern.compile( |
| "<[^>]*?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); //$NON-NLS-1$ |
| final Pattern p_plaintext_filter2 = Pattern.compile( |
| "&.{1,5}[^;];", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); //$NON-NLS-1$ |
| final Pattern p_plaintext_filter3 = Pattern.compile( |
| "\\s+", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); //$NON-NLS-1$ |
| |
| String result = html.replaceAll(p_plaintext_filter.pattern(), " ") //$NON-NLS-1$ |
| .replaceAll(p_plaintext_filter2.pattern(), " ") //$NON-NLS-1$ |
| .replaceAll(p_plaintext_filter3.pattern(), " "); //$NON-NLS-1$ |
| return result; |
| } |
| |
| /** |
| * Converts a string into a valid file name. |
| * |
| * @param str |
| * A string. |
| * @return A valid file name dervied from the given string. |
| */ |
| public static String makeValidFileName(String str) { |
| if (str == null) { |
| return ""; //$NON-NLS-1$ |
| } |
| return getPlainText(str).replaceAll(REGEXP_INVALID_FILENAME_CHARS, " ").trim(); //$NON-NLS-1$ //$NON-NLS-2$ |
| } |
| |
| public static String removeSpecialCharacters(String str) { |
| if (str == null) { |
| return ""; //$NON-NLS-1$ |
| } |
| return getPlainText(str).replaceAll(REGEXP_ANY_SPECIAL_CHARS, " ").trim(); //$NON-NLS-1$ //$NON-NLS-2$ |
| } |
| |
| /** |
| * Returns true if the path does not contain any invalid filename |
| * characters. |
| * |
| * @param path |
| * The file path. |
| * @return <code>true</code> if the given path contains only valid |
| * filename characters. |
| */ |
| public static boolean isValidPublishPath(String path) { |
| // return path.replaceAll(invalidPublishPathCharsRegExp, |
| // "").equals(path); |
| |
| if (Platform.getOS().equals(Platform.WS_WIN32)) { |
| return path |
| .replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS, "").equals(path); //$NON-NLS-1$ |
| } |
| |
| // else default to Linux |
| return path |
| .replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX, "").equals(path); //$NON-NLS-1$ |
| } |
| |
| /** |
| * Returns the lower-case of str unless the current locale is German |
| * |
| * @param str |
| * @return |
| */ |
| public static String toLower(String str) { |
| if (Platform.getNL().startsWith("de")) { //$NON-NLS-1$ |
| return str; |
| } |
| return str.toLowerCase(); |
| } |
| |
| /** |
| * escape the " and ' |
| * |
| * @param str |
| * @return |
| */ |
| public static String escape(String str) { |
| return str.replaceAll("'", "\\\\\'").replaceAll("\"", "\\\\\""); |
| } |
| |
| /** |
| * Converts a <code>List</code> to an String array. |
| * |
| * @param list a <code>List</code> |
| * @return a String array |
| */ |
| public static String[] convertListToStrArray(List list) { |
| if (list != null) { |
| int cnt = list.size(); |
| String[] strArray = new String[cnt]; |
| |
| for (int i = 0; i < cnt; i++) { |
| String str = (String) list.get(i); |
| strArray[i] = new String(str); |
| } |
| return strArray; |
| } else { |
| return null; |
| } |
| } |
| |
| /** |
| * added some test code |
| * |
| * @param args |
| */ |
| public static void main(String[] args) { |
| |
| String text = "A&% `&# $~!@#$%^&*()_-+={}[\"]:|\\:;\"'<,>.?/ \t\r\nZ"; |
| |
| System.out.println("[" + text + "] <== text"); |
| System.out.println("[" + text.replaceAll(REGEXP_ANY_SPECIAL_CHARS, " ") + "] <== All"); |
| System.out.println("[" + text.replaceAll(REGEXP_INVALID_FILENAME_CHARS, " ") + "] <== File Name"); |
| System.out.println("[" + text.replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS, " ") + "] <== path"); |
| System.out.println("[" + text.replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX, " ") + "] <== path, Linux"); |
| |
| System.out.println(""); |
| } |
| } |