| /******************************************************************************* |
| * Copyright (c) 2006 Sybase, Inc. and others. |
| * |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Sybase, Inc. - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.jst.pagedesigner.utils; |
| |
| import org.eclipse.jst.pagedesigner.PDPlugin; |
| import org.eclipse.jst.pagedesigner.common.logging.Logger; |
| |
| /** |
| * @author mengbo |
| */ |
| public class EntityMap { |
| private static Logger _log = PDPlugin.getLogger(EntityMap.class); |
| |
| public static String translate(String s) { |
| char[] array = s.toCharArray(); |
| StringBuffer buffer = new StringBuffer(); |
| int posi = 0; |
| int len = array.length; |
| while (posi < len) { |
| if (array[posi] != '&') { |
| buffer.append(replaceBadEntity(array[posi++])); |
| continue; |
| } |
| |
| // now array[posi] == '&' |
| int lastPosi = posi; |
| posi++; |
| if (posi < len && array[posi] == '#') { |
| posi++; |
| } |
| while (posi < len) { |
| if (!Character.isLetterOrDigit(array[posi])) |
| break; |
| else |
| posi++; |
| } |
| // now posi>=array.length or array[posi] is non letter or digit |
| String str = new String(array, lastPosi, posi - lastPosi); |
| if (translateEntity(str, buffer)) { |
| // translated, skip the ';' |
| if (posi < len && array[posi] == ';') { |
| posi++; |
| } |
| } |
| |
| if (posi == len) { |
| return buffer.toString(); |
| } |
| } |
| return buffer.toString(); |
| } |
| |
| /** |
| * Translate entity maps and compact whitespace. For heading and training |
| * space, will not trim, only compact (making multiple whitespace to become |
| * a single ' ' char). |
| */ |
| public static String translateAndCompact(String s) { |
| char[] array = s.toCharArray(); |
| StringBuffer buffer = new StringBuffer(); |
| int posi = 0; |
| int len = array.length; |
| while (posi < len) { |
| if (HTMLUtil.isHTMLWhitespace(array[posi])) { |
| while (++posi < len && HTMLUtil.isHTMLWhitespace(array[posi])) |
| ; |
| buffer.append(' '); |
| continue; |
| } |
| if (array[posi] != '&') { |
| buffer.append(replaceBadEntity(array[posi++])); |
| continue; |
| } |
| |
| // now array[posi] == '&' |
| int lastPosi = posi; |
| posi++; |
| if (posi < len && array[posi] == '#') { |
| posi++; |
| } |
| while (posi < len) { |
| if (!Character.isLetterOrDigit(array[posi])) { |
| break; |
| } else { |
| posi++; |
| } |
| } |
| // now posi>=array.length or array[posi] is non letter or digit |
| String str = new String(array, lastPosi, posi - lastPosi); |
| |
| if (translateEntity(str, buffer)) { |
| // translated, skip the ';' |
| if (posi < len && array[posi] == ';') { |
| posi++; |
| } |
| } |
| |
| if (posi == len) { |
| return buffer.toString(); |
| } |
| } |
| return buffer.toString(); |
| } |
| |
| /** |
| * if can translate will return true and append the result string if can't |
| * translate will return false and append original string |
| * |
| * @param s |
| * the form &#number or &letterordigit without the trailing ";" |
| */ |
| public static boolean translateEntity(String s, StringBuffer strBuf) { |
| int i = HTMLSpecialCharHelper.getSpecial(s); // HTMLSpecialCharHelper |
| // support without |
| // traning ';' |
| if (i != -1) { |
| strBuf.append((char) i); |
| return true; |
| } |
| if (s.length() > 2 && s.charAt(1) == '#') { |
| String number; |
| number = s.substring(2); |
| try { |
| int n; |
| if (number.length() > 0 |
| && (number.charAt(0) == 'x' || number.charAt(0) == 'X')) { |
| n = Integer.parseInt(number.substring(1), 16); |
| } else { |
| n = Integer.parseInt(number); |
| } |
| strBuf.append(replaceBadEntity((char) n)); |
| return true; |
| } catch (Exception ex) { |
| // Error in integer formating |
| _log.info("Debug.EntityMap.0", ex); //$NON-NLS-1$ |
| strBuf.append(s); |
| return false; |
| } |
| } else { |
| strBuf.append(s); |
| return false; |
| } |
| } |
| |
| /** |
| * In HTML • is sometimes used (mostly based on CP 1252), but is |
| * illegal, because it does not exist in Unicode |
| * |
| * @param n |
| * @return |
| * @see http://www.w3.org/Talks/1999/0830-tutorial-unicode-mjd/slide27-0.html |
| */ |
| private static char replaceBadEntity(char n) { |
| if (n < 132 || n > 156) |
| return n; |
| switch ((int) n) { |
| case 132: |
| return (char) 8222; |
| case 133: |
| return (char) 8230; |
| case 134: |
| return (char) 8224; |
| case 135: |
| return (char) 8225; |
| case 139: |
| return (char) 8249; |
| case 140: |
| return (char) 338; |
| case 145: |
| return (char) 8216; |
| case 146: |
| return (char) 8217; |
| case 147: |
| return (char) 8220; |
| case 148: |
| return (char) 8221; |
| case 149: |
| return (char) 8226; |
| case 151: |
| return (char) 8212; |
| case 153: |
| return (char) 8482; |
| case 155: |
| return (char) 8250; |
| case 156: |
| return (char) 339; |
| default: |
| return n; |
| } |
| } |
| |
| } |