| /******************************************************************************* |
| * Copyright (c) 2001, 2008 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| * Jens Lukowski/Innoopract - initial renaming/restructuring |
| * |
| *******************************************************************************/ |
| package org.eclipse.wst.sse.core.internal.encoding; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.net.URL; |
| import java.nio.charset.Charset; |
| import java.nio.charset.IllegalCharsetNameException; |
| import java.util.Properties; |
| |
| import org.eclipse.core.runtime.IPath; |
| import org.eclipse.core.runtime.Path; |
| import org.eclipse.core.runtime.Platform; |
| import org.eclipse.core.runtime.content.IContentDescription; |
| import org.eclipse.wst.sse.core.internal.encoding.util.Assert; |
| import org.eclipse.wst.sse.core.internal.encoding.util.Logger; |
| import org.osgi.framework.Bundle; |
| |
| |
| public abstract class CodedIO { |
| |
| private final boolean DEBUG = false; |
| |
| public static final int MAX_BUF_SIZE = 1024 * 8; |
| |
| public static final int MAX_MARK_SIZE = MAX_BUF_SIZE; |
| |
| public static final String NO_SPEC_DEFAULT = "NoSpecDefault"; //$NON-NLS-1$ |
| |
| private static Properties overridenCharsets = null; |
| |
| /** |
| * <p> |
| * There are two well known understood cases where the standard/default |
| * Java Mappings are not sufficient. (Thanks to Hirotaka Matsumoto for |
| * providing these two). I believe there are others that individual |
| * customers have requested to override on a case by case basis, but I've |
| * lost the details. TODO-future: document some of those use-cases. |
| * </p> |
| * <ul> |
| * <li>ISO-8859-8-I</li> |
| * <p> |
| * In the code conversion point of view, ISO-9959-8 and ISO-8859-8-I are |
| * the same. However. the representation on the browser is different. ( |
| * It's very very hard to explain this into the words, but once you will |
| * see, you will understand it :) Many BiDi HTML/JSPs use ISO-8859-8-I in |
| * META/page directive. So WSAD needs to support this encoding. |
| * </p> |
| * <li>X-SJIS</li> |
| * <p> |
| * Because Mosaic/Navigator 2.0 supported only X-SJIS/X-EUC-JP, lots of |
| * old HTML files used X-SJIS/X-EUC-JP so that the customers still want us |
| * to support this code conversion for HTML files. |
| * </p> |
| * </ul> |
| * |
| * @param detectedCharsetName |
| * @return the detectedCharsetName, if no overrides, otherwise the charset |
| * name that should be used instead of detectedCharsetName |
| */ |
| /** |
| * This method is deliberatly 'default access' since clients should not |
| * need to access this information directly. |
| */ |
| static public String checkMappingOverrides(String detectedCharsetName) { |
| // This method MUST return what was passed in, if |
| // there are no |
| // overrides. |
| String result = detectedCharsetName; |
| String newResult = getOverridenCharsets().getProperty(detectedCharsetName); |
| if (newResult != null) { |
| result = newResult; |
| } |
| return result; |
| } |
| |
| /** |
| * Note: once this instance is created, trace info still needs to be |
| * appended by caller, depending on the context its created. |
| */ |
| public static EncodingMemento createEncodingMemento(byte[] detectedBom, String javaCharsetName, String detectedCharsetName, String unSupportedName, String specDefaultEncoding, String reason) { |
| EncodingMemento result = new EncodingMemento(); |
| result.setJavaCharsetName(javaCharsetName); |
| result.setDetectedCharsetName(detectedCharsetName); |
| // TODO: if detectedCharset and spec default is |
| // null, need to use "work |
| // bench based" defaults. |
| if (specDefaultEncoding == null) |
| result.setAppropriateDefault(NO_SPEC_DEFAULT); |
| else |
| result.setAppropriateDefault(specDefaultEncoding); |
| if (unSupportedName != null) { |
| result.setInvalidEncoding(unSupportedName); |
| } |
| // check if valid |
| try { |
| Charset.isSupported(javaCharsetName); |
| } catch (IllegalCharsetNameException e) { |
| result.setInvalidEncoding(javaCharsetName); |
| } |
| |
| // check UTF83ByteBOMUsed and UnicodeStream |
| if (detectedBom != null) { |
| if (detectedBom.length == 2) |
| result.setUnicodeStream(true); |
| else if (detectedBom.length == 3) |
| result.setUTF83ByteBOMUsed(true); |
| result.setUnicodeBOM(detectedBom); |
| } |
| return result; |
| } |
| |
| /** |
| * Note: once this instance is created, trace info still needs to be |
| * appended by caller, depending on the context its created. |
| */ |
| public static EncodingMemento createEncodingMemento(String detectedCharsetName) { |
| return createEncodingMemento(detectedCharsetName, null); |
| } |
| |
| /** |
| * Note: once this instance is created, trace info still needs to be |
| * appended by caller, depending on the context its created. |
| */ |
| public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason) { |
| return createEncodingMemento(detectedCharsetName, reason, null); |
| } |
| |
| /** |
| * Note: once this instance is created, trace info still needs to be |
| * appended by caller, depending on the context its created. |
| */ |
| public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason, String specDefaultEncoding) { |
| EncodingMemento result = new EncodingMemento(); |
| result = new EncodingMemento(); |
| String javaCharset = getAppropriateJavaCharset(detectedCharsetName); |
| result.setJavaCharsetName(javaCharset); |
| result.setDetectedCharsetName(detectedCharsetName); |
| // TODO: if detectedCharset and spec default is |
| // null, need to use "work |
| // bench based" defaults. |
| if (specDefaultEncoding == null) |
| result.setAppropriateDefault(NO_SPEC_DEFAULT); |
| else |
| result.setAppropriateDefault(specDefaultEncoding); |
| // check if valid |
| try { |
| Charset.isSupported(javaCharset); |
| } catch (IllegalCharsetNameException e) { |
| result.setInvalidEncoding(javaCharset); |
| } |
| |
| return result; |
| } |
| |
| /** |
| * This method can return null, if invalid charset name (in which case |
| * "appropriateDefault" should be used, if a name is really need for some |
| * "save anyway" cases). |
| * |
| * @param detectedCharsetName |
| * @return |
| */ |
| public static String getAppropriateJavaCharset(String detectedCharsetName) { |
| // we don't allow null argument (or risk NPE or |
| // IllegalArgumentException later at several |
| // points. |
| Assert.isNotNull(detectedCharsetName, "illegal charset argument. it can not be null"); //$NON-NLS-1$ |
| String result = detectedCharsetName; |
| // 1. Check explicit mapping overrides from |
| // property file |
| result = CodedIO.checkMappingOverrides(detectedCharsetName); |
| // 2. Use the "canonical" name from JRE mappings |
| // Note: see Charset JavaDoc, the name you get one |
| // with can be alias, |
| // the name you get back is "standard" name. |
| Charset javaCharset = null; |
| // Note: this will immediatly throw |
| // "UnsuppotedCharsetException" if it |
| // invalid. Issue: Is it more client friendly to |
| // eat that exception and return null? |
| javaCharset = Charset.forName(result); |
| if (javaCharset != null) { |
| result = javaCharset.name(); |
| } |
| return result; |
| } |
| |
| /** |
| * @return Returns the overridenCharsets. |
| */ |
| private static Properties getOverridenCharsets() { |
| if (overridenCharsets == null) { |
| overridenCharsets = new Properties(); |
| Bundle keyBundle = Platform.getBundle(ICodedResourcePlugin.ID); |
| IPath keyPath = new Path("config/override.properties"); //$NON-NLS-1$ |
| URL location = Platform.find(keyBundle, keyPath); |
| InputStream propertiesInputStream = null; |
| try { |
| propertiesInputStream = location.openStream(); |
| overridenCharsets.load(propertiesInputStream); |
| } catch (IOException e) { |
| // if can't read, just assume there's no |
| // overrides |
| // and repeated attempts will not occur, |
| // since they |
| // will be represented by an empty |
| // Properties object |
| } |
| } |
| return overridenCharsets; |
| } |
| |
| /** |
| * This class need not be instantiated (though its subclasses can be). |
| */ |
| protected CodedIO() { |
| super(); |
| } |
| |
| protected EncodingMemento createMemento(IContentDescription contentDescription) { |
| EncodingMemento result; |
| String appropriateDefault = contentDescription.getContentType().getDefaultCharset(); |
| String detectedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.DETECTED_CHARSET); |
| String unSupportedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET); |
| String javaCharset = contentDescription.getCharset(); |
| // integrity checks for debugging |
| if (javaCharset == null) { |
| Logger.log(Logger.INFO_DEBUG, "charset equaled null!"); //$NON-NLS-1$ |
| } else if (javaCharset.length() == 0) { |
| Logger.log(Logger.INFO_DEBUG, "charset equaled emptyString!"); //$NON-NLS-1$ |
| } |
| byte[] BOM = (byte[]) contentDescription.getProperty(IContentDescription.BYTE_ORDER_MARK); |
| //result = (EncodingMemento) |
| // contentDescription.getProperty(IContentDescriptionExtended.ENCODING_MEMENTO); |
| result = createEncodingMemento(BOM, javaCharset, detectedCharset, unSupportedCharset, appropriateDefault, null); |
| if (!result.isValid()) { |
| result.setAppropriateDefault(appropriateDefault); |
| // integrity check for debugging "invalid" cases. |
| // the apprriate default we have, should equal what's in the |
| // detected field. (not sure this is always required) |
| if (DEBUG && appropriateDefault != null && !appropriateDefault.equals(detectedCharset)) { |
| Logger.log(Logger.INFO_DEBUG, "appropriate did not equal detected, as expected for invalid charset case"); //$NON-NLS-1$ |
| } |
| } |
| return result; |
| } |
| } |