blob: 1f318294a4f0315f6311c0a139cee82b87449500 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2001, 2008 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Jens Lukowski/Innoopract - initial renaming/restructuring
*
*******************************************************************************/
package org.eclipse.wst.sse.core.internal.encoding;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Properties;
import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.Path;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
import org.osgi.framework.Bundle;
/**
 * Static helpers for mapping detected charset names to Java charset names
 * (honoring the overrides listed in <code>config/override.properties</code>)
 * and for building {@link EncodingMemento} instances, plus a protected
 * factory that builds a memento from an {@link IContentDescription}.
 */
public abstract class CodedIO {

    /** Compile-time switch for the extra integrity logging in {@link #createMemento}. */
    private static final boolean DEBUG = false;

    public static final int MAX_BUF_SIZE = 1024 * 8;
    public static final int MAX_MARK_SIZE = MAX_BUF_SIZE;
    public static final String NO_SPEC_DEFAULT = "NoSpecDefault"; //$NON-NLS-1$

    /** Lazily loaded by {@link #getOverridenCharsets()}; null until first use. */
    private static Properties overridenCharsets = null;

    /**
     * Returns the charset name that should be used in place of the detected
     * one, or the detected name itself when no override is configured.
     * <p>
     * There are two well known understood cases where the standard/default
     * Java Mappings are not sufficient. (Thanks to Hirotaka Matsumoto for
     * providing these two). I believe there are others that individual
     * customers have requested to override on a case by case basis, but I've
     * lost the details. TODO-future: document some of those use-cases.
     * </p>
     * <ul>
     * <li>ISO-8859-8-I</li>
     * <p>
     * In the code conversion point of view, ISO-8859-8 and ISO-8859-8-I are
     * the same. However, the representation on the browser is different. (
     * It's very very hard to explain this into the words, but once you will
     * see, you will understand it :) Many BiDi HTML/JSPs use ISO-8859-8-I in
     * META/page directive. So WSAD needs to support this encoding.
     * </p>
     * <li>X-SJIS</li>
     * <p>
     * Because Mosaic/Navigator 2.0 supported only X-SJIS/X-EUC-JP, lots of
     * old HTML files used X-SJIS/X-EUC-JP so that the customers still want us
     * to support this code conversion for HTML files.
     * </p>
     * </ul>
     * <p>
     * Note: this method is public, but clients should not normally need to
     * access this information directly.
     * </p>
     *
     * @param detectedCharsetName
     *            the charset name as detected; may be null
     * @return the detectedCharsetName (possibly null), if no overrides,
     *         otherwise the charset name that should be used instead of
     *         detectedCharsetName
     */
    public static String checkMappingOverrides(String detectedCharsetName) {
        // This method MUST return what was passed in, if there are no
        // overrides. That includes null: Properties.getProperty(null) would
        // throw a NullPointerException, so short-circuit here.
        if (detectedCharsetName == null) {
            return null;
        }
        String override = getOverridenCharsets().getProperty(detectedCharsetName);
        return (override != null) ? override : detectedCharsetName;
    }

    /**
     * Builds an {@link EncodingMemento} from already-resolved pieces.
     * <p>
     * Note: once this instance is created, trace info still needs to be
     * appended by caller, depending on the context its created.
     * </p>
     *
     * @param detectedBom
     *            the byte order mark found, or null if none; a length of 2
     *            marks the memento as a unicode stream, a length of 3 as
     *            using the 3 byte UTF-8 BOM
     * @param javaCharsetName
     *            the Java charset name to record; if it is non-null and is
     *            either syntactically illegal or not supported by this JRE,
     *            it is also recorded as the invalid encoding (replacing
     *            unSupportedName). A null value is recorded as is and not
     *            checked.
     * @param detectedCharsetName
     *            the charset name as detected
     * @param unSupportedName
     *            a name already known to be unsupported, or null
     * @param specDefaultEncoding
     *            the default to record; {@link #NO_SPEC_DEFAULT} is used when
     *            null
     * @param reason
     *            currently unused by this method
     * @return the new memento, never null
     */
    public static EncodingMemento createEncodingMemento(byte[] detectedBom, String javaCharsetName, String detectedCharsetName, String unSupportedName, String specDefaultEncoding, String reason) {
        EncodingMemento result = new EncodingMemento();
        result.setJavaCharsetName(javaCharsetName);
        result.setDetectedCharsetName(detectedCharsetName);
        // TODO: if detectedCharset and spec default is null, need to use
        // "work bench based" defaults.
        result.setAppropriateDefault(specDefaultEncoding == null ? NO_SPEC_DEFAULT : specDefaultEncoding);
        if (unSupportedName != null) {
            result.setInvalidEncoding(unSupportedName);
        }
        // check if valid. Charset.isSupported(null) throws
        // IllegalArgumentException, so a missing name is simply not checked.
        if (javaCharsetName != null && !isUsableCharsetName(javaCharsetName)) {
            result.setInvalidEncoding(javaCharsetName);
        }
        // check UTF83ByteBOMUsed and UnicodeStream
        if (detectedBom != null) {
            if (detectedBom.length == 2) {
                result.setUnicodeStream(true);
            } else if (detectedBom.length == 3) {
                result.setUTF83ByteBOMUsed(true);
            }
            result.setUnicodeBOM(detectedBom);
        }
        return result;
    }

    /**
     * Same as {@link #createEncodingMemento(String, String)} with a null
     * reason.
     * <p>
     * Note: once this instance is created, trace info still needs to be
     * appended by caller, depending on the context its created.
     * </p>
     */
    public static EncodingMemento createEncodingMemento(String detectedCharsetName) {
        return createEncodingMemento(detectedCharsetName, null);
    }

    /**
     * Same as {@link #createEncodingMemento(String, String, String)} with a
     * null spec default encoding.
     * <p>
     * Note: once this instance is created, trace info still needs to be
     * appended by caller, depending on the context its created.
     * </p>
     */
    public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason) {
        return createEncodingMemento(detectedCharsetName, reason, null);
    }

    /**
     * Resolves the detected name with
     * {@link #getAppropriateJavaCharset(String)} and builds a memento for it,
     * with no BOM and no pre-declared unsupported name.
     * <p>
     * Note: once this instance is created, trace info still needs to be
     * appended by caller, depending on the context its created.
     * </p>
     *
     * @param detectedCharsetName
     *            the charset name as detected; must not be null
     * @param reason
     *            currently unused by this method
     * @param specDefaultEncoding
     *            the default to record; {@link #NO_SPEC_DEFAULT} is used when
     *            null
     * @return the new memento, never null
     * @throws IllegalCharsetNameException
     *             if the (possibly overridden) name is not a legal charset
     *             name
     * @throws java.nio.charset.UnsupportedCharsetException
     *             if the (possibly overridden) name is not supported by this
     *             JRE
     */
    public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason, String specDefaultEncoding) {
        String javaCharset = getAppropriateJavaCharset(detectedCharsetName);
        // Delegating keeps the default/validity handling in one place; with
        // no BOM and no unsupported name those parts of the full factory are
        // skipped.
        return createEncodingMemento(null, javaCharset, detectedCharsetName, null, specDefaultEncoding, reason);
    }

    /**
     * Maps a detected charset name to the canonical name of the Java charset
     * to use: first any override from the property file is applied, then the
     * name is resolved through {@link Charset#forName(String)}.
     * <p>
     * This method does not return null for an unusable name; the exception
     * from <code>Charset.forName</code> is propagated instead (in which case
     * "appropriateDefault" should be used, if a name is really needed for
     * some "save anyway" cases).
     * </p>
     *
     * @param detectedCharsetName
     *            the charset name as detected; must not be null
     * @return the canonical name of the resolved charset
     * @throws IllegalCharsetNameException
     *             if the (possibly overridden) name is not a legal charset
     *             name
     * @throws java.nio.charset.UnsupportedCharsetException
     *             if the (possibly overridden) name is not supported by this
     *             JRE
     */
    public static String getAppropriateJavaCharset(String detectedCharsetName) {
        // we don't allow null argument (or risk NPE or
        // IllegalArgumentException later at several points).
        Assert.isNotNull(detectedCharsetName, "illegal charset argument. it can not be null"); //$NON-NLS-1$
        // 1. Check explicit mapping overrides from property file
        String mappedName = CodedIO.checkMappingOverrides(detectedCharsetName);
        // 2. Use the "canonical" name from JRE mappings.
        // Note: see Charset JavaDoc, the name you get one with can be alias,
        // the name you get back is "standard" name. forName never returns
        // null; it throws if the name is illegal or unsupported.
        return Charset.forName(mappedName).name();
    }

    /**
     * Tells whether the given non-null name is a legal charset name that this
     * JRE supports.
     */
    private static boolean isUsableCharsetName(String charsetName) {
        try {
            return Charset.isSupported(charsetName);
        } catch (IllegalCharsetNameException e) {
            return false;
        }
    }

    /**
     * Loads <code>config/override.properties</code> from this plugin's bundle
     * on first use. If the file cannot be located or read, the result is an
     * empty (or partially filled) Properties object and no further attempts
     * are made.
     *
     * @return Returns the overridenCharsets, never null.
     */
    private static Properties getOverridenCharsets() {
        if (overridenCharsets == null) {
            overridenCharsets = new Properties();
            Bundle keyBundle = Platform.getBundle(ICodedResourcePlugin.ID);
            IPath keyPath = new Path("config/override.properties"); //$NON-NLS-1$
            // Either lookup may come back null; treat that as "no overrides"
            // rather than failing with a NullPointerException.
            URL location = (keyBundle == null) ? null : Platform.find(keyBundle, keyPath);
            if (location != null) {
                InputStream propertiesInputStream = null;
                try {
                    propertiesInputStream = location.openStream();
                    overridenCharsets.load(propertiesInputStream);
                } catch (IOException e) {
                    // if can't read, just assume there's no overrides and
                    // repeated attempts will not occur, since they will be
                    // represented by an empty Properties object
                    Logger.log(Logger.INFO_DEBUG, "could not read charset overrides: " + e.getMessage()); //$NON-NLS-1$
                } finally {
                    if (propertiesInputStream != null) {
                        try {
                            propertiesInputStream.close();
                        } catch (IOException ignored) {
                            // nothing useful can be done if close fails
                        }
                    }
                }
            }
        }
        return overridenCharsets;
    }

    /**
     * This class need not be instantiated (though its subclasses can be).
     */
    protected CodedIO() {
        super();
    }

    /**
     * Builds an {@link EncodingMemento} from the charset, byte order mark and
     * extended charset properties carried by a content description.
     *
     * @param contentDescription
     *            the description to read from; must not be null and must have
     *            a content type
     * @return the new memento, never null
     */
    protected EncodingMemento createMemento(IContentDescription contentDescription) {
        String appropriateDefault = contentDescription.getContentType().getDefaultCharset();
        String detectedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.DETECTED_CHARSET);
        String unSupportedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET);
        String javaCharset = contentDescription.getCharset();
        // integrity checks for debugging
        if (javaCharset == null) {
            Logger.log(Logger.INFO_DEBUG, "charset equaled null!"); //$NON-NLS-1$
        } else if (javaCharset.length() == 0) {
            Logger.log(Logger.INFO_DEBUG, "charset equaled emptyString!"); //$NON-NLS-1$
        }
        byte[] bom = (byte[]) contentDescription.getProperty(IContentDescription.BYTE_ORDER_MARK);
        EncodingMemento result = createEncodingMemento(bom, javaCharset, detectedCharset, unSupportedCharset, appropriateDefault, null);
        if (!result.isValid()) {
            // NOTE(review): when appropriateDefault is null this replaces the
            // NO_SPEC_DEFAULT set by createEncodingMemento with null --
            // confirm that is intended.
            result.setAppropriateDefault(appropriateDefault);
            // integrity check for debugging "invalid" cases.
            // the appropriate default we have, should equal what's in the
            // detected field. (not sure this is always required)
            if (DEBUG && appropriateDefault != null && !appropriateDefault.equals(detectedCharset)) {
                Logger.log(Logger.INFO_DEBUG, "appropriate did not equal detected, as expected for invalid charset case"); //$NON-NLS-1$
            }
        }
        return result;
    }
}