blob: b6b6e68b2e7665cf4df522c7130ae3dfc7ff165d [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.wst.css.core.internal.contenttype;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import org.eclipse.core.resources.IStorage;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
import org.eclipse.wst.sse.core.internal.encoding.NonContentBasedEncodingRules;
import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;
public class CSSResourceEncodingDetector implements IResourceCharsetDetector {
/**
 * Fallback memento handed out by {@code getEncodingMemento()} when no
 * other memento could be produced. Its Java charset name and its
 * appropriate default are both the value returned by
 * {@code NonContentBasedEncodingRules.useDefaultNameRules(null)}; its
 * detected charset name is {@code null}.
 */
class NullMemento extends EncodingMemento {
    /**
     * Creates the fallback memento (the implicit {@code super()} call runs
     * first).
     */
    public NullMemento() {
        String fallbackCharset = NonContentBasedEncodingRules.useDefaultNameRules(null);
        setJavaCharsetName(fallbackCharset);
        setAppropriateDefault(fallbackCharset);
        // nothing was actually detected in the content
        setDetectedCharsetName(null);
    }
}
// Tokenizer instance, created on first use by getTokenizer() and then reused.
private CSSHeadTokenizer fTokenizer;
// Result of the current detection; null until one is created, cleared by resetAll().
private EncodingMemento fEncodingMemento;
// Set to true by getEncodingMemento() after parseInput() has run, so that
// repeated 'get' calls do not re-parse; cleared by resetAll().
private boolean fHeaderParsed;
// Current input, assigned and marked by the set(...) methods; null until
// an input has been set (and again after resetAll()).
private Reader fReader;
/**
 * Returns the encoding the specification prescribes when a document does
 * not declare one. CSS has no spec-defined encoding, so the answer is
 * always {@code null}.
 *
 * @return always {@code null}
 */
public String getSpecDefaultEncoding() {
    // should match what's in plugin.xml (or be looked up from there)
    final String specDefault = null;
    return specDefault;
}
/**
 * Tests whether the given token type is one of the Unicode byte-order-mark
 * token types (UTF-8 three byte BOM, UTF-16BE, UTF-16LE).
 * <p>
 * Side effect: when it is, a new encoding memento for the corresponding
 * charset is created and stored in {@code fEncodingMemento}; for the UTF-8
 * BOM the memento is additionally flagged via
 * {@code setUTF83ByteBOMUsed(true)}.
 *
 * @param tokenType
 *            type of the token just read; may be {@code null}
 * @return {@code true} if the token type was a recognized BOM type
 */
private boolean canHandleAsUnicodeStream(String tokenType) {
    if (tokenType == null) {
        return false;
    }
    boolean canHandleAsUnicodeStream = false;
    // Compare by value (as isLegalString does): '==' on strings only tests
    // reference identity and silently fails for an equal but distinct
    // String instance.
    if (tokenType.equals(EncodingParserConstants.UTF83ByteBOM)) {
        canHandleAsUnicodeStream = true;
        String enc = "UTF-8"; //$NON-NLS-1$
        createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
        fEncodingMemento.setUTF83ByteBOMUsed(true);
    }
    else if (tokenType.equals(EncodingParserConstants.UTF16BE)) {
        canHandleAsUnicodeStream = true;
        String enc = "UTF-16BE"; //$NON-NLS-1$
        createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
    }
    else if (tokenType.equals(EncodingParserConstants.UTF16LE)) {
        canHandleAsUnicodeStream = true;
        // NOTE(review): little-endian is recorded as plain "UTF-16", not
        // "UTF-16LE" -- presumably relying on the BOM to select byte
        // order when decoding; confirm before changing.
        String enc = "UTF-16"; //$NON-NLS-1$
        createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
    }
    return canHandleAsUnicodeStream;
}
/**
 * Returns the head tokenizer, creating it on the first call and handing
 * back the same instance afterwards.
 *
 * @return the (lazily created) tokenizer
 */
private CSSHeadTokenizer getTokenizer() {
    if (fTokenizer != null) {
        return fTokenizer;
    }
    fTokenizer = new CSSHeadTokenizer();
    return fTokenizer;
}
/**
 * Tells whether a value token type is one of the string value types
 * accepted as a charset name: delimited, undelimited, or either of the
 * two invalidly terminated variants.
 *
 * @param valueTokenType
 *            token type to test; may be {@code null}
 * @return {@code true} for an accepted string token type, {@code false}
 *         otherwise (including {@code null})
 */
private boolean isLegalString(String valueTokenType) {
    if (valueTokenType == null) {
        return false;
    }
    if (valueTokenType.equals(EncodingParserConstants.StringValue)) {
        return true;
    }
    if (valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue)) {
        return true;
    }
    if (valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue)) {
        return true;
    }
    return valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
}
/**
 * Runs detection on the current input: first looks inside the content,
 * and only if that produced no memento falls back to the byte heuristics.
 *
 * @throws IOException
 *             if either detection step fails to read or reset the input
 */
private void parseInput() throws IOException {
    checkInContents();
    if (fEncodingMemento != null) {
        // content already told us the encoding
        return;
    }
    checkHeuristics();
}
/**
 * Tokenizes the current input and creates an encoding memento from what
 * the tokens reveal: either a Unicode BOM token (handled by
 * {@code canHandleAsUnicodeStream}) or a {@code CHARSET_RULE} token
 * followed by a legal string value token, whose text is taken as the
 * charset name.
 * <p>
 * All tokens are consumed; a later match replaces the memento created by
 * an earlier one.
 *
 * @throws IOException
 *             if the tokenizer fails while reading the input
 */
private void checkInContents() throws IOException {
    CSSHeadTokenizer tokenizer = getTokenizer();
    tokenizer.reset(fReader);
    HeadParserToken token = null;
    String tokenType = null;
    do {
        token = tokenizer.getNextToken();
        tokenType = token.getType();
        if (canHandleAsUnicodeStream(tokenType)) {
            // side effect of canHandle is to create appropriate memento
        }
        // Value comparison (null-safe, constant first): '==' would only
        // match the very same String instance.
        else if (CSSHeadTokenizerConstants.CHARSET_RULE.equals(tokenType)) {
            if (tokenizer.hasMoreTokens()) {
                HeadParserToken valueToken = tokenizer.getNextToken();
                String valueTokenType = valueToken.getType();
                if (isLegalString(valueTokenType)) {
                    createEncodingMemento(valueToken.getText(), EncodingMemento.FOUND_ENCODING_IN_CONTENT);
                }
            }
        }
    }
    while (tokenizer.hasMoreTokens());
}
/**
 * Tries to guess the encoding from the first three byte values of the
 * input. The reader is reset to its mark, up to three values are read and
 * passed to {@code EncodingGuesser.guessEncoding}; when a guess comes
 * back, a memento is created for it.
 * <p>
 * No guess is attempted when fewer than three values are available, when
 * the end of the stream is reached, or when a value read does not fit in
 * a single byte. An {@code IOException} while reading is treated as "no
 * heuristic applies". The reader is always reset to its mark afterwards.
 *
 * @throws IOException
 *             if the final reset of the reader fails
 */
private void checkHeuristics() throws IOException {
    String heuristicEncoding = null;
    try {
        fReader.reset();
        byte[] bytes = new byte[3];
        int nRead = 0;
        while (nRead < bytes.length && fReader.ready()) {
            int oneByte = fReader.read();
            // read() yields -1 at end of stream; ready() may still have
            // answered true (e.g. StringReader), so EOF must be rejected
            // here rather than stored as byte 0xFF. Values above 0xFF are
            // real characters, not raw bytes, so they rule the heuristic
            // out as well.
            if (oneByte < 0 || oneByte > 0xFF) {
                break;
            }
            bytes[nRead] = (byte) oneByte;
            nRead++;
        }
        // only a full three-byte prefix is meaningful to the guesser
        if (nRead == bytes.length) {
            heuristicEncoding = EncodingGuesser.guessEncoding(bytes, bytes.length);
        }
    }
    catch (IOException ignored) {
        // if any IO exception, then not a heuristic case
    }
    finally {
        // leave the reader positioned at its mark for subsequent readers
        fReader.reset();
    }
    if (heuristicEncoding != null) {
        createEncodingMemento(heuristicEncoding, EncodingMemento.GUESSED_ENCODING_FROM_STREAM);
    }
}
/**
 * Replaces {@code fEncodingMemento} with a fresh memento describing the
 * given detected charset name.
 * <p>
 * Note: trace info is not added here; the caller still needs to append
 * it, depending on the context in which the memento is created.
 *
 * @param detectedCharsetName
 *            charset name as found or guessed
 */
private void createEncodingMemento(String detectedCharsetName) {
    EncodingMemento memento = new EncodingMemento();
    // The field must be assigned before resolving the Java charset name:
    // getAppropriateJavaCharset may flag the name as invalid on
    // fEncodingMemento.
    fEncodingMemento = memento;
    String javaCharsetName = getAppropriateJavaCharset(detectedCharsetName);
    memento.setJavaCharsetName(javaCharsetName);
    memento.setDetectedCharsetName(detectedCharsetName);
    // TODO: if detectedCharset and spec default is null, need to use
    // "workbench based" defaults.
    memento.setAppropriateDefault(getSpecDefaultEncoding());
}
/**
 * Convenience overload that accepts a reason code next to the charset
 * name. The reason is currently not recorded anywhere; the call simply
 * delegates to the single-argument variant.
 *
 * @param detectedCharsetName
 *            charset name as found or guessed
 * @param reason
 *            why this charset was chosen (unused at present)
 */
private void createEncodingMemento(String detectedCharsetName, String reason) {
    this.createEncodingMemento(detectedCharsetName);
}
/**
 * Guards operations that need an input: fails fast when no reader has
 * been set.
 *
 * @throws IllegalStateException
 *             if {@code fReader} is {@code null}
 */
private final void ensureInputSet() {
    if (fReader != null) {
        return;
    }
    throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
}
/**
 * Maps a detected charset name to the name to use with Java.
 * <p>
 * The explicit mapping overrides are consulted first (this is where
 * "rules" for names that are not even known to Java are picked up). If
 * the JRE knows the detected name, its canonical name -- again passed
 * through the overrides -- takes priority. If the JRE rejects the name as
 * unsupported or illegal, the override result is returned and, when that
 * result merely echoes the detected name, the name is recorded as invalid
 * on {@code fEncodingMemento}.
 * <p>
 * The result can be {@code null} for an invalid charset name, in which
 * case "appropriateDefault" should be used if a name is really needed
 * for some "save anyway" cases.
 *
 * @param detectedCharsetName
 *            charset name as found or guessed
 * @return the Java charset name to use, possibly {@code null}
 */
private String getAppropriateJavaCharset(String detectedCharsetName) {
    // 1. explicit mapping overrides from the property file
    String mappedName = CodedIO.checkMappingOverrides(detectedCharsetName);
    // 2. canonical name from the JRE: per the Charset JavaDoc the name
    // passed in may be an alias, the name returned is the standard one
    Charset canonical = null;
    try {
        canonical = Charset.forName(detectedCharsetName);
    }
    catch (UnsupportedCharsetException unsupported) {
        flagInvalidUnlessOverridden(mappedName, detectedCharsetName);
    }
    catch (IllegalCharsetNameException illegal) {
        flagInvalidUnlessOverridden(mappedName, detectedCharsetName);
    }
    if (canonical == null) {
        return mappedName;
    }
    // the Java canonical name wins, but overrides still apply to it
    return CodedIO.checkMappingOverrides(canonical.name());
}

/**
 * Records the detected name as invalid on {@code fEncodingMemento}, but
 * only when the override lookup returned the very same name -- the two
 * differ when the name has been overridden.
 */
private void flagInvalidUnlessOverridden(String mappedName, String detectedCharsetName) {
    if (mappedName != null && mappedName.equals(detectedCharsetName)) {
        fEncodingMemento.setInvalidEncoding(detectedCharsetName);
    }
}
/**
 * Returns the detected charset name of the current encoding memento.
 *
 * @return the detected charset name; {@code null} when the memento
 *         carries none (for example the fallback {@code NullMemento})
 * @throws IOException
 *             if detection fails while reading the input
 * @throws IllegalStateException
 *             if no input has been set
 */
public String getEncoding() throws IOException {
    EncodingMemento memento = getEncodingMemento();
    return memento.getDetectedCharsetName();
}
/**
 * Returns the encoding memento for the current input, running detection
 * on the first call only.
 * <p>
 * If detection yields nothing, the spec default is tried; if that yields
 * nothing either, a {@code NullMemento} is used as a safety net, so the
 * result is never {@code null}.
 *
 * @return the encoding memento, never {@code null}
 * @throws IOException
 *             if detection fails while reading the input
 * @throws IllegalStateException
 *             if no input has been set
 */
public EncodingMemento getEncodingMemento() throws IOException {
    ensureInputSet();
    if (!fHeaderParsed) {
        parseInput();
        // Remember that the header has been parsed so multiple 'get'
        // calls do not cause reparsing.
        // Note: there is a "hidden assumption" here that an empty string
        // in content should be treated the same as not present.
        fHeaderParsed = true;
    }
    if (fEncodingMemento == null) {
        handleSpecDefault();
        if (fEncodingMemento == null) {
            // safety net
            fEncodingMemento = new NullMemento();
        }
    }
    return fEncodingMemento;
}
/**
 * Builds a memento for the spec default encoding.
 * <p>
 * Beware: this first resets the detector, discarding the current input
 * and any previous result.
 *
 * @return a memento for the spec default encoding, or {@code null} when
 *         {@code getSpecDefaultEncoding()} returns {@code null}
 */
public EncodingMemento getSpecDefaultEncodingMemento() {
    resetAll();
    String enc = getSpecDefaultEncoding();
    if (enc == null) {
        return null;
    }
    createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
    fEncodingMemento.setAppropriateDefault(enc);
    return fEncodingMemento;
}
/**
 * Installs a memento based on the spec default encoding, if there is one.
 * The memento gets that encoding as both its Java charset name and its
 * appropriate default. Does nothing when
 * {@code getSpecDefaultEncoding()} returns {@code null}.
 */
private void handleSpecDefault() {
    String encodingName = getSpecDefaultEncoding();
    if (encodingName == null) {
        return;
    }
    // Deliberately not going through createEncodingMemento(encodingName,
    // EncodingMemento.USED_CONTENT_TYPE_DEFAULT) here.
    EncodingMemento memento = new EncodingMemento();
    fEncodingMemento = memento;
    memento.setJavaCharsetName(encodingName);
    memento.setAppropriateDefault(encodingName);
}
/**
 * Returns the detector to its initial state: no result, header not yet
 * parsed, no input. The tokenizer instance is kept.
 */
private void resetAll() {
    fEncodingMemento = null;
    fHeaderParsed = false;
    fReader = null;
}
/**
 * Sets a byte stream as the input. Any previous state is discarded, the
 * stream is wrapped in a {@code ByteReader}, and the reader is marked so
 * detection can rewind it.
 *
 * @param inputStream
 *            stream to detect the encoding of
 */
public void set(InputStream inputStream) {
    resetAll();
    Reader byteReader = new ByteReader(inputStream);
    fReader = byteReader;
    try {
        byteReader.mark(CodedIO.MAX_MARK_SIZE);
    }
    catch (IOException e) {
        // impossible, since we know ByteReader supports marking
        throw new Error(e);
    }
}
/**
 * Sets the contents of a storage as the input. The contents stream is
 * wrapped in a {@code BufferedInputStream}, marked, and passed on to
 * {@code set(InputStream)}.
 *
 * @param iStorage
 *            storage whose contents should be examined
 * @throws CoreException
 *             if the storage contents cannot be obtained
 */
public void set(IStorage iStorage) throws CoreException {
    resetAll();
    // declared as InputStream so the call below binds to set(InputStream)
    InputStream buffered = new BufferedInputStream(iStorage.getContents(), CodedIO.MAX_BUF_SIZE);
    buffered.mark(CodedIO.MAX_MARK_SIZE);
    set(buffered);
    // TODO we'll need to "remember" IFile, or get its (or its project's)
    // settings, in case those are needed to handle cases when the
    // encoding is not in the file stream.
}
/**
 * Sets a character reader as the input. A reader that does not support
 * marking is wrapped in a {@code BufferedReader}; the resulting reader is
 * then marked so detection can rewind it.
 * <p>
 * Note: this is not part of the interface, to help avoid confusion. The
 * reader is expected to be a well formed character reader -- that is, it
 * has already been determined not to be a unicode marked input stream --
 * and is assumed to be at position zero, ready to read the first
 * character.
 *
 * @param reader
 *            character reader positioned at the start of the content
 */
public void set(Reader reader) {
    resetAll();
    fReader = reader.markSupported() ? reader : new BufferedReader(reader);
    try {
        fReader.mark(CodedIO.MAX_MARK_SIZE);
    }
    catch (IOException e) {
        // impossible, since we just made sure the reader is markable
        throw new Error(e);
    }
}
}