bundles/org.eclipse.wst.css.core/src/org/eclipse/wst/css/core/internal/contenttype/CSSResourceEncodingDetector.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2004 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/
 package org.eclipse.wst.css.core.internal.contenttype;

 import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;

 import org.eclipse.core.resources.IStorage;
 import org.eclipse.core.runtime.CoreException;
 import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
 import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
 import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
 import org.eclipse.wst.sse.core.internal.encoding.NonContentBasedEncodingRules;
 import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;


 public class CSSResourceEncodingDetector implements IResourceCharsetDetector {
 	class NullMemento extends EncodingMemento {
 		/**
 		 *
 		 */
 		public NullMemento() {
 			super();
 			String defaultCharset = NonContentBasedEncodingRules.useDefaultNameRules(null);
 			setJavaCharsetName(defaultCharset);
 			setAppropriateDefault(defaultCharset);
 			setDetectedCharsetName(null);
 		}
 	}


 	private CSSHeadTokenizer fTokenizer;
 	private EncodingMemento fEncodingMemento;
 	private boolean fHeaderParsed;
 	private Reader fReader;

 	/**
 	 * There is no spec defined encoding for CSS, so Null is returned.
 	 */
 	public String getSpecDefaultEncoding() {
 		// should match what's in plugin.xml (or look it up from there).
 		return null;
 	}

 	private boolean canHandleAsUnicodeStream(String tokenType) {
 		boolean canHandleAsUnicodeStream = false;
 		if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-8"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
 			fEncodingMemento.setUTF83ByteBOMUsed(true);
 		}
 		else if (tokenType == EncodingParserConstants.UTF16BE) {
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-16BE"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
 		}
 		else if (tokenType == EncodingParserConstants.UTF16LE) {
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-16"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
 		}
 		return canHandleAsUnicodeStream;
 	}

 	/**
 	 * @return Returns the tokenizer.
 	 */
 	private CSSHeadTokenizer getTokenizer() {
 		if (fTokenizer == null) {
 			fTokenizer = new CSSHeadTokenizer();
 		}
 		return fTokenizer;
 	}

 	private boolean isLegalString(String valueTokenType) {
 		boolean result = false;
 		if (valueTokenType != null) {
 			result = valueTokenType.equals(EncodingParserConstants.StringValue) || valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
 		}
 		return result;
 	}

 	private void parseInput() throws IOException {
 		checkInContents();
 		if (fEncodingMemento == null) {
 			checkHeuristics();
 		}
 	}

 	private void checkInContents() throws IOException {
 		CSSHeadTokenizer tokenizer = getTokenizer();
 		tokenizer.reset(fReader);
 		HeadParserToken token = null;
 		String tokenType = null;
 		do {
 			token = tokenizer.getNextToken();
 			tokenType = token.getType();
 			if (canHandleAsUnicodeStream(tokenType)) {
 				// side effect of canHandle is to create appropriate memento
 			}
 			else if (tokenType == CSSHeadTokenizerConstants.CHARSET_RULE) {
 				if (tokenizer.hasMoreTokens()) {
 					HeadParserToken valueToken = tokenizer.getNextToken();
 					String valueTokenType = valueToken.getType();
 					if (isLegalString(valueTokenType)) {
 						createEncodingMemento(valueToken.getText(), EncodingMemento.FOUND_ENCODING_IN_CONTENT);

 					}
 				}
 			}

 		}
 		while (tokenizer.hasMoreTokens());
 	}

 	/**
 	 *
 	 */
 	private void checkHeuristics() throws IOException {
 		boolean noHeuristic = false;
 		String heuristicEncoding = null;
 		try {
 			fReader.reset();
 			byte[] bytes = new byte[3];
 			int nRead = 0;
 			for (int i = 0; i < bytes.length; i++) {
 				if (fReader.ready()) {
 					int oneByte = fReader.read();
 					nRead++;
 					if (oneByte <= 0xFF) {
 						bytes[i] = (byte) oneByte;
 					}
 					else {
 						noHeuristic = true;
 					}
 				}
 				else {
 					noHeuristic = true;
 					break;
 				}
 			}
 			if (!noHeuristic && nRead == 3) {
 				heuristicEncoding = EncodingGuesser.guessEncoding(bytes, 3);
 			}
 		}
 		catch (IOException e) {
 			// if any IO exception, then not a heuristic case
 		}
 		finally {
 			fReader.reset();
 		}
 		if (heuristicEncoding != null) {
 			createEncodingMemento(heuristicEncoding, EncodingMemento.GUESSED_ENCODING_FROM_STREAM);
 		}

 	}

 	/**
 	 * Note: once this instance is created, trace info still needs to be
 	 * appended by caller, depending on the context its created.
 	 */
 	private void createEncodingMemento(String detectedCharsetName) {
 		fEncodingMemento = new EncodingMemento();
 		fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName));
 		fEncodingMemento.setDetectedCharsetName(detectedCharsetName);
 		// TODO: if detectedCharset and spec default is
 		// null, need to use "work
 		// bench based" defaults.
 		fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding());
 	}

 	/**
 	 * convience method all subclasses can use (but not override)
 	 *
 	 * @param detectedCharsetName
 	 * @param reason
 	 */
 	private void createEncodingMemento(String detectedCharsetName, String reason) {
 		createEncodingMemento(detectedCharsetName);
 	}

 	/**
 	 * convience method all subclasses can use (but not override)
 	 */
 	private final void ensureInputSet() {
 		if (fReader == null) {
 			throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
 		}
 	}

 	/**
 	 * This method can return null, if invalid charset name (in which case
 	 * "appropriateDefault" should be used, if a name is really need for some
 	 * "save anyway" cases).
 	 *
 	 * @param detectedCharsetName
 	 * @return
 	 */
 	private String getAppropriateJavaCharset(String detectedCharsetName) {
 		String result = null;
 		// 1. Check explicit mapping overrides from
 		// property file -- its here we pick up "rules" for cases
 		// that are not even in Java
 		result = CodedIO.checkMappingOverrides(detectedCharsetName);
 		// 2. Use the "canonical" name from JRE mappings
 		// Note: see Charset JavaDoc, the name you get one
 		// with can be alias,
 		// the name you get back is "standard" name.
 		Charset javaCharset = null;
 		try {
 			javaCharset = Charset.forName(detectedCharsetName);
 		}
 		catch (UnsupportedCharsetException e) {
 			// only set invalid, if result is same as detected -- they won't
 			// be equal if
 			// overridden
 			if (result != null && result.equals(detectedCharsetName)) {
 				fEncodingMemento.setInvalidEncoding(detectedCharsetName);
 			}
 		}
 		catch (IllegalCharsetNameException e) {
 			// only set invalid, if result is same as detected -- they won't
 			// be equal if
 			// overridden
 			if (result != null && result.equals(detectedCharsetName)) {
 				fEncodingMemento.setInvalidEncoding(detectedCharsetName);
 			}
 		}
 		// give priority to java cononical name, if present
 		if (javaCharset != null) {
 			result = javaCharset.name();
 			// but still allow overrides
 			result = CodedIO.checkMappingOverrides(result);
 		}
 		return result;
 	}

 	public String getEncoding() throws IOException {
 		return getEncodingMemento().getDetectedCharsetName();
 	}

 	public EncodingMemento getEncodingMemento() throws IOException {
 		ensureInputSet();
 		if (!fHeaderParsed) {
 			parseInput();
 			// we keep track of if header's already been
 			// parse, so can make
 			// multiple 'get' calls, without causing
 			// reparsing.
 			fHeaderParsed = true;
 			// Note: there is a "hidden assumption" here
 			// that an empty
 			// string in content should be treated same as
 			// not present.
 		}
 		if (fEncodingMemento == null) {
 			handleSpecDefault();
 		}
 		if (fEncodingMemento == null) {
 			// safty net
 			fEncodingMemento = new NullMemento();
 		}
 		return fEncodingMemento;
 	}

 	public EncodingMemento getSpecDefaultEncodingMemento() {
 		resetAll();
 		EncodingMemento result = null;
 		String enc = getSpecDefaultEncoding();
 		if (enc != null) {
 			createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
 			fEncodingMemento.setAppropriateDefault(enc);
 			result = fEncodingMemento;
 		}
 		return result;
 	}

 	private void handleSpecDefault() {
 		String encodingName;
 		encodingName = getSpecDefaultEncoding();
 		if (encodingName != null) {
 			// createEncodingMemento(encodingName,
 			// EncodingMemento.USED_CONTENT_TYPE_DEFAULT);
 			fEncodingMemento = new EncodingMemento();
 			fEncodingMemento.setJavaCharsetName(encodingName);
 			fEncodingMemento.setAppropriateDefault(encodingName);
 		}
 	}

 	/**
 	 *
 	 */
 	private void resetAll() {
 		fReader = null;
 		fHeaderParsed = false;
 		fEncodingMemento = null;
 	}

 	/**
 	 *
 	 */
 	public void set(InputStream inputStream) {
 		resetAll();
 		fReader = new ByteReader(inputStream);
 		try {
 			fReader.mark(CodedIO.MAX_MARK_SIZE);
 		}
 		catch (IOException e) {
 			// impossible, since we know ByteReader
 			// supports marking
 			throw new Error(e);
 		}
 	}

 	/**
 	 *
 	 */
 	public void set(IStorage iStorage) throws CoreException {
 		resetAll();
 		InputStream inputStream = iStorage.getContents();
 		InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
 		resettableStream.mark(CodedIO.MAX_MARK_SIZE);
 		set(resettableStream);
 		// TODO we'll need to "remember" IFile, or
 		// get its (or its project's) settings, in case
 		// those are needed to handle cases when the
 		// encoding is not in the file stream.
 	}

 	/**
 	 * Note: this is not part of interface to help avoid confusion ... it
 	 * expected this Reader is a well formed character reader ... that is, its
 	 * all ready been determined to not be a unicode marked input stream. And,
 	 * its assumed to be in the correct position, at position zero, ready to
 	 * read first character.
 	 */
 	public void set(Reader reader) {
 		resetAll();
 		fReader = reader;
 		if (!fReader.markSupported()) {
 			fReader = new BufferedReader(fReader);
 		}
 		try {
 			fReader.mark(CodedIO.MAX_MARK_SIZE);
 		}
 		catch (IOException e) {
 			// impossble, since we just checked if markable
 			throw new Error(e);
 		}
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2004 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	*******************************************************************************/
	package org.eclipse.wst.css.core.internal.contenttype;

	import java.io.BufferedInputStream;
	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.Reader;
	import java.nio.charset.Charset;
	import java.nio.charset.IllegalCharsetNameException;
	import java.nio.charset.UnsupportedCharsetException;

	import org.eclipse.core.resources.IStorage;
	import org.eclipse.core.runtime.CoreException;
	import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
	import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
	import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
	import org.eclipse.wst.sse.core.internal.encoding.NonContentBasedEncodingRules;
	import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;



	public class CSSResourceEncodingDetector implements IResourceCharsetDetector {
	class NullMemento extends EncodingMemento {
	/**
	*
	*/
	public NullMemento() {
	super();
	String defaultCharset = NonContentBasedEncodingRules.useDefaultNameRules(null);
	setJavaCharsetName(defaultCharset);
	setAppropriateDefault(defaultCharset);
	setDetectedCharsetName(null);
	}
	}


	private CSSHeadTokenizer fTokenizer;
	private EncodingMemento fEncodingMemento;
	private boolean fHeaderParsed;
	private Reader fReader;

	/**
	* There is no spec defined encoding for CSS, so Null is returned.
	*/
	public String getSpecDefaultEncoding() {
	// should match what's in plugin.xml (or look it up from there).
	return null;
	}

	private boolean canHandleAsUnicodeStream(String tokenType) {
	boolean canHandleAsUnicodeStream = false;
	if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
	canHandleAsUnicodeStream = true;
	String enc = "UTF-8"; //$NON-NLS-1$
	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
	fEncodingMemento.setUTF83ByteBOMUsed(true);
	}
	else if (tokenType == EncodingParserConstants.UTF16BE) {
	canHandleAsUnicodeStream = true;
	String enc = "UTF-16BE"; //$NON-NLS-1$
	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
	}
	else if (tokenType == EncodingParserConstants.UTF16LE) {
	canHandleAsUnicodeStream = true;
	String enc = "UTF-16"; //$NON-NLS-1$
	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
	}
	return canHandleAsUnicodeStream;
	}

	/**
	* @return Returns the tokenizer.
	*/
	private CSSHeadTokenizer getTokenizer() {
	if (fTokenizer == null) {
	fTokenizer = new CSSHeadTokenizer();
	}
	return fTokenizer;
	}

	private boolean isLegalString(String valueTokenType) {
	boolean result = false;
	if (valueTokenType != null) {
	result = valueTokenType.equals(EncodingParserConstants.StringValue) \|\| valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) \|\| valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) \|\| valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
	}
	return result;
	}

	private void parseInput() throws IOException {
	checkInContents();
	if (fEncodingMemento == null) {
	checkHeuristics();
	}
	}

	private void checkInContents() throws IOException {
	CSSHeadTokenizer tokenizer = getTokenizer();
	tokenizer.reset(fReader);
	HeadParserToken token = null;
	String tokenType = null;
	do {
	token = tokenizer.getNextToken();
	tokenType = token.getType();
	if (canHandleAsUnicodeStream(tokenType)) {
	// side effect of canHandle is to create appropriate memento
	}
	else if (tokenType == CSSHeadTokenizerConstants.CHARSET_RULE) {
	if (tokenizer.hasMoreTokens()) {
	HeadParserToken valueToken = tokenizer.getNextToken();
	String valueTokenType = valueToken.getType();
	if (isLegalString(valueTokenType)) {
	createEncodingMemento(valueToken.getText(), EncodingMemento.FOUND_ENCODING_IN_CONTENT);

	}
	}
	}

	}
	while (tokenizer.hasMoreTokens());
	}

	/**
	*
	*/
	private void checkHeuristics() throws IOException {
	boolean noHeuristic = false;
	String heuristicEncoding = null;
	try {
	fReader.reset();
	byte[] bytes = new byte[3];
	int nRead = 0;
	for (int i = 0; i < bytes.length; i++) {
	if (fReader.ready()) {
	int oneByte = fReader.read();
	nRead++;
	if (oneByte <= 0xFF) {
	bytes[i] = (byte) oneByte;
	}
	else {
	noHeuristic = true;
	}
	}
	else {
	noHeuristic = true;
	break;
	}
	}
	if (!noHeuristic && nRead == 3) {
	heuristicEncoding = EncodingGuesser.guessEncoding(bytes, 3);
	}
	}
	catch (IOException e) {
	// if any IO exception, then not a heuristic case
	}
	finally {
	fReader.reset();
	}
	if (heuristicEncoding != null) {
	createEncodingMemento(heuristicEncoding, EncodingMemento.GUESSED_ENCODING_FROM_STREAM);
	}

	}

	/**
	* Note: once this instance is created, trace info still needs to be
	* appended by caller, depending on the context its created.
	*/
	private void createEncodingMemento(String detectedCharsetName) {
	fEncodingMemento = new EncodingMemento();
	fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName));
	fEncodingMemento.setDetectedCharsetName(detectedCharsetName);
	// TODO: if detectedCharset and spec default is
	// null, need to use "work
	// bench based" defaults.
	fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding());
	}

	/**
	* convience method all subclasses can use (but not override)
	*
	* @param detectedCharsetName
	* @param reason
	*/
	private void createEncodingMemento(String detectedCharsetName, String reason) {
	createEncodingMemento(detectedCharsetName);
	}

	/**
	* convience method all subclasses can use (but not override)
	*/
	private final void ensureInputSet() {
	if (fReader == null) {
	throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
	}
	}

	/**
	* This method can return null, if invalid charset name (in which case
	* "appropriateDefault" should be used, if a name is really need for some
	* "save anyway" cases).
	*
	* @param detectedCharsetName
	* @return
	*/
	private String getAppropriateJavaCharset(String detectedCharsetName) {
	String result = null;
	// 1. Check explicit mapping overrides from
	// property file -- its here we pick up "rules" for cases
	// that are not even in Java
	result = CodedIO.checkMappingOverrides(detectedCharsetName);
	// 2. Use the "canonical" name from JRE mappings
	// Note: see Charset JavaDoc, the name you get one
	// with can be alias,
	// the name you get back is "standard" name.
	Charset javaCharset = null;
	try {
	javaCharset = Charset.forName(detectedCharsetName);
	}
	catch (UnsupportedCharsetException e) {
	// only set invalid, if result is same as detected -- they won't
	// be equal if
	// overridden
	if (result != null && result.equals(detectedCharsetName)) {
	fEncodingMemento.setInvalidEncoding(detectedCharsetName);
	}
	}
	catch (IllegalCharsetNameException e) {
	// only set invalid, if result is same as detected -- they won't
	// be equal if
	// overridden
	if (result != null && result.equals(detectedCharsetName)) {
	fEncodingMemento.setInvalidEncoding(detectedCharsetName);
	}
	}
	// give priority to java cononical name, if present
	if (javaCharset != null) {
	result = javaCharset.name();
	// but still allow overrides
	result = CodedIO.checkMappingOverrides(result);
	}
	return result;
	}

	public String getEncoding() throws IOException {
	return getEncodingMemento().getDetectedCharsetName();
	}

	public EncodingMemento getEncodingMemento() throws IOException {
	ensureInputSet();
	if (!fHeaderParsed) {
	parseInput();
	// we keep track of if header's already been
	// parse, so can make
	// multiple 'get' calls, without causing
	// reparsing.
	fHeaderParsed = true;
	// Note: there is a "hidden assumption" here
	// that an empty
	// string in content should be treated same as
	// not present.
	}
	if (fEncodingMemento == null) {
	handleSpecDefault();
	}
	if (fEncodingMemento == null) {
	// safty net
	fEncodingMemento = new NullMemento();
	}
	return fEncodingMemento;
	}

	public EncodingMemento getSpecDefaultEncodingMemento() {
	resetAll();
	EncodingMemento result = null;
	String enc = getSpecDefaultEncoding();
	if (enc != null) {
	createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
	fEncodingMemento.setAppropriateDefault(enc);
	result = fEncodingMemento;
	}
	return result;
	}

	private void handleSpecDefault() {
	String encodingName;
	encodingName = getSpecDefaultEncoding();
	if (encodingName != null) {
	// createEncodingMemento(encodingName,
	// EncodingMemento.USED_CONTENT_TYPE_DEFAULT);
	fEncodingMemento = new EncodingMemento();
	fEncodingMemento.setJavaCharsetName(encodingName);
	fEncodingMemento.setAppropriateDefault(encodingName);
	}
	}

	/**
	*
	*/
	private void resetAll() {
	fReader = null;
	fHeaderParsed = false;
	fEncodingMemento = null;
	}

	/**
	*
	*/
	public void set(InputStream inputStream) {
	resetAll();
	fReader = new ByteReader(inputStream);
	try {
	fReader.mark(CodedIO.MAX_MARK_SIZE);
	}
	catch (IOException e) {
	// impossible, since we know ByteReader
	// supports marking
	throw new Error(e);
	}
	}

	/**
	*
	*/
	public void set(IStorage iStorage) throws CoreException {
	resetAll();
	InputStream inputStream = iStorage.getContents();
	InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
	resettableStream.mark(CodedIO.MAX_MARK_SIZE);
	set(resettableStream);
	// TODO we'll need to "remember" IFile, or
	// get its (or its project's) settings, in case
	// those are needed to handle cases when the
	// encoding is not in the file stream.
	}

	/**
	* Note: this is not part of interface to help avoid confusion ... it
	* expected this Reader is a well formed character reader ... that is, its
	* all ready been determined to not be a unicode marked input stream. And,
	* its assumed to be in the correct position, at position zero, ready to
	* read first character.
	*/
	public void set(Reader reader) {
	resetAll();
	fReader = reader;
	if (!fReader.markSupported()) {
	fReader = new BufferedReader(fReader);
	}
	try {
	fReader.mark(CodedIO.MAX_MARK_SIZE);
	}
	catch (IOException e) {
	// impossble, since we just checked if markable
	throw new Error(e);
	}
	}

	}