bundles/org.eclipse.wst.dtd.core/src/org/eclipse/wst/dtd/core/internal/encoding/AbstractResourceEncodingDetector.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2001, 2005 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *     Jens Lukowski/Innoopract - initial renaming/restructuring
  *
  *******************************************************************************/
 package org.eclipse.wst.dtd.core.internal.encoding;

 import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;

 import org.eclipse.core.resources.IStorage;
 import org.eclipse.core.runtime.CoreException;
 import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
 import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
 import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


 /**
  * Skeleton implementation of {@link IResourceCharsetDetector}.
  * <p>
  * The class holds the input {@link Reader}, caches the
  * {@link EncodingMemento} produced by {@link #parseInput()}, and applies a
  * fixed fallback order in {@link #getEncodingMemento()}: the memento set by
  * the subclass parse, then the spec default from
  * {@link #getSpecDefaultEncoding()}, and finally a <code>NullMemento</code>.
  * Subclasses supply {@link #parseInput()} and
  * {@link #getSpecDefaultEncoding()}.
  */
 public abstract class AbstractResourceEncodingDetector implements IResourceCharsetDetector {

 	/**
 	 * The detection result. It is <code>null</code> after a reset and is
 	 * filled in by the <code>createEncodingMemento</code> methods or by the
 	 * fallbacks in {@link #getEncodingMemento()}.
 	 */
 	protected EncodingMemento fEncodingMemento;

 	/**
 	 * <code>true</code> once {@link #getEncodingMemento()} has called
 	 * {@link #parseInput()} for the current input, so repeated 'get' calls
 	 * do not parse again.
 	 */
 	protected boolean fHeaderParsed;

 	/**
 	 * The current input, installed and marked by one of the
 	 * <code>set</code> methods; <code>null</code> when no input is set.
 	 */
 	protected Reader fReader;

 	/**
 	 * Creates a detector with no input set.
 	 */
 	public AbstractResourceEncodingDetector() {
 		super();
 	}

 	/**
 	 * Replaces {@link #fEncodingMemento} with a new memento describing the
 	 * given detected charset name.
 	 * <p>
 	 * Note: once this instance is created, trace info still needs to be
 	 * appended by the caller, depending on the context in which it is
 	 * created.
 	 *
 	 * @param detectedCharsetName
 	 *            the charset name found in the input
 	 */
 	private void createEncodingMemento(String detectedCharsetName) {
 		// The field must be assigned before getAppropriateJavaCharset is
 		// called, because that method may flag the invalid encoding on it.
 		fEncodingMemento = new EncodingMemento();
 		fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName));
 		fEncodingMemento.setDetectedCharsetName(detectedCharsetName);
 		// TODO: if detectedCharset and spec default is null, need to use
 		// "workbench based" defaults.
 		fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding());
 	}

 	/**
 	 * Convenience method all subclasses can use (but not override).
 	 *
 	 * @param detectedCharsetName
 	 *            the charset name found in the input
 	 * @param reason
 	 *            accepted for API compatibility; this implementation does
 	 *            not use it
 	 */
 	protected final void createEncodingMemento(String detectedCharsetName, String reason) {
 		createEncodingMemento(detectedCharsetName);
 	}

 	/**
 	 * Convenience method all subclasses can use (but not override).
 	 *
 	 * @throws IllegalStateException
 	 *             if no input has been set
 	 */
 	protected final void ensureInputSet() {
 		if (fReader == null) {
 			throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
 		}
 	}

 	/**
 	 * Maps a detected charset name to the name Java should use.
 	 * <p>
 	 * This method can return <code>null</code> if the charset name is
 	 * unusable (in which case "appropriateDefault" should be used, if a name
 	 * is really needed for some "save anyway" cases). It expects
 	 * {@link #fEncodingMemento} to be non-null, since an invalid name is
 	 * recorded on it.
 	 *
 	 * @param detectedCharsetName
 	 *            the charset name found in the input; may be
 	 *            <code>null</code>
 	 * @return the overridden or canonical Java charset name, or
 	 *         <code>null</code> when <code>detectedCharsetName</code> is
 	 *         <code>null</code>
 	 */
 	private String getAppropriateJavaCharset(String detectedCharsetName) {
 		if (detectedCharsetName == null) {
 			// Charset.forName(null) throws IllegalArgumentException, which
 			// is neither of the exceptions handled below. There is nothing
 			// to map or to flag for a missing name.
 			return null;
 		}
 		// 1. Check explicit mapping overrides from property file -- it's
 		// here we pick up "rules" for cases that are not even in Java.
 		String result = CodedIO.checkMappingOverrides(detectedCharsetName);
 		// 2. Use the "canonical" name from JRE mappings.
 		// Note: see Charset JavaDoc, the name you look one up with can be
 		// an alias; the name you get back is the "standard" name.
 		Charset javaCharset = null;
 		try {
 			javaCharset = Charset.forName(detectedCharsetName);
 		}
 		catch (UnsupportedCharsetException e) {
 			flagInvalidUnlessOverridden(result, detectedCharsetName);
 		}
 		catch (IllegalCharsetNameException e) {
 			flagInvalidUnlessOverridden(result, detectedCharsetName);
 		}
 		// give priority to java canonical name, if present
 		if (javaCharset != null) {
 			// but still allow overrides
 			result = CodedIO.checkMappingOverrides(javaCharset.name());
 		}
 		return result;
 	}

 	/**
 	 * Records the detected name as invalid on {@link #fEncodingMemento},
 	 * but only if the override lookup returned the detected name unchanged
 	 * -- they won't be equal if overridden.
 	 *
 	 * @param overrideResult
 	 *            the value returned by the override lookup
 	 * @param detectedCharsetName
 	 *            the charset name Java rejected
 	 */
 	private void flagInvalidUnlessOverridden(String overrideResult, String detectedCharsetName) {
 		if (overrideResult != null && overrideResult.equals(detectedCharsetName)) {
 			fEncodingMemento.setInvalidEncoding(detectedCharsetName);
 		}
 	}

 	/**
 	 * Returns the detected charset name of the current input.
 	 *
 	 * @return the detected charset name held by the memento from
 	 *         {@link #getEncodingMemento()}
 	 * @throws IOException
 	 *             if parsing the input fails
 	 */
 	public String getEncoding() throws IOException {
 		return getEncodingMemento().getDetectedCharsetName();
 	}

 	/**
 	 * Returns the encoding memento for the current input, parsing the input
 	 * on the first call.
 	 * <p>
 	 * To keep the overall rules consistent, subclasses are meant to provide
 	 * only certain pieces of the implementation ({@link #parseInput()} and
 	 * {@link #getSpecDefaultEncoding()}). NOTE(review): the original comment
 	 * said this method would be marked final, but it is not declared final.
 	 *
 	 * @return the memento; never <code>null</code>
 	 * @throws IOException
 	 *             if parsing the input fails
 	 * @throws IllegalStateException
 	 *             if no input has been set
 	 */
 	public EncodingMemento getEncodingMemento() throws IOException {
 		ensureInputSet();
 		if (!fHeaderParsed) {
 			parseInput();
 			// We keep track of whether the header has already been parsed,
 			// so multiple 'get' calls can be made without reparsing.
 			fHeaderParsed = true;
 			// Note: there is a "hidden assumption" here that an empty
 			// string in content should be treated the same as not present.
 		}
 		if (fEncodingMemento == null) {
 			handleSpecDefault();
 		}
 		if (fEncodingMemento == null) {
 			// safety net
 			fEncodingMemento = new NullMemento();
 		}
 		return fEncodingMemento;
 	}

 	/**
 	 * This is to return a default encoding -- as specified by an industry
 	 * content type spec -- when not present in the stream, for example, XML
 	 * specifies UTF-8, JSP specifies ISO-8859-1. This method should return
 	 * null if there is no such "spec default".
 	 *
 	 * @return the spec default charset name, or <code>null</code>
 	 */
 	public abstract String getSpecDefaultEncoding();

 	/**
 	 * Builds a memento from the spec default encoding alone.
 	 * <p>
 	 * This resets the detector first, so any input that was set before the
 	 * call is discarded.
 	 *
 	 * @return a memento for the spec default, or <code>null</code> if
 	 *         {@link #getSpecDefaultEncoding()} returns <code>null</code>
 	 */
 	public EncodingMemento getSpecDefaultEncodingMemento() {
 		resetAll();
 		EncodingMemento result = null;
 		String enc = getSpecDefaultEncoding();
 		if (enc != null) {
 			createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
 			fEncodingMemento.setAppropriateDefault(enc);
 			result = fEncodingMemento;
 		}
 		return result;
 	}

 	/**
 	 * Fallback used when parsing set no memento: if a spec default exists,
 	 * installs a memento whose Java charset name and appropriate default
 	 * are both that spec default.
 	 */
 	private void handleSpecDefault() {
 		String encodingName = getSpecDefaultEncoding();
 		if (encodingName != null) {
 			// This builds the memento directly instead of calling
 			// createEncodingMemento(encodingName,
 			// EncodingMemento.USED_CONTENT_TYPE_DEFAULT), so no detected
 			// charset name is set on it.
 			fEncodingMemento = new EncodingMemento();
 			fEncodingMemento.setJavaCharsetName(encodingName);
 			fEncodingMemento.setAppropriateDefault(encodingName);
 		}
 	}

 	/**
 	 * Every subclass must provide a way to parse the input. This method's
 	 * critical responsibility is:
 	 * <ul>
 	 * <li>set the fEncodingMemento field appropriately, according to the
 	 * results of the parse of fReader.</li>
 	 * </ul>
 	 * {@link #getEncodingMemento()} sets fHeaderParsed to true after this
 	 * method returns, to avoid wasted re-parsing.
 	 *
 	 * @throws IOException
 	 *             if reading the input fails
 	 */
 	protected abstract void parseInput() throws IOException;

 	/**
 	 * Clears the input, the parsed flag and the cached memento.
 	 */
 	private void resetAll() {
 		fReader = null;
 		fHeaderParsed = false;
 		fEncodingMemento = null;
 	}

 	/**
 	 * Sets a byte stream as the input. The detector is reset, and the
 	 * stream is wrapped in a <code>ByteReader</code> that is marked at its
 	 * current position.
 	 *
 	 * @param inputStream
 	 *            the stream to examine
 	 */
 	public void set(InputStream inputStream) {
 		resetAll();
 		fReader = new ByteReader(inputStream);
 		try {
 			fReader.mark(CodedIO.MAX_MARK_SIZE);
 		}
 		catch (IOException e) {
 			// impossible, since we know ByteReader supports marking
 			throw new Error(e);
 		}
 	}

 	/**
 	 * Sets the contents of a storage as the input. The contents stream is
 	 * buffered and marked, then handed to {@link #set(InputStream)}; it is
 	 * not closed by this method.
 	 *
 	 * @param iStorage
 	 *            the storage whose contents are examined
 	 * @throws CoreException
 	 *             if the contents of the storage cannot be obtained
 	 */
 	public void set(IStorage iStorage) throws CoreException {
 		resetAll();
 		InputStream inputStream = iStorage.getContents();
 		InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
 		resettableStream.mark(CodedIO.MAX_MARK_SIZE);
 		set(resettableStream);
 		// TODO we'll need to "remember" IFile, or get its (or its
 		// project's) settings, in case those are needed to handle cases
 		// when the encoding is not in the file stream.
 	}

 	/**
 	 * Sets a character reader as the input. The detector is reset, and the
 	 * reader is wrapped in a {@link BufferedReader} if it does not support
 	 * marking, then marked at its current position.
 	 * <p>
 	 * Note: this is not part of the interface, to help avoid confusion. It
 	 * is expected that this Reader is a well formed character reader; that
 	 * is, it has already been determined not to be a unicode marked input
 	 * stream. It is also assumed to be in the correct position, at position
 	 * zero, ready to read the first character.
 	 *
 	 * @param reader
 	 *            the reader to examine
 	 */
 	public void set(Reader reader) {
 		resetAll();
 		fReader = reader;
 		if (!fReader.markSupported()) {
 			fReader = new BufferedReader(fReader);
 		}
 		try {
 			fReader.mark(CodedIO.MAX_MARK_SIZE);
 		}
 		catch (IOException e) {
 			// impossible, since we just checked if markable
 			throw new Error(e);
 		}
 	}
 }
	/*******************************************************************************
	* Copyright (c) 2001, 2005 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	* Jens Lukowski/Innoopract - initial renaming/restructuring
	*
	*******************************************************************************/
	package org.eclipse.wst.dtd.core.internal.encoding;

	import java.io.BufferedInputStream;
	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.Reader;
	import java.nio.charset.Charset;
	import java.nio.charset.IllegalCharsetNameException;
	import java.nio.charset.UnsupportedCharsetException;

	import org.eclipse.core.resources.IStorage;
	import org.eclipse.core.runtime.CoreException;
	import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
	import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
	import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


	/**
	 * Skeleton implementation of {@link IResourceCharsetDetector}.
	 * <p>
	 * The class holds the input {@link Reader}, caches the
	 * {@link EncodingMemento} produced by {@link #parseInput()}, and applies a
	 * fixed fallback order in {@link #getEncodingMemento()}: the memento set by
	 * the subclass parse, then the spec default from
	 * {@link #getSpecDefaultEncoding()}, and finally a <code>NullMemento</code>.
	 * Subclasses supply {@link #parseInput()} and
	 * {@link #getSpecDefaultEncoding()}.
	 */
	public abstract class AbstractResourceEncodingDetector implements IResourceCharsetDetector {

		/**
		 * The detection result. It is <code>null</code> after a reset and is
		 * filled in by the <code>createEncodingMemento</code> methods or by the
		 * fallbacks in {@link #getEncodingMemento()}.
		 */
		protected EncodingMemento fEncodingMemento;

		/**
		 * <code>true</code> once {@link #getEncodingMemento()} has called
		 * {@link #parseInput()} for the current input, so repeated 'get' calls
		 * do not parse again.
		 */
		protected boolean fHeaderParsed;

		/**
		 * The current input, installed and marked by one of the
		 * <code>set</code> methods; <code>null</code> when no input is set.
		 */
		protected Reader fReader;

		/**
		 * Creates a detector with no input set.
		 */
		public AbstractResourceEncodingDetector() {
			super();
		}

		/**
		 * Replaces {@link #fEncodingMemento} with a new memento describing the
		 * given detected charset name.
		 * <p>
		 * Note: once this instance is created, trace info still needs to be
		 * appended by the caller, depending on the context in which it is
		 * created.
		 *
		 * @param detectedCharsetName
		 *            the charset name found in the input
		 */
		private void createEncodingMemento(String detectedCharsetName) {
			// The field must be assigned before getAppropriateJavaCharset is
			// called, because that method may flag the invalid encoding on it.
			fEncodingMemento = new EncodingMemento();
			fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName));
			fEncodingMemento.setDetectedCharsetName(detectedCharsetName);
			// TODO: if detectedCharset and spec default is null, need to use
			// "workbench based" defaults.
			fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding());
		}

		/**
		 * Convenience method all subclasses can use (but not override).
		 *
		 * @param detectedCharsetName
		 *            the charset name found in the input
		 * @param reason
		 *            accepted for API compatibility; this implementation does
		 *            not use it
		 */
		protected final void createEncodingMemento(String detectedCharsetName, String reason) {
			createEncodingMemento(detectedCharsetName);
		}

		/**
		 * Convenience method all subclasses can use (but not override).
		 *
		 * @throws IllegalStateException
		 *             if no input has been set
		 */
		protected final void ensureInputSet() {
			if (fReader == null) {
				throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
			}
		}

		/**
		 * Maps a detected charset name to the name Java should use.
		 * <p>
		 * This method can return <code>null</code> if the charset name is
		 * unusable (in which case "appropriateDefault" should be used, if a name
		 * is really needed for some "save anyway" cases). It expects
		 * {@link #fEncodingMemento} to be non-null, since an invalid name is
		 * recorded on it.
		 *
		 * @param detectedCharsetName
		 *            the charset name found in the input; may be
		 *            <code>null</code>
		 * @return the overridden or canonical Java charset name, or
		 *         <code>null</code> when <code>detectedCharsetName</code> is
		 *         <code>null</code>
		 */
		private String getAppropriateJavaCharset(String detectedCharsetName) {
			if (detectedCharsetName == null) {
				// Charset.forName(null) throws IllegalArgumentException, which
				// is neither of the exceptions handled below. There is nothing
				// to map or to flag for a missing name.
				return null;
			}
			// 1. Check explicit mapping overrides from property file -- it's
			// here we pick up "rules" for cases that are not even in Java.
			String result = CodedIO.checkMappingOverrides(detectedCharsetName);
			// 2. Use the "canonical" name from JRE mappings.
			// Note: see Charset JavaDoc, the name you look one up with can be
			// an alias; the name you get back is the "standard" name.
			Charset javaCharset = null;
			try {
				javaCharset = Charset.forName(detectedCharsetName);
			}
			catch (UnsupportedCharsetException e) {
				flagInvalidUnlessOverridden(result, detectedCharsetName);
			}
			catch (IllegalCharsetNameException e) {
				flagInvalidUnlessOverridden(result, detectedCharsetName);
			}
			// give priority to java canonical name, if present
			if (javaCharset != null) {
				// but still allow overrides
				result = CodedIO.checkMappingOverrides(javaCharset.name());
			}
			return result;
		}

		/**
		 * Records the detected name as invalid on {@link #fEncodingMemento},
		 * but only if the override lookup returned the detected name unchanged
		 * -- they won't be equal if overridden.
		 *
		 * @param overrideResult
		 *            the value returned by the override lookup
		 * @param detectedCharsetName
		 *            the charset name Java rejected
		 */
		private void flagInvalidUnlessOverridden(String overrideResult, String detectedCharsetName) {
			if (overrideResult != null && overrideResult.equals(detectedCharsetName)) {
				fEncodingMemento.setInvalidEncoding(detectedCharsetName);
			}
		}

		/**
		 * Returns the detected charset name of the current input.
		 *
		 * @return the detected charset name held by the memento from
		 *         {@link #getEncodingMemento()}
		 * @throws IOException
		 *             if parsing the input fails
		 */
		public String getEncoding() throws IOException {
			return getEncodingMemento().getDetectedCharsetName();
		}

		/**
		 * Returns the encoding memento for the current input, parsing the input
		 * on the first call.
		 * <p>
		 * To keep the overall rules consistent, subclasses are meant to provide
		 * only certain pieces of the implementation ({@link #parseInput()} and
		 * {@link #getSpecDefaultEncoding()}). NOTE(review): the original comment
		 * said this method would be marked final, but it is not declared final.
		 *
		 * @return the memento; never <code>null</code>
		 * @throws IOException
		 *             if parsing the input fails
		 * @throws IllegalStateException
		 *             if no input has been set
		 */
		public EncodingMemento getEncodingMemento() throws IOException {
			ensureInputSet();
			if (!fHeaderParsed) {
				parseInput();
				// We keep track of whether the header has already been parsed,
				// so multiple 'get' calls can be made without reparsing.
				fHeaderParsed = true;
				// Note: there is a "hidden assumption" here that an empty
				// string in content should be treated the same as not present.
			}
			if (fEncodingMemento == null) {
				handleSpecDefault();
			}
			if (fEncodingMemento == null) {
				// safety net
				fEncodingMemento = new NullMemento();
			}
			return fEncodingMemento;
		}

		/**
		 * This is to return a default encoding -- as specified by an industry
		 * content type spec -- when not present in the stream, for example, XML
		 * specifies UTF-8, JSP specifies ISO-8859-1. This method should return
		 * null if there is no such "spec default".
		 *
		 * @return the spec default charset name, or <code>null</code>
		 */
		public abstract String getSpecDefaultEncoding();

		/**
		 * Builds a memento from the spec default encoding alone.
		 * <p>
		 * This resets the detector first, so any input that was set before the
		 * call is discarded.
		 *
		 * @return a memento for the spec default, or <code>null</code> if
		 *         {@link #getSpecDefaultEncoding()} returns <code>null</code>
		 */
		public EncodingMemento getSpecDefaultEncodingMemento() {
			resetAll();
			EncodingMemento result = null;
			String enc = getSpecDefaultEncoding();
			if (enc != null) {
				createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
				fEncodingMemento.setAppropriateDefault(enc);
				result = fEncodingMemento;
			}
			return result;
		}

		/**
		 * Fallback used when parsing set no memento: if a spec default exists,
		 * installs a memento whose Java charset name and appropriate default
		 * are both that spec default.
		 */
		private void handleSpecDefault() {
			String encodingName = getSpecDefaultEncoding();
			if (encodingName != null) {
				// This builds the memento directly instead of calling
				// createEncodingMemento(encodingName,
				// EncodingMemento.USED_CONTENT_TYPE_DEFAULT), so no detected
				// charset name is set on it.
				fEncodingMemento = new EncodingMemento();
				fEncodingMemento.setJavaCharsetName(encodingName);
				fEncodingMemento.setAppropriateDefault(encodingName);
			}
		}

		/**
		 * Every subclass must provide a way to parse the input. This method's
		 * critical responsibility is:
		 * <ul>
		 * <li>set the fEncodingMemento field appropriately, according to the
		 * results of the parse of fReader.</li>
		 * </ul>
		 * {@link #getEncodingMemento()} sets fHeaderParsed to true after this
		 * method returns, to avoid wasted re-parsing.
		 *
		 * @throws IOException
		 *             if reading the input fails
		 */
		protected abstract void parseInput() throws IOException;

		/**
		 * Clears the input, the parsed flag and the cached memento.
		 */
		private void resetAll() {
			fReader = null;
			fHeaderParsed = false;
			fEncodingMemento = null;
		}

		/**
		 * Sets a byte stream as the input. The detector is reset, and the
		 * stream is wrapped in a <code>ByteReader</code> that is marked at its
		 * current position.
		 *
		 * @param inputStream
		 *            the stream to examine
		 */
		public void set(InputStream inputStream) {
			resetAll();
			fReader = new ByteReader(inputStream);
			try {
				fReader.mark(CodedIO.MAX_MARK_SIZE);
			}
			catch (IOException e) {
				// impossible, since we know ByteReader supports marking
				throw new Error(e);
			}
		}

		/**
		 * Sets the contents of a storage as the input. The contents stream is
		 * buffered and marked, then handed to {@link #set(InputStream)}; it is
		 * not closed by this method.
		 *
		 * @param iStorage
		 *            the storage whose contents are examined
		 * @throws CoreException
		 *             if the contents of the storage cannot be obtained
		 */
		public void set(IStorage iStorage) throws CoreException {
			resetAll();
			InputStream inputStream = iStorage.getContents();
			InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
			resettableStream.mark(CodedIO.MAX_MARK_SIZE);
			set(resettableStream);
			// TODO we'll need to "remember" IFile, or get its (or its
			// project's) settings, in case those are needed to handle cases
			// when the encoding is not in the file stream.
		}

		/**
		 * Sets a character reader as the input. The detector is reset, and the
		 * reader is wrapped in a {@link BufferedReader} if it does not support
		 * marking, then marked at its current position.
		 * <p>
		 * Note: this is not part of the interface, to help avoid confusion. It
		 * is expected that this Reader is a well formed character reader; that
		 * is, it has already been determined not to be a unicode marked input
		 * stream. It is also assumed to be in the correct position, at position
		 * zero, ready to read the first character.
		 *
		 * @param reader
		 *            the reader to examine
		 */
		public void set(Reader reader) {
			resetAll();
			fReader = reader;
			if (!fReader.markSupported()) {
				fReader = new BufferedReader(fReader);
			}
			try {
				fReader.mark(CodedIO.MAX_MARK_SIZE);
			}
			catch (IOException e) {
				// impossible, since we just checked if markable
				throw new Error(e);
			}
		}
	}