bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ContentDescriberForHTML.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2004 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/
 package org.eclipse.wst.html.core.internal.contenttype;

 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;

 import org.eclipse.core.runtime.QualifiedName;
 import org.eclipse.core.runtime.content.IContentDescriber;
 import org.eclipse.core.runtime.content.IContentDescription;
 import org.eclipse.core.runtime.content.ITextContentDescriber;
 import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
 import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
 import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;

 /**
  * Content describer for HTML resources.
  *
  * When asked to describe a stream or reader, this class runs an
  * {@link HTMLResourceEncodingDetector} over the contents and copies the
  * charset related findings (byte order mark, charset, detected charset,
  * unsupported charset, appropriate default) into the supplied
  * {@link IContentDescription}.
  *
  * A few design principles to remember with content describers:
  * <ul>
  * <li>Do not store values/data in the description's properties that are not
  * needed, especially not large objects, and not even a value that is already
  * the default value: description properties are cached per session, so they
  * add up in memory.</li>
  * <li>A ContentDescriber instance becomes a "root object" in the
  * ContentDescriberManager (that is, always in memory, never GC'd), so it
  * should not have any instance or state data, since that data would become
  * stale and "hold on" to objects unnecessarily.</li>
  * </ul>
  */
 public final class ContentDescriberForHTML implements ITextContentDescriber {

 	/** Description properties this describer is able to fill in. */
 	private static final QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};

 	/**
 	 * Describes byte oriented contents.
 	 *
 	 * @param contents
 	 *            the contents to examine
 	 * @param description
 	 *            the description to fill in, or <code>null</code> when only
 	 *            the validity of the contents is of interest
 	 * @return the validity computed for the contents; currently always
 	 *         {@link IContentDescriber#INDETERMINATE}
 	 * @throws IOException
 	 *             if the contents can not be read
 	 */
 	public int describe(InputStream contents, IContentDescription description) throws IOException {
 		if (description != null) {
 			calculateSupportedOptions(contents, description);
 		}
 		// The same validity is reported whether or not a description was
 		// requested, so both kinds of call agree with each other.
 		return computeValidity(contents);
 	}

 	/**
 	 * Describes character oriented contents.
 	 *
 	 * @param contents
 	 *            the contents to examine
 	 * @param description
 	 *            the description to fill in, or <code>null</code> when only
 	 *            the validity of the contents is of interest
 	 * @return the validity computed for the contents; currently always
 	 *         {@link IContentDescriber#INDETERMINATE}
 	 * @throws IOException
 	 *             if the contents can not be read
 	 */
 	public int describe(Reader contents, IContentDescription description) throws IOException {
 		if (description != null) {
 			calculateSupportedOptions(contents, description);
 		}
 		// The same validity is reported whether or not a description was
 		// requested, so both kinds of call agree with each other.
 		return computeValidity(contents);
 	}

 	/**
 	 * Returns the properties this describer can fill in.
 	 *
 	 * @return a fresh copy of the supported options, so that callers can
 	 *         not modify the array shared by this class
 	 */
 	public QualifiedName[] getSupportedOptions() {
 		return (QualifiedName[]) SUPPORTED_OPTIONS.clone();
 	}

 	/**
 	 * Runs charset detection on the stream and records the results, but only
 	 * when at least one of the supported properties was requested.
 	 *
 	 * @param contents
 	 *            the contents handed to the detector
 	 * @param description
 	 *            the description to update
 	 * @throws IOException
 	 *             if detection fails while reading
 	 */
 	private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
 		if (isRelevent(description)) {
 			IResourceCharsetDetector detector = getDetector();
 			detector.set(contents);
 			handleCalculations(description, detector);
 		}
 	}

 	/**
 	 * Runs charset detection on the reader and records the results, but only
 	 * when at least one of the supported properties was requested.
 	 *
 	 * @param contents
 	 *            the contents handed to the detector
 	 * @param description
 	 *            the description to update
 	 * @throws IOException
 	 *             if detection fails while reading
 	 */
 	private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
 		if (isRelevent(description)) {
 			IResourceCharsetDetector detector = getDetector();
 			detector.set(contents);
 			handleCalculations(description, detector);
 		}
 	}

 	/**
 	 * @param inputStream
 	 *            currently ignored
 	 * @return always {@link IContentDescriber#INDETERMINATE}
 	 */
 	private int computeValidity(InputStream inputStream) {
 		// currently no contents specific check for valid HTML contents
 		// (this may change once we add XHTML content type)
 		return IContentDescriber.INDETERMINATE;
 	}

 	/**
 	 * @param reader
 	 *            currently ignored
 	 * @return always {@link IContentDescriber#INDETERMINATE}
 	 */
 	private int computeValidity(Reader reader) {
 		// currently no contents specific check for valid HTML contents
 		// (this may change once we add XHTML content type)
 		return IContentDescriber.INDETERMINATE;
 	}

 	/**
 	 * @return a new detector for every call, so that no state is kept in
 	 *         this describer
 	 */
 	private IResourceCharsetDetector getDetector() {
 		return new HTMLResourceEncodingDetector();
 	}

 	/**
 	 * Copies the detector's findings into the description.
 	 *
 	 * @param description
 	 *            the description to update
 	 * @param detector
 	 *            the detector that was given the contents; must be the
 	 *            {@link HTMLResourceEncodingDetector} created by
 	 *            {@link #getDetector()}
 	 * @throws IOException
 	 *             if the detector fails while reading the contents
 	 */
 	private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
 		// The memento is only reachable through the concrete detector type.
 		EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento();

 		// TODO: I need to verify to see if this BOM work is always done
 		// by text type.
 		Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
 		if (detectedByteOrderMark != null) {
 			Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
 			// not sure why these would ever be different, so if they are,
 			// this may need to be "pushed" up into base.
 			// NOTE(review): if the BOM values are arrays, equals() compares
 			// identity, not contents - confirm that this is intended.
 			if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
 				description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
 			}
 		}

 		if (!encodingMemento.isValid()) {
 			/*
 			 * note: after setting here, it is the mere presence of
 			 * IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
 			 * resource's description that can be used to determine if
 			 * invalid; in those cases, the "detected" property contains an
 			 * "appropriate default" to use.
 			 */
 			description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
 			description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
 		}

 		Object detectedCharset = encodingMemento.getDetectedCharsetName();
 		Object javaCharset = encodingMemento.getJavaCharsetName();

 		// The detected charset is recorded whenever there is one; this
 		// single call covers every case below.
 		handleDetectedSpecialCase(description, detectedCharset, javaCharset);

 		if (javaCharset != null) {
 			Object existingCharset = description.getProperty(IContentDescription.CHARSET);
 			if (!javaCharset.equals(existingCharset)) {
 				// we may need to add what we found, but only need to add it
 				// if it is different from the spec default. If there is no
 				// spec default, we assume we always need to add it.
 				Object defaultCharset = detector.getSpecDefaultEncoding();
 				if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
 					description.setProperty(IContentDescription.CHARSET, javaCharset);
 				}
 			}
 		}
 	}

 	/**
 	 * Records the detected charset name, when there is one.
 	 *
 	 * @param description
 	 *            the description to update
 	 * @param detectedCharset
 	 *            the charset name as detected, or <code>null</code> if none
 	 *            was detected
 	 * @param javaCharset
 	 *            the corresponding Java charset name; not used, since the
 	 *            property is set even when both names are equal
 	 */
 	private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) {
 		// Once we detected a charset, we set the property even though it
 		// may be the same as javaCharset, because there are clients that
 		// rely on this property to determine if the charset is actually
 		// detected in the file or not.
 		if (detectedCharset != null) {
 			description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
 		}
 	}

 	/**
 	 * Tells whether any property this describer can compute was requested.
 	 *
 	 * @param description
 	 *            the description to check, may be <code>null</code>
 	 * @return <code>true</code> if the description is not <code>null</code>
 	 *         and requests at least one of the supported properties
 	 */
 	private boolean isRelevent(IContentDescription description) {
 		return description != null
 					&& (description.isRequested(IContentDescription.BYTE_ORDER_MARK)
 								|| description.isRequested(IContentDescription.CHARSET)
 								|| description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
 								|| description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
 								|| description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET));
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2004 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	*******************************************************************************/
	package org.eclipse.wst.html.core.internal.contenttype;

	import java.io.IOException;
	import java.io.InputStream;
	import java.io.Reader;

	import org.eclipse.core.runtime.QualifiedName;
	import org.eclipse.core.runtime.content.IContentDescriber;
	import org.eclipse.core.runtime.content.IContentDescription;
	import org.eclipse.core.runtime.content.ITextContentDescriber;
	import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
	import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
	import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;

	/**
	 * Content describer for HTML resources.
	 *
	 * When asked to describe a stream or reader, this class runs an
	 * {@link HTMLResourceEncodingDetector} over the contents and copies the
	 * charset related findings (byte order mark, charset, detected charset,
	 * unsupported charset, appropriate default) into the supplied
	 * {@link IContentDescription}.
	 *
	 * A few design principles to remember with content describers:
	 * <ul>
	 * <li>Do not store values/data in the description's properties that are not
	 * needed, especially not large objects, and not even a value that is already
	 * the default value: description properties are cached per session, so they
	 * add up in memory.</li>
	 * <li>A ContentDescriber instance becomes a "root object" in the
	 * ContentDescriberManager (that is, always in memory, never GC'd), so it
	 * should not have any instance or state data, since that data would become
	 * stale and "hold on" to objects unnecessarily.</li>
	 * </ul>
	 */
	public final class ContentDescriberForHTML implements ITextContentDescriber {

		/** Description properties this describer is able to fill in. */
		private static final QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};

		/**
		 * Describes byte oriented contents.
		 *
		 * @param contents
		 *            the contents to examine
		 * @param description
		 *            the description to fill in, or <code>null</code> when only
		 *            the validity of the contents is of interest
		 * @return the validity computed for the contents; currently always
		 *         {@link IContentDescriber#INDETERMINATE}
		 * @throws IOException
		 *             if the contents can not be read
		 */
		public int describe(InputStream contents, IContentDescription description) throws IOException {
			if (description != null) {
				calculateSupportedOptions(contents, description);
			}
			// The same validity is reported whether or not a description was
			// requested, so both kinds of call agree with each other.
			return computeValidity(contents);
		}

		/**
		 * Describes character oriented contents.
		 *
		 * @param contents
		 *            the contents to examine
		 * @param description
		 *            the description to fill in, or <code>null</code> when only
		 *            the validity of the contents is of interest
		 * @return the validity computed for the contents; currently always
		 *         {@link IContentDescriber#INDETERMINATE}
		 * @throws IOException
		 *             if the contents can not be read
		 */
		public int describe(Reader contents, IContentDescription description) throws IOException {
			if (description != null) {
				calculateSupportedOptions(contents, description);
			}
			// The same validity is reported whether or not a description was
			// requested, so both kinds of call agree with each other.
			return computeValidity(contents);
		}

		/**
		 * Returns the properties this describer can fill in.
		 *
		 * @return a fresh copy of the supported options, so that callers can
		 *         not modify the array shared by this class
		 */
		public QualifiedName[] getSupportedOptions() {
			return (QualifiedName[]) SUPPORTED_OPTIONS.clone();
		}

		/**
		 * Runs charset detection on the stream and records the results, but only
		 * when at least one of the supported properties was requested.
		 *
		 * @param contents
		 *            the contents handed to the detector
		 * @param description
		 *            the description to update
		 * @throws IOException
		 *             if detection fails while reading
		 */
		private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
			if (isRelevent(description)) {
				IResourceCharsetDetector detector = getDetector();
				detector.set(contents);
				handleCalculations(description, detector);
			}
		}

		/**
		 * Runs charset detection on the reader and records the results, but only
		 * when at least one of the supported properties was requested.
		 *
		 * @param contents
		 *            the contents handed to the detector
		 * @param description
		 *            the description to update
		 * @throws IOException
		 *             if detection fails while reading
		 */
		private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
			if (isRelevent(description)) {
				IResourceCharsetDetector detector = getDetector();
				detector.set(contents);
				handleCalculations(description, detector);
			}
		}

		/**
		 * @param inputStream
		 *            currently ignored
		 * @return always {@link IContentDescriber#INDETERMINATE}
		 */
		private int computeValidity(InputStream inputStream) {
			// currently no contents specific check for valid HTML contents
			// (this may change once we add XHTML content type)
			return IContentDescriber.INDETERMINATE;
		}

		/**
		 * @param reader
		 *            currently ignored
		 * @return always {@link IContentDescriber#INDETERMINATE}
		 */
		private int computeValidity(Reader reader) {
			// currently no contents specific check for valid HTML contents
			// (this may change once we add XHTML content type)
			return IContentDescriber.INDETERMINATE;
		}

		/**
		 * @return a new detector for every call, so that no state is kept in
		 *         this describer
		 */
		private IResourceCharsetDetector getDetector() {
			return new HTMLResourceEncodingDetector();
		}

		/**
		 * Copies the detector's findings into the description.
		 *
		 * @param description
		 *            the description to update
		 * @param detector
		 *            the detector that was given the contents; must be the
		 *            {@link HTMLResourceEncodingDetector} created by
		 *            {@link #getDetector()}
		 * @throws IOException
		 *             if the detector fails while reading the contents
		 */
		private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
			// The memento is only reachable through the concrete detector type.
			EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento();

			// TODO: I need to verify to see if this BOM work is always done
			// by text type.
			Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
			if (detectedByteOrderMark != null) {
				Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
				// not sure why these would ever be different, so if they are,
				// this may need to be "pushed" up into base.
				// NOTE(review): if the BOM values are arrays, equals() compares
				// identity, not contents - confirm that this is intended.
				if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
					description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
				}
			}

			if (!encodingMemento.isValid()) {
				/*
				 * note: after setting here, it is the mere presence of
				 * IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
				 * resource's description that can be used to determine if
				 * invalid; in those cases, the "detected" property contains an
				 * "appropriate default" to use.
				 */
				description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
				description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
			}

			Object detectedCharset = encodingMemento.getDetectedCharsetName();
			Object javaCharset = encodingMemento.getJavaCharsetName();

			// The detected charset is recorded whenever there is one; this
			// single call covers every case below.
			handleDetectedSpecialCase(description, detectedCharset, javaCharset);

			if (javaCharset != null) {
				Object existingCharset = description.getProperty(IContentDescription.CHARSET);
				if (!javaCharset.equals(existingCharset)) {
					// we may need to add what we found, but only need to add it
					// if it is different from the spec default. If there is no
					// spec default, we assume we always need to add it.
					Object defaultCharset = detector.getSpecDefaultEncoding();
					if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
						description.setProperty(IContentDescription.CHARSET, javaCharset);
					}
				}
			}
		}

		/**
		 * Records the detected charset name, when there is one.
		 *
		 * @param description
		 *            the description to update
		 * @param detectedCharset
		 *            the charset name as detected, or <code>null</code> if none
		 *            was detected
		 * @param javaCharset
		 *            the corresponding Java charset name; not used, since the
		 *            property is set even when both names are equal
		 */
		private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) {
			// Once we detected a charset, we set the property even though it
			// may be the same as javaCharset, because there are clients that
			// rely on this property to determine if the charset is actually
			// detected in the file or not.
			if (detectedCharset != null) {
				description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
			}
		}

		/**
		 * Tells whether any property this describer can compute was requested.
		 *
		 * @param description
		 *            the description to check, may be <code>null</code>
		 * @return <code>true</code> if the description is not <code>null</code>
		 *         and requests at least one of the supported properties
		 */
		private boolean isRelevent(IContentDescription description) {
			return description != null
						&& (description.isRequested(IContentDescription.BYTE_ORDER_MARK)
									|| description.isRequested(IContentDescription.CHARSET)
									|| description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
									|| description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
									|| description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET));
		}

	}