bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ContentDescriberForHTML.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2004 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/
 package org.eclipse.wst.html.core.internal.contenttype;

 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;

 import org.eclipse.core.runtime.QualifiedName;
 import org.eclipse.core.runtime.content.IContentDescriber;
 import org.eclipse.core.runtime.content.IContentDescription;
 import org.eclipse.core.runtime.content.ITextContentDescriber;
 import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
 import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
 import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


 /**
  * Content describer for HTML resources. It fills the charset related
  * properties of an {@link IContentDescription} from what an
  * {@link HTMLResourceEncodingDetector} finds in the contents. There is
  * currently no content specific validity check, so both
  * <code>describe</code> methods always answer
  * {@link IContentDescriber#INDETERMINATE}.
  * 
  * NOTE(review): the detector is created lazily and kept in an instance
  * field that is reused by every call; confirm that a describer instance is
  * never used from several threads at once.
  */
 public final class ContentDescriberForHTML implements ITextContentDescriber {

 	/** Options this describer is able to fill in. */
 	final private static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};

 	/** Lazily created detector, see {@link #getDetector()}. */
 	private IResourceCharsetDetector resourceCharsetDetector;

 	/**
 	 * Describes byte based contents.
 	 * 
 	 * @param contents
 	 *            the contents to examine
 	 * @param description
 	 *            the description to fill in, or <code>null</code> when only
 	 *            the validity value is wanted
 	 * @return the validity value, currently always
 	 *         {@link IContentDescriber#INDETERMINATE}
 	 * @throws IOException
 	 *             if the contents can not be read by the detector
 	 */
 	public int describe(InputStream contents, IContentDescription description) throws IOException {
 		if (description != null) {
 			calculateSupportedOptions(contents, description);
 		}
 		// the same validity value is returned whether or not a description
 		// was supplied
 		return computeValidity(contents);
 	}

 	/**
 	 * Describes character based contents.
 	 * 
 	 * @param contents
 	 *            the contents to examine
 	 * @param description
 	 *            the description to fill in, or <code>null</code> when only
 	 *            the validity value is wanted
 	 * @return the validity value, currently always
 	 *         {@link IContentDescriber#INDETERMINATE}
 	 * @throws IOException
 	 *             if the contents can not be read by the detector
 	 */
 	public int describe(Reader contents, IContentDescription description) throws IOException {
 		if (description != null) {
 			calculateSupportedOptions(contents, description);
 		}
 		// the same validity value is returned whether or not a description
 		// was supplied
 		return computeValidity(contents);
 	}

 	/**
 	 * @return the options this describer can fill in; a fresh copy is
 	 *         returned so callers can not modify the shared static array
 	 */
 	public QualifiedName[] getSupportedOptions() {
 		return (QualifiedName[]) SUPPORTED_OPTIONS.clone();
 	}

 	/**
 	 * Runs the detector over the stream and records its findings, but only
 	 * when at least one supported option was requested.
 	 * 
 	 * @param contents
 	 *            the contents handed to the detector
 	 * @param description
 	 *            the description to fill in
 	 * @throws IOException
 	 *             if the detector fails to read the contents
 	 */
 	private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
 		if (isRelevant(description)) {
 			IResourceCharsetDetector detector = getDetector();
 			detector.set(contents);
 			handleCalculations(description, detector);
 		}
 	}

 	/**
 	 * Runs the detector over the reader and records its findings, but only
 	 * when at least one supported option was requested.
 	 * 
 	 * @param contents
 	 *            the contents handed to the detector
 	 * @param description
 	 *            the description to fill in
 	 * @throws IOException
 	 *             if the detector fails to read the contents
 	 */
 	private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
 		if (isRelevant(description)) {
 			IResourceCharsetDetector detector = getDetector();
 			detector.set(contents);
 			handleCalculations(description, detector);
 		}
 	}

 	/**
 	 * @param inputStream
 	 *            ignored for now
 	 * @return always {@link IContentDescriber#INDETERMINATE}
 	 */
 	private int computeValidity(InputStream inputStream) {
 		// currently no contents specific check for valid HTML contents
 		// (this may change once we add XHTML content type)
 		return IContentDescriber.INDETERMINATE;
 	}

 	/**
 	 * @param reader
 	 *            ignored for now
 	 * @return always {@link IContentDescriber#INDETERMINATE}
 	 */
 	private int computeValidity(Reader reader) {
 		// currently no contents specific check for valid HTML contents
 		// (this may change once we add XHTML content type)
 		return IContentDescriber.INDETERMINATE;
 	}

 	/**
 	 * @return the detector of this describer, created on first use
 	 */
 	private IResourceCharsetDetector getDetector() {
 		if (resourceCharsetDetector == null) {
 			resourceCharsetDetector = new HTMLResourceEncodingDetector();
 		}
 		return resourceCharsetDetector;
 	}

 	/**
 	 * Copies the detector's findings into the description. If any one
 	 * option was requested, all of them are set.
 	 * 
 	 * @param description
 	 *            the description to fill in
 	 * @param detector
 	 *            the detector that already received the contents; it must
 	 *            be an {@link HTMLResourceEncodingDetector}
 	 * @throws IOException
 	 *             if the detector fails while producing its results
 	 */
 	private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
 		EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento();

 		// TODO: verify whether this BOM work is always done by the text
 		// type already.
 		Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
 		if (detectedByteOrderMark != null) {
 			Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
 			// only write when it differs from what is already recorded
 			if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
 				description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
 			}
 		}

 		if (!encodingMemento.isValid()) {
 			// the mere presence of UNSUPPORTED_CHARSET in the description
 			// marks the encoding as invalid; APPROPRIATE_DEFAULT then names
 			// the charset to use instead
 			description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
 			description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
 		}

 		Object javaCharset = encodingMemento.getJavaCharsetName();

 		// recorded exactly once; the former second call in the "charset
 		// already set" branch only repeated the same assignment
 		handleDetectedSpecialCase(description, encodingMemento.getDetectedCharsetName());

 		if (javaCharset != null && !javaCharset.equals(description.getProperty(IContentDescription.CHARSET))) {
 			// the charset only needs to be added when it differs from the
 			// spec default; without a spec default it is always added
 			Object defaultCharset = detector.getSpecDefaultEncoding();
 			if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
 				description.setProperty(IContentDescription.CHARSET, javaCharset);
 			}
 		}

 		// the encoding memento itself is deliberately not stored in the
 		// description, since descriptions are cached per session
 	}

 	/**
 	 * Records the detected charset name, if there is one. It is set even
 	 * when it equals the java charset name, because there are clients that
 	 * rely on this property to determine if the charset was actually
 	 * detected in the file or not.
 	 * 
 	 * @param description
 	 *            the description to fill in
 	 * @param detectedCharset
 	 *            the detected charset name, may be <code>null</code>
 	 */
 	private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset) {
 		if (detectedCharset != null) {
 			description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
 		}
 	}

 	/**
 	 * @param description
 	 *            the description to inspect, may be <code>null</code>
 	 * @return <code>true</code> if the description requests at least one
 	 *         of the options this describer fills in
 	 */
 	private boolean isRelevant(IContentDescription description) {
 		if (description == null) {
 			return false;
 		}
 		return description.isRequested(IContentDescription.BYTE_ORDER_MARK)
 			|| description.isRequested(IContentDescription.CHARSET)
 			|| description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
 			|| description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
 			|| description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET);
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2004 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	*******************************************************************************/
	package org.eclipse.wst.html.core.internal.contenttype;

	import java.io.IOException;
	import java.io.InputStream;
	import java.io.Reader;

	import org.eclipse.core.runtime.QualifiedName;
	import org.eclipse.core.runtime.content.IContentDescriber;
	import org.eclipse.core.runtime.content.IContentDescription;
	import org.eclipse.core.runtime.content.ITextContentDescriber;
	import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
	import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
	import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


	/**
	 * Content describer for HTML resources. It fills the charset related
	 * properties of an {@link IContentDescription} from what an
	 * {@link HTMLResourceEncodingDetector} finds in the contents. There is
	 * currently no content specific validity check, so both
	 * <code>describe</code> methods always answer
	 * {@link IContentDescriber#INDETERMINATE}.
	 * 
	 * NOTE(review): the detector is created lazily and kept in an instance
	 * field that is reused by every call; confirm that a describer instance is
	 * never used from several threads at once.
	 */
	public final class ContentDescriberForHTML implements ITextContentDescriber {

		/** Options this describer is able to fill in. */
		final private static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};

		/** Lazily created detector, see {@link #getDetector()}. */
		private IResourceCharsetDetector resourceCharsetDetector;

		/**
		 * Describes byte based contents.
		 * 
		 * @param contents
		 *            the contents to examine
		 * @param description
		 *            the description to fill in, or <code>null</code> when only
		 *            the validity value is wanted
		 * @return the validity value, currently always
		 *         {@link IContentDescriber#INDETERMINATE}
		 * @throws IOException
		 *             if the contents can not be read by the detector
		 */
		public int describe(InputStream contents, IContentDescription description) throws IOException {
			if (description != null) {
				calculateSupportedOptions(contents, description);
			}
			// the same validity value is returned whether or not a description
			// was supplied
			return computeValidity(contents);
		}

		/**
		 * Describes character based contents.
		 * 
		 * @param contents
		 *            the contents to examine
		 * @param description
		 *            the description to fill in, or <code>null</code> when only
		 *            the validity value is wanted
		 * @return the validity value, currently always
		 *         {@link IContentDescriber#INDETERMINATE}
		 * @throws IOException
		 *             if the contents can not be read by the detector
		 */
		public int describe(Reader contents, IContentDescription description) throws IOException {
			if (description != null) {
				calculateSupportedOptions(contents, description);
			}
			// the same validity value is returned whether or not a description
			// was supplied
			return computeValidity(contents);
		}

		/**
		 * @return the options this describer can fill in; a fresh copy is
		 *         returned so callers can not modify the shared static array
		 */
		public QualifiedName[] getSupportedOptions() {
			return (QualifiedName[]) SUPPORTED_OPTIONS.clone();
		}

		/**
		 * Runs the detector over the stream and records its findings, but only
		 * when at least one supported option was requested.
		 * 
		 * @param contents
		 *            the contents handed to the detector
		 * @param description
		 *            the description to fill in
		 * @throws IOException
		 *             if the detector fails to read the contents
		 */
		private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
			if (isRelevant(description)) {
				IResourceCharsetDetector detector = getDetector();
				detector.set(contents);
				handleCalculations(description, detector);
			}
		}

		/**
		 * Runs the detector over the reader and records its findings, but only
		 * when at least one supported option was requested.
		 * 
		 * @param contents
		 *            the contents handed to the detector
		 * @param description
		 *            the description to fill in
		 * @throws IOException
		 *             if the detector fails to read the contents
		 */
		private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
			if (isRelevant(description)) {
				IResourceCharsetDetector detector = getDetector();
				detector.set(contents);
				handleCalculations(description, detector);
			}
		}

		/**
		 * @param inputStream
		 *            ignored for now
		 * @return always {@link IContentDescriber#INDETERMINATE}
		 */
		private int computeValidity(InputStream inputStream) {
			// currently no contents specific check for valid HTML contents
			// (this may change once we add XHTML content type)
			return IContentDescriber.INDETERMINATE;
		}

		/**
		 * @param reader
		 *            ignored for now
		 * @return always {@link IContentDescriber#INDETERMINATE}
		 */
		private int computeValidity(Reader reader) {
			// currently no contents specific check for valid HTML contents
			// (this may change once we add XHTML content type)
			return IContentDescriber.INDETERMINATE;
		}

		/**
		 * @return the detector of this describer, created on first use
		 */
		private IResourceCharsetDetector getDetector() {
			if (resourceCharsetDetector == null) {
				resourceCharsetDetector = new HTMLResourceEncodingDetector();
			}
			return resourceCharsetDetector;
		}

		/**
		 * Copies the detector's findings into the description. If any one
		 * option was requested, all of them are set.
		 * 
		 * @param description
		 *            the description to fill in
		 * @param detector
		 *            the detector that already received the contents; it must
		 *            be an {@link HTMLResourceEncodingDetector}
		 * @throws IOException
		 *             if the detector fails while producing its results
		 */
		private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
			EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento();

			// TODO: verify whether this BOM work is always done by the text
			// type already.
			Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
			if (detectedByteOrderMark != null) {
				Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
				// only write when it differs from what is already recorded
				if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
					description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
				}
			}

			if (!encodingMemento.isValid()) {
				// the mere presence of UNSUPPORTED_CHARSET in the description
				// marks the encoding as invalid; APPROPRIATE_DEFAULT then names
				// the charset to use instead
				description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
				description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
			}

			Object javaCharset = encodingMemento.getJavaCharsetName();

			// recorded exactly once; the former second call in the "charset
			// already set" branch only repeated the same assignment
			handleDetectedSpecialCase(description, encodingMemento.getDetectedCharsetName());

			if (javaCharset != null && !javaCharset.equals(description.getProperty(IContentDescription.CHARSET))) {
				// the charset only needs to be added when it differs from the
				// spec default; without a spec default it is always added
				Object defaultCharset = detector.getSpecDefaultEncoding();
				if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
					description.setProperty(IContentDescription.CHARSET, javaCharset);
				}
			}

			// the encoding memento itself is deliberately not stored in the
			// description, since descriptions are cached per session
		}

		/**
		 * Records the detected charset name, if there is one. It is set even
		 * when it equals the java charset name, because there are clients that
		 * rely on this property to determine if the charset was actually
		 * detected in the file or not.
		 * 
		 * @param description
		 *            the description to fill in
		 * @param detectedCharset
		 *            the detected charset name, may be <code>null</code>
		 */
		private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset) {
			if (detectedCharset != null) {
				description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
			}
		}

		/**
		 * @param description
		 *            the description to inspect, may be <code>null</code>
		 * @return <code>true</code> if the description requests at least one
		 *         of the options this describer fills in
		 */
		private boolean isRelevant(IContentDescription description) {
			if (description == null) {
				return false;
			}
			return description.isRequested(IContentDescription.BYTE_ORDER_MARK)
				|| description.isRequested(IContentDescription.CHARSET)
				|| description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
				|| description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
				|| description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET);
		}

	}