bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java - sourceediting/webtools.sourceediting - Git at Google

 /*******************************************************************************
  * Copyright (c) 2001, 2006 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *     Jens Lukowski/Innoopract - initial renaming/restructuring
  *
  *******************************************************************************/
 package org.eclipse.wst.xml.core.internal.contenttype;

 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;

 import org.eclipse.core.runtime.QualifiedName;
 import org.eclipse.core.runtime.content.IContentDescriber;
 import org.eclipse.core.runtime.content.IContentDescription;
 import org.eclipse.core.runtime.content.ITextContentDescriber;
 import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
 import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
 import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


 /**
  * Content describer for XML resources, usable on both byte streams and
  * character readers.
  *
  * Each request creates a new {@link XMLResourceEncodingDetector}, hands it
  * the contents, optionally copies the detected encoding information into
  * the supplied description, and then reports validity. The result is
  * <code>IContentDescriber.INVALID</code> unless the special circumstances
  * checked in <code>determineValidity</code> are met.
  */
 public final class ContentDescriberForXML implements ITextContentDescriber {
 	/** The properties this describer may set on a content description. */
 	private final static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};

 	/**
 	 * <code>restrictedMode</code> is used just for testing/experiments.
 	 *
 	 * If in restrictedMode, our "custom" contentType is seen as valid only in
 	 * cases that the platform's standard one does not cover.
 	 */
 	private final static boolean restrictedMode = true;

 	/**
 	 * Creates the detector used for one describe request. The concrete
 	 * type is returned so that callers need no downcast to reach the
 	 * XML-specific queries.
 	 *
 	 * @return a new detector instance
 	 */
 	private XMLResourceEncodingDetector getDetector() {
 		return new XMLResourceEncodingDetector();
 	}

 	/**
 	 * Describes byte-oriented contents. The stream is reset before it is
 	 * handed to the detector.
 	 *
 	 * @param contents
 	 *            the stream to examine
 	 * @param description
 	 *            the description to fill in, or <code>null</code> when
 	 *            the call is purely a request for validity
 	 * @return <code>IContentDescriber.VALID</code> or
 	 *         <code>IContentDescriber.INVALID</code>
 	 * @throws IOException
 	 *             if the stream cannot be reset or examined
 	 * @see org.eclipse.core.runtime.content.IContentDescriber#describe(java.io.InputStream,
 	 *      org.eclipse.core.runtime.content.IContentDescription)
 	 */
 	public int describe(InputStream contents, IContentDescription description) throws IOException {
 		if (description != null && !isRelevant(description)) {
 			// none of our supported properties was requested, so there
 			// is nothing to calculate and we stay with "invalid".
 			return IContentDescriber.INVALID;
 		}

 		XMLResourceEncodingDetector detector = getDetector();
 		contents.reset();
 		detector.set(contents);
 		if (description != null) {
 			handleCalculations(description, detector);
 		}
 		return determineValidity(detector);
 	}

 	/**
 	 * Describes character-oriented contents.
 	 *
 	 * @param contents
 	 *            the reader to examine
 	 * @param description
 	 *            the description to fill in, or <code>null</code> when
 	 *            the call is purely a request for validity
 	 * @return <code>IContentDescriber.VALID</code> or
 	 *         <code>IContentDescriber.INVALID</code>
 	 * @throws IOException
 	 *             if the reader cannot be reset or examined
 	 * @see org.eclipse.core.runtime.content.ITextContentDescriber#describe(java.io.Reader,
 	 *      org.eclipse.core.runtime.content.IContentDescription)
 	 */
 	public int describe(Reader contents, IContentDescription description) throws IOException {
 		if (description != null && !isRelevant(description)) {
 			// none of our supported properties was requested, so there
 			// is nothing to calculate and we stay with "invalid".
 			return IContentDescriber.INVALID;
 		}

 		XMLResourceEncodingDetector detector = getDetector();
 		if (description == null) {
 			// NOTE(review): only the pure validity request resets the
 			// reader; the path that fills in a description has never
 			// done so (unlike both InputStream paths). That behavior is
 			// kept as is -- confirm whether the difference is intended.
 			contents.reset();
 		}
 		detector.set(contents);
 		if (description != null) {
 			handleCalculations(description, detector);
 		}
 		return determineValidity(detector);
 	}

 	/*
 	 * (non-Javadoc)
 	 *
 	 * @see org.eclipse.core.runtime.content.IContentDescriber#getSupportedOptions()
 	 */
 	public QualifiedName[] getSupportedOptions() {
 		return SUPPORTED_OPTIONS;
 	}

 	/**
 	 * Tells whether the description asks for at least one of the
 	 * properties this describer can provide.
 	 *
 	 * @param description
 	 *            the description to inspect, may be <code>null</code>
 	 * @return <code>true</code> if a supported property is requested,
 	 *         <code>false</code> otherwise or if description is
 	 *         <code>null</code>
 	 */
 	private boolean isRelevant(IContentDescription description) {
 		return description != null
 				&& (description.isRequested(IContentDescription.BYTE_ORDER_MARK)
 						|| description.isRequested(IContentDescription.CHARSET)
 						|| description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
 						|| description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
 						|| description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET));
 	}

 	/**
 	 * Copies what the detector found (byte order mark, unsupported
 	 * charset information, detected charset, Java charset) into the
 	 * description.
 	 *
 	 * @param description
 	 *            the description to update, never <code>null</code>
 	 * @param detector
 	 *            the detector that already received the contents
 	 * @throws IOException
 	 *             if the detector fails while examining the contents
 	 */
 	private void handleCalculations(IContentDescription description, XMLResourceEncodingDetector detector) throws IOException {
 		EncodingMemento encodingMemento = detector.getEncodingMemento();

 		// TODO: I need to verify to see if this BOM work is always done
 		// by text type.
 		Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
 		if (detectedByteOrderMark != null) {
 			Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
 			// not sure why would ever be different, so if is different,
 			// may need to "push" up into base.
 			if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
 				description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
 			}
 		}

 		if (!encodingMemento.isValid()) {
 			// note: after setting here, it is the mere presence of
 			// IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
 			// resource's description that can be used to determine if
 			// invalid; the "appropriate default" to use in that case is
 			// recorded next to it.
 			description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
 			description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
 		}

 		Object detectedCharset = encodingMemento.getDetectedCharsetName();
 		Object javaCharset = encodingMemento.getJavaCharsetName();

 		// Once we detected a charset, we set the property even if it is
 		// the same as javaCharset, because there are clients that rely on
 		// this property to determine if the charset is actually detected
 		// in file or not. Setting it once is enough.
 		if (detectedCharset != null) {
 			description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
 		}

 		if (javaCharset != null) {
 			Object existingCharset = description.getProperty(IContentDescription.CHARSET);
 			if (!javaCharset.equals(existingCharset)) {
 				// we may need to add what we found, but only need to add
 				// if different from the default. If there is no spec
 				// default we always add.
 				// TODO: the "no spec default" case is probably dead in
 				// current code, should re-examine for removal.
 				Object defaultCharset = detector.getSpecDefaultEncoding();
 				if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
 					description.setProperty(IContentDescription.CHARSET, javaCharset);
 				}
 			}
 		}
 	}

 	/**
 	 * Decides the describe result from the detector's findings.
 	 *
 	 * @param detector
 	 *            the detector that already received the contents
 	 * @return <code>IContentDescriber.VALID</code> if the detector
 	 *         reports a detected declaration and, while in restricted
 	 *         mode, also reports initial white space; otherwise
 	 *         <code>IContentDescriber.INVALID</code>
 	 */
 	private int determineValidity(XMLResourceEncodingDetector detector) {
 		// for this special case, always assume invalid, unless
 		// our special circumstances are met.
 		int result = IContentDescriber.INVALID;
 		if (detector.isDeclDetected()) {
 			// in restricted mode: if there is no initial whitespace,
 			// then platform's default one will do.
 			if (!restrictedMode || detector.hasInitialWhiteSpace()) {
 				result = IContentDescriber.VALID;
 			}
 		}
 		return result;
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2001, 2006 IBM Corporation and others.
	* All rights reserved. This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License v1.0
	* which accompanies this distribution, and is available at
	* http://www.eclipse.org/legal/epl-v10.html
	*
	* Contributors:
	* IBM Corporation - initial API and implementation
	* Jens Lukowski/Innoopract - initial renaming/restructuring
	*
	*******************************************************************************/
	package org.eclipse.wst.xml.core.internal.contenttype;

	import java.io.IOException;
	import java.io.InputStream;
	import java.io.Reader;

	import org.eclipse.core.runtime.QualifiedName;
	import org.eclipse.core.runtime.content.IContentDescriber;
	import org.eclipse.core.runtime.content.IContentDescription;
	import org.eclipse.core.runtime.content.ITextContentDescriber;
	import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
	import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
	import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


	/**
	 * Content describer for XML resources, usable on both byte streams and
	 * character readers.
	 *
	 * Each request creates a new {@link XMLResourceEncodingDetector}, hands it
	 * the contents, optionally copies the detected encoding information into
	 * the supplied description, and then reports validity. The result is
	 * <code>IContentDescriber.INVALID</code> unless the special circumstances
	 * checked in <code>determineValidity</code> are met.
	 */
	public final class ContentDescriberForXML implements ITextContentDescriber {
		/** The properties this describer may set on a content description. */
		private final static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};

		/**
		 * <code>restrictedMode</code> is used just for testing/experiments.
		 *
		 * If in restrictedMode, our "custom" contentType is seen as valid only in
		 * cases that the platform's standard one does not cover.
		 */
		private final static boolean restrictedMode = true;

		/**
		 * Creates the detector used for one describe request. The concrete
		 * type is returned so that callers need no downcast to reach the
		 * XML-specific queries.
		 *
		 * @return a new detector instance
		 */
		private XMLResourceEncodingDetector getDetector() {
			return new XMLResourceEncodingDetector();
		}

		/**
		 * Describes byte-oriented contents. The stream is reset before it is
		 * handed to the detector.
		 *
		 * @param contents
		 *            the stream to examine
		 * @param description
		 *            the description to fill in, or <code>null</code> when
		 *            the call is purely a request for validity
		 * @return <code>IContentDescriber.VALID</code> or
		 *         <code>IContentDescriber.INVALID</code>
		 * @throws IOException
		 *             if the stream cannot be reset or examined
		 * @see org.eclipse.core.runtime.content.IContentDescriber#describe(java.io.InputStream,
		 *      org.eclipse.core.runtime.content.IContentDescription)
		 */
		public int describe(InputStream contents, IContentDescription description) throws IOException {
			if (description != null && !isRelevant(description)) {
				// none of our supported properties was requested, so there
				// is nothing to calculate and we stay with "invalid".
				return IContentDescriber.INVALID;
			}

			XMLResourceEncodingDetector detector = getDetector();
			contents.reset();
			detector.set(contents);
			if (description != null) {
				handleCalculations(description, detector);
			}
			return determineValidity(detector);
		}

		/**
		 * Describes character-oriented contents.
		 *
		 * @param contents
		 *            the reader to examine
		 * @param description
		 *            the description to fill in, or <code>null</code> when
		 *            the call is purely a request for validity
		 * @return <code>IContentDescriber.VALID</code> or
		 *         <code>IContentDescriber.INVALID</code>
		 * @throws IOException
		 *             if the reader cannot be reset or examined
		 * @see org.eclipse.core.runtime.content.ITextContentDescriber#describe(java.io.Reader,
		 *      org.eclipse.core.runtime.content.IContentDescription)
		 */
		public int describe(Reader contents, IContentDescription description) throws IOException {
			if (description != null && !isRelevant(description)) {
				// none of our supported properties was requested, so there
				// is nothing to calculate and we stay with "invalid".
				return IContentDescriber.INVALID;
			}

			XMLResourceEncodingDetector detector = getDetector();
			if (description == null) {
				// NOTE(review): only the pure validity request resets the
				// reader; the path that fills in a description has never
				// done so (unlike both InputStream paths). That behavior is
				// kept as is -- confirm whether the difference is intended.
				contents.reset();
			}
			detector.set(contents);
			if (description != null) {
				handleCalculations(description, detector);
			}
			return determineValidity(detector);
		}

		/*
		 * (non-Javadoc)
		 *
		 * @see org.eclipse.core.runtime.content.IContentDescriber#getSupportedOptions()
		 */
		public QualifiedName[] getSupportedOptions() {
			return SUPPORTED_OPTIONS;
		}

		/**
		 * Tells whether the description asks for at least one of the
		 * properties this describer can provide.
		 *
		 * @param description
		 *            the description to inspect, may be <code>null</code>
		 * @return <code>true</code> if a supported property is requested,
		 *         <code>false</code> otherwise or if description is
		 *         <code>null</code>
		 */
		private boolean isRelevant(IContentDescription description) {
			return description != null
					&& (description.isRequested(IContentDescription.BYTE_ORDER_MARK)
							|| description.isRequested(IContentDescription.CHARSET)
							|| description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
							|| description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
							|| description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET));
		}

		/**
		 * Copies what the detector found (byte order mark, unsupported
		 * charset information, detected charset, Java charset) into the
		 * description.
		 *
		 * @param description
		 *            the description to update, never <code>null</code>
		 * @param detector
		 *            the detector that already received the contents
		 * @throws IOException
		 *             if the detector fails while examining the contents
		 */
		private void handleCalculations(IContentDescription description, XMLResourceEncodingDetector detector) throws IOException {
			EncodingMemento encodingMemento = detector.getEncodingMemento();

			// TODO: I need to verify to see if this BOM work is always done
			// by text type.
			Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
			if (detectedByteOrderMark != null) {
				Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
				// not sure why would ever be different, so if is different,
				// may need to "push" up into base.
				if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
					description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
				}
			}

			if (!encodingMemento.isValid()) {
				// note: after setting here, it is the mere presence of
				// IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
				// resource's description that can be used to determine if
				// invalid; the "appropriate default" to use in that case is
				// recorded next to it.
				description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
				description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
			}

			Object detectedCharset = encodingMemento.getDetectedCharsetName();
			Object javaCharset = encodingMemento.getJavaCharsetName();

			// Once we detected a charset, we set the property even if it is
			// the same as javaCharset, because there are clients that rely on
			// this property to determine if the charset is actually detected
			// in file or not. Setting it once is enough.
			if (detectedCharset != null) {
				description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
			}

			if (javaCharset != null) {
				Object existingCharset = description.getProperty(IContentDescription.CHARSET);
				if (!javaCharset.equals(existingCharset)) {
					// we may need to add what we found, but only need to add
					// if different from the default. If there is no spec
					// default we always add.
					// TODO: the "no spec default" case is probably dead in
					// current code, should re-examine for removal.
					Object defaultCharset = detector.getSpecDefaultEncoding();
					if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
						description.setProperty(IContentDescription.CHARSET, javaCharset);
					}
				}
			}
		}

		/**
		 * Decides the describe result from the detector's findings.
		 *
		 * @param detector
		 *            the detector that already received the contents
		 * @return <code>IContentDescriber.VALID</code> if the detector
		 *         reports a detected declaration and, while in restricted
		 *         mode, also reports initial white space; otherwise
		 *         <code>IContentDescriber.INVALID</code>
		 */
		private int determineValidity(XMLResourceEncodingDetector detector) {
			// for this special case, always assume invalid, unless
			// our special circumstances are met.
			int result = IContentDescriber.INVALID;
			if (detector.isDeclDetected()) {
				// in restricted mode: if there is no initial whitespace,
				// then platform's default one will do.
				if (!restrictedMode || detector.hasInitialWhiteSpace()) {
					result = IContentDescriber.VALID;
				}
			}
			return result;
		}

	}