| /******************************************************************************* |
| * Copyright (c) 2004, 2006 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.wst.html.core.internal.contenttype; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.Reader; |
| |
| import org.eclipse.core.runtime.QualifiedName; |
| import org.eclipse.core.runtime.content.IContentDescriber; |
| import org.eclipse.core.runtime.content.IContentDescription; |
| import org.eclipse.core.runtime.content.ITextContentDescriber; |
| import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento; |
| import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended; |
| import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; |
| |
| /** |
| * |
| * ContentDescriberForHTML |
| * |
| * A few design principles to remember with content describers: |
| * <ul> |
| * <li>Remember not to store values/data in the descriptions array of properties, |
| * especially not large objects! and even no value that is already the default value, |
| * since those description properties are cached per session, so can add up in memory. |
| * <li>Remember that a ContentDescriber instance becomes a "root object" in the |
| * ContentDescriberManager (that is, always in memory, never GC'd), so it should |
| * not have any instance or state data since it would always become stale and |
| * "hold on" to objects unneccessarily. |
| * </ul> |
| */ |
| |
| public final class ContentDescriberForHTML implements ITextContentDescriber { |
| |
| final private static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT}; |
| |
| public int describe(InputStream contents, IContentDescription description) throws IOException { |
| int result = IContentDescriber.INDETERMINATE; |
| |
| if (description == null) { |
| result = computeValidity(contents); |
| } |
| else { |
| calculateSupportedOptions(contents, description); |
| // assummming we should return same 'validity' value we did |
| // when called before. (technically, could be a performance issue |
| // in future, so might want to check if any 'ol value would |
| // be ok here. |
| result = computeValidity(contents); |
| } |
| |
| return result; |
| } |
| |
| public int describe(Reader contents, IContentDescription description) throws IOException { |
| int result = IContentDescriber.INDETERMINATE; |
| |
| if (description == null) { |
| result = computeValidity(contents); |
| } |
| else { |
| calculateSupportedOptions(contents, description); |
| // assummming we should return same 'validity' value we did |
| // when called before. (technically, could be a performance issue |
| // in future, so might want to check if hard coded 'valid' would |
| // be ok here. |
| result = computeValidity(contents); |
| } |
| |
| return result; |
| } |
| |
| public QualifiedName[] getSupportedOptions() { |
| |
| return SUPPORTED_OPTIONS; |
| } |
| |
| private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException { |
| if (isRelevent(description)) { |
| IResourceCharsetDetector detector = getDetector(); |
| detector.set(contents); |
| handleCalculations(description, detector); |
| } |
| } |
| |
| /** |
| * @param contents |
| * @param description |
| * @throws IOException |
| */ |
| private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException { |
| if (isRelevent(description)) { |
| IResourceCharsetDetector detector = getDetector(); |
| detector.set(contents); |
| handleCalculations(description, detector); |
| } |
| } |
| |
| private int computeValidity(InputStream inputStream) { |
| // currently no contents specific check for valid HTML contents |
| // (this may change once we add XHTML content type) |
| return IContentDescriber.INDETERMINATE; |
| } |
| |
| private int computeValidity(Reader reader) { |
| // currently no contents specific check for valid HTML contents |
| // (this may change once we add XHTML content type) |
| return IContentDescriber.INDETERMINATE; |
| } |
| |
| private IResourceCharsetDetector getDetector() { |
| |
| return new HTMLResourceEncodingDetector(); |
| |
| } |
| |
| /** |
| * @param description |
| * @param detector |
| * @throws IOException |
| */ |
| private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException { |
| |
| EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento(); |
| // TODO: I need to verify to see if this BOM work is always done |
| // by text type. |
| Object detectedByteOrderMark = encodingMemento.getUnicodeBOM(); |
| if (detectedByteOrderMark != null) { |
| Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK); |
| // not sure why would ever be different, so if is different, may |
| // need to "push" up into base. |
| if (!detectedByteOrderMark.equals(existingByteOrderMark)) |
| description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark); |
| } |
| |
| |
| if (!encodingMemento.isValid()) { |
| /* |
| * note: after setting here, its the mere presence of |
| * IContentDescriptionExtended.UNSUPPORTED_CHARSET in the |
| * resource's description that can be used to determine if invalid |
| * in those cases, the "detected" property contains an |
| * "appropriate default" to use. |
| */ |
| description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding()); |
| description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault()); |
| } |
| |
| Object detectedCharset = encodingMemento.getDetectedCharsetName(); |
| Object javaCharset = encodingMemento.getJavaCharsetName(); |
| |
| // we always include detected, if its different than java |
| handleDetectedSpecialCase(description, detectedCharset, javaCharset); |
| |
| if (javaCharset != null) { |
| Object existingCharset = description.getProperty(IContentDescription.CHARSET); |
| if (javaCharset.equals(existingCharset)) { |
| handleDetectedSpecialCase(description, detectedCharset, javaCharset); |
| } |
| else { |
| // we may need to add what we found, but only need to add |
| // if different from default.the |
| Object defaultCharset = detector.getSpecDefaultEncoding(); |
| if (defaultCharset != null) { |
| if (!defaultCharset.equals(javaCharset)) { |
| description.setProperty(IContentDescription.CHARSET, javaCharset); |
| } |
| } |
| else { |
| // assuming if there is no spec default, we always need to |
| // add, I'm assuming |
| description.setProperty(IContentDescription.CHARSET, javaCharset); |
| } |
| } |
| } |
| |
| } |
| |
| private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) { |
| // since equal, we don't need to add, but if our detected version is |
| // different than |
| // javaCharset, then we should add it. This will happen, for example, |
| // if there's |
| // differences in case, or differences due to override properties |
| if (detectedCharset != null) { |
| // if (!detectedCharset.equals(javaCharset)) { |
| // description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, |
| // detectedCharset); |
| // } |
| |
| // Once we detected a charset, we should set the property even |
| // though it's the same as javaCharset |
| // because there are clients that rely on this property to |
| // determine if the charset is actually detected in file or not. |
| description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset); |
| } |
| } |
| |
| /** |
| * @param description |
| * @return |
| */ |
| private boolean isRelevent(IContentDescription description) { |
| boolean result = false; |
| if (description == null) |
| result = false; |
| else if (description.isRequested(IContentDescription.BYTE_ORDER_MARK)) |
| result = true; |
| else if (description.isRequested(IContentDescription.CHARSET)) |
| result = true; |
| else if (description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)) |
| result = true; |
| else if (description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)) |
| result = true; |
| else if (description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET)) |
| result = true; |
| // else if |
| // (description.isRequested(IContentDescriptionExtended.ENCODING_MEMENTO)) |
| // result = true; |
| return result; |
| } |
| |
| } |