blob: 376d2f87931be0205a3a8bb4138b3e5ac32d9429 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.wst.html.core.internal.contenttype;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import org.eclipse.core.runtime.QualifiedName;
import org.eclipse.core.runtime.content.IContentDescriber;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.ITextContentDescriber;
import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
/**
 * Content describer for HTML resources.
 * <p>
 * When a description is supplied, this describer runs an
 * {@link HTMLResourceEncodingDetector} over the contents and records the
 * encoding related properties it finds: the byte order mark, the charset, the
 * detected charset and, for unsupported encodings, the invalid name together
 * with an appropriate default. No content specific validity check is
 * performed, so both <code>describe</code> methods always answer
 * {@link IContentDescriber#INDETERMINATE}.
 * </p>
 * A few design principles to remember with content describers:
 * <ul>
 * <li>Do not store values/data in the description's properties unless they
 * are needed; especially not large objects, and not a value that is already
 * the default value. Description properties are cached per session, so they
 * can add up in memory.</li>
 * <li>A ContentDescriber instance becomes a "root object" in the
 * ContentDescriberManager (that is, always in memory, never GC'd), so it
 * should not have any instance or state data, since that data would always
 * become stale and "hold on" to objects unnecessarily.</li>
 * </ul>
 */
public final class ContentDescriberForHTML implements ITextContentDescriber {

    /**
     * Properties this describer is able to fill in. Arrays are mutable, so
     * this shared instance is never handed out directly (see
     * {@link #getSupportedOptions()}).
     */
    private static final QualifiedName[] SUPPORTED_OPTIONS = {
        IContentDescription.CHARSET,
        IContentDescription.BYTE_ORDER_MARK,
        IContentDescriptionExtended.DETECTED_CHARSET,
        IContentDescriptionExtended.UNSUPPORTED_CHARSET,
        IContentDescriptionExtended.APPROPRIATE_DEFAULT
    };

    /**
     * Describes byte-based contents.
     *
     * @param contents
     *            the contents to examine
     * @param description
     *            the description to fill in, or <code>null</code> if only
     *            the validity result is wanted
     * @return the result of {@link #computeValidity(InputStream)}, which is
     *         currently always {@link IContentDescriber#INDETERMINATE}
     * @throws IOException
     *             if the encoding detection fails to read the contents
     */
    public int describe(InputStream contents, IContentDescription description) throws IOException {
        if (description != null) {
            calculateSupportedOptions(contents, description);
        }
        // The same validity value is returned whether or not a description
        // was requested. (If computing validity ever becomes expensive, it
        // may be worth checking whether a fixed value would do here.)
        return computeValidity(contents);
    }

    /**
     * Describes character-based contents.
     *
     * @param contents
     *            the contents to examine
     * @param description
     *            the description to fill in, or <code>null</code> if only
     *            the validity result is wanted
     * @return the result of {@link #computeValidity(Reader)}, which is
     *         currently always {@link IContentDescriber#INDETERMINATE}
     * @throws IOException
     *             if the encoding detection fails to read the contents
     */
    public int describe(Reader contents, IContentDescription description) throws IOException {
        if (description != null) {
            calculateSupportedOptions(contents, description);
        }
        // The same validity value is returned whether or not a description
        // was requested. (If computing validity ever becomes expensive, it
        // may be worth checking whether a hard coded value would do here.)
        return computeValidity(contents);
    }

    /**
     * Returns the properties this describer can provide.
     *
     * @return a fresh copy of the supported options, so that callers cannot
     *         modify the array shared by all invocations
     */
    public QualifiedName[] getSupportedOptions() {
        // The explicit cast keeps this compiling at pre-5.0 source levels,
        // where clone() on an array is typed as Object.
        return (QualifiedName[]) SUPPORTED_OPTIONS.clone();
    }

    /**
     * Runs encoding detection over the stream and stores the results, but
     * only if the description asks for a property this describer supports.
     *
     * @param contents
     *            the contents handed to the detector
     * @param description
     *            the description to update
     * @throws IOException
     *             if the detector fails to read the contents
     */
    private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
        if (isRelevant(description)) {
            IResourceCharsetDetector detector = getDetector();
            detector.set(contents);
            handleCalculations(description, detector);
        }
    }

    /**
     * Runs encoding detection over the reader and stores the results, but
     * only if the description asks for a property this describer supports.
     *
     * @param contents
     *            the contents handed to the detector
     * @param description
     *            the description to update
     * @throws IOException
     *             if the detector fails to read the contents
     */
    private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
        if (isRelevant(description)) {
            IResourceCharsetDetector detector = getDetector();
            detector.set(contents);
            handleCalculations(description, detector);
        }
    }

    /**
     * Answers the validity of the given contents. The contents are not read.
     *
     * @param inputStream
     *            currently unused
     * @return always {@link IContentDescriber#INDETERMINATE}
     */
    private int computeValidity(InputStream inputStream) {
        // currently no contents specific check for valid HTML contents
        // (this may change once we add XHTML content type)
        return IContentDescriber.INDETERMINATE;
    }

    /**
     * Answers the validity of the given contents. The contents are not read.
     *
     * @param reader
     *            currently unused
     * @return always {@link IContentDescriber#INDETERMINATE}
     */
    private int computeValidity(Reader reader) {
        // currently no contents specific check for valid HTML contents
        // (this may change once we add XHTML content type)
        return IContentDescriber.INDETERMINATE;
    }

    /**
     * Creates a new detector for each request, so that this describer holds
     * no state between calls.
     *
     * @return a new {@link HTMLResourceEncodingDetector}
     */
    private IResourceCharsetDetector getDetector() {
        return new HTMLResourceEncodingDetector();
    }

    /**
     * Copies the detector's findings into the description: the byte order
     * mark, the unsupported-charset information, the detected charset and the
     * Java charset name.
     *
     * @param description
     *            the description to update
     * @param detector
     *            a detector whose input has already been set; must be an
     *            {@link HTMLResourceEncodingDetector}
     * @throws IOException
     *             if the detector fails to read its input
     */
    private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
        // getDetector() only ever creates HTMLResourceEncodingDetector
        // instances, which is what makes this cast safe.
        EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento();

        // TODO: I need to verify to see if this BOM work is always done
        // by text type.
        Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
        if (detectedByteOrderMark != null) {
            Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
            // Not sure why these would ever be different, so if they are,
            // this may need to be "pushed" up into the base.
            // NOTE(review): if the BOM is represented as an array, equals()
            // is an identity comparison and the property is rewritten every
            // time -- confirm against EncodingMemento.
            if (!detectedByteOrderMark.equals(existingByteOrderMark)) {
                description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
            }
        }

        if (!encodingMemento.isValid()) {
            /*
             * Note: after setting here, it is the mere presence of
             * IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
             * resource's description that can be used to determine if it is
             * invalid. In those cases, the "detected" property contains an
             * "appropriate default" to use.
             */
            description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
            description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
        }

        Object detectedCharset = encodingMemento.getDetectedCharsetName();
        Object javaCharset = encodingMemento.getJavaCharsetName();

        // The detected charset is always recorded; doing it once here covers
        // every case below.
        handleDetectedSpecialCase(description, detectedCharset, javaCharset);

        if (javaCharset != null) {
            Object existingCharset = description.getProperty(IContentDescription.CHARSET);
            if (!javaCharset.equals(existingCharset)) {
                // We may need to add what we found, but only if it differs
                // from the spec default. If there is no spec default, we
                // assume it always needs to be added.
                Object defaultCharset = detector.getSpecDefaultEncoding();
                if (defaultCharset == null || !defaultCharset.equals(javaCharset)) {
                    description.setProperty(IContentDescription.CHARSET, javaCharset);
                }
            }
        }
    }

    /**
     * Records the detected charset, if there is one.
     * <p>
     * An earlier version stored it only when it differed from the Java
     * charset name (for example because of differences in case or override
     * properties). It is now stored whenever something was detected, even if
     * it equals the Java charset name, because there are clients that rely on
     * this property to determine whether the charset was actually detected in
     * the file or not.
     * </p>
     *
     * @param description
     *            the description to update
     * @param detectedCharset
     *            the charset name found in the contents, or
     *            <code>null</code> if none was detected
     * @param javaCharset
     *            the Java charset name; no longer consulted
     */
    private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) {
        if (detectedCharset != null) {
            description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
        }
    }

    /**
     * Tells whether the description requests at least one of the properties
     * this describer can compute.
     *
     * @param description
     *            the description to inspect, may be <code>null</code>
     * @return <code>true</code> if encoding detection should run;
     *         <code>false</code> if the description is <code>null</code> or
     *         requests none of the supported properties
     */
    private boolean isRelevant(IContentDescription description) {
        if (description == null) {
            return false;
        }
        // A check for IContentDescriptionExtended.ENCODING_MEMENTO used to be
        // sketched here as well, but is currently disabled.
        return description.isRequested(IContentDescription.BYTE_ORDER_MARK)
                || description.isRequested(IContentDescription.CHARSET)
                || description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT)
                || description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET)
                || description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET);
    }
}