blob: 3b73e4edeb2a8da03460ce2fc0cf3731861f2c26 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2004, 2009 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* OBEO - Forked for eclipse 3.4 compatibility
*******************************************************************************/
// CHECKSTYLE:OFF
package org.eclipse.acceleo.model.mtl.resource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import org.eclipse.core.internal.content.TextContentDescriber;
import org.eclipse.core.runtime.QualifiedName;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.ITextContentDescriber;
/**
* A content describer for XML files. This class provides basis for XML-based content describers.
* <p>
* The document is detected by the describer as <code>VALID</code>, if it contains an xml declaration with
* <code>&lt;?xml</code> prefix and the encoding in the declaration is correct.
* </p>
* Below are sample declarations recognized by the describer as <code>VALID</code>
* <ul>
* <li>&lt;?xml version="1.0"?&gt;</li>
* <li>&lt;?xml version="1.0"</li>
* <li>&lt;?xml version="1.0" encoding="utf-16"?&gt;</li>
* <li>&lt;?xml version="1.0" encoding="utf-16?&gt;</li>
* </ul>
*
* @noinstantiate This class is not intended to be instantiated by clients. Clients should use it to provide
* their own XML-based describers that can be referenced by the "describer" configuration
* element in extensions to the <code>org.eclipse.core.runtime.contentTypes</code> extension
* point.
* @see org.eclipse.core.runtime.content.IContentDescriber
* @see org.eclipse.core.runtime.content.XMLRootElementContentDescriber2
* @see "http://www.w3.org/TR/REC-xml *"
* @since 3.1
*/
public class XMLContentDescriber extends TextContentDescriber implements ITextContentDescriber {
private static final QualifiedName[] SUPPORTED_OPTIONS = new QualifiedName[] {
IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK };
private static final String XML_PREFIX = "<?xml "; //$NON-NLS-1$
private static final String XML_DECL_END = "?>"; //$NON-NLS-1$
private static final String BOM = "org.eclipse.core.runtime.content.XMLContentDescriber.bom"; //$NON-NLS-1$
private static final String CHARSET = "org.eclipse.core.runtime.content.XMLContentDescriber.charset"; //$NON-NLS-1$
private static final String FULL_XML_DECL = "org.eclipse.core.runtime.content.XMLContentDescriber.fullXMLDecl"; //$NON-NLS-1$
private static final String RESULT = "org.eclipse.core.runtime.content.XMLContentDescriber.processed"; //$NON-NLS-1$
@Override
public int describe(InputStream input, IContentDescription description) throws IOException {
return describe2(input, description, new HashMap());
}
int describe2(InputStream input, IContentDescription description, Map properties) throws IOException {
if (!isProcessed(properties)) {
fillContentProperties(input, description, properties);
}
return internalDescribe(description, properties);
}
@Override
public int describe(Reader input, IContentDescription description) throws IOException {
return describe2(input, description, new HashMap());
}
int describe2(Reader input, IContentDescription description, Map properties) throws IOException {
if (!isProcessed(properties)) {
fillContentProperties(readXMLDecl(input), description, properties);
}
return internalDescribe(description, properties);
}
private boolean isProcessed(Map properties) {
Boolean result = (Boolean)properties.get(RESULT);
if (result != null) {
return true;
}
return false;
}
private void fillContentProperties(InputStream input, IContentDescription description, Map properties)
throws IOException {
byte[] bom = getByteOrderMark(input);
String xmlDeclEncoding = "UTF-8"; //$NON-NLS-1$
input.reset();
if (bom != null) {
if (bom == IContentDescription.BOM_UTF_16BE) {
xmlDeclEncoding = "UTF-16BE"; //$NON-NLS-1$
} else if (bom == IContentDescription.BOM_UTF_16LE) {
xmlDeclEncoding = "UTF-16LE"; //$NON-NLS-1$
}
// skip BOM to make comparison simpler
input.skip(bom.length);
properties.put(BOM, bom);
}
fillContentProperties(readXMLDecl(input, xmlDeclEncoding), description, properties);
}
/*
* Reads bom from the stream. Note that the stream will not be repositioned when the method returns.
*/
public byte[] getByteOrderMark(InputStream input) throws IOException {
int first = input.read();
if (first == 0xEF) {
// look for the UTF-8 Byte Order Mark (BOM)
int second = input.read();
int third = input.read();
if (second == 0xBB && third == 0xBF) {
return IContentDescription.BOM_UTF_8;
}
} else if (first == 0xFE) {
// look for the UTF-16 BOM
if (input.read() == 0xFF) {
return IContentDescription.BOM_UTF_16BE;
}
} else if (first == 0xFF) {
if (input.read() == 0xFE) {
return IContentDescription.BOM_UTF_16LE;
}
}
return null;
}
private void fillContentProperties(String line, IContentDescription description, Map properties)
throws IOException {
// XMLDecl should be the first string (no blanks allowed)
if (line != null && line.startsWith(XML_PREFIX)) {
properties.put(FULL_XML_DECL, new Boolean(true));
}
String charset = getCharset(line);
if (charset != null) {
properties.put(CHARSET, charset);
}
properties.put(RESULT, new Boolean(true));
}
private int internalDescribe(IContentDescription description, Map properties) {
if (description != null) {
byte[] bom = (byte[])properties.get(BOM);
if (bom != null && description.isRequested(IContentDescription.BYTE_ORDER_MARK)) {
description.setProperty(IContentDescription.BYTE_ORDER_MARK, bom);
}
}
Boolean fullXMLDecl = (Boolean)properties.get(FULL_XML_DECL);
if (fullXMLDecl == null || !fullXMLDecl.booleanValue()) {
return INDETERMINATE;
}
if (description == null) {
return VALID;
}
String charset = (String)properties.get(CHARSET);
if (description.isRequested(IContentDescription.CHARSET)) {
if (charset != null && !isCharsetValid(charset)) {
return INVALID;
}
if (isNonDefaultCharset(charset)) {
description.setProperty(IContentDescription.CHARSET, charset);
}
}
return VALID;
}
private boolean isNonDefaultCharset(String charset) {
if (charset == null) {
return false;
}
if (charset.equalsIgnoreCase("utf8") || charset.equalsIgnoreCase("utf-8")) { //$NON-NLS-1$ //$NON-NLS-2$
return false;
}
return true;
}
private boolean isFullXMLDecl(String xmlDecl) {
return xmlDecl.endsWith(XML_DECL_END);
}
private String readXMLDecl(InputStream input, String encoding) throws IOException {
byte[] xmlDeclEndBytes = XML_DECL_END.getBytes(encoding);
// allocate an array for the input
int xmlDeclSize = 100 * xmlDeclEndBytes.length / 2;
byte[] xmlDecl = new byte[xmlDeclSize];
// looks for XMLDecl end (?>)
int c = 0;
int read = 0;
// count is incremented when subsequent read characters match the xmlDeclEnd bytes,
// the end of xmlDecl is reached, when count equals the xmlDeclEnd length
int count = 0;
while (read < xmlDecl.length && (c = input.read()) != -1) {
if (c == xmlDeclEndBytes[count]) {
count++;
} else {
count = 0;
}
xmlDecl[read++] = (byte)c;
if (count == xmlDeclEndBytes.length) {
break;
}
}
return new String(xmlDecl, 0, read, encoding);
}
private String readXMLDecl(Reader input) throws IOException {
BufferedReader reader = new BufferedReader(input);
String xmlDecl = new String();
String line = null;
while (xmlDecl.length() < 100 && ((line = reader.readLine()) != null)) {
xmlDecl = xmlDecl + line;
if (line.indexOf(XML_DECL_END) != -1) {
return xmlDecl.substring(0, xmlDecl.indexOf(XML_DECL_END) + XML_DECL_END.length());
}
}
return xmlDecl;
}
private String getCharset(String firstLine) {
int encodingPos = findEncodingPosition(firstLine);
if (encodingPos == -1) {
return null;
}
char quoteChar = '"';
int firstQuote = firstLine.indexOf(quoteChar, encodingPos);
if (firstQuote == -1) {
quoteChar = '\'';
firstQuote = firstLine.indexOf(quoteChar, encodingPos);
}
if (firstQuote == -1 || firstLine.length() == firstQuote + 1) {
return null;
}
int secondQuote = firstLine.indexOf(quoteChar, firstQuote + 1);
if (secondQuote == -1) {
return isFullXMLDecl(firstLine) ? firstLine.substring(firstQuote + 1,
firstLine.lastIndexOf(XML_DECL_END)).trim() : null;
}
return firstLine.substring(firstQuote + 1, secondQuote);
}
private int findEncodingPosition(String line) {
String encoding = "encoding"; //$NON-NLS-1$
int fromIndex = 0;
int position = 0;
while ((position = line.indexOf(encoding, fromIndex)) != -1) {
boolean equals = false;
fromIndex = position + encoding.length();
for (int i = fromIndex; i < line.length(); i++) {
char c = line.charAt(i);
if (c == '=' && !equals) {
equals = true;
} else if (c == 0x20 || c == 0x09 || c == 0x0D || c == 0x0A) {
// white space characters to ignore
} else if ((c == '"' || c == '\'') && equals) {
return position;
} else {
break;
}
}
}
return -1;
}
private boolean isCharsetValid(String charset) {
if (charset.length() == 0) {
return false;
}
char c = charset.charAt(0);
if (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z')) {
return false;
}
for (int i = 1; i < charset.length(); i++) {
c = charset.charAt(i);
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-'
|| c == '_' || c == '.') {
continue;
}
return false;
}
return true;
}
@Override
public QualifiedName[] getSupportedOptions() {
return SUPPORTED_OPTIONS;
}
}