blob: 9b3eae82f564b4b259d8cf5b877a5ededff5e17b [file] [log] [blame]
/*=============================================================================#
# Copyright (c) 2004, 2021 IBM Corporation and others.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# https://www.eclipse.org/legal/epl-2.0.
#
# SPDX-License-Identifier: EPL-2.0
#
# Contributors:
# IBM Corporation - org.eclipse.platform: initial API and implementation
# Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
#=============================================================================*/
package org.eclipse.statet.ecommons.io;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import org.eclipse.core.runtime.QualifiedName;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.ITextContentDescriber;
/**
 * This class provides a basis for text-based content describers.
 * 
 * <p>Both {@code describe} methods always answer {@code INDETERMINATE}; the only information
 * this describer contributes is the byte order mark (BOM) of a byte stream, if one is requested
 * and present.</p>
 */
public class TextContentDescriber implements ITextContentDescriber {
	
	
	/** The only description option this describer is able to fill in. */
	private final static QualifiedName[] SUPPORTED_OPTIONS = { IContentDescription.BYTE_ORDER_MARK };
	
	
	/**
	 * Returns the options supported by this describer.
	 * 
	 * @return an array containing only {@link IContentDescription#BYTE_ORDER_MARK}
	 */
	@Override
	public QualifiedName[] getSupportedOptions() {
		return SUPPORTED_OPTIONS;
	}
	
	/**
	 * Describes character based content.
	 * 
	 * <p>Nothing is read from the reader and the description is not touched.</p>
	 * 
	 * @param contents the content to describe (not used)
	 * @param description the description to fill (not used)
	 * @return always {@code INDETERMINATE}
	 * @throws IOException declared by the interface; never thrown by this implementation
	 */
	@Override
	public int describe(final Reader contents, final IContentDescription description) throws IOException {
		// we want to be pretty loose on detecting the text content type
		return INDETERMINATE;
	}
	
	/**
	 * Describes byte based content.
	 * 
	 * <p>If a description is given and the BOM is requested, the start of the stream is inspected
	 * and a detected BOM is stored in the description as property
	 * {@link IContentDescription#BYTE_ORDER_MARK}. Otherwise the stream is not read at all.</p>
	 * 
	 * @param contents the content to describe
	 * @param description the description to fill, may be <code>null</code>
	 * @return always {@code INDETERMINATE}
	 * @throws IOException if reading or repositioning the stream fails
	 */
	@Override
	public int describe(final InputStream contents, final IContentDescription description) throws IOException {
		if (description == null || !description.isRequested(IContentDescription.BYTE_ORDER_MARK)) {
			return INDETERMINATE;
		}
		final byte[] bom = getByteOrderMark(contents);
		if (bom != null) {
			description.setProperty(IContentDescription.BYTE_ORDER_MARK, bom);
		}
		// we want to be pretty loose on detecting the text content type
		return INDETERMINATE;
	}
	
	/**
	 * Returns the byte order mark of the input and moves the input pointer to the position after
	 * the BOM, if available, or otherwise back to the reset position of the stream.
	 * 
	 * <p>The BOMs of UTF-8 (EF BB BF), UTF-16BE (FE FF) and UTF-16LE (FF FE) are recognized.</p>
	 * 
	 * <p>This method calls {@link InputStream#reset()} without calling
	 * {@link InputStream#mark(int)} itself; it therefore assumes that the caller provides a
	 * stream for which <code>reset()</code> rewinds to the start of the content (position 0).</p>
	 * 
	 * @param input the input to check
	 * @return the BOM or <code>null</code>, if the input does not start with one of the common BOMs
	 * @throws IOException if reading or resetting the input fails
	 */
	protected final byte[] getByteOrderMark(final InputStream input) throws IOException {
		final int first = input.read();
		byte[] bom = null;
		if (first == 0xEF) {
			// look for the UTF-8 Byte Order Mark (BOM)
			final int second = input.read();
			final int third = input.read();
			if (second == 0xBB && third == 0xBF) {
				bom = IContentDescription.BOM_UTF_8;
			}
		}
		else if (first == 0xFE) {
			// look for the UTF-16 (big endian) BOM
			if (input.read() == 0xFF) {
				bom = IContentDescription.BOM_UTF_16BE;
			}
		}
		else if (first == 0xFF) {
			// look for the UTF-16 (little endian) BOM
			if (input.read() == 0xFE) {
				bom = IContentDescription.BOM_UTF_16LE;
			}
		}
		
		// rewind the bytes consumed by the probing above, then step over the BOM only
		input.reset();
		if (bom != null) {
			skipFully(input, bom.length);
		}
		return bom;
	}
	
	/**
	 * Skips exactly the given number of bytes, unless the end of the stream is reached first.
	 * 
	 * <p>A single call of {@link InputStream#skip(long)} is allowed to skip fewer bytes than
	 * requested (even none), so its result has to be checked and the call repeated.</p>
	 * 
	 * @param input the stream to advance
	 * @param count the number of bytes to skip
	 * @throws IOException if skipping or reading fails
	 */
	private static void skipFully(final InputStream input, final long count) throws IOException {
		long remaining = count;
		while (remaining > 0) {
			final long skipped = input.skip(remaining);
			if (skipped > 0) {
				remaining -= skipped;
			}
			else if (input.read() >= 0) {
				// skip made no progress, but the stream is not at its end: consume a single byte
				remaining--;
			}
			else {
				// end of stream, nothing left to skip
				return;
			}
		}
	}
	
}