blob: b531492845384e5a3b566615fd7e670f2bf287e7 [file] [log] [blame]
/*=============================================================================#
# Copyright (c) 2008, 2019 Stephan Wahlbrink and others.
#
# This program and the accompanying materials are made available under the
# terms of the Eclipse Public License 2.0 which is available at
# https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
# which is available at https://www.apache.org/licenses/LICENSE-2.0.
#
# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
#
# Contributors:
# Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
#=============================================================================*/
package org.eclipse.statet.internal.r.core;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.core.runtime.QualifiedName;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.IContentType;
import org.eclipse.statet.ecommons.io.TextContentDescriber;
/**
 * Detects the encoding of an Rd file based on the optional \encoding element.
 * 
 * The \encoding element must be the first command in the Rd file; only blank lines,
 * leading spaces/tabs and <code>%</code> comment lines may precede it.
 */
public class RdTextContentDescriber extends TextContentDescriber {
	
	
	private static final QualifiedName[] SUPPORTED_OPTIONS= new QualifiedName[] {
		IContentDescription.CHARSET,
		IContentDescription.BYTE_ORDER_MARK,
	};
	
	/** Name of the Rd command (without the leading backslash) declaring the encoding. */
	private static final String ENCODING_COMMAND_NAME= "encoding"; //$NON-NLS-1$
	
	/**
	 * Matches the argument following the command name: <code>{ name }</code> plus arbitrary
	 * trailing text. The captured name must be non-empty and must not contain whitespace or
	 * a closing brace, so that a later <code>}</code> on the same line
	 * (e.g. <code>\encoding{UTF-8}\name{x}</code>) cannot become part of the charset name.
	 */
	private static final Pattern BRACKET_CONTENT_PATTERN= Pattern.compile("\\s*\\{\\s*([^\\s\\}]+)\\s*\\}.*"); //$NON-NLS-1$
	
	
	public RdTextContentDescriber() {
	}
	
	
	/**
	 * Returns the description options this describer can provide: charset and byte order mark.
	 */
	@Override
	public QualifiedName[] getSupportedOptions() {
		return SUPPORTED_OPTIONS;
	}
	
	/**
	 * Describes character based content.
	 * 
	 * If the charset is requested, the content is searched for the \encoding element and the
	 * declared name is stored as charset property of the description.
	 * 
	 * @param contents the content to inspect
	 * @param description the description to fill, may be <code>null</code>
	 * @return <code>INDETERMINATE</code> if the charset is requested but no encoding
	 *     declaration was found, otherwise <code>VALID</code>
	 * @throws IOException if reading the content fails
	 */
	@Override
	public int describe(final Reader contents, final IContentDescription description) throws IOException {
		if (description != null && description.isRequested(IContentDescription.CHARSET)) {
			final BufferedReader reader= new BufferedReader(contents);
			final String encoding= searchEncoding(reader);
			if (encoding == null) {
				return INDETERMINATE;
			}
			description.setProperty(IContentDescription.CHARSET, encoding);
		}
		return VALID;
	}
	
	/**
	 * Describes byte based content.
	 * 
	 * A byte order mark, if detected, is stored in the description (when requested) and
	 * selects the charset used to decode the stream while searching for the \encoding
	 * element; without a recognized byte order mark, UTF-8 is used for decoding.
	 * The declared encoding is stored as charset property only if it differs from the
	 * default charset of the description's content type.
	 * 
	 * @param contents the content to inspect
	 * @param description the description to fill, may be <code>null</code>
	 * @return <code>INDETERMINATE</code> if the charset is requested but no encoding
	 *     declaration was found, otherwise <code>VALID</code>
	 * @throws IOException if reading the content fails
	 */
	@Override
	public int describe(final InputStream contents, final IContentDescription description) throws IOException {
		final byte[] bom= getByteOrderMark(contents);
		String baseEncoding= "UTF-8"; // "ISO-8859-1"; //$NON-NLS-1$
		if (bom != null) {
			// NOTE(review): identity comparison; presumably getByteOrderMark returns the shared
			// IContentDescription.BOM_* constants -- confirm against the super class
			if (bom == IContentDescription.BOM_UTF_16BE) {
				baseEncoding= "UTF-16BE"; //$NON-NLS-1$
			}
			else if (bom == IContentDescription.BOM_UTF_16LE) {
				baseEncoding= "UTF-16LE"; //$NON-NLS-1$
			}
			else if (bom == IContentDescription.BOM_UTF_8) {
				baseEncoding= "UTF-8"; //$NON-NLS-1$
			}
			if (description != null && description.isRequested(IContentDescription.BYTE_ORDER_MARK)) {
				description.setProperty(IContentDescription.BYTE_ORDER_MARK, bom);
			}
		}
		if (description != null && description.isRequested(IContentDescription.CHARSET)) {
			final BufferedReader reader= new BufferedReader(
					new InputStreamReader(contents, baseEncoding) );
			final String encoding= searchEncoding(reader);
			if (encoding == null) {
				return INDETERMINATE;
			}
			final IContentType contentType= description.getContentType();
			if (contentType == null || !encoding.equals(contentType.getDefaultCharset())) {
				description.setProperty(IContentDescription.CHARSET, encoding);
			}
		}
		return VALID;
	}
	
	/**
	 * Searches for the \encoding element at the beginning of the content.
	 * 
	 * Lines which are empty, consist only of spaces/tabs or start (after optional
	 * spaces/tabs) with the comment character <code>%</code> are skipped. The search stops
	 * at the first other character; the encoding is returned only if that character starts
	 * a well-formed <code>\encoding{name}</code> element with a non-empty name.
	 * 
	 * @param reader the reader providing the content
	 * @return the declared encoding name, or <code>null</code> if none was found
	 * @throws IOException if reading the content fails
	 */
	private String searchEncoding(final BufferedReader reader) throws IOException {
		String line;
		ITER_LINES: while ((line= reader.readLine()) != null) {
			ITER_CHARS: for (int i= 0; i < line.length(); i++) {
				switch (line.charAt(i)) {
				case ' ':
				case '\t':
					// leading whitespace
					continue ITER_CHARS;
				case '%':
					// comment, the rest of the line is ignored
					continue ITER_LINES;
				case '\\':
					// first command in the file: it is either the encoding declaration or
					// there is none
					if (line.regionMatches(i + 1, ENCODING_COMMAND_NAME, 0, ENCODING_COMMAND_NAME.length())) {
						final Matcher matcher= BRACKET_CONTENT_PATTERN.matcher(
								line.substring(i + 1 + ENCODING_COMMAND_NAME.length()) );
						if (matcher.matches()) {
							return matcher.group(1);
						}
					}
					break ITER_LINES;
				default:
					// other content before any command
					break ITER_LINES;
				}
			}
		}
		return null;
	}
	
}