r/org.eclipse.statet.r.core/src/org/eclipse/statet/internal/r/core/RdTextContentDescriber.java - statet/org.eclipse.statet-r - Git at Google

 /*=============================================================================#
  # Copyright (c) 2008, 2019 Stephan Wahlbrink and others.
  #
  # This program and the accompanying materials are made available under the
  # terms of the Eclipse Public License 2.0 which is available at
  # https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
  # which is available at https://www.apache.org/licenses/LICENSE-2.0.
  #
  # SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
  #
  # Contributors:
  #     Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
  #=============================================================================*/

 package org.eclipse.statet.internal.r.core;

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.eclipse.core.runtime.QualifiedName;
 import org.eclipse.core.runtime.content.IContentDescription;
 import org.eclipse.core.runtime.content.IContentType;

 import org.eclipse.statet.ecommons.io.TextContentDescriber;


 /**
  * Content describer for Rd files which detects the charset from the optional
  * {@code \encoding{...}} element.
  * 
  * <p>The {@code \encoding} element is only recognized if it is the first command in the file.
  * Blank lines, leading spaces/tabs and lines starting with a {@code %} comment in front of it
  * are skipped; any other content in front of it stops the search.</p>
  */
 public class RdTextContentDescriber extends TextContentDescriber {
 	
 	
 	private static final QualifiedName[] SUPPORTED_OPTIONS= new QualifiedName[] {
 		IContentDescription.CHARSET,
 		IContentDescription.BYTE_ORDER_MARK,
 	};
 	
 	
 	/** Name of the Rd command (without the leading backslash) declaring the encoding. */
 	private static final String ENCODING_COMMAND_NAME= "encoding"; //$NON-NLS-1$
 	
 	/**
 	 * Matches the argument following the command name: <code>{ name }</code> plus arbitrary
 	 * trailing text. Group 1 is the encoding name.
 	 * 
 	 * The name must be non-empty and must not contain whitespace or curly brackets, so that the
 	 * capture ends at the first closing bracket even if more commands follow on the same line
 	 * (a greedy <code>\S*</code> would swallow text up to the last closing bracket of the line).
 	 */
 	private static final Pattern BRACKET_CONTENT_PATTERN= Pattern.compile("\\s*\\{\\s*([^\\s{}]+)\\s*\\}.*"); //$NON-NLS-1$
 	
 	
 	public RdTextContentDescriber() {
 	}
 	
 	
 	@Override
 	public QualifiedName[] getSupportedOptions() {
 		return SUPPORTED_OPTIONS;
 	}
 	
 	/**
 	 * Describes character based contents.
 	 * 
 	 * @param contents the contents to inspect
 	 * @param description the description to fill, may be <code>null</code>
 	 * @return {@code INDETERMINATE} if the charset is requested but no encoding element was found,
 	 *     otherwise {@code VALID}
 	 * @throws IOException if reading the contents fails
 	 */
 	@Override
 	public int describe(final Reader contents, final IContentDescription description) throws IOException {
 		if (description != null && description.isRequested(IContentDescription.CHARSET)) {
 			// the reader is intentionally not closed here, it was passed in by the caller
 			final BufferedReader reader= new BufferedReader(contents);
 			final String encoding= searchEncoding(reader);
 			if (encoding == null) {
 				return INDETERMINATE;
 			}
 			description.setProperty(IContentDescription.CHARSET, encoding);
 		}
 		return VALID;
 	}
 	
 	/**
 	 * Describes byte based contents.
 	 * 
 	 * A byte order mark, if present, is reported (when requested) and selects the charset used to
 	 * decode the stream for the search of the encoding element; without a recognized BOM the
 	 * stream is decoded as UTF-8.
 	 * 
 	 * @param contents the contents to inspect
 	 * @param description the description to fill, may be <code>null</code>
 	 * @return {@code INDETERMINATE} if the charset is requested but no encoding element was found,
 	 *     otherwise {@code VALID}
 	 * @throws IOException if reading the contents fails
 	 */
 	@Override
 	public int describe(final InputStream contents, final IContentDescription description) throws IOException {
 		final byte[] bom= getByteOrderMark(contents);
 		// charset used only to decode the beginning of the file for the search
 		String baseEncoding= "UTF-8"; //$NON-NLS-1$
 		if (bom != null) {
 			// NOTE(review): identity comparison presumably relies on getByteOrderMark returning
 			// the shared IContentDescription.BOM_* constants -- confirm against the super class
 			if (bom == IContentDescription.BOM_UTF_16BE) {
 				baseEncoding= "UTF-16BE"; //$NON-NLS-1$
 			}
 			else if (bom == IContentDescription.BOM_UTF_16LE) {
 				baseEncoding= "UTF-16LE"; //$NON-NLS-1$
 			}
 			else if (bom == IContentDescription.BOM_UTF_8) {
 				baseEncoding= "UTF-8"; //$NON-NLS-1$
 			}
 			
 			if (description != null && description.isRequested(IContentDescription.BYTE_ORDER_MARK)) {
 				description.setProperty(IContentDescription.BYTE_ORDER_MARK, bom);
 			}
 		}
 		if (description != null && description.isRequested(IContentDescription.CHARSET)) {
 			// the stream is intentionally not closed here, it was passed in by the caller
 			final BufferedReader reader= new BufferedReader(
 					new InputStreamReader(contents, baseEncoding) );
 			final String encoding= searchEncoding(reader);
 			if (encoding == null) {
 				return INDETERMINATE;
 			}
 			// set the property only if it differs from the default charset of the content type
 			final IContentType contentType= description.getContentType();
 			if (contentType == null || !encoding.equals(contentType.getDefaultCharset())) {
 				description.setProperty(IContentDescription.CHARSET, encoding);
 			}
 		}
 		return VALID;
 	}
 	
 	/**
 	 * Searches the encoding element at the beginning of the contents.
 	 * 
 	 * @param reader the reader providing the contents line by line
 	 * @return the declared encoding name, or <code>null</code> if the first command is not a
 	 *     well-formed encoding element with a non-empty name
 	 * @throws IOException if reading fails
 	 */
 	private String searchEncoding(final BufferedReader reader) throws IOException {
 		String line;
 		ITER_LINES: while ((line= reader.readLine()) != null) {
 			ITER_CHARS: for (int i= 0; i < line.length(); i++) {
 				switch (line.charAt(i)) {
 				case ' ':
 				case '\t':
 					// leading indentation
 					continue ITER_CHARS;
 				case '%':
 					// comment, the rest of the line is ignored
 					continue ITER_LINES;
 				case '\\':
 					// first command of the file: either it is the encoding element or there is none
 					if (line.regionMatches(i + 1, ENCODING_COMMAND_NAME, 0, ENCODING_COMMAND_NAME.length())) {
 						final Matcher matcher= BRACKET_CONTENT_PATTERN.matcher(
 								line.substring(i + 1 + ENCODING_COMMAND_NAME.length()) );
 						if (matcher.matches()) {
 							return matcher.group(1);
 						}
 					}
 					break ITER_LINES;
 				default:
 					// other content in front of any command
 					break ITER_LINES;
 				}
 			}
 		}
 		return null;
 	}
 	
 }
	/*=============================================================================#
	# Copyright (c) 2008, 2019 Stephan Wahlbrink and others.
	#
	# This program and the accompanying materials are made available under the
	# terms of the Eclipse Public License 2.0 which is available at
	# https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
	# which is available at https://www.apache.org/licenses/LICENSE-2.0.
	#
	# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
	#
	# Contributors:
	# Stephan Wahlbrink <sw@wahlbrink.eu> - initial API and implementation
	#=============================================================================*/

	package org.eclipse.statet.internal.r.core;

	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.InputStreamReader;
	import java.io.Reader;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	import org.eclipse.core.runtime.QualifiedName;
	import org.eclipse.core.runtime.content.IContentDescription;
	import org.eclipse.core.runtime.content.IContentType;

	import org.eclipse.statet.ecommons.io.TextContentDescriber;


	/**
	* Detects encoding of Rd file based on optional \encoding element
	* The \encoding element must be the first command in Rd file
	*/
	public class RdTextContentDescriber extends TextContentDescriber {


	private static final QualifiedName[] SUPPORTED_OPTIONS= new QualifiedName[] {
	IContentDescription.CHARSET,
	IContentDescription.BYTE_ORDER_MARK,
	};


	private static String ENCODING_COMMAND_NAME= "encoding"; //$NON-NLS-1$
	private static Pattern BRACKET_CONTENT_PATTERN= Pattern.compile("\\s\\{\\s(\\S)\\s\\}.*"); //$NON-NLS-1$


	public RdTextContentDescriber() {
	}


	@Override
	public QualifiedName[] getSupportedOptions() {
	return SUPPORTED_OPTIONS;
	}

	@Override
	public int describe(final Reader contents, final IContentDescription description) throws IOException {
	if (description != null && description.isRequested(IContentDescription.CHARSET)) {
	final BufferedReader reader= new BufferedReader(contents);
	final String encoding= searchEncoding(reader);
	if (encoding == null) {
	return INDETERMINATE;
	}
	description.setProperty(IContentDescription.CHARSET, encoding);
	}
	return VALID;
	}

	@Override
	public int describe(final InputStream contents, final IContentDescription description) throws IOException {
	final byte[] bom= getByteOrderMark(contents);
	String baseEncoding= "UTF-8"; // "ISO-8859-1"; //$NON-NLS-1$
	if (bom != null) {
	if (bom == IContentDescription.BOM_UTF_16BE) {
	baseEncoding= "UTF-16BE"; //$NON-NLS-1$
	}
	else if (bom == IContentDescription.BOM_UTF_16LE) {
	baseEncoding= "UTF-16LE"; //$NON-NLS-1$
	}
	else if (bom == IContentDescription.BOM_UTF_8) {
	baseEncoding= "UTF-8"; //$NON-NLS-1$
	}

	if (description != null && description.isRequested(IContentDescription.BYTE_ORDER_MARK)) {
	description.setProperty(IContentDescription.BYTE_ORDER_MARK, bom);
	}
	}
	if (description != null && description.isRequested(IContentDescription.CHARSET)) {
	final BufferedReader reader= new BufferedReader(
	new InputStreamReader(contents, baseEncoding) );
	final String encoding= searchEncoding(reader);
	if (encoding == null) {
	return INDETERMINATE;
	}
	final IContentType contentType= description.getContentType();
	if (contentType == null \|\| !encoding.equals(contentType.getDefaultCharset())) {
	description.setProperty(IContentDescription.CHARSET, encoding);
	}
	}
	return VALID;
	}

	private String searchEncoding(final BufferedReader reader) throws IOException {
	String line;
	ITER_LINES: while ((line= reader.readLine()) != null) {
	ITER_CHARS: for (int i= 0; i < line.length(); i++) {
	switch (line.charAt(i)) {
	case ' ':
	case '\t':
	continue ITER_CHARS;
	case '%':
	continue ITER_LINES;
	case '\\':
	if (line.regionMatches(i + 1, ENCODING_COMMAND_NAME, 0, ENCODING_COMMAND_NAME.length())) {
	final Matcher matcher= BRACKET_CONTENT_PATTERN.matcher(line.substring(i + 1 + ENCODING_COMMAND_NAME.length()));
	if (matcher.matches()) {
	return matcher.group(1);
	}
	}
	break ITER_LINES;
	default:
	break ITER_LINES;
	}
	}
	}
	return null;
	}

	}