blob: 1bb216d8b3582bdbec71f2aabebe436543e1e327 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2001, 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Jens Lukowski/Innoopract - initial renaming/restructuring
*
*******************************************************************************/
package org.eclipse.wst.sse.core.internal.encoding;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.IStatus;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.Status;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.IContentTypeManager;
import org.eclipse.core.runtime.jobs.Job;
import org.eclipse.wst.sse.core.internal.SSECoreMessages;
import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
import org.eclipse.wst.sse.core.internal.encoding.util.BufferedLimitedStream;
import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
import org.eclipse.wst.sse.core.internal.encoding.util.NullInputStream;
import org.eclipse.wst.sse.core.internal.encoding.util.UnicodeBOMEncodingDetector;
import org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail;
/**
 * The purpose of this class is to centralize analysis of a file to determine
 * the most appropriate rules of decoding it. The intended use is to set the
 * input (either an IFile, or a filename plus InputStream), then get the
 * reader for that input which will have its encoding set appropriately.
 * Additionally, there is an EncodingMemento provided, which will be
 * required, in some cases, to later determine the most appropriate form of
 * encoded output.
 * 
 * Streams supplied by the client are never closed by this class; they are
 * only reset. Streams this class opens itself (the IFile case) are closed
 * once detection is finished, or handed to the caller wrapped in the
 * returned reader.
 */
public class CodedReaderCreator extends CodedIO {

    /**
     * True when the input was provided through set(String, InputStream).
     * Such streams are reset rather than closed.
     */
    private boolean fClientSuppliedStream;

    /** Cached result of encoding detection; cleared whenever input is set. */
    private EncodingMemento fEncodingMemento;

    /** Rule that controls how detection/decoding problems are handled. */
    private EncodingRule fEncodingRule;

    /** Name used for content type lookup in the stream case; may be null. */
    private String fFilename;

    /** Workspace file to read from; null in the stream case. */
    private IFile fIFile;

    /** Client supplied stream; null in the file case. */
    private InputStream fInputStream;

    /**
     * Creates an instance with no input. One of the set methods must be
     * called before a reader is requested.
     */
    public CodedReaderCreator() {
        super();
    }

    /**
     * Creates an instance that reads from the given file, using content
     * based encoding detection.
     * 
     * @param file
     *            the file to read; must not be null
     * @throws CoreException
     * @throws IOException
     */
    public CodedReaderCreator(IFile file) throws CoreException, IOException {
        this();
        set(file);
        setEncodingRule(EncodingRule.CONTENT_BASED);
    }

    /**
     * Creates an instance that reads from the given file, using the given
     * encoding rule.
     * 
     * @param file
     *            the file to read; must not be null
     * @param encodingRule
     *            the rule to apply while decoding
     * @throws CoreException
     * @throws IOException
     */
    public CodedReaderCreator(IFile file, EncodingRule encodingRule) throws CoreException, IOException {
        this();
        set(file);
        setEncodingRule(encodingRule);
    }

    /**
     * Creates an instance that reads from a client supplied stream, using
     * content based encoding detection.
     * 
     * @param filename
     *            name used to help determine the content type
     * @param inputStream
     *            the stream to decode
     */
    public CodedReaderCreator(String filename, InputStream inputStream) {
        this();
        set(filename, inputStream);
        setEncodingRule(EncodingRule.CONTENT_BASED);
    }

    /**
     * Creates an instance that reads from a client supplied stream, using
     * the given encoding rule.
     * 
     * @param filename
     *            name used to help determine the content type
     * @param inputStream
     *            the stream to decode
     * @param encodingRule
     *            the rule to apply while decoding
     */
    public CodedReaderCreator(String filename, InputStream inputStream, EncodingRule encodingRule) {
        this();
        set(filename, inputStream);
        setEncodingRule(encodingRule);
    }

    /**
     * Asks the platform content type manager to describe the stream's
     * contents and converts that description into an encoding memento. The
     * stream is reset both before and after it is examined.
     * 
     * @param limitedStream
     *            resettable stream positioned by a previous mark
     * @return the memento found, or null if none could be determined
     * @throws UnsupportedCharsetExceptionWithDetail
     *             if a memento was found but is not valid, and the encoding
     *             rule is not FORCE_DEFAULT
     */
    private EncodingMemento checkForEncodingInContents(InputStream limitedStream) throws CoreException, IOException {
        EncodingMemento result = null;
        // if encoding memento already set, then iFile must
        // have been set, and no need to get again.
        if (fEncodingMemento != null) {
            result = fEncodingMemento;
        }
        else if (fClientSuppliedStream) {
            try {
                limitedStream.reset();
                IContentTypeManager contentTypeManager = Platform.getContentTypeManager();
                IContentDescription contentDescription = contentTypeManager.getDescriptionFor(limitedStream, fFilename, IContentDescription.ALL);
                if (contentDescription != null) {
                    fEncodingMemento = createMemento(contentDescription);
                }
                result = fEncodingMemento;
            }
            finally {
                // leave the stream where we found it, even on failure
                limitedStream.reset();
            }
        }
        // else: no memento and no client stream; nothing to examine, so
        // result stays null.

        if (result != null && !result.isValid() && !forceDefault()) {
            throw new UnsupportedCharsetExceptionWithDetail(result);
        }
        return result;
    }

    /**
     * Checks the beginning of the stream for a Unicode byte order mark.
     * 
     * @param resettableLimitedStream
     *            the stream to examine
     * @return the memento reported by the BOM detector (the caller treats
     *         null as "no BOM found")
     */
    private EncodingMemento checkStreamForBOM(InputStream resettableLimitedStream) {
        UnicodeBOMEncodingDetector unicodeBOMEncodingDetector = new UnicodeBOMEncodingDetector();
        unicodeBOMEncodingDetector.set(resettableLimitedStream);
        return unicodeBOMEncodingDetector.getEncodingMemento();
    }

    /**
     * Closes a stream owned by this class, logging (rather than
     * propagating) any failure, so that cleanup never masks the exception
     * that triggered it.
     * 
     * @param stream
     *            the stream to close; may be null
     */
    private void closeAndLogFailure(InputStream stream) {
        if (stream != null) {
            try {
                stream.close();
            }
            catch (IOException e) {
                Logger.logException(e);
            }
        }
    }

    /**
     * Determines the encoding memento for the IFile case. First the file's
     * own content description is tried; if that yields nothing and the file
     * is accessible, the file contents are handed to the content type
     * manager.
     * 
     * @return the memento built from the content description, or null if no
     *         description could be obtained
     * @throws CoreException
     *             if the file contents could not be obtained or described
     * @throws IOException
     *             if the file contents could not be read
     */
    private EncodingMemento findMementoFromFileCase() throws CoreException, IOException {
        IContentDescription contentDescription = null;
        try {
            // This method provides possible improved performance at the
            // cost of sometimes returning null
            if (fIFile.exists()) {
                contentDescription = fIFile.getContentDescription();
            }
        }
        catch (CoreException e) {
            // Assume if core exception occurs, we can still try more
            // expensive discovery options.
            Logger.logException(e);
        }

        if (contentDescription == null && fIFile.isAccessible()) {
            InputStream contents = null;
            try {
                contents = fIFile.getContents();
                contentDescription = Platform.getContentTypeManager().getDescriptionFor(contents, fIFile.getName(), IContentDescription.ALL);
            }
            catch (CoreException e1) {
                // Assume if core exception occurs, we can't really do much
                // with determining encoding, etc.
                Logger.logException(e1);
                throw e1;
            }
            catch (IOException e2) {
                // We likely couldn't get the contents of the file,
                // something is really wrong
                Logger.logException(e2);
                throw e2;
            }
            finally {
                // we opened this stream, so we must close it on every
                // path, including the rethrows above
                closeAndLogFailure(contents);
            }
        }

        EncodingMemento result = null;
        if (contentDescription != null) {
            result = createMemento(contentDescription);
        }
        return result;
    }

    /**
     * Determines the encoding memento for the client supplied stream case:
     * 1. first check for a unicode stream (byte order mark); 2. if none,
     * look for an encoding specified in the content (according to the type
     * of content it is ... xml, html, jsp, etc.). Fallbacks to resource
     * settings and defaults are applied by getEncodingMemento().
     * 
     * @return the memento found, or null
     * @throws IOException
     * @throws CoreException
     */
    private EncodingMemento findMementoFromStreamCase() throws CoreException, IOException {
        EncodingMemento result = null;
        InputStream resettableLimitedStream = null;
        try {
            resettableLimitedStream = getLimitedStream(getResettableStream());
            if (resettableLimitedStream != null) {
                // first check for unicode stream
                result = checkStreamForBOM(resettableLimitedStream);
                // if not that, then check contents
                if (result == null) {
                    resettableLimitedStream.reset();
                    result = checkForEncodingInContents(resettableLimitedStream);
                }
            }
            else if (fFilename != null) {
                // Defensive: getResettableStream() as written always
                // returns a stream, so this is not expected to be reached.
                // Stream is null but the name is not; let the content type
                // manager decide from the name alone.
                IContentTypeManager contentTypeManager = Platform.getContentTypeManager();
                IContentDescription contentDescription = contentTypeManager.getDescriptionFor(new NullInputStream(), fFilename, IContentDescription.ALL);
                if (contentDescription != null) {
                    result = createMemento(contentDescription);
                }
            }
        }
        finally {
            // handleStreamClose tolerates null
            handleStreamClose(resettableLimitedStream);
        }
        return result;
    }

    /**
     * @return true if the current encoding rule is FORCE_DEFAULT
     */
    private boolean forceDefault() {
        return fEncodingRule == EncodingRule.FORCE_DEFAULT;
    }

    /**
     * Returns a buffered, marked reader over the input, decoding with the
     * charset determined by getEncodingMemento(). Any byte order mark
     * reported by the memento is skipped, so the reader starts at the first
     * real character.
     * 
     * With the IGNORE_CONVERSION_ERROR rule, malformed or unmappable input
     * is replaced; otherwise it is reported by the decoder. With the
     * FORCE_DEFAULT rule, the memento's appropriate default charset is used
     * regardless of what was detected.
     * 
     * @return the reader; never null
     * @throws UnsupportedCharsetExceptionWithDetail
     *             if the memento is not valid and the rule is not
     *             FORCE_DEFAULT
     * @throws CoreException
     * @throws IOException
     */
    public Reader getCodedReader() throws CoreException, IOException {
        // we make a local copy of encoding memento so stream won't be
        // accessed simultaneously.
        EncodingMemento encodingMemento = getEncodingMemento();
        Assert.isNotNull(encodingMemento, "Appears reader requested before file or stream set"); //$NON-NLS-1$

        InputStream streamToReturn = getResettableStream();
        boolean readerCreated = false;
        try {
            streamToReturn.reset();
            // if UTF 3 byte BOM is used (or UTF-16LE), the built in
            // converters don't correctly skip all the BOM bytes ... so skip
            // them here to leave stream transparently ready for client.
            // see ... TODO look up bug number
            if (encodingMemento.isUnicodeStream()) {
                skipBytesFully(streamToReturn, 2);
            }
            else if (encodingMemento.isUTF83ByteBOMUsed()) {
                skipBytesFully(streamToReturn, 3);
            }

            String charsetName = encodingMemento.getJavaCharsetName();
            if (charsetName == null) {
                charsetName = encodingMemento.getDetectedCharsetName();
            }
            if (!encodingMemento.isValid() && !forceDefault()) {
                throw new UnsupportedCharsetExceptionWithDetail(encodingMemento);
            }
            if (forceDefault()) {
                charsetName = encodingMemento.getAppropriateDefault();
            }

            Charset charset = Charset.forName(charsetName);
            CharsetDecoder charsetDecoder = charset.newDecoder();
            CodingErrorAction errorAction = CodingErrorAction.REPORT;
            if (fEncodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
                errorAction = CodingErrorAction.REPLACE;
            }
            charsetDecoder.onMalformedInput(errorAction);
            charsetDecoder.onUnmappableCharacter(errorAction);

            // more efficient to be buffered, and I know of no reason not
            // to return that directly.
            Reader result = new BufferedReader(new InputStreamReader(streamToReturn, charsetDecoder), CodedIO.MAX_BUF_SIZE);
            result.mark(CodedIO.MAX_BUF_SIZE);
            readerCreated = true;
            return result;
        }
        finally {
            // If we failed before handing the stream to the caller, and the
            // stream is one we opened ourselves, nobody else can close it.
            // Client supplied streams stay under the client's control.
            if (!readerCreated && !fClientSuppliedStream) {
                closeAndLogFailure(streamToReturn);
            }
        }
    }

    /**
     * Returns the encoding memento for the current input, computing and
     * caching it on first use. Lookup order:
     * 1. content based detection (stream case or file case);
     * 2. if that gives no memento, or one without a detected charset name,
     * the charset reported by the IFile (when a file is set);
     * 3. the default name rules from NonContentBasedEncodingRules.
     * 
     * @return the encoding memento
     * @throws CoreException
     * @throws IOException
     */
    public EncodingMemento getEncodingMemento() throws CoreException, IOException {
        // figure out encoding memento from encoding strategy
        if (fEncodingMemento == null) {
            if (fClientSuppliedStream) {
                fEncodingMemento = findMementoFromStreamCase();
            }
            else if (fIFile != null) {
                fEncodingMemento = findMementoFromFileCase();
            }
        }

        // if encoding strategy doesn't provide answer, then try file
        // settings, project settings, user preferences, and finally
        // workbench default.
        if (fEncodingMemento == null || fEncodingMemento.getDetectedCharsetName() == null) {
            fEncodingMemento = getEncodingMementoFromResourceAndPreference();
        }

        // use DefaultNameRules from NonContentBasedEncodingRules as the
        // final default
        if (fEncodingMemento == null) {
            fEncodingMemento = handleNotProvidedFromContentCase();
        }
        return fEncodingMemento;
    }

    /**
     * This method is called only when encoding is not detected in the file.
     * 
     * Follows the Eclipse Platform's direction: when an IFile is set, the
     * charset is taken from IFile.getCharset(). When no IFile is set, the
     * current memento (possibly null) is returned unchanged.
     * 
     * Note: This method appears in both CodedReaderCreator and
     * CodedStreamCreator (with just a minor difference). They should be kept
     * the same.
     * 
     * @return a memento for the file's charset, or the current memento when
     *         there is no file
     */
    private EncodingMemento getEncodingMementoFromResourceAndPreference() throws IOException, CoreException {
        if (fIFile == null) {
            return fEncodingMemento;
        }
        String charset = fIFile.getCharset();
        return CodedIO.createEncodingMemento(charset);
    }

    /**
     * Ensures that an InputStream has mark/reset support, that its
     * readlimit is set, and that the stream is "limitable" (that is,
     * reports "end of input" rather than allow going past mark). This is a
     * very specialized stream introduced to overcome
     * https://bugs.eclipse.org/bugs/show_bug.cgi?id=67211. See also
     * https://bugs.eclipse.org/bugs/show_bug.cgi?id=68565
     * 
     * @param original
     *            the stream to wrap; may be null
     * @return null if original is null; original itself if it already is a
     *         BufferedLimitedStream; otherwise a new, marked
     *         BufferedLimitedStream wrapping it
     */
    private InputStream getLimitedStream(InputStream original) {
        if (original == null) {
            return null;
        }
        if (original instanceof BufferedLimitedStream) {
            return original;
        }
        InputStream limited = new BufferedLimitedStream(original, CodedIO.MAX_MARK_SIZE);
        limited.mark(CodedIO.MAX_MARK_SIZE);
        return limited;
    }

    /**
     * Returns a stream over the current input that supports mark/reset and
     * is marked at its "zero" position.
     * 
     * In the file case a new stream is opened on every call; if the file
     * contents cannot be obtained, an empty stream is substituted. In the
     * stream case the client's stream is used directly when it supports
     * mark (after being reset), and wrapped in a BufferedInputStream
     * otherwise. With no input at all, an empty stream is returned.
     * 
     * @return a marked, resettable stream; never null
     * @throws IOException
     *             if resetting the client supplied stream fails
     */
    private InputStream getResettableStream() throws CoreException, IOException {
        InputStream resettableStream = null;

        if (fIFile != null) {
            InputStream inputStream = null;
            try {
                // note we always get contents, even if out of synch
                inputStream = fIFile.getContents(true);
            }
            catch (CoreException e) {
                // SHOULD actually check for existence of fIStorage, but for
                // now will just assume core exception means it doesn't
                // exist on file system, yet. And we'll log, just in case
                // it's a notable error.
                Logger.logException(e);
                inputStream = new NullInputStream();
            }
            resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
        }
        else if (fInputStream != null) {
            if (fInputStream.markSupported()) {
                resettableStream = fInputStream;
                // NOTE(review): a failure here is deliberately propagated
                // rather than ignored; earlier code that swallowed the
                // IOException ("assumed just hasn't been marked yet") was
                // disabled.
                resettableStream.reset();
            }
            else {
                resettableStream = new BufferedInputStream(fInputStream, CodedIO.MAX_BUF_SIZE);
            }
        }

        if (resettableStream == null) {
            resettableStream = new NullInputStream();
        }

        // mark this once, stream at "zero" position
        resettableStream.mark(MAX_MARK_SIZE);
        return resettableStream;
    }

    /**
     * Last resort when neither content nor resource settings provided an
     * encoding: applies the default name rules with no spec default.
     * 
     * @return a memento for the default charset
     */
    private EncodingMemento handleNotProvidedFromContentCase() {
        // No spec default is currently determined here; this logic should
        // be moved to 'detection' if not already.
        String specDefault = null;
        String charset = NonContentBasedEncodingRules.useDefaultNameRules(specDefault);
        Assert.isNotNull(charset, "post condition failed"); //$NON-NLS-1$
        return CodedIO.createEncodingMemento(charset);
    }

    /**
     * Finishes with a stream used for detection: client supplied streams
     * are reset (the client still needs them), streams opened by this class
     * are closed.
     * 
     * @param resettableInputStream
     *            the stream to finish with; may be null
     * @throws IOException
     *             if the reset or close fails
     */
    private void handleStreamClose(InputStream resettableInputStream) throws IOException {
        if (resettableInputStream == null) {
            return;
        }
        if (fClientSuppliedStream) {
            resettableInputStream.reset();
        }
        else {
            resettableInputStream.close();
        }
    }

    // TODO We just copy the content properties encoding to current resource's
    // encoding for now. May improve the UI later by setting an informational
    // message and/or disable the content properties encoding field.
    // TODO: remake private else remove
    /**
     * Schedules a system job that sets the given charset on the current
     * file. Failures while setting the charset are logged; the job always
     * reports OK status. Does nothing useful if no file is set.
     * 
     * @param encoding
     *            the charset name to store on the file
     * @throws CoreException
     *             declared for callers; not thrown by the current
     *             implementation
     */
    void migrateContentPropertiesEncoding(String encoding) throws CoreException {
        final IFile file = fIFile;
        final String charset = encoding;
        Job migrater = new Job(SSECoreMessages.Migrate_Charset) {
            protected IStatus run(IProgressMonitor monitor) {
                if (file != null) {
                    try {
                        file.setCharset(charset, null);
                    }
                    catch (CoreException e) {
                        Logger.logException(e);
                    }
                }
                return Status.OK_STATUS;
            }
        };
        migrater.setSystem(true);
        migrater.schedule();
    }

    /**
     * Clears all input, the encoding rule, and any cached memento.
     */
    private void resetAll() {
        fEncodingRule = null;
        fIFile = null;
        fFilename = null;
        fInputStream = null;
        fEncodingMemento = null;
        fClientSuppliedStream = false;
    }

    /**
     * Sets a workspace file as the input, discarding any previous input,
     * encoding rule and cached memento.
     * 
     * @param iFile
     *            the file to read; must not be null
     * @throws CoreException
     *             declared for callers; not thrown by the current
     *             implementation
     * @throws IOException
     *             declared for callers; not thrown by the current
     *             implementation
     */
    public void set(IFile iFile) throws CoreException, IOException {
        Assert.isNotNull(iFile, "illegal argument"); //$NON-NLS-1$
        resetAll();
        fIFile = iFile;
    }

    /**
     * Sets a client supplied stream as the input, discarding any previous
     * input, encoding rule and cached memento. The stream is never closed
     * by this class.
     * 
     * @param filename
     *            name used to help determine the content type
     * @param inputStream
     *            the stream to decode
     */
    public void set(String filename, InputStream inputStream) {
        resetAll();
        fFilename = filename;
        fInputStream = inputStream;
        fClientSuppliedStream = true;
    }

    /**
     * Sets the rule that controls how decoding problems are handled. Note
     * that both set methods clear the rule, so it must be set after the
     * input.
     * 
     * @param encodingRule
     *            the rule to apply
     */
    public void setEncodingRule(EncodingRule encodingRule) {
        fEncodingRule = encodingRule;
    }

    /**
     * Skips exactly the requested number of bytes unless end of stream is
     * reached first. InputStream.skip is allowed to skip fewer bytes than
     * asked (even zero), so a single call is not sufficient.
     * 
     * @param stream
     *            the stream to advance
     * @param count
     *            number of bytes to skip
     * @throws IOException
     *             if the stream fails while skipping or reading
     */
    private static void skipBytesFully(InputStream stream, long count) throws IOException {
        long remaining = count;
        while (remaining > 0) {
            long skipped = stream.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
            }
            else if (stream.read() >= 0) {
                // skip() made no progress; consume a single byte directly
                remaining--;
            }
            else {
                // end of stream before all bytes were skipped
                return;
            }
        }
    }
}