/*******************************************************************************
 * Copyright (c) 2001, 2008 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *
 *******************************************************************************/
| package org.eclipse.wst.sse.core.internal.encoding; |
| |
| import java.io.BufferedInputStream; |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.Reader; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CodingErrorAction; |
| |
| import org.eclipse.core.resources.IFile; |
| import org.eclipse.core.runtime.CoreException; |
| import org.eclipse.core.runtime.IProgressMonitor; |
| import org.eclipse.core.runtime.IStatus; |
| import org.eclipse.core.runtime.Platform; |
| import org.eclipse.core.runtime.Status; |
| import org.eclipse.core.runtime.content.IContentDescription; |
| import org.eclipse.core.runtime.content.IContentTypeManager; |
| import org.eclipse.core.runtime.jobs.Job; |
| import org.eclipse.wst.sse.core.internal.SSECoreMessages; |
| import org.eclipse.wst.sse.core.internal.encoding.util.Assert; |
| import org.eclipse.wst.sse.core.internal.encoding.util.BufferedLimitedStream; |
| import org.eclipse.wst.sse.core.internal.encoding.util.Logger; |
| import org.eclipse.wst.sse.core.internal.encoding.util.NullInputStream; |
| import org.eclipse.wst.sse.core.internal.encoding.util.UnicodeBOMEncodingDetector; |
| import org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail; |
| |
| |
| /** |
| * The purpose of this class is to centralize analysis of a file to determine |
| * the most appropriate rules of decoding it. The intended use is to set the |
| * input, then get the reader for that input which will have its encoding set |
| * appropriately. Additionally, there is an EncodingMemento provided, which |
| * will be required, in some cases, to later determine the most appropriate |
| * form of encoded output. |
| */ |
| public class CodedReaderCreator extends CodedIO { |
| |
| |
| private boolean fClientSuppliedStream; |
| |
| |
| private EncodingMemento fEncodingMemento; |
| |
| private EncodingRule fEncodingRule; |
| |
| private String fFilename; |
| |
| private IFile fIFile; |
| |
| |
| private InputStream fInputStream; |
| |
| private static final String CHARSET_UTF_16= "UTF-16"; //$NON-NLS-1$ |
| |
| private static final String CHARSET_UTF_16LE= "UTF-16LE"; //$NON-NLS-1$ |
| |
| public CodedReaderCreator() { |
| |
| super(); |
| } |
| |
| public CodedReaderCreator(IFile file) throws CoreException, IOException { |
| |
| this(); |
| set(file); |
| setEncodingRule(EncodingRule.CONTENT_BASED); |
| } |
| |
| public CodedReaderCreator(IFile file, EncodingRule encodingRule) throws CoreException, IOException { |
| |
| this(); |
| set(file); |
| setEncodingRule(encodingRule); |
| } |
| |
| public CodedReaderCreator(String filename, InputStream inputStream) { |
| |
| this(); |
| set(filename, inputStream); |
| setEncodingRule(EncodingRule.CONTENT_BASED); |
| } |
| |
| public CodedReaderCreator(String filename, InputStream inputStream, EncodingRule encodingRule) { |
| |
| this(); |
| set(filename, inputStream); |
| setEncodingRule(encodingRule); |
| } |
| |
| private EncodingMemento checkForEncodingInContents(InputStream limitedStream) throws CoreException, IOException { |
| EncodingMemento result = null; |
| |
| // if encoding memento already set, then iFile must |
| // have been set, and no need to get again. |
| if (fEncodingMemento != null) { |
| result = fEncodingMemento; |
| } |
| else { |
| if (fClientSuppliedStream) { |
| try { |
| limitedStream.reset(); |
| IContentTypeManager contentTypeManager = Platform.getContentTypeManager(); |
| IContentDescription contentDescription = contentTypeManager.getDescriptionFor(limitedStream, fFilename, IContentDescription.ALL); |
| if (contentDescription != null) { |
| fEncodingMemento = createMemento(contentDescription); |
| } |
| result = fEncodingMemento; |
| } |
| finally { |
| limitedStream.reset(); |
| } |
| } |
| else { |
| // throw new IllegalStateException("unexpected state: |
| // encodingMemento was null but no input stream supplied by |
| // client"); //$NON-NLS-1$ |
| result = null; |
| } |
| } |
| |
| if (result != null && !result.isValid() && !forceDefault()) { |
| throw new UnsupportedCharsetExceptionWithDetail(result); |
| } |
| |
| return result; |
| } |
| |
| /** |
| * @param resettableLimitedStream |
| */ |
| private EncodingMemento checkStreamForBOM(InputStream resettableLimitedStream) { |
| EncodingMemento result = null; |
| UnicodeBOMEncodingDetector unicodeBOMEncodingDetector = new UnicodeBOMEncodingDetector(); |
| unicodeBOMEncodingDetector.set(resettableLimitedStream); |
| result = unicodeBOMEncodingDetector.getEncodingMemento(); |
| return result; |
| } |
| |
| /** |
| * @param iFile |
| * @throws CoreException |
| * @throws IOException |
| */ |
| private EncodingMemento findMementoFromFileCase() throws CoreException, IOException { |
| EncodingMemento result = null; |
| IContentDescription contentDescription = null; |
| try { |
| // This method provides possible improved performance at the |
| // cost of sometimes returning null |
| if (fIFile.exists()) |
| contentDescription = fIFile.getContentDescription(); |
| } |
| catch (CoreException e) { |
| // Assume if core exception occurs, we can still try more |
| // expensive |
| // discovery options. |
| Logger.logException(e); |
| } |
| if (contentDescription == null && fIFile.isAccessible()) { |
| InputStream contents = null; |
| try { |
| contents = fIFile.getContents(); |
| contentDescription = Platform.getContentTypeManager().getDescriptionFor(contents, fIFile.getName(), IContentDescription.ALL); |
| } |
| catch (CoreException e1) { |
| // Assume if core exception occurs, we can't really do much |
| // with |
| // determining encoding, etc. |
| Logger.logException(e1); |
| throw e1; |
| } |
| catch (IOException e2) { |
| // We likely couldn't get the contents of the file, something |
| // is really wrong |
| Logger.logException(e2); |
| throw e2; |
| } |
| if (contents != null) { |
| try { |
| contents.close(); |
| } |
| catch (IOException e2) { |
| Logger.logException(e2); |
| } |
| } |
| } |
| if (contentDescription != null) { |
| result = createMemento(contentDescription); |
| } |
| |
| return result; |
| } |
| |
| /** |
| * The primary method which contains the highest level rules for how to |
| * decide appropriate decoding rules: 1. first check for unicode stream 2. |
| * then looked for encoding specified in content (according to the type of |
| * content that is it ... xml, html, jsp, etc. 3. then check for various |
| * settings: file settings first, if null check project settings, if null, |
| * check user preferences. 4. lastly (or, what is the last user |
| * preference) is to use "workbench defaults". |
| * |
| * @throws IOException |
| * @throws CoreException |
| */ |
| private EncodingMemento findMementoFromStreamCase() throws CoreException, IOException { |
| |
| EncodingMemento result = null; |
| InputStream resettableLimitedStream = null; |
| try { |
| resettableLimitedStream = getLimitedStream(getResettableStream()); |
| if (resettableLimitedStream != null) { |
| // first check for unicode stream |
| result = checkStreamForBOM(resettableLimitedStream); |
| // if not that, then check contents |
| if (result == null) { |
| resettableLimitedStream.reset(); |
| result = checkForEncodingInContents(resettableLimitedStream); |
| } |
| |
| } |
| else { |
| // stream null, may name's not. |
| if (fFilename != null) { |
| // filename not null |
| IContentTypeManager contentTypeManager = Platform.getContentTypeManager(); |
| IContentDescription contentDescription = contentTypeManager.getDescriptionFor(new NullInputStream(), fFilename, IContentDescription.ALL); |
| if (contentDescription != null) { |
| result = createMemento(contentDescription); |
| } |
| } |
| } |
| } |
| finally { |
| if (resettableLimitedStream != null) { |
| handleStreamClose(resettableLimitedStream); |
| } |
| } |
| return result; |
| } |
| |
| private boolean forceDefault() { |
| |
| boolean result = false; |
| if (fEncodingRule != null && fEncodingRule == EncodingRule.FORCE_DEFAULT) |
| result = true; |
| return result; |
| } |
| |
| public Reader getCodedReader() throws CoreException, IOException { |
| |
| Reader result = null; |
| // we make a local copy of encoding memento so |
| // stream won't |
| // be accessed simultaneously. |
| EncodingMemento encodingMemento = getEncodingMemento(); |
| Assert.isNotNull(encodingMemento, "Appears reader requested before file or stream set"); //$NON-NLS-1$ |
| InputStream streamToReturn = getResettableStream(); |
| streamToReturn.reset(); |
| // if UTF 3 byte BOM is used (or UTF-16LE), the |
| // built in converters |
| // don't |
| // correct skip all three bytes ... so skip |
| // remaining one to leave |
| // stream transparently ready for client. |
| // see ... TODO look up bug number |
| if (encodingMemento.isUnicodeStream()) { |
| streamToReturn.skip(2); |
| } |
| else if (encodingMemento.isUTF83ByteBOMUsed()) { |
| streamToReturn.skip(3); |
| } |
| String charsetName = encodingMemento.getJavaCharsetName(); |
| if (charsetName == null) { |
| charsetName = encodingMemento.getDetectedCharsetName(); |
| } |
| if (!encodingMemento.isValid() && !forceDefault()) { |
| throw new UnsupportedCharsetExceptionWithDetail(encodingMemento); |
| } |
| |
| if (fEncodingRule == EncodingRule.FORCE_DEFAULT) { |
| charsetName = encodingMemento.getAppropriateDefault(); |
| } |
| |
| // [228366] For files that have a unicode BOM, and a charset name of UTF-16, the charset decoder needs "UTF-16LE" |
| if(CHARSET_UTF_16.equals(charsetName) && encodingMemento.getUnicodeBOM() == IContentDescription.BOM_UTF_16LE) |
| charsetName = CHARSET_UTF_16LE; |
| |
| Charset charset = Charset.forName(charsetName); |
| CharsetDecoder charsetDecoder = charset.newDecoder(); |
| if (fEncodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) { |
| charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE); |
| charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE); |
| } |
| else { |
| charsetDecoder.onMalformedInput(CodingErrorAction.REPORT); |
| charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT); |
| } |
| // more efficient to be buffered, and I know of no |
| // reason not to return |
| // that directly. |
| result = new BufferedReader(new InputStreamReader(streamToReturn, charsetDecoder), CodedIO.MAX_BUF_SIZE); |
| result.mark(CodedIO.MAX_BUF_SIZE); |
| return result; |
| } |
| |
| public EncodingMemento getEncodingMemento() throws CoreException, IOException { |
| // figure out encoding memento from encoding strategy |
| if (fEncodingMemento == null) { |
| if (fClientSuppliedStream) { |
| fEncodingMemento = findMementoFromStreamCase(); |
| } |
| else if (fIFile != null) { |
| fEncodingMemento = findMementoFromFileCase(); |
| } |
| } |
| |
| // if encoding stratagy doesn't provide answer, |
| // then try file settings, project settings, |
| // user preferences, and |
| // finally workbench default. |
| // |
| if (fEncodingMemento == null || fEncodingMemento.getDetectedCharsetName() == null) { |
| fEncodingMemento = getEncodingMementoFromResourceAndPreference(); |
| } |
| |
| // use DefaultNameRules from NonContentBasedEncodingRules as the final |
| // default |
| if (fEncodingMemento == null) { |
| fEncodingMemento = handleNotProvidedFromContentCase(); |
| } |
| |
| return fEncodingMemento; |
| } |
| |
| /* |
| * This method is called only when encoding is not detected in the file. |
| * |
| * Here is encoding lookup order we will try: - try resource content |
| * description (Eclipse Text file encoding) - try resource content |
| * properties (for JSP only) - try content type encoding preferences (for |
| * HTML only) - try resource content description (Eclipse Text file |
| * encoding, implicit check) |
| * |
| * Note: This method appears in both CodedReaderCreator and |
| * CodedStreamCreator (with just a minor difference). They should be kept |
| * the same. |
| */ |
| private EncodingMemento getEncodingMementoFromResourceAndPreference() throws IOException, CoreException { |
| EncodingMemento encodingMemento = fEncodingMemento; |
| |
| // Follow Eclipse Platform's direction. Get the charset from IFile. |
| if (fIFile != null) { |
| String charset = fIFile.getCharset(); |
| encodingMemento = CodedIO.createEncodingMemento(charset); |
| } |
| |
| return encodingMemento; |
| } |
| |
| /** |
| * Ensures that an InputStream has mark/reset support, is readlimit is |
| * set, and that the stream is "limitable" (that is, reports "end of |
| * input" rather than allow going past mark). This is very specialized |
| * stream introduced to overcome |
| * https://bugs.eclipse.org/bugs/show_bug.cgi?id=67211. See also |
| * https://bugs.eclipse.org/bugs/show_bug.cgi?id=68565 |
| */ |
| private InputStream getLimitedStream(InputStream original) { |
| if (original == null) |
| return null; |
| if (original instanceof BufferedLimitedStream) |
| return original; |
| InputStream s = new BufferedLimitedStream(original, CodedIO.MAX_MARK_SIZE); |
| s.mark(CodedIO.MAX_MARK_SIZE); |
| return s; |
| } |
| |
| private InputStream getResettableStream() throws CoreException, IOException { |
| |
| InputStream resettableStream = null; |
| |
| if (fIFile != null) { |
| InputStream inputStream = null; |
| try { |
| // note we always get contents, even if out of synch |
| inputStream = fIFile.getContents(true); |
| } |
| catch (CoreException e) { |
| // SHOULD actually check for existence of |
| // fIStorage, but |
| // for now will just assume core exception |
| // means it |
| // doesn't exist on file system, yet. |
| // and we'll log, just in case its a noteable error |
| Logger.logException(e); |
| inputStream = new NullInputStream(); |
| } |
| resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE); |
| } |
| else { |
| if (fInputStream != null) { |
| if (fInputStream.markSupported()) { |
| resettableStream = fInputStream; |
| // try { |
| resettableStream.reset(); |
| // } |
| // catch (IOException e) { |
| // // assumed just hasn't been marked yet, so ignore |
| // } |
| } |
| else { |
| resettableStream = new BufferedInputStream(fInputStream, CodedIO.MAX_BUF_SIZE); |
| } |
| } |
| } |
| |
| if (resettableStream == null) { |
| resettableStream = new NullInputStream(); |
| } |
| |
| // mark this once, stream at "zero" position |
| resettableStream.mark(MAX_MARK_SIZE); |
| return resettableStream; |
| } |
| |
| private EncodingMemento handleNotProvidedFromContentCase() { |
| |
| EncodingMemento result = null; |
| String specDefault = null; |
| // try { |
| // specDefault = getEncodingDetector().getSpecDefaultEncoding(); |
| // } |
| // catch (CoreException e) { |
| // // If this exception occurs, assumes there is |
| // // no specDefault |
| // } |
| // catch (IOException e) { |
| // // If this exception occurs, assumes there is |
| // // no specDefault |
| // } |
| // finally { |
| // try { |
| // handleStreamClose(fEncodingDetectorStream); |
| // } |
| // catch (IOException e1) { |
| // // severe error, not much to do here |
| // } |
| // } |
| // this logic should be moved to 'detection' if not already |
| String charset = NonContentBasedEncodingRules.useDefaultNameRules(specDefault); |
| Assert.isNotNull(charset, "post condition failed"); //$NON-NLS-1$ |
| result = CodedIO.createEncodingMemento(charset); |
| return result; |
| } |
| |
| /** |
| * @param resettableInputStream |
| * @throws IOException |
| */ |
| private void handleStreamClose(InputStream resettableInputStream) throws IOException { |
| |
| if (resettableInputStream != null) { |
| if (fClientSuppliedStream) { |
| resettableInputStream.reset(); |
| } |
| else { |
| |
| resettableInputStream.close(); |
| } |
| } |
| } |
| |
| // TODO We just copy the content properties encoding to current resource's |
| // encoding for now. May improve the UI later by setting an informational |
| // message and/or disable the content properties encoding field. |
| // TODO: remake private else remove |
| void migrateContentPropertiesEncoding(String encoding) throws CoreException { |
| final IFile file = fIFile; |
| final String charset = encoding; |
| // TODO: externalize string later |
| Job migrater = new Job(SSECoreMessages.Migrate_Charset) { //$NON-NLS-1$ |
| protected IStatus run(IProgressMonitor monitor) { |
| if (file != null) { |
| try { |
| file.setCharset(charset, null); |
| } |
| catch (CoreException e) { |
| Logger.logException(e); |
| } |
| } |
| return Status.OK_STATUS; |
| } |
| }; |
| migrater.setSystem(true); |
| migrater.schedule(); |
| } |
| |
| private void resetAll() { |
| |
| fEncodingRule = null; |
| fIFile = null; |
| fFilename = null; |
| fInputStream = null; |
| fEncodingMemento = null; |
| fClientSuppliedStream = false; |
| } |
| |
| public void set(IFile iFile) throws CoreException, IOException { |
| Assert.isNotNull(iFile, "illegal argument"); //$NON-NLS-1$ |
| resetAll(); |
| fIFile = iFile; |
| } |
| |
| public void set(String filename, InputStream inputStream) { |
| |
| resetAll(); |
| fFilename = filename; |
| fInputStream = inputStream; |
| fClientSuppliedStream = true; |
| } |
| |
| public void setEncodingRule(EncodingRule encodingRule) { |
| |
| fEncodingRule = encodingRule; |
| } |
| } |