| /******************************************************************************* |
| * Copyright (c) 2001, 2005 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| * Jens Lukowski/Innoopract - initial renaming/restructuring |
| * |
| *******************************************************************************/ |
| package org.eclipse.wst.sse.core.internal.encoding; |
| |
| import java.io.BufferedReader; |
| import java.io.ByteArrayOutputStream; |
| import java.io.CharArrayReader; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.io.OutputStreamWriter; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CodingErrorAction; |
| import java.nio.charset.UnmappableCharacterException; |
| |
| import org.eclipse.core.resources.IFile; |
| import org.eclipse.core.runtime.CoreException; |
| import org.eclipse.core.runtime.IProgressMonitor; |
| import org.eclipse.core.runtime.IStatus; |
| import org.eclipse.core.runtime.Platform; |
| import org.eclipse.core.runtime.Status; |
| import org.eclipse.core.runtime.content.IContentDescription; |
| import org.eclipse.core.runtime.content.IContentTypeManager; |
| import org.eclipse.core.runtime.jobs.Job; |
| import org.eclipse.wst.sse.core.internal.SSECoreMessages; |
| import org.eclipse.wst.sse.core.internal.SSECorePlugin; |
| import org.eclipse.wst.sse.core.internal.encoding.util.Assert; |
| import org.eclipse.wst.sse.core.internal.encoding.util.Logger; |
| import org.eclipse.wst.sse.core.internal.exceptions.CharConversionErrorWithDetail; |
| import org.eclipse.wst.sse.core.internal.exceptions.MalformedOutputExceptionWithDetail; |
| import org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail; |
| |
| |
| public class CodedStreamCreator extends CodedIO { |
| |
| private final static int INITIAL_BUFFER_SIZE = 1024 * 16; |
| |
| // the 32 bytes used by default by ByteOutputStream is |
| // a little small |
| private static final String PROGRAM_ERROR__FAILED_TO_FIND_ANY_CHARSET_ANYWHERE_ = "Program error: failed to find any charset anywhere!"; //$NON-NLS-1$ |
| |
| private static final String UTF_16BE_CHARSET_NAME = "UTF-16BE"; //$NON-NLS-1$ |
| private static final String UTF_16LE_CHARSET_NAME = "UTF-16LE"; //$NON-NLS-1$ |
| // private static final String UTF_16_CHARSET_NAME = "UTF-16"; |
| // //$NON-NLS-1$ |
| |
| private static final String UTF_8_CHARSET_NAME = "UTF-8"; //$NON-NLS-1$ |
| |
| private boolean fClientSuppliedReader; |
| |
| // future_TODO: this 'checkConversion' can be a little |
| // pricey for large |
| // files, chould be a user preference, or something. |
| // private static final boolean checkConversion = true; |
| private EncodingMemento fCurrentEncodingMemento; |
| |
| private EncodingMemento fEncodingMemento; |
| |
| private String fFilename; |
| |
| private boolean fHasBeenAnalyzed; |
| |
| private IFile fIFile; |
| |
| private EncodingMemento fPreviousEncodingMemento; |
| |
| private Reader fReader; |
| |
| private Reader fResettableReader; |
| private byte[] UTF16BEBOM = new byte[]{(byte) 0xFE, (byte) 0xFF}; |
| |
| private byte[] UTF16LEBOM = new byte[]{(byte) 0xFF, (byte) 0xFE}; |
| private byte[] UTF3BYTEBOM = new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}; |
| |
| public CodedStreamCreator() { |
| super(); |
| } |
| |
| public CodedStreamCreator(String filename, char[] characterArray) { |
| super(); |
| fFilename = filename; |
| fReader = new CharArrayReader(characterArray); |
| } |
| |
| public CodedStreamCreator(String filename, Reader reader) { |
| super(); |
| fFilename = filename; |
| fReader = reader; |
| } |
| |
| public CodedStreamCreator(String filename, String textString) { |
| super(); |
| fFilename = filename; |
| fReader = new StringReader(textString); |
| } |
| |
| /** |
| * The primary method which contains the highest level rules for how to |
| * decide appropriate decoding rules: 1. first check for unicode stream 2. |
| * then looked for encoding specified in content (according to the type of |
| * content that is it ... xml, html, jsp, etc. 3. then check for various |
| * settings: file settings first, if null check project settings, if null, |
| * check user preferences. 4. lastly (or, what is the last user |
| * preference) is to use "workbench defaults". |
| */ |
| private void analyze() throws CoreException, IOException { |
| Reader resettableReader = getResettableReader(); |
| try { |
| if (fCurrentEncodingMemento == null) { |
| resettableReader.reset(); |
| fCurrentEncodingMemento = checkForEncodingInContents(); |
| } |
| // if encoding stratagy doesn't provide answer, |
| // then try file settings, project settings, |
| // user preferences, and |
| // finally workbench default. |
| // |
| if (fCurrentEncodingMemento == null || fCurrentEncodingMemento.getDetectedCharsetName() == null) { |
| resettableReader.reset(); |
| fCurrentEncodingMemento = getEncodingMementoFromResourceAndPreference(); |
| } |
| |
| // use DefaultNameRules from NonContentBasedEncodingRules as the |
| // final default |
| if (fEncodingMemento == null) { |
| handleNotProvidedFromContentCase(); |
| } |
| |
| fHasBeenAnalyzed = true; |
| } finally { |
| if (resettableReader != null) { |
| resettableReader.reset(); |
| } |
| } |
| } |
| |
| /** |
| * Need to check conversion early on. There's some danger than old |
| * contents of a file are set to empty, if an exception occurs. |
| * |
| * @param allText |
| * @param encoding |
| * @param encodingRule |
| * @throws java.io.UnsupportedEncodingException |
| * @throws MalformedOutputExceptionWithDetail |
| * @deprecated - we need to find "cheaper" way to to this functionality so |
| * likely to go away in future |
| */ |
| private void checkConversion(EncodingMemento memento, EncodingRule encodingRule) throws IOException { |
| String javaEncoding = memento.getJavaCharsetName(); |
| String detectedEncoding = memento.getDetectedCharsetName(); |
| Charset charset = Charset.forName(javaEncoding); |
| CharsetEncoder charsetEncoder = charset.newEncoder(); |
| charsetEncoder.onMalformedInput(CodingErrorAction.REPORT); |
| charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT); |
| Reader reader = getResettableReader(); |
| reader.reset(); |
| int currentChar = reader.read(); |
| int currentPos = 1; |
| try { |
| while (currentChar != -1) { |
| // note: this can probably be made more |
| // efficient later to |
| // check buffer by buffer, instead of |
| // character by character. |
| try { |
| boolean canConvert = charsetEncoder.canEncode((char) currentChar); |
| if (!canConvert) { |
| if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) { |
| // if we're told to ignore the |
| // encoding conversion |
| // error, |
| // notice we still want to detect |
| // and log it. We simply |
| // don't throw the exception, and |
| // we do continue with |
| // the |
| // save. |
| Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); //$NON-NLS-1$ |
| } else { |
| throw new MalformedOutputExceptionWithDetail(javaEncoding, detectedEncoding, currentPos); |
| } |
| } |
| currentChar = reader.read(); |
| currentPos++; |
| } |
| // IBM's JRE seems to throw NPE when DBCS char is given to |
| // SBCS charsetEncoder |
| catch (NullPointerException e) { |
| throw new CharConversionErrorWithDetail(javaEncoding); //$NON-NLS-1$ |
| } |
| } |
| // if we get all the way through loop without throwing exception, |
| // then there must |
| // be an error not detectable when going character by character. |
| throw new CharConversionErrorWithDetail(javaEncoding); //$NON-NLS-1$ |
| } finally { |
| reader.reset(); |
| } |
| } |
| |
| private EncodingMemento checkForEncodingInContents() throws CoreException, IOException { |
| EncodingMemento result = null; |
| |
| // if encoding memento already set, and no need to get again. |
| if (fEncodingMemento != null) { |
| result = fEncodingMemento; |
| } else { |
| if (fClientSuppliedReader) { |
| fReader.reset(); |
| IContentTypeManager contentTypeManager = Platform.getContentTypeManager(); |
| try { |
| IContentDescription contentDescription = contentTypeManager.getDescriptionFor(fReader, fFilename, IContentDescription.ALL); |
| if (contentDescription != null) { |
| fEncodingMemento = createMemento(contentDescription); |
| } else { |
| fEncodingMemento = CodedIO.createEncodingMemento("UTF-8"); //$NON-NLS-1$ |
| } |
| } catch (NullPointerException e) { |
| // TODO: work around for 5/14 bug in base, should be |
| // removed when move up to 5/21 |
| // just created a simple default one |
| fEncodingMemento = CodedIO.createEncodingMemento("UTF-8"); //$NON-NLS-1$ |
| } |
| result = fEncodingMemento; |
| } else { |
| throw new IllegalStateException("unexpected state: encodingMemento was null but no input stream supplied"); //$NON-NLS-1$ |
| } |
| } |
| // try { |
| // result = getEncodingDetector().getEncodingMemento(); |
| // if (result != null && !result.isValid() && !forceDefault()) { |
| // throw new UnsupportedCharsetExceptionWithDetail(result); |
| // } |
| // } |
| // finally { |
| // handleStreamClose(fEncodingDetectorStream); |
| // } |
| return result; |
| } |
| |
| |
| private void dump(OutputStream outputStream, EncodingRule encodingRule, boolean use3ByteBOMifUTF8) throws CoreException, IOException { |
| getCurrentEncodingMemento(); |
| String javaEncodingName = null; |
| if (encodingRule == EncodingRule.CONTENT_BASED) { |
| if (fCurrentEncodingMemento.isValid()) { |
| javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName(); |
| } else { |
| throw new UnsupportedCharsetExceptionWithDetail(fCurrentEncodingMemento); |
| } |
| } else if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) |
| javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName(); |
| else if (encodingRule == EncodingRule.FORCE_DEFAULT) |
| javaEncodingName = fCurrentEncodingMemento.getAppropriateDefault(); |
| // write appropriate "header" unicode BOM bytes |
| // Note: Java seems to write appropriate header for |
| // UTF-16, but not |
| // UTF-8 nor UTF-16BE. This |
| // may vary by JRE version, so need to test well. |
| // Note: javaEncodingName can be null in invalid |
| // cases, so we no hard |
| // to skip whole check if that's the case. |
| if (javaEncodingName != null) { |
| if ((javaEncodingName.equals(UTF_8_CHARSET_NAME) && use3ByteBOMifUTF8) || (javaEncodingName.equals(UTF_8_CHARSET_NAME) && fCurrentEncodingMemento.isUTF83ByteBOMUsed())) { |
| outputStream.write(UTF3BYTEBOM); |
| } else if (javaEncodingName.equals(UTF_16LE_CHARSET_NAME)) { |
| outputStream.write(UTF16LEBOM); |
| } else if (javaEncodingName.equals(UTF_16BE_CHARSET_NAME)) { |
| outputStream.write(UTF16BEBOM); |
| } |
| } |
| // TODO add back in line delimiter handling the |
| // "right" way (updating |
| // markers, not requiring string, etc. .. may need |
| // to move to document |
| // level) |
| //allTextBuffer = |
| // handleLineDelimiter(allTextBuffer, document); |
| Reader reader = getResettableReader(); |
| // be sure to test large "readers" ... we'll need |
| // to make sure they all |
| // can reset to initial position (StringReader, |
| // CharArrayReader, and |
| // DocumentReader should all work ok). |
| reader.reset(); |
| // There must be cleaner logic somehow, but the |
| // idea is that |
| // javaEncodingName can be null |
| // if original detected encoding is not valid (and |
| // if FORCE_DEFAULT was |
| // not specified). Hence, we WANT the first |
| // Charset.forName to |
| // throw appropriate exception. |
| Charset charset = null; |
| |
| // this call checks "override" properties file |
| javaEncodingName = CodedIO.getAppropriateJavaCharset(javaEncodingName); |
| |
| if (javaEncodingName == null) { |
| charset = Charset.forName(fCurrentEncodingMemento.getDetectedCharsetName()); |
| } else { |
| charset = Charset.forName(javaEncodingName); |
| } |
| CharsetEncoder charsetEncoder = charset.newEncoder(); |
| if (!(encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR)) { |
| charsetEncoder.onMalformedInput(CodingErrorAction.REPORT); |
| charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT); |
| } else { |
| charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE); |
| charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE); |
| |
| } |
| OutputStreamWriter outputStreamWriter = new OutputStreamWriter(outputStream, charsetEncoder); |
| //TODO: this may no longer be needed (and is at |
| // least wrong spot for |
| // it). |
| // if (checkConversion && (!(encodingRule == |
| // EncodingRule.IGNORE_CONVERSION_ERROR))) { |
| // checkConversion(fCurrentEncodingMemento, |
| // encodingRule); |
| // } |
| char[] charbuf = new char[CodedIO.MAX_BUF_SIZE]; |
| int nRead = 0; |
| try { |
| while (nRead != -1) { |
| nRead = reader.read(charbuf, 0, MAX_BUF_SIZE); |
| if (nRead > 0) { |
| outputStreamWriter.flush(); |
| outputStreamWriter.write(charbuf, 0, nRead); |
| } |
| } |
| } catch (UnmappableCharacterException e) { |
| checkConversion(fCurrentEncodingMemento, encodingRule); |
| } finally { |
| // since we don't own the original output stream, we |
| // won't close it ours. |
| // the caller who passed it to us must close original one |
| // when appropriate. |
| // (but we do flush to be sure all up-to-date) |
| outputStreamWriter.flush(); |
| } |
| } |
| |
| private boolean get3ByteBOMPreference() { |
| return SSECorePlugin.getDefault().getPluginPreferences().getBoolean(CommonEncodingPreferenceNames.USE_3BYTE_BOM_WITH_UTF8); |
| } |
| |
| public ByteArrayOutputStream getCodedByteArrayOutputStream() throws CoreException, IOException { |
| return getCodedByteArrayOutputStream(EncodingRule.CONTENT_BASED); |
| } |
| |
| public ByteArrayOutputStream getCodedByteArrayOutputStream(EncodingRule encodingRule) throws CoreException, IOException { |
| //Assert.isNotNull(fPreviousEncodingMemento, |
| // "previousEncodingMemento |
| // needs to be set first"); |
| ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE); |
| dump(byteArrayOutputStream, encodingRule, get3ByteBOMPreference()); |
| return byteArrayOutputStream; |
| } |
| |
| public EncodingMemento getCurrentEncodingMemento() throws CoreException, IOException { |
| //Assert.isNotNull(fPreviousEncodingMemento, |
| // "previousEncodingMemento |
| // needs to be set first"); |
| if (!fHasBeenAnalyzed) { |
| analyze(); |
| } |
| // post condition |
| Assert.isNotNull(fCurrentEncodingMemento, "illegal post condition state"); //$NON-NLS-1$ |
| // be sure to carry over appropriate encoding |
| // "state" that may be |
| // relevent. |
| if (fPreviousEncodingMemento != null) { |
| fCurrentEncodingMemento.setUTF83ByteBOMUsed(fPreviousEncodingMemento.isUTF83ByteBOMUsed()); |
| } |
| return fCurrentEncodingMemento; |
| } |
| |
| /* |
| * This method is called only when encoding is not detected in the file. |
| * |
| * Here is encoding lookup order we will try: - try resource content |
| * description (Eclipse Text file encoding) - try resource content |
| * properties (for JSP only) - try content type encoding preferences (for |
| * HTML only) - try resource content description (Eclipse Text file |
| * encoding, implicit check) |
| * |
| * Note: This method appears in both CodedReaderCreator and |
| * CodedStreamCreator (with just a minor difference). They should be kept |
| * the same. |
| */ |
| private EncodingMemento getEncodingMementoFromResourceAndPreference() throws IOException, CoreException { |
| EncodingMemento encodingMemento = fEncodingMemento; |
| |
| // Follow Eclipse Platform's direction. Get the charset from IFile. |
| if (fIFile != null) { |
| String charset = fIFile.getCharset(); |
| encodingMemento = CodedIO.createEncodingMemento(charset); |
| } |
| |
| return encodingMemento; |
| } |
| |
| private Reader getResettableReader() { |
| if (fResettableReader == null) { |
| if (fReader.markSupported()) { |
| fResettableReader = fReader; |
| } else { |
| fResettableReader = new BufferedReader(fReader); |
| try { |
| fResettableReader.mark(MAX_MARK_SIZE); |
| } catch (IOException e) { |
| // impossible, since we just checked if |
| // markable |
| throw new Error(e); |
| } |
| |
| } |
| } |
| return fResettableReader; |
| } |
| |
| protected void handleNotProvidedFromContentCase() { |
| // move to "detectors" if not already |
| String specDefault = null; |
| //specDefault = getEncodingDetector().getSpecDefaultEncoding(); |
| String charset = NonContentBasedEncodingRules.useDefaultNameRules(specDefault); |
| Assert.isNotNull(charset, PROGRAM_ERROR__FAILED_TO_FIND_ANY_CHARSET_ANYWHERE_); |
| fCurrentEncodingMemento = CodedIO.createEncodingMemento(charset); |
| } |
| |
| // TODO We just copy the content properties encoding to current resource's |
| // encoding for now. May improve the UI later by setting an informational |
| // message and/or disable the content properties encoding field. |
| // TODO make priviate if needed, else remove |
| void migrateContentPropertiesEncoding(String encoding) throws CoreException { |
| if (fIFile != null) |
| fIFile.setCharset(encoding, null); |
| final IFile file = fIFile; |
| final String charset = encoding; |
| // TODO: externalize string later |
| Job migrater = new Job(SSECoreMessages.Migrate_Charset) { //$NON-NLS-1$ |
| protected IStatus run(IProgressMonitor monitor) { |
| if (file != null) { |
| try { |
| file.setCharset(charset, null); |
| } catch (CoreException e) { |
| Logger.logException(e); |
| } |
| } |
| return Status.OK_STATUS; |
| } |
| }; |
| migrater.setSystem(true); |
| migrater.schedule(); |
| |
| } |
| |
| /** |
| * |
| */ |
| private void resetAll() { |
| fFilename = null; |
| fReader = null; |
| fPreviousEncodingMemento = null; |
| fCurrentEncodingMemento = null; |
| fHasBeenAnalyzed = false; |
| fClientSuppliedReader = false; |
| } |
| |
| public void set(IFile file, Reader reader) { |
| fIFile = file; |
| set(file.getName(), reader); |
| } |
| |
| public void set(String filename, char[] characterArray) { |
| resetAll(); |
| fFilename = filename; |
| fReader = new CharArrayReader(characterArray); |
| } |
| |
| public void set(String filename, Reader reader) { |
| resetAll(); |
| fFilename = filename; |
| fReader = reader; |
| fClientSuppliedReader = true; |
| } |
| |
| public void set(String filename, String textString) { |
| set(filename, new StringReader(textString)); |
| } |
| |
| public void setPreviousEncodingMemento(EncodingMemento previousEncodingMemento) { |
| fPreviousEncodingMemento = previousEncodingMemento; |
| } |
| } |