| /* |
| * Copyright (c) 2009 Borland Software Corporation |
| * |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * Artem Tikhomirov (Borland) - initial API and implementation |
| */ |
| package org.eclipse.gmf.internal.xpand.inactive; |
| |
| import java.io.BufferedInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.Reader; |
| import java.nio.charset.Charset; |
| |
| import org.eclipse.gmf.internal.xpand.Activator; |
| |
| /** |
| * FIXME tests!!! (especially that C2AB and C2BB without BOM give UTF8) |
| * @author artem |
| */ |
| public class StreamDecoder { |
| |
| public static final Charset LEGACY_ENCODING = Charset.forName("ISO-8859-1"); //$NON-NLS-1$ |
| |
| private final InputStream myInputStream; |
| private final Charset myDefaultEncoding; |
| private Reader myResult; |
| private Charset myEncoding; |
| |
| /** |
| * @param is can't be null |
| * @param defaultEncoding may be null |
| */ |
| public StreamDecoder(InputStream is, Charset defaultEncoding) { |
| assert is != null; |
| myInputStream = ensureMarkSupported(is); |
| myDefaultEncoding = defaultEncoding; |
| } |
| |
| public Reader getReader() { |
| if (myResult == null) { |
| myResult = createReader(myInputStream, getEncoding()); |
| } |
| return myResult; |
| } |
| |
| /** |
| * @return defaultEncoding, if can't detect |
| */ |
| public Charset getEncoding() { |
| if (myEncoding == null) { |
| myEncoding = detectEncoding(myInputStream); |
| } |
| return myEncoding; |
| } |
| |
| // is passed supports marks |
| protected Charset detectEncoding(InputStream is) { |
| assert is.markSupported(); |
| final int markLimit = 1024; |
| is.mark(markLimit); // pure guess, most templates, even those with EPL comment header, got smth that far |
| try { |
| int b1 = is.read(); |
| int b2 = is.read(); |
| if (b1 == -1 || b2 == -1) { |
| return myDefaultEncoding; |
| } |
| if (b1 == 0xFE && b2 == 0xFF) { |
| return Charset.forName("UTF-16BE"); |
| } |
| if (b1 == 0xFF && b2 == 0xFE) { |
| return Charset.forName("UTF-16LE"); |
| } |
| int b3 = is.read(); |
| if (b3 == -1) { |
| return myDefaultEncoding; |
| } |
| if (b1 == 0xEF && b2 == 0xBB && b3 == 0xBF) { |
| return Charset.forName("UTF-8"); |
| } |
| is.reset(); // all over again |
| boolean foundC2, foundAB, foundBB, foundC2AB, foundC2BB; |
| foundC2 = foundAB = foundBB = foundC2AB = foundC2BB = false; |
| for (int i = markLimit; i > 0; i--) { |
| int b = is.read(); |
| if (b == -1) { |
| break; |
| } |
| if (!foundAB && !foundC2AB) { |
| foundAB = b == 0xAB; |
| foundC2AB = foundC2 && foundAB; |
| } |
| if (!foundBB && !foundC2BB) { |
| foundBB = b == 0xBB; |
| foundC2BB = foundC2 && foundBB; |
| } |
| foundC2 = b == 0xC2; // keeps knowledge whether current byte is C2 for the next iteration |
| } |
| if (foundC2AB && foundC2BB) { |
| return Charset.forName("UTF-8"); |
| } |
| if (foundAB && foundBB) { |
| return LEGACY_ENCODING; |
| } |
| } catch (IOException ex) { |
| // IGNORE |
| } finally { |
| try { |
| is.reset(); |
| } catch (IOException ex) { |
| // XXX actually, should avoid using Activator as it may trigger plugin initialization |
| // but as long as it can barely happen here... |
| Activator.logError(ex); |
| } |
| } |
| return myDefaultEncoding; |
| } |
| |
| protected Reader createReader(InputStream is, Charset encoding) { |
| return encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is); |
| } |
| |
| /** |
| * @return same or wrapped input stream that has {@link InputStream#markSupported()} == true |
| */ |
| public static InputStream ensureMarkSupported(InputStream is) { |
| return is.markSupported() ? is : new BufferedInputStream(is); |
| } |
| // public static Reader ensureMarkSupported(Reader r) { |
| // return r.markSupported() ? r : new BufferedReader(r); |
| // } |
| } |