Bug 545769 - [console] UTF-8 content read or send to process can be
corrupted

With some bad luck the ProcessConsole may disrupt multibyte UTF-8
characters read from input source (usually user input or file) due to
incorrect stream reading / buffer handling.
The same problem exist for reading the processes output streams.

Change-Id: I8d52d1973f3739e2c510a8a4c48b44f345c33dfe
Signed-off-by: Paul Pazderski <paul-eclipse@ppazderski.de>
diff --git a/org.eclipse.debug.core/core/org/eclipse/debug/internal/core/OutputStreamMonitor.java b/org.eclipse.debug.core/core/org/eclipse/debug/internal/core/OutputStreamMonitor.java
index af02abd..ac57dae 100644
--- a/org.eclipse.debug.core/core/org/eclipse/debug/internal/core/OutputStreamMonitor.java
+++ b/org.eclipse.debug.core/core/org/eclipse/debug/internal/core/OutputStreamMonitor.java
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2018 IBM Corporation and others.
+ * Copyright (c) 2000, 2019 IBM Corporation and others.
  *
  * This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License 2.0
@@ -10,13 +10,14 @@
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
+ *     Paul Pazderski  - Bug 545769: fixed rare UTF-8 character corruption bug
  *******************************************************************************/
 package org.eclipse.debug.internal.core;
 
-
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.eclipse.core.runtime.ISafeRunnable;
@@ -71,7 +72,7 @@
 	 */
 	private boolean fKilled= false;
 
-    private long lastSleep;
+	private long lastSleep;
 
 	private String fEncoding;
 
@@ -85,8 +86,8 @@
 	 * @param encoding stream encoding or <code>null</code> for system default
 	 */
 	public OutputStreamMonitor(InputStream stream, String encoding) {
-        fStream = new BufferedInputStream(stream, 8192);
-        fEncoding = encoding;
+		fStream = new BufferedInputStream(stream, 8192);
+		fEncoding = encoding;
 		fContents= new StringBuilder();
 		fDone = new AtomicBoolean(false);
 	}
@@ -134,6 +135,7 @@
 			fDone.set(true);
 		}
 	}
+
 	/**
 	 * Continually reads from the stream.
 	 * <p>
@@ -143,55 +145,50 @@
 	 * exposing a <code>run</code> method.
 	 */
 	private void internalRead() {
-        lastSleep = System.currentTimeMillis();
-        long currentTime = lastSleep;
-		byte[] bytes= new byte[BUFFER_SIZE];
+		lastSleep = System.currentTimeMillis();
+		long currentTime = lastSleep;
+		char[] chars = new char[BUFFER_SIZE];
 		int read = 0;
-		while (read >= 0) {
-			try {
-				if (fKilled) {
-					break;
-				}
-				read= fStream.read(bytes);
-				if (read > 0) {
-					String text;
-					if (fEncoding != null) {
-						text = new String(bytes, 0, read, fEncoding);
-					} else {
-						text = new String(bytes, 0, read);
+		try (InputStreamReader reader = (fEncoding == null ? new InputStreamReader(fStream) : new InputStreamReader(fStream, fEncoding))) {
+			while (read >= 0) {
+				try {
+					if (fKilled) {
+						break;
 					}
-					synchronized (this) {
-						if (isBuffered()) {
-							fContents.append(text);
+					read = reader.read(chars);
+					if (read > 0) {
+						String text = new String(chars, 0, read);
+						synchronized (this) {
+							if (isBuffered()) {
+								fContents.append(text);
+							}
+							fireStreamAppended(text);
 						}
-						fireStreamAppended(text);
+					}
+				} catch (IOException ioe) {
+					if (!fKilled) {
+						DebugPlugin.log(ioe);
+					}
+					return;
+				} catch (NullPointerException e) {
+					// killing the stream monitor while reading can cause an NPE
+					// when reading from the stream
+					if (!fKilled && fThread != null) {
+						DebugPlugin.log(e);
+					}
+					return;
+				}
+
+				currentTime = System.currentTimeMillis();
+				if (currentTime - lastSleep > 1000) {
+					lastSleep = currentTime;
+					try {
+						// just give up CPU to maintain UI responsiveness.
+						Thread.sleep(1);
+					} catch (InterruptedException e) {
 					}
 				}
-			} catch (IOException ioe) {
-				if (!fKilled) {
-					DebugPlugin.log(ioe);
-				}
-				return;
-			} catch (NullPointerException e) {
-				// killing the stream monitor while reading can cause an NPE
-				// when reading from the stream
-				if (!fKilled && fThread != null) {
-					DebugPlugin.log(e);
-				}
-				return;
 			}
-
-            currentTime = System.currentTimeMillis();
-            if (currentTime - lastSleep > 1000) {
-                lastSleep = currentTime;
-                try {
-                    Thread.sleep(1); // just give up CPU to maintain UI responsiveness.
-                } catch (InterruptedException e) {
-                }
-            }
-		}
-		try {
-			fStream.close();
 		} catch (IOException e) {
 			DebugPlugin.log(e);
 		}
@@ -213,31 +210,22 @@
 		if (fThread == null) {
 			fDone.set(false);
 			fThread = new Thread((Runnable) () -> read(), DebugCoreMessages.OutputStreamMonitor_label);
-            fThread.setDaemon(true);
-            fThread.setPriority(Thread.MIN_PRIORITY);
+			fThread.setDaemon(true);
+			fThread.setPriority(Thread.MIN_PRIORITY);
 			fThread.start();
 		}
 	}
 
-	/**
-	 * @see org.eclipse.debug.core.model.IFlushableStreamMonitor#setBuffered(boolean)
-	 */
 	@Override
 	public synchronized void setBuffered(boolean buffer) {
 		fBuffered = buffer;
 	}
 
-	/**
-	 * @see org.eclipse.debug.core.model.IFlushableStreamMonitor#flushContents()
-	 */
 	@Override
 	public synchronized void flushContents() {
 		fContents.setLength(0);
 	}
 
-	/**
-	 * @see IFlushableStreamMonitor#isBuffered()
-	 */
 	@Override
 	public synchronized boolean isBuffered() {
 		return fBuffered;
@@ -260,17 +248,11 @@
 		private IStreamListener fListener;
 		private String fText;
 
-		/**
-		 * @see org.eclipse.core.runtime.ISafeRunnable#handleException(java.lang.Throwable)
-		 */
 		@Override
 		public void handleException(Throwable exception) {
 			DebugPlugin.log(exception);
 		}
 
-		/**
-		 * @see org.eclipse.core.runtime.ISafeRunnable#run()
-		 */
 		@Override
 		public void run() throws Exception {
 			fListener.streamAppended(fText, OutputStreamMonitor.this);
diff --git a/org.eclipse.debug.tests/src/org/eclipse/debug/tests/AutomatedSuite.java b/org.eclipse.debug.tests/src/org/eclipse/debug/tests/AutomatedSuite.java
index 36beec0..d1858f0 100644
--- a/org.eclipse.debug.tests/src/org/eclipse/debug/tests/AutomatedSuite.java
+++ b/org.eclipse.debug.tests/src/org/eclipse/debug/tests/AutomatedSuite.java
@@ -22,6 +22,7 @@
 import org.eclipse.debug.tests.console.IOConsoleTests;
 import org.eclipse.debug.tests.console.ProcessConsoleManagerTests;
 import org.eclipse.debug.tests.console.ProcessConsoleTests;
+import org.eclipse.debug.tests.console.StreamsProxyTests;
 import org.eclipse.debug.tests.launching.AcceleratorSubstitutionTests;
 import org.eclipse.debug.tests.launching.ArgumentParsingTests;
 import org.eclipse.debug.tests.launching.LaunchConfigurationTests;
@@ -115,8 +116,9 @@
 		addTest(new TestSuite(ConsoleManagerTests.class));
 		addTest(new TestSuite(ConsoleTests.class));
 		addTest(new TestSuite(IOConsoleTests.class));
-		addTest(new TestSuite(ProcessConsoleTests.class));
 		addTest(new TestSuite(ProcessConsoleManagerTests.class));
+		addTest(new TestSuite(ProcessConsoleTests.class));
+		addTest(new TestSuite(StreamsProxyTests.class));
 
 		// Launch Groups
 		addTest(new TestSuite(LaunchGroupTests.class));
diff --git a/org.eclipse.debug.tests/src/org/eclipse/debug/tests/console/ProcessConsoleTests.java b/org.eclipse.debug.tests/src/org/eclipse/debug/tests/console/ProcessConsoleTests.java
index 3ac3fa9..bdcc9e6 100644
--- a/org.eclipse.debug.tests/src/org/eclipse/debug/tests/console/ProcessConsoleTests.java
+++ b/org.eclipse.debug.tests/src/org/eclipse/debug/tests/console/ProcessConsoleTests.java
@@ -13,12 +13,18 @@
  *******************************************************************************/
 package org.eclipse.debug.tests.console;
 
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.eclipse.core.runtime.ILogListener;
 import org.eclipse.core.runtime.IStatus;
 import org.eclipse.core.runtime.Platform;
 import org.eclipse.core.runtime.jobs.Job;
+import org.eclipse.debug.core.DebugPlugin;
+import org.eclipse.debug.core.ILaunchManager;
+import org.eclipse.debug.core.Launch;
 import org.eclipse.debug.core.model.IProcess;
 import org.eclipse.debug.tests.AbstractDebugTest;
 import org.eclipse.debug.tests.TestUtil;
@@ -68,6 +74,68 @@
 	}
 
 	/**
+	 * Test if two byte UTF-8 characters get disrupted on there way from process
+	 * console to the runtime process.
+	 * <p>
+	 * This test starts every two byte character on an even byte offset.
+	 * </p>
+	 */
+	public void testUTF8InputEven() throws Exception {
+		// 5000 characters result in 10000 bytes which should be more than most
+		// common buffer sizes.
+		processConsoleUTF8Input("", 5000);
+	}
+
+	/**
+	 * Test if two byte UTF-8 characters get disrupted on there way from process
+	 * console to the runtime process.
+	 * <p>
+	 * This test starts every two byte character on an odd byte offset.
+	 * </p>
+	 */
+	public void testUTF8InputOdd() throws Exception {
+		// 5000 characters result in 10000 bytes which should be more than most
+		// common buffer sizes.
+		processConsoleUTF8Input("+", 5000);
+	}
+
+	/**
+	 * Shared code for the UTF-8 input tests.
+	 * <p>
+	 * Send some two byte UTF-8 characters through process console user input
+	 * stream to mockup process and check if the input got corrupted on its way.
+	 * </p>
+	 *
+	 * @param prefix an arbitrary prefix inserted before the two byte UTF-8
+	 *            characters. Used to move the other characters to specific
+	 *            offsets e.g. a prefix of one byte will produce an input string
+	 *            where every two byte character starts at an odd offset.
+	 * @param numTwoByteCharacters number of two byte UTF-8 characters to send
+	 *            to process
+	 */
+	public void processConsoleUTF8Input(String prefix, int numTwoByteCharacters) throws Exception {
+		final String input = prefix + String.join("", Collections.nCopies(numTwoByteCharacters, "\u00F8"));
+		final MockProcess mockProcess = new MockProcess(input.getBytes(StandardCharsets.UTF_8).length, testTimeout);
+		try {
+			final IProcess process = DebugPlugin.newProcess(new Launch(null, ILaunchManager.RUN_MODE, null), mockProcess, "testUtf8Input");
+			@SuppressWarnings("restriction")
+			final org.eclipse.debug.internal.ui.views.console.ProcessConsole console = new org.eclipse.debug.internal.ui.views.console.ProcessConsole(process, new ConsoleColorProvider());
+			try {
+				console.initialize();
+				console.getInputStream().appendData(input);
+				mockProcess.waitFor(testTimeout, TimeUnit.MILLISECONDS);
+			} finally {
+				console.destroy();
+			}
+		} finally {
+			mockProcess.destroy();
+		}
+
+		final String receivedInput = new String(mockProcess.getReceivedInput(), StandardCharsets.UTF_8);
+		assertEquals(input, receivedInput);
+	}
+
+	/**
 	 * Test if InputReadJob can be canceled.
 	 * <p>
 	 * Actually tests cancellation for all jobs of
diff --git a/org.eclipse.debug.tests/src/org/eclipse/debug/tests/console/StreamsProxyTests.java b/org.eclipse.debug.tests/src/org/eclipse/debug/tests/console/StreamsProxyTests.java
new file mode 100644
index 0000000..b4ffdb4
--- /dev/null
+++ b/org.eclipse.debug.tests/src/org/eclipse/debug/tests/console/StreamsProxyTests.java
@@ -0,0 +1,77 @@
+/*******************************************************************************
+ * Copyright (c) 2019 Paul Pazderski and others.
+ *
+ * This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License 2.0
+ * which accompanies this distribution, and is available at
+ * https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ * Contributors:
+ *     Paul Pazderski - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.debug.tests.console;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+
+import org.eclipse.debug.internal.core.StreamsProxy;
+import org.eclipse.debug.tests.AbstractDebugTest;
+
+/**
+ * Tests the {@link StreamsProxy}.
+ */
+public class StreamsProxyTests extends AbstractDebugTest {
+
+	public StreamsProxyTests() {
+		super(StreamsProxyTests.class.getSimpleName());
+	}
+
+	public StreamsProxyTests(String name) {
+		super(name);
+	}
+
+	/**
+	 * Test console receiving UTF-8 output from process where two-byte UTF-8
+	 * characters start at even offsets.
+	 */
+	public void testReceiveUTF8Even() throws Exception {
+		// 4500 characters results in 9000 byte of output which should be more
+		// than most common buffer sizes.
+		receiveUTF8Test("", 4500);
+	}
+
+	/**
+	 * Test console receiving UTF-8 output from process where two-byte UTF-8
+	 * characters start at odd offsets.
+	 */
+	public void testReceiveUTF8Odd() throws Exception {
+		// 4500 characters results in 9000 byte of output which should be more
+		// than most common buffer sizes.
+		receiveUTF8Test("+", 4500);
+	}
+
+	/**
+	 * Shared code for the UTF-8 tests.
+	 * <p>
+	 * Receive some UTF-8 content from a (mockup) process output stream.
+	 * </p>
+	 *
+	 * @param prefix an arbitrary prefix inserted before the two byte UTF-8
+	 *            characters. Used to move the other characters to specific
+	 *            offsets e.g. a prefix of one byte will produce output where
+	 *            every two byte character starts at an odd offset.
+	 * @param numTwoByteCharacters number of two byte UTF-8 characters to output
+	 */
+	private void receiveUTF8Test(String prefix, int numTwoByteCharacters) throws Exception {
+		final String s = prefix + String.join("", Collections.nCopies(numTwoByteCharacters, "\u00F8"));
+		final ByteArrayInputStream stdout = new ByteArrayInputStream(s.getBytes(StandardCharsets.UTF_8));
+		final Process mockProcess = new MockProcess(stdout, null, 0);
+		final StreamsProxy streamProxy = new StreamsProxy(mockProcess, StandardCharsets.UTF_8.name());
+		streamProxy.close();
+		final String readContent = streamProxy.getOutputStreamMonitor().getContents();
+		assertEquals("Process output got corrupted.", s, readContent);
+	}
+}
diff --git a/org.eclipse.debug.ui/ui/org/eclipse/debug/internal/ui/views/console/ProcessConsole.java b/org.eclipse.debug.ui/ui/org/eclipse/debug/internal/ui/views/console/ProcessConsole.java
index 6659ef2..0483310 100644
--- a/org.eclipse.debug.ui/ui/org/eclipse/debug/internal/ui/views/console/ProcessConsole.java
+++ b/org.eclipse.debug.ui/ui/org/eclipse/debug/internal/ui/views/console/ProcessConsole.java
@@ -10,6 +10,7 @@
  *
  *  Contributors:
  *     IBM Corporation - initial API and implementation
+ *     Paul Pazderski  - Bug 545769: fixed rare UTF-8 character corruption bug
  *******************************************************************************/
 package org.eclipse.debug.internal.ui.views.console;
 
@@ -20,6 +21,8 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -740,23 +743,29 @@
 
 		@Override
 		protected IStatus run(IProgressMonitor monitor) {
-            String encoding = getEncoding();
+			if (fInput == null) {
+				return monitor.isCanceled() ? Status.CANCEL_STATUS : Status.OK_STATUS;
+			}
+			Charset encoding = getCharset();
+			readingStream = fInput;
+			InputStreamReader streamReader = (encoding == null ? new InputStreamReader(readingStream)
+					: new InputStreamReader(readingStream, encoding));
             try {
-                byte[] b = new byte[1024];
-                int read = 0;
-				while (read >= 0 && !monitor.isCanceled()) {
-					readingStream = fInput;
-					if (readingStream == null) {
+				char[] cbuf = new char[1024];
+				int charRead = 0;
+				while (charRead >= 0 && !monitor.isCanceled()) {
+					if (fInput == null) {
 						break;
 					}
-					read = readingStream.read(b);
-                    if (read > 0) {
-                        String s;
-                        if (encoding != null) {
-							s = new String(b, 0, read, encoding);
-						} else {
-							s = new String(b, 0, read);
-						}
+					if (fInput != readingStream) {
+						readingStream = fInput;
+						streamReader = (encoding == null ? new InputStreamReader(readingStream)
+								: new InputStreamReader(readingStream, encoding));
+					}
+
+					charRead = streamReader.read(cbuf);
+					if (charRead > 0) {
+						String s = new String(cbuf, 0, charRead);
                         streamsProxy.write(s);
                     }
                 }