tests/org.eclipse.swt.tests.gtk/JUnit Tests/org/eclipse/swt/tests/gtk/Test_GtkConverter.java - platform/eclipse.platform.swt - Git at Google

 /*******************************************************************************
  * Copyright (c) 2018 Red Hat and others. All rights reserved.
  * The contents of this file are made available under the terms
  * of the GNU Lesser General Public License (LGPL) Version 2.1 that
  * accompanies this distribution (lgpl-v21.txt).  The LGPL is also
  * available at http://www.gnu.org/licenses/lgpl.html.  If the version
  * of the LGPL at http://www.gnu.org is different to the version of
  * the LGPL accompanying this distribution and there is any conflict
  * between the two license versions, the terms of the LGPL accompanying
  * this distribution shall govern.
  *
  * Contributors:
  *     Red Hat - initial API and implementation
  */
 package org.eclipse.swt.tests.gtk;

 import static org.junit.Assert.fail;

 import java.nio.charset.StandardCharsets;

 import org.eclipse.swt.internal.Converter;
 import org.junit.Ignore;
 import org.junit.Test;

 /**
  *  Tests for {@code Converter.byteToStringViaHeuristic}: each test encodes a known string
  *  into bytes using a specific charset and checks that the heuristic decodes those bytes
  *  back into the same string.
  *
  *  Good source for UTF-8 code points for testing:
  *  https://en.wikipedia.org/wiki/List_of_Unicode_characters
  *
  *  We care about ASCII, UTF-8 (as it's used a lot by glib/gtk) and UTF-16LE (as it's used by java/intel/amd architecture).
  */
 public class Test_GtkConverter {

 	static final String emptyStr = "";

 	static final String asciiLetterA = "A";	// = 65  // Note, UTF-8 is backwards compatible with ASCII
 	static final String dollarSign = "$"; // = 36

 	static final String asciiLetters = "ABCabc"; // 65(A), 66, 67   97(a), 98, 99

 	// Anything above 127 translates to at least 2 bytes in UTF-8.  See: https://en.wikipedia.org/wiki/UTF-8#Description
 	static final String codePoint174 = "®";   // U+00AE	Registered sign.
 	static final String unicodeCharactersLowCodePoints = "®ÖöėŊ‐"; // bigger than 127, but not many bytes.
 	static final String unicodeCharactersHighCodePoints = "▇░▙▚▧▫♂☢⛔";  // code points above U+2000.

 	// ---- ASCII input ----

 	@Test
 	public void test_HeuristicASCII_letterA() {
 		helper_testHeuristic(asciiLetterA.getBytes(StandardCharsets.US_ASCII), asciiLetterA); // A = 65
 	}

 	@Test
 	public void test_HeuristicASCII_dollarSign() {
 		helper_testHeuristic(dollarSign.getBytes(StandardCharsets.US_ASCII), dollarSign); // $ = 36
 	}

 	@Test
 	public void test_Heuristic_null() {
 		// A single 0 byte simulates a bare null terminator; it must decode to the empty string.
 		helper_testHeuristic(new byte[] {0}, emptyStr);
 	}

 	@Test
 	public void test_HeuristicASCII_emptyString() {
 		helper_testHeuristic(emptyStr.getBytes(StandardCharsets.US_ASCII), emptyStr); // "" -> [] (empty byte array)
 	}

 	@Test
 	public void test_HeuristicUTF8_null() {
 		// Despite the name, the input is the empty string encoded as UTF-8 (an empty byte array).
 		helper_testHeuristic(emptyStr.getBytes(StandardCharsets.UTF_8), emptyStr);
 	}

 	@Test
 	public void test_HeuristicUTF16LE_null() {
 		// Despite the name, the input is the empty string encoded as UTF-16LE (an empty byte array).
 		helper_testHeuristic(emptyStr.getBytes(StandardCharsets.UTF_16LE), emptyStr);
 	}

 	@Test
 	public void test_HeuristicASCII_letters() {
 		helper_testHeuristic(asciiLetters.getBytes(StandardCharsets.US_ASCII), asciiLetters);
 	}

 	// ---- UTF-8 input ----

 	@Test
 	public void test_HeuristicUTF8_letterUnder127() {
 		helper_testHeuristic(asciiLetterA.getBytes(StandardCharsets.UTF_8), asciiLetterA);
 	}

 	@Test
 	public void test_HeuristicUTF8_letterOver127() {
 		helper_testHeuristic(codePoint174.getBytes(StandardCharsets.UTF_8), codePoint174);
 	}

 	@Test
 	public void test_HeuristicUTF8_letterSpecial() {
 		// Same character as in test_Heuristic_specialSingleCases, but encoded as UTF-8.
 		helper_testHeuristic("Ё".getBytes(StandardCharsets.UTF_8), "Ё");
 	}

 	@Test
 	public void test_HeuristicUTF8_LowCodePoints() {
 		helper_testHeuristic(unicodeCharactersLowCodePoints.getBytes(StandardCharsets.UTF_8), unicodeCharactersLowCodePoints);
 	}

 	@Test
 	public void test_HeuristicUTF8_HighCodePoints() {
 		helper_testHeuristic(unicodeCharactersHighCodePoints.getBytes(StandardCharsets.UTF_8), unicodeCharactersHighCodePoints);
 	}

 	// ---- UTF-16LE input ----

 	@Test
 	public void test_HeuristicUTF16_Asciiletter() {
 		helper_testHeuristic(asciiLetterA.getBytes(StandardCharsets.UTF_16LE), asciiLetterA);
 	}

 	@Test
 	public void test_HeuristicUTF16_AsciiLetters() {
 		helper_testHeuristic(asciiLetters.getBytes(StandardCharsets.UTF_16LE), asciiLetters);
 	}

 	@Test
 	public void test_HeuristicUTF16_letter() {
 		// Uses the shared constant (code point 174) rather than a duplicated literal.
 		helper_testHeuristic(codePoint174.getBytes(StandardCharsets.UTF_16LE), codePoint174);
 	}

 	@Test
 	public void test_HeuristicUTF16_letters() {
 		helper_testHeuristic(unicodeCharactersLowCodePoints.getBytes(StandardCharsets.UTF_16LE), unicodeCharactersLowCodePoints);
 	}

 	@Test
 	public void test_HeuristicUTF16_LotsOfLetters() {
 		helper_testHeuristic(unicodeCharactersHighCodePoints.getBytes(StandardCharsets.UTF_16LE), unicodeCharactersHighCodePoints);
 	}

 	/**
 	 * There are a few unicode characters that are ambiguous if they are decoded on their own,
 	 * as they can translate to either two valid UTF-8 characters or a single valid UTF-16LE character.
 	 *
 	 * e.g 'Ё'. (but there are others).
 	 *
 	 * The heuristic is better if there are 2+ characters, e.g HЁLLO WORLD.
 	 *
 	 * This test is documented, but is currently known to fail.
 	 */
 	@Ignore("Known to fail: a single ambiguous UTF-16LE character cannot be told apart from two UTF-8 characters")
 	@Test
 	public void test_Heuristic_specialSingleCases() {
 		helper_testHeuristic("Ё".getBytes(StandardCharsets.UTF_16LE), "Ё");
 	}

 	/**
 	 * Decodes {@code testBytes} with {@code Converter.byteToStringViaHeuristic} and fails the
 	 * calling test if the result is not equal to {@code expected}.
 	 *
 	 * @param testBytes encoded input handed to the heuristic
 	 * @param expected  string the heuristic is expected to produce; must not be null
 	 */
 	private void helper_testHeuristic(byte[] testBytes, String expected) {
 		String result = Converter.byteToStringViaHeuristic(testBytes);
 		if (!expected.equals(result)) {
 			// A bare fail() gives no hint about which bytes were mis-decoded, so report all three values.
 			fail("Expected <" + expected + "> but heuristic returned <" + result
 					+ "> for bytes " + java.util.Arrays.toString(testBytes));
 		}
 	}

 }
	/*******************************************************************************
	* Copyright (c) 2018 Red Hat and others. All rights reserved.
	* The contents of this file are made available under the terms
	* of the GNU Lesser General Public License (LGPL) Version 2.1 that
	* accompanies this distribution (lgpl-v21.txt). The LGPL is also
	* available at http://www.gnu.org/licenses/lgpl.html. If the version
	* of the LGPL at http://www.gnu.org is different to the version of
	* the LGPL accompanying this distribution and there is any conflict
	* between the two license versions, the terms of the LGPL accompanying
	* this distribution shall govern.
	*
	* Contributors:
	* Red Hat - initial API and implementation
	*/
	package org.eclipse.swt.tests.gtk;

	import static org.junit.Assert.fail;

	import java.nio.charset.StandardCharsets;

	import org.eclipse.swt.internal.Converter;
	import org.junit.Ignore;
	import org.junit.Test;

	/**
	 * Tests for {@code Converter.byteToStringViaHeuristic}: each test encodes a known string
	 * into bytes using a specific charset and checks that the heuristic decodes those bytes
	 * back into the same string.
	 *
	 * Good source for UTF-8 code points for testing:
	 * https://en.wikipedia.org/wiki/List_of_Unicode_characters
	 *
	 * We care about ASCII, UTF-8 (as it's used a lot by glib/gtk) and UTF-16LE (as it's used by java/intel/amd architecture).
	 */
	public class Test_GtkConverter {

		static final String emptyStr = "";

		static final String asciiLetterA = "A"; // = 65 // Note, UTF-8 is backwards compatible with ASCII
		static final String dollarSign = "$"; // = 36

		static final String asciiLetters = "ABCabc"; // 65(A), 66, 67 97(a), 98, 99

		// Anything above 127 translates to at least 2 bytes in UTF-8. See: https://en.wikipedia.org/wiki/UTF-8#Description
		static final String codePoint174 = "®"; // U+00AE Registered sign.
		static final String unicodeCharactersLowCodePoints = "®ÖöėŊ‐"; // bigger than 127, but not many bytes.
		static final String unicodeCharactersHighCodePoints = "▇░▙▚▧▫♂☢⛔"; // code points above U+2000.

		// ---- ASCII input ----

		@Test
		public void test_HeuristicASCII_letterA() {
			helper_testHeuristic(asciiLetterA.getBytes(StandardCharsets.US_ASCII), asciiLetterA); // A = 65
		}

		@Test
		public void test_HeuristicASCII_dollarSign() {
			helper_testHeuristic(dollarSign.getBytes(StandardCharsets.US_ASCII), dollarSign); // $ = 36
		}

		@Test
		public void test_Heuristic_null() {
			// A single 0 byte simulates a bare null terminator; it must decode to the empty string.
			helper_testHeuristic(new byte[] {0}, emptyStr);
		}

		@Test
		public void test_HeuristicASCII_emptyString() {
			helper_testHeuristic(emptyStr.getBytes(StandardCharsets.US_ASCII), emptyStr); // "" -> [] (empty byte array)
		}

		@Test
		public void test_HeuristicUTF8_null() {
			// Despite the name, the input is the empty string encoded as UTF-8 (an empty byte array).
			helper_testHeuristic(emptyStr.getBytes(StandardCharsets.UTF_8), emptyStr);
		}

		@Test
		public void test_HeuristicUTF16LE_null() {
			// Despite the name, the input is the empty string encoded as UTF-16LE (an empty byte array).
			helper_testHeuristic(emptyStr.getBytes(StandardCharsets.UTF_16LE), emptyStr);
		}

		@Test
		public void test_HeuristicASCII_letters() {
			helper_testHeuristic(asciiLetters.getBytes(StandardCharsets.US_ASCII), asciiLetters);
		}

		// ---- UTF-8 input ----

		@Test
		public void test_HeuristicUTF8_letterUnder127() {
			helper_testHeuristic(asciiLetterA.getBytes(StandardCharsets.UTF_8), asciiLetterA);
		}

		@Test
		public void test_HeuristicUTF8_letterOver127() {
			helper_testHeuristic(codePoint174.getBytes(StandardCharsets.UTF_8), codePoint174);
		}

		@Test
		public void test_HeuristicUTF8_letterSpecial() {
			// Same character as in test_Heuristic_specialSingleCases, but encoded as UTF-8.
			helper_testHeuristic("Ё".getBytes(StandardCharsets.UTF_8), "Ё");
		}

		@Test
		public void test_HeuristicUTF8_LowCodePoints() {
			helper_testHeuristic(unicodeCharactersLowCodePoints.getBytes(StandardCharsets.UTF_8), unicodeCharactersLowCodePoints);
		}

		@Test
		public void test_HeuristicUTF8_HighCodePoints() {
			helper_testHeuristic(unicodeCharactersHighCodePoints.getBytes(StandardCharsets.UTF_8), unicodeCharactersHighCodePoints);
		}

		// ---- UTF-16LE input ----

		@Test
		public void test_HeuristicUTF16_Asciiletter() {
			helper_testHeuristic(asciiLetterA.getBytes(StandardCharsets.UTF_16LE), asciiLetterA);
		}

		@Test
		public void test_HeuristicUTF16_AsciiLetters() {
			helper_testHeuristic(asciiLetters.getBytes(StandardCharsets.UTF_16LE), asciiLetters);
		}

		@Test
		public void test_HeuristicUTF16_letter() {
			// Uses the shared constant (code point 174) rather than a duplicated literal.
			helper_testHeuristic(codePoint174.getBytes(StandardCharsets.UTF_16LE), codePoint174);
		}

		@Test
		public void test_HeuristicUTF16_letters() {
			helper_testHeuristic(unicodeCharactersLowCodePoints.getBytes(StandardCharsets.UTF_16LE), unicodeCharactersLowCodePoints);
		}

		@Test
		public void test_HeuristicUTF16_LotsOfLetters() {
			helper_testHeuristic(unicodeCharactersHighCodePoints.getBytes(StandardCharsets.UTF_16LE), unicodeCharactersHighCodePoints);
		}

		/**
		 * There are a few unicode characters that are ambiguous if they are decoded on their own,
		 * as they can translate to either two valid UTF-8 characters or a single valid UTF-16LE character.
		 *
		 * e.g 'Ё'. (but there are others).
		 *
		 * The heuristic is better if there are 2+ characters, e.g HЁLLO WORLD.
		 *
		 * This test is documented, but is currently known to fail.
		 */
		@Ignore("Known to fail: a single ambiguous UTF-16LE character cannot be told apart from two UTF-8 characters")
		@Test
		public void test_Heuristic_specialSingleCases() {
			helper_testHeuristic("Ё".getBytes(StandardCharsets.UTF_16LE), "Ё");
		}

		/**
		 * Decodes {@code testBytes} with {@code Converter.byteToStringViaHeuristic} and fails the
		 * calling test if the result is not equal to {@code expected}.
		 *
		 * @param testBytes encoded input handed to the heuristic
		 * @param expected  string the heuristic is expected to produce; must not be null
		 */
		private void helper_testHeuristic(byte[] testBytes, String expected) {
			String result = Converter.byteToStringViaHeuristic(testBytes);
			if (!expected.equals(result)) {
				// A bare fail() gives no hint about which bytes were mis-decoded, so report all three values.
				fail("Expected <" + expected + "> but heuristic returned <" + result
						+ "> for bytes " + java.util.Arrays.toString(testBytes));
			}
		}

	}