Bug 388504: [xpath2] XPath scanner does not detect non-ASCII names

https://bugs.eclipse.org/bugs/show_bug.cgi?id=388504
diff --git a/tests/org.eclipse.wst.xml.xpath2.processor.tests/META-INF/MANIFEST.MF b/tests/org.eclipse.wst.xml.xpath2.processor.tests/META-INF/MANIFEST.MF
index 649ef43..158f5cc 100644
--- a/tests/org.eclipse.wst.xml.xpath2.processor.tests/META-INF/MANIFEST.MF
+++ b/tests/org.eclipse.wst.xml.xpath2.processor.tests/META-INF/MANIFEST.MF
@@ -10,7 +10,8 @@
  org.eclipse.core.runtime;bundle-version="[3.4.0,4.0.0)",
  org.junit;bundle-version="3.8.2",
  org.w3c.xqts.testsuite;bundle-version="1.0.2",
- org.eclipse.core.resources;bundle-version="[3.4.0,4.0.0)"
+ org.eclipse.core.resources;bundle-version="[3.4.0,4.0.0)",
+ java_cup.runtime;bundle-version="0.10.0"
 Export-Package: org.custommonkey.xmlunit,
  org.custommonkey.xmlunit.examples,
  org.custommonkey.xmlunit.exceptions,
diff --git a/tests/org.eclipse.wst.xml.xpath2.processor.tests/bugTestFiles/bug388504.xml b/tests/org.eclipse.wst.xml.xpath2.processor.tests/bugTestFiles/bug388504.xml
new file mode 100644
index 0000000..74519e7
--- /dev/null
+++ b/tests/org.eclipse.wst.xml.xpath2.processor.tests/bugTestFiles/bug388504.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<Sample>
+	<Schlüssel>1</Schlüssel>
+	<걉걋>2</걉걋>
+</Sample>
diff --git a/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/internal/XPathFlexTest.java b/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/internal/XPathFlexTest.java
new file mode 100644
index 0000000..1b5c50e
--- /dev/null
+++ b/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/internal/XPathFlexTest.java
@@ -0,0 +1,63 @@
+package org.eclipse.wst.xml.xpath2.processor.internal;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import java_cup.runtime.Symbol;
+
+import junit.framework.TestCase;
+
+public class XPathFlexTest extends TestCase {
+
+	/** Verifies both the type id and the value of a scanned token. */
+	void assertSymbolEquals(int expectedId, Object expectedValue, Symbol symbol) {
+		Object actualValue = symbol.value;
+		assertEquals("Expected symbol type for " + actualValue, expectedId, symbol.sym);
+		assertEquals("Expected symbol value for symbol type", expectedValue, actualValue);
+	}
+
+	/** Returns the first token the scanner produces for the given text. */
+	Symbol tokenizeFirst(String source) throws IOException {
+		return new XPathFlex(new StringReader(source)).next_token();
+	}
+
+	public void testDigits() throws IOException {
+		assertSymbolEquals(XpathSym.INTEGER, BigInteger.valueOf(1234), tokenizeFirst("1234"));
+		assertSymbolEquals(XpathSym.DECIMAL, BigDecimal.valueOf(1234.0), tokenizeFirst("1234.0"));
+	}
+
+	public void testNCNAME() throws IOException {
+		assertSymbolEquals(XpathSym.NCNAME, "beef", tokenizeFirst("beef"));
+		assertSymbolEquals(XpathSym.NCNAME, "_beef", tokenizeFirst("_beef"));
+	}
+
+	public void testNonAsciiIdentifiers() throws IOException {
+		assertSymbolEquals(XpathSym.NCNAME, "M\u00e8ller", tokenizeFirst("M\u00e8ller"));
+		assertSymbolEquals(XpathSym.NCNAME, "\uAC20", tokenizeFirst("\uAC20"));
+	}
+
+	public void testSimpleIdentifiersInSpace() throws IOException {
+		Symbol symbol = tokenizeFirst(" myElement ");
+		assertEquals("myElement", symbol.value);
+	}
+
+	public void testUTF16_SurogatePair_valid() throws IOException {
+		// U+1F64A SPEAK-NO-EVIL MONKEY is a valid XML name character;
+		// in UTF-16 it is the surrogate pair U+D83D U+DE4A.
+		Symbol symbol = tokenizeFirst(" monkey\uD83D\uDE4Ame ");
+		assertEquals("monkey\uD83D\uDE4Ame", symbol.value);
+	}
+
+	public void testUTF16_SurogatePair_invalid() throws IOException {
+		// Low surrogate first, high surrogate second: the pair is in the wrong order.
+		try {
+			tokenizeFirst("\uDE4A\uD83D");
+			fail("Should have gotten an exception");
+		} catch (JFlexError expected) {
+			// The scanner rejected the malformed pair, which is what this test wants.
+		}
+	}
+
+}
diff --git a/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/test/TestBugs.java b/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/test/TestBugs.java
index 0e3d453..73caafe 100644
--- a/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/test/TestBugs.java
+++ b/tests/org.eclipse.wst.xml.xpath2.processor.tests/src/org/eclipse/wst/xml/xpath2/processor/test/TestBugs.java
@@ -59,7 +59,7 @@
  *  Jesper Steen Moller  - bug 340933 - Migrate tests to new XPath2 API
  *  Lukasz Wycisk   - bug 361060 - Aggregations with nil=ÕtrueÕ throw exceptions.
  *  Lukasz Wycisk   - bug 361059 - FnRoundHalfToEven is wrong in case of 2 arguments
- *  Lukasz Wycisk   - bug 361659 - ElemntType typed value in case of nil=ÕtrueÕ                              
+ *  Jesper Moller   - bug 388504 - XPath scanner does not detect non-ASCII names                              
  ******************************************************************************/
 
 package org.eclipse.wst.xml.xpath2.processor.test;
@@ -198,6 +198,20 @@
 		});
 	}
 	
+	public void testNamesInUnicode() throws Exception {
+		// Bug 388504: element names containing non-ASCII letters must be scanned as names.
+		URL fileURL = bundle.getEntry("/bugTestFiles/bug388504.xml");
+		loadDOMDocument(fileURL);
+		setupDynamicContext(null);
+
+		// U+00FC (u-umlaut) is written as an escape so the query survives any source encoding.
+		compileXPath("count(//Schl\u00fcssel)");
+		ResultSequence rs = evaluate(domDoc);
+
+		String actual = rs.first().getStringValue();
+		assertEquals("1", actual);
+	}
+	
 	public void testNamesWhichAreKeywords() throws Exception {
 		// Bug 273719
 		URL fileURL = bundle.getEntry("/bugTestFiles/bug311480.xml");