blob: d7bfe968f152efabd7c240f90adba76fb00bea52 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2006, 2012 Oracle Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Oracle Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.bpel.ui.editors.xpath;
import org.eclipse.jface.text.rules.IWordDetector;
/**
* @author Michal Chmielewski (michal.chmielewski@oracle.com)
* @date Oct 25, 2006
*
*/
public class XPathWordDetector implements IWordDetector {
/**
* Detects NCNames
* @author Michal Chmielewski (michal.chmielewski@oracle.com)
* @date Oct 26, 2006
*
*/
static public class NCNameWordDetector extends XPathWordDetector {
/** (non-Javadoc)
* @see org.eclipse.bpel.ui.editors.xpath.XPathWordDetector#isWordPart(char)
*/
@Override
public boolean isWordPart (char c) {
return c != ':' && super.isWordPart(c);
}
}
/**
* Detects variables in XPath expressions.
*
* @author Michal Chmielewski (michal.chmielewski@oracle.com)
* @date Nov 17, 2006
*
*/
static public class VariableDetector extends NCNameWordDetector {
/**
* These always start with a $
* @see org.eclipse.bpel.ui.editors.xpath.XPathWordDetector#isWordStart(char)
*/
@Override
public boolean isWordStart (char c) {
return c == '$' ;
}
/**
* The rest of the variable is a NCName but does not contain a '.'
*/
@Override
public boolean isWordPart (char c) {
return c != '.' && super.isWordPart(c);
}
}
/**
* @author Michal Chmielewski (michal.chmielewski@oracle.com)
* @date Nov 27, 2006
*
*/
static public class MessagePartDetector extends NCNameWordDetector {
/**
* These always start with a $
* @see org.eclipse.bpel.ui.editors.xpath.XPathWordDetector#isWordStart(char)
*/
@Override
public boolean isWordStart (char c) {
return c == '.' ;
}
/**
* The rest of the variable is a NCName but does not contain a '.'
*/
@Override
public boolean isWordPart (char c) {
return c != '.' && c != '/' && super.isWordPart(c);
}
}
/**
* Detects variables in XPath expressions.
*
* @author Michal Chmielewski (michal.chmielewski@oracle.com)
* @date Nov 17, 2006
*
*/
static public class QNameDetector extends NCNameWordDetector {
int colCount = 0;
/**
* These always start with a $
* @see org.eclipse.bpel.ui.editors.xpath.XPathWordDetector#isWordStart(char)
*/
@Override
public boolean isWordStart (char c) {
colCount = 0;
return super.isWordStart(c);
}
/**
* The rest of the variable is a NCName but does not contain a '.'
*/
@Override
public boolean isWordPart (char c) {
if (c == ':') {
if (colCount == 0) {
colCount += 1;
return true;
}
return false;
}
return super.isWordPart(c);
}
}
/**
* @see org.eclipse.jface.text.rules.IWordDetector#isWordPart(char)
*/
public boolean isWordPart (char c) {
return isNameChar(c);
}
/** (non-Javadoc)
* @see org.eclipse.jface.text.rules.IWordDetector#isWordStart(char)
*/
public boolean isWordStart (char c) {
return (c == '_' || isLetter(c)) ;
}
/**
* Answer if we are a name character.
*
* @param c
* @return true if name character, false otherwise
*/
static public boolean isNameChar (char c) {
return isLetter(c) ||
isDigit(c) ||
c == '.' ||
c == '-' ||
c == '_' ||
c == ':' ||
isCombiningCharacter(c) ||
isExtender ( c );
}
/**
* Return of we are a letter according to the XML spec.
*
* @param c
* @return true if letter, false otherwise.
*/
static public boolean isLetter ( char c ) {
return isBaseChar ( c ) || isIdeographic ( c );
}
/**
* Check if we are base character according to the XML spec.
* @param c the character
* @return true if base, false otherwise.
*/
static public boolean isBaseChar ( char c ) {
switch ( getPlane ( c ) ) {
case 0x00 :
return range(c,0x0041,0x005A) ||
range(c,0x0061,0x007A) ||
range(c,0x00C0,0x00D6) ||
range(c,0x00D8,0x00F6) ||
range(c,0x00F8,0x00FF) ;
case 0x01 :
return range(c,0x0100,0x0131) ||
range(c,0x0134,0x013E) ||
range(c,0x0141,0x0148) ||
range(c,0x014A,0x017E) ||
range(c,0x0180,0x01C3) ||
range(c,0x01CD,0x01F0) ||
range(c,0x01F4,0x01F5) ||
range(c,0x01FA,0x01FF);
case 0x02 :
return range(c,0x0200, 0x0217) ||
range(c,0x0250,0x02A8) ||
range(c,0x02BB,0x02C1);
case 0x03 :
return c == 0x0386 ||
range(c,0x0388,0x038A) ||
c == 0x038C ||
range(c,0x038E,0x03A1) ||
range(c,0x03A3,0x03CE) ||
range(c,0x03D0,0x03D6) ||
c == 0x03DA ||
c == 0x03DC ||
c == 0x03DE ||
c == 0x03E0 ||
range(c,0x03E2,0x03F3);
case 0x04 :
return range(c,0x0401,0x040C) ||
range(c,0x040E,0x044F) ||
range(c,0x0451,0x045C) ||
range(c,0x045E,0x0481) ||
range(c,0x0490,0x04C4) ||
range(c,0x04C7,0x04C8) ||
range(c,0x04CB,0x04CC) ||
range(c,0x04D0,0x04EB) ||
range(c,0x04EE,0x04F5) ||
range(c,0x04F8,0x04F9);
case 0x05 :
return range(c,0x0531,0x0556) ||
c == 0x0559 ||
range(c,0x0561,0x0586) ||
range(c,0x05D0,0x05EA) ||
range(c,0x05F0,0x05F2);
case 0x06 :
return range(c,0x0621,0x063A) ||
range(c,0x0641,0x064A) ||
range(c,0x0671,0x06B7) ||
range(c,0x06BA,0x06BE) ||
range(c,0x06C0,0x06CE) ||
range(c,0x06D0,0x06D3) ||
c == 0x06D5 ||
range(c,0x06E5,0x06E6);
case 0x07 :
case 0x08 :
return false;
case 0x09 :
return range(c,0x0905,0x0939) ||
c == 0x093D ||
range(c,0x0958,0x0961) ||
range(c,0x0985,0x098C) ||
range(c,0x098F,0x0990) ||
range(c,0x0993,0x09A8) ||
range(c,0x09AA,0x09B0) ||
c == 0x09B2 ||
range(c,0x09B6,0x09B9) ||
range(c,0x09DC,0x09DD) ||
range(c,0x09DF,0x09E1) ||
range(c,0x09F0,0x09F1) ;
case 0x0A :
return range(c,0x0A05,0x0A0A) ||
range(c,0x0A0F,0x0A10) ||
range(c,0x0A13,0x0A28) ||
range(c,0x0A2A,0x0A30) ||
range(c,0x0A32,0x0A33) ||
range(c,0x0A35,0x0A36) ||
range(c,0x0A38,0x0A39) ||
range(c,0x0A59,0x0A5C) ||
c == 0x0A5E ||
range(c,0x0A72,0x0A74) ||
range(c,0x0A85,0x0A8B) ||
c == 0x0A8D ||
range(c,0x0A8F,0x0A91) ||
range(c,0x0A93,0x0AA8) ||
range(c,0x0AAA,0x0AB0) ||
range(c,0x0AB2,0x0AB3) ||
range(c,0x0AB5,0x0AB9) ||
c == 0x0ABD ||
c == 0x0AE0;
case 0x0B :
return range(c,0x0B05,0x0B0C) ||
range(c,0x0B0F,0x0B10) ||
range(c,0x0B13,0x0B28) ||
range(c,0x0B2A,0x0B30) ||
range(c,0x0B32,0x0B33) ||
range(c,0x0B36,0x0B39) ||
c == 0x0B3D ||
range(c,0x0B5C,0x0B5D) ||
range(c,0x0B5F,0x0B61) ||
range(c,0x0B85,0x0B8A) ||
range(c,0x0B8E,0x0B90) ||
range(c,0x0B92,0x0B95) ||
range(c,0x0B99,0x0B9A) ||
c == 0x0B9C ||
range(c,0x0B9E,0x0B9F) ||
range(c,0x0BA3,0x0BA4) ||
range(c,0x0BA8,0x0BAA) ||
range(c,0x0BAE,0x0BB5) ||
range(c,0x0BB7,0x0BB9);
case 0x0C :
return range(c,0x0C05,0x0C0C) ||
range(c,0x0C0E,0x0C10) ||
range(c,0x0C12,0x0C28) ||
range(c,0x0C2A,0x0C33) ||
range(c,0x0C35,0x0C39) ||
range(c,0x0C60,0x0C61) ||
range(c,0x0C85,0x0C8C) ||
range(c,0x0C8E,0x0C90) ||
range(c,0x0C92,0x0CA8) ||
range(c,0x0CAA,0x0CB3) ||
range(c,0x0CB5,0x0CB9) ||
c == 0x0CDE ||
range(c,0x0CE0,0x0CE1);
case 0x0D :
return range(c,0x0D05,0x0D0C) ||
range(c,0x0D0E,0x0D10) ||
range(c,0x0D12,0x0D28) ||
range(c,0x0D2A,0x0D39) ||
range(c,0x0D60,0x0D61) ;
case 0x0E :
return range(c,0x0E01,0x0E2E) ||
c == 0x0E30 ||
range(c,0x0E32,0x0E33) ||
range(c,0x0E40,0x0E45) ||
range(c,0x0E81,0x0E82) ||
c == 0x0E84 ||
range(c,0x0E87,0x0E88) ||
c == 0x0E8A ||
c == 0x0E8D ||
range(c,0x0E94,0x0E97) ||
range(c,0x0E99,0x0E9F) ||
range(c,0x0EA1,0x0EA3) ||
c == 0x0EA5 ||
c == 0x0EA7 ||
range(c,0x0EAA,0x0EAB) ||
range(c,0x0EAD,0x0EAE) ||
c == 0x0EB0 ||
range(c,0x0EB2,0x0EB3) ||
c == 0x0EBD ||
range(c,0x0EC0,0x0EC4);
case 0x0F :
return range(c,0x0F40,0x0F47) ||
range(c,0x0F49,0x0F69);
case 0x10 :
return range(c,0x0F40,0x0F47) ||
range(c,0x0F49,0x0F69);
case 0x11 :
return c == 0x1100 ||
range(c,0x1102,0x1103) ||
range(c,0x1105,0x1107) ||
c == 0x1109 ||
range(c,0x110B,0x110C) ||
range(c,0x110E,0x1112) ||
c == 0x113C ||
c == 0x113E ||
c == 0x1140 ||
c == 0x114C ||
c == 0x114E ||
c == 0x1150 ||
range(c,0x1154,0x1155) ||
c == 0x1159 ||
range(c,0x115F,0x1161) ||
c == 0x1163 ||
c == 0x1165 ||
c == 0x1167 ||
c == 0x1169 ||
range(c,0x116D,0x116E) ||
range(c,0x1172,0x1173) ||
c == 0x1175 ||
c == 0x119E ||
c == 0x11A8 ||
c == 0x11AB ||
range(c,0x11AE,0x11AF) ||
range(c,0x11B7,0x11B8) ||
c == 0x11BA ||
range(c,0x11BC,0x11C2) ||
c == 0x11EB ||
c == 0x11F0 ||
c == 0x11F9;
case 0x1E :
return range(c,0x1E00,0x1E9B) ||
range(c,0x1EA0,0x1EF9);
case 0x1F :
return range(c,0x1F00,0x1F15) ||
range(c,0x1F18,0x1F1D) ||
range(c,0x1F20,0x1F45) ||
range(c,0x1F48,0x1F4D) ||
range(c,0x1F50,0x1F57) ||
c == 0x1F59 ||
c == 0x1F5B ||
c == 0x1F5D ||
range(c,0x1F5F,0x1F7D) ||
range(c,0x1F80,0x1FB4) ||
range(c,0x1FB6,0x1FBC) ||
c == 0x1FBE ||
range(c,0x1FC2,0x1FC4) ||
range(c,0x1FC6,0x1FCC) ||
range(c,0x1FD0,0x1FD3) ||
range(c,0x1FD6,0x1FDB) ||
range(c,0x1FE0,0x1FEC) ||
range(c,0x1FF2,0x1FF4) ||
range(c,0x1FF6,0x1FFC);
case 0x21 :
return c == 0x2126 ||
range(c,0x212A,0x212B) ||
c == 0x212E ||
range(c,0x2180,0x2182);
case 0x30 :
return range(c,0x3041,0x3094) ||
range(c,0x30A1,0x30FA);
case 0x31 :
return range(c,0x3105,0x312C);
default :
return range(c,0xAC00,0xD7A3);
}
}
/**
* Answer if the character is a combining character.
*
* @param c the character
* @return true of combining, false otherwise.
*/
static public boolean isCombiningCharacter ( char c ) {
switch ( getPlane ( c ) ) {
case 0x03 :
return range(c,0x0300,0x0345) ||
range(c,0x0360,0x0361) ;
case 0x04 :
return range(c,0x0483,0x0486);
case 0x05 :
return range(c,0x0591,0x05A1) ||
range(c,0x05A3,0x05B9) ||
range(c,0x05BB,0x05BD) ||
c == 0x05BF ||
range(c,0x05C1,0x05C2) ||
c == 0x05C4 ;
case 0x06 :
return range(c,0x064B,0x0652) ||
c == 0x0670 ||
range(c,0x06D6,0x06DC) ||
range(c,0x06DD,0x06DF) ||
range(c,0x06E0,0x06E4) ||
range(c,0x06E7,0x06E8) ||
range(c,0x06EA,0x06ED) ;
case 0x09 :
return range(c,0x0901,0x0903) ||
c == 0x093C ||
range(c,0x093E,0x094C) ||
c == 0x094D ||
range(c,0x0951,0x0954) ||
range(c,0x0962,0x0963) ||
range(c,0x0981,0x0983) ||
c == 0x09BC ||
c == 0x09BE ||
c == 0x09BF ||
range(c,0x09C0,0x09C4) ||
range(c,0x09C7,0x09C8) ||
range(c,0x09CB,0x09CD) ||
c == 0x09D7 ||
range(c,0x09E2,0x09E3) ;
case 0x0A :
return c == 0x0A02 ||
c == 0x0A3C ||
c == 0x0A3E ||
c == 0x0A3F ||
range(c,0x0A40,0x0A42) ||
range(c,0x0A47,0x0A48) ||
range(c,0x0A4B,0x0A4D) ||
range(c,0x0A70,0x0A71) ||
range(c,0x0A81,0x0A83) ||
c == 0x0ABC ||
range(c,0x0ABE,0x0AC5) ||
range(c,0x0AC7,0x0AC9) ||
range(c,0x0ACB,0x0ACD) ;
case 0x0B :
return range(c,0x0B01,0x0B03) ||
c == 0x0B3C ||
range(c,0x0B3E,0x0B43) ||
range(c,0x0B47,0x0B48) ||
range(c,0x0B4B,0x0B4D) ||
range(c,0x0B56,0x0B57) ||
range(c,0x0B82,0x0B83) ||
range(c,0x0BBE,0x0BC2) ||
range(c,0x0BC6,0x0BC8) ||
range(c,0x0BCA,0x0BCD) ||
c == 0x0BD7 ;
case 0x0C :
return range(c,0x0C01,0x0C03) ||
range(c,0x0C3E,0x0C44) ||
range(c,0x0C46,0x0C48) ||
range(c,0x0C4A,0x0C4D) ||
range(c,0x0C55,0x0C56) ||
range(c,0x0C82,0x0C83) ||
range(c,0x0CBE,0x0CC4) ||
range(c,0x0CC6,0x0CC8) ||
range(c,0x0CCA,0x0CCD) ||
range(c,0x0CD5,0x0CD6) ;
case 0x0D :
return range(c,0x0D02,0x0D03) ||
range(c,0x0D3E,0x0D43) ||
range(c,0x0D46,0x0D48) ||
range(c,0x0D4A,0x0D4D) ||
c == 0x0D57 ;
case 0x0E :
return c == 0x0E31 ||
range(c,0x0E34,0x0E3A) ||
range(c,0x0E47,0x0E4E) ||
c == 0x0EB1 ||
range(c,0x0EB4,0x0EB9) ||
range(c,0x0EBB,0x0EBC) ||
range(c,0x0EC8,0x0ECD);
case 0x0F :
return range(c,0x0F18,0x0F19) ||
c == 0x0F35 ||
c == 0x0F37 ||
c == 0x0F39 ||
c == 0x0F3E ||
c == 0x0F3F ||
range(c,0x0F71,0x0F84) ||
range(c,0x0F86,0x0F8B) ||
range(c,0x0F90,0x0F95) ||
c == 0x0F97 ||
range(c,0x0F99,0x0FAD) ||
range(c,0x0FB1,0x0FB7) ||
c == 0x0FB9 ;
case 0x20 :
return range(c,0x20D0,0x20DC) ||
c == 0x20E1;
case 0x30 :
return range(c,0x302A,0x302F) ||
c == 0x3099 ||
c == 0x309A ;
default :
return false;
}
}
/**
* Answer is the character is an Ideographic, per XML spec.
* <pre>
* [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
* </pre>
* @param c the character to test
* @return return true if yes, false if no.
*/
static public boolean isIdeographic (char c) {
return range(c,0x4e00, 0x9fa5) ||
c == 0x3007 ||
range(c,0x3021, 0x3029 ) ;
}
/**
* Answer if we are a digit, based on the XML specification.
* <pre>
* [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
* [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
* [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
* [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
* </pre>
* @param c
* @return true if digit, false otherwise.
*/
static public boolean isDigit ( char c ) {
switch ( getPlane ( c ) ) {
case 0x00 :
return range(c,0x0030,0x0039);
case 0x06 :
return range(c,0x0660,0x0669) ||
range(c,0x06F0,0x06F9);
case 0x09 :
return range(c,0x0966,0x096F) ||
range(c,0x09E6,0x09EF);
case 0x0A:
return range(c,0x0A66,0x0A6F) ||
range(c,0x0AE6,0x0AEF);
case 0x0B:
return range(c,0x0B66,0x0B6F) ||
range(c,0x0BE7,0x0BEF);
case 0x0C:
return range(c,0x0C66,0x0C6F) ||
range(c,0x0CE6,0x0CEF);
case 0x0D:
return range(c,0x0D66,0x0D6F);
case 0x0E:
return range(c,0x0E50,0x0E59) ||
range(c,0x0ED0,0x0ED9);
case 0x0F:
return range(c,0x0F20,0x0F29);
default :
return false;
}
}
/**
* Answer if we are an extender character.
* <pre>
* #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
* [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
* </pre>
* @param c
* @return true if extender false otherwise.
*/
static public boolean isExtender (char c) {
boolean res =
( c == 0x00B7 ||
c == 0x02D0 ||
c == 0x02D1 ||
c == 0x0387 ||
c == 0x0640 ||
c == 0x0E46 ||
c == 0x0EC6 ||
c == 0x3005 );
if (res) {
return res;
}
return range(c,0x3031,0x3035) ||
range(c,0x309D,0x309E) ||
range(c,0x30FC,0x30FE);
}
/**
* Return the plane of the character in the unicode system.
*
* @param c
* @return the plane (high 2 bytes)
*/
static public int getPlane (int c) {
return (c >>> 16);
}
/**
* Ask if the character is in the range specified.
*
* @param c the character
* @param l low value (inclusive)
* @param h high value (inclusive)
* @return true if in range, false otherwise.
*/
static public boolean range ( char c, int l, int h) {
return ( c >= l && c <= h );
}
}