blob: 08bb8a3984cc5981bbee02886b655ac82d2c1e11 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2005, 2007 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
*******************************************************************************/
package org.eclipse.dltk.compiler.util;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import org.eclipse.dltk.compiler.InvalidInputException;
/**
 * Static character-classification helpers for scanners.
 * <p>
 * Characters below {@link #MAX_OBVIOUS} are classified through the
 * precomputed {@link #OBVIOUS_IDENT_CHAR_NATURES} flag table; other BMP
 * characters are delegated to {@link Character}. Supplementary code points
 * (given as a surrogate pair) are looked up in bit tables that are read from
 * class-path resources when this class is initialized.
 */
public class ScannerHelper {
    // extended unicode support
    public static final int LOW_SURROGATE_MIN_VALUE = 0xDC00;
    public static final int HIGH_SURROGATE_MIN_VALUE = 0xD800;
    public static final int HIGH_SURROGATE_MAX_VALUE = 0xDBFF;
    public static final int LOW_SURROGATE_MAX_VALUE = 0xDFFF;

    // storage for internal flags (32 bits) BIT USAGE
    public final static int Bit1 = 0x1; // return type (operator) | name reference kind (name ref) | add assertion (type decl) | useful empty statement (empty statement)
    public final static int Bit2 = 0x2; // return type (operator) | name reference kind (name ref) | has local type (type, method, field decl)
    public final static int Bit3 = 0x4; // return type (operator) | name reference kind (name ref) | implicit this (this ref)
    public final static int Bit4 = 0x8; // return type (operator) | first assignment to local (local decl) | undocumented empty block (block, type and method decl)
    public final static int Bit5 = 0x10; // value for return (expression) | has all method bodies (unit) | supertype ref (type ref)
    public final static int Bit6 = 0x20; // depth (name ref, msg) | ignore need cast check (cast expression)
    public final static int Bit7 = 0x40; // depth (name ref, msg) | operator (operator) | need runtime checkcast (cast expression) | label used (labelStatement)
    public final static int Bit8 = 0x80; // depth (name ref, msg) | operator (operator) | unsafe cast (cast expression)
    public final static int Bit9 = 0x100; // depth (name ref, msg) | operator (operator) | is local type (type decl)
    public final static int Bit10 = 0x200; // depth (name ref, msg) | operator (operator) | is anonymous type (type decl)
    public final static int Bit11 = 0x400; // depth (name ref, msg) | operator (operator) | is member type (type decl)
    public final static int Bit12 = 0x800; // depth (name ref, msg) | operator (operator) | has abstract methods (type decl)
    public final static int Bit13 = 0x1000; // depth (name ref, msg) | is secondary type (type decl)
    public final static int Bit14 = 0x2000; // strictly assigned (reference lhs)
    public final static int Bit15 = 0x4000; // is unnecessary cast (expression) | is varargs (type ref) | isSubRoutineEscaping (try statement)
    public final static int Bit16 = 0x8000; // in javadoc comment (name ref, type ref, msg)
    public final static int Bit17 = 0x10000; // compound assigned (reference lhs)
    public final static int Bit18 = 0x20000; // non null (expression)
    public final static int Bit19 = 0x40000;
    public final static int Bit20 = 0x80000;
    public final static int Bit21 = 0x100000;
    public final static int Bit22 = 0x200000; // parenthesis count (expression)
    public final static int Bit23 = 0x400000; // parenthesis count (expression)
    public final static int Bit24 = 0x800000; // parenthesis count (expression)
    public final static int Bit25 = 0x1000000; // parenthesis count (expression)
    public final static int Bit26 = 0x2000000; // parenthesis count (expression)
    public final static int Bit27 = 0x4000000; // parenthesis count (expression)
    public final static int Bit28 = 0x8000000; // parenthesis count (expression)
    public final static int Bit29 = 0x10000000; // parenthesis count (expression)
    public final static int Bit30 = 0x20000000; // elseif (if statement) | try block exit (try statement) | fall-through (case statement)
    public final static int Bit31 = 0x40000000; // local declaration reachable (local decl) | ignore raw type check (type ref) | discard entire assignment (assignment)
    public final static int Bit32 = 0x80000000; // reachable (statement)

    // 64-bit variants; Bit32L is bit 31 alone, without the sign extension
    // that widening the int constant Bit32 to long would introduce.
    public final static long Bit32L = 0x80000000L;
    public final static long Bit33L = 0x100000000L;
    public final static long Bit34L = 0x200000000L;
    public final static long Bit35L = 0x400000000L;
    public final static long Bit36L = 0x800000000L;
    public final static long Bit37L = 0x1000000000L;
    public final static long Bit38L = 0x2000000000L;
    public final static long Bit39L = 0x4000000000L;
    public final static long Bit40L = 0x8000000000L;
    public final static long Bit41L = 0x10000000000L;
    public final static long Bit42L = 0x20000000000L;
    public final static long Bit43L = 0x40000000000L;
    public final static long Bit44L = 0x80000000000L;
    public final static long Bit45L = 0x100000000000L;
    public final static long Bit46L = 0x200000000000L;
    public final static long Bit47L = 0x400000000000L;
    public final static long Bit48L = 0x800000000000L;
    public final static long Bit49L = 0x1000000000000L;
    public final static long Bit50L = 0x2000000000000L;
    public final static long Bit51L = 0x4000000000000L;
    public final static long Bit52L = 0x8000000000000L;
    public final static long Bit53L = 0x10000000000000L;
    public final static long Bit54L = 0x20000000000000L;
    public final static long Bit55L = 0x40000000000000L;
    public final static long Bit56L = 0x80000000000000L;
    public final static long Bit57L = 0x100000000000000L;
    public final static long Bit58L = 0x200000000000000L;
    public final static long Bit59L = 0x400000000000000L;
    public final static long Bit60L = 0x800000000000000L;
    public final static long Bit61L = 0x1000000000000000L;
    public final static long Bit62L = 0x2000000000000000L;
    public final static long Bit63L = 0x4000000000000000L;
    public final static long Bit64L = 0x8000000000000000L;

    /**
     * Single-bit masks: {@code Bits[n]} has exactly bit {@code n} set.
     * <p>
     * Entry 31 must be {@link #Bit32L}: the int constant {@link #Bit32} is
     * negative and would widen to {@code 0xFFFFFFFF80000000L}, a mask that
     * matches every bit from 31 to 63.
     */
    public final static long[] Bits = {
        Bit1, Bit2, Bit3, Bit4, Bit5, Bit6,
        Bit7, Bit8, Bit9, Bit10, Bit11, Bit12,
        Bit13, Bit14, Bit15, Bit16, Bit17, Bit18,
        Bit19, Bit20, Bit21, Bit22, Bit23, Bit24,
        Bit25, Bit26, Bit27, Bit28, Bit29, Bit30,
        Bit31, Bit32L, Bit33L, Bit34L, Bit35L, Bit36L,
        Bit37L, Bit38L, Bit39L, Bit40L, Bit41L, Bit42L,
        Bit43L, Bit44L, Bit45L, Bit46L, Bit47L, Bit48L,
        Bit49L, Bit50L, Bit51L, Bit52L, Bit53L, Bit54L,
        Bit55L, Bit56L, Bit57L, Bit58L, Bit59L, Bit60L,
        Bit61L, Bit62L, Bit63L, Bit64L,
    };

    /** First index into {@link #Tables}: identifier-start tables. */
    private static final int START_INDEX = 0;
    /** First index into {@link #Tables}: identifier-part tables. */
    private static final int PART_INDEX = 1;
    /** Number of longs in one table: 1024 * 64 = 65536 bits, one per 16-bit offset. */
    private static final int TABLE_LENGTH = 1024;

    /**
     * Bit tables for supplementary code points, indexed as
     * {@code Tables[START_INDEX or PART_INDEX][slot][offset / 64]}. An entry
     * is {@code null} when its resource could not be loaded.
     */
    private static long[][][] Tables;

    /** Characters below this value are classified via {@link #OBVIOUS_IDENT_CHAR_NATURES}. */
    public final static int MAX_OBVIOUS = 128;
    /** Per-character bitwise OR of the {@code C_*} flags, for characters below {@link #MAX_OBVIOUS}. */
    public final static int[] OBVIOUS_IDENT_CHAR_NATURES = new int[MAX_OBVIOUS];

    public final static int C_JLS_SPACE = 0x100;
    public final static int C_SPECIAL = 0x80;
    public final static int C_IDENT_START = 0x40;
    public final static int C_UPPER_LETTER = 0x20;
    public final static int C_LOWER_LETTER = 0x10;
    public final static int C_IDENT_PART = 0x8;
    public final static int C_DIGIT = 0x4;
    public final static int C_SEPARATOR = 0x2;
    public final static int C_SPACE = 0x1;

    // Fill the nature table for characters 0..127.
    static {
        // Control characters 0-8, 14-27 and 127 are identifier parts only.
        for (int i = 0; i <= 8; i++) {
            OBVIOUS_IDENT_CHAR_NATURES[i] = C_IDENT_PART;
        }
        for (int i = 14; i <= 27; i++) {
            OBVIOUS_IDENT_CHAR_NATURES[i] = C_IDENT_PART;
        }
        OBVIOUS_IDENT_CHAR_NATURES[127] = C_IDENT_PART;

        for (int i = '0'; i <= '9'; i++) {
            OBVIOUS_IDENT_CHAR_NATURES[i] = C_DIGIT | C_IDENT_PART;
        }
        for (int i = 'a'; i <= 'z'; i++) {
            OBVIOUS_IDENT_CHAR_NATURES[i] = C_LOWER_LETTER | C_IDENT_PART | C_IDENT_START;
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            OBVIOUS_IDENT_CHAR_NATURES[i] = C_UPPER_LETTER | C_IDENT_PART | C_IDENT_START;
        }
        OBVIOUS_IDENT_CHAR_NATURES['_'] = C_SPECIAL | C_IDENT_PART | C_IDENT_START;
        OBVIOUS_IDENT_CHAR_NATURES['$'] = C_SPECIAL | C_IDENT_PART | C_IDENT_START;

        OBVIOUS_IDENT_CHAR_NATURES[9] = C_SPACE | C_JLS_SPACE; // \ u0009: HORIZONTAL TABULATION
        OBVIOUS_IDENT_CHAR_NATURES[10] = C_SPACE | C_JLS_SPACE; // \ u000a: LINE FEED
        OBVIOUS_IDENT_CHAR_NATURES[11] = C_SPACE;
        OBVIOUS_IDENT_CHAR_NATURES[12] = C_SPACE | C_JLS_SPACE; // \ u000c: FORM FEED
        OBVIOUS_IDENT_CHAR_NATURES[13] = C_SPACE | C_JLS_SPACE; // \ u000d: CARRIAGE RETURN
        for (int i = 28; i <= 31; i++) {
            OBVIOUS_IDENT_CHAR_NATURES[i] = C_SPACE;
        }
        OBVIOUS_IDENT_CHAR_NATURES[32] = C_SPACE | C_JLS_SPACE; // \ u0020: SPACE

        String separators = ".:;,[](){}+-*/=&|?<>!%^~\"'"; //$NON-NLS-1$
        for (int i = 0; i < separators.length(); i++) {
            OBVIOUS_IDENT_CHAR_NATURES[separators.charAt(i)] = C_SEPARATOR;
        }
    }

    // Load the supplementary-plane bit tables. A table that cannot be read
    // stays null, which isBitSet treats as "no bit set".
    static {
        Tables = new long[2][][];

        Tables[START_INDEX] = new long[2][];
        Tables[START_INDEX][0] = loadTable("start1.rsc"); //$NON-NLS-1$
        Tables[START_INDEX][1] = loadTable("start2.rsc"); //$NON-NLS-1$

        Tables[PART_INDEX] = new long[3][];
        Tables[PART_INDEX][0] = loadTable("part1.rsc"); //$NON-NLS-1$
        Tables[PART_INDEX][1] = loadTable("part2.rsc"); //$NON-NLS-1$
        Tables[PART_INDEX][2] = loadTable("part14.rsc"); //$NON-NLS-1$
    }

    /**
     * Reads {@link #TABLE_LENGTH} big-endian longs from a resource located
     * relative to this class.
     *
     * @param resourceName name passed to {@link Class#getResourceAsStream(String)}
     * @return the values read, or {@code null} if the resource is missing or
     *         could not be read completely (the failure is printed to
     *         standard error)
     */
    private static long[] loadTable(String resourceName) {
        InputStream resource = ScannerHelper.class.getResourceAsStream(resourceName);
        if (resource == null) {
            // getResourceAsStream signals a missing resource with null, not
            // with an exception; report it instead of failing class
            // initialization with a NullPointerException.
            new FileNotFoundException(resourceName).printStackTrace();
            return null;
        }
        DataInputStream input = new DataInputStream(resource);
        try {
            long[] values = new long[TABLE_LENGTH];
            for (int i = 0; i < values.length; i++) {
                values[i] = input.readLong();
            }
            return values;
        } catch (IOException e) {
            // Covers truncated resources (EOFException) as well.
            e.printStackTrace();
            return null;
        } finally {
            try {
                input.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Tests bit {@code i} of a bit table.
     *
     * @param values the table, or {@code null} if it was not loaded
     * @param i bit index; bit {@code i % 64} of {@code values[i / 64]} is tested
     * @return {@code true} if the bit is set; {@code false} if it is clear
     *         or {@code values} is {@code null}
     */
    private final static boolean isBitSet(long[] values, int i) {
        if (values == null) {
            return false;
        }
        return (values[i / 64] & Bits[i % 64]) != 0;
    }

    /**
     * Returns whether {@code c} may appear inside an identifier: the
     * {@link #C_IDENT_PART} flag for characters below {@link #MAX_OBVIOUS},
     * {@link Character#isJavaIdentifierPart(char)} otherwise.
     */
    public static boolean isScriptIdentifierPart(char c) {
        if (c < MAX_OBVIOUS) {
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & C_IDENT_PART) != 0;
        }
        return Character.isJavaIdentifierPart(c);
    }

    /**
     * Returns whether the code point encoded by a surrogate pair may appear
     * inside an identifier. Planes 1, 2 and 14 are answered from the loaded
     * tables; any other plane except 0 yields {@code false}.
     *
     * @param high the high (leading) surrogate
     * @param low the low (trailing) surrogate
     */
    public static boolean isScriptIdentifierPart(char high, char low) {
        int codePoint = toCodePoint(high, low);
        int offset = codePoint & 0xFFFF;
        // Bits 16-20 of the code point select the plane.
        switch ((codePoint & 0x1F0000) >> 16) {
        case 0:
            return Character.isJavaIdentifierPart((char) codePoint);
        case 1:
            return isBitSet(Tables[PART_INDEX][0], offset);
        case 2:
            return isBitSet(Tables[PART_INDEX][1], offset);
        case 14:
            return isBitSet(Tables[PART_INDEX][2], offset);
        default:
            return false;
        }
    }

    /**
     * Returns whether {@code c} may start an identifier: the
     * {@link #C_IDENT_START} flag for characters below {@link #MAX_OBVIOUS},
     * {@link Character#isJavaIdentifierStart(char)} otherwise.
     */
    public static boolean isScriptIdentifierStart(char c) {
        if (c < MAX_OBVIOUS) {
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & C_IDENT_START) != 0;
        }
        return Character.isJavaIdentifierStart(c);
    }

    /**
     * Returns whether the code point encoded by a surrogate pair may start
     * an identifier. Planes 1 and 2 are answered from the loaded tables; any
     * other plane except 0 yields {@code false}.
     *
     * @param high the high (leading) surrogate
     * @param low the low (trailing) surrogate
     */
    public static boolean isScriptIdentifierStart(char high, char low) {
        int codePoint = toCodePoint(high, low);
        int offset = codePoint & 0xFFFF;
        // Bits 16-20 of the code point select the plane.
        switch ((codePoint & 0x1F0000) >> 16) {
        case 0:
            return Character.isJavaIdentifierStart((char) codePoint);
        case 1:
            return isBitSet(Tables[START_INDEX][0], offset);
        case 2:
            return isBitSet(Tables[START_INDEX][1], offset);
        default:
            return false;
        }
    }

    /**
     * Combines a surrogate pair into a code point. The arguments are not
     * validated.
     */
    private static int toCodePoint(char high, char low) {
        return (high - HIGH_SURROGATE_MIN_VALUE) * 0x400
                + (low - LOW_SURROGATE_MIN_VALUE) + 0x10000;
    }

    /**
     * Returns whether {@code c} is one of the digits flagged with
     * {@link #C_DIGIT} ('0'-'9').
     *
     * @return {@code true} for '0'-'9', {@code false} for any other
     *         character that is not a digit
     * @throws InvalidInputException if {@code c} is at or above
     *         {@link #MAX_OBVIOUS} and {@link Character#isDigit(char)}
     *         accepts it
     */
    public static boolean isDigit(char c) throws InvalidInputException {
        if (c < MAX_OBVIOUS) {
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & C_DIGIT) != 0;
        }
        if (Character.isDigit(c)) {
            throw new InvalidInputException("Invalid_Digit"); //$NON-NLS-1$
        }
        return false;
    }

    /**
     * Returns the value of {@code c} as a digit in the given radix, or -1 if
     * it is not one. Radixes 8, 10 and 16 are handled directly for
     * characters below {@link #MAX_OBVIOUS}; everything else is delegated to
     * {@link Character#digit(char, int)}.
     */
    public static int digit(char c, int radix) {
        if (c < MAX_OBVIOUS) {
            switch (radix) {
            case 8:
                return (c >= '0' && c <= '7') ? c - '0' : -1;
            case 10:
                return (c >= '0' && c <= '9') ? c - '0' : -1;
            case 16:
                if (c >= '0' && c <= '9') {
                    return c - '0';
                }
                if (c >= 'A' && c <= 'F') {
                    return c - 'A' + 10;
                }
                if (c >= 'a' && c <= 'f') {
                    return c - 'a' + 10;
                }
                return -1;
            }
        }
        return Character.digit(c, radix);
    }

    /**
     * Returns the numeric value of {@code c}: 0-9 for digits and 10-35 for
     * the letters a-z / A-Z below {@link #MAX_OBVIOUS}, otherwise whatever
     * {@link Character#getNumericValue(char)} returns.
     */
    public static int getNumericValue(char c) {
        if (c < MAX_OBVIOUS) {
            // The table stores combined flags (for example C_DIGIT |
            // C_IDENT_PART), so test individual bits rather than comparing
            // the whole value.
            int nature = OBVIOUS_IDENT_CHAR_NATURES[c];
            if ((nature & C_DIGIT) != 0) {
                return c - '0';
            }
            if ((nature & C_LOWER_LETTER) != 0) {
                return 10 + c - 'a';
            }
            if ((nature & C_UPPER_LETTER) != 0) {
                return 10 + c - 'A';
            }
        }
        return Character.getNumericValue(c);
    }

    /**
     * Converts {@code c} to upper case. Letters below {@link #MAX_OBVIOUS}
     * are converted arithmetically; everything else goes through
     * {@link Character#toUpperCase(char)}.
     */
    public static char toUpperCase(char c) {
        if (c < MAX_OBVIOUS) {
            int nature = OBVIOUS_IDENT_CHAR_NATURES[c];
            if ((nature & C_UPPER_LETTER) != 0) {
                return c;
            }
            if ((nature & C_LOWER_LETTER) != 0) {
                return (char) (c - 32);
            }
        }
        return Character.toUpperCase(c);
    }

    /**
     * Converts {@code c} to lower case. Letters below {@link #MAX_OBVIOUS}
     * are converted arithmetically; everything else goes through
     * {@link Character#toLowerCase(char)}.
     */
    public static char toLowerCase(char c) {
        if (c < MAX_OBVIOUS) {
            int nature = OBVIOUS_IDENT_CHAR_NATURES[c];
            if ((nature & C_LOWER_LETTER) != 0) {
                return c;
            }
            if ((nature & C_UPPER_LETTER) != 0) {
                return (char) (32 + c);
            }
        }
        return Character.toLowerCase(c);
    }

    /**
     * Returns whether {@code c} is a lower-case letter: the
     * {@link #C_LOWER_LETTER} flag below {@link #MAX_OBVIOUS},
     * {@link Character#isLowerCase(char)} otherwise.
     */
    public static boolean isLowerCase(char c) {
        if (c < MAX_OBVIOUS) {
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & C_LOWER_LETTER) != 0;
        }
        return Character.isLowerCase(c);
    }

    /**
     * Returns whether {@code c} is an upper-case letter: the
     * {@link #C_UPPER_LETTER} flag below {@link #MAX_OBVIOUS},
     * {@link Character#isUpperCase(char)} otherwise.
     */
    public static boolean isUpperCase(char c) {
        if (c < MAX_OBVIOUS) {
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & C_UPPER_LETTER) != 0;
        }
        return Character.isUpperCase(c);
    }

    /**
     * Include also non JLS whitespaces.
     *
     * @return true if Character.isWhitespace(c) would return true
     */
    public static boolean isWhitespace(char c) {
        if (c < MAX_OBVIOUS) {
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & C_SPACE) != 0;
        }
        return Character.isWhitespace(c);
    }

    /**
     * Returns whether {@code c} is a letter: either letter flag below
     * {@link #MAX_OBVIOUS}, {@link Character#isLetter(char)} otherwise.
     */
    public static boolean isLetter(char c) {
        if (c < MAX_OBVIOUS) {
            int letterMask = C_UPPER_LETTER | C_LOWER_LETTER;
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & letterMask) != 0;
        }
        return Character.isLetter(c);
    }

    /**
     * Returns whether {@code c} is a letter or a digit: any letter or digit
     * flag below {@link #MAX_OBVIOUS},
     * {@link Character#isLetterOrDigit(char)} otherwise.
     */
    public static boolean isLetterOrDigit(char c) {
        if (c < MAX_OBVIOUS) {
            int letterOrDigitMask = C_UPPER_LETTER | C_LOWER_LETTER | C_DIGIT;
            return (OBVIOUS_IDENT_CHAR_NATURES[c] & letterOrDigitMask) != 0;
        }
        return Character.isLetterOrDigit(c);
    }
}