| /******************************************************************************* |
| * Copyright (c) 2005, 2007 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| |
| *******************************************************************************/ |
| package org.eclipse.dltk.compiler.util; |
| |
| import java.io.DataInputStream; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| |
| import org.eclipse.dltk.compiler.InvalidInputException; |
| |
| |
| |
| public class ScannerHelper { |
| // extended unicode support |
| public static final int LOW_SURROGATE_MIN_VALUE = 0xDC00; |
| public static final int HIGH_SURROGATE_MIN_VALUE = 0xD800; |
| public static final int HIGH_SURROGATE_MAX_VALUE = 0xDBFF; |
| public static final int LOW_SURROGATE_MAX_VALUE = 0xDFFF; |
| |
| // storage for internal flags (32 bits) BIT USAGE |
| public final static int Bit1 = 0x1; // return type (operator) | name reference kind (name ref) | add assertion (type decl) | useful empty statement (empty statement) |
| public final static int Bit2 = 0x2; // return type (operator) | name reference kind (name ref) | has local type (type, method, field decl) |
| public final static int Bit3 = 0x4; // return type (operator) | name reference kind (name ref) | implicit this (this ref) |
| public final static int Bit4 = 0x8; // return type (operator) | first assignment to local (local decl) | undocumented empty block (block, type and method decl) |
| public final static int Bit5 = 0x10; // value for return (expression) | has all method bodies (unit) | supertype ref (type ref) |
| public final static int Bit6 = 0x20; // depth (name ref, msg) | ignore need cast check (cast expression) |
| public final static int Bit7 = 0x40; // depth (name ref, msg) | operator (operator) | need runtime checkcast (cast expression) | label used (labelStatement) |
| public final static int Bit8 = 0x80; // depth (name ref, msg) | operator (operator) | unsafe cast (cast expression) |
| public final static int Bit9 = 0x100; // depth (name ref, msg) | operator (operator) | is local type (type decl) |
| public final static int Bit10= 0x200; // depth (name ref, msg) | operator (operator) | is anonymous type (type decl) |
| public final static int Bit11 = 0x400; // depth (name ref, msg) | operator (operator) | is member type (type decl) |
| public final static int Bit12 = 0x800; // depth (name ref, msg) | operator (operator) | has abstract methods (type decl) |
| public final static int Bit13 = 0x1000; // depth (name ref, msg) | is secondary type (type decl) |
| public final static int Bit14 = 0x2000; // strictly assigned (reference lhs) |
| public final static int Bit15 = 0x4000; // is unnecessary cast (expression) | is varargs (type ref) | isSubRoutineEscaping (try statement) |
| public final static int Bit16 = 0x8000; // in javadoc comment (name ref, type ref, msg) |
| public final static int Bit17 = 0x10000; // compound assigned (reference lhs) |
| public final static int Bit18 = 0x20000; // non null (expression) |
| public final static int Bit19 = 0x40000; |
| public final static int Bit20 = 0x80000; |
| public final static int Bit21 = 0x100000; |
| public final static int Bit22 = 0x200000; // parenthesis count (expression) |
| public final static int Bit23 = 0x400000; // parenthesis count (expression) |
| public final static int Bit24 = 0x800000; // parenthesis count (expression) |
| public final static int Bit25 = 0x1000000; // parenthesis count (expression) |
| public final static int Bit26 = 0x2000000; // parenthesis count (expression) |
| public final static int Bit27 = 0x4000000; // parenthesis count (expression) |
| public final static int Bit28 = 0x8000000; // parenthesis count (expression) |
| public final static int Bit29 = 0x10000000; // parenthesis count (expression) |
| public final static int Bit30 = 0x20000000; // elseif (if statement) | try block exit (try statement) | fall-through (case statement) |
| public final static int Bit31 = 0x40000000; // local declaration reachable (local decl) | ignore raw type check (type ref) | discard entire assignment (assignment) |
| public final static int Bit32 = 0x80000000; // reachable (statement) |
| |
| public final static long Bit32L = 0x80000000L; |
| public final static long Bit33L = 0x100000000L; |
| public final static long Bit34L = 0x200000000L; |
| public final static long Bit35L = 0x400000000L; |
| public final static long Bit36L = 0x800000000L; |
| public final static long Bit37L = 0x1000000000L; |
| public final static long Bit38L = 0x2000000000L; |
| public final static long Bit39L = 0x4000000000L; |
| public final static long Bit40L = 0x8000000000L; |
| public final static long Bit41L = 0x10000000000L; |
| public final static long Bit42L = 0x20000000000L; |
| public final static long Bit43L = 0x40000000000L; |
| public final static long Bit44L = 0x80000000000L; |
| public final static long Bit45L = 0x100000000000L; |
| public final static long Bit46L = 0x200000000000L; |
| public final static long Bit47L = 0x400000000000L; |
| public final static long Bit48L = 0x800000000000L; |
| public final static long Bit49L = 0x1000000000000L; |
| public final static long Bit50L = 0x2000000000000L; |
| public final static long Bit51L = 0x4000000000000L; |
| public final static long Bit52L = 0x8000000000000L; |
| public final static long Bit53L = 0x10000000000000L; |
| public final static long Bit54L = 0x20000000000000L; |
| public final static long Bit55L = 0x40000000000000L; |
| public final static long Bit56L = 0x80000000000000L; |
| public final static long Bit57L = 0x100000000000000L; |
| public final static long Bit58L = 0x200000000000000L; |
| public final static long Bit59L = 0x400000000000000L; |
| public final static long Bit60L = 0x800000000000000L; |
| public final static long Bit61L = 0x1000000000000000L; |
| public final static long Bit62L = 0x2000000000000000L; |
| public final static long Bit63L = 0x4000000000000000L; |
| public final static long Bit64L = 0x8000000000000000L; |
| |
| public final static long[] Bits = { |
| Bit1, Bit2, Bit3, Bit4, Bit5, Bit6, |
| Bit7, Bit8, Bit9, Bit10, Bit11, Bit12, |
| Bit13, Bit14, Bit15, Bit16, Bit17, Bit18, |
| Bit19, Bit20, Bit21, Bit22, Bit23, Bit24, |
| Bit25, Bit26, Bit27, Bit28, Bit29, Bit30, |
| Bit31, Bit32, Bit33L, Bit34L, Bit35L, Bit36L, |
| Bit37L, Bit38L, Bit39L, Bit40L, Bit41L, Bit42L, |
| Bit43L, Bit44L, Bit45L, Bit46L, Bit47L, Bit48L, |
| Bit49L, Bit50L, Bit51L, Bit52L, Bit53L, Bit54L, |
| Bit55L, Bit56L, Bit57L, Bit58L, Bit59L, Bit60L, |
| Bit61L, Bit62L, Bit63L, Bit64L, |
| }; |
| |
| private static final int START_INDEX = 0; |
| |
| private static final int PART_INDEX = 1; |
| |
| private static long[][][] Tables; |
| |
| public final static int MAX_OBVIOUS = 128; |
| |
| public final static int[] OBVIOUS_IDENT_CHAR_NATURES = new int[MAX_OBVIOUS]; |
| |
| public final static int C_JLS_SPACE = 0x100; |
| |
| public final static int C_SPECIAL = 0x80; |
| |
| public final static int C_IDENT_START = 0x40; |
| |
| public final static int C_UPPER_LETTER = 0x20; |
| |
| public final static int C_LOWER_LETTER = 0x10; |
| |
| public final static int C_IDENT_PART = 0x8; |
| |
| public final static int C_DIGIT = 0x4; |
| |
| public final static int C_SEPARATOR = 0x2; |
| |
| public final static int C_SPACE = 0x1; |
| |
| static { |
| OBVIOUS_IDENT_CHAR_NATURES[0] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[1] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[2] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[3] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[4] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[5] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[6] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[7] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[8] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[14] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[15] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[16] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[17] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[18] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[19] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[20] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[21] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[22] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[23] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[24] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[25] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[26] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[27] = C_IDENT_PART; |
| OBVIOUS_IDENT_CHAR_NATURES[127] = C_IDENT_PART; |
| |
| for (int i = '0'; i <= '9'; i++) |
| OBVIOUS_IDENT_CHAR_NATURES[i] = C_DIGIT | C_IDENT_PART; |
| |
| for (int i = 'a'; i <= 'z'; i++) |
| OBVIOUS_IDENT_CHAR_NATURES[i] = C_LOWER_LETTER | C_IDENT_PART |
| | C_IDENT_START; |
| for (int i = 'A'; i <= 'Z'; i++) |
| OBVIOUS_IDENT_CHAR_NATURES[i] = C_UPPER_LETTER | C_IDENT_PART |
| | C_IDENT_START; |
| |
| OBVIOUS_IDENT_CHAR_NATURES['_'] = C_SPECIAL | C_IDENT_PART |
| | C_IDENT_START; |
| OBVIOUS_IDENT_CHAR_NATURES['$'] = C_SPECIAL | C_IDENT_PART |
| | C_IDENT_START; |
| |
| OBVIOUS_IDENT_CHAR_NATURES[9] = C_SPACE | C_JLS_SPACE; // \ u0009: |
| // HORIZONTAL |
| // TABULATION |
| OBVIOUS_IDENT_CHAR_NATURES[10] = C_SPACE | C_JLS_SPACE; // \ u000a: LINE |
| // FEED |
| OBVIOUS_IDENT_CHAR_NATURES[11] = C_SPACE; |
| OBVIOUS_IDENT_CHAR_NATURES[12] = C_SPACE | C_JLS_SPACE; // \ u000c: FORM |
| // FEED |
| OBVIOUS_IDENT_CHAR_NATURES[13] = C_SPACE | C_JLS_SPACE; // \ u000d: |
| // CARRIAGE |
| // RETURN |
| OBVIOUS_IDENT_CHAR_NATURES[28] = C_SPACE; |
| OBVIOUS_IDENT_CHAR_NATURES[29] = C_SPACE; |
| OBVIOUS_IDENT_CHAR_NATURES[30] = C_SPACE; |
| OBVIOUS_IDENT_CHAR_NATURES[31] = C_SPACE; |
| OBVIOUS_IDENT_CHAR_NATURES[32] = C_SPACE | C_JLS_SPACE; // \ u0020: |
| // SPACE |
| |
| OBVIOUS_IDENT_CHAR_NATURES['.'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES[':'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES[';'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES[','] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['['] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES[']'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['('] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES[')'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['{'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['}'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['+'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['-'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['*'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['/'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['='] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['&'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['|'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['?'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['<'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['>'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['!'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['%'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['^'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['~'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['"'] = C_SEPARATOR; |
| OBVIOUS_IDENT_CHAR_NATURES['\''] = C_SEPARATOR; |
| } |
| |
| static { |
| Tables = new long[2][][]; |
| Tables[START_INDEX] = new long[2][]; |
| Tables[PART_INDEX] = new long[3][]; |
| try { |
| DataInputStream inputStream = new DataInputStream( |
| ScannerHelper.class.getResourceAsStream("start1.rsc")); //$NON-NLS-1$ |
| long[] readValues = new long[1024]; |
| for (int i = 0; i < 1024; i++) { |
| readValues[i] = inputStream.readLong(); |
| } |
| inputStream.close(); |
| Tables[START_INDEX][0] = readValues; |
| } catch (FileNotFoundException e) { |
| e.printStackTrace(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| try { |
| DataInputStream inputStream = new DataInputStream( |
| ScannerHelper.class.getResourceAsStream("start2.rsc")); //$NON-NLS-1$ |
| long[] readValues = new long[1024]; |
| for (int i = 0; i < 1024; i++) { |
| readValues[i] = inputStream.readLong(); |
| } |
| inputStream.close(); |
| Tables[START_INDEX][1] = readValues; |
| } catch (FileNotFoundException e) { |
| e.printStackTrace(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| try { |
| DataInputStream inputStream = new DataInputStream( |
| ScannerHelper.class.getResourceAsStream("part1.rsc")); //$NON-NLS-1$ |
| long[] readValues = new long[1024]; |
| for (int i = 0; i < 1024; i++) { |
| readValues[i] = inputStream.readLong(); |
| } |
| inputStream.close(); |
| Tables[PART_INDEX][0] = readValues; |
| } catch (FileNotFoundException e) { |
| e.printStackTrace(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| try { |
| DataInputStream inputStream = new DataInputStream( |
| ScannerHelper.class.getResourceAsStream("part2.rsc")); //$NON-NLS-1$ |
| long[] readValues = new long[1024]; |
| for (int i = 0; i < 1024; i++) { |
| readValues[i] = inputStream.readLong(); |
| } |
| inputStream.close(); |
| Tables[PART_INDEX][1] = readValues; |
| } catch (FileNotFoundException e) { |
| e.printStackTrace(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| try { |
| DataInputStream inputStream = new DataInputStream( |
| ScannerHelper.class.getResourceAsStream("part14.rsc")); //$NON-NLS-1$ |
| long[] readValues = new long[1024]; |
| for (int i = 0; i < 1024; i++) { |
| readValues[i] = inputStream.readLong(); |
| } |
| inputStream.close(); |
| Tables[PART_INDEX][2] = readValues; |
| } catch (FileNotFoundException e) { |
| e.printStackTrace(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| |
| private final static boolean isBitSet(long[] values, int i) { |
| try { |
| return (values[i / 64] & Bits[i % 64]) != 0; |
| } catch (NullPointerException e) { |
| return false; |
| } |
| } |
| |
| public static boolean isScriptIdentifierPart(char c) { |
| if (c < MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_IDENT_PART) != 0; |
| } |
| return Character.isJavaIdentifierPart(c); |
| } |
| |
| public static boolean isScriptIdentifierPart(char high, char low) { |
| int codePoint = toCodePoint(high, low); |
| switch ((codePoint & 0x1F0000) >> 16) { |
| case 0: |
| return Character.isJavaIdentifierPart((char) codePoint); |
| case 1: |
| return isBitSet(Tables[PART_INDEX][0], codePoint & 0xFFFF); |
| case 2: |
| return isBitSet(Tables[PART_INDEX][1], codePoint & 0xFFFF); |
| case 14: |
| return isBitSet(Tables[PART_INDEX][2], codePoint & 0xFFFF); |
| } |
| return false; |
| } |
| |
| public static boolean isScriptIdentifierStart(char c) { |
| if (c < MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_IDENT_START) != 0; |
| } |
| return Character.isJavaIdentifierStart(c); |
| } |
| |
| public static boolean isScriptIdentifierStart(char high, char low) { |
| int codePoint = toCodePoint(high, low); |
| switch ((codePoint & 0x1F0000) >> 16) { |
| case 0: |
| return Character.isJavaIdentifierStart((char) codePoint); |
| case 1: |
| return isBitSet(Tables[START_INDEX][0], codePoint & 0xFFFF); |
| case 2: |
| return isBitSet(Tables[START_INDEX][1], codePoint & 0xFFFF); |
| } |
| return false; |
| } |
| |
| private static int toCodePoint(char high, char low) { |
| return (high - HIGH_SURROGATE_MIN_VALUE) * 0x400 |
| + (low - LOW_SURROGATE_MIN_VALUE) + 0x10000; |
| } |
| |
| public static boolean isDigit(char c) throws InvalidInputException { |
| if (c < ScannerHelper.MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_DIGIT) != 0; |
| } |
| if (Character.isDigit(c)) { |
| throw new InvalidInputException("Invalid_Digit"); //$NON-NLS-1$ |
| } |
| return false; |
| } |
| |
| public static int digit(char c, int radix) { |
| if (c < ScannerHelper.MAX_OBVIOUS) { |
| switch (radix) { |
| case 8: |
| if (c >= 48 && c <= 55) { |
| return c - 48; |
| } |
| return -1; |
| case 10: |
| if (c >= 48 && c <= 57) { |
| return c - 48; |
| } |
| return -1; |
| case 16: |
| if (c >= 48 && c <= 57) { |
| return c - 48; |
| } |
| if (c >= 65 && c <= 70) { |
| return c - 65 + 10; |
| } |
| if (c >= 97 && c <= 102) { |
| return c - 97 + 10; |
| } |
| return -1; |
| } |
| } |
| return Character.digit(c, radix); |
| } |
| |
| public static int getNumericValue(char c) { |
| if (c < ScannerHelper.MAX_OBVIOUS) { |
| switch (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c]) { |
| case C_DIGIT: |
| return c - '0'; |
| case C_LOWER_LETTER: |
| return 10 + c - 'a'; |
| case C_UPPER_LETTER: |
| return 10 + c - 'A'; |
| } |
| } |
| return Character.getNumericValue(c); |
| } |
| |
| public static char toUpperCase(char c) { |
| if (c < MAX_OBVIOUS) { |
| if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_UPPER_LETTER) != 0) { |
| return c; |
| } else if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_LOWER_LETTER) != 0) { |
| return (char) (c - 32); |
| } |
| } |
| return Character.toLowerCase(c); |
| } |
| |
| public static char toLowerCase(char c) { |
| if (c < MAX_OBVIOUS) { |
| if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_LOWER_LETTER) != 0) { |
| return c; |
| } else if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_UPPER_LETTER) != 0) { |
| return (char) (32 + c); |
| } |
| } |
| return Character.toLowerCase(c); |
| } |
| |
| public static boolean isLowerCase(char c) { |
| if (c < MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_LOWER_LETTER) != 0; |
| } |
| return Character.isLowerCase(c); |
| } |
| |
| public static boolean isUpperCase(char c) { |
| if (c < MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_UPPER_LETTER) != 0; |
| } |
| return Character.isUpperCase(c); |
| } |
| |
| /** |
| * Include also non JLS whitespaces. |
| * |
| * return true if Character.isWhitespace(c) would return true |
| */ |
| public static boolean isWhitespace(char c) { |
| if (c < MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_SPACE) != 0; |
| } |
| return Character.isWhitespace(c); |
| } |
| |
| public static boolean isLetter(char c) { |
| if (c < MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_LOWER_LETTER)) != 0; |
| } |
| return Character.isLetter(c); |
| } |
| |
| public static boolean isLetterOrDigit(char c) { |
| if (c < MAX_OBVIOUS) { |
| return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & (ScannerHelper.C_UPPER_LETTER |
| | ScannerHelper.C_LOWER_LETTER | ScannerHelper.C_DIGIT)) != 0; |
| } |
| return Character.isLetterOrDigit(c); |
| } |
| } |