| /***** BEGIN LICENSE BLOCK ***** |
| * Version: CPL 1.0/GPL 2.0/LGPL 2.1 |
| * |
| * The contents of this file are subject to the Common Public |
| * License Version 1.0 (the "License"); you may not use this file |
| * except in compliance with the License. You may obtain a copy of |
| * the License at http://www.eclipse.org/legal/cpl-v10.html |
| * |
| * Software distributed under the License is distributed on an "AS |
| * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or |
| * implied. See the License for the specific language governing |
| * rights and limitations under the License. |
| * |
| * Copyright (C) 2002 Benoit Cerrina <b.cerrina@wanadoo.fr> |
| * Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se> |
| * Copyright (C) 2002-2004 Jan Arne Petersen <jpetersen@uni-bonn.de> |
| * Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org> |
| * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de> |
| * Copyright (C) 2004-2005 David Corbin <dcorbin@users.sourceforge.net> |
| * Copyright (C) 2005 Zach Dennis <zdennis@mktec.com> |
| * Copyright (C) 2006 Thomas Corbat <tcorbat@hsr.ch> |
| * |
| * Alternatively, the contents of this file may be used under the terms of |
| * either of the GNU General Public License Version 2 or later (the "GPL"), |
| * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
| * in which case the provisions of the GPL or the LGPL are applicable instead |
| * of those above. If you wish to allow use of your version of this file only |
| * under the terms of either the GPL or the LGPL, and not to allow others to |
| * use your version of this file under the terms of the CPL, indicate your |
| * decision by deleting the provisions above and replace them with the notice |
| * and other provisions required by the GPL or the LGPL. If you do not delete |
| * the provisions above, a recipient may use your version of this file under |
| * the terms of any one of the CPL, the GPL or the LGPL. |
| ***** END LICENSE BLOCK *****/ |
| package org.jruby.lexer.yacc; |
| |
| import java.io.IOException; |
| import java.math.BigInteger; |
| |
| import org.jruby.ast.BackRefNode; |
| import org.jruby.ast.BignumNode; |
| import org.jruby.ast.CommentNode; |
| import org.jruby.ast.FixnumNode; |
| import org.jruby.ast.FloatNode; |
| import org.jruby.ast.NthRefNode; |
| import org.jruby.common.IRubyWarnings; |
| import org.jruby.parser.BlockStaticScope; |
| import org.jruby.parser.ParserSupport; |
| import org.jruby.parser.StaticScope; |
| import org.jruby.parser.Tokens; |
| import org.jruby.util.IdUtil; |
| |
| /** This is a port of the MRI lexer to Java it is compatible to Ruby 1.8.1. |
| */ |
| public class RubyYaccLexer { |
| // Last token read via yylex(). |
| private int token; |
| |
| // Value of last token which had a value associated with it. |
| Object yaccValue; |
| |
| // Stream of data that yylex() examines. |
| private LexerSource src; |
| |
| // Used for tiny smidgen of grammar in lexer (see setParserSupport()) |
| private ParserSupport parserSupport = null; |
| |
| // What handles warnings |
| private IRubyWarnings warnings; |
| |
| // Additional context surrounding tokens that both the lexer and |
| // grammar use. |
| private LexState lex_state; |
| |
| // Tempory buffer to build up a potential token. Consumer takes responsibility to reset |
| // this before use. |
| private StringBuffer tokenBuffer = new StringBuffer(60); |
| |
| private StackState conditionState = new StackState(); |
| private StackState cmdArgumentState = new StackState(); |
| private StrTerm lex_strterm; |
| private boolean commandStart; |
| |
| // Give a name to a value. Enebo: This should be used more. |
| static final int EOF = 0; |
| |
| // ruby constants for strings (should this be moved somewhere else?) |
| static final int STR_FUNC_ESCAPE=0x01; |
| static final int STR_FUNC_EXPAND=0x02; |
| static final int STR_FUNC_REGEXP=0x04; |
| static final int STR_FUNC_QWORDS=0x08; |
| static final int STR_FUNC_SYMBOL=0x10; |
| static final int STR_FUNC_INDENT=0x20; |
| |
| private final int str_squote = 0; |
| private final int str_dquote = STR_FUNC_EXPAND; |
| private final int str_xquote = STR_FUNC_EXPAND; |
| private final int str_regexp = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND; |
| private final int str_ssym = STR_FUNC_SYMBOL; |
| private final int str_dsym = STR_FUNC_SYMBOL | STR_FUNC_EXPAND; |
| |
| public RubyYaccLexer() { |
| reset(); |
| } |
| |
| public void reset() { |
| token = 0; |
| yaccValue = null; |
| src = null; |
| lex_state = null; |
| resetStacks(); |
| lex_strterm = null; |
| commandStart = true; |
| } |
| |
| /** |
| * How the parser advances to the next token. |
| * |
| * @return true if not at end of file (EOF). |
| */ |
| public boolean advance() throws IOException { |
| return (token = yylex()) != EOF; |
| } |
| |
| /** |
| * Last token read from the lexer at the end of a call to yylex() |
| * |
| * @return last token read |
| */ |
| public int token() { |
| return token; |
| } |
| |
| public StringBuffer getTokenBuffer() { |
| return tokenBuffer; |
| } |
| |
| /** |
| * Value of last token (if it is a token which has a value). |
| * |
| * @return value of last value-laden token |
| */ |
| public Object value() { |
| return yaccValue; |
| } |
| |
| public ISourcePositionFactory getPositionFactory() { |
| return src.getPositionFactory(); |
| } |
| |
| /** |
| * Get position information for Token/Node that follows node represented by startPosition |
| * and current lexer location. |
| * |
| * @param startPosition previous node/token |
| * @param inclusive include previous node into position information of current node |
| * @return a new position |
| */ |
| public ISourcePosition getPosition(ISourcePosition startPosition, boolean inclusive) { |
| return src.getPosition(startPosition, inclusive); |
| } |
| |
| public ISourcePosition getPosition() { |
| return src.getPosition(null, false); |
| } |
| |
| /** |
| * Parse must pass its support object for some check at bottom of |
| * yylex(). Ruby does it this way as well (i.e. a little parsing |
| * logic in the lexer). |
| * |
| * @param parserSupport |
| */ |
| public void setParserSupport(ParserSupport parserSupport) { |
| this.parserSupport = parserSupport; |
| } |
| |
| /** |
| * Allow the parser to set the source for its lexer. |
| * |
| * @param source where the lexer gets raw data |
| */ |
| public void setSource(LexerSource source) { |
| this.src = source; |
| } |
| |
| public StrTerm getStrTerm() { |
| return lex_strterm; |
| } |
| |
| public void setStrTerm(StrTerm strterm) { |
| this.lex_strterm = strterm; |
| } |
| |
| public void resetStacks() { |
| conditionState.reset(); |
| cmdArgumentState.reset(); |
| } |
| |
| public void setWarnings(IRubyWarnings warnings) { |
| this.warnings = warnings; |
| } |
| |
| |
| public void setState(LexState state) { |
| this.lex_state = state; |
| } |
| |
| public StackState getCmdArgumentState() { |
| return cmdArgumentState; |
| } |
| |
| public StackState getConditionState() { |
| return conditionState; |
| } |
| |
| public void setValue(Object yaccValue) { |
| this.yaccValue = yaccValue; |
| } |
| |
| private boolean isNext_identchar() throws IOException { |
| char c = src.read(); |
| src.unread(c); |
| |
| return c != EOF && (Character.isLetterOrDigit(c) || c == '-'); |
| } |
| |
| private Object getInteger(String value, int radix) { |
| try { |
| return new FixnumNode(getPosition(), Long.parseLong(value, radix)); |
| } catch (NumberFormatException e) { |
| return new BignumNode(getPosition(), new BigInteger(value, radix)); |
| } |
| } |
| |
| /** |
| * Do the next characters from the source match provided String in a case insensitive manner. |
| * If so, then consume those characters and that string. Otherwise, consume none of them and |
| * return null. |
| * |
| * @param s to be matched against |
| * @return string if string matches, null otherwise |
| */ |
| private String isNextNoCase(String s) throws IOException { |
| StringBuffer buf = new StringBuffer(); |
| |
| for (int i = 0; i < s.length(); i++) { |
| char c = s.charAt(i); |
| char r = src.read(); |
| buf.append(r); |
| |
| if (Character.toLowerCase(c) != r && |
| Character.toUpperCase(c) != r) { |
| src.unreadMany(buf); |
| return null; |
| } |
| } |
| |
| return buf.toString(); |
| } |
| |
| /** |
| * @param c the character to test |
| * @return true if character is a hex value (0-9a-f) |
| */ |
| static final boolean isHexChar(char c) { |
| return Character.isDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); |
| } |
| |
| /** |
| * @param c the character to test |
| * @return true if character is an octal value (0-7) |
| */ |
| static final boolean isOctChar(char c) { |
| return '0' <= c && c <= '7'; |
| } |
| |
| /** |
| * @param c is character to be compared |
| * @return whether c is an identifier or not |
| */ |
| private static final boolean isIdentifierChar(char c) { |
| return Character.isLetterOrDigit(c) || c == '_'; |
| } |
| |
| /** |
| * What type/kind of quote are we dealing with? |
| * |
| * @param c first character the the quote construct |
| * @return a token that specifies the quote type |
| */ |
| private int parseQuote(char c) throws IOException { |
| char begin, end; |
| boolean shortHand; |
| |
| // Short-hand (e.g. %{,%.,%!,... versus %Q{). |
| if (!Character.isLetterOrDigit(c)) { |
| begin = c; |
| c = 'Q'; |
| shortHand = true; |
| // Long-hand (e.g. %Q{}). |
| } else { |
| shortHand = false; |
| begin = src.read(); |
| if (Character.isLetterOrDigit(begin) /* no mb || ismbchar(term)*/) { |
| throw new SyntaxException(getPosition(), "unknown type of %string"); |
| } |
| } |
| if (c == EOF || begin == EOF) { |
| throw new SyntaxException(getPosition(), "unterminated quoted string meets end of file"); |
| } |
| |
| // Figure end-char. '\0' is special to indicate begin=end and that no nesting? |
| if (begin == '(') end = ')'; |
| else if (begin == '[') end = ']'; |
| else if (begin == '{') end = '}'; |
| else if (begin == '<') end = '>'; |
| else { end = begin; begin = '\0'; }; |
| |
| switch (c) { |
| case 'Q': |
| lex_strterm = new StringTerm(str_dquote, end, begin); |
| yaccValue = new Token("%"+ (shortHand ? (""+end) : ("" + c + begin)), getPosition()); |
| return Tokens.tSTRING_BEG; |
| |
| case 'q': |
| lex_strterm = new StringTerm(str_squote, end, begin); |
| yaccValue = new Token("%"+c+begin, getPosition()); |
| return Tokens.tSTRING_BEG; |
| |
| case 'W': |
| lex_strterm = new StringTerm(str_dquote | STR_FUNC_QWORDS, end, begin); |
| do {c = src.read();} while (Character.isWhitespace(c)); |
| src.unread(c); |
| yaccValue = new Token("%"+c+begin, getPosition()); |
| return Tokens.tWORDS_BEG; |
| |
| case 'w': |
| lex_strterm = new StringTerm(str_squote | STR_FUNC_QWORDS, end, begin); |
| do {c = src.read();} while (Character.isWhitespace(c)); |
| src.unread(c); |
| yaccValue = new Token("%"+c+begin, getPosition()); |
| return Tokens.tQWORDS_BEG; |
| |
| case 'x': |
| lex_strterm = new StringTerm(str_xquote, end, begin); |
| yaccValue = new Token("%"+c+begin, getPosition()); |
| return Tokens.tXSTRING_BEG; |
| |
| case 'r': |
| lex_strterm = new StringTerm(str_regexp, end, begin); |
| yaccValue = new Token("%"+c+begin, getPosition()); |
| return Tokens.tREGEXP_BEG; |
| |
| case 's': |
| lex_strterm = new StringTerm(str_ssym, end, begin); |
| lex_state = LexState.EXPR_FNAME; |
| yaccValue = new Token("%"+c+begin, getPosition()); |
| return Tokens.tSYMBEG; |
| |
| default: |
| throw new SyntaxException(getPosition(), "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'."); |
| } |
| } |
| |
| private int hereDocumentIdentifier() throws IOException { |
| char c = src.read(); |
| int term; |
| |
| int func = 0; |
| if (c == '-') { |
| c = src.read(); |
| func = STR_FUNC_INDENT; |
| } |
| |
| if (c == '\'' || c == '"' || c == '`') { |
| if (c == '\'') { |
| func |= str_squote; |
| } else if (c == '"') { |
| func |= str_dquote; |
| } else { |
| func |= str_xquote; |
| } |
| |
| tokenBuffer.setLength(0); |
| term = c; |
| while ((c = src.read()) != EOF && c != term) { |
| tokenBuffer.append(c); |
| } |
| if (c == EOF) { |
| throw new SyntaxException(getPosition(), "unterminated here document identifier"); |
| } |
| } else { |
| if (!isIdentifierChar(c)) { |
| src.unread(c); |
| if ((func & STR_FUNC_INDENT) != 0) { |
| src.unread('-'); |
| } |
| return 0; |
| } |
| tokenBuffer.setLength(0); |
| term = '"'; |
| func |= str_dquote; |
| do { |
| tokenBuffer.append(c); |
| } while ((c = src.read()) != EOF && isIdentifierChar(c)); |
| src.unread(c); |
| } |
| |
| String line = src.readLine() + '\n'; |
| String tok = tokenBuffer.toString(); |
| lex_strterm = new HeredocTerm(tok, func, line); |
| |
| if (term == '`') { |
| yaccValue = new Token("`", getPosition()); |
| return Tokens.tXSTRING_BEG; |
| } |
| |
| yaccValue = new Token("\"", getPosition()); |
| // Hacky: Advance position to eat newline here.... |
| getPosition(); |
| return Tokens.tSTRING_BEG; |
| } |
| |
| private void arg_ambiguous() { |
| warnings.warning(getPosition(), "Ambiguous first argument; make sure."); |
| } |
| |
| /** |
| * Read a comment up to end of line. When found each comment will get stored away into |
| * the parser result so that any interested party can use them as they seem fit. One idea |
| * is that IDE authors can do distance based heuristics to associate these comments to the |
| * AST node they think they belong to. |
| * |
| * @param c last character read from lexer source |
| * @return newline or eof value |
| */ |
| protected int readComment(char c) throws IOException { |
| ISourcePosition startPosition = src.getPosition(); |
| tokenBuffer.setLength(0); |
| tokenBuffer.append(c); |
| |
| // FIXME: Consider making a better LexerSource.readLine |
| while ((c = src.read()) != '\n') { |
| tokenBuffer.append(c); |
| if (c == EOF) { |
| break; |
| } |
| } |
| src.unread(c); |
| |
| // Store away each comment to parser result so IDEs can do whatever they want with them. |
| ISourcePosition position = startPosition.union(getPosition()); |
| parserSupport.getResult().addComment(new CommentNode(position, tokenBuffer.toString())); |
| |
| return c; |
| } |
| |
| /* |
| * Not normally used, but is left in here since it can be useful in debugging |
| * grammar and lexing problems. |
| private void printToken(int token) { |
| //System.out.print("LOC: " + support.getPosition() + " ~ "); |
| |
| switch (token) { |
| case Tokens.yyErrorCode: System.err.print("yyErrorCode,"); break; |
| case Tokens.kCLASS: System.err.print("kClass,"); break; |
| case Tokens.kMODULE: System.err.print("kModule,"); break; |
| case Tokens.kDEF: System.err.print("kDEF,"); break; |
| case Tokens.kUNDEF: System.err.print("kUNDEF,"); break; |
| case Tokens.kBEGIN: System.err.print("kBEGIN,"); break; |
| case Tokens.kRESCUE: System.err.print("kRESCUE,"); break; |
| case Tokens.kENSURE: System.err.print("kENSURE,"); break; |
| case Tokens.kEND: System.err.print("kEND,"); break; |
| case Tokens.kIF: System.err.print("kIF,"); break; |
| case Tokens.kUNLESS: System.err.print("kUNLESS,"); break; |
| case Tokens.kTHEN: System.err.print("kTHEN,"); break; |
| case Tokens.kELSIF: System.err.print("kELSIF,"); break; |
| case Tokens.kELSE: System.err.print("kELSE,"); break; |
| case Tokens.kCASE: System.err.print("kCASE,"); break; |
| case Tokens.kWHEN: System.err.print("kWHEN,"); break; |
| case Tokens.kWHILE: System.err.print("kWHILE,"); break; |
| case Tokens.kUNTIL: System.err.print("kUNTIL,"); break; |
| case Tokens.kFOR: System.err.print("kFOR,"); break; |
| case Tokens.kBREAK: System.err.print("kBREAK,"); break; |
| case Tokens.kNEXT: System.err.print("kNEXT,"); break; |
| case Tokens.kREDO: System.err.print("kREDO,"); break; |
| case Tokens.kRETRY: System.err.print("kRETRY,"); break; |
| case Tokens.kIN: System.err.print("kIN,"); break; |
| case Tokens.kDO: System.err.print("kDO,"); break; |
| case Tokens.kDO_COND: System.err.print("kDO_COND,"); break; |
| case Tokens.kDO_BLOCK: System.err.print("kDO_BLOCK,"); break; |
| case Tokens.kRETURN: System.err.print("kRETURN,"); break; |
| case Tokens.kYIELD: System.err.print("kYIELD,"); break; |
| case Tokens.kSUPER: System.err.print("kSUPER,"); break; |
| case Tokens.kSELF: System.err.print("kSELF,"); break; |
| case Tokens.kNIL: System.err.print("kNIL,"); break; |
| case Tokens.kTRUE: System.err.print("kTRUE,"); break; |
| case Tokens.kFALSE: System.err.print("kFALSE,"); break; |
| case Tokens.kAND: System.err.print("kAND,"); break; |
| case Tokens.kOR: System.err.print("kOR,"); break; |
| case Tokens.kNOT: System.err.print("kNOT,"); break; |
| case Tokens.kIF_MOD: System.err.print("kIF_MOD,"); break; |
| case Tokens.kUNLESS_MOD: System.err.print("kUNLESS_MOD,"); break; |
| case Tokens.kWHILE_MOD: System.err.print("kWHILE_MOD,"); break; |
| case Tokens.kUNTIL_MOD: System.err.print("kUNTIL_MOD,"); break; |
| case Tokens.kRESCUE_MOD: System.err.print("kRESCUE_MOD,"); break; |
| case Tokens.kALIAS: System.err.print("kALIAS,"); break; |
| case Tokens.kDEFINED: System.err.print("kDEFINED,"); break; |
| case Tokens.klBEGIN: System.err.print("klBEGIN,"); break; |
| case Tokens.klEND: System.err.print("klEND,"); break; |
| case Tokens.k__LINE__: System.err.print("k__LINE__,"); break; |
| case Tokens.k__FILE__: System.err.print("k__FILE__,"); break; |
| case Tokens.tIDENTIFIER: System.err.print("tIDENTIFIER["+ value() + "],"); break; |
| case Tokens.tFID: System.err.print("tFID[" + value() + "],"); break; |
| case Tokens.tGVAR: System.err.print("tGVAR[" + value() + "],"); break; |
| case Tokens.tIVAR: System.err.print("tIVAR[" + value() +"],"); break; |
| case Tokens.tCONSTANT: System.err.print("tCONSTANT["+ value() +"],"); break; |
| case Tokens.tCVAR: System.err.print("tCVAR,"); break; |
| case Tokens.tINTEGER: System.err.print("tINTEGER,"); break; |
| case Tokens.tFLOAT: System.err.print("tFLOAT,"); break; |
| case Tokens.tSTRING_CONTENT: System.err.print("tSTRING_CONTENT[" + yaccValue + "],"); break; |
| case Tokens.tSTRING_BEG: System.err.print("tSTRING_BEG,"); break; |
| case Tokens.tSTRING_END: System.err.print("tSTRING_END,"); break; |
| case Tokens.tSTRING_DBEG: System.err.print("STRING_DBEG,"); break; |
| case Tokens.tSTRING_DVAR: System.err.print("tSTRING_DVAR,"); break; |
| case Tokens.tXSTRING_BEG: System.err.print("tXSTRING_BEG,"); break; |
| case Tokens.tREGEXP_BEG: System.err.print("tREGEXP_BEG,"); break; |
| case Tokens.tREGEXP_END: System.err.print("tREGEXP_END,"); break; |
| case Tokens.tWORDS_BEG: System.err.print("tWORDS_BEG,"); break; |
| case Tokens.tQWORDS_BEG: System.err.print("tQWORDS_BEG,"); break; |
| case Tokens.tBACK_REF: System.err.print("tBACK_REF,"); break; |
| case Tokens.tNTH_REF: System.err.print("tNTH_REF,"); break; |
| case Tokens.tUPLUS: System.err.print("tUPLUS"); break; |
| case Tokens.tUMINUS: System.err.print("tUMINUS,"); break; |
| case Tokens.tPOW: System.err.print("tPOW,"); break; |
| case Tokens.tCMP: System.err.print("tCMP,"); break; |
| case Tokens.tEQ: System.err.print("tEQ,"); break; |
| case Tokens.tEQQ: System.err.print("tEQQ,"); break; |
| case Tokens.tNEQ: System.err.print("tNEQ,"); break; |
| case Tokens.tGEQ: System.err.print("tGEQ,"); break; |
| case Tokens.tLEQ: System.err.print("tLEQ,"); break; |
| case Tokens.tANDOP: System.err.print("tANDOP,"); break; |
| case Tokens.tOROP: System.err.print("tOROP,"); break; |
| case Tokens.tMATCH: System.err.print("tMATCH,"); break; |
| case Tokens.tNMATCH: System.err.print("tNMATCH,"); break; |
| case Tokens.tDOT2: System.err.print("tDOT2,"); break; |
| case Tokens.tDOT3: System.err.print("tDOT3,"); break; |
| case Tokens.tAREF: System.err.print("tAREF,"); break; |
| case Tokens.tASET: System.err.print("tASET,"); break; |
| case Tokens.tLSHFT: System.err.print("tLSHFT,"); break; |
| case Tokens.tRSHFT: System.err.print("tRSHFT,"); break; |
| case Tokens.tCOLON2: System.err.print("tCOLON2,"); break; |
| case Tokens.tCOLON3: System.err.print("tCOLON3,"); break; |
| case Tokens.tOP_ASGN: System.err.print("tOP_ASGN,"); break; |
| case Tokens.tASSOC: System.err.print("tASSOC,"); break; |
| case Tokens.tLPAREN: System.err.print("tLPAREN,"); break; |
| case Tokens.tLPAREN_ARG: System.err.print("tLPAREN_ARG,"); break; |
| case Tokens.tLBRACK: System.err.print("tLBRACK,"); break; |
| case Tokens.tLBRACE: System.err.print("tLBRACE,"); break; |
| case Tokens.tSTAR: System.err.print("tSTAR,"); break; |
| case Tokens.tAMPER: System.err.print("tAMPER,"); break; |
| case Tokens.tSYMBEG: System.err.print("tSYMBEG,"); break; |
| case '\n': System.err.println("NL"); break; |
| default: System.err.print("'" + (int)token + "',"); break; |
| } |
| } |
| |
| // DEBUGGING HELP |
| private int yylex() { |
| int token = yylex2(); |
| |
| printToken(token); |
| |
| return token; |
| } |
| */ |
| |
| /** |
| * Returns the next token. Also sets yyVal is needed. |
| * |
| *@return Description of the Returned Value |
| */ |
| private int yylex() throws IOException { |
| char c; |
| boolean spaceSeen = false; |
| boolean commandState; |
| |
| if (lex_strterm != null) { |
| int tok = lex_strterm.parseString(this, src); |
| if (tok == Tokens.tSTRING_END || tok == Tokens.tREGEXP_END) { |
| lex_strterm = null; |
| lex_state = LexState.EXPR_END; |
| } |
| return tok; |
| } |
| |
| commandState = commandStart; |
| commandStart = false; |
| |
| LexState last_state = lex_state; |
| |
| retry: for(;;) { |
| c = src.read(); |
| switch(c) { |
| case '\004': /* ^D */ |
| case '\032': /* ^Z */ |
| case 0: /* end of script. */ |
| return 0; |
| |
| /* white spaces */ |
| case ' ': case '\t': case '\f': case '\r': |
| case '\13': /* '\v' */ |
| getPosition(); |
| spaceSeen = true; |
| continue retry; |
| case '#': /* it's a comment */ |
| if (readComment(c) == 0) return 0; |
| |
| /* fall through */ |
| case '\n': |
| // Replace a string of newlines with a single one |
| while((c = src.read()) == '\n') { |
| |
| } |
| src.unread( c ); |
| getPosition(); |
| |
| if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT || |
| lex_state == LexState.EXPR_CLASS) { |
| continue retry; |
| } |
| |
| commandStart = true; |
| lex_state = LexState.EXPR_BEG; |
| return '\n'; |
| |
| case '*': |
| if ((c = src.read()) == '*') { |
| if ((c = src.read()) == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("**", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| yaccValue = new Token("**", getPosition()); |
| c = Tokens.tPOW; |
| } else { |
| if (c == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("*", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)) { |
| warnings.warning(getPosition(), "`*' interpreted as argument prefix"); |
| c = Tokens.tSTAR; |
| } else if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID) { |
| c = Tokens.tSTAR; |
| } else { |
| c = Tokens.tSTAR2; |
| } |
| yaccValue = new Token("*", getPosition()); |
| } |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| return c; |
| |
| case '!': |
| lex_state = LexState.EXPR_BEG; |
| if ((c = src.read()) == '=') { |
| yaccValue = new Token("!=",getPosition()); |
| return Tokens.tNEQ; |
| } |
| if (c == '~') { |
| yaccValue = new Token("!~",getPosition()); |
| return Tokens.tNMATCH; |
| } |
| src.unread(c); |
| yaccValue = new Token("!",getPosition()); |
| return Tokens.tBANG; |
| |
| case '=': |
| // documentation nodes |
| if (src.wasBeginOfLine()) { |
| String equalLabel; |
| if ((equalLabel = isNextNoCase("begin")) != null) { |
| tokenBuffer.setLength(0); |
| tokenBuffer.append(equalLabel); |
| c = src.read(); |
| |
| if (Character.isWhitespace(c)) { |
| // In case last next was the newline. |
| src.unread(c); |
| for (;;) { |
| c = src.read(); |
| tokenBuffer.append(c); |
| |
| // If a line is followed by a blank line put |
| // it back. |
| while (c == '\n') { |
| c = src.read(); |
| tokenBuffer.append(c); |
| } |
| if (c == EOF) { |
| throw new SyntaxException(getPosition(), "embedded document meets end of file"); |
| } |
| if (c != '=') continue; |
| if (src.wasBeginOfLine() && (equalLabel = isNextNoCase("end")) != null) { |
| tokenBuffer.append(equalLabel); |
| tokenBuffer.append(src.readLine()); |
| src.unread('\n'); |
| break; |
| } |
| } |
| |
| parserSupport.getResult().addComment(new CommentNode(getPosition(), tokenBuffer.toString())); |
| continue retry; |
| } |
| src.unread(c); |
| } |
| } |
| |
| if (lex_state == LexState.EXPR_FNAME || lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| |
| c = src.read(); |
| if (c == '=') { |
| c = src.read(); |
| if (c == '=') { |
| yaccValue = new Token("===", getPosition()); |
| return Tokens.tEQQ; |
| } |
| src.unread(c); |
| yaccValue = new Token("==", getPosition()); |
| return Tokens.tEQ; |
| } |
| if (c == '~') { |
| yaccValue = new Token("=~", getPosition()); |
| return Tokens.tMATCH; |
| } else if (c == '>') { |
| yaccValue = new Token("=>", getPosition()); |
| return Tokens.tASSOC; |
| } |
| src.unread(c); |
| yaccValue = new Token("=", getPosition()); |
| return '='; |
| |
| case '<': |
| c = src.read(); |
| if (c == '<' && |
| lex_state != LexState.EXPR_END && |
| lex_state != LexState.EXPR_DOT && |
| lex_state != LexState.EXPR_ENDARG && |
| lex_state != LexState.EXPR_CLASS && |
| (!lex_state.isArgument() || spaceSeen)) { |
| int tok = hereDocumentIdentifier(); |
| if (tok != 0) return tok; |
| } |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| if (c == '=') { |
| if ((c = src.read()) == '>') { |
| yaccValue = new Token("<=>", getPosition()); |
| return Tokens.tCMP; |
| } |
| src.unread(c); |
| yaccValue = new Token("<=", getPosition()); |
| return Tokens.tLEQ; |
| } |
| if (c == '<') { |
| if ((c = src.read()) == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("<<", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| yaccValue = new Token("<<", getPosition()); |
| return Tokens.tLSHFT; |
| } |
| yaccValue = new Token("<", getPosition()); |
| src.unread(c); |
| return Tokens.tLT; |
| |
| case '>': |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| |
| if ((c = src.read()) == '=') { |
| yaccValue = new Token(">=", getPosition()); |
| return Tokens.tGEQ; |
| } |
| if (c == '>') { |
| if ((c = src.read()) == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token(">>", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| yaccValue = new Token(">>", getPosition()); |
| return Tokens.tRSHFT; |
| } |
| src.unread(c); |
| yaccValue = new Token(">", getPosition()); |
| return Tokens.tGT; |
| |
| case '"': |
| lex_strterm = new StringTerm(str_dquote, '"', '\0'); |
| yaccValue = new Token("\"", getPosition()); |
| return Tokens.tSTRING_BEG; |
| |
| case '`': |
| yaccValue = new Token("`", getPosition()); |
| if (lex_state == LexState.EXPR_FNAME) { |
| lex_state = LexState.EXPR_END; |
| return Tokens.tBACK_REF2; |
| } |
| if (lex_state == LexState.EXPR_DOT) { |
| if (commandState) { |
| lex_state = LexState.EXPR_CMDARG; |
| } else { |
| lex_state = LexState.EXPR_ARG; |
| } |
| return Tokens.tBACK_REF2; |
| } |
| lex_strterm = new StringTerm(str_xquote, '`', '\0'); |
| return Tokens.tXSTRING_BEG; |
| |
| case '\'': |
| lex_strterm = new StringTerm(str_squote, '\'', '\0'); |
| yaccValue = new Token("'", getPosition()); |
| return Tokens.tSTRING_BEG; |
| |
| case '?': |
| if (lex_state == LexState.EXPR_END || |
| lex_state == LexState.EXPR_ENDARG) { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("?",getPosition()); |
| return '?'; |
| } |
| c = src.read(); |
| if (c == EOF) { |
| throw new SyntaxException(getPosition(), "incomplete character syntax"); |
| } |
| if (Character.isWhitespace(c)){ |
| if (!lex_state.isArgument()){ |
| int c2 = 0; |
| switch (c) { |
| case ' ': |
| c2 = 's'; |
| break; |
| case '\n': |
| c2 = 'n'; |
| break; |
| case '\t': |
| c2 = 't'; |
| break; |
| /* What is \v in C? |
| case '\v': |
| c2 = 'v'; |
| break; |
| */ |
| case '\r': |
| c2 = 'r'; |
| break; |
| case '\f': |
| c2 = 'f'; |
| break; |
| } |
| if (c2 != 0) { |
| warnings.warn(getPosition(), "invalid character syntax; use ?\\" + c2); |
| } |
| } |
| src.unread(c); |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("?", getPosition()); |
| return '?'; |
| /*} else if (ismbchar(c)) { // ruby - we don't support them either? |
| rb_warn("multibyte character literal not supported yet; use ?\\" + c); |
| support.unread(c); |
| lexState = LexState.EXPR_BEG; |
| return '?';*/ |
| } else if ((Character.isLetterOrDigit(c) || c == '_') && |
| !src.peek('\n') && isNext_identchar()) { |
| src.unread(c); |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("?", getPosition()); |
| return '?'; |
| } else if (c == '\\') { |
| c = src.readEscape(); |
| } |
| c &= 0xff; |
| lex_state = LexState.EXPR_END; |
| yaccValue = new FixnumNode(getPosition(), c); |
| return Tokens.tINTEGER; |
| |
| case '&': |
| if ((c = src.read()) == '&') { |
| lex_state = LexState.EXPR_BEG; |
| if ((c = src.read()) == '=') { |
| yaccValue = new Token("&&", getPosition()); |
| lex_state = LexState.EXPR_BEG; |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| yaccValue = new Token("&&", getPosition()); |
| return Tokens.tANDOP; |
| } |
| else if (c == '=') { |
| yaccValue = new Token("&", getPosition()); |
| lex_state = LexState.EXPR_BEG; |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| //tmpPosition is required because getPosition()'s side effects. |
| //if the warning is generated, the getPosition() on line 954 (this line + 18) will create |
| //a wrong position if the "inclusive" flag is not set. |
| ISourcePosition tmpPosition = getPosition(); |
| if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)){ |
| warnings.warning(tmpPosition, "`&' interpreted as argument prefix"); |
| c = Tokens.tAMPER; |
| } else if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID) { |
| c = Tokens.tAMPER; |
| } else { |
| c = Tokens.tAMPER2; |
| } |
| |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| yaccValue = new Token("&", tmpPosition); |
| return c; |
| |
| case '|': |
| if ((c = src.read()) == '|') { |
| lex_state = LexState.EXPR_BEG; |
| if ((c = src.read()) == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("||", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| yaccValue = new Token("||", getPosition()); |
| return Tokens.tOROP; |
| } |
| if (c == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("|", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| src.unread(c); |
| yaccValue = new Token("|", getPosition()); |
| return Tokens.tPIPE; |
| |
| case '+': |
| c = src.read(); |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| if (c == '@') { |
| yaccValue = new Token("+@", getPosition()); |
| return Tokens.tUPLUS; |
| } |
| src.unread(c); |
| yaccValue = new Token("+", getPosition()); |
| return Tokens.tPLUS; |
| } |
| if (c == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("+", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID || |
| (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c))) { |
| if (lex_state.isArgument()) arg_ambiguous(); |
| lex_state = LexState.EXPR_BEG; |
| src.unread(c); |
| if (Character.isDigit(c)) { |
| c = '+'; |
| return parseNumber(c); |
| } |
| yaccValue = new Token("+", getPosition()); |
| return Tokens.tUPLUS; |
| } |
| lex_state = LexState.EXPR_BEG; |
| src.unread(c); |
| yaccValue = new Token("+", getPosition()); |
| return Tokens.tPLUS; |
| |
| case '-': |
| c = src.read(); |
| if (lex_state == LexState.EXPR_FNAME || lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| if (c == '@') { |
| yaccValue = new Token("-@", getPosition()); |
| return Tokens.tUMINUS; |
| } |
| src.unread(c); |
| yaccValue = new Token("-", getPosition()); |
| return Tokens.tMINUS; |
| } |
| if (c == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("-", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| if (lex_state == LexState.EXPR_BEG || lex_state == LexState.EXPR_MID || |
| (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c))) { |
| if (lex_state.isArgument()) arg_ambiguous(); |
| lex_state = LexState.EXPR_BEG; |
| src.unread(c); |
| yaccValue = new Token("-", getPosition()); |
| if (Character.isDigit(c)) { |
| return Tokens.tUMINUS_NUM; |
| } |
| return Tokens.tUMINUS; |
| } |
| lex_state = LexState.EXPR_BEG; |
| src.unread(c); |
| yaccValue = new Token("-", getPosition()); |
| return Tokens.tMINUS; |
| |
| case '.': |
| lex_state = LexState.EXPR_BEG; |
| if ((c = src.read()) == '.') { |
| if ((c = src.read()) == '.') { |
| yaccValue = new Token("...", getPosition()); |
| return Tokens.tDOT3; |
| } |
| src.unread(c); |
| yaccValue = new Token("..", getPosition()); |
| return Tokens.tDOT2; |
| } |
| src.unread(c); |
| if (Character.isDigit(c)) { |
| throw new SyntaxException(getPosition(), "no .<digit> floating literal anymore; put 0 before dot"); |
| } |
| lex_state = LexState.EXPR_DOT; |
| yaccValue = new Token(".", getPosition()); |
| return Tokens.tDOT; |
| case '0' : case '1' : case '2' : case '3' : case '4' : |
| case '5' : case '6' : case '7' : case '8' : case '9' : |
| return parseNumber(c); |
| |
| case ')': |
| conditionState.restart(); |
| cmdArgumentState.restart(); |
| lex_state = LexState.EXPR_END; |
| yaccValue = new Token(")", getPosition()); |
| return Tokens.tRPAREN; |
| case ']': |
| conditionState.restart(); |
| cmdArgumentState.restart(); |
| lex_state = LexState.EXPR_END; |
| yaccValue = new Token(")", getPosition()); |
| return Tokens.tRBRACK; |
| case '}': |
| conditionState.restart(); |
| cmdArgumentState.restart(); |
| lex_state = LexState.EXPR_END; |
| yaccValue = new Token("}",getPosition()); |
| return Tokens.tRCURLY; |
| |
| case ':': |
| c = src.read(); |
| if (c == ':') { |
| if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID || |
| lex_state == LexState.EXPR_CLASS || |
| (lex_state.isArgument() && spaceSeen)) { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("::", getPosition()); |
| return Tokens.tCOLON3; |
| } |
| lex_state = LexState.EXPR_DOT; |
| yaccValue = new Token(":",getPosition()); |
| return Tokens.tCOLON2; |
| } |
| if (lex_state == LexState.EXPR_END || |
| lex_state == LexState.EXPR_ENDARG || Character.isWhitespace(c)) { |
| src.unread(c); |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token(":",getPosition()); |
| return ':'; |
| } |
| switch (c) { |
| case '\'': |
| lex_strterm = new StringTerm(str_ssym, c, '\0'); |
| break; |
| case '"': |
| lex_strterm = new StringTerm(str_dsym, c, '\0'); |
| break; |
| default: |
| src.unread(c); |
| break; |
| } |
| lex_state = LexState.EXPR_FNAME; |
| yaccValue = new Token(":", getPosition()); |
| return Tokens.tSYMBEG; |
| |
| case '/': |
| if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID) { |
| lex_strterm = new StringTerm(str_regexp, '/', '\0'); |
| yaccValue = new Token("/",getPosition()); |
| return Tokens.tREGEXP_BEG; |
| } |
| |
| if ((c = src.read()) == '=') { |
| yaccValue = new Token("/", getPosition()); |
| lex_state = LexState.EXPR_BEG; |
| return Tokens.tOP_ASGN; |
| } |
| src.unread(c); |
| if (lex_state.isArgument() && spaceSeen) { |
| if (!Character.isWhitespace(c)) { |
| arg_ambiguous(); |
| lex_strterm = new StringTerm(str_regexp, '/', '\0'); |
| yaccValue = new Token("/",getPosition()); |
| return Tokens.tREGEXP_BEG; |
| } |
| } |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| yaccValue = new Token("/", getPosition()); |
| return Tokens.tDIVIDE; |
| |
| case '^': |
| if ((c = src.read()) == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("^", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| src.unread(c); |
| yaccValue = new Token("^", getPosition()); |
| return Tokens.tCARET; |
| |
| case ';': |
| commandStart = true; |
| case ',': |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token(",", getPosition()); |
| return c; |
| |
| case '~': |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| if ((c = src.read()) != '@') { |
| src.unread(c); |
| } |
| } |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| yaccValue = new Token("~", getPosition()); |
| return Tokens.tTILDE; |
| case '(': |
| c = Tokens.tLPAREN2; |
| commandStart = true; |
| if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID) { |
| c = Tokens.tLPAREN; |
| } else if (spaceSeen) { |
| if (lex_state == LexState.EXPR_CMDARG) { |
| c = Tokens.tLPAREN_ARG; |
| } else if (lex_state == LexState.EXPR_ARG) { |
| warnings.warn(getPosition(), "don't put space before argument parentheses"); |
| c = Tokens.tLPAREN2; |
| } |
| } |
| conditionState.stop(); |
| cmdArgumentState.stop(); |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("(", getPosition()); |
| return c; |
| |
| case '[': |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| if ((c = src.read()) == ']') { |
| if (src.peek('=')) { |
| c = src.read(); |
| yaccValue = new Token("[]=", getPosition()); |
| return Tokens.tASET; |
| } |
| yaccValue = new Token("[]", getPosition()); |
| return Tokens.tAREF; |
| } |
| src.unread(c); |
| yaccValue = new Token("[", getPosition()); |
| return '['; |
| } else if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID) { |
| c = Tokens.tLBRACK; |
| } else if (lex_state.isArgument() && spaceSeen) { |
| c = Tokens.tLBRACK; |
| } |
| lex_state = LexState.EXPR_BEG; |
| conditionState.stop(); |
| cmdArgumentState.stop(); |
| yaccValue = new Token("[", getPosition()); |
| return c; |
| |
| case '{': |
| c = Tokens.tLCURLY; |
| |
| if (lex_state.isArgument() || lex_state == LexState.EXPR_END) { |
| c = Tokens.tLCURLY; /* block (primary) */ |
| } else if (lex_state == LexState.EXPR_ENDARG) { |
| c = Tokens.tLBRACE_ARG; /* block (expr) */ |
| } else { |
| c = Tokens.tLBRACE; /* hash */ |
| } |
| conditionState.stop(); |
| cmdArgumentState.stop(); |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("{", getPosition()); |
| return c; |
| |
| case '\\': |
| c = src.read(); |
| if (c == '\n') { |
| spaceSeen = true; |
| continue retry; /* skip \\n */ |
| } |
| src.unread(c); |
| yaccValue = new Token("\\", getPosition()); |
| return '\\'; |
| |
| case '%': |
| if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID) { |
| return parseQuote(src.read()); |
| } |
| if ((c = src.read()) == '=') { |
| lex_state = LexState.EXPR_BEG; |
| yaccValue = new Token("%", getPosition()); |
| return Tokens.tOP_ASGN; |
| } |
| if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)) { |
| return parseQuote(c); |
| } |
| if (lex_state == LexState.EXPR_FNAME || |
| lex_state == LexState.EXPR_DOT) { |
| lex_state = LexState.EXPR_ARG; |
| } else { |
| lex_state = LexState.EXPR_BEG; |
| } |
| src.unread(c); |
| yaccValue = new Token("%", getPosition()); |
| return Tokens.tPERCENT; |
| |
| case '$': |
| lex_state = LexState.EXPR_END; |
| tokenBuffer.setLength(0); |
| c = src.read(); |
| switch (c) { |
| case '_': /* $_: last read line string */ |
| c = src.read(); |
| if (isIdentifierChar(c)) { |
| tokenBuffer.append('$'); |
| tokenBuffer.append('_'); |
| break; |
| } |
| src.unread(c); |
| c = '_'; |
| /* fall through */ |
| case '~': /* $~: match-data */ |
| case '*': /* $*: argv */ |
| case '$': /* $$: pid */ |
| case '?': /* $?: last status */ |
| case '!': /* $!: error string */ |
| case '@': /* $@: error position */ |
| case '/': /* $/: input record separator */ |
| case '\\': /* $\: output record separator */ |
| case ';': /* $;: field separator */ |
| case ',': /* $,: output field separator */ |
| case '.': /* $.: last read line number */ |
| case '=': /* $=: ignorecase */ |
| case ':': /* $:: load path */ |
| case '<': /* $<: reading filename */ |
| case '>': /* $>: default output handle */ |
| case '\"': /* $": already loaded files */ |
| tokenBuffer.append('$'); |
| tokenBuffer.append(c); |
| yaccValue = new Token(tokenBuffer.toString(), getPosition()); |
| return Tokens.tGVAR; |
| |
| case '-': |
| tokenBuffer.append('$'); |
| tokenBuffer.append(c); |
| c = src.read(); |
| if (isIdentifierChar(c)) { |
| tokenBuffer.append(c); |
| } else { |
| src.unread(c); |
| } |
| yaccValue = new Token(tokenBuffer.toString(), getPosition()); |
| /* xxx shouldn't check if valid option variable */ |
| return Tokens.tGVAR; |
| |
| case '&': /* $&: last match */ |
| case '`': /* $`: string before last match */ |
| case '\'': /* $': string after last match */ |
| case '+': /* $+: string matches last paren. */ |
| yaccValue = new BackRefNode(getPosition(), c); |
| return Tokens.tBACK_REF; |
| |
| case '1': case '2': case '3': |
| case '4': case '5': case '6': |
| case '7': case '8': case '9': |
| tokenBuffer.append('$'); |
| do { |
| tokenBuffer.append(c); |
| c = src.read(); |
| } while (Character.isDigit(c)); |
| src.unread(c); |
| if(last_state == LexState.EXPR_FNAME) { |
| yaccValue = new Token(tokenBuffer.toString(), getPosition()); |
| return Tokens.tGVAR; |
| } else { |
| yaccValue = new NthRefNode(getPosition(), Integer.parseInt(tokenBuffer.substring(1))); |
| return Tokens.tNTH_REF; |
| } |
| default: |
| if (!isIdentifierChar(c)) { |
| src.unread(c); |
| yaccValue = new Token("$", getPosition()); |
| return '$'; |
| } |
| case '0': |
| tokenBuffer.append('$'); |
| } |
| break; |
| |
| case '@': |
| c = src.read(); |
| tokenBuffer.setLength(0); |
| tokenBuffer.append('@'); |
| if (c == '@') { |
| tokenBuffer.append('@'); |
| c = src.read(); |
| } |
| if (Character.isDigit(c)) { |
| if (tokenBuffer.length() == 1) { |
| throw new SyntaxException(getPosition(), "`@" + c + "' is not allowed as an instance variable name"); |
| } |
| throw new SyntaxException(getPosition(), "`@@" + c + "' is not allowed as a class variable name"); |
| } |
| if (!isIdentifierChar(c)) { |
| src.unread(c); |
| yaccValue = new Token("@", getPosition()); |
| return '@'; |
| } |
| break; |
| |
| case '_': |
| if (src.wasBeginOfLine() && src.matchString("_END__\n", false)) { |
| parserSupport.getResult().setEndSeen(true); |
| return 0; |
| } |
| tokenBuffer.setLength(0); |
| break; |
| |
| default: |
| if (!isIdentifierChar(c)) { |
| throw new SyntaxException(getPosition(), "Invalid char `\\" + Integer.parseInt(""+c, 8) + "' in expression"); |
| } |
| |
| tokenBuffer.setLength(0); |
| break; |
| } |
| |
| do { |
| tokenBuffer.append(c); |
| /* no special multibyte character handling is needed in Java |
| * if (ismbchar(c)) { |
| int i, len = mbclen(c)-1; |
| |
| for (i = 0; i < len; i++) { |
| c = src.read(); |
| tokenBuffer.append(c); |
| } |
| }*/ |
| c = src.read(); |
| } while (isIdentifierChar(c)); |
| |
| char peek = src.read(); |
| if ((c == '!' || c == '?') && |
| isIdentifierChar(tokenBuffer.charAt(0)) && peek != '=') { |
| src.unread(peek); |
| tokenBuffer.append(c); |
| } else { |
| src.unread(peek); |
| src.unread(c); |
| } |
| |
| int result = 0; |
| |
| switch (tokenBuffer.charAt(0)) { |
| case '$': |
| lex_state = LexState.EXPR_END; |
| result = Tokens.tGVAR; |
| break; |
| case '@': |
| lex_state = LexState.EXPR_END; |
| if (tokenBuffer.charAt(1) == '@') { |
| result = Tokens.tCVAR; |
| } else { |
| result = Tokens.tIVAR; |
| } |
| break; |
| |
| default: |
| char last = tokenBuffer.charAt(tokenBuffer.length() - 1); |
| if (last == '!' || last == '?') { |
| result = Tokens.tFID; |
| } else { |
| if (lex_state == LexState.EXPR_FNAME) { |
| if ((c = src.read()) == '=') { |
| char c2 = src.read(); |
| |
| if (c2 != '~' && c2 != '>' && |
| (c2 != '=' || (c2 == '\n' && src.peek('>')))) { |
| result = Tokens.tIDENTIFIER; |
| tokenBuffer.append(c); |
| src.unread(c2); |
| } else { |
| src.unread(c2); |
| src.unread(c); |
| } |
| } else { |
| src.unread(c); |
| } |
| } |
| if (result == 0 && Character.isUpperCase(tokenBuffer.charAt(0))) { |
| result = Tokens.tCONSTANT; |
| } else { |
| result = Tokens.tIDENTIFIER; |
| } |
| } |
| |
| if (lex_state != LexState.EXPR_DOT) { |
| /* See if it is a reserved word. */ |
| Keyword keyword = Keyword.getKeyword(tokenBuffer.toString(), tokenBuffer.length()); |
| if (keyword != null) { |
| // enum lex_state |
| LexState state = lex_state; |
| |
| lex_state = keyword.state; |
| if (state.isExprFName()) { |
| yaccValue = new Token(keyword.name, getPosition()); |
| } else { |
| yaccValue = new Token(tokenBuffer.toString(), getPosition()); |
| } |
| if (keyword.id0 == Tokens.kDO) { |
| if (conditionState.isInState()) { |
| return Tokens.kDO_COND; |
| } |
| if (cmdArgumentState.isInState() && state != LexState.EXPR_CMDARG) { |
| return Tokens.kDO_BLOCK; |
| } |
| if (state == LexState.EXPR_ENDARG) { |
| return Tokens.kDO_BLOCK; |
| } |
| return Tokens.kDO; |
| } |
| |
| if (state == LexState.EXPR_BEG) { |
| return keyword.id0; |
| } |
| if (keyword.id0 != keyword.id1) { |
| lex_state = LexState.EXPR_BEG; |
| } |
| return keyword.id1; |
| } |
| } |
| |
| if (lex_state == LexState.EXPR_BEG || |
| lex_state == LexState.EXPR_MID || |
| lex_state == LexState.EXPR_DOT || |
| lex_state == LexState.EXPR_ARG || |
| lex_state == LexState.EXPR_CMDARG) { |
| if (commandState) { |
| lex_state = LexState.EXPR_CMDARG; |
| } else { |
| lex_state = LexState.EXPR_ARG; |
| } |
| } else { |
| lex_state = LexState.EXPR_END; |
| } |
| } |
| |
| String tempVal = tokenBuffer.toString(); |
| |
| // Lame: parsing logic made it into lexer in ruby...So we |
| // are emulating |
| // FIXME: I believe this is much simpler now... |
| StaticScope scope = parserSupport.getCurrentScope(); |
| if (IdUtil.getVarType(tempVal) == IdUtil.LOCAL_VAR && |
| (scope instanceof BlockStaticScope && (scope.isDefined(tempVal) >= 0)) || |
| (scope.getLocalScope().isDefined(tempVal) >= 0)) { |
| lex_state = LexState.EXPR_END; |
| } |
| |
| yaccValue = new Token(tempVal, getPosition()); |
| |
| return result; |
| } |
| } |
| |
| /** |
| * Parse a number from the input stream. |
| * |
| *@param c The first character of the number. |
| *@return A int constant wich represents a token. |
| */ |
| private int parseNumber(char c) throws IOException { |
| lex_state = LexState.EXPR_END; |
| |
| tokenBuffer.setLength(0); |
| |
| if (c == '-') { |
| tokenBuffer.append(c); |
| c = src.read(); |
| } else if (c == '+') { |
| // We don't append '+' since Java number parser gets confused |
| c = src.read(); |
| } |
| |
| char nondigit = '\0'; |
| |
| if (c == '0') { |
| int startLen = tokenBuffer.length(); |
| |
| switch (c = src.read()) { |
| case 'x' : |
| case 'X' : // hexadecimal |
| c = src.read(); |
| if (isHexChar(c)) { |
| for (;; c = src.read()) { |
| if (c == '_') { |
| if (nondigit != '\0') { |
| break; |
| } |
| nondigit = c; |
| } else if (isHexChar(c)) { |
| nondigit = '\0'; |
| tokenBuffer.append(c); |
| } else { |
| break; |
| } |
| } |
| } |
| src.unread(c); |
| |
| if (tokenBuffer.length() == startLen) { |
| throw new SyntaxException(getPosition(), "Hexadecimal number without hex-digits."); |
| } else if (nondigit != '\0') { |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } |
| yaccValue = getInteger(tokenBuffer.toString(), 16); |
| return Tokens.tINTEGER; |
| case 'b' : |
| case 'B' : // binary |
| c = src.read(); |
| if (c == '0' || c == '1') { |
| for (;; c = src.read()) { |
| if (c == '_') { |
| if (nondigit != '\0') { |
| break; |
| } |
| nondigit = c; |
| } else if (c == '0' || c == '1') { |
| nondigit = '\0'; |
| tokenBuffer.append(c); |
| } else { |
| break; |
| } |
| } |
| } |
| src.unread(c); |
| |
| if (tokenBuffer.length() == startLen) { |
| throw new SyntaxException(getPosition(), "Binary number without digits."); |
| } else if (nondigit != '\0') { |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } |
| yaccValue = getInteger(tokenBuffer.toString(), 2); |
| return Tokens.tINTEGER; |
| case 'd' : |
| case 'D' : // decimal |
| c = src.read(); |
| if (Character.isDigit(c)) { |
| for (;; c = src.read()) { |
| if (c == '_') { |
| if (nondigit != '\0') { |
| break; |
| } |
| nondigit = c; |
| } else if (Character.isDigit(c)) { |
| nondigit = '\0'; |
| tokenBuffer.append(c); |
| } else { |
| break; |
| } |
| } |
| } |
| src.unread(c); |
| |
| if (tokenBuffer.length() == startLen) { |
| throw new SyntaxException(getPosition(), "Binary number without digits."); |
| } else if (nondigit != '\0') { |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } |
| yaccValue = getInteger(tokenBuffer.toString(), 2); |
| return Tokens.tINTEGER; |
| case '0' : case '1' : case '2' : case '3' : case '4' : //Octal |
| case '5' : case '6' : case '7' : case '_' : |
| for (;; c = src.read()) { |
| if (c == '_') { |
| if (nondigit != '\0') { |
| break; |
| } |
| nondigit = c; |
| } else if (c >= '0' && c <= '7') { |
| nondigit = '\0'; |
| tokenBuffer.append(c); |
| } else { |
| break; |
| } |
| } |
| if (tokenBuffer.length() > startLen) { |
| src.unread(c); |
| |
| if (nondigit != '\0') { |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } |
| |
| yaccValue = getInteger(tokenBuffer.toString(), 8); |
| return Tokens.tINTEGER; |
| } |
| case '8' : |
| case '9' : |
| throw new SyntaxException(getPosition(), "Illegal octal digit."); |
| case '.' : |
| case 'e' : |
| case 'E' : |
| tokenBuffer.append('0'); |
| break; |
| default : |
| src.unread(c); |
| yaccValue = new FixnumNode(getPosition(), 0); |
| return Tokens.tINTEGER; |
| } |
| } |
| |
| boolean seen_point = false; |
| boolean seen_e = false; |
| |
| for (;; c = src.read()) { |
| switch (c) { |
| case '0' : |
| case '1' : |
| case '2' : |
| case '3' : |
| case '4' : |
| case '5' : |
| case '6' : |
| case '7' : |
| case '8' : |
| case '9' : |
| nondigit = '\0'; |
| tokenBuffer.append(c); |
| break; |
| case '.' : |
| if (nondigit != '\0') { |
| src.unread(c); |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } else if (seen_point || seen_e) { |
| src.unread(c); |
| return getNumberToken(tokenBuffer.toString(), true, nondigit); |
| } else { |
| char c2; |
| if (!Character.isDigit(c2 = src.read())) { |
| src.unread(c2); |
| src.unread('.'); |
| if (c == '_') { |
| // Enebo: c can never be antrhign but '.' |
| // Why did I put this here? |
| } else { |
| yaccValue = getInteger(tokenBuffer.toString(), 10); |
| return Tokens.tINTEGER; |
| } |
| } else { |
| tokenBuffer.append('.'); |
| tokenBuffer.append(c2); |
| seen_point = true; |
| nondigit = '\0'; |
| } |
| } |
| break; |
| case 'e' : |
| case 'E' : |
| if (nondigit != '\0') { |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } else if (seen_e) { |
| src.unread(c); |
| return getNumberToken(tokenBuffer.toString(), true, nondigit); |
| } else { |
| tokenBuffer.append(c); |
| seen_e = true; |
| nondigit = c; |
| c = src.read(); |
| if (c == '-' || c == '+') { |
| tokenBuffer.append(c); |
| nondigit = c; |
| } else { |
| src.unread(c); |
| } |
| } |
| break; |
| case '_' : // '_' in number just ignored |
| if (nondigit != '\0') { |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } |
| nondigit = c; |
| break; |
| default : |
| src.unread(c); |
| return getNumberToken(tokenBuffer.toString(), seen_e || seen_point, nondigit); |
| } |
| } |
| } |
| |
| private int getNumberToken(String number, boolean isFloat, char nondigit) { |
| if (nondigit != '\0') { |
| throw new SyntaxException(getPosition(), "Trailing '_' in number."); |
| } |
| if (isFloat) { |
| double d; |
| try { |
| d = Double.parseDouble(number); |
| } catch (NumberFormatException e) { |
| warnings.warn(getPosition(), "Float " + number + " out of range."); |
| |
| d = number.startsWith("-") ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; |
| } |
| yaccValue = new FloatNode(getPosition(), d); |
| return Tokens.tFLOAT; |
| } |
| yaccValue = getInteger(number, 10); |
| return Tokens.tINTEGER; |
| } |
| } |