plugins/org.eclipse.dltk.ruby.core/jruby/org/jruby/lexer/yacc/StringTerm.java - dltk/org.eclipse.dltk.ruby - Git at Google

 /***** BEGIN LICENSE BLOCK *****
  * Version: CPL 1.0/GPL 2.0/LGPL 2.1
  *
  * The contents of this file are subject to the Common Public
  * License Version 1.0 (the "License"); you may not use this file
  * except in compliance with the License. You may obtain a copy of
  * the License at http://www.eclipse.org/legal/cpl-v10.html
  *
  * Software distributed under the License is distributed on an "AS
  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  * implied. See the License for the specific language governing
  * rights and limitations under the License.
  *
  * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either of the GNU General Public License Version 2 or later (the "GPL"),
  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the CPL, indicate your
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the CPL, the GPL or the LGPL.
  ***** END LICENSE BLOCK *****/
 package org.jruby.lexer.yacc;

 import org.jruby.ast.RegexpNode;
 import org.jruby.ast.StrNode;
 import org.jruby.parser.ReOptions;
 import org.jruby.parser.Tokens;
 import org.jruby.util.ByteList;

 /**
  * Lexer state for one string-like literal while it is being scanned.
  *
  * <p>An instance is configured with a set of <code>RubyYaccLexer.STR_FUNC_*</code>
  * bit flags, the character that terminates the literal and, optionally, the
  * character that opens a nested level. {@link #parseString} returns one token
  * per call; the <code>func == -1</code> state below exists so that a later call
  * can report the end of a word list.</p>
  */
 public class StringTerm extends StrTerm {
     /* bit flags to indicate the string type; set to -1 once a word list has consumed its terminator */
     private int func;

     /* character that ends the literal */
     private final char term;

     /* character that opens a nested level, or '\0' when nesting is not tracked */
     private final char paren;

     /* nested string level */
     private int nest;

     /**
      * @param func  combination of <code>RubyYaccLexer.STR_FUNC_*</code> flags
      * @param term  terminating character
      * @param paren opening character counted for nesting, or <code>'\0'</code>
      */
     public StringTerm(int func, char term, char paren) {
         this.func = func;
         this.term = term;
         this.paren = paren;
         this.nest = 0;
     }

     /**
      * Scans the next piece of the literal and returns its token id.
      *
      * <p>The values returned below are <code>Tokens.tSTRING_END</code>,
      * <code>Tokens.tREGEXP_END</code>, <code>Tokens.tSTRING_DVAR</code>,
      * <code>Tokens.tSTRING_DBEG</code>, <code>Tokens.tSTRING_CONTENT</code>, or the
      * character <code>' '</code>, which is returned when whitespace or the
      * terminator is met inside a word list.</p>
      *
      * @param lexer lexer whose value is set to the produced token or node
      * @param src   character source
      * @return the token id
      * @throws java.io.IOException propagated from <code>src</code>
      * @throws SyntaxException if the source ends before the terminator is found,
      *         or a regexp is followed by unknown option letters
      */
     public int parseString(final RubyYaccLexer lexer, LexerSource src) throws java.io.IOException {
         char c;
         boolean skippedWhitespace = false;

         // The previous call already consumed the terminator of a word list.
         if (func == -1) {
             lexer.setValue(new Token("\"", lexer.getPosition()));
             return Tokens.tSTRING_END;
         }

         c = src.read();
         if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
                 && Character.isWhitespace(c)) {
             // Collapse a run of whitespace between word-list elements.
             do {
                 c = src.read();
             } while (Character.isWhitespace(c));
             skippedWhitespace = true;
         }

         if (c == term && nest == 0) {
             if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
                 // Remember that the terminator is gone; the next call emits tSTRING_END.
                 func = -1;
                 // NOTE(review): result discarded; presumably called for a side effect - confirm.
                 lexer.getPosition();
                 return ' ';
             }
             if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
                 lexer.setValue(new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src)));
                 return Tokens.tREGEXP_END;
             }
             lexer.setValue(new Token("\"", lexer.getPosition()));
             return Tokens.tSTRING_END;
         }
         if (skippedWhitespace) {
             // The character after the whitespace belongs to the next element.
             src.unread(c);
             // NOTE(review): result discarded; presumably called for a side effect - confirm.
             lexer.getPosition();
             return ' ';
         }
         ByteList buffer = new ByteList();

         if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') {
             c = src.read();
             switch (c) {
             case '$':
             case '@':
                 // Leave the sigil in the source for whoever scans the variable.
                 src.unread(c);
                 lexer.setValue(new Token("#" + c, lexer.getPosition()));
                 return Tokens.tSTRING_DVAR;
             case '{':
                 lexer.setValue(new Token("#" + c, lexer.getPosition()));
                 return Tokens.tSTRING_DBEG;
             }
             // A '#' that starts no interpolation is ordinary content.
             buffer.append('#');
         }
         src.unread(c);
         // parseStringIntoBuffer returns EOF only when its loop ran out of input.
         if (parseStringIntoBuffer(src, buffer) == RubyYaccLexer.EOF) {
             throw new SyntaxException(src.getPosition(), "unterminated string meets end of file");
         }

         lexer.setValue(new StrNode(lexer.getPosition(), buffer));
         return Tokens.tSTRING_CONTENT;
     }

     /**
      * Reads the option letters that follow a regexp terminator.
      *
      * <p>Reading stops at the first character that is not a letter (or at end of
      * input); that character is pushed back.</p>
      *
      * @param src character source positioned right after the terminator
      * @return the collected <code>ReOptions</code> bits or-ed with the value
      *         selected by the last of the letters n, e, s, u
      * @throws java.io.IOException propagated from <code>src</code>
      * @throws SyntaxException if any letter is not a recognised option
      */
     private int parseRegexpFlags(final LexerSource src) throws java.io.IOException {
         // NOTE(review): 16/32/48/64 presumably select the regexp encoding - confirm against ReOptions.
         char kcode = 0;
         int options = 0;
         char c;
         StringBuffer unknownFlags = new StringBuffer(10);

         for (c = src.read(); c != RubyYaccLexer.EOF
                 && Character.isLetter(c); c = src.read()) {
             switch (c) {
             case 'i':
                 options |= ReOptions.RE_OPTION_IGNORECASE;
                 break;
             case 'x':
                 options |= ReOptions.RE_OPTION_EXTENDED;
                 break;
             case 'm':
                 options |= ReOptions.RE_OPTION_MULTILINE;
                 break;
             case 'o':
                 options |= ReOptions.RE_OPTION_ONCE;
                 break;
             case 'n':
                 kcode = 16;
                 break;
             case 'e':
                 kcode = 32;
                 break;
             case 's':
                 kcode = 48;
                 break;
             case 'u':
                 kcode = 64;
                 break;
             default:
                 // Collect all offenders so they are reported together.
                 unknownFlags.append(c);
                 break;
             }
         }
         src.unread(c);
         if (unknownFlags.length() != 0) {
             throw new SyntaxException(src.getPosition(), "unknown regexp option"
                     + (unknownFlags.length() > 1 ? "s" : "") + " - "
                     + unknownFlags.toString());
         }
         return options | kcode;
     }

     /**
      * Appends literal content to <code>buffer</code> until the terminator, the start
      * of an interpolation, a word-list separator or end of input is reached.
      *
      * <p>The character that stopped the scan is pushed back onto
      * <code>src</code>, except at end of input.</p>
      *
      * @param src    character source
      * @param buffer receives the scanned bytes
      * @return the character that ended the scan; <code>RubyYaccLexer.EOF</code>
      *         when the input ran out
      * @throws java.io.IOException propagated from <code>src</code>
      * @throws SyntaxException if a symbol literal contains <code>'\0'</code> or an
      *         escape sequence is malformed
      */
     public char parseStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
         char c;

         while ((c = src.read()) != RubyYaccLexer.EOF) {
             if (paren != '\0' && c == paren) {
                 nest++;
             } else if (c == term) {
                 if (nest == 0) {
                     // Leave the terminator for parseString.
                     src.unread(c);
                     break;
                 }
                 nest--;
             } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek('\n')) {
                 char c2 = src.read();

                 if (c2 == '$' || c2 == '@' || c2 == '{') {
                     // Interpolation starts here; hand both characters back.
                     src.unread(c2);
                     src.unread(c);
                     break;
                 }
                 src.unread(c2);
             } else if (c == '\\') {
                 c = src.read();
                 switch (c) {
                 case '\n':
                     if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
                         // Word list: keep the newline, drop the backslash.
                         break;
                     }
                     if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
                         // Expanding string: backslash-newline contributes nothing.
                         continue;
                     }
                     buffer.append('\\');
                     break;

                 case '\\':
                     if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
                         // Keep both backslashes; the second one is appended after the switch.
                         buffer.append(c);
                     }
                     break;

                 default:
                     if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
                         // Regexp escapes are copied through in source form.
                         src.unread(c);
                         parseEscapeIntoBuffer(src, buffer);
                         continue;
                     } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
                         src.unread(c);
                         if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
                             buffer.append('\\');
                         }
                         c = src.readEscape();
                     } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
                             && Character.isWhitespace(c)) {
                         /* ignore backslashed spaces in %w */
                     } else if (c != term && !(paren != '\0' && c == paren)) {
                         // Only an escaped delimiter loses its backslash.
                         buffer.append('\\');
                     }
                 }
             } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
                     && Character.isWhitespace(c)) {
                 // Unescaped whitespace ends the current word-list element.
                 src.unread(c);
                 break;
             }
             if (c == '\0' && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) {
                 throw new SyntaxException(src.getPosition(), "symbol cannot contain '\\0'");
             }
             buffer.append(c);
         }
         return c;
     }

     /**
      * Copies the character that follows a <code>\M-</code>, <code>\C-</code> or
      * <code>\c</code> prefix; a backslash there starts another escape.
      *
      * @throws SyntaxException if the input ends right after the prefix
      */
     // Was a goto in original ruby lexer
     private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException {
         char c;

         switch (c = src.read()) {
         case '\\':
             parseEscapeIntoBuffer(src, buffer);
             break;
         case RubyYaccLexer.EOF:
             throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
         default:
             buffer.append(c);
         }
     }

     /**
      * Copies one escape sequence into <code>buffer</code> in source form, that is
      * including its leading backslash. The backslash itself has already been
      * consumed by the caller.
      *
      * @param src    character source positioned after the backslash
      * @param buffer receives the escape sequence
      * @throws java.io.IOException propagated from <code>src</code>
      * @throws SyntaxException if the sequence is incomplete or malformed
      */
     private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
         char c;

         switch (c = src.read()) {
         case '\n':
             break; /* just ignore */
         case '0':
         case '1':
         case '2':
         case '3': /* octal constant */
         case '4':
         case '5':
         case '6':
         case '7':
             buffer.append('\\');
             buffer.append(c);
             // At most two further octal digits belong to the constant.
             for (int i = 0; i < 2; i++) {
                 c = src.read();
                 if (c == RubyYaccLexer.EOF) {
                     throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
                 }
                 if (!RubyYaccLexer.isOctChar(c)) {
                     src.unread(c);
                     break;
                 }
                 buffer.append(c);
             }
             break;
         case 'x': /* hex constant */
             buffer.append('\\');
             buffer.append(c);
             // The first hex digit is mandatory, the second one optional.
             c = src.read();
             if (!RubyYaccLexer.isHexChar(c)) {
                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
             }
             buffer.append(c);
             c = src.read();
             if (RubyYaccLexer.isHexChar(c)) {
                 buffer.append(c);
             } else {
                 src.unread(c);
             }
             break;
         case 'M':
             if ((c = src.read()) != '-') {
                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
             }
             buffer.append(new byte[] { '\\', 'M', '-' });
             escaped(src, buffer);
             break;
         case 'C':
             if ((c = src.read()) != '-') {
                 throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
             }
             buffer.append(new byte[] { '\\', 'C', '-' });
             escaped(src, buffer);
             break;
         case 'c':
             buffer.append(new byte[] { '\\', 'c' });
             escaped(src, buffer);
             break;
         case RubyYaccLexer.EOF:
             throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
         default:
             // Every other character keeps its backslash. The former guard
             // (c != '\\' || c != term) could only be false when term is a
             // backslash, and then parseStringIntoBuffer treats the backslash as
             // the terminator and never gets here, so the guard was always true.
             buffer.append('\\');
             buffer.append(c);
         }
     }
 }
	/*** BEGIN LICENSE BLOCK ***
	* Version: CPL 1.0/GPL 2.0/LGPL 2.1
	*
	* The contents of this file are subject to the Common Public
	* License Version 1.0 (the "License"); you may not use this file
	* except in compliance with the License. You may obtain a copy of
	* the License at http://www.eclipse.org/legal/cpl-v10.html
	*
	* Software distributed under the License is distributed on an "AS
	* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
	* implied. See the License for the specific language governing
	* rights and limitations under the License.
	*
	* Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
	*
	* Alternatively, the contents of this file may be used under the terms of
	* either of the GNU General Public License Version 2 or later (the "GPL"),
	* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
	* in which case the provisions of the GPL or the LGPL are applicable instead
	* of those above. If you wish to allow use of your version of this file only
	* under the terms of either the GPL or the LGPL, and not to allow others to
	* use your version of this file under the terms of the CPL, indicate your
	* decision by deleting the provisions above and replace them with the notice
	* and other provisions required by the GPL or the LGPL. If you do not delete
	* the provisions above, a recipient may use your version of this file under
	* the terms of any one of the CPL, the GPL or the LGPL.
	*** END LICENSE BLOCK ***/
	package org.jruby.lexer.yacc;

	import org.jruby.ast.RegexpNode;
	import org.jruby.ast.StrNode;
	import org.jruby.parser.ReOptions;
	import org.jruby.parser.Tokens;
	import org.jruby.util.ByteList;

	public class StringTerm extends StrTerm {
	/* bit flags to indicate the string type */
	private int func;

	private final char term;

	private final char paren;

	/* nested string level */
	private int nest;

	public StringTerm(int func, char term, char paren) {
	this.func = func;
	this.term = term;
	this.paren = paren;
	this.nest = 0;
	}

	public int parseString(final RubyYaccLexer lexer, LexerSource src) throws java.io.IOException {
	char c;
	int space = 0;

	if (func == -1) {
	lexer.setValue(new Token("\"", lexer.getPosition()));
	return Tokens.tSTRING_END;
	}

	c = src.read();
	if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
	&& Character.isWhitespace(c)) {
	do {
	c = src.read();
	} while (Character.isWhitespace(c));
	space = 1;
	}

	if (c == term && nest == 0) {
	if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
	func = -1;
	lexer.getPosition();
	return ' ';
	}
	if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
	lexer.setValue(new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src)));
	return Tokens.tREGEXP_END;
	}
	lexer.setValue(new Token("\"", lexer.getPosition()));
	return Tokens.tSTRING_END;
	}
	if (space != 0) {
	src.unread(c);
	lexer.getPosition();
	return ' ';
	}
	ByteList buffer = new ByteList();

	if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') {
	c = src.read();
	switch (c) {
	case '$':
	case '@':
	src.unread(c);
	lexer.setValue(new Token("#" + c, lexer.getPosition()));
	return Tokens.tSTRING_DVAR;
	case '{':
	lexer.setValue(new Token("#" + c, lexer.getPosition()));
	return Tokens.tSTRING_DBEG;
	}
	buffer.append('#');
	}
	src.unread(c);
	if (parseStringIntoBuffer(src, buffer) == 0) {
	throw new SyntaxException(src.getPosition(), "unterminated string meets end of file");
	}

	lexer.setValue(new StrNode(lexer.getPosition(), buffer));
	return Tokens.tSTRING_CONTENT;
	}

	private int parseRegexpFlags(final LexerSource src) throws java.io.IOException {
	char kcode = 0;
	int options = 0;
	char c;
	StringBuffer unknownFlags = new StringBuffer(10);

	for (c = src.read(); c != RubyYaccLexer.EOF
	&& Character.isLetter(c); c = src.read()) {
	switch (c) {
	case 'i':
	options \|= ReOptions.RE_OPTION_IGNORECASE;
	break;
	case 'x':
	options \|= ReOptions.RE_OPTION_EXTENDED;
	break;
	case 'm':
	options \|= ReOptions.RE_OPTION_MULTILINE;
	break;
	case 'o':
	options \|= ReOptions.RE_OPTION_ONCE;
	break;
	case 'n':
	kcode = 16;
	break;
	case 'e':
	kcode = 32;
	break;
	case 's':
	kcode = 48;
	break;
	case 'u':
	kcode = 64;
	break;
	default:
	unknownFlags.append(c);
	break;
	}
	}
	src.unread(c);
	if (unknownFlags.length() != 0) {
	throw new SyntaxException(src.getPosition(), "unknown regexp option"
	+ (unknownFlags.length() > 1 ? "s" : "") + " - "
	+ unknownFlags.toString());
	}
	return options \| kcode;
	}

	public char parseStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
	char c;

	while ((c = src.read()) != RubyYaccLexer.EOF) {
	if (paren != '\0' && c == paren) {
	nest++;
	} else if (c == term) {
	if (nest == 0) {
	src.unread(c);
	break;
	}
	nest--;
	} else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek('\n')) {
	char c2 = src.read();

	if (c2 == '$' \|\| c2 == '@' \|\| c2 == '{') {
	src.unread(c2);
	src.unread(c);
	break;
	}
	src.unread(c2);
	} else if (c == '\\') {
	c = src.read();
	switch (c) {
	case '\n':
	if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
	break;
	}
	if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
	continue;
	}
	buffer.append('\\');
	break;

	case '\\':
	if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
	buffer.append(c);
	}
	break;

	default:
	if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
	src.unread(c);
	parseEscapeIntoBuffer(src, buffer);
	continue;
	} else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) {
	src.unread(c);
	if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) {
	buffer.append('\\');
	}
	c = src.readEscape();
	} else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
	&& Character.isWhitespace(c)) {
	/* ignore backslashed spaces in %w */
	} else if (c != term && !(paren != '\0' && c == paren)) {
	buffer.append('\\');
	}
	}
	} else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0
	&& Character.isWhitespace(c)) {
	src.unread(c);
	break;
	}
	if (c == '\0' && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) {
	throw new SyntaxException(src.getPosition(), "symbol cannot contain '\\0'");
	}
	buffer.append(c);
	}
	return c;
	}

	// Was a goto in original ruby lexer
	private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException {
	char c;

	switch (c = src.read()) {
	case '\\':
	parseEscapeIntoBuffer(src, buffer);
	break;
	case RubyYaccLexer.EOF:
	throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
	default:
	buffer.append(c);
	}
	}

	private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
	char c;

	switch (c = src.read()) {
	case '\n':
	break; /* just ignore */
	case '0':
	case '1':
	case '2':
	case '3': /* octal constant */
	case '4':
	case '5':
	case '6':
	case '7':
	buffer.append('\\');
	buffer.append(c);
	for (int i = 0; i < 2; i++) {
	c = src.read();
	if (c == RubyYaccLexer.EOF) {
	throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
	}
	if (!RubyYaccLexer.isOctChar(c)) {
	src.unread(c);
	break;
	}
	buffer.append(c);
	}
	break;
	case 'x': /* hex constant */
	buffer.append('\\');
	buffer.append(c);
	c = src.read();
	if (!RubyYaccLexer.isHexChar(c)) {
	throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
	}
	buffer.append(c);
	c = src.read();
	if (RubyYaccLexer.isHexChar(c)) {
	buffer.append(c);
	} else {
	src.unread(c);
	}
	break;
	case 'M':
	if ((c = src.read()) != '-') {
	throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
	}
	buffer.append(new byte[] { '\\', 'M', '-' });
	escaped(src, buffer);
	break;
	case 'C':
	if ((c = src.read()) != '-') {
	throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
	}
	buffer.append(new byte[] { '\\', 'C', '-' });
	escaped(src, buffer);
	break;
	case 'c':
	buffer.append(new byte[] { '\\', 'c' });
	escaped(src, buffer);
	break;
	case 0:
	throw new SyntaxException(src.getPosition(), "Invalid escape character syntax");
	default:
	if (c != '\\' \|\| c != term) {
	buffer.append('\\');
	}
	buffer.append(c);
	}
	}
	}