// core/org.eclipse.smila.solr/lib/source/org/apache/solr/parser/QueryParser.jj - smila/org.eclipse.smila.core - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // JavaCC generation options for this grammar.
 options {
   // Generate an instance-based (non-static) parser.
   STATIC=false;
   // NOTE(review): the JavaCC documentation describes this option as ignored when
   // USER_CHAR_STREAM is true — confirm whether it has any effect here.
   JAVA_UNICODE_ESCAPE=true;
   // The parser reads from a caller-supplied CharStream implementation
   // (the constructor in this file passes a FastCharStream).
   USER_CHAR_STREAM=true;
 }

 PARSER_BEGIN(QueryParser)

 package org.apache.solr.parser;

 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.Version;
 import org.apache.solr.search.SyntaxError;
 import org.apache.solr.search.QParser;


 public class QueryParser extends SolrQueryParserBase {

   /** The default operator for parsing queries. */
   public enum Operator { OR, AND }

   /**
    * Builds a parser over an empty character stream and then hands the
    * supplied settings to init(matchVersion, defaultField, parser).
    */
   public QueryParser(Version matchVersion, String defaultField, QParser parser) {
     this(new FastCharStream(new StringReader("")));
     init(matchVersion, defaultField, parser);
   }
 }

 PARSER_END(QueryParser)

 /* ***************** */
 /* Token Definitions */
 /* ***************** */

 // Private (#-prefixed) character classes, declared for every lexical state (<*>).
 // They are only building blocks for the token definitions in this file.
 <*> TOKEN : {
   // a single decimal digit
   <#_NUM_CHAR:   ["0"-"9"] >
  // every character that follows a backslash is considered as an escaped character
  | <#_ESCAPED_CHAR: "\\" ~[] >
  // first character of a term: anything except whitespace and the listed syntax
  // characters, or an escaped character
  | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
                       "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
                       | <_ESCAPED_CHAR> ) >
  // later characters of a term: a term start character, plus "-", "+", "/" and "!"
  | <#_TERM_CHAR: ( <_TERM_START_CHAR>
                        | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") >
  // space, tab, newline, carriage return, or U+3000
  | <#_WHITESPACE:  ( " " | "\t" | "\n" | "\r" | "\u3000") >
  // character allowed inside a double-quoted string (a quote or backslash must be escaped)
  | <#_QUOTED_CHAR:  ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
  // character allowed inside a single-quoted string (a quote or backslash must be escaped)
  | <#_SQUOTED_CHAR:  ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
 }

 // Whitespace is discarded in the DEFAULT and Range states.
 // The Boost state is not listed here, so whitespace is not skipped there.
 <DEFAULT, Range> SKIP : {
   < <_WHITESPACE>>
 }

 // Tokens recognized in the DEFAULT lexical state.
 // Several patterns overlap (for example "*" fits STAR, PREFIXTERM and WILDTERM).
 // JavaCC takes the longest match and, on a tie, the token declared first,
 // so the declaration order below is significant.
 <DEFAULT> TOKEN : {
   // boolean operators, in word or symbol form
   <AND:       ("AND" | "&&") >
  | <OR:        ("OR" | "||") >
  | <NOT:       ("NOT" | "!") >
  | <PLUS:      "+" >
  | <MINUS:     "-" >
  // an operator character immediately followed by whitespace;
  // the matched image includes that whitespace character
  | <BAREOPER:    ("+"|"-"|"!") <_WHITESPACE> >
  | <LPAREN:    "(" >
  | <RPAREN:    ")" >
  | <COLON:     ":" >
  | <STAR:      "*" >
  // switches the lexer to the Boost state, where only NUMBER is defined
  | <CARAT:     "^" > : Boost
  // double-quoted string, image includes both quotes
  | <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
  | <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
  // "~" optionally followed by digits with an optional fractional part
  | <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
  // a lone "*", or a term followed by a single trailing "*"
  | <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
  // term characters mixed with "*" and "?" in any position
  | <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
  // text between two "/" characters; "\/" may appear inside
  | <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
  // range openers switch the lexer to the Range state
  | <RANGEIN_START: "[" > : Range
  | <RANGEEX_START: "{" > : Range
 // TODO: consider using token states instead of inlining SQUOTED
 //  | <SQUOTED:     "'" (<_SQUOTED_CHAR>)* "'">
 //  | <LPARAMS:     ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+  (~[")"," ","\t","\n","{","^"])*  >
   // one or more "{!...}" groups of name / name=value entries, followed by any run of
   // characters other than ")", space, tab, newline, "{" and "^"
   | <LPARAMS:     ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+  (~[")"," ","\t","\n","{","^"])*  >
 }

 // Boost state (entered after CARAT): an optionally negative decimal number.
 // Matching it returns the lexer to the DEFAULT state.
 <Boost> TOKEN : {
  <NUMBER:    ("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
 }

 // Range state (entered after RANGEIN_START or RANGEEX_START).
 <Range> TOKEN : {
  // the literal keyword separating the two range endpoints
  <RANGE_TO: "TO">
  // either closing bracket returns the lexer to the DEFAULT state
  | <RANGEIN_END: "]"> : DEFAULT
  | <RANGEEX_END: "}"> : DEFAULT
  // double-quoted endpoint; an escaped quote (\") may appear inside
  | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
  // unquoted endpoint: any run of characters other than space, "]" and "}"
  | <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
 }

 // *   Query  ::= ( Clause )*
 // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

 // Parses an optional boolean connective.
 // Yields CONJ_AND for <AND>, CONJ_OR for <OR>, and CONJ_NONE when neither is present.
 int Conjunction() : {
   int conj = CONJ_NONE;
 }
 {
   (
       <AND> { conj = CONJ_AND; }
     | <OR>  { conj = CONJ_OR; }
   )?
   { return conj; }
 }

 // Parses an optional clause modifier.
 // Yields MOD_REQ for <PLUS>, MOD_NOT for <MINUS> or <NOT>, and MOD_NONE when absent.
 int Modifiers() : {
   int modifier = MOD_NONE;
 }
 {
   (
       <PLUS>  { modifier = MOD_REQ; }
     | <MINUS> { modifier = MOD_NOT; }
     | <NOT>   { modifier = MOD_NOT; }
   )?
   { return modifier; }
 }

 // Entry production: parses one complete query and then requires <EOF>,
 // so that there is no garbage after the query string.
 Query TopLevelQuery(String field) throws SyntaxError :
 {
   Query parsed;
 }
 {
   parsed=Query(field) <EOF>
   { return parsed; }
 }

 // Parses one or more clauses, each optionally preceded by a modifier and (after the
 // first clause) by a conjunction, and collects them through addClause(...).
 // When exactly one clause was collected and the first clause carried no modifier,
 // that first clause's query is returned directly; otherwise the result of
 // getBooleanQuery(clauses) is returned.
 Query Query(String field) throws SyntaxError :
 {
   List<BooleanClause> clauses = new ArrayList<BooleanClause>();
   Query clauseQuery;
   Query unmodifiedFirst = null;
   int conj;
   int mods;
 }
 {
   mods=Modifiers() clauseQuery=Clause(field)
   {
     addClause(clauses, CONJ_NONE, mods, clauseQuery);
     if (mods == MOD_NONE) {
       unmodifiedFirst = clauseQuery;
     }
   }
   (
     conj=Conjunction() mods=Modifiers() clauseQuery=Clause(field)
     { addClause(clauses, conj, mods, clauseQuery); }
   )*
   {
     if (clauses.size() != 1 || unmodifiedFirst == null) {
       return getBooleanQuery(clauses);
     } else {
       return unmodifiedFirst;
     }
   }
 }

 // Parses a single clause: an optional "field:" (or "*:") prefix followed by a term,
 // a parenthesized sub-query, or a local-params expression. The last two may carry
 // a "^number" boost, which is applied through handleBoost(...).
 Query Clause(String field) throws SyntaxError : {
   Query clauseQuery;
   Token fieldToken=null, boost=null;
   Token params=null;
 }
 {
   // Two tokens of lookahead are needed to tell "name:" apart from a bare term.
   (
     LOOKAHEAD(2)
     (
         fieldToken=<TERM> <COLON> { field=discardEscapeChar(fieldToken.image); }
       | <STAR> <COLON> { field="*"; }
     )
   )?

   (
       clauseQuery=Term(field)
     | <LPAREN> clauseQuery=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
     | params=<LPARAMS> [ <CARAT> boost=<NUMBER> ]
       { clauseQuery=getLocalParams(field, params.image); }
   )
   { return handleBoost(clauseQuery, boost); }
 }


 // Parses one term-level expression in one of three forms:
 //   1. a bare token (term, "*", prefix, wildcard, regexp, number, bare operator),
 //      optionally followed by a fuzzy slop and/or a boost;
 //   2. a range "[a TO b]" / "{a TO b}" (brackets may be mixed, "TO" is optional),
 //      optionally followed by a boost;
 //   3. a quoted phrase, optionally followed by a slop and/or a boost.
 // The resulting query is passed through handleBoost(...) before being returned.
 Query Term(String field) throws SyntaxError : {
   Token term, boost=null, fuzzySlop=null, lower, upper;
   boolean prefix = false;
   boolean wildcard = false;
   boolean fuzzy = false;
   boolean regexp = false;
   boolean startInc=false;
   boolean endInc=false;
   Query q;
 }
 {
   (
     // --- bare token ---
     (
         term=<TERM>
       | term=<STAR> { wildcard=true; }
       | term=<PREFIXTERM> { prefix=true; }
       | term=<WILDTERM> { wildcard=true; }
       | term=<REGEXPTERM> { regexp=true; }
       | term=<NUMBER>
         // BAREOPER's image includes the trailing whitespace character; keep the operator only
       | term=<BAREOPER> { term.image = term.image.substring(0,1); }
     )
     ( fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } )?
     ( <CARAT> boost=<NUMBER> ( fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } )? )?
     {
       q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
     }

     // --- range ---
     | (
         ( <RANGEIN_START> { startInc=true; } | <RANGEEX_START> )
         ( lower=<RANGE_GOOP> | lower=<RANGE_QUOTED> )
         ( <RANGE_TO> )?
         ( upper=<RANGE_GOOP> | upper=<RANGE_QUOTED> )
         ( <RANGEIN_END> { endInc=true; } | <RANGEEX_END> )
       )
       ( <CARAT> boost=<NUMBER> )?
       {
         boolean startOpen = false;
         boolean endOpen = false;
         // Quoted endpoints lose their surrounding quotes; an unquoted "*" means open-ended.
         if (lower.kind == RANGE_QUOTED) {
           lower.image = lower.image.substring(1, lower.image.length()-1);
         } else {
           startOpen = "*".equals(lower.image);
         }
         if (upper.kind == RANGE_QUOTED) {
           upper.image = upper.image.substring(1, upper.image.length()-1);
         } else {
           endOpen = "*".equals(upper.image);
         }
         // Evaluated in the same order as the arguments of the single call they replace.
         String rangeField = getField(field);
         String lowerBound = startOpen ? null : discardEscapeChar(lower.image);
         String upperBound = endOpen ? null : discardEscapeChar(upper.image);
         q = getRangeQuery(rangeField, lowerBound, upperBound, startInc, endInc);
       }

     // --- quoted phrase ---
     | term=<QUOTED>
       ( fuzzySlop=<FUZZY_SLOP> )?
       ( <CARAT> boost=<NUMBER> )?
       {
         q = handleQuotedTerm(getField(field), term, fuzzySlop);
       }
   )
   {
     return handleBoost(q, boost);
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	// JavaCC generation options for this grammar.
	options {
	// Generate an instance-based (non-static) parser.
	STATIC=false;
	// NOTE(review): the JavaCC documentation describes this option as ignored when
	// USER_CHAR_STREAM is true — confirm whether it has any effect here.
	JAVA_UNICODE_ESCAPE=true;
	// The parser reads from a caller-supplied CharStream implementation.
	USER_CHAR_STREAM=true;
	}

	PARSER_BEGIN(QueryParser)

	package org.apache.solr.parser;

	import java.io.StringReader;
	import java.util.ArrayList;
	import java.util.List;
	import java.util.Locale;

	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.document.DateTools;
	import org.apache.lucene.search.BooleanClause;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.TermRangeQuery;
	import org.apache.lucene.search.TermQuery;
	import org.apache.lucene.util.Version;
	import org.apache.solr.search.SyntaxError;
	import org.apache.solr.search.QParser;


	public class QueryParser extends SolrQueryParserBase {

	  /** The default operator for parsing queries. */
	  public enum Operator { OR, AND }

	  /**
	   * Builds a parser over an empty character stream and then hands the
	   * supplied settings to init(matchVersion, defaultField, parser).
	   */
	  public QueryParser(Version matchVersion, String defaultField, QParser parser) {
	    this(new FastCharStream(new StringReader("")));
	    init(matchVersion, defaultField, parser);
	  }
	}

	PARSER_END(QueryParser)

	/* ***************** */
	/* Token Definitions */
	/* ***************** */

	// Private (#-prefixed) character classes, declared for every lexical state (<*>).
	// They are only building blocks for other token definitions.
	<*> TOKEN : {
	  // a single decimal digit
	  <#_NUM_CHAR: ["0"-"9"] >
	  // every character that follows a backslash is considered as an escaped character
	| <#_ESCAPED_CHAR: "\\" ~[] >
	  // first character of a term: anything except whitespace and the listed syntax
	  // characters, or an escaped character
	| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
	                          "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
	                      | <_ESCAPED_CHAR> ) >
	  // later characters of a term: a term start character, plus "-", "+", "/" and "!"
	| <#_TERM_CHAR: ( <_TERM_START_CHAR>
	                | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") >
	  // space, tab, newline, carriage return, or U+3000
	| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
	  // character allowed inside a double-quoted string (a quote or backslash must be escaped)
	| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
	  // character allowed inside a single-quoted string (a quote or backslash must be escaped)
	| <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
	}

	// Whitespace is discarded in the DEFAULT and Range states.
	// The Boost state is not listed here, so whitespace is not skipped there.
	<DEFAULT, Range> SKIP : {
	< <_WHITESPACE>>
	}

	// Tokens recognized in the DEFAULT lexical state.
	// Several patterns overlap (for example "*" fits STAR, PREFIXTERM and WILDTERM).
	// JavaCC takes the longest match and, on a tie, the token declared first,
	// so the declaration order below is significant.
	<DEFAULT> TOKEN : {
	  // boolean operators, in word or symbol form
	  <AND: ("AND" | "&&") >
	| <OR: ("OR" | "||") >
	| <NOT: ("NOT" | "!") >
	| <PLUS: "+" >
	| <MINUS: "-" >
	  // an operator character immediately followed by whitespace;
	  // the matched image includes that whitespace character
	| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
	| <LPAREN: "(" >
	| <RPAREN: ")" >
	| <COLON: ":" >
	| <STAR: "*" >
	  // switches the lexer to the Boost state
	| <CARAT: "^" > : Boost
	  // double-quoted string, image includes both quotes
	| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
	| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
	  // "~" optionally followed by digits with an optional fractional part
	| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
	  // a lone "*", or a term followed by a single trailing "*"
	| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
	  // term characters mixed with "*" and "?" in any position
	| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
	  // text between two "/" characters; "\/" may appear inside
	| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
	  // range openers switch the lexer to the Range state
	| <RANGEIN_START: "[" > : Range
	| <RANGEEX_START: "{" > : Range
	// TODO: consider using token states instead of inlining SQUOTED
	// | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'">
	// | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
	  // one or more "{!...}" groups of name / name=value entries, followed by any run of
	  // characters other than ")", space, tab, newline, "{" and "^"
	| <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
	}

	// Boost state: an optionally negative decimal number.
	// Matching it returns the lexer to the DEFAULT state.
	<Boost> TOKEN : {
	<NUMBER: ("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
	}

	// Tokens recognized in the Range lexical state.
	<Range> TOKEN : {
	  // the literal keyword separating the two range endpoints
	  <RANGE_TO: "TO">
	  // either closing bracket returns the lexer to the DEFAULT state
	| <RANGEIN_END: "]"> : DEFAULT
	| <RANGEEX_END: "}"> : DEFAULT
	  // double-quoted endpoint; an escaped quote (\") may appear inside
	| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
	  // unquoted endpoint: any run of characters other than space, "]" and "}"
	| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
	}

	// * Query ::= ( Clause )*
	// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

	// Parses an optional boolean connective.
	// Yields CONJ_AND for <AND>, CONJ_OR for <OR>, and CONJ_NONE when neither is present.
	int Conjunction() : {
	  int ret = CONJ_NONE;
	}
	{
	  [
	    <AND> { ret = CONJ_AND; }
	    | <OR> { ret = CONJ_OR; }
	  ]
	  { return ret; }
	}

	// Parses an optional clause modifier.
	// Yields MOD_REQ for <PLUS>, MOD_NOT for <MINUS> or <NOT>, and MOD_NONE when absent.
	int Modifiers() : {
	  int ret = MOD_NONE;
	}
	{
	  [
	    <PLUS> { ret = MOD_REQ; }
	    | <MINUS> { ret = MOD_NOT; }
	    | <NOT> { ret = MOD_NOT; }
	  ]
	  { return ret; }
	}

	// Entry production: parses one complete query and then requires <EOF>,
	// so that there is no garbage after the query string.
	Query TopLevelQuery(String field) throws SyntaxError :
	{
	  Query parsed;
	}
	{
	  parsed=Query(field) <EOF>
	  { return parsed; }
	}

	// Parses one or more clauses, each optionally preceded by a modifier and (after the
	// first clause) by a conjunction, and collects them through addClause(...).
	// When exactly one clause was collected and the first clause carried no modifier,
	// that first clause's query is returned directly; otherwise the result of
	// getBooleanQuery(clauses) is returned.
	Query Query(String field) throws SyntaxError :
	{
	  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
	  Query clauseQuery;
	  Query unmodifiedFirst = null;
	  int conj;
	  int mods;
	}
	{
	  mods=Modifiers() clauseQuery=Clause(field)
	  {
	    addClause(clauses, CONJ_NONE, mods, clauseQuery);
	    if (mods == MOD_NONE) {
	      unmodifiedFirst = clauseQuery;
	    }
	  }
	  (
	    conj=Conjunction() mods=Modifiers() clauseQuery=Clause(field)
	    { addClause(clauses, conj, mods, clauseQuery); }
	  )*
	  {
	    if (clauses.size() != 1 || unmodifiedFirst == null) {
	      return getBooleanQuery(clauses);
	    } else {
	      return unmodifiedFirst;
	    }
	  }
	}

	// Parses a single clause: an optional "field:" (or "*:") prefix followed by a term,
	// a parenthesized sub-query, or a local-params expression. The last two may carry
	// a "^number" boost, which is applied through handleBoost(...).
	Query Clause(String field) throws SyntaxError : {
	  Query q;
	  Token fieldToken=null, boost=null;
	  Token localParams=null;
	}
	{
	  // Two tokens of lookahead are needed to tell "name:" apart from a bare term.
	  [
	    LOOKAHEAD(2)
	    (
	      fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
	      | <STAR> <COLON> {field="*";}
	    )
	  ]

	  (
	    q=Term(field)
	    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
	    | (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } )
	  )
	  { return handleBoost(q, boost); }
	}


	// Parses one term-level expression in one of three forms:
	//   1. a bare token (term, "*", prefix, wildcard, regexp, number, bare operator),
	//      optionally followed by a fuzzy slop and/or a boost;
	//   2. a range "[a TO b]" / "{a TO b}" (brackets may be mixed, "TO" is optional),
	//      optionally followed by a boost;
	//   3. a quoted phrase, optionally followed by a slop and/or a boost.
	// The resulting query is passed through handleBoost(...) before being returned.
	Query Term(String field) throws SyntaxError : {
	  Token term, boost=null, fuzzySlop=null, goop1, goop2;
	  boolean prefix = false;
	  boolean wildcard = false;
	  boolean fuzzy = false;
	  boolean regexp = false;
	  boolean startInc=false;
	  boolean endInc=false;
	  Query q;
	}
	{
	  (
	    (
	      term=<TERM>
	      | term=<STAR> { wildcard=true; }
	      | term=<PREFIXTERM> { prefix=true; }
	      | term=<WILDTERM> { wildcard=true; }
	      | term=<REGEXPTERM> { regexp=true; }
	      | term=<NUMBER>
	      // keep only the first character of the matched operator token
	      | term=<BAREOPER> { term.image = term.image.substring(0,1); }
	    )
	    [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
	    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
	    {
	      q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
	    }
	    | ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
	        ( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> )
	        [ <RANGE_TO> ]
	        ( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> )
	        ( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
	      [ <CARAT> boost=<NUMBER> ]
	      {
	        boolean startOpen=false;
	        boolean endOpen=false;
	        // Quoted endpoints lose their surrounding quotes; an unquoted "*" means open-ended.
	        if (goop1.kind == RANGE_QUOTED) {
	          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
	        } else if ("*".equals(goop1.image)) {
	          startOpen=true;
	        }
	        if (goop2.kind == RANGE_QUOTED) {
	          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
	        } else if ("*".equals(goop2.image)) {
	          endOpen=true;
	        }
	        // An open endpoint is passed to getRangeQuery as null.
	        q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
	      }
	    | term=<QUOTED>
	      [ fuzzySlop=<FUZZY_SLOP> ]
	      [ <CARAT> boost=<NUMBER> ]
	      {
	        q = handleQuotedTerm(getField(field), term, fuzzySlop);
	      }
	  )
	  {
	    return handleBoost(q, boost);
	  }
	}