| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // JavaCC generation options for this grammar. |
| options { |
| // Generate a parser with instance (non-static) members; the QueryParser |
| // class below is created through an ordinary constructor. |
| STATIC=false; |
| // Ask for Java-style unicode escape processing in the input. |
| // NOTE(review): JavaCC documents this option as ignored when |
| // USER_CHAR_STREAM is true - confirm whether it has any effect here. |
| JAVA_UNICODE_ESCAPE=true; |
| // The character stream implementation is supplied by user code; the |
| // QueryParser constructor below passes in a FastCharStream. |
| USER_CHAR_STREAM=true; |
| } |
| |
| PARSER_BEGIN(QueryParser) |
| |
| package org.apache.solr.parser; |
| |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Locale; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.document.DateTools; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TermRangeQuery; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.util.Version; |
| import org.apache.solr.search.SyntaxError; |
| import org.apache.solr.search.QParser; |
| |
| |
| /** |
|  * Parser class that JavaCC completes with the productions of this grammar. |
|  * It extends SolrQueryParserBase, which is expected to supply init(...) and |
|  * the helper methods called from the grammar actions (not visible here). |
|  */ |
| public class QueryParser extends SolrQueryParserBase { |
|   /** The default operator for parsing queries. */ |
|   public static enum Operator { OR, AND } |
| |
|   /** |
|    * Creates a parser over an empty input stream and then hands the given |
|    * arguments to init(...). |
|    * |
|    * @param matchVersion version value forwarded to init(...) |
|    * @param defaultField field name forwarded to init(...) |
|    * @param parser       QParser forwarded to init(...) |
|    */ |
|   public QueryParser(Version matchVersion, String defaultField, QParser parser) { |
|     this(new FastCharStream(new StringReader(""))); |
|     init(matchVersion, defaultField, parser); |
|   } |
| } |
| |
| PARSER_END(QueryParser) |
| |
| /* ***************** */ |
| /* Token Definitions */ |
| /* ***************** */ |
| |
| // Private ("#"-prefixed) helper regular expressions, declared for every |
| // lexical state (<*>). They are only building blocks for the tokens defined |
| // further down and are never returned to the parser themselves. |
| <*> TOKEN : { |
| // a single ASCII digit |
| <#_NUM_CHAR: ["0"-"9"] > |
| // every character that follows a backslash is considered as an escaped character |
| | <#_ESCAPED_CHAR: "\\" ~[] > |
| // first character of a term: anything except whitespace and the listed |
| // query-syntax characters, or any backslash-escaped character |
| | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", |
| "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ] |
| | <_ESCAPED_CHAR> ) > |
| // any later character of a term: a term-start character, an escaped |
| // character, or one of "-", "+", "/", "!" |
| | <#_TERM_CHAR: ( <_TERM_START_CHAR> |
| | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") > |
| // whitespace: space, tab, newline, carriage return and U+3000 |
| | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > |
| // character inside a double-quoted string: anything but an unescaped |
| // double quote or backslash, or an escaped character |
| | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > |
| // character inside a single-quoted string: anything but an unescaped |
| // single quote or backslash, or an escaped character |
| | <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) > |
| } |
| |
| // Whitespace is discarded in the DEFAULT and Range lexical states. |
| // The Boost state is not listed, so whitespace is not skipped there. |
| <DEFAULT, Range> SKIP : { |
| < <_WHITESPACE>> |
| } |
| |
| // Tokens recognised in the DEFAULT lexical state. |
| // NOTE: declaration order is significant - when two definitions match the |
| // same longest input, JavaCC picks the one declared first. |
| <DEFAULT> TOKEN : { |
| // boolean operators, in keyword and symbolic spelling |
| <AND: ("AND" | "&&") > |
| | <OR: ("OR" | "||") > |
| | <NOT: ("NOT" | "!") > |
| // clause modifiers |
| | <PLUS: "+" > |
| | <MINUS: "-" > |
| // an operator character directly followed by whitespace; Term() accepts it |
| // as a term and keeps only the operator character |
| | <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> > |
| | <LPAREN: "(" > |
| | <RPAREN: ")" > |
| | <COLON: ":" > |
| | <STAR: "*" > |
| // "^" introduces a boost: switch to the Boost state, where NUMBER is lexed |
| | <CARAT: "^" > : Boost |
| // double-quoted string |
| | <QUOTED: "\"" (<_QUOTED_CHAR>)* "\""> |
| // plain term |
| | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > |
| // "~" optionally followed by an integer or decimal number |
| | <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? > |
| // a lone "*" or a term ending in "*" |
| // NOTE(review): a lone "*" also matches STAR, which is declared earlier and |
| // therefore presumably wins - confirm. |
| | <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) > |
| // term containing "*" or "?" wildcards at any position |
| | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > |
| // regular expression delimited by "/"; "\/" may appear inside |
| | <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" > |
| // range openers switch to the Range state (inclusive "[" / exclusive "{") |
| | <RANGEIN_START: "[" > : Range |
| | <RANGEEX_START: "{" > : Range |
| // TODO: consider using token states instead of inlining SQUOTED |
| // | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'"> |
| // | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > |
| // local params: one or more "{!...}" groups of key or key=value entries |
| // (value double-quoted, single-quoted or unquoted), followed by any run of |
| // characters other than ")", space, tab, newline, "{" and "^" |
| | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > |
| } |
| |
| // Boost state (entered after CARAT): the only token is a number with an |
| // optional leading "-" and optional fraction; lexing it returns to DEFAULT. |
| <Boost> TOKEN : { |
| <NUMBER: ("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT |
| } |
| |
| // Range state (entered after "[" or "{"): tokens that make up a range |
| // expression. |
| <Range> TOKEN : { |
| // the literal keyword separating the two bounds |
| <RANGE_TO: "TO"> |
| // closing bracket/brace (inclusive "]" / exclusive "}") returns to DEFAULT |
| | <RANGEIN_END: "]"> : DEFAULT |
| | <RANGEEX_END: "}"> : DEFAULT |
| // double-quoted bound; must contain at least one character |
| | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\""> |
| // unquoted bound: any run of characters other than space, "]" and "}" |
| | <RANGE_GOOP: (~[ " ", "]", "}" ])+ > |
| } |
| |
| // * Query ::= ( Clause )* |
| // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) |
| |
| // Parses an optional conjunction between two clauses. |
| // Returns CONJ_AND for an AND token, CONJ_OR for an OR token and CONJ_NONE |
| // when neither is present. |
| int Conjunction() : { |
|   int conjunction = CONJ_NONE; |
| } |
| { |
|   ( |
|     <AND> { conjunction = CONJ_AND; } |
|   | <OR>  { conjunction = CONJ_OR; } |
|   )? |
|   { return conjunction; } |
| } |
| |
| // Parses an optional clause modifier. |
| // Returns MOD_REQ for a PLUS token, MOD_NOT for a MINUS or NOT token and |
| // MOD_NONE when no modifier is present. |
| int Modifiers() : { |
|   int modifier = MOD_NONE; |
| } |
| { |
|   ( |
|     <PLUS> { modifier = MOD_REQ; } |
|   | ( <MINUS> | <NOT> ) { modifier = MOD_NOT; } |
|   )? |
|   { return modifier; } |
| } |
| |
| // Entry production: parses a whole query and then requires end of input, |
| // so that garbage after the query string is rejected. |
| Query TopLevelQuery(String field) throws SyntaxError : |
| { |
|   Query parsed; |
| } |
| { |
|   parsed=Query(field) <EOF> |
|   { return parsed; } |
| } |
| |
| // Parses one clause followed by any number of further clauses, each with an |
| // optional conjunction and optional modifier, and hands every clause to |
| // addClause(...). When the list ends up holding exactly one clause and the |
| // first clause carried no modifier, that clause's query is returned |
| // directly; otherwise the result comes from getBooleanQuery(...). |
| Query Query(String field) throws SyntaxError : |
| { |
|   List<BooleanClause> collected = new ArrayList<BooleanClause>(); |
|   Query current, unmodifiedFirst = null; |
|   int conjunction, modifier; |
| } |
| { |
|   modifier=Modifiers() current=Clause(field) |
|   { |
|     addClause(collected, CONJ_NONE, modifier, current); |
|     // remember the first query only if it was written without a modifier |
|     if (modifier == MOD_NONE) { |
|       unmodifiedFirst = current; |
|     } |
|   } |
|   ( |
|     conjunction=Conjunction() modifier=Modifiers() current=Clause(field) |
|     { addClause(collected, conjunction, modifier, current); } |
|   )* |
|   { |
|     return (unmodifiedFirst != null && collected.size() == 1) |
|         ? unmodifiedFirst |
|         : getBooleanQuery(collected); |
|   } |
| } |
| |
| // Parses a single clause. An optional "name:" or "*:" prefix (decided with |
| // two tokens of lookahead) replaces the field for this clause. The clause |
| // body is a term, a parenthesized sub-query or a local-params block; the |
| // latter two may carry a "^number" boost, which is applied via handleBoost. |
| Query Clause(String field) throws SyntaxError : { |
|   Query result; |
|   Token fieldName=null, boostTok=null; |
|   Token params=null; |
| } |
| { |
|   ( |
|     LOOKAHEAD(2) |
|     ( |
|       fieldName=<TERM> <COLON> { field = discardEscapeChar(fieldName.image); } |
|     | <STAR> <COLON> { field = "*"; } |
|     ) |
|   )? |
| |
|   ( |
|     result=Term(field) |
|   | <LPAREN> result=Query(field) <RPAREN> [ <CARAT> boostTok=<NUMBER> ] |
|   | params=<LPARAMS> [ <CARAT> boostTok=<NUMBER> ] |
|     { result = getLocalParams(field, params.image); } |
|   ) |
|   { return handleBoost(result, boostTok); } |
| } |
| |
| |
| // Parses one term-level expression and returns its query with any boost |
| // applied through handleBoost. Three shapes are accepted: |
| //   1. a bare token (term, "*", prefix, wildcard, regexp, number or bare |
| //      operator) with optional fuzzy slop and optional boost; |
| //   2. a range "[a TO b]" / "{a TO b}" (mixed brackets allowed, "TO" |
| //      optional) with optional boost; |
| //   3. a double-quoted string with optional slop and optional boost. |
| Query Term(String field) throws SyntaxError : { |
|   Token termTok, boostTok=null, slopTok=null, lowerTok, upperTok; |
|   boolean isPrefix = false; |
|   boolean isWildcard = false; |
|   boolean isFuzzy = false; |
|   boolean isRegexp = false; |
|   boolean lowerInclusive = false; |
|   boolean upperInclusive = false; |
|   Query result; |
| } |
| { |
|   ( |
|     ( |
|       termTok=<TERM> |
|     | termTok=<STAR> { isWildcard = true; } |
|     | termTok=<PREFIXTERM> { isPrefix = true; } |
|     | termTok=<WILDTERM> { isWildcard = true; } |
|     | termTok=<REGEXPTERM> { isRegexp = true; } |
|     | termTok=<NUMBER> |
|       // BAREOPER includes the trailing whitespace; keep only the operator |
|     | termTok=<BAREOPER> { termTok.image = termTok.image.substring(0,1); } |
|     ) |
|     ( slopTok=<FUZZY_SLOP> { isFuzzy = true; } )? |
|     ( <CARAT> boostTok=<NUMBER> ( slopTok=<FUZZY_SLOP> { isFuzzy = true; } )? )? |
|     { |
|       result = handleBareTokenQuery(getField(field), termTok, slopTok, isPrefix, isWildcard, isFuzzy, isRegexp); |
|     } |
|   | ( |
|       ( <RANGEIN_START> { lowerInclusive = true; } | <RANGEEX_START> ) |
|       ( lowerTok=<RANGE_GOOP> | lowerTok=<RANGE_QUOTED> ) |
|       ( <RANGE_TO> )? |
|       ( upperTok=<RANGE_GOOP> | upperTok=<RANGE_QUOTED> ) |
|       ( <RANGEIN_END> { upperInclusive = true; } | <RANGEEX_END> ) |
|     ) |
|     ( <CARAT> boostTok=<NUMBER> )? |
|     { |
|       // only an unquoted "*" marks an open bound; a quoted "*" is a literal |
|       boolean lowerOpen = lowerTok.kind != RANGE_QUOTED && "*".equals(lowerTok.image); |
|       boolean upperOpen = upperTok.kind != RANGE_QUOTED && "*".equals(upperTok.image); |
|       // strip the surrounding double quotes from quoted bounds |
|       if (lowerTok.kind == RANGE_QUOTED) { |
|         lowerTok.image = lowerTok.image.substring(1, lowerTok.image.length()-1); |
|       } |
|       if (upperTok.kind == RANGE_QUOTED) { |
|         upperTok.image = upperTok.image.substring(1, upperTok.image.length()-1); |
|       } |
|       result = getRangeQuery( |
|           getField(field), |
|           lowerOpen ? null : discardEscapeChar(lowerTok.image), |
|           upperOpen ? null : discardEscapeChar(upperTok.image), |
|           lowerInclusive, |
|           upperInclusive); |
|     } |
|   | termTok=<QUOTED> |
|     ( slopTok=<FUZZY_SLOP> )? |
|     ( <CARAT> boostTok=<NUMBER> )? |
|     { |
|       result = handleQuotedTerm(getField(field), termTok, slopTok); |
|     } |
|   ) |
|   { |
|     return handleBoost(result, boostTok); |
|   } |
| } |