blob: dafc329146f461a4f435ec51761b29b80fa829ea [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
options {
STATIC=false;
JAVA_UNICODE_ESCAPE=true;
USER_CHAR_STREAM=true;
}
PARSER_BEGIN(QueryParser)
package org.apache.solr.parser;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
public class QueryParser extends SolrQueryParserBase {
/** The default operator for parsing queries.
*/
static public enum Operator { OR, AND }
public QueryParser(Version matchVersion, String defaultField, QParser parser) {
this(new FastCharStream(new StringReader("")));
init(matchVersion, defaultField, parser);
}
}
PARSER_END(QueryParser)
/* ***************** */
/* Token Definitions */
/* ***************** */
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
// every character that follows a backslash is considered as an escaped character
| <#_ESCAPED_CHAR: "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
| <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR>
| <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
| <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
}
<DEFAULT, Range> SKIP : {
< <_WHITESPACE>>
}
<DEFAULT> TOKEN : {
<AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <PLUS: "+" >
| <MINUS: "-" >
| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <COLON: ":" >
| <STAR: "*" >
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range
// TODO: consider using token states instead of inlining SQUOTED
// | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'">
// | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
| <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
}
<Boost> TOKEN : {
<NUMBER: ("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
<Range> TOKEN : {
<RANGE_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEEX_END: "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
}
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
int Conjunction() : {
int ret = CONJ_NONE;
}
{
[
<AND> { ret = CONJ_AND; }
| <OR> { ret = CONJ_OR; }
]
{ return ret; }
}
int Modifiers() : {
int ret = MOD_NONE;
}
{
[
<PLUS> { ret = MOD_REQ; }
| <MINUS> { ret = MOD_NOT; }
| <NOT> { ret = MOD_NOT; }
]
{ return ret; }
}
// This makes sure that there is no garbage after the query string
Query TopLevelQuery(String field) throws SyntaxError :
{
Query q;
}
{
q=Query(field) <EOF>
{
return q;
}
}
Query Query(String field) throws SyntaxError :
{
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
int conj, mods;
}
{
mods=Modifiers() q=Clause(field)
{
addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE)
firstQuery=q;
}
(
conj=Conjunction() mods=Modifiers() q=Clause(field)
{ addClause(clauses, conj, mods, q); }
)*
{
if (clauses.size() == 1 && firstQuery != null)
return firstQuery;
else {
return getBooleanQuery(clauses);
}
}
}
Query Clause(String field) throws SyntaxError : {
Query q;
Token fieldToken=null, boost=null;
Token localParams=null;
}
{
[
LOOKAHEAD(2)
(
fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
| <STAR> <COLON> {field="*";}
)
]
(
q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } )
)
{ return handleBoost(q, boost); }
}
Query Term(String field) throws SyntaxError : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean regexp = false;
boolean startInc=false;
boolean endInc=false;
Query q;
}
{
(
(
term=<TERM>
| term=<STAR> { wildcard=true; }
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
| term=<BAREOPER> { term.image = term.image.substring(0,1); }
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
{
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
}
| ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> )
[ <RANGE_TO> ]
( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> )
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
[ <CARAT> boost=<NUMBER> ]
{
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
}
| term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ]
[ <CARAT> boost=<NUMBER> ]
{
q = handleQuotedTerm(getField(field), term, fuzzySlop);
}
)
{
return handleBoost(q, boost);
}
}