""" @package antlr3.tree | |
@brief ANTLR3 runtime package, tree module | |
This module contains all support classes for AST construction and tree parsers. | |
""" | |
# begin[licence] | |
# | |
# [The "BSD licence"] | |
# Copyright (c) 2005-2008 Terence Parr | |
# All rights reserved. | |
# | |
# Redistribution and use in source and binary forms, with or without | |
# modification, are permitted provided that the following conditions | |
# are met: | |
# 1. Redistributions of source code must retain the above copyright | |
# notice, this list of conditions and the following disclaimer. | |
# 2. Redistributions in binary form must reproduce the above copyright | |
# notice, this list of conditions and the following disclaimer in the | |
# documentation and/or other materials provided with the distribution. | |
# 3. The name of the author may not be used to endorse or promote products | |
# derived from this software without specific prior written permission. | |
# | |
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
# | |
# end[licence] | |
# lot's of docstrings are missing, don't complain for now... | |
# pylint: disable-msg=C0111 | |
import re | |
from antlr3.constants import UP, DOWN, EOF, INVALID_TOKEN_TYPE | |
from antlr3.recognizers import BaseRecognizer, RuleReturnScope | |
from antlr3.streams import IntStream | |
from antlr3.tokens import CommonToken, Token, INVALID_TOKEN | |
from antlr3.exceptions import MismatchedTreeNodeException, \ | |
MissingTokenException, UnwantedTokenException, MismatchedTokenException, \ | |
NoViableAltException | |
############################################################################ | |
# | |
# tree related exceptions | |
# | |
############################################################################ | |
class RewriteCardinalityException(RuntimeError): | |
""" | |
@brief Base class for all exceptions thrown during AST rewrite construction. | |
This signifies a case where the cardinality of two or more elements | |
in a subrule are different: (ID INT)+ where |ID|!=|INT| | |
""" | |
def __init__(self, elementDescription): | |
RuntimeError.__init__(self, elementDescription) | |
self.elementDescription = elementDescription | |
def getMessage(self): | |
return self.elementDescription | |
class RewriteEarlyExitException(RewriteCardinalityException): | |
"""@brief No elements within a (...)+ in a rewrite rule""" | |
def __init__(self, elementDescription=None): | |
RewriteCardinalityException.__init__(self, elementDescription) | |
class RewriteEmptyStreamException(RewriteCardinalityException): | |
""" | |
@brief Ref to ID or expr but no tokens in ID stream or subtrees in expr stream | |
""" | |
pass | |
############################################################################ | |
# | |
# basic Tree and TreeAdaptor interfaces | |
# | |
############################################################################ | |
class Tree(object): | |
""" | |
@brief Abstract baseclass for tree nodes. | |
What does a tree look like? ANTLR has a number of support classes | |
such as CommonTreeNodeStream that work on these kinds of trees. You | |
don't have to make your trees implement this interface, but if you do, | |
you'll be able to use more support code. | |
NOTE: When constructing trees, ANTLR can build any kind of tree; it can | |
even use Token objects as trees if you add a child list to your tokens. | |
This is a tree node without any payload; just navigation and factory stuff. | |
""" | |
def getChild(self, i): | |
raise NotImplementedError | |
def getChildCount(self): | |
raise NotImplementedError | |
def getParent(self): | |
"""Tree tracks parent and child index now > 3.0""" | |
raise NotImplementedError | |
def setParent(self, t): | |
"""Tree tracks parent and child index now > 3.0""" | |
raise NotImplementedError | |
def hasAncestor(self, ttype): | |
"""Walk upwards looking for ancestor with this token type.""" | |
raise NotImplementedError | |
def getAncestor(self, ttype): | |
"""Walk upwards and get first ancestor with this token type.""" | |
raise NotImplementedError | |
def getAncestors(self): | |
"""Return a list of all ancestors of this node. | |
The first node of list is the root and the last is the parent of | |
this node. | |
""" | |
raise NotImplementedError | |
def getChildIndex(self): | |
"""This node is what child index? 0..n-1""" | |
raise NotImplementedError | |
def setChildIndex(self, index): | |
"""This node is what child index? 0..n-1""" | |
raise NotImplementedError | |
def freshenParentAndChildIndexes(self): | |
"""Set the parent and child index values for all children""" | |
raise NotImplementedError | |
def addChild(self, t): | |
""" | |
Add t as a child to this node. If t is null, do nothing. If t | |
is nil, add all children of t to this' children. | |
""" | |
raise NotImplementedError | |
def setChild(self, i, t): | |
"""Set ith child (0..n-1) to t; t must be non-null and non-nil node""" | |
raise NotImplementedError | |
def deleteChild(self, i): | |
raise NotImplementedError | |
def replaceChildren(self, startChildIndex, stopChildIndex, t): | |
""" | |
Delete children from start to stop and replace with t even if t is | |
a list (nil-root tree). num of children can increase or decrease. | |
For huge child lists, inserting children can force walking rest of | |
children to set their childindex; could be slow. | |
""" | |
raise NotImplementedError | |
def isNil(self): | |
""" | |
Indicates the node is a nil node but may still have children, meaning | |
the tree is a flat list. | |
""" | |
raise NotImplementedError | |
def getTokenStartIndex(self): | |
""" | |
What is the smallest token index (indexing from 0) for this node | |
and its children? | |
""" | |
raise NotImplementedError | |
def setTokenStartIndex(self, index): | |
raise NotImplementedError | |
def getTokenStopIndex(self): | |
""" | |
What is the largest token index (indexing from 0) for this node | |
and its children? | |
""" | |
raise NotImplementedError | |
def setTokenStopIndex(self, index): | |
raise NotImplementedError | |
def dupNode(self): | |
raise NotImplementedError | |
def getType(self): | |
"""Return a token type; needed for tree parsing.""" | |
raise NotImplementedError | |
def getText(self): | |
raise NotImplementedError | |
def getLine(self): | |
""" | |
In case we don't have a token payload, what is the line for errors? | |
""" | |
raise NotImplementedError | |
def getCharPositionInLine(self): | |
raise NotImplementedError | |
def toStringTree(self): | |
raise NotImplementedError | |
def toString(self): | |
raise NotImplementedError | |
class TreeAdaptor(object): | |
""" | |
@brief Abstract baseclass for tree adaptors. | |
How to create and navigate trees. Rather than have a separate factory | |
and adaptor, I've merged them. Makes sense to encapsulate. | |
This takes the place of the tree construction code generated in the | |
generated code in 2.x and the ASTFactory. | |
I do not need to know the type of a tree at all so they are all | |
generic Objects. This may increase the amount of typecasting needed. :( | |
""" | |
# C o n s t r u c t i o n | |
def createWithPayload(self, payload): | |
""" | |
Create a tree node from Token object; for CommonTree type trees, | |
then the token just becomes the payload. This is the most | |
common create call. | |
Override if you want another kind of node to be built. | |
""" | |
raise NotImplementedError | |
def dupNode(self, treeNode): | |
"""Duplicate a single tree node. | |
Override if you want another kind of node to be built.""" | |
raise NotImplementedError | |
def dupTree(self, tree): | |
"""Duplicate tree recursively, using dupNode() for each node""" | |
raise NotImplementedError | |
def nil(self): | |
""" | |
Return a nil node (an empty but non-null node) that can hold | |
a list of element as the children. If you want a flat tree (a list) | |
use "t=adaptor.nil(); t.addChild(x); t.addChild(y);" | |
""" | |
raise NotImplementedError | |
def errorNode(self, input, start, stop, exc): | |
""" | |
Return a tree node representing an error. This node records the | |
tokens consumed during error recovery. The start token indicates the | |
input symbol at which the error was detected. The stop token indicates | |
the last symbol consumed during recovery. | |
You must specify the input stream so that the erroneous text can | |
be packaged up in the error node. The exception could be useful | |
to some applications; default implementation stores ptr to it in | |
the CommonErrorNode. | |
This only makes sense during token parsing, not tree parsing. | |
Tree parsing should happen only when parsing and tree construction | |
succeed. | |
""" | |
raise NotImplementedError | |
def isNil(self, tree): | |
"""Is tree considered a nil node used to make lists of child nodes?""" | |
raise NotImplementedError | |
def addChild(self, t, child): | |
""" | |
Add a child to the tree t. If child is a flat tree (a list), make all | |
in list children of t. Warning: if t has no children, but child does | |
and child isNil then you can decide it is ok to move children to t via | |
t.children = child.children; i.e., without copying the array. Just | |
make sure that this is consistent with have the user will build | |
ASTs. Do nothing if t or child is null. | |
""" | |
raise NotImplementedError | |
def becomeRoot(self, newRoot, oldRoot): | |
""" | |
If oldRoot is a nil root, just copy or move the children to newRoot. | |
If not a nil root, make oldRoot a child of newRoot. | |
old=^(nil a b c), new=r yields ^(r a b c) | |
old=^(a b c), new=r yields ^(r ^(a b c)) | |
If newRoot is a nil-rooted single child tree, use the single | |
child as the new root node. | |
old=^(nil a b c), new=^(nil r) yields ^(r a b c) | |
old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) | |
If oldRoot was null, it's ok, just return newRoot (even if isNil). | |
old=null, new=r yields r | |
old=null, new=^(nil r) yields ^(nil r) | |
Return newRoot. Throw an exception if newRoot is not a | |
simple node or nil root with a single child node--it must be a root | |
node. If newRoot is ^(nil x) return x as newRoot. | |
Be advised that it's ok for newRoot to point at oldRoot's | |
children; i.e., you don't have to copy the list. We are | |
constructing these nodes so we should have this control for | |
efficiency. | |
""" | |
raise NotImplementedError | |
def rulePostProcessing(self, root): | |
""" | |
Given the root of the subtree created for this rule, post process | |
it to do any simplifications or whatever you want. A required | |
behavior is to convert ^(nil singleSubtree) to singleSubtree | |
as the setting of start/stop indexes relies on a single non-nil root | |
for non-flat trees. | |
Flat trees such as for lists like "idlist : ID+ ;" are left alone | |
unless there is only one ID. For a list, the start/stop indexes | |
are set in the nil node. | |
This method is executed after all rule tree construction and right | |
before setTokenBoundaries(). | |
""" | |
raise NotImplementedError | |
def getUniqueID(self, node): | |
"""For identifying trees. | |
How to identify nodes so we can say "add node to a prior node"? | |
Even becomeRoot is an issue. Use System.identityHashCode(node) | |
usually. | |
""" | |
raise NotImplementedError | |
# R e w r i t e R u l e s | |
def createFromToken(self, tokenType, fromToken, text=None): | |
""" | |
Create a new node derived from a token, with a new token type and | |
(optionally) new text. | |
This is invoked from an imaginary node ref on right side of a | |
rewrite rule as IMAG[$tokenLabel] or IMAG[$tokenLabel "IMAG"]. | |
This should invoke createToken(Token). | |
""" | |
raise NotImplementedError | |
def createFromType(self, tokenType, text): | |
"""Create a new node derived from a token, with a new token type. | |
This is invoked from an imaginary node ref on right side of a | |
rewrite rule as IMAG["IMAG"]. | |
This should invoke createToken(int,String). | |
""" | |
raise NotImplementedError | |
# C o n t e n t | |
def getType(self, t): | |
"""For tree parsing, I need to know the token type of a node""" | |
raise NotImplementedError | |
def setType(self, t, type): | |
"""Node constructors can set the type of a node""" | |
raise NotImplementedError | |
def getText(self, t): | |
raise NotImplementedError | |
def setText(self, t, text): | |
"""Node constructors can set the text of a node""" | |
raise NotImplementedError | |
def getToken(self, t): | |
"""Return the token object from which this node was created. | |
Currently used only for printing an error message. | |
The error display routine in BaseRecognizer needs to | |
display where the input the error occurred. If your | |
tree of limitation does not store information that can | |
lead you to the token, you can create a token filled with | |
the appropriate information and pass that back. See | |
BaseRecognizer.getErrorMessage(). | |
""" | |
raise NotImplementedError | |
def setTokenBoundaries(self, t, startToken, stopToken): | |
""" | |
Where are the bounds in the input token stream for this node and | |
all children? Each rule that creates AST nodes will call this | |
method right before returning. Flat trees (i.e., lists) will | |
still usually have a nil root node just to hold the children list. | |
That node would contain the start/stop indexes then. | |
""" | |
raise NotImplementedError | |
def getTokenStartIndex(self, t): | |
""" | |
Get the token start index for this subtree; return -1 if no such index | |
""" | |
raise NotImplementedError | |
def getTokenStopIndex(self, t): | |
""" | |
Get the token stop index for this subtree; return -1 if no such index | |
""" | |
raise NotImplementedError | |
# N a v i g a t i o n / T r e e P a r s i n g | |
def getChild(self, t, i): | |
"""Get a child 0..n-1 node""" | |
raise NotImplementedError | |
def setChild(self, t, i, child): | |
"""Set ith child (0..n-1) to t; t must be non-null and non-nil node""" | |
raise NotImplementedError | |
def deleteChild(self, t, i): | |
"""Remove ith child and shift children down from right.""" | |
raise NotImplementedError | |
def getChildCount(self, t): | |
"""How many children? If 0, then this is a leaf node""" | |
raise NotImplementedError | |
def getParent(self, t): | |
""" | |
Who is the parent node of this node; if null, implies node is root. | |
If your node type doesn't handle this, it's ok but the tree rewrites | |
in tree parsers need this functionality. | |
""" | |
raise NotImplementedError | |
def setParent(self, t, parent): | |
""" | |
Who is the parent node of this node; if null, implies node is root. | |
If your node type doesn't handle this, it's ok but the tree rewrites | |
in tree parsers need this functionality. | |
""" | |
raise NotImplementedError | |
def getChildIndex(self, t): | |
""" | |
What index is this node in the child list? Range: 0..n-1 | |
If your node type doesn't handle this, it's ok but the tree rewrites | |
in tree parsers need this functionality. | |
""" | |
raise NotImplementedError | |
def setChildIndex(self, t, index): | |
""" | |
What index is this node in the child list? Range: 0..n-1 | |
If your node type doesn't handle this, it's ok but the tree rewrites | |
in tree parsers need this functionality. | |
""" | |
raise NotImplementedError | |
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): | |
""" | |
Replace from start to stop child index of parent with t, which might | |
be a list. Number of children may be different | |
after this call. | |
If parent is null, don't do anything; must be at root of overall tree. | |
Can't replace whatever points to the parent externally. Do nothing. | |
""" | |
raise NotImplementedError | |
# Misc | |
def create(self, *args): | |
""" | |
Deprecated, use createWithPayload, createFromToken or createFromType. | |
This method only exists to mimic the Java interface of TreeAdaptor. | |
""" | |
if len(args) == 1 and isinstance(args[0], Token): | |
# Object create(Token payload); | |
## warnings.warn( | |
## "Using create() is deprecated, use createWithPayload()", | |
## DeprecationWarning, | |
## stacklevel=2 | |
## ) | |
return self.createWithPayload(args[0]) | |
if (len(args) == 2 | |
and isinstance(args[0], (int, long)) | |
and isinstance(args[1], Token) | |
): | |
# Object create(int tokenType, Token fromToken); | |
## warnings.warn( | |
## "Using create() is deprecated, use createFromToken()", | |
## DeprecationWarning, | |
## stacklevel=2 | |
## ) | |
return self.createFromToken(args[0], args[1]) | |
if (len(args) == 3 | |
and isinstance(args[0], (int, long)) | |
and isinstance(args[1], Token) | |
and isinstance(args[2], basestring) | |
): | |
# Object create(int tokenType, Token fromToken, String text); | |
## warnings.warn( | |
## "Using create() is deprecated, use createFromToken()", | |
## DeprecationWarning, | |
## stacklevel=2 | |
## ) | |
return self.createFromToken(args[0], args[1], args[2]) | |
if (len(args) == 2 | |
and isinstance(args[0], (int, long)) | |
and isinstance(args[1], basestring) | |
): | |
# Object create(int tokenType, String text); | |
## warnings.warn( | |
## "Using create() is deprecated, use createFromType()", | |
## DeprecationWarning, | |
## stacklevel=2 | |
## ) | |
return self.createFromType(args[0], args[1]) | |
raise TypeError( | |
"No create method with this signature found: %s" | |
% (', '.join(type(v).__name__ for v in args)) | |
) | |
############################################################################ | |
# | |
# base implementation of Tree and TreeAdaptor | |
# | |
# Tree | |
# \- BaseTree | |
# | |
# TreeAdaptor | |
# \- BaseTreeAdaptor | |
# | |
############################################################################ | |
class BaseTree(Tree): | |
""" | |
@brief A generic tree implementation with no payload. | |
You must subclass to | |
actually have any user data. ANTLR v3 uses a list of children approach | |
instead of the child-sibling approach in v2. A flat tree (a list) is | |
an empty node whose children represent the list. An empty, but | |
non-null node is called "nil". | |
""" | |
# BaseTree is abstract, no need to complain about not implemented abstract | |
# methods | |
# pylint: disable-msg=W0223 | |
def __init__(self, node=None): | |
""" | |
Create a new node from an existing node does nothing for BaseTree | |
as there are no fields other than the children list, which cannot | |
be copied as the children are not considered part of this node. | |
""" | |
Tree.__init__(self) | |
self.children = [] | |
self.parent = None | |
self.childIndex = 0 | |
def getChild(self, i): | |
try: | |
return self.children[i] | |
except IndexError: | |
return None | |
def getChildren(self): | |
"""@brief Get the children internal List | |
Note that if you directly mess with | |
the list, do so at your own risk. | |
""" | |
# FIXME: mark as deprecated | |
return self.children | |
def getFirstChildWithType(self, treeType): | |
for child in self.children: | |
if child.getType() == treeType: | |
return child | |
return None | |
def getChildCount(self): | |
return len(self.children) | |
def addChild(self, childTree): | |
"""Add t as child of this node. | |
Warning: if t has no children, but child does | |
and child isNil then this routine moves children to t via | |
t.children = child.children; i.e., without copying the array. | |
""" | |
# this implementation is much simpler and probably less efficient | |
# than the mumbo-jumbo that Ter did for the Java runtime. | |
if childTree is None: | |
return | |
if childTree.isNil(): | |
# t is an empty node possibly with children | |
if self.children is childTree.children: | |
raise ValueError("attempt to add child list to itself") | |
# fix parent pointer and childIndex for new children | |
for idx, child in enumerate(childTree.children): | |
child.parent = self | |
child.childIndex = len(self.children) + idx | |
self.children += childTree.children | |
else: | |
# child is not nil (don't care about children) | |
self.children.append(childTree) | |
childTree.parent = self | |
childTree.childIndex = len(self.children) - 1 | |
def addChildren(self, children): | |
"""Add all elements of kids list as children of this node""" | |
self.children += children | |
def setChild(self, i, t): | |
if t is None: | |
return | |
if t.isNil(): | |
raise ValueError("Can't set single child to a list") | |
self.children[i] = t | |
t.parent = self | |
t.childIndex = i | |
def deleteChild(self, i): | |
killed = self.children[i] | |
del self.children[i] | |
# walk rest and decrement their child indexes | |
for idx, child in enumerate(self.children[i:]): | |
child.childIndex = i + idx | |
return killed | |
def replaceChildren(self, startChildIndex, stopChildIndex, newTree): | |
""" | |
Delete children from start to stop and replace with t even if t is | |
a list (nil-root tree). num of children can increase or decrease. | |
For huge child lists, inserting children can force walking rest of | |
children to set their childindex; could be slow. | |
""" | |
if (startChildIndex >= len(self.children) | |
or stopChildIndex >= len(self.children) | |
): | |
raise IndexError("indexes invalid") | |
replacingHowMany = stopChildIndex - startChildIndex + 1 | |
# normalize to a list of children to add: newChildren | |
if newTree.isNil(): | |
newChildren = newTree.children | |
else: | |
newChildren = [newTree] | |
replacingWithHowMany = len(newChildren) | |
delta = replacingHowMany - replacingWithHowMany | |
if delta == 0: | |
# if same number of nodes, do direct replace | |
for idx, child in enumerate(newChildren): | |
self.children[idx + startChildIndex] = child | |
child.parent = self | |
child.childIndex = idx + startChildIndex | |
else: | |
# length of children changes... | |
# ...delete replaced segment... | |
del self.children[startChildIndex:stopChildIndex+1] | |
# ...insert new segment... | |
self.children[startChildIndex:startChildIndex] = newChildren | |
# ...and fix indeces | |
self.freshenParentAndChildIndexes(startChildIndex) | |
def isNil(self): | |
return False | |
def freshenParentAndChildIndexes(self, offset=0): | |
for idx, child in enumerate(self.children[offset:]): | |
child.childIndex = idx + offset | |
child.parent = self | |
def sanityCheckParentAndChildIndexes(self, parent=None, i=-1): | |
if parent != self.parent: | |
raise ValueError( | |
"parents don't match; expected %r found %r" | |
% (parent, self.parent) | |
) | |
if i != self.childIndex: | |
raise ValueError( | |
"child indexes don't match; expected %d found %d" | |
% (i, self.childIndex) | |
) | |
for idx, child in enumerate(self.children): | |
child.sanityCheckParentAndChildIndexes(self, idx) | |
def getChildIndex(self): | |
"""BaseTree doesn't track child indexes.""" | |
return 0 | |
def setChildIndex(self, index): | |
"""BaseTree doesn't track child indexes.""" | |
pass | |
def getParent(self): | |
"""BaseTree doesn't track parent pointers.""" | |
return None | |
def setParent(self, t): | |
"""BaseTree doesn't track parent pointers.""" | |
pass | |
def hasAncestor(self, ttype): | |
"""Walk upwards looking for ancestor with this token type.""" | |
return self.getAncestor(ttype) is not None | |
def getAncestor(self, ttype): | |
"""Walk upwards and get first ancestor with this token type.""" | |
t = self.getParent() | |
while t is not None: | |
if t.getType() == ttype: | |
return t | |
t = t.getParent() | |
return None | |
def getAncestors(self): | |
"""Return a list of all ancestors of this node. | |
The first node of list is the root and the last is the parent of | |
this node. | |
""" | |
if selfgetParent() is None: | |
return None | |
ancestors = [] | |
t = self.getParent() | |
while t is not None: | |
ancestors.insert(0, t) # insert at start | |
t = t.getParent() | |
return ancestors | |
def toStringTree(self): | |
"""Print out a whole tree not just a node""" | |
if len(self.children) == 0: | |
return self.toString() | |
buf = [] | |
if not self.isNil(): | |
buf.append('(') | |
buf.append(self.toString()) | |
buf.append(' ') | |
for i, child in enumerate(self.children): | |
if i > 0: | |
buf.append(' ') | |
buf.append(child.toStringTree()) | |
if not self.isNil(): | |
buf.append(')') | |
return ''.join(buf) | |
def getLine(self): | |
return 0 | |
def getCharPositionInLine(self): | |
return 0 | |
def toString(self): | |
"""Override to say how a node (not a tree) should look as text""" | |
raise NotImplementedError | |
class BaseTreeAdaptor(TreeAdaptor): | |
""" | |
@brief A TreeAdaptor that works with any Tree implementation. | |
""" | |
# BaseTreeAdaptor is abstract, no need to complain about not implemented | |
# abstract methods | |
# pylint: disable-msg=W0223 | |
def nil(self): | |
return self.createWithPayload(None) | |
def errorNode(self, input, start, stop, exc): | |
""" | |
create tree node that holds the start and stop tokens associated | |
with an error. | |
If you specify your own kind of tree nodes, you will likely have to | |
override this method. CommonTree returns Token.INVALID_TOKEN_TYPE | |
if no token payload but you might have to set token type for diff | |
node type. | |
You don't have to subclass CommonErrorNode; you will likely need to | |
subclass your own tree node class to avoid class cast exception. | |
""" | |
return CommonErrorNode(input, start, stop, exc) | |
def isNil(self, tree): | |
return tree.isNil() | |
def dupTree(self, t, parent=None): | |
""" | |
This is generic in the sense that it will work with any kind of | |
tree (not just Tree interface). It invokes the adaptor routines | |
not the tree node routines to do the construction. | |
""" | |
if t is None: | |
return None | |
newTree = self.dupNode(t) | |
# ensure new subtree root has parent/child index set | |
# same index in new tree | |
self.setChildIndex(newTree, self.getChildIndex(t)) | |
self.setParent(newTree, parent) | |
for i in range(self.getChildCount(t)): | |
child = self.getChild(t, i) | |
newSubTree = self.dupTree(child, t) | |
self.addChild(newTree, newSubTree) | |
return newTree | |
def addChild(self, tree, child): | |
""" | |
Add a child to the tree t. If child is a flat tree (a list), make all | |
in list children of t. Warning: if t has no children, but child does | |
and child isNil then you can decide it is ok to move children to t via | |
t.children = child.children; i.e., without copying the array. Just | |
make sure that this is consistent with have the user will build | |
ASTs. | |
""" | |
#if isinstance(child, Token): | |
# child = self.createWithPayload(child) | |
if tree is not None and child is not None: | |
tree.addChild(child) | |
def becomeRoot(self, newRoot, oldRoot): | |
""" | |
If oldRoot is a nil root, just copy or move the children to newRoot. | |
If not a nil root, make oldRoot a child of newRoot. | |
old=^(nil a b c), new=r yields ^(r a b c) | |
old=^(a b c), new=r yields ^(r ^(a b c)) | |
If newRoot is a nil-rooted single child tree, use the single | |
child as the new root node. | |
old=^(nil a b c), new=^(nil r) yields ^(r a b c) | |
old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) | |
If oldRoot was null, it's ok, just return newRoot (even if isNil). | |
old=null, new=r yields r | |
old=null, new=^(nil r) yields ^(nil r) | |
Return newRoot. Throw an exception if newRoot is not a | |
simple node or nil root with a single child node--it must be a root | |
node. If newRoot is ^(nil x) return x as newRoot. | |
Be advised that it's ok for newRoot to point at oldRoot's | |
children; i.e., you don't have to copy the list. We are | |
constructing these nodes so we should have this control for | |
efficiency. | |
""" | |
if isinstance(newRoot, Token): | |
newRoot = self.create(newRoot) | |
if oldRoot is None: | |
return newRoot | |
if not isinstance(newRoot, CommonTree): | |
newRoot = self.createWithPayload(newRoot) | |
# handle ^(nil real-node) | |
if newRoot.isNil(): | |
nc = newRoot.getChildCount() | |
if nc == 1: | |
newRoot = newRoot.getChild(0) | |
elif nc > 1: | |
# TODO: make tree run time exceptions hierarchy | |
raise RuntimeError("more than one node as root") | |
# add oldRoot to newRoot; addChild takes care of case where oldRoot | |
# is a flat list (i.e., nil-rooted tree). All children of oldRoot | |
# are added to newRoot. | |
newRoot.addChild(oldRoot) | |
return newRoot | |
def rulePostProcessing(self, root): | |
"""Transform ^(nil x) to x and nil to null""" | |
if root is not None and root.isNil(): | |
if root.getChildCount() == 0: | |
root = None | |
elif root.getChildCount() == 1: | |
root = root.getChild(0) | |
# whoever invokes rule will set parent and child index | |
root.setParent(None) | |
root.setChildIndex(-1) | |
return root | |
def createFromToken(self, tokenType, fromToken, text=None): | |
if fromToken is None: | |
return self.createFromType(tokenType, text) | |
assert isinstance(tokenType, (int, long)), type(tokenType).__name__ | |
assert isinstance(fromToken, Token), type(fromToken).__name__ | |
assert text is None or isinstance(text, basestring), type(text).__name__ | |
fromToken = self.createToken(fromToken) | |
fromToken.type = tokenType | |
if text is not None: | |
fromToken.text = text | |
t = self.createWithPayload(fromToken) | |
return t | |
def createFromType(self, tokenType, text): | |
assert isinstance(tokenType, (int, long)), type(tokenType).__name__ | |
assert isinstance(text, basestring) or text is None, type(text).__name__ | |
fromToken = self.createToken(tokenType=tokenType, text=text) | |
t = self.createWithPayload(fromToken) | |
return t | |
def getType(self, t): | |
return t.getType() | |
def setType(self, t, type): | |
raise RuntimeError("don't know enough about Tree node") | |
def getText(self, t): | |
return t.getText() | |
def setText(self, t, text): | |
raise RuntimeError("don't know enough about Tree node") | |
def getChild(self, t, i): | |
return t.getChild(i) | |
def setChild(self, t, i, child): | |
t.setChild(i, child) | |
def deleteChild(self, t, i): | |
return t.deleteChild(i) | |
def getChildCount(self, t): | |
return t.getChildCount() | |
def getUniqueID(self, node): | |
return hash(node) | |
def createToken(self, fromToken=None, tokenType=None, text=None): | |
""" | |
Tell me how to create a token for use with imaginary token nodes. | |
For example, there is probably no input symbol associated with imaginary | |
token DECL, but you need to create it as a payload or whatever for | |
the DECL node as in ^(DECL type ID). | |
If you care what the token payload objects' type is, you should | |
override this method and any other createToken variant. | |
""" | |
raise NotImplementedError | |
############################################################################ | |
# | |
# common tree implementation | |
# | |
# Tree | |
# \- BaseTree | |
# \- CommonTree | |
# \- CommonErrorNode | |
# | |
# TreeAdaptor | |
# \- BaseTreeAdaptor | |
# \- CommonTreeAdaptor | |
# | |
############################################################################ | |
class CommonTree(BaseTree): | |
"""@brief A tree node that is wrapper for a Token object. | |
After 3.0 release | |
while building tree rewrite stuff, it became clear that computing | |
parent and child index is very difficult and cumbersome. Better to | |
spend the space in every tree node. If you don't want these extra | |
fields, it's easy to cut them out in your own BaseTree subclass. | |
""" | |
def __init__(self, payload): | |
BaseTree.__init__(self) | |
# What token indexes bracket all tokens associated with this node | |
# and below? | |
self.startIndex = -1 | |
self.stopIndex = -1 | |
# Who is the parent node of this node; if null, implies node is root | |
self.parent = None | |
# What index is this node in the child list? Range: 0..n-1 | |
self.childIndex = -1 | |
# A single token is the payload | |
if payload is None: | |
self.token = None | |
elif isinstance(payload, CommonTree): | |
self.token = payload.token | |
self.startIndex = payload.startIndex | |
self.stopIndex = payload.stopIndex | |
elif payload is None or isinstance(payload, Token): | |
self.token = payload | |
else: | |
raise TypeError(type(payload).__name__) | |
def getToken(self): | |
return self.token | |
def dupNode(self): | |
return CommonTree(self) | |
def isNil(self): | |
return self.token is None | |
def getType(self): | |
if self.token is None: | |
return INVALID_TOKEN_TYPE | |
return self.token.getType() | |
type = property(getType) | |
def getText(self): | |
if self.token is None: | |
return None | |
return self.token.text | |
text = property(getText) | |
def getLine(self): | |
if self.token is None or self.token.getLine() == 0: | |
if self.getChildCount(): | |
return self.getChild(0).getLine() | |
else: | |
return 0 | |
return self.token.getLine() | |
line = property(getLine) | |
def getCharPositionInLine(self): | |
if self.token is None or self.token.getCharPositionInLine() == -1: | |
if self.getChildCount(): | |
return self.getChild(0).getCharPositionInLine() | |
else: | |
return 0 | |
else: | |
return self.token.getCharPositionInLine() | |
charPositionInLine = property(getCharPositionInLine) | |
def getTokenStartIndex(self): | |
if self.startIndex == -1 and self.token is not None: | |
return self.token.getTokenIndex() | |
return self.startIndex | |
def setTokenStartIndex(self, index): | |
self.startIndex = index | |
tokenStartIndex = property(getTokenStartIndex, setTokenStartIndex) | |
def getTokenStopIndex(self): | |
if self.stopIndex == -1 and self.token is not None: | |
return self.token.getTokenIndex() | |
return self.stopIndex | |
def setTokenStopIndex(self, index): | |
self.stopIndex = index | |
tokenStopIndex = property(getTokenStopIndex, setTokenStopIndex) | |
def setUnknownTokenBoundaries(self): | |
"""For every node in this subtree, make sure it's start/stop token's | |
are set. Walk depth first, visit bottom up. Only updates nodes | |
with at least one token index < 0. | |
""" | |
if self.children is None: | |
if self.startIndex < 0 or self.stopIndex < 0: | |
self.startIndex = self.stopIndex = self.token.getTokenIndex() | |
return | |
for child in self.children: | |
child.setUnknownTokenBoundaries() | |
if self.startIndex >= 0 and self.stopIndex >= 0: | |
# already set | |
return | |
if self.children: | |
firstChild = self.children[0] | |
lastChild = self.children[-1] | |
self.startIndex = firstChild.getTokenStartIndex() | |
self.stopIndex = lastChild.getTokenStopIndex() | |
def getChildIndex(self): | |
#FIXME: mark as deprecated | |
return self.childIndex | |
def setChildIndex(self, idx): | |
#FIXME: mark as deprecated | |
self.childIndex = idx | |
def getParent(self): | |
#FIXME: mark as deprecated | |
return self.parent | |
def setParent(self, t): | |
#FIXME: mark as deprecated | |
self.parent = t | |
def toString(self): | |
if self.isNil(): | |
return "nil" | |
if self.getType() == INVALID_TOKEN_TYPE: | |
return "<errornode>" | |
return self.token.text | |
__str__ = toString | |
def toStringTree(self): | |
if not self.children: | |
return self.toString() | |
ret = '' | |
if not self.isNil(): | |
ret += '(%s ' % (self.toString()) | |
ret += ' '.join([child.toStringTree() for child in self.children]) | |
if not self.isNil(): | |
ret += ')' | |
return ret | |
INVALID_NODE = CommonTree(INVALID_TOKEN) | |
class CommonErrorNode(CommonTree): | |
"""A node representing erroneous token range in token stream""" | |
def __init__(self, input, start, stop, exc): | |
CommonTree.__init__(self, None) | |
if (stop is None or | |
(stop.getTokenIndex() < start.getTokenIndex() and | |
stop.getType() != EOF | |
) | |
): | |
# sometimes resync does not consume a token (when LT(1) is | |
# in follow set. So, stop will be 1 to left to start. adjust. | |
# Also handle case where start is the first token and no token | |
# is consumed during recovery; LT(-1) will return null. | |
stop = start | |
self.input = input | |
self.start = start | |
self.stop = stop | |
self.trappedException = exc | |
def isNil(self): | |
return False | |
def getType(self): | |
return INVALID_TOKEN_TYPE | |
def getText(self): | |
if isinstance(self.start, Token): | |
i = self.start.getTokenIndex() | |
j = self.stop.getTokenIndex() | |
if self.stop.getType() == EOF: | |
j = self.input.size() | |
badText = self.input.toString(i, j) | |
elif isinstance(self.start, Tree): | |
badText = self.input.toString(self.start, self.stop) | |
else: | |
# people should subclass if they alter the tree type so this | |
# next one is for sure correct. | |
badText = "<unknown>" | |
return badText | |
def toString(self): | |
if isinstance(self.trappedException, MissingTokenException): | |
return ("<missing type: " | |
+ str(self.trappedException.getMissingType()) | |
+ ">") | |
elif isinstance(self.trappedException, UnwantedTokenException): | |
return ("<extraneous: " | |
+ str(self.trappedException.getUnexpectedToken()) | |
+ ", resync=" + self.getText() + ">") | |
elif isinstance(self.trappedException, MismatchedTokenException): | |
return ("<mismatched token: " | |
+ str(self.trappedException.token) | |
+ ", resync=" + self.getText() + ">") | |
elif isinstance(self.trappedException, NoViableAltException): | |
return ("<unexpected: " | |
+ str(self.trappedException.token) | |
+ ", resync=" + self.getText() + ">") | |
return "<error: "+self.getText()+">" | |
class CommonTreeAdaptor(BaseTreeAdaptor): | |
""" | |
@brief A TreeAdaptor that works with any Tree implementation. | |
It provides | |
really just factory methods; all the work is done by BaseTreeAdaptor. | |
If you would like to have different tokens created than ClassicToken | |
objects, you need to override this and then set the parser tree adaptor to | |
use your subclass. | |
To get your parser to build nodes of a different type, override | |
create(Token), errorNode(), and to be safe, YourTreeClass.dupNode(). | |
dupNode is called to duplicate nodes during rewrite operations. | |
""" | |
def dupNode(self, treeNode): | |
""" | |
Duplicate a node. This is part of the factory; | |
override if you want another kind of node to be built. | |
I could use reflection to prevent having to override this | |
but reflection is slow. | |
""" | |
if treeNode is None: | |
return None | |
return treeNode.dupNode() | |
def createWithPayload(self, payload): | |
return CommonTree(payload) | |
def createToken(self, fromToken=None, tokenType=None, text=None): | |
""" | |
Tell me how to create a token for use with imaginary token nodes. | |
For example, there is probably no input symbol associated with imaginary | |
token DECL, but you need to create it as a payload or whatever for | |
the DECL node as in ^(DECL type ID). | |
If you care what the token payload objects' type is, you should | |
override this method and any other createToken variant. | |
""" | |
if fromToken is not None: | |
return CommonToken(oldToken=fromToken) | |
return CommonToken(type=tokenType, text=text) | |
def setTokenBoundaries(self, t, startToken, stopToken): | |
""" | |
Track start/stop token for subtree root created for a rule. | |
Only works with Tree nodes. For rules that match nothing, | |
seems like this will yield start=i and stop=i-1 in a nil node. | |
Might be useful info so I'll not force to be i..i. | |
""" | |
if t is None: | |
return | |
start = 0 | |
stop = 0 | |
if startToken is not None: | |
start = startToken.index | |
if stopToken is not None: | |
stop = stopToken.index | |
t.setTokenStartIndex(start) | |
t.setTokenStopIndex(stop) | |
def getTokenStartIndex(self, t): | |
if t is None: | |
return -1 | |
return t.getTokenStartIndex() | |
def getTokenStopIndex(self, t): | |
if t is None: | |
return -1 | |
return t.getTokenStopIndex() | |
def getText(self, t): | |
if t is None: | |
return None | |
return t.getText() | |
def getType(self, t): | |
if t is None: | |
return INVALID_TOKEN_TYPE | |
return t.getType() | |
def getToken(self, t): | |
""" | |
What is the Token associated with this node? If | |
you are not using CommonTree, then you must | |
override this in your own adaptor. | |
""" | |
if isinstance(t, CommonTree): | |
return t.getToken() | |
return None # no idea what to do | |
def getChild(self, t, i): | |
if t is None: | |
return None | |
return t.getChild(i) | |
def getChildCount(self, t): | |
if t is None: | |
return 0 | |
return t.getChildCount() | |
def getParent(self, t): | |
return t.getParent() | |
def setParent(self, t, parent): | |
t.setParent(parent) | |
def getChildIndex(self, t): | |
if t is None: | |
return 0 | |
return t.getChildIndex() | |
def setChildIndex(self, t, index): | |
t.setChildIndex(index) | |
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): | |
if parent is not None: | |
parent.replaceChildren(startChildIndex, stopChildIndex, t) | |
############################################################################ | |
# | |
# streams | |
# | |
# TreeNodeStream | |
# \- BaseTree | |
# \- CommonTree | |
# | |
# TreeAdaptor | |
# \- BaseTreeAdaptor | |
# \- CommonTreeAdaptor | |
# | |
############################################################################ | |
class TreeNodeStream(IntStream): | |
"""@brief A stream of tree nodes | |
It accessing nodes from a tree of some kind. | |
""" | |
# TreeNodeStream is abstract, no need to complain about not implemented | |
# abstract methods | |
# pylint: disable-msg=W0223 | |
def get(self, i): | |
"""Get a tree node at an absolute index i; 0..n-1. | |
If you don't want to buffer up nodes, then this method makes no | |
sense for you. | |
""" | |
raise NotImplementedError | |
def LT(self, k): | |
""" | |
Get tree node at current input pointer + i ahead where i=1 is next node. | |
i<0 indicates nodes in the past. So LT(-1) is previous node, but | |
implementations are not required to provide results for k < -1. | |
LT(0) is undefined. For i>=n, return null. | |
Return null for LT(0) and any index that results in an absolute address | |
that is negative. | |
This is analogus to the LT() method of the TokenStream, but this | |
returns a tree node instead of a token. Makes code gen identical | |
for both parser and tree grammars. :) | |
""" | |
raise NotImplementedError | |
def getTreeSource(self): | |
""" | |
Where is this stream pulling nodes from? This is not the name, but | |
the object that provides node objects. | |
""" | |
raise NotImplementedError | |
def getTokenStream(self): | |
""" | |
If the tree associated with this stream was created from a TokenStream, | |
you can specify it here. Used to do rule $text attribute in tree | |
parser. Optional unless you use tree parser rule text attribute | |
or output=template and rewrite=true options. | |
""" | |
raise NotImplementedError | |
def getTreeAdaptor(self): | |
""" | |
What adaptor can tell me how to interpret/navigate nodes and | |
trees. E.g., get text of a node. | |
""" | |
raise NotImplementedError | |
def setUniqueNavigationNodes(self, uniqueNavigationNodes): | |
""" | |
As we flatten the tree, we use UP, DOWN nodes to represent | |
the tree structure. When debugging we need unique nodes | |
so we have to instantiate new ones. When doing normal tree | |
parsing, it's slow and a waste of memory to create unique | |
navigation nodes. Default should be false; | |
""" | |
raise NotImplementedError | |
def reset(self): | |
""" | |
Reset the tree node stream in such a way that it acts like | |
a freshly constructed stream. | |
""" | |
raise NotImplementedError | |
def toString(self, start, stop): | |
""" | |
Return the text of all nodes from start to stop, inclusive. | |
If the stream does not buffer all the nodes then it can still | |
walk recursively from start until stop. You can always return | |
null or "" too, but users should not access $ruleLabel.text in | |
an action of course in that case. | |
""" | |
raise NotImplementedError | |
# REWRITING TREES (used by tree parser) | |
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): | |
""" | |
Replace from start to stop child index of parent with t, which might | |
be a list. Number of children may be different | |
after this call. The stream is notified because it is walking the | |
tree and might need to know you are monkeying with the underlying | |
tree. Also, it might be able to modify the node stream to avoid | |
restreaming for future phases. | |
If parent is null, don't do anything; must be at root of overall tree. | |
Can't replace whatever points to the parent externally. Do nothing. | |
""" | |
raise NotImplementedError | |
class CommonTreeNodeStream(TreeNodeStream): | |
"""@brief A buffered stream of tree nodes. | |
Nodes can be from a tree of ANY kind. | |
This node stream sucks all nodes out of the tree specified in | |
the constructor during construction and makes pointers into | |
the tree using an array of Object pointers. The stream necessarily | |
includes pointers to DOWN and UP and EOF nodes. | |
This stream knows how to mark/release for backtracking. | |
This stream is most suitable for tree interpreters that need to | |
jump around a lot or for tree parsers requiring speed (at cost of memory). | |
There is some duplicated functionality here with UnBufferedTreeNodeStream | |
but just in bookkeeping, not tree walking etc... | |
@see UnBufferedTreeNodeStream | |
""" | |
def __init__(self, *args): | |
TreeNodeStream.__init__(self) | |
if len(args) == 1: | |
adaptor = CommonTreeAdaptor() | |
tree = args[0] | |
nodes = None | |
down = None | |
up = None | |
eof = None | |
elif len(args) == 2: | |
adaptor = args[0] | |
tree = args[1] | |
nodes = None | |
down = None | |
up = None | |
eof = None | |
elif len(args) == 3: | |
parent = args[0] | |
start = args[1] | |
stop = args[2] | |
adaptor = parent.adaptor | |
tree = parent.root | |
nodes = parent.nodes[start:stop] | |
down = parent.down | |
up = parent.up | |
eof = parent.eof | |
else: | |
raise TypeError("Invalid arguments") | |
# all these navigation nodes are shared and hence they | |
# cannot contain any line/column info | |
if down is not None: | |
self.down = down | |
else: | |
self.down = adaptor.createFromType(DOWN, "DOWN") | |
if up is not None: | |
self.up = up | |
else: | |
self.up = adaptor.createFromType(UP, "UP") | |
if eof is not None: | |
self.eof = eof | |
else: | |
self.eof = adaptor.createFromType(EOF, "EOF") | |
# The complete mapping from stream index to tree node. | |
# This buffer includes pointers to DOWN, UP, and EOF nodes. | |
# It is built upon ctor invocation. The elements are type | |
# Object as we don't what the trees look like. | |
# Load upon first need of the buffer so we can set token types | |
# of interest for reverseIndexing. Slows us down a wee bit to | |
# do all of the if p==-1 testing everywhere though. | |
if nodes is not None: | |
self.nodes = nodes | |
else: | |
self.nodes = [] | |
# Pull nodes from which tree? | |
self.root = tree | |
# IF this tree (root) was created from a token stream, track it. | |
self.tokens = None | |
# What tree adaptor was used to build these trees | |
self.adaptor = adaptor | |
# Reuse same DOWN, UP navigation nodes unless this is true | |
self.uniqueNavigationNodes = False | |
# The index into the nodes list of the current node (next node | |
# to consume). If -1, nodes array not filled yet. | |
self.p = -1 | |
# Track the last mark() call result value for use in rewind(). | |
self.lastMarker = None | |
# Stack of indexes used for push/pop calls | |
self.calls = [] | |
def __iter__(self): | |
return TreeIterator(self.root, self.adaptor) | |
def fillBuffer(self): | |
"""Walk tree with depth-first-search and fill nodes buffer. | |
Don't do DOWN, UP nodes if its a list (t is isNil). | |
""" | |
self._fillBuffer(self.root) | |
self.p = 0 # buffer of nodes intialized now | |
def _fillBuffer(self, t): | |
nil = self.adaptor.isNil(t) | |
if not nil: | |
self.nodes.append(t) # add this node | |
# add DOWN node if t has children | |
n = self.adaptor.getChildCount(t) | |
if not nil and n > 0: | |
self.addNavigationNode(DOWN) | |
# and now add all its children | |
for c in range(n): | |
self._fillBuffer(self.adaptor.getChild(t, c)) | |
# add UP node if t has children | |
if not nil and n > 0: | |
self.addNavigationNode(UP) | |
def getNodeIndex(self, node): | |
"""What is the stream index for node? 0..n-1 | |
Return -1 if node not found. | |
""" | |
if self.p == -1: | |
self.fillBuffer() | |
for i, t in enumerate(self.nodes): | |
if t == node: | |
return i | |
return -1 | |
def addNavigationNode(self, ttype): | |
""" | |
As we flatten the tree, we use UP, DOWN nodes to represent | |
the tree structure. When debugging we need unique nodes | |
so instantiate new ones when uniqueNavigationNodes is true. | |
""" | |
navNode = None | |
if ttype == DOWN: | |
if self.hasUniqueNavigationNodes(): | |
navNode = self.adaptor.createFromType(DOWN, "DOWN") | |
else: | |
navNode = self.down | |
else: | |
if self.hasUniqueNavigationNodes(): | |
navNode = self.adaptor.createFromType(UP, "UP") | |
else: | |
navNode = self.up | |
self.nodes.append(navNode) | |
def get(self, i): | |
if self.p == -1: | |
self.fillBuffer() | |
return self.nodes[i] | |
def LT(self, k): | |
if self.p == -1: | |
self.fillBuffer() | |
if k == 0: | |
return None | |
if k < 0: | |
return self.LB(-k) | |
if self.p + k - 1 >= len(self.nodes): | |
return self.eof | |
return self.nodes[self.p + k - 1] | |
def getCurrentSymbol(self): | |
return self.LT(1) | |
def LB(self, k): | |
"""Look backwards k nodes""" | |
if k == 0: | |
return None | |
if self.p - k < 0: | |
return None | |
return self.nodes[self.p - k] | |
def isEOF(self, obj): | |
return self.adaptor.getType(obj) == EOF | |
def getTreeSource(self): | |
return self.root | |
def getSourceName(self): | |
return self.getTokenStream().getSourceName() | |
def getTokenStream(self): | |
return self.tokens | |
def setTokenStream(self, tokens): | |
self.tokens = tokens | |
def getTreeAdaptor(self): | |
return self.adaptor | |
def hasUniqueNavigationNodes(self): | |
return self.uniqueNavigationNodes | |
def setUniqueNavigationNodes(self, uniqueNavigationNodes): | |
self.uniqueNavigationNodes = uniqueNavigationNodes | |
def consume(self): | |
if self.p == -1: | |
self.fillBuffer() | |
self.p += 1 | |
def LA(self, i): | |
return self.adaptor.getType(self.LT(i)) | |
def mark(self): | |
if self.p == -1: | |
self.fillBuffer() | |
self.lastMarker = self.index() | |
return self.lastMarker | |
def release(self, marker=None): | |
# no resources to release | |
pass | |
def index(self): | |
return self.p | |
def rewind(self, marker=None): | |
if marker is None: | |
marker = self.lastMarker | |
self.seek(marker) | |
def seek(self, index): | |
if self.p == -1: | |
self.fillBuffer() | |
self.p = index | |
def push(self, index): | |
""" | |
Make stream jump to a new location, saving old location. | |
Switch back with pop(). | |
""" | |
self.calls.append(self.p) # save current index | |
self.seek(index) | |
def pop(self): | |
""" | |
Seek back to previous index saved during last push() call. | |
Return top of stack (return index). | |
""" | |
ret = self.calls.pop(-1) | |
self.seek(ret) | |
return ret | |
def reset(self): | |
self.p = 0 | |
self.lastMarker = 0 | |
self.calls = [] | |
def size(self): | |
if self.p == -1: | |
self.fillBuffer() | |
return len(self.nodes) | |
# TREE REWRITE INTERFACE | |
def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): | |
if parent is not None: | |
self.adaptor.replaceChildren( | |
parent, startChildIndex, stopChildIndex, t | |
) | |
def __str__(self): | |
"""Used for testing, just return the token type stream""" | |
if self.p == -1: | |
self.fillBuffer() | |
return ' '.join([str(self.adaptor.getType(node)) | |
for node in self.nodes | |
]) | |
def toString(self, start, stop): | |
if start is None or stop is None: | |
return None | |
if self.p == -1: | |
self.fillBuffer() | |
#System.out.println("stop: "+stop); | |
#if ( start instanceof CommonTree ) | |
# System.out.print("toString: "+((CommonTree)start).getToken()+", "); | |
#else | |
# System.out.println(start); | |
#if ( stop instanceof CommonTree ) | |
# System.out.println(((CommonTree)stop).getToken()); | |
#else | |
# System.out.println(stop); | |
# if we have the token stream, use that to dump text in order | |
if self.tokens is not None: | |
beginTokenIndex = self.adaptor.getTokenStartIndex(start) | |
endTokenIndex = self.adaptor.getTokenStopIndex(stop) | |
# if it's a tree, use start/stop index from start node | |
# else use token range from start/stop nodes | |
if self.adaptor.getType(stop) == UP: | |
endTokenIndex = self.adaptor.getTokenStopIndex(start) | |
elif self.adaptor.getType(stop) == EOF: | |
endTokenIndex = self.size() -2 # don't use EOF | |
return self.tokens.toString(beginTokenIndex, endTokenIndex) | |
# walk nodes looking for start | |
i, t = 0, None | |
for i, t in enumerate(self.nodes): | |
if t == start: | |
break | |
# now walk until we see stop, filling string buffer with text | |
buf = [] | |
t = self.nodes[i] | |
while t != stop: | |
text = self.adaptor.getText(t) | |
if text is None: | |
text = " " + self.adaptor.getType(t) | |
buf.append(text) | |
i += 1 | |
t = self.nodes[i] | |
# include stop node too | |
text = self.adaptor.getText(stop) | |
if text is None: | |
text = " " +self.adaptor.getType(stop) | |
buf.append(text) | |
return ''.join(buf) | |
## iterator interface | |
def __iter__(self): | |
if self.p == -1: | |
self.fillBuffer() | |
for node in self.nodes: | |
yield node | |
############################################################################# | |
# | |
# tree parser | |
# | |
############################################################################# | |
class TreeParser(BaseRecognizer): | |
"""@brief Baseclass for generated tree parsers. | |
A parser for a stream of tree nodes. "tree grammars" result in a subclass | |
of this. All the error reporting and recovery is shared with Parser via | |
the BaseRecognizer superclass. | |
""" | |
def __init__(self, input, state=None): | |
BaseRecognizer.__init__(self, state) | |
self.input = None | |
self.setTreeNodeStream(input) | |
def reset(self): | |
BaseRecognizer.reset(self) # reset all recognizer state variables | |
if self.input is not None: | |
self.input.seek(0) # rewind the input | |
def setTreeNodeStream(self, input): | |
"""Set the input stream""" | |
self.input = input | |
def getTreeNodeStream(self): | |
return self.input | |
def getSourceName(self): | |
return self.input.getSourceName() | |
def getCurrentInputSymbol(self, input): | |
return input.LT(1) | |
def getMissingSymbol(self, input, e, expectedTokenType, follow): | |
tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">" | |
adaptor = input.adaptor | |
return adaptor.createToken( | |
CommonToken(type=expectedTokenType, text=tokenText)) | |
# precompiled regex used by inContext | |
dotdot = ".*[^.]\\.\\.[^.].*" | |
doubleEtc = ".*\\.\\.\\.\\s+\\.\\.\\..*" | |
dotdotPattern = re.compile(dotdot) | |
doubleEtcPattern = re.compile(doubleEtc) | |
def inContext(self, context, adaptor=None, tokenName=None, t=None): | |
"""Check if current node in input has a context. | |
Context means sequence of nodes towards root of tree. For example, | |
you might say context is "MULT" which means my parent must be MULT. | |
"CLASS VARDEF" says current node must be child of a VARDEF and whose | |
parent is a CLASS node. You can use "..." to mean zero-or-more nodes. | |
"METHOD ... VARDEF" means my parent is VARDEF and somewhere above | |
that is a METHOD node. The first node in the context is not | |
necessarily the root. The context matcher stops matching and returns | |
true when it runs out of context. There is no way to force the first | |
node to be the root. | |
""" | |
return _inContext( | |
self.input.getTreeAdaptor(), self.getTokenNames(), | |
self.input.LT(1), context) | |
@classmethod | |
def _inContext(cls, adaptor, tokenNames, t, context): | |
"""The worker for inContext. | |
It's static and full of parameters for testing purposes. | |
""" | |
if cls.dotdotPattern.match(context): | |
# don't allow "..", must be "..." | |
raise ValueError("invalid syntax: ..") | |
if cls.doubleEtcPattern.match(context): | |
# don't allow double "..." | |
raise ValueError("invalid syntax: ... ...") | |
# ensure spaces around ... | |
context = context.replace("...", " ... ") | |
context = context.strip() | |
nodes = context.split() | |
ni = len(nodes) - 1 | |
t = adaptor.getParent(t) | |
while ni >= 0 and t is not None: | |
if nodes[ni] == "...": | |
# walk upwards until we see nodes[ni-1] then continue walking | |
if ni == 0: | |
# ... at start is no-op | |
return True | |
goal = nodes[ni-1] | |
ancestor = cls._getAncestor(adaptor, tokenNames, t, goal) | |
if ancestor is None: | |
return False | |
t = ancestor | |
ni -= 1 | |
name = tokenNames[adaptor.getType(t)] | |
if name != nodes[ni]: | |
return False | |
# advance to parent and to previous element in context node list | |
ni -= 1 | |
t = adaptor.getParent(t) | |
# at root but more nodes to match | |
if t is None and ni >= 0: | |
return False | |
return True | |
@staticmethod | |
def _getAncestor(adaptor, tokenNames, t, goal): | |
"""Helper for static inContext.""" | |
while t is not None: | |
name = tokenNames[adaptor.getType(t)] | |
if name == goal: | |
return t | |
t = adaptor.getParent(t) | |
return None | |
def matchAny(self, ignore): # ignore stream, copy of this.input | |
""" | |
Match '.' in tree parser has special meaning. Skip node or | |
entire tree if node has children. If children, scan until | |
corresponding UP node. | |
""" | |
self._state.errorRecovery = False | |
look = self.input.LT(1) | |
if self.input.getTreeAdaptor().getChildCount(look) == 0: | |
self.input.consume() # not subtree, consume 1 node and return | |
return | |
# current node is a subtree, skip to corresponding UP. | |
# must count nesting level to get right UP | |
level = 0 | |
tokenType = self.input.getTreeAdaptor().getType(look) | |
while tokenType != EOF and not (tokenType == UP and level==0): | |
self.input.consume() | |
look = self.input.LT(1) | |
tokenType = self.input.getTreeAdaptor().getType(look) | |
if tokenType == DOWN: | |
level += 1 | |
elif tokenType == UP: | |
level -= 1 | |
self.input.consume() # consume UP | |
def mismatch(self, input, ttype, follow): | |
""" | |
We have DOWN/UP nodes in the stream that have no line info; override. | |
plus we want to alter the exception type. Don't try to recover | |
from tree parser errors inline... | |
""" | |
raise MismatchedTreeNodeException(ttype, input) | |
def getErrorHeader(self, e): | |
""" | |
Prefix error message with the grammar name because message is | |
always intended for the programmer because the parser built | |
the input tree not the user. | |
""" | |
return (self.getGrammarFileName() + | |
": node from %sline %s:%s" | |
% (['', "after "][e.approximateLineInfo], | |
e.line, | |
e.charPositionInLine | |
) | |
) | |
def getErrorMessage(self, e, tokenNames): | |
""" | |
Tree parsers parse nodes they usually have a token object as | |
payload. Set the exception token and do the default behavior. | |
""" | |
if isinstance(self, TreeParser): | |
adaptor = e.input.getTreeAdaptor() | |
e.token = adaptor.getToken(e.node) | |
if e.token is not None: # could be an UP/DOWN node | |
e.token = CommonToken( | |
type=adaptor.getType(e.node), | |
text=adaptor.getText(e.node) | |
) | |
return BaseRecognizer.getErrorMessage(self, e, tokenNames) | |
def traceIn(self, ruleName, ruleIndex): | |
BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1)) | |
def traceOut(self, ruleName, ruleIndex): | |
BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1)) | |
############################################################################# | |
# | |
# tree visitor | |
# | |
############################################################################# | |
class TreeVisitor(object): | |
"""Do a depth first walk of a tree, applying pre() and post() actions | |
we go. | |
""" | |
def __init__(self, adaptor=None): | |
if adaptor is not None: | |
self.adaptor = adaptor | |
else: | |
self.adaptor = CommonTreeAdaptor() | |
def visit(self, t, pre_action=None, post_action=None): | |
"""Visit every node in tree t and trigger an action for each node | |
before/after having visited all of its children. Bottom up walk. | |
Execute both actions even if t has no children. Ignore return | |
results from transforming children since they will have altered | |
the child list of this node (their parent). Return result of | |
applying post action to this node. | |
The Python version differs from the Java version by taking two | |
callables 'pre_action' and 'post_action' instead of a class instance | |
that wraps those methods. Those callables must accept a TreeNode as | |
their single argument and return the (potentially transformed or | |
replaced) TreeNode. | |
""" | |
isNil = self.adaptor.isNil(t) | |
if pre_action is not None and not isNil: | |
# if rewritten, walk children of new t | |
t = pre_action(t) | |
idx = 0 | |
while idx < self.adaptor.getChildCount(t): | |
child = self.adaptor.getChild(t, idx) | |
self.visit(child, pre_action, post_action) | |
idx += 1 | |
if post_action is not None and not isNil: | |
t = post_action(t) | |
return t | |
############################################################################# | |
# | |
# tree iterator | |
# | |
############################################################################# | |
class TreeIterator(object): | |
""" | |
Return a node stream from a doubly-linked tree whose nodes | |
know what child index they are. | |
Emit navigation nodes (DOWN, UP, and EOF) to let show tree structure. | |
""" | |
def __init__(self, tree, adaptor=None): | |
if adaptor is None: | |
adaptor = CommonTreeAdaptor() | |
self.root = tree | |
self.adaptor = adaptor | |
self.first_time = True | |
self.tree = tree | |
# If we emit UP/DOWN nodes, we need to spit out multiple nodes per | |
# next() call. | |
self.nodes = [] | |
# navigation nodes to return during walk and at end | |
self.down = adaptor.createFromType(DOWN, "DOWN") | |
self.up = adaptor.createFromType(UP, "UP") | |
self.eof = adaptor.createFromType(EOF, "EOF") | |
def reset(self): | |
self.first_time = True | |
self.tree = self.root | |
self.nodes = [] | |
def __iter__(self): | |
return self | |
def has_next(self): | |
if self.first_time: | |
return self.root is not None | |
if len(self.nodes) > 0: | |
return True | |
if self.tree is None: | |
return False | |
if self.adaptor.getChildCount(self.tree) > 0: | |
return True | |
# back at root? | |
return self.adaptor.getParent(self.tree) is not None | |
def next(self): | |
if not self.has_next(): | |
raise StopIteration | |
if self.first_time: | |
# initial condition | |
self.first_time = False | |
if self.adaptor.getChildCount(self.tree) == 0: | |
# single node tree (special) | |
self.nodes.append(self.eof) | |
return self.tree | |
return self.tree | |
# if any queued up, use those first | |
if len(self.nodes) > 0: | |
return self.nodes.pop(0) | |
# no nodes left? | |
if self.tree is None: | |
return self.eof | |
# next node will be child 0 if any children | |
if self.adaptor.getChildCount(self.tree) > 0: | |
self.tree = self.adaptor.getChild(self.tree, 0) | |
# real node is next after DOWN | |
self.nodes.append(self.tree) | |
return self.down | |
# if no children, look for next sibling of tree or ancestor | |
parent = self.adaptor.getParent(self.tree) | |
# while we're out of siblings, keep popping back up towards root | |
while (parent is not None | |
and self.adaptor.getChildIndex(self.tree)+1 >= self.adaptor.getChildCount(parent)): | |
# we're moving back up | |
self.nodes.append(self.up) | |
self.tree = parent | |
parent = self.adaptor.getParent(self.tree) | |
# no nodes left? | |
if parent is None: | |
self.tree = None # back at root? nothing left then | |
self.nodes.append(self.eof) # add to queue, might have UP nodes in there | |
return self.nodes.pop(0) | |
# must have found a node with an unvisited sibling | |
# move to it and return it | |
nextSiblingIndex = self.adaptor.getChildIndex(self.tree) + 1 | |
self.tree = self.adaptor.getChild(parent, nextSiblingIndex) | |
self.nodes.append(self.tree) # add to queue, might have UP nodes in there | |
return self.nodes.pop(0) | |
############################################################################# | |
# | |
# streams for rule rewriting | |
# | |
############################################################################# | |
class RewriteRuleElementStream(object): | |
"""@brief Internal helper class. | |
A generic list of elements tracked in an alternative to be used in | |
a -> rewrite rule. We need to subclass to fill in the next() method, | |
which returns either an AST node wrapped around a token payload or | |
an existing subtree. | |
Once you start next()ing, do not try to add more elements. It will | |
break the cursor tracking I believe. | |
@see org.antlr.runtime.tree.RewriteRuleSubtreeStream | |
@see org.antlr.runtime.tree.RewriteRuleTokenStream | |
TODO: add mechanism to detect/puke on modification after reading from | |
stream | |
""" | |
def __init__(self, adaptor, elementDescription, elements=None): | |
# Cursor 0..n-1. If singleElement!=null, cursor is 0 until you next(), | |
# which bumps it to 1 meaning no more elements. | |
self.cursor = 0 | |
# Track single elements w/o creating a list. Upon 2nd add, alloc list | |
self.singleElement = None | |
# The list of tokens or subtrees we are tracking | |
self.elements = None | |
# Once a node / subtree has been used in a stream, it must be dup'd | |
# from then on. Streams are reset after subrules so that the streams | |
# can be reused in future subrules. So, reset must set a dirty bit. | |
# If dirty, then next() always returns a dup. | |
self.dirty = False | |
# The element or stream description; usually has name of the token or | |
# rule reference that this list tracks. Can include rulename too, but | |
# the exception would track that info. | |
self.elementDescription = elementDescription | |
self.adaptor = adaptor | |
if isinstance(elements, (list, tuple)): | |
# Create a stream, but feed off an existing list | |
self.singleElement = None | |
self.elements = elements | |
else: | |
# Create a stream with one element | |
self.add(elements) | |
def reset(self): | |
""" | |
Reset the condition of this stream so that it appears we have | |
not consumed any of its elements. Elements themselves are untouched. | |
Once we reset the stream, any future use will need duplicates. Set | |
the dirty bit. | |
""" | |
self.cursor = 0 | |
self.dirty = True | |
def add(self, el): | |
if el is None: | |
return | |
if self.elements is not None: # if in list, just add | |
self.elements.append(el) | |
return | |
if self.singleElement is None: # no elements yet, track w/o list | |
self.singleElement = el | |
return | |
# adding 2nd element, move to list | |
self.elements = [] | |
self.elements.append(self.singleElement) | |
self.singleElement = None | |
self.elements.append(el) | |
def nextTree(self): | |
""" | |
Return the next element in the stream. If out of elements, throw | |
an exception unless size()==1. If size is 1, then return elements[0]. | |
Return a duplicate node/subtree if stream is out of elements and | |
size==1. If we've already used the element, dup (dirty bit set). | |
""" | |
if (self.dirty | |
or (self.cursor >= len(self) and len(self) == 1) | |
): | |
# if out of elements and size is 1, dup | |
el = self._next() | |
return self.dup(el) | |
# test size above then fetch | |
el = self._next() | |
return el | |
def _next(self): | |
""" | |
do the work of getting the next element, making sure that it's | |
a tree node or subtree. Deal with the optimization of single- | |
element list versus list of size > 1. Throw an exception | |
if the stream is empty or we're out of elements and size>1. | |
protected so you can override in a subclass if necessary. | |
""" | |
if len(self) == 0: | |
raise RewriteEmptyStreamException(self.elementDescription) | |
if self.cursor >= len(self): # out of elements? | |
if len(self) == 1: # if size is 1, it's ok; return and we'll dup | |
return self.toTree(self.singleElement) | |
# out of elements and size was not 1, so we can't dup | |
raise RewriteCardinalityException(self.elementDescription) | |
# we have elements | |
if self.singleElement is not None: | |
self.cursor += 1 # move cursor even for single element list | |
return self.toTree(self.singleElement) | |
# must have more than one in list, pull from elements | |
o = self.toTree(self.elements[self.cursor]) | |
self.cursor += 1 | |
return o | |
def dup(self, el): | |
""" | |
When constructing trees, sometimes we need to dup a token or AST | |
subtree. Dup'ing a token means just creating another AST node | |
around it. For trees, you must call the adaptor.dupTree() unless | |
the element is for a tree root; then it must be a node dup. | |
""" | |
raise NotImplementedError | |
def toTree(self, el): | |
""" | |
Ensure stream emits trees; tokens must be converted to AST nodes. | |
AST nodes can be passed through unmolested. | |
""" | |
return el | |
def hasNext(self): | |
return ( (self.singleElement is not None and self.cursor < 1) | |
or (self.elements is not None | |
and self.cursor < len(self.elements) | |
) | |
) | |
def size(self): | |
if self.singleElement is not None: | |
return 1 | |
if self.elements is not None: | |
return len(self.elements) | |
return 0 | |
__len__ = size | |
def getDescription(self): | |
"""Deprecated. Directly access elementDescription attribute""" | |
return self.elementDescription | |
class RewriteRuleTokenStream(RewriteRuleElementStream): | |
"""@brief Internal helper class.""" | |
def toTree(self, el): | |
# Don't convert to a tree unless they explicitly call nextTree. | |
# This way we can do hetero tree nodes in rewrite. | |
return el | |
def nextNode(self): | |
t = self._next() | |
return self.adaptor.createWithPayload(t) | |
def nextToken(self): | |
return self._next() | |
def dup(self, el): | |
raise TypeError("dup can't be called for a token stream.") | |
class RewriteRuleSubtreeStream(RewriteRuleElementStream): | |
"""@brief Internal helper class.""" | |
def nextNode(self): | |
""" | |
Treat next element as a single node even if it's a subtree. | |
This is used instead of next() when the result has to be a | |
tree root node. Also prevents us from duplicating recently-added | |
children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration | |
must dup the type node, but ID has been added. | |
Referencing a rule result twice is ok; dup entire tree as | |
we can't be adding trees as root; e.g., expr expr. | |
Hideous code duplication here with super.next(). Can't think of | |
a proper way to refactor. This needs to always call dup node | |
and super.next() doesn't know which to call: dup node or dup tree. | |
""" | |
if (self.dirty | |
or (self.cursor >= len(self) and len(self) == 1) | |
): | |
# if out of elements and size is 1, dup (at most a single node | |
# since this is for making root nodes). | |
el = self._next() | |
return self.adaptor.dupNode(el) | |
# test size above then fetch | |
el = self._next() | |
while self.adaptor.isNil(el) and self.adaptor.getChildCount(el) == 1: | |
el = self.adaptor.getChild(el, 0) | |
# dup just the root (want node here) | |
return self.adaptor.dupNode(el) | |
def dup(self, el): | |
return self.adaptor.dupTree(el) | |
class RewriteRuleNodeStream(RewriteRuleElementStream): | |
""" | |
Queues up nodes matched on left side of -> in a tree parser. This is | |
the analog of RewriteRuleTokenStream for normal parsers. | |
""" | |
def nextNode(self): | |
return self._next() | |
def toTree(self, el): | |
return self.adaptor.dupNode(el) | |
def dup(self, el): | |
# we dup every node, so don't have to worry about calling dup; short- | |
#circuited next() so it doesn't call. | |
raise TypeError("dup can't be called for a node stream.") | |
class TreeRuleReturnScope(RuleReturnScope): | |
""" | |
This is identical to the ParserRuleReturnScope except that | |
the start property is a tree nodes not Token object | |
when you are parsing trees. To be generic the tree node types | |
have to be Object. | |
""" | |
def __init__(self): | |
self.start = None | |
self.tree = None | |
def getStart(self): | |
return self.start | |
def getTree(self): | |
return self.tree |