First Commit

2026-05-31 10:17:09 +07:00
commit 17a9c69379
4547 changed files with 1170384 additions and 0 deletions
@@ -0,0 +1,340 @@
+"""This module implements a CYK parser."""
+
+# Author: https://github.com/ehudt (2018)
+#
+# Adapted by Erez
+
+
+from collections import defaultdict
+import itertools
+
+from ..exceptions import ParseError
+from ..lexer import Token
+from ..tree import Tree
+from ..grammar import Terminal as T, NonTerminal as NT, Symbol
+
+def match(t, s):
+    """Tell whether token ``s`` is an instance of terminal ``t``.
+
+    ``t`` must be a grammar Terminal (asserted).  The result is the
+    comparison of the terminal's ``name`` with the token's ``type``.
+    """
+    assert isinstance(t, T)
+    terminal_name, token_type = t.name, s.type
+    return terminal_name == token_type
+
+
+class Rule:
+    """Context-free grammar rule.
+
+    Stores a left-hand-side non-terminal, a right-hand-side sequence of
+    grammar symbols, a weight and an alias.  Equality and hashing look only
+    at ``lhs`` and ``rhs``; ``weight`` and ``alias`` are ignored.
+    """
+
+    def __init__(self, lhs, rhs, weight, alias):
+        super().__init__()
+        # Sanity checks: lhs is a non-terminal, rhs holds only grammar symbols.
+        assert isinstance(lhs, NT), lhs
+        assert all(isinstance(sym, (NT, T)) for sym in rhs), rhs
+        self.lhs, self.rhs = lhs, rhs
+        self.weight, self.alias = weight, alias
+
+    def __str__(self):
+        rhs_text = ' '.join(map(str, self.rhs))
+        return '%s -> %s' % (str(self.lhs), rhs_text)
+
+    def __repr__(self):
+        return str(self)
+
+    def __hash__(self):
+        # rhs may be a list, so freeze it before hashing.
+        key = (self.lhs, tuple(self.rhs))
+        return hash(key)
+
+    def __eq__(self, other):
+        same_lhs = self.lhs == other.lhs
+        return same_lhs and self.rhs == other.rhs
+
+    def __ne__(self, other):
+        equal = self == other
+        return not equal
+
+
+class Grammar:
+    """Context-free grammar: an immutable (frozen) set of rules."""
+
+    def __init__(self, rules):
+        self.rules = frozenset(rules)
+
+    def __eq__(self, other):
+        return self.rules == other.rules
+
+    def __str__(self):
+        # Sort the rendered rules so the output does not depend on set order.
+        rendered = sorted(map(repr, self.rules))
+        return '\n%s\n' % '\n'.join(rendered)
+
+    def __repr__(self):
+        return str(self)
+
+
+# Parse tree data structures
+class RuleNode:
+    """Parse-tree node that keeps the full rule it was derived with.
+
+    ``children`` is the list of child nodes and ``weight`` the weight
+    recorded for this subtree (0 unless given).
+    """
+
+    def __init__(self, rule, children, weight=0):
+        self.rule, self.children, self.weight = rule, children, weight
+
+    def __repr__(self):
+        lhs_text = repr(self.rule.lhs)
+        children_text = ', '.join(map(str, self.children))
+        return 'RuleNode(%s, [%s])' % (lhs_text, children_text)
+
+
+
+class Parser:
+    """CYK parser front-end.
+
+    Converts lark rules to this module's ``Rule`` objects, normalises the
+    resulting grammar to CNF once at construction time, and turns the CNF
+    parse result back into a lark ``Tree``.
+    """
+
+    def __init__(self, rules):
+        super().__init__()
+        # Each converted Rule carries the lark rule as its alias; this map is
+        # how _to_tree finds the original rule again.
+        self.orig_rules = {r: r for r in rules}
+        converted = [self._to_rule(r) for r in rules]
+        self.grammar = to_cnf(Grammar(converted))
+
+    def _to_rule(self, lark_rule):
+        """Build a ``Rule`` from a lark rule, using its priority as weight (0 if unset)."""
+        assert isinstance(lark_rule.origin, NT)
+        assert all(isinstance(sym, Symbol) for sym in lark_rule.expansion)
+        priority = lark_rule.options.priority
+        return Rule(
+            lark_rule.origin,
+            lark_rule.expansion,
+            weight=priority or 0,
+            alias=lark_rule,
+        )
+
+    def parse(self, tokenized, start):  # pylint: disable=invalid-name
+        """Parse a list of tokens, starting from the rule named ``start``.
+
+        Raises ParseError when no rule for ``start`` covers the whole input.
+        """
+        assert start
+        start = NT(start)
+
+        table, trees = _parse(tokenized, self.grammar)
+        # The cell spanning the entire input decides success.
+        full_span = (0, len(tokenized) - 1)
+        if all(r.lhs != start for r in table[full_span]):
+            raise ParseError('Parsing failed.')
+        parse = trees[full_span][start]
+        return self._to_tree(revert_cnf(parse))
+
+    def _to_tree(self, rule_node):
+        """Recursively convert a RuleNode parse tree into a lark Tree."""
+        orig_rule = self.orig_rules[rule_node.rule.alias]
+        children = []
+        for child in rule_node.children:
+            if not isinstance(child, RuleNode):
+                # Leaf: a Terminal wrapper whose ``name`` holds the Token.
+                assert isinstance(child.name, Token)
+                children.append(child.name)
+                continue
+            children.append(self._to_tree(child))
+        tree = Tree(orig_rule.origin, children)
+        tree.rule = orig_rule
+        return tree
+
+
+def print_parse(node, indent=0):
+    """Print an indented outline of a parse tree to stdout.
+
+    Args:
+        node: A RuleNode, or a leaf of the tree.
+        indent: Current nesting depth; every level adds two spaces.
+
+    A RuleNode prints its rule's lhs and then recurses into its children.
+    A leaf prints its ``s`` attribute when it has one, otherwise its
+    ``name``.
+    """
+    pad = ' ' * (indent * 2)
+    if isinstance(node, RuleNode):
+        print(pad + str(node.rule.lhs))
+        for child in node.children:
+            print_parse(child, indent + 1)
+    else:
+        # Leaves built by this module are Terminal wrappers that are read
+        # through ``name`` everywhere else in the file; reading ``s``
+        # unconditionally failed on them.  ``s`` is still preferred when
+        # present so previously working callers see no change.
+        label = node.s if hasattr(node, 's') else node.name
+        print(pad + str(label))
+
+
+def _parse(s, g):
+    """Run CYK on sentence 's' with the CNF grammar 'g'.
+
+    Returns ``(table, trees)``.  Both are keyed by an inclusive
+    ``(start pos, end pos)`` span: ``table`` maps a span to the set of rules
+    that derive it, ``trees`` maps a span to a dict from rule lhs to the
+    lightest RuleNode found for that lhs.
+    """
+    table = defaultdict(set)
+    trees = defaultdict(dict)
+
+    # Base case: single-token spans, filled from the terminal rules.
+    for i, w in enumerate(s):
+        cell = (i, i)
+        for terminal, rules in g.terminal_rules.items():
+            if not match(terminal, w):
+                continue
+            for rule in rules:
+                table[cell].add(rule)
+                best = trees[cell]
+                if rule.lhs not in best or rule.weight < best[rule.lhs].weight:
+                    best[rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
+
+    # Longer spans, shortest first, trying every split point of each span.
+    n = len(s)
+    for length in range(2, n + 1):
+        for i in range(n - length + 1):
+            whole = (i, i + length - 1)
+            for p in range(i + 1, i + length):
+                left = (i, p - 1)
+                right = (p, i + length - 1)
+                for r1, r2 in itertools.product(table[left], table[right]):
+                    for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
+                        table[whole].add(rule)
+                        left_tree = trees[left][r1.lhs]
+                        right_tree = trees[right][r2.lhs]
+                        total = rule.weight + left_tree.weight + right_tree.weight
+                        best = trees[whole]
+                        # Strict '<' keeps the first tree found on ties.
+                        if rule.lhs not in best or total < best[rule.lhs].weight:
+                            best[rule.lhs] = RuleNode(rule, [left_tree, right_tree], weight=total)
+    return table, trees
+
+
+# This section implements context-free grammar converter to Chomsky normal form.
+# It also implements a conversion of parse trees from its CNF to the original
+# grammar.
+# Overview:
+# Applies the following operations in this order:
+# * TERM: Eliminates non-solitary terminals from all rules
+# * BIN: Eliminates rules with more than 2 symbols on their right-hand-side.
+# * UNIT: Eliminates non-terminal unit rules
+#
+# The following grammar characteristics aren't featured:
+# * Start symbol appears on RHS
+# * Empty rules (epsilon rules)
+
+
+class CnfWrapper:
+    """CNF wrapper for grammar.
+
+    Checks that every rule of the wrapped grammar is in CNF and indexes the
+    rules for the parser: ``terminal_rules`` maps a terminal to the rules
+    producing it, ``nonterminal_rules`` maps a pair of non-terminals (as a
+    tuple) to the rules producing that pair.
+    """
+
+    def __init__(self, grammar):
+        super().__init__()
+        self.grammar = grammar
+        self.rules = grammar.rules
+        self.terminal_rules = defaultdict(list)
+        self.nonterminal_rules = defaultdict(list)
+        for r in self.rules:
+            assert isinstance(r.lhs, NT), r
+            arity = len(r.rhs)
+            if arity not in (1, 2):
+                raise ParseError("CYK doesn't support empty rules")
+            if arity == 1 and isinstance(r.rhs[0], T):
+                # A -> 'a'
+                self.terminal_rules[r.rhs[0]].append(r)
+            elif arity == 2 and all(isinstance(sym, NT) for sym in r.rhs):
+                # A -> B C
+                self.nonterminal_rules[tuple(r.rhs)].append(r)
+            else:
+                # Anything else is not CNF.
+                assert False, r
+
+    def __eq__(self, other):
+        return self.grammar == other.grammar
+
+    def __repr__(self):
+        return repr(self.grammar)
+
+
+class UnitSkipRule(Rule):
+    """A rule that records NTs that were skipped during transformation.
+
+    ``skipped_rules`` is the list of rules that were collapsed into this one
+    while unit rules were being eliminated.
+
+    Two instances are equal only when they agree on ``lhs`` and ``rhs``
+    (the base-class comparison) and also on ``skipped_rules``.  Including
+    the base comparison keeps ``__eq__`` consistent with the inherited
+    ``__hash__``, which is computed from ``lhs`` and ``rhs``.
+    """
+
+    def __init__(self, lhs, rhs, skipped_rules, weight, alias):
+        super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
+        self.skipped_rules = skipped_rules
+
+    def __eq__(self, other):
+        # Comparing skipped_rules alone made rules with different lhs equal,
+        # so filtering a grammar with ``x != rule`` could also discard an
+        # unrelated rule that merely skipped the same chain.
+        return (isinstance(other, type(self))
+                and Rule.__eq__(self, other)
+                and self.skipped_rules == other.skipped_rules)
+
+    # Defining __eq__ resets __hash__, so restore the base implementation.
+    __hash__ = Rule.__hash__
+
+
+def build_unit_skiprule(unit_rule, target_rule):
+    """Merge unit rule ``A -> B`` with a rule for ``B`` into one UnitSkipRule.
+
+    The result has the unit rule's lhs and alias, the target rule's rhs, the
+    sum of both weights, and a skipped-rules list made of the unit rule's
+    own skipped rules, then the target rule, then the target's skipped rules.
+    """
+    skipped = []
+    if isinstance(unit_rule, UnitSkipRule):
+        skipped.extend(unit_rule.skipped_rules)
+    skipped.append(target_rule)
+    if isinstance(target_rule, UnitSkipRule):
+        skipped.extend(target_rule.skipped_rules)
+    combined_weight = unit_rule.weight + target_rule.weight
+    return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped,
+                        weight=combined_weight, alias=unit_rule.alias)
+
+
+def get_any_nt_unit_rule(g):
+    """Return some rule of 'g' whose rhs is a single non-terminal, else None."""
+    unit_rules = (
+        rule for rule in g.rules
+        if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT)
+    )
+    return next(unit_rules, None)
+
+
+def _remove_unit_rule(g, rule):
+    """Return a grammar equal to 'g' with the unit rule 'rule' eliminated.
+
+    Every rule whose lhs is the unit rule's single rhs symbol is combined
+    with 'rule' into a skip rule, so the language stays the same.
+    """
+    kept = [candidate for candidate in g.rules if candidate != rule]
+    replacements = [
+        build_unit_skiprule(rule, ref)
+        for ref in g.rules
+        if ref.lhs == rule.rhs[0]
+    ]
+    return Grammar(kept + replacements)
+
+
+def _split(rule):
+    """Yield binary rules that together replace a rule with len(rhs) > 2.
+
+    The first yielded rule keeps the original lhs, weight and alias; the
+    helper rules use generated ``__SP_..._<n>`` non-terminals, weight 0 and
+    the alias 'Split'.
+    """
+    rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
+    rule_name = '__SP_%s' % (rule_str) + '_%d'
+
+    def helper(index):
+        # The index-th generated non-terminal of the chain.
+        return NT(rule_name % index)
+
+    last = len(rule.rhs) - 2
+    yield Rule(rule.lhs, [rule.rhs[0], helper(1)], weight=rule.weight, alias=rule.alias)
+    for i in range(1, last):
+        yield Rule(helper(i), [rule.rhs[i], helper(i + 1)], weight=0, alias='Split')
+    # The final helper consumes the two remaining symbols.
+    yield Rule(helper(last), rule.rhs[-2:], weight=0, alias='Split')
+
+
+def _term(g):
+    """Apply the TERM step to 'g' (see top comment).
+
+    In every rule with more than one rhs symbol, each terminal is replaced
+    by a generated ``__T_<terminal>`` non-terminal, and the rule deriving
+    that terminal from the generated non-terminal is added.
+    """
+    terminals = {sym for rule in g.rules for sym in rule.rhs if isinstance(sym, T)}
+    t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in terminals}
+    new_rules = []
+    for rule in g.rules:
+        mixes_terminals = len(rule.rhs) > 1 and any(isinstance(sym, T) for sym in rule.rhs)
+        if not mixes_terminals:
+            new_rules.append(rule)
+            continue
+        new_rhs = [t_rules[sym].lhs if isinstance(sym, T) else sym for sym in rule.rhs]
+        new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
+        new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
+    return Grammar(new_rules)
+
+
+def _bin(g):
+    """Apply the BIN step to 'g' (see top comment).
+
+    Rules with more than two rhs symbols are replaced by the binary rules
+    produced by ``_split``; all other rules are kept as they are.
+    """
+    new_rules = []
+    for rule in g.rules:
+        if len(rule.rhs) <= 2:
+            new_rules.append(rule)
+        else:
+            new_rules.extend(_split(rule))
+    return Grammar(new_rules)
+
+
+def _unit(g):
+    """Apply the UNIT step to 'g' (see top comment).
+
+    Removes non-terminal unit rules one at a time until none is left.
+    """
+    while True:
+        unit_rule = get_any_nt_unit_rule(g)
+        if not unit_rule:
+            return g
+        g = _remove_unit_rule(g, unit_rule)
+
+
+def to_cnf(g):
+    """Convert the context-free grammar 'g' to CNF and wrap it in a CnfWrapper.
+
+    The steps run in the order TERM, BIN, UNIT.
+    """
+    without_mixed_terminals = _term(g)
+    binarized = _bin(without_mixed_terminals)
+    unit_free = _unit(binarized)
+    return CnfWrapper(unit_free)
+
+
+def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
+    """Rebuild the chain of unit rules that a UnitSkipRule collapsed.
+
+    Each entry of ``skipped_rules`` becomes one nested RuleNode; the
+    innermost node gets ``orig_rhs`` and ``children``.  At every level the
+    skipped rule's weight is subtracted from the weight passed in.
+    """
+    if skipped_rules:
+        head, remaining = skipped_rules[0], skipped_rules[1:]
+        own_weight = weight - head.weight
+        unit_rule = Rule(lhs, [head.lhs], weight=own_weight, alias=alias)
+        inner = unroll_unit_skiprule(head.lhs, orig_rhs, remaining, children,
+                                     head.weight, head.alias)
+        return RuleNode(unit_rule, [inner], weight=own_weight)
+    leaf_rule = Rule(lhs, orig_rhs, weight=weight, alias=alias)
+    return RuleNode(leaf_rule, children, weight=weight)
+
+
+def revert_cnf(node):
+    """Undo the CNF transformations on a parse tree (RuleNode).
+
+    Terminals are returned unchanged.  Nodes of generated ``__T_`` rules are
+    replaced by their single child, children that are generated ``__SP_``
+    nodes are spliced into the parent, and UnitSkipRule nodes are expanded
+    back into their chain of unit rules.
+    """
+    if isinstance(node, T):
+        return node
+    rule = node.rule
+    # Undo TERM.
+    if rule.lhs.name.startswith('__T_'):
+        return node.children[0]
+    restored = []
+    for child in map(revert_cnf, node.children):
+        # Undo BIN.
+        if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
+            restored.extend(child.children)
+        else:
+            restored.append(child)
+    if not isinstance(rule, UnitSkipRule):
+        return RuleNode(rule, restored)
+    # Undo UNIT.
+    return unroll_unit_skiprule(rule.lhs, rule.rhs, rule.skipped_rules,
+                                restored, rule.weight, rule.alias)
@@ -0,0 +1,312 @@
+"""This module implements an Earley parser.
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+    https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
+"""
+
+from typing import TYPE_CHECKING, Callable, Optional, List, Any
+from collections import deque
+
+from ..lexer import Token
+from ..tree import Tree
+from ..exceptions import UnexpectedEOF, UnexpectedToken
+from ..utils import logger, OrderedSet, dedup_list
+from .grammar_analysis import GrammarAnalyzer
+from ..grammar import NonTerminal
+from .earley_common import Item
+from .earley_forest import ForestSumVisitor, SymbolNode, StableSymbolNode, TokenNode, ForestToParseTree
+
+if TYPE_CHECKING:
+    from ..common import LexerConf, ParserConf
+
+class Parser:
+    """Earley parser that builds an SPPF and optionally converts it to a tree.
+
+    ``parse`` is the entry point; ``predict_and_complete`` and the nested
+    ``scan`` in ``_parse`` implement the predictor/completer and the scanner.
+    """
+    lexer_conf: 'LexerConf'
+    parser_conf: 'ParserConf'
+    debug: bool
+
+    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
+                 resolve_ambiguity: bool=True, debug: bool=False,
+                 tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
+        """Precompute grammar data used while parsing.
+
+        Args:
+            lexer_conf: Lexer configuration; its ``lexer_type``, ``terminals``
+                and ``terminals_by_name`` are read by this class.
+            parser_conf: Parser configuration; its ``rules`` and ``callbacks``
+                are read here.
+            term_matcher: Callable used by the scanner as
+                ``term_matcher(expected_terminal, token)``.
+            resolve_ambiguity: Forwarded to ForestToParseTree; when true the
+                ForestToParseTree cache is disabled.
+            debug: When true, ``parse`` tries to write the forest to "sppf.png".
+            tree_class: Stored as ``self.Tree``; when None, ``parse`` returns
+                the SPPF root instead of a transformed tree.
+            ordered_sets: Selects OrderedSet/StableSymbolNode instead of
+                set/SymbolNode.
+        """
+        analysis = GrammarAnalyzer(parser_conf)
+        self.lexer_conf = lexer_conf
+        self.parser_conf = parser_conf
+        self.resolve_ambiguity = resolve_ambiguity
+        self.debug = debug
+        self.Tree = tree_class
+        self.Set = OrderedSet if ordered_sets else set
+        self.SymbolNode = StableSymbolNode if ordered_sets else SymbolNode
+
+        self.FIRST = analysis.FIRST
+        self.NULLABLE = analysis.NULLABLE
+        self.callbacks = parser_conf.callbacks
+        # TODO add typing info
+        self.predictions = {}   # type: ignore[var-annotated]
+
+        ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
+        #  the slow 'isupper' in is_terminal.
+        self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
+        self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }
+
+        # Stays None unless some rule or terminal carries a priority (see below).
+        self.forest_sum_visitor = None
+        for rule in parser_conf.rules:
+            if rule.origin not in self.predictions:
+                self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
+
+            ## Detect if any rules/terminals have priorities set. If the user specified priority = None, then
+            #  the priorities will be stripped from all rules/terminals before they reach us, allowing us to
+            #  skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
+            #  on any rules/terminals.
+            if self.forest_sum_visitor is None and rule.options.priority is not None:
+                self.forest_sum_visitor = ForestSumVisitor
+
+        # Check terminals for priorities
+        # Ignore terminal priorities if the basic lexer is used
+        if self.lexer_conf.lexer_type != 'basic' and self.forest_sum_visitor is None:
+            for term in self.lexer_conf.terminals:
+                if term.priority:
+                    self.forest_sum_visitor = ForestSumVisitor
+                    break
+
+        self.term_matcher = term_matcher
+
+
+    def predict_and_complete(self, i, to_scan, columns, transitives, node_cache):
+        """The core Earley Predictor and Completer.
+
+        At each stage of the input, we handle any completed items (things
+        that matched on the last cycle) and use those to predict what should
+        come next in the input stream. The completions and any predicted
+        non-terminals are recursively processed until only items that expect
+        a terminal remain; those are added to the scan list for the next
+        scanner cycle.
+
+        Args:
+            i: Index of the Earley set (column) being processed.
+            to_scan: Scan buffer; items expecting a terminal are added to it.
+            columns: List of Earley sets; ``columns[i]`` is extended in place.
+            transitives: Per-column dicts consulted by the Leo completer.
+            node_cache: Maps ``(s, start, end)`` labels to SPPF symbol nodes
+                so that each label gets a single node.
+        """
+        # Held Completions (H in E.Scotts paper).
+        held_completions = {}
+
+        column = columns[i]
+        # R (items) = Ei (column.items)
+        items = deque(column)
+        while items:
+            item = items.pop()    # remove an element, A say, from R
+
+            ### The Earley completer
+            if item.is_complete:   ### (item.s == string)
+                if item.node is None:
+                    label = (item.s, item.start, i)
+                    item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    item.node.add_family(item.s, item.rule, item.start, None, None)
+
+                # create_leo_transitives(item.rule.origin, item.start)
+
+                ###R Joop Leo right recursion Completer
+                if item.rule.origin in transitives[item.start]:
+                    transitive = transitives[item.start][item.s]
+                    if transitive.previous in transitives[transitive.column]:
+                        root_transitive = transitives[transitive.column][transitive.previous]
+                    else:
+                        root_transitive = transitive
+
+                    new_item = Item(transitive.rule, transitive.ptr, transitive.start)
+                    label = (root_transitive.s, root_transitive.start, i)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_path(root_transitive, item.node)
+                    if new_item.expect in self.TERMINALS:
+                        # Add (B :: aC.B, h, y) to Q
+                        to_scan.add(new_item)
+                    elif new_item not in column:
+                        # Add (B :: aC.B, h, y) to Ei and R
+                        column.add(new_item)
+                        items.append(new_item)
+                ###R Regular Earley completer
+                else:
+                    # Empty has 0 length. If we complete an empty symbol in a particular
+                    # parse step, we need to be able to use that same empty symbol to complete
+                    # any predictions that result, that themselves require empty. Avoids
+                    # infinite recursion on empty symbols.
+                    # held_completions is 'H' in E.Scott's paper.
+                    is_empty_item = item.start == i
+                    if is_empty_item:
+                        held_completions[item.rule.origin] = item.node
+
+                    # Items in the column where this item started that were waiting for its symbol.
+                    originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
+                    for originator in originators:
+                        new_item = originator.advance()
+                        label = (new_item.s, originator.start, i)
+                        new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                        new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
+                        if new_item.expect in self.TERMINALS:
+                            # Add (B :: aC.B, h, y) to Q
+                            to_scan.add(new_item)
+                        elif new_item not in column:
+                            # Add (B :: aC.B, h, y) to Ei and R
+                            column.add(new_item)
+                            items.append(new_item)
+
+            ### The Earley predictor
+            elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
+                new_items = []
+                for rule in self.predictions[item.expect]:
+                    new_item = Item(rule, 0, i)
+                    new_items.append(new_item)
+
+                # Process any held completions (H).
+                if item.expect in held_completions:
+                    new_item = item.advance()
+                    label = (new_item.s, item.start, i)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
+                    new_items.append(new_item)
+
+                for new_item in new_items:
+                    if new_item.expect in self.TERMINALS:
+                        to_scan.add(new_item)
+                    elif new_item not in column:
+                        column.add(new_item)
+                        items.append(new_item)
+
+    def _parse(self, lexer, columns, to_scan, start_symbol=None):
+        """Run the main Earley loop over the tokens produced by ``lexer``.
+
+        For every token, the current column is predicted/completed and the
+        token is then scanned into a new column appended to ``columns``.
+        After the last token one more predict/complete pass is run.
+
+        Returns:
+            The scan buffer left after the final pass.
+
+        Raises:
+            UnexpectedToken: when a token advances no item (raised by ``scan``).
+        """
+
+        # NOTE(review): is_quasi_complete is not called anywhere in this method as shown.
+        def is_quasi_complete(item):
+            if item.is_complete:
+                return True
+
+            quasi = item.advance()
+            while not quasi.is_complete:
+                if quasi.expect not in self.NULLABLE:
+                    return False
+                if quasi.rule.origin == start_symbol and quasi.expect == start_symbol:
+                    return False
+                quasi = quasi.advance()
+            return True
+
+        # def create_leo_transitives(origin, start):
+        #   ...   # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
+
+        def scan(i, token, to_scan):
+            """The core Earley Scanner.
+
+            This is a custom implementation of the scanner that uses the
+            Lark lexer to match tokens. The scan list is built by the
+            Earley predictor, based on the previously completed tokens.
+            This ensures that at each phase of the parse we have a custom
+            lexer context, allowing for more complex ambiguities."""
+            next_to_scan = self.Set()
+            next_set = self.Set()
+            columns.append(next_set)
+            transitives.append({})
+            # Fresh node cache for the column being created.
+            node_cache = {}
+
+            for item in self.Set(to_scan):
+                if match(item.expect, token):
+                    new_item = item.advance()
+                    label = (new_item.s, new_item.start, i + 1)
+                    # 'terminals' may not contain token.type when using %declare
+                    # Additionally, token is not always a Token
+                    # For example, it can be a Tree when using TreeMatcher
+                    term = terminals.get(token.type) if isinstance(token, Token) else None
+                    # Set the priority of the token node to 0 so that the
+                    # terminal priorities do not affect the Tree chosen by
+                    # ForestSumVisitor after the basic lexer has already
+                    # "used up" the terminal priorities
+                    token_node = TokenNode(token, term, priority=0)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
+
+                    if new_item.expect in self.TERMINALS:
+                        # add (B ::= Aai+1.B, h, y) to Q'
+                        next_to_scan.add(new_item)
+                    else:
+                        # add (B ::= Aa+1.B, h, y) to Ei+1
+                        next_set.add(new_item)
+
+            # Nothing advanced on this token: report what was expected instead.
+            if not next_set and not next_to_scan:
+                expect = {i.expect.name for i in to_scan}
+                raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
+
+            return next_to_scan, node_cache
+
+
+        # Define parser functions
+        match = self.term_matcher
+
+        terminals = self.lexer_conf.terminals_by_name
+
+        # One dict per column, read by the Leo completer in predict_and_complete.
+        # NOTE(review): nothing visible in this class adds entries to these dicts.
+        transitives = [{}]
+
+        ## The main Earley loop.
+        # Run the Prediction/Completion cycle for any Items in the current Earley set.
+        # Completions will be added to the SPPF tree, and predictions will be recursively
+        # processed down to terminals/empty nodes to be added to the scanner for the next
+        # step.
+        # 'expects' is handed to the lexer once and then updated in place after every token.
+        expects = {i.expect for i in to_scan}
+        i = 0
+        # Cache for nodes created in a particular parse step; replaced after each scan.
+        node_cache = {}
+        for token in lexer.lex(expects):
+            self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+            to_scan, node_cache = scan(i, token, to_scan)
+            i += 1
+
+            expects.clear()
+            expects |= {i.expect for i in to_scan}
+
+        self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+        ## Column is now the final column in the parse.
+        assert i == len(columns)-1
+        return to_scan
+
+    def parse(self, lexer, start):
+        """Parse the lexer's token stream starting from the rule named ``start``.
+
+        Returns:
+            The result of ForestToParseTree.transform on the SPPF root when
+            ``self.Tree`` is not None, otherwise the SPPF root node itself.
+
+        Raises:
+            UnexpectedEOF: when no completed start item spans the whole input.
+            RuntimeError: when more than one distinct start node is found.
+        """
+        assert start, start
+        start_symbol = NonTerminal(start)
+
+        columns = [self.Set()]
+        to_scan = self.Set()     # The scan buffer. 'Q' in E.Scott's paper.
+
+        ## Predict for the start_symbol.
+        # Add predicted items to the first Earley set (for the predictor) if they
+        # result in a non-terminal, or the scanner if they result in a terminal.
+        for rule in self.predictions[start_symbol]:
+            item = Item(rule, 0, 0)
+            if item.expect in self.TERMINALS:
+                to_scan.add(item)
+            else:
+                columns[0].add(item)
+
+        to_scan = self._parse(lexer, columns, to_scan, start_symbol)
+
+        # If the parse was successful, the start
+        # symbol should have been completed in the last step of the Earley cycle, and will be in
+        # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
+        solutions = dedup_list(n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0)
+        if not solutions:
+            expected_terminals = [t.expect.name for t in to_scan]
+            raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
+        if len(solutions) > 1:
+            raise RuntimeError('Earley should not generate multiple start symbol items! Please report this bug.')
+        solution ,= solutions
+
+        if self.debug:
+            # Imported lazily: only needed for the optional debug image.
+            from .earley_forest import ForestToPyDotVisitor
+            try:
+                debug_walker = ForestToPyDotVisitor()
+            except ImportError:
+                logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
+            else:
+                debug_walker.visit(solution, "sppf.png")
+
+
+        if self.Tree is not None:
+            # Perform our SPPF -> AST conversion
+            # Disable the ForestToParseTree cache when ambiguity='resolve'
+            # to prevent a tree construction bug. See issue #1283
+            use_cache = not self.resolve_ambiguity
+            transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity, use_cache)
+            return transformer.transform(solution)
+
+        # return the root of the SPPF
+        return solution
@@ -0,0 +1,42 @@
+"""This module implements useful building blocks for the Earley parser
+"""
+
+
+class Item:
+    "An Earley Item, the atom of the algorithm."
+
+    __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
+    def __init__(self, rule, ptr, start):
+        expansion = rule.expansion
+        complete = len(expansion) == ptr
+        self.is_complete = complete
+        self.rule = rule    # rule
+        self.ptr = ptr      # ptr
+        self.start = start  # j
+        self.node = None    # w
+        if complete:
+            # Finished item: keyed by the rule's origin, expects nothing more.
+            self.s, self.expect = rule.origin, None
+        else:
+            # In-progress item: keyed by its LR0 (rule, ptr).
+            self.s, self.expect = (rule, ptr), expansion[ptr]
+        # Symbol just before the dot, if there is one.
+        self.previous = expansion[ptr - 1] if ptr > 0 and len(expansion) else None
+        # Hash is computed once; items are used heavily as set members.
+        self._hash = hash((self.s, self.start, self.rule))
+
+    def advance(self):
+        """Return a new Item for the same rule with the dot moved one symbol on."""
+        next_ptr = self.ptr + 1
+        return Item(self.rule, next_ptr, self.start)
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        return self.s == other.s and self.start == other.start and self.rule == other.rule
+
+    def __hash__(self):
+        return self._hash
+
+    def __repr__(self):
+        symbols = self.rule.expansion
+        before = ' '.join(sym.name for sym in symbols[:self.ptr])
+        after = ' '.join(sym.name for sym in symbols[self.ptr:])
+        symbol = "{} ::= {}* {}".format(self.rule.origin.name, before, after)
+        return '%s (%d)' % (symbol, self.start)
+
+
+# class TransitiveItem(Item):
+#   ...   # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
@@ -0,0 +1,802 @@
+"""This module implements a Shared Packed Parse Forest (SPPF).
+
+This is used as the primary output mechanism for the Earley parser
+in order to store complex ambiguities.
+
+The full reference and more details are here:
+https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+"""
+
+from typing import Type, AbstractSet
+from random import randint
+from collections import deque
+from operator import attrgetter
+from importlib import import_module
+from functools import partial
+
+from ..parse_tree_builder import AmbiguousIntermediateExpander
+from ..visitors import Discard
+from ..utils import logger, OrderedSet
+from ..tree import Tree
+
+class ForestNode:
+    pass
+
+class SymbolNode(ForestNode):
+    """
+    A Symbol Node represents a symbol (or Intermediate LR0).
+
+    Symbol nodes are keyed by the symbol (s). For intermediate nodes
+    s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol
+    nodes, s will be a string representing the non-terminal origin (i.e.
+    the left hand side of the rule).
+
+    The children of a Symbol or Intermediate Node will always be Packed Nodes;
+    with each Packed Node child representing a single derivation of a production.
+
+    Hence a Symbol Node with a single child is unambiguous.
+
+    Parameters:
+        s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
+        start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive).
+        end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive).
+
+    Properties:
+        is_intermediate: True if this node is an intermediate node.
+        priority: The priority of the node's symbol.
+    """
+    Set: Type[AbstractSet] = set   # Overridden by StableSymbolNode
+    __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate')
+    def __init__(self, s, start, end):
+        self.s = s
+        self.start = start
+        self.end = end
+        self._children = self.Set()
+        self.paths = self.Set()
+        self.paths_loaded = False
+
+        ### We use inf here as it can be safely negated without resorting to conditionals,
+        #   unlike None or float('NaN'), and sorts appropriately.
+        self.priority = float('-inf')
+        self.is_intermediate = isinstance(s, tuple)
+
+    def add_family(self, lr0, rule, start, left, right):
+        self._children.add(PackedNode(self, lr0, rule, start, left, right))
+
+    def add_path(self, transitive, node):
+        self.paths.add((transitive, node))
+
+    def load_paths(self):
+        for transitive, node in self.paths:
+            if transitive.next_titem is not None:
+                vn = type(self)(transitive.next_titem.s, transitive.next_titem.start, self.end)
+                vn.add_path(transitive.next_titem, node)
+                self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
+            else:
+                self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
+        self.paths_loaded = True
+
+    @property
+    def is_ambiguous(self):
+        """Returns True if this node is ambiguous."""
+        return len(self.children) > 1
+
+    @property
+    def children(self):
+        """Returns a list of this node's children sorted from greatest to
+        least priority."""
+        if not self.paths_loaded:
+            self.load_paths()
+        return sorted(self._children, key=attrgetter('sort_key'))
+
+    def __iter__(self):
+        return iter(self._children)
+
+    def __repr__(self):
+        if self.is_intermediate:
+            rule = self.s[0]
+            ptr = self.s[1]
+            before = ( expansion.name for expansion in rule.expansion[:ptr] )
+            after = ( expansion.name for expansion in rule.expansion[ptr:] )
+            symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+        else:
+            symbol = self.s.name
+        return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
+
+class StableSymbolNode(SymbolNode):
+    "A version of SymbolNode that uses OrderedSet for output stability"
+    Set = OrderedSet
+
+class PackedNode(ForestNode):
+    """
+    A Packed Node represents a single derivation in a symbol node.
+
+    Parameters:
+        rule: The rule associated with this node.
+        parent: The parent of this node.
+        left: The left child of this node. ``None`` if one does not exist.
+        right: The right child of this node. ``None`` if one does not exist.
+        priority: The priority of this node.
+    """
+    __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
+    def __init__(self, parent, s, rule, start, left, right):
+        self.parent = parent
+        self.s = s
+        self.start = start
+        self.rule = rule
+        self.left = left
+        self.right = right
+        self.priority = float('-inf')
+        self._hash = hash((self.left, self.right))
+
+    @property
+    def is_empty(self):
+        return self.left is None and self.right is None
+
+    @property
+    def sort_key(self):
+        """
+        Used to sort PackedNode children of SymbolNodes.
+        A SymbolNode has multiple PackedNodes if it matched
+        ambiguously. Hence, we use the sort order to identify
+        the order in which ambiguous children should be considered.
+        """
+        return self.is_empty, -self.priority, self.rule.order
+
+    @property
+    def children(self):
+        """Returns a list of this node's children."""
+        return [x for x in [self.left, self.right] if x is not None]
+
+    def __iter__(self):
+        yield self.left
+        yield self.right
+
+    def __eq__(self, other):
+        if not isinstance(other, PackedNode):
+            return False
+        return self is other or (self.left == other.left and self.right == other.right)
+
+    def __hash__(self):
+        return self._hash
+
+    def __repr__(self):
+        if isinstance(self.s, tuple):
+            rule = self.s[0]
+            ptr = self.s[1]
+            before = ( expansion.name for expansion in rule.expansion[:ptr] )
+            after = ( expansion.name for expansion in rule.expansion[ptr:] )
+            symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+        else:
+            symbol = self.s.name
+        return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
+
+class TokenNode(ForestNode):
+    """
+    A Token Node represents a matched terminal and is always a leaf node.
+
+    Parameters:
+        token: The Token associated with this node.
+        term: The TerminalDef matched by the token.
+        priority: The priority of this node.
+    """
+    __slots__ = ('token', 'term', 'priority', '_hash')
+    def __init__(self, token, term, priority=None):
+        self.token = token
+        self.term = term
+        if priority is not None:
+            self.priority = priority
+        else:
+            self.priority = term.priority if term is not None else 0
+        self._hash = hash(token)
+
+    def __eq__(self, other):
+        if not isinstance(other, TokenNode):
+            return False
+        return self is other or (self.token == other.token)
+
+    def __hash__(self):
+        return self._hash
+
+    def __repr__(self):
+        return repr(self.token)
+
+class ForestVisitor:
+    """
+    An abstract base class for building forest visitors.
+
+    This class performs a controllable depth-first walk of an SPPF.
+    The visitor will not enter cycles and will backtrack if one is encountered.
+    Subclasses are notified of cycles through the ``on_cycle`` method.
+
+    Behavior for visit events is defined by overriding the
+    ``visit*node*`` functions.
+
+    The walk is controlled by the return values of the ``visit*node_in``
+    methods. Returning a node(s) will schedule them to be visited. The visitor
+    will begin to backtrack if no nodes are returned.
+
+    Parameters:
+        single_visit: If ``True``, non-Token nodes will only be visited once.
+    """
+
+    def __init__(self, single_visit=False):
+        self.single_visit = single_visit
+
+    def visit_token_node(self, node):
+        """Called when a ``Token`` is visited. ``Token`` nodes are always leaves.
+
+        ``visit`` passes the ``token`` attribute of the ``TokenNode``, not the
+        ``TokenNode`` itself."""
+        pass
+
+    def visit_symbol_node_in(self, node):
+        """Called when a symbol node is visited. Nodes that are returned
+        will be scheduled to be visited. If ``visit_intermediate_node_in``
+        is not implemented, this function will be called for intermediate
+        nodes as well."""
+        pass
+
+    def visit_symbol_node_out(self, node):
+        """Called after all nodes returned from a corresponding ``visit_symbol_node_in``
+        call have been visited. If ``visit_intermediate_node_out``
+        is not implemented, this function will be called for intermediate
+        nodes as well."""
+        pass
+
+    def visit_packed_node_in(self, node):
+        """Called when a packed node is visited. Nodes that are returned
+        will be scheduled to be visited. """
+        pass
+
+    def visit_packed_node_out(self, node):
+        """Called after all nodes returned from a corresponding ``visit_packed_node_in``
+        call have been visited."""
+        pass
+
+    def on_cycle(self, node, path):
+        """Called when a cycle is encountered.
+
+        Parameters:
+            node: The node that causes a cycle.
+            path: The list of nodes being visited: nodes that have been
+                entered but not exited. The first element is the root in a forest
+                visit, and the last element is the node visited most recently.
+                ``path`` should be treated as read-only.
+        """
+        pass
+
+    def get_cycle_in_path(self, node, path):
+        """A utility function for use in ``on_cycle`` to obtain a slice of
+        ``path`` that only contains the nodes that make up the cycle."""
+        # Scan backwards from the most recently entered node until ``node``
+        # itself (compared by identity) is found.
+        index = len(path) - 1
+        while id(path[index]) != id(node):
+            index -= 1
+        return path[index:]
+
+    def visit(self, root):
+        """Walk the forest from ``root``, calling the ``visit_*`` callbacks.
+
+        The walk is iterative. Each symbol, intermediate or packed node is
+        seen twice at the top of the stack: first to call its ``*_in``
+        callback and schedule whatever that returns, and again, once those
+        have been processed, to call its ``*_out`` callback.
+
+        Parameters:
+            root: The node at which the walk starts.
+        """
+        # ``visiting`` is a set of IDs of all symbol/intermediate/packed nodes
+        # currently on the stack. It serves two purposes: to detect when we
+        # 'recurse' in and out of a node so that we can process both up and down.
+        # Also, since the SPPF can have cycles it allows us to detect if we're
+        # trying to recurse into a node that's already on the stack (infinite
+        # recursion).
+        visiting = set()
+
+        # set of the IDs of all nodes that have been fully visited
+        visited = set()
+
+        # a list of nodes that are currently being visited
+        # used for the `on_cycle` callback
+        path = []
+
+        # We do not use recursion here to walk the Forest due to the limited
+        # stack size in python. Therefore input_stack is essentially our stack.
+        # It holds both nodes and iterators over scheduled nodes.
+        input_stack = deque([root])
+
+        # It is much faster to cache these as locals since they are called
+        # many times in large parses.
+        vpno = getattr(self, 'visit_packed_node_out')
+        vpni = getattr(self, 'visit_packed_node_in')
+        vsno = getattr(self, 'visit_symbol_node_out')
+        vsni = getattr(self, 'visit_symbol_node_in')
+        # Intermediate nodes fall back to the symbol-node callbacks when no
+        # dedicated method exists.
+        vino = getattr(self, 'visit_intermediate_node_out', vsno)
+        vini = getattr(self, 'visit_intermediate_node_in', vsni)
+        vtn = getattr(self, 'visit_token_node')
+        oc = getattr(self, 'on_cycle')
+
+        while input_stack:
+            # Peek at the top of the stack without popping it.
+            current = next(reversed(input_stack))
+            try:
+                next_node = next(current)
+            except StopIteration:
+                # The iterator is exhausted: drop it and resume with its owner.
+                input_stack.pop()
+                continue
+            except TypeError:
+                ### If the current object is not an iterator, pass through to Token/SymbolNode
+                pass
+            else:
+                # ``current`` is an iterator and produced another scheduled node.
+                if next_node is None:
+                    continue
+
+                if id(next_node) in visiting:
+                    # Already on the stack: report the cycle instead of entering it.
+                    oc(next_node, path)
+                    continue
+
+                input_stack.append(next_node)
+                continue
+
+            if isinstance(current, TokenNode):
+                # Leaves have no in/out phases; the callback receives the token.
+                vtn(current.token)
+                input_stack.pop()
+                continue
+
+            current_id = id(current)
+            if current_id in visiting:
+                # Second encounter: everything scheduled by the ``*_in``
+                # callback has been processed, so fire the ``*_out`` callback.
+                if isinstance(current, PackedNode):
+                    vpno(current)
+                elif current.is_intermediate:
+                    vino(current)
+                else:
+                    vsno(current)
+                input_stack.pop()
+                path.pop()
+                visiting.remove(current_id)
+                visited.add(current_id)
+            elif self.single_visit and current_id in visited:
+                # Already fully visited and the visitor asked for a single visit.
+                input_stack.pop()
+            else:
+                # First encounter: enter the node and schedule what ``*_in`` returns.
+                visiting.add(current_id)
+                path.append(current)
+                if isinstance(current, PackedNode):
+                    next_node = vpni(current)
+                elif current.is_intermediate:
+                    next_node = vini(current)
+                else:
+                    next_node = vsni(current)
+                if next_node is None:
+                    # Nothing scheduled; the next iteration fires the ``*_out`` callback.
+                    continue
+
+                if not isinstance(next_node, ForestNode):
+                    # A collection or generator of nodes: iterate it lazily.
+                    next_node = iter(next_node)
+                elif id(next_node) in visiting:
+                    oc(next_node, path)
+                    continue
+
+                input_stack.append(next_node)
+
+class ForestTransformer(ForestVisitor):
+    """The base class for a bottom-up forest transformation. Most users will
+    want to use ``TreeForestTransformer`` instead as it has a friendlier
+    interface and covers most use cases.
+
+    Transformations are applied via inheritance and overriding of the
+    ``transform*node`` methods.
+
+    ``transform_token_node`` receives a ``Token`` as an argument.
+    All other methods receive the node that is being transformed and
+    a list of the results of the transformations of that node's children.
+    The return value of these methods are the resulting transformations.
+
+    If ``Discard`` is raised in a node's transformation, no data from that node
+    will be passed to its parent's transformation.
+    """
+
+    def __init__(self):
+        super(ForestTransformer, self).__init__()
+        # results of transformations
+        self.data = dict()
+        # used to track parent nodes
+        self.node_stack = deque()
+
+    def transform(self, root):
+        """Perform a transformation on an SPPF."""
+        self.node_stack.append('result')
+        self.data['result'] = []
+        self.visit(root)
+        assert len(self.data['result']) <= 1
+        if self.data['result']:
+            return self.data['result'][0]
+
+    def transform_symbol_node(self, node, data):
+        """Transform a symbol node."""
+        return node
+
+    def transform_intermediate_node(self, node, data):
+        """Transform an intermediate node."""
+        return node
+
+    def transform_packed_node(self, node, data):
+        """Transform a packed node."""
+        return node
+
+    def transform_token_node(self, node):
+        """Transform a ``Token``."""
+        return node
+
+    def visit_symbol_node_in(self, node):
+        self.node_stack.append(id(node))
+        self.data[id(node)] = []
+        return node.children
+
+    def visit_packed_node_in(self, node):
+        self.node_stack.append(id(node))
+        self.data[id(node)] = []
+        return node.children
+
+    def visit_token_node(self, node):
+        transformed = self.transform_token_node(node)
+        if transformed is not Discard:
+            self.data[self.node_stack[-1]].append(transformed)
+
+    def _visit_node_out_helper(self, node, method):
+        self.node_stack.pop()
+        transformed = method(node, self.data[id(node)])
+        if transformed is not Discard:
+            self.data[self.node_stack[-1]].append(transformed)
+        del self.data[id(node)]
+
+    def visit_symbol_node_out(self, node):
+        self._visit_node_out_helper(node, self.transform_symbol_node)
+
+    def visit_intermediate_node_out(self, node):
+        self._visit_node_out_helper(node, self.transform_intermediate_node)
+
+    def visit_packed_node_out(self, node):
+        self._visit_node_out_helper(node, self.transform_packed_node)
+
+
+class ForestSumVisitor(ForestVisitor):
+    """
+    A visitor for prioritizing ambiguous parts of the Forest.
+
+    This visitor is used when support for explicit priorities on
+    rules is requested (whether normal, or invert). It walks the
+    forest (or subsets thereof) and cascades properties upwards
+    from the leaves.
+
+    It would be ideal to do this during parsing, however this would
+    require processing each Earley item multiple times. That's
+    a big performance drawback; so running a forest walk is the
+    lesser of two evils: there can be significantly more Earley
+    items created during parsing than there are SPPF nodes in the
+    final tree.
+    """
+    def __init__(self):
+        super(ForestSumVisitor, self).__init__(single_visit=True)
+
+    def visit_packed_node_in(self, node):
+        yield node.left
+        yield node.right
+
+    def visit_symbol_node_in(self, node):
+        return iter(node.children)
+
+    def visit_packed_node_out(self, node):
+        priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0
+        priority += getattr(node.right, 'priority', 0)
+        priority += getattr(node.left, 'priority', 0)
+        node.priority = priority
+
+    def visit_symbol_node_out(self, node):
+        node.priority = max(child.priority for child in node.children)
+
+class PackedData():
+    """Used in transformationss of packed nodes to distinguish the data
+    that comes from the left child and the right child.
+    """
+
+    class _NoData():
+        pass
+
+    NO_DATA = _NoData()
+
+    def __init__(self, node, data):
+        self.left = self.NO_DATA
+        self.right = self.NO_DATA
+        if data:
+            if node.left is not None:
+                self.left = data[0]
+                if len(data) > 1:
+                    self.right = data[1]
+            else:
+                self.right = data[0]
+
+class ForestToParseTree(ForestTransformer):
+    """Used by the earley parser when ambiguity equals 'resolve' or
+    'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
+
+    Parameters:
+        tree_class: The tree class to use for construction
+        callbacks: A dictionary of rules to functions that output a tree
+        prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes.
+                        A falsy value skips the prioritizing walk.
+        resolve_ambiguity: If True, ambiguities will be resolved based on
+                        priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
+        use_cache: If True, the results of packed node transformations will be cached.
+    """
+
+    # NOTE(review): the ``callbacks`` and ``prioritizer`` defaults are created
+    # once, at definition time, and shared by every instance that relies on them.
+    def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
+        super(ForestToParseTree, self).__init__()
+        self.tree_class = tree_class
+        self.callbacks = callbacks
+        self.prioritizer = prioritizer
+        self.resolve_ambiguity = resolve_ambiguity
+        self._use_cache = use_cache
+        # id(packed node) -> its transformation; emptied after every visit.
+        self._cache = {}
+        # True while backing out of a cycle reported through ``on_cycle``.
+        self._on_cycle_retreat = False
+        # The node that triggered the current retreat, if any.
+        self._cycle_node = None
+        # ids of parents for which a packed child was exited outside a retreat.
+        self._successful_visits = set()
+
+    def visit(self, root):
+        """Run the prioritizer (if any) over ``root``, then the transforming
+        walk, and finally drop the packed-node cache."""
+        if self.prioritizer:
+            self.prioritizer.visit(root)
+        super(ForestToParseTree, self).visit(root)
+        self._cache = {}
+
+    def on_cycle(self, node, path):
+        """Log the cycle, remember ``node`` and start retreating."""
+        logger.debug("Cycle encountered in the SPPF at node: %s. "
+                "As infinite ambiguities cannot be represented in a tree, "
+                "this family of derivations will be discarded.", node)
+        self._cycle_node = node
+        self._on_cycle_retreat = True
+
+    def _check_cycle(self, node):
+        """Return ``Discard`` while retreating from a cycle, otherwise ``None``.
+
+        The retreat ends when ``node`` is the node that caused the cycle, or
+        when ``node`` already has a successful visit recorded."""
+        if self._on_cycle_retreat:
+            if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
+                self._cycle_node = None
+                self._on_cycle_retreat = False
+            else:
+                return Discard
+
+    def _collapse_ambig(self, children):
+        """Return ``children`` with every child whose ``data`` is ``'_ambig'``
+        replaced, in place, by that child's own children."""
+        new_children = []
+        for child in children:
+            if hasattr(child, 'data') and child.data == '_ambig':
+                new_children += child.children
+            else:
+                new_children.append(child)
+        return new_children
+
+    def _call_rule_func(self, node, data):
+        # called when transforming children of symbol nodes
+        # data is a list of trees or tokens that correspond to the
+        # symbol's rule expansion
+        return self.callbacks[node.rule](data)
+
+    def _call_ambig_func(self, node, data):
+        # called when transforming a symbol node
+        # data is a list of trees where each tree's data is
+        # equal to the name of the symbol or one of its aliases.
+        # Several results are wrapped in an '_ambig' tree, a single result is
+        # returned as is, and no result at all yields Discard.
+        if len(data) > 1:
+            return self.tree_class('_ambig', data)
+        elif data:
+            return data[0]
+        return Discard
+
+    def transform_symbol_node(self, node, data):
+        """Build the result for a symbol node, or ``Discard`` when none of its
+        derivations succeeded or a cycle retreat is in progress."""
+        if id(node) not in self._successful_visits:
+            return Discard
+        r = self._check_cycle(node)
+        if r is Discard:
+            return r
+        # The marker is consumed here, on exiting the node.
+        self._successful_visits.remove(id(node))
+        data = self._collapse_ambig(data)
+        return self._call_ambig_func(node, data)
+
+    def transform_intermediate_node(self, node, data):
+        """Build the result for an intermediate node.
+
+        Several derivations become an ``'_iambig'`` tree of ``'_inter'``
+        trees; a single derivation is passed through unchanged."""
+        if id(node) not in self._successful_visits:
+            return Discard
+        r = self._check_cycle(node)
+        if r is Discard:
+            return r
+        self._successful_visits.remove(id(node))
+        if len(data) > 1:
+            children = [self.tree_class('_inter', c) for c in data]
+            return self.tree_class('_iambig', children)
+        return data[0]
+
+    def transform_packed_node(self, node, data):
+        """Combine the results of a packed node's children.
+
+        Returns a plain list when the parent is an intermediate node,
+        otherwise the result of ``_call_rule_func``. Returns ``Discard``
+        during a cycle retreat, or when ambiguity is being resolved and the
+        parent already has a successful derivation."""
+        r = self._check_cycle(node)
+        if r is Discard:
+            return r
+        if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
+            return Discard
+        if self._use_cache and id(node) in self._cache:
+            return self._cache[id(node)]
+        children = []
+        assert len(data) <= 2
+        data = PackedData(node, data)
+        if data.left is not PackedData.NO_DATA:
+            # A list coming from an intermediate left child is spliced in
+            # rather than nested.
+            if node.left.is_intermediate and isinstance(data.left, list):
+                children += data.left
+            else:
+                children.append(data.left)
+        if data.right is not PackedData.NO_DATA:
+            children.append(data.right)
+        transformed = children if node.parent.is_intermediate else self._call_rule_func(node, children)
+        if self._use_cache:
+            self._cache[id(node)] = transformed
+        return transformed
+
+    def visit_symbol_node_in(self, node):
+        # The base call registers the node on the stack; its return value is
+        # not used, since the children are returned below.
+        super(ForestToParseTree, self).visit_symbol_node_in(node)
+        if self._on_cycle_retreat:
+            # Schedule nothing while retreating from a cycle.
+            return
+        return node.children
+
+    def visit_packed_node_in(self, node):
+        # Entering a packed node ends any retreat that is in progress.
+        self._on_cycle_retreat = False
+        to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
+        # Children are scheduled only if this derivation is still needed
+        # (not resolving, or the parent has no successful derivation yet) and
+        # its result is not already cached. Otherwise ``None`` is returned.
+        if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
+            if not self._use_cache or id(node) not in self._cache:
+                return to_visit
+
+    def visit_packed_node_out(self, node):
+        super(ForestToParseTree, self).visit_packed_node_out(node)
+        # Exiting outside a retreat marks the parent as successfully derived.
+        if not self._on_cycle_retreat:
+            self._successful_visits.add(id(node.parent))
+
+
+def handles_ambiguity(func):
+    """Decorator for methods of subclasses of ``TreeForestTransformer``.
+    Denotes that the method should receive a list of transformed derivations."""
+    func.handles_ambiguity = True
+    return func
+
+class TreeForestTransformer(ForestToParseTree):
+    """A ``ForestTransformer`` with a tree ``Transformer``-like interface.
+    By default, it will construct a tree.
+
+    Methods provided via inheritance are called based on the rule/symbol
+    names of nodes in the forest.
+
+    Methods that act on rules will receive a list of the results of the
+    transformations of the rule's children. By default, trees and tokens.
+
+    Methods that act on tokens will receive a token.
+
+    Alternatively, methods that act on rules may be annotated with
+    ``handles_ambiguity``. In this case, the function will receive a list
+    of all the transformations of all the derivations of the rule.
+    By default, a list of trees where each tree.data is equal to the
+    rule name or one of its aliases.
+
+    Non-tree transformations are made possible by override of
+    ``__default__``, ``__default_token__``, and ``__default_ambig__``.
+
+    Note:
+        Tree shaping features such as inlined rules and token filtering are
+        not built into the transformation. Positions are also not propagated.
+
+    Parameters:
+        tree_class: The tree class to use for construction
+        prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
+        resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
+        use_cache (bool): If True, caches the results of some transformations,
+                          potentially improving performance when ``resolve_ambiguity==False``.
+                          Only use if you know what you are doing: i.e. All transformation
+                          functions are pure and referentially transparent.
+    """
+
+    def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
+        super(TreeForestTransformer, self).__init__(tree_class, dict(), prioritizer, resolve_ambiguity, use_cache)
+
+    def __default__(self, name, data):
+        """Default operation on tree (for override).
+
+        Returns a tree with name with data as children.
+        """
+        return self.tree_class(name, data)
+
+    def __default_ambig__(self, name, data):
+        """Default operation on ambiguous rule (for override).
+
+        Wraps data in an '_ambig_' node if it contains more than
+        one element.
+        """
+        if len(data) > 1:
+            return self.tree_class('_ambig', data)
+        elif data:
+            return data[0]
+        return Discard
+
+    def __default_token__(self, node):
+        """Default operation on ``Token`` (for override).
+
+        Returns ``node``.
+        """
+        return node
+
+    def transform_token_node(self, node):
+        return getattr(self, node.type, self.__default_token__)(node)
+
+    def _call_rule_func(self, node, data):
+        name = node.rule.alias or node.rule.options.template_source or node.rule.origin.name
+        user_func = getattr(self, name, self.__default__)
+        if user_func == self.__default__ or hasattr(user_func, 'handles_ambiguity'):
+            user_func = partial(self.__default__, name)
+        if not self.resolve_ambiguity:
+            wrapper = partial(AmbiguousIntermediateExpander, self.tree_class)
+            user_func = wrapper(user_func)
+        return user_func(data)
+
+    def _call_ambig_func(self, node, data):
+        name = node.s.name
+        user_func = getattr(self, name, self.__default_ambig__)
+        if user_func == self.__default_ambig__ or not hasattr(user_func, 'handles_ambiguity'):
+            user_func = partial(self.__default_ambig__, name)
+        return user_func(data)
+
+class ForestToPyDotVisitor(ForestVisitor):
+    """
+    A Forest visitor which writes the SPPF to a PNG.
+
+    The SPPF can get really large, really quickly because
+    of the amount of meta-data it stores, so this is probably
+    only useful for trivial trees and learning how the SPPF
+    is structured.
+    """
+    def __init__(self, rankdir="TB"):
+        super(ForestToPyDotVisitor, self).__init__(single_visit=True)
+        self.pydot = import_module('pydot')
+        self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
+
+    def visit(self, root, filename):
+        super(ForestToPyDotVisitor, self).visit(root)
+        try:
+            self.graph.write_png(filename)
+        except FileNotFoundError as e:
+            logger.error("Could not write png: ", e)
+
+    def visit_token_node(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
+        graph_node_color = 0x808080
+        graph_node_style = "\"filled,rounded\""
+        graph_node_shape = "diamond"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+
+    def visit_packed_node_in(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = repr(node)
+        graph_node_color = 0x808080
+        graph_node_style = "filled"
+        graph_node_shape = "diamond"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+        yield node.left
+        yield node.right
+
+    def visit_packed_node_out(self, node):
+        graph_node_id = str(id(node))
+        graph_node = self.graph.get_node(graph_node_id)[0]
+        for child in [node.left, node.right]:
+            if child is not None:
+                child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
+                child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
+            else:
+                #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
+                child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
+                child_graph_node_style = "invis"
+                child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
+                child_edge_style = "invis"
+                self.graph.add_node(child_graph_node)
+                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
+
+    def visit_symbol_node_in(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = repr(node)
+        graph_node_color = 0x808080
+        graph_node_style = "\"filled\""
+        if node.is_intermediate:
+            graph_node_shape = "ellipse"
+        else:
+            graph_node_shape = "rectangle"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+        return iter(node.children)
+
+    def visit_symbol_node_out(self, node):
+        graph_node_id = str(id(node))
+        graph_node = self.graph.get_node(graph_node_id)[0]
+        for child in node.children:
+            child_graph_node_id = str(id(child))
+            child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+            self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
@@ -0,0 +1,203 @@
+"Provides for superficial grammar analysis."
+
+from collections import Counter, defaultdict
+from typing import List, Dict, Iterator, FrozenSet, Set
+
+from ..utils import bfs, fzset, classify, OrderedSet
+from ..exceptions import GrammarError
+from ..grammar import Rule, Terminal, NonTerminal, Symbol
+from ..common import ParserConf
+
+
+class RulePtr:
+    """A position inside the expansion of a rule.
+
+    ``index`` counts the expansion symbols that lie before the pointer; it is
+    asserted to be at most ``len(rule.expansion)``. Instances compare and hash
+    by ``(rule, index)``.
+    """
+    __slots__ = ('rule', 'index')
+    rule: Rule
+    index: int
+
+    def __init__(self, rule: Rule, index: int):
+        assert isinstance(rule, Rule)
+        assert index <= len(rule.expansion)
+        self.rule, self.index = rule, index
+
+    def __repr__(self):
+        # Show the expansion with a '*' marking the pointer position.
+        names = [sym.name for sym in self.rule.expansion]
+        done = ' '.join(names[:self.index])
+        todo = ' '.join(names[self.index:])
+        return '<%s : %s * %s>' % (self.rule.origin.name, done, todo)
+
+    @property
+    def next(self) -> Symbol:
+        """The expansion symbol located at the pointer (IndexError when the pointer is at the end)."""
+        return self.rule.expansion[self.index]
+
+    def advance(self, sym: Symbol) -> 'RulePtr':
+        """Return a new pointer moved one symbol forward; ``sym`` must equal ``self.next``."""
+        assert self.next == sym
+        return RulePtr(self.rule, self.index + 1)
+
+    @property
+    def is_satisfied(self) -> bool:
+        """True when the pointer sits after the last expansion symbol."""
+        return len(self.rule.expansion) == self.index
+
+    def __eq__(self, other) -> bool:
+        if isinstance(other, RulePtr):
+            return self.rule == other.rule and self.index == other.index
+        return NotImplemented
+
+    def __hash__(self) -> int:
+        return hash((self.rule, self.index))
+
+
+# Type alias: a state described by its items, i.e. an immutable set of RulePtr objects.
+State = FrozenSet[RulePtr]
+
+# state generation ensures no duplicate LR0ItemSets
+class LR0ItemSet:
+    """An LR(0) item set: a kernel, its closure, and the tables attached to it.
+
+    ``transitions`` and ``lookaheads`` start out empty and are filled in by the
+    code that builds the automaton. No ``__eq__``/``__hash__`` is defined, so
+    instances compare by identity.
+    """
+    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
+
+    kernel: State
+    closure: State
+    transitions: Dict[Symbol, 'LR0ItemSet']
+    lookaheads: Dict[Symbol, Set[Rule]]
+
+    def __init__(self, kernel, closure):
+        # Both item collections are frozen on construction.
+        self.kernel, self.closure = fzset(kernel), fzset(closure)
+        self.transitions = {}
+        self.lookaheads = defaultdict(set)
+
+    def __repr__(self):
+        kernel_text = ', '.join(repr(item) for item in self.kernel)
+        closure_text = ', '.join(repr(item) for item in self.closure)
+        return '{%s | %s}' % (kernel_text, closure_text)
+
+
+def update_set(set1, set2):
+    """Merge ``set2`` into ``set1`` in place and report whether ``set1`` grew.
+
+    Parameters:
+        set1 - the mutable set to extend (modified in place).
+        set2 - the set of elements to add; an empty or ``None`` value is a no-op.
+
+    Returns:
+        True if at least one new element was added to ``set1``, False otherwise.
+    """
+    if not set2:
+        return False
+
+    # A union can only add elements, so comparing sizes detects any change
+    # without copying set1 on every call.
+    size_before = len(set1)
+    set1 |= set2
+    return len(set1) != size_before
+
+def calculate_sets(rules):
+    """Calculate the NULLABLE, FIRST and FOLLOW sets of a grammar.
+
+    Parameters:
+        rules - the grammar rules; each must expose ``origin`` and ``expansion``,
+            and every symbol must expose ``is_term``. The collection is iterated
+            several times, so it must not be a one-shot iterator.
+
+    Returns:
+        A tuple ``(FIRST, FOLLOW, NULLABLE)``: two dicts mapping every symbol of
+        the grammar to a set of symbols, and the set of rule origins whose
+        expansion can consist solely of nullable symbols (or is empty).
+
+    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
+    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
+
+    # foreach grammar rule X ::= Y(1) ... Y(k)
+    # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
+    #   NULLABLE = NULLABLE union {X}
+    # for i = 1 to k
+    #   if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
+    #     FIRST(X) = FIRST(X) union FIRST(Y(i))
+    #   for j = i+1 to k
+    #     if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
+    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
+    #     if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
+    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
+    # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
+
+    NULLABLE = set()
+    FIRST = {}
+    FOLLOW = {}
+    for sym in symbols:
+        # A terminal's FIRST set is the terminal itself; nonterminals start empty.
+        FIRST[sym]={sym} if sym.is_term else set()
+        FOLLOW[sym]=set()
+
+    # Calculate NULLABLE and FIRST
+    # (iterate to a fixpoint: repeat until a full pass over the rules changes nothing)
+    changed = True
+    while changed:
+        changed = False
+
+        for rule in rules:
+            # An empty expansion yields set() <= NULLABLE, so empty rules are nullable immediately.
+            if set(rule.expansion) <= NULLABLE:
+                if update_set(NULLABLE, {rule.origin}):
+                    changed = True
+
+            for i, sym in enumerate(rule.expansion):
+                # sym contributes to FIRST(origin) only while everything before it is nullable.
+                if set(rule.expansion[:i]) <= NULLABLE:
+                    if update_set(FIRST[rule.origin], FIRST[sym]):
+                        changed = True
+                else:
+                    # Once a prefix is not nullable, no longer prefix can be either.
+                    break
+
+    # Calculate FOLLOW
+    # (second fixpoint; NULLABLE and FIRST are final at this point)
+    changed = True
+    while changed:
+        changed = False
+
+        for rule in rules:
+            for i, sym in enumerate(rule.expansion):
+                # If everything after sym is nullable (or sym is last), whatever follows the origin follows sym.
+                if i==len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
+                    if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
+                        changed = True
+
+                for j in range(i+1, len(rule.expansion)):
+                    # Y(j) can directly follow sym when every symbol strictly between them is nullable.
+                    if set(rule.expansion[i+1:j]) <= NULLABLE:
+                        if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
+                            changed = True
+
+    return FIRST, FOLLOW, NULLABLE
+
+
+class GrammarAnalyzer:
+    """Validates a grammar and precomputes the data the parsers' analysis needs.
+
+    On construction it adds a ``$root_<start>`` rule per start symbol, rejects
+    duplicated rules and references to undefined rules (``GrammarError``), and
+    stores the rules grouped by origin, the start/end states, the LR(0) start
+    item sets, and the FIRST / FOLLOW / NULLABLE sets.
+    """
+
+    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
+        self.debug = debug
+        self.strict = strict
+
+        # Augmented rules: $root_X -> X $END for every start symbol X.
+        root_rules = {}
+        for start in parser_conf.start:
+            root_rules[start] = Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
+
+        rules = parser_conf.rules + list(root_rules.values())
+        self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin)
+
+        if len(set(rules)) != len(rules):
+            repeated = [rule for rule, times in Counter(rules).items() if times > 1]
+            raise GrammarError("Rules defined twice: %s" % ', '.join(str(rule) for rule in repeated))
+
+        # Every nonterminal used in an expansion must have at least one rule defining it.
+        for rule in rules:
+            for sym in rule.expansion:
+                if sym.is_term or sym in self.rules_by_origin:
+                    continue
+                raise GrammarError("Using an undefined rule: %s" % sym)
+
+        self.start_states = {}
+        for start, root_rule in root_rules.items():
+            self.start_states[start] = self.expand_rule(root_rule.origin)
+
+        self.end_states = {}
+        for start, root_rule in root_rules.items():
+            self.end_states[start] = fzset({RulePtr(root_rule, len(root_rule.expansion))})
+
+        # The LR(0) variant of the root rules has no trailing $END terminal.
+        lr0_root_rules = {}
+        for start in parser_conf.start:
+            lr0_root_rules[start] = Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
+
+        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
+        assert(len(lr0_rules) == len(set(lr0_rules)))
+
+        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
+
+        # One LR0ItemSet per start symbol: the kernel is the root rule at index 0,
+        # the closure is everything reachable from the root origin.
+        self.lr0_start_states = {}
+        for start, root_rule in lr0_root_rules.items():
+            closure = self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)
+            self.lr0_start_states[start] = LR0ItemSet([RulePtr(root_rule, 0)], closure)
+
+        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
+
+    def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> OrderedSet[RulePtr]:
+        "Returns all init_ptrs accessible by rule (recursive)"
+
+        if rules_by_origin is None:
+            rules_by_origin = self.rules_by_origin
+
+        init_ptrs = OrderedSet[RulePtr]()
+
+        def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]:
+            # Record an index-0 pointer for each rule of this origin and yield
+            # the nonterminals that start those rules, for bfs() to visit next.
+            assert not rule.is_term, rule
+
+            for candidate in rules_by_origin[rule]:
+                ptr = RulePtr(candidate, 0)
+                init_ptrs.add(ptr)
+
+                if not candidate.expansion:
+                    continue  # empty rule: nothing to descend into
+                head = ptr.next
+                if head.is_term:
+                    continue
+                assert isinstance(head, NonTerminal)
+                yield head
+
+        # Drain the traversal; only its side effect on init_ptrs is needed.
+        for _ in bfs([source_rule], _expand_rule):
+            pass
+
+        return init_ptrs
@@ -0,0 +1,334 @@
+"""This module builds a LALR(1) transition-table for lalr_parser.py
+
+For now, shift/reduce conflicts are automatically resolved as shifts.
+"""
+
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+
+from typing import Dict, Set, Iterator, Tuple, List, TypeVar, Generic
+from collections import defaultdict
+
+from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
+from ..exceptions import GrammarError
+
+from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet, RulePtr, State
+from ..grammar import Rule, Symbol
+from ..common import ParserConf
+
+###{standalone
+
+class Action:
+    """A named marker object; its string form and its repr are both the name."""
+
+    def __init__(self, name):
+        self.name = name
+
+    def __str__(self):
+        return self.name
+
+    def __repr__(self):
+        # Deliberately identical to str(): tables print as "Shift" / "Reduce".
+        return str(self)
+
+# The two action markers stored in parse-table entries; table code compares them by identity (``is``).
+Shift = Action('Shift')
+Reduce = Action('Reduce')
+
+# Type variable for the keys that identify a state in ``ParseTableBase``.
+StateT = TypeVar("StateT")
+
+class ParseTableBase(Generic[StateT]):
+    """Parse table: for each state, a mapping from token name to an ``(action, argument)`` pair.
+
+    The argument of a ``Shift`` entry is stored as-is; the argument of a
+    ``Reduce`` entry is an object that supports ``serialize(memo)``.
+    """
+    states: Dict[StateT, Dict[str, Tuple]]
+    start_states: Dict[str, StateT]
+    end_states: Dict[str, StateT]
+
+    def __init__(self, states, start_states, end_states):
+        self.states = states
+        self.start_states = start_states
+        self.end_states = end_states
+
+    def serialize(self, memo):
+        """Return a plain-dict form of the table.
+
+        Token names are replaced by the ids handed out by an ``Enumerator``;
+        actions are encoded as 1 (Reduce, with the serialized argument) or
+        0 (anything else, with the argument unchanged).
+        """
+        tokens = Enumerator()
+
+        states = {}
+        for state, actions in self.states.items():
+            row = {}
+            for token, (action, arg) in actions.items():
+                key = tokens.get(token)
+                if action is Reduce:
+                    row[key] = (1, arg.serialize(memo))
+                else:
+                    row[key] = (0, arg)
+            states[state] = row
+
+        return {
+            'tokens': tokens.reversed(),
+            'states': states,
+            'start_states': self.start_states,
+            'end_states': self.end_states,
+        }
+
+    @classmethod
+    def deserialize(cls, data, memo):
+        """Rebuild a table from the dict produced by ``serialize``."""
+        tokens = data['tokens']
+
+        states = {}
+        for state, actions in data['states'].items():
+            row = {}
+            for token, (action, arg) in actions.items():
+                key = tokens[token]
+                if action == 1:
+                    row[key] = (Reduce, Rule.deserialize(arg, memo))
+                else:
+                    row[key] = (Shift, arg)
+            states[state] = row
+
+        return cls(states, data['start_states'], data['end_states'])
+
+class ParseTable(ParseTableBase['State']):
+    """Parse table keyed by ``State`` (a set of ``RulePtr``).
+
+    Slower than ``IntParseTable``, but useful for debugging.
+    """
+
+
+class IntParseTable(ParseTableBase[int]):
+    """Parse-table whose key is int. Best for performance."""
+
+    @classmethod
+    def from_ParseTable(cls, parse_table: ParseTable):
+        """Build an int-keyed copy of ``parse_table``.
+
+        States are numbered in the iteration order of ``parse_table.states``;
+        the target of every Shift entry and the start/end states are renumbered
+        the same way. Non-Shift entries are reused unchanged.
+        """
+        index_of: Dict['State', int] = {state: idx for idx, state in enumerate(parse_table.states)}
+
+        int_states = {}
+        for state, row in parse_table.states.items():
+            renumbered = {}
+            for token, entry in row.items():
+                if entry[0] is Shift:
+                    renumbered[token] = (entry[0], index_of[entry[1]])
+                else:
+                    renumbered[token] = entry
+            int_states[index_of[state]] = renumbered
+
+        start_states = {}
+        for start, state in parse_table.start_states.items():
+            start_states[start] = index_of[state]
+
+        end_states = {}
+        for start, state in parse_table.end_states.items():
+            end_states[start] = index_of[state]
+
+        return cls(int_states, start_states, end_states)
+
+###}
+
+
+# digraph and traverse, see The Theory and Practice of Compiler Writing
+
+# computes F(x) = G(x) union (union { G(y) | x R y })
+# X: nodes
+# R: relation (function mapping node -> list of nodes that satisfy the relation)
+# G: set valued function
+def digraph(X, R, G):
+    """Compute ``F(x) = G(x) union (union { G(y) | x R y })`` for every node in ``X``.
+
+    Parameters:
+        X - the nodes (iterated twice, so not a one-shot iterator).
+        R - mapping from a node to the nodes related to it.
+        G - mapping from a node to its initial set.
+
+    Returns:
+        A dict mapping each node to its resulting set.
+    """
+    result = {}
+    stack = []
+    marks = {node: 0 for node in X}
+    for node in X:
+        # Zero means "not visited yet"; traverse() rewrites the mark of every node it reaches.
+        if marks[node] != 0:
+            continue
+        traverse(node, stack, marks, X, R, G, result)
+    return result
+
+# x: single node
+# S: stack
+# N: weights
+# X: nodes
+# R: relation (see above)
+# G: set valued function
+# F: set valued function we are computing (map of input -> output)
+def traverse(x, S, N, X, R, G, F):
+    """Depth-first step of ``digraph``: fill in ``F`` for ``x`` and every unvisited node reachable from it.
+
+    ``N`` holds 0 for unvisited nodes, a positive stack depth for nodes that
+    are still on the stack ``S``, and -1 for finished nodes. Nodes popped
+    together at the end share one and the same set object in ``F``.
+    """
+    S.append(x)
+    depth = len(S)
+    N[x] = depth
+    # F[x] starts as the very set object G[x]; the updates below grow it in place.
+    F[x] = G[x]
+    for y in R[x]:
+        if N[y] == 0:
+            traverse(y, S, N, X, R, G, F)
+        n_x, n_y = N[x], N[y]
+        assert(n_x > 0)
+        assert(n_y != 0)
+        # y is still on the stack and was entered earlier: adopt its lower depth.
+        if 0 < n_y < n_x:
+            N[x] = n_y
+        F[x].update(F[y])
+
+    if N[x] != depth:
+        # x adopted the depth of an earlier node; that node will pop x later.
+        return
+
+    # Pop everything stacked since x (inclusive) and give all of it x's set.
+    merged = F[x]
+    while True:
+        z = S.pop()
+        N[z] = -1
+        F[z] = merged
+        if z == x:
+            return
+
+
+class LALR_Analyzer(GrammarAnalyzer):
+    """Builds the LALR(1) parse table for a grammar.
+
+    ``compute_lalr()`` runs the steps in order: LR(0) item sets, the
+    directly-reads / reads relations, the includes / lookback relations, the
+    lookaheads, and finally the action table, which is stored in
+    ``self.parse_table``.
+    NOTE(review): the relation names match DeRemer & Pennello's LALR(1)
+    look-ahead algorithm -- presumably that is the method implemented; confirm.
+    """
+    lr0_itemsets: Set[LR0ItemSet]
+    nonterminal_transitions: List[Tuple[LR0ItemSet, Symbol]]
+    lookback: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Rule]]]
+    includes: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
+    reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
+    directly_reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Symbol]]
+
+
+    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
+        """Run the base grammar analysis and create the (still empty) relation tables."""
+        GrammarAnalyzer.__init__(self, parser_conf, debug, strict)
+        self.nonterminal_transitions = []
+        self.directly_reads = defaultdict(set)
+        self.reads = defaultdict(set)
+        self.includes = defaultdict(set)
+        self.lookback = defaultdict(set)
+
+
+    def compute_lr0_states(self) -> None:
+        """Populate ``self.lr0_itemsets`` by a breadth-first walk from the LR(0) start states.
+
+        One ``LR0ItemSet`` is created per distinct kernel, and each visited
+        item set gets its ``transitions`` filled in.
+        """
+        self.lr0_itemsets = set()
+        # map of kernels to LR0ItemSets
+        cache: Dict['State', LR0ItemSet] = {}
+
+        def step(state: LR0ItemSet) -> Iterator[LR0ItemSet]:
+            # Only items that still expect a symbol can produce a transition.
+            _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
+
+            # Group the unsatisfied items by the symbol they expect next.
+            d = classify(unsat, lambda rp: rp.next)
+            for sym, rps in d.items():
+                kernel = fzset({rp.advance(sym) for rp in rps})
+                new_state = cache.get(kernel, None)
+                if new_state is None:
+                    # First time this kernel is seen: compute its closure and remember it.
+                    closure = set(kernel)
+                    for rp in kernel:
+                        if not rp.is_satisfied and not rp.next.is_term:
+                            closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
+                    new_state = LR0ItemSet(kernel, closure)
+                    cache[kernel] = new_state
+
+                state.transitions[sym] = new_state
+                yield new_state
+
+            # Reached once the generator is exhausted, i.e. after all transitions were produced.
+            self.lr0_itemsets.add(state)
+
+        for _ in bfs(self.lr0_start_states.values(), step):
+            pass
+
+    def compute_reads_relations(self):
+        """Collect the nonterminal transitions and fill ``directly_reads`` and ``reads`` for them.
+
+        Requires ``compute_lr0_states()`` to have run (it reads
+        ``self.lr0_itemsets`` and the item sets' ``transitions``).
+        """
+        # handle start state
+        for root in self.lr0_start_states.values():
+            assert(len(root.kernel) == 1)
+            for rp in root.kernel:
+                assert(rp.index == 0)
+                self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
+
+        for state in self.lr0_itemsets:
+            # Each (state, nonterminal) pair is recorded once, even if several items expect it.
+            seen = set()
+            for rp in state.closure:
+                if rp.is_satisfied:
+                    continue
+                s = rp.next
+                # if s is not a nonterminal
+                if s not in self.lr0_rules_by_origin:
+                    continue
+                if s in seen:
+                    continue
+                seen.add(s)
+                nt = (state, s)
+                self.nonterminal_transitions.append(nt)
+                # Indexing the defaultdicts creates the entries even if they stay empty.
+                dr = self.directly_reads[nt]
+                r = self.reads[nt]
+                next_state = state.transitions[s]
+                for rp2 in next_state.closure:
+                    if rp2.is_satisfied:
+                        continue
+                    s2 = rp2.next
+                    # if s2 is a terminal
+                    if s2 not in self.lr0_rules_by_origin:
+                        dr.add(s2)
+                    if s2 in self.NULLABLE:
+                        r.add((next_state, s2))
+
+    def compute_includes_lookback(self):
+        """Fill ``self.includes`` and ``self.lookback`` for every nonterminal transition.
+
+        Requires ``compute_reads_relations()`` to have run (it reads
+        ``self.nonterminal_transitions`` and ``self.reads``).
+        """
+        for nt in self.nonterminal_transitions:
+            state, nonterminal = nt
+            includes = []
+            lookback = self.lookback[nt]
+            for rp in state.closure:
+                # Only the rules that derive this transition's nonterminal are relevant.
+                if rp.rule.origin != nonterminal:
+                    continue
+                # traverse the states for rp(.rule)
+                state2 = state
+                for i in range(rp.index, len(rp.rule.expansion)):
+                    s = rp.rule.expansion[i]
+                    nt2 = (state2, s)
+                    state2 = state2.transitions[s]
+                    # Keys of self.reads are exactly the recorded nonterminal transitions.
+                    if nt2 not in self.reads:
+                        continue
+                    # nt2 qualifies only if every symbol after position i is nullable
+                    # (the for/else runs the else branch when no break occurred).
+                    for j in range(i + 1, len(rp.rule.expansion)):
+                        if rp.rule.expansion[j] not in self.NULLABLE:
+                            break
+                    else:
+                        includes.append(nt2)
+                # state2 is at the final state for rp.rule
+                if rp.index == 0:
+                    for rp2 in state2.closure:
+                        if (rp2.rule == rp.rule) and rp2.is_satisfied:
+                            lookback.add((state2, rp2.rule))
+            for nt2 in includes:
+                self.includes[nt2].add(nt)
+
+    def compute_lookaheads(self):
+        """Compute the follow set of every nonterminal transition and store the lookaheads.
+
+        For each ``(state, rule)`` in a transition's lookback set, every symbol
+        of the transition's follow set is recorded in ``state.lookaheads`` as a
+        lookahead for ``rule``.
+        """
+        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
+
+        for nt, lookbacks in self.lookback.items():
+            for state, rule in lookbacks:
+                for s in follow_sets[nt]:
+                    state.lookaheads[s].add(rule)
+
+    def compute_lalr1_states(self) -> None:
+        """Build the action table from the item sets and store it in ``self.parse_table``.
+
+        Shift actions come from the transitions, Reduce actions from the
+        lookaheads. A reduce/reduce conflict is resolved by rule priority when
+        one rule has a strictly higher priority; otherwise it is collected and
+        reported. A shift/reduce conflict keeps the shift, except in strict mode.
+
+        Raises:
+            GrammarError - on a shift/reduce conflict in strict mode, or on any
+                unresolved reduce/reduce conflict.
+        """
+        m: Dict[LR0ItemSet, Dict[str, Tuple]] = {}
+        reduce_reduce = []
+        for itemset in self.lr0_itemsets:
+            actions: Dict[Symbol, Tuple] = {la: (Shift, next_state.closure)
+                                                      for la, next_state in itemset.transitions.items()}
+            for la, rules in itemset.lookaheads.items():
+                if len(rules) > 1:
+                    # Try to resolve conflict based on priority
+                    p = [(r.options.priority or 0, r) for r in rules]
+                    p.sort(key=lambda r: r[0], reverse=True)
+                    best, second_best = p[:2]
+                    if best[0] > second_best[0]:
+                        rules = {best[1]}
+                    else:
+                        # Tie for the highest priority: report it after all item sets were processed.
+                        reduce_reduce.append((itemset, la, rules))
+                        continue
+
+                # Exactly one rule is left at this point; unpack it.
+                rule ,= rules
+                if la in actions:
+                    # A Shift entry already exists for this lookahead. In the non-strict
+                    # branches nothing is written, so the Shift stays in place.
+                    if self.strict:
+                        msg = f'Shift/Reduce conflict for terminal {la.name}. [strict-mode]\n' \
+                              f' * {rule}\n'
+                        raise GrammarError(msg)
+                    elif self.debug:
+                        logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+                        logger.warning(' * %s', rule)
+                    else:
+                        logger.debug('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+                        logger.debug(' * %s', rule)
+                else:
+                    actions[la] = (Reduce, rule)
+            # The final table is keyed by symbol name rather than by Symbol object.
+            m[itemset] = { k.name: v for k, v in actions.items() }
+
+        if reduce_reduce:
+            msgs = []
+            for itemset, la, rules in reduce_reduce:
+                msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
+                if self.debug:
+                    msg += '\n    collision occurred in state: {%s\n    }' % ''.join(['\n\t' + str(x) for x in itemset.closure])
+                msgs.append(msg)
+            raise GrammarError('\n\n'.join(msgs))
+
+        # Re-key the table by closure, matching the Shift targets created above.
+        states = { k.closure: v for k, v in m.items() }
+
+        # compute end states
+        end_states: Dict[str, 'State'] = {}
+        for state in states:
+            for rp in state:
+                for start in self.lr0_start_states:
+                    if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
+                        assert start not in end_states
+                        end_states[start] = state
+
+        start_states = { start: state.closure for start, state in self.lr0_start_states.items() }
+        _parse_table = ParseTable(states, start_states, end_states)
+
+        # Debug mode keeps the State-keyed table; otherwise it is converted to int keys.
+        if self.debug:
+            self.parse_table = _parse_table
+        else:
+            self.parse_table = IntParseTable.from_ParseTable(_parse_table)
+
+    def compute_lalr(self):
+        """Run all analysis steps in order; afterwards ``self.parse_table`` is set."""
+        self.compute_lr0_states()
+        self.compute_reads_relations()
+        self.compute_includes_lookback()
+        self.compute_lookaheads()
+        self.compute_lalr1_states()
@@ -0,0 +1,158 @@
+# This module provides a LALR interactive parser, which is used for debugging and error handling
+
+from typing import Iterator, List
+from copy import copy
+import warnings
+
+from lark.exceptions import UnexpectedToken
+from lark.lexer import Token, LexerThread
+from .lalr_parser_state import ParserState
+
+###{standalone
+
+class InteractiveParser:
+    """Step-by-step handle on an LALR parse, for fine-grained control and error handling.
+
+    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
+    """
+    def __init__(self, parser, parser_state: ParserState, lexer_thread: LexerThread):
+        self.parser = parser
+        self.parser_state = parser_state
+        self.lexer_thread = lexer_thread
+        self.result = None
+
+    @property
+    def lexer_state(self) -> LexerThread:
+        """Deprecated alias of ``lexer_thread``; emits a ``DeprecationWarning`` on access."""
+        message = "lexer_state will be removed in subsequent releases. Use lexer_thread instead."
+        warnings.warn(message, DeprecationWarning)
+        return self.lexer_thread
+
+    def feed_token(self, token: Token):
+        """Hand one token to the parser state, exactly as if the lexer had produced it.
+
+        ``token`` must be a ``Token`` instance. A token of type ``'$END'`` is
+        passed on as the end-of-input token.
+        """
+        is_end = token.type == '$END'
+        return self.parser_state.feed_token(token, is_end)
+
+    def iter_parse(self) -> Iterator[Token]:
+        """Lex the remaining input and feed it to the parser, one token per iteration.
+
+        Each token is yielded *before* it is fed. The value returned by the
+        most recent ``feed_token`` call is kept in ``InteractiveParser.result``.
+        """
+        lexed = self.lexer_thread.lex(self.parser_state)
+        for token in lexed:
+            yield token
+            self.result = self.feed_token(token)
+
+    def exhaust_lexer(self) -> List[Token]:
+        """Feed everything the lexer still has to offer and return those tokens.
+
+        Modifies the instance in place. No '$END' token is fed.
+        """
+        consumed = list(self.iter_parse())
+        return consumed
+
+
+    def feed_eof(self, last_token=None):
+        """Feed a '$END' Token. Borrows from 'last_token' if given."""
+        if last_token is not None:
+            eof = Token.new_borrow_pos('$END', '', last_token)
+        else:
+            eof = self.lexer_thread._Token('$END', '', 0, 1, 1)
+        return self.feed_token(eof)
+
+
+    def __copy__(self):
+        """Return a new interactive parser with its own, separate state.
+
+        Feeding tokens to either instance does not affect the other.
+        """
+        return self.copy()
+
+    def copy(self, deepcopy_values=True):
+        """Return a parser of the same type with a copied parser state and a copied lexer thread.
+
+        ``deepcopy_values`` is forwarded to ``ParserState.copy``.
+        """
+        state_copy = self.parser_state.copy(deepcopy_values=deepcopy_values)
+        lexer_copy = copy(self.lexer_thread)
+        return type(self)(self.parser, state_copy, lexer_copy)
+
+    def __eq__(self, other):
+        if isinstance(other, InteractiveParser):
+            return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread
+        return False
+
+    def as_immutable(self):
+        """Convert to an ``ImmutableInteractiveParser``."""
+        clone = copy(self)
+        return ImmutableInteractiveParser(clone.parser, clone.parser_state, clone.lexer_thread)
+
+    def pretty(self):
+        """Return the output of ``choices()`` as readable text, followed by the stack size."""
+        lines = ["Parser choices:"]
+        for name, action in self.choices().items():
+            lines.append('\t- %s -> %r' % (name, action))
+        lines.append('stack size: %s' % len(self.parser_state.state_stack))
+        return '\n'.join(lines)
+
+    def choices(self):
+        """Return the parse-table row of the current state: token types mapped to parser actions.
+
+        Only token types accepted by the current state appear in it. The
+        current state changes with every ``feed_token()``.
+        """
+        state = self.parser_state
+        return state.parse_conf.parse_table.states[state.position]
+
+    def accepts(self):
+        """Returns the set of possible tokens that will advance the parser into a new valid state."""
+        accepted = set()
+        # Callbacks may have arbitrary side effects and are needlessly slow for
+        # a dry run, so every probe uses a configuration without them.
+        silent_conf = copy(self.parser_state.parse_conf)
+        silent_conf.callbacks = {}
+        for name in self.choices():
+            if not name.isupper():  # only terminals are probed
+                continue
+            probe = self.copy(deepcopy_values=False)
+            probe.parser_state.parse_conf = silent_conf
+            try:
+                probe.feed_token(self.lexer_thread._Token(name, ''))
+            except UnexpectedToken:
+                continue
+            accepted.add(name)
+        return accepted
+
+    def resume_parse(self):
+        """Resume automated parsing from the current state.
+        """
+        last = self.lexer_thread.state.last_token
+        return self.parser.parse_from_state(self.parser_state, last_token=last)
+
+
+
+class ImmutableInteractiveParser(InteractiveParser):
+    """Variant of ``InteractiveParser`` whose operations return a new instance
+    rather than modifying the receiver.
+    """
+
+    result = None
+
+    def __hash__(self):
+        key = (self.parser_state, self.lexer_thread)
+        return hash(key)
+
+    def feed_token(self, token):
+        """Feed ``token`` to a copy of this parser and return that copy.
+
+        The value produced by feeding is stored in the copy's ``result``.
+        """
+        successor = copy(self)
+        successor.result = InteractiveParser.feed_token(successor, token)
+        return successor
+
+    def exhaust_lexer(self):
+        """Try to feed the rest of the lexer state into the parser.
+
+        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
+        mutable = self.as_mutable()
+        mutable.exhaust_lexer()
+        return mutable.as_immutable()
+
+    def as_mutable(self):
+        """Convert to an ``InteractiveParser``."""
+        clone = copy(self)
+        return InteractiveParser(clone.parser, clone.parser_state, clone.lexer_thread)
+
+###}
@@ -0,0 +1,122 @@
+"""This module implements a LALR(1) Parser
+"""
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+from typing import Dict, Any, Optional
+from ..lexer import Token, LexerThread
+from ..utils import Serialize
+from ..common import ParserConf, ParserCallbacks
+
+from .lalr_analysis import LALR_Analyzer, IntParseTable, ParseTableBase
+from .lalr_interactive_parser import InteractiveParser
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from .lalr_parser_state import ParserState, ParseConf
+
+###{standalone
+
+class LALR_Parser(Serialize):
+    """LALR(1) parser front-end: builds (or deserializes) the parse table and wraps a ``_Parser``."""
+
+    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
+        """Analyze the grammar in ``parser_conf`` and create the table-driven parser.
+
+        ``debug`` and ``strict`` are forwarded to ``LALR_Analyzer``; ``debug`` is
+        also passed to the underlying ``_Parser``.
+        """
+        analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
+        analysis.compute_lalr()
+        callbacks = parser_conf.callbacks
+
+        self._parse_table = analysis.parse_table
+        self.parser_conf = parser_conf
+        self.parser = _Parser(analysis.parse_table, callbacks, debug)
+
+    @classmethod
+    def deserialize(cls, data, memo, callbacks, debug=False):
+        """Recreate a parser from serialized table data, without running the grammar analysis.
+
+        ``__init__`` is bypassed, so ``parser_conf`` is not set on the result.
+        """
+        inst = cls.__new__(cls)
+        inst._parse_table = IntParseTable.deserialize(data, memo)
+        inst.parser = _Parser(inst._parse_table, callbacks, debug)
+        return inst
+
+    def serialize(self, memo: Any = None) -> Dict[str, Any]:
+        """Return the serialized form of the parse table."""
+        return self._parse_table.serialize(memo)
+
+    def parse_interactive(self, lexer: LexerThread, start: str):
+        """Return an ``InteractiveParser`` positioned at the start of the input instead of parsing it."""
+        return self.parser.parse(lexer, start, start_interactive=True)
+
+    def parse(self, lexer, start, on_error=None):
+        """Parse the input and return the result, optionally recovering from errors.
+
+        Parameters:
+            lexer - the lexer to read tokens from.
+            start - name of the start symbol.
+            on_error - optional callable invoked with each ``UnexpectedInput``
+                error. A true return value resumes parsing from the error's
+                interactive parser; a false one re-raises the error.
+
+        Raises:
+            UnexpectedInput - if no ``on_error`` is given, if it returns a false
+                value, or if resuming hits '$END' twice in an equal state.
+        """
+        try:
+            return self.parser.parse(lexer, start)
+        except UnexpectedInput as e:
+            if on_error is None:
+                raise
+
+            # Recovery loop: each round handles the current error `e` and either
+            # returns a result, raises, or continues with the next error.
+            while True:
+                if isinstance(e, UnexpectedCharacters):
+                    # Remember the lexer position so a move made by on_error can be detected.
+                    s = e.interactive_parser.lexer_thread.state
+                    p = s.line_ctr.char_pos
+
+                if not on_error(e):
+                    raise e
+
+                if isinstance(e, UnexpectedCharacters):
+                    # If the user's handler didn't change the character position, feed the
+                    # single character at that position to the line counter ourselves
+                    # (presumably to step past it, so resuming does not fail at the same spot).
+                    if p == s.line_ctr.char_pos:
+                        s.line_ctr.feed(s.text.text[p:p+1])
+
+                try:
+                    return e.interactive_parser.resume_parse()
+                except UnexpectedToken as e2:
+                    if (isinstance(e, UnexpectedToken)
+                        and e.token.type == e2.token.type == '$END'
+                        and e.interactive_parser == e2.interactive_parser):
+                        # Prevent infinite loop
+                        raise e2
+                    e = e2
+                except UnexpectedCharacters as e2:
+                    e = e2
+
+
+class _Parser:
+    """Table-driven LALR(1) parser working on an already computed parse table."""
+    parse_table: ParseTableBase
+    callbacks: ParserCallbacks
+    debug: bool
+
+    def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False):
+        self.parse_table, self.callbacks, self.debug = parse_table, callbacks, debug
+
+    def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False):
+        """Create a fresh parser state for ``start`` and either run the parse or hand out an ``InteractiveParser``.
+
+        With ``start_interactive`` set, nothing is parsed; an ``InteractiveParser``
+        wrapping the new state is returned instead.
+        """
+        conf = ParseConf(self.parse_table, self.callbacks, start)
+        new_state = ParserState(conf, lexer, state_stack, value_stack)
+        if not start_interactive:
+            return self.parse_from_state(new_state)
+        return InteractiveParser(self, new_state, new_state.lexer)
+
+
+    def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None):
+        """Run the main LALR parser loop
+
+        Parameters:
+            state - the initial state. Changed in-place.
+            last_token - Used only for line information in case of an empty lexer.
+        """
+        try:
+            # `token` keeps the last lexed token, or last_token if the lexer yields nothing.
+            token = last_token
+            for token in state.lexer.lex(state):
+                assert token is not None
+                state.feed_token(token)
+
+            if token:
+                end_token = Token.new_borrow_pos('$END', '', token)
+            else:
+                end_token = Token('$END', '', 0, 1, 1)
+            return state.feed_token(end_token, True)
+        except UnexpectedInput as e:
+            # Attach an interactive parser so callers can inspect or resume the parse.
+            # A NameError (InteractiveParser not defined in this module's namespace) is ignored.
+            try:
+                e.interactive_parser = InteractiveParser(self, state, state.lexer)
+            except NameError:
+                pass
+            raise e
+        except Exception:
+            if self.debug:
+                print("")
+                print("STATE STACK DUMP")
+                print("----------------")
+                for i, s in enumerate(state.state_stack):
+                    print('%d)' % i , s)
+                print("")
+
+            raise
+###}
@@ -0,0 +1,110 @@
+from copy import deepcopy, copy
+from typing import Dict, Any, Generic, List
+from ..lexer import Token, LexerThread
+from ..common import ParserCallbacks
+
+from .lalr_analysis import Shift, ParseTableBase, StateT
+from lark.exceptions import UnexpectedToken
+
+###{standalone
+
+class ParseConf(Generic[StateT]):
+    __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
+
+    parse_table: ParseTableBase[StateT]
+    callbacks: ParserCallbacks
+    start: str
+
+    start_state: StateT
+    end_state: StateT
+    states: Dict[StateT, Dict[str, tuple]]
+
+    def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str):
+        self.parse_table = parse_table
+
+        self.start_state = self.parse_table.start_states[start]
+        self.end_state = self.parse_table.end_states[start]
+        self.states = self.parse_table.states
+
+        self.callbacks = callbacks
+        self.start = start
+
+class ParserState(Generic[StateT]):
+    __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
+
+    parse_conf: ParseConf[StateT]
+    lexer: LexerThread
+    state_stack: List[StateT]
+    value_stack: list
+
+    def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None):
+        self.parse_conf = parse_conf
+        self.lexer = lexer
+        self.state_stack = state_stack or [self.parse_conf.start_state]
+        self.value_stack = value_stack or []
+
+    @property
+    def position(self) -> StateT:
+        return self.state_stack[-1]
+
+    # Necessary for match_examples() to work
+    def __eq__(self, other) -> bool:
+        if not isinstance(other, ParserState):
+            return NotImplemented
+        return len(self.state_stack) == len(other.state_stack) and self.position == other.position
+
+    def __copy__(self):
+        return self.copy()
+
+    def copy(self, deepcopy_values=True) -> 'ParserState[StateT]':
+        return type(self)(
+            self.parse_conf,
+            self.lexer, # XXX copy
+            copy(self.state_stack),
+            deepcopy(self.value_stack) if deepcopy_values else copy(self.value_stack),
+        )
+
+    def feed_token(self, token: Token, is_end=False) -> Any:
+        state_stack = self.state_stack
+        value_stack = self.value_stack
+        states = self.parse_conf.states
+        end_state = self.parse_conf.end_state
+        callbacks = self.parse_conf.callbacks
+
+        while True:
+            state = state_stack[-1]
+            try:
+                action, arg = states[state][token.type]
+            except KeyError:
+                expected = {s for s in states[state].keys() if s.isupper()}
+                raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
+
+            assert arg != end_state
+
+            if action is Shift:
+                # shift once and return
+                assert not is_end
+                state_stack.append(arg)
+                value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
+                return
+            else:
+                # reduce+shift as many times as necessary
+                rule = arg
+                size = len(rule.expansion)
+                if size:
+                    s = value_stack[-size:]
+                    del state_stack[-size:]
+                    del value_stack[-size:]
+                else:
+                    s = []
+
+                value = callbacks[rule](s) if callbacks else s
+
+                _action, new_state = states[state_stack[-1]][rule.origin.name]
+                assert _action is Shift
+                state_stack.append(new_state)
+                value_stack.append(value)
+
+                if is_end and state_stack[-1] == end_state:
+                    return value_stack[-1]
+###}
@@ -0,0 +1,166 @@
+"""This module implements an Earley parser with a dynamic lexer
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+    https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is better documented here:
+    http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+
+Instead of running a lexer beforehand, or using a costly char-by-char method, this parser
+uses regular expressions by necessity, achieving high performance while maintaining all of
+Earley's power in parsing any CFG.
+"""
+
+from typing import TYPE_CHECKING, Callable, Optional, List, Any
+from collections import defaultdict
+
+from ..tree import Tree
+from ..exceptions import UnexpectedCharacters
+from ..lexer import Token
+from ..grammar import Terminal
+from .earley import Parser as BaseParser
+from .earley_forest import TokenNode
+
+if TYPE_CHECKING:
+    from ..common import LexerConf, ParserConf
+
+class Parser(BaseParser):
+    """Earley parser that matches terminals directly against the input stream.
+
+    Rather than consuming pre-lexed tokens, each scan step calls
+    ``self.term_matcher`` for every expected terminal at the current stream
+    position, and buffers the matches until the position where they end.
+    """
+    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
+                 resolve_ambiguity: bool=True, complete_lex: bool=False, debug: bool=False,
+                 tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
+        # Everything except complete_lex is forwarded to the base Earley parser.
+        BaseParser.__init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity,
+                            debug, tree_class, ordered_sets)
+        # Ignored terminals, wrapped as Terminal objects so they can be handed to the matcher.
+        self.ignore = [Terminal(t) for t in lexer_conf.ignore]
+        # When true, scan() also records every shorter prefix of a match that still matches.
+        self.complete_lex = complete_lex
+
+    def _parse(self, stream, columns, to_scan, start_symbol=None):
+        """Run the Earley loop over ``stream``, one element per step.
+
+        Parameters:
+            stream - the input; it is iterated element by element and each
+                element is compared against '\\n' (presumably a string - TODO confirm).
+            columns - list of Earley sets; one new set is appended per step.
+            to_scan - items expecting a terminal at the current position.
+            start_symbol - used to carry completed start items past ignored input.
+
+        Returns the scan buffer left after the final predict/complete pass.
+        Raises UnexpectedCharacters (from scan) when no item can proceed.
+        """
+
+        # scan() closes over delayed_matches, match, terminals, transitives,
+        # text_line and text_column, all of which are assigned further down,
+        # before scan() is first called.
+        def scan(i, to_scan):
+            """The core Earley Scanner.
+
+            This is a custom implementation of the scanner that uses the
+            Lark lexer to match tokens. The scan list is built by the
+            Earley predictor, based on the previously completed tokens.
+            This ensures that at each phase of the parse we have a custom
+            lexer context, allowing for more complex ambiguities."""
+
+            node_cache = {}
+
+            # 1) Loop the expectations and ask the lexer to match.
+            # Since regexp is forward looking on the input stream, and we only
+            # want to process tokens when we hit the point in the stream at which
+            # they complete, we push all tokens into a buffer (delayed_matches), to
+            # be held possibly for a later parse step when we reach the point in the
+            # input stream at which they complete.
+            for item in self.Set(to_scan):
+                m = match(item.expect, stream, i)
+                if m:
+                    # The token is filed under the stream position where the match ends.
+                    t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+                    delayed_matches[m.end()].append( (item, i, t) )
+
+                    if self.complete_lex:
+                        # Also try every proper prefix of the matched text. These matches
+                        # run against the slice, so m.end() is relative to it and i is
+                        # added to get a stream position.
+                        s = m.group(0)
+                        for j in range(1, len(s)):
+                            m = match(item.expect, s[:-j])
+                            if m:
+                                t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+                                delayed_matches[i+m.end()].append( (item, i, t) )
+
+                    # XXX The following 3 lines were commented out for causing a bug. See issue #768
+                    # # Remove any items that successfully matched in this pass from the to_scan buffer.
+                    # # This ensures we don't carry over tokens that already matched, if we're ignoring below.
+                    # to_scan.remove(item)
+
+            # 3) Process any ignores. This is typically used for e.g. whitespace.
+            # We carry over any unmatched items from the to_scan buffer to be matched again after
+            # the ignore. This should allow us to use ignored symbols in non-terminals to implement
+            # e.g. mandatory spacing.
+            for x in self.ignore:
+                m = match(x, stream, i)
+                if m:
+                    # Carry over any items still in the scan buffer, to past the end of the ignored items.
+                    # They are stored with a None token, so step 4 re-queues them without advancing.
+                    delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])
+
+                    # If we're ignoring up to the end of the file, carry over the start symbol if it already completed.
+                    delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])
+
+            # Open the Earley set and the transitives entry for position i+1.
+            next_to_scan = self.Set()
+            next_set = self.Set()
+            columns.append(next_set)
+            transitives.append({})
+
+            ## 4) Process Tokens from delayed_matches.
+            # This is the core of the Earley scanner. Create an SPPF node for each Token,
+            # and create the symbol node in the SPPF tree. Advance the item that completed,
+            # and add the resulting new item to either the Earley set (for processing by the
+            # completer/predictor) or the to_scan buffer for the next parse step.
+            for item, start, token in delayed_matches[i+1]:
+                if token is not None:
+                    # The token ends at position i+1; record its end coordinates.
+                    token.end_line = text_line
+                    token.end_column = text_column + 1
+                    token.end_pos = i + 1
+
+                    new_item = item.advance()
+                    label = (new_item.s, new_item.start, i + 1)
+                    token_node = TokenNode(token, terminals[token.type])
+                    # Reuse the symbol node for this label if one was already created in this step.
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
+                else:
+                    # Carried over past ignored input: keep the item as it is.
+                    new_item = item
+
+                if new_item.expect in self.TERMINALS:
+                    # add (B ::= Aai+1.B, h, y) to Q'
+                    next_to_scan.add(new_item)
+                else:
+                    # add (B ::= Aa+1.B, h, y) to Ei+1
+                    next_set.add(new_item)
+
+            del delayed_matches[i+1]    # No longer needed, so unburden memory
+
+            # Nothing completed here, nothing is pending for a later position and nothing
+            # is left to scan: the input cannot be matched at position i.
+            if not next_set and not delayed_matches and not next_to_scan:
+                considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name))
+                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
+                                           set(to_scan), state=frozenset(i.s for i in to_scan),
+                                           considered_rules=considered_rules
+                                           )
+
+            return next_to_scan, node_cache
+
+
+        # Maps a stream position to the (item, start, token) tuples whose match ends there.
+        delayed_matches = defaultdict(list)
+        match = self.term_matcher
+        terminals = self.lexer_conf.terminals_by_name
+
+        # Cache for nodes & tokens created in a particular parse step.
+        transitives = [{}]
+
+        # Line/column of the current stream position, both starting at 1.
+        text_line = 1
+        text_column = 1
+
+        ## The main Earley loop.
+        # Run the Prediction/Completion cycle for any Items in the current Earley set.
+        # Completions will be added to the SPPF tree, and predictions will be recursively
+        # processed down to terminals/empty nodes to be added to the scanner for the next
+        # step.
+        i = 0
+        node_cache = {}
+        for token in stream:
+            self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+            to_scan, node_cache = scan(i, to_scan)
+
+            # Advance the line/column bookkeeping past the element just consumed.
+            if token == '\n':
+                text_line += 1
+                text_column = 1
+            else:
+                text_column += 1
+            i += 1
+
+        # Final predict/complete pass for the last column.
+        self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+        ## Column is now the final column in the parse.
+        # scan() appends one column per step, so this holds if columns started
+        # with a single entry (presumably set up by the caller - TODO confirm).
+        assert i == len(columns)-1
+        return to_scan