First Commit

2026-05-31 10:17:09 +07:00
commit 17a9c69379
4547 changed files with 1170384 additions and 0 deletions
@@ -0,0 +1,39 @@
+from .exceptions import (
+    GrammarError,
+    LarkError,
+    LexError,
+    ParseError,
+    UnexpectedCharacters,
+    UnexpectedEOF,
+    UnexpectedInput,
+    UnexpectedToken,
+)
+from .lark import Lark
+from .lexer import Token
+from .tree import ParseTree, Tree
+from .utils import logger, TextSlice
+from .visitors import Discard, Transformer, Transformer_NonRecursive, Visitor, v_args
+
+# Version string of the package.
+__version__: str = "1.3.1"
+
+# Public API of the package: every name listed here is imported above and is
+# what `from <package> import *` exposes.
+__all__ = (
+    "GrammarError",
+    "LarkError",
+    "LexError",
+    "ParseError",
+    "UnexpectedCharacters",
+    "UnexpectedEOF",
+    "UnexpectedInput",
+    "UnexpectedToken",
+    "Lark",
+    "Token",
+    "ParseTree",
+    "Tree",
+    "logger",
+    "Discard",
+    "Transformer",
+    "Transformer_NonRecursive",
+    "TextSlice",
+    "Visitor",
+    "v_args",
+)
@@ -0,0 +1,6 @@
+# For usage of lark with PyInstaller. See https://pyinstaller-sample-hook.readthedocs.io/en/latest/index.html
+
+import os
+
+def get_hook_dirs():
+    return [os.path.dirname(__file__)]
@@ -0,0 +1,14 @@
+#-----------------------------------------------------------------------------
+# Copyright (c) 2017-2020, PyInstaller Development Team.
+#
+# Distributed under the terms of the GNU General Public License (version 2
+# or later) with exception for distributing the bootloader.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#
+# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
+#-----------------------------------------------------------------------------
+
+from PyInstaller.utils.hooks import collect_data_files
+
+# Result of PyInstaller's `collect_data_files` helper for the 'lark' package.
+datas = collect_data_files('lark')
@@ -0,0 +1,59 @@
+"""
+    Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree (AST defined in classes)
+"""
+
+import inspect, re
+import types
+from typing import Optional, Callable
+
+from lark import Transformer, v_args
+
+class Ast:
+    """Abstract class
+
+    Subclasses will be collected by `create_transformer()`
+    """
+    pass
+
+class AsList:
+    """Abstract class
+
+    Subclasses will be instantiated with the parse results as a single list, instead of as arguments.
+    """
+
+class WithMeta:
+    """Abstract class
+
+    Subclasses will be instantiated with the Meta instance of the tree. (see ``v_args`` for more detail)
+    """
+    pass
+
+def camel_to_snake(name):
+    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
+
+def create_transformer(ast_module: types.ModuleType,
+                       transformer: Optional[Transformer]=None,
+                       decorator_factory: Callable=v_args) -> Transformer:
+    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.
+
+    For each class, we create a corresponding rule in the transformer, with a matching name.
+    CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block".
+
+    Classes starting with an underscore (`_`) will be skipped.
+
+    Parameters:
+        ast_module: A Python module containing all the subclasses of ``ast_utils.Ast``
+        transformer (Optional[Transformer]): An initial transformer. Its attributes may be overwritten.
+        decorator_factory (Callable): An optional callable accepting two booleans, inline, and meta,
+            and returning a decorator for the methods of ``transformer``. (default: ``v_args``).
+    """
+    t = transformer or Transformer()
+
+    for name, obj in inspect.getmembers(ast_module):
+        if not name.startswith('_') and inspect.isclass(obj):
+            if issubclass(obj, Ast):
+                wrapper = decorator_factory(inline=not issubclass(obj, AsList), meta=issubclass(obj, WithMeta))
+                obj = wrapper(obj).__get__(t)
+                setattr(t, camel_to_snake(name), obj)
+
+    return t
@@ -0,0 +1,86 @@
+from copy import deepcopy
+import sys
+from types import ModuleType
+from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING, List
+
+if TYPE_CHECKING:
+    from .lark import PostLex
+    from .lexer import Lexer
+    from .grammar import Rule
+    from typing import Union, Type
+    from typing import Literal
+    if sys.version_info >= (3, 10):
+        from typing import TypeAlias
+    else:
+        from typing_extensions import TypeAlias
+
+from .utils import Serialize
+from .lexer import TerminalDef, Token
+
+###{standalone
+
+# The two aliases below are written as strings because Literal, Union, Type,
+# TypeAlias and Lexer are imported only under TYPE_CHECKING.
+_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]'
+_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
+# A lexer callback takes a Token and returns a Token.
+_LexerCallback = Callable[[Token], Token]
+# Mapping from a string name to a callable.
+ParserCallbacks = Dict[str, Callable]
+
+class LexerConf(Serialize):
+    __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
+    __serialize_namespace__ = TerminalDef,
+
+    terminals: Collection[TerminalDef]
+    re_module: ModuleType
+    ignore: Collection[str]
+    postlex: 'Optional[PostLex]'
+    callbacks: Dict[str, _LexerCallback]
+    g_regex_flags: int
+    skip_validation: bool
+    use_bytes: bool
+    lexer_type: Optional[_LexerArgType]
+    strict: bool
+
+    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None,
+                 callbacks: Optional[Dict[str, _LexerCallback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False):
+        self.terminals = terminals
+        self.terminals_by_name = {t.name: t for t in self.terminals}
+        assert len(self.terminals) == len(self.terminals_by_name)
+        self.ignore = ignore
+        self.postlex = postlex
+        self.callbacks = callbacks or {}
+        self.g_regex_flags = g_regex_flags
+        self.re_module = re_module
+        self.skip_validation = skip_validation
+        self.use_bytes = use_bytes
+        self.strict = strict
+        self.lexer_type = None
+
+    def _deserialize(self):
+        self.terminals_by_name = {t.name: t for t in self.terminals}
+
+    def __deepcopy__(self, memo=None):
+        return type(self)(
+            deepcopy(self.terminals, memo),
+            self.re_module,
+            deepcopy(self.ignore, memo),
+            deepcopy(self.postlex, memo),
+            deepcopy(self.callbacks, memo),
+            deepcopy(self.g_regex_flags, memo),
+            deepcopy(self.skip_validation, memo),
+            deepcopy(self.use_bytes, memo),
+        )
+
+class ParserConf(Serialize):
+    __serialize_fields__ = 'rules', 'start', 'parser_type'
+
+    rules: List['Rule']
+    callbacks: ParserCallbacks
+    start: List[str]
+    parser_type: _ParserArgType
+
+    def __init__(self, rules: List['Rule'], callbacks: ParserCallbacks, start: List[str]):
+        assert isinstance(start, list)
+        self.rules = rules
+        self.callbacks = callbacks
+        self.start = start
+
+###}
@@ -0,0 +1,291 @@
+from .utils import logger, NO_VALUE
+from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .lexer import Token
+    from .parsers.lalr_interactive_parser import InteractiveParser
+    from .tree import Tree
+
+###{standalone
+
+class LarkError(Exception):
+    pass
+
+
+class ConfigurationError(LarkError, ValueError):
+    pass
+
+
+def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
+    if value not in options:
+        raise ConfigurationError(msg % (value, options))
+
+
+class GrammarError(LarkError):
+    pass
+
+
+class ParseError(LarkError):
+    pass
+
+
+class LexError(LarkError):
+    pass
+
+# Type of the labels accepted and returned by UnexpectedInput.match_examples.
+T = TypeVar('T')
+
+class UnexpectedInput(LarkError):
+    """UnexpectedInput Error.
+
+    Used as a base class for the following exceptions:
+
+    - ``UnexpectedCharacters``: The lexer encountered an unexpected string
+    - ``UnexpectedToken``: The parser received an unexpected token
+    - ``UnexpectedEOF``: The parser expected a token, but the input ended
+
+    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
+    """
+    line: int
+    column: int
+    pos_in_stream = None
+    state: Any
+    _terminals_by_name = None
+    interactive_parser: 'InteractiveParser'
+
+    def get_context(self, text: str, span: int=40) -> str:
+        """Returns a pretty string pinpointing the error in the text,
+        with span amount of context characters around it.
+
+        Note:
+            The parser doesn't hold a copy of the text it has to parse,
+            so you have to provide it again
+        """
+        pos = self.pos_in_stream or 0
+        start = max(pos - span, 0)
+        end = pos + span
+        if not isinstance(text, bytes):
+            before = text[start:pos].rsplit('\n', 1)[-1]
+            after = text[pos:end].split('\n', 1)[0]
+            return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
+        else:
+            before = text[start:pos].rsplit(b'\n', 1)[-1]
+            after = text[pos:end].split(b'\n', 1)[0]
+            return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
+
+    def match_examples(self, parse_fn: 'Callable[[str], Tree]',
+                             examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
+                             token_type_match_fallback: bool=False,
+                             use_accepts: bool=True
+                         ) -> Optional[T]:
+        """Allows you to detect what's wrong in the input text by matching
+        against example errors.
+
+        Given a parser instance and a dictionary mapping some label with
+        some malformed syntax examples, it'll return the label for the
+        example that bests matches the current error. The function will
+        iterate the dictionary until it finds a matching error, and
+        return the corresponding value.
+
+        For an example usage, see `examples/error_reporting_lalr.py`
+
+        Parameters:
+            parse_fn: parse function (usually ``lark_instance.parse``)
+            examples: dictionary of ``{'example_string': value}``.
+            use_accepts: Recommended to keep this as ``use_accepts=True``.
+        """
+        assert self.state is not None, "Not supported for this exception"
+
+        if isinstance(examples, Mapping):
+            examples = examples.items()
+
+        candidate = (None, False)
+        for i, (label, example) in enumerate(examples):
+            assert not isinstance(example, str), "Expecting a list"
+
+            for j, malformed in enumerate(example):
+                try:
+                    parse_fn(malformed)
+                except UnexpectedInput as ut:
+                    if ut.state == self.state:
+                        if (
+                            use_accepts
+                            and isinstance(self, UnexpectedToken)
+                            and isinstance(ut, UnexpectedToken)
+                            and ut.accepts != self.accepts
+                        ):
+                            logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
+                                         (self.state, self.accepts, ut.accepts, i, j))
+                            continue
+                        if (
+                            isinstance(self, (UnexpectedToken, UnexpectedEOF))
+                            and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
+                        ):
+                            if ut.token == self.token:  # Try exact match first
+                                logger.debug("Exact Match at example [%s][%s]" % (i, j))
+                                return label
+
+                            if token_type_match_fallback:
+                                # Fallback to token types match
+                                if (ut.token.type == self.token.type) and not candidate[-1]:
+                                    logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
+                                    candidate = label, True
+
+                        if candidate[0] is None:
+                            logger.debug("Same State match at example [%s][%s]" % (i, j))
+                            candidate = label, False
+
+        return candidate[0]
+
+    def _format_expected(self, expected):
+        if self._terminals_by_name:
+            d = self._terminals_by_name
+            expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
+        return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
+
+
+class UnexpectedEOF(ParseError, UnexpectedInput):
+    """An exception that is raised by the parser, when the input ends while it still expects a token.
+    """
+    expected: 'List[Token]'
+
+    def __init__(self, expected, state=None, terminals_by_name=None):
+        super(UnexpectedEOF, self).__init__()
+
+        self.expected = expected
+        self.state = state
+        from .lexer import Token
+        self.token = Token("<EOF>", "")  # , line=-1, column=-1, pos_in_stream=-1)
+        self.pos_in_stream = -1
+        self.line = -1
+        self.column = -1
+        self._terminals_by_name = terminals_by_name
+
+
+    def __str__(self):
+        message = "Unexpected end-of-input. "
+        message += self._format_expected(self.expected)
+        return message
+
+
+class UnexpectedCharacters(LexError, UnexpectedInput):
+    """An exception that is raised by the lexer, when it cannot match the next
+    string of characters to any of its terminals.
+    """
+
+    allowed: Set[str]
+    considered_tokens: Set[Any]
+
+    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
+                 terminals_by_name=None, considered_rules=None):
+        super(UnexpectedCharacters, self).__init__()
+
+        # TODO considered_tokens and allowed can be figured out using state
+        self.line = line
+        self.column = column
+        self.pos_in_stream = lex_pos
+        self.state = state
+        self._terminals_by_name = terminals_by_name
+
+        self.allowed = allowed
+        self.considered_tokens = considered_tokens
+        self.considered_rules = considered_rules
+        self.token_history = token_history
+
+        if isinstance(seq, bytes):
+            self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
+        else:
+            self.char = seq[lex_pos]
+        self._context = self.get_context(seq)
+
+
+    def __str__(self):
+        message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
+        message += '\n\n' + self._context
+        if self.allowed:
+            message += self._format_expected(self.allowed)
+        if self.token_history:
+            message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
+        return message
+
+
+class UnexpectedToken(ParseError, UnexpectedInput):
+    """An exception that is raised by the parser, when the token it received
+    doesn't match any valid step forward.
+
+    Parameters:
+        token: The mismatched token
+        expected: The set of expected tokens
+        considered_rules: Which rules were considered, to deduce the expected tokens
+        state: A value representing the parser state. Do not rely on its value or type.
+        interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure,
+                            and can be used for debugging and error handling.
+
+    Note: These parameters are available as attributes of the instance.
+    """
+
+    expected: Set[str]
+    considered_rules: Set[str]
+
+    def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
+        super(UnexpectedToken, self).__init__()
+
+        # TODO considered_rules and expected can be figured out using state
+        self.line = getattr(token, 'line', '?')
+        self.column = getattr(token, 'column', '?')
+        self.pos_in_stream = getattr(token, 'start_pos', None)
+        self.state = state
+
+        self.token = token
+        self.expected = expected  # XXX deprecate? `accepts` is better
+        self._accepts = NO_VALUE
+        self.considered_rules = considered_rules
+        self.interactive_parser = interactive_parser
+        self._terminals_by_name = terminals_by_name
+        self.token_history = token_history
+
+
+    @property
+    def accepts(self) -> Set[str]:
+        if self._accepts is NO_VALUE:
+            self._accepts = self.interactive_parser and self.interactive_parser.accepts()
+        return self._accepts
+
+    def __str__(self):
+        message = ("Unexpected token %r at line %s, column %s.\n%s"
+                   % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
+        if self.token_history:
+            message += "Previous tokens: %r\n" % self.token_history
+
+        return message
+
+
+
+class VisitError(LarkError):
+    """VisitError is raised when visitors are interrupted by an exception
+
+    It provides the following attributes for inspection:
+
+    Parameters:
+        rule: the name of the visit rule that failed
+        obj: the tree-node or token that was being processed
+        orig_exc: the exception that cause it to fail
+
+    Note: These parameters are available as attributes
+    """
+
+    obj: 'Union[Tree, Token]'
+    orig_exc: Exception
+
+    def __init__(self, rule, obj, orig_exc):
+        message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
+        super(VisitError, self).__init__(message)
+
+        self.rule = rule
+        self.obj = obj
+        self.orig_exc = orig_exc
+
+
+class MissingVariableError(LarkError):
+    pass
+
+###}
@@ -0,0 +1,136 @@
+from typing import Any, Dict, Optional, Tuple, ClassVar, Sequence
+
+from .utils import Serialize
+
+###{standalone
+# NOTE(review): presumably the priority of a token that declares none; it is not used in this module — confirm at the use sites.
+TOKEN_DEFAULT_PRIORITY = 0
+
+
+class Symbol(Serialize):
+    __slots__ = ('name',)
+
+    name: str
+    is_term: ClassVar[bool] = NotImplemented
+
+    def __init__(self, name: str) -> None:
+        self.name = name
+
+    def __eq__(self, other):
+        if not isinstance(other, Symbol):
+            return NotImplemented
+        return self.is_term == other.is_term and self.name == other.name
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __hash__(self):
+        return hash(self.name)
+
+    def __repr__(self):
+        return '%s(%r)' % (type(self).__name__, self.name)
+
+    fullrepr = property(__repr__)
+
+    def renamed(self, f):
+        return type(self)(f(self.name))
+
+
+class Terminal(Symbol):
+    __serialize_fields__ = 'name', 'filter_out'
+
+    is_term: ClassVar[bool] = True
+
+    def __init__(self, name: str, filter_out: bool = False) -> None:
+        self.name = name
+        self.filter_out = filter_out
+
+    @property
+    def fullrepr(self):
+        return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
+
+    def renamed(self, f):
+        return type(self)(f(self.name), self.filter_out)
+
+
+class NonTerminal(Symbol):
+    __serialize_fields__ = 'name',
+
+    is_term: ClassVar[bool] = False
+
+    def serialize(self, memo=None) -> Dict[str, Any]:
+        # TODO this is here because self.name can be a Token instance.
+        #      remove this function when the issue is fixed. (backwards-incompatible)
+        return {'name': str(self.name), '__type__': 'NonTerminal'}
+
+
+class RuleOptions(Serialize):
+    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
+
+    keep_all_tokens: bool
+    expand1: bool
+    priority: Optional[int]
+    template_source: Optional[str]
+    empty_indices: Tuple[bool, ...]
+
+    def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
+        self.keep_all_tokens = keep_all_tokens
+        self.expand1 = expand1
+        self.priority = priority
+        self.template_source = template_source
+        self.empty_indices = empty_indices
+
+    def __repr__(self):
+        return 'RuleOptions(%r, %r, %r, %r)' % (
+            self.keep_all_tokens,
+            self.expand1,
+            self.priority,
+            self.template_source
+        )
+
+
+class Rule(Serialize):
+    """
+        origin : a symbol
+        expansion : a list of symbols
+        order : index of this expansion amongst all rules of the same name
+    """
+    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
+
+    __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
+    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
+
+    origin: NonTerminal
+    expansion: Sequence[Symbol]
+    order: int
+    alias: Optional[str]
+    options: RuleOptions
+    _hash: int
+
+    def __init__(self, origin: NonTerminal, expansion: Sequence[Symbol],
+                 order: int=0, alias: Optional[str]=None, options: Optional[RuleOptions]=None):
+        self.origin = origin
+        self.expansion = expansion
+        self.alias = alias
+        self.order = order
+        self.options = options or RuleOptions()
+        self._hash = hash((self.origin, tuple(self.expansion)))
+
+    def _deserialize(self):
+        self._hash = hash((self.origin, tuple(self.expansion)))
+
+    def __str__(self):
+        return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
+
+    def __repr__(self):
+        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
+
+    def __hash__(self):
+        return self._hash
+
+    def __eq__(self, other):
+        if not isinstance(other, Rule):
+            return False
+        return self.origin == other.origin and self.expansion == other.expansion
+
+
+###}
@@ -0,0 +1,59 @@
+// Basic terminals for common use
+
+
+//
+// Numbers
+//
+
+DIGIT: "0".."9"
+HEXDIGIT: "a".."f"|"A".."F"|DIGIT
+
+INT: DIGIT+
+SIGNED_INT: ["+"|"-"] INT
+DECIMAL: INT "." INT? | "." INT
+
+// float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/
+_EXP: ("e"|"E") SIGNED_INT
+FLOAT: INT _EXP | DECIMAL _EXP?
+SIGNED_FLOAT: ["+"|"-"] FLOAT
+
+NUMBER: FLOAT | INT
+SIGNED_NUMBER: ["+"|"-"] NUMBER
+
+//
+// Strings
+//
+_STRING_INNER: /.*?/
+_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/
+
+ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""
+
+
+//
+// Names (Variables)
+//
+LCASE_LETTER: "a".."z"
+UCASE_LETTER: "A".."Z"
+
+LETTER: UCASE_LETTER | LCASE_LETTER
+WORD: LETTER+
+
+CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
+
+
+//
+// Whitespace
+//
+WS_INLINE: (" "|/\t/)+
+WS: /[ \t\f\r\n]/+
+
+CR : /\r/
+LF : /\n/
+NEWLINE: (CR? LF)+
+
+
+// Comments
+SH_COMMENT: /#[^\n]*/
+CPP_COMMENT: /\/\/[^\n]*/
+C_COMMENT: "/*" /(.|\n)*?/ "*/"
+SQL_COMMENT: /--[^\n]*/
@@ -0,0 +1,62 @@
+# Lark grammar of Lark's syntax
+# Note: Lark is not bootstrapped, its parser is implemented in load_grammar.py
+
+start: (_item? _NL)* _item?
+
+_item: rule
+     | token
+     | statement
+
+rule: RULE rule_params priority? ":" expansions
+token: TOKEN token_params priority? ":" expansions
+
+rule_params: ["{" RULE ("," RULE)* "}"]
+token_params: ["{" TOKEN ("," TOKEN)* "}"]
+
+priority: "." NUMBER
+
+statement: "%ignore" expansions                    -> ignore
+         | "%import" import_path ["->" name]       -> import
+         | "%import" import_path name_list         -> multi_import
+         | "%override" rule                        -> override_rule
+         | "%declare" name+                        -> declare
+
+!import_path: "."? name ("." name)*
+name_list: "(" name ("," name)* ")"
+
+?expansions: alias (_VBAR alias)*
+
+?alias: expansion ["->" RULE]
+
+?expansion: expr*
+
+?expr: atom [OP | "~" NUMBER [".." NUMBER]]
+
+?atom: "(" expansions ")"
+     | "[" expansions "]" -> maybe
+     | value
+
+?value: STRING ".." STRING -> literal_range
+      | name
+      | (REGEXP | STRING) -> literal
+      | name "{" value ("," value)* "}" -> template_usage
+
+name: RULE
+    | TOKEN
+
+_VBAR: _NL? "|"
+OP: /[+*]|[?](?![a-z])/
+RULE: /!?[_?]?[a-z][_a-z0-9]*/
+TOKEN: /_?[A-Z][_A-Z0-9]*/
+STRING: _STRING "i"?
+REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/
+_NL: /(\r?\n)+\s*/
+
+%import common.ESCAPED_STRING -> _STRING
+%import common.SIGNED_INT -> NUMBER
+%import common.WS_INLINE
+
+COMMENT: /\s*/ "//" /[^\n]/* | /\s*/ "#" /[^\n]/*
+
+%ignore WS_INLINE
+%ignore COMMENT
@@ -0,0 +1,302 @@
+// Python 3 grammar for Lark
+
+// This grammar should parse all python 3.x code successfully.
+
+// Adapted from: https://docs.python.org/3/reference/grammar.html
+
+// Start symbols for the grammar:
+//       single_input is a single interactive statement;
+//       file_input is a module or sequence of commands read from an input file;
+//       eval_input is the input for the eval() functions.
+// NB: compound_stmt in single_input is followed by extra NEWLINE!
+//
+
+single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE
+file_input: (_NEWLINE | stmt)*
+eval_input: testlist _NEWLINE*
+
+decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef | async_funcdef)
+
+async_funcdef: "async" funcdef
+funcdef: "def" name "(" [parameters] ")" ["->" test] ":" suite
+
+parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]]
+          | starparams
+          | kwparams
+
+SLASH: "/" // Otherwise it would disappear completely and be indistinguishable in the result
+starparams: (starparam | starguard) poststarparams
+starparam: "*" typedparam
+starguard: "*"
+poststarparams: ("," paramvalue)* ["," kwparams]
+kwparams: "**" typedparam ","?
+
+?paramvalue: typedparam ("=" test)?
+?typedparam: name (":" test)?
+
+
+lambdef: "lambda" [lambda_params] ":" test
+lambdef_nocond: "lambda" [lambda_params] ":" test_nocond
+lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]]
+          | lambda_starparams
+          | lambda_kwparams
+?lambda_paramvalue: name ("=" test)?
+lambda_starparams: "*" [name]  ("," lambda_paramvalue)* ["," [lambda_kwparams]]
+lambda_kwparams: "**" name ","?
+
+
+?stmt: simple_stmt | compound_stmt
+?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE
+?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
+expr_stmt: testlist_star_expr
+assign_stmt: annassign | augassign | assign
+
+annassign: testlist_star_expr ":" test ["=" test]
+assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+
+augassign: testlist_star_expr augassign_op (yield_expr|testlist)
+!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
+?testlist_star_expr: test_or_star_expr
+                   | test_or_star_expr ("," test_or_star_expr)+ ","?  -> tuple
+                   | test_or_star_expr ","  -> tuple
+
+// For normal and annotated assignments, additional restrictions enforced by the interpreter
+del_stmt: "del" exprlist
+pass_stmt: "pass"
+?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: "break"
+continue_stmt: "continue"
+return_stmt: "return" [testlist]
+yield_stmt: yield_expr
+raise_stmt: "raise" [test ["from" test]]
+import_stmt: import_name | import_from
+import_name: "import" dotted_as_names
+// note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS
+import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names)
+!dots: "."+
+import_as_name: name ["as" name]
+dotted_as_name: dotted_name ["as" name]
+import_as_names: import_as_name ("," import_as_name)* [","]
+dotted_as_names: dotted_as_name ("," dotted_as_name)*
+dotted_name: name ("." name)*
+global_stmt: "global" name ("," name)*
+nonlocal_stmt: "nonlocal" name ("," name)*
+assert_stmt: "assert" test ["," test]
+
+?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | match_stmt
+              | with_stmt | funcdef | classdef | decorated | async_stmt
+async_stmt: "async" (funcdef | with_stmt | for_stmt)
+if_stmt: "if" test ":" suite elifs ["else" ":" suite]
+elifs: elif_*
+elif_: "elif" test ":" suite
+while_stmt: "while" test ":" suite ["else" ":" suite]
+for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
+try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally]
+        | "try" ":" suite finally   -> try_finally
+finally: "finally" ":" suite
+except_clauses: except_clause+
+except_clause: "except" [test ["as" name]] ":" suite
+// NB compile.c makes sure that the default except clause is last
+
+
+with_stmt: "with" with_items ":" suite
+with_items: with_item ("," with_item)*
+with_item: test ["as" name]
+
+match_stmt: "match" test ":" _NEWLINE _INDENT case+ _DEDENT
+
+case: "case" pattern ["if" test] ":" suite
+
+?pattern: sequence_item_pattern "," _sequence_pattern -> sequence_pattern
+        | as_pattern
+?as_pattern: or_pattern ("as" NAME)?
+?or_pattern: closed_pattern ("|" closed_pattern)*
+?closed_pattern: literal_pattern
+               | NAME -> capture_pattern
+               | "_" -> any_pattern
+               | attr_pattern
+               | "(" as_pattern ")"
+               | "[" _sequence_pattern "]" -> sequence_pattern
+               | "(" (sequence_item_pattern "," _sequence_pattern)? ")" -> sequence_pattern
+               | "{" (mapping_item_pattern ("," mapping_item_pattern)* ","?)?"}" -> mapping_pattern
+               | "{" (mapping_item_pattern ("," mapping_item_pattern)* ",")? "**" NAME ","? "}" -> mapping_star_pattern
+               | class_pattern
+
+literal_pattern: inner_literal_pattern
+
+?inner_literal_pattern: "None" -> const_none
+                      | "True" -> const_true
+                      | "False" -> const_false
+                      | STRING -> string
+                      | number
+
+attr_pattern: NAME ("." NAME)+ -> value
+
+name_or_attr_pattern: NAME ("." NAME)* -> value
+
+mapping_item_pattern: (literal_pattern|attr_pattern) ":" as_pattern
+
+_sequence_pattern: (sequence_item_pattern ("," sequence_item_pattern)* ","?)?
+?sequence_item_pattern: as_pattern
+                      | "*" NAME -> star_pattern
+
+class_pattern: name_or_attr_pattern "(" [arguments_pattern ","?] ")"
+arguments_pattern: pos_arg_pattern ["," keyws_arg_pattern]
+                 | keyws_arg_pattern -> no_pos_arguments
+
+pos_arg_pattern: as_pattern ("," as_pattern)*
+keyws_arg_pattern: keyw_arg_pattern ("," keyw_arg_pattern)*
+keyw_arg_pattern: NAME "=" as_pattern
+
+
+
+suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT
+
+?test: or_test ("if" or_test "else" test)?
+     | lambdef
+     | assign_expr
+
+assign_expr: name ":=" test
+
+?test_nocond: or_test | lambdef_nocond
+
+?or_test: and_test ("or" and_test)*
+?and_test: not_test_ ("and" not_test_)*
+?not_test_: "not" not_test_ -> not_test
+         | comparison
+?comparison: expr (comp_op expr)*
+star_expr: "*" expr
+
+?expr: or_expr
+?or_expr: xor_expr ("|" xor_expr)*
+?xor_expr: and_expr ("^" and_expr)*
+?and_expr: shift_expr ("&" shift_expr)*
+?shift_expr: arith_expr (_shift_op arith_expr)*
+?arith_expr: term (_add_op term)*
+?term: factor (_mul_op factor)*
+?factor: _unary_op factor | power
+
+!_unary_op: "+"|"-"|"~"
+!_add_op: "+"|"-"
+!_shift_op: "<<"|">>"
+!_mul_op: "*"|"@"|"/"|"%"|"//"
+// <> isn't actually a valid comparison operator in Python. It's here for the
+// sake of a __future__ import described in PEP 401 (which really works :-)
+!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"
+
+?power: await_expr ("**" factor)?
+?await_expr: AWAIT? atom_expr
+AWAIT: "await"
+
+?atom_expr: atom_expr "(" [arguments] ")"      -> funccall
+          | atom_expr "[" subscriptlist "]"  -> getitem
+          | atom_expr "." name               -> getattr
+          | atom
+
+?atom: "(" yield_expr ")"
+     | "(" _tuple_inner? ")" -> tuple
+     | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension
+     | "[" _exprlist? "]"  -> list
+     | "[" comprehension{test_or_star_expr} "]"  -> list_comprehension
+     | "{" _dict_exprlist? "}" -> dict
+     | "{" comprehension{key_value} "}" -> dict_comprehension
+     | "{" _exprlist "}" -> set
+     | "{" comprehension{test} "}" -> set_comprehension
+     | name -> var
+     | number
+     | string_concat
+     | "(" test ")"
+     | "..." -> ellipsis
+     | "None"    -> const_none
+     | "True"    -> const_true
+     | "False"   -> const_false
+
+
+?string_concat: string+
+
+_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",")
+
+?test_or_star_expr: test
+                 | star_expr
+
+?subscriptlist: subscript
+              | subscript (("," subscript)+ [","] | ",") -> subscript_tuple
+?subscript: test | ([test] ":" [test] [sliceop]) -> slice
+sliceop: ":" [test]
+?exprlist: (expr|star_expr)
+         | (expr|star_expr) (("," (expr|star_expr))+ [","]|",")
+?testlist: test | testlist_tuple
+testlist_tuple: test (("," test)+ [","] | ",")
+_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","]
+
+key_value: test ":"  test
+
+_exprlist: test_or_star_expr (","  test_or_star_expr)* [","]
+
+classdef: "class" name ["(" [arguments] ")"] ":" suite
+
+
+
+arguments: argvalue ("," argvalue)*  ("," [ starargs | kwargs])?
+         | starargs
+         | kwargs
+         | comprehension{test}
+
+starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs]
+stararg: "*" test
+kwargs: "**" test ("," argvalue)*
+
+?argvalue: test ("=" test)?
+
+
+comprehension{comp_result}: comp_result comp_fors [comp_if]
+comp_fors: comp_for+
+comp_for: [ASYNC] "for" exprlist "in" or_test
+ASYNC: "async"
+?comp_if: "if" test_nocond
+
+// not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: name
+
+yield_expr: "yield" [testlist]
+          | "yield" "from" test -> yield_from
+
+number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
+string: STRING | LONG_STRING
+
+// Other terminals
+
+_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
+
+%ignore /[\t \f]+/  // WS
+%ignore /\\[\t \f]*\r?\n/   // LINE_CONT
+%ignore COMMENT
+%declare _INDENT _DEDENT
+
+
+// Python terminals
+
+!name: NAME | "match" | "case"
+NAME: /[^\W\d]\w*/
+COMMENT: /#[^\n]*/
+
+STRING: /([ubf]?r?|r[ubf])("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
+LONG_STRING: /([ubf]?r?|r[ubf])(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
+
+_SPECIAL_DEC: "0".."9"        ("_"?  "0".."9"                       )*
+DEC_NUMBER:   "1".."9"        ("_"?  "0".."9"                       )*
+          |   "0"             ("_"?  "0"                            )* /(?![1-9])/
+HEX_NUMBER.2: "0" ("x" | "X") ("_"? ("0".."9" | "a".."f" | "A".."F"))+
+OCT_NUMBER.2: "0" ("o" | "O") ("_"?  "0".."7"                       )+
+BIN_NUMBER.2: "0" ("b" | "B") ("_"?  "0".."1"                       )+
+
+_EXP: ("e"|"E") ["+" | "-"] _SPECIAL_DEC
+DECIMAL: "." _SPECIAL_DEC | _SPECIAL_DEC "." _SPECIAL_DEC?
+FLOAT_NUMBER.2: _SPECIAL_DEC _EXP | DECIMAL _EXP?
+IMAG_NUMBER.2: (_SPECIAL_DEC      | FLOAT_NUMBER) ("J" | "j")
+
+
+// Comma-separated list (with an optional trailing comma)
+cs_list{item}: item ("," item)* ","?
+_cs_list{item}: item ("," item)* ","?
@@ -0,0 +1,7 @@
+// TODO: LETTER, WORD, etc.
+
+//
+// Whitespace
+//
+WS_INLINE: /[ \t\xa0]/+
+WS: /[ \t\xa0\f\r\n]/+
@@ -0,0 +1,144 @@
+"Provides a post-lexer for implementing Python-style indentation."
+
+from abc import ABC, abstractmethod
+from typing import List, Iterator
+
+from .exceptions import LarkError
+from .lark import PostLex
+from .lexer import Token
+
+###{standalone
+
+class DedentError(LarkError):
+    pass
+
+class Indenter(PostLex, ABC):
+    """This is a postlexer that "injects" indent/dedent tokens based on indentation.
+
+    It keeps track of the current indentation, as well as the current level of parentheses.
+    Inside parentheses, the indentation is ignored, and no indent/dedent tokens get generated.
+
+    Note: This is an abstract class. To use it, inherit and implement all its abstract methods:
+        - tab_len
+        - NL_type
+        - OPEN_PAREN_types, CLOSE_PAREN_types
+        - INDENT_type, DEDENT_type
+
+    See also: the ``postlex`` option in `Lark`.
+    """
+    paren_level: int
+    indent_level: List[int]
+
+    def __init__(self) -> None:
+        self.paren_level = 0
+        self.indent_level = [0]
+        assert self.tab_len > 0
+
+    def handle_NL(self, token: Token) -> Iterator[Token]:
+        if self.paren_level > 0:
+            return
+
+        yield token
+
+        indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces
+        indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
+
+        if indent > self.indent_level[-1]:
+            self.indent_level.append(indent)
+            yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
+        else:
+            while indent < self.indent_level[-1]:
+                self.indent_level.pop()
+                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
+
+            if indent != self.indent_level[-1]:
+                raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))
+
+    def _process(self, stream):
+        token = None
+        for token in stream:
+            if token.type == self.NL_type:
+                yield from self.handle_NL(token)
+            else:
+                yield token
+
+            if token.type in self.OPEN_PAREN_types:
+                self.paren_level += 1
+            elif token.type in self.CLOSE_PAREN_types:
+                self.paren_level -= 1
+                assert self.paren_level >= 0
+
+        while len(self.indent_level) > 1:
+            self.indent_level.pop()
+            yield Token.new_borrow_pos(self.DEDENT_type, '', token) if token else Token(self.DEDENT_type, '', 0, 0, 0, 0, 0, 0)
+
+        assert self.indent_level == [0], self.indent_level
+
+    def process(self, stream):
+        self.paren_level = 0
+        self.indent_level = [0]
+        return self._process(stream)
+
+    # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
+    @property
+    def always_accept(self):
+        """Terminal names that must always be accepted: only the newline terminal."""
+        return (self.NL_type,)
+
+    @property
+    @abstractmethod
+    def NL_type(self) -> str:
+        "The name of the newline token"
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def OPEN_PAREN_types(self) -> List[str]:
+        "The names of the tokens that open a parenthesis"
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def CLOSE_PAREN_types(self) -> List[str]:
+        """The names of the tokens that close a parenthesis
+        """
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def INDENT_type(self) -> str:
+        """The name of the token that starts an indentation in the grammar.
+
+        See also: %declare
+        """
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def DEDENT_type(self) -> str:
+        """The name of the token that ends an indentation in the grammar.
+
+        See also: %declare
+        """
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def tab_len(self) -> int:
+        """How many spaces a tab is counted as when measuring indentation"""
+        raise NotImplementedError()
+
+
+class PythonIndenter(Indenter):
+    """A postlexer that "injects" _INDENT/_DEDENT tokens based on indentation, according to the Python syntax.
+
+    See also: the ``postlex`` option in `Lark`.
+    """
+
+    # Terminal whose text carries the line break and the indentation that follows it.
+    NL_type = '_NEWLINE'
+    # Bracket terminals; while any of them is open, newlines are dropped and indentation is not tracked.
+    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
+    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
+    # Names of the tokens injected when indentation increases / decreases.
+    INDENT_type = '_INDENT'
+    DEDENT_type = '_DEDENT'
+    # Each tab is counted as 8 spaces when measuring indentation.
+    tab_len = 8
+
+###}
@@ -0,0 +1,680 @@
+from abc import ABC, abstractmethod
+import getpass
+import sys, os, pickle
+import tempfile
+import types
+import re
+from typing import (
+    TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, Sequence,
+    Tuple, Iterable, IO, Any, TYPE_CHECKING, Collection
+)
+if TYPE_CHECKING:
+    from .parsers.lalr_interactive_parser import InteractiveParser
+    from .tree import ParseTree
+    from .visitors import Transformer
+    from typing import Literal
+    from .parser_frontends import ParsingFrontend
+
+from .exceptions import ConfigurationError, assert_config, UnexpectedInput
+from .utils import Serialize, SerializeMemoizer, FS, logger, TextOrSlice, LarkInput
+from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest
+from .tree import Tree
+from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
+
+from .lexer import Lexer, BasicLexer, TerminalDef, LexerThread, Token
+from .parse_tree_builder import ParseTreeBuilder
+from .parser_frontends import _validate_frontend_args, _get_lexer_callbacks, _deserialize_parsing_frontend, _construct_parsing_frontend
+from .grammar import Rule
+
+
+try:
+    import regex
+    _has_regex = True
+except ImportError:
+    _has_regex = False
+
+
+###{standalone
+
+
+class PostLex(ABC):
+    @abstractmethod
+    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
+        return stream
+
+    always_accept: Iterable[str] = ()
+
+class LarkOptions(Serialize):
+    """Specifies the options for Lark
+
+    """
+
+    start: List[str]
+    debug: bool
+    strict: bool
+    transformer: 'Optional[Transformer]'
+    propagate_positions: Union[bool, str]
+    maybe_placeholders: bool
+    cache: Union[bool, str]
+    cache_grammar: bool
+    regex: bool
+    g_regex_flags: int
+    keep_all_tokens: bool
+    tree_class: Optional[Callable[[str, List], Any]]
+    parser: _ParserArgType
+    lexer: _LexerArgType
+    ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
+    postlex: Optional[PostLex]
+    priority: 'Optional[Literal["auto", "normal", "invert"]]'
+    lexer_callbacks: Dict[str, Callable[[Token], Token]]
+    use_bytes: bool
+    ordered_sets: bool
+    edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
+    import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
+    source_path: Optional[str]
+
+    OPTIONS_DOC = r"""
+    **===  General Options  ===**
+
+    start
+            The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
+    debug
+            Display debug information and extra warnings. Use only when debugging (Default: ``False``)
+            When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
+    strict
+            Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions.
+    transformer
+            Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
+    propagate_positions
+            Propagates positional attributes into the 'meta' attribute of all tree branches.
+            Sets attributes: (line, column, end_line, end_column, start_pos, end_pos,
+                              container_line, container_column, container_end_line, container_end_column)
+            Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
+    maybe_placeholders
+            When ``True``, the ``[]`` operator returns ``None`` when not matched.
+            When ``False``,  ``[]`` behaves like the ``?`` operator, and returns no value at all.
+            (default= ``True``)
+    cache
+            Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.
+
+            - When ``False``, does nothing (default)
+            - When ``True``, caches to a temporary file in the local directory
+            - When given a string, caches to the path pointed by the string
+    cache_grammar
+            For use with ``cache`` option. When ``True``, the unanalyzed grammar is also included in the cache.
+            Useful for classes that require the ``Lark.grammar`` to be present (e.g. Reconstructor).
+            (default= ``False``)
+    regex
+            When True, uses the ``regex`` module instead of the stdlib ``re``.
+    g_regex_flags
+            Flags that are applied to all terminals (both regex and strings)
+    keep_all_tokens
+            Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
+    tree_class
+            Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
+
+    **=== Algorithm Options ===**
+
+    parser
+            Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
+            (there is also a "cyk" option for legacy)
+    lexer
+            Decides whether or not to use a lexer stage
+
+            - "auto" (default): Choose for me based on the parser
+            - "basic": Use a basic lexer
+            - "contextual": Stronger lexer (only works with parser="lalr")
+            - "dynamic": Flexible and powerful (only with parser="earley")
+            - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
+    ambiguity
+            Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
+
+            - "resolve": The parser will automatically choose the simplest derivation
+              (it chooses consistently: greedy for tokens, non-greedy for rules)
+            - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
+            - "forest": The parser will return the root of the shared packed parse forest.
+
+    **=== Misc. / Domain Specific Options ===**
+
+    postlex
+            Lexer post-processing (Default: ``None``) Only works with the basic and contextual lexers.
+    priority
+            How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
+    lexer_callbacks
+            Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
+    use_bytes
+            Accept an input of type ``bytes`` instead of ``str``.
+    ordered_sets
+            Should Earley use ordered-sets to achieve stable output (~10% slower than regular sets. Default: True)
+    edit_terminals
+            A callback for editing the terminals before parse.
+    import_paths
+            A List of either paths or loader functions to specify from where grammars are imported
+    source_path
+            Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading
+    **=== End of Options ===**
+    """
+    if __doc__:
+        __doc__ += OPTIONS_DOC
+
+
+    # Adding a new option needs to be done in multiple places:
+    # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
+    # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
+    # - As an attribute of `LarkOptions` above
+    # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
+    # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
+    _defaults: Dict[str, Any] = {
+        'debug': False,
+        'strict': False,
+        'keep_all_tokens': False,
+        'tree_class': None,
+        'cache': False,
+        'cache_grammar': False,
+        'postlex': None,
+        'parser': 'earley',
+        'lexer': 'auto',
+        'transformer': None,
+        'start': 'start',
+        'priority': 'auto',
+        'ambiguity': 'auto',
+        'regex': False,
+        'propagate_positions': False,
+        'lexer_callbacks': {},
+        'maybe_placeholders': True,
+        'edit_terminals': None,
+        'g_regex_flags': 0,
+        'use_bytes': False,
+        'ordered_sets': True,
+        'import_paths': [],
+        'source_path': None,
+        '_plugins': {},
+    }
+
+    def __init__(self, options_dict: Dict[str, Any]) -> None:
+        o = dict(options_dict)
+
+        options = {}
+        for name, default in self._defaults.items():
+            if name in o:
+                value = o.pop(name)
+                if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
+                    value = bool(value)
+            else:
+                value = default
+
+            options[name] = value
+
+        if isinstance(options['start'], str):
+            options['start'] = [options['start']]
+
+        self.__dict__['options'] = options
+
+
+        assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
+
+        if self.parser == 'earley' and self.transformer:
+            raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
+                             'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
+
+        if self.cache_grammar and not self.cache:
+            raise ConfigurationError('cache_grammar cannot be set when cache is disabled')
+
+        if o:
+            raise ConfigurationError("Unknown options: %s" % o.keys())
+
+    def __getattr__(self, name: str) -> Any:
+        try:
+            return self.__dict__['options'][name]
+        except KeyError as e:
+            raise AttributeError(e)
+
+    def __setattr__(self, name: str, value: str) -> None:
+        assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
+        self.options[name] = value
+
+    def serialize(self, memo = None) -> Dict[str, Any]:
+        """Return the options dictionary itself (not a copy); ``memo`` is not used."""
+        return self.options
+
+    @classmethod
+    def deserialize(cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]) -> "LarkOptions":
+        """Build a ``LarkOptions`` from a serialized options dictionary; ``memo`` is not used."""
+        return cls(data)
+
+
+# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
+# These options are only used outside of `load_grammar`.
+_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'}
+
+_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
+_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
+
+
+_T = TypeVar('_T', bound="Lark")
+
+class Lark(Serialize):
+    """Main interface for the library.
+
+    It's mostly a thin wrapper for the many different parsers, and for the tree constructor.
+
+    Parameters:
+        grammar: a string or file-object containing the grammar spec (using Lark's ebnf syntax)
+        options: a dictionary controlling various aspects of Lark.
+
+    Example:
+        >>> Lark(r'''start: "foo" ''')
+        Lark(...)
+    """
+
+    source_path: str
+    source_grammar: str
+    grammar: 'Grammar'
+    options: LarkOptions
+    lexer: Lexer
+    parser: 'ParsingFrontend'
+    terminals: Collection[TerminalDef]
+
+    __serialize_fields__ = ['parser', 'rules', 'options']
+
+    def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
+        self.options = LarkOptions(options)
+        re_module: types.ModuleType
+
+        # Update which fields are serialized
+        if self.options.cache_grammar:
+            self.__serialize_fields__ = self.__serialize_fields__ + ['grammar']
+
+        # Set regex or re module
+        use_regex = self.options.regex
+        if use_regex:
+            if _has_regex:
+                re_module = regex
+            else:
+                raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
+        else:
+            re_module = re
+
+        # Some, but not all file-like objects have a 'name' attribute
+        if self.options.source_path is None:
+            try:
+                self.source_path = grammar.name  # type: ignore[union-attr]
+            except AttributeError:
+                self.source_path = '<string>'
+        else:
+            self.source_path = self.options.source_path
+
+        # Drain file-like objects to get their contents
+        try:
+            read = grammar.read  # type: ignore[union-attr]
+        except AttributeError:
+            pass
+        else:
+            grammar = read()
+
+        cache_fn = None
+        cache_sha256 = None
+        if isinstance(grammar, str):
+            self.source_grammar = grammar
+            if self.options.use_bytes:
+                if not grammar.isascii():
+                    raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
+
+            if self.options.cache:
+                if self.options.parser != 'lalr':
+                    raise ConfigurationError("cache only works with parser='lalr' for now")
+
+                unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins')
+                options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
+                from . import __version__
+                s = grammar + options_str + __version__ + str(sys.version_info[:2])
+                cache_sha256 = sha256_digest(s)
+
+                if isinstance(self.options.cache, str):
+                    cache_fn = self.options.cache
+                else:
+                    if self.options.cache is not True:
+                        raise ConfigurationError("cache argument must be bool or str")
+
+                    try:
+                        username = getpass.getuser()
+                    except Exception:
+                        # The exception raised may be ImportError or OSError in
+                        # the future.  For the cache, we don't care about the
+                        # specific reason - we just want a username.
+                        username = "unknown"
+
+
+                    cache_fn = tempfile.gettempdir() + "/.lark_%s_%s_%s_%s_%s.tmp" % (
+                        "cache_grammar" if self.options.cache_grammar else "cache", username, cache_sha256, *sys.version_info[:2])
+
+                old_options = self.options
+                try:
+                    with FS.open(cache_fn, 'rb') as f:
+                        logger.debug('Loading grammar from cache: %s', cache_fn)
+                        # Remove options that aren't relevant for loading from cache
+                        for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
+                            del options[name]
+                        file_sha256 = f.readline().rstrip(b'\n')
+                        cached_used_files = pickle.load(f)
+                        if file_sha256 == cache_sha256.encode('utf8') and verify_used_files(cached_used_files):
+                            cached_parser_data = pickle.load(f)
+                            self._load(cached_parser_data, **options)
+                            return
+                except FileNotFoundError:
+                    # The cache file doesn't exist; parse and compose the grammar as normal
+                    pass
+                except Exception: # We should probably narrow done which errors we catch here.
+                    logger.exception("Failed to load Lark from cache: %r. We will try to carry on.", cache_fn)
+
+                    # In theory, the Lark instance might have been messed up by the call to `_load`.
+                    # In practice the only relevant thing that might have been overwritten should be `options`
+                    self.options = old_options
+
+
+            # Parse the grammar file and compose the grammars
+            self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
+        else:
+            assert isinstance(grammar, Grammar)
+            self.grammar = grammar
+
+
+        if self.options.lexer == 'auto':
+            if self.options.parser == 'lalr':
+                self.options.lexer = 'contextual'
+            elif self.options.parser == 'earley':
+                if self.options.postlex is not None:
+                    logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
+                                "Consider using lalr with contextual instead of earley")
+                    self.options.lexer = 'basic'
+                else:
+                    self.options.lexer = 'dynamic'
+            elif self.options.parser == 'cyk':
+                self.options.lexer = 'basic'
+            else:
+                assert False, self.options.parser
+        lexer = self.options.lexer
+        if isinstance(lexer, type):
+            assert issubclass(lexer, Lexer)     # XXX Is this really important? Maybe just ensure interface compliance
+        else:
+            assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete'))
+            if self.options.postlex is not None and 'dynamic' in lexer:
+                raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead")
+
+        if self.options.ambiguity == 'auto':
+            if self.options.parser == 'earley':
+                self.options.ambiguity = 'resolve'
+        else:
+            assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
+
+        if self.options.priority == 'auto':
+            self.options.priority = 'normal'
+
+        if self.options.priority not in _VALID_PRIORITY_OPTIONS:
+            raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
+        if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
+            raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
+
+        if self.options.parser is None:
+            terminals_to_keep = '*'     # For lexer-only mode, keep all terminals
+        elif self.options.postlex is not None:
+            terminals_to_keep = set(self.options.postlex.always_accept)
+        else:
+            terminals_to_keep = set()
+
+        # Compile the EBNF grammar into BNF
+        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
+
+        if self.options.edit_terminals:
+            for t in self.terminals:
+                self.options.edit_terminals(t)
+
+        self._terminals_dict = {t.name: t for t in self.terminals}
+
+        # If the user asked to invert the priorities, negate them all here.
+        if self.options.priority == 'invert':
+            for rule in self.rules:
+                if rule.options.priority is not None:
+                    rule.options.priority = -rule.options.priority
+            for term in self.terminals:
+                term.priority = -term.priority
+        # Else, if the user asked to disable priorities, strip them from the
+        # rules and terminals. This allows the Earley parsers to skip an extra forest walk
+        # for improved performance, if you don't need them (or didn't specify any).
+        elif self.options.priority is None:
+            for rule in self.rules:
+                if rule.options.priority is not None:
+                    rule.options.priority = None
+            for term in self.terminals:
+                term.priority = 0
+
+        # TODO Deprecate lexer_callbacks?
+        self.lexer_conf = LexerConf(
+                self.terminals, re_module, self.ignore_tokens, self.options.postlex,
+                self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes, strict=self.options.strict
+            )
+
+        if self.options.parser:
+            self.parser = self._build_parser()
+        elif lexer:
+            self.lexer = self._build_lexer()
+
+        if cache_fn:
+            logger.debug('Saving grammar to cache: %s', cache_fn)
+            try:
+                with FS.open(cache_fn, 'wb') as f:
+                    assert cache_sha256 is not None
+                    f.write(cache_sha256.encode('utf8') + b'\n')
+                    pickle.dump(used_files, f)
+                    self.save(f, _LOAD_ALLOWED_OPTIONS)
+            except IOError as e:
+                logger.exception("Failed to save Lark to cache: %r.", cache_fn, e)
+
+    if __doc__:
+        __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
+
+    def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer:
+        lexer_conf = self.lexer_conf
+        if dont_ignore:
+            from copy import copy
+            lexer_conf = copy(lexer_conf)
+            lexer_conf.ignore = ()
+        return BasicLexer(lexer_conf)
+
+    def _prepare_callbacks(self) -> None:
+        self._callbacks = {}
+        # we don't need these callbacks if we aren't building a tree
+        if self.options.ambiguity != 'forest':
+            self._parse_tree_builder = ParseTreeBuilder(
+                    self.rules,
+                    self.options.tree_class or Tree,
+                    self.options.propagate_positions,
+                    self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
+                    self.options.maybe_placeholders
+                )
+            self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
+        self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))
+
+    def _build_parser(self) -> "ParsingFrontend":
+        self._prepare_callbacks()
+        _validate_frontend_args(self.options.parser, self.options.lexer)
+        parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
+        return _construct_parsing_frontend(
+            self.options.parser,
+            self.options.lexer,
+            self.lexer_conf,
+            parser_conf,
+            options=self.options
+        )
+
+    def save(self, f, exclude_options: Collection[str] = ()) -> None:
+        """Saves the instance into the given file object
+
+        Useful for caching and multiprocessing.
+        """
+        if self.options.parser != 'lalr':
+            raise NotImplementedError("Lark.save() is only implemented for the LALR(1) parser.")
+        data, m = self.memo_serialize([TerminalDef, Rule])
+        if exclude_options:
+            data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options}
+        pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    @classmethod
+    def load(cls: Type[_T], f) -> _T:
+        """Loads an instance from the given file object
+
+        Useful for caching and multiprocessing.
+        """
+        inst = cls.__new__(cls)
+        return inst._load(f)
+
+    def _deserialize_lexer_conf(self, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]], options: LarkOptions) -> LexerConf:
+        lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
+        lexer_conf.callbacks = options.lexer_callbacks or {}
+        lexer_conf.re_module = regex if options.regex else re
+        lexer_conf.use_bytes = options.use_bytes
+        lexer_conf.g_regex_flags = options.g_regex_flags
+        lexer_conf.skip_validation = True
+        lexer_conf.postlex = options.postlex
+        return lexer_conf
+
+    def _load(self: _T, f: Any, **kwargs) -> _T:
+        if isinstance(f, dict):
+            d = f
+        else:
+            d = pickle.load(f)
+        memo_json = d['memo']
+        data = d['data']
+
+        assert memo_json
+        memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
+        if 'grammar' in data:
+            self.grammar = Grammar.deserialize(data['grammar'], memo)
+        options = dict(data['options'])
+        if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
+            raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
+                             .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
+        options.update(kwargs)
+        self.options = LarkOptions.deserialize(options, memo)
+        self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
+        self.source_path = '<deserialized>'
+        _validate_frontend_args(self.options.parser, self.options.lexer)
+        self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
+        self.terminals = self.lexer_conf.terminals
+        self._prepare_callbacks()
+        self._terminals_dict = {t.name: t for t in self.terminals}
+        self.parser = _deserialize_parsing_frontend(
+            data['parser'],
+            memo,
+            self.lexer_conf,
+            self._callbacks,
+            self.options,  # Not all, but multiple attributes are used
+        )
+        return self
+
+    @classmethod
+    def _load_from_dict(cls, data, memo, **kwargs):
+        inst = cls.__new__(cls)
+        return inst._load({'data': data, 'memo': memo}, **kwargs)
+
+    @classmethod
+    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
+        """Create an instance of Lark with the grammar given by its filename
+
+        If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
+
+        Example:
+
+            >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr")
+            Lark(...)
+
+        """
+        if rel_to:
+            basepath = os.path.dirname(rel_to)
+            grammar_filename = os.path.join(basepath, grammar_filename)
+        with open(grammar_filename, encoding='utf8') as f:
+            return cls(f, **options)
+
+    @classmethod
+    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T:
+        """Create an instance of Lark with the grammar loaded from within the package `package`.
+        This allows grammar loading from zipapps.
+
+        Imports in the grammar will use the `package` and `search_paths` provided, through `FromPackageLoader`
+
+        Example:
+
+            Lark.open_from_package(__name__, "example.lark", ("grammars",), parser=...)
+        """
+        package_loader = FromPackageLoader(package, search_paths)
+        full_path, text = package_loader(None, grammar_path)
+        options.setdefault('source_path', full_path)
+        options.setdefault('import_paths', [])
+        options['import_paths'].append(package_loader)
+        return cls(text, **options)
+
+    def __repr__(self):
+        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
+
+
+    def lex(self, text: TextOrSlice, dont_ignore: bool=False) -> Iterator[Token]:
+        """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='basic'
+
+        When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
+
+        :raises UnexpectedCharacters: In case the lexer cannot find a suitable match.
+        """
+        lexer: Lexer
+        if not hasattr(self, 'lexer') or dont_ignore:
+            lexer = self._build_lexer(dont_ignore)
+        else:
+            lexer = self.lexer
+        lexer_thread = LexerThread.from_text(lexer, text)
+        stream = lexer_thread.lex(None)
+        if self.options.postlex:
+            return self.options.postlex.process(stream)
+        return stream
+
+    def get_terminal(self, name: str) -> TerminalDef:
+        """Get information about a terminal
+
+        :raises KeyError: If there is no terminal named ``name``.
+        """
+        return self._terminals_dict[name]
+
+    def parse_interactive(self, text: Optional[LarkInput]=None, start: Optional[str]=None) -> 'InteractiveParser':
+        """Start an interactive parsing session. Only works when parser='lalr'.
+
+        The call is forwarded to the parser frontend's ``parse_interactive``.
+
+        Parameters:
+            text (LarkInput, optional): Text to be parsed. Required for ``resume_parse()``.
+            start (str, optional): Start symbol
+
+        Returns:
+            A new InteractiveParser instance.
+
+        See Also: ``Lark.parse()``
+        """
+        return self.parser.parse_interactive(text, start=start)
+
+    def parse(self, text: LarkInput, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree':
+        """Parse the given text, according to the options provided.
+
+        Parameters:
+            text (LarkInput): Text to be parsed, as `str` or `bytes`.
+                TextSlice may also be used, but only when lexer='basic' or 'contextual'.
+                If Lark was created with a custom lexer, this may be an object of any type.
+            start (str, optional): Required if Lark was given multiple possible start symbols (using the start option).
+            on_error (function, optional): if provided, will be called on UnexpectedInput error,
+                with the exception as its argument. Return true to resume parsing, or false to raise the exception.
+                LALR only. See examples/advanced/error_handling.py for an example of how to use on_error.
+
+        Returns:
+            If a transformer is supplied to ``__init__``, returns whatever is the
+            result of the transformation. Otherwise, returns a Tree instance.
+
+        :raises UnexpectedInput: On a parse error, one of these sub-exceptions will rise:
+                ``UnexpectedCharacters``, ``UnexpectedToken``, or ``UnexpectedEOF``.
+                For convenience, these sub-exceptions also inherit from ``ParseError`` and ``LexError``.
+        :raises NotImplementedError: If ``on_error`` is given and the parser is not LALR.
+
+        """
+        if on_error is not None and self.options.parser != 'lalr':
+            raise NotImplementedError("The on_error option is only implemented for the LALR(1) parser.")
+        return self.parser.parse(text, start=start, on_error=on_error)
+
+
+###}
@@ -0,0 +1,702 @@
+# Lexer Implementation
+
+from abc import abstractmethod, ABC
+import re
+from typing import (
+    TypeVar, Type, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
+    ClassVar, TYPE_CHECKING, overload
+)
+from types import ModuleType
+import warnings
+try:
+    import interegular
+except ImportError:
+    pass
+if TYPE_CHECKING:
+    from .common import LexerConf
+    from .parsers.lalr_parser_state import ParserState
+
+from .utils import classify, get_regexp_width, Serialize, logger, TextSlice, TextOrSlice
+from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
+from .grammar import TOKEN_DEFAULT_PRIORITY
+
+
+###{standalone
+from contextlib import suppress
+from copy import copy
+
+try:  # For the standalone parser, we need to make sure that has_interegular is False to avoid NameErrors later on
+    has_interegular = bool(interegular)
+except NameError:
+    has_interegular = False
+
+class Pattern(Serialize, ABC):
+    "An abstraction over regular expressions."
+
+    value: str
+    flags: Collection[str]
+    raw: Optional[str]
+    type: ClassVar[str]
+
+    def __init__(self, value: str, flags: Collection[str] = (), raw: Optional[str] = None) -> None:
+        self.value = value
+        self.flags = frozenset(flags)
+        self.raw = raw
+
+    def __repr__(self):
+        return repr(self.to_regexp())
+
+    # Pattern Hashing assumes all subclasses have a different priority!
+    def __hash__(self):
+        return hash((type(self), self.value, self.flags))
+
+    def __eq__(self, other):
+        return type(self) == type(other) and self.value == other.value and self.flags == other.flags
+
+    @abstractmethod
+    def to_regexp(self) -> str:
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def min_width(self) -> int:
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def max_width(self) -> int:
+        raise NotImplementedError()
+
+    def _get_flags(self, value):
+        for f in self.flags:
+            value = ('(?%s:%s)' % (f, value))
+        return value
+
+
+class PatternStr(Pattern):
+    __serialize_fields__ = 'value', 'flags', 'raw'
+
+    type: ClassVar[str] = "str"
+
+    def to_regexp(self) -> str:
+        return self._get_flags(re.escape(self.value))
+
+    @property
+    def min_width(self) -> int:
+        return len(self.value)
+
+    @property
+    def max_width(self) -> int:
+        return len(self.value)
+
+
+class PatternRE(Pattern):
+    __serialize_fields__ = 'value', 'flags', 'raw', '_width'
+
+    type: ClassVar[str] = "re"
+
+    def to_regexp(self) -> str:
+        return self._get_flags(self.value)
+
+    _width = None
+    def _get_width(self):
+        if self._width is None:
+            self._width = get_regexp_width(self.to_regexp())
+        return self._width
+
+    @property
+    def min_width(self) -> int:
+        return self._get_width()[0]
+
+    @property
+    def max_width(self) -> int:
+        return self._get_width()[1]
+
+
+class TerminalDef(Serialize):
+    "A definition of a terminal"
+    __serialize_fields__ = 'name', 'pattern', 'priority'
+    __serialize_namespace__ = PatternStr, PatternRE
+
+    name: str
+    pattern: Pattern
+    priority: int
+
+    def __init__(self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY) -> None:
+        assert isinstance(pattern, Pattern), pattern
+        self.name = name
+        self.pattern = pattern
+        self.priority = priority
+
+    def __repr__(self):
+        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
+
+    def user_repr(self) -> str:
+        if self.name.startswith('__'):  # We represent a generated terminal
+            return self.pattern.raw or self.name
+        else:
+            return self.name
+
+_T = TypeVar('_T', bound="Token")
+
+class Token(str):
+    """A string with meta-information, that is produced by the lexer.
+
+    When parsing text, the resulting chunks of the input that haven't been discarded,
+    will end up in the tree as Token instances. The Token class inherits from Python's ``str``,
+    so normal string comparisons and operations will work as expected.
+
+    Attributes:
+        type: Name of the token (as specified in grammar)
+        value: Value of the token (redundant, as ``token.value == token`` will always be true)
+        start_pos: The index of the token in the text
+        line: The line of the token in the text (starting with 1)
+        column: The column of the token in the text (starting with 1)
+        end_line: The line where the token ends
+        end_column: The next column after the end of the token. For example,
+            if the token is a single character with a column value of 4,
+            end_column will be 5.
+        end_pos: the index where the token ends (basically ``start_pos + len(token)``)
+    """
+    __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
+
+    __match_args__ = ('type', 'value')
+
+    type: str
+    start_pos: Optional[int]
+    value: Any
+    line: Optional[int]
+    column: Optional[int]
+    end_line: Optional[int]
+    end_column: Optional[int]
+    end_pos: Optional[int]
+
+
+    @overload
+    def __new__(
+            cls,
+            type: str,
+            value: Any,
+            start_pos: Optional[int] = None,
+            line: Optional[int] = None,
+            column: Optional[int] = None,
+            end_line: Optional[int] = None,
+            end_column: Optional[int] = None,
+            end_pos: Optional[int] = None
+    ) -> 'Token':
+        ...
+
+    @overload
+    def __new__(
+            cls,
+            type_: str,
+            value: Any,
+            start_pos: Optional[int] = None,
+            line: Optional[int] = None,
+            column: Optional[int] = None,
+            end_line: Optional[int] = None,
+            end_column: Optional[int] = None,
+            end_pos: Optional[int] = None
+    ) -> 'Token':        ...
+
+    def __new__(cls, *args, **kwargs):
+        if "type_" in kwargs:
+            warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning)
+
+            if "type" in kwargs:
+                raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
+            kwargs["type"] = kwargs.pop("type_")
+
+        return cls._future_new(*args, **kwargs)
+
+
+    @classmethod
+    def _future_new(cls, type, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
+        inst = super(Token, cls).__new__(cls, value)
+
+        inst.type = type
+        inst.start_pos = start_pos
+        inst.value = value
+        inst.line = line
+        inst.column = column
+        inst.end_line = end_line
+        inst.end_column = end_column
+        inst.end_pos = end_pos
+        return inst
+
+    @overload
+    def update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
+        ...
+
+    @overload
+    def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
+        ...
+
+    def update(self, *args, **kwargs):
+        if "type_" in kwargs:
+            warnings.warn("`type_` is deprecated use `type` instead", DeprecationWarning)
+
+            if "type" in kwargs:
+                raise TypeError("Error: using both 'type' and the deprecated 'type_' as arguments.")
+            kwargs["type"] = kwargs.pop("type_")
+
+        return self._future_update(*args, **kwargs)
+
+    def _future_update(self, type: Optional[str] = None, value: Optional[Any] = None) -> 'Token':
+        return Token.new_borrow_pos(
+            type if type is not None else self.type,
+            value if value is not None else self.value,
+            self
+        )
+
+    @classmethod
+    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
+        return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
+
+    def __reduce__(self):
+        return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))
+
+    def __repr__(self):
+        return 'Token(%r, %r)' % (self.type, self.value)
+
+    def __deepcopy__(self, memo):
+        return Token(self.type, self.value, self.start_pos, self.line, self.column)
+
+    def __eq__(self, other):
+        if isinstance(other, Token) and self.type != other.type:
+            return False
+
+        return str.__eq__(self, other)
+
+    __hash__ = str.__hash__
+
+
+class LineCounter:
+    "A utility class for keeping track of line & column information"
+
+    __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char'
+
+    def __init__(self, newline_char):
+        self.newline_char = newline_char
+        self.char_pos = 0
+        self.line = 1
+        self.column = 1
+        self.line_start_pos = 0
+
+    def __eq__(self, other):
+        if not isinstance(other, LineCounter):
+            return NotImplemented
+
+        return self.char_pos == other.char_pos and self.newline_char == other.newline_char
+
+    def feed(self, token: TextOrSlice, test_newline=True):
+        """Consume a token and calculate the new line & column.
+
+        As an optional optimization, set test_newline=False if token doesn't contain a newline.
+        """
+        if test_newline:
+            newlines = token.count(self.newline_char)
+            if newlines:
+                self.line += newlines
+                self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
+
+        self.char_pos += len(token)
+        self.column = self.char_pos - self.line_start_pos + 1
+
+
+class UnlessCallback:
+    def __init__(self, scanner: 'Scanner'):
+        self.scanner = scanner
+
+    def __call__(self, t: Token):
+        res = self.scanner.fullmatch(t.value)
+        if res is not None:
+            t.type = res
+        return t
+
+
+class CallChain:
+    def __init__(self, callback1, callback2, cond):
+        self.callback1 = callback1
+        self.callback2 = callback2
+        self.cond = cond
+
+    def __call__(self, t):
+        t2 = self.callback1(t)
+        return self.callback2(t) if self.cond(t2) else t2
+
+
+def _get_match(re_, regexp, s, flags):
+    m = re_.match(regexp, s, flags)
+    if m:
+        return m.group(0)
+
+def _create_unless(terminals, g_regex_flags, re_, use_bytes):
+    tokens_by_type = classify(terminals, lambda t: type(t.pattern))
+    assert len(tokens_by_type) <= 2, tokens_by_type.keys()
+    embedded_strs = set()
+    callback = {}
+    for retok in tokens_by_type.get(PatternRE, []):
+        unless = []
+        for strtok in tokens_by_type.get(PatternStr, []):
+            if strtok.priority != retok.priority:
+                continue
+            s = strtok.pattern.value
+            if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
+                unless.append(strtok)
+                if strtok.pattern.flags <= retok.pattern.flags:
+                    embedded_strs.add(strtok)
+        if unless:
+            callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, use_bytes=use_bytes))
+
+    new_terminals = [t for t in terminals if t not in embedded_strs]
+    return new_terminals, callback
+
+
+class Scanner:
+    def __init__(self, terminals, g_regex_flags, re_, use_bytes):
+        self.terminals = terminals
+        self.g_regex_flags = g_regex_flags
+        self.re_ = re_
+        self.use_bytes = use_bytes
+
+        self.allowed_types = {t.name for t in self.terminals}
+
+        self._mres = self._build_mres(terminals, len(terminals))
+
+    def _build_mres(self, terminals, max_size):
+        # Python sets an unreasonable group limit (currently 100) in its re module
+        # Worse, the only way to know we reached it is by catching an AssertionError!
+        # This function recursively tries less and less groups until it's successful.
+        mres = []
+        while terminals:
+            pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp()) for t in terminals[:max_size])
+            if self.use_bytes:
+                pattern = pattern.encode('latin-1')
+            try:
+                mre = self.re_.compile(pattern, self.g_regex_flags)
+            except AssertionError:  # Yes, this is what Python provides us.. :/
+                return self._build_mres(terminals, max_size // 2)
+
+            mres.append(mre)
+            terminals = terminals[max_size:]
+        return mres
+
+    def match(self, text: TextSlice, pos):
+        for mre in self._mres:
+            m = mre.match(text.text, pos, text.end)
+            if m:
+                return m.group(0), m.lastgroup
+
+
+    def fullmatch(self, text: str) -> Optional[str]:
+        for mre in self._mres:
+            m = mre.fullmatch(text)
+            if m:
+                return m.lastgroup
+        return None
+
+def _regexp_has_newline(r: str):
+    r"""Expressions that may indicate newlines in a regexp:
+        - newlines (\n)
+        - escaped newline (\\n)
+        - anything but ([^...])
+        - any-char (.) when the flag (?s) exists
+        - spaces (\s)
+    """
+    return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
+
+
+class LexerState:
+    """Represents the current state of the lexer as it scans the text
+    (Lexer objects are only instantiated per grammar, not per text)
+    """
+
+    __slots__ = 'text', 'line_ctr', 'last_token'
+
+    text: TextSlice
+    line_ctr: LineCounter
+    last_token: Optional[Token]
+
+    def __init__(self, text: TextSlice, line_ctr: Optional[LineCounter] = None, last_token: Optional[Token]=None):
+        if isinstance(text, TextSlice):
+            if line_ctr is None:
+                line_ctr = LineCounter(b'\n' if isinstance(text.text, bytes) else '\n')
+
+                if text.start > 0:
+                    # Advance the line-count until line_ctr.char_pos == text.start
+                    line_ctr.feed(TextSlice(text.text, 0, text.start))
+
+            if not (text.start <= line_ctr.char_pos <= text.end):
+                raise ValueError("LineCounter.char_pos is out of bounds")
+
+        self.text = text
+        self.line_ctr = line_ctr
+        self.last_token = last_token
+
+
+    def __eq__(self, other):
+        if not isinstance(other, LexerState):
+            return NotImplemented
+
+        return self.text == other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
+
+    def __copy__(self):
+        return type(self)(self.text, copy(self.line_ctr), self.last_token)
+
+
+class LexerThread:
+    """A thread that ties a lexer instance and a lexer state, to be used by the parser
+    """
+
+    def __init__(self, lexer: 'Lexer', lexer_state: Optional[LexerState]):
+        self.lexer = lexer
+        self.state = lexer_state
+
+    @classmethod
+    def from_text(cls, lexer: 'Lexer', text_or_slice: TextOrSlice) -> 'LexerThread':
+        text = TextSlice.cast_from(text_or_slice)
+        return cls(lexer, LexerState(text))
+
+    @classmethod
+    def from_custom_input(cls, lexer: 'Lexer', text: Any) -> 'LexerThread':
+        return cls(lexer, LexerState(text))
+
+    def lex(self, parser_state):
+        if self.state is None:
+            raise TypeError("Cannot lex: No text assigned to lexer state")
+        return self.lexer.lex(self.state, parser_state)
+
+    def __copy__(self):
+        return type(self)(self.lexer, copy(self.state))
+
+    _Token = Token
+
+
+_Callback = Callable[[Token], Token]
+
+class Lexer(ABC):
+    """Lexer interface
+
+    Method Signatures:
+        lex(self, lexer_state, parser_state) -> Iterator[Token]
+    """
+    @abstractmethod
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
+        return NotImplemented
+
+    def make_lexer_state(self, text: str):
+        "Deprecated"
+        return LexerState(TextSlice.cast_from(text))
+
+
+def _check_regex_collisions(terminal_to_regexp: Dict[TerminalDef, str], comparator, strict_mode, max_collisions_to_show=8):
+    if not comparator:
+        comparator = interegular.Comparator.from_regexes(terminal_to_regexp)
+
+    # When in strict mode, we only ever try to provide one example, so taking
+    # a long time for that should be fine
+    max_time = 2 if strict_mode else 0.2
+
+    # We don't want to show too many collisions.
+    if comparator.count_marked_pairs() >= max_collisions_to_show:
+        return
+    for group in classify(terminal_to_regexp, lambda t: t.priority).values():
+        for a, b in comparator.check(group, skip_marked=True):
+            assert a.priority == b.priority
+            # Mark this pair to not repeat warnings when multiple different BasicLexers see the same collision
+            comparator.mark(a, b)
+
+            # Notify the user
+            message = f"Collision between Terminals {a.name} and {b.name}. "
+            try:
+                example = comparator.get_example_overlap(a, b, max_time).format_multiline()
+            except ValueError:
+                # Couldn't find an example within max_time steps.
+                example = "No example could be found fast enough. However, the collision does still exists"
+            if strict_mode:
+                raise LexError(f"{message}\n{example}")
+            logger.warning("%s The lexer will choose between them arbitrarily.\n%s", message, example)
+            if comparator.count_marked_pairs() >= max_collisions_to_show:
+                logger.warning("Found 8 regex collisions, will not check for more.")
+                return
+
+
+class AbstractBasicLexer(Lexer):
+    terminals_by_name: Dict[str, TerminalDef]
+
+    @abstractmethod
+    def __init__(self, conf: 'LexerConf', comparator=None) -> None:
+        ...
+
+    @abstractmethod
+    def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
+        ...
+
+    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
+        with suppress(EOFError):
+            while True:
+                yield self.next_token(state, parser_state)
+
+
+class BasicLexer(AbstractBasicLexer):
+    terminals: Collection[TerminalDef]
+    ignore_types: FrozenSet[str]
+    newline_types: FrozenSet[str]
+    user_callbacks: Dict[str, _Callback]
+    callback: Dict[str, _Callback]
+    re: ModuleType
+
+    def __init__(self, conf: 'LexerConf', comparator=None) -> None:
+        terminals = list(conf.terminals)
+        assert all(isinstance(t, TerminalDef) for t in terminals), terminals
+
+        self.re = conf.re_module
+
+        if not conf.skip_validation:
+            # Sanitization
+            terminal_to_regexp = {}
+            for t in terminals:
+                regexp = t.pattern.to_regexp()
+                try:
+                    self.re.compile(regexp, conf.g_regex_flags)
+                except self.re.error:
+                    raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
+
+                if t.pattern.min_width == 0:
+                    raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
+                if t.pattern.type == "re":
+                    terminal_to_regexp[t] = regexp
+
+            if not (set(conf.ignore) <= {t.name for t in terminals}):
+                raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))
+
+            if has_interegular:
+                _check_regex_collisions(terminal_to_regexp, comparator, conf.strict)
+            elif conf.strict:
+                raise LexError("interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`.")
+
+        # Init
+        self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
+        self.ignore_types = frozenset(conf.ignore)
+
+        terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
+        self.terminals = terminals
+        self.user_callbacks = conf.callbacks
+        self.g_regex_flags = conf.g_regex_flags
+        self.use_bytes = conf.use_bytes
+        self.terminals_by_name = conf.terminals_by_name
+
+        self._scanner: Optional[Scanner] = None
+
+    def _build_scanner(self) -> Scanner:
+        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
+        assert all(self.callback.values())
+
+        for type_, f in self.user_callbacks.items():
+            if type_ in self.callback:
+                # Already a callback there, probably UnlessCallback
+                self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
+            else:
+                self.callback[type_] = f
+
+        return Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
+
+    @property
+    def scanner(self) -> Scanner:
+        if self._scanner is None:
+            self._scanner = self._build_scanner()
+        return self._scanner
+
+    def match(self, text, pos):
+        return self.scanner.match(text, pos)
+
+    def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
+        line_ctr = lex_state.line_ctr
+        while line_ctr.char_pos < lex_state.text.end:
+            res = self.match(lex_state.text, line_ctr.char_pos)
+            if not res:
+                allowed = self.scanner.allowed_types - self.ignore_types
+                if not allowed:
+                    allowed = {"<END-OF-FILE>"}
+                raise UnexpectedCharacters(lex_state.text.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
+                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
+                                           state=parser_state, terminals_by_name=self.terminals_by_name)
+
+            value, type_ = res
+
+            ignored = type_ in self.ignore_types
+            t = None
+            if not ignored or type_ in self.callback:
+                t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+            line_ctr.feed(value, type_ in self.newline_types)
+            if t is not None:
+                t.end_line = line_ctr.line
+                t.end_column = line_ctr.column
+                t.end_pos = line_ctr.char_pos
+                if t.type in self.callback:
+                    t = self.callback[t.type](t)
+                if not ignored:
+                    if not isinstance(t, Token):
+                        raise LexError("Callbacks must return a token (returned %r)" % t)
+                    lex_state.last_token = t
+                    return t
+
+        # EOF
+        raise EOFError(self)
+
+
+class ContextualLexer(Lexer):
+    lexers: Dict[int, AbstractBasicLexer]
+    root_lexer: AbstractBasicLexer
+
+    BasicLexer: Type[AbstractBasicLexer] = BasicLexer
+
+    def __init__(self, conf: 'LexerConf', states: Dict[int, Collection[str]], always_accept: Collection[str]=()) -> None:
+        terminals = list(conf.terminals)
+        terminals_by_name = conf.terminals_by_name
+
+        trad_conf = copy(conf)
+        trad_conf.terminals = terminals
+
+        if has_interegular and not conf.skip_validation:
+            comparator = interegular.Comparator.from_regexes({t: t.pattern.to_regexp() for t in terminals})
+        else:
+            comparator = None
+        lexer_by_tokens: Dict[FrozenSet[str], AbstractBasicLexer] = {}
+        self.lexers = {}
+        for state, accepts in states.items():
+            key = frozenset(accepts)
+            try:
+                lexer = lexer_by_tokens[key]
+            except KeyError:
+                accepts = set(accepts) | set(conf.ignore) | set(always_accept)
+                lexer_conf = copy(trad_conf)
+                lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name]
+                lexer = self.BasicLexer(lexer_conf, comparator)
+                lexer_by_tokens[key] = lexer
+
+            self.lexers[state] = lexer
+
+        assert trad_conf.terminals is terminals
+        trad_conf.skip_validation = True  # We don't need to verify all terminals again
+        self.root_lexer = self.BasicLexer(trad_conf, comparator)
+
+    def lex(self, lexer_state: LexerState, parser_state: 'ParserState') -> Iterator[Token]:
+        try:
+            while True:
+                lexer = self.lexers[parser_state.position]
+                yield lexer.next_token(lexer_state, parser_state)
+        except EOFError:
+            pass
+        except UnexpectedCharacters as e:
+            # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
+            # This tests the input against the global context, to provide a nicer error.
+            try:
+                last_token = lexer_state.last_token  # Save last_token. Calling root_lexer.next_token will change this to the wrong token
+                token = self.root_lexer.next_token(lexer_state, parser_state)
+                raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name)
+            except UnexpectedCharacters:
+                raise e  # Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set.
+
+###}
@@ -0,0 +1,391 @@
+"""Provides functions for the automatic building and shaping of the parse-tree."""
+
+from typing import List
+
+from .exceptions import GrammarError, ConfigurationError
+from .lexer import Token
+from .tree import Tree
+from .visitors import Transformer_InPlace
+from .visitors import _vargs_meta, _vargs_meta_inline
+
+###{standalone
+from functools import partial, wraps
+from itertools import product
+
+
+class ExpandSingleChild:
+    def __init__(self, node_builder):
+        self.node_builder = node_builder
+
+    def __call__(self, children):
+        if len(children) == 1:
+            return children[0]
+        else:
+            return self.node_builder(children)
+
+
+
+class PropagatePositions:
+    """Node-builder wrapper that fills in position metadata on the built Tree.
+
+    After ``node_builder`` returns, and only if the result is a ``Tree``, start
+    attributes are taken from the first child that yields metadata and end
+    attributes from the last such child. ``node_filter``, when given, is a
+    predicate; children for which it returns a falsy value are ignored.
+    """
+
+    def __init__(self, node_builder, node_filter=None):
+        self.node_builder = node_builder
+        self.node_filter = node_filter
+
+    def __call__(self, children):
+        """Build the node from ``children`` and annotate it with positions when it is a Tree."""
+        res = self.node_builder(children)
+
+        if isinstance(res, Tree):
+            # Calculate positions while the tree is streaming, according to the rule:
+            # - nodes start at the start of their first child's container,
+            #   and end at the end of their last child's container.
+            # Containers are nodes that take up space in text, but have been inlined in the tree.
+
+            res_meta = res.meta
+
+            first_meta = self._pp_get_meta(children)
+            if first_meta is not None:
+                if not hasattr(res_meta, 'line'):
+                    # Only fill in the start position when it is not set yet. The meta may
+                    # already be set, probably because the rule has been inlined (e.g. `?rule`)
+                    res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
+                    res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
+                    res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
+                    res_meta.empty = False
+
+                # The container start is always refreshed, even when the start position was already present.
+                res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
+                res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
+                res_meta.container_start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
+
+            # Same procedure for the end position, scanning the children from the back.
+            last_meta = self._pp_get_meta(reversed(children))
+            if last_meta is not None:
+                if not hasattr(res_meta, 'end_line'):
+                    res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
+                    res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
+                    res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
+                    res_meta.empty = False
+
+                res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
+                res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
+                res_meta.container_end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
+
+        return res
+
+    def _pp_get_meta(self, children):
+        """Return position metadata from the first usable child, or None if there is none.
+
+        A Tree contributes its ``meta`` unless ``meta.empty`` is truthy, a Token is
+        returned as-is, and any other object exposing ``__lark_meta__`` contributes
+        the result of calling it. Children rejected by ``node_filter`` are skipped.
+        """
+        for c in children:
+            if self.node_filter is not None and not self.node_filter(c):
+                continue
+            if isinstance(c, Tree):
+                if not c.meta.empty:
+                    return c.meta
+            elif isinstance(c, Token):
+                return c
+            elif hasattr(c, '__lark_meta__'):
+                return c.__lark_meta__()
+
+def make_propagate_positions(option):
+    if callable(option):
+        return partial(PropagatePositions, node_filter=option)
+    elif option is True:
+        return PropagatePositions
+    elif option is False:
+        return None
+
+    raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
+
+
+class ChildFilter:
+    def __init__(self, to_include, append_none, node_builder):
+        self.node_builder = node_builder
+        self.to_include = to_include
+        self.append_none = append_none
+
+    def __call__(self, children):
+        filtered = []
+
+        for i, to_expand, add_none in self.to_include:
+            if add_none:
+                filtered += [None] * add_none
+            if to_expand:
+                filtered += children[i].children
+            else:
+                filtered.append(children[i])
+
+        if self.append_none:
+            filtered += [None] * self.append_none
+
+        return self.node_builder(filtered)
+
+
+class ChildFilterLALR(ChildFilter):
+    """Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"""
+
+    def __call__(self, children):
+        filtered = []
+        for i, to_expand, add_none in self.to_include:
+            if add_none:
+                filtered += [None] * add_none
+            if to_expand:
+                if filtered:
+                    filtered += children[i].children
+                else:   # Optimize for left-recursion
+                    filtered = children[i].children
+            else:
+                filtered.append(children[i])
+
+        if self.append_none:
+            filtered += [None] * self.append_none
+
+        return self.node_builder(filtered)
+
+
+class ChildFilterLALR_NoPlaceholders(ChildFilter):
+    "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
+    def __init__(self, to_include, node_builder):
+        self.node_builder = node_builder
+        self.to_include = to_include
+
+    def __call__(self, children):
+        filtered = []
+        for i, to_expand in self.to_include:
+            if to_expand:
+                if filtered:
+                    filtered += children[i].children
+                else:   # Optimize for left-recursion
+                    filtered = children[i].children
+            else:
+                filtered.append(children[i])
+        return self.node_builder(filtered)
+
+
+def _should_expand(sym):
+    return not sym.is_term and sym.name.startswith('_')
+
+
+def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
+    # Prepare empty_indices as: How many Nones to insert at each index?
+    if _empty_indices:
+        assert _empty_indices.count(False) == len(expansion)
+        s = ''.join(str(int(b)) for b in _empty_indices)
+        empty_indices = [len(ones) for ones in s.split('0')]
+        assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
+    else:
+        empty_indices = [0] * (len(expansion)+1)
+
+    to_include = []
+    nones_to_add = 0
+    for i, sym in enumerate(expansion):
+        nones_to_add += empty_indices[i]
+        if keep_all_tokens or not (sym.is_term and sym.filter_out):
+            to_include.append((i, _should_expand(sym), nones_to_add))
+            nones_to_add = 0
+
+    nones_to_add += empty_indices[len(expansion)]
+
+    if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
+        if _empty_indices or ambiguous:
+            return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
+        else:
+            # LALR without placeholders
+            return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
+
+
+class AmbiguousExpander:
+    """Deal with the case where we're expanding children ('_rule') into a parent but the children
+       are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
+       ambiguous with as many copies as there are ambiguous children, and then copy the ambiguous children
+       into the right parents in the right places, essentially shifting the ambiguity up the tree."""
+    def __init__(self, to_expand, tree_class, node_builder):
+        self.node_builder = node_builder
+        self.tree_class = tree_class
+        self.to_expand = to_expand
+
+    def __call__(self, children):
+        def _is_ambig_tree(t):
+            return hasattr(t, 'data') and t.data == '_ambig'
+
+        # -- When we're repeatedly expanding ambiguities we can end up with nested ambiguities.
+        #    All children of an _ambig node should be a derivation of that ambig node, hence
+        #    it is safe to assume that if we see an _ambig node nested within an ambig node
+        #    it is safe to simply expand it into the parent _ambig node as an alternative derivation.
+        ambiguous = []
+        for i, child in enumerate(children):
+            if _is_ambig_tree(child):
+                if i in self.to_expand:
+                    ambiguous.append(i)
+
+                child.expand_kids_by_data('_ambig')
+
+        if not ambiguous:
+            return self.node_builder(children)
+
+        expand = [child.children if i in ambiguous else (child,) for i, child in enumerate(children)]
+        return self.tree_class('_ambig', [self.node_builder(list(f)) for f in product(*expand)])
+
+
+def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
+    to_expand = [i for i, sym in enumerate(expansion)
+                 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
+    if to_expand:
+        return partial(AmbiguousExpander, to_expand, tree_class)
+
+
+class AmbiguousIntermediateExpander:
+    """
+    Propagate ambiguous intermediate nodes and their derivations up to the
+    current rule.
+
+    In general, converts
+
+    rule
+      _iambig
+        _inter
+          someChildren1
+          ...
+        _inter
+          someChildren2
+          ...
+      someChildren3
+      ...
+
+    to
+
+    _ambig
+      rule
+        someChildren1
+        ...
+        someChildren3
+        ...
+      rule
+        someChildren2
+        ...
+        someChildren3
+        ...
+      rule
+        childrenFromNestedIambigs
+        ...
+        someChildren3
+        ...
+      ...
+
+    propagating up any nested '_iambig' nodes along the way.
+    """
+
+    def __init__(self, tree_class, node_builder):
+        # node_builder: callable that builds the rule's node from a list of children.
+        # tree_class: called as tree_class(data, children) to create '_inter' and '_ambig' nodes.
+        self.node_builder = node_builder
+        self.tree_class = tree_class
+
+    def __call__(self, children):
+        """Build the node, or an '_ambig' node with one alternative per collapsed derivation."""
+        def _is_iambig_tree(child):
+            return hasattr(child, 'data') and child.data == '_iambig'
+
+        def _collapse_iambig(children):
+            """
+            Recursively flatten the derivations of the parent of an '_iambig'
+            node. Returns a list of '_inter' nodes guaranteed not
+            to contain any nested '_iambig' nodes, or None if children does
+            not contain an '_iambig' node.
+            """
+
+            # Due to the structure of the SPPF,
+            # an '_iambig' node can only appear as the first child
+            if children and _is_iambig_tree(children[0]):
+                iambig_node = children[0]
+                result = []
+                for grandchild in iambig_node.children:
+                    collapsed = _collapse_iambig(grandchild.children)
+                    if collapsed:
+                        # Nested ambiguity: append the trailing siblings to each flattened
+                        # derivation (this extends their child lists in place).
+                        for child in collapsed:
+                            child.children += children[1:]
+                        result += collapsed
+                    else:
+                        # Plain derivation: a fresh '_inter' node holding its children
+                        # followed by the trailing siblings.
+                        new_tree = self.tree_class('_inter', grandchild.children + children[1:])
+                        result.append(new_tree)
+                return result
+
+        collapsed = _collapse_iambig(children)
+        if collapsed:
+            # Build the rule node once per derivation and group the results under '_ambig'.
+            processed_nodes = [self.node_builder(c.children) for c in collapsed]
+            return self.tree_class('_ambig', processed_nodes)
+
+        # No '_iambig' first child (or no derivations): build the node as usual.
+        return self.node_builder(children)
+
+
+
+def inplace_transformer(func):
+    @wraps(func)
+    def f(children):
+        # function name in a Transformer is a rule name.
+        tree = Tree(func.__name__, children)
+        return func(tree)
+    return f
+
+
+def apply_visit_wrapper(func, name, wrapper):
+    if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
+        raise NotImplementedError("Meta args not supported for internal transformer; use YourTransformer().transform(parser.parse()) instead")
+
+    @wraps(func)
+    def f(children):
+        return wrapper(func, name, children, None)
+    return f
+
+
+class ParseTreeBuilder:
+    """Prepare, for every grammar rule, the chain of node-builder wrappers that shapes its tree node.
+
+    The chains are computed once in the constructor; ``create_callback()`` then binds
+    them to either a transformer's methods or to ``tree_class``.
+    """
+
+    def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
+        self.tree_class = tree_class
+        self.propagate_positions = propagate_positions
+        self.ambiguous = ambiguous
+        self.maybe_placeholders = maybe_placeholders
+
+        # List of (rule, wrapper_chain) pairs, materialized eagerly from the generator below.
+        self.rule_builders = list(self._init_builders(rules))
+
+    def _init_builders(self, rules):
+        """Yield ``(rule, wrapper_chain)`` for each rule; the chain holds only the wrappers that apply."""
+        propagate_positions = make_propagate_positions(self.propagate_positions)
+
+        for rule in rules:
+            options = rule.options
+            keep_all_tokens = options.keep_all_tokens
+            expand_single_child = options.expand1
+
+            # Each entry is either a wrapper factory or a falsy value; filter(None, ...) drops
+            # the falsy ones. Order matters: create_callback() applies the entries first to
+            # last, so the first entry ends up innermost (closest to the user callback).
+            wrapper_chain = list(filter(None, [
+                (expand_single_child and not rule.alias) and ExpandSingleChild,
+                maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
+                propagate_positions,
+                self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
+                self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
+            ]))
+
+            yield rule, wrapper_chain
+
+    def create_callback(self, transformer=None):
+        """Return a dict mapping each rule to its fully wrapped callback.
+
+        For every rule the base callback is the transformer attribute named after the
+        rule's alias, template source or origin name (first truthy one). When the
+        transformer has no such attribute, the default callback is used instead.
+
+        Raises:
+            GrammarError: if the same rule appears more than once.
+        """
+        callbacks = {}
+
+        # Fallback for rules without a matching transformer attribute: the transformer's
+        # __default__ (called with None as third argument) if present, else tree_class.
+        default_handler = getattr(transformer, '__default__', None)
+        if default_handler:
+            def default_callback(data, children):
+                return default_handler(data, children, None)
+        else:
+            default_callback = self.tree_class
+
+        for rule, wrapper_chain in self.rule_builders:
+
+            user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
+            try:
+                f = getattr(transformer, user_callback_name)
+                wrapper = getattr(f, 'visit_wrapper', None)
+                if wrapper is not None:
+                    f = apply_visit_wrapper(f, user_callback_name, wrapper)
+                elif isinstance(transformer, Transformer_InPlace):
+                    f = inplace_transformer(f)
+            except AttributeError:
+                # No such attribute on the transformer (this includes transformer=None).
+                f = partial(default_callback, user_callback_name)
+
+            # Wrap inside-out: each wrapper receives the callback built so far.
+            for w in wrapper_chain:
+                f = w(f)
+
+            if rule in callbacks:
+                raise GrammarError("Rule '%s' already exists" % (rule,))
+
+            callbacks[rule] = f
+
+        return callbacks
+
+###}
@@ -0,0 +1,284 @@
+from typing import Any, Callable, Dict, Optional, Collection, Union, TYPE_CHECKING
+
+from .exceptions import ConfigurationError, GrammarError, assert_config
+from .utils import get_regexp_width, Serialize, TextOrSlice, TextSlice, LarkInput
+from .lexer import LexerThread, BasicLexer, ContextualLexer, Lexer
+from .parsers import earley, xearley, cyk
+from .parsers.lalr_parser import LALR_Parser
+from .tree import Tree
+from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
+
+if TYPE_CHECKING:
+    from .parsers.lalr_analysis import ParseTableBase
+
+
+###{standalone
+
+def _wrap_lexer(lexer_class):
+    future_interface = getattr(lexer_class, '__future_interface__', 0)
+    if future_interface == 2:
+        return lexer_class
+    elif future_interface == 1:
+        class CustomLexerWrapper1(Lexer):
+            def __init__(self, lexer_conf):
+                self.lexer = lexer_class(lexer_conf)
+            def lex(self, lexer_state, parser_state):
+                if isinstance(lexer_state.text, TextSlice) and not lexer_state.text.is_complete_text():
+                    raise TypeError("Interface=1 Custom Lexer don't support TextSlice")
+                lexer_state.text = lexer_state.text
+                return self.lexer.lex(lexer_state, parser_state)
+        return CustomLexerWrapper1
+    elif future_interface == 0:
+        class CustomLexerWrapper0(Lexer):
+            def __init__(self, lexer_conf):
+                self.lexer = lexer_class(lexer_conf)
+
+            def lex(self, lexer_state, parser_state):
+                if isinstance(lexer_state.text, TextSlice):
+                    if not lexer_state.text.is_complete_text():
+                        raise TypeError("Interface=0 Custom Lexer don't support TextSlice")
+                    return self.lexer.lex(lexer_state.text.text)
+                return self.lexer.lex(lexer_state.text)
+        return CustomLexerWrapper0
+    else:
+        raise ValueError(f"Unknown __future_interface__ value {future_interface}, integer 0-2 expected")
+
+
+def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
+    parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
+    cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
+    parser = cls.deserialize(data['parser'], memo, callbacks, options.debug)
+    parser_conf.callbacks = callbacks
+    return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
+
+
+# Registry of parser factories, keyed by parser type name. ParsingFrontend looks up
+# `parser_conf.parser_type` here and calls the entry as `factory(lexer_conf, parser_conf, options)`.
+_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}
+
+
+class ParsingFrontend(Serialize):
+    __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
+
+    lexer_conf: LexerConf
+    parser_conf: ParserConf
+    options: Any
+
+    def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None):
+        self.parser_conf = parser_conf
+        self.lexer_conf = lexer_conf
+        self.options = options
+
+        # Set-up parser
+        if parser:  # From cache
+            self.parser = parser
+        else:
+            create_parser = _parser_creators.get(parser_conf.parser_type)
+            assert create_parser is not None, "{} is not supported in standalone mode".format(
+                    parser_conf.parser_type
+                )
+            self.parser = create_parser(lexer_conf, parser_conf, options)
+
+        # Set-up lexer
+        lexer_type = lexer_conf.lexer_type
+        self.skip_lexer = False
+        if lexer_type in ('dynamic', 'dynamic_complete'):
+            assert lexer_conf.postlex is None
+            self.skip_lexer = True
+            return
+
+        if isinstance(lexer_type, type):
+            assert issubclass(lexer_type, Lexer)
+            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
+        elif isinstance(lexer_type, str):
+            create_lexer = {
+                'basic': create_basic_lexer,
+                'contextual': create_contextual_lexer,
+            }[lexer_type]
+            self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
+        else:
+            raise TypeError("Bad value for lexer_type: {lexer_type}")
+
+        if lexer_conf.postlex:
+            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
+
+    def _verify_start(self, start=None):
+        if start is None:
+            start_decls = self.parser_conf.start
+            if len(start_decls) > 1:
+                raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
+            start ,= start_decls
+        elif start not in self.parser_conf.start:
+            raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
+        return start
+
+    def _make_lexer_thread(self, text: Optional[LarkInput]) -> Union[LarkInput, LexerThread, None]:
+        cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
+        if self.skip_lexer:
+            return text
+        if text is None:
+            return cls(self.lexer, None)
+        if isinstance(text, (str, bytes, TextSlice)):
+            return cls.from_text(self.lexer, text)
+        return cls.from_custom_input(self.lexer, text)
+
+    def parse(self, text: Optional[LarkInput], start=None, on_error=None):
+        if self.lexer_conf.lexer_type in ("dynamic", "dynamic_complete"):
+            if isinstance(text, TextSlice) and not text.is_complete_text():
+                raise TypeError(f"Lexer {self.lexer_conf.lexer_type} does not support text slices.")
+
+        chosen_start = self._verify_start(start)
+        kw = {} if on_error is None else {'on_error': on_error}
+        stream = self._make_lexer_thread(text)
+        return self.parser.parse(stream, chosen_start, **kw)
+
+    def parse_interactive(self, text: Optional[TextOrSlice]=None, start=None):
+        # TODO BREAK - Change text from Optional[str] to text: str = ''.
+        #   Would break behavior of exhaust_lexer(), which currently raises TypeError, and after the change would just return []
+        chosen_start = self._verify_start(start)
+        if self.parser_conf.parser_type != 'lalr':
+            raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
+        stream = self._make_lexer_thread(text)
+        return self.parser.parse_interactive(stream, chosen_start)
+
+
+def _validate_frontend_args(parser, lexer) -> None:
+    assert_config(parser, ('lalr', 'earley', 'cyk'))
+    if not isinstance(lexer, type):     # not custom lexer?
+        expected = {
+            'lalr': ('basic', 'contextual'),
+            'earley': ('basic', 'dynamic', 'dynamic_complete'),
+            'cyk': ('basic', ),
+         }[parser]
+        assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
+
+
+def _get_lexer_callbacks(transformer, terminals):
+    result = {}
+    for terminal in terminals:
+        callback = getattr(transformer, terminal.name, None)
+        if callback is not None:
+            result[terminal.name] = callback
+    return result
+
+class PostLexConnector:
+    def __init__(self, lexer, postlexer):
+        self.lexer = lexer
+        self.postlexer = postlexer
+
+    def lex(self, lexer_state, parser_state):
+        i = self.lexer.lex(lexer_state, parser_state)
+        return self.postlexer.process(i)
+
+
+
+def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
+    cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
+    return cls(lexer_conf)
+
+def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer:
+    cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
+    parse_table: ParseTableBase[int] = parser._parse_table
+    states: Dict[int, Collection[str]] = {idx:list(t.keys()) for idx, t in parse_table.states.items()}
+    always_accept: Collection[str] = postlex.always_accept if postlex else ()
+    return cls(lexer_conf, states, always_accept=always_accept)
+
+def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser:
+    debug = options.debug if options else False
+    strict = options.strict if options else False
+    cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
+    return cls(parser_conf, debug=debug, strict=strict)
+
+# Register the LALR factory under the 'lalr' parser type.
+_parser_creators['lalr'] = create_lalr_parser
+
+###}
+
+class EarleyRegexpMatcher:
+    def __init__(self, lexer_conf):
+        self.regexps = {}
+        for t in lexer_conf.terminals:
+            regexp = t.pattern.to_regexp()
+            try:
+                width = get_regexp_width(regexp)[0]
+            except ValueError:
+                raise GrammarError("Bad regexp in token %s: %s" % (t.name, regexp))
+            else:
+                if width == 0:
+                    raise GrammarError("Dynamic Earley doesn't allow zero-width regexps", t)
+            if lexer_conf.use_bytes:
+                regexp = regexp.encode('utf-8')
+
+            self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags)
+
+    def match(self, term, text, index=0):
+        return self.regexps[term.name].match(text, index)
+
+
+def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
+    if lexer_conf.callbacks:
+        raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.")
+
+    earley_matcher = EarleyRegexpMatcher(lexer_conf)
+    return xearley.Parser(lexer_conf, parser_conf, earley_matcher.match, **kw)
+
+def _match_earley_basic(term, token):
+    return term.name == token.type
+
+def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
+    return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw)
+
+def create_earley_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options) -> earley.Parser:
+    resolve_ambiguity = options.ambiguity == 'resolve'
+    debug = options.debug if options else False
+    tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
+
+    extra = {}
+    if lexer_conf.lexer_type == 'dynamic':
+        f = create_earley_parser__dynamic
+    elif lexer_conf.lexer_type == 'dynamic_complete':
+        extra['complete_lex'] = True
+        f = create_earley_parser__dynamic
+    else:
+        f = create_earley_parser__basic
+
+    return f(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity,
+             debug=debug, tree_class=tree_class, ordered_sets=options.ordered_sets, **extra)
+
+
+
+class CYK_FrontEnd:
+    def __init__(self, lexer_conf, parser_conf, options=None):
+        self.parser = cyk.Parser(parser_conf.rules)
+
+        self.callbacks = parser_conf.callbacks
+
+    def parse(self, lexer_thread, start):
+        tokens = list(lexer_thread.lex(None))
+        tree = self.parser.parse(tokens, start)
+        return self._transform(tree)
+
+    def _transform(self, tree):
+        subtrees = list(tree.iter_subtrees())
+        for subtree in subtrees:
+            subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
+
+        return self._apply_callback(tree)
+
+    def _apply_callback(self, tree):
+        return self.callbacks[tree.rule](tree.children)
+
+
+# Register the Earley factory and the CYK front-end class under their parser type names.
+_parser_creators['earley'] = create_earley_parser
+_parser_creators['cyk'] = CYK_FrontEnd
+
+
+def _construct_parsing_frontend(
+        parser_type: _ParserArgType,
+        lexer_type: _LexerArgType,
+        lexer_conf,
+        parser_conf,
+        options
+):
+    assert isinstance(lexer_conf, LexerConf)
+    assert isinstance(parser_conf, ParserConf)
+    parser_conf.parser_type = parser_type
+    lexer_conf.lexer_type = lexer_type
+    return ParsingFrontend(lexer_conf, parser_conf, options)
@@ -0,0 +1,340 @@
+"""This module implements a CYK parser."""
+
+# Author: https://github.com/ehudt (2018)
+#
+# Adapted by Erez
+
+
+from collections import defaultdict
+import itertools
+
+from ..exceptions import ParseError
+from ..lexer import Token
+from ..tree import Tree
+from ..grammar import Terminal as T, NonTerminal as NT, Symbol
+
+def match(t, s):
+    assert isinstance(t, T)
+    return t.name == s.type
+
+
+class Rule:
+    """Context-free grammar rule."""
+
+    def __init__(self, lhs, rhs, weight, alias):
+        super(Rule, self).__init__()
+        assert isinstance(lhs, NT), lhs
+        assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs
+        self.lhs = lhs
+        self.rhs = rhs
+        self.weight = weight
+        self.alias = alias
+
+    def __str__(self):
+        return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs))
+
+    def __repr__(self):
+        return str(self)
+
+    def __hash__(self):
+        return hash((self.lhs, tuple(self.rhs)))
+
+    def __eq__(self, other):
+        return self.lhs == other.lhs and self.rhs == other.rhs
+
+    def __ne__(self, other):
+        return not (self == other)
+
+
+class Grammar:
+    """Context-free grammar."""
+
+    def __init__(self, rules):
+        self.rules = frozenset(rules)
+
+    def __eq__(self, other):
+        return self.rules == other.rules
+
+    def __str__(self):
+        return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'
+
+    def __repr__(self):
+        return str(self)
+
+
+# Parse tree data structures
+class RuleNode:
+    """A node in the parse tree, which also contains the full rhs rule."""
+
+    def __init__(self, rule, children, weight=0):
+        self.rule = rule
+        self.children = children
+        self.weight = weight
+
+    def __repr__(self):
+        return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children))
+
+
+
+class Parser:
+    """Parser wrapper."""
+
+    def __init__(self, rules):
+        super(Parser, self).__init__()
+        self.orig_rules = {rule: rule for rule in rules}
+        rules = [self._to_rule(rule) for rule in rules]
+        self.grammar = to_cnf(Grammar(rules))
+
+    def _to_rule(self, lark_rule):
+        """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
+        assert isinstance(lark_rule.origin, NT)
+        assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
+        return Rule(
+            lark_rule.origin, lark_rule.expansion,
+            weight=lark_rule.options.priority if lark_rule.options.priority else 0,
+            alias=lark_rule)
+
+    def parse(self, tokenized, start):  # pylint: disable=invalid-name
+        """Parses input, which is a list of tokens."""
+        assert start
+        start = NT(start)
+
+        table, trees = _parse(tokenized, self.grammar)
+        # Check if the parse succeeded.
+        if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]):
+            raise ParseError('Parsing failed.')
+        parse = trees[(0, len(tokenized) - 1)][start]
+        return self._to_tree(revert_cnf(parse))
+
+    def _to_tree(self, rule_node):
+        """Converts a RuleNode parse tree to a lark Tree."""
+        orig_rule = self.orig_rules[rule_node.rule.alias]
+        children = []
+        for child in rule_node.children:
+            if isinstance(child, RuleNode):
+                children.append(self._to_tree(child))
+            else:
+                assert isinstance(child.name, Token)
+                children.append(child.name)
+        t = Tree(orig_rule.origin, children)
+        t.rule=orig_rule
+        return t
+
+
+def print_parse(node, indent=0):
+    if isinstance(node, RuleNode):
+        print(' ' * (indent * 2) + str(node.rule.lhs))
+        for child in node.children:
+            print_parse(child, indent + 1)
+    else:
+        print(' ' * (indent * 2) + str(node.s))
+
+
+def _parse(s, g):
+    """Parses sentence 's' using CNF grammar 'g'."""
+    # The CYK table. Indexed with a 2-tuple: (start pos, end pos)
+    table = defaultdict(set)
+    # Top-level structure is similar to the CYK table. Each cell is a dict from
+    # rule name to the best (lightest) tree for that rule.
+    trees = defaultdict(dict)
+    # Populate base case with existing terminal production rules
+    for i, w in enumerate(s):
+        for terminal, rules in g.terminal_rules.items():
+            if match(terminal, w):
+                for rule in rules:
+                    table[(i, i)].add(rule)
+                    if (rule.lhs not in trees[(i, i)] or
+                        rule.weight < trees[(i, i)][rule.lhs].weight):
+                        trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
+
+    # Iterate over lengths of sub-sentences
+    for l in range(2, len(s) + 1):
+        # Iterate over sub-sentences with the given length
+        for i in range(len(s) - l + 1):
+            # Choose partition of the sub-sentence in [1, l)
+            for p in range(i + 1, i + l):
+                span1 = (i, p - 1)
+                span2 = (p, i + l - 1)
+                for r1, r2 in itertools.product(table[span1], table[span2]):
+                    for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
+                        table[(i, i + l - 1)].add(rule)
+                        r1_tree = trees[span1][r1.lhs]
+                        r2_tree = trees[span2][r2.lhs]
+                        rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
+                        if (rule.lhs not in trees[(i, i + l - 1)]
+                            or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
+                            trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight)
+    return table, trees
+
+
+# This section implements context-free grammar converter to Chomsky normal form.
+# It also implements a conversion of parse trees from its CNF to the original
+# grammar.
+# Overview:
+# Applies the following operations in this order:
+# * TERM: Eliminates non-solitary terminals from all rules
+# * BIN: Eliminates rules with more than 2 symbols on their right-hand-side.
+# * UNIT: Eliminates non-terminal unit rules
+#
+# The following grammar characteristics aren't featured:
+# * Start symbol appears on RHS
+# * Empty rules (epsilon rules)
+
+
+class CnfWrapper:
+    """CNF wrapper for grammar.
+
+  Validates that the input grammar is CNF and provides helper data structures.
+  """
+
+    def __init__(self, grammar):
+        super(CnfWrapper, self).__init__()
+        self.grammar = grammar
+        self.rules = grammar.rules
+        self.terminal_rules = defaultdict(list)
+        self.nonterminal_rules = defaultdict(list)
+        for r in self.rules:
+            # Validate that the grammar is CNF and populate auxiliary data structures.
+            assert isinstance(r.lhs, NT), r
+            if len(r.rhs) not in [1, 2]:
+                raise ParseError("CYK doesn't support empty rules")
+            if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
+                self.terminal_rules[r.rhs[0]].append(r)
+            elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
+                self.nonterminal_rules[tuple(r.rhs)].append(r)
+            else:
+                assert False, r
+
+    def __eq__(self, other):
+        return self.grammar == other.grammar
+
+    def __repr__(self):
+        return repr(self.grammar)
+
+
+class UnitSkipRule(Rule):
+    """A rule that records NTs that were skipped during transformation."""
+
+    def __init__(self, lhs, rhs, skipped_rules, weight, alias):
+        super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
+        self.skipped_rules = skipped_rules
+
+    def __eq__(self, other):
+        return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules
+
+    __hash__ = Rule.__hash__
+
+
+def build_unit_skiprule(unit_rule, target_rule):
+    skipped_rules = []
+    if isinstance(unit_rule, UnitSkipRule):
+        skipped_rules += unit_rule.skipped_rules
+    skipped_rules.append(target_rule)
+    if isinstance(target_rule, UnitSkipRule):
+        skipped_rules += target_rule.skipped_rules
+    return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules,
+                      weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)
+
+
+def get_any_nt_unit_rule(g):
+    """Returns a non-terminal unit rule from 'g', or None if there is none."""
+    for rule in g.rules:
+        if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
+            return rule
+    return None
+
+
+def _remove_unit_rule(g, rule):
+    """Removes 'rule' from 'g' without changing the language produced by 'g'."""
+    new_rules = [x for x in g.rules if x != rule]
+    refs = [x for x in g.rules if x.lhs == rule.rhs[0]]
+    new_rules += [build_unit_skiprule(rule, ref) for ref in refs]
+    return Grammar(new_rules)
+
+
+def _split(rule):
+    """Splits a rule whose len(rhs) > 2 into shorter rules."""
+    rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
+    rule_name = '__SP_%s' % (rule_str) + '_%d'
+    yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
+    for i in range(1, len(rule.rhs) - 2):
+        yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
+    yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')
+
+
+def _term(g):
+    """Applies the TERM rule on 'g' (see top comment)."""
+    all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
+    t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
+    new_rules = []
+    for rule in g.rules:
+        if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
+            new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
+            new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
+            new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
+        else:
+            new_rules.append(rule)
+    return Grammar(new_rules)
+
+
+def _bin(g):
+    """Applies the BIN rule to 'g' (see top comment)."""
+    new_rules = []
+    for rule in g.rules:
+        if len(rule.rhs) > 2:
+            new_rules += _split(rule)
+        else:
+            new_rules.append(rule)
+    return Grammar(new_rules)
+
+
+def _unit(g):
+    """Applies the UNIT rule to 'g' (see top comment)."""
+    nt_unit_rule = get_any_nt_unit_rule(g)
+    while nt_unit_rule:
+        g = _remove_unit_rule(g, nt_unit_rule)
+        nt_unit_rule = get_any_nt_unit_rule(g)
+    return g
+
+
+def to_cnf(g):
+    """Creates a CNF grammar from a general context-free grammar 'g'."""
+    g = _unit(_bin(_term(g)))
+    return CnfWrapper(g)
+
+
+def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
+    if not skipped_rules:
+        return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
+    else:
+        weight = weight - skipped_rules[0].weight
+        return RuleNode(
+            Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
+                unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
+                                skipped_rules[1:], children,
+                                skipped_rules[0].weight, skipped_rules[0].alias)
+            ], weight=weight)
+
+
+def revert_cnf(node):
+    """Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
+    if isinstance(node, T):
+        return node
+    # Reverts TERM rule.
+    if node.rule.lhs.name.startswith('__T_'):
+        return node.children[0]
+    else:
+        children = []
+        for child in map(revert_cnf, node.children):
+            # Reverts BIN rule.
+            if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
+                children += child.children
+            else:
+                children.append(child)
+        # Reverts UNIT rule.
+        if isinstance(node.rule, UnitSkipRule):
+            return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
+                                    node.rule.skipped_rules, children,
+                                    node.rule.weight, node.rule.alias)
+        else:
+            return RuleNode(node.rule, children)
@@ -0,0 +1,312 @@
+"""This module implements an Earley parser.
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+    https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
+"""
+
+from typing import TYPE_CHECKING, Callable, Optional, List, Any
+from collections import deque
+
+from ..lexer import Token
+from ..tree import Tree
+from ..exceptions import UnexpectedEOF, UnexpectedToken
+from ..utils import logger, OrderedSet, dedup_list
+from .grammar_analysis import GrammarAnalyzer
+from ..grammar import NonTerminal
+from .earley_common import Item
+from .earley_forest import ForestSumVisitor, SymbolNode, StableSymbolNode, TokenNode, ForestToParseTree
+
+if TYPE_CHECKING:
+    from ..common import LexerConf, ParserConf
+
+class Parser:
+    """Earley parser that builds an SPPF and optionally converts it to a tree.
+
+    ``parse`` seeds the first Earley set from the start symbol, alternates
+    prediction/completion with scanning for every token, and finally either
+    transforms the resulting forest with ``ForestToParseTree`` or returns the
+    forest root when no tree class was given.
+    """
+    lexer_conf: 'LexerConf'
+    parser_conf: 'ParserConf'
+    debug: bool
+
+    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
+                 resolve_ambiguity: bool=True, debug: bool=False,
+                 tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
+        """Analyse the grammar and precompute the lookup tables used while parsing.
+
+        Parameters:
+            lexer_conf: Lexer configuration; its terminals and lexer type are read here.
+            parser_conf: Parser configuration; its rules and callbacks are read here.
+            term_matcher: Callable used by the scanner as ``term_matcher(expected, token)``.
+            resolve_ambiguity: Forwarded to ``ForestToParseTree``; also disables its cache.
+            debug: When true, ``parse`` tries to render the forest to "sppf.png".
+            tree_class: Tree factory; when None, ``parse`` returns the SPPF root instead.
+            ordered_sets: Selects ``OrderedSet``/``StableSymbolNode`` over ``set``/``SymbolNode``.
+        """
+        analysis = GrammarAnalyzer(parser_conf)
+        self.lexer_conf = lexer_conf
+        self.parser_conf = parser_conf
+        self.resolve_ambiguity = resolve_ambiguity
+        self.debug = debug
+        self.Tree = tree_class
+        self.Set = OrderedSet if ordered_sets else set
+        self.SymbolNode = StableSymbolNode if ordered_sets else SymbolNode
+
+        self.FIRST = analysis.FIRST
+        self.NULLABLE = analysis.NULLABLE
+        self.callbacks = parser_conf.callbacks
+        # TODO add typing info
+        # Maps a rule origin to the rules returned by analysis.expand_rule for it.
+        self.predictions = {}   # type: ignore[var-annotated]
+
+        ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
+        #  the slow 'isupper' in is_terminal.
+        self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
+        self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }
+
+        # Stays None unless some rule or terminal carries a priority (see below).
+        self.forest_sum_visitor = None
+        for rule in parser_conf.rules:
+            if rule.origin not in self.predictions:
+                self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
+
+            ## Detect if any rules/terminals have priorities set. If the user specified priority = None, then
+            #  the priorities will be stripped from all rules/terminals before they reach us, allowing us to
+            #  skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
+            #  on any rules/terminals.
+            if self.forest_sum_visitor is None and rule.options.priority is not None:
+                self.forest_sum_visitor = ForestSumVisitor
+
+        # Check terminals for priorities
+        # Ignore terminal priorities if the basic lexer is used
+        if self.lexer_conf.lexer_type != 'basic' and self.forest_sum_visitor is None:
+            for term in self.lexer_conf.terminals:
+                if term.priority:
+                    self.forest_sum_visitor = ForestSumVisitor
+                    break
+
+        self.term_matcher = term_matcher
+
+
+    def predict_and_complete(self, i, to_scan, columns, transitives, node_cache):
+        """The core Earley Predictor and Completer.
+
+        At each stage of the input, we handle any completed items (things
+        that matched on the last cycle) and use those to predict what should
+        come next in the input stream. The completions and any predicted
+        non-terminals are processed repeatedly until only items expecting a
+        terminal remain; those are added to the scan list (``to_scan``) for
+        the next scanner cycle.
+
+        ``columns[i]`` and ``to_scan`` are updated in place; nothing is returned.
+        """
+        # Held Completions (H in E.Scotts paper).
+        held_completions = {}
+
+        column = columns[i]
+        # R (items) = Ei (column.items)
+        items = deque(column)
+        while items:
+            item = items.pop()    # remove an element, A say, from R
+
+            ### The Earley completer
+            if item.is_complete:   ### (item.s == string)
+                if item.node is None:
+                    label = (item.s, item.start, i)
+                    # Reuse the node cached for this label, creating it only when missing.
+                    item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    item.node.add_family(item.s, item.rule, item.start, None, None)
+
+                # create_leo_transitives(item.rule.origin, item.start)
+
+                ###R Joop Leo right recursion Completer
+                # NOTE(review): nothing in this class adds entries to the dicts in
+                # `transitives`, so this branch looks unreachable from here - confirm.
+                if item.rule.origin in transitives[item.start]:
+                    transitive = transitives[item.start][item.s]
+                    if transitive.previous in transitives[transitive.column]:
+                        root_transitive = transitives[transitive.column][transitive.previous]
+                    else:
+                        root_transitive = transitive
+
+                    new_item = Item(transitive.rule, transitive.ptr, transitive.start)
+                    label = (root_transitive.s, root_transitive.start, i)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_path(root_transitive, item.node)
+                    if new_item.expect in self.TERMINALS:
+                        # Add (B :: aC.B, h, y) to Q
+                        to_scan.add(new_item)
+                    elif new_item not in column:
+                        # Add (B :: aC.B, h, y) to Ei and R
+                        column.add(new_item)
+                        items.append(new_item)
+                ###R Regular Earley completer
+                else:
+                    # Empty has 0 length. If we complete an empty symbol in a particular
+                    # parse step, we need to be able to use that same empty symbol to complete
+                    # any predictions that result, that themselves require empty. Avoids
+                    # infinite recursion on empty symbols.
+                    # held_completions is 'H' in E.Scott's paper.
+                    is_empty_item = item.start == i
+                    if is_empty_item:
+                        held_completions[item.rule.origin] = item.node
+
+                    # Items in the column where this item started that were waiting for its symbol.
+                    originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
+                    for originator in originators:
+                        new_item = originator.advance()
+                        label = (new_item.s, originator.start, i)
+                        new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                        new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
+                        if new_item.expect in self.TERMINALS:
+                            # Add (B :: aC.B, h, y) to Q
+                            to_scan.add(new_item)
+                        elif new_item not in column:
+                            # Add (B :: aC.B, h, y) to Ei and R
+                            column.add(new_item)
+                            items.append(new_item)
+
+            ### The Earley predictor
+            elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
+                new_items = []
+                for rule in self.predictions[item.expect]:
+                    new_item = Item(rule, 0, i)
+                    new_items.append(new_item)
+
+                # Process any held completions (H).
+                if item.expect in held_completions:
+                    new_item = item.advance()
+                    label = (new_item.s, item.start, i)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
+                    new_items.append(new_item)
+
+                for new_item in new_items:
+                    if new_item.expect in self.TERMINALS:
+                        to_scan.add(new_item)
+                    elif new_item not in column:
+                        column.add(new_item)
+                        items.append(new_item)
+
+    def _parse(self, lexer, columns, to_scan, start_symbol=None):
+        """Run the main Earley loop over the tokens produced by ``lexer``.
+
+        ``columns`` grows by one Earley set per token. Returns the scan buffer
+        left after the last token, which ``parse`` uses to report the expected
+        terminals. Raises ``UnexpectedToken`` (from ``scan``) when a token
+        advances no item.
+        """
+
+        def is_quasi_complete(item):
+            # True when the item is complete, or when everything it still expects is
+            # nullable (with a guard for the start symbol expecting itself).
+            if item.is_complete:
+                return True
+
+            quasi = item.advance()
+            while not quasi.is_complete:
+                if quasi.expect not in self.NULLABLE:
+                    return False
+                if quasi.rule.origin == start_symbol and quasi.expect == start_symbol:
+                    return False
+                quasi = quasi.advance()
+            return True
+
+        # def create_leo_transitives(origin, start):
+        #   ...   # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
+
+        def scan(i, token, to_scan):
+            """The core Earley Scanner.
+
+            This is a custom implementation of the scanner that uses the
+            Lark lexer to match tokens. The scan list is built by the
+            Earley predictor, based on the previously completed tokens.
+            This ensures that at each phase of the parse we have a custom
+            lexer context, allowing for more complex ambiguities."""
+            next_to_scan = self.Set()
+            next_set = self.Set()
+            columns.append(next_set)
+            transitives.append({})
+            # Fresh node cache for the new column; returned to the caller.
+            node_cache = {}
+
+            for item in self.Set(to_scan):
+                if match(item.expect, token):
+                    new_item = item.advance()
+                    label = (new_item.s, new_item.start, i + 1)
+                    # 'terminals' may not contain token.type when using %declare
+                    # Additionally, token is not always a Token
+                    # For example, it can be a Tree when using TreeMatcher
+                    term = terminals.get(token.type) if isinstance(token, Token) else None
+                    # Set the priority of the token node to 0 so that the
+                    # terminal priorities do not affect the Tree chosen by
+                    # ForestSumVisitor after the basic lexer has already
+                    # "used up" the terminal priorities
+                    token_node = TokenNode(token, term, priority=0)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
+
+                    if new_item.expect in self.TERMINALS:
+                        # add (B ::= Aai+1.B, h, y) to Q'
+                        next_to_scan.add(new_item)
+                    else:
+                        # add (B ::= Aa+1.B, h, y) to Ei+1
+                        next_set.add(new_item)
+
+            # No item could consume the token: report what was expected instead.
+            if not next_set and not next_to_scan:
+                expect = {i.expect.name for i in to_scan}
+                raise UnexpectedToken(token, expect, considered_rules=set(to_scan), state=frozenset(i.s for i in to_scan))
+
+            return next_to_scan, node_cache
+
+
+        # Define parser functions
+        match = self.term_matcher
+
+        terminals = self.lexer_conf.terminals_by_name
+
+        # Cache for nodes & tokens created in a particular parse step.
+        transitives = [{}]
+
+        ## The main Earley loop.
+        # Run the Prediction/Completion cycle for any Items in the current Earley set.
+        # Completions will be added to the SPPF tree, and predictions will be recursively
+        # processed down to terminals/empty nodes to be added to the scanner for the next
+        # step.
+        # `expects` is handed to the lexer once and then updated in place after every token.
+        expects = {i.expect for i in to_scan}
+        i = 0
+        node_cache = {}
+        for token in lexer.lex(expects):
+            self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+            to_scan, node_cache = scan(i, token, to_scan)
+            i += 1
+
+            expects.clear()
+            expects |= {i.expect for i in to_scan}
+
+        # Final prediction/completion pass for the column after the last token.
+        self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+        ## Column is now the final column in the parse.
+        assert i == len(columns)-1
+        return to_scan
+
+    def parse(self, lexer, start):
+        """Parse the token stream of ``lexer`` starting from the rule named ``start``.
+
+        Returns the result of ``ForestToParseTree.transform`` when a tree class
+        was configured, otherwise the root node of the SPPF.
+
+        Raises:
+            UnexpectedEOF: If the start symbol was not completed over the whole input.
+            RuntimeError: If more than one distinct root node was found.
+        """
+        assert start, start
+        start_symbol = NonTerminal(start)
+
+        columns = [self.Set()]
+        to_scan = self.Set()     # The scan buffer. 'Q' in E.Scott's paper.
+
+        ## Predict for the start_symbol.
+        # Add predicted items to the first Earley set (for the predictor) if they
+        # result in a non-terminal, or the scanner if they result in a terminal.
+        for rule in self.predictions[start_symbol]:
+            item = Item(rule, 0, 0)
+            if item.expect in self.TERMINALS:
+                to_scan.add(item)
+            else:
+                columns[0].add(item)
+
+        to_scan = self._parse(lexer, columns, to_scan, start_symbol)
+
+        # If the parse was successful, the start
+        # symbol should have been completed in the last step of the Earley cycle, and will be in
+        # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
+        solutions = dedup_list(n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0)
+        if not solutions:
+            expected_terminals = [t.expect.name for t in to_scan]
+            raise UnexpectedEOF(expected_terminals, state=frozenset(i.s for i in to_scan))
+        if len(solutions) > 1:
+            raise RuntimeError('Earley should not generate multiple start symbol items! Please report this bug.')
+        solution ,= solutions
+
+        if self.debug:
+            from .earley_forest import ForestToPyDotVisitor
+            try:
+                debug_walker = ForestToPyDotVisitor()
+            except ImportError:
+                logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image")
+            else:
+                debug_walker.visit(solution, "sppf.png")
+
+
+        if self.Tree is not None:
+            # Perform our SPPF -> AST conversion
+            # Disable the ForestToParseTree cache when ambiguity='resolve'
+            # to prevent a tree construction bug. See issue #1283
+            use_cache = not self.resolve_ambiguity
+            # The sum visitor is instantiated only when priorities were detected in __init__.
+            transformer = ForestToParseTree(self.Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity, use_cache)
+            return transformer.transform(solution)
+
+        # return the root of the SPPF
+        return solution
@@ -0,0 +1,42 @@
+"""This module implements useful building blocks for the Earley parser
+"""
+
+
+class Item:
+    "An Earley Item, the atom of the algorithm."
+
+    __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
+    def __init__(self, rule, ptr, start):
+        self.is_complete = len(rule.expansion) == ptr
+        self.rule = rule    # rule
+        self.ptr = ptr      # ptr
+        self.start = start  # j
+        self.node = None    # w
+        if self.is_complete:
+            self.s = rule.origin
+            self.expect = None
+            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
+        else:
+            self.s = (rule, ptr)
+            self.expect = rule.expansion[ptr]
+            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
+        self._hash = hash((self.s, self.start, self.rule))
+
+    def advance(self):
+        return Item(self.rule, self.ptr + 1, self.start)
+
+    def __eq__(self, other):
+        return self is other or (self.s == other.s and self.start == other.start and self.rule == other.rule)
+
+    def __hash__(self):
+        return self._hash
+
+    def __repr__(self):
+        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
+        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
+        symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
+        return '%s (%d)' % (symbol, self.start)
+
+
+# class TransitiveItem(Item):
+#   ...   # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420
@@ -0,0 +1,802 @@
+"""This module provides an SPPF (Shared Packed Parse Forest) implementation.
+
+This is used as the primary output mechanism for the Earley parser
+in order to store complex ambiguities.
+
+A full reference with more details is available here:
+https://web.archive.org/web/20190616123959/http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+"""
+
+from typing import Type, AbstractSet
+from random import randint
+from collections import deque
+from operator import attrgetter
+from importlib import import_module
+from functools import partial
+
+from ..parse_tree_builder import AmbiguousIntermediateExpander
+from ..visitors import Discard
+from ..utils import logger, OrderedSet
+from ..tree import Tree
+
+class ForestNode:
+    """Common base class of the SPPF node types defined in this module.
+
+    It carries no behaviour of its own; ``ForestVisitor.visit`` uses
+    ``isinstance(x, ForestNode)`` to tell a single node from an iterable
+    of nodes.
+    """
+    pass
+
+class SymbolNode(ForestNode):
+    """
+    A node of the SPPF standing for a symbol (or an intermediate LR0 item).
+
+    Nodes are keyed by ``s``. An intermediate node stores an LR0 as a
+    ``(rule, ptr)`` tuple; a completed symbol node stores the origin symbol
+    of the rule (i.e. its left hand side).
+
+    Every child of a Symbol or Intermediate Node is a Packed Node, and each
+    Packed Node child is one derivation of a production. A node with a
+    single child is therefore unambiguous.
+
+    Parameters:
+        s: A Symbol, or a tuple of (rule, ptr) for an intermediate node.
+        start: For dynamic lexers, the index of the start of the substring matched by this symbol (inclusive).
+        end: For dynamic lexers, the index of the end of the substring matched by this symbol (exclusive).
+
+    Properties:
+        is_intermediate: True if this node is an intermediate node.
+        priority: The priority of the node's symbol.
+    """
+    Set: Type[AbstractSet] = set   # Overridden by StableSymbolNode
+    __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate')
+
+    def __init__(self, s, start, end):
+        self.s = s
+        self.start = start
+        self.end = end
+        self.is_intermediate = isinstance(s, tuple)
+
+        ### -inf is used because it can be negated safely without conditionals,
+        #   unlike None or float('NaN'), and it sorts appropriately.
+        self.priority = float('-inf')
+
+        self._children = self.Set()
+        self.paths = self.Set()
+        self.paths_loaded = False
+
+    def add_family(self, lr0, rule, start, left, right):
+        """Record one derivation of this node as a new ``PackedNode`` child."""
+        family = PackedNode(self, lr0, rule, start, left, right)
+        self._children.add(family)
+
+    def add_path(self, transitive, node):
+        """Remember a ``(transitive, node)`` pair to be expanded by ``load_paths``."""
+        entry = (transitive, node)
+        self.paths.add(entry)
+
+    def load_paths(self):
+        """Turn every stored path into a family and mark the paths as loaded."""
+        for transitive, node in self.paths:
+            following = transitive.next_titem
+            if following is None:
+                right = node
+            else:
+                # Chain through a fresh node of the same class that carries
+                # the remainder of the path.
+                right = type(self)(following.s, following.start, self.end)
+                right.add_path(following, node)
+            reduction = transitive.reduction
+            self.add_family(reduction.rule.origin, reduction.rule, reduction.start, reduction.node, right)
+        self.paths_loaded = True
+
+    @property
+    def is_ambiguous(self):
+        """Returns True if this node is ambiguous."""
+        derivations = self.children
+        return len(derivations) > 1
+
+    @property
+    def children(self):
+        """Returns a list of this node's children, ordered by their
+        ``sort_key`` (which puts higher priorities first)."""
+        if not self.paths_loaded:
+            self.load_paths()
+        ordered = list(self._children)
+        ordered.sort(key=attrgetter('sort_key'))
+        return ordered
+
+    def __iter__(self):
+        return iter(self._children)
+
+    def __repr__(self):
+        if not self.is_intermediate:
+            symbol = self.s.name
+        else:
+            rule, ptr = self.s[0], self.s[1]
+            names = [sym.name for sym in rule.expansion]
+            symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(names[:ptr]), ' '.join(names[ptr:]))
+        return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)
+
+class StableSymbolNode(SymbolNode):
+    """A version of SymbolNode that uses OrderedSet for output stability.
+
+    Only the ``Set`` class attribute differs: ``SymbolNode.__init__`` builds
+    its ``_children`` and ``paths`` containers from ``self.Set``.
+    """
+    # Container class used for ``_children`` and ``paths``.
+    Set = OrderedSet
+
+class PackedNode(ForestNode):
+    """
+    A single derivation inside a symbol node.
+
+    Equality and hashing only take ``left`` and ``right`` into account.
+
+    Parameters:
+        rule: The rule associated with this node.
+        parent: The parent of this node.
+        left: The left child of this node. ``None`` if one does not exist.
+        right: The right child of this node. ``None`` if one does not exist.
+        priority: The priority of this node.
+    """
+    __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash')
+
+    def __init__(self, parent, s, rule, start, left, right):
+        self.parent = parent
+        self.s = s
+        self.rule = rule
+        self.start = start
+        self.left = left
+        self.right = right
+        self.priority = float('-inf')
+        # Hash is fixed at construction time from the two children.
+        self._hash = hash((self.left, self.right))
+
+    @property
+    def is_empty(self):
+        """True when the node has neither a left nor a right child."""
+        return all(child is None for child in (self.left, self.right))
+
+    @property
+    def sort_key(self):
+        """
+        Key used to order the PackedNode children of a SymbolNode.
+        A SymbolNode holds several PackedNodes when it matched
+        ambiguously, so this ordering decides in which order the
+        ambiguous derivations are considered.
+        """
+        return (self.is_empty, -self.priority, self.rule.order)
+
+    @property
+    def children(self):
+        """Returns a list of this node's children."""
+        return [child for child in (self.left, self.right) if child is not None]
+
+    def __iter__(self):
+        yield self.left
+        yield self.right
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if not isinstance(other, PackedNode):
+            return False
+        return self.left == other.left and self.right == other.right
+
+    def __hash__(self):
+        return self._hash
+
+    def __repr__(self):
+        if not isinstance(self.s, tuple):
+            symbol = self.s.name
+        else:
+            rule, ptr = self.s[0], self.s[1]
+            names = [sym.name for sym in rule.expansion]
+            symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(names[:ptr]), ' '.join(names[ptr:]))
+        return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)
+
+class TokenNode(ForestNode):
+    """
+    A leaf of the forest wrapping a matched terminal.
+
+    Equality and hashing are based on the wrapped token only.
+
+    Parameters:
+        token: The Token associated with this node.
+        term: The TerminalDef matched by the token.
+        priority: The priority of this node. When omitted, ``term.priority``
+            is used, or 0 if ``term`` is ``None``.
+    """
+    __slots__ = ('token', 'term', 'priority', '_hash')
+
+    def __init__(self, token, term, priority=None):
+        self.token = token
+        self.term = term
+        if priority is None:
+            # Fall back to the terminal's own priority when there is one.
+            priority = 0 if term is None else term.priority
+        self.priority = priority
+        self._hash = hash(token)
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        if not isinstance(other, TokenNode):
+            return False
+        return self.token == other.token
+
+    def __hash__(self):
+        return self._hash
+
+    def __repr__(self):
+        return repr(self.token)
+
+class ForestVisitor:
+    """
+    An abstract base class for building forest visitors.
+
+    This class performs a controllable depth-first walk of an SPPF.
+    The visitor will not enter cycles and will backtrack if one is encountered.
+    Subclasses are notified of cycles through the ``on_cycle`` method.
+
+    Behavior for visit events is defined by overriding the
+    ``visit*node*`` functions.
+
+    The walk is controlled by the return values of the ``visit*node_in``
+    methods. Returning a node(s) will schedule them to be visited. The visitor
+    will begin to backtrack if no nodes are returned.
+
+    Parameters:
+        single_visit: If ``True``, non-Token nodes will only be visited once.
+    """
+
+    def __init__(self, single_visit=False):
+        self.single_visit = single_visit
+
+    def visit_token_node(self, node):
+        """Called when a ``Token`` is visited. ``Token`` nodes are always leaves.
+
+        ``visit`` passes the ``token`` attribute of the ``TokenNode``, not the
+        ``TokenNode`` itself."""
+        pass
+
+    def visit_symbol_node_in(self, node):
+        """Called when a symbol node is visited. Nodes that are returned
+        will be scheduled to be visited. If ``visit_intermediate_node_in``
+        is not implemented, this function will be called for intermediate
+        nodes as well."""
+        pass
+
+    def visit_symbol_node_out(self, node):
+        """Called after all nodes returned from a corresponding ``visit_symbol_node_in``
+        call have been visited. If ``visit_intermediate_node_out``
+        is not implemented, this function will be called for intermediate
+        nodes as well."""
+        pass
+
+    def visit_packed_node_in(self, node):
+        """Called when a packed node is visited. Nodes that are returned
+        will be scheduled to be visited."""
+        pass
+
+    def visit_packed_node_out(self, node):
+        """Called after all nodes returned from a corresponding ``visit_packed_node_in``
+        call have been visited."""
+        pass
+
+    def on_cycle(self, node, path):
+        """Called when a cycle is encountered.
+
+        Parameters:
+            node: The node that causes a cycle.
+            path: The list of nodes being visited: nodes that have been
+                entered but not exited. The first element is the root in a forest
+                visit, and the last element is the node visited most recently.
+                ``path`` should be treated as read-only.
+        """
+        pass
+
+    def get_cycle_in_path(self, node, path):
+        """A utility function for use in ``on_cycle`` to obtain a slice of
+        ``path`` that only contains the nodes that make up the cycle.
+
+        The search runs backwards from the end of ``path`` and compares by
+        identity, so the slice starts at the most recent occurrence of ``node``."""
+        index = len(path) - 1
+        while id(path[index]) != id(node):
+            index -= 1
+        return path[index:]
+
+    def visit(self, root):
+        """Walk the forest rooted at ``root`` depth-first, firing the
+        ``visit_*`` callbacks. The walk is iterative and returns nothing."""
+        # Visiting is a set of IDs of all symbol/intermediate/packed nodes currently
+        # in the stack. It serves two purposes: to detect when we 'recurse' in and out
+        # of a symbol/intermediate so that we can process both up and down. Also,
+        # since the SPPF can have cycles it allows us to detect if we're trying
+        # to recurse into a node that's already on the stack (infinite recursion).
+        visiting = set()
+
+        # set of the IDs of all nodes that have been fully visited (exited)
+        visited = set()
+
+        # a list of nodes that are currently being visited
+        # used for the `on_cycle` callback
+        path = []
+
+        # We do not use recursion here to walk the Forest due to the limited
+        # stack size in python. Therefore input_stack is essentially our stack.
+        # Its entries are either nodes or iterators over nodes.
+        input_stack = deque([root])
+
+        # It is much faster to cache these as locals since they are called
+        # many times in large parses.
+        vpno = getattr(self, 'visit_packed_node_out')
+        vpni = getattr(self, 'visit_packed_node_in')
+        vsno = getattr(self, 'visit_symbol_node_out')
+        vsni = getattr(self, 'visit_symbol_node_in')
+        # The intermediate-node callbacks are optional and fall back to the
+        # symbol-node callbacks.
+        vino = getattr(self, 'visit_intermediate_node_out', vsno)
+        vini = getattr(self, 'visit_intermediate_node_in', vsni)
+        vtn = getattr(self, 'visit_token_node')
+        oc = getattr(self, 'on_cycle')
+
+        while input_stack:
+            # Peek at the top of the stack without popping it.
+            current = next(reversed(input_stack))
+            try:
+                # If the top of the stack is an iterator, pull its next node.
+                next_node = next(current)
+            except StopIteration:
+                # The iterator is exhausted: drop it and resume with its owner.
+                input_stack.pop()
+                continue
+            except TypeError:
+                ### If the current object is not an iterator, pass through to Token/SymbolNode
+                pass
+            else:
+                # Missing children (``None``) are skipped.
+                if next_node is None:
+                    continue
+
+                # The node is already on the stack: report the cycle, don't enter it.
+                if id(next_node) in visiting:
+                    oc(next_node, path)
+                    continue
+
+                input_stack.append(next_node)
+                continue
+
+            if isinstance(current, TokenNode):
+                # Leaves are handled immediately; the callback gets the token itself.
+                vtn(current.token)
+                input_stack.pop()
+                continue
+
+            current_id = id(current)
+            if current_id in visiting:
+                # Second time this node is on top of the stack: everything it
+                # scheduled has been processed, so fire the 'out' callback.
+                if isinstance(current, PackedNode):
+                    vpno(current)
+                elif current.is_intermediate:
+                    vino(current)
+                else:
+                    vsno(current)
+                input_stack.pop()
+                path.pop()
+                visiting.remove(current_id)
+                visited.add(current_id)
+            elif self.single_visit and current_id in visited:
+                # Already fully visited once and single_visit is on: skip it.
+                input_stack.pop()
+            else:
+                # First encounter: mark the node as in progress and fire the
+                # 'in' callback, which decides what to visit next.
+                visiting.add(current_id)
+                path.append(current)
+                if isinstance(current, PackedNode):
+                    next_node = vpni(current)
+                elif current.is_intermediate:
+                    next_node = vini(current)
+                else:
+                    next_node = vsni(current)
+                # Nothing scheduled: the next iteration will fire the 'out' callback.
+                if next_node is None:
+                    continue
+
+                # A callback may return one node or an iterable of nodes.
+                if not isinstance(next_node, ForestNode):
+                    next_node = iter(next_node)
+                elif id(next_node) in visiting:
+                    oc(next_node, path)
+                    continue
+
+                input_stack.append(next_node)
+
+class ForestTransformer(ForestVisitor):
+    """The base class for a bottom-up forest transformation. Most users will
+    want to use ``TreeForestTransformer`` instead as it has a friendlier
+    interface and covers most use cases.
+
+    Transformations are applied via inheritance and overriding of the
+    ``transform*node`` methods.
+
+    ``transform_token_node`` receives a ``Token`` as an argument.
+    All other methods receive the node that is being transformed and
+    a list of the results of the transformations of that node's children.
+    The return value of these methods are the resulting transformations.
+
+    If a node's transformation returns ``Discard``, no data from that node
+    will be passed to its parent's transformation.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # results of transformations, keyed by id(node) (plus the 'result' slot)
+        self.data = {}
+        # used to track parent nodes
+        self.node_stack = deque()
+
+    def transform(self, root):
+        """Perform a transformation on an SPPF.
+
+        Returns the transformed root, or ``None`` when nothing was produced."""
+        # The sentinel key 'result' acts as the parent of the root.
+        self.node_stack.append('result')
+        self.data['result'] = []
+        self.visit(root)
+        results = self.data['result']
+        assert len(results) <= 1
+        return results[0] if results else None
+
+    def transform_symbol_node(self, node, data):
+        """Transform a symbol node."""
+        return node
+
+    def transform_intermediate_node(self, node, data):
+        """Transform an intermediate node."""
+        return node
+
+    def transform_packed_node(self, node, data):
+        """Transform a packed node."""
+        return node
+
+    def transform_token_node(self, node):
+        """Transform a ``Token``."""
+        return node
+
+    def visit_symbol_node_in(self, node):
+        # Open a fresh result list for this node and descend into its children.
+        key = id(node)
+        self.node_stack.append(key)
+        self.data[key] = []
+        return node.children
+
+    def visit_packed_node_in(self, node):
+        key = id(node)
+        self.node_stack.append(key)
+        self.data[key] = []
+        return node.children
+
+    def visit_token_node(self, node):
+        result = self.transform_token_node(node)
+        if result is not Discard:
+            parent_key = self.node_stack[-1]
+            self.data[parent_key].append(result)
+
+    def _visit_node_out_helper(self, node, method):
+        # Leave the node, transform its collected child results with
+        # ``method`` and hand the outcome to whatever is now on top of the stack.
+        self.node_stack.pop()
+        key = id(node)
+        result = method(node, self.data[key])
+        if result is not Discard:
+            parent_key = self.node_stack[-1]
+            self.data[parent_key].append(result)
+        del self.data[key]
+
+    def visit_symbol_node_out(self, node):
+        self._visit_node_out_helper(node, self.transform_symbol_node)
+
+    def visit_intermediate_node_out(self, node):
+        self._visit_node_out_helper(node, self.transform_intermediate_node)
+
+    def visit_packed_node_out(self, node):
+        self._visit_node_out_helper(node, self.transform_packed_node)
+
+
+class ForestSumVisitor(ForestVisitor):
+    """
+    A visitor that assigns priorities to the ambiguous parts of the Forest.
+
+    It is used when explicit rule priorities are requested (normal or
+    inverted). It walks the forest (or subsets thereof) and propagates
+    priorities upwards from the leaves.
+
+    Doing this while parsing would be ideal, but it would mean
+    processing each Earley item several times, which is a big
+    performance drawback. Walking the forest afterwards is the lesser
+    of two evils: parsing can create significantly more Earley items
+    than there are SPPF nodes in the final tree.
+    """
+    def __init__(self):
+        super().__init__(single_visit=True)
+
+    def visit_packed_node_in(self, node):
+        yield node.left
+        yield node.right
+
+    def visit_symbol_node_in(self, node):
+        return iter(node.children)
+
+    def visit_packed_node_out(self, node):
+        # The rule's own priority only counts for non-intermediate parents;
+        # a falsy priority is treated as 0.
+        total = 0
+        if not node.parent.is_intermediate:
+            rule_priority = node.rule.options.priority
+            if rule_priority:
+                total = rule_priority
+        # Children without a ``priority`` attribute (e.g. ``None``) add 0.
+        total += getattr(node.right, 'priority', 0)
+        total += getattr(node.left, 'priority', 0)
+        node.priority = total
+
+    def visit_symbol_node_out(self, node):
+        # A symbol node takes the best priority among its derivations.
+        node.priority = max(map(attrgetter('priority'), node.children))
+
+class PackedData():
+    """Used in transformations of packed nodes to tell apart the data
+    that comes from the left child and from the right child.
+
+    ``left`` and ``right`` hold ``NO_DATA`` when the corresponding child
+    contributed nothing.
+    """
+
+    class _NoData():
+        pass
+
+    NO_DATA = _NoData()
+
+    def __init__(self, node, data):
+        self.left = self.right = self.NO_DATA
+        if not data:
+            return
+        if node.left is None:
+            # Without a left child the first result belongs to the right one.
+            self.right = data[0]
+            return
+        self.left = data[0]
+        if len(data) > 1:
+            self.right = data[1]
+
+class ForestToParseTree(ForestTransformer):
+    """Used by the earley parser when ambiguity equals 'resolve' or
+    'explicit'. Transforms an SPPF into an (ambiguous) parse tree.
+
+    Parameters:
+        tree_class: The tree class to use for construction
+        callbacks: A dictionary of rules to functions that output a tree
+        prioritizer: A ``ForestVisitor`` that manipulates the priorities of ForestNodes
+        resolve_ambiguity: If True, ambiguities will be resolved based on
+                        priorities. Otherwise, `_ambig` nodes will be in the resulting tree.
+        use_cache: If True, the results of packed node transformations will be cached.
+    """
+
+    # NOTE(review): the ``dict()`` and ``ForestSumVisitor()`` defaults are
+    # evaluated once, when the function is defined, so every instance created
+    # without those arguments shares the same two objects.
+    def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=True):
+        super(ForestToParseTree, self).__init__()
+        self.tree_class = tree_class
+        self.callbacks = callbacks
+        self.prioritizer = prioritizer
+        self.resolve_ambiguity = resolve_ambiguity
+        self._use_cache = use_cache
+        # packed node results, keyed by id(node); reset after every visit()
+        self._cache = {}
+        # True from the moment on_cycle() fires until _check_cycle() or
+        # visit_packed_node_in() clears it
+        self._on_cycle_retreat = False
+        # the node passed to the last on_cycle() call, while retreating
+        self._cycle_node = None
+        # ids of parent nodes for which a packed child finished without a retreat
+        self._successful_visits = set()
+
+    def visit(self, root):
+        """Run the prioritizer (if one is set) over ``root``, then walk the
+        forest, and finally drop the cache."""
+        if self.prioritizer:
+            self.prioritizer.visit(root)
+        super(ForestToParseTree, self).visit(root)
+        self._cache = {}
+
+    def on_cycle(self, node, path):
+        """Log the cycle and switch into retreat mode, remembering ``node``."""
+        logger.debug("Cycle encountered in the SPPF at node: %s. "
+                "As infinite ambiguities cannot be represented in a tree, "
+                "this family of derivations will be discarded.", node)
+        self._cycle_node = node
+        self._on_cycle_retreat = True
+
+    def _check_cycle(self, node):
+        """Return ``Discard`` while retreating from a cycle, otherwise ``None``.
+
+        The retreat ends (and ``None`` is returned) once ``node`` is the
+        recorded cycle node or a node in ``_successful_visits``."""
+        if self._on_cycle_retreat:
+            if id(node) == id(self._cycle_node) or id(node) in self._successful_visits:
+                self._cycle_node = None
+                self._on_cycle_retreat = False
+            else:
+                return Discard
+
+    def _collapse_ambig(self, children):
+        """Return ``children`` with every child whose ``data`` is ``'_ambig'``
+        replaced by that child's own children (one level only)."""
+        new_children = []
+        for child in children:
+            if hasattr(child, 'data') and child.data == '_ambig':
+                new_children += child.children
+            else:
+                new_children.append(child)
+        return new_children
+
+    def _call_rule_func(self, node, data):
+        # called when transforming children of symbol nodes
+        # data is a list of trees or tokens that correspond to the
+        # symbol's rule expansion
+        return self.callbacks[node.rule](data)
+
+    def _call_ambig_func(self, node, data):
+        # called when transforming a symbol node
+        # data is a list of trees where each tree's data is
+        # equal to the name of the symbol or one of its aliases.
+        if len(data) > 1:
+            return self.tree_class('_ambig', data)
+        elif data:
+            return data[0]
+        return Discard
+
+    def transform_symbol_node(self, node, data):
+        # A node none of whose derivations completed contributes nothing.
+        if id(node) not in self._successful_visits:
+            return Discard
+        r = self._check_cycle(node)
+        if r is Discard:
+            return r
+        self._successful_visits.remove(id(node))
+        data = self._collapse_ambig(data)
+        return self._call_ambig_func(node, data)
+
+    def transform_intermediate_node(self, node, data):
+        if id(node) not in self._successful_visits:
+            return Discard
+        r = self._check_cycle(node)
+        if r is Discard:
+            return r
+        self._successful_visits.remove(id(node))
+        # Several derivations: wrap each in '_inter' under one '_iambig' tree.
+        if len(data) > 1:
+            children = [self.tree_class('_inter', c) for c in data]
+            return self.tree_class('_iambig', children)
+        return data[0]
+
+    def transform_packed_node(self, node, data):
+        r = self._check_cycle(node)
+        if r is Discard:
+            return r
+        # When resolving ambiguity, only the first successful derivation of a
+        # parent is kept; later ones are discarded.
+        if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
+            return Discard
+        if self._use_cache and id(node) in self._cache:
+            return self._cache[id(node)]
+        children = []
+        assert len(data) <= 2
+        data = PackedData(node, data)
+        if data.left is not PackedData.NO_DATA:
+            # An intermediate left child hands over a list of children,
+            # which is spliced in rather than nested.
+            if node.left.is_intermediate and isinstance(data.left, list):
+                children += data.left
+            else:
+                children.append(data.left)
+        if data.right is not PackedData.NO_DATA:
+            children.append(data.right)
+        # Intermediate parents receive the raw child list; symbol parents
+        # receive the result of the rule callback.
+        transformed = children if node.parent.is_intermediate else self._call_rule_func(node, children)
+        if self._use_cache:
+            self._cache[id(node)] = transformed
+        return transformed
+
+    def visit_symbol_node_in(self, node):
+        # The base call registers the node; its return value is ignored so that
+        # nothing is scheduled while retreating from a cycle.
+        super(ForestToParseTree, self).visit_symbol_node_in(node)
+        if self._on_cycle_retreat:
+            return
+        return node.children
+
+    def visit_packed_node_in(self, node):
+        # Entering a packed node always ends a pending retreat.
+        self._on_cycle_retreat = False
+        to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
+        # Descend only if this derivation is still needed and not cached;
+        # otherwise fall through and return None.
+        if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
+            if not self._use_cache or id(node) not in self._cache:
+                return to_visit
+
+    def visit_packed_node_out(self, node):
+        super(ForestToParseTree, self).visit_packed_node_out(node)
+        # Leaving without a retreat in progress marks the parent as successful.
+        if not self._on_cycle_retreat:
+            self._successful_visits.add(id(node.parent))
+
+def handles_ambiguity(func):
+    """Decorator for methods of subclasses of ``TreeForestTransformer``.
+
+    Marks ``func`` (by setting its ``handles_ambiguity`` attribute) as a
+    method that should receive a list of transformed derivations, and
+    returns the same function object."""
+    setattr(func, 'handles_ambiguity', True)
+    return func
+
+class TreeForestTransformer(ForestToParseTree):
+    """A ``ForestTransformer`` with a tree ``Transformer``-like interface.
+    By default, it will construct a tree.
+
+    Methods defined on a subclass are looked up by the rule/symbol
+    names of the nodes in the forest.
+
+    A method that acts on a rule receives a list with the results of the
+    transformations of the rule's children (trees and tokens by default).
+
+    A method that acts on a token receives that token.
+
+    A rule method may instead be decorated with ``handles_ambiguity``. It
+    then receives a list of the transformations of all the derivations of
+    the rule; by default a list of trees where each tree.data is equal to
+    the rule name or one of its aliases.
+
+    Non-tree transformations are made possible by override of
+    ``__default__``, ``__default_token__``, and ``__default_ambig__``.
+
+    Note:
+        Tree shaping features such as inlined rules and token filtering are
+        not built into the transformation. Positions are also not propagated.
+
+    Parameters:
+        tree_class: The tree class to use for construction
+        prioritizer: A ``ForestVisitor`` that manipulates the priorities of nodes in the SPPF.
+        resolve_ambiguity: If True, ambiguities will be resolved based on priorities.
+        use_cache (bool): If True, caches the results of some transformations,
+                          potentially improving performance when ``resolve_ambiguity==False``.
+                          Only use if you know what you are doing: i.e. All transformation
+                          functions are pure and referentially transparent.
+    """
+
+    def __init__(self, tree_class=Tree, prioritizer=ForestSumVisitor(), resolve_ambiguity=True, use_cache=False):
+        # No rule callbacks are used here; methods are looked up by name instead.
+        super().__init__(tree_class, {}, prioritizer, resolve_ambiguity, use_cache)
+
+    def __default__(self, name, data):
+        """Default operation on tree (for override).
+
+        Returns a tree with name with data as children.
+        """
+        return self.tree_class(name, data)
+
+    def __default_ambig__(self, name, data):
+        """Default operation on ambiguous rule (for override).
+
+        Wraps data in an '_ambig' node if it contains more than
+        one element, returns the single element if there is exactly
+        one, and ``Discard`` if there is none.
+        """
+        count = len(data)
+        if count > 1:
+            return self.tree_class('_ambig', data)
+        if data:
+            return data[0]
+        return Discard
+
+    def __default_token__(self, node):
+        """Default operation on ``Token`` (for override).
+
+        Returns ``node``.
+        """
+        return node
+
+    def transform_token_node(self, node):
+        # Dispatch on the token type, falling back to __default_token__.
+        handler = getattr(self, node.type, self.__default_token__)
+        return handler(node)
+
+    def _call_rule_func(self, node, data):
+        rule = node.rule
+        name = rule.alias or rule.options.template_source or rule.origin.name
+        default = self.__default__
+        handler = getattr(self, name, default)
+        # Methods marked with handles_ambiguity are called per symbol, not per
+        # derivation, so single derivations go through the default.
+        if handler == default or hasattr(handler, 'handles_ambiguity'):
+            handler = partial(default, name)
+        if not self.resolve_ambiguity:
+            handler = AmbiguousIntermediateExpander(self.tree_class, handler)
+        return handler(data)
+
+    def _call_ambig_func(self, node, data):
+        name = node.s.name
+        default = self.__default_ambig__
+        handler = getattr(self, name, default)
+        # Only methods explicitly marked with handles_ambiguity get the
+        # list of derivations; everything else uses the default.
+        if handler == default or not hasattr(handler, 'handles_ambiguity'):
+            handler = partial(default, name)
+        return handler(data)
+
+class ForestToPyDotVisitor(ForestVisitor):
+    """
+    A Forest visitor which writes the SPPF to a PNG.
+
+    The SPPF can get really large, really quickly because
+    of the amount of meta-data it stores, so this is probably
+    only useful for trivial trees and learning how the SPPF
+    is structured.
+
+    Parameters:
+        rankdir: Passed to ``pydot.Dot`` as the graph's ``rankdir``.
+    """
+    def __init__(self, rankdir="TB"):
+        super(ForestToPyDotVisitor, self).__init__(single_visit=True)
+        # pydot is imported lazily so it is only required when this visitor is used.
+        self.pydot = import_module('pydot')
+        self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
+
+    def visit(self, root, filename):
+        """Walk the forest from ``root`` and write the graph to ``filename`` as a PNG.
+
+        A ``FileNotFoundError`` raised while writing is logged, not propagated.
+        """
+        super(ForestToPyDotVisitor, self).visit(root)
+        try:
+            self.graph.write_png(filename)
+        except FileNotFoundError as e:
+            # The message needs a %s placeholder: logging formats lazily with
+            # ``msg % args`` and would otherwise fail and lose the message.
+            logger.error("Could not write png: %s", e)
+
+    def visit_token_node(self, node):
+        # ``node`` is the token itself (the base visitor passes ``TokenNode.token``),
+        # so the graph node id is derived from the token object.
+        graph_node_id = str(id(node))
+        graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
+        graph_node_color = 0x808080
+        graph_node_style = "\"filled,rounded\""
+        graph_node_shape = "diamond"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+
+    def visit_packed_node_in(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = repr(node)
+        graph_node_color = 0x808080
+        graph_node_style = "filled"
+        graph_node_shape = "diamond"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+        yield node.left
+        yield node.right
+
+    def visit_packed_node_out(self, node):
+        # Edges are added on the way out, once the children's graph nodes exist.
+        graph_node_id = str(id(node))
+        graph_node = self.graph.get_node(graph_node_id)[0]
+        for child in [node.left, node.right]:
+            if child is not None:
+                # Token graph nodes were registered under the id of the token.
+                child_graph_node_id = str(id(child.token if isinstance(child, TokenNode) else child))
+                child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
+            else:
+                # A missing child gets an invisible placeholder node and edge.
+                #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
+                child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
+                child_graph_node_style = "invis"
+                child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
+                child_edge_style = "invis"
+                self.graph.add_node(child_graph_node)
+                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
+
+    def visit_symbol_node_in(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = repr(node)
+        graph_node_color = 0x808080
+        graph_node_style = "\"filled\""
+        # Intermediate nodes are drawn as ellipses, symbol nodes as rectangles.
+        if node.is_intermediate:
+            graph_node_shape = "ellipse"
+        else:
+            graph_node_shape = "rectangle"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+        return iter(node.children)
+
+    def visit_symbol_node_out(self, node):
+        graph_node_id = str(id(node))
+        graph_node = self.graph.get_node(graph_node_id)[0]
+        for child in node.children:
+            child_graph_node_id = str(id(child))
+            child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+            self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
@@ -0,0 +1,203 @@
+"Provides for superficial grammar analysis."
+
+from collections import Counter, defaultdict
+from typing import List, Dict, Iterator, FrozenSet, Set
+
+from ..utils import bfs, fzset, classify, OrderedSet
+from ..exceptions import GrammarError
+from ..grammar import Rule, Terminal, NonTerminal, Symbol
+from ..common import ParserConf
+
+
+class RulePtr:
+    """A position inside a rule's expansion ("dotted rule").
+
+    ``index`` symbols of ``rule.expansion`` lie before the position; the symbol at
+    ``index`` (if any) is the next one expected. Instances compare and hash by
+    ``(rule, index)``.
+    """
+    __slots__ = ('rule', 'index')
+    rule: Rule
+    index: int
+
+    def __init__(self, rule: Rule, index: int):
+        assert isinstance(rule, Rule)
+        assert index <= len(rule.expansion)
+        self.rule = rule
+        self.index = index
+
+    def __repr__(self):
+        expansion = self.rule.expansion
+        consumed = ' '.join(sym.name for sym in expansion[:self.index])
+        pending = ' '.join(sym.name for sym in expansion[self.index:])
+        return '<%s : %s * %s>' % (self.rule.origin.name, consumed, pending)
+
+    @property
+    def next(self) -> Symbol:
+        """The symbol right after the position (IndexError if the rule is satisfied)."""
+        expansion = self.rule.expansion
+        return expansion[self.index]
+
+    def advance(self, sym: Symbol) -> 'RulePtr':
+        """Return a new pointer moved one symbol forward; ``sym`` must be the next symbol."""
+        assert self.next == sym
+        return RulePtr(self.rule, self.index+1)
+
+    @property
+    def is_satisfied(self) -> bool:
+        """True when the position is at the very end of the expansion."""
+        return len(self.rule.expansion) == self.index
+
+    def __eq__(self, other) -> bool:
+        if isinstance(other, RulePtr):
+            return self.rule == other.rule and self.index == other.index
+        return NotImplemented
+
+    def __hash__(self) -> int:
+        key = (self.rule, self.index)
+        return hash(key)
+
+
+# A state is an immutable set of rule pointers.
+State = FrozenSet[RulePtr]
+
+# State generation ensures that no duplicate LR0ItemSets are created.
+class LR0ItemSet:
+    """An LR(0) item set: a kernel and its closure, plus per-symbol tables.
+
+    ``transitions`` and ``lookaheads`` start out empty and are filled in by the
+    analysis code that owns the item set.
+    """
+    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')
+
+    kernel: State
+    closure: State
+    transitions: Dict[Symbol, 'LR0ItemSet']
+    lookaheads: Dict[Symbol, Set[Rule]]
+
+    def __init__(self, kernel, closure):
+        # Both item collections are frozen so they can be hashed / used as keys.
+        self.kernel = fzset(kernel)
+        self.closure = fzset(closure)
+        self.transitions = {}
+        self.lookaheads = defaultdict(set)
+
+    def __repr__(self):
+        kernel_text = ', '.join(map(repr, self.kernel))
+        closure_text = ', '.join(map(repr, self.closure))
+        return '{%s | %s}' % (kernel_text, closure_text)
+
+
+def update_set(set1, set2):
+    """Merge ``set2`` into ``set1`` in place and report whether ``set1`` grew.
+
+    Parameters:
+        set1 - the mutable set that receives the elements.
+        set2 - the set whose elements are added; may be empty.
+
+    Returns:
+        True if at least one element was added to ``set1``, otherwise False
+        (in which case ``set1`` is left untouched).
+    """
+    if not set2 or set1 >= set2:
+        # Nothing new to add, so skip the union (and any copying) entirely.
+        return False
+
+    # set2 holds at least one element missing from set1, so the union must grow it.
+    set1 |= set2
+    return True
+
+def calculate_sets(rules):
+    """Calculate the FIRST, FOLLOW and NULLABLE sets of a grammar.
+
+    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets
+
+    Parameters:
+        rules - the grammar rules. Each rule exposes ``origin`` (the symbol it
+            defines) and ``expansion`` (a sequence of symbols); each symbol
+            exposes ``is_term``. ``rules`` is iterated several times, so it
+            must be re-iterable (e.g. a list).
+
+    Returns:
+        A tuple ``(FIRST, FOLLOW, NULLABLE)``. FIRST and FOLLOW map every symbol
+        that occurs in ``rules`` to a set of symbols; NULLABLE is the set of
+        rule origins that can derive the empty sequence.
+    """
+    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
+
+    # foreach grammar rule X ::= Y(1) ... Y(k)
+    # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
+    #   NULLABLE = NULLABLE union {X}
+    # for i = 1 to k
+    #   if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
+    #     FIRST(X) = FIRST(X) union FIRST(Y(i))
+    #   for j = i+1 to k
+    #     if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
+    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
+    #     if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
+    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
+    # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
+
+    NULLABLE = set()
+    FIRST = {}
+    FOLLOW = {}
+    for sym in symbols:
+        # A terminal's FIRST set is the terminal itself.
+        FIRST[sym] = {sym} if sym.is_term else set()
+        FOLLOW[sym] = set()
+
+    # Calculate NULLABLE and FIRST
+    changed = True
+    while changed:
+        changed = False
+
+        for rule in rules:
+            if set(rule.expansion) <= NULLABLE:
+                if update_set(NULLABLE, {rule.origin}):
+                    changed = True
+
+            # FIRST(origin) absorbs FIRST of each symbol up to and including the
+            # first non-nullable one; later symbols cannot start the expansion.
+            for sym in rule.expansion:
+                if update_set(FIRST[rule.origin], FIRST[sym]):
+                    changed = True
+                if sym not in NULLABLE:
+                    break
+
+    # Calculate FOLLOW
+    changed = True
+    while changed:
+        changed = False
+
+        for rule in rules:
+            expansion = rule.expansion
+            for i, sym in enumerate(expansion):
+                # Symbols after position i contribute their FIRST sets for as
+                # long as everything between them and sym is nullable.
+                for j in range(i+1, len(expansion)):
+                    following = expansion[j]
+                    if update_set(FOLLOW[sym], FIRST[following]):
+                        changed = True
+                    if following not in NULLABLE:
+                        break
+                else:
+                    # No non-nullable symbol follows sym (or sym is last), so
+                    # whatever may follow the origin may also follow sym.
+                    if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
+                        changed = True
+
+    return FIRST, FOLLOW, NULLABLE
+
+
+class GrammarAnalyzer:
+    """Validates the rules of a parser configuration and precomputes lookup tables.
+
+    On construction it builds the augmented ``$root_<start>`` rules, rejects
+    duplicate rules and references to undefined rules (``GrammarError``), and
+    stores the rule tables, start/end item sets and FIRST/FOLLOW/NULLABLE sets
+    as attributes.
+    """
+    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
+        self.debug = debug
+        self.strict = strict
+
+        # One augmented rule per start symbol: $root_<start> -> <start> $END
+        root_rules = {}
+        for start in parser_conf.start:
+            root_rules[start] = Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
+
+        rules = parser_conf.rules + list(root_rules.values())
+        self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin)
+
+        if len(set(rules)) != len(rules):
+            duplicates = [rule for rule, count in Counter(rules).items() if count > 1]
+            raise GrammarError("Rules defined twice: %s" % ', '.join(map(str, duplicates)))
+
+        for rule in rules:
+            for sym in rule.expansion:
+                if sym.is_term or sym in self.rules_by_origin:
+                    continue
+                raise GrammarError("Using an undefined rule: %s" % sym)
+
+        start_states = {}
+        for start, root_rule in root_rules.items():
+            start_states[start] = self.expand_rule(root_rule.origin)
+        self.start_states = start_states
+
+        end_states = {}
+        for start, root_rule in root_rules.items():
+            end_states[start] = fzset({RulePtr(root_rule, len(root_rule.expansion))})
+        self.end_states = end_states
+
+        # The LR(0) variant of the augmented rules has no trailing $END.
+        lr0_root_rules = {}
+        for start in parser_conf.start:
+            lr0_root_rules[start] = Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
+
+        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
+        assert len(set(lr0_rules)) == len(lr0_rules)
+
+        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
+
+        lr0_start_states = {}
+        for start, root_rule in lr0_root_rules.items():
+            kernel = [RulePtr(root_rule, 0)]
+            closure = self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)
+            lr0_start_states[start] = LR0ItemSet(kernel, closure)
+        self.lr0_start_states = lr0_start_states
+
+        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
+
+    def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> OrderedSet[RulePtr]:
+        """Return the initial pointers of every rule reachable from ``source_rule``.
+
+        Starting at ``source_rule``, each rule of a visited nonterminal contributes
+        a pointer at index 0; if that rule begins with a nonterminal, that
+        nonterminal is visited as well (breadth-first). ``rules_by_origin``
+        defaults to ``self.rules_by_origin``.
+        """
+        table = self.rules_by_origin if rules_by_origin is None else rules_by_origin
+
+        init_ptrs = OrderedSet[RulePtr]()
+
+        def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]:
+            assert not rule.is_term, rule
+
+            for r in table[rule]:
+                init_ptr = RulePtr(r, 0)
+                init_ptrs.add(init_ptr)
+
+                if not r.expansion:
+                    # empty rule: there is no first symbol to follow
+                    continue
+                new_r = init_ptr.next
+                if new_r.is_term:
+                    continue
+                assert isinstance(new_r, NonTerminal)
+                yield new_r
+
+        # Drain the traversal; only the side effect on init_ptrs matters.
+        for _ in bfs([source_rule], _expand_rule):
+            pass
+
+        return init_ptrs
@@ -0,0 +1,334 @@
+"""This module builds a LALR(1) transition-table for lalr_parser.py
+
+For now, shift/reduce conflicts are automatically resolved as shifts.
+"""
+
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+
+from typing import Dict, Set, Iterator, Tuple, List, TypeVar, Generic
+from collections import defaultdict
+
+from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger
+from ..exceptions import GrammarError
+
+from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet, RulePtr, State
+from ..grammar import Rule, Symbol
+from ..common import ParserConf
+
+###{standalone
+
+class Action:
+    """A named marker for a parse-table action; prints as its name."""
+
+    def __init__(self, name):
+        self.name = name
+
+    def __str__(self):
+        return self.name
+
+    def __repr__(self):
+        # The repr is deliberately the same text as str().
+        return str(self)
+
+
+# The two action markers used in parse tables; they are compared by identity.
+Shift = Action('Shift')
+Reduce = Action('Reduce')
+
+# Type of the keys that identify a parser state in a parse table.
+StateT = TypeVar("StateT")
+
+class ParseTableBase(Generic[StateT]):
+    """Container for a parse table: per-state actions plus start and end states.
+
+    ``states`` maps a state to ``{token name: (action, argument)}``, where the
+    argument of a ``Shift`` is a target state and the argument of a ``Reduce``
+    is a rule.
+    """
+    states: Dict[StateT, Dict[str, Tuple]]
+    start_states: Dict[str, StateT]
+    end_states: Dict[str, StateT]
+
+    def __init__(self, states, start_states, end_states):
+        self.states = states
+        self.start_states = start_states
+        self.end_states = end_states
+
+    def serialize(self, memo):
+        """Return a plain-dict form of the table.
+
+        Token names are replaced by numbers handed out by an ``Enumerator``;
+        actions are encoded as ``(1, serialized rule)`` for Reduce and
+        ``(0, argument)`` otherwise.
+        """
+        tokens = Enumerator()
+
+        states = {}
+        for state, actions in self.states.items():
+            row = {}
+            for token, (action, arg) in actions.items():
+                # Number the token before touching the argument (same order as before).
+                token_id = tokens.get(token)
+                if action is Reduce:
+                    row[token_id] = (1, arg.serialize(memo))
+                else:
+                    row[token_id] = (0, arg)
+            states[state] = row
+
+        return {
+            'tokens': tokens.reversed(),
+            'states': states,
+            'start_states': self.start_states,
+            'end_states': self.end_states,
+        }
+
+    @classmethod
+    def deserialize(cls, data, memo):
+        """Rebuild a table from the dict produced by ``serialize``."""
+        tokens = data['tokens']
+
+        states = {}
+        for state, actions in data['states'].items():
+            row = {}
+            for token, (action, arg) in actions.items():
+                name = tokens[token]
+                if action == 1:
+                    row[name] = (Reduce, Rule.deserialize(arg, memo))
+                else:
+                    row[name] = (Shift, arg)
+            states[state] = row
+
+        return cls(states, data['start_states'], data['end_states'])
+
+class ParseTable(ParseTableBase['State']):
+    """Parse table keyed by ``State``, i.e. by sets of ``RulePtr``.
+
+    Slower than ``IntParseTable``, but useful for debugging.
+    """
+
+
+class IntParseTable(ParseTableBase[int]):
+    """Parse table keyed by integers. Best for performance."""
+
+    @classmethod
+    def from_ParseTable(cls, parse_table: ParseTable):
+        """Build an integer-keyed table from ``parse_table``.
+
+        States are numbered in the iteration order of ``parse_table.states``;
+        Shift targets and the start/end states are translated to those numbers,
+        while every other action is copied unchanged.
+        """
+        state_to_idx: Dict['State', int] = {}
+        for idx, state in enumerate(parse_table.states):
+            state_to_idx[state] = idx
+
+        int_states = {}
+        for state, actions in parse_table.states.items():
+            row = {}
+            for token, action in actions.items():
+                if action[0] is Shift:
+                    row[token] = (action[0], state_to_idx[action[1]])
+                else:
+                    row[token] = action
+            int_states[state_to_idx[state]] = row
+
+        start_states = {}
+        for start, state in parse_table.start_states.items():
+            start_states[start] = state_to_idx[state]
+
+        end_states = {}
+        for start, state in parse_table.end_states.items():
+            end_states[start] = state_to_idx[state]
+
+        return cls(int_states, start_states, end_states)
+
+###}
+
+
+# digraph and traverse, see The Theory and Practice of Compiler Writing
+
+# computes F(x) = G(x) union (union { F(y) | x R y }), i.e. G accumulated over everything reachable from x via R
+# X: nodes
+# R: relation (function mapping node -> list of nodes that satisfy the relation)
+# G: set valued function
+def digraph(X, R, G):
+    """Run ``traverse`` from every node of ``X`` that has not been visited yet.
+
+    Parameters:
+        X - the nodes (iterated twice).
+        R - mapping from a node to the nodes related to it.
+        G - mapping from a node to its initial set.
+
+    Returns:
+        The mapping F from node to accumulated set, as filled in by ``traverse``.
+    """
+    F = {}
+    stack = []
+    weights = {x: 0 for x in X}
+    for x in X:
+        # A weight of 0 means "not visited"; traverse() may have visited x already
+        # while processing an earlier node.
+        if weights[x] != 0:
+            continue
+        traverse(x, stack, weights, X, R, G, F)
+    return F
+
+# x: single node
+# S: stack
+# N: weights
+# X: nodes
+# R: relation (see above)
+# G: set valued function
+# F: set valued function we are computing (map of input -> output)
+def traverse(x, S, N, X, R, G, F):
+    """Depth-first step of ``digraph``: compute ``F[x]`` and everything it depends on.
+
+    Parameters:
+        x - the node to visit.
+        S - the stack of nodes currently being processed (mutated).
+        N - node weights: 0 = unvisited, > 0 = stack depth while in progress,
+            -1 = finished (mutated).
+        X - all nodes (only passed through to recursive calls).
+        R - mapping from a node to the nodes related to it.
+        G - mapping from a node to its initial set; its sets are not modified.
+        F - output mapping from node to accumulated set (mutated).
+
+    Nodes that turn out to be mutually reachable end up sharing one result set.
+    """
+    S.append(x)
+    d = len(S)
+    N[x] = d
+    # Start from a copy: accumulating into F[x] must not mutate the caller's G
+    # (whose sets may themselves be shared between several nodes).
+    F[x] = set(G[x])
+    for y in R[x]:
+        if N[y] == 0:
+            traverse(y, S, N, X, R, G, F)
+        n_x = N[x]
+        assert(n_x > 0)
+        n_y = N[y]
+        assert(n_y != 0)
+        # y is still on the stack below x: x belongs to the same cycle as y.
+        if (n_y > 0) and (n_y < n_x):
+            N[x] = n_y
+        F[x].update(F[y])
+    if N[x] == d:
+        # x is the root of its cycle: pop the whole cycle and let every member
+        # share x's result set.
+        f_x = F[x]
+        while True:
+            z = S.pop()
+            N[z] = -1
+            F[z] = f_x
+            if z == x:
+                break
+
+
+class LALR_Analyzer(GrammarAnalyzer):
+    """Builds an LALR(1) parse table on top of ``GrammarAnalyzer``.
+
+    ``compute_lalr()`` runs all phases in order; the finished table is stored
+    in ``self.parse_table``.
+    """
+    lr0_itemsets: Set[LR0ItemSet]
+    nonterminal_transitions: List[Tuple[LR0ItemSet, Symbol]]
+    lookback: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Rule]]]
+    includes: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
+    reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Tuple[LR0ItemSet, Symbol]]]
+    directly_reads: Dict[Tuple[LR0ItemSet, Symbol], Set[Symbol]]
+
+
+    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
+        """Run the base grammar analysis and create the (still empty) relation tables."""
+        GrammarAnalyzer.__init__(self, parser_conf, debug, strict)
+        self.nonterminal_transitions = []
+        self.directly_reads = defaultdict(set)
+        self.reads = defaultdict(set)
+        self.includes = defaultdict(set)
+        self.lookback = defaultdict(set)
+
+
+    def compute_lr0_states(self) -> None:
+        """Build the LR(0) item sets reachable from ``self.lr0_start_states``.
+
+        A breadth-first search visits each state, fills in its ``transitions``
+        and adds it to ``self.lr0_itemsets``. States are cached by kernel so
+        that equal kernels resolve to the same ``LR0ItemSet`` object.
+        """
+        self.lr0_itemsets = set()
+        # map of kernels to LR0ItemSets
+        cache: Dict['State', LR0ItemSet] = {}
+
+        def step(state: LR0ItemSet) -> Iterator[LR0ItemSet]:
+            # Only items that still expect a symbol can produce a transition.
+            _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)
+
+            # Group the unsatisfied items by the symbol they expect next.
+            d = classify(unsat, lambda rp: rp.next)
+            for sym, rps in d.items():
+                kernel = fzset({rp.advance(sym) for rp in rps})
+                new_state = cache.get(kernel, None)
+                if new_state is None:
+                    closure = set(kernel)
+                    for rp in kernel:
+                        if not rp.is_satisfied and not rp.next.is_term:
+                            closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
+                    new_state = LR0ItemSet(kernel, closure)
+                    cache[kernel] = new_state
+
+                state.transitions[sym] = new_state
+                yield new_state
+
+            self.lr0_itemsets.add(state)
+
+        for _ in bfs(self.lr0_start_states.values(), step):
+            pass
+
+    def compute_reads_relations(self):
+        """Populate ``nonterminal_transitions``, ``directly_reads`` and ``reads``.
+
+        For every state and every distinct nonterminal ``s`` expected by an item
+        of its closure, ``(state, s)`` is recorded as a nonterminal transition.
+        In the state reached through ``s``, each expected terminal is added to
+        ``directly_reads`` and each expected nullable symbol adds a ``reads`` edge.
+        """
+        # handle start state
+        for root in self.lr0_start_states.values():
+            assert(len(root.kernel) == 1)
+            for rp in root.kernel:
+                assert(rp.index == 0)
+                self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
+
+        for state in self.lr0_itemsets:
+            seen = set()
+            for rp in state.closure:
+                if rp.is_satisfied:
+                    continue
+                s = rp.next
+                # if s is not a nonterminal
+                if s not in self.lr0_rules_by_origin:
+                    continue
+                if s in seen:
+                    continue
+                seen.add(s)
+                nt = (state, s)
+                self.nonterminal_transitions.append(nt)
+                # Indexing the defaultdicts also registers nt as a key in both tables.
+                dr = self.directly_reads[nt]
+                r = self.reads[nt]
+                next_state = state.transitions[s]
+                for rp2 in next_state.closure:
+                    if rp2.is_satisfied:
+                        continue
+                    s2 = rp2.next
+                    # if s2 is a terminal
+                    if s2 not in self.lr0_rules_by_origin:
+                        dr.add(s2)
+                    if s2 in self.NULLABLE:
+                        r.add((next_state, s2))
+
+    def compute_includes_lookback(self):
+        """Populate ``includes`` and ``lookback`` from the nonterminal transitions.
+
+        For each transition ``(state, nonterminal)``, every closure item whose rule
+        defines ``nonterminal`` is followed through the transitions of its
+        remaining symbols. A step ``(state2, s)`` that is a key of ``self.reads``
+        and is followed only by nullable symbols "includes" the transition. For
+        items at index 0, the satisfied item of the same rule in the final state
+        is recorded as a lookback.
+        """
+        for nt in self.nonterminal_transitions:
+            state, nonterminal = nt
+            includes = []
+            lookback = self.lookback[nt]
+            for rp in state.closure:
+                if rp.rule.origin != nonterminal:
+                    continue
+                # traverse the states for rp(.rule)
+                state2 = state
+                for i in range(rp.index, len(rp.rule.expansion)):
+                    s = rp.rule.expansion[i]
+                    nt2 = (state2, s)
+                    state2 = state2.transitions[s]
+                    if nt2 not in self.reads:
+                        continue
+                    # for/else: the else branch runs only if no non-nullable symbol follows
+                    for j in range(i + 1, len(rp.rule.expansion)):
+                        if rp.rule.expansion[j] not in self.NULLABLE:
+                            break
+                    else:
+                        includes.append(nt2)
+                # state2 is at the final state for rp.rule
+                if rp.index == 0:
+                    for rp2 in state2.closure:
+                        if (rp2.rule == rp.rule) and rp2.is_satisfied:
+                            lookback.add((state2, rp2.rule))
+            for nt2 in includes:
+                self.includes[nt2].add(nt)
+
+    def compute_lookaheads(self):
+        """Fill ``state.lookaheads`` for every lookback pair.
+
+        ``digraph`` is run twice: over ``reads`` seeded with ``directly_reads``,
+        then over ``includes`` seeded with the result of the first run. Every
+        symbol in the resulting set of a transition is registered as a lookahead
+        for each ``(state, rule)`` in that transition's lookback set.
+        """
+        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
+
+        for nt, lookbacks in self.lookback.items():
+            for state, rule in lookbacks:
+                for s in follow_sets[nt]:
+                    state.lookaheads[s].add(rule)
+
+    def compute_lalr1_states(self) -> None:
+        """Turn transitions and lookaheads into the final parse table.
+
+        Each transition becomes a Shift and each lookahead a Reduce. When several
+        rules compete for one lookahead, the rule with the strictly highest
+        priority wins; otherwise the collision is collected and reported through
+        a ``GrammarError`` after all states were processed. When a Shift already
+        exists for a lookahead, the Shift is kept (``GrammarError`` in strict
+        mode, a log message otherwise).
+
+        Stores a ``ParseTable`` in ``self.parse_table`` when ``self.debug`` is
+        set, otherwise an ``IntParseTable``.
+        """
+        m: Dict[LR0ItemSet, Dict[str, Tuple]] = {}
+        reduce_reduce = []
+        for itemset in self.lr0_itemsets:
+            actions: Dict[Symbol, Tuple] = {la: (Shift, next_state.closure)
+                                                      for la, next_state in itemset.transitions.items()}
+            for la, rules in itemset.lookaheads.items():
+                if len(rules) > 1:
+                    # Try to resolve conflict based on priority
+                    p = [(r.options.priority or 0, r) for r in rules]
+                    p.sort(key=lambda r: r[0], reverse=True)
+                    best, second_best = p[:2]
+                    if best[0] > second_best[0]:
+                        rules = {best[1]}
+                    else:
+                        reduce_reduce.append((itemset, la, rules))
+                        continue
+
+                # exactly one rule is left at this point
+                rule ,= rules
+                if la in actions:
+                    if self.strict:
+                        msg = f'Shift/Reduce conflict for terminal {la.name}. [strict-mode]\n' \
+                              f' * {rule}\n'
+                        raise GrammarError(msg)
+                    elif self.debug:
+                        logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+                        logger.warning(' * %s', rule)
+                    else:
+                        logger.debug('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+                        logger.debug(' * %s', rule)
+                else:
+                    actions[la] = (Reduce, rule)
+            m[itemset] = { k.name: v for k, v in actions.items() }
+
+        if reduce_reduce:
+            msgs = []
+            for itemset, la, rules in reduce_reduce:
+                msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
+                if self.debug:
+                    msg += '\n    collision occurred in state: {%s\n    }' % ''.join(['\n\t' + str(x) for x in itemset.closure])
+                msgs.append(msg)
+            raise GrammarError('\n\n'.join(msgs))
+
+        # The table is keyed by each item set's closure rather than by the item set itself.
+        states = { k.closure: v for k, v in m.items() }
+
+        # compute end states
+        end_states: Dict[str, 'State'] = {}
+        for state in states:
+            for rp in state:
+                for start in self.lr0_start_states:
+                    if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied:
+                        assert start not in end_states
+                        end_states[start] = state
+
+        start_states = { start: state.closure for start, state in self.lr0_start_states.items() }
+        _parse_table = ParseTable(states, start_states, end_states)
+
+        if self.debug:
+            self.parse_table = _parse_table
+        else:
+            self.parse_table = IntParseTable.from_ParseTable(_parse_table)
+
+    def compute_lalr(self):
+        """Run every analysis phase in order; afterwards ``self.parse_table`` is set."""
+        self.compute_lr0_states()
+        self.compute_reads_relations()
+        self.compute_includes_lookback()
+        self.compute_lookaheads()
+        self.compute_lalr1_states()
@@ -0,0 +1,158 @@
+# This module provides a LALR interactive parser, which is used for debugging and error handling
+
+from typing import Iterator, List
+from copy import copy
+import warnings
+
+from lark.exceptions import UnexpectedToken
+from lark.lexer import Token, LexerThread
+from .lalr_parser_state import ParserState
+
+###{standalone
+
+class InteractiveParser:
+    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.
+
+    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
+    """
+    def __init__(self, parser, parser_state: ParserState, lexer_thread: LexerThread):
+        self.parser = parser
+        self.parser_state = parser_state
+        self.lexer_thread = lexer_thread
+        self.result = None
+
+    @property
+    def lexer_state(self) -> LexerThread:
+        """Deprecated alias of ``lexer_thread``."""
+        # stacklevel=2 attributes the warning to the code that accessed the
+        # property instead of to this module.
+        warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning, stacklevel=2)
+        return self.lexer_thread
+
+    def feed_token(self, token: Token):
+        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.
+
+        Note that ``token`` has to be an instance of ``Token``.
+        """
+        return self.parser_state.feed_token(token, token.type == '$END')
+
+    def iter_parse(self) -> Iterator[Token]:
+        """Step through the different stages of the parse, by reading tokens from the lexer
+        and feeding them to the parser, one per iteration.
+
+        Returns an iterator of the tokens it encounters.
+
+        When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
+        """
+        for token in self.lexer_thread.lex(self.parser_state):
+            # The token is yielded before it is fed, so the consumer sees it first.
+            yield token
+            self.result = self.feed_token(token)
+
+    def exhaust_lexer(self) -> List[Token]:
+        """Try to feed the rest of the lexer state into the interactive parser.
+
+        Note that this modifies the instance in place and does not feed an '$END' Token
+        """
+        return list(self.iter_parse())
+
+
+    def feed_eof(self, last_token=None):
+        """Feed a '$END' Token. Borrows from 'last_token' if given."""
+        eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1)
+        return self.feed_token(eof)
+
+
+    def __copy__(self):
+        """Create a new interactive parser with a separate state.
+
+        Calls to feed_token() won't affect the old instance, and vice-versa.
+        """
+        return self.copy()
+
+    def copy(self, deepcopy_values=True):
+        """Return a new parser of the same type with copied parser and lexer state.
+
+        ``deepcopy_values`` is forwarded to ``ParserState.copy``.
+        """
+        return type(self)(
+            self.parser,
+            self.parser_state.copy(deepcopy_values=deepcopy_values),
+            copy(self.lexer_thread),
+        )
+
+    def __eq__(self, other):
+        if not isinstance(other, InteractiveParser):
+            # Let Python try the reflected comparison; ``==`` still evaluates
+            # to False for unrelated types.
+            return NotImplemented
+
+        return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread
+
+    def as_immutable(self):
+        """Convert to an ``ImmutableInteractiveParser``."""
+        p = copy(self)
+        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)
+
+    def pretty(self):
+        """Return the output of ``choices()`` as text that's easier to read."""
+        out = ["Parser choices:"]
+        for k, v in self.choices().items():
+            out.append('\t- %s -> %r' % (k, v))
+        out.append('stack size: %s' % len(self.parser_state.state_stack))
+        return '\n'.join(out)
+
+    def choices(self):
+        """Returns a dictionary of token types, matched to their action in the parser.
+
+        Only returns token types that are accepted by the current state.
+
+        Updated by ``feed_token()``.
+        """
+        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]
+
+    def accepts(self):
+        """Returns the set of possible tokens that will advance the parser into a new valid state."""
+        accepts = set()
+        conf_no_callbacks = copy(self.parser_state.parse_conf)
+        # We don't want to call callbacks here since those might have arbitrary side effects
+        # and are unnecessarily slow.
+        conf_no_callbacks.callbacks = {}
+        for t in self.choices():
+            if t.isupper(): # is terminal?
+                # Try the token on a throwaway copy so this instance stays untouched.
+                new_cursor = self.copy(deepcopy_values=False)
+                new_cursor.parser_state.parse_conf = conf_no_callbacks
+                try:
+                    new_cursor.feed_token(self.lexer_thread._Token(t, ''))
+                except UnexpectedToken:
+                    pass
+                else:
+                    accepts.add(t)
+        return accepts
+
+    def resume_parse(self):
+        """Resume automated parsing from the current state.
+        """
+        return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token)
+
+
+
+class ImmutableInteractiveParser(InteractiveParser):
+    """Variant of ``InteractiveParser`` whose operations return a new instance
+    rather than modifying the one they are called on.
+    """
+
+    result = None
+
+    def __hash__(self):
+        key = (self.parser_state, self.lexer_thread)
+        return hash(key)
+
+    def feed_token(self, token):
+        """Feed ``token`` to a copy of this parser and return that copy.
+
+        The value returned by the underlying parser is stored in the copy's ``result``.
+        """
+        clone = copy(self)
+        clone.result = InteractiveParser.feed_token(clone, token)
+        return clone
+
+    def exhaust_lexer(self):
+        """Try to feed the rest of the lexer state into the parser.
+
+        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
+        mutable = self.as_mutable()
+        mutable.exhaust_lexer()
+        return mutable.as_immutable()
+
+    def as_mutable(self):
+        """Convert to an ``InteractiveParser``."""
+        clone = copy(self)
+        return InteractiveParser(clone.parser, clone.parser_state, clone.lexer_thread)
+
+###}
@@ -0,0 +1,122 @@
+"""This module implements a LALR(1) Parser
+"""
+# Author: Erez Shinan (2017)
+# Email : erezshin@gmail.com
+from typing import Dict, Any, Optional
+from ..lexer import Token, LexerThread
+from ..utils import Serialize
+from ..common import ParserConf, ParserCallbacks
+
+from .lalr_analysis import LALR_Analyzer, IntParseTable, ParseTableBase
+from .lalr_interactive_parser import InteractiveParser
+from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
+from .lalr_parser_state import ParserState, ParseConf
+
+###{standalone
+
+class LALR_Parser(Serialize):
+    """Front end of the LALR(1) parser: builds (or deserializes) the parse table
+    and delegates the actual parsing to ``_Parser``.
+    """
+    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
+        analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
+        analysis.compute_lalr()
+        callbacks = parser_conf.callbacks
+
+        self.parser_conf = parser_conf
+        self._parse_table = analysis.parse_table
+        self.parser = _Parser(analysis.parse_table, callbacks, debug)
+
+    @classmethod
+    def deserialize(cls, data, memo, callbacks, debug=False):
+        """Create an instance from serialized table data without running the analysis."""
+        inst = cls.__new__(cls)
+        table = IntParseTable.deserialize(data, memo)
+        inst._parse_table = table
+        inst.parser = _Parser(table, callbacks, debug)
+        return inst
+
+    def serialize(self, memo: Any = None) -> Dict[str, Any]:
+        """Return the serialized form of the parse table."""
+        return self._parse_table.serialize(memo)
+
+    def parse_interactive(self, lexer: LexerThread, start: str):
+        """Return an interactive parser positioned at the start of the input."""
+        return self.parser.parse(lexer, start, start_interactive=True)
+
+    def parse(self, lexer, start, on_error=None):
+        """Parse the input, optionally recovering from errors through ``on_error``.
+
+        Without ``on_error`` any ``UnexpectedInput`` propagates. Otherwise the
+        handler is called with each error; a falsy return value re-raises the
+        error, a truthy one resumes parsing from the error's interactive parser.
+        """
+        try:
+            return self.parser.parse(lexer, start)
+        except UnexpectedInput as e:
+            if on_error is None:
+                raise
+
+            while True:
+                char_error = isinstance(e, UnexpectedCharacters)
+                if char_error:
+                    # Remember where the lexer stood before the handler runs.
+                    lexer_state = e.interactive_parser.lexer_thread.state
+                    pos_before = lexer_state.line_ctr.char_pos
+
+                if not on_error(e):
+                    raise e
+
+                if char_error and pos_before == lexer_state.line_ctr.char_pos:
+                    # The handler did not move the character position, so
+                    # advance past one character ourselves.
+                    lexer_state.line_ctr.feed(lexer_state.text.text[pos_before:pos_before+1])
+
+                try:
+                    return e.interactive_parser.resume_parse()
+                except UnexpectedToken as retry_error:
+                    stuck_at_end = (
+                        isinstance(e, UnexpectedToken)
+                        and e.token.type == retry_error.token.type == '$END'
+                        and e.interactive_parser == retry_error.interactive_parser
+                    )
+                    if stuck_at_end:
+                        # Prevent infinite loop
+                        raise retry_error
+                    e = retry_error
+                except UnexpectedCharacters as retry_error:
+                    e = retry_error
+
+
+class _Parser:
+    """Runs the LALR parse loop over a parse table, invoking the given callbacks."""
+    parse_table: ParseTableBase
+    callbacks: ParserCallbacks
+    debug: bool
+
+    def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False):
+        self.parse_table = parse_table
+        self.callbacks = callbacks
+        self.debug = debug
+
+    def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False):
+        """Parse the tokens of ``lexer`` beginning at the start rule ``start``.
+
+        Parameters:
+            lexer - the lexer thread that supplies the tokens.
+            start - name of the start rule.
+            value_stack, state_stack - optional initial stacks for the parser state.
+            start_interactive - if true, return an ``InteractiveParser`` positioned
+                at the initial state instead of running the parse.
+        """
+        parse_conf = ParseConf(self.parse_table, self.callbacks, start)
+        parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
+        if start_interactive:
+            return InteractiveParser(self, parser_state, parser_state.lexer)
+        return self.parse_from_state(parser_state)
+
+
+    def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None):
+        """Run the main LALR parser loop
+
+        Parameters:
+            state - the initial state. Changed in-place.
+            last_token - Used only for line information in case of an empty lexer.
+
+        Raises:
+            UnexpectedInput - re-raised with ``interactive_parser`` attached when possible.
+        """
+        try:
+            token = last_token
+            for token in state.lexer.lex(state):
+                assert token is not None
+                state.feed_token(token)
+
+            # Compare against None explicitly: a Token with an empty value is
+            # falsy, but its position must still be borrowed for '$END'.
+            end_token = Token.new_borrow_pos('$END', '', token) if token is not None else Token('$END', '', 0, 1, 1)
+            return state.feed_token(end_token, True)
+        except UnexpectedInput as e:
+            try:
+                e.interactive_parser = InteractiveParser(self, state, state.lexer)
+            except NameError:
+                # InteractiveParser is not available in this build; raise without it.
+                pass
+            raise e
+        except Exception:
+            if self.debug:
+                print("")
+                print("STATE STACK DUMP")
+                print("----------------")
+                for i, s in enumerate(state.state_stack):
+                    print('%d)' % i , s)
+                print("")
+
+            raise
+###}
@@ -0,0 +1,110 @@
+from copy import deepcopy, copy
+from typing import Dict, Any, Generic, List
+from ..lexer import Token, LexerThread
+from ..common import ParserCallbacks
+
+from .lalr_analysis import Shift, ParseTableBase, StateT
+from lark.exceptions import UnexpectedToken
+
+###{standalone
+
+class ParseConf(Generic[StateT]):
+    __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
+
+    parse_table: ParseTableBase[StateT]
+    callbacks: ParserCallbacks
+    start: str
+
+    start_state: StateT
+    end_state: StateT
+    states: Dict[StateT, Dict[str, tuple]]
+
+    def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str):
+        self.parse_table = parse_table
+
+        self.start_state = self.parse_table.start_states[start]
+        self.end_state = self.parse_table.end_states[start]
+        self.states = self.parse_table.states
+
+        self.callbacks = callbacks
+        self.start = start
+
+class ParserState(Generic[StateT]):
+    __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
+
+    parse_conf: ParseConf[StateT]
+    lexer: LexerThread
+    state_stack: List[StateT]
+    value_stack: list
+
+    def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None):
+        self.parse_conf = parse_conf
+        self.lexer = lexer
+        self.state_stack = state_stack or [self.parse_conf.start_state]
+        self.value_stack = value_stack or []
+
+    @property
+    def position(self) -> StateT:
+        return self.state_stack[-1]
+
+    # Necessary for match_examples() to work
+    def __eq__(self, other) -> bool:
+        if not isinstance(other, ParserState):
+            return NotImplemented
+        return len(self.state_stack) == len(other.state_stack) and self.position == other.position
+
+    def __copy__(self):
+        return self.copy()
+
+    def copy(self, deepcopy_values=True) -> 'ParserState[StateT]':
+        return type(self)(
+            self.parse_conf,
+            self.lexer, # XXX copy
+            copy(self.state_stack),
+            deepcopy(self.value_stack) if deepcopy_values else copy(self.value_stack),
+        )
+
+    def feed_token(self, token: Token, is_end=False) -> Any:
+        state_stack = self.state_stack
+        value_stack = self.value_stack
+        states = self.parse_conf.states
+        end_state = self.parse_conf.end_state
+        callbacks = self.parse_conf.callbacks
+
+        while True:
+            state = state_stack[-1]
+            try:
+                action, arg = states[state][token.type]
+            except KeyError:
+                expected = {s for s in states[state].keys() if s.isupper()}
+                raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
+
+            assert arg != end_state
+
+            if action is Shift:
+                # shift once and return
+                assert not is_end
+                state_stack.append(arg)
+                value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
+                return
+            else:
+                # reduce+shift as many times as necessary
+                rule = arg
+                size = len(rule.expansion)
+                if size:
+                    s = value_stack[-size:]
+                    del state_stack[-size:]
+                    del value_stack[-size:]
+                else:
+                    s = []
+
+                value = callbacks[rule](s) if callbacks else s
+
+                _action, new_state = states[state_stack[-1]][rule.origin.name]
+                assert _action is Shift
+                state_stack.append(new_state)
+                value_stack.append(value)
+
+                if is_end and state_stack[-1] == end_state:
+                    return value_stack[-1]
+###}
@@ -0,0 +1,166 @@
+"""This module implements an Earley parser with a dynamic lexer
+
+The core Earley algorithm used here is based on Elizabeth Scott's implementation, here:
+    https://www.sciencedirect.com/science/article/pii/S1571066108001497
+
+That is probably the best reference for understanding the algorithm here.
+
+The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format
+is better documented here:
+    http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
+
+Instead of running a lexer beforehand, or using a costly char-by-char method, this parser
+uses regular expressions by necessity, achieving high-performance while maintaining all of
+Earley's power in parsing any CFG.
+"""
+
+from typing import TYPE_CHECKING, Callable, Optional, List, Any
+from collections import defaultdict
+
+from ..tree import Tree
+from ..exceptions import UnexpectedCharacters
+from ..lexer import Token
+from ..grammar import Terminal
+from .earley import Parser as BaseParser
+from .earley_forest import TokenNode
+
+if TYPE_CHECKING:
+    from ..common import LexerConf, ParserConf
+
+class Parser(BaseParser):
+    """Earley parser whose scanner matches terminals directly against the input.
+
+    Instead of consuming pre-lexed tokens, each scan step calls
+    ``term_matcher`` on the input at the current position for every
+    terminal the parser currently expects.
+    """
+
+    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher: Callable,
+                 resolve_ambiguity: bool=True, complete_lex: bool=False, debug: bool=False,
+                 tree_class: Optional[Callable[[str, List], Any]]=Tree, ordered_sets: bool=True):
+        """Set up the base Earley parser plus the dynamic-lexing options.
+
+        ``complete_lex`` makes the scanner also try every shorter prefix of
+        each match (see ``scan`` in ``_parse``). The remaining arguments are
+        forwarded to ``BaseParser.__init__``.
+        """
+        BaseParser.__init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity,
+                            debug, tree_class, ordered_sets)
+        # Ignored terminals, wrapped as Terminal symbols so they can be passed to the matcher.
+        self.ignore = [Terminal(t) for t in lexer_conf.ignore]
+        self.complete_lex = complete_lex
+
+    def _parse(self, stream, columns, to_scan, start_symbol=None):
+        """Run the Earley loop over ``stream``, one element per step.
+
+        ``columns`` is extended in place with one new set per step. Returns
+        the scan buffer left after the last step. ``scan`` raises
+        UnexpectedCharacters when a step leaves nothing to continue with.
+        """
+
+        def scan(i, to_scan):
+            """The core Earley Scanner.
+
+            This is a custom implementation of the scanner that uses the
+            Lark lexer to match tokens. The scan list is built by the
+            Earley predictor, based on the previously completed tokens.
+            This ensures that at each phase of the parse we have a custom
+            lexer context, allowing for more complex ambiguities."""
+
+            node_cache = {}
+
+            # 1) Loop the expectations and ask the lexer to match.
+            # Since regexp is forward looking on the input stream, and we only
+            # want to process tokens when we hit the point in the stream at which
+            # they complete, we push all tokens into a buffer (delayed_matches), to
+            # be held possibly for a later parse step when we reach the point in the
+            # input stream at which they complete.
+            for item in self.Set(to_scan):
+                m = match(item.expect, stream, i)
+                if m:
+                    t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+                    delayed_matches[m.end()].append( (item, i, t) )
+
+                    if self.complete_lex:
+                        # Also register every shorter prefix of the match that the
+                        # terminal matches on its own. These prefix matches are run
+                        # on the matched text, so their end is offset by i.
+                        s = m.group(0)
+                        for j in range(1, len(s)):
+                            m = match(item.expect, s[:-j])
+                            if m:
+                                t = Token(item.expect.name, m.group(0), i, text_line, text_column)
+                                delayed_matches[i+m.end()].append( (item, i, t) )
+
+                    # XXX The following 3 lines were commented out for causing a bug. See issue #768
+                    # # Remove any items that successfully matched in this pass from the to_scan buffer.
+                    # # This ensures we don't carry over tokens that already matched, if we're ignoring below.
+                    # to_scan.remove(item)
+
+            # 3) Process any ignores. This is typically used for e.g. whitespace.
+            # We carry over any unmatched items from the to_scan buffer to be matched again after
+            # the ignore. This should allow us to use ignored symbols in non-terminals to implement
+            # e.g. mandatory spacing.
+            for x in self.ignore:
+                m = match(x, stream, i)
+                if m:
+                    # Carry over any items still in the scan buffer, to past the end of the ignored items.
+                    delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])
+
+                    # If we're ignoring up to the end of the file, carry over the start symbol if it already completed.
+                    delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])
+
+            next_to_scan = self.Set()
+            next_set = self.Set()
+            columns.append(next_set)
+            transitives.append({})
+
+            ## 4) Process Tokens from delayed_matches.
+            # This is the core of the Earley scanner. Create an SPPF node for each Token,
+            # and create the symbol node in the SPPF tree. Advance the item that completed,
+            # and add the resulting new item to either the Earley set (for processing by the
+            # completer/predictor) or the to_scan buffer for the next parse step.
+            for item, start, token in delayed_matches[i+1]:
+                if token is not None:
+                    # The token ends at this step, so its end position can be filled in now.
+                    token.end_line = text_line
+                    token.end_column = text_column + 1
+                    token.end_pos = i + 1
+
+                    new_item = item.advance()
+                    label = (new_item.s, new_item.start, i + 1)
+                    token_node = TokenNode(token, terminals[token.type])
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, self.SymbolNode(*label))
+                    new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token_node)
+                else:
+                    # An entry carried over an ignored match: the item itself is unchanged.
+                    new_item = item
+
+                if new_item.expect in self.TERMINALS:
+                    # add (B ::= Aai+1.B, h, y) to Q'
+                    next_to_scan.add(new_item)
+                else:
+                    # add (B ::= Aa+1.B, h, y) to Ei+1
+                    next_set.add(new_item)
+
+            del delayed_matches[i+1]    # No longer needed, so unburden memory
+
+            # Nothing completed here and nothing is pending for a later position: the input cannot be parsed.
+            if not next_set and not delayed_matches and not next_to_scan:
+                considered_rules = list(sorted(to_scan, key=lambda key: key.rule.origin.name))
+                raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan},
+                                           set(to_scan), state=frozenset(i.s for i in to_scan),
+                                           considered_rules=considered_rules
+                                           )
+
+            return next_to_scan, node_cache
+
+
+        # Matches buffered by scan(), keyed by the input position at which they end.
+        delayed_matches = defaultdict(list)
+        match = self.term_matcher
+        terminals = self.lexer_conf.terminals_by_name
+
+        # Cache for nodes & tokens created in a particular parse step.
+        transitives = [{}]
+
+        # Line/column of the current input position, both starting at 1.
+        text_line = 1
+        text_column = 1
+
+        ## The main Earley loop.
+        # Run the Prediction/Completion cycle for any Items in the current Earley set.
+        # Completions will be added to the SPPF tree, and predictions will be recursively
+        # processed down to terminals/empty nodes to be added to the scanner for the next
+        # step.
+        i = 0
+        node_cache = {}
+        for token in stream:
+            self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+            to_scan, node_cache = scan(i, to_scan)
+
+            # Advance the line/column counters past the element just processed.
+            if token == '\n':
+                text_line += 1
+                text_column = 1
+            else:
+                text_column += 1
+            i += 1
+
+        self.predict_and_complete(i, to_scan, columns, transitives, node_cache)
+
+        ## Column is now the final column in the parse.
+        assert i == len(columns)-1
+        return to_scan
@@ -0,0 +1,107 @@
+"""This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar.
+"""
+
+from typing import Dict, Callable, Iterable, Optional
+
+from .lark import Lark
+from .tree import Tree, ParseTree
+from .visitors import Transformer_InPlace
+from .lexer import Token, PatternStr, TerminalDef
+from .grammar import Terminal, NonTerminal, Symbol
+
+from .tree_matcher import TreeMatcher, is_discarded_terminal
+from .utils import is_id_continue
+
+def is_iter_empty(i):
+    try:
+        _ = next(i)
+        return False
+    except StopIteration:
+        return True
+
+
+class WriteTokensTransformer(Transformer_InPlace):
+    "Inserts discarded tokens into their correct place, according to the rules of grammar"
+
+    tokens: Dict[str, TerminalDef]
+    term_subs: Dict[str, Callable[[Symbol], str]]
+
+    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
+        self.tokens = tokens
+        self.term_subs = term_subs
+
+    def __default__(self, data, children, meta):
+        if not getattr(meta, 'match_tree', False):
+            return Tree(data, children)
+
+        iter_args = iter(children)
+        to_write = []
+        for sym in meta.orig_expansion:
+            if is_discarded_terminal(sym):
+                try:
+                    v = self.term_subs[sym.name](sym)
+                except KeyError:
+                    t = self.tokens[sym.name]
+                    if not isinstance(t.pattern, PatternStr):
+                        raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
+
+                    v = t.pattern.value
+                to_write.append(v)
+            else:
+                x = next(iter_args)
+                if isinstance(x, list):
+                    to_write += x
+                else:
+                    if isinstance(x, Token):
+                        assert Terminal(x.type) == sym, x
+                    else:
+                        assert NonTerminal(x.data) == sym, (sym, x)
+                    to_write.append(x)
+
+        assert is_iter_empty(iter_args)
+        return to_write
+
+
+class Reconstructor(TreeMatcher):
+    """
+    A Reconstructor that will, given a full parse Tree, generate source code.
+
+    Note:
+        The reconstructor cannot generate values from regexps. If you need to produce discarded
+        regexes, such as newlines, use `term_subs` and provide default values for them.
+
+    Parameters:
+        parser: a Lark instance
+        term_subs: a dictionary of [Terminal name as str] to [output text as str]
+    """
+
+    write_tokens: WriteTokensTransformer
+
+    def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
+        TreeMatcher.__init__(self, parser)
+
+        self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
+
+    def _reconstruct(self, tree):
+        unreduced_tree = self.match_tree(tree, tree.data)
+
+        res = self.write_tokens.transform(unreduced_tree)
+        for item in res:
+            if isinstance(item, Tree):
+                # TODO use orig_expansion.rulename to support templates
+                yield from self._reconstruct(item)
+            else:
+                yield item
+
+    def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
+        x = self._reconstruct(tree)
+        if postproc:
+            x = postproc(x)
+        y = []
+        prev_item = ''
+        for item in x:
+            if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
+                y.append(' ')
+            y.append(item)
+            prev_item = item
+        return ''.join(y)
@@ -0,0 +1,70 @@
+import sys
+from argparse import ArgumentParser, FileType
+from textwrap import indent
+from logging import DEBUG, INFO, WARN, ERROR
+from typing import Optional
+import warnings
+
+from lark import Lark, logger
+try:
+    from interegular import logger as interegular_logger
+    has_interegular = True
+except ImportError:
+    has_interegular = False
+
+lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options')
+
+flags = [
+    ('d', 'debug'),
+    'keep_all_tokens',
+    'regex',
+    'propagate_positions',
+    'maybe_placeholders',
+    'use_bytes'
+]
+
+options = ['start', 'lexer']
+
+lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times")
+lalr_argparser.add_argument('-s', '--start', action='append', default=[])
+lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual'))
+lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding='utf-8'), default=sys.stdout, help='the output file (default=stdout)')
+lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding='utf-8'), help='A valid .lark file')
+
+for flag in flags:
+    if isinstance(flag, tuple):
+        options.append(flag[1])
+        lalr_argparser.add_argument('-' + flag[0], '--' + flag[1], action='store_true')
+    elif isinstance(flag, str):
+        options.append(flag)
+        lalr_argparser.add_argument('--' + flag, action='store_true')
+    else:
+        raise NotImplementedError("flags must only contain strings or tuples of strings")
+
+
+def build_lalr(namespace):
+    logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)])
+    if has_interegular:
+        interegular_logger.setLevel(logger.getEffectiveLevel())
+    if len(namespace.start) == 0:
+        namespace.start.append('start')
+    kwargs = {n: getattr(namespace, n) for n in options}
+    return Lark(namespace.grammar_file, parser='lalr', **kwargs), namespace.out
+
+
+def showwarning_as_comment(message, category, filename, lineno, file=None, line=None):
+    # Based on warnings._showwarnmsg_impl
+    text = warnings.formatwarning(message, category, filename, lineno, line)
+    text = indent(text, '# ')
+    if file is None:
+        file = sys.stderr
+        if file is None:
+            return
+    try:
+        file.write(text)
+    except OSError:
+        pass
+
+
+def make_warnings_comments():
+    warnings.showwarning = showwarning_as_comment
@@ -0,0 +1,202 @@
+"Converts Nearley grammars to Lark"
+
+import os.path
+import sys
+import codecs
+import argparse
+
+
+from lark import Lark, Transformer, v_args
+
+# Lark grammar describing the Nearley grammar syntax handled by this tool:
+# rule definitions, macros, "@" directives and embedded {% ... %} JS blocks.
+nearley_grammar = r"""
+    start: (ruledef|directive)+
+
+    directive: "@" NAME (STRING|NAME)
+             | "@" JS  -> js_code
+    ruledef: NAME "->" expansions
+           | NAME REGEXP "->" expansions -> macro
+    expansions: expansion ("|" expansion)*
+
+    expansion: expr+ js
+
+    ?expr: item (":" /[+*?]/)?
+
+    ?item: rule|string|regexp|null
+         | "(" expansions ")"
+
+    rule: NAME
+    string: STRING
+    regexp: REGEXP
+    null: "null"
+    JS: /{%.*?%}/s
+    js: JS?
+
+    NAME: /[a-zA-Z_$]\w*/
+    COMMENT: /#[^\n]*/
+    REGEXP: /\[.*?\]/
+
+    STRING: _STRING "i"?
+
+    %import common.ESCAPED_STRING -> _STRING
+    %import common.WS
+    %ignore WS
+    %ignore COMMENT
+
+    """
+
+# Parser for Nearley sources, built once at import time.
+nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='basic')
+
+def _get_rulename(name):
+    name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name)
+    return 'n_' + name.replace('$', '__DOLLAR__').lower()
+
+@v_args(inline=True)
+class NearleyToLark(Transformer):
+    def __init__(self):
+        self._count = 0
+        self.extra_rules = {}
+        self.extra_rules_rev = {}
+        self.alias_js_code = {}
+
+    def _new_function(self, code):
+        name = 'alias_%d' % self._count
+        self._count += 1
+
+        self.alias_js_code[name] = code
+        return name
+
+    def _extra_rule(self, rule):
+        if rule in self.extra_rules_rev:
+            return self.extra_rules_rev[rule]
+
+        name = 'xrule_%d' % len(self.extra_rules)
+        assert name not in self.extra_rules
+        self.extra_rules[name] = rule
+        self.extra_rules_rev[rule] = name
+        return name
+
+    def rule(self, name):
+        return _get_rulename(name)
+
+    def ruledef(self, name, exps):
+        return '!%s: %s' % (_get_rulename(name), exps)
+
+    def expr(self, item, op):
+        rule = '(%s)%s' % (item, op)
+        return self._extra_rule(rule)
+
+    def regexp(self, r):
+        return '/%s/' % r
+
+    def null(self):
+        return ''
+
+    def string(self, s):
+        return self._extra_rule(s)
+
+    def expansion(self, *x):
+        x, js = x[:-1], x[-1]
+        if js.children:
+            js_code ,= js.children
+            js_code = js_code[2:-2]
+            alias = '-> ' + self._new_function(js_code)
+        else:
+            alias = ''
+        return ' '.join(x) + alias
+
+    def expansions(self, *x):
+        return '%s' % ('\n    |'.join(x))
+
+    def start(self, *rules):
+        return '\n'.join(filter(None, rules))
+
+def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
+    rule_defs = []
+
+    tree = nearley_grammar_parser.parse(g)
+    for statement in tree.children:
+        if statement.data == 'directive':
+            directive, arg = statement.children
+            if directive in ('builtin', 'include'):
+                folder = builtin_path if directive == 'builtin' else folder_path
+                path = os.path.join(folder, arg[1:-1])
+                if path not in includes:
+                    includes.add(path)
+                    with codecs.open(path, encoding='utf8') as f:
+                        text = f.read()
+                    rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
+            else:
+                assert False, directive
+        elif statement.data == 'js_code':
+            code ,= statement.children
+            code = code[2:-2]
+            js_code.append(code)
+        elif statement.data == 'macro':
+            pass    # TODO Add support for macros!
+        elif statement.data == 'ruledef':
+            rule_defs.append(n2l.transform(statement))
+        else:
+            raise Exception("Unknown statement: %s" % statement)
+
+    return rule_defs
+
+
+def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
+    import js2py
+
+    emit_code = []
+    def emit(x=None):
+        if x:
+            emit_code.append(x)
+        emit_code.append('\n')
+
+    js_code = ['function id(x) {return x[0];}']
+    n2l = NearleyToLark()
+    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
+    lark_g = '\n'.join(rule_defs)
+    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
+
+    emit('from lark import Lark, Transformer')
+    emit()
+    emit('grammar = ' + repr(lark_g))
+    emit()
+
+    for alias, code in n2l.alias_js_code.items():
+        js_code.append('%s = (%s);' % (alias, code))
+
+    if es6:
+        emit(js2py.translate_js6('\n'.join(js_code)))
+    else:
+        emit(js2py.translate_js('\n'.join(js_code)))
+    emit('class TransformNearley(Transformer):')
+    for alias in n2l.alias_js_code:
+        emit("    %s = var.get('%s').to_python()" % (alias, alias))
+    emit("    __default__ = lambda self, n, c, m: c if c else None")
+
+    emit()
+    emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
+    emit('def parse(text):')
+    emit('    return TransformNearley().transform(parser.parse(text))')
+
+    return ''.join(emit_code)
+
+def main(fn, start, nearley_lib, es6=False):
+    with codecs.open(fn, encoding='utf8') as f:
+        grammar = f.read()
+    return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
+
+def get_arg_parser():
+    parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
+    parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
+    parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
+    parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
+    parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
+    return parser
+
+# Script entry point: convert the grammar named on the command line and print the result.
+if __name__ == '__main__':
+    parser = get_arg_parser()
+    if len(sys.argv) == 1:
+        # No arguments given: show the usage on stderr and exit with an error status.
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    args = parser.parse_args()
+    print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))
@@ -0,0 +1,32 @@
+import sys
+import json
+
+from lark.grammar import Rule
+from lark.lexer import TerminalDef
+from lark.tools import lalr_argparser, build_lalr
+
+import argparse
+
+argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser],
+                                    description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file",
+                                    epilog='Look at the Lark documentation for more info on the options')
+
+
+def serialize(lark_inst, outfile):
+    data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
+    outfile.write('{\n')
+    outfile.write('  "data": %s,\n' % json.dumps(data))
+    outfile.write('  "memo": %s\n' % json.dumps(memo))
+    outfile.write('}\n')
+
+
+def main():
+    if len(sys.argv)==1:
+        argparser.print_help(sys.stderr)
+        sys.exit(1)
+    ns = argparser.parse_args()
+    serialize(*build_lalr(ns))
+
+
+# Run the tool when the module is executed as a script.
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,196 @@
+###{standalone
+#
+#
+#   Lark Stand-alone Generator Tool
+# ----------------------------------
+# Generates a stand-alone LALR(1) parser
+#
+# Git:    https://github.com/erezsh/lark
+# Author: Erez Shinan (erezshin@gmail.com)
+#
+#
+#    >>> LICENSE
+#
+#    This tool and its generated code use a separate license from Lark,
+#    and are subject to the terms of the Mozilla Public License, v. 2.0.
+#    If a copy of the MPL was not distributed with this
+#    file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+#    If you wish to purchase a commercial license for this tool and its
+#    generated code, you may contact me via email or otherwise.
+#
+#    If MPL2 is incompatible with your free or open-source project,
+#    contact me and we'll work it out.
+#
+#
+
+from copy import deepcopy
+from abc import ABC, abstractmethod
+from types import ModuleType
+from typing import (
+    TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
+    Union, Iterable, IO, TYPE_CHECKING, overload, Sequence,
+    Pattern as REPattern, ClassVar, Set, Mapping
+)
+###}
+
+import sys
+import token, tokenize
+import os
+from os import path
+from collections import defaultdict
+from functools import partial
+from argparse import ArgumentParser
+
+import lark
+from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
+
+
+from lark.grammar import Rule
+from lark.lexer import TerminalDef
+
+# Folder of this file, and its parent folder against which the paths below are resolved.
+_dir = path.dirname(__file__)
+_larkdir = path.join(_dir, path.pardir)
+
+
+# Files whose "standalone" sections are written to the generated module, in this order.
+# The first entry must stay this file: gen_standalone() skips strip_docstrings() for index 0.
+EXTRACT_STANDALONE_FILES = [
+    'tools/standalone.py',
+    'exceptions.py',
+    'utils.py',
+    'tree.py',
+    'visitors.py',
+    'grammar.py',
+    'lexer.py',
+    'common.py',
+    'parse_tree_builder.py',
+    'parsers/lalr_analysis.py',
+    'parsers/lalr_parser_state.py',
+    'parsers/lalr_parser.py',
+    'parsers/lalr_interactive_parser.py',
+    'parser_frontends.py',
+    'lark.py',
+    'indenter.py',
+]
+
+def extract_sections(lines):
+    section = None
+    text = []
+    sections = defaultdict(list)
+    for line in lines:
+        if line.startswith('###'):
+            if line[3] == '{':
+                section = line[4:].strip()
+            elif line[3] == '}':
+                sections[section] += text
+                section = None
+                text = []
+            else:
+                raise ValueError(line)
+        elif section:
+            text.append(line)
+
+    return {name: ''.join(text) for name, text in sections.items()}
+
+
+def strip_docstrings(line_gen):
+    """ Strip comments and docstrings from a file.
+    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
+    """
+    res = []
+
+    prev_toktype = token.INDENT
+    last_lineno = -1
+    last_col = 0
+
+    tokgen = tokenize.generate_tokens(line_gen)
+    for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
+        if slineno > last_lineno:
+            last_col = 0
+        if scol > last_col:
+            res.append(" " * (scol - last_col))
+        if toktype == token.STRING and prev_toktype == token.INDENT:
+            # Docstring
+            res.append("#--")
+        elif toktype == tokenize.COMMENT:
+            # Comment
+            res.append("##\n")
+        else:
+            res.append(ttext)
+        prev_toktype = toktype
+        last_col = ecol
+        last_lineno = elineno
+
+    return ''.join(res)
+
+
+def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
+    if output is None:
+        output = partial(print, file=out)
+
+    import pickle, zlib, base64
+    def compressed_output(obj):
+        s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
+        c = zlib.compress(s)
+        output(repr(base64.b64encode(c)))
+
+    def output_decompress(name):
+        output('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % locals())
+
+    output('# The file was automatically generated by Lark v%s' % lark.__version__)
+    output('__version__ = "%s"' % lark.__version__)
+    output()
+
+    for i, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
+        with open(os.path.join(_larkdir, pyfile)) as f:
+            code = extract_sections(f)['standalone']
+            if i:   # if not this file
+                code = strip_docstrings(partial(next, iter(code.splitlines(True))))
+            output(code)
+
+    data, m = lark_inst.memo_serialize([TerminalDef, Rule])
+    output('import pickle, zlib, base64')
+    if compress:
+        output('DATA = (')
+        compressed_output(data)
+        output(')')
+        output_decompress('DATA')
+        output('MEMO = (')
+        compressed_output(m)
+        output(')')
+        output_decompress('MEMO')
+    else:
+        output('DATA = (')
+        output(data)
+        output(')')
+        output('MEMO = (')
+        output(m)
+        output(')')
+
+
+    output('Shift = 0')
+    output('Reduce = 1')
+    output("def Lark_StandAlone(**kwargs):")
+    output("  return Lark._load_from_dict(DATA, MEMO, **kwargs)")
+
+
+
+
+def main():
+    make_warnings_comments()
+    parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool",
+                            parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
+    parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")
+    if len(sys.argv) == 1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    ns = parser.parse_args()
+
+    lark_inst, out = build_lalr(ns)
+    gen_standalone(lark_inst, out=out, compress=ns.compress)
+
+    ns.out.close()
+    ns.grammar_file.close()
+
+
+# Run the generator when the module is executed as a script.
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,281 @@
+import sys
+from copy import deepcopy
+
+from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, TYPE_CHECKING
+
+from .lexer import Token
+
+if TYPE_CHECKING:
+    from .lexer import TerminalDef
+    try:
+        import rich
+    except ImportError:
+        pass
+    from typing import Literal
+
+###{standalone
+
+class Meta:
+    """Holder for the meta-information attached to a tree node.
+
+    The attributes below are only declared at class level; the constructor
+    itself sets nothing but ``empty``.
+    """
+
+    empty: bool
+    line: int
+    column: int
+    start_pos: int
+    end_line: int
+    end_column: int
+    end_pos: int
+    orig_expansion: 'List[TerminalDef]'
+    match_tree: bool
+
+    def __init__(self):
+        # A freshly created instance is flagged as empty.
+        self.empty = True
+
+
+_Leaf_T = TypeVar("_Leaf_T")
+Branch = Union[_Leaf_T, 'Tree[_Leaf_T]']
+
+
+class Tree(Generic[_Leaf_T]):
+    """The main tree class.
+
+    Creates a new tree, and stores "data" and "children" in attributes of the same name.
+    Trees can be hashed and compared.
+
+    Parameters:
+        data: The name of the rule or alias
+        children: List of matched sub-rules and terminals
+        meta: Line & Column numbers (if ``propagate_positions`` is enabled).
+            meta attributes: (line, column, end_line, end_column, start_pos, end_pos,
+                              container_line, container_column, container_end_line, container_end_column)
+            container_* attributes consider all symbols, including those that have been inlined in the tree.
+            For example, in the rule 'a: _A B _C', the regular attributes will mark the start and end of B,
+            but the container_* attributes will also include _A and _C in the range. However, rules that
+            contain 'a' will consider it in full, including _A and _C for all attributes.
+    """
+
+    data: str
+    children: 'List[Branch[_Leaf_T]]'
+
+    def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None:
+        self.data = data
+        self.children = children
+        self._meta = meta
+
+    @property
+    def meta(self) -> Meta:
+        """The node's ``Meta`` object, created on first access when none was supplied."""
+        if self._meta is None:
+            self._meta = Meta()
+        return self._meta
+
+    def __repr__(self):
+        return 'Tree(%r, %r)' % (self.data, self.children)
+
+    # Positional sub-patterns for structural pattern matching: Tree(data, children)
+    __match_args__ = ("data", "children")
+
+    def _pretty_label(self):
+        """Return the label that ``_pretty`` prints for this node."""
+        return self.data
+
+    def _pretty(self, level, indent_str):
+        """Yield the text fragments of the indented representation, starting at depth `level`."""
+        yield f'{indent_str*level}{self._pretty_label()}'
+        # A node whose only child is not a Tree is printed on a single line, tab-separated
+        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
+            yield f'\t{self.children[0]}\n'
+        else:
+            yield '\n'
+            for n in self.children:
+                if isinstance(n, Tree):
+                    yield from n._pretty(level+1, indent_str)
+                else:
+                    yield f'{indent_str*(level+1)}{n}\n'
+
+    def pretty(self, indent_str: str='  ') -> str:
+        """Returns an indented string representation of the tree.
+
+        Great for debugging.
+        """
+        return ''.join(self._pretty(0, indent_str))
+
+    def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree':
+        """Returns a tree widget for the 'rich' library.
+
+        Example:
+            ::
+                from rich import print
+                from lark import Tree
+
+                tree = Tree('root', ['node1', 'node2'])
+                print(tree)
+        """
+        return self._rich(parent)
+
+    def _rich(self, parent):
+        """Add this node under `parent`, or create a new rich tree when `parent` is falsy."""
+        if parent:
+            tree = parent.add(f'[bold]{self.data}[/bold]')
+        else:
+            # Imported here, so 'rich' is only needed when this method is actually used
+            import rich.tree
+            tree = rich.tree.Tree(self.data)
+
+        for c in self.children:
+            if isinstance(c, Tree):
+                c._rich(tree)
+            else:
+                tree.add(f'[green]{c}[/green]')
+
+        return tree
+
+    def __eq__(self, other):
+        # Objects lacking a `data` or `children` attribute compare unequal
+        try:
+            return self.data == other.data and self.children == other.children
+        except AttributeError:
+            return False
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __hash__(self) -> int:
+        # Built from the same fields that __eq__ compares
+        return hash((self.data, tuple(self.children)))
+
+    def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
+        """Depth-first iteration.
+
+        Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
+        """
+        queue = [self]
+        # Keyed by id(), so each distinct node object is recorded once, in order of first visit
+        subtrees = dict()
+        for subtree in queue:
+            subtrees[id(subtree)] = subtree
+            # The queue grows while it is being iterated; only Tree children not yet recorded are appended
+            queue += [c for c in reversed(subtree.children)
+                      if isinstance(c, Tree) and id(c) not in subtrees]
+
+        del queue
+        # The collected nodes are returned in reverse order of first visit, so `self` comes last
+        return reversed(list(subtrees.values()))
+
+    def iter_subtrees_topdown(self):
+        """Depth-first, pre-order iteration.
+
+        Iterates over all the subtrees, returning nodes in the same order as pretty() does.
+        Unlike iter_subtrees(), no record of visited nodes is kept here.
+        """
+        stack = [self]
+        stack_append = stack.append
+        stack_pop = stack.pop
+        while stack:
+            node = stack_pop()
+            # Non-Tree children are pushed too; they are dropped here
+            if not isinstance(node, Tree):
+                continue
+            yield node
+            # Pushed in reverse, so the first child is popped (and yielded) first
+            for child in reversed(node.children):
+                stack_append(child)
+
+    def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]':
+        """Returns all nodes of the tree that evaluate pred(node) as true."""
+        return filter(pred, self.iter_subtrees())
+
+    def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]':
+        """Returns all nodes of the tree whose data equals the given data."""
+        return self.find_pred(lambda t: t.data == data)
+
+###}
+
+    def find_token(self, token_type: str) -> Iterator[_Leaf_T]:
+        """Returns all tokens whose type equals the given token_type.
+
+        This is a recursive function that will find tokens in all the subtrees.
+
+        Example:
+            >>> term_tokens = tree.find_token('TERM')
+        """
+        return self.scan_values(lambda v: isinstance(v, Token) and v.type == token_type)
+
+    def expand_kids_by_data(self, *data_values):
+        """Expand (inline) children with any of the given data values. Returns True if anything changed"""
+        changed = False
+        # Walk backwards, so that splicing at index i does not shift the indices still to be visited
+        for i in range(len(self.children)-1, -1, -1):
+            child = self.children[i]
+            if isinstance(child, Tree) and child.data in data_values:
+                self.children[i:i+1] = child.children
+                changed = True
+        return changed
+
+
+    def scan_values(self, pred: 'Callable[[Branch[_Leaf_T]], bool]') -> Iterator[_Leaf_T]:
+        """Return all values in the tree that evaluate pred(value) as true.
+
+        This can be used to find all the tokens in the tree.
+
+        Example:
+            >>> all_tokens = tree.scan_values(lambda v: isinstance(v, Token))
+        """
+        for c in self.children:
+            if isinstance(c, Tree):
+                # pred is only applied to non-Tree children; subtrees are searched recursively
+                for t in c.scan_values(pred):
+                    yield t
+            else:
+                if pred(c):
+                    yield c
+
+    def __deepcopy__(self, memo):
+        # Children are deep-copied; the meta object is passed on as is, so it is shared with the copy
+        return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)
+
+    def copy(self) -> 'Tree[_Leaf_T]':
+        """Return a new node of the same type with the same data and the same children list (no meta is passed)."""
+        return type(self)(self.data, self.children)
+
+    def set(self, data: str, children: 'List[Branch[_Leaf_T]]') -> None:
+        """Replace this node's data and children in place."""
+        self.data = data
+        self.children = children
+
+
+ParseTree = Tree['Token']
+
+
+class SlottedTree(Tree):
+    __slots__ = 'data', 'children', 'rule', '_meta'
+
+
+def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
+    graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
+    graph.write_png(filename)
+
+
+def pydot__tree_to_dot(tree: Tree, filename, rankdir="LR", **kwargs):
+    graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
+    graph.write(filename)
+
+
+def pydot__tree_to_graph(tree: Tree, rankdir="LR", **kwargs):
+    """Creates a colorful image that represents the tree (data+children, without meta)
+
+    Possible values for `rankdir` are "TB", "LR", "BT", "RL", corresponding to
+    directed graphs drawn from top to bottom, from left to right, from bottom to
+    top, and from right to left, respectively.
+
+    `kwargs` can be any graph attribute (e. g. `dpi=200`). For a list of
+    possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
+    """
+
+    import pydot  # type: ignore[import-not-found]
+    graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
+
+    i = [0]
+
+    def new_leaf(leaf):
+        node = pydot.Node(i[0], label=repr(leaf))
+        i[0] += 1
+        graph.add_node(node)
+        return node
+
+    def _to_pydot(subtree):
+        color = hash(subtree.data) & 0xffffff
+        color |= 0x808080
+
+        subnodes = [_to_pydot(child) if isinstance(child, Tree) else new_leaf(child)
+                    for child in subtree.children]
+        node = pydot.Node(i[0], style="filled", fillcolor="#%x" % color, label=subtree.data)
+        i[0] += 1
+        graph.add_node(node)
+
+        for subnode in subnodes:
+            graph.add_edge(pydot.Edge(node, subnode))
+
+        return node
+
+    _to_pydot(tree)
+    return graph
@@ -0,0 +1,199 @@
+"""Tree matcher based on Lark grammar"""
+
+import re
+from typing import List, Dict
+from collections import defaultdict
+
+from . import Tree, Token, Lark
+from .common import ParserConf
+from .exceptions import ConfigurationError
+from .parsers import earley
+from .grammar import Rule, Terminal, NonTerminal
+
+
+def is_discarded_terminal(t):
+    return t.is_term and t.filter_out
+
+
+class _MakeTreeMatch:
+    def __init__(self, name, expansion):
+        self.name = name
+        self.expansion = expansion
+
+    def __call__(self, args):
+        t = Tree(self.name, args)
+        t.meta.match_tree = True
+        t.meta.orig_expansion = self.expansion
+        return t
+
+
+def _best_from_group(seq, group_key, cmp_key):
+    d = {}
+    for item in seq:
+        key = group_key(item)
+        if key in d:
+            v1 = cmp_key(item)
+            v2 = cmp_key(d[key])
+            if v2 > v1:
+                d[key] = item
+        else:
+            d[key] = item
+    return list(d.values())
+
+
+def _best_rules_from_group(rules: List[Rule]) -> List[Rule]:
+    rules = _best_from_group(rules, lambda r: r, lambda r: -len(r.expansion))
+    rules.sort(key=lambda r: len(r.expansion))
+    return rules
+
+
+def _match(term, token):
+    if isinstance(token, Tree):
+        name, _args = parse_rulename(term.name)
+        return token.data == name
+    elif isinstance(token, Token):
+        return term == Terminal(token.type)
+    assert False, (term, token)
+
+
+def make_recons_rule(origin, expansion, old_expansion):
+    return Rule(origin, expansion, alias=_MakeTreeMatch(origin.name, old_expansion))
+
+
+def make_recons_rule_to_term(origin, term):
+    return make_recons_rule(origin, [Terminal(term.name)], [term])
+
+
+def parse_rulename(s):
+    "Parse rule names that may contain a template syntax (like rule{a, b, ...})"
+    name, args_str = re.match(r'(\w+)(?:{(.+)})?', s).groups()
+    args = args_str and [a.strip() for a in args_str.split(',')]
+    return name, args
+
+
+
+class ChildrenLexer:
+    """Lexer stand-in that hands out a prepared sequence of children as its token stream."""
+
+    def __init__(self, children):
+        self.children = children
+
+    def lex(self, parser_state):
+        # `parser_state` is accepted but not used; the stored children are returned as they are
+        return self.children
+
+class TreeMatcher:
+    """Match the elements of a tree node, based on an ontology
+    provided by a Lark grammar.
+
+    Supports templates and inlined rules (`rule{a, b,..}` and `_rule`)
+
+    Initialize with an instance of Lark.
+    """
+    # Rules that may only serve as the root of a match, keyed by rule name
+    rules_for_root: Dict[str, List[Rule]]
+    # Rules shared by every matching parser
+    rules: List[Rule]
+    parser: Lark
+
+    def __init__(self, parser: Lark):
+        # XXX TODO calling compile twice returns different results!
+        assert not parser.options.maybe_placeholders
+
+        if parser.options.postlex and parser.options.postlex.always_accept:
+            # If postlexer's always_accept is used, we need to recompile the grammar with empty terminals-to-keep
+            if not hasattr(parser, 'grammar'):
+                raise ConfigurationError('Source grammar not available from cached parser, use cache_grammar=True'
+                                         if parser.options.cache else "Source grammar not available!")
+            self.tokens, rules, _extra = parser.grammar.compile(parser.options.start, set())
+        else:
+            self.tokens = list(parser.terminals)
+            rules = list(parser.rules)
+
+        # Filled as a side effect of consuming _build_recons_rules() below
+        self.rules_for_root = defaultdict(list)
+
+        self.rules = list(self._build_recons_rules(rules))
+        self.rules.reverse()
+
+        # Choose the best rule from each group of {rule => [rule.alias]}, since we only really need one derivation.
+        self.rules = _best_rules_from_group(self.rules)
+
+        self.parser = parser
+        # One Earley parser per root rule name, built lazily by match_tree()
+        self._parser_cache: Dict[str, earley.Parser] = {}
+
+    def _build_recons_rules(self, rules: List[Rule]):
+        "Convert tree-parsing/construction rules to tree-matching rules"
+        expand1s = {r.origin for r in rules if r.options.expand1}
+
+        # origin -> every alias used by one of its rules
+        aliases = defaultdict(list)
+        for r in rules:
+            if r.alias:
+                aliases[r.origin].append(r.alias)
+
+        rule_names = {r.origin for r in rules}
+        # Origins that stay non-terminals in the matching grammar; every other symbol is turned into a Terminal
+        nonterminals = {sym for sym in rule_names
+                        if sym.name.startswith('_') or sym in expand1s or sym in aliases}
+
+        seen = set()
+        for r in rules:
+            recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
+                          for sym in r.expansion if not is_discarded_terminal(sym)]
+
+            # Skip self-recursive constructs
+            if recons_exp == [r.origin] and r.alias is None:
+                continue
+
+            # An aliased rule is matched under its alias instead of its origin
+            sym = NonTerminal(r.alias) if r.alias else r.origin
+            rule = make_recons_rule(sym, recons_exp, r.expansion)
+
+            if sym in expand1s and len(recons_exp) != 1:
+                self.rules_for_root[sym.name].append(rule)
+
+                # Emit the symbol -> terminal rule only once per symbol
+                if sym.name not in seen:
+                    yield make_recons_rule_to_term(sym, sym)
+                    seen.add(sym.name)
+            else:
+                if sym.name.startswith('_') or sym in expand1s:
+                    yield rule
+                else:
+                    self.rules_for_root[sym.name].append(rule)
+
+        # For every aliased origin: one rule per alias, plus one for the origin itself
+        for origin, rule_aliases in aliases.items():
+            for alias in rule_aliases:
+                yield make_recons_rule_to_term(origin, NonTerminal(alias))
+            yield make_recons_rule_to_term(origin, origin)
+
+    def match_tree(self, tree: Tree, rulename: str) -> Tree:
+        """Match the elements of `tree` to the symbols of rule `rulename`.
+
+        Parameters:
+            tree (Tree): the tree node to match
+            rulename (str): The expected full rule name (including template args)
+
+        Returns:
+            Tree: an unreduced tree that matches `rulename`
+
+        Raises:
+            UnexpectedToken: If no match was found.
+
+        Note:
+            It's the callers' responsibility to match the tree recursively.
+        """
+        if rulename:
+            # validate
+            name, _args = parse_rulename(rulename)
+            assert tree.data == name
+        else:
+            # A falsy rulename falls back to the name of the tree itself
+            rulename = tree.data
+
+        # TODO: ambiguity?
+        try:
+            parser = self._parser_cache[rulename]
+        except KeyError:
+            # First request for this rule name: build its parser and cache it
+            rules = self.rules + _best_rules_from_group(self.rules_for_root[rulename])
+
+            # TODO pass callbacks through dict, instead of alias?
+            callbacks = {rule: rule.alias for rule in rules}
+            conf = ParserConf(rules, callbacks, [rulename]) # type: ignore[arg-type]
+            parser = earley.Parser(self.parser.lexer_conf, conf, _match, resolve_ambiguity=True)
+            self._parser_cache[rulename] = parser
+
+        # find a full derivation
+        unreduced_tree: Tree = parser.parse(ChildrenLexer(tree.children), rulename)
+        assert unreduced_tree.data == rulename
+        return unreduced_tree
@@ -0,0 +1,180 @@
+"""This module defines utilities for matching and translating tree templates.
+
+A tree template is a tree that contains nodes that are template variables.
+
+"""
+
+from typing import Union, Optional, Mapping, Dict, Tuple, Iterator
+
+from lark import Tree, Transformer
+from lark.exceptions import MissingVariableError
+
+Branch = Union[Tree[str], str]
+TreeOrCode = Union[Tree[str], str]
+MatchResult = Dict[str, Tree]
+_TEMPLATE_MARKER = '$'
+
+
+class TemplateConf:
+    """Template Configuration
+
+    Allows customization for different uses of Template
+
+    parse() must return a Tree instance.
+    """
+
+    def __init__(self, parse=None):
+        self._parse = parse
+
+    def test_var(self, var: Union[Tree[str], str]) -> Optional[str]:
+        """Given a tree node, if it is a template variable return its name. Otherwise, return None.
+
+        This method may be overridden for customization
+
+        Parameters:
+            var: Tree | str - The tree node to test
+
+        """
+        if isinstance(var, str):
+            return _get_template_name(var)
+
+        if (
+            isinstance(var, Tree)
+            and var.data == "var"
+            and len(var.children) > 0
+            and isinstance(var.children[0], str)
+        ):
+            return _get_template_name(var.children[0])
+
+        return None
+
+    def _get_tree(self, template: TreeOrCode) -> Tree[str]:
+        if isinstance(template, str):
+            assert self._parse
+            template = self._parse(template)
+
+        if not isinstance(template, Tree):
+            raise TypeError("template parser must return a Tree instance")
+
+        return template
+
+    def __call__(self, template: Tree[str]) -> 'Template':
+        return Template(template, conf=self)
+
+    def _match_tree_template(self, template: TreeOrCode, tree: Branch) -> Optional[MatchResult]:
+        """Returns dict of {var: match} if found a match, else None
+        """
+        template_var = self.test_var(template)
+        if template_var:
+            if not isinstance(tree, Tree):
+                raise TypeError(f"Template variables can only match Tree instances. Not {tree!r}")
+            return {template_var: tree}
+
+        if isinstance(template, str):
+            if template == tree:
+                return {}
+            return None
+
+        assert isinstance(template, Tree) and isinstance(tree, Tree), f"template={template} tree={tree}"
+
+        if template.data == tree.data and len(template.children) == len(tree.children):
+            res = {}
+            for t1, t2 in zip(template.children, tree.children):
+                matches = self._match_tree_template(t1, t2)
+                if matches is None:
+                    return None
+
+                res.update(matches)
+
+            return res
+
+        return None
+
+
+class _ReplaceVars(Transformer[str, Tree[str]]):
+    def __init__(self, conf: TemplateConf, vars: Mapping[str, Tree[str]]) -> None:
+        super().__init__()
+        self._conf = conf
+        self._vars = vars
+
+    def __default__(self, data, children, meta) -> Tree[str]:
+        tree = super().__default__(data, children, meta)
+
+        var = self._conf.test_var(tree)
+        if var:
+            try:
+                return self._vars[var]
+            except KeyError:
+                raise MissingVariableError(f"No mapping for template variable ({var})")
+        return tree
+
+
+class Template:
+    """Represents a tree template, tied to a specific configuration
+
+    A tree template is a tree that contains nodes that are template variables.
+    Those variables will match any tree.
+    (future versions may support annotations on the variables, to allow more complex templates)
+    """
+
+    def __init__(self, tree: Tree[str], conf: TemplateConf = TemplateConf()):
+        self.conf = conf
+        self.tree = conf._get_tree(tree)
+
+    def match(self, tree: TreeOrCode) -> Optional[MatchResult]:
+        """Match a tree template to a tree.
+
+        A tree template without variables will only match ``tree`` if it is equal to the template.
+
+        Parameters:
+            tree (Tree): The tree to match to the template
+
+        Returns:
+            Optional[Dict[str, Tree]]: If match is found, returns a dictionary mapping
+                template variable names to their matching tree nodes.
+                If no match was found, returns None.
+        """
+        tree = self.conf._get_tree(tree)
+        return self.conf._match_tree_template(self.tree, tree)
+
+    def search(self, tree: TreeOrCode) -> Iterator[Tuple[Tree[str], MatchResult]]:
+        """Search for all occurrences of the tree template inside ``tree``.
+        """
+        tree = self.conf._get_tree(tree)
+        for subtree in tree.iter_subtrees():
+            res = self.match(subtree)
+            if res:
+                yield subtree, res
+
+    def apply_vars(self, vars: Mapping[str, Tree[str]]) -> Tree[str]:
+        """Apply vars to the template tree
+        """
+        return _ReplaceVars(self.conf, vars).transform(self.tree)
+
+
+def translate(t1: Template, t2: Template, tree: TreeOrCode):
+    """Search tree and translate each occurrence of t1 into t2.
+    """
+    tree = t1.conf._get_tree(tree)      # ensure it's a tree, parse if necessary and possible
+    for subtree, vars in t1.search(tree):
+        res = t2.apply_vars(vars)
+        subtree.set(res.data, res.children)
+    return tree
+
+
+class TemplateTranslator:
+    """Utility class for translating a collection of patterns
+    """
+
+    def __init__(self, translations: Mapping[Template, Template]):
+        assert all(isinstance(k, Template) and isinstance(v, Template) for k, v in translations.items())
+        self.translations = translations
+
+    def translate(self, tree: Tree[str]):
+        for k, v in self.translations.items():
+            tree = translate(k, v, tree)
+        return tree
+
+
+def _get_template_name(value: str) -> Optional[str]:
+    return value.lstrip(_TEMPLATE_MARKER) if value.startswith(_TEMPLATE_MARKER) else None
@@ -0,0 +1,416 @@
+import unicodedata
+import os
+from itertools import product
+from collections import deque
+from typing import Callable, Iterator, List, Optional, Tuple, Type, TypeVar, Union, Dict, Any, Sequence, Iterable, AbstractSet
+
+###{standalone
+import sys, re
+import logging
+from dataclasses import dataclass
+from typing import Generic, AnyStr
+
+logger: logging.Logger = logging.getLogger("lark")
+logger.addHandler(logging.StreamHandler())
+# Set to highest level, since we have some warnings amongst the code
+# By default, we should not output any log messages
+logger.setLevel(logging.CRITICAL)
+
+
+NO_VALUE = object()
+
+T = TypeVar("T")
+
+
+def classify(seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None) -> Dict:
+    d: Dict[Any, Any] = {}
+    for item in seq:
+        k = key(item) if (key is not None) else item
+        v = value(item) if (value is not None) else item
+        try:
+            d[k].append(v)
+        except KeyError:
+            d[k] = [v]
+    return d
+
+
+def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any:
+    if isinstance(data, dict):
+        if '__type__' in data:  # Object
+            class_ = namespace[data['__type__']]
+            return class_.deserialize(data, memo)
+        elif '@' in data:
+            return memo[data['@']]
+        return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
+    elif isinstance(data, list):
+        return [_deserialize(value, namespace, memo) for value in data]
+    return data
+
+
+_T = TypeVar("_T", bound="Serialize")
+
+class Serialize:
+    """Safe-ish serialization interface that doesn't rely on Pickle
+
+    Attributes:
+        __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
+        __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
+                                        Should include all field types that aren't builtin types.
+    """
+
+    def memo_serialize(self, types_to_memoize: List) -> Any:
+        memo = SerializeMemoizer(types_to_memoize)
+        return self.serialize(memo), memo.serialize()
+
+    def serialize(self, memo = None) -> Dict[str, Any]:
+        if memo and memo.in_types(self):
+            return {'@': memo.memoized.get(self)}
+
+        fields = getattr(self, '__serialize_fields__')
+        res = {f: _serialize(getattr(self, f), memo) for f in fields}
+        res['__type__'] = type(self).__name__
+        if hasattr(self, '_serialize'):
+            self._serialize(res, memo)
+        return res
+
+    @classmethod
+    def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T:
+        namespace = getattr(cls, '__serialize_namespace__', [])
+        namespace = {c.__name__:c for c in namespace}
+
+        fields = getattr(cls, '__serialize_fields__')
+
+        if '@' in data:
+            return memo[data['@']]
+
+        inst = cls.__new__(cls)
+        for f in fields:
+            try:
+                setattr(inst, f, _deserialize(data[f], namespace, memo))
+            except KeyError as e:
+                raise KeyError("Cannot find key for class", cls, e)
+
+        if hasattr(inst, '_deserialize'):
+            inst._deserialize()
+
+        return inst
+
+
+class SerializeMemoizer(Serialize):
+    "A version of serialize that memoizes objects to reduce space"
+
+    __serialize_fields__ = 'memoized',
+
+    def __init__(self, types_to_memoize: List) -> None:
+        self.types_to_memoize = tuple(types_to_memoize)
+        self.memoized = Enumerator()
+
+    def in_types(self, value: Serialize) -> bool:
+        return isinstance(value, self.types_to_memoize)
+
+    def serialize(self) -> Dict[int, Any]:  # type: ignore[override]
+        return _serialize(self.memoized.reversed(), None)
+
+    @classmethod
+    def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]:  # type: ignore[override]
+        return _deserialize(data, namespace, memo)
+
+
+try:
+    import regex
+    _has_regex = True
+except ImportError:
+    _has_regex = False
+
+if sys.version_info >= (3, 11):
+    import re._parser as sre_parse
+    import re._constants as sre_constants
+else:
+    import sre_parse
+    import sre_constants
+
+categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
+
+def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
+    if _has_regex:
+        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
+        # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
+        # match here below.
+        regexp_final = re.sub(categ_pattern, 'A', expr)
+    else:
+        if re.search(categ_pattern, expr):
+            raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
+        regexp_final = expr
+    try:
+        # Fixed in next version (past 0.960) of typeshed
+        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
+    except sre_constants.error:
+        if not _has_regex:
+            raise ValueError(expr)
+        else:
+            # sre_parse does not support the new features in regex. To not completely fail in that case,
+            # we manually test for the most important info (whether the empty string is matched)
+            c = regex.compile(regexp_final)
+            # Python 3.11.7 introducded sre_parse.MAXWIDTH that is used instead of MAXREPEAT
+            # See lark-parser/lark#1376 and python/cpython#109859
+            MAXWIDTH = getattr(sre_parse, "MAXWIDTH", sre_constants.MAXREPEAT)
+            if c.match('') is None:
+                # MAXREPEAT is a none pickable subclass of int, therefore needs to be converted to enable caching
+                return 1, int(MAXWIDTH)
+            else:
+                return 0, int(MAXWIDTH)
+
+
+@dataclass(frozen=True)
+class TextSlice(Generic[AnyStr]):
+    """A view of a string or bytes object, between the start and end indices.
+
+    Never creates a copy.
+
+    Lark accepts instances of TextSlice as input (instead of a string),
+    when the lexer is 'basic' or 'contextual'.
+
+    Args:
+        text (str or bytes): The text to slice.
+        start (int): The start index. Negative indices are supported.
+        end (int): The end index. Negative indices are supported.
+
+    Raises:
+        TypeError: If `text` is not a `str` or `bytes`.
+        AssertionError: If `start` or `end` are out of bounds.
+
+    Examples:
+        >>> TextSlice("Hello, World!", 7, -1)
+        TextSlice(text='Hello, World!', start=7, end=12)
+
+        >>> TextSlice("Hello, World!", 7, None).count("o")
+        1
+
+    """
+    text: AnyStr
+    start: int
+    end: int
+
+    def __post_init__(self):
+        if not isinstance(self.text, (str, bytes)):
+            raise TypeError("text must be str or bytes")
+
+        if self.start < 0:
+            object.__setattr__(self, 'start', self.start + len(self.text))
+            assert self.start >=0
+
+        if self.end is None:
+            object.__setattr__(self, 'end', len(self.text))
+        elif self.end < 0:
+            object.__setattr__(self, 'end', self.end + len(self.text))
+            assert self.end <= len(self.text)
+
+    @classmethod
+    def cast_from(cls, text: 'TextOrSlice') -> 'TextSlice[AnyStr]':
+        if isinstance(text, TextSlice):
+            return text
+
+        return cls(text, 0, len(text))
+
+    def is_complete_text(self):
+        return self.start == 0 and self.end == len(self.text)
+
+    def __len__(self):
+        return self.end - self.start
+
+    def count(self, substr: AnyStr):
+        return self.text.count(substr, self.start, self.end)
+
+    def rindex(self, substr: AnyStr):
+        return self.text.rindex(substr, self.start, self.end)
+
+
+TextOrSlice = Union[AnyStr, 'TextSlice[AnyStr]']
+LarkInput = Union[AnyStr, TextSlice[AnyStr], Any]
+
+###}
+
+
+_ID_START =    'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
+_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)
+
+def _test_unicode_category(s: str, categories: Sequence[str]) -> bool:
+    if len(s) != 1:
+        return all(_test_unicode_category(char, categories) for char in s)
+    return s == '_' or unicodedata.category(s) in categories
+
+def is_id_continue(s: str) -> bool:
+    """
+    Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, indian vowels, non-latin
+    numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
+    """
+    return _test_unicode_category(s, _ID_CONTINUE)
+
+def is_id_start(s: str) -> bool:
+    """
+    Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, indian vowels, non-latin
+    numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
+    """
+    return _test_unicode_category(s, _ID_START)
+
+
+def dedup_list(l: Iterable[T]) -> List[T]:
+    """Given a list (l) will removing duplicates from the list,
+       preserving the original order of the list. Assumes that
+       the list entries are hashable."""
+    return list(dict.fromkeys(l))
+
+
+class Enumerator(Serialize):
+    def __init__(self) -> None:
+        self.enums: Dict[Any, int] = {}
+
+    def get(self, item) -> int:
+        if item not in self.enums:
+            self.enums[item] = len(self.enums)
+        return self.enums[item]
+
+    def __len__(self):
+        return len(self.enums)
+
+    def reversed(self) -> Dict[int, Any]:
+        r = {v: k for k, v in self.enums.items()}
+        assert len(r) == len(self.enums)
+        return r
+
+
+
+def combine_alternatives(lists):
+    """
+    Accepts a list of alternatives, and enumerates all their possible concatenations.
+
+    Each combination is produced by ``itertools.product`` and is therefore a tuple.
+    The only exception is empty input, which returns ``[[]]``.
+
+    Examples:
+        >>> combine_alternatives([range(2), [4,5]])
+        [(0, 4), (0, 5), (1, 4), (1, 5)]
+
+        >>> combine_alternatives(["abc", "xy", '$'])
+        [('a', 'x', '$'), ('a', 'y', '$'), ('b', 'x', '$'), ('b', 'y', '$'), ('c', 'x', '$'), ('c', 'y', '$')]
+
+        >>> combine_alternatives([])
+        [[]]
+    """
+    if not lists:
+        return [[]]
+    # Every alternative must be non-empty, otherwise the product would be empty.
+    assert all(l for l in lists), lists
+    return list(product(*lists))
+
+# atomicwrites is an optional dependency: remember whether it could be imported,
+# so that FS.open can choose between an atomic write and the builtin open().
+try:
+    import atomicwrites
+    _has_atomicwrites = True
+except ImportError:
+    _has_atomicwrites = False
+
+class FS:
+    exists = staticmethod(os.path.exists)
+
+    @staticmethod
+    def open(name, mode="r", **kwargs):
+        if _has_atomicwrites and "w" in mode:
+            return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs)
+        else:
+            return open(name, mode, **kwargs)
+
+
+class fzset(frozenset):
+    def __repr__(self):
+        return '{%s}' % ', '.join(map(repr, self))
+
+
+def classify_bool(seq: Iterable, pred: Callable) -> Any:
+    false_elems = []
+    true_elems = [elem for elem in seq if pred(elem) or false_elems.append(elem)]  # type: ignore[func-returns-value]
+    return true_elems, false_elems
+
+
+def bfs(initial: Iterable, expand: Callable) -> Iterator:
+    open_q = deque(list(initial))
+    visited = set(open_q)
+    while open_q:
+        node = open_q.popleft()
+        yield node
+        for next_node in expand(node):
+            if next_node not in visited:
+                visited.add(next_node)
+                open_q.append(next_node)
+
+def bfs_all_unique(initial, expand):
+    "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
+    open_q = deque(list(initial))
+    while open_q:
+        node = open_q.popleft()
+        yield node
+        open_q += expand(node)
+
+
+def _serialize(value: Any, memo: Optional[SerializeMemoizer]) -> Any:
+    if isinstance(value, Serialize):
+        return value.serialize(memo)
+    elif isinstance(value, list):
+        return [_serialize(elem, memo) for elem in value]
+    elif isinstance(value, frozenset):
+        return list(value)  # TODO reversible?
+    elif isinstance(value, dict):
+        return {key:_serialize(elem, memo) for key, elem in value.items()}
+    # assert value is None or isinstance(value, (int, float, str, tuple)), value
+    return value
+
+
+
+
+def small_factors(n: int, max_factor: int) -> List[Tuple[int, int]]:
+    """
+    Splits n up into smaller factors and summands <= max_factor.
+    Returns a list of [(a, b), ...]
+    so that the following code returns n:
+
+    n = 1
+    for a, b in values:
+        n = n * a + b
+
+    Currently, we also keep a + b <= max_factor, but that might change
+    """
+    assert n >= 0
+    assert max_factor > 2
+    if n <= max_factor:
+        return [(n, 0)]
+
+    for a in range(max_factor, 1, -1):
+        r, b = divmod(n, a)
+        if a + b <= max_factor:
+            return small_factors(r, max_factor) + [(a, b)]
+    assert False, "Failed to factorize %s" % n
+
+
+class OrderedSet(AbstractSet[T]):
+    """A minimal OrderedSet implementation, using a dictionary.
+
+    (relies on the dictionary being ordered)
+
+    The items are the keys of ``self.d``; the values are always ``None``.
+    """
+    def __init__(self, items: Iterable[T] =()):
+        # dict.fromkeys keeps the first occurrence of each item and drops later repeats.
+        self.d = dict.fromkeys(items)
+
+    def __contains__(self, item: Any) -> bool:
+        return item in self.d
+
+    def add(self, item: T):
+        """Insert ``item``; assigning to an existing key leaves its position unchanged."""
+        self.d[item] = None
+
+    def __iter__(self) -> Iterator[T]:
+        return iter(self.d)
+
+    def remove(self, item: T):
+        """Delete ``item``; raises ``KeyError`` if it is not present."""
+        del self.d[item]
+
+    def __bool__(self):
+        return bool(self.d)
+
+    def __len__(self) -> int:
+        return len(self.d)
+
+    def __repr__(self):
+        # Renders as e.g. ``OrderedSet(1, 2, 3)``, using the actual (sub)class name.
+        return f"{type(self).__name__}({', '.join(map(repr,self))})"
@@ -0,0 +1,596 @@
+from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional, Any, cast
+from abc import ABC
+
+from .utils import combine_alternatives
+from .tree import Tree, Branch
+from .exceptions import VisitError, GrammarError
+from .lexer import Token
+
+###{standalone
+from functools import wraps, update_wrapper
+from inspect import getmembers, getmro
+
+# Type variables for the generic classes in this module:
+# _Leaf_T / _Return_T parameterize a transformer's input leaf type and its result type,
+# _Leaf_U / _Return_V describe the second transformer when two are chained with ``*``,
+# and _R is the return type of an Interpreter method (see _InterMethod).
+_Return_T = TypeVar('_Return_T')
+_Return_V = TypeVar('_Return_V')
+_Leaf_T = TypeVar('_Leaf_T')
+_Leaf_U = TypeVar('_Leaf_U')
+_R = TypeVar('_R')
+# A callback function, and anything v_args may decorate (a callback or a whole class).
+_FUNC = Callable[..., _Return_T]
+_DECORATED = Union[_FUNC, type]
+
+class _DiscardType:
+    """When the Discard value is returned from a transformer callback,
+    that node is discarded and won't appear in the parent.
+
+    The transformers detect it by identity (``res is not Discard``), so callbacks
+    must return the module-level ``Discard`` object itself.
+
+    Note:
+        This feature is disabled when the transformer is provided to Lark
+        using the ``transformer`` keyword (aka Tree-less LALR mode).
+
+    Example:
+        ::
+
+            class T(Transformer):
+                def ignore_tree(self, children):
+                    return Discard
+
+                def IGNORE_TOKEN(self, token):
+                    return Discard
+    """
+
+    def __repr__(self):
+        return "lark.visitors.Discard"
+
+# The single shared instance that callbacks return to drop a node.
+Discard = _DiscardType()
+
+# Transformers
+
+class _Decoratable:
+    "Provides support for decorating methods with @v_args"
+
+    @classmethod
+    def _apply_v_args(cls, visit_wrapper):
+        """Wrap every public callable defined on ``cls`` itself in a ``_VArgsWrapper``.
+
+        Called when ``v_args`` decorates a whole class. Returns ``cls`` (modified in place).
+        """
+        mro = getmro(cls)
+        assert mro[0] is cls
+        # Names of all members that any base class provides.
+        libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
+        for name, value in getmembers(cls):
+
+            # Make sure the function isn't inherited (unless it's overwritten)
+            if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
+                continue
+            if not callable(value):
+                continue
+
+            # Skip if v_args already applied (at the function level)
+            if isinstance(cls.__dict__[name], _VArgsWrapper):
+                continue
+
+            # Wrap the raw class attribute (from __dict__), not the value getmembers returned.
+            setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper))
+        return cls
+
+    def __class_getitem__(cls, _):
+        # Subscripting the class (``cls[...]``) is accepted and simply returns the class itself.
+        return cls
+
+
+class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
+    """Transformers work bottom-up (or depth-first), starting with visiting the leaves and working
+    their way up until ending at the root of the tree.
+
+    For each node visited, the transformer will call the appropriate method (callbacks), according to the
+    node's ``data``, and use the returned value to replace the node, thereby creating a new tree structure.
+
+    Transformers can be used to implement map & reduce patterns. Because nodes are reduced from leaf to root,
+    at any point the callbacks may assume the children have already been transformed (if applicable).
+
+    If the transformer cannot find a method with the right name, it will instead call ``__default__``, which by
+    default creates a copy of the node.
+
+    To discard a node, return Discard (``lark.visitors.Discard``).
+
+    ``Transformer`` can do anything ``Visitor`` can do, but because it reconstructs the tree,
+    it is slightly less efficient.
+
+    A transformer without methods essentially performs a non-memoized partial deepcopy.
+
+    All these classes implement the transformer interface:
+
+    - ``Transformer`` - Recursively transforms the tree. This is the one you probably want.
+    - ``Transformer_NonRecursive`` - Non-recursive. Like ``Transformer``, doesn't change the original tree
+    - ``Transformer_InPlace`` - Non-recursive. Changes the tree in-place instead of returning new instances
+    - ``Transformer_InPlaceRecursive`` - Recursive. Changes the tree in-place instead of returning new instances
+
+    Parameters:
+        visit_tokens (bool, optional): Should the transformer visit tokens in addition to rules.
+                                       Setting this to ``False`` is slightly faster. Defaults to ``True``.
+                                       (For processing ignored tokens, use the ``lexer_callbacks`` options)
+
+    """
+    __visit_tokens__ = True   # For backwards compatibility
+
+    def __init__(self,  visit_tokens: bool=True) -> None:
+        self.__visit_tokens__ = visit_tokens
+
+    def _call_userfunc(self, tree, new_children=None):
+        """Invoke the callback named ``tree.data`` (or ``__default__``) and return its result.
+
+        ``new_children``, when given, is used instead of ``tree.children``.
+        Exceptions raised by the callback are wrapped in ``VisitError``, except
+        ``GrammarError``, which propagates unchanged.
+        """
+        # Assumes tree is already transformed
+        children = new_children if new_children is not None else tree.children
+        try:
+            f = getattr(self, tree.data)
+        except AttributeError:
+            return self.__default__(tree.data, children, tree.meta)
+        else:
+            # The call sits in ``else`` so that an AttributeError raised inside the
+            # callback is not mistaken for a missing callback.
+            try:
+                wrapper = getattr(f, 'visit_wrapper', None)
+                if wrapper is not None:
+                    # Callbacks decorated by v_args carry a visit_wrapper that adapts the arguments.
+                    return f.visit_wrapper(f, tree.data, children, tree.meta)
+                else:
+                    return f(children)
+            except GrammarError:
+                raise
+            except Exception as e:
+                raise VisitError(tree.data, tree, e)
+
+    def _call_userfunc_token(self, token):
+        """Invoke the callback named ``token.type`` (or ``__default_token__``) and return its result.
+
+        Error handling mirrors ``_call_userfunc``.
+        """
+        try:
+            f = getattr(self, token.type)
+        except AttributeError:
+            return self.__default_token__(token)
+        else:
+            try:
+                return f(token)
+            except GrammarError:
+                raise
+            except Exception as e:
+                raise VisitError(token.type, token, e)
+
+    def _transform_children(self, children):
+        """Yield the transformed version of each child, skipping results that are ``Discard``."""
+        for c in children:
+            if isinstance(c, Tree):
+                res = self._transform_tree(c)
+            elif self.__visit_tokens__ and isinstance(c, Token):
+                res = self._call_userfunc_token(c)
+            else:
+                # Neither a tree nor a visited token: pass the child through unchanged.
+                res = c
+
+            if res is not Discard:
+                yield res
+
+    def _transform_tree(self, tree):
+        # Children first (recursively), then the node itself.
+        children = list(self._transform_children(tree.children))
+        return self._call_userfunc(tree, children)
+
+    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
+        "Transform the given tree, and return the final result"
+        # Treat the root as a single child so that a discarded root is handled uniformly.
+        res = list(self._transform_children([tree]))
+        if not res:
+            # The root itself was discarded.
+            return None     # type: ignore[return-value]
+        assert len(res) == 1
+        return res[0]
+
+    def __mul__(
+            self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]',
+            other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]'
+    ) -> 'TransformerChain[_Leaf_T, _Return_V]':
+        """Chain two transformers together, returning a new transformer.
+        """
+        return TransformerChain(self, other)
+
+    def __default__(self, data, children, meta):
+        """Default function that is called if there is no attribute matching ``data``
+
+        Can be overridden. Defaults to creating a new copy of the tree node (i.e. ``return Tree(data, children, meta)``)
+        """
+        return Tree(data, children, meta)
+
+    def __default_token__(self, token):
+        """Default function that is called if there is no attribute matching ``token.type``
+
+        Can be overridden. Defaults to returning the token as-is.
+        """
+        return token
+
+
+def merge_transformers(base_transformer=None, **transformers_to_merge):
+    """Merge a collection of transformers into the base_transformer, each into its own 'namespace'.
+
+    When called, it will collect the methods from each transformer, and assign them to base_transformer,
+    with their name prefixed with the given keyword, as ``prefix__methodname``.
+
+    This function is especially useful for processing grammars that import other grammars,
+    thereby creating some of their rules in a 'namespace'. (i.e with a consistent name prefix).
+    In this case, the key for the transformer should match the name of the imported grammar.
+
+    Parameters:
+        base_transformer (Transformer, optional): The transformer that all other transformers will be added to.
+        **transformers_to_merge: Keyword arguments, in the form of ``name_prefix = transformer``.
+
+    Raises:
+        AttributeError: In case of a name collision in the merged methods
+
+    Example:
+        ::
+
+            class TBase(Transformer):
+                def start(self, children):
+                    return children[0] + 'bar'
+
+            class TImportedGrammar(Transformer):
+                def foo(self, children):
+                    return "foo"
+
+            composed_transformer = merge_transformers(TBase(), imported=TImportedGrammar())
+
+            t = Tree('start', [ Tree('imported__foo', []) ])
+
+            assert composed_transformer.transform(t) == 'foobar'
+
+    """
+    if base_transformer is None:
+        base_transformer = Transformer()
+    for prefix, transformer in transformers_to_merge.items():
+        for method_name in dir(transformer):
+            method = getattr(transformer, method_name)
+            if not callable(method):
+                continue
+            if method_name.startswith("_") or method_name == "transform":
+                continue
+            prefixed_method = prefix + "__" + method_name
+            if hasattr(base_transformer, prefixed_method):
+                raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)
+
+            setattr(base_transformer, prefixed_method, method)
+
+    return base_transformer
+
+
+class InlineTransformer(Transformer):   # XXX Deprecated
+    def _call_userfunc(self, tree, new_children=None):
+        # Assumes tree is already transformed
+        children = new_children if new_children is not None else tree.children
+        try:
+            f = getattr(self, tree.data)
+        except AttributeError:
+            return self.__default__(tree.data, children, tree.meta)
+        else:
+            return f(*children)
+
+
+class TransformerChain(Generic[_Leaf_T, _Return_T]):
+
+    transformers: 'Tuple[Union[Transformer, TransformerChain], ...]'
+
+    def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None:
+        self.transformers = transformers
+
+    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
+        for t in self.transformers:
+            tree = t.transform(tree)
+        return cast(_Return_T, tree)
+
+    def __mul__(
+            self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]',
+            other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]'
+    ) -> 'TransformerChain[_Leaf_T, _Return_V]':
+        return TransformerChain(*self.transformers + (other,))
+
+
+class Transformer_InPlace(Transformer[_Leaf_T, _Return_T]):
+    """Same as Transformer, but non-recursive, and changes the tree in-place instead of returning new instances
+
+    Useful for huge trees. Conservative in memory.
+    """
+    def _transform_tree(self, tree):           # Cancel recursion
+        # Only call the callback; the children are handled by the loop in transform().
+        return self._call_userfunc(tree)
+
+    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
+        """Transform ``tree`` in place and return the result of the root's callback."""
+        # NOTE(review): relies on iter_subtrees() yielding children before their parents - confirm in Tree.
+        for subtree in tree.iter_subtrees():
+            subtree.children = list(self._transform_children(subtree.children))
+
+        return self._transform_tree(tree)
+
+
+class Transformer_NonRecursive(Transformer[_Leaf_T, _Return_T]):
+    """Same as Transformer but non-recursive.
+
+    Like Transformer, it doesn't change the original tree.
+
+    Useful for huge trees.
+    """
+
+    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
+        # Tree to postfix
+        rev_postfix = []
+        q: List[Branch[_Leaf_T]] = [tree]
+        while q:
+            t = q.pop()
+            rev_postfix.append(t)
+            if isinstance(t, Tree):
+                q += t.children
+
+        # Postfix to tree
+        stack: List = []
+        for x in reversed(rev_postfix):
+            if isinstance(x, Tree):
+                size = len(x.children)
+                if size:
+                    args = stack[-size:]
+                    del stack[-size:]
+                else:
+                    args = []
+
+                res = self._call_userfunc(x, args)
+                if res is not Discard:
+                    stack.append(res)
+
+            elif self.__visit_tokens__ and isinstance(x, Token):
+                res = self._call_userfunc_token(x)
+                if res is not Discard:
+                    stack.append(res)
+            else:
+                stack.append(x)
+
+        result, = stack  # We should have only one tree remaining
+        # There are no guarantees on the type of the value produced by calling a user func for a
+        # child will produce. This means type system can't statically know that the final result is
+        # _Return_T. As a result a cast is required.
+        return cast(_Return_T, result)
+
+
+class Transformer_InPlaceRecursive(Transformer[_Leaf_T, _Return_T]):
+    "Same as Transformer, recursive, but changes the tree in-place instead of returning new instances"
+    def _transform_tree(self, tree):
+        # Overwrite the node's children with their transformed versions, then call the
+        # callback without new_children so that it reads the updated tree.children.
+        tree.children = list(self._transform_children(tree.children))
+        return self._call_userfunc(tree)
+
+
+# Visitors
+
+class VisitorBase:
+    def _call_userfunc(self, tree):
+        return getattr(self, tree.data, self.__default__)(tree)
+
+    def __default__(self, tree):
+        """Default function that is called if there is no attribute matching ``tree.data``
+
+        Can be overridden. Defaults to doing nothing.
+        """
+        return tree
+
+    def __class_getitem__(cls, _):
+        return cls
+
+
+class Visitor(VisitorBase, ABC, Generic[_Leaf_T]):
+    """Tree visitor, non-recursive (can handle huge trees).
+
+    Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
+
+    The values returned by the callbacks are ignored; both methods return the tree they were given.
+    """
+
+    def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
+        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
+        # The visiting order is whatever tree.iter_subtrees() yields.
+        for subtree in tree.iter_subtrees():
+            self._call_userfunc(subtree)
+        return tree
+
+    def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
+        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
+        # The visiting order is whatever tree.iter_subtrees_topdown() yields.
+        for subtree in tree.iter_subtrees_topdown():
+            self._call_userfunc(subtree)
+        return tree
+
+
+class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]):
+    """Bottom-up visitor, recursive.
+
+    Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
+
+    Slightly faster than the non-recursive version.
+    """
+
+    def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
+        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
+        for child in tree.children:
+            if isinstance(child, Tree):
+                self.visit(child)
+
+        self._call_userfunc(tree)
+        return tree
+
+    def visit_topdown(self,tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
+        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
+        self._call_userfunc(tree)
+
+        for child in tree.children:
+            if isinstance(child, Tree):
+                self.visit_topdown(child)
+
+        return tree
+
+
+class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
+    """Interpreter walks the tree starting at the root.
+
+    Visits the tree, starting with the root and finally the leaves (top-down)
+
+    For each tree node, it calls its methods (provided by user via inheritance) according to ``tree.data``.
+
+    Unlike ``Transformer`` and ``Visitor``, the Interpreter doesn't automatically visit its sub-branches.
+    The user has to explicitly call ``visit``, ``visit_children``, or use the ``@visit_children_decor``.
+    This allows the user to implement branching and loops.
+    """
+
+    def visit(self, tree: Tree[_Leaf_T]) -> _Return_T:
+        """Call the method matching ``tree.data`` on ``tree`` and return its result."""
+        # There are no guarantees on the type of the value produced by calling a user func for a
+        # child will produce. So only annotate the public method and use an internal method when
+        # visiting child trees.
+        return self._visit_tree(tree)
+
+    def _visit_tree(self, tree: Tree[_Leaf_T]):
+        # Thanks to __getattr__ below, this lookup falls back to __default__ for unknown names.
+        f = getattr(self, tree.data)
+        wrapper = getattr(f, 'visit_wrapper', None)
+        if wrapper is not None:
+            # Methods decorated by v_args carry a visit_wrapper that adapts the arguments.
+            return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
+        else:
+            return f(tree)
+
+    def visit_children(self, tree: Tree[_Leaf_T]) -> List:
+        """Visit every child that is a ``Tree`` and return the results in order.
+
+        Children that are not trees are included in the list unchanged.
+        """
+        return [self._visit_tree(child) if isinstance(child, Tree) else child
+                for child in tree.children]
+
+    def __getattr__(self, name):
+        # Only reached when normal attribute lookup fails: every unknown name
+        # resolves to the default handler.
+        return self.__default__
+
+    def __default__(self, tree):
+        """Default handler for nodes without a matching method: visit the children and return their results."""
+        return self.visit_children(tree)
+
+
+_InterMethod = Callable[[Type[Interpreter], _Return_T], _R]
+
+def visit_children_decor(func: _InterMethod) -> _InterMethod:
+    "See Interpreter"
+    @wraps(func)
+    def inner(cls, tree):
+        values = cls.visit_children(tree)
+        return func(cls, values)
+    return inner
+
+# Decorators
+
+def _apply_v_args(obj, visit_wrapper):
+    try:
+        _apply = obj._apply_v_args
+    except AttributeError:
+        return _VArgsWrapper(obj, visit_wrapper)
+    else:
+        return _apply(visit_wrapper)
+
+
+class _VArgsWrapper:
+    """
+    A wrapper around a Callable. It delegates `__call__` to the Callable.
+    If the Callable has a `__get__`, that is also delegated and the resulting function is wrapped.
+    Otherwise, we use the original function, mirroring the behaviour without a `__get__`.
+    We also have the visit_wrapper attribute to be used by Transformers.
+    """
+    base_func: Callable
+
+    def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]):
+        # Never nest wrappers: re-wrapping replaces the visit_wrapper on the original function.
+        if isinstance(func, _VArgsWrapper):
+            func = func.base_func
+        self.base_func = func
+        self.visit_wrapper = visit_wrapper
+        # Copy the wrapped function's metadata (name, docstring, ...) onto this wrapper.
+        update_wrapper(self, func)
+
+    def __call__(self, *args, **kwargs):
+        return self.base_func(*args, **kwargs)
+
+    def __get__(self, instance, owner=None):
+        try:
+            # Use the __get__ attribute of the type instead of the instance
+            # to fully mirror the behavior of getattr
+            g = type(self.base_func).__get__
+        except AttributeError:
+            # The wrapped callable is not a descriptor: no binding takes place.
+            return self
+        else:
+            # Bind the wrapped callable, then re-wrap it so the result keeps its visit_wrapper.
+            return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper)
+
+    def __set_name__(self, owner, name):
+        # Forward __set_name__ to the wrapped object, if its type defines it.
+        try:
+            f = type(self.base_func).__set_name__
+        except AttributeError:
+            return
+        else:
+            f(self.base_func, owner, name)
+
+
+def _vargs_inline(f, _data, children, _meta):
+    "visit_wrapper for ``v_args(inline=True)``: pass the children as positional arguments"
+    return f(*children)
+def _vargs_meta_inline(f, _data, children, meta):
+    "visit_wrapper for ``v_args(meta=True, inline=True)``: pass ``meta``, then the children as positional arguments"
+    return f(meta, *children)
+def _vargs_meta(f, _data, children, meta):
+    "visit_wrapper for ``v_args(meta=True)``: pass ``meta`` and the list of children"
+    return f(meta, children)
+def _vargs_tree(f, data, children, meta):
+    "visit_wrapper for ``v_args(tree=True)``: pass a single ``Tree`` built from data, children and meta"
+    return f(Tree(data, children, meta))
+
+
+def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]:
+    """A convenience decorator factory for modifying the behavior of user-supplied callback methods of ``Transformer`` classes.
+
+    By default, transformer callback methods accept one argument - a list of the node's children.
+
+    ``v_args`` can modify this behavior. When used on a ``Transformer`` class definition, it applies to
+    all the callback methods inside it.
+
+    ``v_args`` can be applied to a single method, or to an entire class. When applied to both,
+    the options given to the method take precedence.
+
+    Parameters:
+        inline (bool, optional): Children are provided as ``*args`` instead of a list argument (not recommended for very long lists).
+        meta (bool, optional): Provides two arguments: ``meta`` and ``children`` (instead of just the latter); ``meta`` isn't available for transformers supplied to Lark using the ``transformer`` parameter (aka internal transformers).
+        tree (bool, optional): Provides the entire tree as the argument, instead of the children.
+        wrapper (function, optional): Provide a function to decorate all methods.
+
+    Example:
+        ::
+
+            @v_args(inline=True)
+            class SolveArith(Transformer):
+                def add(self, left, right):
+                    return left + right
+
+                @v_args(meta=True)
+                def mul(self, meta, children):
+                    logger.info(f'mul at line {meta.line}')
+                    left, right = children
+                    return left * right
+
+
+            class ReverseNotation(Transformer_InPlace):
+                @v_args(tree=True)
+                def tree_node(self, tree):
+                    tree.children = tree.children[::-1]
+    """
+    if tree and (meta or inline):
+        raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
+
+    func = None
+    if meta:
+        if inline:
+            func = _vargs_meta_inline
+        else:
+            func = _vargs_meta
+    elif inline:
+        func = _vargs_inline
+    elif tree:
+        func = _vargs_tree
+
+    if wrapper is not None:
+        if func is not None:
+            raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
+        func = wrapper
+
+    def _visitor_args_dec(obj):
+        return _apply_v_args(obj, func)
+    return _visitor_args_dec
+
+
+###}
+
+
+# --- Visitor Utilities ---
+
+class CollapseAmbiguities(Transformer):
+    """
+    Transforms a tree that contains any number of _ambig nodes into a list of trees,
+    each one containing an unambiguous tree.
+
+    The length of the resulting list is the product of the length of all _ambig nodes.
+
+    Warning: This may quickly explode for highly ambiguous trees.
+
+    """
+    def _ambig(self, options):
+        return sum(options, [])
+
+    def __default__(self, data, children_lists, meta):
+        return [Tree(data, children, meta) for children in combine_alternatives(children_lists)]
+
+    def __default_token__(self, t):
+        return [t]