diff --git a/documentation/test_grammar_elements_compound_doctest.txt b/documentation/test_grammar_elements_compound_doctest.txt index acc8bb33..1c9c4765 100644 --- a/documentation/test_grammar_elements_compound_doctest.txt +++ b/documentation/test_grammar_elements_compound_doctest.txt @@ -48,3 +48,31 @@ therefore have three elements, even when "there" is not spoken. :: [u'hello', None, u'universe'] >>> tester.recognize("hello galaxy") RecognitionFailure + + +Inline repetition +---------------------------------------------------------------------------- + +Repeated extras can be expressed directly in the compound spec using +open-ended brace ranges. Repeated extras are returned as lists when used +from a value function. :: + + >>> element = Compound( + ... "test {1,}", + ... extras=[Choice("food", {"apple": "good", "burger": "bad"})], + ... value_func=lambda node, extras: extras["food"], + ... ) + >>> tester = ElementTester(element) + >>> tester.recognize("test apple burger") + ['good', 'bad'] + +Zero-or-more repetition returns an empty list when nothing is spoken. :: + + >>> element = Compound( + ... "test {0,}", + ... extras=[Choice("food", {"apple": "good", "burger": "bad"})], + ... value_func=lambda node, extras: extras["food"], + ... ) + >>> tester = ElementTester(element) + >>> tester.recognize("test") + [] diff --git a/dragonfly/engines/backend_kaldi/compiler.py b/dragonfly/engines/backend_kaldi/compiler.py index 7118511c..566d3cbf 100644 --- a/dragonfly/engines/backend_kaldi/compiler.py +++ b/dragonfly/engines/backend_kaldi/compiler.py @@ -229,6 +229,31 @@ def compile_element(self, element, *args, **kwargs): def _compile_sequence(self, element, src_state, dst_state, grammar, kaldi_rule, fst): src_state = self.add_weight_linkage(src_state, dst_state, self.get_weight(element), fst) children = element.children + is_repetition = isinstance(element, elements_.Repetition) + if is_repetition and element.unbounded: + current_src_state = src_state + for unused_index in range(max(element.min - 1, 0)): + next_state = fst.add_state() + self.compile_element(element._child, current_src_state, + next_state, grammar, kaldi_rule, fst) + current_src_state = next_state + + loop_src_state = fst.add_state() + loop_dst_state = fst.add_state() + if element.min == 0: + fst.add_arc(current_src_state, dst_state, None) + fst.add_arc(current_src_state, loop_src_state, None) + self.compile_element(element._child, loop_src_state, loop_dst_state, + grammar, kaldi_rule, fst) + if not fst.has_eps_path(loop_src_state, loop_dst_state, + self._eps_like_nonterms): + fst.add_arc(loop_dst_state, loop_src_state, fst.eps_disambig, + fst.eps) + fst.add_arc(loop_dst_state, dst_state, None) + return + raise CompilerError("Cannot compile unbounded repetition whose " + "child can match empty") + # Optimize for special lengths if len(children) == 0: fst.add_arc(src_state, dst_state, None) @@ -239,7 +264,6 @@ def _compile_sequence(self, element, src_state, dst_state, grammar, kaldi_rule, else: # len(children) >= 2: # Handle Repetition elements differently as a special case - is_repetition = isinstance(element, elements_.Repetition) if is_repetition and element.optimize: # Repetition... # Insert new states, so back arc only affects child diff --git a/dragonfly/engines/backend_natlink/compiler.py b/dragonfly/engines/backend_natlink/compiler.py index bb4b4103..e2fbdd69 100644 --- a/dragonfly/engines/backend_natlink/compiler.py +++ b/dragonfly/engines/backend_natlink/compiler.py @@ -64,11 +64,34 @@ def _compile_rule(self, rule, compiler): #----------------------------------------------------------------------- # Methods for compiling elements. + def _compile_unbounded_repetition(self, element, compiler): + if element.min > 1: + compiler.start_sequence() + for unused_index in range(element.min - 1): + self.compile_element(element._child, compiler) + + if element.min == 0: + compiler.start_optional() + + compiler.start_repetition() + self.compile_element(element._child, compiler) + compiler.end_repetition() + + if element.min == 0: + compiler.end_optional() + + if element.min > 1: + compiler.end_sequence() + def _compile_sequence(self, element, compiler): children = element.children + is_rep = isinstance(element, elements_.Repetition) + if is_rep and element.unbounded: + self._compile_unbounded_repetition(element, compiler) + return + if len(children) > 1: # Compile Sequence and Repetition elements differently. - is_rep = isinstance(element, elements_.Repetition) if is_rep and element.optimize: compiler.start_repetition() self.compile_element(children[0], compiler) diff --git a/dragonfly/engines/backend_sapi5/compiler.py b/dragonfly/engines/backend_sapi5/compiler.py index 5c3fe4fe..94dad030 100644 --- a/dragonfly/engines/backend_sapi5/compiler.py +++ b/dragonfly/engines/backend_sapi5/compiler.py @@ -31,7 +31,7 @@ from dragonfly.engines.base import CompilerBase, CompilerError from dragonfly.grammar.rule_base import Rule -from dragonfly.grammar.elements_basic import Literal +from dragonfly.grammar.elements_basic import Literal, Repetition #--------------------------------------------------------------------------- @@ -135,8 +135,31 @@ def _compile_rule(self, rule, grammar, grammar_handle): #----------------------------------------------------------------------- # Methods for compiling elements. + @trace_compile + def _compile_unbounded_repetition(self, element, src_state, dst_state, + grammar, grammar_handle): + current_src_state = src_state + for unused_index in range(max(element.min - 1, 0)): + next_state = current_src_state.Rule.AddState() + self.compile_element(element._child, current_src_state, next_state, + grammar, grammar_handle) + current_src_state = next_state + + loop_dst_state = current_src_state.Rule.AddState() + if element.min == 0: + current_src_state.AddWordTransition(dst_state, '') + self.compile_element(element._child, current_src_state, loop_dst_state, + grammar, grammar_handle) + loop_dst_state.AddWordTransition(current_src_state, '') + loop_dst_state.AddWordTransition(dst_state, '') + @trace_compile def _compile_sequence(self, element, src_state, dst_state, grammar, grammar_handle): + if isinstance(element, Repetition) and element.unbounded: + self._compile_unbounded_repetition(element, src_state, dst_state, + grammar, grammar_handle) + return + states = [src_state.Rule.AddState() for i in range(len(element.children)-1)] states.insert(0, src_state) states.append(dst_state) diff --git a/dragonfly/engines/backend_sphinx/compiler.py b/dragonfly/engines/backend_sphinx/compiler.py index c87a2767..58739588 100644 --- a/dragonfly/engines/backend_sphinx/compiler.py +++ b/dragonfly/engines/backend_sphinx/compiler.py @@ -148,7 +148,30 @@ def compile_element(self, element, *args, **kwargs): " for element type %s." % (self, element)) + def _compile_unbounded_repetition(self, element, repeat_class, *args, + **kwargs): + repeated_child = repeat_class( + self.compile_element(element._child, *args, **kwargs) + ) + + if element.min == 0: + return jsgf.OptionalGrouping(repeated_child) + if element.min == 1: + return repeated_child + + children = [ + self.compile_element(element._child, *args, **kwargs) + for unused_index in range(element.min - 1) + ] + children.append(repeated_child) + return jsgf.Sequence(*children) + def _compile_repetition(self, element, *args, **kwargs): + if element.unbounded: + return self._compile_unbounded_repetition( + element, jsgf.Repeat, *args, **kwargs + ) + # Compile the first element only; pyjsgf doesn't support limits on # repetition (yet). children = element.children @@ -249,6 +272,11 @@ def __init__(self, engine): # Methods for compiling elements. def _compile_repetition(self, element, *args, **kwargs): + if element.unbounded: + return self._compile_unbounded_repetition( + element, PatchedRepeat, *args, **kwargs + ) + # Compile the first element only; pyjsgf doesn't support limits on # repetition (yet). children = element.children diff --git a/dragonfly/grammar/elements_basic.py b/dragonfly/grammar/elements_basic.py index bd85fee7..a20ffb24 100644 --- a/dragonfly/grammar/elements_basic.py +++ b/dragonfly/grammar/elements_basic.py @@ -556,6 +556,51 @@ def value(self, node): return node.children[0].value() +#--------------------------------------------------------------------------- + +def element_matches_empty(element, memo=None): + """ + Return ``True`` if *element* can match an empty recognition. + + This is used to reject unbounded repetitions of empty-matchable + expressions, which would otherwise lead to infinite decode loops. + """ + if memo is None: + memo = set() + + element_id = id(element) + if element_id in memo: + return False + memo.add(element_id) + + try: + if isinstance(element, Empty): + return True + if isinstance(element, Impossible): + return False + if isinstance(element, Optional): + return True + if isinstance(element, Literal): + return len(element.words) == 0 + if isinstance(element, Dictation): + return False + if isinstance(element, ListRef): + return False + if isinstance(element, RuleRef): + return element_matches_empty(element.rule.element, memo) + if isinstance(element, Alternative): + return len(element.children) == 0 or any( + element_matches_empty(child, memo) + for child in element.children + ) + if isinstance(element, Sequence): + return all(element_matches_empty(child, memo) + for child in element.children) + return False + finally: + memo.remove(element_id) + + #--------------------------------------------------------------------------- class Repetition(Sequence): @@ -572,6 +617,8 @@ class Repetition(Sequence): the maximum number of times that the child element must be recognized (exclusive!); if *None*, the child element must be recognized exactly *min* times (i.e. *max = min + 1*) + - *unbounded* (*bool*, default: *False*) -- + whether the child element may repeat without an upper bound - *name* (*str*, default: *None*) -- the name of this element - *default* (*object*, default: *None*) -- @@ -604,34 +651,47 @@ class Repetition(Sequence): # pylint: disable=redefined-builtin,unused-variable def __init__(self, child, min=1, max=None, name=None, default=None, - optimize=True): + optimize=True, unbounded=False): if not isinstance(child, ElementBase): raise TypeError("Child of %s object must be an" " ElementBase instance." % self) assert isinstance(min, six.integer_types) assert max is None or isinstance(max, six.integer_types) + assert isinstance(unbounded, bool) assert max is None or min < max, "min must be less than max" + if unbounded and max is not None: + raise ValueError("unbounded repetitions cannot define max") + if unbounded and element_matches_empty(child): + raise ValueError("unbounded repetition child cannot match empty") self._child = child self._min = min - if max is None: self._max = min + 1 - else: self._max = max + self._unbounded = unbounded + if unbounded: + self._max = None + elif max is None: + self._max = min + 1 + else: + self._max = max self._optimize = optimize - optional_length = self._max - self._min - 1 - if optional_length > 0: - element = Optional(child) - for index in range(optional_length-1): - element = Optional(Sequence([child, element])) - - if self._min >= 1: - children = [child] * self._min + [element] - else: - children = [element] - elif self._min > 0: - children = [child] * self._min + if self._unbounded: + children = [child] else: - raise ValueError("Repetition not allowed to be empty.") + optional_length = self._max - self._min - 1 + if optional_length > 0: + element = Optional(child) + for index in range(optional_length-1): + element = Optional(Sequence([child, element])) + + if self._min >= 1: + children = [child] * self._min + [element] + else: + children = [element] + elif self._min > 0: + children = [child] * self._min + else: + raise ValueError("Repetition not allowed to be empty.") Sequence.__init__(self, children, name=name, default=default) @@ -655,6 +715,12 @@ def __init__(self, child, min=1, max=None, name=None, default=None, "optimally. (Read-only)" ) + unbounded = property( + lambda self: self._unbounded, + doc="Whether the child may repeat without an upper bound. " + "(Read-only)" + ) + def dependencies(self, memo): if self._id in memo: return [] @@ -664,6 +730,58 @@ def dependencies(self, memo): #----------------------------------------------------------------------- # Methods for runtime recognition processing. + def decode(self, state): + state.decode_attempt(self) + + max_count = self._max + min_count = self._min + child = self._child + unbounded = self._unbounded + + class _Frame(object): + __slots__ = ("count", "child_results", "can_accept", + "accept_yielded") + + def __init__(self, count): + self.count = count + if unbounded: + self.child_results = child.decode(state) + elif count < max_count - 1: + self.child_results = child.decode(state) + else: + self.child_results = None + + self.can_accept = ( + count >= min_count + and (unbounded or count < max_count) + ) + self.accept_yielded = False + + stack = [_Frame(0)] + while stack: + frame = stack[-1] + + if frame.child_results is not None: + try: + next(frame.child_results) + except StopIteration: + frame.child_results = None + else: + # Prefer longer matches first to preserve the historical + # greedy behavior of bounded repetitions. + stack.append(_Frame(frame.count + 1)) + continue + + if frame.can_accept and not frame.accept_yielded: + frame.accept_yielded = True + state.decode_success(self) + yield state + state.decode_retry(self) + continue + + stack.pop() + state.decode_failure(self) + def get_repetitions(self, node): """ Returns a list containing the nodes associated with @@ -677,31 +795,11 @@ def get_repetitions(self, node): """ repetitions = [] - for index in range(self._min): - element = node.children[index] - if element.actor != self._child: - raise TypeError("Invalid child of %s: %s" \ - % (self, element.actor)) - repetitions.append(element) - - if self._max - self._min > 1: - optional = node.children[-1] - while optional.children: - child = optional.children[0] - if isinstance(child.actor, Sequence): - assert len(child.children) == 2 - element, optional = child.children - if element.actor != self._child: - raise TypeError("Invalid child of %s: %s" \ - % (self, element.actor)) - repetitions.append(element) - elif child.actor == self._child: - repetitions.append(child) - break - else: - raise TypeError("Invalid child of %s: %s" \ - % (self, child.actor)) - + for child in node.children: + if child.actor != self._child: + raise TypeError("Invalid child of %s: %s" + % (self, child.actor)) + repetitions.append(child) return repetitions def value(self, node): diff --git a/dragonfly/grammar/elements_compound.py b/dragonfly/grammar/elements_compound.py index 705888c2..a86eaef8 100644 --- a/dragonfly/grammar/elements_compound.py +++ b/dragonfly/grammar/elements_compound.py @@ -60,6 +60,102 @@ #--------------------------------------------------------------------------- # The Compound class. +def _collect_extra_name_shapes_from_element(element, memo=None, + repeated=False): + from dragonfly.grammar.elements_basic import Repetition + + if memo is None: + memo = set() + + memo_key = (id(element), repeated) + if memo_key in memo: + return set(), set() + memo.add(memo_key) + + if repeated and element.name: + return set([element.name]), set() + if element.name: + return set(), set([element.name]) + + repeated_here = repeated or isinstance(element, Repetition) + repeated_names = set() + scalar_names = set() + for child in element.children: + child_repeated, child_scalar = _collect_extra_name_shapes_from_element( + child, memo, repeated_here + ) + repeated_names.update(child_repeated) + scalar_names.update(child_scalar) + return repeated_names, scalar_names + + +def _collect_repeated_extra_names_from_node(node, repeated=False): + from dragonfly.grammar.elements_basic import Repetition + + names = set() + if repeated and node.name: + names.add(node.name) + if node.name: + return names + + repeated_here = repeated or isinstance(node.actor, Repetition) + if isinstance(node.actor, Repetition) or not node.children: + memo = set() + inferred_repeated_names = set() + inferred_scalar_names = set() + for child in node.actor.children: + child_repeated, child_scalar = _collect_extra_name_shapes_from_element( + child, memo, repeated_here + ) + inferred_repeated_names.update(child_repeated) + inferred_scalar_names.update(child_scalar) + names.update(inferred_repeated_names - inferred_scalar_names) + + for child in node.children: + names.update(_collect_repeated_extra_names_from_node(child, + repeated_here)) + return names + + +def _normalize_repeated_extra_value(value): + if isinstance(value, list): + return value + if isinstance(value, tuple): + return list(value) + return [value] + + +def extract_compound_extras(node, extras, defaults=None, repeated_extra_names=None): + values = {} + if defaults: + values.update(defaults) + + if repeated_extra_names is None: + repeated_extra_names = _collect_repeated_extra_names_from_node(node) + else: + repeated_extra_names = set(repeated_extra_names) + + for name, element in extras.items(): + if name in repeated_extra_names: + extra_nodes = node.get_children_by_name(name, shallow=True) + if extra_nodes: + values[name] = [extra_node.value() for extra_node in extra_nodes] + elif element.has_default(): + values[name] = _normalize_repeated_extra_value(element.default) + elif name in values: + values[name] = _normalize_repeated_extra_value(values[name]) + else: + values[name] = [] + continue + + extra_node = node.get_child_by_name(name, shallow=True) + if extra_node: + values[name] = extra_node.value() + elif element.has_default(): + values[name] = element.default + + return values + class Compound(Alternative): """ Element which parses a string spec to create a hierarchy of basic @@ -177,12 +273,15 @@ def value(self, node): if self._value_func is not None: # Prepare *extras* dict for passing to value_func(). extras = {"_node": node} - for name, element in self._extras.items(): - extra_node = node.get_child_by_name(name, shallow=True) - if extra_node: - extras[name] = extra_node.value() - elif element.has_default(): - extras[name] = element.default + repeated_extra_names = None + if self.name and len(node.children) == 1: + repeated_extra_names = _collect_repeated_extra_names_from_node( + node.children[0] + ) + extras.update(extract_compound_extras( + node, self._extras, + repeated_extra_names=repeated_extra_names, + )) try: value = self._value_func(node, extras) except Exception as e: diff --git a/dragonfly/grammar/rule_compound.py b/dragonfly/grammar/rule_compound.py index 9d3535b0..c7de62f2 100644 --- a/dragonfly/grammar/rule_compound.py +++ b/dragonfly/grammar/rule_compound.py @@ -68,6 +68,7 @@ def _process_recognition(self, node, extras): from dragonfly.grammar.rule_base import Rule from dragonfly.grammar.elements import ElementBase, Compound +from dragonfly.grammar.elements_compound import extract_compound_extras #--------------------------------------------------------------------------- @@ -149,13 +150,9 @@ def process_recognition(self, node): "_rule": self, "_node": node, } - extras.update(self._defaults) - for name, element in self._extras.items(): - extra_node = node.get_child_by_name(name, shallow=True) - if extra_node: - extras[name] = extra_node.value() - elif element.has_default(): - extras[name] = element.default + extras.update(extract_compound_extras( + node.children[0], self._extras, defaults=self._defaults, + )) # Call the method to do the actual processing. self._process_recognition(node, extras) diff --git a/dragonfly/grammar/rule_mapping.py b/dragonfly/grammar/rule_mapping.py index a6a98cd4..0d6a9df6 100644 --- a/dragonfly/grammar/rule_mapping.py +++ b/dragonfly/grammar/rule_mapping.py @@ -79,6 +79,7 @@ class ExampleRule(MappingRule): from dragonfly.grammar.rule_base import Rule from dragonfly.grammar.elements import ElementBase, Compound, Alternative +from dragonfly.grammar.elements_compound import extract_compound_extras from dragonfly.actions.actions import ActionBase @@ -164,23 +165,20 @@ def specs(self): return [k for k, _ in self._mapping.items()] def value(self, node): - node = node.children[0] - value = node.value() + alternative_node = node.children[0] + value = alternative_node.value() + match_node = alternative_node.children[0] if hasattr(value, "copy_bind"): # Prepare *extras* dict for passing to _copy_bind(). extras = { "_grammar": self.grammar, "_rule": self, - "_node": node, + "_node": alternative_node, } - extras.update(self._defaults) - for name, element in self._extras.items(): - extra_node = node.get_child_by_name(name, shallow=True) - if extra_node: - extras[name] = extra_node.value() - elif element.has_default(): - extras[name] = element.default + extras.update(extract_compound_extras( + match_node, self._extras, defaults=self._defaults, + )) value = value.copy_bind(extras) @@ -205,13 +203,10 @@ def process_recognition(self, node): "_rule": self, "_node": node, } - extras.update(self._defaults) - for name, element in self._extras.items(): - extra_node = node.get_child_by_name(name, shallow=True) - if extra_node: - extras[name] = extra_node.value() - elif element.has_default(): - extras[name] = element.default + match_node = node.children[0].children[0] + extras.update(extract_compound_extras( + match_node, self._extras, defaults=self._defaults, + )) # Call the method to do the actual processing. self._process_recognition(item_value, extras) diff --git a/dragonfly/parsing/parse.py b/dragonfly/parsing/parse.py index f05075a8..6fa4254c 100644 --- a/dragonfly/parsing/parse.py +++ b/dragonfly/parsing/parse.py @@ -1,37 +1,60 @@ +import copy +import re + from lark import Lark, Transformer from dragonfly.grammar.elements_basic import (Literal, Optional, Sequence, - Alternative, Empty) + Alternative, Repetition, + id_generator) grammar_string = r""" ?start: alternative // ? means that the rule will be inlined iff there is a single child ?alternative: sequence ("|" sequence)* -?sequence: single* - | sequence "{" WORD "}" -> special +sequence: single* special* + +?single: atom REPEAT_RANGE -> quantified + | atom + +?atom: WORD -> word + | "<" WORD ">" -> reference + | "[" alternative "]" -> optional + | "(" alternative ")" -> grouped -?single: WORD+ -> literal - | "<" WORD ">" -> reference - | "[" alternative "]" -> optional - | "(" alternative ")" +special: SPECIAL -> special_specifier // Match anything which is not whitespace or a control character, // we will let the engine handle invalid words WORD: /[^\s\[\]<>|(){}]+/ +REPEAT_RANGE.2: /\{\d+(?:,\d*)?\}/ +SPECIAL.1: /\{(?=[^\s\[\]<>|(){}]*[^\s\[\]<>|(){}\d,])[^\s\[\]<>|(){}]+\}/ %import common.WS_INLINE %ignore WS_INLINE """ -spec_parser = Lark( +_spec_parser = Lark( grammar_string, - parser="lalr" + parser="lalr", + lexer="contextual", ) +spec_parser = _spec_parser class ParseError(Exception): pass + +class _LiteralWord(str): + pass + + +def _copy_element_for_specials(element): + cloned = copy.copy(element) + cloned._id = next(id_generator) + return cloned + + class CompoundTransformer(Transformer): """ Visits each node of the parse tree starting with the leaves @@ -44,15 +67,54 @@ def __init__(self, extras=None, *args, **kwargs): Transformer.__init__(self, *args, **kwargs) def optional(self, args): - return Optional(args[0]) + return Optional(self._coerce_element(args[0])) - def literal(self, args): - return Literal(" ".join(args)) + def grouped(self, args): + return self._coerce_element(args[0]) + + def word(self, args): + return _LiteralWord(str(args[0])) def sequence(self, args): - return Sequence(args) + children = [] + literal_words = [] + specifiers = [] + + def flush_literal_words(): + if literal_words: + children.append(Literal(" ".join(literal_words))) + del literal_words[:] + + for arg in args: + if isinstance(arg, _LiteralWord): + literal_words.append(arg) + elif isinstance(arg, tuple) and arg and arg[0] == "special": + flush_literal_words() + specifiers.append(arg[1]) + else: + flush_literal_words() + children.append(self._coerce_element(arg)) + + flush_literal_words() + + if not children: + element = Sequence([]) + elif len(children) == 1: + element = children[0] + else: + element = Sequence(children) + + if specifiers and len(children) == 1: + element = _copy_element_for_specials(element) + + for specifier in specifiers: + element = self._apply_special(element, specifier) + return element def alternative(self, args): + args = [self._coerce_element(arg) for arg in args] + if len(args) == 1: + return args[0] return Alternative(args) def reference(self, args): @@ -62,8 +124,43 @@ def reference(self, args): except KeyError: raise Exception("Unknown reference name %r" % (str(ref))) - def special(self, args): - child, specifier = args + def brace_repeat_quantifier(self, args): + text = str(args[0])[1:-1] + if text.endswith(","): + return ("unbounded", int(text[:-1])) + if "," in text: + minimum, maximum = text.split(",", 1) + minimum = int(minimum) + maximum = int(maximum) + if minimum > maximum: + raise ParseError("Invalid repetition range {%d,%d}" % + (minimum, maximum)) + return ("range", minimum, maximum) + return ("exact", int(text)) + + def special_specifier(self, args): + return ("special", str(args[0])[1:-1]) + + def quantified(self, args): + return self._make_repetition( + self._coerce_element(args[0]), + self.brace_repeat_quantifier([args[1]]), + ) + + def _make_repetition(self, child, quantifier): + kind = quantifier[0] + if kind == "unbounded": + return Repetition(child, min=quantifier[1], unbounded=True) + if kind == "exact": + return Repetition(child, min=quantifier[1], max=None) + if kind == "range": + return Repetition(child, min=quantifier[1], max=quantifier[2] + 1) + raise ParseError("Unknown repetition quantifier %r" % (quantifier,)) + + def _apply_special(self, child, specifier): + if re.match(r"^\d+(,\d*)?$", specifier): + raise ParseError("Numeric brace bodies must be attached directly " + "to a single item for repetition") if '=' in specifier: name, value = specifier.split('=') @@ -90,3 +187,8 @@ def special(self, args): specifier) return child + + def _coerce_element(self, value): + if isinstance(value, _LiteralWord): + return Literal(str(value)) + return value diff --git a/dragonfly/test/suites.py b/dragonfly/test/suites.py index d19519a6..71b7019e 100644 --- a/dragonfly/test/suites.py +++ b/dragonfly/test/suites.py @@ -109,11 +109,13 @@ # Define doctests for each engine. engine_tests_dict = { "sapi5": [ + "test_compiler_sapi5", "test_engine_sapi5", "test_language_en_number", ] + common_names, "sphinx": [ + "test_compiler_sphinx", "test_engine_sphinx", "test_language_en_number", "test_dictation", diff --git a/dragonfly/test/test_compiler_kaldi.py b/dragonfly/test/test_compiler_kaldi.py new file mode 100644 index 00000000..d036bc03 --- /dev/null +++ b/dragonfly/test/test_compiler_kaldi.py @@ -0,0 +1,86 @@ +import unittest + +from dragonfly import Literal, Repetition +from dragonfly.engines.backend_kaldi.compiler import KaldiCompiler + + +class _DummyFst(object): + + eps = "" + eps_disambig = "#0" + + def __init__(self): + self.next_state = 2 + self.arcs = [] + + def add_state(self, initial=False, final=False): + state = self.next_state + self.next_state += 1 + return state + + def add_arc(self, src, dst, label, output=None, weight=None): + self.arcs.append((src, dst, label, output, weight)) + + def has_eps_path(self, src, dst, eps_like_nonterms): + return False + + +class _DummyKaldiCompiler(object): + + _eps_like_nonterms = frozenset() + + def add_weight_linkage(self, src, dst, weight, fst): + return src + + def get_weight(self, element): + return 1 + + def compile_element(self, element, src, dst, grammar, kaldi_rule, fst): + if isinstance(element, Literal): + fst.add_arc(src, dst, tuple(element.words), None, None) + return + return KaldiCompiler._compile_sequence( + self, element, src, dst, grammar, kaldi_rule, fst, + ) + + +class TestCompilerKaldi(unittest.TestCase): + + def test_unbounded_repetition_optimize_false_zero_or_more_has_skip_arc(self): + fst = _DummyFst() + compiler = _DummyKaldiCompiler() + + KaldiCompiler._compile_sequence( + compiler, + Repetition(Literal("hello"), min=0, unbounded=True, + optimize=False), + 0, 1, None, None, fst, + ) + + assert fst.arcs == [ + (0, 1, None, None, None), + (0, 2, None, None, None), + (2, 3, ("hello",), None, None), + (3, 2, "#0", "", None), + (3, 1, None, None, None), + ] + + def test_unbounded_repetition_optimize_false_preserves_minimum(self): + fst = _DummyFst() + compiler = _DummyKaldiCompiler() + + KaldiCompiler._compile_sequence( + compiler, + Repetition(Literal("hello"), min=3, unbounded=True, + optimize=False), + 0, 1, None, None, fst, + ) + + assert fst.arcs == [ + (0, 2, ("hello",), None, None), + (2, 3, ("hello",), None, None), + (3, 4, None, None, None), + (4, 5, ("hello",), None, None), + (5, 4, "#0", "", None), + (5, 1, None, None, None), + ] diff --git a/dragonfly/test/test_compiler_natlink.py b/dragonfly/test/test_compiler_natlink.py index cce5b6c0..fefbc412 100644 --- a/dragonfly/test/test_compiler_natlink.py +++ b/dragonfly/test/test_compiler_natlink.py @@ -22,11 +22,91 @@ from six import PY2 -from dragonfly import CompoundRule, Choice, Grammar +from dragonfly import CompoundRule, Choice, Grammar, Literal, Repetition class TestCompilerNatlink(unittest.TestCase): + def test_zero_or_more_repetition_optimize_false_is_optional_repetition(self): + from dragonfly.engines.backend_natlink.compiler import NatlinkCompiler + + class RecordingCompiler(object): + + def __init__(self): + self.events = [] + + def start_optional(self): + self.events.append("start_optional") + + def end_optional(self): + self.events.append("end_optional") + + def start_repetition(self): + self.events.append("start_repetition") + + def end_repetition(self): + self.events.append("end_repetition") + + def add_word(self, word): + self.events.append(("word", word)) + + compiler = NatlinkCompiler() + recorder = RecordingCompiler() + compiler.compile_element( + Repetition(Literal("hello"), min=0, unbounded=True, + optimize=False), + recorder, + ) + + assert recorder.events == [ + "start_optional", + "start_repetition", + ("word", "hello"), + "end_repetition", + "end_optional", + ] + + def test_unbounded_repetition_optimize_false_preserves_minimum(self): + from dragonfly.engines.backend_natlink.compiler import NatlinkCompiler + + class RecordingCompiler(object): + + def __init__(self): + self.events = [] + + def start_sequence(self): + self.events.append("start_sequence") + + def end_sequence(self): + self.events.append("end_sequence") + + def start_repetition(self): + self.events.append("start_repetition") + + def end_repetition(self): + self.events.append("end_repetition") + + def add_word(self, word): + self.events.append(("word", word)) + + compiler = NatlinkCompiler() + recorder = RecordingCompiler() + compiler.compile_element( + Repetition(Literal("hello"), min=3, unbounded=True, + optimize=False), + recorder, + ) + + assert recorder.events == [ + "start_sequence", + ("word", "hello"), + ("word", "hello"), + "start_repetition", + ("word", "hello"), + "end_repetition", + "end_sequence", + ] + def test_natlink_compiler(self): from dragonfly.engines.backend_natlink.compiler import NatlinkCompiler extras = [ diff --git a/dragonfly/test/test_compiler_sapi5.py b/dragonfly/test/test_compiler_sapi5.py new file mode 100644 index 00000000..ec9ea2f0 --- /dev/null +++ b/dragonfly/test/test_compiler_sapi5.py @@ -0,0 +1,110 @@ +import sys +import types +import unittest + +from dragonfly import Literal, Repetition + +try: + from dragonfly.engines.backend_sapi5.compiler import Sapi5Compiler +except ImportError: + sys.modules.pop("dragonfly.engines.backend_sapi5.compiler", None) + client = types.ModuleType("win32com.client") + client.constants = type( + "Constants", (), {"SRATopLevel": 1, "SRADynamic": 2} + )() + win32com = types.ModuleType("win32com") + win32com.client = client + sys.modules.setdefault("win32com", win32com) + sys.modules["win32com.client"] = client + from dragonfly.engines.backend_sapi5.compiler import Sapi5Compiler + + +class _FakeState(object): + + def __init__(self, rule): + self.Rule = rule + self.word_transitions = [] + + def AddWordTransition(self, dst_state, text): + self.word_transitions.append((dst_state, text)) + + def AddRuleTransition(self, dst_state, rule_handle): + raise AssertionError("Unexpected rule transition during test") + + def AddSpecialTransition(self, dst_state, special): + raise AssertionError("Unexpected special transition during test") + + +class _FakeRule(object): + + def __init__(self): + self.states = [] + + def AddState(self): + state = _FakeState(self) + self.states.append(state) + return state + + +class TestCompilerSapi5(unittest.TestCase): + + @staticmethod + def _serialize_transitions(state, labels): + return [(labels[dst_state], text) for dst_state, text + in state.word_transitions] + + def test_zero_or_more_unbounded_repetition_stays_optional(self): + compiler = Sapi5Compiler() + rule = _FakeRule() + src = _FakeState(rule) + dst = _FakeState(rule) + + compiler.compile_element( + Repetition(Literal("hello"), min=0, unbounded=True, + optimize=False), + src, dst, None, None, + ) + + labels = {src: "src", dst: "dst", rule.states[0]: "s0"} + self.assertEqual( + self._serialize_transitions(src, labels), + [("dst", ""), ("s0", "hello")], + ) + self.assertEqual( + self._serialize_transitions(rule.states[0], labels), + [("src", ""), ("dst", "")], + ) + + def test_unbounded_repetition_preserves_minimum(self): + compiler = Sapi5Compiler() + rule = _FakeRule() + src = _FakeState(rule) + dst = _FakeState(rule) + + compiler.compile_element( + Repetition(Literal("hello"), min=3, unbounded=True, + optimize=False), + src, dst, None, None, + ) + + labels = {src: "src", dst: "dst"} + labels.update( + {state: "s%d" % index for index, state in enumerate(rule.states)} + ) + + self.assertEqual( + self._serialize_transitions(src, labels), + [("s0", "hello")], + ) + self.assertEqual( + self._serialize_transitions(rule.states[0], labels), + [("s1", "hello")], + ) + self.assertEqual( + self._serialize_transitions(rule.states[1], labels), + [("s2", "hello")], + ) + self.assertEqual( + self._serialize_transitions(rule.states[2], labels), + [("s1", ""), ("dst", "")], + ) diff --git a/dragonfly/test/test_compiler_sphinx.py b/dragonfly/test/test_compiler_sphinx.py new file mode 100644 index 00000000..3db6203b --- /dev/null +++ b/dragonfly/test/test_compiler_sphinx.py @@ -0,0 +1,45 @@ +import unittest + +from dragonfly import Literal, Repetition +from dragonfly.engines.backend_sphinx.compiler import ( + JSGFCompiler, + SphinxJSGFCompiler, +) + + +class _DummyEngine(object): + + language = "en" + + @staticmethod + def check_valid_word(word): + return True + + +class TestCompilerSphinx(unittest.TestCase): + + def test_zero_or_more_unbounded_repetition_stays_optional(self): + element = Repetition( + Literal("hello"), min=0, unbounded=True, optimize=False, + ) + + expansion = JSGFCompiler().compile_element(element, None, set()) + self.assertEqual(expansion.compile(), "[(hello)+]") + + expansion = SphinxJSGFCompiler(_DummyEngine()).compile_element( + element, None, set(), + ) + self.assertEqual(expansion.compile(), "[(hello)[hello]*]") + + def test_unbounded_repetition_preserves_minimum(self): + element = Repetition( + Literal("hello"), min=3, unbounded=True, optimize=False, + ) + + expansion = JSGFCompiler().compile_element(element, None, set()) + self.assertEqual(expansion.compile(), "hello hello (hello)+") + + expansion = SphinxJSGFCompiler(_DummyEngine()).compile_element( + element, None, set(), + ) + self.assertEqual(expansion.compile(), "hello hello (hello)[hello]*") diff --git a/dragonfly/test/test_compound_rule_repetition.py b/dragonfly/test/test_compound_rule_repetition.py new file mode 100644 index 00000000..17ca56bc --- /dev/null +++ b/dragonfly/test/test_compound_rule_repetition.py @@ -0,0 +1,325 @@ +import unittest + +from dragonfly import (Choice, Compound, CompoundRule, Function, Literal, + MappingRule, Optional, Repetition, Sequence) +from dragonfly.engines import get_engine +from dragonfly.parsing.parse import ParseError +from dragonfly.test import ElementTester +from dragonfly.test.rule_test_grammar import RuleTestGrammar + + +class CompoundInlineRepetitionElementTests(unittest.TestCase): + + def setUp(self): + self.engine = get_engine("text") + + def test_compound_value_func_returns_repeated_extra_list(self): + element = Compound( + "test {1,}", + extras=[Choice("word", {"alpha": "A", "bravo": "B"})], + value_func=lambda node, extras: extras["word"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("test alpha bravo"), ["A", "B"]) + + def test_compound_value_func_returns_empty_list_for_star(self): + element = Compound( + "test {0,}", + extras=[Choice("word", {"alpha": "A", "bravo": "B"})], + value_func=lambda node, extras: extras["word"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("test"), []) + + def test_compound_value_func_wraps_omitted_repeated_extra_default(self): + element = Compound( + "test {0,}", + extras=[Choice("word", {"alpha": "A"}, default="fallback")], + value_func=lambda node, extras: extras["word"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("test"), ["fallback"]) + + def test_compound_value_func_collects_reused_repeated_extra_values(self): + element = Compound( + "test {1,}", + extras=[Choice("word", { + "alpha": "A", + "bravo": "B", + "charlie": "C", + })], + value_func=lambda node, extras: extras["word"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual( + tester.recognize("test alpha bravo charlie"), + ["A", "B", "C"], + ) + + def test_named_compound_value_func_collects_repeated_extra_values(self): + element = Compound( + "test {1,}", + name="phrase", + extras=[Choice("word", {"alpha": "A", "bravo": "B"})], + value_func=lambda node, extras: extras["word"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("test alpha bravo"), ["A", "B"]) + + def test_compound_value_func_uses_matched_branch_repetition_shape(self): + element = Compound( + "(single ) | (many {1,})", + extras=[Choice("item", {"alpha": "A", "bravo": "B"})], + value_func=lambda node, extras: extras["item"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("single alpha"), "A") + self.assertEqual(tester.recognize("many alpha bravo"), ["A", "B"]) + + def test_compound_value_func_keeps_repeated_shape_inside_omitted_wrapper(self): + element = Compound( + "test [prefix {0,}]", + extras=[Choice("item", {"alpha": "A", "bravo": "B"})], + value_func=lambda node, extras: extras["item"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("test"), []) + + def test_compound_value_func_omitted_mixed_optional_branch_stays_scalar(self): + element = Compound( + "test [single | many {1,}]", + extras=[Choice("item", {"alpha": "A", "bravo": "B"}, + default="fallback")], + value_func=lambda node, extras: extras["item"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("test"), "fallback") + self.assertEqual(tester.recognize("test single alpha"), "A") + self.assertEqual(tester.recognize("test many alpha bravo"), + ["A", "B"]) + + def test_named_repetition_extra_keeps_historical_shape(self): + element = Compound( + "", + extras=[Repetition(Literal("hello"), min=1, max=3, + name="repetition")], + value_func=lambda node, extras: extras["repetition"], + ) + tester = ElementTester(element, engine=self.engine) + self.assertEqual(tester.recognize("hello hello"), ["hello", "hello"]) + + def test_compound_rejects_unbounded_empty_match(self): + with self.assertRaises(ParseError): + Compound("test [word]{0,}") + + def test_unbounded_repetition_rejects_reused_empty_child(self): + optional = Optional(Literal("word")) + with self.assertRaises(ValueError): + Repetition(Sequence([optional, optional]), min=0, unbounded=True) + + def test_unbounded_repetition_rejects_empty_alternative(self): + with self.assertRaises(ValueError): + Repetition(Choice("word", {}), min=0, unbounded=True) + + def test_unbounded_repetition_rejects_empty_literal(self): + with self.assertRaises(ValueError): + Repetition(Literal(""), min=0, unbounded=True) + + def test_bounded_repetition_remains_greedy(self): + element = Sequence([ + Repetition(Literal("a"), min=1, max=3), + Optional(Literal("a")), + ]) + tester = ElementTester(element, engine=self.engine) + + self.assertEqual(tester.recognize("a a"), [["a", "a"], None]) + + +class CompoundInlineRepetitionRuleTests(unittest.TestCase): + + def setUp(self): + self.engine = get_engine("text") + self.grammar = RuleTestGrammar(engine=self.engine) + + def tearDown(self): + if self.grammar.loaded: + self.grammar.unload() + for rule in self.grammar.rules: + self.grammar.remove_rule(rule) + for lst in self.grammar.lists: + self.grammar.remove_list(lst) + + def test_compound_rule_repeated_choice_returns_list(self): + class TestRule(CompoundRule): + spec = "test {1,}" + extras = [Choice("word", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("test alpha bravo") + self.assertEqual(extras["word"], ["A", "B"]) + + def test_compound_rule_star_returns_empty_list(self): + class TestRule(CompoundRule): + spec = "test {0,}" + extras = [Choice("word", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("test") + self.assertEqual(extras["word"], []) + + def test_compound_rule_uses_matched_branch_repetition_shape(self): + class TestRule(CompoundRule): + spec = "(single ) | (many {1,})" + extras = [Choice("item", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + self.assertEqual(self.grammar.recognize_extras("single alpha")["item"], + "A") + self.assertEqual( + self.grammar.recognize_extras("many alpha bravo")["item"], + ["A", "B"], + ) + + def test_compound_rule_star_preserves_default_when_omitted(self): + class TestRule(CompoundRule): + spec = "test {0,}" + extras = [Choice("word", {"alpha": "A", "bravo": "B"})] + defaults = {"word": ["fallback"]} + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("test") + self.assertEqual(extras["word"], ["fallback"]) + + def test_compound_rule_star_prefers_element_default_when_omitted(self): + class TestRule(CompoundRule): + spec = "test {0,}" + extras = [Choice("word", {"alpha": "A"}, default="element")] + defaults = {"word": "rule"} + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("test") + self.assertEqual(extras["word"], ["element"]) + + def test_compound_rule_named_repetition_extra_keeps_historical_shape(self): + class TestRule(CompoundRule): + spec = "" + extras = [Repetition(Literal("hello"), min=1, max=3, + name="repetition")] + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("hello hello") + self.assertEqual(extras["repetition"], ["hello", "hello"]) + + def test_compound_rule_named_repeated_branch_keeps_child_default(self): + items = Repetition( + Choice("item", {"alpha": "A", "bravo": "B"}), + min=1, max=4, name="items", + ) + + class TestRule(CompoundRule): + spec = "(single ) | (many )" + extras = [Choice("item", {"alpha": "A", "bravo": "B"}), items] + defaults = {"item": "fallback"} + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("many alpha bravo") + self.assertEqual(extras["item"], "fallback") + self.assertEqual(extras["items"], ["A", "B"]) + + def test_mapping_rule_function_binding_uses_repeated_list(self): + captured = [] + + class TestRule(MappingRule): + mapping = { + "test {1,}": Function(lambda word: captured.append(word)), + } + extras = [Choice("word", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + self.grammar.recognize("test alpha bravo") + self.assertEqual(captured, [["A", "B"]]) + + def test_mapping_rule_non_repeated_choice_stays_scalar(self): + captured = [] + + class TestRule(MappingRule): + mapping = { + "test ": Function(lambda word: captured.append(word)), + } + extras = [Choice("word", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + self.grammar.recognize("test alpha") + self.assertEqual(captured, ["A"]) + + def test_mapping_rule_uses_matched_branch_repetition_shape(self): + captured = [] + + class TestRule(MappingRule): + mapping = { + "(single ) | (many {1,})": + Function(lambda item: captured.append(item)), + } + extras = [Choice("item", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + self.grammar.recognize("single alpha") + self.grammar.recognize("many alpha bravo") + self.assertEqual(captured, ["A", ["A", "B"]]) + + def test_mapping_rule_keeps_repeated_shape_inside_omitted_wrapper(self): + captured = [] + + class TestRule(MappingRule): + mapping = { + "test [prefix {0,}]": + Function(lambda item: captured.append(item)), + } + extras = [Choice("item", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + self.grammar.recognize("test") + self.assertEqual(captured, [[]]) + + def test_mapping_rule_whole_spec_extra_still_binds(self): + captured = [] + + class TestRule(MappingRule): + mapping = { + "": Function(lambda choice: captured.append(choice)), + } + extras = [Choice("choice", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + self.grammar.recognize("alpha") + self.assertEqual(captured, ["A"]) + + def test_mapping_rule_whole_spec_extra_still_reaches_process_extras(self): + class TestRule(MappingRule): + mapping = { + "": "value", + } + extras = [Choice("choice", {"alpha": "A", "bravo": "B"})] + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("alpha") + self.assertEqual(extras["choice"], "A") + + def test_mapping_rule_whole_spec_repetition_still_reaches_process_extras(self): + class TestRule(MappingRule): + mapping = { + "": "value", + } + extras = [Repetition(Literal("hello"), min=1, max=4, name="rep")] + + self.grammar.add_rule(TestRule()) + extras = self.grammar.recognize_extras("hello hello") + self.assertEqual(extras["rep"], ["hello", "hello"]) + + def test_unbounded_repetition_handles_long_match(self): + words = " ".join(["a"] * 1200) + element = Repetition(Literal("a"), min=1, unbounded=True) + tester = ElementTester(element, engine=self.engine) + + result = tester.recognize(words) + self.assertEqual(len(result), 1200) diff --git a/dragonfly/test/test_lark_parser.py b/dragonfly/test/test_lark_parser.py index d9a9676c..d3d66b19 100644 --- a/dragonfly/test/test_lark_parser.py +++ b/dragonfly/test/test_lark_parser.py @@ -1,10 +1,10 @@ # coding=utf-8 import unittest -import string -from dragonfly.parsing.parse import spec_parser, CompoundTransformer -from dragonfly import Compound, Literal, Sequence, Optional, Empty, Alternative +from dragonfly.parsing.parse import spec_parser, CompoundTransformer, ParseError +from dragonfly import (Compound, Literal, Sequence, Optional, Alternative, + Repetition) # =========================================================================== @@ -30,14 +30,32 @@ def test_parens(self): def test_punctuation(self): check_parse_tree(",", Literal(u",")) + check_parse_tree("*", Literal(u"*")) check_parse_tree("test's ", Literal(u"test's")) check_parse_tree("cul-de-sac ", Literal(u"cul-de-sac")) + check_parse_tree("C++", Literal(u"C++")) def test_sequence(self): check_parse_tree( " test [op]", Sequence([Literal(u"test"), extras["an_extra"], Optional(Literal(u"op"))]), ) + check_parse_tree( + " +", + Sequence([extras["an_extra"], Literal(u"+")]), + ) + check_parse_tree( + " grade +", + Sequence([Literal(u"grade"), extras["an_extra"], Literal(u"+")]), + ) + check_parse_tree( + " (test) *", + Sequence([Literal(u"test"), Literal(u"*")]), + ) + check_parse_tree( + " (test)*", + Sequence([Literal(u"test"), Literal(u"*")]), + ) def test_alternative_no_parens(self): check_parse_tree( @@ -112,6 +130,57 @@ def test_other_special_in_alternative(self): assert output.children[1].test_special == 4 assert getattr(output.children[2], 'test_special', None) == None + def test_special_on_single_reference_does_not_mutate_shared_extra(self): + shared = Alternative([Literal(u"1"), Literal(u"2")]) + output = CompoundTransformer({"shared": shared}).transform( + spec_parser.parse("{weight=0.1}") + ) + + self.assertIsNot(output, shared) + self.assertEqual(output.weight, 0.1) + self.assertIsNone(getattr(shared, "weight", None)) + + def test_one_or_more_repeat(self): + check_parse_tree( + "{1,}", + Repetition(extras["an_extra"], min=1, unbounded=True), + ) + + def test_zero_or_more_repeat(self): + check_parse_tree( + "{0,}", + Repetition(extras["an_extra"], min=0, unbounded=True), + ) + + def test_bounded_repeat(self): + check_parse_tree( + "(test | hello){2,4}", + Repetition( + Alternative([Literal(u"test"), Literal(u"hello")]), + min=2, max=5, + ), + ) + + def test_repeat_after_bare_word_sequence(self): + check_parse_tree( + "test hello{2}", + Sequence([ + Literal(u"test"), + Repetition(Literal(u"hello"), min=2, max=3), + ]), + ) + + def test_empty_sequence_keeps_historical_value_shape(self): + check_parse_tree("", Sequence([])) + check_parse_tree( + "(foo |)", + Alternative([Literal(u"foo"), Sequence([])]), + ) + + def test_invalid_repeat_range(self): + with self.assertRaises(ParseError): + Compound("hello{3,2}") + # ===========================================================================