diff --git a/README.md b/README.md index e2ab53c..3876ef2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc) Graphtage is a command-line utility and [underlying library](https://trailofbits.github.io/graphtage/latest/library.html) -for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, plist, and CSS files. Its name is a +for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, TOML, plist, and CSS files. Its name is a portmanteau of “graph” and “graftage”—the latter being the horticultural practice of joining two trees together such that they grow as one. diff --git a/graphtage/__init__.py b/graphtage/__init__.py index e01d15b..0adb405 100644 --- a/graphtage/__init__.py +++ b/graphtage/__init__.py @@ -9,7 +9,7 @@ ast, bounds, builder, constraints, dataclasses, edits, expressions, fibonacci, formatter, levenshtein, matching, object_set, pickle, printer, pydiff, search, sequences, tree, utils ) -from . import csv, json, xml, yaml, plist +from . 
"""TOML support: building Graphtage trees from TOML files and formatting trees as TOML."""

import itertools
import os
from typing import Iterator, Optional, Tuple, Union

import toml

from . import json
from .graphtage import BuildOptions, Filetype, KeyValuePairNode, LeafNode, MappingNode, StringFormatter, StringNode
from .printer import Printer
from .sequences import SequenceFormatter
from .tree import GraphtageFormatter, TreeNode


def build_tree(path: str, options: Optional[BuildOptions] = None) -> TreeNode:
    """Parses the TOML file at ``path`` and builds a tree from the resulting Python object.

    The parsed object is handed to :func:`graphtage.json.build_tree`.

    Args:
        path: Path to the TOML file to parse.
        options: Build options that are passed through to :func:`graphtage.json.build_tree`.

    Returns:
        The root node of the tree built from the file's contents.

    """
    # The TOML specification requires documents to be UTF-8, so do not rely on the platform default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.build_tree(toml.load(f), options)


class TOMLListFormatter(SequenceFormatter):
    """A sub-formatter for TOML lists."""
    is_partial = True

    def __init__(self):
        """Initializes the TOML list formatter.

        Equivalent to::

            super().__init__('[', ']', ',')

        """
        super().__init__('[', ']', ',')

    def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False):
        """Overrides the parent hook with a no-op, so this formatter writes nothing between list items here."""
        pass

    def print_ListNode(self, *args, **kwargs):
        """Prints a :class:`graphtage.ListNode`.

        Equivalent to::

            super().print_SequenceNode(*args, **kwargs)

        """
        super().print_SequenceNode(*args, **kwargs)

    def print_SequenceNode(self, *args, **kwargs):
        """Prints a non-List sequence.

        This delegates to the parent formatter's implementation::

            self.parent.print(*args, **kwargs)

        which should invoke :meth:`TOMLFormatter.print`, so that sequences this sub-formatter does not handle
        itself (e.g., a dict nested inside a list, which can occur when formatting a tree loaded from another
        format) are printed by the top-level TOML formatter.

        """
        self.parent.print(*args, **kwargs)


def toml_dumps(obj) -> str:
    """Serializes a single value to its TOML representation, without any key.

    The value is serialized as the sole entry of a one-key table and the ``result = `` prefix and trailing
    newline are then stripped off.

    Args:
        obj: The value to serialize.

    Returns:
        The TOML text for ``obj`` alone.

    Raises:
        ValueError: If the serializer output does not have the expected ``result = <value>\\n`` shape
            (for example because ``obj`` was serialized as a table rather than as a plain value).

    """
    s = toml.dumps({'result': obj})
    expected_prefix = 'result = '
    expected_suffix = '\n'
    # Validate explicitly rather than with `assert`: assertions are stripped under `python -O`, in which case
    # an unexpected serialization would silently be sliced into garbage.
    if not (s.startswith(expected_prefix) and s.endswith(expected_suffix)):
        raise ValueError(f'Unexpected TOML serialization of {obj!r}: {s!r}')
    return s[len(expected_prefix):-len(expected_suffix)]


class TOMLStringFormatter(StringFormatter):
    """A TOML formatter for strings."""
    is_partial = True

    def escape(self, c: str) -> str:
        """Escapes ``c`` by serializing it with :func:`toml_dumps` and removing the surrounding double quotes.

        If the serialized form is not wrapped in double quotes, it is returned unchanged.

        """
        s = toml_dumps(c)
        if s.startswith('"') and s.endswith('"'):
            return s[1:-1]
        else:
            return s


class TOMLMapping:
    """A view of a :class:`MappingNode` as a TOML table.

    The view separates the mapping's entries into plain key/value pairs (:meth:`items`) and entries whose value
    is itself a mapping (:meth:`children`), and records the chain of keys leading to it from the root.

    """

    def __init__(
            self,
            mapping: MappingNode,
            parent: Optional['TOMLMapping'] = None,
            parent_name: Optional[TreeNode] = None
    ):
        """Initializes the table view.

        Args:
            mapping: The mapping node wrapped by this view.
            parent: The view of the enclosing table, or :const:`None` for the root.
            parent_name: The key node under which ``mapping`` is stored in ``parent``.

        """
        self.mapping: MappingNode = mapping
        self.parent: Optional[TOMLMapping] = parent
        self.parent_name: Optional[TreeNode] = parent_name

    @property
    def name_segments(self) -> Tuple[TreeNode, ...]:
        """The key nodes leading from the root to this table; empty for the root itself."""
        if self.parent is None:
            return ()
        else:
            return self.parent.name_segments + (self.parent_name,)

    def _key_value_pairs(self) -> Iterator[KeyValuePairNode]:
        """Iterates over the entries of the mapping followed, if the mapping was edited, by its inserted entries."""
        inserted = ()
        if self.mapping.edited and self.mapping.inserted:
            inserted = self.mapping.inserted
        return itertools.chain(self.mapping, inserted)

    def items(self) -> Iterator[KeyValuePairNode]:
        """Yields the key/value pairs whose value is *not* a mapping."""
        for kvp in self._key_value_pairs():
            if not isinstance(kvp.value, MappingNode):
                yield kvp

    def __bool__(self):
        """Whether this table needs to be printed itself.

        A table is truthy if it has at least one plain key/value pair, or if it is completely empty.
        It is falsy only when its sole contents are child tables.

        """
        if next(self.items(), None) is not None:
            return True
        # Compare against None instead of testing truthiness, which would recurse into the child's __bool__.
        has_children = next(self.children(), None) is not None
        return not has_children

    def children(self) -> Iterator['TOMLMapping']:
        """Yields a :class:`TOMLMapping` for each entry whose value is itself a mapping."""
        for kvp in self._key_value_pairs():
            if isinstance(kvp.value, MappingNode):
                yield TOMLMapping(mapping=kvp.value, parent=self, parent_name=kvp.key)


class TOMLFormatter(GraphtageFormatter):
    """The top-level formatter for printing trees as TOML."""
    sub_format_types = [TOMLListFormatter, TOMLStringFormatter]

    def print(self, printer: Printer, *args, **kwargs):
        """Sets the printer's indentation string and then delegates to the parent implementation."""
        # TOML has optional indentation; make it only two spaces, if we use it:
        printer.indent_str = '  '
        super().print(printer, *args, **kwargs)

    def print_LeafNode(self, printer: Printer, node: LeafNode):
        """Writes the leaf's underlying object as serialized by :func:`toml_dumps`."""
        printer.write(toml_dumps(node.object))

    def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode):
        """Prints ``key = value`` followed by a newline.

        String keys are printed unquoted and string values are printed quoted; this is done by setting the
        ``quoted`` attribute on the respective nodes before printing them.

        """
        # NOTE(review): keys are always emitted bare; keys containing characters that are not valid in a bare
        # TOML key would presumably need quoting -- confirm against the inputs this formatter must support.
        if isinstance(node.key, StringNode):
            node.key.quoted = False
        self.print(printer, node.key)
        printer.write(' = ')
        if isinstance(node.value, StringNode):
            node.value.quoted = True
        self.print(printer, node.value)
        printer.newline()

    def _print_table_header(self, printer: Printer, segments: Tuple[TreeNode, ...]):
        """Writes a ``[dotted.table.name]`` header line built from ``segments``."""
        printer.write('[')
        for index, segment in enumerate(segments):
            if index > 0:
                printer.write('.')
            if isinstance(segment, StringNode):
                segment.quoted = False
            self.print(printer, segment)
        printer.write(']')
        printer.newline()

    def print_MappingNode(self, printer: Printer, node: MappingNode):
        """Prints a mapping as a sequence of TOML tables.

        The mapping and all nested mappings are traversed depth-first. For every table that is truthy (see
        :meth:`TOMLMapping.__bool__`) its header, if it is not the root, and its plain key/value pairs are
        printed, followed by a newline.

        """
        pending = [TOMLMapping(node)]
        while pending:
            table: TOMLMapping = pending.pop()
            if table:
                segments = table.name_segments
                if segments:
                    self._print_table_header(printer, segments)
                for kvp in table.items():
                    self.print(printer, kvp)
                printer.newline()
            # `pending` is a stack, so push the children reversed; they are then popped, and therefore
            # printed, in the same order in which they occur in the mapping.
            pending.extend(reversed(list(table.children())))


class TOML(Filetype):
    """The TOML filetype."""
    def __init__(self):
        """Initializes the TOML filetype.

        TOML identifies itself with the MIME types `application/toml` and `text/toml`.

        """
        super().__init__(
            'toml',
            'application/toml',
            'text/toml'
        )

    def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode:
        """Equivalent to :func:`build_tree`"""
        return build_tree(path, options=options)

    def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]:
        """Builds the tree like :meth:`build_tree`, but reports parsing failures as a message.

        Returns:
            The tree on success, or a string describing the error if an :exc:`IndexError`, :exc:`TypeError`,
            or :exc:`ValueError` was raised while building it.

        """
        try:
            return self.build_tree(path=path, options=options)
        except (IndexError, TypeError, ValueError) as e:
            return f'Error parsing {os.path.basename(path)}: {e}'

    def get_default_formatter(self) -> TOMLFormatter:
        """Returns the default :class:`TOMLFormatter` instance."""
        return TOMLFormatter.DEFAULT_INSTANCE
@filetype_test(iterations=200)
def test_toml_formatting(self):
    """Generates a random object and its TOML serialization.

    The object is a non-empty dict with string keys and no lists, and its strings avoid the characters
    listed below. The test fails if :func:`toml.dumps` rejects the generated object.

    Returns:
        A tuple of the random object and its serialization from :func:`toml.dumps`
        (presumably consumed by the ``filetype_test`` decorator -- confirm against its definition).

    """
    excluded_characters = frozenset('\t \\\'"\r:[]{}&\n()`|+%<>#*^%$@!~_+-=.,;?/')
    random_table = TestFormatting.make_random_obj(
        force_outer_container_type=dict,
        force_string_keys=True,
        exclude_bytes=excluded_characters,
        allow_lists=False,
        lists_can_contain_dicts=False,
        allow_empty_containers=False,
        allow_empty_strings=False
    )
    try:
        serialized = toml.dumps(random_table)
    except (TypeError, ValueError, IndexError) as e:
        # self.fail() raises, so control never falls through to the return below.
        self.fail(f"""Invalid random TOML object {random_table!r}: {e}""")
    return random_table, serialized
xml.XMLElementObj: ret = xml.XMLElementObj('', {})