From 40ff708d1ebd79b9eee8f404e80ea68221feb63c Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 9 Jun 2020 11:45:44 -0400 Subject: [PATCH 1/5] TOML diffing support, but no formatter yet (#16) --- graphtage/__init__.py | 2 +- graphtage/__main__.py | 2 ++ graphtage/toml.py | 37 +++++++++++++++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 graphtage/toml.py diff --git a/graphtage/__init__.py b/graphtage/__init__.py index 2d14636..bf4249d 100644 --- a/graphtage/__init__.py +++ b/graphtage/__init__.py @@ -7,7 +7,7 @@ from .version import __version__, VERSION_STRING from . import bounds, edits, expressions, fibonacci, formatter, levenshtein, matching, printer, \ search, sequences, tree, utils -from . import csv, json, xml, yaml +from . import csv, json, xml, yaml, toml import inspect diff --git a/graphtage/__main__.py b/graphtage/__main__.py index 5887526..d485434 100644 --- a/graphtage/__main__.py +++ b/graphtage/__main__.py @@ -241,6 +241,8 @@ def printer_type(*pos_args, **kwargs): mimetypes.suffix_map['.yaml'] = '.yml' if '.json5' not in mimetypes.types_map: mimetypes.add_type('application/json5', '.json5') + if '.toml' not in mimetypes.types_map: + mimetypes.add_type('application/toml', '.toml') if args.from_mime is not None: from_mime = args.from_mime diff --git a/graphtage/toml.py b/graphtage/toml.py new file mode 100644 index 0000000..497b724 --- /dev/null +++ b/graphtage/toml.py @@ -0,0 +1,37 @@ +from typing import Optional + +import toml + +from . import json +from .graphtage import BuildOptions, Filetype +from .tree import TreeNode + + +def build_tree(path: str, options: Optional[BuildOptions]) -> TreeNode: + with open(path, 'r') as f: + return json.build_tree(toml.load(f), options) + + +class TOML(Filetype): + """The TOML filetype.""" + def __init__(self): + """Initializes the TOML filetype. + + TOML identifies itself with the MIME types `application/toml` and `text/toml`. + + """ + super().__init__( + 'toml', + 'application/toml', + 'text/toml' + ) + + def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode: + """Equivalent to :func:`build_tree`""" + return build_tree(path, options=options) + + def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode: + return self.build_tree(path=path, options=options) + + def get_default_formatter(self) -> json.JSONFormatter: + return json.JSONFormatter.DEFAULT_INSTANCE diff --git a/setup.py b/setup.py index 78e12f7..672a7e6 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ def get_version_string(): 'json5==0.9.5', 'PyYAML', 'scipy>=1.4.0', + 'toml==0.10.0', 'tqdm', 'typing_extensions>=3.6.5' ], From e0edd9c3f89d3c4d7c7e7c90942adf87f3c6cb62 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 9 Jun 2020 17:27:16 -0400 Subject: [PATCH 2/5] Rudimentary TOML formatting (#16) --- graphtage/toml.py | 162 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 156 insertions(+), 6 deletions(-) diff --git a/graphtage/toml.py b/graphtage/toml.py index 497b724..3eee07b 100644 --- a/graphtage/toml.py +++ b/graphtage/toml.py @@ -1,10 +1,14 @@ -from typing import Optional +import itertools +import os +from typing import Iterator, Optional, Tuple, Union import toml from . import json -from .graphtage import BuildOptions, Filetype -from .tree import TreeNode +from .graphtage import BuildOptions, Filetype, KeyValuePairNode, LeafNode, MappingNode, StringFormatter, StringNode +from .printer import Printer +from .sequences import SequenceFormatter +from .tree import GraphtageFormatter, TreeNode def build_tree(path: str, options: Optional[BuildOptions]) -> TreeNode: @@ -12,6 +16,149 @@ def build_tree(path: str, options: Optional[BuildOptions]) -> TreeNode: return json.build_tree(toml.load(f), options) +class TOMLListFormatter(SequenceFormatter): + """A sub-formatter for TOML lists.""" + is_partial = True + + def __init__(self): + """Initializes the TOML list formatter. + + Equivalent to:: + + super().__init__('[', ']', ',') + + """ + super().__init__('[', ']', ',') + + def item_newline(self, printer: Printer, is_first: bool = False, is_last: bool = False): + pass + + def print_ListNode(self, *args, **kwargs): + """Prints a :class:`graphtage.ListNode`. + + Equivalent to:: + + super().print_SequenceNode(*args, **kwargs) + + """ + super().print_SequenceNode(*args, **kwargs) + + def print_SequenceNode(self, *args, **kwargs): + """Prints a non-List sequence. + + This delegates to the parent formatter's implementation:: + + self.parent.print(*args, **kwargs) + + which should invoke :meth:`TOMLFormatter.print`, thereby delegating to the :class:`TOMLDictFormatter` in + instances where a list contains a dict (the TOML format doesn't allow this, but it might be necessary if + formatting from another format into TOML) + + """ + self.parent.print(*args, **kwargs) + + +def toml_dumps(obj) -> str: + s = toml.dumps({'result': obj}) + expected_prefix = 'result = ' + expected_suffix = '\n' + assert s.startswith(expected_prefix) + assert s.endswith(expected_suffix) + return s[len(expected_prefix):-len(expected_suffix)] + + +class TOMLStringFormatter(StringFormatter): + """A TOML formatter for strings.""" + is_partial = True + + def escape(self, c: str) -> str: + s = toml_dumps(c) + if s.startswith('"') and s.endswith('"'): + return s[1:-1] + else: + return s + + +class TOMLMapping: + def __init__( + self, + mapping: MappingNode, + parent: Optional['TOMLMapping'] = None, + parent_name: Optional[TreeNode] = None + ): + self.mapping: MappingNode = mapping + self.parent: Optional[TOMLMapping] = parent + self.parent_name: Optional[TreeNode] = parent_name + + @property + def name_segments(self) -> Tuple[TreeNode, ...]: + if self.parent is None: + return () + else: + return self.parent.name_segments + (self.parent_name,) + + def items(self) -> Iterator[KeyValuePairNode]: + inserted = () + if self.mapping.edited and self.mapping.inserted: + inserted = self.mapping.inserted + for kvp in itertools.chain(self.mapping, inserted): + if not isinstance(kvp.value, MappingNode): + yield kvp + + def children(self) -> Iterator['TOMLMapping']: + inserted = () + if self.mapping.edited and self.mapping.inserted: + inserted = self.mapping.inserted + for kvp in itertools.chain(self.mapping, inserted): + if isinstance(kvp.value, MappingNode): + yield TOMLMapping(mapping=kvp.value, parent=self, parent_name=kvp.key) + + +class TOMLFormatter(GraphtageFormatter): + sub_format_types = [TOMLListFormatter, TOMLStringFormatter] + + def print(self, printer: Printer, *args, **kwargs): + # TOML has optional indentation; make it only two spaces, if we use it: + printer.indent_str = ' ' + super().print(printer, *args, **kwargs) + + def print_LeafNode(self, printer: Printer, node: LeafNode): + printer.write(toml_dumps(node.object)) + + def print_KeyValuePairNode(self, printer: Printer, node: KeyValuePairNode): + if isinstance(node.key, StringNode): + node.key.quoted = False + self.print(printer, node.key) + printer.write(' = ') + if isinstance(node.value, StringNode): + node.value.quoted = True + self.print(printer, node.value) + printer.newline() + + def print_MappingNode(self, printer: Printer, node: MappingNode): + mappings = [TOMLMapping(node)] + while mappings: + m: TOMLMapping = mappings.pop() + name = m.name_segments + if name: + printer.write('[') + first = True + for s in name: + if first: + first = False + else: + printer.write('.') + if isinstance(s, StringNode): + s.quoted = False + self.print(printer, s) + printer.write(']') + printer.newline() + for kvp in m.items(): + self.print(printer, kvp) + printer.newline() + mappings.extend(m.children()) + + class TOML(Filetype): """The TOML filetype.""" def __init__(self): @@ -30,8 +177,11 @@ def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeN """Equivalent to :func:`build_tree`""" return build_tree(path, options=options) - def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> TreeNode: - return self.build_tree(path=path, options=options) + def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]: + try: + return self.build_tree(path=path, options=options) + except ValueError as e: + return f'Error parsing {os.path.basename(path)}: {e})' def get_default_formatter(self) -> json.JSONFormatter: - return json.JSONFormatter.DEFAULT_INSTANCE + return TOMLFormatter.DEFAULT_INSTANCE From 3c1c65c60b76febf8c99d113b5f95675dc5918ad Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 9 Jun 2020 22:02:35 -0400 Subject: [PATCH 3/5] Do not print TOML headers if they contain no key value pairs and have children --- graphtage/toml.py | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/graphtage/toml.py b/graphtage/toml.py index 3eee07b..347d793 100644 --- a/graphtage/toml.py +++ b/graphtage/toml.py @@ -105,6 +105,17 @@ def items(self) -> Iterator[KeyValuePairNode]: if not isinstance(kvp.value, MappingNode): yield kvp + def __bool__(self): + try: + next(self.items()) + return True + except StopIteration: + try: + next(self.children()) + return False + except StopIteration: + return True + def children(self) -> Iterator['TOMLMapping']: inserted = () if self.mapping.edited and self.mapping.inserted: @@ -139,23 +150,24 @@ def print_MappingNode(self, printer: Printer, node: MappingNode): mappings = [TOMLMapping(node)] while mappings: m: TOMLMapping = mappings.pop() - name = m.name_segments - if name: - printer.write('[') - first = True - for s in name: - if first: - first = False - else: - printer.write('.') - if isinstance(s, StringNode): - s.quoted = False - self.print(printer, s) - printer.write(']') + if m: + name = m.name_segments + if name: + printer.write('[') + first = True + for s in name: + if first: + first = False + else: + printer.write('.') + if isinstance(s, StringNode): + s.quoted = False + self.print(printer, s) + printer.write(']') + printer.newline() + for kvp in m.items(): + self.print(printer, kvp) printer.newline() - for kvp in m.items(): - self.print(printer, kvp) - printer.newline() mappings.extend(m.children()) From 761a09e14bd10e3f075255333d8619aba19b9bd4 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 9 Jun 2020 22:31:11 -0400 Subject: [PATCH 4/5] Added a formatting test for TOML (#16) --- graphtage/toml.py | 2 +- test/test_formatting.py | 53 +++++++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/graphtage/toml.py b/graphtage/toml.py index 347d793..783421d 100644 --- a/graphtage/toml.py +++ b/graphtage/toml.py @@ -192,7 +192,7 @@ def build_tree(self, path: str, options: Optional[BuildOptions] = None) -> TreeN def build_tree_handling_errors(self, path: str, options: Optional[BuildOptions] = None) -> Union[str, TreeNode]: try: return self.build_tree(path=path, options=options) - except ValueError as e: + except (IndexError, TypeError, ValueError) as e: return f'Error parsing {os.path.basename(path)}: {e})' def get_default_formatter(self) -> json.JSONFormatter: diff --git a/test/test_formatting.py b/test/test_formatting.py index 967324a..06120dc 100644 --- a/test/test_formatting.py +++ b/test/test_formatting.py @@ -6,6 +6,7 @@ from typing import FrozenSet, Optional, Tuple, Type, Union from unittest import TestCase +import toml import yaml from tqdm import trange @@ -46,7 +47,10 @@ def wrapper(self: 'TestFormatting'): for _ in trange(iterations): orig_obj, str_representation = test_func(self) with graphtage.utils.Tempfile(str_representation.encode('utf-8')) as t: - tree = filetype.build_tree(t) + tree = filetype.build_tree_handling_errors(t) + if isinstance(tree, str): + self.fail(f"""{filetype_name.upper()} parse error {tree}: Original object: +{orig_obj!r}""") stream = StringIO() printer = graphtage.printer.Printer(out_stream=stream, ansi_color=False) formatter.print(printer, tree) @@ -100,11 +104,17 @@ def make_random_non_container(exclude_bytes: FrozenSet[str] = frozenset(), allow ])() @staticmethod - def _make_random_obj(obj_stack, force_container_type: Optional[Type[Union[dict, list]]] = None, *args, **kwargs): + def _make_random_obj( + obj_stack, + force_container_type: Optional[Type[Union[dict, list]]] = None, + allow_non_container: bool = True, + *args, + **kwargs + ): r = random.random() - NON_CONTAINER_PROB = 0.1 + NON_CONTAINER_PROB = [0.0, 0.1][allow_non_container] CONTAINER_PROB = (1.0 - NON_CONTAINER_PROB) / 2.0 - if r <= NON_CONTAINER_PROB: + if r <= NON_CONTAINER_PROB and allow_non_container: ret = TestFormatting.make_random_non_container(*args, **kwargs) elif r <= NON_CONTAINER_PROB + CONTAINER_PROB: if force_container_type is not None: @@ -125,9 +135,18 @@ def make_random_obj( force_string_keys: bool = False, allow_empty_containers: bool = True, alternate_containers: bool = False, + lists_can_contain_dicts: bool = True, + force_outer_container_type: Optional[Type[Union[dict, list]]] = None, + allow_lists: bool = True, *args, **kwargs): obj_stack = [] - ret = TestFormatting._make_random_obj(obj_stack, *args, **kwargs) + ret = TestFormatting._make_random_obj( + obj_stack, + force_container_type=force_outer_container_type, + allow_non_container=force_outer_container_type is None, + *args, + **kwargs + ) while obj_stack: expanding = obj_stack.pop() @@ -141,7 +160,9 @@ def make_random_obj( expanding[TestFormatting.make_random_non_container(*args, **kwargs)] = \ TestFormatting.make_random_non_container(*args, **kwargs) else: - if alternate_containers: + if not allow_lists: + force_container_type = dict + elif alternate_containers: force_container_type = list else: force_container_type = None @@ -160,7 +181,9 @@ def make_random_obj( if size == 0 and not allow_empty_containers: expanding.append(TestFormatting.make_random_non_container(*args, **kwargs)) else: - if alternate_containers: + if not lists_can_contain_dicts and allow_lists: + force_container_type = list + elif alternate_containers: force_container_type = dict else: force_container_type = None @@ -194,6 +217,22 @@ def test_csv_formatting(self): writer.writerow(row) return orig_obj, s.getvalue() + @filetype_test(iterations=200) + def test_toml_formatting(self): + orig_obj = TestFormatting.make_random_obj( + force_string_keys=True, + exclude_bytes=frozenset('\t \\\'"\r:[]{}&\n()`|+%<>#*^%$@!~_+-=.,;?/'), + allow_empty_containers=False, + force_outer_container_type=dict, + lists_can_contain_dicts=False, + allow_lists=False, + allow_empty_strings=False + ) + try: + return orig_obj, toml.dumps(orig_obj) + except (TypeError, ValueError, IndexError) as e: + self.fail(f"""Invalid random TOML object {orig_obj!r}: {e}""") + @staticmethod def make_random_xml() -> xml.XMLElementObj: ret = xml.XMLElementObj('', {}) From 7e6ed8c8541d7b0858186fa8cf89b9c07550e265 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Fri, 22 Aug 2025 12:05:09 -0400 Subject: [PATCH 5/5] Adds TOML to the list of supported formats --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e2ab53c..3876ef2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc) Graphtage is a command-line utility and [underlying library](https://trailofbits.github.io/graphtage/latest/library.html) -for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, plist, and CSS files. Its name is a +for semantically comparing and merging tree-like structures, such as JSON, XML, HTML, YAML, TOML, plist, and CSS files. Its name is a portmanteau of “graph” and “graftage”—the latter being the horticultural practice of joining two trees together such that they grow as one.