diff --git a/artefact/lib/artefact/mermaid.ex b/artefact/lib/artefact/mermaid.ex index 318cda6..7479999 100644 --- a/artefact/lib/artefact/mermaid.ex +++ b/artefact/lib/artefact/mermaid.ex @@ -3,7 +3,21 @@ defmodule Artefact.Mermaid do @moduledoc """ - Derives Mermaid diagram source from an `%Artefact{}`. + Converts between `%Artefact{}` structs and Mermaid legacy `graph` source. + + Two public functions: + + - `export/2` — artefact → Mermaid string + - `from_mmd!/2` — Mermaid string → artefact + + ## Round-trip fidelity + + `export/2` followed by `from_mmd!/2` followed by `export/2` produces + identical Mermaid source. The preserved fields are: `title`, `description`, + node `name` and `description` properties, node labels, and relationship + types. See *Lossy* below for what is not preserved. + + ## Export format Uses the legacy `graph` syntax for broad renderer compatibility (GitHub, Notion, mdBook, Livebook). Nodes render as circles (`id(("..."))`) — the @@ -23,8 +37,14 @@ defmodule Artefact.Mermaid do description is omitted. Like `accTitle`, the description is screen-reader only — Mermaid does not render it visually. - Lossy: `position`, `style`, properties beyond `name`, and the artefact-level - `base_label` (collapsed into per-node labels at output time) are not represented. + Node `description` properties are emitted as `click id "description"` tooltip lines — + present in source, visible on hover, and parseable by `from_mmd!/2`. + + ## Lossy + + `position`, `style`, properties beyond `name` and `description`, and the + artefact-level `base_label` (collapsed into per-node labels at output time) + are not represented in Mermaid source and are not recovered on import. """ @directions ~w(LR RL TB BT TD)a @@ -63,10 +83,11 @@ defmodule Artefact.Mermaid do end node_lines = Enum.map(graph.nodes, &node_line(&1, base_label)) + click_lines = Enum.flat_map(graph.nodes, &click_line/1) rel_lines = Enum.map(graph.relationships, &rel_line/1) accessibility = acc_title_lines(title) ++ acc_descr_lines(description) - body = ["graph #{direction}" | accessibility ++ node_lines ++ rel_lines] + body = ["graph #{direction}" | accessibility ++ node_lines ++ click_lines ++ rel_lines] Enum.join(front_matter(title) ++ body, "\n") end @@ -156,10 +177,252 @@ defmodule Artefact.Mermaid do " #{node.id}((\"#{label_text}\"))" end + defp click_line(%Artefact.Node{id: id, properties: props}) do + case Map.get(props, "description") do + nil -> [] + desc -> [" click #{id} \"#{escape(desc)}\""] + end + end + defp rel_line(%Artefact.Relationship{type: type, from_id: from, to_id: to}) do " #{from} -->|#{escape_pipe(type)}| #{to}" end + # -- parser -- + + @doc """ + Parse a Mermaid `graph` source string into an `%Artefact{}`. + + Accepts both the round-trip format produced by `export/2` and the broader + Mermaid legacy graph syntax used by tools like Confluence and GitHub. + + ## Node content conventions + + Three label formats are recognised inside node shapes: + + - `name
Label1 Label2` — our export format: name on top, space-joined + semantic labels below + - `LABEL · name` — yarn convention: a single label and name separated by ` · ` + - plain text — treated as the name with no labels + + `click id "text"` lines become the node `description` property. + + ## UUID identity + + Each node's UUID is derived deterministically from its **Mermaid node id** + (the `\w+` identifier, e.g. `val_0`, `std_ulogic`) via + `Artefact.UUID.from_name/1`. The display name inside the shape label is not + used. This means: + + - The same diagram imported twice produces the same artefact — safe to repeat. + - Two diagrams that share a node id will bind via `combine!/2` without any + manual UUID management. + - Renaming a node id changes its UUID and breaks bindings. Keep ids stable. + + ## Inline edge + node syntax + + When a node's shape is declared on the same line as an edge + (`A["label"] -->|TYPE| B["label"]`), only the **edge** is registered; the + node label is not captured. Use a separate declaration line to preserve + labels and names: + + graph LR + val_0["VALUE · 0"] + val_0 -->|ENUMERATES| value + + The round-trip format produced by `export/2` always emits separate node and + edge lines, so this limitation does not affect round-trips. + + ## Options + + * `:title` — overrides the title parsed from YAML front matter + * `:description` — overrides the description parsed from `accDescr:` + * `:base_label` — sets the artefact base label (not inferred from source) + + ## Example + + iex> source = \""" + ...> --- + ...> title: Us Two + ...> --- + ...> graph LR + ...> n0(("Matt
Agent Me")) + ...> n1(("Claude
Agent You")) + ...> n0 -->|US_TWO| n1 + ...> \""" + iex> artefact = Artefact.Mermaid.from_mmd!(source) + iex> artefact.title + "Us Two" + iex> length(artefact.graph.nodes) + 2 + + """ + def from_mmd!(source, opts \\ []) do + require Artefact + {parsed_title, parsed_desc, node_decls, edge_decls, click_decls} = parse_mmd(source) + + title = Keyword.get(opts, :title, parsed_title) + description = Keyword.get(opts, :description, parsed_desc) + base_label = Keyword.get(opts, :base_label) + + all_ids = + (Map.keys(node_decls) ++ + Enum.flat_map(edge_decls, fn {f, _t, to} -> [f, to] end)) + |> Enum.uniq() + + id_to_key = all_ids |> Enum.with_index() |> Map.new(fn {id, i} -> {id, :"n#{i}"} end) + + nodes = + Enum.map(all_ids, fn id -> + {name, labels} = Map.get(node_decls, id, {id, []}) + desc = Map.get(click_decls, id) + props = if desc, do: %{"name" => name, "description" => desc}, else: %{"name" => name} + {id_to_key[id], [labels: labels, properties: props, uuid: Artefact.UUID.from_name(id)]} + end) + + relationships = + edge_decls + |> Enum.map(fn {from_id, type, to_id} -> + [from: id_to_key[from_id], type: type, to: id_to_key[to_id]] + end) + |> Enum.uniq() + + Artefact.new!( + title: title, + description: description, + base_label: base_label, + nodes: nodes, + relationships: relationships + ) + end + + defp parse_mmd(source) do + {title_from_fm, body} = strip_front_matter(source) + + {title, desc, node_decls, edge_decls, click_decls} = + body + |> String.split("\n") + |> Enum.map(&String.trim/1) + |> collect_lines() + + {title_from_fm || title, desc, node_decls, edge_decls, click_decls} + end + + defp strip_front_matter(source) do + case Regex.run(~r/\A---\n(.*?)\n---\n/s, source) do + [matched, fm_body] -> + title = + case Regex.run(~r/^title:\s*["']?(.+?)["']?\s*$/m, fm_body) do + [_, t] -> String.trim(t, "\"") + nil -> nil + end + + {title, String.slice(source, String.length(matched)..-1//1)} + + nil -> + {nil, source} + end + end + + defp collect_lines(lines) do + acc = {nil, nil, %{}, [], %{}, false, []} + + {title, desc, node_decls, edge_decls, click_decls, _in_descr, _descr_lines} = + Enum.reduce(lines, acc, fn line, state -> + {title, desc, nodes, edges, clicks, in_descr, descr_lines} = state + + cond do + # Close accDescr block + in_descr and Regex.match?(~r/^\}/, line) -> + {title, Enum.join(Enum.reverse(descr_lines), "\n"), nodes, edges, clicks, false, []} + + # Accumulate accDescr block lines + in_descr -> + {title, desc, nodes, edges, clicks, true, [String.trim_leading(line, " ") | descr_lines]} + + # accDescr block open + Regex.match?(~r/^accDescr\s*\{/, line) -> + {title, desc, nodes, edges, clicks, true, []} + + # accDescr inline + m = Regex.run(~r/^accDescr:\s*(.+)$/, line) -> + [_, d] = m + {title, d, nodes, edges, clicks, false, []} + + # accTitle (ignore — title comes from front matter or opts) + Regex.match?(~r/^accTitle:/, line) -> + state + + # graph declaration, subgraph, end, comments, blank — skip + Regex.match?(~r/^(?:graph\s|subgraph\s|end$|%%|$)/, line) -> + state + + # click tooltip: click id "text" + m = Regex.run(~r/^click\s+(\w+)\s+"([^"]*)"/, line) -> + [_, id, tooltip] = m + {title, desc, nodes, edges, Map.put(clicks, id, unescape_html(tooltip)), false, []} + + # edge with label: id -->|TYPE| id (also handles inline node shapes like A["label"] -->|TYPE| B) + m = Regex.run(~r/^(\w+).*?(?:-->|-\.->|===>)\|([^|]+)\|\s*(\w+)/, line) -> + [_, from_id, type, to_id] = m + {title, desc, nodes, [{from_id, type, to_id} | edges], clicks, false, []} + + # node — try most specific format first + m = Regex.run(~r/^(\w+)\(\("(.+?)"\)\)/, line) -> + [_, id, content] = m + {title, desc, Map.put_new(nodes, id, parse_node_content(content)), edges, clicks, false, []} + + m = Regex.run(~r/^(\w+)\["(.+?)"\]/, line) -> + [_, id, content] = m + {title, desc, Map.put_new(nodes, id, parse_node_content(content)), edges, clicks, false, []} + + m = Regex.run(~r/^(\w+)\("(.+?)"\)/, line) -> + [_, id, content] = m + {title, desc, Map.put_new(nodes, id, parse_node_content(content)), edges, clicks, false, []} + + m = Regex.run(~r/^(\w+)\[([^\]]+)\]/, line) -> + [_, id, content] = m + {title, desc, Map.put_new(nodes, id, parse_node_content(content)), edges, clicks, false, []} + + m = Regex.run(~r/^(\w+)\(([^)]+)\)/, line) -> + [_, id, content] = m + {title, desc, Map.put_new(nodes, id, parse_node_content(content)), edges, clicks, false, []} + + true -> + state + end + end) + + {title, desc, node_decls, Enum.reverse(edge_decls), click_decls} + end + + defp parse_node_content(content) do + raw = unescape_html(content) + + cond do + String.contains?(raw, "
") -> + [name_part, labels_part] = String.split(raw, "
", parts: 2) + labels = labels_part |> String.split(" ") |> Enum.reject(&(&1 == "")) + {String.trim(name_part), labels} + + String.contains?(raw, " · ") -> + [label, name] = String.split(raw, " · ", parts: 2) + {String.trim(name), [String.trim(label)]} + + true -> + {String.trim(raw), []} + end + end + + defp unescape_html(s) do + s + |> String.replace(""", "\"") + |> String.replace("&", "&") + |> String.replace("|", "|") + |> String.replace("<", "<") + |> String.replace(">", ">") + end + # Mermaid node label text inside `(("..."))` — escape double quotes only; # `
` is rendered as a line break, which is what we want. defp escape(value) do diff --git a/artefact/lib/artefact/uuid.ex b/artefact/lib/artefact/uuid.ex index 878620b..b2ab411 100644 --- a/artefact/lib/artefact/uuid.ex +++ b/artefact/lib/artefact/uuid.ex @@ -29,6 +29,23 @@ defmodule Artefact.UUID do format(a, b, 0x7000 ||| c, 0x8000000000000000 ||| d) end + @doc """ + Deterministic UUIDv7-shaped identifier derived from a name string. + + Uses SHA-256 of the name in place of random bytes, with version and variant + bits forced identically to `generate_v7/0`. The timestamp field is filled from + the hash rather than the clock, so the result is not time-ordered, but it is + stable: the same name always produces the same UUID. Passes `valid?/1`. + """ + def from_name(name) when is_binary(name) do + <> = :crypto.hash(:sha256, name) + + <> = + <> + + format(a, b, 0x7000 ||| c, 0x8000000000000000 ||| d) + end + @doc "Compare two UUIDv7 strings. Returns the lower (earlier) of the two." def harmonise(uuid_a, uuid_b) when uuid_a <= uuid_b, do: uuid_a def harmonise(_uuid_a, uuid_b), do: uuid_b diff --git a/artefact/test/artefact_test.exs b/artefact/test/artefact_test.exs index db33102..d3c887c 100644 --- a/artefact/test/artefact_test.exs +++ b/artefact/test/artefact_test.exs @@ -1983,4 +1983,158 @@ defmodule ArtefactTest do assert Artefact.is_valid?(result) end end + + describe "Artefact.UUID.from_name/1" do + test "produces a valid UUIDv7" do + uuid = Artefact.UUID.from_name("n0") + assert Artefact.UUID.valid?(uuid) + end + + test "is deterministic — same name always gives same UUID" do + assert Artefact.UUID.from_name("hello") == Artefact.UUID.from_name("hello") + end + + test "different names give different UUIDs" do + refute Artefact.UUID.from_name("n0") == Artefact.UUID.from_name("n1") + end + end + + describe "Artefact.Mermaid.from_mmd!/2" do + test "parses a minimal exported diagram (round-trip node/rel counts)" do + require Artefact + + source = Artefact.new!( + title: "UsTwo", + base_label: "Agent", + nodes: [ + matt: [labels: ["Agent", "Me"], properties: %{"name" => "Matt"}], + claude: [labels: ["Agent", "You"], properties: %{"name" => "Claude"}] + ], + relationships: [[from: :matt, type: "US_TWO", to: :claude]] + ) + |> Artefact.Mermaid.export() + + result = Artefact.Mermaid.from_mmd!(source) + + assert length(result.graph.nodes) == 2 + assert length(result.graph.relationships) == 1 + end + + test "round-trip: export → from_mmd! → export produces identical output" do + require Artefact + + a = Artefact.new!( + title: "Round Trip", + base_label: "Concept", + nodes: [ + x: [labels: ["Concept"], properties: %{"name" => "Alpha"}], + y: [labels: ["Concept"], properties: %{"name" => "Beta"}] + ], + relationships: [[from: :x, type: "RELATES", to: :y]] + ) + + mmd1 = Artefact.Mermaid.export(a) + mmd2 = mmd1 |> Artefact.Mermaid.from_mmd!() |> Artefact.Mermaid.export() + + assert mmd1 == mmd2 + end + + test "click tooltip becomes node description property" do + source = """ + graph LR + n0(("Alice")) + click n0 "The description" + """ + + result = Artefact.Mermaid.from_mmd!(source) + node = hd(result.graph.nodes) + assert node.properties["description"] == "The description" + end + + test "parses hand-authored mermaid with bracket nodes and edge labels" do + source = """ + graph LR + A[Alpha] -->|KNOWS| B[Beta] + B -->|USES| C[Gamma] + """ + + result = Artefact.Mermaid.from_mmd!(source) + assert length(result.graph.nodes) == 3 + assert length(result.graph.relationships) == 2 + types = Enum.map(result.graph.relationships, & &1.type) + assert "KNOWS" in types + assert "USES" in types + end + + test "same mermaid node id in two diagrams produces same UUID" do + # node names default to their mermaid id when declared inline on an edge line + source_a = "graph LR\n shared -->|REL| other_a" + source_b = "graph LR\n shared -->|REL| other_b" + + result_a = Artefact.Mermaid.from_mmd!(source_a) + result_b = Artefact.Mermaid.from_mmd!(source_b) + + uuid_a = Enum.find(result_a.graph.nodes, &(&1.properties["name"] == "shared")).uuid + uuid_b = Enum.find(result_b.graph.nodes, &(&1.properties["name"] == "shared")).uuid + + assert uuid_a == uuid_b + end + + test "shared UUID enables combine!/2 to find binding across two parsed diagrams" do + require Artefact + + source_a = "graph LR\n shared(\"Hub\") -->|TO| leaf_a(\"Leaf A\")" + source_b = "graph LR\n shared(\"Hub\") -->|TO| leaf_b(\"Leaf B\")" + + a = Artefact.Mermaid.from_mmd!(source_a, base_label: "Hub") + b = Artefact.Mermaid.from_mmd!(source_b, base_label: "Leaf") + + combined = Artefact.combine!(a, b) + + # 3 unique nodes (shared hub + leaf_a + leaf_b) and 2 relationships + assert length(combined.graph.nodes) == 3 + assert length(combined.graph.relationships) == 2 + end + + test "YAML front matter title is parsed" do + source = """ + --- + title: My Diagram + --- + graph LR + a(("Alpha")) -->|REL| b(("Beta")) + """ + + result = Artefact.Mermaid.from_mmd!(source) + assert result.title == "My Diagram" + end + + test ":title opt overrides parsed front matter" do + source = """ + --- + title: Original + --- + graph LR + a(("Alpha")) + """ + + result = Artefact.Mermaid.from_mmd!(source, title: "Override") + assert result.title == "Override" + end + + test "LABEL · name convention recovers both label and name" do + # Separate declaration line so the node regex fires (not the edge regex) + source = """ + graph LR + val_0["VALUE · 0"] + value["VALUE · value"] + val_0 -->|ENUMERATES| value + """ + + result = Artefact.Mermaid.from_mmd!(source) + node = Enum.find(result.graph.nodes, &(&1.properties["name"] == "0")) + assert node != nil + assert "VALUE" in node.labels + end + end end