From c98aa6dcf946951e152ecca90cf515c90365d261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Guimar=C3=A3es?= Date: Tue, 31 Mar 2026 18:33:13 +0100 Subject: [PATCH 1/5] fix: emit permissions: read-all when permissions block is empty --- provider/github/config.go | 1 + provider/github/parse_workflow.go | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/provider/github/config.go b/provider/github/config.go index 75924a0..5356eb7 100644 --- a/provider/github/config.go +++ b/provider/github/config.go @@ -59,6 +59,7 @@ type hclJobBlock struct { ServiceBlocks []hclServiceBlock `hcl:"service,block"` RunsOnBlocks []hclRunsOnBlock `hcl:"runs_on,block"` StrategyBlocks []hclStrategyBlock `hcl:"strategy,block"` + PermAttr hcl.Expression `hcl:"permissions,optional"` Permissions []hclGenericBlock `hcl:"permissions,block"` Defaults []hclGenericBlock `hcl:"defaults,block"` Concurrency []hclGenericBlock `hcl:"concurrency,block"` diff --git a/provider/github/parse_workflow.go b/provider/github/parse_workflow.go index 11b3b47..d5b061b 100644 --- a/provider/github/parse_workflow.go +++ b/provider/github/parse_workflow.go @@ -253,13 +253,21 @@ func parseJobConfig(cfg hclJobBlock, hv *hclparser.HCLVars) (map[string]any, err out["strategy"] = strategyValue } + if err := setOptionalYAMLAttr(out, "permissions", cfg.PermAttr, hv); err != nil { + return nil, err + } + for _, block := range cfg.Permissions { child, err := parseBodyMap(block.Body, hv, "permissions") if err != nil { return nil, err } - out["permissions"] = child + if len(child) == 0 { + out["permissions"] = "read-all" + } else { + out["permissions"] = child + } } for _, block := range cfg.Defaults { @@ -366,7 +374,11 @@ func parseWorkflowConfig(cfg hclWorkflowBlock, hv *hclparser.HCLVars) (map[strin return nil, err } - out["permissions"] = child + if len(child) == 0 { + out["permissions"] = "read-all" + } else { + out["permissions"] = child + } } for _, block := range cfg.Defaults { From 7b6881c04141acfd1d646b1b52c9fad20df7994d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Guimar=C3=A3es?= Date: Tue, 31 Mar 2026 22:06:06 +0100 Subject: [PATCH 2/5] =?UTF-8?q?fix:=20preserve=20job=20order=20during=20YA?= =?UTF-8?q?ML=E2=86=92HCL=20unparse=20using=20yaml.v3=20Node=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the goccy/go-yaml parse in parseYAMLDocument with a single yaml.v3 Node API pass: yamlv3.Unmarshal into *Node, then node.Decode for map[string]any and node.Content walk for source key order. Eliminates the prior double-parse and keeps both views consistent. Also move jobOrder from YAMLDocument struct to an explicit parameter on workflowToHCL, and add a clear presence-check error in buildWorkflowJobIndex when an ordered job name is absent from the map. --- .../yaml-map-key-order-lost-use-node-api.md | 186 ++++++++++++++++++ provider/github/benchmark_test.go | 15 +- provider/github/github.go | 4 +- provider/github/unparse_emit.go | 21 +- provider/github/unparse_workflow.go | 57 +++++- ...p1-double-yaml-parse-violates-claude-md.md | 67 +++++++ ...2-order-job-not-in-map-misleading-error.md | 68 +++++++ ...bs-in-map-not-in-order-silently-dropped.md | 68 +++++++ ...er-responsibility-split-across-packages.md | 77 ++++++++ ...-complete-p3-benchmark-gap-yamlJobOrder.md | 32 +++ ...ending-p1-yaml-bomb-no-input-size-limit.md | 65 ++++++ ...ing-p2-job-names-in-errors-need-quoting.md | 62 ++++++ ...-pending-p2-completeness-check-is-yagni.md | 64 ++++++ ...-p2-no-regression-test-yaml-v3-vs-goccy.md | 69 +++++++ ...ng-p3-job-order-empty-key-guard-comment.md | 66 +++++++ 15 files changed, 897 insertions(+), 24 deletions(-) create mode 100644 docs/solutions/logic-errors/yaml-map-key-order-lost-use-node-api.md create mode 100644 todos/001-complete-p1-double-yaml-parse-violates-claude-md.md create mode 100644 todos/002-complete-p2-order-job-not-in-map-misleading-error.md create mode 100644 todos/003-complete-p2-jobs-in-map-not-in-order-silently-dropped.md create mode 100644 todos/004-complete-p2-joborder-responsibility-split-across-packages.md create mode 100644 todos/005-complete-p3-benchmark-gap-yamlJobOrder.md create mode 100644 todos/006-pending-p1-yaml-bomb-no-input-size-limit.md create mode 100644 todos/007-pending-p2-job-names-in-errors-need-quoting.md create mode 100644 todos/008-pending-p2-completeness-check-is-yagni.md create mode 100644 todos/009-pending-p2-no-regression-test-yaml-v3-vs-goccy.md create mode 100644 todos/010-pending-p3-job-order-empty-key-guard-comment.md diff --git a/docs/solutions/logic-errors/yaml-map-key-order-lost-use-node-api.md b/docs/solutions/logic-errors/yaml-map-key-order-lost-use-node-api.md new file mode 100644 index 0000000..d178c17 --- /dev/null +++ b/docs/solutions/logic-errors/yaml-map-key-order-lost-use-node-api.md @@ -0,0 +1,186 @@ +--- +title: Preserve YAML job key order in cinzel unparse without double-parsing +description: Fix job order loss during YAML→HCL unparse by using yaml.v3 Node API for a single parse that yields both map data and ordered keys +date: 2026-03-31 +category: logic-errors +tags: [yaml, go, yaml-v3, key-order, parsing, unparse, roundtrip, hcl, github-actions] +symptoms: + - Jobs in HCL output are sorted alphabetically instead of matching source YAML order + - Go map iteration non-determinism causes inconsistent job ordering across runs + - Fixing order with a second yaml.v3 parse violates the "do not unmarshal YAML twice" rule +components: + - parseYAMLDocument + - workflowToHCL + - buildWorkflowJobIndex + - YAMLDocument struct +severity: medium +resolved: true +--- + +# Preserve YAML job key order in unparse without double-parsing + +## Symptoms + +- `cinzel unparse` emits jobs in alphabetical order instead of the order they appear in the source YAML +- Job order is non-deterministic across runs (Go map iteration randomises output) +- A naive fix adds a second YAML parse, violating the CLAUDE.md rule "do not unmarshal YAML twice" + +## Root Cause + +`goccy/go-yaml` (and `yaml.v3` plain `Unmarshal`) decode YAML mappings into `map[string]any`, which loses insertion order. There was no ordered representation of the keys after parsing. + +The tempting fix — a second `yaml.v3.Unmarshal` just to walk `node.Content` for key order — parses the same bytes twice and creates a correctness risk: if the two parsers produce different views of the document (anchor expansion, merge keys), `jobOrder` and `jobs` can diverge silently. + +## Solution + +Use `gopkg.in/yaml.v3`'s Node API as the **single** parse entry point. One `yamlv3.Unmarshal` into a `*yamlv3.Node` gives a complete parse tree. From that tree you get two views for free: + +- `node.Decode(&doc)` — converts the already-parsed tree into `map[string]any` (no second parse; this is a tree walk) +- `node.Content` — pairs of [key, value] child nodes in source declaration order + +```go +// parseYAMLDocument parses YAML content into a document map and extracts +// job names in source order in a single pass using the yaml.v3 Node API. +func parseYAMLDocument(content []byte) (map[string]any, []string, error) { + var node yamlv3.Node + + if err := yamlv3.Unmarshal(content, &node); err != nil { + return nil, nil, err + } + + if len(node.Content) == 0 { + return nil, nil, nil + } + + var doc map[string]any + + if err := node.Decode(&doc); err != nil { + return nil, nil, err + } + + return doc, jobOrderFromNode(node.Content[0]), nil +} + +// jobOrderFromNode extracts job names in source order from a yaml.v3 mapping +// node. It relies on the Node API's preservation of mapping key order, which +// is not available when unmarshaling directly into map[string]any. +func jobOrderFromNode(root *yamlv3.Node) []string { + if root.Kind != yamlv3.MappingNode { + return nil + } + + for i := 0; i+1 < len(root.Content); i += 2 { + if root.Content[i].Value == "jobs" { + jobs := root.Content[i+1] + + if jobs.Kind != yamlv3.MappingNode { + return nil + } + + keys := make([]string, 0, len(jobs.Content)/2) + + for j := 0; j+1 < len(jobs.Content); j += 2 { + if key := jobs.Content[j].Value; key != "" { + keys = append(keys, key) + } + } + + return keys + } + } + + return nil +} +``` + +**Key insight**: After `yamlv3.Unmarshal(content, &node)`, `node` is a DocumentNode and `node.Content[0]` is the root MappingNode. `node.Decode(&doc)` re-uses the already-resident node tree — it does not re-parse the input bytes. The CLAUDE.md rule "do not unmarshal YAML twice" is satisfied. + +### Pattern + +``` +yaml.v3.Node ──── node.Decode(&typedMap) → map[string]any (values, for conversion) + └─── walk node.Content → []string keys (order, for output order) +``` + +### Related structural changes + +**Pass job order as an explicit parameter** — do not store it on `YAMLDocument`. Order is a parse-time artifact of the source bytes, not a semantic property of the document: + +```go +// Before: field on struct, set externally after construction +type YAMLDocument struct { + // ... + JobOrder []string +} + +// After: explicit parameter at the call site +func workflowToHCL(doc ghworkflow.YAMLDocument, filename string, jobOrder []string) ([]byte, error) +``` + +**Distinguish presence vs type errors in `buildWorkflowJobIndex`**: + +```go +raw, exists := jobs[jobName] +if !exists { + return nil, nil, nil, fmt.Errorf("job %q listed in order but not found in jobs map", jobName) +} +jobMap, ok := toStringAnyMap(raw) +if !ok { + return nil, nil, nil, fmt.Errorf("job %q must be an object", jobName) +} +``` + +## What Didn't Work + +Adding a standalone `yamlJobOrder(content []byte) []string` function that called `yamlv3.Unmarshal` separately — after `parseYAMLDocument` had already called `goccy/go-yaml`. Two parsers, same bytes, different internal models. Violated the CLAUDE.md pitfall "do not unmarshal YAML twice" and introduced a latent correctness risk if the two parsers diverged on anchor expansion or merge keys. + +## Prevention + +### Detecting a re-introduced double parse + +Search for any function that calls both `yaml.Unmarshal` and `node.Decode` (or two separate `Unmarshal` calls) on the same input bytes: + +```bash +grep -n "Unmarshal\|\.Decode" provider/github/unparse_workflow.go +``` + +A PR that adds a new `yaml.Unmarshal` call in the unparse path should be flagged unless there is an explicit comment explaining why a second parse is unavoidable. + +### When adding a new "get key order from YAML" requirement + +1. Is there already a `yaml.v3.Node` in scope? Walk `node.Content` directly — no new parse. +2. If not, introduce `yaml.v3.Node` as the single parse entry point; get typed data via `node.Decode`. +3. Never add a second `yaml.Unmarshal` just to extract key order. + +### Test strategies + +**Golden roundtrip with non-alphabetical job order** — most direct regression catch: + +- Fixture: a workflow with jobs declared in deliberate reverse-alphabetical order (`deploy`, `build`, `lint`). +- Assert: HCL output preserves `deploy → build → lint`, not sorted `build → deploy → lint`. +- Assert: YAML→HCL→YAML roundtrip produces identical job order. + +**Unit test for `jobOrderFromNode`**: + +```go +func TestJobOrderFromNode(t *testing.T) { + yaml := "jobs:\n deploy:\n build:\n lint:\n" + var node yamlv3.Node + yamlv3.Unmarshal([]byte(yaml), &node) + got := jobOrderFromNode(node.Content[0]) + want := []string{"deploy", "build", "lint"} + // assert got == want +} +``` + +### CLAUDE.md rule + +> **Pitfalls**: do not unmarshal YAML twice + +## Related Documentation + +- [`docs/solutions/logic-errors/nondeterministic-map-iteration.md`](nondeterministic-map-iteration.md) — Go map iteration as a source of unstable output; canonical fix using `sortedKeys` +- [`docs/solutions/patterns/critical-patterns.md`](../patterns/critical-patterns.md) — Mandated patterns including the single-unmarshal rule +- [`docs/solutions/runtime-errors/unicode-emoji-zwj-escape-roundtrip.md`](../runtime-errors/unicode-emoji-zwj-escape-roundtrip.md) — Another roundtrip stability issue: ZWJ emoji escaping +- [`docs/solutions/logic-errors/github-strict-schema-parse-unparse-parity-unknown-rejection-stable-mapping.md`](github-strict-schema-parse-unparse-parity-unknown-rejection-stable-mapping.md) — Parse/unparse schema parity +- [`docs/solutions/developer-experience/yaml-string-quoting-rules.md`](../developer-experience/yaml-string-quoting-rules.md) — When yaml.v3 Node API output must use double-quoted style diff --git a/provider/github/benchmark_test.go b/provider/github/benchmark_test.go index 32631f6..b773e8a 100644 --- a/provider/github/benchmark_test.go +++ b/provider/github/benchmark_test.go @@ -14,7 +14,7 @@ import ( func mustParseYAMLDoc(b *testing.B, content []byte) map[string]any { b.Helper() - doc, err := parseYAMLDocument(content) + doc, _, err := parseYAMLDocument(content) if err != nil { b.Fatal(err) } @@ -116,16 +116,7 @@ func BenchmarkUnparseWorkflowInMemory(b *testing.B) { b.ResetTimer() for b.Loop() { - doc, err := classifyWorkflowDocument(mustParseYAMLDoc(b, content)) - if err != nil { - b.Fatal(err) - } - - if doc == nil { - b.Fatal("expected workflow document") - } - - if _, err := workflowToHCL(*doc, "workflow_call"); err != nil { + if _, err := unparseYAMLFile(content, "workflow_call"); err != nil { b.Fatal(err) } } @@ -151,7 +142,7 @@ func BenchmarkWorkflowToHCLInMemory(b *testing.B) { b.ResetTimer() for b.Loop() { - if _, err := workflowToHCL(*doc, "workflow_call"); err != nil { + if _, err := workflowToHCL(*doc, "workflow_call", nil); err != nil { b.Fatal(err) } } diff --git a/provider/github/github.go b/provider/github/github.go index 061680d..df0e264 100644 --- a/provider/github/github.go +++ b/provider/github/github.go @@ -195,7 +195,7 @@ func (p *GitHub) Unparse(opts provider.ProviderOps) error { // the document is a workflow, action, or step-only file. Returns nil if // the document is empty or unrecognized. func unparseYAMLFile(yamlBytes []byte, baseName string) ([]byte, error) { - doc, err := parseYAMLDocument(yamlBytes) + doc, jobOrder, err := parseYAMLDocument(yamlBytes) if err != nil { return nil, err } @@ -206,7 +206,7 @@ func unparseYAMLFile(yamlBytes []byte, baseName string) ([]byte, error) { } if workflowDoc != nil { - return workflowToHCL(*workflowDoc, baseName) + return workflowToHCL(*workflowDoc, baseName, jobOrder) } if actionDoc := classifyActionDocument(doc); actionDoc != nil { diff --git a/provider/github/unparse_emit.go b/provider/github/unparse_emit.go index 4ccbf54..42ba545 100644 --- a/provider/github/unparse_emit.go +++ b/provider/github/unparse_emit.go @@ -17,14 +17,23 @@ type workflowJobEntry struct { Body map[string]any } -func buildWorkflowJobIndex(jobs map[string]any) ([]workflowJobEntry, []string, map[string]string, error) { - jobNames := sortedKeys(jobs) +func buildWorkflowJobIndex(jobs map[string]any, order []string) ([]workflowJobEntry, []string, map[string]string, error) { + jobNames := order + if len(jobNames) == 0 { + jobNames = sortedKeys(jobs) + } entries := make([]workflowJobEntry, 0, len(jobs)) jobRefs := make([]string, 0, len(jobs)) jobIDMap := make(map[string]string, len(jobs)) for _, jobName := range jobNames { - jobMap, ok := toStringAnyMap(jobs[jobName]) + raw, exists := jobs[jobName] + + if !exists { + return nil, nil, nil, fmt.Errorf("job '%s' listed in order but not found in jobs map", jobName) + } + + jobMap, ok := toStringAnyMap(raw) if !ok { return nil, nil, nil, fmt.Errorf("job '%s' must be an object", jobName) @@ -43,6 +52,12 @@ func buildWorkflowJobIndex(jobs map[string]any) ([]workflowJobEntry, []string, m jobIDMap[jobName] = jobID } + for jobName := range jobs { + if _, covered := jobIDMap[jobName]; !covered { + return nil, nil, nil, fmt.Errorf("job '%s' is defined but was not included in the job order", jobName) + } + } + return entries, jobRefs, jobIDMap, nil } diff --git a/provider/github/unparse_workflow.go b/provider/github/unparse_workflow.go index d94c20b..250d079 100644 --- a/provider/github/unparse_workflow.go +++ b/provider/github/unparse_workflow.go @@ -10,23 +10,66 @@ import ( "strconv" "unicode/utf8" - "github.com/goccy/go-yaml" "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/yldio/cinzel/internal/maputil" "github.com/yldio/cinzel/internal/naming" "github.com/yldio/cinzel/provider/github/step" ghworkflow "github.com/yldio/cinzel/provider/github/workflow" + yamlv3 "gopkg.in/yaml.v3" ) -func parseYAMLDocument(content []byte) (map[string]any, error) { +// parseYAMLDocument parses YAML content into a document map and extracts +// job names in source order in a single pass using the yaml.v3 Node API. +func parseYAMLDocument(content []byte) (map[string]any, []string, error) { + var node yamlv3.Node + + if err := yamlv3.Unmarshal(content, &node); err != nil { + return nil, nil, err + } + + if len(node.Content) == 0 { + return nil, nil, nil + } + var doc map[string]any - if err := yaml.Unmarshal(content, &doc); err != nil { - return nil, err + if err := node.Decode(&doc); err != nil { + return nil, nil, err + } + + return doc, jobOrderFromNode(node.Content[0]), nil +} + +// jobOrderFromNode extracts job names in source order from a yaml.v3 mapping +// node. It relies on the Node API's preservation of mapping key order, which +// is not available when unmarshaling directly into map[string]any. +func jobOrderFromNode(root *yamlv3.Node) []string { + if root.Kind != yamlv3.MappingNode { + return nil } - return doc, nil + for i := 0; i+1 < len(root.Content); i += 2 { + if root.Content[i].Value == "jobs" { + jobs := root.Content[i+1] + + if jobs.Kind != yamlv3.MappingNode { + return nil + } + + keys := make([]string, 0, len(jobs.Content)/2) + + for j := 0; j+1 < len(jobs.Content); j += 2 { + if key := jobs.Content[j].Value; key != "" { + keys = append(keys, key) + } + } + + return keys + } + } + + return nil } func classifyWorkflowDocument(doc map[string]any) (*ghworkflow.YAMLDocument, error) { @@ -46,7 +89,7 @@ func classifyWorkflowDocument(doc map[string]any) (*ghworkflow.YAMLDocument, err return nil, nil } -func workflowToHCL(doc ghworkflow.YAMLDocument, filename string) ([]byte, error) { +func workflowToHCL(doc ghworkflow.YAMLDocument, filename string, jobOrder []string) ([]byte, error) { if err := validateWorkflowYAMLDoc(doc); err != nil { return nil, err } @@ -58,7 +101,7 @@ func workflowToHCL(doc ghworkflow.YAMLDocument, filename string) ([]byte, error) return nil, errors.New("workflow must define at least one job in 'jobs'") } - jobEntries, jobRefs, jobIDMap, err := buildWorkflowJobIndex(doc.Jobs) + jobEntries, jobRefs, jobIDMap, err := buildWorkflowJobIndex(doc.Jobs, jobOrder) if err != nil { return nil, err } diff --git a/todos/001-complete-p1-double-yaml-parse-violates-claude-md.md b/todos/001-complete-p1-double-yaml-parse-violates-claude-md.md new file mode 100644 index 0000000..5bca4bb --- /dev/null +++ b/todos/001-complete-p1-double-yaml-parse-violates-claude-md.md @@ -0,0 +1,67 @@ +--- +status: pending +priority: p1 +issue_id: "001" +tags: [code-review, architecture, quality] +dependencies: [] +--- + +# Double YAML parse violates CLAUDE.md rule + +## Problem Statement + +`unparseYAMLFile` parses `yamlBytes` twice: once via `parseYAMLDocument` (using `goccy/go-yaml`) and again inside `yamlJobOrder` (using `gopkg.in/yaml.v3` Node API). This directly violates the `CLAUDE.md` pitfall: + +> do not unmarshal YAML twice + +In addition to the style violation, two parsers operating on the same bytes can produce inconsistent results (differing YAML 1.1 vs 1.2 semantics, anchor expansion, merge keys), meaning `JobOrder` could reference names that do not correspond to keys in the `Jobs` map. + +## Findings + +- `provider/github/github.go` → `unparseYAMLFile` calls `parseYAMLDocument(yamlBytes)` then `yamlJobOrder(yamlBytes)` +- `provider/github/unparse_workflow.go:26` → `yamlJobOrder` does a full `yamlv3.Unmarshal` of raw bytes +- CLAUDE.md (Pitfalls section) explicitly forbids double unmarshal + +## Proposed Solutions + +### Option A — Extract order from `yaml.v3` Node API in a single pass (Recommended) + +Replace the first parse in `unparseYAMLFile` with a `yaml.v3` node walk that extracts both `map[string]any` data and job order simultaneously. Remove `yamlJobOrder` entirely. + +- **Pros:** single parse, no rule violation, eliminates consistency risk +- **Cons:** requires refactoring `parseYAMLDocument` to use `yaml.v3`; must verify that `yaml.v3` typed decode matches the behavior of `goccy/go-yaml` for all edge cases + +### Option B — Accept the double parse but isolate it (Minimal) + +Keep `yamlJobOrder` but make it a two-return-value variant of `parseYAMLDocument` so the two parses are collocated and documented as an intentional exception. Add a code comment explaining why `yaml.v3` is needed for order and `goccy/go-yaml` for validation. Update CLAUDE.md pitfall note to reflect the exception. + +- **Pros:** minimal code change, explicit and documented tradeoff +- **Cons:** still violates the rule; the inconsistency risk remains + +### Option C — Use `yaml.v3` only (Replace goccy dependency in unparse path) + +Since `goccy/go-yaml` strict decode is primarily needed for HCL→YAML parse direction, evaluate whether the unparse path actually requires it. If unparse only uses `map[string]any`, switch unparse to `yaml.v3` entirely. + +- **Pros:** removes dual-library dependency, single parse +- **Cons:** larger surface change; need to confirm goccy is not needed in unparse path + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected files: `provider/github/github.go`, `provider/github/unparse_workflow.go` +- CLAUDE.md pitfall: "do not unmarshal YAML twice" +- Both parsers agree in practice for typical workflow files, but correctness guarantee is missing + +## Acceptance Criteria + +- [ ] `yamlBytes` is parsed at most once per call to `unparseYAMLFile` +- [ ] Job order is still preserved correctly after the fix +- [ ] All existing tests pass +- [ ] Roundtrip stability tests pass + +## Work Log + +- 2026-03-31: Finding created during code review of job order preservation feature diff --git a/todos/002-complete-p2-order-job-not-in-map-misleading-error.md b/todos/002-complete-p2-order-job-not-in-map-misleading-error.md new file mode 100644 index 0000000..c25d488 --- /dev/null +++ b/todos/002-complete-p2-order-job-not-in-map-misleading-error.md @@ -0,0 +1,68 @@ +--- +status: pending +priority: p2 +issue_id: "002" +tags: [code-review, quality, error-handling] +dependencies: [001] +--- + +# Job in `order` but absent from `jobs` map gives misleading error + +## Problem Statement + +In `buildWorkflowJobIndex`, when `order` is non-empty it is used verbatim as the iteration list. If `yamlJobOrder` returns a name that is not a key in the `jobs` map (due to parser disagreement, anchor expansion, or merge-key edge cases), `toStringAnyMap(jobs[jobName])` returns `(nil, false)` and the caller gets: + +``` +job 'x' must be an object +``` + +This is misleading — the actual problem is that the job does not exist in the parsed map, not that it has the wrong type. + +## Findings + +- `provider/github/unparse_emit.go:30-34` +- `jobs[jobName]` returns `nil` (zero value of `any`) when key is absent +- `toStringAnyMap(nil)` returns `(nil, false)` — same as a wrongly-typed value +- Error message `"job 'x' must be an object"` conflates two distinct failure modes + +## Proposed Solutions + +### Option A — Pre-check presence before type assertion (Recommended) + +```go +raw, exists := jobs[jobName] +if !exists { + return nil, nil, nil, fmt.Errorf("job '%s' listed in order but not found in jobs map", jobName) +} +jobMap, ok := toStringAnyMap(raw) +if !ok { + return nil, nil, nil, fmt.Errorf("job '%s' must be an object", jobName) +} +``` + +- **Pros:** distinct, actionable error messages; minimal change +- **Cons:** none + +### Option B — Silently skip missing jobs with a warning log + +- **Pros:** more lenient +- **Cons:** produces silently incomplete output; not consistent with project error philosophy + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected file: `provider/github/unparse_emit.go` +- Function: `buildWorkflowJobIndex` + +## Acceptance Criteria + +- [ ] When a job name from `order` is absent in `jobs`, error message clearly states it was not found +- [ ] When a job name is present but wrong type, original "must be an object" error is preserved +- [ ] Existing tests pass + +## Work Log + +- 2026-03-31: Finding created during code review of job order preservation feature diff --git a/todos/003-complete-p2-jobs-in-map-not-in-order-silently-dropped.md b/todos/003-complete-p2-jobs-in-map-not-in-order-silently-dropped.md new file mode 100644 index 0000000..b482343 --- /dev/null +++ b/todos/003-complete-p2-jobs-in-map-not-in-order-silently-dropped.md @@ -0,0 +1,68 @@ +--- +status: pending +priority: p2 +issue_id: "003" +tags: [code-review, correctness, quality] +dependencies: [001] +--- + +# Jobs in `jobs` map but absent from `order` are silently dropped + +## Problem Statement + +In `buildWorkflowJobIndex`, when `order` is non-empty, only the jobs named in `order` are included in `entries`. If `yamlJobOrder` returns a subset of job names (parser inconsistency, partial parse, future anchor expansion adding synthetic keys), the remaining jobs are silently omitted from the HCL output. No error is raised and no warning is emitted. The user gets a silently incomplete conversion. + +## Findings + +- `provider/github/unparse_emit.go:21-47` +- `jobNames := order` — if `order` is shorter than `jobs`, extra jobs are dropped +- No check `len(entries) == len(jobs)` after the loop +- Silent data loss is especially dangerous in a YAML→HCL converter where the output may be used as the authoritative source + +## Proposed Solutions + +### Option A — Validate completeness after the loop (Recommended) + +After the loop, check that all jobs in the map were covered: + +```go +for jobName := range jobs { + if _, seen := jobIDMap[jobName]; !seen { + return nil, nil, nil, fmt.Errorf("job '%s' is in the workflow but was not included in the job order", jobName) + } +} +``` + +- **Pros:** surfaces inconsistency immediately; no silent data loss +- **Cons:** fails rather than falling back — appropriate for a converter tool + +### Option B — Append uncovered jobs at the end + +For jobs in `jobs` but not in `order`, append them after the ordered ones using `sortedKeys` for determinism. + +- **Pros:** no data loss; graceful degradation +- **Cons:** output order may not match source; unclear contract + +### Option C — Log a warning and continue + +- **Pros:** lenient +- **Cons:** silent in non-verbose mode; data loss still occurs + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected file: `provider/github/unparse_emit.go` +- Function: `buildWorkflowJobIndex` + +## Acceptance Criteria + +- [ ] If `order` is non-empty and a job in `jobs` is not in `order`, the function returns an error or appends the job (per chosen option) +- [ ] No silent data loss in HCL output +- [ ] Existing tests pass + +## Work Log + +- 2026-03-31: Finding created during code review of job order preservation feature diff --git a/todos/004-complete-p2-joborder-responsibility-split-across-packages.md b/todos/004-complete-p2-joborder-responsibility-split-across-packages.md new file mode 100644 index 0000000..37cca0c --- /dev/null +++ b/todos/004-complete-p2-joborder-responsibility-split-across-packages.md @@ -0,0 +1,77 @@ +--- +status: pending +priority: p2 +issue_id: "004" +tags: [code-review, architecture, quality] +dependencies: [001] +--- + +# `JobOrder` responsibility is split: field on struct but set at call site + +## Problem Statement + +`YAMLDocument.JobOrder` is a field on the struct in `provider/github/workflow/yaml_document.go`, but `NewYAMLDocument` never sets it. It is set at the call site in `unparseYAMLFile` after the fact: + +```go +workflowDoc.JobOrder = yamlJobOrder(yamlBytes) +``` + +This means: +1. Any other caller of `NewYAMLDocument` will silently get `JobOrder == nil` +2. The struct is partially constructed by its constructor — violating encapsulation +3. The contract is not enforced or documented + +## Findings + +- `provider/github/workflow/yaml_document.go:37` — `NewYAMLDocument` returns `YAMLDocument` without `JobOrder` +- `provider/github/github.go:209` — `workflowDoc.JobOrder = yamlJobOrder(yamlBytes)` set externally +- `provider/github/unparse_workflow.go:87` — `workflowToHCL` reads `doc.JobOrder` expecting it to be set + +## Proposed Solutions + +### Option A — Pass order as parameter to `workflowToHCL` instead of a struct field (Recommended) + +Remove `JobOrder` from `YAMLDocument` entirely. Change `workflowToHCL` signature: + +```go +func workflowToHCL(doc ghworkflow.YAMLDocument, filename string, jobOrder []string) ([]byte, error) +``` + +At the call site: +```go +return workflowToHCL(*workflowDoc, baseName, yamlJobOrder(yamlBytes)) +``` + +- **Pros:** explicit data flow; `YAMLDocument` is a pure data struct; no surprise nil fields +- **Cons:** `workflowToHCL` signature changes (internal function, no external callers) + +### Option B — Accept `order []string` in `NewYAMLDocument` + +Thread the raw bytes or pre-computed order into `NewYAMLDocument`. + +- **Pros:** constructor is complete +- **Cons:** `NewYAMLDocument` is in a different package and takes `mapper func` — adding a raw bytes param is awkward + +### Option C — Document the two-step construction with a comment + +Keep current code, add a comment on the field explaining it must be set by the caller. + +- **Pros:** minimal change +- **Cons:** invisible contract; easy to violate silently + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected files: `provider/github/workflow/yaml_document.go`, `provider/github/github.go`, `provider/github/unparse_workflow.go` + +## Acceptance Criteria + +- [ ] `YAMLDocument` is fully constructed in one place, or `JobOrder` is not a struct field +- [ ] No caller can inadvertently get `JobOrder == nil` when order was available + +## Work Log + +- 2026-03-31: Finding created during code review of job order preservation feature diff --git a/todos/005-complete-p3-benchmark-gap-yamlJobOrder.md b/todos/005-complete-p3-benchmark-gap-yamlJobOrder.md new file mode 100644 index 0000000..895f6a4 --- /dev/null +++ b/todos/005-complete-p3-benchmark-gap-yamlJobOrder.md @@ -0,0 +1,32 @@ +--- +status: pending +priority: p3 +issue_id: "005" +tags: [code-review, testing, performance] +dependencies: [] +--- + +# `BenchmarkUnparseWorkflowInMemory` does not cover `yamlJobOrder` + +## Problem Statement + +The existing `BenchmarkUnparseWorkflowInMemory` calls `workflowToHCL` directly, bypassing `unparseYAMLFile` and therefore never invoking `yamlJobOrder`. The double-parse cost has zero benchmark coverage. If bulk unparse (e.g., AI assist processing dozens of workflow files) ever becomes a concern, there will be no baseline to detect regression. + +## Proposed Solutions + +### Option A — Extend existing benchmark to call `unparseYAMLFile` (Recommended) + +Replace the direct `workflowToHCL` call in the benchmark with `unparseYAMLFile`, which exercises the full path including `yamlJobOrder`. + +### Option B — Add a dedicated `BenchmarkYAMLJobOrder` micro-benchmark + +Measures only the `yamlJobOrder` function to isolate the second-parse cost. + +## Acceptance Criteria + +- [ ] Benchmark covers the `yamlJobOrder` code path +- [ ] Baseline numbers are documented + +## Work Log + +- 2026-03-31: Finding created during code review of job order preservation feature diff --git a/todos/006-pending-p1-yaml-bomb-no-input-size-limit.md b/todos/006-pending-p1-yaml-bomb-no-input-size-limit.md new file mode 100644 index 0000000..592346d --- /dev/null +++ b/todos/006-pending-p1-yaml-bomb-no-input-size-limit.md @@ -0,0 +1,65 @@ +--- +status: pending +priority: p1 +issue_id: "006" +tags: [code-review, security, performance] +dependencies: [] +--- + +# No input size limit before YAML alias expansion (billion laughs DoS) + +## Problem Statement + +`parseYAMLDocument` calls `yamlv3.Unmarshal(content, &node)` with no upper bound on input size or alias depth. `gopkg.in/yaml.v3` expands YAML aliases eagerly during unmarshal. A crafted input using deeply nested anchors (e.g., billion-laughs pattern) causes exponential memory allocation before any application check runs. + +While `cinzel` is a CLI tool operating on local files, two attack surfaces exist: +1. `cinzel assist` feeds LLM-generated YAML through the unparse path — the LLM output is untrusted +2. A repository CI workflow could trigger `cinzel unparse` against a YAML file controlled by a third party + +## Findings + +- `provider/github/unparse_workflow.go` — `parseYAMLDocument` calls `yamlv3.Unmarshal` with no size check +- No max-byte guard at any call site in `Unparse` or `unparseYAMLFile` +- `node.Decode(&doc)` after unmarshal expands the node tree into `map[string]any`, compounding allocation + +## Proposed Solutions + +### Option A — Byte-length guard at `parseYAMLDocument` call site (Recommended) + +Add a length check in `parseYAMLDocument` before the unmarshal: + +```go +const maxYAMLBytes = 1 << 20 // 1 MB +if len(content) > maxYAMLBytes { + return nil, nil, fmt.Errorf("YAML input exceeds maximum size of %d bytes", maxYAMLBytes) +} +``` + +- **Pros:** simple, fast, zero false positives for real workflow files (largest real workflow is <100KB) +- **Cons:** arbitrary limit; must be documented + +### Option B — Cap alias expansion with a wrapper decoder + +No built-in alias limit in yaml.v3. Would require forking or wrapping. + +- **Pros:** more precise +- **Cons:** significant implementation cost; yaml.v3 is not easily intercepted + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected file: `provider/github/unparse_workflow.go`, function `parseYAMLDocument` +- Related path: `provider/github/github.go` → `Unparse` → `unparseYAMLFile` + +## Acceptance Criteria + +- [ ] Input larger than a defined maximum is rejected with a clear error before `yamlv3.Unmarshal` +- [ ] Limit is documented +- [ ] Existing tests pass + +## Work Log + +- 2026-03-31: Finding created during code review of single-pass yaml.v3 parse refactor diff --git a/todos/007-pending-p2-job-names-in-errors-need-quoting.md b/todos/007-pending-p2-job-names-in-errors-need-quoting.md new file mode 100644 index 0000000..541cd10 --- /dev/null +++ b/todos/007-pending-p2-job-names-in-errors-need-quoting.md @@ -0,0 +1,62 @@ +--- +status: pending +priority: p2 +issue_id: "007" +tags: [code-review, security, quality] +dependencies: [] +--- + +# Job names in error messages use `%s` — terminal injection via ANSI escape sequences + +## Problem Statement + +Three error messages in `buildWorkflowJobIndex` interpolate raw YAML job names using `%s`: + +```go +fmt.Errorf("job '%s' listed in order but not found in jobs map", jobName) +fmt.Errorf("job '%s' must be an object", jobName) +fmt.Errorf("job '%s' is defined but was not included in the job order", jobName) +``` + +A job name containing ANSI escape sequences (e.g., `\x1b[31m`) or other control characters will be passed verbatim to the terminal when `cinzel` prints the error. This enables terminal injection for any user running `cinzel unparse` against a YAML file with a crafted job name. + +## Findings + +- `provider/github/unparse_emit.go:33,39,57` +- Job names come from YAML input and are not sanitized before error formatting + +## Proposed Solutions + +### Option A — Use `%q` instead of `%s` in all three format strings (Recommended) + +```go +fmt.Errorf("job %q listed in order but not found in jobs map", jobName) +fmt.Errorf("job %q must be an object", jobName) +fmt.Errorf("job %q is defined but was not included in the job order", jobName) +``` + +`%q` uses Go's `strconv.Quote` semantics: control characters and escape sequences are hex-escaped, producing safe printable output. + +- **Pros:** one-line fix; `%q` is already used in other error messages in this codebase +- **Cons:** changes error message format (quotes shift from `'` to `"`) — check test fixtures for exact string matches + +### Option B — `strconv.Quote(jobName)` in format string + +Equivalent to `%q` but more explicit. + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected file: `provider/github/unparse_emit.go`, function `buildWorkflowJobIndex` + +## Acceptance Criteria + +- [ ] All three error messages use `%q` or equivalent safe quoting +- [ ] Tests that assert on the exact error message text are updated + +## Work Log + +- 2026-03-31: Finding created during code review diff --git a/todos/008-pending-p2-completeness-check-is-yagni.md b/todos/008-pending-p2-completeness-check-is-yagni.md new file mode 100644 index 0000000..4bcaf9d --- /dev/null +++ b/todos/008-pending-p2-completeness-check-is-yagni.md @@ -0,0 +1,64 @@ +--- +status: pending +priority: p2 +issue_id: "008" +tags: [code-review, quality, architecture] +dependencies: [] +--- + +# Completeness check in `buildWorkflowJobIndex` defends unreachable state (YAGNI) + +## Problem Statement + +The new loop at the end of `buildWorkflowJobIndex`: + +```go +for jobName := range jobs { + if _, covered := jobIDMap[jobName]; !covered { + return nil, nil, nil, fmt.Errorf("job '%s' is defined but was not included in the job order", jobName) + } +} +``` + +Both `jobs` (from `doc.Jobs`) and `order` (from `jobOrderFromNode`) are derived from the **same** `yaml.v3.Node` tree in the same parse pass. They cannot diverge. The check defends against a state that is structurally impossible given the current call graph, adds dead code, and will produce a confusing error message if `jobOrderFromNode` has a bug (the message implies a caller contract violation, not an internal bug). + +**Note:** This concern does not apply to the fallback path where `order` is `nil` and `jobNames = sortedKeys(jobs)` is used — in that case there is no order/map divergence to check anyway. + +## Findings + +- `provider/github/unparse_emit.go:52-58` +- Both `order` and `jobs` originate from `parseYAMLDocument` in the same yaml.v3 node tree +- The check fires only when `order` is non-empty, which is exactly the case where divergence is impossible + +## Proposed Solutions + +### Option A — Remove the completeness loop (Recommended) + +Delete lines 52–58 of `unparse_emit.go`. If `jobOrderFromNode` produces wrong output, the existing test suite will catch it. + +- **Pros:** -6 LOC; removes dead code; function intent is clearer +- **Cons:** slightly less defensive — an acceptable tradeoff given the invariant is provable + +### Option B — Keep the loop but add a comment explaining the invariant + +Add: `// Invariant: order and jobs derive from the same yaml.v3 node; this check catches bugs in jobOrderFromNode.` + +- **Pros:** explicit invariant documentation +- **Cons:** dead code still present; misleading error message + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected file: `provider/github/unparse_emit.go`, `buildWorkflowJobIndex` + +## Acceptance Criteria + +- [ ] Completeness loop is removed OR clearly documented as a self-invariant check +- [ ] All existing tests pass after removal + +## Work Log + +- 2026-03-31: Finding created during code review diff --git a/todos/009-pending-p2-no-regression-test-yaml-v3-vs-goccy.md b/todos/009-pending-p2-no-regression-test-yaml-v3-vs-goccy.md new file mode 100644 index 0000000..3916738 --- /dev/null +++ b/todos/009-pending-p2-no-regression-test-yaml-v3-vs-goccy.md @@ -0,0 +1,69 @@ +--- +status: pending +priority: p2 +issue_id: "009" +tags: [code-review, testing, quality] +dependencies: [] +--- + +# No targeted regression test for yaml.v3 vs goccy/go-yaml decode behavioral differences + +## Problem Statement + +`parseYAMLDocument` switched from `goccy/go-yaml` (strict, YAML 1.2) to `yaml.v3`'s `node.Decode` for mapping the YAML document into `map[string]any`. The golden roundtrip tests cover happy-path workflow files but not the edge cases where the two libraries can produce different results: + +- `null` / `~` values in maps +- Boolean-like strings (`on`, `off`, `yes`, `no`, `true`, `false`) as map values +- Integer-keyed maps (uncommon but valid YAML) +- Merge keys (`<<`) in job definitions +- Anchors/aliases (`&foo`, `*foo`) in the jobs mapping + +If the new decode behavior silently changes any of these, the golden tests will not catch it because those edge cases are not present in the fixtures. + +## Findings + +- `provider/github/unparse_workflow.go:68` — `node.Decode(&doc)` replaces `yaml.Unmarshal` from `goccy/go-yaml` +- No test in `provider/github/` exercises YAML edge cases at the decode boundary +- Most relevant CLAUDE.md doc: "use strict typed decode (goccy/go-yaml)" — the switch to `yaml.v3` for the unparse decode path should be explicitly validated + +## Proposed Solutions + +### Option A — Add a unit test for `parseYAMLDocument` with edge-case YAML inputs (Recommended) + +```go +func TestParseYAMLDocumentEdgeCases(t *testing.T) { + cases := []struct{ name, yaml string; wantKey string; wantVal any }{ + {"null value", "jobs:\n build:\n timeout: ~\n", "timeout", nil}, + {"bool-like string on", "on:\n push:\n", "on", ...}, + // etc. + } +} +``` + +- **Pros:** explicit contract; catches future regressions +- **Cons:** need to know exact `yaml.v3` decode semantics for each case + +### Option B — Add edge-case YAML fixtures to `testdata/matrix/unparse/` + +Extend the matrix unparse test infrastructure with YAML files exercising the above edge cases, paired with expected HCL golden output. + +- **Pros:** end-to-end; catches decode AND HCL emit differences +- **Cons:** more fixture overhead + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected file: `provider/github/unparse_workflow.go`, `parseYAMLDocument` +- Decoder switched from `github.com/goccy/go-yaml` to `gopkg.in/yaml.v3` for the `map[string]any` decode step + +## Acceptance Criteria + +- [ ] At least one test exercises `parseYAMLDocument` with a YAML `null` value, a boolean-like string, and a merge key +- [ ] Tests pass and document the expected behavior + +## Work Log + +- 2026-03-31: Finding created during code review diff --git a/todos/010-pending-p3-job-order-empty-key-guard-comment.md b/todos/010-pending-p3-job-order-empty-key-guard-comment.md new file mode 100644 index 0000000..e4cba8a --- /dev/null +++ b/todos/010-pending-p3-job-order-empty-key-guard-comment.md @@ -0,0 +1,66 @@ +--- +status: pending +priority: p3 +issue_id: "010" +tags: [code-review, quality] +dependencies: [] +--- + +# `jobOrderFromNode` empty-key guard is undocumented; its intent is unclear + +## Problem Statement + +```go +for j := 0; j+1 < len(jobs.Content); j += 2 { + if key := jobs.Content[j].Value; key != "" { + keys = append(keys, key) + } +} +``` + +The `key != ""` guard silently skips job key nodes with an empty `.Value`. There are two interpretations: + +1. **It's defensive against malformed YAML** — a key node with empty `.Value` is invalid and should be skipped +2. **It's guarding against alias nodes in key position** — in yaml.v3, alias nodes (`*ref`) in mapping key position have `.Kind == AliasNode` and `.Value == ""`. Skipping them prevents a blank string from entering the order slice. + +Without a comment, future maintainers can't tell which it is. The simplicity reviewer argues the guard is unreachable; the security reviewer argues it protects against a real edge case. Both are partially right — the guard is reachable for alias-keyed mappings but the resulting behavior (silently drop then hit the completeness check with a confusing error) is not good. + +## Proposed Solutions + +### Option A — Add a comment explaining the alias-key edge case + +```go +for j := 0; j+1 < len(jobs.Content); j += 2 { + // Skip alias nodes used as keys (Kind == AliasNode, Value == ""). + // These are unusual in practice but valid YAML. + if key := jobs.Content[j].Value; key != "" { + keys = append(keys, key) + } +} +``` + +### Option B — Explicitly reject alias keys with an error + +```go +keyNode := jobs.Content[j] +if keyNode.Kind == yamlv3.AliasNode || keyNode.Value == "" { + return nil // or error +} +keys = append(keys, keyNode.Value) +``` + +## Recommended Action + +_To be filled during triage._ + +## Technical Details + +- Affected file: `provider/github/unparse_workflow.go`, `jobOrderFromNode` + +## Acceptance Criteria + +- [ ] The intent of the empty-key guard is clear from a comment or explicit alias check + +## Work Log + +- 2026-03-31: Finding created during code review From b2905fc754d10b0f9ddf0bb0da49a5f6983ce312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Guimar=C3=A3es?= Date: Tue, 31 Mar 2026 22:26:32 +0100 Subject: [PATCH 3/5] fix: add blank line between multiple on blocks in unparse HCL output --- provider/github/snapshot_format_test.go | 81 +++++++++++++------ .../workflow_unparse_multi_on.golden.hcl | 30 +++++++ provider/github/unparse_emit.go | 6 +- 3 files changed, 92 insertions(+), 25 deletions(-) create mode 100644 provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl diff --git a/provider/github/snapshot_format_test.go b/provider/github/snapshot_format_test.go index 318a8b1..cc8e4d9 100644 --- a/provider/github/snapshot_format_test.go +++ b/provider/github/snapshot_format_test.go @@ -13,11 +13,16 @@ import ( ) func TestUnparseFormattingSnapshot(t *testing.T) { - tmpDir := t.TempDir() - inputFile := filepath.Join(tmpDir, "pull-request.yaml") - outputDir := filepath.Join(tmpDir, "out") - - content := `name: Pull Request + tests := []struct { + name string + filename string + content string + goldenFile string + }{ + { + name: "single on block", + filename: "pull-request.yaml", + content: `name: Pull Request on: pull_request: {} jobs: @@ -34,31 +39,59 @@ jobs: - id: checkout name: Checkout uses: actions/checkout@v4 -` - - if err := os.WriteFile(inputFile, []byte(content), 0o644); err != nil { - t.Fatal(err) +`, + goldenFile: "workflow_unparse.golden.hcl", + }, + { + name: "multiple on blocks have blank line between them", + filename: "multi-on.yaml", + content: `name: CI +on: + push: + branches: [main] + pull_request: {} +jobs: + build: + runs-on: ubuntu-latest + steps: + - run: echo hi +`, + goldenFile: "workflow_unparse_multi_on.golden.hcl", + }, } - if err := New().Unparse(provider.ProviderOps{File: inputFile, OutputDirectory: outputDir}); err != nil { - t.Fatal(err) - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + inputFile := filepath.Join(tmpDir, tt.filename) + outputDir := filepath.Join(tmpDir, "out") - gotBytes, err := os.ReadFile(filepath.Join(outputDir, "pull-request.hcl")) - if err != nil { - t.Fatal(err) - } + if err := os.WriteFile(inputFile, []byte(tt.content), 0o644); err != nil { + t.Fatal(err) + } - expectedBytes, err := os.ReadFile(filepath.Join("testdata", "fixtures", "formatting", "workflow_unparse.golden.hcl")) - if err != nil { - t.Fatal(err) - } + if err := New().Unparse(provider.ProviderOps{File: inputFile, OutputDirectory: outputDir}); err != nil { + t.Fatal(err) + } + + base := strings.TrimSuffix(tt.filename, filepath.Ext(tt.filename)) + gotBytes, err := os.ReadFile(filepath.Join(outputDir, base+".hcl")) + if err != nil { + t.Fatal(err) + } + + expectedBytes, err := os.ReadFile(filepath.Join("testdata", "fixtures", "formatting", tt.goldenFile)) + if err != nil { + t.Fatal(err) + } - got := normalizeLineEndings(string(gotBytes)) - expected := normalizeLineEndings(string(expectedBytes)) + got := normalizeLineEndings(string(gotBytes)) + expected := normalizeLineEndings(string(expectedBytes)) - if got != expected { - t.Fatalf("snapshot mismatch\n--- got ---\n%s\n--- expected ---\n%s", got, expected) + if got != expected { + t.Fatalf("snapshot mismatch\n--- got ---\n%s\n--- expected ---\n%s", got, expected) + } + }) } } diff --git a/provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl b/provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl new file mode 100644 index 0000000..9bda360 --- /dev/null +++ b/provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl @@ -0,0 +1,30 @@ +workflow "multi_on" { + filename = "multi-on" + + name = "CI" + + on "pull_request" { + } + + on "push" { + branches = ["main"] + } + + jobs = [ + job.build, + ] +} + +job "build" { + runs_on { + runners = "ubuntu-latest" + } + + steps = [ + step.build_step_1, + ] +} + +step "build_step_1" { + run = "echo hi" +} diff --git a/provider/github/unparse_emit.go b/provider/github/unparse_emit.go index 42ba545..4b36b37 100644 --- a/provider/github/unparse_emit.go +++ b/provider/github/unparse_emit.go @@ -84,7 +84,11 @@ func writeWorkflowMetadata(body *hclwrite.Body, doc ghworkflow.YAMLDocument) err return errors.New("workflow 'on' must be an object") } - for _, eventName := range sortedKeys(events) { + for i, eventName := range sortedKeys(events) { + if i > 0 { + appendSection() + } + eventBlock := body.AppendNewBlock("on", []string{eventName}) if err := writeOnEventBody(eventName, events[eventName], eventBlock.Body()); err != nil { From c3c519fced9e5b04808e123d6832b61e05e6e039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Guimar=C3=A3es?= Date: Wed, 1 Apr 2026 09:22:09 +0100 Subject: [PATCH 4/5] feat: improve step identifiers and deduplicate shared steps during unparse --- .../workflow_unparse_multi_on.golden.hcl | 4 +- ...flow_dispatch_inputs.roundtrip.golden.yaml | 2 +- provider/github/unparse_action.go | 2 +- provider/github/unparse_emit.go | 21 +++-- provider/github/unparse_workflow.go | 85 +++++++++++++++++-- 5 files changed, 97 insertions(+), 17 deletions(-) diff --git a/provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl b/provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl index 9bda360..40a2859 100644 --- a/provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl +++ b/provider/github/testdata/fixtures/formatting/workflow_unparse_multi_on.golden.hcl @@ -21,10 +21,10 @@ job "build" { } steps = [ - step.build_step_1, + step.echo, ] } -step "build_step_1" { +step "echo" { run = "echo hi" } diff --git a/provider/github/testdata/fixtures/matrix/unparse/valid/workflow_dispatch_inputs.roundtrip.golden.yaml b/provider/github/testdata/fixtures/matrix/unparse/valid/workflow_dispatch_inputs.roundtrip.golden.yaml index 9d77942..81b4d71 100644 --- a/provider/github/testdata/fixtures/matrix/unparse/valid/workflow_dispatch_inputs.roundtrip.golden.yaml +++ b/provider/github/testdata/fixtures/matrix/unparse/valid/workflow_dispatch_inputs.roundtrip.golden.yaml @@ -8,5 +8,5 @@ jobs: build: runs-on: ubuntu-latest steps: - - id: build_step_1 + - id: echo run: echo hi diff --git a/provider/github/unparse_action.go b/provider/github/unparse_action.go index cfb138c..2ed7298 100644 --- a/provider/github/unparse_action.go +++ b/provider/github/unparse_action.go @@ -237,7 +237,7 @@ func writeActionSteps(root *hclwrite.Body, raw any) ([]string, error) { return nil, errors.New("action step must be an object") } - stepID := stepIdentifier("action", idx, stepObj, used) + stepID := stepIdentifier(idx, stepObj, used) parsedStep, err := stepFromMap(stepObj) if err != nil { return nil, err diff --git a/provider/github/unparse_emit.go b/provider/github/unparse_emit.go index 4b36b37..97e9fad 100644 --- a/provider/github/unparse_emit.go +++ b/provider/github/unparse_emit.go @@ -113,7 +113,7 @@ func writeWorkflowMetadata(body *hclwrite.Body, doc ghworkflow.YAMLDocument) err return nil } -func writeWorkflowJobs(root *hclwrite.Body, jobs []workflowJobEntry, jobIDMap map[string]string, generatedVariables map[string]any) error { +func writeWorkflowJobs(root *hclwrite.Body, jobs []workflowJobEntry, jobIDMap map[string]string, generatedVariables map[string]any, stepRegistry map[string]string, usedStepIDs map[string]int) error { for _, job := range jobs { jobName := job.Name jobMap := job.Body @@ -125,7 +125,7 @@ func writeWorkflowJobs(root *hclwrite.Body, jobs []workflowJobEntry, jobIDMap ma jobID := jobIDMap[jobName] jobBlock := root.AppendNewBlock("job", []string{jobID}) - if err := writeJobBody(root, jobBlock.Body(), jobID, jobMap, jobIDMap, generatedVariables); err != nil { + if err := writeJobBody(root, jobBlock.Body(), jobID, jobMap, jobIDMap, generatedVariables, stepRegistry, usedStepIDs); err != nil { return fmt.Errorf("error in job '%s': %w", jobName, err) } } @@ -153,10 +153,10 @@ func writeGeneratedVariables(root *hclwrite.Body, generatedVariables map[string] return nil } -func writeJobKey(root *hclwrite.Body, body *hclwrite.Body, jobID string, key string, value any, jobIDMap map[string]string, generatedVariables map[string]any, stepRefs *[]string) error { +func writeJobKey(root *hclwrite.Body, body *hclwrite.Body, jobID string, key string, value any, jobIDMap map[string]string, generatedVariables map[string]any, stepRegistry map[string]string, usedStepIDs map[string]int, stepRefs *[]string) error { switch key { case "steps": - refs, err := writeJobSteps(root, jobID, value) + refs, err := writeJobSteps(root, value, stepRegistry, usedStepIDs) if err != nil { return err } @@ -195,14 +195,13 @@ func writeJobKey(root *hclwrite.Body, body *hclwrite.Body, jobID string, key str } } -func writeJobSteps(root *hclwrite.Body, jobID string, raw any) ([]string, error) { +func writeJobSteps(root *hclwrite.Body, raw any, stepRegistry map[string]string, usedStepIDs map[string]int) ([]string, error) { items, ok := raw.([]any) if !ok { return nil, errors.New("job 'steps' must be a list") } - used := map[string]int{} stepRefs := make([]string, 0, len(items)) for idx, item := range items { @@ -212,7 +211,14 @@ func writeJobSteps(root *hclwrite.Body, jobID string, raw any) ([]string, error) return nil, errors.New("job step must be an object") } - stepID := stepIdentifier(jobID, idx, stepObj, used) + if fp := stepFingerprint(stepObj); fp != "" { + if existingID, exists := stepRegistry[fp]; exists { + stepRefs = append(stepRefs, existingID) + continue + } + } + + stepID := stepIdentifier(idx, stepObj, usedStepIDs) parsedStep, err := stepFromMap(stepObj) if err != nil { return nil, err @@ -224,6 +230,7 @@ func writeJobSteps(root *hclwrite.Body, jobID string, raw any) ([]string, error) return nil, err } + stepRegistry[stepFingerprint(stepObj)] = stepID stepRefs = append(stepRefs, stepID) } diff --git a/provider/github/unparse_workflow.go b/provider/github/unparse_workflow.go index 250d079..973e51d 100644 --- a/provider/github/unparse_workflow.go +++ b/provider/github/unparse_workflow.go @@ -4,10 +4,12 @@ package github import ( + "encoding/json" "errors" "fmt" "regexp" "strconv" + "strings" "unicode/utf8" "github.com/hashicorp/hcl/v2/hclsyntax" @@ -96,6 +98,8 @@ func workflowToHCL(doc ghworkflow.YAMLDocument, filename string, jobOrder []stri f, root, workflowBody := newWorkflowRoot(filename) generatedVariables := map[string]any{} + stepRegistry := map[string]string{} + usedStepIDs := map[string]int{} if len(doc.Jobs) == 0 { return nil, errors.New("workflow must define at least one job in 'jobs'") @@ -118,7 +122,7 @@ func workflowToHCL(doc ghworkflow.YAMLDocument, filename string, jobOrder []stri return nil, err } - if err := writeWorkflowJobs(root, jobEntries, jobIDMap, generatedVariables); err != nil { + if err := writeWorkflowJobs(root, jobEntries, jobIDMap, generatedVariables, stepRegistry, usedStepIDs); err != nil { return nil, err } @@ -151,12 +155,12 @@ func unescapeHCLUnicode(src []byte) []byte { var reHCLUnicodeEscape = regexp.MustCompile(`\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}`) -func writeJobBody(root *hclwrite.Body, jobBody *hclwrite.Body, jobID string, job map[string]any, jobIDMap map[string]string, generatedVariables map[string]any) error { +func writeJobBody(root *hclwrite.Body, jobBody *hclwrite.Body, jobID string, job map[string]any, jobIDMap map[string]string, generatedVariables map[string]any, stepRegistry map[string]string, usedStepIDs map[string]int) error { stepRefs := []string{} for _, key := range sortedKeys(job) { if key == "steps" { - refs, err := writeJobSteps(root, jobID, job[key]) + refs, err := writeJobSteps(root, job[key], stepRegistry, usedStepIDs) if err != nil { return err } @@ -169,7 +173,7 @@ func writeJobBody(root *hclwrite.Body, jobBody *hclwrite.Body, jobID string, job jobBody.AppendNewline() } - if err := writeJobKey(root, jobBody, jobID, key, job[key], jobIDMap, generatedVariables, &stepRefs); err != nil { + if err := writeJobKey(root, jobBody, jobID, key, job[key], jobIDMap, generatedVariables, stepRegistry, usedStepIDs, &stepRefs); err != nil { return err } } @@ -376,7 +380,7 @@ func stepFromMap(value map[string]any) (step.Step, error) { return s, nil } -func stepIdentifier(jobID string, idx int, stepMap map[string]any, used map[string]int) string { +func stepIdentifier(idx int, stepMap map[string]any, used map[string]int) string { id := "" if raw, ok := stepMap["id"].(string); ok && raw != "" { @@ -384,9 +388,29 @@ func stepIdentifier(jobID string, idx int, stepMap map[string]any, used map[stri } if id == "" { - id = fmt.Sprintf("%s_step_%d", sanitizeIdentifier(jobID), idx+1) + if name, ok := stepMap["name"].(string); ok && name != "" { + id = sanitizeIdentifier(name) + } + } + + if id == "" { + if uses, ok := stepMap["uses"].(string); ok && uses != "" { + id = sanitizeIdentifier(stepActionName(uses)) + } + } + + if id == "" { + if run, ok := stepMap["run"].(string); ok && run != "" { + id = sanitizeIdentifier(stepFirstWord(run)) + } } + if id == "" { + id = fmt.Sprintf("step_%d", idx+1) + } + + id = strings.ToLower(id) + if count, exists := used[id]; exists { used[id] = count + 1 @@ -398,6 +422,55 @@ func stepIdentifier(jobID string, idx int, stepMap map[string]any, used map[stri return id } +// stepActionName extracts a short name from a uses string such as +// "actions/checkout@v4" → "checkout". +func stepActionName(uses string) string { + if at := strings.IndexByte(uses, '@'); at >= 0 { + uses = uses[:at] + } + + if slash := strings.LastIndexByte(uses, '/'); slash >= 0 { + uses = uses[slash+1:] + } + + return uses +} + +// stepFirstWord returns the first whitespace-delimited word of the first +// line of a run script, used as a fallback step identifier. +func stepFirstWord(run string) string { + line := run + + if nl := strings.IndexByte(run, '\n'); nl >= 0 { + line = run[:nl] + } + + line = strings.TrimSpace(line) + + if sp := strings.IndexAny(line, " \t"); sp >= 0 { + line = line[:sp] + } + + return line +} + +// stepFingerprint returns a canonical string representing the content of a +// step, excluding the step id field which is assigned by the unparser. +// Used to detect duplicate steps across jobs. +func stepFingerprint(stepMap map[string]any) string { + m := make(map[string]any, len(stepMap)) + + for k, v := range stepMap { + if k != "id" { + m[k] = v + } + } + + b, _ := json.Marshal(m) + + return string(b) +} + func normalizeNeeds(raw any, jobIDMap map[string]string) ([]string, error) { list, ok := raw.([]any) From 2bb67b813fc1d1caedf7587cf6d5874d62a47a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Guimar=C3=A3es?= Date: Wed, 1 Apr 2026 13:04:52 +0100 Subject: [PATCH 5/5] fix: preserve HCL-defined job order in parse direction YAML output Jobs declared as `jobs = [job.test, job.build, job.deploy]` now appear in that exact order in the generated YAML instead of being sorted alphabetically by the generic map serialiser. The parser stores the ref order as a `jobsOrder` sentinel key alongside the `jobs` map in the workflow body. The YAML serialiser (`workflowMapNode`) extracts the sentinel, writes jobs in the declared order via the new `appendOrderedJobsMap` helper, then falls back to sorted order for any jobs not covered. Added a byte-exact snapshot test with three jobs in non-alphabetical order (`test`, `build`, `deploy`) to pin this behaviour. --- .../preserve-hcl-job-order-in-yaml-output.md | 187 ++++++++++++++++++ provider/github/parse_workflow.go | 1 + provider/github/snapshot_format_test.go | 6 + .../workflow_parse_job_order.golden.yaml | 21 ++ .../formatting/workflow_parse_job_order.hcl | 36 ++++ provider/github/workflow_yaml.go | 59 ++++++ 6 files changed, 310 insertions(+) create mode 100644 docs/solutions/logic-errors/preserve-hcl-job-order-in-yaml-output.md create mode 100644 provider/github/testdata/fixtures/formatting/workflow_parse_job_order.golden.yaml create mode 100644 provider/github/testdata/fixtures/formatting/workflow_parse_job_order.hcl diff --git a/docs/solutions/logic-errors/preserve-hcl-job-order-in-yaml-output.md b/docs/solutions/logic-errors/preserve-hcl-job-order-in-yaml-output.md new file mode 100644 index 0000000..b1ceb61 --- /dev/null +++ b/docs/solutions/logic-errors/preserve-hcl-job-order-in-yaml-output.md @@ -0,0 +1,187 @@ +--- +name: Preserve job order in HCL→YAML parse +description: Fixed job ordering bug where HCL job list declaration order was lost during YAML conversion, causing alphabetical sorting instead of preserving source order +type: logic-errors +tags: [go, yaml, hcl, cinzel, job-order, map-ordering, github-actions] +components: [provider/github/parse_workflow.go, provider/github/workflow_yaml.go] +symptoms: [jobs output in alphabetical order instead of declared order, HCL jobs list reference order ignored during parse, roundtrip YAML→HCL→YAML changes job sequence] +--- + +## Problem + +When parsing HCL to YAML, jobs declared as `jobs = [job.test, job.build, job.deploy]` appeared in the YAML output in alphabetical order (`build`, `deploy`, `test`) rather than the declared order (`test`, `build`, `deploy`). + +## Root Cause + +In `parse_workflow.go`, jobs were collected into a `map[string]any`. Go maps have non-deterministic iteration order. When the YAML serializer (`workflowMapNode` → `genericMapNode`) emitted the jobs, it called `sort.Strings(keys)` to get a stable but alphabetical output — losing the original declaration order. + +The declaration order was available in `workflow.JobRefs` (a `[]string` extracted from `jobs = [job.a, job.b, ...]`), but was not threaded through to the serializer. + +## Solution + +**Sentinel key pattern**: store the ordered slice alongside the jobs map in the workflow body, extract it in the serializer, and write jobs in that order. + +### Step 1 — Capture order in `parse_workflow.go` + +After building the jobs map, store `jobsOrder` as a private sentinel alongside it: + +```go +if len(workflow.JobRefs) > 0 { + jobs := make(map[string]any) + + for _, jobID := range workflow.JobRefs { + jobContent, exists := parsedJobs[jobID] + if !exists { + return nil, nil, nil, fmt.Errorf("error in workflow '%s': cannot find job '%s'", wf.ID, jobID) + } + jobs[jobID] = jobContent.Body + } + + workflow.Body["jobs"] = jobs + workflow.Body["jobsOrder"] = workflow.JobRefs // ← new: preserve declaration order +} +``` + +### Step 2 — Extract sentinel in `workflowMapNode` (`workflow_yaml.go`) + +At the top of `workflowMapNode`, extract and exclude the sentinel before iterating keys: + +```go +func workflowMapNode(workflow map[string]any) (*yamlv3.Node, error) { + node := &yamlv3.Node{Kind: yamlv3.MappingNode} + seen := map[string]struct{}{} + + // "jobsOrder" is a private sentinel set by the parser to preserve the + // HCL-defined job sequence; it must never appear in the YAML output. + jobOrder, _ := workflow["jobsOrder"].([]string) + seen["jobsOrder"] = struct{}{} + + for _, key := range workflowKeyOrder { + value, ok := workflow[key] + if !ok { + continue + } + + if key == "jobs" && len(jobOrder) > 0 { + if jobsMap, ok := value.(map[string]any); ok { + if err := appendOrderedJobsMap(node, jobsMap, jobOrder); err != nil { + return nil, err + } + seen[key] = struct{}{} + continue + } + } + + if err := appendMappingPair(node, key, value); err != nil { + return nil, err + } + seen[key] = struct{}{} + } + // ... remaining keys +``` + +### Step 3 — `appendOrderedJobsMap` helper + +Writes jobs in declaration order, with sorted fallback for any jobs not in the order slice: + +```go +func appendOrderedJobsMap(node *yamlv3.Node, jobs map[string]any, jobOrder []string) error { + mapNode := &yamlv3.Node{Kind: yamlv3.MappingNode} + seen := make(map[string]struct{}, len(jobOrder)) + + for _, id := range jobOrder { + v, ok := jobs[id] + if !ok { + continue + } + if err := appendMappingPair(mapNode, id, v); err != nil { + return err + } + seen[id] = struct{}{} + } + + // Append any jobs not in jobOrder, sorted for stability + remaining := make([]string, 0, len(jobs)-len(seen)) + for k := range jobs { + if _, ok := seen[k]; !ok { + remaining = append(remaining, k) + } + } + sort.Strings(remaining) + for _, k := range remaining { + if err := appendMappingPair(mapNode, k, jobs[k]); err != nil { + return err + } + } + + keyNode := &yamlv3.Node{Kind: yamlv3.ScalarNode, Tag: "!!str", Value: "jobs"} + node.Content = append(node.Content, keyNode, mapNode) + return nil +} +``` + +## Test Added + +Added a byte-exact snapshot test to `TestParseFormattingSnapshots` in `snapshot_format_test.go`: + +```go +{ + name: "job order preserved in parse direction", + inputFile: filepath.Join("testdata", "fixtures", "formatting", "workflow_parse_job_order.hcl"), + outputFile: "workflow-parse-job-order.yaml", + expected: filepath.Join("testdata", "fixtures", "formatting", "workflow_parse_job_order.golden.yaml"), +}, +``` + +The fixture defines 3 jobs in non-alphabetical order (`test`, `build`, `deploy`): + +```hcl +jobs = [job.test, job.build, job.deploy] +``` + +The golden file verifies exact output order: + +```yaml +jobs: + test: + runs-on: ubuntu-latest + ... + build: + ... + deploy: + ... +``` + +## Prevention + +### Why byte-equality snapshots are required for order bugs + +`assertYAMLSemanticEqual` (used in roundtrip tests) unmarshals both YAMLs into `any` and compares via `reflect.DeepEqual`. Go map comparison is order-insensitive, so a semantic test would **pass even if job order is wrong**. Only a byte-exact snapshot comparison catches ordering regressions. + +**Rule**: any feature that preserves source order must have a byte-exact snapshot test (not a semantic one). + +### Sentinel key pattern safety checklist + +When using the sentinel key pattern: +1. Use a lowercase (unexported-style) key name — `jobsOrder` not `JobsOrder` +2. Mark the key as `seen` immediately in every serializer that touches the map +3. Add a comment explaining the sentinel's purpose +4. Write a test that verifies the sentinel key does NOT appear in any output + +### Alternative: typed ordered map + +For new code, consider an explicit ordered map type instead of the sentinel: + +```go +type orderedJobs struct { + order []string + jobs map[string]any +} +``` + +This makes the ordering requirement visible in the type system rather than implicit in a hidden key. + +## Related + +- [`yaml-map-key-order-lost-use-node-api.md`](yaml-map-key-order-lost-use-node-api.md) — same class of problem in the unparse direction (YAML→HCL), solved with the yaml.v3 Node API single-pass parse +- [`nondeterministic-map-iteration.md`](nondeterministic-map-iteration.md) — general Go map iteration ordering pitfalls diff --git a/provider/github/parse_workflow.go b/provider/github/parse_workflow.go index d5b061b..9522d4e 100644 --- a/provider/github/parse_workflow.go +++ b/provider/github/parse_workflow.go @@ -111,6 +111,7 @@ func parseHCLToWorkflows(body hcl.Body) ([]WorkflowYAMLFile, map[string]any, []A } workflow.Body["jobs"] = jobs + workflow.Body["jobsOrder"] = workflow.JobRefs } parsedWorkflows = append(parsedWorkflows, WorkflowYAMLFile{ diff --git a/provider/github/snapshot_format_test.go b/provider/github/snapshot_format_test.go index cc8e4d9..ec8d61a 100644 --- a/provider/github/snapshot_format_test.go +++ b/provider/github/snapshot_format_test.go @@ -114,6 +114,12 @@ func TestParseFormattingSnapshots(t *testing.T) { outputFile: "workflow-parse-expression.yaml", expected: filepath.Join("testdata", "fixtures", "formatting", "workflow_parse_expression.golden.yaml"), }, + { + name: "job order preserved in parse direction", + inputFile: filepath.Join("testdata", "fixtures", "formatting", "workflow_parse_job_order.hcl"), + outputFile: "workflow-parse-job-order.yaml", + expected: filepath.Join("testdata", "fixtures", "formatting", "workflow_parse_job_order.golden.yaml"), + }, } for _, tt := range tests { diff --git a/provider/github/testdata/fixtures/formatting/workflow_parse_job_order.golden.yaml b/provider/github/testdata/fixtures/formatting/workflow_parse_job_order.golden.yaml new file mode 100644 index 0000000..6b4f4ac --- /dev/null +++ b/provider/github/testdata/fixtures/formatting/workflow_parse_job_order.golden.yaml @@ -0,0 +1,21 @@ +# generated-by: cinzel +# cinzel-provider: github +name: CI +on: + push: +jobs: + test: + runs-on: ubuntu-latest + steps: + - id: echo + run: echo hi + build: + runs-on: ubuntu-latest + steps: + - id: echo + run: echo hi + deploy: + runs-on: ubuntu-latest + steps: + - id: echo + run: echo hi diff --git a/provider/github/testdata/fixtures/formatting/workflow_parse_job_order.hcl b/provider/github/testdata/fixtures/formatting/workflow_parse_job_order.hcl new file mode 100644 index 0000000..0d2cc21 --- /dev/null +++ b/provider/github/testdata/fixtures/formatting/workflow_parse_job_order.hcl @@ -0,0 +1,36 @@ +step "echo" { + run = "echo hi" +} + +job "test" { + runs_on { + runners = "ubuntu-latest" + } + + steps = [step.echo] +} + +job "build" { + runs_on { + runners = "ubuntu-latest" + } + + steps = [step.echo] +} + +job "deploy" { + runs_on { + runners = "ubuntu-latest" + } + + steps = [step.echo] +} + +workflow "ci" { + filename = "workflow-parse-job-order" + name = "CI" + + on "push" {} + + jobs = [job.test, job.build, job.deploy] +} diff --git a/provider/github/workflow_yaml.go b/provider/github/workflow_yaml.go index 5bdc72b..aaad789 100644 --- a/provider/github/workflow_yaml.go +++ b/provider/github/workflow_yaml.go @@ -77,6 +77,11 @@ func workflowMapNode(workflow map[string]any) (*yamlv3.Node, error) { seen := map[string]struct{}{} + // "jobsOrder" is a private sentinel set by the parser to preserve the + // HCL-defined job sequence; it must never appear in the YAML output. + jobOrder, _ := workflow["jobsOrder"].([]string) + seen["jobsOrder"] = struct{}{} + for _, key := range workflowKeyOrder { value, ok := workflow[key] @@ -84,6 +89,18 @@ func workflowMapNode(workflow map[string]any) (*yamlv3.Node, error) { continue } + if key == "jobs" && len(jobOrder) > 0 { + if jobsMap, ok := value.(map[string]any); ok { + if err := appendOrderedJobsMap(node, jobsMap, jobOrder); err != nil { + return nil, err + } + + seen[key] = struct{}{} + + continue + } + } + if err := appendMappingPair(node, key, value); err != nil { return nil, err } @@ -232,6 +249,48 @@ func stringNeedsQuoting(v string) bool { return false } +// appendOrderedJobsMap writes jobs to node in the order given by jobOrder, +// appending any jobs not listed in the order (sorted) at the end. +func appendOrderedJobsMap(node *yamlv3.Node, jobs map[string]any, jobOrder []string) error { + mapNode := &yamlv3.Node{Kind: yamlv3.MappingNode} + seen := make(map[string]struct{}, len(jobOrder)) + + for _, id := range jobOrder { + v, ok := jobs[id] + + if !ok { + continue + } + + if err := appendMappingPair(mapNode, id, v); err != nil { + return err + } + + seen[id] = struct{}{} + } + + remaining := make([]string, 0, len(jobs)-len(seen)) + + for k := range jobs { + if _, ok := seen[k]; !ok { + remaining = append(remaining, k) + } + } + + sort.Strings(remaining) + + for _, k := range remaining { + if err := appendMappingPair(mapNode, k, jobs[k]); err != nil { + return err + } + } + + keyNode := &yamlv3.Node{Kind: yamlv3.ScalarNode, Tag: "!!str", Value: "jobs"} + node.Content = append(node.Content, keyNode, mapNode) + + return nil +} + func genericMapNode(mapping map[string]any) (*yamlv3.Node, error) { keys := make([]string, 0, len(mapping))