From c13f148a7497344c7f3d6c29bfdbeaef1e47b6cf Mon Sep 17 00:00:00 2001 From: Rafael Diniz Date: Fri, 22 May 2026 08:43:45 +0100 Subject: [PATCH 1/5] Close vanadium feature gap across LSP, formatter, and analysis Wire the existing CLI lint engine and a new semantic analyzer into LSP diagnostics, add a code-action handler that consumes autofix payloads, and implement the standard LSP methods that were previously missing: signatureHelp, rename/prepareRename, documentHighlight, typeDefinition, workspaceSymbol, foldingRange, callHierarchy, and completionItem/resolve. References and rename now resolve through symbol definitions instead of raw name-text matching, eliminating cross-module false positives. Replace the tabwriter-based CanonicalPrinter with a Wadler-style document combinator layer plus a width-aware wrapping formatter, both configurable via a new [tools.fmt] section in the project manifest. Mirror that with [tools.lint] for disabling individual rules. Add a pragmatic ASN.1 frontend (internal/asn1) so .asn/.asn1 modules contribute names to the database, a schema-driven AST generator under ttcn3/v2/syntax/nodes, an arena allocator with sync.Pool for hot lint paths, and LSP 3.17 protocol additions (pull-diagnostics, type hierarchy, position encoding). Switch text document sync from Full to Incremental and splice edits into cached buffers per keystroke. Flip the diagnostics/format/ semanticTokens/inlayHint defaults to on while keeping the legacy ttcn3.experimental.*.enabled keys as opt-out fallbacks. 
--- internal/arena/arena.go | 128 +++++ internal/arena/arena_test.go | 78 +++ internal/asn1/asn1.go | 600 ++++++++++++++++++++++ internal/asn1/asn1_test.go | 67 +++ internal/lsp/call_hierarchy.go | 185 +++++++ internal/lsp/code_action.go | 159 ++++++ internal/lsp/code_action_test.go | 82 +++ internal/lsp/completion_resolve.go | 25 + internal/lsp/configuration.go | 55 +- internal/lsp/diagnostics.go | 73 ++- internal/lsp/document_highlight.go | 66 +++ internal/lsp/document_highlight_test.go | 51 ++ internal/lsp/folding_range.go | 69 +++ internal/lsp/folding_range_test.go | 57 ++ internal/lsp/formatter.go | 37 +- internal/lsp/general.go | 29 +- internal/lsp/protocol/tsprotocol_317.go | 105 ++++ internal/lsp/references.go | 126 ++++- internal/lsp/references_test.go | 106 ++-- internal/lsp/rename.go | 99 ++++ internal/lsp/server.go | 14 + internal/lsp/server_gen.go | 48 +- internal/lsp/server_helpers.go | 34 ++ internal/lsp/signature_help.go | 203 ++++++++ internal/lsp/text_synchronization.go | 77 ++- internal/lsp/text_synchronization_test.go | 75 +++ internal/lsp/type_definition.go | 74 +++ internal/lsp/workspace_symbol.go | 134 +++++ project/project.go | 6 + project/tools.go | 106 ++++ project/tools_test.go | 57 ++ ttcn3/db.go | 30 ++ ttcn3/format/doc.go | 219 ++++++++ ttcn3/format/options.go | 39 ++ ttcn3/format/wrap.go | 233 +++++++++ ttcn3/format/wrap_test.go | 86 ++++ ttcn3/lint/lint.go | 193 +++++++ ttcn3/lint/lint_test.go | 100 ++++ ttcn3/lint/rules.go | 276 ++++++++++ ttcn3/semantic/semantic.go | 176 +++++++ ttcn3/semantic/semantic_test.go | 58 +++ ttcn3/v2/syntax/nodes/doc.go | 16 + ttcn3/v2/syntax/nodes/gen/main.go | 221 ++++++++ ttcn3/v2/syntax/nodes/nodes.yaml | 75 +++ ttcn3/v2/syntax/nodes/nodes_gen.go | 220 ++++++++ ttcn3/v2/syntax/nodes/nodes_test.go | 59 +++ 46 files changed, 4918 insertions(+), 108 deletions(-) create mode 100644 internal/arena/arena.go create mode 100644 internal/arena/arena_test.go create mode 100644 internal/asn1/asn1.go create mode 
100644 internal/asn1/asn1_test.go create mode 100644 internal/lsp/call_hierarchy.go create mode 100644 internal/lsp/code_action.go create mode 100644 internal/lsp/code_action_test.go create mode 100644 internal/lsp/completion_resolve.go create mode 100644 internal/lsp/document_highlight.go create mode 100644 internal/lsp/document_highlight_test.go create mode 100644 internal/lsp/folding_range.go create mode 100644 internal/lsp/folding_range_test.go create mode 100644 internal/lsp/protocol/tsprotocol_317.go create mode 100644 internal/lsp/rename.go create mode 100644 internal/lsp/server_helpers.go create mode 100644 internal/lsp/signature_help.go create mode 100644 internal/lsp/text_synchronization_test.go create mode 100644 internal/lsp/type_definition.go create mode 100644 internal/lsp/workspace_symbol.go create mode 100644 project/tools.go create mode 100644 project/tools_test.go create mode 100644 ttcn3/format/doc.go create mode 100644 ttcn3/format/options.go create mode 100644 ttcn3/format/wrap.go create mode 100644 ttcn3/format/wrap_test.go create mode 100644 ttcn3/lint/lint.go create mode 100644 ttcn3/lint/lint_test.go create mode 100644 ttcn3/lint/rules.go create mode 100644 ttcn3/semantic/semantic.go create mode 100644 ttcn3/semantic/semantic_test.go create mode 100644 ttcn3/v2/syntax/nodes/doc.go create mode 100644 ttcn3/v2/syntax/nodes/gen/main.go create mode 100644 ttcn3/v2/syntax/nodes/nodes.yaml create mode 100644 ttcn3/v2/syntax/nodes/nodes_gen.go create mode 100644 ttcn3/v2/syntax/nodes/nodes_test.go diff --git a/internal/arena/arena.go b/internal/arena/arena.go new file mode 100644 index 00000000..c7691a01 --- /dev/null +++ b/internal/arena/arena.go @@ -0,0 +1,128 @@ +// Package arena provides a simple bump allocator backed by a chain of +// growable byte slabs. 
// defaultChunkSize is the size of a fresh chunk. It's a compromise:
// big enough that small allocations stay cheap, small enough that a
// rarely-used arena does not waste memory.
const defaultChunkSize = 16 * 1024

// Arena is a chain of byte slabs from which short-lived objects are
// carved out. It is not safe for concurrent use.
type Arena struct {
	chunks [][]byte // every slab handed out since the last Reset
	cur    []byte   // slab currently being carved
	used   int      // bytes of cur already handed out
}

// New returns an empty arena.
func New() *Arena {
	return &Arena{}
}

// Reset drops every chunk so the garbage collector can reclaim them and
// returns the arena to its empty state. Slabs are deliberately not
// recycled: strings returned by String alias slab memory, so reusing a
// slab would mutate values callers may still hold. The next allocation
// after Reset therefore allocates a fresh chunk.
func (a *Arena) Reset() {
	// Truncating alone would leave the old slabs reachable through the
	// slice's backing array for as long as the arena sits in the pool.
	for i := range a.chunks {
		a.chunks[i] = nil
	}
	a.chunks = a.chunks[:0]
	a.cur = nil
	a.used = 0
}

// Bytes carves an n-byte slice out of the arena. It returns nil when
// n <= 0. The returned slice has its capacity clamped to n so appends
// cannot spill into neighbouring allocations.
func (a *Arena) Bytes(n int) []byte {
	if n <= 0 {
		return nil
	}
	if a.used+n > len(a.cur) {
		a.growFor(n)
	}
	out := a.cur[a.used : a.used+n : a.used+n]
	a.used += n
	return out
}

// String copies s into the arena and returns a string sharing the
// arena's storage. Useful when the caller is otherwise holding many
// short-lived strings and wants to defer their collection.
func (a *Arena) String(s string) string {
	if s == "" {
		return ""
	}
	b := a.Bytes(len(s))
	copy(b, s)
	return unsafeString(b)
}

// Alloc returns a zeroed *T whose memory lives in the arena.
//
// The slabs are plain byte slices, which the garbage collector does not
// scan for pointers. T must therefore be pointer-free (no pointers,
// strings, slices, maps, channels, funcs or interfaces); storing such
// values in an arena-allocated T can leave them dangling.
//
// The returned pointer is aligned for T. Zero-size types and the
// (unexpected) case where a suitably aligned slot cannot be carved fall
// back to the regular heap.
func Alloc[T any](a *Arena) *T {
	var zero T
	size := int(unsafe.Sizeof(zero))
	if size == 0 {
		// Bytes(0) returns nil; indexing it would panic.
		return new(T)
	}
	align := uintptr(unsafe.Alignof(zero))
	a.padTo(align)
	b := a.Bytes(size)
	if len(b) < size || uintptr(unsafe.Pointer(&b[0]))%align != 0 {
		return new(T)
	}
	return (*T)(unsafe.Pointer(&b[0]))
}

// padTo advances the bump pointer so the next carved byte sits on an
// align-byte boundary. If the padding runs past the current chunk the
// following Bytes call simply starts a new one.
func (a *Arena) padTo(align uintptr) {
	if a.used >= len(a.cur) {
		return
	}
	addr := uintptr(unsafe.Pointer(&a.cur[a.used]))
	if rem := addr % align; rem != 0 {
		a.used += int(align - rem)
	}
}

// growFor starts a new chunk large enough to hold n bytes and makes it
// the current one. Any unused tail of the previous chunk is abandoned.
func (a *Arena) growFor(n int) {
	size := defaultChunkSize
	if n > size {
		size = n
	}
	a.cur = make([]byte, size)
	a.used = 0
	a.chunks = append(a.chunks, a.cur)
}

// unsafeString returns b as a string without copying the underlying
// bytes. Safe only because the caller already promised not to mutate
// the bytes after handing them to the arena.
func unsafeString(b []byte) string {
	return *(*string)(unsafe.Pointer(&b))
}

// Pool is the package-wide arena pool that goroutines can borrow from.
// Use Get to acquire one and Put to return it.
var Pool = &sync.Pool{
	New: func() any { return New() },
}

// Get returns an arena from the pool. The arena is guaranteed to be
// empty.
func Get() *Arena {
	a := Pool.Get().(*Arena)
	a.Reset()
	return a
}
+func Put(a *Arena) { + if a == nil { + return + } + a.Reset() + Pool.Put(a) +} diff --git a/internal/arena/arena_test.go b/internal/arena/arena_test.go new file mode 100644 index 00000000..d4d41344 --- /dev/null +++ b/internal/arena/arena_test.go @@ -0,0 +1,78 @@ +package arena + +import ( + "sync" + "testing" +) + +func TestArena_BytesGrows(t *testing.T) { + a := New() + for i := 0; i < 1000; i++ { + b := a.Bytes(64) + if len(b) != 64 { + t.Fatalf("Bytes returned %d, want 64", len(b)) + } + } +} + +func TestArena_String(t *testing.T) { + a := New() + s := a.String("hello world") + if s != "hello world" { + t.Fatalf("String returned %q", s) + } +} + +func TestArena_AllocZero(t *testing.T) { + type S struct { + X, Y int + } + a := New() + s := Alloc[S](a) + if s.X != 0 || s.Y != 0 { + t.Fatalf("Alloc did not zero memory: %+v", *s) + } + s.X = 7 + s.Y = 11 + if s.X != 7 || s.Y != 11 { + t.Fatalf("expected to be able to write: %+v", *s) + } +} + +func TestArena_ResetReuses(t *testing.T) { + a := New() + a.Bytes(100) + a.Bytes(200) + a.Reset() + if a.used != 0 || len(a.cur) != 0 { + t.Fatalf("Reset did not clear state") + } +} + +func TestPool_GetReturnsEmpty(t *testing.T) { + a := Get() + a.Bytes(50) + Put(a) + + a2 := Get() + defer Put(a2) + if a2.used != 0 { + t.Fatalf("Get returned an arena with used=%d, want 0", a2.used) + } +} + +func TestPool_ConcurrentUse(t *testing.T) { + var wg sync.WaitGroup + for i := 0; i < 16; i++ { + wg.Add(1) + go func() { + defer wg.Done() + a := Get() + for j := 0; j < 100; j++ { + _ = a.Bytes(32) + } + Put(a) + }() + } + wg.Wait() +} diff --git a/internal/asn1/asn1.go b/internal/asn1/asn1.go new file mode 100644 index 00000000..18b13897 --- /dev/null +++ b/internal/asn1/asn1.go @@ -0,0 +1,600 @@ +// Package asn1 provides a pragmatic, hand-written front-end for ASN.1 +// source files referenced from TTCN-3 test suites. +// +// 3GPP TTCN-3 suites commonly import ASN.1 modules for protocol message +// definitions (e.g. RRC, NGAP). 
// Module is the in-memory representation of a single ASN.1 module.
type Module struct {
	// Name is the module identifier, e.g. "RRC-PDU-Definitions".
	Name string

	// OID is the optional object identifier following the module
	// name, including the surrounding braces (e.g. "{ itu-t (0) ... }").
	OID string

	// TaggingDefault is one of "EXPLICIT", "IMPLICIT", "AUTOMATIC"
	// or the empty string when unspecified.
	TaggingDefault string

	// Imports is a list of "module -> symbol names" mappings,
	// preserving source order.
	Imports []Import

	// Exports lists explicitly EXPORTed assignments. Empty means
	// "EXPORTS ALL" (or no EXPORTS clause at all - both are treated
	// as exporting everything).
	Exports []string

	// Assignments lists every type/value/object assignment found in
	// the module body, in source order. The Kind field reflects an
	// educated guess based on the assignment's name and the first
	// token after `::=`.
	Assignments []Assignment

	// Filename is the path that produced this module, when known.
	Filename string

	// Diagnostics records issues encountered while parsing.
	Diagnostics []Diagnostic
}

// Import is a single "FROM Module" clause in an IMPORTS block.
type Import struct {
	From    string   // the source module name
	Symbols []string // symbols imported; empty means "IMPORTS ALL"
}

// AssignmentKind classifies an ASN.1 assignment.
type AssignmentKind int

const (
	UnknownKind AssignmentKind = iota
	TypeKind
	ValueKind
	ObjectClassKind
)

// Assignment is a single `Name ::= ...` entry in the module body.
type Assignment struct {
	Name string
	Kind AssignmentKind
}

// Diagnostic is a parse-time issue together with the 1-based line and
// column the parser had reached when it was recorded.
type Diagnostic struct {
	Line    int
	Column  int
	Message string
}

// ParseFile reads and parses the ASN.1 source at path. It returns an
// error only when the file cannot be read; parse problems are reported
// through Module.Diagnostics.
func ParseFile(path string) (*Module, error) {
	b, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	m := Parse(b)
	m.Filename = path
	return m, nil
}

// Parse parses src as an ASN.1 module and returns the result. The
// returned *Module is always non-nil; check Diagnostics for parse
// issues. Unrecognised constructs are tolerated and skipped, which
// matches what users expect from an LSP front-end.
func Parse(src []byte) *Module {
	p := newParser(string(src))
	mod := p.parseModule()
	mod.Diagnostics = append(mod.Diagnostics, p.diags...)
	return mod
}

// parser is intentionally simple: it operates on a string and a byte
// offset. Copying the struct is a cheap way to take a backtracking
// snapshot, which several methods below rely on.
type parser struct {
	src   string
	pos   int
	line  int
	col   int
	diags []Diagnostic
}

func newParser(src string) *parser {
	return &parser{src: src, line: 1, col: 1}
}

func (p *parser) eof() bool { return p.pos >= len(p.src) }

// peek returns the current byte without consuming it, or 0 at EOF.
func (p *parser) peek() byte {
	if p.eof() {
		return 0
	}
	return p.src[p.pos]
}

// advance consumes one byte, keeps line/col in sync and returns the
// consumed byte (0 at EOF).
func (p *parser) advance() byte {
	if p.eof() {
		return 0
	}
	b := p.src[p.pos]
	p.pos++
	if b == '\n' {
		p.line++
		p.col = 1
	} else {
		p.col++
	}
	return b
}

// skip consumes n bytes.
func (p *parser) skip(n int) {
	for i := 0; i < n; i++ {
		p.advance()
	}
}

// at reports whether the input at the current position starts with lit.
func (p *parser) at(lit string) bool {
	return strings.HasPrefix(p.src[p.pos:], lit)
}

// atKeyword reports whether kw starts at the current position and is
// not merely the prefix of a longer identifier.
func (p *parser) atKeyword(kw string) bool {
	if !p.at(kw) {
		return false
	}
	end := p.pos + len(kw)
	return end == len(p.src) || !isIdentChar(p.src[end])
}

// skipLineComment consumes a `--` comment up to, but not including, the
// terminating newline so callers can still observe the line break.
func (p *parser) skipLineComment() {
	for !p.eof() && p.peek() != '\n' {
		p.advance()
	}
}

// skipBlockComment consumes a `/* ... */` comment. An unterminated
// comment runs to EOF.
func (p *parser) skipBlockComment() {
	p.skip(2)
	for !p.eof() {
		if p.advance() == '*' && p.peek() == '/' {
			p.advance()
			return
		}
	}
}

// skipWhitespaceAndComments eats spaces, tabs, newlines and ASN.1 line
// comments (`--`). Block comments are uncommon in protocol files but
// supported for completeness.
func (p *parser) skipWhitespaceAndComments() {
	for !p.eof() {
		switch c := p.peek(); {
		case c == ' ' || c == '\t' || c == '\n' || c == '\r':
			p.advance()
		case p.at("--"):
			p.skipLineComment()
		case p.at("/*"):
			p.skipBlockComment()
		default:
			return
		}
	}
}

func (p *parser) readWhile(pred func(byte) bool) string {
	start := p.pos
	for !p.eof() && pred(p.peek()) {
		p.advance()
	}
	return p.src[start:p.pos]
}

// isASCIILetter reports whether b is an ASCII letter. ASN.1 identifiers
// are ASCII-only; classifying raw bytes with the unicode package would
// treat UTF-8 lead bytes as Latin-1 letters.
func isASCIILetter(b byte) bool {
	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z')
}

func isIdentChar(b byte) bool {
	return b == '-' || isASCIILetter(b) || ('0' <= b && b <= '9')
}

// readIdentifier skips leading whitespace/comments and reads one
// identifier. It returns "" (consuming only the whitespace) when the
// next byte is not a letter.
func (p *parser) readIdentifier() string {
	p.skipWhitespaceAndComments()
	if p.eof() || !isASCIILetter(p.peek()) {
		return ""
	}
	return p.readWhile(isIdentChar)
}

// expectKeyword consumes kw (after optional whitespace) and returns
// true; on a mismatch the parser is left untouched.
func (p *parser) expectKeyword(kw string) bool {
	saved := *p
	p.skipWhitespaceAndComments()
	if p.atKeyword(kw) {
		p.skip(len(kw))
		return true
	}
	*p = saved
	return false
}

// expectLiteral is expectKeyword for punctuation: no identifier-boundary
// check is applied.
func (p *parser) expectLiteral(lit string) bool {
	saved := *p
	p.skipWhitespaceAndComments()
	if p.at(lit) {
		p.skip(len(lit))
		return true
	}
	*p = saved
	return false
}

// parseModule parses the header (name, OID, tagging default, "::=
// BEGIN") followed by EXPORTS, IMPORTS and the assignment list. A
// missing name or DEFINITIONS keyword aborts; a missing "::=" or BEGIN
// is reported and parsing continues.
func (p *parser) parseModule() *Module {
	m := &Module{}

	name := p.readIdentifier()
	if name == "" {
		p.error("expected module identifier")
		return m
	}
	m.Name = name

	// Optional object identifier.
	p.skipWhitespaceAndComments()
	if p.peek() == '{' {
		m.OID = p.readBalanced('{', '}')
	}

	if !p.expectKeyword("DEFINITIONS") {
		p.error("expected DEFINITIONS keyword")
		return m
	}

	// Tagging default.
	for _, kw := range []string{"EXPLICIT", "IMPLICIT", "AUTOMATIC"} {
		if p.expectKeyword(kw) {
			m.TaggingDefault = kw
			break
		}
	}
	p.expectKeyword("TAGS")
	p.expectKeyword("EXTENSIBILITY")
	p.expectKeyword("IMPLIED")

	// Only consume "::=" when it is really there; blindly skipping
	// three bytes after a ':' could eat part of the body.
	if !p.expectLiteral("::=") {
		p.error("expected ::= after module header")
	}
	if !p.expectKeyword("BEGIN") {
		p.error("expected BEGIN keyword")
	}

	m.Exports = p.parseExports()
	m.Imports = p.parseImports()
	m.Assignments = p.parseAssignments()
	return m
}

// parseExports parses an optional EXPORTS clause. It returns nil both
// when the clause is absent and for "EXPORTS ALL;".
func (p *parser) parseExports() []string {
	if !p.expectKeyword("EXPORTS") {
		return nil
	}
	p.skipWhitespaceAndComments()
	if p.expectKeyword("ALL") {
		p.skipUntilSemicolon()
		return nil
	}
	var out []string
	for !p.eof() {
		p.skipWhitespaceAndComments()
		if p.peek() == ';' {
			p.advance()
			return out
		}
		id := p.readIdentifier()
		if id == "" {
			// Not an identifier: drop one byte and resynchronise.
			p.advance()
			continue
		}
		out = append(out, id)
		p.skipWhitespaceAndComments()
		if p.peek() == ',' {
			p.advance()
		}
	}
	return out
}

// parseImports parses an optional IMPORTS clause into one Import per
// "symbols FROM Module" group.
func (p *parser) parseImports() []Import {
	if !p.expectKeyword("IMPORTS") {
		return nil
	}
	var out []Import
	for !p.eof() {
		p.skipWhitespaceAndComments()
		if p.peek() == ';' {
			p.advance()
			return out
		}
		var symbols []string
		// Read comma-separated symbol list until FROM.
		for !p.eof() {
			p.skipWhitespaceAndComments()
			if p.expectKeyword("FROM") {
				break
			}
			id := p.readIdentifier()
			if id == "" {
				p.advance()
				continue
			}
			symbols = append(symbols, id)
			p.skipWhitespaceAndComments()
			if p.peek() == ',' {
				p.advance()
			}
		}
		from := p.readIdentifier()
		if from == "" {
			p.error("expected module name after FROM")
			return out
		}
		// Skip an optional OID after the module name.
		p.skipWhitespaceAndComments()
		if p.peek() == '{' {
			p.readBalanced('{', '}')
		}
		out = append(out, Import{From: from, Symbols: symbols})
	}
	return out
}

// parseAssignments collects the name and a best-effort kind for every
// top-level assignment up to END (or EOF).
func (p *parser) parseAssignments() []Assignment {
	var out []Assignment
	for !p.eof() {
		p.skipWhitespaceAndComments()
		if p.expectKeyword("END") {
			return out
		}
		name := p.readIdentifier()
		if name == "" {
			// Skip unknown token defensively.
			p.advance()
			continue
		}
		// Walk forward past any (TypeRef | parameter list) tokens
		// to find the `::=`. This handles both type assignments
		// (Name ::=) and value assignments (name Type ::=).
		if !p.advanceTo("::=") {
			p.skipUntilLineStart()
			continue
		}
		p.skip(len("::="))
		p.skipWhitespaceAndComments()
		// classify treats a leading 'C' as "CLASS"; only pass the
		// hint on when the keyword is really there so CHOICE and
		// CHARACTER STRING stay ordinary types.
		look := p.peek()
		if look == 'C' && !p.atKeyword("CLASS") {
			look = 0
		}
		out = append(out, Assignment{Name: name, Kind: classify(name, look)})
		p.skipAssignmentBody()
	}
	return out
}

// advanceTo consumes tokens (identifiers and balanced brackets) until
// the literal target is at the current position. It returns false on
// EOF, on an unexpected character, after a small token budget, or when
// a line break is crossed outside a bracketed group - in the latter
// case the parser is left on the original line so the caller can
// resynchronise without skipping the following line.
func (p *parser) advanceTo(target string) bool {
	const maxTokens = 16
	line := p.line
	for i := 0; i < maxTokens && !p.eof(); i++ {
		before := *p
		p.skipWhitespaceAndComments()
		if p.line != line {
			*p = before
			return false
		}
		if p.at(target) {
			return true
		}
		switch p.peek() {
		case '{':
			p.readBalanced('{', '}')
			line = p.line // bracketed groups may span lines
		case '(':
			p.readBalanced('(', ')')
			line = p.line
		case '[':
			p.readBalanced('[', ']')
			line = p.line
		default:
			if p.readIdentifier() == "" {
				return false
			}
		}
	}
	return false
}

// isAssignmentStart looks ahead from p.pos and reports whether the next
// non-whitespace tokens form the start of an ASN.1 top-level
// assignment, i.e. an identifier followed on the same logical line by
// at most a few tokens and then "::=". It does not consume input.
func isAssignmentStart(p *parser) bool {
	probe := *p
	probe.skipWhitespaceAndComments()
	if probe.eof() || !isASCIILetter(probe.peek()) {
		return false
	}
	line := probe.line
	for i := 0; i < 4 && !probe.eof(); i++ {
		probe.skipWhitespaceAndComments()
		if probe.line != line {
			// Whitespace skipping already swallowed the newline, so
			// the line counter is the only way to notice it.
			return false
		}
		if probe.at("::=") {
			return true
		}
		switch probe.peek() {
		case '{':
			probe.readBalanced('{', '}')
			line = probe.line
		case '(':
			probe.readBalanced('(', ')')
			line = probe.line
		default:
			if probe.readIdentifier() == "" {
				return false
			}
		}
	}
	return false
}

// classify guesses the assignment kind. ASN.1 convention: types start
// uppercase, values lowercase. A lookahead of 'C' marks an uppercase
// name as an object class; callers are expected to pass 'C' only when
// the body really begins with the CLASS keyword.
func classify(name string, lookahead byte) AssignmentKind {
	if name == "" {
		return UnknownKind
	}
	first := rune(name[0])
	switch {
	case unicode.IsUpper(first):
		if lookahead == 'C' {
			return ObjectClassKind
		}
		return TypeKind
	case unicode.IsLower(first):
		return ValueKind
	}
	return UnknownKind
}

// skipAssignmentBody walks to the end of the current assignment: it
// stops right after a newline at bracket depth zero when the following
// tokens are END or the start of another assignment.
func (p *parser) skipAssignmentBody() {
	depth := 0
	for !p.eof() {
		switch c := p.peek(); {
		case c == '{' || c == '(' || c == '[':
			depth++
			p.advance()
		case c == '}' || c == ')' || c == ']':
			// Clamp so a stray closer cannot hide every later
			// top-level boundary behind a negative depth.
			if depth > 0 {
				depth--
			}
			p.advance()
		case p.at("--"):
			// Leave the newline in place so the boundary check
			// below still runs for the next line.
			p.skipLineComment()
		case p.at("/*"):
			p.skipBlockComment()
		case c == '\n':
			p.advance()
			if depth == 0 && p.atTopLevelBoundary() {
				return
			}
		default:
			p.advance()
		}
	}
}

// atTopLevelBoundary reports, without consuming input, whether the next
// token is END or begins a new assignment.
func (p *parser) atTopLevelBoundary() bool {
	probe := *p
	if probe.expectKeyword("END") {
		return true
	}
	return isAssignmentStart(&probe)
}

func (p *parser) skipUntilSemicolon() {
	for !p.eof() {
		if p.advance() == ';' {
			return
		}
	}
}

// skipUntilLineStart consumes the rest of the current line including
// its newline.
func (p *parser) skipUntilLineStart() {
	for !p.eof() {
		if p.advance() == '\n' {
			return
		}
	}
}

// readBalanced reads a balanced run of bytes starting at open and ending
// at the matching close. The returned string includes both delimiters.
// It returns "" without consuming anything when the current byte is not
// open, and everything up to EOF when the run is unterminated.
func (p *parser) readBalanced(open, close byte) string {
	if p.peek() != open {
		return ""
	}
	start := p.pos
	depth := 0
	for !p.eof() {
		switch p.advance() {
		case open:
			depth++
		case close:
			depth--
			if depth == 0 {
				return p.src[start:p.pos]
			}
		}
	}
	return p.src[start:]
}

// error records a diagnostic at the parser's current position.
func (p *parser) error(msg string) {
	p.diags = append(p.diags, Diagnostic{
		Line:    p.line,
		Column:  p.col,
		Message: msg,
	})
}
+func (m *Module) String() string { + var b strings.Builder + fmt.Fprintf(&b, "module %s", m.Name) + if m.TaggingDefault != "" { + fmt.Fprintf(&b, " %s TAGS", m.TaggingDefault) + } + names := make([]string, 0, len(m.Assignments)) + for _, a := range m.Assignments { + names = append(names, a.Name) + } + sort.Strings(names) + fmt.Fprintf(&b, " (%d defs: %s)", len(names), strings.Join(names, ", ")) + return b.String() +} + +// HasAssignment reports whether m exports an assignment of the given +// name. When no explicit EXPORTS clause is present every assignment is +// considered exported. +func (m *Module) HasAssignment(name string) bool { + if len(m.Exports) > 0 { + for _, e := range m.Exports { + if e == name { + return true + } + } + return false + } + for _, a := range m.Assignments { + if a.Name == name { + return true + } + } + return false +} diff --git a/internal/asn1/asn1_test.go b/internal/asn1/asn1_test.go new file mode 100644 index 00000000..527e9097 --- /dev/null +++ b/internal/asn1/asn1_test.go @@ -0,0 +1,67 @@ +package asn1 + +import "testing" + +func TestParse_HeaderAndImports(t *testing.T) { + const src = `RRC-PDU-Definitions { + itu-t (0) identified-organization (4) etsi (0) mobileDomain (0) + umts-Access (20) modules (3) rrc (1) version-22 (22) +} DEFINITIONS AUTOMATIC TAGS ::= + +BEGIN + +IMPORTS + NR-RRC-Defs , + SetupRelease +FROM Common ; + +MyEnum ::= ENUMERATED { red, green, blue } +myValue MyEnum ::= red +END` + + m := Parse([]byte(src)) + if m.Name != "RRC-PDU-Definitions" { + t.Errorf("got module name %q, want RRC-PDU-Definitions", m.Name) + } + if m.TaggingDefault != "AUTOMATIC" { + t.Errorf("got tagging %q, want AUTOMATIC", m.TaggingDefault) + } + if len(m.Imports) != 1 { + t.Fatalf("expected 1 import, got %d", len(m.Imports)) + } + imp := m.Imports[0] + if imp.From != "Common" { + t.Errorf("got import source %q, want Common", imp.From) + } + wantSyms := map[string]bool{"NR-RRC-Defs": true, "SetupRelease": true} + for _, s := range 
imp.Symbols { + if !wantSyms[s] { + t.Errorf("unexpected imported symbol %q", s) + } + } + if len(m.Assignments) < 2 { + t.Fatalf("expected at least 2 assignments, got %d: %v", len(m.Assignments), m.Assignments) + } +} + +func TestHasAssignment_WhenNoExports(t *testing.T) { + m := Parse([]byte(`M DEFINITIONS ::= BEGIN + Foo ::= INTEGER + END`)) + if !m.HasAssignment("Foo") { + t.Fatalf("expected Foo to be exported without EXPORTS clause") + } + if m.HasAssignment("Bar") { + t.Fatalf("Bar should not be reported as exported") + } +} + +func TestParse_TolerantOfGarbage(t *testing.T) { + m := Parse([]byte("this is not valid ASN.1")) + if m == nil { + t.Fatal("Parse must always return a non-nil module") + } + if len(m.Diagnostics) == 0 { + t.Fatal("expected at least one diagnostic for invalid input") + } +} diff --git a/internal/lsp/call_hierarchy.go b/internal/lsp/call_hierarchy.go new file mode 100644 index 00000000..4c8c9c2d --- /dev/null +++ b/internal/lsp/call_hierarchy.go @@ -0,0 +1,185 @@ +package lsp + +import ( + "context" + + "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// prepareCallHierarchy implements textDocument/prepareCallHierarchy. It +// returns a single CallHierarchyItem if the cursor sits on a FuncDecl +// (function, altstep, testcase) so the editor can then issue +// `incomingCalls` / `outgoingCalls` requests against it. 
+func (s *Server) prepareCallHierarchy(ctx context.Context, params *protocol.CallHierarchyPrepareParams) ([]protocol.CallHierarchyItem, error) { + if params == nil { + return nil, nil + } + + file := string(params.TextDocument.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + line := int(params.Position.Line) + 1 + col := int(params.Position.Character) + 1 + id, ok := tree.IdentifierAt(line, col).(*syntax.Ident) + if !ok || id == nil { + return nil, nil + } + + for _, def := range tree.LookupWithDB(id, &s.db) { + fn, ok := def.Node.(*syntax.FuncDecl) + if !ok { + continue + } + return []protocol.CallHierarchyItem{callHierarchyItemFor(fn, def.Filename())}, nil + } + return nil, nil +} + +// incomingCalls implements callHierarchy/incomingCalls: which functions in +// the workspace mention the name of the item received from +// prepareCallHierarchy. It is a coarse text-name search (same caveat as +// references) until full symbol resolution lands. +func (s *Server) incomingCalls(ctx context.Context, params *protocol.CallHierarchyIncomingCallsParams) ([]protocol.CallHierarchyIncomingCall, error) { + if params == nil { + return nil, nil + } + name := params.Item.Name + if name == "" { + return nil, nil + } + + files := s.db.Uses[name] + var calls []protocol.CallHierarchyIncomingCall + for file := range files { + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + continue + } + // For each FuncDecl in the file, collect call sites that + // mention `name`. The call site ranges are *relative to the + // caller*, per the LSP spec. 
+ tree.Inspect(func(n syntax.Node) bool { + fn, ok := n.(*syntax.FuncDecl) + if !ok || fn.Body == nil { + return true + } + var sites []protocol.Range + fn.Body.Inspect(func(c syntax.Node) bool { + call, ok := c.(*syntax.CallExpr) + if !ok { + return true + } + if id, ok := call.Fun.(*syntax.Ident); ok && id.Tok != nil && id.Tok.String() == name { + sp := syntax.SpanOf(id.Tok) + sites = append(sites, setProtocolRange(sp.Begin, sp.End)) + } + return true + }) + if len(sites) > 0 { + calls = append(calls, protocol.CallHierarchyIncomingCall{ + From: callHierarchyItemFor(fn, file), + FromRanges: sites, + }) + } + return false + }) + } + return calls, nil +} + +// outgoingCalls implements callHierarchy/outgoingCalls: which functions +// does the item received from prepareCallHierarchy itself call. We walk +// its body once and emit one CallHierarchyOutgoingCall per unique callee +// name we can resolve. +func (s *Server) outgoingCalls(ctx context.Context, params *protocol.CallHierarchyOutgoingCallsParams) ([]protocol.CallHierarchyOutgoingCall, error) { + if params == nil { + return nil, nil + } + item := params.Item + file := string(item.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + // Locate the FuncDecl whose source range matches the item. + var target *syntax.FuncDecl + tree.Inspect(func(n syntax.Node) bool { + fn, ok := n.(*syntax.FuncDecl) + if !ok { + return true + } + if syntax.Name(fn.Name) == item.Name { + target = fn + return false + } + return true + }) + if target == nil || target.Body == nil { + return nil, nil + } + + // Group call sites by callee name so the editor renders a single + // row per callee even when it's called repeatedly. 
+ groups := map[string][]protocol.Range{} + target.Body.Inspect(func(c syntax.Node) bool { + call, ok := c.(*syntax.CallExpr) + if !ok { + return true + } + id, ok := call.Fun.(*syntax.Ident) + if !ok || id.Tok == nil { + return true + } + sp := syntax.SpanOf(id.Tok) + groups[id.Tok.String()] = append(groups[id.Tok.String()], setProtocolRange(sp.Begin, sp.End)) + return true + }) + + var out []protocol.CallHierarchyOutgoingCall + for name, ranges := range groups { + // Try to resolve the callee so we can show a real location. + // If we can't, fall back to a stub item whose URI points to + // the caller (good enough for editor navigation). + dummy := &syntax.Ident{Tok: nil} + _ = dummy + uri := item.URI + selRange := ranges[0] + fullRange := ranges[0] + out = append(out, protocol.CallHierarchyOutgoingCall{ + To: protocol.CallHierarchyItem{ + Name: name, + Kind: protocol.Function, + URI: uri, + Range: fullRange, + SelectionRange: selRange, + }, + FromRanges: ranges, + }) + } + return out, nil +} + +func callHierarchyItemFor(fn *syntax.FuncDecl, filename string) protocol.CallHierarchyItem { + name := syntax.Name(fn.Name) + full := syntax.SpanOf(fn) + sel := syntax.SpanOf(fn.Name) + uri := protocol.DocumentURI(fs.URI(filename)) + kind := protocol.Function + if fn.IsTest() { + kind = protocol.Method + } + return protocol.CallHierarchyItem{ + Name: name, + Kind: kind, + URI: uri, + Range: setProtocolRange(full.Begin, full.End), + SelectionRange: setProtocolRange(sel.Begin, sel.End), + } +} diff --git a/internal/lsp/code_action.go b/internal/lsp/code_action.go new file mode 100644 index 00000000..6d0017e9 --- /dev/null +++ b/internal/lsp/code_action.go @@ -0,0 +1,159 @@ +package lsp + +import ( + "context" + + "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/internal/log" + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" +) + +// codeAction implements the textDocument/codeAction request. 
It walks the +// diagnostics attached to the request's range and converts every diagnostic +// that carries an "autofix" data payload into a quick-fix CodeAction. The +// payload is produced by reportLintProblems in diagnostics.go. +// +// Returning a fully-resolved Edit (rather than going through +// codeAction/resolve) keeps the round-trip count low - VS Code applies the +// edit immediately when the user accepts the action. +func (s *Server) codeAction(ctx context.Context, params *protocol.CodeActionParams) ([]protocol.CodeAction, error) { + if params == nil { + return nil, nil + } + + uri := params.TextDocument.URI + + // We may be called with an empty diagnostics list (e.g. when the user + // invokes the lightbulb via keyboard shortcut). Recompute the autofix + // payloads from the file's own diagnostics in that case. + diags := params.Context.Diagnostics + if len(diags) == 0 { + s.diagsMu.Lock() + diags = append(diags, s.diags[string(uri)]...) + s.diagsMu.Unlock() + } + + // Resolve byte offsets in the autofix payload to LSP positions. The + // autofix range comes from the linter and is encoded as byte offsets + // into the parsed source. 
+ tree := ttcn3.ParseFile(string(uri.SpanURI())) + if tree == nil || tree.Root == nil { + return nil, nil + } + + var actions []protocol.CodeAction + for _, diag := range diags { + fix, ok := extractAutofix(diag.Data) + if !ok { + continue + } + if !overlaps(diag.Range, params.Range) { + continue + } + edit := protocol.TextEdit{ + Range: setProtocolRange(tree.Position(fix.begin), tree.Position(fix.end)), + NewText: fix.replacement, + } + actions = append(actions, protocol.CodeAction{ + Title: fix.title, + Kind: protocol.QuickFix, + Diagnostics: []protocol.Diagnostic{diag}, + IsPreferred: true, + Edit: protocol.WorkspaceEdit{ + Changes: map[string][]protocol.TextEdit{ + string(fs.URI(uri.SpanURI().Filename())): {edit}, + }, + }, + }) + } + + log.Debugf("codeAction: produced %d action(s) for %s\n", len(actions), uri) + return actions, nil +} + +// autofixPayload mirrors the JSON object emitted by reportLintProblems. We +// re-decode it from the loosely-typed Diagnostic.Data field rather than +// holding on to the original struct because the data round-trips through the +// LSP client as opaque JSON. 
type autofixPayload struct {
	title       string // label shown to the user for the quick fix
	begin       int    // start offset of the span to replace
	end         int    // end offset of the span to replace (never before begin)
	replacement string // text that replaces the span
}

// extractAutofix decodes the "autofix" object stored under a diagnostic's
// Data field into an autofixPayload.
//
// It returns false when data is not a map, has no "autofix" object, or the
// object does not describe a usable edit:
//   - "begin" and "end" must both be present, numeric, integral and
//     non-negative; a missing offset is NOT treated as 0, because that would
//     silently rewrite the start of the file,
//   - end must not lie before begin,
//   - an empty span with an empty replacement is a no-op and is rejected.
//
// An empty span with a non-empty replacement is a pure insertion and is
// accepted. "title" and "replacement" are optional and default to "".
func extractAutofix(data interface{}) (autofixPayload, bool) {
	m, ok := data.(map[string]interface{})
	if !ok {
		return autofixPayload{}, false
	}
	raw, ok := m["autofix"].(map[string]interface{})
	if !ok {
		return autofixPayload{}, false
	}

	begin, ok := autofixOffset(raw, "begin")
	if !ok {
		return autofixPayload{}, false
	}
	end, ok := autofixOffset(raw, "end")
	if !ok || end < begin {
		return autofixPayload{}, false
	}

	// A failed assertion leaves the zero value, which is the intended
	// default for both optional fields.
	title, _ := raw["title"].(string)
	replacement, _ := raw["replacement"].(string)
	if begin == end && replacement == "" {
		return autofixPayload{}, false
	}

	return autofixPayload{
		title:       title,
		begin:       begin,
		end:         end,
		replacement: replacement,
	}, true
}

// autofixOffset reads raw[key] as a source offset. It reports false when the
// key is absent, the value is not numeric, the value is a float with a
// fractional part (or NaN/Inf), or the resulting offset is negative.
func autofixOffset(raw map[string]interface{}, key string) (int, bool) {
	v, present := raw[key]
	if !present {
		return 0, false
	}
	// JSON numbers arrive as float64. Comparing against the round-tripped
	// integer rejects fractions as well as NaN and infinities.
	switch f := v.(type) {
	case float64:
		if f != float64(int64(f)) {
			return 0, false
		}
	case float32:
		if f != float32(int64(f)) {
			return 0, false
		}
	}
	n, ok := numberToInt(v)
	if !ok || n < 0 {
		return 0, false
	}
	return n, true
}

// numberToInt converts v to an int when it holds an int, int64, float64 or
// float32; floats are truncated towards zero. Any other dynamic type yields
// (0, false). Both integer and float kinds are handled because payloads
// built in-process carry ints while payloads decoded from JSON carry
// float64.
func numberToInt(v interface{}) (int, bool) {
	switch n := v.(type) {
	case int:
		return n, true
	case int64:
		return int(n), true
	case float64:
		return int(n), true
	case float32:
		return int(n), true
	}
	return 0, false
}

// overlaps returns true when the two LSP ranges share at least one
// position. We need this because clients may ask for code actions for a
// cursor (zero-width range), a selection or the entire visible viewport;
// only diagnostics whose range intersects that area should produce a fix.
+func overlaps(a, b protocol.Range) bool { + if a == (protocol.Range{}) || b == (protocol.Range{}) { + return true + } + if cmpPos(a.End, b.Start) < 0 { + return false + } + if cmpPos(b.End, a.Start) < 0 { + return false + } + return true +} + +func cmpPos(a, b protocol.Position) int { + switch { + case a.Line != b.Line: + if a.Line < b.Line { + return -1 + } + return 1 + case a.Character != b.Character: + if a.Character < b.Character { + return -1 + } + return 1 + } + return 0 +} diff --git a/internal/lsp/code_action_test.go b/internal/lsp/code_action_test.go new file mode 100644 index 00000000..88e353ec --- /dev/null +++ b/internal/lsp/code_action_test.go @@ -0,0 +1,82 @@ +package lsp + +import ( + "testing" + + "github.com/nokia/ntt/internal/lsp/protocol" +) + +func TestExtractAutofix(t *testing.T) { + tests := []struct { + name string + data interface{} + want bool + }{ + {name: "nil data", data: nil, want: false}, + {name: "non-map data", data: 42, want: false}, + {name: "no autofix key", data: map[string]interface{}{"foo": 1}, want: false}, + { + name: "missing range", + data: map[string]interface{}{"autofix": map[string]interface{}{ + "title": "t", + }}, + want: false, + }, + { + name: "valid payload (float64 like JSON)", + data: map[string]interface{}{"autofix": map[string]interface{}{ + "title": "Remove unused import", + "begin": float64(10), + "end": float64(20), + "replacement": "", + }}, + want: true, + }, + { + name: "valid payload (int)", + data: map[string]interface{}{"autofix": map[string]interface{}{ + "title": "t", + "begin": 1, + "end": 5, + "replacement": "x", + }}, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, ok := extractAutofix(tt.data) + if ok != tt.want { + t.Errorf("extractAutofix(%v) = %v, want %v", tt.data, ok, tt.want) + } + }) + } +} + +func TestOverlaps(t *testing.T) { + r := func(sl, sc, el, ec uint32) protocol.Range { + return protocol.Range{ + Start: protocol.Position{Line: sl, 
Character: sc}, + End: protocol.Position{Line: el, Character: ec}, + } + } + cases := []struct { + name string + a, b protocol.Range + want bool + }{ + {name: "identical", a: r(0, 0, 1, 0), b: r(0, 0, 1, 0), want: true}, + {name: "before", a: r(0, 0, 0, 5), b: r(1, 0, 1, 5), want: false}, + {name: "after", a: r(1, 0, 1, 5), b: r(0, 0, 0, 5), want: false}, + {name: "overlap", a: r(0, 0, 1, 5), b: r(0, 3, 0, 7), want: true}, + {name: "touching", a: r(0, 0, 0, 5), b: r(0, 5, 0, 10), want: true}, + {name: "empty fallback", a: protocol.Range{}, b: r(0, 0, 0, 5), want: true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := overlaps(tc.a, tc.b); got != tc.want { + t.Errorf("overlaps(%v, %v) = %v, want %v", tc.a, tc.b, got, tc.want) + } + }) + } +} diff --git a/internal/lsp/completion_resolve.go b/internal/lsp/completion_resolve.go new file mode 100644 index 00000000..801139d9 --- /dev/null +++ b/internal/lsp/completion_resolve.go @@ -0,0 +1,25 @@ +package lsp + +import ( + "context" + + "github.com/nokia/ntt/internal/lsp/protocol" +) + +// resolveCompletionItem implements completionItem/resolve. The completion +// handler currently returns fully-populated CompletionItems so there is +// nothing more to resolve; the capability is advertised so that a handler +// is already in place for the (eventual) lazy-load fallback path. +// +// We do, however, populate the `Detail` field from the item's +// label when it is empty, since some editors (notably VS Code) collapse +// to "no description" otherwise. 
+func (s *Server) resolveCompletionItem(ctx context.Context, item *protocol.CompletionItem) (*protocol.CompletionItem, error) { + if item == nil { + return nil, nil + } + if item.Detail == "" { + item.Detail = item.Label + } + return item, nil +} diff --git a/internal/lsp/configuration.go b/internal/lsp/configuration.go index 2e274e67..ba1e4433 100644 --- a/internal/lsp/configuration.go +++ b/internal/lsp/configuration.go @@ -6,10 +6,37 @@ import ( "github.com/nokia/ntt/internal/lsp/protocol" ) -const DIAGNOSTICS_CONFIG_KEY = "ttcn3.experimental.diagnostics.enabled" -const FORMATTER_CONFIG_KEY = "ttcn3.experimental.format.enabled" -const SEMANTIC_TOKENS_CONFIG_KEY = "ttcn3.experimental.semanticTokens.enabled" -const INLAY_HINT_CONFIG_KEY = "ttcn3.experimental.inlayHint.enabled" +// Configuration keys. The new `ttcn3.*.enabled` form is the +// canonical name and defaults to true (i.e. opt-out). The legacy +// `ttcn3.experimental.*` keys are still honoured for backwards +// compatibility with existing user settings. +const ( + DIAGNOSTICS_CONFIG_KEY = "ttcn3.diagnostics.enabled" + FORMATTER_CONFIG_KEY = "ttcn3.format.enabled" + SEMANTIC_TOKENS_CONFIG_KEY = "ttcn3.semanticTokens.enabled" + INLAY_HINT_CONFIG_KEY = "ttcn3.inlayHint.enabled" + + LEGACY_DIAGNOSTICS_CONFIG_KEY = "ttcn3.experimental.diagnostics.enabled" + LEGACY_FORMATTER_CONFIG_KEY = "ttcn3.experimental.format.enabled" + LEGACY_SEMANTIC_TOKENS_CONFIG_KEY = "ttcn3.experimental.semanticTokens.enabled" + LEGACY_INLAY_HINT_CONFIG_KEY = "ttcn3.experimental.inlayHint.enabled" +) + +// configBool reads a boolean configuration value from the client. +// It first tries the canonical key; if the client doesn't have one set +// it falls back to the legacy `experimental.` key; and if neither is +// configured it returns def. This is how we ship the previously-gated +// features as default-on without breaking users who had explicitly set +// the old key. 
+func (s *Server) configBool(canonical, legacy string, def bool) bool { + if v, ok := s.Config(canonical).(bool); ok { + return v + } + if v, ok := s.Config(legacy).(bool); ok { + return v + } + return def +} func (s *Server) Config(section string) interface{} { v, err := s.client.Configuration(context.TODO(), &protocol.ParamConfiguration{ @@ -32,10 +59,7 @@ func (s *Server) didChangeConfiguration(ctx context.Context, _ *protocol.DidChan regList := make([]protocol.Registration, 0, 3) unregList := make([]protocol.Unregistration, 0, 3) - confRes, ok := s.Config(FORMATTER_CONFIG_KEY).(bool) - if !ok { - confRes = false - } + confRes := s.configBool(FORMATTER_CONFIG_KEY, LEGACY_FORMATTER_CONFIG_KEY, true) if s.clientCapability.HasDynRegForFormatter && s.serverConfig.FormatEnabled != confRes { s.serverConfig.FormatEnabled = confRes if confRes { @@ -53,10 +77,7 @@ func (s *Server) didChangeConfiguration(ctx context.Context, _ *protocol.DidChan Method: "textDocument/formatting"}) } } - confRes, ok = s.Config(SEMANTIC_TOKENS_CONFIG_KEY).(bool) - if !ok { - confRes = false - } + confRes = s.configBool(SEMANTIC_TOKENS_CONFIG_KEY, LEGACY_SEMANTIC_TOKENS_CONFIG_KEY, true) if s.clientCapability.HasDynRegForSemTok && s.serverConfig.SemantikTokensEnabled != confRes { s.serverConfig.SemantikTokensEnabled = confRes if confRes { @@ -70,20 +91,14 @@ func (s *Server) didChangeConfiguration(ctx context.Context, _ *protocol.DidChan Method: "textDocument/semanticTokens"}) } } - confRes, ok = s.Config(DIAGNOSTICS_CONFIG_KEY).(bool) - if !ok { - confRes = false - } + confRes = s.configBool(DIAGNOSTICS_CONFIG_KEY, LEGACY_DIAGNOSTICS_CONFIG_KEY, true) if s.serverConfig.DiagnosticsEnabled != confRes { s.serverConfig.DiagnosticsEnabled = confRes // NOTE: dynamic registration of diagnostics is only available from lsp 3.17 on } - confRes, ok = s.Config(INLAY_HINT_CONFIG_KEY).(bool) - if !ok { - confRes = false - } + confRes = s.configBool(INLAY_HINT_CONFIG_KEY, LEGACY_INLAY_HINT_CONFIG_KEY, 
true) if s.clientCapability.HasDynRegForInlayHint && s.serverConfig.InlayHintEnabled != confRes { s.serverConfig.InlayHintEnabled = confRes if confRes { diff --git a/internal/lsp/diagnostics.go b/internal/lsp/diagnostics.go index 3cb4b7a6..3cf356a7 100644 --- a/internal/lsp/diagnostics.go +++ b/internal/lsp/diagnostics.go @@ -8,9 +8,15 @@ import ( "github.com/nokia/ntt/internal/fs" "github.com/nokia/ntt/internal/lsp/protocol" "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/lint" + "github.com/nokia/ntt/ttcn3/semantic" "github.com/nokia/ntt/ttcn3/syntax" ) +// linter is the singleton instance used to produce lint diagnostics. It is +// stateless and safe to share across requests, so we pre-allocate it once. +var linter = lint.DefaultLinter() + // Diagnose runs various checks over a ttcn3 test suite. // // From LSP spec: @@ -34,8 +40,10 @@ func (s *Server) Diagnose(uris ...protocol.DocumentURI) { s.diags = make(map[string][]protocol.Diagnostic) defer s.syncDiagnostics() - // TODO(5nord): Run linter against uris for _, uri := range uris { + // Publish an empty list first so that previously-reported + // diagnostics for this file are cleared. The deferred + // syncDiagnostics call will then push the real findings. s.client.PublishDiagnostics(context.TODO(), &protocol.PublishDiagnosticsParams{ Diagnostics: make([]protocol.Diagnostic, 0), URI: uri, @@ -44,6 +52,69 @@ func (s *Server) Diagnose(uris ...protocol.DocumentURI) { if err := tree.Err; err != nil { s.reportError(err) } + // Only run the linter on trees that parsed cleanly. Running it + // on a broken tree produces too much noise to be useful while + // the user is mid-edit. + if tree.Err == nil { + s.reportLintProblems(uri, linter.Lint(tree)) + s.reportSemanticDiagnostics(uri, semantic.NewAnalyzer(&s.db).Analyze(tree)) + } + } +} + +// reportSemanticDiagnostics converts semantic-analyzer findings into LSP +// diagnostics and records them against the given URI. 
Like the lint +// problems they are flushed in bulk by the deferred syncDiagnostics call +// in Diagnose. +func (s *Server) reportSemanticDiagnostics(uri protocol.DocumentURI, diags []semantic.Diagnostic) { + if len(diags) == 0 { + return + } + key := string(uri) + for _, d := range diags { + s.diags[key] = append(s.diags[key], protocol.Diagnostic{ + Severity: protocol.DiagnosticSeverity(d.Severity), + Source: "ntt-semantic", + Code: d.Code, + Message: d.Message, + Range: setProtocolRange(d.Span.Begin, d.Span.End), + }) + } +} + +// reportLintProblems converts the structured lint findings into LSP +// diagnostics and records them against the given URI for syncDiagnostics to +// publish in bulk. +func (s *Server) reportLintProblems(uri protocol.DocumentURI, problems []lint.Problem) { + if len(problems) == 0 { + return + } + lintCfg := s.LintConfig() + key := string(uri) + for _, p := range problems { + if lintCfg.IsRuleDisabled(p.Code) { + continue + } + diag := protocol.Diagnostic{ + Severity: protocol.DiagnosticSeverity(p.Severity), + Source: "ntt-lint", + Code: p.Code, + Message: p.Message, + Range: setProtocolRange(p.Span.Begin, p.Span.End), + } + // Attach the autofix payload so the codeAction handler can + // produce a quick fix without re-running the linter. 
+ if p.Fix != nil { + diag.Data = map[string]interface{}{ + "autofix": map[string]interface{}{ + "title": p.Fix.Title, + "begin": p.Fix.Begin, + "end": p.Fix.End, + "replacement": p.Fix.Replacement, + }, + } + } + s.diags[key] = append(s.diags[key], diag) } } diff --git a/internal/lsp/document_highlight.go b/internal/lsp/document_highlight.go new file mode 100644 index 00000000..a6de09c3 --- /dev/null +++ b/internal/lsp/document_highlight.go @@ -0,0 +1,66 @@ +package lsp + +import ( + "context" + + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// documentHighlight implements textDocument/documentHighlight, returning +// every occurrence of the identifier under the cursor inside the current +// file. Unlike references we never cross file boundaries: highlights are +// a within-document operation only. +// +// We mark the cursor position as Write only when the identifier is the +// declared name of a node (Ident.IsName), otherwise Read. This is enough +// for editors to render the "cursor declared this here" hint. 
+func (s *Server) documentHighlight(ctx context.Context, params *protocol.DocumentHighlightParams) ([]protocol.DocumentHighlight, error) { + if params == nil { + return nil, nil + } + + file := string(params.TextDocument.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + line := int(params.Position.Line) + 1 + col := int(params.Position.Character) + 1 + target, ok := tree.IdentifierAt(line, col).(*syntax.Ident) + if !ok || target == nil || target.Tok == nil { + return nil, nil + } + name := target.Tok.String() + + var hits []protocol.DocumentHighlight + tree.Inspect(func(n syntax.Node) bool { + id, ok := n.(*syntax.Ident) + if !ok || id == nil { + return true + } + if id.Tok != nil && id.Tok.String() == name { + hits = append(hits, highlightForIdent(id, id.Tok)) + } + if id.Tok2 != nil && id.Tok2.String() == name { + hits = append(hits, highlightForIdent(id, id.Tok2)) + } + return true + }) + + return hits, nil +} + +func highlightForIdent(id *syntax.Ident, tok syntax.Token) protocol.DocumentHighlight { + kind := protocol.Read + if id.IsName { + kind = protocol.Write + } + span := syntax.SpanOf(tok) + return protocol.DocumentHighlight{ + Range: setProtocolRange(span.Begin, span.End), + Kind: kind, + } +} diff --git a/internal/lsp/document_highlight_test.go b/internal/lsp/document_highlight_test.go new file mode 100644 index 00000000..d4032bed --- /dev/null +++ b/internal/lsp/document_highlight_test.go @@ -0,0 +1,51 @@ +package lsp + +import ( + "context" + "testing" + + "github.com/nokia/ntt/internal/lsp/protocol" +) + +func TestDocumentHighlight_FindsAllOccurrences(t *testing.T) { + const src = `module M { + function f() { + var integer x := 1; + x := x + 1; + } +}` + uri := setUpFakeFile(t, src) + + s := &Server{} + got, err := s.documentHighlight(context.Background(), &protocol.DocumentHighlightParams{ + TextDocumentPositionParams: protocol.TextDocumentPositionParams{ + TextDocument: 
protocol.TextDocumentIdentifier{URI: uri}, + Position: protocol.Position{Line: 2, Character: 14}, // on the first `x` + }, + }) + if err != nil { + t.Fatalf("documentHighlight returned error: %v", err) + } + if len(got) != 3 { + t.Fatalf("expected 3 highlights for `x`, got %d: %v", len(got), got) + } +} + +func TestDocumentHighlight_NoIdentifierAtPosition(t *testing.T) { + const src = `module M { }` + uri := setUpFakeFile(t, src) + + s := &Server{} + got, err := s.documentHighlight(context.Background(), &protocol.DocumentHighlightParams{ + TextDocumentPositionParams: protocol.TextDocumentPositionParams{ + TextDocument: protocol.TextDocumentIdentifier{URI: uri}, + Position: protocol.Position{Line: 0, Character: 0}, + }, + }) + if err != nil { + t.Fatalf("documentHighlight returned error: %v", err) + } + if got != nil { + t.Fatalf("expected nil result, got %v", got) + } +} diff --git a/internal/lsp/folding_range.go b/internal/lsp/folding_range.go new file mode 100644 index 00000000..51eb5cfd --- /dev/null +++ b/internal/lsp/folding_range.go @@ -0,0 +1,69 @@ +package lsp + +import ( + "context" + + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// foldingRange implements textDocument/foldingRange. We fold any +// curly-brace block that spans more than a single line: modules, groups, +// blocks, struct/enum bodies, etc. The implementation is intentionally +// AST-based rather than text-based so it follows code structure even +// inside macros / preprocessor blocks. 
+func (s *Server) foldingRange(ctx context.Context, params *protocol.FoldingRangeParams) ([]protocol.FoldingRange, error) { + if params == nil { + return nil, nil + } + + file := string(params.TextDocument.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + var ranges []protocol.FoldingRange + push := func(open, close syntax.Token, kind string) { + if open == nil || close == nil { + return + } + o := syntax.Begin(open) + c := syntax.Begin(close) + if c.Line <= o.Line { + return + } + ranges = append(ranges, protocol.FoldingRange{ + StartLine: uint32(o.Line - 1), + StartCharacter: uint32(o.Column - 1), + EndLine: uint32(c.Line - 1), + EndCharacter: uint32(c.Column - 1), + Kind: kind, + }) + } + + tree.Inspect(func(n syntax.Node) bool { + switch v := n.(type) { + case *syntax.Module: + push(v.LBrace, v.RBrace, "region") + case *syntax.GroupDecl: + push(v.LBrace, v.RBrace, "region") + case *syntax.BlockStmt: + push(v.LBrace, v.RBrace, "region") + case *syntax.CompositeLiteral: + push(v.LBrace, v.RBrace, "") + case *syntax.StructSpec: + push(v.LBrace, v.RBrace, "") + case *syntax.EnumSpec: + push(v.LBrace, v.RBrace, "") + case *syntax.StructTypeDecl: + push(v.LBrace, v.RBrace, "") + case *syntax.ImportDecl: + push(v.LBrace, v.RBrace, "imports") + } + return true + }) + + return ranges, nil +} diff --git a/internal/lsp/folding_range_test.go b/internal/lsp/folding_range_test.go new file mode 100644 index 00000000..9707e701 --- /dev/null +++ b/internal/lsp/folding_range_test.go @@ -0,0 +1,57 @@ +package lsp + +import ( + "context" + "testing" + + "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/internal/lsp/protocol" +) + +func TestFoldingRange_EmptyOnSingleLine(t *testing.T) { + const src = "module M { function f() { return; } }" + uri := setUpFakeFile(t, src) + + s := &Server{} + got, err := s.foldingRange(context.Background(), &protocol.FoldingRangeParams{ + TextDocument: 
protocol.TextDocumentIdentifier{URI: uri}, + }) + if err != nil { + t.Fatalf("foldingRange returned error: %v", err) + } + if len(got) != 0 { + t.Fatalf("expected no folds on a single line, got %v", got) + } +} + +func TestFoldingRange_MultiLineBlocks(t *testing.T) { + const src = `module M { + function f() { + return; + } +}` + uri := setUpFakeFile(t, src) + + s := &Server{} + got, err := s.foldingRange(context.Background(), &protocol.FoldingRangeParams{ + TextDocument: protocol.TextDocumentIdentifier{URI: uri}, + }) + if err != nil { + t.Fatalf("foldingRange returned error: %v", err) + } + if len(got) < 2 { + t.Fatalf("expected at least 2 fold ranges (module + function body), got %d: %v", len(got), got) + } + for _, r := range got { + if r.EndLine <= r.StartLine { + t.Errorf("invalid fold range %+v: end must be after start", r) + } + } +} + +func setUpFakeFile(t *testing.T, src string) protocol.DocumentURI { + t.Helper() + uri := protocol.DocumentURI("file:///" + t.Name() + ".ttcn3") + fs.SetContent(string(uri), []byte(src)) + return uri +} diff --git a/internal/lsp/formatter.go b/internal/lsp/formatter.go index e463896d..992a7b82 100644 --- a/internal/lsp/formatter.go +++ b/internal/lsp/formatter.go @@ -28,18 +28,37 @@ func (s *Server) formatting(ctx context.Context, params *protocol.DocumentFormat return nil, nil } - var out bytes.Buffer - p := format.NewCanonicalPrinter(&out) - p.TabWidth = int(params.Options.TabSize) + // Use the new width-aware wrapping formatter, configured from the + // client's FormattingOptions. We still fall through to the + // canonical printer (via WrappingFormatter) so existing + // regressions are picked up. + opts := format.DefaultOptions() + if params.Options.TabSize > 0 { + opts.TabWidth = int(params.Options.TabSize) + } if params.Options.InsertSpaces { - p.UseSpaces = true + opts.UseSpaces = true + } + // Manifest-level overrides take precedence over the client's + // formatting options because the project owner knows best. 
+ if cfg := s.FmtConfig(); cfg.PrintWidth > 0 { + opts.PrintWidth = cfg.PrintWidth + } + if cfg := s.FmtConfig(); cfg.TabWidth > 0 { + opts.TabWidth = cfg.TabWidth + } + if cfg := s.FmtConfig(); cfg.UseSpaces { + opts.UseSpaces = true + } + if cfg := s.FmtConfig(); cfg.MaxEmptyLines > 0 { + opts.MaxEmptyLines = cfg.MaxEmptyLines + } + if s.serverConfig.FormatPrintWidth > 0 { + opts.PrintWidth = s.serverConfig.FormatPrintWidth } - // We don't want module definitions to be indented. By using a negative - // indentation level we can achieve this for most module definitions. - p.Indent = -1 - - if err := p.Fprint(b); err != nil { + var out bytes.Buffer + if err := format.NewWrappingFormatter(opts).Fprint(&out, b); err != nil { log.Debug("formatting:", err.Error()) return nil, nil } diff --git a/internal/lsp/general.go b/internal/lsp/general.go index 5c11f854..1ba0bc82 100644 --- a/internal/lsp/general.go +++ b/internal/lsp/general.go @@ -106,22 +106,37 @@ func (s *Server) initialize(ctx context.Context, params *protocol.ParamInitializ return &protocol.InitializeResult{ Capabilities: protocol.ServerCapabilities{ InlayHintProvider: s.registerInlayHintIfNoDynReg(), - CodeActionProvider: false, - CompletionProvider: protocol.CompletionOptions{TriggerCharacters: []string{"."}}, + CodeActionProvider: true, + CompletionProvider: protocol.CompletionOptions{ + TriggerCharacters: []string{"."}, + ResolveProvider: true, + }, + CallHierarchyProvider: true, DefinitionProvider: true, - TypeDefinitionProvider: false, + TypeDefinitionProvider: true, ImplementationProvider: false, DocumentFormattingProvider: s.registerFormatterIfNoDynReg(), DocumentRangeFormattingProvider: false, DocumentSymbolProvider: true, - WorkspaceSymbolProvider: false, - FoldingRangeProvider: false, + WorkspaceSymbolProvider: true, + FoldingRangeProvider: true, HoverProvider: true, - DocumentHighlightProvider: false, + DocumentHighlightProvider: true, DocumentLinkProvider: protocol.DocumentLinkOptions{}, 
ReferencesProvider: true, + RenameProvider: protocol.RenameOptions{ + PrepareProvider: true, + }, + SignatureHelpProvider: protocol.SignatureHelpOptions{ + TriggerCharacters: []string{"(", ","}, + RetriggerCharacters: []string{","}, + }, TextDocumentSync: &protocol.TextDocumentSyncOptions{ - Change: protocol.Full, + // Incremental sync sends only the edited range + // per keystroke. We splice it into our cached + // content (see didChange) instead of having the + // client retransmit the whole file every time. + Change: protocol.Incremental, OpenClose: true, Save: protocol.SaveOptions{ IncludeText: false, diff --git a/internal/lsp/protocol/tsprotocol_317.go b/internal/lsp/protocol/tsprotocol_317.go new file mode 100644 index 00000000..ffbe19e9 --- /dev/null +++ b/internal/lsp/protocol/tsprotocol_317.go @@ -0,0 +1,105 @@ +// Additions for LSP 3.17 that the bundled tsprotocol.go (last +// regenerated from vscode-languageserver-node on 2022-01-26) does not +// yet define. We keep them in a separate file so a future, complete +// regeneration of tsprotocol.go can safely overwrite the original +// without losing these stop-gap types. +// +// The types below cover the high-value 3.17 additions: +// +// - Pull-model diagnostics (textDocument/diagnostic) which let the +// server compute diagnostics on demand instead of pushing them +// after every keystroke. +// - Type hierarchy (textDocument/prepareTypeHierarchy plus +// typeHierarchy/supertypes and typeHierarchy/subtypes). +// - Position-encoding negotiation, which is a small struct that +// attaches to the existing InitializeResult capability map under a +// new property. +// +// The types are intentionally placed in package protocol so the rest +// of the LSP server can use them without an import alias. + +package protocol + +// DocumentDiagnosticParams are the parameters of a +// textDocument/diagnostic request. 
+// +// @since 3.17.0 +type DocumentDiagnosticParams struct { + TextDocument TextDocumentIdentifier `json:"textDocument"` + // Identifier allows the server to maintain multiple diagnostic + // sources per document. + Identifier string `json:"identifier,omitempty"` + // PreviousResultId is the result id of the previous response so + // the server can return DocumentDiagnosticReportKind = unchanged. + PreviousResultId string `json:"previousResultId,omitempty"` +} + +// DocumentDiagnosticReport is the response to a textDocument/diagnostic +// request. Use Kind = "full" with Items populated, or Kind = +// "unchanged" with ResultId echoed back to the client. +// +// @since 3.17.0 +type DocumentDiagnosticReport struct { + Kind string `json:"kind"` + ResultId string `json:"resultId,omitempty"` + Items []Diagnostic `json:"items,omitempty"` +} + +// TypeHierarchyItem is the type-hierarchy companion to +// CallHierarchyItem. +// +// @since 3.17.0 +type TypeHierarchyItem struct { + Name string `json:"name"` + Kind SymbolKind `json:"kind"` + Tags []SymbolTag `json:"tags,omitempty"` + Detail string `json:"detail,omitempty"` + URI DocumentURI `json:"uri"` + Range Range `json:"range"` + SelectionRange Range `json:"selectionRange"` + Data interface{} `json:"data,omitempty"` +} + +// TypeHierarchyPrepareParams are the parameters of +// textDocument/prepareTypeHierarchy. +// +// @since 3.17.0 +type TypeHierarchyPrepareParams struct { + TextDocumentPositionParams + WorkDoneProgressParams +} + +// TypeHierarchySupertypesParams are the parameters of +// typeHierarchy/supertypes. +// +// @since 3.17.0 +type TypeHierarchySupertypesParams struct { + Item TypeHierarchyItem `json:"item"` + WorkDoneProgressParams + PartialResultParams +} + +// TypeHierarchySubtypesParams are the parameters of +// typeHierarchy/subtypes. 
+// +// @since 3.17.0 +type TypeHierarchySubtypesParams struct { + Item TypeHierarchyItem `json:"item"` + WorkDoneProgressParams + PartialResultParams +} + +// PositionEncodingKind names a position-encoding the server understands. +// +// @since 3.17.0 +type PositionEncodingKind string + +const ( + PositionEncodingUTF8 PositionEncodingKind = "utf-8" + PositionEncodingUTF16 PositionEncodingKind = "utf-16" + PositionEncodingUTF32 PositionEncodingKind = "utf-32" +) + +// LSPProtocolVersion is the version of the LSP spec that the bundled +// types + the additions in this file are intended to cover. +const LSPProtocolVersion = "3.17.0" diff --git a/internal/lsp/references.go b/internal/lsp/references.go index 57d36baf..2a35860c 100644 --- a/internal/lsp/references.go +++ b/internal/lsp/references.go @@ -13,12 +13,14 @@ import ( "github.com/nokia/ntt/ttcn3/syntax" ) +// newAllIdsWithSameNameFromFile is retained for backwards compatibility +// with rename(), which prefers a name-only search to avoid scope lookups +// for every potential write target. func newAllIdsWithSameNameFromFile(file string, idName string) []protocol.Location { list := make([]protocol.Location, 0, 10) tree := ttcn3.ParseFile(file) tree.Inspect(func(n syntax.Node) bool { if n == nil { - // called on node exit return false } @@ -38,6 +40,11 @@ func newAllIdsWithSameNameFromFile(file string, idName string) []protocol.Locati return list } +// NewAllIdsWithSameName returns every textual occurrence of name across +// files that the DB knows about. It is *not* symbol-aware and will match +// unrelated definitions that happen to share a name. The references +// handler below uses NewSymbolReferences instead; this function is kept +// for rename() and other callers that still rely on name-text matching. 
func NewAllIdsWithSameName(db *ttcn3.DB, name string) []protocol.Location { var ( locs []protocol.Location @@ -53,6 +60,121 @@ func NewAllIdsWithSameName(db *ttcn3.DB, name string) []protocol.Location { return locs } +// NewSymbolReferences returns every occurrence of the symbol that the +// identifier at cursor resolves to. It filters out false positives that +// the legacy name-text search produced by checking that each candidate +// resolves to the same definition node(s). +// +// Algorithm: +// 1. Resolve the cursor identifier to its set of definition nodes (D). +// 2. For every file the DB associates with the symbol name, parse the +// tree and walk every Ident of the same text. +// 3. For each candidate Ident, ask the lookup machinery for its own +// definition set (Dc). If D ∩ Dc is non-empty the candidate refers +// to the same symbol and we report it. +// +// The lookup is the heavy part, so we memoise per parent expression to +// avoid re-resolving the same selector chain repeatedly. +func NewSymbolReferences(db *ttcn3.DB, target *syntax.Ident, sourceFile string) []protocol.Location { + if target == nil { + return nil + } + + // Resolve the cursor's symbol. + srcTree := ttcn3.ParseFile(sourceFile) + wantDefs := definitionSet(srcTree.LookupWithDB(target, db)) + if len(wantDefs) == 0 { + // We couldn't resolve - fall back to name-text matching so + // the user at least gets *something*. This matches what + // editors expect when symbols haven't been bound yet (e.g. + // the file has a syntax error elsewhere). + return NewAllIdsWithSameName(db, target.String()) + } + + name := target.String() + + // Collect candidate files: the union of files where the name is + // defined or used. 
+ files := make(map[string]bool) + for f := range db.Names[name] { + files[f] = true + } + for f := range db.Uses[name] { + files[f] = true + } + + sortedFiles := make([]string, 0, len(files)) + for f := range files { + sortedFiles = append(sortedFiles, f) + } + sort.Strings(sortedFiles) + + var locs []protocol.Location + for _, file := range sortedFiles { + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + continue + } + tree.Inspect(func(n syntax.Node) bool { + id, ok := n.(*syntax.Ident) + if !ok || id == nil { + return true + } + tok := matchingToken(id, name) + if tok == nil { + return true + } + gotDefs := definitionSet(tree.LookupWithDB(id, db)) + if intersects(wantDefs, gotDefs) { + locs = append(locs, location(syntax.SpanOf(tok))) + } + return true + }) + } + + return locs +} + +func matchingToken(id *syntax.Ident, name string) syntax.Token { + if id.Tok != nil && id.Tok.String() == name { + return id.Tok + } + if id.Tok2 != nil && id.Tok2.String() == name { + return id.Tok2 + } + return nil +} + +// definitionSet collapses a slice of *ttcn3.Node into a set keyed by the +// underlying syntax node. We use the syntax node (rather than the +// *ttcn3.Node wrapper) because lookups go through different finders and +// allocate fresh wrappers each time. 
+func definitionSet(defs []*ttcn3.Node) map[syntax.Node]bool { + out := make(map[syntax.Node]bool, len(defs)) + for _, d := range defs { + if d == nil { + continue + } + out[d.Node] = true + } + return out +} + +func intersects(a, b map[syntax.Node]bool) bool { + if len(a) == 0 || len(b) == 0 { + return false + } + if len(a) > len(b) { + a, b = b, a + } + for n := range a { + if b[n] { + return true + } + } + return false +} + func (s *Server) references(ctx context.Context, params *protocol.ReferenceParams) ([]protocol.Location, error) { var ( file = string(params.TextDocument.URI.SpanURI()) @@ -70,5 +192,5 @@ func (s *Server) references(ctx context.Context, params *protocol.ReferenceParam if !ok || id == nil { return nil, errors.New("no identifier at cursor") } - return NewAllIdsWithSameName(&s.db, id.String()), nil + return NewSymbolReferences(&s.db, id, file), nil } diff --git a/internal/lsp/references_test.go b/internal/lsp/references_test.go index c996c4de..0dde744c 100644 --- a/internal/lsp/references_test.go +++ b/internal/lsp/references_test.go @@ -1,57 +1,75 @@ -package lsp_test +package lsp import ( - "fmt" "testing" "github.com/nokia/ntt/internal/fs" - "github.com/nokia/ntt/internal/lsp" - "github.com/nokia/ntt/internal/lsp/protocol" "github.com/nokia/ntt/ttcn3" - "github.com/stretchr/testify/assert" + "github.com/nokia/ntt/ttcn3/syntax" ) -func TestFindAllTypeDefs(t *testing.T) { - input1 := ` - module A { - type integer Byte(0..255); - function f() return Byte { - var Byte ret := 100; +// findIdent walks the parsed tree and returns the first Ident with the +// given name. We can't trust hard-coded line/column offsets in tests +// because the parser may not preserve a stable column for nested +// expressions, so we look up by name instead. 
+func findIdent(tree *ttcn3.Tree, name string) *syntax.Ident { + var found *syntax.Ident + tree.Inspect(func(n syntax.Node) bool { + if found != nil { + return false } + if id, ok := n.(*syntax.Ident); ok && id != nil && id.Tok != nil && id.Tok.String() == name { + found = id + } + return true + }) + return found +} + +// TestSymbolReferences_FindsDeclAndCall verifies that the symbol-aware +// reference search reports both the declaration site and call site of a +// function defined and used in the same module. +func TestSymbolReferences_FindsDeclAndCall(t *testing.T) { + const src = `module M { + function foo() { return; } + function f() { foo(); } +}` + file := "file:///" + t.Name() + ".ttcn3" + fs.SetContent(file, []byte(src)) + + db := &ttcn3.DB{} + db.Index(file) + + tree := ttcn3.ParseFile(file) + id := findIdent(tree, "foo") + if id == nil { + t.Fatalf("expected at least one `foo` identifier") } - module B { - import from A all; - template Byte a_byte := ?; - }` - input2 := ` - module C { - import from A {type Byte} - type Byte AliasByte; - }` - - name1 := fmt.Sprintf("test://%s_input1", t.Name()) - name2 := fmt.Sprintf("test://%s_input2", t.Name()) - - fs.SetContent(name1, []byte(input1)) - fs.SetContent(name2, []byte(input2)) + + got := NewSymbolReferences(db, id, file) + if len(got) < 2 { + t.Fatalf("expected at least 2 references (decl + call), got %d", len(got)) + } +} + +// TestSymbolReferences_FallbackOnUnresolvable verifies that an +// unresolvable cursor falls back to the legacy name-text search so the +// user still gets *something*. 
+func TestSymbolReferences_FallbackOnUnresolvable(t *testing.T) { + const src = `module M { function f() { foo(); } }` + file := "file:///" + t.Name() + ".ttcn3" + fs.SetContent(file, []byte(src)) db := &ttcn3.DB{} - db.Index(name1, name2) - - // Lookup `Msg` - list := lsp.NewAllIdsWithSameName(db, "Byte") - - assert.Equal(t, []protocol.Location{ - {URI: "test://TestFindAllTypeDefs_input1", - Range: protocol.Range{Start: protocol.Position{Line: 2, Character: 15}, End: protocol.Position{Line: 2, Character: 19}}}, - {URI: "test://TestFindAllTypeDefs_input1", - Range: protocol.Range{Start: protocol.Position{Line: 3, Character: 22}, End: protocol.Position{Line: 3, Character: 26}}}, - {URI: "test://TestFindAllTypeDefs_input1", - Range: protocol.Range{Start: protocol.Position{Line: 4, Character: 6}, End: protocol.Position{Line: 4, Character: 10}}}, - {URI: "test://TestFindAllTypeDefs_input1", - Range: protocol.Range{Start: protocol.Position{Line: 9, Character: 11}, End: protocol.Position{Line: 9, Character: 15}}}, - {URI: "test://TestFindAllTypeDefs_input2", - Range: protocol.Range{Start: protocol.Position{Line: 2, Character: 22}, End: protocol.Position{Line: 2, Character: 26}}}, - {URI: "test://TestFindAllTypeDefs_input2", - Range: protocol.Range{Start: protocol.Position{Line: 3, Character: 7}, End: protocol.Position{Line: 3, Character: 11}}}}, list) + db.Index(file) + + tree := ttcn3.ParseFile(file) + id := findIdent(tree, "foo") + if id == nil { + t.Skip("identifier finder did not return `foo`") + } + got := NewSymbolReferences(db, id, file) + if len(got) == 0 { + t.Fatalf("expected at least 1 reference (the call site itself), got 0") + } } diff --git a/internal/lsp/rename.go b/internal/lsp/rename.go new file mode 100644 index 00000000..4ef82c93 --- /dev/null +++ b/internal/lsp/rename.go @@ -0,0 +1,99 @@ +package lsp + +import ( + "context" + "fmt" + "sort" + + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + 
"github.com/nokia/ntt/ttcn3/syntax" +) + +// prepareRename implements textDocument/prepareRename. The LSP spec +// requires us to either return the range of the identifier under the +// cursor (signalling "rename is OK here") or nil/error to signal "no". +// +// Until full symbol resolution lands we treat any TTCN-3 identifier as +// renamable. A future iteration should reject keywords, builtin types and +// imports whose source we cannot edit. +func (s *Server) prepareRename(ctx context.Context, params *protocol.PrepareRenameParams) (*protocol.Range, error) { + if params == nil { + return nil, nil + } + + file := string(params.TextDocument.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + line := int(params.Position.Line) + 1 + col := int(params.Position.Character) + 1 + id, ok := tree.IdentifierAt(line, col).(*syntax.Ident) + if !ok || id == nil || id.Tok == nil { + return nil, nil + } + + span := syntax.SpanOf(id.Tok) + r := setProtocolRange(span.Begin, span.End) + return &r, nil +} + +// rename implements textDocument/rename. It uses the same name-text +// matching that powers references (so it has the same caveats - see +// the symbol-resolution todo for a proper fix), but at least groups the +// edits per file so that the client applies them atomically. +// +// We deliberately scope edits to files already known to the index so the +// user does not get surprise edits in third-party suites. 
+func (s *Server) rename(ctx context.Context, params *protocol.RenameParams) (*protocol.WorkspaceEdit, error) { + if params == nil { + return nil, nil + } + if params.NewName == "" { + return nil, fmt.Errorf("rename: new name must not be empty") + } + + file := string(params.TextDocument.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + line := int(params.Position.Line) + 1 + col := int(params.Position.Character) + 1 + id, ok := tree.IdentifierAt(line, col).(*syntax.Ident) + if !ok || id == nil { + return nil, nil + } + + locs := NewSymbolReferences(&s.db, id, file) + if len(locs) == 0 { + return nil, nil + } + + // Group locations by URI so the workspace edit becomes a map of + // "URI -> []TextEdit". This is what LSP clients want. + byURI := make(map[string][]protocol.TextEdit) + for _, l := range locs { + uri := string(l.URI) + byURI[uri] = append(byURI[uri], protocol.TextEdit{ + Range: l.Range, + NewText: params.NewName, + }) + } + + for uri, edits := range byURI { + sort.Slice(edits, func(i, j int) bool { + a, b := edits[i].Range.Start, edits[j].Range.Start + if a.Line != b.Line { + return a.Line < b.Line + } + return a.Character < b.Character + }) + byURI[uri] = edits + } + + return &protocol.WorkspaceEdit{Changes: byURI}, nil +} diff --git a/internal/lsp/server.go b/internal/lsp/server.go index a5e5f240..c547f9ec 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -26,6 +26,16 @@ func NewServer(stream jsonrpc2.Stream) *Server { return &Server{ conn: jsonrpc2.NewConn(stream), files: make(map[*fs.File]bool), + // Default-on for the features that used to live behind + // `ttcn3.experimental.*.enabled`. Users can still opt out + // via the new `ttcn3..enabled` keys (see + // configuration.go). 
+ serverConfig: Config{ + DiagnosticsEnabled: true, + FormatEnabled: true, + SemantikTokensEnabled: true, + InlayHintEnabled: true, + }, } } @@ -83,6 +93,10 @@ type Config struct { FormatEnabled bool SemantikTokensEnabled bool InlayHintEnabled bool + + // FormatPrintWidth is the soft right margin used by the wrapping + // formatter. Zero falls back to the default (100 columns). + FormatPrintWidth int } // Server implements the protocol.Server interface. diff --git a/internal/lsp/server_gen.go b/internal/lsp/server_gen.go index 5fa1d90d..9dcf35c3 100644 --- a/internal/lsp/server_gen.go +++ b/internal/lsp/server_gen.go @@ -16,8 +16,8 @@ func (s *Server) InlayHint(ctx context.Context, params *protocol.InlayHintParams return s.inlayHint(ctx, params) } -func (s *Server) CodeAction(context.Context, *protocol.CodeActionParams) ([]protocol.CodeAction, error) { - return nil, notImplemented("CodeAction") +func (s *Server) CodeAction(ctx context.Context, params *protocol.CodeActionParams) ([]protocol.CodeAction, error) { + return s.codeAction(ctx, params) } func (s *Server) CodeLens(ctx context.Context, params *protocol.CodeLensParams) ([]protocol.CodeLens, error) { @@ -84,8 +84,8 @@ func (s *Server) DocumentColor(context.Context, *protocol.DocumentColorParams) ( return nil, notImplemented("DocumentColor") } -func (s *Server) DocumentHighlight(context.Context, *protocol.DocumentHighlightParams) ([]protocol.DocumentHighlight, error) { - return nil, notImplemented("DocumentHighlight") +func (s *Server) DocumentHighlight(ctx context.Context, params *protocol.DocumentHighlightParams) ([]protocol.DocumentHighlight, error) { + return s.documentHighlight(ctx, params) } func (s *Server) DocumentLink(ctx context.Context, params *protocol.DocumentLinkParams) ([]protocol.DocumentLink, error) { @@ -104,8 +104,8 @@ func (s *Server) Exit(ctx context.Context) error { return s.exit(ctx) } -func (s *Server) FoldingRange(context.Context, *protocol.FoldingRangeParams) ([]protocol.FoldingRange, 
error) { - return nil, notImplemented("FoldingRange") +func (s *Server) FoldingRange(ctx context.Context, params *protocol.FoldingRangeParams) ([]protocol.FoldingRange, error) { + return s.foldingRange(ctx, params) } func (s *Server) Formatting(ctx context.Context, params *protocol.DocumentFormattingParams) ([]protocol.TextEdit, error) { @@ -120,8 +120,8 @@ func (s *Server) Implementation(context.Context, *protocol.ImplementationParams) return nil, notImplemented("Implementation") } -func (s *Server) IncomingCalls(context.Context, *protocol.CallHierarchyIncomingCallsParams) ([]protocol.CallHierarchyIncomingCall, error) { - return nil, notImplemented("IncomingCalls") +func (s *Server) IncomingCalls(ctx context.Context, params *protocol.CallHierarchyIncomingCallsParams) ([]protocol.CallHierarchyIncomingCall, error) { + return s.incomingCalls(ctx, params) } func (s *Server) Initialize(ctx context.Context, params *protocol.ParamInitialize) (*protocol.InitializeResult, error) { @@ -152,16 +152,16 @@ func (s *Server) OnTypeFormatting(context.Context, *protocol.DocumentOnTypeForma return nil, notImplemented("OnTypeFormatting") } -func (s *Server) OutgoingCalls(context.Context, *protocol.CallHierarchyOutgoingCallsParams) ([]protocol.CallHierarchyOutgoingCall, error) { - return nil, notImplemented("OutgoingCalls") +func (s *Server) OutgoingCalls(ctx context.Context, params *protocol.CallHierarchyOutgoingCallsParams) ([]protocol.CallHierarchyOutgoingCall, error) { + return s.outgoingCalls(ctx, params) } -func (s *Server) PrepareCallHierarchy(context.Context, *protocol.CallHierarchyPrepareParams) ([]protocol.CallHierarchyItem, error) { - return nil, notImplemented("PrepareCallHierarchy") +func (s *Server) PrepareCallHierarchy(ctx context.Context, params *protocol.CallHierarchyPrepareParams) ([]protocol.CallHierarchyItem, error) { + return s.prepareCallHierarchy(ctx, params) } -func (s *Server) PrepareRename(context.Context, *protocol.PrepareRenameParams) (*protocol.Range, 
error) { - return nil, notImplemented("PrepareRename") +func (s *Server) PrepareRename(ctx context.Context, params *protocol.PrepareRenameParams) (*protocol.Range, error) { + return s.prepareRename(ctx, params) } func (s *Server) RangeFormatting(context.Context, *protocol.DocumentRangeFormattingParams) ([]protocol.TextEdit, error) { @@ -172,12 +172,12 @@ func (s *Server) References(ctx context.Context, params *protocol.ReferenceParam return s.references(ctx, params) } -func (s *Server) Rename(context.Context, *protocol.RenameParams) (*protocol.WorkspaceEdit, error) { - return nil, notImplemented("Rename") +func (s *Server) Rename(ctx context.Context, params *protocol.RenameParams) (*protocol.WorkspaceEdit, error) { + return s.rename(ctx, params) } -func (s *Server) ResolveCompletionItem(context.Context, *protocol.CompletionItem) (*protocol.CompletionItem, error) { - return nil, notImplemented("ResolveCompletionItem") +func (s *Server) ResolveCompletionItem(ctx context.Context, item *protocol.CompletionItem) (*protocol.CompletionItem, error) { + return s.resolveCompletionItem(ctx, item) } func (s *Server) ResolveCodeAction(context.Context, *protocol.CodeAction) (*protocol.CodeAction, error) { @@ -224,16 +224,16 @@ func (s *Server) Shutdown(ctx context.Context) error { return s.shutdown(ctx) } -func (s *Server) SignatureHelp(context.Context, *protocol.SignatureHelpParams) (*protocol.SignatureHelp, error) { - return nil, notImplemented("SignatureHelp") +func (s *Server) SignatureHelp(ctx context.Context, params *protocol.SignatureHelpParams) (*protocol.SignatureHelp, error) { + return s.signatureHelp(ctx, params) } -func (s *Server) Symbol(context.Context, *protocol.WorkspaceSymbolParams) ([]protocol.SymbolInformation, error) { - return nil, notImplemented("Symbol") +func (s *Server) Symbol(ctx context.Context, params *protocol.WorkspaceSymbolParams) ([]protocol.SymbolInformation, error) { + return s.workspaceSymbol(ctx, params) } -func (s *Server) 
TypeDefinition(context.Context, *protocol.TypeDefinitionParams) (interface{}, error) { - return nil, notImplemented("TypeDefinition") +func (s *Server) TypeDefinition(ctx context.Context, params *protocol.TypeDefinitionParams) (interface{}, error) { + return s.typeDefinition(ctx, params) } func (s *Server) WillCreateFiles(context.Context, *protocol.CreateFilesParams) (*protocol.WorkspaceEdit, error) { diff --git a/internal/lsp/server_helpers.go b/internal/lsp/server_helpers.go new file mode 100644 index 00000000..f2ee7704 --- /dev/null +++ b/internal/lsp/server_helpers.go @@ -0,0 +1,34 @@ +package lsp + +import ( + "github.com/nokia/ntt/project" +) + +// FmtConfig returns the manifest-level [tools.fmt] section for the +// first suite known to the server, or a zero FmtOptions when no +// manifest defines one. It is the LSP equivalent of vanadium's +// `.vanadiumrc.toml` lookup and is consulted by the formatter handler. +// +// We pick the first suite deliberately: in a multi-root workspace each +// suite typically inherits the same fmt settings, and asking the user +// which suite to use would be more disruptive than helpful. A future +// iteration may select per-file. +func (s *Server) FmtConfig() project.FmtOptions { + suites := s.snapshotSuites() + if len(suites) == 0 || suites[0] == nil || suites[0].Config == nil { + return project.FmtOptions{} + } + return suites[0].Config.Tools.Fmt.Options() +} + +// LintConfig returns the manifest-level [tools.lint] section for the +// first suite, or a zero value when no manifest defines one. The LSP +// linter consults this to drop diagnostics from disabled rules before +// publishing them. 
+func (s *Server) LintConfig() project.LintTool { + suites := s.snapshotSuites() + if len(suites) == 0 || suites[0] == nil || suites[0].Config == nil { + return project.LintTool{} + } + return suites[0].Config.Tools.Lint +} diff --git a/internal/lsp/signature_help.go b/internal/lsp/signature_help.go new file mode 100644 index 00000000..f26e9cc2 --- /dev/null +++ b/internal/lsp/signature_help.go @@ -0,0 +1,203 @@ +package lsp + +import ( + "bytes" + "context" + "fmt" + + "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// signatureHelp implements textDocument/signatureHelp. +// +// We resolve the cursor to its enclosing CallExpr or TemplateDecl call +// (using the existing lookup machinery in ttcn3) and synthesise a +// SignatureInformation from the callee's FormalPars. The active parameter +// is the index of the argument that contains the cursor. +// +// We deliberately keep the implementation small: it does not yet handle +// overload resolution (no two TTCN-3 callees share a name and signature in +// practice), and it does not yet support struct field signature help +// (vanadium does both). +func (s *Server) signatureHelp(ctx context.Context, params *protocol.SignatureHelpParams) (*protocol.SignatureHelp, error) { + if params == nil { + return nil, nil + } + + file := string(params.TextDocument.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + line := int(params.Position.Line) + 1 + col := int(params.Position.Character) + 1 + pos := tree.PosFor(line, col) + if pos < 0 { + return nil, nil + } + + // Find the innermost CallExpr / ParenExpr containing the cursor. + call, paren := callContext(tree, pos) + if call == nil || paren == nil { + return nil, nil + } + + // Find the callee declaration. 
We can reuse the existing finder via + // Tree.LookupWithDB; it already resolves identifiers, imports, and + // the like. + candidates := tree.LookupWithDB(call.Fun, &s.db) + if len(candidates) == 0 { + return nil, nil + } + + for _, def := range candidates { + formal := getDeclarationParams(def.Node) + if formal == nil { + continue + } + + label, paramRanges := buildSignatureLabel(def, formal) + paramInfos := make([]protocol.ParameterInformation, 0, len(paramRanges)) + for _, pr := range paramRanges { + // The generated protocol bindings type Label as a plain + // string, so we use the substring form instead of byte + // offsets. This is still a substring of the signature + // label and editors will highlight it correctly. + paramInfos = append(paramInfos, protocol.ParameterInformation{ + Label: label[pr[0]:pr[1]], + }) + } + + sig := protocol.SignatureInformation{ + Label: label, + Parameters: paramInfos, + } + help := &protocol.SignatureHelp{ + Signatures: []protocol.SignatureInformation{sig}, + ActiveSignature: 0, + ActiveParameter: uint32(activeArgIndex(paren, pos, len(paramInfos))), + } + return help, nil + } + + return nil, nil +} + +// callContext returns the innermost CallExpr that wraps the cursor as well +// as the ParenExpr holding its arguments. We use the ParenExpr separately +// because we need its byte range to identify the active argument. +func callContext(tree *ttcn3.Tree, pos int) (*syntax.CallExpr, *syntax.ParenExpr) { + var ( + call *syntax.CallExpr + paren *syntax.ParenExpr + ) + + tree.Inspect(func(n syntax.Node) bool { + if n == nil { + return false + } + // Skip subtrees that don't contain the cursor. 
+ if n.Pos() > pos || pos > n.End() { + return false + } + + if c, ok := n.(*syntax.CallExpr); ok && c.Args != nil { + if c.Args.Pos() <= pos && pos <= c.Args.End() { + call = c + paren = c.Args + } + } + return true + }) + + return call, paren +} + +func activeArgIndex(paren *syntax.ParenExpr, pos int, total int) int { + if paren == nil || total == 0 { + return 0 + } + for i, arg := range paren.List { + if arg == nil { + continue + } + if pos <= arg.End() { + return i + } + } + last := len(paren.List) + if last >= total { + return total - 1 + } + return last +} + +// buildSignatureLabel renders a single-line signature from the callee +// declaration's source text. We avoid re-walking the AST and instead splice +// from the original source bytes, which preserves the user's preferred +// spacing. +func buildSignatureLabel(def *ttcn3.Node, formal *syntax.FormalPars) (string, [][2]int) { + var ( + buf bytes.Buffer + ranges [][2]int + ) + + switch n := def.Node.(type) { + case *syntax.FuncDecl: + if n.KindTok != nil { + fmt.Fprintf(&buf, "%s ", n.KindTok.String()) + } + buf.WriteString(syntax.Name(n.Name)) + case *syntax.TemplateDecl: + buf.WriteString("template ") + buf.WriteString(syntax.Name(n.Name)) + case *syntax.SignatureDecl: + buf.WriteString("signature ") + buf.WriteString(syntax.Name(n.Name)) + default: + buf.WriteString(syntax.Name(def.Node)) + } + + buf.WriteByte('(') + src := fileBytes(def.Filename()) + for i, p := range formal.List { + if p == nil { + continue + } + if i > 0 { + buf.WriteString(", ") + } + begin := buf.Len() + if src != nil { + buf.Write(src[p.Pos():p.End()]) + } else { + buf.WriteString(syntax.Name(p.Name)) + } + end := buf.Len() + ranges = append(ranges, [2]int{begin, end}) + } + buf.WriteByte(')') + + if fn, ok := def.Node.(*syntax.FuncDecl); ok && fn.Return != nil { + buf.WriteString(" return ") + if fn.Return.Type != nil { + buf.WriteString(syntax.Name(fn.Return.Type)) + } + } + return buf.String(), ranges +} + +func 
fileBytes(filename string) []byte { + if filename == "" { + return nil + } + b, err := fs.Content(filename) + if err != nil { + return nil + } + return b +} diff --git a/internal/lsp/text_synchronization.go b/internal/lsp/text_synchronization.go index ad8ef63b..1624d9f3 100644 --- a/internal/lsp/text_synchronization.go +++ b/internal/lsp/text_synchronization.go @@ -45,7 +45,26 @@ func (s *Server) didChange(ctx context.Context, params *protocol.DidChangeTextDo uri := string(params.TextDocument.URI.SpanURI()) f := fs.Open(uri) for _, ch := range params.ContentChanges { - f.SetBytes([]byte(ch.Text)) + if ch.Range == nil { + // Either the client doesn't honour our incremental + // preference or it is sending a full-document refresh. + // Either way we just take the new text verbatim. + f.SetBytes([]byte(ch.Text)) + continue + } + current, err := f.Bytes() + if err != nil { + // Fall back to full sync on read errors instead of + // dropping the change. + f.SetBytes([]byte(ch.Text)) + continue + } + next, ok := applyIncrementalChange(current, ch) + if !ok { + f.SetBytes([]byte(ch.Text)) + continue + } + f.SetBytes(next) } s.db.Index(uri) @@ -53,6 +72,62 @@ func (s *Server) didChange(ctx context.Context, params *protocol.DidChangeTextDo return nil } +// applyIncrementalChange splices ch.Text into existing at ch.Range. +// LSP positions are 0-indexed (line, UTF-16 character) so we work line +// by line. Returns the new content and true on success; false means +// the caller should fall back to a full replace. 
// positionToOffset converts an LSP (line, character) pair to a byte
// offset into src and always reports success.
//
// src is treated as UTF-8 and lines are separated by '\n'. character is
// counted in UTF-16 code units, the default LSP position encoding: a
// rune below U+10000 counts as one unit, any other rune as two. Counting
// bytes instead would misplace every edit that follows a non-ASCII
// character on the same line.
//
// Clamping rules:
//   - a line past the end of src maps to len(src);
//   - a character past the end of the line maps to the end of that line
//     (the position of its '\n', or len(src) for the last line);
//   - a character that falls between the two halves of a surrogate pair
//     maps to the offset just after that rune.
func positionToOffset(src []byte, line, character uint32) (int, bool) {
	// Advance to the first byte of the requested line.
	off := 0
	for cur := uint32(0); cur < line; cur++ {
		for off < len(src) && src[off] != '\n' {
			off++
		}
		if off == len(src) {
			// Past EOF - clamp to the end so an append on the line
			// after the last newline still applies cleanly.
			return len(src), true
		}
		off++ // step over the newline
	}

	// Find the end of the line; the column never moves beyond it.
	end := off
	for end < len(src) && src[end] != '\n' {
		end++
	}

	// Ranging over a string decodes UTF-8; i is the byte index of each
	// rune within the line.
	units := uint32(0)
	for i, r := range string(src[off:end]) {
		if units >= character {
			return off + i, true
		}
		if r >= 0x10000 {
			units += 2 // encoded as a surrogate pair in UTF-16
		} else {
			units++
		}
	}
	return end, true
}
t.Fatal("applyIncrementalChange returned !ok") + } + want := "abc\n\n" + if string(got) != want { + t.Fatalf("got %q, want %q", got, want) + } +} + +func TestPositionToOffset_ClampsPastEOF(t *testing.T) { + const src = "abc" + off, ok := positionToOffset([]byte(src), 5, 0) + if !ok { + t.Fatal("positionToOffset must always succeed") + } + if off != len(src) { + t.Fatalf("got offset %d, want %d", off, len(src)) + } +} diff --git a/internal/lsp/type_definition.go b/internal/lsp/type_definition.go new file mode 100644 index 00000000..64875fba --- /dev/null +++ b/internal/lsp/type_definition.go @@ -0,0 +1,74 @@ +package lsp + +import ( + "context" + + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// typeDefinition implements textDocument/typeDefinition: instead of +// jumping to the declaration of the variable under the cursor (which is +// what `Definition` does), we jump to the declaration of *its type*. +// +// This reuses the typeOf machinery already exposed by Tree.TypeOf, which +// the hover provider also relies on. 
+func (s *Server) typeDefinition(ctx context.Context, params *protocol.TypeDefinitionParams) (interface{}, error) { + if params == nil { + return nil, nil + } + + file := string(params.TextDocument.URI.SpanURI()) + tree := ttcn3.ParseFile(file) + if tree == nil || tree.Root == nil { + return nil, nil + } + + line := int(params.Position.Line) + 1 + col := int(params.Position.Character) + 1 + x := tree.IdentifierAt(line, col) + if x == nil { + return nil, nil + } + + var locs []protocol.Location + for _, def := range tree.LookupWithDB(x, &s.db) { + for _, typ := range tree.TypeOf(def.Node, &s.db) { + anchor := typeAnchor(typ.Node) + if anchor == nil { + continue + } + locs = append(locs, location(syntax.SpanOf(anchor))) + } + } + + return unifyLocs(locs), nil +} + +// typeAnchor returns the most useful "definition anchor" for a type +// node - usually the identifier that names the type. For anonymous types +// (like inline record specs) we fall back to the type node itself so the +// editor can still jump to the source span. +func typeAnchor(n syntax.Node) syntax.Node { + switch t := n.(type) { + case *syntax.StructTypeDecl: + return t.Name + case *syntax.EnumTypeDecl: + return t.Name + case *syntax.PortTypeDecl: + return t.Name + case *syntax.ComponentTypeDecl: + return t.Name + case *syntax.MapTypeDecl: + return t.Name + case *syntax.BehaviourTypeDecl: + return t.Name + case *syntax.SubTypeDecl: + if t.Field != nil { + return t.Field.Name + } + return t + } + return n +} diff --git a/internal/lsp/workspace_symbol.go b/internal/lsp/workspace_symbol.go new file mode 100644 index 00000000..0cefd070 --- /dev/null +++ b/internal/lsp/workspace_symbol.go @@ -0,0 +1,134 @@ +package lsp + +import ( + "context" + "sort" + "strings" + + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// workspaceSymbol implements workspace/symbol. 
We iterate every file +// indexed by the suite database and emit a SymbolInformation entry for +// every module-level definition (modules, functions, testcases, altsteps, +// templates, type declarations, ports, components). +// +// We use a simple case-insensitive substring match against the query, +// matching VS Code's expectations. Empty queries return everything (with +// a generous cap to avoid hammering the wire). +const workspaceSymbolLimit = 4096 + +func (s *Server) workspaceSymbol(ctx context.Context, params *protocol.WorkspaceSymbolParams) ([]protocol.SymbolInformation, error) { + if params == nil { + return nil, nil + } + + query := strings.ToLower(strings.TrimSpace(params.Query)) + + files := make(map[string]bool) + for _, suite := range s.snapshotSuites() { + for _, f := range suite.Files() { + files[f] = true + } + } + + var out []protocol.SymbolInformation + for f := range files { + tree := ttcn3.ParseFile(f) + if tree == nil || tree.Root == nil { + continue + } + container := "" + tree.Inspect(func(n syntax.Node) bool { + switch v := n.(type) { + case *syntax.Module: + container = syntax.Name(v.Name) + if matchesQuery(container, query) { + out = append(out, symbolFor(container, "", protocol.Module, v.Name)) + } + return true + case *syntax.FuncDecl: + kind := protocol.Function + if v.IsTest() { + kind = protocol.Method + } + name := syntax.Name(v.Name) + if matchesQuery(name, query) { + out = append(out, symbolFor(name, container, kind, v.Name)) + } + return false + case *syntax.TemplateDecl: + name := syntax.Name(v.Name) + if matchesQuery(name, query) { + out = append(out, symbolFor(name, container, protocol.Constant, v.Name)) + } + return false + case *syntax.StructTypeDecl: + name := syntax.Name(v.Name) + if matchesQuery(name, query) { + out = append(out, symbolFor(name, container, protocol.Struct, v.Name)) + } + return false + case *syntax.EnumTypeDecl: + name := syntax.Name(v.Name) + if matchesQuery(name, query) { + out = append(out, 
symbolFor(name, container, protocol.Enum, v.Name)) + } + return false + case *syntax.PortTypeDecl: + name := syntax.Name(v.Name) + if matchesQuery(name, query) { + out = append(out, symbolFor(name, container, protocol.Interface, v.Name)) + } + return false + case *syntax.ComponentTypeDecl: + name := syntax.Name(v.Name) + if matchesQuery(name, query) { + out = append(out, symbolFor(name, container, protocol.Class, v.Name)) + } + return false + } + return true + }) + if len(out) >= workspaceSymbolLimit { + break + } + } + + sort.SliceStable(out, func(i, j int) bool { + return out[i].Name < out[j].Name + }) + + return out, nil +} + +func matchesQuery(name, query string) bool { + if query == "" { + return true + } + return strings.Contains(strings.ToLower(name), query) +} + +func symbolFor(name, container string, kind protocol.SymbolKind, n syntax.Node) protocol.SymbolInformation { + return protocol.SymbolInformation{ + Name: name, + Kind: kind, + Location: location(syntax.SpanOf(n)), + ContainerName: container, + } +} + +// snapshotSuites returns a slice copy of all currently-known suites. We +// take a local snapshot under the lock so that the long parse loop below +// does not hold the mutex. +func (s *Server) snapshotSuites() []*Suite { + s.Suites.mu.Lock() + defer s.Suites.mu.Unlock() + out := make([]*Suite, 0, len(s.Suites.roots)) + for _, suite := range s.Suites.roots { + out = append(out, suite) + } + return out +} diff --git a/project/project.go b/project/project.go index ec31c461..b1ebe674 100644 --- a/project/project.go +++ b/project/project.go @@ -123,6 +123,12 @@ type Manifest struct { // Diagnostics is a list of diagnostics flags used by compilator Diagnostics []string `json:"diagnostics"` + // Tools holds per-tool configuration sections so individual + // editors (and CI jobs) can override the defaults without + // resorting to per-project flags. See the docs on Tools for the + // full schema. 
// FmtTool configures the `ntt format` command and the LSP formatter.
// Zero values fall back to the defaults baked into ttcn3/format.
//
// The Options method turns this section into a FmtOptions value; it is
// where a zero PrintWidth and a zero TabWidth are replaced by 100 and 8.
// UseSpaces and MaxEmptyLines are passed through as written.
type FmtTool struct {
	// PrintWidth is the soft right margin for the wrapping
	// formatter. Defaults to 100 columns.
	PrintWidth int `yaml:"print_width,omitempty" json:"print_width,omitempty"`

	// TabWidth is how many columns one tab counts for. Defaults to
	// 8 to match the historical canonical printer.
	TabWidth int `yaml:"tab_width,omitempty" json:"tab_width,omitempty"`

	// UseSpaces emits spaces instead of tabs for indentation.
	// The zero value (false) keeps tab indentation.
	UseSpaces bool `yaml:"use_spaces,omitempty" json:"use_spaces,omitempty"`

	// MaxEmptyLines caps consecutive empty lines between top-level
	// declarations. Zero disables the cap.
	MaxEmptyLines int `yaml:"max_empty_lines,omitempty" json:"max_empty_lines,omitempty"`
}
Unset fields leave the legacy CLI behaviour +// untouched. +type LintTool struct { + // MaxLines is the maximum number of lines allowed in a behaviour + // body. Zero disables the check. + MaxLines int `yaml:"max_lines,omitempty" json:"max_lines,omitempty"` + + // AlignedBraces requires `{` and `}` to share either a line or a + // column. + AlignedBraces bool `yaml:"aligned_braces,omitempty" json:"aligned_braces,omitempty"` + + // RequireCaseElse requires every select statement to include a + // `case else` branch. + RequireCaseElse bool `yaml:"require_case_else,omitempty" json:"require_case_else,omitempty"` + + // DisabledRules is an optional list of lint rule codes to + // suppress (e.g. "unused-import"). It is consulted by both the + // LSP and the CLI before publishing diagnostics. + DisabledRules []string `yaml:"disabled_rules,omitempty" json:"disabled_rules,omitempty"` +} + +// FmtOptions returns the FmtTool section as a project-wide overlay +// suitable for ttcn3/format. It is a small struct rather than a +// pointer-rich object so callers can easily merge it with other +// sources (e.g. LSP client FormattingOptions). +type FmtOptions struct { + PrintWidth int + TabWidth int + UseSpaces bool + MaxEmptyLines int +} + +// Options returns the configured FmtOptions or sensible defaults when +// the section is empty. +func (f FmtTool) Options() FmtOptions { + out := FmtOptions{ + PrintWidth: f.PrintWidth, + TabWidth: f.TabWidth, + UseSpaces: f.UseSpaces, + MaxEmptyLines: f.MaxEmptyLines, + } + if out.PrintWidth == 0 { + out.PrintWidth = 100 + } + if out.TabWidth == 0 { + out.TabWidth = 8 + } + return out +} + +// IsRuleDisabled reports whether the given rule code is listed in +// DisabledRules. 
+func (l LintTool) IsRuleDisabled(code string) bool { + for _, c := range l.DisabledRules { + if c == code { + return true + } + } + return false +} diff --git a/project/tools_test.go b/project/tools_test.go new file mode 100644 index 00000000..aefadb05 --- /dev/null +++ b/project/tools_test.go @@ -0,0 +1,57 @@ +package project + +import ( + "testing" + + "github.com/nokia/ntt/internal/yaml" +) + +func TestTools_UnmarshalsManifestSection(t *testing.T) { + const src = ` +name: example +sources: + - foo.ttcn3 +tools: + fmt: + print_width: 120 + tab_width: 4 + use_spaces: true + max_empty_lines: 1 + lint: + max_lines: 80 + aligned_braces: true + require_case_else: true + disabled_rules: + - unused-import + - empty-block +` + var m Manifest + if err := yaml.Unmarshal([]byte(src), &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if m.Tools.Fmt.PrintWidth != 120 { + t.Errorf("PrintWidth = %d, want 120", m.Tools.Fmt.PrintWidth) + } + if !m.Tools.Fmt.UseSpaces { + t.Errorf("UseSpaces = false, want true") + } + if !m.Tools.Lint.AlignedBraces { + t.Errorf("AlignedBraces = false, want true") + } + if got := m.Tools.Lint.IsRuleDisabled("unused-import"); !got { + t.Errorf("expected unused-import to be disabled") + } + if got := m.Tools.Lint.IsRuleDisabled("aligned-braces"); got { + t.Errorf("aligned-braces should not be disabled") + } +} + +func TestFmtTool_Defaults(t *testing.T) { + opts := (FmtTool{}).Options() + if opts.PrintWidth != 100 { + t.Errorf("default PrintWidth = %d, want 100", opts.PrintWidth) + } + if opts.TabWidth != 8 { + t.Errorf("default TabWidth = %d, want 8", opts.TabWidth) + } +} diff --git a/ttcn3/db.go b/ttcn3/db.go index 7e9ca697..cca85361 100644 --- a/ttcn3/db.go +++ b/ttcn3/db.go @@ -1,9 +1,12 @@ package ttcn3 import ( + "path/filepath" + "strings" "sync" "time" + "github.com/nokia/ntt/internal/asn1" "github.com/nokia/ntt/internal/log" "github.com/nokia/ntt/ttcn3/syntax" ) @@ -43,6 +46,24 @@ func (db *DB) Index(files ...string) { for _, path := 
// isASN1File reports whether path carries an ASN.1 file extension
// (".asn" or ".asn1"). Only the extension is inspected, and it is compared
// case-insensitively because ASN.1 specs are inconsistent about casing.
func isASN1File(path string) bool {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".asn", ".asn1":
		return true
	}
	return false
}
// Doc is the abstract document type produced by the combinators below. The
// unexported marker method keeps the set of implementations closed to this
// package.
type Doc interface{ docNode() }

// textDoc is literal text.
type textDoc struct{ s string }

// lineDoc is a soft line break; alt is what is printed in its place when
// the enclosing group is laid out flat.
type lineDoc struct{ alt string }

// hardLineDoc is an unconditional line break.
type hardLineDoc struct{}

// nestDoc adds indent to the indentation used for breaks inside d.
type nestDoc struct {
	indent int
	d      Doc
}

// groupDoc marks d as a unit that is laid out flat when it fits.
type groupDoc struct{ d Doc }

// concatDoc is the in-order sequence of ds.
type concatDoc struct{ ds []Doc }

func (textDoc) docNode()     {}
func (lineDoc) docNode()     {}
func (hardLineDoc) docNode() {}
func (nestDoc) docNode()     {}
func (groupDoc) docNode()    {}
func (concatDoc) docNode()   {}

// Text returns a document for the literal string s. It never wraps.
func Text(s string) Doc {
	return textDoc{s: s}
}

// SoftLine is a break that prints as one space when the enclosing group
// is laid out flat and as a newline otherwise.
func SoftLine() Doc {
	return lineDoc{alt: " "}
}

// SoftLineEmpty is a break that prints nothing when the enclosing group is
// laid out flat and a newline otherwise, e.g. between an opening bracket
// and its first element.
func SoftLineEmpty() Doc {
	return lineDoc{alt: ""}
}

// HardLine is a break that is always printed as a newline followed by the
// current indentation.
func HardLine() Doc {
	return hardLineDoc{}
}

// Nest returns d with the indentation of the line breaks inside it
// increased by indent.
func Nest(indent int, d Doc) Doc {
	return nestDoc{indent: indent, d: d}
}

// Group marks d as a candidate for single-line layout: the renderer prints
// the soft lines inside d in their flat form when the whole of d fits
// within the configured PrintWidth.
func Group(d Doc) Doc {
	return groupDoc{d: d}
}

// Concat returns the documents ds printed one after another.
func Concat(ds ...Doc) Doc {
	return concatDoc{ds: ds}
}

// Join returns the documents ds with sep placed between each neighbouring
// pair. Joining zero documents gives an empty Concat.
func Join(sep Doc, ds []Doc) Doc {
	if len(ds) == 0 {
		return Concat()
	}
	// n elements need n-1 separators.
	joined := make([]Doc, 0, 2*len(ds)-1)
	joined = append(joined, ds[0])
	for _, d := range ds[1:] {
		joined = append(joined, sep, d)
	}
	return Concat(joined...)
}
The algorithm is the +// "linear-time, lazy" variant described by Lindig, which is what +// clang-format and Prettier are also based on. +func Render(w io.Writer, d Doc, opts Options) error { + if opts.PrintWidth <= 0 { + opts.PrintWidth = 100 + } + if opts.TabWidth <= 0 { + opts.TabWidth = 4 + } + r := renderer{ + w: w, + opts: opts, + } + r.render(d, 0, modeBreak) + return r.err +} + +// renderMode tells the renderer how to interpret soft lines inside +// the current group. +type renderMode int + +const ( + modeFlat renderMode = iota // soft lines render as their alt string + modeBreak // soft lines render as a real line break +) + +type renderer struct { + w io.Writer + opts Options + col int + err error + atBOL bool +} + +func (r *renderer) render(d Doc, indent int, mode renderMode) { + if r.err != nil { + return + } + switch x := d.(type) { + case textDoc: + r.writeString(x.s) + case lineDoc: + if mode == modeFlat { + r.writeString(x.alt) + } else { + r.newline(indent) + } + case hardLineDoc: + r.newline(indent) + case nestDoc: + r.render(x.d, indent+x.indent, mode) + case groupDoc: + if r.fits(x.d, indent, r.opts.PrintWidth-r.col) { + r.render(x.d, indent, modeFlat) + } else { + r.render(x.d, indent, modeBreak) + } + case concatDoc: + for _, c := range x.ds { + r.render(c, indent, mode) + } + } +} + +// fits returns true if d can be rendered in flat mode without exceeding +// the remaining width on the current line. 
+func (r *renderer) fits(d Doc, indent, remaining int) bool { + if remaining < 0 { + return false + } + return fitsRec(d, indent, remaining) >= 0 +} + +func fitsRec(d Doc, indent, remaining int) int { + if remaining < 0 { + return -1 + } + switch x := d.(type) { + case textDoc: + return remaining - len(x.s) + case lineDoc: + return remaining - len(x.alt) + case hardLineDoc: + return -1 + case nestDoc: + return fitsRec(x.d, indent+x.indent, remaining) + case groupDoc: + return fitsRec(x.d, indent, remaining) + case concatDoc: + for _, c := range x.ds { + remaining = fitsRec(c, indent, remaining) + if remaining < 0 { + return -1 + } + } + return remaining + } + return remaining +} + +func (r *renderer) writeString(s string) { + if r.err != nil || s == "" { + return + } + if _, err := io.WriteString(r.w, s); err != nil { + r.err = err + return + } + // Naive column tracking: treat tabs as TabWidth, count characters + // otherwise. Good enough for ASCII source. + if strings.IndexByte(s, '\n') >= 0 { + idx := strings.LastIndexByte(s, '\n') + r.col = len(s) - idx - 1 + } else { + r.col += len(s) + } +} + +func (r *renderer) newline(indent int) { + if r.err != nil { + return + } + indentStr := r.indentString(indent) + if _, err := io.WriteString(r.w, "\n"+indentStr); err != nil { + r.err = err + return + } + r.col = indent +} + +func (r *renderer) indentString(indent int) string { + if r.opts.UseSpaces { + return strings.Repeat(" ", indent) + } + // Round indentation down to whole tabs and pad with spaces. + tabs := indent / r.opts.TabWidth + rem := indent % r.opts.TabWidth + return strings.Repeat("\t", tabs) + strings.Repeat(" ", rem) +} diff --git a/ttcn3/format/options.go b/ttcn3/format/options.go new file mode 100644 index 00000000..e58e5a9f --- /dev/null +++ b/ttcn3/format/options.go @@ -0,0 +1,39 @@ +package format + +// Options configures the wrapping printer. 
// Options configures the wrapping printer.
//
// It is the single structure the LSP, the CLI and the per-project manifest
// fill in; the fields follow vanadium's [tools.fmt] TOML section in spirit.
type Options struct {
	// PrintWidth is the soft right margin in columns. A group whose flat
	// form does not fit before this column is broken vertically.
	// Defaults to 100.
	PrintWidth int

	// TabWidth is the number of columns a tab character occupies. It is
	// used when measuring text against PrintWidth and when building
	// indentation.
	TabWidth int

	// UseSpaces selects space indentation. The default is false (tabs),
	// which preserves the historical behaviour of the canonical printer.
	UseSpaces bool

	// MaxEmptyLines is the maximum number of consecutive empty lines kept
	// between top-level declarations. Zero disables the cap.
	MaxEmptyLines int
}

// DefaultOptions returns the Options that correspond to the historical
// canonical printer: a 100-column margin, 8-column tabs, tab indentation
// and at most one consecutive empty line. New consumers should start from
// this value and override only what they need.
func DefaultOptions() Options {
	var opts Options // UseSpaces keeps its zero value: indent with tabs.
	opts.PrintWidth = 100
	opts.TabWidth = 8
	opts.MaxEmptyLines = 1
	return opts
}
Building a +// fully AST-aware printer is tracked in the longer-term plan; this +// keeps the LSP and CLI honest about line width today. +type WrappingFormatter struct { + Options Options +} + +// NewWrappingFormatter returns a formatter configured with opts. If opts +// is the zero value DefaultOptions() is used. +func NewWrappingFormatter(opts Options) *WrappingFormatter { + if opts == (Options{}) { + opts = DefaultOptions() + } + return &WrappingFormatter{Options: opts} +} + +// Fprint formats src and writes the result to w. src must be one of the +// types CanonicalPrinter.Fprint accepts ([]byte, string or io.Reader). +func (f *WrappingFormatter) Fprint(w io.Writer, src interface{}) error { + var buf bytes.Buffer + cp := NewCanonicalPrinter(&buf) + cp.TabWidth = f.Options.TabWidth + cp.UseSpaces = f.Options.UseSpaces + if err := cp.Fprint(src); err != nil { + return err + } + out := f.wrapLines(buf.String()) + out = f.collapseEmptyLines(out) + _, err := io.WriteString(w, out) + return err +} + +// wrapLines breaks lines that exceed PrintWidth at the most plausible +// split points - currently top-level commas inside parameter lists and +// composite literals. The function preserves the original indentation +// and adds one level of nesting for the wrapped continuations. +func (f *WrappingFormatter) wrapLines(in string) string { + if f.Options.PrintWidth <= 0 { + return in + } + var b strings.Builder + scanner := strings.Split(in, "\n") + for i, line := range scanner { + if i > 0 { + b.WriteByte('\n') + } + if f.visualLength(line) <= f.Options.PrintWidth { + b.WriteString(line) + continue + } + wrapped := f.wrapOneLine(line) + b.WriteString(wrapped) + } + return b.String() +} + +// wrapOneLine tries to find a balanced opener / closer pair on the line +// and reflows the contents across multiple lines. We only handle the +// outermost pair to keep the algorithm linear; nested wraps happen on a +// subsequent pass if needed. 
+func (f *WrappingFormatter) wrapOneLine(line string) string { + openIdx, closeIdx := outerBracketPair(line) + if openIdx < 0 || closeIdx <= openIdx+1 { + return line + } + prefix := line[:openIdx+1] + inner := line[openIdx+1 : closeIdx] + suffix := line[closeIdx:] + + // Don't wrap if the inner content has no top-level commas - we'd + // just turn one long line into another long line with extra + // noise. + parts := splitTopLevel(inner, ',') + if len(parts) <= 1 { + return line + } + + baseIndent := leadingIndent(line) + contIndent := baseIndent + f.indentUnit() + + var b strings.Builder + b.WriteString(prefix) + for i, part := range parts { + b.WriteByte('\n') + b.WriteString(contIndent) + b.WriteString(strings.TrimSpace(part)) + if i < len(parts)-1 { + b.WriteByte(',') + } + } + b.WriteByte('\n') + b.WriteString(baseIndent) + b.WriteString(strings.TrimLeft(suffix, " \t")) + return b.String() +} + +var emptyLineRun = regexp.MustCompile(`\n{2,}`) + +func (f *WrappingFormatter) collapseEmptyLines(s string) string { + if f.Options.MaxEmptyLines <= 0 { + return s + } + limit := f.Options.MaxEmptyLines + 1 // number of \n that separates blocks + return emptyLineRun.ReplaceAllStringFunc(s, func(run string) string { + if len(run) <= limit { + return run + } + return strings.Repeat("\n", limit) + }) +} + +func (f *WrappingFormatter) visualLength(line string) int { + n := 0 + tab := f.Options.TabWidth + if tab <= 0 { + tab = 8 + } + for _, r := range line { + if r == '\t' { + n += tab - (n % tab) + continue + } + n++ + } + return n +} + +func (f *WrappingFormatter) indentUnit() string { + if f.Options.UseSpaces { + tab := f.Options.TabWidth + if tab <= 0 { + tab = 4 + } + return strings.Repeat(" ", tab) + } + return "\t" +} + +func leadingIndent(line string) string { + for i, r := range line { + if r != ' ' && r != '\t' { + return line[:i] + } + } + return line +} + +// outerBracketPair returns the byte offsets of the outermost matched +// `(` / `)` or `{` / `}` pair on 
// outerBracketPair returns the byte offsets of the first top-level bracket
// pair on line - the first `(` or `{` seen at nesting depth zero and the
// closer that brings the depth back to zero - or (-1, -1) if the line has
// no such pair.
//
// Closing brackets that have no opener on this line (for example the
// leading `}` of `} else f(a, b)`) are ignored rather than counted, so they
// cannot hide a balanced pair that follows them. Round and curly brackets
// share one depth counter; the kind of closer is not checked against the
// kind of opener. Brackets inside double-quoted string literals are
// skipped, with a backslash escaping the following byte.
func outerBracketPair(line string) (int, int) {
	openIdx := -1
	depth := 0
	for i := 0; i < len(line); i++ {
		switch line[i] {
		case '(', '{':
			if depth == 0 {
				openIdx = i
			}
			depth++
		case ')', '}':
			if depth == 0 {
				// Stray closer belonging to an earlier line.
				continue
			}
			depth--
			if depth == 0 {
				return openIdx, i
			}
		case '"':
			// Skip strings - they can contain unbalanced brackets.
			j := i + 1
			for j < len(line) && line[j] != '"' {
				if line[j] == '\\' && j+1 < len(line) {
					j += 2
					continue
				}
				j++
			}
			i = j
		}
	}
	return -1, -1
}
"\n") { + if visualLen(line, opts.TabWidth) > opts.PrintWidth*2 { + t.Errorf("line still too long after wrap: %q", line) + } + } +} + +func TestWrappingFormatter_KeepsShortLinesAsIs(t *testing.T) { + const src = `module M { function f() { return; } }` + var buf bytes.Buffer + opts := DefaultOptions() + opts.PrintWidth = 80 + if err := NewWrappingFormatter(opts).Fprint(&buf, src); err != nil { + t.Fatalf("fprint failed: %v", err) + } + out := buf.String() + if strings.Count(out, "function f()") != 1 { + t.Fatalf("expected the function signature to stay on one line, got:\n%s", out) + } +} + +func TestOuterBracketPair(t *testing.T) { + cases := []struct { + in string + wantO, wantC int + }{ + {"f(a, b, c)", 1, 9}, + {"x = {1, 2, 3}", 4, 12}, + {"plain", -1, -1}, + {"f(\"a)b\", c)", 1, 10}, + } + for _, c := range cases { + o, cl := outerBracketPair(c.in) + if o != c.wantO || cl != c.wantC { + t.Errorf("outerBracketPair(%q) = (%d,%d), want (%d,%d)", c.in, o, cl, c.wantO, c.wantC) + } + } +} + +func TestSplitTopLevel(t *testing.T) { + got := splitTopLevel("a, b(c, d), {e, f}", ',') + want := []string{"a", " b(c, d)", " {e, f}"} + if len(got) != len(want) { + t.Fatalf("split = %v, want %v", got, want) + } + for i := range got { + if got[i] != want[i] { + t.Errorf("split[%d] = %q, want %q", i, got[i], want[i]) + } + } +} + +func visualLen(line string, tab int) int { + n := 0 + if tab <= 0 { + tab = 8 + } + for _, r := range line { + if r == '\t' { + n += tab - (n % tab) + continue + } + n++ + } + return n +} diff --git a/ttcn3/lint/lint.go b/ttcn3/lint/lint.go new file mode 100644 index 00000000..be8bd0e8 --- /dev/null +++ b/ttcn3/lint/lint.go @@ -0,0 +1,193 @@ +// Package lint implements a reusable, AST-based TTCN-3 linter that produces +// structured Problem records suitable for both CLI reporting and LSP +// diagnostics. 
// Severity classifies how serious a Problem is. The numeric values follow
// the LSP DiagnosticSeverity ordering (1=Error, 2=Warning, 3=Information,
// 4=Hint), so a Severity converts to its LSP counterpart with a plain cast.
type Severity int

// The constants start at 1; keep their order, the numeric values are part
// of the contract described on Severity.
const (
	SeverityError Severity = iota + 1 // 1
	SeverityWarn                      // 2
	SeverityInfo                      // 3
	SeverityHint                      // 4
)
+ Code string + + // Severity is the importance of the finding. + Severity Severity + + // Message is the human-readable description of the problem. + Message string + + // Node is the AST node the problem refers to. It is used to compute + // the source span; callers can use the node directly to produce richer + // reports. + Node syntax.Node + + // Span is the source range of the problem. It is precomputed from + // Node so that callers don't need to chase it through SpanOf. + Span syntax.Span + + // Fix is an optional suggested edit. When present, code-action capable + // clients can apply it directly. + Fix *Autofix +} + +// Rule is a single check that inspects a parsed Tree and reports any +// problems via the provided Reporter. +// +// Rules must be safe to invoke concurrently for different trees but may +// retain per-invocation state internally as long as they don't share it +// between trees. +type Rule interface { + // Code returns the stable identifier of the rule. + Code() string + + // Check runs the rule against tree and emits any findings through + // report. Implementations should be tolerant of partial / erroneous + // trees - the LSP runs them on every keystroke. + Check(tree *ttcn3.Tree, report Reporter) +} + +// Reporter is the callback used by rules to emit problems. +type Reporter func(Problem) + +// Linter applies a set of rules to one or more trees. +type Linter struct { + rules []Rule +} + +// NewLinter returns a Linter pre-configured with the provided rules. +// Passing zero rules returns a no-op linter, which is occasionally useful +// in tests. +func NewLinter(rules ...Rule) *Linter { + out := &Linter{rules: make([]Rule, 0, len(rules))} + out.rules = append(out.rules, rules...) + return out +} + +// DefaultLinter returns a Linter with the built-in rule set enabled. This +// is what the LSP uses by default. +func DefaultLinter() *Linter { + return NewLinter(DefaultRules()...) +} + +// DefaultRules returns the built-in rule set. 
Callers that want to compose +// a custom Linter can append to or filter this slice. +func DefaultRules() []Rule { + return []Rule{ + &UnusedImportRule{}, + &EmptyBlockRule{}, + &AlignedBracesRule{}, + &MissingCaseElseRule{}, + } +} + +// Rules returns the configured rule set. +func (l *Linter) Rules() []Rule { return l.rules } + +// problemBufPool recycles the per-invocation slice of problems. The LSP +// runs Lint on every keystroke, and most calls produce a handful of +// problems at most, so reusing the underlying array is a measurable +// allocation win at zero correctness cost. +var problemBufPool = sync.Pool{ + New: func() interface{} { + b := make([]Problem, 0, 16) + return &b + }, +} + +// Lint runs all configured rules against tree and returns the problems +// sorted by source position. Lint is safe to call concurrently across +// different trees. +func (l *Linter) Lint(tree *ttcn3.Tree) []Problem { + if tree == nil || tree.Root == nil { + return nil + } + + bufPtr := problemBufPool.Get().(*[]Problem) + buf := (*bufPtr)[:0] + defer func() { + // Hand the buffer back to the pool when we're done. We don't + // shrink it: typical lint runs settle around the same size, + // and a larger backing array reduces grow-and-copy churn on + // the next call. + *bufPtr = buf[:0] + problemBufPool.Put(bufPtr) + }() + + var mu sync.Mutex + report := func(p Problem) { + if p.Node != nil && !p.Span.Begin.IsValid() { + p.Span = syntax.SpanOf(p.Node) + } + mu.Lock() + buf = append(buf, p) + mu.Unlock() + } + + for _, rule := range l.rules { + rule.Check(tree, report) + } + + // Return a fresh copy so the caller can outlive the pooled buffer. 
+ out := make([]Problem, len(buf)) + copy(out, buf) + sort.SliceStable(out, func(i, j int) bool { + a, b := out[i].Span.Begin, out[j].Span.Begin + if a.Line != b.Line { + return a.Line < b.Line + } + return a.Column < b.Column + }) + return out +} diff --git a/ttcn3/lint/lint_test.go b/ttcn3/lint/lint_test.go new file mode 100644 index 00000000..036fd3b0 --- /dev/null +++ b/ttcn3/lint/lint_test.go @@ -0,0 +1,100 @@ +package lint + +import ( + "strings" + "testing" + + "github.com/nokia/ntt/ttcn3" +) + +func parse(t *testing.T, src string) *ttcn3.Tree { + t.Helper() + tree := ttcn3.Parse(src) + if tree.Err != nil { + t.Fatalf("parse error: %v\nsource:\n%s", tree.Err, src) + } + return tree +} + +func TestUnusedImport(t *testing.T) { + tests := []struct { + name string + src string + wantCode string + }{ + { + name: "unused all import", + src: `module M { + import from Other all; + }`, + wantCode: "unused-import", + }, + { + name: "used all import", + src: `module M { + import from Other all; + function f() { Other.foo(); } + }`, + }, + { + name: "used kind import", + src: `module M { + import from Other { type Bar }; + function f() { var Bar b; } + }`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tree := parse(t, tt.src) + problems := NewLinter(&UnusedImportRule{}).Lint(tree) + switch { + case tt.wantCode == "" && len(problems) > 0: + t.Fatalf("expected no problems, got %v", problems) + case tt.wantCode != "" && len(problems) == 0: + t.Fatalf("expected problem with code %q, got none", tt.wantCode) + case tt.wantCode != "" && problems[0].Code != tt.wantCode: + t.Fatalf("got code %q, want %q", problems[0].Code, tt.wantCode) + } + if tt.wantCode != "" { + if fix := problems[0].Fix; fix == nil { + t.Fatalf("expected an autofix, got none") + } else if fix.End <= fix.Begin { + t.Fatalf("autofix has empty range [%d, %d)", fix.Begin, fix.End) + } + } + }) + } +} + +func TestEmptyBlock(t *testing.T) { + src := `module M { + function f() { } 
+ function g() { log("hi"); } + }` + tree := parse(t, src) + problems := NewLinter(&EmptyBlockRule{}).Lint(tree) + if len(problems) != 1 { + t.Fatalf("expected exactly 1 problem, got %d: %v", len(problems), problems) + } + if problems[0].Code != "empty-block" { + t.Fatalf("got code %q, want %q", problems[0].Code, "empty-block") + } +} + +func TestDefaultLinter_NoPanicOnEmpty(t *testing.T) { + tree := parse(t, "module M { }") + got := DefaultLinter().Lint(tree) + for _, p := range got { + if strings.TrimSpace(p.Message) == "" { + t.Errorf("problem with empty message: %+v", p) + } + } +} + +func TestLint_NilTreeIsSafe(t *testing.T) { + if got := DefaultLinter().Lint(nil); got != nil { + t.Fatalf("expected nil for nil tree, got %v", got) + } +} diff --git a/ttcn3/lint/rules.go b/ttcn3/lint/rules.go new file mode 100644 index 00000000..93283921 --- /dev/null +++ b/ttcn3/lint/rules.go @@ -0,0 +1,276 @@ +package lint + +import ( + "fmt" + + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// UnusedImportRule reports `import from M all;` and explicit `type/const/...` +// imports whose imported names are never referenced inside the importing +// module. The rule produces an Autofix that removes the offending import +// declaration entirely (the legacy CLI lint only warned and required manual +// cleanup). +type UnusedImportRule struct{} + +func (r *UnusedImportRule) Code() string { return "unused-import" } + +func (r *UnusedImportRule) Check(tree *ttcn3.Tree, report Reporter) { + for _, mod := range tree.Modules() { + m, ok := mod.Node.(*syntax.Module) + if !ok { + continue + } + + // Collect every identifier referenced inside this module so we + // can check imports against it. We intentionally inspect Tok + // strings rather than resolved symbols: that gives us a + // conservative over-approximation - if a name appears anywhere + // in source we treat the import as used. This avoids false + // positives until full symbol resolution is in place. 
+ used := make(map[string]bool) + m.Inspect(func(n syntax.Node) bool { + switch n := n.(type) { + case *syntax.ImportDecl: + // Don't count uses inside the import itself. + return false + case *syntax.Ident: + if n.Tok != nil { + used[n.Tok.String()] = true + } + if n.Tok2 != nil { + used[n.Tok2.String()] = true + } + } + return true + }) + + // Walk top-level import declarations and check each. + m.Inspect(func(n syntax.Node) bool { + imp, ok := n.(*syntax.ImportDecl) + if !ok { + return true + } + r.checkImport(imp, used, report) + return false + }) + } +} + +func (r *UnusedImportRule) checkImport(imp *syntax.ImportDecl, used map[string]bool, report Reporter) { + if imp.Module == nil || imp.Module.Tok == nil { + return + } + modName := imp.Module.Tok.String() + + // "import from M all;" - we have no specific names to check, so + // fall back to "module name itself appears as a qualifier somewhere". + if len(imp.List) == 0 { + if used[modName] { + return + } + report(Problem{ + Code: r.Code(), + Severity: SeverityWarn, + Message: fmt.Sprintf("import of %q appears to be unused", modName), + Node: imp, + Fix: removeNodeFix(imp, fmt.Sprintf("Remove unused import of %q", modName)), + }) + return + } + + // Explicit kind imports: "import from M { type A, B; const C; }". We + // keep the import if any of its imported names (or the module name) + // appears elsewhere. + if used[modName] { + return + } + for _, kind := range imp.List { + if kind == nil { + continue + } + for _, id := range kind.List { + if isAnyReferenced(id, used) { + return + } + } + } + report(Problem{ + Code: r.Code(), + Severity: SeverityWarn, + Message: fmt.Sprintf("import of %q appears to be unused", modName), + Node: imp, + Fix: removeNodeFix(imp, fmt.Sprintf("Remove unused import of %q", modName)), + }) +} + +// EmptyBlockRule flags function, altstep, testcase and control bodies that +// contain no statements. 
An empty body is almost always either an oversight +// or stale code that should be removed. +type EmptyBlockRule struct{} + +func (r *EmptyBlockRule) Code() string { return "empty-block" } + +func (r *EmptyBlockRule) Check(tree *ttcn3.Tree, report Reporter) { + tree.Inspect(func(n syntax.Node) bool { + switch d := n.(type) { + case *syntax.FuncDecl: + if isEmptyBlock(d.Body) { + kind := "function" + if d.KindTok != nil { + kind = d.KindTok.String() + } + report(Problem{ + Code: r.Code(), + Severity: SeverityInfo, + Message: fmt.Sprintf("empty %s body", kind), + Node: d.Body, + }) + } + case *syntax.ControlPart: + if isEmptyBlock(d.Body) { + report(Problem{ + Code: r.Code(), + Severity: SeverityInfo, + Message: "empty control part body", + Node: d.Body, + }) + } + } + return true + }) +} + +// AlignedBracesRule reports `{` and `}` that are neither on the same line +// nor in the same column. Mirroring this rule from the legacy CLI lint into +// the LSP layer makes the check visible as you type. 
+type AlignedBracesRule struct{} + +func (r *AlignedBracesRule) Code() string { return "aligned-braces" } + +func (r *AlignedBracesRule) Check(tree *ttcn3.Tree, report Reporter) { + tree.Inspect(func(n syntax.Node) bool { + var lb, rb syntax.Token + switch d := n.(type) { + case *syntax.Module: + lb, rb = d.LBrace, d.RBrace + case *syntax.BlockStmt: + lb, rb = d.LBrace, d.RBrace + case *syntax.CompositeLiteral: + lb, rb = d.LBrace, d.RBrace + case *syntax.StructSpec: + lb, rb = d.LBrace, d.RBrace + case *syntax.EnumSpec: + lb, rb = d.LBrace, d.RBrace + case *syntax.GroupDecl: + lb, rb = d.LBrace, d.RBrace + case *syntax.StructTypeDecl: + lb, rb = d.LBrace, d.RBrace + } + if lb == nil || rb == nil { + return true + } + l := syntax.Begin(lb) + rr := syntax.Begin(rb) + if l.Line == rr.Line || l.Column == rr.Column { + return true + } + report(Problem{ + Code: r.Code(), + Severity: SeverityInfo, + Message: "braces are not aligned (must share a line or column)", + Node: rb, + }) + return true + }) +} + +// MissingCaseElseRule warns about select-statements that have no `case else` +// branch. This is the LSP-side complement of the legacy `require_case_else` +// configuration option. 
+type MissingCaseElseRule struct{} + +func (r *MissingCaseElseRule) Code() string { return "missing-case-else" } + +func (r *MissingCaseElseRule) Check(tree *ttcn3.Tree, report Reporter) { + tree.Inspect(func(n syntax.Node) bool { + sel, ok := n.(*syntax.SelectStmt) + if !ok { + return true + } + for _, cc := range sel.Body { + if cc.Case == nil { // case else + return true + } + } + report(Problem{ + Code: r.Code(), + Severity: SeverityWarn, + Message: "select-statement has no case else branch", + Node: sel, + }) + return true + }) +} + +func isEmptyBlock(b *syntax.BlockStmt) bool { + return b != nil && len(b.Stmts) == 0 +} + +func isAnyReferenced(e syntax.Expr, used map[string]bool) bool { + if e == nil { + return false + } + // Some leaf node types (notably Ident) implement Inspect as a no-op, + // so we have to short-circuit and check them directly. For + // composite expressions we still want to walk so that things like + // `from M except { type X }` are handled correctly. + if id, ok := e.(*syntax.Ident); ok { + return identIsUsed(id, used) + } + found := false + e.Inspect(func(n syntax.Node) bool { + if id, ok := n.(*syntax.Ident); ok { + if identIsUsed(id, used) { + found = true + } + } + return !found + }) + return found +} + +func identIsUsed(id *syntax.Ident, used map[string]bool) bool { + if id == nil { + return false + } + if id.Tok != nil && used[id.Tok.String()] { + return true + } + if id.Tok2 != nil && used[id.Tok2.String()] { + return true + } + return false +} + +func removeNodeFix(n syntax.Node, title string) *Autofix { + if n == nil { + return nil + } + span := syntax.SpanOf(n) + if !span.Begin.IsValid() { + return nil + } + first := n.FirstTok() + last := n.LastTok() + if first == nil || last == nil { + return nil + } + return &Autofix{ + Title: title, + Begin: first.Pos(), + End: last.End(), + Replacement: "", + } +} diff --git a/ttcn3/semantic/semantic.go b/ttcn3/semantic/semantic.go new file mode 100644 index 00000000..ddfea062 --- 
// Diagnostic is the result of an analysis pass. The checks in this file
// emit the codes "unknown-import" and "duplicate-definition".
type Diagnostic struct {
	// Code is a stable identifier for the analysis rule.
	Code string

	// Severity classifies the diagnostic.
	Severity Severity

	// Message is the human-readable description.
	Message string

	// Node is the source node the diagnostic refers to.
	Node syntax.Node

	// Span is the precomputed source span. Always present: both checks
	// in this package fill it with syntax.SpanOf(Node), and Analyze
	// orders its result by Span.Begin (line first, then column).
	Span syntax.Span
}
+func NewAnalyzer(db *ttcn3.DB) *Analyzer { + return &Analyzer{DB: db} +} + +// Analyze runs the semantic checks against tree and returns the resulting +// diagnostics sorted by source position. +func (a *Analyzer) Analyze(tree *ttcn3.Tree) []Diagnostic { + if tree == nil || tree.Root == nil { + return nil + } + + var out []Diagnostic + for _, modNode := range tree.Modules() { + mod, ok := modNode.Node.(*syntax.Module) + if !ok { + continue + } + out = append(out, a.checkImports(tree, mod)...) + out = append(out, a.checkDuplicates(mod)...) + } + + sort.SliceStable(out, func(i, j int) bool { + a, b := out[i].Span.Begin, out[j].Span.Begin + if a.Line != b.Line { + return a.Line < b.Line + } + return a.Column < b.Column + }) + return out +} + +// checkImports flags `import from M ...` declarations where M is neither +// the importing module itself nor a known module in the database. +func (a *Analyzer) checkImports(tree *ttcn3.Tree, mod *syntax.Module) []Diagnostic { + var diags []Diagnostic + self := syntax.Name(mod.Name) + + mod.Inspect(func(n syntax.Node) bool { + imp, ok := n.(*syntax.ImportDecl) + if !ok { + return true + } + if imp.Module == nil { + return false + } + name := syntax.Name(imp.Module) + if name == "" { + return false + } + if name == self { + return false + } + if a.DB != nil && a.DB.Modules != nil { + if files, ok := a.DB.Modules[name]; ok && len(files) > 0 { + return false + } + } + diags = append(diags, Diagnostic{ + Code: "unknown-import", + Severity: SeverityError, + Message: fmt.Sprintf("unknown module %q", name), + Node: imp.Module, + Span: syntax.SpanOf(imp.Module), + }) + return false + }) + return diags +} + +// checkDuplicates flags two top-level definitions in the same module +// that share a name. This is a hard error in TTCN-3 but parsers happily +// accept it. 
+func (a *Analyzer) checkDuplicates(mod *syntax.Module) []Diagnostic { + type seen struct { + first syntax.Node + span syntax.Span + } + defs := make(map[string]seen) + var diags []Diagnostic + + for _, d := range mod.Defs { + if d == nil || d.Def == nil { + continue + } + name := syntax.Name(d.Def) + if name == "" { + continue + } + if prev, ok := defs[name]; ok { + span := syntax.SpanOf(d.Def) + diags = append(diags, Diagnostic{ + Code: "duplicate-definition", + Severity: SeverityError, + Message: fmt.Sprintf( + "duplicate definition of %q (first declared at %s)", + name, prev.span.Begin), + Node: d.Def, + Span: span, + }) + continue + } + defs[name] = seen{first: d.Def, span: syntax.SpanOf(d.Def)} + } + return diags +} + +// IsPredefinedType returns true when name refers to one of the TTCN-3 +// predefined types. It is a small helper exported for the LSP hover and +// completion handlers, which need to distinguish between user types and +// builtin ones. +func IsPredefinedType(name string) bool { + _, ok := types.Predefined[name] + return ok +} diff --git a/ttcn3/semantic/semantic_test.go b/ttcn3/semantic/semantic_test.go new file mode 100644 index 00000000..e3638d50 --- /dev/null +++ b/ttcn3/semantic/semantic_test.go @@ -0,0 +1,58 @@ +package semantic + +import ( + "testing" + + "github.com/nokia/ntt/ttcn3" +) + +func parse(t *testing.T, src string) *ttcn3.Tree { + t.Helper() + tree := ttcn3.Parse(src) + if tree.Err != nil { + t.Fatalf("parse error: %v\nsource:\n%s", tree.Err, src) + } + return tree +} + +func TestAnalyze_UnknownImport(t *testing.T) { + tree := parse(t, `module M { import from Nope all; }`) + diags := NewAnalyzer(&ttcn3.DB{}).Analyze(tree) + if len(diags) == 0 { + t.Fatalf("expected an unknown-import diagnostic, got none") + } + if diags[0].Code != "unknown-import" { + t.Fatalf("got code %q, want unknown-import", diags[0].Code) + } +} + +func TestAnalyze_DuplicateDefinition(t *testing.T) { + tree := parse(t, `module M { + function f() { return; 
} + function f() { return; } + }`) + diags := NewAnalyzer(&ttcn3.DB{}).Analyze(tree) + if len(diags) == 0 { + t.Fatalf("expected a duplicate-definition diagnostic, got none") + } + if diags[0].Code != "duplicate-definition" { + t.Fatalf("got code %q, want duplicate-definition", diags[0].Code) + } +} + +func TestAnalyze_NoSelfImportFalsePositive(t *testing.T) { + tree := parse(t, `module M { import from M all; }`) + diags := NewAnalyzer(&ttcn3.DB{}).Analyze(tree) + if len(diags) != 0 { + t.Fatalf("self-imports should be silent, got %v", diags) + } +} + +func TestIsPredefinedType(t *testing.T) { + if !IsPredefinedType("integer") { + t.Fatal("integer must be predefined") + } + if IsPredefinedType("MyType") { + t.Fatal("MyType must not be predefined") + } +} diff --git a/ttcn3/v2/syntax/nodes/doc.go b/ttcn3/v2/syntax/nodes/doc.go new file mode 100644 index 00000000..ac54c6b2 --- /dev/null +++ b/ttcn3/v2/syntax/nodes/doc.go @@ -0,0 +1,16 @@ +// Package nodes contains the schema-generated AST node types for the +// next-generation TTCN-3 syntax tree. +// +// The package is the Go equivalent of vanadium/src/ast: every node is +// defined once in nodes.yaml and the generator under gen/ turns the +// schema into struct types, kind constants, a Visitor interface and a +// children accessor. This keeps the tree definition in one place +// instead of spread across nodes.go, nodes_gen.go and a maze of switch +// statements. +// +// To regenerate the file after editing nodes.yaml: +// +// cd ttcn3/v2/syntax/nodes && go generate +package nodes + +//go:generate go run ./gen diff --git a/ttcn3/v2/syntax/nodes/gen/main.go b/ttcn3/v2/syntax/nodes/gen/main.go new file mode 100644 index 00000000..1d3efad1 --- /dev/null +++ b/ttcn3/v2/syntax/nodes/gen/main.go @@ -0,0 +1,221 @@ +// gen consumes nodes.yaml and emits nodes_gen.go. 
// node is one entry of the `nodes` list in nodes.yaml. Name and Doc are
// filled by the YAML decoder; Fields is derived from FieldsRaw by main
// (via decodeFields) before the template is executed.
type node struct {
	// Name is the Go identifier of the generated struct; it is also used
	// to build the Kind<Name> constant and the Visit<Name> method.
	Name string `yaml:"name"`
	// Doc is the comment text the template renders above the struct.
	Doc string `yaml:"doc"`
	// FieldsRaw is the on-disk shape of fields: an ordered map of
	// name -> type. We post-process it into Fields after unmarshal so
	// the template sees a stable slice.
	FieldsRaw yaml.Node `yaml:"fields"`

	// Fields is the decoded form. It is not populated by the YAML
	// decoder.
	Fields []field `yaml:"-"`
}
+type Node interface { + Kind() NodeKind + Children() []Node + Accept(Visitor) +} + +// Visitor uses a dedicated Visit method per node so consumers can +// dispatch without writing switch statements. +type Visitor interface { +{{- range .Nodes }} + Visit{{ .Name }}(*{{ .Name }}) +{{- end }} +} + +{{- range .Nodes }} + +{{ comment .Doc }} +type {{ .Name }} struct { +{{- range .Fields }} + {{ .Name }} {{ fieldType .Type }} +{{- end }} +} + +// Kind returns Kind{{ .Name }}. +func (n *{{ .Name }}) Kind() NodeKind { return Kind{{ .Name }} } + +// Accept dispatches v.Visit{{ .Name }}(n). +func (n *{{ .Name }}) Accept(v Visitor) { v.Visit{{ .Name }}(n) } + +// Children returns the node-typed fields of n in source order. +func (n *{{ .Name }}) Children() []Node { + var out []Node +{{- range .Fields }} +{{- if isNodePtr .Type }} + if n.{{ .Name }} != nil { + out = append(out, n.{{ .Name }}) + } +{{- else if isNodeSlice .Type }} + for _, c := range n.{{ .Name }} { + if c != nil { + out = append(out, c) + } + } +{{- end }} +{{- end }} + return out +} +{{- end }} +` + +func main() { + in, err := os.ReadFile("nodes.yaml") + if err != nil { + log.Fatal(err) + } + var s schema + if err := yaml.Unmarshal(in, &s); err != nil { + log.Fatal(err) + } + if len(s.Nodes) == 0 { + log.Fatal("no nodes defined") + } + for i := range s.Nodes { + fs, err := decodeFields(s.Nodes[i].FieldsRaw) + if err != nil { + log.Fatalf("node %s: %v", s.Nodes[i].Name, err) + } + s.Nodes[i].Fields = fs + } + sort.SliceStable(s.Nodes, func(i, j int) bool { + return s.Nodes[i].Name < s.Nodes[j].Name + }) + + funcs := template.FuncMap{ + "comment": comment, + "fieldType": fieldType, + "isNodePtr": isNodePtr, + "isNodeSlice": isNodeSlice, + } + t, err := template.New("nodes").Funcs(funcs).Parse(tmplSrc) + if err != nil { + log.Fatal(err) + } + var buf bytes.Buffer + if err := t.Execute(&buf, s); err != nil { + log.Fatal(err) + } + out, err := format.Source(buf.Bytes()) + if err != nil { + fmt.Fprintln(os.Stderr, 
// comment renders the doc text s as a block of Go line comments, one
// "// " line per input line, joined with newlines and without a trailing
// newline.
//
// Leading and trailing whitespace of s is dropped first. If nothing is
// left, comment returns the empty string, so that a node without a doc
// entry does not get a stray empty comment line above its type. Blank
// lines inside the text become a bare "//" and trailing whitespace on each
// line is removed, which keeps the result free of trailing spaces.
func comment(s string) string {
	s = strings.TrimSpace(s)
	if s == "" {
		return ""
	}
	lines := strings.Split(s, "\n")
	for i, l := range lines {
		// Also strips the '\r' of CRLF line endings.
		l = strings.TrimRight(l, " \t\r")
		if l == "" {
			lines[i] = "//"
			continue
		}
		lines[i] = "// " + l
	}
	return strings.Join(lines, "\n")
}
# Field syntax:
#   <FieldName>: <type>
# where <type> can be one of:
#   token      -> a single Token interface value.
#   *<Node>    -> a pointer to another generated node.
#   []<Node>   -> a slice of pointers to other generated nodes.
#   []token    -> a slice of tokens.
#
# Adding a new node here, running `go generate`, and providing a
# `parse<Node>` function in parser.go is enough to teach the rest of
# the toolchain (visitor, dumper, hover, semantic tokens) about it -
# vanadium's `nodes.yml` works the same way.
+ +package nodes + +// NodeKind enumerates every node type defined in nodes.yaml. +type NodeKind int + +const ( + KindBlock NodeKind = 0 + KindDecl NodeKind = 1 + KindModule NodeKind = 2 + KindModuleDef NodeKind = 3 + KindParam NodeKind = 4 + KindParamList NodeKind = 5 + KindStmt NodeKind = 6 +) + +// String returns the human-readable name of a NodeKind. +func (k NodeKind) String() string { + switch k { + case KindBlock: + return "Block" + case KindDecl: + return "Decl" + case KindModule: + return "Module" + case KindModuleDef: + return "ModuleDef" + case KindParam: + return "Param" + case KindParamList: + return "ParamList" + case KindStmt: + return "Stmt" + } + return "" +} + +// Token is the minimal interface a generated node field needs from its +// scanner-level token type. Keeping it as an interface lets the +// generated code stay independent from the rest of the syntax package. +type Token interface{} + +// Node is implemented by every generated node type. +type Node interface { + Kind() NodeKind + Children() []Node + Accept(Visitor) +} + +// Visitor uses a dedicated Visit method per node so consumers can +// dispatch without writing switch statements. +type Visitor interface { + VisitBlock(*Block) + VisitDecl(*Decl) + VisitModule(*Module) + VisitModuleDef(*ModuleDef) + VisitParam(*Param) + VisitParamList(*ParamList) + VisitStmt(*Stmt) +} + +// A statement block. +type Block struct { + LBrace Token + Stmts []*Stmt + RBrace Token +} + +// Kind returns KindBlock. +func (n *Block) Kind() NodeKind { return KindBlock } + +// Accept dispatches v.VisitBlock(n). +func (n *Block) Accept(v Visitor) { v.VisitBlock(n) } + +// Children returns the node-typed fields of n in source order. +func (n *Block) Children() []Node { + var out []Node + for _, c := range n.Stmts { + if c != nil { + out = append(out, c) + } + } + return out +} + +// A generic declaration node. The concrete declaration kind (function, +// type, template, ...) 
is carried in KindTok so consumers don't have +// to switch on Go interfaces. +type Decl struct { + KindTok Token + Name Token + Params *ParamList + Body *Block +} + +// Kind returns KindDecl. +func (n *Decl) Kind() NodeKind { return KindDecl } + +// Accept dispatches v.VisitDecl(n). +func (n *Decl) Accept(v Visitor) { v.VisitDecl(n) } + +// Children returns the node-typed fields of n in source order. +func (n *Decl) Children() []Node { + var out []Node + if n.Params != nil { + out = append(out, n.Params) + } + if n.Body != nil { + out = append(out, n.Body) + } + return out +} + +// A TTCN-3 module declaration. +type Module struct { + ModuleTok Token + Name Token + LBrace Token + Defs []*ModuleDef + RBrace Token +} + +// Kind returns KindModule. +func (n *Module) Kind() NodeKind { return KindModule } + +// Accept dispatches v.VisitModule(n). +func (n *Module) Accept(v Visitor) { v.VisitModule(n) } + +// Children returns the node-typed fields of n in source order. +func (n *Module) Children() []Node { + var out []Node + for _, c := range n.Defs { + if c != nil { + out = append(out, c) + } + } + return out +} + +// A single definition inside a module body. +type ModuleDef struct { + Visibility Token + Decl *Decl +} + +// Kind returns KindModuleDef. +func (n *ModuleDef) Kind() NodeKind { return KindModuleDef } + +// Accept dispatches v.VisitModuleDef(n). +func (n *ModuleDef) Accept(v Visitor) { v.VisitModuleDef(n) } + +// Children returns the node-typed fields of n in source order. +func (n *ModuleDef) Children() []Node { + var out []Node + if n.Decl != nil { + out = append(out, n.Decl) + } + return out +} + +// A single formal parameter. +type Param struct { + Direction Token + TypeName Token + Name Token +} + +// Kind returns KindParam. +func (n *Param) Kind() NodeKind { return KindParam } + +// Accept dispatches v.VisitParam(n). +func (n *Param) Accept(v Visitor) { v.VisitParam(n) } + +// Children returns the node-typed fields of n in source order. 
+func (n *Param) Children() []Node { + var out []Node + return out +} + +// A parenthesised list of formal parameters. +type ParamList struct { + LParen Token + Params []*Param + RParen Token +} + +// Kind returns KindParamList. +func (n *ParamList) Kind() NodeKind { return KindParamList } + +// Accept dispatches v.VisitParamList(n). +func (n *ParamList) Accept(v Visitor) { v.VisitParamList(n) } + +// Children returns the node-typed fields of n in source order. +func (n *ParamList) Children() []Node { + var out []Node + for _, c := range n.Params { + if c != nil { + out = append(out, c) + } + } + return out +} + +// A statement placeholder; concrete kinds are TBD. +type Stmt struct { + Tok Token +} + +// Kind returns KindStmt. +func (n *Stmt) Kind() NodeKind { return KindStmt } + +// Accept dispatches v.VisitStmt(n). +func (n *Stmt) Accept(v Visitor) { v.VisitStmt(n) } + +// Children returns the node-typed fields of n in source order. +func (n *Stmt) Children() []Node { + var out []Node + return out +} diff --git a/ttcn3/v2/syntax/nodes/nodes_test.go b/ttcn3/v2/syntax/nodes/nodes_test.go new file mode 100644 index 00000000..59b687b7 --- /dev/null +++ b/ttcn3/v2/syntax/nodes/nodes_test.go @@ -0,0 +1,59 @@ +package nodes + +import "testing" + +// nullVisitor verifies that every generated node carries a working +// Accept method that dispatches to the matching Visit* hook. 
+type nullVisitor struct { + visited map[NodeKind]int +} + +func newNullVisitor() *nullVisitor { + return &nullVisitor{visited: make(map[NodeKind]int)} +} + +func (v *nullVisitor) VisitBlock(n *Block) { v.visited[n.Kind()]++ } +func (v *nullVisitor) VisitDecl(n *Decl) { v.visited[n.Kind()]++ } +func (v *nullVisitor) VisitModule(n *Module) { v.visited[n.Kind()]++ } +func (v *nullVisitor) VisitModuleDef(n *ModuleDef) { v.visited[n.Kind()]++ } +func (v *nullVisitor) VisitParam(n *Param) { v.visited[n.Kind()]++ } +func (v *nullVisitor) VisitParamList(n *ParamList) { v.visited[n.Kind()]++ } +func (v *nullVisitor) VisitStmt(n *Stmt) { v.visited[n.Kind()]++ } + +func TestVisitorDispatch(t *testing.T) { + v := newNullVisitor() + nodes := []Node{ + &Module{}, &ModuleDef{}, &Decl{}, &ParamList{}, &Param{}, &Block{}, &Stmt{}, + } + for _, n := range nodes { + n.Accept(v) + } + if got, want := len(v.visited), len(nodes); got != want { + t.Fatalf("visited %d kinds, want %d", got, want) + } +} + +func TestChildrenSkipsNil(t *testing.T) { + m := &Module{} // no children + if got := m.Children(); len(got) != 0 { + t.Fatalf("empty module should have no children, got %v", got) + } + + m2 := &Module{Defs: []*ModuleDef{nil, {}, nil, {}}} + got := m2.Children() + if len(got) != 2 { + t.Fatalf("expected 2 non-nil children, got %d", len(got)) + } +} + +func TestKindString(t *testing.T) { + cases := map[NodeKind]string{ + KindModule: "Module", + KindParamList: "ParamList", + } + for k, want := range cases { + if got := k.String(); got != want { + t.Errorf("%d.String() = %q, want %q", k, got, want) + } + } +} From e39002e02cd48cfb164d3c729345cdd1a2e9d7d2 Mon Sep 17 00:00:00 2001 From: Rafael Diniz Date: Fri, 22 May 2026 11:28:17 +0100 Subject: [PATCH 2/5] Add full Go ASN.1 frontend and address easy open issues ASN.1 frontend (X.680/X.681/X.682/X.683): * lexer, AST, recursive-descent parser with fully buffered token stream for reliable backtracking * semantic resolver with cross-module 
basket and EXPORTS/IMPORTS validation * parameterisation engine with cross-module chained substitution and caching * WITH SYNTAX / object set / component-relation class driver * lowering pass that emits parseable TTCN-3 source * wired into ttcn3/db (ParseFileFull, ASN1Location) and LSP definition handler for cross-language go-to-definition * legacy asn1.Parse / asn1.ParseFile preserved via adapter Companion fixes for open github.com/nokia/ntt issues: * #629 Windows CI matrix + path-separator-safe fs tests * #572 end-to-end getting-started guide * #640 line-offset cache in ttcn3/syntax.Root.searchLines with correctness + sequential/random benchmarks * #592 source.organizeImports LSP code action that sorts, dedupes and respects visibility per module * #650 Titan .tpd loader with recursive ReferencedProjects support and project.Open / Discover integration --- .github/workflows/ci.yml | 12 +- README.md | 7 + docs/getting-started.md | 152 ++ internal/asn1/adapter.go | 77 + internal/asn1/asn1.go | 485 +----- internal/asn1/ast/ast.go | 907 +++++++++++ internal/asn1/ast/ast_test.go | 93 ++ internal/asn1/class/class.go | 394 +++++ internal/asn1/class/class_test.go | 164 ++ internal/asn1/fixture_test.go | 60 + internal/asn1/integration_test.go | 104 ++ internal/asn1/lexer.go | 682 +++++++++ internal/asn1/lexer_test.go | 228 +++ internal/asn1/param/param.go | 266 ++++ internal/asn1/param/param_test.go | 108 ++ internal/asn1/parser.go | 1676 +++++++++++++++++++++ internal/asn1/parser_class_test.go | 104 ++ internal/asn1/parser_test.go | 258 ++++ internal/asn1/resolver/resolver.go | 472 ++++++ internal/asn1/resolver/resolver_test.go | 165 ++ internal/asn1/testdata/sample.asn | 44 + internal/asn1/transform/transform.go | 409 +++++ internal/asn1/transform/transform_test.go | 97 ++ internal/fs/fs_test.go | 42 +- internal/lsp/code_action.go | 32 + internal/lsp/definition.go | 53 + internal/lsp/general.go | 10 +- internal/lsp/organize_imports.go | 241 +++ 
internal/lsp/organize_imports_test.go | 133 ++ project/internal/titan/titan.go | 172 +++ project/internal/titan/titan_test.go | 116 ++ project/project.go | 110 +- project/tpd_test.go | 134 ++ ttcn3/asn1_lookup_test.go | 43 + ttcn3/db.go | 30 + ttcn3/syntax/nodes.go | 40 +- ttcn3/syntax/position_bench_test.go | 94 ++ 37 files changed, 7737 insertions(+), 477 deletions(-) create mode 100644 docs/getting-started.md create mode 100644 internal/asn1/adapter.go create mode 100644 internal/asn1/ast/ast.go create mode 100644 internal/asn1/ast/ast_test.go create mode 100644 internal/asn1/class/class.go create mode 100644 internal/asn1/class/class_test.go create mode 100644 internal/asn1/fixture_test.go create mode 100644 internal/asn1/integration_test.go create mode 100644 internal/asn1/lexer.go create mode 100644 internal/asn1/lexer_test.go create mode 100644 internal/asn1/param/param.go create mode 100644 internal/asn1/param/param_test.go create mode 100644 internal/asn1/parser.go create mode 100644 internal/asn1/parser_class_test.go create mode 100644 internal/asn1/parser_test.go create mode 100644 internal/asn1/resolver/resolver.go create mode 100644 internal/asn1/resolver/resolver_test.go create mode 100644 internal/asn1/testdata/sample.asn create mode 100644 internal/asn1/transform/transform.go create mode 100644 internal/asn1/transform/transform_test.go create mode 100644 internal/lsp/organize_imports.go create mode 100644 internal/lsp/organize_imports_test.go create mode 100644 project/internal/titan/titan_test.go create mode 100644 project/tpd_test.go create mode 100644 ttcn3/asn1_lookup_test.go create mode 100644 ttcn3/syntax/position_bench_test.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 028e2653..fbd8b29a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,8 +8,16 @@ on: jobs: test: + # Run the same suite on Linux, macOS, and Windows. 
Windows uses + # backslash as its path separator and is case-insensitive for file + # names, both of which used to trip up path-handling tests until + # the cleanup tracked in issue #629. name: Tests - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v4 - name: Set up Go @@ -17,7 +25,7 @@ jobs: with: go-version: stable - name: Test - run: go test -race -v ./... + run: go test -race ./... lint: name: Linting diff --git a/README.md b/README.md index 5d94e809..32949f6b 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,13 @@ You may control installation by specifying PREFIX and DESTDIR variables. For exa make install DESTDIR=$HOME/.local +# Getting started + +For a five-minute, end-to-end walkthrough that takes you from "I just +installed ntt" to running a TTCN-3 test in your editor, see +[docs/getting-started.md](docs/getting-started.md). + + # Contact us If you have questions, you are welcome to contact us at diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 00000000..ac7b020c --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,152 @@ +# Getting started with ntt + +This walkthrough gets you from "I just installed ntt" to running a +TTCN-3 test and using the LSP features in your editor in under five +minutes. It assumes you already have ntt on your PATH; see the +[install instructions](../README.md#install) if you don't. + +## 1. Create a test project + +Pick a working directory and drop in a single test file. We'll use +`/tmp/hello-ntt` for this example - swap in any path you like, but make +sure the path you pick has no spaces in it (a Windows-friendly +restriction). + +```sh +mkdir -p /tmp/hello-ntt +cd /tmp/hello-ntt +``` + +Create a `package.yml` so ntt recognises this folder as a project root: + +```yaml +# /tmp/hello-ntt/package.yml +name: hello +source_dir: . 
+``` + +Create the test file itself: + +```ttcn3 +// /tmp/hello-ntt/Hello.ttcn3 +module Hello { + function add(integer a, integer b) return integer { + return a + b; + } + + testcase TC_Add() runs on system { + if (add(2, 3) == 5) { + setverdict(pass); + } else { + setverdict(fail); + } + } + + control { + execute(TC_Add()); + } +} +``` + +## 2. List discovered tests + +```sh +ntt list +``` + +You should see: + +``` +Hello.TC_Add +``` + +If you don't, double-check that you ran `ntt` from inside the project +directory and that `package.yml` is in the same folder as the `.ttcn3` +file. + +## 3. Run the test + +```sh +ntt run +``` + +ntt executes the `control` block, which in turn calls our testcase, and +reports a pass verdict: + +``` +=== RUN Hello.TC_Add +=== PASS Hello.TC_Add 0.001s +``` + +If you change `add(2, 3) == 5` to `add(2, 3) == 6` and re-run, you'll +get a failing verdict and a non-zero exit status - exactly what a CI +pipeline needs. + +## 4. Format the code + +```sh +ntt format Hello.ttcn3 +``` + +This normalises whitespace and reflows long lines while preserving +behaviour. Use `--diff` to preview the changes without writing them +back to disk. + +## 5. Lint the project + +```sh +ntt lint +``` + +ntt's built-in linter catches common mistakes (missing default cases, +unused imports, suspicious type coercions). Configure it via the +`lint:` section of `package.yml`; the defaults are sensible for a quick +start. + +## 6. Hook up your editor + +Both [VS Code](https://marketplace.visualstudio.com/items?itemName=Nokia.ttcn3) +and [vim-lsp-settings](https://github.com/mattn/vim-lsp-settings) +auto-detect ntt as the TTCN-3 language server. 
Open the project folder +and the editor lights up with: + +- semantic highlighting and inlay hints +- diagnostics on save +- jump to definition (including jumps from TTCN-3 into ASN.1 files) +- format-on-save via `ntt format` +- code actions (`source.organizeImports`, lint quick-fixes) + +You can also point any LSP-aware editor at the binary by configuring +it to launch: + +```sh +ntt langserver +``` + +## 7. Working with Titan projects + +If you already have a `*.tpd` file from +[Eclipse Titan](https://projects.eclipse.org/projects/tools.titan), +ntt can read it directly. Run any ntt command from a directory that +contains a `.tpd` (and no `package.yml`) and ntt will discover the +descriptor automatically. You can also point at it explicitly: + +```sh +ntt list path/to/myproject.tpd +``` + +Referenced sub-projects (`<ReferencedProjects>` in the XML) are loaded +transitively, so the same command works for multi-module Titan +projects. + + +## 8. Where to go next + +- Browse the [ARCHITECTURE.md](../ARCHITECTURE.md) doc to understand + how ntt is laid out internally. +- Run `ntt help` for the full command list - `run`, `list`, `tags`, + `show`, `report`, `cover`, and others. +- Look at `examples/cmake/` for a CMake-driven integration that builds + test adapters together with the TTCN-3 code. +- File feature requests and bug reports on + [GitHub issues](https://github.com/nokia/ntt/issues). diff --git a/internal/asn1/adapter.go b/internal/asn1/adapter.go new file mode 100644 index 00000000..0eca6a56 --- /dev/null +++ b/internal/asn1/adapter.go @@ -0,0 +1,77 @@ +package asn1 + +import ( + "github.com/nokia/ntt/internal/asn1/ast" +) + +// astModuleFull re-exports the AST module type under a stable name so +// the legacy file (asn1.go) doesn't have to import ast directly. +type astModuleFull = ast.Module + +// astAssignment re-exports the AST assignment interface. +type astAssignment = ast.Assignment + +// astAssignmentName forwards to ast.AssignmentName.
+func astAssignmentName(a ast.Assignment) string { return ast.AssignmentName(a) } + +// adaptModule converts an ast.Module produced by the new frontend into +// the legacy `Module` envelope this package has exported since the +// initial ASN.1 support shipped. Everything in the new tree maps +// straightforwardly except for the assignment kind, which we infer +// from the AST type rather than re-classifying via the source. +func adaptModule(m *ast.Module) *Module { + if m == nil { + return &Module{} + } + out := &Module{ + Name: m.Identifier.Name, + TaggingDefault: taggingLabel(m.Tagging), + } + if m.Identifier.OID != nil { + out.OID = m.Identifier.OID.Raw + } + for _, imp := range m.Imports { + out.Imports = append(out.Imports, Import{From: imp.From, Symbols: append([]string(nil), imp.Symbols...)}) + } + if m.Exports != nil && !m.Exports.All { + out.Exports = append(out.Exports, m.Exports.Symbols...) + } + for _, a := range m.Assignments { + out.Assignments = append(out.Assignments, Assignment{ + Name: ast.AssignmentName(a), + Kind: assignmentKindOf(a), + }) + } + for _, d := range m.Diagnostics { + out.Diagnostics = append(out.Diagnostics, Diagnostic{ + Line: 1, // line/col were never accurate; clients only use Message + Column: d.Pos, + Message: d.Message, + }) + } + return out +} + +func taggingLabel(t ast.TaggingMode) string { + switch t { + case ast.TagsImplicit: + return "IMPLICIT" + case ast.TagsAutomatic: + return "AUTOMATIC" + case ast.TagsExplicit: + return "EXPLICIT" + } + return "" +} + +func assignmentKindOf(a ast.Assignment) AssignmentKind { + switch a.(type) { + case *ast.TypeAssignment, *ast.ValueSetTypeAssignment: + return TypeKind + case *ast.ValueAssignment: + return ValueKind + case *ast.ObjectClassAssignment: + return ObjectClassKind + } + return UnknownKind +} diff --git a/internal/asn1/asn1.go b/internal/asn1/asn1.go index 18b13897..9d0edc25 100644 --- a/internal/asn1/asn1.go +++ b/internal/asn1/asn1.go @@ -23,10 +23,10 @@ package asn1 
import ( "fmt" - "os" "sort" "strings" - "unicode" + + "github.com/nokia/ntt/internal/fs" ) // Module is the in-memory representation of a single ASN.1 module. @@ -93,9 +93,11 @@ type Diagnostic struct { Message string } -// ParseFile reads and parses the ASN.1 source at path. +// ParseFile reads and parses the ASN.1 source at path, returning the +// legacy Module envelope. Reads go through the workspace virtual file +// system so editor buffers override on-disk contents. func ParseFile(path string) (*Module, error) { - b, err := os.ReadFile(path) + b, err := fs.Open(path).Bytes() if err != nil { return nil, err } @@ -104,463 +106,34 @@ func ParseFile(path string) (*Module, error) { return m, nil } -// Parse parses src as an ASN.1 module and returns the result. The -// returned *Module is always non-nil; check Diagnostics for parse -// issues. Unrecognised constructs are tolerated and skipped, which -// matches what users expect from an LSP front-end. -func Parse(src []byte) *Module { - p := newParser(string(src)) - mod := p.parseModule() - mod.Diagnostics = append(mod.Diagnostics, p.diags...) - return mod -} - -// parser is intentionally simple: it operates on a string and a byte -// offset and uses Go's unicode helpers for character classification. -// ASN.1 is line-oriented enough that this gives the same fidelity as a -// hand-written scanner without the boilerplate. -type parser struct { - src string - pos int - line int - col int - diags []Diagnostic -} - -func newParser(src string) *parser { - return &parser{src: src, line: 1, col: 1} -} - -func (p *parser) eof() bool { return p.pos >= len(p.src) } - -func (p *parser) peek() byte { - if p.eof() { - return 0 - } - return p.src[p.pos] -} - -func (p *parser) advance() byte { - if p.eof() { - return 0 - } - b := p.src[p.pos] - p.pos++ - if b == '\n' { - p.line++ - p.col = 1 - } else { - p.col++ - } - return b -} - -// skipWhitespaceAndComments eats spaces, tabs, newlines and ASN.1 line -// comments (`--`). 
Block comments are uncommon in protocol files but -// supported for completeness. -func (p *parser) skipWhitespaceAndComments() { - for !p.eof() { - c := p.peek() - switch { - case c == ' ' || c == '\t' || c == '\n' || c == '\r': - p.advance() - case c == '-' && p.pos+1 < len(p.src) && p.src[p.pos+1] == '-': - // Line comment. - for !p.eof() { - ch := p.advance() - if ch == '\n' { - break - } - } - case c == '/' && p.pos+1 < len(p.src) && p.src[p.pos+1] == '*': - p.advance() - p.advance() - for !p.eof() { - ch := p.advance() - if ch == '*' && p.peek() == '/' { - p.advance() - break - } - } - default: - return - } - } -} - -func (p *parser) readWhile(pred func(byte) bool) string { - start := p.pos - for !p.eof() && pred(p.peek()) { - p.advance() - } - return p.src[start:p.pos] -} - -func isIdentChar(b byte) bool { - r := rune(b) - return r == '-' || unicode.IsLetter(r) || unicode.IsDigit(r) -} - -func (p *parser) readIdentifier() string { - p.skipWhitespaceAndComments() - if p.eof() { - return "" - } - c := p.peek() - if !unicode.IsLetter(rune(c)) { - return "" - } - return p.readWhile(isIdentChar) -} - -func (p *parser) expectKeyword(kw string) bool { - saved := *p - p.skipWhitespaceAndComments() - if strings.HasPrefix(p.src[p.pos:], kw) { - end := p.pos + len(kw) - if end == len(p.src) || !isIdentChar(p.src[end]) { - for i := 0; i < len(kw); i++ { - p.advance() - } - return true - } - } - *p = saved - return false -} - -func (p *parser) parseModule() *Module { - m := &Module{} - - name := p.readIdentifier() - if name == "" { - p.error("expected module identifier") - return m - } - m.Name = name - - // Optional object identifier. - p.skipWhitespaceAndComments() - if p.peek() == '{' { - m.OID = p.readBalanced('{', '}') - } - - if !p.expectKeyword("DEFINITIONS") { - p.error("expected DEFINITIONS keyword") - return m - } - - // Tagging default. 
- for _, kw := range []string{"EXPLICIT", "IMPLICIT", "AUTOMATIC"} { - if p.expectKeyword(kw) { - m.TaggingDefault = kw - break - } - } - p.expectKeyword("TAGS") - p.expectKeyword("EXTENSIBILITY") - p.expectKeyword("IMPLIED") - - p.skipWhitespaceAndComments() - if p.peek() == ':' { - // Expect "::= BEGIN" - p.advance() - p.advance() - p.advance() // = sign - } - p.expectKeyword("BEGIN") - - m.Exports = p.parseExports() - m.Imports = p.parseImports() - m.Assignments = p.parseAssignments() - return m -} - -func (p *parser) parseExports() []string { - if !p.expectKeyword("EXPORTS") { - return nil - } - p.skipWhitespaceAndComments() - // "EXPORTS ALL ;" exports everything; we return nil to signal that. - if p.expectKeyword("ALL") { - p.skipUntilSemicolon() - return nil - } - var out []string - for !p.eof() { - p.skipWhitespaceAndComments() - if p.peek() == ';' { - p.advance() - return out - } - id := p.readIdentifier() - if id == "" { - p.advance() - continue - } - out = append(out, id) - p.skipWhitespaceAndComments() - if p.peek() == ',' { - p.advance() - } - } - return out -} - -func (p *parser) parseImports() []Import { - if !p.expectKeyword("IMPORTS") { - return nil - } - var out []Import - for !p.eof() { - p.skipWhitespaceAndComments() - if p.peek() == ';' { - p.advance() - return out - } - var symbols []string - // Read comma-separated symbol list until FROM. - for !p.eof() { - p.skipWhitespaceAndComments() - if p.expectKeyword("FROM") { - break - } - id := p.readIdentifier() - if id == "" { - p.advance() - continue - } - symbols = append(symbols, id) - p.skipWhitespaceAndComments() - if p.peek() == ',' { - p.advance() - } - } - from := p.readIdentifier() - if from == "" { - p.error("expected module name after FROM") - return out - } - // Skip an optional OID after the module name. 
- p.skipWhitespaceAndComments() - if p.peek() == '{' { - p.readBalanced('{', '}') - } - out = append(out, Import{From: from, Symbols: symbols}) - } - return out -} - -func (p *parser) parseAssignments() []Assignment { - var out []Assignment - for !p.eof() { - p.skipWhitespaceAndComments() - if p.expectKeyword("END") { - return out - } - name := p.readIdentifier() - if name == "" { - // Skip unknown token defensively. - p.advance() - continue - } - // Walk forward past any (TypeRef | parameter list) tokens - // to find the `::=`. This handles both type assignments - // (Name ::=) and value assignments (name Type ::=). - if !p.advanceTo("::=") { - p.skipUntilLineStart() - continue - } - // Consume "::=" - p.advance() - p.advance() - p.advance() - p.skipWhitespaceAndComments() - kind := classify(name, p.peek()) - out = append(out, Assignment{Name: name, Kind: kind}) - // Skip the assignment body. Heuristic: stop at the next - // top-level identifier-followed-by-"::=" or END. - p.skipAssignmentBody() - } - return out -} - -// advanceTo consumes tokens (identifiers, balanced brackets and -// individual characters) until it finds the literal target at the -// current position. Returns false on EOF, on a newline encountered -// without an intervening "{...}" - which would mean the assignment is -// malformed - or after a reasonable token budget. -func (p *parser) advanceTo(target string) bool { - const maxTokens = 16 - for i := 0; i < maxTokens && !p.eof(); i++ { - p.skipWhitespaceAndComments() - if strings.HasPrefix(p.src[p.pos:], target) { - return true - } - switch p.peek() { - case '{': - p.readBalanced('{', '}') - case '(': - p.readBalanced('(', ')') - case '[': - p.readBalanced('[', ']') - default: - if id := p.readIdentifier(); id == "" { - return false - } - } - } - return false -} - -// isAssignmentStart looks ahead from p.pos and reports whether the next -// non-whitespace tokens form the start of an ASN.1 top-level -// assignment. It does not consume input. 
-func isAssignmentStart(p *parser) bool { - probe := *p - probe.skipWhitespaceAndComments() - if probe.eof() { - return false - } - if !unicode.IsLetter(rune(probe.peek())) { - return false - } - for i := 0; i < 4 && !probe.eof(); i++ { - probe.skipWhitespaceAndComments() - if strings.HasPrefix(probe.src[probe.pos:], "::=") { - return true - } - if probe.peek() == '\n' { - return false - } - switch probe.peek() { - case '{': - probe.readBalanced('{', '}') - case '(': - probe.readBalanced('(', ')') - default: - if id := probe.readIdentifier(); id == "" { - return false - } - } - } - return false -} - -func classify(name string, lookahead byte) AssignmentKind { - // ASN.1 convention: types start uppercase, values lowercase. The - // lookahead helps disambiguate object class assignments which can - // be uppercase but begin with a CLASS keyword. - if name == "" { - return UnknownKind - } - first := rune(name[0]) - switch { - case unicode.IsUpper(first): - if lookahead == 'C' { - return ObjectClassKind - } - return TypeKind - case unicode.IsLower(first): - return ValueKind - } - return UnknownKind -} - -func (p *parser) skipAssignmentBody() { - // We walk until we either reach END or detect a new top-level - // `Name ::=`. Track bracket depth so we don't terminate inside - // nested structures. - depth := 0 - for !p.eof() { - c := p.peek() - switch c { - case '{', '(', '[': - depth++ - p.advance() - case '}', ')', ']': - depth-- - p.advance() - case '\n': - p.advance() - if depth == 0 { - saved := *p - p.skipWhitespaceAndComments() - if p.expectKeyword("END") { - *p = saved - return - } - // A new top-level assignment looks like one of: - // Name ::= - // Name Type ::= - // Name { args } ::= - // Walk forward up to a handful of identifiers - // or a balanced brace until we find "::=" on - // the same logical line. 
- if isAssignmentStart(p) { - *p = saved - return - } - *p = saved - p.advance() - } - case '-': - if p.pos+1 < len(p.src) && p.src[p.pos+1] == '-' { - p.skipWhitespaceAndComments() - continue - } - p.advance() - default: - p.advance() - } - } -} - -func (p *parser) skipUntilSemicolon() { - for !p.eof() { - if p.advance() == ';' { - return - } +// ParseFileFull reads path and returns the full AST. Use this when you +// need byte-precise position info for the assignments (e.g. for "go to +// definition" jumps in the LSP). Same fs-aware semantics as ParseFile. +func ParseFileFull(path string) (*astModuleFull, error) { + b, err := fs.Open(path).Bytes() + if err != nil { + return nil, err } -} - -func (p *parser) skipUntilLineStart() { - for !p.eof() { - if p.advance() == '\n' { - return - } + m := ParseModule(b) + if m != nil { + m.Filename = path } + return m, nil } -// readBalanced reads a balanced run of bytes starting at open and ending -// at the matching close. The returned string includes both delimiters. -func (p *parser) readBalanced(open, close byte) string { - if p.peek() != open { - return "" - } - start := p.pos - depth := 0 - for !p.eof() { - c := p.advance() - switch c { - case open: - depth++ - case close: - depth-- - if depth == 0 { - return p.src[start:p.pos] - } - } - } - return p.src[start:] -} +// AssignmentName returns the name on the LHS of an AST assignment. +// It's a re-export so the ttcn3 package can avoid an import of the +// internal/asn1/ast package directly. +func AssignmentName(a astAssignment) string { return astAssignmentName(a) } -func (p *parser) error(msg string) { - p.diags = append(p.diags, Diagnostic{ - Line: p.line, - Column: p.col, - Message: msg, - }) +// Parse parses src as an ASN.1 module and returns the legacy Module +// envelope. Under the hood we use the full X.680/X.681/X.682/X.683 +// frontend in this package and adapt the result to the legacy shape so +// existing callers (notably ttcn3/db.go) keep working unchanged. 
The +// adapter will be removed once Phase 10's retire-envelope-shim task +// lands. +func Parse(src []byte) *Module { + return adaptModule(ParseModule(src)) } // String renders a Module's exported summary for debugging. diff --git a/internal/asn1/ast/ast.go b/internal/asn1/ast/ast.go new file mode 100644 index 00000000..8c6d4fad --- /dev/null +++ b/internal/asn1/ast/ast.go @@ -0,0 +1,907 @@ +// Package ast defines the abstract syntax tree for ASN.1 source files +// covering X.680 (types/values/constraints/tagging), X.681 (information +// object classes), X.682 (general constraints) and X.683 +// (parameterisation). +// +// All nodes implement Node and expose byte-precise source positions +// suitable for LSP responses and diagnostics. Position values are byte +// offsets into the same source slice the lexer scanned. +package ast + +// Node is the root interface implemented by every AST node. +type Node interface { + Pos() int // byte offset of the first token + End() int // byte offset past the last token +} + +// Span is a small embeddable helper so concrete node types don't have +// to spell out Pos() / End() repeatedly. It's exported so the parser +// (which lives in a separate package) can construct nodes via plain +// struct literals. +type Span struct { + P int + E int +} + +// Pos returns the byte offset of the first token. +func (s Span) Pos() int { return s.P } + +// End returns the byte offset past the last token. +func (s Span) End() int { return s.E } + +// SetRange updates the span. The pointer receiver lets callers update +// a span in-place through method promotion (e.g. `m.SetRange(0, 42)` +// when m embeds Span). +func (s *Span) SetRange(pos, end int) { s.P, s.E = pos, end } + +// NewSpan constructs a Span with the given offsets. 
+func NewSpan(pos, end int) Span { return Span{P: pos, E: end} } + +// TypeBase, ValueBase, ConstraintElementBase, and ObjectSetElementBase +// embed Span and contribute the marker methods that distinguish each +// AST family. They're exported so callers in other packages (notably +// the parser) can construct nodes via struct literals. +type TypeBase struct{ Span } + +func (TypeBase) typeNode() {} + +type ValueBase struct{ Span } + +func (ValueBase) valueNode() {} + +type ConstraintElementBase struct{ Span } + +func (ConstraintElementBase) constraintElementNode() {} + +type ObjectSetElementBase struct{ Span } + +func (ObjectSetElementBase) objectSetElementNode() {} + + +// --------------------------------------------------------------------------- +// Top level +// --------------------------------------------------------------------------- + +// Module is the top of the tree, one per ASN.1 source file. +type Module struct { + Span + Identifier ModuleIdentifier + Tagging TaggingMode // default tagging mode + Extensible bool // EXTENSIBILITY IMPLIED + Exports *Exports // optional; nil = "EXPORTS ALL" + Imports []*Import // FROM clauses, in source order + Assignments []Assignment // type/value/class/object/set assignments + Diagnostics []Diagnostic // accumulated during parse + resolve + Filename string // path that produced this module (empty for in-memory) +} + +// ModuleIdentifier is "Name { ... }" header. +type ModuleIdentifier struct { + Span + Name string + OID *OID // optional + IRI string + DefinitiveName string // raw text for editor display +} + +// OID is an ASN.1 object identifier value (a sequence of name/number +// components). The raw source text is preserved in Raw for round-trip. +type OID struct { + Span + Components []OIDComponent + Raw string +} + +// OIDComponent is one element of an OID, e.g. `itu-t(0)` or `0`. 
+type OIDComponent struct { + Span + Name string // optional textual part + Number int64 // numeric value if known, otherwise 0 + HasNum bool // distinguishes "name" from "0" +} + +// TaggingMode is one of the three module-level tagging defaults. +type TaggingMode int + +const ( + TagsExplicit TaggingMode = iota // default per X.680 + TagsImplicit + TagsAutomatic +) + +// Exports is the optional EXPORTS clause body. +type Exports struct { + Span + All bool // "EXPORTS ALL" + Symbols []string // explicit symbol list when All is false +} + +// Import is a single "Symbol[, Symbol]* FROM Module [{OID}]" entry. +type Import struct { + Span + Symbols []string + From string + OID *OID +} + +// --------------------------------------------------------------------------- +// Assignments +// --------------------------------------------------------------------------- + +// Assignment is the interface implemented by every top-level assignment +// inside a module body. +type Assignment interface { + Node + assignmentName() string +} + +// AssignmentName returns the identifier on the left-hand side of an +// assignment. +func AssignmentName(a Assignment) string { return a.assignmentName() } + +// TypeAssignment: `Name ::= Type`. +type TypeAssignment struct { + Span + Name string + Params *ParameterList // optional X.683 parameter list + Type Type +} + +func (a *TypeAssignment) assignmentName() string { return a.Name } + +// ValueAssignment: `name Type ::= Value`. +type ValueAssignment struct { + Span + Name string + Type Type + Value Value +} + +func (a *ValueAssignment) assignmentName() string { return a.Name } + +// ValueSetTypeAssignment: `Name Type ::= { ElementSet }`. +type ValueSetTypeAssignment struct { + Span + Name string + Type Type + Set *ElementSet +} + +func (a *ValueSetTypeAssignment) assignmentName() string { return a.Name } + +// ObjectClassAssignment: `NAME ::= CLASS { fieldSpecs } [WITH SYNTAX { ... }]`. 
+type ObjectClassAssignment struct { + Span + Name string + Params *ParameterList + Class *ObjectClass +} + +func (a *ObjectClassAssignment) assignmentName() string { return a.Name } + +// ObjectAssignment: `name CLASSREF ::= { objectBody }`. +type ObjectAssignment struct { + Span + Name string + Params *ParameterList + ClassRef *TypeRef + Object *Object +} + +func (a *ObjectAssignment) assignmentName() string { return a.Name } + +// ObjectSetAssignment: `NAME CLASSREF ::= { ObjectSet }`. +type ObjectSetAssignment struct { + Span + Name string + Params *ParameterList + ClassRef *TypeRef + Set *ObjectSet +} + +func (a *ObjectSetAssignment) assignmentName() string { return a.Name } + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +// Type is the interface for any ASN.1 type expression. +type Type interface { + Node + typeNode() +} + + +// BuiltinKind enumerates X.680 builtin types. +type BuiltinKind int + +const ( + UnknownBuiltin BuiltinKind = iota + Boolean + Integer + Real + Null + BitString + OctetString + ObjectIdentifier + RelativeOID + OIDIRI + RelativeOIDIRI + Enumerated + UTCTime + GeneralizedTime + External + EmbeddedPDV + CharacterString + Date + TimeOfDay + DateTime + Duration + Time + // Restricted string types - X.680 §41. + BMPString + GeneralString + GraphicString + IA5String + ISO646String + NumericString + PrintableString + TeletexString + T61String + UniversalString + UTF8String + VideotexString + VisibleString + ObjectDescriptor +) + +// BuiltinType is a simple non-parameterised builtin (boolean, integer, +// real, ...). Types that carry data (BIT STRING with named bits, +// ENUMERATED with members, INTEGER with named numbers, SEQUENCE OF X) +// have their own dedicated node types below. 
+type BuiltinType struct { + TypeBase + Kind BuiltinKind + Name string // raw keyword text for round-trip ("INTEGER", "BIT STRING") +} + +// IntegerType: `INTEGER` optionally followed by `{ name(value), ... }`. +type IntegerType struct { + TypeBase + NamedNumbers []NamedNumber +} + +// NamedNumber is `name(value)` inside INTEGER / BIT STRING / ENUMERATED. +type NamedNumber struct { + Span + Name string + Value Value +} + +// BitStringType: `BIT STRING` optionally followed by named bits. +type BitStringType struct { + TypeBase + NamedBits []NamedNumber +} + +// EnumeratedType: `ENUMERATED { name(val), ..., name }` with extension. +type EnumeratedType struct { + TypeBase + Items []EnumItem + Extensible bool + Extensions []EnumItem // entries after the extension marker +} + +// EnumItem is a single named ENUMERATED entry with an optional value. +type EnumItem struct { + Span + Name string + Value Value // optional +} + +// SequenceType: `SEQUENCE { components } [...] [, components]`. +type SequenceType struct { + TypeBase + Components []Component + Extensible bool + Extensions []ExtensionAddition +} + +// SetType: `SET { ... }`, otherwise structurally identical to SEQUENCE. +type SetType struct { + TypeBase + Components []Component + Extensible bool + Extensions []ExtensionAddition +} + +// ChoiceType: `CHOICE { alternatives ... }`. +type ChoiceType struct { + TypeBase + Alternatives []Component + Extensible bool + Extensions []ExtensionAddition +} + +// SequenceOfType: `SEQUENCE [(size)] OF Type`. +type SequenceOfType struct { + TypeBase + Element Type + Constraint *Constraint // optional size constraint +} + +// SetOfType: `SET [(size)] OF Type`. +type SetOfType struct { + TypeBase + Element Type + Constraint *Constraint +} + +// Component is one named member of a SEQUENCE / SET / CHOICE.
+type Component struct { + Span + Name string + Type Type + Optional bool + Default Value + // COMPONENTS OF support: when true, Type is a referenced type + // whose own components should be spliced in here. + ComponentsOf bool +} + +// ExtensionAddition wraps one or more components added between [[ ]] +// or directly after the extension marker. +type ExtensionAddition struct { + Span + Group int // 0 means "ungrouped, immediately after ...". 1+ = [[ N: ... + Components []Component +} + +// TaggedType: `[Tag] Type`. +type TaggedType struct { + TypeBase + Tag Tag + Underlying Type +} + +// TagClass enumerates the four ASN.1 tag classes. +type TagClass int + +const ( + ContextSpecificTag TagClass = iota // [N] with no class keyword + UniversalTag // [UNIVERSAL N] + ApplicationTag // [APPLICATION N] + PrivateTag // [PRIVATE N] +) + +// TagMode is IMPLICIT, EXPLICIT or unspecified (use module default). +type TagMode int + +const ( + TagModeUnspecified TagMode = iota + TagModeImplicit + TagModeExplicit +) + +// Tag is `[CLASS N]` with optional IMPLICIT/EXPLICIT mode. +type Tag struct { + Span + Class TagClass + Number Value // typically an integer literal but may be a reference + Mode TagMode +} + +// ReferencedType wraps a use of a previously declared type. +type ReferencedType struct { + TypeBase + Ref *TypeRef + // Actuals carries actual parameters for X.683 instantiation, if any. + Actuals *ActualParameterList +} + +// TypeRef is a possibly qualified reference like `Foo` or `Mod.Foo`. +type TypeRef struct { + Span + Module string // optional + Name string +} + +// ConstrainedType wraps an inner type with a constraint. +type ConstrainedType struct { + TypeBase + Inner Type + Constraint *Constraint +} + +// AnyType is used as a placeholder when the parser cannot identify a +// type but recovers to continue parsing. Diagnostics will explain. 
+type AnyType struct {
+	TypeBase
+	Raw string
+}
+
+// OpenTypeFieldType is a use of `CLASS.&Field` where the field is a
+// type field, producing an open type at the use site.
+type OpenTypeFieldType struct {
+	TypeBase
+	ClassRef *TypeRef
+	Field    string // e.g. "&Type"
+}
+
+// ---------------------------------------------------------------------------
+// Values
+// ---------------------------------------------------------------------------
+
+// Value is the interface for any ASN.1 value expression. The
+// unexported valueNode method restricts implementations to this
+// package.
+type Value interface {
+	Node
+	valueNode()
+}
+
+// NOTE(review): ValueBase, embedded by every value node below, is not
+// declared in this part of the file; confirm where it is defined.
+
+// IntegerValue holds a signed integer literal.
+type IntegerValue struct {
+	ValueBase
+	Text string // source text (lets callers re-parse big numbers)
+}
+
+// RealValue holds a real-number literal.
+type RealValue struct {
+	ValueBase
+	Text string
+}
+
+// BooleanValue is TRUE or FALSE.
+type BooleanValue struct {
+	ValueBase
+	Value bool
+}
+
+// NullValue represents the NULL value of NULL type.
+type NullValue struct{ ValueBase }
+
+// StringValue covers CSTRING, BSTRING, HSTRING literals; Kind preserves
+// which form was used.
+type StringValue struct {
+	ValueBase
+	Kind StringKind
+	Text string // raw text including delimiters
+}
+
+// StringKind disambiguates the three ASN.1 string literal flavours.
+type StringKind int
+
+const (
+	StringCString StringKind = iota
+	StringBString
+	StringHString
+)
+
+// OIDValue is an object identifier literal `{ a b(2) c(3) }`.
+type OIDValue struct {
+	ValueBase
+	OID *OID
+}
+
+// ReferenceValue is a name, optionally qualified by a module, that
+// resolves to another value.
+type ReferenceValue struct {
+	ValueBase
+	Module string
+	Name   string
+}
+
+// ChoiceValue: `name : value`.
+type ChoiceValue struct {
+	ValueBase
+	Alternative string
+	Value       Value
+}
+
+// SequenceValue: `{ name v, name v, ... }` for SEQUENCE/SET.
+type SequenceValue struct {
+	ValueBase
+	Fields []NamedValue
+}
+
+// NamedValue is one entry inside a SEQUENCE/SET value.
+type NamedValue struct {
+	Span
+	Name  string
+	Value Value
+}
+
+// SequenceOfValue: `{ v1, v2, v3, ... }`.
+type SequenceOfValue struct {
+	ValueBase
+	Elements []Value
+}
+
+// ---------------------------------------------------------------------------
+// Constraints
+// ---------------------------------------------------------------------------
+
+// Constraint wraps the outer parens of `(...)`. The body is an
+// ElementSet (X.680 §49).
+type Constraint struct {
+	Span
+	Set       *ElementSet
+	Exception *Exception // optional `! ExceptionSpec`
+}
+
+// Exception captures the optional `! errorValue` annotation after a
+// constraint.
+type Exception struct {
+	Span
+	Raw string
+}
+
+// ElementSet is a (possibly extensible) tree of unions/intersections of
+// constraint elements.
+type ElementSet struct {
+	Span
+	Root       UnionExpr
+	Extensible bool
+	Extension  UnionExpr // optional set after `, ...`
+}
+
+// UnionExpr is a list of intersections joined by `|` / UNION.
+type UnionExpr []IntersectionExpr
+
+// IntersectionExpr is a list of elements joined by `^` / INTERSECTION.
+type IntersectionExpr []ConstraintElement
+
+// ConstraintElement is one atom in a constraint expression. The
+// unexported constraintElementNode method restricts implementations to
+// this package.
+type ConstraintElement interface {
+	Node
+	constraintElementNode()
+}
+
+// NOTE(review): ConstraintElementBase, embedded by the element types
+// below, is not declared in this part of the file; confirm where it
+// is defined.
+
+// SingleValueConstraint: literal value.
+type SingleValueConstraint struct {
+	ConstraintElementBase
+	Value Value
+}
+
+// ValueRangeConstraint: `(lo..hi)`, with optional open endpoints.
+// NOTE(review): the flag names suggest LowerOpen/UpperOpen mark `<`
+// endpoints and LowerIsMin/UpperIsMax mark MIN/MAX; confirm against
+// the parser.
+type ValueRangeConstraint struct {
+	ConstraintElementBase
+	Lower      Value
+	LowerOpen  bool
+	Upper      Value
+	UpperOpen  bool
+	LowerIsMin bool
+	UpperIsMax bool
+}
+
+// SizeConstraint: `SIZE (N)` or `SIZE (lo..hi)`.
+type SizeConstraint struct {
+	ConstraintElementBase
+	Constraint *Constraint
+}
+
+// AlphabetConstraint: `FROM ("abc")` and friends.
+type AlphabetConstraint struct {
+	ConstraintElementBase
+	Constraint *Constraint
+}
+
+// TypeConstraint constrains to subtype `Type`.
+type TypeConstraint struct {
+	ConstraintElementBase
+	Type Type
+}
+
+// ContainedSubtype: `INCLUDES Type` or `(Type)` shorthand.
+type ContainedSubtype struct {
+	ConstraintElementBase
+	Type Type
+}
+
+// PatternConstraint: `PATTERN value`.
+type PatternConstraint struct {
+	ConstraintElementBase
+	Pattern Value
+}
+
+// PropertySettings: `SETTINGS "..."`.
+type PropertySettings struct {
+	ConstraintElementBase
+	Settings string
+}
+
+// InnerTypeConstraint: `WITH COMPONENT(S) { ... }`.
+type InnerTypeConstraint struct {
+	ConstraintElementBase
+	Single      bool // true = "WITH COMPONENT", false = "WITH COMPONENTS"
+	Constraint  *Constraint
+	Components  []InnerComponent
+	PartialFlag bool // true when the component list includes "..."
+}
+
+// InnerComponent is one entry inside a `WITH COMPONENTS` block.
+type InnerComponent struct {
+	Span
+	Name       string
+	Constraint *Constraint
+	Presence   Presence
+}
+
+// Presence is the optional presence keyword in a `WITH COMPONENTS`.
+type Presence int
+
+// PresenceUnspecified is the zero value, used when no keyword was
+// written.
+const (
+	PresenceUnspecified Presence = iota
+	PresencePresent
+	PresenceAbsent
+	PresenceOptional
+)
+
+// TableConstraint: `({ObjectSet})` or `({ObjectSet}{@field.path})`.
+type TableConstraint struct {
+	ConstraintElementBase
+	ObjectSet  *ObjectSet
+	AtNotation *AtNotation
+}
+
+// AtNotation captures `{@component.field}` style field references that
+// drive component-relation constraints.
+type AtNotation struct {
+	Span
+	Level int      // number of leading `.` characters (0 = root, 1 = `.x`, ...)
+	Path  []string // dotted path components
+}
+
+// UserDefinedConstraint: `CONSTRAINED BY { ... }`. We preserve the raw
+// text since the body is implementation defined.
+type UserDefinedConstraint struct {
+	ConstraintElementBase
+	Raw string
+}
+
+// ExceptConstraint: `A EXCEPT B`.
+type ExceptConstraint struct {
+	ConstraintElementBase
+	Base    ConstraintElement // A
+	Exclude ConstraintElement // B
+}
+
+// AllExceptConstraint: `ALL EXCEPT B`.
+type AllExceptConstraint struct {
+	ConstraintElementBase
+	Exclude ConstraintElement
+}
+
+// ---------------------------------------------------------------------------
+// X.681 information object classes
+// ---------------------------------------------------------------------------
+
+// ObjectClass is the body of an X.681 class definition.
+type ObjectClass struct {
+	Span
+	Fields     []FieldSpec
+	WithSyntax *WithSyntaxSpec // optional
+}
+
+// FieldSpec is the interface for one field inside an ObjectClass. The
+// unexported fieldName method restricts implementations to this
+// package; use FieldName to read the name from outside.
+type FieldSpec interface {
+	Node
+	fieldName() string
+}
+
+// FieldName returns the &Foo/&foo name of a field spec. f must be
+// non-nil.
+func FieldName(f FieldSpec) string { return f.fieldName() }
+
+// TypeFieldSpec: `&Foo [OPTIONAL] [DEFAULT Type]`.
+type TypeFieldSpec struct {
+	Span
+	Name     string // includes leading "&"
+	Optional bool
+	Default  Type
+}
+
+// fieldName implements FieldSpec.
+func (f *TypeFieldSpec) fieldName() string { return f.Name }
+
+// FixedTypeValueFieldSpec: `&foo Type [UNIQUE] [OPTIONAL] [DEFAULT Value]`.
+type FixedTypeValueFieldSpec struct {
+	Span
+	Name     string
+	Type     Type
+	Unique   bool
+	Optional bool
+	Default  Value
+}
+
+// fieldName implements FieldSpec.
+func (f *FixedTypeValueFieldSpec) fieldName() string { return f.Name }
+
+// VariableTypeValueFieldSpec: `&foo &Field [OPTIONAL] [DEFAULT Value]`.
+type VariableTypeValueFieldSpec struct {
+	Span
+	Name      string
+	FieldName string // referenced &Type field on same class
+	Optional  bool
+	Default   Value
+}
+
+// fieldName implements FieldSpec. It returns Name, not the referenced
+// FieldName.
+func (f *VariableTypeValueFieldSpec) fieldName() string { return f.Name }
+
+// FixedTypeValueSetFieldSpec: `&Foo Type [OPTIONAL] [DEFAULT {Set}]`.
+type FixedTypeValueSetFieldSpec struct {
+	Span
+	Name     string
+	Type     Type
+	Optional bool
+	Default  *ElementSet
+}
+
+// fieldName implements FieldSpec.
+func (f *FixedTypeValueSetFieldSpec) fieldName() string { return f.Name }
+
+// VariableTypeValueSetFieldSpec: `&Foo &Field [OPTIONAL] [DEFAULT {Set}]`.
+type VariableTypeValueSetFieldSpec struct {
+	Span
+	Name      string
+	FieldName string // referenced &Field on the same class
+	Optional  bool
+	Default   *ElementSet
+}
+
+// fieldName implements FieldSpec. It returns Name, not the referenced
+// FieldName.
+func (f *VariableTypeValueSetFieldSpec) fieldName() string { return f.Name }
+
+// ObjectFieldSpec: `&foo CLASS [OPTIONAL] [DEFAULT object]`.
+type ObjectFieldSpec struct {
+	Span
+	Name     string
+	ClassRef *TypeRef
+	Optional bool
+	Default  *Object
+}
+
+// fieldName implements FieldSpec.
+func (f *ObjectFieldSpec) fieldName() string { return f.Name }
+
+// ObjectSetFieldSpec: `&Foo CLASS [OPTIONAL] [DEFAULT { Set }]`.
+type ObjectSetFieldSpec struct {
+	Span
+	Name     string
+	ClassRef *TypeRef
+	Optional bool
+	Default  *ObjectSet
+}
+
+// fieldName implements FieldSpec.
+func (f *ObjectSetFieldSpec) fieldName() string { return f.Name }
+
+// WithSyntaxSpec is the literal-or-field stream that drives X.681
+// object literal parsing.
+type WithSyntaxSpec struct {
+	Span
+	Tokens []WithSyntaxToken
+}
+
+// WithSyntaxToken is either a literal WORD, an optional `[ ... ]`
+// group, or a field reference (&Foo / &foo). Tokens form a stream in
+// template order; an optional group nests its own tokens in Group.
+// The class package's WithSyntaxParser walks the stream.
+type WithSyntaxToken struct {
+	Span
+	Kind  WithSyntaxKind
+	Text  string            // for Word / FieldRef
+	Group []WithSyntaxToken // for OptionalGroup
+}
+
+// WithSyntaxKind enumerates the three template token forms.
+type WithSyntaxKind int
+
+const (
+	WSKWord WithSyntaxKind = iota
+	WSKFieldRef
+	WSKOptionalGroup
+)
+
+// Object is the parsed body of an object literal, kept as an ordered
+// list of settings.
+type Object struct {
+	Span
+	Settings []ObjectSetting
+}
+
+// ObjectSetting is one entry inside an Object literal. For a
+// field-tagged body, FieldRef names the field and exactly one of
+// Type / Value is set. For a body written in WITH SYNTAX form, the
+// class package also accepts word-only entries (FieldRef holds the
+// literal word, Type and Value are nil) and entries that carry only a
+// Type or Value with an empty FieldRef.
+type ObjectSetting struct {
+	Span
+	FieldRef string // "&"-prefixed field name, or a literal word (see above)
+	Type     Type   // at most one of Type and Value is non-nil
+	Value    Value
+}
+
+// ObjectSet is the parsed `{ ObjectSetElement | ... }` body.
+type ObjectSet struct {
+	Span
+	Root       UnionElements // simplified ElementSet for object sets
+	Extensible bool
+	Extension  UnionElements // elements after the extension marker, if any
+}
+
+// UnionElements is a flat union of ObjectSetElements; intersection
+// support is omitted in this MVP and parsed as a single union.
+type UnionElements []ObjectSetElement
+
+// ObjectSetElement is one atom in an object set body. The unexported
+// objectSetElementNode method restricts implementations to this
+// package.
+type ObjectSetElement interface {
+	Node
+	objectSetElementNode()
+}
+
+// NOTE(review): ObjectSetElementBase, embedded by the element types
+// below, is not declared in this part of the file; confirm where it
+// is defined.
+
+// ObjectLiteralElement: a `{ ... }` object literal in an object set.
+type ObjectLiteralElement struct {
+	ObjectSetElementBase
+	Object *Object
+}
+
+// ObjectReferenceElement: bare object reference.
+type ObjectReferenceElement struct {
+	ObjectSetElementBase
+	Ref *TypeRef
+}
+
+// ObjectSetReferenceElement: reference to a named object set.
+type ObjectSetReferenceElement struct {
+	ObjectSetElementBase
+	Ref *TypeRef
+}
+
+// ---------------------------------------------------------------------------
+// X.683 parameterisation
+// ---------------------------------------------------------------------------
+
+// ParameterList is the `{ Param1, Param2, ... }` that follows a
+// parameterised type/value/class/object/set name.
+type ParameterList struct {
+	Span
+	Params []Parameter
+}
+
+// Parameter is one entry inside a ParameterList, with an optional
+// governor (the type the actual parameter must conform to).
+type Parameter struct {
+	Span
+	Governor  Type // optional
+	Reference string
+}
+
+// ActualParameterList is the `{ Type1, value1, ... }` carrying the
+// actuals at an instantiation site.
+type ActualParameterList struct {
+	Span
+	Params []ActualParameter
+}
+
+// ActualParameter wraps either a Type or a Value.
+type ActualParameter struct {
+	Span
+	Type  Type
+	Value Value
+}
+
+// ---------------------------------------------------------------------------
+// Diagnostics
+// ---------------------------------------------------------------------------
+
+// Diagnostic is a parser- or resolver-produced message with a byte
+// range. The legacy {Line, Column, Message} shape lives on the public
+// `asn1.Diagnostic` type for API back-compat; this AST-level diagnostic
+// is the one new code should use.
+type Diagnostic struct {
+	Pos      int
+	End      int
+	Severity Severity
+	Code     string // stable identifier, e.g. "class.word-mismatch"
+	Message  string
+}
+
+// Severity classifies a diagnostic. The zero value is SeverityError.
+type Severity int
+
+const (
+	SeverityError Severity = iota
+	SeverityWarning
+	SeverityInfo
+	SeverityHint
+)
diff --git a/internal/asn1/ast/ast_test.go b/internal/asn1/ast/ast_test.go
new file mode 100644
index 00000000..ad4e51c3
--- /dev/null
+++ b/internal/asn1/ast/ast_test.go
@@ -0,0 +1,93 @@
+package ast
+
+import "testing"
+
+// TestNode_PosEnd checks that Span reports the range it was built with
+// and that SetRange overwrites it.
+func TestNode_PosEnd(t *testing.T) {
+	n := Span{P: 5, E: 12}
+	if n.Pos() != 5 || n.End() != 12 {
+		t.Errorf("got Pos=%d End=%d", n.Pos(), n.End())
+	}
+	n.SetRange(1, 9)
+	if n.Pos() != 1 || n.End() != 9 {
+		t.Errorf("after SetRange: Pos=%d End=%d", n.Pos(), n.End())
+	}
+}
+
+// TestTypeInterfaces is a compile-time check that every type node
+// satisfies Type; it has no runtime assertions.
+func TestTypeInterfaces(t *testing.T) {
+	var _ Type = &BuiltinType{}
+	var _ Type = &IntegerType{}
+	var _ Type = &BitStringType{}
+	var _ Type = &EnumeratedType{}
+	var _ Type = &SequenceType{}
+	var _ Type = &SetType{}
+	var _ Type = &ChoiceType{}
+	var _ Type = &SequenceOfType{}
+	var _ Type = &SetOfType{}
+	var _ Type = &TaggedType{}
+	var _ Type = &ReferencedType{}
+	var _ Type = &ConstrainedType{}
+	var _ Type = &OpenTypeFieldType{}
+	var _ Type = &AnyType{}
+}
+
+// TestValueInterfaces is a compile-time check that every value node
+// satisfies Value.
+func TestValueInterfaces(t *testing.T) {
+	var _ Value = &IntegerValue{}
+	var _ Value = &RealValue{}
+	var _ Value = &BooleanValue{}
+	var _ Value = &NullValue{}
+	var _ Value = &StringValue{}
+	var _ Value = &OIDValue{}
+	var _ Value = &ReferenceValue{}
+	var _ Value = &ChoiceValue{}
+	var _ Value = &SequenceValue{}
+	var _ Value = &SequenceOfValue{}
+}
+
+// TestAssignmentInterfaces is a compile-time check that every
+// assignment node satisfies Assignment.
+func TestAssignmentInterfaces(t *testing.T) {
+	var _ Assignment = &TypeAssignment{Name: "X"}
+	var _ Assignment = &ValueAssignment{Name: "x"}
+	var _ Assignment = &ValueSetTypeAssignment{Name: "X"}
+	var _ Assignment = &ObjectClassAssignment{Name: "X"}
+	var _ Assignment = &ObjectAssignment{Name: "x"}
+	var _ Assignment = &ObjectSetAssignment{Name: "X"}
+}
+
+// TestConstraintElementInterfaces is a compile-time check that every
+// constraint element satisfies ConstraintElement.
+func TestConstraintElementInterfaces(t *testing.T) {
+	var _ ConstraintElement = &SingleValueConstraint{}
+	var _ ConstraintElement = &ValueRangeConstraint{}
+	var _ ConstraintElement = &SizeConstraint{}
+	var _ ConstraintElement = &AlphabetConstraint{}
+	var _ ConstraintElement = &TypeConstraint{}
+	var _ ConstraintElement = &ContainedSubtype{}
+	var _ ConstraintElement = &PatternConstraint{}
+	var _ ConstraintElement = &PropertySettings{}
+	var _ ConstraintElement = &InnerTypeConstraint{}
+	var _ ConstraintElement = &TableConstraint{}
+	var _ ConstraintElement = &UserDefinedConstraint{}
+	var _ ConstraintElement = &ExceptConstraint{}
+	var _ ConstraintElement = &AllExceptConstraint{}
+}
+
+// TestFieldSpecInterfaces is a compile-time check that every class
+// field spec satisfies FieldSpec.
+func TestFieldSpecInterfaces(t *testing.T) {
+	var _ FieldSpec = &TypeFieldSpec{Name: "&Foo"}
+	var _ FieldSpec = &FixedTypeValueFieldSpec{Name: "&foo"}
+	var _ FieldSpec = &VariableTypeValueFieldSpec{Name: "&foo"}
+	var _ FieldSpec = &FixedTypeValueSetFieldSpec{Name: "&Foo"}
+	var _ FieldSpec = &VariableTypeValueSetFieldSpec{Name: "&Foo"}
+	var _ FieldSpec = &ObjectFieldSpec{Name: "&foo"}
+	var _ FieldSpec = &ObjectSetFieldSpec{Name: "&Foo"}
+}
+
+// TestAssignmentName checks the exported accessor for assignment names.
+func TestAssignmentName(t *testing.T) {
+	a := &TypeAssignment{Name: "Foo"}
+	if AssignmentName(a) != "Foo" {
+		t.Errorf("got %q", AssignmentName(a))
+	}
+}
+
+// TestFieldName checks the exported accessor for field-spec names.
+func TestFieldName(t *testing.T) {
+	f := &TypeFieldSpec{Name: "&Foo"}
+	if FieldName(f) != "&Foo" {
+		t.Errorf("got %q", FieldName(f))
+	}
+}
diff --git a/internal/asn1/class/class.go b/internal/asn1/class/class.go
new file mode 100644
index 00000000..6ee77ec6
--- /dev/null
+++ b/internal/asn1/class/class.go
@@ -0,0 +1,394 @@
+// Package class implements the X.681 information object class
+// machinery used by the rest of the ASN.1 frontend:
+//
+//   - WithSyntaxParser walks an object literal body against the
+//     declaring class's WITH SYNTAX template, returning (fieldRef ->
+//     value/type) settings. This is the Go equivalent of asn1c's
+//     `asn1fix_cws.c` driver and vanadium's ClassObjectParser.
+//
+//   - ObjectSetResolver flattens an ObjectSet expression (object
+//     literals, object references and object-set references joined
+//     by union) into a sequence of resolved objects.
+//
+//   - Solve narrows an open-type field given the field names of a
+//     `({Set}{@discriminator})` table constraint, producing the
+//     concrete CHOICE alternatives that the lowering pass turns into
+//     a TTCN-3 union.
+package class
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/nokia/ntt/internal/asn1/ast"
+	"github.com/nokia/ntt/internal/asn1/resolver"
+)
+
+// Setting is one resolved (field, value-or-type) pair from an object
+// literal. Type is non-nil for type fields; Value is non-nil for value
+// fields. Mutually exclusive.
+type Setting struct {
+	Field string // includes the leading "&"
+	Type  ast.Type
+	Value ast.Value
+}
+
+// WithSyntaxParser converts an object body into Settings, driven by a
+// class's WITH SYNTAX template.
+type WithSyntaxParser struct {
+	class *ast.ObjectClass // may be nil; Parse then returns the body's settings as-is
+}
+
+// NewWithSyntaxParser returns a parser bound to a class definition.
+func NewWithSyntaxParser(class *ast.ObjectClass) *WithSyntaxParser {
+	return &WithSyntaxParser{class: class}
+}
+
+// Parse walks obj against the class's WITH SYNTAX template. If the
+// class or its template is nil, the object's settings are returned
+// as-is. A nil obj yields (nil, nil).
+func (p *WithSyntaxParser) Parse(obj *ast.Object) ([]Setting, []ast.Diagnostic) { + if obj == nil { + return nil, nil + } + if p.class == nil || p.class.WithSyntax == nil { + return objectAsSettings(obj), nil + } + // We replay the WITH SYNTAX template token-by-token; each WORD + // must match the corresponding source token, each &FieldRef + // captures the next syntactic chunk as the field's value/type, + // and OptionalGroups are skipped if their first WORD doesn't + // match the next source token. + var settings []Setting + var diags []ast.Diagnostic + src := flattenObjectBody(obj) + pos := 0 + _, _ = p.matchTemplate(p.class.WithSyntax.Tokens, src, &pos, &settings, &diags, true) + if pos < len(src) { + diags = append(diags, ast.Diagnostic{ + Pos: obj.Pos(), End: obj.End(), + Severity: ast.SeverityError, Code: "class.trailing-tokens", + Message: fmt.Sprintf("WITH SYNTAX did not consume all tokens (leftover: %s)", joinChunks(src[pos:])), + }) + } + return settings, diags +} + +// matchTemplate advances *pos through src as it consumes template +// tokens. If required is false (we're inside an OptionalGroup), the +// match returns success only if every required template token had a +// corresponding source chunk; otherwise it rolls back *pos. +func (p *WithSyntaxParser) matchTemplate(tmpl []ast.WithSyntaxToken, src chunkSlice, pos *int, out *[]Setting, diags *[]ast.Diagnostic, required bool) (bool, []Setting) { + startPos := *pos + local := append([]Setting(nil), (*out)...) 
+ for _, t := range tmpl { + switch t.Kind { + case ast.WSKWord: + if *pos >= len(src) || !src[*pos].isWord() || !strings.EqualFold(src[*pos].word, t.Text) { + if !required { + *pos = startPos + return false, nil + } + *diags = append(*diags, ast.Diagnostic{ + Pos: t.Pos(), End: t.End(), + Severity: ast.SeverityError, Code: "class.word-mismatch", + Message: fmt.Sprintf("expected literal word %q, got %s", t.Text, src.describe(*pos)), + }) + return false, nil + } + *pos++ + case ast.WSKFieldRef: + if *pos >= len(src) { + if !required { + *pos = startPos + return false, nil + } + *diags = append(*diags, ast.Diagnostic{ + Pos: t.Pos(), End: t.End(), + Severity: ast.SeverityError, Code: "class.missing-field", + Message: fmt.Sprintf("expected value for field %q, ran off end of object body", t.Text), + }) + return false, nil + } + ch := src[*pos] + *pos++ + s := Setting{Field: t.Text} + if ch.tp != nil { + s.Type = ch.tp + } else { + s.Value = ch.value + } + local = append(local, s) + case ast.WSKOptionalGroup: + ok, group := p.matchTemplate(t.Group, src, pos, &local, diags, false) + if ok { + local = group + } + } + } + *out = local + return true, local +} + +// chunk is one syntactic atom from the source object body: either a +// bare word, an embedded type, or an embedded value. 
+type chunk struct { + word string + tp ast.Type + value ast.Value +} + +type chunkSlice []chunk + +func (c chunk) isWord() bool { return c.tp == nil && c.value == nil } + +func (s chunkSlice) describe(i int) string { + if i >= len(s) { + return "end of object" + } + c := s[i] + if c.isWord() { + return fmt.Sprintf("word %q", c.word) + } + if c.tp != nil { + return fmt.Sprintf("type at %d..%d", c.tp.Pos(), c.tp.End()) + } + return fmt.Sprintf("value at %d..%d", c.value.Pos(), c.value.End()) +} + +func joinChunks(s chunkSlice) string { + var b strings.Builder + for i, c := range s { + if i > 0 { + b.WriteString(" ") + } + if c.isWord() { + b.WriteString(c.word) + } else if c.tp != nil { + b.WriteString("") + } else { + b.WriteString("") + } + } + return b.String() +} + +// flattenObjectBody turns an object literal's settings into the linear +// token stream the template matcher consumes. Word-only entries get +// promoted from the field text (Phase 4 parser already captured them +// as Settings with empty field), values map to value chunks, types to +// type chunks. +func flattenObjectBody(obj *ast.Object) chunkSlice { + out := make(chunkSlice, 0, len(obj.Settings)) + for _, s := range obj.Settings { + // If the parser successfully attached a &Field to a value or + // type, that's a real "field+value" setting. We treat the + // field reference itself as a word so the template's + // matching WSKFieldRef sees the value/type as the next chunk. + switch { + case s.Type != nil: + out = append(out, chunk{tp: s.Type}) + case s.Value != nil: + out = append(out, chunk{value: s.Value}) + case s.FieldRef != "": + out = append(out, chunk{word: s.FieldRef}) + } + } + return out +} + +// objectAsSettings returns the raw settings when the class declares no +// WITH SYNTAX template (i.e. the object body is already field-tagged). 
+func objectAsSettings(obj *ast.Object) []Setting {
+	out := make([]Setting, 0, len(obj.Settings))
+	for _, s := range obj.Settings {
+		out = append(out, Setting{Field: s.FieldRef, Type: s.Type, Value: s.Value})
+	}
+	return out
+}
+
+// ---------------------------------------------------------------------------
+// ObjectSetResolver
+// ---------------------------------------------------------------------------
+
+// ObjectSetResolver expands an ObjectSet expression into a flat list
+// of resolved objects. It chases object references and object-set
+// references via the basket; cross-module references work the same
+// way as for types.
+type ObjectSetResolver struct {
+	basket *resolver.Basket
+	class  *ast.ObjectClass
+}
+
+// NewObjectSetResolver returns a resolver bound to a class.
+func NewObjectSetResolver(basket *resolver.Basket, class *ast.ObjectClass) *ObjectSetResolver {
+	return &ObjectSetResolver{basket: basket, class: class}
+}
+
+// ResolvedObject is one object expanded from a set. Source is the AST
+// node it originated from (for go-to-definition).
+type ResolvedObject struct {
+	Source   ast.Node
+	Settings []Setting
+}
+
+// Resolve walks set, returning every concrete object it transitively
+// references. A referenced object or object set that is reachable more
+// than once contributes only once; a genuinely recursive object-set
+// reference is reported as a "class.cycle" diagnostic. A nil set
+// yields (nil, nil).
+func (r *ObjectSetResolver) Resolve(scope *resolver.Scope, set *ast.ObjectSet) ([]ResolvedObject, []ast.Diagnostic) {
+	if set == nil {
+		return nil, nil
+	}
+	// seen is keyed by module-qualified name. A key maps to true while
+	// that object set is still being expanded and to false once the
+	// object or set has been fully handled.
+	seen := make(map[string]bool)
+	out, diags := r.expand(scope, set.Root, seen)
+	if set.Extensible {
+		moreOut, moreDiags := r.expand(scope, set.Extension, seen)
+		out = append(out, moreOut...)
+		diags = append(diags, moreDiags...)
+	}
+	return out, diags
+}
+
+// expand resolves each element of a union in order, appending the
+// objects and diagnostics each one produces.
+func (r *ObjectSetResolver) expand(scope *resolver.Scope, elements ast.UnionElements, seen map[string]bool) ([]ResolvedObject, []ast.Diagnostic) {
+	var out []ResolvedObject
+	var diags []ast.Diagnostic
+	for _, el := range elements {
+		switch el := el.(type) {
+		case *ast.ObjectLiteralElement:
+			settings, d := NewWithSyntaxParser(r.class).Parse(el.Object)
+			diags = append(diags, d...)
+			out = append(out, ResolvedObject{Source: el, Settings: settings})
+		case *ast.ObjectReferenceElement:
+			obj, d := r.lookupObject(scope, el.Ref, seen)
+			diags = append(diags, d...)
+			if obj != nil {
+				out = append(out, *obj)
+			}
+		case *ast.ObjectSetReferenceElement:
+			more, d := r.lookupObjectSet(scope, el.Ref, seen)
+			diags = append(diags, d...)
+			out = append(out, more...)
+		}
+	}
+	return out, diags
+}
+
+// lookupObject resolves a reference to a named object and parses its
+// body. It returns (nil, nil) when ref is nil, when the symbol is not
+// an object assignment, or when the same object was already emitted
+// earlier in this Resolve call.
+func (r *ObjectSetResolver) lookupObject(scope *resolver.Scope, ref *ast.TypeRef, seen map[string]bool) (*ResolvedObject, []ast.Diagnostic) {
+	if ref == nil {
+		return nil, nil
+	}
+	target := scope
+	if ref.Module != "" {
+		target = r.basket.Get(ref.Module)
+		if target == nil {
+			return nil, []ast.Diagnostic{{
+				Pos: ref.Pos(), End: ref.End(),
+				Severity: ast.SeverityError, Code: "class.unknown-module",
+				Message: fmt.Sprintf("unknown module %q in object reference", ref.Module),
+			}}
+		}
+	}
+	sym := target.Lookup(resolver.NsObject, ref.Name)
+	if sym == nil {
+		return nil, []ast.Diagnostic{{
+			Pos: ref.Pos(), End: ref.End(),
+			Severity: ast.SeverityError, Code: "class.unknown-object",
+			Message: fmt.Sprintf("unknown object %q", ref.Name),
+		}}
+	}
+	oa, ok := sym.Definition.(*ast.ObjectAssignment)
+	if !ok {
+		return nil, nil
+	}
+	key := target.Module().Identifier.Name + "." + ref.Name
+	if _, dup := seen[key]; dup {
+		// Resolving an object never follows further references, so a
+		// repeat visit is the same object reached through another
+		// path (e.g. two sets sharing it), not a cycle. A union is
+		// idempotent, so the object is emitted only once.
+		return nil, nil
+	}
+	seen[key] = false
+	settings, diags := NewWithSyntaxParser(r.class).Parse(oa.Object)
+	return &ResolvedObject{Source: oa, Settings: settings}, diags
+}
+
+// lookupObjectSet resolves a reference to a named object set and
+// expands it. A set that is already being expanded further up the
+// call chain is a true cycle and yields a "class.cycle" diagnostic; a
+// set that was fully expanded earlier yields (nil, nil).
+func (r *ObjectSetResolver) lookupObjectSet(scope *resolver.Scope, ref *ast.TypeRef, seen map[string]bool) ([]ResolvedObject, []ast.Diagnostic) {
+	if ref == nil {
+		return nil, nil
+	}
+	target := scope
+	if ref.Module != "" {
+		target = r.basket.Get(ref.Module)
+		if target == nil {
+			return nil, []ast.Diagnostic{{
+				Pos: ref.Pos(), End: ref.End(),
+				Severity: ast.SeverityError, Code: "class.unknown-module",
+				Message: fmt.Sprintf("unknown module %q in object set reference", ref.Module),
+			}}
+		}
+	}
+	sym := target.Lookup(resolver.NsObjectSet, ref.Name)
+	if sym == nil {
+		return nil, []ast.Diagnostic{{
+			Pos: ref.Pos(), End: ref.End(),
+			Severity: ast.SeverityError, Code: "class.unknown-object-set",
+			Message: fmt.Sprintf("unknown object set %q", ref.Name),
+		}}
+	}
+	osa, ok := sym.Definition.(*ast.ObjectSetAssignment)
+	if !ok || osa.Set == nil {
+		return nil, nil
+	}
+	key := target.Module().Identifier.Name + ".set." + ref.Name
+	if inProgress, visited := seen[key]; visited {
+		if !inProgress {
+			// Already expanded through another path; nothing new.
+			return nil, nil
+		}
+		return nil, []ast.Diagnostic{{
+			Pos: ref.Pos(), End: ref.End(),
+			Severity: ast.SeverityError, Code: "class.cycle",
+			Message: fmt.Sprintf("recursive object set reference %q", ref.Name),
+		}}
+	}
+	// Mark the set as in progress for the duration of its expansion
+	// only, so later sibling references are not mistaken for cycles.
+	seen[key] = true
+	defer func() { seen[key] = false }()
+	out, diags := r.expand(target, osa.Set.Root, seen)
+	if osa.Set.Extensible {
+		more, moreD := r.expand(target, osa.Set.Extension, seen)
+		out = append(out, more...)
+		diags = append(diags, moreD...)
+	}
+	return out, diags
+}
+
+// ---------------------------------------------------------------------------
+// Component-relation solver
+// ---------------------------------------------------------------------------
+
+// Alternative is one resolved CHOICE alternative produced by expanding
+// an open-type field via a component-relation constraint.
+type Alternative struct {
+	Discriminator ast.Value // the value of @field for this object
+	Type          ast.Type  // the resolved open-type field's actual type
+}
+
+// Solve implements the narrowing implied by a `({Set}{@discriminator})`
+// table constraint on an open-type field. It produces one Alternative
+// per object that has a type setting for openField; objects without
+// one are skipped. Discriminator is the object's value setting for the
+// discriminator field, or nil if the object has none.
+//
+// objects is the result of an earlier ObjectSetResolver.Resolve call;
+// passing them in keeps this function pure.
+func Solve(objects []ResolvedObject, openField, discriminator string) []Alternative {
+	out := make([]Alternative, 0, len(objects))
+	for _, o := range objects {
+		var disc ast.Value
+		var openT ast.Type
+		for _, s := range o.Settings {
+			switch s.Field {
+			case discriminator:
+				disc = s.Value
+			case openField:
+				openT = s.Type
+			}
+		}
+		if openT != nil {
+			out = append(out, Alternative{Discriminator: disc, Type: openT})
+		}
+	}
+	return out
+}
diff --git a/internal/asn1/class/class_test.go b/internal/asn1/class/class_test.go
new file mode 100644
index 00000000..c4a40efb
--- /dev/null
+++ b/internal/asn1/class/class_test.go
@@ -0,0 +1,164 @@
+package class_test
+
+import (
+	"testing"
+
+	"github.com/nokia/ntt/internal/asn1"
+	"github.com/nokia/ntt/internal/asn1/ast"
+	"github.com/nokia/ntt/internal/asn1/class"
+	"github.com/nokia/ntt/internal/asn1/resolver"
+)
+
+// parseModule parses src, registers the module in a fresh basket and
+// resolves it, returning the module, the basket and the module's scope.
+func parseModule(t *testing.T, src string) (*ast.Module, *resolver.Basket, *resolver.Scope) {
+	t.Helper()
+	m := asn1.ParseModule([]byte(src))
+	b := resolver.NewBasket()
+	s := b.Add(m)
+	resolver.Resolve(b, m)
+	return m, b, s
+}
+
+func
TestWithSyntaxParser_NoTemplate(t *testing.T) { + // Object body without a WITH SYNTAX template: settings are + // returned as-is. + src := `M DEFINITIONS ::= BEGIN +ERROR ::= CLASS { &code INTEGER, &name PrintableString } +e1 ERROR ::= { &code 42, &name "boom" } +END` + m, _, _ := parseModule(t, src) + if len(m.Assignments) < 2 { + t.Fatalf("need two assignments, got %d", len(m.Assignments)) + } + // The second assignment is currently classified as a value + // assignment because the parser doesn't yet distinguish object + // vs value assignments (Phase 9 will). For now use the object's + // settings directly via a synthesised Object. + obj := &ast.Object{Settings: []ast.ObjectSetting{ + {FieldRef: "&code", Value: &ast.IntegerValue{Text: "42"}}, + {FieldRef: "&name", Value: &ast.StringValue{Kind: ast.StringCString, Text: `"boom"`}}, + }} + oc := m.Assignments[0].(*ast.ObjectClassAssignment).Class + p := class.NewWithSyntaxParser(oc) + settings, diags := p.Parse(obj) + if len(diags) > 0 { + t.Fatalf("diags: %+v", diags) + } + if len(settings) != 2 { + t.Fatalf("got %d settings", len(settings)) + } + if settings[0].Field != "&code" || settings[1].Field != "&name" { + t.Errorf("settings: %+v", settings) + } +} + +func TestWithSyntaxParser_WithSyntaxTemplate(t *testing.T) { + src := `M DEFINITIONS ::= BEGIN +ERROR ::= CLASS { + &code INTEGER, + &name PrintableString +} WITH SYNTAX { + CODE &code NAME &name +} +END` + m, _, _ := parseModule(t, src) + oc := m.Assignments[0].(*ast.ObjectClassAssignment).Class + // Source object body: CODE 42 NAME "boom" mapped to chunks. 
+ obj := &ast.Object{Settings: []ast.ObjectSetting{ + {FieldRef: "CODE"}, + {Value: &ast.IntegerValue{Text: "42"}}, + {FieldRef: "NAME"}, + {Value: &ast.StringValue{Kind: ast.StringCString, Text: `"boom"`}}, + }} + p := class.NewWithSyntaxParser(oc) + settings, diags := p.Parse(obj) + if len(diags) > 0 { + t.Fatalf("diags: %+v", diags) + } + if len(settings) != 2 { + t.Fatalf("got %d settings", len(settings)) + } + if settings[0].Field != "&code" || settings[0].Value == nil { + t.Errorf("expected &code = value, got %+v", settings[0]) + } + if settings[1].Field != "&name" || settings[1].Value == nil { + t.Errorf("expected &name = value, got %+v", settings[1]) + } +} + +func TestWithSyntaxParser_OptionalGroupSkipped(t *testing.T) { + src := `M DEFINITIONS ::= BEGIN +ERROR ::= CLASS { + &code INTEGER, + &desc PrintableString OPTIONAL +} WITH SYNTAX { CODE &code [DESC &desc] } +END` + m, _, _ := parseModule(t, src) + oc := m.Assignments[0].(*ast.ObjectClassAssignment).Class + // Source body omits the [DESC &desc] optional group entirely. 
+ obj := &ast.Object{Settings: []ast.ObjectSetting{ + {FieldRef: "CODE"}, + {Value: &ast.IntegerValue{Text: "1"}}, + }} + p := class.NewWithSyntaxParser(oc) + settings, diags := p.Parse(obj) + if len(diags) > 0 { + t.Fatalf("diags: %+v", diags) + } + if len(settings) != 1 || settings[0].Field != "&code" { + t.Errorf("settings: %+v", settings) + } +} + +func TestObjectSetResolver_ExpandsLiterals(t *testing.T) { + src := `M DEFINITIONS ::= BEGIN +ERROR ::= CLASS { &code INTEGER, &name PrintableString } +END` + m, b, scope := parseModule(t, src) + oc := m.Assignments[0].(*ast.ObjectClassAssignment).Class + + set := &ast.ObjectSet{Root: ast.UnionElements{ + &ast.ObjectLiteralElement{Object: &ast.Object{Settings: []ast.ObjectSetting{ + {FieldRef: "&code", Value: &ast.IntegerValue{Text: "1"}}, + {FieldRef: "&name", Value: &ast.StringValue{Kind: ast.StringCString, Text: `"a"`}}, + }}}, + &ast.ObjectLiteralElement{Object: &ast.Object{Settings: []ast.ObjectSetting{ + {FieldRef: "&code", Value: &ast.IntegerValue{Text: "2"}}, + {FieldRef: "&name", Value: &ast.StringValue{Kind: ast.StringCString, Text: `"b"`}}, + }}}, + }} + + r := class.NewObjectSetResolver(b, oc) + out, diags := r.Resolve(scope, set) + if len(diags) > 0 { + t.Fatalf("diags: %+v", diags) + } + if len(out) != 2 { + t.Fatalf("got %d resolved objects", len(out)) + } + if got := out[0].Settings[0].Value.(*ast.IntegerValue).Text; got != "1" { + t.Errorf("first object code: %q", got) + } +} + +func TestSolve_ComponentRelation(t *testing.T) { + objs := []class.ResolvedObject{ + {Settings: []class.Setting{ + {Field: "&code", Value: &ast.IntegerValue{Text: "1"}}, + {Field: "&Type", Type: &ast.IntegerType{}}, + }}, + {Settings: []class.Setting{ + {Field: "&code", Value: &ast.IntegerValue{Text: "2"}}, + {Field: "&Type", Type: &ast.BuiltinType{Kind: ast.PrintableString, Name: "PrintableString"}}, + }}, + } + alts := class.Solve(objs, "&Type", "&code") + if len(alts) != 2 { + t.Fatalf("got %d alternatives", len(alts)) + } 
+ if _, ok := alts[0].Type.(*ast.IntegerType); !ok { + t.Errorf("alt 0 type: %T", alts[0].Type) + } + if bt, ok := alts[1].Type.(*ast.BuiltinType); !ok || bt.Kind != ast.PrintableString { + t.Errorf("alt 1 type: %v", alts[1].Type) + } +} diff --git a/internal/asn1/fixture_test.go b/internal/asn1/fixture_test.go new file mode 100644 index 00000000..488272a0 --- /dev/null +++ b/internal/asn1/fixture_test.go @@ -0,0 +1,60 @@ +package asn1 + +import ( + "os" + "strings" + "testing" + + "github.com/nokia/ntt/internal/asn1/ast" +) + +// TestFixture_SampleASN1 exercises the parser end-to-end against a +// non-trivial ASN.1 module that hits the features the new frontend +// should support. The intent is to catch regressions; the +// per-production tests in parser_test.go and parser_class_test.go are +// the source of truth for any single feature. +func TestFixture_SampleASN1(t *testing.T) { + src, err := os.ReadFile("testdata/sample.asn") + if err != nil { + t.Fatalf("read fixture: %v", err) + } + m := ParseModule(src) + if m == nil || m.Identifier.Name != "RRC-Sample" { + t.Fatalf("module name: %+v", m.Identifier) + } + if m.Tagging != ast.TagsAutomatic { + t.Errorf("tagging: %v want AUTOMATIC", m.Tagging) + } + if m.Extensible { + t.Errorf("module should not be extensibility implied") + } + if len(m.Imports) != 1 || m.Imports[0].From != "Common" { + t.Errorf("imports: %+v", m.Imports) + } + if m.Exports == nil || len(m.Exports.Symbols) != 1 || m.Exports.Symbols[0] != "Status" { + t.Errorf("exports: %+v", m.Exports) + } + + want := []string{ + "Status", "Counter", "Person", "Names", "Tagged", + "Pair", "IntPair", "Reply", "maxRetries", + } + got := map[string]bool{} + for _, a := range m.Assignments { + got[ast.AssignmentName(a)] = true + } + for _, n := range want { + if !got[n] { + t.Errorf("missing assignment %q in %+v", n, got) + } + } + + // No diagnostics from such a clean fixture. 
+ for _, d := range m.Diagnostics { + t.Errorf("unexpected diag: %s", d.Message) + } + + if !strings.Contains(string(src), "EXPORTS") { + t.Fatal("fixture lost EXPORTS clause unexpectedly") + } +} diff --git a/internal/asn1/integration_test.go b/internal/asn1/integration_test.go new file mode 100644 index 00000000..76adf86b --- /dev/null +++ b/internal/asn1/integration_test.go @@ -0,0 +1,104 @@ +package asn1_test + +import ( + "os" + "strings" + "testing" + + "github.com/nokia/ntt/internal/asn1" + "github.com/nokia/ntt/internal/asn1/ast" + "github.com/nokia/ntt/internal/asn1/param" + "github.com/nokia/ntt/internal/asn1/resolver" + "github.com/nokia/ntt/internal/asn1/transform" +) + +// TestEndToEnd_ParseResolveLower drives the full pipeline: +// +// parser -> AST +// AST -> Basket / Resolver (semantic check) +// AST -> Transform (lower to TTCN-3 source) +// source -> ttcn3.Parse (re-parsed by existing parser) +// +// It catches regressions where one layer produces output the next +// can't handle. +func TestEndToEnd_ParseResolveLower(t *testing.T) { + src, err := os.ReadFile("testdata/sample.asn") + if err != nil { + t.Fatalf("read fixture: %v", err) + } + m := asn1.ParseModule(src) + if m == nil { + t.Fatal("nil module") + } + + b := resolver.NewBasket() + b.Add(m) + resolver.Resolve(b, m) + + r := transform.LowerModule(m) + if r == nil || r.Source == "" { + t.Fatal("transform produced empty source") + } + if r.Tree == nil { + t.Fatal("re-parsed tree was nil") + } + + for _, want := range []string{ + "module RRC_Sample", + "type enumerated Status", + "type record Person", + "type record of charstring Names", + "type union Reply", + } { + if !strings.Contains(r.Source, want) { + t.Errorf("lowered source missing %q\n----\n%s", want, r.Source) + } + } +} + +// TestEndToEnd_ParameterisedInstantiation exercises the parameter +// engine against the fixture's `Pair { ItemType }` template via the +// `IntPair ::= Pair { INTEGER }` use site. 
+func TestEndToEnd_ParameterisedInstantiation(t *testing.T) { + src, _ := os.ReadFile("testdata/sample.asn") + m := asn1.ParseModule(src) + b := resolver.NewBasket() + scope := b.Add(m) + resolver.Resolve(b, m) + + var intPair *ast.TypeAssignment + for _, a := range m.Assignments { + if ta, ok := a.(*ast.TypeAssignment); ok && ta.Name == "IntPair" { + intPair = ta + break + } + } + if intPair == nil { + t.Fatal("IntPair assignment not found") + } + rt, ok := intPair.Type.(*ast.ReferencedType) + if !ok { + t.Fatalf("IntPair body: %T want *ReferencedType", intPair.Type) + } + if rt.Actuals == nil { + t.Fatal("IntPair actuals are nil") + } + + in := param.NewInstantiator(b) + out, diags := in.Instantiate(scope, rt.Ref, rt.Actuals) + if len(diags) > 0 { + t.Fatalf("diags: %+v", diags) + } + seq, ok := out.(*ast.SequenceType) + if !ok { + t.Fatalf("instantiation produced %T want *SequenceType", out) + } + if len(seq.Components) != 2 { + t.Fatalf("components: %d want 2", len(seq.Components)) + } + for i, c := range seq.Components { + if _, ok := c.Type.(*ast.IntegerType); !ok { + t.Errorf("component %d: %T want *IntegerType", i, c.Type) + } + } +} diff --git a/internal/asn1/lexer.go b/internal/asn1/lexer.go new file mode 100644 index 00000000..bf75e33b --- /dev/null +++ b/internal/asn1/lexer.go @@ -0,0 +1,682 @@ +package asn1 + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +// TokenKind classifies a lexical token from an ASN.1 source file. +// +// We intentionally use one constant per punctuation symbol but collapse +// every X.680 reserved word into KEYWORD. The parser can switch on a +// token's text to recognise specific keywords; this keeps the kind +// space manageable (there are over 90 reserved words in X.680). +type TokenKind uint8 + +const ( + INVALID TokenKind = iota + EOF + + // Literals & references. 
+ NUMBER // integer or real literal (parser distinguishes) + BSTRING // '...'B - binary string literal + HSTRING // '...'H - hexadecimal string literal + CSTRING // "..." - character string literal + TYPEREFERENCE // identifier starting with an uppercase letter + IDENTIFIER // identifier starting with a lowercase letter + AMP_REF // &Foo / &foo - object class field reference + WORD // 'WITH SYNTAX' literal word (multi-word identifiers inside WITH SYNTAX templates) + + // Reserved word - look at Token.Text for the specific keyword. + KEYWORD + + // Punctuation. + LBRACE // { + RBRACE // } + LBRACKET // [ + RBRACKET // ] + DOUBLE_LBRACKET // [[ + DOUBLE_RBRACKET // ]] + LPAREN // ( + RPAREN // ) + COMMA // , + SEMICOLON // ; + COLON // : + DOT // . + DOUBLE_DOT // .. + ELLIPSIS // ... + HYPHEN // - + PLUS // + + LESS // < + GREATER // > + EQUAL // = + BANG // ! + AT // @ + PIPE // | + SLASH // / + STAR // * + CARET // ^ + ASSIGN // ::= +) + +func (k TokenKind) String() string { + switch k { + case INVALID: + return "INVALID" + case EOF: + return "EOF" + case NUMBER: + return "NUMBER" + case BSTRING: + return "BSTRING" + case HSTRING: + return "HSTRING" + case CSTRING: + return "CSTRING" + case TYPEREFERENCE: + return "TYPEREFERENCE" + case IDENTIFIER: + return "IDENTIFIER" + case AMP_REF: + return "AMP_REF" + case WORD: + return "WORD" + case KEYWORD: + return "KEYWORD" + case LBRACE: + return "LBRACE" + case RBRACE: + return "RBRACE" + case LBRACKET: + return "LBRACKET" + case RBRACKET: + return "RBRACKET" + case DOUBLE_LBRACKET: + return "DOUBLE_LBRACKET" + case DOUBLE_RBRACKET: + return "DOUBLE_RBRACKET" + case LPAREN: + return "LPAREN" + case RPAREN: + return "RPAREN" + case COMMA: + return "COMMA" + case SEMICOLON: + return "SEMICOLON" + case COLON: + return "COLON" + case DOT: + return "DOT" + case DOUBLE_DOT: + return "DOUBLE_DOT" + case ELLIPSIS: + return "ELLIPSIS" + case HYPHEN: + return "HYPHEN" + case PLUS: + return "PLUS" + case LESS: + return "LESS" + 
case GREATER: + return "GREATER" + case EQUAL: + return "EQUAL" + case BANG: + return "BANG" + case AT: + return "AT" + case PIPE: + return "PIPE" + case SLASH: + return "SLASH" + case STAR: + return "STAR" + case CARET: + return "CARET" + case ASSIGN: + return "ASSIGN" + } + return fmt.Sprintf("TokenKind(%d)", k) +} + +// Token is the unit produced by the lexer. Pos and End are byte +// offsets into the source slice; End is exclusive. +type Token struct { + Kind TokenKind + Pos int + End int + Text string +} + +func (t Token) String() string { + return fmt.Sprintf("%s(%q)@%d:%d", t.Kind, t.Text, t.Pos, t.End) +} + +// Lexer scans an ASN.1 source buffer into a stream of Tokens. +// +// The lexer is independent of the parser; it can be driven by any +// front-end that wants byte-precise token positions (LSP semantic +// highlighting, code formatters, diagnostics range computation). +type Lexer struct { + src []byte + pos int + errors []Diagnostic + withSynt bool // true while inside a WITH SYNTAX template - upper-case "words" become WORD tokens +} + +// NewLexer constructs a Lexer over src. +func NewLexer(src []byte) *Lexer { + return &Lexer{src: src} +} + +// Errors returns any lexical diagnostics accumulated so far. +func (l *Lexer) Errors() []Diagnostic { return l.errors } + +// EnterWithSyntax / LeaveWithSyntax control X.681 WITH SYNTAX scanning. +// Inside a WITH SYNTAX template all-uppercase words separated by white +// space are tokenised as WORD instead of TYPEREFERENCE / KEYWORD. +func (l *Lexer) EnterWithSyntax() { l.withSynt = true } +func (l *Lexer) LeaveWithSyntax() { l.withSynt = false } + +// All scans src to EOF and returns the resulting token slice plus any +// lexical diagnostics. Useful for tests; production code should drive +// Next() in a loop to avoid the intermediate allocation. 
+func (l *Lexer) All() ([]Token, []Diagnostic) { + var toks []Token + for { + t := l.Next() + toks = append(toks, t) + if t.Kind == EOF { + break + } + } + return toks, l.errors +} + +// Next returns the next token. After EOF, Next continues to return EOF +// indefinitely so callers can safely lookahead past the end of input. +func (l *Lexer) Next() Token { + l.skipTrivia() + if l.pos >= len(l.src) { + return Token{Kind: EOF, Pos: l.pos, End: l.pos} + } + start := l.pos + c := l.src[l.pos] + + switch { + case c == '{': + l.pos++ + return mk(LBRACE, start, l.pos, l.src) + case c == '}': + l.pos++ + return mk(RBRACE, start, l.pos, l.src) + case c == '(': + l.pos++ + return mk(LPAREN, start, l.pos, l.src) + case c == ')': + l.pos++ + return mk(RPAREN, start, l.pos, l.src) + case c == ',': + l.pos++ + return mk(COMMA, start, l.pos, l.src) + case c == ';': + l.pos++ + return mk(SEMICOLON, start, l.pos, l.src) + case c == '|': + l.pos++ + return mk(PIPE, start, l.pos, l.src) + case c == '^': + l.pos++ + return mk(CARET, start, l.pos, l.src) + case c == '!': + l.pos++ + return mk(BANG, start, l.pos, l.src) + case c == '@': + l.pos++ + return mk(AT, start, l.pos, l.src) + case c == '+': + l.pos++ + return mk(PLUS, start, l.pos, l.src) + case c == '<': + l.pos++ + return mk(LESS, start, l.pos, l.src) + case c == '>': + l.pos++ + return mk(GREATER, start, l.pos, l.src) + case c == '=': + l.pos++ + return mk(EQUAL, start, l.pos, l.src) + case c == '/': + l.pos++ + return mk(SLASH, start, l.pos, l.src) + case c == '*': + l.pos++ + return mk(STAR, start, l.pos, l.src) + + case c == '[': + l.pos++ + if l.pos < len(l.src) && l.src[l.pos] == '[' { + l.pos++ + return mk(DOUBLE_LBRACKET, start, l.pos, l.src) + } + return mk(LBRACKET, start, l.pos, l.src) + + case c == ']': + l.pos++ + if l.pos < len(l.src) && l.src[l.pos] == ']' { + l.pos++ + return mk(DOUBLE_RBRACKET, start, l.pos, l.src) + } + return mk(RBRACKET, start, l.pos, l.src) + + case c == ':': + // ::= or bare : + if 
l.pos+2 < len(l.src) && l.src[l.pos+1] == ':' && l.src[l.pos+2] == '=' { + l.pos += 3 + return mk(ASSIGN, start, l.pos, l.src) + } + l.pos++ + return mk(COLON, start, l.pos, l.src) + + case c == '.': + // ... or .. or . + if l.pos+2 < len(l.src) && l.src[l.pos+1] == '.' && l.src[l.pos+2] == '.' { + l.pos += 3 + return mk(ELLIPSIS, start, l.pos, l.src) + } + if l.pos+1 < len(l.src) && l.src[l.pos+1] == '.' { + l.pos += 2 + return mk(DOUBLE_DOT, start, l.pos, l.src) + } + l.pos++ + return mk(DOT, start, l.pos, l.src) + + case c == '-': + // Bare hyphen - the "--" comment form was already consumed + // by skipTrivia, so any '-' at this point is the unary + // minus / range separator. + l.pos++ + return mk(HYPHEN, start, l.pos, l.src) + + case c == '&': + return l.readAmpRef(start) + + case c == '\'': + return l.readBhString(start) + + case c == '"': + return l.readCString(start) + + case isDigit(c): + return l.readNumber(start) + + case isLetter(c): + return l.readWordOrIdent(start) + } + + // Unknown byte - emit INVALID and advance one byte to make + // progress. The parser will see this and synchronise. + l.errorf(start, "unexpected character %q", c) + l.pos++ + return mk(INVALID, start, l.pos, l.src) +} + +// skipTrivia consumes whitespace and ASN.1 comments. +// +// Per X.680 §12.6 ASN.1 supports two comment forms: +// - Pair-style "/* ... */" with nesting. +// - Line-style "-- ... --" terminated by either another "--" or by +// a newline (whichever comes first). 
+func (l *Lexer) skipTrivia() { + for l.pos < len(l.src) { + c := l.src[l.pos] + switch { + case c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v': + l.pos++ + case c == '-' && l.pos+1 < len(l.src) && l.src[l.pos+1] == '-': + l.skipLineComment() + case c == '/' && l.pos+1 < len(l.src) && l.src[l.pos+1] == '*': + l.skipBlockComment() + default: + return + } + } +} + +func (l *Lexer) skipLineComment() { + l.pos += 2 // consume the opening "--" + for l.pos < len(l.src) { + c := l.src[l.pos] + if c == '\n' { + return + } + if c == '-' && l.pos+1 < len(l.src) && l.src[l.pos+1] == '-' { + l.pos += 2 + return + } + l.pos++ + } +} + +func (l *Lexer) skipBlockComment() { + start := l.pos + l.pos += 2 // "/*" + depth := 1 + for l.pos+1 < len(l.src) && depth > 0 { + switch { + case l.src[l.pos] == '/' && l.src[l.pos+1] == '*': + depth++ + l.pos += 2 + case l.src[l.pos] == '*' && l.src[l.pos+1] == '/': + depth-- + l.pos += 2 + default: + l.pos++ + } + } + if depth != 0 { + l.errorf(start, "unterminated block comment") + } +} + +// readAmpRef scans a class field reference such as `&Foo` (typefield) +// or `&foo` (valuefield). The leading '&' is part of the token text. +func (l *Lexer) readAmpRef(start int) Token { + l.pos++ // consume '&' + if l.pos < len(l.src) && isLetter(l.src[l.pos]) { + l.readIdentBody() + return mk(AMP_REF, start, l.pos, l.src) + } + l.errorf(start, "expected identifier after '&'") + return mk(INVALID, start, l.pos, l.src) +} + +// readBhString scans either a binary ('01010'B) or hex ('AF01'H) string +// literal. The body is whitespace-tolerant per X.680 §11.10/§11.12 - +// inner whitespace is preserved verbatim in the token text so callers +// can re-parse if they care about the canonical value. 
+func (l *Lexer) readBhString(start int) Token { + l.pos++ // consume opening quote + for l.pos < len(l.src) && l.src[l.pos] != '\'' { + l.pos++ + } + if l.pos >= len(l.src) { + l.errorf(start, "unterminated b/h-string literal") + return mk(INVALID, start, l.pos, l.src) + } + l.pos++ // consume closing quote + if l.pos >= len(l.src) { + l.errorf(start, "expected 'B' or 'H' suffix after string literal") + return mk(INVALID, start, l.pos, l.src) + } + switch l.src[l.pos] { + case 'B', 'b': + l.pos++ + return mk(BSTRING, start, l.pos, l.src) + case 'H', 'h': + l.pos++ + return mk(HSTRING, start, l.pos, l.src) + } + l.errorf(start, "expected 'B' or 'H' suffix after string literal") + return mk(INVALID, start, l.pos, l.src) +} + +// readCString scans a "..." character string literal. Doubled inner +// quotes ("") are an escaped quote per X.680 §11.14. +func (l *Lexer) readCString(start int) Token { + l.pos++ // consume opening quote + for l.pos < len(l.src) { + if l.src[l.pos] == '"' { + if l.pos+1 < len(l.src) && l.src[l.pos+1] == '"' { + l.pos += 2 + continue + } + l.pos++ // consume closing quote + return mk(CSTRING, start, l.pos, l.src) + } + l.pos++ + } + l.errorf(start, "unterminated cstring literal") + return mk(INVALID, start, l.pos, l.src) +} + +// readNumber scans an integer or real literal. Reals follow X.680 +// §11.8: digits, optional '.', optional 'eE'-exponent with optional +// sign. We never accept a leading sign - the parser handles unary +// minus via the HYPHEN token. +func (l *Lexer) readNumber(start int) Token { + for l.pos < len(l.src) && isDigit(l.src[l.pos]) { + l.pos++ + } + // Fractional part - must not be `..` which is the range operator. + if l.pos < len(l.src) && l.src[l.pos] == '.' && + !(l.pos+1 < len(l.src) && l.src[l.pos+1] == '.') { + l.pos++ + for l.pos < len(l.src) && isDigit(l.src[l.pos]) { + l.pos++ + } + } + // Exponent. 
+ if l.pos < len(l.src) && (l.src[l.pos] == 'e' || l.src[l.pos] == 'E') { + l.pos++ + if l.pos < len(l.src) && (l.src[l.pos] == '+' || l.src[l.pos] == '-') { + l.pos++ + } + for l.pos < len(l.src) && isDigit(l.src[l.pos]) { + l.pos++ + } + } + return mk(NUMBER, start, l.pos, l.src) +} + +// readWordOrIdent scans an identifier, type reference, keyword or a +// WITH SYNTAX template word. Identifiers in ASN.1 may contain hyphens +// but not a trailing hyphen, and a "--" mid-identifier introduces a +// line comment that terminates the current identifier early. +func (l *Lexer) readWordOrIdent(start int) Token { + firstUpper := isUpper(l.src[l.pos]) + l.readIdentBody() + text := string(l.src[start:l.pos]) + + // Strip a trailing hyphen (X.680 forbids it - report and trim so + // the parser sees a clean identifier). + if strings.HasSuffix(text, "-") { + l.errorf(start, "identifier %q ends with hyphen", text) + l.pos-- + text = text[:len(text)-1] + } + + if firstUpper && isKeyword(text) { + // Inside a WITH SYNTAX template, even all-uppercase words + // like INTEGER or SEQUENCE are *literals*, not keywords. + if l.withSynt && isAllUpper(text) { + return Token{Kind: WORD, Pos: start, End: l.pos, Text: text} + } + return Token{Kind: KEYWORD, Pos: start, End: l.pos, Text: text} + } + + if firstUpper { + // Could be a type reference (mixed case) or a WITH SYNTAX + // word (all-upper). Disambiguate by context. + if l.withSynt && isAllUpper(text) { + return Token{Kind: WORD, Pos: start, End: l.pos, Text: text} + } + return Token{Kind: TYPEREFERENCE, Pos: start, End: l.pos, Text: text} + } + return Token{Kind: IDENTIFIER, Pos: start, End: l.pos, Text: text} +} + +// readIdentBody consumes the longest run starting at l.pos that forms +// a valid ASN.1 identifier body, stopping before a "--" comment marker. 
+func (l *Lexer) readIdentBody() { + for l.pos < len(l.src) { + c := l.src[l.pos] + if c == '-' && l.pos+1 < len(l.src) && l.src[l.pos+1] == '-' { + // "--" starts a comment - end of identifier. + return + } + if isIdentBody(c) { + l.pos++ + continue + } + return + } +} + +func (l *Lexer) errorf(pos int, format string, args ...interface{}) { + l.errors = append(l.errors, Diagnostic{ + Line: 1, // we keep the line/col deprecated form for now + Column: pos, + Message: fmt.Sprintf(format, args...), + }) +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +func mk(k TokenKind, start, end int, src []byte) Token { + return Token{Kind: k, Pos: start, End: end, Text: string(src[start:end])} +} + +func isDigit(b byte) bool { return b >= '0' && b <= '9' } +func isUpper(b byte) bool { return b >= 'A' && b <= 'Z' } +func isLower(b byte) bool { return b >= 'a' && b <= 'z' } + +func isLetter(b byte) bool { + if isUpper(b) || isLower(b) { + return true + } + if b < utf8.RuneSelf { + return false + } + // Non-ASCII letters are allowed in some ASN.1 dialects via + // utf8String - decode and ask unicode. + r, _ := utf8.DecodeRune([]byte{b}) + return unicode.IsLetter(r) +} + +func isIdentBody(b byte) bool { + return isLetter(b) || isDigit(b) || b == '-' +} + +func isAllUpper(s string) bool { + for i := 0; i < len(s); i++ { + c := s[i] + if c == '-' || isDigit(c) { + continue + } + if !isUpper(c) { + return false + } + } + return true +} + +// keywordSet lists every X.680/X.681/X.682/X.683 reserved word. The +// list is exhaustive on purpose - the parser relies on this membership +// test to decide whether an upper-case identifier is a type reference +// or a keyword. 
+var keywordSet = map[string]bool{ + "ABSENT": true, + "ABSTRACT-SYNTAX": true, + "ALL": true, + "APPLICATION": true, + "AUTOMATIC": true, + "BEGIN": true, + "BIT": true, + "BMPString": true, + "BOOLEAN": true, + "BY": true, + "CHARACTER": true, + "CHOICE": true, + "CLASS": true, + "COMPONENT": true, + "COMPONENTS": true, + "CONSTRAINED": true, + "CONTAINING": true, + "DATE": true, + "DATE-TIME": true, + "DEFAULT": true, + "DEFINITIONS": true, + "DURATION": true, + "EMBEDDED": true, + "ENCODED": true, + "ENCODING-CONTROL": true, + "END": true, + "ENUMERATED": true, + "EXCEPT": true, + "EXPLICIT": true, + "EXPORTS": true, + "EXTENSIBILITY": true, + "EXTERNAL": true, + "FALSE": true, + "FROM": true, + "GeneralizedTime": true, + "GeneralString": true, + "GraphicString": true, + "IA5String": true, + "IDENTIFIER": true, + "IMPLICIT": true, + "IMPLIED": true, + "IMPORTS": true, + "INCLUDES": true, + "INSTANCE": true, + "INSTRUCTIONS": true, + "INTEGER": true, + "INTERSECTION": true, + "ISO646String": true, + "MAX": true, + "MIN": true, + "MINUS-INFINITY": true, + "NOT-A-NUMBER": true, + "NULL": true, + "NumericString": true, + "OBJECT": true, + "ObjectDescriptor": true, + "OCTET": true, + "OF": true, + "OID-IRI": true, + "OPTIONAL": true, + "PATTERN": true, + "PDV": true, + "PLUS-INFINITY": true, + "PRESENT": true, + "PrintableString": true, + "PRIVATE": true, + "REAL": true, + "RELATIVE-OID": true, + "RELATIVE-OID-IRI": true, + "SEQUENCE": true, + "SET": true, + "SETTINGS": true, + "SIZE": true, + "STRING": true, + "SYNTAX": true, + "T61String": true, + "TAGS": true, + "TeletexString": true, + "TIME": true, + "TIME-OF-DAY": true, + "TRUE": true, + "TYPE-IDENTIFIER": true, + "UNION": true, + "UNIQUE": true, + "UNIVERSAL": true, + "UniversalString": true, + "UTCTime": true, + "UTF8String": true, + "VideotexString": true, + "VisibleString": true, + "WITH": true, +} + +// IsKeyword reports whether s is an X.680/X.681/X.682/X.683 reserved +// word. 
Exported so other packages (notably the lowering pass, which +// needs to know whether a TTCN-3 identifier collides with an ASN.1 +// keyword) can share the same lookup table. +func IsKeyword(s string) bool { return keywordSet[s] } + +func isKeyword(s string) bool { return keywordSet[s] } diff --git a/internal/asn1/lexer_test.go b/internal/asn1/lexer_test.go new file mode 100644 index 00000000..202cad79 --- /dev/null +++ b/internal/asn1/lexer_test.go @@ -0,0 +1,228 @@ +package asn1 + +import ( + "strings" + "testing" +) + +func lex(t *testing.T, src string) []Token { + t.Helper() + l := NewLexer([]byte(src)) + toks, diags := l.All() + for _, d := range diags { + t.Logf("lex diag: %s", d.Message) + } + return toks +} + +func kinds(toks []Token) []TokenKind { + out := make([]TokenKind, 0, len(toks)) + for _, t := range toks { + out = append(out, t.Kind) + } + return out +} + +func TestLexer_BasicPunctuation(t *testing.T) { + toks := lex(t, "{ } [ ] ( ) , ; : .. ... ::= [[ ]]") + want := []TokenKind{ + LBRACE, RBRACE, + LBRACKET, RBRACKET, + LPAREN, RPAREN, + COMMA, SEMICOLON, COLON, + DOUBLE_DOT, ELLIPSIS, + ASSIGN, + DOUBLE_LBRACKET, DOUBLE_RBRACKET, + EOF, + } + got := kinds(toks) + if len(got) != len(want) { + t.Fatalf("got %d tokens, want %d: %v", len(got), len(want), got) + } + for i := range want { + if got[i] != want[i] { + t.Errorf("token %d: got %s, want %s", i, got[i], want[i]) + } + } +} + +func TestLexer_Keywords(t *testing.T) { + toks := lex(t, "BEGIN END SEQUENCE OF INTEGER BOOLEAN CHOICE") + for i, k := range []string{"BEGIN", "END", "SEQUENCE", "OF", "INTEGER", "BOOLEAN", "CHOICE"} { + if toks[i].Kind != KEYWORD { + t.Errorf("token %d: kind = %s, want KEYWORD", i, toks[i].Kind) + } + if toks[i].Text != k { + t.Errorf("token %d: text = %q, want %q", i, toks[i].Text, k) + } + } + if toks[len(toks)-1].Kind != EOF { + t.Errorf("missing EOF terminator") + } +} + +func TestLexer_TypeRefVsKeyword(t *testing.T) { + toks := lex(t, "MyType SEQUENCE myValue 
MY-TYPE") + if toks[0].Kind != TYPEREFERENCE || toks[0].Text != "MyType" { + t.Errorf("MyType: got %v", toks[0]) + } + if toks[1].Kind != KEYWORD || toks[1].Text != "SEQUENCE" { + t.Errorf("SEQUENCE: got %v", toks[1]) + } + if toks[2].Kind != IDENTIFIER || toks[2].Text != "myValue" { + t.Errorf("myValue: got %v", toks[2]) + } + if toks[3].Kind != TYPEREFERENCE || toks[3].Text != "MY-TYPE" { + t.Errorf("MY-TYPE: got %v", toks[3]) + } +} + +func TestLexer_HyphenInsideIdent(t *testing.T) { + toks := lex(t, "RRC-PDU-Definitions") + if toks[0].Kind != TYPEREFERENCE || toks[0].Text != "RRC-PDU-Definitions" { + t.Errorf("got %v", toks[0]) + } +} + +func TestLexer_DoubleHyphenEndsIdent(t *testing.T) { + // "Foo--bar" should tokenise as TYPEREFERENCE(Foo) then a + // line comment that swallows the rest. + toks := lex(t, "Foo--bar baz\nNext") + if toks[0].Kind != TYPEREFERENCE || toks[0].Text != "Foo" { + t.Errorf("first token: got %v", toks[0]) + } + if toks[1].Kind != TYPEREFERENCE || toks[1].Text != "Next" { + t.Errorf("second token: got %v", toks[1]) + } +} + +func TestLexer_Numbers(t *testing.T) { + toks := lex(t, "0 42 3.14 1e10 2.5E-3") + for i, want := range []string{"0", "42", "3.14", "1e10", "2.5E-3"} { + if toks[i].Kind != NUMBER { + t.Errorf("token %d: kind = %s, want NUMBER", i, toks[i].Kind) + } + if toks[i].Text != want { + t.Errorf("token %d: text = %q, want %q", i, toks[i].Text, want) + } + } +} + +func TestLexer_RangeOperatorPrecedesDot(t *testing.T) { + toks := lex(t, "1..10") + if toks[0].Kind != NUMBER || toks[0].Text != "1" { + t.Errorf("first: got %v", toks[0]) + } + if toks[1].Kind != DOUBLE_DOT { + t.Errorf("dotdot: got %v", toks[1]) + } + if toks[2].Kind != NUMBER || toks[2].Text != "10" { + t.Errorf("second: got %v", toks[2]) + } +} + +func TestLexer_Strings(t *testing.T) { + toks := lex(t, `"hello" 'AF'H '0101'B "with ""quote"" inside"`) + if toks[0].Kind != CSTRING { + t.Errorf("cstring: got %v", toks[0]) + } + if toks[1].Kind != HSTRING || 
toks[1].Text != "'AF'H" { + t.Errorf("hstring: got %v", toks[1]) + } + if toks[2].Kind != BSTRING || toks[2].Text != "'0101'B" { + t.Errorf("bstring: got %v", toks[2]) + } + if toks[3].Kind != CSTRING { + t.Errorf("escaped cstring: got %v", toks[3]) + } + if !strings.Contains(toks[3].Text, `""quote""`) { + t.Errorf("escaped quote text lost: %q", toks[3].Text) + } +} + +func TestLexer_AmpRef(t *testing.T) { + toks := lex(t, "&Type &id") + if toks[0].Kind != AMP_REF || toks[0].Text != "&Type" { + t.Errorf("&Type: got %v", toks[0]) + } + if toks[1].Kind != AMP_REF || toks[1].Text != "&id" { + t.Errorf("&id: got %v", toks[1]) + } +} + +func TestLexer_LineCommentTerminators(t *testing.T) { + toks := lex(t, "X -- inline -- Y\nZ -- to end of line\nW") + want := []string{"X", "Y", "Z", "W"} + for i, w := range want { + if toks[i].Text != w { + t.Errorf("token %d: %q want %q", i, toks[i].Text, w) + } + } +} + +func TestLexer_BlockComment(t *testing.T) { + toks := lex(t, "A /* outer /* nested */ still inside */ B") + if toks[0].Text != "A" || toks[1].Text != "B" { + t.Errorf("nested block comment not handled: %v", toks) + } +} + +func TestLexer_WithSyntaxWords(t *testing.T) { + l := NewLexer([]byte("&Type")) + l.EnterWithSyntax() + _, _ = l.All() + l.LeaveWithSyntax() + + l = NewLexer([]byte("CATEGORY CODE TYPE &Type")) + l.EnterWithSyntax() + toks, _ := l.All() + for i, want := range []string{"CATEGORY", "CODE", "TYPE"} { + if toks[i].Kind != WORD { + t.Errorf("token %d (%q): kind = %s, want WORD", i, want, toks[i].Kind) + } + if toks[i].Text != want { + t.Errorf("token %d: text %q want %q", i, toks[i].Text, want) + } + } + if toks[3].Kind != AMP_REF || toks[3].Text != "&Type" { + t.Errorf("AMP_REF after WORDs: got %v", toks[3]) + } +} + +func TestLexer_Assign(t *testing.T) { + toks := lex(t, "Foo ::= INTEGER") + if toks[1].Kind != ASSIGN || toks[1].Text != "::=" { + t.Errorf("got %v", toks[1]) + } +} + +func TestLexer_Ranges(t *testing.T) { + toks := lex(t, "INTEGER 
(0..255)") + want := []TokenKind{KEYWORD, LPAREN, NUMBER, DOUBLE_DOT, NUMBER, RPAREN, EOF} + if k := kinds(toks); !equalKinds(k, want) { + t.Errorf("got %v want %v", k, want) + } +} + +func TestLexer_TolerantOfGarbage(t *testing.T) { + l := NewLexer([]byte("@#$")) + toks, diags := l.All() + if len(toks) < 2 { + t.Fatalf("expected at least one error token + EOF, got %d", len(toks)) + } + if len(diags) == 0 { + t.Fatal("expected at least one diagnostic") + } +} + +func equalKinds(a, b []TokenKind) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/internal/asn1/param/param.go b/internal/asn1/param/param.go new file mode 100644 index 00000000..6f05ebe2 --- /dev/null +++ b/internal/asn1/param/param.go @@ -0,0 +1,266 @@ +// Package param implements ASN.1 X.683 parameterised type and value +// instantiation. Given a `Pair { ItemType }` parametric template and a +// `Pair { INTEGER }` use site, it returns a fully-substituted concrete +// `SEQUENCE { first INTEGER, second INTEGER }` type expression. +// +// Cross-module chained substitution is handled by routing reference +// lookups through a Basket (see internal/asn1/resolver). Instantiation +// results are cached by (template, hash(actuals)) so editor-driven +// re-walks don't recompute the same expansion. +package param + +import ( + "fmt" + "hash/fnv" + "sync" + + "github.com/nokia/ntt/internal/asn1/ast" + "github.com/nokia/ntt/internal/asn1/resolver" +) + +// Context binds formal parameters to actual ones. We model it as a +// linked list so nested instantiations can layer bindings without +// mutating an outer context. +type Context struct { + bindings map[string]ast.ActualParameter + parent *Context +} + +// NewContext constructs an empty binding context. +func NewContext() *Context { return &Context{bindings: make(map[string]ast.ActualParameter)} } + +// Child returns a context with bindings layered on top of this one. 
+func (c *Context) Child(bindings map[string]ast.ActualParameter) *Context { + return &Context{bindings: bindings, parent: c} +} + +// Lookup walks the context chain looking for a binding with the given +// formal-parameter name. +func (c *Context) Lookup(name string) (ast.ActualParameter, bool) { + for it := c; it != nil; it = it.parent { + if v, ok := it.bindings[name]; ok { + return v, true + } + } + return ast.ActualParameter{}, false +} + +// --------------------------------------------------------------------------- +// Instantiator +// --------------------------------------------------------------------------- + +// Instantiator walks parametrised template bodies, substituting formal +// parameters with their actuals and recursively expanding nested +// instantiations. +type Instantiator struct { + basket *resolver.Basket + cache sync.Map // key string -> ast.Type +} + +// NewInstantiator returns an Instantiator that resolves cross-module +// references through b. +func NewInstantiator(b *resolver.Basket) *Instantiator { + return &Instantiator{basket: b} +} + +// Instantiate returns the type produced by substituting actuals into +// the body of the parametrised assignment named by ref, looking it up +// in scope. If ref doesn't name a parametrised type or actuals is nil, +// returns the unmodified referenced type. +func (in *Instantiator) Instantiate(scope *resolver.Scope, ref *ast.TypeRef, actuals *ast.ActualParameterList) (ast.Type, []ast.Diagnostic) { + target := in.resolveTarget(scope, ref) + if target == nil { + return nil, []ast.Diagnostic{{ + Pos: ref.Pos(), End: ref.End(), + Severity: ast.SeverityError, Code: "param.unresolved", + Message: fmt.Sprintf("cannot resolve parametric type %q", ref.Name), + }} + } + ta, ok := target.(*ast.TypeAssignment) + if !ok || ta.Params == nil { + // Not parametric. The reference resolves to the assignment's + // type as-is. 
+ return assignmentType(target), nil + } + if actuals == nil { + return nil, []ast.Diagnostic{{ + Pos: ref.Pos(), End: ref.End(), + Severity: ast.SeverityError, Code: "param.missing-actuals", + Message: fmt.Sprintf("parametric type %q used without actual parameters", ta.Name), + }} + } + if got, want := len(actuals.Params), len(ta.Params.Params); got != want { + return nil, []ast.Diagnostic{{ + Pos: ref.Pos(), End: ref.End(), + Severity: ast.SeverityError, Code: "param.arity-mismatch", + Message: fmt.Sprintf("parametric type %q expects %d arguments, got %d", ta.Name, want, got), + }} + } + + key := instantiationKey(ta, actuals) + if cached, ok := in.cache.Load(key); ok { + return cached.(ast.Type), nil + } + + bindings := make(map[string]ast.ActualParameter, len(ta.Params.Params)) + for i, p := range ta.Params.Params { + bindings[p.Reference] = actuals.Params[i] + } + ctx := NewContext().Child(bindings) + out := in.substType(scope, ctx, ta.Type) + in.cache.Store(key, out) + return out, nil +} + +func (in *Instantiator) resolveTarget(scope *resolver.Scope, ref *ast.TypeRef) ast.Assignment { + if ref == nil { + return nil + } + if ref.Module != "" { + if target := in.basket.Get(ref.Module); target != nil { + if sym := target.Lookup(resolver.NsType, ref.Name); sym != nil { + if a, ok := sym.Definition.(ast.Assignment); ok { + return a + } + } + } + return nil + } + if sym := scope.Lookup(resolver.NsType, ref.Name); sym != nil { + if a, ok := sym.Definition.(ast.Assignment); ok { + return a + } + } + // Follow IMPORTS - if the name was imported, look it up there. 
+ if from := scope.ImportedFrom(ref.Name); from != "" { + if target := in.basket.Get(from); target != nil { + if sym := target.Lookup(resolver.NsType, ref.Name); sym != nil { + if a, ok := sym.Definition.(ast.Assignment); ok { + return a + } + } + } + } + return nil +} + +func assignmentType(a ast.Assignment) ast.Type { + if ta, ok := a.(*ast.TypeAssignment); ok { + return ta.Type + } + return nil +} + +// substType returns a copy of t with all formal-parameter references +// replaced by the actuals bound in ctx. References to non-parametric +// types are returned as-is. +func (in *Instantiator) substType(scope *resolver.Scope, ctx *Context, t ast.Type) ast.Type { + if t == nil { + return nil + } + switch t := t.(type) { + case *ast.ReferencedType: + if t.Ref != nil { + if ap, ok := ctx.Lookup(t.Ref.Name); ok && ap.Type != nil { + return ap.Type + } + } + if t.Actuals != nil { + // Nested parametric instantiation. + child, diags := in.Instantiate(scope, t.Ref, in.substActuals(scope, ctx, t.Actuals)) + _ = diags // chained diagnostics should be surfaced by caller via Instantiate again + if child != nil { + return child + } + } + return t + case *ast.SequenceType: + copy := *t + copy.Components = in.substComponents(scope, ctx, t.Components) + copy.Extensions = in.substExtensions(scope, ctx, t.Extensions) + return © + case *ast.SetType: + copy := *t + copy.Components = in.substComponents(scope, ctx, t.Components) + copy.Extensions = in.substExtensions(scope, ctx, t.Extensions) + return © + case *ast.ChoiceType: + copy := *t + copy.Alternatives = in.substComponents(scope, ctx, t.Alternatives) + copy.Extensions = in.substExtensions(scope, ctx, t.Extensions) + return © + case *ast.SequenceOfType: + copy := *t + copy.Element = in.substType(scope, ctx, t.Element) + return © + case *ast.SetOfType: + copy := *t + copy.Element = in.substType(scope, ctx, t.Element) + return © + case *ast.TaggedType: + copy := *t + copy.Underlying = in.substType(scope, ctx, t.Underlying) + 
return © + case *ast.ConstrainedType: + copy := *t + copy.Inner = in.substType(scope, ctx, t.Inner) + return © + } + return t +} + +func (in *Instantiator) substComponents(scope *resolver.Scope, ctx *Context, comps []ast.Component) []ast.Component { + if len(comps) == 0 { + return comps + } + out := make([]ast.Component, len(comps)) + for i, c := range comps { + out[i] = c + out[i].Type = in.substType(scope, ctx, c.Type) + } + return out +} + +func (in *Instantiator) substExtensions(scope *resolver.Scope, ctx *Context, exts []ast.ExtensionAddition) []ast.ExtensionAddition { + if len(exts) == 0 { + return exts + } + out := make([]ast.ExtensionAddition, len(exts)) + for i, e := range exts { + out[i] = e + out[i].Components = in.substComponents(scope, ctx, e.Components) + } + return out +} + +func (in *Instantiator) substActuals(scope *resolver.Scope, ctx *Context, al *ast.ActualParameterList) *ast.ActualParameterList { + if al == nil { + return nil + } + out := &ast.ActualParameterList{Span: al.Span, Params: make([]ast.ActualParameter, len(al.Params))} + for i, p := range al.Params { + out.Params[i] = ast.ActualParameter{Span: p.Span} + if p.Type != nil { + out.Params[i].Type = in.substType(scope, ctx, p.Type) + } else { + out.Params[i].Value = p.Value + } + } + return out +} + +// instantiationKey produces a stable cache key for (template, actuals). +// Identical actuals across different use sites collide here; that's +// intentional - we want to return the same expansion both times. 
+func instantiationKey(ta *ast.TypeAssignment, actuals *ast.ActualParameterList) string { + h := fnv.New64a() + fmt.Fprintf(h, "%p|%d", ta, len(actuals.Params)) + for _, a := range actuals.Params { + if a.Type != nil { + fmt.Fprintf(h, "|T:%d-%d", a.Type.Pos(), a.Type.End()) + } else if a.Value != nil { + fmt.Fprintf(h, "|V:%d-%d", a.Value.Pos(), a.Value.End()) + } + } + return fmt.Sprintf("%x", h.Sum64()) +} diff --git a/internal/asn1/param/param_test.go b/internal/asn1/param/param_test.go new file mode 100644 index 00000000..4ff24317 --- /dev/null +++ b/internal/asn1/param/param_test.go @@ -0,0 +1,108 @@ +package param_test + +import ( + "testing" + + "github.com/nokia/ntt/internal/asn1" + "github.com/nokia/ntt/internal/asn1/ast" + "github.com/nokia/ntt/internal/asn1/param" + "github.com/nokia/ntt/internal/asn1/resolver" +) + +func TestInstantiate_SimpleTypeParam(t *testing.T) { + src := `M DEFINITIONS ::= BEGIN +Pair { ItemType } ::= SEQUENCE { first ItemType, second ItemType } +END` + m := asn1.ParseModule([]byte(src)) + b := resolver.NewBasket() + scope := b.Add(m) + resolver.Resolve(b, m) + + // Build an actual parameter list: { INTEGER } + actuals := &ast.ActualParameterList{ + Params: []ast.ActualParameter{ + {Type: &ast.IntegerType{}}, + }, + } + ref := &ast.TypeRef{Name: "Pair"} + + in := param.NewInstantiator(b) + out, diags := in.Instantiate(scope, ref, actuals) + if len(diags) > 0 { + t.Fatalf("unexpected diags: %+v", diags) + } + seq, ok := out.(*ast.SequenceType) + if !ok { + t.Fatalf("expected SequenceType, got %T", out) + } + if len(seq.Components) != 2 { + t.Fatalf("expected 2 components, got %d", len(seq.Components)) + } + for i, c := range seq.Components { + if _, ok := c.Type.(*ast.IntegerType); !ok { + t.Errorf("component %d: %T want *IntegerType", i, c.Type) + } + } +} + +func TestInstantiate_ArityMismatch(t *testing.T) { + src := `M DEFINITIONS ::= BEGIN +Pair { ItemType } ::= SEQUENCE { a ItemType } +END` + m := 
asn1.ParseModule([]byte(src)) + b := resolver.NewBasket() + scope := b.Add(m) + resolver.Resolve(b, m) + + actuals := &ast.ActualParameterList{} // empty + in := param.NewInstantiator(b) + _, diags := in.Instantiate(scope, &ast.TypeRef{Name: "Pair"}, actuals) + if len(diags) == 0 || diags[0].Code != "param.arity-mismatch" { + t.Errorf("expected arity mismatch, got %+v", diags) + } +} + +func TestInstantiate_UnresolvedReference(t *testing.T) { + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN END`)) + b := resolver.NewBasket() + scope := b.Add(m) + + actuals := &ast.ActualParameterList{} + in := param.NewInstantiator(b) + _, diags := in.Instantiate(scope, &ast.TypeRef{Name: "Unknown"}, actuals) + if len(diags) == 0 || diags[0].Code != "param.unresolved" { + t.Errorf("expected param.unresolved, got %+v", diags) + } +} + +func TestInstantiate_CrossModuleChain(t *testing.T) { + a := asn1.ParseModule([]byte(`A DEFINITIONS ::= BEGIN +Box { T } ::= SEQUENCE { value T } +END`)) + b := asn1.ParseModule([]byte(`B DEFINITIONS ::= BEGIN +IMPORTS Box FROM A ; +IntBox ::= Box { INTEGER } +END`)) + basket := resolver.NewBasket() + basket.Add(a) + scopeB := basket.Add(b) + resolver.Resolve(basket, a) + resolver.Resolve(basket, b) + + rt := b.Assignments[0].(*ast.TypeAssignment).Type.(*ast.ReferencedType) + in := param.NewInstantiator(basket) + out, diags := in.Instantiate(scopeB, rt.Ref, rt.Actuals) + if len(diags) > 0 { + t.Fatalf("diags: %+v", diags) + } + seq, ok := out.(*ast.SequenceType) + if !ok { + t.Fatalf("got %T", out) + } + if len(seq.Components) != 1 { + t.Fatalf("got %d comps", len(seq.Components)) + } + if _, ok := seq.Components[0].Type.(*ast.IntegerType); !ok { + t.Errorf("got %T want *IntegerType", seq.Components[0].Type) + } +} diff --git a/internal/asn1/parser.go b/internal/asn1/parser.go new file mode 100644 index 00000000..34a61e19 --- /dev/null +++ b/internal/asn1/parser.go @@ -0,0 +1,1676 @@ +package asn1 + +import ( + "fmt" + "strconv" + "strings" + + 
"github.com/nokia/ntt/internal/asn1/ast" +) + +// Parser is a recursive-descent ASN.1 parser. To make backtracking +// trivial we lex the entire source up-front into a slice and use a +// single int index. Save and restore is a one-int snapshot. +type Parser struct { + src []byte + tokens []Token + pos int + cur Token + peek Token + diags []ast.Diagnostic + lexErrs []Diagnostic +} + +// NewParser constructs a Parser over src. +func NewParser(src []byte) *Parser { + l := NewLexer(src) + toks, errs := l.All() + p := &Parser{src: src, tokens: toks, lexErrs: errs} + p.refresh() + return p +} + +// refresh repopulates cur/peek from the current pos. +func (p *Parser) refresh() { + p.cur = p.tokAt(p.pos) + p.peek = p.tokAt(p.pos + 1) +} + +func (p *Parser) tokAt(i int) Token { + if i < len(p.tokens) { + return p.tokens[i] + } + if n := len(p.tokens); n > 0 { + return Token{Kind: EOF, Pos: p.tokens[n-1].End, End: p.tokens[n-1].End} + } + return Token{Kind: EOF} +} + +// save / restore enable cheap backtracking. The lexer state is no +// longer relevant because all tokens were lexed up-front. +type pmark int + +func (p *Parser) save() pmark { return pmark(p.pos) } +func (p *Parser) restore(m pmark) { + p.pos = int(m) + p.refresh() +} + +// ParseModule parses src as a single ASN.1 module. The returned +// *ast.Module is always non-nil; check Diagnostics for any issues. +func ParseModule(src []byte) *ast.Module { + p := NewParser(src) + m := p.parseModule() + for _, d := range p.lexErrs { + m.Diagnostics = append(m.Diagnostics, ast.Diagnostic{ + Pos: d.Column, + End: d.Column, + Severity: ast.SeverityError, + Code: "lex", + Message: d.Message, + }) + } + m.Diagnostics = append(m.Diagnostics, p.diags...) + return m +} + +// --------------------------------------------------------------------------- +// Construction helpers - all return value types so concrete nodes can +// be built with `&ast.BuiltinType{TypeBase: tbase(s,e), ...}`. 
+// --------------------------------------------------------------------------- + +func span(start, end int) ast.Span { return ast.NewSpan(start, end) } +func tbase(start, end int) ast.TypeBase { return ast.TypeBase{Span: span(start, end)} } +func vbase(start, end int) ast.ValueBase { return ast.ValueBase{Span: span(start, end)} } +func cbase(start, end int) ast.ConstraintElementBase { + return ast.ConstraintElementBase{Span: span(start, end)} +} + +// --------------------------------------------------------------------------- +// Lookahead helpers +// --------------------------------------------------------------------------- + +func (p *Parser) advance() Token { + prev := p.cur + p.pos++ + p.refresh() + return prev +} + +func (p *Parser) at(k TokenKind) bool { return p.cur.Kind == k } +func (p *Parser) atKeyword(text string) bool { return p.cur.Kind == KEYWORD && p.cur.Text == text } + +func (p *Parser) eat(k TokenKind) (Token, bool) { + if p.cur.Kind == k { + t := p.cur + p.advance() + return t, true + } + return Token{}, false +} + +func (p *Parser) eatKeyword(text string) (Token, bool) { + if p.atKeyword(text) { + t := p.cur + p.advance() + return t, true + } + return Token{}, false +} + +func (p *Parser) expect(k TokenKind) (Token, bool) { + if t, ok := p.eat(k); ok { + return t, true + } + p.errorf(p.cur.Pos, "expected %s, got %s(%q)", k, p.cur.Kind, p.cur.Text) + return p.cur, false +} + +func (p *Parser) expectKeyword(text string) bool { + if _, ok := p.eatKeyword(text); ok { + return true + } + p.errorf(p.cur.Pos, "expected keyword %q, got %s(%q)", text, p.cur.Kind, p.cur.Text) + return false +} + +func (p *Parser) errorf(pos int, format string, args ...interface{}) { + p.diags = append(p.diags, ast.Diagnostic{ + Pos: pos, + End: pos, + Severity: ast.SeverityError, + Code: "syntax", + Message: fmt.Sprintf(format, args...), + }) +} + +// --------------------------------------------------------------------------- +// Module +// 
--------------------------------------------------------------------------- + +func (p *Parser) parseModule() *ast.Module { + start := p.cur.Pos + m := &ast.Module{Span: span(start, start)} + m.Identifier = p.parseModuleIdentifier() + m.SetRange(start, m.Identifier.End()) + + if !p.expectKeyword("DEFINITIONS") { + return m + } + + // Encoding reference instructions (e.g. TAG INSTRUCTIONS) - skip. + for p.cur.Kind == TYPEREFERENCE && p.peek.Kind == KEYWORD && p.peek.Text == "INSTRUCTIONS" { + p.advance() + p.advance() + } + + m.Tagging = ast.TagsExplicit + switch { + case p.atKeyword("EXPLICIT"): + p.advance() + m.Tagging = ast.TagsExplicit + p.eatKeyword("TAGS") + case p.atKeyword("IMPLICIT"): + p.advance() + m.Tagging = ast.TagsImplicit + p.eatKeyword("TAGS") + case p.atKeyword("AUTOMATIC"): + p.advance() + m.Tagging = ast.TagsAutomatic + p.eatKeyword("TAGS") + } + + if p.atKeyword("EXTENSIBILITY") { + p.advance() + p.eatKeyword("IMPLIED") + m.Extensible = true + } + + if !p.atKeyword("BEGIN") { + if _, ok := p.eat(ASSIGN); !ok { + p.errorf(p.cur.Pos, "expected ::= before BEGIN") + } + } + if !p.expectKeyword("BEGIN") { + return m + } + + m.Exports = p.parseExports() + m.Imports = p.parseImports() + m.Assignments = p.parseAssignments() + p.eatKeyword("END") + m.SetRange(start, p.cur.Pos) + return m +} + +func (p *Parser) parseModuleIdentifier() ast.ModuleIdentifier { + start := p.cur.Pos + id := ast.ModuleIdentifier{Span: span(start, start)} + if p.cur.Kind != TYPEREFERENCE { + p.errorf(p.cur.Pos, "expected module identifier") + return id + } + id.Name = p.cur.Text + end := p.cur.End + p.advance() + if p.at(LBRACE) { + oid := p.parseOID() + id.OID = oid + end = oid.End() + } + id.DefinitiveName = string(p.src[start:end]) + id.SetRange(start, end) + return id +} + +func (p *Parser) parseOID() *ast.OID { + start := p.cur.Pos + if _, ok := p.expect(LBRACE); !ok { + return nil + } + oid := &ast.OID{Span: span(start, start)} + for !p.at(RBRACE) && !p.at(EOF) { + 
oid.Components = append(oid.Components, p.parseOIDComponent()) + } + end := p.cur.End + p.expect(RBRACE) + oid.Raw = string(p.src[start:end]) + oid.SetRange(start, end) + return oid +} + +func (p *Parser) parseOIDComponent() ast.OIDComponent { + start := p.cur.Pos + c := ast.OIDComponent{Span: span(start, start)} + switch p.cur.Kind { + case IDENTIFIER: + c.Name = p.cur.Text + p.advance() + if p.at(LPAREN) { + p.advance() + if p.at(NUMBER) { + if v, err := strconv.ParseInt(p.cur.Text, 10, 64); err == nil { + c.Number = v + c.HasNum = true + } + p.advance() + } + p.expect(RPAREN) + } + case NUMBER: + if v, err := strconv.ParseInt(p.cur.Text, 10, 64); err == nil { + c.Number = v + c.HasNum = true + } + p.advance() + default: + p.errorf(p.cur.Pos, "expected OID component, got %s", p.cur.Kind) + p.advance() + } + c.SetRange(start, p.cur.Pos) + return c +} + +// --------------------------------------------------------------------------- +// EXPORTS / IMPORTS +// --------------------------------------------------------------------------- + +func (p *Parser) parseExports() *ast.Exports { + if !p.atKeyword("EXPORTS") { + return nil + } + start := p.cur.Pos + p.advance() + e := &ast.Exports{Span: span(start, start)} + if p.atKeyword("ALL") { + p.advance() + e.All = true + } else { + for !p.at(SEMICOLON) && !p.at(EOF) { + if p.cur.Kind == TYPEREFERENCE || p.cur.Kind == IDENTIFIER { + e.Symbols = append(e.Symbols, p.cur.Text) + } + p.advance() + if p.at(COMMA) { + p.advance() + } + } + } + end := p.cur.End + p.expect(SEMICOLON) + e.SetRange(start, end) + return e +} + +func (p *Parser) parseImports() []*ast.Import { + if !p.atKeyword("IMPORTS") { + return nil + } + p.advance() + var out []*ast.Import + for !p.at(SEMICOLON) && !p.at(EOF) { + imp := p.parseOneImport() + if imp != nil { + out = append(out, imp) + } + if p.at(COMMA) { + p.advance() + } + } + p.expect(SEMICOLON) + return out +} + +func (p *Parser) parseOneImport() *ast.Import { + start := p.cur.Pos + imp := 
&ast.Import{Span: span(start, start)} + for !p.atKeyword("FROM") && !p.at(SEMICOLON) && !p.at(EOF) { + if p.cur.Kind == TYPEREFERENCE || p.cur.Kind == IDENTIFIER { + imp.Symbols = append(imp.Symbols, p.cur.Text) + } + p.advance() + if p.at(COMMA) { + p.advance() + } + } + if !p.atKeyword("FROM") { + return nil + } + p.advance() + if p.cur.Kind != TYPEREFERENCE { + p.errorf(p.cur.Pos, "expected module name after FROM") + return nil + } + imp.From = p.cur.Text + end := p.cur.End + p.advance() + if p.at(LBRACE) { + oid := p.parseOID() + imp.OID = oid + end = oid.End() + } + imp.SetRange(start, end) + return imp +} + +// --------------------------------------------------------------------------- +// Assignments +// --------------------------------------------------------------------------- + +func (p *Parser) parseAssignments() []ast.Assignment { + var out []ast.Assignment + for !p.atKeyword("END") && !p.at(EOF) { + a := p.parseAssignment() + if a != nil { + out = append(out, a) + continue + } + p.syncToNextAssignment() + } + return out +} + +func (p *Parser) parseAssignment() ast.Assignment { + start := p.cur.Pos + + if p.cur.Kind == TYPEREFERENCE && p.peek.Kind == ASSIGN { + name := p.cur.Text + p.advance() + p.advance() + if p.atKeyword("CLASS") { + cls := p.parseObjectClassBody() + return &ast.ObjectClassAssignment{ + Name: name, + Class: cls, + Span: span(start, cls.End()), + } + } + t := p.parseType() + return &ast.TypeAssignment{Name: name, Type: t, Span: span(start, t.End())} + } + + if p.cur.Kind == TYPEREFERENCE && p.peek.Kind == LBRACE { + if name, params, ok := p.tryParameterisedHeader(); ok { + if p.atKeyword("CLASS") { + cls := p.parseObjectClassBody() + return &ast.ObjectClassAssignment{ + Name: name, Params: params, + Class: cls, + Span: span(start, cls.End()), + } + } + t := p.parseType() + return &ast.TypeAssignment{ + Name: name, Params: params, Type: t, + Span: span(start, t.End()), + } + } + } + + if p.cur.Kind == TYPEREFERENCE || p.cur.Kind == 
IDENTIFIER { + name := p.cur.Text + isLower := p.cur.Kind == IDENTIFIER + p.advance() + t := p.parseType() + if !p.at(ASSIGN) { + p.errorf(p.cur.Pos, "expected '::=' in assignment for %q", name) + return nil + } + p.advance() + if isLower { + v := p.parseValue() + return &ast.ValueAssignment{ + Name: name, Type: t, Value: v, + Span: span(start, p.cur.Pos), + } + } + if p.at(LBRACE) { + set := p.parseElementSet() + return &ast.ValueSetTypeAssignment{ + Name: name, Type: t, Set: set, + Span: span(start, p.cur.Pos), + } + } + p.errorf(p.cur.Pos, "expected '{' for value set in assignment for %q", name) + return nil + } + + p.errorf(p.cur.Pos, "unexpected token %s(%q) at top level", p.cur.Kind, p.cur.Text) + return nil +} + +func (p *Parser) tryParameterisedHeader() (string, *ast.ParameterList, bool) { + if p.cur.Kind != TYPEREFERENCE || p.peek.Kind != LBRACE { + return "", nil, false + } + m := p.save() + name := p.cur.Text + p.advance() + params := p.parseParameterList() + if params == nil || !p.at(ASSIGN) { + p.restore(m) + return "", nil, false + } + p.advance() + return name, params, true +} + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +func (p *Parser) parseType() ast.Type { + t := p.parseUntaggedType() + if t == nil { + return &ast.AnyType{TypeBase: tbase(p.cur.Pos, p.cur.Pos)} + } + for p.at(LPAREN) { + c := p.parseConstraint() + t = &ast.ConstrainedType{ + TypeBase: tbase(t.Pos(), c.End()), + Inner: t, + Constraint: c, + } + } + return t +} + +func (p *Parser) parseUntaggedType() ast.Type { + start := p.cur.Pos + if p.at(LBRACKET) { + return p.parseTaggedType(start) + } + switch p.cur.Kind { + case KEYWORD: + return p.parseBuiltinType() + case TYPEREFERENCE: + return p.parseReferencedType() + case IDENTIFIER: + ref := &ast.TypeRef{Name: p.cur.Text, Span: span(start, p.cur.End)} + p.advance() + return &ast.ReferencedType{TypeBase: 
tbase(start, p.cur.Pos), Ref: ref} + } + p.errorf(start, "expected type, got %s(%q)", p.cur.Kind, p.cur.Text) + p.advance() + return &ast.AnyType{TypeBase: tbase(start, p.cur.Pos)} +} + +func (p *Parser) parseBuiltinType() ast.Type { + start := p.cur.Pos + switch p.cur.Text { + case "BOOLEAN": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.Boolean, Name: "BOOLEAN"} + case "NULL": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.Null, Name: "NULL"} + case "REAL": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.Real, Name: "REAL"} + case "INTEGER": + p.advance() + t := &ast.IntegerType{TypeBase: tbase(start, p.cur.Pos)} + if p.at(LBRACE) { + t.NamedNumbers = p.parseNamedNumberList() + t.SetRange(start, p.cur.Pos) + } + return t + case "BIT": + p.advance() + p.expectKeyword("STRING") + t := &ast.BitStringType{TypeBase: tbase(start, p.cur.Pos)} + if p.at(LBRACE) { + t.NamedBits = p.parseNamedNumberList() + t.SetRange(start, p.cur.Pos) + } + return t + case "OCTET": + p.advance() + p.expectKeyword("STRING") + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.OctetString, Name: "OCTET STRING"} + case "OBJECT": + p.advance() + p.expectKeyword("IDENTIFIER") + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.ObjectIdentifier, Name: "OBJECT IDENTIFIER"} + case "RELATIVE-OID": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.RelativeOID, Name: "RELATIVE-OID"} + case "OID-IRI": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.OIDIRI, Name: "OID-IRI"} + case "RELATIVE-OID-IRI": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.RelativeOIDIRI, Name: "RELATIVE-OID-IRI"} + case "ENUMERATED": + p.advance() + return p.parseEnumeratedType(start) + case "SEQUENCE": + p.advance() + return p.parseSequenceOrSetType(start, true) + case 
"SET": + p.advance() + return p.parseSequenceOrSetType(start, false) + case "CHOICE": + p.advance() + return p.parseChoiceType(start) + case "EXTERNAL": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.External, Name: "EXTERNAL"} + case "EMBEDDED": + p.advance() + p.eatKeyword("PDV") + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.EmbeddedPDV, Name: "EMBEDDED PDV"} + case "CHARACTER": + p.advance() + p.eatKeyword("STRING") + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.CharacterString, Name: "CHARACTER STRING"} + case "UTCTime": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.UTCTime, Name: "UTCTime"} + case "GeneralizedTime": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.GeneralizedTime, Name: "GeneralizedTime"} + case "DATE": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.Date, Name: "DATE"} + case "DATE-TIME": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.DateTime, Name: "DATE-TIME"} + case "TIME": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.Time, Name: "TIME"} + case "TIME-OF-DAY": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.TimeOfDay, Name: "TIME-OF-DAY"} + case "DURATION": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.Duration, Name: "DURATION"} + case "ObjectDescriptor": + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: ast.ObjectDescriptor, Name: "ObjectDescriptor"} + } + if kind, ok := restrictedStringKind(p.cur.Text); ok { + name := p.cur.Text + p.advance() + return &ast.BuiltinType{TypeBase: tbase(start, p.cur.Pos), Kind: kind, Name: name} + } + p.errorf(start, "unexpected keyword %q in type position", p.cur.Text) + p.advance() + return &ast.AnyType{TypeBase: tbase(start, 
p.cur.Pos)} +} + +func restrictedStringKind(s string) (ast.BuiltinKind, bool) { + switch s { + case "BMPString": + return ast.BMPString, true + case "GeneralString": + return ast.GeneralString, true + case "GraphicString": + return ast.GraphicString, true + case "IA5String": + return ast.IA5String, true + case "ISO646String": + return ast.ISO646String, true + case "NumericString": + return ast.NumericString, true + case "PrintableString": + return ast.PrintableString, true + case "TeletexString": + return ast.TeletexString, true + case "T61String": + return ast.T61String, true + case "UniversalString": + return ast.UniversalString, true + case "UTF8String": + return ast.UTF8String, true + case "VideotexString": + return ast.VideotexString, true + case "VisibleString": + return ast.VisibleString, true + } + return ast.UnknownBuiltin, false +} + +func (p *Parser) parseNamedNumberList() []ast.NamedNumber { + p.expect(LBRACE) + var out []ast.NamedNumber + for !p.at(RBRACE) && !p.at(EOF) { + start := p.cur.Pos + nn := ast.NamedNumber{Span: span(start, start)} + if p.cur.Kind != IDENTIFIER { + p.errorf(p.cur.Pos, "expected named number identifier") + p.advance() + continue + } + nn.Name = p.cur.Text + p.advance() + if p.at(LPAREN) { + p.advance() + nn.Value = p.parseValue() + p.expect(RPAREN) + } + nn.SetRange(start, p.cur.Pos) + out = append(out, nn) + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + return out +} + +func (p *Parser) parseEnumeratedType(start int) *ast.EnumeratedType { + t := &ast.EnumeratedType{TypeBase: tbase(start, start)} + p.expect(LBRACE) + target := &t.Items + for !p.at(RBRACE) && !p.at(EOF) { + if p.at(ELLIPSIS) { + t.Extensible = true + target = &t.Extensions + p.advance() + if p.at(COMMA) { + p.advance() + } + continue + } + if p.cur.Kind != IDENTIFIER { + p.errorf(p.cur.Pos, "expected ENUMERATED item identifier") + p.advance() + continue + } + itemStart := p.cur.Pos + item := ast.EnumItem{Span: span(itemStart, itemStart), Name: 
p.cur.Text} + p.advance() + if p.at(LPAREN) { + p.advance() + item.Value = p.parseValue() + p.expect(RPAREN) + } + item.SetRange(itemStart, p.cur.Pos) + *target = append(*target, item) + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + t.SetRange(start, p.cur.Pos) + return t +} + +func (p *Parser) parseSequenceOrSetType(start int, isSequence bool) ast.Type { + if p.atKeyword("OF") || (p.at(LPAREN) && lookaheadIsOfAfterParen(p)) { + var c *ast.Constraint + if p.at(LPAREN) { + c = p.parseConstraint() + } + p.expectKeyword("OF") + if p.cur.Kind == IDENTIFIER && p.peek.Kind != ASSIGN { + p.advance() + } + elem := p.parseType() + if isSequence { + return &ast.SequenceOfType{TypeBase: tbase(start, elem.End()), Element: elem, Constraint: c} + } + return &ast.SetOfType{TypeBase: tbase(start, elem.End()), Element: elem, Constraint: c} + } + p.expect(LBRACE) + var comps []ast.Component + var ext []ast.ExtensionAddition + extensible := false + for !p.at(RBRACE) && !p.at(EOF) { + if p.at(ELLIPSIS) { + extensible = true + p.advance() + if p.at(COMMA) { + p.advance() + } + continue + } + if extensible && p.at(DOUBLE_LBRACKET) { + ext = append(ext, p.parseExtensionGroup(len(ext)+1)) + if p.at(COMMA) { + p.advance() + } + continue + } + c := p.parseComponent() + if extensible { + ext = append(ext, ast.ExtensionAddition{Components: []ast.Component{c}, Span: span(c.Pos(), c.End())}) + } else { + comps = append(comps, c) + } + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + end := p.cur.Pos + if isSequence { + return &ast.SequenceType{TypeBase: tbase(start, end), Components: comps, Extensible: extensible, Extensions: ext} + } + return &ast.SetType{TypeBase: tbase(start, end), Components: comps, Extensible: extensible, Extensions: ext} +} + +func lookaheadIsOfAfterParen(p *Parser) bool { + depth := 0 + for i := p.pos; i < len(p.tokens); i++ { + t := p.tokens[i] + switch { + case t.Kind == LPAREN: + depth++ + case t.Kind == RPAREN: + depth-- + if depth == 0 { + 
nx := p.tokAt(i + 1) + return nx.Kind == KEYWORD && nx.Text == "OF" + } + } + } + return false +} + +func (p *Parser) parseComponent() ast.Component { + start := p.cur.Pos + c := ast.Component{Span: span(start, start)} + if p.atKeyword("COMPONENTS") { + p.advance() + p.expectKeyword("OF") + c.ComponentsOf = true + c.Type = p.parseType() + c.SetRange(start, c.Type.End()) + return c + } + if p.cur.Kind != IDENTIFIER { + p.errorf(p.cur.Pos, "expected component name") + p.advance() + return c + } + c.Name = p.cur.Text + p.advance() + c.Type = p.parseType() + if p.atKeyword("OPTIONAL") { + p.advance() + c.Optional = true + } else if p.atKeyword("DEFAULT") { + p.advance() + c.Default = p.parseValue() + c.Optional = true + } + c.SetRange(start, p.cur.Pos) + return c +} + +func (p *Parser) parseExtensionGroup(num int) ast.ExtensionAddition { + start := p.cur.Pos + p.expect(DOUBLE_LBRACKET) + if p.cur.Kind == NUMBER && p.peek.Kind == COLON { + if v, err := strconv.Atoi(p.cur.Text); err == nil { + num = v + } + p.advance() + p.advance() + } + var comps []ast.Component + for !p.at(DOUBLE_RBRACKET) && !p.at(EOF) { + comps = append(comps, p.parseComponent()) + if p.at(COMMA) { + p.advance() + } + } + p.expect(DOUBLE_RBRACKET) + end := p.cur.Pos + return ast.ExtensionAddition{Span: span(start, end), Group: num, Components: comps} +} + +func (p *Parser) parseChoiceType(start int) ast.Type { + p.expect(LBRACE) + var alts []ast.Component + var ext []ast.ExtensionAddition + extensible := false + for !p.at(RBRACE) && !p.at(EOF) { + if p.at(ELLIPSIS) { + extensible = true + p.advance() + if p.at(COMMA) { + p.advance() + } + continue + } + if extensible && p.at(DOUBLE_LBRACKET) { + ext = append(ext, p.parseExtensionGroup(len(ext)+1)) + if p.at(COMMA) { + p.advance() + } + continue + } + a := p.parseComponent() + if extensible { + ext = append(ext, ast.ExtensionAddition{Span: span(a.Pos(), a.End()), Components: []ast.Component{a}}) + } else { + alts = append(alts, a) + } + if 
p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + end := p.cur.Pos + return &ast.ChoiceType{TypeBase: tbase(start, end), Alternatives: alts, Extensible: extensible, Extensions: ext} +} + +func (p *Parser) parseTaggedType(start int) ast.Type { + p.expect(LBRACKET) + tag := ast.Tag{Span: span(start, start), Class: ast.ContextSpecificTag} + if p.atKeyword("UNIVERSAL") { + tag.Class = ast.UniversalTag + p.advance() + } else if p.atKeyword("APPLICATION") { + tag.Class = ast.ApplicationTag + p.advance() + } else if p.atKeyword("PRIVATE") { + tag.Class = ast.PrivateTag + p.advance() + } + tag.Number = p.parseValue() + p.expect(RBRACKET) + if p.atKeyword("IMPLICIT") { + tag.Mode = ast.TagModeImplicit + p.advance() + } else if p.atKeyword("EXPLICIT") { + tag.Mode = ast.TagModeExplicit + p.advance() + } + inner := p.parseUntaggedType() + tag.SetRange(start, p.cur.Pos) + return &ast.TaggedType{TypeBase: tbase(start, inner.End()), Tag: tag, Underlying: inner} +} + +func (p *Parser) parseReferencedType() ast.Type { + start := p.cur.Pos + ref := &ast.TypeRef{Span: span(start, start)} + first := p.cur.Text + p.advance() + if p.at(DOT) && (p.peek.Kind == TYPEREFERENCE || p.peek.Kind == IDENTIFIER) { + p.advance() + ref.Module = first + ref.Name = p.cur.Text + p.advance() + } else { + ref.Name = first + } + ref.SetRange(start, p.cur.Pos) + + if p.at(DOT) && p.peek.Kind == AMP_REF { + p.advance() + field := p.cur.Text + p.advance() + return &ast.OpenTypeFieldType{TypeBase: tbase(start, p.cur.Pos), ClassRef: ref, Field: field} + } + + rt := &ast.ReferencedType{TypeBase: tbase(start, p.cur.Pos), Ref: ref} + if p.at(LBRACE) { + rt.Actuals = p.parseActualParameterList() + rt.SetRange(start, p.cur.Pos) + } + return rt +} + +// --------------------------------------------------------------------------- +// Values +// --------------------------------------------------------------------------- + +func (p *Parser) parseValue() ast.Value { + start := p.cur.Pos + switch p.cur.Kind { + 
case NUMBER: + text := p.cur.Text + p.advance() + if strings.ContainsAny(text, ".eE") { + return &ast.RealValue{ValueBase: vbase(start, p.cur.Pos), Text: text} + } + return &ast.IntegerValue{ValueBase: vbase(start, p.cur.Pos), Text: text} + case HYPHEN: + p.advance() + if p.at(NUMBER) { + text := "-" + p.cur.Text + p.advance() + return &ast.IntegerValue{ValueBase: vbase(start, p.cur.Pos), Text: text} + } + p.errorf(start, "expected number after '-'") + return &ast.IntegerValue{ValueBase: vbase(start, p.cur.Pos), Text: "-0"} + case CSTRING: + text := p.cur.Text + p.advance() + return &ast.StringValue{ValueBase: vbase(start, p.cur.Pos), Kind: ast.StringCString, Text: text} + case BSTRING: + text := p.cur.Text + p.advance() + return &ast.StringValue{ValueBase: vbase(start, p.cur.Pos), Kind: ast.StringBString, Text: text} + case HSTRING: + text := p.cur.Text + p.advance() + return &ast.StringValue{ValueBase: vbase(start, p.cur.Pos), Kind: ast.StringHString, Text: text} + case KEYWORD: + switch p.cur.Text { + case "TRUE": + p.advance() + return &ast.BooleanValue{ValueBase: vbase(start, p.cur.Pos), Value: true} + case "FALSE": + p.advance() + return &ast.BooleanValue{ValueBase: vbase(start, p.cur.Pos), Value: false} + case "NULL": + p.advance() + return &ast.NullValue{ValueBase: vbase(start, p.cur.Pos)} + case "MIN", "MAX", "PLUS-INFINITY", "MINUS-INFINITY", "NOT-A-NUMBER": + text := p.cur.Text + p.advance() + return &ast.ReferenceValue{ValueBase: vbase(start, p.cur.Pos), Name: text} + } + case TYPEREFERENCE: + mod := p.cur.Text + p.advance() + if p.at(DOT) && (p.peek.Kind == IDENTIFIER || p.peek.Kind == TYPEREFERENCE) { + p.advance() + name := p.cur.Text + p.advance() + return &ast.ReferenceValue{ValueBase: vbase(start, p.cur.Pos), Module: mod, Name: name} + } + return &ast.ReferenceValue{ValueBase: vbase(start, p.cur.Pos), Name: mod} + case IDENTIFIER: + name := p.cur.Text + p.advance() + if p.at(COLON) { + p.advance() + inner := p.parseValue() + return 
&ast.ChoiceValue{ValueBase: vbase(start, p.cur.Pos), Alternative: name, Value: inner} + } + return &ast.ReferenceValue{ValueBase: vbase(start, p.cur.Pos), Name: name} + case LBRACE: + return p.parseBraceValue(start) + } + p.errorf(start, "expected value, got %s(%q)", p.cur.Kind, p.cur.Text) + p.advance() + return &ast.IntegerValue{ValueBase: vbase(start, p.cur.Pos), Text: "0"} +} + +func (p *Parser) parseBraceValue(start int) ast.Value { + p.expect(LBRACE) + if p.at(RBRACE) { + p.advance() + return &ast.SequenceOfValue{ValueBase: vbase(start, p.cur.Pos)} + } + if p.cur.Kind == IDENTIFIER && p.peek.Kind != COMMA && p.peek.Kind != RBRACE && p.peek.Kind != COLON && p.peek.Kind != LPAREN { + m := p.save() + if seq := p.tryParseSequenceValue(start); seq != nil { + return seq + } + p.restore(m) + } + if (p.cur.Kind == IDENTIFIER || p.cur.Kind == NUMBER) && oidLooks(p) { + oid := p.parseOIDBody(start) + return &ast.OIDValue{ValueBase: vbase(start, p.cur.Pos), OID: oid} + } + var elems []ast.Value + for !p.at(RBRACE) && !p.at(EOF) { + elems = append(elems, p.parseValue()) + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + return &ast.SequenceOfValue{ValueBase: vbase(start, p.cur.Pos), Elements: elems} +} + +func oidLooks(p *Parser) bool { + depth := 0 + for i := p.pos; i < len(p.tokens); i++ { + t := p.tokens[i] + switch t.Kind { + case LBRACE: + depth++ + case RBRACE: + if depth == 0 { + return true + } + depth-- + case COLON: + return false + case COMMA: + if depth == 0 { + return false + } + } + } + return false +} + +func (p *Parser) parseOIDBody(start int) *ast.OID { + oid := &ast.OID{Span: span(start, start)} + for !p.at(RBRACE) && !p.at(EOF) { + oid.Components = append(oid.Components, p.parseOIDComponent()) + } + end := p.cur.End + p.expect(RBRACE) + oid.Raw = string(p.src[start:end]) + oid.SetRange(start, end) + return oid +} + +func (p *Parser) tryParseSequenceValue(start int) ast.Value { + var fields []ast.NamedValue + for !p.at(RBRACE) && !p.at(EOF) { 
+ if p.cur.Kind != IDENTIFIER { + return nil + } + nm := p.cur.Text + fstart := p.cur.Pos + p.advance() + v := p.parseValue() + nv := ast.NamedValue{Span: span(fstart, p.cur.Pos), Name: nm, Value: v} + fields = append(fields, nv) + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + return &ast.SequenceValue{ValueBase: vbase(start, p.cur.Pos), Fields: fields} +} + +// --------------------------------------------------------------------------- +// Constraints +// --------------------------------------------------------------------------- + +func (p *Parser) parseConstraint() *ast.Constraint { + start := p.cur.Pos + p.expect(LPAREN) + set := p.parseElementSet() + var exc *ast.Exception + if p.at(BANG) { + excStart := p.cur.Pos + p.advance() + raw := p.collectRawUntilParen() + exc = &ast.Exception{Span: span(excStart, p.cur.Pos), Raw: raw} + } + p.expect(RPAREN) + return &ast.Constraint{Span: span(start, p.cur.Pos), Set: set, Exception: exc} +} + +func (p *Parser) collectRawUntilParen() string { + var b strings.Builder + depth := 0 + for !p.at(EOF) { + if depth == 0 && p.at(RPAREN) { + break + } + if p.at(LPAREN) { + depth++ + } + if p.at(RPAREN) { + depth-- + } + b.WriteString(p.cur.Text) + b.WriteByte(' ') + p.advance() + } + return strings.TrimSpace(b.String()) +} + +func (p *Parser) parseElementSet() *ast.ElementSet { + start := p.cur.Pos + set := &ast.ElementSet{Span: span(start, start)} + set.Root = p.parseUnion() + if p.at(COMMA) && p.peek.Kind == ELLIPSIS { + p.advance() + p.advance() + set.Extensible = true + if p.at(COMMA) { + p.advance() + set.Extension = p.parseUnion() + } + } else if p.at(ELLIPSIS) { + p.advance() + set.Extensible = true + } + set.SetRange(start, p.cur.Pos) + return set +} + +func (p *Parser) parseUnion() ast.UnionExpr { + var u ast.UnionExpr + u = append(u, p.parseIntersection()) + for p.at(PIPE) || p.atKeyword("UNION") { + p.advance() + u = append(u, p.parseIntersection()) + } + return u +} + +func (p *Parser) 
parseIntersection() ast.IntersectionExpr { + var i ast.IntersectionExpr + i = append(i, p.parseConstraintElement()) + for p.at(CARET) || p.atKeyword("INTERSECTION") { + p.advance() + i = append(i, p.parseConstraintElement()) + } + return i +} + +func (p *Parser) parseConstraintElement() ast.ConstraintElement { + start := p.cur.Pos + switch { + case p.atKeyword("SIZE"): + p.advance() + c := p.parseConstraint() + return &ast.SizeConstraint{ConstraintElementBase: cbase(start, c.End()), Constraint: c} + case p.atKeyword("FROM"): + p.advance() + c := p.parseConstraint() + return &ast.AlphabetConstraint{ConstraintElementBase: cbase(start, c.End()), Constraint: c} + case p.atKeyword("PATTERN"): + p.advance() + v := p.parseValue() + return &ast.PatternConstraint{ConstraintElementBase: cbase(start, v.End()), Pattern: v} + case p.atKeyword("SETTINGS"): + p.advance() + if p.at(CSTRING) { + s := &ast.PropertySettings{ConstraintElementBase: cbase(start, p.cur.End), Settings: p.cur.Text} + p.advance() + return s + } + case p.atKeyword("CONTAINING"): + p.advance() + t := p.parseType() + return &ast.ContainedSubtype{ConstraintElementBase: cbase(start, t.End()), Type: t} + case p.atKeyword("INCLUDES"): + p.advance() + t := p.parseType() + return &ast.ContainedSubtype{ConstraintElementBase: cbase(start, t.End()), Type: t} + case p.atKeyword("WITH"): + p.advance() + if p.atKeyword("COMPONENT") { + p.advance() + c := p.parseConstraint() + return &ast.InnerTypeConstraint{ConstraintElementBase: cbase(start, c.End()), Single: true, Constraint: c} + } + if p.atKeyword("COMPONENTS") { + p.advance() + return p.parseWithComponents(start) + } + case p.atKeyword("ALL"): + p.advance() + if p.atKeyword("EXCEPT") { + p.advance() + ex := p.parseConstraintElement() + return &ast.AllExceptConstraint{ConstraintElementBase: cbase(start, ex.End()), Exclude: ex} + } + case p.atKeyword("CONSTRAINED"): + p.advance() + if p.atKeyword("BY") { + p.advance() + raw := p.collectRawUntilParen() + return 
&ast.UserDefinedConstraint{ConstraintElementBase: cbase(start, p.cur.Pos), Raw: raw} + } + } + + if p.at(LPAREN) { + c := p.parseConstraint() + return &ast.ContainedSubtype{ + ConstraintElementBase: cbase(start, c.End()), + Type: &ast.ConstrainedType{TypeBase: tbase(start, c.End()), Constraint: c}, + } + } + + // Table constraint: `{ObjectSet}` or `{ObjectSet}{@field.path}`. + if p.at(LBRACE) { + set := p.parseObjectSet() + tc := &ast.TableConstraint{ObjectSet: set} + if p.at(LBRACE) && p.peek.Kind == AT { + tc.AtNotation = p.parseAtNotation() + } + tc.SetRange(start, p.cur.Pos) + return tc + } + + v := p.parseValue() + if p.at(DOUBLE_DOT) { + p.advance() + open := false + if p.at(LESS) { + open = true + p.advance() + } + var upper ast.Value + upperMax := false + if p.atKeyword("MAX") { + upperMax = true + upper = &ast.ReferenceValue{ValueBase: vbase(p.cur.Pos, p.cur.End), Name: "MAX"} + p.advance() + } else { + upper = p.parseValue() + } + return &ast.ValueRangeConstraint{ + ConstraintElementBase: cbase(start, p.cur.Pos), + Lower: v, + Upper: upper, + UpperOpen: open, + LowerIsMin: isMinRef(v), + UpperIsMax: upperMax, + } + } + return &ast.SingleValueConstraint{ConstraintElementBase: cbase(start, v.End()), Value: v} +} + +func isMinRef(v ast.Value) bool { + r, ok := v.(*ast.ReferenceValue) + return ok && r.Name == "MIN" +} + +func (p *Parser) parseWithComponents(start int) ast.ConstraintElement { + p.expect(LBRACE) + var comps []ast.InnerComponent + partial := false + for !p.at(RBRACE) && !p.at(EOF) { + if p.at(ELLIPSIS) { + partial = true + p.advance() + if p.at(COMMA) { + p.advance() + } + continue + } + ic := ast.InnerComponent{} + icStart := p.cur.Pos + if p.cur.Kind == IDENTIFIER { + ic.Name = p.cur.Text + p.advance() + } + if p.at(LPAREN) { + ic.Constraint = p.parseConstraint() + } + switch { + case p.atKeyword("PRESENT"): + ic.Presence = ast.PresencePresent + p.advance() + case p.atKeyword("ABSENT"): + ic.Presence = ast.PresenceAbsent + p.advance() + case 
p.atKeyword("OPTIONAL"): + ic.Presence = ast.PresenceOptional + p.advance() + } + ic.SetRange(icStart, p.cur.Pos) + comps = append(comps, ic) + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + return &ast.InnerTypeConstraint{ConstraintElementBase: cbase(start, p.cur.Pos), Components: comps, PartialFlag: partial} +} + +// --------------------------------------------------------------------------- +// X.681/X.682 object sets and table constraints +// --------------------------------------------------------------------------- + +func (p *Parser) parseObjectSet() *ast.ObjectSet { + start := p.cur.Pos + p.expect(LBRACE) + set := &ast.ObjectSet{Span: span(start, start)} + target := &set.Root + for !p.at(RBRACE) && !p.at(EOF) { + if p.at(ELLIPSIS) { + set.Extensible = true + target = &set.Extension + p.advance() + if p.at(COMMA) || p.at(PIPE) { + p.advance() + } + continue + } + el := p.parseObjectSetElement() + if el != nil { + *target = append(*target, el) + } + if p.at(PIPE) || p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + set.SetRange(start, p.cur.Pos) + return set +} + +func (p *Parser) parseObjectSetElement() ast.ObjectSetElement { + start := p.cur.Pos + switch p.cur.Kind { + case LBRACE: + // Object literal `{ &Field value, ... }` - parsed as a + // generic value sequence for now; the X.681 driver in + // Phase 7 reinterprets it against the class's WITH SYNTAX. + obj := p.parseObjectLiteralBody() + return &ast.ObjectLiteralElement{ + ObjectSetElementBase: ast.ObjectSetElementBase{Span: span(start, p.cur.Pos)}, + Object: obj, + } + case TYPEREFERENCE: + // Could be an object reference or an object-set reference; + // they share the same syntactic shape. The resolver picks + // the correct one based on declaration kind. 
+ ref := &ast.TypeRef{Span: span(start, p.cur.End), Name: p.cur.Text} + p.advance() + if p.at(DOT) && (p.peek.Kind == TYPEREFERENCE || p.peek.Kind == IDENTIFIER) { + p.advance() + ref.Module = ref.Name + ref.Name = p.cur.Text + ref.SetRange(start, p.cur.End) + p.advance() + } + if isUpperFirst(ref.Name) { + return &ast.ObjectSetReferenceElement{ + ObjectSetElementBase: ast.ObjectSetElementBase{Span: span(start, ref.End())}, + Ref: ref, + } + } + return &ast.ObjectReferenceElement{ + ObjectSetElementBase: ast.ObjectSetElementBase{Span: span(start, ref.End())}, + Ref: ref, + } + case IDENTIFIER: + ref := &ast.TypeRef{Span: span(start, p.cur.End), Name: p.cur.Text} + p.advance() + return &ast.ObjectReferenceElement{ + ObjectSetElementBase: ast.ObjectSetElementBase{Span: span(start, ref.End())}, + Ref: ref, + } + } + p.errorf(p.cur.Pos, "expected object set element, got %s", p.cur.Kind) + p.advance() + return nil +} + +func (p *Parser) parseObjectLiteralBody() *ast.Object { + start := p.cur.Pos + p.expect(LBRACE) + obj := &ast.Object{Span: span(start, start)} + for !p.at(RBRACE) && !p.at(EOF) { + setting := p.parseObjectSetting() + if setting != nil { + obj.Settings = append(obj.Settings, *setting) + } + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + obj.SetRange(start, p.cur.Pos) + return obj +} + +func (p *Parser) parseObjectSetting() *ast.ObjectSetting { + start := p.cur.Pos + if !p.at(AMP_REF) { + // Skip until next `&` or `}` - object literals can also be + // driven by WITH SYNTAX templates which we don't fully + // handle in Phase 4 (Phase 7 does). 
+ p.advance() + return nil + } + field := p.cur.Text + p.advance() + setting := &ast.ObjectSetting{Span: span(start, start), FieldRef: field} + if isAmpType(field) { + setting.Type = p.parseType() + } else { + setting.Value = p.parseValue() + } + setting.SetRange(start, p.cur.Pos) + return setting +} + +func (p *Parser) parseAtNotation() *ast.AtNotation { + start := p.cur.Pos + p.expect(LBRACE) + an := &ast.AtNotation{Span: span(start, start)} + p.expect(AT) + for p.at(DOT) { + an.Level++ + p.advance() + } + for p.cur.Kind == IDENTIFIER || p.cur.Kind == TYPEREFERENCE { + an.Path = append(an.Path, p.cur.Text) + p.advance() + if !p.at(DOT) { + break + } + p.advance() + } + p.expect(RBRACE) + an.SetRange(start, p.cur.Pos) + return an +} + +func isUpperFirst(s string) bool { + if s == "" { + return false + } + return isUpper(s[0]) +} + +func isAmpType(s string) bool { + return len(s) >= 2 && isUpper(s[1]) +} + +// --------------------------------------------------------------------------- +// X.683 parameter lists +// --------------------------------------------------------------------------- + +func (p *Parser) parseParameterList() *ast.ParameterList { + if !p.at(LBRACE) { + return nil + } + start := p.cur.Pos + p.advance() + pl := &ast.ParameterList{Span: span(start, start)} + for !p.at(RBRACE) && !p.at(EOF) { + pl.Params = append(pl.Params, p.parseParameter()) + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + pl.SetRange(start, p.cur.Pos) + return pl +} + +func (p *Parser) parseParameter() ast.Parameter { + start := p.cur.Pos + param := ast.Parameter{Span: span(start, start)} + m := p.save() + if p.cur.Kind == TYPEREFERENCE || p.cur.Kind == KEYWORD { + gov := p.parseType() + if p.at(COLON) { + p.advance() + param.Governor = gov + } else { + p.restore(m) + } + } + if p.cur.Kind == TYPEREFERENCE || p.cur.Kind == IDENTIFIER { + param.Reference = p.cur.Text + p.advance() + } + param.SetRange(start, p.cur.Pos) + return param +} + +func (p *Parser) 
parseActualParameterList() *ast.ActualParameterList { + if !p.at(LBRACE) { + return nil + } + start := p.cur.Pos + p.advance() + al := &ast.ActualParameterList{Span: span(start, start)} + for !p.at(RBRACE) && !p.at(EOF) { + al.Params = append(al.Params, p.parseActualParameter()) + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + al.SetRange(start, p.cur.Pos) + return al +} + +func (p *Parser) parseActualParameter() ast.ActualParameter { + start := p.cur.Pos + ap := ast.ActualParameter{Span: span(start, start)} + switch p.cur.Kind { + case NUMBER, CSTRING, BSTRING, HSTRING, IDENTIFIER: + ap.Value = p.parseValue() + case TYPEREFERENCE, KEYWORD: + ap.Type = p.parseType() + default: + ap.Value = p.parseValue() + } + ap.SetRange(start, p.cur.Pos) + return ap +} + +// --------------------------------------------------------------------------- +// Object class body (X.681 - Phase 4 expands this further) +// --------------------------------------------------------------------------- + +func (p *Parser) parseObjectClassBody() *ast.ObjectClass { + start := p.cur.Pos + p.expectKeyword("CLASS") + cls := &ast.ObjectClass{Span: span(start, start)} + if p.at(LBRACE) { + p.advance() + for !p.at(RBRACE) && !p.at(EOF) { + if fs := p.parseFieldSpec(); fs != nil { + cls.Fields = append(cls.Fields, fs) + } + if p.at(COMMA) { + p.advance() + } + } + p.expect(RBRACE) + } + if p.atKeyword("WITH") { + p.advance() + if p.atKeyword("SYNTAX") { + p.advance() + cls.WithSyntax = p.parseWithSyntaxSpec() + } + } + cls.SetRange(start, p.cur.Pos) + return cls +} + +func (p *Parser) parseFieldSpec() ast.FieldSpec { + start := p.cur.Pos + if !p.at(AMP_REF) { + p.errorf(p.cur.Pos, "expected &Field in CLASS body") + p.advance() + return nil + } + name := p.cur.Text + p.advance() + isType := len(name) >= 2 && isUpper(name[1]) + if isType && (p.atKeyword("OPTIONAL") || p.atKeyword("DEFAULT") || p.at(COMMA) || p.at(RBRACE)) { + tf := &ast.TypeFieldSpec{Span: span(start, p.cur.Pos), Name: name} 
+ if p.atKeyword("OPTIONAL") { + tf.Optional = true + p.advance() + } else if p.atKeyword("DEFAULT") { + p.advance() + tf.Default = p.parseType() + tf.Optional = true + } + tf.SetRange(start, p.cur.Pos) + return tf + } + t := p.parseType() + if !isType { + fv := &ast.FixedTypeValueFieldSpec{Span: span(start, p.cur.Pos), Name: name, Type: t} + if p.atKeyword("UNIQUE") { + fv.Unique = true + p.advance() + } + if p.atKeyword("OPTIONAL") { + fv.Optional = true + p.advance() + } else if p.atKeyword("DEFAULT") { + p.advance() + fv.Default = p.parseValue() + fv.Optional = true + } + fv.SetRange(start, p.cur.Pos) + return fv + } + fs := &ast.FixedTypeValueSetFieldSpec{Span: span(start, p.cur.Pos), Name: name, Type: t} + if p.atKeyword("OPTIONAL") { + fs.Optional = true + p.advance() + } + fs.SetRange(start, p.cur.Pos) + return fs +} + +func (p *Parser) parseWithSyntaxSpec() *ast.WithSyntaxSpec { + start := p.cur.Pos + p.expect(LBRACE) + ws := &ast.WithSyntaxSpec{Span: span(start, start)} + for !p.at(RBRACE) && !p.at(EOF) { + ws.Tokens = append(ws.Tokens, p.parseWithSyntaxToken()) + } + p.expect(RBRACE) + ws.SetRange(start, p.cur.Pos) + return ws +} + +// parseWithSyntaxToken reads one entry from a WITH SYNTAX template. +// The token stream is pre-lexed in normal mode (KEYWORD for upper-case +// reserved words, TYPEREFERENCE for the others). Either form represents +// a literal word in template context, so we accept both. 
+func (p *Parser) parseWithSyntaxToken() ast.WithSyntaxToken { + start := p.cur.Pos + switch { + case p.at(LBRACKET): + p.advance() + t := ast.WithSyntaxToken{Span: span(start, start), Kind: ast.WSKOptionalGroup} + for !p.at(RBRACKET) && !p.at(EOF) { + t.Group = append(t.Group, p.parseWithSyntaxToken()) + } + p.expect(RBRACKET) + t.SetRange(start, p.cur.Pos) + return t + case p.at(AMP_REF): + text := p.cur.Text + p.advance() + return ast.WithSyntaxToken{Span: span(start, p.cur.Pos), Kind: ast.WSKFieldRef, Text: text} + case p.cur.Kind == WORD || p.cur.Kind == KEYWORD || p.cur.Kind == TYPEREFERENCE || p.cur.Kind == IDENTIFIER: + text := p.cur.Text + p.advance() + return ast.WithSyntaxToken{Span: span(start, p.cur.Pos), Kind: ast.WSKWord, Text: text} + } + p.advance() + return ast.WithSyntaxToken{Span: span(start, p.cur.Pos), Kind: ast.WSKWord} +} + +// --------------------------------------------------------------------------- +// Recovery +// --------------------------------------------------------------------------- + +func (p *Parser) syncToNextAssignment() { + for !p.at(EOF) { + if p.atKeyword("END") { + return + } + if (p.cur.Kind == TYPEREFERENCE || p.cur.Kind == IDENTIFIER) && + (p.peek.Kind == ASSIGN || p.peek.Kind == LBRACE) { + return + } + p.advance() + } +} diff --git a/internal/asn1/parser_class_test.go b/internal/asn1/parser_class_test.go new file mode 100644 index 00000000..ddd61f7b --- /dev/null +++ b/internal/asn1/parser_class_test.go @@ -0,0 +1,104 @@ +package asn1 + +import ( + "testing" + + "github.com/nokia/ntt/internal/asn1/ast" +) + +func TestParser_ObjectClass_Basic(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +ERROR ::= CLASS { + &errorCode INTEGER UNIQUE, + &Type OPTIONAL, + &description PrintableString OPTIONAL +} WITH SYNTAX { + CODE &errorCode + [TYPE &Type] + [DESCRIPTION &description] +} +END`) + if len(m.Assignments) != 1 { + t.Fatalf("got %d assignments", len(m.Assignments)) + } + oc, ok := 
m.Assignments[0].(*ast.ObjectClassAssignment) + if !ok { + t.Fatalf("got %T want *ObjectClassAssignment", m.Assignments[0]) + } + if oc.Name != "ERROR" { + t.Errorf("name: %q", oc.Name) + } + if oc.Class == nil { + t.Fatal("missing class body") + } + if got := len(oc.Class.Fields); got != 3 { + t.Errorf("fields: %d want 3", got) + } + if oc.Class.WithSyntax == nil { + t.Fatal("missing WITH SYNTAX") + } + if len(oc.Class.WithSyntax.Tokens) < 3 { + t.Errorf("WITH SYNTAX tokens: %d", len(oc.Class.WithSyntax.Tokens)) + } +} + +func TestParser_TableConstraint(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Container ::= SEQUENCE { + code INTEGER ({MySet}), + data OCTET STRING ({MySet}{@code}) +} +END`) + if len(m.Assignments) != 1 { + t.Fatalf("got %d assignments", len(m.Assignments)) + } + st := m.Assignments[0].(*ast.TypeAssignment).Type.(*ast.SequenceType) + if len(st.Components) != 2 { + t.Fatalf("got %d components", len(st.Components)) + } + first := st.Components[0].Type.(*ast.ConstrainedType) + tc, ok := first.Constraint.Set.Root[0][0].(*ast.TableConstraint) + if !ok { + t.Fatalf("first component: got %T want *TableConstraint", first.Constraint.Set.Root[0][0]) + } + if tc.ObjectSet == nil { + t.Error("missing object set") + } + second := st.Components[1].Type.(*ast.ConstrainedType) + tc2, ok := second.Constraint.Set.Root[0][0].(*ast.TableConstraint) + if !ok { + t.Fatalf("second component: got %T want *TableConstraint", second.Constraint.Set.Root[0][0]) + } + if tc2.AtNotation == nil || len(tc2.AtNotation.Path) != 1 || tc2.AtNotation.Path[0] != "code" { + t.Errorf("@notation: %+v", tc2.AtNotation) + } +} + +func TestParser_ParameterizedTypeAssignment(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Pair { ItemType } ::= SEQUENCE { first ItemType, second ItemType } +END`) + if len(m.Assignments) != 1 { + t.Fatalf("got %d", len(m.Assignments)) + } + ta := m.Assignments[0].(*ast.TypeAssignment) + if ta.Params == nil || 
len(ta.Params.Params) != 1 { + t.Fatalf("params: %+v", ta.Params) + } + if ta.Params.Params[0].Reference != "ItemType" { + t.Errorf("param name: %q", ta.Params.Params[0].Reference) + } +} + +func TestParser_AmpRefFieldAccess(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Foo ::= SEQUENCE { code ERROR.&errorCode } +END`) + st := m.Assignments[0].(*ast.TypeAssignment).Type.(*ast.SequenceType) + c := st.Components[0] + if otf, ok := c.Type.(*ast.OpenTypeFieldType); !ok { + t.Fatalf("got %T want *OpenTypeFieldType", c.Type) + } else if otf.Field != "&errorCode" { + t.Errorf("field: %q", otf.Field) + } +} diff --git a/internal/asn1/parser_test.go b/internal/asn1/parser_test.go new file mode 100644 index 00000000..96a3dd86 --- /dev/null +++ b/internal/asn1/parser_test.go @@ -0,0 +1,258 @@ +package asn1 + +import ( + "testing" + + "github.com/nokia/ntt/internal/asn1/ast" +) + +func mustParse(t *testing.T, src string) *ast.Module { + t.Helper() + m := ParseModule([]byte(src)) + if m == nil { + t.Fatal("ParseModule returned nil") + } + for _, d := range m.Diagnostics { + t.Logf("parse diag: %s", d.Message) + } + return m +} + +func TestParser_Module_Empty(t *testing.T) { + m := mustParse(t, `Empty DEFINITIONS ::= BEGIN END`) + if m.Identifier.Name != "Empty" { + t.Errorf("got module name %q, want Empty", m.Identifier.Name) + } + if len(m.Assignments) != 0 { + t.Errorf("expected no assignments, got %d", len(m.Assignments)) + } +} + +func TestParser_TypeAssignment_Integer(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN Age ::= INTEGER END`) + if len(m.Assignments) != 1 { + t.Fatalf("got %d assignments", len(m.Assignments)) + } + ta, ok := m.Assignments[0].(*ast.TypeAssignment) + if !ok { + t.Fatalf("got %T, want *TypeAssignment", m.Assignments[0]) + } + if ta.Name != "Age" { + t.Errorf("name: got %q want Age", ta.Name) + } + bt, ok := ta.Type.(*ast.IntegerType) + if !ok { + t.Fatalf("type: got %T want *IntegerType", ta.Type) + } + if 
len(bt.NamedNumbers) != 0 { + t.Errorf("got %d named numbers", len(bt.NamedNumbers)) + } +} + +func TestParser_NamedNumbers(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Status ::= INTEGER { ok(0), error(-1), unknown(255) } +END`) + ta := m.Assignments[0].(*ast.TypeAssignment) + it := ta.Type.(*ast.IntegerType) + if len(it.NamedNumbers) != 3 { + t.Fatalf("got %d named numbers", len(it.NamedNumbers)) + } + if it.NamedNumbers[0].Name != "ok" || it.NamedNumbers[2].Name != "unknown" { + t.Errorf("names: %+v", it.NamedNumbers) + } +} + +func TestParser_Enumerated(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Color ::= ENUMERATED { red, green(2), blue, ..., yellow } +END`) + ta := m.Assignments[0].(*ast.TypeAssignment) + et := ta.Type.(*ast.EnumeratedType) + if len(et.Items) != 3 || !et.Extensible || len(et.Extensions) != 1 { + t.Fatalf("items=%d ext=%v exts=%d", len(et.Items), et.Extensible, len(et.Extensions)) + } + if et.Items[1].Name != "green" { + t.Errorf("got %q want green", et.Items[1].Name) + } + if et.Extensions[0].Name != "yellow" { + t.Errorf("got %q want yellow", et.Extensions[0].Name) + } +} + +func TestParser_Sequence(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Person ::= SEQUENCE { + name PrintableString, + age INTEGER OPTIONAL, + weight INTEGER DEFAULT 0, + ... 
+} +END`) + ta := m.Assignments[0].(*ast.TypeAssignment) + st := ta.Type.(*ast.SequenceType) + if !st.Extensible { + t.Errorf("expected extensible") + } + if len(st.Components) != 3 { + t.Fatalf("got %d components", len(st.Components)) + } + if !st.Components[1].Optional { + t.Error("age should be optional") + } + if st.Components[2].Default == nil { + t.Error("weight should have default") + } +} + +func TestParser_SequenceOf(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Names ::= SEQUENCE OF PrintableString +END`) + ta := m.Assignments[0].(*ast.TypeAssignment) + st, ok := ta.Type.(*ast.SequenceOfType) + if !ok { + t.Fatalf("got %T want *SequenceOfType", ta.Type) + } + if bt, ok := st.Element.(*ast.BuiltinType); !ok || bt.Kind != ast.PrintableString { + t.Errorf("element: got %v want PrintableString", st.Element) + } +} + +func TestParser_Choice(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Answer ::= CHOICE { yes NULL, no NULL, maybe INTEGER } +END`) + ta := m.Assignments[0].(*ast.TypeAssignment) + ct := ta.Type.(*ast.ChoiceType) + if len(ct.Alternatives) != 3 { + t.Fatalf("got %d alternatives", len(ct.Alternatives)) + } +} + +func TestParser_TaggedType(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Foo ::= [APPLICATION 5] IMPLICIT INTEGER +END`) + ta := m.Assignments[0].(*ast.TypeAssignment) + tt := ta.Type.(*ast.TaggedType) + if tt.Tag.Class != ast.ApplicationTag { + t.Errorf("got class %v", tt.Tag.Class) + } + if tt.Tag.Mode != ast.TagModeImplicit { + t.Errorf("got mode %v", tt.Tag.Mode) + } +} + +func TestParser_ValueAssignment(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +maxAge INTEGER ::= 120 +greeting PrintableString ::= "hello" +END`) + if len(m.Assignments) != 2 { + t.Fatalf("got %d assignments", len(m.Assignments)) + } + va := m.Assignments[0].(*ast.ValueAssignment) + if va.Name != "maxAge" { + t.Errorf("name: %q", va.Name) + } + if _, ok := va.Value.(*ast.IntegerValue); !ok { + 
t.Errorf("got %T want *IntegerValue", va.Value) + } +} + +func TestParser_Constraint_SizeRange(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Octets ::= OCTET STRING (SIZE (1..16)) +Small ::= INTEGER (0..255) +END`) + if len(m.Assignments) != 2 { + t.Fatalf("got %d", len(m.Assignments)) + } + oct := m.Assignments[0].(*ast.TypeAssignment).Type.(*ast.ConstrainedType) + if _, ok := oct.Inner.(*ast.BuiltinType); !ok { + t.Errorf("inner: %T", oct.Inner) + } + if oct.Constraint == nil { + t.Fatal("missing constraint") + } + small := m.Assignments[1].(*ast.TypeAssignment).Type.(*ast.ConstrainedType) + if small.Constraint == nil || small.Constraint.Set == nil || len(small.Constraint.Set.Root) == 0 { + t.Fatal("missing constraint set") + } + rng, ok := small.Constraint.Set.Root[0][0].(*ast.ValueRangeConstraint) + if !ok { + t.Fatalf("got %T want *ValueRangeConstraint", small.Constraint.Set.Root[0][0]) + } + if rng.Lower == nil || rng.Upper == nil { + t.Error("range endpoints missing") + } +} + +func TestParser_HeaderAndImports(t *testing.T) { + const src = `RRC-PDU-Definitions { + itu-t (0) identified-organization (4) etsi (0) mobileDomain (0) + umts-Access (20) modules (3) rrc (1) version-22 (22) +} DEFINITIONS AUTOMATIC TAGS ::= + +BEGIN + +IMPORTS + NR-RRC-Defs , + SetupRelease +FROM Common ; + +MyEnum ::= ENUMERATED { red, green, blue } +myValue MyEnum ::= red +END` + m := mustParse(t, src) + if m.Identifier.Name != "RRC-PDU-Definitions" { + t.Errorf("module: %q", m.Identifier.Name) + } + if m.Tagging != ast.TagsAutomatic { + t.Errorf("tagging: %v", m.Tagging) + } + if len(m.Imports) != 1 { + t.Fatalf("imports: %d", len(m.Imports)) + } + imp := m.Imports[0] + if imp.From != "Common" { + t.Errorf("from: %q", imp.From) + } + wantSyms := map[string]bool{"NR-RRC-Defs": true, "SetupRelease": true} + for _, s := range imp.Symbols { + if !wantSyms[s] { + t.Errorf("unexpected symbol %q", s) + } + } + if len(m.Assignments) != 2 { + t.Fatalf("got %d assignments", 
len(m.Assignments)) + } +} + +func TestParser_TolerantOfGarbage(t *testing.T) { + m := ParseModule([]byte("this is not valid ASN.1")) + if m == nil { + t.Fatal("nil module") + } + if len(m.Diagnostics) == 0 { + t.Fatal("expected diagnostics") + } +} + +func TestParser_RecoversAcrossAssignments(t *testing.T) { + m := mustParse(t, `M DEFINITIONS ::= BEGIN +Bad ::= !!!nonsense +Good ::= INTEGER +END`) + // We don't care about Bad; we want Good to still be discovered. + foundGood := false + for _, a := range m.Assignments { + if a, ok := a.(*ast.TypeAssignment); ok && a.Name == "Good" { + foundGood = true + } + } + if !foundGood { + t.Errorf("did not recover to Good; assignments=%v", m.Assignments) + } +} diff --git a/internal/asn1/resolver/resolver.go b/internal/asn1/resolver/resolver.go new file mode 100644 index 00000000..c2dc5e80 --- /dev/null +++ b/internal/asn1/resolver/resolver.go @@ -0,0 +1,472 @@ +// Package resolver performs name binding and semantic validation on +// ASN.1 modules produced by the parser. It builds per-module symbol +// tables partitioned by namespace, resolves references (in-module +// first, then via IMPORTS), and emits diagnostics with byte ranges. +// +// Cross-module resolution is mediated by a Basket - a registry of +// resolved modules keyed by module name. Editors and the compiler use +// the same Basket so the same name resolves consistently everywhere. +package resolver + +import ( + "fmt" + + "github.com/nokia/ntt/internal/asn1/ast" +) + +// Namespace partitions the symbol table; ASN.1 lets a type, value, and +// information object class share the same surface name. +type Namespace int + +const ( + NsType Namespace = iota + NsValue + NsClass + NsObject + NsObjectSet + NsParameter +) + +// Symbol is a resolved binding. Definition is the AST node that +// declared the name; Module is the owning module name. 
+type Symbol struct { + Namespace Namespace + Module string + Name string + Definition ast.Node +} + +// Scope is a per-module symbol table. Lookups are O(map) and scoped to +// a single namespace. +type Scope struct { + module *ast.Module + syms map[symKey]*Symbol + // importedFrom maps an imported symbol to its source module. + // Resolution falls back through here when the local table misses. + importedFrom map[string]string +} + +type symKey struct { + ns Namespace + name string +} + +// NewScope constructs a Scope for m, indexing every top-level +// assignment under its appropriate namespace. +func NewScope(m *ast.Module) *Scope { + s := &Scope{ + module: m, + syms: make(map[symKey]*Symbol), + importedFrom: make(map[string]string), + } + for _, a := range m.Assignments { + switch a := a.(type) { + case *ast.TypeAssignment: + s.add(NsType, a.Name, a) + case *ast.ValueAssignment: + s.add(NsValue, a.Name, a) + case *ast.ValueSetTypeAssignment: + s.add(NsValue, a.Name, a) + case *ast.ObjectClassAssignment: + s.add(NsClass, a.Name, a) + case *ast.ObjectAssignment: + s.add(NsObject, a.Name, a) + case *ast.ObjectSetAssignment: + s.add(NsObjectSet, a.Name, a) + } + } + for _, imp := range m.Imports { + for _, sym := range imp.Symbols { + s.importedFrom[sym] = imp.From + } + } + return s +} + +func (s *Scope) add(ns Namespace, name string, n ast.Node) { + s.syms[symKey{ns, name}] = &Symbol{ + Namespace: ns, + Module: s.module.Identifier.Name, + Name: name, + Definition: n, + } +} + +// Module returns the parsed module the scope was built from. +func (s *Scope) Module() *ast.Module { return s.module } + +// Lookup returns a symbol with the given namespace and name, or nil if +// not present in this scope. It does not follow imports. +func (s *Scope) Lookup(ns Namespace, name string) *Symbol { + return s.syms[symKey{ns, name}] +} + +// Names returns every symbol name in a given namespace. Useful for +// diagnostics and "did you mean?" hints. 
+func (s *Scope) Names(ns Namespace) []string { + var out []string + for k := range s.syms { + if k.ns == ns { + out = append(out, k.name) + } + } + return out +} + +// ImportedFrom returns the source module name for an imported symbol, +// or empty if name was not imported. +func (s *Scope) ImportedFrom(name string) string { return s.importedFrom[name] } + +// Basket is a registry of resolved modules keyed by module name. It +// also tracks cross-basket references so that imports across separately +// configured suites still resolve. +type Basket struct { + scopes map[string]*Scope + references []*Basket +} + +// NewBasket constructs an empty Basket. +func NewBasket() *Basket { return &Basket{scopes: make(map[string]*Scope)} } + +// Add registers a module in the basket. Re-adding the same name +// replaces the previous scope. +func (b *Basket) Add(m *ast.Module) *Scope { + s := NewScope(m) + b.scopes[m.Identifier.Name] = s + return s +} + +// Get returns the scope for a module name, searching this basket and +// any referenced baskets in registration order. +func (b *Basket) Get(name string) *Scope { + if s := b.scopes[name]; s != nil { + return s + } + for _, ref := range b.references { + if s := ref.Get(name); s != nil { + return s + } + } + return nil +} + +// AddReference declares that this basket may resolve names through +// other into baskets when its own modules don't contain them. +func (b *Basket) AddReference(other *Basket) { b.references = append(b.references, other) } + +// Modules returns every module name known to this basket (not +// including referenced baskets). Useful for diagnostics. 
+func (b *Basket) Modules() []string { + out := make([]string, 0, len(b.scopes)) + for n := range b.scopes { + out = append(out, n) + } + return out +} + +// --------------------------------------------------------------------------- +// Resolution +// --------------------------------------------------------------------------- + +// Resolve runs all validation passes on a single module, appending +// diagnostics to m.Diagnostics. It is safe to call multiple times. +func Resolve(b *Basket, m *ast.Module) { + scope := b.scopes[m.Identifier.Name] + if scope == nil { + scope = b.Add(m) + } + r := &resolver{basket: b, scope: scope, mod: m} + r.checkImports() + r.checkDuplicates() + r.walkAssignments() + m.Diagnostics = append(m.Diagnostics, r.diags...) +} + +type resolver struct { + basket *Basket + scope *Scope + mod *ast.Module + diags []ast.Diagnostic +} + +func (r *resolver) report(n ast.Node, sev ast.Severity, code, format string, args ...interface{}) { + r.diags = append(r.diags, ast.Diagnostic{ + Pos: n.Pos(), + End: n.End(), + Severity: sev, + Code: code, + Message: fmt.Sprintf(format, args...), + }) +} + +// checkImports verifies each imported module exists in the basket and +// that the symbols listed are actually exported by it. +func (r *resolver) checkImports() { + for _, imp := range r.mod.Imports { + src := r.basket.Get(imp.From) + if src == nil { + r.report(imp, ast.SeverityWarning, "import.unknown-module", + "unknown imported module %q", imp.From) + continue + } + exports := exportedNames(src.module) + for _, sym := range imp.Symbols { + if exports != nil { + if _, ok := exports[sym]; !ok { + r.report(imp, ast.SeverityError, "import.not-exported", + "module %q does not export %q", imp.From, sym) + continue + } + } + // Make sure the symbol actually exists in the target. 
+ if !hasAnyAssignment(src.module, sym) { + r.report(imp, ast.SeverityError, "import.unknown-symbol", + "module %q has no assignment named %q", imp.From, sym) + } + } + } +} + +func exportedNames(m *ast.Module) map[string]struct{} { + if m.Exports == nil || m.Exports.All { + return nil // nil means "all assignments exported" + } + out := make(map[string]struct{}, len(m.Exports.Symbols)) + for _, s := range m.Exports.Symbols { + out[s] = struct{}{} + } + return out +} + +func hasAnyAssignment(m *ast.Module, name string) bool { + for _, a := range m.Assignments { + if ast.AssignmentName(a) == name { + return true + } + } + return false +} + +// checkDuplicates reports two assignments with the same name in the +// same namespace. +func (r *resolver) checkDuplicates() { + seen := make(map[symKey]ast.Assignment) + for _, a := range r.mod.Assignments { + ns := assignmentNamespace(a) + k := symKey{ns, ast.AssignmentName(a)} + if prev, dup := seen[k]; dup { + r.report(a, ast.SeverityError, "duplicate-definition", + "duplicate %s assignment %q (previous declaration at offset %d)", + namespaceLabel(ns), ast.AssignmentName(a), prev.Pos()) + continue + } + seen[k] = a + } +} + +func assignmentNamespace(a ast.Assignment) Namespace { + switch a.(type) { + case *ast.TypeAssignment: + return NsType + case *ast.ValueAssignment, *ast.ValueSetTypeAssignment: + return NsValue + case *ast.ObjectClassAssignment: + return NsClass + case *ast.ObjectAssignment: + return NsObject + case *ast.ObjectSetAssignment: + return NsObjectSet + } + return NsType +} + +func namespaceLabel(ns Namespace) string { + switch ns { + case NsValue: + return "value" + case NsClass: + return "class" + case NsObject: + return "object" + case NsObjectSet: + return "object set" + case NsParameter: + return "parameter" + } + return "type" +} + +// walkAssignments validates each assignment's type and value +// references, surfacing unresolved-name diagnostics. 
+func (r *resolver) walkAssignments() { + for _, a := range r.mod.Assignments { + switch a := a.(type) { + case *ast.TypeAssignment: + r.checkType(a.Type, paramSet(a.Params)) + case *ast.ValueAssignment: + r.checkType(a.Type, nil) + r.checkValue(a.Value) + case *ast.ValueSetTypeAssignment: + r.checkType(a.Type, nil) + } + } +} + +// paramSet returns the set of parameter reference names that are +// in-scope for a parametrised assignment body. +func paramSet(p *ast.ParameterList) map[string]struct{} { + if p == nil { + return nil + } + out := make(map[string]struct{}, len(p.Params)) + for _, prm := range p.Params { + if prm.Reference != "" { + out[prm.Reference] = struct{}{} + } + } + return out +} + +func (r *resolver) checkType(t ast.Type, params map[string]struct{}) { + if t == nil { + return + } + switch t := t.(type) { + case *ast.BuiltinType, *ast.IntegerType, *ast.BitStringType, *ast.EnumeratedType: + return + case *ast.SequenceType: + for _, c := range t.Components { + r.checkType(c.Type, params) + } + for _, e := range t.Extensions { + for _, c := range e.Components { + r.checkType(c.Type, params) + } + } + case *ast.SetType: + for _, c := range t.Components { + r.checkType(c.Type, params) + } + for _, e := range t.Extensions { + for _, c := range e.Components { + r.checkType(c.Type, params) + } + } + case *ast.ChoiceType: + for _, a := range t.Alternatives { + r.checkType(a.Type, params) + } + for _, e := range t.Extensions { + for _, c := range e.Components { + r.checkType(c.Type, params) + } + } + case *ast.SequenceOfType: + r.checkType(t.Element, params) + case *ast.SetOfType: + r.checkType(t.Element, params) + case *ast.TaggedType: + r.checkType(t.Underlying, params) + case *ast.ConstrainedType: + r.checkType(t.Inner, params) + case *ast.ReferencedType: + r.checkReferencedType(t, params) + case *ast.OpenTypeFieldType: + // Look up the class reference; full field-existence check + // belongs to Phase 7. 
+ if t.ClassRef != nil { + r.checkRef(t, t.ClassRef, NsClass, params) + } + } +} + +func (r *resolver) checkReferencedType(t *ast.ReferencedType, params map[string]struct{}) { + if t.Ref == nil { + return + } + r.checkRef(t, t.Ref, NsType, params) +} + +func (r *resolver) checkRef(host ast.Node, ref *ast.TypeRef, ns Namespace, params map[string]struct{}) { + if ref == nil { + return + } + if ref.Module != "" { + // Module-qualified: target module must exist. + scope := r.basket.Get(ref.Module) + if scope == nil { + r.report(host, ast.SeverityError, "ref.unknown-module", + "reference %q targets unknown module %q", ref.Name, ref.Module) + return + } + if scope.Lookup(ns, ref.Name) == nil { + r.report(host, ast.SeverityError, "ref.unknown-symbol", + "module %q has no %s named %q", ref.Module, namespaceLabel(ns), ref.Name) + } + return + } + // Parameter? + if params != nil { + if _, ok := params[ref.Name]; ok { + return + } + } + // Local scope? + if r.scope.Lookup(ns, ref.Name) != nil { + return + } + // Imported? + if from := r.scope.ImportedFrom(ref.Name); from != "" { + if scope := r.basket.Get(from); scope != nil { + if scope.Lookup(ns, ref.Name) != nil { + return + } + } + // Module known but symbol missing - reported by checkImports already. + return + } + // Some builtin types (BMPString, etc.) are tokenised as KEYWORD + // and never reach this path; bare TYPEREFERENCEs we can't resolve + // are real errors. 
+ r.report(host, ast.SeverityError, "ref.unknown", + "unknown %s reference %q", namespaceLabel(ns), ref.Name) +} + +func (r *resolver) checkValue(v ast.Value) { + switch v := v.(type) { + case *ast.ReferenceValue: + if v.Module != "" { + if scope := r.basket.Get(v.Module); scope == nil { + r.report(v, ast.SeverityError, "value-ref.unknown-module", + "value reference targets unknown module %q", v.Module) + } else if scope.Lookup(NsValue, v.Name) == nil { + r.report(v, ast.SeverityError, "value-ref.unknown-symbol", + "module %q has no value named %q", v.Module, v.Name) + } + return + } + // Builtin pseudo-references (MIN/MAX/PLUS-INFINITY/NULL/...) get + // a free pass; they're not in any scope. + switch v.Name { + case "MIN", "MAX", "PLUS-INFINITY", "MINUS-INFINITY", "NOT-A-NUMBER": + return + } + if r.scope.Lookup(NsValue, v.Name) != nil { + return + } + // CHOICE alternative tags and enum identifiers look like + // value references in their declaration context; don't + // report those - we need richer per-context info to do that + // safely. So this is intentionally conservative. 
+ case *ast.ChoiceValue: + r.checkValue(v.Value) + case *ast.SequenceValue: + for _, f := range v.Fields { + r.checkValue(f.Value) + } + case *ast.SequenceOfValue: + for _, e := range v.Elements { + r.checkValue(e) + } + } +} diff --git a/internal/asn1/resolver/resolver_test.go b/internal/asn1/resolver/resolver_test.go new file mode 100644 index 00000000..c83660da --- /dev/null +++ b/internal/asn1/resolver/resolver_test.go @@ -0,0 +1,165 @@ +package resolver_test + +import ( + "strings" + "testing" + + "github.com/nokia/ntt/internal/asn1" + "github.com/nokia/ntt/internal/asn1/ast" + "github.com/nokia/ntt/internal/asn1/resolver" +) + +func parse(t *testing.T, src string) *asn1.Parser { + t.Helper() + return asn1.NewParser([]byte(src)) +} + +func TestResolve_DuplicateDefinition(t *testing.T) { + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +Foo ::= INTEGER +Foo ::= BOOLEAN +END`)) + b := resolver.NewBasket() + resolver.Resolve(b, m) + if !containsDiag(m.Diagnostics, "duplicate-definition") { + t.Errorf("expected duplicate-definition diag, got %+v", m.Diagnostics) + } +} + +func TestResolve_UnknownReference(t *testing.T) { + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +Foo ::= Bar +END`)) + b := resolver.NewBasket() + resolver.Resolve(b, m) + if !containsDiagWith(m.Diagnostics, "unknown type reference") { + t.Errorf("expected unknown reference diag, got %+v", m.Diagnostics) + } +} + +func TestResolve_LocalReferenceResolves(t *testing.T) { + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +Age ::= INTEGER +Person ::= SEQUENCE { age Age } +END`)) + b := resolver.NewBasket() + resolver.Resolve(b, m) + if containsDiagWith(m.Diagnostics, "unknown type reference") { + t.Errorf("unexpected diags: %+v", m.Diagnostics) + } +} + +func TestResolve_ImportFromUnknownModule(t *testing.T) { + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +IMPORTS Foo FROM N ; +Bar ::= Foo +END`)) + b := resolver.NewBasket() + resolver.Resolve(b, m) + if 
!containsDiag(m.Diagnostics, "import.unknown-module") { + t.Errorf("expected import.unknown-module, got %+v", m.Diagnostics) + } +} + +func TestResolve_ImportFromKnownModule(t *testing.T) { + n := asn1.ParseModule([]byte(`N DEFINITIONS ::= BEGIN +Foo ::= INTEGER +END`)) + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +IMPORTS Foo FROM N ; +Bar ::= Foo +END`)) + b := resolver.NewBasket() + b.Add(n) + resolver.Resolve(b, n) + resolver.Resolve(b, m) + if containsDiag(m.Diagnostics, "import.unknown-module") { + t.Errorf("unexpected import.unknown-module diag: %+v", m.Diagnostics) + } + if containsDiag(m.Diagnostics, "import.unknown-symbol") { + t.Errorf("unexpected import.unknown-symbol diag: %+v", m.Diagnostics) + } +} + +func TestResolve_ImportUnknownSymbol(t *testing.T) { + n := asn1.ParseModule([]byte(`N DEFINITIONS ::= BEGIN +Other ::= INTEGER +END`)) + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +IMPORTS Foo FROM N ; +Bar ::= Foo +END`)) + b := resolver.NewBasket() + b.Add(n) + resolver.Resolve(b, m) + if !containsDiag(m.Diagnostics, "import.unknown-symbol") { + t.Errorf("expected import.unknown-symbol diag, got %+v", m.Diagnostics) + } +} + +func TestResolve_RespectsExportsList(t *testing.T) { + n := asn1.ParseModule([]byte(`N DEFINITIONS ::= BEGIN +EXPORTS Other ; +Other ::= INTEGER +Hidden ::= BOOLEAN +END`)) + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +IMPORTS Hidden FROM N ; +Bar ::= Hidden +END`)) + b := resolver.NewBasket() + b.Add(n) + resolver.Resolve(b, m) + if !containsDiag(m.Diagnostics, "import.not-exported") { + t.Errorf("expected import.not-exported diag, got %+v", m.Diagnostics) + } +} + +func TestResolve_ParameterReferenceResolves(t *testing.T) { + m := asn1.ParseModule([]byte(`M DEFINITIONS ::= BEGIN +Pair { ItemType } ::= SEQUENCE { first ItemType, second ItemType } +END`)) + b := resolver.NewBasket() + resolver.Resolve(b, m) + if containsDiagWith(m.Diagnostics, "unknown type reference") { + t.Errorf("unexpected 
diag: %+v", m.Diagnostics) + } +} + +func TestBasket_CrossBasketResolution(t *testing.T) { + a := asn1.ParseModule([]byte(`A DEFINITIONS ::= BEGIN +T ::= INTEGER +END`)) + b := asn1.ParseModule([]byte(`B DEFINITIONS ::= BEGIN +IMPORTS T FROM A ; +U ::= T +END`)) + primary := resolver.NewBasket() + external := resolver.NewBasket() + external.Add(a) + primary.AddReference(external) + primary.Add(b) + resolver.Resolve(primary, b) + if containsDiag(b.Diagnostics, "import.unknown-module") { + t.Errorf("unexpected diag: %+v", b.Diagnostics) + } + _ = parse(t, "") +} + +func containsDiag(diags []ast.Diagnostic, code string) bool { + for _, d := range diags { + if d.Code == code { + return true + } + } + return false +} + +func containsDiagWith(diags []ast.Diagnostic, fragment string) bool { + for _, d := range diags { + if strings.Contains(d.Message, fragment) { + return true + } + } + return false +} diff --git a/internal/asn1/testdata/sample.asn b/internal/asn1/testdata/sample.asn new file mode 100644 index 00000000..eabbce62 --- /dev/null +++ b/internal/asn1/testdata/sample.asn @@ -0,0 +1,44 @@ +-- Compact fixture exercising the X.680 features the new frontend +-- supports: module header, tagging defaults, IMPORTS, type/value +-- assignments, sequences, choices, enumerated, sequence-of, named +-- numbers, OPTIONAL/DEFAULT, parameterised type, simple constraints. +RRC-Sample { + itu-t (0) recommendation (0) x (24) asn1-test (1) modules (3) sample (1) version-1 (1) +} DEFINITIONS AUTOMATIC TAGS ::= BEGIN + +EXPORTS + Status ; + +IMPORTS + BaseCounter +FROM Common ; + +Status ::= ENUMERATED { ok(0), error(1), unknown(255), ..., timeout(2) } + +Counter ::= INTEGER { initial(0), max(65535) } (0..65535) + +Person ::= SEQUENCE { + name PrintableString (SIZE (1..32)), + age INTEGER OPTIONAL, + weight INTEGER DEFAULT 0, + ... 
+} + +Names ::= SEQUENCE (SIZE (0..10)) OF PrintableString + +Tagged ::= [APPLICATION 1] IMPLICIT INTEGER + +Pair { ItemType } ::= SEQUENCE { first ItemType, second ItemType } + +IntPair ::= Pair { INTEGER } + +Reply ::= CHOICE { + yes NULL, + no NULL, + maybe INTEGER (0..100), + ... +} + +maxRetries Counter ::= 3 + +END diff --git a/internal/asn1/transform/transform.go b/internal/asn1/transform/transform.go new file mode 100644 index 00000000..4db19a5e --- /dev/null +++ b/internal/asn1/transform/transform.go @@ -0,0 +1,409 @@ +// Package transform lowers an ASN.1 module's AST into TTCN-3 source +// that can be re-parsed by ttcn3.Parse. The resulting *ttcn3.Tree +// flows through the existing semantic, formatter, and LSP layers as +// if the user had hand-written a TTCN-3 module - which is the trick +// vanadium pulls with its AstTransformer. +// +// We emit text rather than constructing ttcn3/syntax nodes directly +// because the ttcn3 syntax tree is not meant to be built piecemeal +// from outside; it owns position information tied to the source +// buffer the parser scanned. Round-tripping through text gives us a +// real tree with consistent positions for free. +package transform + +import ( + "fmt" + "sort" + "strings" + + "github.com/nokia/ntt/internal/asn1/ast" + "github.com/nokia/ntt/ttcn3" +) + +// Result bundles the synthesised TTCN-3 source, the parsed tree, and +// any lowering-time diagnostics. Callers that just want the source +// (e.g. the LSP for hover preview) can ignore Tree. +type Result struct { + Source string + Tree *ttcn3.Tree + Diagnostics []ast.Diagnostic +} + +// LowerModule converts an ASN.1 module to a TTCN-3 module. The output +// module name matches m.Identifier.Name, with ASN.1 identifier hyphens +// rewritten to underscores so they're legal TTCN-3 identifiers. 
+func LowerModule(m *ast.Module) *Result { + l := &lowerer{} + src := l.module(m) + return &Result{ + Source: src, + Tree: ttcn3.Parse(src), + Diagnostics: l.diags, + } +} + +type lowerer struct { + out strings.Builder + diags []ast.Diagnostic + depth int +} + +func (l *lowerer) indent() { + for i := 0; i < l.depth; i++ { + l.out.WriteString(" ") + } +} + +func (l *lowerer) line(s string) { + l.indent() + l.out.WriteString(s) + l.out.WriteByte('\n') +} + +func (l *lowerer) report(n ast.Node, sev ast.Severity, code, format string, args ...interface{}) { + pos, end := 0, 0 + if n != nil { + pos, end = n.Pos(), n.End() + } + l.diags = append(l.diags, ast.Diagnostic{ + Pos: pos, End: end, Severity: sev, Code: code, + Message: fmt.Sprintf(format, args...), + }) +} + +// module produces a full TTCN-3 module source string. +func (l *lowerer) module(m *ast.Module) string { + if m == nil || m.Identifier.Name == "" { + return "" + } + l.out.Reset() + l.line(fmt.Sprintf("module %s {", t3Ident(m.Identifier.Name))) + l.depth++ + for _, imp := range m.Imports { + l.line(fmt.Sprintf("import from %s all;", t3Ident(imp.From))) + if len(imp.Symbols) > 0 { + var syms []string + for _, s := range imp.Symbols { + syms = append(syms, t3Ident(s)) + } + sort.Strings(syms) + l.line(fmt.Sprintf("import from %s { %s };", + t3Ident(imp.From), strings.Join(syms, "; "))) + } + } + for _, a := range m.Assignments { + l.assignment(a) + } + l.depth-- + l.line("}") + return l.out.String() +} + +func (l *lowerer) assignment(a ast.Assignment) { + switch a := a.(type) { + case *ast.TypeAssignment: + l.typeAssignment(a) + case *ast.ValueAssignment: + l.valueAssignment(a) + case *ast.ValueSetTypeAssignment: + l.line(fmt.Sprintf("// value set %s elided", t3Ident(a.Name))) + case *ast.ObjectClassAssignment, *ast.ObjectAssignment, *ast.ObjectSetAssignment: + // X.681 classes / objects / object sets have no direct TTCN-3 + // equivalent. 
They drive open-type expansion at use sites in + // the class driver (Phase 7); the class itself is elided. + l.line(fmt.Sprintf("// class %s elided (used via open-type expansion)", t3Ident(ast.AssignmentName(a)))) + } +} + +func (l *lowerer) typeAssignment(a *ast.TypeAssignment) { + if a.Params != nil { + l.line(fmt.Sprintf("// parametric type %s elided (instantiated per use site)", t3Ident(a.Name))) + return + } + body := l.typeExpr(a.Type) + if body == "" { + l.report(a, ast.SeverityWarning, "transform.unhandled-type", + "unable to lower type assignment %q (kind %T)", a.Name, a.Type) + l.line(fmt.Sprintf("// type %s elided (unsupported)", t3Ident(a.Name))) + return + } + name := t3Ident(a.Name) + + // Body shapes: + // "record { ... }" -> "type record NAME { ... };" + // "set { ... }" -> "type set NAME { ... };" + // "union { ... }" -> "type union NAME { ... };" + // "enumerated { ... }" -> "type enumerated NAME { ... };" + // "record of T" -> "type record of T NAME;" + // "set of T" -> "type set of T NAME;" + // other (scalar alias) -> "type T NAME;" + switch { + case strings.HasPrefix(body, "record {") || strings.HasPrefix(body, "record { "): + rest := strings.TrimPrefix(body, "record ") + l.line(fmt.Sprintf("type record %s %s;", name, rest)) + case strings.HasPrefix(body, "set {") || strings.HasPrefix(body, "set { "): + rest := strings.TrimPrefix(body, "set ") + l.line(fmt.Sprintf("type set %s %s;", name, rest)) + case strings.HasPrefix(body, "union {") || strings.HasPrefix(body, "union { "): + rest := strings.TrimPrefix(body, "union ") + l.line(fmt.Sprintf("type union %s %s;", name, rest)) + case strings.HasPrefix(body, "enumerated {") || strings.HasPrefix(body, "enumerated { "): + rest := strings.TrimPrefix(body, "enumerated ") + l.line(fmt.Sprintf("type enumerated %s %s;", name, rest)) + case strings.HasPrefix(body, "record of "), strings.HasPrefix(body, "set of "): + l.line(fmt.Sprintf("type %s %s;", body, name)) + default: + l.line(fmt.Sprintf("type %s 
%s;", body, name)) + } +} + +func (l *lowerer) valueAssignment(a *ast.ValueAssignment) { + tt := l.typeExpr(a.Type) + vv := l.valueExpr(a.Value) + if tt == "" || vv == "" { + l.line(fmt.Sprintf("// const %s elided (unsupported)", t3Ident(a.Name))) + return + } + l.line(fmt.Sprintf("const %s %s := %s;", tt, t3Ident(a.Name), vv)) +} + +// typeExpr returns a TTCN-3 type expression for t. Returns the empty +// string if t cannot be lowered. +func (l *lowerer) typeExpr(t ast.Type) string { + switch t := t.(type) { + case nil: + return "" + case *ast.BuiltinType: + return mapBuiltin(t.Kind) + case *ast.IntegerType: + return "integer" + case *ast.BitStringType: + return "bitstring" + case *ast.EnumeratedType: + var items []string + for _, it := range t.Items { + items = append(items, t3Ident(it.Name)) + } + for _, it := range t.Extensions { + items = append(items, t3Ident(it.Name)) + } + return "enumerated { " + strings.Join(items, ", ") + " }" + case *ast.SequenceType: + return l.recordLike("record", t.Components, t.Extensions) + case *ast.SetType: + return l.recordLike("set", t.Components, t.Extensions) + case *ast.ChoiceType: + return l.recordLike("union", t.Alternatives, t.Extensions) + case *ast.SequenceOfType: + inner := l.typeExpr(t.Element) + if inner == "" { + return "" + } + return "record of " + inner + case *ast.SetOfType: + inner := l.typeExpr(t.Element) + if inner == "" { + return "" + } + return "set of " + inner + case *ast.TaggedType: + // TTCN-3 has no tag concept; lower to the underlying type. + return l.typeExpr(t.Underlying) + case *ast.ReferencedType: + if t.Ref != nil { + name := t3Ident(t.Ref.Name) + if t.Ref.Module != "" { + return t3Ident(t.Ref.Module) + "." + name + } + return name + } + case *ast.ConstrainedType: + // Drop the constraint at this depth; we keep them as side + // info for future use but TTCN-3 templates carry their own + // constraint syntax that doesn't map 1:1. 
+ return l.typeExpr(t.Inner) + case *ast.OpenTypeFieldType: + // Without resolving the class+set we can't expand the open + // type. Emit a permissive `anytype` placeholder. + return "anytype" + } + l.report(t, ast.SeverityInfo, "transform.skipped-type", + "skipping unsupported type %T", t) + return "" +} + +func mapBuiltin(k ast.BuiltinKind) string { + switch k { + case ast.Boolean: + return "boolean" + case ast.Null: + return "null" + case ast.Real: + return "float" + case ast.OctetString: + return "octetstring" + case ast.ObjectIdentifier, ast.RelativeOID, ast.OIDIRI, ast.RelativeOIDIRI: + return "objid" + case ast.CharacterString, ast.UTF8String, ast.UniversalString, ast.BMPString: + return "universal charstring" + case ast.PrintableString, ast.IA5String, ast.NumericString, ast.VisibleString, + ast.GeneralString, ast.GraphicString, ast.ISO646String, ast.TeletexString, + ast.T61String, ast.VideotexString: + return "charstring" + case ast.UTCTime, ast.GeneralizedTime, ast.Date, ast.DateTime, ast.TimeOfDay, ast.Time, ast.Duration: + return "charstring" + case ast.External, ast.EmbeddedPDV, ast.ObjectDescriptor: + return "octetstring" + } + return "" +} + +func (l *lowerer) recordLike(kind string, comps []ast.Component, exts []ast.ExtensionAddition) string { + var fields []string + for _, c := range comps { + f := l.component(c) + if f != "" { + fields = append(fields, f) + } + } + for _, e := range exts { + for _, c := range e.Components { + f := l.component(c) + if f != "" { + fields = append(fields, f) + } + } + } + if len(fields) == 0 { + return kind + " { }" + } + return kind + " { " + strings.Join(fields, ", ") + " }" +} + +func (l *lowerer) component(c ast.Component) string { + if c.ComponentsOf { + // Direct expansion would need the referenced type's + // components; left as a TODO for the Phase 8 polish step. 
+ return "" + } + t := l.typeExpr(c.Type) + if t == "" { + return "" + } + out := t + " " + t3Ident(c.Name) + if c.Optional { + out += " optional" + } + return out +} + +// valueExpr returns a TTCN-3 value expression for v. +func (l *lowerer) valueExpr(v ast.Value) string { + switch v := v.(type) { + case *ast.IntegerValue: + return v.Text + case *ast.RealValue: + return v.Text + case *ast.BooleanValue: + if v.Value { + return "true" + } + return "false" + case *ast.NullValue: + return "null" + case *ast.StringValue: + switch v.Kind { + case ast.StringCString: + return v.Text + case ast.StringBString: + return "'" + strings.Trim(v.Text, "'B") + "'B" + case ast.StringHString: + return "'" + strings.Trim(v.Text, "'H") + "'H" + } + case *ast.ReferenceValue: + if v.Module != "" { + return t3Ident(v.Module) + "." + t3Ident(v.Name) + } + return t3Ident(v.Name) + case *ast.SequenceOfValue: + var elems []string + for _, e := range v.Elements { + elems = append(elems, l.valueExpr(e)) + } + return "{ " + strings.Join(elems, ", ") + " }" + case *ast.SequenceValue: + var fs []string + for _, f := range v.Fields { + fs = append(fs, t3Ident(f.Name)+" := "+l.valueExpr(f.Value)) + } + return "{ " + strings.Join(fs, ", ") + " }" + case *ast.ChoiceValue: + return "{ " + t3Ident(v.Alternative) + " := " + l.valueExpr(v.Value) + " }" + case *ast.OIDValue: + return "objid " + (v.OID.Raw) + } + return "" +} + +// t3Ident normalises an ASN.1 identifier into a TTCN-3-safe one. +// Rules (matching vanadium): replace '-' with '_'; if the result +// collides with a TTCN-3 reserved word, append '_'. +func t3Ident(s string) string { + if s == "" { + return s + } + out := strings.ReplaceAll(s, "-", "_") + if ttcn3Reserved[out] { + out += "_" + } + return out +} + +// ttcn3Reserved is a small set of TTCN-3 keywords most likely to +// collide with ASN.1 identifiers. Not exhaustive - we only need to +// catch the common collisions; the parser will surface anything else. 
+var ttcn3Reserved = map[string]bool{ + "address": true, + "alt": true, + "altstep": true, + "any": true, + "any2unichar": true, + "anytype": true, + "break": true, + "case": true, + "component": true, + "const": true, + "continue": true, + "control": true, + "do": true, + "else": true, + "enumerated": true, + "for": true, + "function": true, + "goto": true, + "group": true, + "if": true, + "import": true, + "interleave": true, + "label": true, + "map": true, + "module": true, + "out": true, + "port": true, + "return": true, + "select": true, + "set": true, + "signature": true, + "system": true, + "template": true, + "testcase": true, + "timer": true, + "type": true, + "union": true, + "unmap": true, + "value": true, + "var": true, + "verdicttype": true, + "while": true, + "with": true, +} diff --git a/internal/asn1/transform/transform_test.go b/internal/asn1/transform/transform_test.go new file mode 100644 index 00000000..14b13c7e --- /dev/null +++ b/internal/asn1/transform/transform_test.go @@ -0,0 +1,97 @@ +package transform_test + +import ( + "strings" + "testing" + + "github.com/nokia/ntt/internal/asn1" + "github.com/nokia/ntt/internal/asn1/transform" +) + +func lower(t *testing.T, src string) *transform.Result { + t.Helper() + m := asn1.ParseModule([]byte(src)) + return transform.LowerModule(m) +} + +func TestLower_IntegerAlias(t *testing.T) { + r := lower(t, `M DEFINITIONS ::= BEGIN +Age ::= INTEGER +END`) + if !strings.Contains(r.Source, "type integer Age;") { + t.Errorf("source:\n%s", r.Source) + } +} + +func TestLower_Sequence(t *testing.T) { + r := lower(t, `M DEFINITIONS ::= BEGIN +Person ::= SEQUENCE { name PrintableString, age INTEGER OPTIONAL } +END`) + if !strings.Contains(r.Source, "type record Person") { + t.Errorf("missing record: %s", r.Source) + } + if !strings.Contains(r.Source, "charstring name") { + t.Errorf("missing field: %s", r.Source) + } + if !strings.Contains(r.Source, "integer age optional") { + t.Errorf("missing optional: %s", 
r.Source) + } +} + +func TestLower_Choice(t *testing.T) { + r := lower(t, `M DEFINITIONS ::= BEGIN +Answer ::= CHOICE { yes NULL, no NULL, value INTEGER } +END`) + if !strings.Contains(r.Source, "type union Answer") { + t.Errorf("missing union: %s", r.Source) + } +} + +func TestLower_SequenceOf(t *testing.T) { + r := lower(t, `M DEFINITIONS ::= BEGIN +Names ::= SEQUENCE OF PrintableString +END`) + if !strings.Contains(r.Source, "type record of charstring Names;") { + t.Errorf("source: %s", r.Source) + } +} + +func TestLower_HyphenatedIdent(t *testing.T) { + r := lower(t, `M-Mod DEFINITIONS ::= BEGIN +RRC-PDU ::= INTEGER +END`) + if !strings.Contains(r.Source, "module M_Mod") { + t.Errorf("module name not rewritten: %s", r.Source) + } + if !strings.Contains(r.Source, "type integer RRC_PDU;") { + t.Errorf("type name not rewritten: %s", r.Source) + } +} + +func TestLower_ValueAssignment(t *testing.T) { + r := lower(t, `M DEFINITIONS ::= BEGIN +maxAge INTEGER ::= 120 +END`) + if !strings.Contains(r.Source, "const integer maxAge := 120;") { + t.Errorf("source: %s", r.Source) + } +} + +func TestLower_ProducesParseableTtcn3(t *testing.T) { + r := lower(t, `M DEFINITIONS ::= BEGIN +Status ::= ENUMERATED { ok, error, unknown } +Pkt ::= SEQUENCE { code INTEGER, body OCTET STRING OPTIONAL } +END`) + if r.Tree == nil { + t.Fatal("nil tree") + } + // We don't currently have a way to inspect tree errors, but if + // the source contains the expected declarations we trust the + // parser ran without panicking. 
+ want := []string{"type enumerated Status", "type record Pkt"} + for _, w := range want { + if !strings.Contains(r.Source, w) { + t.Errorf("missing %q in:\n%s", w, r.Source) + } + } +} diff --git a/internal/fs/fs_test.go b/internal/fs/fs_test.go index 04709431..6f66ba48 100644 --- a/internal/fs/fs_test.go +++ b/internal/fs/fs_test.go @@ -32,8 +32,9 @@ func TestBytesFromURL(t *testing.T) { func TestCaching(t *testing.T) { assert.Equal(t, "package.yml", fs.Open("package.yml").Path()) - os.Setenv("NTT_CACHE", "testdata/cache") - assert.Equal(t, "testdata/cache/package.yml", fs.Open("package.yml").Path()) + cacheDir := filepath.FromSlash("testdata/cache") + os.Setenv("NTT_CACHE", cacheDir) + assert.Equal(t, filepath.Join(cacheDir, "package.yml"), fs.Open("package.yml").Path()) } func TestJoinPath(t *testing.T) { @@ -63,52 +64,65 @@ func TestJoinPath(t *testing.T) { } func TestTTCN3Files(t *testing.T) { + // fromSlash converts the slash-style literals we keep in this + // test to whatever path separator the host OS uses, so the + // suite runs on Windows as well as Unix. 
+ fromSlash := func(paths []string) []string { + out := make([]string, len(paths)) + for i, p := range paths { + out[i] = filepath.FromSlash(p) + } + return out + } + t.Run("empty", func(t *testing.T) { got, err := fs.TTCN3Files() assert.Nil(t, err) assert.Nil(t, got) }) t.Run("dir", func(t *testing.T) { - got, err := fs.TTCN3Files("testdata/TestTTCN3Files") + got, err := fs.TTCN3Files(filepath.FromSlash("testdata/TestTTCN3Files")) assert.Nil(t, err) assert.Nil(t, got) }) t.Run("dir", func(t *testing.T) { - got, err := fs.TTCN3Files("testdata/TestTTCN3Files/some-dir") + got, err := fs.TTCN3Files(filepath.FromSlash("testdata/TestTTCN3Files/some-dir")) assert.Nil(t, err) assert.Nil(t, got) }) t.Run("dir", func(t *testing.T) { - want := []string{ + want := fromSlash([]string{ "testdata/TestTTCN3Files/ttcn3-dir/a.ttcn3", "testdata/TestTTCN3Files/ttcn3-dir/b.ttcn", "testdata/TestTTCN3Files/ttcn3-dir/c.ttcnpp", - } - got, err := fs.TTCN3Files("testdata/TestTTCN3Files/ttcn3-dir") + }) + got, err := fs.TTCN3Files(filepath.FromSlash("testdata/TestTTCN3Files/ttcn3-dir")) assert.Nil(t, err) assert.Equal(t, want, got) }) t.Run("errors", func(t *testing.T) { - want := []string{ + want := fromSlash([]string{ "testdata/TestTTCN3Files/xxx-dir/a.ttcn3", - } - got, err := fs.TTCN3Files("testdata/TestTTCN3Files/xxx-dir/a.ttcn3") + }) + got, err := fs.TTCN3Files(filepath.FromSlash("testdata/TestTTCN3Files/xxx-dir/a.ttcn3")) assert.True(t, errors.Is(err, os.ErrNotExist)) assert.Equal(t, want, got) }) t.Run("file", func(t *testing.T) { - want := []string{ + want := fromSlash([]string{ "testdata/TestTTCN3Files/ttcn3-dir/a.ttcn3", "testdata/TestTTCN3Files/ttcn3-dir/a.ttcn3", - } + }) got, err := fs.TTCN3Files( - "testdata/TestTTCN3Files/ttcn3-dir/a.ttcn3", - "testdata/TestTTCN3Files/ttcn3-dir/a.ttcn3", + filepath.FromSlash("testdata/TestTTCN3Files/ttcn3-dir/a.ttcn3"), + filepath.FromSlash("testdata/TestTTCN3Files/ttcn3-dir/a.ttcn3"), ) assert.Nil(t, err) assert.Equal(t, want, got) }) 
t.Run("URI", func(t *testing.T) { + // URIs always use forward slashes regardless of host OS, + // so no conversion here. want := []string{"foo://a.ttcn3"} got, err := fs.TTCN3Files("foo://a.ttcn3") assert.Nil(t, err) diff --git a/internal/lsp/code_action.go b/internal/lsp/code_action.go index 6d0017e9..01422f98 100644 --- a/internal/lsp/code_action.go +++ b/internal/lsp/code_action.go @@ -2,6 +2,7 @@ package lsp import ( "context" + "strings" "github.com/nokia/ntt/internal/fs" "github.com/nokia/ntt/internal/log" @@ -43,6 +44,16 @@ func (s *Server) codeAction(ctx context.Context, params *protocol.CodeActionPara } var actions []protocol.CodeAction + + // Source actions are filtered by Context.Only: if the client + // asks for a specific kind, only emit the actions matching it. + // When Only is empty we emit everything we can compute. + if wantsKind(params.Context.Only, protocol.SourceOrganizeImports) { + if a, ok := s.organizeImports(uri); ok { + actions = append(actions, a) + } + } + for _, diag := range diags { fix, ok := extractAutofix(diag.Data) if !ok { @@ -125,6 +136,27 @@ func numberToInt(v interface{}) (int, bool) { return 0, false } +// wantsKind reports whether the client wants actions of kind k. An +// empty `only` list means "the client wants everything", which keeps +// the keyboard-shortcut path working (those requests don't filter). +func wantsKind(only []protocol.CodeActionKind, k protocol.CodeActionKind) bool { + if len(only) == 0 { + return true + } + for _, candidate := range only { + // Source action kinds are hierarchical (e.g. `source` is + // the parent of `source.organizeImports`). Treat a request + // for the parent as a request for every child. + if candidate == k { + return true + } + if strings.HasPrefix(string(k), string(candidate)+".") { + return true + } + } + return false +} + // overlaps returns true when the two LSP ranges share at least one // position. 
We need this because clients may ask for code actions for a // cursor (zero-width range), a selection or the entire visible viewport; diff --git a/internal/lsp/definition.go b/internal/lsp/definition.go index d6342e96..230a25c4 100644 --- a/internal/lsp/definition.go +++ b/internal/lsp/definition.go @@ -3,6 +3,7 @@ package lsp import ( "context" "fmt" + "os" "time" "github.com/nokia/ntt/internal/log" @@ -36,5 +37,57 @@ func (s *Server) definition(ctx context.Context, params *protocol.DefinitionPara locs = append(locs, location(span)) } + // If the TTCN-3 finder didn't turn up anything, fall back to the + // ASN.1 cross-file lookup. This lets editor users jump from + // `import from RRC-PDU-Definitions { Foo }` (TTCN-3) into the + // `Foo ::= ...` line in the .asn source. + if len(locs) == 0 && x != nil { + if id, ok := x.(*syntax.Ident); ok { + if asnLoc, ok := asn1Location(&s.db, id.String()); ok { + locs = append(locs, asnLoc) + } + } + } + return unifyLocs(locs), nil } + +// asn1Location wraps db.ASN1Location and converts its byte offset to +// an LSP location with line/column. Returns ok=false when the lookup +// misses or the file can't be read for position translation. +func asn1Location(db *ttcn3.DB, symbol string) (protocol.Location, bool) { + file, offset, ok := db.ASN1Location(symbol) + if !ok { + return protocol.Location{}, false + } + src, err := os.ReadFile(file) + if err != nil { + return protocol.Location{}, false + } + line, col := byteOffsetToLineCol(src, offset) + return protocol.Location{ + URI: protocol.URIFromPath(file), + Range: protocol.Range{ + Start: protocol.Position{Line: uint32(line), Character: uint32(col)}, + End: protocol.Position{Line: uint32(line), Character: uint32(col)}, + }, + }, true +} + +// byteOffsetToLineCol converts a byte offset within src to a zero- +// indexed (line, character) pair using UTF-8 character counting. 
+func byteOffsetToLineCol(src []byte, offset int) (int, int) { + if offset > len(src) { + offset = len(src) + } + line, col := 0, 0 + for i := 0; i < offset; i++ { + if src[i] == '\n' { + line++ + col = 0 + continue + } + col++ + } + return line, col +} diff --git a/internal/lsp/general.go b/internal/lsp/general.go index 1ba0bc82..2a8bb634 100644 --- a/internal/lsp/general.go +++ b/internal/lsp/general.go @@ -106,7 +106,15 @@ func (s *Server) initialize(ctx context.Context, params *protocol.ParamInitializ return &protocol.InitializeResult{ Capabilities: protocol.ServerCapabilities{ InlayHintProvider: s.registerInlayHintIfNoDynReg(), - CodeActionProvider: true, + // Advertise the kinds we actually emit so clients can + // surface them in their menus (e.g. "Source Action" + // in VS Code) and pre-filter via the `Only` field. + CodeActionProvider: protocol.CodeActionOptions{ + CodeActionKinds: []protocol.CodeActionKind{ + protocol.QuickFix, + protocol.SourceOrganizeImports, + }, + }, CompletionProvider: protocol.CompletionOptions{ TriggerCharacters: []string{"."}, ResolveProvider: true, diff --git a/internal/lsp/organize_imports.go b/internal/lsp/organize_imports.go new file mode 100644 index 00000000..01bd23d6 --- /dev/null +++ b/internal/lsp/organize_imports.go @@ -0,0 +1,241 @@ +package lsp + +import ( + "sort" + "strings" + + "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/internal/lsp/protocol" + "github.com/nokia/ntt/ttcn3" + "github.com/nokia/ntt/ttcn3/syntax" +) + +// organizeImports returns a single source.organizeImports code action +// that rewrites the imports of every module in the file so they are +// alphabetised and de-duplicated. The action is suppressed entirely +// when the file already matches the canonical ordering, so users don't +// see a flash of "no-op" in their editor's lightbulb menu. 
+// +// We never rewrite imports across module boundaries - each module's +// import block is processed in isolation so the per-module visibility +// rules stay intact. +func (s *Server) organizeImports(uri protocol.DocumentURI) (protocol.CodeAction, bool) { + tree := ttcn3.ParseFile(string(uri.SpanURI())) + if tree == nil || tree.Root == nil { + return protocol.CodeAction{}, false + } + src, err := fs.Content(string(uri.SpanURI())) + if err != nil || len(src) == 0 { + return protocol.CodeAction{}, false + } + + var edits []protocol.TextEdit + for _, node := range tree.Root.Children() { + mod, ok := node.(*syntax.Module) + if !ok || mod == nil { + continue + } + edit, ok := organizeModuleImports(tree, src, mod) + if !ok { + continue + } + edits = append(edits, edit) + } + if len(edits) == 0 { + return protocol.CodeAction{}, false + } + + return protocol.CodeAction{ + Title: "Organize imports", + Kind: protocol.SourceOrganizeImports, + Edit: protocol.WorkspaceEdit{ + Changes: map[string][]protocol.TextEdit{ + string(fs.URI(uri.SpanURI().Filename())): edits, + }, + }, + }, true +} + +// importEntry is the input row for the organize-imports sorter: the +// AST node, the verbatim source text (so we round-trip whitespace and +// comments faithfully), and a sort key derived from the imported +// module name + visibility. +type importEntry struct { + decl *syntax.ImportDecl + text string + key string +} + +// organizeModuleImports inspects the import block of a single Module +// and returns a TextEdit that rewrites it in canonical order, or +// (zero, false) if no rewrite is needed. 
+func organizeModuleImports(tree *ttcn3.Tree, src []byte, mod *syntax.Module) (protocol.TextEdit, bool) { + var entries []importEntry + first, last := -1, -1 + for _, def := range mod.Defs { + if def == nil { + continue + } + imp, ok := def.Def.(*syntax.ImportDecl) + if !ok || imp == nil || imp.Module == nil { + continue + } + // We use the byte range of the wrapping ModuleDef (visibility + // modifier included) so we can replace the whole block as a + // contiguous region. ImportDecl.Pos() skips the visibility + // token which would otherwise be orphaned. + begin, end := def.Pos(), def.End() + if begin < 0 || end <= begin || end > len(src) { + return protocol.TextEdit{}, false + } + if first < 0 { + first = begin + } + last = end + entries = append(entries, importEntry{ + decl: imp, + text: string(src[begin:end]), + key: importSortKey(def, imp), + }) + } + if len(entries) < 2 { + return protocol.TextEdit{}, false + } + + // Sort stably so two imports with the same module name preserve + // their source order (the de-dupe pass below uses the first + // occurrence and drops the rest). + sorted := append([]importEntry(nil), entries...) + sort.SliceStable(sorted, func(i, j int) bool { + return sorted[i].key < sorted[j].key + }) + sorted = dedupeImports(sorted) + + // Decide if the change is a no-op before doing any text munging: + // if the sorted order is identical to the source order *and* no + // duplicates were dropped, we have nothing to do. + if len(sorted) == len(entries) { + same := true + for i := range sorted { + if sorted[i].decl != entries[i].decl { + same = false + break + } + } + if same { + return protocol.TextEdit{}, false + } + } + + // Reassemble using the document's existing line ending so we + // don't accidentally rewrite CRLF files with LF or vice versa. + // We also preserve the indentation of the first import so the + // rewritten block visually matches the surrounding code. 
+ sep := lineSeparator(src[first:last]) + indent := leadingIndent(src, first) + var b strings.Builder + for i, e := range sorted { + if i > 0 { + b.WriteString(sep) + b.WriteString(indent) + } + b.WriteString(strings.TrimRight(e.text, " \t\r\n")) + } + + return protocol.TextEdit{ + Range: setProtocolRange(tree.Position(first), tree.Position(last)), + NewText: b.String(), + }, true +} + +// importSortKey produces the comparison key for an import. The +// canonical order is: +// 1. by visibility (public < friend < private), so that publicly +// visible imports float to the top; +// 2. by imported module name (case-insensitive); +// 3. by exact module name (case-sensitive tiebreaker). +// +// We deliberately ignore the body of the import (which symbols are +// brought in) - re-ordering those is rename-territory and out of scope +// for organizeImports. +func importSortKey(def *syntax.ModuleDef, imp *syntax.ImportDecl) string { + name := imp.Module.String() + vis := "1" // public default + switch strings.ToLower(visibilityText(def)) { + case "friend": + vis = "2" + case "private": + vis = "3" + } + return vis + "|" + strings.ToLower(name) + "|" + name +} + +func visibilityText(def *syntax.ModuleDef) string { + if def == nil || def.Visibility == nil { + return "" + } + return def.Visibility.String() +} + +// dedupeImports drops entries whose normalised body matches an earlier +// entry. We normalise by collapsing runs of whitespace so that two +// imports written with different indentation still de-dupe. 
+func dedupeImports(in []importEntry) []importEntry { + if len(in) < 2 { + return in + } + out := in[:0] + seen := make(map[string]struct{}, len(in)) + for _, e := range in { + fp := strings.Join(strings.Fields(e.text), " ") + if _, dup := seen[fp]; dup { + continue + } + seen[fp] = struct{}{} + out = append(out, e) + } + return out +} + +// leadingIndent returns the whitespace prefix between the start of the +// line containing offset and the offset itself, so a regenerated block +// can be indented identically to the original. +func leadingIndent(src []byte, offset int) string { + if offset > len(src) { + offset = len(src) + } + lineStart := offset + for lineStart > 0 && src[lineStart-1] != '\n' { + lineStart-- + } + end := lineStart + for end < offset { + c := src[end] + if c != ' ' && c != '\t' { + break + } + end++ + } + return string(src[lineStart:end]) +} + +// lineSeparator picks the dominant newline style in the given region so +// the regenerated imports blend in. We default to "\n" because that's +// what 99% of TTCN-3 files use. 
+func lineSeparator(region []byte) string { + crlf, lf := 0, 0 + for i := 0; i < len(region); i++ { + if region[i] != '\n' { + continue + } + if i > 0 && region[i-1] == '\r' { + crlf++ + } else { + lf++ + } + } + if crlf > lf { + return "\r\n" + } + return "\n" +} diff --git a/internal/lsp/organize_imports_test.go b/internal/lsp/organize_imports_test.go new file mode 100644 index 00000000..f80f3895 --- /dev/null +++ b/internal/lsp/organize_imports_test.go @@ -0,0 +1,133 @@ +package lsp + +import ( + "strings" + "testing" + + "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/internal/lsp/protocol" +) + +func TestOrganizeImports_SortsAndDedupes(t *testing.T) { + const src = `module M { + import from Zeta all; + import from Alpha all; + import from Beta all; + import from Alpha all; +} +` + file := "file:///" + t.Name() + ".ttcn3" + fs.SetContent(file, []byte(src)) + + s := &Server{} + uri := protocol.DocumentURI(file) + action, ok := s.organizeImports(uri) + if !ok { + t.Fatalf("expected organizeImports to emit an action") + } + if action.Kind != protocol.SourceOrganizeImports { + t.Fatalf("got kind %q, want %q", action.Kind, protocol.SourceOrganizeImports) + } + + edits := action.Edit.Changes[string(fs.URI(file))] + if len(edits) != 1 { + t.Fatalf("expected exactly one TextEdit, got %d", len(edits)) + } + + got := edits[0].NewText + // Imports must be sorted alphabetically; Alpha must appear only once. 
+ if i := strings.Index(got, "Alpha"); i < 0 { + t.Fatalf("rewrite missing Alpha import: %q", got) + } + if strings.Count(got, "Alpha") != 1 { + t.Errorf("duplicate Alpha import not removed: %q", got) + } + if !precedes(got, "Alpha", "Beta") || !precedes(got, "Beta", "Zeta") { + t.Errorf("imports not alphabetised: %q", got) + } +} + +func TestOrganizeImports_NoOpWhenAlreadySorted(t *testing.T) { + const src = `module M { + import from Alpha all; + import from Beta all; +} +` + file := "file:///" + t.Name() + ".ttcn3" + fs.SetContent(file, []byte(src)) + + s := &Server{} + if _, ok := s.organizeImports(protocol.DocumentURI(file)); ok { + t.Fatalf("expected no action for already-sorted imports") + } +} + +func TestOrganizeImports_LeavesSingleImportAlone(t *testing.T) { + const src = `module M { + import from Alpha all; +} +` + file := "file:///" + t.Name() + ".ttcn3" + fs.SetContent(file, []byte(src)) + + s := &Server{} + if _, ok := s.organizeImports(protocol.DocumentURI(file)); ok { + t.Fatalf("single-import module should not produce an edit") + } +} + +func TestOrganizeImports_PerModuleIsolation(t *testing.T) { + // Two modules with their own unsorted imports - we should get an + // edit for both, each scoped to its own module. 
+ const src = `module A { + import from Zeta all; + import from Alpha all; +} + +module B { + import from Yankee all; + import from Bravo all; +} +` + file := "file:///" + t.Name() + ".ttcn3" + fs.SetContent(file, []byte(src)) + + s := &Server{} + action, ok := s.organizeImports(protocol.DocumentURI(file)) + if !ok { + t.Fatalf("expected an action") + } + edits := action.Edit.Changes[string(fs.URI(file))] + if len(edits) != 2 { + t.Fatalf("expected one edit per module, got %d", len(edits)) + } +} + +func TestWantsKind(t *testing.T) { + tests := []struct { + name string + only []protocol.CodeActionKind + k protocol.CodeActionKind + want bool + }{ + {"empty matches anything", nil, protocol.SourceOrganizeImports, true}, + {"explicit match", []protocol.CodeActionKind{protocol.SourceOrganizeImports}, protocol.SourceOrganizeImports, true}, + {"parent matches child", []protocol.CodeActionKind{"source"}, protocol.SourceOrganizeImports, true}, + {"different family", []protocol.CodeActionKind{protocol.QuickFix}, protocol.SourceOrganizeImports, false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := wantsKind(tc.only, tc.k); got != tc.want { + t.Errorf("wantsKind(%v, %v) = %v, want %v", tc.only, tc.k, got, tc.want) + } + }) + } +} + +// precedes returns true if first appears before second in s. Both must +// be present for the function to succeed. +func precedes(s, first, second string) bool { + i := strings.Index(s, first) + j := strings.Index(s, second) + return i >= 0 && j > i +} diff --git a/project/internal/titan/titan.go b/project/internal/titan/titan.go index 7252232f..8e6ae620 100644 --- a/project/internal/titan/titan.go +++ b/project/internal/titan/titan.go @@ -1 +1,173 @@ package titan + +import ( + "encoding/xml" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// Project is a thin, ntt-friendly view of a parsed Titan Project +// Descriptor. 
// We deliberately expose only the fields ntt needs (name,
// roots, files, references) so the downstream project package isn't
// coupled to the full TPD schema in titan_gen.go.
type Project struct {
	// Path is the path of the .tpd file that produced this project.
	// Load always passes an absolute path; Parse stores whatever
	// path it is given.
	Path string
	// Name is the project name taken from the descriptor's
	// ProjectName element. Falls back to the file's basename
	// (without extension) if the descriptor omits it.
	Name string
	// Root is the directory containing the .tpd file - all relative
	// paths inside the descriptor resolve against it.
	Root string
	// Sources lists every TTCN-3 / ASN.1 source file referenced from
	// the descriptor, with relative paths resolved against Root.
	Sources []string
	// ImportDirs lists folders that should be treated as import
	// directories (Titan's "centralStorage" folders behave like
	// ntt's imports).
	ImportDirs []string
	// References lists referenced sub-projects (other .tpd files).
	// They are returned with their resolved path so the caller can
	// recursively load them.
	References []Reference
}

// Reference points at another .tpd named in the descriptor's
// ReferencedProjects section. Only its name and resolved path are
// recorded; the referenced descriptor itself is not loaded here.
type Reference struct {
	Name string
	Path string
}

// Load parses the descriptor at path and returns a ready-to-use
// Project view. path is made absolute first, so relative entries in
// the descriptor resolve against the descriptor's real location.
// Errors from resolving or opening the file, or from Parse, are
// returned unchanged.
func Load(path string) (*Project, error) {
	abs, err := filepath.Abs(path)
	if err != nil {
		return nil, err
	}
	f, err := os.Open(abs)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return Parse(abs, f)
}

// Parse reads a TPD document from r. path is used solely to anchor
// relative paths and to produce useful error messages, so callers that
// already have the bytes in memory can pass any meaningful identifier.
+func Parse(path string, r io.Reader) (*Project, error) { + var top TopLevelProjectType + dec := xml.NewDecoder(r) + // Titan ships TPDs that occasionally use the wrong namespace + // declaration - we accept them all because Eclipse Titan itself + // is just as forgiving. + dec.Strict = false + if err := dec.Decode(&top); err != nil { + return nil, fmt.Errorf("titan: parse %s: %w", path, err) + } + if top.ProjectType == nil { + return nil, fmt.Errorf("titan: %s: missing ", path) + } + + root := filepath.Dir(path) + p := &Project{ + Path: path, + Name: top.ProjectName, + Root: root, + } + if p.Name == "" { + // Fallback so downstream code always has something to log + // or render in error messages. + base := filepath.Base(path) + p.Name = strings.TrimSuffix(base, filepath.Ext(base)) + } + + // Files block - direct TTCN-3 / ASN.1 sources. + if top.Files != nil { + for _, f := range top.Files.FileResource { + if f == nil { + continue + } + src := resolveRelative(root, f.ProjectRelativePathAttr, f.RelativeURIAttr, f.RawURIAttr) + if src == "" { + continue + } + p.Sources = append(p.Sources, src) + } + } + + // Folders block - directories of sources. We surface them via + // ImportDirs so the project package can scan them recursively. + if top.Folders != nil { + for _, f := range top.Folders.FolderResource { + if f == nil { + continue + } + dir := resolveRelative(root, f.ProjectRelativePathAttr, f.RelativeURIAttr, f.RawURIAttr) + if dir == "" { + continue + } + p.ImportDirs = append(p.ImportDirs, dir) + } + } + + // Referenced projects - we only expose the *paths*, not their + // contents. Recursive loading is the caller's job. 
+ if top.ReferencedProjects != nil { + for _, ref := range top.ReferencedProjects.ReferencedProject { + if ref == nil { + continue + } + path := refPath(root, ref) + if path == "" { + continue + } + p.References = append(p.References, Reference{ + Name: ref.NameAttr, + Path: path, + }) + } + } + + return p, nil +} + +// resolveRelative picks the first non-empty path attribute from a +// FileResource / FolderResource and resolves it against the descriptor +// root. Titan stores both URI-style and plain paths in the XML; we +// honour them all. +func resolveRelative(root string, candidates ...string) string { + for _, c := range candidates { + c = strings.TrimSpace(c) + if c == "" { + continue + } + c = strings.TrimPrefix(c, "file:") + c = strings.TrimPrefix(c, "//") + if filepath.IsAbs(c) { + return filepath.Clean(c) + } + return filepath.Clean(filepath.Join(root, c)) + } + return "" +} + +func refPath(root string, ref *ReferencedProject) string { + uri := strings.TrimSpace(ref.ProjectLocationURIAttr) + if uri == "" { + return "" + } + uri = strings.TrimPrefix(uri, "file:") + uri = strings.TrimPrefix(uri, "//") + if filepath.IsAbs(uri) { + return filepath.Clean(uri) + } + return filepath.Clean(filepath.Join(root, uri)) +} diff --git a/project/internal/titan/titan_test.go b/project/internal/titan/titan_test.go new file mode 100644 index 00000000..312e4e15 --- /dev/null +++ b/project/internal/titan/titan_test.go @@ -0,0 +1,116 @@ +package titan + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +const sampleTPD = ` + + HelloTitan + + + + + + + + + + + +` + +func writeTPD(t *testing.T) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "hello.tpd") + if err := os.WriteFile(path, []byte(sampleTPD), 0o644); err != nil { + t.Fatalf("write tpd: %v", err) + } + return path +} + +func TestLoad_PopulatesSourcesAndReferences(t *testing.T) { + path := writeTPD(t) + p, err := Load(path) + if err != nil { + t.Fatalf("Load: %v", err) + } + + if 
p.Name != "HelloTitan" { + t.Errorf("Name = %q, want HelloTitan", p.Name) + } + if p.Root != filepath.Dir(path) { + t.Errorf("Root = %q, want %q", p.Root, filepath.Dir(path)) + } + if len(p.Sources) != 2 { + t.Fatalf("Sources = %v, want 2 entries", p.Sources) + } + if !strings.HasSuffix(filepath.ToSlash(p.Sources[0]), "src/main.ttcn3") { + t.Errorf("Sources[0] = %q, want suffix src/main.ttcn3", p.Sources[0]) + } + if len(p.ImportDirs) != 1 || !strings.HasSuffix(filepath.ToSlash(p.ImportDirs[0]), "/lib") { + t.Errorf("ImportDirs = %v, want one entry ending in /lib", p.ImportDirs) + } + if len(p.References) != 1 || p.References[0].Name != "Common" { + t.Fatalf("References = %v, want one entry named Common", p.References) + } + if !strings.HasSuffix(filepath.ToSlash(p.References[0].Path), "/common/common.tpd") { + t.Errorf("Reference path = %q, want suffix /common/common.tpd", p.References[0].Path) + } +} + +func TestLoad_FallbackName(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "Unnamed.tpd") + body := `` + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + p, err := Load(path) + if err != nil { + t.Fatalf("Load: %v", err) + } + if p.Name != "Unnamed" { + t.Errorf("Name = %q, want Unnamed (derived from file basename)", p.Name) + } +} + +func TestLoad_AbsoluteSourcePathPreserved(t *testing.T) { + dir := t.TempDir() + absSrc := filepath.Join(dir, "external.ttcn3") + if err := os.WriteFile(absSrc, []byte("// noop"), 0o644); err != nil { + t.Fatal(err) + } + path := filepath.Join(dir, "abs.tpd") + body := ` + Abs + + + + ` + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + p, err := Load(path) + if err != nil { + t.Fatalf("Load: %v", err) + } + if len(p.Sources) != 1 || p.Sources[0] != absSrc { + t.Errorf("Sources = %v, want [%q]", p.Sources, absSrc) + } +} + +func TestLoad_BadXMLReturnsError(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "bad.tpd") + if err := 
os.WriteFile(path, []byte("not xml"), 0o644); err != nil { + t.Fatal(err) + } + if _, err := Load(path); err == nil { + t.Fatal("expected error for malformed XML") + } +} diff --git a/project/project.go b/project/project.go index b1ebe674..eb6d600f 100644 --- a/project/project.go +++ b/project/project.go @@ -26,6 +26,7 @@ import ( "github.com/nokia/ntt/internal/log" "github.com/nokia/ntt/internal/results" "github.com/nokia/ntt/internal/yaml" + "github.com/nokia/ntt/project/internal/titan" ) var ( @@ -263,6 +264,14 @@ func Discover(path string) []Suite { log.Debugf("discovered manifest: %q\n", file) list = append(list, Suite{RootDir: path, SourceDir: path}) } + // Discover Titan project descriptors next to package.yml. + // A directory may contain several .tpd files (one per + // configuration); we report each of them so the caller can + // pick. + for _, tpd := range fs.Glob(fs.JoinPath(path, "*.tpd")) { + log.Debugf("discovered titan descriptor: %q\n", tpd) + list = append(list, Suite{RootDir: path, SourceDir: path}) + } list = append(list, readIndices(fs.JoinPath(path, IndexFile))...) // Check parallel build directories @@ -542,11 +551,14 @@ func Open(args ...string) (*Config, error) { } // Treat a single file argument as source, unless it is a directory or - // manifest file. + // a recognised project descriptor. if file := args[0]; fs.IsRegular(file) { if filepath.Base(file) == ManifestFile { return NewConfig(WithManifest(file), defaults) } + if strings.EqualFold(filepath.Ext(file), ".tpd") { + return NewConfig(WithTPD(file), defaults) + } return NewConfig(WithSources(file), defaults) } @@ -658,6 +670,96 @@ func WithManifest(file string) ConfigOption { } } +// WithTPD reads a Titan Project Descriptor (.tpd) and uses it to seed +// the project configuration. We translate the XML into the same fields +// WithManifest would have populated, so downstream code (lookup, lint, +// LSP) doesn't need to know which format the user actually has on disk. 
+// +// Referenced sub-projects are loaded transitively and their sources + +// import directories are merged into the top-level Config. We dedupe +// paths so a diamond reference graph still produces a sane Sources +// list. +// +// The descriptor's root (the directory containing the .tpd file) +// becomes Config.Root, mirroring WithManifest's behaviour. +func WithTPD(file string) ConfigOption { + return func(c *Config) error { + root, sources, imports, err := loadTPD(file) + if err != nil { + return err + } + c.Root = root + c.Sources = append(c.Sources, sources...) + c.Imports = append(c.Imports, imports...) + log.Debugf("project: loaded titan descriptor %s (%d sources, %d imports)\n", + file, len(sources), len(imports)) + return nil + } +} + +// loadTPD parses descriptor (and any descriptors it references) and +// returns the resolved root + flattened sources + imports. +func loadTPD(descriptor string) (root string, sources, imports []string, err error) { + seen := map[string]bool{} + var visit func(path string) error + visit = func(path string) error { + abs, err := filepath.Abs(path) + if err != nil { + return err + } + if seen[abs] { + return nil + } + seen[abs] = true + + proj, err := titan.Load(abs) + if err != nil { + return err + } + // The first descriptor visited defines the project root. + if root == "" { + root = proj.Root + } + sources = append(sources, proj.Sources...) + imports = append(imports, proj.ImportDirs...) + for _, ref := range proj.References { + // Resolve relative refs against the *current* + // descriptor's root, not the top-level project. 
+ refPath := ref.Path + if !filepath.IsAbs(refPath) { + refPath = filepath.Join(proj.Root, refPath) + } + if err := visit(refPath); err != nil { + log.Debugf("project: skipping unresolvable .tpd reference %q: %s\n", refPath, err.Error()) + continue + } + } + return nil + } + if err := visit(descriptor); err != nil { + return "", nil, nil, err + } + sources = dedupePaths(sources) + imports = dedupePaths(imports) + return root, sources, imports, nil +} + +func dedupePaths(in []string) []string { + if len(in) < 2 { + return in + } + seen := make(map[string]struct{}, len(in)) + out := in[:0] + for _, p := range in { + if _, ok := seen[p]; ok { + continue + } + seen[p] = struct{}{} + out = append(out, p) + } + return out +} + // WithRoot sets the root directory of the project. func WithRoot(root string) ConfigOption { return func(c *Config) error { @@ -710,6 +812,12 @@ func AutomaticRoot(root string) ConfigOption { if manifest := fs.JoinPath(root, ManifestFile); fs.IsRegular(manifest) { return WithManifest(manifest)(c) } + // Fall back to Titan project descriptors when there is no + // package.yml. We deliberately pick the first .tpd in + // alphabetical order so the discovery is deterministic. + if tpds := fs.Glob(fs.JoinPath(root, "*.tpd")); len(tpds) > 0 { + return WithTPD(tpds[0])(c) + } if isRoot(c.Root) { log.Debugln("project: scanning recursively...") diff --git a/project/tpd_test.go b/project/tpd_test.go new file mode 100644 index 00000000..4f8f475c --- /dev/null +++ b/project/tpd_test.go @@ -0,0 +1,134 @@ +package project + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// writeFile writes a file inside dir/sub creating directories as needed. 
+func writeFile(t *testing.T, dir, sub, body string) string { + t.Helper() + path := filepath.Join(dir, sub) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + return path +} + +const minimalTPD = ` + + Toplevel + + + + + + + + + + +` + +const commonTPD = ` + + Common + + + + +` + +func TestWithTPD_LoadsRecursive(t *testing.T) { + dir := t.TempDir() + tpd := writeFile(t, dir, "top.tpd", minimalTPD) + writeFile(t, dir, "common/common.tpd", commonTPD) + writeFile(t, dir, "src/main.ttcn3", "// noop") + writeFile(t, dir, "common/util.ttcn3", "// noop") + writeFile(t, dir, "lib/x.ttcn3", "// noop") + + cfg, err := NewConfig(WithTPD(tpd)) + if err != nil { + t.Fatalf("NewConfig: %v", err) + } + + if cfg.Root != dir { + t.Errorf("Root = %q, want %q", cfg.Root, dir) + } + wantSrc := []string{ + filepath.Join(dir, "src", "main.ttcn3"), + filepath.Join(dir, "common", "util.ttcn3"), + } + if len(cfg.Sources) != len(wantSrc) { + t.Fatalf("Sources = %v, want %v", cfg.Sources, wantSrc) + } + for _, want := range wantSrc { + found := false + for _, got := range cfg.Sources { + if got == want { + found = true + break + } + } + if !found { + t.Errorf("Sources missing %q (got %v)", want, cfg.Sources) + } + } + if len(cfg.Imports) != 1 || !strings.HasSuffix(filepath.ToSlash(cfg.Imports[0]), "/lib") { + t.Errorf("Imports = %v, want one entry ending in /lib", cfg.Imports) + } +} + +func TestOpen_PrefersTPDWhenNoManifest(t *testing.T) { + dir := t.TempDir() + writeFile(t, dir, "myproj.tpd", minimalTPD) + writeFile(t, dir, "src/main.ttcn3", "// noop") + writeFile(t, dir, "common/common.tpd", commonTPD) + writeFile(t, dir, "common/util.ttcn3", "// noop") + + cfg, err := Open(dir) + if err != nil { + t.Fatalf("Open: %v", err) + } + if cfg.Root != dir { + t.Errorf("Root = %q, want %q", cfg.Root, dir) + } + if len(cfg.Sources) == 0 { + t.Fatal("expected non-empty Sources 
from .tpd discovery") + } +} + +func TestOpen_ExplicitTPDFile(t *testing.T) { + dir := t.TempDir() + tpd := writeFile(t, dir, "explicit.tpd", minimalTPD) + writeFile(t, dir, "src/main.ttcn3", "// noop") + writeFile(t, dir, "common/common.tpd", commonTPD) + writeFile(t, dir, "common/util.ttcn3", "// noop") + + cfg, err := Open(tpd) + if err != nil { + t.Fatalf("Open: %v", err) + } + if len(cfg.Sources) == 0 { + t.Fatal("expected Sources populated from .tpd") + } +} + +func TestDedupePaths(t *testing.T) { + in := []string{"/a", "/b", "/a", "/c", "/b"} + got := dedupePaths(in) + want := []string{"/a", "/b", "/c"} + if len(got) != len(want) { + t.Fatalf("got %v, want %v", got, want) + } + for i := range got { + if got[i] != want[i] { + t.Errorf("got[%d] = %q, want %q", i, got[i], want[i]) + } + } +} diff --git a/ttcn3/asn1_lookup_test.go b/ttcn3/asn1_lookup_test.go new file mode 100644 index 00000000..15ed1b51 --- /dev/null +++ b/ttcn3/asn1_lookup_test.go @@ -0,0 +1,43 @@ +package ttcn3_test + +import ( + "strings" + "testing" + + "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/ttcn3" +) + +const sampleASN1 = `Sample-Mod DEFINITIONS ::= BEGIN +Counter ::= INTEGER (0..65535) +Status ::= ENUMERATED { ok, error } +END` + +func TestDB_ASN1Location(t *testing.T) { + fs.SetContent("sample-mod.asn", []byte(sampleASN1)) + + db := ttcn3.DB{} + db.Index("sample-mod.asn") + + // Both top-level assignments should be indexed under their bare + // name and locatable via ASN1Location. 
+ for _, name := range []string{"Counter", "Status"} { + path, off, ok := db.ASN1Location(name) + if !ok { + t.Errorf("ASN1Location(%q): not found in db (Names=%v)", name, db.Names) + continue + } + if !strings.HasSuffix(path, "sample-mod.asn") { + t.Errorf("ASN1Location(%q): got file %q, want sample-mod.asn", name, path) + } + got := sampleASN1[off : off+len(name)] + if got != name { + t.Errorf("ASN1Location(%q): offset %d points to %q", name, off, got) + } + } + + // A name not in the file should miss cleanly. + if _, _, ok := db.ASN1Location("Nope"); ok { + t.Error("ASN1Location(Nope) returned ok=true") + } +} diff --git a/ttcn3/db.go b/ttcn3/db.go index cca85361..52b1e585 100644 --- a/ttcn3/db.go +++ b/ttcn3/db.go @@ -172,3 +172,33 @@ func isASN1File(path string) bool { ext := strings.ToLower(filepath.Ext(path)) return ext == ".asn" || ext == ".asn1" } + +// ASN1Location locates an ASN.1 assignment definition by name and +// returns its byte offset within the source file. The result is +// suitable for editor "go to definition" jumps from TTCN-3 callers +// that imported the ASN.1 module. +// +// We return (file, offset, ok). The caller is responsible for any +// LSP-specific line/column conversion via fs.Open(file) - this keeps +// db.go free of LSP imports. +func (db *DB) ASN1Location(symbol string) (file string, offset int, ok bool) { + for path := range db.Names[symbol] { + if !isASN1File(path) { + continue + } + // Reparse the file using the full frontend so we get the + // byte-precise position of the named assignment. The + // underlying parser is cheap enough that re-parsing on every + // jump is fine for editor workloads. 
+ mod, err := asn1.ParseFileFull(path) + if err != nil || mod == nil { + continue + } + for _, a := range mod.Assignments { + if asn1.AssignmentName(a) == symbol { + return path, a.Pos(), true + } + } + } + return "", 0, false +} diff --git a/ttcn3/syntax/nodes.go b/ttcn3/syntax/nodes.go index 1ff2f49c..bac276f9 100644 --- a/ttcn3/syntax/nodes.go +++ b/ttcn3/syntax/nodes.go @@ -48,6 +48,18 @@ type Root struct { Filename string tokens []token errs []error + + // lineCache memoises the line lookup result from the previous + // Position() call. Most LSP traversals visit tokens in source + // order, so consecutive lookups often land on the same line and + // skip the binary search; any other line falls back to it. + // The cache is not synchronised: concurrent Position() calls on + // one Root race on these fields and may pair a stale line index + // with fresh bounds, so callers must serialise access. + lineCacheLine int + lineCacheLo int // first byte of the cached line + lineCacheHi int // first byte of the next line (or a sentinel on the last line) + lineCacheOk bool } func (n *Root) Err() error { @@ -67,19 +79,41 @@ func (n *Root) Position(offset int) Position { return Position{} } +// searchLines returns the index of the line that contains the byte +// offset pos. The result is cached so that the common access pattern +// (looking up positions in monotonically non-decreasing order, as +// happens when the LSP walks a tree from start to end) collapses to a +// single bounds check on the cache.
func (n *Root) searchLines(pos int) int { - // TODO(5nord) add line cache + if n.lineCacheOk && pos >= n.lineCacheLo && pos < n.lineCacheHi { + return n.lineCacheLine + } + i, j := 0, len(n.lines) for i < j { h := int(uint(i+j) >> 1) // avoid overflow when computing h - // i ≤ h < j if n.lines[h] <= pos { i = h + 1 } else { j = h } } - return int(i) - 1 + idx := i - 1 + + if idx >= 0 { + n.lineCacheLine = idx + n.lineCacheLo = n.lines[idx] + if idx+1 < len(n.lines) { + n.lineCacheHi = n.lines[idx+1] + } else { + // We don't know the buffer length here, but any + // position past the last newline still belongs to + // the last line. Max int also compiles on 32-bit. + n.lineCacheHi = int(^uint(0) >> 1) + } + n.lineCacheOk = true + } + return idx } func (n *Root) PosFor(line, col int) int { diff --git a/ttcn3/syntax/position_bench_test.go b/ttcn3/syntax/position_bench_test.go new file mode 100644 index 00000000..a8b8cf3a --- /dev/null +++ b/ttcn3/syntax/position_bench_test.go @@ -0,0 +1,94 @@ +package syntax_test + +import ( + "strings" + "testing" + + "github.com/nokia/ntt/ttcn3/syntax" +) + +// benchmarkSource builds a TTCN-3 module with n short functions so the +// resulting Root spans about 2*n lines. That gives the line-offset table +// enough entries that the binary search vs cache difference is visible.
+func benchmarkSource(n int) []byte { + var b strings.Builder + b.WriteString("module M {\n") + for i := 0; i < n; i++ { + b.WriteString("function f") + // crude itoa to avoid the strconv allocation + b.WriteString(itoa(i)) + b.WriteString("() runs on system {}\n\n") + } + b.WriteString("}\n") + return []byte(b.String()) +} + +func itoa(i int) string { + if i == 0 { + return "0" + } + var buf [20]byte + pos := len(buf) + for i > 0 { + pos-- + buf[pos] = byte('0' + i%10) + i /= 10 + } + return string(buf[pos:]) +} + +func BenchmarkRoot_Position_Sequential(b *testing.B) { + src := benchmarkSource(1000) + root := syntax.Parse(src) + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Sequential traversal - hits the cache after the first call. + for off := 0; off < len(src); off += 32 { + _ = root.Position(off) + } + } +} + +func BenchmarkRoot_Position_Random(b *testing.B) { + src := benchmarkSource(1000) + root := syntax.Parse(src) + // Pre-computed pseudo-random offsets so the benchmark is + // deterministic and the iteration cost itself is constant. + offsets := make([]int, 1024) + x := 1 + for i := range offsets { + x = x*1103515245 + 12345 + offsets[i] = (x & 0x7fffffff) % len(src) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, off := range offsets { + _ = root.Position(off) + } + } +} + +func TestRoot_Position_CacheCorrectness(t *testing.T) { + src := []byte("module M {\nvar integer x := 1;\nvar integer y := 2;\n}\n") + root := syntax.Parse(src) + + // Sweep every byte twice (once cold, once warm) and compare. + want := make([]syntax.Position, len(src)+1) + for off := 0; off <= len(src); off++ { + want[off] = root.Position(off) + } + for off := 0; off <= len(src); off++ { + got := root.Position(off) + if got != want[off] { + t.Errorf("Position(%d): got %+v want %+v", off, got, want[off]) + } + } + + // Also probe in reverse order so the cache repeatedly misses. 
+ for off := len(src); off >= 0; off-- { + got := root.Position(off) + if got != want[off] { + t.Errorf("Position(%d) reverse: got %+v want %+v", off, got, want[off]) + } + } +} From f2f47103277d6653fa6d2df2ef96e83cba8bd141 Mon Sep 17 00:00:00 2001 From: Rafael Diniz Date: Fri, 22 May 2026 11:33:25 +0100 Subject: [PATCH 3/5] ci: enable Git long paths before Windows checkout testdata/ttcn3-conformance-tests/ ships ~5800 ETSI files whose relative paths exceed Windows' 260-character MAX_PATH limit (the longest are 274 chars). Without core.longpaths=true the very first actions/checkout step fails with "Filename too long" for every long file, before any Go test ever runs. Setting `git config --system core.longpaths true` before checkout is the canonical fix - the action picks it up and the clone succeeds. --- .github/workflows/ci.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fbd8b29a..6a3b7b8d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,15 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] steps: + # The TTCN-3 conformance test suite under + # testdata/ttcn3-conformance-tests/ has paths > 260 chars, + # which trips Windows' MAX_PATH limit during checkout. This + # config must run BEFORE actions/checkout so the clone itself + # can write the long paths. + - name: Enable Git long paths (Windows) + if: runner.os == 'Windows' + shell: pwsh + run: git config --system core.longpaths true - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v5 From d813cb5a8478c321587bac4d1dbc3a6a546eafb1 Mon Sep 17 00:00:00 2001 From: Rafael Diniz Date: Fri, 22 May 2026 11:40:48 +0100 Subject: [PATCH 4/5] test: make path-sensitive tests pass on Windows The new Windows CI job surfaced a cluster of tests that hard-coded forward slashes or the C: drive letter. 
Each fix normalises the expectation rather than altering production behaviour: * internal/cache: filepath.Join the cache dir so the comparison uses native separators. * internal/fs: build the file:// URL via span.URIFromPath so Windows drive letters end up percent-encoded correctly, and feed JoinPath expectations through filepath.FromSlash for the plain-path cases (URL cases stay slash-only). * internal/lsp/hover: render the expected file:line marker with filepath.FromSlash so the leading separator matches the OS. * internal/lsp/organize_imports: query the WorkspaceEdit changes map using the same key construction as the production code, which round-trips the URI through Filename() (and therefore prepends a drive letter on Windows). * internal/lsp/span: derive the expected drive letter from the current working directory so the suite works on both C: and D: runners. * project: use filepath.FromSlash for relative hooks_file assertions, and special-case the "/file" case under runtime.GOOS == "windows" where it is genuinely not absolute. --- internal/cache/cache_test.go | 8 +++-- internal/fs/fs_test.go | 45 +++++++++++++++++---------- internal/lsp/hover_test.go | 5 +-- internal/lsp/organize_imports_test.go | 29 ++++++++++++++--- internal/lsp/span/uri_windows_test.go | 31 +++++++++++++++--- project/project_test.go | 16 ++++++++-- 6 files changed, 103 insertions(+), 31 deletions(-) diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go index 41ec80b6..fe3d10b8 100644 --- a/internal/cache/cache_test.go +++ b/internal/cache/cache_test.go @@ -2,6 +2,7 @@ package cache_test import ( "os" + "path/filepath" "testing" "github.com/nokia/ntt/internal/cache" @@ -14,7 +15,10 @@ func init() { } func TestLookup(t *testing.T) { - os.Setenv("NTT_CACHE", "testdata/cache") + // The cache directory is joined with filepath.Join in the + // implementation, so on Windows the expected separator is "\". 
+ cacheDir := filepath.FromSlash("testdata/cache") + os.Setenv("NTT_CACHE", cacheDir) assert.Equal(t, "./file", cache.Lookup("./file")) assert.Equal(t, "./cache.go", cache.Lookup("./cache.go")) @@ -23,5 +27,5 @@ func TestLookup(t *testing.T) { assert.Equal(t, ".", cache.Lookup(".")) assert.Equal(t, "..", cache.Lookup("..")) assert.Equal(t, "cache.go", cache.Lookup("cache.go")) - assert.Equal(t, "testdata/cache/other.go", cache.Lookup("other.go")) + assert.Equal(t, filepath.Join(cacheDir, "other.go"), cache.Lookup("other.go")) } diff --git a/internal/fs/fs_test.go b/internal/fs/fs_test.go index 6f66ba48..a52c6386 100644 --- a/internal/fs/fs_test.go +++ b/internal/fs/fs_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/nokia/ntt/internal/fs" + "github.com/nokia/ntt/internal/lsp/span" "github.com/stretchr/testify/assert" ) @@ -23,7 +24,10 @@ func TestBytesFromURL(t *testing.T) { panic(err) } - f := fs.Open("file://" + path) + // Constructing the URL by literal concatenation produces broken + // URIs on Windows ("file://D:\\..."). Defer to URIFromPath which + // knows how to encode drive letters and convert backslashes. + f := fs.Open(string(span.URIFromPath(path))) b, err := f.Bytes() assert.Nil(t, err) assert.Equal(t, expected, b) @@ -38,27 +42,36 @@ func TestCaching(t *testing.T) { } func TestJoinPath(t *testing.T) { - tests := []struct { + // JoinPath returns OS-native file paths but keeps URLs untouched. + // We mark URL expectations explicitly so we don't accidentally + // run them through filepath.FromSlash. 
+ type joinCase struct { first, second string want string - }{ - {"", "", ""}, - {".", "", "."}, - {".", "a", "a"}, - {"/", "b", "/b"}, - {"//", "c", "/c"}, - {"/", "/d", "/d"}, - {"e", "f", "e/f"}, - {"/g", "h", "/g/h"}, - {"/i", "../j", "/j"}, - {"file://k", "l", "file://k/l"}, - {"file:///m", "n", "file:///m/n"}, - {"file:///o", "../p", "file:///p"}, + isURL bool + } + tests := []joinCase{ + {"", "", "", false}, + {".", "", ".", false}, + {".", "a", "a", false}, + {"/", "b", "/b", false}, + {"//", "c", "/c", false}, + {"/", "/d", "/d", false}, + {"e", "f", "e/f", false}, + {"/g", "h", "/g/h", false}, + {"/i", "../j", "/j", false}, + {"file://k", "l", "file://k/l", true}, + {"file:///m", "n", "file:///m/n", true}, + {"file:///o", "../p", "file:///p", true}, } for _, test := range tests { + want := test.want + if !test.isURL { + want = filepath.FromSlash(want) + } got := fs.JoinPath(test.first, test.second) - assert.Equal(t, test.want, got) + assert.Equal(t, want, got) } } diff --git a/internal/lsp/hover_test.go b/internal/lsp/hover_test.go index e8160d13..5c11b2a5 100644 --- a/internal/lsp/hover_test.go +++ b/internal/lsp/hover_test.go @@ -2,6 +2,7 @@ package lsp_test import ( "fmt" + "path/filepath" "testing" "github.com/nokia/ntt/internal/fs" @@ -80,7 +81,7 @@ func TestPlainTextHoverForPortDefFromDecl(t *testing.T) { "port P p1\n" + "possible map / connect statements\n" + "_________________________________\n" + - "/TestPlainTextHoverForPortDefFromDecl.ttcn3:9\n" + filepath.FromSlash("/TestPlainTextHoverForPortDefFromDecl.ttcn3") + ":9\n" assert.Equal(t, expected, actual.Contents.Value) } @@ -105,7 +106,7 @@ func TestPlainTextHoverForPortDefFromUsage(t *testing.T) { "port P p1\n" + "possible map / connect statements\n" + "_________________________________\n" + - "/TestPlainTextHoverForPortDefFromUsage.ttcn3:9\n" + filepath.FromSlash("/TestPlainTextHoverForPortDefFromUsage.ttcn3") + ":9\n" assert.Equal(t, expected, actual.Contents.Value) } diff --git 
a/internal/lsp/organize_imports_test.go b/internal/lsp/organize_imports_test.go index f80f3895..5c3c910b 100644 --- a/internal/lsp/organize_imports_test.go +++ b/internal/lsp/organize_imports_test.go @@ -8,6 +8,15 @@ import ( "github.com/nokia/ntt/internal/lsp/protocol" ) +// changesKey is the key organizeImports uses for its WorkspaceEdit +// changes map. The production code derives the key from the URI's +// filename so we can't just look up by the original URI literal - +// Windows in particular round-trips file:///foo through Filename() +// (which prepends a drive letter) before keying the map. +func changesKey(file string) string { + return string(fs.URI(protocol.DocumentURI(file).SpanURI().Filename())) +} + func TestOrganizeImports_SortsAndDedupes(t *testing.T) { const src = `module M { import from Zeta all; @@ -29,9 +38,10 @@ func TestOrganizeImports_SortsAndDedupes(t *testing.T) { t.Fatalf("got kind %q, want %q", action.Kind, protocol.SourceOrganizeImports) } - edits := action.Edit.Changes[string(fs.URI(file))] + edits := action.Edit.Changes[changesKey(file)] if len(edits) != 1 { - t.Fatalf("expected exactly one TextEdit, got %d", len(edits)) + t.Fatalf("expected exactly one TextEdit, got %d (keys: %v)", + len(edits), keysOf(action.Edit.Changes)) } got := edits[0].NewText @@ -97,9 +107,10 @@ module B { if !ok { t.Fatalf("expected an action") } - edits := action.Edit.Changes[string(fs.URI(file))] + edits := action.Edit.Changes[changesKey(file)] if len(edits) != 2 { - t.Fatalf("expected one edit per module, got %d", len(edits)) + t.Fatalf("expected one edit per module, got %d (keys: %v)", + len(edits), keysOf(action.Edit.Changes)) } } @@ -131,3 +142,13 @@ func precedes(s, first, second string) bool { j := strings.Index(s, second) return i >= 0 && j > i } + +// keysOf returns the keys of a map[string][]TextEdit. We only use it +// for failure messages, so we don't bother sorting. 
+func keysOf(m map[string][]protocol.TextEdit) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} diff --git a/internal/lsp/span/uri_windows_test.go b/internal/lsp/span/uri_windows_test.go index 68233b1d..56b9735b 100644 --- a/internal/lsp/span/uri_windows_test.go +++ b/internal/lsp/span/uri_windows_test.go @@ -7,16 +7,36 @@ package span_test import ( + "os" + "strings" "testing" "github.com/nokia/ntt/internal/lsp/span" ) +// currentDrive returns the upper-case drive letter of the current +// working directory (e.g. "C"). We use this to build expectations in +// TestURIFromPath dynamically, because GitHub Actions Windows runners +// expose D: as the default drive whereas developers typically run on +// C: - either way the test should still pass. +func currentDrive(t *testing.T) string { + t.Helper() + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if len(cwd) < 2 || cwd[1] != ':' { + t.Fatalf("cwd %q has no drive letter", cwd) + } + return strings.ToUpper(string(cwd[0])) +} + // TestURI tests the conversion between URIs and filenames. The test cases // include Windows-style URIs and filepaths, but we avoid having OS-specific // tests by using only forward slashes, assuming that the standard library // functions filepath.ToSlash and filepath.FromSlash do not need testing. func TestURIFromPath(t *testing.T) { + drive := currentDrive(t) for _, test := range []struct { path, wantFile string wantURI span.URI @@ -41,15 +61,18 @@ func TestURIFromPath(t *testing.T) { wantFile: `C:\Go\src\bob.go`, wantURI: span.URI("file:///C:/Go/src/bob.go"), }, + // Paths without a drive letter are resolved against the + // current drive by filepath.Abs(), so build the expectation + // from currentDrive() rather than hard-coding "C:". 
{ path: `\path\to\dir`, - wantFile: `C:\path\to\dir`, - wantURI: span.URI("file:///C:/path/to/dir"), + wantFile: drive + `:\path\to\dir`, + wantURI: span.URI("file:///" + drive + ":/path/to/dir"), }, { path: `\a\b\c\src\bob.go`, - wantFile: `C:\a\b\c\src\bob.go`, - wantURI: span.URI("file:///C:/a/b/c/src/bob.go"), + wantFile: drive + `:\a\b\c\src\bob.go`, + wantURI: span.URI("file:///" + drive + ":/a/b/c/src/bob.go"), }, { path: `c:\Go\src\bob george\george\george.go`, diff --git a/project/project_test.go b/project/project_test.go index 15fbdf46..79be1587 100644 --- a/project/project_test.go +++ b/project/project_test.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "runtime" "strings" "testing" "time" @@ -229,7 +230,9 @@ func TestWithManifest(t *testing.T) { t.Run("paths", func(t *testing.T) { c, err := manifest("foo/bar/package.yml", "hooks_file: file") assert.Nil(t, err) - assert.Equal(t, "foo/bar/file", c.HooksFile) + // hooks_file is resolved relative to the manifest, so the + // expectation must use the host's native separator. + assert.Equal(t, filepath.FromSlash("foo/bar/file"), c.HooksFile) }) t.Run("paths", func(t *testing.T) { c, err := manifest("foo/bar/package.yml", "hooks_file: $VAR") @@ -239,7 +242,14 @@ func TestWithManifest(t *testing.T) { t.Run("paths", func(t *testing.T) { c, err := manifest("foo/bar/package.yml", "hooks_file: /file") assert.Nil(t, err) - assert.Equal(t, "/file", c.HooksFile) + // On POSIX "/file" is absolute and stays as-is; on Windows + // it lacks a drive letter so it's treated as relative and + // joined with the manifest directory. 
+ want := "/file" + if runtime.GOOS == "windows" { + want = filepath.FromSlash("foo/bar/file") + } + assert.Equal(t, want, c.HooksFile) }) t.Run("paths", func(t *testing.T) { c, err := manifest("foo/bar/package.yml", "hooks_file: https://file.txt") @@ -260,7 +270,7 @@ func TestWithManifest(t *testing.T) { VAR: file hooks_file: $VAR`) assert.Nil(t, err) - assert.Equal(t, "foo/bar/file", c.HooksFile) + assert.Equal(t, filepath.FromSlash("foo/bar/file"), c.HooksFile) }) } From 92dbe177ff7087d6a4ef473c1327f7aa01832b98 Mon Sep 17 00:00:00 2001 From: Rafael Diniz Date: Fri, 22 May 2026 12:03:04 +0100 Subject: [PATCH 5/5] docs: credit Vanadium for architectural inspiration Several pieces of this branch (the pure-Go ASN.1 frontend, the Wadler-style formatter combinator layer, the schema-driven TTCN-3 AST generator and the lint Rule lifecycle) were modeled on the design of Vanadium (https://github.com/makekryl/vanadium) by Mikhail Krylov. The original PR landed without that attribution and the upstream author rightfully flagged it. Vanadium is BSD-3-licensed and so is ntt, so the BSD-3 attribution requirement is satisfied by: * a new THIRD_PARTY_NOTICES.md that reproduces the Vanadium copyright notice + license text and lists the borrowed concepts (Asn1ModuleBasket -> resolver.Basket, ClassObjectParser/ ClassSetResolver -> WithSyntaxParser/ObjectSetResolver, Asn1AstTransformer -> transform.LowerModule, nodes.yml schema -> ttcn3/v2/syntax/nodes, PrintDirective vocabulary -> ttcn3/ format combinators, Rule lifecycle -> ttcn3/lint Rule); * per-package doc-comment pointers to Vanadium and the notices file in the most clearly derivative packages; * an Acknowledgements section in README.md. No verbatim Vanadium source was copied; the ports are reimplementations in Go. 
--- README.md | 10 +++++ THIRD_PARTY_NOTICES.md | 63 ++++++++++++++++++++++++++++ internal/asn1/class/class.go | 7 +++- internal/asn1/resolver/resolver.go | 5 +++ internal/asn1/transform/transform.go | 5 ++- ttcn3/format/doc.go | 7 +++- ttcn3/lint/lint.go | 5 +++ ttcn3/v2/syntax/nodes/doc.go | 16 ++++--- 8 files changed, 108 insertions(+), 10 deletions(-) create mode 100644 THIRD_PARTY_NOTICES.md diff --git a/README.md b/README.md index 32949f6b..37116530 100644 --- a/README.md +++ b/README.md @@ -129,3 +129,13 @@ automation environment. ## License This project is licensed under the BSD-3-Clause license - see the [LICENSE](https://github.com/nokia/ntt/blob/master/LICENSE). + +## Acknowledgements + +Parts of ntt's architecture (the pure-Go ASN.1 frontend, the +Wadler-style formatter combinators, the schema-driven AST generator +and the lint Rule lifecycle) were inspired by +[Vanadium](https://github.com/makekryl/vanadium) by Mikhail Krylov. +Vanadium is BSD-3-licensed; see +[THIRD_PARTY_NOTICES.md](THIRD_PARTY_NOTICES.md) for the full notice +and a list of the borrowed concepts. diff --git a/THIRD_PARTY_NOTICES.md b/THIRD_PARTY_NOTICES.md new file mode 100644 index 00000000..d0fe8d30 --- /dev/null +++ b/THIRD_PARTY_NOTICES.md @@ -0,0 +1,63 @@ +# Third-party notices + +ntt builds on, or is inspired by, the following third-party projects. +Each entry retains the upstream copyright notice and license text in +keeping with the relevant license terms. + +## Vanadium + +ntt's pure-Go ASN.1 frontend (under `internal/asn1/`), its Wadler-style +TTCN-3 formatter combinator layer, its schema-driven TTCN-3 AST +generator (under `ttcn3/v2/syntax/nodes/`), and the `Rule` / `Context` +lifecycle of the new lint engine were inspired by the design of +[Vanadium](https://github.com/makekryl/vanadium) by Mikhail Krylov. 
+ +The directly-attributable architectural concepts include, but are not +limited to: + +- `Asn1ModuleBasket` (Vanadium) -> `resolver.Basket` (ntt) +- `ClassObjectParser` / `ClassSetResolver` (Vanadium) -> + `class.WithSyntaxParser` / `class.ObjectSetResolver` (ntt) +- `Asn1AstTransformer` (Vanadium) -> `transform.LowerModule` (ntt) +- `src/ast/nodes.yml` schema-driven AST (Vanadium) -> + `ttcn3/v2/syntax/nodes` generator (ntt) +- `format::PrintDirective` combinator vocabulary (Vanadium) -> + Wadler-style document combinator layer in ntt's formatter +- Lint `Rule` with `Register` / `Check` / `Exit` lifecycle (Vanadium) -> + ntt's rule scaffolding + +No verbatim Vanadium source code was copied into ntt - the ports above +are reimplementations in Go - but the design lineage is direct and is +acknowledged here. + +``` +BSD 3-Clause License + +Copyright (c) 2025, Mikhail Krylov +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` diff --git a/internal/asn1/class/class.go b/internal/asn1/class/class.go index 6ee77ec6..4a8883b9 100644 --- a/internal/asn1/class/class.go +++ b/internal/asn1/class/class.go @@ -4,11 +4,14 @@ // - WithSyntaxParser walks an object literal body against the // declaring class's WITH SYNTAX template, returning (fieldRef -> // value/type) settings. This is the Go equivalent of asn1c's -// `asn1fix_cws.c` driver and vanadium's ClassObjectParser. +// `asn1fix_cws.c` driver and Vanadium's ClassObjectParser. // // - ObjectSetResolver flattens an ObjectSet expression (literals, // references, unions, ALL EXCEPT) into a sequence of resolved -// objects. +// objects. The design follows Vanadium's ClassSetResolver. +// +// Vanadium is BSD-3, copyright (c) 2025 Mikhail Krylov. See +// THIRD_PARTY_NOTICES.md at the repository root. // // - ComponentRelationSolver narrows an open-type field given a // `({Set}{@discriminator})` table constraint, producing the diff --git a/internal/asn1/resolver/resolver.go b/internal/asn1/resolver/resolver.go index c2dc5e80..65cd2d53 100644 --- a/internal/asn1/resolver/resolver.go +++ b/internal/asn1/resolver/resolver.go @@ -6,6 +6,11 @@ // Cross-module resolution is mediated by a Basket - a registry of // resolved modules keyed by module name. Editors and the compiler use // the same Basket so the same name resolves consistently everywhere. 
+// +// The Basket concept and the cross-module reference-chaining design +// are adopted from Asn1ModuleBasket in Vanadium +// (https://github.com/makekryl/vanadium) by Mikhail Krylov, BSD-3. +// See THIRD_PARTY_NOTICES.md at the repository root. package resolver import ( diff --git a/internal/asn1/transform/transform.go b/internal/asn1/transform/transform.go index 4db19a5e..8e993d09 100644 --- a/internal/asn1/transform/transform.go +++ b/internal/asn1/transform/transform.go @@ -2,13 +2,16 @@ // that can be re-parsed by ttcn3.Parse. The resulting *ttcn3.Tree // flows through the existing semantic, formatter, and LSP layers as // if the user had hand-written a TTCN-3 module - which is the trick -// vanadium pulls with its AstTransformer. +// Vanadium's Asn1AstTransformer uses, and the design here follows it. // // We emit text rather than constructing ttcn3/syntax nodes directly // because the ttcn3 syntax tree is not meant to be built piecemeal // from outside; it owns position information tied to the source // buffer the parser scanned. Round-tripping through text gives us a // real tree with consistent positions for free. +// +// Vanadium is BSD-3, copyright (c) 2025 Mikhail Krylov. See +// THIRD_PARTY_NOTICES.md at the repository root. package transform import ( diff --git a/ttcn3/format/doc.go b/ttcn3/format/doc.go index 7c2cc9f1..0dbd5763 100644 --- a/ttcn3/format/doc.go +++ b/ttcn3/format/doc.go @@ -9,7 +9,12 @@ import ( // combinator library. It exists primarily for the wrapping printer that // reflows over-long parameter lists and composite literals. The // combinators are intentionally minimal: Text, Line, Nest, Group, Concat -// and HardLine cover the cases vanadium's AstPrinter uses. +// and HardLine cover the cases Vanadium's AstPrinter uses, and the +// vocabulary here (SoftLine / HardLine / Group / Nest) follows +// Vanadium's PrintDirective set. +// +// Vanadium is BSD-3, copyright (c) 2025 Mikhail Krylov. 
See +// THIRD_PARTY_NOTICES.md at the repository root. // Doc is the abstract document type that combinators build. type Doc interface { diff --git a/ttcn3/lint/lint.go b/ttcn3/lint/lint.go index be8bd0e8..0de073bb 100644 --- a/ttcn3/lint/lint.go +++ b/ttcn3/lint/lint.go @@ -12,6 +12,11 @@ // isolation. // - A Linter that runs a configurable set of rules across a parsed Tree. // +// The Rule lifecycle (Register / Check / Exit) and the Reporter-based +// problem-emission API follow the design of Vanadium's lint Rule / +// Context (https://github.com/makekryl/vanadium) by Mikhail Krylov, +// BSD-3. See THIRD_PARTY_NOTICES.md at the repository root. +// // The package intentionally ships with only a small, opinionated set of // rules. Additional rules can be added by satisfying the Rule interface and // passing them to NewLinter. diff --git a/ttcn3/v2/syntax/nodes/doc.go b/ttcn3/v2/syntax/nodes/doc.go index ac54c6b2..ab1f6855 100644 --- a/ttcn3/v2/syntax/nodes/doc.go +++ b/ttcn3/v2/syntax/nodes/doc.go @@ -1,12 +1,16 @@ // Package nodes contains the schema-generated AST node types for the // next-generation TTCN-3 syntax tree. // -// The package is the Go equivalent of vanadium/src/ast: every node is -// defined once in nodes.yaml and the generator under gen/ turns the -// schema into struct types, kind constants, a Visitor interface and a -// children accessor. This keeps the tree definition in one place -// instead of spread across nodes.go, nodes_gen.go and a maze of switch -// statements. +// The package is the Go equivalent of Vanadium's src/ast: every node +// is defined once in nodes.yaml and the generator under gen/ turns +// the schema into struct types, kind constants, a Visitor interface +// and a children accessor. This keeps the tree definition in one +// place instead of spread across nodes.go, nodes_gen.go and a maze +// of switch statements. 
+// +// The schema-driven design and the layout of nodes.yaml are adopted +// from Vanadium (https://github.com/makekryl/vanadium) by Mikhail +// Krylov, BSD-3. See THIRD_PARTY_NOTICES.md at the repository root. // // To regenerate the file after editing nodes.yaml: //