Skip to content

Commit f006651

Browse files
refactor(web): detect hoverable symbols via Lezer highlight tags (#1194)
* refactor(web): detect hoverable symbols via Lezer highlight tags

  Replaces the hand-maintained NODE_TYPES list in symbolHoverTargetsExtension with a tag-based check that asks each node's highlight tag whether it descends from `tags.name`. This is the same vocabulary every CodeMirror grammar already uses for syntax highlighting, so identifier coverage now follows the grammar authors' classification instead of a guess-driven cross-product of node names.

  Side effects worth noting:
  - Picks up C/C++ TypeIdentifier nodes that the old list missed (return types, parameter types, base classes).
  - Picks up modifier-wrapped variants like `definition(variableName)` and `local(variableName)` automatically.
  - Adds C/C++ aliasing to the language filter in codeNav api.ts so search-based goto-definition resolves across both.

  Also adds vitest coverage exercising identifier detection in TypeScript+JSX, Python, Go, Rust, Java, C++, and PHP with positive and negative assertions (expected identifiers present, keywords/punctuation absent).

  Fixes SOU-1077

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* update CHANGELOG

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 05c749e commit f006651

4 files changed

Lines changed: 323 additions & 50 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2121

2222
### Changed
2323
- Reduced the log verbosity of the worker by changing various log messages from info to debug. [#1179](https://github.com/sourcebot-dev/sourcebot/pull/1179)
24+
- [EE] Switched symbol hover detection to use Lezer highlight tags, broadening identifier coverage. [#1194](https://github.com/sourcebot-dev/sourcebot/pull/1194)
2425

2526
## [4.17.1] - 2026-05-04
2627

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
import { describe, expect, test } from 'vitest';
2+
import { EditorState, Extension } from '@codemirror/state';
3+
import { javascript } from '@codemirror/lang-javascript';
4+
import { python } from '@codemirror/lang-python';
5+
import { go } from '@codemirror/lang-go';
6+
import { rust } from '@codemirror/lang-rust';
7+
import { java } from '@codemirror/lang-java';
8+
import { cpp } from '@codemirror/lang-cpp';
9+
import { php } from '@codemirror/lang-php';
10+
import { symbolHoverTargetsExtension } from './symbolHoverTargetsExtension';
11+
12+
/** A decorated range together with the document text it covers. */
type DecoratedSpan = { from: number; to: number; text: string };

/**
 * Builds an editor state from `doc` with the given language extension plus
 * `symbolHoverTargetsExtension`, then collects every range stored in the
 * extension's decoration set.
 *
 * @param doc - Source text loaded into the editor state.
 * @param language - Language extension that supplies the grammar.
 * @returns `ranges` in cursor order, and `texts` holding just the text of each range.
 */
const collectDecoratedSpans = (doc: string, language: Extension): { texts: string[]; ranges: DecoratedSpan[] } => {
    const state = EditorState.create({
        doc,
        extensions: [language, symbolHoverTargetsExtension],
    });

    const ranges: DecoratedSpan[] = [];
    // The loop stops when the cursor no longer exposes a value, i.e. once it is past the last range.
    for (const cursor = state.field(symbolHoverTargetsExtension).iter(); cursor.value; cursor.next()) {
        // Slice through the state rather than the raw input string: decoration
        // positions are document offsets, and those only line up with offsets into
        // `doc` when every line break in `doc` is a single character.
        ranges.push({
            from: cursor.from,
            to: cursor.to,
            text: state.sliceDoc(cursor.from, cursor.to),
        });
    }

    return { texts: ranges.map(span => span.text), ranges };
};
26+
27+
/**
 * Asserts that every name in `expected` occurs among the decorated `texts`.
 * Names that are absent are listed in the assertion message.
 */
const expectAllDetected = (texts: string[], expected: string[]) => {
    const detected = new Set(texts);
    const missing: string[] = [];
    for (const name of expected) {
        if (!detected.has(name)) {
            missing.push(name);
        }
    }
    const message = `Expected identifiers were not detected: ${missing.join(', ')}`;
    expect(missing, message).toEqual([]);
};
32+
33+
/**
 * Asserts that no entry of `unexpected` occurs among the decorated `texts`.
 * Entries that do occur are listed in the assertion message.
 */
const expectNoneDetected = (texts: string[], unexpected: string[]) => {
    const detected = new Set(texts);
    const present: string[] = [];
    for (const name of unexpected) {
        if (detected.has(name)) {
            present.push(name);
        }
    }
    const message = `Unexpected identifiers were detected: ${present.join(', ')}`;
    expect(present, message).toEqual([]);
};
38+
39+
// Exercises symbolHoverTargetsExtension against one small fixture per language.
// Each language test checks that the listed identifiers are decorated and that
// the listed keywords are not.
describe('symbolHoverTargetsExtension', () => {
    test('TypeScript: detects functions, classes, props, types, and JSX', () => {
        const doc = [
            'import { useState } from "react";',
            '',
            'interface UserProps {',
            ' id: number;',
            ' name: string;',
            '}',
            '',
            'class UserCard {',
            ' private props: UserProps;',
            ' getDisplayName(): string { return this.props.name; }',
            '}',
            '',
            'function renderCard(user: UserProps) {',
            ' const [count, setCount] = useState(0);',
            ' return <UserCard data-id={user.id}>{user.name}</UserCard>;',
            '}',
        ].join('\n');

        const { texts } = collectDecoratedSpans(doc, javascript({ jsx: true, typescript: true }));

        expectAllDetected(texts, [
            'useState',
            'UserProps',
            'id',
            'name',
            'UserCard',
            'props',
            'getDisplayName',
            'renderCard',
            'user',
            'count',
            'setCount',
        ]);
        expectNoneDetected(texts, ['import', 'from', 'interface', 'class', 'function', 'const', 'return']);
    });

    test('Python: detects functions, classes, methods, and parameters', () => {
        // Indentation is significant in Python, so the fixture keeps real nesting:
        // method bodies sit one level deeper than their `def`, which in turn sits
        // one level deeper than the `class` line.
        const doc = [
            'from typing import List',
            '',
            'class Greeter:',
            '    def __init__(self, name: str):',
            '        self.name = name',
            '',
            '    def greet(self, others: List[str]) -> str:',
            '        return f"Hello {self.name} and {others}"',
            '',
            'def main():',
            '    greeter = Greeter("World")',
            '    print(greeter.greet(["a", "b"]))',
        ].join('\n');

        const { texts } = collectDecoratedSpans(doc, python());

        expectAllDetected(texts, [
            'Greeter',
            '__init__',
            'self',
            'name',
            'greet',
            'others',
            'main',
            'greeter',
            'print',
        ]);
        expectNoneDetected(texts, ['from', 'import', 'class', 'def', 'return']);
    });

    test('Go: detects functions, types, fields, and method receivers', () => {
        const doc = [
            'package main',
            '',
            'import "fmt"',
            '',
            'type User struct {',
            ' ID int',
            ' Name string',
            '}',
            '',
            'func (u *User) DisplayName() string {',
            ' return u.Name',
            '}',
            '',
            'func main() {',
            ' user := User{ID: 1, Name: "Alice"}',
            ' fmt.Println(user.DisplayName())',
            '}',
        ].join('\n');

        const { texts } = collectDecoratedSpans(doc, go());

        expectAllDetected(texts, [
            'User',
            'ID',
            'Name',
            'DisplayName',
            'main',
            'user',
            'fmt',
            'Println',
        ]);
        expectNoneDetected(texts, ['package', 'import', 'type', 'struct', 'func', 'return']);
    });

    test('Rust: detects structs, traits, functions, and bound identifiers', () => {
        const doc = [
            'use std::fmt::Display;',
            '',
            'struct Point {',
            ' x: i32,',
            ' y: i32,',
            '}',
            '',
            'trait Drawable {',
            ' fn draw(&self);',
            '}',
            '',
            'impl Drawable for Point {',
            ' fn draw(&self) {',
            ' let location = (self.x, self.y);',
            ' println!("{:?}", location);',
            ' }',
            '}',
        ].join('\n');

        const { texts } = collectDecoratedSpans(doc, rust());

        expectAllDetected(texts, [
            'Point',
            'x',
            'y',
            'Drawable',
            'draw',
            'location',
        ]);
        expectNoneDetected(texts, ['use', 'struct', 'trait', 'impl', 'fn', 'let']);
    });

    test('Java: detects classes, methods, fields, and parameters', () => {
        const doc = [
            'package com.example;',
            '',
            'public class Calculator {',
            ' private int total;',
            '',
            ' public Calculator(int initial) {',
            ' this.total = initial;',
            ' }',
            '',
            ' public int add(int value) {',
            ' total = total + value;',
            ' return total;',
            ' }',
            '}',
        ].join('\n');

        const { texts } = collectDecoratedSpans(doc, java());

        expectAllDetected(texts, [
            'Calculator',
            'total',
            'initial',
            'add',
            'value',
        ]);
        expectNoneDetected(texts, ['package', 'public', 'class', 'private', 'return']);
    });

    test('C++: detects functions, classes, namespaces, and fields', () => {
        const doc = [
            '#include <string>',
            '',
            'namespace geom {',
            ' class Shape {',
            ' public:',
            ' Shape(int sides);',
            ' int getSides() const;',
            ' private:',
            ' int sides_;',
            ' };',
            '}',
            '',
            'int main() {',
            ' geom::Shape triangle(3);',
            ' return triangle.getSides();',
            '}',
        ].join('\n');

        const { texts } = collectDecoratedSpans(doc, cpp());

        expectAllDetected(texts, [
            'geom',
            'Shape',
            'getSides',
            'main',
            'triangle',
        ]);
        expectNoneDetected(texts, ['namespace', 'class', 'public', 'private', 'return', 'const']);
    });

    test('PHP: detects classes, methods, properties, and variables', () => {
        const doc = [
            '<?php',
            'namespace App;',
            '',
            'class Greeter {',
            ' private string $name;',
            '',
            ' public function __construct(string $name) {',
            ' $this->name = $name;',
            ' }',
            '',
            ' public function greet(): string {',
            ' return "Hello " . $this->name;',
            ' }',
            '}',
        ].join('\n');

        const { texts } = collectDecoratedSpans(doc, php());

        expectAllDetected(texts, [
            'Greeter',
            'name',
            '__construct',
            'greet',
        ]);
        expectNoneDetected(texts, ['namespace', 'class', 'private', 'public', 'function', 'return']);
    });

    test('skips zero-width and non-identifier nodes', () => {
        const doc = 'const x = 42;';
        const { ranges } = collectDecoratedSpans(doc, javascript({ typescript: true }));

        // Every decorated range must cover at least one character.
        for (const range of ranges) {
            expect(range.to).toBeGreaterThan(range.from);
        }

        expectAllDetected(ranges.map(r => r.text), ['x']);
        expectNoneDetected(ranges.map(r => r.text), ['42', 'const', '=']);
    });

    test('returns empty decoration set for plain text without a language', () => {
        // No language extension is installed here, only the field under test.
        const state = EditorState.create({
            doc: 'just some plain prose with no grammar attached',
            extensions: [symbolHoverTargetsExtension],
        });
        const decorations = state.field(symbolHoverTargetsExtension);
        expect(decorations.size).toBe(0);
    });
});

packages/web/src/ee/features/codeNav/components/symbolHoverPopup/symbolHoverTargetsExtension.ts

Lines changed: 13 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { StateField, Range } from "@codemirror/state";
22
import { Decoration, DecorationSet, EditorView } from "@codemirror/view";
33
import { ensureSyntaxTree } from "@codemirror/language";
4+
import { getStyleTags, tags as t } from "@lezer/highlight";
45
import { measureSync } from "@/lib/utils";
56

67
export const SYMBOL_HOVER_TARGET_DATA_ATTRIBUTE = "data-symbol-hover-target";
@@ -10,48 +11,6 @@ const decoration = Decoration.mark({
1011
attributes: { [SYMBOL_HOVER_TARGET_DATA_ATTRIBUTE]: "true" }
1112
});
1213

13-
const NODE_TYPES = [
14-
// Typescript + Python
15-
"VariableName",
16-
"VariableDefinition",
17-
"TypeDefinition",
18-
"TypeName",
19-
"PropertyName",
20-
"PropertyDefinition",
21-
"JSXIdentifier",
22-
"Identifier",
23-
// C#
24-
"VarName",
25-
"TypeIdentifier",
26-
"PropertyName",
27-
"MethodName",
28-
"Ident",
29-
"ParamName",
30-
"AttrsNamedArg",
31-
// C/C++
32-
"Identifier",
33-
"NamespaceIdentifier",
34-
"FieldIdentifier",
35-
// Objective-C
36-
"variableName",
37-
"variableName.definition",
38-
// Java
39-
"Definition",
40-
// Rust
41-
"BoundIdentifier",
42-
// Go
43-
"DefName",
44-
"FieldName",
45-
// PHP
46-
"ClassMemberName",
47-
"Name",
48-
// Tcl
49-
"ProcName",
50-
"ProcInvocation",
51-
"PackageName",
52-
"Variable"
53-
]
54-
5514
export const symbolHoverTargetsExtension = StateField.define<DecorationSet>({
5615
create(state) {
5716
// @note: we need to use `ensureSyntaxTree` here (as opposed to `syntaxTree`)
@@ -60,16 +19,20 @@ export const symbolHoverTargetsExtension = StateField.define<DecorationSet>({
6019
const { data: tree } = measureSync(() => ensureSyntaxTree(state, state.doc.length, Infinity), "ensureSyntaxTree");
6120
const decorations: Range<Decoration>[] = [];
6221

63-
// @note: useful for debugging
64-
// const getTextAt = (from: number, to: number) => {
65-
// const doc = state.doc;
66-
// return doc.sliceString(from, to);
67-
// }
68-
6922
tree?.iterate({
7023
enter: (node) => {
71-
// console.log(node.type.name, getTextAt(node.from, node.to));
72-
if (NODE_TYPES.includes(node.type.name) && node.from < node.to) {
24+
if (node.from >= node.to) {
25+
return;
26+
}
27+
const styleTags = getStyleTags(node);
28+
if (!styleTags) {
29+
return;
30+
}
31+
// `Tag.set` is a tag's parent chain. All identifier-shaped highlight tags
32+
// (variableName, typeName, propertyName, etc.) — including modifier-wrapped
33+
// forms like `definition(variableName)` — descend from `tags.name`.
34+
const isIdentifier = styleTags.tags.some(tag => tag.set.includes(t.name));
35+
if (isIdentifier) {
7336
decorations.push(decoration.range(node.from, node.to));
7437
}
7538
},

0 commit comments

Comments
 (0)