Skip to content

Commit 5772ee4

Browse files
asgerf and Copilot committed
YEAST: add NodeRef type, YeastDisplay trait, and source text storage
Introduce NodeRef as a typed wrapper around node arena IDs. Captures in desugaring rules are now bound as NodeRef instead of raw usize, which prevents accidental misuse and enables source-text-aware rendering.

Add the YeastDisplay trait as an alternative to Display: its yeast_to_string method receives the Ast, allowing NodeRef to resolve to the captured node's source text instead of printing a numeric ID.

Store the original source bytes in the Ast so that NodeContent::Range values (from synthesized literal nodes) can be resolved back to text.

Update yeast-macros to emit NodeRef-typed capture bindings and use Into::<usize>::into where raw IDs are needed. The #{expr} template syntax now uses YeastDisplay instead of Display.

The effect is visible in the corpus tests: operator nodes now correctly render as e.g. operator "+" instead of operator "3".

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 72b683d commit 5772ee4

7 files changed

Lines changed: 221 additions & 28 deletions

File tree

shared/tree-sitter-extractor/src/extractor/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ pub fn extract(
326326

327327
if let Some(yeast_runner) = yeast_runner {
328328
let ast = yeast_runner
329-
.run_from_tree(&tree)
329+
.run_from_tree(&tree, source)
330330
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
331331
traverse_yeast(&ast, &mut visitor);
332332
} else {

shared/yeast-macros/src/parse.rs

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
299299
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
300300
let group = expect_group(tokens, Delimiter::Brace)?;
301301
let expr = group.stream();
302-
Ok(quote! { #expr })
302+
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
303303
}
304304
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
305305
let group = expect_group(tokens, Delimiter::Parenthesis)?;
@@ -329,12 +329,17 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
329329
return Ok(quote! { #ctx.literal(#kind_str, #lit) });
330330
}
331331

332-
// Check for (kind #{expr}) — computed literal, expr converted via .to_string()
332+
// Check for (kind #{expr}) — computed literal, expr converted via YeastDisplay
333333
if peek_is_hash(tokens) {
334334
tokens.next(); // consume #
335335
let group = expect_group(tokens, Delimiter::Brace)?;
336336
let expr = group.stream();
337-
return Ok(quote! { #ctx.literal(#kind_str, &(#expr).to_string()) });
337+
return Ok(quote! {
338+
{
339+
let __value = yeast::YeastDisplay::yeast_to_string(&(#expr), &*#ctx.ast);
340+
#ctx.literal(#kind_str, &__value)
341+
}
342+
});
338343
}
339344

340345
// Check for (kind $fresh)
@@ -374,15 +379,19 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
374379
inner.next(); // consume first .
375380
inner.next(); // consume second .
376381
let expr: proc_macro2::TokenStream = inner.collect();
377-
stmts.push(quote! { let #temp: Vec<usize> = #expr; });
382+
stmts.push(quote! {
383+
let #temp: Vec<usize> = (#expr).into_iter()
384+
.map(::std::convert::Into::<usize>::into)
385+
.collect();
386+
});
378387
field_args.push(quote! { (#field_str, #temp) });
379388
continue;
380389
}
381390
}
382391
}
383392

384393
let value = parse_direct_node(tokens, ctx)?;
385-
stmts.push(quote! { let #temp = #value; });
394+
stmts.push(quote! { let #temp: usize = #value; });
386395
field_args.push(quote! { (#field_str, vec![#temp]) });
387396
}
388397

@@ -427,10 +436,16 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
427436
inner.next(); // consume first .
428437
inner.next(); // consume second .
429438
let expr: TokenStream = inner.collect();
430-
items.push(quote! { __nodes.extend(#expr); });
439+
items.push(quote! {
440+
__nodes.extend(
441+
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
442+
);
443+
});
431444
} else {
432445
let expr = group.stream();
433-
items.push(quote! { __nodes.push(#expr); });
446+
items.push(quote! {
447+
__nodes.push(::std::convert::Into::<usize>::into(#expr));
448+
});
434449
}
435450
continue;
436451
}
@@ -580,13 +595,24 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
580595
let name_str = &cap.name;
581596
match cap.multiplicity {
582597
CaptureMultiplicity::Repeated => {
583-
quote! { let #name: Vec<usize> = __captures.get_all(#name_str); }
598+
quote! {
599+
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
600+
.into_iter()
601+
.map(yeast::NodeRef)
602+
.collect();
603+
}
584604
}
585605
CaptureMultiplicity::Optional => {
586-
quote! { let #name: Option<usize> = __captures.get_opt(#name_str); }
606+
quote! {
607+
let #name: Option<yeast::NodeRef> =
608+
__captures.get_opt(#name_str).map(yeast::NodeRef);
609+
}
587610
}
588611
CaptureMultiplicity::Single => {
589-
quote! { let #name: usize = __captures.get_var(#name_str).unwrap(); }
612+
quote! {
613+
let #name: yeast::NodeRef =
614+
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
615+
}
590616
}
591617
}
592618
})
@@ -613,19 +639,26 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
613639
CaptureMultiplicity::Repeated => quote! {
614640
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
615641
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
616-
__fields.insert(__field_id, #name);
642+
__fields.insert(
643+
__field_id,
644+
#name.into_iter()
645+
.map(::std::convert::Into::<usize>::into)
646+
.collect(),
647+
);
617648
},
618649
CaptureMultiplicity::Optional => quote! {
619650
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
620651
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
621652
if let Some(__id) = #name {
622-
__fields.entry(__field_id).or_insert_with(Vec::new).push(__id);
653+
__fields.entry(__field_id).or_insert_with(Vec::new)
654+
.push(::std::convert::Into::<usize>::into(__id));
623655
}
624656
},
625657
CaptureMultiplicity::Single => quote! {
626658
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
627659
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
628-
__fields.entry(__field_id).or_insert_with(Vec::new).push(#name);
660+
__fields.entry(__field_id).or_insert_with(Vec::new)
661+
.push(::std::convert::Into::<usize>::into(#name));
629662
},
630663
}
631664
})

shared/yeast/src/lib.rs

Lines changed: 113 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,73 @@ pub use cursor::Cursor;
2323
use query::QueryNode;
2424

2525
/// Node ids are indexes into the arena
26-
type Id = usize;
26+
pub type Id = usize;
2727

2828
/// Field and Kind ids are provided by tree-sitter
2929
type FieldId = u16;
3030
type KindId = u16;
3131

32+
/// A typed reference to a node in an [`Ast`] arena. Wraps an [`Id`] but
33+
/// deliberately does not implement [`std::fmt::Display`]: rendering a node
34+
/// requires the [`Ast`] it lives in (to resolve [`NodeContent::Range`] back
35+
/// to source text). Use [`YeastDisplay::yeast_to_string`] to format it.
36+
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
37+
pub struct NodeRef(pub Id);
38+
39+
impl NodeRef {
40+
pub fn id(self) -> Id {
41+
self.0
42+
}
43+
}
44+
45+
impl From<NodeRef> for Id {
46+
fn from(value: NodeRef) -> Self {
47+
value.0
48+
}
49+
}
50+
51+
/// Like [`std::fmt::Display`], but the formatting routine is given access to
52+
/// the [`Ast`] so that node references can resolve to their source text.
53+
///
54+
/// All standard primitive and string types implement [`YeastDisplay`] via
55+
/// the [`impl_yeast_display_via_display`] macro below. Coherence prevents a
56+
/// blanket `impl<T: Display>`, so additional types must be added explicitly.
57+
pub trait YeastDisplay {
58+
fn yeast_to_string(&self, ast: &Ast) -> String;
59+
}
60+
61+
impl YeastDisplay for NodeRef {
62+
fn yeast_to_string(&self, ast: &Ast) -> String {
63+
ast.source_text(self.0)
64+
}
65+
}
66+
67+
macro_rules! impl_yeast_display_via_display {
68+
($($t:ty),* $(,)?) => {
69+
$(
70+
impl YeastDisplay for $t {
71+
fn yeast_to_string(&self, _ast: &Ast) -> String {
72+
::std::string::ToString::to_string(self)
73+
}
74+
}
75+
)*
76+
};
77+
}
78+
79+
impl_yeast_display_via_display! {
80+
i8, i16, i32, i64, i128, isize,
81+
u8, u16, u32, u64, u128, usize,
82+
f32, f64,
83+
bool, char,
84+
str, String,
85+
}
86+
87+
impl<T: YeastDisplay + ?Sized> YeastDisplay for &T {
88+
fn yeast_to_string(&self, ast: &Ast) -> String {
89+
(**self).yeast_to_string(ast)
90+
}
91+
}
92+
3293
pub const CHILD_FIELD: u16 = u16::MAX;
3394

3495
#[derive(Debug)]
@@ -160,6 +221,9 @@ pub struct Ast {
160221
root: Id,
161222
nodes: Vec<Node>,
162223
schema: schema::Schema,
224+
/// Original source bytes the tree was parsed from. Used to resolve
225+
/// `NodeContent::Range` to text for synthesized literal nodes.
226+
source: Vec<u8>,
163227
}
164228

165229
impl std::fmt::Debug for Ast {
@@ -182,11 +246,41 @@ impl Ast {
182246
schema: schema::Schema,
183247
tree: &tree_sitter::Tree,
184248
language: &tree_sitter::Language,
249+
) -> Self {
250+
Self::from_tree_with_schema_and_source(schema, tree, language, Vec::new())
251+
}
252+
253+
pub fn from_tree_with_schema_and_source(
254+
schema: schema::Schema,
255+
tree: &tree_sitter::Tree,
256+
language: &tree_sitter::Language,
257+
source: Vec<u8>,
185258
) -> Self {
186259
let mut visitor = visitor::Visitor::new(language.clone());
187260
visitor.visit(tree);
188261

189-
visitor.build_with_schema(schema)
262+
let mut ast = visitor.build_with_schema(schema);
263+
ast.source = source;
264+
ast
265+
}
266+
267+
/// Returns the source text for `id`, resolving `NodeContent::Range`
268+
/// against the stored source bytes when available.
269+
pub fn source_text(&self, id: Id) -> String {
270+
let Some(node) = self.get_node(id) else { return String::new(); };
271+
match &node.content {
272+
NodeContent::Range(range) => {
273+
let start = range.start_byte;
274+
let end = range.end_byte;
275+
if end <= self.source.len() && start <= end {
276+
String::from_utf8_lossy(&self.source[start..end]).into_owned()
277+
} else {
278+
String::new()
279+
}
280+
}
281+
NodeContent::String(s) => s.to_string(),
282+
NodeContent::DynamicString(s) => s.clone(),
283+
}
190284
}
191285

192286
pub fn walk(&self) -> AstCursor {
@@ -894,8 +988,17 @@ impl<'a> Runner<'a> {
894988
})
895989
}
896990

897-
pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Result<Ast, String> {
898-
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), tree, &self.language);
991+
pub fn run_from_tree(
992+
&self,
993+
tree: &tree_sitter::Tree,
994+
source: &[u8],
995+
) -> Result<Ast, String> {
996+
let mut ast = Ast::from_tree_with_schema_and_source(
997+
self.schema.clone(),
998+
tree,
999+
&self.language,
1000+
source.to_vec(),
1001+
);
8991002
self.run_phases(&mut ast)?;
9001003
Ok(ast)
9011004
}
@@ -908,7 +1011,12 @@ impl<'a> Runner<'a> {
9081011
let tree = parser
9091012
.parse(input, None)
9101013
.ok_or_else(|| "Failed to parse input".to_string())?;
911-
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), &tree, &self.language);
1014+
let mut ast = Ast::from_tree_with_schema_and_source(
1015+
self.schema.clone(),
1016+
&tree,
1017+
&self.language,
1018+
input.as_bytes().to_vec(),
1019+
);
9121020
self.run_phases(&mut ast)?;
9131021
Ok(ast)
9141022
}

shared/yeast/src/visitor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ impl Visitor {
5252
root: 0,
5353
schema,
5454
nodes: self.nodes.into_iter().map(|n| n.inner).collect(),
55+
source: Vec::new(),
5556
}
5657
}
5758

shared/yeast/tests/test.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,3 +1060,54 @@ fn test_desugar_for_with_multiple_assignment() {
10601060
"#,
10611061
);
10621062
}
1063+
1064+
/// Regression test: `#{capture}` in a template must render the *source text*
1065+
/// of the captured node, not its arena `Id`. Previously, captures were bound
1066+
/// as `usize`, so `#{cap}` printed the integer id (e.g. `"3"`) via `Display`.
1067+
/// Captures are now bound as `NodeRef`, which has no `Display` impl and
1068+
/// resolves to the captured node's source text via `YeastDisplay`.
1069+
#[test]
1070+
fn test_hash_brace_renders_capture_source_text() {
1071+
let rule = rule!(
1072+
(call
1073+
method: (identifier) @name
1074+
receiver: (identifier) @recv
1075+
)
1076+
=>
1077+
(call
1078+
method: (identifier #{name})
1079+
receiver: (identifier #{recv})
1080+
arguments: (argument_list)
1081+
)
1082+
);
1083+
let dump = run_and_dump("foo.bar()", vec![rule]);
1084+
assert_dump_eq(
1085+
&dump,
1086+
r#"
1087+
program
1088+
call
1089+
arguments: argument_list "foo.bar()"
1090+
method: identifier "bar"
1091+
receiver: identifier "foo"
1092+
"#,
1093+
);
1094+
}
1095+
1096+
/// Regression test: non-`NodeRef` values in `#{expr}` still render via their
1097+
/// `Display` impl (covered by the explicit per-type `YeastDisplay` impls for primitives).
1098+
#[test]
1099+
fn test_hash_brace_renders_integer_expression() {
1100+
let rule = rule!(
1101+
(identifier) @_
1102+
=>
1103+
(identifier #{1 + 2})
1104+
);
1105+
let dump = run_and_dump("foo", vec![rule]);
1106+
assert_dump_eq(
1107+
&dump,
1108+
r#"
1109+
program
1110+
identifier "3"
1111+
"#,
1112+
);
1113+
}

unified/extractor/tests/corpus/swift/desugar.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ source_file
1717
top_level
1818
body:
1919
binary_expr
20-
operator: operator "3"
20+
operator: operator "+"
2121
left: unsupported_node "1"
2222
right: unsupported_node "2"
2323

@@ -40,6 +40,6 @@ source_file
4040
top_level
4141
body:
4242
binary_expr
43-
operator: operator "3"
43+
operator: operator "+"
4444
left: name_expr "foo"
4545
right: name_expr "bar"

0 commit comments

Comments
 (0)