From e0fa6cf78509baee2db093fe9b0ed5a474ddc527 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 13:54:02 +0000 Subject: [PATCH 01/13] yeast: Reify the context and allow user-defined data in it Renames what was previously called `__yeast_ctx` into just `ctx`, and adds a new field `user_ctx` to this context. Said field can contain a struct of any user type (necessitating making various parts of the implementation generic in said type). Through some Deref magic, field accesses are delegated to the inner struct (assuming they are not already defined on `ctx`), which should hopefully make the interface a bit more ergonomic. --- shared/yeast-macros/src/parse.rs | 8 +- shared/yeast/src/bin/main.rs | 2 +- shared/yeast/src/build.rs | 48 ++++- shared/yeast/src/lib.rs | 181 ++++++++++++------ shared/yeast/tests/test.rs | 75 ++++---- .../extractor/src/languages/swift/swift.rs | 93 ++++----- .../tests/corpus/swift/variables.txt | 127 ++++++++++++ unified/extractor/tests/corpus_tests.rs | 2 +- 8 files changed, 380 insertions(+), 156 deletions(-) diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 4b27b9804392..594a59e1b5dc 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -296,10 +296,10 @@ fn parse_query_list(tokens: &mut Tokens) -> Result> { // tree! / trees! parsing — direct code generation against BuildCtx // --------------------------------------------------------------------------- -const IMPLICIT_CTX: &str = "__yeast_ctx"; +const IMPLICIT_CTX: &str = "ctx"; /// Determine the context identifier: either explicit `ctx,` or the implicit -/// `__yeast_ctx` from an enclosing `rule!`. +/// `ctx` from an enclosing `rule!`. fn parse_ctx_or_implicit(tokens: &mut Tokens) -> Ident { // Check if first token is an ident followed by a comma let mut lookahead = tokens.clone(); @@ -888,9 +888,9 @@ pub fn parse_rule_top(input: TokenStream) -> Result { Ok(quote! 
{ { let __query = #query_code; - yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option| { + yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _| { #(#bindings)* - let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range); + let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range, __user_ctx); #transform_body })) } diff --git a/shared/yeast/src/bin/main.rs b/shared/yeast/src/bin/main.rs index 975c8e8b25f5..978be21cc003 100644 --- a/shared/yeast/src/bin/main.rs +++ b/shared/yeast/src/bin/main.rs @@ -20,7 +20,7 @@ fn main() { let args = Cli::parse(); let language = get_language(&args.language); let source = std::fs::read_to_string(&args.file).unwrap(); - let runner = yeast::Runner::new(language, &[]); + let runner: yeast::Runner = yeast::Runner::new(language, &[]); let ast = runner.run(&source).unwrap(); println!("{}", ast.print(&source, ast.get_root())); } diff --git a/shared/yeast/src/build.rs b/shared/yeast/src/build.rs index d0f1394ca6d9..6c8b392fb8a7 100644 --- a/shared/yeast/src/build.rs +++ b/shared/yeast/src/build.rs @@ -7,23 +7,46 @@ use crate::{Ast, FieldId, Id, NodeContent}; /// Context for building new AST nodes during a transformation. /// /// Used by the `tree!` and `trees!` macros. Holds a mutable reference to the -/// AST, a reference to the captures from a query match, and a `FreshScope` for -/// generating unique identifiers. -pub struct BuildCtx<'a> { +/// AST, a reference to the captures from a query match, a `FreshScope` for +/// generating unique identifiers, and a mutable reference to a user-defined +/// context of type `C`. 
+/// +/// The user context `C` is shared across rules via the framework's driver: +/// outer rules can write to it before recursive translation, and inner rules +/// can read (or further mutate) it during their transforms. The framework +/// snapshots and restores the user context around each rule application, so +/// mutations made by a rule are visible to its descendants (via recursive +/// translation) but not to its parent's siblings. +/// +/// `BuildCtx` implements [`Deref`] and [`DerefMut`] targeting `C`, so user +/// context fields are accessible as `ctx.my_field` directly (provided they +/// don't collide with `BuildCtx`'s own fields like `ast`, `captures`, etc.). +/// +/// The default `C = ()` means rules that don't need any user context don't +/// pay any cost. +pub struct BuildCtx<'a, C: 'a = ()> { pub ast: &'a mut Ast, pub captures: &'a Captures, pub fresh: &'a FreshScope, /// Source range of the matched node, inherited by synthetic nodes. pub source_range: Option, + /// User-supplied context, accessible directly via `ctx.field` (via Deref). 
+ pub user_ctx: &'a mut C, } -impl<'a> BuildCtx<'a> { - pub fn new(ast: &'a mut Ast, captures: &'a Captures, fresh: &'a FreshScope) -> Self { +impl<'a, C> BuildCtx<'a, C> { + pub fn new( + ast: &'a mut Ast, + captures: &'a Captures, + fresh: &'a FreshScope, + user_ctx: &'a mut C, + ) -> Self { Self { ast, captures, fresh, source_range: None, + user_ctx, } } @@ -32,12 +55,14 @@ impl<'a> BuildCtx<'a> { captures: &'a Captures, fresh: &'a FreshScope, source_range: Option, + user_ctx: &'a mut C, ) -> Self { Self { ast, captures, fresh, source_range, + user_ctx, } } @@ -113,3 +138,16 @@ impl<'a> BuildCtx<'a> { self.ast.prepend_field_child(node_id, field_id, value_id); } } + +impl std::ops::Deref for BuildCtx<'_, C> { + type Target = C; + fn deref(&self) -> &C { + &*self.user_ctx + } +} + +impl std::ops::DerefMut for BuildCtx<'_, C> { + fn deref_mut(&mut self) -> &mut C { + &mut *self.user_ctx + } +} diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 9c3a4ad41141..d93a72221a9a 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -701,17 +701,24 @@ impl From for NodeContent { } /// The transform function for a rule: takes the AST, captured variables, a -/// fresh-name scope, and the source range of the matched node, and returns -/// the IDs of the replacement nodes. -pub type Transform = Box< - dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option) -> Vec +/// fresh-name scope, the source range of the matched node, and a mutable +/// reference to the user context of type `C`. Returns the IDs of the +/// replacement nodes. +pub type Transform = Box< + dyn Fn( + &mut Ast, + Captures, + &tree_builder::FreshScope, + Option, + &mut C, + ) -> Vec + Send + Sync, >; -pub struct Rule { +pub struct Rule { query: QueryNode, - transform: Transform, + transform: Transform, /// If true, after this rule fires on a node the engine will try to /// re-apply this same rule on the result root. 
Defaults to false: /// each rule fires at most once on a given node, which prevents @@ -719,8 +726,8 @@ pub struct Rule { repeated: bool, } -impl Rule { - pub fn new(query: QueryNode, transform: Transform) -> Self { +impl Rule { + pub fn new(query: QueryNode, transform: Transform) -> Self { Self { query, transform, @@ -742,9 +749,10 @@ impl Rule { ast: &mut Ast, node: Id, fresh: &tree_builder::FreshScope, + user_ctx: &mut C, ) -> Result>, String> { match self.try_match(ast, node)? { - Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))), + Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh, user_ctx))), None => Ok(None), } } @@ -768,29 +776,30 @@ impl Rule { captures: Captures, node: Id, fresh: &tree_builder::FreshScope, + user_ctx: &mut C, ) -> Vec { fresh.next_scope(); let source_range = ast.get_node(node).and_then(|n| match n.content { NodeContent::Range(r) => Some(r), _ => n.source_range, }); - (self.transform)(ast, captures, fresh, source_range) + (self.transform)(ast, captures, fresh, source_range, user_ctx) } } const MAX_REWRITE_DEPTH: usize = 100; /// Index of rules by their root query kind for fast lookup. -struct RuleIndex<'a> { +struct RuleIndex<'a, C> { /// Rules indexed by root node kind name. - by_kind: BTreeMap<&'static str, Vec<&'a Rule>>, + by_kind: BTreeMap<&'static str, Vec<&'a Rule>>, /// Rules with wildcard queries (Any) that apply to all nodes. 
- wildcard: Vec<&'a Rule>, + wildcard: Vec<&'a Rule>, } -impl<'a> RuleIndex<'a> { - fn new(rules: &'a [Rule]) -> Self { - let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new(); +impl<'a, C> RuleIndex<'a, C> { + fn new(rules: &'a [Rule]) -> Self { + let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new(); let mut wildcard = Vec::new(); for rule in rules { match rule.query.root_kind() { @@ -801,7 +810,7 @@ impl<'a> RuleIndex<'a> { Self { by_kind, wildcard } } - fn rules_for_kind(&self, kind: &str) -> impl Iterator { + fn rules_for_kind(&self, kind: &str) -> impl Iterator> { self.by_kind .get(kind) .into_iter() @@ -810,23 +819,25 @@ impl<'a> RuleIndex<'a> { } } -fn apply_repeating_rules( - rules: &[Rule], +fn apply_repeating_rules( + rules: &[Rule], ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, ) -> Result, String> { let index = RuleIndex::new(rules); - apply_repeating_rules_inner(&index, ast, id, fresh, 0, None) + apply_repeating_rules_inner(&index, ast, user_ctx, id, fresh, 0, None) } -fn apply_repeating_rules_inner( - index: &RuleIndex, +fn apply_repeating_rules_inner( + index: &RuleIndex, ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, rewrite_depth: usize, - skip_rule: Option<*const Rule>, + skip_rule: Option<*const Rule>, ) -> Result, String> { if rewrite_depth > MAX_REWRITE_DEPTH { return Err(format!( @@ -837,11 +848,16 @@ fn apply_repeating_rules_inner( let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or(""); for rule in index.rules_for_kind(node_kind) { - let rule_ptr = *rule as *const Rule; + let rule_ptr = *rule as *const Rule; if Some(rule_ptr) == skip_rule { continue; } - if let Some(result_node) = rule.try_rule(ast, id, fresh)? { + // Snapshot the user context before invoking the rule so that any + // mutations the rule makes are visible during recursive translation + // of its result, but not leaked to the parent's siblings. 
+ let snapshot = user_ctx.clone(); + let try_result = rule.try_rule(ast, id, fresh, user_ctx)?; + if let Some(result_node) = try_result { // For non-repeated rules, suppress further application of *this* // rule on the result root, so a rule whose output matches its own // query doesn't loop. Other rules and child traversal are @@ -852,14 +868,19 @@ fn apply_repeating_rules_inner( results.extend(apply_repeating_rules_inner( index, ast, + user_ctx, node, fresh, rewrite_depth + 1, next_skip, )?); } + *user_ctx = snapshot; return Ok(results); } + // Rule didn't match; restore any speculative changes (none expected + // since try_rule only mutates on match, but be defensive). + *user_ctx = snapshot; } // Take the parent's fields by ownership: the recursion will rewrite @@ -874,7 +895,7 @@ fn apply_repeating_rules_inner( for children in fields.values_mut() { let mut new_children: Option> = None; for (i, &child_id) in children.iter().enumerate() { - let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?; + let result = apply_repeating_rules_inner(index, ast, user_ctx, child_id, fresh, rewrite_depth, None)?; let unchanged = result.len() == 1 && result[0] == child_id; match (&mut new_children, unchanged) { (None, true) => {} // unchanged so far, no allocation needed @@ -903,19 +924,21 @@ fn apply_repeating_rules_inner( /// each visited node, recursion proceeds only through captured nodes (not /// through the input node's children directly), and an error is returned if /// no rule matches a visited node. 
-fn apply_one_shot_rules( - rules: &[Rule], +fn apply_one_shot_rules( + rules: &[Rule], ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, ) -> Result, String> { let index = RuleIndex::new(rules); - apply_one_shot_rules_inner(&index, ast, id, fresh, 0) + apply_one_shot_rules_inner(&index, ast, user_ctx, id, fresh, 0) } -fn apply_one_shot_rules_inner( - index: &RuleIndex, +fn apply_one_shot_rules_inner( + index: &RuleIndex, ast: &mut Ast, + user_ctx: &mut C, id: Id, fresh: &tree_builder::FreshScope, rewrite_depth: usize, @@ -932,6 +955,11 @@ fn apply_one_shot_rules_inner( for rule in index.rules_for_kind(node_kind) { if let Some(mut captures) = rule.try_match(ast, id)? { + // Snapshot the user context so that any mutations made by this rule's + // transform are discarded once the rule returns. Each captured node + // that matches a rule is translated by a recursive call that restores + // its own snapshot, so its mutations do not reach this transform. + let snapshot = user_ctx.clone(); // Recursively translate every captured node before invoking the // transform. The transform's output uses output-schema kinds, so // we must translate captured input-schema nodes to their @@ -944,9 +972,11 @@ fn apply_one_shot_rules_inner( if captured_id == id { return Ok(vec![captured_id]); } - apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1) + apply_one_shot_rules_inner(index, ast, user_ctx, captured_id, fresh, rewrite_depth + 1) })?; - return Ok(rule.run_transform(ast, captures, id, fresh)); + let result = rule.run_transform(ast, captures, id, fresh, user_ctx); + *user_ctx = snapshot; + return Ok(result); } } @@ -974,15 +1004,15 @@ pub enum PhaseKind { /// starts. Rules within a phase compete for matches as usual; rules in /// different phases never compete because each traversal only considers the /// current phase's rules. -pub struct Phase { +pub struct Phase { /// Name used in error messages. 
pub name: String, - pub rules: Vec, + pub rules: Vec>, pub kind: PhaseKind, } -impl Phase { - pub fn new(name: impl Into, kind: PhaseKind, rules: Vec) -> Self { +impl Phase { + pub fn new(name: impl Into, kind: PhaseKind, rules: Vec>) -> Self { Self { name: name.into(), rules, @@ -1008,17 +1038,30 @@ impl Phase { /// .add_phase("desugar", PhaseKind::Repeating, desugar_rules) /// .with_output_node_types_yaml(yaml); /// ``` -#[derive(Default)] -pub struct DesugaringConfig { +/// +/// The optional type parameter `C` is the user context type threaded through +/// rule transforms. Defaults to `()` (no user context). +pub struct DesugaringConfig { /// Phases of rule application, applied in order. - pub phases: Vec, + pub phases: Vec>, /// Output node-types in YAML format. If `None`, the input grammar's /// node types are used (i.e. the desugared AST has the same node types /// as the tree-sitter grammar). pub output_node_types_yaml: Option<&'static str>, } -impl DesugaringConfig { +// Manual `Default` impl so users with a custom `C` that doesn't implement +// `Default` can still construct an empty config. +impl Default for DesugaringConfig { + fn default() -> Self { + Self { + phases: Vec::new(), + output_node_types_yaml: None, + } + } +} + +impl DesugaringConfig { /// Create an empty configuration. Add phases via [`add_phase`] and an /// optional output schema via [`with_output_node_types_yaml`]. pub fn new() -> Self { @@ -1030,7 +1073,7 @@ impl DesugaringConfig { mut self, name: impl Into, kind: PhaseKind, - rules: Vec, + rules: Vec>, ) -> Self { self.phases.push(Phase::new(name, kind, rules)); self @@ -1052,15 +1095,15 @@ impl DesugaringConfig { } } -pub struct Runner<'a> { +pub struct Runner<'a, C = ()> { language: tree_sitter::Language, schema: schema::Schema, - phases: &'a [Phase], + phases: &'a [Phase], } -impl<'a> Runner<'a> { +impl<'a, C> Runner<'a, C> { /// Create a runner using the input grammar's schema for output. 
- pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self { + pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self { let schema = schema::Schema::from_language(&language); Self { language, @@ -1073,7 +1116,7 @@ impl<'a> Runner<'a> { pub fn with_schema( language: tree_sitter::Language, schema: &schema::Schema, - phases: &'a [Phase], + phases: &'a [Phase], ) -> Self { Self { language, @@ -1085,7 +1128,7 @@ impl<'a> Runner<'a> { /// Create a runner from a [`DesugaringConfig`]. pub fn from_config( language: tree_sitter::Language, - config: &'a DesugaringConfig, + config: &'a DesugaringConfig, ) -> Result { let schema = config.build_schema(&language)?; Ok(Self { @@ -1094,11 +1137,17 @@ impl<'a> Runner<'a> { phases: &config.phases, }) } +} - pub fn run_from_tree( +impl<'a, C: Clone> Runner<'a, C> { + /// Parse `tree` against `source` and run all phases, threading + /// `user_ctx` through every rule transform. The caller owns the + /// initial context state. + pub fn run_from_tree_with_ctx( &self, tree: &tree_sitter::Tree, source: &[u8], + user_ctx: &mut C, ) -> Result { let mut ast = Ast::from_tree_with_schema_and_source( self.schema.clone(), @@ -1106,11 +1155,13 @@ impl<'a> Runner<'a> { &self.language, source.to_vec(), ); - self.run_phases(&mut ast)?; + self.run_phases(&mut ast, user_ctx)?; Ok(ast) } - pub fn run(&self, input: &str) -> Result { + /// Parse `input` and run all phases, threading `user_ctx` through + /// every rule transform. The caller owns the initial context state. + pub fn run_with_ctx(&self, input: &str, user_ctx: &mut C) -> Result { let mut parser = tree_sitter::Parser::new(); parser .set_language(&self.language) @@ -1124,20 +1175,20 @@ impl<'a> Runner<'a> { &self.language, input.as_bytes().to_vec(), ); - self.run_phases(&mut ast)?; + self.run_phases(&mut ast, user_ctx)?; Ok(ast) } /// Apply each phase in turn to the AST, threading the root through. 
/// A single `FreshScope` is shared across phases so that fresh /// identifiers generated in different phases don't collide. - fn run_phases(&self, ast: &mut Ast) -> Result<(), String> { + fn run_phases(&self, ast: &mut Ast, user_ctx: &mut C) -> Result<(), String> { let fresh = tree_builder::FreshScope::new(); let mut root = ast.get_root(); for phase in self.phases { let res = match phase.kind { - PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh), - PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh), + PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh), + PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh), } .map_err(|e| format!("Phase `{}`: {e}", phase.name))?; if res.len() != 1 { @@ -1153,3 +1204,23 @@ impl<'a> Runner<'a> { Ok(()) } } + +impl<'a, C: Clone + Default> Runner<'a, C> { + /// Parse `tree` against `source` and run all phases, using the + /// default context (`C::default()`) as the initial context state. + pub fn run_from_tree( + &self, + tree: &tree_sitter::Tree, + source: &[u8], + ) -> Result { + let mut user_ctx = C::default(); + self.run_from_tree_with_ctx(tree, source, &mut user_ctx) + } + + /// Parse `input` and run all phases, using the default context + /// (`C::default()`) as the initial context state. + pub fn run(&self, input: &str) -> Result { + let mut user_ctx = C::default(); + self.run_with_ctx(input, &mut user_ctx) + } +} diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 069132d09237..308c72b725fd 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -7,7 +7,7 @@ const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml"); /// Helper: parse Ruby source with no rules, return dump. 
fn parse_and_dump(input: &str) -> String { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run(input).unwrap(); dump_ast(&ast, ast.get_root(), input) } @@ -24,7 +24,7 @@ fn run_and_ast(input: &str, rules: Vec) -> Ast { let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); runner.run(input).unwrap() } @@ -34,7 +34,7 @@ fn run_phased_and_dump(input: &str, phases: Vec) -> String { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let ast = runner.run(input).unwrap(); dump_ast(&ast, ast.get_root(), input) } @@ -46,7 +46,7 @@ fn run_and_get_error(input: &str, rules: Vec) -> String { let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); runner .run(input) .expect_err("expected runner to return an error") @@ -54,7 +54,7 @@ fn run_and_get_error(input: &str, rules: Vec) -> String { /// Helper: parse Ruby source with no rules and dump with schema type errors. 
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run(input).unwrap(); let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap(); dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema) @@ -64,7 +64,7 @@ fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String { /// building schema with language IDs so field checks align with parser fields. fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); - let runner = Runner::new(lang.clone(), &[]); + let runner: Runner = Runner::new(lang.clone(), &[]); let ast = runner.run(input).unwrap(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang) .unwrap(); @@ -76,7 +76,7 @@ fn run_and_dump_typed(input: &str, rules: Vec, schema_yaml: &str) -> Strin let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap(); let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let ast = runner.run(input).unwrap(); dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema) } @@ -194,7 +194,7 @@ named: // This rewrite runs and preserves the RHS node kind via capture. // With schema above, preserving `integer` should be reported inline. 
- let rules = vec![yeast::rule!( + let rules: Vec = vec![yeast::rule!( (assignment left: (_) @left right: (_) @right) => (assignment @@ -247,7 +247,7 @@ named: #[test] fn test_query_match() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -268,7 +268,7 @@ fn test_query_match() { #[test] fn test_query_no_match() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -293,7 +293,7 @@ fn test_query_skips_extras_in_positional_match() { // captured comment to nothing (a common idiom, e.g. // `(comment) => ()` in Swift) leaves the capture's match-list empty // and causes the transform to fail with "Variable X has 0 matches". - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("[1, # comment\n2]").unwrap(); // Navigate to the `array` node: program -> array. 
@@ -327,12 +327,12 @@ fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang) .unwrap(); - let phases = vec![Phase::new( + let phases: Vec = vec![Phase::new( "test", PhaseKind::Repeating, vec![yeast::rule!((integer) => (identifier "replaced"))], )]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -350,7 +350,7 @@ fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() { #[test] fn test_query_repeated_capture() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x, y, z = 1").unwrap(); let query = yeast::query!( @@ -375,7 +375,7 @@ fn test_query_repeated_capture() { #[test] fn test_capture_unnamed_node_parenthesized() { // `("=") @op` captures the unnamed `=` token between left and right. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -403,7 +403,7 @@ fn test_capture_unnamed_node_parenthesized() { fn test_capture_bare_underscore_repeated() { // `_` matches named and unnamed nodes in bare-child position. On this // assignment shape, bare children correspond to unnamed tokens (the `=`). 
- let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!((assignment _* @all)); @@ -425,7 +425,7 @@ fn test_capture_bare_underscore_repeated() { #[test] fn test_capture_unnamed_node_bare_literal() { // `"=" @op` (without surrounding parens) is the same as `("=") @op`. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let query = yeast::query!( @@ -454,7 +454,7 @@ fn test_bare_underscore_matches_unnamed() { // Bare `_` matches any node, including unnamed tokens, while `(_)` // matches only named nodes. Demonstrate by matching the unnamed `=` // token in the implicit `child` field of an `assignment`. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -493,7 +493,7 @@ fn test_bare_forms_in_field_position() { // field's value, not just in the bare-children position. This is // syntactic sugar for `(_)` / `("…")` and goes through the same // code paths. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -532,7 +532,7 @@ fn test_forward_scan_finds_unnamed_token_late() { // query for `("end")` skip past the first two and match the third. // Without forward-scan, the matcher took the first child unconditionally // and failed. 
- let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("for x in list do\n y\nend").unwrap(); // Navigate: program > for > do (the body wrapper). @@ -559,7 +559,7 @@ fn test_forward_scan_preserves_order() { // order. A query for ("end") then ("do") should fail because `do` // appears before `end` in the source order; once forward-scan has // consumed `end`, the iterator is exhausted. - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("for x in list do\n y\nend").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -580,7 +580,7 @@ fn test_forward_scan_preserves_order() { #[test] fn test_tree_builder() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let mut ast = runner.run("x = 1").unwrap(); let input = "x = 1"; @@ -598,7 +598,8 @@ fn test_tree_builder() { // Swap left and right let fresh = yeast::tree_builder::FreshScope::new(); - let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh); + let mut user_ctx = (); + let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh, &mut user_ctx); let new_id = yeast::tree!(ctx, (program child: (assignment @@ -626,7 +627,7 @@ fn test_tree_builder() { // tree-sitter-ruby grammar with named fields for nodes that only have // unnamed children in tree-sitter (e.g. block_body.stmt, block_parameters.parameter). 
fn ruby_rules() -> Vec { - let assign_rule = yeast::rule!( + let assign_rule: Rule = yeast::rule!( (assignment left: (left_assignment_list (identifier)* @left @@ -651,7 +652,7 @@ fn ruby_rules() -> Vec { )} ); - let for_rule = yeast::rule!( + let for_rule: Rule = yeast::rule!( (for pattern: (_) @pat value: (in (_) @val) @@ -733,7 +734,7 @@ fn test_desugar_for_loop() { #[test] fn test_shorthand_rule() { - let rule = yeast::rule!( + let rule: Rule = yeast::rule!( (assignment left: (_) @method right: (_) @receiver @@ -885,7 +886,7 @@ fn test_phase_error_includes_phase_name() { PhaseKind::Repeating, vec![swap_assignment_rule().repeated()], )]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let err = runner .run("x = 1") .expect_err("expected runner to return an error"); @@ -928,7 +929,7 @@ fn test_one_shot_phase() { PhaseKind::OneShot, one_shot_xeq1_rules(), )]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -954,7 +955,7 @@ fn test_one_shot_phase_errors_when_no_rule_matches() { let mut rules = one_shot_xeq1_rules(); rules.pop(); let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let err = runner .run("x = 1") @@ -978,7 +979,7 @@ fn test_one_shot_recurses_into_returned_capture() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); - let rules = vec![ + let rules: Vec = vec![ yeast::rule!( (program (_)* @stmts) => @@ -994,7 +995,7 @@ fn test_one_shot_recurses_into_returned_capture() { yeast::rule!((integer) => (integer "INT")), ]; let phases = vec![Phase::new("translate", 
PhaseKind::OneShot, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -1020,7 +1021,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); - let rules = vec![ + let rules: Vec = vec![ yeast::rule!( (program (_)* @stmts) => @@ -1041,7 +1042,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { yeast::rule!((integer) => (integer "INT")), ]; let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; - let runner = Runner::with_schema(lang, &schema, &phases); + let runner: Runner = Runner::with_schema(lang, &schema, &phases); let input = "x = 1"; let ast = runner.run(input).unwrap(); @@ -1065,7 +1066,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { #[test] fn test_cursor_navigation() { - let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); let ast = runner.run("x = 1").unwrap(); let mut cursor = AstCursor::new(&ast); @@ -1139,7 +1140,7 @@ fn test_desugar_for_with_multiple_assignment() { /// resolves to the captured node's source text via `YeastDisplay`. #[test] fn test_hash_brace_renders_capture_source_text() { - let rule = rule!( + let rule: Rule = rule!( (call method: (identifier) @name receiver: (identifier) @recv @@ -1168,7 +1169,7 @@ fn test_hash_brace_renders_capture_source_text() { /// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives). 
#[test] fn test_hash_brace_renders_integer_expression() { - let rule = rule!( + let rule: Rule = rule!( (identifier) @_ => (identifier #{1 + 2}) @@ -1187,7 +1188,7 @@ fn test_hash_brace_renders_integer_expression() { /// source location, not the full source range of the matched rule root. #[test] fn test_hash_brace_uses_capture_location_for_leaf() { - let rule = rule!( + let rule: Rule = rule!( (call method: (identifier) @name receiver: (identifier) @recv diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 79f0e65b02f5..2c786810e49c 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,5 +1,5 @@ use codeql_extractor::extractor::simple; -use yeast::{rule, DesugaringConfig, PhaseKind}; +use yeast::{rule, tree, DesugaringConfig, PhaseKind}; fn translation_rules() -> Vec { vec![ @@ -99,17 +99,15 @@ fn translation_rules() -> Vec { computed_value: (computed_property accessor: _+ @accessors)) => {..{ - let name_text = __yeast_ctx.ast.source_text(pattern.into()); - let ty_ids: Vec = ty.iter().map(|&t| t.into()).collect(); - let acc_ids: Vec = accessors.iter().map(|&a| a.into()).collect(); - for &acc_id in &acc_ids { - let ident = __yeast_ctx.literal("identifier", &name_text); - __yeast_ctx.prepend_field(acc_id, "name", ident); - for &ty_id in ty_ids.iter().rev() { - __yeast_ctx.prepend_field(acc_id, "type", ty_id); + for &acc in &accessors { + let acc_id: usize = acc.into(); + for &t in ty.iter().rev() { + ctx.prepend_field(acc_id, "type", t.into()); } + let name_id = tree!((identifier #{pattern})); + ctx.prepend_field(acc_id, "name", name_id); } - acc_ids + accessors }} ), // Computed property: shorthand getter (no explicit get/set, just statements) → @@ -137,30 +135,19 @@ fn translation_rules() -> Vec { value: _? @val observers: (willset_didset_block willset: _? @ws didset: _? 
@ds)) => + (variable_declaration + pattern: (name_pattern identifier: (identifier #{name})) + type: {..ty} + value: {..val}) {..{ - let name_text = __yeast_ctx.ast.source_text(name.into()); - let val_ids: Vec = val.iter().map(|&v| v.into()).collect(); - let ty_ids: Vec = ty.iter().map(|&t| t.into()).collect(); - let mut obs_ids: Vec = Vec::new(); - obs_ids.extend(ws.iter().map(|&o| { let id: usize = o.into(); id })); - obs_ids.extend(ds.iter().map(|&o| { let id: usize = o.into(); id })); - let ident_for_var = __yeast_ctx.literal("identifier", &name_text); - let pat = __yeast_ctx.node("name_pattern", vec![("identifier", vec![ident_for_var])]); - let mut var_fields: Vec<(&str, Vec)> = vec![("pattern", vec![pat])]; - if !ty_ids.is_empty() { - var_fields.push(("type", ty_ids)); - } - if !val_ids.is_empty() { - var_fields.push(("value", val_ids)); - } - let var_id = __yeast_ctx.node("variable_declaration", var_fields); - let mut result = vec![var_id]; - for obs_id in obs_ids { - let ident = __yeast_ctx.literal("identifier", &name_text); - __yeast_ctx.prepend_field(obs_id, "name", ident); - result.push(obs_id); + let mut obs_ids = Vec::new(); + for &obs in ws.iter().chain(ds.iter()) { + let obs_id: usize = obs.into(); + let ident = tree!((identifier #{name})); + ctx.prepend_field(obs_id, "name", ident); + obs_ids.push(obs_id); } - result + obs_ids }} ), // property_binding with any pattern name (identifier or destructuring) @@ -186,19 +173,19 @@ fn translation_rules() -> Vec { (modifiers)* @mods) => {..{ - let binding_text = __yeast_ctx.ast.source_text(binding_kind.into()); + let binding_text = ctx.ast.source_text(binding_kind.into()); let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); let decl_ids: Vec = decls.iter().map(|&d| d.into()).collect(); for (i, &decl_id) in decl_ids.iter().enumerate() { if i > 0 { - let chained = __yeast_ctx.literal("modifier", "chained_declaration"); - __yeast_ctx.prepend_field(decl_id, "modifier", chained); + let chained = 
ctx.literal("modifier", "chained_declaration"); + ctx.prepend_field(decl_id, "modifier", chained); } for &mod_id in mod_ids.iter().rev() { - __yeast_ctx.prepend_field(decl_id, "modifier", mod_id); + ctx.prepend_field(decl_id, "modifier", mod_id); } - let binding_mod = __yeast_ctx.literal("modifier", &binding_text); - __yeast_ctx.prepend_field(decl_id, "modifier", binding_mod); + let binding_mod = ctx.literal("modifier", &binding_text); + ctx.prepend_field(decl_id, "modifier", binding_mod); } decl_ids }} @@ -256,11 +243,11 @@ fn translation_rules() -> Vec { let case_ids: Vec = cases.iter().map(|&c| c.into()).collect(); for (i, &case_id) in case_ids.iter().enumerate() { if i > 0 { - let chained = __yeast_ctx.literal("modifier", "chained_declaration"); - __yeast_ctx.prepend_field(case_id, "modifier", chained); + let chained = ctx.literal("modifier", "chained_declaration"); + ctx.prepend_field(case_id, "modifier", chained); } for &mod_id in mod_ids.iter().rev() { - __yeast_ctx.prepend_field(case_id, "modifier", mod_id); + ctx.prepend_field(case_id, "modifier", mod_id); } } case_ids @@ -343,7 +330,7 @@ fn translation_rules() -> Vec { {..{ let p_id: usize = p.into(); for &d in def.iter().rev() { - __yeast_ctx.prepend_field(p_id, "default", d.into()); + ctx.prepend_field(p_id, "default", d.into()); } vec![p_id] }} @@ -585,9 +572,9 @@ fn translation_rules() -> Vec { ), // Labeled statement (e.g. `outer: for ...`). Strip the trailing ':' from the label token. 
rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {..{ - let text = __yeast_ctx.ast.source_text(lbl.into()); - let name = __yeast_ctx.literal("identifier", &text[..text.len() - 1]); - vec![__yeast_ctx.node("labeled_stmt", vec![("label", vec![name]), ("stmt", vec![stmt.into()])])] + let text = ctx.ast.source_text(lbl.into()); + let name = ctx.literal("identifier", &text[..text.len() - 1]); + vec![ctx.node("labeled_stmt", vec![("label", vec![name]), ("stmt", vec![stmt.into()])])] }}), // ---- Collections ---- // Array literal @@ -602,7 +589,7 @@ fn translation_rules() -> Vec { keys.iter().zip(vals.iter()).map(|(&k, &v)| { let k_id: usize = k.into(); let v_id: usize = v.into(); - __yeast_ctx.node("key_value_pair", vec![ + ctx.node("key_value_pair", vec![ ("key", vec![k_id]), ("value", vec![v_id]), ]) @@ -885,23 +872,23 @@ fn translation_rules() -> Vec { (modifiers)* @mods) => {..{ - let name_text = __yeast_ctx.ast.source_text(pattern.into()); + let name_text = ctx.ast.source_text(pattern.into()); let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); let ty_ids: Vec = ty.iter().map(|&t| t.into()).collect(); let acc_ids: Vec = accessors.iter().map(|&a| a.into()).collect(); for (i, &acc_id) in acc_ids.iter().enumerate() { if i > 0 { - let chained = __yeast_ctx.literal("modifier", "chained_declaration"); - __yeast_ctx.prepend_field(acc_id, "modifier", chained); + let chained = ctx.literal("modifier", "chained_declaration"); + ctx.prepend_field(acc_id, "modifier", chained); } for &mod_id in mod_ids.iter().rev() { - __yeast_ctx.prepend_field(acc_id, "modifier", mod_id); + ctx.prepend_field(acc_id, "modifier", mod_id); } for &ty_id in ty_ids.iter().rev() { - __yeast_ctx.prepend_field(acc_id, "type", ty_id); + ctx.prepend_field(acc_id, "type", ty_id); } - let ident = __yeast_ctx.literal("identifier", &name_text); - __yeast_ctx.prepend_field(acc_id, "name", ident); + let ident = ctx.literal("identifier", &name_text); + ctx.prepend_field(acc_id, 
"name", ident); } acc_ids }} diff --git a/unified/extractor/tests/corpus/swift/variables.txt b/unified/extractor/tests/corpus/swift/variables.txt index f1da058eef2e..78b80d9a5098 100644 --- a/unified/extractor/tests/corpus/swift/variables.txt +++ b/unified/extractor/tests/corpus/swift/variables.txt @@ -319,3 +319,130 @@ top_level name_expr identifier: identifier "x" value: int_literal "1" + +=== +Property with willSet and didSet observers +=== + +class C { + var x: Int = 0 { + willSet { print(newValue) } + didSet { print(oldValue) } + } +} + +--- + +source_file + statement: + class_declaration + body: + class_body + member: + property_declaration + binding: + value_binding_pattern + mutability: var + declarator: + property_binding + name: + pattern + bound_identifier: simple_identifier "x" + observers: + willset_didset_block + didset: + didset_clause + body: + block + statement: + call_expression + function: simple_identifier "print" + suffix: + call_suffix + arguments: + value_arguments + argument: + value_argument + value: simple_identifier "oldValue" + willset: + willset_clause + body: + block + statement: + call_expression + function: simple_identifier "print" + suffix: + call_suffix + arguments: + value_arguments + argument: + value_argument + value: simple_identifier "newValue" + type: + type_annotation + type: + type + name: + user_type + part: + simple_user_type + name: type_identifier "Int" + value: integer_literal "0" + declaration_kind: class + name: type_identifier "C" + +--- + +top_level + body: + block + stmt: + class_like_declaration + member: + variable_declaration + modifier: modifier "var" + pattern: + name_pattern + identifier: identifier "x" + type: + named_type_expr + name: identifier "Int" + value: int_literal "0" + accessor_declaration + body: + block + stmt: + call_expr + argument: + argument + value: + name_expr + identifier: identifier "newValue" + callee: + name_expr + identifier: identifier "print" + modifier: + modifier "var" + modifier 
"chained_declaration" + name: identifier "x" + accessor_kind: accessor_kind "willSet" + accessor_declaration + body: + block + stmt: + call_expr + argument: + argument + value: + name_expr + identifier: identifier "oldValue" + callee: + name_expr + identifier: identifier "print" + modifier: + modifier "var" + modifier "chained_declaration" + name: identifier "x" + accessor_kind: accessor_kind "didSet" + modifier: modifier "class" + name: identifier "C" diff --git a/unified/extractor/tests/corpus_tests.rs b/unified/extractor/tests/corpus_tests.rs index 0f1057a8e5b9..85a62726d87a 100644 --- a/unified/extractor/tests/corpus_tests.rs +++ b/unified/extractor/tests/corpus_tests.rs @@ -168,7 +168,7 @@ fn dump_raw_parse( lang: &simple::LanguageSpec, input: &str, ) -> Result { - let runner = Runner::new(lang.ts_language.clone(), &[]); + let runner: Runner = Runner::new(lang.ts_language.clone(), &[]); let ast = runner .run(input) .map_err(|e| format!("Failed to parse input: {e}"))?; From 5f73754b95e4b93ed997fd7494e6deefb840402b Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 14:08:32 +0000 Subject: [PATCH 02/13] yeast: Make transforms return `Result` This will enable us to actually capture and log errors in complicated rules (e.g. ones written in Rust) rather than just panicking. 
--- shared/yeast-macros/src/parse.rs | 3 ++- shared/yeast/src/lib.rs | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 594a59e1b5dc..fda419aefc7a 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -891,7 +891,8 @@ pub fn parse_rule_top(input: TokenStream) -> Result { yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _| { #(#bindings)* let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range, __user_ctx); - #transform_body + let __result: Vec = { #transform_body }; + Ok(__result) })) } }) diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index d93a72221a9a..0b0c00ec9103 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -703,7 +703,9 @@ impl From for NodeContent { /// The transform function for a rule: takes the AST, captured variables, a /// fresh-name scope, the source range of the matched node, and a mutable /// reference to the user context of type `C`. Returns the IDs of the -/// replacement nodes. +/// replacement nodes, or an error message if the transform could not be +/// completed (for example, a required capture was missing, or a recursive +/// translation invoked by the transform failed). pub type Transform = Box< dyn Fn( &mut Ast, @@ -711,7 +713,7 @@ pub type Transform = Box< &tree_builder::FreshScope, Option, &mut C, - ) -> Vec + ) -> Result, String> + Send + Sync, >; @@ -752,7 +754,7 @@ impl Rule { user_ctx: &mut C, ) -> Result>, String> { match self.try_match(ast, node)? 
{ - Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh, user_ctx))), + Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh, user_ctx)?)), None => Ok(None), } } @@ -777,7 +779,7 @@ impl Rule { node: Id, fresh: &tree_builder::FreshScope, user_ctx: &mut C, - ) -> Vec { + ) -> Result, String> { fresh.next_scope(); let source_range = ast.get_node(node).and_then(|n| match n.content { NodeContent::Range(r) => Some(r), @@ -974,7 +976,7 @@ fn apply_one_shot_rules_inner( } apply_one_shot_rules_inner(index, ast, user_ctx, captured_id, fresh, rewrite_depth + 1) })?; - let result = rule.run_transform(ast, captures, id, fresh, user_ctx); + let result = rule.run_transform(ast, captures, id, fresh, user_ctx)?; *user_ctx = snapshot; return Ok(result); } From a523c7f47f809ec14c2ba4a5beac49d5c69ddaa7 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 14:32:00 +0000 Subject: [PATCH 03/13] yeast: Pass raw captures to `Rule::new` rules This enables users to specify how and when these captures get translated. In conjunction with the context mechanism, this can be used to e.g. translate some piece of information (e.g. the type of something), record it in the context, and then recursively translate some other capture that relies on this information. This allows information to be cleanly passed into descendants (which can be written using context accesses in the `rule!` macro form). As a consequence of this change, we now need to pass around a TranslatorHandle to perform the manual translation. For Repeating rules, it doesn't really make sense to translate things, so in this case we simply signal an error. Also, the implementation of the `rule!` macro changes slightly (without changing semantics): it now essentially delegates to `Rule::new`, receiving raw captures, but then immediately applies the translation to those captures (which, for the majority of cases, is likely the desired behaviour). 
--- shared/yeast-macros/src/parse.rs | 10 +- shared/yeast/src/build.rs | 50 +++++++++- shared/yeast/src/lib.rs | 155 ++++++++++++++++++++++++++----- 3 files changed, 187 insertions(+), 28 deletions(-) diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index fda419aefc7a..c0f86887ba6e 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -888,9 +888,15 @@ pub fn parse_rule_top(input: TokenStream) -> Result { Ok(quote! { { let __query = #query_code; - yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _| { + yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| { + // Auto-translation prefix: recursively translate every + // captured node before invoking the user's transform body. + // For OneShot rules this preserves the legacy behaviour + // (input-schema captures translated to output-schema + // nodes); for Repeating rules it is a no-op. 
+ __translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?; #(#bindings)* - let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range, __user_ctx); + let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator); let __result: Vec = { #transform_body }; Ok(__result) })) diff --git a/shared/yeast/src/build.rs b/shared/yeast/src/build.rs index 6c8b392fb8a7..9fec7861a55a 100644 --- a/shared/yeast/src/build.rs +++ b/shared/yeast/src/build.rs @@ -2,7 +2,7 @@ use std::collections::BTreeMap; use crate::captures::Captures; use crate::tree_builder::FreshScope; -use crate::{Ast, FieldId, Id, NodeContent}; +use crate::{Ast, FieldId, Id, NodeContent, TranslatorHandle}; /// Context for building new AST nodes during a transformation. /// @@ -24,6 +24,11 @@ use crate::{Ast, FieldId, Id, NodeContent}; /// /// The default `C = ()` means rules that don't need any user context don't /// pay any cost. +/// +/// When constructed by the framework (via the rule! macro), `BuildCtx` also +/// carries a [`TranslatorHandle`] that the [`translate`] method delegates +/// to. When constructed by hand (e.g. in tests), the translator is `None` +/// and [`translate`] returns an error. pub struct BuildCtx<'a, C: 'a = ()> { pub ast: &'a mut Ast, pub captures: &'a Captures, @@ -32,6 +37,9 @@ pub struct BuildCtx<'a, C: 'a = ()> { pub source_range: Option, /// User-supplied context, accessible directly via `ctx.field` (via Deref). pub user_ctx: &'a mut C, + /// Optional translator handle, populated when the context is built by + /// the framework's rule driver. None when the context is built by hand. 
+ pub(crate) translator: Option>, } impl<'a, C> BuildCtx<'a, C> { @@ -47,6 +55,7 @@ impl<'a, C> BuildCtx<'a, C> { fresh, source_range: None, user_ctx, + translator: None, } } @@ -63,6 +72,27 @@ impl<'a, C> BuildCtx<'a, C> { fresh, source_range, user_ctx, + translator: None, + } + } + + /// Construct a `BuildCtx` carrying a translator handle. Used by the + /// `rule!` macro to enable [`translate`] inside rule transforms. + pub fn with_translator( + ast: &'a mut Ast, + captures: &'a Captures, + fresh: &'a FreshScope, + source_range: Option, + user_ctx: &'a mut C, + translator: TranslatorHandle<'a, C>, + ) -> Self { + Self { + ast, + captures, + fresh, + source_range, + user_ctx, + translator: Some(translator), } } @@ -139,6 +169,24 @@ impl<'a, C> BuildCtx<'a, C> { } } +impl BuildCtx<'_, C> { + /// Recursively translate a node via the framework's rule machinery. + /// In a OneShot phase, applies OneShot rules to the given node and + /// returns the resulting node ids. In a Repeating phase, errors + /// (translation is not meaningful when input and output share a + /// schema). + /// + /// Errors if this `BuildCtx` was constructed by hand (without a + /// translator handle) — for example, in unit tests that don't go + /// through the rule driver. 
+ pub fn translate(&mut self, id: Id) -> Result, String> { + match &self.translator { + Some(t) => t.translate(self.ast, self.user_ctx, id), + None => Err("translate() called on a BuildCtx without a translator handle".into()), + } + } +} + impl std::ops::Deref for BuildCtx<'_, C> { type Target = C; fn deref(&self) -> &C { diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 0b0c00ec9103..ac93ae1ab8cb 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -700,12 +700,107 @@ impl From for NodeContent { } } -/// The transform function for a rule: takes the AST, captured variables, a -/// fresh-name scope, the source range of the matched node, and a mutable -/// reference to the user context of type `C`. Returns the IDs of the -/// replacement nodes, or an error message if the transform could not be -/// completed (for example, a required capture was missing, or a recursive -/// translation invoked by the transform failed). +/// A handle that lets a rule transform recursively translate AST nodes via +/// the framework's rule machinery. Constructed by the driver and passed as +/// the last argument of every [`Transform`] invocation. +/// +/// The `rule!` macro uses [`TranslatorHandle::auto_translate_captures`] in +/// its generated prefix to translate captures before running the user's +/// transform body. Manually-written transforms (using [`Rule::new`] +/// directly) can call [`TranslatorHandle::translate`] selectively on +/// specific node ids to control when translation happens. +pub struct TranslatorHandle<'a, C> { + inner: TranslatorImpl<'a, C>, +} + +/// Internal phase-specific translation state. Kept private — callers +/// interact with [`TranslatorHandle`] only. +enum TranslatorImpl<'a, C> { + /// OneShot phase translator: recursively applies OneShot rules. + OneShot { + index: &'a RuleIndex<'a, C>, + fresh: &'a tree_builder::FreshScope, + rewrite_depth: usize, + /// The id of the node the current rule is matching. 
Used by + /// [`auto_translate_captures`] to avoid infinite recursion when a + /// rule captures its own match root (e.g. via `(_) @_`). + matched_root: Id, + }, + /// Repeating phase translator: translation is not meaningful here + /// (input and output schemas are the same). [`translate`] errors; + /// [`auto_translate_captures`] is a no-op so the macro's auto-prefix + /// works unchanged for Repeating rules. + Repeating, +} + +impl<'a, C: Clone> TranslatorHandle<'a, C> { + /// Recursively apply OneShot rules to `id` and return the resulting + /// node ids. Errors in a Repeating phase (where translation is not + /// meaningful). + pub fn translate( + &self, + ast: &mut Ast, + user_ctx: &mut C, + id: Id, + ) -> Result, String> { + match &self.inner { + TranslatorImpl::OneShot { + index, + fresh, + rewrite_depth, + .. + } => apply_one_shot_rules_inner(index, ast, user_ctx, id, fresh, rewrite_depth + 1), + TranslatorImpl::Repeating => { + Err("translate() is not available in a Repeating phase".into()) + } + } + } + + /// Translate every captured node in `captures` in place (OneShot phase + /// only). In a Repeating phase this is a no-op — Repeating rules + /// receive raw captures. + /// + /// Used by the `rule!` macro's generated prefix to preserve the + /// pre-existing "auto-translate captures before running the transform + /// body" behavior. Manually-written transforms typically translate + /// captures selectively via [`translate`] instead. + /// + /// To avoid infinite recursion, a capture whose id matches the rule's + /// matched root (e.g. from a `(_) @_` pattern) is left unchanged. + pub fn auto_translate_captures( + &self, + captures: &mut Captures, + ast: &mut Ast, + user_ctx: &mut C, + ) -> Result<(), String> { + match &self.inner { + TranslatorImpl::OneShot { matched_root, .. 
} => { + let root = *matched_root; + captures.try_map_all_captures(|cid| { + if cid == root { + Ok(vec![cid]) + } else { + self.translate(ast, user_ctx, cid) + } + }) + } + TranslatorImpl::Repeating => Ok(()), + } + } +} + +/// The transform function for a rule. +/// +/// Takes the AST, the (raw, untranslated) captured variables, a fresh-name +/// scope, the source range of the matched node, a mutable reference to the +/// user context of type `C`, and a [`TranslatorHandle`] for recursively +/// translating nodes. Returns the IDs of the replacement nodes, or an +/// error message if the transform could not be completed. +/// +/// Transforms produced by [`Rule::new`] receive **raw** captures and must +/// translate them themselves (via the handle). Transforms produced by the +/// `rule!` macro have an auto-translation prefix injected for backward +/// compatibility. pub type Transform = Box< dyn Fn( &mut Ast, @@ -713,6 +808,7 @@ pub type Transform = Box< &tree_builder::FreshScope, Option, &mut C, + TranslatorHandle<'_, C>, ) -> Result, String> + Send + Sync, @@ -752,9 +848,12 @@ impl Rule { node: Id, fresh: &tree_builder::FreshScope, user_ctx: &mut C, + translator: TranslatorHandle<'_, C>, ) -> Result>, String> { match self.try_match(ast, node)? 
{ - Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh, user_ctx)?)), + Some(captures) => Ok(Some(self.run_transform( + ast, captures, node, fresh, user_ctx, translator, + )?)), None => Ok(None), } } @@ -779,13 +878,14 @@ impl Rule { node: Id, fresh: &tree_builder::FreshScope, user_ctx: &mut C, + translator: TranslatorHandle<'_, C>, ) -> Result, String> { fresh.next_scope(); let source_range = ast.get_node(node).and_then(|n| match n.content { NodeContent::Range(r) => Some(r), _ => n.source_range, }); - (self.transform)(ast, captures, fresh, source_range, user_ctx) + (self.transform)(ast, captures, fresh, source_range, user_ctx, translator) } } @@ -858,7 +958,14 @@ fn apply_repeating_rules_inner( // mutations the rule makes are visible during recursive translation // of its result, but not leaked to the parent's siblings. let snapshot = user_ctx.clone(); - let try_result = rule.try_rule(ast, id, fresh, user_ctx)?; + // Repeating rules don't need a real translator: their captures + // aren't auto-translated (Repeating preserves the input schema), + // and `ctx.translate(id)` errors if invoked from a Repeating + // transform. + let translator = TranslatorHandle { + inner: TranslatorImpl::Repeating, + }; + let try_result = rule.try_rule(ast, id, fresh, user_ctx, translator)?; if let Some(result_node) = try_result { // For non-repeated rules, suppress further application of *this* // rule on the result root, so a rule whose output matches its own @@ -956,27 +1063,25 @@ fn apply_one_shot_rules_inner( let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or(""); for rule in index.rules_for_kind(node_kind) { - if let Some(mut captures) = rule.try_match(ast, id)? { + if let Some(captures) = rule.try_match(ast, id)? { // Snapshot the user context before invoking the rule so that any // mutations the rule (or its transitively-translated captures) // make are visible during this rule's transform, but not leaked // to the parent's siblings. 
let snapshot = user_ctx.clone(); - // Recursively translate every captured node before invoking the - // transform. The transform's output uses output-schema kinds, so - // we must translate captured input-schema nodes to their - // output-schema equivalents first. - captures.try_map_all_captures(|captured_id| { - // Avoid infinite recursion when a capture refers to the root - // node of the matched tree (e.g. an `@_` capture on the - // pattern root): re-analyzing it would match the same rule - // again indefinitely. - if captured_id == id { - return Ok(vec![captured_id]); - } - apply_one_shot_rules_inner(index, ast, user_ctx, captured_id, fresh, rewrite_depth + 1) - })?; - let result = rule.run_transform(ast, captures, id, fresh, user_ctx)?; + // Build the translator handle the transform will use to + // recursively translate captures (or, for macro-generated + // rules, the auto-translate prefix uses it to translate every + // capture up front, preserving the legacy behavior). + let translator = TranslatorHandle { + inner: TranslatorImpl::OneShot { + index, + fresh, + rewrite_depth, + matched_root: id, + }, + }; + let result = rule.run_transform(ast, captures, id, fresh, user_ctx, translator)?; *user_ctx = snapshot; return Ok(result); } From 1ee142d8bdf2b61147e2f3f3ad227eb19034f04e Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 16:06:01 +0000 Subject: [PATCH 04/13] yeast: Add macro for fine-grained rules Adds `manual_rule!` which provides a more low-level interface for defining rewrites. (I'm not entirely sold on the name, so any suggestions would be welcome.) Notably, the captures bound in the body of such rules have _not_ been translated yet -- they still come from the _input_ tree. It is the user's duty to call ctx.translate on these (which has the effect of recursively invoking the translation) before substituting them into the output. 
For _truly_ low-level access, the user can still construct a Rule directly, but this is now somewhat cumbersome as the closure contained therein takes quite a few parameters. Still, the possibility remains. --- shared/yeast-macros/src/lib.rs | 34 +++++++++++ shared/yeast-macros/src/parse.rs | 100 +++++++++++++++++++++++++++++++ shared/yeast/src/build.rs | 23 ++++++- shared/yeast/src/lib.rs | 2 +- 4 files changed, 157 insertions(+), 2 deletions(-) diff --git a/shared/yeast-macros/src/lib.rs b/shared/yeast-macros/src/lib.rs index 07077be51f04..7153cf306443 100644 --- a/shared/yeast-macros/src/lib.rs +++ b/shared/yeast-macros/src/lib.rs @@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream { Err(err) => err.to_compile_error().into(), } } + +/// Define a desugaring rule whose transform is a hand-written Rust block. +/// +/// Use `manual_rule!` when the transform needs control over capture +/// translation timing — for example, when an outer rule needs to set +/// state in `ctx` (the `BuildCtx`'s user context) before recursive +/// translation reaches inner rules that read that state. +/// +/// ```text +/// manual_rule!( +/// (query_pattern field: (_) @name) +/// { +/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables +/// // (`name: NodeRef`, etc.) are bound from the query. +/// let translated = ctx.translate(name)?; +/// Ok(translated) +/// } +/// ) +/// ``` +/// +/// Differences from [`rule!`]: +/// - Captures are **not** auto-translated before the body runs; they +/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or +/// [`BuildCtx::translate_opt`]) to translate them when you choose. +/// - The body is plain Rust returning `Result, String>` — no +/// tree template, no `Ok(...)` wrap. 
+#[proc_macro] +pub fn manual_rule(input: TokenStream) -> TokenStream { + let input2: TokenStream2 = input.into(); + match parse::parse_manual_rule_top(input2) { + Ok(output) => output.into(), + Err(err) => err.to_compile_error().into(), + } +} diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index c0f86887ba6e..96cfa8087754 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -904,6 +904,106 @@ pub fn parse_rule_top(input: TokenStream) -> Result { }) } +/// Parse `manual_rule!( query { body } )`. +/// +/// Like [`parse_rule_top`] but: +/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow). +/// - Generates code that does NOT auto-translate captures before +/// running the body. Capture variables refer to raw (input-schema) +/// nodes; the body is responsible for explicit translation via +/// `ctx.translate(...)`. +/// - The body is included verbatim and must evaluate to +/// `Result, String>`. +pub fn parse_manual_rule_top(input: TokenStream) -> Result { + let mut tokens = input.into_iter().peekable(); + + // Collect query tokens up to the body block `{ ... }`. + let mut query_tokens = Vec::new(); + loop { + match tokens.peek() { + None => { + return Err(syn::Error::new( + Span::call_site(), + "expected a Rust block `{ ... }` after the query in manual_rule!", + )) + } + Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break, + _ => { + query_tokens.push(tokens.next().unwrap()); + } + } + } + + let query_stream: TokenStream = query_tokens.into_iter().collect(); + + // Extract captures from the query (same as in `rule!`). + let captures = extract_captures(&query_stream); + + // Parse the query into the QueryNode-building expression. + let query_code = parse_query_top(query_stream)?; + + // Generate capture bindings (same as in `rule!`). 
+ let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site()); + let bindings: Vec = captures + .iter() + .map(|cap| { + let name = Ident::new(&cap.name, Span::call_site()); + let name_str = &cap.name; + match cap.multiplicity { + CaptureMultiplicity::Repeated => quote! { + let #name: Vec = __captures.get_all(#name_str) + .into_iter() + .map(yeast::NodeRef) + .collect(); + }, + CaptureMultiplicity::Optional => quote! { + let #name: Option = + __captures.get_opt(#name_str).map(yeast::NodeRef); + }, + CaptureMultiplicity::Single => quote! { + let #name: yeast::NodeRef = + yeast::NodeRef(__captures.get_var(#name_str).unwrap()); + }, + } + }) + .collect(); + + // Consume the body block. + let body_group = match tokens.next() { + Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g, + other => { + return Err(syn::Error::new( + Span::call_site(), + format!( + "expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}" + ), + )) + } + }; + let body_stream = body_group.stream(); + + // No tokens should follow the body. + if let Some(tok) = tokens.next() { + return Err(syn::Error::new_spanned( + tok, + "unexpected token after manual_rule! body", + )); + } + + Ok(quote! { + { + let __query = #query_code; + yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| { + // No auto-translate prefix for manual rules — the body + // is responsible for translating captures explicitly. 
+ #(#bindings)* + let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator); + #body_stream + })) + } + }) +} + // --------------------------------------------------------------------------- // Token utilities // --------------------------------------------------------------------------- diff --git a/shared/yeast/src/build.rs b/shared/yeast/src/build.rs index 9fec7861a55a..15f34584d5aa 100644 --- a/shared/yeast/src/build.rs +++ b/shared/yeast/src/build.rs @@ -176,15 +176,36 @@ impl BuildCtx<'_, C> { /// (translation is not meaningful when input and output share a /// schema). /// + /// Accepts any value convertible to [`Id`] (including [`crate::NodeRef`]), + /// so manual rules can pass capture bindings directly without unwrapping. + /// /// Errors if this `BuildCtx` was constructed by hand (without a /// translator handle) — for example, in unit tests that don't go /// through the rule driver. - pub fn translate(&mut self, id: Id) -> Result, String> { + pub fn translate>(&mut self, id: I) -> Result, String> { + let id = id.into(); match &self.translator { Some(t) => t.translate(self.ast, self.user_ctx, id), None => Err("translate() called on a BuildCtx without a translator handle".into()), } } + + /// Translate an optional capture, returning the first translated id or + /// `None`. Convenience for `?`-quantifier captures (`Option`). + /// + /// If the underlying translation produces multiple ids for a single + /// input, only the first is returned. For most use cases (e.g. + /// translating a single type annotation) this is what you want; if + /// you need all ids, use [`translate`] directly. 
+ pub fn translate_opt>( + &mut self, + id: Option, + ) -> Result, String> { + match id { + Some(id) => Ok(self.translate(id)?.into_iter().next()), + None => Ok(None), + } + } } impl std::ops::Deref for BuildCtx<'_, C> { diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index ac93ae1ab8cb..89c6be178ee7 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -16,7 +16,7 @@ pub mod schema; pub mod tree_builder; mod visitor; -pub use yeast_macros::{query, rule, tree, trees}; +pub use yeast_macros::{manual_rule, query, rule, tree, trees}; use captures::Captures; pub use cursor::Cursor; From 85c39c04e0a1d05f8781cfa0c15604e62e025ed0 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 16:14:49 +0000 Subject: [PATCH 05/13] yeast: Hide desugaring behind Desugarer trait This was necessary since otherwise the generic type of the user-specified context (which should only be a concern for yeast) starts to bleed out into the shared extractor. Instead, we type-erase it by putting it inside the aforementioned trait. --- .../src/extractor/mod.rs | 15 +++-- .../src/extractor/simple.rs | 33 +++------- shared/yeast/src/lib.rs | 65 +++++++++++++++++++ .../extractor/src/languages/swift/swift.rs | 11 ++-- unified/extractor/tests/corpus_tests.rs | 31 ++++++--- 5 files changed, 112 insertions(+), 43 deletions(-) diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index 436ff9f65a15..b066fbc85b30 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -280,10 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr } /// Extracts the source file at `path`, which is assumed to be canonicalized. -/// When `yeast_runner` is `Some`, the parsed tree is first transformed -/// through the supplied yeast `Runner` before TRAP extraction. 
Building the -/// `Runner` (which parses YAML and constructs the schema) is the caller's -/// responsibility, allowing it to be done once and shared across files. +/// When `desugarer` is `Some`, the parsed tree is first transformed +/// through the supplied yeast desugarer before TRAP extraction. Building +/// the desugarer (which parses YAML and constructs the schema) is the +/// caller's responsibility, allowing it to be done once and shared across +/// files. #[allow(clippy::too_many_arguments)] pub fn extract( language: &Language, @@ -295,7 +296,7 @@ pub fn extract( path: &Path, source: &[u8], ranges: &[Range], - yeast_runner: Option<&yeast::Runner<'_>>, + desugarer: Option<&dyn yeast::Desugarer>, ) { let path_str = file_paths::normalize_and_transform_path(path, transformer); let source_root = std::env::current_dir() @@ -328,8 +329,8 @@ pub fn extract( schema, ); - if let Some(yeast_runner) = yeast_runner { - let ast = yeast_runner + if let Some(desugarer) = desugarer { + let ast = desugarer .run_from_tree(&tree, source) .unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}")); traverse_yeast(&ast, &mut visitor); diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index 6fcd29b03443..55bf28a3ac21 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -13,11 +13,14 @@ pub struct LanguageSpec { pub prefix: &'static str, pub ts_language: tree_sitter::Language, pub node_types: &'static str, - /// Optional yeast desugaring configuration. When set, the parsed - /// tree is rewritten through yeast before TRAP extraction. The - /// config's `output_node_types_yaml` (if set) provides the schema - /// used both at runtime (for the rewriter) and for TRAP validation. - pub desugar: Option, + /// Optional desugarer. When set, the parsed tree is rewritten through + /// the desugarer before TRAP extraction. 
The desugarer's + /// `output_node_types_yaml()` (if set) provides the schema used both + /// at runtime (for the rewriter) and for TRAP validation. + /// + /// `Box` so the shared extractor is agnostic to + /// the user-defined context type the desugarer uses internally. + pub desugar: Option>, pub file_globs: Vec, } @@ -91,10 +94,9 @@ impl Extractor { .collect(); let mut schemas = vec![]; - let mut yeast_runners = Vec::new(); for lang in &self.languages { let effective_node_types: String = - match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) { + match lang.desugar.as_ref().and_then(|d| d.output_node_types_yaml()) { Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { std::io::Error::other(format!( "Failed to convert YAML node-types to JSON for {}: {e}", @@ -105,21 +107,6 @@ impl Extractor { }; let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?; schemas.push(schema); - - // Build the yeast runner once per language so the YAML schema - // isn't re-parsed for every file. 
- let yeast_runner = lang - .desugar - .as_ref() - .map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config)) - .transpose() - .map_err(|e| { - std::io::Error::other(format!( - "Failed to build desugaring runner for {}: {e}", - lang.prefix - )) - })?; - yeast_runners.push(yeast_runner); } // Construct a single globset containing all language globs, @@ -194,7 +181,7 @@ impl Extractor { &path, &source, &[], - yeast_runners[i].as_ref(), + lang.desugar.as_deref(), ); std::fs::create_dir_all(src_archive_file.parent().unwrap())?; std::fs::copy(&path, &src_archive_file)?; diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 89c6be178ee7..84337a1482c2 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -1331,3 +1331,68 @@ impl<'a, C: Clone + Default> Runner<'a, C> { self.run_with_ctx(input, &mut user_ctx) } } + +// --------------------------------------------------------------------------- +// Desugarer: type-erased view of a DesugaringConfig + Runner +// --------------------------------------------------------------------------- + +/// Type-erased interface to a desugaring pipeline for a single language. +/// +/// Consumers (e.g. a generic tree-sitter extractor) hold +/// `Box` so they can dispatch through the trait without +/// knowing the user context type `C` that's internal to yeast. +/// +/// Construct one via [`ConcreteDesugarer::new`] from a +/// [`DesugaringConfig`] and a [`tree_sitter::Language`]. +pub trait Desugarer: Send + Sync { + /// The output AST schema (in YAML format), or `None` if the input + /// grammar's schema should be used. + fn output_node_types_yaml(&self) -> Option<&'static str>; + + /// Parse `tree` against `source` and run the desugaring pipeline. + /// Each call constructs a fresh default user context internally. 
+ fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) + -> Result; +} + +/// A concrete [`Desugarer`] backed by a [`DesugaringConfig`] for a +/// specific user context type `C`. Stores the language and a pre-built +/// schema so that per-call cost is bounded to constructing a transient +/// [`Runner`] and cloning the schema (no YAML re-parsing). +pub struct ConcreteDesugarer { + language: tree_sitter::Language, + schema: schema::Schema, + config: DesugaringConfig, +} + +impl ConcreteDesugarer { + /// Build a desugarer for `language` from `config`. Parses the output + /// schema YAML once (if set) and stores it for reuse across files. + pub fn new( + language: tree_sitter::Language, + config: DesugaringConfig, + ) -> Result { + let schema = config.build_schema(&language)?; + Ok(Self { + language, + schema, + config, + }) + } +} + +impl Desugarer for ConcreteDesugarer { + fn output_node_types_yaml(&self) -> Option<&'static str> { + self.config.output_node_types_yaml + } + + fn run_from_tree( + &self, + tree: &tree_sitter::Tree, + source: &[u8], + ) -> Result { + let runner = + Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases); + runner.run_from_tree(tree, source) + } +} diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 2c786810e49c..5f4c716431d8 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,5 +1,5 @@ use codeql_extractor::extractor::simple; -use yeast::{rule, tree, DesugaringConfig, PhaseKind}; +use yeast::{rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseKind}; fn translation_rules() -> Vec { vec![ @@ -966,14 +966,17 @@ fn translation_rules() -> Vec { } pub fn language_spec(desugared_ast_schema: &'static str) -> simple::LanguageSpec { - let desugar = DesugaringConfig::new() + let ts_language: tree_sitter::Language = tree_sitter_swift::LANGUAGE.into(); + let config = 
DesugaringConfig::new() .add_phase("translate", PhaseKind::OneShot, translation_rules()) .with_output_node_types_yaml(desugared_ast_schema); + let desugarer = ConcreteDesugarer::new(ts_language.clone(), config) + .expect("failed to build Swift desugarer"); simple::LanguageSpec { prefix: "swift", - ts_language: tree_sitter_swift::LANGUAGE.into(), + ts_language, node_types: tree_sitter_swift::NODE_TYPES, file_globs: vec!["*.swift".into(), "*.swiftinterface".into()], - desugar: Some(desugar), + desugar: Some(Box::new(desugarer)), } } diff --git a/unified/extractor/tests/corpus_tests.rs b/unified/extractor/tests/corpus_tests.rs index 85a62726d87a..e2a0fe17a4c7 100644 --- a/unified/extractor/tests/corpus_tests.rs +++ b/unified/extractor/tests/corpus_tests.rs @@ -150,15 +150,28 @@ fn run_desugaring( lang: &simple::LanguageSpec, input: &str, ) -> Result { - let runner = match lang.desugar.as_ref() { - Some(config) => Runner::from_config(lang.ts_language.clone(), config) - .map_err(|e| format!("Failed to create yeast runner: {e}"))?, - None => Runner::new(lang.ts_language.clone(), &[]), - }; - - runner - .run(input) - .map_err(|e| format!("Failed to parse input: {e}")) + match lang.desugar.as_deref() { + Some(desugarer) => { + // Parse the input ourselves so we don't depend on the desugarer + // knowing about the language. 
+ let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&lang.ts_language) + .map_err(|e| format!("Failed to set language: {e}"))?; + let tree = parser + .parse(input, None) + .ok_or_else(|| "Failed to parse input".to_string())?; + desugarer + .run_from_tree(&tree, input.as_bytes()) + .map_err(|e| format!("Desugaring failed: {e}")) + } + None => { + let runner: Runner = Runner::new(lang.ts_language.clone(), &[]); + runner + .run(input) + .map_err(|e| format!("Failed to parse input: {e}")) + } + } } /// Produce the raw tree-sitter parse tree dump for `input`, with no From 6d138c2bd431c1203b4c7ff640700b33aae7ab4c Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 19:18:08 +0000 Subject: [PATCH 06/13] yeast: Simplify Swift rules using the new machinery Propagates in name and type information for various property declarations, using the context mechanism. This avoids mutating already-translated nodes in-place, and is generally much easier to read. --- .../extractor/src/languages/swift/swift.rs | 130 ++++++++++++------ 1 file changed, 90 insertions(+), 40 deletions(-) diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 5f4c716431d8..3f9b3b371fef 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,7 +1,24 @@ use codeql_extractor::extractor::simple; -use yeast::{rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseKind}; +use yeast::{manual_rule, rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule}; -fn translation_rules() -> Vec { +/// User context propagated from outer `property_binding` rules down to the +/// inner accessor-translation rules so that every `accessor_declaration` +/// emitted by an inner rule is born with the property's `name` (and +/// optionally its `type`) already set — no schema-invalid intermediate +/// state requiring post-hoc mutation. 
+#[derive(Clone, Default)] +struct PropertyContext { + /// Identifier node for the property name, to be used as the + /// `accessor_declaration.name`. Set by the outer property_binding rule + /// before translating accessor children. + property_name: Option, + /// Translated type node for the property type, to be used as the + /// `accessor_declaration.type`. Set by the outer property_binding rule + /// when present. + property_type: Option, +} + +fn translation_rules() -> Vec> { vec![ // ---- Top-level ---- // Capture all top-level statements, including unnamed tokens like `nil`. @@ -88,27 +105,35 @@ fn translation_rules() -> Vec { // nodes for individual declarators. The outer property_declaration rule splices these out // and attaches binding/modifiers from the parent. - // Computed property with explicit accessors (get/set/modify) → - // a sequence of accessor_declaration nodes, each with the property name - // attached. Subsequent accessors will be tagged chained_declaration by - // the outer property_declaration rule. - rule!( + // Computed property with explicit accessors (get/set/modify) → a + // sequence of `accessor_declaration` nodes. The outer rule + // publishes the property's name and type into `ctx` so that each + // inner accessor rule + // (`computed_getter`/`computed_setter`/`computed_modify`) builds + // its `accessor_declaration` with `name` and `type` set from the + // start — no schema-invalid intermediate state. + manual_rule!( (property_binding name: @pattern type: _? @ty computed_value: (computed_property accessor: _+ @accessors)) - => - {..{ - for &acc in &accessors { - let acc_id: usize = acc.into(); - for &t in ty.iter().rev() { - ctx.prepend_field(acc_id, "type", t.into()); - } - let name_id = tree!((identifier #{pattern})); - ctx.prepend_field(acc_id, "name", name_id); + { + // Translate `ty` first so the context holds an + // output-schema node id. 
+ let translated_ty = ctx.translate_opt(ty)?; + // Build the property-name identifier from the + // (untranslated) pattern leaf. + let name_id = tree!((identifier #{pattern})); + + ctx.property_name = Some(name_id); + ctx.property_type = translated_ty; + + let mut result = Vec::new(); + for acc in &accessors { + result.extend(ctx.translate(*acc)?); } - accessors - }} + Ok(result) + } ), // Computed property: shorthand getter (no explicit get/set, just statements) → // a single accessor_declaration with kind "get". @@ -124,31 +149,41 @@ fn translation_rules() -> Vec { accessor_kind: (accessor_kind "get") body: (block stmt: {..body})) ), - // Stored property with willSet/didSet observers (initializer optional) → - // variable_declaration followed by one accessor_declaration per observer, - // each carrying the property name. Subsequent items are tagged - // chained_declaration by the outer property_declaration rule. - rule!( + // Stored property with willSet/didSet observers (initializer + // optional) → a `variable_declaration` followed by one + // `accessor_declaration` per observer, each born with the + // property name set. Manual rule: we publish the property name + // into `ctx` before translating the observer children so the + // inner `willset_clause` / `didset_clause` rules construct + // valid `accessor_declaration` nodes from the start. + manual_rule!( (property_binding name: (pattern bound_identifier: @name) type: _? @ty value: _? @val observers: (willset_didset_block willset: _? @ws didset: _? @ds)) - => - (variable_declaration - pattern: (name_pattern identifier: (identifier #{name})) - type: {..ty} - value: {..val}) - {..{ - let mut obs_ids = Vec::new(); - for &obs in ws.iter().chain(ds.iter()) { - let obs_id: usize = obs.into(); - let ident = tree!((identifier #{name})); - ctx.prepend_field(obs_id, "name", ident); - obs_ids.push(obs_id); + { + // Translate ty and val so the variable_declaration + // below contains output-schema nodes. 
+ let translated_ty = ctx.translate_opt(ty)?; + let translated_val = ctx.translate_opt(val)?; + + let var_decl = tree!( + (variable_declaration + pattern: (name_pattern identifier: (identifier #{name})) + type: {..translated_ty} + value: {..translated_val}) + ); + + // Publish the property name for the observer rules. + ctx.property_name = Some(tree!((identifier #{name}))); + + let mut result = vec![var_decl]; + for obs in ws.into_iter().chain(ds) { + result.extend(ctx.translate(obs)?); } - obs_ids - }} + Ok(result) + } ), // property_binding with any pattern name (identifier or destructuring) rule!( @@ -899,10 +934,14 @@ fn translation_rules() -> Vec { // protocol_property_requirements wrapper — should be consumed by above; fallback rule!((protocol_property_requirements accessor: _* @accs) => {..accs}), // Computed getter → accessor_declaration (body optional). + // Reads `ctx.property_name`/`ctx.property_type` set by the outer + // property_binding manual rule. rule!( (computed_getter body: (block statement: _* @body)?) => (accessor_declaration + name: {ctx.property_name.ok_or("computed_getter outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "get") body: (block stmt: {..body})) ), @@ -911,6 +950,8 @@ fn translation_rules() -> Vec { (computed_setter parameter: @param body: (block statement: _* @body)) => (accessor_declaration + name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "set") parameter: (parameter pattern: (name_pattern identifier: (identifier #{param}))) body: (block stmt: {..body})) @@ -920,6 +961,8 @@ fn translation_rules() -> Vec { (computed_setter body: (block statement: _* @body)?) 
=> (accessor_declaration + name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "set") body: (block stmt: {..body})) ), @@ -928,16 +971,22 @@ fn translation_rules() -> Vec { (computed_modify body: (block statement: _* @body)) => (accessor_declaration + name: {ctx.property_name.ok_or("computed_modify outside property_binding context")?} + type: {..ctx.property_type} accessor_kind: (accessor_kind "modify") body: (block stmt: {..body})) ), - // willset/didset block — spread to children + // willset/didset block — spread to children (only reachable as a + // fallback; the outer property_binding manual rule normally + // captures the willset/didset clauses directly). rule!((willset_didset_block _* @clauses) => {..clauses}), - // willset clause → accessor_declaration (body optional). + // willset clause → accessor_declaration (body optional). Reads + // `ctx.property_name` set by the outer property_binding rule. rule!( (willset_clause body: (block statement: _* @body)?) => (accessor_declaration + name: {ctx.property_name.ok_or("willset_clause outside property_binding context")?} accessor_kind: (accessor_kind "willSet") body: (block stmt: {..body})) ), @@ -946,6 +995,7 @@ fn translation_rules() -> Vec { (didset_clause body: (block statement: _* @body)?) 
=> (accessor_declaration + name: {ctx.property_name.ok_or("didset_clause outside property_binding context")?} accessor_kind: (accessor_kind "didSet") body: (block stmt: {..body})) ), @@ -967,7 +1017,7 @@ fn translation_rules() -> Vec { pub fn language_spec(desugared_ast_schema: &'static str) -> simple::LanguageSpec { let ts_language: tree_sitter::Language = tree_sitter_swift::LANGUAGE.into(); - let config = DesugaringConfig::new() + let config = DesugaringConfig::::new() .add_phase("translate", PhaseKind::OneShot, translation_rules()) .with_output_node_types_yaml(desugared_ast_schema); let desugarer = ConcreteDesugarer::new(ts_language.clone(), config) From 0d845c2ea9046bf3cf5d14d0f4b29cba7fdea4e3 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 22:07:05 +0000 Subject: [PATCH 07/13] unified/swift: Propagate parameter default values via context Extends the context with a field for keeping track of the default value. In the process, we also rename the context to SwiftContext as it now doesn't only concern itself with properties. --- .../extractor/src/languages/swift/swift.rs | 64 +++++++++++-------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 3f9b3b371fef..5eb74cf99ce8 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,24 +1,30 @@ use codeql_extractor::extractor::simple; use yeast::{manual_rule, rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule}; -/// User context propagated from outer `property_binding` rules down to the -/// inner accessor-translation rules so that every `accessor_declaration` -/// emitted by an inner rule is born with the property's `name` (and -/// optionally its `type`) already set — no schema-invalid intermediate -/// state requiring post-hoc mutation. 
+/// User context propagated from outer rules down to the inner rules that +/// emit the corresponding output declarations, so that each emitted node +/// is born with the outer information (name, type, modifiers, etc.) +/// already set — no schema-invalid intermediate state requiring +/// post-hoc mutation. #[derive(Clone, Default)] -struct PropertyContext { - /// Identifier node for the property name, to be used as the - /// `accessor_declaration.name`. Set by the outer property_binding rule - /// before translating accessor children. +struct SwiftContext { + /// Identifier node for the property name. Set by the outer + /// `property_binding` (computed accessors / willSet-didSet) rule + /// before translating accessor children; read by + /// `computed_getter`/`computed_setter`/`computed_modify`/ + /// `willset_clause`/`didset_clause`. property_name: Option, - /// Translated type node for the property type, to be used as the - /// `accessor_declaration.type`. Set by the outer property_binding rule - /// when present. + /// Translated type node for the property type. Set by the outer + /// `property_binding` rule (computed accessors variant) when + /// present; read by `computed_*` rules. property_type: Option, + /// Default-value expression for the next translated `parameter`. Set + /// by the outer `function_parameter` rule; read by the `parameter` + /// rules. + default_value: Option, } -fn translation_rules() -> Vec> { +fn translation_rules() -> Vec> { vec![ // ---- Top-level ---- // Capture all top-level statements, including unnamed tokens like `nil`. @@ -358,17 +364,15 @@ fn translation_rules() -> Vec> { body: (block stmt: {..body_stmts})) ), // Parameters are wrapped in function_parameter, which also carries - // optional default values. - rule!( + // optional default values. Publishes the default value into `ctx` + // before translating the inner `parameter` so the `parameter` + // rules can include it as a `default:` field directly. 
+ manual_rule!( (function_parameter parameter: @p default_value: _? @def) - => - {..{ - let p_id: usize = p.into(); - for &d in def.iter().rev() { - ctx.prepend_field(p_id, "default", d.into()); - } - vec![p_id] - }} + { + ctx.default_value = ctx.translate_opt(def)?; + ctx.translate(p) + } ), // Parameter with external name and type rule!( @@ -376,7 +380,8 @@ fn translation_rules() -> Vec> { => (parameter external_name: (identifier #{ext}) - pattern: (name_pattern identifier: (identifier #{name}))) + pattern: (name_pattern identifier: (identifier #{name})) + default: {..ctx.default_value}) ), rule!( (parameter external_name: @ext name: @name type: @ty) @@ -384,21 +389,24 @@ fn translation_rules() -> Vec> { (parameter external_name: (identifier #{ext}) pattern: (name_pattern identifier: (identifier #{name})) - type: {ty}) + type: {ty} + default: {..ctx.default_value}) ), // Parameter with just name and type (no external name) rule!( (parameter name: @name) => (parameter - pattern: (name_pattern identifier: (identifier #{name}))) + pattern: (name_pattern identifier: (identifier #{name})) + default: {..ctx.default_value}) ), rule!( (parameter name: @name type: @ty) => (parameter pattern: (name_pattern identifier: (identifier #{name})) - type: {ty}) + type: {ty} + default: {..ctx.default_value}) ), // Reference to a function, f(x:y:z:). This is parsed as a call with a single argument with multiple reference_specifier labels. // We don't want downstream QL to try to handle this as a call_expr with a weird argument, so explicitly mark it as unsupported for now. 
@@ -1017,7 +1025,7 @@ fn translation_rules() -> Vec> { pub fn language_spec(desugared_ast_schema: &'static str) -> simple::LanguageSpec { let ts_language: tree_sitter::Language = tree_sitter_swift::LANGUAGE.into(); - let config = DesugaringConfig::::new() + let config = DesugaringConfig::::new() .add_phase("translate", PhaseKind::OneShot, translation_rules()) .with_output_node_types_yaml(desugared_ast_schema); let desugarer = ConcreteDesugarer::new(ts_language.clone(), config) From ae4ccc651cdd6e0df5b300cd525358c3013cdad0 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 22:19:03 +0000 Subject: [PATCH 08/13] unified/swift: Translate protocol properties using context Avoids more "mutation after creation" via prepend_field. Also adds a test to the corpus for exercising this syntax. Although it's not evident, the test output was unchanged by this refactoring. --- .../extractor/src/languages/swift/swift.rs | 113 ++++++++++++------ .../extractor/tests/corpus/swift/types.txt | 90 ++++++++++++++ 2 files changed, 169 insertions(+), 34 deletions(-) diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 5eb74cf99ce8..b2264dd590f9 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -9,19 +9,44 @@ use yeast::{manual_rule, rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseK #[derive(Clone, Default)] struct SwiftContext { /// Identifier node for the property name. Set by the outer - /// `property_binding` (computed accessors / willSet-didSet) rule - /// before translating accessor children; read by - /// `computed_getter`/`computed_setter`/`computed_modify`/ - /// `willset_clause`/`didset_clause`. 
+ /// `property_binding` (computed accessors / willSet-didSet) and + /// `protocol_property_declaration` rules before translating accessor + /// children; read by the accessor inner rules + /// (`computed_getter`/`computed_setter`/`computed_modify`/ + /// `willset_clause`/`didset_clause`/`getter_specifier`/ + /// `setter_specifier`). property_name: Option, /// Translated type node for the property type. Set by the outer - /// `property_binding` rule (computed accessors variant) when - /// present; read by `computed_*` rules. + /// `property_binding` rule (computed accessors variant) and + /// `protocol_property_declaration` when present; read by the + /// accessor inner rules. property_type: Option, /// Default-value expression for the next translated `parameter`. Set /// by the outer `function_parameter` rule; read by the `parameter` /// rules. default_value: Option, + /// Translated outer modifiers (e.g. visibility, attributes) to + /// attach to each child of a flattening outer rule. Set by + /// `protocol_property_declaration` (and, later, + /// `property_declaration`/`enum_entry`). + outer_modifiers: Vec, + /// True when the current child of a flattening outer rule is not + /// the first one — its inner rule should emit a + /// `chained_declaration` modifier so the original grouping can be + /// recovered downstream. + is_chained: bool, +} + +/// Build a freshly-created `chained_declaration` modifier node if +/// `ctx.is_chained`, else `None`. Used by inner declaration rules to +/// emit the chained tag for non-first children of a flattening outer +/// rule. Returns `Option` so it splices via `{..…}` to 0 or 1 ids. 
+fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option { + if ctx.is_chained { + Some(ctx.literal("modifier", "chained_declaration")) + } else { + None + } } fn translation_rules() -> Vec> { @@ -904,41 +929,61 @@ fn translation_rules() -> Vec> { name: (identifier #{name}) bound: {..bound}) ), - // Protocol property declaration: translate each accessor requirement to an - // accessor_declaration without a body, carrying the property name and type. - // Subsequent accessors get chained_declaration (same flattening as computed properties). - rule!( + // Protocol property declaration: translate each accessor + // requirement to an `accessor_declaration` carrying the property + // name, type, and outer modifiers. Manual rule: we publish the + // property's name/type/modifiers into `ctx` and translate each + // accessor with `ctx.is_chained` toggled per iteration so the + // inner `getter_specifier`/`setter_specifier` rules emit + // complete nodes from the start (including the + // `chained_declaration` tag for non-first accessors). + manual_rule!( (protocol_property_declaration - name: @pattern + name: (pattern bound_identifier: @name) requirements: (protocol_property_requirements accessor: _+ @accessors) type: _? 
@ty (modifiers)* @mods) - => - {..{ - let name_text = ctx.ast.source_text(pattern.into()); - let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); - let ty_ids: Vec = ty.iter().map(|&t| t.into()).collect(); - let acc_ids: Vec = accessors.iter().map(|&a| a.into()).collect(); - for (i, &acc_id) in acc_ids.iter().enumerate() { - if i > 0 { - let chained = ctx.literal("modifier", "chained_declaration"); - ctx.prepend_field(acc_id, "modifier", chained); - } - for &mod_id in mod_ids.iter().rev() { - ctx.prepend_field(acc_id, "modifier", mod_id); - } - for &ty_id in ty_ids.iter().rev() { - ctx.prepend_field(acc_id, "type", ty_id); - } - let ident = ctx.literal("identifier", &name_text); - ctx.prepend_field(acc_id, "name", ident); + { + ctx.property_name = Some(tree!((identifier #{name}))); + ctx.property_type = ctx.translate_opt(ty)?; + let mut modifiers = Vec::new(); + for m in mods { + modifiers.extend(ctx.translate(m)?); } - acc_ids - }} + ctx.outer_modifiers = modifiers; + + let mut result = Vec::new(); + for (i, acc) in accessors.into_iter().enumerate() { + ctx.is_chained = i > 0; + result.extend(ctx.translate(acc)?); + } + Ok(result) + } ), // getter_specifier / setter_specifier → bodyless accessor_declaration - rule!((getter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "get"))), - rule!((setter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "set"))), + // getter_specifier / setter_specifier → bodyless + // accessor_declaration. Reads property name/type/modifiers from + // `ctx` set by the outer `protocol_property_declaration` rule. 
+ rule!( + (getter_specifier) + => + (accessor_declaration + name: {ctx.property_name.ok_or("getter_specifier outside protocol_property_declaration context")?} + type: {..ctx.property_type} + accessor_kind: (accessor_kind "get") + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)}) + ), + rule!( + (setter_specifier) + => + (accessor_declaration + name: {ctx.property_name.ok_or("setter_specifier outside protocol_property_declaration context")?} + type: {..ctx.property_type} + accessor_kind: (accessor_kind "set") + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)}) + ), // protocol_property_requirements wrapper — should be consumed by above; fallback rule!((protocol_property_requirements accessor: _* @accs) => {..accs}), // Computed getter → accessor_declaration (body optional). diff --git a/unified/extractor/tests/corpus/swift/types.txt b/unified/extractor/tests/corpus/swift/types.txt index ef15ad87f594..dc3eb9db305b 100644 --- a/unified/extractor/tests/corpus/swift/types.txt +++ b/unified/extractor/tests/corpus/swift/types.txt @@ -924,3 +924,93 @@ top_level accessor_kind: accessor_kind "set" modifier: modifier "class" name: identifier "Box" + +=== +Protocol with read-only and read-write property requirements +=== + +protocol P { + var foo: Int { get } + var bar: String { get set } +} + +--- + +source_file + statement: + protocol_declaration + body: + protocol_body + member: + protocol_property_declaration + name: + pattern + binding: + value_binding_pattern + mutability: var + bound_identifier: simple_identifier "foo" + requirements: + protocol_property_requirements + accessor: + getter_specifier + type: + type_annotation + type: + type + name: + user_type + part: + simple_user_type + name: type_identifier "Int" + protocol_property_declaration + name: + pattern + binding: + value_binding_pattern + mutability: var + bound_identifier: simple_identifier "bar" + requirements: + 
protocol_property_requirements + accessor: + getter_specifier + setter_specifier + type: + type_annotation + type: + type + name: + user_type + part: + simple_user_type + name: type_identifier "String" + name: type_identifier "P" + +--- + +top_level + body: + block + stmt: + class_like_declaration + member: + accessor_declaration + name: identifier "foo" + type: + named_type_expr + name: identifier "Int" + accessor_kind: accessor_kind "get" + accessor_declaration + name: identifier "bar" + type: + named_type_expr + name: identifier "String" + accessor_kind: accessor_kind "get" + accessor_declaration + modifier: modifier "chained_declaration" + name: identifier "bar" + type: + named_type_expr + name: identifier "String" + accessor_kind: accessor_kind "set" + modifier: modifier "protocol" + name: identifier "P" From 199489a2254d6af3da08fa924e5364ba119e97dc Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 24 Jun 2026 22:24:18 +0000 Subject: [PATCH 09/13] unified/swift: Propagate enum_entry outer modifiers via context Same as in the preceding commit, we added a test beforehand for testing this syntax, and verified that it was unchanged by the cleanup in this commit. --- .../extractor/src/languages/swift/swift.rs | 47 +++++++------ .../extractor/tests/corpus/swift/types.txt | 66 +++++++++++++++++++ 2 files changed, 94 insertions(+), 19 deletions(-) diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index b2264dd590f9..786b4c867ba1 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -270,14 +270,18 @@ fn translation_rules() -> Vec> { => (parameter type: {ty}) ), - // enum_case_entry with associated values → class_like_declaration containing - // a constructor whose parameters are the data parameters. + // enum_case_entry with associated values → class_like_declaration + // containing a constructor whose parameters are the data + // parameters. 
Reads outer modifiers / chained tag from `ctx` + // (set by the outer `enum_entry` rule). rule!( (enum_case_entry name: @name data_contents: (enum_type_parameters parameter: _* @params)) => (class_like_declaration + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} modifier: (modifier "enum_case") name: (identifier #{name}) member: (constructor_declaration parameter: {..params} body: (block))) @@ -287,6 +291,8 @@ fn translation_rules() -> Vec> { (enum_case_entry name: @name raw_value: @val) => (variable_declaration + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} modifier: (modifier "enum_case") pattern: (name_pattern identifier: (identifier #{name})) value: {val}) @@ -296,28 +302,31 @@ fn translation_rules() -> Vec> { (enum_case_entry name: @name) => (variable_declaration + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} modifier: (modifier "enum_case") pattern: (name_pattern identifier: (identifier #{name}))) ), - // enum_entry: flatten case entries; attach outer modifiers to each, and - // chained_declaration on every entry after the first. - rule!( + // enum_entry: flatten case entries; publish outer modifiers + // into `ctx` and translate each case with `ctx.is_chained` + // toggled per iteration so the inner `enum_case_entry` rules + // emit complete `modifier:` lists from the start. 
+ manual_rule!( (enum_entry case: _+ @cases (modifiers)* @mods) - => - {..{ - let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); - let case_ids: Vec = cases.iter().map(|&c| c.into()).collect(); - for (i, &case_id) in case_ids.iter().enumerate() { - if i > 0 { - let chained = ctx.literal("modifier", "chained_declaration"); - ctx.prepend_field(case_id, "modifier", chained); - } - for &mod_id in mod_ids.iter().rev() { - ctx.prepend_field(case_id, "modifier", mod_id); - } + { + let mut modifiers = Vec::new(); + for m in mods { + modifiers.extend(ctx.translate(m)?); } - case_ids - }} + ctx.outer_modifiers = modifiers; + + let mut result = Vec::new(); + for (i, case) in cases.into_iter().enumerate() { + ctx.is_chained = i > 0; + result.extend(ctx.translate(case)?); + } + Ok(result) + } ), // Plain assignment: `x = expr` rule!( diff --git a/unified/extractor/tests/corpus/swift/types.txt b/unified/extractor/tests/corpus/swift/types.txt index dc3eb9db305b..9c22ae74798b 100644 --- a/unified/extractor/tests/corpus/swift/types.txt +++ b/unified/extractor/tests/corpus/swift/types.txt @@ -1014,3 +1014,69 @@ top_level accessor_kind: accessor_kind "set" modifier: modifier "protocol" name: identifier "P" + +=== +Enum with comma-separated cases (chained_declaration) +=== + +enum Suit { + case clubs, diamonds, hearts, spades +} + +--- + +source_file + statement: + class_declaration + body: + enum_class_body + member: + enum_entry + case: + enum_case_entry + name: simple_identifier "clubs" + enum_case_entry + name: simple_identifier "diamonds" + enum_case_entry + name: simple_identifier "hearts" + enum_case_entry + name: simple_identifier "spades" + declaration_kind: enum + name: type_identifier "Suit" + +--- + +top_level + body: + block + stmt: + class_like_declaration + member: + variable_declaration + modifier: modifier "enum_case" + pattern: + name_pattern + identifier: identifier "clubs" + variable_declaration + modifier: + modifier "chained_declaration" + modifier 
"enum_case" + pattern: + name_pattern + identifier: identifier "diamonds" + variable_declaration + modifier: + modifier "chained_declaration" + modifier "enum_case" + pattern: + name_pattern + identifier: identifier "hearts" + variable_declaration + modifier: + modifier "chained_declaration" + modifier "enum_case" + pattern: + name_pattern + identifier: identifier "spades" + modifier: modifier "enum" + name: identifier "Suit" From 474bcd4dd1e2032bac603ea7613a18f0e280b0de Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 25 Jun 2026 11:18:31 +0000 Subject: [PATCH 10/13] unified/swift: Propagate property_declaration modifiers via context Gets rid of the final uses of mutation (via prepend_field). The approach is the same as in the preceding commits: we set the appropriate fields on the context when processing the outer node, and then access these fields on the inner nodes. The repeated use of `modifier` fields is a _bit_ clunky, but since we're likely moving to an out-of-band modifier mechanism at some point, I think it's good enough for now. --- .../extractor/src/languages/swift/swift.rs | 122 +++++++++++++----- 1 file changed, 90 insertions(+), 32 deletions(-) diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 786b4c867ba1..a9fca901548e 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -27,9 +27,14 @@ struct SwiftContext { default_value: Option, /// Translated outer modifiers (e.g. visibility, attributes) to /// attach to each child of a flattening outer rule. Set by - /// `protocol_property_declaration` (and, later, - /// `property_declaration`/`enum_entry`). + /// `property_declaration`, `enum_entry`, and + /// `protocol_property_declaration`. outer_modifiers: Vec, + /// The `let`/`var` binding modifier for a `property_declaration`. 
+ /// Set by `property_declaration`; read by the inner declaration + /// rules (`property_binding` variants, accessor rules) so they + /// emit it as part of the output node's `modifier:` field. + binding_modifier: Option, /// True when the current child of a flattening outer rule is not /// the first one — its inner rule should emit a /// `chained_declaration` modifier so the original grouping can be @@ -143,6 +148,12 @@ fn translation_rules() -> Vec> { // (`computed_getter`/`computed_setter`/`computed_modify`) builds // its `accessor_declaration` with `name` and `type` set from the // start — no schema-invalid intermediate state. + // + // Toggles `ctx.is_chained` per accessor iteration: the first + // accessor inherits the outer rule's chained state (i.e. whether + // this whole property_binding is itself a non-first declarator + // of a containing property_declaration); subsequent accessors + // always emit `chained_declaration`. manual_rule!( (property_binding name: @pattern @@ -160,14 +171,19 @@ fn translation_rules() -> Vec> { ctx.property_type = translated_ty; let mut result = Vec::new(); - for acc in &accessors { - result.extend(ctx.translate(*acc)?); + for (i, acc) in accessors.into_iter().enumerate() { + if i > 0 { + ctx.is_chained = true; + } + result.extend(ctx.translate(acc)?); } Ok(result) } ), - // Computed property: shorthand getter (no explicit get/set, just statements) → - // a single accessor_declaration with kind "get". + // Computed property: shorthand getter (no explicit get/set, just + // statements) → a single accessor_declaration with kind "get". + // Reads outer modifiers / chained tag from `ctx` (set by the + // outer `property_declaration` rule). 
rule!( (property_binding name: (pattern bound_identifier: @name) @@ -175,6 +191,9 @@ fn translation_rules() -> Vec> { computed_value: (computed_property statement: _* @body)) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: (identifier #{name}) type: {..ty} accessor_kind: (accessor_kind "get") @@ -187,6 +206,10 @@ fn translation_rules() -> Vec> { // into `ctx` before translating the observer children so the // inner `willset_clause` / `didset_clause` rules construct // valid `accessor_declaration` nodes from the start. + // + // The `variable_declaration` itself inherits the outer rule's + // chained state; observers always get `chained_declaration` + // because they're subsequent outputs of this flattening rule. manual_rule!( (property_binding name: (pattern bound_identifier: @name) @@ -201,6 +224,9 @@ fn translation_rules() -> Vec> { let var_decl = tree!( (variable_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} pattern: (name_pattern identifier: (identifier #{name})) type: {..translated_ty} value: {..translated_val}) @@ -208,6 +234,9 @@ fn translation_rules() -> Vec> { // Publish the property name for the observer rules. ctx.property_name = Some(tree!((identifier #{name}))); + // Observers are subsequent outputs of this flattening + // rule, so they always get `chained_declaration`. + ctx.is_chained = true; let mut result = vec![var_decl]; for obs in ws.into_iter().chain(ds) { @@ -216,7 +245,8 @@ fn translation_rules() -> Vec> { Ok(result) } ), - // property_binding with any pattern name (identifier or destructuring) + // property_binding with any pattern name (identifier or + // destructuring). Reads outer modifiers / chained tag from `ctx`. rule!( (property_binding name: @pattern @@ -224,37 +254,44 @@ fn translation_rules() -> Vec> { value: _? 
@val) => (variable_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} pattern: {pattern} type: {..ty} value: {..val}) ), - // property_declaration: splice declarators (each may translate to multiple nodes — - // variable_declaration and/or accessor_declaration), and attach the binding modifier - // (let/var) and any outer modifiers to each. All children after the first additionally - // get a synthetic chained_declaration modifier so the grouping can be recovered. - rule!( + // property_declaration: flatten declarators (each may translate + // to multiple nodes — variable_declaration and/or + // accessor_declaration) and attach the binding modifier + // (let/var), outer modifiers, and `chained_declaration` for + // non-first declarations. Manual rule: publishes + // binding/outer modifiers into `ctx` and translates each + // declarator with `ctx.is_chained` toggled per iteration. The + // inner declaration rules (`property_binding` variants, + // accessor inner rules) read these fields and emit complete + // `modifier:` lists from the start. 
+ manual_rule!( (property_declaration binding: (value_binding_pattern mutability: @binding_kind) declarator: _* @decls (modifiers)* @mods) - => - {..{ - let binding_text = ctx.ast.source_text(binding_kind.into()); - let mod_ids: Vec = mods.iter().map(|&m| m.into()).collect(); - let decl_ids: Vec = decls.iter().map(|&d| d.into()).collect(); - for (i, &decl_id) in decl_ids.iter().enumerate() { - if i > 0 { - let chained = ctx.literal("modifier", "chained_declaration"); - ctx.prepend_field(decl_id, "modifier", chained); - } - for &mod_id in mod_ids.iter().rev() { - ctx.prepend_field(decl_id, "modifier", mod_id); - } - let binding_mod = ctx.literal("modifier", &binding_text); - ctx.prepend_field(decl_id, "modifier", binding_mod); + { + let binding_text = ctx.ast.source_text(binding_kind.0); + ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text)); + let mut modifiers = Vec::new(); + for m in mods { + modifiers.extend(ctx.translate(m)?); } - decl_ids - }} + ctx.outer_modifiers = modifiers; + + let mut result = Vec::new(); + for (i, decl) in decls.into_iter().enumerate() { + ctx.is_chained = i > 0; + result.extend(ctx.translate(decl)?); + } + Ok(result) + } ), // ---- Enums ---- // enum_type_parameter → parameter (with optional name as pattern). @@ -996,12 +1033,16 @@ fn translation_rules() -> Vec> { // protocol_property_requirements wrapper — should be consumed by above; fallback rule!((protocol_property_requirements accessor: _* @accs) => {..accs}), // Computed getter → accessor_declaration (body optional). - // Reads `ctx.property_name`/`ctx.property_type` set by the outer - // property_binding manual rule. + // Reads property name/type from the outer property_binding rule + // and binding/outer modifiers + chained tag from the outer + // property_declaration rule. rule!( (computed_getter body: (block statement: _* @body)?) 
=> (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: {ctx.property_name.ok_or("computed_getter outside property_binding context")?} type: {..ctx.property_type} accessor_kind: (accessor_kind "get") @@ -1012,6 +1053,9 @@ fn translation_rules() -> Vec> { (computed_setter parameter: @param body: (block statement: _* @body)) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?} type: {..ctx.property_type} accessor_kind: (accessor_kind "set") @@ -1023,6 +1067,9 @@ fn translation_rules() -> Vec> { (computed_setter body: (block statement: _* @body)?) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?} type: {..ctx.property_type} accessor_kind: (accessor_kind "set") @@ -1033,6 +1080,9 @@ fn translation_rules() -> Vec> { (computed_modify body: (block statement: _* @body)) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: {ctx.property_name.ok_or("computed_modify outside property_binding context")?} type: {..ctx.property_type} accessor_kind: (accessor_kind "modify") @@ -1043,11 +1093,16 @@ fn translation_rules() -> Vec> { // captures the willset/didset clauses directly). rule!((willset_didset_block _* @clauses) => {..clauses}), // willset clause → accessor_declaration (body optional). Reads - // `ctx.property_name` set by the outer property_binding rule. + // `ctx.property_name` set by the outer property_binding rule and + // binding/outer modifiers + chained tag from the outer + // property_declaration rule. 
rule!( (willset_clause body: (block statement: _* @body)?) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: {ctx.property_name.ok_or("willset_clause outside property_binding context")?} accessor_kind: (accessor_kind "willSet") body: (block stmt: {..body})) @@ -1057,6 +1112,9 @@ fn translation_rules() -> Vec> { (didset_clause body: (block statement: _* @body)?) => (accessor_declaration + modifier: {..ctx.binding_modifier} + modifier: {..ctx.outer_modifiers.clone()} + modifier: {..chained_modifier(&mut ctx)} name: {ctx.property_name.ok_or("didset_clause outside property_binding context")?} accessor_kind: (accessor_kind "didSet") body: (block stmt: {..body})) From 5136d872ae5db3de26a2d2047c3c8d3402cda99a Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 25 Jun 2026 11:40:22 +0000 Subject: [PATCH 11/13] unified/swift: Replace reduce_left with Rust helpers (Both reduce_left and map are still supported, but we could remove them at this point.) I think this way of writing things makes the intent a lot clearer -- it avoids extending the yeast rule language with complicated constructs, pushing the complexity (such as it is) into Rust instead. --- .../extractor/src/languages/swift/swift.rs | 52 ++++++++++++++++--- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index a9fca901548e..e2ae10bcbd1b 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -54,6 +54,41 @@ fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Optio } } +/// Combine a list of boolean sub-conditions into a single expression by +/// left-folding with the infix `&&` operator. 
Used by control-flow +/// rules (`if`, `guard`, `while`, `repeat-while`) whose tree-sitter +/// nodes carry one or more comma-separated conditions that the target +/// AST represents as a single `condition:` field. Panics on an empty +/// input because every caller's grammar guarantees at least one +/// condition. +fn and_chain( + ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>, + conds: Vec, +) -> yeast::Id { + conds.into_iter() + .map(yeast::Id::from) + .reduce(|acc, elem| { + tree!((binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) + }) + .expect("control-flow statement must have at least one condition") +} + +/// Translate a multi-part identifier (for example `Foo.Bar.Baz`) into a +/// `member_access_expr` chain rooted at a `name_expr` over the first +/// part. Panics on an empty input because the grammar's `_+` quantifier +/// guarantees at least one part. +fn member_chain( + ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>, + parts: Vec, +) -> yeast::Id { + let mut iter = parts.into_iter(); + let first = iter.next().expect("identifier with `part:` must have at least one part"); + let init = tree!((name_expr identifier: (identifier #{first}))); + iter.fold(init, |acc, elem| { + tree!((member_access_expr base: {acc} member: (identifier #{elem}))) + }) +} + fn translation_rules() -> Vec> { vec![ // ---- Top-level ---- @@ -585,11 +620,12 @@ fn translation_rules() -> Vec> { argument: (argument value: {closure})) ), // ---- Control flow ---- + // If statement rule!( (if_statement condition: _* @cond body: @then_body else_branch: _? 
@else_stmts) => (if_expr - condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) + condition: {and_chain(&mut ctx, cond)} then: {then_body} else: {..else_stmts}) ), @@ -598,7 +634,7 @@ fn translation_rules() -> Vec> { (guard_statement condition: _* @cond body: (block statement: _* @else_stmts)) => (guard_if_stmt - condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) + condition: {and_chain(&mut ctx, cond)} else: (block stmt: {..else_stmts})) ), // Ternary expression → if_expr @@ -676,13 +712,17 @@ fn translation_rules() -> Vec> { rule!( (while_statement condition: _* @cond body: (block statement: _* @body)) => - (while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body})) + (while_stmt + condition: {and_chain(&mut ctx, cond)} + body: (block stmt: {..body})) ), // Repeat-while loop rule!( (repeat_while_statement condition: _* @cond body: (block statement: _* @body)) => - (do_while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body})) + (do_while_stmt + condition: {and_chain(&mut ctx, cond)} + body: (block stmt: {..body})) ), // Labeled statement (e.g. `outer: for ...`). Strip the trailing ':' from the label token. 
rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {..{ @@ -770,9 +810,7 @@ fn translation_rules() -> Vec> { rule!( (identifier part: _+ @parts) => - {parts}.reduce_left( - first -> (name_expr identifier: (identifier #{first})), - acc, elem -> (member_access_expr base: {acc} member: (identifier #{elem}))) + {member_chain(&mut ctx, parts)} ), // Scoped import declaration (for example `import struct Foo.Bar`): // flatten the identifier parts into a member_access_expr and bind the From 1c4552edb02e47c714dbda4fb11e62863b4f27d7 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 25 Jun 2026 11:58:54 +0000 Subject: [PATCH 12/13] unified/swift: Use `tree!` instead of ctx.node Cleans up a few places where we were constructing trees piece by piece rather than using the `tree!` macro. In the process, Copilot noticed an issue that should probably be addressed: the labeled_statement rule can never fire, since there are no such nodes in the input. This is possibly as simple as making _labeled_statement (which _does_ exist) named, but I haven't attempted this. Finally, a small change to yeast makes it so that the contents of a {} interpolation can be a Rust block (previously it could only be a single expression). This avoids the need to double-wrap instances where you want to interpolate a single node produced as the final value of some block. 
--- shared/yeast-macros/src/parse.rs | 14 ++++++------- shared/yeast/doc/yeast.md | 16 +++++++++++++- .../extractor/src/languages/swift/swift.rs | 21 +++++++------------ 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 96cfa8087754..3c1d4d44bec4 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -359,7 +359,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result { Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => { let group = expect_group(tokens, Delimiter::Brace)?; let expr = group.stream(); - Ok(quote! { ::std::convert::Into::::into(#expr) }) + Ok(quote! { ::std::convert::Into::::into({ #expr }) }) } Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => { let group = expect_group(tokens, Delimiter::Parenthesis)?; @@ -396,7 +396,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result Result::into) + { #expr }.into_iter().map(::std::convert::Into::::into) } } else { let expr = group.stream(); - quote! { (#expr).into_iter() } + quote! { { #expr }.into_iter() } }; let chained = parse_chain_suffix(tokens, ctx, base)?; stmts.push(quote! { @@ -617,11 +617,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result::into) + { #expr }.into_iter().map(::std::convert::Into::::into) } } else { let expr = group.stream(); - quote! { (#expr).into_iter() } + quote! { { #expr }.into_iter() } }; let chained = parse_chain_suffix(tokens, ctx, base)?; items.push(quote! 
{ @@ -630,7 +630,7 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result::into(#expr)); + __nodes.push(::std::convert::Into::::into({ #expr })); }); } continue; diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md index 823bf1c19425..1700029b43c0 100644 --- a/shared/yeast/doc/yeast.md +++ b/shared/yeast/doc/yeast.md @@ -265,7 +265,21 @@ occurrences of the same `$name` within one `BuildCtx` share the same value: ) ``` -`{..expr}` splices a `Vec` (or any iterable of `Id`): +The contents of `{…}` are treated as a Rust block, so multi-statement +expressions (with `let` bindings) work too: + +```rust +(assignment + left: {tmp} + right: { + let lit = ctx.literal("integer", "0"); + tree!((binary_expr op: (operator "+") left: {tmp} right: {lit})) + }) +``` + +`{..expr}` splices a `Vec` (or any iterable of `Id`); the contents +are likewise a Rust block, so the splice can be the result of arbitrary +computation: ```rust yeast::trees!(ctx, diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index e2ae10bcbd1b..7820d81e29fc 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -725,11 +725,11 @@ fn translation_rules() -> Vec> { body: (block stmt: {..body})) ), // Labeled statement (e.g. `outer: for ...`). Strip the trailing ':' from the label token. 
- rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {..{ + rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => { let text = ctx.ast.source_text(lbl.into()); - let name = ctx.literal("identifier", &text[..text.len() - 1]); - vec![ctx.node("labeled_stmt", vec![("label", vec![name]), ("stmt", vec![stmt.into()])])] - }}), + let name = &text[..text.len() - 1]; + tree!((labeled_stmt label: (identifier #{name}) stmt: {stmt})) + }), // ---- Collections ---- // Array literal rule!((array_literal element: _* @elems) => (array_literal element: {..elems})), @@ -739,16 +739,9 @@ fn translation_rules() -> Vec> { rule!( (dictionary_literal key: _* @keys value: _* @vals) => - (map_literal element: {..{ - keys.iter().zip(vals.iter()).map(|(&k, &v)| { - let k_id: usize = k.into(); - let v_id: usize = v.into(); - ctx.node("key_value_pair", vec![ - ("key", vec![k_id]), - ("value", vec![v_id]), - ]) - }).collect::>() - }}) + (map_literal element: {..keys.into_iter().zip(vals).map(|(k, v)| + tree!((key_value_pair key: {k} value: {v})) + )}) ), rule!((dictionary_literal element: _* @elems) => (map_literal element: {..elems})), rule!((dictionary_literal_item key: @k value: @v) => (key_value_pair key: {k} value: {v})), From af7ae8c4cb313e26ad72b77dffcb2009aeb2beb8 Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 25 Jun 2026 12:26:52 +0000 Subject: [PATCH 13/13] Apply rustfmt Format the touched Rust crates (shared/tree-sitter-extractor, shared/yeast, shared/yeast-macros, unified/extractor) so the tree-sitter-extractor CI fmt check passes. No functional changes. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/extractor/simple.rs | 23 +++--- shared/yeast-macros/src/parse.rs | 27 +++---- shared/yeast/src/build.rs | 5 +- shared/yeast/src/dump.rs | 21 +++--- shared/yeast/src/lib.rs | 59 ++++++++-------- shared/yeast/src/node_types_yaml.rs | 8 +-- shared/yeast/src/schema.rs | 9 +-- shared/yeast/tests/test.rs | 70 +++++++++---------- unified/extractor/src/extractor.rs | 6 +- unified/extractor/src/generator.rs | 13 ++-- .../extractor/src/languages/swift/swift.rs | 13 ++-- unified/extractor/tests/corpus_tests.rs | 22 ++---- 12 files changed, 137 insertions(+), 139 deletions(-) diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index 55bf28a3ac21..9ba6f21778cf 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -95,16 +95,19 @@ impl Extractor { let mut schemas = vec![]; for lang in &self.languages { - let effective_node_types: String = - match lang.desugar.as_ref().and_then(|d| d.output_node_types_yaml()) { - Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { - std::io::Error::other(format!( - "Failed to convert YAML node-types to JSON for {}: {e}", - lang.prefix - )) - })?, - None => lang.node_types.to_string(), - }; + let effective_node_types: String = match lang + .desugar + .as_ref() + .and_then(|d| d.output_node_types_yaml()) + { + Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { + std::io::Error::other(format!( + "Failed to convert YAML node-types to JSON for {}: {e}", + lang.prefix + )) + })?, + None => lang.node_types.to_string(), + }; let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?; schemas.push(schema); } diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 3c1d4d44bec4..fc6031eb39d2 100644 --- a/shared/yeast-macros/src/parse.rs +++ 
b/shared/yeast-macros/src/parse.rs @@ -121,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { std::collections::HashMap::new(); let mut bare_children: Vec = Vec::new(); let push_field_elem = |order: &mut Vec, - map: &mut std::collections::HashMap>, - name: String, - elem: TokenStream| { + map: &mut std::collections::HashMap>, + name: String, + elem: TokenStream| { if !map.contains_key(&name) { order.push(name.clone()); map.insert(name, vec![elem]); @@ -160,8 +160,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { } else { let child = if peek_is_at(tokens) { tokens.next(); - let capture_name = - expect_ident(tokens, "expected capture name after @")?; + let capture_name = expect_ident(tokens, "expected capture name after @")?; let name_str = capture_name.to_string(); quote! { yeast::query::QueryNode::Capture { @@ -420,7 +419,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result Result Result Result { +fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result { let mut current = base; while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') { tokens.next(); // consume . @@ -608,7 +608,8 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result BuildCtx<'_, C> { /// input, only the first is returned. For most use cases (e.g. /// translating a single type annotation) this is what you want; if /// you need all ids, use [`translate`] directly. 
- pub fn translate_opt>( - &mut self, - id: Option, - ) -> Result, String> { + pub fn translate_opt>(&mut self, id: Option) -> Result, String> { match id { Some(id) => Ok(self.translate(id)?.into_iter().next()), None => Ok(None), diff --git a/shared/yeast/src/dump.rs b/shared/yeast/src/dump.rs index d046c192053d..be496d40bd5b 100644 --- a/shared/yeast/src/dump.rs +++ b/shared/yeast/src/dump.rs @@ -53,12 +53,7 @@ pub fn dump_ast_with_options( /// /// Any node that does not match the expected type set for its parent field is /// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line. -pub fn dump_ast_with_type_errors( - ast: &Ast, - root: usize, - source: &str, - schema: &Schema, -) -> String { +pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String { dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default()) } @@ -74,7 +69,15 @@ pub fn dump_ast_with_type_errors_and_options( options: &DumpOptions, ) -> String { let mut out = String::new(); - dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out); + dump_node( + ast, + root, + source, + options, + 0, + Some((schema, None, None)), + &mut out, + ); out } @@ -232,8 +235,8 @@ fn dump_node( } let field_name = ast.field_name_for_id(field_id).unwrap_or("?"); let child_type_check = type_check.map(|(schema, _, _)| { - let expected = expected_for_field(schema, node.kind_name(), field_id) - .or(Some(EMPTY_NODE_TYPES)); + let expected = + expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES)); let parent_field = Some((node.kind_name(), field_name)); (schema, expected, parent_field) }); diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 84337a1482c2..e0fffc551f34 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -297,7 +297,9 @@ impl Ast { /// Returns the source text for `id`, resolving `NodeContent::Range` /// against the stored source bytes when 
available. pub fn source_text(&self, id: Id) -> String { - let Some(node) = self.get_node(id) else { return String::new(); }; + let Some(node) = self.get_node(id) else { + return String::new(); + }; let read_range = |range: &tree_sitter::Range| { let start = range.start_byte; let end = range.end_byte; @@ -488,7 +490,10 @@ impl Ast { /// Prepend a child id to the given field of the given node. pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) { - let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id"); + let node = self + .nodes + .get_mut(node_id) + .expect("prepend_field_child: invalid node id"); node.fields.entry(field_id).or_default().insert(0, value_id); } @@ -737,12 +742,7 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> { /// Recursively apply OneShot rules to `id` and return the resulting /// node ids. Errors in a Repeating phase (where translation is not /// meaningful). - pub fn translate( - &self, - ast: &mut Ast, - user_ctx: &mut C, - id: Id, - ) -> Result, String> { + pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result, String> { match &self.inner { TranslatorImpl::OneShot { index, @@ -851,9 +851,9 @@ impl Rule { translator: TranslatorHandle<'_, C>, ) -> Result>, String> { match self.try_match(ast, node)? 
{ - Some(captures) => Ok(Some(self.run_transform( - ast, captures, node, fresh, user_ctx, translator, - )?)), + Some(captures) => Ok(Some( + self.run_transform(ast, captures, node, fresh, user_ctx, translator)?, + )), None => Ok(None), } } @@ -1004,7 +1004,15 @@ fn apply_repeating_rules_inner( for children in fields.values_mut() { let mut new_children: Option> = None; for (i, &child_id) in children.iter().enumerate() { - let result = apply_repeating_rules_inner(index, ast, user_ctx, child_id, fresh, rewrite_depth, None)?; + let result = apply_repeating_rules_inner( + index, + ast, + user_ctx, + child_id, + fresh, + rewrite_depth, + None, + )?; let unchanged = result.len() == 1 && result[0] == child_id; match (&mut new_children, unchanged) { (None, true) => {} // unchanged so far, no allocation needed @@ -1052,7 +1060,6 @@ fn apply_one_shot_rules_inner( fresh: &tree_builder::FreshScope, rewrite_depth: usize, ) -> Result, String> { - if rewrite_depth > MAX_REWRITE_DEPTH { return Err(format!( "Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \ @@ -1294,8 +1301,12 @@ impl<'a, C: Clone> Runner<'a, C> { let mut root = ast.get_root(); for phase in self.phases { let res = match phase.kind { - PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh), - PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh), + PhaseKind::Repeating => { + apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh) + } + PhaseKind::OneShot => { + apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh) + } } .map_err(|e| format!("Phase `{}`: {e}", phase.name))?; if res.len() != 1 { @@ -1315,11 +1326,7 @@ impl<'a, C: Clone> Runner<'a, C> { impl<'a, C: Clone + Default> Runner<'a, C> { /// Parse `tree` against `source` and run all phases, using the /// default context (`C::default()`) as the initial context state. 
- pub fn run_from_tree( - &self, - tree: &tree_sitter::Tree, - source: &[u8], - ) -> Result { + pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result { let mut user_ctx = C::default(); self.run_from_tree_with_ctx(tree, source, &mut user_ctx) } @@ -1351,8 +1358,7 @@ pub trait Desugarer: Send + Sync { /// Parse `tree` against `source` and run the desugaring pipeline. /// Each call constructs a fresh default user context internally. - fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) - -> Result; + fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result; } /// A concrete [`Desugarer`] backed by a [`DesugaringConfig`] for a @@ -1386,13 +1392,8 @@ impl Desugarer for ConcreteDesugarer self.config.output_node_types_yaml } - fn run_from_tree( - &self, - tree: &tree_sitter::Tree, - source: &[u8], - ) -> Result { - let runner = - Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases); + fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result { + let runner = Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases); runner.run_from_tree(tree, source) } } diff --git a/shared/yeast/src/node_types_yaml.rs b/shared/yeast/src/node_types_yaml.rs index 797f14cba720..f4d9f2a1c427 100644 --- a/shared/yeast/src/node_types_yaml.rs +++ b/shared/yeast/src/node_types_yaml.rs @@ -242,10 +242,7 @@ pub fn convert(yaml_input: &str) -> Result { /// Apply YAML node-type definitions to a mutable Schema. /// Registers all types, fields, and allowed types from the YAML into the schema. 
-fn apply_yaml_to_schema( - yaml: &YamlNodeTypes, - schema: &mut crate::schema::Schema, -) { +fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) { // Register all supertypes as node kinds for name in yaml.supertypes.keys() { schema.register_kind(name); @@ -307,7 +304,8 @@ fn apply_yaml_to_schema( .into_vec() .into_iter() .map(|type_ref| { - let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types); + let (kind, named) = + resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types); crate::schema::NodeType { kind, named } }) .collect::>(); diff --git a/shared/yeast/src/schema.rs b/shared/yeast/src/schema.rs index bbd425f15a2c..da13bb8b6b70 100644 --- a/shared/yeast/src/schema.rs +++ b/shared/yeast/src/schema.rs @@ -198,13 +198,8 @@ impl Schema { .insert((parent_kind.to_string(), field_id), node_types); } - pub fn field_types( - &self, - parent_kind: &str, - field_id: FieldId, - ) -> Option<&Vec> { - self.field_types - .get(&(parent_kind.to_string(), field_id)) + pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec> { + self.field_types.get(&(parent_kind.to_string(), field_id)) } pub fn set_field_cardinality( diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 308c72b725fd..99471f129abf 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -66,8 +66,8 @@ fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let runner: Runner = Runner::new(lang.clone(), &[]); let ast = runner.run(input).unwrap(); - let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang) - .unwrap(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap(); dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema) } @@ -166,7 +166,7 @@ fn test_parse_for_loop() { #[test] fn 
test_dump_highlights_type_errors_inline() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -176,13 +176,13 @@ named: identifier: "#; - let dump = parse_and_dump_typed("x = 1", schema_yaml); - assert!(dump.contains("integer \"1\" <-- ERROR:")); + let dump = parse_and_dump_typed("x = 1", schema_yaml); + assert!(dump.contains("integer \"1\" <-- ERROR:")); } #[test] fn test_dump_reports_preserved_unknown_kind_after_transformation() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -192,25 +192,25 @@ named: identifier: "#; - // This rewrite runs and preserves the RHS node kind via capture. - // With schema above, preserving `integer` should be reported inline. + // This rewrite runs and preserves the RHS node kind via capture. + // With schema above, preserving `integer` should be reported inline. let rules: Vec = vec![yeast::rule!( - (assignment left: (_) @left right: (_) @right) - => - (assignment - left: {left} - right: {right} - ) - )]; + (assignment left: (_) @left right: (_) @right) + => + (assignment + left: {left} + right: {right} + ) + )]; - let dump = run_and_dump_typed("x = 1", rules, schema_yaml); - assert!(dump.contains("integer \"1\" <-- ERROR:")); - assert!(dump.contains("node kind 'integer' not in schema")); + let dump = run_and_dump_typed("x = 1", rules, schema_yaml); + assert!(dump.contains("integer \"1\" <-- ERROR:")); + assert!(dump.contains("node kind 'integer' not in schema")); } #[test] fn test_dump_reports_undeclared_field_on_node() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -219,14 +219,14 @@ named: identifier: "#; - let dump = parse_and_dump_typed_with_language("x = y", schema_yaml); - assert!(dump.contains("right: identifier \"y\" <-- ERROR:")); - assert!(dump.contains("the node 'assignment' has no field 'right'")); + let dump = parse_and_dump_typed_with_language("x = y", schema_yaml); + 
assert!(dump.contains("right: identifier \"y\" <-- ERROR:")); + assert!(dump.contains("the node 'assignment' has no field 'right'")); } #[test] fn test_dump_reports_disallowed_kind_in_field_type() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -237,10 +237,10 @@ named: integer: "#; - let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml); - assert!(dump.contains("right: integer \"1\" <-- ERROR:")); - assert!(dump.contains("should contain")); - assert!(dump.contains("but got integer")); + let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml); + assert!(dump.contains("right: integer \"1\" <-- ERROR:")); + assert!(dump.contains("should contain")); + assert!(dump.contains("but got integer")); } // ---- Query tests ---- @@ -309,15 +309,11 @@ fn test_query_skips_extras_in_positional_match() { let matched = query.do_match(&ast, array_id, &mut captures).unwrap(); assert!(matched); assert_eq!( - ast.get_node(captures.get_var("a").unwrap()) - .unwrap() - .kind(), + ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(), "integer" ); assert_eq!( - ast.get_node(captures.get_var("b").unwrap()) - .unwrap() - .kind(), + ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(), "integer" ); } @@ -325,8 +321,8 @@ fn test_query_skips_extras_in_positional_match() { #[test] fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); - let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang) - .unwrap(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); let phases: Vec = vec![Phase::new( "test", PhaseKind::Repeating, @@ -1205,7 +1201,9 @@ fn test_hash_brace_uses_capture_location_for_leaf() { let mut bar_ids: Vec = Vec::new(); for id in ast.reachable_node_ids() { - let Some(node) = ast.get_node(id) else { continue; }; + let Some(node) = 
ast.get_node(id) else { + continue; + }; if node.kind() == "identifier" && ast.source_text(id) == "bar" { bar_ids.push(id); } diff --git a/unified/extractor/src/extractor.rs b/unified/extractor/src/extractor.rs index 7601fa8addbe..301c6cf533f4 100644 --- a/unified/extractor/src/extractor.rs +++ b/unified/extractor/src/extractor.rs @@ -1,9 +1,9 @@ use clap::Args; use std::path::PathBuf; +use crate::languages; use codeql_extractor::extractor::simple; use codeql_extractor::trap; -use crate::languages; #[derive(Args)] pub struct Options { @@ -35,7 +35,9 @@ pub fn run(options: Options) -> std::io::Result<()> { prefix: "unified".to_string(), languages, trap_dir: options.output_dir, - trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"), + trap_compression: trap::Compression::from_env( + "CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION", + ), source_archive_dir: options.source_archive_dir, file_lists: vec![options.file_list], }; diff --git a/unified/extractor/src/generator.rs b/unified/extractor/src/generator.rs index cbf971a8ff25..974de5dbca97 100644 --- a/unified/extractor/src/generator.rs +++ b/unified/extractor/src/generator.rs @@ -22,14 +22,19 @@ pub fn run(options: Options) -> std::io::Result<()> { // The QL-visible schema is the unified output AST, not the per-language // input grammars. Pass it via `desugar.output_node_types_yaml` so the // generator converts the YAML to JSON node-types. 
- let desugar = yeast::DesugaringConfig::new() - .with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA); + let desugar = + yeast::DesugaringConfig::new().with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA); let languages = vec![Language { name: "Unified".to_owned(), - node_types: "", // unused: generator picks up output_node_types_yaml above + node_types: "", // unused: generator picks up output_node_types_yaml above desugar: Some(desugar), }]; - generate(languages, options.dbscheme, options.library, "run unified/scripts/create-extractor-pack.sh") + generate( + languages, + options.dbscheme, + options.library, + "run unified/scripts/create-extractor-pack.sh", + ) } diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 7820d81e29fc..c84e3cf38676 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,5 +1,5 @@ use codeql_extractor::extractor::simple; -use yeast::{manual_rule, rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule}; +use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree}; /// User context propagated from outer rules down to the inner rules that /// emit the corresponding output declarations, so that each emitted node @@ -82,11 +82,14 @@ fn member_chain( parts: Vec, ) -> yeast::Id { let mut iter = parts.into_iter(); - let first = iter.next().expect("identifier with `part:` must have at least one part"); + let first = iter + .next() + .expect("identifier with `part:` must have at least one part"); let init = tree!((name_expr identifier: (identifier #{first}))); - iter.fold(init, |acc, elem| { - tree!((member_access_expr base: {acc} member: (identifier #{elem}))) - }) + iter.fold( + init, + |acc, elem| tree!((member_access_expr base: {acc} member: (identifier #{elem}))), + ) } fn translation_rules() -> Vec> { diff --git a/unified/extractor/tests/corpus_tests.rs 
b/unified/extractor/tests/corpus_tests.rs index e2a0fe17a4c7..6c859c2f6cf0 100644 --- a/unified/extractor/tests/corpus_tests.rs +++ b/unified/extractor/tests/corpus_tests.rs @@ -2,7 +2,7 @@ use std::fs; use std::path::Path; use codeql_extractor::extractor::simple; -use yeast::{dump::dump_ast, dump::dump_ast_with_type_errors, Runner}; +use yeast::{Runner, dump::dump_ast, dump::dump_ast_with_type_errors}; #[path = "../src/languages/mod.rs"] mod languages; @@ -146,10 +146,7 @@ fn render_corpus(cases: &[CorpusCase]) -> String { out } -fn run_desugaring( - lang: &simple::LanguageSpec, - input: &str, -) -> Result { +fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> Result { match lang.desugar.as_deref() { Some(desugarer) => { // Parse the input ourselves so we don't depend on the desugarer @@ -177,10 +174,7 @@ fn run_desugaring( /// Produce the raw tree-sitter parse tree dump for `input`, with no /// desugaring rules applied. Uses a `Runner` with an empty phase list and /// the input grammar's own schema. -fn dump_raw_parse( - lang: &simple::LanguageSpec, - input: &str, -) -> Result { +fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result { let runner: Runner = Runner::new(lang.ts_language.clone(), &[]); let ast = runner .run(input) @@ -285,11 +279,7 @@ fn test_corpus() { } } - assert!( - failures.is_empty(), - "{}", - failures.join("\n\n") + "\n\n" - ); + assert!(failures.is_empty(), "{}", failures.join("\n\n") + "\n\n"); if update_mode { let updated = render_corpus(&cases); @@ -298,7 +288,9 @@ fn test_corpus() { write_result.is_ok(), "Failed to update corpus file {}: {}", corpus_path.display(), - write_result.err().map_or_else(String::new, |e| e.to_string()) + write_result + .err() + .map_or_else(String::new, |e| e.to_string()) ); } }